Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/cciss.c               |   86
-rw-r--r--  drivers/block/cciss.h               |    1
-rw-r--r--  drivers/block/cciss_cmd.h           |    1
-rw-r--r--  drivers/block/cciss_scsi.c          |   13
-rw-r--r--  drivers/block/drbd/drbd_actlog.c    |  335
-rw-r--r--  drivers/block/drbd/drbd_bitmap.c    |  752
-rw-r--r--  drivers/block/drbd/drbd_int.h       |  270
-rw-r--r--  drivers/block/drbd/drbd_main.c      |  673
-rw-r--r--  drivers/block/drbd/drbd_nl.c        |  183
-rw-r--r--  drivers/block/drbd/drbd_proc.c      |  114
-rw-r--r--  drivers/block/drbd/drbd_receiver.c  |  608
-rw-r--r--  drivers/block/drbd/drbd_req.c       |  169
-rw-r--r--  drivers/block/drbd/drbd_req.h       |   36
-rw-r--r--  drivers/block/drbd/drbd_strings.c   |    6
-rw-r--r--  drivers/block/drbd/drbd_worker.c    |  360
-rw-r--r--  drivers/block/drbd/drbd_wrappers.h  |    2
16 files changed, 2214 insertions, 1395 deletions
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 35658f445fca..9bf13988f1a2 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -193,7 +193,7 @@ static int __devinit cciss_find_cfg_addrs(struct pci_dev *pdev, | |||
193 | u64 *cfg_offset); | 193 | u64 *cfg_offset); |
194 | static int __devinit cciss_pci_find_memory_BAR(struct pci_dev *pdev, | 194 | static int __devinit cciss_pci_find_memory_BAR(struct pci_dev *pdev, |
195 | unsigned long *memory_bar); | 195 | unsigned long *memory_bar); |
196 | 196 | static inline u32 cciss_tag_discard_error_bits(ctlr_info_t *h, u32 tag); | |
197 | 197 | ||
198 | /* performant mode helper functions */ | 198 | /* performant mode helper functions */ |
199 | static void calc_bucket_map(int *bucket, int num_buckets, int nsgs, | 199 | static void calc_bucket_map(int *bucket, int num_buckets, int nsgs, |
@@ -231,7 +231,7 @@ static const struct block_device_operations cciss_fops = { | |||
231 | */ | 231 | */ |
232 | static void set_performant_mode(ctlr_info_t *h, CommandList_struct *c) | 232 | static void set_performant_mode(ctlr_info_t *h, CommandList_struct *c) |
233 | { | 233 | { |
234 | if (likely(h->transMethod == CFGTBL_Trans_Performant)) | 234 | if (likely(h->transMethod & CFGTBL_Trans_Performant)) |
235 | c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1); | 235 | c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1); |
236 | } | 236 | } |
237 | 237 | ||
@@ -556,6 +556,44 @@ static void __devinit cciss_procinit(ctlr_info_t *h) | |||
556 | #define to_hba(n) container_of(n, struct ctlr_info, dev) | 556 | #define to_hba(n) container_of(n, struct ctlr_info, dev) |
557 | #define to_drv(n) container_of(n, drive_info_struct, dev) | 557 | #define to_drv(n) container_of(n, drive_info_struct, dev) |
558 | 558 | ||
559 | /* List of controllers which cannot be reset on kexec with reset_devices */ | ||
560 | static u32 unresettable_controller[] = { | ||
561 | 0x324a103C, /* Smart Array P712m */ | ||
562 | 0x324b103C, /* SmartArray P711m */ | ||
563 | 0x3223103C, /* Smart Array P800 */ | ||
564 | 0x3234103C, /* Smart Array P400 */ | ||
565 | 0x3235103C, /* Smart Array P400i */ | ||
566 | 0x3211103C, /* Smart Array E200i */ | ||
567 | 0x3212103C, /* Smart Array E200 */ | ||
568 | 0x3213103C, /* Smart Array E200i */ | ||
569 | 0x3214103C, /* Smart Array E200i */ | ||
570 | 0x3215103C, /* Smart Array E200i */ | ||
571 | 0x3237103C, /* Smart Array E500 */ | ||
572 | 0x323D103C, /* Smart Array P700m */ | ||
573 | 0x409C0E11, /* Smart Array 6400 */ | ||
574 | 0x409D0E11, /* Smart Array 6400 EM */ | ||
575 | }; | ||
576 | |||
577 | static int ctlr_is_resettable(struct ctlr_info *h) | ||
578 | { | ||
579 | int i; | ||
580 | |||
581 | for (i = 0; i < ARRAY_SIZE(unresettable_controller); i++) | ||
582 | if (unresettable_controller[i] == h->board_id) | ||
583 | return 0; | ||
584 | return 1; | ||
585 | } | ||
586 | |||
587 | static ssize_t host_show_resettable(struct device *dev, | ||
588 | struct device_attribute *attr, | ||
589 | char *buf) | ||
590 | { | ||
591 | struct ctlr_info *h = to_hba(dev); | ||
592 | |||
593 | return snprintf(buf, 20, "%d\n", ctlr_is_resettable(h)); | ||
594 | } | ||
595 | static DEVICE_ATTR(resettable, S_IRUGO, host_show_resettable, NULL); | ||
596 | |||
559 | static ssize_t host_store_rescan(struct device *dev, | 597 | static ssize_t host_store_rescan(struct device *dev, |
560 | struct device_attribute *attr, | 598 | struct device_attribute *attr, |
561 | const char *buf, size_t count) | 599 | const char *buf, size_t count) |
@@ -741,6 +779,7 @@ static DEVICE_ATTR(usage_count, S_IRUGO, cciss_show_usage_count, NULL); | |||
741 | 779 | ||
742 | static struct attribute *cciss_host_attrs[] = { | 780 | static struct attribute *cciss_host_attrs[] = { |
743 | &dev_attr_rescan.attr, | 781 | &dev_attr_rescan.attr, |
782 | &dev_attr_resettable.attr, | ||
744 | NULL | 783 | NULL |
745 | }; | 784 | }; |
746 | 785 | ||
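The two hunks above add a read-only "resettable" sysfs attribute and hook it into the cciss host attribute group, so tooling can tell whether reset_devices/kdump is expected to work on this controller. A minimal userspace sketch of consuming it follows; the sysfs path is an assumption for illustration only, since the attribute's exact location depends on how the controller's device is registered.

#include <stdio.h>

int main(void)
{
	/* Hypothetical path -- substitute the real cciss host device node. */
	FILE *f = fopen("/sys/bus/pci/devices/0000:06:00.0/resettable", "r");
	int resettable = 0;

	if (!f) {
		perror("open resettable");
		return 1;
	}
	if (fscanf(f, "%d", &resettable) != 1)
		resettable = -1;
	fclose(f);

	printf("resettable = %d (%s)\n", resettable,
	       resettable == 1 ? "controller reset on kdump should work"
			       : "controller is on the unresettable list");
	return 0;
}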
@@ -973,8 +1012,8 @@ static void cmd_special_free(ctlr_info_t *h, CommandList_struct *c) | |||
973 | temp64.val32.upper = c->ErrDesc.Addr.upper; | 1012 | temp64.val32.upper = c->ErrDesc.Addr.upper; |
974 | pci_free_consistent(h->pdev, sizeof(ErrorInfo_struct), | 1013 | pci_free_consistent(h->pdev, sizeof(ErrorInfo_struct), |
975 | c->err_info, (dma_addr_t) temp64.val); | 1014 | c->err_info, (dma_addr_t) temp64.val); |
976 | pci_free_consistent(h->pdev, sizeof(CommandList_struct), | 1015 | pci_free_consistent(h->pdev, sizeof(CommandList_struct), c, |
977 | c, (dma_addr_t) c->busaddr); | 1016 | (dma_addr_t) cciss_tag_discard_error_bits(h, (u32) c->busaddr)); |
978 | } | 1017 | } |
979 | 1018 | ||
980 | static inline ctlr_info_t *get_host(struct gendisk *disk) | 1019 | static inline ctlr_info_t *get_host(struct gendisk *disk) |
@@ -1490,8 +1529,7 @@ static int cciss_bigpassthru(ctlr_info_t *h, void __user *argp) | |||
1490 | return -EINVAL; | 1529 | return -EINVAL; |
1491 | if (!capable(CAP_SYS_RAWIO)) | 1530 | if (!capable(CAP_SYS_RAWIO)) |
1492 | return -EPERM; | 1531 | return -EPERM; |
1493 | ioc = (BIG_IOCTL_Command_struct *) | 1532 | ioc = kmalloc(sizeof(*ioc), GFP_KERNEL); |
1494 | kmalloc(sizeof(*ioc), GFP_KERNEL); | ||
1495 | if (!ioc) { | 1533 | if (!ioc) { |
1496 | status = -ENOMEM; | 1534 | status = -ENOMEM; |
1497 | goto cleanup1; | 1535 | goto cleanup1; |
@@ -2653,6 +2691,10 @@ static int process_sendcmd_error(ctlr_info_t *h, CommandList_struct *c) | |||
2653 | c->Request.CDB[0]); | 2691 | c->Request.CDB[0]); |
2654 | return_status = IO_NEEDS_RETRY; | 2692 | return_status = IO_NEEDS_RETRY; |
2655 | break; | 2693 | break; |
2694 | case CMD_UNABORTABLE: | ||
2695 | dev_warn(&h->pdev->dev, "cmd unabortable\n"); | ||
2696 | return_status = IO_ERROR; | ||
2697 | break; | ||
2656 | default: | 2698 | default: |
2657 | dev_warn(&h->pdev->dev, "cmd 0x%02x returned " | 2699 | dev_warn(&h->pdev->dev, "cmd 0x%02x returned " |
2658 | "unknown status %x\n", c->Request.CDB[0], | 2700 | "unknown status %x\n", c->Request.CDB[0], |
@@ -3103,6 +3145,13 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, | |||
3103 | (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? | 3145 | (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? |
3104 | DID_PASSTHROUGH : DID_ERROR); | 3146 | DID_PASSTHROUGH : DID_ERROR); |
3105 | break; | 3147 | break; |
3148 | case CMD_UNABORTABLE: | ||
3149 | dev_warn(&h->pdev->dev, "cmd %p unabortable\n", cmd); | ||
3150 | rq->errors = make_status_bytes(SAM_STAT_GOOD, | ||
3151 | cmd->err_info->CommandStatus, DRIVER_OK, | ||
3152 | cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC ? | ||
3153 | DID_PASSTHROUGH : DID_ERROR); | ||
3154 | break; | ||
3106 | default: | 3155 | default: |
3107 | dev_warn(&h->pdev->dev, "cmd %p returned " | 3156 | dev_warn(&h->pdev->dev, "cmd %p returned " |
3108 | "unknown status %x\n", cmd, | 3157 | "unknown status %x\n", cmd, |
@@ -3136,10 +3185,13 @@ static inline u32 cciss_tag_to_index(u32 tag) | |||
3136 | return tag >> DIRECT_LOOKUP_SHIFT; | 3185 | return tag >> DIRECT_LOOKUP_SHIFT; |
3137 | } | 3186 | } |
3138 | 3187 | ||
3139 | static inline u32 cciss_tag_discard_error_bits(u32 tag) | 3188 | static inline u32 cciss_tag_discard_error_bits(ctlr_info_t *h, u32 tag) |
3140 | { | 3189 | { |
3141 | #define CCISS_ERROR_BITS 0x03 | 3190 | #define CCISS_PERF_ERROR_BITS ((1 << DIRECT_LOOKUP_SHIFT) - 1) |
3142 | return tag & ~CCISS_ERROR_BITS; | 3191 | #define CCISS_SIMPLE_ERROR_BITS 0x03 |
3192 | if (likely(h->transMethod & CFGTBL_Trans_Performant)) | ||
3193 | return tag & ~CCISS_PERF_ERROR_BITS; | ||
3194 | return tag & ~CCISS_SIMPLE_ERROR_BITS; | ||
3143 | } | 3195 | } |
3144 | 3196 | ||
3145 | static inline void cciss_mark_tag_indexed(u32 *tag) | 3197 | static inline void cciss_mark_tag_indexed(u32 *tag) |
@@ -3359,7 +3411,7 @@ static inline u32 next_command(ctlr_info_t *h) | |||
3359 | { | 3411 | { |
3360 | u32 a; | 3412 | u32 a; |
3361 | 3413 | ||
3362 | if (unlikely(h->transMethod != CFGTBL_Trans_Performant)) | 3414 | if (unlikely(!(h->transMethod & CFGTBL_Trans_Performant))) |
3363 | return h->access.command_completed(h); | 3415 | return h->access.command_completed(h); |
3364 | 3416 | ||
3365 | if ((*(h->reply_pool_head) & 1) == (h->reply_pool_wraparound)) { | 3417 | if ((*(h->reply_pool_head) & 1) == (h->reply_pool_wraparound)) { |
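The hunk above only relaxes the transMethod test to a bitmask check, but the surrounding code is the performant-mode reply ring: each completion slot carries a "cycle" bit in its low bit, and an entry belongs to the consumer only while that bit matches the current wraparound phase, which flips every time the head wraps. A simplified, single-threaded model of that toggle-bit ring is sketched below; the ring size and tag values are invented for illustration.

#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 8

struct reply_ring {
	uint32_t slots[RING_SIZE];	/* bit 0 = cycle bit, upper bits = tag */
	unsigned head;
	uint32_t phase;			/* expected cycle bit, starts at 1 */
};

static uint32_t ring_pop(struct reply_ring *r)
{
	uint32_t v = r->slots[r->head];

	if ((v & 1) != r->phase)
		return UINT32_MAX;	/* FIFO empty: cycle bit does not match */
	if (++r->head == RING_SIZE) {
		r->head = 0;
		r->phase ^= 1;		/* wrapped: expect the flipped bit next lap */
	}
	return v;
}

int main(void)
{
	struct reply_ring r = { .phase = 1 };

	r.slots[0] = 0x20u | 1;				/* producer posted tag 0x20 */
	printf("%#x\n", (unsigned)ring_pop(&r));	/* consumes it */
	printf("%#x\n", (unsigned)ring_pop(&r));	/* empty: 0xffffffff */
	return 0;
}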
@@ -3394,14 +3446,12 @@ static inline u32 process_indexed_cmd(ctlr_info_t *h, u32 raw_tag) | |||
3394 | /* process completion of a non-indexed command */ | 3446 | /* process completion of a non-indexed command */ |
3395 | static inline u32 process_nonindexed_cmd(ctlr_info_t *h, u32 raw_tag) | 3447 | static inline u32 process_nonindexed_cmd(ctlr_info_t *h, u32 raw_tag) |
3396 | { | 3448 | { |
3397 | u32 tag; | ||
3398 | CommandList_struct *c = NULL; | 3449 | CommandList_struct *c = NULL; |
3399 | __u32 busaddr_masked, tag_masked; | 3450 | __u32 busaddr_masked, tag_masked; |
3400 | 3451 | ||
3401 | tag = cciss_tag_discard_error_bits(raw_tag); | 3452 | tag_masked = cciss_tag_discard_error_bits(h, raw_tag); |
3402 | list_for_each_entry(c, &h->cmpQ, list) { | 3453 | list_for_each_entry(c, &h->cmpQ, list) { |
3403 | busaddr_masked = cciss_tag_discard_error_bits(c->busaddr); | 3454 | busaddr_masked = cciss_tag_discard_error_bits(h, c->busaddr); |
3404 | tag_masked = cciss_tag_discard_error_bits(tag); | ||
3405 | if (busaddr_masked == tag_masked) { | 3455 | if (busaddr_masked == tag_masked) { |
3406 | finish_cmd(h, c, raw_tag); | 3456 | finish_cmd(h, c, raw_tag); |
3407 | return next_command(h); | 3457 | return next_command(h); |
@@ -3753,7 +3803,8 @@ static void __devinit cciss_wait_for_mode_change_ack(ctlr_info_t *h) | |||
3753 | } | 3803 | } |
3754 | } | 3804 | } |
3755 | 3805 | ||
3756 | static __devinit void cciss_enter_performant_mode(ctlr_info_t *h) | 3806 | static __devinit void cciss_enter_performant_mode(ctlr_info_t *h, |
3807 | u32 use_short_tags) | ||
3757 | { | 3808 | { |
3758 | /* This is a bit complicated. There are 8 registers on | 3809 | /* This is a bit complicated. There are 8 registers on |
3759 | * the controller which we write to to tell it 8 different | 3810 | * the controller which we write to to tell it 8 different |
@@ -3808,7 +3859,7 @@ static __devinit void cciss_enter_performant_mode(ctlr_info_t *h) | |||
3808 | writel(0, &h->transtable->RepQCtrAddrHigh32); | 3859 | writel(0, &h->transtable->RepQCtrAddrHigh32); |
3809 | writel(h->reply_pool_dhandle, &h->transtable->RepQAddr0Low32); | 3860 | writel(h->reply_pool_dhandle, &h->transtable->RepQAddr0Low32); |
3810 | writel(0, &h->transtable->RepQAddr0High32); | 3861 | writel(0, &h->transtable->RepQAddr0High32); |
3811 | writel(CFGTBL_Trans_Performant, | 3862 | writel(CFGTBL_Trans_Performant | use_short_tags, |
3812 | &(h->cfgtable->HostWrite.TransportRequest)); | 3863 | &(h->cfgtable->HostWrite.TransportRequest)); |
3813 | 3864 | ||
3814 | writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL); | 3865 | writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL); |
@@ -3855,7 +3906,8 @@ static void __devinit cciss_put_controller_into_performant_mode(ctlr_info_t *h) | |||
3855 | if ((h->reply_pool == NULL) || (h->blockFetchTable == NULL)) | 3906 | if ((h->reply_pool == NULL) || (h->blockFetchTable == NULL)) |
3856 | goto clean_up; | 3907 | goto clean_up; |
3857 | 3908 | ||
3858 | cciss_enter_performant_mode(h); | 3909 | cciss_enter_performant_mode(h, |
3910 | trans_support & CFGTBL_Trans_use_short_tags); | ||
3859 | 3911 | ||
3860 | /* Change the access methods to the performant access methods */ | 3912 | /* Change the access methods to the performant access methods */ |
3861 | h->access = SA5_performant_access; | 3913 | h->access = SA5_performant_access; |
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index 579f74918493..554bbd907d14 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -222,6 +222,7 @@ static void SA5_submit_command( ctlr_info_t *h, CommandList_struct *c) | |||
222 | h->ctlr, c->busaddr); | 222 | h->ctlr, c->busaddr); |
223 | #endif /* CCISS_DEBUG */ | 223 | #endif /* CCISS_DEBUG */ |
224 | writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET); | 224 | writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET); |
225 | readl(h->vaddr + SA5_REQUEST_PORT_OFFSET); | ||
225 | h->commands_outstanding++; | 226 | h->commands_outstanding++; |
226 | if ( h->commands_outstanding > h->max_outstanding) | 227 | if ( h->commands_outstanding > h->max_outstanding) |
227 | h->max_outstanding = h->commands_outstanding; | 228 | h->max_outstanding = h->commands_outstanding; |
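The added readl() after the writel() in SA5_submit_command is a posted-write flush: PCI MMIO writes may sit in intermediate buffers, and reading any register back from the same device forces the command doorbell write to actually reach the controller before the driver goes on to account for it. A compile-only model of the access pattern, with a plain volatile pointer standing in for the ioremap()ed BAR, is shown below.

#include <stdint.h>

static inline void submit_command_model(volatile uint32_t *request_port, uint32_t busaddr)
{
	*request_port = busaddr;	/* writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET) */
	(void)*request_port;		/* readl() back: flush the posted write */
}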
diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
index 35463d2f0ee7..cd441bef031f 100644
--- a/drivers/block/cciss_cmd.h
+++ b/drivers/block/cciss_cmd.h
@@ -56,6 +56,7 @@ | |||
56 | 56 | ||
57 | #define CFGTBL_Trans_Simple 0x00000002l | 57 | #define CFGTBL_Trans_Simple 0x00000002l |
58 | #define CFGTBL_Trans_Performant 0x00000004l | 58 | #define CFGTBL_Trans_Performant 0x00000004l |
59 | #define CFGTBL_Trans_use_short_tags 0x20000000l | ||
59 | 60 | ||
60 | #define CFGTBL_BusType_Ultra2 0x00000001l | 61 | #define CFGTBL_BusType_Ultra2 0x00000001l |
61 | #define CFGTBL_BusType_Ultra3 0x00000002l | 62 | #define CFGTBL_BusType_Ultra3 0x00000002l |
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index 727d0225b7d0..df793803f5ae 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -824,13 +824,18 @@ static void complete_scsi_command(CommandList_struct *c, int timeout, | |||
824 | break; | 824 | break; |
825 | case CMD_UNSOLICITED_ABORT: | 825 | case CMD_UNSOLICITED_ABORT: |
826 | cmd->result = DID_ABORT << 16; | 826 | cmd->result = DID_ABORT << 16; |
827 | dev_warn(&h->pdev->dev, "%p aborted do to an " | 827 | dev_warn(&h->pdev->dev, "%p aborted due to an " |
828 | "unsolicited abort\n", c); | 828 | "unsolicited abort\n", c); |
829 | break; | 829 | break; |
830 | case CMD_TIMEOUT: | 830 | case CMD_TIMEOUT: |
831 | cmd->result = DID_TIME_OUT << 16; | 831 | cmd->result = DID_TIME_OUT << 16; |
832 | dev_warn(&h->pdev->dev, "%p timedout\n", c); | 832 | dev_warn(&h->pdev->dev, "%p timedout\n", c); |
833 | break; | 833 | break; |
834 | case CMD_UNABORTABLE: | ||
835 | cmd->result = DID_ERROR << 16; | ||
836 | dev_warn(&h->pdev->dev, "c %p command " | ||
837 | "unabortable\n", c); | ||
838 | break; | ||
834 | default: | 839 | default: |
835 | cmd->result = DID_ERROR << 16; | 840 | cmd->result = DID_ERROR << 16; |
836 | dev_warn(&h->pdev->dev, | 841 | dev_warn(&h->pdev->dev, |
@@ -1007,11 +1012,15 @@ cciss_scsi_interpret_error(ctlr_info_t *h, CommandList_struct *c) | |||
1007 | break; | 1012 | break; |
1008 | case CMD_UNSOLICITED_ABORT: | 1013 | case CMD_UNSOLICITED_ABORT: |
1009 | dev_warn(&h->pdev->dev, | 1014 | dev_warn(&h->pdev->dev, |
1010 | "%p aborted do to an unsolicited abort\n", c); | 1015 | "%p aborted due to an unsolicited abort\n", c); |
1011 | break; | 1016 | break; |
1012 | case CMD_TIMEOUT: | 1017 | case CMD_TIMEOUT: |
1013 | dev_warn(&h->pdev->dev, "%p timedout\n", c); | 1018 | dev_warn(&h->pdev->dev, "%p timedout\n", c); |
1014 | break; | 1019 | break; |
1020 | case CMD_UNABORTABLE: | ||
1021 | dev_warn(&h->pdev->dev, | ||
1022 | "%p unabortable\n", c); | ||
1023 | break; | ||
1015 | default: | 1024 | default: |
1016 | dev_warn(&h->pdev->dev, | 1025 | dev_warn(&h->pdev->dev, |
1017 | "%p returned unknown status %x\n", | 1026 | "%p returned unknown status %x\n", |
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index aca302492ff2..2a1642bc451d 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -92,7 +92,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, | |||
92 | bio->bi_end_io = drbd_md_io_complete; | 92 | bio->bi_end_io = drbd_md_io_complete; |
93 | bio->bi_rw = rw; | 93 | bio->bi_rw = rw; |
94 | 94 | ||
95 | if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) | 95 | if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) |
96 | bio_endio(bio, -EIO); | 96 | bio_endio(bio, -EIO); |
97 | else | 97 | else |
98 | submit_bio(rw, bio); | 98 | submit_bio(rw, bio); |
@@ -176,13 +176,17 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) | |||
176 | struct lc_element *al_ext; | 176 | struct lc_element *al_ext; |
177 | struct lc_element *tmp; | 177 | struct lc_element *tmp; |
178 | unsigned long al_flags = 0; | 178 | unsigned long al_flags = 0; |
179 | int wake; | ||
179 | 180 | ||
180 | spin_lock_irq(&mdev->al_lock); | 181 | spin_lock_irq(&mdev->al_lock); |
181 | tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT); | 182 | tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT); |
182 | if (unlikely(tmp != NULL)) { | 183 | if (unlikely(tmp != NULL)) { |
183 | struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); | 184 | struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); |
184 | if (test_bit(BME_NO_WRITES, &bm_ext->flags)) { | 185 | if (test_bit(BME_NO_WRITES, &bm_ext->flags)) { |
186 | wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags); | ||
185 | spin_unlock_irq(&mdev->al_lock); | 187 | spin_unlock_irq(&mdev->al_lock); |
188 | if (wake) | ||
189 | wake_up(&mdev->al_wait); | ||
186 | return NULL; | 190 | return NULL; |
187 | } | 191 | } |
188 | } | 192 | } |
@@ -258,6 +262,33 @@ void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector) | |||
258 | spin_unlock_irqrestore(&mdev->al_lock, flags); | 262 | spin_unlock_irqrestore(&mdev->al_lock, flags); |
259 | } | 263 | } |
260 | 264 | ||
265 | #if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT) | ||
266 | /* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT | ||
267 | * are still coupled, or assume too much about their relation. | ||
268 | * Code below will not work if this is violated. | ||
269 | * Will be cleaned up with some followup patch. | ||
270 | */ | ||
271 | # error FIXME | ||
272 | #endif | ||
273 | |||
274 | static unsigned int al_extent_to_bm_page(unsigned int al_enr) | ||
275 | { | ||
276 | return al_enr >> | ||
277 | /* bit to page */ | ||
278 | ((PAGE_SHIFT + 3) - | ||
279 | /* al extent number to bit */ | ||
280 | (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)); | ||
281 | } | ||
282 | |||
283 | static unsigned int rs_extent_to_bm_page(unsigned int rs_enr) | ||
284 | { | ||
285 | return rs_enr >> | ||
286 | /* bit to page */ | ||
287 | ((PAGE_SHIFT + 3) - | ||
288 | /* al extent number to bit */ | ||
289 | (BM_EXT_SHIFT - BM_BLOCK_SHIFT)); | ||
290 | } | ||
291 | |||
261 | int | 292 | int |
262 | w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) | 293 | w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) |
263 | { | 294 | { |
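The two helpers added above translate an activity-log or resync extent number into the index of the 4 KiB bitmap page that covers it, so per-sector bitmap writes can be replaced by per-page writes. A worked example of the shift arithmetic is below; the constants are the usual DRBD values assumed for this series (4 KiB per bitmap bit, 4 MiB AL extents, 16 MiB resync extents) rather than values restated in the hunk.

#include <stdio.h>

#define PAGE_SHIFT	12
#define BM_BLOCK_SHIFT	12	/* 4 KiB of storage per bitmap bit (assumed) */
#define AL_EXTENT_SHIFT	22	/* 4 MiB activity-log extents (assumed) */
#define BM_EXT_SHIFT	24	/* 16 MiB resync extents (assumed) */

static unsigned int al_extent_to_bm_page(unsigned int al_enr)
{
	/* (12+3) - (22-12) = 5: 32 AL extents are covered by one 4 KiB bitmap page */
	return al_enr >> ((PAGE_SHIFT + 3) - (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
}

static unsigned int rs_extent_to_bm_page(unsigned int rs_enr)
{
	/* (12+3) - (24-12) = 3: 8 resync extents are covered by one bitmap page */
	return rs_enr >> ((PAGE_SHIFT + 3) - (BM_EXT_SHIFT - BM_BLOCK_SHIFT));
}

int main(void)
{
	printf("AL extent 100 -> bitmap page %u\n", al_extent_to_bm_page(100));
	printf("RS extent 100 -> bitmap page %u\n", rs_extent_to_bm_page(100));
	return 0;
}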
@@ -285,7 +316,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) | |||
285 | * For now, we must not write the transaction, | 316 | * For now, we must not write the transaction, |
286 | * if we cannot write out the bitmap of the evicted extent. */ | 317 | * if we cannot write out the bitmap of the evicted extent. */ |
287 | if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE) | 318 | if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE) |
288 | drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT); | 319 | drbd_bm_write_page(mdev, al_extent_to_bm_page(evicted)); |
289 | 320 | ||
290 | /* The bitmap write may have failed, causing a state change. */ | 321 | /* The bitmap write may have failed, causing a state change. */ |
291 | if (mdev->state.disk < D_INCONSISTENT) { | 322 | if (mdev->state.disk < D_INCONSISTENT) { |
@@ -334,7 +365,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) | |||
334 | + mdev->ldev->md.al_offset + mdev->al_tr_pos; | 365 | + mdev->ldev->md.al_offset + mdev->al_tr_pos; |
335 | 366 | ||
336 | if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) | 367 | if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) |
337 | drbd_chk_io_error(mdev, 1, TRUE); | 368 | drbd_chk_io_error(mdev, 1, true); |
338 | 369 | ||
339 | if (++mdev->al_tr_pos > | 370 | if (++mdev->al_tr_pos > |
340 | div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT)) | 371 | div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT)) |
@@ -511,225 +542,6 @@ cancel: | |||
511 | return 1; | 542 | return 1; |
512 | } | 543 | } |
513 | 544 | ||
514 | static void atodb_endio(struct bio *bio, int error) | ||
515 | { | ||
516 | struct drbd_atodb_wait *wc = bio->bi_private; | ||
517 | struct drbd_conf *mdev = wc->mdev; | ||
518 | struct page *page; | ||
519 | int uptodate = bio_flagged(bio, BIO_UPTODATE); | ||
520 | |||
521 | /* strange behavior of some lower level drivers... | ||
522 | * fail the request by clearing the uptodate flag, | ||
523 | * but do not return any error?! */ | ||
524 | if (!error && !uptodate) | ||
525 | error = -EIO; | ||
526 | |||
527 | drbd_chk_io_error(mdev, error, TRUE); | ||
528 | if (error && wc->error == 0) | ||
529 | wc->error = error; | ||
530 | |||
531 | if (atomic_dec_and_test(&wc->count)) | ||
532 | complete(&wc->io_done); | ||
533 | |||
534 | page = bio->bi_io_vec[0].bv_page; | ||
535 | put_page(page); | ||
536 | bio_put(bio); | ||
537 | mdev->bm_writ_cnt++; | ||
538 | put_ldev(mdev); | ||
539 | } | ||
540 | |||
541 | /* sector to word */ | ||
542 | #define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL)) | ||
543 | |||
544 | /* activity log to on disk bitmap -- prepare bio unless that sector | ||
545 | * is already covered by previously prepared bios */ | ||
546 | static int atodb_prepare_unless_covered(struct drbd_conf *mdev, | ||
547 | struct bio **bios, | ||
548 | unsigned int enr, | ||
549 | struct drbd_atodb_wait *wc) __must_hold(local) | ||
550 | { | ||
551 | struct bio *bio; | ||
552 | struct page *page; | ||
553 | sector_t on_disk_sector; | ||
554 | unsigned int page_offset = PAGE_SIZE; | ||
555 | int offset; | ||
556 | int i = 0; | ||
557 | int err = -ENOMEM; | ||
558 | |||
559 | /* We always write aligned, full 4k blocks, | ||
560 | * so we can ignore the logical_block_size (for now) */ | ||
561 | enr &= ~7U; | ||
562 | on_disk_sector = enr + mdev->ldev->md.md_offset | ||
563 | + mdev->ldev->md.bm_offset; | ||
564 | |||
565 | D_ASSERT(!(on_disk_sector & 7U)); | ||
566 | |||
567 | /* Check if that enr is already covered by an already created bio. | ||
568 | * Caution, bios[] is not NULL terminated, | ||
569 | * but only initialized to all NULL. | ||
570 | * For completely scattered activity log, | ||
571 | * the last invocation iterates over all bios, | ||
572 | * and finds the last NULL entry. | ||
573 | */ | ||
574 | while ((bio = bios[i])) { | ||
575 | if (bio->bi_sector == on_disk_sector) | ||
576 | return 0; | ||
577 | i++; | ||
578 | } | ||
579 | /* bios[i] == NULL, the next not yet used slot */ | ||
580 | |||
581 | /* GFP_KERNEL, we are not in the write-out path */ | ||
582 | bio = bio_alloc(GFP_KERNEL, 1); | ||
583 | if (bio == NULL) | ||
584 | return -ENOMEM; | ||
585 | |||
586 | if (i > 0) { | ||
587 | const struct bio_vec *prev_bv = bios[i-1]->bi_io_vec; | ||
588 | page_offset = prev_bv->bv_offset + prev_bv->bv_len; | ||
589 | page = prev_bv->bv_page; | ||
590 | } | ||
591 | if (page_offset == PAGE_SIZE) { | ||
592 | page = alloc_page(__GFP_HIGHMEM); | ||
593 | if (page == NULL) | ||
594 | goto out_bio_put; | ||
595 | page_offset = 0; | ||
596 | } else { | ||
597 | get_page(page); | ||
598 | } | ||
599 | |||
600 | offset = S2W(enr); | ||
601 | drbd_bm_get_lel(mdev, offset, | ||
602 | min_t(size_t, S2W(8), drbd_bm_words(mdev) - offset), | ||
603 | kmap(page) + page_offset); | ||
604 | kunmap(page); | ||
605 | |||
606 | bio->bi_private = wc; | ||
607 | bio->bi_end_io = atodb_endio; | ||
608 | bio->bi_bdev = mdev->ldev->md_bdev; | ||
609 | bio->bi_sector = on_disk_sector; | ||
610 | |||
611 | if (bio_add_page(bio, page, 4096, page_offset) != 4096) | ||
612 | goto out_put_page; | ||
613 | |||
614 | atomic_inc(&wc->count); | ||
615 | /* we already know that we may do this... | ||
616 | * get_ldev_if_state(mdev,D_ATTACHING); | ||
617 | * just get the extra reference, so that the local_cnt reflects | ||
618 | * the number of pending IO requests DRBD at its backing device. | ||
619 | */ | ||
620 | atomic_inc(&mdev->local_cnt); | ||
621 | |||
622 | bios[i] = bio; | ||
623 | |||
624 | return 0; | ||
625 | |||
626 | out_put_page: | ||
627 | err = -EINVAL; | ||
628 | put_page(page); | ||
629 | out_bio_put: | ||
630 | bio_put(bio); | ||
631 | return err; | ||
632 | } | ||
633 | |||
634 | /** | ||
635 | * drbd_al_to_on_disk_bm() - * Writes bitmap parts covered by active AL extents | ||
636 | * @mdev: DRBD device. | ||
637 | * | ||
638 | * Called when we detach (unconfigure) local storage, | ||
639 | * or when we go from R_PRIMARY to R_SECONDARY role. | ||
640 | */ | ||
641 | void drbd_al_to_on_disk_bm(struct drbd_conf *mdev) | ||
642 | { | ||
643 | int i, nr_elements; | ||
644 | unsigned int enr; | ||
645 | struct bio **bios; | ||
646 | struct drbd_atodb_wait wc; | ||
647 | |||
648 | ERR_IF (!get_ldev_if_state(mdev, D_ATTACHING)) | ||
649 | return; /* sorry, I don't have any act_log etc... */ | ||
650 | |||
651 | wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); | ||
652 | |||
653 | nr_elements = mdev->act_log->nr_elements; | ||
654 | |||
655 | /* GFP_KERNEL, we are not in anyone's write-out path */ | ||
656 | bios = kzalloc(sizeof(struct bio *) * nr_elements, GFP_KERNEL); | ||
657 | if (!bios) | ||
658 | goto submit_one_by_one; | ||
659 | |||
660 | atomic_set(&wc.count, 0); | ||
661 | init_completion(&wc.io_done); | ||
662 | wc.mdev = mdev; | ||
663 | wc.error = 0; | ||
664 | |||
665 | for (i = 0; i < nr_elements; i++) { | ||
666 | enr = lc_element_by_index(mdev->act_log, i)->lc_number; | ||
667 | if (enr == LC_FREE) | ||
668 | continue; | ||
669 | /* next statement also does atomic_inc wc.count and local_cnt */ | ||
670 | if (atodb_prepare_unless_covered(mdev, bios, | ||
671 | enr/AL_EXT_PER_BM_SECT, | ||
672 | &wc)) | ||
673 | goto free_bios_submit_one_by_one; | ||
674 | } | ||
675 | |||
676 | /* unnecessary optimization? */ | ||
677 | lc_unlock(mdev->act_log); | ||
678 | wake_up(&mdev->al_wait); | ||
679 | |||
680 | /* all prepared, submit them */ | ||
681 | for (i = 0; i < nr_elements; i++) { | ||
682 | if (bios[i] == NULL) | ||
683 | break; | ||
684 | if (FAULT_ACTIVE(mdev, DRBD_FAULT_MD_WR)) { | ||
685 | bios[i]->bi_rw = WRITE; | ||
686 | bio_endio(bios[i], -EIO); | ||
687 | } else { | ||
688 | submit_bio(WRITE, bios[i]); | ||
689 | } | ||
690 | } | ||
691 | |||
692 | /* always (try to) flush bitmap to stable storage */ | ||
693 | drbd_md_flush(mdev); | ||
694 | |||
695 | /* In case we did not submit a single IO do not wait for | ||
696 | * them to complete. ( Because we would wait forever here. ) | ||
697 | * | ||
698 | * In case we had IOs and they are already complete, there | ||
699 | * is not point in waiting anyways. | ||
700 | * Therefore this if () ... */ | ||
701 | if (atomic_read(&wc.count)) | ||
702 | wait_for_completion(&wc.io_done); | ||
703 | |||
704 | put_ldev(mdev); | ||
705 | |||
706 | kfree(bios); | ||
707 | return; | ||
708 | |||
709 | free_bios_submit_one_by_one: | ||
710 | /* free everything by calling the endio callback directly. */ | ||
711 | for (i = 0; i < nr_elements && bios[i]; i++) | ||
712 | bio_endio(bios[i], 0); | ||
713 | |||
714 | kfree(bios); | ||
715 | |||
716 | submit_one_by_one: | ||
717 | dev_warn(DEV, "Using the slow drbd_al_to_on_disk_bm()\n"); | ||
718 | |||
719 | for (i = 0; i < mdev->act_log->nr_elements; i++) { | ||
720 | enr = lc_element_by_index(mdev->act_log, i)->lc_number; | ||
721 | if (enr == LC_FREE) | ||
722 | continue; | ||
723 | /* Really slow: if we have al-extents 16..19 active, | ||
724 | * sector 4 will be written four times! Synchronous! */ | ||
725 | drbd_bm_write_sect(mdev, enr/AL_EXT_PER_BM_SECT); | ||
726 | } | ||
727 | |||
728 | lc_unlock(mdev->act_log); | ||
729 | wake_up(&mdev->al_wait); | ||
730 | put_ldev(mdev); | ||
731 | } | ||
732 | |||
733 | /** | 545 | /** |
734 | * drbd_al_apply_to_bm() - Sets the bitmap to diry(1) where covered ba active AL extents | 546 | * drbd_al_apply_to_bm() - Sets the bitmap to diry(1) where covered ba active AL extents |
735 | * @mdev: DRBD device. | 547 | * @mdev: DRBD device. |
@@ -809,7 +621,7 @@ static int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused | |||
809 | return 1; | 621 | return 1; |
810 | } | 622 | } |
811 | 623 | ||
812 | drbd_bm_write_sect(mdev, udw->enr); | 624 | drbd_bm_write_page(mdev, rs_extent_to_bm_page(udw->enr)); |
813 | put_ldev(mdev); | 625 | put_ldev(mdev); |
814 | 626 | ||
815 | kfree(udw); | 627 | kfree(udw); |
@@ -889,7 +701,6 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, | |||
889 | dev_warn(DEV, "Kicking resync_lru element enr=%u " | 701 | dev_warn(DEV, "Kicking resync_lru element enr=%u " |
890 | "out with rs_failed=%d\n", | 702 | "out with rs_failed=%d\n", |
891 | ext->lce.lc_number, ext->rs_failed); | 703 | ext->lce.lc_number, ext->rs_failed); |
892 | set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags); | ||
893 | } | 704 | } |
894 | ext->rs_left = rs_left; | 705 | ext->rs_left = rs_left; |
895 | ext->rs_failed = success ? 0 : count; | 706 | ext->rs_failed = success ? 0 : count; |
@@ -908,7 +719,6 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, | |||
908 | drbd_queue_work_front(&mdev->data.work, &udw->w); | 719 | drbd_queue_work_front(&mdev->data.work, &udw->w); |
909 | } else { | 720 | } else { |
910 | dev_warn(DEV, "Could not kmalloc an udw\n"); | 721 | dev_warn(DEV, "Could not kmalloc an udw\n"); |
911 | set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags); | ||
912 | } | 722 | } |
913 | } | 723 | } |
914 | } else { | 724 | } else { |
@@ -919,6 +729,22 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, | |||
919 | } | 729 | } |
920 | } | 730 | } |
921 | 731 | ||
732 | void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go) | ||
733 | { | ||
734 | unsigned long now = jiffies; | ||
735 | unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark]; | ||
736 | int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS; | ||
737 | if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) { | ||
738 | if (mdev->rs_mark_left[mdev->rs_last_mark] != still_to_go && | ||
739 | mdev->state.conn != C_PAUSED_SYNC_T && | ||
740 | mdev->state.conn != C_PAUSED_SYNC_S) { | ||
741 | mdev->rs_mark_time[next] = now; | ||
742 | mdev->rs_mark_left[next] = still_to_go; | ||
743 | mdev->rs_last_mark = next; | ||
744 | } | ||
745 | } | ||
746 | } | ||
747 | |||
922 | /* clear the bit corresponding to the piece of storage in question: | 748 | /* clear the bit corresponding to the piece of storage in question: |
923 | * size byte of data starting from sector. Only clear a bits of the affected | 749 | * size byte of data starting from sector. Only clear a bits of the affected |
924 | * one ore more _aligned_ BM_BLOCK_SIZE blocks. | 750 | * one ore more _aligned_ BM_BLOCK_SIZE blocks. |
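drbd_advance_rs_marks(), factored out above, records a snapshot of "bits still out of sync" into a small ring at fixed time steps; the proc interface later derives the resync rate and ETA from those marks. The sketch below models the ring and a rate estimate between the two newest marks; DRBD_SYNC_MARKS = 8 and a 3-second mark step are assumptions for illustration, and seconds stand in for jiffies.

#include <stdio.h>

#define SYNC_MARKS	8
#define MARK_STEP_SEC	3

struct marks {
	unsigned long left[SYNC_MARKS];	/* bits still out of sync at that mark */
	unsigned long time[SYNC_MARKS];	/* seconds, stand-in for jiffies */
	int last;
};

static void advance_marks(struct marks *m, unsigned long now, unsigned long still_to_go)
{
	int next = (m->last + 1) % SYNC_MARKS;

	/* only advance if a full step has passed and progress was made */
	if (now >= m->time[m->last] + MARK_STEP_SEC &&
	    m->left[m->last] != still_to_go) {
		m->time[next] = now;
		m->left[next] = still_to_go;
		m->last = next;
	}
}

/* estimated KiB/s between the two newest marks, at 4 KiB per bitmap bit */
static unsigned long rate_kib_s(const struct marks *m)
{
	int prev = (m->last + SYNC_MARKS - 1) % SYNC_MARKS;
	unsigned long dt = m->time[m->last] - m->time[prev];

	return dt ? (m->left[prev] - m->left[m->last]) * 4 / dt : 0;
}

int main(void)
{
	struct marks m = { .left = { [0] = 1000000 }, .time = { [0] = 0 }, .last = 0 };

	advance_marks(&m, 3, 900000);	/* 3 s later, 100000 bits were resynced */
	printf("~%lu KiB/s\n", rate_kib_s(&m));
	return 0;
}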
@@ -936,7 +762,7 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, | |||
936 | int wake_up = 0; | 762 | int wake_up = 0; |
937 | unsigned long flags; | 763 | unsigned long flags; |
938 | 764 | ||
939 | if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { | 765 | if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) { |
940 | dev_err(DEV, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n", | 766 | dev_err(DEV, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n", |
941 | (unsigned long long)sector, size); | 767 | (unsigned long long)sector, size); |
942 | return; | 768 | return; |
@@ -969,21 +795,9 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, | |||
969 | */ | 795 | */ |
970 | count = drbd_bm_clear_bits(mdev, sbnr, ebnr); | 796 | count = drbd_bm_clear_bits(mdev, sbnr, ebnr); |
971 | if (count && get_ldev(mdev)) { | 797 | if (count && get_ldev(mdev)) { |
972 | unsigned long now = jiffies; | 798 | drbd_advance_rs_marks(mdev, drbd_bm_total_weight(mdev)); |
973 | unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark]; | ||
974 | int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS; | ||
975 | if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) { | ||
976 | unsigned long tw = drbd_bm_total_weight(mdev); | ||
977 | if (mdev->rs_mark_left[mdev->rs_last_mark] != tw && | ||
978 | mdev->state.conn != C_PAUSED_SYNC_T && | ||
979 | mdev->state.conn != C_PAUSED_SYNC_S) { | ||
980 | mdev->rs_mark_time[next] = now; | ||
981 | mdev->rs_mark_left[next] = tw; | ||
982 | mdev->rs_last_mark = next; | ||
983 | } | ||
984 | } | ||
985 | spin_lock_irqsave(&mdev->al_lock, flags); | 799 | spin_lock_irqsave(&mdev->al_lock, flags); |
986 | drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE); | 800 | drbd_try_clear_on_disk_bm(mdev, sector, count, true); |
987 | spin_unlock_irqrestore(&mdev->al_lock, flags); | 801 | spin_unlock_irqrestore(&mdev->al_lock, flags); |
988 | 802 | ||
989 | /* just wake_up unconditional now, various lc_chaged(), | 803 | /* just wake_up unconditional now, various lc_chaged(), |
@@ -998,27 +812,27 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, | |||
998 | /* | 812 | /* |
999 | * this is intended to set one request worth of data out of sync. | 813 | * this is intended to set one request worth of data out of sync. |
1000 | * affects at least 1 bit, | 814 | * affects at least 1 bit, |
1001 | * and at most 1+DRBD_MAX_SEGMENT_SIZE/BM_BLOCK_SIZE bits. | 815 | * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits. |
1002 | * | 816 | * |
1003 | * called by tl_clear and drbd_send_dblock (==drbd_make_request). | 817 | * called by tl_clear and drbd_send_dblock (==drbd_make_request). |
1004 | * so this can be _any_ process. | 818 | * so this can be _any_ process. |
1005 | */ | 819 | */ |
1006 | void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, | 820 | int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, |
1007 | const char *file, const unsigned int line) | 821 | const char *file, const unsigned int line) |
1008 | { | 822 | { |
1009 | unsigned long sbnr, ebnr, lbnr, flags; | 823 | unsigned long sbnr, ebnr, lbnr, flags; |
1010 | sector_t esector, nr_sectors; | 824 | sector_t esector, nr_sectors; |
1011 | unsigned int enr, count; | 825 | unsigned int enr, count = 0; |
1012 | struct lc_element *e; | 826 | struct lc_element *e; |
1013 | 827 | ||
1014 | if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { | 828 | if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) { |
1015 | dev_err(DEV, "sector: %llus, size: %d\n", | 829 | dev_err(DEV, "sector: %llus, size: %d\n", |
1016 | (unsigned long long)sector, size); | 830 | (unsigned long long)sector, size); |
1017 | return; | 831 | return 0; |
1018 | } | 832 | } |
1019 | 833 | ||
1020 | if (!get_ldev(mdev)) | 834 | if (!get_ldev(mdev)) |
1021 | return; /* no disk, no metadata, no bitmap to set bits in */ | 835 | return 0; /* no disk, no metadata, no bitmap to set bits in */ |
1022 | 836 | ||
1023 | nr_sectors = drbd_get_capacity(mdev->this_bdev); | 837 | nr_sectors = drbd_get_capacity(mdev->this_bdev); |
1024 | esector = sector + (size >> 9) - 1; | 838 | esector = sector + (size >> 9) - 1; |
@@ -1048,6 +862,8 @@ void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, | |||
1048 | 862 | ||
1049 | out: | 863 | out: |
1050 | put_ldev(mdev); | 864 | put_ldev(mdev); |
865 | |||
866 | return count; | ||
1051 | } | 867 | } |
1052 | 868 | ||
1053 | static | 869 | static |
@@ -1128,7 +944,10 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector) | |||
1128 | unsigned int enr = BM_SECT_TO_EXT(sector); | 944 | unsigned int enr = BM_SECT_TO_EXT(sector); |
1129 | struct bm_extent *bm_ext; | 945 | struct bm_extent *bm_ext; |
1130 | int i, sig; | 946 | int i, sig; |
947 | int sa = 200; /* Step aside 200 times, then grab the extent and let app-IO wait. | ||
948 | 200 times -> 20 seconds. */ | ||
1131 | 949 | ||
950 | retry: | ||
1132 | sig = wait_event_interruptible(mdev->al_wait, | 951 | sig = wait_event_interruptible(mdev->al_wait, |
1133 | (bm_ext = _bme_get(mdev, enr))); | 952 | (bm_ext = _bme_get(mdev, enr))); |
1134 | if (sig) | 953 | if (sig) |
@@ -1139,16 +958,25 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector) | |||
1139 | 958 | ||
1140 | for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { | 959 | for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { |
1141 | sig = wait_event_interruptible(mdev->al_wait, | 960 | sig = wait_event_interruptible(mdev->al_wait, |
1142 | !_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i)); | 961 | !_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i) || |
1143 | if (sig) { | 962 | test_bit(BME_PRIORITY, &bm_ext->flags)); |
963 | |||
964 | if (sig || (test_bit(BME_PRIORITY, &bm_ext->flags) && sa)) { | ||
1144 | spin_lock_irq(&mdev->al_lock); | 965 | spin_lock_irq(&mdev->al_lock); |
1145 | if (lc_put(mdev->resync, &bm_ext->lce) == 0) { | 966 | if (lc_put(mdev->resync, &bm_ext->lce) == 0) { |
1146 | clear_bit(BME_NO_WRITES, &bm_ext->flags); | 967 | bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */ |
1147 | mdev->resync_locked--; | 968 | mdev->resync_locked--; |
1148 | wake_up(&mdev->al_wait); | 969 | wake_up(&mdev->al_wait); |
1149 | } | 970 | } |
1150 | spin_unlock_irq(&mdev->al_lock); | 971 | spin_unlock_irq(&mdev->al_lock); |
1151 | return -EINTR; | 972 | if (sig) |
973 | return -EINTR; | ||
974 | if (schedule_timeout_interruptible(HZ/10)) | ||
975 | return -EINTR; | ||
976 | if (sa && --sa == 0) | ||
977 | dev_warn(DEV,"drbd_rs_begin_io() stepped aside for 20sec." | ||
978 | "Resync stalled?\n"); | ||
979 | goto retry; | ||
1152 | } | 980 | } |
1153 | } | 981 | } |
1154 | set_bit(BME_LOCKED, &bm_ext->flags); | 982 | set_bit(BME_LOCKED, &bm_ext->flags); |
@@ -1291,8 +1119,7 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector) | |||
1291 | } | 1119 | } |
1292 | 1120 | ||
1293 | if (lc_put(mdev->resync, &bm_ext->lce) == 0) { | 1121 | if (lc_put(mdev->resync, &bm_ext->lce) == 0) { |
1294 | clear_bit(BME_LOCKED, &bm_ext->flags); | 1122 | bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */ |
1295 | clear_bit(BME_NO_WRITES, &bm_ext->flags); | ||
1296 | mdev->resync_locked--; | 1123 | mdev->resync_locked--; |
1297 | wake_up(&mdev->al_wait); | 1124 | wake_up(&mdev->al_wait); |
1298 | } | 1125 | } |
@@ -1383,7 +1210,7 @@ void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size) | |||
1383 | sector_t esector, nr_sectors; | 1210 | sector_t esector, nr_sectors; |
1384 | int wake_up = 0; | 1211 | int wake_up = 0; |
1385 | 1212 | ||
1386 | if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { | 1213 | if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) { |
1387 | dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n", | 1214 | dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n", |
1388 | (unsigned long long)sector, size); | 1215 | (unsigned long long)sector, size); |
1389 | return; | 1216 | return; |
@@ -1420,7 +1247,7 @@ void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size) | |||
1420 | mdev->rs_failed += count; | 1247 | mdev->rs_failed += count; |
1421 | 1248 | ||
1422 | if (get_ldev(mdev)) { | 1249 | if (get_ldev(mdev)) { |
1423 | drbd_try_clear_on_disk_bm(mdev, sector, count, FALSE); | 1250 | drbd_try_clear_on_disk_bm(mdev, sector, count, false); |
1424 | put_ldev(mdev); | 1251 | put_ldev(mdev); |
1425 | } | 1252 | } |
1426 | 1253 | ||
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 0645ca829a94..f0ae63d2df65 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -28,18 +28,58 @@ | |||
28 | #include <linux/drbd.h> | 28 | #include <linux/drbd.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <asm/kmap_types.h> | 30 | #include <asm/kmap_types.h> |
31 | |||
32 | #include <asm-generic/bitops/le.h> | ||
33 | |||
31 | #include "drbd_int.h" | 34 | #include "drbd_int.h" |
32 | 35 | ||
36 | |||
33 | /* OPAQUE outside this file! | 37 | /* OPAQUE outside this file! |
34 | * interface defined in drbd_int.h | 38 | * interface defined in drbd_int.h |
35 | 39 | ||
36 | * convention: | 40 | * convention: |
37 | * function name drbd_bm_... => used elsewhere, "public". | 41 | * function name drbd_bm_... => used elsewhere, "public". |
38 | * function name bm_... => internal to implementation, "private". | 42 | * function name bm_... => internal to implementation, "private". |
43 | */ | ||
44 | |||
45 | |||
46 | /* | ||
47 | * LIMITATIONS: | ||
48 | * We want to support >= peta byte of backend storage, while for now still using | ||
49 | * a granularity of one bit per 4KiB of storage. | ||
50 | * 1 << 50 bytes backend storage (1 PiB) | ||
51 | * 1 << (50 - 12) bits needed | ||
52 | * 38 --> we need u64 to index and count bits | ||
53 | * 1 << (38 - 3) bitmap bytes needed | ||
54 | * 35 --> we still need u64 to index and count bytes | ||
55 | * (that's 32 GiB of bitmap for 1 PiB storage) | ||
56 | * 1 << (35 - 2) 32bit longs needed | ||
57 | * 33 --> we'd even need u64 to index and count 32bit long words. | ||
58 | * 1 << (35 - 3) 64bit longs needed | ||
59 | * 32 --> we could get away with a 32bit unsigned int to index and count | ||
60 | * 64bit long words, but I rather stay with unsigned long for now. | ||
61 | * We probably should neither count nor point to bytes or long words | ||
62 | * directly, but either by bitnumber, or by page index and offset. | ||
63 | * 1 << (35 - 12) | ||
64 | * 22 --> we need that much 4KiB pages of bitmap. | ||
65 | * 1 << (22 + 3) --> on a 64bit arch, | ||
66 | * we need 32 MiB to store the array of page pointers. | ||
67 | * | ||
68 | * Because I'm lazy, and because the resulting patch was too large, too ugly | ||
69 | * and still incomplete, on 32bit we still "only" support 16 TiB (minus some), | ||
70 | * (1 << 32) bits * 4k storage. | ||
71 | * | ||
39 | 72 | ||
40 | * Note that since find_first_bit returns int, at the current granularity of | 73 | * bitmap storage and IO: |
41 | * the bitmap (4KB per byte), this implementation "only" supports up to | 74 | * Bitmap is stored little endian on disk, and is kept little endian in |
42 | * 1<<(32+12) == 16 TB... | 75 | * core memory. Currently we still hold the full bitmap in core as long |
76 | * as we are "attached" to a local disk, which at 32 GiB for 1PiB storage | ||
77 | * seems excessive. | ||
78 | * | ||
79 | * We plan to reduce the amount of in-core bitmap pages by pageing them in | ||
80 | * and out against their on-disk location as necessary, but need to make | ||
81 | * sure we don't cause too much meta data IO, and must not deadlock in | ||
82 | * tight memory situations. This needs some more work. | ||
43 | */ | 83 | */ |
44 | 84 | ||
45 | /* | 85 | /* |
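The new LIMITATIONS comment reasons through the bitmap sizing entirely in shifts. A quick numeric check of that chain (1 PiB of backing storage, 1 bit per 4 KiB, 4 KiB bitmap pages) is below; it is an independent illustration, not part of the patch.

#include <stdio.h>

int main(void)
{
	unsigned long long storage = 1ULL << 50;	/* 1 PiB of backend storage */
	unsigned long long bits    = storage >> 12;	/* one bit per 4 KiB block */
	unsigned long long bytes   = bits >> 3;		/* bitmap bytes */
	unsigned long long pages   = bytes >> 12;	/* 4 KiB bitmap pages */

	printf("%llu bits, %llu GiB of bitmap, %llu bitmap pages\n",
	       bits, bytes >> 30, pages);
	return 0;
}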
@@ -55,13 +95,9 @@ | |||
55 | struct drbd_bitmap { | 95 | struct drbd_bitmap { |
56 | struct page **bm_pages; | 96 | struct page **bm_pages; |
57 | spinlock_t bm_lock; | 97 | spinlock_t bm_lock; |
58 | /* WARNING unsigned long bm_*: | 98 | |
59 | * 32bit number of bit offset is just enough for 512 MB bitmap. | 99 | /* see LIMITATIONS: above */ |
60 | * it will blow up if we make the bitmap bigger... | 100 | |
61 | * not that it makes much sense to have a bitmap that large, | ||
62 | * rather change the granularity to 16k or 64k or something. | ||
63 | * (that implies other problems, however...) | ||
64 | */ | ||
65 | unsigned long bm_set; /* nr of set bits; THINK maybe atomic_t? */ | 101 | unsigned long bm_set; /* nr of set bits; THINK maybe atomic_t? */ |
66 | unsigned long bm_bits; | 102 | unsigned long bm_bits; |
67 | size_t bm_words; | 103 | size_t bm_words; |
@@ -69,29 +105,18 @@ struct drbd_bitmap { | |||
69 | sector_t bm_dev_capacity; | 105 | sector_t bm_dev_capacity; |
70 | struct mutex bm_change; /* serializes resize operations */ | 106 | struct mutex bm_change; /* serializes resize operations */ |
71 | 107 | ||
72 | atomic_t bm_async_io; | 108 | wait_queue_head_t bm_io_wait; /* used to serialize IO of single pages */ |
73 | wait_queue_head_t bm_io_wait; | ||
74 | 109 | ||
75 | unsigned long bm_flags; | 110 | enum bm_flag bm_flags; |
76 | 111 | ||
77 | /* debugging aid, in case we are still racy somewhere */ | 112 | /* debugging aid, in case we are still racy somewhere */ |
78 | char *bm_why; | 113 | char *bm_why; |
79 | struct task_struct *bm_task; | 114 | struct task_struct *bm_task; |
80 | }; | 115 | }; |
81 | 116 | ||
82 | /* definition of bits in bm_flags */ | ||
83 | #define BM_LOCKED 0 | ||
84 | #define BM_MD_IO_ERROR 1 | ||
85 | #define BM_P_VMALLOCED 2 | ||
86 | |||
87 | static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, | 117 | static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, |
88 | unsigned long e, int val, const enum km_type km); | 118 | unsigned long e, int val, const enum km_type km); |
89 | 119 | ||
90 | static int bm_is_locked(struct drbd_bitmap *b) | ||
91 | { | ||
92 | return test_bit(BM_LOCKED, &b->bm_flags); | ||
93 | } | ||
94 | |||
95 | #define bm_print_lock_info(m) __bm_print_lock_info(m, __func__) | 120 | #define bm_print_lock_info(m) __bm_print_lock_info(m, __func__) |
96 | static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) | 121 | static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) |
97 | { | 122 | { |
@@ -108,7 +133,7 @@ static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) | |||
108 | b->bm_task == mdev->worker.task ? "worker" : "?"); | 133 | b->bm_task == mdev->worker.task ? "worker" : "?"); |
109 | } | 134 | } |
110 | 135 | ||
111 | void drbd_bm_lock(struct drbd_conf *mdev, char *why) | 136 | void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) |
112 | { | 137 | { |
113 | struct drbd_bitmap *b = mdev->bitmap; | 138 | struct drbd_bitmap *b = mdev->bitmap; |
114 | int trylock_failed; | 139 | int trylock_failed; |
@@ -131,8 +156,9 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why) | |||
131 | b->bm_task == mdev->worker.task ? "worker" : "?"); | 156 | b->bm_task == mdev->worker.task ? "worker" : "?"); |
132 | mutex_lock(&b->bm_change); | 157 | mutex_lock(&b->bm_change); |
133 | } | 158 | } |
134 | if (__test_and_set_bit(BM_LOCKED, &b->bm_flags)) | 159 | if (BM_LOCKED_MASK & b->bm_flags) |
135 | dev_err(DEV, "FIXME bitmap already locked in bm_lock\n"); | 160 | dev_err(DEV, "FIXME bitmap already locked in bm_lock\n"); |
161 | b->bm_flags |= flags & BM_LOCKED_MASK; | ||
136 | 162 | ||
137 | b->bm_why = why; | 163 | b->bm_why = why; |
138 | b->bm_task = current; | 164 | b->bm_task = current; |
@@ -146,31 +172,137 @@ void drbd_bm_unlock(struct drbd_conf *mdev) | |||
146 | return; | 172 | return; |
147 | } | 173 | } |
148 | 174 | ||
149 | if (!__test_and_clear_bit(BM_LOCKED, &mdev->bitmap->bm_flags)) | 175 | if (!(BM_LOCKED_MASK & mdev->bitmap->bm_flags)) |
150 | dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n"); | 176 | dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n"); |
151 | 177 | ||
178 | b->bm_flags &= ~BM_LOCKED_MASK; | ||
152 | b->bm_why = NULL; | 179 | b->bm_why = NULL; |
153 | b->bm_task = NULL; | 180 | b->bm_task = NULL; |
154 | mutex_unlock(&b->bm_change); | 181 | mutex_unlock(&b->bm_change); |
155 | } | 182 | } |
156 | 183 | ||
157 | /* word offset to long pointer */ | 184 | /* we store some "meta" info about our pages in page->private */ |
158 | static unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset, const enum km_type km) | 185 | /* at a granularity of 4k storage per bitmap bit: |
186 | * one peta byte storage: 1<<50 byte, 1<<38 * 4k storage blocks | ||
187 | * 1<<38 bits, | ||
188 | * 1<<23 4k bitmap pages. | ||
189 | * Use 24 bits as page index, covers 2 peta byte storage | ||
190 | * at a granularity of 4k per bit. | ||
191 | * Used to report the failed page idx on io error from the endio handlers. | ||
192 | */ | ||
193 | #define BM_PAGE_IDX_MASK ((1UL<<24)-1) | ||
194 | /* this page is currently read in, or written back */ | ||
195 | #define BM_PAGE_IO_LOCK 31 | ||
196 | /* if there has been an IO error for this page */ | ||
197 | #define BM_PAGE_IO_ERROR 30 | ||
198 | /* this is to be able to intelligently skip disk IO, | ||
199 | * set if bits have been set since last IO. */ | ||
200 | #define BM_PAGE_NEED_WRITEOUT 29 | ||
201 | /* to mark for lazy writeout once syncer cleared all clearable bits, | ||
202 | * we if bits have been cleared since last IO. */ | ||
203 | #define BM_PAGE_LAZY_WRITEOUT 28 | ||
204 | |||
205 | /* store_page_idx uses non-atomic assingment. It is only used directly after | ||
206 | * allocating the page. All other bm_set_page_* and bm_clear_page_* need to | ||
207 | * use atomic bit manipulation, as set_out_of_sync (and therefore bitmap | ||
208 | * changes) may happen from various contexts, and wait_on_bit/wake_up_bit | ||
209 | * requires it all to be atomic as well. */ | ||
210 | static void bm_store_page_idx(struct page *page, unsigned long idx) | ||
159 | { | 211 | { |
160 | struct page *page; | 212 | BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK)); |
161 | unsigned long page_nr; | 213 | page_private(page) |= idx; |
214 | } | ||
215 | |||
216 | static unsigned long bm_page_to_idx(struct page *page) | ||
217 | { | ||
218 | return page_private(page) & BM_PAGE_IDX_MASK; | ||
219 | } | ||
220 | |||
221 | /* As is very unlikely that the same page is under IO from more than one | ||
222 | * context, we can get away with a bit per page and one wait queue per bitmap. | ||
223 | */ | ||
224 | static void bm_page_lock_io(struct drbd_conf *mdev, int page_nr) | ||
225 | { | ||
226 | struct drbd_bitmap *b = mdev->bitmap; | ||
227 | void *addr = &page_private(b->bm_pages[page_nr]); | ||
228 | wait_event(b->bm_io_wait, !test_and_set_bit(BM_PAGE_IO_LOCK, addr)); | ||
229 | } | ||
230 | |||
231 | static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr) | ||
232 | { | ||
233 | struct drbd_bitmap *b = mdev->bitmap; | ||
234 | void *addr = &page_private(b->bm_pages[page_nr]); | ||
235 | clear_bit(BM_PAGE_IO_LOCK, addr); | ||
236 | smp_mb__after_clear_bit(); | ||
237 | wake_up(&mdev->bitmap->bm_io_wait); | ||
238 | } | ||
239 | |||
240 | /* set _before_ submit_io, so it may be reset due to being changed | ||
241 | * while this page is in flight... will get submitted later again */ | ||
242 | static void bm_set_page_unchanged(struct page *page) | ||
243 | { | ||
244 | /* use cmpxchg? */ | ||
245 | clear_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page)); | ||
246 | clear_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page)); | ||
247 | } | ||
162 | 248 | ||
249 | static void bm_set_page_need_writeout(struct page *page) | ||
250 | { | ||
251 | set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page)); | ||
252 | } | ||
253 | |||
254 | static int bm_test_page_unchanged(struct page *page) | ||
255 | { | ||
256 | volatile const unsigned long *addr = &page_private(page); | ||
257 | return (*addr & ((1UL<<BM_PAGE_NEED_WRITEOUT)|(1UL<<BM_PAGE_LAZY_WRITEOUT))) == 0; | ||
258 | } | ||
259 | |||
260 | static void bm_set_page_io_err(struct page *page) | ||
261 | { | ||
262 | set_bit(BM_PAGE_IO_ERROR, &page_private(page)); | ||
263 | } | ||
264 | |||
265 | static void bm_clear_page_io_err(struct page *page) | ||
266 | { | ||
267 | clear_bit(BM_PAGE_IO_ERROR, &page_private(page)); | ||
268 | } | ||
269 | |||
270 | static void bm_set_page_lazy_writeout(struct page *page) | ||
271 | { | ||
272 | set_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page)); | ||
273 | } | ||
274 | |||
275 | static int bm_test_page_lazy_writeout(struct page *page) | ||
276 | { | ||
277 | return test_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page)); | ||
278 | } | ||
279 | |||
280 | /* on a 32bit box, this would allow for exactly (2<<38) bits. */ | ||
281 | static unsigned int bm_word_to_page_idx(struct drbd_bitmap *b, unsigned long long_nr) | ||
282 | { | ||
163 | /* page_nr = (word*sizeof(long)) >> PAGE_SHIFT; */ | 283 | /* page_nr = (word*sizeof(long)) >> PAGE_SHIFT; */ |
164 | page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3); | 284 | unsigned int page_nr = long_nr >> (PAGE_SHIFT - LN2_BPL + 3); |
165 | BUG_ON(page_nr >= b->bm_number_of_pages); | 285 | BUG_ON(page_nr >= b->bm_number_of_pages); |
166 | page = b->bm_pages[page_nr]; | 286 | return page_nr; |
287 | } | ||
167 | 288 | ||
289 | static unsigned int bm_bit_to_page_idx(struct drbd_bitmap *b, u64 bitnr) | ||
290 | { | ||
291 | /* page_nr = (bitnr/8) >> PAGE_SHIFT; */ | ||
292 | unsigned int page_nr = bitnr >> (PAGE_SHIFT + 3); | ||
293 | BUG_ON(page_nr >= b->bm_number_of_pages); | ||
294 | return page_nr; | ||
295 | } | ||
296 | |||
297 | static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx, const enum km_type km) | ||
298 | { | ||
299 | struct page *page = b->bm_pages[idx]; | ||
168 | return (unsigned long *) kmap_atomic(page, km); | 300 | return (unsigned long *) kmap_atomic(page, km); |
169 | } | 301 | } |
170 | 302 | ||
171 | static unsigned long * bm_map_paddr(struct drbd_bitmap *b, unsigned long offset) | 303 | static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx) |
172 | { | 304 | { |
173 | return __bm_map_paddr(b, offset, KM_IRQ1); | 305 | return __bm_map_pidx(b, idx, KM_IRQ1); |
174 | } | 306 | } |
175 | 307 | ||
176 | static void __bm_unmap(unsigned long *p_addr, const enum km_type km) | 308 | static void __bm_unmap(unsigned long *p_addr, const enum km_type km) |
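The large hunk above starts keeping per-page metadata in page->private for every bitmap page: the low 24 bits store the page's own index (so the endio handlers can say which page failed), and the top bits are the IO-lock, IO-error, need-writeout and lazy-writeout flags, manipulated atomically in the driver. A userspace model of that packing, using the same bit positions as the hunk, is sketched below.

#include <stdint.h>
#include <stdio.h>

#define BM_PAGE_IDX_MASK	((1UL << 24) - 1)
#define BM_PAGE_IO_LOCK		31
#define BM_PAGE_IO_ERROR	30
#define BM_PAGE_NEED_WRITEOUT	29
#define BM_PAGE_LAZY_WRITEOUT	28

int main(void)
{
	unsigned long priv = 0;			/* stands in for page_private(page) */

	priv |= 0x12345UL & BM_PAGE_IDX_MASK;	/* bm_store_page_idx() at alloc time */
	priv |= 1UL << BM_PAGE_NEED_WRITEOUT;	/* bits were set since the last write-out */

	printf("page idx %lu, need_writeout=%lu, io_error=%lu\n",
	       priv & BM_PAGE_IDX_MASK,
	       (priv >> BM_PAGE_NEED_WRITEOUT) & 1,
	       (priv >> BM_PAGE_IO_ERROR) & 1);
	return 0;
}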
@@ -202,6 +334,7 @@ static void bm_unmap(unsigned long *p_addr) | |||
202 | * to be able to report device specific. | 334 | * to be able to report device specific. |
203 | */ | 335 | */ |
204 | 336 | ||
337 | |||
205 | static void bm_free_pages(struct page **pages, unsigned long number) | 338 | static void bm_free_pages(struct page **pages, unsigned long number) |
206 | { | 339 | { |
207 | unsigned long i; | 340 | unsigned long i; |
@@ -269,6 +402,9 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) | |||
269 | bm_vk_free(new_pages, vmalloced); | 402 | bm_vk_free(new_pages, vmalloced); |
270 | return NULL; | 403 | return NULL; |
271 | } | 404 | } |
405 | /* we want to know which page it is | ||
406 | * from the endio handlers */ | ||
407 | bm_store_page_idx(page, i); | ||
272 | new_pages[i] = page; | 408 | new_pages[i] = page; |
273 | } | 409 | } |
274 | } else { | 410 | } else { |
@@ -280,9 +416,9 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) | |||
280 | } | 416 | } |
281 | 417 | ||
282 | if (vmalloced) | 418 | if (vmalloced) |
283 | set_bit(BM_P_VMALLOCED, &b->bm_flags); | 419 | b->bm_flags |= BM_P_VMALLOCED; |
284 | else | 420 | else |
285 | clear_bit(BM_P_VMALLOCED, &b->bm_flags); | 421 | b->bm_flags &= ~BM_P_VMALLOCED; |
286 | 422 | ||
287 | return new_pages; | 423 | return new_pages; |
288 | } | 424 | } |
@@ -319,7 +455,7 @@ void drbd_bm_cleanup(struct drbd_conf *mdev) | |||
319 | { | 455 | { |
320 | ERR_IF (!mdev->bitmap) return; | 456 | ERR_IF (!mdev->bitmap) return; |
321 | bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages); | 457 | bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages); |
322 | bm_vk_free(mdev->bitmap->bm_pages, test_bit(BM_P_VMALLOCED, &mdev->bitmap->bm_flags)); | 458 | bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags)); |
323 | kfree(mdev->bitmap); | 459 | kfree(mdev->bitmap); |
324 | mdev->bitmap = NULL; | 460 | mdev->bitmap = NULL; |
325 | } | 461 | } |
@@ -329,22 +465,39 @@ void drbd_bm_cleanup(struct drbd_conf *mdev) | |||
329 | * this masks out the remaining bits. | 465 | * this masks out the remaining bits. |
330 | * Returns the number of bits cleared. | 466 | * Returns the number of bits cleared. |
331 | */ | 467 | */ |
468 | #define BITS_PER_PAGE (1UL << (PAGE_SHIFT + 3)) | ||
469 | #define BITS_PER_PAGE_MASK (BITS_PER_PAGE - 1) | ||
470 | #define BITS_PER_LONG_MASK (BITS_PER_LONG - 1) | ||
332 | static int bm_clear_surplus(struct drbd_bitmap *b) | 471 | static int bm_clear_surplus(struct drbd_bitmap *b) |
333 | { | 472 | { |
334 | const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1; | 473 | unsigned long mask; |
335 | size_t w = b->bm_bits >> LN2_BPL; | ||
336 | int cleared = 0; | ||
337 | unsigned long *p_addr, *bm; | 474 | unsigned long *p_addr, *bm; |
475 | int tmp; | ||
476 | int cleared = 0; | ||
338 | 477 | ||
339 | p_addr = bm_map_paddr(b, w); | 478 | /* number of bits modulo bits per page */ |
340 | bm = p_addr + MLPP(w); | 479 | tmp = (b->bm_bits & BITS_PER_PAGE_MASK); |
341 | if (w < b->bm_words) { | 480 | /* mask the used bits of the word containing the last bit */ |
481 | mask = (1UL << (tmp & BITS_PER_LONG_MASK)) -1; | ||
482 | /* bitmap is always stored little endian, | ||
483 | * on disk and in core memory alike */ | ||
484 | mask = cpu_to_lel(mask); | ||
485 | |||
486 | p_addr = bm_map_pidx(b, b->bm_number_of_pages - 1); | ||
487 | bm = p_addr + (tmp/BITS_PER_LONG); | ||
488 | if (mask) { | ||
489 | /* If mask != 0, we are not exactly aligned, so bm now points | ||
490 | * to the long containing the last bit. | ||
491 | * If mask == 0, bm already points to the word immediately | ||
492 | * after the last (long word aligned) bit. */ | ||
342 | cleared = hweight_long(*bm & ~mask); | 493 | cleared = hweight_long(*bm & ~mask); |
343 | *bm &= mask; | 494 | *bm &= mask; |
344 | w++; bm++; | 495 | bm++; |
345 | } | 496 | } |
346 | 497 | ||
347 | if (w < b->bm_words) { | 498 | if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) { |
499 | /* on a 32bit arch, we may need to zero out | ||
500 | * a padding long to align with a 64bit remote */ | ||
348 | cleared += hweight_long(*bm); | 501 | cleared += hweight_long(*bm); |
349 | *bm = 0; | 502 | *bm = 0; |
350 | } | 503 | } |
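
Both surplus helpers rely on the same mask: the used bits of the long word that holds the last valid bit, with the bitmap kept little endian in memory and on disk. A userspace sketch of computing that mask and clearing the surplus, assuming a little-endian 64-bit host so the cpu_to_lel() conversion is a no-op, and using __builtin_popcountl() as a stand-in for hweight_long():

#include <stdio.h>

#define BITS_PER_LONG       64                   /* assumed 64-bit host */
#define BITS_PER_LONG_MASK  (BITS_PER_LONG - 1)
#define BITS_PER_PAGE       (4096UL * 8)          /* assumed 4 KiB pages */
#define BITS_PER_PAGE_MASK  (BITS_PER_PAGE - 1)

int main(void)
{
        unsigned long bm_bits = 100000;   /* hypothetical bitmap size in bits */

        /* bits used in the last page, then the used bits of its last long */
        unsigned long tmp  = bm_bits & BITS_PER_PAGE_MASK;
        unsigned long mask = (1UL << (tmp & BITS_PER_LONG_MASK)) - 1;

        /* mask == 0 means the bitmap ends exactly on a long boundary */
        unsigned long last_long = ~0UL;           /* pretend all bits were set */
        int cleared = 0;
        if (mask) {
                cleared = __builtin_popcountl(last_long & ~mask);
                last_long &= mask;                /* clear the surplus bits */
        }
        printf("mask=%#lx, cleared %d surplus bits\n", mask, cleared);
        return 0;
}
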
@@ -354,66 +507,75 @@ static int bm_clear_surplus(struct drbd_bitmap *b) | |||
354 | 507 | ||
355 | static void bm_set_surplus(struct drbd_bitmap *b) | 508 | static void bm_set_surplus(struct drbd_bitmap *b) |
356 | { | 509 | { |
357 | const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1; | 510 | unsigned long mask; |
358 | size_t w = b->bm_bits >> LN2_BPL; | ||
359 | unsigned long *p_addr, *bm; | 511 | unsigned long *p_addr, *bm; |
360 | 512 | int tmp; | |
361 | p_addr = bm_map_paddr(b, w); | 513 | |
362 | bm = p_addr + MLPP(w); | 514 | /* number of bits modulo bits per page */ |
363 | if (w < b->bm_words) { | 515 | tmp = (b->bm_bits & BITS_PER_PAGE_MASK); |
516 | /* mask the used bits of the word containing the last bit */ | ||
517 | mask = (1UL << (tmp & BITS_PER_LONG_MASK)) -1; | ||
518 | /* bitmap is always stored little endian, | ||
519 | * on disk and in core memory alike */ | ||
520 | mask = cpu_to_lel(mask); | ||
521 | |||
522 | p_addr = bm_map_pidx(b, b->bm_number_of_pages - 1); | ||
523 | bm = p_addr + (tmp/BITS_PER_LONG); | ||
524 | if (mask) { | ||
525 | /* If mask != 0, we are not exactly aligned, so bm now points | ||
526 | * to the long containing the last bit. | ||
527 | * If mask == 0, bm already points to the word immediately | ||
528 | * after the last (long word aligned) bit. */ | ||
364 | *bm |= ~mask; | 529 | *bm |= ~mask; |
365 | bm++; w++; | 530 | bm++; |
366 | } | 531 | } |
367 | 532 | ||
368 | if (w < b->bm_words) { | 533 | if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) { |
369 | *bm = ~(0UL); | 534 | /* on a 32bit arch, we may need to set |
535 | * a padding long to align with a 64bit remote */ | ||
369 | *bm = ~(0UL); | 534 | /* on a 32bit arch, we may need to set |
536 | *bm = ~0UL; | ||
370 | } | 537 | } |
371 | bm_unmap(p_addr); | 538 | bm_unmap(p_addr); |
372 | } | 539 | } |
373 | 540 | ||
374 | static unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endian) | 541 | /* you better not modify the bitmap while this is running, |
542 | * or its results will be stale */ | ||
543 | static unsigned long bm_count_bits(struct drbd_bitmap *b) | ||
375 | { | 544 | { |
376 | unsigned long *p_addr, *bm, offset = 0; | 545 | unsigned long *p_addr; |
377 | unsigned long bits = 0; | 546 | unsigned long bits = 0; |
378 | unsigned long i, do_now; | 547 | unsigned long mask = (1UL << (b->bm_bits & BITS_PER_LONG_MASK)) -1; |
379 | 548 | int idx, i, last_word; | |
380 | while (offset < b->bm_words) { | 549 | |
381 | i = do_now = min_t(size_t, b->bm_words-offset, LWPP); | 550 | /* all but last page */ |
382 | p_addr = __bm_map_paddr(b, offset, KM_USER0); | 551 | for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) { |
383 | bm = p_addr + MLPP(offset); | 552 | p_addr = __bm_map_pidx(b, idx, KM_USER0); |
384 | while (i--) { | 553 | for (i = 0; i < LWPP; i++) |
385 | #ifndef __LITTLE_ENDIAN | 554 | bits += hweight_long(p_addr[i]); |
386 | if (swap_endian) | ||
387 | *bm = lel_to_cpu(*bm); | ||
388 | #endif | ||
389 | bits += hweight_long(*bm++); | ||
390 | } | ||
391 | __bm_unmap(p_addr, KM_USER0); | 555 | __bm_unmap(p_addr, KM_USER0); |
392 | offset += do_now; | ||
393 | cond_resched(); | 556 | cond_resched(); |
394 | } | 557 | } |
395 | 558 | /* last (or only) page */ | |
559 | last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL; | ||
560 | p_addr = __bm_map_pidx(b, idx, KM_USER0); | ||
561 | for (i = 0; i < last_word; i++) | ||
562 | bits += hweight_long(p_addr[i]); | ||
563 | p_addr[last_word] &= cpu_to_lel(mask); | ||
564 | bits += hweight_long(p_addr[last_word]); | ||
565 | /* 32bit arch, may have an unused padding long */ | ||
566 | if (BITS_PER_LONG == 32 && (last_word & 1) == 0) | ||
567 | p_addr[last_word+1] = 0; | ||
568 | __bm_unmap(p_addr, KM_USER0); | ||
396 | return bits; | 569 | return bits; |
397 | } | 570 | } |
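
bm_count_bits() now counts full pages with hweight_long() and special-cases only the last, possibly partial, word. The same pattern in a userspace sketch, using __builtin_popcountl() in place of hweight_long() and a tiny made-up bitmap:

#include <stdio.h>

#define BITS_PER_LONG 64      /* assumed 64-bit host */

int main(void)
{
        unsigned long bm[3]   = { ~0UL, ~0UL, 0x5UL };          /* backing words */
        unsigned long bm_bits = 2 * BITS_PER_LONG + 3;          /* only 131 bits valid */
        unsigned long bits = 0;

        /* all full words before the one holding the last valid bit */
        unsigned long last_word = (bm_bits - 1) / BITS_PER_LONG;
        for (unsigned long i = 0; i < last_word; i++)
                bits += __builtin_popcountl(bm[i]);

        /* last word: mask out anything beyond bm_bits before counting */
        unsigned long mask = (1UL << (bm_bits & (BITS_PER_LONG - 1))) - 1;
        if (!mask)
                mask = ~0UL;      /* bitmap ends exactly on a word boundary */
        bits += __builtin_popcountl(bm[last_word] & mask);

        printf("set bits: %lu (expect 130)\n", bits);
        return 0;
}
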
398 | 571 | ||
399 | static unsigned long bm_count_bits(struct drbd_bitmap *b) | ||
400 | { | ||
401 | return __bm_count_bits(b, 0); | ||
402 | } | ||
403 | |||
404 | static unsigned long bm_count_bits_swap_endian(struct drbd_bitmap *b) | ||
405 | { | ||
406 | return __bm_count_bits(b, 1); | ||
407 | } | ||
408 | |||
409 | /* offset and len in long words.*/ | 572 | /* offset and len in long words.*/ |
410 | static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) | 573 | static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) |
411 | { | 574 | { |
412 | unsigned long *p_addr, *bm; | 575 | unsigned long *p_addr, *bm; |
576 | unsigned int idx; | ||
413 | size_t do_now, end; | 577 | size_t do_now, end; |
414 | 578 | ||
415 | #define BM_SECTORS_PER_BIT (BM_BLOCK_SIZE/512) | ||
416 | |||
417 | end = offset + len; | 579 | end = offset + len; |
418 | 580 | ||
419 | if (end > b->bm_words) { | 581 | if (end > b->bm_words) { |
@@ -423,15 +585,16 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) | |||
423 | 585 | ||
424 | while (offset < end) { | 586 | while (offset < end) { |
425 | do_now = min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset; | 587 | do_now = min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset; |
426 | p_addr = bm_map_paddr(b, offset); | 588 | idx = bm_word_to_page_idx(b, offset); |
589 | p_addr = bm_map_pidx(b, idx); | ||
427 | bm = p_addr + MLPP(offset); | 590 | bm = p_addr + MLPP(offset); |
428 | if (bm+do_now > p_addr + LWPP) { | 591 | if (bm+do_now > p_addr + LWPP) { |
429 | printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n", | 592 | printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n", |
430 | p_addr, bm, (int)do_now); | 593 | p_addr, bm, (int)do_now); |
431 | break; /* breaks to after catch_oob_access_end() only! */ | 594 | } else |
432 | } | 595 | memset(bm, c, do_now * sizeof(long)); |
433 | memset(bm, c, do_now * sizeof(long)); | ||
434 | bm_unmap(p_addr); | 596 | bm_unmap(p_addr); |
597 | bm_set_page_need_writeout(b->bm_pages[idx]); | ||
435 | offset += do_now; | 598 | offset += do_now; |
436 | } | 599 | } |
437 | } | 600 | } |
@@ -447,7 +610,7 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) | |||
447 | int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) | 610 | int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) |
448 | { | 611 | { |
449 | struct drbd_bitmap *b = mdev->bitmap; | 612 | struct drbd_bitmap *b = mdev->bitmap; |
450 | unsigned long bits, words, owords, obits, *p_addr, *bm; | 613 | unsigned long bits, words, owords, obits; |
451 | unsigned long want, have, onpages; /* number of pages */ | 614 | unsigned long want, have, onpages; /* number of pages */ |
452 | struct page **npages, **opages = NULL; | 615 | struct page **npages, **opages = NULL; |
453 | int err = 0, growing; | 616 | int err = 0, growing; |
@@ -455,7 +618,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) | |||
455 | 618 | ||
456 | ERR_IF(!b) return -ENOMEM; | 619 | ERR_IF(!b) return -ENOMEM; |
457 | 620 | ||
458 | drbd_bm_lock(mdev, "resize"); | 621 | drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK); |
459 | 622 | ||
460 | dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n", | 623 | dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n", |
461 | (unsigned long long)capacity); | 624 | (unsigned long long)capacity); |
@@ -463,7 +626,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) | |||
463 | if (capacity == b->bm_dev_capacity) | 626 | if (capacity == b->bm_dev_capacity) |
464 | goto out; | 627 | goto out; |
465 | 628 | ||
466 | opages_vmalloced = test_bit(BM_P_VMALLOCED, &b->bm_flags); | 629 | opages_vmalloced = (BM_P_VMALLOCED & b->bm_flags); |
467 | 630 | ||
468 | if (capacity == 0) { | 631 | if (capacity == 0) { |
469 | spin_lock_irq(&b->bm_lock); | 632 | spin_lock_irq(&b->bm_lock); |
@@ -491,18 +654,23 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) | |||
491 | words = ALIGN(bits, 64) >> LN2_BPL; | 654 | words = ALIGN(bits, 64) >> LN2_BPL; |
492 | 655 | ||
493 | if (get_ldev(mdev)) { | 656 | if (get_ldev(mdev)) { |
494 | D_ASSERT((u64)bits <= (((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12)); | 657 | u64 bits_on_disk = ((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12; |
495 | put_ldev(mdev); | 658 | put_ldev(mdev); |
659 | if (bits > bits_on_disk) { | ||
660 | dev_info(DEV, "bits = %lu\n", bits); | ||
661 | dev_info(DEV, "bits_on_disk = %llu\n", bits_on_disk); | ||
662 | err = -ENOSPC; | ||
663 | goto out; | ||
664 | } | ||
496 | } | 665 | } |
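
The new capacity check converts the bitmap area of the meta data device into bits: subtracting the bitmap offset from md_size_sect leaves 512-byte sectors, and each sector stores 512 * 8 = 4096 bits, hence the shift by 12. A sketch of that arithmetic with made-up numbers (the offset value is an assumption, not taken from this patch):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        const uint64_t md_bm_offset = 72;        /* assumed bitmap offset, in sectors */
        uint64_t md_size_sect = 262144;          /* hypothetical 128 MiB meta data area */

        /* sectors available for the bitmap, then bits: 512 bytes = 4096 bits each */
        uint64_t bm_sectors   = md_size_sect - md_bm_offset;
        uint64_t bits_on_disk = bm_sectors << 12;

        /* at 4 KiB of data per bitmap bit, this many bytes can be covered */
        uint64_t covered = bits_on_disk * 4096;

        printf("bits_on_disk=%llu, covers ~%llu GiB of data\n",
               (unsigned long long)bits_on_disk,
               (unsigned long long)(covered >> 30));
        return 0;
}
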
497 | 666 | ||
498 | /* one extra long to catch off by one errors */ | 667 | want = ALIGN(words*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT; |
499 | want = ALIGN((words+1)*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT; | ||
500 | have = b->bm_number_of_pages; | 668 | have = b->bm_number_of_pages; |
501 | if (want == have) { | 669 | if (want == have) { |
502 | D_ASSERT(b->bm_pages != NULL); | 670 | D_ASSERT(b->bm_pages != NULL); |
503 | npages = b->bm_pages; | 671 | npages = b->bm_pages; |
504 | } else { | 672 | } else { |
505 | if (FAULT_ACTIVE(mdev, DRBD_FAULT_BM_ALLOC)) | 673 | if (drbd_insert_fault(mdev, DRBD_FAULT_BM_ALLOC)) |
506 | npages = NULL; | 674 | npages = NULL; |
507 | else | 675 | else |
508 | npages = bm_realloc_pages(b, want); | 676 | npages = bm_realloc_pages(b, want); |
@@ -542,11 +710,6 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) | |||
542 | bm_free_pages(opages + want, have - want); | 710 | bm_free_pages(opages + want, have - want); |
543 | } | 711 | } |
544 | 712 | ||
545 | p_addr = bm_map_paddr(b, words); | ||
546 | bm = p_addr + MLPP(words); | ||
547 | *bm = DRBD_MAGIC; | ||
548 | bm_unmap(p_addr); | ||
549 | |||
550 | (void)bm_clear_surplus(b); | 713 | (void)bm_clear_surplus(b); |
551 | 714 | ||
552 | spin_unlock_irq(&b->bm_lock); | 715 | spin_unlock_irq(&b->bm_lock); |
@@ -554,7 +717,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) | |||
554 | bm_vk_free(opages, opages_vmalloced); | 717 | bm_vk_free(opages, opages_vmalloced); |
555 | if (!growing) | 718 | if (!growing) |
556 | b->bm_set = bm_count_bits(b); | 719 | b->bm_set = bm_count_bits(b); |
557 | dev_info(DEV, "resync bitmap: bits=%lu words=%lu\n", bits, words); | 720 | dev_info(DEV, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want); |
558 | 721 | ||
559 | out: | 722 | out: |
560 | drbd_bm_unlock(mdev); | 723 | drbd_bm_unlock(mdev); |
@@ -624,6 +787,7 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, | |||
624 | struct drbd_bitmap *b = mdev->bitmap; | 787 | struct drbd_bitmap *b = mdev->bitmap; |
625 | unsigned long *p_addr, *bm; | 788 | unsigned long *p_addr, *bm; |
626 | unsigned long word, bits; | 789 | unsigned long word, bits; |
790 | unsigned int idx; | ||
627 | size_t end, do_now; | 791 | size_t end, do_now; |
628 | 792 | ||
629 | end = offset + number; | 793 | end = offset + number; |
@@ -638,16 +802,18 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, | |||
638 | spin_lock_irq(&b->bm_lock); | 802 | spin_lock_irq(&b->bm_lock); |
639 | while (offset < end) { | 803 | while (offset < end) { |
640 | do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset; | 804 | do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset; |
641 | p_addr = bm_map_paddr(b, offset); | 805 | idx = bm_word_to_page_idx(b, offset); |
806 | p_addr = bm_map_pidx(b, idx); | ||
642 | bm = p_addr + MLPP(offset); | 807 | bm = p_addr + MLPP(offset); |
643 | offset += do_now; | 808 | offset += do_now; |
644 | while (do_now--) { | 809 | while (do_now--) { |
645 | bits = hweight_long(*bm); | 810 | bits = hweight_long(*bm); |
646 | word = *bm | lel_to_cpu(*buffer++); | 811 | word = *bm | *buffer++; |
647 | *bm++ = word; | 812 | *bm++ = word; |
648 | b->bm_set += hweight_long(word) - bits; | 813 | b->bm_set += hweight_long(word) - bits; |
649 | } | 814 | } |
650 | bm_unmap(p_addr); | 815 | bm_unmap(p_addr); |
816 | bm_set_page_need_writeout(b->bm_pages[idx]); | ||
651 | } | 817 | } |
652 | /* with 32bit <-> 64bit cross-platform connect | 818 | /* with 32bit <-> 64bit cross-platform connect |
653 | * this is only correct for current usage, | 819 | * this is only correct for current usage, |
@@ -656,7 +822,6 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, | |||
656 | */ | 822 | */ |
657 | if (end == b->bm_words) | 823 | if (end == b->bm_words) |
658 | b->bm_set -= bm_clear_surplus(b); | 824 | b->bm_set -= bm_clear_surplus(b); |
659 | |||
660 | spin_unlock_irq(&b->bm_lock); | 825 | spin_unlock_irq(&b->bm_lock); |
661 | } | 826 | } |
662 | 827 | ||
@@ -686,11 +851,11 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, | |||
686 | else { | 851 | else { |
687 | while (offset < end) { | 852 | while (offset < end) { |
688 | do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset; | 853 | do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset; |
689 | p_addr = bm_map_paddr(b, offset); | 854 | p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, offset)); |
690 | bm = p_addr + MLPP(offset); | 855 | bm = p_addr + MLPP(offset); |
691 | offset += do_now; | 856 | offset += do_now; |
692 | while (do_now--) | 857 | while (do_now--) |
693 | *buffer++ = cpu_to_lel(*bm++); | 858 | *buffer++ = *bm++; |
694 | bm_unmap(p_addr); | 859 | bm_unmap(p_addr); |
695 | } | 860 | } |
696 | } | 861 | } |
@@ -724,9 +889,22 @@ void drbd_bm_clear_all(struct drbd_conf *mdev) | |||
724 | spin_unlock_irq(&b->bm_lock); | 889 | spin_unlock_irq(&b->bm_lock); |
725 | } | 890 | } |
726 | 891 | ||
892 | struct bm_aio_ctx { | ||
893 | struct drbd_conf *mdev; | ||
894 | atomic_t in_flight; | ||
895 | struct completion done; | ||
896 | unsigned flags; | ||
897 | #define BM_AIO_COPY_PAGES 1 | ||
898 | int error; | ||
899 | }; | ||
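
The per-call bm_aio_ctx replaces the old per-bitmap atomic and waitqueue. Its in_flight counter starts at 1 so the submitter holds a reference while bios are still being issued; only after that reference is dropped can the last completion fire. A userspace sketch of the counting pattern, with a plain flag standing in for the kernel completion (purely illustrative, not the driver's code):

#include <stdio.h>

struct aio_ctx {
        int in_flight;    /* stands in for atomic_t */
        int done;         /* stands in for struct completion */
};

/* called once per finished "bio" */
static void io_complete(struct aio_ctx *ctx)
{
        if (--ctx->in_flight == 0)
                ctx->done = 1;            /* complete(&ctx->done) */
}

int main(void)
{
        struct aio_ctx ctx = { .in_flight = 1, .done = 0 };   /* submitter's own ref */
        int i;

        for (i = 0; i < 4; i++) {
                ctx.in_flight++;          /* atomic_inc() before submit */
                io_complete(&ctx);        /* pretend the IO finished immediately */
        }

        /* mirror bm_rw(): drop the submitter's reference last */
        if (--ctx.in_flight != 0)
                printf("IO still pending, would wait_for_completion()\n");
        else
                printf("all IO already finished, no wait needed\n");
        return 0;
}
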
900 | |||
901 | /* bv_page may be a copy, or may be the original */ | ||
727 | static void bm_async_io_complete(struct bio *bio, int error) | 902 | static void bm_async_io_complete(struct bio *bio, int error) |
728 | { | 903 | { |
729 | struct drbd_bitmap *b = bio->bi_private; | 904 | struct bm_aio_ctx *ctx = bio->bi_private; |
905 | struct drbd_conf *mdev = ctx->mdev; | ||
906 | struct drbd_bitmap *b = mdev->bitmap; | ||
907 | unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page); | ||
730 | int uptodate = bio_flagged(bio, BIO_UPTODATE); | 908 | int uptodate = bio_flagged(bio, BIO_UPTODATE); |
731 | 909 | ||
732 | 910 | ||
@@ -737,38 +915,83 @@ static void bm_async_io_complete(struct bio *bio, int error) | |||
737 | if (!error && !uptodate) | 915 | if (!error && !uptodate) |
738 | error = -EIO; | 916 | error = -EIO; |
739 | 917 | ||
918 | if ((ctx->flags & BM_AIO_COPY_PAGES) == 0 && | ||
919 | !bm_test_page_unchanged(b->bm_pages[idx])) | ||
920 | dev_warn(DEV, "bitmap page idx %u changed during IO!\n", idx); | ||
921 | |||
740 | if (error) { | 922 | if (error) { |
741 | /* doh. what now? | 923 | /* ctx error will hold the completed-last non-zero error code, |
742 | * for now, set all bits, and flag MD_IO_ERROR */ | 924 | * in case error codes differ. */ |
743 | __set_bit(BM_MD_IO_ERROR, &b->bm_flags); | 925 | ctx->error = error; |
926 | bm_set_page_io_err(b->bm_pages[idx]); | ||
927 | /* Not identical to on disk version of it. | ||
928 | * Is BM_PAGE_IO_ERROR enough? */ | ||
929 | if (__ratelimit(&drbd_ratelimit_state)) | ||
930 | dev_err(DEV, "IO ERROR %d on bitmap page idx %u\n", | ||
931 | error, idx); | ||
932 | } else { | ||
933 | bm_clear_page_io_err(b->bm_pages[idx]); | ||
934 | dynamic_dev_dbg(DEV, "bitmap page idx %u completed\n", idx); | ||
744 | } | 935 | } |
745 | if (atomic_dec_and_test(&b->bm_async_io)) | 936 | |
746 | wake_up(&b->bm_io_wait); | 937 | bm_page_unlock_io(mdev, idx); |
938 | |||
939 | /* FIXME give back to page pool */ | ||
940 | if (ctx->flags & BM_AIO_COPY_PAGES) | ||
941 | put_page(bio->bi_io_vec[0].bv_page); | ||
747 | 942 | ||
748 | bio_put(bio); | 943 | bio_put(bio); |
944 | |||
945 | if (atomic_dec_and_test(&ctx->in_flight)) | ||
946 | complete(&ctx->done); | ||
749 | } | 947 | } |
750 | 948 | ||
751 | static void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int page_nr, int rw) __must_hold(local) | 949 | static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local) |
752 | { | 950 | { |
753 | /* we are process context. we always get a bio */ | 951 | /* we are process context. we always get a bio */ |
754 | struct bio *bio = bio_alloc(GFP_KERNEL, 1); | 952 | struct bio *bio = bio_alloc(GFP_KERNEL, 1); |
953 | struct drbd_conf *mdev = ctx->mdev; | ||
954 | struct drbd_bitmap *b = mdev->bitmap; | ||
955 | struct page *page; | ||
755 | unsigned int len; | 956 | unsigned int len; |
957 | |||
756 | sector_t on_disk_sector = | 958 | sector_t on_disk_sector = |
757 | mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset; | 959 | mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset; |
758 | on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9); | 960 | on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9); |
759 | 961 | ||
760 | /* this might happen with very small | 962 | /* this might happen with very small |
761 | * flexible external meta data device */ | 963 | * flexible external meta data device, |
964 | * or with PAGE_SIZE > 4k */ | ||
762 | len = min_t(unsigned int, PAGE_SIZE, | 965 | len = min_t(unsigned int, PAGE_SIZE, |
763 | (drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9); | 966 | (drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9); |
764 | 967 | ||
968 | /* serialize IO on this page */ | ||
969 | bm_page_lock_io(mdev, page_nr); | ||
970 | /* before memcpy and submit, | ||
971 | * so it can be redirtied any time */ | ||
972 | bm_set_page_unchanged(b->bm_pages[page_nr]); | ||
973 | |||
974 | if (ctx->flags & BM_AIO_COPY_PAGES) { | ||
975 | /* FIXME alloc_page is good enough for now, but actually needs | ||
976 | * to use pre-allocated page pool */ | ||
977 | void *src, *dest; | ||
978 | page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT); | ||
979 | dest = kmap_atomic(page, KM_USER0); | ||
980 | src = kmap_atomic(b->bm_pages[page_nr], KM_USER1); | ||
981 | memcpy(dest, src, PAGE_SIZE); | ||
982 | kunmap_atomic(src, KM_USER1); | ||
983 | kunmap_atomic(dest, KM_USER0); | ||
984 | bm_store_page_idx(page, page_nr); | ||
985 | } else | ||
986 | page = b->bm_pages[page_nr]; | ||
987 | |||
765 | bio->bi_bdev = mdev->ldev->md_bdev; | 988 | bio->bi_bdev = mdev->ldev->md_bdev; |
766 | bio->bi_sector = on_disk_sector; | 989 | bio->bi_sector = on_disk_sector; |
767 | bio_add_page(bio, b->bm_pages[page_nr], len, 0); | 990 | bio_add_page(bio, page, len, 0); |
768 | bio->bi_private = b; | 991 | bio->bi_private = ctx; |
769 | bio->bi_end_io = bm_async_io_complete; | 992 | bio->bi_end_io = bm_async_io_complete; |
770 | 993 | ||
771 | if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) { | 994 | if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) { |
772 | bio->bi_rw |= rw; | 995 | bio->bi_rw |= rw; |
773 | bio_endio(bio, -EIO); | 996 | bio_endio(bio, -EIO); |
774 | } else { | 997 | } else { |
@@ -776,87 +999,84 @@ static void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int | |||
776 | } | 999 | } |
777 | } | 1000 | } |
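
bm_page_io_async() places bitmap page N at a fixed on-disk position: the meta data offset plus the bitmap offset plus N pages worth of 512-byte sectors (PAGE_SHIFT - 9 turns a page count into a sector count), with the bio length clamped to what remains of the meta data area. A sketch of that placement with made-up geometry (all values are assumptions for the example):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12   /* assumed 4 KiB pages */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
        uint64_t md_offset      = 1000000;            /* first sector of the meta data */
        uint64_t bm_offset      = 72;                 /* bitmap start, relative, in sectors */
        uint64_t md_last_sector = 1000000 + 262143;   /* last usable meta data sector */
        unsigned int page_nr    = 5;

        uint64_t on_disk_sector = md_offset + bm_offset
                                + ((uint64_t)page_nr << (PAGE_SHIFT - 9));

        /* clamp the bio length to what is left of the meta data area */
        uint64_t bytes_left = (md_last_sector - on_disk_sector + 1) << 9;
        unsigned int len = bytes_left < PAGE_SIZE ? (unsigned int)bytes_left
                                                  : (unsigned int)PAGE_SIZE;

        printf("page %u -> sector %llu, bio length %u bytes\n",
               page_nr, (unsigned long long)on_disk_sector, len);
        return 0;
}
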
778 | 1001 | ||
779 | # if defined(__LITTLE_ENDIAN) | ||
780 | /* nothing to do, on disk == in memory */ | ||
781 | # define bm_cpu_to_lel(x) ((void)0) | ||
782 | # else | ||
783 | static void bm_cpu_to_lel(struct drbd_bitmap *b) | ||
784 | { | ||
785 | /* need to cpu_to_lel all the pages ... | ||
786 | * this may be optimized by using | ||
787 | * cpu_to_lel(-1) == -1 and cpu_to_lel(0) == 0; | ||
788 | * the following is still not optimal, but better than nothing */ | ||
789 | unsigned int i; | ||
790 | unsigned long *p_addr, *bm; | ||
791 | if (b->bm_set == 0) { | ||
792 | /* no page at all; avoid swap if all is 0 */ | ||
793 | i = b->bm_number_of_pages; | ||
794 | } else if (b->bm_set == b->bm_bits) { | ||
795 | /* only the last page */ | ||
796 | i = b->bm_number_of_pages - 1; | ||
797 | } else { | ||
798 | /* all pages */ | ||
799 | i = 0; | ||
800 | } | ||
801 | for (; i < b->bm_number_of_pages; i++) { | ||
802 | p_addr = kmap_atomic(b->bm_pages[i], KM_USER0); | ||
803 | for (bm = p_addr; bm < p_addr + PAGE_SIZE/sizeof(long); bm++) | ||
804 | *bm = cpu_to_lel(*bm); | ||
805 | kunmap_atomic(p_addr, KM_USER0); | ||
806 | } | ||
807 | } | ||
808 | # endif | ||
809 | /* lel_to_cpu == cpu_to_lel */ | ||
810 | # define bm_lel_to_cpu(x) bm_cpu_to_lel(x) | ||
811 | |||
812 | /* | 1002 | /* |
813 | * bm_rw: read/write the whole bitmap from/to its on disk location. | 1003 | * bm_rw: read/write the whole bitmap from/to its on disk location. |
814 | */ | 1004 | */ |
815 | static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local) | 1005 | static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_idx) __must_hold(local) |
816 | { | 1006 | { |
1007 | struct bm_aio_ctx ctx = { | ||
1008 | .mdev = mdev, | ||
1009 | .in_flight = ATOMIC_INIT(1), | ||
1010 | .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), | ||
1011 | .flags = lazy_writeout_upper_idx ? BM_AIO_COPY_PAGES : 0, | ||
1012 | }; | ||
817 | struct drbd_bitmap *b = mdev->bitmap; | 1013 | struct drbd_bitmap *b = mdev->bitmap; |
818 | /* sector_t sector; */ | 1014 | int num_pages, i, count = 0; |
819 | int bm_words, num_pages, i; | ||
820 | unsigned long now; | 1015 | unsigned long now; |
821 | char ppb[10]; | 1016 | char ppb[10]; |
822 | int err = 0; | 1017 | int err = 0; |
823 | 1018 | ||
824 | WARN_ON(!bm_is_locked(b)); | 1019 | /* |
825 | 1020 | * We are protected against bitmap disappearing/resizing by holding an | |
826 | /* no spinlock here, the drbd_bm_lock should be enough! */ | 1021 | * ldev reference (caller must have called get_ldev()). |
827 | 1022 | * For read/write, we are protected against changes to the bitmap by | |
828 | bm_words = drbd_bm_words(mdev); | 1023 | * the bitmap lock (see drbd_bitmap_io). |
829 | num_pages = (bm_words*sizeof(long) + PAGE_SIZE-1) >> PAGE_SHIFT; | 1024 | * For lazy writeout, we don't care for ongoing changes to the bitmap, |
1025 | * as we submit copies of pages anyways. | ||
1026 | */ | ||
1027 | if (!ctx.flags) | ||
1028 | WARN_ON(!(BM_LOCKED_MASK & b->bm_flags)); | ||
830 | 1029 | ||
831 | /* on disk bitmap is little endian */ | 1030 | num_pages = b->bm_number_of_pages; |
832 | if (rw == WRITE) | ||
833 | bm_cpu_to_lel(b); | ||
834 | 1031 | ||
835 | now = jiffies; | 1032 | now = jiffies; |
836 | atomic_set(&b->bm_async_io, num_pages); | ||
837 | __clear_bit(BM_MD_IO_ERROR, &b->bm_flags); | ||
838 | 1033 | ||
839 | /* let the layers below us try to merge these bios... */ | 1034 | /* let the layers below us try to merge these bios... */ |
840 | for (i = 0; i < num_pages; i++) | 1035 | for (i = 0; i < num_pages; i++) { |
841 | bm_page_io_async(mdev, b, i, rw); | 1036 | /* ignore completely unchanged pages */ |
1037 | if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx) | ||
1038 | break; | ||
1039 | if (rw & WRITE) { | ||
1040 | if (bm_test_page_unchanged(b->bm_pages[i])) { | ||
1041 | dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i); | ||
1042 | continue; | ||
1043 | } | ||
1044 | /* during lazy writeout, | ||
1045 | * ignore those pages not marked for lazy writeout. */ | ||
1046 | if (lazy_writeout_upper_idx && | ||
1047 | !bm_test_page_lazy_writeout(b->bm_pages[i])) { | ||
1048 | dynamic_dev_dbg(DEV, "skipped bm lazy write for idx %u\n", i); | ||
1049 | continue; | ||
1050 | } | ||
1051 | } | ||
1052 | atomic_inc(&ctx.in_flight); | ||
1053 | bm_page_io_async(&ctx, i, rw); | ||
1054 | ++count; | ||
1055 | cond_resched(); | ||
1056 | } | ||
842 | 1057 | ||
843 | wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0); | 1058 | /* |
1059 | * We initialize ctx.in_flight to one to make sure bm_async_io_complete | ||
1060 | * will not complete() early, and decrement / test it here. If there | ||
1061 | * are still some bios in flight, we need to wait for them here. | ||
1062 | */ | ||
1063 | if (!atomic_dec_and_test(&ctx.in_flight)) | ||
1064 | wait_for_completion(&ctx.done); | ||
1065 | dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", | ||
1066 | rw == WRITE ? "WRITE" : "READ", | ||
1067 | count, jiffies - now); | ||
844 | 1068 | ||
845 | if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) { | 1069 | if (ctx.error) { |
846 | dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); | 1070 | dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); |
847 | drbd_chk_io_error(mdev, 1, TRUE); | 1071 | drbd_chk_io_error(mdev, 1, true); |
848 | err = -EIO; | 1072 | err = -EIO; /* ctx.error ? */ |
849 | } | 1073 | } |
850 | 1074 | ||
851 | now = jiffies; | 1075 | now = jiffies; |
852 | if (rw == WRITE) { | 1076 | if (rw == WRITE) { |
853 | /* swap back endianness */ | ||
854 | bm_lel_to_cpu(b); | ||
855 | /* flush bitmap to stable storage */ | ||
856 | drbd_md_flush(mdev); | 1077 | drbd_md_flush(mdev); |
857 | } else /* rw == READ */ { | 1078 | } else /* rw == READ */ { |
858 | /* just read, if necessary adjust endianness */ | 1079 | b->bm_set = bm_count_bits(b); |
859 | b->bm_set = bm_count_bits_swap_endian(b); | ||
860 | dev_info(DEV, "recounting of set bits took additional %lu jiffies\n", | 1080 | dev_info(DEV, "recounting of set bits took additional %lu jiffies\n", |
861 | jiffies - now); | 1081 | jiffies - now); |
862 | } | 1082 | } |
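
The write path in bm_rw() now skips pages: completely unchanged pages are never rewritten, and during lazy writeout only pages explicitly marked for it, below upper_idx, are submitted. A sketch of that selection logic over a plain flags array (the flag names are stand-ins for the per-page private bits, not the driver's API):

#include <stdio.h>

#define PG_NEED_WRITEOUT  0x1u   /* stand-ins for the per-page private bits */
#define PG_LAZY_WRITEOUT  0x2u

int main(void)
{
        unsigned flags[6] = { 0, PG_NEED_WRITEOUT, PG_LAZY_WRITEOUT,
                              PG_NEED_WRITEOUT | PG_LAZY_WRITEOUT, 0, PG_NEED_WRITEOUT };
        unsigned num_pages = 6;
        unsigned lazy_writeout_upper_idx = 4;   /* 0 would mean "write all changed pages" */
        unsigned i, count = 0;

        for (i = 0; i < num_pages; i++) {
                if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
                        break;                          /* stop scanning early */
                if (!flags[i])
                        continue;                       /* completely unchanged page */
                if (lazy_writeout_upper_idx && !(flags[i] & PG_LAZY_WRITEOUT))
                        continue;                       /* not marked for lazy writeout */
                count++;                                /* would submit this page */
        }
        printf("submitted %u of %u pages\n", count, num_pages);
        return 0;
}
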
@@ -874,112 +1094,128 @@ static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local) | |||
874 | */ | 1094 | */ |
875 | int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) | 1095 | int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) |
876 | { | 1096 | { |
877 | return bm_rw(mdev, READ); | 1097 | return bm_rw(mdev, READ, 0); |
878 | } | 1098 | } |
879 | 1099 | ||
880 | /** | 1100 | /** |
881 | * drbd_bm_write() - Write the whole bitmap to its on disk location. | 1101 | * drbd_bm_write() - Write the whole bitmap to its on disk location. |
882 | * @mdev: DRBD device. | 1102 | * @mdev: DRBD device. |
1103 | * | ||
1104 | * Will only write pages that have changed since last IO. | ||
883 | */ | 1105 | */ |
884 | int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) | 1106 | int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) |
885 | { | 1107 | { |
886 | return bm_rw(mdev, WRITE); | 1108 | return bm_rw(mdev, WRITE, 0); |
887 | } | 1109 | } |
888 | 1110 | ||
889 | /** | 1111 | /** |
890 | * drbd_bm_write_sect: Writes a 512 (MD_SECTOR_SIZE) byte piece of the bitmap | 1112 | * drbd_bm_write_lazy() - Write bitmap pages 0 to @upper_idx-1, if they have changed. |
891 | * @mdev: DRBD device. | 1113 | * @mdev: DRBD device. |
892 | * @enr: Extent number in the resync lru (happens to be sector offset) | 1114 | * @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages |
893 | * | ||
894 | * The BM_EXT_SIZE is on purpose exactly the amount of the bitmap covered | ||
895 | * by a single sector write. Therefore enr == sector offset from the | ||
896 | * start of the bitmap. | ||
897 | */ | 1115 | */ |
898 | int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local) | 1116 | int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local) |
899 | { | 1117 | { |
900 | sector_t on_disk_sector = enr + mdev->ldev->md.md_offset | 1118 | return bm_rw(mdev, WRITE, upper_idx); |
901 | + mdev->ldev->md.bm_offset; | 1119 | } |
902 | int bm_words, num_words, offset; | 1120 | |
903 | int err = 0; | ||
904 | 1121 | ||
905 | mutex_lock(&mdev->md_io_mutex); | 1122 | /** |
906 | bm_words = drbd_bm_words(mdev); | 1123 | * drbd_bm_write_page: Writes a PAGE_SIZE aligned piece of bitmap |
907 | offset = S2W(enr); /* word offset into bitmap */ | 1124 | * @mdev: DRBD device. |
908 | num_words = min(S2W(1), bm_words - offset); | 1125 | * @idx: bitmap page index |
909 | if (num_words < S2W(1)) | 1126 | * |
910 | memset(page_address(mdev->md_io_page), 0, MD_SECTOR_SIZE); | 1127 | * We don't want to special case on logical_block_size of the backend device, |
911 | drbd_bm_get_lel(mdev, offset, num_words, | 1128 | * so we submit PAGE_SIZE aligned pieces. |
912 | page_address(mdev->md_io_page)); | 1129 | * Note that on "most" systems, PAGE_SIZE is 4k. |
913 | if (!drbd_md_sync_page_io(mdev, mdev->ldev, on_disk_sector, WRITE)) { | 1130 | * |
914 | int i; | 1131 | * In case this becomes an issue on systems with larger PAGE_SIZE, |
915 | err = -EIO; | 1132 | * we may want to change this again to write 4k aligned 4k pieces. |
916 | dev_err(DEV, "IO ERROR writing bitmap sector %lu " | 1133 | */ |
917 | "(meta-disk sector %llus)\n", | 1134 | int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) |
918 | enr, (unsigned long long)on_disk_sector); | 1135 | { |
919 | drbd_chk_io_error(mdev, 1, TRUE); | 1136 | struct bm_aio_ctx ctx = { |
920 | for (i = 0; i < AL_EXT_PER_BM_SECT; i++) | 1137 | .mdev = mdev, |
921 | drbd_bm_ALe_set_all(mdev, enr*AL_EXT_PER_BM_SECT+i); | 1138 | .in_flight = ATOMIC_INIT(1), |
1139 | .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), | ||
1140 | .flags = BM_AIO_COPY_PAGES, | ||
1141 | }; | ||
1142 | |||
1143 | if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { | ||
1144 | dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx); | ||
1145 | return 0; | ||
922 | } | 1146 | } |
1147 | |||
1148 | bm_page_io_async(&ctx, idx, WRITE_SYNC); | ||
1149 | wait_for_completion(&ctx.done); | ||
1150 | |||
1151 | if (ctx.error) | ||
1152 | drbd_chk_io_error(mdev, 1, true); | ||
1153 | /* that should force detach, so the in memory bitmap will be | ||
1154 | * gone in a moment as well. */ | ||
1155 | |||
923 | mdev->bm_writ_cnt++; | 1156 | mdev->bm_writ_cnt++; |
924 | mutex_unlock(&mdev->md_io_mutex); | 1157 | return ctx.error; |
925 | return err; | ||
926 | } | 1158 | } |
927 | 1159 | ||
928 | /* NOTE | 1160 | /* NOTE |
929 | * find_first_bit returns int, we return unsigned long. | 1161 | * find_first_bit returns int, we return unsigned long. |
930 | * should not make much difference anyways, but ... | 1162 | * For this to work on 32bit arch with bitnumbers > (1<<32), |
1163 | * we'd need to return u64, and get a whole lot of other places | ||
1164 | * fixed where we still use unsigned long. | ||
931 | * | 1165 | * |
932 | * this returns a bit number, NOT a sector! | 1166 | * this returns a bit number, NOT a sector! |
933 | */ | 1167 | */ |
934 | #define BPP_MASK ((1UL << (PAGE_SHIFT+3)) - 1) | ||
935 | static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, | 1168 | static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, |
936 | const int find_zero_bit, const enum km_type km) | 1169 | const int find_zero_bit, const enum km_type km) |
937 | { | 1170 | { |
938 | struct drbd_bitmap *b = mdev->bitmap; | 1171 | struct drbd_bitmap *b = mdev->bitmap; |
939 | unsigned long i = -1UL; | ||
940 | unsigned long *p_addr; | 1172 | unsigned long *p_addr; |
941 | unsigned long bit_offset; /* bit offset of the mapped page. */ | 1173 | unsigned long bit_offset; |
1174 | unsigned i; | ||
1175 | |||
942 | 1176 | ||
943 | if (bm_fo > b->bm_bits) { | 1177 | if (bm_fo > b->bm_bits) { |
944 | dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits); | 1178 | dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits); |
1179 | bm_fo = DRBD_END_OF_BITMAP; | ||
945 | } else { | 1180 | } else { |
946 | while (bm_fo < b->bm_bits) { | 1181 | while (bm_fo < b->bm_bits) { |
947 | unsigned long offset; | 1182 | /* bit offset of the first bit in the page */ |
948 | bit_offset = bm_fo & ~BPP_MASK; /* bit offset of the page */ | 1183 | bit_offset = bm_fo & ~BITS_PER_PAGE_MASK; |
949 | offset = bit_offset >> LN2_BPL; /* word offset of the page */ | 1184 | p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo), km); |
950 | p_addr = __bm_map_paddr(b, offset, km); | ||
951 | 1185 | ||
952 | if (find_zero_bit) | 1186 | if (find_zero_bit) |
953 | i = find_next_zero_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); | 1187 | i = generic_find_next_zero_le_bit(p_addr, |
1188 | PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK); | ||
954 | else | 1189 | else |
955 | i = find_next_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); | 1190 | i = generic_find_next_le_bit(p_addr, |
1191 | PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK); | ||
956 | 1192 | ||
957 | __bm_unmap(p_addr, km); | 1193 | __bm_unmap(p_addr, km); |
958 | if (i < PAGE_SIZE*8) { | 1194 | if (i < PAGE_SIZE*8) { |
959 | i = bit_offset + i; | 1195 | bm_fo = bit_offset + i; |
960 | if (i >= b->bm_bits) | 1196 | if (bm_fo >= b->bm_bits) |
961 | break; | 1197 | break; |
962 | goto found; | 1198 | goto found; |
963 | } | 1199 | } |
964 | bm_fo = bit_offset + PAGE_SIZE*8; | 1200 | bm_fo = bit_offset + PAGE_SIZE*8; |
965 | } | 1201 | } |
966 | i = -1UL; | 1202 | bm_fo = DRBD_END_OF_BITMAP; |
967 | } | 1203 | } |
968 | found: | 1204 | found: |
969 | return i; | 1205 | return bm_fo; |
970 | } | 1206 | } |
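
__bm_find_next() now maps one page at a time: it searches within the page starting at the in-page bit offset, and on a miss advances bm_fo to the first bit of the next page; DRBD_END_OF_BITMAP is the not-found sentinel. A userspace sketch of the page-wise scan, assuming 4 KiB pages and using a trivial bit test in place of the generic little-endian find helpers:

#include <stdio.h>

#define PAGE_BITS     (4096UL * 8)        /* assumed 4 KiB pages */
#define END_OF_BITMAP (~0UL)              /* stand-in for DRBD_END_OF_BITMAP */

/* trivial stand-in for the per-page find helper */
static unsigned long find_in_page(const unsigned char *page, unsigned long start)
{
        for (unsigned long i = start; i < PAGE_BITS; i++)
                if (page[i / 8] & (1u << (i & 7)))
                        return i;
        return PAGE_BITS;                  /* nothing found in this page */
}

int main(void)
{
        static unsigned char pages[3][4096];      /* a tiny three-page bitmap */
        unsigned long bm_bits = 3 * PAGE_BITS;
        unsigned long bm_fo = 100;                /* start searching at bit 100 */

        pages[2][10] |= 0x04;                     /* set bit 2*PAGE_BITS + 82 */

        while (bm_fo < bm_bits) {
                unsigned long bit_offset = bm_fo & ~(PAGE_BITS - 1);
                unsigned long i = find_in_page(pages[bm_fo / PAGE_BITS],
                                               bm_fo & (PAGE_BITS - 1));
                if (i < PAGE_BITS) {
                        bm_fo = bit_offset + i;   /* found a set bit */
                        break;
                }
                bm_fo = bit_offset + PAGE_BITS;   /* continue in the next page */
        }
        if (bm_fo >= bm_bits)
                bm_fo = END_OF_BITMAP;
        printf("first set bit: %lu\n", bm_fo);
        return 0;
}
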
971 | 1207 | ||
972 | static unsigned long bm_find_next(struct drbd_conf *mdev, | 1208 | static unsigned long bm_find_next(struct drbd_conf *mdev, |
973 | unsigned long bm_fo, const int find_zero_bit) | 1209 | unsigned long bm_fo, const int find_zero_bit) |
974 | { | 1210 | { |
975 | struct drbd_bitmap *b = mdev->bitmap; | 1211 | struct drbd_bitmap *b = mdev->bitmap; |
976 | unsigned long i = -1UL; | 1212 | unsigned long i = DRBD_END_OF_BITMAP; |
977 | 1213 | ||
978 | ERR_IF(!b) return i; | 1214 | ERR_IF(!b) return i; |
979 | ERR_IF(!b->bm_pages) return i; | 1215 | ERR_IF(!b->bm_pages) return i; |
980 | 1216 | ||
981 | spin_lock_irq(&b->bm_lock); | 1217 | spin_lock_irq(&b->bm_lock); |
982 | if (bm_is_locked(b)) | 1218 | if (BM_DONT_TEST & b->bm_flags) |
983 | bm_print_lock_info(mdev); | 1219 | bm_print_lock_info(mdev); |
984 | 1220 | ||
985 | i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1); | 1221 | i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1); |
@@ -1005,13 +1241,13 @@ unsigned long drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo | |||
1005 | * you must take drbd_bm_lock() first */ | 1241 | * you must take drbd_bm_lock() first */ |
1006 | unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo) | 1242 | unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo) |
1007 | { | 1243 | { |
1008 | /* WARN_ON(!bm_is_locked(mdev)); */ | 1244 | /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */ |
1009 | return __bm_find_next(mdev, bm_fo, 0, KM_USER1); | 1245 | return __bm_find_next(mdev, bm_fo, 0, KM_USER1); |
1010 | } | 1246 | } |
1011 | 1247 | ||
1012 | unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo) | 1248 | unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo) |
1013 | { | 1249 | { |
1014 | /* WARN_ON(!bm_is_locked(mdev)); */ | 1250 | /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */ |
1015 | return __bm_find_next(mdev, bm_fo, 1, KM_USER1); | 1251 | return __bm_find_next(mdev, bm_fo, 1, KM_USER1); |
1016 | } | 1252 | } |
1017 | 1253 | ||
@@ -1027,8 +1263,9 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, | |||
1027 | struct drbd_bitmap *b = mdev->bitmap; | 1263 | struct drbd_bitmap *b = mdev->bitmap; |
1028 | unsigned long *p_addr = NULL; | 1264 | unsigned long *p_addr = NULL; |
1029 | unsigned long bitnr; | 1265 | unsigned long bitnr; |
1030 | unsigned long last_page_nr = -1UL; | 1266 | unsigned int last_page_nr = -1U; |
1031 | int c = 0; | 1267 | int c = 0; |
1268 | int changed_total = 0; | ||
1032 | 1269 | ||
1033 | if (e >= b->bm_bits) { | 1270 | if (e >= b->bm_bits) { |
1034 | dev_err(DEV, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n", | 1271 | dev_err(DEV, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n", |
@@ -1036,23 +1273,33 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, | |||
1036 | e = b->bm_bits ? b->bm_bits -1 : 0; | 1273 | e = b->bm_bits ? b->bm_bits -1 : 0; |
1037 | } | 1274 | } |
1038 | for (bitnr = s; bitnr <= e; bitnr++) { | 1275 | for (bitnr = s; bitnr <= e; bitnr++) { |
1039 | unsigned long offset = bitnr>>LN2_BPL; | 1276 | unsigned int page_nr = bm_bit_to_page_idx(b, bitnr); |
1040 | unsigned long page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3); | ||
1041 | if (page_nr != last_page_nr) { | 1277 | if (page_nr != last_page_nr) { |
1042 | if (p_addr) | 1278 | if (p_addr) |
1043 | __bm_unmap(p_addr, km); | 1279 | __bm_unmap(p_addr, km); |
1044 | p_addr = __bm_map_paddr(b, offset, km); | 1280 | if (c < 0) |
1281 | bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); | ||
1282 | else if (c > 0) | ||
1283 | bm_set_page_need_writeout(b->bm_pages[last_page_nr]); | ||
1284 | changed_total += c; | ||
1285 | c = 0; | ||
1286 | p_addr = __bm_map_pidx(b, page_nr, km); | ||
1045 | last_page_nr = page_nr; | 1287 | last_page_nr = page_nr; |
1046 | } | 1288 | } |
1047 | if (val) | 1289 | if (val) |
1048 | c += (0 == __test_and_set_bit(bitnr & BPP_MASK, p_addr)); | 1290 | c += (0 == generic___test_and_set_le_bit(bitnr & BITS_PER_PAGE_MASK, p_addr)); |
1049 | else | 1291 | else |
1050 | c -= (0 != __test_and_clear_bit(bitnr & BPP_MASK, p_addr)); | 1292 | c -= (0 != generic___test_and_clear_le_bit(bitnr & BITS_PER_PAGE_MASK, p_addr)); |
1051 | } | 1293 | } |
1052 | if (p_addr) | 1294 | if (p_addr) |
1053 | __bm_unmap(p_addr, km); | 1295 | __bm_unmap(p_addr, km); |
1054 | b->bm_set += c; | 1296 | if (c < 0) |
1055 | return c; | 1297 | bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); |
1298 | else if (c > 0) | ||
1299 | bm_set_page_need_writeout(b->bm_pages[last_page_nr]); | ||
1300 | changed_total += c; | ||
1301 | b->bm_set += changed_total; | ||
1302 | return changed_total; | ||
1056 | } | 1303 | } |
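
__bm_change_bits_to() keeps a per-page change counter c: when the walk moves on to another page, a negative c (bits were cleared) marks the old page for lazy writeout only, a positive c (bits were set) marks it as needing writeout. A small sketch of that sign convention over two absurdly small "pages" (the flag helpers are stand-ins, not the driver's functions):

#include <stdio.h>

#define BITS_PER_PAGE 16   /* tiny page, just for the example */

static void mark_page(unsigned page_nr, int c)
{
        /* cleared bits (c < 0) may reach the disk lazily,
         * newly set bits (c > 0) must be written out promptly */
        if (c < 0)
                printf("page %u: lazy writeout\n", page_nr);
        else if (c > 0)
                printf("page %u: need writeout\n", page_nr);
}

int main(void)
{
        unsigned char bm[4] = { 0x00, 0x00, 0xff, 0xff };   /* two 16-bit "pages" */
        unsigned s = 4, e = 20, bitnr;                      /* set bits 4..20 */
        unsigned last_page = ~0u;
        int c = 0, changed_total = 0;

        for (bitnr = s; bitnr <= e; bitnr++) {
                unsigned page_nr = bitnr / BITS_PER_PAGE;
                if (page_nr != last_page) {
                        mark_page(last_page, c);            /* c == 0 marks nothing */
                        changed_total += c;
                        c = 0;
                        last_page = page_nr;
                }
                unsigned char bit = 1u << (bitnr & 7);
                if (!(bm[bitnr / 8] & bit)) {               /* test-and-set */
                        bm[bitnr / 8] |= bit;
                        c++;
                }
        }
        mark_page(last_page, c);
        changed_total += c;
        printf("bits changed: %d\n", changed_total);        /* expect 12 */
        return 0;
}
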
1057 | 1304 | ||
1058 | /* returns number of bits actually changed. | 1305 | /* returns number of bits actually changed. |
@@ -1070,7 +1317,7 @@ static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, | |||
1070 | ERR_IF(!b->bm_pages) return 0; | 1317 | ERR_IF(!b->bm_pages) return 0; |
1071 | 1318 | ||
1072 | spin_lock_irqsave(&b->bm_lock, flags); | 1319 | spin_lock_irqsave(&b->bm_lock, flags); |
1073 | if (bm_is_locked(b)) | 1320 | if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags) |
1074 | bm_print_lock_info(mdev); | 1321 | bm_print_lock_info(mdev); |
1075 | 1322 | ||
1076 | c = __bm_change_bits_to(mdev, s, e, val, KM_IRQ1); | 1323 | c = __bm_change_bits_to(mdev, s, e, val, KM_IRQ1); |
@@ -1187,12 +1434,11 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr) | |||
1187 | ERR_IF(!b->bm_pages) return 0; | 1434 | ERR_IF(!b->bm_pages) return 0; |
1188 | 1435 | ||
1189 | spin_lock_irqsave(&b->bm_lock, flags); | 1436 | spin_lock_irqsave(&b->bm_lock, flags); |
1190 | if (bm_is_locked(b)) | 1437 | if (BM_DONT_TEST & b->bm_flags) |
1191 | bm_print_lock_info(mdev); | 1438 | bm_print_lock_info(mdev); |
1192 | if (bitnr < b->bm_bits) { | 1439 | if (bitnr < b->bm_bits) { |
1193 | unsigned long offset = bitnr>>LN2_BPL; | 1440 | p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr)); |
1194 | p_addr = bm_map_paddr(b, offset); | 1441 | i = generic_test_le_bit(bitnr & BITS_PER_PAGE_MASK, p_addr) ? 1 : 0; |
1195 | i = test_bit(bitnr & BPP_MASK, p_addr) ? 1 : 0; | ||
1196 | bm_unmap(p_addr); | 1442 | bm_unmap(p_addr); |
1197 | } else if (bitnr == b->bm_bits) { | 1443 | } else if (bitnr == b->bm_bits) { |
1198 | i = -1; | 1444 | i = -1; |
@@ -1210,10 +1456,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi | |||
1210 | { | 1456 | { |
1211 | unsigned long flags; | 1457 | unsigned long flags; |
1212 | struct drbd_bitmap *b = mdev->bitmap; | 1458 | struct drbd_bitmap *b = mdev->bitmap; |
1213 | unsigned long *p_addr = NULL, page_nr = -1; | 1459 | unsigned long *p_addr = NULL; |
1214 | unsigned long bitnr; | 1460 | unsigned long bitnr; |
1461 | unsigned int page_nr = -1U; | ||
1215 | int c = 0; | 1462 | int c = 0; |
1216 | size_t w; | ||
1217 | 1463 | ||
1218 | /* If this is called without a bitmap, that is a bug. But just to be | 1464 | /* If this is called without a bitmap, that is a bug. But just to be |
1219 | * robust in case we screwed up elsewhere, in that case pretend there | 1465 | * robust in case we screwed up elsewhere, in that case pretend there |
@@ -1223,20 +1469,20 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi | |||
1223 | ERR_IF(!b->bm_pages) return 1; | 1469 | ERR_IF(!b->bm_pages) return 1; |
1224 | 1470 | ||
1225 | spin_lock_irqsave(&b->bm_lock, flags); | 1471 | spin_lock_irqsave(&b->bm_lock, flags); |
1226 | if (bm_is_locked(b)) | 1472 | if (BM_DONT_TEST & b->bm_flags) |
1227 | bm_print_lock_info(mdev); | 1473 | bm_print_lock_info(mdev); |
1228 | for (bitnr = s; bitnr <= e; bitnr++) { | 1474 | for (bitnr = s; bitnr <= e; bitnr++) { |
1229 | w = bitnr >> LN2_BPL; | 1475 | unsigned int idx = bm_bit_to_page_idx(b, bitnr); |
1230 | if (page_nr != w >> (PAGE_SHIFT - LN2_BPL + 3)) { | 1476 | if (page_nr != idx) { |
1231 | page_nr = w >> (PAGE_SHIFT - LN2_BPL + 3); | 1477 | page_nr = idx; |
1232 | if (p_addr) | 1478 | if (p_addr) |
1233 | bm_unmap(p_addr); | 1479 | bm_unmap(p_addr); |
1234 | p_addr = bm_map_paddr(b, w); | 1480 | p_addr = bm_map_pidx(b, idx); |
1235 | } | 1481 | } |
1236 | ERR_IF (bitnr >= b->bm_bits) { | 1482 | ERR_IF (bitnr >= b->bm_bits) { |
1237 | dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); | 1483 | dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); |
1238 | } else { | 1484 | } else { |
1239 | c += (0 != test_bit(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr)); | 1485 | c += (0 != generic_test_le_bit(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr)); |
1240 | } | 1486 | } |
1241 | } | 1487 | } |
1242 | if (p_addr) | 1488 | if (p_addr) |
@@ -1271,7 +1517,7 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) | |||
1271 | ERR_IF(!b->bm_pages) return 0; | 1517 | ERR_IF(!b->bm_pages) return 0; |
1272 | 1518 | ||
1273 | spin_lock_irqsave(&b->bm_lock, flags); | 1519 | spin_lock_irqsave(&b->bm_lock, flags); |
1274 | if (bm_is_locked(b)) | 1520 | if (BM_DONT_TEST & b->bm_flags) |
1275 | bm_print_lock_info(mdev); | 1521 | bm_print_lock_info(mdev); |
1276 | 1522 | ||
1277 | s = S2W(enr); | 1523 | s = S2W(enr); |
@@ -1279,7 +1525,7 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) | |||
1279 | count = 0; | 1525 | count = 0; |
1280 | if (s < b->bm_words) { | 1526 | if (s < b->bm_words) { |
1281 | int n = e-s; | 1527 | int n = e-s; |
1282 | p_addr = bm_map_paddr(b, s); | 1528 | p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s)); |
1283 | bm = p_addr + MLPP(s); | 1529 | bm = p_addr + MLPP(s); |
1284 | while (n--) | 1530 | while (n--) |
1285 | count += hweight_long(*bm++); | 1531 | count += hweight_long(*bm++); |
@@ -1291,18 +1537,20 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) | |||
1291 | return count; | 1537 | return count; |
1292 | } | 1538 | } |
1293 | 1539 | ||
1294 | /* set all bits covered by the AL-extent al_enr */ | 1540 | /* Set all bits covered by the AL-extent al_enr. |
1541 | * Returns number of bits changed. */ | ||
1295 | unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) | 1542 | unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) |
1296 | { | 1543 | { |
1297 | struct drbd_bitmap *b = mdev->bitmap; | 1544 | struct drbd_bitmap *b = mdev->bitmap; |
1298 | unsigned long *p_addr, *bm; | 1545 | unsigned long *p_addr, *bm; |
1299 | unsigned long weight; | 1546 | unsigned long weight; |
1300 | int count, s, e, i, do_now; | 1547 | unsigned long s, e; |
1548 | int count, i, do_now; | ||
1301 | ERR_IF(!b) return 0; | 1549 | ERR_IF(!b) return 0; |
1302 | ERR_IF(!b->bm_pages) return 0; | 1550 | ERR_IF(!b->bm_pages) return 0; |
1303 | 1551 | ||
1304 | spin_lock_irq(&b->bm_lock); | 1552 | spin_lock_irq(&b->bm_lock); |
1305 | if (bm_is_locked(b)) | 1553 | if (BM_DONT_SET & b->bm_flags) |
1306 | bm_print_lock_info(mdev); | 1554 | bm_print_lock_info(mdev); |
1307 | weight = b->bm_set; | 1555 | weight = b->bm_set; |
1308 | 1556 | ||
@@ -1314,7 +1562,7 @@ unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) | |||
1314 | count = 0; | 1562 | count = 0; |
1315 | if (s < b->bm_words) { | 1563 | if (s < b->bm_words) { |
1316 | i = do_now = e-s; | 1564 | i = do_now = e-s; |
1317 | p_addr = bm_map_paddr(b, s); | 1565 | p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s)); |
1318 | bm = p_addr + MLPP(s); | 1566 | bm = p_addr + MLPP(s); |
1319 | while (i--) { | 1567 | while (i--) { |
1320 | count += hweight_long(*bm); | 1568 | count += hweight_long(*bm); |
@@ -1326,7 +1574,7 @@ unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) | |||
1326 | if (e == b->bm_words) | 1574 | if (e == b->bm_words) |
1327 | b->bm_set -= bm_clear_surplus(b); | 1575 | b->bm_set -= bm_clear_surplus(b); |
1328 | } else { | 1576 | } else { |
1329 | dev_err(DEV, "start offset (%d) too large in drbd_bm_ALe_set_all\n", s); | 1577 | dev_err(DEV, "start offset (%lu) too large in drbd_bm_ALe_set_all\n", s); |
1330 | } | 1578 | } |
1331 | weight = b->bm_set - weight; | 1579 | weight = b->bm_set - weight; |
1332 | spin_unlock_irq(&b->bm_lock); | 1580 | spin_unlock_irq(&b->bm_lock); |
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index b0bd27dfc1e8..81030d8d654b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h | |||
@@ -72,13 +72,6 @@ extern int fault_devs; | |||
72 | extern char usermode_helper[]; | 72 | extern char usermode_helper[]; |
73 | 73 | ||
74 | 74 | ||
75 | #ifndef TRUE | ||
76 | #define TRUE 1 | ||
77 | #endif | ||
78 | #ifndef FALSE | ||
79 | #define FALSE 0 | ||
80 | #endif | ||
81 | |||
82 | /* I don't remember why XCPU ... | 75 | /* I don't remember why XCPU ... |
83 | * This is used to wake the asender, | 76 | * This is used to wake the asender, |
84 | * and to interrupt sending the sending task | 77 | * and to interrupt sending the sending task |
@@ -104,6 +97,7 @@ extern char usermode_helper[]; | |||
104 | #define ID_SYNCER (-1ULL) | 97 | #define ID_SYNCER (-1ULL) |
105 | #define ID_VACANT 0 | 98 | #define ID_VACANT 0 |
106 | #define is_syncer_block_id(id) ((id) == ID_SYNCER) | 99 | #define is_syncer_block_id(id) ((id) == ID_SYNCER) |
100 | #define UUID_NEW_BM_OFFSET ((u64)0x0001000000000000ULL) | ||
107 | 101 | ||
108 | struct drbd_conf; | 102 | struct drbd_conf; |
109 | 103 | ||
@@ -137,20 +131,19 @@ enum { | |||
137 | DRBD_FAULT_MAX, | 131 | DRBD_FAULT_MAX, |
138 | }; | 132 | }; |
139 | 133 | ||
140 | #ifdef CONFIG_DRBD_FAULT_INJECTION | ||
141 | extern unsigned int | 134 | extern unsigned int |
142 | _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type); | 135 | _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type); |
136 | |||
143 | static inline int | 137 | static inline int |
144 | drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) { | 138 | drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) { |
139 | #ifdef CONFIG_DRBD_FAULT_INJECTION | ||
145 | return fault_rate && | 140 | return fault_rate && |
146 | (enable_faults & (1<<type)) && | 141 | (enable_faults & (1<<type)) && |
147 | _drbd_insert_fault(mdev, type); | 142 | _drbd_insert_fault(mdev, type); |
148 | } | ||
149 | #define FAULT_ACTIVE(_m, _t) (drbd_insert_fault((_m), (_t))) | ||
150 | |||
151 | #else | 143 | #else |
152 | #define FAULT_ACTIVE(_m, _t) (0) | 144 | return 0; |
153 | #endif | 145 | #endif |
146 | } | ||
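
Moving the #ifdef inside drbd_insert_fault() keeps a single call-site syntax: with fault injection compiled out the inline collapses to a constant 0 and the compiler drops the whole branch. A minimal sketch of the same pattern (names shortened, not the driver's code):

#include <stdio.h>

/* #define CONFIG_FAULT_INJECTION 1 */   /* flip to enable the check */

static inline int insert_fault(unsigned int fault_rate, unsigned int type)
{
#ifdef CONFIG_FAULT_INJECTION
        return fault_rate && (type == 1);   /* stand-in for the real predicate */
#else
        (void)fault_rate;
        (void)type;
        return 0;                            /* branch compiles away entirely */
#endif
}

int main(void)
{
        if (insert_fault(10, 1))
                printf("injected fault\n");
        else
                printf("no fault injected\n");
        return 0;
}
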
154 | 147 | ||
155 | /* integer division, round _UP_ to the next integer */ | 148 | /* integer division, round _UP_ to the next integer */ |
156 | #define div_ceil(A, B) ((A)/(B) + ((A)%(B) ? 1 : 0)) | 149 | #define div_ceil(A, B) ((A)/(B) + ((A)%(B) ? 1 : 0)) |
@@ -212,8 +205,10 @@ enum drbd_packets { | |||
212 | /* P_CKPT_FENCE_REQ = 0x25, * currently reserved for protocol D */ | 205 | /* P_CKPT_FENCE_REQ = 0x25, * currently reserved for protocol D */ |
213 | /* P_CKPT_DISABLE_REQ = 0x26, * currently reserved for protocol D */ | 206 | /* P_CKPT_DISABLE_REQ = 0x26, * currently reserved for protocol D */ |
214 | P_DELAY_PROBE = 0x27, /* is used on BOTH sockets */ | 207 | P_DELAY_PROBE = 0x27, /* is used on BOTH sockets */ |
208 | P_OUT_OF_SYNC = 0x28, /* Mark as out of sync (Outrunning), data socket */ | ||
209 | P_RS_CANCEL = 0x29, /* meta: Used to cancel RS_DATA_REQUEST packet by SyncSource */ | ||
215 | 210 | ||
216 | P_MAX_CMD = 0x28, | 211 | P_MAX_CMD = 0x2A, |
217 | P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ | 212 | P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ |
218 | P_MAX_OPT_CMD = 0x101, | 213 | P_MAX_OPT_CMD = 0x101, |
219 | 214 | ||
@@ -269,6 +264,7 @@ static inline const char *cmdname(enum drbd_packets cmd) | |||
269 | [P_RS_IS_IN_SYNC] = "CsumRSIsInSync", | 264 | [P_RS_IS_IN_SYNC] = "CsumRSIsInSync", |
270 | [P_COMPRESSED_BITMAP] = "CBitmap", | 265 | [P_COMPRESSED_BITMAP] = "CBitmap", |
271 | [P_DELAY_PROBE] = "DelayProbe", | 266 | [P_DELAY_PROBE] = "DelayProbe", |
267 | [P_OUT_OF_SYNC] = "OutOfSync", | ||
272 | [P_MAX_CMD] = NULL, | 268 | [P_MAX_CMD] = NULL, |
273 | }; | 269 | }; |
274 | 270 | ||
@@ -512,7 +508,7 @@ struct p_sizes { | |||
512 | u64 d_size; /* size of disk */ | 508 | u64 d_size; /* size of disk */ |
513 | u64 u_size; /* user requested size */ | 509 | u64 u_size; /* user requested size */ |
514 | u64 c_size; /* current exported size */ | 510 | u64 c_size; /* current exported size */ |
515 | u32 max_segment_size; /* Maximal size of a BIO */ | 511 | u32 max_bio_size; /* Maximal size of a BIO */ |
516 | u16 queue_order_type; /* not yet implemented in DRBD*/ | 512 | u16 queue_order_type; /* not yet implemented in DRBD*/ |
517 | u16 dds_flags; /* use enum dds_flags here. */ | 513 | u16 dds_flags; /* use enum dds_flags here. */ |
518 | } __packed; | 514 | } __packed; |
@@ -550,6 +546,13 @@ struct p_discard { | |||
550 | u32 pad; | 546 | u32 pad; |
551 | } __packed; | 547 | } __packed; |
552 | 548 | ||
549 | struct p_block_desc { | ||
550 | struct p_header80 head; | ||
551 | u64 sector; | ||
552 | u32 blksize; | ||
553 | u32 pad; /* to multiple of 8 Byte */ | ||
554 | } __packed; | ||
555 | |||
553 | /* Valid values for the encoding field. | 556 | /* Valid values for the encoding field. |
554 | * Bump proto version when changing this. */ | 557 | * Bump proto version when changing this. */ |
555 | enum drbd_bitmap_code { | 558 | enum drbd_bitmap_code { |
@@ -647,6 +650,7 @@ union p_polymorph { | |||
647 | struct p_block_req block_req; | 650 | struct p_block_req block_req; |
648 | struct p_delay_probe93 delay_probe93; | 651 | struct p_delay_probe93 delay_probe93; |
649 | struct p_rs_uuid rs_uuid; | 652 | struct p_rs_uuid rs_uuid; |
653 | struct p_block_desc block_desc; | ||
650 | } __packed; | 654 | } __packed; |
651 | 655 | ||
652 | /**********************************************************************/ | 656 | /**********************************************************************/ |
@@ -677,13 +681,6 @@ static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi) | |||
677 | return thi->t_state; | 681 | return thi->t_state; |
678 | } | 682 | } |
679 | 683 | ||
680 | |||
681 | /* | ||
682 | * Having this as the first member of a struct provides sort of "inheritance". | ||
683 | * "derived" structs can be "drbd_queue_work()"ed. | ||
684 | * The callback should know and cast back to the descendant struct. | ||
685 | * drbd_request and drbd_epoch_entry are descendants of drbd_work. | ||
686 | */ | ||
687 | struct drbd_work; | 684 | struct drbd_work; |
688 | typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel); | 685 | typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel); |
689 | struct drbd_work { | 686 | struct drbd_work { |
@@ -712,9 +709,6 @@ struct drbd_request { | |||
712 | * starting a new epoch... | 709 | * starting a new epoch... |
713 | */ | 710 | */ |
714 | 711 | ||
715 | /* up to here, the struct layout is identical to drbd_epoch_entry; | ||
716 | * we might be able to use that to our advantage... */ | ||
717 | |||
718 | struct list_head tl_requests; /* ring list in the transfer log */ | 712 | struct list_head tl_requests; /* ring list in the transfer log */ |
719 | struct bio *master_bio; /* master bio pointer */ | 713 | struct bio *master_bio; /* master bio pointer */ |
720 | unsigned long rq_state; /* see comments above _req_mod() */ | 714 | unsigned long rq_state; /* see comments above _req_mod() */ |
@@ -831,7 +825,7 @@ enum { | |||
831 | CRASHED_PRIMARY, /* This node was a crashed primary. | 825 | CRASHED_PRIMARY, /* This node was a crashed primary. |
832 | * Gets cleared when the state.conn | 826 | * Gets cleared when the state.conn |
833 | * goes into C_CONNECTED state. */ | 827 | * goes into C_CONNECTED state. */ |
834 | WRITE_BM_AFTER_RESYNC, /* A kmalloc() during resync failed */ | 828 | NO_BARRIER_SUPP, /* underlying block device doesn't implement barriers */ |
835 | CONSIDER_RESYNC, | 829 | CONSIDER_RESYNC, |
836 | 830 | ||
837 | MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */ | 831 | MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */ |
@@ -856,10 +850,37 @@ enum { | |||
856 | GOT_PING_ACK, /* set when we receive a ping_ack packet, misc wait gets woken */ | 850 | GOT_PING_ACK, /* set when we receive a ping_ack packet, misc wait gets woken */ |
857 | NEW_CUR_UUID, /* Create new current UUID when thawing IO */ | 851 | NEW_CUR_UUID, /* Create new current UUID when thawing IO */ |
858 | AL_SUSPENDED, /* Activity logging is currently suspended. */ | 852 | AL_SUSPENDED, /* Activity logging is currently suspended. */ |
853 | AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */ | ||
859 | }; | 854 | }; |
860 | 855 | ||
861 | struct drbd_bitmap; /* opaque for drbd_conf */ | 856 | struct drbd_bitmap; /* opaque for drbd_conf */ |
862 | 857 | ||
858 | /* definition of bits in bm_flags to be used in drbd_bm_lock | ||
859 | * and drbd_bitmap_io and friends. */ | ||
860 | enum bm_flag { | ||
861 | /* do we need to kfree, or vfree bm_pages? */ | ||
862 | BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */ | ||
863 | |||
864 | /* currently locked for bulk operation */ | ||
865 | BM_LOCKED_MASK = 0x7, | ||
866 | |||
867 | /* in detail, that is: */ | ||
868 | BM_DONT_CLEAR = 0x1, | ||
869 | BM_DONT_SET = 0x2, | ||
870 | BM_DONT_TEST = 0x4, | ||
871 | |||
872 | /* (test bit, count bit) allowed (common case) */ | ||
873 | BM_LOCKED_TEST_ALLOWED = 0x3, | ||
874 | |||
875 | /* testing bits, as well as setting new bits allowed, but clearing bits | ||
876 | * would be unexpected. Used during bitmap receive. Setting new bits | ||
877 | * requires sending of "out-of-sync" information, though. */ | ||
878 | BM_LOCKED_SET_ALLOWED = 0x1, | ||
879 | |||
880 | /* clear is not expected while bitmap is locked for bulk operation */ | ||
881 | }; | ||
882 | |||
883 | |||
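The useful reading of the new flags is that BM_LOCKED_SET_ALLOWED and BM_LOCKED_TEST_ALLOWED are simply subsets of BM_LOCKED_MASK: every bit that is set names an operation that must not happen while the bitmap is locked for a bulk operation. A stand-alone sketch of that reading, with the values copied from the enum above; the helper name bm_op_allowed is made up for illustration:

#include <stdio.h>
#include <stdbool.h>

/* flag values copied from the enum bm_flag hunk above */
enum bm_flag {
	BM_DONT_CLEAR		= 0x1,
	BM_DONT_SET		= 0x2,
	BM_DONT_TEST		= 0x4,
	BM_LOCKED_MASK		= 0x7,
	BM_LOCKED_TEST_ALLOWED	= 0x3,
	BM_LOCKED_SET_ALLOWED	= 0x1,
};

/* hypothetical helper: is the operation named by "op" still allowed
 * under the bulk-operation lock flags currently in effect? */
static bool bm_op_allowed(enum bm_flag lock_flags, enum bm_flag op)
{
	return !(lock_flags & op);
}

int main(void)
{
	/* e.g. while receiving the peer's bitmap */
	enum bm_flag f = BM_LOCKED_SET_ALLOWED;

	printf("set:   %d\n", bm_op_allowed(f, BM_DONT_SET));   /* 1, allowed   */
	printf("test:  %d\n", bm_op_allowed(f, BM_DONT_TEST));  /* 1, allowed   */
	printf("clear: %d\n", bm_op_allowed(f, BM_DONT_CLEAR)); /* 0, forbidden */
	return 0;
}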
863 | /* TODO sort members for performance | 884 | /* TODO sort members for performance |
864 | * MAYBE group them further */ | 885 | * MAYBE group them further */ |
865 | 886 | ||
@@ -925,6 +946,7 @@ struct drbd_md_io { | |||
925 | struct bm_io_work { | 946 | struct bm_io_work { |
926 | struct drbd_work w; | 947 | struct drbd_work w; |
927 | char *why; | 948 | char *why; |
949 | enum bm_flag flags; | ||
928 | int (*io_fn)(struct drbd_conf *mdev); | 950 | int (*io_fn)(struct drbd_conf *mdev); |
929 | void (*done)(struct drbd_conf *mdev, int rv); | 951 | void (*done)(struct drbd_conf *mdev, int rv); |
930 | }; | 952 | }; |
@@ -963,9 +985,12 @@ struct drbd_conf { | |||
963 | struct drbd_work resync_work, | 985 | struct drbd_work resync_work, |
964 | unplug_work, | 986 | unplug_work, |
965 | go_diskless, | 987 | go_diskless, |
966 | md_sync_work; | 988 | md_sync_work, |
989 | start_resync_work; | ||
967 | struct timer_list resync_timer; | 990 | struct timer_list resync_timer; |
968 | struct timer_list md_sync_timer; | 991 | struct timer_list md_sync_timer; |
992 | struct timer_list start_resync_timer; | ||
993 | struct timer_list request_timer; | ||
969 | #ifdef DRBD_DEBUG_MD_SYNC | 994 | #ifdef DRBD_DEBUG_MD_SYNC |
970 | struct { | 995 | struct { |
971 | unsigned int line; | 996 | unsigned int line; |
@@ -1000,9 +1025,9 @@ struct drbd_conf { | |||
1000 | struct hlist_head *tl_hash; | 1025 | struct hlist_head *tl_hash; |
1001 | unsigned int tl_hash_s; | 1026 | unsigned int tl_hash_s; |
1002 | 1027 | ||
1003 | /* blocks to sync in this run [unit BM_BLOCK_SIZE] */ | 1028 | /* blocks to resync in this run [unit BM_BLOCK_SIZE] */ |
1004 | unsigned long rs_total; | 1029 | unsigned long rs_total; |
1005 | /* number of sync IOs that failed in this run */ | 1030 | /* number of resync blocks that failed in this run */ |
1006 | unsigned long rs_failed; | 1031 | unsigned long rs_failed; |
1007 | /* Syncer's start time [unit jiffies] */ | 1032 | /* Syncer's start time [unit jiffies] */ |
1008 | unsigned long rs_start; | 1033 | unsigned long rs_start; |
@@ -1102,6 +1127,7 @@ struct drbd_conf { | |||
1102 | struct fifo_buffer rs_plan_s; /* correction values of resync planer */ | 1127 | struct fifo_buffer rs_plan_s; /* correction values of resync planer */ |
1103 | int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */ | 1128 | int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */ |
1104 | int rs_planed; /* resync sectors already planed */ | 1129 | int rs_planed; /* resync sectors already planed */ |
1130 | atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */ | ||
1105 | }; | 1131 | }; |
1106 | 1132 | ||
1107 | static inline struct drbd_conf *minor_to_mdev(unsigned int minor) | 1133 | static inline struct drbd_conf *minor_to_mdev(unsigned int minor) |
@@ -1163,14 +1189,19 @@ enum dds_flags { | |||
1163 | }; | 1189 | }; |
1164 | 1190 | ||
1165 | extern void drbd_init_set_defaults(struct drbd_conf *mdev); | 1191 | extern void drbd_init_set_defaults(struct drbd_conf *mdev); |
1166 | extern int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, | 1192 | extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev, |
1167 | union drbd_state mask, union drbd_state val); | 1193 | enum chg_state_flags f, |
1194 | union drbd_state mask, | ||
1195 | union drbd_state val); | ||
1168 | extern void drbd_force_state(struct drbd_conf *, union drbd_state, | 1196 | extern void drbd_force_state(struct drbd_conf *, union drbd_state, |
1169 | union drbd_state); | 1197 | union drbd_state); |
1170 | extern int _drbd_request_state(struct drbd_conf *, union drbd_state, | 1198 | extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *, |
1171 | union drbd_state, enum chg_state_flags); | 1199 | union drbd_state, |
1172 | extern int __drbd_set_state(struct drbd_conf *, union drbd_state, | 1200 | union drbd_state, |
1173 | enum chg_state_flags, struct completion *done); | 1201 | enum chg_state_flags); |
1202 | extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state, | ||
1203 | enum chg_state_flags, | ||
1204 | struct completion *done); | ||
1174 | extern void print_st_err(struct drbd_conf *, union drbd_state, | 1205 | extern void print_st_err(struct drbd_conf *, union drbd_state, |
1175 | union drbd_state, int); | 1206 | union drbd_state, int); |
1176 | extern int drbd_thread_start(struct drbd_thread *thi); | 1207 | extern int drbd_thread_start(struct drbd_thread *thi); |
@@ -1195,7 +1226,7 @@ extern int drbd_send(struct drbd_conf *mdev, struct socket *sock, | |||
1195 | extern int drbd_send_protocol(struct drbd_conf *mdev); | 1226 | extern int drbd_send_protocol(struct drbd_conf *mdev); |
1196 | extern int drbd_send_uuids(struct drbd_conf *mdev); | 1227 | extern int drbd_send_uuids(struct drbd_conf *mdev); |
1197 | extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); | 1228 | extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); |
1198 | extern int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val); | 1229 | extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev); |
1199 | extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); | 1230 | extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); |
1200 | extern int _drbd_send_state(struct drbd_conf *mdev); | 1231 | extern int _drbd_send_state(struct drbd_conf *mdev); |
1201 | extern int drbd_send_state(struct drbd_conf *mdev); | 1232 | extern int drbd_send_state(struct drbd_conf *mdev); |
@@ -1220,11 +1251,10 @@ extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, | |||
1220 | struct p_data *dp, int data_size); | 1251 | struct p_data *dp, int data_size); |
1221 | extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd, | 1252 | extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd, |
1222 | sector_t sector, int blksize, u64 block_id); | 1253 | sector_t sector, int blksize, u64 block_id); |
1254 | extern int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req); | ||
1223 | extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, | 1255 | extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, |
1224 | struct drbd_epoch_entry *e); | 1256 | struct drbd_epoch_entry *e); |
1225 | extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req); | 1257 | extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req); |
1226 | extern int _drbd_send_barrier(struct drbd_conf *mdev, | ||
1227 | struct drbd_tl_epoch *barrier); | ||
1228 | extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd, | 1258 | extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd, |
1229 | sector_t sector, int size, u64 block_id); | 1259 | sector_t sector, int size, u64 block_id); |
1230 | extern int drbd_send_drequest_csum(struct drbd_conf *mdev, | 1260 | extern int drbd_send_drequest_csum(struct drbd_conf *mdev, |
@@ -1235,14 +1265,13 @@ extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size) | |||
1235 | 1265 | ||
1236 | extern int drbd_send_bitmap(struct drbd_conf *mdev); | 1266 | extern int drbd_send_bitmap(struct drbd_conf *mdev); |
1237 | extern int _drbd_send_bitmap(struct drbd_conf *mdev); | 1267 | extern int _drbd_send_bitmap(struct drbd_conf *mdev); |
1238 | extern int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode); | 1268 | extern int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode); |
1239 | extern void drbd_free_bc(struct drbd_backing_dev *ldev); | 1269 | extern void drbd_free_bc(struct drbd_backing_dev *ldev); |
1240 | extern void drbd_mdev_cleanup(struct drbd_conf *mdev); | 1270 | extern void drbd_mdev_cleanup(struct drbd_conf *mdev); |
1271 | void drbd_print_uuids(struct drbd_conf *mdev, const char *text); | ||
1241 | 1272 | ||
1242 | /* drbd_meta-data.c (still in drbd_main.c) */ | ||
1243 | extern void drbd_md_sync(struct drbd_conf *mdev); | 1273 | extern void drbd_md_sync(struct drbd_conf *mdev); |
1244 | extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev); | 1274 | extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev); |
1245 | /* maybe define them below as inline? */ | ||
1246 | extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); | 1275 | extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); |
1247 | extern void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); | 1276 | extern void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); |
1248 | extern void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local); | 1277 | extern void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local); |
@@ -1261,10 +1290,12 @@ extern void drbd_md_mark_dirty_(struct drbd_conf *mdev, | |||
1261 | extern void drbd_queue_bitmap_io(struct drbd_conf *mdev, | 1290 | extern void drbd_queue_bitmap_io(struct drbd_conf *mdev, |
1262 | int (*io_fn)(struct drbd_conf *), | 1291 | int (*io_fn)(struct drbd_conf *), |
1263 | void (*done)(struct drbd_conf *, int), | 1292 | void (*done)(struct drbd_conf *, int), |
1264 | char *why); | 1293 | char *why, enum bm_flag flags); |
1294 | extern int drbd_bitmap_io(struct drbd_conf *mdev, | ||
1295 | int (*io_fn)(struct drbd_conf *), | ||
1296 | char *why, enum bm_flag flags); | ||
1265 | extern int drbd_bmio_set_n_write(struct drbd_conf *mdev); | 1297 | extern int drbd_bmio_set_n_write(struct drbd_conf *mdev); |
1266 | extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); | 1298 | extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); |
1267 | extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why); | ||
1268 | extern void drbd_go_diskless(struct drbd_conf *mdev); | 1299 | extern void drbd_go_diskless(struct drbd_conf *mdev); |
1269 | extern void drbd_ldev_destroy(struct drbd_conf *mdev); | 1300 | extern void drbd_ldev_destroy(struct drbd_conf *mdev); |
1270 | 1301 | ||
@@ -1313,6 +1344,7 @@ struct bm_extent { | |||
1313 | 1344 | ||
1314 | #define BME_NO_WRITES 0 /* bm_extent.flags: no more requests on this one! */ | 1345 | #define BME_NO_WRITES 0 /* bm_extent.flags: no more requests on this one! */ |
1315 | #define BME_LOCKED 1 /* bm_extent.flags: syncer active on this one. */ | 1346 | #define BME_LOCKED 1 /* bm_extent.flags: syncer active on this one. */ |
1347 | #define BME_PRIORITY 2 /* finish resync IO on this extent ASAP! App IO waiting! */ | ||
1316 | 1348 | ||
1317 | /* drbd_bitmap.c */ | 1349 | /* drbd_bitmap.c */ |
1318 | /* | 1350 | /* |
@@ -1390,7 +1422,9 @@ struct bm_extent { | |||
1390 | * you should use 64bit OS for that much storage, anyways. */ | 1422 | * you should use 64bit OS for that much storage, anyways. */ |
1391 | #define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0xffff7fff) | 1423 | #define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0xffff7fff) |
1392 | #else | 1424 | #else |
1393 | #define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0x1LU << 32) | 1425 | /* we allow up to 1 PiB now on 64bit architecture with "flexible" meta data */ |
1426 | #define DRBD_MAX_SECTORS_FLEX (1UL << 51) | ||
1427 | /* corresponds to (1UL << 38) bits right now. */ | ||
1394 | #endif | 1428 | #endif |
1395 | #endif | 1429 | #endif |
1396 | 1430 | ||
@@ -1398,7 +1432,7 @@ struct bm_extent { | |||
1398 | * With a value of 8 all IO in one 128K block make it to the same slot of the | 1432 | * With a value of 8 all IO in one 128K block make it to the same slot of the |
1399 | * hash table. */ | 1433 | * hash table. */ |
1400 | #define HT_SHIFT 8 | 1434 | #define HT_SHIFT 8 |
1401 | #define DRBD_MAX_SEGMENT_SIZE (1U<<(9+HT_SHIFT)) | 1435 | #define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT)) |
1402 | 1436 | ||
1403 | #define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */ | 1437 | #define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */ |
1404 | 1438 | ||
@@ -1410,16 +1444,20 @@ extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors, int set_new | |||
1410 | extern void drbd_bm_cleanup(struct drbd_conf *mdev); | 1444 | extern void drbd_bm_cleanup(struct drbd_conf *mdev); |
1411 | extern void drbd_bm_set_all(struct drbd_conf *mdev); | 1445 | extern void drbd_bm_set_all(struct drbd_conf *mdev); |
1412 | extern void drbd_bm_clear_all(struct drbd_conf *mdev); | 1446 | extern void drbd_bm_clear_all(struct drbd_conf *mdev); |
1447 | /* set/clear/test only a few bits at a time */ | ||
1413 | extern int drbd_bm_set_bits( | 1448 | extern int drbd_bm_set_bits( |
1414 | struct drbd_conf *mdev, unsigned long s, unsigned long e); | 1449 | struct drbd_conf *mdev, unsigned long s, unsigned long e); |
1415 | extern int drbd_bm_clear_bits( | 1450 | extern int drbd_bm_clear_bits( |
1416 | struct drbd_conf *mdev, unsigned long s, unsigned long e); | 1451 | struct drbd_conf *mdev, unsigned long s, unsigned long e); |
1417 | /* bm_set_bits variant for use while holding drbd_bm_lock */ | 1452 | extern int drbd_bm_count_bits( |
1453 | struct drbd_conf *mdev, const unsigned long s, const unsigned long e); | ||
1454 | /* bm_set_bits variant for use while holding drbd_bm_lock, | ||
1455 | * may process the whole bitmap in one go */ | ||
1418 | extern void _drbd_bm_set_bits(struct drbd_conf *mdev, | 1456 | extern void _drbd_bm_set_bits(struct drbd_conf *mdev, |
1419 | const unsigned long s, const unsigned long e); | 1457 | const unsigned long s, const unsigned long e); |
1420 | extern int drbd_bm_test_bit(struct drbd_conf *mdev, unsigned long bitnr); | 1458 | extern int drbd_bm_test_bit(struct drbd_conf *mdev, unsigned long bitnr); |
1421 | extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr); | 1459 | extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr); |
1422 | extern int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local); | 1460 | extern int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local); |
1423 | extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); | 1461 | extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); |
1424 | extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); | 1462 | extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); |
1425 | extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, | 1463 | extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, |
@@ -1427,6 +1465,8 @@ extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, | |||
1427 | extern size_t drbd_bm_words(struct drbd_conf *mdev); | 1465 | extern size_t drbd_bm_words(struct drbd_conf *mdev); |
1428 | extern unsigned long drbd_bm_bits(struct drbd_conf *mdev); | 1466 | extern unsigned long drbd_bm_bits(struct drbd_conf *mdev); |
1429 | extern sector_t drbd_bm_capacity(struct drbd_conf *mdev); | 1467 | extern sector_t drbd_bm_capacity(struct drbd_conf *mdev); |
1468 | |||
1469 | #define DRBD_END_OF_BITMAP (~(unsigned long)0) | ||
1430 | extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); | 1470 | extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); |
1431 | /* bm_find_next variants for use while you hold drbd_bm_lock() */ | 1471 | /* bm_find_next variants for use while you hold drbd_bm_lock() */ |
1432 | extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); | 1472 | extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); |
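DRBD_END_OF_BITMAP gives the find-next helpers an unambiguous "no more set bits" return value. A typical caller then scans the bitmap in a loop like the stand-alone model below; a plain array stands in for the real bitmap, while the real drbd_bm_find_next() takes the mdev and a bit offset as declared above:

#include <stdio.h>

#define TOY_END_OF_BITMAP (~(unsigned long)0)	/* mirrors DRBD_END_OF_BITMAP */
#define TOY_BITS 16

static const int toy_bitmap[TOY_BITS] = { 0, 0, 1, 0, 1, 1, 0, 0,
					  0, 0, 0, 1, 0, 0, 0, 0 };

/* toy stand-in for drbd_bm_find_next(mdev, bm_fo) */
static unsigned long toy_find_next(unsigned long bm_fo)
{
	for (; bm_fo < TOY_BITS; bm_fo++)
		if (toy_bitmap[bm_fo])
			return bm_fo;
	return TOY_END_OF_BITMAP;
}

int main(void)
{
	unsigned long bit = 0;

	/* scan pattern of a caller picking the next out-of-sync bits */
	while ((bit = toy_find_next(bit)) != TOY_END_OF_BITMAP) {
		printf("out-of-sync bit %lu\n", bit);
		bit++;			/* continue the search after the hit */
	}
	return 0;
}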
@@ -1437,14 +1477,12 @@ extern int drbd_bm_rs_done(struct drbd_conf *mdev); | |||
1437 | /* for receive_bitmap */ | 1477 | /* for receive_bitmap */ |
1438 | extern void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, | 1478 | extern void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, |
1439 | size_t number, unsigned long *buffer); | 1479 | size_t number, unsigned long *buffer); |
1440 | /* for _drbd_send_bitmap and drbd_bm_write_sect */ | 1480 | /* for _drbd_send_bitmap */ |
1441 | extern void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, | 1481 | extern void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, |
1442 | size_t number, unsigned long *buffer); | 1482 | size_t number, unsigned long *buffer); |
1443 | 1483 | ||
1444 | extern void drbd_bm_lock(struct drbd_conf *mdev, char *why); | 1484 | extern void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags); |
1445 | extern void drbd_bm_unlock(struct drbd_conf *mdev); | 1485 | extern void drbd_bm_unlock(struct drbd_conf *mdev); |
1446 | |||
1447 | extern int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e); | ||
1448 | /* drbd_main.c */ | 1486 | /* drbd_main.c */ |
1449 | 1487 | ||
1450 | extern struct kmem_cache *drbd_request_cache; | 1488 | extern struct kmem_cache *drbd_request_cache; |
@@ -1467,7 +1505,7 @@ extern void drbd_free_mdev(struct drbd_conf *mdev); | |||
1467 | extern int proc_details; | 1505 | extern int proc_details; |
1468 | 1506 | ||
1469 | /* drbd_req */ | 1507 | /* drbd_req */ |
1470 | extern int drbd_make_request_26(struct request_queue *q, struct bio *bio); | 1508 | extern int drbd_make_request(struct request_queue *q, struct bio *bio); |
1471 | extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req); | 1509 | extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req); |
1472 | extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec); | 1510 | extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec); |
1473 | extern int is_valid_ar_handle(struct drbd_request *, sector_t); | 1511 | extern int is_valid_ar_handle(struct drbd_request *, sector_t); |
@@ -1482,8 +1520,9 @@ enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = | |||
1482 | extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); | 1520 | extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); |
1483 | extern void resync_after_online_grow(struct drbd_conf *); | 1521 | extern void resync_after_online_grow(struct drbd_conf *); |
1484 | extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local); | 1522 | extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local); |
1485 | extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, | 1523 | extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev, |
1486 | int force); | 1524 | enum drbd_role new_role, |
1525 | int force); | ||
1487 | extern enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev); | 1526 | extern enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev); |
1488 | extern void drbd_try_outdate_peer_async(struct drbd_conf *mdev); | 1527 | extern void drbd_try_outdate_peer_async(struct drbd_conf *mdev); |
1489 | extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); | 1528 | extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); |
@@ -1499,6 +1538,7 @@ extern int drbd_resync_finished(struct drbd_conf *mdev); | |||
1499 | extern int drbd_md_sync_page_io(struct drbd_conf *mdev, | 1538 | extern int drbd_md_sync_page_io(struct drbd_conf *mdev, |
1500 | struct drbd_backing_dev *bdev, sector_t sector, int rw); | 1539 | struct drbd_backing_dev *bdev, sector_t sector, int rw); |
1501 | extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int); | 1540 | extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int); |
1541 | extern void drbd_rs_controller_reset(struct drbd_conf *mdev); | ||
1502 | 1542 | ||
1503 | static inline void ov_oos_print(struct drbd_conf *mdev) | 1543 | static inline void ov_oos_print(struct drbd_conf *mdev) |
1504 | { | 1544 | { |
@@ -1522,21 +1562,23 @@ extern int w_e_end_csum_rs_req(struct drbd_conf *, struct drbd_work *, int); | |||
1522 | extern int w_e_end_ov_reply(struct drbd_conf *, struct drbd_work *, int); | 1562 | extern int w_e_end_ov_reply(struct drbd_conf *, struct drbd_work *, int); |
1523 | extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int); | 1563 | extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int); |
1524 | extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int); | 1564 | extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int); |
1525 | extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int); | 1565 | extern int w_resync_timer(struct drbd_conf *, struct drbd_work *, int); |
1526 | extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int); | 1566 | extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int); |
1527 | extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int); | 1567 | extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int); |
1528 | extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int); | ||
1529 | extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int); | 1568 | extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int); |
1530 | extern int w_send_barrier(struct drbd_conf *, struct drbd_work *, int); | 1569 | extern int w_send_barrier(struct drbd_conf *, struct drbd_work *, int); |
1531 | extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int); | 1570 | extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int); |
1532 | extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int); | 1571 | extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int); |
1533 | extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int); | 1572 | extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int); |
1534 | extern int w_restart_disk_io(struct drbd_conf *, struct drbd_work *, int); | 1573 | extern int w_restart_disk_io(struct drbd_conf *, struct drbd_work *, int); |
1574 | extern int w_send_oos(struct drbd_conf *, struct drbd_work *, int); | ||
1575 | extern int w_start_resync(struct drbd_conf *, struct drbd_work *, int); | ||
1535 | 1576 | ||
1536 | extern void resync_timer_fn(unsigned long data); | 1577 | extern void resync_timer_fn(unsigned long data); |
1578 | extern void start_resync_timer_fn(unsigned long data); | ||
1537 | 1579 | ||
1538 | /* drbd_receiver.c */ | 1580 | /* drbd_receiver.c */ |
1539 | extern int drbd_rs_should_slow_down(struct drbd_conf *mdev); | 1581 | extern int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector); |
1540 | extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, | 1582 | extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, |
1541 | const unsigned rw, const int fault_type); | 1583 | const unsigned rw, const int fault_type); |
1542 | extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); | 1584 | extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); |
@@ -1619,16 +1661,16 @@ extern int drbd_rs_del_all(struct drbd_conf *mdev); | |||
1619 | extern void drbd_rs_failed_io(struct drbd_conf *mdev, | 1661 | extern void drbd_rs_failed_io(struct drbd_conf *mdev, |
1620 | sector_t sector, int size); | 1662 | sector_t sector, int size); |
1621 | extern int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *); | 1663 | extern int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *); |
1664 | extern void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go); | ||
1622 | extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, | 1665 | extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, |
1623 | int size, const char *file, const unsigned int line); | 1666 | int size, const char *file, const unsigned int line); |
1624 | #define drbd_set_in_sync(mdev, sector, size) \ | 1667 | #define drbd_set_in_sync(mdev, sector, size) \ |
1625 | __drbd_set_in_sync(mdev, sector, size, __FILE__, __LINE__) | 1668 | __drbd_set_in_sync(mdev, sector, size, __FILE__, __LINE__) |
1626 | extern void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, | 1669 | extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, |
1627 | int size, const char *file, const unsigned int line); | 1670 | int size, const char *file, const unsigned int line); |
1628 | #define drbd_set_out_of_sync(mdev, sector, size) \ | 1671 | #define drbd_set_out_of_sync(mdev, sector, size) \ |
1629 | __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__) | 1672 | __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__) |
1630 | extern void drbd_al_apply_to_bm(struct drbd_conf *mdev); | 1673 | extern void drbd_al_apply_to_bm(struct drbd_conf *mdev); |
1631 | extern void drbd_al_to_on_disk_bm(struct drbd_conf *mdev); | ||
1632 | extern void drbd_al_shrink(struct drbd_conf *mdev); | 1674 | extern void drbd_al_shrink(struct drbd_conf *mdev); |
1633 | 1675 | ||
1634 | 1676 | ||
@@ -1747,11 +1789,11 @@ static inline void drbd_state_unlock(struct drbd_conf *mdev) | |||
1747 | wake_up(&mdev->misc_wait); | 1789 | wake_up(&mdev->misc_wait); |
1748 | } | 1790 | } |
1749 | 1791 | ||
1750 | static inline int _drbd_set_state(struct drbd_conf *mdev, | 1792 | static inline enum drbd_state_rv |
1751 | union drbd_state ns, enum chg_state_flags flags, | 1793 | _drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, |
1752 | struct completion *done) | 1794 | enum chg_state_flags flags, struct completion *done) |
1753 | { | 1795 | { |
1754 | int rv; | 1796 | enum drbd_state_rv rv; |
1755 | 1797 | ||
1756 | read_lock(&global_state_lock); | 1798 | read_lock(&global_state_lock); |
1757 | rv = __drbd_set_state(mdev, ns, flags, done); | 1799 | rv = __drbd_set_state(mdev, ns, flags, done); |
@@ -1982,17 +2024,17 @@ static inline int drbd_send_ping_ack(struct drbd_conf *mdev) | |||
1982 | 2024 | ||
1983 | static inline void drbd_thread_stop(struct drbd_thread *thi) | 2025 | static inline void drbd_thread_stop(struct drbd_thread *thi) |
1984 | { | 2026 | { |
1985 | _drbd_thread_stop(thi, FALSE, TRUE); | 2027 | _drbd_thread_stop(thi, false, true); |
1986 | } | 2028 | } |
1987 | 2029 | ||
1988 | static inline void drbd_thread_stop_nowait(struct drbd_thread *thi) | 2030 | static inline void drbd_thread_stop_nowait(struct drbd_thread *thi) |
1989 | { | 2031 | { |
1990 | _drbd_thread_stop(thi, FALSE, FALSE); | 2032 | _drbd_thread_stop(thi, false, false); |
1991 | } | 2033 | } |
1992 | 2034 | ||
1993 | static inline void drbd_thread_restart_nowait(struct drbd_thread *thi) | 2035 | static inline void drbd_thread_restart_nowait(struct drbd_thread *thi) |
1994 | { | 2036 | { |
1995 | _drbd_thread_stop(thi, TRUE, FALSE); | 2037 | _drbd_thread_stop(thi, true, false); |
1996 | } | 2038 | } |
1997 | 2039 | ||
1998 | /* counts how many answer packets packets we expect from our peer, | 2040 | /* counts how many answer packets packets we expect from our peer, |
@@ -2146,17 +2188,18 @@ extern int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) | |||
2146 | static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, | 2188 | static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, |
2147 | unsigned long *bits_left, unsigned int *per_mil_done) | 2189 | unsigned long *bits_left, unsigned int *per_mil_done) |
2148 | { | 2190 | { |
2149 | /* | 2191 | /* this is to break it at compile time when we change that, in case we |
2150 | * this is to break it at compile time when we change that | 2192 | * want to support more than (1<<32) bits on a 32bit arch. */ |
2151 | * (we may feel 4TB maximum storage per drbd is not enough) | ||
2152 | */ | ||
2153 | typecheck(unsigned long, mdev->rs_total); | 2193 | typecheck(unsigned long, mdev->rs_total); |
2154 | 2194 | ||
2155 | /* note: both rs_total and rs_left are in bits, i.e. in | 2195 | /* note: both rs_total and rs_left are in bits, i.e. in |
2156 | * units of BM_BLOCK_SIZE. | 2196 | * units of BM_BLOCK_SIZE. |
2157 | * for the percentage, we don't care. */ | 2197 | * for the percentage, we don't care. */ |
2158 | 2198 | ||
2159 | *bits_left = drbd_bm_total_weight(mdev) - mdev->rs_failed; | 2199 | if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) |
2200 | *bits_left = mdev->ov_left; | ||
2201 | else | ||
2202 | *bits_left = drbd_bm_total_weight(mdev) - mdev->rs_failed; | ||
2160 | /* >> 10 to prevent overflow, | 2203 | /* >> 10 to prevent overflow, |
2161 | * +1 to prevent division by zero */ | 2204 | * +1 to prevent division by zero */ |
2162 | if (*bits_left > mdev->rs_total) { | 2205 | if (*bits_left > mdev->rs_total) { |
@@ -2171,10 +2214,19 @@ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, | |||
2171 | *bits_left, mdev->rs_total, mdev->rs_failed); | 2214 | *bits_left, mdev->rs_total, mdev->rs_failed); |
2172 | *per_mil_done = 0; | 2215 | *per_mil_done = 0; |
2173 | } else { | 2216 | } else { |
2174 | /* make sure the calculation happens in long context */ | 2217 | /* Make sure the division happens in long context. |
2175 | unsigned long tmp = 1000UL - | 2218 | * We allow up to one petabyte storage right now, |
2176 | (*bits_left >> 10)*1000UL | 2219 | * at a granularity of 4k per bit that is 2**38 bits. |
2177 | / ((mdev->rs_total >> 10) + 1UL); | 2220 | * After shift right and multiplication by 1000, |
2221 | * this should still fit easily into a 32bit long, | ||
2222 | * so we don't need a 64bit division on 32bit arch. | ||
2223 | * Note: currently we don't support such large bitmaps on 32bit | ||
2224 | * arch anyways, but no harm done to be prepared for it here. | ||
2225 | */ | ||
2226 | unsigned int shift = mdev->rs_total >= (1ULL << 32) ? 16 : 10; | ||
2227 | unsigned long left = *bits_left >> shift; | ||
2228 | unsigned long total = 1UL + (mdev->rs_total >> shift); | ||
2229 | unsigned long tmp = 1000UL - left * 1000UL/total; | ||
2178 | *per_mil_done = tmp; | 2230 | *per_mil_done = tmp; |
2179 | } | 2231 | } |
2180 | } | 2232 | } |
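The reworked per-mil computation shifts both bits_left and rs_total down before multiplying by 1000, so the intermediate product still fits an unsigned long on 32-bit even for very large bitmaps. The same arithmetic as a stand-alone function, with a worked example (roughly a 4 TiB device with 25% still out of sync):

#include <stdio.h>

/* stand-alone restatement of the per-mil math in drbd_get_syncer_progress() */
static unsigned int per_mil_done(unsigned long bits_left, unsigned long rs_total)
{
	/* shift by 16 for bitmaps of 2^32 bits and more, by 10 otherwise,
	 * so that "left * 1000" cannot overflow an unsigned long */
	unsigned int shift = rs_total >= (1ULL << 32) ? 16 : 10;
	unsigned long left  = bits_left >> shift;
	unsigned long total = 1UL + (rs_total >> shift);

	return 1000UL - left * 1000UL / total;
}

int main(void)
{
	/* 2^30 bits of 4 KiB each is a ~4 TiB device; 25% still out of sync */
	unsigned long rs_total  = 1UL << 30;
	unsigned long bits_left = rs_total / 4;

	printf("%u per mil done\n", per_mil_done(bits_left, rs_total)); /* 751 */
	return 0;
}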
@@ -2193,8 +2245,9 @@ static inline int drbd_get_max_buffers(struct drbd_conf *mdev) | |||
2193 | return mxb; | 2245 | return mxb; |
2194 | } | 2246 | } |
2195 | 2247 | ||
2196 | static inline int drbd_state_is_stable(union drbd_state s) | 2248 | static inline int drbd_state_is_stable(struct drbd_conf *mdev) |
2197 | { | 2249 | { |
2250 | union drbd_state s = mdev->state; | ||
2198 | 2251 | ||
2199 | /* DO NOT add a default clause, we want the compiler to warn us | 2252 | /* DO NOT add a default clause, we want the compiler to warn us |
2200 | * for any newly introduced state we may have forgotten to add here */ | 2253 | * for any newly introduced state we may have forgotten to add here */ |
@@ -2211,11 +2264,9 @@ static inline int drbd_state_is_stable(union drbd_state s) | |||
2211 | case C_VERIFY_T: | 2264 | case C_VERIFY_T: |
2212 | case C_PAUSED_SYNC_S: | 2265 | case C_PAUSED_SYNC_S: |
2213 | case C_PAUSED_SYNC_T: | 2266 | case C_PAUSED_SYNC_T: |
2214 | /* maybe stable, look at the disk state */ | 2267 | case C_AHEAD: |
2215 | break; | 2268 | case C_BEHIND: |
2216 | 2269 | /* transitional states, IO allowed */ | |
2217 | /* no new io accepted during tansitional states | ||
2218 | * like handshake or teardown */ | ||
2219 | case C_DISCONNECTING: | 2270 | case C_DISCONNECTING: |
2220 | case C_UNCONNECTED: | 2271 | case C_UNCONNECTED: |
2221 | case C_TIMEOUT: | 2272 | case C_TIMEOUT: |
@@ -2226,7 +2277,15 @@ static inline int drbd_state_is_stable(union drbd_state s) | |||
2226 | case C_WF_REPORT_PARAMS: | 2277 | case C_WF_REPORT_PARAMS: |
2227 | case C_STARTING_SYNC_S: | 2278 | case C_STARTING_SYNC_S: |
2228 | case C_STARTING_SYNC_T: | 2279 | case C_STARTING_SYNC_T: |
2280 | break; | ||
2281 | |||
2282 | /* Allow IO in BM exchange states with new protocols */ | ||
2229 | case C_WF_BITMAP_S: | 2283 | case C_WF_BITMAP_S: |
2284 | if (mdev->agreed_pro_version < 96) | ||
2285 | return 0; | ||
2286 | break; | ||
2287 | |||
2288 | /* no new io accepted in these states */ | ||
2230 | case C_WF_BITMAP_T: | 2289 | case C_WF_BITMAP_T: |
2231 | case C_WF_SYNC_UUID: | 2290 | case C_WF_SYNC_UUID: |
2232 | case C_MASK: | 2291 | case C_MASK: |
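Behaviourally, the rewritten switch makes Ahead/Behind count as stable, so application IO keeps flowing while the peers only exchange out-of-sync information, and it allows IO during C_WF_BITMAP_S only when the agreed protocol is at least 96; C_WF_BITMAP_T still blocks new IO. A compact restatement of just those cases, using toy types rather than the kernel enums:

#include <stdbool.h>
#include <stdio.h>

enum toy_conn { TOY_AHEAD, TOY_BEHIND, TOY_WF_BITMAP_S, TOY_WF_BITMAP_T };

/* toy restatement of the changed cases in drbd_state_is_stable() */
static bool toy_conn_is_stable(enum toy_conn c, int agreed_pro_version)
{
	switch (c) {
	case TOY_AHEAD:
	case TOY_BEHIND:
		return true;			 /* IO keeps flowing */
	case TOY_WF_BITMAP_S:
		return agreed_pro_version >= 96; /* needs the newer protocol */
	case TOY_WF_BITMAP_T:
		return false;			 /* still no new IO here */
	}
	return false;
}

int main(void)
{
	printf("%d %d\n", toy_conn_is_stable(TOY_WF_BITMAP_S, 95),
			  toy_conn_is_stable(TOY_WF_BITMAP_S, 96)); /* 0 1 */
	return 0;
}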
@@ -2261,41 +2320,47 @@ static inline int is_susp(union drbd_state s) | |||
2261 | return s.susp || s.susp_nod || s.susp_fen; | 2320 | return s.susp || s.susp_nod || s.susp_fen; |
2262 | } | 2321 | } |
2263 | 2322 | ||
2264 | static inline int __inc_ap_bio_cond(struct drbd_conf *mdev) | 2323 | static inline bool may_inc_ap_bio(struct drbd_conf *mdev) |
2265 | { | 2324 | { |
2266 | int mxb = drbd_get_max_buffers(mdev); | 2325 | int mxb = drbd_get_max_buffers(mdev); |
2267 | 2326 | ||
2268 | if (is_susp(mdev->state)) | 2327 | if (is_susp(mdev->state)) |
2269 | return 0; | 2328 | return false; |
2270 | if (test_bit(SUSPEND_IO, &mdev->flags)) | 2329 | if (test_bit(SUSPEND_IO, &mdev->flags)) |
2271 | return 0; | 2330 | return false; |
2272 | 2331 | ||
2273 | /* to avoid potential deadlock or bitmap corruption, | 2332 | /* to avoid potential deadlock or bitmap corruption, |
2274 | * in various places, we only allow new application io | 2333 | * in various places, we only allow new application io |
2275 | * to start during "stable" states. */ | 2334 | * to start during "stable" states. */ |
2276 | 2335 | ||
2277 | /* no new io accepted when attaching or detaching the disk */ | 2336 | /* no new io accepted when attaching or detaching the disk */ |
2278 | if (!drbd_state_is_stable(mdev->state)) | 2337 | if (!drbd_state_is_stable(mdev)) |
2279 | return 0; | 2338 | return false; |
2280 | 2339 | ||
2281 | /* since some older kernels don't have atomic_add_unless, | 2340 | /* since some older kernels don't have atomic_add_unless, |
2282 | * and we are within the spinlock anyways, we have this workaround. */ | 2341 | * and we are within the spinlock anyways, we have this workaround. */ |
2283 | if (atomic_read(&mdev->ap_bio_cnt) > mxb) | 2342 | if (atomic_read(&mdev->ap_bio_cnt) > mxb) |
2284 | return 0; | 2343 | return false; |
2285 | if (test_bit(BITMAP_IO, &mdev->flags)) | 2344 | if (test_bit(BITMAP_IO, &mdev->flags)) |
2286 | return 0; | 2345 | return false; |
2287 | return 1; | 2346 | return true; |
2288 | } | 2347 | } |
2289 | 2348 | ||
2290 | /* I'd like to use wait_event_lock_irq, | 2349 | static inline bool inc_ap_bio_cond(struct drbd_conf *mdev, int count) |
2291 | * but I'm not sure when it got introduced, | ||
2292 | * and not sure when it has 3 or 4 arguments */ | ||
2293 | static inline void inc_ap_bio(struct drbd_conf *mdev, int count) | ||
2294 | { | 2350 | { |
2295 | /* compare with after_state_ch, | 2351 | bool rv = false; |
2296 | * os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S */ | 2352 | |
2297 | DEFINE_WAIT(wait); | 2353 | spin_lock_irq(&mdev->req_lock); |
2354 | rv = may_inc_ap_bio(mdev); | ||
2355 | if (rv) | ||
2356 | atomic_add(count, &mdev->ap_bio_cnt); | ||
2357 | spin_unlock_irq(&mdev->req_lock); | ||
2358 | |||
2359 | return rv; | ||
2360 | } | ||
2298 | 2361 | ||
2362 | static inline void inc_ap_bio(struct drbd_conf *mdev, int count) | ||
2363 | { | ||
2299 | /* we wait here | 2364 | /* we wait here |
2300 | * as long as the device is suspended | 2365 | * as long as the device is suspended |
2301 | * until the bitmap is no longer on the fly during connection | 2366 | * until the bitmap is no longer on the fly during connection |
@@ -2304,16 +2369,7 @@ static inline void inc_ap_bio(struct drbd_conf *mdev, int count) | |||
2304 | * to avoid races with the reconnect code, | 2369 | * to avoid races with the reconnect code, |
2305 | * we need to atomic_inc within the spinlock. */ | 2370 | * we need to atomic_inc within the spinlock. */ |
2306 | 2371 | ||
2307 | spin_lock_irq(&mdev->req_lock); | 2372 | wait_event(mdev->misc_wait, inc_ap_bio_cond(mdev, count)); |
2308 | while (!__inc_ap_bio_cond(mdev)) { | ||
2309 | prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE); | ||
2310 | spin_unlock_irq(&mdev->req_lock); | ||
2311 | schedule(); | ||
2312 | finish_wait(&mdev->misc_wait, &wait); | ||
2313 | spin_lock_irq(&mdev->req_lock); | ||
2314 | } | ||
2315 | atomic_add(count, &mdev->ap_bio_cnt); | ||
2316 | spin_unlock_irq(&mdev->req_lock); | ||
2317 | } | 2373 | } |
2318 | 2374 | ||
2319 | static inline void dec_ap_bio(struct drbd_conf *mdev) | 2375 | static inline void dec_ap_bio(struct drbd_conf *mdev) |
@@ -2333,9 +2389,11 @@ static inline void dec_ap_bio(struct drbd_conf *mdev) | |||
2333 | } | 2389 | } |
2334 | } | 2390 | } |
2335 | 2391 | ||
2336 | static inline void drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) | 2392 | static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) |
2337 | { | 2393 | { |
2394 | int changed = mdev->ed_uuid != val; | ||
2338 | mdev->ed_uuid = val; | 2395 | mdev->ed_uuid = val; |
2396 | return changed; | ||
2339 | } | 2397 | } |
2340 | 2398 | ||
2341 | static inline int seq_cmp(u32 a, u32 b) | 2399 | static inline int seq_cmp(u32 a, u32 b) |
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 8a43ce0edeed..dfc85f32d317 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c | |||
@@ -85,7 +85,8 @@ MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, " | |||
85 | MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION); | 85 | MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION); |
86 | MODULE_VERSION(REL_VERSION); | 86 | MODULE_VERSION(REL_VERSION); |
87 | MODULE_LICENSE("GPL"); | 87 | MODULE_LICENSE("GPL"); |
88 | MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices (1-255)"); | 88 | MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices (" |
89 | __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")"); | ||
89 | MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR); | 90 | MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR); |
90 | 91 | ||
91 | #include <linux/moduleparam.h> | 92 | #include <linux/moduleparam.h> |
@@ -115,7 +116,7 @@ module_param(fault_devs, int, 0644); | |||
115 | #endif | 116 | #endif |
116 | 117 | ||
117 | /* module parameter, defined */ | 118 | /* module parameter, defined */ |
118 | unsigned int minor_count = 32; | 119 | unsigned int minor_count = DRBD_MINOR_COUNT_DEF; |
119 | int disable_sendpage; | 120 | int disable_sendpage; |
120 | int allow_oos; | 121 | int allow_oos; |
121 | unsigned int cn_idx = CN_IDX_DRBD; | 122 | unsigned int cn_idx = CN_IDX_DRBD; |
@@ -335,6 +336,7 @@ bail: | |||
335 | drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); | 336 | drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); |
336 | } | 337 | } |
337 | 338 | ||
339 | |||
338 | /** | 340 | /** |
339 | * _tl_restart() - Walks the transfer log, and applies an action to all requests | 341 | * _tl_restart() - Walks the transfer log, and applies an action to all requests |
340 | * @mdev: DRBD device. | 342 | * @mdev: DRBD device. |
@@ -456,7 +458,7 @@ void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) | |||
456 | } | 458 | } |
457 | 459 | ||
458 | /** | 460 | /** |
459 | * cl_wide_st_chg() - TRUE if the state change is a cluster wide one | 461 | * cl_wide_st_chg() - true if the state change is a cluster wide one |
460 | * @mdev: DRBD device. | 462 | * @mdev: DRBD device. |
461 | * @os: old (current) state. | 463 | * @os: old (current) state. |
462 | * @ns: new (wanted) state. | 464 | * @ns: new (wanted) state. |
@@ -473,12 +475,13 @@ static int cl_wide_st_chg(struct drbd_conf *mdev, | |||
473 | (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S); | 475 | (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S); |
474 | } | 476 | } |
475 | 477 | ||
476 | int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, | 478 | enum drbd_state_rv |
477 | union drbd_state mask, union drbd_state val) | 479 | drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, |
480 | union drbd_state mask, union drbd_state val) | ||
478 | { | 481 | { |
479 | unsigned long flags; | 482 | unsigned long flags; |
480 | union drbd_state os, ns; | 483 | union drbd_state os, ns; |
481 | int rv; | 484 | enum drbd_state_rv rv; |
482 | 485 | ||
483 | spin_lock_irqsave(&mdev->req_lock, flags); | 486 | spin_lock_irqsave(&mdev->req_lock, flags); |
484 | os = mdev->state; | 487 | os = mdev->state; |
@@ -502,20 +505,22 @@ void drbd_force_state(struct drbd_conf *mdev, | |||
502 | drbd_change_state(mdev, CS_HARD, mask, val); | 505 | drbd_change_state(mdev, CS_HARD, mask, val); |
503 | } | 506 | } |
504 | 507 | ||
505 | static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns); | 508 | static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); |
506 | static int is_valid_state_transition(struct drbd_conf *, | 509 | static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *, |
507 | union drbd_state, union drbd_state); | 510 | union drbd_state, |
511 | union drbd_state); | ||
508 | static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, | 512 | static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, |
509 | union drbd_state ns, const char **warn_sync_abort); | 513 | union drbd_state ns, const char **warn_sync_abort); |
510 | int drbd_send_state_req(struct drbd_conf *, | 514 | int drbd_send_state_req(struct drbd_conf *, |
511 | union drbd_state, union drbd_state); | 515 | union drbd_state, union drbd_state); |
512 | 516 | ||
513 | static enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev, | 517 | static enum drbd_state_rv |
514 | union drbd_state mask, union drbd_state val) | 518 | _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, |
519 | union drbd_state val) | ||
515 | { | 520 | { |
516 | union drbd_state os, ns; | 521 | union drbd_state os, ns; |
517 | unsigned long flags; | 522 | unsigned long flags; |
518 | int rv; | 523 | enum drbd_state_rv rv; |
519 | 524 | ||
520 | if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags)) | 525 | if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags)) |
521 | return SS_CW_SUCCESS; | 526 | return SS_CW_SUCCESS; |
@@ -536,7 +541,7 @@ static enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev, | |||
536 | if (rv == SS_SUCCESS) { | 541 | if (rv == SS_SUCCESS) { |
537 | rv = is_valid_state_transition(mdev, ns, os); | 542 | rv = is_valid_state_transition(mdev, ns, os); |
538 | if (rv == SS_SUCCESS) | 543 | if (rv == SS_SUCCESS) |
539 | rv = 0; /* cont waiting, otherwise fail. */ | 544 | rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ |
540 | } | 545 | } |
541 | } | 546 | } |
542 | spin_unlock_irqrestore(&mdev->req_lock, flags); | 547 | spin_unlock_irqrestore(&mdev->req_lock, flags); |
@@ -554,14 +559,14 @@ static enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev, | |||
554 | * Should not be called directly, use drbd_request_state() or | 559 | * Should not be called directly, use drbd_request_state() or |
555 | * _drbd_request_state(). | 560 | * _drbd_request_state(). |
556 | */ | 561 | */ |
557 | static int drbd_req_state(struct drbd_conf *mdev, | 562 | static enum drbd_state_rv |
558 | union drbd_state mask, union drbd_state val, | 563 | drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, |
559 | enum chg_state_flags f) | 564 | union drbd_state val, enum chg_state_flags f) |
560 | { | 565 | { |
561 | struct completion done; | 566 | struct completion done; |
562 | unsigned long flags; | 567 | unsigned long flags; |
563 | union drbd_state os, ns; | 568 | union drbd_state os, ns; |
564 | int rv; | 569 | enum drbd_state_rv rv; |
565 | 570 | ||
566 | init_completion(&done); | 571 | init_completion(&done); |
567 | 572 | ||
@@ -636,10 +641,11 @@ abort: | |||
636 | * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE | 641 | * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE |
637 | * flag, or when logging of failed state change requests is not desired. | 642 | * flag, or when logging of failed state change requests is not desired. |
638 | */ | 643 | */ |
639 | int _drbd_request_state(struct drbd_conf *mdev, union drbd_state mask, | 644 | enum drbd_state_rv |
640 | union drbd_state val, enum chg_state_flags f) | 645 | _drbd_request_state(struct drbd_conf *mdev, union drbd_state mask, |
646 | union drbd_state val, enum chg_state_flags f) | ||
641 | { | 647 | { |
642 | int rv; | 648 | enum drbd_state_rv rv; |
643 | 649 | ||
644 | wait_event(mdev->state_wait, | 650 | wait_event(mdev->state_wait, |
645 | (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE); | 651 | (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE); |
@@ -663,8 +669,8 @@ static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns) | |||
663 | ); | 669 | ); |
664 | } | 670 | } |
665 | 671 | ||
666 | void print_st_err(struct drbd_conf *mdev, | 672 | void print_st_err(struct drbd_conf *mdev, union drbd_state os, |
667 | union drbd_state os, union drbd_state ns, int err) | 673 | union drbd_state ns, enum drbd_state_rv err) |
668 | { | 674 | { |
669 | if (err == SS_IN_TRANSIENT_STATE) | 675 | if (err == SS_IN_TRANSIENT_STATE) |
670 | return; | 676 | return; |
@@ -674,32 +680,18 @@ void print_st_err(struct drbd_conf *mdev, | |||
674 | } | 680 | } |
675 | 681 | ||
676 | 682 | ||
677 | #define drbd_peer_str drbd_role_str | ||
678 | #define drbd_pdsk_str drbd_disk_str | ||
679 | |||
680 | #define drbd_susp_str(A) ((A) ? "1" : "0") | ||
681 | #define drbd_aftr_isp_str(A) ((A) ? "1" : "0") | ||
682 | #define drbd_peer_isp_str(A) ((A) ? "1" : "0") | ||
683 | #define drbd_user_isp_str(A) ((A) ? "1" : "0") | ||
684 | |||
685 | #define PSC(A) \ | ||
686 | ({ if (ns.A != os.A) { \ | ||
687 | pbp += sprintf(pbp, #A "( %s -> %s ) ", \ | ||
688 | drbd_##A##_str(os.A), \ | ||
689 | drbd_##A##_str(ns.A)); \ | ||
690 | } }) | ||
691 | |||
692 | /** | 683 | /** |
693 | * is_valid_state() - Returns an SS_ error code if ns is not valid | 684 | * is_valid_state() - Returns an SS_ error code if ns is not valid |
694 | * @mdev: DRBD device. | 685 | * @mdev: DRBD device. |
695 | * @ns: State to consider. | 686 | * @ns: State to consider. |
696 | */ | 687 | */ |
697 | static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns) | 688 | static enum drbd_state_rv |
689 | is_valid_state(struct drbd_conf *mdev, union drbd_state ns) | ||
698 | { | 690 | { |
699 | /* See drbd_state_sw_errors in drbd_strings.c */ | 691 | /* See drbd_state_sw_errors in drbd_strings.c */ |
700 | 692 | ||
701 | enum drbd_fencing_p fp; | 693 | enum drbd_fencing_p fp; |
702 | int rv = SS_SUCCESS; | 694 | enum drbd_state_rv rv = SS_SUCCESS; |
703 | 695 | ||
704 | fp = FP_DONT_CARE; | 696 | fp = FP_DONT_CARE; |
705 | if (get_ldev(mdev)) { | 697 | if (get_ldev(mdev)) { |
@@ -762,10 +754,11 @@ static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns) | |||
762 | * @ns: new state. | 754 | * @ns: new state. |
763 | * @os: old state. | 755 | * @os: old state. |
764 | */ | 756 | */ |
765 | static int is_valid_state_transition(struct drbd_conf *mdev, | 757 | static enum drbd_state_rv |
766 | union drbd_state ns, union drbd_state os) | 758 | is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns, |
759 | union drbd_state os) | ||
767 | { | 760 | { |
768 | int rv = SS_SUCCESS; | 761 | enum drbd_state_rv rv = SS_SUCCESS; |
769 | 762 | ||
770 | if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) && | 763 | if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) && |
771 | os.conn > C_CONNECTED) | 764 | os.conn > C_CONNECTED) |
@@ -800,6 +793,10 @@ static int is_valid_state_transition(struct drbd_conf *mdev, | |||
800 | os.conn < C_CONNECTED) | 793 | os.conn < C_CONNECTED) |
801 | rv = SS_NEED_CONNECTION; | 794 | rv = SS_NEED_CONNECTION; |
802 | 795 | ||
796 | if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE) | ||
797 | && os.conn < C_WF_REPORT_PARAMS) | ||
798 | rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */ | ||
799 | |||
803 | return rv; | 800 | return rv; |
804 | } | 801 | } |
805 | 802 | ||
@@ -817,6 +814,7 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state | |||
817 | union drbd_state ns, const char **warn_sync_abort) | 814 | union drbd_state ns, const char **warn_sync_abort) |
818 | { | 815 | { |
819 | enum drbd_fencing_p fp; | 816 | enum drbd_fencing_p fp; |
817 | enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max; | ||
820 | 818 | ||
821 | fp = FP_DONT_CARE; | 819 | fp = FP_DONT_CARE; |
822 | if (get_ldev(mdev)) { | 820 | if (get_ldev(mdev)) { |
@@ -869,56 +867,6 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state | |||
869 | ns.conn = C_CONNECTED; | 867 | ns.conn = C_CONNECTED; |
870 | } | 868 | } |
871 | 869 | ||
872 | if (ns.conn >= C_CONNECTED && | ||
873 | ((ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED) || | ||
874 | (ns.disk == D_NEGOTIATING && ns.conn == C_WF_BITMAP_T))) { | ||
875 | switch (ns.conn) { | ||
876 | case C_WF_BITMAP_T: | ||
877 | case C_PAUSED_SYNC_T: | ||
878 | ns.disk = D_OUTDATED; | ||
879 | break; | ||
880 | case C_CONNECTED: | ||
881 | case C_WF_BITMAP_S: | ||
882 | case C_SYNC_SOURCE: | ||
883 | case C_PAUSED_SYNC_S: | ||
884 | ns.disk = D_UP_TO_DATE; | ||
885 | break; | ||
886 | case C_SYNC_TARGET: | ||
887 | ns.disk = D_INCONSISTENT; | ||
888 | dev_warn(DEV, "Implicitly set disk state Inconsistent!\n"); | ||
889 | break; | ||
890 | } | ||
891 | if (os.disk == D_OUTDATED && ns.disk == D_UP_TO_DATE) | ||
892 | dev_warn(DEV, "Implicitly set disk from Outdated to UpToDate\n"); | ||
893 | } | ||
894 | |||
895 | if (ns.conn >= C_CONNECTED && | ||
896 | (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)) { | ||
897 | switch (ns.conn) { | ||
898 | case C_CONNECTED: | ||
899 | case C_WF_BITMAP_T: | ||
900 | case C_PAUSED_SYNC_T: | ||
901 | case C_SYNC_TARGET: | ||
902 | ns.pdsk = D_UP_TO_DATE; | ||
903 | break; | ||
904 | case C_WF_BITMAP_S: | ||
905 | case C_PAUSED_SYNC_S: | ||
906 | /* remap any consistent state to D_OUTDATED, | ||
907 | * but disallow "upgrade" of not even consistent states. | ||
908 | */ | ||
909 | ns.pdsk = | ||
910 | (D_DISKLESS < os.pdsk && os.pdsk < D_OUTDATED) | ||
911 | ? os.pdsk : D_OUTDATED; | ||
912 | break; | ||
913 | case C_SYNC_SOURCE: | ||
914 | ns.pdsk = D_INCONSISTENT; | ||
915 | dev_warn(DEV, "Implicitly set pdsk Inconsistent!\n"); | ||
916 | break; | ||
917 | } | ||
918 | if (os.pdsk == D_OUTDATED && ns.pdsk == D_UP_TO_DATE) | ||
919 | dev_warn(DEV, "Implicitly set pdsk from Outdated to UpToDate\n"); | ||
920 | } | ||
921 | |||
922 | /* Connection breaks down before we finished "Negotiating" */ | 870 | /* Connection breaks down before we finished "Negotiating" */ |
923 | if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING && | 871 | if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING && |
924 | get_ldev_if_state(mdev, D_NEGOTIATING)) { | 872 | get_ldev_if_state(mdev, D_NEGOTIATING)) { |
@@ -933,6 +881,94 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state | |||
933 | put_ldev(mdev); | 881 | put_ldev(mdev); |
934 | } | 882 | } |
935 | 883 | ||
884 | /* D_CONSISTENT and D_OUTDATED vanish when we get connected */ | ||
885 | if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) { | ||
886 | if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED) | ||
887 | ns.disk = D_UP_TO_DATE; | ||
888 | if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED) | ||
889 | ns.pdsk = D_UP_TO_DATE; | ||
890 | } | ||
891 | |||
892 | /* Implications of the connection stat on the disk states */ | ||
893 | disk_min = D_DISKLESS; | ||
894 | disk_max = D_UP_TO_DATE; | ||
895 | pdsk_min = D_INCONSISTENT; | ||
896 | pdsk_max = D_UNKNOWN; | ||
897 | switch ((enum drbd_conns)ns.conn) { | ||
898 | case C_WF_BITMAP_T: | ||
899 | case C_PAUSED_SYNC_T: | ||
900 | case C_STARTING_SYNC_T: | ||
901 | case C_WF_SYNC_UUID: | ||
902 | case C_BEHIND: | ||
903 | disk_min = D_INCONSISTENT; | ||
904 | disk_max = D_OUTDATED; | ||
905 | pdsk_min = D_UP_TO_DATE; | ||
906 | pdsk_max = D_UP_TO_DATE; | ||
907 | break; | ||
908 | case C_VERIFY_S: | ||
909 | case C_VERIFY_T: | ||
910 | disk_min = D_UP_TO_DATE; | ||
911 | disk_max = D_UP_TO_DATE; | ||
912 | pdsk_min = D_UP_TO_DATE; | ||
913 | pdsk_max = D_UP_TO_DATE; | ||
914 | break; | ||
915 | case C_CONNECTED: | ||
916 | disk_min = D_DISKLESS; | ||
917 | disk_max = D_UP_TO_DATE; | ||
918 | pdsk_min = D_DISKLESS; | ||
919 | pdsk_max = D_UP_TO_DATE; | ||
920 | break; | ||
921 | case C_WF_BITMAP_S: | ||
922 | case C_PAUSED_SYNC_S: | ||
923 | case C_STARTING_SYNC_S: | ||
924 | case C_AHEAD: | ||
925 | disk_min = D_UP_TO_DATE; | ||
926 | disk_max = D_UP_TO_DATE; | ||
927 | pdsk_min = D_INCONSISTENT; | ||
928 | pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/ | ||
929 | break; | ||
930 | case C_SYNC_TARGET: | ||
931 | disk_min = D_INCONSISTENT; | ||
932 | disk_max = D_INCONSISTENT; | ||
933 | pdsk_min = D_UP_TO_DATE; | ||
934 | pdsk_max = D_UP_TO_DATE; | ||
935 | break; | ||
936 | case C_SYNC_SOURCE: | ||
937 | disk_min = D_UP_TO_DATE; | ||
938 | disk_max = D_UP_TO_DATE; | ||
939 | pdsk_min = D_INCONSISTENT; | ||
940 | pdsk_max = D_INCONSISTENT; | ||
941 | break; | ||
942 | case C_STANDALONE: | ||
943 | case C_DISCONNECTING: | ||
944 | case C_UNCONNECTED: | ||
945 | case C_TIMEOUT: | ||
946 | case C_BROKEN_PIPE: | ||
947 | case C_NETWORK_FAILURE: | ||
948 | case C_PROTOCOL_ERROR: | ||
949 | case C_TEAR_DOWN: | ||
950 | case C_WF_CONNECTION: | ||
951 | case C_WF_REPORT_PARAMS: | ||
952 | case C_MASK: | ||
953 | break; | ||
954 | } | ||
955 | if (ns.disk > disk_max) | ||
956 | ns.disk = disk_max; | ||
957 | |||
958 | if (ns.disk < disk_min) { | ||
959 | dev_warn(DEV, "Implicitly set disk from %s to %s\n", | ||
960 | drbd_disk_str(ns.disk), drbd_disk_str(disk_min)); | ||
961 | ns.disk = disk_min; | ||
962 | } | ||
963 | if (ns.pdsk > pdsk_max) | ||
964 | ns.pdsk = pdsk_max; | ||
965 | |||
966 | if (ns.pdsk < pdsk_min) { | ||
967 | dev_warn(DEV, "Implicitly set pdsk from %s to %s\n", | ||
968 | drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min)); | ||
969 | ns.pdsk = pdsk_min; | ||
970 | } | ||
971 | |||
936 | if (fp == FP_STONITH && | 972 | if (fp == FP_STONITH && |
937 | (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) && | 973 | (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) && |
938 | !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED)) | 974 | !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED)) |
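The two removed switch statements that implicitly rewrote disk/pdsk are replaced by per-connection-state [min, max] ranges into which ns.disk and ns.pdsk are clamped; for C_SYNC_TARGET, for instance, the local disk is forced to exactly D_INCONSISTENT and the peer disk to D_UP_TO_DATE. A minimal clamp sketch, under the assumption that the disk states are ordered as in enum drbd_disk_state:

#include <stdio.h>

/* disk states in ascending order, as in enum drbd_disk_state */
enum toy_disk { D_DISKLESS, D_ATTACHING, D_FAILED, D_NEGOTIATING,
		D_INCONSISTENT, D_OUTDATED, D_UNKNOWN, D_CONSISTENT,
		D_UP_TO_DATE };

/* clamp a disk state into the [min, max] range that the new
 * sanitize_state() derives from ns.conn */
static enum toy_disk clamp_disk(enum toy_disk d,
				enum toy_disk min, enum toy_disk max)
{
	if (d > max)
		return max;
	if (d < min)
		return min;	/* the kernel additionally warns in this case */
	return d;
}

int main(void)
{
	/* C_SYNC_TARGET: disk_min == disk_max == D_INCONSISTENT */
	enum toy_disk d = clamp_disk(D_UP_TO_DATE, D_INCONSISTENT, D_INCONSISTENT);

	printf("%d\n", d == D_INCONSISTENT);	/* 1 */
	return 0;
}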
@@ -961,6 +997,10 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state | |||
961 | /* helper for __drbd_set_state */ | 997 | /* helper for __drbd_set_state */ |
962 | static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs) | 998 | static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs) |
963 | { | 999 | { |
1000 | if (mdev->agreed_pro_version < 90) | ||
1001 | mdev->ov_start_sector = 0; | ||
1002 | mdev->rs_total = drbd_bm_bits(mdev); | ||
1003 | mdev->ov_position = 0; | ||
964 | if (cs == C_VERIFY_T) { | 1004 | if (cs == C_VERIFY_T) { |
965 | /* starting online verify from an arbitrary position | 1005 | /* starting online verify from an arbitrary position |
966 | * does not fit well into the existing protocol. | 1006 | * does not fit well into the existing protocol. |
@@ -970,11 +1010,15 @@ static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs) | |||
970 | mdev->ov_start_sector = ~(sector_t)0; | 1010 | mdev->ov_start_sector = ~(sector_t)0; |
971 | } else { | 1011 | } else { |
972 | unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector); | 1012 | unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector); |
973 | if (bit >= mdev->rs_total) | 1013 | if (bit >= mdev->rs_total) { |
974 | mdev->ov_start_sector = | 1014 | mdev->ov_start_sector = |
975 | BM_BIT_TO_SECT(mdev->rs_total - 1); | 1015 | BM_BIT_TO_SECT(mdev->rs_total - 1); |
1016 | mdev->rs_total = 1; | ||
1017 | } else | ||
1018 | mdev->rs_total -= bit; | ||
976 | mdev->ov_position = mdev->ov_start_sector; | 1019 | mdev->ov_position = mdev->ov_start_sector; |
977 | } | 1020 | } |
1021 | mdev->ov_left = mdev->rs_total; | ||
978 | } | 1022 | } |
979 | 1023 | ||
980 | static void drbd_resume_al(struct drbd_conf *mdev) | 1024 | static void drbd_resume_al(struct drbd_conf *mdev) |
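set_ov_position() now always resets rs_total to the full bitmap size first, converts the requested start sector into a bit offset, and keeps only the remaining tail in rs_total/ov_left. A standalone sketch of that bookkeeping; the 8-sectors-per-bit granularity and the sample numbers are assumptions of the sketch, not values taken from the DRBD headers:

#include <stdio.h>
#include <stdint.h>

#define SECTORS_PER_BIT	8ULL			/* assumed: 4 KiB per bit, 512 B sectors */
#define SECT_TO_BIT(s)	((s) / SECTORS_PER_BIT)
#define BIT_TO_SECT(b)	((b) * SECTORS_PER_BIT)

int main(void)
{
	uint64_t bm_bits = 1ULL << 20;		/* pretend bitmap size in bits */
	uint64_t start_sector = 123456;		/* requested verify start sector */

	uint64_t rs_total = bm_bits;		/* always start from the full bitmap */
	uint64_t bit = SECT_TO_BIT(start_sector);

	if (bit >= rs_total) {			/* clamp to the very last bit */
		start_sector = BIT_TO_SECT(rs_total - 1);
		rs_total = 1;
	} else {
		rs_total -= bit;		/* only the tail is left to verify */
	}

	printf("verify starts at sector %llu, %llu bits left\n",
	       (unsigned long long)start_sector, (unsigned long long)rs_total);
	return 0;
}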
@@ -992,12 +1036,12 @@ static void drbd_resume_al(struct drbd_conf *mdev) | |||
992 | * | 1036 | * |
993 | * Caller needs to hold req_lock, and global_state_lock. Do not call directly. | 1037 | * Caller needs to hold req_lock, and global_state_lock. Do not call directly. |
994 | */ | 1038 | */ |
995 | int __drbd_set_state(struct drbd_conf *mdev, | 1039 | enum drbd_state_rv |
996 | union drbd_state ns, enum chg_state_flags flags, | 1040 | __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, |
997 | struct completion *done) | 1041 | enum chg_state_flags flags, struct completion *done) |
998 | { | 1042 | { |
999 | union drbd_state os; | 1043 | union drbd_state os; |
1000 | int rv = SS_SUCCESS; | 1044 | enum drbd_state_rv rv = SS_SUCCESS; |
1001 | const char *warn_sync_abort = NULL; | 1045 | const char *warn_sync_abort = NULL; |
1002 | struct after_state_chg_work *ascw; | 1046 | struct after_state_chg_work *ascw; |
1003 | 1047 | ||
@@ -1033,22 +1077,46 @@ int __drbd_set_state(struct drbd_conf *mdev, | |||
1033 | dev_warn(DEV, "%s aborted.\n", warn_sync_abort); | 1077 | dev_warn(DEV, "%s aborted.\n", warn_sync_abort); |
1034 | 1078 | ||
1035 | { | 1079 | { |
1036 | char *pbp, pb[300]; | 1080 | char *pbp, pb[300]; |
1037 | pbp = pb; | 1081 | pbp = pb; |
1038 | *pbp = 0; | 1082 | *pbp = 0; |
1039 | PSC(role); | 1083 | if (ns.role != os.role) |
1040 | PSC(peer); | 1084 | pbp += sprintf(pbp, "role( %s -> %s ) ", |
1041 | PSC(conn); | 1085 | drbd_role_str(os.role), |
1042 | PSC(disk); | 1086 | drbd_role_str(ns.role)); |
1043 | PSC(pdsk); | 1087 | if (ns.peer != os.peer) |
1044 | if (is_susp(ns) != is_susp(os)) | 1088 | pbp += sprintf(pbp, "peer( %s -> %s ) ", |
1045 | pbp += sprintf(pbp, "susp( %s -> %s ) ", | 1089 | drbd_role_str(os.peer), |
1046 | drbd_susp_str(is_susp(os)), | 1090 | drbd_role_str(ns.peer)); |
1047 | drbd_susp_str(is_susp(ns))); | 1091 | if (ns.conn != os.conn) |
1048 | PSC(aftr_isp); | 1092 | pbp += sprintf(pbp, "conn( %s -> %s ) ", |
1049 | PSC(peer_isp); | 1093 | drbd_conn_str(os.conn), |
1050 | PSC(user_isp); | 1094 | drbd_conn_str(ns.conn)); |
1051 | dev_info(DEV, "%s\n", pb); | 1095 | if (ns.disk != os.disk) |
1096 | pbp += sprintf(pbp, "disk( %s -> %s ) ", | ||
1097 | drbd_disk_str(os.disk), | ||
1098 | drbd_disk_str(ns.disk)); | ||
1099 | if (ns.pdsk != os.pdsk) | ||
1100 | pbp += sprintf(pbp, "pdsk( %s -> %s ) ", | ||
1101 | drbd_disk_str(os.pdsk), | ||
1102 | drbd_disk_str(ns.pdsk)); | ||
1103 | if (is_susp(ns) != is_susp(os)) | ||
1104 | pbp += sprintf(pbp, "susp( %d -> %d ) ", | ||
1105 | is_susp(os), | ||
1106 | is_susp(ns)); | ||
1107 | if (ns.aftr_isp != os.aftr_isp) | ||
1108 | pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ", | ||
1109 | os.aftr_isp, | ||
1110 | ns.aftr_isp); | ||
1111 | if (ns.peer_isp != os.peer_isp) | ||
1112 | pbp += sprintf(pbp, "peer_isp( %d -> %d ) ", | ||
1113 | os.peer_isp, | ||
1114 | ns.peer_isp); | ||
1115 | if (ns.user_isp != os.user_isp) | ||
1116 | pbp += sprintf(pbp, "user_isp( %d -> %d ) ", | ||
1117 | os.user_isp, | ||
1118 | ns.user_isp); | ||
1119 | dev_info(DEV, "%s\n", pb); | ||
1052 | } | 1120 | } |
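The PSC() macro is unrolled into explicit "field( old -> new )" fragments appended with pbp += sprintf(pbp, ...), so the log line only mentions fields that actually changed. A tiny self-contained sketch of that string-building pattern (field names and values here are made up):

#include <stdio.h>

int main(void)
{
	char pb[300], *pbp = pb;
	int old_role = 1, new_role = 2;		/* pretend only the role changed */
	int old_disk = 4, new_disk = 4;

	*pbp = 0;
	if (new_role != old_role)
		pbp += sprintf(pbp, "role( %d -> %d ) ", old_role, new_role);
	if (new_disk != old_disk)
		pbp += sprintf(pbp, "disk( %d -> %d ) ", old_disk, new_disk);

	if (pbp != pb)				/* log only if something changed */
		printf("%s\n", pb);
	return 0;
}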
1053 | 1121 | ||
1054 | /* solve the race between becoming unconfigured, | 1122 | /* solve the race between becoming unconfigured, |
@@ -1074,6 +1142,10 @@ int __drbd_set_state(struct drbd_conf *mdev, | |||
1074 | atomic_inc(&mdev->local_cnt); | 1142 | atomic_inc(&mdev->local_cnt); |
1075 | 1143 | ||
1076 | mdev->state = ns; | 1144 | mdev->state = ns; |
1145 | |||
1146 | if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) | ||
1147 | drbd_print_uuids(mdev, "attached to UUIDs"); | ||
1148 | |||
1077 | wake_up(&mdev->misc_wait); | 1149 | wake_up(&mdev->misc_wait); |
1078 | wake_up(&mdev->state_wait); | 1150 | wake_up(&mdev->state_wait); |
1079 | 1151 | ||
@@ -1081,7 +1153,7 @@ int __drbd_set_state(struct drbd_conf *mdev, | |||
1081 | if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && | 1153 | if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && |
1082 | ns.conn < C_CONNECTED) { | 1154 | ns.conn < C_CONNECTED) { |
1083 | mdev->ov_start_sector = | 1155 | mdev->ov_start_sector = |
1084 | BM_BIT_TO_SECT(mdev->rs_total - mdev->ov_left); | 1156 | BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left); |
1085 | dev_info(DEV, "Online Verify reached sector %llu\n", | 1157 | dev_info(DEV, "Online Verify reached sector %llu\n", |
1086 | (unsigned long long)mdev->ov_start_sector); | 1158 | (unsigned long long)mdev->ov_start_sector); |
1087 | } | 1159 | } |
@@ -1106,14 +1178,7 @@ int __drbd_set_state(struct drbd_conf *mdev, | |||
1106 | unsigned long now = jiffies; | 1178 | unsigned long now = jiffies; |
1107 | int i; | 1179 | int i; |
1108 | 1180 | ||
1109 | mdev->ov_position = 0; | 1181 | set_ov_position(mdev, ns.conn); |
1110 | mdev->rs_total = drbd_bm_bits(mdev); | ||
1111 | if (mdev->agreed_pro_version >= 90) | ||
1112 | set_ov_position(mdev, ns.conn); | ||
1113 | else | ||
1114 | mdev->ov_start_sector = 0; | ||
1115 | mdev->ov_left = mdev->rs_total | ||
1116 | - BM_SECT_TO_BIT(mdev->ov_position); | ||
1117 | mdev->rs_start = now; | 1182 | mdev->rs_start = now; |
1118 | mdev->rs_last_events = 0; | 1183 | mdev->rs_last_events = 0; |
1119 | mdev->rs_last_sect_ev = 0; | 1184 | mdev->rs_last_sect_ev = 0; |
@@ -1121,10 +1186,12 @@ int __drbd_set_state(struct drbd_conf *mdev, | |||
1121 | mdev->ov_last_oos_start = 0; | 1186 | mdev->ov_last_oos_start = 0; |
1122 | 1187 | ||
1123 | for (i = 0; i < DRBD_SYNC_MARKS; i++) { | 1188 | for (i = 0; i < DRBD_SYNC_MARKS; i++) { |
1124 | mdev->rs_mark_left[i] = mdev->rs_total; | 1189 | mdev->rs_mark_left[i] = mdev->ov_left; |
1125 | mdev->rs_mark_time[i] = now; | 1190 | mdev->rs_mark_time[i] = now; |
1126 | } | 1191 | } |
1127 | 1192 | ||
1193 | drbd_rs_controller_reset(mdev); | ||
1194 | |||
1128 | if (ns.conn == C_VERIFY_S) { | 1195 | if (ns.conn == C_VERIFY_S) { |
1129 | dev_info(DEV, "Starting Online Verify from sector %llu\n", | 1196 | dev_info(DEV, "Starting Online Verify from sector %llu\n", |
1130 | (unsigned long long)mdev->ov_position); | 1197 | (unsigned long long)mdev->ov_position); |
@@ -1228,6 +1295,26 @@ static void abw_start_sync(struct drbd_conf *mdev, int rv) | |||
1228 | } | 1295 | } |
1229 | } | 1296 | } |
1230 | 1297 | ||
1298 | int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, | ||
1299 | int (*io_fn)(struct drbd_conf *), | ||
1300 | char *why, enum bm_flag flags) | ||
1301 | { | ||
1302 | int rv; | ||
1303 | |||
1304 | D_ASSERT(current == mdev->worker.task); | ||
1305 | |||
1306 | /* open coded non-blocking drbd_suspend_io(mdev); */ | ||
1307 | set_bit(SUSPEND_IO, &mdev->flags); | ||
1308 | |||
1309 | drbd_bm_lock(mdev, why, flags); | ||
1310 | rv = io_fn(mdev); | ||
1311 | drbd_bm_unlock(mdev); | ||
1312 | |||
1313 | drbd_resume_io(mdev); | ||
1314 | |||
1315 | return rv; | ||
1316 | } | ||
1317 | |||
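drbd_bitmap_io_from_worker() runs in worker context and therefore must not block waiting for application I/O to drain; it only sets the suspend flag, takes the bitmap lock, runs the callback and resumes. A rough userspace model of that sequence; struct dev and the boolean fields below are placeholders, not DRBD structures:

#include <stdio.h>
#include <stdbool.h>

struct dev {
	bool suspend_io;	/* stand-in for the SUSPEND_IO flag bit */
	bool bm_locked;		/* stand-in for the bitmap lock */
};

static int write_bitmap(struct dev *d) { (void)d; return 0; }

static int bitmap_io_from_worker(struct dev *d,
				 int (*io_fn)(struct dev *), const char *why)
{
	int rv;

	d->suspend_io = true;	/* open coded, non-blocking "suspend io" */
	d->bm_locked = true;	/* roughly drbd_bm_lock(mdev, why, flags) */
	rv = io_fn(d);		/* e.g. write the whole bitmap */
	d->bm_locked = false;
	d->suspend_io = false;	/* roughly drbd_resume_io(mdev) */
	printf("%s: io_fn returned %d\n", why, rv);
	return rv;
}

int main(void)
{
	struct dev d = { false, false };
	return bitmap_io_from_worker(&d, write_bitmap, "demote");
}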
1231 | /** | 1318 | /** |
1232 | * after_state_ch() - Perform after state change actions that may sleep | 1319 | * after_state_ch() - Perform after state change actions that may sleep |
1233 | * @mdev: DRBD device. | 1320 | * @mdev: DRBD device. |
@@ -1266,16 +1353,14 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1266 | 1353 | ||
1267 | nsm.i = -1; | 1354 | nsm.i = -1; |
1268 | if (ns.susp_nod) { | 1355 | if (ns.susp_nod) { |
1269 | if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { | 1356 | if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) |
1270 | if (ns.conn == C_CONNECTED) | 1357 | what = resend; |
1271 | what = resend, nsm.susp_nod = 0; | ||
1272 | else /* ns.conn > C_CONNECTED */ | ||
1273 | dev_err(DEV, "Unexpected Resynd going on!\n"); | ||
1274 | } | ||
1275 | 1358 | ||
1276 | if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING) | 1359 | if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING) |
1277 | what = restart_frozen_disk_io, nsm.susp_nod = 0; | 1360 | what = restart_frozen_disk_io; |
1278 | 1361 | ||
1362 | if (what != nothing) | ||
1363 | nsm.susp_nod = 0; | ||
1279 | } | 1364 | } |
1280 | 1365 | ||
1281 | if (ns.susp_fen) { | 1366 | if (ns.susp_fen) { |
@@ -1306,13 +1391,30 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1306 | spin_unlock_irq(&mdev->req_lock); | 1391 | spin_unlock_irq(&mdev->req_lock); |
1307 | } | 1392 | } |
1308 | 1393 | ||
1394 | /* Became sync source. With protocol >= 96, we still need to send out | ||
1395 | * the sync uuid now. Need to do that before any drbd_send_state, or | ||
1396 | * the other side may go "paused sync" before receiving the sync uuids, | ||
1397 | * which is unexpected. */ | ||
1398 | if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) && | ||
1399 | (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) && | ||
1400 | mdev->agreed_pro_version >= 96 && get_ldev(mdev)) { | ||
1401 | drbd_gen_and_send_sync_uuid(mdev); | ||
1402 | put_ldev(mdev); | ||
1403 | } | ||
1404 | |||
1309 | /* Do not change the order of the if above and the two below... */ | 1405 | /* Do not change the order of the if above and the two below... */ |
1310 | if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ | 1406 | if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ |
1311 | drbd_send_uuids(mdev); | 1407 | drbd_send_uuids(mdev); |
1312 | drbd_send_state(mdev); | 1408 | drbd_send_state(mdev); |
1313 | } | 1409 | } |
1314 | if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S) | 1410 | /* No point in queuing send_bitmap if we don't have a connection |
1315 | drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, "send_bitmap (WFBitMapS)"); | 1411 | * anymore, so check also the _current_ state, not only the new state |
1412 | * at the time this work was queued. */ | ||
1413 | if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S && | ||
1414 | mdev->state.conn == C_WF_BITMAP_S) | ||
1415 | drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, | ||
1416 | "send_bitmap (WFBitMapS)", | ||
1417 | BM_LOCKED_TEST_ALLOWED); | ||
1316 | 1418 | ||
1317 | /* Lost contact to peer's copy of the data */ | 1419 | /* Lost contact to peer's copy of the data */ |
1318 | if ((os.pdsk >= D_INCONSISTENT && | 1420 | if ((os.pdsk >= D_INCONSISTENT && |
@@ -1343,7 +1445,23 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1343 | 1445 | ||
1344 | /* D_DISKLESS Peer becomes secondary */ | 1446 | /* D_DISKLESS Peer becomes secondary */ |
1345 | if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) | 1447 | if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) |
1346 | drbd_al_to_on_disk_bm(mdev); | 1448 | /* We may still be Primary ourselves. |
1449 | * No harm done if the bitmap still changes, | ||
1450 | * redirtied pages will follow later. */ | ||
1451 | drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, | ||
1452 | "demote diskless peer", BM_LOCKED_SET_ALLOWED); | ||
1453 | put_ldev(mdev); | ||
1454 | } | ||
1455 | |||
1456 | /* Write out all changed bits on demote. | ||
1457 | * Though, no need to do that just yet | ||
1458 | * if there is a resync going on still */ | ||
1459 | if (os.role == R_PRIMARY && ns.role == R_SECONDARY && | ||
1460 | mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) { | ||
1461 | /* No changes to the bitmap expected this time, so assert that, | ||
1462 | * even though no harm was done if it did change. */ | ||
1463 | drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, | ||
1464 | "demote", BM_LOCKED_TEST_ALLOWED); | ||
1347 | put_ldev(mdev); | 1465 | put_ldev(mdev); |
1348 | } | 1466 | } |
1349 | 1467 | ||
@@ -1371,15 +1489,23 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1371 | if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) | 1489 | if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) |
1372 | drbd_send_state(mdev); | 1490 | drbd_send_state(mdev); |
1373 | 1491 | ||
1492 | if (os.conn != C_AHEAD && ns.conn == C_AHEAD) | ||
1493 | drbd_send_state(mdev); | ||
1494 | |||
1374 | /* We are in the progress to start a full sync... */ | 1495 | /* We are in the progress to start a full sync... */ |
1375 | if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || | 1496 | if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || |
1376 | (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S)) | 1497 | (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S)) |
1377 | drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, &abw_start_sync, "set_n_write from StartingSync"); | 1498 | /* no other bitmap changes expected during this phase */ |
1499 | drbd_queue_bitmap_io(mdev, | ||
1500 | &drbd_bmio_set_n_write, &abw_start_sync, | ||
1501 | "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED); | ||
1378 | 1502 | ||
1379 | /* We are invalidating our self... */ | 1503 | /* We are invalidating our self... */ |
1380 | if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED && | 1504 | if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED && |
1381 | os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT) | 1505 | os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT) |
1382 | drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate"); | 1506 | /* other bitmap operation expected during this phase */ |
1507 | drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, | ||
1508 | "set_n_write from invalidate", BM_LOCKED_MASK); | ||
1383 | 1509 | ||
1384 | /* first half of local IO error, failure to attach, | 1510 | /* first half of local IO error, failure to attach, |
1385 | * or administrative detach */ | 1511 | * or administrative detach */ |
@@ -1434,8 +1560,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1434 | 1560 | ||
1435 | if (drbd_send_state(mdev)) | 1561 | if (drbd_send_state(mdev)) |
1436 | dev_warn(DEV, "Notified peer that I'm now diskless.\n"); | 1562 | dev_warn(DEV, "Notified peer that I'm now diskless.\n"); |
1437 | else | ||
1438 | dev_err(DEV, "Sending state for being diskless failed\n"); | ||
1439 | /* corresponding get_ldev in __drbd_set_state | 1563 | /* corresponding get_ldev in __drbd_set_state |
1440 | * this may finally trigger drbd_ldev_destroy. */ | 1564 | * this may finally trigger drbd_ldev_destroy. */
1441 | put_ldev(mdev); | 1565 | put_ldev(mdev); |
@@ -1459,6 +1583,19 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1459 | if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) | 1583 | if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) |
1460 | drbd_send_state(mdev); | 1584 | drbd_send_state(mdev); |
1461 | 1585 | ||
1586 | /* This triggers bitmap writeout of potentially still unwritten pages | ||
1587 | * if the resync finished cleanly, or aborted because of peer disk | ||
1588 | * failure, or because of connection loss. | ||
1589 | * For resync aborted because of local disk failure, we cannot do | ||
1590 | * any bitmap writeout anymore. | ||
1591 | * No harm done if some bits change during this phase. | ||
1592 | */ | ||
1593 | if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) { | ||
1594 | drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, | ||
1595 | "write from resync_finished", BM_LOCKED_SET_ALLOWED); | ||
1596 | put_ldev(mdev); | ||
1597 | } | ||
1598 | |||
1462 | /* free tl_hash if we Got thawed and are C_STANDALONE */ | 1599 | /* free tl_hash if we Got thawed and are C_STANDALONE */ |
1463 | if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash) | 1600 | if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash) |
1464 | drbd_free_tl_hash(mdev); | 1601 | drbd_free_tl_hash(mdev); |
@@ -1559,7 +1696,7 @@ int drbd_thread_start(struct drbd_thread *thi) | |||
1559 | if (!try_module_get(THIS_MODULE)) { | 1696 | if (!try_module_get(THIS_MODULE)) { |
1560 | dev_err(DEV, "Failed to get module reference in drbd_thread_start\n"); | 1697 | dev_err(DEV, "Failed to get module reference in drbd_thread_start\n"); |
1561 | spin_unlock_irqrestore(&thi->t_lock, flags); | 1698 | spin_unlock_irqrestore(&thi->t_lock, flags); |
1562 | return FALSE; | 1699 | return false; |
1563 | } | 1700 | } |
1564 | 1701 | ||
1565 | init_completion(&thi->stop); | 1702 | init_completion(&thi->stop); |
@@ -1576,7 +1713,7 @@ int drbd_thread_start(struct drbd_thread *thi) | |||
1576 | dev_err(DEV, "Couldn't start thread\n"); | 1713 | dev_err(DEV, "Couldn't start thread\n"); |
1577 | 1714 | ||
1578 | module_put(THIS_MODULE); | 1715 | module_put(THIS_MODULE); |
1579 | return FALSE; | 1716 | return false; |
1580 | } | 1717 | } |
1581 | spin_lock_irqsave(&thi->t_lock, flags); | 1718 | spin_lock_irqsave(&thi->t_lock, flags); |
1582 | thi->task = nt; | 1719 | thi->task = nt; |
@@ -1596,7 +1733,7 @@ int drbd_thread_start(struct drbd_thread *thi) | |||
1596 | break; | 1733 | break; |
1597 | } | 1734 | } |
1598 | 1735 | ||
1599 | return TRUE; | 1736 | return true; |
1600 | } | 1737 | } |
1601 | 1738 | ||
1602 | 1739 | ||
@@ -1694,8 +1831,8 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, | |||
1694 | { | 1831 | { |
1695 | int sent, ok; | 1832 | int sent, ok; |
1696 | 1833 | ||
1697 | ERR_IF(!h) return FALSE; | 1834 | ERR_IF(!h) return false; |
1698 | ERR_IF(!size) return FALSE; | 1835 | ERR_IF(!size) return false; |
1699 | 1836 | ||
1700 | h->magic = BE_DRBD_MAGIC; | 1837 | h->magic = BE_DRBD_MAGIC; |
1701 | h->command = cpu_to_be16(cmd); | 1838 | h->command = cpu_to_be16(cmd); |
@@ -1704,8 +1841,8 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, | |||
1704 | sent = drbd_send(mdev, sock, h, size, msg_flags); | 1841 | sent = drbd_send(mdev, sock, h, size, msg_flags); |
1705 | 1842 | ||
1706 | ok = (sent == size); | 1843 | ok = (sent == size); |
1707 | if (!ok) | 1844 | if (!ok && !signal_pending(current)) |
1708 | dev_err(DEV, "short sent %s size=%d sent=%d\n", | 1845 | dev_warn(DEV, "short sent %s size=%d sent=%d\n", |
1709 | cmdname(cmd), (int)size, sent); | 1846 | cmdname(cmd), (int)size, sent); |
1710 | return ok; | 1847 | return ok; |
1711 | } | 1848 | } |
@@ -1840,7 +1977,7 @@ int drbd_send_protocol(struct drbd_conf *mdev) | |||
1840 | else { | 1977 | else { |
1841 | dev_err(DEV, "--dry-run is not supported by peer"); | 1978 | dev_err(DEV, "--dry-run is not supported by peer"); |
1842 | kfree(p); | 1979 | kfree(p); |
1843 | return 0; | 1980 | return -1; |
1844 | } | 1981 | } |
1845 | } | 1982 | } |
1846 | p->conn_flags = cpu_to_be32(cf); | 1983 | p->conn_flags = cpu_to_be32(cf); |
@@ -1888,12 +2025,36 @@ int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev) | |||
1888 | return _drbd_send_uuids(mdev, 8); | 2025 | return _drbd_send_uuids(mdev, 8); |
1889 | } | 2026 | } |
1890 | 2027 | ||
2028 | void drbd_print_uuids(struct drbd_conf *mdev, const char *text) | ||
2029 | { | ||
2030 | if (get_ldev_if_state(mdev, D_NEGOTIATING)) { | ||
2031 | u64 *uuid = mdev->ldev->md.uuid; | ||
2032 | dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n", | ||
2033 | text, | ||
2034 | (unsigned long long)uuid[UI_CURRENT], | ||
2035 | (unsigned long long)uuid[UI_BITMAP], | ||
2036 | (unsigned long long)uuid[UI_HISTORY_START], | ||
2037 | (unsigned long long)uuid[UI_HISTORY_END]); | ||
2038 | put_ldev(mdev); | ||
2039 | } else { | ||
2040 | dev_info(DEV, "%s effective data uuid: %016llX\n", | ||
2041 | text, | ||
2042 | (unsigned long long)mdev->ed_uuid); | ||
2043 | } | ||
2044 | } | ||
1891 | 2045 | ||
1892 | int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val) | 2046 | int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev) |
1893 | { | 2047 | { |
1894 | struct p_rs_uuid p; | 2048 | struct p_rs_uuid p; |
2049 | u64 uuid; | ||
1895 | 2050 | ||
1896 | p.uuid = cpu_to_be64(val); | 2051 | D_ASSERT(mdev->state.disk == D_UP_TO_DATE); |
2052 | |||
2053 | uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET; | ||
2054 | drbd_uuid_set(mdev, UI_BITMAP, uuid); | ||
2055 | drbd_print_uuids(mdev, "updated sync UUID"); | ||
2056 | drbd_md_sync(mdev); | ||
2057 | p.uuid = cpu_to_be64(uuid); | ||
1897 | 2058 | ||
1898 | return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID, | 2059 | return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID, |
1899 | (struct p_header80 *)&p, sizeof(p)); | 2060 | (struct p_header80 *)&p, sizeof(p)); |
@@ -1921,7 +2082,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl | |||
1921 | p.d_size = cpu_to_be64(d_size); | 2082 | p.d_size = cpu_to_be64(d_size); |
1922 | p.u_size = cpu_to_be64(u_size); | 2083 | p.u_size = cpu_to_be64(u_size); |
1923 | p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); | 2084 | p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); |
1924 | p.max_segment_size = cpu_to_be32(queue_max_segment_size(mdev->rq_queue)); | 2085 | p.max_bio_size = cpu_to_be32(queue_max_hw_sectors(mdev->rq_queue) << 9); |
1925 | p.queue_order_type = cpu_to_be16(q_order_type); | 2086 | p.queue_order_type = cpu_to_be16(q_order_type); |
1926 | p.dds_flags = cpu_to_be16(flags); | 2087 | p.dds_flags = cpu_to_be16(flags); |
1927 | 2088 | ||
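The size packet now advertises max_bio_size derived from the queue's max_hw_sectors limit; with 512-byte sectors the conversion to bytes is a left shift by 9. A small sketch of that arithmetic (the sample limit is invented):

#include <stdio.h>

int main(void)
{
	unsigned int max_hw_sectors = 256;			/* sample queue limit */
	unsigned int max_bio_size = max_hw_sectors << 9;	/* sectors -> bytes */

	printf("max_hw_sectors=%u -> max_bio_size=%u bytes\n",
	       max_hw_sectors, max_bio_size);
	return 0;
}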
@@ -1972,7 +2133,7 @@ int drbd_send_state_req(struct drbd_conf *mdev, | |||
1972 | (struct p_header80 *)&p, sizeof(p)); | 2133 | (struct p_header80 *)&p, sizeof(p)); |
1973 | } | 2134 | } |
1974 | 2135 | ||
1975 | int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode) | 2136 | int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode) |
1976 | { | 2137 | { |
1977 | struct p_req_state_reply p; | 2138 | struct p_req_state_reply p; |
1978 | 2139 | ||
@@ -2076,9 +2237,15 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, | |||
2076 | return len; | 2237 | return len; |
2077 | } | 2238 | } |
2078 | 2239 | ||
2079 | enum { OK, FAILED, DONE } | 2240 | /** |
2241 | * send_bitmap_rle_or_plain | ||
2242 | * | ||
2243 | * Return 0 when done, 1 when another iteration is needed, and a negative error | ||
2244 | * code upon failure. | ||
2245 | */ | ||
2246 | static int | ||
2080 | send_bitmap_rle_or_plain(struct drbd_conf *mdev, | 2247 | send_bitmap_rle_or_plain(struct drbd_conf *mdev, |
2081 | struct p_header80 *h, struct bm_xfer_ctx *c) | 2248 | struct p_header80 *h, struct bm_xfer_ctx *c) |
2082 | { | 2249 | { |
2083 | struct p_compressed_bm *p = (void*)h; | 2250 | struct p_compressed_bm *p = (void*)h; |
2084 | unsigned long num_words; | 2251 | unsigned long num_words; |
@@ -2088,7 +2255,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, | |||
2088 | len = fill_bitmap_rle_bits(mdev, p, c); | 2255 | len = fill_bitmap_rle_bits(mdev, p, c); |
2089 | 2256 | ||
2090 | if (len < 0) | 2257 | if (len < 0) |
2091 | return FAILED; | 2258 | return -EIO; |
2092 | 2259 | ||
2093 | if (len) { | 2260 | if (len) { |
2094 | DCBP_set_code(p, RLE_VLI_Bits); | 2261 | DCBP_set_code(p, RLE_VLI_Bits); |
@@ -2118,11 +2285,14 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, | |||
2118 | if (c->bit_offset > c->bm_bits) | 2285 | if (c->bit_offset > c->bm_bits) |
2119 | c->bit_offset = c->bm_bits; | 2286 | c->bit_offset = c->bm_bits; |
2120 | } | 2287 | } |
2121 | ok = ok ? ((len == 0) ? DONE : OK) : FAILED; | 2288 | if (ok) { |
2122 | 2289 | if (len == 0) { | |
2123 | if (ok == DONE) | 2290 | INFO_bm_xfer_stats(mdev, "send", c); |
2124 | INFO_bm_xfer_stats(mdev, "send", c); | 2291 | return 0; |
2125 | return ok; | 2292 | } else |
2293 | return 1; | ||
2294 | } | ||
2295 | return -EIO; | ||
2126 | } | 2296 | } |
2127 | 2297 | ||
2128 | /* See the comment at receive_bitmap() */ | 2298 | /* See the comment at receive_bitmap() */ |
@@ -2130,16 +2300,16 @@ int _drbd_send_bitmap(struct drbd_conf *mdev) | |||
2130 | { | 2300 | { |
2131 | struct bm_xfer_ctx c; | 2301 | struct bm_xfer_ctx c; |
2132 | struct p_header80 *p; | 2302 | struct p_header80 *p; |
2133 | int ret; | 2303 | int err; |
2134 | 2304 | ||
2135 | ERR_IF(!mdev->bitmap) return FALSE; | 2305 | ERR_IF(!mdev->bitmap) return false; |
2136 | 2306 | ||
2137 | /* maybe we should use some per thread scratch page, | 2307 | /* maybe we should use some per thread scratch page, |
2138 | * and allocate that during initial device creation? */ | 2308 | * and allocate that during initial device creation? */ |
2139 | p = (struct p_header80 *) __get_free_page(GFP_NOIO); | 2309 | p = (struct p_header80 *) __get_free_page(GFP_NOIO); |
2140 | if (!p) { | 2310 | if (!p) { |
2141 | dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__); | 2311 | dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__); |
2142 | return FALSE; | 2312 | return false; |
2143 | } | 2313 | } |
2144 | 2314 | ||
2145 | if (get_ldev(mdev)) { | 2315 | if (get_ldev(mdev)) { |
@@ -2165,11 +2335,11 @@ int _drbd_send_bitmap(struct drbd_conf *mdev) | |||
2165 | }; | 2335 | }; |
2166 | 2336 | ||
2167 | do { | 2337 | do { |
2168 | ret = send_bitmap_rle_or_plain(mdev, p, &c); | 2338 | err = send_bitmap_rle_or_plain(mdev, p, &c); |
2169 | } while (ret == OK); | 2339 | } while (err > 0); |
2170 | 2340 | ||
2171 | free_page((unsigned long) p); | 2341 | free_page((unsigned long) p); |
2172 | return (ret == DONE); | 2342 | return err == 0; |
2173 | } | 2343 | } |
2174 | 2344 | ||
2175 | int drbd_send_bitmap(struct drbd_conf *mdev) | 2345 | int drbd_send_bitmap(struct drbd_conf *mdev) |
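send_bitmap_rle_or_plain() drops the local OK/FAILED/DONE enum in favour of the common kernel convention: 0 means done, a positive value means call again, negative is an error, which lets _drbd_send_bitmap() simply loop while the return value is positive. A minimal sketch of that calling convention with a stubbed-out send step (the chunk counting is invented):

#include <stdio.h>
#include <errno.h>

/* Returns 0 when done, 1 when another iteration is needed,
 * and a negative error code upon failure. */
static int send_one_chunk(int *chunks_left)
{
	if (*chunks_left < 0)
		return -EIO;		/* pretend the transport failed */
	if (*chunks_left == 0)
		return 0;		/* nothing left: done */
	(*chunks_left)--;		/* "sent" one chunk */
	return 1;			/* more to do */
}

int main(void)
{
	int chunks_left = 3;
	int err;

	do {
		err = send_one_chunk(&chunks_left);
	} while (err > 0);

	printf("transfer %s\n", err == 0 ? "complete" : "failed");
	return err == 0 ? 0 : 1;
}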
@@ -2192,7 +2362,7 @@ int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) | |||
2192 | p.set_size = cpu_to_be32(set_size); | 2362 | p.set_size = cpu_to_be32(set_size); |
2193 | 2363 | ||
2194 | if (mdev->state.conn < C_CONNECTED) | 2364 | if (mdev->state.conn < C_CONNECTED) |
2195 | return FALSE; | 2365 | return false; |
2196 | ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK, | 2366 | ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK, |
2197 | (struct p_header80 *)&p, sizeof(p)); | 2367 | (struct p_header80 *)&p, sizeof(p)); |
2198 | return ok; | 2368 | return ok; |
@@ -2220,7 +2390,7 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, | |||
2220 | p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq)); | 2390 | p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq)); |
2221 | 2391 | ||
2222 | if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED) | 2392 | if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED) |
2223 | return FALSE; | 2393 | return false; |
2224 | ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd, | 2394 | ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd, |
2225 | (struct p_header80 *)&p, sizeof(p)); | 2395 | (struct p_header80 *)&p, sizeof(p)); |
2226 | return ok; | 2396 | return ok; |
@@ -2326,8 +2496,8 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) | |||
2326 | } | 2496 | } |
2327 | 2497 | ||
2328 | /* called on sndtimeo | 2498 | /* called on sndtimeo |
2329 | * returns FALSE if we should retry, | 2499 | * returns false if we should retry, |
2330 | * TRUE if we think connection is dead | 2500 | * true if we think connection is dead |
2331 | */ | 2501 | */ |
2332 | static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock) | 2502 | static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock) |
2333 | { | 2503 | { |
@@ -2340,7 +2510,7 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * | |||
2340 | || mdev->state.conn < C_CONNECTED; | 2510 | || mdev->state.conn < C_CONNECTED; |
2341 | 2511 | ||
2342 | if (drop_it) | 2512 | if (drop_it) |
2343 | return TRUE; | 2513 | return true; |
2344 | 2514 | ||
2345 | drop_it = !--mdev->ko_count; | 2515 | drop_it = !--mdev->ko_count; |
2346 | if (!drop_it) { | 2516 | if (!drop_it) { |
@@ -2531,13 +2701,39 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) | |||
2531 | if (ok && dgs) { | 2701 | if (ok && dgs) { |
2532 | dgb = mdev->int_dig_out; | 2702 | dgb = mdev->int_dig_out; |
2533 | drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); | 2703 | drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); |
2534 | ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0); | 2704 | ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0); |
2535 | } | 2705 | } |
2536 | if (ok) { | 2706 | if (ok) { |
2537 | if (mdev->net_conf->wire_protocol == DRBD_PROT_A) | 2707 | /* For protocol A, we have to memcpy the payload into |
2708 | * socket buffers, as we may complete right away | ||
2709 | * as soon as we handed it over to tcp, at which point the data | ||
2710 | * pages may become invalid. | ||
2711 | * | ||
2712 | * For data-integrity enabled, we copy it as well, so we can be | ||
2713 | * sure that even if the bio pages may still be modified, it | ||
2714 | * won't change the data on the wire, thus if the digest checks | ||
2715 | * out ok after sending on this side, but does not fit on the | ||
2716 | * receiving side, we sure have detected corruption elsewhere. | ||
2717 | */ | ||
2718 | if (mdev->net_conf->wire_protocol == DRBD_PROT_A || dgs) | ||
2538 | ok = _drbd_send_bio(mdev, req->master_bio); | 2719 | ok = _drbd_send_bio(mdev, req->master_bio); |
2539 | else | 2720 | else |
2540 | ok = _drbd_send_zc_bio(mdev, req->master_bio); | 2721 | ok = _drbd_send_zc_bio(mdev, req->master_bio); |
2722 | |||
2723 | /* double check digest, sometimes buffers have been modified in flight. */ | ||
2724 | if (dgs > 0 && dgs <= 64) { | ||
2725 | /* 64 byte, 512 bit, is the largest digest size | ||
2726 | * currently supported in kernel crypto. */ | ||
2727 | unsigned char digest[64]; | ||
2728 | drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest); | ||
2729 | if (memcmp(mdev->int_dig_out, digest, dgs)) { | ||
2730 | dev_warn(DEV, | ||
2731 | "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n", | ||
2732 | (unsigned long long)req->sector, req->size); | ||
2733 | } | ||
2734 | } /* else if (dgs > 64) { | ||
2735 | ... Be noisy about digest too large ... | ||
2736 | } */ | ||
2541 | } | 2737 | } |
2542 | 2738 | ||
2543 | drbd_put_data_sock(mdev); | 2739 | drbd_put_data_sock(mdev); |
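With data integrity enabled the payload is copied into the socket buffers and the digest is recomputed after sending; a mismatch against the digest that went out on the wire means an upper layer modified the pages while the write was in flight. A self-contained sketch of that compute-send-recompute-compare pattern, using a trivial XOR checksum in place of the kernel crypto digest:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* Toy digest: XOR of all bytes.  Stands in for drbd_csum_bio(). */
static void toy_csum(const uint8_t *buf, size_t len, uint8_t *digest)
{
	uint8_t x = 0;
	for (size_t i = 0; i < len; i++)
		x ^= buf[i];
	*digest = x;
}

int main(void)
{
	uint8_t page[16] = "some write data";
	uint8_t dig_sent, dig_after;

	toy_csum(page, sizeof(page), &dig_sent);	/* digest put on the wire */
	/* ... payload handed over to the transport here ... */
	page[0] ^= 0xff;				/* simulate an in-flight modification */
	toy_csum(page, sizeof(page), &dig_after);	/* recompute after sending */

	if (memcmp(&dig_sent, &dig_after, sizeof(dig_sent)))
		fprintf(stderr, "digest mismatch, buffer modified during write\n");
	return 0;
}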
@@ -2587,7 +2783,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, | |||
2587 | if (ok && dgs) { | 2783 | if (ok && dgs) { |
2588 | dgb = mdev->int_dig_out; | 2784 | dgb = mdev->int_dig_out; |
2589 | drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb); | 2785 | drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb); |
2590 | ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0); | 2786 | ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0); |
2591 | } | 2787 | } |
2592 | if (ok) | 2788 | if (ok) |
2593 | ok = _drbd_send_zc_ee(mdev, e); | 2789 | ok = _drbd_send_zc_ee(mdev, e); |
@@ -2597,6 +2793,16 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, | |||
2597 | return ok; | 2793 | return ok; |
2598 | } | 2794 | } |
2599 | 2795 | ||
2796 | int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req) | ||
2797 | { | ||
2798 | struct p_block_desc p; | ||
2799 | |||
2800 | p.sector = cpu_to_be64(req->sector); | ||
2801 | p.blksize = cpu_to_be32(req->size); | ||
2802 | |||
2803 | return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OUT_OF_SYNC, &p.head, sizeof(p)); | ||
2804 | } | ||
2805 | |||
2600 | /* | 2806 | /* |
2601 | drbd_send distinguishes two cases: | 2807 | drbd_send distinguishes two cases: |
2602 | 2808 | ||
@@ -2770,6 +2976,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) | |||
2770 | atomic_set(&mdev->pp_in_use_by_net, 0); | 2976 | atomic_set(&mdev->pp_in_use_by_net, 0); |
2771 | atomic_set(&mdev->rs_sect_in, 0); | 2977 | atomic_set(&mdev->rs_sect_in, 0); |
2772 | atomic_set(&mdev->rs_sect_ev, 0); | 2978 | atomic_set(&mdev->rs_sect_ev, 0); |
2979 | atomic_set(&mdev->ap_in_flight, 0); | ||
2773 | 2980 | ||
2774 | mutex_init(&mdev->md_io_mutex); | 2981 | mutex_init(&mdev->md_io_mutex); |
2775 | mutex_init(&mdev->data.mutex); | 2982 | mutex_init(&mdev->data.mutex); |
@@ -2798,19 +3005,27 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) | |||
2798 | INIT_LIST_HEAD(&mdev->unplug_work.list); | 3005 | INIT_LIST_HEAD(&mdev->unplug_work.list); |
2799 | INIT_LIST_HEAD(&mdev->go_diskless.list); | 3006 | INIT_LIST_HEAD(&mdev->go_diskless.list); |
2800 | INIT_LIST_HEAD(&mdev->md_sync_work.list); | 3007 | INIT_LIST_HEAD(&mdev->md_sync_work.list); |
3008 | INIT_LIST_HEAD(&mdev->start_resync_work.list); | ||
2801 | INIT_LIST_HEAD(&mdev->bm_io_work.w.list); | 3009 | INIT_LIST_HEAD(&mdev->bm_io_work.w.list); |
2802 | 3010 | ||
2803 | mdev->resync_work.cb = w_resync_inactive; | 3011 | mdev->resync_work.cb = w_resync_timer; |
2804 | mdev->unplug_work.cb = w_send_write_hint; | 3012 | mdev->unplug_work.cb = w_send_write_hint; |
2805 | mdev->go_diskless.cb = w_go_diskless; | 3013 | mdev->go_diskless.cb = w_go_diskless; |
2806 | mdev->md_sync_work.cb = w_md_sync; | 3014 | mdev->md_sync_work.cb = w_md_sync; |
2807 | mdev->bm_io_work.w.cb = w_bitmap_io; | 3015 | mdev->bm_io_work.w.cb = w_bitmap_io; |
3016 | mdev->start_resync_work.cb = w_start_resync; | ||
2808 | init_timer(&mdev->resync_timer); | 3017 | init_timer(&mdev->resync_timer); |
2809 | init_timer(&mdev->md_sync_timer); | 3018 | init_timer(&mdev->md_sync_timer); |
3019 | init_timer(&mdev->start_resync_timer); | ||
3020 | init_timer(&mdev->request_timer); | ||
2810 | mdev->resync_timer.function = resync_timer_fn; | 3021 | mdev->resync_timer.function = resync_timer_fn; |
2811 | mdev->resync_timer.data = (unsigned long) mdev; | 3022 | mdev->resync_timer.data = (unsigned long) mdev; |
2812 | mdev->md_sync_timer.function = md_sync_timer_fn; | 3023 | mdev->md_sync_timer.function = md_sync_timer_fn; |
2813 | mdev->md_sync_timer.data = (unsigned long) mdev; | 3024 | mdev->md_sync_timer.data = (unsigned long) mdev; |
3025 | mdev->start_resync_timer.function = start_resync_timer_fn; | ||
3026 | mdev->start_resync_timer.data = (unsigned long) mdev; | ||
3027 | mdev->request_timer.function = request_timer_fn; | ||
3028 | mdev->request_timer.data = (unsigned long) mdev; | ||
2814 | 3029 | ||
2815 | init_waitqueue_head(&mdev->misc_wait); | 3030 | init_waitqueue_head(&mdev->misc_wait); |
2816 | init_waitqueue_head(&mdev->state_wait); | 3031 | init_waitqueue_head(&mdev->state_wait); |
@@ -2881,6 +3096,8 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev) | |||
2881 | D_ASSERT(list_empty(&mdev->resync_work.list)); | 3096 | D_ASSERT(list_empty(&mdev->resync_work.list)); |
2882 | D_ASSERT(list_empty(&mdev->unplug_work.list)); | 3097 | D_ASSERT(list_empty(&mdev->unplug_work.list)); |
2883 | D_ASSERT(list_empty(&mdev->go_diskless.list)); | 3098 | D_ASSERT(list_empty(&mdev->go_diskless.list)); |
3099 | |||
3100 | drbd_set_defaults(mdev); | ||
2884 | } | 3101 | } |
2885 | 3102 | ||
2886 | 3103 | ||
@@ -2923,7 +3140,7 @@ static void drbd_destroy_mempools(void) | |||
2923 | static int drbd_create_mempools(void) | 3140 | static int drbd_create_mempools(void) |
2924 | { | 3141 | { |
2925 | struct page *page; | 3142 | struct page *page; |
2926 | const int number = (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE) * minor_count; | 3143 | const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count; |
2927 | int i; | 3144 | int i; |
2928 | 3145 | ||
2929 | /* prepare our caches and mempools */ | 3146 | /* prepare our caches and mempools */ |
@@ -3087,11 +3304,20 @@ static void drbd_cleanup(void) | |||
3087 | 3304 | ||
3088 | unregister_reboot_notifier(&drbd_notifier); | 3305 | unregister_reboot_notifier(&drbd_notifier); |
3089 | 3306 | ||
3307 | /* first remove proc, | ||
3308 | * drbdsetup uses its presence to detect | ||
3309 | * whether DRBD is loaded. | ||
3310 | * If we would get stuck in proc removal, | ||
3311 | * but have netlink already deregistered, | ||
3312 | * some drbdsetup commands may wait forever | ||
3313 | * for an answer. | ||
3314 | */ | ||
3315 | if (drbd_proc) | ||
3316 | remove_proc_entry("drbd", NULL); | ||
3317 | |||
3090 | drbd_nl_cleanup(); | 3318 | drbd_nl_cleanup(); |
3091 | 3319 | ||
3092 | if (minor_table) { | 3320 | if (minor_table) { |
3093 | if (drbd_proc) | ||
3094 | remove_proc_entry("drbd", NULL); | ||
3095 | i = minor_count; | 3321 | i = minor_count; |
3096 | while (i--) | 3322 | while (i--) |
3097 | drbd_delete_device(i); | 3323 | drbd_delete_device(i); |
@@ -3119,7 +3345,7 @@ static int drbd_congested(void *congested_data, int bdi_bits) | |||
3119 | char reason = '-'; | 3345 | char reason = '-'; |
3120 | int r = 0; | 3346 | int r = 0; |
3121 | 3347 | ||
3122 | if (!__inc_ap_bio_cond(mdev)) { | 3348 | if (!may_inc_ap_bio(mdev)) { |
3123 | /* DRBD has frozen IO */ | 3349 | /* DRBD has frozen IO */ |
3124 | r = bdi_bits; | 3350 | r = bdi_bits; |
3125 | reason = 'd'; | 3351 | reason = 'd'; |
@@ -3172,7 +3398,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor) | |||
3172 | goto out_no_disk; | 3398 | goto out_no_disk; |
3173 | mdev->vdisk = disk; | 3399 | mdev->vdisk = disk; |
3174 | 3400 | ||
3175 | set_disk_ro(disk, TRUE); | 3401 | set_disk_ro(disk, true); |
3176 | 3402 | ||
3177 | disk->queue = q; | 3403 | disk->queue = q; |
3178 | disk->major = DRBD_MAJOR; | 3404 | disk->major = DRBD_MAJOR; |
@@ -3188,8 +3414,8 @@ struct drbd_conf *drbd_new_device(unsigned int minor) | |||
3188 | q->backing_dev_info.congested_fn = drbd_congested; | 3414 | q->backing_dev_info.congested_fn = drbd_congested; |
3189 | q->backing_dev_info.congested_data = mdev; | 3415 | q->backing_dev_info.congested_data = mdev; |
3190 | 3416 | ||
3191 | blk_queue_make_request(q, drbd_make_request_26); | 3417 | blk_queue_make_request(q, drbd_make_request); |
3192 | blk_queue_max_segment_size(q, DRBD_MAX_SEGMENT_SIZE); | 3418 | blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE >> 9); |
3193 | blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); | 3419 | blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); |
3194 | blk_queue_merge_bvec(q, drbd_merge_bvec); | 3420 | blk_queue_merge_bvec(q, drbd_merge_bvec); |
3195 | q->queue_lock = &mdev->req_lock; | 3421 | q->queue_lock = &mdev->req_lock; |
@@ -3251,6 +3477,7 @@ void drbd_free_mdev(struct drbd_conf *mdev) | |||
3251 | put_disk(mdev->vdisk); | 3477 | put_disk(mdev->vdisk); |
3252 | blk_cleanup_queue(mdev->rq_queue); | 3478 | blk_cleanup_queue(mdev->rq_queue); |
3253 | free_cpumask_var(mdev->cpu_mask); | 3479 | free_cpumask_var(mdev->cpu_mask); |
3480 | drbd_free_tl_hash(mdev); | ||
3254 | kfree(mdev); | 3481 | kfree(mdev); |
3255 | } | 3482 | } |
3256 | 3483 | ||
@@ -3266,7 +3493,7 @@ int __init drbd_init(void) | |||
3266 | return -EINVAL; | 3493 | return -EINVAL; |
3267 | } | 3494 | } |
3268 | 3495 | ||
3269 | if (1 > minor_count || minor_count > 255) { | 3496 | if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) { |
3270 | printk(KERN_ERR | 3497 | printk(KERN_ERR |
3271 | "drbd: invalid minor_count (%d)\n", minor_count); | 3498 | "drbd: invalid minor_count (%d)\n", minor_count); |
3272 | #ifdef MODULE | 3499 | #ifdef MODULE |
@@ -3448,7 +3675,7 @@ void drbd_md_sync(struct drbd_conf *mdev) | |||
3448 | if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { | 3675 | if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { |
3449 | /* this was a try anyways ... */ | 3676 | /* this was a try anyways ... */ |
3450 | dev_err(DEV, "meta data update failed!\n"); | 3677 | dev_err(DEV, "meta data update failed!\n"); |
3451 | drbd_chk_io_error(mdev, 1, TRUE); | 3678 | drbd_chk_io_error(mdev, 1, true); |
3452 | } | 3679 | } |
3453 | 3680 | ||
3454 | /* Update mdev->ldev->md.la_size_sect, | 3681 | /* Update mdev->ldev->md.la_size_sect, |
@@ -3464,7 +3691,7 @@ void drbd_md_sync(struct drbd_conf *mdev) | |||
3464 | * @mdev: DRBD device. | 3691 | * @mdev: DRBD device. |
3465 | * @bdev: Device from which the meta data should be read in. | 3692 | * @bdev: Device from which the meta data should be read in. |
3466 | * | 3693 | * |
3467 | * Return 0 (NO_ERROR) on success, and an enum drbd_ret_codes in case | 3694 | * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case |
3468 | * something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID. | 3695 | * something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID. |
3469 | */ | 3696 | */ |
3470 | int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) | 3697 | int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) |
@@ -3534,28 +3761,6 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) | |||
3534 | return rv; | 3761 | return rv; |
3535 | } | 3762 | } |
3536 | 3763 | ||
3537 | static void debug_drbd_uuid(struct drbd_conf *mdev, enum drbd_uuid_index index) | ||
3538 | { | ||
3539 | static char *uuid_str[UI_EXTENDED_SIZE] = { | ||
3540 | [UI_CURRENT] = "CURRENT", | ||
3541 | [UI_BITMAP] = "BITMAP", | ||
3542 | [UI_HISTORY_START] = "HISTORY_START", | ||
3543 | [UI_HISTORY_END] = "HISTORY_END", | ||
3544 | [UI_SIZE] = "SIZE", | ||
3545 | [UI_FLAGS] = "FLAGS", | ||
3546 | }; | ||
3547 | |||
3548 | if (index >= UI_EXTENDED_SIZE) { | ||
3549 | dev_warn(DEV, " uuid_index >= EXTENDED_SIZE\n"); | ||
3550 | return; | ||
3551 | } | ||
3552 | |||
3553 | dynamic_dev_dbg(DEV, " uuid[%s] now %016llX\n", | ||
3554 | uuid_str[index], | ||
3555 | (unsigned long long)mdev->ldev->md.uuid[index]); | ||
3556 | } | ||
3557 | |||
3558 | |||
3559 | /** | 3764 | /** |
3560 | * drbd_md_mark_dirty() - Mark meta data super block as dirty | 3765 | * drbd_md_mark_dirty() - Mark meta data super block as dirty |
3561 | * @mdev: DRBD device. | 3766 | * @mdev: DRBD device. |
@@ -3585,10 +3790,8 @@ static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) | |||
3585 | { | 3790 | { |
3586 | int i; | 3791 | int i; |
3587 | 3792 | ||
3588 | for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) { | 3793 | for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) |
3589 | mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i]; | 3794 | mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i]; |
3590 | debug_drbd_uuid(mdev, i+1); | ||
3591 | } | ||
3592 | } | 3795 | } |
3593 | 3796 | ||
3594 | void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) | 3797 | void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) |
@@ -3603,7 +3806,6 @@ void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) | |||
3603 | } | 3806 | } |
3604 | 3807 | ||
3605 | mdev->ldev->md.uuid[idx] = val; | 3808 | mdev->ldev->md.uuid[idx] = val; |
3606 | debug_drbd_uuid(mdev, idx); | ||
3607 | drbd_md_mark_dirty(mdev); | 3809 | drbd_md_mark_dirty(mdev); |
3608 | } | 3810 | } |
3609 | 3811 | ||
@@ -3613,7 +3815,6 @@ void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) | |||
3613 | if (mdev->ldev->md.uuid[idx]) { | 3815 | if (mdev->ldev->md.uuid[idx]) { |
3614 | drbd_uuid_move_history(mdev); | 3816 | drbd_uuid_move_history(mdev); |
3615 | mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx]; | 3817 | mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx]; |
3616 | debug_drbd_uuid(mdev, UI_HISTORY_START); | ||
3617 | } | 3818 | } |
3618 | _drbd_uuid_set(mdev, idx, val); | 3819 | _drbd_uuid_set(mdev, idx, val); |
3619 | } | 3820 | } |
@@ -3628,14 +3829,16 @@ void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) | |||
3628 | void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local) | 3829 | void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local) |
3629 | { | 3830 | { |
3630 | u64 val; | 3831 | u64 val; |
3832 | unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP]; | ||
3833 | |||
3834 | if (bm_uuid) | ||
3835 | dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid); | ||
3631 | 3836 | ||
3632 | dev_info(DEV, "Creating new current UUID\n"); | ||
3633 | D_ASSERT(mdev->ldev->md.uuid[UI_BITMAP] == 0); | ||
3634 | mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT]; | 3837 | mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT]; |
3635 | debug_drbd_uuid(mdev, UI_BITMAP); | ||
3636 | 3838 | ||
3637 | get_random_bytes(&val, sizeof(u64)); | 3839 | get_random_bytes(&val, sizeof(u64)); |
3638 | _drbd_uuid_set(mdev, UI_CURRENT, val); | 3840 | _drbd_uuid_set(mdev, UI_CURRENT, val); |
3841 | drbd_print_uuids(mdev, "new current UUID"); | ||
3639 | /* get it to stable storage _now_ */ | 3842 | /* get it to stable storage _now_ */ |
3640 | drbd_md_sync(mdev); | 3843 | drbd_md_sync(mdev); |
3641 | } | 3844 | } |
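drbd_uuid_new_current() now merely warns when a bitmap UUID is already set, rotates the current UUID into the bitmap slot, and generates a fresh random current UUID before syncing the meta data. A sketch of that rotation; the slot names are kept, but the initial value and the stdlib randomness standing in for get_random_bytes() are illustrative:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

enum { UI_CURRENT, UI_BITMAP, UI_HISTORY_START, UI_HISTORY_END, UI_SIZE };

int main(void)
{
	uint64_t uuid[UI_SIZE] = { 0x1122334455667788ULL, 0, 0, 0 };

	if (uuid[UI_BITMAP])
		fprintf(stderr, "bm UUID was already set: %llX\n",
			(unsigned long long)uuid[UI_BITMAP]);

	/* remember the data generation we are diverging from ... */
	uuid[UI_BITMAP] = uuid[UI_CURRENT];
	/* ... and start a new one (get_random_bytes() in the kernel) */
	uuid[UI_CURRENT] = ((uint64_t)rand() << 32) | (uint64_t)rand();

	printf("new current UUID %016llX, bitmap UUID %016llX\n",
	       (unsigned long long)uuid[UI_CURRENT],
	       (unsigned long long)uuid[UI_BITMAP]);
	return 0;
}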
@@ -3649,16 +3852,12 @@ void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local) | |||
3649 | drbd_uuid_move_history(mdev); | 3852 | drbd_uuid_move_history(mdev); |
3650 | mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP]; | 3853 | mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP]; |
3651 | mdev->ldev->md.uuid[UI_BITMAP] = 0; | 3854 | mdev->ldev->md.uuid[UI_BITMAP] = 0; |
3652 | debug_drbd_uuid(mdev, UI_HISTORY_START); | ||
3653 | debug_drbd_uuid(mdev, UI_BITMAP); | ||
3654 | } else { | 3855 | } else { |
3655 | if (mdev->ldev->md.uuid[UI_BITMAP]) | 3856 | unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP]; |
3656 | dev_warn(DEV, "bm UUID already set"); | 3857 | if (bm_uuid) |
3657 | 3858 | dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid); | |
3658 | mdev->ldev->md.uuid[UI_BITMAP] = val; | ||
3659 | mdev->ldev->md.uuid[UI_BITMAP] &= ~((u64)1); | ||
3660 | 3859 | ||
3661 | debug_drbd_uuid(mdev, UI_BITMAP); | 3860 | mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1); |
3662 | } | 3861 | } |
3663 | drbd_md_mark_dirty(mdev); | 3862 | drbd_md_mark_dirty(mdev); |
3664 | } | 3863 | } |
@@ -3714,15 +3913,19 @@ int drbd_bmio_clear_n_write(struct drbd_conf *mdev) | |||
3714 | static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused) | 3913 | static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused) |
3715 | { | 3914 | { |
3716 | struct bm_io_work *work = container_of(w, struct bm_io_work, w); | 3915 | struct bm_io_work *work = container_of(w, struct bm_io_work, w); |
3717 | int rv; | 3916 | int rv = -EIO; |
3718 | 3917 | ||
3719 | D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0); | 3918 | D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0); |
3720 | 3919 | ||
3721 | drbd_bm_lock(mdev, work->why); | 3920 | if (get_ldev(mdev)) { |
3722 | rv = work->io_fn(mdev); | 3921 | drbd_bm_lock(mdev, work->why, work->flags); |
3723 | drbd_bm_unlock(mdev); | 3922 | rv = work->io_fn(mdev); |
3923 | drbd_bm_unlock(mdev); | ||
3924 | put_ldev(mdev); | ||
3925 | } | ||
3724 | 3926 | ||
3725 | clear_bit(BITMAP_IO, &mdev->flags); | 3927 | clear_bit(BITMAP_IO, &mdev->flags); |
3928 | smp_mb__after_clear_bit(); | ||
3726 | wake_up(&mdev->misc_wait); | 3929 | wake_up(&mdev->misc_wait); |
3727 | 3930 | ||
3728 | if (work->done) | 3931 | if (work->done) |
@@ -3730,6 +3933,7 @@ static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused) | |||
3730 | 3933 | ||
3731 | clear_bit(BITMAP_IO_QUEUED, &mdev->flags); | 3934 | clear_bit(BITMAP_IO_QUEUED, &mdev->flags); |
3732 | work->why = NULL; | 3935 | work->why = NULL; |
3936 | work->flags = 0; | ||
3733 | 3937 | ||
3734 | return 1; | 3938 | return 1; |
3735 | } | 3939 | } |
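w_bitmap_io() now defaults to an I/O error and only locks the bitmap and runs the callback inside a get_ldev()/put_ldev() pair, so a device that was detached in the meantime is handled gracefully. A rough model of that guard with a plain reference counter in place of get_ldev():

#include <stdio.h>
#include <errno.h>

struct dev {
	int have_disk;		/* whether a backing device is still attached */
	int ldev_refs;		/* stand-in for the local-disk reference count */
};

static int get_ldev(struct dev *d)
{
	if (!d->have_disk)
		return 0;	/* already detached: refuse the reference */
	d->ldev_refs++;
	return 1;
}

static void put_ldev(struct dev *d) { d->ldev_refs--; }

static int bitmap_io_work(struct dev *d)
{
	int rv = -EIO;		/* default when we cannot reach the disk */

	if (get_ldev(d)) {
		/* bitmap lock would be taken here */
		rv = 0;		/* the actual io_fn() call */
		/* ... and released here */
		put_ldev(d);
	}
	return rv;
}

int main(void)
{
	struct dev d = { 1, 0 };
	printf("bitmap io -> %d\n", bitmap_io_work(&d));
	return 0;
}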
@@ -3784,7 +3988,7 @@ void drbd_go_diskless(struct drbd_conf *mdev) | |||
3784 | void drbd_queue_bitmap_io(struct drbd_conf *mdev, | 3988 | void drbd_queue_bitmap_io(struct drbd_conf *mdev, |
3785 | int (*io_fn)(struct drbd_conf *), | 3989 | int (*io_fn)(struct drbd_conf *), |
3786 | void (*done)(struct drbd_conf *, int), | 3990 | void (*done)(struct drbd_conf *, int), |
3787 | char *why) | 3991 | char *why, enum bm_flag flags) |
3788 | { | 3992 | { |
3789 | D_ASSERT(current == mdev->worker.task); | 3993 | D_ASSERT(current == mdev->worker.task); |
3790 | 3994 | ||
@@ -3798,15 +4002,15 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev, | |||
3798 | mdev->bm_io_work.io_fn = io_fn; | 4002 | mdev->bm_io_work.io_fn = io_fn; |
3799 | mdev->bm_io_work.done = done; | 4003 | mdev->bm_io_work.done = done; |
3800 | mdev->bm_io_work.why = why; | 4004 | mdev->bm_io_work.why = why; |
4005 | mdev->bm_io_work.flags = flags; | ||
3801 | 4006 | ||
4007 | spin_lock_irq(&mdev->req_lock); | ||
3802 | set_bit(BITMAP_IO, &mdev->flags); | 4008 | set_bit(BITMAP_IO, &mdev->flags); |
3803 | if (atomic_read(&mdev->ap_bio_cnt) == 0) { | 4009 | if (atomic_read(&mdev->ap_bio_cnt) == 0) { |
3804 | if (list_empty(&mdev->bm_io_work.w.list)) { | 4010 | if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) |
3805 | set_bit(BITMAP_IO_QUEUED, &mdev->flags); | ||
3806 | drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w); | 4011 | drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w); |
3807 | } else | ||
3808 | dev_err(DEV, "FIXME avoided double queuing bm_io_work\n"); | ||
3809 | } | 4012 | } |
4013 | spin_unlock_irq(&mdev->req_lock); | ||
3810 | } | 4014 | } |
3811 | 4015 | ||
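drbd_queue_bitmap_io() now serializes the BITMAP_IO / BITMAP_IO_QUEUED handshake under req_lock and uses a test-and-set on the queued bit instead of inspecting the work list, so the work item can never be queued twice. A userspace approximation of that idea, with a pthread mutex standing in for the spinlock:

#include <stdio.h>
#include <stdbool.h>
#include <pthread.h>

static pthread_mutex_t req_lock = PTHREAD_MUTEX_INITIALIZER;
static bool bitmap_io_pending;		/* models the BITMAP_IO flag */
static bool bitmap_io_queued;		/* models BITMAP_IO_QUEUED */
static int ap_bio_cnt;			/* in-flight application bios */

static void queue_bitmap_io(void)
{
	pthread_mutex_lock(&req_lock);
	bitmap_io_pending = true;
	if (ap_bio_cnt == 0 && !bitmap_io_queued) {
		bitmap_io_queued = true;	/* test_and_set_bit() equivalent */
		printf("bitmap work queued\n");	/* drbd_queue_work() would go here */
	}
	pthread_mutex_unlock(&req_lock);
}

int main(void)
{
	queue_bitmap_io();
	queue_bitmap_io();	/* second call must not queue the work again */
	return 0;
}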
3812 | /** | 4016 | /** |
@@ -3818,19 +4022,22 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev, | |||
3818 | * freezes application IO while the actual IO operation runs. This | 4022 | * freezes application IO while the actual IO operation runs. This
3819 | * function MAY NOT be called from worker context. | 4023 | * function MAY NOT be called from worker context.
3820 | */ | 4024 | */ |
3821 | int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why) | 4025 | int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), |
4026 | char *why, enum bm_flag flags) | ||
3822 | { | 4027 | { |
3823 | int rv; | 4028 | int rv; |
3824 | 4029 | ||
3825 | D_ASSERT(current != mdev->worker.task); | 4030 | D_ASSERT(current != mdev->worker.task); |
3826 | 4031 | ||
3827 | drbd_suspend_io(mdev); | 4032 | if ((flags & BM_LOCKED_SET_ALLOWED) == 0) |
4033 | drbd_suspend_io(mdev); | ||
3828 | 4034 | ||
3829 | drbd_bm_lock(mdev, why); | 4035 | drbd_bm_lock(mdev, why, flags); |
3830 | rv = io_fn(mdev); | 4036 | rv = io_fn(mdev); |
3831 | drbd_bm_unlock(mdev); | 4037 | drbd_bm_unlock(mdev); |
3832 | 4038 | ||
3833 | drbd_resume_io(mdev); | 4039 | if ((flags & BM_LOCKED_SET_ALLOWED) == 0) |
4040 | drbd_resume_io(mdev); | ||
3834 | 4041 | ||
3835 | return rv; | 4042 | return rv; |
3836 | } | 4043 | } |
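drbd_bitmap_io() now skips the suspend/resume of application I/O when the caller passes BM_LOCKED_SET_ALLOWED, i.e. when concurrent bitmap changes are tolerated anyway. A compact sketch of that flag test; the flag values and the stub callback are invented:

#include <stdio.h>

#define BM_LOCKED_TEST_ALLOWED	0x1	/* illustrative values only */
#define BM_LOCKED_SET_ALLOWED	0x2

static int write_bm(void) { return 0; }

static int bitmap_io(int (*io_fn)(void), unsigned int flags)
{
	int rv;

	if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
		printf("suspending application IO\n");

	/* bitmap lock, callback, bitmap unlock */
	rv = io_fn();

	if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
		printf("resuming application IO\n");
	return rv;
}

int main(void)
{
	bitmap_io(write_bm, BM_LOCKED_SET_ALLOWED);	/* no suspend needed */
	bitmap_io(write_bm, BM_LOCKED_TEST_ALLOWED);	/* suspend around the write */
	return 0;
}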
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index fe81c851ca88..03b29f78a37d 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c | |||
@@ -288,10 +288,11 @@ void drbd_try_outdate_peer_async(struct drbd_conf *mdev) | |||
288 | dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n"); | 288 | dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n"); |
289 | } | 289 | } |
290 | 290 | ||
291 | int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) | 291 | enum drbd_state_rv |
292 | drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) | ||
292 | { | 293 | { |
293 | const int max_tries = 4; | 294 | const int max_tries = 4; |
294 | int r = 0; | 295 | enum drbd_state_rv rv = SS_UNKNOWN_ERROR; |
295 | int try = 0; | 296 | int try = 0; |
296 | int forced = 0; | 297 | int forced = 0; |
297 | union drbd_state mask, val; | 298 | union drbd_state mask, val; |
@@ -306,17 +307,17 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) | |||
306 | val.i = 0; val.role = new_role; | 307 | val.i = 0; val.role = new_role; |
307 | 308 | ||
308 | while (try++ < max_tries) { | 309 | while (try++ < max_tries) { |
309 | r = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE); | 310 | rv = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE); |
310 | 311 | ||
311 | /* in case we first succeeded to outdate, | 312 | /* in case we first succeeded to outdate, |
312 | * but now suddenly could establish a connection */ | 313 | * but now suddenly could establish a connection */ |
313 | if (r == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) { | 314 | if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) { |
314 | val.pdsk = 0; | 315 | val.pdsk = 0; |
315 | mask.pdsk = 0; | 316 | mask.pdsk = 0; |
316 | continue; | 317 | continue; |
317 | } | 318 | } |
318 | 319 | ||
319 | if (r == SS_NO_UP_TO_DATE_DISK && force && | 320 | if (rv == SS_NO_UP_TO_DATE_DISK && force && |
320 | (mdev->state.disk < D_UP_TO_DATE && | 321 | (mdev->state.disk < D_UP_TO_DATE && |
321 | mdev->state.disk >= D_INCONSISTENT)) { | 322 | mdev->state.disk >= D_INCONSISTENT)) { |
322 | mask.disk = D_MASK; | 323 | mask.disk = D_MASK; |
@@ -325,7 +326,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) | |||
325 | continue; | 326 | continue; |
326 | } | 327 | } |
327 | 328 | ||
328 | if (r == SS_NO_UP_TO_DATE_DISK && | 329 | if (rv == SS_NO_UP_TO_DATE_DISK && |
329 | mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) { | 330 | mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) { |
330 | D_ASSERT(mdev->state.pdsk == D_UNKNOWN); | 331 | D_ASSERT(mdev->state.pdsk == D_UNKNOWN); |
331 | nps = drbd_try_outdate_peer(mdev); | 332 | nps = drbd_try_outdate_peer(mdev); |
@@ -341,9 +342,9 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) | |||
341 | continue; | 342 | continue; |
342 | } | 343 | } |
343 | 344 | ||
344 | if (r == SS_NOTHING_TO_DO) | 345 | if (rv == SS_NOTHING_TO_DO) |
345 | goto fail; | 346 | goto fail; |
346 | if (r == SS_PRIMARY_NOP && mask.pdsk == 0) { | 347 | if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) { |
347 | nps = drbd_try_outdate_peer(mdev); | 348 | nps = drbd_try_outdate_peer(mdev); |
348 | 349 | ||
349 | if (force && nps > D_OUTDATED) { | 350 | if (force && nps > D_OUTDATED) { |
@@ -356,25 +357,24 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) | |||
356 | 357 | ||
357 | continue; | 358 | continue; |
358 | } | 359 | } |
359 | if (r == SS_TWO_PRIMARIES) { | 360 | if (rv == SS_TWO_PRIMARIES) { |
360 | /* Maybe the peer is detected as dead very soon... | 361 | /* Maybe the peer is detected as dead very soon... |
361 | retry at most once more in this case. */ | 362 | retry at most once more in this case. */ |
362 | __set_current_state(TASK_INTERRUPTIBLE); | 363 | schedule_timeout_interruptible((mdev->net_conf->ping_timeo+1)*HZ/10); |
363 | schedule_timeout((mdev->net_conf->ping_timeo+1)*HZ/10); | ||
364 | if (try < max_tries) | 364 | if (try < max_tries) |
365 | try = max_tries - 1; | 365 | try = max_tries - 1; |
366 | continue; | 366 | continue; |
367 | } | 367 | } |
368 | if (r < SS_SUCCESS) { | 368 | if (rv < SS_SUCCESS) { |
369 | r = _drbd_request_state(mdev, mask, val, | 369 | rv = _drbd_request_state(mdev, mask, val, |
370 | CS_VERBOSE + CS_WAIT_COMPLETE); | 370 | CS_VERBOSE + CS_WAIT_COMPLETE); |
371 | if (r < SS_SUCCESS) | 371 | if (rv < SS_SUCCESS) |
372 | goto fail; | 372 | goto fail; |
373 | } | 373 | } |
374 | break; | 374 | break; |
375 | } | 375 | } |
376 | 376 | ||
377 | if (r < SS_SUCCESS) | 377 | if (rv < SS_SUCCESS) |
378 | goto fail; | 378 | goto fail; |
379 | 379 | ||
380 | if (forced) | 380 | if (forced) |
@@ -384,7 +384,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) | |||
384 | wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0); | 384 | wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0); |
385 | 385 | ||
386 | if (new_role == R_SECONDARY) { | 386 | if (new_role == R_SECONDARY) { |
387 | set_disk_ro(mdev->vdisk, TRUE); | 387 | set_disk_ro(mdev->vdisk, true); |
388 | if (get_ldev(mdev)) { | 388 | if (get_ldev(mdev)) { |
389 | mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1; | 389 | mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1; |
390 | put_ldev(mdev); | 390 | put_ldev(mdev); |
@@ -394,7 +394,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) | |||
394 | mdev->net_conf->want_lose = 0; | 394 | mdev->net_conf->want_lose = 0; |
395 | put_net_conf(mdev); | 395 | put_net_conf(mdev); |
396 | } | 396 | } |
397 | set_disk_ro(mdev->vdisk, FALSE); | 397 | set_disk_ro(mdev->vdisk, false); |
398 | if (get_ldev(mdev)) { | 398 | if (get_ldev(mdev)) { |
399 | if (((mdev->state.conn < C_CONNECTED || | 399 | if (((mdev->state.conn < C_CONNECTED || |
400 | mdev->state.pdsk <= D_FAILED) | 400 | mdev->state.pdsk <= D_FAILED) |
@@ -406,10 +406,8 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) | |||
406 | } | 406 | } |
407 | } | 407 | } |
408 | 408 | ||
409 | if ((new_role == R_SECONDARY) && get_ldev(mdev)) { | 409 | /* writeout of activity log covered areas of the bitmap |
410 | drbd_al_to_on_disk_bm(mdev); | 410 | * to stable storage done in after state change already */ |
411 | put_ldev(mdev); | ||
412 | } | ||
413 | 411 | ||
414 | if (mdev->state.conn >= C_WF_REPORT_PARAMS) { | 412 | if (mdev->state.conn >= C_WF_REPORT_PARAMS) { |
415 | /* if this was forced, we should consider sync */ | 413 | /* if this was forced, we should consider sync */ |
@@ -423,7 +421,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) | |||
423 | kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); | 421 | kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); |
424 | fail: | 422 | fail: |
425 | mutex_unlock(&mdev->state_mutex); | 423 | mutex_unlock(&mdev->state_mutex); |
426 | return r; | 424 | return rv; |
427 | } | 425 | } |
428 | 426 | ||
429 | static struct drbd_conf *ensure_mdev(int minor, int create) | 427 | static struct drbd_conf *ensure_mdev(int minor, int create) |
@@ -528,17 +526,19 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev, | |||
528 | } | 526 | } |
529 | } | 527 | } |
530 | 528 | ||
529 | /* input size is expected to be in KB */ | ||
531 | char *ppsize(char *buf, unsigned long long size) | 530 | char *ppsize(char *buf, unsigned long long size) |
532 | { | 531 | { |
533 | /* Needs 9 bytes at max. */ | 532 | /* Needs 9 bytes at max including trailing NUL: |
533 | * -1ULL ==> "16384 EB" */ | ||
534 | static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' }; | 534 | static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' }; |
535 | int base = 0; | 535 | int base = 0; |
536 | while (size >= 10000) { | 536 | while (size >= 10000 && base < sizeof(units)-1) { |
537 | /* shift + round */ | 537 | /* shift + round */ |
538 | size = (size >> 10) + !!(size & (1<<9)); | 538 | size = (size >> 10) + !!(size & (1<<9)); |
539 | base++; | 539 | base++; |
540 | } | 540 | } |
541 | sprintf(buf, "%lu %cB", (long)size, units[base]); | 541 | sprintf(buf, "%u %cB", (unsigned)size, units[base]); |
542 | 542 | ||
543 | return buf; | 543 | return buf; |
544 | } | 544 | } |
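
The reworked ppsize() above documents its KiB input, clamps the unit index so it cannot run past the units table, and prints with %u. A standalone userspace rendering of the same scaling logic, useful for checking the shift-plus-round behaviour (a sketch, not the kernel function itself):

#include <stdio.h>

static char *ppsize_sketch(char *buf, unsigned long long size)
{
        /* size is in KiB; each step divides by 1024, rounding to nearest. */
        static const char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
        int base = 0;

        while (size >= 10000 && base < (int)sizeof(units) - 1) {
                /* shift + round: bit 9 decides whether to round up */
                size = (size >> 10) + !!(size & (1 << 9));
                base++;
        }
        sprintf(buf, "%u %cB", (unsigned)size, units[base]);
        return buf;
}

int main(void)
{
        char buf[16];

        printf("%s\n", ppsize_sketch(buf, 1ULL));       /* "1 KB"     */
        printf("%s\n", ppsize_sketch(buf, 1048576ULL)); /* "1024 MB"  */
        printf("%s\n", ppsize_sketch(buf, ~0ULL));      /* "16384 EB" */
        return 0;
}
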
@@ -642,11 +642,19 @@ enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, enum dds_ | |||
642 | || prev_size != mdev->ldev->md.md_size_sect; | 642 | || prev_size != mdev->ldev->md.md_size_sect; |
643 | 643 | ||
644 | if (la_size_changed || md_moved) { | 644 | if (la_size_changed || md_moved) { |
645 | int err; | ||
646 | |||
645 | drbd_al_shrink(mdev); /* All extents inactive. */ | 647 | drbd_al_shrink(mdev); /* All extents inactive. */ |
646 | dev_info(DEV, "Writing the whole bitmap, %s\n", | 648 | dev_info(DEV, "Writing the whole bitmap, %s\n", |
647 | la_size_changed && md_moved ? "size changed and md moved" : | 649 | la_size_changed && md_moved ? "size changed and md moved" : |
648 | la_size_changed ? "size changed" : "md moved"); | 650 | la_size_changed ? "size changed" : "md moved"); |
649 | rv = drbd_bitmap_io(mdev, &drbd_bm_write, "size changed"); /* does drbd_resume_io() ! */ | 651 | /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */ |
652 | err = drbd_bitmap_io(mdev, &drbd_bm_write, | ||
653 | "size changed", BM_LOCKED_MASK); | ||
654 | if (err) { | ||
655 | rv = dev_size_error; | ||
656 | goto out; | ||
657 | } | ||
650 | drbd_md_mark_dirty(mdev); | 658 | drbd_md_mark_dirty(mdev); |
651 | } | 659 | } |
652 | 660 | ||
@@ -765,22 +773,21 @@ static int drbd_check_al_size(struct drbd_conf *mdev) | |||
765 | return 0; | 773 | return 0; |
766 | } | 774 | } |
767 | 775 | ||
768 | void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __must_hold(local) | 776 | void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size) __must_hold(local) |
769 | { | 777 | { |
770 | struct request_queue * const q = mdev->rq_queue; | 778 | struct request_queue * const q = mdev->rq_queue; |
771 | struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; | 779 | struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; |
772 | int max_segments = mdev->ldev->dc.max_bio_bvecs; | 780 | int max_segments = mdev->ldev->dc.max_bio_bvecs; |
781 | int max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); | ||
773 | 782 | ||
774 | max_seg_s = min(queue_max_sectors(b) * queue_logical_block_size(b), max_seg_s); | ||
775 | |||
776 | blk_queue_max_hw_sectors(q, max_seg_s >> 9); | ||
777 | blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); | ||
778 | blk_queue_max_segment_size(q, max_seg_s); | ||
779 | blk_queue_logical_block_size(q, 512); | 783 | blk_queue_logical_block_size(q, 512); |
780 | blk_queue_segment_boundary(q, PAGE_SIZE-1); | 784 | blk_queue_max_hw_sectors(q, max_hw_sectors); |
781 | blk_stack_limits(&q->limits, &b->limits, 0); | 785 | /* This is the workaround for "bio would need to, but cannot, be split" */ |
786 | blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); | ||
787 | blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1); | ||
788 | blk_queue_stack_limits(q, b); | ||
782 | 789 | ||
783 | dev_info(DEV, "max_segment_size ( = BIO size ) = %u\n", queue_max_segment_size(q)); | 790 | dev_info(DEV, "max BIO size = %u\n", queue_max_hw_sectors(q) << 9); |
784 | 791 | ||
785 | if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { | 792 | if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { |
786 | dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", | 793 | dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", |
@@ -850,7 +857,7 @@ static void drbd_suspend_al(struct drbd_conf *mdev) | |||
850 | static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, | 857 | static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, |
851 | struct drbd_nl_cfg_reply *reply) | 858 | struct drbd_nl_cfg_reply *reply) |
852 | { | 859 | { |
853 | enum drbd_ret_codes retcode; | 860 | enum drbd_ret_code retcode; |
854 | enum determine_dev_size dd; | 861 | enum determine_dev_size dd; |
855 | sector_t max_possible_sectors; | 862 | sector_t max_possible_sectors; |
856 | sector_t min_md_device_sectors; | 863 | sector_t min_md_device_sectors; |
@@ -858,8 +865,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp | |||
858 | struct block_device *bdev; | 865 | struct block_device *bdev; |
859 | struct lru_cache *resync_lru = NULL; | 866 | struct lru_cache *resync_lru = NULL; |
860 | union drbd_state ns, os; | 867 | union drbd_state ns, os; |
861 | unsigned int max_seg_s; | 868 | unsigned int max_bio_size; |
862 | int rv; | 869 | enum drbd_state_rv rv; |
863 | int cp_discovered = 0; | 870 | int cp_discovered = 0; |
864 | int logical_block_size; | 871 | int logical_block_size; |
865 | 872 | ||
@@ -1005,9 +1012,10 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp | |||
1005 | /* and for any other previously queued work */ | 1012 | /* and for any other previously queued work */ |
1006 | drbd_flush_workqueue(mdev); | 1013 | drbd_flush_workqueue(mdev); |
1007 | 1014 | ||
1008 | retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); | 1015 | rv = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); |
1016 | retcode = rv; /* FIXME: Type mismatch. */ | ||
1009 | drbd_resume_io(mdev); | 1017 | drbd_resume_io(mdev); |
1010 | if (retcode < SS_SUCCESS) | 1018 | if (rv < SS_SUCCESS) |
1011 | goto fail; | 1019 | goto fail; |
1012 | 1020 | ||
1013 | if (!get_ldev_if_state(mdev, D_ATTACHING)) | 1021 | if (!get_ldev_if_state(mdev, D_ATTACHING)) |
@@ -1109,20 +1117,20 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp | |||
1109 | mdev->read_cnt = 0; | 1117 | mdev->read_cnt = 0; |
1110 | mdev->writ_cnt = 0; | 1118 | mdev->writ_cnt = 0; |
1111 | 1119 | ||
1112 | max_seg_s = DRBD_MAX_SEGMENT_SIZE; | 1120 | max_bio_size = DRBD_MAX_BIO_SIZE; |
1113 | if (mdev->state.conn == C_CONNECTED) { | 1121 | if (mdev->state.conn == C_CONNECTED) { |
1114 | /* We are Primary, Connected, and now attach a new local | 1122 | /* We are Primary, Connected, and now attach a new local |
1115 | * backing store. We must not increase the user visible maximum | 1123 | * backing store. We must not increase the user visible maximum |
1116 | * bio size on this device to something the peer may not be | 1124 | * bio size on this device to something the peer may not be |
1117 | * able to handle. */ | 1125 | * able to handle. */ |
1118 | if (mdev->agreed_pro_version < 94) | 1126 | if (mdev->agreed_pro_version < 94) |
1119 | max_seg_s = queue_max_segment_size(mdev->rq_queue); | 1127 | max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9; |
1120 | else if (mdev->agreed_pro_version == 94) | 1128 | else if (mdev->agreed_pro_version == 94) |
1121 | max_seg_s = DRBD_MAX_SIZE_H80_PACKET; | 1129 | max_bio_size = DRBD_MAX_SIZE_H80_PACKET; |
1122 | /* else: drbd 8.3.9 and later, stay with default */ | 1130 | /* else: drbd 8.3.9 and later, stay with default */ |
1123 | } | 1131 | } |
1124 | 1132 | ||
1125 | drbd_setup_queue_param(mdev, max_seg_s); | 1133 | drbd_setup_queue_param(mdev, max_bio_size); |
1126 | 1134 | ||
1127 | /* If I am currently not R_PRIMARY, | 1135 | /* If I am currently not R_PRIMARY, |
1128 | * but meta data primary indicator is set, | 1136 | * but meta data primary indicator is set, |
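
The attach path above picks the maximum bio size to advertise from the agreed protocol version: pre-94 peers keep whatever the queue already exposes, protocol 94 is capped by the old h80 packet header, and newer peers get the full default. A compact sketch of just that decision; the parameters stand in for the driver's constants rather than hard-coding them here:

/* Sketch of the negotiation rule only; the real limits come from
 * DRBD_MAX_BIO_SIZE, DRBD_MAX_SIZE_H80_PACKET and the current queue. */
static unsigned int negotiated_max_bio_size(int connected, int agreed_pro_version,
                                            unsigned int queue_max_now,
                                            unsigned int h80_limit,
                                            unsigned int drbd_default)
{
        unsigned int max_bio_size = drbd_default;

        if (connected) {
                if (agreed_pro_version < 94)
                        max_bio_size = queue_max_now;   /* never grow past what the peer saw */
                else if (agreed_pro_version == 94)
                        max_bio_size = h80_limit;       /* limited by the older packet header */
                /* else: drbd 8.3.9 and later, keep the default */
        }
        return max_bio_size;
}
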
@@ -1154,12 +1162,14 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp | |||
1154 | if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) { | 1162 | if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) { |
1155 | dev_info(DEV, "Assuming that all blocks are out of sync " | 1163 | dev_info(DEV, "Assuming that all blocks are out of sync " |
1156 | "(aka FullSync)\n"); | 1164 | "(aka FullSync)\n"); |
1157 | if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from attaching")) { | 1165 | if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, |
1166 | "set_n_write from attaching", BM_LOCKED_MASK)) { | ||
1158 | retcode = ERR_IO_MD_DISK; | 1167 | retcode = ERR_IO_MD_DISK; |
1159 | goto force_diskless_dec; | 1168 | goto force_diskless_dec; |
1160 | } | 1169 | } |
1161 | } else { | 1170 | } else { |
1162 | if (drbd_bitmap_io(mdev, &drbd_bm_read, "read from attaching") < 0) { | 1171 | if (drbd_bitmap_io(mdev, &drbd_bm_read, |
1172 | "read from attaching", BM_LOCKED_MASK) < 0) { | ||
1163 | retcode = ERR_IO_MD_DISK; | 1173 | retcode = ERR_IO_MD_DISK; |
1164 | goto force_diskless_dec; | 1174 | goto force_diskless_dec; |
1165 | } | 1175 | } |
@@ -1167,7 +1177,11 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp | |||
1167 | 1177 | ||
1168 | if (cp_discovered) { | 1178 | if (cp_discovered) { |
1169 | drbd_al_apply_to_bm(mdev); | 1179 | drbd_al_apply_to_bm(mdev); |
1170 | drbd_al_to_on_disk_bm(mdev); | 1180 | if (drbd_bitmap_io(mdev, &drbd_bm_write, |
1181 | "crashed primary apply AL", BM_LOCKED_MASK)) { | ||
1182 | retcode = ERR_IO_MD_DISK; | ||
1183 | goto force_diskless_dec; | ||
1184 | } | ||
1171 | } | 1185 | } |
1172 | 1186 | ||
1173 | if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev)) | 1187 | if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev)) |
@@ -1279,7 +1293,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, | |||
1279 | struct drbd_nl_cfg_reply *reply) | 1293 | struct drbd_nl_cfg_reply *reply) |
1280 | { | 1294 | { |
1281 | int i, ns; | 1295 | int i, ns; |
1282 | enum drbd_ret_codes retcode; | 1296 | enum drbd_ret_code retcode; |
1283 | struct net_conf *new_conf = NULL; | 1297 | struct net_conf *new_conf = NULL; |
1284 | struct crypto_hash *tfm = NULL; | 1298 | struct crypto_hash *tfm = NULL; |
1285 | struct crypto_hash *integrity_w_tfm = NULL; | 1299 | struct crypto_hash *integrity_w_tfm = NULL; |
@@ -1324,6 +1338,8 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, | |||
1324 | new_conf->wire_protocol = DRBD_PROT_C; | 1338 | new_conf->wire_protocol = DRBD_PROT_C; |
1325 | new_conf->ping_timeo = DRBD_PING_TIMEO_DEF; | 1339 | new_conf->ping_timeo = DRBD_PING_TIMEO_DEF; |
1326 | new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF; | 1340 | new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF; |
1341 | new_conf->on_congestion = DRBD_ON_CONGESTION_DEF; | ||
1342 | new_conf->cong_extents = DRBD_CONG_EXTENTS_DEF; | ||
1327 | 1343 | ||
1328 | if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) { | 1344 | if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) { |
1329 | retcode = ERR_MANDATORY_TAG; | 1345 | retcode = ERR_MANDATORY_TAG; |
@@ -1345,6 +1361,11 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, | |||
1345 | } | 1361 | } |
1346 | } | 1362 | } |
1347 | 1363 | ||
1364 | if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) { | ||
1365 | retcode = ERR_CONG_NOT_PROTO_A; | ||
1366 | goto fail; | ||
1367 | } | ||
1368 | |||
1348 | if (mdev->state.role == R_PRIMARY && new_conf->want_lose) { | 1369 | if (mdev->state.role == R_PRIMARY && new_conf->want_lose) { |
1349 | retcode = ERR_DISCARD; | 1370 | retcode = ERR_DISCARD; |
1350 | goto fail; | 1371 | goto fail; |
@@ -1525,6 +1546,21 @@ static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl | |||
1525 | struct drbd_nl_cfg_reply *reply) | 1546 | struct drbd_nl_cfg_reply *reply) |
1526 | { | 1547 | { |
1527 | int retcode; | 1548 | int retcode; |
1549 | struct disconnect dc; | ||
1550 | |||
1551 | memset(&dc, 0, sizeof(struct disconnect)); | ||
1552 | if (!disconnect_from_tags(mdev, nlp->tag_list, &dc)) { | ||
1553 | retcode = ERR_MANDATORY_TAG; | ||
1554 | goto fail; | ||
1555 | } | ||
1556 | |||
1557 | if (dc.force) { | ||
1558 | spin_lock_irq(&mdev->req_lock); | ||
1559 | if (mdev->state.conn >= C_WF_CONNECTION) | ||
1560 | _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), CS_HARD, NULL); | ||
1561 | spin_unlock_irq(&mdev->req_lock); | ||
1562 | goto done; | ||
1563 | } | ||
1528 | 1564 | ||
1529 | retcode = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING), CS_ORDERED); | 1565 | retcode = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING), CS_ORDERED); |
1530 | 1566 | ||
@@ -1842,6 +1878,10 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl | |||
1842 | { | 1878 | { |
1843 | int retcode; | 1879 | int retcode; |
1844 | 1880 | ||
1881 | /* If there is still bitmap IO pending, probably because of a previous | ||
1882 | * resync just being finished, wait for it before requesting a new resync. */ | ||
1883 | wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); | ||
1884 | |||
1845 | retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); | 1885 | retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); |
1846 | 1886 | ||
1847 | if (retcode < SS_SUCCESS && retcode != SS_NEED_CONNECTION) | 1887 | if (retcode < SS_SUCCESS && retcode != SS_NEED_CONNECTION) |
@@ -1877,6 +1917,10 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re | |||
1877 | { | 1917 | { |
1878 | int retcode; | 1918 | int retcode; |
1879 | 1919 | ||
1920 | /* If there is still bitmap IO pending, probably because of a previous | ||
1921 | * resync just being finished, wait for it before requesting a new resync. */ | ||
1922 | wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); | ||
1923 | |||
1880 | retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); | 1924 | retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); |
1881 | 1925 | ||
1882 | if (retcode < SS_SUCCESS) { | 1926 | if (retcode < SS_SUCCESS) { |
@@ -1885,9 +1929,9 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re | |||
1885 | into a full resync. */ | 1929 | into a full resync. */ |
1886 | retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT)); | 1930 | retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT)); |
1887 | if (retcode >= SS_SUCCESS) { | 1931 | if (retcode >= SS_SUCCESS) { |
1888 | /* open coded drbd_bitmap_io() */ | ||
1889 | if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al, | 1932 | if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al, |
1890 | "set_n_write from invalidate_peer")) | 1933 | "set_n_write from invalidate_peer", |
1934 | BM_LOCKED_SET_ALLOWED)) | ||
1891 | retcode = ERR_IO_MD_DISK; | 1935 | retcode = ERR_IO_MD_DISK; |
1892 | } | 1936 | } |
1893 | } else | 1937 | } else |
@@ -1914,9 +1958,17 @@ static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n | |||
1914 | struct drbd_nl_cfg_reply *reply) | 1958 | struct drbd_nl_cfg_reply *reply) |
1915 | { | 1959 | { |
1916 | int retcode = NO_ERROR; | 1960 | int retcode = NO_ERROR; |
1961 | union drbd_state s; | ||
1917 | 1962 | ||
1918 | if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) | 1963 | if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) { |
1919 | retcode = ERR_PAUSE_IS_CLEAR; | 1964 | s = mdev->state; |
1965 | if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) { | ||
1966 | retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP : | ||
1967 | s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR; | ||
1968 | } else { | ||
1969 | retcode = ERR_PAUSE_IS_CLEAR; | ||
1970 | } | ||
1971 | } | ||
1920 | 1972 | ||
1921 | reply->ret_code = retcode; | 1973 | reply->ret_code = retcode; |
1922 | return 0; | 1974 | return 0; |
@@ -2054,6 +2106,11 @@ static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, | |||
2054 | reply->ret_code = ERR_MANDATORY_TAG; | 2106 | reply->ret_code = ERR_MANDATORY_TAG; |
2055 | return 0; | 2107 | return 0; |
2056 | } | 2108 | } |
2109 | |||
2110 | /* If there is still bitmap IO pending, e.g. previous resync or verify | ||
2111 | * just being finished, wait for it before requesting a new resync. */ | ||
2112 | wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); | ||
2113 | |||
2057 | /* w_make_ov_request expects position to be aligned */ | 2114 | /* w_make_ov_request expects position to be aligned */ |
2058 | mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT; | 2115 | mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT; |
2059 | reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); | 2116 | reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); |
@@ -2097,7 +2154,8 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl | |||
2097 | drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */ | 2154 | drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */ |
2098 | 2155 | ||
2099 | if (args.clear_bm) { | 2156 | if (args.clear_bm) { |
2100 | err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, "clear_n_write from new_c_uuid"); | 2157 | err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, |
2158 | "clear_n_write from new_c_uuid", BM_LOCKED_MASK); | ||
2101 | if (err) { | 2159 | if (err) { |
2102 | dev_err(DEV, "Writing bitmap failed with %d\n",err); | 2160 | dev_err(DEV, "Writing bitmap failed with %d\n",err); |
2103 | retcode = ERR_IO_MD_DISK; | 2161 | retcode = ERR_IO_MD_DISK; |
@@ -2105,6 +2163,7 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl | |||
2105 | if (skip_initial_sync) { | 2163 | if (skip_initial_sync) { |
2106 | drbd_send_uuids_skip_initial_sync(mdev); | 2164 | drbd_send_uuids_skip_initial_sync(mdev); |
2107 | _drbd_uuid_set(mdev, UI_BITMAP, 0); | 2165 | _drbd_uuid_set(mdev, UI_BITMAP, 0); |
2166 | drbd_print_uuids(mdev, "cleared bitmap UUID"); | ||
2108 | spin_lock_irq(&mdev->req_lock); | 2167 | spin_lock_irq(&mdev->req_lock); |
2109 | _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), | 2168 | _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), |
2110 | CS_VERBOSE, NULL); | 2169 | CS_VERBOSE, NULL); |
@@ -2189,7 +2248,8 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms | |||
2189 | goto fail; | 2248 | goto fail; |
2190 | } | 2249 | } |
2191 | 2250 | ||
2192 | if (nlp->packet_type >= P_nl_after_last_packet) { | 2251 | if (nlp->packet_type >= P_nl_after_last_packet || |
2252 | nlp->packet_type == P_return_code_only) { | ||
2193 | retcode = ERR_PACKET_NR; | 2253 | retcode = ERR_PACKET_NR; |
2194 | goto fail; | 2254 | goto fail; |
2195 | } | 2255 | } |
@@ -2205,7 +2265,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms | |||
2205 | reply_size += cm->reply_body_size; | 2265 | reply_size += cm->reply_body_size; |
2206 | 2266 | ||
2207 | /* allocation not in the IO path, cqueue thread context */ | 2267 | /* allocation not in the IO path, cqueue thread context */ |
2208 | cn_reply = kmalloc(reply_size, GFP_KERNEL); | 2268 | cn_reply = kzalloc(reply_size, GFP_KERNEL); |
2209 | if (!cn_reply) { | 2269 | if (!cn_reply) { |
2210 | retcode = ERR_NOMEM; | 2270 | retcode = ERR_NOMEM; |
2211 | goto fail; | 2271 | goto fail; |
@@ -2213,7 +2273,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms | |||
2213 | reply = (struct drbd_nl_cfg_reply *) cn_reply->data; | 2273 | reply = (struct drbd_nl_cfg_reply *) cn_reply->data; |
2214 | 2274 | ||
2215 | reply->packet_type = | 2275 | reply->packet_type = |
2216 | cm->reply_body_size ? nlp->packet_type : P_nl_after_last_packet; | 2276 | cm->reply_body_size ? nlp->packet_type : P_return_code_only; |
2217 | reply->minor = nlp->drbd_minor; | 2277 | reply->minor = nlp->drbd_minor; |
2218 | reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */ | 2278 | reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */ |
2219 | /* reply->tag_list; might be modified by cm->function. */ | 2279 | /* reply->tag_list; might be modified by cm->function. */ |
@@ -2376,7 +2436,7 @@ void drbd_bcast_ee(struct drbd_conf *mdev, | |||
2376 | /* receiver thread context, which is not in the writeout path (of this node), | 2436 | /* receiver thread context, which is not in the writeout path (of this node), |
2377 | * but may be in the writeout path of the _other_ node. | 2437 | * but may be in the writeout path of the _other_ node. |
2378 | * GFP_NOIO to avoid potential "distributed deadlock". */ | 2438 | * GFP_NOIO to avoid potential "distributed deadlock". */ |
2379 | cn_reply = kmalloc( | 2439 | cn_reply = kzalloc( |
2380 | sizeof(struct cn_msg)+ | 2440 | sizeof(struct cn_msg)+ |
2381 | sizeof(struct drbd_nl_cfg_reply)+ | 2441 | sizeof(struct drbd_nl_cfg_reply)+ |
2382 | sizeof(struct dump_ee_tag_len_struct)+ | 2442 | sizeof(struct dump_ee_tag_len_struct)+ |
@@ -2398,10 +2458,11 @@ void drbd_bcast_ee(struct drbd_conf *mdev, | |||
2398 | tl = tl_add_int(tl, T_ee_sector, &e->sector); | 2458 | tl = tl_add_int(tl, T_ee_sector, &e->sector); |
2399 | tl = tl_add_int(tl, T_ee_block_id, &e->block_id); | 2459 | tl = tl_add_int(tl, T_ee_block_id, &e->block_id); |
2400 | 2460 | ||
2461 | /* dump the first 32k */ | ||
2462 | len = min_t(unsigned, e->size, 32 << 10); | ||
2401 | put_unaligned(T_ee_data, tl++); | 2463 | put_unaligned(T_ee_data, tl++); |
2402 | put_unaligned(e->size, tl++); | 2464 | put_unaligned(len, tl++); |
2403 | 2465 | ||
2404 | len = e->size; | ||
2405 | page = e->pages; | 2466 | page = e->pages; |
2406 | page_chain_for_each(page) { | 2467 | page_chain_for_each(page) { |
2407 | void *d = kmap_atomic(page, KM_USER0); | 2468 | void *d = kmap_atomic(page, KM_USER0); |
@@ -2410,6 +2471,8 @@ void drbd_bcast_ee(struct drbd_conf *mdev, | |||
2410 | kunmap_atomic(d, KM_USER0); | 2471 | kunmap_atomic(d, KM_USER0); |
2411 | tl = (unsigned short*)((char*)tl + l); | 2472 | tl = (unsigned short*)((char*)tl + l); |
2412 | len -= l; | 2473 | len -= l; |
2474 | if (len == 0) | ||
2475 | break; | ||
2413 | } | 2476 | } |
2414 | put_unaligned(TT_END, tl++); /* Close the tag list */ | 2477 | put_unaligned(TT_END, tl++); /* Close the tag list */ |
2415 | 2478 | ||
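
drbd_bcast_ee() now truncates the dumped payload to the first 32 KiB and stops walking the page chain once that budget is used up. A userspace analogue of the truncated walk; the chunk type is hypothetical and only mimics the driver's page chain:

#include <string.h>

struct chunk {
        struct chunk *next;
        const char *data;
        unsigned len;
};

/* Copy at most 'limit' bytes out of the chain, stopping early once the
 * truncated length is consumed, like the patched loop above. */
static unsigned dump_first_bytes(const struct chunk *c, char *out, unsigned limit)
{
        unsigned copied = 0;

        for (; c && copied < limit; c = c->next) {
                unsigned l = c->len < limit - copied ? c->len : limit - copied;

                memcpy(out + copied, c->data, l);
                copied += l;
        }
        return copied;
}
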
@@ -2508,6 +2571,7 @@ void drbd_nl_send_reply(struct cn_msg *req, int ret_code) | |||
2508 | (struct drbd_nl_cfg_reply *)cn_reply->data; | 2571 | (struct drbd_nl_cfg_reply *)cn_reply->data; |
2509 | int rr; | 2572 | int rr; |
2510 | 2573 | ||
2574 | memset(buffer, 0, sizeof(buffer)); | ||
2511 | cn_reply->id = req->id; | 2575 | cn_reply->id = req->id; |
2512 | 2576 | ||
2513 | cn_reply->seq = req->seq; | 2577 | cn_reply->seq = req->seq; |
@@ -2515,6 +2579,7 @@ void drbd_nl_send_reply(struct cn_msg *req, int ret_code) | |||
2515 | cn_reply->len = sizeof(struct drbd_nl_cfg_reply); | 2579 | cn_reply->len = sizeof(struct drbd_nl_cfg_reply); |
2516 | cn_reply->flags = 0; | 2580 | cn_reply->flags = 0; |
2517 | 2581 | ||
2582 | reply->packet_type = P_return_code_only; | ||
2518 | reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor; | 2583 | reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor; |
2519 | reply->ret_code = ret_code; | 2584 | reply->ret_code = ret_code; |
2520 | 2585 | ||
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 7e6ac307e2de..2959cdfb77f5 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -34,6 +34,7 @@ | |||
34 | #include "drbd_int.h" | 34 | #include "drbd_int.h" |
35 | 35 | ||
36 | static int drbd_proc_open(struct inode *inode, struct file *file); | 36 | static int drbd_proc_open(struct inode *inode, struct file *file); |
37 | static int drbd_proc_release(struct inode *inode, struct file *file); | ||
37 | 38 | ||
38 | 39 | ||
39 | struct proc_dir_entry *drbd_proc; | 40 | struct proc_dir_entry *drbd_proc; |
@@ -42,9 +43,22 @@ const struct file_operations drbd_proc_fops = { | |||
42 | .open = drbd_proc_open, | 43 | .open = drbd_proc_open, |
43 | .read = seq_read, | 44 | .read = seq_read, |
44 | .llseek = seq_lseek, | 45 | .llseek = seq_lseek, |
45 | .release = single_release, | 46 | .release = drbd_proc_release, |
46 | }; | 47 | }; |
47 | 48 | ||
49 | void seq_printf_with_thousands_grouping(struct seq_file *seq, long v) | ||
50 | { | ||
51 | /* v is in kB/sec. We don't expect TiByte/sec yet. */ | ||
52 | if (unlikely(v >= 1000000)) { | ||
53 | /* cool: > GiByte/s */ | ||
54 | seq_printf(seq, "%ld,", v / 1000000); | ||
55 | v /= 1000000; | ||
56 | seq_printf(seq, "%03ld,%03ld", v/1000, v % 1000); | ||
57 | } else if (likely(v >= 1000)) | ||
58 | seq_printf(seq, "%ld,%03ld", v/1000, v % 1000); | ||
59 | else | ||
60 | seq_printf(seq, "%ld", v); | ||
61 | } | ||
48 | 62 | ||
49 | /*lge | 63 | /*lge |
50 | * progress bars shamelessly adapted from driver/md/md.c | 64 | * progress bars shamelessly adapted from driver/md/md.c |
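
The new helper above groups a kB/s figure with thousands separators. Note that in the GiB/s branch the value is divided (`v /= 1000000`) after the leading group is printed, so the middle group appears to be lost; the standalone sketch below uses the remainder instead, which is an assumption about the intended output rather than the driver's code:

#include <stdio.h>

/* Groups v (kB/s) with thousands separators, e.g. 1234567 -> "1,234,567". */
static void print_with_thousands_grouping(char *buf, size_t len, long v)
{
        if (v >= 1000000)
                snprintf(buf, len, "%ld,%03ld,%03ld",
                         v / 1000000, (v / 1000) % 1000, v % 1000);
        else if (v >= 1000)
                snprintf(buf, len, "%ld,%03ld", v / 1000, v % 1000);
        else
                snprintf(buf, len, "%ld", v);
}

int main(void)
{
        char buf[32];

        print_with_thousands_grouping(buf, sizeof(buf), 1234567);
        puts(buf);      /* 1,234,567 */
        print_with_thousands_grouping(buf, sizeof(buf), 950);
        puts(buf);      /* 950 */
        return 0;
}
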
@@ -71,10 +85,15 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) | |||
71 | seq_printf(seq, "."); | 85 | seq_printf(seq, "."); |
72 | seq_printf(seq, "] "); | 86 | seq_printf(seq, "] "); |
73 | 87 | ||
74 | seq_printf(seq, "sync'ed:%3u.%u%% ", res / 10, res % 10); | 88 | if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) |
75 | /* if more than 1 GB display in MB */ | 89 | seq_printf(seq, "verified:"); |
76 | if (mdev->rs_total > 0x100000L) | 90 | else |
77 | seq_printf(seq, "(%lu/%lu)M\n\t", | 91 | seq_printf(seq, "sync'ed:"); |
92 | seq_printf(seq, "%3u.%u%% ", res / 10, res % 10); | ||
93 | |||
94 | /* if more than a few GB, display in MB */ | ||
95 | if (mdev->rs_total > (4UL << (30 - BM_BLOCK_SHIFT))) | ||
96 | seq_printf(seq, "(%lu/%lu)M", | ||
78 | (unsigned long) Bit2KB(rs_left >> 10), | 97 | (unsigned long) Bit2KB(rs_left >> 10), |
79 | (unsigned long) Bit2KB(mdev->rs_total >> 10)); | 98 | (unsigned long) Bit2KB(mdev->rs_total >> 10)); |
80 | else | 99 | else |
@@ -94,6 +113,7 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) | |||
94 | /* Rolling marks. last_mark+1 may just now be modified. last_mark+2 is | 113 | /* Rolling marks. last_mark+1 may just now be modified. last_mark+2 is |
95 | * at least (DRBD_SYNC_MARKS-2)*DRBD_SYNC_MARK_STEP old, and has at | 114 | * at least (DRBD_SYNC_MARKS-2)*DRBD_SYNC_MARK_STEP old, and has at |
96 | * least DRBD_SYNC_MARK_STEP time before it will be modified. */ | 115 | * least DRBD_SYNC_MARK_STEP time before it will be modified. */ |
116 | /* ------------------------ ~18s average ------------------------ */ | ||
97 | i = (mdev->rs_last_mark + 2) % DRBD_SYNC_MARKS; | 117 | i = (mdev->rs_last_mark + 2) % DRBD_SYNC_MARKS; |
98 | dt = (jiffies - mdev->rs_mark_time[i]) / HZ; | 118 | dt = (jiffies - mdev->rs_mark_time[i]) / HZ; |
99 | if (dt > (DRBD_SYNC_MARK_STEP * DRBD_SYNC_MARKS)) | 119 | if (dt > (DRBD_SYNC_MARK_STEP * DRBD_SYNC_MARKS)) |
@@ -107,14 +127,24 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) | |||
107 | seq_printf(seq, "finish: %lu:%02lu:%02lu", | 127 | seq_printf(seq, "finish: %lu:%02lu:%02lu", |
108 | rt / 3600, (rt % 3600) / 60, rt % 60); | 128 | rt / 3600, (rt % 3600) / 60, rt % 60); |
109 | 129 | ||
110 | /* current speed average over (SYNC_MARKS * SYNC_MARK_STEP) jiffies */ | ||
111 | dbdt = Bit2KB(db/dt); | 130 | dbdt = Bit2KB(db/dt); |
112 | if (dbdt > 1000) | 131 | seq_printf(seq, " speed: "); |
113 | seq_printf(seq, " speed: %ld,%03ld", | 132 | seq_printf_with_thousands_grouping(seq, dbdt); |
114 | dbdt/1000, dbdt % 1000); | 133 | seq_printf(seq, " ("); |
115 | else | 134 | /* ------------------------- ~3s average ------------------------ */ |
116 | seq_printf(seq, " speed: %ld", dbdt); | 135 | if (proc_details >= 1) { |
136 | /* this is what drbd_rs_should_slow_down() uses */ | ||
137 | i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS; | ||
138 | dt = (jiffies - mdev->rs_mark_time[i]) / HZ; | ||
139 | if (!dt) | ||
140 | dt++; | ||
141 | db = mdev->rs_mark_left[i] - rs_left; | ||
142 | dbdt = Bit2KB(db/dt); | ||
143 | seq_printf_with_thousands_grouping(seq, dbdt); | ||
144 | seq_printf(seq, " -- "); | ||
145 | } | ||
117 | 146 | ||
147 | /* --------------------- long term average ---------------------- */ | ||
118 | /* mean speed since syncer started | 148 | /* mean speed since syncer started |
119 | * we do account for PausedSync periods */ | 149 | * we do account for PausedSync periods */ |
120 | dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ; | 150 | dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ; |
@@ -122,20 +152,34 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) | |||
122 | dt = 1; | 152 | dt = 1; |
123 | db = mdev->rs_total - rs_left; | 153 | db = mdev->rs_total - rs_left; |
124 | dbdt = Bit2KB(db/dt); | 154 | dbdt = Bit2KB(db/dt); |
125 | if (dbdt > 1000) | 155 | seq_printf_with_thousands_grouping(seq, dbdt); |
126 | seq_printf(seq, " (%ld,%03ld)", | 156 | seq_printf(seq, ")"); |
127 | dbdt/1000, dbdt % 1000); | ||
128 | else | ||
129 | seq_printf(seq, " (%ld)", dbdt); | ||
130 | 157 | ||
131 | if (mdev->state.conn == C_SYNC_TARGET) { | 158 | if (mdev->state.conn == C_SYNC_TARGET || |
132 | if (mdev->c_sync_rate > 1000) | 159 | mdev->state.conn == C_VERIFY_S) { |
133 | seq_printf(seq, " want: %d,%03d", | 160 | seq_printf(seq, " want: "); |
134 | mdev->c_sync_rate / 1000, mdev->c_sync_rate % 1000); | 161 | seq_printf_with_thousands_grouping(seq, mdev->c_sync_rate); |
135 | else | ||
136 | seq_printf(seq, " want: %d", mdev->c_sync_rate); | ||
137 | } | 162 | } |
138 | seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : ""); | 163 | seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : ""); |
164 | |||
165 | if (proc_details >= 1) { | ||
166 | /* 64 bit: | ||
167 | * we convert to sectors in the display below. */ | ||
168 | unsigned long bm_bits = drbd_bm_bits(mdev); | ||
169 | unsigned long bit_pos; | ||
170 | if (mdev->state.conn == C_VERIFY_S || | ||
171 | mdev->state.conn == C_VERIFY_T) | ||
172 | bit_pos = bm_bits - mdev->ov_left; | ||
173 | else | ||
174 | bit_pos = mdev->bm_resync_fo; | ||
175 | /* Total sectors may be slightly off for oddly | ||
176 | * sized devices. So what. */ | ||
177 | seq_printf(seq, | ||
178 | "\t%3d%% sector pos: %llu/%llu\n", | ||
179 | (int)(bit_pos / (bm_bits/100+1)), | ||
180 | (unsigned long long)bit_pos * BM_SECT_PER_BIT, | ||
181 | (unsigned long long)bm_bits * BM_SECT_PER_BIT); | ||
182 | } | ||
139 | } | 183 | } |
140 | 184 | ||
141 | static void resync_dump_detail(struct seq_file *seq, struct lc_element *e) | 185 | static void resync_dump_detail(struct seq_file *seq, struct lc_element *e) |
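
With proc_details >= 1 the progress output now also reports the current position as a percentage and as 512-byte sectors, derived from the bitmap bit position. A standalone illustration of that conversion, assuming the usual 4 KiB bitmap granularity (8 sectors per bit), which is not spelled out in this hunk:

#include <stdio.h>

#define SECT_PER_BIT 8ULL       /* assumption: one bitmap bit covers 4 KiB = 8 sectors */

static void show_sector_pos(unsigned long bit_pos, unsigned long bm_bits)
{
        /* bm_bits/100+1 avoids a divide-by-zero and keeps the result <= 100 */
        printf("%3d%% sector pos: %llu/%llu\n",
               (int)(bit_pos / (bm_bits / 100 + 1)),
               (unsigned long long)bit_pos * SECT_PER_BIT,
               (unsigned long long)bm_bits * SECT_PER_BIT);
}

int main(void)
{
        show_sector_pos(1500000UL, 2000000UL);  /* " 74% sector pos: 12000000/16000000" */
        return 0;
}
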
@@ -232,20 +276,16 @@ static int drbd_seq_show(struct seq_file *seq, void *v) | |||
232 | mdev->epochs, | 276 | mdev->epochs, |
233 | write_ordering_chars[mdev->write_ordering] | 277 | write_ordering_chars[mdev->write_ordering] |
234 | ); | 278 | ); |
235 | seq_printf(seq, " oos:%lu\n", | 279 | seq_printf(seq, " oos:%llu\n", |
236 | Bit2KB(drbd_bm_total_weight(mdev))); | 280 | Bit2KB((unsigned long long) |
281 | drbd_bm_total_weight(mdev))); | ||
237 | } | 282 | } |
238 | if (mdev->state.conn == C_SYNC_SOURCE || | 283 | if (mdev->state.conn == C_SYNC_SOURCE || |
239 | mdev->state.conn == C_SYNC_TARGET) | 284 | mdev->state.conn == C_SYNC_TARGET || |
285 | mdev->state.conn == C_VERIFY_S || | ||
286 | mdev->state.conn == C_VERIFY_T) | ||
240 | drbd_syncer_progress(mdev, seq); | 287 | drbd_syncer_progress(mdev, seq); |
241 | 288 | ||
242 | if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) | ||
243 | seq_printf(seq, "\t%3d%% %lu/%lu\n", | ||
244 | (int)((mdev->rs_total-mdev->ov_left) / | ||
245 | (mdev->rs_total/100+1)), | ||
246 | mdev->rs_total - mdev->ov_left, | ||
247 | mdev->rs_total); | ||
248 | |||
249 | if (proc_details >= 1 && get_ldev_if_state(mdev, D_FAILED)) { | 289 | if (proc_details >= 1 && get_ldev_if_state(mdev, D_FAILED)) { |
250 | lc_seq_printf_stats(seq, mdev->resync); | 290 | lc_seq_printf_stats(seq, mdev->resync); |
251 | lc_seq_printf_stats(seq, mdev->act_log); | 291 | lc_seq_printf_stats(seq, mdev->act_log); |
@@ -265,7 +305,15 @@ static int drbd_seq_show(struct seq_file *seq, void *v) | |||
265 | 305 | ||
266 | static int drbd_proc_open(struct inode *inode, struct file *file) | 306 | static int drbd_proc_open(struct inode *inode, struct file *file) |
267 | { | 307 | { |
268 | return single_open(file, drbd_seq_show, PDE(inode)->data); | 308 | if (try_module_get(THIS_MODULE)) |
309 | return single_open(file, drbd_seq_show, PDE(inode)->data); | ||
310 | return -ENODEV; | ||
311 | } | ||
312 | |||
313 | static int drbd_proc_release(struct inode *inode, struct file *file) | ||
314 | { | ||
315 | module_put(THIS_MODULE); | ||
316 | return single_release(inode, file); | ||
269 | } | 317 | } |
270 | 318 | ||
271 | /* PROC FS stuff end */ | 319 | /* PROC FS stuff end */ |
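
The open handler now takes a module reference and the new release handler drops it, so the module cannot be unloaded while someone still holds /proc/drbd open. A minimal kernel-style sketch of the pairing; the names are placeholders and the show callback is a stand-in, not drbd_seq_show():

#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/fs.h>

static int example_seq_show(struct seq_file *seq, void *v)
{
        seq_printf(seq, "example\n");
        return 0;
}

/* Pin the module for as long as the file stays open. */
static int example_proc_open(struct inode *inode, struct file *file)
{
        if (try_module_get(THIS_MODULE))
                return single_open(file, example_seq_show, NULL);
        return -ENODEV;
}

static int example_proc_release(struct inode *inode, struct file *file)
{
        module_put(THIS_MODULE);
        return single_release(inode, file);
}
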
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 8e68be939deb..fe1564c7d8b6 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -277,7 +277,7 @@ static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net) | |||
277 | atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use; | 277 | atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use; |
278 | int i; | 278 | int i; |
279 | 279 | ||
280 | if (drbd_pp_vacant > (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE)*minor_count) | 280 | if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count) |
281 | i = page_chain_free(page); | 281 | i = page_chain_free(page); |
282 | else { | 282 | else { |
283 | struct page *tmp; | 283 | struct page *tmp; |
@@ -319,7 +319,7 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, | |||
319 | struct page *page; | 319 | struct page *page; |
320 | unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; | 320 | unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; |
321 | 321 | ||
322 | if (FAULT_ACTIVE(mdev, DRBD_FAULT_AL_EE)) | 322 | if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE)) |
323 | return NULL; | 323 | return NULL; |
324 | 324 | ||
325 | e = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); | 325 | e = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); |
@@ -725,16 +725,16 @@ static int drbd_socket_okay(struct drbd_conf *mdev, struct socket **sock) | |||
725 | char tb[4]; | 725 | char tb[4]; |
726 | 726 | ||
727 | if (!*sock) | 727 | if (!*sock) |
728 | return FALSE; | 728 | return false; |
729 | 729 | ||
730 | rr = drbd_recv_short(mdev, *sock, tb, 4, MSG_DONTWAIT | MSG_PEEK); | 730 | rr = drbd_recv_short(mdev, *sock, tb, 4, MSG_DONTWAIT | MSG_PEEK); |
731 | 731 | ||
732 | if (rr > 0 || rr == -EAGAIN) { | 732 | if (rr > 0 || rr == -EAGAIN) { |
733 | return TRUE; | 733 | return true; |
734 | } else { | 734 | } else { |
735 | sock_release(*sock); | 735 | sock_release(*sock); |
736 | *sock = NULL; | 736 | *sock = NULL; |
737 | return FALSE; | 737 | return false; |
738 | } | 738 | } |
739 | } | 739 | } |
740 | 740 | ||
@@ -768,8 +768,7 @@ static int drbd_connect(struct drbd_conf *mdev) | |||
768 | if (s || ++try >= 3) | 768 | if (s || ++try >= 3) |
769 | break; | 769 | break; |
770 | /* give the other side time to call bind() & listen() */ | 770 | /* give the other side time to call bind() & listen() */ |
771 | __set_current_state(TASK_INTERRUPTIBLE); | 771 | schedule_timeout_interruptible(HZ / 10); |
772 | schedule_timeout(HZ / 10); | ||
773 | } | 772 | } |
774 | 773 | ||
775 | if (s) { | 774 | if (s) { |
@@ -788,8 +787,7 @@ static int drbd_connect(struct drbd_conf *mdev) | |||
788 | } | 787 | } |
789 | 788 | ||
790 | if (sock && msock) { | 789 | if (sock && msock) { |
791 | __set_current_state(TASK_INTERRUPTIBLE); | 790 | schedule_timeout_interruptible(HZ / 10); |
792 | schedule_timeout(HZ / 10); | ||
793 | ok = drbd_socket_okay(mdev, &sock); | 791 | ok = drbd_socket_okay(mdev, &sock); |
794 | ok = drbd_socket_okay(mdev, &msock) && ok; | 792 | ok = drbd_socket_okay(mdev, &msock) && ok; |
795 | if (ok) | 793 | if (ok) |
@@ -906,7 +904,7 @@ retry: | |||
906 | put_ldev(mdev); | 904 | put_ldev(mdev); |
907 | } | 905 | } |
908 | 906 | ||
909 | if (!drbd_send_protocol(mdev)) | 907 | if (drbd_send_protocol(mdev) == -1) |
910 | return -1; | 908 | return -1; |
911 | drbd_send_sync_param(mdev, &mdev->sync_conf); | 909 | drbd_send_sync_param(mdev, &mdev->sync_conf); |
912 | drbd_send_sizes(mdev, 0, 0); | 910 | drbd_send_sizes(mdev, 0, 0); |
@@ -914,6 +912,7 @@ retry: | |||
914 | drbd_send_state(mdev); | 912 | drbd_send_state(mdev); |
915 | clear_bit(USE_DEGR_WFC_T, &mdev->flags); | 913 | clear_bit(USE_DEGR_WFC_T, &mdev->flags); |
916 | clear_bit(RESIZE_PENDING, &mdev->flags); | 914 | clear_bit(RESIZE_PENDING, &mdev->flags); |
915 | mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */ | ||
917 | 916 | ||
918 | return 1; | 917 | return 1; |
919 | 918 | ||
@@ -932,8 +931,9 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi | |||
932 | 931 | ||
933 | r = drbd_recv(mdev, h, sizeof(*h)); | 932 | r = drbd_recv(mdev, h, sizeof(*h)); |
934 | if (unlikely(r != sizeof(*h))) { | 933 | if (unlikely(r != sizeof(*h))) { |
935 | dev_err(DEV, "short read expecting header on sock: r=%d\n", r); | 934 | if (!signal_pending(current)) |
936 | return FALSE; | 935 | dev_warn(DEV, "short read expecting header on sock: r=%d\n", r); |
936 | return false; | ||
937 | } | 937 | } |
938 | 938 | ||
939 | if (likely(h->h80.magic == BE_DRBD_MAGIC)) { | 939 | if (likely(h->h80.magic == BE_DRBD_MAGIC)) { |
@@ -947,11 +947,11 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi | |||
947 | be32_to_cpu(h->h80.magic), | 947 | be32_to_cpu(h->h80.magic), |
948 | be16_to_cpu(h->h80.command), | 948 | be16_to_cpu(h->h80.command), |
949 | be16_to_cpu(h->h80.length)); | 949 | be16_to_cpu(h->h80.length)); |
950 | return FALSE; | 950 | return false; |
951 | } | 951 | } |
952 | mdev->last_received = jiffies; | 952 | mdev->last_received = jiffies; |
953 | 953 | ||
954 | return TRUE; | 954 | return true; |
955 | } | 955 | } |
956 | 956 | ||
957 | static void drbd_flush(struct drbd_conf *mdev) | 957 | static void drbd_flush(struct drbd_conf *mdev) |
@@ -1074,6 +1074,16 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) | |||
1074 | * @mdev: DRBD device. | 1074 | * @mdev: DRBD device. |
1075 | * @e: epoch entry | 1075 | * @e: epoch entry |
1076 | * @rw: flag field, see bio->bi_rw | 1076 | * @rw: flag field, see bio->bi_rw |
1077 | * | ||
1078 | * May spread the pages to multiple bios, | ||
1079 | * depending on bio_add_page restrictions. | ||
1080 | * | ||
1081 | * Returns 0 if all bios have been submitted, | ||
1082 | * -ENOMEM if we could not allocate enough bios, | ||
1083 | * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a | ||
1084 | * single page to an empty bio (which should never happen and likely indicates | ||
1085 | * that the lower level IO stack is in some way broken). This has been observed | ||
1086 | * on certain Xen deployments. | ||
1077 | */ | 1087 | */ |
1078 | /* TODO allocate from our own bio_set. */ | 1088 | /* TODO allocate from our own bio_set. */ |
1079 | int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, | 1089 | int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, |
@@ -1086,6 +1096,7 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, | |||
1086 | unsigned ds = e->size; | 1096 | unsigned ds = e->size; |
1087 | unsigned n_bios = 0; | 1097 | unsigned n_bios = 0; |
1088 | unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT; | 1098 | unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT; |
1099 | int err = -ENOMEM; | ||
1089 | 1100 | ||
1090 | /* In most cases, we will only need one bio. But in case the lower | 1101 | /* In most cases, we will only need one bio. But in case the lower |
1091 | * level restrictions happen to be different at this offset on this | 1102 | * level restrictions happen to be different at this offset on this |
@@ -1111,8 +1122,17 @@ next_bio: | |||
1111 | page_chain_for_each(page) { | 1122 | page_chain_for_each(page) { |
1112 | unsigned len = min_t(unsigned, ds, PAGE_SIZE); | 1123 | unsigned len = min_t(unsigned, ds, PAGE_SIZE); |
1113 | if (!bio_add_page(bio, page, len, 0)) { | 1124 | if (!bio_add_page(bio, page, len, 0)) { |
1114 | /* a single page must always be possible! */ | 1125 | /* A single page must always be possible! |
1115 | BUG_ON(bio->bi_vcnt == 0); | 1126 | * But in case it fails anyways, |
1127 | * we deal with it, and complain (below). */ | ||
1128 | if (bio->bi_vcnt == 0) { | ||
1129 | dev_err(DEV, | ||
1130 | "bio_add_page failed for len=%u, " | ||
1131 | "bi_vcnt=0 (bi_sector=%llu)\n", | ||
1132 | len, (unsigned long long)bio->bi_sector); | ||
1133 | err = -ENOSPC; | ||
1134 | goto fail; | ||
1135 | } | ||
1116 | goto next_bio; | 1136 | goto next_bio; |
1117 | } | 1137 | } |
1118 | ds -= len; | 1138 | ds -= len; |
@@ -1138,7 +1158,7 @@ fail: | |||
1138 | bios = bios->bi_next; | 1158 | bios = bios->bi_next; |
1139 | bio_put(bio); | 1159 | bio_put(bio); |
1140 | } | 1160 | } |
1141 | return -ENOMEM; | 1161 | return err; |
1142 | } | 1162 | } |
1143 | 1163 | ||
1144 | static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) | 1164 | static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) |
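
drbd_submit_ee() documents its return contract above (0 on success, -ENOMEM when bios cannot be allocated, -ENOSPC when even an empty bio refuses a page) and now reports that last case instead of hitting a BUG_ON(). A userspace analogue of the splitting strategy, with a fake bio type standing in for the real block-layer objects:

#include <stdio.h>

struct fake_bio { unsigned vcnt, cap; };

static int fake_bio_add_page(struct fake_bio *bio)
{
        if (bio->vcnt >= bio->cap)
                return 0;       /* refused, like bio_add_page() returning 0 */
        bio->vcnt++;
        return 1;
}

/* Pack nr_pages into as many "bios" as needed; give up only if an empty
 * bio refuses a page (the -ENOSPC case above). */
static int submit_pages(unsigned nr_pages, unsigned cap)
{
        struct fake_bio bio = { 0, cap };
        int n_bios = 1;

        while (nr_pages) {
                if (!fake_bio_add_page(&bio)) {
                        if (bio.vcnt == 0)
                                return -1;      /* would be -ENOSPC */
                        bio.vcnt = 0;           /* "goto next_bio" */
                        n_bios++;
                        continue;
                }
                nr_pages--;
        }
        return n_bios;
}

int main(void)
{
        printf("needed %d bios for 20 pages\n", submit_pages(20, 8));
        return 0;
}
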
@@ -1160,7 +1180,7 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsign | |||
1160 | switch (mdev->write_ordering) { | 1180 | switch (mdev->write_ordering) { |
1161 | case WO_none: | 1181 | case WO_none: |
1162 | if (rv == FE_RECYCLED) | 1182 | if (rv == FE_RECYCLED) |
1163 | return TRUE; | 1183 | return true; |
1164 | 1184 | ||
1165 | /* receiver context, in the writeout path of the other node. | 1185 | /* receiver context, in the writeout path of the other node. |
1166 | * avoid potential distributed deadlock */ | 1186 | * avoid potential distributed deadlock */ |
@@ -1188,10 +1208,10 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsign | |||
1188 | D_ASSERT(atomic_read(&epoch->active) == 0); | 1208 | D_ASSERT(atomic_read(&epoch->active) == 0); |
1189 | D_ASSERT(epoch->flags == 0); | 1209 | D_ASSERT(epoch->flags == 0); |
1190 | 1210 | ||
1191 | return TRUE; | 1211 | return true; |
1192 | default: | 1212 | default: |
1193 | dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering); | 1213 | dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering); |
1194 | return FALSE; | 1214 | return false; |
1195 | } | 1215 | } |
1196 | 1216 | ||
1197 | epoch->flags = 0; | 1217 | epoch->flags = 0; |
@@ -1209,7 +1229,7 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsign | |||
1209 | } | 1229 | } |
1210 | spin_unlock(&mdev->epoch_lock); | 1230 | spin_unlock(&mdev->epoch_lock); |
1211 | 1231 | ||
1212 | return TRUE; | 1232 | return true; |
1213 | } | 1233 | } |
1214 | 1234 | ||
1215 | /* used from receive_RSDataReply (recv_resync_read) | 1235 | /* used from receive_RSDataReply (recv_resync_read) |
@@ -1231,21 +1251,25 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __ | |||
1231 | if (dgs) { | 1251 | if (dgs) { |
1232 | rr = drbd_recv(mdev, dig_in, dgs); | 1252 | rr = drbd_recv(mdev, dig_in, dgs); |
1233 | if (rr != dgs) { | 1253 | if (rr != dgs) { |
1234 | dev_warn(DEV, "short read receiving data digest: read %d expected %d\n", | 1254 | if (!signal_pending(current)) |
1235 | rr, dgs); | 1255 | dev_warn(DEV, |
1256 | "short read receiving data digest: read %d expected %d\n", | ||
1257 | rr, dgs); | ||
1236 | return NULL; | 1258 | return NULL; |
1237 | } | 1259 | } |
1238 | } | 1260 | } |
1239 | 1261 | ||
1240 | data_size -= dgs; | 1262 | data_size -= dgs; |
1241 | 1263 | ||
1264 | ERR_IF(data_size == 0) return NULL; | ||
1242 | ERR_IF(data_size & 0x1ff) return NULL; | 1265 | ERR_IF(data_size & 0x1ff) return NULL; |
1243 | ERR_IF(data_size > DRBD_MAX_SEGMENT_SIZE) return NULL; | 1266 | ERR_IF(data_size > DRBD_MAX_BIO_SIZE) return NULL; |
1244 | 1267 | ||
1245 | /* even though we trust out peer, | 1268 | /* even though we trust out peer, |
1246 | * we sometimes have to double check. */ | 1269 | * we sometimes have to double check. */ |
1247 | if (sector + (data_size>>9) > capacity) { | 1270 | if (sector + (data_size>>9) > capacity) { |
1248 | dev_err(DEV, "capacity: %llus < sector: %llus + size: %u\n", | 1271 | dev_err(DEV, "request from peer beyond end of local disk: " |
1272 | "capacity: %llus < sector: %llus + size: %u\n", | ||
1249 | (unsigned long long)capacity, | 1273 | (unsigned long long)capacity, |
1250 | (unsigned long long)sector, data_size); | 1274 | (unsigned long long)sector, data_size); |
1251 | return NULL; | 1275 | return NULL; |
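
read_in_block() now rejects zero-length, unaligned, oversized, and out-of-range requests before allocating anything. The checks in isolation, as a sketch with a placeholder parameter where the driver uses DRBD_MAX_BIO_SIZE:

#include <stdint.h>

static int request_is_sane(uint64_t capacity_sectors, uint64_t sector,
                           unsigned data_size, unsigned max_bio_size)
{
        if (data_size == 0)
                return 0;               /* nothing to read is treated as an error */
        if (data_size & 0x1ff)
                return 0;               /* must be a multiple of 512 bytes */
        if (data_size > max_bio_size)
                return 0;               /* larger than we would ever send ourselves */
        if (sector + (data_size >> 9) > capacity_sectors)
                return 0;               /* would run past the end of the local disk */
        return 1;
}
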
@@ -1264,15 +1288,16 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __ | |||
1264 | unsigned len = min_t(int, ds, PAGE_SIZE); | 1288 | unsigned len = min_t(int, ds, PAGE_SIZE); |
1265 | data = kmap(page); | 1289 | data = kmap(page); |
1266 | rr = drbd_recv(mdev, data, len); | 1290 | rr = drbd_recv(mdev, data, len); |
1267 | if (FAULT_ACTIVE(mdev, DRBD_FAULT_RECEIVE)) { | 1291 | if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) { |
1268 | dev_err(DEV, "Fault injection: Corrupting data on receive\n"); | 1292 | dev_err(DEV, "Fault injection: Corrupting data on receive\n"); |
1269 | data[0] = data[0] ^ (unsigned long)-1; | 1293 | data[0] = data[0] ^ (unsigned long)-1; |
1270 | } | 1294 | } |
1271 | kunmap(page); | 1295 | kunmap(page); |
1272 | if (rr != len) { | 1296 | if (rr != len) { |
1273 | drbd_free_ee(mdev, e); | 1297 | drbd_free_ee(mdev, e); |
1274 | dev_warn(DEV, "short read receiving data: read %d expected %d\n", | 1298 | if (!signal_pending(current)) |
1275 | rr, len); | 1299 | dev_warn(DEV, "short read receiving data: read %d expected %d\n", |
1300 | rr, len); | ||
1276 | return NULL; | 1301 | return NULL; |
1277 | } | 1302 | } |
1278 | ds -= rr; | 1303 | ds -= rr; |
@@ -1281,7 +1306,8 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __ | |||
1281 | if (dgs) { | 1306 | if (dgs) { |
1282 | drbd_csum_ee(mdev, mdev->integrity_r_tfm, e, dig_vv); | 1307 | drbd_csum_ee(mdev, mdev->integrity_r_tfm, e, dig_vv); |
1283 | if (memcmp(dig_in, dig_vv, dgs)) { | 1308 | if (memcmp(dig_in, dig_vv, dgs)) { |
1284 | dev_err(DEV, "Digest integrity check FAILED.\n"); | 1309 | dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n", |
1310 | (unsigned long long)sector, data_size); | ||
1285 | drbd_bcast_ee(mdev, "digest failed", | 1311 | drbd_bcast_ee(mdev, "digest failed", |
1286 | dgs, dig_in, dig_vv, e); | 1312 | dgs, dig_in, dig_vv, e); |
1287 | drbd_free_ee(mdev, e); | 1313 | drbd_free_ee(mdev, e); |
@@ -1302,7 +1328,7 @@ static int drbd_drain_block(struct drbd_conf *mdev, int data_size) | |||
1302 | void *data; | 1328 | void *data; |
1303 | 1329 | ||
1304 | if (!data_size) | 1330 | if (!data_size) |
1305 | return TRUE; | 1331 | return true; |
1306 | 1332 | ||
1307 | page = drbd_pp_alloc(mdev, 1, 1); | 1333 | page = drbd_pp_alloc(mdev, 1, 1); |
1308 | 1334 | ||
@@ -1311,8 +1337,10 @@ static int drbd_drain_block(struct drbd_conf *mdev, int data_size) | |||
1311 | rr = drbd_recv(mdev, data, min_t(int, data_size, PAGE_SIZE)); | 1337 | rr = drbd_recv(mdev, data, min_t(int, data_size, PAGE_SIZE)); |
1312 | if (rr != min_t(int, data_size, PAGE_SIZE)) { | 1338 | if (rr != min_t(int, data_size, PAGE_SIZE)) { |
1313 | rv = 0; | 1339 | rv = 0; |
1314 | dev_warn(DEV, "short read receiving data: read %d expected %d\n", | 1340 | if (!signal_pending(current)) |
1315 | rr, min_t(int, data_size, PAGE_SIZE)); | 1341 | dev_warn(DEV, |
1342 | "short read receiving data: read %d expected %d\n", | ||
1343 | rr, min_t(int, data_size, PAGE_SIZE)); | ||
1316 | break; | 1344 | break; |
1317 | } | 1345 | } |
1318 | data_size -= rr; | 1346 | data_size -= rr; |
@@ -1337,8 +1365,10 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, | |||
1337 | if (dgs) { | 1365 | if (dgs) { |
1338 | rr = drbd_recv(mdev, dig_in, dgs); | 1366 | rr = drbd_recv(mdev, dig_in, dgs); |
1339 | if (rr != dgs) { | 1367 | if (rr != dgs) { |
1340 | dev_warn(DEV, "short read receiving data reply digest: read %d expected %d\n", | 1368 | if (!signal_pending(current)) |
1341 | rr, dgs); | 1369 | dev_warn(DEV, |
1370 | "short read receiving data reply digest: read %d expected %d\n", | ||
1371 | rr, dgs); | ||
1342 | return 0; | 1372 | return 0; |
1343 | } | 1373 | } |
1344 | } | 1374 | } |
@@ -1359,9 +1389,10 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, | |||
1359 | expect); | 1389 | expect); |
1360 | kunmap(bvec->bv_page); | 1390 | kunmap(bvec->bv_page); |
1361 | if (rr != expect) { | 1391 | if (rr != expect) { |
1362 | dev_warn(DEV, "short read receiving data reply: " | 1392 | if (!signal_pending(current)) |
1363 | "read %d expected %d\n", | 1393 | dev_warn(DEV, "short read receiving data reply: " |
1364 | rr, expect); | 1394 | "read %d expected %d\n", |
1395 | rr, expect); | ||
1365 | return 0; | 1396 | return 0; |
1366 | } | 1397 | } |
1367 | data_size -= rr; | 1398 | data_size -= rr; |
@@ -1425,11 +1456,10 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si | |||
1425 | 1456 | ||
1426 | atomic_add(data_size >> 9, &mdev->rs_sect_ev); | 1457 | atomic_add(data_size >> 9, &mdev->rs_sect_ev); |
1427 | if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0) | 1458 | if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0) |
1428 | return TRUE; | 1459 | return true; |
1429 | 1460 | ||
1430 | /* drbd_submit_ee currently fails for one reason only: | 1461 | /* don't care for the reason here */ |
1431 | * not being able to allocate enough bios. | 1462 | dev_err(DEV, "submit failed, triggering re-connect\n"); |
1432 | * Is dropping the connection going to help? */ | ||
1433 | spin_lock_irq(&mdev->req_lock); | 1463 | spin_lock_irq(&mdev->req_lock); |
1434 | list_del(&e->w.list); | 1464 | list_del(&e->w.list); |
1435 | spin_unlock_irq(&mdev->req_lock); | 1465 | spin_unlock_irq(&mdev->req_lock); |
@@ -1437,7 +1467,7 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si | |||
1437 | drbd_free_ee(mdev, e); | 1467 | drbd_free_ee(mdev, e); |
1438 | fail: | 1468 | fail: |
1439 | put_ldev(mdev); | 1469 | put_ldev(mdev); |
1440 | return FALSE; | 1470 | return false; |
1441 | } | 1471 | } |
1442 | 1472 | ||
1443 | static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) | 1473 | static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) |
@@ -1454,7 +1484,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi | |||
1454 | spin_unlock_irq(&mdev->req_lock); | 1484 | spin_unlock_irq(&mdev->req_lock); |
1455 | if (unlikely(!req)) { | 1485 | if (unlikely(!req)) { |
1456 | dev_err(DEV, "Got a corrupt block_id/sector pair(1).\n"); | 1486 | dev_err(DEV, "Got a corrupt block_id/sector pair(1).\n"); |
1457 | return FALSE; | 1487 | return false; |
1458 | } | 1488 | } |
1459 | 1489 | ||
1460 | /* hlist_del(&req->colision) is done in _req_may_be_done, to avoid | 1490 | /* hlist_del(&req->colision) is done in _req_may_be_done, to avoid |
@@ -1611,15 +1641,15 @@ static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq) | |||
1611 | return ret; | 1641 | return ret; |
1612 | } | 1642 | } |
1613 | 1643 | ||
1614 | static unsigned long write_flags_to_bio(struct drbd_conf *mdev, u32 dpf) | 1644 | /* see also bio_flags_to_wire() |
1645 | * DRBD_REQ_*, because we need to semantically map the flags to data packet | ||
1646 | * flags and back. We may replicate to other kernel versions. */ | ||
1647 | static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf) | ||
1615 | { | 1648 | { |
1616 | if (mdev->agreed_pro_version >= 95) | 1649 | return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) | |
1617 | return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) | | 1650 | (dpf & DP_FUA ? REQ_FUA : 0) | |
1618 | (dpf & DP_FUA ? REQ_FUA : 0) | | 1651 | (dpf & DP_FLUSH ? REQ_FLUSH : 0) | |
1619 | (dpf & DP_FLUSH ? REQ_FUA : 0) | | 1652 | (dpf & DP_DISCARD ? REQ_DISCARD : 0); |
1620 | (dpf & DP_DISCARD ? REQ_DISCARD : 0); | ||
1621 | else | ||
1622 | return dpf & DP_RW_SYNC ? REQ_SYNC : 0; | ||
1623 | } | 1653 | } |
1624 | 1654 | ||
1625 | /* mirrored write */ | 1655 | /* mirrored write */ |
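
The renamed wire_flags_to_bio() maps each data-packet flag straight to its bio counterpart; the old DP_FLUSH -> REQ_FUA mapping becomes DP_FLUSH -> REQ_FLUSH, and the protocol-version branch is gone. A standalone sketch of the mapping with stand-in bit values (the real DP_* and REQ_* constants live in the driver and block-layer headers):

#include <stdio.h>

#define DP_RW_SYNC  (1u << 0)
#define DP_FUA      (1u << 1)
#define DP_FLUSH    (1u << 2)
#define DP_DISCARD  (1u << 3)

#define REQ_SYNC    (1u << 8)
#define REQ_FUA     (1u << 9)
#define REQ_FLUSH   (1u << 10)
#define REQ_DISCARD (1u << 11)

/* One-to-one translation of wire flags into bio flags. */
static unsigned long wire_flags_to_bio_sketch(unsigned dpf)
{
        return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
               (dpf & DP_FUA     ? REQ_FUA : 0) |
               (dpf & DP_FLUSH   ? REQ_FLUSH : 0) |
               (dpf & DP_DISCARD ? REQ_DISCARD : 0);
}

int main(void)
{
        printf("0x%lx\n", wire_flags_to_bio_sketch(DP_FUA | DP_FLUSH));
        return 0;
}
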
@@ -1632,9 +1662,6 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
1632 | u32 dp_flags; | 1662 | u32 dp_flags; |
1633 | 1663 | ||
1634 | if (!get_ldev(mdev)) { | 1664 | if (!get_ldev(mdev)) { |
1635 | if (__ratelimit(&drbd_ratelimit_state)) | ||
1636 | dev_err(DEV, "Can not write mirrored data block " | ||
1637 | "to local disk.\n"); | ||
1638 | spin_lock(&mdev->peer_seq_lock); | 1665 | spin_lock(&mdev->peer_seq_lock); |
1639 | if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num)) | 1666 | if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num)) |
1640 | mdev->peer_seq++; | 1667 | mdev->peer_seq++; |
@@ -1654,23 +1681,23 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
1654 | e = read_in_block(mdev, p->block_id, sector, data_size); | 1681 | e = read_in_block(mdev, p->block_id, sector, data_size); |
1655 | if (!e) { | 1682 | if (!e) { |
1656 | put_ldev(mdev); | 1683 | put_ldev(mdev); |
1657 | return FALSE; | 1684 | return false; |
1658 | } | 1685 | } |
1659 | 1686 | ||
1660 | e->w.cb = e_end_block; | 1687 | e->w.cb = e_end_block; |
1661 | 1688 | ||
1689 | dp_flags = be32_to_cpu(p->dp_flags); | ||
1690 | rw |= wire_flags_to_bio(mdev, dp_flags); | ||
1691 | |||
1692 | if (dp_flags & DP_MAY_SET_IN_SYNC) | ||
1693 | e->flags |= EE_MAY_SET_IN_SYNC; | ||
1694 | |||
1662 | spin_lock(&mdev->epoch_lock); | 1695 | spin_lock(&mdev->epoch_lock); |
1663 | e->epoch = mdev->current_epoch; | 1696 | e->epoch = mdev->current_epoch; |
1664 | atomic_inc(&e->epoch->epoch_size); | 1697 | atomic_inc(&e->epoch->epoch_size); |
1665 | atomic_inc(&e->epoch->active); | 1698 | atomic_inc(&e->epoch->active); |
1666 | spin_unlock(&mdev->epoch_lock); | 1699 | spin_unlock(&mdev->epoch_lock); |
1667 | 1700 | ||
1668 | dp_flags = be32_to_cpu(p->dp_flags); | ||
1669 | rw |= write_flags_to_bio(mdev, dp_flags); | ||
1670 | |||
1671 | if (dp_flags & DP_MAY_SET_IN_SYNC) | ||
1672 | e->flags |= EE_MAY_SET_IN_SYNC; | ||
1673 | |||
1674 | /* I'm the receiver, I do hold a net_cnt reference. */ | 1701 | /* I'm the receiver, I do hold a net_cnt reference. */ |
1675 | if (!mdev->net_conf->two_primaries) { | 1702 | if (!mdev->net_conf->two_primaries) { |
1676 | spin_lock_irq(&mdev->req_lock); | 1703 | spin_lock_irq(&mdev->req_lock); |
@@ -1773,7 +1800,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
1773 | put_ldev(mdev); | 1800 | put_ldev(mdev); |
1774 | wake_asender(mdev); | 1801 | wake_asender(mdev); |
1775 | finish_wait(&mdev->misc_wait, &wait); | 1802 | finish_wait(&mdev->misc_wait, &wait); |
1776 | return TRUE; | 1803 | return true; |
1777 | } | 1804 | } |
1778 | 1805 | ||
1779 | if (signal_pending(current)) { | 1806 | if (signal_pending(current)) { |
@@ -1829,11 +1856,10 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
1829 | } | 1856 | } |
1830 | 1857 | ||
1831 | if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0) | 1858 | if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0) |
1832 | return TRUE; | 1859 | return true; |
1833 | 1860 | ||
1834 | /* drbd_submit_ee currently fails for one reason only: | 1861 | /* don't care for the reason here */ |
1835 | * not being able to allocate enough bios. | 1862 | dev_err(DEV, "submit failed, triggering re-connect\n"); |
1836 | * Is dropping the connection going to help? */ | ||
1837 | spin_lock_irq(&mdev->req_lock); | 1863 | spin_lock_irq(&mdev->req_lock); |
1838 | list_del(&e->w.list); | 1864 | list_del(&e->w.list); |
1839 | hlist_del_init(&e->colision); | 1865 | hlist_del_init(&e->colision); |
@@ -1842,12 +1868,10 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
1842 | drbd_al_complete_io(mdev, e->sector); | 1868 | drbd_al_complete_io(mdev, e->sector); |
1843 | 1869 | ||
1844 | out_interrupted: | 1870 | out_interrupted: |
1845 | /* yes, the epoch_size now is imbalanced. | 1871 | drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + EV_CLEANUP); |
1846 | * but we drop the connection anyways, so we don't have a chance to | ||
1847 | * receive a barrier... atomic_inc(&mdev->epoch_size); */ | ||
1848 | put_ldev(mdev); | 1872 | put_ldev(mdev); |
1849 | drbd_free_ee(mdev, e); | 1873 | drbd_free_ee(mdev, e); |
1850 | return FALSE; | 1874 | return false; |
1851 | } | 1875 | } |
1852 | 1876 | ||
1853 | /* We may throttle resync, if the lower device seems to be busy, | 1877 | /* We may throttle resync, if the lower device seems to be busy, |
@@ -1861,10 +1885,11 @@ out_interrupted: | |||
1861 | * The current sync rate used here uses only the most recent two step marks, | 1885 | * The current sync rate used here uses only the most recent two step marks, |
1862 | * to have a short time average so we can react faster. | 1886 | * to have a short time average so we can react faster. |
1863 | */ | 1887 | */ |
1864 | int drbd_rs_should_slow_down(struct drbd_conf *mdev) | 1888 | int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector) |
1865 | { | 1889 | { |
1866 | struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk; | 1890 | struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk; |
1867 | unsigned long db, dt, dbdt; | 1891 | unsigned long db, dt, dbdt; |
1892 | struct lc_element *tmp; | ||
1868 | int curr_events; | 1893 | int curr_events; |
1869 | int throttle = 0; | 1894 | int throttle = 0; |
1870 | 1895 | ||
@@ -1872,9 +1897,22 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev) | |||
1872 | if (mdev->sync_conf.c_min_rate == 0) | 1897 | if (mdev->sync_conf.c_min_rate == 0) |
1873 | return 0; | 1898 | return 0; |
1874 | 1899 | ||
1900 | spin_lock_irq(&mdev->al_lock); | ||
1901 | tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector)); | ||
1902 | if (tmp) { | ||
1903 | struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); | ||
1904 | if (test_bit(BME_PRIORITY, &bm_ext->flags)) { | ||
1905 | spin_unlock_irq(&mdev->al_lock); | ||
1906 | return 0; | ||
1907 | } | ||
1908 | /* Do not slow down if app IO is already waiting for this extent */ | ||
1909 | } | ||
1910 | spin_unlock_irq(&mdev->al_lock); | ||
1911 | |||
1875 | curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + | 1912 | curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + |
1876 | (int)part_stat_read(&disk->part0, sectors[1]) - | 1913 | (int)part_stat_read(&disk->part0, sectors[1]) - |
1877 | atomic_read(&mdev->rs_sect_ev); | 1914 | atomic_read(&mdev->rs_sect_ev); |
1915 | |||
1878 | if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) { | 1916 | if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) { |
1879 | unsigned long rs_left; | 1917 | unsigned long rs_left; |
1880 | int i; | 1918 | int i; |
@@ -1883,8 +1921,12 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev) | |||
1883 | 1921 | ||
1884 | /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP, | 1922 | /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP, |
1885 | * approx. */ | 1923 | * approx. */ |
1886 | i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-2) % DRBD_SYNC_MARKS; | 1924 | i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS; |
1887 | rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed; | 1925 | |
1926 | if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) | ||
1927 | rs_left = mdev->ov_left; | ||
1928 | else | ||
1929 | rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed; | ||
1888 | 1930 | ||
1889 | dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ; | 1931 | dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ; |
1890 | if (!dt) | 1932 | if (!dt) |
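The throttle decision that completes this function (below the visible hunk) compares a short-term resync rate against the configured c_min_rate. Roughly, and only as an illustration of the arithmetic using the variables declared above:

/* Sketch of the rate check: db is the number of bitmap bits cleared since
 * the selected mark, dt the elapsed seconds, Bit2KB() converts bits to KiB. */
db = mdev->rs_mark_left[i] - rs_left;
dbdt = Bit2KB(db / dt);                      /* current sync rate in KiB/s */
if (dbdt > mdev->sync_conf.c_min_rate)
	throttle = 1;                        /* resync is fast enough; hold it back */

Note also the new early return added above: if application IO is already waiting on the affected resync extent (BME_PRIORITY is set), the request is never throttled, so a waiting application write cannot be starved by its own resync.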
@@ -1912,15 +1954,15 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un | |||
1912 | sector = be64_to_cpu(p->sector); | 1954 | sector = be64_to_cpu(p->sector); |
1913 | size = be32_to_cpu(p->blksize); | 1955 | size = be32_to_cpu(p->blksize); |
1914 | 1956 | ||
1915 | if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { | 1957 | if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) { |
1916 | dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, | 1958 | dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, |
1917 | (unsigned long long)sector, size); | 1959 | (unsigned long long)sector, size); |
1918 | return FALSE; | 1960 | return false; |
1919 | } | 1961 | } |
1920 | if (sector + (size>>9) > capacity) { | 1962 | if (sector + (size>>9) > capacity) { |
1921 | dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, | 1963 | dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, |
1922 | (unsigned long long)sector, size); | 1964 | (unsigned long long)sector, size); |
1923 | return FALSE; | 1965 | return false; |
1924 | } | 1966 | } |
1925 | 1967 | ||
1926 | if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) { | 1968 | if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) { |
@@ -1957,7 +1999,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un | |||
1957 | e = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO); | 1999 | e = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO); |
1958 | if (!e) { | 2000 | if (!e) { |
1959 | put_ldev(mdev); | 2001 | put_ldev(mdev); |
1960 | return FALSE; | 2002 | return false; |
1961 | } | 2003 | } |
1962 | 2004 | ||
1963 | switch (cmd) { | 2005 | switch (cmd) { |
@@ -1970,6 +2012,8 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un | |||
1970 | case P_RS_DATA_REQUEST: | 2012 | case P_RS_DATA_REQUEST: |
1971 | e->w.cb = w_e_end_rsdata_req; | 2013 | e->w.cb = w_e_end_rsdata_req; |
1972 | fault_type = DRBD_FAULT_RS_RD; | 2014 | fault_type = DRBD_FAULT_RS_RD; |
2015 | /* used in the sector offset progress display */ | ||
2016 | mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); | ||
1973 | break; | 2017 | break; |
1974 | 2018 | ||
1975 | case P_OV_REPLY: | 2019 | case P_OV_REPLY: |
@@ -1991,7 +2035,11 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un | |||
1991 | if (cmd == P_CSUM_RS_REQUEST) { | 2035 | if (cmd == P_CSUM_RS_REQUEST) { |
1992 | D_ASSERT(mdev->agreed_pro_version >= 89); | 2036 | D_ASSERT(mdev->agreed_pro_version >= 89); |
1993 | e->w.cb = w_e_end_csum_rs_req; | 2037 | e->w.cb = w_e_end_csum_rs_req; |
2038 | /* used in the sector offset progress display */ | ||
2039 | mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); | ||
1994 | } else if (cmd == P_OV_REPLY) { | 2040 | } else if (cmd == P_OV_REPLY) { |
2041 | /* track progress, we may need to throttle */ | ||
2042 | atomic_add(size >> 9, &mdev->rs_sect_in); | ||
1995 | e->w.cb = w_e_end_ov_reply; | 2043 | e->w.cb = w_e_end_ov_reply; |
1996 | dec_rs_pending(mdev); | 2044 | dec_rs_pending(mdev); |
1997 | /* drbd_rs_begin_io done when we sent this request, | 2045 | /* drbd_rs_begin_io done when we sent this request, |
@@ -2003,9 +2051,16 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un | |||
2003 | case P_OV_REQUEST: | 2051 | case P_OV_REQUEST: |
2004 | if (mdev->ov_start_sector == ~(sector_t)0 && | 2052 | if (mdev->ov_start_sector == ~(sector_t)0 && |
2005 | mdev->agreed_pro_version >= 90) { | 2053 | mdev->agreed_pro_version >= 90) { |
2054 | unsigned long now = jiffies; | ||
2055 | int i; | ||
2006 | mdev->ov_start_sector = sector; | 2056 | mdev->ov_start_sector = sector; |
2007 | mdev->ov_position = sector; | 2057 | mdev->ov_position = sector; |
2008 | mdev->ov_left = mdev->rs_total - BM_SECT_TO_BIT(sector); | 2058 | mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector); |
2059 | mdev->rs_total = mdev->ov_left; | ||
2060 | for (i = 0; i < DRBD_SYNC_MARKS; i++) { | ||
2061 | mdev->rs_mark_left[i] = mdev->ov_left; | ||
2062 | mdev->rs_mark_time[i] = now; | ||
2063 | } | ||
2009 | dev_info(DEV, "Online Verify start sector: %llu\n", | 2064 | dev_info(DEV, "Online Verify start sector: %llu\n", |
2010 | (unsigned long long)sector); | 2065 | (unsigned long long)sector); |
2011 | } | 2066 | } |
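Seeding rs_total and every rs_mark_left/rs_mark_time slot with the verify position keeps the usual rate and progress reporting meaningful when online verify starts somewhere in the middle of the device. Purely as an illustration of what that bookkeeping enables (not code from this patch):

/* Hypothetical progress calculation based on the fields initialized above. */
unsigned long done = mdev->rs_total - mdev->ov_left;     /* bits already verified */
unsigned int percent = mdev->rs_total
	? (unsigned int)(done / (mdev->rs_total / 100 + 1)) /* avoids 64-bit math */
	: 100;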
@@ -2042,9 +2097,9 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un | |||
2042 | * we would also throttle its application reads. | 2097 | * we would also throttle its application reads. |
2043 | * In that case, throttling is done on the SyncTarget only. | 2098 | * In that case, throttling is done on the SyncTarget only. |
2044 | */ | 2099 | */ |
2045 | if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev)) | 2100 | if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector)) |
2046 | msleep(100); | 2101 | schedule_timeout_uninterruptible(HZ/10); |
2047 | if (drbd_rs_begin_io(mdev, e->sector)) | 2102 | if (drbd_rs_begin_io(mdev, sector)) |
2048 | goto out_free_e; | 2103 | goto out_free_e; |
2049 | 2104 | ||
2050 | submit_for_resync: | 2105 | submit_for_resync: |
@@ -2057,11 +2112,10 @@ submit: | |||
2057 | spin_unlock_irq(&mdev->req_lock); | 2112 | spin_unlock_irq(&mdev->req_lock); |
2058 | 2113 | ||
2059 | if (drbd_submit_ee(mdev, e, READ, fault_type) == 0) | 2114 | if (drbd_submit_ee(mdev, e, READ, fault_type) == 0) |
2060 | return TRUE; | 2115 | return true; |
2061 | 2116 | ||
2062 | /* drbd_submit_ee currently fails for one reason only: | 2117 | /* don't care for the reason here */ |
2063 | * not being able to allocate enough bios. | 2118 | dev_err(DEV, "submit failed, triggering re-connect\n"); |
2064 | * Is dropping the connection going to help? */ | ||
2065 | spin_lock_irq(&mdev->req_lock); | 2119 | spin_lock_irq(&mdev->req_lock); |
2066 | list_del(&e->w.list); | 2120 | list_del(&e->w.list); |
2067 | spin_unlock_irq(&mdev->req_lock); | 2121 | spin_unlock_irq(&mdev->req_lock); |
@@ -2070,7 +2124,7 @@ submit: | |||
2070 | out_free_e: | 2124 | out_free_e: |
2071 | put_ldev(mdev); | 2125 | put_ldev(mdev); |
2072 | drbd_free_ee(mdev, e); | 2126 | drbd_free_ee(mdev, e); |
2073 | return FALSE; | 2127 | return false; |
2074 | } | 2128 | } |
2075 | 2129 | ||
2076 | static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) | 2130 | static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) |
@@ -2147,10 +2201,7 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) | |||
2147 | 2201 | ||
2148 | static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) | 2202 | static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) |
2149 | { | 2203 | { |
2150 | int self, peer, hg, rv = -100; | 2204 | int hg, rv = -100; |
2151 | |||
2152 | self = mdev->ldev->md.uuid[UI_BITMAP] & 1; | ||
2153 | peer = mdev->p_uuid[UI_BITMAP] & 1; | ||
2154 | 2205 | ||
2155 | switch (mdev->net_conf->after_sb_1p) { | 2206 | switch (mdev->net_conf->after_sb_1p) { |
2156 | case ASB_DISCARD_YOUNGER_PRI: | 2207 | case ASB_DISCARD_YOUNGER_PRI: |
@@ -2177,12 +2228,14 @@ static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) | |||
2177 | case ASB_CALL_HELPER: | 2228 | case ASB_CALL_HELPER: |
2178 | hg = drbd_asb_recover_0p(mdev); | 2229 | hg = drbd_asb_recover_0p(mdev); |
2179 | if (hg == -1 && mdev->state.role == R_PRIMARY) { | 2230 | if (hg == -1 && mdev->state.role == R_PRIMARY) { |
2180 | self = drbd_set_role(mdev, R_SECONDARY, 0); | 2231 | enum drbd_state_rv rv2; |
2232 | |||
2233 | drbd_set_role(mdev, R_SECONDARY, 0); | ||
2181 | /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, | 2234 | /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, |
2182 | * we might be here in C_WF_REPORT_PARAMS which is transient. | 2235 | * we might be here in C_WF_REPORT_PARAMS which is transient. |
2183 | * we do not need to wait for the after state change work either. */ | 2236 | * we do not need to wait for the after state change work either. */ |
2184 | self = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY)); | 2237 | rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY)); |
2185 | if (self != SS_SUCCESS) { | 2238 | if (rv2 != SS_SUCCESS) { |
2186 | drbd_khelper(mdev, "pri-lost-after-sb"); | 2239 | drbd_khelper(mdev, "pri-lost-after-sb"); |
2187 | } else { | 2240 | } else { |
2188 | dev_warn(DEV, "Successfully gave up primary role.\n"); | 2241 | dev_warn(DEV, "Successfully gave up primary role.\n"); |
@@ -2197,10 +2250,7 @@ static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) | |||
2197 | 2250 | ||
2198 | static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) | 2251 | static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) |
2199 | { | 2252 | { |
2200 | int self, peer, hg, rv = -100; | 2253 | int hg, rv = -100; |
2201 | |||
2202 | self = mdev->ldev->md.uuid[UI_BITMAP] & 1; | ||
2203 | peer = mdev->p_uuid[UI_BITMAP] & 1; | ||
2204 | 2254 | ||
2205 | switch (mdev->net_conf->after_sb_2p) { | 2255 | switch (mdev->net_conf->after_sb_2p) { |
2206 | case ASB_DISCARD_YOUNGER_PRI: | 2256 | case ASB_DISCARD_YOUNGER_PRI: |
@@ -2220,11 +2270,13 @@ static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) | |||
2220 | case ASB_CALL_HELPER: | 2270 | case ASB_CALL_HELPER: |
2221 | hg = drbd_asb_recover_0p(mdev); | 2271 | hg = drbd_asb_recover_0p(mdev); |
2222 | if (hg == -1) { | 2272 | if (hg == -1) { |
2273 | enum drbd_state_rv rv2; | ||
2274 | |||
2223 | /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, | 2275 | /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, |
2224 | * we might be here in C_WF_REPORT_PARAMS which is transient. | 2276 | * we might be here in C_WF_REPORT_PARAMS which is transient. |
2225 | * we do not need to wait for the after state change work either. */ | 2277 | * we do not need to wait for the after state change work either. */ |
2226 | self = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY)); | 2278 | rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY)); |
2227 | if (self != SS_SUCCESS) { | 2279 | if (rv2 != SS_SUCCESS) { |
2228 | drbd_khelper(mdev, "pri-lost-after-sb"); | 2280 | drbd_khelper(mdev, "pri-lost-after-sb"); |
2229 | } else { | 2281 | } else { |
2230 | dev_warn(DEV, "Successfully gave up primary role.\n"); | 2282 | dev_warn(DEV, "Successfully gave up primary role.\n"); |
@@ -2263,6 +2315,8 @@ static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid, | |||
2263 | -2 C_SYNC_TARGET set BitMap | 2315 | -2 C_SYNC_TARGET set BitMap |
2264 | -100 after split brain, disconnect | 2316 | -100 after split brain, disconnect |
2265 | -1000 unrelated data | 2317 | -1000 unrelated data |
2318 | -1091 requires proto 91 | ||
2319 | -1096 requires proto 96 | ||
2266 | */ | 2320 | */ |
2267 | static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local) | 2321 | static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local) |
2268 | { | 2322 | { |
@@ -2292,7 +2346,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l | |||
2292 | if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) { | 2346 | if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) { |
2293 | 2347 | ||
2294 | if (mdev->agreed_pro_version < 91) | 2348 | if (mdev->agreed_pro_version < 91) |
2295 | return -1001; | 2349 | return -1091; |
2296 | 2350 | ||
2297 | if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) && | 2351 | if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) && |
2298 | (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) { | 2352 | (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) { |
@@ -2313,7 +2367,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l | |||
2313 | if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) { | 2367 | if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) { |
2314 | 2368 | ||
2315 | if (mdev->agreed_pro_version < 91) | 2369 | if (mdev->agreed_pro_version < 91) |
2316 | return -1001; | 2370 | return -1091; |
2317 | 2371 | ||
2318 | if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) && | 2372 | if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) && |
2319 | (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) { | 2373 | (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) { |
@@ -2358,17 +2412,22 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l | |||
2358 | *rule_nr = 51; | 2412 | *rule_nr = 51; |
2359 | peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1); | 2413 | peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1); |
2360 | if (self == peer) { | 2414 | if (self == peer) { |
2361 | self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1); | 2415 | if (mdev->agreed_pro_version < 96 ? |
2362 | peer = mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1); | 2416 | (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == |
2363 | if (self == peer) { | 2417 | (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) : |
2418 | peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) { | ||
2364 | /* The last P_SYNC_UUID did not get through. Undo the last start of | 2419 | /* The last P_SYNC_UUID did not get through. Undo the last start of |
2365 | resync as sync source modifications of the peer's UUIDs. */ | 2420 | resync as sync source modifications of the peer's UUIDs. */ |
2366 | 2421 | ||
2367 | if (mdev->agreed_pro_version < 91) | 2422 | if (mdev->agreed_pro_version < 91) |
2368 | return -1001; | 2423 | return -1091; |
2369 | 2424 | ||
2370 | mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START]; | 2425 | mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START]; |
2371 | mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1]; | 2426 | mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1]; |
2427 | |||
2428 | dev_info(DEV, "Did not got last syncUUID packet, corrected:\n"); | ||
2429 | drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]); | ||
2430 | |||
2372 | return -1; | 2431 | return -1; |
2373 | } | 2432 | } |
2374 | } | 2433 | } |
@@ -2390,20 +2449,20 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l | |||
2390 | *rule_nr = 71; | 2449 | *rule_nr = 71; |
2391 | self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1); | 2450 | self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1); |
2392 | if (self == peer) { | 2451 | if (self == peer) { |
2393 | self = mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1); | 2452 | if (mdev->agreed_pro_version < 96 ? |
2394 | peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1); | 2453 | (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == |
2395 | if (self == peer) { | 2454 | (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) : |
2455 | self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) { | ||
2396 | /* The last P_SYNC_UUID did not get through. Undo the last start of | 2456 | /* The last P_SYNC_UUID did not get through. Undo the last start of |
2397 | resync as sync source modifications of our UUIDs. */ | 2457 | resync as sync source modifications of our UUIDs. */ |
2398 | 2458 | ||
2399 | if (mdev->agreed_pro_version < 91) | 2459 | if (mdev->agreed_pro_version < 91) |
2400 | return -1001; | 2460 | return -1091; |
2401 | 2461 | ||
2402 | _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]); | 2462 | _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]); |
2403 | _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]); | 2463 | _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]); |
2404 | 2464 | ||
2405 | dev_info(DEV, "Undid last start of resync:\n"); | 2465 | dev_info(DEV, "Last syncUUID did not get through, corrected:\n"); |
2406 | |||
2407 | drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, | 2466 | drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, |
2408 | mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0); | 2467 | mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0); |
2409 | 2468 | ||
@@ -2466,8 +2525,8 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol | |||
2466 | dev_alert(DEV, "Unrelated data, aborting!\n"); | 2525 | dev_alert(DEV, "Unrelated data, aborting!\n"); |
2467 | return C_MASK; | 2526 | return C_MASK; |
2468 | } | 2527 | } |
2469 | if (hg == -1001) { | 2528 | if (hg < -1000) { |
2470 | dev_alert(DEV, "To resolve this both sides have to support at least protocol\n"); | 2529 | dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000); |
2471 | return C_MASK; | 2530 | return C_MASK; |
2472 | } | 2531 | } |
2473 | 2532 | ||
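The new return values follow a simple encoding: anything below -1000 carries the minimum protocol version required to resolve the situation, which is what the handshake error path above recovers as -hg - 1000. A worked example of the decoding, purely illustrative:

/* Illustrative helper: recover the required protocol version from the
 * drbd_uuid_compare() return value, per the convention documented above.
 *   -1091 -> 91,  -1096 -> 96,  anything else -> 0 (no protocol problem). */
static int required_protocol_sketch(int hg)
{
	return hg < -1000 ? -hg - 1000 : 0;
}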
@@ -2566,7 +2625,8 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol | |||
2566 | 2625 | ||
2567 | if (abs(hg) >= 2) { | 2626 | if (abs(hg) >= 2) { |
2568 | dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); | 2627 | dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); |
2569 | if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake")) | 2628 | if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake", |
2629 | BM_LOCKED_SET_ALLOWED)) | ||
2570 | return C_MASK; | 2630 | return C_MASK; |
2571 | } | 2631 | } |
2572 | 2632 | ||
@@ -2660,7 +2720,7 @@ static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsig | |||
2660 | unsigned char *my_alg = mdev->net_conf->integrity_alg; | 2720 | unsigned char *my_alg = mdev->net_conf->integrity_alg; |
2661 | 2721 | ||
2662 | if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size) | 2722 | if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size) |
2663 | return FALSE; | 2723 | return false; |
2664 | 2724 | ||
2665 | p_integrity_alg[SHARED_SECRET_MAX-1] = 0; | 2725 | p_integrity_alg[SHARED_SECRET_MAX-1] = 0; |
2666 | if (strcmp(p_integrity_alg, my_alg)) { | 2726 | if (strcmp(p_integrity_alg, my_alg)) { |
@@ -2671,11 +2731,11 @@ static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsig | |||
2671 | my_alg[0] ? my_alg : (unsigned char *)"<not-used>"); | 2731 | my_alg[0] ? my_alg : (unsigned char *)"<not-used>"); |
2672 | } | 2732 | } |
2673 | 2733 | ||
2674 | return TRUE; | 2734 | return true; |
2675 | 2735 | ||
2676 | disconnect: | 2736 | disconnect: |
2677 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); | 2737 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); |
2678 | return FALSE; | 2738 | return false; |
2679 | } | 2739 | } |
2680 | 2740 | ||
2681 | /* helper function | 2741 | /* helper function |
@@ -2707,7 +2767,7 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, | |||
2707 | 2767 | ||
2708 | static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int packet_size) | 2768 | static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int packet_size) |
2709 | { | 2769 | { |
2710 | int ok = TRUE; | 2770 | int ok = true; |
2711 | struct p_rs_param_95 *p = &mdev->data.rbuf.rs_param_95; | 2771 | struct p_rs_param_95 *p = &mdev->data.rbuf.rs_param_95; |
2712 | unsigned int header_size, data_size, exp_max_sz; | 2772 | unsigned int header_size, data_size, exp_max_sz; |
2713 | struct crypto_hash *verify_tfm = NULL; | 2773 | struct crypto_hash *verify_tfm = NULL; |
@@ -2725,7 +2785,7 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi | |||
2725 | if (packet_size > exp_max_sz) { | 2785 | if (packet_size > exp_max_sz) { |
2726 | dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n", | 2786 | dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n", |
2727 | packet_size, exp_max_sz); | 2787 | packet_size, exp_max_sz); |
2728 | return FALSE; | 2788 | return false; |
2729 | } | 2789 | } |
2730 | 2790 | ||
2731 | if (apv <= 88) { | 2791 | if (apv <= 88) { |
@@ -2745,7 +2805,7 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi | |||
2745 | memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); | 2805 | memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); |
2746 | 2806 | ||
2747 | if (drbd_recv(mdev, &p->head.payload, header_size) != header_size) | 2807 | if (drbd_recv(mdev, &p->head.payload, header_size) != header_size) |
2748 | return FALSE; | 2808 | return false; |
2749 | 2809 | ||
2750 | mdev->sync_conf.rate = be32_to_cpu(p->rate); | 2810 | mdev->sync_conf.rate = be32_to_cpu(p->rate); |
2751 | 2811 | ||
@@ -2755,11 +2815,11 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi | |||
2755 | dev_err(DEV, "verify-alg too long, " | 2815 | dev_err(DEV, "verify-alg too long, " |
2756 | "peer wants %u, accepting only %u byte\n", | 2816 | "peer wants %u, accepting only %u byte\n", |
2757 | data_size, SHARED_SECRET_MAX); | 2817 | data_size, SHARED_SECRET_MAX); |
2758 | return FALSE; | 2818 | return false; |
2759 | } | 2819 | } |
2760 | 2820 | ||
2761 | if (drbd_recv(mdev, p->verify_alg, data_size) != data_size) | 2821 | if (drbd_recv(mdev, p->verify_alg, data_size) != data_size) |
2762 | return FALSE; | 2822 | return false; |
2763 | 2823 | ||
2764 | /* we expect NUL terminated string */ | 2824 | /* we expect NUL terminated string */ |
2765 | /* but just in case someone tries to be evil */ | 2825 | /* but just in case someone tries to be evil */ |
@@ -2853,7 +2913,7 @@ disconnect: | |||
2853 | /* but free the verify_tfm again, if csums_tfm did not work out */ | 2913 | /* but free the verify_tfm again, if csums_tfm did not work out */ |
2854 | crypto_free_hash(verify_tfm); | 2914 | crypto_free_hash(verify_tfm); |
2855 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); | 2915 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); |
2856 | return FALSE; | 2916 | return false; |
2857 | } | 2917 | } |
2858 | 2918 | ||
2859 | static void drbd_setup_order_type(struct drbd_conf *mdev, int peer) | 2919 | static void drbd_setup_order_type(struct drbd_conf *mdev, int peer) |
@@ -2879,7 +2939,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
2879 | { | 2939 | { |
2880 | struct p_sizes *p = &mdev->data.rbuf.sizes; | 2940 | struct p_sizes *p = &mdev->data.rbuf.sizes; |
2881 | enum determine_dev_size dd = unchanged; | 2941 | enum determine_dev_size dd = unchanged; |
2882 | unsigned int max_seg_s; | 2942 | unsigned int max_bio_size; |
2883 | sector_t p_size, p_usize, my_usize; | 2943 | sector_t p_size, p_usize, my_usize; |
2884 | int ldsc = 0; /* local disk size changed */ | 2944 | int ldsc = 0; /* local disk size changed */ |
2885 | enum dds_flags ddsf; | 2945 | enum dds_flags ddsf; |
@@ -2890,7 +2950,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
2890 | if (p_size == 0 && mdev->state.disk == D_DISKLESS) { | 2950 | if (p_size == 0 && mdev->state.disk == D_DISKLESS) { |
2891 | dev_err(DEV, "some backing storage is needed\n"); | 2951 | dev_err(DEV, "some backing storage is needed\n"); |
2892 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); | 2952 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); |
2893 | return FALSE; | 2953 | return false; |
2894 | } | 2954 | } |
2895 | 2955 | ||
2896 | /* just store the peer's disk size for now. | 2956 | /* just store the peer's disk size for now. |
@@ -2927,18 +2987,17 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
2927 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); | 2987 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); |
2928 | mdev->ldev->dc.disk_size = my_usize; | 2988 | mdev->ldev->dc.disk_size = my_usize; |
2929 | put_ldev(mdev); | 2989 | put_ldev(mdev); |
2930 | return FALSE; | 2990 | return false; |
2931 | } | 2991 | } |
2932 | put_ldev(mdev); | 2992 | put_ldev(mdev); |
2933 | } | 2993 | } |
2934 | #undef min_not_zero | ||
2935 | 2994 | ||
2936 | ddsf = be16_to_cpu(p->dds_flags); | 2995 | ddsf = be16_to_cpu(p->dds_flags); |
2937 | if (get_ldev(mdev)) { | 2996 | if (get_ldev(mdev)) { |
2938 | dd = drbd_determin_dev_size(mdev, ddsf); | 2997 | dd = drbd_determin_dev_size(mdev, ddsf); |
2939 | put_ldev(mdev); | 2998 | put_ldev(mdev); |
2940 | if (dd == dev_size_error) | 2999 | if (dd == dev_size_error) |
2941 | return FALSE; | 3000 | return false; |
2942 | drbd_md_sync(mdev); | 3001 | drbd_md_sync(mdev); |
2943 | } else { | 3002 | } else { |
2944 | /* I am diskless, need to accept the peer's size. */ | 3003 | /* I am diskless, need to accept the peer's size. */ |
@@ -2952,14 +3011,14 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
2952 | } | 3011 | } |
2953 | 3012 | ||
2954 | if (mdev->agreed_pro_version < 94) | 3013 | if (mdev->agreed_pro_version < 94) |
2955 | max_seg_s = be32_to_cpu(p->max_segment_size); | 3014 | max_bio_size = be32_to_cpu(p->max_bio_size); |
2956 | else if (mdev->agreed_pro_version == 94) | 3015 | else if (mdev->agreed_pro_version == 94) |
2957 | max_seg_s = DRBD_MAX_SIZE_H80_PACKET; | 3016 | max_bio_size = DRBD_MAX_SIZE_H80_PACKET; |
2958 | else /* drbd 8.3.8 onwards */ | 3017 | else /* drbd 8.3.8 onwards */ |
2959 | max_seg_s = DRBD_MAX_SEGMENT_SIZE; | 3018 | max_bio_size = DRBD_MAX_BIO_SIZE; |
2960 | 3019 | ||
2961 | if (max_seg_s != queue_max_segment_size(mdev->rq_queue)) | 3020 | if (max_bio_size != queue_max_hw_sectors(mdev->rq_queue) << 9) |
2962 | drbd_setup_queue_param(mdev, max_seg_s); | 3021 | drbd_setup_queue_param(mdev, max_bio_size); |
2963 | 3022 | ||
2964 | drbd_setup_order_type(mdev, be16_to_cpu(p->queue_order_type)); | 3023 | drbd_setup_order_type(mdev, be16_to_cpu(p->queue_order_type)); |
2965 | put_ldev(mdev); | 3024 | put_ldev(mdev); |
@@ -2985,14 +3044,14 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
2985 | } | 3044 | } |
2986 | } | 3045 | } |
2987 | 3046 | ||
2988 | return TRUE; | 3047 | return true; |
2989 | } | 3048 | } |
2990 | 3049 | ||
2991 | static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) | 3050 | static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) |
2992 | { | 3051 | { |
2993 | struct p_uuids *p = &mdev->data.rbuf.uuids; | 3052 | struct p_uuids *p = &mdev->data.rbuf.uuids; |
2994 | u64 *p_uuid; | 3053 | u64 *p_uuid; |
2995 | int i; | 3054 | int i, updated_uuids = 0; |
2996 | 3055 | ||
2997 | p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO); | 3056 | p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO); |
2998 | 3057 | ||
@@ -3009,7 +3068,7 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
3009 | dev_err(DEV, "Can only connect to data with current UUID=%016llX\n", | 3068 | dev_err(DEV, "Can only connect to data with current UUID=%016llX\n", |
3010 | (unsigned long long)mdev->ed_uuid); | 3069 | (unsigned long long)mdev->ed_uuid); |
3011 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); | 3070 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); |
3012 | return FALSE; | 3071 | return false; |
3013 | } | 3072 | } |
3014 | 3073 | ||
3015 | if (get_ldev(mdev)) { | 3074 | if (get_ldev(mdev)) { |
@@ -3021,19 +3080,21 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
3021 | if (skip_initial_sync) { | 3080 | if (skip_initial_sync) { |
3022 | dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n"); | 3081 | dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n"); |
3023 | drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, | 3082 | drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, |
3024 | "clear_n_write from receive_uuids"); | 3083 | "clear_n_write from receive_uuids", |
3084 | BM_LOCKED_TEST_ALLOWED); | ||
3025 | _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]); | 3085 | _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]); |
3026 | _drbd_uuid_set(mdev, UI_BITMAP, 0); | 3086 | _drbd_uuid_set(mdev, UI_BITMAP, 0); |
3027 | _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), | 3087 | _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), |
3028 | CS_VERBOSE, NULL); | 3088 | CS_VERBOSE, NULL); |
3029 | drbd_md_sync(mdev); | 3089 | drbd_md_sync(mdev); |
3090 | updated_uuids = 1; | ||
3030 | } | 3091 | } |
3031 | put_ldev(mdev); | 3092 | put_ldev(mdev); |
3032 | } else if (mdev->state.disk < D_INCONSISTENT && | 3093 | } else if (mdev->state.disk < D_INCONSISTENT && |
3033 | mdev->state.role == R_PRIMARY) { | 3094 | mdev->state.role == R_PRIMARY) { |
3034 | /* I am a diskless primary, the peer just created a new current UUID | 3095 | /* I am a diskless primary, the peer just created a new current UUID |
3035 | for me. */ | 3096 | for me. */ |
3036 | drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); | 3097 | updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); |
3037 | } | 3098 | } |
3038 | 3099 | ||
3039 | /* Before we test for the disk state, we should wait until an eventually | 3100 | /* Before we test for the disk state, we should wait until an eventually |
@@ -3042,9 +3103,12 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
3042 | new disk state... */ | 3103 | new disk state... */ |
3043 | wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags)); | 3104 | wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags)); |
3044 | if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT) | 3105 | if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT) |
3045 | drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); | 3106 | updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); |
3046 | 3107 | ||
3047 | return TRUE; | 3108 | if (updated_uuids) |
3109 | drbd_print_uuids(mdev, "receiver updated UUIDs to"); | ||
3110 | |||
3111 | return true; | ||
3048 | } | 3112 | } |
3049 | 3113 | ||
3050 | /** | 3114 | /** |
@@ -3081,7 +3145,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsi | |||
3081 | { | 3145 | { |
3082 | struct p_req_state *p = &mdev->data.rbuf.req_state; | 3146 | struct p_req_state *p = &mdev->data.rbuf.req_state; |
3083 | union drbd_state mask, val; | 3147 | union drbd_state mask, val; |
3084 | int rv; | 3148 | enum drbd_state_rv rv; |
3085 | 3149 | ||
3086 | mask.i = be32_to_cpu(p->mask); | 3150 | mask.i = be32_to_cpu(p->mask); |
3087 | val.i = be32_to_cpu(p->val); | 3151 | val.i = be32_to_cpu(p->val); |
@@ -3089,7 +3153,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsi | |||
3089 | if (test_bit(DISCARD_CONCURRENT, &mdev->flags) && | 3153 | if (test_bit(DISCARD_CONCURRENT, &mdev->flags) && |
3090 | test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) { | 3154 | test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) { |
3091 | drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG); | 3155 | drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG); |
3092 | return TRUE; | 3156 | return true; |
3093 | } | 3157 | } |
3094 | 3158 | ||
3095 | mask = convert_state(mask); | 3159 | mask = convert_state(mask); |
@@ -3100,7 +3164,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsi | |||
3100 | drbd_send_sr_reply(mdev, rv); | 3164 | drbd_send_sr_reply(mdev, rv); |
3101 | drbd_md_sync(mdev); | 3165 | drbd_md_sync(mdev); |
3102 | 3166 | ||
3103 | return TRUE; | 3167 | return true; |
3104 | } | 3168 | } |
3105 | 3169 | ||
3106 | static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) | 3170 | static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) |
@@ -3145,7 +3209,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
3145 | peer_state.conn == C_CONNECTED) { | 3209 | peer_state.conn == C_CONNECTED) { |
3146 | if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) | 3210 | if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) |
3147 | drbd_resync_finished(mdev); | 3211 | drbd_resync_finished(mdev); |
3148 | return TRUE; | 3212 | return true; |
3149 | } | 3213 | } |
3150 | } | 3214 | } |
3151 | 3215 | ||
@@ -3161,6 +3225,9 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
3161 | if (ns.conn == C_WF_REPORT_PARAMS) | 3225 | if (ns.conn == C_WF_REPORT_PARAMS) |
3162 | ns.conn = C_CONNECTED; | 3226 | ns.conn = C_CONNECTED; |
3163 | 3227 | ||
3228 | if (peer_state.conn == C_AHEAD) | ||
3229 | ns.conn = C_BEHIND; | ||
3230 | |||
3164 | if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING && | 3231 | if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING && |
3165 | get_ldev_if_state(mdev, D_NEGOTIATING)) { | 3232 | get_ldev_if_state(mdev, D_NEGOTIATING)) { |
3166 | int cr; /* consider resync */ | 3233 | int cr; /* consider resync */ |
@@ -3195,10 +3262,10 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
3195 | real_peer_disk = D_DISKLESS; | 3262 | real_peer_disk = D_DISKLESS; |
3196 | } else { | 3263 | } else { |
3197 | if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags)) | 3264 | if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags)) |
3198 | return FALSE; | 3265 | return false; |
3199 | D_ASSERT(os.conn == C_WF_REPORT_PARAMS); | 3266 | D_ASSERT(os.conn == C_WF_REPORT_PARAMS); |
3200 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); | 3267 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); |
3201 | return FALSE; | 3268 | return false; |
3202 | } | 3269 | } |
3203 | } | 3270 | } |
3204 | } | 3271 | } |
@@ -3223,7 +3290,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
3223 | drbd_uuid_new_current(mdev); | 3290 | drbd_uuid_new_current(mdev); |
3224 | clear_bit(NEW_CUR_UUID, &mdev->flags); | 3291 | clear_bit(NEW_CUR_UUID, &mdev->flags); |
3225 | drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0)); | 3292 | drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0)); |
3226 | return FALSE; | 3293 | return false; |
3227 | } | 3294 | } |
3228 | rv = _drbd_set_state(mdev, ns, cs_flags, NULL); | 3295 | rv = _drbd_set_state(mdev, ns, cs_flags, NULL); |
3229 | ns = mdev->state; | 3296 | ns = mdev->state; |
@@ -3231,7 +3298,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
3231 | 3298 | ||
3232 | if (rv < SS_SUCCESS) { | 3299 | if (rv < SS_SUCCESS) { |
3233 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); | 3300 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); |
3234 | return FALSE; | 3301 | return false; |
3235 | } | 3302 | } |
3236 | 3303 | ||
3237 | if (os.conn > C_WF_REPORT_PARAMS) { | 3304 | if (os.conn > C_WF_REPORT_PARAMS) { |
@@ -3249,7 +3316,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
3249 | 3316 | ||
3250 | drbd_md_sync(mdev); /* update connected indicator, la_size, ... */ | 3317 | drbd_md_sync(mdev); /* update connected indicator, la_size, ... */ |
3251 | 3318 | ||
3252 | return TRUE; | 3319 | return true; |
3253 | } | 3320 | } |
3254 | 3321 | ||
3255 | static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) | 3322 | static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) |
@@ -3258,6 +3325,7 @@ static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsi | |||
3258 | 3325 | ||
3259 | wait_event(mdev->misc_wait, | 3326 | wait_event(mdev->misc_wait, |
3260 | mdev->state.conn == C_WF_SYNC_UUID || | 3327 | mdev->state.conn == C_WF_SYNC_UUID || |
3328 | mdev->state.conn == C_BEHIND || | ||
3261 | mdev->state.conn < C_CONNECTED || | 3329 | mdev->state.conn < C_CONNECTED || |
3262 | mdev->state.disk < D_NEGOTIATING); | 3330 | mdev->state.disk < D_NEGOTIATING); |
3263 | 3331 | ||
@@ -3269,32 +3337,42 @@ static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsi | |||
3269 | _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid)); | 3337 | _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid)); |
3270 | _drbd_uuid_set(mdev, UI_BITMAP, 0UL); | 3338 | _drbd_uuid_set(mdev, UI_BITMAP, 0UL); |
3271 | 3339 | ||
3340 | drbd_print_uuids(mdev, "updated sync uuid"); | ||
3272 | drbd_start_resync(mdev, C_SYNC_TARGET); | 3341 | drbd_start_resync(mdev, C_SYNC_TARGET); |
3273 | 3342 | ||
3274 | put_ldev(mdev); | 3343 | put_ldev(mdev); |
3275 | } else | 3344 | } else |
3276 | dev_err(DEV, "Ignoring SyncUUID packet!\n"); | 3345 | dev_err(DEV, "Ignoring SyncUUID packet!\n"); |
3277 | 3346 | ||
3278 | return TRUE; | 3347 | return true; |
3279 | } | 3348 | } |
3280 | 3349 | ||
3281 | enum receive_bitmap_ret { OK, DONE, FAILED }; | 3350 | /** |
3282 | 3351 | * receive_bitmap_plain | |
3283 | static enum receive_bitmap_ret | 3352 | * |
3353 | * Return 0 when done, 1 when another iteration is needed, and a negative error | ||
3354 | * code upon failure. | ||
3355 | */ | ||
3356 | static int | ||
3284 | receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size, | 3357 | receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size, |
3285 | unsigned long *buffer, struct bm_xfer_ctx *c) | 3358 | unsigned long *buffer, struct bm_xfer_ctx *c) |
3286 | { | 3359 | { |
3287 | unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset); | 3360 | unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset); |
3288 | unsigned want = num_words * sizeof(long); | 3361 | unsigned want = num_words * sizeof(long); |
3362 | int err; | ||
3289 | 3363 | ||
3290 | if (want != data_size) { | 3364 | if (want != data_size) { |
3291 | dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size); | 3365 | dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size); |
3292 | return FAILED; | 3366 | return -EIO; |
3293 | } | 3367 | } |
3294 | if (want == 0) | 3368 | if (want == 0) |
3295 | return DONE; | 3369 | return 0; |
3296 | if (drbd_recv(mdev, buffer, want) != want) | 3370 | err = drbd_recv(mdev, buffer, want); |
3297 | return FAILED; | 3371 | if (err != want) { |
3372 | if (err >= 0) | ||
3373 | err = -EIO; | ||
3374 | return err; | ||
3375 | } | ||
3298 | 3376 | ||
3299 | drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer); | 3377 | drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer); |
3300 | 3378 | ||
@@ -3303,10 +3381,16 @@ receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size, | |||
3303 | if (c->bit_offset > c->bm_bits) | 3381 | if (c->bit_offset > c->bm_bits) |
3304 | c->bit_offset = c->bm_bits; | 3382 | c->bit_offset = c->bm_bits; |
3305 | 3383 | ||
3306 | return OK; | 3384 | return 1; |
3307 | } | 3385 | } |
3308 | 3386 | ||
3309 | static enum receive_bitmap_ret | 3387 | /** |
3388 | * recv_bm_rle_bits | ||
3389 | * | ||
3390 | * Return 0 when done, 1 when another iteration is needed, and a negative error | ||
3391 | * code upon failure. | ||
3392 | */ | ||
3393 | static int | ||
3310 | recv_bm_rle_bits(struct drbd_conf *mdev, | 3394 | recv_bm_rle_bits(struct drbd_conf *mdev, |
3311 | struct p_compressed_bm *p, | 3395 | struct p_compressed_bm *p, |
3312 | struct bm_xfer_ctx *c) | 3396 | struct bm_xfer_ctx *c) |
@@ -3326,18 +3410,18 @@ recv_bm_rle_bits(struct drbd_conf *mdev, | |||
3326 | 3410 | ||
3327 | bits = bitstream_get_bits(&bs, &look_ahead, 64); | 3411 | bits = bitstream_get_bits(&bs, &look_ahead, 64); |
3328 | if (bits < 0) | 3412 | if (bits < 0) |
3329 | return FAILED; | 3413 | return -EIO; |
3330 | 3414 | ||
3331 | for (have = bits; have > 0; s += rl, toggle = !toggle) { | 3415 | for (have = bits; have > 0; s += rl, toggle = !toggle) { |
3332 | bits = vli_decode_bits(&rl, look_ahead); | 3416 | bits = vli_decode_bits(&rl, look_ahead); |
3333 | if (bits <= 0) | 3417 | if (bits <= 0) |
3334 | return FAILED; | 3418 | return -EIO; |
3335 | 3419 | ||
3336 | if (toggle) { | 3420 | if (toggle) { |
3337 | e = s + rl -1; | 3421 | e = s + rl -1; |
3338 | if (e >= c->bm_bits) { | 3422 | if (e >= c->bm_bits) { |
3339 | dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e); | 3423 | dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e); |
3340 | return FAILED; | 3424 | return -EIO; |
3341 | } | 3425 | } |
3342 | _drbd_bm_set_bits(mdev, s, e); | 3426 | _drbd_bm_set_bits(mdev, s, e); |
3343 | } | 3427 | } |
@@ -3347,14 +3431,14 @@ recv_bm_rle_bits(struct drbd_conf *mdev, | |||
3347 | have, bits, look_ahead, | 3431 | have, bits, look_ahead, |
3348 | (unsigned int)(bs.cur.b - p->code), | 3432 | (unsigned int)(bs.cur.b - p->code), |
3349 | (unsigned int)bs.buf_len); | 3433 | (unsigned int)bs.buf_len); |
3350 | return FAILED; | 3434 | return -EIO; |
3351 | } | 3435 | } |
3352 | look_ahead >>= bits; | 3436 | look_ahead >>= bits; |
3353 | have -= bits; | 3437 | have -= bits; |
3354 | 3438 | ||
3355 | bits = bitstream_get_bits(&bs, &tmp, 64 - have); | 3439 | bits = bitstream_get_bits(&bs, &tmp, 64 - have); |
3356 | if (bits < 0) | 3440 | if (bits < 0) |
3357 | return FAILED; | 3441 | return -EIO; |
3358 | look_ahead |= tmp << have; | 3442 | look_ahead |= tmp << have; |
3359 | have += bits; | 3443 | have += bits; |
3360 | } | 3444 | } |
@@ -3362,10 +3446,16 @@ recv_bm_rle_bits(struct drbd_conf *mdev, | |||
3362 | c->bit_offset = s; | 3446 | c->bit_offset = s; |
3363 | bm_xfer_ctx_bit_to_word_offset(c); | 3447 | bm_xfer_ctx_bit_to_word_offset(c); |
3364 | 3448 | ||
3365 | return (s == c->bm_bits) ? DONE : OK; | 3449 | return (s != c->bm_bits); |
3366 | } | 3450 | } |
3367 | 3451 | ||
3368 | static enum receive_bitmap_ret | 3452 | /** |
3453 | * decode_bitmap_c | ||
3454 | * | ||
3455 | * Return 0 when done, 1 when another iteration is needed, and a negative error | ||
3456 | * code upon failure. | ||
3457 | */ | ||
3458 | static int | ||
3369 | decode_bitmap_c(struct drbd_conf *mdev, | 3459 | decode_bitmap_c(struct drbd_conf *mdev, |
3370 | struct p_compressed_bm *p, | 3460 | struct p_compressed_bm *p, |
3371 | struct bm_xfer_ctx *c) | 3461 | struct bm_xfer_ctx *c) |
@@ -3379,7 +3469,7 @@ decode_bitmap_c(struct drbd_conf *mdev, | |||
3379 | 3469 | ||
3380 | dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding); | 3470 | dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding); |
3381 | drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); | 3471 | drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); |
3382 | return FAILED; | 3472 | return -EIO; |
3383 | } | 3473 | } |
3384 | 3474 | ||
3385 | void INFO_bm_xfer_stats(struct drbd_conf *mdev, | 3475 | void INFO_bm_xfer_stats(struct drbd_conf *mdev, |
@@ -3428,13 +3518,13 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne | |||
3428 | { | 3518 | { |
3429 | struct bm_xfer_ctx c; | 3519 | struct bm_xfer_ctx c; |
3430 | void *buffer; | 3520 | void *buffer; |
3431 | enum receive_bitmap_ret ret; | 3521 | int err; |
3432 | int ok = FALSE; | 3522 | int ok = false; |
3433 | struct p_header80 *h = &mdev->data.rbuf.header.h80; | 3523 | struct p_header80 *h = &mdev->data.rbuf.header.h80; |
3434 | 3524 | ||
3435 | wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); | 3525 | drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED); |
3436 | 3526 | /* you are supposed to send additional out-of-sync information | |
3437 | drbd_bm_lock(mdev, "receive bitmap"); | 3527 | * if you actually set bits during this phase */ |
3438 | 3528 | ||
3439 | /* maybe we should use some per thread scratch page, | 3529 | /* maybe we should use some per thread scratch page, |
3440 | * and allocate that during initial device creation? */ | 3530 | * and allocate that during initial device creation? */ |
@@ -3449,9 +3539,9 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne | |||
3449 | .bm_words = drbd_bm_words(mdev), | 3539 | .bm_words = drbd_bm_words(mdev), |
3450 | }; | 3540 | }; |
3451 | 3541 | ||
3452 | do { | 3542 | for(;;) { |
3453 | if (cmd == P_BITMAP) { | 3543 | if (cmd == P_BITMAP) { |
3454 | ret = receive_bitmap_plain(mdev, data_size, buffer, &c); | 3544 | err = receive_bitmap_plain(mdev, data_size, buffer, &c); |
3455 | } else if (cmd == P_COMPRESSED_BITMAP) { | 3545 | } else if (cmd == P_COMPRESSED_BITMAP) { |
3456 | /* MAYBE: sanity check that we speak proto >= 90, | 3546 | /* MAYBE: sanity check that we speak proto >= 90, |
3457 | * and the feature is enabled! */ | 3547 | * and the feature is enabled! */ |
@@ -3468,9 +3558,9 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne | |||
3468 | goto out; | 3558 | goto out; |
3469 | if (data_size <= (sizeof(*p) - sizeof(p->head))) { | 3559 | if (data_size <= (sizeof(*p) - sizeof(p->head))) { |
3470 | dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size); | 3560 | dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size); |
3471 | return FAILED; | 3561 | goto out; |
3472 | } | 3562 | } |
3473 | ret = decode_bitmap_c(mdev, p, &c); | 3563 | err = decode_bitmap_c(mdev, p, &c); |
3474 | } else { | 3564 | } else { |
3475 | dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd); | 3565 | dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd); |
3476 | goto out; | 3566 | goto out; |
@@ -3479,24 +3569,26 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne | |||
3479 | c.packets[cmd == P_BITMAP]++; | 3569 | c.packets[cmd == P_BITMAP]++; |
3480 | c.bytes[cmd == P_BITMAP] += sizeof(struct p_header80) + data_size; | 3570 | c.bytes[cmd == P_BITMAP] += sizeof(struct p_header80) + data_size; |
3481 | 3571 | ||
3482 | if (ret != OK) | 3572 | if (err <= 0) { |
3573 | if (err < 0) | ||
3574 | goto out; | ||
3483 | break; | 3575 | break; |
3484 | 3576 | } | |
3485 | if (!drbd_recv_header(mdev, &cmd, &data_size)) | 3577 | if (!drbd_recv_header(mdev, &cmd, &data_size)) |
3486 | goto out; | 3578 | goto out; |
3487 | } while (ret == OK); | 3579 | } |
3488 | if (ret == FAILED) | ||
3489 | goto out; | ||
3490 | 3580 | ||
3491 | INFO_bm_xfer_stats(mdev, "receive", &c); | 3581 | INFO_bm_xfer_stats(mdev, "receive", &c); |
3492 | 3582 | ||
3493 | if (mdev->state.conn == C_WF_BITMAP_T) { | 3583 | if (mdev->state.conn == C_WF_BITMAP_T) { |
3584 | enum drbd_state_rv rv; | ||
3585 | |||
3494 | ok = !drbd_send_bitmap(mdev); | 3586 | ok = !drbd_send_bitmap(mdev); |
3495 | if (!ok) | 3587 | if (!ok) |
3496 | goto out; | 3588 | goto out; |
3497 | /* Omit CS_ORDERED with this state transition to avoid deadlocks. */ | 3589 | /* Omit CS_ORDERED with this state transition to avoid deadlocks. */ |
3498 | ok = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); | 3590 | rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); |
3499 | D_ASSERT(ok == SS_SUCCESS); | 3591 | D_ASSERT(rv == SS_SUCCESS); |
3500 | } else if (mdev->state.conn != C_WF_BITMAP_S) { | 3592 | } else if (mdev->state.conn != C_WF_BITMAP_S) { |
3501 | /* admin may have requested C_DISCONNECTING, | 3593 | /* admin may have requested C_DISCONNECTING, |
3502 | * other threads may have noticed network errors */ | 3594 | * other threads may have noticed network errors */ |
@@ -3504,7 +3596,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne | |||
3504 | drbd_conn_str(mdev->state.conn)); | 3596 | drbd_conn_str(mdev->state.conn)); |
3505 | } | 3597 | } |
3506 | 3598 | ||
3507 | ok = TRUE; | 3599 | ok = true; |
3508 | out: | 3600 | out: |
3509 | drbd_bm_unlock(mdev); | 3601 | drbd_bm_unlock(mdev); |
3510 | if (ok && mdev->state.conn == C_WF_BITMAP_S) | 3602 | if (ok && mdev->state.conn == C_WF_BITMAP_S) |
@@ -3538,7 +3630,26 @@ static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, u | |||
3538 | * with the data requests being unplugged */ | 3630 | * with the data requests being unplugged */ |
3539 | drbd_tcp_quickack(mdev->data.socket); | 3631 | drbd_tcp_quickack(mdev->data.socket); |
3540 | 3632 | ||
3541 | return TRUE; | 3633 | return true; |
3634 | } | ||
3635 | |||
3636 | static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) | ||
3637 | { | ||
3638 | struct p_block_desc *p = &mdev->data.rbuf.block_desc; | ||
3639 | |||
3640 | switch (mdev->state.conn) { | ||
3641 | case C_WF_SYNC_UUID: | ||
3642 | case C_WF_BITMAP_T: | ||
3643 | case C_BEHIND: | ||
3644 | break; | ||
3645 | default: | ||
3646 | dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n", | ||
3647 | drbd_conn_str(mdev->state.conn)); | ||
3648 | } | ||
3649 | |||
3650 | drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize)); | ||
3651 | |||
3652 | return true; | ||
3542 | } | 3653 | } |
3543 | 3654 | ||
3544 | typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packets cmd, unsigned int to_receive); | 3655 | typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packets cmd, unsigned int to_receive); |
@@ -3571,6 +3682,7 @@ static struct data_cmd drbd_cmd_handler[] = { | |||
3571 | [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest }, | 3682 | [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest }, |
3572 | [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest }, | 3683 | [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest }, |
3573 | [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip }, | 3684 | [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip }, |
3685 | [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync }, | ||
3574 | /* anything missing from this table is in | 3686 | /* anything missing from this table is in |
3575 | * the asender_tbl, see get_asender_cmd */ | 3687 | * the asender_tbl, see get_asender_cmd */ |
3576 | [P_MAX_CMD] = { 0, 0, NULL }, | 3688 | [P_MAX_CMD] = { 0, 0, NULL }, |
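The new P_OUT_OF_SYNC entry above plugs into the same table the receiver main loop already uses: dispatch is a bounds-checked lookup keyed by the packet type, terminated by the P_MAX_CMD sentinel. A condensed sketch of that pattern (the .function member name is an assumption; the table name, handler typedef and sentinel are shown above):

/* Sketch of drbdd()'s table-driven dispatch; simplified. */
if (cmd >= P_MAX_CMD || !drbd_cmd_handler[cmd].function)
	goto err_out;                           /* unknown or unhandled packet type */
rv = drbd_cmd_handler[cmd].function(mdev, cmd, packet_size - shs);
if (unlikely(!rv))
	goto err_out;                           /* handler failed: tear the connection down */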
@@ -3610,7 +3722,8 @@ static void drbdd(struct drbd_conf *mdev) | |||
3610 | if (shs) { | 3722 | if (shs) { |
3611 | rv = drbd_recv(mdev, &header->h80.payload, shs); | 3723 | rv = drbd_recv(mdev, &header->h80.payload, shs); |
3612 | if (unlikely(rv != shs)) { | 3724 | if (unlikely(rv != shs)) { |
3613 | dev_err(DEV, "short read while reading sub header: rv=%d\n", rv); | 3725 | if (!signal_pending(current)) |
3726 | dev_warn(DEV, "short read while reading sub header: rv=%d\n", rv); | ||
3614 | goto err_out; | 3727 | goto err_out; |
3615 | } | 3728 | } |
3616 | } | 3729 | } |
@@ -3682,9 +3795,6 @@ static void drbd_disconnect(struct drbd_conf *mdev) | |||
3682 | 3795 | ||
3683 | if (mdev->state.conn == C_STANDALONE) | 3796 | if (mdev->state.conn == C_STANDALONE) |
3684 | return; | 3797 | return; |
3685 | if (mdev->state.conn >= C_WF_CONNECTION) | ||
3686 | dev_err(DEV, "ASSERT FAILED cstate = %s, expected < WFConnection\n", | ||
3687 | drbd_conn_str(mdev->state.conn)); | ||
3688 | 3798 | ||
3689 | /* asender does not clean up anything. it must not interfere, either */ | 3799 | /* asender does not clean up anything. it must not interfere, either */ |
3690 | drbd_thread_stop(&mdev->asender); | 3800 | drbd_thread_stop(&mdev->asender); |
@@ -3713,6 +3823,8 @@ static void drbd_disconnect(struct drbd_conf *mdev) | |||
3713 | atomic_set(&mdev->rs_pending_cnt, 0); | 3823 | atomic_set(&mdev->rs_pending_cnt, 0); |
3714 | wake_up(&mdev->misc_wait); | 3824 | wake_up(&mdev->misc_wait); |
3715 | 3825 | ||
3826 | del_timer(&mdev->request_timer); | ||
3827 | |||
3716 | /* make sure syncer is stopped and w_resume_next_sg queued */ | 3828 | /* make sure syncer is stopped and w_resume_next_sg queued */ |
3717 | del_timer_sync(&mdev->resync_timer); | 3829 | del_timer_sync(&mdev->resync_timer); |
3718 | resync_timer_fn((unsigned long)mdev); | 3830 | resync_timer_fn((unsigned long)mdev); |
@@ -3758,13 +3870,6 @@ static void drbd_disconnect(struct drbd_conf *mdev) | |||
3758 | if (os.conn == C_DISCONNECTING) { | 3870 | if (os.conn == C_DISCONNECTING) { |
3759 | wait_event(mdev->net_cnt_wait, atomic_read(&mdev->net_cnt) == 0); | 3871 | wait_event(mdev->net_cnt_wait, atomic_read(&mdev->net_cnt) == 0); |
3760 | 3872 | ||
3761 | if (!is_susp(mdev->state)) { | ||
3762 | /* we must not free the tl_hash | ||
3763 | * while application io is still on the fly */ | ||
3764 | wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); | ||
3765 | drbd_free_tl_hash(mdev); | ||
3766 | } | ||
3767 | |||
3768 | crypto_free_hash(mdev->cram_hmac_tfm); | 3873 | crypto_free_hash(mdev->cram_hmac_tfm); |
3769 | mdev->cram_hmac_tfm = NULL; | 3874 | mdev->cram_hmac_tfm = NULL; |
3770 | 3875 | ||
@@ -3773,6 +3878,10 @@ static void drbd_disconnect(struct drbd_conf *mdev) | |||
3773 | drbd_request_state(mdev, NS(conn, C_STANDALONE)); | 3878 | drbd_request_state(mdev, NS(conn, C_STANDALONE)); |
3774 | } | 3879 | } |
3775 | 3880 | ||
3881 | /* serialize with bitmap writeout triggered by the state change, | ||
3882 | * if any. */ | ||
3883 | wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); | ||
3884 | |||
3776 | /* tcp_close and release of sendpage pages can be deferred. I don't | 3885 | /* tcp_close and release of sendpage pages can be deferred. I don't |
3777 | * want to use SO_LINGER, because apparently it can be deferred for | 3886 | * want to use SO_LINGER, because apparently it can be deferred for |
3778 | * more than 20 seconds (longest time I checked). | 3887 | * more than 20 seconds (longest time I checked). |
@@ -3873,7 +3982,8 @@ static int drbd_do_handshake(struct drbd_conf *mdev) | |||
3873 | rv = drbd_recv(mdev, &p->head.payload, expect); | 3982 | rv = drbd_recv(mdev, &p->head.payload, expect); |
3874 | 3983 | ||
3875 | if (rv != expect) { | 3984 | if (rv != expect) { |
3876 | dev_err(DEV, "short read receiving handshake packet: l=%u\n", rv); | 3985 | if (!signal_pending(current)) |
3986 | dev_warn(DEV, "short read receiving handshake packet: l=%u\n", rv); | ||
3877 | return 0; | 3987 | return 0; |
3878 | } | 3988 | } |
3879 | 3989 | ||
@@ -3975,7 +4085,8 @@ static int drbd_do_auth(struct drbd_conf *mdev) | |||
3975 | rv = drbd_recv(mdev, peers_ch, length); | 4085 | rv = drbd_recv(mdev, peers_ch, length); |
3976 | 4086 | ||
3977 | if (rv != length) { | 4087 | if (rv != length) { |
3978 | dev_err(DEV, "short read AuthChallenge: l=%u\n", rv); | 4088 | if (!signal_pending(current)) |
4089 | dev_warn(DEV, "short read AuthChallenge: l=%u\n", rv); | ||
3979 | rv = 0; | 4090 | rv = 0; |
3980 | goto fail; | 4091 | goto fail; |
3981 | } | 4092 | } |
@@ -4022,7 +4133,8 @@ static int drbd_do_auth(struct drbd_conf *mdev) | |||
4022 | rv = drbd_recv(mdev, response , resp_size); | 4133 | rv = drbd_recv(mdev, response , resp_size); |
4023 | 4134 | ||
4024 | if (rv != resp_size) { | 4135 | if (rv != resp_size) { |
4025 | dev_err(DEV, "short read receiving AuthResponse: l=%u\n", rv); | 4136 | if (!signal_pending(current)) |
4137 | dev_warn(DEV, "short read receiving AuthResponse: l=%u\n", rv); | ||
4026 | rv = 0; | 4138 | rv = 0; |
4027 | goto fail; | 4139 | goto fail; |
4028 | } | 4140 | } |
@@ -4074,8 +4186,7 @@ int drbdd_init(struct drbd_thread *thi) | |||
4074 | h = drbd_connect(mdev); | 4186 | h = drbd_connect(mdev); |
4075 | if (h == 0) { | 4187 | if (h == 0) { |
4076 | drbd_disconnect(mdev); | 4188 | drbd_disconnect(mdev); |
4077 | __set_current_state(TASK_INTERRUPTIBLE); | 4189 | schedule_timeout_interruptible(HZ); |
4078 | schedule_timeout(HZ); | ||
4079 | } | 4190 | } |
4080 | if (h == -1) { | 4191 | if (h == -1) { |
4081 | dev_warn(DEV, "Discarding network configuration.\n"); | 4192 | dev_warn(DEV, "Discarding network configuration.\n"); |
@@ -4113,7 +4224,7 @@ static int got_RqSReply(struct drbd_conf *mdev, struct p_header80 *h) | |||
4113 | } | 4224 | } |
4114 | wake_up(&mdev->state_wait); | 4225 | wake_up(&mdev->state_wait); |
4115 | 4226 | ||
4116 | return TRUE; | 4227 | return true; |
4117 | } | 4228 | } |
4118 | 4229 | ||
4119 | static int got_Ping(struct drbd_conf *mdev, struct p_header80 *h) | 4230 | static int got_Ping(struct drbd_conf *mdev, struct p_header80 *h) |
@@ -4129,7 +4240,7 @@ static int got_PingAck(struct drbd_conf *mdev, struct p_header80 *h) | |||
4129 | if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags)) | 4240 | if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags)) |
4130 | wake_up(&mdev->misc_wait); | 4241 | wake_up(&mdev->misc_wait); |
4131 | 4242 | ||
4132 | return TRUE; | 4243 | return true; |
4133 | } | 4244 | } |
4134 | 4245 | ||
4135 | static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) | 4246 | static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) |
@@ -4152,7 +4263,7 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) | |||
4152 | dec_rs_pending(mdev); | 4263 | dec_rs_pending(mdev); |
4153 | atomic_add(blksize >> 9, &mdev->rs_sect_in); | 4264 | atomic_add(blksize >> 9, &mdev->rs_sect_in); |
4154 | 4265 | ||
4155 | return TRUE; | 4266 | return true; |
4156 | } | 4267 | } |
4157 | 4268 | ||
4158 | /* when we receive the ACK for a write request, | 4269 | /* when we receive the ACK for a write request, |
@@ -4176,8 +4287,6 @@ static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev, | |||
4176 | return req; | 4287 | return req; |
4177 | } | 4288 | } |
4178 | } | 4289 | } |
4179 | dev_err(DEV, "_ack_id_to_req: failed to find req %p, sector %llus in list\n", | ||
4180 | (void *)(unsigned long)id, (unsigned long long)sector); | ||
4181 | return NULL; | 4290 | return NULL; |
4182 | } | 4291 | } |
4183 | 4292 | ||
@@ -4195,15 +4304,17 @@ static int validate_req_change_req_state(struct drbd_conf *mdev, | |||
4195 | req = validator(mdev, id, sector); | 4304 | req = validator(mdev, id, sector); |
4196 | if (unlikely(!req)) { | 4305 | if (unlikely(!req)) { |
4197 | spin_unlock_irq(&mdev->req_lock); | 4306 | spin_unlock_irq(&mdev->req_lock); |
4198 | dev_err(DEV, "%s: got a corrupt block_id/sector pair\n", func); | 4307 | |
4199 | return FALSE; | 4308 | dev_err(DEV, "%s: failed to find req %p, sector %llus\n", func, |
4309 | (void *)(unsigned long)id, (unsigned long long)sector); | ||
4310 | return false; | ||
4200 | } | 4311 | } |
4201 | __req_mod(req, what, &m); | 4312 | __req_mod(req, what, &m); |
4202 | spin_unlock_irq(&mdev->req_lock); | 4313 | spin_unlock_irq(&mdev->req_lock); |
4203 | 4314 | ||
4204 | if (m.bio) | 4315 | if (m.bio) |
4205 | complete_master_bio(mdev, &m); | 4316 | complete_master_bio(mdev, &m); |
4206 | return TRUE; | 4317 | return true; |
4207 | } | 4318 | } |
4208 | 4319 | ||
4209 | static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) | 4320 | static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) |
@@ -4218,7 +4329,7 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) | |||
4218 | if (is_syncer_block_id(p->block_id)) { | 4329 | if (is_syncer_block_id(p->block_id)) { |
4219 | drbd_set_in_sync(mdev, sector, blksize); | 4330 | drbd_set_in_sync(mdev, sector, blksize); |
4220 | dec_rs_pending(mdev); | 4331 | dec_rs_pending(mdev); |
4221 | return TRUE; | 4332 | return true; |
4222 | } | 4333 | } |
4223 | switch (be16_to_cpu(h->command)) { | 4334 | switch (be16_to_cpu(h->command)) { |
4224 | case P_RS_WRITE_ACK: | 4335 | case P_RS_WRITE_ACK: |
@@ -4239,7 +4350,7 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) | |||
4239 | break; | 4350 | break; |
4240 | default: | 4351 | default: |
4241 | D_ASSERT(0); | 4352 | D_ASSERT(0); |
4242 | return FALSE; | 4353 | return false; |
4243 | } | 4354 | } |
4244 | 4355 | ||
4245 | return validate_req_change_req_state(mdev, p->block_id, sector, | 4356 | return validate_req_change_req_state(mdev, p->block_id, sector, |
@@ -4250,20 +4361,44 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) | |||
4250 | { | 4361 | { |
4251 | struct p_block_ack *p = (struct p_block_ack *)h; | 4362 | struct p_block_ack *p = (struct p_block_ack *)h; |
4252 | sector_t sector = be64_to_cpu(p->sector); | 4363 | sector_t sector = be64_to_cpu(p->sector); |
4253 | 4364 | int size = be32_to_cpu(p->blksize); | |
4254 | if (__ratelimit(&drbd_ratelimit_state)) | 4365 | struct drbd_request *req; |
4255 | dev_warn(DEV, "Got NegAck packet. Peer is in troubles?\n"); | 4366 | struct bio_and_error m; |
4256 | 4367 | ||
4257 | update_peer_seq(mdev, be32_to_cpu(p->seq_num)); | 4368 | update_peer_seq(mdev, be32_to_cpu(p->seq_num)); |
4258 | 4369 | ||
4259 | if (is_syncer_block_id(p->block_id)) { | 4370 | if (is_syncer_block_id(p->block_id)) { |
4260 | int size = be32_to_cpu(p->blksize); | ||
4261 | dec_rs_pending(mdev); | 4371 | dec_rs_pending(mdev); |
4262 | drbd_rs_failed_io(mdev, sector, size); | 4372 | drbd_rs_failed_io(mdev, sector, size); |
4263 | return TRUE; | 4373 | return true; |
4264 | } | 4374 | } |
4265 | return validate_req_change_req_state(mdev, p->block_id, sector, | 4375 | |
4266 | _ack_id_to_req, __func__ , neg_acked); | 4376 | spin_lock_irq(&mdev->req_lock); |
4377 | req = _ack_id_to_req(mdev, p->block_id, sector); | ||
4378 | if (!req) { | ||
4379 | spin_unlock_irq(&mdev->req_lock); | ||
4380 | if (mdev->net_conf->wire_protocol == DRBD_PROT_A || | ||
4381 | mdev->net_conf->wire_protocol == DRBD_PROT_B) { | ||
4382 | /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs. | ||
4383 | The master bio might already be completed, therefore the | ||
4384 | request is no longer in the collision hash. | ||
4385 | => Do not try to validate block_id as request. */ | ||
4386 | /* In Protocol B we might already have got a P_RECV_ACK | ||
4387 | But then get a P_NEG_ACK afterwards. */ | ||
4388 | drbd_set_out_of_sync(mdev, sector, size); | ||
4389 | return true; | ||
4390 | } else { | ||
4391 | dev_err(DEV, "%s: failed to find req %p, sector %llus\n", __func__, | ||
4392 | (void *)(unsigned long)p->block_id, (unsigned long long)sector); | ||
4393 | return false; | ||
4394 | } | ||
4395 | } | ||
4396 | __req_mod(req, neg_acked, &m); | ||
4397 | spin_unlock_irq(&mdev->req_lock); | ||
4398 | |||
4399 | if (m.bio) | ||
4400 | complete_master_bio(mdev, &m); | ||
4401 | return true; | ||
4267 | } | 4402 | } |
4268 | 4403 | ||
4269 | static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h) | 4404 | static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h) |
@@ -4294,11 +4429,20 @@ static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h) | |||
4294 | 4429 | ||
4295 | if (get_ldev_if_state(mdev, D_FAILED)) { | 4430 | if (get_ldev_if_state(mdev, D_FAILED)) { |
4296 | drbd_rs_complete_io(mdev, sector); | 4431 | drbd_rs_complete_io(mdev, sector); |
4297 | drbd_rs_failed_io(mdev, sector, size); | 4432 | switch (be16_to_cpu(h->command)) { |
4433 | case P_NEG_RS_DREPLY: | ||
4434 | drbd_rs_failed_io(mdev, sector, size); | ||
4435 | case P_RS_CANCEL: | ||
4436 | break; | ||
4437 | default: | ||
4438 | D_ASSERT(0); | ||
4439 | put_ldev(mdev); | ||
4440 | return false; | ||
4441 | } | ||
4298 | put_ldev(mdev); | 4442 | put_ldev(mdev); |
4299 | } | 4443 | } |
4300 | 4444 | ||
4301 | return TRUE; | 4445 | return true; |
4302 | } | 4446 | } |
4303 | 4447 | ||
4304 | static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h) | 4448 | static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h) |
@@ -4307,7 +4451,14 @@ static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h) | |||
4307 | 4451 | ||
4308 | tl_release(mdev, p->barrier, be32_to_cpu(p->set_size)); | 4452 | tl_release(mdev, p->barrier, be32_to_cpu(p->set_size)); |
4309 | 4453 | ||
4310 | return TRUE; | 4454 | if (mdev->state.conn == C_AHEAD && |
4455 | atomic_read(&mdev->ap_in_flight) == 0 && | ||
4456 | !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) { | ||
4457 | mdev->start_resync_timer.expires = jiffies + HZ; | ||
4458 | add_timer(&mdev->start_resync_timer); | ||
4459 | } | ||
4460 | |||
4461 | return true; | ||
4311 | } | 4462 | } |
4312 | 4463 | ||
4313 | static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) | 4464 | static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) |
@@ -4328,12 +4479,18 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) | |||
4328 | ov_oos_print(mdev); | 4479 | ov_oos_print(mdev); |
4329 | 4480 | ||
4330 | if (!get_ldev(mdev)) | 4481 | if (!get_ldev(mdev)) |
4331 | return TRUE; | 4482 | return true; |
4332 | 4483 | ||
4333 | drbd_rs_complete_io(mdev, sector); | 4484 | drbd_rs_complete_io(mdev, sector); |
4334 | dec_rs_pending(mdev); | 4485 | dec_rs_pending(mdev); |
4335 | 4486 | ||
4336 | if (--mdev->ov_left == 0) { | 4487 | --mdev->ov_left; |
4488 | |||
4489 | /* let's advance progress step marks only for every other megabyte */ | ||
4490 | if ((mdev->ov_left & 0x200) == 0x200) | ||
4491 | drbd_advance_rs_marks(mdev, mdev->ov_left); | ||
4492 | |||
4493 | if (mdev->ov_left == 0) { | ||
4337 | w = kmalloc(sizeof(*w), GFP_NOIO); | 4494 | w = kmalloc(sizeof(*w), GFP_NOIO); |
4338 | if (w) { | 4495 | if (w) { |
4339 | w->cb = w_ov_finished; | 4496 | w->cb = w_ov_finished; |
@@ -4345,12 +4502,12 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) | |||
4345 | } | 4502 | } |
4346 | } | 4503 | } |
4347 | put_ldev(mdev); | 4504 | put_ldev(mdev); |
4348 | return TRUE; | 4505 | return true; |
4349 | } | 4506 | } |
4350 | 4507 | ||
4351 | static int got_skip(struct drbd_conf *mdev, struct p_header80 *h) | 4508 | static int got_skip(struct drbd_conf *mdev, struct p_header80 *h) |
4352 | { | 4509 | { |
4353 | return TRUE; | 4510 | return true; |
4354 | } | 4511 | } |
4355 | 4512 | ||
4356 | struct asender_cmd { | 4513 | struct asender_cmd { |
@@ -4378,6 +4535,7 @@ static struct asender_cmd *get_asender_cmd(int cmd) | |||
4378 | [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, | 4535 | [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, |
4379 | [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, | 4536 | [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, |
4380 | [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip }, | 4537 | [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip }, |
4538 | [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply}, | ||
4381 | [P_MAX_CMD] = { 0, NULL }, | 4539 | [P_MAX_CMD] = { 0, NULL }, |
4382 | }; | 4540 | }; |
4383 | if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL) | 4541 | if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL) |
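
The P_RS_CANCEL entry added above slots into the same table-driven dispatch the asender already uses: every meta-data packet type maps to an expected payload size and a handler, and anything above P_MAX_CMD or without a handler is rejected before a payload is read. A minimal userspace sketch of that pattern follows; the packet enum, payload sizes and handler signatures are simplified stand-ins for illustration, not the kernel definitions.

#include <stdio.h>
#include <stddef.h>

/* Simplified stand-ins for the packet types the asender handles. */
enum { P_PING, P_PING_ACK, P_BLOCK_ACK, P_RS_CANCEL, P_MAX_CMD };

struct asender_cmd {
	size_t pkt_size;          /* payload expected after the header (illustrative sizes) */
	int (*process)(int cmd);  /* handler; returns true/false like the kernel code */
};

static int got_Ping(int cmd)        { printf("ping (%d)\n", cmd); return 1; }
static int got_BlockAck(int cmd)    { printf("block ack (%d)\n", cmd); return 1; }
static int got_NegRSDReply(int cmd) { printf("rs reply/cancel (%d)\n", cmd); return 1; }

/* Table-driven lookup mirroring get_asender_cmd(): unknown or unhandled
 * commands are rejected before any payload is received. */
static const struct asender_cmd asender_tbl[] = {
	[P_PING]      = { 0,  got_Ping },
	[P_PING_ACK]  = { 0,  got_Ping },
	[P_BLOCK_ACK] = { 24, got_BlockAck },
	[P_RS_CANCEL] = { 24, got_NegRSDReply },
	[P_MAX_CMD]   = { 0,  NULL },
};

static const struct asender_cmd *get_asender_cmd(int cmd)
{
	if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)
		return NULL;
	return &asender_tbl[cmd];
}

int main(void)
{
	const struct asender_cmd *c = get_asender_cmd(P_RS_CANCEL);
	if (c)
		c->process(P_RS_CANCEL);
	return 0;
}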
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index ad3fc6228f27..5c0c8be1bb0a 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c | |||
@@ -140,9 +140,14 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, | |||
140 | struct hlist_node *n; | 140 | struct hlist_node *n; |
141 | struct hlist_head *slot; | 141 | struct hlist_head *slot; |
142 | 142 | ||
143 | /* before we can signal completion to the upper layers, | 143 | /* Before we can signal completion to the upper layers, |
144 | * we may need to close the current epoch */ | 144 | * we may need to close the current epoch. |
145 | * We can skip this, if this request has not even been sent, because we | ||
146 | * did not have a fully established connection yet/anymore, during | ||
147 | * bitmap exchange, or while we are C_AHEAD due to congestion policy. | ||
148 | */ | ||
145 | if (mdev->state.conn >= C_CONNECTED && | 149 | if (mdev->state.conn >= C_CONNECTED && |
150 | (s & RQ_NET_SENT) != 0 && | ||
146 | req->epoch == mdev->newest_tle->br_number) | 151 | req->epoch == mdev->newest_tle->br_number) |
147 | queue_barrier(mdev); | 152 | queue_barrier(mdev); |
148 | 153 | ||
@@ -440,7 +445,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
440 | req->rq_state |= RQ_LOCAL_COMPLETED; | 445 | req->rq_state |= RQ_LOCAL_COMPLETED; |
441 | req->rq_state &= ~RQ_LOCAL_PENDING; | 446 | req->rq_state &= ~RQ_LOCAL_PENDING; |
442 | 447 | ||
443 | __drbd_chk_io_error(mdev, FALSE); | 448 | __drbd_chk_io_error(mdev, false); |
444 | _req_may_be_done_not_susp(req, m); | 449 | _req_may_be_done_not_susp(req, m); |
445 | put_ldev(mdev); | 450 | put_ldev(mdev); |
446 | break; | 451 | break; |
@@ -461,7 +466,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
461 | 466 | ||
462 | D_ASSERT(!(req->rq_state & RQ_NET_MASK)); | 467 | D_ASSERT(!(req->rq_state & RQ_NET_MASK)); |
463 | 468 | ||
464 | __drbd_chk_io_error(mdev, FALSE); | 469 | __drbd_chk_io_error(mdev, false); |
465 | put_ldev(mdev); | 470 | put_ldev(mdev); |
466 | 471 | ||
467 | /* no point in retrying if there is no good remote data, | 472 | /* no point in retrying if there is no good remote data, |
@@ -545,6 +550,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
545 | 550 | ||
546 | break; | 551 | break; |
547 | 552 | ||
553 | case queue_for_send_oos: | ||
554 | req->rq_state |= RQ_NET_QUEUED; | ||
555 | req->w.cb = w_send_oos; | ||
556 | drbd_queue_work(&mdev->data.work, &req->w); | ||
557 | break; | ||
558 | |||
559 | case oos_handed_to_network: | ||
560 | /* actually the same */ | ||
548 | case send_canceled: | 561 | case send_canceled: |
549 | /* treat it the same */ | 562 | /* treat it the same */ |
550 | case send_failed: | 563 | case send_failed: |
@@ -558,6 +571,9 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
558 | 571 | ||
559 | case handed_over_to_network: | 572 | case handed_over_to_network: |
560 | /* assert something? */ | 573 | /* assert something? */ |
574 | if (bio_data_dir(req->master_bio) == WRITE) | ||
575 | atomic_add(req->size>>9, &mdev->ap_in_flight); | ||
576 | |||
561 | if (bio_data_dir(req->master_bio) == WRITE && | 577 | if (bio_data_dir(req->master_bio) == WRITE && |
562 | mdev->net_conf->wire_protocol == DRBD_PROT_A) { | 578 | mdev->net_conf->wire_protocol == DRBD_PROT_A) { |
563 | /* this is what is dangerous about protocol A: | 579 | /* this is what is dangerous about protocol A: |
@@ -591,6 +607,9 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
591 | dec_ap_pending(mdev); | 607 | dec_ap_pending(mdev); |
592 | req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); | 608 | req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); |
593 | req->rq_state |= RQ_NET_DONE; | 609 | req->rq_state |= RQ_NET_DONE; |
610 | if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE) | ||
611 | atomic_sub(req->size>>9, &mdev->ap_in_flight); | ||
612 | |||
594 | /* if it is still queued, we may not complete it here. | 613 | /* if it is still queued, we may not complete it here. |
595 | * it will be canceled soon. */ | 614 | * it will be canceled soon. */ |
596 | if (!(req->rq_state & RQ_NET_QUEUED)) | 615 | if (!(req->rq_state & RQ_NET_QUEUED)) |
@@ -628,14 +647,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
628 | req->rq_state |= RQ_NET_OK; | 647 | req->rq_state |= RQ_NET_OK; |
629 | D_ASSERT(req->rq_state & RQ_NET_PENDING); | 648 | D_ASSERT(req->rq_state & RQ_NET_PENDING); |
630 | dec_ap_pending(mdev); | 649 | dec_ap_pending(mdev); |
650 | atomic_sub(req->size>>9, &mdev->ap_in_flight); | ||
631 | req->rq_state &= ~RQ_NET_PENDING; | 651 | req->rq_state &= ~RQ_NET_PENDING; |
632 | _req_may_be_done_not_susp(req, m); | 652 | _req_may_be_done_not_susp(req, m); |
633 | break; | 653 | break; |
634 | 654 | ||
635 | case neg_acked: | 655 | case neg_acked: |
636 | /* assert something? */ | 656 | /* assert something? */ |
637 | if (req->rq_state & RQ_NET_PENDING) | 657 | if (req->rq_state & RQ_NET_PENDING) { |
638 | dec_ap_pending(mdev); | 658 | dec_ap_pending(mdev); |
659 | atomic_sub(req->size>>9, &mdev->ap_in_flight); | ||
660 | } | ||
639 | req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); | 661 | req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); |
640 | 662 | ||
641 | req->rq_state |= RQ_NET_DONE; | 663 | req->rq_state |= RQ_NET_DONE; |
@@ -690,8 +712,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
690 | dev_err(DEV, "FIXME (barrier_acked but pending)\n"); | 712 | dev_err(DEV, "FIXME (barrier_acked but pending)\n"); |
691 | list_move(&req->tl_requests, &mdev->out_of_sequence_requests); | 713 | list_move(&req->tl_requests, &mdev->out_of_sequence_requests); |
692 | } | 714 | } |
693 | D_ASSERT(req->rq_state & RQ_NET_SENT); | 715 | if ((req->rq_state & RQ_NET_MASK) != 0) { |
694 | req->rq_state |= RQ_NET_DONE; | 716 | req->rq_state |= RQ_NET_DONE; |
717 | if (mdev->net_conf->wire_protocol == DRBD_PROT_A) | ||
718 | atomic_sub(req->size>>9, &mdev->ap_in_flight); | ||
719 | } | ||
695 | _req_may_be_done(req, m); /* Allowed while state.susp */ | 720 | _req_may_be_done(req, m); /* Allowed while state.susp */ |
696 | break; | 721 | break; |
697 | 722 | ||
@@ -738,14 +763,14 @@ static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int s | |||
738 | return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr); | 763 | return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr); |
739 | } | 764 | } |
740 | 765 | ||
741 | static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) | 766 | static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time) |
742 | { | 767 | { |
743 | const int rw = bio_rw(bio); | 768 | const int rw = bio_rw(bio); |
744 | const int size = bio->bi_size; | 769 | const int size = bio->bi_size; |
745 | const sector_t sector = bio->bi_sector; | 770 | const sector_t sector = bio->bi_sector; |
746 | struct drbd_tl_epoch *b = NULL; | 771 | struct drbd_tl_epoch *b = NULL; |
747 | struct drbd_request *req; | 772 | struct drbd_request *req; |
748 | int local, remote; | 773 | int local, remote, send_oos = 0; |
749 | int err = -EIO; | 774 | int err = -EIO; |
750 | int ret = 0; | 775 | int ret = 0; |
751 | 776 | ||
@@ -759,6 +784,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) | |||
759 | bio_endio(bio, -ENOMEM); | 784 | bio_endio(bio, -ENOMEM); |
760 | return 0; | 785 | return 0; |
761 | } | 786 | } |
787 | req->start_time = start_time; | ||
762 | 788 | ||
763 | local = get_ldev(mdev); | 789 | local = get_ldev(mdev); |
764 | if (!local) { | 790 | if (!local) { |
@@ -808,9 +834,9 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) | |||
808 | drbd_al_begin_io(mdev, sector); | 834 | drbd_al_begin_io(mdev, sector); |
809 | } | 835 | } |
810 | 836 | ||
811 | remote = remote && (mdev->state.pdsk == D_UP_TO_DATE || | 837 | remote = remote && drbd_should_do_remote(mdev->state); |
812 | (mdev->state.pdsk == D_INCONSISTENT && | 838 | send_oos = rw == WRITE && drbd_should_send_oos(mdev->state); |
813 | mdev->state.conn >= C_CONNECTED)); | 839 | D_ASSERT(!(remote && send_oos)); |
814 | 840 | ||
815 | if (!(local || remote) && !is_susp(mdev->state)) { | 841 | if (!(local || remote) && !is_susp(mdev->state)) { |
816 | if (__ratelimit(&drbd_ratelimit_state)) | 842 | if (__ratelimit(&drbd_ratelimit_state)) |
@@ -824,7 +850,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) | |||
824 | * but there is a race between testing the bit and pointer outside the | 850 | * but there is a race between testing the bit and pointer outside the |
825 | * spinlock, and grabbing the spinlock. | 851 | * spinlock, and grabbing the spinlock. |
826 | * if we lost that race, we retry. */ | 852 | * if we lost that race, we retry. */ |
827 | if (rw == WRITE && remote && | 853 | if (rw == WRITE && (remote || send_oos) && |
828 | mdev->unused_spare_tle == NULL && | 854 | mdev->unused_spare_tle == NULL && |
829 | test_bit(CREATE_BARRIER, &mdev->flags)) { | 855 | test_bit(CREATE_BARRIER, &mdev->flags)) { |
830 | allocate_barrier: | 856 | allocate_barrier: |
@@ -842,18 +868,19 @@ allocate_barrier: | |||
842 | if (is_susp(mdev->state)) { | 868 | if (is_susp(mdev->state)) { |
843 | /* If we got suspended, use the retry mechanism of | 869 | /* If we got suspended, use the retry mechanism of |
844 | generic_make_request() to restart processing of this | 870 | generic_make_request() to restart processing of this |
845 | bio. In the next call to drbd_make_request_26 | 871 | bio. In the next call to drbd_make_request |
846 | we sleep in inc_ap_bio() */ | 872 | we sleep in inc_ap_bio() */ |
847 | ret = 1; | 873 | ret = 1; |
848 | spin_unlock_irq(&mdev->req_lock); | 874 | spin_unlock_irq(&mdev->req_lock); |
849 | goto fail_free_complete; | 875 | goto fail_free_complete; |
850 | } | 876 | } |
851 | 877 | ||
852 | if (remote) { | 878 | if (remote || send_oos) { |
853 | remote = (mdev->state.pdsk == D_UP_TO_DATE || | 879 | remote = drbd_should_do_remote(mdev->state); |
854 | (mdev->state.pdsk == D_INCONSISTENT && | 880 | send_oos = rw == WRITE && drbd_should_send_oos(mdev->state); |
855 | mdev->state.conn >= C_CONNECTED)); | 881 | D_ASSERT(!(remote && send_oos)); |
856 | if (!remote) | 882 | |
883 | if (!(remote || send_oos)) | ||
857 | dev_warn(DEV, "lost connection while grabbing the req_lock!\n"); | 884 | dev_warn(DEV, "lost connection while grabbing the req_lock!\n"); |
858 | if (!(local || remote)) { | 885 | if (!(local || remote)) { |
859 | dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); | 886 | dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); |
@@ -866,7 +893,7 @@ allocate_barrier: | |||
866 | mdev->unused_spare_tle = b; | 893 | mdev->unused_spare_tle = b; |
867 | b = NULL; | 894 | b = NULL; |
868 | } | 895 | } |
869 | if (rw == WRITE && remote && | 896 | if (rw == WRITE && (remote || send_oos) && |
870 | mdev->unused_spare_tle == NULL && | 897 | mdev->unused_spare_tle == NULL && |
871 | test_bit(CREATE_BARRIER, &mdev->flags)) { | 898 | test_bit(CREATE_BARRIER, &mdev->flags)) { |
872 | /* someone closed the current epoch | 899 | /* someone closed the current epoch |
@@ -889,7 +916,7 @@ allocate_barrier: | |||
889 | * barrier packet. To get the write ordering right, we only have to | 916 | * barrier packet. To get the write ordering right, we only have to |
890 | * make sure that, if this is a write request and it triggered a | 917 | * make sure that, if this is a write request and it triggered a |
891 | * barrier packet, this request is queued within the same spinlock. */ | 918 | * barrier packet, this request is queued within the same spinlock. */ |
892 | if (remote && mdev->unused_spare_tle && | 919 | if ((remote || send_oos) && mdev->unused_spare_tle && |
893 | test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { | 920 | test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { |
894 | _tl_add_barrier(mdev, mdev->unused_spare_tle); | 921 | _tl_add_barrier(mdev, mdev->unused_spare_tle); |
895 | mdev->unused_spare_tle = NULL; | 922 | mdev->unused_spare_tle = NULL; |
@@ -937,6 +964,34 @@ allocate_barrier: | |||
937 | ? queue_for_net_write | 964 | ? queue_for_net_write |
938 | : queue_for_net_read); | 965 | : queue_for_net_read); |
939 | } | 966 | } |
967 | if (send_oos && drbd_set_out_of_sync(mdev, sector, size)) | ||
968 | _req_mod(req, queue_for_send_oos); | ||
969 | |||
970 | if (remote && | ||
971 | mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) { | ||
972 | int congested = 0; | ||
973 | |||
974 | if (mdev->net_conf->cong_fill && | ||
975 | atomic_read(&mdev->ap_in_flight) >= mdev->net_conf->cong_fill) { | ||
976 | dev_info(DEV, "Congestion-fill threshold reached\n"); | ||
977 | congested = 1; | ||
978 | } | ||
979 | |||
980 | if (mdev->act_log->used >= mdev->net_conf->cong_extents) { | ||
981 | dev_info(DEV, "Congestion-extents threshold reached\n"); | ||
982 | congested = 1; | ||
983 | } | ||
984 | |||
985 | if (congested) { | ||
986 | queue_barrier(mdev); /* last barrier, after mirrored writes */ | ||
987 | |||
988 | if (mdev->net_conf->on_congestion == OC_PULL_AHEAD) | ||
989 | _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL); | ||
990 | else /*mdev->net_conf->on_congestion == OC_DISCONNECT */ | ||
991 | _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL); | ||
992 | } | ||
993 | } | ||
994 | |||
940 | spin_unlock_irq(&mdev->req_lock); | 995 | spin_unlock_irq(&mdev->req_lock); |
941 | kfree(b); /* if someone else has beaten us to it... */ | 996 | kfree(b); /* if someone else has beaten us to it... */ |
942 | 997 | ||
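
For orientation, the congestion branch added to drbd_make_request_common() above only runs for protocol 96+ peers whose on_congestion policy is not plain blocking: it compares the amount of write data in flight against cong_fill (in sectors) and the number of hot activity-log extents against cong_extents, and on either threshold it closes the epoch and moves to C_AHEAD or C_DISCONNECTING. These should correspond to the on-congestion, congestion-fill and congestion-extents net options in drbd.conf, if I recall the option names correctly. Below is a hedged userspace model of just that decision, with simplified types:

#include <stdio.h>

enum on_congestion_policy { OC_BLOCK, OC_PULL_AHEAD, OC_DISCONNECT };
enum conn_state { C_CONNECTED, C_AHEAD, C_DISCONNECTING };

struct net_conf {
	enum on_congestion_policy on_congestion;
	unsigned int cong_fill;     /* sectors of write data allowed in flight; 0 = unlimited */
	unsigned int cong_extents;  /* activity-log extents allowed to be hot */
};

/* Returns the connection state to switch to, or C_CONNECTED to stay put.
 * Mirrors the checks added to drbd_make_request_common(). */
static enum conn_state congestion_decision(const struct net_conf *nc,
					   unsigned int ap_in_flight,
					   unsigned int al_used)
{
	int congested = 0;

	if (nc->on_congestion == OC_BLOCK)
		return C_CONNECTED;         /* policy: just block, never pull ahead */

	if (nc->cong_fill && ap_in_flight >= nc->cong_fill)
		congested = 1;              /* too much unacked write data on the wire */
	if (al_used >= nc->cong_extents)
		congested = 1;              /* activity log nearly full */

	if (!congested)
		return C_CONNECTED;
	return nc->on_congestion == OC_PULL_AHEAD ? C_AHEAD : C_DISCONNECTING;
}

int main(void)
{
	struct net_conf nc = { OC_PULL_AHEAD, 1048576 /* 512 MiB in sectors */, 500 };
	printf("%d\n", congestion_decision(&nc, 1200000, 42));  /* prints 1 == C_AHEAD */
	return 0;
}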
@@ -949,9 +1004,9 @@ allocate_barrier: | |||
949 | * stable storage, and this is a WRITE, we may not even submit | 1004 | * stable storage, and this is a WRITE, we may not even submit |
950 | * this bio. */ | 1005 | * this bio. */ |
951 | if (get_ldev(mdev)) { | 1006 | if (get_ldev(mdev)) { |
952 | if (FAULT_ACTIVE(mdev, rw == WRITE ? DRBD_FAULT_DT_WR | 1007 | if (drbd_insert_fault(mdev, rw == WRITE ? DRBD_FAULT_DT_WR |
953 | : rw == READ ? DRBD_FAULT_DT_RD | 1008 | : rw == READ ? DRBD_FAULT_DT_RD |
954 | : DRBD_FAULT_DT_RA)) | 1009 | : DRBD_FAULT_DT_RA)) |
955 | bio_endio(req->private_bio, -EIO); | 1010 | bio_endio(req->private_bio, -EIO); |
956 | else | 1011 | else |
957 | generic_make_request(req->private_bio); | 1012 | generic_make_request(req->private_bio); |
@@ -1018,16 +1073,19 @@ static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write) | |||
1018 | return 0; | 1073 | return 0; |
1019 | } | 1074 | } |
1020 | 1075 | ||
1021 | int drbd_make_request_26(struct request_queue *q, struct bio *bio) | 1076 | int drbd_make_request(struct request_queue *q, struct bio *bio) |
1022 | { | 1077 | { |
1023 | unsigned int s_enr, e_enr; | 1078 | unsigned int s_enr, e_enr; |
1024 | struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; | 1079 | struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; |
1080 | unsigned long start_time; | ||
1025 | 1081 | ||
1026 | if (drbd_fail_request_early(mdev, bio_data_dir(bio) & WRITE)) { | 1082 | if (drbd_fail_request_early(mdev, bio_data_dir(bio) & WRITE)) { |
1027 | bio_endio(bio, -EPERM); | 1083 | bio_endio(bio, -EPERM); |
1028 | return 0; | 1084 | return 0; |
1029 | } | 1085 | } |
1030 | 1086 | ||
1087 | start_time = jiffies; | ||
1088 | |||
1031 | /* | 1089 | /* |
1032 | * what we "blindly" assume: | 1090 | * what we "blindly" assume: |
1033 | */ | 1091 | */ |
@@ -1042,12 +1100,12 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio) | |||
1042 | 1100 | ||
1043 | if (likely(s_enr == e_enr)) { | 1101 | if (likely(s_enr == e_enr)) { |
1044 | inc_ap_bio(mdev, 1); | 1102 | inc_ap_bio(mdev, 1); |
1045 | return drbd_make_request_common(mdev, bio); | 1103 | return drbd_make_request_common(mdev, bio, start_time); |
1046 | } | 1104 | } |
1047 | 1105 | ||
1048 | /* can this bio be split generically? | 1106 | /* can this bio be split generically? |
1049 | * Maybe add our own split-arbitrary-bios function. */ | 1107 | * Maybe add our own split-arbitrary-bios function. */ |
1050 | if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_SEGMENT_SIZE) { | 1108 | if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_BIO_SIZE) { |
1051 | /* rather error out here than BUG in bio_split */ | 1109 | /* rather error out here than BUG in bio_split */ |
1052 | dev_err(DEV, "bio would need to, but cannot, be split: " | 1110 | dev_err(DEV, "bio would need to, but cannot, be split: " |
1053 | "(vcnt=%u,idx=%u,size=%u,sector=%llu)\n", | 1111 | "(vcnt=%u,idx=%u,size=%u,sector=%llu)\n", |
@@ -1069,11 +1127,7 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio) | |||
1069 | const int sps = 1 << HT_SHIFT; /* sectors per slot */ | 1127 | const int sps = 1 << HT_SHIFT; /* sectors per slot */ |
1070 | const int mask = sps - 1; | 1128 | const int mask = sps - 1; |
1071 | const sector_t first_sectors = sps - (sect & mask); | 1129 | const sector_t first_sectors = sps - (sect & mask); |
1072 | bp = bio_split(bio, | 1130 | bp = bio_split(bio, first_sectors); |
1073 | #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) | ||
1074 | bio_split_pool, | ||
1075 | #endif | ||
1076 | first_sectors); | ||
1077 | 1131 | ||
1078 | /* we need to get a "reference count" (ap_bio_cnt) | 1132 | /* we need to get a "reference count" (ap_bio_cnt) |
1079 | * to avoid races with the disconnect/reconnect/suspend code. | 1133 | * to avoid races with the disconnect/reconnect/suspend code. |
@@ -1084,10 +1138,10 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio) | |||
1084 | 1138 | ||
1085 | D_ASSERT(e_enr == s_enr + 1); | 1139 | D_ASSERT(e_enr == s_enr + 1); |
1086 | 1140 | ||
1087 | while (drbd_make_request_common(mdev, &bp->bio1)) | 1141 | while (drbd_make_request_common(mdev, &bp->bio1, start_time)) |
1088 | inc_ap_bio(mdev, 1); | 1142 | inc_ap_bio(mdev, 1); |
1089 | 1143 | ||
1090 | while (drbd_make_request_common(mdev, &bp->bio2)) | 1144 | while (drbd_make_request_common(mdev, &bp->bio2, start_time)) |
1091 | inc_ap_bio(mdev, 1); | 1145 | inc_ap_bio(mdev, 1); |
1092 | 1146 | ||
1093 | dec_ap_bio(mdev); | 1147 | dec_ap_bio(mdev); |
@@ -1098,7 +1152,7 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio) | |||
1098 | } | 1152 | } |
1099 | 1153 | ||
1100 | /* This is called by bio_add_page(). With this function we reduce | 1154 | /* This is called by bio_add_page(). With this function we reduce |
1101 | * the number of BIOs that span over multiple DRBD_MAX_SEGMENT_SIZEs | 1155 | * the number of BIOs that span over multiple DRBD_MAX_BIO_SIZEs |
1102 | * units (was AL_EXTENTs). | 1156 | * units (was AL_EXTENTs). |
1103 | * | 1157 | * |
1104 | * we do the calculation within the lower 32bit of the byte offsets, | 1158 | * we do the calculation within the lower 32bit of the byte offsets, |
@@ -1108,7 +1162,7 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio) | |||
1108 | * As long as the BIO is empty we have to allow at least one bvec, | 1162 | * As long as the BIO is empty we have to allow at least one bvec, |
1109 | * regardless of size and offset. so the resulting bio may still | 1163 | * regardless of size and offset. so the resulting bio may still |
1110 | * cross extent boundaries. those are dealt with (bio_split) in | 1164 | * cross extent boundaries. those are dealt with (bio_split) in |
1111 | * drbd_make_request_26. | 1165 | * drbd_make_request. |
1112 | */ | 1166 | */ |
1113 | int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec) | 1167 | int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec) |
1114 | { | 1168 | { |
@@ -1118,8 +1172,8 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct | |||
1118 | unsigned int bio_size = bvm->bi_size; | 1172 | unsigned int bio_size = bvm->bi_size; |
1119 | int limit, backing_limit; | 1173 | int limit, backing_limit; |
1120 | 1174 | ||
1121 | limit = DRBD_MAX_SEGMENT_SIZE | 1175 | limit = DRBD_MAX_BIO_SIZE |
1122 | - ((bio_offset & (DRBD_MAX_SEGMENT_SIZE-1)) + bio_size); | 1176 | - ((bio_offset & (DRBD_MAX_BIO_SIZE-1)) + bio_size); |
1123 | if (limit < 0) | 1177 | if (limit < 0) |
1124 | limit = 0; | 1178 | limit = 0; |
1125 | if (bio_size == 0) { | 1179 | if (bio_size == 0) { |
@@ -1136,3 +1190,42 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct | |||
1136 | } | 1190 | } |
1137 | return limit; | 1191 | return limit; |
1138 | } | 1192 | } |
1193 | |||
1194 | void request_timer_fn(unsigned long data) | ||
1195 | { | ||
1196 | struct drbd_conf *mdev = (struct drbd_conf *) data; | ||
1197 | struct drbd_request *req; /* oldest request */ | ||
1198 | struct list_head *le; | ||
1199 | unsigned long et = 0; /* effective timeout = ko_count * timeout */ | ||
1200 | |||
1201 | if (get_net_conf(mdev)) { | ||
1202 | et = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count; | ||
1203 | put_net_conf(mdev); | ||
1204 | } | ||
1205 | if (!et || mdev->state.conn < C_WF_REPORT_PARAMS) | ||
1206 | return; /* Recurring timer stopped */ | ||
1207 | |||
1208 | spin_lock_irq(&mdev->req_lock); | ||
1209 | le = &mdev->oldest_tle->requests; | ||
1210 | if (list_empty(le)) { | ||
1211 | spin_unlock_irq(&mdev->req_lock); | ||
1212 | mod_timer(&mdev->request_timer, jiffies + et); | ||
1213 | return; | ||
1214 | } | ||
1215 | |||
1216 | le = le->prev; | ||
1217 | req = list_entry(le, struct drbd_request, tl_requests); | ||
1218 | if (time_is_before_eq_jiffies(req->start_time + et)) { | ||
1219 | if (req->rq_state & RQ_NET_PENDING) { | ||
1220 | dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); | ||
1221 | _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL); | ||
1222 | } else { | ||
1223 | dev_warn(DEV, "Local backing block device frozen?\n"); | ||
1224 | mod_timer(&mdev->request_timer, jiffies + et); | ||
1225 | } | ||
1226 | } else { | ||
1227 | mod_timer(&mdev->request_timer, req->start_time + et); | ||
1228 | } | ||
1229 | |||
1230 | spin_unlock_irq(&mdev->req_lock); | ||
1231 | } | ||
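
A short note on request_timer_fn() above: net_conf->timeout is kept in tenths of a second, hence the timeout*HZ/10 scaling, and the effective deadline is that value times ko_count. The timer only inspects the oldest request still in the transfer log; if it has sat there past the deadline while still RQ_NET_PENDING the connection goes to C_TIMEOUT, otherwise the timer is re-armed, either for the remaining time or, when the request is stuck locally, for a full period. The sketch below models the arithmetic in userspace; HZ, the timeout values and the request struct are illustrative stand-ins.

#include <stdio.h>

#define HZ 250                      /* stand-in for the kernel tick rate */
#define RQ_NET_PENDING 0x1

struct request { unsigned long start_time; unsigned int rq_state; };

/* time_is_before_eq_jiffies(t) is roughly "t <= now"; model it directly. */
static int expired(unsigned long start, unsigned long et, unsigned long now)
{
	return start + et <= now;
}

int main(void)
{
	unsigned long timeout = 60;     /* net timeout, in 0.1 s units => 6 seconds */
	unsigned long ko_count = 7;
	unsigned long et = timeout * HZ / 10 * ko_count;   /* 42 s worth of jiffies */

	struct request oldest = { .start_time = 1000, .rq_state = RQ_NET_PENDING };
	unsigned long now = 1000 + et + 1;

	if (expired(oldest.start_time, et, now)) {
		if (oldest.rq_state & RQ_NET_PENDING)
			printf("peer timed out -> C_TIMEOUT\n");
		else
			printf("local disk frozen? re-arm for another %lu jiffies\n", et);
	} else {
		printf("re-arm at start_time + et\n");
	}
	return 0;
}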
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index ab2bd09d54b4..32e2c3e6a813 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h | |||
@@ -82,14 +82,16 @@ enum drbd_req_event { | |||
82 | to_be_submitted, | 82 | to_be_submitted, |
83 | 83 | ||
84 | /* XXX yes, now I am inconsistent... | 84 | /* XXX yes, now I am inconsistent... |
85 | * these two are not "events" but "actions" | 85 | * these are not "events" but "actions" |
86 | * oh, well... */ | 86 | * oh, well... */ |
87 | queue_for_net_write, | 87 | queue_for_net_write, |
88 | queue_for_net_read, | 88 | queue_for_net_read, |
89 | queue_for_send_oos, | ||
89 | 90 | ||
90 | send_canceled, | 91 | send_canceled, |
91 | send_failed, | 92 | send_failed, |
92 | handed_over_to_network, | 93 | handed_over_to_network, |
94 | oos_handed_to_network, | ||
93 | connection_lost_while_pending, | 95 | connection_lost_while_pending, |
94 | read_retry_remote_canceled, | 96 | read_retry_remote_canceled, |
95 | recv_acked_by_peer, | 97 | recv_acked_by_peer, |
@@ -289,7 +291,6 @@ static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev, | |||
289 | req->epoch = 0; | 291 | req->epoch = 0; |
290 | req->sector = bio_src->bi_sector; | 292 | req->sector = bio_src->bi_sector; |
291 | req->size = bio_src->bi_size; | 293 | req->size = bio_src->bi_size; |
292 | req->start_time = jiffies; | ||
293 | INIT_HLIST_NODE(&req->colision); | 294 | INIT_HLIST_NODE(&req->colision); |
294 | INIT_LIST_HEAD(&req->tl_requests); | 295 | INIT_LIST_HEAD(&req->tl_requests); |
295 | INIT_LIST_HEAD(&req->w.list); | 296 | INIT_LIST_HEAD(&req->w.list); |
@@ -321,6 +322,7 @@ extern int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
321 | struct bio_and_error *m); | 322 | struct bio_and_error *m); |
322 | extern void complete_master_bio(struct drbd_conf *mdev, | 323 | extern void complete_master_bio(struct drbd_conf *mdev, |
323 | struct bio_and_error *m); | 324 | struct bio_and_error *m); |
325 | extern void request_timer_fn(unsigned long data); | ||
324 | 326 | ||
325 | /* use this if you don't want to deal with calling complete_master_bio() | 327 | /* use this if you don't want to deal with calling complete_master_bio() |
326 | * outside the spinlock, e.g. when walking some list on cleanup. */ | 328 | * outside the spinlock, e.g. when walking some list on cleanup. */ |
@@ -338,23 +340,43 @@ static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what) | |||
338 | return rv; | 340 | return rv; |
339 | } | 341 | } |
340 | 342 | ||
341 | /* completion of master bio is outside of spinlock. | 343 | /* completion of master bio is outside of our spinlock. |
342 | * If you need it irqsave, do it your self! | 344 | * We still may or may not be inside some irqs disabled section |
343 | * Which means: don't use from bio endio callback. */ | 345 | * of the lower level driver completion callback, so we need to |
346 | * spin_lock_irqsave here. */ | ||
344 | static inline int req_mod(struct drbd_request *req, | 347 | static inline int req_mod(struct drbd_request *req, |
345 | enum drbd_req_event what) | 348 | enum drbd_req_event what) |
346 | { | 349 | { |
350 | unsigned long flags; | ||
347 | struct drbd_conf *mdev = req->mdev; | 351 | struct drbd_conf *mdev = req->mdev; |
348 | struct bio_and_error m; | 352 | struct bio_and_error m; |
349 | int rv; | 353 | int rv; |
350 | 354 | ||
351 | spin_lock_irq(&mdev->req_lock); | 355 | spin_lock_irqsave(&mdev->req_lock, flags); |
352 | rv = __req_mod(req, what, &m); | 356 | rv = __req_mod(req, what, &m); |
353 | spin_unlock_irq(&mdev->req_lock); | 357 | spin_unlock_irqrestore(&mdev->req_lock, flags); |
354 | 358 | ||
355 | if (m.bio) | 359 | if (m.bio) |
356 | complete_master_bio(mdev, &m); | 360 | complete_master_bio(mdev, &m); |
357 | 361 | ||
358 | return rv; | 362 | return rv; |
359 | } | 363 | } |
364 | |||
365 | static inline bool drbd_should_do_remote(union drbd_state s) | ||
366 | { | ||
367 | return s.pdsk == D_UP_TO_DATE || | ||
368 | (s.pdsk >= D_INCONSISTENT && | ||
369 | s.conn >= C_WF_BITMAP_T && | ||
370 | s.conn < C_AHEAD); | ||
371 | /* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T. | ||
372 | That is equivalent since before 96 IO was frozen in the C_WF_BITMAP* | ||
373 | states. */ | ||
374 | } | ||
375 | static inline bool drbd_should_send_oos(union drbd_state s) | ||
376 | { | ||
377 | return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S; | ||
378 | /* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary | ||
379 | since we enter state C_AHEAD only if proto >= 96 */ | ||
380 | } | ||
381 | |||
360 | #endif | 382 | #endif |
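
The two inline helpers above capture the write-path decision drbd_make_request_common() now makes (and re-checks under the req_lock): either mirror the write to the peer, or, while Ahead of the peer or still sending the bitmap, only mark the blocks out of sync and queue a P_OUT_OF_SYNC descriptor; the kernel asserts the two cases are mutually exclusive. A minimal userspace model of that decision, with a simplified state struct standing in for union drbd_state and arbitrary enum values:

#include <stdio.h>
#include <stdbool.h>
#include <assert.h>

enum pdsk { D_INCONSISTENT = 4, D_UP_TO_DATE = 8 };
enum conn { C_CONNECTED = 10, C_WF_BITMAP_S, C_WF_BITMAP_T, C_AHEAD = 20, C_BEHIND };

struct state { enum pdsk pdsk; enum conn conn; };

/* Same predicates as the drbd_req.h helpers, on the simplified struct. */
static bool should_do_remote(struct state s)
{
	return s.pdsk == D_UP_TO_DATE ||
	       (s.pdsk >= D_INCONSISTENT &&
		s.conn >= C_WF_BITMAP_T &&
		s.conn < C_AHEAD);
}

static bool should_send_oos(struct state s)
{
	return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
}

int main(void)
{
	struct state ahead = { D_INCONSISTENT, C_AHEAD };
	bool remote = should_do_remote(ahead);
	bool send_oos = should_send_oos(ahead);    /* for a WRITE request */

	assert(!(remote && send_oos));             /* mutually exclusive, as asserted in the kernel code */
	printf("remote=%d send_oos=%d\n", remote, send_oos);  /* 0 1: only mark out of sync */
	return 0;
}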
diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c index 85179e1fb50a..c44a2a602772 100644 --- a/drivers/block/drbd/drbd_strings.c +++ b/drivers/block/drbd/drbd_strings.c | |||
@@ -48,6 +48,8 @@ static const char *drbd_conn_s_names[] = { | |||
48 | [C_PAUSED_SYNC_T] = "PausedSyncT", | 48 | [C_PAUSED_SYNC_T] = "PausedSyncT", |
49 | [C_VERIFY_S] = "VerifyS", | 49 | [C_VERIFY_S] = "VerifyS", |
50 | [C_VERIFY_T] = "VerifyT", | 50 | [C_VERIFY_T] = "VerifyT", |
51 | [C_AHEAD] = "Ahead", | ||
52 | [C_BEHIND] = "Behind", | ||
51 | }; | 53 | }; |
52 | 54 | ||
53 | static const char *drbd_role_s_names[] = { | 55 | static const char *drbd_role_s_names[] = { |
@@ -92,7 +94,7 @@ static const char *drbd_state_sw_errors[] = { | |||
92 | const char *drbd_conn_str(enum drbd_conns s) | 94 | const char *drbd_conn_str(enum drbd_conns s) |
93 | { | 95 | { |
94 | /* enums are unsigned... */ | 96 | /* enums are unsigned... */ |
95 | return s > C_PAUSED_SYNC_T ? "TOO_LARGE" : drbd_conn_s_names[s]; | 97 | return s > C_BEHIND ? "TOO_LARGE" : drbd_conn_s_names[s]; |
96 | } | 98 | } |
97 | 99 | ||
98 | const char *drbd_role_str(enum drbd_role s) | 100 | const char *drbd_role_str(enum drbd_role s) |
@@ -105,7 +107,7 @@ const char *drbd_disk_str(enum drbd_disk_state s) | |||
105 | return s > D_UP_TO_DATE ? "TOO_LARGE" : drbd_disk_s_names[s]; | 107 | return s > D_UP_TO_DATE ? "TOO_LARGE" : drbd_disk_s_names[s]; |
106 | } | 108 | } |
107 | 109 | ||
108 | const char *drbd_set_st_err_str(enum drbd_state_ret_codes err) | 110 | const char *drbd_set_st_err_str(enum drbd_state_rv err) |
109 | { | 111 | { |
110 | return err <= SS_AFTER_LAST_ERROR ? "TOO_SMALL" : | 112 | return err <= SS_AFTER_LAST_ERROR ? "TOO_SMALL" : |
111 | err > SS_TWO_PRIMARIES ? "TOO_LARGE" | 113 | err > SS_TWO_PRIMARIES ? "TOO_LARGE" |
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index e027446590d3..f7e6c92f8d03 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c | |||
@@ -39,18 +39,17 @@ | |||
39 | #include "drbd_req.h" | 39 | #include "drbd_req.h" |
40 | 40 | ||
41 | static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel); | 41 | static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel); |
42 | static int w_make_resync_request(struct drbd_conf *mdev, | ||
43 | struct drbd_work *w, int cancel); | ||
42 | 44 | ||
43 | 45 | ||
44 | 46 | ||
45 | /* defined here: | 47 | /* endio handlers: |
46 | drbd_md_io_complete | 48 | * drbd_md_io_complete (defined here) |
47 | drbd_endio_sec | 49 | * drbd_endio_pri (defined here) |
48 | drbd_endio_pri | 50 | * drbd_endio_sec (defined here) |
49 | 51 | * bm_async_io_complete (defined in drbd_bitmap.c) | |
50 | * more endio handlers: | 52 | * |
51 | atodb_endio in drbd_actlog.c | ||
52 | drbd_bm_async_io_complete in drbd_bitmap.c | ||
53 | |||
54 | * For all these callbacks, note the following: | 53 | * For all these callbacks, note the following: |
55 | * The callbacks will be called in irq context by the IDE drivers, | 54 | * The callbacks will be called in irq context by the IDE drivers, |
56 | * and in Softirqs/Tasklets/BH context by the SCSI drivers. | 55 | * and in Softirqs/Tasklets/BH context by the SCSI drivers. |
@@ -94,7 +93,7 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local) | |||
94 | if (list_empty(&mdev->read_ee)) | 93 | if (list_empty(&mdev->read_ee)) |
95 | wake_up(&mdev->ee_wait); | 94 | wake_up(&mdev->ee_wait); |
96 | if (test_bit(__EE_WAS_ERROR, &e->flags)) | 95 | if (test_bit(__EE_WAS_ERROR, &e->flags)) |
97 | __drbd_chk_io_error(mdev, FALSE); | 96 | __drbd_chk_io_error(mdev, false); |
98 | spin_unlock_irqrestore(&mdev->req_lock, flags); | 97 | spin_unlock_irqrestore(&mdev->req_lock, flags); |
99 | 98 | ||
100 | drbd_queue_work(&mdev->data.work, &e->w); | 99 | drbd_queue_work(&mdev->data.work, &e->w); |
@@ -137,7 +136,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo | |||
137 | : list_empty(&mdev->active_ee); | 136 | : list_empty(&mdev->active_ee); |
138 | 137 | ||
139 | if (test_bit(__EE_WAS_ERROR, &e->flags)) | 138 | if (test_bit(__EE_WAS_ERROR, &e->flags)) |
140 | __drbd_chk_io_error(mdev, FALSE); | 139 | __drbd_chk_io_error(mdev, false); |
141 | spin_unlock_irqrestore(&mdev->req_lock, flags); | 140 | spin_unlock_irqrestore(&mdev->req_lock, flags); |
142 | 141 | ||
143 | if (is_syncer_req) | 142 | if (is_syncer_req) |
@@ -163,14 +162,15 @@ void drbd_endio_sec(struct bio *bio, int error) | |||
163 | int uptodate = bio_flagged(bio, BIO_UPTODATE); | 162 | int uptodate = bio_flagged(bio, BIO_UPTODATE); |
164 | int is_write = bio_data_dir(bio) == WRITE; | 163 | int is_write = bio_data_dir(bio) == WRITE; |
165 | 164 | ||
166 | if (error) | 165 | if (error && __ratelimit(&drbd_ratelimit_state)) |
167 | dev_warn(DEV, "%s: error=%d s=%llus\n", | 166 | dev_warn(DEV, "%s: error=%d s=%llus\n", |
168 | is_write ? "write" : "read", error, | 167 | is_write ? "write" : "read", error, |
169 | (unsigned long long)e->sector); | 168 | (unsigned long long)e->sector); |
170 | if (!error && !uptodate) { | 169 | if (!error && !uptodate) { |
171 | dev_warn(DEV, "%s: setting error to -EIO s=%llus\n", | 170 | if (__ratelimit(&drbd_ratelimit_state)) |
172 | is_write ? "write" : "read", | 171 | dev_warn(DEV, "%s: setting error to -EIO s=%llus\n", |
173 | (unsigned long long)e->sector); | 172 | is_write ? "write" : "read", |
173 | (unsigned long long)e->sector); | ||
174 | /* strange behavior of some lower level drivers... | 174 | /* strange behavior of some lower level drivers... |
175 | * fail the request by clearing the uptodate flag, | 175 | * fail the request by clearing the uptodate flag, |
176 | * but do not return any error?! */ | 176 | * but do not return any error?! */ |
@@ -250,13 +250,6 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
250 | return w_send_read_req(mdev, w, 0); | 250 | return w_send_read_req(mdev, w, 0); |
251 | } | 251 | } |
252 | 252 | ||
253 | int w_resync_inactive(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | ||
254 | { | ||
255 | ERR_IF(cancel) return 1; | ||
256 | dev_err(DEV, "resync inactive, but callback triggered??\n"); | ||
257 | return 1; /* Simply ignore this! */ | ||
258 | } | ||
259 | |||
260 | void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, struct drbd_epoch_entry *e, void *digest) | 253 | void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, struct drbd_epoch_entry *e, void *digest) |
261 | { | 254 | { |
262 | struct hash_desc desc; | 255 | struct hash_desc desc; |
@@ -355,7 +348,7 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) | |||
355 | if (!get_ldev(mdev)) | 348 | if (!get_ldev(mdev)) |
356 | return -EIO; | 349 | return -EIO; |
357 | 350 | ||
358 | if (drbd_rs_should_slow_down(mdev)) | 351 | if (drbd_rs_should_slow_down(mdev, sector)) |
359 | goto defer; | 352 | goto defer; |
360 | 353 | ||
361 | /* GFP_TRY, because if there is no memory available right now, this may | 354 | /* GFP_TRY, because if there is no memory available right now, this may |
@@ -373,9 +366,10 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) | |||
373 | if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0) | 366 | if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0) |
374 | return 0; | 367 | return 0; |
375 | 368 | ||
376 | /* drbd_submit_ee currently fails for one reason only: | 369 | /* If it failed because of ENOMEM, retry should help. If it failed |
377 | * not being able to allocate enough bios. | 370 | * because bio_add_page failed (probably broken lower level driver), |
378 | * Is dropping the connection going to help? */ | 371 | * retry may or may not help. |
372 | * If it does not, you may need to force disconnect. */ | ||
379 | spin_lock_irq(&mdev->req_lock); | 373 | spin_lock_irq(&mdev->req_lock); |
380 | list_del(&e->w.list); | 374 | list_del(&e->w.list); |
381 | spin_unlock_irq(&mdev->req_lock); | 375 | spin_unlock_irq(&mdev->req_lock); |
@@ -386,26 +380,25 @@ defer: | |||
386 | return -EAGAIN; | 380 | return -EAGAIN; |
387 | } | 381 | } |
388 | 382 | ||
389 | void resync_timer_fn(unsigned long data) | 383 | int w_resync_timer(struct drbd_conf *mdev, struct drbd_work *w, int cancel) |
390 | { | 384 | { |
391 | struct drbd_conf *mdev = (struct drbd_conf *) data; | ||
392 | int queue; | ||
393 | |||
394 | queue = 1; | ||
395 | switch (mdev->state.conn) { | 385 | switch (mdev->state.conn) { |
396 | case C_VERIFY_S: | 386 | case C_VERIFY_S: |
397 | mdev->resync_work.cb = w_make_ov_request; | 387 | w_make_ov_request(mdev, w, cancel); |
398 | break; | 388 | break; |
399 | case C_SYNC_TARGET: | 389 | case C_SYNC_TARGET: |
400 | mdev->resync_work.cb = w_make_resync_request; | 390 | w_make_resync_request(mdev, w, cancel); |
401 | break; | 391 | break; |
402 | default: | ||
403 | queue = 0; | ||
404 | mdev->resync_work.cb = w_resync_inactive; | ||
405 | } | 392 | } |
406 | 393 | ||
407 | /* harmless race: list_empty outside data.work.q_lock */ | 394 | return 1; |
408 | if (list_empty(&mdev->resync_work.list) && queue) | 395 | } |
396 | |||
397 | void resync_timer_fn(unsigned long data) | ||
398 | { | ||
399 | struct drbd_conf *mdev = (struct drbd_conf *) data; | ||
400 | |||
401 | if (list_empty(&mdev->resync_work.list)) | ||
409 | drbd_queue_work(&mdev->data.work, &mdev->resync_work); | 402 | drbd_queue_work(&mdev->data.work, &mdev->resync_work); |
410 | } | 403 | } |
411 | 404 | ||
@@ -438,7 +431,7 @@ static void fifo_add_val(struct fifo_buffer *fb, int value) | |||
438 | fb->values[i] += value; | 431 | fb->values[i] += value; |
439 | } | 432 | } |
440 | 433 | ||
441 | int drbd_rs_controller(struct drbd_conf *mdev) | 434 | static int drbd_rs_controller(struct drbd_conf *mdev) |
442 | { | 435 | { |
443 | unsigned int sect_in; /* Number of sectors that came in since the last turn */ | 436 | unsigned int sect_in; /* Number of sectors that came in since the last turn */ |
444 | unsigned int want; /* The number of sectors we want in the proxy */ | 437 | unsigned int want; /* The number of sectors we want in the proxy */ |
@@ -492,29 +485,36 @@ int drbd_rs_controller(struct drbd_conf *mdev) | |||
492 | return req_sect; | 485 | return req_sect; |
493 | } | 486 | } |
494 | 487 | ||
495 | int w_make_resync_request(struct drbd_conf *mdev, | 488 | static int drbd_rs_number_requests(struct drbd_conf *mdev) |
496 | struct drbd_work *w, int cancel) | 489 | { |
490 | int number; | ||
491 | if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */ | ||
492 | number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9); | ||
493 | mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; | ||
494 | } else { | ||
495 | mdev->c_sync_rate = mdev->sync_conf.rate; | ||
496 | number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ); | ||
497 | } | ||
498 | |||
499 | /* ignore the amount of pending requests, the resync controller should | ||
500 | * throttle down to incoming reply rate soon enough anyways. */ | ||
501 | return number; | ||
502 | } | ||
503 | |||
504 | static int w_make_resync_request(struct drbd_conf *mdev, | ||
505 | struct drbd_work *w, int cancel) | ||
497 | { | 506 | { |
498 | unsigned long bit; | 507 | unsigned long bit; |
499 | sector_t sector; | 508 | sector_t sector; |
500 | const sector_t capacity = drbd_get_capacity(mdev->this_bdev); | 509 | const sector_t capacity = drbd_get_capacity(mdev->this_bdev); |
501 | int max_segment_size; | 510 | int max_bio_size; |
502 | int number, rollback_i, size, pe, mx; | 511 | int number, rollback_i, size; |
503 | int align, queued, sndbuf; | 512 | int align, queued, sndbuf; |
504 | int i = 0; | 513 | int i = 0; |
505 | 514 | ||
506 | if (unlikely(cancel)) | 515 | if (unlikely(cancel)) |
507 | return 1; | 516 | return 1; |
508 | 517 | ||
509 | if (unlikely(mdev->state.conn < C_CONNECTED)) { | ||
510 | dev_err(DEV, "Confused in w_make_resync_request()! cstate < Connected"); | ||
511 | return 0; | ||
512 | } | ||
513 | |||
514 | if (mdev->state.conn != C_SYNC_TARGET) | ||
515 | dev_err(DEV, "%s in w_make_resync_request\n", | ||
516 | drbd_conn_str(mdev->state.conn)); | ||
517 | |||
518 | if (mdev->rs_total == 0) { | 518 | if (mdev->rs_total == 0) { |
519 | /* empty resync? */ | 519 | /* empty resync? */ |
520 | drbd_resync_finished(mdev); | 520 | drbd_resync_finished(mdev); |
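
To put numbers on the fixed-rate branch of drbd_rs_number_requests() above: the configured sync rate is a KiB/s figure, each resync request covers one bitmap block of BM_BLOCK_SIZE bytes (4 KiB in mainline DRBD), and the worker runs every SLEEP_TIME jiffies, so number = SLEEP_TIME * c_sync_rate / ((BM_BLOCK_SIZE/1024) * HZ) requests go out per wakeup. Assuming SLEEP_TIME is HZ/10 (100 ms), that reduces to rate/40 per wakeup. The small standalone program below evaluates the same integer arithmetic; HZ and the rate are arbitrary example values.

#include <stdio.h>

#define HZ            250          /* example tick rate */
#define SLEEP_TIME    (HZ / 10)    /* assumed worker period: 100 ms */
#define BM_BLOCK_SIZE 4096         /* one resync request covers 4 KiB */

int main(void)
{
	unsigned int c_sync_rate = 400;     /* configured resync rate in KiB/s */
	unsigned int number;

	/* Same integer arithmetic as the fixed-rate branch of drbd_rs_number_requests(). */
	number = SLEEP_TIME * c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);

	printf("%u requests of %u KiB per %u ms => %u KiB/s\n",
	       number, BM_BLOCK_SIZE / 1024, 1000 * SLEEP_TIME / HZ,
	       number * (BM_BLOCK_SIZE / 1024) * HZ / SLEEP_TIME);
	return 0;
}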
@@ -527,49 +527,19 @@ int w_make_resync_request(struct drbd_conf *mdev, | |||
527 | to continue resync with a broken disk makes no sense at | 527 | to continue resync with a broken disk makes no sense at |
528 | all */ | 528 | all */ |
529 | dev_err(DEV, "Disk broke down during resync!\n"); | 529 | dev_err(DEV, "Disk broke down during resync!\n"); |
530 | mdev->resync_work.cb = w_resync_inactive; | ||
531 | return 1; | 530 | return 1; |
532 | } | 531 | } |
533 | 532 | ||
534 | /* starting with drbd 8.3.8, we can handle multi-bio EEs, | 533 | /* starting with drbd 8.3.8, we can handle multi-bio EEs, |
535 | * if it should be necessary */ | 534 | * if it should be necessary */ |
536 | max_segment_size = | 535 | max_bio_size = |
537 | mdev->agreed_pro_version < 94 ? queue_max_segment_size(mdev->rq_queue) : | 536 | mdev->agreed_pro_version < 94 ? queue_max_hw_sectors(mdev->rq_queue) << 9 : |
538 | mdev->agreed_pro_version < 95 ? DRBD_MAX_SIZE_H80_PACKET : DRBD_MAX_SEGMENT_SIZE; | 537 | mdev->agreed_pro_version < 95 ? DRBD_MAX_SIZE_H80_PACKET : DRBD_MAX_BIO_SIZE; |
539 | 538 | ||
540 | if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */ | 539 | number = drbd_rs_number_requests(mdev); |
541 | number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9); | 540 | if (number == 0) |
542 | mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; | ||
543 | } else { | ||
544 | mdev->c_sync_rate = mdev->sync_conf.rate; | ||
545 | number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ); | ||
546 | } | ||
547 | |||
548 | /* Throttle resync on lower level disk activity, which may also be | ||
549 | * caused by application IO on Primary/SyncTarget. | ||
550 | * Keep this after the call to drbd_rs_controller, as that assumes | ||
551 | * to be called as precisely as possible every SLEEP_TIME, | ||
552 | * and would be confused otherwise. */ | ||
553 | if (drbd_rs_should_slow_down(mdev)) | ||
554 | goto requeue; | 541 | goto requeue; |
555 | 542 | ||
556 | mutex_lock(&mdev->data.mutex); | ||
557 | if (mdev->data.socket) | ||
558 | mx = mdev->data.socket->sk->sk_rcvbuf / sizeof(struct p_block_req); | ||
559 | else | ||
560 | mx = 1; | ||
561 | mutex_unlock(&mdev->data.mutex); | ||
562 | |||
563 | /* For resync rates >160MB/sec, allow more pending RS requests */ | ||
564 | if (number > mx) | ||
565 | mx = number; | ||
566 | |||
567 | /* Limit the number of pending RS requests to no more than the peer's receive buffer */ | ||
568 | pe = atomic_read(&mdev->rs_pending_cnt); | ||
569 | if ((pe + number) > mx) { | ||
570 | number = mx - pe; | ||
571 | } | ||
572 | |||
573 | for (i = 0; i < number; i++) { | 543 | for (i = 0; i < number; i++) { |
574 | /* Stop generating RS requests, when half of the send buffer is filled */ | 544 | /* Stop generating RS requests, when half of the send buffer is filled */ |
575 | mutex_lock(&mdev->data.mutex); | 545 | mutex_lock(&mdev->data.mutex); |
@@ -588,16 +558,16 @@ next_sector: | |||
588 | size = BM_BLOCK_SIZE; | 558 | size = BM_BLOCK_SIZE; |
589 | bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo); | 559 | bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo); |
590 | 560 | ||
591 | if (bit == -1UL) { | 561 | if (bit == DRBD_END_OF_BITMAP) { |
592 | mdev->bm_resync_fo = drbd_bm_bits(mdev); | 562 | mdev->bm_resync_fo = drbd_bm_bits(mdev); |
593 | mdev->resync_work.cb = w_resync_inactive; | ||
594 | put_ldev(mdev); | 563 | put_ldev(mdev); |
595 | return 1; | 564 | return 1; |
596 | } | 565 | } |
597 | 566 | ||
598 | sector = BM_BIT_TO_SECT(bit); | 567 | sector = BM_BIT_TO_SECT(bit); |
599 | 568 | ||
600 | if (drbd_try_rs_begin_io(mdev, sector)) { | 569 | if (drbd_rs_should_slow_down(mdev, sector) || |
570 | drbd_try_rs_begin_io(mdev, sector)) { | ||
601 | mdev->bm_resync_fo = bit; | 571 | mdev->bm_resync_fo = bit; |
602 | goto requeue; | 572 | goto requeue; |
603 | } | 573 | } |
@@ -608,7 +578,7 @@ next_sector: | |||
608 | goto next_sector; | 578 | goto next_sector; |
609 | } | 579 | } |
610 | 580 | ||
611 | #if DRBD_MAX_SEGMENT_SIZE > BM_BLOCK_SIZE | 581 | #if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE |
612 | /* try to find some adjacent bits. | 582 | /* try to find some adjacent bits. |
613 | * we stop if we have already the maximum req size. | 583 | * we stop if we have already the maximum req size. |
614 | * | 584 | * |
@@ -618,7 +588,7 @@ next_sector: | |||
618 | align = 1; | 588 | align = 1; |
619 | rollback_i = i; | 589 | rollback_i = i; |
620 | for (;;) { | 590 | for (;;) { |
621 | if (size + BM_BLOCK_SIZE > max_segment_size) | 591 | if (size + BM_BLOCK_SIZE > max_bio_size) |
622 | break; | 592 | break; |
623 | 593 | ||
624 | /* Be always aligned */ | 594 | /* Be always aligned */ |
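Inside the DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE block, the worker tries to merge consecutive dirty bits into one larger resync request instead of issuing one 4-KiB request per bit. A simplified, hedged sketch of that aggregation is below; it keeps only the size cap and the "next bit still dirty" test and deliberately omits the in-kernel alignment handling and the real bitmap accessors (the raw bitmap parameter is an illustrative stand-in):

/* Hedged, simplified sketch of the adjacent-bit aggregation (not the in-kernel
 * loop): starting at 'bit', grow the request over consecutive dirty bits, but
 * never beyond max_bio_size. */
#include <stdio.h>

#define BM_BLOCK_SIZE 4096

static int aggregate(const unsigned char *bitmap, unsigned long bit,
                     unsigned long nbits, int max_bio_size)
{
        int size = BM_BLOCK_SIZE;

        for (;;) {
                unsigned long next = bit + size / BM_BLOCK_SIZE;

                if (size + BM_BLOCK_SIZE > max_bio_size)
                        break;          /* request already as large as allowed */
                if (next >= nbits)
                        break;          /* end of bitmap */
                if (!(bitmap[next / 8] & (1u << (next % 8))))
                        break;          /* next bit is clean: stop growing */
                size += BM_BLOCK_SIZE;
        }
        return size;
}

int main(void)
{
        /* bits 0..2 dirty, bit 3 clean: aggregation stops at 3 blocks = 12 KiB */
        unsigned char bm[1] = { 0x07 };

        printf("%d\n", aggregate(bm, 0, 8, 32 * 1024));
        return 0;
}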
@@ -685,7 +655,6 @@ next_sector: | |||
685 | * resync data block, and the last bit is cleared. | 655 | * resync data block, and the last bit is cleared. |
686 | * until then resync "work" is "inactive" ... | 656 | * until then resync "work" is "inactive" ... |
687 | */ | 657 | */ |
688 | mdev->resync_work.cb = w_resync_inactive; | ||
689 | put_ldev(mdev); | 658 | put_ldev(mdev); |
690 | return 1; | 659 | return 1; |
691 | } | 660 | } |
@@ -706,27 +675,18 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca | |||
706 | if (unlikely(cancel)) | 675 | if (unlikely(cancel)) |
707 | return 1; | 676 | return 1; |
708 | 677 | ||
709 | if (unlikely(mdev->state.conn < C_CONNECTED)) { | 678 | number = drbd_rs_number_requests(mdev); |
710 | dev_err(DEV, "Confused in w_make_ov_request()! cstate < Connected"); | ||
711 | return 0; | ||
712 | } | ||
713 | |||
714 | number = SLEEP_TIME*mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ); | ||
715 | if (atomic_read(&mdev->rs_pending_cnt) > number) | ||
716 | goto requeue; | ||
717 | |||
718 | number -= atomic_read(&mdev->rs_pending_cnt); | ||
719 | 679 | ||
720 | sector = mdev->ov_position; | 680 | sector = mdev->ov_position; |
721 | for (i = 0; i < number; i++) { | 681 | for (i = 0; i < number; i++) { |
722 | if (sector >= capacity) { | 682 | if (sector >= capacity) { |
723 | mdev->resync_work.cb = w_resync_inactive; | ||
724 | return 1; | 683 | return 1; |
725 | } | 684 | } |
726 | 685 | ||
727 | size = BM_BLOCK_SIZE; | 686 | size = BM_BLOCK_SIZE; |
728 | 687 | ||
729 | if (drbd_try_rs_begin_io(mdev, sector)) { | 688 | if (drbd_rs_should_slow_down(mdev, sector) || |
689 | drbd_try_rs_begin_io(mdev, sector)) { | ||
730 | mdev->ov_position = sector; | 690 | mdev->ov_position = sector; |
731 | goto requeue; | 691 | goto requeue; |
732 | } | 692 | } |
@@ -744,11 +704,33 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca | |||
744 | mdev->ov_position = sector; | 704 | mdev->ov_position = sector; |
745 | 705 | ||
746 | requeue: | 706 | requeue: |
707 | mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); | ||
747 | mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); | 708 | mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); |
748 | return 1; | 709 | return 1; |
749 | } | 710 | } |
750 | 711 | ||
751 | 712 | ||
713 | void start_resync_timer_fn(unsigned long data) | ||
714 | { | ||
715 | struct drbd_conf *mdev = (struct drbd_conf *) data; | ||
716 | |||
717 | drbd_queue_work(&mdev->data.work, &mdev->start_resync_work); | ||
718 | } | ||
719 | |||
720 | int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | ||
721 | { | ||
722 | if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) { | ||
723 | dev_warn(DEV, "w_start_resync later...\n"); | ||
724 | mdev->start_resync_timer.expires = jiffies + HZ/10; | ||
725 | add_timer(&mdev->start_resync_timer); | ||
726 | return 1; | ||
727 | } | ||
728 | |||
729 | drbd_start_resync(mdev, C_SYNC_SOURCE); | ||
730 | clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags); | ||
731 | return 1; | ||
732 | } | ||
733 | |||
752 | int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | 734 | int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) |
753 | { | 735 | { |
754 | kfree(w); | 736 | kfree(w); |
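start_resync_timer_fn()/w_start_resync() above follow the usual defer-to-process-context pattern: the timer callback runs in softirq context, so it only queues work, and the worker either retries 100 ms later while cleanup traffic (unacked/pending counts) is still draining, or finally calls drbd_start_resync(). A generic, hedged sketch of that pattern with the stock timer and workqueue API is shown below; drbd itself uses its own drbd_queue_work()/drbd_work machinery instead, and still_busy()/really_start()/defer_init() are illustrative names only:

/* Hedged sketch of the defer-until-idle pattern, not drbd's actual code. */
#include <linux/jiffies.h>
#include <linux/timer.h>
#include <linux/workqueue.h>

static struct work_struct start_work;
static struct timer_list retry_timer;

static int still_busy(void)    { return 0; }   /* illustrative stand-in */
static void really_start(void) { }             /* illustrative stand-in */

static void start_worker(struct work_struct *w)
{
        if (still_busy()) {
                /* not quiet yet: re-arm the timer and try again in 100 ms */
                mod_timer(&retry_timer, jiffies + HZ / 10);
                return;
        }
        really_start();
}

static void retry_timer_fn(unsigned long data)
{
        /* softirq context: must not sleep, so just hand off to the worker */
        schedule_work(&start_work);
}

static void defer_init(void)
{
        INIT_WORK(&start_work, start_worker);
        setup_timer(&retry_timer, retry_timer_fn, 0);
}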
@@ -782,6 +764,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) | |||
782 | union drbd_state os, ns; | 764 | union drbd_state os, ns; |
783 | struct drbd_work *w; | 765 | struct drbd_work *w; |
784 | char *khelper_cmd = NULL; | 766 | char *khelper_cmd = NULL; |
767 | int verify_done = 0; | ||
785 | 768 | ||
786 | /* Remove all elements from the resync LRU. Since future actions | 769 | /* Remove all elements from the resync LRU. Since future actions |
787 | * might set bits in the (main) bitmap, then the entries in the | 770 | * might set bits in the (main) bitmap, then the entries in the |
@@ -792,8 +775,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) | |||
792 | * queue (or even the read operations for those packets | 775 | * queue (or even the read operations for those packets |
793 | * is not finished by now). Retry in 100ms. */ | 776 | * is not finished by now). Retry in 100ms. */ |
794 | 777 | ||
795 | __set_current_state(TASK_INTERRUPTIBLE); | 778 | schedule_timeout_interruptible(HZ / 10); |
796 | schedule_timeout(HZ / 10); | ||
797 | w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC); | 779 | w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC); |
798 | if (w) { | 780 | if (w) { |
799 | w->cb = w_resync_finished; | 781 | w->cb = w_resync_finished; |
@@ -818,6 +800,8 @@ int drbd_resync_finished(struct drbd_conf *mdev) | |||
818 | spin_lock_irq(&mdev->req_lock); | 800 | spin_lock_irq(&mdev->req_lock); |
819 | os = mdev->state; | 801 | os = mdev->state; |
820 | 802 | ||
803 | verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T); | ||
804 | |||
821 | /* This protects us against multiple calls (that can happen in the presence | 805 | /* This protects us against multiple calls (that can happen in the presence |
822 | of application IO), and against connectivity loss just before we arrive here. */ | 806 | of application IO), and against connectivity loss just before we arrive here. */ |
823 | if (os.conn <= C_CONNECTED) | 807 | if (os.conn <= C_CONNECTED) |
@@ -827,8 +811,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) | |||
827 | ns.conn = C_CONNECTED; | 811 | ns.conn = C_CONNECTED; |
828 | 812 | ||
829 | dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", | 813 | dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", |
830 | (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) ? | 814 | verify_done ? "Online verify " : "Resync", |
831 | "Online verify " : "Resync", | ||
832 | dt + mdev->rs_paused, mdev->rs_paused, dbdt); | 815 | dt + mdev->rs_paused, mdev->rs_paused, dbdt); |
833 | 816 | ||
834 | n_oos = drbd_bm_total_weight(mdev); | 817 | n_oos = drbd_bm_total_weight(mdev); |
@@ -886,14 +869,18 @@ int drbd_resync_finished(struct drbd_conf *mdev) | |||
886 | } | 869 | } |
887 | } | 870 | } |
888 | 871 | ||
889 | drbd_uuid_set_bm(mdev, 0UL); | 872 | if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) { |
890 | 873 | /* for verify runs, we don't update uuids here, | |
891 | if (mdev->p_uuid) { | 874 | * so there would be nothing to report. */ |
892 | /* Now the two UUID sets are equal, update what we | 875 | drbd_uuid_set_bm(mdev, 0UL); |
893 | * know of the peer. */ | 876 | drbd_print_uuids(mdev, "updated UUIDs"); |
894 | int i; | 877 | if (mdev->p_uuid) { |
895 | for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++) | 878 | /* Now the two UUID sets are equal, update what we |
896 | mdev->p_uuid[i] = mdev->ldev->md.uuid[i]; | 879 | * know of the peer. */ |
880 | int i; | ||
881 | for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++) | ||
882 | mdev->p_uuid[i] = mdev->ldev->md.uuid[i]; | ||
883 | } | ||
897 | } | 884 | } |
898 | } | 885 | } |
899 | 886 | ||
@@ -905,15 +892,11 @@ out: | |||
905 | mdev->rs_total = 0; | 892 | mdev->rs_total = 0; |
906 | mdev->rs_failed = 0; | 893 | mdev->rs_failed = 0; |
907 | mdev->rs_paused = 0; | 894 | mdev->rs_paused = 0; |
908 | mdev->ov_start_sector = 0; | 895 | if (verify_done) |
896 | mdev->ov_start_sector = 0; | ||
909 | 897 | ||
910 | drbd_md_sync(mdev); | 898 | drbd_md_sync(mdev); |
911 | 899 | ||
912 | if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) { | ||
913 | dev_info(DEV, "Writing the whole bitmap\n"); | ||
914 | drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished"); | ||
915 | } | ||
916 | |||
917 | if (khelper_cmd) | 900 | if (khelper_cmd) |
918 | drbd_khelper(mdev, khelper_cmd); | 901 | drbd_khelper(mdev, khelper_cmd); |
919 | 902 | ||
@@ -994,7 +977,9 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
994 | put_ldev(mdev); | 977 | put_ldev(mdev); |
995 | } | 978 | } |
996 | 979 | ||
997 | if (likely((e->flags & EE_WAS_ERROR) == 0)) { | 980 | if (mdev->state.conn == C_AHEAD) { |
981 | ok = drbd_send_ack(mdev, P_RS_CANCEL, e); | ||
982 | } else if (likely((e->flags & EE_WAS_ERROR) == 0)) { | ||
998 | if (likely(mdev->state.pdsk >= D_INCONSISTENT)) { | 983 | if (likely(mdev->state.pdsk >= D_INCONSISTENT)) { |
999 | inc_rs_pending(mdev); | 984 | inc_rs_pending(mdev); |
1000 | ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e); | 985 | ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e); |
@@ -1096,25 +1081,27 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
1096 | if (unlikely(cancel)) | 1081 | if (unlikely(cancel)) |
1097 | goto out; | 1082 | goto out; |
1098 | 1083 | ||
1099 | if (unlikely((e->flags & EE_WAS_ERROR) != 0)) | ||
1100 | goto out; | ||
1101 | |||
1102 | digest_size = crypto_hash_digestsize(mdev->verify_tfm); | 1084 | digest_size = crypto_hash_digestsize(mdev->verify_tfm); |
1103 | /* FIXME if this allocation fails, online verify will not terminate! */ | ||
1104 | digest = kmalloc(digest_size, GFP_NOIO); | 1085 | digest = kmalloc(digest_size, GFP_NOIO); |
1105 | if (digest) { | 1086 | if (!digest) { |
1106 | drbd_csum_ee(mdev, mdev->verify_tfm, e, digest); | 1087 | ok = 0; /* terminate the connection in case the allocation failed */ |
1107 | inc_rs_pending(mdev); | 1088 | goto out; |
1108 | ok = drbd_send_drequest_csum(mdev, e->sector, e->size, | ||
1109 | digest, digest_size, P_OV_REPLY); | ||
1110 | if (!ok) | ||
1111 | dec_rs_pending(mdev); | ||
1112 | kfree(digest); | ||
1113 | } | 1089 | } |
1114 | 1090 | ||
1091 | if (likely(!(e->flags & EE_WAS_ERROR))) | ||
1092 | drbd_csum_ee(mdev, mdev->verify_tfm, e, digest); | ||
1093 | else | ||
1094 | memset(digest, 0, digest_size); | ||
1095 | |||
1096 | inc_rs_pending(mdev); | ||
1097 | ok = drbd_send_drequest_csum(mdev, e->sector, e->size, | ||
1098 | digest, digest_size, P_OV_REPLY); | ||
1099 | if (!ok) | ||
1100 | dec_rs_pending(mdev); | ||
1101 | kfree(digest); | ||
1102 | |||
1115 | out: | 1103 | out: |
1116 | drbd_free_ee(mdev, e); | 1104 | drbd_free_ee(mdev, e); |
1117 | |||
1118 | dec_unacked(mdev); | 1105 | dec_unacked(mdev); |
1119 | 1106 | ||
1120 | return ok; | 1107 | return ok; |
@@ -1129,7 +1116,6 @@ void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size) | |||
1129 | mdev->ov_last_oos_size = size>>9; | 1116 | mdev->ov_last_oos_size = size>>9; |
1130 | } | 1117 | } |
1131 | drbd_set_out_of_sync(mdev, sector, size); | 1118 | drbd_set_out_of_sync(mdev, sector, size); |
1132 | set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags); | ||
1133 | } | 1119 | } |
1134 | 1120 | ||
1135 | int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | 1121 | int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) |
@@ -1165,10 +1151,6 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
1165 | eq = !memcmp(digest, di->digest, digest_size); | 1151 | eq = !memcmp(digest, di->digest, digest_size); |
1166 | kfree(digest); | 1152 | kfree(digest); |
1167 | } | 1153 | } |
1168 | } else { | ||
1169 | ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e); | ||
1170 | if (__ratelimit(&drbd_ratelimit_state)) | ||
1171 | dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n"); | ||
1172 | } | 1154 | } |
1173 | 1155 | ||
1174 | dec_unacked(mdev); | 1156 | dec_unacked(mdev); |
@@ -1182,7 +1164,13 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
1182 | 1164 | ||
1183 | drbd_free_ee(mdev, e); | 1165 | drbd_free_ee(mdev, e); |
1184 | 1166 | ||
1185 | if (--mdev->ov_left == 0) { | 1167 | --mdev->ov_left; |
1168 | |||
1169 | /* let's advance progress step marks only for every other megabyte */ | ||
1170 | if ((mdev->ov_left & 0x200) == 0x200) | ||
1171 | drbd_advance_rs_marks(mdev, mdev->ov_left); | ||
1172 | |||
1173 | if (mdev->ov_left == 0) { | ||
1186 | ov_oos_print(mdev); | 1174 | ov_oos_print(mdev); |
1187 | drbd_resync_finished(mdev); | 1175 | drbd_resync_finished(mdev); |
1188 | } | 1176 | } |
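The 0x200 mask is what the "every other megabyte" comment refers to: ov_left counts the remaining verify work in bitmap blocks, and assuming the usual 4-KiB block size (an assumption, the constant is not visible in this hunk), bit 9 toggles every 0x200 = 512 blocks, i.e. every 2 MiB, so drbd_advance_rs_marks() runs during alternating 2 MiB windows of the remaining work. A one-line check of the arithmetic:

#include <stdio.h>

int main(void)
{
        /* 0x200 bitmap blocks x 4 KiB per block = 2048 KiB = 2 MiB window width */
        printf("%u KiB window width\n", 0x200u * 4096u / 1024u);
        return 0;
}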
@@ -1235,6 +1223,22 @@ int w_send_write_hint(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
1235 | return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE); | 1223 | return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE); |
1236 | } | 1224 | } |
1237 | 1225 | ||
1226 | int w_send_oos(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | ||
1227 | { | ||
1228 | struct drbd_request *req = container_of(w, struct drbd_request, w); | ||
1229 | int ok; | ||
1230 | |||
1231 | if (unlikely(cancel)) { | ||
1232 | req_mod(req, send_canceled); | ||
1233 | return 1; | ||
1234 | } | ||
1235 | |||
1236 | ok = drbd_send_oos(mdev, req); | ||
1237 | req_mod(req, oos_handed_to_network); | ||
1238 | |||
1239 | return ok; | ||
1240 | } | ||
1241 | |||
1238 | /** | 1242 | /** |
1239 | * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request | 1243 | * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request |
1240 | * @mdev: DRBD device. | 1244 | * @mdev: DRBD device. |
@@ -1430,6 +1434,17 @@ int drbd_alter_sa(struct drbd_conf *mdev, int na) | |||
1430 | return retcode; | 1434 | return retcode; |
1431 | } | 1435 | } |
1432 | 1436 | ||
1437 | void drbd_rs_controller_reset(struct drbd_conf *mdev) | ||
1438 | { | ||
1439 | atomic_set(&mdev->rs_sect_in, 0); | ||
1440 | atomic_set(&mdev->rs_sect_ev, 0); | ||
1441 | mdev->rs_in_flight = 0; | ||
1442 | mdev->rs_planed = 0; | ||
1443 | spin_lock(&mdev->peer_seq_lock); | ||
1444 | fifo_set(&mdev->rs_plan_s, 0); | ||
1445 | spin_unlock(&mdev->peer_seq_lock); | ||
1446 | } | ||
1447 | |||
1433 | /** | 1448 | /** |
1434 | * drbd_start_resync() - Start the resync process | 1449 | * drbd_start_resync() - Start the resync process |
1435 | * @mdev: DRBD device. | 1450 | * @mdev: DRBD device. |
@@ -1443,13 +1458,18 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) | |||
1443 | union drbd_state ns; | 1458 | union drbd_state ns; |
1444 | int r; | 1459 | int r; |
1445 | 1460 | ||
1446 | if (mdev->state.conn >= C_SYNC_SOURCE) { | 1461 | if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn < C_AHEAD) { |
1447 | dev_err(DEV, "Resync already running!\n"); | 1462 | dev_err(DEV, "Resync already running!\n"); |
1448 | return; | 1463 | return; |
1449 | } | 1464 | } |
1450 | 1465 | ||
1451 | /* In case a previous resync run was aborted by an IO error/detach on the peer. */ | 1466 | if (mdev->state.conn < C_AHEAD) { |
1452 | drbd_rs_cancel_all(mdev); | 1467 | /* In case a previous resync run was aborted by an IO error/detach on the peer. */ |
1468 | drbd_rs_cancel_all(mdev); | ||
1469 | /* This should be done when we abort the resync. We definitely do not | ||
1470 | want to have this for connections going back and forth between | ||
1471 | Ahead/Behind and SyncSource/SyncTarget */ | ||
1472 | } | ||
1453 | 1473 | ||
1454 | if (side == C_SYNC_TARGET) { | 1474 | if (side == C_SYNC_TARGET) { |
1455 | /* Since application IO was locked out during C_WF_BITMAP_T and | 1475 | /* Since application IO was locked out during C_WF_BITMAP_T and |
@@ -1463,6 +1483,20 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) | |||
1463 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); | 1483 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); |
1464 | return; | 1484 | return; |
1465 | } | 1485 | } |
1486 | } else /* C_SYNC_SOURCE */ { | ||
1487 | r = drbd_khelper(mdev, "before-resync-source"); | ||
1488 | r = (r >> 8) & 0xff; | ||
1489 | if (r > 0) { | ||
1490 | if (r == 3) { | ||
1491 | dev_info(DEV, "before-resync-source handler returned %d, " | ||
1492 | "ignoring. Old userland tools?", r); | ||
1493 | } else { | ||
1494 | dev_info(DEV, "before-resync-source handler returned %d, " | ||
1495 | "dropping connection.\n", r); | ||
1496 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); | ||
1497 | return; | ||
1498 | } | ||
1499 | } | ||
1466 | } | 1500 | } |
1467 | 1501 | ||
1468 | drbd_state_lock(mdev); | 1502 | drbd_state_lock(mdev); |
@@ -1472,18 +1506,6 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) | |||
1472 | return; | 1506 | return; |
1473 | } | 1507 | } |
1474 | 1508 | ||
1475 | if (side == C_SYNC_TARGET) { | ||
1476 | mdev->bm_resync_fo = 0; | ||
1477 | } else /* side == C_SYNC_SOURCE */ { | ||
1478 | u64 uuid; | ||
1479 | |||
1480 | get_random_bytes(&uuid, sizeof(u64)); | ||
1481 | drbd_uuid_set(mdev, UI_BITMAP, uuid); | ||
1482 | drbd_send_sync_uuid(mdev, uuid); | ||
1483 | |||
1484 | D_ASSERT(mdev->state.disk == D_UP_TO_DATE); | ||
1485 | } | ||
1486 | |||
1487 | write_lock_irq(&global_state_lock); | 1509 | write_lock_irq(&global_state_lock); |
1488 | ns = mdev->state; | 1510 | ns = mdev->state; |
1489 | 1511 | ||
@@ -1521,13 +1543,24 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) | |||
1521 | _drbd_pause_after(mdev); | 1543 | _drbd_pause_after(mdev); |
1522 | } | 1544 | } |
1523 | write_unlock_irq(&global_state_lock); | 1545 | write_unlock_irq(&global_state_lock); |
1524 | put_ldev(mdev); | ||
1525 | 1546 | ||
1526 | if (r == SS_SUCCESS) { | 1547 | if (r == SS_SUCCESS) { |
1527 | dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n", | 1548 | dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n", |
1528 | drbd_conn_str(ns.conn), | 1549 | drbd_conn_str(ns.conn), |
1529 | (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10), | 1550 | (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10), |
1530 | (unsigned long) mdev->rs_total); | 1551 | (unsigned long) mdev->rs_total); |
1552 | if (side == C_SYNC_TARGET) | ||
1553 | mdev->bm_resync_fo = 0; | ||
1554 | |||
1555 | /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid | ||
1556 | * with w_send_oos, or the sync target will get confused as to | ||
1557 | * how many bits to resync. We cannot always do that, because for an | ||
1558 | * empty resync and protocol < 95, we need to do it here, as we call | ||
1559 | * drbd_resync_finished from here in that case. | ||
1560 | * We drbd_gen_and_send_sync_uuid here for protocol < 96, | ||
1561 | * and from after_state_ch otherwise. */ | ||
1562 | if (side == C_SYNC_SOURCE && mdev->agreed_pro_version < 96) | ||
1563 | drbd_gen_and_send_sync_uuid(mdev); | ||
1531 | 1564 | ||
1532 | if (mdev->agreed_pro_version < 95 && mdev->rs_total == 0) { | 1565 | if (mdev->agreed_pro_version < 95 && mdev->rs_total == 0) { |
1533 | /* This still has a race (about when exactly the peers | 1566 | /* This still has a race (about when exactly the peers |
@@ -1547,13 +1580,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) | |||
1547 | drbd_resync_finished(mdev); | 1580 | drbd_resync_finished(mdev); |
1548 | } | 1581 | } |
1549 | 1582 | ||
1550 | atomic_set(&mdev->rs_sect_in, 0); | 1583 | drbd_rs_controller_reset(mdev); |
1551 | atomic_set(&mdev->rs_sect_ev, 0); | ||
1552 | mdev->rs_in_flight = 0; | ||
1553 | mdev->rs_planed = 0; | ||
1554 | spin_lock(&mdev->peer_seq_lock); | ||
1555 | fifo_set(&mdev->rs_plan_s, 0); | ||
1556 | spin_unlock(&mdev->peer_seq_lock); | ||
1557 | /* ns.conn may already be != mdev->state.conn, | 1584 | /* ns.conn may already be != mdev->state.conn, |
1558 | * we may have been paused in between, or become paused until | 1585 | * we may have been paused in between, or become paused until |
1559 | * the timer triggers. | 1586 | * the timer triggers. |
@@ -1563,6 +1590,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) | |||
1563 | 1590 | ||
1564 | drbd_md_sync(mdev); | 1591 | drbd_md_sync(mdev); |
1565 | } | 1592 | } |
1593 | put_ldev(mdev); | ||
1566 | drbd_state_unlock(mdev); | 1594 | drbd_state_unlock(mdev); |
1567 | } | 1595 | } |
1568 | 1596 | ||
diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h index 53586fa5ae1b..151f1a37478f 100644 --- a/drivers/block/drbd/drbd_wrappers.h +++ b/drivers/block/drbd/drbd_wrappers.h | |||
@@ -39,7 +39,7 @@ static inline void drbd_generic_make_request(struct drbd_conf *mdev, | |||
39 | return; | 39 | return; |
40 | } | 40 | } |
41 | 41 | ||
42 | if (FAULT_ACTIVE(mdev, fault_type)) | 42 | if (drbd_insert_fault(mdev, fault_type)) |
43 | bio_endio(bio, -EIO); | 43 | bio_endio(bio, -EIO); |
44 | else | 44 | else |
45 | generic_make_request(bio); | 45 | generic_make_request(bio); |
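The FAULT_ACTIVE() macro is replaced by the drbd_insert_fault() helper throughout the driver. As a rough illustration of what such a fault-injection predicate does, here is a hedged userspace sketch; the real helper lives in drbd_int.h and is compiled out unless CONFIG_DRBD_FAULT_INJECTION is enabled, and the fault_rate knob below is merely illustrative of the kind of module parameter involved:

/* Hedged sketch of a fault-injection predicate in the spirit of
 * drbd_insert_fault(); illustrative only, not the in-kernel implementation. */
#include <stdbool.h>
#include <stdlib.h>

static unsigned int fault_rate;        /* percent of requests to fail, 0 = off */

static bool insert_fault(unsigned int type)
{
        (void)type;                    /* a real driver would filter by fault type */
        return fault_rate && (unsigned int)(rand() % 100) < fault_rate;
}

int main(void)
{
        srand(42);
        fault_rate = 10;               /* fail roughly 10% of requests */
        return insert_fault(0) ? 1 : 0;
}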