| author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-27 23:02:07 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-27 23:02:07 -0400 |
| commit | 8d49a77568d1105ff3e64aec484dac059f54824e | |
| tree | 633ee954a3cea97bf136dec933388a2e419e5dac | |
| parent | 93567c43eb2a4771b9c590435928f9b3a428e568 | |
| parent | 1ddd5049545e0aa1a0ed19bca4d9c9c3ce1ac8a2 | |
Merge branch 'for-2.6.39/drivers' of git://git.kernel.dk/linux-2.6-block
```
* 'for-2.6.39/drivers' of git://git.kernel.dk/linux-2.6-block: (122 commits)
  cciss: fix lost command issue
  drbd: need include for bitops functions declarations
  Revert "cciss: Add missing allocation in scsi_cmd_stack_setup and corresponding deallocation"
  cciss: fix missed command status value CMD_UNABORTABLE
  cciss: remove unnecessary casts
  cciss: Mask off error bits of c->busaddr in cmd_special_free when calling pci_free_consistent
  cciss: Inform controller we are using 32-bit tags.
  cciss: hoist tag masking out of loop
  cciss: Add missing allocation in scsi_cmd_stack_setup and corresponding deallocation
  cciss: export resettable host attribute
  drbd: drop code present under #ifdef which is relevant to 2.6.28 and below
  drbd: Fixed handling of read errors on a 'VerifyS' node
  drbd: Fixed handling of read errors on a 'VerifyT' node
  drbd: Implemented real timeout checking for request processing time
  drbd: Remove unused function atodb_endio()
  drbd: improve log message if received sector offset exceeds local capacity
  drbd: kill dead code
  drbd: don't BUG_ON, if bio_add_page of a single page to an empty bio fails
  drbd: Removed left over, now wrong comments
  drbd: serialize admin requests for new verify run with pending bitmap io
  ...
```
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | Documentation/ABI/testing/sysfs-bus-pci-devices-cciss | 12 |
| -rw-r--r-- | drivers/block/cciss.c | 86 |
| -rw-r--r-- | drivers/block/cciss.h | 1 |
| -rw-r--r-- | drivers/block/cciss_cmd.h | 1 |
| -rw-r--r-- | drivers/block/cciss_scsi.c | 13 |
| -rw-r--r-- | drivers/block/drbd/drbd_actlog.c | 335 |
| -rw-r--r-- | drivers/block/drbd/drbd_bitmap.c | 752 |
| -rw-r--r-- | drivers/block/drbd/drbd_int.h | 270 |
| -rw-r--r-- | drivers/block/drbd/drbd_main.c | 673 |
| -rw-r--r-- | drivers/block/drbd/drbd_nl.c | 183 |
| -rw-r--r-- | drivers/block/drbd/drbd_proc.c | 114 |
| -rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 608 |
| -rw-r--r-- | drivers/block/drbd/drbd_req.c | 169 |
| -rw-r--r-- | drivers/block/drbd/drbd_req.h | 36 |
| -rw-r--r-- | drivers/block/drbd/drbd_strings.c | 6 |
| -rw-r--r-- | drivers/block/drbd/drbd_worker.c | 360 |
| -rw-r--r-- | drivers/block/drbd/drbd_wrappers.h | 2 |
| -rw-r--r-- | include/linux/drbd.h | 23 |
| -rw-r--r-- | include/linux/drbd_limits.h | 12 |
| -rw-r--r-- | include/linux/drbd_nl.h | 13 |
| -rw-r--r-- | include/linux/drbd_tag_magic.h | 1 |

21 files changed, 2267 insertions, 1403 deletions
```diff
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
index 4f29e5f1ebfa..f5bb0a3bb8c0 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
@@ -59,3 +59,15 @@ Kernel Version: 2.6.31
 Contact: iss_storagedev@hp.com
 Description: Displays the usage count (number of opens) of logical drive Y
          of controller X.
+
+Where: /sys/bus/pci/devices/<dev>/ccissX/resettable
+Date: February 2011
+Kernel Version: 2.6.38
+Contact: iss_storagedev@hp.com
+Description: Value of 1 indicates the controller can honor the reset_devices
+         kernel parameter. Value of 0 indicates reset_devices cannot be
+         honored. This is to allow, for example, kexec tools to be able
+         to warn the user if they designate an unresettable device as
+         a dump device, as kdump requires resetting the device in order
+         to work reliably.
+
```
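The attribute added above is a plain decimal flag, so a crash-dump helper can check it before committing to a kdump target. A minimal userspace sketch, assuming a hypothetical controller at PCI address 0000:06:00.0 (the real path depends on the system):

```c
#include <stdio.h>

/* Hypothetical path -- substitute the real PCI address and controller number. */
#define RESETTABLE "/sys/bus/pci/devices/0000:06:00.0/cciss0/resettable"

int main(void)
{
    FILE *f = fopen(RESETTABLE, "r");
    int resettable = 0;

    if (!f) {
        perror("fopen");
        return 1;
    }
    if (fscanf(f, "%d", &resettable) != 1)
        resettable = 0;
    fclose(f);

    if (!resettable)
        fprintf(stderr, "warning: controller cannot honor reset_devices; "
                "kdump may be unreliable on this device\n");
    return 0;
}
```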
```diff
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 35658f445fca..9bf13988f1a2 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -193,7 +193,7 @@ static int __devinit cciss_find_cfg_addrs(struct pci_dev *pdev,
         u64 *cfg_offset);
 static int __devinit cciss_pci_find_memory_BAR(struct pci_dev *pdev,
         unsigned long *memory_bar);
-
+static inline u32 cciss_tag_discard_error_bits(ctlr_info_t *h, u32 tag);
 
 /* performant mode helper functions */
 static void calc_bucket_map(int *bucket, int num_buckets, int nsgs,
@@ -231,7 +231,7 @@ static const struct block_device_operations cciss_fops = {
  */
 static void set_performant_mode(ctlr_info_t *h, CommandList_struct *c)
 {
-        if (likely(h->transMethod == CFGTBL_Trans_Performant))
+        if (likely(h->transMethod & CFGTBL_Trans_Performant))
                 c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1);
 }
 
@@ -556,6 +556,44 @@ static void __devinit cciss_procinit(ctlr_info_t *h)
 #define to_hba(n) container_of(n, struct ctlr_info, dev)
 #define to_drv(n) container_of(n, drive_info_struct, dev)
 
+/* List of controllers which cannot be reset on kexec with reset_devices */
+static u32 unresettable_controller[] = {
+        0x324a103C, /* Smart Array P712m */
+        0x324b103C, /* SmartArray P711m */
+        0x3223103C, /* Smart Array P800 */
+        0x3234103C, /* Smart Array P400 */
+        0x3235103C, /* Smart Array P400i */
+        0x3211103C, /* Smart Array E200i */
+        0x3212103C, /* Smart Array E200 */
+        0x3213103C, /* Smart Array E200i */
+        0x3214103C, /* Smart Array E200i */
+        0x3215103C, /* Smart Array E200i */
+        0x3237103C, /* Smart Array E500 */
+        0x323D103C, /* Smart Array P700m */
+        0x409C0E11, /* Smart Array 6400 */
+        0x409D0E11, /* Smart Array 6400 EM */
+};
+
+static int ctlr_is_resettable(struct ctlr_info *h)
+{
+        int i;
+
+        for (i = 0; i < ARRAY_SIZE(unresettable_controller); i++)
+                if (unresettable_controller[i] == h->board_id)
+                        return 0;
+        return 1;
+}
+
+static ssize_t host_show_resettable(struct device *dev,
+                                    struct device_attribute *attr,
+                                    char *buf)
+{
+        struct ctlr_info *h = to_hba(dev);
+
+        return snprintf(buf, 20, "%d\n", ctlr_is_resettable(h));
+}
+static DEVICE_ATTR(resettable, S_IRUGO, host_show_resettable, NULL);
+
 static ssize_t host_store_rescan(struct device *dev,
                                  struct device_attribute *attr,
                                  const char *buf, size_t count)
@@ -741,6 +779,7 @@ static DEVICE_ATTR(usage_count, S_IRUGO, cciss_show_usage_count, NULL);
 
 static struct attribute *cciss_host_attrs[] = {
         &dev_attr_rescan.attr,
+        &dev_attr_resettable.attr,
         NULL
 };
 
@@ -973,8 +1012,8 @@ static void cmd_special_free(ctlr_info_t *h, CommandList_struct *c)
         temp64.val32.upper = c->ErrDesc.Addr.upper;
         pci_free_consistent(h->pdev, sizeof(ErrorInfo_struct),
                 c->err_info, (dma_addr_t) temp64.val);
-        pci_free_consistent(h->pdev, sizeof(CommandList_struct),
-                c, (dma_addr_t) c->busaddr);
+        pci_free_consistent(h->pdev, sizeof(CommandList_struct), c,
+                (dma_addr_t) cciss_tag_discard_error_bits(h, (u32) c->busaddr));
 }
 
 static inline ctlr_info_t *get_host(struct gendisk *disk)
@@ -1490,8 +1529,7 @@ static int cciss_bigpassthru(ctlr_info_t *h, void __user *argp)
                 return -EINVAL;
         if (!capable(CAP_SYS_RAWIO))
                 return -EPERM;
-        ioc = (BIG_IOCTL_Command_struct *)
-            kmalloc(sizeof(*ioc), GFP_KERNEL);
+        ioc = kmalloc(sizeof(*ioc), GFP_KERNEL);
         if (!ioc) {
                 status = -ENOMEM;
                 goto cleanup1;
@@ -2653,6 +2691,10 @@ static int process_sendcmd_error(ctlr_info_t *h, CommandList_struct *c)
                         c->Request.CDB[0]);
                 return_status = IO_NEEDS_RETRY;
                 break;
+        case CMD_UNABORTABLE:
+                dev_warn(&h->pdev->dev, "cmd unabortable\n");
+                return_status = IO_ERROR;
+                break;
         default:
                 dev_warn(&h->pdev->dev, "cmd 0x%02x returned "
                         "unknown status %x\n", c->Request.CDB[0],
@@ -3103,6 +3145,13 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
                         (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
                                 DID_PASSTHROUGH : DID_ERROR);
                 break;
+        case CMD_UNABORTABLE:
+                dev_warn(&h->pdev->dev, "cmd %p unabortable\n", cmd);
+                rq->errors = make_status_bytes(SAM_STAT_GOOD,
+                        cmd->err_info->CommandStatus, DRIVER_OK,
+                        cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC ?
+                                DID_PASSTHROUGH : DID_ERROR);
+                break;
         default:
                 dev_warn(&h->pdev->dev, "cmd %p returned "
                         "unknown status %x\n", cmd,
@@ -3136,10 +3185,13 @@ static inline u32 cciss_tag_to_index(u32 tag)
         return tag >> DIRECT_LOOKUP_SHIFT;
 }
 
-static inline u32 cciss_tag_discard_error_bits(u32 tag)
+static inline u32 cciss_tag_discard_error_bits(ctlr_info_t *h, u32 tag)
 {
-#define CCISS_ERROR_BITS 0x03
-        return tag & ~CCISS_ERROR_BITS;
+#define CCISS_PERF_ERROR_BITS ((1 << DIRECT_LOOKUP_SHIFT) - 1)
+#define CCISS_SIMPLE_ERROR_BITS 0x03
+        if (likely(h->transMethod & CFGTBL_Trans_Performant))
+                return tag & ~CCISS_PERF_ERROR_BITS;
+        return tag & ~CCISS_SIMPLE_ERROR_BITS;
 }
 
 static inline void cciss_mark_tag_indexed(u32 *tag)
@@ -3359,7 +3411,7 @@ static inline u32 next_command(ctlr_info_t *h)
 {
         u32 a;
 
-        if (unlikely(h->transMethod != CFGTBL_Trans_Performant))
+        if (unlikely(!(h->transMethod & CFGTBL_Trans_Performant)))
                 return h->access.command_completed(h);
 
         if ((*(h->reply_pool_head) & 1) == (h->reply_pool_wraparound)) {
@@ -3394,14 +3446,12 @@ static inline u32 process_indexed_cmd(ctlr_info_t *h, u32 raw_tag)
 /* process completion of a non-indexed command */
 static inline u32 process_nonindexed_cmd(ctlr_info_t *h, u32 raw_tag)
 {
-        u32 tag;
         CommandList_struct *c = NULL;
         __u32 busaddr_masked, tag_masked;
 
-        tag = cciss_tag_discard_error_bits(raw_tag);
+        tag_masked = cciss_tag_discard_error_bits(h, raw_tag);
         list_for_each_entry(c, &h->cmpQ, list) {
-                busaddr_masked = cciss_tag_discard_error_bits(c->busaddr);
-                tag_masked = cciss_tag_discard_error_bits(tag);
+                busaddr_masked = cciss_tag_discard_error_bits(h, c->busaddr);
                 if (busaddr_masked == tag_masked) {
                         finish_cmd(h, c, raw_tag);
                         return next_command(h);
@@ -3753,7 +3803,8 @@ static void __devinit cciss_wait_for_mode_change_ack(ctlr_info_t *h)
         }
 }
 
-static __devinit void cciss_enter_performant_mode(ctlr_info_t *h)
+static __devinit void cciss_enter_performant_mode(ctlr_info_t *h,
+        u32 use_short_tags)
 {
         /* This is a bit complicated.  There are 8 registers on
          * the controller which we write to to tell it 8 different
@@ -3808,7 +3859,7 @@ static __devinit void cciss_enter_performant_mode(ctlr_info_t *h)
         writel(0, &h->transtable->RepQCtrAddrHigh32);
         writel(h->reply_pool_dhandle, &h->transtable->RepQAddr0Low32);
         writel(0, &h->transtable->RepQAddr0High32);
-        writel(CFGTBL_Trans_Performant,
+        writel(CFGTBL_Trans_Performant | use_short_tags,
                 &(h->cfgtable->HostWrite.TransportRequest));
 
         writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
@@ -3855,7 +3906,8 @@ static void __devinit cciss_put_controller_into_performant_mode(ctlr_info_t *h)
         if ((h->reply_pool == NULL) || (h->blockFetchTable == NULL))
                 goto clean_up;
 
-        cciss_enter_performant_mode(h);
+        cciss_enter_performant_mode(h,
+                trans_support & CFGTBL_Trans_use_short_tags);
 
         /* Change the access methods to the performant access methods */
         h->access = SA5_performant_access;
```
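The widened tag mask above is easier to see with concrete numbers: in performant mode the controller hands the command's bus address back with everything below DIRECT_LOOKUP_SHIFT used for error/status bits, so the old two-bit mask could leave stray bits in the recovered address. A standalone sketch of the arithmetic (the shift value 4 is assumed here purely for illustration; the real constant is defined in the cciss headers):

```c
#include <assert.h>
#include <stdio.h>

/* Illustrative value only -- the real constant lives in the cciss headers. */
#define DIRECT_LOOKUP_SHIFT 4

#define CCISS_PERF_ERROR_BITS   ((1u << DIRECT_LOOKUP_SHIFT) - 1)  /* 0x0f */
#define CCISS_SIMPLE_ERROR_BITS 0x03u

int main(void)
{
    unsigned busaddr = 0x12345670;      /* tag handed to the controller */
    unsigned raw_tag = busaddr | 0x0c;  /* controller set status bits 2..3 */

    /* Simple mode reserves only the low two bits for status... */
    printf("simple mask: %#x\n", raw_tag & ~CCISS_SIMPLE_ERROR_BITS);
    /* ...performant mode reserves everything below DIRECT_LOOKUP_SHIFT. */
    printf("perf mask:   %#x\n", raw_tag & ~CCISS_PERF_ERROR_BITS);

    /* Only the wider mask recovers the original bus address. */
    assert((raw_tag & ~CCISS_PERF_ERROR_BITS) == busaddr);
    return 0;
}
```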
```diff
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index 579f74918493..554bbd907d14 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -222,6 +222,7 @@ static void SA5_submit_command( ctlr_info_t *h, CommandList_struct *c)
                 h->ctlr, c->busaddr);
 #endif /* CCISS_DEBUG */
         writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET);
+        readl(h->vaddr + SA5_REQUEST_PORT_OFFSET);
         h->commands_outstanding++;
         if ( h->commands_outstanding > h->max_outstanding)
                 h->max_outstanding = h->commands_outstanding;
```
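The single added readl() is the usual MMIO posting-flush idiom: a write to the request port may sit posted in a PCI bridge, and a read from the same device forces it out to the controller. A generic, kernel-style sketch of the pattern, not tied to the cciss register layout:

```c
#include <linux/io.h>

/* Sketch only: flush a posted MMIO write by reading a register on the
 * same device before relying on the write's side effects. */
static void mmio_write_flushed(void __iomem *base, u32 val, unsigned long reg)
{
        writel(val, base + reg);        /* may be posted by the PCI bridge */
        readl(base + reg);              /* read-back flushes the posted write */
}
```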
```diff
diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
index 35463d2f0ee7..cd441bef031f 100644
--- a/drivers/block/cciss_cmd.h
+++ b/drivers/block/cciss_cmd.h
@@ -56,6 +56,7 @@
 
 #define CFGTBL_Trans_Simple     0x00000002l
 #define CFGTBL_Trans_Performant 0x00000004l
+#define CFGTBL_Trans_use_short_tags 0x20000000l
 
 #define CFGTBL_BusType_Ultra2   0x00000001l
 #define CFGTBL_BusType_Ultra3   0x00000002l
```
```diff
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index 727d0225b7d0..df793803f5ae 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -824,13 +824,18 @@ static void complete_scsi_command(CommandList_struct *c, int timeout,
                         break;
                 case CMD_UNSOLICITED_ABORT:
                         cmd->result = DID_ABORT << 16;
-                        dev_warn(&h->pdev->dev, "%p aborted do to an "
+                        dev_warn(&h->pdev->dev, "%p aborted due to an "
                                 "unsolicited abort\n", c);
                         break;
                 case CMD_TIMEOUT:
                         cmd->result = DID_TIME_OUT << 16;
                         dev_warn(&h->pdev->dev, "%p timedout\n", c);
                         break;
+                case CMD_UNABORTABLE:
+                        cmd->result = DID_ERROR << 16;
+                        dev_warn(&h->pdev->dev, "c %p command "
+                                "unabortable\n", c);
+                        break;
                 default:
                         cmd->result = DID_ERROR << 16;
                         dev_warn(&h->pdev->dev,
@@ -1007,11 +1012,15 @@ cciss_scsi_interpret_error(ctlr_info_t *h, CommandList_struct *c)
                 break;
         case CMD_UNSOLICITED_ABORT:
                 dev_warn(&h->pdev->dev,
-                        "%p aborted do to an unsolicited abort\n", c);
+                        "%p aborted due to an unsolicited abort\n", c);
                 break;
         case CMD_TIMEOUT:
                 dev_warn(&h->pdev->dev, "%p timedout\n", c);
                 break;
+        case CMD_UNABORTABLE:
+                dev_warn(&h->pdev->dev,
+                        "%p unabortable\n", c);
+                break;
         default:
                 dev_warn(&h->pdev->dev,
                         "%p returned unknown status %x\n",
```
```diff
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index aca302492ff2..2a1642bc451d 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -92,7 +92,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
         bio->bi_end_io = drbd_md_io_complete;
         bio->bi_rw = rw;
 
-        if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
+        if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
                 bio_endio(bio, -EIO);
         else
                 submit_bio(rw, bio);
@@ -176,13 +176,17 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
         struct lc_element *al_ext;
         struct lc_element *tmp;
         unsigned long al_flags = 0;
+        int wake;
 
         spin_lock_irq(&mdev->al_lock);
         tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT);
         if (unlikely(tmp != NULL)) {
                 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
                 if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
+                        wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags);
                         spin_unlock_irq(&mdev->al_lock);
+                        if (wake)
+                                wake_up(&mdev->al_wait);
                         return NULL;
                 }
         }
@@ -258,6 +262,33 @@ void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector)
         spin_unlock_irqrestore(&mdev->al_lock, flags);
 }
 
+#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
+/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
+ * are still coupled, or assume too much about their relation.
+ * Code below will not work if this is violated.
+ * Will be cleaned up with some followup patch.
+ */
+# error FIXME
+#endif
+
+static unsigned int al_extent_to_bm_page(unsigned int al_enr)
+{
+        return al_enr >>
+                /* bit to page */
+                ((PAGE_SHIFT + 3) -
+                /* al extent number to bit */
+                 (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
+}
+
+static unsigned int rs_extent_to_bm_page(unsigned int rs_enr)
+{
+        return rs_enr >>
+                /* bit to page */
+                ((PAGE_SHIFT + 3) -
+                /* al extent number to bit */
+                 (BM_EXT_SHIFT - BM_BLOCK_SHIFT));
+}
+
 int
 w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
 {
@@ -285,7 +316,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
          * For now, we must not write the transaction,
          * if we cannot write out the bitmap of the evicted extent. */
         if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE)
-                drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT);
+                drbd_bm_write_page(mdev, al_extent_to_bm_page(evicted));
 
         /* The bitmap write may have failed, causing a state change. */
         if (mdev->state.disk < D_INCONSISTENT) {
@@ -334,7 +365,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
                 + mdev->ldev->md.al_offset + mdev->al_tr_pos;
 
         if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE))
-                drbd_chk_io_error(mdev, 1, TRUE);
+                drbd_chk_io_error(mdev, 1, true);
 
         if (++mdev->al_tr_pos >
             div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
@@ -511,225 +542,6 @@ cancel:
         return 1;
 }
 
-static void atodb_endio(struct bio *bio, int error)
-{
-        struct drbd_atodb_wait *wc = bio->bi_private;
-        struct drbd_conf *mdev = wc->mdev;
-        struct page *page;
-        int uptodate = bio_flagged(bio, BIO_UPTODATE);
-
-        /* strange behavior of some lower level drivers...
-         * fail the request by clearing the uptodate flag,
-         * but do not return any error?! */
-        if (!error && !uptodate)
-                error = -EIO;
-
-        drbd_chk_io_error(mdev, error, TRUE);
-        if (error && wc->error == 0)
-                wc->error = error;
-
-        if (atomic_dec_and_test(&wc->count))
-                complete(&wc->io_done);
-
-        page = bio->bi_io_vec[0].bv_page;
-        put_page(page);
-        bio_put(bio);
-        mdev->bm_writ_cnt++;
-        put_ldev(mdev);
-}
-
-/* sector to word */
-#define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
-
-/* activity log to on disk bitmap -- prepare bio unless that sector
- * is already covered by previously prepared bios */
-static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
-                                        struct bio **bios,
-                                        unsigned int enr,
-                                        struct drbd_atodb_wait *wc) __must_hold(local)
-{
-        struct bio *bio;
-        struct page *page;
-        sector_t on_disk_sector;
-        unsigned int page_offset = PAGE_SIZE;
-        int offset;
-        int i = 0;
-        int err = -ENOMEM;
-
-        /* We always write aligned, full 4k blocks,
-         * so we can ignore the logical_block_size (for now) */
-        enr &= ~7U;
-        on_disk_sector = enr + mdev->ldev->md.md_offset
-                        + mdev->ldev->md.bm_offset;
-
-        D_ASSERT(!(on_disk_sector & 7U));
-
-        /* Check if that enr is already covered by an already created bio.
-         * Caution, bios[] is not NULL terminated,
-         * but only initialized to all NULL.
-         * For completely scattered activity log,
-         * the last invocation iterates over all bios,
-         * and finds the last NULL entry.
-         */
-        while ((bio = bios[i])) {
-                if (bio->bi_sector == on_disk_sector)
-                        return 0;
-                i++;
-        }
-        /* bios[i] == NULL, the next not yet used slot */
-
-        /* GFP_KERNEL, we are not in the write-out path */
-        bio = bio_alloc(GFP_KERNEL, 1);
-        if (bio == NULL)
-                return -ENOMEM;
-
-        if (i > 0) {
-                const struct bio_vec *prev_bv = bios[i-1]->bi_io_vec;
-                page_offset = prev_bv->bv_offset + prev_bv->bv_len;
-                page = prev_bv->bv_page;
-        }
-        if (page_offset == PAGE_SIZE) {
-                page = alloc_page(__GFP_HIGHMEM);
-                if (page == NULL)
-                        goto out_bio_put;
-                page_offset = 0;
-        } else {
-                get_page(page);
-        }
-
-        offset = S2W(enr);
-        drbd_bm_get_lel(mdev, offset,
-                        min_t(size_t, S2W(8), drbd_bm_words(mdev) - offset),
-                        kmap(page) + page_offset);
-        kunmap(page);
-
-        bio->bi_private = wc;
-        bio->bi_end_io = atodb_endio;
-        bio->bi_bdev = mdev->ldev->md_bdev;
-        bio->bi_sector = on_disk_sector;
-
-        if (bio_add_page(bio, page, 4096, page_offset) != 4096)
-                goto out_put_page;
-
-        atomic_inc(&wc->count);
-        /* we already know that we may do this...
-         * get_ldev_if_state(mdev,D_ATTACHING);
-         * just get the extra reference, so that the local_cnt reflects
-         * the number of pending IO requests DRBD at its backing device.
-         */
-        atomic_inc(&mdev->local_cnt);
-
-        bios[i] = bio;
-
-        return 0;
-
-out_put_page:
-        err = -EINVAL;
-        put_page(page);
-out_bio_put:
-        bio_put(bio);
-        return err;
-}
-
-/**
- * drbd_al_to_on_disk_bm() -  * Writes bitmap parts covered by active AL extents
- * @mdev: DRBD device.
- *
- * Called when we detach (unconfigure) local storage,
- * or when we go from R_PRIMARY to R_SECONDARY role.
- */
-void drbd_al_to_on_disk_bm(struct drbd_conf *mdev)
-{
-        int i, nr_elements;
-        unsigned int enr;
-        struct bio **bios;
-        struct drbd_atodb_wait wc;
-
-        ERR_IF (!get_ldev_if_state(mdev, D_ATTACHING))
-                return; /* sorry, I don't have any act_log etc... */
-
-        wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
-
-        nr_elements = mdev->act_log->nr_elements;
-
-        /* GFP_KERNEL, we are not in anyone's write-out path */
-        bios = kzalloc(sizeof(struct bio *) * nr_elements, GFP_KERNEL);
-        if (!bios)
-                goto submit_one_by_one;
-
-        atomic_set(&wc.count, 0);
-        init_completion(&wc.io_done);
-        wc.mdev = mdev;
-        wc.error = 0;
-
-        for (i = 0; i < nr_elements; i++) {
-                enr = lc_element_by_index(mdev->act_log, i)->lc_number;
-                if (enr == LC_FREE)
-                        continue;
-                /* next statement also does atomic_inc wc.count and local_cnt */
-                if (atodb_prepare_unless_covered(mdev, bios,
-                                                enr/AL_EXT_PER_BM_SECT,
-                                                &wc))
-                        goto free_bios_submit_one_by_one;
-        }
-
-        /* unnecessary optimization? */
-        lc_unlock(mdev->act_log);
-        wake_up(&mdev->al_wait);
-
-        /* all prepared, submit them */
-        for (i = 0; i < nr_elements; i++) {
-                if (bios[i] == NULL)
-                        break;
-                if (FAULT_ACTIVE(mdev, DRBD_FAULT_MD_WR)) {
-                        bios[i]->bi_rw = WRITE;
-                        bio_endio(bios[i], -EIO);
-                } else {
-                        submit_bio(WRITE, bios[i]);
-                }
-        }
-
-        /* always (try to) flush bitmap to stable storage */
-        drbd_md_flush(mdev);
-
-        /* In case we did not submit a single IO do not wait for
-         * them to complete. ( Because we would wait forever here. )
-         *
-         * In case we had IOs and they are already complete, there
-         * is not point in waiting anyways.
-         * Therefore this if () ... */
-        if (atomic_read(&wc.count))
-                wait_for_completion(&wc.io_done);
-
-        put_ldev(mdev);
-
-        kfree(bios);
-        return;
-
-free_bios_submit_one_by_one:
-        /* free everything by calling the endio callback directly. */
-        for (i = 0; i < nr_elements && bios[i]; i++)
-                bio_endio(bios[i], 0);
-
-        kfree(bios);
-
-submit_one_by_one:
-        dev_warn(DEV, "Using the slow drbd_al_to_on_disk_bm()\n");
-
-        for (i = 0; i < mdev->act_log->nr_elements; i++) {
-                enr = lc_element_by_index(mdev->act_log, i)->lc_number;
-                if (enr == LC_FREE)
-                        continue;
-                /* Really slow: if we have al-extents 16..19 active,
-                 * sector 4 will be written four times! Synchronous! */
-                drbd_bm_write_sect(mdev, enr/AL_EXT_PER_BM_SECT);
-        }
-
-        lc_unlock(mdev->act_log);
-        wake_up(&mdev->al_wait);
-        put_ldev(mdev);
-}
-
 /**
  * drbd_al_apply_to_bm() - Sets the bitmap to diry(1) where covered ba active AL extents
  * @mdev: DRBD device.
@@ -809,7 +621,7 @@ static int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused
                 return 1;
         }
 
-        drbd_bm_write_sect(mdev, udw->enr);
+        drbd_bm_write_page(mdev, rs_extent_to_bm_page(udw->enr));
         put_ldev(mdev);
 
         kfree(udw);
@@ -889,7 +701,6 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
                                 dev_warn(DEV, "Kicking resync_lru element enr=%u "
                                      "out with rs_failed=%d\n",
                                      ext->lce.lc_number, ext->rs_failed);
-                                set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
                         }
                         ext->rs_left = rs_left;
                         ext->rs_failed = success ? 0 : count;
@@ -908,7 +719,6 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
                                 drbd_queue_work_front(&mdev->data.work, &udw->w);
                         } else {
                                 dev_warn(DEV, "Could not kmalloc an udw\n");
-                                set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
                         }
                 }
         } else {
@@ -919,6 +729,22 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
         }
 }
 
+void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go)
+{
+        unsigned long now = jiffies;
+        unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark];
+        int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS;
+        if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
+                if (mdev->rs_mark_left[mdev->rs_last_mark] != still_to_go &&
+                    mdev->state.conn != C_PAUSED_SYNC_T &&
+                    mdev->state.conn != C_PAUSED_SYNC_S) {
+                        mdev->rs_mark_time[next] = now;
+                        mdev->rs_mark_left[next] = still_to_go;
+                        mdev->rs_last_mark = next;
+                }
+        }
+}
+
 /* clear the bit corresponding to the piece of storage in question:
  * size byte of data starting from sector.  Only clear a bits of the affected
  * one ore more _aligned_ BM_BLOCK_SIZE blocks.
@@ -936,7 +762,7 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
         int wake_up = 0;
         unsigned long flags;
 
-        if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
+        if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
                 dev_err(DEV, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
                                 (unsigned long long)sector, size);
                 return;
@@ -969,21 +795,9 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
          */
         count = drbd_bm_clear_bits(mdev, sbnr, ebnr);
         if (count && get_ldev(mdev)) {
-                unsigned long now = jiffies;
-                unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark];
-                int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS;
-                if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
-                        unsigned long tw = drbd_bm_total_weight(mdev);
-                        if (mdev->rs_mark_left[mdev->rs_last_mark] != tw &&
-                            mdev->state.conn != C_PAUSED_SYNC_T &&
-                            mdev->state.conn != C_PAUSED_SYNC_S) {
-                                mdev->rs_mark_time[next] = now;
-                                mdev->rs_mark_left[next] = tw;
-                                mdev->rs_last_mark = next;
-                        }
-                }
+                drbd_advance_rs_marks(mdev, drbd_bm_total_weight(mdev));
                 spin_lock_irqsave(&mdev->al_lock, flags);
-                drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE);
+                drbd_try_clear_on_disk_bm(mdev, sector, count, true);
                 spin_unlock_irqrestore(&mdev->al_lock, flags);
 
                 /* just wake_up unconditional now, various lc_chaged(),
@@ -998,27 +812,27 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
 /*
  * this is intended to set one request worth of data out of sync.
  * affects at least 1 bit,
- * and at most 1+DRBD_MAX_SEGMENT_SIZE/BM_BLOCK_SIZE bits.
+ * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits.
  *
  * called by tl_clear and drbd_send_dblock (==drbd_make_request).
  * so this can be _any_ process.
  */
-void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
+int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
                             const char *file, const unsigned int line)
 {
         unsigned long sbnr, ebnr, lbnr, flags;
         sector_t esector, nr_sectors;
-        unsigned int enr, count;
+        unsigned int enr, count = 0;
         struct lc_element *e;
 
-        if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
+        if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
                 dev_err(DEV, "sector: %llus, size: %d\n",
                         (unsigned long long)sector, size);
-                return;
+                return 0;
         }
 
         if (!get_ldev(mdev))
-                return; /* no disk, no metadata, no bitmap to set bits in */
+                return 0; /* no disk, no metadata, no bitmap to set bits in */
 
         nr_sectors = drbd_get_capacity(mdev->this_bdev);
         esector = sector + (size >> 9) - 1;
@@ -1048,6 +862,8 @@ void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
 
 out:
         put_ldev(mdev);
+
+        return count;
 }
 
 static
@@ -1128,7 +944,10 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
         unsigned int enr = BM_SECT_TO_EXT(sector);
         struct bm_extent *bm_ext;
         int i, sig;
+        int sa = 200; /* Step aside 200 times, then grab the extent and let app-IO wait.
+                         200 times -> 20 seconds. */
 
+retry:
         sig = wait_event_interruptible(mdev->al_wait,
                         (bm_ext = _bme_get(mdev, enr)));
         if (sig)
@@ -1139,16 +958,25 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
 
         for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
                 sig = wait_event_interruptible(mdev->al_wait,
-                                !_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i));
-                if (sig) {
+                                !_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i) ||
+                                test_bit(BME_PRIORITY, &bm_ext->flags));
+
+                if (sig || (test_bit(BME_PRIORITY, &bm_ext->flags) && sa)) {
                         spin_lock_irq(&mdev->al_lock);
                         if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
-                                clear_bit(BME_NO_WRITES, &bm_ext->flags);
+                                bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */
                                 mdev->resync_locked--;
                                 wake_up(&mdev->al_wait);
                         }
                         spin_unlock_irq(&mdev->al_lock);
-                        return -EINTR;
+                        if (sig)
+                                return -EINTR;
+                        if (schedule_timeout_interruptible(HZ/10))
+                                return -EINTR;
+                        if (sa && --sa == 0)
+                                dev_warn(DEV,"drbd_rs_begin_io() stepped aside for 20sec."
+                                         "Resync stalled?\n");
+                        goto retry;
                 }
         }
         set_bit(BME_LOCKED, &bm_ext->flags);
@@ -1291,8 +1119,7 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
         }
 
         if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
-                clear_bit(BME_LOCKED, &bm_ext->flags);
-                clear_bit(BME_NO_WRITES, &bm_ext->flags);
+                bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */
                 mdev->resync_locked--;
                 wake_up(&mdev->al_wait);
         }
@@ -1383,7 +1210,7 @@ void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size)
         sector_t esector, nr_sectors;
         int wake_up = 0;
 
-        if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
+        if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
                 dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
                                 (unsigned long long)sector, size);
                 return;
@@ -1420,7 +1247,7 @@ void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size)
         mdev->rs_failed += count;
 
         if (get_ldev(mdev)) {
-                drbd_try_clear_on_disk_bm(mdev, sector, count, FALSE);
+                drbd_try_clear_on_disk_bm(mdev, sector, count, false);
                 put_ldev(mdev);
         }
 
```
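The BME_PRIORITY/step-aside change above gives application IO precedence over resync: drbd_rs_begin_io() now backs off in ~100 ms (HZ/10) steps while an application write has flagged the extent, but at most 200 times, about 20 seconds, after which it takes the extent anyway. A reduced model of that loop, with a stand-in predicate in place of the DRBD waitqueue machinery:

```c
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for "application IO has flagged this extent BME_PRIORITY";
 * purely illustrative. */
static bool app_io_has_priority(int iteration)
{
    return iteration < 150;  /* app IO backs off after a while */
}

int main(void)
{
    int sa = 200;  /* step aside at most 200 times (~20 s at 100 ms each) */
    int i = 0;

    while (app_io_has_priority(i++) && sa) {
        /* the real code sleeps ~100 ms (HZ/10), interruptibly */
        if (--sa == 0)
            fprintf(stderr, "stepped aside for 20 s; resync stalled?\n");
    }
    /* here the real code grabs the extent and lets application IO wait */
    printf("acquired after %d attempts, %d step-asides unused\n", i, sa);
    return 0;
}
```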
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 0645ca829a94..f0ae63d2df65 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c | |||
| @@ -28,18 +28,58 @@ | |||
| 28 | #include <linux/drbd.h> | 28 | #include <linux/drbd.h> |
| 29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
| 30 | #include <asm/kmap_types.h> | 30 | #include <asm/kmap_types.h> |
| 31 | |||
| 32 | #include <asm-generic/bitops/le.h> | ||
| 33 | |||
| 31 | #include "drbd_int.h" | 34 | #include "drbd_int.h" |
| 32 | 35 | ||
| 36 | |||
| 33 | /* OPAQUE outside this file! | 37 | /* OPAQUE outside this file! |
| 34 | * interface defined in drbd_int.h | 38 | * interface defined in drbd_int.h |
| 35 | 39 | ||
| 36 | * convention: | 40 | * convention: |
| 37 | * function name drbd_bm_... => used elsewhere, "public". | 41 | * function name drbd_bm_... => used elsewhere, "public". |
| 38 | * function name bm_... => internal to implementation, "private". | 42 | * function name bm_... => internal to implementation, "private". |
| 43 | */ | ||
| 44 | |||
| 45 | |||
| 46 | /* | ||
| 47 | * LIMITATIONS: | ||
| 48 | * We want to support >= peta byte of backend storage, while for now still using | ||
| 49 | * a granularity of one bit per 4KiB of storage. | ||
| 50 | * 1 << 50 bytes backend storage (1 PiB) | ||
| 51 | * 1 << (50 - 12) bits needed | ||
| 52 | * 38 --> we need u64 to index and count bits | ||
| 53 | * 1 << (38 - 3) bitmap bytes needed | ||
| 54 | * 35 --> we still need u64 to index and count bytes | ||
| 55 | * (that's 32 GiB of bitmap for 1 PiB storage) | ||
| 56 | * 1 << (35 - 2) 32bit longs needed | ||
| 57 | * 33 --> we'd even need u64 to index and count 32bit long words. | ||
| 58 | * 1 << (35 - 3) 64bit longs needed | ||
| 59 | * 32 --> we could get away with a 32bit unsigned int to index and count | ||
| 60 | * 64bit long words, but I rather stay with unsigned long for now. | ||
| 61 | * We probably should neither count nor point to bytes or long words | ||
| 62 | * directly, but either by bitnumber, or by page index and offset. | ||
| 63 | * 1 << (35 - 12) | ||
| 64 | * 22 --> we need that much 4KiB pages of bitmap. | ||
| 65 | * 1 << (22 + 3) --> on a 64bit arch, | ||
| 66 | * we need 32 MiB to store the array of page pointers. | ||
| 67 | * | ||
| 68 | * Because I'm lazy, and because the resulting patch was too large, too ugly | ||
| 69 | * and still incomplete, on 32bit we still "only" support 16 TiB (minus some), | ||
| 70 | * (1 << 32) bits * 4k storage. | ||
| 71 | * | ||
| 39 | 72 | ||
| 40 | * Note that since find_first_bit returns int, at the current granularity of | 73 | * bitmap storage and IO: |
| 41 | * the bitmap (4KB per byte), this implementation "only" supports up to | 74 | * Bitmap is stored little endian on disk, and is kept little endian in |
| 42 | * 1<<(32+12) == 16 TB... | 75 | * core memory. Currently we still hold the full bitmap in core as long |
| 76 | * as we are "attached" to a local disk, which at 32 GiB for 1PiB storage | ||
| 77 | * seems excessive. | ||
| 78 | * | ||
| 79 | * We plan to reduce the amount of in-core bitmap pages by pageing them in | ||
| 80 | * and out against their on-disk location as necessary, but need to make | ||
| 81 | * sure we don't cause too much meta data IO, and must not deadlock in | ||
| 82 | * tight memory situations. This needs some more work. | ||
| 43 | */ | 83 | */ |
| 44 | 84 | ||
| 45 | /* | 85 | /* |
| @@ -55,13 +95,9 @@ | |||
| 55 | struct drbd_bitmap { | 95 | struct drbd_bitmap { |
| 56 | struct page **bm_pages; | 96 | struct page **bm_pages; |
| 57 | spinlock_t bm_lock; | 97 | spinlock_t bm_lock; |
| 58 | /* WARNING unsigned long bm_*: | 98 | |
| 59 | * 32bit number of bit offset is just enough for 512 MB bitmap. | 99 | /* see LIMITATIONS: above */ |
| 60 | * it will blow up if we make the bitmap bigger... | 100 | |
| 61 | * not that it makes much sense to have a bitmap that large, | ||
| 62 | * rather change the granularity to 16k or 64k or something. | ||
| 63 | * (that implies other problems, however...) | ||
| 64 | */ | ||
| 65 | unsigned long bm_set; /* nr of set bits; THINK maybe atomic_t? */ | 101 | unsigned long bm_set; /* nr of set bits; THINK maybe atomic_t? */ |
| 66 | unsigned long bm_bits; | 102 | unsigned long bm_bits; |
| 67 | size_t bm_words; | 103 | size_t bm_words; |
| @@ -69,29 +105,18 @@ struct drbd_bitmap { | |||
| 69 | sector_t bm_dev_capacity; | 105 | sector_t bm_dev_capacity; |
| 70 | struct mutex bm_change; /* serializes resize operations */ | 106 | struct mutex bm_change; /* serializes resize operations */ |
| 71 | 107 | ||
| 72 | atomic_t bm_async_io; | 108 | wait_queue_head_t bm_io_wait; /* used to serialize IO of single pages */ |
| 73 | wait_queue_head_t bm_io_wait; | ||
| 74 | 109 | ||
| 75 | unsigned long bm_flags; | 110 | enum bm_flag bm_flags; |
| 76 | 111 | ||
| 77 | /* debugging aid, in case we are still racy somewhere */ | 112 | /* debugging aid, in case we are still racy somewhere */ |
| 78 | char *bm_why; | 113 | char *bm_why; |
| 79 | struct task_struct *bm_task; | 114 | struct task_struct *bm_task; |
| 80 | }; | 115 | }; |
| 81 | 116 | ||
| 82 | /* definition of bits in bm_flags */ | ||
| 83 | #define BM_LOCKED 0 | ||
| 84 | #define BM_MD_IO_ERROR 1 | ||
| 85 | #define BM_P_VMALLOCED 2 | ||
| 86 | |||
| 87 | static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, | 117 | static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, |
| 88 | unsigned long e, int val, const enum km_type km); | 118 | unsigned long e, int val, const enum km_type km); |
| 89 | 119 | ||
| 90 | static int bm_is_locked(struct drbd_bitmap *b) | ||
| 91 | { | ||
| 92 | return test_bit(BM_LOCKED, &b->bm_flags); | ||
| 93 | } | ||
| 94 | |||
| 95 | #define bm_print_lock_info(m) __bm_print_lock_info(m, __func__) | 120 | #define bm_print_lock_info(m) __bm_print_lock_info(m, __func__) |
| 96 | static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) | 121 | static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) |
| 97 | { | 122 | { |
| @@ -108,7 +133,7 @@ static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) | |||
| 108 | b->bm_task == mdev->worker.task ? "worker" : "?"); | 133 | b->bm_task == mdev->worker.task ? "worker" : "?"); |
| 109 | } | 134 | } |
| 110 | 135 | ||
| 111 | void drbd_bm_lock(struct drbd_conf *mdev, char *why) | 136 | void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) |
| 112 | { | 137 | { |
| 113 | struct drbd_bitmap *b = mdev->bitmap; | 138 | struct drbd_bitmap *b = mdev->bitmap; |
| 114 | int trylock_failed; | 139 | int trylock_failed; |
| @@ -131,8 +156,9 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why) | |||
| 131 | b->bm_task == mdev->worker.task ? "worker" : "?"); | 156 | b->bm_task == mdev->worker.task ? "worker" : "?"); |
| 132 | mutex_lock(&b->bm_change); | 157 | mutex_lock(&b->bm_change); |
| 133 | } | 158 | } |
| 134 | if (__test_and_set_bit(BM_LOCKED, &b->bm_flags)) | 159 | if (BM_LOCKED_MASK & b->bm_flags) |
| 135 | dev_err(DEV, "FIXME bitmap already locked in bm_lock\n"); | 160 | dev_err(DEV, "FIXME bitmap already locked in bm_lock\n"); |
| 161 | b->bm_flags |= flags & BM_LOCKED_MASK; | ||
| 136 | 162 | ||
| 137 | b->bm_why = why; | 163 | b->bm_why = why; |
| 138 | b->bm_task = current; | 164 | b->bm_task = current; |
| @@ -146,31 +172,137 @@ void drbd_bm_unlock(struct drbd_conf *mdev) | |||
| 146 | return; | 172 | return; |
| 147 | } | 173 | } |
| 148 | 174 | ||
| 149 | if (!__test_and_clear_bit(BM_LOCKED, &mdev->bitmap->bm_flags)) | 175 | if (!(BM_LOCKED_MASK & mdev->bitmap->bm_flags)) |
| 150 | dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n"); | 176 | dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n"); |
| 151 | 177 | ||
| 178 | b->bm_flags &= ~BM_LOCKED_MASK; | ||
| 152 | b->bm_why = NULL; | 179 | b->bm_why = NULL; |
| 153 | b->bm_task = NULL; | 180 | b->bm_task = NULL; |
| 154 | mutex_unlock(&b->bm_change); | 181 | mutex_unlock(&b->bm_change); |
| 155 | } | 182 | } |
| 156 | 183 | ||
| 157 | /* word offset to long pointer */ | 184 | /* we store some "meta" info about our pages in page->private */ |
| 158 | static unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset, const enum km_type km) | 185 | /* at a granularity of 4k storage per bitmap bit: |
| 186 | * one peta byte storage: 1<<50 byte, 1<<38 * 4k storage blocks | ||
| 187 | * 1<<38 bits, | ||
| 188 | * 1<<23 4k bitmap pages. | ||
| 189 | * Use 24 bits as page index, covers 2 peta byte storage | ||
| 190 | * at a granularity of 4k per bit. | ||
| 191 | * Used to report the failed page idx on io error from the endio handlers. | ||
| 192 | */ | ||
| 193 | #define BM_PAGE_IDX_MASK ((1UL<<24)-1) | ||
| 194 | /* this page is currently read in, or written back */ | ||
| 195 | #define BM_PAGE_IO_LOCK 31 | ||
| 196 | /* if there has been an IO error for this page */ | ||
| 197 | #define BM_PAGE_IO_ERROR 30 | ||
| 198 | /* this is to be able to intelligently skip disk IO, | ||
| 199 | * set if bits have been set since last IO. */ | ||
| 200 | #define BM_PAGE_NEED_WRITEOUT 29 | ||
| 201 | /* to mark for lazy writeout once syncer cleared all clearable bits, | ||
| 202 | * we if bits have been cleared since last IO. */ | ||
| 203 | #define BM_PAGE_LAZY_WRITEOUT 28 | ||
| 204 | |||
| 205 | /* store_page_idx uses non-atomic assingment. It is only used directly after | ||
| 206 | * allocating the page. All other bm_set_page_* and bm_clear_page_* need to | ||
| 207 | * use atomic bit manipulation, as set_out_of_sync (and therefore bitmap | ||
| 208 | * changes) may happen from various contexts, and wait_on_bit/wake_up_bit | ||
| 209 | * requires it all to be atomic as well. */ | ||
| 210 | static void bm_store_page_idx(struct page *page, unsigned long idx) | ||
| 159 | { | 211 | { |
| 160 | struct page *page; | 212 | BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK)); |
| 161 | unsigned long page_nr; | 213 | page_private(page) |= idx; |
| 214 | } | ||
| 215 | |||
| 216 | static unsigned long bm_page_to_idx(struct page *page) | ||
| 217 | { | ||
| 218 | return page_private(page) & BM_PAGE_IDX_MASK; | ||
| 219 | } | ||
| 220 | |||
| 221 | /* As is very unlikely that the same page is under IO from more than one | ||
| 222 | * context, we can get away with a bit per page and one wait queue per bitmap. | ||
| 223 | */ | ||
| 224 | static void bm_page_lock_io(struct drbd_conf *mdev, int page_nr) | ||
| 225 | { | ||
| 226 | struct drbd_bitmap *b = mdev->bitmap; | ||
| 227 | void *addr = &page_private(b->bm_pages[page_nr]); | ||
| 228 | wait_event(b->bm_io_wait, !test_and_set_bit(BM_PAGE_IO_LOCK, addr)); | ||
| 229 | } | ||
| 230 | |||
| 231 | static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr) | ||
| 232 | { | ||
| 233 | struct drbd_bitmap *b = mdev->bitmap; | ||
| 234 | void *addr = &page_private(b->bm_pages[page_nr]); | ||
| 235 | clear_bit(BM_PAGE_IO_LOCK, addr); | ||
| 236 | smp_mb__after_clear_bit(); | ||
| 237 | wake_up(&mdev->bitmap->bm_io_wait); | ||
| 238 | } | ||
| 239 | |||
| 240 | /* set _before_ submit_io, so it may be reset due to being changed | ||
| 241 | * while this page is in flight... will get submitted later again */ | ||
| 242 | static void bm_set_page_unchanged(struct page *page) | ||
| 243 | { | ||
| 244 | /* use cmpxchg? */ | ||
| 245 | clear_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page)); | ||
| 246 | clear_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page)); | ||
| 247 | } | ||
| 162 | 248 | ||
| 249 | static void bm_set_page_need_writeout(struct page *page) | ||
| 250 | { | ||
| 251 | set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page)); | ||
| 252 | } | ||
| 253 | |||
| 254 | static int bm_test_page_unchanged(struct page *page) | ||
| 255 | { | ||
| 256 | volatile const unsigned long *addr = &page_private(page); | ||
| 257 | return (*addr & ((1UL<<BM_PAGE_NEED_WRITEOUT)|(1UL<<BM_PAGE_LAZY_WRITEOUT))) == 0; | ||
| 258 | } | ||
| 259 | |||
| 260 | static void bm_set_page_io_err(struct page *page) | ||
| 261 | { | ||
| 262 | set_bit(BM_PAGE_IO_ERROR, &page_private(page)); | ||
| 263 | } | ||
| 264 | |||
| 265 | static void bm_clear_page_io_err(struct page *page) | ||
| 266 | { | ||
| 267 | clear_bit(BM_PAGE_IO_ERROR, &page_private(page)); | ||
| 268 | } | ||
| 269 | |||
| 270 | static void bm_set_page_lazy_writeout(struct page *page) | ||
| 271 | { | ||
| 272 | set_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page)); | ||
| 273 | } | ||
| 274 | |||
| 275 | static int bm_test_page_lazy_writeout(struct page *page) | ||
| 276 | { | ||
| 277 | return test_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page)); | ||
| 278 | } | ||
| 279 | |||
| 280 | /* on a 32bit box, this would allow for exactly (2<<38) bits. */ | ||
| 281 | static unsigned int bm_word_to_page_idx(struct drbd_bitmap *b, unsigned long long_nr) | ||
| 282 | { | ||
| 163 | /* page_nr = (word*sizeof(long)) >> PAGE_SHIFT; */ | 283 | /* page_nr = (word*sizeof(long)) >> PAGE_SHIFT; */ |
| 164 | page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3); | 284 | unsigned int page_nr = long_nr >> (PAGE_SHIFT - LN2_BPL + 3); |
| 165 | BUG_ON(page_nr >= b->bm_number_of_pages); | 285 | BUG_ON(page_nr >= b->bm_number_of_pages); |
| 166 | page = b->bm_pages[page_nr]; | 286 | return page_nr; |
| 287 | } | ||
| 167 | 288 | ||
| 289 | static unsigned int bm_bit_to_page_idx(struct drbd_bitmap *b, u64 bitnr) | ||
| 290 | { | ||
| 291 | /* page_nr = (bitnr/8) >> PAGE_SHIFT; */ | ||
| 292 | unsigned int page_nr = bitnr >> (PAGE_SHIFT + 3); | ||
| 293 | BUG_ON(page_nr >= b->bm_number_of_pages); | ||
| 294 | return page_nr; | ||
| 295 | } | ||
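With 4k pages a bitmap page holds 1<<15 bits and 1<<9 64-bit longs, so both index helpers reduce to one shift. A quick sanity check of the arithmetic (assuming PAGE_SHIFT == 12 and LN2_BPL == 6, i.e. 64-bit longs):

    #include <assert.h>

    #define PAGE_SHIFT 12
    #define LN2_BPL     6

    int main(void)
    {
        /* bm_bit_to_page_idx: bitnr -> bitnr/8 bytes -> /PAGE_SIZE */
        unsigned long long bitnr = 100000;
        assert((bitnr >> (PAGE_SHIFT + 3)) == 100000 / (4096 * 8));

        /* bm_word_to_page_idx: word*sizeof(long) bytes -> /PAGE_SIZE */
        unsigned long long_nr = 700;
        assert((long_nr >> (PAGE_SHIFT - LN2_BPL + 3)) == 700 / 512);
        return 0;
    }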
| 296 | |||
| 297 | static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx, const enum km_type km) | ||
| 298 | { | ||
| 299 | struct page *page = b->bm_pages[idx]; | ||
| 168 | return (unsigned long *) kmap_atomic(page, km); | 300 | return (unsigned long *) kmap_atomic(page, km); |
| 169 | } | 301 | } |
| 170 | 302 | ||
| 171 | static unsigned long * bm_map_paddr(struct drbd_bitmap *b, unsigned long offset) | 303 | static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx) |
| 172 | { | 304 | { |
| 173 | return __bm_map_paddr(b, offset, KM_IRQ1); | 305 | return __bm_map_pidx(b, idx, KM_IRQ1); |
| 174 | } | 306 | } |
| 175 | 307 | ||
| 176 | static void __bm_unmap(unsigned long *p_addr, const enum km_type km) | 308 | static void __bm_unmap(unsigned long *p_addr, const enum km_type km) |
| @@ -202,6 +334,7 @@ static void bm_unmap(unsigned long *p_addr) | |||
| 202 | * to be able to report device-specific messages. | 334 | * to be able to report device-specific messages. |
| 203 | */ | 335 | */ |
| 204 | 336 | ||
| 337 | |||
| 205 | static void bm_free_pages(struct page **pages, unsigned long number) | 338 | static void bm_free_pages(struct page **pages, unsigned long number) |
| 206 | { | 339 | { |
| 207 | unsigned long i; | 340 | unsigned long i; |
| @@ -269,6 +402,9 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) | |||
| 269 | bm_vk_free(new_pages, vmalloced); | 402 | bm_vk_free(new_pages, vmalloced); |
| 270 | return NULL; | 403 | return NULL; |
| 271 | } | 404 | } |
| 405 | /* we want to know which page it is | ||
| 406 | * from the endio handlers */ | ||
| 407 | bm_store_page_idx(page, i); | ||
| 272 | new_pages[i] = page; | 408 | new_pages[i] = page; |
| 273 | } | 409 | } |
| 274 | } else { | 410 | } else { |
| @@ -280,9 +416,9 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) | |||
| 280 | } | 416 | } |
| 281 | 417 | ||
| 282 | if (vmalloced) | 418 | if (vmalloced) |
| 283 | set_bit(BM_P_VMALLOCED, &b->bm_flags); | 419 | b->bm_flags |= BM_P_VMALLOCED; |
| 284 | else | 420 | else |
| 285 | clear_bit(BM_P_VMALLOCED, &b->bm_flags); | 421 | b->bm_flags &= ~BM_P_VMALLOCED; |
| 286 | 422 | ||
| 287 | return new_pages; | 423 | return new_pages; |
| 288 | } | 424 | } |
| @@ -319,7 +455,7 @@ void drbd_bm_cleanup(struct drbd_conf *mdev) | |||
| 319 | { | 455 | { |
| 320 | ERR_IF (!mdev->bitmap) return; | 456 | ERR_IF (!mdev->bitmap) return; |
| 321 | bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages); | 457 | bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages); |
| 322 | bm_vk_free(mdev->bitmap->bm_pages, test_bit(BM_P_VMALLOCED, &mdev->bitmap->bm_flags)); | 458 | bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags)); |
| 323 | kfree(mdev->bitmap); | 459 | kfree(mdev->bitmap); |
| 324 | mdev->bitmap = NULL; | 460 | mdev->bitmap = NULL; |
| 325 | } | 461 | } |
| @@ -329,22 +465,39 @@ void drbd_bm_cleanup(struct drbd_conf *mdev) | |||
| 329 | * this masks out the remaining bits. | 465 | * this masks out the remaining bits. |
| 330 | * Returns the number of bits cleared. | 466 | * Returns the number of bits cleared. |
| 331 | */ | 467 | */ |
| 468 | #define BITS_PER_PAGE (1UL << (PAGE_SHIFT + 3)) | ||
| 469 | #define BITS_PER_PAGE_MASK (BITS_PER_PAGE - 1) | ||
| 470 | #define BITS_PER_LONG_MASK (BITS_PER_LONG - 1) | ||
| 332 | static int bm_clear_surplus(struct drbd_bitmap *b) | 471 | static int bm_clear_surplus(struct drbd_bitmap *b) |
| 333 | { | 472 | { |
| 334 | const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1; | 473 | unsigned long mask; |
| 335 | size_t w = b->bm_bits >> LN2_BPL; | ||
| 336 | int cleared = 0; | ||
| 337 | unsigned long *p_addr, *bm; | 474 | unsigned long *p_addr, *bm; |
| 475 | int tmp; | ||
| 476 | int cleared = 0; | ||
| 338 | 477 | ||
| 339 | p_addr = bm_map_paddr(b, w); | 478 | /* number of bits modulo bits per page */ |
| 340 | bm = p_addr + MLPP(w); | 479 | tmp = (b->bm_bits & BITS_PER_PAGE_MASK); |
| 341 | if (w < b->bm_words) { | 480 | /* mask the used bits of the word containing the last bit */ |
| 481 | mask = (1UL << (tmp & BITS_PER_LONG_MASK)) -1; | ||
| 482 | /* bitmap is always stored little endian, | ||
| 483 | * on disk and in core memory alike */ | ||
| 484 | mask = cpu_to_lel(mask); | ||
| 485 | |||
| 486 | p_addr = bm_map_pidx(b, b->bm_number_of_pages - 1); | ||
| 487 | bm = p_addr + (tmp/BITS_PER_LONG); | ||
| 488 | if (mask) { | ||
| 489 | /* If mask != 0, we are not exactly aligned, so bm now points | ||
| 490 | * to the long containing the last bit. | ||
| 491 | * If mask == 0, bm already points to the word immediately | ||
| 492 | * after the last (long word aligned) bit. */ | ||
| 342 | cleared = hweight_long(*bm & ~mask); | 493 | cleared = hweight_long(*bm & ~mask); |
| 343 | *bm &= mask; | 494 | *bm &= mask; |
| 344 | w++; bm++; | 495 | bm++; |
| 345 | } | 496 | } |
| 346 | 497 | ||
| 347 | if (w < b->bm_words) { | 498 | if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) { |
| 499 | /* on a 32bit arch, we may need to zero out | ||
| 500 | * a padding long to align with a 64bit remote */ | ||
| 348 | cleared += hweight_long(*bm); | 501 | cleared += hweight_long(*bm); |
| 349 | *bm = 0; | 502 | *bm = 0; |
| 350 | } | 503 | } |
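To make the masking concrete: with, say, 70 valid bits the last long of the bitmap carries 6 used bits, and everything above them is surplus. A worked example on 64-bit longs (__builtin_popcountl stands in for hweight_long; the cpu_to_lel() step is a no-op on little-endian hosts and omitted here):

    #include <assert.h>

    int main(void)
    {
        unsigned long bm_bits = 70;
        unsigned long mask = (1UL << (bm_bits & 63)) - 1;  /* low 6 bits */

        unsigned long last_word = ~0UL;              /* all 64 bits set */
        int cleared = __builtin_popcountl(last_word & ~mask);

        last_word &= mask;                        /* bm_clear_surplus() */
        assert(cleared == 58);
        assert(last_word == (1UL << 6) - 1);
        return 0;
    }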
| @@ -354,66 +507,75 @@ static int bm_clear_surplus(struct drbd_bitmap *b) | |||
| 354 | 507 | ||
| 355 | static void bm_set_surplus(struct drbd_bitmap *b) | 508 | static void bm_set_surplus(struct drbd_bitmap *b) |
| 356 | { | 509 | { |
| 357 | const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1; | 510 | unsigned long mask; |
| 358 | size_t w = b->bm_bits >> LN2_BPL; | ||
| 359 | unsigned long *p_addr, *bm; | 511 | unsigned long *p_addr, *bm; |
| 360 | 512 | int tmp; | |
| 361 | p_addr = bm_map_paddr(b, w); | 513 | |
| 362 | bm = p_addr + MLPP(w); | 514 | /* number of bits modulo bits per page */ |
| 363 | if (w < b->bm_words) { | 515 | tmp = (b->bm_bits & BITS_PER_PAGE_MASK); |
| 516 | /* mask the used bits of the word containing the last bit */ | ||
| 517 | mask = (1UL << (tmp & BITS_PER_LONG_MASK)) -1; | ||
| 518 | /* bitmap is always stored little endian, | ||
| 519 | * on disk and in core memory alike */ | ||
| 520 | mask = cpu_to_lel(mask); | ||
| 521 | |||
| 522 | p_addr = bm_map_pidx(b, b->bm_number_of_pages - 1); | ||
| 523 | bm = p_addr + (tmp/BITS_PER_LONG); | ||
| 524 | if (mask) { | ||
| 525 | /* If mask != 0, we are not exactly aligned, so bm now points | ||
| 526 | * to the long containing the last bit. | ||
| 527 | * If mask == 0, bm already points to the word immediately | ||
| 528 | * after the last (long word aligned) bit. */ | ||
| 364 | *bm |= ~mask; | 529 | *bm |= ~mask; |
| 365 | bm++; w++; | 530 | bm++; |
| 366 | } | 531 | } |
| 367 | 532 | ||
| 368 | if (w < b->bm_words) { | 533 | if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) { |
| 369 | *bm = ~(0UL); | 534 | /* on a 32bit arch, we may need to set all bits |
| 535 | * of a padding long to align with a 64bit remote */ | ||
| 536 | *bm = ~0UL; | ||
| 370 | } | 537 | } |
| 371 | bm_unmap(p_addr); | 538 | bm_unmap(p_addr); |
| 372 | } | 539 | } |
| 373 | 540 | ||
| 374 | static unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endian) | 541 | /* you better not modify the bitmap while this is running, |
| 542 | * or its results will be stale */ | ||
| 543 | static unsigned long bm_count_bits(struct drbd_bitmap *b) | ||
| 375 | { | 544 | { |
| 376 | unsigned long *p_addr, *bm, offset = 0; | 545 | unsigned long *p_addr; |
| 377 | unsigned long bits = 0; | 546 | unsigned long bits = 0; |
| 378 | unsigned long i, do_now; | 547 | unsigned long mask = (1UL << (b->bm_bits & BITS_PER_LONG_MASK)) -1; |
| 379 | 548 | int idx, i, last_word; | |
| 380 | while (offset < b->bm_words) { | 549 | |
| 381 | i = do_now = min_t(size_t, b->bm_words-offset, LWPP); | 550 | /* all but last page */ |
| 382 | p_addr = __bm_map_paddr(b, offset, KM_USER0); | 551 | for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) { |
| 383 | bm = p_addr + MLPP(offset); | 552 | p_addr = __bm_map_pidx(b, idx, KM_USER0); |
| 384 | while (i--) { | 553 | for (i = 0; i < LWPP; i++) |
| 385 | #ifndef __LITTLE_ENDIAN | 554 | bits += hweight_long(p_addr[i]); |
| 386 | if (swap_endian) | ||
| 387 | *bm = lel_to_cpu(*bm); | ||
| 388 | #endif | ||
| 389 | bits += hweight_long(*bm++); | ||
| 390 | } | ||
| 391 | __bm_unmap(p_addr, KM_USER0); | 555 | __bm_unmap(p_addr, KM_USER0); |
| 392 | offset += do_now; | ||
| 393 | cond_resched(); | 556 | cond_resched(); |
| 394 | } | 557 | } |
| 395 | 558 | /* last (or only) page */ | |
| 559 | last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL; | ||
| 560 | p_addr = __bm_map_pidx(b, idx, KM_USER0); | ||
| 561 | for (i = 0; i < last_word; i++) | ||
| 562 | bits += hweight_long(p_addr[i]); | ||
| 563 | p_addr[last_word] &= cpu_to_lel(mask); | ||
| 564 | bits += hweight_long(p_addr[last_word]); | ||
| 565 | /* 32bit arch, may have an unused padding long */ | ||
| 566 | if (BITS_PER_LONG == 32 && (last_word & 1) == 0) | ||
| 567 | p_addr[last_word+1] = 0; | ||
| 568 | __bm_unmap(p_addr, KM_USER0); | ||
| 396 | return bits; | 569 | return bits; |
| 397 | } | 570 | } |
| 398 | 571 | ||
| 399 | static unsigned long bm_count_bits(struct drbd_bitmap *b) | ||
| 400 | { | ||
| 401 | return __bm_count_bits(b, 0); | ||
| 402 | } | ||
| 403 | |||
| 404 | static unsigned long bm_count_bits_swap_endian(struct drbd_bitmap *b) | ||
| 405 | { | ||
| 406 | return __bm_count_bits(b, 1); | ||
| 407 | } | ||
| 408 | |||
| 409 | /* offset and len in long words.*/ | 572 | /* offset and len in long words.*/ |
| 410 | static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) | 573 | static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) |
| 411 | { | 574 | { |
| 412 | unsigned long *p_addr, *bm; | 575 | unsigned long *p_addr, *bm; |
| 576 | unsigned int idx; | ||
| 413 | size_t do_now, end; | 577 | size_t do_now, end; |
| 414 | 578 | ||
| 415 | #define BM_SECTORS_PER_BIT (BM_BLOCK_SIZE/512) | ||
| 416 | |||
| 417 | end = offset + len; | 579 | end = offset + len; |
| 418 | 580 | ||
| 419 | if (end > b->bm_words) { | 581 | if (end > b->bm_words) { |
| @@ -423,15 +585,16 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) | |||
| 423 | 585 | ||
| 424 | while (offset < end) { | 586 | while (offset < end) { |
| 425 | do_now = min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset; | 587 | do_now = min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset; |
| 426 | p_addr = bm_map_paddr(b, offset); | 588 | idx = bm_word_to_page_idx(b, offset); |
| 589 | p_addr = bm_map_pidx(b, idx); | ||
| 427 | bm = p_addr + MLPP(offset); | 590 | bm = p_addr + MLPP(offset); |
| 428 | if (bm+do_now > p_addr + LWPP) { | 591 | if (bm+do_now > p_addr + LWPP) { |
| 429 | printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n", | 592 | printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n", |
| 430 | p_addr, bm, (int)do_now); | 593 | p_addr, bm, (int)do_now); |
| 431 | break; /* breaks to after catch_oob_access_end() only! */ | 594 | } else |
| 432 | } | 595 | memset(bm, c, do_now * sizeof(long)); |
| 433 | memset(bm, c, do_now * sizeof(long)); | ||
| 434 | bm_unmap(p_addr); | 596 | bm_unmap(p_addr); |
| 597 | bm_set_page_need_writeout(b->bm_pages[idx]); | ||
| 435 | offset += do_now; | 598 | offset += do_now; |
| 436 | } | 599 | } |
| 437 | } | 600 | } |
| @@ -447,7 +610,7 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) | |||
| 447 | int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) | 610 | int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) |
| 448 | { | 611 | { |
| 449 | struct drbd_bitmap *b = mdev->bitmap; | 612 | struct drbd_bitmap *b = mdev->bitmap; |
| 450 | unsigned long bits, words, owords, obits, *p_addr, *bm; | 613 | unsigned long bits, words, owords, obits; |
| 451 | unsigned long want, have, onpages; /* number of pages */ | 614 | unsigned long want, have, onpages; /* number of pages */ |
| 452 | struct page **npages, **opages = NULL; | 615 | struct page **npages, **opages = NULL; |
| 453 | int err = 0, growing; | 616 | int err = 0, growing; |
| @@ -455,7 +618,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) | |||
| 455 | 618 | ||
| 456 | ERR_IF(!b) return -ENOMEM; | 619 | ERR_IF(!b) return -ENOMEM; |
| 457 | 620 | ||
| 458 | drbd_bm_lock(mdev, "resize"); | 621 | drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK); |
| 459 | 622 | ||
| 460 | dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n", | 623 | dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n", |
| 461 | (unsigned long long)capacity); | 624 | (unsigned long long)capacity); |
| @@ -463,7 +626,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) | |||
| 463 | if (capacity == b->bm_dev_capacity) | 626 | if (capacity == b->bm_dev_capacity) |
| 464 | goto out; | 627 | goto out; |
| 465 | 628 | ||
| 466 | opages_vmalloced = test_bit(BM_P_VMALLOCED, &b->bm_flags); | 629 | opages_vmalloced = (BM_P_VMALLOCED & b->bm_flags); |
| 467 | 630 | ||
| 468 | if (capacity == 0) { | 631 | if (capacity == 0) { |
| 469 | spin_lock_irq(&b->bm_lock); | 632 | spin_lock_irq(&b->bm_lock); |
| @@ -491,18 +654,23 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) | |||
| 491 | words = ALIGN(bits, 64) >> LN2_BPL; | 654 | words = ALIGN(bits, 64) >> LN2_BPL; |
| 492 | 655 | ||
| 493 | if (get_ldev(mdev)) { | 656 | if (get_ldev(mdev)) { |
| 494 | D_ASSERT((u64)bits <= (((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12)); | 657 | u64 bits_on_disk = ((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12; |
| 495 | put_ldev(mdev); | 658 | put_ldev(mdev); |
| 659 | if (bits > bits_on_disk) { | ||
| 660 | dev_info(DEV, "bits = %lu\n", bits); | ||
| 661 | dev_info(DEV, "bits_on_disk = %llu\n", bits_on_disk); | ||
| 662 | err = -ENOSPC; | ||
| 663 | goto out; | ||
| 664 | } | ||
| 496 | } | 665 | } |
| 497 | 666 | ||
| 498 | /* one extra long to catch off by one errors */ | 667 | want = ALIGN(words*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT; |
| 499 | want = ALIGN((words+1)*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT; | ||
| 500 | have = b->bm_number_of_pages; | 668 | have = b->bm_number_of_pages; |
| 501 | if (want == have) { | 669 | if (want == have) { |
| 502 | D_ASSERT(b->bm_pages != NULL); | 670 | D_ASSERT(b->bm_pages != NULL); |
| 503 | npages = b->bm_pages; | 671 | npages = b->bm_pages; |
| 504 | } else { | 672 | } else { |
| 505 | if (FAULT_ACTIVE(mdev, DRBD_FAULT_BM_ALLOC)) | 673 | if (drbd_insert_fault(mdev, DRBD_FAULT_BM_ALLOC)) |
| 506 | npages = NULL; | 674 | npages = NULL; |
| 507 | else | 675 | else |
| 508 | npages = bm_realloc_pages(b, want); | 676 | npages = bm_realloc_pages(b, want); |
| @@ -542,11 +710,6 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) | |||
| 542 | bm_free_pages(opages + want, have - want); | 710 | bm_free_pages(opages + want, have - want); |
| 543 | } | 711 | } |
| 544 | 712 | ||
| 545 | p_addr = bm_map_paddr(b, words); | ||
| 546 | bm = p_addr + MLPP(words); | ||
| 547 | *bm = DRBD_MAGIC; | ||
| 548 | bm_unmap(p_addr); | ||
| 549 | |||
| 550 | (void)bm_clear_surplus(b); | 713 | (void)bm_clear_surplus(b); |
| 551 | 714 | ||
| 552 | spin_unlock_irq(&b->bm_lock); | 715 | spin_unlock_irq(&b->bm_lock); |
| @@ -554,7 +717,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) | |||
| 554 | bm_vk_free(opages, opages_vmalloced); | 717 | bm_vk_free(opages, opages_vmalloced); |
| 555 | if (!growing) | 718 | if (!growing) |
| 556 | b->bm_set = bm_count_bits(b); | 719 | b->bm_set = bm_count_bits(b); |
| 557 | dev_info(DEV, "resync bitmap: bits=%lu words=%lu\n", bits, words); | 720 | dev_info(DEV, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want); |
| 558 | 721 | ||
| 559 | out: | 722 | out: |
| 560 | drbd_bm_unlock(mdev); | 723 | drbd_bm_unlock(mdev); |
| @@ -624,6 +787,7 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, | |||
| 624 | struct drbd_bitmap *b = mdev->bitmap; | 787 | struct drbd_bitmap *b = mdev->bitmap; |
| 625 | unsigned long *p_addr, *bm; | 788 | unsigned long *p_addr, *bm; |
| 626 | unsigned long word, bits; | 789 | unsigned long word, bits; |
| 790 | unsigned int idx; | ||
| 627 | size_t end, do_now; | 791 | size_t end, do_now; |
| 628 | 792 | ||
| 629 | end = offset + number; | 793 | end = offset + number; |
| @@ -638,16 +802,18 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, | |||
| 638 | spin_lock_irq(&b->bm_lock); | 802 | spin_lock_irq(&b->bm_lock); |
| 639 | while (offset < end) { | 803 | while (offset < end) { |
| 640 | do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset; | 804 | do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset; |
| 641 | p_addr = bm_map_paddr(b, offset); | 805 | idx = bm_word_to_page_idx(b, offset); |
| 806 | p_addr = bm_map_pidx(b, idx); | ||
| 642 | bm = p_addr + MLPP(offset); | 807 | bm = p_addr + MLPP(offset); |
| 643 | offset += do_now; | 808 | offset += do_now; |
| 644 | while (do_now--) { | 809 | while (do_now--) { |
| 645 | bits = hweight_long(*bm); | 810 | bits = hweight_long(*bm); |
| 646 | word = *bm | lel_to_cpu(*buffer++); | 811 | word = *bm | *buffer++; |
| 647 | *bm++ = word; | 812 | *bm++ = word; |
| 648 | b->bm_set += hweight_long(word) - bits; | 813 | b->bm_set += hweight_long(word) - bits; |
| 649 | } | 814 | } |
| 650 | bm_unmap(p_addr); | 815 | bm_unmap(p_addr); |
| 816 | bm_set_page_need_writeout(b->bm_pages[idx]); | ||
| 651 | } | 817 | } |
| 652 | /* with 32bit <-> 64bit cross-platform connect | 818 | /* with 32bit <-> 64bit cross-platform connect |
| 653 | * this is only correct for current usage, | 819 | * this is only correct for current usage, |
| @@ -656,7 +822,6 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, | |||
| 656 | */ | 822 | */ |
| 657 | if (end == b->bm_words) | 823 | if (end == b->bm_words) |
| 658 | b->bm_set -= bm_clear_surplus(b); | 824 | b->bm_set -= bm_clear_surplus(b); |
| 659 | |||
| 660 | spin_unlock_irq(&b->bm_lock); | 825 | spin_unlock_irq(&b->bm_lock); |
| 661 | } | 826 | } |
| 662 | 827 | ||
| @@ -686,11 +851,11 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, | |||
| 686 | else { | 851 | else { |
| 687 | while (offset < end) { | 852 | while (offset < end) { |
| 688 | do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset; | 853 | do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset; |
| 689 | p_addr = bm_map_paddr(b, offset); | 854 | p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, offset)); |
| 690 | bm = p_addr + MLPP(offset); | 855 | bm = p_addr + MLPP(offset); |
| 691 | offset += do_now; | 856 | offset += do_now; |
| 692 | while (do_now--) | 857 | while (do_now--) |
| 693 | *buffer++ = cpu_to_lel(*bm++); | 858 | *buffer++ = *bm++; |
| 694 | bm_unmap(p_addr); | 859 | bm_unmap(p_addr); |
| 695 | } | 860 | } |
| 696 | } | 861 | } |
| @@ -724,9 +889,22 @@ void drbd_bm_clear_all(struct drbd_conf *mdev) | |||
| 724 | spin_unlock_irq(&b->bm_lock); | 889 | spin_unlock_irq(&b->bm_lock); |
| 725 | } | 890 | } |
| 726 | 891 | ||
| 892 | struct bm_aio_ctx { | ||
| 893 | struct drbd_conf *mdev; | ||
| 894 | atomic_t in_flight; | ||
| 895 | struct completion done; | ||
| 896 | unsigned flags; | ||
| 897 | #define BM_AIO_COPY_PAGES 1 | ||
| 898 | int error; | ||
| 899 | }; | ||
| 900 | |||
| 901 | /* bv_page may be a copy, or may be the original */ | ||
| 727 | static void bm_async_io_complete(struct bio *bio, int error) | 902 | static void bm_async_io_complete(struct bio *bio, int error) |
| 728 | { | 903 | { |
| 729 | struct drbd_bitmap *b = bio->bi_private; | 904 | struct bm_aio_ctx *ctx = bio->bi_private; |
| 905 | struct drbd_conf *mdev = ctx->mdev; | ||
| 906 | struct drbd_bitmap *b = mdev->bitmap; | ||
| 907 | unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page); | ||
| 730 | int uptodate = bio_flagged(bio, BIO_UPTODATE); | 908 | int uptodate = bio_flagged(bio, BIO_UPTODATE); |
| 731 | 909 | ||
| 732 | 910 | ||
| @@ -737,38 +915,83 @@ static void bm_async_io_complete(struct bio *bio, int error) | |||
| 737 | if (!error && !uptodate) | 915 | if (!error && !uptodate) |
| 738 | error = -EIO; | 916 | error = -EIO; |
| 739 | 917 | ||
| 918 | if ((ctx->flags & BM_AIO_COPY_PAGES) == 0 && | ||
| 919 | !bm_test_page_unchanged(b->bm_pages[idx])) | ||
| 920 | dev_warn(DEV, "bitmap page idx %u changed during IO!\n", idx); | ||
| 921 | |||
| 740 | if (error) { | 922 | if (error) { |
| 741 | /* doh. what now? | 923 | /* ctx->error will hold the error code of whichever failing |
| 742 | * for now, set all bits, and flag MD_IO_ERROR */ | 924 | * completion finished last, in case error codes differ. */ |
| 743 | __set_bit(BM_MD_IO_ERROR, &b->bm_flags); | 925 | ctx->error = error; |
| 926 | bm_set_page_io_err(b->bm_pages[idx]); | ||
| 927 | /* Not identical to on disk version of it. | ||
| 928 | * Is BM_PAGE_IO_ERROR enough? */ | ||
| 929 | if (__ratelimit(&drbd_ratelimit_state)) | ||
| 930 | dev_err(DEV, "IO ERROR %d on bitmap page idx %u\n", | ||
| 931 | error, idx); | ||
| 932 | } else { | ||
| 933 | bm_clear_page_io_err(b->bm_pages[idx]); | ||
| 934 | dynamic_dev_dbg(DEV, "bitmap page idx %u completed\n", idx); | ||
| 744 | } | 935 | } |
| 745 | if (atomic_dec_and_test(&b->bm_async_io)) | 936 | |
| 746 | wake_up(&b->bm_io_wait); | 937 | bm_page_unlock_io(mdev, idx); |
| 938 | |||
| 939 | /* FIXME give back to page pool */ | ||
| 940 | if (ctx->flags & BM_AIO_COPY_PAGES) | ||
| 941 | put_page(bio->bi_io_vec[0].bv_page); | ||
| 747 | 942 | ||
| 748 | bio_put(bio); | 943 | bio_put(bio); |
| 944 | |||
| 945 | if (atomic_dec_and_test(&ctx->in_flight)) | ||
| 946 | complete(&ctx->done); | ||
| 749 | } | 947 | } |
| 750 | 948 | ||
| 751 | static void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int page_nr, int rw) __must_hold(local) | 949 | static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local) |
| 752 | { | 950 | { |
| 753 | /* we are process context. we always get a bio */ | 951 | /* we are process context. we always get a bio */ |
| 754 | struct bio *bio = bio_alloc(GFP_KERNEL, 1); | 952 | struct bio *bio = bio_alloc(GFP_KERNEL, 1); |
| 953 | struct drbd_conf *mdev = ctx->mdev; | ||
| 954 | struct drbd_bitmap *b = mdev->bitmap; | ||
| 955 | struct page *page; | ||
| 755 | unsigned int len; | 956 | unsigned int len; |
| 957 | |||
| 756 | sector_t on_disk_sector = | 958 | sector_t on_disk_sector = |
| 757 | mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset; | 959 | mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset; |
| 758 | on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9); | 960 | on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9); |
| 759 | 961 | ||
| 760 | /* this might happen with very small | 962 | /* this might happen with very small |
| 761 | * flexible external meta data device */ | 963 | * flexible external meta data device, |
| 964 | * or with PAGE_SIZE > 4k */ | ||
| 762 | len = min_t(unsigned int, PAGE_SIZE, | 965 | len = min_t(unsigned int, PAGE_SIZE, |
| 763 | (drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9); | 966 | (drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9); |
| 764 | 967 | ||
| 968 | /* serialize IO on this page */ | ||
| 969 | bm_page_lock_io(mdev, page_nr); | ||
| 970 | /* before memcpy and submit, | ||
| 971 | * so it can be redirtied any time */ | ||
| 972 | bm_set_page_unchanged(b->bm_pages[page_nr]); | ||
| 973 | |||
| 974 | if (ctx->flags & BM_AIO_COPY_PAGES) { | ||
| 975 | /* FIXME alloc_page is good enough for now, but actually needs | ||
| 976 | * to use pre-allocated page pool */ | ||
| 977 | void *src, *dest; | ||
| 978 | page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT); | ||
| 979 | dest = kmap_atomic(page, KM_USER0); | ||
| 980 | src = kmap_atomic(b->bm_pages[page_nr], KM_USER1); | ||
| 981 | memcpy(dest, src, PAGE_SIZE); | ||
| 982 | kunmap_atomic(src, KM_USER1); | ||
| 983 | kunmap_atomic(dest, KM_USER0); | ||
| 984 | bm_store_page_idx(page, page_nr); | ||
| 985 | } else | ||
| 986 | page = b->bm_pages[page_nr]; | ||
| 987 | |||
| 765 | bio->bi_bdev = mdev->ldev->md_bdev; | 988 | bio->bi_bdev = mdev->ldev->md_bdev; |
| 766 | bio->bi_sector = on_disk_sector; | 989 | bio->bi_sector = on_disk_sector; |
| 767 | bio_add_page(bio, b->bm_pages[page_nr], len, 0); | 990 | bio_add_page(bio, page, len, 0); |
| 768 | bio->bi_private = b; | 991 | bio->bi_private = ctx; |
| 769 | bio->bi_end_io = bm_async_io_complete; | 992 | bio->bi_end_io = bm_async_io_complete; |
| 770 | 993 | ||
| 771 | if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) { | 994 | if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) { |
| 772 | bio->bi_rw |= rw; | 995 | bio->bi_rw |= rw; |
| 773 | bio_endio(bio, -EIO); | 996 | bio_endio(bio, -EIO); |
| 774 | } else { | 997 | } else { |
| @@ -776,87 +999,84 @@ static void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int | |||
| 776 | } | 999 | } |
| 777 | } | 1000 | } |
| 778 | 1001 | ||
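BM_AIO_COPY_PAGES in a nutshell: submit a private snapshot so the live page may be redirtied at any time while its IO is in flight. A stand-alone sketch, with malloc/memcpy standing in for alloc_page and the kmap_atomic pair:

    #include <stdlib.h>
    #include <string.h>

    #define PAGE_SIZE 4096

    /* returns the buffer handed to the write; the completion path
     * frees it, just as bm_async_io_complete() does with put_page() */
    static void *snapshot_page(const void *live_page)
    {
        void *copy = malloc(PAGE_SIZE);
        if (copy)
            memcpy(copy, live_page, PAGE_SIZE);
        return copy;
    }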
| 779 | # if defined(__LITTLE_ENDIAN) | ||
| 780 | /* nothing to do, on disk == in memory */ | ||
| 781 | # define bm_cpu_to_lel(x) ((void)0) | ||
| 782 | # else | ||
| 783 | static void bm_cpu_to_lel(struct drbd_bitmap *b) | ||
| 784 | { | ||
| 785 | /* need to cpu_to_lel all the pages ... | ||
| 786 | * this may be optimized by using | ||
| 787 | * cpu_to_lel(-1) == -1 and cpu_to_lel(0) == 0; | ||
| 788 | * the following is still not optimal, but better than nothing */ | ||
| 789 | unsigned int i; | ||
| 790 | unsigned long *p_addr, *bm; | ||
| 791 | if (b->bm_set == 0) { | ||
| 792 | /* no page at all; avoid swap if all is 0 */ | ||
| 793 | i = b->bm_number_of_pages; | ||
| 794 | } else if (b->bm_set == b->bm_bits) { | ||
| 795 | /* only the last page */ | ||
| 796 | i = b->bm_number_of_pages - 1; | ||
| 797 | } else { | ||
| 798 | /* all pages */ | ||
| 799 | i = 0; | ||
| 800 | } | ||
| 801 | for (; i < b->bm_number_of_pages; i++) { | ||
| 802 | p_addr = kmap_atomic(b->bm_pages[i], KM_USER0); | ||
| 803 | for (bm = p_addr; bm < p_addr + PAGE_SIZE/sizeof(long); bm++) | ||
| 804 | *bm = cpu_to_lel(*bm); | ||
| 805 | kunmap_atomic(p_addr, KM_USER0); | ||
| 806 | } | ||
| 807 | } | ||
| 808 | # endif | ||
| 809 | /* lel_to_cpu == cpu_to_lel */ | ||
| 810 | # define bm_lel_to_cpu(x) bm_cpu_to_lel(x) | ||
| 811 | |||
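The deleted conversion pass is the point of this rework: the in-core bitmap now stays little endian at all times, the generic_*_le_bit accessors below operate on it directly, and only computed masks still go through cpu_to_lel(). On a little-endian host that is a no-op; a glibc sketch of the equivalent (assuming 64-bit longs and <endian.h>):

    #include <endian.h>

    static unsigned long mask_to_lel(unsigned long mask)
    {
        return htole64(mask);    /* cpu_to_lel() equivalent */
    }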
| 812 | /* | 1002 | /* |
| 813 | * bm_rw: read/write the whole bitmap from/to its on disk location. | 1003 | * bm_rw: read/write the whole bitmap from/to its on disk location. |
| 814 | */ | 1004 | */ |
| 815 | static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local) | 1005 | static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_idx) __must_hold(local) |
| 816 | { | 1006 | { |
| 1007 | struct bm_aio_ctx ctx = { | ||
| 1008 | .mdev = mdev, | ||
| 1009 | .in_flight = ATOMIC_INIT(1), | ||
| 1010 | .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), | ||
| 1011 | .flags = lazy_writeout_upper_idx ? BM_AIO_COPY_PAGES : 0, | ||
| 1012 | }; | ||
| 817 | struct drbd_bitmap *b = mdev->bitmap; | 1013 | struct drbd_bitmap *b = mdev->bitmap; |
| 818 | /* sector_t sector; */ | 1014 | int num_pages, i, count = 0; |
| 819 | int bm_words, num_pages, i; | ||
| 820 | unsigned long now; | 1015 | unsigned long now; |
| 821 | char ppb[10]; | 1016 | char ppb[10]; |
| 822 | int err = 0; | 1017 | int err = 0; |
| 823 | 1018 | ||
| 824 | WARN_ON(!bm_is_locked(b)); | 1019 | /* |
| 825 | 1020 | * We are protected against bitmap disappearing/resizing by holding an | |
| 826 | /* no spinlock here, the drbd_bm_lock should be enough! */ | 1021 | * ldev reference (caller must have called get_ldev()). |
| 827 | 1022 | * For read/write, we are protected against changes to the bitmap by | |
| 828 | bm_words = drbd_bm_words(mdev); | 1023 | * the bitmap lock (see drbd_bitmap_io). |
| 829 | num_pages = (bm_words*sizeof(long) + PAGE_SIZE-1) >> PAGE_SHIFT; | 1024 | * For lazy writeout, we don't care for ongoing changes to the bitmap, |
| 1025 | * as we submit copies of pages anyways. | ||
| 1026 | */ | ||
| 1027 | if (!ctx.flags) | ||
| 1028 | WARN_ON(!(BM_LOCKED_MASK & b->bm_flags)); | ||
| 830 | 1029 | ||
| 831 | /* on disk bitmap is little endian */ | 1030 | num_pages = b->bm_number_of_pages; |
| 832 | if (rw == WRITE) | ||
| 833 | bm_cpu_to_lel(b); | ||
| 834 | 1031 | ||
| 835 | now = jiffies; | 1032 | now = jiffies; |
| 836 | atomic_set(&b->bm_async_io, num_pages); | ||
| 837 | __clear_bit(BM_MD_IO_ERROR, &b->bm_flags); | ||
| 838 | 1033 | ||
| 839 | /* let the layers below us try to merge these bios... */ | 1034 | /* let the layers below us try to merge these bios... */ |
| 840 | for (i = 0; i < num_pages; i++) | 1035 | for (i = 0; i < num_pages; i++) { |
| 841 | bm_page_io_async(mdev, b, i, rw); | 1036 | /* ignore completely unchanged pages */ |
| 1037 | if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx) | ||
| 1038 | break; | ||
| 1039 | if (rw & WRITE) { | ||
| 1040 | if (bm_test_page_unchanged(b->bm_pages[i])) { | ||
| 1041 | dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i); | ||
| 1042 | continue; | ||
| 1043 | } | ||
| 1044 | /* during lazy writeout, | ||
| 1045 | * ignore those pages not marked for lazy writeout. */ | ||
| 1046 | if (lazy_writeout_upper_idx && | ||
| 1047 | !bm_test_page_lazy_writeout(b->bm_pages[i])) { | ||
| 1048 | dynamic_dev_dbg(DEV, "skipped bm lazy write for idx %u\n", i); | ||
| 1049 | continue; | ||
| 1050 | } | ||
| 1051 | } | ||
| 1052 | atomic_inc(&ctx.in_flight); | ||
| 1053 | bm_page_io_async(&ctx, i, rw); | ||
| 1054 | ++count; | ||
| 1055 | cond_resched(); | ||
| 1056 | } | ||
| 842 | 1057 | ||
| 843 | wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0); | 1058 | /* |
| 1059 | * We initialize ctx.in_flight to one to make sure bm_async_io_complete | ||
| 1060 | * will not complete() early, and decrement / test it here. If there | ||
| 1061 | * are still some bios in flight, we need to wait for them here. | ||
| 1062 | */ | ||
| 1063 | if (!atomic_dec_and_test(&ctx.in_flight)) | ||
| 1064 | wait_for_completion(&ctx.done); | ||
| 1065 | dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", | ||
| 1066 | rw == WRITE ? "WRITE" : "READ", | ||
| 1067 | count, jiffies - now); | ||
| 844 | 1068 | ||
| 845 | if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) { | 1069 | if (ctx.error) { |
| 846 | dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); | 1070 | dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); |
| 847 | drbd_chk_io_error(mdev, 1, TRUE); | 1071 | drbd_chk_io_error(mdev, 1, true); |
| 848 | err = -EIO; | 1072 | err = -EIO; /* ctx.error ? */ |
| 849 | } | 1073 | } |
| 850 | 1074 | ||
| 851 | now = jiffies; | 1075 | now = jiffies; |
| 852 | if (rw == WRITE) { | 1076 | if (rw == WRITE) { |
| 853 | /* swap back endianness */ | ||
| 854 | bm_lel_to_cpu(b); | ||
| 855 | /* flush bitmap to stable storage */ | ||
| 856 | drbd_md_flush(mdev); | 1077 | drbd_md_flush(mdev); |
| 857 | } else /* rw == READ */ { | 1078 | } else /* rw == READ */ { |
| 858 | /* just read, if necessary adjust endianness */ | 1079 | b->bm_set = bm_count_bits(b); |
| 859 | b->bm_set = bm_count_bits_swap_endian(b); | ||
| 860 | dev_info(DEV, "recounting of set bits took additional %lu jiffies\n", | 1080 | dev_info(DEV, "recounting of set bits took additional %lu jiffies\n", |
| 861 | jiffies - now); | 1081 | jiffies - now); |
| 862 | } | 1082 | } |
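The extra initial reference on ctx.in_flight is the classic guard against a completion racing the submit loop: endio callbacks can never drop the count to zero before the submitter has given up its own reference. A hedged user-space analogue of the pattern (a POSIX semaphore stands in for struct completion; submit_page_io() is hypothetical):

    #include <stdatomic.h>
    #include <semaphore.h>

    struct aio_ctx {
        atomic_int in_flight;
        sem_t done;
        int error;
    };

    static void io_complete(struct aio_ctx *ctx, int error)  /* endio */
    {
        if (error)
            ctx->error = error;
        if (atomic_fetch_sub(&ctx->in_flight, 1) == 1)
            sem_post(&ctx->done);       /* we were the last reference */
    }

    static void submit_all(struct aio_ctx *ctx, int npages)
    {
        atomic_init(&ctx->in_flight, 1);    /* the guard reference */
        sem_init(&ctx->done, 0, 0);
        for (int i = 0; i < npages; i++) {
            atomic_fetch_add(&ctx->in_flight, 1);
            /* submit_page_io(ctx, i);  hypothetical async submit,
             * whose completion eventually calls io_complete() */
        }
        /* drop the guard; wait only if IO is still outstanding */
        if (atomic_fetch_sub(&ctx->in_flight, 1) != 1)
            sem_wait(&ctx->done);
    }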
| @@ -874,112 +1094,128 @@ static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local) | |||
| 874 | */ | 1094 | */ |
| 875 | int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) | 1095 | int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) |
| 876 | { | 1096 | { |
| 877 | return bm_rw(mdev, READ); | 1097 | return bm_rw(mdev, READ, 0); |
| 878 | } | 1098 | } |
| 879 | 1099 | ||
| 880 | /** | 1100 | /** |
| 881 | * drbd_bm_write() - Write the whole bitmap to its on disk location. | 1101 | * drbd_bm_write() - Write the whole bitmap to its on disk location. |
| 882 | * @mdev: DRBD device. | 1102 | * @mdev: DRBD device. |
| 1103 | * | ||
| 1104 | * Will only write pages that have changed since last IO. | ||
| 883 | */ | 1105 | */ |
| 884 | int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) | 1106 | int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) |
| 885 | { | 1107 | { |
| 886 | return bm_rw(mdev, WRITE); | 1108 | return bm_rw(mdev, WRITE, 0); |
| 887 | } | 1109 | } |
| 888 | 1110 | ||
| 889 | /** | 1111 | /** |
| 890 | * drbd_bm_write_sect: Writes a 512 (MD_SECTOR_SIZE) byte piece of the bitmap | 1112 | * drbd_bm_lazy_write_out() - Write bitmap pages 0 to @upper_idx-1, if they have changed. |
| 891 | * @mdev: DRBD device. | 1113 | * @mdev: DRBD device. |
| 892 | * @enr: Extent number in the resync lru (happens to be sector offset) | 1114 | * @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages |
| 893 | * | ||
| 894 | * The BM_EXT_SIZE is on purpose exactly the amount of the bitmap covered | ||
| 895 | * by a single sector write. Therefore enr == sector offset from the | ||
| 896 | * start of the bitmap. | ||
| 897 | */ | 1115 | */ |
| 898 | int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local) | 1116 | int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local) |
| 899 | { | 1117 | { |
| 900 | sector_t on_disk_sector = enr + mdev->ldev->md.md_offset | 1118 | return bm_rw(mdev, WRITE, upper_idx); |
| 901 | + mdev->ldev->md.bm_offset; | 1119 | } |
| 902 | int bm_words, num_words, offset; | 1120 | |
| 903 | int err = 0; | ||
| 904 | 1121 | ||
| 905 | mutex_lock(&mdev->md_io_mutex); | 1122 | /** |
| 906 | bm_words = drbd_bm_words(mdev); | 1123 | * drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap |
| 907 | offset = S2W(enr); /* word offset into bitmap */ | 1124 | * @mdev: DRBD device. |
| 908 | num_words = min(S2W(1), bm_words - offset); | 1125 | * @idx: bitmap page index |
| 909 | if (num_words < S2W(1)) | 1126 | * |
| 910 | memset(page_address(mdev->md_io_page), 0, MD_SECTOR_SIZE); | 1127 | * We don't want to special case on logical_block_size of the backend device, |
| 911 | drbd_bm_get_lel(mdev, offset, num_words, | 1128 | * so we submit PAGE_SIZE aligned pieces. |
| 912 | page_address(mdev->md_io_page)); | 1129 | * Note that on "most" systems, PAGE_SIZE is 4k. |
| 913 | if (!drbd_md_sync_page_io(mdev, mdev->ldev, on_disk_sector, WRITE)) { | 1130 | * |
| 914 | int i; | 1131 | * In case this becomes an issue on systems with larger PAGE_SIZE, |
| 915 | err = -EIO; | 1132 | * we may want to change this again to write 4k aligned 4k pieces. |
| 916 | dev_err(DEV, "IO ERROR writing bitmap sector %lu " | 1133 | */ |
| 917 | "(meta-disk sector %llus)\n", | 1134 | int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) |
| 918 | enr, (unsigned long long)on_disk_sector); | 1135 | { |
| 919 | drbd_chk_io_error(mdev, 1, TRUE); | 1136 | struct bm_aio_ctx ctx = { |
| 920 | for (i = 0; i < AL_EXT_PER_BM_SECT; i++) | 1137 | .mdev = mdev, |
| 921 | drbd_bm_ALe_set_all(mdev, enr*AL_EXT_PER_BM_SECT+i); | 1138 | .in_flight = ATOMIC_INIT(1), |
| 1139 | .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), | ||
| 1140 | .flags = BM_AIO_COPY_PAGES, | ||
| 1141 | }; | ||
| 1142 | |||
| 1143 | if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { | ||
| 1144 | dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx); | ||
| 1145 | return 0; | ||
| 922 | } | 1146 | } |
| 1147 | |||
| 1148 | bm_page_io_async(&ctx, idx, WRITE_SYNC); | ||
| 1149 | wait_for_completion(&ctx.done); | ||
| 1150 | |||
| 1151 | if (ctx.error) | ||
| 1152 | drbd_chk_io_error(mdev, 1, true); | ||
| 1153 | /* that should force detach, so the in memory bitmap will be | ||
| 1154 | * gone in a moment as well. */ | ||
| 1155 | |||
| 923 | mdev->bm_writ_cnt++; | 1156 | mdev->bm_writ_cnt++; |
| 924 | mutex_unlock(&mdev->md_io_mutex); | 1157 | return ctx.error; |
| 925 | return err; | ||
| 926 | } | 1158 | } |
| 927 | 1159 | ||
| 928 | /* NOTE | 1160 | /* NOTE |
| 929 | * find_first_bit returns int, we return unsigned long. | 1161 | * find_first_bit returns int, we return unsigned long. |
| 930 | * should not make much difference anyways, but ... | 1162 | * For this to work on 32bit arch with bitnumbers > (1<<32), |
| 1163 | * we'd need to return u64, and get a whole lot of other places | ||
| 1164 | * fixed where we still use unsigned long. | ||
| 931 | * | 1165 | * |
| 932 | * this returns a bit number, NOT a sector! | 1166 | * this returns a bit number, NOT a sector! |
| 933 | */ | 1167 | */ |
| 934 | #define BPP_MASK ((1UL << (PAGE_SHIFT+3)) - 1) | ||
| 935 | static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, | 1168 | static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, |
| 936 | const int find_zero_bit, const enum km_type km) | 1169 | const int find_zero_bit, const enum km_type km) |
| 937 | { | 1170 | { |
| 938 | struct drbd_bitmap *b = mdev->bitmap; | 1171 | struct drbd_bitmap *b = mdev->bitmap; |
| 939 | unsigned long i = -1UL; | ||
| 940 | unsigned long *p_addr; | 1172 | unsigned long *p_addr; |
| 941 | unsigned long bit_offset; /* bit offset of the mapped page. */ | 1173 | unsigned long bit_offset; |
| 1174 | unsigned i; | ||
| 1175 | |||
| 942 | 1176 | ||
| 943 | if (bm_fo > b->bm_bits) { | 1177 | if (bm_fo > b->bm_bits) { |
| 944 | dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits); | 1178 | dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits); |
| 1179 | bm_fo = DRBD_END_OF_BITMAP; | ||
| 945 | } else { | 1180 | } else { |
| 946 | while (bm_fo < b->bm_bits) { | 1181 | while (bm_fo < b->bm_bits) { |
| 947 | unsigned long offset; | 1182 | /* bit offset of the first bit in the page */ |
| 948 | bit_offset = bm_fo & ~BPP_MASK; /* bit offset of the page */ | 1183 | bit_offset = bm_fo & ~BITS_PER_PAGE_MASK; |
| 949 | offset = bit_offset >> LN2_BPL; /* word offset of the page */ | 1184 | p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo), km); |
| 950 | p_addr = __bm_map_paddr(b, offset, km); | ||
| 951 | 1185 | ||
| 952 | if (find_zero_bit) | 1186 | if (find_zero_bit) |
| 953 | i = find_next_zero_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); | 1187 | i = generic_find_next_zero_le_bit(p_addr, |
| 1188 | PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK); | ||
| 954 | else | 1189 | else |
| 955 | i = find_next_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); | 1190 | i = generic_find_next_le_bit(p_addr, |
| 1191 | PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK); | ||
| 956 | 1192 | ||
| 957 | __bm_unmap(p_addr, km); | 1193 | __bm_unmap(p_addr, km); |
| 958 | if (i < PAGE_SIZE*8) { | 1194 | if (i < PAGE_SIZE*8) { |
| 959 | i = bit_offset + i; | 1195 | bm_fo = bit_offset + i; |
| 960 | if (i >= b->bm_bits) | 1196 | if (bm_fo >= b->bm_bits) |
| 961 | break; | 1197 | break; |
| 962 | goto found; | 1198 | goto found; |
| 963 | } | 1199 | } |
| 964 | bm_fo = bit_offset + PAGE_SIZE*8; | 1200 | bm_fo = bit_offset + PAGE_SIZE*8; |
| 965 | } | 1201 | } |
| 966 | i = -1UL; | 1202 | bm_fo = DRBD_END_OF_BITMAP; |
| 967 | } | 1203 | } |
| 968 | found: | 1204 | found: |
| 969 | return i; | 1205 | return bm_fo; |
| 970 | } | 1206 | } |
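Stripped of kmap and page bookkeeping, the search above maps the page holding bm_fo, searches from the in-page offset, returns a global bit number on a hit, and otherwise continues at the next page boundary. Over a flat array the control flow collapses to a plain scan; a little-endian, 64-bit sketch (END_OF_BITMAP stands in for DRBD_END_OF_BITMAP):

    #define END_OF_BITMAP (~0UL)

    static unsigned long find_next_set(const unsigned long *bm,
                                       unsigned long nbits, unsigned long fo)
    {
        for (; fo < nbits; fo++)
            if (bm[fo / 64] & (1UL << (fo % 64)))
                return fo;
        return END_OF_BITMAP;
    }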
| 971 | 1207 | ||
| 972 | static unsigned long bm_find_next(struct drbd_conf *mdev, | 1208 | static unsigned long bm_find_next(struct drbd_conf *mdev, |
| 973 | unsigned long bm_fo, const int find_zero_bit) | 1209 | unsigned long bm_fo, const int find_zero_bit) |
| 974 | { | 1210 | { |
| 975 | struct drbd_bitmap *b = mdev->bitmap; | 1211 | struct drbd_bitmap *b = mdev->bitmap; |
| 976 | unsigned long i = -1UL; | 1212 | unsigned long i = DRBD_END_OF_BITMAP; |
| 977 | 1213 | ||
| 978 | ERR_IF(!b) return i; | 1214 | ERR_IF(!b) return i; |
| 979 | ERR_IF(!b->bm_pages) return i; | 1215 | ERR_IF(!b->bm_pages) return i; |
| 980 | 1216 | ||
| 981 | spin_lock_irq(&b->bm_lock); | 1217 | spin_lock_irq(&b->bm_lock); |
| 982 | if (bm_is_locked(b)) | 1218 | if (BM_DONT_TEST & b->bm_flags) |
| 983 | bm_print_lock_info(mdev); | 1219 | bm_print_lock_info(mdev); |
| 984 | 1220 | ||
| 985 | i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1); | 1221 | i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1); |
| @@ -1005,13 +1241,13 @@ unsigned long drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo | |||
| 1005 | * you must take drbd_bm_lock() first */ | 1241 | * you must take drbd_bm_lock() first */ |
| 1006 | unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo) | 1242 | unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo) |
| 1007 | { | 1243 | { |
| 1008 | /* WARN_ON(!bm_is_locked(mdev)); */ | 1244 | /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */ |
| 1009 | return __bm_find_next(mdev, bm_fo, 0, KM_USER1); | 1245 | return __bm_find_next(mdev, bm_fo, 0, KM_USER1); |
| 1010 | } | 1246 | } |
| 1011 | 1247 | ||
| 1012 | unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo) | 1248 | unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo) |
| 1013 | { | 1249 | { |
| 1014 | /* WARN_ON(!bm_is_locked(mdev)); */ | 1250 | /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */ |
| 1015 | return __bm_find_next(mdev, bm_fo, 1, KM_USER1); | 1251 | return __bm_find_next(mdev, bm_fo, 1, KM_USER1); |
| 1016 | } | 1252 | } |
| 1017 | 1253 | ||
| @@ -1027,8 +1263,9 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, | |||
| 1027 | struct drbd_bitmap *b = mdev->bitmap; | 1263 | struct drbd_bitmap *b = mdev->bitmap; |
| 1028 | unsigned long *p_addr = NULL; | 1264 | unsigned long *p_addr = NULL; |
| 1029 | unsigned long bitnr; | 1265 | unsigned long bitnr; |
| 1030 | unsigned long last_page_nr = -1UL; | 1266 | unsigned int last_page_nr = -1U; |
| 1031 | int c = 0; | 1267 | int c = 0; |
| 1268 | int changed_total = 0; | ||
| 1032 | 1269 | ||
| 1033 | if (e >= b->bm_bits) { | 1270 | if (e >= b->bm_bits) { |
| 1034 | dev_err(DEV, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n", | 1271 | dev_err(DEV, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n", |
| @@ -1036,23 +1273,33 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, | |||
| 1036 | e = b->bm_bits ? b->bm_bits -1 : 0; | 1273 | e = b->bm_bits ? b->bm_bits -1 : 0; |
| 1037 | } | 1274 | } |
| 1038 | for (bitnr = s; bitnr <= e; bitnr++) { | 1275 | for (bitnr = s; bitnr <= e; bitnr++) { |
| 1039 | unsigned long offset = bitnr>>LN2_BPL; | 1276 | unsigned int page_nr = bm_bit_to_page_idx(b, bitnr); |
| 1040 | unsigned long page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3); | ||
| 1041 | if (page_nr != last_page_nr) { | 1277 | if (page_nr != last_page_nr) { |
| 1042 | if (p_addr) | 1278 | if (p_addr) |
| 1043 | __bm_unmap(p_addr, km); | 1279 | __bm_unmap(p_addr, km); |
| 1044 | p_addr = __bm_map_paddr(b, offset, km); | 1280 | if (c < 0) |
| 1281 | bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); | ||
| 1282 | else if (c > 0) | ||
| 1283 | bm_set_page_need_writeout(b->bm_pages[last_page_nr]); | ||
| 1284 | changed_total += c; | ||
| 1285 | c = 0; | ||
| 1286 | p_addr = __bm_map_pidx(b, page_nr, km); | ||
| 1045 | last_page_nr = page_nr; | 1287 | last_page_nr = page_nr; |
| 1046 | } | 1288 | } |
| 1047 | if (val) | 1289 | if (val) |
| 1048 | c += (0 == __test_and_set_bit(bitnr & BPP_MASK, p_addr)); | 1290 | c += (0 == generic___test_and_set_le_bit(bitnr & BITS_PER_PAGE_MASK, p_addr)); |
| 1049 | else | 1291 | else |
| 1050 | c -= (0 != __test_and_clear_bit(bitnr & BPP_MASK, p_addr)); | 1292 | c -= (0 != generic___test_and_clear_le_bit(bitnr & BITS_PER_PAGE_MASK, p_addr)); |
| 1051 | } | 1293 | } |
| 1052 | if (p_addr) | 1294 | if (p_addr) |
| 1053 | __bm_unmap(p_addr, km); | 1295 | __bm_unmap(p_addr, km); |
| 1054 | b->bm_set += c; | 1296 | if (c < 0) |
| 1055 | return c; | 1297 | bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); |
| 1298 | else if (c > 0) | ||
| 1299 | bm_set_page_need_writeout(b->bm_pages[last_page_nr]); | ||
| 1300 | changed_total += c; | ||
| 1301 | b->bm_set += changed_total; | ||
| 1302 | return changed_total; | ||
| 1056 | } | 1303 | } |
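The running balance c decides per page which writeout hint to set: a page that gained set bits must be written out, one where bits were only cleared may wait for the lazy pass. In isolation (page_priv plays the role of page->private; the kernel uses atomic set_bit, not the plain OR shown here):

    #define BM_PAGE_NEED_WRITEOUT 29
    #define BM_PAGE_LAZY_WRITEOUT 28

    /* delta: net bits set (+) or cleared (-) on this page */
    static void note_page_delta(unsigned long *page_priv, int delta)
    {
        if (delta > 0)
            *page_priv |= 1UL << BM_PAGE_NEED_WRITEOUT;
        else if (delta < 0)
            *page_priv |= 1UL << BM_PAGE_LAZY_WRITEOUT;
    }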
| 1057 | 1304 | ||
| 1058 | /* returns number of bits actually changed. | 1305 | /* returns number of bits actually changed. |
| @@ -1070,7 +1317,7 @@ static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, | |||
| 1070 | ERR_IF(!b->bm_pages) return 0; | 1317 | ERR_IF(!b->bm_pages) return 0; |
| 1071 | 1318 | ||
| 1072 | spin_lock_irqsave(&b->bm_lock, flags); | 1319 | spin_lock_irqsave(&b->bm_lock, flags); |
| 1073 | if (bm_is_locked(b)) | 1320 | if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags) |
| 1074 | bm_print_lock_info(mdev); | 1321 | bm_print_lock_info(mdev); |
| 1075 | 1322 | ||
| 1076 | c = __bm_change_bits_to(mdev, s, e, val, KM_IRQ1); | 1323 | c = __bm_change_bits_to(mdev, s, e, val, KM_IRQ1); |
| @@ -1187,12 +1434,11 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr) | |||
| 1187 | ERR_IF(!b->bm_pages) return 0; | 1434 | ERR_IF(!b->bm_pages) return 0; |
| 1188 | 1435 | ||
| 1189 | spin_lock_irqsave(&b->bm_lock, flags); | 1436 | spin_lock_irqsave(&b->bm_lock, flags); |
| 1190 | if (bm_is_locked(b)) | 1437 | if (BM_DONT_TEST & b->bm_flags) |
| 1191 | bm_print_lock_info(mdev); | 1438 | bm_print_lock_info(mdev); |
| 1192 | if (bitnr < b->bm_bits) { | 1439 | if (bitnr < b->bm_bits) { |
| 1193 | unsigned long offset = bitnr>>LN2_BPL; | 1440 | p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr)); |
| 1194 | p_addr = bm_map_paddr(b, offset); | 1441 | i = generic_test_le_bit(bitnr & BITS_PER_PAGE_MASK, p_addr) ? 1 : 0; |
| 1195 | i = test_bit(bitnr & BPP_MASK, p_addr) ? 1 : 0; | ||
| 1196 | bm_unmap(p_addr); | 1442 | bm_unmap(p_addr); |
| 1197 | } else if (bitnr == b->bm_bits) { | 1443 | } else if (bitnr == b->bm_bits) { |
| 1198 | i = -1; | 1444 | i = -1; |
| @@ -1210,10 +1456,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi | |||
| 1210 | { | 1456 | { |
| 1211 | unsigned long flags; | 1457 | unsigned long flags; |
| 1212 | struct drbd_bitmap *b = mdev->bitmap; | 1458 | struct drbd_bitmap *b = mdev->bitmap; |
| 1213 | unsigned long *p_addr = NULL, page_nr = -1; | 1459 | unsigned long *p_addr = NULL; |
| 1214 | unsigned long bitnr; | 1460 | unsigned long bitnr; |
| 1461 | unsigned int page_nr = -1U; | ||
| 1215 | int c = 0; | 1462 | int c = 0; |
| 1216 | size_t w; | ||
| 1217 | 1463 | ||
| 1218 | /* If this is called without a bitmap, that is a bug. But just to be | 1464 | /* If this is called without a bitmap, that is a bug. But just to be |
| 1219 | * robust in case we screwed up elsewhere, in that case pretend there | 1465 | * robust in case we screwed up elsewhere, in that case pretend there |
| @@ -1223,20 +1469,20 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi | |||
| 1223 | ERR_IF(!b->bm_pages) return 1; | 1469 | ERR_IF(!b->bm_pages) return 1; |
| 1224 | 1470 | ||
| 1225 | spin_lock_irqsave(&b->bm_lock, flags); | 1471 | spin_lock_irqsave(&b->bm_lock, flags); |
| 1226 | if (bm_is_locked(b)) | 1472 | if (BM_DONT_TEST & b->bm_flags) |
| 1227 | bm_print_lock_info(mdev); | 1473 | bm_print_lock_info(mdev); |
| 1228 | for (bitnr = s; bitnr <= e; bitnr++) { | 1474 | for (bitnr = s; bitnr <= e; bitnr++) { |
| 1229 | w = bitnr >> LN2_BPL; | 1475 | unsigned int idx = bm_bit_to_page_idx(b, bitnr); |
| 1230 | if (page_nr != w >> (PAGE_SHIFT - LN2_BPL + 3)) { | 1476 | if (page_nr != idx) { |
| 1231 | page_nr = w >> (PAGE_SHIFT - LN2_BPL + 3); | 1477 | page_nr = idx; |
| 1232 | if (p_addr) | 1478 | if (p_addr) |
| 1233 | bm_unmap(p_addr); | 1479 | bm_unmap(p_addr); |
| 1234 | p_addr = bm_map_paddr(b, w); | 1480 | p_addr = bm_map_pidx(b, idx); |
| 1235 | } | 1481 | } |
| 1236 | ERR_IF (bitnr >= b->bm_bits) { | 1482 | ERR_IF (bitnr >= b->bm_bits) { |
| 1237 | dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); | 1483 | dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); |
| 1238 | } else { | 1484 | } else { |
| 1239 | c += (0 != test_bit(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr)); | 1485 | c += (0 != generic_test_le_bit(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr)); |
| 1240 | } | 1486 | } |
| 1241 | } | 1487 | } |
| 1242 | if (p_addr) | 1488 | if (p_addr) |
| @@ -1271,7 +1517,7 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) | |||
| 1271 | ERR_IF(!b->bm_pages) return 0; | 1517 | ERR_IF(!b->bm_pages) return 0; |
| 1272 | 1518 | ||
| 1273 | spin_lock_irqsave(&b->bm_lock, flags); | 1519 | spin_lock_irqsave(&b->bm_lock, flags); |
| 1274 | if (bm_is_locked(b)) | 1520 | if (BM_DONT_TEST & b->bm_flags) |
| 1275 | bm_print_lock_info(mdev); | 1521 | bm_print_lock_info(mdev); |
| 1276 | 1522 | ||
| 1277 | s = S2W(enr); | 1523 | s = S2W(enr); |
| @@ -1279,7 +1525,7 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) | |||
| 1279 | count = 0; | 1525 | count = 0; |
| 1280 | if (s < b->bm_words) { | 1526 | if (s < b->bm_words) { |
| 1281 | int n = e-s; | 1527 | int n = e-s; |
| 1282 | p_addr = bm_map_paddr(b, s); | 1528 | p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s)); |
| 1283 | bm = p_addr + MLPP(s); | 1529 | bm = p_addr + MLPP(s); |
| 1284 | while (n--) | 1530 | while (n--) |
| 1285 | count += hweight_long(*bm++); | 1531 | count += hweight_long(*bm++); |
| @@ -1291,18 +1537,20 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) | |||
| 1291 | return count; | 1537 | return count; |
| 1292 | } | 1538 | } |
| 1293 | 1539 | ||
| 1294 | /* set all bits covered by the AL-extent al_enr */ | 1540 | /* Set all bits covered by the AL-extent al_enr. |
| 1541 | * Returns number of bits changed. */ | ||
| 1295 | unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) | 1542 | unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) |
| 1296 | { | 1543 | { |
| 1297 | struct drbd_bitmap *b = mdev->bitmap; | 1544 | struct drbd_bitmap *b = mdev->bitmap; |
| 1298 | unsigned long *p_addr, *bm; | 1545 | unsigned long *p_addr, *bm; |
| 1299 | unsigned long weight; | 1546 | unsigned long weight; |
| 1300 | int count, s, e, i, do_now; | 1547 | unsigned long s, e; |
| 1548 | int count, i, do_now; | ||
| 1301 | ERR_IF(!b) return 0; | 1549 | ERR_IF(!b) return 0; |
| 1302 | ERR_IF(!b->bm_pages) return 0; | 1550 | ERR_IF(!b->bm_pages) return 0; |
| 1303 | 1551 | ||
| 1304 | spin_lock_irq(&b->bm_lock); | 1552 | spin_lock_irq(&b->bm_lock); |
| 1305 | if (bm_is_locked(b)) | 1553 | if (BM_DONT_SET & b->bm_flags) |
| 1306 | bm_print_lock_info(mdev); | 1554 | bm_print_lock_info(mdev); |
| 1307 | weight = b->bm_set; | 1555 | weight = b->bm_set; |
| 1308 | 1556 | ||
| @@ -1314,7 +1562,7 @@ unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) | |||
| 1314 | count = 0; | 1562 | count = 0; |
| 1315 | if (s < b->bm_words) { | 1563 | if (s < b->bm_words) { |
| 1316 | i = do_now = e-s; | 1564 | i = do_now = e-s; |
| 1317 | p_addr = bm_map_paddr(b, s); | 1565 | p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s)); |
| 1318 | bm = p_addr + MLPP(s); | 1566 | bm = p_addr + MLPP(s); |
| 1319 | while (i--) { | 1567 | while (i--) { |
| 1320 | count += hweight_long(*bm); | 1568 | count += hweight_long(*bm); |
| @@ -1326,7 +1574,7 @@ unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) | |||
| 1326 | if (e == b->bm_words) | 1574 | if (e == b->bm_words) |
| 1327 | b->bm_set -= bm_clear_surplus(b); | 1575 | b->bm_set -= bm_clear_surplus(b); |
| 1328 | } else { | 1576 | } else { |
| 1329 | dev_err(DEV, "start offset (%d) too large in drbd_bm_ALe_set_all\n", s); | 1577 | dev_err(DEV, "start offset (%lu) too large in drbd_bm_ALe_set_all\n", s); |
| 1330 | } | 1578 | } |
| 1331 | weight = b->bm_set - weight; | 1579 | weight = b->bm_set - weight; |
| 1332 | spin_unlock_irq(&b->bm_lock); | 1580 | spin_unlock_irq(&b->bm_lock); |
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index b0bd27dfc1e8..81030d8d654b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h | |||
| @@ -72,13 +72,6 @@ extern int fault_devs; | |||
| 72 | extern char usermode_helper[]; | 72 | extern char usermode_helper[]; |
| 73 | 73 | ||
| 74 | 74 | ||
| 75 | #ifndef TRUE | ||
| 76 | #define TRUE 1 | ||
| 77 | #endif | ||
| 78 | #ifndef FALSE | ||
| 79 | #define FALSE 0 | ||
| 80 | #endif | ||
| 81 | |||
| 82 | /* I don't remember why XCPU ... | 75 | /* I don't remember why XCPU ... |
| 83 | * This is used to wake the asender, | 76 | * This is used to wake the asender, |
| 84 | * and to interrupt the sending task | 77 | * and to interrupt the sending task |
| @@ -104,6 +97,7 @@ extern char usermode_helper[]; | |||
| 104 | #define ID_SYNCER (-1ULL) | 97 | #define ID_SYNCER (-1ULL) |
| 105 | #define ID_VACANT 0 | 98 | #define ID_VACANT 0 |
| 106 | #define is_syncer_block_id(id) ((id) == ID_SYNCER) | 99 | #define is_syncer_block_id(id) ((id) == ID_SYNCER) |
| 100 | #define UUID_NEW_BM_OFFSET ((u64)0x0001000000000000ULL) | ||
| 107 | 101 | ||
| 108 | struct drbd_conf; | 102 | struct drbd_conf; |
| 109 | 103 | ||
| @@ -137,20 +131,19 @@ enum { | |||
| 137 | DRBD_FAULT_MAX, | 131 | DRBD_FAULT_MAX, |
| 138 | }; | 132 | }; |
| 139 | 133 | ||
| 140 | #ifdef CONFIG_DRBD_FAULT_INJECTION | ||
| 141 | extern unsigned int | 134 | extern unsigned int |
| 142 | _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type); | 135 | _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type); |
| 136 | |||
| 143 | static inline int | 137 | static inline int |
| 144 | drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) { | 138 | drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) { |
| 139 | #ifdef CONFIG_DRBD_FAULT_INJECTION | ||
| 145 | return fault_rate && | 140 | return fault_rate && |
| 146 | (enable_faults & (1<<type)) && | 141 | (enable_faults & (1<<type)) && |
| 147 | _drbd_insert_fault(mdev, type); | 142 | _drbd_insert_fault(mdev, type); |
| 148 | } | ||
| 149 | #define FAULT_ACTIVE(_m, _t) (drbd_insert_fault((_m), (_t))) | ||
| 150 | |||
| 151 | #else | 143 | #else |
| 152 | #define FAULT_ACTIVE(_m, _t) (0) | 144 | return 0; |
| 153 | #endif | 145 | #endif |
| 146 | } | ||
| 154 | 147 | ||
| 155 | /* integer division, round _UP_ to the next integer */ | 148 | /* integer division, round _UP_ to the next integer */ |
| 156 | #define div_ceil(A, B) ((A)/(B) + ((A)%(B) ? 1 : 0)) | 149 | #define div_ceil(A, B) ((A)/(B) + ((A)%(B) ? 1 : 0)) |
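One way to read the fault-injection hunk above: the old code built two different FAULT_ACTIVE() macros depending on CONFIG_DRBD_FAULT_INJECTION, while the new code keeps a single, always-defined inline and moves the #ifdef inside it, so callers compile identically either way. A hedged sketch of that pattern; the dice-roll stub and the harness are made up, while fault_rate/enable_faults mirror the module parameters named in the hunk:

```c
#include <stdio.h>

#define CONFIG_DRBD_FAULT_INJECTION	/* toggle to see both variants */

static unsigned int fault_rate = 1;	/* module parameters in DRBD */
static unsigned int enable_faults = 0x2;

static int _insert_fault(unsigned int type)
{
	return 1;	/* real code rolls dice; the stub always fires */
}

static inline int insert_fault(unsigned int type)
{
#ifdef CONFIG_DRBD_FAULT_INJECTION
	return fault_rate &&
		(enable_faults & (1u << type)) &&
		_insert_fault(type);
#else
	return 0;	/* compiles away when fault injection is off */
#endif
}

int main(void)
{
	printf("fault type 1 active: %d\n", insert_fault(1));
	return 0;
}
```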
| @@ -212,8 +205,10 @@ enum drbd_packets { | |||
| 212 | /* P_CKPT_FENCE_REQ = 0x25, * currently reserved for protocol D */ | 205 | /* P_CKPT_FENCE_REQ = 0x25, * currently reserved for protocol D */ |
| 213 | /* P_CKPT_DISABLE_REQ = 0x26, * currently reserved for protocol D */ | 206 | /* P_CKPT_DISABLE_REQ = 0x26, * currently reserved for protocol D */ |
| 214 | P_DELAY_PROBE = 0x27, /* is used on BOTH sockets */ | 207 | P_DELAY_PROBE = 0x27, /* is used on BOTH sockets */ |
| 208 | P_OUT_OF_SYNC = 0x28, /* Mark as out of sync (Outrunning), data socket */ | ||
| 209 | P_RS_CANCEL = 0x29, /* meta: Used to cancel RS_DATA_REQUEST packet by SyncSource */ | ||
| 215 | 210 | ||
| 216 | P_MAX_CMD = 0x28, | 211 | P_MAX_CMD = 0x2A, |
| 217 | P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ | 212 | P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ |
| 218 | P_MAX_OPT_CMD = 0x101, | 213 | P_MAX_OPT_CMD = 0x101, |
| 219 | 214 | ||
| @@ -269,6 +264,7 @@ static inline const char *cmdname(enum drbd_packets cmd) | |||
| 269 | [P_RS_IS_IN_SYNC] = "CsumRSIsInSync", | 264 | [P_RS_IS_IN_SYNC] = "CsumRSIsInSync", |
| 270 | [P_COMPRESSED_BITMAP] = "CBitmap", | 265 | [P_COMPRESSED_BITMAP] = "CBitmap", |
| 271 | [P_DELAY_PROBE] = "DelayProbe", | 266 | [P_DELAY_PROBE] = "DelayProbe", |
| 267 | [P_OUT_OF_SYNC] = "OutOfSync", | ||
| 272 | [P_MAX_CMD] = NULL, | 268 | [P_MAX_CMD] = NULL, |
| 273 | }; | 269 | }; |
| 274 | 270 | ||
| @@ -512,7 +508,7 @@ struct p_sizes { | |||
| 512 | u64 d_size; /* size of disk */ | 508 | u64 d_size; /* size of disk */ |
| 513 | u64 u_size; /* user requested size */ | 509 | u64 u_size; /* user requested size */ |
| 514 | u64 c_size; /* current exported size */ | 510 | u64 c_size; /* current exported size */ |
| 515 | u32 max_segment_size; /* Maximal size of a BIO */ | 511 | u32 max_bio_size; /* Maximal size of a BIO */ |
| 516 | u16 queue_order_type; /* not yet implemented in DRBD*/ | 512 | u16 queue_order_type; /* not yet implemented in DRBD*/ |
| 517 | u16 dds_flags; /* use enum dds_flags here. */ | 513 | u16 dds_flags; /* use enum dds_flags here. */ |
| 518 | } __packed; | 514 | } __packed; |
| @@ -550,6 +546,13 @@ struct p_discard { | |||
| 550 | u32 pad; | 546 | u32 pad; |
| 551 | } __packed; | 547 | } __packed; |
| 552 | 548 | ||
| 549 | struct p_block_desc { | ||
| 550 | struct p_header80 head; | ||
| 551 | u64 sector; | ||
| 552 | u32 blksize; | ||
| 553 | u32 pad; /* to multiple of 8 bytes */ | ||
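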
| 554 | } __packed; | ||
| 555 | |||
| 553 | /* Valid values for the encoding field. | 556 | /* Valid values for the encoding field. |
| 554 | * Bump proto version when changing this. */ | 557 | * Bump proto version when changing this. */ |
| 555 | enum drbd_bitmap_code { | 558 | enum drbd_bitmap_code { |
| @@ -647,6 +650,7 @@ union p_polymorph { | |||
| 647 | struct p_block_req block_req; | 650 | struct p_block_req block_req; |
| 648 | struct p_delay_probe93 delay_probe93; | 651 | struct p_delay_probe93 delay_probe93; |
| 649 | struct p_rs_uuid rs_uuid; | 652 | struct p_rs_uuid rs_uuid; |
| 653 | struct p_block_desc block_desc; | ||
| 650 | } __packed; | 654 | } __packed; |
| 651 | 655 | ||
| 652 | /**********************************************************************/ | 656 | /**********************************************************************/ |
| @@ -677,13 +681,6 @@ static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi) | |||
| 677 | return thi->t_state; | 681 | return thi->t_state; |
| 678 | } | 682 | } |
| 679 | 683 | ||
| 680 | |||
| 681 | /* | ||
| 682 | * Having this as the first member of a struct provides sort of "inheritance". | ||
| 683 | * "derived" structs can be "drbd_queue_work()"ed. | ||
| 684 | * The callback should know and cast back to the descendant struct. | ||
| 685 | * drbd_request and drbd_epoch_entry are descendants of drbd_work. | ||
| 686 | */ | ||
| 687 | struct drbd_work; | 684 | struct drbd_work; |
| 688 | typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel); | 685 | typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel); |
| 689 | struct drbd_work { | 686 | struct drbd_work { |
| @@ -712,9 +709,6 @@ struct drbd_request { | |||
| 712 | * starting a new epoch... | 709 | * starting a new epoch... |
| 713 | */ | 710 | */ |
| 714 | 711 | ||
| 715 | /* up to here, the struct layout is identical to drbd_epoch_entry; | ||
| 716 | * we might be able to use that to our advantage... */ | ||
| 717 | |||
| 718 | struct list_head tl_requests; /* ring list in the transfer log */ | 712 | struct list_head tl_requests; /* ring list in the transfer log */ |
| 719 | struct bio *master_bio; /* master bio pointer */ | 713 | struct bio *master_bio; /* master bio pointer */ |
| 720 | unsigned long rq_state; /* see comments above _req_mod() */ | 714 | unsigned long rq_state; /* see comments above _req_mod() */ |
| @@ -831,7 +825,7 @@ enum { | |||
| 831 | CRASHED_PRIMARY, /* This node was a crashed primary. | 825 | CRASHED_PRIMARY, /* This node was a crashed primary. |
| 832 | * Gets cleared when the state.conn | 826 | * Gets cleared when the state.conn |
| 833 | * goes into C_CONNECTED state. */ | 827 | * goes into C_CONNECTED state. */ |
| 834 | WRITE_BM_AFTER_RESYNC, /* A kmalloc() during resync failed */ | 828 | NO_BARRIER_SUPP, /* underlying block device doesn't implement barriers */ |
| 835 | CONSIDER_RESYNC, | 829 | CONSIDER_RESYNC, |
| 836 | 830 | ||
| 837 | MD_NO_FUA, /* User wants us not to use FUA/FLUSH on meta data dev */ | 831 | MD_NO_FUA, /* User wants us not to use FUA/FLUSH on meta data dev */ |
| @@ -856,10 +850,37 @@ enum { | |||
| 856 | GOT_PING_ACK, /* set when we receive a ping_ack packet, misc wait gets woken */ | 850 | GOT_PING_ACK, /* set when we receive a ping_ack packet, misc wait gets woken */ |
| 857 | NEW_CUR_UUID, /* Create new current UUID when thawing IO */ | 851 | NEW_CUR_UUID, /* Create new current UUID when thawing IO */ |
| 858 | AL_SUSPENDED, /* Activity logging is currently suspended. */ | 852 | AL_SUSPENDED, /* Activity logging is currently suspended. */ |
| 853 | AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */ | ||
| 859 | }; | 854 | }; |
| 860 | 855 | ||
| 861 | struct drbd_bitmap; /* opaque for drbd_conf */ | 856 | struct drbd_bitmap; /* opaque for drbd_conf */ |
| 862 | 857 | ||
| 858 | /* definition of bits in bm_flags to be used in drbd_bm_lock | ||
| 859 | * and drbd_bitmap_io and friends. */ | ||
| 860 | enum bm_flag { | ||
| 861 | /* do we need to kfree, or vfree bm_pages? */ | ||
| 862 | BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */ | ||
| 863 | |||
| 864 | /* currently locked for bulk operation */ | ||
| 865 | BM_LOCKED_MASK = 0x7, | ||
| 866 | |||
| 867 | /* in detail, that is: */ | ||
| 868 | BM_DONT_CLEAR = 0x1, | ||
| 869 | BM_DONT_SET = 0x2, | ||
| 870 | BM_DONT_TEST = 0x4, | ||
| 871 | |||
| 872 | /* (test bit, count bit) allowed (common case) */ | ||
| 873 | BM_LOCKED_TEST_ALLOWED = 0x3, | ||
| 874 | |||
| 875 | /* testing bits, as well as setting new bits allowed, but clearing bits | ||
| 876 | * would be unexpected. Used during bitmap receive. Setting new bits | ||
| 877 | * requires sending of "out-of-sync" information, though. */ | ||
| 878 | BM_LOCKED_SET_ALLOWED = 0x1, | ||
| 879 | |||
| 880 | /* clear is not expected while bitmap is locked for bulk operation */ | ||
| 881 | }; | ||
| 882 | |||
| 883 | |||
| 863 | /* TODO sort members for performance | 884 | /* TODO sort members for performance |
| 864 | * MAYBE group them further */ | 885 | * MAYBE group them further */ |
| 865 | 886 | ||
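The new bm_flag enum reads best as bit composition: the BM_LOCKED_* lock modes are OR-combinations of the three BM_DONT_* veto bits, so a bulk operation forbids exactly the accesses it cannot tolerate. An illustrative sketch; the values match the hunk, while the helper and the harness are hypothetical:

```c
#include <stdio.h>

enum bm_flag {
	BM_P_VMALLOCED = 0x10000,	/* internal use only */
	BM_LOCKED_MASK = 0x7,

	BM_DONT_CLEAR = 0x1,
	BM_DONT_SET   = 0x2,
	BM_DONT_TEST  = 0x4,

	/* lock modes are just combinations of the veto bits */
	BM_LOCKED_TEST_ALLOWED = BM_DONT_CLEAR | BM_DONT_SET,	/* 0x3 */
	BM_LOCKED_SET_ALLOWED  = BM_DONT_CLEAR,			/* 0x1 */
};

static int may_clear(unsigned int bm_flags)
{
	return !(bm_flags & BM_DONT_CLEAR);
}

int main(void)
{
	unsigned int flags = BM_LOCKED_SET_ALLOWED; /* e.g. bitmap receive */

	printf("clear allowed: %d, set allowed: %d\n",
	       may_clear(flags), !(flags & BM_DONT_SET));
	return 0;
}
```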
| @@ -925,6 +946,7 @@ struct drbd_md_io { | |||
| 925 | struct bm_io_work { | 946 | struct bm_io_work { |
| 926 | struct drbd_work w; | 947 | struct drbd_work w; |
| 927 | char *why; | 948 | char *why; |
| 949 | enum bm_flag flags; | ||
| 928 | int (*io_fn)(struct drbd_conf *mdev); | 950 | int (*io_fn)(struct drbd_conf *mdev); |
| 929 | void (*done)(struct drbd_conf *mdev, int rv); | 951 | void (*done)(struct drbd_conf *mdev, int rv); |
| 930 | }; | 952 | }; |
| @@ -963,9 +985,12 @@ struct drbd_conf { | |||
| 963 | struct drbd_work resync_work, | 985 | struct drbd_work resync_work, |
| 964 | unplug_work, | 986 | unplug_work, |
| 965 | go_diskless, | 987 | go_diskless, |
| 966 | md_sync_work; | 988 | md_sync_work, |
| 989 | start_resync_work; | ||
| 967 | struct timer_list resync_timer; | 990 | struct timer_list resync_timer; |
| 968 | struct timer_list md_sync_timer; | 991 | struct timer_list md_sync_timer; |
| 992 | struct timer_list start_resync_timer; | ||
| 993 | struct timer_list request_timer; | ||
| 969 | #ifdef DRBD_DEBUG_MD_SYNC | 994 | #ifdef DRBD_DEBUG_MD_SYNC |
| 970 | struct { | 995 | struct { |
| 971 | unsigned int line; | 996 | unsigned int line; |
| @@ -1000,9 +1025,9 @@ struct drbd_conf { | |||
| 1000 | struct hlist_head *tl_hash; | 1025 | struct hlist_head *tl_hash; |
| 1001 | unsigned int tl_hash_s; | 1026 | unsigned int tl_hash_s; |
| 1002 | 1027 | ||
| 1003 | /* blocks to sync in this run [unit BM_BLOCK_SIZE] */ | 1028 | /* blocks to resync in this run [unit BM_BLOCK_SIZE] */ |
| 1004 | unsigned long rs_total; | 1029 | unsigned long rs_total; |
| 1005 | /* number of sync IOs that failed in this run */ | 1030 | /* number of resync blocks that failed in this run */ |
| 1006 | unsigned long rs_failed; | 1031 | unsigned long rs_failed; |
| 1007 | /* Syncer's start time [unit jiffies] */ | 1032 | /* Syncer's start time [unit jiffies] */ |
| 1008 | unsigned long rs_start; | 1033 | unsigned long rs_start; |
| @@ -1102,6 +1127,7 @@ struct drbd_conf { | |||
| 1102 | struct fifo_buffer rs_plan_s; /* correction values of resync planner */ | 1127 | struct fifo_buffer rs_plan_s; /* correction values of resync planner */ |
| 1103 | int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */ | 1128 | int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */ |
| 1104 | int rs_planed; /* resync sectors already planned */ | 1129 | int rs_planed; /* resync sectors already planned */ |
| 1130 | atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */ | ||
| 1105 | }; | 1131 | }; |
| 1106 | 1132 | ||
| 1107 | static inline struct drbd_conf *minor_to_mdev(unsigned int minor) | 1133 | static inline struct drbd_conf *minor_to_mdev(unsigned int minor) |
| @@ -1163,14 +1189,19 @@ enum dds_flags { | |||
| 1163 | }; | 1189 | }; |
| 1164 | 1190 | ||
| 1165 | extern void drbd_init_set_defaults(struct drbd_conf *mdev); | 1191 | extern void drbd_init_set_defaults(struct drbd_conf *mdev); |
| 1166 | extern int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, | 1192 | extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev, |
| 1167 | union drbd_state mask, union drbd_state val); | 1193 | enum chg_state_flags f, |
| 1194 | union drbd_state mask, | ||
| 1195 | union drbd_state val); | ||
| 1168 | extern void drbd_force_state(struct drbd_conf *, union drbd_state, | 1196 | extern void drbd_force_state(struct drbd_conf *, union drbd_state, |
| 1169 | union drbd_state); | 1197 | union drbd_state); |
| 1170 | extern int _drbd_request_state(struct drbd_conf *, union drbd_state, | 1198 | extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *, |
| 1171 | union drbd_state, enum chg_state_flags); | 1199 | union drbd_state, |
| 1172 | extern int __drbd_set_state(struct drbd_conf *, union drbd_state, | 1200 | union drbd_state, |
| 1173 | enum chg_state_flags, struct completion *done); | 1201 | enum chg_state_flags); |
| 1202 | extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state, | ||
| 1203 | enum chg_state_flags, | ||
| 1204 | struct completion *done); | ||
| 1174 | extern void print_st_err(struct drbd_conf *, union drbd_state, | 1205 | extern void print_st_err(struct drbd_conf *, union drbd_state, |
| 1175 | union drbd_state, int); | 1206 | union drbd_state, int); |
| 1176 | extern int drbd_thread_start(struct drbd_thread *thi); | 1207 | extern int drbd_thread_start(struct drbd_thread *thi); |
| @@ -1195,7 +1226,7 @@ extern int drbd_send(struct drbd_conf *mdev, struct socket *sock, | |||
| 1195 | extern int drbd_send_protocol(struct drbd_conf *mdev); | 1226 | extern int drbd_send_protocol(struct drbd_conf *mdev); |
| 1196 | extern int drbd_send_uuids(struct drbd_conf *mdev); | 1227 | extern int drbd_send_uuids(struct drbd_conf *mdev); |
| 1197 | extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); | 1228 | extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); |
| 1198 | extern int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val); | 1229 | extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev); |
| 1199 | extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); | 1230 | extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); |
| 1200 | extern int _drbd_send_state(struct drbd_conf *mdev); | 1231 | extern int _drbd_send_state(struct drbd_conf *mdev); |
| 1201 | extern int drbd_send_state(struct drbd_conf *mdev); | 1232 | extern int drbd_send_state(struct drbd_conf *mdev); |
| @@ -1220,11 +1251,10 @@ extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, | |||
| 1220 | struct p_data *dp, int data_size); | 1251 | struct p_data *dp, int data_size); |
| 1221 | extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd, | 1252 | extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd, |
| 1222 | sector_t sector, int blksize, u64 block_id); | 1253 | sector_t sector, int blksize, u64 block_id); |
| 1254 | extern int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req); | ||
| 1223 | extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, | 1255 | extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, |
| 1224 | struct drbd_epoch_entry *e); | 1256 | struct drbd_epoch_entry *e); |
| 1225 | extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req); | 1257 | extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req); |
| 1226 | extern int _drbd_send_barrier(struct drbd_conf *mdev, | ||
| 1227 | struct drbd_tl_epoch *barrier); | ||
| 1228 | extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd, | 1258 | extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd, |
| 1229 | sector_t sector, int size, u64 block_id); | 1259 | sector_t sector, int size, u64 block_id); |
| 1230 | extern int drbd_send_drequest_csum(struct drbd_conf *mdev, | 1260 | extern int drbd_send_drequest_csum(struct drbd_conf *mdev, |
| @@ -1235,14 +1265,13 @@ extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size) | |||
| 1235 | 1265 | ||
| 1236 | extern int drbd_send_bitmap(struct drbd_conf *mdev); | 1266 | extern int drbd_send_bitmap(struct drbd_conf *mdev); |
| 1237 | extern int _drbd_send_bitmap(struct drbd_conf *mdev); | 1267 | extern int _drbd_send_bitmap(struct drbd_conf *mdev); |
| 1238 | extern int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode); | 1268 | extern int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode); |
| 1239 | extern void drbd_free_bc(struct drbd_backing_dev *ldev); | 1269 | extern void drbd_free_bc(struct drbd_backing_dev *ldev); |
| 1240 | extern void drbd_mdev_cleanup(struct drbd_conf *mdev); | 1270 | extern void drbd_mdev_cleanup(struct drbd_conf *mdev); |
| 1271 | void drbd_print_uuids(struct drbd_conf *mdev, const char *text); | ||
| 1241 | 1272 | ||
| 1242 | /* drbd_meta-data.c (still in drbd_main.c) */ | ||
| 1243 | extern void drbd_md_sync(struct drbd_conf *mdev); | 1273 | extern void drbd_md_sync(struct drbd_conf *mdev); |
| 1244 | extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev); | 1274 | extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev); |
| 1245 | /* maybe define them below as inline? */ | ||
| 1246 | extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); | 1275 | extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); |
| 1247 | extern void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); | 1276 | extern void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); |
| 1248 | extern void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local); | 1277 | extern void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local); |
| @@ -1261,10 +1290,12 @@ extern void drbd_md_mark_dirty_(struct drbd_conf *mdev, | |||
| 1261 | extern void drbd_queue_bitmap_io(struct drbd_conf *mdev, | 1290 | extern void drbd_queue_bitmap_io(struct drbd_conf *mdev, |
| 1262 | int (*io_fn)(struct drbd_conf *), | 1291 | int (*io_fn)(struct drbd_conf *), |
| 1263 | void (*done)(struct drbd_conf *, int), | 1292 | void (*done)(struct drbd_conf *, int), |
| 1264 | char *why); | 1293 | char *why, enum bm_flag flags); |
| 1294 | extern int drbd_bitmap_io(struct drbd_conf *mdev, | ||
| 1295 | int (*io_fn)(struct drbd_conf *), | ||
| 1296 | char *why, enum bm_flag flags); | ||
| 1265 | extern int drbd_bmio_set_n_write(struct drbd_conf *mdev); | 1297 | extern int drbd_bmio_set_n_write(struct drbd_conf *mdev); |
| 1266 | extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); | 1298 | extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); |
| 1267 | extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why); | ||
| 1268 | extern void drbd_go_diskless(struct drbd_conf *mdev); | 1299 | extern void drbd_go_diskless(struct drbd_conf *mdev); |
| 1269 | extern void drbd_ldev_destroy(struct drbd_conf *mdev); | 1300 | extern void drbd_ldev_destroy(struct drbd_conf *mdev); |
| 1270 | 1301 | ||
| @@ -1313,6 +1344,7 @@ struct bm_extent { | |||
| 1313 | 1344 | ||
| 1314 | #define BME_NO_WRITES 0 /* bm_extent.flags: no more requests on this one! */ | 1345 | #define BME_NO_WRITES 0 /* bm_extent.flags: no more requests on this one! */ |
| 1315 | #define BME_LOCKED 1 /* bm_extent.flags: syncer active on this one. */ | 1346 | #define BME_LOCKED 1 /* bm_extent.flags: syncer active on this one. */ |
| 1347 | #define BME_PRIORITY 2 /* finish resync IO on this extent ASAP! App IO waiting! */ | ||
| 1316 | 1348 | ||
| 1317 | /* drbd_bitmap.c */ | 1349 | /* drbd_bitmap.c */ |
| 1318 | /* | 1350 | /* |
| @@ -1390,7 +1422,9 @@ struct bm_extent { | |||
| 1390 | * you should use 64bit OS for that much storage, anyways. */ | 1422 | * you should use 64bit OS for that much storage, anyways. */ |
| 1391 | #define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0xffff7fff) | 1423 | #define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0xffff7fff) |
| 1392 | #else | 1424 | #else |
| 1393 | #define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0x1LU << 32) | 1425 | /* we allow up to 1 PiB now on 64bit architecture with "flexible" meta data */ |
| 1426 | #define DRBD_MAX_SECTORS_FLEX (1UL << 51) | ||
| 1427 | /* corresponds to (1UL << 38) bits right now. */ | ||
| 1394 | #endif | 1428 | #endif |
| 1395 | #endif | 1429 | #endif |
| 1396 | 1430 | ||
| @@ -1398,7 +1432,7 @@ struct bm_extent { | |||
| 1398 | * With a value of 8 all IO in one 128K block makes it to the same slot of the | 1432 | * With a value of 8 all IO in one 128K block makes it to the same slot of the |
| 1399 | * hash table. */ | 1433 | * hash table. */ |
| 1400 | #define HT_SHIFT 8 | 1434 | #define HT_SHIFT 8 |
| 1401 | #define DRBD_MAX_SEGMENT_SIZE (1U<<(9+HT_SHIFT)) | 1435 | #define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT)) |
| 1402 | 1436 | ||
| 1403 | #define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32 KiB of data */ | 1437 | #define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32 KiB of data */ |
| 1404 | 1438 | ||
| @@ -1410,16 +1444,20 @@ extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors, int set_new | |||
| 1410 | extern void drbd_bm_cleanup(struct drbd_conf *mdev); | 1444 | extern void drbd_bm_cleanup(struct drbd_conf *mdev); |
| 1411 | extern void drbd_bm_set_all(struct drbd_conf *mdev); | 1445 | extern void drbd_bm_set_all(struct drbd_conf *mdev); |
| 1412 | extern void drbd_bm_clear_all(struct drbd_conf *mdev); | 1446 | extern void drbd_bm_clear_all(struct drbd_conf *mdev); |
| 1447 | /* set/clear/test only a few bits at a time */ | ||
| 1413 | extern int drbd_bm_set_bits( | 1448 | extern int drbd_bm_set_bits( |
| 1414 | struct drbd_conf *mdev, unsigned long s, unsigned long e); | 1449 | struct drbd_conf *mdev, unsigned long s, unsigned long e); |
| 1415 | extern int drbd_bm_clear_bits( | 1450 | extern int drbd_bm_clear_bits( |
| 1416 | struct drbd_conf *mdev, unsigned long s, unsigned long e); | 1451 | struct drbd_conf *mdev, unsigned long s, unsigned long e); |
| 1417 | /* bm_set_bits variant for use while holding drbd_bm_lock */ | 1452 | extern int drbd_bm_count_bits( |
| 1453 | struct drbd_conf *mdev, const unsigned long s, const unsigned long e); | ||
| 1454 | /* bm_set_bits variant for use while holding drbd_bm_lock, | ||
| 1455 | * may process the whole bitmap in one go */ | ||
| 1418 | extern void _drbd_bm_set_bits(struct drbd_conf *mdev, | 1456 | extern void _drbd_bm_set_bits(struct drbd_conf *mdev, |
| 1419 | const unsigned long s, const unsigned long e); | 1457 | const unsigned long s, const unsigned long e); |
| 1420 | extern int drbd_bm_test_bit(struct drbd_conf *mdev, unsigned long bitnr); | 1458 | extern int drbd_bm_test_bit(struct drbd_conf *mdev, unsigned long bitnr); |
| 1421 | extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr); | 1459 | extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr); |
| 1422 | extern int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local); | 1460 | extern int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local); |
| 1423 | extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); | 1461 | extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); |
| 1424 | extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); | 1462 | extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); |
| 1425 | extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, | 1463 | extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, |
| @@ -1427,6 +1465,8 @@ extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, | |||
| 1427 | extern size_t drbd_bm_words(struct drbd_conf *mdev); | 1465 | extern size_t drbd_bm_words(struct drbd_conf *mdev); |
| 1428 | extern unsigned long drbd_bm_bits(struct drbd_conf *mdev); | 1466 | extern unsigned long drbd_bm_bits(struct drbd_conf *mdev); |
| 1429 | extern sector_t drbd_bm_capacity(struct drbd_conf *mdev); | 1467 | extern sector_t drbd_bm_capacity(struct drbd_conf *mdev); |
| 1468 | |||
| 1469 | #define DRBD_END_OF_BITMAP (~(unsigned long)0) | ||
| 1430 | extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); | 1470 | extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); |
| 1431 | /* bm_find_next variants for use while you hold drbd_bm_lock() */ | 1471 | /* bm_find_next variants for use while you hold drbd_bm_lock() */ |
| 1432 | extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); | 1472 | extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); |
| @@ -1437,14 +1477,12 @@ extern int drbd_bm_rs_done(struct drbd_conf *mdev); | |||
| 1437 | /* for receive_bitmap */ | 1477 | /* for receive_bitmap */ |
| 1438 | extern void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, | 1478 | extern void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, |
| 1439 | size_t number, unsigned long *buffer); | 1479 | size_t number, unsigned long *buffer); |
| 1440 | /* for _drbd_send_bitmap and drbd_bm_write_sect */ | 1480 | /* for _drbd_send_bitmap */ |
| 1441 | extern void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, | 1481 | extern void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, |
| 1442 | size_t number, unsigned long *buffer); | 1482 | size_t number, unsigned long *buffer); |
| 1443 | 1483 | ||
| 1444 | extern void drbd_bm_lock(struct drbd_conf *mdev, char *why); | 1484 | extern void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags); |
| 1445 | extern void drbd_bm_unlock(struct drbd_conf *mdev); | 1485 | extern void drbd_bm_unlock(struct drbd_conf *mdev); |
| 1446 | |||
| 1447 | extern int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e); | ||
| 1448 | /* drbd_main.c */ | 1486 | /* drbd_main.c */ |
| 1449 | 1487 | ||
| 1450 | extern struct kmem_cache *drbd_request_cache; | 1488 | extern struct kmem_cache *drbd_request_cache; |
| @@ -1467,7 +1505,7 @@ extern void drbd_free_mdev(struct drbd_conf *mdev); | |||
| 1467 | extern int proc_details; | 1505 | extern int proc_details; |
| 1468 | 1506 | ||
| 1469 | /* drbd_req */ | 1507 | /* drbd_req */ |
| 1470 | extern int drbd_make_request_26(struct request_queue *q, struct bio *bio); | 1508 | extern int drbd_make_request(struct request_queue *q, struct bio *bio); |
| 1471 | extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req); | 1509 | extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req); |
| 1472 | extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec); | 1510 | extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec); |
| 1473 | extern int is_valid_ar_handle(struct drbd_request *, sector_t); | 1511 | extern int is_valid_ar_handle(struct drbd_request *, sector_t); |
| @@ -1482,8 +1520,9 @@ enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = | |||
| 1482 | extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); | 1520 | extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); |
| 1483 | extern void resync_after_online_grow(struct drbd_conf *); | 1521 | extern void resync_after_online_grow(struct drbd_conf *); |
| 1484 | extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local); | 1522 | extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local); |
| 1485 | extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, | 1523 | extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev, |
| 1486 | int force); | 1524 | enum drbd_role new_role, |
| 1525 | int force); | ||
| 1487 | extern enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev); | 1526 | extern enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev); |
| 1488 | extern void drbd_try_outdate_peer_async(struct drbd_conf *mdev); | 1527 | extern void drbd_try_outdate_peer_async(struct drbd_conf *mdev); |
| 1489 | extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); | 1528 | extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); |
| @@ -1499,6 +1538,7 @@ extern int drbd_resync_finished(struct drbd_conf *mdev); | |||
| 1499 | extern int drbd_md_sync_page_io(struct drbd_conf *mdev, | 1538 | extern int drbd_md_sync_page_io(struct drbd_conf *mdev, |
| 1500 | struct drbd_backing_dev *bdev, sector_t sector, int rw); | 1539 | struct drbd_backing_dev *bdev, sector_t sector, int rw); |
| 1501 | extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int); | 1540 | extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int); |
| 1541 | extern void drbd_rs_controller_reset(struct drbd_conf *mdev); | ||
| 1502 | 1542 | ||
| 1503 | static inline void ov_oos_print(struct drbd_conf *mdev) | 1543 | static inline void ov_oos_print(struct drbd_conf *mdev) |
| 1504 | { | 1544 | { |
| @@ -1522,21 +1562,23 @@ extern int w_e_end_csum_rs_req(struct drbd_conf *, struct drbd_work *, int); | |||
| 1522 | extern int w_e_end_ov_reply(struct drbd_conf *, struct drbd_work *, int); | 1562 | extern int w_e_end_ov_reply(struct drbd_conf *, struct drbd_work *, int); |
| 1523 | extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int); | 1563 | extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int); |
| 1524 | extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int); | 1564 | extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int); |
| 1525 | extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int); | 1565 | extern int w_resync_timer(struct drbd_conf *, struct drbd_work *, int); |
| 1526 | extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int); | 1566 | extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int); |
| 1527 | extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int); | 1567 | extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int); |
| 1528 | extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int); | ||
| 1529 | extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int); | 1568 | extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int); |
| 1530 | extern int w_send_barrier(struct drbd_conf *, struct drbd_work *, int); | 1569 | extern int w_send_barrier(struct drbd_conf *, struct drbd_work *, int); |
| 1531 | extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int); | 1570 | extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int); |
| 1532 | extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int); | 1571 | extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int); |
| 1533 | extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int); | 1572 | extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int); |
| 1534 | extern int w_restart_disk_io(struct drbd_conf *, struct drbd_work *, int); | 1573 | extern int w_restart_disk_io(struct drbd_conf *, struct drbd_work *, int); |
| 1574 | extern int w_send_oos(struct drbd_conf *, struct drbd_work *, int); | ||
| 1575 | extern int w_start_resync(struct drbd_conf *, struct drbd_work *, int); | ||
| 1535 | 1576 | ||
| 1536 | extern void resync_timer_fn(unsigned long data); | 1577 | extern void resync_timer_fn(unsigned long data); |
| 1578 | extern void start_resync_timer_fn(unsigned long data); | ||
| 1537 | 1579 | ||
| 1538 | /* drbd_receiver.c */ | 1580 | /* drbd_receiver.c */ |
| 1539 | extern int drbd_rs_should_slow_down(struct drbd_conf *mdev); | 1581 | extern int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector); |
| 1540 | extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, | 1582 | extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, |
| 1541 | const unsigned rw, const int fault_type); | 1583 | const unsigned rw, const int fault_type); |
| 1542 | extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); | 1584 | extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); |
| @@ -1619,16 +1661,16 @@ extern int drbd_rs_del_all(struct drbd_conf *mdev); | |||
| 1619 | extern void drbd_rs_failed_io(struct drbd_conf *mdev, | 1661 | extern void drbd_rs_failed_io(struct drbd_conf *mdev, |
| 1620 | sector_t sector, int size); | 1662 | sector_t sector, int size); |
| 1621 | extern int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *); | 1663 | extern int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *); |
| 1664 | extern void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go); | ||
| 1622 | extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, | 1665 | extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, |
| 1623 | int size, const char *file, const unsigned int line); | 1666 | int size, const char *file, const unsigned int line); |
| 1624 | #define drbd_set_in_sync(mdev, sector, size) \ | 1667 | #define drbd_set_in_sync(mdev, sector, size) \ |
| 1625 | __drbd_set_in_sync(mdev, sector, size, __FILE__, __LINE__) | 1668 | __drbd_set_in_sync(mdev, sector, size, __FILE__, __LINE__) |
| 1626 | extern void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, | 1669 | extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, |
| 1627 | int size, const char *file, const unsigned int line); | 1670 | int size, const char *file, const unsigned int line); |
| 1628 | #define drbd_set_out_of_sync(mdev, sector, size) \ | 1671 | #define drbd_set_out_of_sync(mdev, sector, size) \ |
| 1629 | __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__) | 1672 | __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__) |
| 1630 | extern void drbd_al_apply_to_bm(struct drbd_conf *mdev); | 1673 | extern void drbd_al_apply_to_bm(struct drbd_conf *mdev); |
| 1631 | extern void drbd_al_to_on_disk_bm(struct drbd_conf *mdev); | ||
| 1632 | extern void drbd_al_shrink(struct drbd_conf *mdev); | 1674 | extern void drbd_al_shrink(struct drbd_conf *mdev); |
| 1633 | 1675 | ||
| 1634 | 1676 | ||
| @@ -1747,11 +1789,11 @@ static inline void drbd_state_unlock(struct drbd_conf *mdev) | |||
| 1747 | wake_up(&mdev->misc_wait); | 1789 | wake_up(&mdev->misc_wait); |
| 1748 | } | 1790 | } |
| 1749 | 1791 | ||
| 1750 | static inline int _drbd_set_state(struct drbd_conf *mdev, | 1792 | static inline enum drbd_state_rv |
| 1751 | union drbd_state ns, enum chg_state_flags flags, | 1793 | _drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, |
| 1752 | struct completion *done) | 1794 | enum chg_state_flags flags, struct completion *done) |
| 1753 | { | 1795 | { |
| 1754 | int rv; | 1796 | enum drbd_state_rv rv; |
| 1755 | 1797 | ||
| 1756 | read_lock(&global_state_lock); | 1798 | read_lock(&global_state_lock); |
| 1757 | rv = __drbd_set_state(mdev, ns, flags, done); | 1799 | rv = __drbd_set_state(mdev, ns, flags, done); |
| @@ -1982,17 +2024,17 @@ static inline int drbd_send_ping_ack(struct drbd_conf *mdev) | |||
| 1982 | 2024 | ||
| 1983 | static inline void drbd_thread_stop(struct drbd_thread *thi) | 2025 | static inline void drbd_thread_stop(struct drbd_thread *thi) |
| 1984 | { | 2026 | { |
| 1985 | _drbd_thread_stop(thi, FALSE, TRUE); | 2027 | _drbd_thread_stop(thi, false, true); |
| 1986 | } | 2028 | } |
| 1987 | 2029 | ||
| 1988 | static inline void drbd_thread_stop_nowait(struct drbd_thread *thi) | 2030 | static inline void drbd_thread_stop_nowait(struct drbd_thread *thi) |
| 1989 | { | 2031 | { |
| 1990 | _drbd_thread_stop(thi, FALSE, FALSE); | 2032 | _drbd_thread_stop(thi, false, false); |
| 1991 | } | 2033 | } |
| 1992 | 2034 | ||
| 1993 | static inline void drbd_thread_restart_nowait(struct drbd_thread *thi) | 2035 | static inline void drbd_thread_restart_nowait(struct drbd_thread *thi) |
| 1994 | { | 2036 | { |
| 1995 | _drbd_thread_stop(thi, TRUE, FALSE); | 2037 | _drbd_thread_stop(thi, true, false); |
| 1996 | } | 2038 | } |
| 1997 | 2039 | ||
| 1998 | /* counts how many answer packets we expect from our peer, | 2040 |
| @@ -2146,17 +2188,18 @@ extern int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) | |||
| 2146 | static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, | 2188 | static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, |
| 2147 | unsigned long *bits_left, unsigned int *per_mil_done) | 2189 | unsigned long *bits_left, unsigned int *per_mil_done) |
| 2148 | { | 2190 | { |
| 2149 | /* | 2191 | /* this is to break it at compile time when we change that, in case we |
| 2150 | * this is to break it at compile time when we change that | 2192 | * want to support more than (1<<32) bits on a 32bit arch. */ |
| 2151 | * (we may feel 4TB maximum storage per drbd is not enough) | ||
| 2152 | */ | ||
| 2153 | typecheck(unsigned long, mdev->rs_total); | 2193 | typecheck(unsigned long, mdev->rs_total); |
| 2154 | 2194 | ||
| 2155 | /* note: both rs_total and rs_left are in bits, i.e. in | 2195 | /* note: both rs_total and rs_left are in bits, i.e. in |
| 2156 | * units of BM_BLOCK_SIZE. | 2196 | * units of BM_BLOCK_SIZE. |
| 2157 | * for the percentage, we don't care. */ | 2197 | * for the percentage, we don't care. */ |
| 2158 | 2198 | ||
| 2159 | *bits_left = drbd_bm_total_weight(mdev) - mdev->rs_failed; | 2199 | if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) |
| 2200 | *bits_left = mdev->ov_left; | ||
| 2201 | else | ||
| 2202 | *bits_left = drbd_bm_total_weight(mdev) - mdev->rs_failed; | ||
| 2160 | /* >> 10 to prevent overflow, | 2203 | /* >> 10 to prevent overflow, |
| 2161 | * +1 to prevent division by zero */ | 2204 | * +1 to prevent division by zero */ |
| 2162 | if (*bits_left > mdev->rs_total) { | 2205 | if (*bits_left > mdev->rs_total) { |
| @@ -2171,10 +2214,19 @@ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, | |||
| 2171 | *bits_left, mdev->rs_total, mdev->rs_failed); | 2214 | *bits_left, mdev->rs_total, mdev->rs_failed); |
| 2172 | *per_mil_done = 0; | 2215 | *per_mil_done = 0; |
| 2173 | } else { | 2216 | } else { |
| 2174 | /* make sure the calculation happens in long context */ | 2217 | /* Make sure the division happens in long context. |
| 2175 | unsigned long tmp = 1000UL - | 2218 | * We allow up to one petabyte storage right now, |
| 2176 | (*bits_left >> 10)*1000UL | 2219 | * at a granularity of 4k per bit that is 2**38 bits. |
| 2177 | / ((mdev->rs_total >> 10) + 1UL); | 2220 | * After shift right and multiplication by 1000, |
| 2221 | * this should still fit easily into a 32bit long, | ||
| 2222 | * so we don't need a 64bit division on 32bit arch. | ||
| 2223 | * Note: currently we don't support such large bitmaps on 32bit | ||
| 2224 | * arch anyways, but no harm done to be prepared for it here. | ||
| 2225 | */ | ||
| 2226 | unsigned int shift = mdev->rs_total >= (1ULL << 32) ? 16 : 10; | ||
| 2227 | unsigned long left = *bits_left >> shift; | ||
| 2228 | unsigned long total = 1UL + (mdev->rs_total >> shift); | ||
| 2229 | unsigned long tmp = 1000UL - left * 1000UL/total; | ||
| 2178 | *per_mil_done = tmp; | 2230 | *per_mil_done = tmp; |
| 2179 | } | 2231 | } |
| 2180 | } | 2232 | } |
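The rewritten progress arithmetic trades a little precision for overflow safety: both operands are shifted right before the multiply by 1000, so the product stays inside an unsigned long on 32-bit, and the shift widens from 10 to 16 once rs_total approaches 2^32 bits. A worked sketch with made-up numbers; per_mil_done() mirrors the hunk, the rest is illustrative:

```c
#include <stdio.h>

static unsigned int per_mil_done(unsigned long bits_left,
				 unsigned long rs_total)
{
	/* shift further for huge bitmaps, exactly as the hunk does */
	unsigned int shift = rs_total >= (1ULL << 32) ? 16 : 10;
	unsigned long left  = bits_left >> shift;
	unsigned long total = 1UL + (rs_total >> shift);

	return 1000UL - left * 1000UL / total;
}

int main(void)
{
	unsigned long rs_total  = 40UL << 18;	/* 40 GiB at 4 KiB per bit */
	unsigned long bits_left =  3UL << 18;	/*  3 GiB still out of sync */

	/* 1000 - (768 * 1000) / 10241 = 926, i.e. 92.6% done */
	printf("%u per mil done\n", per_mil_done(bits_left, rs_total));
	return 0;
}
```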
| @@ -2193,8 +2245,9 @@ static inline int drbd_get_max_buffers(struct drbd_conf *mdev) | |||
| 2193 | return mxb; | 2245 | return mxb; |
| 2194 | } | 2246 | } |
| 2195 | 2247 | ||
| 2196 | static inline int drbd_state_is_stable(union drbd_state s) | 2248 | static inline int drbd_state_is_stable(struct drbd_conf *mdev) |
| 2197 | { | 2249 | { |
| 2250 | union drbd_state s = mdev->state; | ||
| 2198 | 2251 | ||
| 2199 | /* DO NOT add a default clause, we want the compiler to warn us | 2252 | /* DO NOT add a default clause, we want the compiler to warn us |
| 2200 | * for any newly introduced state we may have forgotten to add here */ | 2253 | * for any newly introduced state we may have forgotten to add here */ |
| @@ -2211,11 +2264,9 @@ static inline int drbd_state_is_stable(union drbd_state s) | |||
| 2211 | case C_VERIFY_T: | 2264 | case C_VERIFY_T: |
| 2212 | case C_PAUSED_SYNC_S: | 2265 | case C_PAUSED_SYNC_S: |
| 2213 | case C_PAUSED_SYNC_T: | 2266 | case C_PAUSED_SYNC_T: |
| 2214 | /* maybe stable, look at the disk state */ | 2267 | case C_AHEAD: |
| 2215 | break; | 2268 | case C_BEHIND: |
| 2216 | 2269 | /* transitional states, IO allowed */ | |
| 2217 | /* no new io accepted during transitional states | ||
| 2218 | * like handshake or teardown */ | ||
| 2219 | case C_DISCONNECTING: | 2270 | case C_DISCONNECTING: |
| 2220 | case C_UNCONNECTED: | 2271 | case C_UNCONNECTED: |
| 2221 | case C_TIMEOUT: | 2272 | case C_TIMEOUT: |
| @@ -2226,7 +2277,15 @@ static inline int drbd_state_is_stable(union drbd_state s) | |||
| 2226 | case C_WF_REPORT_PARAMS: | 2277 | case C_WF_REPORT_PARAMS: |
| 2227 | case C_STARTING_SYNC_S: | 2278 | case C_STARTING_SYNC_S: |
| 2228 | case C_STARTING_SYNC_T: | 2279 | case C_STARTING_SYNC_T: |
| 2280 | break; | ||
| 2281 | |||
| 2282 | /* Allow IO in BM exchange states with new protocols */ | ||
| 2229 | case C_WF_BITMAP_S: | 2283 | case C_WF_BITMAP_S: |
| 2284 | if (mdev->agreed_pro_version < 96) | ||
| 2285 | return 0; | ||
| 2286 | break; | ||
| 2287 | |||
| 2288 | /* no new io accepted in these states */ | ||
| 2230 | case C_WF_BITMAP_T: | 2289 | case C_WF_BITMAP_T: |
| 2231 | case C_WF_SYNC_UUID: | 2290 | case C_WF_SYNC_UUID: |
| 2232 | case C_MASK: | 2291 | case C_MASK: |
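The "DO NOT add a default clause" comment preserved by this hunk is a deliberate compiler trick: an exhaustive switch over the enum with no default lets -Wswitch flag any newly introduced connection state that the stability check forgot to classify. A toy illustration, with a three-state enum standing in for DRBD's much larger one:

```c
#include <stdio.h>

enum conn { C_STANDALONE, C_CONNECTED, C_SYNC_TARGET };

/* no default clause on purpose: add a value to enum conn without
 * updating this switch and gcc -Wswitch reports the missing case */
static int state_is_stable(enum conn c)
{
	switch (c) {
	case C_STANDALONE:
	case C_CONNECTED:
		return 1;
	case C_SYNC_TARGET:
		return 0;
	}
	return 0;	/* not reached for valid enum values */
}

int main(void)
{
	printf("connected stable: %d\n", state_is_stable(C_CONNECTED));
	return 0;
}
```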
| @@ -2261,41 +2320,47 @@ static inline int is_susp(union drbd_state s) | |||
| 2261 | return s.susp || s.susp_nod || s.susp_fen; | 2320 | return s.susp || s.susp_nod || s.susp_fen; |
| 2262 | } | 2321 | } |
| 2263 | 2322 | ||
| 2264 | static inline int __inc_ap_bio_cond(struct drbd_conf *mdev) | 2323 | static inline bool may_inc_ap_bio(struct drbd_conf *mdev) |
| 2265 | { | 2324 | { |
| 2266 | int mxb = drbd_get_max_buffers(mdev); | 2325 | int mxb = drbd_get_max_buffers(mdev); |
| 2267 | 2326 | ||
| 2268 | if (is_susp(mdev->state)) | 2327 | if (is_susp(mdev->state)) |
| 2269 | return 0; | 2328 | return false; |
| 2270 | if (test_bit(SUSPEND_IO, &mdev->flags)) | 2329 | if (test_bit(SUSPEND_IO, &mdev->flags)) |
| 2271 | return 0; | 2330 | return false; |
| 2272 | 2331 | ||
| 2273 | /* to avoid potential deadlock or bitmap corruption, | 2332 | /* to avoid potential deadlock or bitmap corruption, |
| 2274 | * in various places, we only allow new application io | 2333 | * in various places, we only allow new application io |
| 2275 | * to start during "stable" states. */ | 2334 | * to start during "stable" states. */ |
| 2276 | 2335 | ||
| 2277 | /* no new io accepted when attaching or detaching the disk */ | 2336 | /* no new io accepted when attaching or detaching the disk */ |
| 2278 | if (!drbd_state_is_stable(mdev->state)) | 2337 | if (!drbd_state_is_stable(mdev)) |
| 2279 | return 0; | 2338 | return false; |
| 2280 | 2339 | ||
| 2281 | /* since some older kernels don't have atomic_add_unless, | 2340 | /* since some older kernels don't have atomic_add_unless, |
| 2282 | * and we are within the spinlock anyways, we have this workaround. */ | 2341 | * and we are within the spinlock anyways, we have this workaround. */ |
| 2283 | if (atomic_read(&mdev->ap_bio_cnt) > mxb) | 2342 | if (atomic_read(&mdev->ap_bio_cnt) > mxb) |
| 2284 | return 0; | 2343 | return false; |
| 2285 | if (test_bit(BITMAP_IO, &mdev->flags)) | 2344 | if (test_bit(BITMAP_IO, &mdev->flags)) |
| 2286 | return 0; | 2345 | return false; |
| 2287 | return 1; | 2346 | return true; |
| 2288 | } | 2347 | } |
| 2289 | 2348 | ||
| 2290 | /* I'd like to use wait_event_lock_irq, | 2349 | static inline bool inc_ap_bio_cond(struct drbd_conf *mdev, int count) |
| 2291 | * but I'm not sure when it got introduced, | ||
| 2292 | * and not sure when it has 3 or 4 arguments */ | ||
| 2293 | static inline void inc_ap_bio(struct drbd_conf *mdev, int count) | ||
| 2294 | { | 2350 | { |
| 2295 | /* compare with after_state_ch, | 2351 | bool rv = false; |
| 2296 | * os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S */ | 2352 | |
| 2297 | DEFINE_WAIT(wait); | 2353 | spin_lock_irq(&mdev->req_lock); |
| 2354 | rv = may_inc_ap_bio(mdev); | ||
| 2355 | if (rv) | ||
| 2356 | atomic_add(count, &mdev->ap_bio_cnt); | ||
| 2357 | spin_unlock_irq(&mdev->req_lock); | ||
| 2358 | |||
| 2359 | return rv; | ||
| 2360 | } | ||
| 2298 | 2361 | ||
| 2362 | static inline void inc_ap_bio(struct drbd_conf *mdev, int count) | ||
| 2363 | { | ||
| 2299 | /* we wait here | 2364 | /* we wait here |
| 2300 | * as long as the device is suspended | 2365 | * as long as the device is suspended |
| 2301 | * until the bitmap is no longer on the fly during connection | 2366 | * until the bitmap is no longer on the fly during connection |
| @@ -2304,16 +2369,7 @@ static inline void inc_ap_bio(struct drbd_conf *mdev, int count) | |||
| 2304 | * to avoid races with the reconnect code, | 2369 | * to avoid races with the reconnect code, |
| 2305 | * we need to atomic_inc within the spinlock. */ | 2370 | * we need to atomic_inc within the spinlock. */ |
| 2306 | 2371 | ||
| 2307 | spin_lock_irq(&mdev->req_lock); | 2372 | wait_event(mdev->misc_wait, inc_ap_bio_cond(mdev, count)); |
| 2308 | while (!__inc_ap_bio_cond(mdev)) { | ||
| 2309 | prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE); | ||
| 2310 | spin_unlock_irq(&mdev->req_lock); | ||
| 2311 | schedule(); | ||
| 2312 | finish_wait(&mdev->misc_wait, &wait); | ||
| 2313 | spin_lock_irq(&mdev->req_lock); | ||
| 2314 | } | ||
| 2315 | atomic_add(count, &mdev->ap_bio_cnt); | ||
| 2316 | spin_unlock_irq(&mdev->req_lock); | ||
| 2317 | } | 2373 | } |
| 2318 | 2374 | ||
| 2319 | static inline void dec_ap_bio(struct drbd_conf *mdev) | 2375 | static inline void dec_ap_bio(struct drbd_conf *mdev) |
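The inc_ap_bio() rewrite above collapses an open-coded prepare_to_wait()/schedule() loop into wait_event() on a helper that takes req_lock itself and, on success, bumps the counter before dropping the lock. A userspace analogue of the same shape, with a pthread condvar playing the role of misc_wait; all names and the harness are illustrative, not DRBD code (build with -pthread):

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t req_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t misc_wait = PTHREAD_COND_INITIALIZER;
static int ap_bio_cnt;
static bool suspended = true;

/* evaluated under req_lock; counts atomically w.r.t. state changes */
static bool inc_ap_bio_cond(int count)
{
	if (suspended)			/* stand-in for may_inc_ap_bio() */
		return false;
	ap_bio_cnt += count;
	return true;
}

static void inc_ap_bio(int count)
{
	pthread_mutex_lock(&req_lock);
	while (!inc_ap_bio_cond(count))		/* wait_event(misc_wait, ...) */
		pthread_cond_wait(&misc_wait, &req_lock);
	pthread_mutex_unlock(&req_lock);
}

static void *resume(void *arg)
{
	pthread_mutex_lock(&req_lock);
	suspended = false;
	pthread_cond_broadcast(&misc_wait);	/* like wake_up(&misc_wait) */
	pthread_mutex_unlock(&req_lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, resume, NULL);
	inc_ap_bio(1);				/* blocks until resume() runs */
	pthread_join(t, NULL);
	printf("ap_bio_cnt = %d\n", ap_bio_cnt);
	return 0;
}
```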
| @@ -2333,9 +2389,11 @@ static inline void dec_ap_bio(struct drbd_conf *mdev) | |||
| 2333 | } | 2389 | } |
| 2334 | } | 2390 | } |
| 2335 | 2391 | ||
| 2336 | static inline void drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) | 2392 | static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) |
| 2337 | { | 2393 | { |
| 2394 | int changed = mdev->ed_uuid != val; | ||
| 2338 | mdev->ed_uuid = val; | 2395 | mdev->ed_uuid = val; |
| 2396 | return changed; | ||
| 2339 | } | 2397 | } |
| 2340 | 2398 | ||
| 2341 | static inline int seq_cmp(u32 a, u32 b) | 2399 | static inline int seq_cmp(u32 a, u32 b) |
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 8a43ce0edeed..dfc85f32d317 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c | |||
| @@ -85,7 +85,8 @@ MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, " | |||
| 85 | MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION); | 85 | MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION); |
| 86 | MODULE_VERSION(REL_VERSION); | 86 | MODULE_VERSION(REL_VERSION); |
| 87 | MODULE_LICENSE("GPL"); | 87 | MODULE_LICENSE("GPL"); |
| 88 | MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices (1-255)"); | 88 | MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices (" |
| 89 | __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")"); | ||
| 89 | MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR); | 90 | MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR); |
| 90 | 91 | ||
| 91 | #include <linux/moduleparam.h> | 92 | #include <linux/moduleparam.h> |
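The MODULE_PARM_DESC change relies on two-level macro stringification: __stringify() must expand its argument before applying #, otherwise the description would contain the literal text "DRBD_MINOR_COUNT_MIN" rather than its value. A minimal sketch; the limit values are assumed here (1 and 255) purely for illustration:

```c
#include <stdio.h>

/* inner macro applies #; the outer one forces the argument to be
 * macro-expanded first */
#define __stringify_1(x)	#x
#define __stringify(x)		__stringify_1(x)

#define DRBD_MINOR_COUNT_MIN	1	/* assumed; see drbd_limits.h */
#define DRBD_MINOR_COUNT_MAX	255	/* assumed; see drbd_limits.h */

int main(void)
{
	/* prints: Maximum number of drbd devices (1-255) */
	puts("Maximum number of drbd devices ("
	     __stringify(DRBD_MINOR_COUNT_MIN) "-"
	     __stringify(DRBD_MINOR_COUNT_MAX) ")");
	return 0;
}
```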
| @@ -115,7 +116,7 @@ module_param(fault_devs, int, 0644); | |||
| 115 | #endif | 116 | #endif |
| 116 | 117 | ||
| 117 | /* module parameter, defined */ | 118 | /* module parameter, defined */ |
| 118 | unsigned int minor_count = 32; | 119 | unsigned int minor_count = DRBD_MINOR_COUNT_DEF; |
| 119 | int disable_sendpage; | 120 | int disable_sendpage; |
| 120 | int allow_oos; | 121 | int allow_oos; |
| 121 | unsigned int cn_idx = CN_IDX_DRBD; | 122 | unsigned int cn_idx = CN_IDX_DRBD; |
| @@ -335,6 +336,7 @@ bail: | |||
| 335 | drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); | 336 | drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); |
| 336 | } | 337 | } |
| 337 | 338 | ||
| 339 | |||
| 338 | /** | 340 | /** |
| 339 | * _tl_restart() - Walks the transfer log, and applies an action to all requests | 341 | * _tl_restart() - Walks the transfer log, and applies an action to all requests |
| 340 | * @mdev: DRBD device. | 342 | * @mdev: DRBD device. |
| @@ -456,7 +458,7 @@ void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) | |||
| 456 | } | 458 | } |
| 457 | 459 | ||
| 458 | /** | 460 | /** |
| 459 | * cl_wide_st_chg() - TRUE if the state change is a cluster wide one | 461 | * cl_wide_st_chg() - true if the state change is a cluster wide one |
| 460 | * @mdev: DRBD device. | 462 | * @mdev: DRBD device. |
| 461 | * @os: old (current) state. | 463 | * @os: old (current) state. |
| 462 | * @ns: new (wanted) state. | 464 | * @ns: new (wanted) state. |
| @@ -473,12 +475,13 @@ static int cl_wide_st_chg(struct drbd_conf *mdev, | |||
| 473 | (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S); | 475 | (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S); |
| 474 | } | 476 | } |
| 475 | 477 | ||
| 476 | int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, | 478 | enum drbd_state_rv |
| 477 | union drbd_state mask, union drbd_state val) | 479 | drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, |
| 480 | union drbd_state mask, union drbd_state val) | ||
| 478 | { | 481 | { |
| 479 | unsigned long flags; | 482 | unsigned long flags; |
| 480 | union drbd_state os, ns; | 483 | union drbd_state os, ns; |
| 481 | int rv; | 484 | enum drbd_state_rv rv; |
| 482 | 485 | ||
| 483 | spin_lock_irqsave(&mdev->req_lock, flags); | 486 | spin_lock_irqsave(&mdev->req_lock, flags); |
| 484 | os = mdev->state; | 487 | os = mdev->state; |
| @@ -502,20 +505,22 @@ void drbd_force_state(struct drbd_conf *mdev, | |||
| 502 | drbd_change_state(mdev, CS_HARD, mask, val); | 505 | drbd_change_state(mdev, CS_HARD, mask, val); |
| 503 | } | 506 | } |
| 504 | 507 | ||
| 505 | static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns); | 508 | static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); |
| 506 | static int is_valid_state_transition(struct drbd_conf *, | 509 | static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *, |
| 507 | union drbd_state, union drbd_state); | 510 | union drbd_state, |
| 511 | union drbd_state); | ||
| 508 | static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, | 512 | static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, |
| 509 | union drbd_state ns, const char **warn_sync_abort); | 513 | union drbd_state ns, const char **warn_sync_abort); |
| 510 | int drbd_send_state_req(struct drbd_conf *, | 514 | int drbd_send_state_req(struct drbd_conf *, |
| 511 | union drbd_state, union drbd_state); | 515 | union drbd_state, union drbd_state); |
| 512 | 516 | ||
| 513 | static enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev, | 517 | static enum drbd_state_rv |
| 514 | union drbd_state mask, union drbd_state val) | 518 | _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, |
| 519 | union drbd_state val) | ||
| 515 | { | 520 | { |
| 516 | union drbd_state os, ns; | 521 | union drbd_state os, ns; |
| 517 | unsigned long flags; | 522 | unsigned long flags; |
| 518 | int rv; | 523 | enum drbd_state_rv rv; |
| 519 | 524 | ||
| 520 | if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags)) | 525 | if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags)) |
| 521 | return SS_CW_SUCCESS; | 526 | return SS_CW_SUCCESS; |
| @@ -536,7 +541,7 @@ static enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev, | |||
| 536 | if (rv == SS_SUCCESS) { | 541 | if (rv == SS_SUCCESS) { |
| 537 | rv = is_valid_state_transition(mdev, ns, os); | 542 | rv = is_valid_state_transition(mdev, ns, os); |
| 538 | if (rv == SS_SUCCESS) | 543 | if (rv == SS_SUCCESS) |
| 539 | rv = 0; /* cont waiting, otherwise fail. */ | 544 | rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ |
| 540 | } | 545 | } |
| 541 | } | 546 | } |
| 542 | spin_unlock_irqrestore(&mdev->req_lock, flags); | 547 | spin_unlock_irqrestore(&mdev->req_lock, flags); |
| @@ -554,14 +559,14 @@ static enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev, | |||
| 554 | * Should not be called directly, use drbd_request_state() or | 559 | * Should not be called directly, use drbd_request_state() or |
| 555 | * _drbd_request_state(). | 560 | * _drbd_request_state(). |
| 556 | */ | 561 | */ |
| 557 | static int drbd_req_state(struct drbd_conf *mdev, | 562 | static enum drbd_state_rv |
| 558 | union drbd_state mask, union drbd_state val, | 563 | drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, |
| 559 | enum chg_state_flags f) | 564 | union drbd_state val, enum chg_state_flags f) |
| 560 | { | 565 | { |
| 561 | struct completion done; | 566 | struct completion done; |
| 562 | unsigned long flags; | 567 | unsigned long flags; |
| 563 | union drbd_state os, ns; | 568 | union drbd_state os, ns; |
| 564 | int rv; | 569 | enum drbd_state_rv rv; |
| 565 | 570 | ||
| 566 | init_completion(&done); | 571 | init_completion(&done); |
| 567 | 572 | ||
| @@ -636,10 +641,11 @@ abort: | |||
| 636 | * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE | 641 | * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE |
| 637 | * flag, or when logging of failed state change requests is not desired. | 642 | * flag, or when logging of failed state change requests is not desired. |
| 638 | */ | 643 | */ |
| 639 | int _drbd_request_state(struct drbd_conf *mdev, union drbd_state mask, | 644 | enum drbd_state_rv |
| 640 | union drbd_state val, enum chg_state_flags f) | 645 | _drbd_request_state(struct drbd_conf *mdev, union drbd_state mask, |
| 646 | union drbd_state val, enum chg_state_flags f) | ||
| 641 | { | 647 | { |
| 642 | int rv; | 648 | enum drbd_state_rv rv; |
| 643 | 649 | ||
| 644 | wait_event(mdev->state_wait, | 650 | wait_event(mdev->state_wait, |
| 645 | (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE); | 651 | (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE); |
| @@ -663,8 +669,8 @@ static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns) | |||
| 663 | ); | 669 | ); |
| 664 | } | 670 | } |
| 665 | 671 | ||
| 666 | void print_st_err(struct drbd_conf *mdev, | 672 | void print_st_err(struct drbd_conf *mdev, union drbd_state os, |
| 667 | union drbd_state os, union drbd_state ns, int err) | 673 | union drbd_state ns, enum drbd_state_rv err) |
| 668 | { | 674 | { |
| 669 | if (err == SS_IN_TRANSIENT_STATE) | 675 | if (err == SS_IN_TRANSIENT_STATE) |
| 670 | return; | 676 | return; |
| @@ -674,32 +680,18 @@ void print_st_err(struct drbd_conf *mdev, | |||
| 674 | } | 680 | } |
| 675 | 681 | ||
| 676 | 682 | ||
| 677 | #define drbd_peer_str drbd_role_str | ||
| 678 | #define drbd_pdsk_str drbd_disk_str | ||
| 679 | |||
| 680 | #define drbd_susp_str(A) ((A) ? "1" : "0") | ||
| 681 | #define drbd_aftr_isp_str(A) ((A) ? "1" : "0") | ||
| 682 | #define drbd_peer_isp_str(A) ((A) ? "1" : "0") | ||
| 683 | #define drbd_user_isp_str(A) ((A) ? "1" : "0") | ||
| 684 | |||
| 685 | #define PSC(A) \ | ||
| 686 | ({ if (ns.A != os.A) { \ | ||
| 687 | pbp += sprintf(pbp, #A "( %s -> %s ) ", \ | ||
| 688 | drbd_##A##_str(os.A), \ | ||
| 689 | drbd_##A##_str(ns.A)); \ | ||
| 690 | } }) | ||
| 691 | |||
| 692 | /** | 683 | /** |
| 693 | * is_valid_state() - Returns an SS_ error code if ns is not valid | 684 | * is_valid_state() - Returns an SS_ error code if ns is not valid |
| 694 | * @mdev: DRBD device. | 685 | * @mdev: DRBD device. |
| 695 | * @ns: State to consider. | 686 | * @ns: State to consider. |
| 696 | */ | 687 | */ |
| 697 | static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns) | 688 | static enum drbd_state_rv |
| 689 | is_valid_state(struct drbd_conf *mdev, union drbd_state ns) | ||
| 698 | { | 690 | { |
| 699 | /* See drbd_state_sw_errors in drbd_strings.c */ | 691 | /* See drbd_state_sw_errors in drbd_strings.c */ |
| 700 | 692 | ||
| 701 | enum drbd_fencing_p fp; | 693 | enum drbd_fencing_p fp; |
| 702 | int rv = SS_SUCCESS; | 694 | enum drbd_state_rv rv = SS_SUCCESS; |
| 703 | 695 | ||
| 704 | fp = FP_DONT_CARE; | 696 | fp = FP_DONT_CARE; |
| 705 | if (get_ldev(mdev)) { | 697 | if (get_ldev(mdev)) { |
| @@ -762,10 +754,11 @@ static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns) | |||
| 762 | * @ns: new state. | 754 | * @ns: new state. |
| 763 | * @os: old state. | 755 | * @os: old state. |
| 764 | */ | 756 | */ |
| 765 | static int is_valid_state_transition(struct drbd_conf *mdev, | 757 | static enum drbd_state_rv |
| 766 | union drbd_state ns, union drbd_state os) | 758 | is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns, |
| 759 | union drbd_state os) | ||
| 767 | { | 760 | { |
| 768 | int rv = SS_SUCCESS; | 761 | enum drbd_state_rv rv = SS_SUCCESS; |
| 769 | 762 | ||
| 770 | if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) && | 763 | if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) && |
| 771 | os.conn > C_CONNECTED) | 764 | os.conn > C_CONNECTED) |
| @@ -800,6 +793,10 @@ static int is_valid_state_transition(struct drbd_conf *mdev, | |||
| 800 | os.conn < C_CONNECTED) | 793 | os.conn < C_CONNECTED) |
| 801 | rv = SS_NEED_CONNECTION; | 794 | rv = SS_NEED_CONNECTION; |
| 802 | 795 | ||
| 796 | if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE) | ||
| 797 | && os.conn < C_WF_REPORT_PARAMS) | ||
| 798 | rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */ | ||
| 799 | |||
| 803 | return rv; | 800 | return rv; |
| 804 | } | 801 | } |
| 805 | 802 | ||
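
The guard added at the end of `is_valid_state_transition()` leans on `enum drbd_conns` being ordered, so "has the handshake reached parameter exchange yet" is a single comparison. A compilable sketch of the check; the enum values here are an illustrative subset in the same relative order, not the real definitions:

```c
#include <stdio.h>

/* Illustrative subset of enum drbd_conns, kept in its real relative order:
 * everything before C_WF_REPORT_PARAMS means the handshake is not done. */
enum conns { C_STANDALONE, C_NETWORK_FAILURE, C_WF_CONNECTION,
             C_WF_REPORT_PARAMS, C_CONNECTED, C_SYNC_SOURCE, C_SYNC_TARGET };

enum rv { SS_SUCCESS = 1, SS_NEED_CONNECTION = -7 };

/* Mirrors the added guard: entering a resync role is only valid once the
 * connection has at least reached the parameter exchange, so e.g.
 * NetworkFailure -> SyncTarget is rejected. */
static enum rv check_transition(enum conns os, enum conns ns)
{
	if ((ns == C_SYNC_TARGET || ns == C_SYNC_SOURCE) &&
	    os < C_WF_REPORT_PARAMS)
		return SS_NEED_CONNECTION;
	return SS_SUCCESS;
}

int main(void)
{
	printf("%d\n", check_transition(C_NETWORK_FAILURE, C_SYNC_TARGET)); /* -7 */
	printf("%d\n", check_transition(C_CONNECTED, C_SYNC_SOURCE));       /*  1 */
	return 0;
}
```
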
| @@ -817,6 +814,7 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state | |||
| 817 | union drbd_state ns, const char **warn_sync_abort) | 814 | union drbd_state ns, const char **warn_sync_abort) |
| 818 | { | 815 | { |
| 819 | enum drbd_fencing_p fp; | 816 | enum drbd_fencing_p fp; |
| 817 | enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max; | ||
| 820 | 818 | ||
| 821 | fp = FP_DONT_CARE; | 819 | fp = FP_DONT_CARE; |
| 822 | if (get_ldev(mdev)) { | 820 | if (get_ldev(mdev)) { |
| @@ -869,56 +867,6 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state | |||
| 869 | ns.conn = C_CONNECTED; | 867 | ns.conn = C_CONNECTED; |
| 870 | } | 868 | } |
| 871 | 869 | ||
| 872 | if (ns.conn >= C_CONNECTED && | ||
| 873 | ((ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED) || | ||
| 874 | (ns.disk == D_NEGOTIATING && ns.conn == C_WF_BITMAP_T))) { | ||
| 875 | switch (ns.conn) { | ||
| 876 | case C_WF_BITMAP_T: | ||
| 877 | case C_PAUSED_SYNC_T: | ||
| 878 | ns.disk = D_OUTDATED; | ||
| 879 | break; | ||
| 880 | case C_CONNECTED: | ||
| 881 | case C_WF_BITMAP_S: | ||
| 882 | case C_SYNC_SOURCE: | ||
| 883 | case C_PAUSED_SYNC_S: | ||
| 884 | ns.disk = D_UP_TO_DATE; | ||
| 885 | break; | ||
| 886 | case C_SYNC_TARGET: | ||
| 887 | ns.disk = D_INCONSISTENT; | ||
| 888 | dev_warn(DEV, "Implicitly set disk state Inconsistent!\n"); | ||
| 889 | break; | ||
| 890 | } | ||
| 891 | if (os.disk == D_OUTDATED && ns.disk == D_UP_TO_DATE) | ||
| 892 | dev_warn(DEV, "Implicitly set disk from Outdated to UpToDate\n"); | ||
| 893 | } | ||
| 894 | |||
| 895 | if (ns.conn >= C_CONNECTED && | ||
| 896 | (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)) { | ||
| 897 | switch (ns.conn) { | ||
| 898 | case C_CONNECTED: | ||
| 899 | case C_WF_BITMAP_T: | ||
| 900 | case C_PAUSED_SYNC_T: | ||
| 901 | case C_SYNC_TARGET: | ||
| 902 | ns.pdsk = D_UP_TO_DATE; | ||
| 903 | break; | ||
| 904 | case C_WF_BITMAP_S: | ||
| 905 | case C_PAUSED_SYNC_S: | ||
| 906 | /* remap any consistent state to D_OUTDATED, | ||
| 907 | * but disallow "upgrade" of not even consistent states. | ||
| 908 | */ | ||
| 909 | ns.pdsk = | ||
| 910 | (D_DISKLESS < os.pdsk && os.pdsk < D_OUTDATED) | ||
| 911 | ? os.pdsk : D_OUTDATED; | ||
| 912 | break; | ||
| 913 | case C_SYNC_SOURCE: | ||
| 914 | ns.pdsk = D_INCONSISTENT; | ||
| 915 | dev_warn(DEV, "Implicitly set pdsk Inconsistent!\n"); | ||
| 916 | break; | ||
| 917 | } | ||
| 918 | if (os.pdsk == D_OUTDATED && ns.pdsk == D_UP_TO_DATE) | ||
| 919 | dev_warn(DEV, "Implicitly set pdsk from Outdated to UpToDate\n"); | ||
| 920 | } | ||
| 921 | |||
| 922 | /* Connection breaks down before we finished "Negotiating" */ | 870 | /* Connection breaks down before we finished "Negotiating" */ |
| 923 | if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING && | 871 | if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING && |
| 924 | get_ldev_if_state(mdev, D_NEGOTIATING)) { | 872 | get_ldev_if_state(mdev, D_NEGOTIATING)) { |
| @@ -933,6 +881,94 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state | |||
| 933 | put_ldev(mdev); | 881 | put_ldev(mdev); |
| 934 | } | 882 | } |
| 935 | 883 | ||
| 884 | /* D_CONSISTENT and D_OUTDATED vanish when we get connected */ | ||
| 885 | if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) { | ||
| 886 | if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED) | ||
| 887 | ns.disk = D_UP_TO_DATE; | ||
| 888 | if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED) | ||
| 889 | ns.pdsk = D_UP_TO_DATE; | ||
| 890 | } | ||
| 891 | |||
| 892 | /* Implications of the connection state on the disk states */ | ||
| 893 | disk_min = D_DISKLESS; | ||
| 894 | disk_max = D_UP_TO_DATE; | ||
| 895 | pdsk_min = D_INCONSISTENT; | ||
| 896 | pdsk_max = D_UNKNOWN; | ||
| 897 | switch ((enum drbd_conns)ns.conn) { | ||
| 898 | case C_WF_BITMAP_T: | ||
| 899 | case C_PAUSED_SYNC_T: | ||
| 900 | case C_STARTING_SYNC_T: | ||
| 901 | case C_WF_SYNC_UUID: | ||
| 902 | case C_BEHIND: | ||
| 903 | disk_min = D_INCONSISTENT; | ||
| 904 | disk_max = D_OUTDATED; | ||
| 905 | pdsk_min = D_UP_TO_DATE; | ||
| 906 | pdsk_max = D_UP_TO_DATE; | ||
| 907 | break; | ||
| 908 | case C_VERIFY_S: | ||
| 909 | case C_VERIFY_T: | ||
| 910 | disk_min = D_UP_TO_DATE; | ||
| 911 | disk_max = D_UP_TO_DATE; | ||
| 912 | pdsk_min = D_UP_TO_DATE; | ||
| 913 | pdsk_max = D_UP_TO_DATE; | ||
| 914 | break; | ||
| 915 | case C_CONNECTED: | ||
| 916 | disk_min = D_DISKLESS; | ||
| 917 | disk_max = D_UP_TO_DATE; | ||
| 918 | pdsk_min = D_DISKLESS; | ||
| 919 | pdsk_max = D_UP_TO_DATE; | ||
| 920 | break; | ||
| 921 | case C_WF_BITMAP_S: | ||
| 922 | case C_PAUSED_SYNC_S: | ||
| 923 | case C_STARTING_SYNC_S: | ||
| 924 | case C_AHEAD: | ||
| 925 | disk_min = D_UP_TO_DATE; | ||
| 926 | disk_max = D_UP_TO_DATE; | ||
| 927 | pdsk_min = D_INCONSISTENT; | ||
| 928 | pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/ | ||
| 929 | break; | ||
| 930 | case C_SYNC_TARGET: | ||
| 931 | disk_min = D_INCONSISTENT; | ||
| 932 | disk_max = D_INCONSISTENT; | ||
| 933 | pdsk_min = D_UP_TO_DATE; | ||
| 934 | pdsk_max = D_UP_TO_DATE; | ||
| 935 | break; | ||
| 936 | case C_SYNC_SOURCE: | ||
| 937 | disk_min = D_UP_TO_DATE; | ||
| 938 | disk_max = D_UP_TO_DATE; | ||
| 939 | pdsk_min = D_INCONSISTENT; | ||
| 940 | pdsk_max = D_INCONSISTENT; | ||
| 941 | break; | ||
| 942 | case C_STANDALONE: | ||
| 943 | case C_DISCONNECTING: | ||
| 944 | case C_UNCONNECTED: | ||
| 945 | case C_TIMEOUT: | ||
| 946 | case C_BROKEN_PIPE: | ||
| 947 | case C_NETWORK_FAILURE: | ||
| 948 | case C_PROTOCOL_ERROR: | ||
| 949 | case C_TEAR_DOWN: | ||
| 950 | case C_WF_CONNECTION: | ||
| 951 | case C_WF_REPORT_PARAMS: | ||
| 952 | case C_MASK: | ||
| 953 | break; | ||
| 954 | } | ||
| 955 | if (ns.disk > disk_max) | ||
| 956 | ns.disk = disk_max; | ||
| 957 | |||
| 958 | if (ns.disk < disk_min) { | ||
| 959 | dev_warn(DEV, "Implicitly set disk from %s to %s\n", | ||
| 960 | drbd_disk_str(ns.disk), drbd_disk_str(disk_min)); | ||
| 961 | ns.disk = disk_min; | ||
| 962 | } | ||
| 963 | if (ns.pdsk > pdsk_max) | ||
| 964 | ns.pdsk = pdsk_max; | ||
| 965 | |||
| 966 | if (ns.pdsk < pdsk_min) { | ||
| 967 | dev_warn(DEV, "Implicitly set pdsk from %s to %s\n", | ||
| 968 | drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min)); | ||
| 969 | ns.pdsk = pdsk_min; | ||
| 970 | } | ||
| 971 | |||
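
The rewritten section replaces the two removed special-case switches with per-connection-state `[min, max]` bounds, then clamps `disk` and `pdsk` into range, warning only when a state has to be raised. A self-contained sketch of that clamping idiom; the enum is an illustrative subset of `drbd_disk_state` and `clamp_disk()` is a hypothetical helper:

```c
#include <stdio.h>

/* Illustrative subset of enum drbd_disk_state, in its real relative order. */
enum disk { D_DISKLESS, D_INCONSISTENT, D_OUTDATED, D_CONSISTENT, D_UP_TO_DATE };

static const char *disk_str(enum disk d)
{
	static const char *n[] = { "Diskless", "Inconsistent", "Outdated",
				   "Consistent", "UpToDate" };
	return n[d];
}

/* The clamping idiom: cap states that are too optimistic silently, but
 * warn when a state has to be raised implicitly, as sanitize_state() does. */
static enum disk clamp_disk(enum disk d, enum disk min, enum disk max)
{
	if (d > max)
		d = max;
	if (d < min) {
		fprintf(stderr, "Implicitly set disk from %s to %s\n",
			disk_str(d), disk_str(min));
		d = min;
	}
	return d;
}

int main(void)
{
	/* Bounds as for C_SYNC_TARGET: disk must be exactly Inconsistent. */
	printf("%s\n", disk_str(clamp_disk(D_UP_TO_DATE,
					   D_INCONSISTENT, D_INCONSISTENT)));
	printf("%s\n", disk_str(clamp_disk(D_DISKLESS,
					   D_INCONSISTENT, D_INCONSISTENT)));
	return 0;
}
```

Only the raising path warns, because lowering a too-optimistic state is always safe, while raising one changes what the peer may rely on.
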
| 936 | if (fp == FP_STONITH && | 972 | if (fp == FP_STONITH && |
| 937 | (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) && | 973 | (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) && |
| 938 | !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED)) | 974 | !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED)) |
| @@ -961,6 +997,10 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state | |||
| 961 | /* helper for __drbd_set_state */ | 997 | /* helper for __drbd_set_state */ |
| 962 | static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs) | 998 | static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs) |
| 963 | { | 999 | { |
| 1000 | if (mdev->agreed_pro_version < 90) | ||
| 1001 | mdev->ov_start_sector = 0; | ||
| 1002 | mdev->rs_total = drbd_bm_bits(mdev); | ||
| 1003 | mdev->ov_position = 0; | ||
| 964 | if (cs == C_VERIFY_T) { | 1004 | if (cs == C_VERIFY_T) { |
| 965 | /* starting online verify from an arbitrary position | 1005 | /* starting online verify from an arbitrary position |
| 966 | * does not fit well into the existing protocol. | 1006 | * does not fit well into the existing protocol. |
| @@ -970,11 +1010,15 @@ static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs) | |||
| 970 | mdev->ov_start_sector = ~(sector_t)0; | 1010 | mdev->ov_start_sector = ~(sector_t)0; |
| 971 | } else { | 1011 | } else { |
| 972 | unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector); | 1012 | unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector); |
| 973 | if (bit >= mdev->rs_total) | 1013 | if (bit >= mdev->rs_total) { |
| 974 | mdev->ov_start_sector = | 1014 | mdev->ov_start_sector = |
| 975 | BM_BIT_TO_SECT(mdev->rs_total - 1); | 1015 | BM_BIT_TO_SECT(mdev->rs_total - 1); |
| 1016 | mdev->rs_total = 1; | ||
| 1017 | } else | ||
| 1018 | mdev->rs_total -= bit; | ||
| 976 | mdev->ov_position = mdev->ov_start_sector; | 1019 | mdev->ov_position = mdev->ov_start_sector; |
| 977 | } | 1020 | } |
| 1021 | mdev->ov_left = mdev->rs_total; | ||
| 978 | } | 1022 | } |
| 979 | 1023 | ||
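
`set_ov_position()` now derives the amount of work from the resume sector itself: translate the sector into a bitmap bit and verify from there to the end, or fall back to the very last block if the resume point ran past the device. A sketch of the arithmetic, assuming drbd's usual granularity of one bitmap bit per 4 KiB block (eight 512-byte sectors, hence the shift by 3):

```c
#include <stdio.h>
#include <stdint.h>

typedef uint64_t sector_t;

/* Assumption: one bitmap bit covers one 4 KiB block, i.e. 8 sectors of
 * 512 bytes, so the sector/bit conversion is a shift by 3. */
#define BM_SECT_TO_BIT(s)  ((s) >> 3)
#define BM_BIT_TO_SECT(b)  ((sector_t)(b) << 3)

int main(void)
{
	unsigned long bm_bits = 1000;    /* drbd_bm_bits(mdev), illustrative */
	sector_t ov_start_sector = 4096; /* resume position from a prior run */
	unsigned long rs_total, ov_left;
	unsigned long bit = BM_SECT_TO_BIT(ov_start_sector);

	if (bit >= bm_bits) {            /* resume point ran past the end: */
		ov_start_sector = BM_BIT_TO_SECT(bm_bits - 1);
		rs_total = 1;            /* verify just the last block */
	} else {
		rs_total = bm_bits - bit; /* blocks left from the resume bit */
	}
	ov_left = rs_total;              /* nothing verified yet this run */

	printf("start sector %llu, %lu blocks to verify\n",
	       (unsigned long long)ov_start_sector, ov_left);
	return 0;
}
```
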
| 980 | static void drbd_resume_al(struct drbd_conf *mdev) | 1024 | static void drbd_resume_al(struct drbd_conf *mdev) |
| @@ -992,12 +1036,12 @@ static void drbd_resume_al(struct drbd_conf *mdev) | |||
| 992 | * | 1036 | * |
| 993 | * Caller needs to hold req_lock, and global_state_lock. Do not call directly. | 1037 | * Caller needs to hold req_lock, and global_state_lock. Do not call directly. |
| 994 | */ | 1038 | */ |
| 995 | int __drbd_set_state(struct drbd_conf *mdev, | 1039 | enum drbd_state_rv |
| 996 | union drbd_state ns, enum chg_state_flags flags, | 1040 | __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, |
| 997 | struct completion *done) | 1041 | enum chg_state_flags flags, struct completion *done) |
| 998 | { | 1042 | { |
| 999 | union drbd_state os; | 1043 | union drbd_state os; |
| 1000 | int rv = SS_SUCCESS; | 1044 | enum drbd_state_rv rv = SS_SUCCESS; |
| 1001 | const char *warn_sync_abort = NULL; | 1045 | const char *warn_sync_abort = NULL; |
| 1002 | struct after_state_chg_work *ascw; | 1046 | struct after_state_chg_work *ascw; |
| 1003 | 1047 | ||
| @@ -1033,22 +1077,46 @@ int __drbd_set_state(struct drbd_conf *mdev, | |||
| 1033 | dev_warn(DEV, "%s aborted.\n", warn_sync_abort); | 1077 | dev_warn(DEV, "%s aborted.\n", warn_sync_abort); |
| 1034 | 1078 | ||
| 1035 | { | 1079 | { |
| 1036 | char *pbp, pb[300]; | 1080 | char *pbp, pb[300]; |
| 1037 | pbp = pb; | 1081 | pbp = pb; |
| 1038 | *pbp = 0; | 1082 | *pbp = 0; |
| 1039 | PSC(role); | 1083 | if (ns.role != os.role) |
| 1040 | PSC(peer); | 1084 | pbp += sprintf(pbp, "role( %s -> %s ) ", |
| 1041 | PSC(conn); | 1085 | drbd_role_str(os.role), |
| 1042 | PSC(disk); | 1086 | drbd_role_str(ns.role)); |
| 1043 | PSC(pdsk); | 1087 | if (ns.peer != os.peer) |
| 1044 | if (is_susp(ns) != is_susp(os)) | 1088 | pbp += sprintf(pbp, "peer( %s -> %s ) ", |
| 1045 | pbp += sprintf(pbp, "susp( %s -> %s ) ", | 1089 | drbd_role_str(os.peer), |
| 1046 | drbd_susp_str(is_susp(os)), | 1090 | drbd_role_str(ns.peer)); |
| 1047 | drbd_susp_str(is_susp(ns))); | 1091 | if (ns.conn != os.conn) |
| 1048 | PSC(aftr_isp); | 1092 | pbp += sprintf(pbp, "conn( %s -> %s ) ", |
| 1049 | PSC(peer_isp); | 1093 | drbd_conn_str(os.conn), |
| 1050 | PSC(user_isp); | 1094 | drbd_conn_str(ns.conn)); |
| 1051 | dev_info(DEV, "%s\n", pb); | 1095 | if (ns.disk != os.disk) |
| 1096 | pbp += sprintf(pbp, "disk( %s -> %s ) ", | ||
| 1097 | drbd_disk_str(os.disk), | ||
| 1098 | drbd_disk_str(ns.disk)); | ||
| 1099 | if (ns.pdsk != os.pdsk) | ||
| 1100 | pbp += sprintf(pbp, "pdsk( %s -> %s ) ", | ||
| 1101 | drbd_disk_str(os.pdsk), | ||
| 1102 | drbd_disk_str(ns.pdsk)); | ||
| 1103 | if (is_susp(ns) != is_susp(os)) | ||
| 1104 | pbp += sprintf(pbp, "susp( %d -> %d ) ", | ||
| 1105 | is_susp(os), | ||
| 1106 | is_susp(ns)); | ||
| 1107 | if (ns.aftr_isp != os.aftr_isp) | ||
| 1108 | pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ", | ||
| 1109 | os.aftr_isp, | ||
| 1110 | ns.aftr_isp); | ||
| 1111 | if (ns.peer_isp != os.peer_isp) | ||
| 1112 | pbp += sprintf(pbp, "peer_isp( %d -> %d ) ", | ||
| 1113 | os.peer_isp, | ||
| 1114 | ns.peer_isp); | ||
| 1115 | if (ns.user_isp != os.user_isp) | ||
| 1116 | pbp += sprintf(pbp, "user_isp( %d -> %d ) ", | ||
| 1117 | os.user_isp, | ||
| 1118 | ns.user_isp); | ||
| 1119 | dev_info(DEV, "%s\n", pb); | ||
| 1052 | } | 1120 | } |
| 1053 | 1121 | ||
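
The unrolled report code above relies on `sprintf()` returning the number of characters it wrote, so `pbp += sprintf(pbp, ...)` appends each changed field to a single buffer. A compilable sketch of the idiom with made-up state fields:

```c
#include <stdio.h>

struct state { int role; int disk; };   /* made-up two-field state */

static const char *role_str(int r) { return r ? "Primary" : "Secondary"; }

int main(void)
{
	struct state os = { .role = 0, .disk = 4 };
	struct state ns = { .role = 1, .disk = 2 };
	char pb[300], *pbp = pb;

	*pbp = 0;
	/* sprintf() returns the number of characters written, so advancing
	 * the cursor by its return value appends each changed field in
	 * place, which is the same idiom the unrolled PSC() macro used. */
	if (ns.role != os.role)
		pbp += sprintf(pbp, "role( %s -> %s ) ",
			       role_str(os.role), role_str(ns.role));
	if (ns.disk != os.disk)
		pbp += sprintf(pbp, "disk( %d -> %d ) ", os.disk, ns.disk);

	printf("%s\n", pb);  /* role( Secondary -> Primary ) disk( 4 -> 2 ) */
	return 0;
}
```
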
| 1054 | /* solve the race between becoming unconfigured, | 1122 | /* solve the race between becoming unconfigured, |
| @@ -1074,6 +1142,10 @@ int __drbd_set_state(struct drbd_conf *mdev, | |||
| 1074 | atomic_inc(&mdev->local_cnt); | 1142 | atomic_inc(&mdev->local_cnt); |
| 1075 | 1143 | ||
| 1076 | mdev->state = ns; | 1144 | mdev->state = ns; |
| 1145 | |||
| 1146 | if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) | ||
| 1147 | drbd_print_uuids(mdev, "attached to UUIDs"); | ||
| 1148 | |||
| 1077 | wake_up(&mdev->misc_wait); | 1149 | wake_up(&mdev->misc_wait); |
| 1078 | wake_up(&mdev->state_wait); | 1150 | wake_up(&mdev->state_wait); |
| 1079 | 1151 | ||
| @@ -1081,7 +1153,7 @@ int __drbd_set_state(struct drbd_conf *mdev, | |||
| 1081 | if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && | 1153 | if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && |
| 1082 | ns.conn < C_CONNECTED) { | 1154 | ns.conn < C_CONNECTED) { |
| 1083 | mdev->ov_start_sector = | 1155 | mdev->ov_start_sector = |
| 1084 | BM_BIT_TO_SECT(mdev->rs_total - mdev->ov_left); | 1156 | BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left); |
| 1085 | dev_info(DEV, "Online Verify reached sector %llu\n", | 1157 | dev_info(DEV, "Online Verify reached sector %llu\n", |
| 1086 | (unsigned long long)mdev->ov_start_sector); | 1158 | (unsigned long long)mdev->ov_start_sector); |
| 1087 | } | 1159 | } |
| @@ -1106,14 +1178,7 @@ int __drbd_set_state(struct drbd_conf *mdev, | |||
| 1106 | unsigned long now = jiffies; | 1178 | unsigned long now = jiffies; |
| 1107 | int i; | 1179 | int i; |
| 1108 | 1180 | ||
| 1109 | mdev->ov_position = 0; | 1181 | set_ov_position(mdev, ns.conn); |
| 1110 | mdev->rs_total = drbd_bm_bits(mdev); | ||
| 1111 | if (mdev->agreed_pro_version >= 90) | ||
| 1112 | set_ov_position(mdev, ns.conn); | ||
| 1113 | else | ||
| 1114 | mdev->ov_start_sector = 0; | ||
| 1115 | mdev->ov_left = mdev->rs_total | ||
| 1116 | - BM_SECT_TO_BIT(mdev->ov_position); | ||
| 1117 | mdev->rs_start = now; | 1182 | mdev->rs_start = now; |
| 1118 | mdev->rs_last_events = 0; | 1183 | mdev->rs_last_events = 0; |
| 1119 | mdev->rs_last_sect_ev = 0; | 1184 | mdev->rs_last_sect_ev = 0; |
| @@ -1121,10 +1186,12 @@ int __drbd_set_state(struct drbd_conf *mdev, | |||
| 1121 | mdev->ov_last_oos_start = 0; | 1186 | mdev->ov_last_oos_start = 0; |
| 1122 | 1187 | ||
| 1123 | for (i = 0; i < DRBD_SYNC_MARKS; i++) { | 1188 | for (i = 0; i < DRBD_SYNC_MARKS; i++) { |
| 1124 | mdev->rs_mark_left[i] = mdev->rs_total; | 1189 | mdev->rs_mark_left[i] = mdev->ov_left; |
| 1125 | mdev->rs_mark_time[i] = now; | 1190 | mdev->rs_mark_time[i] = now; |
| 1126 | } | 1191 | } |
| 1127 | 1192 | ||
| 1193 | drbd_rs_controller_reset(mdev); | ||
| 1194 | |||
| 1128 | if (ns.conn == C_VERIFY_S) { | 1195 | if (ns.conn == C_VERIFY_S) { |
| 1129 | dev_info(DEV, "Starting Online Verify from sector %llu\n", | 1196 | dev_info(DEV, "Starting Online Verify from sector %llu\n", |
| 1130 | (unsigned long long)mdev->ov_position); | 1197 | (unsigned long long)mdev->ov_position); |
| @@ -1228,6 +1295,26 @@ static void abw_start_sync(struct drbd_conf *mdev, int rv) | |||
| 1228 | } | 1295 | } |
| 1229 | } | 1296 | } |
| 1230 | 1297 | ||
| 1298 | int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, | ||
| 1299 | int (*io_fn)(struct drbd_conf *), | ||
| 1300 | char *why, enum bm_flag flags) | ||
| 1301 | { | ||
| 1302 | int rv; | ||
| 1303 | |||
| 1304 | D_ASSERT(current == mdev->worker.task); | ||
| 1305 | |||
| 1306 | /* open coded non-blocking drbd_suspend_io(mdev); */ | ||
| 1307 | set_bit(SUSPEND_IO, &mdev->flags); | ||
| 1308 | |||
| 1309 | drbd_bm_lock(mdev, why, flags); | ||
| 1310 | rv = io_fn(mdev); | ||
| 1311 | drbd_bm_unlock(mdev); | ||
| 1312 | |||
| 1313 | drbd_resume_io(mdev); | ||
| 1314 | |||
| 1315 | return rv; | ||
| 1316 | } | ||
| 1317 | |||
| 1231 | /** | 1318 | /** |
| 1232 | * after_state_ch() - Perform after state change actions that may sleep | 1319 | * after_state_ch() - Perform after state change actions that may sleep |
| 1233 | * @mdev: DRBD device. | 1320 | * @mdev: DRBD device. |
| @@ -1266,16 +1353,14 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
| 1266 | 1353 | ||
| 1267 | nsm.i = -1; | 1354 | nsm.i = -1; |
| 1268 | if (ns.susp_nod) { | 1355 | if (ns.susp_nod) { |
| 1269 | if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { | 1356 | if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) |
| 1270 | if (ns.conn == C_CONNECTED) | 1357 | what = resend; |
| 1271 | what = resend, nsm.susp_nod = 0; | ||
| 1272 | else /* ns.conn > C_CONNECTED */ | ||
| 1273 | dev_err(DEV, "Unexpected Resynd going on!\n"); | ||
| 1274 | } | ||
| 1275 | 1358 | ||
| 1276 | if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING) | 1359 | if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING) |
| 1277 | what = restart_frozen_disk_io, nsm.susp_nod = 0; | 1360 | what = restart_frozen_disk_io; |
| 1278 | 1361 | ||
| 1362 | if (what != nothing) | ||
| 1363 | nsm.susp_nod = 0; | ||
| 1279 | } | 1364 | } |
| 1280 | 1365 | ||
| 1281 | if (ns.susp_fen) { | 1366 | if (ns.susp_fen) { |
| @@ -1306,13 +1391,30 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
| 1306 | spin_unlock_irq(&mdev->req_lock); | 1391 | spin_unlock_irq(&mdev->req_lock); |
| 1307 | } | 1392 | } |
| 1308 | 1393 | ||
| 1394 | /* Became sync source. With protocol >= 96, we still need to send out | ||
| 1395 | * the sync uuid now. Need to do that before any drbd_send_state, or | ||
| 1396 | * the other side may go "paused sync" before receiving the sync uuids, | ||
| 1397 | * which is unexpected. */ | ||
| 1398 | if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) && | ||
| 1399 | (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) && | ||
| 1400 | mdev->agreed_pro_version >= 96 && get_ldev(mdev)) { | ||
| 1401 | drbd_gen_and_send_sync_uuid(mdev); | ||
| 1402 | put_ldev(mdev); | ||
| 1403 | } | ||
| 1404 | |||
| 1309 | /* Do not change the order of the if above and the two below... */ | 1405 | /* Do not change the order of the if above and the two below... */ |
| 1310 | if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ | 1406 | if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ |
| 1311 | drbd_send_uuids(mdev); | 1407 | drbd_send_uuids(mdev); |
| 1312 | drbd_send_state(mdev); | 1408 | drbd_send_state(mdev); |
| 1313 | } | 1409 | } |
| 1314 | if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S) | 1410 | /* No point in queuing send_bitmap if we don't have a connection |
| 1315 | drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, "send_bitmap (WFBitMapS)"); | 1411 | * anymore, so also check the _current_ state, not only the new state |
| 1412 | * at the time this work was queued. */ | ||
| 1413 | if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S && | ||
| 1414 | mdev->state.conn == C_WF_BITMAP_S) | ||
| 1415 | drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, | ||
| 1416 | "send_bitmap (WFBitMapS)", | ||
| 1417 | BM_LOCKED_TEST_ALLOWED); | ||
| 1316 | 1418 | ||
| 1317 | /* Lost contact to peer's copy of the data */ | 1419 | /* Lost contact to peer's copy of the data */ |
| 1318 | if ((os.pdsk >= D_INCONSISTENT && | 1420 | if ((os.pdsk >= D_INCONSISTENT && |
| @@ -1343,7 +1445,23 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
| 1343 | 1445 | ||
| 1344 | /* D_DISKLESS Peer becomes secondary */ | 1446 | /* D_DISKLESS Peer becomes secondary */ |
| 1345 | if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) | 1447 | if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) |
| 1346 | drbd_al_to_on_disk_bm(mdev); | 1448 | /* We may still be Primary ourselves. |
| 1449 | * No harm done if the bitmap still changes, | ||
| 1450 | * redirtied pages will follow later. */ | ||
| 1451 | drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, | ||
| 1452 | "demote diskless peer", BM_LOCKED_SET_ALLOWED); | ||
| 1453 | put_ldev(mdev); | ||
| 1454 | } | ||
| 1455 | |||
| 1456 | /* Write out all changed bits on demote. | ||
| 1457 | * Though, no need to do that just yet | ||
| 1458 | * if there is still a resync going on */ | ||
| 1459 | if (os.role == R_PRIMARY && ns.role == R_SECONDARY && | ||
| 1460 | mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) { | ||
| 1461 | /* No changes to the bitmap expected this time, so assert that, | ||
| 1462 | * even though no harm would be done if it did change. */ | ||
| 1463 | drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, | ||
| 1464 | "demote", BM_LOCKED_TEST_ALLOWED); | ||
| 1347 | put_ldev(mdev); | 1465 | put_ldev(mdev); |
| 1348 | } | 1466 | } |
| 1349 | 1467 | ||
| @@ -1371,15 +1489,23 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
| 1371 | if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) | 1489 | if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) |
| 1372 | drbd_send_state(mdev); | 1490 | drbd_send_state(mdev); |
| 1373 | 1491 | ||
| 1492 | if (os.conn != C_AHEAD && ns.conn == C_AHEAD) | ||
| 1493 | drbd_send_state(mdev); | ||
| 1494 | |||
| 1374 | /* We are in the progress to start a full sync... */ | 1495 | /* We are in the progress to start a full sync... */ |
| 1375 | if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || | 1496 | if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || |
| 1376 | (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S)) | 1497 | (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S)) |
| 1377 | drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, &abw_start_sync, "set_n_write from StartingSync"); | 1498 | /* no other bitmap changes expected during this phase */ |
| 1499 | drbd_queue_bitmap_io(mdev, | ||
| 1500 | &drbd_bmio_set_n_write, &abw_start_sync, | ||
| 1501 | "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED); | ||
| 1378 | 1502 | ||
| 1379 | /* We are invalidating our self... */ | 1503 | /* We are invalidating our self... */ |
| 1380 | if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED && | 1504 | if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED && |
| 1381 | os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT) | 1505 | os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT) |
| 1382 | drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate"); | 1506 | /* other bitmap operation expected during this phase */ |
| 1507 | drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, | ||
| 1508 | "set_n_write from invalidate", BM_LOCKED_MASK); | ||
| 1383 | 1509 | ||
| 1384 | /* first half of local IO error, failure to attach, | 1510 | /* first half of local IO error, failure to attach, |
| 1385 | * or administrative detach */ | 1511 | * or administrative detach */ |
| @@ -1434,8 +1560,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
| 1434 | 1560 | ||
| 1435 | if (drbd_send_state(mdev)) | 1561 | if (drbd_send_state(mdev)) |
| 1436 | dev_warn(DEV, "Notified peer that I'm now diskless.\n"); | 1562 | dev_warn(DEV, "Notified peer that I'm now diskless.\n"); |
| 1437 | else | ||
| 1438 | dev_err(DEV, "Sending state for being diskless failed\n"); | ||
| 1439 | /* corresponding get_ldev in __drbd_set_state | 1563 | /* corresponding get_ldev in __drbd_set_state |
| 1440 | * this may finaly trigger drbd_ldev_destroy. */ | 1564 | * this may finaly trigger drbd_ldev_destroy. */ |
| 1441 | put_ldev(mdev); | 1565 | put_ldev(mdev); |
| @@ -1459,6 +1583,19 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
| 1459 | if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) | 1583 | if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) |
| 1460 | drbd_send_state(mdev); | 1584 | drbd_send_state(mdev); |
| 1461 | 1585 | ||
| 1586 | /* This triggers bitmap writeout of potentially still unwritten pages | ||
| 1587 | * if the resync finished cleanly, or aborted because of peer disk | ||
| 1588 | * failure, or because of connection loss. | ||
| 1589 | * For resync aborted because of local disk failure, we cannot do | ||
| 1590 | * any bitmap writeout anymore. | ||
| 1591 | * No harm done if some bits change during this phase. | ||
| 1592 | */ | ||
| 1593 | if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) { | ||
| 1594 | drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, | ||
| 1595 | "write from resync_finished", BM_LOCKED_SET_ALLOWED); | ||
| 1596 | put_ldev(mdev); | ||
| 1597 | } | ||
| 1598 | |||
| 1462 | /* free tl_hash if we Got thawed and are C_STANDALONE */ | 1599 | /* free tl_hash if we Got thawed and are C_STANDALONE */ |
| 1463 | if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash) | 1600 | if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash) |
| 1464 | drbd_free_tl_hash(mdev); | 1601 | drbd_free_tl_hash(mdev); |
| @@ -1559,7 +1696,7 @@ int drbd_thread_start(struct drbd_thread *thi) | |||
| 1559 | if (!try_module_get(THIS_MODULE)) { | 1696 | if (!try_module_get(THIS_MODULE)) { |
| 1560 | dev_err(DEV, "Failed to get module reference in drbd_thread_start\n"); | 1697 | dev_err(DEV, "Failed to get module reference in drbd_thread_start\n"); |
| 1561 | spin_unlock_irqrestore(&thi->t_lock, flags); | 1698 | spin_unlock_irqrestore(&thi->t_lock, flags); |
| 1562 | return FALSE; | 1699 | return false; |
| 1563 | } | 1700 | } |
| 1564 | 1701 | ||
| 1565 | init_completion(&thi->stop); | 1702 | init_completion(&thi->stop); |
| @@ -1576,7 +1713,7 @@ int drbd_thread_start(struct drbd_thread *thi) | |||
| 1576 | dev_err(DEV, "Couldn't start thread\n"); | 1713 | dev_err(DEV, "Couldn't start thread\n"); |
| 1577 | 1714 | ||
| 1578 | module_put(THIS_MODULE); | 1715 | module_put(THIS_MODULE); |
| 1579 | return FALSE; | 1716 | return false; |
| 1580 | } | 1717 | } |
| 1581 | spin_lock_irqsave(&thi->t_lock, flags); | 1718 | spin_lock_irqsave(&thi->t_lock, flags); |
| 1582 | thi->task = nt; | 1719 | thi->task = nt; |
| @@ -1596,7 +1733,7 @@ int drbd_thread_start(struct drbd_thread *thi) | |||
| 1596 | break; | 1733 | break; |
| 1597 | } | 1734 | } |
| 1598 | 1735 | ||
| 1599 | return TRUE; | 1736 | return true; |
| 1600 | } | 1737 | } |
| 1601 | 1738 | ||
| 1602 | 1739 | ||
| @@ -1694,8 +1831,8 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, | |||
| 1694 | { | 1831 | { |
| 1695 | int sent, ok; | 1832 | int sent, ok; |
| 1696 | 1833 | ||
| 1697 | ERR_IF(!h) return FALSE; | 1834 | ERR_IF(!h) return false; |
| 1698 | ERR_IF(!size) return FALSE; | 1835 | ERR_IF(!size) return false; |
| 1699 | 1836 | ||
| 1700 | h->magic = BE_DRBD_MAGIC; | 1837 | h->magic = BE_DRBD_MAGIC; |
| 1701 | h->command = cpu_to_be16(cmd); | 1838 | h->command = cpu_to_be16(cmd); |
| @@ -1704,8 +1841,8 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, | |||
| 1704 | sent = drbd_send(mdev, sock, h, size, msg_flags); | 1841 | sent = drbd_send(mdev, sock, h, size, msg_flags); |
| 1705 | 1842 | ||
| 1706 | ok = (sent == size); | 1843 | ok = (sent == size); |
| 1707 | if (!ok) | 1844 | if (!ok && !signal_pending(current)) |
| 1708 | dev_err(DEV, "short sent %s size=%d sent=%d\n", | 1845 | dev_warn(DEV, "short sent %s size=%d sent=%d\n", |
| 1709 | cmdname(cmd), (int)size, sent); | 1846 | cmdname(cmd), (int)size, sent); |
| 1710 | return ok; | 1847 | return ok; |
| 1711 | } | 1848 | } |
| @@ -1840,7 +1977,7 @@ int drbd_send_protocol(struct drbd_conf *mdev) | |||
| 1840 | else { | 1977 | else { |
| 1841 | dev_err(DEV, "--dry-run is not supported by peer"); | 1978 | dev_err(DEV, "--dry-run is not supported by peer"); |
| 1842 | kfree(p); | 1979 | kfree(p); |
| 1843 | return 0; | 1980 | return -1; |
| 1844 | } | 1981 | } |
| 1845 | } | 1982 | } |
| 1846 | p->conn_flags = cpu_to_be32(cf); | 1983 | p->conn_flags = cpu_to_be32(cf); |
| @@ -1888,12 +2025,36 @@ int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev) | |||
| 1888 | return _drbd_send_uuids(mdev, 8); | 2025 | return _drbd_send_uuids(mdev, 8); |
| 1889 | } | 2026 | } |
| 1890 | 2027 | ||
| 2028 | void drbd_print_uuids(struct drbd_conf *mdev, const char *text) | ||
| 2029 | { | ||
| 2030 | if (get_ldev_if_state(mdev, D_NEGOTIATING)) { | ||
| 2031 | u64 *uuid = mdev->ldev->md.uuid; | ||
| 2032 | dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n", | ||
| 2033 | text, | ||
| 2034 | (unsigned long long)uuid[UI_CURRENT], | ||
| 2035 | (unsigned long long)uuid[UI_BITMAP], | ||
| 2036 | (unsigned long long)uuid[UI_HISTORY_START], | ||
| 2037 | (unsigned long long)uuid[UI_HISTORY_END]); | ||
| 2038 | put_ldev(mdev); | ||
| 2039 | } else { | ||
| 2040 | dev_info(DEV, "%s effective data uuid: %016llX\n", | ||
| 2041 | text, | ||
| 2042 | (unsigned long long)mdev->ed_uuid); | ||
| 2043 | } | ||
| 2044 | } | ||
| 1891 | 2045 | ||
| 1892 | int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val) | 2046 | int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev) |
| 1893 | { | 2047 | { |
| 1894 | struct p_rs_uuid p; | 2048 | struct p_rs_uuid p; |
| 2049 | u64 uuid; | ||
| 1895 | 2050 | ||
| 1896 | p.uuid = cpu_to_be64(val); | 2051 | D_ASSERT(mdev->state.disk == D_UP_TO_DATE); |
| 2052 | |||
| 2053 | uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET; | ||
| 2054 | drbd_uuid_set(mdev, UI_BITMAP, uuid); | ||
| 2055 | drbd_print_uuids(mdev, "updated sync UUID"); | ||
| 2056 | drbd_md_sync(mdev); | ||
| 2057 | p.uuid = cpu_to_be64(uuid); | ||
| 1897 | 2058 | ||
| 1898 | return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID, | 2059 | return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID, |
| 1899 | (struct p_header80 *)&p, sizeof(p)); | 2060 | (struct p_header80 *)&p, sizeof(p)); |
| @@ -1921,7 +2082,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl | |||
| 1921 | p.d_size = cpu_to_be64(d_size); | 2082 | p.d_size = cpu_to_be64(d_size); |
| 1922 | p.u_size = cpu_to_be64(u_size); | 2083 | p.u_size = cpu_to_be64(u_size); |
| 1923 | p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); | 2084 | p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); |
| 1924 | p.max_segment_size = cpu_to_be32(queue_max_segment_size(mdev->rq_queue)); | 2085 | p.max_bio_size = cpu_to_be32(queue_max_hw_sectors(mdev->rq_queue) << 9); |
| 1925 | p.queue_order_type = cpu_to_be16(q_order_type); | 2086 | p.queue_order_type = cpu_to_be16(q_order_type); |
| 1926 | p.dds_flags = cpu_to_be16(flags); | 2087 | p.dds_flags = cpu_to_be16(flags); |
| 1927 | 2088 | ||
| @@ -1972,7 +2133,7 @@ int drbd_send_state_req(struct drbd_conf *mdev, | |||
| 1972 | (struct p_header80 *)&p, sizeof(p)); | 2133 | (struct p_header80 *)&p, sizeof(p)); |
| 1973 | } | 2134 | } |
| 1974 | 2135 | ||
| 1975 | int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode) | 2136 | int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode) |
| 1976 | { | 2137 | { |
| 1977 | struct p_req_state_reply p; | 2138 | struct p_req_state_reply p; |
| 1978 | 2139 | ||
| @@ -2076,9 +2237,15 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, | |||
| 2076 | return len; | 2237 | return len; |
| 2077 | } | 2238 | } |
| 2078 | 2239 | ||
| 2079 | enum { OK, FAILED, DONE } | 2240 | /** |
| 2241 | * send_bitmap_rle_or_plain | ||
| 2242 | * | ||
| 2243 | * Return 0 when done, 1 when another iteration is needed, and a negative error | ||
| 2244 | * code upon failure. | ||
| 2245 | */ | ||
| 2246 | static int | ||
| 2080 | send_bitmap_rle_or_plain(struct drbd_conf *mdev, | 2247 | send_bitmap_rle_or_plain(struct drbd_conf *mdev, |
| 2081 | struct p_header80 *h, struct bm_xfer_ctx *c) | 2248 | struct p_header80 *h, struct bm_xfer_ctx *c) |
| 2082 | { | 2249 | { |
| 2083 | struct p_compressed_bm *p = (void*)h; | 2250 | struct p_compressed_bm *p = (void*)h; |
| 2084 | unsigned long num_words; | 2251 | unsigned long num_words; |
| @@ -2088,7 +2255,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, | |||
| 2088 | len = fill_bitmap_rle_bits(mdev, p, c); | 2255 | len = fill_bitmap_rle_bits(mdev, p, c); |
| 2089 | 2256 | ||
| 2090 | if (len < 0) | 2257 | if (len < 0) |
| 2091 | return FAILED; | 2258 | return -EIO; |
| 2092 | 2259 | ||
| 2093 | if (len) { | 2260 | if (len) { |
| 2094 | DCBP_set_code(p, RLE_VLI_Bits); | 2261 | DCBP_set_code(p, RLE_VLI_Bits); |
| @@ -2118,11 +2285,14 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, | |||
| 2118 | if (c->bit_offset > c->bm_bits) | 2285 | if (c->bit_offset > c->bm_bits) |
| 2119 | c->bit_offset = c->bm_bits; | 2286 | c->bit_offset = c->bm_bits; |
| 2120 | } | 2287 | } |
| 2121 | ok = ok ? ((len == 0) ? DONE : OK) : FAILED; | 2288 | if (ok) { |
| 2122 | 2289 | if (len == 0) { | |
| 2123 | if (ok == DONE) | 2290 | INFO_bm_xfer_stats(mdev, "send", c); |
| 2124 | INFO_bm_xfer_stats(mdev, "send", c); | 2291 | return 0; |
| 2125 | return ok; | 2292 | } else |
| 2293 | return 1; | ||
| 2294 | } | ||
| 2295 | return -EIO; | ||
| 2126 | } | 2296 | } |
| 2127 | 2297 | ||
| 2128 | /* See the comment at receive_bitmap() */ | 2298 | /* See the comment at receive_bitmap() */ |
| @@ -2130,16 +2300,16 @@ int _drbd_send_bitmap(struct drbd_conf *mdev) | |||
| 2130 | { | 2300 | { |
| 2131 | struct bm_xfer_ctx c; | 2301 | struct bm_xfer_ctx c; |
| 2132 | struct p_header80 *p; | 2302 | struct p_header80 *p; |
| 2133 | int ret; | 2303 | int err; |
| 2134 | 2304 | ||
| 2135 | ERR_IF(!mdev->bitmap) return FALSE; | 2305 | ERR_IF(!mdev->bitmap) return false; |
| 2136 | 2306 | ||
| 2137 | /* maybe we should use some per thread scratch page, | 2307 | /* maybe we should use some per thread scratch page, |
| 2138 | * and allocate that during initial device creation? */ | 2308 | * and allocate that during initial device creation? */ |
| 2139 | p = (struct p_header80 *) __get_free_page(GFP_NOIO); | 2309 | p = (struct p_header80 *) __get_free_page(GFP_NOIO); |
| 2140 | if (!p) { | 2310 | if (!p) { |
| 2141 | dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__); | 2311 | dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__); |
| 2142 | return FALSE; | 2312 | return false; |
| 2143 | } | 2313 | } |
| 2144 | 2314 | ||
| 2145 | if (get_ldev(mdev)) { | 2315 | if (get_ldev(mdev)) { |
| @@ -2165,11 +2335,11 @@ int _drbd_send_bitmap(struct drbd_conf *mdev) | |||
| 2165 | }; | 2335 | }; |
| 2166 | 2336 | ||
| 2167 | do { | 2337 | do { |
| 2168 | ret = send_bitmap_rle_or_plain(mdev, p, &c); | 2338 | err = send_bitmap_rle_or_plain(mdev, p, &c); |
| 2169 | } while (ret == OK); | 2339 | } while (err > 0); |
| 2170 | 2340 | ||
| 2171 | free_page((unsigned long) p); | 2341 | free_page((unsigned long) p); |
| 2172 | return (ret == DONE); | 2342 | return err == 0; |
| 2173 | } | 2343 | } |
| 2174 | 2344 | ||
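
With the private `enum { OK, FAILED, DONE }` gone, `send_bitmap_rle_or_plain()` speaks the common kernel convention: 0 for done, a positive value for "call me again", a negative errno on failure, which is why the caller's loop condition is simply `err > 0`. A sketch of that convention with a hypothetical `send_next_chunk()`:

```c
#include <stdio.h>
#include <errno.h>

static int chunks_left = 3;

/* Hypothetical chunk sender using the convention adopted here:
 * 0 = done, 1 = call again for the next chunk, -errno = failure. */
static int send_next_chunk(void)
{
	if (chunks_left < 0)
		return -EIO;
	return chunks_left-- > 0 ? 1 : 0;
}

int main(void)
{
	int err;

	do {
		err = send_next_chunk();
	} while (err > 0);       /* keep going while chunks remain */

	if (err == 0)
		printf("transfer complete\n");
	else
		printf("transfer failed: %d\n", err);
	return 0;
}
```
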
| 2175 | int drbd_send_bitmap(struct drbd_conf *mdev) | 2345 | int drbd_send_bitmap(struct drbd_conf *mdev) |
| @@ -2192,7 +2362,7 @@ int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) | |||
| 2192 | p.set_size = cpu_to_be32(set_size); | 2362 | p.set_size = cpu_to_be32(set_size); |
| 2193 | 2363 | ||
| 2194 | if (mdev->state.conn < C_CONNECTED) | 2364 | if (mdev->state.conn < C_CONNECTED) |
| 2195 | return FALSE; | 2365 | return false; |
| 2196 | ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK, | 2366 | ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK, |
| 2197 | (struct p_header80 *)&p, sizeof(p)); | 2367 | (struct p_header80 *)&p, sizeof(p)); |
| 2198 | return ok; | 2368 | return ok; |
| @@ -2220,7 +2390,7 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, | |||
| 2220 | p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq)); | 2390 | p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq)); |
| 2221 | 2391 | ||
| 2222 | if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED) | 2392 | if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED) |
| 2223 | return FALSE; | 2393 | return false; |
| 2224 | ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd, | 2394 | ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd, |
| 2225 | (struct p_header80 *)&p, sizeof(p)); | 2395 | (struct p_header80 *)&p, sizeof(p)); |
| 2226 | return ok; | 2396 | return ok; |
| @@ -2326,8 +2496,8 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) | |||
| 2326 | } | 2496 | } |
| 2327 | 2497 | ||
| 2328 | /* called on sndtimeo | 2498 | /* called on sndtimeo |
| 2329 | * returns FALSE if we should retry, | 2499 | * returns false if we should retry, |
| 2330 | * TRUE if we think connection is dead | 2500 | * true if we think connection is dead |
| 2331 | */ | 2501 | */ |
| 2332 | static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock) | 2502 | static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock) |
| 2333 | { | 2503 | { |
| @@ -2340,7 +2510,7 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * | |||
| 2340 | || mdev->state.conn < C_CONNECTED; | 2510 | || mdev->state.conn < C_CONNECTED; |
| 2341 | 2511 | ||
| 2342 | if (drop_it) | 2512 | if (drop_it) |
| 2343 | return TRUE; | 2513 | return true; |
| 2344 | 2514 | ||
| 2345 | drop_it = !--mdev->ko_count; | 2515 | drop_it = !--mdev->ko_count; |
| 2346 | if (!drop_it) { | 2516 | if (!drop_it) { |
| @@ -2531,13 +2701,39 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) | |||
| 2531 | if (ok && dgs) { | 2701 | if (ok && dgs) { |
| 2532 | dgb = mdev->int_dig_out; | 2702 | dgb = mdev->int_dig_out; |
| 2533 | drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); | 2703 | drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); |
| 2534 | ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0); | 2704 | ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0); |
| 2535 | } | 2705 | } |
| 2536 | if (ok) { | 2706 | if (ok) { |
| 2537 | if (mdev->net_conf->wire_protocol == DRBD_PROT_A) | 2707 | /* For protocol A, we have to memcpy the payload into |
| 2708 | * socket buffers, as we may complete right away | ||
| 2709 | * as soon as we have handed it over to tcp, at which point the data | ||
| 2710 | * pages may become invalid. | ||
| 2711 | * | ||
| 2712 | * With data integrity enabled, we copy it as well, so we can be | ||
| 2713 | * sure that even if the bio pages are still being modified, it | ||
| 2714 | * won't change the data on the wire; thus if the digest checks | ||
| 2715 | * out ok after sending on this side, but does not fit on the | ||
| 2716 | * receiving side, we have certainly detected corruption elsewhere. | ||
| 2717 | */ | ||
| 2718 | if (mdev->net_conf->wire_protocol == DRBD_PROT_A || dgs) | ||
| 2538 | ok = _drbd_send_bio(mdev, req->master_bio); | 2719 | ok = _drbd_send_bio(mdev, req->master_bio); |
| 2539 | else | 2720 | else |
| 2540 | ok = _drbd_send_zc_bio(mdev, req->master_bio); | 2721 | ok = _drbd_send_zc_bio(mdev, req->master_bio); |
| 2722 | |||
| 2723 | /* double check digest, sometimes buffers have been modified in flight. */ | ||
| 2724 | if (dgs > 0 && dgs <= 64) { | ||
| 2725 | /* 64 bytes, 512 bits, is the largest digest size | ||
| 2726 | * currently supported in kernel crypto. */ | ||
| 2727 | unsigned char digest[64]; | ||
| 2728 | drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest); | ||
| 2729 | if (memcmp(mdev->int_dig_out, digest, dgs)) { | ||
| 2730 | dev_warn(DEV, | ||
| 2731 | "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n", | ||
| 2732 | (unsigned long long)req->sector, req->size); | ||
| 2733 | } | ||
| 2734 | } /* else if (dgs > 64) { | ||
| 2735 | ... Be noisy about digest too large ... | ||
| 2736 | } */ | ||
| 2541 | } | 2737 | } |
| 2542 | 2738 | ||
| 2543 | drbd_put_data_sock(mdev); | 2739 | drbd_put_data_sock(mdev); |
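
The added double check recomputes the digest after the payload has been handed to the socket and compares it against the digest that already went out on the wire; any mismatch proves the pages were modified in flight. A self-contained sketch of the idea, with a toy FNV-1a checksum standing in for `drbd_csum_bio()`:

```c
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* Toy FNV-1a checksum standing in for drbd_csum_bio(); any digest
 * demonstrates the idiom. */
static uint32_t csum(const unsigned char *buf, size_t len)
{
	uint32_t h = 2166136261u;
	while (len--)
		h = (h ^ *buf++) * 16777619u;
	return h;
}

int main(void)
{
	unsigned char page[16] = "payload payload";
	uint32_t dig_out = csum(page, sizeof(page)); /* digest already sent */

	/* ...payload handed to the socket here; with a copying send the
	 * wire data is frozen, but the original page may still be
	 * redirtied by upper layers... */
	page[0] ^= 1;                                /* simulate such a write */

	/* Recompute after the send and compare, as the patch does. */
	if (csum(page, sizeof(page)) != dig_out)
		fprintf(stderr, "Digest mismatch, buffer modified "
				"by upper layers during write\n");
	return 0;
}
```
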
| @@ -2587,7 +2783,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, | |||
| 2587 | if (ok && dgs) { | 2783 | if (ok && dgs) { |
| 2588 | dgb = mdev->int_dig_out; | 2784 | dgb = mdev->int_dig_out; |
| 2589 | drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb); | 2785 | drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb); |
| 2590 | ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0); | 2786 | ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0); |
| 2591 | } | 2787 | } |
| 2592 | if (ok) | 2788 | if (ok) |
| 2593 | ok = _drbd_send_zc_ee(mdev, e); | 2789 | ok = _drbd_send_zc_ee(mdev, e); |
| @@ -2597,6 +2793,16 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, | |||
| 2597 | return ok; | 2793 | return ok; |
| 2598 | } | 2794 | } |
| 2599 | 2795 | ||
| 2796 | int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req) | ||
| 2797 | { | ||
| 2798 | struct p_block_desc p; | ||
| 2799 | |||
| 2800 | p.sector = cpu_to_be64(req->sector); | ||
| 2801 | p.blksize = cpu_to_be32(req->size); | ||
| 2802 | |||
| 2803 | return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OUT_OF_SYNC, &p.head, sizeof(p)); | ||
| 2804 | } | ||
| 2805 | |||
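
The new `drbd_send_oos()` describes an out-of-sync block to the peer as a big-endian sector plus block size. A portable sketch of that serialization; the hand-rolled `put_be64()`/`put_be32()` helpers are stand-ins for the kernel's `cpu_to_be64()`/`cpu_to_be32()`, and the 12-byte payload layout is inferred from the hunk, not taken from the headers:

```c
#include <stdio.h>
#include <stdint.h>

/* Portable stand-ins for cpu_to_be64()/cpu_to_be32(): serialize into a
 * byte buffer most-significant byte first. */
static unsigned char *put_be64(unsigned char *p, uint64_t v)
{
	for (int i = 7; i >= 0; i--)
		*p++ = (unsigned char)(v >> (8 * i));
	return p;
}

static unsigned char *put_be32(unsigned char *p, uint32_t v)
{
	for (int i = 3; i >= 0; i--)
		*p++ = (unsigned char)(v >> (8 * i));
	return p;
}

int main(void)
{
	/* P_OUT_OF_SYNC payload per the hunk: 64-bit sector, 32-bit size. */
	unsigned char buf[12], *p = buf;

	p = put_be64(p, 123456);   /* req->sector */
	p = put_be32(p, 4096);     /* req->size   */

	printf("%ld byte payload ready to send\n", (long)(p - buf));
	return 0;
}
```
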
| 2600 | /* | 2806 | /* |
| 2601 | drbd_send distinguishes two cases: | 2807 | drbd_send distinguishes two cases: |
| 2602 | 2808 | ||
| @@ -2770,6 +2976,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) | |||
| 2770 | atomic_set(&mdev->pp_in_use_by_net, 0); | 2976 | atomic_set(&mdev->pp_in_use_by_net, 0); |
| 2771 | atomic_set(&mdev->rs_sect_in, 0); | 2977 | atomic_set(&mdev->rs_sect_in, 0); |
| 2772 | atomic_set(&mdev->rs_sect_ev, 0); | 2978 | atomic_set(&mdev->rs_sect_ev, 0); |
| 2979 | atomic_set(&mdev->ap_in_flight, 0); | ||
| 2773 | 2980 | ||
| 2774 | mutex_init(&mdev->md_io_mutex); | 2981 | mutex_init(&mdev->md_io_mutex); |
| 2775 | mutex_init(&mdev->data.mutex); | 2982 | mutex_init(&mdev->data.mutex); |
| @@ -2798,19 +3005,27 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) | |||
| 2798 | INIT_LIST_HEAD(&mdev->unplug_work.list); | 3005 | INIT_LIST_HEAD(&mdev->unplug_work.list); |
| 2799 | INIT_LIST_HEAD(&mdev->go_diskless.list); | 3006 | INIT_LIST_HEAD(&mdev->go_diskless.list); |
| 2800 | INIT_LIST_HEAD(&mdev->md_sync_work.list); | 3007 | INIT_LIST_HEAD(&mdev->md_sync_work.list); |
| 3008 | INIT_LIST_HEAD(&mdev->start_resync_work.list); | ||
| 2801 | INIT_LIST_HEAD(&mdev->bm_io_work.w.list); | 3009 | INIT_LIST_HEAD(&mdev->bm_io_work.w.list); |
| 2802 | 3010 | ||
| 2803 | mdev->resync_work.cb = w_resync_inactive; | 3011 | mdev->resync_work.cb = w_resync_timer; |
| 2804 | mdev->unplug_work.cb = w_send_write_hint; | 3012 | mdev->unplug_work.cb = w_send_write_hint; |
| 2805 | mdev->go_diskless.cb = w_go_diskless; | 3013 | mdev->go_diskless.cb = w_go_diskless; |
| 2806 | mdev->md_sync_work.cb = w_md_sync; | 3014 | mdev->md_sync_work.cb = w_md_sync; |
| 2807 | mdev->bm_io_work.w.cb = w_bitmap_io; | 3015 | mdev->bm_io_work.w.cb = w_bitmap_io; |
| 3016 | mdev->start_resync_work.cb = w_start_resync; | ||
| 2808 | init_timer(&mdev->resync_timer); | 3017 | init_timer(&mdev->resync_timer); |
| 2809 | init_timer(&mdev->md_sync_timer); | 3018 | init_timer(&mdev->md_sync_timer); |
| 3019 | init_timer(&mdev->start_resync_timer); | ||
| 3020 | init_timer(&mdev->request_timer); | ||
| 2810 | mdev->resync_timer.function = resync_timer_fn; | 3021 | mdev->resync_timer.function = resync_timer_fn; |
| 2811 | mdev->resync_timer.data = (unsigned long) mdev; | 3022 | mdev->resync_timer.data = (unsigned long) mdev; |
| 2812 | mdev->md_sync_timer.function = md_sync_timer_fn; | 3023 | mdev->md_sync_timer.function = md_sync_timer_fn; |
| 2813 | mdev->md_sync_timer.data = (unsigned long) mdev; | 3024 | mdev->md_sync_timer.data = (unsigned long) mdev; |
| 3025 | mdev->start_resync_timer.function = start_resync_timer_fn; | ||
| 3026 | mdev->start_resync_timer.data = (unsigned long) mdev; | ||
| 3027 | mdev->request_timer.function = request_timer_fn; | ||
| 3028 | mdev->request_timer.data = (unsigned long) mdev; | ||
| 2814 | 3029 | ||
| 2815 | init_waitqueue_head(&mdev->misc_wait); | 3030 | init_waitqueue_head(&mdev->misc_wait); |
| 2816 | init_waitqueue_head(&mdev->state_wait); | 3031 | init_waitqueue_head(&mdev->state_wait); |
| @@ -2881,6 +3096,8 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev) | |||
| 2881 | D_ASSERT(list_empty(&mdev->resync_work.list)); | 3096 | D_ASSERT(list_empty(&mdev->resync_work.list)); |
| 2882 | D_ASSERT(list_empty(&mdev->unplug_work.list)); | 3097 | D_ASSERT(list_empty(&mdev->unplug_work.list)); |
| 2883 | D_ASSERT(list_empty(&mdev->go_diskless.list)); | 3098 | D_ASSERT(list_empty(&mdev->go_diskless.list)); |
| 3099 | |||
| 3100 | drbd_set_defaults(mdev); | ||
| 2884 | } | 3101 | } |
| 2885 | 3102 | ||
| 2886 | 3103 | ||
| @@ -2923,7 +3140,7 @@ static void drbd_destroy_mempools(void) | |||
| 2923 | static int drbd_create_mempools(void) | 3140 | static int drbd_create_mempools(void) |
| 2924 | { | 3141 | { |
| 2925 | struct page *page; | 3142 | struct page *page; |
| 2926 | const int number = (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE) * minor_count; | 3143 | const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count; |
| 2927 | int i; | 3144 | int i; |
| 2928 | 3145 | ||
| 2929 | /* prepare our caches and mempools */ | 3146 | /* prepare our caches and mempools */ |
| @@ -3087,11 +3304,20 @@ static void drbd_cleanup(void) | |||
| 3087 | 3304 | ||
| 3088 | unregister_reboot_notifier(&drbd_notifier); | 3305 | unregister_reboot_notifier(&drbd_notifier); |
| 3089 | 3306 | ||
| 3307 | /* first remove proc, | ||
| 3308 | * drbdsetup uses its presence to detect | ||
| 3309 | * whether DRBD is loaded. | ||
| 3310 | * If we got stuck in proc removal | ||
| 3311 | * with netlink already deregistered, | ||
| 3312 | * some drbdsetup commands may wait forever | ||
| 3313 | * for an answer. | ||
| 3314 | */ | ||
| 3315 | if (drbd_proc) | ||
| 3316 | remove_proc_entry("drbd", NULL); | ||
| 3317 | |||
| 3090 | drbd_nl_cleanup(); | 3318 | drbd_nl_cleanup(); |
| 3091 | 3319 | ||
| 3092 | if (minor_table) { | 3320 | if (minor_table) { |
| 3093 | if (drbd_proc) | ||
| 3094 | remove_proc_entry("drbd", NULL); | ||
| 3095 | i = minor_count; | 3321 | i = minor_count; |
| 3096 | while (i--) | 3322 | while (i--) |
| 3097 | drbd_delete_device(i); | 3323 | drbd_delete_device(i); |
| @@ -3119,7 +3345,7 @@ static int drbd_congested(void *congested_data, int bdi_bits) | |||
| 3119 | char reason = '-'; | 3345 | char reason = '-'; |
| 3120 | int r = 0; | 3346 | int r = 0; |
| 3121 | 3347 | ||
| 3122 | if (!__inc_ap_bio_cond(mdev)) { | 3348 | if (!may_inc_ap_bio(mdev)) { |
| 3123 | /* DRBD has frozen IO */ | 3349 | /* DRBD has frozen IO */ |
| 3124 | r = bdi_bits; | 3350 | r = bdi_bits; |
| 3125 | reason = 'd'; | 3351 | reason = 'd'; |
| @@ -3172,7 +3398,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor) | |||
| 3172 | goto out_no_disk; | 3398 | goto out_no_disk; |
| 3173 | mdev->vdisk = disk; | 3399 | mdev->vdisk = disk; |
| 3174 | 3400 | ||
| 3175 | set_disk_ro(disk, TRUE); | 3401 | set_disk_ro(disk, true); |
| 3176 | 3402 | ||
| 3177 | disk->queue = q; | 3403 | disk->queue = q; |
| 3178 | disk->major = DRBD_MAJOR; | 3404 | disk->major = DRBD_MAJOR; |
| @@ -3188,8 +3414,8 @@ struct drbd_conf *drbd_new_device(unsigned int minor) | |||
| 3188 | q->backing_dev_info.congested_fn = drbd_congested; | 3414 | q->backing_dev_info.congested_fn = drbd_congested; |
| 3189 | q->backing_dev_info.congested_data = mdev; | 3415 | q->backing_dev_info.congested_data = mdev; |
| 3190 | 3416 | ||
| 3191 | blk_queue_make_request(q, drbd_make_request_26); | 3417 | blk_queue_make_request(q, drbd_make_request); |
| 3192 | blk_queue_max_segment_size(q, DRBD_MAX_SEGMENT_SIZE); | 3418 | blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE >> 9); |
| 3193 | blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); | 3419 | blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); |
| 3194 | blk_queue_merge_bvec(q, drbd_merge_bvec); | 3420 | blk_queue_merge_bvec(q, drbd_merge_bvec); |
| 3195 | q->queue_lock = &mdev->req_lock; | 3421 | q->queue_lock = &mdev->req_lock; |
| @@ -3251,6 +3477,7 @@ void drbd_free_mdev(struct drbd_conf *mdev) | |||
| 3251 | put_disk(mdev->vdisk); | 3477 | put_disk(mdev->vdisk); |
| 3252 | blk_cleanup_queue(mdev->rq_queue); | 3478 | blk_cleanup_queue(mdev->rq_queue); |
| 3253 | free_cpumask_var(mdev->cpu_mask); | 3479 | free_cpumask_var(mdev->cpu_mask); |
| 3480 | drbd_free_tl_hash(mdev); | ||
| 3254 | kfree(mdev); | 3481 | kfree(mdev); |
| 3255 | } | 3482 | } |
| 3256 | 3483 | ||
| @@ -3266,7 +3493,7 @@ int __init drbd_init(void) | |||
| 3266 | return -EINVAL; | 3493 | return -EINVAL; |
| 3267 | } | 3494 | } |
| 3268 | 3495 | ||
| 3269 | if (1 > minor_count || minor_count > 255) { | 3496 | if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) { |
| 3270 | printk(KERN_ERR | 3497 | printk(KERN_ERR |
| 3271 | "drbd: invalid minor_count (%d)\n", minor_count); | 3498 | "drbd: invalid minor_count (%d)\n", minor_count); |
| 3272 | #ifdef MODULE | 3499 | #ifdef MODULE |
| @@ -3448,7 +3675,7 @@ void drbd_md_sync(struct drbd_conf *mdev) | |||
| 3448 | if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { | 3675 | if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { |
| 3449 | /* this was a try anyways ... */ | 3676 | /* this was a try anyways ... */ |
| 3450 | dev_err(DEV, "meta data update failed!\n"); | 3677 | dev_err(DEV, "meta data update failed!\n"); |
| 3451 | drbd_chk_io_error(mdev, 1, TRUE); | 3678 | drbd_chk_io_error(mdev, 1, true); |
| 3452 | } | 3679 | } |
| 3453 | 3680 | ||
| 3454 | /* Update mdev->ldev->md.la_size_sect, | 3681 | /* Update mdev->ldev->md.la_size_sect, |
| @@ -3464,7 +3691,7 @@ void drbd_md_sync(struct drbd_conf *mdev) | |||
| 3464 | * @mdev: DRBD device. | 3691 | * @mdev: DRBD device. |
| 3465 | * @bdev: Device from which the meta data should be read in. | 3692 | * @bdev: Device from which the meta data should be read in. |
| 3466 | * | 3693 | * |
| 3467 | * Return 0 (NO_ERROR) on success, and an enum drbd_ret_codes in case | 3694 | * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case |
| 3468 | * something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID. | 3695 | * something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID. |
| 3469 | */ | 3696 | */ |
| 3470 | int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) | 3697 | int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) |
| @@ -3534,28 +3761,6 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) | |||
| 3534 | return rv; | 3761 | return rv; |
| 3535 | } | 3762 | } |
| 3536 | 3763 | ||
| 3537 | static void debug_drbd_uuid(struct drbd_conf *mdev, enum drbd_uuid_index index) | ||
| 3538 | { | ||
| 3539 | static char *uuid_str[UI_EXTENDED_SIZE] = { | ||
| 3540 | [UI_CURRENT] = "CURRENT", | ||
| 3541 | [UI_BITMAP] = "BITMAP", | ||
| 3542 | [UI_HISTORY_START] = "HISTORY_START", | ||
| 3543 | [UI_HISTORY_END] = "HISTORY_END", | ||
| 3544 | [UI_SIZE] = "SIZE", | ||
| 3545 | [UI_FLAGS] = "FLAGS", | ||
| 3546 | }; | ||
| 3547 | |||
| 3548 | if (index >= UI_EXTENDED_SIZE) { | ||
| 3549 | dev_warn(DEV, " uuid_index >= EXTENDED_SIZE\n"); | ||
| 3550 | return; | ||
| 3551 | } | ||
| 3552 | |||
| 3553 | dynamic_dev_dbg(DEV, " uuid[%s] now %016llX\n", | ||
| 3554 | uuid_str[index], | ||
| 3555 | (unsigned long long)mdev->ldev->md.uuid[index]); | ||
| 3556 | } | ||
| 3557 | |||
| 3558 | |||
| 3559 | /** | 3764 | /** |
| 3560 | * drbd_md_mark_dirty() - Mark meta data super block as dirty | 3765 | * drbd_md_mark_dirty() - Mark meta data super block as dirty |
| 3561 | * @mdev: DRBD device. | 3766 | * @mdev: DRBD device. |
| @@ -3585,10 +3790,8 @@ static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) | |||
| 3585 | { | 3790 | { |
| 3586 | int i; | 3791 | int i; |
| 3587 | 3792 | ||
| 3588 | for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) { | 3793 | for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) |
| 3589 | mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i]; | 3794 | mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i]; |
| 3590 | debug_drbd_uuid(mdev, i+1); | ||
| 3591 | } | ||
| 3592 | } | 3795 | } |
| 3593 | 3796 | ||
| 3594 | void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) | 3797 | void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) |
| @@ -3603,7 +3806,6 @@ void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) | |||
| 3603 | } | 3806 | } |
| 3604 | 3807 | ||
| 3605 | mdev->ldev->md.uuid[idx] = val; | 3808 | mdev->ldev->md.uuid[idx] = val; |
| 3606 | debug_drbd_uuid(mdev, idx); | ||
| 3607 | drbd_md_mark_dirty(mdev); | 3809 | drbd_md_mark_dirty(mdev); |
| 3608 | } | 3810 | } |
| 3609 | 3811 | ||
| @@ -3613,7 +3815,6 @@ void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) | |||
| 3613 | if (mdev->ldev->md.uuid[idx]) { | 3815 | if (mdev->ldev->md.uuid[idx]) { |
| 3614 | drbd_uuid_move_history(mdev); | 3816 | drbd_uuid_move_history(mdev); |
| 3615 | mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx]; | 3817 | mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx]; |
| 3616 | debug_drbd_uuid(mdev, UI_HISTORY_START); | ||
| 3617 | } | 3818 | } |
| 3618 | _drbd_uuid_set(mdev, idx, val); | 3819 | _drbd_uuid_set(mdev, idx, val); |
| 3619 | } | 3820 | } |
| @@ -3628,14 +3829,16 @@ void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) | |||
| 3628 | void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local) | 3829 | void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local) |
| 3629 | { | 3830 | { |
| 3630 | u64 val; | 3831 | u64 val; |
| 3832 | unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP]; | ||
| 3833 | |||
| 3834 | if (bm_uuid) | ||
| 3835 | dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid); | ||
| 3631 | 3836 | ||
| 3632 | dev_info(DEV, "Creating new current UUID\n"); | ||
| 3633 | D_ASSERT(mdev->ldev->md.uuid[UI_BITMAP] == 0); | ||
| 3634 | mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT]; | 3837 | mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT]; |
| 3635 | debug_drbd_uuid(mdev, UI_BITMAP); | ||
| 3636 | 3838 | ||
| 3637 | get_random_bytes(&val, sizeof(u64)); | 3839 | get_random_bytes(&val, sizeof(u64)); |
| 3638 | _drbd_uuid_set(mdev, UI_CURRENT, val); | 3840 | _drbd_uuid_set(mdev, UI_CURRENT, val); |
| 3841 | drbd_print_uuids(mdev, "new current UUID"); | ||
| 3639 | /* get it to stable storage _now_ */ | 3842 | /* get it to stable storage _now_ */ |
| 3640 | drbd_md_sync(mdev); | 3843 | drbd_md_sync(mdev); |
| 3641 | } | 3844 | } |
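
`drbd_uuid_new_current()` now warns instead of asserting when a bitmap UUID is already present, then rotates the current UUID into the bitmap slot and draws a fresh random current UUID. A userspace sketch of the rotation under those assumptions; `rand()` merely stands in for `get_random_bytes()`:

```c
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

enum { UI_CURRENT, UI_BITMAP, UI_HISTORY_START, UI_HISTORY_END, UI_SIZE };

int main(void)
{
	uint64_t uuid[UI_SIZE] = { 0xAULL, 0, 0, 0 };
	uint64_t val;

	if (uuid[UI_BITMAP])   /* the patch warns here instead of asserting */
		fprintf(stderr, "bm UUID was already set: %llX\n",
			(unsigned long long)uuid[UI_BITMAP]);

	/* Rotate: the old current UUID becomes the bitmap UUID, and a
	 * fresh random value becomes the new current UUID. */
	uuid[UI_BITMAP] = uuid[UI_CURRENT];
	val = ((uint64_t)rand() << 32) | (uint64_t)rand();
	uuid[UI_CURRENT] = val;

	printf("new current UUID %016llX, bitmap UUID %016llX\n",
	       (unsigned long long)uuid[UI_CURRENT],
	       (unsigned long long)uuid[UI_BITMAP]);
	return 0;
}
```
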
| @@ -3649,16 +3852,12 @@ void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local) | |||
| 3649 | drbd_uuid_move_history(mdev); | 3852 | drbd_uuid_move_history(mdev); |
| 3650 | mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP]; | 3853 | mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP]; |
| 3651 | mdev->ldev->md.uuid[UI_BITMAP] = 0; | 3854 | mdev->ldev->md.uuid[UI_BITMAP] = 0; |
| 3652 | debug_drbd_uuid(mdev, UI_HISTORY_START); | ||
| 3653 | debug_drbd_uuid(mdev, UI_BITMAP); | ||
| 3654 | } else { | 3855 | } else { |
| 3655 | if (mdev->ldev->md.uuid[UI_BITMAP]) | 3856 | unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP]; |
| 3656 | dev_warn(DEV, "bm UUID already set"); | 3857 | if (bm_uuid) |
| 3657 | 3858 | dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid); | |
| 3658 | mdev->ldev->md.uuid[UI_BITMAP] = val; | ||
| 3659 | mdev->ldev->md.uuid[UI_BITMAP] &= ~((u64)1); | ||
| 3660 | 3859 | ||
| 3661 | debug_drbd_uuid(mdev, UI_BITMAP); | 3860 | mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1); |
| 3662 | } | 3861 | } |
| 3663 | drbd_md_mark_dirty(mdev); | 3862 | drbd_md_mark_dirty(mdev); |
| 3664 | } | 3863 | } |
| @@ -3714,15 +3913,19 @@ int drbd_bmio_clear_n_write(struct drbd_conf *mdev) | |||
| 3714 | static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused) | 3913 | static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused) |
| 3715 | { | 3914 | { |
| 3716 | struct bm_io_work *work = container_of(w, struct bm_io_work, w); | 3915 | struct bm_io_work *work = container_of(w, struct bm_io_work, w); |
| 3717 | int rv; | 3916 | int rv = -EIO; |
| 3718 | 3917 | ||
| 3719 | D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0); | 3918 | D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0); |
| 3720 | 3919 | ||
| 3721 | drbd_bm_lock(mdev, work->why); | 3920 | if (get_ldev(mdev)) { |
| 3722 | rv = work->io_fn(mdev); | 3921 | drbd_bm_lock(mdev, work->why, work->flags); |
| 3723 | drbd_bm_unlock(mdev); | 3922 | rv = work->io_fn(mdev); |
| 3923 | drbd_bm_unlock(mdev); | ||
| 3924 | put_ldev(mdev); | ||
| 3925 | } | ||
| 3724 | 3926 | ||
| 3725 | clear_bit(BITMAP_IO, &mdev->flags); | 3927 | clear_bit(BITMAP_IO, &mdev->flags); |
| 3928 | smp_mb__after_clear_bit(); | ||
| 3726 | wake_up(&mdev->misc_wait); | 3929 | wake_up(&mdev->misc_wait); |
| 3727 | 3930 | ||
| 3728 | if (work->done) | 3931 | if (work->done) |
| @@ -3730,6 +3933,7 @@ static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused) | |||
| 3730 | 3933 | ||
| 3731 | clear_bit(BITMAP_IO_QUEUED, &mdev->flags); | 3934 | clear_bit(BITMAP_IO_QUEUED, &mdev->flags); |
| 3732 | work->why = NULL; | 3935 | work->why = NULL; |
| 3936 | work->flags = 0; | ||
| 3733 | 3937 | ||
| 3734 | return 1; | 3938 | return 1; |
| 3735 | } | 3939 | } |
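Three things change in w_bitmap_io() above: rv starts out as -EIO so a vanished local disk reports failure, the lock/io_fn/unlock sequence only runs under a get_ldev() reference, and smp_mb__after_clear_bit() orders the BITMAP_IO clear before the wake-up so a waiter re-checking the flag cannot sleep on a stale value. A userspace model of the clear-then-wake ordering, with C11 atomics standing in for the kernel bit ops:

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool bitmap_io_flag = true;

/* Model of the fixed ordering: clear the flag, full fence (the role
 * smp_mb__after_clear_bit() plays), then wake the waiters. A waiter
 * that re-checks the flag afterwards must observe it cleared. */
static void finish_bitmap_io(void)
{
	atomic_store_explicit(&bitmap_io_flag, false, memory_order_release);
	atomic_thread_fence(memory_order_seq_cst);
	puts("wake_up(&misc_wait)"); /* stand-in for the real wake-up */
}

int main(void)
{
	finish_bitmap_io();
	printf("BITMAP_IO pending: %d\n",
	       atomic_load_explicit(&bitmap_io_flag, memory_order_acquire));
	return 0;
}
```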
| @@ -3784,7 +3988,7 @@ void drbd_go_diskless(struct drbd_conf *mdev) | |||
| 3784 | void drbd_queue_bitmap_io(struct drbd_conf *mdev, | 3988 | void drbd_queue_bitmap_io(struct drbd_conf *mdev, |
| 3785 | int (*io_fn)(struct drbd_conf *), | 3989 | int (*io_fn)(struct drbd_conf *), |
| 3786 | void (*done)(struct drbd_conf *, int), | 3990 | void (*done)(struct drbd_conf *, int), |
| 3787 | char *why) | 3991 | char *why, enum bm_flag flags) |
| 3788 | { | 3992 | { |
| 3789 | D_ASSERT(current == mdev->worker.task); | 3993 | D_ASSERT(current == mdev->worker.task); |
| 3790 | 3994 | ||
| @@ -3798,15 +4002,15 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev, | |||
| 3798 | mdev->bm_io_work.io_fn = io_fn; | 4002 | mdev->bm_io_work.io_fn = io_fn; |
| 3799 | mdev->bm_io_work.done = done; | 4003 | mdev->bm_io_work.done = done; |
| 3800 | mdev->bm_io_work.why = why; | 4004 | mdev->bm_io_work.why = why; |
| 4005 | mdev->bm_io_work.flags = flags; | ||
| 3801 | 4006 | ||
| 4007 | spin_lock_irq(&mdev->req_lock); | ||
| 3802 | set_bit(BITMAP_IO, &mdev->flags); | 4008 | set_bit(BITMAP_IO, &mdev->flags); |
| 3803 | if (atomic_read(&mdev->ap_bio_cnt) == 0) { | 4009 | if (atomic_read(&mdev->ap_bio_cnt) == 0) { |
| 3804 | if (list_empty(&mdev->bm_io_work.w.list)) { | 4010 | if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) |
| 3805 | set_bit(BITMAP_IO_QUEUED, &mdev->flags); | ||
| 3806 | drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w); | 4011 | drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w); |
| 3807 | } else | ||
| 3808 | dev_err(DEV, "FIXME avoided double queuing bm_io_work\n"); | ||
| 3809 | } | 4012 | } |
| 4013 | spin_unlock_irq(&mdev->req_lock); | ||
| 3810 | } | 4014 | } |
| 3811 | 4015 | ||
| 3812 | /** | 4016 | /** |
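drbd_queue_bitmap_io() above now takes req_lock and uses test_and_set_bit(BITMAP_IO_QUEUED) instead of the old list_empty() heuristic, so the work item is queued exactly once even when callers race. The queue-once idiom, modeled with a C11 atomic flag:

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_flag queued = ATOMIC_FLAG_INIT;

/* Whoever flips the flag first queues the work; everyone else is a
 * no-op. test_and_set_bit() gives the same atomicity in the kernel. */
static bool queue_once(void (*queue_work)(void))
{
	if (!atomic_flag_test_and_set(&queued)) {
		queue_work();
		return true;
	}
	return false;
}

static void do_queue(void) { puts("queued bm_io_work"); }

int main(void)
{
	queue_once(do_queue); /* queues */
	queue_once(do_queue); /* skipped, already queued */
	return 0;
}
```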
| @@ -3818,19 +4022,22 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev, | |||
| 3818 | * freezes application IO while the actual IO operation runs. This | 4022 | * freezes application IO while the actual IO operation runs. This |
| 3819 | * function MAY NOT be called from worker context. | 4023 | * function MAY NOT be called from worker context. |
| 3820 | */ | 4024 | */ |
| 3821 | int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why) | 4025 | int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), |
| 4026 | char *why, enum bm_flag flags) | ||
| 3822 | { | 4027 | { |
| 3823 | int rv; | 4028 | int rv; |
| 3824 | 4029 | ||
| 3825 | D_ASSERT(current != mdev->worker.task); | 4030 | D_ASSERT(current != mdev->worker.task); |
| 3826 | 4031 | ||
| 3827 | drbd_suspend_io(mdev); | 4032 | if ((flags & BM_LOCKED_SET_ALLOWED) == 0) |
| 4033 | drbd_suspend_io(mdev); | ||
| 3828 | 4034 | ||
| 3829 | drbd_bm_lock(mdev, why); | 4035 | drbd_bm_lock(mdev, why, flags); |
| 3830 | rv = io_fn(mdev); | 4036 | rv = io_fn(mdev); |
| 3831 | drbd_bm_unlock(mdev); | 4037 | drbd_bm_unlock(mdev); |
| 3832 | 4038 | ||
| 3833 | drbd_resume_io(mdev); | 4039 | if ((flags & BM_LOCKED_SET_ALLOWED) == 0) |
| 4040 | drbd_resume_io(mdev); | ||
| 3834 | 4041 | ||
| 3835 | return rv; | 4042 | return rv; |
| 3836 | } | 4043 | } |
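With the new enum bm_flag argument, drbd_bitmap_io() only freezes application IO when the caller did not pass BM_LOCKED_SET_ALLOWED; bitmap writes that tolerate concurrent set operations skip the suspend/resume pair. A condensed model of that control flow (the flag's numeric value here is an assumption):

```c
#include <stdio.h>

enum bm_flag { BM_LOCKED_SET_ALLOWED = 0x4 }; /* value assumed */

/* Skeleton of drbd_bitmap_io()'s flow after the change. */
static int bitmap_io(int (*io_fn)(void), enum bm_flag flags)
{
	int rv;

	if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
		puts("suspend application IO");

	puts("bitmap locked");
	rv = io_fn();
	puts("bitmap unlocked");

	if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
		puts("resume application IO");

	return rv;
}

static int write_bitmap(void) { puts("writing bitmap"); return 0; }

int main(void)
{
	bitmap_io(write_bitmap, 0);                     /* freezes IO   */
	bitmap_io(write_bitmap, BM_LOCKED_SET_ALLOWED); /* leaves it on */
	return 0;
}
```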
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index fe81c851ca88..03b29f78a37d 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c | |||
| @@ -288,10 +288,11 @@ void drbd_try_outdate_peer_async(struct drbd_conf *mdev) | |||
| 288 | dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n"); | 288 | dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n"); |
| 289 | } | 289 | } |
| 290 | 290 | ||
| 291 | int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) | 291 | enum drbd_state_rv |
| 292 | drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) | ||
| 292 | { | 293 | { |
| 293 | const int max_tries = 4; | 294 | const int max_tries = 4; |
| 294 | int r = 0; | 295 | enum drbd_state_rv rv = SS_UNKNOWN_ERROR; |
| 295 | int try = 0; | 296 | int try = 0; |
| 296 | int forced = 0; | 297 | int forced = 0; |
| 297 | union drbd_state mask, val; | 298 | union drbd_state mask, val; |
| @@ -306,17 +307,17 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) | |||
| 306 | val.i = 0; val.role = new_role; | 307 | val.i = 0; val.role = new_role; |
| 307 | 308 | ||
| 308 | while (try++ < max_tries) { | 309 | while (try++ < max_tries) { |
| 309 | r = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE); | 310 | rv = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE); |
| 310 | 311 | ||
| 311 | /* in case we first succeeded to outdate, | 312 | /* in case we first succeeded to outdate, |
| 312 | * but now suddenly could establish a connection */ | 313 | * but now suddenly could establish a connection */ |
| 313 | if (r == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) { | 314 | if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) { |
| 314 | val.pdsk = 0; | 315 | val.pdsk = 0; |
| 315 | mask.pdsk = 0; | 316 | mask.pdsk = 0; |
| 316 | continue; | 317 | continue; |
| 317 | } | 318 | } |
| 318 | 319 | ||
| 319 | if (r == SS_NO_UP_TO_DATE_DISK && force && | 320 | if (rv == SS_NO_UP_TO_DATE_DISK && force && |
| 320 | (mdev->state.disk < D_UP_TO_DATE && | 321 | (mdev->state.disk < D_UP_TO_DATE && |
| 321 | mdev->state.disk >= D_INCONSISTENT)) { | 322 | mdev->state.disk >= D_INCONSISTENT)) { |
| 322 | mask.disk = D_MASK; | 323 | mask.disk = D_MASK; |
| @@ -325,7 +326,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) | |||
| 325 | continue; | 326 | continue; |
| 326 | } | 327 | } |
| 327 | 328 | ||
| 328 | if (r == SS_NO_UP_TO_DATE_DISK && | 329 | if (rv == SS_NO_UP_TO_DATE_DISK && |
| 329 | mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) { | 330 | mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) { |
| 330 | D_ASSERT(mdev->state.pdsk == D_UNKNOWN); | 331 | D_ASSERT(mdev->state.pdsk == D_UNKNOWN); |
| 331 | nps = drbd_try_outdate_peer(mdev); | 332 | nps = drbd_try_outdate_peer(mdev); |
| @@ -341,9 +342,9 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) | |||
| 341 | continue; | 342 | continue; |
| 342 | } | 343 | } |
| 343 | 344 | ||
| 344 | if (r == SS_NOTHING_TO_DO) | 345 | if (rv == SS_NOTHING_TO_DO) |
| 345 | goto fail; | 346 | goto fail; |
| 346 | if (r == SS_PRIMARY_NOP && mask.pdsk == 0) { | 347 | if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) { |
| 347 | nps = drbd_try_outdate_peer(mdev); | 348 | nps = drbd_try_outdate_peer(mdev); |
| 348 | 349 | ||
| 349 | if (force && nps > D_OUTDATED) { | 350 | if (force && nps > D_OUTDATED) { |
| @@ -356,25 +357,24 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) | |||
| 356 | 357 | ||
| 357 | continue; | 358 | continue; |
| 358 | } | 359 | } |
| 359 | if (r == SS_TWO_PRIMARIES) { | 360 | if (rv == SS_TWO_PRIMARIES) { |
| 360 | /* Maybe the peer is detected as dead very soon... | 361 | /* Maybe the peer is detected as dead very soon... |
| 361 | retry at most once more in this case. */ | 362 | retry at most once more in this case. */ |
| 362 | __set_current_state(TASK_INTERRUPTIBLE); | 363 | schedule_timeout_interruptible((mdev->net_conf->ping_timeo+1)*HZ/10); |
| 363 | schedule_timeout((mdev->net_conf->ping_timeo+1)*HZ/10); | ||
| 364 | if (try < max_tries) | 364 | if (try < max_tries) |
| 365 | try = max_tries - 1; | 365 | try = max_tries - 1; |
| 366 | continue; | 366 | continue; |
| 367 | } | 367 | } |
| 368 | if (r < SS_SUCCESS) { | 368 | if (rv < SS_SUCCESS) { |
| 369 | r = _drbd_request_state(mdev, mask, val, | 369 | rv = _drbd_request_state(mdev, mask, val, |
| 370 | CS_VERBOSE + CS_WAIT_COMPLETE); | 370 | CS_VERBOSE + CS_WAIT_COMPLETE); |
| 371 | if (r < SS_SUCCESS) | 371 | if (rv < SS_SUCCESS) |
| 372 | goto fail; | 372 | goto fail; |
| 373 | } | 373 | } |
| 374 | break; | 374 | break; |
| 375 | } | 375 | } |
| 376 | 376 | ||
| 377 | if (r < SS_SUCCESS) | 377 | if (rv < SS_SUCCESS) |
| 378 | goto fail; | 378 | goto fail; |
| 379 | 379 | ||
| 380 | if (forced) | 380 | if (forced) |
| @@ -384,7 +384,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) | |||
| 384 | wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0); | 384 | wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0); |
| 385 | 385 | ||
| 386 | if (new_role == R_SECONDARY) { | 386 | if (new_role == R_SECONDARY) { |
| 387 | set_disk_ro(mdev->vdisk, TRUE); | 387 | set_disk_ro(mdev->vdisk, true); |
| 388 | if (get_ldev(mdev)) { | 388 | if (get_ldev(mdev)) { |
| 389 | mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1; | 389 | mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1; |
| 390 | put_ldev(mdev); | 390 | put_ldev(mdev); |
| @@ -394,7 +394,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) | |||
| 394 | mdev->net_conf->want_lose = 0; | 394 | mdev->net_conf->want_lose = 0; |
| 395 | put_net_conf(mdev); | 395 | put_net_conf(mdev); |
| 396 | } | 396 | } |
| 397 | set_disk_ro(mdev->vdisk, FALSE); | 397 | set_disk_ro(mdev->vdisk, false); |
| 398 | if (get_ldev(mdev)) { | 398 | if (get_ldev(mdev)) { |
| 399 | if (((mdev->state.conn < C_CONNECTED || | 399 | if (((mdev->state.conn < C_CONNECTED || |
| 400 | mdev->state.pdsk <= D_FAILED) | 400 | mdev->state.pdsk <= D_FAILED) |
| @@ -406,10 +406,8 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) | |||
| 406 | } | 406 | } |
| 407 | } | 407 | } |
| 408 | 408 | ||
| 409 | if ((new_role == R_SECONDARY) && get_ldev(mdev)) { | 409 | /* writeout of activity log covered areas of the bitmap |
| 410 | drbd_al_to_on_disk_bm(mdev); | 410 | * to stable storage is done in the after-state-change handler already */
| 411 | put_ldev(mdev); | ||
| 412 | } | ||
| 413 | 411 | ||
| 414 | if (mdev->state.conn >= C_WF_REPORT_PARAMS) { | 412 | if (mdev->state.conn >= C_WF_REPORT_PARAMS) { |
| 415 | /* if this was forced, we should consider sync */ | 413 | /* if this was forced, we should consider sync */ |
| @@ -423,7 +421,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) | |||
| 423 | kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); | 421 | kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); |
| 424 | fail: | 422 | fail: |
| 425 | mutex_unlock(&mdev->state_mutex); | 423 | mutex_unlock(&mdev->state_mutex); |
| 426 | return r; | 424 | return rv; |
| 427 | } | 425 | } |
| 428 | 426 | ||
| 429 | static struct drbd_conf *ensure_mdev(int minor, int create) | 427 | static struct drbd_conf *ensure_mdev(int minor, int create) |
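drbd_set_role() now returns enum drbd_state_rv instead of a bare int, and every branch of the retry loop compares the typed rv. Because the error codes sort below SS_SUCCESS, `rv < SS_SUCCESS` remains the generic failure test; a sketch of that idiom (the numeric values are assumptions, only their ordering matters):

```c
#include <stdio.h>

/* Subset of DRBD's state return codes. The values are assumed;
 * what matters is that all failures sort below SS_SUCCESS. */
enum drbd_state_rv {
	SS_CW_FAILED_BY_PEER  = -10,
	SS_NO_UP_TO_DATE_DISK = -2,
	SS_UNKNOWN_ERROR      = -1,
	SS_SUCCESS            = 1,
	SS_NOTHING_TO_DO      = 2,
};

static const char *rv_name(enum drbd_state_rv rv)
{
	return rv < SS_SUCCESS ? "failure" : "success";
}

int main(void)
{
	printf("%s\n", rv_name(SS_NO_UP_TO_DATE_DISK)); /* failure */
	printf("%s\n", rv_name(SS_NOTHING_TO_DO));      /* success */
	return 0;
}
```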
| @@ -528,17 +526,19 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev, | |||
| 528 | } | 526 | } |
| 529 | } | 527 | } |
| 530 | 528 | ||
| 529 | /* input size is expected to be in KB */ | ||
| 531 | char *ppsize(char *buf, unsigned long long size) | 530 | char *ppsize(char *buf, unsigned long long size) |
| 532 | { | 531 | { |
| 533 | /* Needs 9 bytes at max. */ | 532 | /* Needs 9 bytes at max including trailing NUL: |
| 533 | * -1ULL ==> "16384 EB" */ | ||
| 534 | static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' }; | 534 | static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' }; |
| 535 | int base = 0; | 535 | int base = 0; |
| 536 | while (size >= 10000) { | 536 | while (size >= 10000 && base < sizeof(units)-1) { |
| 537 | /* shift + round */ | 537 | /* shift + round */ |
| 538 | size = (size >> 10) + !!(size & (1<<9)); | 538 | size = (size >> 10) + !!(size & (1<<9)); |
| 539 | base++; | 539 | base++; |
| 540 | } | 540 | } |
| 541 | sprintf(buf, "%lu %cB", (long)size, units[base]); | 541 | sprintf(buf, "%u %cB", (unsigned)size, units[base]); |
| 542 | 542 | ||
| 543 | return buf; | 543 | return buf; |
| 544 | } | 544 | } |
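ppsize() divides by 1024 with round-to-nearest (`(size >> 10) + !!(size & (1<<9))`) until the value drops below 10000, and the added bounds check stops at 'E' so even -1ULL renders as "16384 EB" within the 9-byte buffer. The same logic as a runnable userspace copy:

```c
#include <stdio.h>

/* Userspace copy of ppsize() from the hunk above; input is in KB. */
static char *ppsize(char *buf, unsigned long long size)
{
	static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
	int base = 0;

	while (size >= 10000 && base < (int)sizeof(units) - 1) {
		/* shift + round */
		size = (size >> 10) + !!(size & (1 << 9));
		base++;
	}
	sprintf(buf, "%u %cB", (unsigned)size, units[base]);
	return buf;
}

int main(void)
{
	char buf[16];

	printf("%s\n", ppsize(buf, 4096));                  /* 4096 KB  */
	printf("%s\n", ppsize(buf, 1048576));               /* 1024 MB  */
	printf("%s\n", ppsize(buf, 0xFFFFFFFFFFFFFFFFULL)); /* 16384 EB */
	return 0;
}
```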
| @@ -642,11 +642,19 @@ enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, enum dds_ | |||
| 642 | || prev_size != mdev->ldev->md.md_size_sect; | 642 | || prev_size != mdev->ldev->md.md_size_sect; |
| 643 | 643 | ||
| 644 | if (la_size_changed || md_moved) { | 644 | if (la_size_changed || md_moved) { |
| 645 | int err; | ||
| 646 | |||
| 645 | drbd_al_shrink(mdev); /* All extents inactive. */ | 647 | drbd_al_shrink(mdev); /* All extents inactive. */ |
| 646 | dev_info(DEV, "Writing the whole bitmap, %s\n", | 648 | dev_info(DEV, "Writing the whole bitmap, %s\n", |
| 647 | la_size_changed && md_moved ? "size changed and md moved" : | 649 | la_size_changed && md_moved ? "size changed and md moved" : |
| 648 | la_size_changed ? "size changed" : "md moved"); | 650 | la_size_changed ? "size changed" : "md moved"); |
| 649 | rv = drbd_bitmap_io(mdev, &drbd_bm_write, "size changed"); /* does drbd_resume_io() ! */ | 651 | /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */ |
| 652 | err = drbd_bitmap_io(mdev, &drbd_bm_write, | ||
| 653 | "size changed", BM_LOCKED_MASK); | ||
| 654 | if (err) { | ||
| 655 | rv = dev_size_error; | ||
| 656 | goto out; | ||
| 657 | } | ||
| 650 | drbd_md_mark_dirty(mdev); | 658 | drbd_md_mark_dirty(mdev); |
| 651 | } | 659 | } |
| 652 | 660 | ||
| @@ -765,22 +773,21 @@ static int drbd_check_al_size(struct drbd_conf *mdev) | |||
| 765 | return 0; | 773 | return 0; |
| 766 | } | 774 | } |
| 767 | 775 | ||
| 768 | void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __must_hold(local) | 776 | void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size) __must_hold(local) |
| 769 | { | 777 | { |
| 770 | struct request_queue * const q = mdev->rq_queue; | 778 | struct request_queue * const q = mdev->rq_queue; |
| 771 | struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; | 779 | struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; |
| 772 | int max_segments = mdev->ldev->dc.max_bio_bvecs; | 780 | int max_segments = mdev->ldev->dc.max_bio_bvecs; |
| 781 | int max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); | ||
| 773 | 782 | ||
| 774 | max_seg_s = min(queue_max_sectors(b) * queue_logical_block_size(b), max_seg_s); | ||
| 775 | |||
| 776 | blk_queue_max_hw_sectors(q, max_seg_s >> 9); | ||
| 777 | blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); | ||
| 778 | blk_queue_max_segment_size(q, max_seg_s); | ||
| 779 | blk_queue_logical_block_size(q, 512); | 783 | blk_queue_logical_block_size(q, 512); |
| 780 | blk_queue_segment_boundary(q, PAGE_SIZE-1); | 784 | blk_queue_max_hw_sectors(q, max_hw_sectors); |
| 781 | blk_stack_limits(&q->limits, &b->limits, 0); | 785 | /* This is the workaround for "bio would need to, but cannot, be split" */ |
| 786 | blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); | ||
| 787 | blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1); | ||
| 788 | blk_queue_stack_limits(q, b); | ||
| 782 | 789 | ||
| 783 | dev_info(DEV, "max_segment_size ( = BIO size ) = %u\n", queue_max_segment_size(q)); | 790 | dev_info(DEV, "max BIO size = %u\n", queue_max_hw_sectors(q) << 9); |
| 784 | 791 | ||
| 785 | if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { | 792 | if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { |
| 786 | dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", | 793 | dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", |
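The rewritten drbd_setup_queue_param() derives the queue's hard limit as the minimum of the backing device's max_hw_sectors and the negotiated max_bio_size converted to 512-byte sectors (`>> 9`), rather than juggling max_segment_size. The arithmetic, with example figures:

```c
#include <stdio.h>

static unsigned min_u(unsigned a, unsigned b) { return a < b ? a : b; }

int main(void)
{
	/* Example figures only: the backing queue allows 1024 sectors
	 * per request, DRBD negotiated a 128 KiB maximum bio. */
	unsigned backing_max_hw_sectors = 1024;
	unsigned max_bio_size = 128 * 1024; /* bytes */

	unsigned max_hw_sectors =
		min_u(backing_max_hw_sectors, max_bio_size >> 9);

	printf("max_hw_sectors = %u (%u KiB per bio)\n",
	       max_hw_sectors, (max_hw_sectors << 9) / 1024);
	/* prints: max_hw_sectors = 256 (128 KiB per bio) */
	return 0;
}
```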
| @@ -850,7 +857,7 @@ static void drbd_suspend_al(struct drbd_conf *mdev) | |||
| 850 | static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, | 857 | static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, |
| 851 | struct drbd_nl_cfg_reply *reply) | 858 | struct drbd_nl_cfg_reply *reply) |
| 852 | { | 859 | { |
| 853 | enum drbd_ret_codes retcode; | 860 | enum drbd_ret_code retcode; |
| 854 | enum determine_dev_size dd; | 861 | enum determine_dev_size dd; |
| 855 | sector_t max_possible_sectors; | 862 | sector_t max_possible_sectors; |
| 856 | sector_t min_md_device_sectors; | 863 | sector_t min_md_device_sectors; |
| @@ -858,8 +865,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp | |||
| 858 | struct block_device *bdev; | 865 | struct block_device *bdev; |
| 859 | struct lru_cache *resync_lru = NULL; | 866 | struct lru_cache *resync_lru = NULL; |
| 860 | union drbd_state ns, os; | 867 | union drbd_state ns, os; |
| 861 | unsigned int max_seg_s; | 868 | unsigned int max_bio_size; |
| 862 | int rv; | 869 | enum drbd_state_rv rv; |
| 863 | int cp_discovered = 0; | 870 | int cp_discovered = 0; |
| 864 | int logical_block_size; | 871 | int logical_block_size; |
| 865 | 872 | ||
| @@ -1005,9 +1012,10 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp | |||
| 1005 | /* and for any other previously queued work */ | 1012 | /* and for any other previously queued work */ |
| 1006 | drbd_flush_workqueue(mdev); | 1013 | drbd_flush_workqueue(mdev); |
| 1007 | 1014 | ||
| 1008 | retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); | 1015 | rv = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); |
| 1016 | retcode = rv; /* FIXME: Type mismatch. */ | ||
| 1009 | drbd_resume_io(mdev); | 1017 | drbd_resume_io(mdev); |
| 1010 | if (retcode < SS_SUCCESS) | 1018 | if (rv < SS_SUCCESS) |
| 1011 | goto fail; | 1019 | goto fail; |
| 1012 | 1020 | ||
| 1013 | if (!get_ldev_if_state(mdev, D_ATTACHING)) | 1021 | if (!get_ldev_if_state(mdev, D_ATTACHING)) |
| @@ -1109,20 +1117,20 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp | |||
| 1109 | mdev->read_cnt = 0; | 1117 | mdev->read_cnt = 0; |
| 1110 | mdev->writ_cnt = 0; | 1118 | mdev->writ_cnt = 0; |
| 1111 | 1119 | ||
| 1112 | max_seg_s = DRBD_MAX_SEGMENT_SIZE; | 1120 | max_bio_size = DRBD_MAX_BIO_SIZE; |
| 1113 | if (mdev->state.conn == C_CONNECTED) { | 1121 | if (mdev->state.conn == C_CONNECTED) { |
| 1114 | /* We are Primary, Connected, and now attach a new local | 1122 | /* We are Primary, Connected, and now attach a new local |
| 1115 | * backing store. We must not increase the user visible maximum | 1123 | * backing store. We must not increase the user visible maximum |
| 1116 | * bio size on this device to something the peer may not be | 1124 | * bio size on this device to something the peer may not be |
| 1117 | * able to handle. */ | 1125 | * able to handle. */ |
| 1118 | if (mdev->agreed_pro_version < 94) | 1126 | if (mdev->agreed_pro_version < 94) |
| 1119 | max_seg_s = queue_max_segment_size(mdev->rq_queue); | 1127 | max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9; |
| 1120 | else if (mdev->agreed_pro_version == 94) | 1128 | else if (mdev->agreed_pro_version == 94) |
| 1121 | max_seg_s = DRBD_MAX_SIZE_H80_PACKET; | 1129 | max_bio_size = DRBD_MAX_SIZE_H80_PACKET; |
| 1122 | /* else: drbd 8.3.9 and later, stay with default */ | 1130 | /* else: drbd 8.3.9 and later, stay with default */ |
| 1123 | } | 1131 | } |
| 1124 | 1132 | ||
| 1125 | drbd_setup_queue_param(mdev, max_seg_s); | 1133 | drbd_setup_queue_param(mdev, max_bio_size); |
| 1126 | 1134 | ||
| 1127 | /* If I am currently not R_PRIMARY, | 1135 | /* If I am currently not R_PRIMARY, |
| 1128 | * but meta data primary indicator is set, | 1136 | * but meta data primary indicator is set, |
| @@ -1154,12 +1162,14 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp | |||
| 1154 | if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) { | 1162 | if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) { |
| 1155 | dev_info(DEV, "Assuming that all blocks are out of sync " | 1163 | dev_info(DEV, "Assuming that all blocks are out of sync " |
| 1156 | "(aka FullSync)\n"); | 1164 | "(aka FullSync)\n"); |
| 1157 | if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from attaching")) { | 1165 | if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, |
| 1166 | "set_n_write from attaching", BM_LOCKED_MASK)) { | ||
| 1158 | retcode = ERR_IO_MD_DISK; | 1167 | retcode = ERR_IO_MD_DISK; |
| 1159 | goto force_diskless_dec; | 1168 | goto force_diskless_dec; |
| 1160 | } | 1169 | } |
| 1161 | } else { | 1170 | } else { |
| 1162 | if (drbd_bitmap_io(mdev, &drbd_bm_read, "read from attaching") < 0) { | 1171 | if (drbd_bitmap_io(mdev, &drbd_bm_read, |
| 1172 | "read from attaching", BM_LOCKED_MASK) < 0) { | ||
| 1163 | retcode = ERR_IO_MD_DISK; | 1173 | retcode = ERR_IO_MD_DISK; |
| 1164 | goto force_diskless_dec; | 1174 | goto force_diskless_dec; |
| 1165 | } | 1175 | } |
| @@ -1167,7 +1177,11 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp | |||
| 1167 | 1177 | ||
| 1168 | if (cp_discovered) { | 1178 | if (cp_discovered) { |
| 1169 | drbd_al_apply_to_bm(mdev); | 1179 | drbd_al_apply_to_bm(mdev); |
| 1170 | drbd_al_to_on_disk_bm(mdev); | 1180 | if (drbd_bitmap_io(mdev, &drbd_bm_write, |
| 1181 | "crashed primary apply AL", BM_LOCKED_MASK)) { | ||
| 1182 | retcode = ERR_IO_MD_DISK; | ||
| 1183 | goto force_diskless_dec; | ||
| 1184 | } | ||
| 1171 | } | 1185 | } |
| 1172 | 1186 | ||
| 1173 | if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev)) | 1187 | if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev)) |
| @@ -1279,7 +1293,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, | |||
| 1279 | struct drbd_nl_cfg_reply *reply) | 1293 | struct drbd_nl_cfg_reply *reply) |
| 1280 | { | 1294 | { |
| 1281 | int i, ns; | 1295 | int i, ns; |
| 1282 | enum drbd_ret_codes retcode; | 1296 | enum drbd_ret_code retcode; |
| 1283 | struct net_conf *new_conf = NULL; | 1297 | struct net_conf *new_conf = NULL; |
| 1284 | struct crypto_hash *tfm = NULL; | 1298 | struct crypto_hash *tfm = NULL; |
| 1285 | struct crypto_hash *integrity_w_tfm = NULL; | 1299 | struct crypto_hash *integrity_w_tfm = NULL; |
| @@ -1324,6 +1338,8 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, | |||
| 1324 | new_conf->wire_protocol = DRBD_PROT_C; | 1338 | new_conf->wire_protocol = DRBD_PROT_C; |
| 1325 | new_conf->ping_timeo = DRBD_PING_TIMEO_DEF; | 1339 | new_conf->ping_timeo = DRBD_PING_TIMEO_DEF; |
| 1326 | new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF; | 1340 | new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF; |
| 1341 | new_conf->on_congestion = DRBD_ON_CONGESTION_DEF; | ||
| 1342 | new_conf->cong_extents = DRBD_CONG_EXTENTS_DEF; | ||
| 1327 | 1343 | ||
| 1328 | if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) { | 1344 | if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) { |
| 1329 | retcode = ERR_MANDATORY_TAG; | 1345 | retcode = ERR_MANDATORY_TAG; |
| @@ -1345,6 +1361,11 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, | |||
| 1345 | } | 1361 | } |
| 1346 | } | 1362 | } |
| 1347 | 1363 | ||
| 1364 | if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) { | ||
| 1365 | retcode = ERR_CONG_NOT_PROTO_A; | ||
| 1366 | goto fail; | ||
| 1367 | } | ||
| 1368 | |||
| 1348 | if (mdev->state.role == R_PRIMARY && new_conf->want_lose) { | 1369 | if (mdev->state.role == R_PRIMARY && new_conf->want_lose) { |
| 1349 | retcode = ERR_DISCARD; | 1370 | retcode = ERR_DISCARD; |
| 1350 | goto fail; | 1371 | goto fail; |
| @@ -1525,6 +1546,21 @@ static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl | |||
| 1525 | struct drbd_nl_cfg_reply *reply) | 1546 | struct drbd_nl_cfg_reply *reply) |
| 1526 | { | 1547 | { |
| 1527 | int retcode; | 1548 | int retcode; |
| 1549 | struct disconnect dc; | ||
| 1550 | |||
| 1551 | memset(&dc, 0, sizeof(struct disconnect)); | ||
| 1552 | if (!disconnect_from_tags(mdev, nlp->tag_list, &dc)) { | ||
| 1553 | retcode = ERR_MANDATORY_TAG; | ||
| 1554 | goto fail; | ||
| 1555 | } | ||
| 1556 | |||
| 1557 | if (dc.force) { | ||
| 1558 | spin_lock_irq(&mdev->req_lock); | ||
| 1559 | if (mdev->state.conn >= C_WF_CONNECTION) | ||
| 1560 | _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), CS_HARD, NULL); | ||
| 1561 | spin_unlock_irq(&mdev->req_lock); | ||
| 1562 | goto done; | ||
| 1563 | } | ||
| 1528 | 1564 | ||
| 1529 | retcode = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING), CS_ORDERED); | 1565 | retcode = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING), CS_ORDERED); |
| 1530 | 1566 | ||
| @@ -1842,6 +1878,10 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl | |||
| 1842 | { | 1878 | { |
| 1843 | int retcode; | 1879 | int retcode; |
| 1844 | 1880 | ||
| 1881 | /* If there is still bitmap IO pending, probably because of a previous | ||
| 1882 | * resync just being finished, wait for it before requesting a new resync. */ | ||
| 1883 | wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); | ||
| 1884 | |||
| 1845 | retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); | 1885 | retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); |
| 1846 | 1886 | ||
| 1847 | if (retcode < SS_SUCCESS && retcode != SS_NEED_CONNECTION) | 1887 | if (retcode < SS_SUCCESS && retcode != SS_NEED_CONNECTION) |
| @@ -1877,6 +1917,10 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re | |||
| 1877 | { | 1917 | { |
| 1878 | int retcode; | 1918 | int retcode; |
| 1879 | 1919 | ||
| 1920 | /* If there is still bitmap IO pending, probably because of a previous | ||
| 1921 | * resync just being finished, wait for it before requesting a new resync. */ | ||
| 1922 | wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); | ||
| 1923 | |||
| 1880 | retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); | 1924 | retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); |
| 1881 | 1925 | ||
| 1882 | if (retcode < SS_SUCCESS) { | 1926 | if (retcode < SS_SUCCESS) { |
| @@ -1885,9 +1929,9 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re | |||
| 1885 | into a full resync. */ | 1929 | into a full resync. */ |
| 1886 | retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT)); | 1930 | retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT)); |
| 1887 | if (retcode >= SS_SUCCESS) { | 1931 | if (retcode >= SS_SUCCESS) { |
| 1888 | /* open coded drbd_bitmap_io() */ | ||
| 1889 | if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al, | 1932 | if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al, |
| 1890 | "set_n_write from invalidate_peer")) | 1933 | "set_n_write from invalidate_peer", |
| 1934 | BM_LOCKED_SET_ALLOWED)) | ||
| 1891 | retcode = ERR_IO_MD_DISK; | 1935 | retcode = ERR_IO_MD_DISK; |
| 1892 | } | 1936 | } |
| 1893 | } else | 1937 | } else |
| @@ -1914,9 +1958,17 @@ static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n | |||
| 1914 | struct drbd_nl_cfg_reply *reply) | 1958 | struct drbd_nl_cfg_reply *reply) |
| 1915 | { | 1959 | { |
| 1916 | int retcode = NO_ERROR; | 1960 | int retcode = NO_ERROR; |
| 1961 | union drbd_state s; | ||
| 1917 | 1962 | ||
| 1918 | if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) | 1963 | if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) { |
| 1919 | retcode = ERR_PAUSE_IS_CLEAR; | 1964 | s = mdev->state; |
| 1965 | if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) { | ||
| 1966 | retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP : | ||
| 1967 | s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR; | ||
| 1968 | } else { | ||
| 1969 | retcode = ERR_PAUSE_IS_CLEAR; | ||
| 1970 | } | ||
| 1971 | } | ||
| 1920 | 1972 | ||
| 1921 | reply->ret_code = retcode; | 1973 | reply->ret_code = retcode; |
| 1922 | return 0; | 1974 | return 0; |
| @@ -2054,6 +2106,11 @@ static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, | |||
| 2054 | reply->ret_code = ERR_MANDATORY_TAG; | 2106 | reply->ret_code = ERR_MANDATORY_TAG; |
| 2055 | return 0; | 2107 | return 0; |
| 2056 | } | 2108 | } |
| 2109 | |||
| 2110 | /* If there is still bitmap IO pending, e.g. previous resync or verify | ||
| 2111 | * just being finished, wait for it before requesting a new resync. */ | ||
| 2112 | wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); | ||
| 2113 | |||
| 2057 | /* w_make_ov_request expects position to be aligned */ | 2114 | /* w_make_ov_request expects position to be aligned */ |
| 2058 | mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT; | 2115 | mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT; |
| 2059 | reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); | 2116 | reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); |
| @@ -2097,7 +2154,8 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl | |||
| 2097 | drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */ | 2154 | drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */ |
| 2098 | 2155 | ||
| 2099 | if (args.clear_bm) { | 2156 | if (args.clear_bm) { |
| 2100 | err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, "clear_n_write from new_c_uuid"); | 2157 | err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, |
| 2158 | "clear_n_write from new_c_uuid", BM_LOCKED_MASK); | ||
| 2101 | if (err) { | 2159 | if (err) { |
| 2102 | dev_err(DEV, "Writing bitmap failed with %d\n",err); | 2160 | dev_err(DEV, "Writing bitmap failed with %d\n",err); |
| 2103 | retcode = ERR_IO_MD_DISK; | 2161 | retcode = ERR_IO_MD_DISK; |
| @@ -2105,6 +2163,7 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl | |||
| 2105 | if (skip_initial_sync) { | 2163 | if (skip_initial_sync) { |
| 2106 | drbd_send_uuids_skip_initial_sync(mdev); | 2164 | drbd_send_uuids_skip_initial_sync(mdev); |
| 2107 | _drbd_uuid_set(mdev, UI_BITMAP, 0); | 2165 | _drbd_uuid_set(mdev, UI_BITMAP, 0); |
| 2166 | drbd_print_uuids(mdev, "cleared bitmap UUID"); | ||
| 2108 | spin_lock_irq(&mdev->req_lock); | 2167 | spin_lock_irq(&mdev->req_lock); |
| 2109 | _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), | 2168 | _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), |
| 2110 | CS_VERBOSE, NULL); | 2169 | CS_VERBOSE, NULL); |
| @@ -2189,7 +2248,8 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms | |||
| 2189 | goto fail; | 2248 | goto fail; |
| 2190 | } | 2249 | } |
| 2191 | 2250 | ||
| 2192 | if (nlp->packet_type >= P_nl_after_last_packet) { | 2251 | if (nlp->packet_type >= P_nl_after_last_packet || |
| 2252 | nlp->packet_type == P_return_code_only) { | ||
| 2193 | retcode = ERR_PACKET_NR; | 2253 | retcode = ERR_PACKET_NR; |
| 2194 | goto fail; | 2254 | goto fail; |
| 2195 | } | 2255 | } |
| @@ -2205,7 +2265,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms | |||
| 2205 | reply_size += cm->reply_body_size; | 2265 | reply_size += cm->reply_body_size; |
| 2206 | 2266 | ||
| 2207 | /* allocation not in the IO path, cqueue thread context */ | 2267 | /* allocation not in the IO path, cqueue thread context */ |
| 2208 | cn_reply = kmalloc(reply_size, GFP_KERNEL); | 2268 | cn_reply = kzalloc(reply_size, GFP_KERNEL); |
| 2209 | if (!cn_reply) { | 2269 | if (!cn_reply) { |
| 2210 | retcode = ERR_NOMEM; | 2270 | retcode = ERR_NOMEM; |
| 2211 | goto fail; | 2271 | goto fail; |
| @@ -2213,7 +2273,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms | |||
| 2213 | reply = (struct drbd_nl_cfg_reply *) cn_reply->data; | 2273 | reply = (struct drbd_nl_cfg_reply *) cn_reply->data; |
| 2214 | 2274 | ||
| 2215 | reply->packet_type = | 2275 | reply->packet_type = |
| 2216 | cm->reply_body_size ? nlp->packet_type : P_nl_after_last_packet; | 2276 | cm->reply_body_size ? nlp->packet_type : P_return_code_only; |
| 2217 | reply->minor = nlp->drbd_minor; | 2277 | reply->minor = nlp->drbd_minor; |
| 2218 | reply->ret_code = NO_ERROR; /* Might be modified by cm->function. */ | 2278 | reply->ret_code = NO_ERROR; /* Might be modified by cm->function. */ |
| 2219 | /* reply->tag_list; might be modified by cm->function. */ | 2279 | /* reply->tag_list; might be modified by cm->function. */ |
| @@ -2376,7 +2436,7 @@ void drbd_bcast_ee(struct drbd_conf *mdev, | |||
| 2376 | /* receiver thread context, which is not in the writeout path (of this node), | 2436 | /* receiver thread context, which is not in the writeout path (of this node), |
| 2377 | * but may be in the writeout path of the _other_ node. | 2437 | * but may be in the writeout path of the _other_ node. |
| 2378 | * GFP_NOIO to avoid potential "distributed deadlock". */ | 2438 | * GFP_NOIO to avoid potential "distributed deadlock". */ |
| 2379 | cn_reply = kmalloc( | 2439 | cn_reply = kzalloc( |
| 2380 | sizeof(struct cn_msg)+ | 2440 | sizeof(struct cn_msg)+ |
| 2381 | sizeof(struct drbd_nl_cfg_reply)+ | 2441 | sizeof(struct drbd_nl_cfg_reply)+ |
| 2382 | sizeof(struct dump_ee_tag_len_struct)+ | 2442 | sizeof(struct dump_ee_tag_len_struct)+ |
| @@ -2398,10 +2458,11 @@ void drbd_bcast_ee(struct drbd_conf *mdev, | |||
| 2398 | tl = tl_add_int(tl, T_ee_sector, &e->sector); | 2458 | tl = tl_add_int(tl, T_ee_sector, &e->sector); |
| 2399 | tl = tl_add_int(tl, T_ee_block_id, &e->block_id); | 2459 | tl = tl_add_int(tl, T_ee_block_id, &e->block_id); |
| 2400 | 2460 | ||
| 2461 | /* dump the first 32k */ | ||
| 2462 | len = min_t(unsigned, e->size, 32 << 10); | ||
| 2401 | put_unaligned(T_ee_data, tl++); | 2463 | put_unaligned(T_ee_data, tl++); |
| 2402 | put_unaligned(e->size, tl++); | 2464 | put_unaligned(len, tl++); |
| 2403 | 2465 | ||
| 2404 | len = e->size; | ||
| 2405 | page = e->pages; | 2466 | page = e->pages; |
| 2406 | page_chain_for_each(page) { | 2467 | page_chain_for_each(page) { |
| 2407 | void *d = kmap_atomic(page, KM_USER0); | 2468 | void *d = kmap_atomic(page, KM_USER0); |
| @@ -2410,6 +2471,8 @@ void drbd_bcast_ee(struct drbd_conf *mdev, | |||
| 2410 | kunmap_atomic(d, KM_USER0); | 2471 | kunmap_atomic(d, KM_USER0); |
| 2411 | tl = (unsigned short*)((char*)tl + l); | 2472 | tl = (unsigned short*)((char*)tl + l); |
| 2412 | len -= l; | 2473 | len -= l; |
| 2474 | if (len == 0) | ||
| 2475 | break; | ||
| 2413 | } | 2476 | } |
| 2414 | put_unaligned(TT_END, tl++); /* Close the tag list */ | 2477 | put_unaligned(TT_END, tl++); /* Close the tag list */ |
| 2415 | 2478 | ||
| @@ -2508,6 +2571,7 @@ void drbd_nl_send_reply(struct cn_msg *req, int ret_code) | |||
| 2508 | (struct drbd_nl_cfg_reply *)cn_reply->data; | 2571 | (struct drbd_nl_cfg_reply *)cn_reply->data; |
| 2509 | int rr; | 2572 | int rr; |
| 2510 | 2573 | ||
| 2574 | memset(buffer, 0, sizeof(buffer)); | ||
| 2511 | cn_reply->id = req->id; | 2575 | cn_reply->id = req->id; |
| 2512 | 2576 | ||
| 2513 | cn_reply->seq = req->seq; | 2577 | cn_reply->seq = req->seq; |
| @@ -2515,6 +2579,7 @@ void drbd_nl_send_reply(struct cn_msg *req, int ret_code) | |||
| 2515 | cn_reply->len = sizeof(struct drbd_nl_cfg_reply); | 2579 | cn_reply->len = sizeof(struct drbd_nl_cfg_reply); |
| 2516 | cn_reply->flags = 0; | 2580 | cn_reply->flags = 0; |
| 2517 | 2581 | ||
| 2582 | reply->packet_type = P_return_code_only; | ||
| 2518 | reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor; | 2583 | reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor; |
| 2519 | reply->ret_code = ret_code; | 2584 | reply->ret_code = ret_code; |
| 2520 | 2585 | ||
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 7e6ac307e2de..2959cdfb77f5 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c | |||
| @@ -34,6 +34,7 @@ | |||
| 34 | #include "drbd_int.h" | 34 | #include "drbd_int.h" |
| 35 | 35 | ||
| 36 | static int drbd_proc_open(struct inode *inode, struct file *file); | 36 | static int drbd_proc_open(struct inode *inode, struct file *file); |
| 37 | static int drbd_proc_release(struct inode *inode, struct file *file); | ||
| 37 | 38 | ||
| 38 | 39 | ||
| 39 | struct proc_dir_entry *drbd_proc; | 40 | struct proc_dir_entry *drbd_proc; |
| @@ -42,9 +43,22 @@ const struct file_operations drbd_proc_fops = { | |||
| 42 | .open = drbd_proc_open, | 43 | .open = drbd_proc_open, |
| 43 | .read = seq_read, | 44 | .read = seq_read, |
| 44 | .llseek = seq_lseek, | 45 | .llseek = seq_lseek, |
| 45 | .release = single_release, | 46 | .release = drbd_proc_release, |
| 46 | }; | 47 | }; |
| 47 | 48 | ||
| 49 | void seq_printf_with_thousands_grouping(struct seq_file *seq, long v) | ||
| 50 | { | ||
| 51 | /* v is in kB/sec. We don't expect TiByte/sec yet. */ | ||
| 52 | if (unlikely(v >= 1000000)) { | ||
| 53 | /* cool: > GiByte/s */ | ||
| 54 | seq_printf(seq, "%ld,", v / 1000000); | ||
| 55 | v %= 1000000; | ||
| 56 | seq_printf(seq, "%03ld,%03ld", v/1000, v % 1000); | ||
| 57 | } else if (likely(v >= 1000)) | ||
| 58 | seq_printf(seq, "%ld,%03ld", v/1000, v % 1000); | ||
| 59 | else | ||
| 60 | seq_printf(seq, "%ld", v); | ||
| 61 | } | ||
| 48 | 62 | ||
| 49 | /*lge | 63 | /*lge |
| 50 | * progress bars shamelessly adapted from drivers/md/md.c | 64 |
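seq_printf_with_thousands_grouping() prints a kB/s figure with comma grouping across three magnitude bands, keeping the remainder (`v %= 1000000`) for the lower groups in the top band. A userspace twin with the same branching you can run:

```c
#include <stdio.h>

/* Userspace twin of seq_printf_with_thousands_grouping();
 * v is a throughput in kB/sec. */
static void print_grouped(long v)
{
	if (v >= 1000000) {
		printf("%ld,", v / 1000000); /* > GiByte/s range */
		v %= 1000000;
		printf("%03ld,%03ld", v / 1000, v % 1000);
	} else if (v >= 1000)
		printf("%ld,%03ld", v / 1000, v % 1000);
	else
		printf("%ld", v);
	printf("\n");
}

int main(void)
{
	print_grouped(42);      /* 42        */
	print_grouped(15300);   /* 15,300    */
	print_grouped(1234567); /* 1,234,567 */
	return 0;
}
```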
| @@ -71,10 +85,15 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) | |||
| 71 | seq_printf(seq, "."); | 85 | seq_printf(seq, "."); |
| 72 | seq_printf(seq, "] "); | 86 | seq_printf(seq, "] "); |
| 73 | 87 | ||
| 74 | seq_printf(seq, "sync'ed:%3u.%u%% ", res / 10, res % 10); | 88 | if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) |
| 75 | /* if more than 1 GB display in MB */ | 89 | seq_printf(seq, "verified:"); |
| 76 | if (mdev->rs_total > 0x100000L) | 90 | else |
| 77 | seq_printf(seq, "(%lu/%lu)M\n\t", | 91 | seq_printf(seq, "sync'ed:"); |
| 92 | seq_printf(seq, "%3u.%u%% ", res / 10, res % 10); | ||
| 93 | |||
| 94 | /* if more than a few GB, display in MB */ | ||
| 95 | if (mdev->rs_total > (4UL << (30 - BM_BLOCK_SHIFT))) | ||
| 96 | seq_printf(seq, "(%lu/%lu)M", | ||
| 78 | (unsigned long) Bit2KB(rs_left >> 10), | 97 | (unsigned long) Bit2KB(rs_left >> 10), |
| 79 | (unsigned long) Bit2KB(mdev->rs_total >> 10)); | 98 | (unsigned long) Bit2KB(mdev->rs_total >> 10)); |
| 80 | else | 99 | else |
| @@ -94,6 +113,7 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) | |||
| 94 | /* Rolling marks. last_mark+1 may just now be modified. last_mark+2 is | 113 | /* Rolling marks. last_mark+1 may just now be modified. last_mark+2 is |
| 95 | * at least (DRBD_SYNC_MARKS-2)*DRBD_SYNC_MARK_STEP old, and has at | 114 | * at least (DRBD_SYNC_MARKS-2)*DRBD_SYNC_MARK_STEP old, and has at |
| 96 | * least DRBD_SYNC_MARK_STEP time before it will be modified. */ | 115 | * least DRBD_SYNC_MARK_STEP time before it will be modified. */ |
| 116 | /* ------------------------ ~18s average ------------------------ */ | ||
| 97 | i = (mdev->rs_last_mark + 2) % DRBD_SYNC_MARKS; | 117 | i = (mdev->rs_last_mark + 2) % DRBD_SYNC_MARKS; |
| 98 | dt = (jiffies - mdev->rs_mark_time[i]) / HZ; | 118 | dt = (jiffies - mdev->rs_mark_time[i]) / HZ; |
| 99 | if (dt > (DRBD_SYNC_MARK_STEP * DRBD_SYNC_MARKS)) | 119 | if (dt > (DRBD_SYNC_MARK_STEP * DRBD_SYNC_MARKS)) |
| @@ -107,14 +127,24 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) | |||
| 107 | seq_printf(seq, "finish: %lu:%02lu:%02lu", | 127 | seq_printf(seq, "finish: %lu:%02lu:%02lu", |
| 108 | rt / 3600, (rt % 3600) / 60, rt % 60); | 128 | rt / 3600, (rt % 3600) / 60, rt % 60); |
| 109 | 129 | ||
| 110 | /* current speed average over (SYNC_MARKS * SYNC_MARK_STEP) jiffies */ | ||
| 111 | dbdt = Bit2KB(db/dt); | 130 | dbdt = Bit2KB(db/dt); |
| 112 | if (dbdt > 1000) | 131 | seq_printf(seq, " speed: "); |
| 113 | seq_printf(seq, " speed: %ld,%03ld", | 132 | seq_printf_with_thousands_grouping(seq, dbdt); |
| 114 | dbdt/1000, dbdt % 1000); | 133 | seq_printf(seq, " ("); |
| 115 | else | 134 | /* ------------------------- ~3s average ------------------------ */ |
| 116 | seq_printf(seq, " speed: %ld", dbdt); | 135 | if (proc_details >= 1) { |
| 136 | /* this is what drbd_rs_should_slow_down() uses */ | ||
| 137 | i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS; | ||
| 138 | dt = (jiffies - mdev->rs_mark_time[i]) / HZ; | ||
| 139 | if (!dt) | ||
| 140 | dt++; | ||
| 141 | db = mdev->rs_mark_left[i] - rs_left; | ||
| 142 | dbdt = Bit2KB(db/dt); | ||
| 143 | seq_printf_with_thousands_grouping(seq, dbdt); | ||
| 144 | seq_printf(seq, " -- "); | ||
| 145 | } | ||
| 117 | 146 | ||
| 147 | /* --------------------- long term average ---------------------- */ | ||
| 118 | /* mean speed since syncer started | 148 | /* mean speed since syncer started |
| 119 | * we do account for PausedSync periods */ | 149 | * we do account for PausedSync periods */ |
| 120 | dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ; | 150 | dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ; |
| @@ -122,20 +152,34 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) | |||
| 122 | dt = 1; | 152 | dt = 1; |
| 123 | db = mdev->rs_total - rs_left; | 153 | db = mdev->rs_total - rs_left; |
| 124 | dbdt = Bit2KB(db/dt); | 154 | dbdt = Bit2KB(db/dt); |
| 125 | if (dbdt > 1000) | 155 | seq_printf_with_thousands_grouping(seq, dbdt); |
| 126 | seq_printf(seq, " (%ld,%03ld)", | 156 | seq_printf(seq, ")"); |
| 127 | dbdt/1000, dbdt % 1000); | ||
| 128 | else | ||
| 129 | seq_printf(seq, " (%ld)", dbdt); | ||
| 130 | 157 | ||
| 131 | if (mdev->state.conn == C_SYNC_TARGET) { | 158 | if (mdev->state.conn == C_SYNC_TARGET || |
| 132 | if (mdev->c_sync_rate > 1000) | 159 | mdev->state.conn == C_VERIFY_S) { |
| 133 | seq_printf(seq, " want: %d,%03d", | 160 | seq_printf(seq, " want: "); |
| 134 | mdev->c_sync_rate / 1000, mdev->c_sync_rate % 1000); | 161 | seq_printf_with_thousands_grouping(seq, mdev->c_sync_rate); |
| 135 | else | ||
| 136 | seq_printf(seq, " want: %d", mdev->c_sync_rate); | ||
| 137 | } | 162 | } |
| 138 | seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : ""); | 163 | seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : ""); |
| 164 | |||
| 165 | if (proc_details >= 1) { | ||
| 166 | /* 64 bit: | ||
| 167 | * we convert to sectors in the display below. */ | ||
| 168 | unsigned long bm_bits = drbd_bm_bits(mdev); | ||
| 169 | unsigned long bit_pos; | ||
| 170 | if (mdev->state.conn == C_VERIFY_S || | ||
| 171 | mdev->state.conn == C_VERIFY_T) | ||
| 172 | bit_pos = bm_bits - mdev->ov_left; | ||
| 173 | else | ||
| 174 | bit_pos = mdev->bm_resync_fo; | ||
| 175 | /* Total sectors may be slightly off for oddly | ||
| 176 | * sized devices. So what. */ | ||
| 177 | seq_printf(seq, | ||
| 178 | "\t%3d%% sector pos: %llu/%llu\n", | ||
| 179 | (int)(bit_pos / (bm_bits/100+1)), | ||
| 180 | (unsigned long long)bit_pos * BM_SECT_PER_BIT, | ||
| 181 | (unsigned long long)bm_bits * BM_SECT_PER_BIT); | ||
| 182 | } | ||
| 139 | } | 183 | } |
| 140 | 184 | ||
| 141 | static void resync_dump_detail(struct seq_file *seq, struct lc_element *e) | 185 | static void resync_dump_detail(struct seq_file *seq, struct lc_element *e) |
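With proc_details >= 1 the progress block above now also reports an absolute position: the current bitmap bit scaled to 512-byte sectors by BM_SECT_PER_BIT, with the percentage divided by bm_bits/100+1 to dodge a divide-by-zero on tiny devices. A small arithmetic check (BM_SECT_PER_BIT = 8 assumes DRBD's 4 KiB bitmap granularity):

```c
#include <stdio.h>

#define BM_SECT_PER_BIT 8ULL /* assumed: 4 KiB per bit / 512 B sectors */

int main(void)
{
	unsigned long bm_bits = 262144; /* a 1 GiB device at 4 KiB/bit */
	unsigned long bit_pos = 131072; /* halfway through */

	printf("%3d%% sector pos: %llu/%llu\n",
	       (int)(bit_pos / (bm_bits / 100 + 1)),
	       (unsigned long long)bit_pos * BM_SECT_PER_BIT,
	       (unsigned long long)bm_bits * BM_SECT_PER_BIT);
	/* prints: " 49% sector pos: 1048576/2097152" */
	return 0;
}
```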
| @@ -232,20 +276,16 @@ static int drbd_seq_show(struct seq_file *seq, void *v) | |||
| 232 | mdev->epochs, | 276 | mdev->epochs, |
| 233 | write_ordering_chars[mdev->write_ordering] | 277 | write_ordering_chars[mdev->write_ordering] |
| 234 | ); | 278 | ); |
| 235 | seq_printf(seq, " oos:%lu\n", | 279 | seq_printf(seq, " oos:%llu\n", |
| 236 | Bit2KB(drbd_bm_total_weight(mdev))); | 280 | Bit2KB((unsigned long long) |
| 281 | drbd_bm_total_weight(mdev))); | ||
| 237 | } | 282 | } |
| 238 | if (mdev->state.conn == C_SYNC_SOURCE || | 283 | if (mdev->state.conn == C_SYNC_SOURCE || |
| 239 | mdev->state.conn == C_SYNC_TARGET) | 284 | mdev->state.conn == C_SYNC_TARGET || |
| 285 | mdev->state.conn == C_VERIFY_S || | ||
| 286 | mdev->state.conn == C_VERIFY_T) | ||
| 240 | drbd_syncer_progress(mdev, seq); | 287 | drbd_syncer_progress(mdev, seq); |
| 241 | 288 | ||
| 242 | if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) | ||
| 243 | seq_printf(seq, "\t%3d%% %lu/%lu\n", | ||
| 244 | (int)((mdev->rs_total-mdev->ov_left) / | ||
| 245 | (mdev->rs_total/100+1)), | ||
| 246 | mdev->rs_total - mdev->ov_left, | ||
| 247 | mdev->rs_total); | ||
| 248 | |||
| 249 | if (proc_details >= 1 && get_ldev_if_state(mdev, D_FAILED)) { | 289 | if (proc_details >= 1 && get_ldev_if_state(mdev, D_FAILED)) { |
| 250 | lc_seq_printf_stats(seq, mdev->resync); | 290 | lc_seq_printf_stats(seq, mdev->resync); |
| 251 | lc_seq_printf_stats(seq, mdev->act_log); | 291 | lc_seq_printf_stats(seq, mdev->act_log); |
| @@ -265,7 +305,15 @@ static int drbd_seq_show(struct seq_file *seq, void *v) | |||
| 265 | 305 | ||
| 266 | static int drbd_proc_open(struct inode *inode, struct file *file) | 306 | static int drbd_proc_open(struct inode *inode, struct file *file) |
| 267 | { | 307 | { |
| 268 | return single_open(file, drbd_seq_show, PDE(inode)->data); | 308 | if (try_module_get(THIS_MODULE)) |
| 309 | return single_open(file, drbd_seq_show, PDE(inode)->data); | ||
| 310 | return -ENODEV; | ||
| 311 | } | ||
| 312 | |||
| 313 | static int drbd_proc_release(struct inode *inode, struct file *file) | ||
| 314 | { | ||
| 315 | module_put(THIS_MODULE); | ||
| 316 | return single_release(inode, file); | ||
| 269 | } | 317 | } |
| 270 | 318 | ||
| 271 | /* PROC FS stuff end */ | 319 | /* PROC FS stuff end */ |
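The proc handlers now pin the module for the lifetime of an open file: try_module_get() in open (failing with -ENODEV once unload has begun) and module_put() in release, closing a window where the module could vanish while /proc/drbd was held open. The pairing, modeled with a plain atomic refcount instead of the kernel's module refcounting:

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int refs = 1; /* 1 = "module loaded", 0 = tearing down */

/* ~ try_module_get(): take a reference unless teardown started. */
static bool proc_open(void)
{
	int r = atomic_load(&refs);

	do {
		if (r == 0)
			return false; /* would return -ENODEV */
	} while (!atomic_compare_exchange_weak(&refs, &r, r + 1));
	return true;
}

/* ~ module_put(): drop the reference taken at open time. */
static void proc_release(void)
{
	atomic_fetch_sub(&refs, 1);
}

int main(void)
{
	if (proc_open()) {
		puts("file open, module pinned");
		proc_release();
	}
	return 0;
}
```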
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8e68be939deb..fe1564c7d8b6 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c | |||
| @@ -277,7 +277,7 @@ static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net) | |||
| 277 | atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use; | 277 | atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use; |
| 278 | int i; | 278 | int i; |
| 279 | 279 | ||
| 280 | if (drbd_pp_vacant > (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE)*minor_count) | 280 | if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count) |
| 281 | i = page_chain_free(page); | 281 | i = page_chain_free(page); |
| 282 | else { | 282 | else { |
| 283 | struct page *tmp; | 283 | struct page *tmp; |
| @@ -319,7 +319,7 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, | |||
| 319 | struct page *page; | 319 | struct page *page; |
| 320 | unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; | 320 | unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; |
| 321 | 321 | ||
| 322 | if (FAULT_ACTIVE(mdev, DRBD_FAULT_AL_EE)) | 322 | if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE)) |
| 323 | return NULL; | 323 | return NULL; |
| 324 | 324 | ||
| 325 | e = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); | 325 | e = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); |
| @@ -725,16 +725,16 @@ static int drbd_socket_okay(struct drbd_conf *mdev, struct socket **sock) | |||
| 725 | char tb[4]; | 725 | char tb[4]; |
| 726 | 726 | ||
| 727 | if (!*sock) | 727 | if (!*sock) |
| 728 | return FALSE; | 728 | return false; |
| 729 | 729 | ||
| 730 | rr = drbd_recv_short(mdev, *sock, tb, 4, MSG_DONTWAIT | MSG_PEEK); | 730 | rr = drbd_recv_short(mdev, *sock, tb, 4, MSG_DONTWAIT | MSG_PEEK); |
| 731 | 731 | ||
| 732 | if (rr > 0 || rr == -EAGAIN) { | 732 | if (rr > 0 || rr == -EAGAIN) { |
| 733 | return TRUE; | 733 | return true; |
| 734 | } else { | 734 | } else { |
| 735 | sock_release(*sock); | 735 | sock_release(*sock); |
| 736 | *sock = NULL; | 736 | *sock = NULL; |
| 737 | return FALSE; | 737 | return false; |
| 738 | } | 738 | } |
| 739 | } | 739 | } |
| 740 | 740 | ||
| @@ -768,8 +768,7 @@ static int drbd_connect(struct drbd_conf *mdev) | |||
| 768 | if (s || ++try >= 3) | 768 | if (s || ++try >= 3) |
| 769 | break; | 769 | break; |
| 770 | /* give the other side time to call bind() & listen() */ | 770 | /* give the other side time to call bind() & listen() */ |
| 771 | __set_current_state(TASK_INTERRUPTIBLE); | 771 | schedule_timeout_interruptible(HZ / 10); |
| 772 | schedule_timeout(HZ / 10); | ||
| 773 | } | 772 | } |
| 774 | 773 | ||
| 775 | if (s) { | 774 | if (s) { |
| @@ -788,8 +787,7 @@ static int drbd_connect(struct drbd_conf *mdev) | |||
| 788 | } | 787 | } |
| 789 | 788 | ||
| 790 | if (sock && msock) { | 789 | if (sock && msock) { |
| 791 | __set_current_state(TASK_INTERRUPTIBLE); | 790 | schedule_timeout_interruptible(HZ / 10); |
| 792 | schedule_timeout(HZ / 10); | ||
| 793 | ok = drbd_socket_okay(mdev, &sock); | 791 | ok = drbd_socket_okay(mdev, &sock); |
| 794 | ok = drbd_socket_okay(mdev, &msock) && ok; | 792 | ok = drbd_socket_okay(mdev, &msock) && ok; |
| 795 | if (ok) | 793 | if (ok) |
| @@ -906,7 +904,7 @@ retry: | |||
| 906 | put_ldev(mdev); | 904 | put_ldev(mdev); |
| 907 | } | 905 | } |
| 908 | 906 | ||
| 909 | if (!drbd_send_protocol(mdev)) | 907 | if (drbd_send_protocol(mdev) == -1) |
| 910 | return -1; | 908 | return -1; |
| 911 | drbd_send_sync_param(mdev, &mdev->sync_conf); | 909 | drbd_send_sync_param(mdev, &mdev->sync_conf); |
| 912 | drbd_send_sizes(mdev, 0, 0); | 910 | drbd_send_sizes(mdev, 0, 0); |
| @@ -914,6 +912,7 @@ retry: | |||
| 914 | drbd_send_state(mdev); | 912 | drbd_send_state(mdev); |
| 915 | clear_bit(USE_DEGR_WFC_T, &mdev->flags); | 913 | clear_bit(USE_DEGR_WFC_T, &mdev->flags); |
| 916 | clear_bit(RESIZE_PENDING, &mdev->flags); | 914 | clear_bit(RESIZE_PENDING, &mdev->flags); |
| 915 | mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */ | ||
| 917 | 916 | ||
| 918 | return 1; | 917 | return 1; |
| 919 | 918 | ||
| @@ -932,8 +931,9 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi | |||
| 932 | 931 | ||
| 933 | r = drbd_recv(mdev, h, sizeof(*h)); | 932 | r = drbd_recv(mdev, h, sizeof(*h)); |
| 934 | if (unlikely(r != sizeof(*h))) { | 933 | if (unlikely(r != sizeof(*h))) { |
| 935 | dev_err(DEV, "short read expecting header on sock: r=%d\n", r); | 934 | if (!signal_pending(current)) |
| 936 | return FALSE; | 935 | dev_warn(DEV, "short read expecting header on sock: r=%d\n", r); |
| 936 | return false; | ||
| 937 | } | 937 | } |
| 938 | 938 | ||
| 939 | if (likely(h->h80.magic == BE_DRBD_MAGIC)) { | 939 | if (likely(h->h80.magic == BE_DRBD_MAGIC)) { |
| @@ -947,11 +947,11 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi | |||
| 947 | be32_to_cpu(h->h80.magic), | 947 | be32_to_cpu(h->h80.magic), |
| 948 | be16_to_cpu(h->h80.command), | 948 | be16_to_cpu(h->h80.command), |
| 949 | be16_to_cpu(h->h80.length)); | 949 | be16_to_cpu(h->h80.length)); |
| 950 | return FALSE; | 950 | return false; |
| 951 | } | 951 | } |
| 952 | mdev->last_received = jiffies; | 952 | mdev->last_received = jiffies; |
| 953 | 953 | ||
| 954 | return TRUE; | 954 | return true; |
| 955 | } | 955 | } |
| 956 | 956 | ||
| 957 | static void drbd_flush(struct drbd_conf *mdev) | 957 | static void drbd_flush(struct drbd_conf *mdev) |
| @@ -1074,6 +1074,16 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) | |||
| 1074 | * @mdev: DRBD device. | 1074 | * @mdev: DRBD device. |
| 1075 | * @e: epoch entry | 1075 | * @e: epoch entry |
| 1076 | * @rw: flag field, see bio->bi_rw | 1076 | * @rw: flag field, see bio->bi_rw |
| 1077 | * | ||
| 1078 | * May spread the pages to multiple bios, | ||
| 1079 | * depending on bio_add_page restrictions. | ||
| 1080 | * | ||
| 1081 | * Returns 0 if all bios have been submitted, | ||
| 1082 | * -ENOMEM if we could not allocate enough bios, | ||
| 1083 | * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a | ||
| 1084 | * single page to an empty bio (which should never happen and likely indicates | ||
| 1085 | * that the lower level IO stack is in some way broken). This has been observed | ||
| 1086 | * on certain Xen deployments. | ||
| 1077 | */ | 1087 | */ |
| 1078 | /* TODO allocate from our own bio_set. */ | 1088 | /* TODO allocate from our own bio_set. */ |
| 1079 | int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, | 1089 | int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, |
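The expanded kernel-doc above pins down drbd_submit_ee()'s contract: 0 when all bios were submitted, -ENOMEM when bio allocation failed, and -ENOSPC for the should-never-happen bio_add_page failure on an empty bio. A hypothetical caller honoring that contract (submit_ee() is a stand-in, not the real function):

```c
#include <errno.h>
#include <stdio.h>

static int submit_ee(void) { return -ENOSPC; } /* stand-in */

int main(void)
{
	switch (submit_ee()) {
	case 0:
		puts("all bios submitted");
		break;
	case -ENOMEM:
		puts("bio allocation failed, retry later");
		break;
	case -ENOSPC:
		puts("bio_add_page failed on empty bio: lower IO stack broken");
		break;
	}
	return 0;
}
```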
| @@ -1086,6 +1096,7 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, | |||
| 1086 | unsigned ds = e->size; | 1096 | unsigned ds = e->size; |
| 1087 | unsigned n_bios = 0; | 1097 | unsigned n_bios = 0; |
| 1088 | unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT; | 1098 | unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT; |
| 1099 | int err = -ENOMEM; | ||
| 1089 | 1100 | ||
| 1090 | /* In most cases, we will only need one bio. But in case the lower | 1101 | /* In most cases, we will only need one bio. But in case the lower |
| 1091 | * level restrictions happen to be different at this offset on this | 1102 | * level restrictions happen to be different at this offset on this |
| @@ -1111,8 +1122,17 @@ next_bio: | |||
| 1111 | page_chain_for_each(page) { | 1122 | page_chain_for_each(page) { |
| 1112 | unsigned len = min_t(unsigned, ds, PAGE_SIZE); | 1123 | unsigned len = min_t(unsigned, ds, PAGE_SIZE); |
| 1113 | if (!bio_add_page(bio, page, len, 0)) { | 1124 | if (!bio_add_page(bio, page, len, 0)) { |
| 1114 | /* a single page must always be possible! */ | 1125 | /* A single page must always be possible! |
| 1115 | BUG_ON(bio->bi_vcnt == 0); | 1126 | * But in case it fails anyways, |
| 1127 | * we deal with it, and complain (below). */ | ||
| 1128 | if (bio->bi_vcnt == 0) { | ||
| 1129 | dev_err(DEV, | ||
| 1130 | "bio_add_page failed for len=%u, " | ||
| 1131 | "bi_vcnt=0 (bi_sector=%llu)\n", | ||
| 1132 | len, (unsigned long long)bio->bi_sector); | ||
| 1133 | err = -ENOSPC; | ||
| 1134 | goto fail; | ||
| 1135 | } | ||
| 1116 | goto next_bio; | 1136 | goto next_bio; |
| 1117 | } | 1137 | } |
| 1118 | ds -= len; | 1138 | ds -= len; |
| @@ -1138,7 +1158,7 @@ fail: | |||
| 1138 | bios = bios->bi_next; | 1158 | bios = bios->bi_next; |
| 1139 | bio_put(bio); | 1159 | bio_put(bio); |
| 1140 | } | 1160 | } |
| 1141 | return -ENOMEM; | 1161 | return err; |
| 1142 | } | 1162 | } |
| 1143 | 1163 | ||
| 1144 | static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) | 1164 | static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) |
| @@ -1160,7 +1180,7 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsign | |||
| 1160 | switch (mdev->write_ordering) { | 1180 | switch (mdev->write_ordering) { |
| 1161 | case WO_none: | 1181 | case WO_none: |
| 1162 | if (rv == FE_RECYCLED) | 1182 | if (rv == FE_RECYCLED) |
| 1163 | return TRUE; | 1183 | return true; |
| 1164 | 1184 | ||
| 1165 | /* receiver context, in the writeout path of the other node. | 1185 | /* receiver context, in the writeout path of the other node. |
| 1166 | * avoid potential distributed deadlock */ | 1186 | * avoid potential distributed deadlock */ |
| @@ -1188,10 +1208,10 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsign | |||
| 1188 | D_ASSERT(atomic_read(&epoch->active) == 0); | 1208 | D_ASSERT(atomic_read(&epoch->active) == 0); |
| 1189 | D_ASSERT(epoch->flags == 0); | 1209 | D_ASSERT(epoch->flags == 0); |
| 1190 | 1210 | ||
| 1191 | return TRUE; | 1211 | return true; |
| 1192 | default: | 1212 | default: |
| 1193 | dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering); | 1213 | dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering); |
| 1194 | return FALSE; | 1214 | return false; |
| 1195 | } | 1215 | } |
| 1196 | 1216 | ||
| 1197 | epoch->flags = 0; | 1217 | epoch->flags = 0; |
| @@ -1209,7 +1229,7 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsign | |||
| 1209 | } | 1229 | } |
| 1210 | spin_unlock(&mdev->epoch_lock); | 1230 | spin_unlock(&mdev->epoch_lock); |
| 1211 | 1231 | ||
| 1212 | return TRUE; | 1232 | return true; |
| 1213 | } | 1233 | } |
| 1214 | 1234 | ||
| 1215 | /* used from receive_RSDataReply (recv_resync_read) | 1235 | /* used from receive_RSDataReply (recv_resync_read) |
| @@ -1231,21 +1251,25 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __ | |||
| 1231 | if (dgs) { | 1251 | if (dgs) { |
| 1232 | rr = drbd_recv(mdev, dig_in, dgs); | 1252 | rr = drbd_recv(mdev, dig_in, dgs); |
| 1233 | if (rr != dgs) { | 1253 | if (rr != dgs) { |
| 1234 | dev_warn(DEV, "short read receiving data digest: read %d expected %d\n", | 1254 | if (!signal_pending(current)) |
| 1235 | rr, dgs); | 1255 | dev_warn(DEV, |
| 1256 | "short read receiving data digest: read %d expected %d\n", | ||
| 1257 | rr, dgs); | ||
| 1236 | return NULL; | 1258 | return NULL; |
| 1237 | } | 1259 | } |
| 1238 | } | 1260 | } |
| 1239 | 1261 | ||
| 1240 | data_size -= dgs; | 1262 | data_size -= dgs; |
| 1241 | 1263 | ||
| 1264 | ERR_IF(data_size == 0) return NULL; | ||
| 1242 | ERR_IF(data_size & 0x1ff) return NULL; | 1265 | ERR_IF(data_size & 0x1ff) return NULL; |
| 1243 | ERR_IF(data_size > DRBD_MAX_SEGMENT_SIZE) return NULL; | 1266 | ERR_IF(data_size > DRBD_MAX_BIO_SIZE) return NULL; |
| 1244 | 1267 | ||
| 1245 | /* even though we trust our peer, | 1268 | /* even though we trust our peer, |
| 1246 | * we sometimes have to double check. */ | 1269 | * we sometimes have to double check. */ |
| 1247 | if (sector + (data_size>>9) > capacity) { | 1270 | if (sector + (data_size>>9) > capacity) { |
| 1248 | dev_err(DEV, "capacity: %llus < sector: %llus + size: %u\n", | 1271 | dev_err(DEV, "request from peer beyond end of local disk: " |
| 1272 | "capacity: %llus < sector: %llus + size: %u\n", | ||
| 1249 | (unsigned long long)capacity, | 1273 | (unsigned long long)capacity, |
| 1250 | (unsigned long long)sector, data_size); | 1274 | (unsigned long long)sector, data_size); |
| 1251 | return NULL; | 1275 | return NULL; |
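read_in_block() gains a zero-size check alongside the existing alignment and maximum-size guards (the limit is renamed from DRBD_MAX_SEGMENT_SIZE to DRBD_MAX_BIO_SIZE), and the capacity message now states explicitly that the peer asked for data beyond the end of the local disk. The validation amounts to one predicate; a sketch, with the 128 KiB cap chosen for illustration rather than taken from drbd_int.h:

```c
#include <stdbool.h>
#include <stdint.h>

#define DRBD_MAX_BIO_SIZE (1U << 17) /* 128 KiB here is illustrative only */

/* Validate a peer-supplied (sector, size) pair before allocating
 * anything, mirroring the ERR_IF checks in read_in_block(). */
static bool request_is_sane(uint64_t sector, uint32_t data_size,
                            uint64_t capacity_sectors)
{
    if (data_size == 0)                 /* new check in this patch */
        return false;
    if (data_size & 0x1ff)              /* must be 512-byte aligned */
        return false;
    if (data_size > DRBD_MAX_BIO_SIZE)  /* renamed size cap */
        return false;
    /* even though we trust our peer, we sometimes have to double check */
    return sector + (data_size >> 9) <= capacity_sectors;
}

int main(void)
{
    /* a 4 KiB read at sector 0 of a 2048-sector device is fine */
    return request_is_sane(0, 4096, 2048) ? 0 : 1;
}
```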
| @@ -1264,15 +1288,16 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __ | |||
| 1264 | unsigned len = min_t(int, ds, PAGE_SIZE); | 1288 | unsigned len = min_t(int, ds, PAGE_SIZE); |
| 1265 | data = kmap(page); | 1289 | data = kmap(page); |
| 1266 | rr = drbd_recv(mdev, data, len); | 1290 | rr = drbd_recv(mdev, data, len); |
| 1267 | if (FAULT_ACTIVE(mdev, DRBD_FAULT_RECEIVE)) { | 1291 | if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) { |
| 1268 | dev_err(DEV, "Fault injection: Corrupting data on receive\n"); | 1292 | dev_err(DEV, "Fault injection: Corrupting data on receive\n"); |
| 1269 | data[0] = data[0] ^ (unsigned long)-1; | 1293 | data[0] = data[0] ^ (unsigned long)-1; |
| 1270 | } | 1294 | } |
| 1271 | kunmap(page); | 1295 | kunmap(page); |
| 1272 | if (rr != len) { | 1296 | if (rr != len) { |
| 1273 | drbd_free_ee(mdev, e); | 1297 | drbd_free_ee(mdev, e); |
| 1274 | dev_warn(DEV, "short read receiving data: read %d expected %d\n", | 1298 | if (!signal_pending(current)) |
| 1275 | rr, len); | 1299 | dev_warn(DEV, "short read receiving data: read %d expected %d\n", |
| 1300 | rr, len); | ||
| 1276 | return NULL; | 1301 | return NULL; |
| 1277 | } | 1302 | } |
| 1278 | ds -= rr; | 1303 | ds -= rr; |
| @@ -1281,7 +1306,8 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __ | |||
| 1281 | if (dgs) { | 1306 | if (dgs) { |
| 1282 | drbd_csum_ee(mdev, mdev->integrity_r_tfm, e, dig_vv); | 1307 | drbd_csum_ee(mdev, mdev->integrity_r_tfm, e, dig_vv); |
| 1283 | if (memcmp(dig_in, dig_vv, dgs)) { | 1308 | if (memcmp(dig_in, dig_vv, dgs)) { |
| 1284 | dev_err(DEV, "Digest integrity check FAILED.\n"); | 1309 | dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n", |
| 1310 | (unsigned long long)sector, data_size); | ||
| 1285 | drbd_bcast_ee(mdev, "digest failed", | 1311 | drbd_bcast_ee(mdev, "digest failed", |
| 1286 | dgs, dig_in, dig_vv, e); | 1312 | dgs, dig_in, dig_vv, e); |
| 1287 | drbd_free_ee(mdev, e); | 1313 | drbd_free_ee(mdev, e); |
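The digest failure message now includes the sector and size, so a corrupted block can be matched to a specific request in the logs. The underlying protocol check is plain: recompute a digest over the received payload and memcmp() it against the digest the peer sent ahead of the data. A toy illustration using FNV-1a as a stand-in; the kernel code uses the connection's negotiated integrity algorithm (integrity_r_tfm), not this hash:

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Stand-in for drbd_csum_ee(): FNV-1a over the payload. */
static void toy_csum(const uint8_t *buf, size_t len, uint8_t out[8])
{
    uint64_t h = 0xcbf29ce484222325ULL;

    for (size_t i = 0; i < len; i++)
        h = (h ^ buf[i]) * 0x100000001b3ULL;
    memcpy(out, &h, sizeof(h));
}

int main(void)
{
    uint8_t payload[512] = "replicated block";
    uint8_t dig_in[8], dig_vv[8];

    toy_csum(payload, sizeof(payload), dig_in);  /* sender side */
    payload[100] ^= 0xff;                        /* corruption in flight */
    toy_csum(payload, sizeof(payload), dig_vv);  /* receiver recomputes */

    if (memcmp(dig_in, dig_vv, sizeof(dig_in)))
        fprintf(stderr, "Digest integrity check FAILED: %llus +%u\n",
                (unsigned long long)12345, 512u);
    return 0;
}
```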
| @@ -1302,7 +1328,7 @@ static int drbd_drain_block(struct drbd_conf *mdev, int data_size) | |||
| 1302 | void *data; | 1328 | void *data; |
| 1303 | 1329 | ||
| 1304 | if (!data_size) | 1330 | if (!data_size) |
| 1305 | return TRUE; | 1331 | return true; |
| 1306 | 1332 | ||
| 1307 | page = drbd_pp_alloc(mdev, 1, 1); | 1333 | page = drbd_pp_alloc(mdev, 1, 1); |
| 1308 | 1334 | ||
| @@ -1311,8 +1337,10 @@ static int drbd_drain_block(struct drbd_conf *mdev, int data_size) | |||
| 1311 | rr = drbd_recv(mdev, data, min_t(int, data_size, PAGE_SIZE)); | 1337 | rr = drbd_recv(mdev, data, min_t(int, data_size, PAGE_SIZE)); |
| 1312 | if (rr != min_t(int, data_size, PAGE_SIZE)) { | 1338 | if (rr != min_t(int, data_size, PAGE_SIZE)) { |
| 1313 | rv = 0; | 1339 | rv = 0; |
| 1314 | dev_warn(DEV, "short read receiving data: read %d expected %d\n", | 1340 | if (!signal_pending(current)) |
| 1315 | rr, min_t(int, data_size, PAGE_SIZE)); | 1341 | dev_warn(DEV, |
| 1342 | "short read receiving data: read %d expected %d\n", | ||
| 1343 | rr, min_t(int, data_size, PAGE_SIZE)); | ||
| 1316 | break; | 1344 | break; |
| 1317 | } | 1345 | } |
| 1318 | data_size -= rr; | 1346 | data_size -= rr; |
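This hunk, like several around it, guards the short-read warning with !signal_pending(current): when the receiver thread is being signalled during connection teardown, a truncated drbd_recv() is the expected outcome and the warning would only be noise. A userspace analogue of the idiom, assuming a plain read() loop where EINTR plays the role of the pending signal:

```c
#include <errno.h>
#include <stdio.h>
#include <unistd.h>

/* Read exactly `want` bytes; complain about short reads only when the
 * shortfall is not caused by an interrupting signal, mirroring the
 * !signal_pending(current) guard added in this patch. */
static ssize_t read_exact(int fd, void *buf, size_t want)
{
    size_t done = 0;

    while (done < want) {
        ssize_t rr = read(fd, (char *)buf + done, want - done);
        if (rr > 0) {
            done += rr;
            continue;
        }
        if (rr < 0 && errno == EINTR)
            return -1;   /* interrupted on purpose: stay quiet */
        fprintf(stderr, "short read: read %zu expected %zu\n", done, want);
        return -1;
    }
    return (ssize_t)done;
}

int main(void)
{
    char buf[16];

    return read_exact(0, buf, sizeof(buf)) < 0;
}
```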
| @@ -1337,8 +1365,10 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, | |||
| 1337 | if (dgs) { | 1365 | if (dgs) { |
| 1338 | rr = drbd_recv(mdev, dig_in, dgs); | 1366 | rr = drbd_recv(mdev, dig_in, dgs); |
| 1339 | if (rr != dgs) { | 1367 | if (rr != dgs) { |
| 1340 | dev_warn(DEV, "short read receiving data reply digest: read %d expected %d\n", | 1368 | if (!signal_pending(current)) |
| 1341 | rr, dgs); | 1369 | dev_warn(DEV, |
| 1370 | "short read receiving data reply digest: read %d expected %d\n", | ||
| 1371 | rr, dgs); | ||
| 1342 | return 0; | 1372 | return 0; |
| 1343 | } | 1373 | } |
| 1344 | } | 1374 | } |
| @@ -1359,9 +1389,10 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, | |||
| 1359 | expect); | 1389 | expect); |
| 1360 | kunmap(bvec->bv_page); | 1390 | kunmap(bvec->bv_page); |
| 1361 | if (rr != expect) { | 1391 | if (rr != expect) { |
| 1362 | dev_warn(DEV, "short read receiving data reply: " | 1392 | if (!signal_pending(current)) |
| 1363 | "read %d expected %d\n", | 1393 | dev_warn(DEV, "short read receiving data reply: " |
| 1364 | rr, expect); | 1394 | "read %d expected %d\n", |
| 1395 | rr, expect); | ||
| 1365 | return 0; | 1396 | return 0; |
| 1366 | } | 1397 | } |
| 1367 | data_size -= rr; | 1398 | data_size -= rr; |
| @@ -1425,11 +1456,10 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si | |||
| 1425 | 1456 | ||
| 1426 | atomic_add(data_size >> 9, &mdev->rs_sect_ev); | 1457 | atomic_add(data_size >> 9, &mdev->rs_sect_ev); |
| 1427 | if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0) | 1458 | if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0) |
| 1428 | return TRUE; | 1459 | return true; |
| 1429 | 1460 | ||
| 1430 | /* drbd_submit_ee currently fails for one reason only: | 1461 | /* don't care about the reason here */ |
| 1431 | * not being able to allocate enough bios. | 1462 | dev_err(DEV, "submit failed, triggering re-connect\n"); |
| 1432 | * Is dropping the connection going to help? */ | ||
| 1433 | spin_lock_irq(&mdev->req_lock); | 1463 | spin_lock_irq(&mdev->req_lock); |
| 1434 | list_del(&e->w.list); | 1464 | list_del(&e->w.list); |
| 1435 | spin_unlock_irq(&mdev->req_lock); | 1465 | spin_unlock_irq(&mdev->req_lock); |
| @@ -1437,7 +1467,7 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si | |||
| 1437 | drbd_free_ee(mdev, e); | 1467 | drbd_free_ee(mdev, e); |
| 1438 | fail: | 1468 | fail: |
| 1439 | put_ldev(mdev); | 1469 | put_ldev(mdev); |
| 1440 | return FALSE; | 1470 | return false; |
| 1441 | } | 1471 | } |
| 1442 | 1472 | ||
| 1443 | static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) | 1473 | static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) |
| @@ -1454,7 +1484,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi | |||
| 1454 | spin_unlock_irq(&mdev->req_lock); | 1484 | spin_unlock_irq(&mdev->req_lock); |
| 1455 | if (unlikely(!req)) { | 1485 | if (unlikely(!req)) { |
| 1456 | dev_err(DEV, "Got a corrupt block_id/sector pair(1).\n"); | 1486 | dev_err(DEV, "Got a corrupt block_id/sector pair(1).\n"); |
| 1457 | return FALSE; | 1487 | return false; |
| 1458 | } | 1488 | } |
| 1459 | 1489 | ||
| 1460 | /* hlist_del(&req->colision) is done in _req_may_be_done, to avoid | 1490 | /* hlist_del(&req->colision) is done in _req_may_be_done, to avoid |
| @@ -1611,15 +1641,15 @@ static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq) | |||
| 1611 | return ret; | 1641 | return ret; |
| 1612 | } | 1642 | } |
| 1613 | 1643 | ||
| 1614 | static unsigned long write_flags_to_bio(struct drbd_conf *mdev, u32 dpf) | 1644 | /* see also bio_flags_to_wire() |
| 1645 | * DRBD_REQ_*, because we need to semantically map the flags to data packet | ||
| 1646 | * flags and back. We may replicate to other kernel versions. */ | ||
| 1647 | static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf) | ||
| 1615 | { | 1648 | { |
| 1616 | if (mdev->agreed_pro_version >= 95) | 1649 | return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) | |
| 1617 | return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) | | 1650 | (dpf & DP_FUA ? REQ_FUA : 0) | |
| 1618 | (dpf & DP_FUA ? REQ_FUA : 0) | | 1651 | (dpf & DP_FLUSH ? REQ_FLUSH : 0) | |
| 1619 | (dpf & DP_FLUSH ? REQ_FUA : 0) | | 1652 | (dpf & DP_DISCARD ? REQ_DISCARD : 0); |
| 1620 | (dpf & DP_DISCARD ? REQ_DISCARD : 0); | ||
| 1621 | else | ||
| 1622 | return dpf & DP_RW_SYNC ? REQ_SYNC : 0; | ||
| 1623 | } | 1653 | } |
| 1624 | 1654 | ||
| 1625 | /* mirrored write */ | 1655 | /* mirrored write */ |
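write_flags_to_bio() becomes wire_flags_to_bio() and loses its protocol-version branch; presumably this is safe because peers older than protocol 95 never set the extra DP_* bits in the first place. Note that the old code on the left mapped DP_FLUSH to REQ_FUA, so this hunk also fixes a real flag-mapping bug. A self-contained sketch of the mapping and its inverse (the bio_flags_to_wire() named in the comment lives on the sending side; the flag values below are invented for the round-trip test, not the real DP_*/REQ_* bits):

```c
#include <assert.h>
#include <stdio.h>

/* Invented flag values for the round-trip test; the real DP_* wire bits
 * and REQ_* bio flags come from drbd.h and the block layer. */
enum { DP_RW_SYNC = 1, DP_FUA = 2, DP_FLUSH = 4, DP_DISCARD = 8 };
enum { REQ_SYNC = 16, REQ_FUA = 32, REQ_FLUSH = 64, REQ_DISCARD = 128 };

/* Receiving side, as in the hunk above. DP_FLUSH now yields REQ_FLUSH;
 * the old code mapped it to REQ_FUA. */
static unsigned long wire_flags_to_bio(unsigned dpf)
{
    return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
           (dpf & DP_FUA ? REQ_FUA : 0) |
           (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
           (dpf & DP_DISCARD ? REQ_DISCARD : 0);
}

/* Presumed shape of the sending-side counterpart the comment names. */
static unsigned bio_flags_to_wire(unsigned long rw)
{
    return (rw & REQ_SYNC ? DP_RW_SYNC : 0) |
           (rw & REQ_FUA ? DP_FUA : 0) |
           (rw & REQ_FLUSH ? DP_FLUSH : 0) |
           (rw & REQ_DISCARD ? DP_DISCARD : 0);
}

int main(void)
{
    for (unsigned dpf = 0; dpf < 16; dpf++)
        assert(bio_flags_to_wire(wire_flags_to_bio(dpf)) == dpf);
    puts("wire<->bio flag mapping round-trips");
    return 0;
}
```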
| @@ -1632,9 +1662,6 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
| 1632 | u32 dp_flags; | 1662 | u32 dp_flags; |
| 1633 | 1663 | ||
| 1634 | if (!get_ldev(mdev)) { | 1664 | if (!get_ldev(mdev)) { |
| 1635 | if (__ratelimit(&drbd_ratelimit_state)) | ||
| 1636 | dev_err(DEV, "Can not write mirrored data block " | ||
| 1637 | "to local disk.\n"); | ||
| 1638 | spin_lock(&mdev->peer_seq_lock); | 1665 | spin_lock(&mdev->peer_seq_lock); |
| 1639 | if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num)) | 1666 | if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num)) |
| 1640 | mdev->peer_seq++; | 1667 | mdev->peer_seq++; |
| @@ -1654,23 +1681,23 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
| 1654 | e = read_in_block(mdev, p->block_id, sector, data_size); | 1681 | e = read_in_block(mdev, p->block_id, sector, data_size); |
| 1655 | if (!e) { | 1682 | if (!e) { |
| 1656 | put_ldev(mdev); | 1683 | put_ldev(mdev); |
| 1657 | return FALSE; | 1684 | return false; |
| 1658 | } | 1685 | } |
| 1659 | 1686 | ||
| 1660 | e->w.cb = e_end_block; | 1687 | e->w.cb = e_end_block; |
| 1661 | 1688 | ||
| 1689 | dp_flags = be32_to_cpu(p->dp_flags); | ||
| 1690 | rw |= wire_flags_to_bio(mdev, dp_flags); | ||
| 1691 | |||
| 1692 | if (dp_flags & DP_MAY_SET_IN_SYNC) | ||
| 1693 | e->flags |= EE_MAY_SET_IN_SYNC; | ||
| 1694 | |||
| 1662 | spin_lock(&mdev->epoch_lock); | 1695 | spin_lock(&mdev->epoch_lock); |
| 1663 | e->epoch = mdev->current_epoch; | 1696 | e->epoch = mdev->current_epoch; |
| 1664 | atomic_inc(&e->epoch->epoch_size); | 1697 | atomic_inc(&e->epoch->epoch_size); |
| 1665 | atomic_inc(&e->epoch->active); | 1698 | atomic_inc(&e->epoch->active); |
| 1666 | spin_unlock(&mdev->epoch_lock); | 1699 | spin_unlock(&mdev->epoch_lock); |
| 1667 | 1700 | ||
| 1668 | dp_flags = be32_to_cpu(p->dp_flags); | ||
| 1669 | rw |= write_flags_to_bio(mdev, dp_flags); | ||
| 1670 | |||
| 1671 | if (dp_flags & DP_MAY_SET_IN_SYNC) | ||
| 1672 | e->flags |= EE_MAY_SET_IN_SYNC; | ||
| 1673 | |||
| 1674 | /* I'm the receiver, I do hold a net_cnt reference. */ | 1701 | /* I'm the receiver, I do hold a net_cnt reference. */ |
| 1675 | if (!mdev->net_conf->two_primaries) { | 1702 | if (!mdev->net_conf->two_primaries) { |
| 1676 | spin_lock_irq(&mdev->req_lock); | 1703 | spin_lock_irq(&mdev->req_lock); |
| @@ -1773,7 +1800,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
| 1773 | put_ldev(mdev); | 1800 | put_ldev(mdev); |
| 1774 | wake_asender(mdev); | 1801 | wake_asender(mdev); |
| 1775 | finish_wait(&mdev->misc_wait, &wait); | 1802 | finish_wait(&mdev->misc_wait, &wait); |
| 1776 | return TRUE; | 1803 | return true; |
| 1777 | } | 1804 | } |
| 1778 | 1805 | ||
| 1779 | if (signal_pending(current)) { | 1806 | if (signal_pending(current)) { |
| @@ -1829,11 +1856,10 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
| 1829 | } | 1856 | } |
| 1830 | 1857 | ||
| 1831 | if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0) | 1858 | if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0) |
| 1832 | return TRUE; | 1859 | return true; |
| 1833 | 1860 | ||
| 1834 | /* drbd_submit_ee currently fails for one reason only: | 1861 | /* don't care about the reason here */ |
| 1835 | * not being able to allocate enough bios. | 1862 | dev_err(DEV, "submit failed, triggering re-connect\n"); |
| 1836 | * Is dropping the connection going to help? */ | ||
| 1837 | spin_lock_irq(&mdev->req_lock); | 1863 | spin_lock_irq(&mdev->req_lock); |
| 1838 | list_del(&e->w.list); | 1864 | list_del(&e->w.list); |
| 1839 | hlist_del_init(&e->colision); | 1865 | hlist_del_init(&e->colision); |
| @@ -1842,12 +1868,10 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
| 1842 | drbd_al_complete_io(mdev, e->sector); | 1868 | drbd_al_complete_io(mdev, e->sector); |
| 1843 | 1869 | ||
| 1844 | out_interrupted: | 1870 | out_interrupted: |
| 1845 | /* yes, the epoch_size now is imbalanced. | 1871 | drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + EV_CLEANUP); |
| 1846 | * but we drop the connection anyways, so we don't have a chance to | ||
| 1847 | * receive a barrier... atomic_inc(&mdev->epoch_size); */ | ||
| 1848 | put_ldev(mdev); | 1872 | put_ldev(mdev); |
| 1849 | drbd_free_ee(mdev, e); | 1873 | drbd_free_ee(mdev, e); |
| 1850 | return FALSE; | 1874 | return false; |
| 1851 | } | 1875 | } |
| 1852 | 1876 | ||
| 1853 | /* We may throttle resync, if the lower device seems to be busy, | 1877 | /* We may throttle resync, if the lower device seems to be busy, |
| @@ -1861,10 +1885,11 @@ out_interrupted: | |||
| 1861 | * The current sync rate used here uses only the most recent two step marks, | 1885 | * The current sync rate used here uses only the most recent two step marks, |
| 1862 | * to have a short time average so we can react faster. | 1886 | * to have a short time average so we can react faster. |
| 1863 | */ | 1887 | */ |
| 1864 | int drbd_rs_should_slow_down(struct drbd_conf *mdev) | 1888 | int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector) |
| 1865 | { | 1889 | { |
| 1866 | struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk; | 1890 | struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk; |
| 1867 | unsigned long db, dt, dbdt; | 1891 | unsigned long db, dt, dbdt; |
| 1892 | struct lc_element *tmp; | ||
| 1868 | int curr_events; | 1893 | int curr_events; |
| 1869 | int throttle = 0; | 1894 | int throttle = 0; |
| 1870 | 1895 | ||
| @@ -1872,9 +1897,22 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev) | |||
| 1872 | if (mdev->sync_conf.c_min_rate == 0) | 1897 | if (mdev->sync_conf.c_min_rate == 0) |
| 1873 | return 0; | 1898 | return 0; |
| 1874 | 1899 | ||
| 1900 | spin_lock_irq(&mdev->al_lock); | ||
| 1901 | tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector)); | ||
| 1902 | if (tmp) { | ||
| 1903 | struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); | ||
| 1904 | if (test_bit(BME_PRIORITY, &bm_ext->flags)) { | ||
| 1905 | spin_unlock_irq(&mdev->al_lock); | ||
| 1906 | return 0; | ||
| 1907 | } | ||
| 1908 | /* Do not slow down if app IO is already waiting for this extent */ | ||
| 1909 | } | ||
| 1910 | spin_unlock_irq(&mdev->al_lock); | ||
| 1911 | |||
| 1875 | curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + | 1912 | curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + |
| 1876 | (int)part_stat_read(&disk->part0, sectors[1]) - | 1913 | (int)part_stat_read(&disk->part0, sectors[1]) - |
| 1877 | atomic_read(&mdev->rs_sect_ev); | 1914 | atomic_read(&mdev->rs_sect_ev); |
| 1915 | |||
| 1878 | if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) { | 1916 | if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) { |
| 1879 | unsigned long rs_left; | 1917 | unsigned long rs_left; |
| 1880 | int i; | 1918 | int i; |
| @@ -1883,8 +1921,12 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev) | |||
| 1883 | 1921 | ||
| 1884 | /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP, | 1922 | /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP, |
| 1885 | * approx. */ | 1923 | * approx. */ |
| 1886 | i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-2) % DRBD_SYNC_MARKS; | 1924 | i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS; |
| 1887 | rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed; | 1925 | |
| 1926 | if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) | ||
| 1927 | rs_left = mdev->ov_left; | ||
| 1928 | else | ||
| 1929 | rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed; | ||
| 1888 | 1930 | ||
| 1889 | dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ; | 1931 | dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ; |
| 1890 | if (!dt) | 1932 | if (!dt) |
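drbd_rs_should_slow_down() now takes the sector so it can skip throttling when application IO already waits on that resync extent (BME_PRIORITY), averages over the most recent sync mark instead of one further back, and counts ov_left during online verify. The rate test this feeds is sketched below, under the assumptions that each bitmap bit covers 4 KiB and that the comparison against sync_conf.c_min_rate follows this hunk unchanged in the full file:

```c
#include <stdio.h>

#define DRBD_SYNC_MARKS 8
#define HZ 250                      /* illustrative tick rate */
#define Bit2KB(bits) ((bits) << 2)  /* assumes 4 KiB per bitmap bit */

/* Should resync yield to application IO? Compare the short-time average
 * resync rate against the configured floor c_min_rate (KiB/s). */
static int should_slow_down(unsigned long jiffies_now,
                            const unsigned long mark_time[DRBD_SYNC_MARKS],
                            const unsigned long mark_left[DRBD_SYNC_MARKS],
                            int last_mark, unsigned long rs_left,
                            unsigned long c_min_rate)
{
    /* the patch moves to the most recent mark: a shorter, snappier window */
    int i = (last_mark + DRBD_SYNC_MARKS - 1) % DRBD_SYNC_MARKS;
    unsigned long dt = (jiffies_now - mark_time[i]) / HZ;
    unsigned long db, dbdt;

    if (!dt)
        dt++;                       /* avoid division by zero */
    db = mark_left[i] - rs_left;    /* bits resolved since that mark */
    dbdt = Bit2KB(db / dt);         /* average KiB/s over the window */

    return dbdt > c_min_rate;       /* already fast enough: throttle */
}

int main(void)
{
    unsigned long mt[DRBD_SYNC_MARKS] = {0};
    unsigned long ml[DRBD_SYNC_MARKS];

    for (int i = 0; i < DRBD_SYNC_MARKS; i++)
        ml[i] = 100000;
    /* 2 s later, 20000 bits resolved: 40000 KiB/s, above a 4000 floor */
    printf("throttle: %d\n", should_slow_down(2 * HZ, mt, ml, 0, 80000, 4000));
    return 0;
}
```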
| @@ -1912,15 +1954,15 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un | |||
| 1912 | sector = be64_to_cpu(p->sector); | 1954 | sector = be64_to_cpu(p->sector); |
| 1913 | size = be32_to_cpu(p->blksize); | 1955 | size = be32_to_cpu(p->blksize); |
| 1914 | 1956 | ||
| 1915 | if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { | 1957 | if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) { |
| 1916 | dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, | 1958 | dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, |
| 1917 | (unsigned long long)sector, size); | 1959 | (unsigned long long)sector, size); |
| 1918 | return FALSE; | 1960 | return false; |
| 1919 | } | 1961 | } |
| 1920 | if (sector + (size>>9) > capacity) { | 1962 | if (sector + (size>>9) > capacity) { |
| 1921 | dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, | 1963 | dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, |
| 1922 | (unsigned long long)sector, size); | 1964 | (unsigned long long)sector, size); |
| 1923 | return FALSE; | 1965 | return false; |
| 1924 | } | 1966 | } |
| 1925 | 1967 | ||
| 1926 | if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) { | 1968 | if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) { |
| @@ -1957,7 +1999,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un | |||
| 1957 | e = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO); | 1999 | e = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO); |
| 1958 | if (!e) { | 2000 | if (!e) { |
| 1959 | put_ldev(mdev); | 2001 | put_ldev(mdev); |
| 1960 | return FALSE; | 2002 | return false; |
| 1961 | } | 2003 | } |
| 1962 | 2004 | ||
| 1963 | switch (cmd) { | 2005 | switch (cmd) { |
| @@ -1970,6 +2012,8 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un | |||
| 1970 | case P_RS_DATA_REQUEST: | 2012 | case P_RS_DATA_REQUEST: |
| 1971 | e->w.cb = w_e_end_rsdata_req; | 2013 | e->w.cb = w_e_end_rsdata_req; |
| 1972 | fault_type = DRBD_FAULT_RS_RD; | 2014 | fault_type = DRBD_FAULT_RS_RD; |
| 2015 | /* used in the sector offset progress display */ | ||
| 2016 | mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); | ||
| 1973 | break; | 2017 | break; |
| 1974 | 2018 | ||
| 1975 | case P_OV_REPLY: | 2019 | case P_OV_REPLY: |
| @@ -1991,7 +2035,11 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un | |||
| 1991 | if (cmd == P_CSUM_RS_REQUEST) { | 2035 | if (cmd == P_CSUM_RS_REQUEST) { |
| 1992 | D_ASSERT(mdev->agreed_pro_version >= 89); | 2036 | D_ASSERT(mdev->agreed_pro_version >= 89); |
| 1993 | e->w.cb = w_e_end_csum_rs_req; | 2037 | e->w.cb = w_e_end_csum_rs_req; |
| 2038 | /* used in the sector offset progress display */ | ||
| 2039 | mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); | ||
| 1994 | } else if (cmd == P_OV_REPLY) { | 2040 | } else if (cmd == P_OV_REPLY) { |
| 2041 | /* track progress, we may need to throttle */ | ||
| 2042 | atomic_add(size >> 9, &mdev->rs_sect_in); | ||
| 1995 | e->w.cb = w_e_end_ov_reply; | 2043 | e->w.cb = w_e_end_ov_reply; |
| 1996 | dec_rs_pending(mdev); | 2044 | dec_rs_pending(mdev); |
| 1997 | /* drbd_rs_begin_io done when we sent this request, | 2045 | /* drbd_rs_begin_io done when we sent this request, |
| @@ -2003,9 +2051,16 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un | |||
| 2003 | case P_OV_REQUEST: | 2051 | case P_OV_REQUEST: |
| 2004 | if (mdev->ov_start_sector == ~(sector_t)0 && | 2052 | if (mdev->ov_start_sector == ~(sector_t)0 && |
| 2005 | mdev->agreed_pro_version >= 90) { | 2053 | mdev->agreed_pro_version >= 90) { |
| 2054 | unsigned long now = jiffies; | ||
| 2055 | int i; | ||
| 2006 | mdev->ov_start_sector = sector; | 2056 | mdev->ov_start_sector = sector; |
| 2007 | mdev->ov_position = sector; | 2057 | mdev->ov_position = sector; |
| 2008 | mdev->ov_left = mdev->rs_total - BM_SECT_TO_BIT(sector); | 2058 | mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector); |
| 2059 | mdev->rs_total = mdev->ov_left; | ||
| 2060 | for (i = 0; i < DRBD_SYNC_MARKS; i++) { | ||
| 2061 | mdev->rs_mark_left[i] = mdev->ov_left; | ||
| 2062 | mdev->rs_mark_time[i] = now; | ||
| 2063 | } | ||
| 2009 | dev_info(DEV, "Online Verify start sector: %llu\n", | 2064 | dev_info(DEV, "Online Verify start sector: %llu\n", |
| 2010 | (unsigned long long)sector); | 2065 | (unsigned long long)sector); |
| 2011 | } | 2066 | } |
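For an online verify that starts at an explicit sector, the target now derives ov_left from drbd_bm_bits() and seeds every slot of the sync-mark ring with the initial count and timestamp, so rate and progress estimates are sane from the first reply onward. A toy version of such a mark ring (field and function names are illustrative):

```c
#include <stdio.h>

#define DRBD_SYNC_MARKS 8

struct progress {
    unsigned long mark_left[DRBD_SYNC_MARKS];
    unsigned long mark_time[DRBD_SYNC_MARKS];
    int last_mark;
};

/* Seed every slot so early rate queries never see stale zeros, as the
 * P_OV_REQUEST handler above now does for rs_mark_left/rs_mark_time. */
static void progress_start(struct progress *p, unsigned long total,
                           unsigned long now)
{
    for (int i = 0; i < DRBD_SYNC_MARKS; i++) {
        p->mark_left[i] = total;
        p->mark_time[i] = now;
    }
    p->last_mark = 0;
}

/* Advance the ring as work completes; readers look one slot back. */
static void progress_mark(struct progress *p, unsigned long left,
                          unsigned long now)
{
    p->last_mark = (p->last_mark + 1) % DRBD_SYNC_MARKS;
    p->mark_left[p->last_mark] = left;
    p->mark_time[p->last_mark] = now;
}

int main(void)
{
    struct progress p;

    progress_start(&p, 100000, 0);
    progress_mark(&p, 90000, 250);
    printf("left at last mark: %lu\n", p.mark_left[p.last_mark]);
    return 0;
}
```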
| @@ -2042,9 +2097,9 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un | |||
| 2042 | * we would also throttle its application reads. | 2097 | * we would also throttle its application reads. |
| 2043 | * In that case, throttling is done on the SyncTarget only. | 2098 | * In that case, throttling is done on the SyncTarget only. |
| 2044 | */ | 2099 | */ |
| 2045 | if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev)) | 2100 | if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector)) |
| 2046 | msleep(100); | 2101 | schedule_timeout_uninterruptible(HZ/10); |
| 2047 | if (drbd_rs_begin_io(mdev, e->sector)) | 2102 | if (drbd_rs_begin_io(mdev, sector)) |
| 2048 | goto out_free_e; | 2103 | goto out_free_e; |
| 2049 | 2104 | ||
| 2050 | submit_for_resync: | 2105 | submit_for_resync: |
| @@ -2057,11 +2112,10 @@ submit: | |||
| 2057 | spin_unlock_irq(&mdev->req_lock); | 2112 | spin_unlock_irq(&mdev->req_lock); |
| 2058 | 2113 | ||
| 2059 | if (drbd_submit_ee(mdev, e, READ, fault_type) == 0) | 2114 | if (drbd_submit_ee(mdev, e, READ, fault_type) == 0) |
| 2060 | return TRUE; | 2115 | return true; |
| 2061 | 2116 | ||
| 2062 | /* drbd_submit_ee currently fails for one reason only: | 2117 | /* don't care about the reason here */ |
| 2063 | * not being able to allocate enough bios. | 2118 | dev_err(DEV, "submit failed, triggering re-connect\n"); |
| 2064 | * Is dropping the connection going to help? */ | ||
| 2065 | spin_lock_irq(&mdev->req_lock); | 2119 | spin_lock_irq(&mdev->req_lock); |
| 2066 | list_del(&e->w.list); | 2120 | list_del(&e->w.list); |
| 2067 | spin_unlock_irq(&mdev->req_lock); | 2121 | spin_unlock_irq(&mdev->req_lock); |
| @@ -2070,7 +2124,7 @@ submit: | |||
| 2070 | out_free_e: | 2124 | out_free_e: |
| 2071 | put_ldev(mdev); | 2125 | put_ldev(mdev); |
| 2072 | drbd_free_ee(mdev, e); | 2126 | drbd_free_ee(mdev, e); |
| 2073 | return FALSE; | 2127 | return false; |
| 2074 | } | 2128 | } |
| 2075 | 2129 | ||
| 2076 | static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) | 2130 | static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) |
| @@ -2147,10 +2201,7 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) | |||
| 2147 | 2201 | ||
| 2148 | static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) | 2202 | static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) |
| 2149 | { | 2203 | { |
| 2150 | int self, peer, hg, rv = -100; | 2204 | int hg, rv = -100; |
| 2151 | |||
| 2152 | self = mdev->ldev->md.uuid[UI_BITMAP] & 1; | ||
| 2153 | peer = mdev->p_uuid[UI_BITMAP] & 1; | ||
| 2154 | 2205 | ||
| 2155 | switch (mdev->net_conf->after_sb_1p) { | 2206 | switch (mdev->net_conf->after_sb_1p) { |
| 2156 | case ASB_DISCARD_YOUNGER_PRI: | 2207 | case ASB_DISCARD_YOUNGER_PRI: |
| @@ -2177,12 +2228,14 @@ static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) | |||
| 2177 | case ASB_CALL_HELPER: | 2228 | case ASB_CALL_HELPER: |
| 2178 | hg = drbd_asb_recover_0p(mdev); | 2229 | hg = drbd_asb_recover_0p(mdev); |
| 2179 | if (hg == -1 && mdev->state.role == R_PRIMARY) { | 2230 | if (hg == -1 && mdev->state.role == R_PRIMARY) { |
| 2180 | self = drbd_set_role(mdev, R_SECONDARY, 0); | 2231 | enum drbd_state_rv rv2; |
| 2232 | |||
| 2233 | drbd_set_role(mdev, R_SECONDARY, 0); | ||
| 2181 | /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, | 2234 | /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, |
| 2182 | * we might be here in C_WF_REPORT_PARAMS which is transient. | 2235 | * we might be here in C_WF_REPORT_PARAMS which is transient. |
| 2183 | * we do not need to wait for the after state change work either. */ | 2236 | * we do not need to wait for the after state change work either. */ |
| 2184 | self = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY)); | 2237 | rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY)); |
| 2185 | if (self != SS_SUCCESS) { | 2238 | if (rv2 != SS_SUCCESS) { |
| 2186 | drbd_khelper(mdev, "pri-lost-after-sb"); | 2239 | drbd_khelper(mdev, "pri-lost-after-sb"); |
| 2187 | } else { | 2240 | } else { |
| 2188 | dev_warn(DEV, "Successfully gave up primary role.\n"); | 2241 | dev_warn(DEV, "Successfully gave up primary role.\n"); |
| @@ -2197,10 +2250,7 @@ static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) | |||
| 2197 | 2250 | ||
| 2198 | static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) | 2251 | static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) |
| 2199 | { | 2252 | { |
| 2200 | int self, peer, hg, rv = -100; | 2253 | int hg, rv = -100; |
| 2201 | |||
| 2202 | self = mdev->ldev->md.uuid[UI_BITMAP] & 1; | ||
| 2203 | peer = mdev->p_uuid[UI_BITMAP] & 1; | ||
| 2204 | 2254 | ||
| 2205 | switch (mdev->net_conf->after_sb_2p) { | 2255 | switch (mdev->net_conf->after_sb_2p) { |
| 2206 | case ASB_DISCARD_YOUNGER_PRI: | 2256 | case ASB_DISCARD_YOUNGER_PRI: |
| @@ -2220,11 +2270,13 @@ static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) | |||
| 2220 | case ASB_CALL_HELPER: | 2270 | case ASB_CALL_HELPER: |
| 2221 | hg = drbd_asb_recover_0p(mdev); | 2271 | hg = drbd_asb_recover_0p(mdev); |
| 2222 | if (hg == -1) { | 2272 | if (hg == -1) { |
| 2273 | enum drbd_state_rv rv2; | ||
| 2274 | |||
| 2223 | /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, | 2275 | /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, |
| 2224 | * we might be here in C_WF_REPORT_PARAMS which is transient. | 2276 | * we might be here in C_WF_REPORT_PARAMS which is transient. |
| 2225 | * we do not need to wait for the after state change work either. */ | 2277 | * we do not need to wait for the after state change work either. */ |
| 2226 | self = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY)); | 2278 | rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY)); |
| 2227 | if (self != SS_SUCCESS) { | 2279 | if (rv2 != SS_SUCCESS) { |
| 2228 | drbd_khelper(mdev, "pri-lost-after-sb"); | 2280 | drbd_khelper(mdev, "pri-lost-after-sb"); |
| 2229 | } else { | 2281 | } else { |
| 2230 | dev_warn(DEV, "Successfully gave up primary role.\n"); | 2282 | dev_warn(DEV, "Successfully gave up primary role.\n"); |
| @@ -2263,6 +2315,8 @@ static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid, | |||
| 2263 | -2 C_SYNC_TARGET set BitMap | 2315 | -2 C_SYNC_TARGET set BitMap |
| 2264 | -100 after split brain, disconnect | 2316 | -100 after split brain, disconnect |
| 2265 | -1000 unrelated data | 2317 | -1000 unrelated data |
| 2318 | -1091 requires proto 91 | ||
| 2319 | -1096 requires proto 96 | ||
| 2266 | */ | 2320 | */ |
| 2267 | static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local) | 2321 | static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local) |
| 2268 | { | 2322 | { |
| @@ -2292,7 +2346,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l | |||
| 2292 | if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) { | 2346 | if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) { |
| 2293 | 2347 | ||
| 2294 | if (mdev->agreed_pro_version < 91) | 2348 | if (mdev->agreed_pro_version < 91) |
| 2295 | return -1001; | 2349 | return -1091; |
| 2296 | 2350 | ||
| 2297 | if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) && | 2351 | if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) && |
| 2298 | (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) { | 2352 | (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) { |
| @@ -2313,7 +2367,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l | |||
| 2313 | if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) { | 2367 | if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) { |
| 2314 | 2368 | ||
| 2315 | if (mdev->agreed_pro_version < 91) | 2369 | if (mdev->agreed_pro_version < 91) |
| 2316 | return -1001; | 2370 | return -1091; |
| 2317 | 2371 | ||
| 2318 | if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) && | 2372 | if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) && |
| 2319 | (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) { | 2373 | (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) { |
| @@ -2358,17 +2412,22 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l | |||
| 2358 | *rule_nr = 51; | 2412 | *rule_nr = 51; |
| 2359 | peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1); | 2413 | peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1); |
| 2360 | if (self == peer) { | 2414 | if (self == peer) { |
| 2361 | self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1); | 2415 | if (mdev->agreed_pro_version < 96 ? |
| 2362 | peer = mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1); | 2416 | (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == |
| 2363 | if (self == peer) { | 2417 | (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) : |
| 2418 | peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) { | ||
| 2364 | /* The last P_SYNC_UUID did not get through. Undo the last start of | 2419 | /* The last P_SYNC_UUID did not get through. Undo the last start of |
| 2365 | resync as sync source modifications of the peer's UUIDs. */ | 2420 | resync as sync source modifications of the peer's UUIDs. */ |
| 2366 | 2421 | ||
| 2367 | if (mdev->agreed_pro_version < 91) | 2422 | if (mdev->agreed_pro_version < 91) |
| 2368 | return -1001; | 2423 | return -1091; |
| 2369 | 2424 | ||
| 2370 | mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START]; | 2425 | mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START]; |
| 2371 | mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1]; | 2426 | mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1]; |
| 2427 | |||
| 2428 | dev_info(DEV, "Did not got last syncUUID packet, corrected:\n"); | ||
| 2429 | drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]); | ||
| 2430 | |||
| 2372 | return -1; | 2431 | return -1; |
| 2373 | } | 2432 | } |
| 2374 | } | 2433 | } |
| @@ -2390,20 +2449,20 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l | |||
| 2390 | *rule_nr = 71; | 2449 | *rule_nr = 71; |
| 2391 | self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1); | 2450 | self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1); |
| 2392 | if (self == peer) { | 2451 | if (self == peer) { |
| 2393 | self = mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1); | 2452 | if (mdev->agreed_pro_version < 96 ? |
| 2394 | peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1); | 2453 | (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == |
| 2395 | if (self == peer) { | 2454 | (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) : |
| 2455 | self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) { | ||
| 2396 | /* The last P_SYNC_UUID did not get through. Undo the last start of | 2456 | /* The last P_SYNC_UUID did not get through. Undo the last start of |
| 2397 | resync as sync source modifications of our UUIDs. */ | 2457 | resync as sync source modifications of our UUIDs. */ |
| 2398 | 2458 | ||
| 2399 | if (mdev->agreed_pro_version < 91) | 2459 | if (mdev->agreed_pro_version < 91) |
| 2400 | return -1001; | 2460 | return -1091; |
| 2401 | 2461 | ||
| 2402 | _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]); | 2462 | _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]); |
| 2403 | _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]); | 2463 | _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]); |
| 2404 | 2464 | ||
| 2405 | dev_info(DEV, "Undid last start of resync:\n"); | 2465 | dev_info(DEV, "Last syncUUID did not get through, corrected:\n"); |
| 2406 | |||
| 2407 | drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, | 2466 | drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, |
| 2408 | mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0); | 2467 | mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0); |
| 2409 | 2468 | ||
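Rules 51 and 71 both encode the same recovery: a node started a resync, bumped its UUIDs as sync source, but its P_SYNC_UUID packet was lost. Pre-96 peers are recognized by matching one entry deeper in the UUID history; from protocol 96 on, the new bitmap UUID is generated from the old one by a fixed offset, so the relation can be tested directly. A condensed sketch of the predicate (the UUID_NEW_BM_OFFSET value below is illustrative, and the low "primary" bit is masked off as in the diff):

```c
#include <stdbool.h>
#include <stdint.h>

#define UUID_NEW_BM_OFFSET 0x0001000000000000ULL /* illustrative value */

/* Did the other side start a resync whose P_SYNC_UUID we never saw?
 * Pre-96: match one entry deeper in the UUID history.
 * 96+: the new bitmap UUID is old history UUID + fixed offset. */
static bool missed_sync_uuid(int agreed_pro_version,
                             uint64_t my_history_start,
                             uint64_t peer_history_next,
                             uint64_t peer_history_start,
                             uint64_t peer_bitmap)
{
    if (agreed_pro_version < 96)
        return (my_history_start & ~1ULL) == (peer_history_next & ~1ULL);
    return (peer_history_start & ~1ULL) + UUID_NEW_BM_OFFSET ==
           (peer_bitmap & ~1ULL);
}

int main(void)
{
    uint64_t hist = 0x1234;

    /* a 96+ peer whose bitmap UUID was derived from its history UUID */
    return !missed_sync_uuid(96, 0, 0, hist, hist + UUID_NEW_BM_OFFSET);
}
```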
| @@ -2466,8 +2525,8 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol | |||
| 2466 | dev_alert(DEV, "Unrelated data, aborting!\n"); | 2525 | dev_alert(DEV, "Unrelated data, aborting!\n"); |
| 2467 | return C_MASK; | 2526 | return C_MASK; |
| 2468 | } | 2527 | } |
| 2469 | if (hg == -1001) { | 2528 | if (hg < -1000) { |
| 2470 | dev_alert(DEV, "To resolve this both sides have to support at least protocol\n"); | 2529 | dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000); |
| 2471 | return C_MASK; | 2530 | return C_MASK; |
| 2472 | } | 2531 | } |
| 2473 | 2532 | ||
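The handshake error for unsupported features changes from the single opaque -1001 to a small encoding: "requires protocol N" is returned as -(1000 + N), which is why the rule table above lists -1091 and -1096, and why the alert can recover N as -hg - 1000:

```c
#include <stdio.h>

/* Encode "this resolution needs at least protocol N" as -(1000 + N),
 * matching the -1091 and -1096 return values introduced above. */
static int requires_protocol(int n)
{
    return -1000 - n;
}

int main(void)
{
    int hg = requires_protocol(91);   /* -1091 */

    if (hg < -1000)
        fprintf(stderr, "To resolve this both sides have to support "
                        "at least protocol %d\n", -hg - 1000);
    return 0;
}
```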
| @@ -2566,7 +2625,8 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol | |||
| 2566 | 2625 | ||
| 2567 | if (abs(hg) >= 2) { | 2626 | if (abs(hg) >= 2) { |
| 2568 | dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); | 2627 | dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); |
| 2569 | if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake")) | 2628 | if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake", |
| 2629 | BM_LOCKED_SET_ALLOWED)) | ||
| 2570 | return C_MASK; | 2630 | return C_MASK; |
| 2571 | } | 2631 | } |
| 2572 | 2632 | ||
| @@ -2660,7 +2720,7 @@ static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsig | |||
| 2660 | unsigned char *my_alg = mdev->net_conf->integrity_alg; | 2720 | unsigned char *my_alg = mdev->net_conf->integrity_alg; |
| 2661 | 2721 | ||
| 2662 | if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size) | 2722 | if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size) |
| 2663 | return FALSE; | 2723 | return false; |
| 2664 | 2724 | ||
| 2665 | p_integrity_alg[SHARED_SECRET_MAX-1] = 0; | 2725 | p_integrity_alg[SHARED_SECRET_MAX-1] = 0; |
| 2666 | if (strcmp(p_integrity_alg, my_alg)) { | 2726 | if (strcmp(p_integrity_alg, my_alg)) { |
| @@ -2671,11 +2731,11 @@ static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsig | |||
| 2671 | my_alg[0] ? my_alg : (unsigned char *)"<not-used>"); | 2731 | my_alg[0] ? my_alg : (unsigned char *)"<not-used>"); |
| 2672 | } | 2732 | } |
| 2673 | 2733 | ||
| 2674 | return TRUE; | 2734 | return true; |
| 2675 | 2735 | ||
| 2676 | disconnect: | 2736 | disconnect: |
| 2677 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); | 2737 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); |
| 2678 | return FALSE; | 2738 | return false; |
| 2679 | } | 2739 | } |
| 2680 | 2740 | ||
| 2681 | /* helper function | 2741 | /* helper function |
| @@ -2707,7 +2767,7 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, | |||
| 2707 | 2767 | ||
| 2708 | static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int packet_size) | 2768 | static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int packet_size) |
| 2709 | { | 2769 | { |
| 2710 | int ok = TRUE; | 2770 | int ok = true; |
| 2711 | struct p_rs_param_95 *p = &mdev->data.rbuf.rs_param_95; | 2771 | struct p_rs_param_95 *p = &mdev->data.rbuf.rs_param_95; |
| 2712 | unsigned int header_size, data_size, exp_max_sz; | 2772 | unsigned int header_size, data_size, exp_max_sz; |
| 2713 | struct crypto_hash *verify_tfm = NULL; | 2773 | struct crypto_hash *verify_tfm = NULL; |
| @@ -2725,7 +2785,7 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi | |||
| 2725 | if (packet_size > exp_max_sz) { | 2785 | if (packet_size > exp_max_sz) { |
| 2726 | dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n", | 2786 | dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n", |
| 2727 | packet_size, exp_max_sz); | 2787 | packet_size, exp_max_sz); |
| 2728 | return FALSE; | 2788 | return false; |
| 2729 | } | 2789 | } |
| 2730 | 2790 | ||
| 2731 | if (apv <= 88) { | 2791 | if (apv <= 88) { |
| @@ -2745,7 +2805,7 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi | |||
| 2745 | memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); | 2805 | memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); |
| 2746 | 2806 | ||
| 2747 | if (drbd_recv(mdev, &p->head.payload, header_size) != header_size) | 2807 | if (drbd_recv(mdev, &p->head.payload, header_size) != header_size) |
| 2748 | return FALSE; | 2808 | return false; |
| 2749 | 2809 | ||
| 2750 | mdev->sync_conf.rate = be32_to_cpu(p->rate); | 2810 | mdev->sync_conf.rate = be32_to_cpu(p->rate); |
| 2751 | 2811 | ||
| @@ -2755,11 +2815,11 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi | |||
| 2755 | dev_err(DEV, "verify-alg too long, " | 2815 | dev_err(DEV, "verify-alg too long, " |
| 2756 | "peer wants %u, accepting only %u byte\n", | 2816 | "peer wants %u, accepting only %u byte\n", |
| 2757 | data_size, SHARED_SECRET_MAX); | 2817 | data_size, SHARED_SECRET_MAX); |
| 2758 | return FALSE; | 2818 | return false; |
| 2759 | } | 2819 | } |
| 2760 | 2820 | ||
| 2761 | if (drbd_recv(mdev, p->verify_alg, data_size) != data_size) | 2821 | if (drbd_recv(mdev, p->verify_alg, data_size) != data_size) |
| 2762 | return FALSE; | 2822 | return false; |
| 2763 | 2823 | ||
| 2764 | /* we expect NUL terminated string */ | 2824 | /* we expect NUL terminated string */ |
| 2765 | /* but just in case someone tries to be evil */ | 2825 | /* but just in case someone tries to be evil */ |
| @@ -2853,7 +2913,7 @@ disconnect: | |||
| 2853 | /* but free the verify_tfm again, if csums_tfm did not work out */ | 2913 | /* but free the verify_tfm again, if csums_tfm did not work out */ |
| 2854 | crypto_free_hash(verify_tfm); | 2914 | crypto_free_hash(verify_tfm); |
| 2855 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); | 2915 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); |
| 2856 | return FALSE; | 2916 | return false; |
| 2857 | } | 2917 | } |
| 2858 | 2918 | ||
| 2859 | static void drbd_setup_order_type(struct drbd_conf *mdev, int peer) | 2919 | static void drbd_setup_order_type(struct drbd_conf *mdev, int peer) |
| @@ -2879,7 +2939,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
| 2879 | { | 2939 | { |
| 2880 | struct p_sizes *p = &mdev->data.rbuf.sizes; | 2940 | struct p_sizes *p = &mdev->data.rbuf.sizes; |
| 2881 | enum determine_dev_size dd = unchanged; | 2941 | enum determine_dev_size dd = unchanged; |
| 2882 | unsigned int max_seg_s; | 2942 | unsigned int max_bio_size; |
| 2883 | sector_t p_size, p_usize, my_usize; | 2943 | sector_t p_size, p_usize, my_usize; |
| 2884 | int ldsc = 0; /* local disk size changed */ | 2944 | int ldsc = 0; /* local disk size changed */ |
| 2885 | enum dds_flags ddsf; | 2945 | enum dds_flags ddsf; |
| @@ -2890,7 +2950,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
| 2890 | if (p_size == 0 && mdev->state.disk == D_DISKLESS) { | 2950 | if (p_size == 0 && mdev->state.disk == D_DISKLESS) { |
| 2891 | dev_err(DEV, "some backing storage is needed\n"); | 2951 | dev_err(DEV, "some backing storage is needed\n"); |
| 2892 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); | 2952 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); |
| 2893 | return FALSE; | 2953 | return false; |
| 2894 | } | 2954 | } |
| 2895 | 2955 | ||
| 2896 | /* just store the peer's disk size for now. | 2956 | /* just store the peer's disk size for now. |
| @@ -2927,18 +2987,17 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
| 2927 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); | 2987 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); |
| 2928 | mdev->ldev->dc.disk_size = my_usize; | 2988 | mdev->ldev->dc.disk_size = my_usize; |
| 2929 | put_ldev(mdev); | 2989 | put_ldev(mdev); |
| 2930 | return FALSE; | 2990 | return false; |
| 2931 | } | 2991 | } |
| 2932 | put_ldev(mdev); | 2992 | put_ldev(mdev); |
| 2933 | } | 2993 | } |
| 2934 | #undef min_not_zero | ||
| 2935 | 2994 | ||
| 2936 | ddsf = be16_to_cpu(p->dds_flags); | 2995 | ddsf = be16_to_cpu(p->dds_flags); |
| 2937 | if (get_ldev(mdev)) { | 2996 | if (get_ldev(mdev)) { |
| 2938 | dd = drbd_determin_dev_size(mdev, ddsf); | 2997 | dd = drbd_determin_dev_size(mdev, ddsf); |
| 2939 | put_ldev(mdev); | 2998 | put_ldev(mdev); |
| 2940 | if (dd == dev_size_error) | 2999 | if (dd == dev_size_error) |
| 2941 | return FALSE; | 3000 | return false; |
| 2942 | drbd_md_sync(mdev); | 3001 | drbd_md_sync(mdev); |
| 2943 | } else { | 3002 | } else { |
| 2944 | /* I am diskless, need to accept the peer's size. */ | 3003 | /* I am diskless, need to accept the peer's size. */ |
| @@ -2952,14 +3011,14 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
| 2952 | } | 3011 | } |
| 2953 | 3012 | ||
| 2954 | if (mdev->agreed_pro_version < 94) | 3013 | if (mdev->agreed_pro_version < 94) |
| 2955 | max_seg_s = be32_to_cpu(p->max_segment_size); | 3014 | max_bio_size = be32_to_cpu(p->max_bio_size); |
| 2956 | else if (mdev->agreed_pro_version == 94) | 3015 | else if (mdev->agreed_pro_version == 94) |
| 2957 | max_seg_s = DRBD_MAX_SIZE_H80_PACKET; | 3016 | max_bio_size = DRBD_MAX_SIZE_H80_PACKET; |
| 2958 | else /* drbd 8.3.8 onwards */ | 3017 | else /* drbd 8.3.8 onwards */ |
| 2959 | max_seg_s = DRBD_MAX_SEGMENT_SIZE; | 3018 | max_bio_size = DRBD_MAX_BIO_SIZE; |
| 2960 | 3019 | ||
| 2961 | if (max_seg_s != queue_max_segment_size(mdev->rq_queue)) | 3020 | if (max_bio_size != queue_max_hw_sectors(mdev->rq_queue) << 9) |
| 2962 | drbd_setup_queue_param(mdev, max_seg_s); | 3021 | drbd_setup_queue_param(mdev, max_bio_size); |
| 2963 | 3022 | ||
| 2964 | drbd_setup_order_type(mdev, be16_to_cpu(p->queue_order_type)); | 3023 | drbd_setup_order_type(mdev, be16_to_cpu(p->queue_order_type)); |
| 2965 | put_ldev(mdev); | 3024 | put_ldev(mdev); |
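The negotiated limit is renamed from max_seg_s to max_bio_size, and the local comparison switches from queue_max_segment_size() to queue_max_hw_sectors() << 9: DRBD now constrains whole bios rather than segments. The version ladder stays three-way; a sketch with placeholder constants:

```c
#include <stdio.h>

#define DRBD_MAX_SIZE_H80_PACKET (1U << 15) /* placeholder, not the real value */
#define DRBD_MAX_BIO_SIZE        (1U << 17) /* placeholder, not the real value */

/* Pick the bio size limit to use with this peer, mirroring the
 * three-way version ladder in receive_sizes() after this patch. */
static unsigned negotiated_max_bio_size(int agreed_pro_version,
                                        unsigned peer_advertised)
{
    if (agreed_pro_version < 94)
        return peer_advertised;          /* peer sends it explicitly */
    else if (agreed_pro_version == 94)
        return DRBD_MAX_SIZE_H80_PACKET; /* fixed limit of the H80 header */
    else
        return DRBD_MAX_BIO_SIZE;        /* drbd 8.3.8 onwards */
}

int main(void)
{
    printf("proto 93 peer: %u\n", negotiated_max_bio_size(93, 4096));
    printf("proto 96 peer: %u\n", negotiated_max_bio_size(96, 4096));
    return 0;
}
```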
| @@ -2985,14 +3044,14 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
| 2985 | } | 3044 | } |
| 2986 | } | 3045 | } |
| 2987 | 3046 | ||
| 2988 | return TRUE; | 3047 | return true; |
| 2989 | } | 3048 | } |
| 2990 | 3049 | ||
| 2991 | static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) | 3050 | static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) |
| 2992 | { | 3051 | { |
| 2993 | struct p_uuids *p = &mdev->data.rbuf.uuids; | 3052 | struct p_uuids *p = &mdev->data.rbuf.uuids; |
| 2994 | u64 *p_uuid; | 3053 | u64 *p_uuid; |
| 2995 | int i; | 3054 | int i, updated_uuids = 0; |
| 2996 | 3055 | ||
| 2997 | p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO); | 3056 | p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO); |
| 2998 | 3057 | ||
| @@ -3009,7 +3068,7 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
| 3009 | dev_err(DEV, "Can only connect to data with current UUID=%016llX\n", | 3068 | dev_err(DEV, "Can only connect to data with current UUID=%016llX\n", |
| 3010 | (unsigned long long)mdev->ed_uuid); | 3069 | (unsigned long long)mdev->ed_uuid); |
| 3011 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); | 3070 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); |
| 3012 | return FALSE; | 3071 | return false; |
| 3013 | } | 3072 | } |
| 3014 | 3073 | ||
| 3015 | if (get_ldev(mdev)) { | 3074 | if (get_ldev(mdev)) { |
| @@ -3021,19 +3080,21 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
| 3021 | if (skip_initial_sync) { | 3080 | if (skip_initial_sync) { |
| 3022 | dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n"); | 3081 | dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n"); |
| 3023 | drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, | 3082 | drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, |
| 3024 | "clear_n_write from receive_uuids"); | 3083 | "clear_n_write from receive_uuids", |
| 3084 | BM_LOCKED_TEST_ALLOWED); | ||
| 3025 | _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]); | 3085 | _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]); |
| 3026 | _drbd_uuid_set(mdev, UI_BITMAP, 0); | 3086 | _drbd_uuid_set(mdev, UI_BITMAP, 0); |
| 3027 | _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), | 3087 | _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), |
| 3028 | CS_VERBOSE, NULL); | 3088 | CS_VERBOSE, NULL); |
| 3029 | drbd_md_sync(mdev); | 3089 | drbd_md_sync(mdev); |
| 3090 | updated_uuids = 1; | ||
| 3030 | } | 3091 | } |
| 3031 | put_ldev(mdev); | 3092 | put_ldev(mdev); |
| 3032 | } else if (mdev->state.disk < D_INCONSISTENT && | 3093 | } else if (mdev->state.disk < D_INCONSISTENT && |
| 3033 | mdev->state.role == R_PRIMARY) { | 3094 | mdev->state.role == R_PRIMARY) { |
| 3034 | /* I am a diskless primary, the peer just created a new current UUID | 3095 | /* I am a diskless primary, the peer just created a new current UUID |
| 3035 | for me. */ | 3096 | for me. */ |
| 3036 | drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); | 3097 | updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); |
| 3037 | } | 3098 | } |
| 3038 | 3099 | ||
| 3039 | /* Before we test for the disk state, we should wait until an eventually | 3100 | /* Before we test for the disk state, we should wait until an eventually |
| @@ -3042,9 +3103,12 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
| 3042 | new disk state... */ | 3103 | new disk state... */ |
| 3043 | wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags)); | 3104 | wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags)); |
| 3044 | if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT) | 3105 | if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT) |
| 3045 | drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); | 3106 | updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); |
| 3046 | 3107 | ||
| 3047 | return TRUE; | 3108 | if (updated_uuids) |
| 3109 | drbd_print_uuids(mdev, "receiver updated UUIDs to"); | ||
| 3110 | |||
| 3111 | return true; | ||
| 3048 | } | 3112 | } |
| 3049 | 3113 | ||
| 3050 | /** | 3114 | /** |
| @@ -3081,7 +3145,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsi | |||
| 3081 | { | 3145 | { |
| 3082 | struct p_req_state *p = &mdev->data.rbuf.req_state; | 3146 | struct p_req_state *p = &mdev->data.rbuf.req_state; |
| 3083 | union drbd_state mask, val; | 3147 | union drbd_state mask, val; |
| 3084 | int rv; | 3148 | enum drbd_state_rv rv; |
| 3085 | 3149 | ||
| 3086 | mask.i = be32_to_cpu(p->mask); | 3150 | mask.i = be32_to_cpu(p->mask); |
| 3087 | val.i = be32_to_cpu(p->val); | 3151 | val.i = be32_to_cpu(p->val); |
| @@ -3089,7 +3153,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsi | |||
| 3089 | if (test_bit(DISCARD_CONCURRENT, &mdev->flags) && | 3153 | if (test_bit(DISCARD_CONCURRENT, &mdev->flags) && |
| 3090 | test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) { | 3154 | test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) { |
| 3091 | drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG); | 3155 | drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG); |
| 3092 | return TRUE; | 3156 | return true; |
| 3093 | } | 3157 | } |
| 3094 | 3158 | ||
| 3095 | mask = convert_state(mask); | 3159 | mask = convert_state(mask); |
| @@ -3100,7 +3164,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsi | |||
| 3100 | drbd_send_sr_reply(mdev, rv); | 3164 | drbd_send_sr_reply(mdev, rv); |
| 3101 | drbd_md_sync(mdev); | 3165 | drbd_md_sync(mdev); |
| 3102 | 3166 | ||
| 3103 | return TRUE; | 3167 | return true; |
| 3104 | } | 3168 | } |
| 3105 | 3169 | ||
| 3106 | static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) | 3170 | static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) |
| @@ -3145,7 +3209,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
| 3145 | peer_state.conn == C_CONNECTED) { | 3209 | peer_state.conn == C_CONNECTED) { |
| 3146 | if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) | 3210 | if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) |
| 3147 | drbd_resync_finished(mdev); | 3211 | drbd_resync_finished(mdev); |
| 3148 | return TRUE; | 3212 | return true; |
| 3149 | } | 3213 | } |
| 3150 | } | 3214 | } |
| 3151 | 3215 | ||
| @@ -3161,6 +3225,9 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
| 3161 | if (ns.conn == C_WF_REPORT_PARAMS) | 3225 | if (ns.conn == C_WF_REPORT_PARAMS) |
| 3162 | ns.conn = C_CONNECTED; | 3226 | ns.conn = C_CONNECTED; |
| 3163 | 3227 | ||
| 3228 | if (peer_state.conn == C_AHEAD) | ||
| 3229 | ns.conn = C_BEHIND; | ||
| 3230 | |||
| 3164 | if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING && | 3231 | if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING && |
| 3165 | get_ldev_if_state(mdev, D_NEGOTIATING)) { | 3232 | get_ldev_if_state(mdev, D_NEGOTIATING)) { |
| 3166 | int cr; /* consider resync */ | 3233 | int cr; /* consider resync */ |
| @@ -3195,10 +3262,10 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
| 3195 | real_peer_disk = D_DISKLESS; | 3262 | real_peer_disk = D_DISKLESS; |
| 3196 | } else { | 3263 | } else { |
| 3197 | if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags)) | 3264 | if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags)) |
| 3198 | return FALSE; | 3265 | return false; |
| 3199 | D_ASSERT(os.conn == C_WF_REPORT_PARAMS); | 3266 | D_ASSERT(os.conn == C_WF_REPORT_PARAMS); |
| 3200 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); | 3267 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); |
| 3201 | return FALSE; | 3268 | return false; |
| 3202 | } | 3269 | } |
| 3203 | } | 3270 | } |
| 3204 | } | 3271 | } |
| @@ -3223,7 +3290,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
| 3223 | drbd_uuid_new_current(mdev); | 3290 | drbd_uuid_new_current(mdev); |
| 3224 | clear_bit(NEW_CUR_UUID, &mdev->flags); | 3291 | clear_bit(NEW_CUR_UUID, &mdev->flags); |
| 3225 | drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0)); | 3292 | drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0)); |
| 3226 | return FALSE; | 3293 | return false; |
| 3227 | } | 3294 | } |
| 3228 | rv = _drbd_set_state(mdev, ns, cs_flags, NULL); | 3295 | rv = _drbd_set_state(mdev, ns, cs_flags, NULL); |
| 3229 | ns = mdev->state; | 3296 | ns = mdev->state; |
| @@ -3231,7 +3298,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
| 3231 | 3298 | ||
| 3232 | if (rv < SS_SUCCESS) { | 3299 | if (rv < SS_SUCCESS) { |
| 3233 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); | 3300 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); |
| 3234 | return FALSE; | 3301 | return false; |
| 3235 | } | 3302 | } |
| 3236 | 3303 | ||
| 3237 | if (os.conn > C_WF_REPORT_PARAMS) { | 3304 | if (os.conn > C_WF_REPORT_PARAMS) { |
| @@ -3249,7 +3316,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
| 3249 | 3316 | ||
| 3250 | drbd_md_sync(mdev); /* update connected indicator, la_size, ... */ | 3317 | drbd_md_sync(mdev); /* update connected indicator, la_size, ... */ |
| 3251 | 3318 | ||
| 3252 | return TRUE; | 3319 | return true; |
| 3253 | } | 3320 | } |
| 3254 | 3321 | ||
| 3255 | static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) | 3322 | static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) |
| @@ -3258,6 +3325,7 @@ static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsi | |||
| 3258 | 3325 | ||
| 3259 | wait_event(mdev->misc_wait, | 3326 | wait_event(mdev->misc_wait, |
| 3260 | mdev->state.conn == C_WF_SYNC_UUID || | 3327 | mdev->state.conn == C_WF_SYNC_UUID || |
| 3328 | mdev->state.conn == C_BEHIND || | ||
| 3261 | mdev->state.conn < C_CONNECTED || | 3329 | mdev->state.conn < C_CONNECTED || |
| 3262 | mdev->state.disk < D_NEGOTIATING); | 3330 | mdev->state.disk < D_NEGOTIATING); |
| 3263 | 3331 | ||
| @@ -3269,32 +3337,42 @@ static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsi | |||
| 3269 | _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid)); | 3337 | _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid)); |
| 3270 | _drbd_uuid_set(mdev, UI_BITMAP, 0UL); | 3338 | _drbd_uuid_set(mdev, UI_BITMAP, 0UL); |
| 3271 | 3339 | ||
| 3340 | drbd_print_uuids(mdev, "updated sync uuid"); | ||
| 3272 | drbd_start_resync(mdev, C_SYNC_TARGET); | 3341 | drbd_start_resync(mdev, C_SYNC_TARGET); |
| 3273 | 3342 | ||
| 3274 | put_ldev(mdev); | 3343 | put_ldev(mdev); |
| 3275 | } else | 3344 | } else |
| 3276 | dev_err(DEV, "Ignoring SyncUUID packet!\n"); | 3345 | dev_err(DEV, "Ignoring SyncUUID packet!\n"); |
| 3277 | 3346 | ||
| 3278 | return TRUE; | 3347 | return true; |
| 3279 | } | 3348 | } |
| 3280 | 3349 | ||
| 3281 | enum receive_bitmap_ret { OK, DONE, FAILED }; | 3350 | /** |
| 3282 | 3351 | * receive_bitmap_plain | |
| 3283 | static enum receive_bitmap_ret | 3352 | * |
| 3353 | * Return 0 when done, 1 when another iteration is needed, and a negative error | ||
| 3354 | * code upon failure. | ||
| 3355 | */ | ||
| 3356 | static int | ||
| 3284 | receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size, | 3357 | receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size, |
| 3285 | unsigned long *buffer, struct bm_xfer_ctx *c) | 3358 | unsigned long *buffer, struct bm_xfer_ctx *c) |
| 3286 | { | 3359 | { |
| 3287 | unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset); | 3360 | unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset); |
| 3288 | unsigned want = num_words * sizeof(long); | 3361 | unsigned want = num_words * sizeof(long); |
| 3362 | int err; | ||
| 3289 | 3363 | ||
| 3290 | if (want != data_size) { | 3364 | if (want != data_size) { |
| 3291 | dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size); | 3365 | dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size); |
| 3292 | return FAILED; | 3366 | return -EIO; |
| 3293 | } | 3367 | } |
| 3294 | if (want == 0) | 3368 | if (want == 0) |
| 3295 | return DONE; | 3369 | return 0; |
| 3296 | if (drbd_recv(mdev, buffer, want) != want) | 3370 | err = drbd_recv(mdev, buffer, want); |
| 3297 | return FAILED; | 3371 | if (err != want) { |
| 3372 | if (err >= 0) | ||
| 3373 | err = -EIO; | ||
| 3374 | return err; | ||
| 3375 | } | ||
| 3298 | 3376 | ||
| 3299 | drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer); | 3377 | drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer); |
| 3300 | 3378 | ||
| @@ -3303,10 +3381,16 @@ receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size, | |||
| 3303 | if (c->bit_offset > c->bm_bits) | 3381 | if (c->bit_offset > c->bm_bits) |
| 3304 | c->bit_offset = c->bm_bits; | 3382 | c->bit_offset = c->bm_bits; |
| 3305 | 3383 | ||
| 3306 | return OK; | 3384 | return 1; |
| 3307 | } | 3385 | } |
| 3308 | 3386 | ||
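The hunks above and below replace the three-value `enum receive_bitmap_ret { OK, DONE, FAILED }` with plain `int` returns: 0 when done, 1 when another iteration is needed, a negative errno on failure, and a short read that produced no errno is normalized to `-EIO`. A minimal user-space sketch of that convention, with hypothetical `recv_step()`/`recv_all()` helpers standing in for the DRBD functions:

```c
/* Illustrative only: recv_step()/recv_all() are hypothetical, not the
 * DRBD code. 0 = done, 1 = another iteration needed, negative errno on
 * failure. Assumes total is a multiple of chunk. */
#include <errno.h>
#include <unistd.h>

static int recv_step(int fd, void *buf, size_t chunk, size_t *left)
{
	ssize_t got;

	if (*left == 0)
		return 0;			/* done */
	got = read(fd, buf, chunk);
	if (got != (ssize_t)chunk)
		return got < 0 ? -errno : -EIO;	/* normalize short read */
	*left -= chunk;
	return 1;				/* more to do */
}

static int recv_all(int fd, void *buf, size_t chunk, size_t total)
{
	int err;

	do
		err = recv_step(fd, buf, chunk, &total);
	while (err > 0);			/* 1 means keep going */
	return err;				/* 0, or negative errno */
}
```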
| 3309 | static enum receive_bitmap_ret | 3387 | /** |
| 3388 | * recv_bm_rle_bits | ||
| 3389 | * | ||
| 3390 | * Return 0 when done, 1 when another iteration is needed, and a negative error | ||
| 3391 | * code upon failure. | ||
| 3392 | */ | ||
| 3393 | static int | ||
| 3310 | recv_bm_rle_bits(struct drbd_conf *mdev, | 3394 | recv_bm_rle_bits(struct drbd_conf *mdev, |
| 3311 | struct p_compressed_bm *p, | 3395 | struct p_compressed_bm *p, |
| 3312 | struct bm_xfer_ctx *c) | 3396 | struct bm_xfer_ctx *c) |
| @@ -3326,18 +3410,18 @@ recv_bm_rle_bits(struct drbd_conf *mdev, | |||
| 3326 | 3410 | ||
| 3327 | bits = bitstream_get_bits(&bs, &look_ahead, 64); | 3411 | bits = bitstream_get_bits(&bs, &look_ahead, 64); |
| 3328 | if (bits < 0) | 3412 | if (bits < 0) |
| 3329 | return FAILED; | 3413 | return -EIO; |
| 3330 | 3414 | ||
| 3331 | for (have = bits; have > 0; s += rl, toggle = !toggle) { | 3415 | for (have = bits; have > 0; s += rl, toggle = !toggle) { |
| 3332 | bits = vli_decode_bits(&rl, look_ahead); | 3416 | bits = vli_decode_bits(&rl, look_ahead); |
| 3333 | if (bits <= 0) | 3417 | if (bits <= 0) |
| 3334 | return FAILED; | 3418 | return -EIO; |
| 3335 | 3419 | ||
| 3336 | if (toggle) { | 3420 | if (toggle) { |
| 3337 | e = s + rl - 1; | 3421 | e = s + rl - 1; |
| 3338 | if (e >= c->bm_bits) { | 3422 | if (e >= c->bm_bits) { |
| 3339 | dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e); | 3423 | dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e); |
| 3340 | return FAILED; | 3424 | return -EIO; |
| 3341 | } | 3425 | } |
| 3342 | _drbd_bm_set_bits(mdev, s, e); | 3426 | _drbd_bm_set_bits(mdev, s, e); |
| 3343 | } | 3427 | } |
| @@ -3347,14 +3431,14 @@ recv_bm_rle_bits(struct drbd_conf *mdev, | |||
| 3347 | have, bits, look_ahead, | 3431 | have, bits, look_ahead, |
| 3348 | (unsigned int)(bs.cur.b - p->code), | 3432 | (unsigned int)(bs.cur.b - p->code), |
| 3349 | (unsigned int)bs.buf_len); | 3433 | (unsigned int)bs.buf_len); |
| 3350 | return FAILED; | 3434 | return -EIO; |
| 3351 | } | 3435 | } |
| 3352 | look_ahead >>= bits; | 3436 | look_ahead >>= bits; |
| 3353 | have -= bits; | 3437 | have -= bits; |
| 3354 | 3438 | ||
| 3355 | bits = bitstream_get_bits(&bs, &tmp, 64 - have); | 3439 | bits = bitstream_get_bits(&bs, &tmp, 64 - have); |
| 3356 | if (bits < 0) | 3440 | if (bits < 0) |
| 3357 | return FAILED; | 3441 | return -EIO; |
| 3358 | look_ahead |= tmp << have; | 3442 | look_ahead |= tmp << have; |
| 3359 | have += bits; | 3443 | have += bits; |
| 3360 | } | 3444 | } |
| @@ -3362,10 +3446,16 @@ recv_bm_rle_bits(struct drbd_conf *mdev, | |||
| 3362 | c->bit_offset = s; | 3446 | c->bit_offset = s; |
| 3363 | bm_xfer_ctx_bit_to_word_offset(c); | 3447 | bm_xfer_ctx_bit_to_word_offset(c); |
| 3364 | 3448 | ||
| 3365 | return (s == c->bm_bits) ? DONE : OK; | 3449 | return (s != c->bm_bits); |
| 3366 | } | 3450 | } |
| 3367 | 3451 | ||
| 3368 | static enum receive_bitmap_ret | 3452 | /** |
| 3453 | * decode_bitmap_c | ||
| 3454 | * | ||
| 3455 | * Return 0 when done, 1 when another iteration is needed, and a negative error | ||
| 3456 | * code upon failure. | ||
| 3457 | */ | ||
| 3458 | static int | ||
| 3369 | decode_bitmap_c(struct drbd_conf *mdev, | 3459 | decode_bitmap_c(struct drbd_conf *mdev, |
| 3370 | struct p_compressed_bm *p, | 3460 | struct p_compressed_bm *p, |
| 3371 | struct bm_xfer_ctx *c) | 3461 | struct bm_xfer_ctx *c) |
| @@ -3379,7 +3469,7 @@ decode_bitmap_c(struct drbd_conf *mdev, | |||
| 3379 | 3469 | ||
| 3380 | dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding); | 3470 | dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding); |
| 3381 | drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); | 3471 | drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); |
| 3382 | return FAILED; | 3472 | return -EIO; |
| 3383 | } | 3473 | } |
| 3384 | 3474 | ||
| 3385 | void INFO_bm_xfer_stats(struct drbd_conf *mdev, | 3475 | void INFO_bm_xfer_stats(struct drbd_conf *mdev, |
| @@ -3428,13 +3518,13 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne | |||
| 3428 | { | 3518 | { |
| 3429 | struct bm_xfer_ctx c; | 3519 | struct bm_xfer_ctx c; |
| 3430 | void *buffer; | 3520 | void *buffer; |
| 3431 | enum receive_bitmap_ret ret; | 3521 | int err; |
| 3432 | int ok = FALSE; | 3522 | int ok = false; |
| 3433 | struct p_header80 *h = &mdev->data.rbuf.header.h80; | 3523 | struct p_header80 *h = &mdev->data.rbuf.header.h80; |
| 3434 | 3524 | ||
| 3435 | wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); | 3525 | drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED); |
| 3436 | 3526 | /* you are supposed to send additional out-of-sync information | |
| 3437 | drbd_bm_lock(mdev, "receive bitmap"); | 3527 | * if you actually set bits during this phase */ |
| 3438 | 3528 | ||
| 3439 | /* maybe we should use some per thread scratch page, | 3529 | /* maybe we should use some per thread scratch page, |
| 3440 | * and allocate that during initial device creation? */ | 3530 | * and allocate that during initial device creation? */ |
| @@ -3449,9 +3539,9 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne | |||
| 3449 | .bm_words = drbd_bm_words(mdev), | 3539 | .bm_words = drbd_bm_words(mdev), |
| 3450 | }; | 3540 | }; |
| 3451 | 3541 | ||
| 3452 | do { | 3542 | for(;;) { |
| 3453 | if (cmd == P_BITMAP) { | 3543 | if (cmd == P_BITMAP) { |
| 3454 | ret = receive_bitmap_plain(mdev, data_size, buffer, &c); | 3544 | err = receive_bitmap_plain(mdev, data_size, buffer, &c); |
| 3455 | } else if (cmd == P_COMPRESSED_BITMAP) { | 3545 | } else if (cmd == P_COMPRESSED_BITMAP) { |
| 3456 | /* MAYBE: sanity check that we speak proto >= 90, | 3546 | /* MAYBE: sanity check that we speak proto >= 90, |
| 3457 | * and the feature is enabled! */ | 3547 | * and the feature is enabled! */ |
| @@ -3468,9 +3558,9 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne | |||
| 3468 | goto out; | 3558 | goto out; |
| 3469 | if (data_size <= (sizeof(*p) - sizeof(p->head))) { | 3559 | if (data_size <= (sizeof(*p) - sizeof(p->head))) { |
| 3470 | dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size); | 3560 | dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size); |
| 3471 | return FAILED; | 3561 | goto out; |
| 3472 | } | 3562 | } |
| 3473 | ret = decode_bitmap_c(mdev, p, &c); | 3563 | err = decode_bitmap_c(mdev, p, &c); |
| 3474 | } else { | 3564 | } else { |
| 3475 | dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd); | 3565 | dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd); |
| 3476 | goto out; | 3566 | goto out; |
| @@ -3479,24 +3569,26 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne | |||
| 3479 | c.packets[cmd == P_BITMAP]++; | 3569 | c.packets[cmd == P_BITMAP]++; |
| 3480 | c.bytes[cmd == P_BITMAP] += sizeof(struct p_header80) + data_size; | 3570 | c.bytes[cmd == P_BITMAP] += sizeof(struct p_header80) + data_size; |
| 3481 | 3571 | ||
| 3482 | if (ret != OK) | 3572 | if (err <= 0) { |
| 3573 | if (err < 0) | ||
| 3574 | goto out; | ||
| 3483 | break; | 3575 | break; |
| 3484 | 3576 | } | |
| 3485 | if (!drbd_recv_header(mdev, &cmd, &data_size)) | 3577 | if (!drbd_recv_header(mdev, &cmd, &data_size)) |
| 3486 | goto out; | 3578 | goto out; |
| 3487 | } while (ret == OK); | 3579 | } |
| 3488 | if (ret == FAILED) | ||
| 3489 | goto out; | ||
| 3490 | 3580 | ||
| 3491 | INFO_bm_xfer_stats(mdev, "receive", &c); | 3581 | INFO_bm_xfer_stats(mdev, "receive", &c); |
| 3492 | 3582 | ||
| 3493 | if (mdev->state.conn == C_WF_BITMAP_T) { | 3583 | if (mdev->state.conn == C_WF_BITMAP_T) { |
| 3584 | enum drbd_state_rv rv; | ||
| 3585 | |||
| 3494 | ok = !drbd_send_bitmap(mdev); | 3586 | ok = !drbd_send_bitmap(mdev); |
| 3495 | if (!ok) | 3587 | if (!ok) |
| 3496 | goto out; | 3588 | goto out; |
| 3497 | /* Omit CS_ORDERED with this state transition to avoid deadlocks. */ | 3589 | /* Omit CS_ORDERED with this state transition to avoid deadlocks. */ |
| 3498 | ok = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); | 3590 | rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); |
| 3499 | D_ASSERT(ok == SS_SUCCESS); | 3591 | D_ASSERT(rv == SS_SUCCESS); |
| 3500 | } else if (mdev->state.conn != C_WF_BITMAP_S) { | 3592 | } else if (mdev->state.conn != C_WF_BITMAP_S) { |
| 3501 | /* admin may have requested C_DISCONNECTING, | 3593 | /* admin may have requested C_DISCONNECTING, |
| 3502 | * other threads may have noticed network errors */ | 3594 | * other threads may have noticed network errors */ |
| @@ -3504,7 +3596,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne | |||
| 3504 | drbd_conn_str(mdev->state.conn)); | 3596 | drbd_conn_str(mdev->state.conn)); |
| 3505 | } | 3597 | } |
| 3506 | 3598 | ||
| 3507 | ok = TRUE; | 3599 | ok = true; |
| 3508 | out: | 3600 | out: |
| 3509 | drbd_bm_unlock(mdev); | 3601 | drbd_bm_unlock(mdev); |
| 3510 | if (ok && mdev->state.conn == C_WF_BITMAP_S) | 3602 | if (ok && mdev->state.conn == C_WF_BITMAP_S) |
| @@ -3538,7 +3630,26 @@ static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, u | |||
| 3538 | * with the data requests being unplugged */ | 3630 | * with the data requests being unplugged */ |
| 3539 | drbd_tcp_quickack(mdev->data.socket); | 3631 | drbd_tcp_quickack(mdev->data.socket); |
| 3540 | 3632 | ||
| 3541 | return TRUE; | 3633 | return true; |
| 3634 | } | ||
| 3635 | |||
| 3636 | static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) | ||
| 3637 | { | ||
| 3638 | struct p_block_desc *p = &mdev->data.rbuf.block_desc; | ||
| 3639 | |||
| 3640 | switch (mdev->state.conn) { | ||
| 3641 | case C_WF_SYNC_UUID: | ||
| 3642 | case C_WF_BITMAP_T: | ||
| 3643 | case C_BEHIND: | ||
| 3644 | break; | ||
| 3645 | default: | ||
| 3646 | dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n", | ||
| 3647 | drbd_conn_str(mdev->state.conn)); | ||
| 3648 | } | ||
| 3649 | |||
| 3650 | drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize)); | ||
| 3651 | |||
| 3652 | return true; | ||
| 3542 | } | 3653 | } |
| 3543 | 3654 | ||
| 3544 | typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packets cmd, unsigned int to_receive); | 3655 | typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packets cmd, unsigned int to_receive); |
| @@ -3571,6 +3682,7 @@ static struct data_cmd drbd_cmd_handler[] = { | |||
| 3571 | [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest }, | 3682 | [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest }, |
| 3572 | [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest }, | 3683 | [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest }, |
| 3573 | [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip }, | 3684 | [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip }, |
| 3685 | [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync }, | ||
| 3574 | /* anything missing from this table is in | 3686 | /* anything missing from this table is in |
| 3575 | * the asender_tbl, see get_asender_cmd */ | 3687 | * the asender_tbl, see get_asender_cmd */ |
| 3576 | [P_MAX_CMD] = { 0, 0, NULL }, | 3688 | [P_MAX_CMD] = { 0, 0, NULL }, |
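The new `P_OUT_OF_SYNC` entry slots into the receiver's dispatch table, which maps each packet command to an expected fixed size and a handler; commands past the end of the table or with no handler are rejected. A hedged, self-contained sketch of that table idiom (struct layout and values are illustrative, not the DRBD wire format):

```c
#include <stddef.h>
#include <stdint.h>

typedef int (*cmd_handler_f)(void *ctx, unsigned int data_size);

struct p_block_desc {		/* stand-in for the real packet body */
	uint64_t sector;
	uint32_t blksize;
};

struct data_cmd {
	int expect_payload;	/* more data follows the fixed part? */
	size_t pkt_size;	/* fixed part the dispatcher pre-reads */
	cmd_handler_f function;
};

enum { P_DATA, P_OUT_OF_SYNC, P_MAX_CMD };

static int receive_out_of_sync(void *ctx, unsigned int data_size)
{
	return 1;		/* "true": keep the connection alive */
}

static const struct data_cmd handlers[] = {
	[P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_MAX_CMD]     = { 0, 0, NULL },
};

static int dispatch(unsigned int cmd, void *ctx, unsigned int data_size)
{
	/* out-of-range commands and gaps (NULL handlers) are rejected */
	if (cmd > P_MAX_CMD || handlers[cmd].function == NULL)
		return -1;
	return handlers[cmd].function(ctx, data_size);
}
```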
| @@ -3610,7 +3722,8 @@ static void drbdd(struct drbd_conf *mdev) | |||
| 3610 | if (shs) { | 3722 | if (shs) { |
| 3611 | rv = drbd_recv(mdev, &header->h80.payload, shs); | 3723 | rv = drbd_recv(mdev, &header->h80.payload, shs); |
| 3612 | if (unlikely(rv != shs)) { | 3724 | if (unlikely(rv != shs)) { |
| 3613 | dev_err(DEV, "short read while reading sub header: rv=%d\n", rv); | 3725 | if (!signal_pending(current)) |
| 3726 | dev_warn(DEV, "short read while reading sub header: rv=%d\n", rv); | ||
| 3614 | goto err_out; | 3727 | goto err_out; |
| 3615 | } | 3728 | } |
| 3616 | } | 3729 | } |
| @@ -3682,9 +3795,6 @@ static void drbd_disconnect(struct drbd_conf *mdev) | |||
| 3682 | 3795 | ||
| 3683 | if (mdev->state.conn == C_STANDALONE) | 3796 | if (mdev->state.conn == C_STANDALONE) |
| 3684 | return; | 3797 | return; |
| 3685 | if (mdev->state.conn >= C_WF_CONNECTION) | ||
| 3686 | dev_err(DEV, "ASSERT FAILED cstate = %s, expected < WFConnection\n", | ||
| 3687 | drbd_conn_str(mdev->state.conn)); | ||
| 3688 | 3798 | ||
| 3689 | /* asender does not clean up anything. it must not interfere, either */ | 3799 | /* asender does not clean up anything. it must not interfere, either */ |
| 3690 | drbd_thread_stop(&mdev->asender); | 3800 | drbd_thread_stop(&mdev->asender); |
| @@ -3713,6 +3823,8 @@ static void drbd_disconnect(struct drbd_conf *mdev) | |||
| 3713 | atomic_set(&mdev->rs_pending_cnt, 0); | 3823 | atomic_set(&mdev->rs_pending_cnt, 0); |
| 3714 | wake_up(&mdev->misc_wait); | 3824 | wake_up(&mdev->misc_wait); |
| 3715 | 3825 | ||
| 3826 | del_timer(&mdev->request_timer); | ||
| 3827 | |||
| 3716 | /* make sure syncer is stopped and w_resume_next_sg queued */ | 3828 | /* make sure syncer is stopped and w_resume_next_sg queued */ |
| 3717 | del_timer_sync(&mdev->resync_timer); | 3829 | del_timer_sync(&mdev->resync_timer); |
| 3718 | resync_timer_fn((unsigned long)mdev); | 3830 | resync_timer_fn((unsigned long)mdev); |
| @@ -3758,13 +3870,6 @@ static void drbd_disconnect(struct drbd_conf *mdev) | |||
| 3758 | if (os.conn == C_DISCONNECTING) { | 3870 | if (os.conn == C_DISCONNECTING) { |
| 3759 | wait_event(mdev->net_cnt_wait, atomic_read(&mdev->net_cnt) == 0); | 3871 | wait_event(mdev->net_cnt_wait, atomic_read(&mdev->net_cnt) == 0); |
| 3760 | 3872 | ||
| 3761 | if (!is_susp(mdev->state)) { | ||
| 3762 | /* we must not free the tl_hash | ||
| 3763 | * while application io is still on the fly */ | ||
| 3764 | wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); | ||
| 3765 | drbd_free_tl_hash(mdev); | ||
| 3766 | } | ||
| 3767 | |||
| 3768 | crypto_free_hash(mdev->cram_hmac_tfm); | 3873 | crypto_free_hash(mdev->cram_hmac_tfm); |
| 3769 | mdev->cram_hmac_tfm = NULL; | 3874 | mdev->cram_hmac_tfm = NULL; |
| 3770 | 3875 | ||
| @@ -3773,6 +3878,10 @@ static void drbd_disconnect(struct drbd_conf *mdev) | |||
| 3773 | drbd_request_state(mdev, NS(conn, C_STANDALONE)); | 3878 | drbd_request_state(mdev, NS(conn, C_STANDALONE)); |
| 3774 | } | 3879 | } |
| 3775 | 3880 | ||
| 3881 | /* serialize with bitmap writeout triggered by the state change, | ||
| 3882 | * if any. */ | ||
| 3883 | wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); | ||
| 3884 | |||
| 3776 | /* tcp_close and release of sendpage pages can be deferred. I don't | 3885 | /* tcp_close and release of sendpage pages can be deferred. I don't |
| 3777 | * want to use SO_LINGER, because apparently it can be deferred for | 3886 | * want to use SO_LINGER, because apparently it can be deferred for |
| 3778 | * more than 20 seconds (longest time I checked). | 3887 | * more than 20 seconds (longest time I checked). |
| @@ -3873,7 +3982,8 @@ static int drbd_do_handshake(struct drbd_conf *mdev) | |||
| 3873 | rv = drbd_recv(mdev, &p->head.payload, expect); | 3982 | rv = drbd_recv(mdev, &p->head.payload, expect); |
| 3874 | 3983 | ||
| 3875 | if (rv != expect) { | 3984 | if (rv != expect) { |
| 3876 | dev_err(DEV, "short read receiving handshake packet: l=%u\n", rv); | 3985 | if (!signal_pending(current)) |
| 3986 | dev_warn(DEV, "short read receiving handshake packet: l=%u\n", rv); | ||
| 3877 | return 0; | 3987 | return 0; |
| 3878 | } | 3988 | } |
| 3879 | 3989 | ||
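This hunk, and the two matching ones in `drbd_do_auth()` below, downgrade the short-read message and suppress it entirely when a signal is pending, since a receive interrupted during connection teardown is expected rather than an error. A user-space analogue of the same idea, treating `EINTR` the way the kernel code treats `signal_pending(current)` (illustrative only):

```c
#include <errno.h>
#include <stdio.h>
#include <unistd.h>

/* Sketch: only complain about a short read when it was not caused by
 * an interrupting signal, which is the expected teardown path. */
static int recv_exact(int fd, void *buf, size_t want)
{
	ssize_t got = read(fd, buf, want);

	if (got != (ssize_t)want) {
		if (!(got < 0 && errno == EINTR))
			fprintf(stderr, "short read: l=%zd\n", got);
		return 0;	/* caller treats this as a failed handshake */
	}
	return 1;
}
```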
| @@ -3975,7 +4085,8 @@ static int drbd_do_auth(struct drbd_conf *mdev) | |||
| 3975 | rv = drbd_recv(mdev, peers_ch, length); | 4085 | rv = drbd_recv(mdev, peers_ch, length); |
| 3976 | 4086 | ||
| 3977 | if (rv != length) { | 4087 | if (rv != length) { |
| 3978 | dev_err(DEV, "short read AuthChallenge: l=%u\n", rv); | 4088 | if (!signal_pending(current)) |
| 4089 | dev_warn(DEV, "short read AuthChallenge: l=%u\n", rv); | ||
| 3979 | rv = 0; | 4090 | rv = 0; |
| 3980 | goto fail; | 4091 | goto fail; |
| 3981 | } | 4092 | } |
| @@ -4022,7 +4133,8 @@ static int drbd_do_auth(struct drbd_conf *mdev) | |||
| 4022 | rv = drbd_recv(mdev, response , resp_size); | 4133 | rv = drbd_recv(mdev, response , resp_size); |
| 4023 | 4134 | ||
| 4024 | if (rv != resp_size) { | 4135 | if (rv != resp_size) { |
| 4025 | dev_err(DEV, "short read receiving AuthResponse: l=%u\n", rv); | 4136 | if (!signal_pending(current)) |
| 4137 | dev_warn(DEV, "short read receiving AuthResponse: l=%u\n", rv); | ||
| 4026 | rv = 0; | 4138 | rv = 0; |
| 4027 | goto fail; | 4139 | goto fail; |
| 4028 | } | 4140 | } |
| @@ -4074,8 +4186,7 @@ int drbdd_init(struct drbd_thread *thi) | |||
| 4074 | h = drbd_connect(mdev); | 4186 | h = drbd_connect(mdev); |
| 4075 | if (h == 0) { | 4187 | if (h == 0) { |
| 4076 | drbd_disconnect(mdev); | 4188 | drbd_disconnect(mdev); |
| 4077 | __set_current_state(TASK_INTERRUPTIBLE); | 4189 | schedule_timeout_interruptible(HZ); |
| 4078 | schedule_timeout(HZ); | ||
| 4079 | } | 4190 | } |
| 4080 | if (h == -1) { | 4191 | if (h == -1) { |
| 4081 | dev_warn(DEV, "Discarding network configuration.\n"); | 4192 | dev_warn(DEV, "Discarding network configuration.\n"); |
| @@ -4113,7 +4224,7 @@ static int got_RqSReply(struct drbd_conf *mdev, struct p_header80 *h) | |||
| 4113 | } | 4224 | } |
| 4114 | wake_up(&mdev->state_wait); | 4225 | wake_up(&mdev->state_wait); |
| 4115 | 4226 | ||
| 4116 | return TRUE; | 4227 | return true; |
| 4117 | } | 4228 | } |
| 4118 | 4229 | ||
| 4119 | static int got_Ping(struct drbd_conf *mdev, struct p_header80 *h) | 4230 | static int got_Ping(struct drbd_conf *mdev, struct p_header80 *h) |
| @@ -4129,7 +4240,7 @@ static int got_PingAck(struct drbd_conf *mdev, struct p_header80 *h) | |||
| 4129 | if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags)) | 4240 | if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags)) |
| 4130 | wake_up(&mdev->misc_wait); | 4241 | wake_up(&mdev->misc_wait); |
| 4131 | 4242 | ||
| 4132 | return TRUE; | 4243 | return true; |
| 4133 | } | 4244 | } |
| 4134 | 4245 | ||
| 4135 | static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) | 4246 | static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) |
| @@ -4152,7 +4263,7 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) | |||
| 4152 | dec_rs_pending(mdev); | 4263 | dec_rs_pending(mdev); |
| 4153 | atomic_add(blksize >> 9, &mdev->rs_sect_in); | 4264 | atomic_add(blksize >> 9, &mdev->rs_sect_in); |
| 4154 | 4265 | ||
| 4155 | return TRUE; | 4266 | return true; |
| 4156 | } | 4267 | } |
| 4157 | 4268 | ||
| 4158 | /* when we receive the ACK for a write request, | 4269 | /* when we receive the ACK for a write request, |
| @@ -4176,8 +4287,6 @@ static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev, | |||
| 4176 | return req; | 4287 | return req; |
| 4177 | } | 4288 | } |
| 4178 | } | 4289 | } |
| 4179 | dev_err(DEV, "_ack_id_to_req: failed to find req %p, sector %llus in list\n", | ||
| 4180 | (void *)(unsigned long)id, (unsigned long long)sector); | ||
| 4181 | return NULL; | 4290 | return NULL; |
| 4182 | } | 4291 | } |
| 4183 | 4292 | ||
| @@ -4195,15 +4304,17 @@ static int validate_req_change_req_state(struct drbd_conf *mdev, | |||
| 4195 | req = validator(mdev, id, sector); | 4304 | req = validator(mdev, id, sector); |
| 4196 | if (unlikely(!req)) { | 4305 | if (unlikely(!req)) { |
| 4197 | spin_unlock_irq(&mdev->req_lock); | 4306 | spin_unlock_irq(&mdev->req_lock); |
| 4198 | dev_err(DEV, "%s: got a corrupt block_id/sector pair\n", func); | 4307 | |
| 4199 | return FALSE; | 4308 | dev_err(DEV, "%s: failed to find req %p, sector %llus\n", func, |
| 4309 | (void *)(unsigned long)id, (unsigned long long)sector); | ||
| 4310 | return false; | ||
| 4200 | } | 4311 | } |
| 4201 | __req_mod(req, what, &m); | 4312 | __req_mod(req, what, &m); |
| 4202 | spin_unlock_irq(&mdev->req_lock); | 4313 | spin_unlock_irq(&mdev->req_lock); |
| 4203 | 4314 | ||
| 4204 | if (m.bio) | 4315 | if (m.bio) |
| 4205 | complete_master_bio(mdev, &m); | 4316 | complete_master_bio(mdev, &m); |
| 4206 | return TRUE; | 4317 | return true; |
| 4207 | } | 4318 | } |
| 4208 | 4319 | ||
| 4209 | static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) | 4320 | static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) |
| @@ -4218,7 +4329,7 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) | |||
| 4218 | if (is_syncer_block_id(p->block_id)) { | 4329 | if (is_syncer_block_id(p->block_id)) { |
| 4219 | drbd_set_in_sync(mdev, sector, blksize); | 4330 | drbd_set_in_sync(mdev, sector, blksize); |
| 4220 | dec_rs_pending(mdev); | 4331 | dec_rs_pending(mdev); |
| 4221 | return TRUE; | 4332 | return true; |
| 4222 | } | 4333 | } |
| 4223 | switch (be16_to_cpu(h->command)) { | 4334 | switch (be16_to_cpu(h->command)) { |
| 4224 | case P_RS_WRITE_ACK: | 4335 | case P_RS_WRITE_ACK: |
| @@ -4239,7 +4350,7 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) | |||
| 4239 | break; | 4350 | break; |
| 4240 | default: | 4351 | default: |
| 4241 | D_ASSERT(0); | 4352 | D_ASSERT(0); |
| 4242 | return FALSE; | 4353 | return false; |
| 4243 | } | 4354 | } |
| 4244 | 4355 | ||
| 4245 | return validate_req_change_req_state(mdev, p->block_id, sector, | 4356 | return validate_req_change_req_state(mdev, p->block_id, sector, |
| @@ -4250,20 +4361,44 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) | |||
| 4250 | { | 4361 | { |
| 4251 | struct p_block_ack *p = (struct p_block_ack *)h; | 4362 | struct p_block_ack *p = (struct p_block_ack *)h; |
| 4252 | sector_t sector = be64_to_cpu(p->sector); | 4363 | sector_t sector = be64_to_cpu(p->sector); |
| 4253 | 4364 | int size = be32_to_cpu(p->blksize); | |
| 4254 | if (__ratelimit(&drbd_ratelimit_state)) | 4365 | struct drbd_request *req; |
| 4255 | dev_warn(DEV, "Got NegAck packet. Peer is in troubles?\n"); | 4366 | struct bio_and_error m; |
| 4256 | 4367 | ||
| 4257 | update_peer_seq(mdev, be32_to_cpu(p->seq_num)); | 4368 | update_peer_seq(mdev, be32_to_cpu(p->seq_num)); |
| 4258 | 4369 | ||
| 4259 | if (is_syncer_block_id(p->block_id)) { | 4370 | if (is_syncer_block_id(p->block_id)) { |
| 4260 | int size = be32_to_cpu(p->blksize); | ||
| 4261 | dec_rs_pending(mdev); | 4371 | dec_rs_pending(mdev); |
| 4262 | drbd_rs_failed_io(mdev, sector, size); | 4372 | drbd_rs_failed_io(mdev, sector, size); |
| 4263 | return TRUE; | 4373 | return true; |
| 4264 | } | 4374 | } |
| 4265 | return validate_req_change_req_state(mdev, p->block_id, sector, | 4375 | |
| 4266 | _ack_id_to_req, __func__ , neg_acked); | 4376 | spin_lock_irq(&mdev->req_lock); |
| 4377 | req = _ack_id_to_req(mdev, p->block_id, sector); | ||
| 4378 | if (!req) { | ||
| 4379 | spin_unlock_irq(&mdev->req_lock); | ||
| 4380 | if (mdev->net_conf->wire_protocol == DRBD_PROT_A || | ||
| 4381 | mdev->net_conf->wire_protocol == DRBD_PROT_B) { | ||
| 4382 | /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs. | ||
| 4383 | The master bio might already be completed, therefore the | ||
| 4384 | request is no longer in the collision hash. | ||
| 4385 | => Do not try to validate block_id as request. */ | ||
| 4386 | /* In Protocol B we might already have got a P_RECV_ACK | ||
| 4387 | but then get a P_NEG_ACK after wards. */ | ||
| 4388 | drbd_set_out_of_sync(mdev, sector, size); | ||
| 4389 | return true; | ||
| 4390 | } else { | ||
| 4391 | dev_err(DEV, "%s: failed to find req %p, sector %llus\n", __func__, | ||
| 4392 | (void *)(unsigned long)p->block_id, (unsigned long long)sector); | ||
| 4393 | return false; | ||
| 4394 | } | ||
| 4395 | } | ||
| 4396 | __req_mod(req, neg_acked, &m); | ||
| 4397 | spin_unlock_irq(&mdev->req_lock); | ||
| 4398 | |||
| 4399 | if (m.bio) | ||
| 4400 | complete_master_bio(mdev, &m); | ||
| 4401 | return true; | ||
| 4267 | } | 4402 | } |
| 4268 | 4403 | ||
| 4269 | static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h) | 4404 | static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h) |
| @@ -4294,11 +4429,20 @@ static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h) | |||
| 4294 | 4429 | ||
| 4295 | if (get_ldev_if_state(mdev, D_FAILED)) { | 4430 | if (get_ldev_if_state(mdev, D_FAILED)) { |
| 4296 | drbd_rs_complete_io(mdev, sector); | 4431 | drbd_rs_complete_io(mdev, sector); |
| 4297 | drbd_rs_failed_io(mdev, sector, size); | 4432 | switch (be16_to_cpu(h->command)) { |
| 4433 | case P_NEG_RS_DREPLY: | ||
| 4434 | drbd_rs_failed_io(mdev, sector, size); | ||
| 4435 | case P_RS_CANCEL: | ||
| 4436 | break; | ||
| 4437 | default: | ||
| 4438 | D_ASSERT(0); | ||
| 4439 | put_ldev(mdev); | ||
| 4440 | return false; | ||
| 4441 | } | ||
| 4298 | put_ldev(mdev); | 4442 | put_ldev(mdev); |
| 4299 | } | 4443 | } |
| 4300 | 4444 | ||
| 4301 | return TRUE; | 4445 | return true; |
| 4302 | } | 4446 | } |
| 4303 | 4447 | ||
| 4304 | static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h) | 4448 | static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h) |
| @@ -4307,7 +4451,14 @@ static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h) | |||
| 4307 | 4451 | ||
| 4308 | tl_release(mdev, p->barrier, be32_to_cpu(p->set_size)); | 4452 | tl_release(mdev, p->barrier, be32_to_cpu(p->set_size)); |
| 4309 | 4453 | ||
| 4310 | return TRUE; | 4454 | if (mdev->state.conn == C_AHEAD && |
| 4455 | atomic_read(&mdev->ap_in_flight) == 0 && | ||
| 4456 | !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) { | ||
| 4457 | mdev->start_resync_timer.expires = jiffies + HZ; | ||
| 4458 | add_timer(&mdev->start_resync_timer); | ||
| 4459 | } | ||
| 4460 | |||
| 4461 | return true; | ||
| 4311 | } | 4462 | } |
| 4312 | 4463 | ||
| 4313 | static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) | 4464 | static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) |
| @@ -4328,12 +4479,18 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) | |||
| 4328 | ov_oos_print(mdev); | 4479 | ov_oos_print(mdev); |
| 4329 | 4480 | ||
| 4330 | if (!get_ldev(mdev)) | 4481 | if (!get_ldev(mdev)) |
| 4331 | return TRUE; | 4482 | return true; |
| 4332 | 4483 | ||
| 4333 | drbd_rs_complete_io(mdev, sector); | 4484 | drbd_rs_complete_io(mdev, sector); |
| 4334 | dec_rs_pending(mdev); | 4485 | dec_rs_pending(mdev); |
| 4335 | 4486 | ||
| 4336 | if (--mdev->ov_left == 0) { | 4487 | --mdev->ov_left; |
| 4488 | |||
| 4489 | /* let's advance progress step marks only for every other megabyte */ | ||
| 4490 | if ((mdev->ov_left & 0x200) == 0x200) | ||
| 4491 | drbd_advance_rs_marks(mdev, mdev->ov_left); | ||
| 4492 | |||
| 4493 | if (mdev->ov_left == 0) { | ||
| 4337 | w = kmalloc(sizeof(*w), GFP_NOIO); | 4494 | w = kmalloc(sizeof(*w), GFP_NOIO); |
| 4338 | if (w) { | 4495 | if (w) { |
| 4339 | w->cb = w_ov_finished; | 4496 | w->cb = w_ov_finished; |
| @@ -4345,12 +4502,12 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) | |||
| 4345 | } | 4502 | } |
| 4346 | } | 4503 | } |
| 4347 | put_ldev(mdev); | 4504 | put_ldev(mdev); |
| 4348 | return TRUE; | 4505 | return true; |
| 4349 | } | 4506 | } |
| 4350 | 4507 | ||
| 4351 | static int got_skip(struct drbd_conf *mdev, struct p_header80 *h) | 4508 | static int got_skip(struct drbd_conf *mdev, struct p_header80 *h) |
| 4352 | { | 4509 | { |
| 4353 | return TRUE; | 4510 | return true; |
| 4354 | } | 4511 | } |
| 4355 | 4512 | ||
| 4356 | struct asender_cmd { | 4513 | struct asender_cmd { |
| @@ -4378,6 +4535,7 @@ static struct asender_cmd *get_asender_cmd(int cmd) | |||
| 4378 | [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, | 4535 | [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, |
| 4379 | [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, | 4536 | [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, |
| 4380 | [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip }, | 4537 | [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip }, |
| 4538 | [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply}, | ||
| 4381 | [P_MAX_CMD] = { 0, NULL }, | 4539 | [P_MAX_CMD] = { 0, NULL }, |
| 4382 | }; | 4540 | }; |
| 4383 | if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL) | 4541 | if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL) |
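The new `[P_RS_CANCEL]` entry reuses `got_NegRSDReply()`, whose `switch` (in the hunk further up) lets `P_NEG_RS_DREPLY` do its extra `drbd_rs_failed_io()` bookkeeping and then deliberately fall through into the shared `P_RS_CANCEL` tail. A minimal sketch of that shared-tail idiom; the enum values match the diff but the helpers are hypothetical:

```c
#include <errno.h>
#include <stdio.h>

enum pkt { P_NEG_RS_DREPLY, P_RS_CANCEL };

static void rs_complete_io(void) { puts("complete resync io"); } /* common */
static void rs_failed_io(void)   { puts("mark block failed"); }  /* DREPLY only */

static int handle_rs_reply(enum pkt cmd)
{
	rs_complete_io();		/* both packets do this first */
	switch (cmd) {
	case P_NEG_RS_DREPLY:
		rs_failed_io();		/* extra work... */
		/* fall through */	/* ...then share the cancel tail */
	case P_RS_CANCEL:
		break;
	default:
		return -EINVAL;		/* unreachable for known packets */
	}
	return 0;
}
```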
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index ad3fc6228f27..5c0c8be1bb0a 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c | |||
| @@ -140,9 +140,14 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, | |||
| 140 | struct hlist_node *n; | 140 | struct hlist_node *n; |
| 141 | struct hlist_head *slot; | 141 | struct hlist_head *slot; |
| 142 | 142 | ||
| 143 | /* before we can signal completion to the upper layers, | 143 | /* Before we can signal completion to the upper layers, |
| 144 | * we may need to close the current epoch */ | 144 | * we may need to close the current epoch. |
| 145 | * We can skip this, if this request has not even been sent, because we | ||
| 146 | * did not have a fully established connection yet/anymore, during | ||
| 147 | * bitmap exchange, or while we are C_AHEAD due to congestion policy. | ||
| 148 | */ | ||
| 145 | if (mdev->state.conn >= C_CONNECTED && | 149 | if (mdev->state.conn >= C_CONNECTED && |
| 150 | (s & RQ_NET_SENT) != 0 && | ||
| 146 | req->epoch == mdev->newest_tle->br_number) | 151 | req->epoch == mdev->newest_tle->br_number) |
| 147 | queue_barrier(mdev); | 152 | queue_barrier(mdev); |
| 148 | 153 | ||
| @@ -440,7 +445,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
| 440 | req->rq_state |= RQ_LOCAL_COMPLETED; | 445 | req->rq_state |= RQ_LOCAL_COMPLETED; |
| 441 | req->rq_state &= ~RQ_LOCAL_PENDING; | 446 | req->rq_state &= ~RQ_LOCAL_PENDING; |
| 442 | 447 | ||
| 443 | __drbd_chk_io_error(mdev, FALSE); | 448 | __drbd_chk_io_error(mdev, false); |
| 444 | _req_may_be_done_not_susp(req, m); | 449 | _req_may_be_done_not_susp(req, m); |
| 445 | put_ldev(mdev); | 450 | put_ldev(mdev); |
| 446 | break; | 451 | break; |
| @@ -461,7 +466,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
| 461 | 466 | ||
| 462 | D_ASSERT(!(req->rq_state & RQ_NET_MASK)); | 467 | D_ASSERT(!(req->rq_state & RQ_NET_MASK)); |
| 463 | 468 | ||
| 464 | __drbd_chk_io_error(mdev, FALSE); | 469 | __drbd_chk_io_error(mdev, false); |
| 465 | put_ldev(mdev); | 470 | put_ldev(mdev); |
| 466 | 471 | ||
| 467 | /* no point in retrying if there is no good remote data, | 472 | /* no point in retrying if there is no good remote data, |
| @@ -545,6 +550,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
| 545 | 550 | ||
| 546 | break; | 551 | break; |
| 547 | 552 | ||
| 553 | case queue_for_send_oos: | ||
| 554 | req->rq_state |= RQ_NET_QUEUED; | ||
| 555 | req->w.cb = w_send_oos; | ||
| 556 | drbd_queue_work(&mdev->data.work, &req->w); | ||
| 557 | break; | ||
| 558 | |||
| 559 | case oos_handed_to_network: | ||
| 560 | /* actually the same */ | ||
| 548 | case send_canceled: | 561 | case send_canceled: |
| 549 | /* treat it the same */ | 562 | /* treat it the same */ |
| 550 | case send_failed: | 563 | case send_failed: |
| @@ -558,6 +571,9 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
| 558 | 571 | ||
| 559 | case handed_over_to_network: | 572 | case handed_over_to_network: |
| 560 | /* assert something? */ | 573 | /* assert something? */ |
| 574 | if (bio_data_dir(req->master_bio) == WRITE) | ||
| 575 | atomic_add(req->size>>9, &mdev->ap_in_flight); | ||
| 576 | |||
| 561 | if (bio_data_dir(req->master_bio) == WRITE && | 577 | if (bio_data_dir(req->master_bio) == WRITE && |
| 562 | mdev->net_conf->wire_protocol == DRBD_PROT_A) { | 578 | mdev->net_conf->wire_protocol == DRBD_PROT_A) { |
| 563 | /* this is what is dangerous about protocol A: | 579 | /* this is what is dangerous about protocol A: |
| @@ -591,6 +607,9 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
| 591 | dec_ap_pending(mdev); | 607 | dec_ap_pending(mdev); |
| 592 | req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); | 608 | req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); |
| 593 | req->rq_state |= RQ_NET_DONE; | 609 | req->rq_state |= RQ_NET_DONE; |
| 610 | if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE) | ||
| 611 | atomic_sub(req->size>>9, &mdev->ap_in_flight); | ||
| 612 | |||
| 594 | /* if it is still queued, we may not complete it here. | 613 | /* if it is still queued, we may not complete it here. |
| 595 | * it will be canceled soon. */ | 614 | * it will be canceled soon. */ |
| 596 | if (!(req->rq_state & RQ_NET_QUEUED)) | 615 | if (!(req->rq_state & RQ_NET_QUEUED)) |
| @@ -628,14 +647,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
| 628 | req->rq_state |= RQ_NET_OK; | 647 | req->rq_state |= RQ_NET_OK; |
| 629 | D_ASSERT(req->rq_state & RQ_NET_PENDING); | 648 | D_ASSERT(req->rq_state & RQ_NET_PENDING); |
| 630 | dec_ap_pending(mdev); | 649 | dec_ap_pending(mdev); |
| 650 | atomic_sub(req->size>>9, &mdev->ap_in_flight); | ||
| 631 | req->rq_state &= ~RQ_NET_PENDING; | 651 | req->rq_state &= ~RQ_NET_PENDING; |
| 632 | _req_may_be_done_not_susp(req, m); | 652 | _req_may_be_done_not_susp(req, m); |
| 633 | break; | 653 | break; |
| 634 | 654 | ||
| 635 | case neg_acked: | 655 | case neg_acked: |
| 636 | /* assert something? */ | 656 | /* assert something? */ |
| 637 | if (req->rq_state & RQ_NET_PENDING) | 657 | if (req->rq_state & RQ_NET_PENDING) { |
| 638 | dec_ap_pending(mdev); | 658 | dec_ap_pending(mdev); |
| 659 | atomic_sub(req->size>>9, &mdev->ap_in_flight); | ||
| 660 | } | ||
| 639 | req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); | 661 | req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); |
| 640 | 662 | ||
| 641 | req->rq_state |= RQ_NET_DONE; | 663 | req->rq_state |= RQ_NET_DONE; |
| @@ -690,8 +712,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
| 690 | dev_err(DEV, "FIXME (barrier_acked but pending)\n"); | 712 | dev_err(DEV, "FIXME (barrier_acked but pending)\n"); |
| 691 | list_move(&req->tl_requests, &mdev->out_of_sequence_requests); | 713 | list_move(&req->tl_requests, &mdev->out_of_sequence_requests); |
| 692 | } | 714 | } |
| 693 | D_ASSERT(req->rq_state & RQ_NET_SENT); | 715 | if ((req->rq_state & RQ_NET_MASK) != 0) { |
| 694 | req->rq_state |= RQ_NET_DONE; | 716 | req->rq_state |= RQ_NET_DONE; |
| 717 | if (mdev->net_conf->wire_protocol == DRBD_PROT_A) | ||
| 718 | atomic_sub(req->size>>9, &mdev->ap_in_flight); | ||
| 719 | } | ||
| 695 | _req_may_be_done(req, m); /* Allowed while state.susp */ | 720 | _req_may_be_done(req, m); /* Allowed while state.susp */ |
| 696 | break; | 721 | break; |
| 697 | 722 | ||
| @@ -738,14 +763,14 @@ static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int s | |||
| 738 | return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr); | 763 | return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr); |
| 739 | } | 764 | } |
| 740 | 765 | ||
| 741 | static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) | 766 | static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time) |
| 742 | { | 767 | { |
| 743 | const int rw = bio_rw(bio); | 768 | const int rw = bio_rw(bio); |
| 744 | const int size = bio->bi_size; | 769 | const int size = bio->bi_size; |
| 745 | const sector_t sector = bio->bi_sector; | 770 | const sector_t sector = bio->bi_sector; |
| 746 | struct drbd_tl_epoch *b = NULL; | 771 | struct drbd_tl_epoch *b = NULL; |
| 747 | struct drbd_request *req; | 772 | struct drbd_request *req; |
| 748 | int local, remote; | 773 | int local, remote, send_oos = 0; |
| 749 | int err = -EIO; | 774 | int err = -EIO; |
| 750 | int ret = 0; | 775 | int ret = 0; |
| 751 | 776 | ||
| @@ -759,6 +784,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) | |||
| 759 | bio_endio(bio, -ENOMEM); | 784 | bio_endio(bio, -ENOMEM); |
| 760 | return 0; | 785 | return 0; |
| 761 | } | 786 | } |
| 787 | req->start_time = start_time; | ||
| 762 | 788 | ||
| 763 | local = get_ldev(mdev); | 789 | local = get_ldev(mdev); |
| 764 | if (!local) { | 790 | if (!local) { |
| @@ -808,9 +834,9 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) | |||
| 808 | drbd_al_begin_io(mdev, sector); | 834 | drbd_al_begin_io(mdev, sector); |
| 809 | } | 835 | } |
| 810 | 836 | ||
| 811 | remote = remote && (mdev->state.pdsk == D_UP_TO_DATE || | 837 | remote = remote && drbd_should_do_remote(mdev->state); |
| 812 | (mdev->state.pdsk == D_INCONSISTENT && | 838 | send_oos = rw == WRITE && drbd_should_send_oos(mdev->state); |
| 813 | mdev->state.conn >= C_CONNECTED)); | 839 | D_ASSERT(!(remote && send_oos)); |
| 814 | 840 | ||
| 815 | if (!(local || remote) && !is_susp(mdev->state)) { | 841 | if (!(local || remote) && !is_susp(mdev->state)) { |
| 816 | if (__ratelimit(&drbd_ratelimit_state)) | 842 | if (__ratelimit(&drbd_ratelimit_state)) |
| @@ -824,7 +850,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) | |||
| 824 | * but there is a race between testing the bit and pointer outside the | 850 | * but there is a race between testing the bit and pointer outside the |
| 825 | * spinlock, and grabbing the spinlock. | 851 | * spinlock, and grabbing the spinlock. |
| 826 | * if we lost that race, we retry. */ | 852 | * if we lost that race, we retry. */ |
| 827 | if (rw == WRITE && remote && | 853 | if (rw == WRITE && (remote || send_oos) && |
| 828 | mdev->unused_spare_tle == NULL && | 854 | mdev->unused_spare_tle == NULL && |
| 829 | test_bit(CREATE_BARRIER, &mdev->flags)) { | 855 | test_bit(CREATE_BARRIER, &mdev->flags)) { |
| 830 | allocate_barrier: | 856 | allocate_barrier: |
| @@ -842,18 +868,19 @@ allocate_barrier: | |||
| 842 | if (is_susp(mdev->state)) { | 868 | if (is_susp(mdev->state)) { |
| 843 | /* If we got suspended, use the retry mechanism of | 869 | /* If we got suspended, use the retry mechanism of |
| 844 | generic_make_request() to restart processing of this | 870 | generic_make_request() to restart processing of this |
| 845 | bio. In the next call to drbd_make_request_26 | 871 | bio. In the next call to drbd_make_request |
| 846 | we sleep in inc_ap_bio() */ | 872 | we sleep in inc_ap_bio() */ |
| 847 | ret = 1; | 873 | ret = 1; |
| 848 | spin_unlock_irq(&mdev->req_lock); | 874 | spin_unlock_irq(&mdev->req_lock); |
| 849 | goto fail_free_complete; | 875 | goto fail_free_complete; |
| 850 | } | 876 | } |
| 851 | 877 | ||
| 852 | if (remote) { | 878 | if (remote || send_oos) { |
| 853 | remote = (mdev->state.pdsk == D_UP_TO_DATE || | 879 | remote = drbd_should_do_remote(mdev->state); |
| 854 | (mdev->state.pdsk == D_INCONSISTENT && | 880 | send_oos = rw == WRITE && drbd_should_send_oos(mdev->state); |
| 855 | mdev->state.conn >= C_CONNECTED)); | 881 | D_ASSERT(!(remote && send_oos)); |
| 856 | if (!remote) | 882 | |
| 883 | if (!(remote || send_oos)) | ||
| 857 | dev_warn(DEV, "lost connection while grabbing the req_lock!\n"); | 884 | dev_warn(DEV, "lost connection while grabbing the req_lock!\n"); |
| 858 | if (!(local || remote)) { | 885 | if (!(local || remote)) { |
| 859 | dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); | 886 | dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); |
| @@ -866,7 +893,7 @@ allocate_barrier: | |||
| 866 | mdev->unused_spare_tle = b; | 893 | mdev->unused_spare_tle = b; |
| 867 | b = NULL; | 894 | b = NULL; |
| 868 | } | 895 | } |
| 869 | if (rw == WRITE && remote && | 896 | if (rw == WRITE && (remote || send_oos) && |
| 870 | mdev->unused_spare_tle == NULL && | 897 | mdev->unused_spare_tle == NULL && |
| 871 | test_bit(CREATE_BARRIER, &mdev->flags)) { | 898 | test_bit(CREATE_BARRIER, &mdev->flags)) { |
| 872 | /* someone closed the current epoch | 899 | /* someone closed the current epoch |
| @@ -889,7 +916,7 @@ allocate_barrier: | |||
| 889 | * barrier packet. To get the write ordering right, we only have to | 916 | * barrier packet. To get the write ordering right, we only have to |
| 890 | * make sure that, if this is a write request and it triggered a | 917 | * make sure that, if this is a write request and it triggered a |
| 891 | * barrier packet, this request is queued within the same spinlock. */ | 918 | * barrier packet, this request is queued within the same spinlock. */ |
| 892 | if (remote && mdev->unused_spare_tle && | 919 | if ((remote || send_oos) && mdev->unused_spare_tle && |
| 893 | test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { | 920 | test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { |
| 894 | _tl_add_barrier(mdev, mdev->unused_spare_tle); | 921 | _tl_add_barrier(mdev, mdev->unused_spare_tle); |
| 895 | mdev->unused_spare_tle = NULL; | 922 | mdev->unused_spare_tle = NULL; |
| @@ -937,6 +964,34 @@ allocate_barrier: | |||
| 937 | ? queue_for_net_write | 964 | ? queue_for_net_write |
| 938 | : queue_for_net_read); | 965 | : queue_for_net_read); |
| 939 | } | 966 | } |
| 967 | if (send_oos && drbd_set_out_of_sync(mdev, sector, size)) | ||
| 968 | _req_mod(req, queue_for_send_oos); | ||
| 969 | |||
| 970 | if (remote && | ||
| 971 | mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) { | ||
| 972 | int congested = 0; | ||
| 973 | |||
| 974 | if (mdev->net_conf->cong_fill && | ||
| 975 | atomic_read(&mdev->ap_in_flight) >= mdev->net_conf->cong_fill) { | ||
| 976 | dev_info(DEV, "Congestion-fill threshold reached\n"); | ||
| 977 | congested = 1; | ||
| 978 | } | ||
| 979 | |||
| 980 | if (mdev->act_log->used >= mdev->net_conf->cong_extents) { | ||
| 981 | dev_info(DEV, "Congestion-extents threshold reached\n"); | ||
| 982 | congested = 1; | ||
| 983 | } | ||
| 984 | |||
| 985 | if (congested) { | ||
| 986 | queue_barrier(mdev); /* last barrier, after mirrored writes */ | ||
| 987 | |||
| 988 | if (mdev->net_conf->on_congestion == OC_PULL_AHEAD) | ||
| 989 | _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL); | ||
| 990 | else /*mdev->net_conf->on_congestion == OC_DISCONNECT */ | ||
| 991 | _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL); | ||
| 992 | } | ||
| 993 | } | ||
| 994 | |||
| 940 | spin_unlock_irq(&mdev->req_lock); | 995 | spin_unlock_irq(&mdev->req_lock); |
| 941 | kfree(b); /* if someone else has beaten us to it... */ | 996 | kfree(b); /* if someone else has beaten us to it... */ |
| 942 | 997 | ||
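The congestion block above is the sender side of the new Ahead/Behind mode: with protocol 96 or later and `on_congestion` not set to block, crossing either the `cong_fill` threshold (sectors in flight) or the `cong_extents` threshold (activity-log usage) closes the current epoch and then either pulls ahead or disconnects. A compact user-space sketch of the same decision, with illustrative names standing in for the DRBD configuration:

```c
#include <stdbool.h>
#include <stdio.h>

enum cong_policy { OC_BLOCK, OC_PULL_AHEAD, OC_DISCONNECT };
enum repl_mode { MODE_MIRROR, MODE_AHEAD, MODE_DISCONNECTED };

static enum repl_mode on_mirrored_write(enum cong_policy policy,
					unsigned int in_flight_sectors,
					unsigned int cong_fill,
					unsigned int al_extents_used,
					unsigned int cong_extents)
{
	bool congested =
		(cong_fill && in_flight_sectors >= cong_fill) ||
		al_extents_used >= cong_extents;

	if (policy == OC_BLOCK || !congested)
		return MODE_MIRROR;	/* keep replicating synchronously */

	/* the diff closes the epoch first (queue_barrier()), then: */
	return policy == OC_PULL_AHEAD ? MODE_AHEAD : MODE_DISCONNECTED;
}

int main(void)
{
	/* fill threshold 1000 sectors, 1200 in flight: pull ahead (1) */
	printf("%d\n", on_mirrored_write(OC_PULL_AHEAD, 1200, 1000, 10, 67));
	return 0;
}
```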
| @@ -949,9 +1004,9 @@ allocate_barrier: | |||
| 949 | * stable storage, and this is a WRITE, we may not even submit | 1004 | * stable storage, and this is a WRITE, we may not even submit |
| 950 | * this bio. */ | 1005 | * this bio. */ |
| 951 | if (get_ldev(mdev)) { | 1006 | if (get_ldev(mdev)) { |
| 952 | if (FAULT_ACTIVE(mdev, rw == WRITE ? DRBD_FAULT_DT_WR | 1007 | if (drbd_insert_fault(mdev, rw == WRITE ? DRBD_FAULT_DT_WR |
| 953 | : rw == READ ? DRBD_FAULT_DT_RD | 1008 | : rw == READ ? DRBD_FAULT_DT_RD |
| 954 | : DRBD_FAULT_DT_RA)) | 1009 | : DRBD_FAULT_DT_RA)) |
| 955 | bio_endio(req->private_bio, -EIO); | 1010 | bio_endio(req->private_bio, -EIO); |
| 956 | else | 1011 | else |
| 957 | generic_make_request(req->private_bio); | 1012 | generic_make_request(req->private_bio); |
| @@ -1018,16 +1073,19 @@ static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write) | |||
| 1018 | return 0; | 1073 | return 0; |
| 1019 | } | 1074 | } |
| 1020 | 1075 | ||
| 1021 | int drbd_make_request_26(struct request_queue *q, struct bio *bio) | 1076 | int drbd_make_request(struct request_queue *q, struct bio *bio) |
| 1022 | { | 1077 | { |
| 1023 | unsigned int s_enr, e_enr; | 1078 | unsigned int s_enr, e_enr; |
| 1024 | struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; | 1079 | struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; |
| 1080 | unsigned long start_time; | ||
| 1025 | 1081 | ||
| 1026 | if (drbd_fail_request_early(mdev, bio_data_dir(bio) & WRITE)) { | 1082 | if (drbd_fail_request_early(mdev, bio_data_dir(bio) & WRITE)) { |
| 1027 | bio_endio(bio, -EPERM); | 1083 | bio_endio(bio, -EPERM); |
| 1028 | return 0; | 1084 | return 0; |
| 1029 | } | 1085 | } |
| 1030 | 1086 | ||
| 1087 | start_time = jiffies; | ||
| 1088 | |||
| 1031 | /* | 1089 | /* |
| 1032 | * what we "blindly" assume: | 1090 | * what we "blindly" assume: |
| 1033 | */ | 1091 | */ |
| @@ -1042,12 +1100,12 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio) | |||
| 1042 | 1100 | ||
| 1043 | if (likely(s_enr == e_enr)) { | 1101 | if (likely(s_enr == e_enr)) { |
| 1044 | inc_ap_bio(mdev, 1); | 1102 | inc_ap_bio(mdev, 1); |
| 1045 | return drbd_make_request_common(mdev, bio); | 1103 | return drbd_make_request_common(mdev, bio, start_time); |
| 1046 | } | 1104 | } |
| 1047 | 1105 | ||
| 1048 | /* can this bio be split generically? | 1106 | /* can this bio be split generically? |
| 1049 | * Maybe add our own split-arbitrary-bios function. */ | 1107 | * Maybe add our own split-arbitrary-bios function. */ |
| 1050 | if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_SEGMENT_SIZE) { | 1108 | if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_BIO_SIZE) { |
| 1051 | /* rather error out here than BUG in bio_split */ | 1109 | /* rather error out here than BUG in bio_split */ |
| 1052 | dev_err(DEV, "bio would need to, but cannot, be split: " | 1110 | dev_err(DEV, "bio would need to, but cannot, be split: " |
| 1053 | "(vcnt=%u,idx=%u,size=%u,sector=%llu)\n", | 1111 | "(vcnt=%u,idx=%u,size=%u,sector=%llu)\n", |
| @@ -1069,11 +1127,7 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio) | |||
| 1069 | const int sps = 1 << HT_SHIFT; /* sectors per slot */ | 1127 | const int sps = 1 << HT_SHIFT; /* sectors per slot */ |
| 1070 | const int mask = sps - 1; | 1128 | const int mask = sps - 1; |
| 1071 | const sector_t first_sectors = sps - (sect & mask); | 1129 | const sector_t first_sectors = sps - (sect & mask); |
| 1072 | bp = bio_split(bio, | 1130 | bp = bio_split(bio, first_sectors); |
| 1073 | #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) | ||
| 1074 | bio_split_pool, | ||
| 1075 | #endif | ||
| 1076 | first_sectors); | ||
| 1077 | 1131 | ||
| 1078 | /* we need to get a "reference count" (ap_bio_cnt) | 1132 | /* we need to get a "reference count" (ap_bio_cnt) |
| 1079 | * to avoid races with the disconnect/reconnect/suspend code. | 1133 | * to avoid races with the disconnect/reconnect/suspend code. |
| @@ -1084,10 +1138,10 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio) | |||
| 1084 | 1138 | ||
| 1085 | D_ASSERT(e_enr == s_enr + 1); | 1139 | D_ASSERT(e_enr == s_enr + 1); |
| 1086 | 1140 | ||
| 1087 | while (drbd_make_request_common(mdev, &bp->bio1)) | 1141 | while (drbd_make_request_common(mdev, &bp->bio1, start_time)) |
| 1088 | inc_ap_bio(mdev, 1); | 1142 | inc_ap_bio(mdev, 1); |
| 1089 | 1143 | ||
| 1090 | while (drbd_make_request_common(mdev, &bp->bio2)) | 1144 | while (drbd_make_request_common(mdev, &bp->bio2, start_time)) |
| 1091 | inc_ap_bio(mdev, 1); | 1145 | inc_ap_bio(mdev, 1); |
| 1092 | 1146 | ||
| 1093 | dec_ap_bio(mdev); | 1147 | dec_ap_bio(mdev); |
| @@ -1098,7 +1152,7 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio) | |||
| 1098 | } | 1152 | } |
| 1099 | 1153 | ||
| 1100 | /* This is called by bio_add_page(). With this function we reduce | 1154 | /* This is called by bio_add_page(). With this function we reduce |
| 1101 | * the number of BIOs that span over multiple DRBD_MAX_SEGMENT_SIZEs | 1155 | * the number of BIOs that span over multiple DRBD_MAX_BIO_SIZEs |
| 1102 | * units (was AL_EXTENTs). | 1156 | * units (was AL_EXTENTs). |
| 1103 | * | 1157 | * |
| 1104 | * we do the calculation within the lower 32bit of the byte offsets, | 1158 | * we do the calculation within the lower 32bit of the byte offsets, |
| @@ -1108,7 +1162,7 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio) | |||
| 1108 | * As long as the BIO is empty we have to allow at least one bvec, | 1162 | * As long as the BIO is empty we have to allow at least one bvec, |
| 1109 | * regardless of size and offset. so the resulting bio may still | 1163 | * regardless of size and offset. so the resulting bio may still |
| 1110 | * cross extent boundaries. those are dealt with (bio_split) in | 1164 | * cross extent boundaries. those are dealt with (bio_split) in |
| 1111 | * drbd_make_request_26. | 1165 | * drbd_make_request. |
| 1112 | */ | 1166 | */ |
| 1113 | int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec) | 1167 | int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec) |
| 1114 | { | 1168 | { |
| @@ -1118,8 +1172,8 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct | |||
| 1118 | unsigned int bio_size = bvm->bi_size; | 1172 | unsigned int bio_size = bvm->bi_size; |
| 1119 | int limit, backing_limit; | 1173 | int limit, backing_limit; |
| 1120 | 1174 | ||
| 1121 | limit = DRBD_MAX_SEGMENT_SIZE | 1175 | limit = DRBD_MAX_BIO_SIZE |
| 1122 | - ((bio_offset & (DRBD_MAX_SEGMENT_SIZE-1)) + bio_size); | 1176 | - ((bio_offset & (DRBD_MAX_BIO_SIZE-1)) + bio_size); |
| 1123 | if (limit < 0) | 1177 | if (limit < 0) |
| 1124 | limit = 0; | 1178 | limit = 0; |
| 1125 | if (bio_size == 0) { | 1179 | if (bio_size == 0) { |
| @@ -1136,3 +1190,42 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct | |||
| 1136 | } | 1190 | } |
| 1137 | return limit; | 1191 | return limit; |
| 1138 | } | 1192 | } |
| 1193 | |||
| 1194 | void request_timer_fn(unsigned long data) | ||
| 1195 | { | ||
| 1196 | struct drbd_conf *mdev = (struct drbd_conf *) data; | ||
| 1197 | struct drbd_request *req; /* oldest request */ | ||
| 1198 | struct list_head *le; | ||
| 1199 | unsigned long et = 0; /* effective timeout = ko_count * timeout */ | ||
| 1200 | |||
| 1201 | if (get_net_conf(mdev)) { | ||
| 1202 | et = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count; | ||
| 1203 | put_net_conf(mdev); | ||
| 1204 | } | ||
| 1205 | if (!et || mdev->state.conn < C_WF_REPORT_PARAMS) | ||
| 1206 | return; /* Recurring timer stopped */ | ||
| 1207 | |||
| 1208 | spin_lock_irq(&mdev->req_lock); | ||
| 1209 | le = &mdev->oldest_tle->requests; | ||
| 1210 | if (list_empty(le)) { | ||
| 1211 | spin_unlock_irq(&mdev->req_lock); | ||
| 1212 | mod_timer(&mdev->request_timer, jiffies + et); | ||
| 1213 | return; | ||
| 1214 | } | ||
| 1215 | |||
| 1216 | le = le->prev; | ||
| 1217 | req = list_entry(le, struct drbd_request, tl_requests); | ||
| 1218 | if (time_is_before_eq_jiffies(req->start_time + et)) { | ||
| 1219 | if (req->rq_state & RQ_NET_PENDING) { | ||
| 1220 | dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); | ||
| 1221 | _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL); | ||
| 1222 | } else { | ||
| 1223 | dev_warn(DEV, "Local backing block device frozen?\n"); | ||
| 1224 | mod_timer(&mdev->request_timer, jiffies + et); | ||
| 1225 | } | ||
| 1226 | } else { | ||
| 1227 | mod_timer(&mdev->request_timer, req->start_time + et); | ||
| 1228 | } | ||
| 1229 | |||
| 1230 | spin_unlock_irq(&mdev->req_lock); | ||
| 1231 | } | ||
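`request_timer_fn()` is a self-rearming watchdog: the effective timeout is `ko_count` times the network timeout, the timer silently stops when unconfigured or disconnected, and it otherwise re-arms itself relative to the oldest pending request. A hedged kernel-module sketch of that re-arm pattern using the 2.6-era timer API the diff targets; the module and `check_oldest_work()` are hypothetical:

```c
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list watchdog;
static unsigned long period;	/* in jiffies; 0 means "stopped" */

static void check_oldest_work(void)	/* hypothetical payload */
{
	pr_info("watchdog tick\n");
}

static void watchdog_fn(unsigned long data)
{
	if (!period)
		return;				/* recurring timer stopped */
	check_oldest_work();
	mod_timer(&watchdog, jiffies + period);	/* re-arm for next period */
}

static int __init watchdog_init(void)
{
	period = HZ;				/* one second, for example */
	setup_timer(&watchdog, watchdog_fn, 0);
	mod_timer(&watchdog, jiffies + period);
	return 0;
}

static void __exit watchdog_exit(void)
{
	period = 0;				/* stop re-arming */
	del_timer_sync(&watchdog);
}

module_init(watchdog_init);
module_exit(watchdog_exit);
MODULE_LICENSE("GPL");
```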
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index ab2bd09d54b4..32e2c3e6a813 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h | |||
| @@ -82,14 +82,16 @@ enum drbd_req_event { | |||
| 82 | to_be_submitted, | 82 | to_be_submitted, |
| 83 | 83 | ||
| 84 | /* XXX yes, now I am inconsistent... | 84 | /* XXX yes, now I am inconsistent... |
| 85 | * these two are not "events" but "actions" | 85 | * these are not "events" but "actions" |
| 86 | * oh, well... */ | 86 | * oh, well... */ |
| 87 | queue_for_net_write, | 87 | queue_for_net_write, |
| 88 | queue_for_net_read, | 88 | queue_for_net_read, |
| 89 | queue_for_send_oos, | ||
| 89 | 90 | ||
| 90 | send_canceled, | 91 | send_canceled, |
| 91 | send_failed, | 92 | send_failed, |
| 92 | handed_over_to_network, | 93 | handed_over_to_network, |
| 94 | oos_handed_to_network, | ||
| 93 | connection_lost_while_pending, | 95 | connection_lost_while_pending, |
| 94 | read_retry_remote_canceled, | 96 | read_retry_remote_canceled, |
| 95 | recv_acked_by_peer, | 97 | recv_acked_by_peer, |
| @@ -289,7 +291,6 @@ static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev, | |||
| 289 | req->epoch = 0; | 291 | req->epoch = 0; |
| 290 | req->sector = bio_src->bi_sector; | 292 | req->sector = bio_src->bi_sector; |
| 291 | req->size = bio_src->bi_size; | 293 | req->size = bio_src->bi_size; |
| 292 | req->start_time = jiffies; | ||
| 293 | INIT_HLIST_NODE(&req->colision); | 294 | INIT_HLIST_NODE(&req->colision); |
| 294 | INIT_LIST_HEAD(&req->tl_requests); | 295 | INIT_LIST_HEAD(&req->tl_requests); |
| 295 | INIT_LIST_HEAD(&req->w.list); | 296 | INIT_LIST_HEAD(&req->w.list); |
| @@ -321,6 +322,7 @@ extern int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
| 321 | struct bio_and_error *m); | 322 | struct bio_and_error *m); |
| 322 | extern void complete_master_bio(struct drbd_conf *mdev, | 323 | extern void complete_master_bio(struct drbd_conf *mdev, |
| 323 | struct bio_and_error *m); | 324 | struct bio_and_error *m); |
| 325 | extern void request_timer_fn(unsigned long data); | ||
| 324 | 326 | ||
| 325 | /* use this if you don't want to deal with calling complete_master_bio() | 327 | /* use this if you don't want to deal with calling complete_master_bio() |
| 326 | * outside the spinlock, e.g. when walking some list on cleanup. */ | 328 | * outside the spinlock, e.g. when walking some list on cleanup. */ |
| @@ -338,23 +340,43 @@ static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what) | |||
| 338 | return rv; | 340 | return rv; |
| 339 | } | 341 | } |
| 340 | 342 | ||
| 341 | /* completion of master bio is outside of spinlock. | 343 | /* completion of master bio is outside of our spinlock. |
| 342 | * If you need it irqsave, do it your self! | 344 | * We still may or may not be inside some irqs disabled section |
| 343 | * Which means: don't use from bio endio callback. */ | 345 | * of the lower level driver completion callback, so we need to |
| 346 | * spin_lock_irqsave here. */ | ||
| 344 | static inline int req_mod(struct drbd_request *req, | 347 | static inline int req_mod(struct drbd_request *req, |
| 345 | enum drbd_req_event what) | 348 | enum drbd_req_event what) |
| 346 | { | 349 | { |
| 350 | unsigned long flags; | ||
| 347 | struct drbd_conf *mdev = req->mdev; | 351 | struct drbd_conf *mdev = req->mdev; |
| 348 | struct bio_and_error m; | 352 | struct bio_and_error m; |
| 349 | int rv; | 353 | int rv; |
| 350 | 354 | ||
| 351 | spin_lock_irq(&mdev->req_lock); | 355 | spin_lock_irqsave(&mdev->req_lock, flags); |
| 352 | rv = __req_mod(req, what, &m); | 356 | rv = __req_mod(req, what, &m); |
| 353 | spin_unlock_irq(&mdev->req_lock); | 357 | spin_unlock_irqrestore(&mdev->req_lock, flags); |
| 354 | 358 | ||
| 355 | if (m.bio) | 359 | if (m.bio) |
| 356 | complete_master_bio(mdev, &m); | 360 | complete_master_bio(mdev, &m); |
| 357 | 361 | ||
| 358 | return rv; | 362 | return rv; |
| 359 | } | 363 | } |
| 364 | |||
| 365 | static inline bool drbd_should_do_remote(union drbd_state s) | ||
| 366 | { | ||
| 367 | return s.pdsk == D_UP_TO_DATE || | ||
| 368 | (s.pdsk >= D_INCONSISTENT && | ||
| 369 | s.conn >= C_WF_BITMAP_T && | ||
| 370 | s.conn < C_AHEAD); | ||
| 371 | /* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T. | ||
| 372 | That is equivalent since before 96 IO was frozen in the C_WF_BITMAP* | ||
| 373 | states. */ | ||
| 374 | } | ||
| 375 | static inline bool drbd_should_send_oos(union drbd_state s) | ||
| 376 | { | ||
| 377 | return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S; | ||
| 378 | /* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary | ||
| 379 | since we enter state C_AHEAD only if proto >= 96 */ | ||
| 380 | } | ||
| 381 | |||
| 360 | #endif | 382 | #endif |
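The req_mod() conversion above (spin_lock_irq to spin_lock_irqsave) is needed because, per the new comment, the function may now run inside a lower-level driver's completion callback where interrupts are already disabled; unconditionally re-enabling them at unlock time would corrupt the caller's context. A minimal sketch of the idiom, with hypothetical names not taken from the DRBD tree:

    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(demo_lock);

    static void demo_mod(void)
    {
        unsigned long flags;

        /* spin_unlock_irq() would re-enable interrupts even if the
         * caller entered with them disabled; the irqsave/irqrestore
         * pair preserves whatever state we were called in. */
        spin_lock_irqsave(&demo_lock, flags);
        /* ... modify request state under the lock ... */
        spin_unlock_irqrestore(&demo_lock, flags);
    }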
diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c index 85179e1fb50a..c44a2a602772 100644 --- a/drivers/block/drbd/drbd_strings.c +++ b/drivers/block/drbd/drbd_strings.c | |||
| @@ -48,6 +48,8 @@ static const char *drbd_conn_s_names[] = { | |||
| 48 | [C_PAUSED_SYNC_T] = "PausedSyncT", | 48 | [C_PAUSED_SYNC_T] = "PausedSyncT", |
| 49 | [C_VERIFY_S] = "VerifyS", | 49 | [C_VERIFY_S] = "VerifyS", |
| 50 | [C_VERIFY_T] = "VerifyT", | 50 | [C_VERIFY_T] = "VerifyT", |
| 51 | [C_AHEAD] = "Ahead", | ||
| 52 | [C_BEHIND] = "Behind", | ||
| 51 | }; | 53 | }; |
| 52 | 54 | ||
| 53 | static const char *drbd_role_s_names[] = { | 55 | static const char *drbd_role_s_names[] = { |
| @@ -92,7 +94,7 @@ static const char *drbd_state_sw_errors[] = { | |||
| 92 | const char *drbd_conn_str(enum drbd_conns s) | 94 | const char *drbd_conn_str(enum drbd_conns s) |
| 93 | { | 95 | { |
| 94 | /* enums are unsigned... */ | 96 | /* enums are unsigned... */ |
| 95 | return s > C_PAUSED_SYNC_T ? "TOO_LARGE" : drbd_conn_s_names[s]; | 97 | return s > C_BEHIND ? "TOO_LARGE" : drbd_conn_s_names[s]; |
| 96 | } | 98 | } |
| 97 | 99 | ||
| 98 | const char *drbd_role_str(enum drbd_role s) | 100 | const char *drbd_role_str(enum drbd_role s) |
| @@ -105,7 +107,7 @@ const char *drbd_disk_str(enum drbd_disk_state s) | |||
| 105 | return s > D_UP_TO_DATE ? "TOO_LARGE" : drbd_disk_s_names[s]; | 107 | return s > D_UP_TO_DATE ? "TOO_LARGE" : drbd_disk_s_names[s]; |
| 106 | } | 108 | } |
| 107 | 109 | ||
| 108 | const char *drbd_set_st_err_str(enum drbd_state_ret_codes err) | 110 | const char *drbd_set_st_err_str(enum drbd_state_rv err) |
| 109 | { | 111 | { |
| 110 | return err <= SS_AFTER_LAST_ERROR ? "TOO_SMALL" : | 112 | return err <= SS_AFTER_LAST_ERROR ? "TOO_SMALL" : |
| 111 | err > SS_TWO_PRIMARIES ? "TOO_LARGE" | 113 | err > SS_TWO_PRIMARIES ? "TOO_LARGE" |
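The drbd_strings.c hunks illustrate the maintenance hazard of bounds-checked string tables: the table gained C_AHEAD/C_BEHIND entries, so the guard in drbd_conn_str() had to move from C_PAUSED_SYNC_T to C_BEHIND or lookups for the new states would index past the end. A sketch of the same pattern with the bound checked at compile time (demo names only, not from the DRBD sources):

    #include <linux/kernel.h>    /* ARRAY_SIZE, BUILD_BUG_ON */

    enum demo_conns { DC_STANDALONE, DC_CONNECTED, DC_AHEAD, DC_BEHIND, DC_NR };

    static const char *demo_conn_names[] = {
        [DC_STANDALONE] = "StandAlone",
        [DC_CONNECTED]  = "Connected",
        [DC_AHEAD]      = "Ahead",
        [DC_BEHIND]     = "Behind",
    };

    const char *demo_conn_str(enum demo_conns s)
    {
        /* fails the build if the table and the enum ever drift apart */
        BUILD_BUG_ON(ARRAY_SIZE(demo_conn_names) != DC_NR);
        return s >= DC_NR ? "TOO_LARGE" : demo_conn_names[s];
    }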
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index e027446590d3..f7e6c92f8d03 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c | |||
| @@ -39,18 +39,17 @@ | |||
| 39 | #include "drbd_req.h" | 39 | #include "drbd_req.h" |
| 40 | 40 | ||
| 41 | static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel); | 41 | static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel); |
| 42 | static int w_make_resync_request(struct drbd_conf *mdev, | ||
| 43 | struct drbd_work *w, int cancel); | ||
| 42 | 44 | ||
| 43 | 45 | ||
| 44 | 46 | ||
| 45 | /* defined here: | 47 | /* endio handlers: |
| 46 | drbd_md_io_complete | 48 | * drbd_md_io_complete (defined here) |
| 47 | drbd_endio_sec | 49 | * drbd_endio_pri (defined here) |
| 48 | drbd_endio_pri | 50 | * drbd_endio_sec (defined here) |
| 49 | 51 | * bm_async_io_complete (defined in drbd_bitmap.c) | |
| 50 | * more endio handlers: | 52 | * |
| 51 | atodb_endio in drbd_actlog.c | ||
| 52 | drbd_bm_async_io_complete in drbd_bitmap.c | ||
| 53 | |||
| 54 | * For all these callbacks, note the following: | 53 | * For all these callbacks, note the following: |
| 55 | * The callbacks will be called in irq context by the IDE drivers, | 54 | * The callbacks will be called in irq context by the IDE drivers, |
| 56 | * and in Softirqs/Tasklets/BH context by the SCSI drivers. | 55 | * and in Softirqs/Tasklets/BH context by the SCSI drivers. |
| @@ -94,7 +93,7 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local) | |||
| 94 | if (list_empty(&mdev->read_ee)) | 93 | if (list_empty(&mdev->read_ee)) |
| 95 | wake_up(&mdev->ee_wait); | 94 | wake_up(&mdev->ee_wait); |
| 96 | if (test_bit(__EE_WAS_ERROR, &e->flags)) | 95 | if (test_bit(__EE_WAS_ERROR, &e->flags)) |
| 97 | __drbd_chk_io_error(mdev, FALSE); | 96 | __drbd_chk_io_error(mdev, false); |
| 98 | spin_unlock_irqrestore(&mdev->req_lock, flags); | 97 | spin_unlock_irqrestore(&mdev->req_lock, flags); |
| 99 | 98 | ||
| 100 | drbd_queue_work(&mdev->data.work, &e->w); | 99 | drbd_queue_work(&mdev->data.work, &e->w); |
| @@ -137,7 +136,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo | |||
| 137 | : list_empty(&mdev->active_ee); | 136 | : list_empty(&mdev->active_ee); |
| 138 | 137 | ||
| 139 | if (test_bit(__EE_WAS_ERROR, &e->flags)) | 138 | if (test_bit(__EE_WAS_ERROR, &e->flags)) |
| 140 | __drbd_chk_io_error(mdev, FALSE); | 139 | __drbd_chk_io_error(mdev, false); |
| 141 | spin_unlock_irqrestore(&mdev->req_lock, flags); | 140 | spin_unlock_irqrestore(&mdev->req_lock, flags); |
| 142 | 141 | ||
| 143 | if (is_syncer_req) | 142 | if (is_syncer_req) |
| @@ -163,14 +162,15 @@ void drbd_endio_sec(struct bio *bio, int error) | |||
| 163 | int uptodate = bio_flagged(bio, BIO_UPTODATE); | 162 | int uptodate = bio_flagged(bio, BIO_UPTODATE); |
| 164 | int is_write = bio_data_dir(bio) == WRITE; | 163 | int is_write = bio_data_dir(bio) == WRITE; |
| 165 | 164 | ||
| 166 | if (error) | 165 | if (error && __ratelimit(&drbd_ratelimit_state)) |
| 167 | dev_warn(DEV, "%s: error=%d s=%llus\n", | 166 | dev_warn(DEV, "%s: error=%d s=%llus\n", |
| 168 | is_write ? "write" : "read", error, | 167 | is_write ? "write" : "read", error, |
| 169 | (unsigned long long)e->sector); | 168 | (unsigned long long)e->sector); |
| 170 | if (!error && !uptodate) { | 169 | if (!error && !uptodate) { |
| 171 | dev_warn(DEV, "%s: setting error to -EIO s=%llus\n", | 170 | if (__ratelimit(&drbd_ratelimit_state)) |
| 172 | is_write ? "write" : "read", | 171 | dev_warn(DEV, "%s: setting error to -EIO s=%llus\n", |
| 173 | (unsigned long long)e->sector); | 172 | is_write ? "write" : "read", |
| 173 | (unsigned long long)e->sector); | ||
| 174 | /* strange behavior of some lower level drivers... | 174 | /* strange behavior of some lower level drivers... |
| 175 | * fail the request by clearing the uptodate flag, | 175 | * fail the request by clearing the uptodate flag, |
| 176 | * but do not return any error?! */ | 176 | * but do not return any error?! */ |
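The hunk above wraps both endio warnings in __ratelimit() so a misbehaving lower-level device cannot flood the log from interrupt context. For reference, a sketch of the kernel's ratelimit API as used here; the interval/burst numbers are illustrative, and DRBD defines its actual drbd_ratelimit_state elsewhere:

    #include <linux/kernel.h>
    #include <linux/ratelimit.h>

    /* allow at most 5 messages per 5-second window */
    static DEFINE_RATELIMIT_STATE(demo_rs, 5 * HZ, 5);

    static void demo_report_io_error(int error, unsigned long long sector)
    {
        /* __ratelimit() returns nonzero while the burst budget lasts,
         * so only the printk is guarded, never the error handling. */
        if (error && __ratelimit(&demo_rs))
            pr_warn("I/O error=%d at sector %llus\n", error, sector);
    }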
| @@ -250,13 +250,6 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
| 250 | return w_send_read_req(mdev, w, 0); | 250 | return w_send_read_req(mdev, w, 0); |
| 251 | } | 251 | } |
| 252 | 252 | ||
| 253 | int w_resync_inactive(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | ||
| 254 | { | ||
| 255 | ERR_IF(cancel) return 1; | ||
| 256 | dev_err(DEV, "resync inactive, but callback triggered??\n"); | ||
| 257 | return 1; /* Simply ignore this! */ | ||
| 258 | } | ||
| 259 | |||
| 260 | void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, struct drbd_epoch_entry *e, void *digest) | 253 | void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, struct drbd_epoch_entry *e, void *digest) |
| 261 | { | 254 | { |
| 262 | struct hash_desc desc; | 255 | struct hash_desc desc; |
| @@ -355,7 +348,7 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) | |||
| 355 | if (!get_ldev(mdev)) | 348 | if (!get_ldev(mdev)) |
| 356 | return -EIO; | 349 | return -EIO; |
| 357 | 350 | ||
| 358 | if (drbd_rs_should_slow_down(mdev)) | 351 | if (drbd_rs_should_slow_down(mdev, sector)) |
| 359 | goto defer; | 352 | goto defer; |
| 360 | 353 | ||
| 361 | /* GFP_TRY, because if there is no memory available right now, this may | 354 | /* GFP_TRY, because if there is no memory available right now, this may |
| @@ -373,9 +366,10 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) | |||
| 373 | if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0) | 366 | if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0) |
| 374 | return 0; | 367 | return 0; |
| 375 | 368 | ||
| 376 | /* drbd_submit_ee currently fails for one reason only: | 369 | /* If it failed because of ENOMEM, retry should help. If it failed |
| 377 | * not being able to allocate enough bios. | 370 | * because bio_add_page failed (probably broken lower level driver), |
| 378 | * Is dropping the connection going to help? */ | 371 | * retry may or may not help. |
| 372 | * If it does not, you may need to force disconnect. */ | ||
| 379 | spin_lock_irq(&mdev->req_lock); | 373 | spin_lock_irq(&mdev->req_lock); |
| 380 | list_del(&e->w.list); | 374 | list_del(&e->w.list); |
| 381 | spin_unlock_irq(&mdev->req_lock); | 375 | spin_unlock_irq(&mdev->req_lock); |
| @@ -386,26 +380,25 @@ defer: | |||
| 386 | return -EAGAIN; | 380 | return -EAGAIN; |
| 387 | } | 381 | } |
| 388 | 382 | ||
| 389 | void resync_timer_fn(unsigned long data) | 383 | int w_resync_timer(struct drbd_conf *mdev, struct drbd_work *w, int cancel) |
| 390 | { | 384 | { |
| 391 | struct drbd_conf *mdev = (struct drbd_conf *) data; | ||
| 392 | int queue; | ||
| 393 | |||
| 394 | queue = 1; | ||
| 395 | switch (mdev->state.conn) { | 385 | switch (mdev->state.conn) { |
| 396 | case C_VERIFY_S: | 386 | case C_VERIFY_S: |
| 397 | mdev->resync_work.cb = w_make_ov_request; | 387 | w_make_ov_request(mdev, w, cancel); |
| 398 | break; | 388 | break; |
| 399 | case C_SYNC_TARGET: | 389 | case C_SYNC_TARGET: |
| 400 | mdev->resync_work.cb = w_make_resync_request; | 390 | w_make_resync_request(mdev, w, cancel); |
| 401 | break; | 391 | break; |
| 402 | default: | ||
| 403 | queue = 0; | ||
| 404 | mdev->resync_work.cb = w_resync_inactive; | ||
| 405 | } | 392 | } |
| 406 | 393 | ||
| 407 | /* harmless race: list_empty outside data.work.q_lock */ | 394 | return 1; |
| 408 | if (list_empty(&mdev->resync_work.list) && queue) | 395 | } |
| 396 | |||
| 397 | void resync_timer_fn(unsigned long data) | ||
| 398 | { | ||
| 399 | struct drbd_conf *mdev = (struct drbd_conf *) data; | ||
| 400 | |||
| 401 | if (list_empty(&mdev->resync_work.list)) | ||
| 409 | drbd_queue_work(&mdev->data.work, &mdev->resync_work); | 402 | drbd_queue_work(&mdev->data.work, &mdev->resync_work); |
| 410 | } | 403 | } |
| 411 | 404 | ||
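The refactor above retires the self-modifying callback (resync_work.cb reassigned from inside the timer) in favor of a fixed w_resync_timer() that dispatches on mdev->state.conn, leaving resync_timer_fn() with a single job: queue the work item. A condensed sketch of that timer-to-worker handoff using generic kernel primitives (DRBD actually uses its own drbd_work queue rather than struct work_struct; names here are illustrative):

    #include <linux/timer.h>
    #include <linux/workqueue.h>
    #include <linux/jiffies.h>

    static struct work_struct demo_resync_work;
    static struct timer_list demo_resync_timer;

    static void demo_resync_worker(struct work_struct *w)
    {
        /* dispatch on current state here, as w_resync_timer() does */
    }

    static void demo_resync_timer_fn(unsigned long data)
    {
        /* timers fire in softirq context: do no real work, just queue */
        schedule_work(&demo_resync_work);
    }

    static void demo_init(void)
    {
        INIT_WORK(&demo_resync_work, demo_resync_worker);
        setup_timer(&demo_resync_timer, demo_resync_timer_fn, 0);
        mod_timer(&demo_resync_timer, jiffies + HZ / 10);
    }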
| @@ -438,7 +431,7 @@ static void fifo_add_val(struct fifo_buffer *fb, int value) | |||
| 438 | fb->values[i] += value; | 431 | fb->values[i] += value; |
| 439 | } | 432 | } |
| 440 | 433 | ||
| 441 | int drbd_rs_controller(struct drbd_conf *mdev) | 434 | static int drbd_rs_controller(struct drbd_conf *mdev) |
| 442 | { | 435 | { |
| 443 | unsigned int sect_in; /* Number of sectors that came in since the last turn */ | 436 | unsigned int sect_in; /* Number of sectors that came in since the last turn */ |
| 444 | unsigned int want; /* The number of sectors we want in the proxy */ | 437 | unsigned int want; /* The number of sectors we want in the proxy */ |
| @@ -492,29 +485,36 @@ int drbd_rs_controller(struct drbd_conf *mdev) | |||
| 492 | return req_sect; | 485 | return req_sect; |
| 493 | } | 486 | } |
| 494 | 487 | ||
| 495 | int w_make_resync_request(struct drbd_conf *mdev, | 488 | static int drbd_rs_number_requests(struct drbd_conf *mdev) |
| 496 | struct drbd_work *w, int cancel) | 489 | { |
| 490 | int number; | ||
| 491 | if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */ | ||
| 492 | number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9); | ||
| 493 | mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; | ||
| 494 | } else { | ||
| 495 | mdev->c_sync_rate = mdev->sync_conf.rate; | ||
| 496 | number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ); | ||
| 497 | } | ||
| 498 | |||
| 499 | /* ignore the amount of pending requests, the resync controller should | ||
| 500 | * throttle down to incoming reply rate soon enough anyways. */ | ||
| 501 | return number; | ||
| 502 | } | ||
| 503 | |||
| 504 | static int w_make_resync_request(struct drbd_conf *mdev, | ||
| 505 | struct drbd_work *w, int cancel) | ||
| 497 | { | 506 | { |
| 498 | unsigned long bit; | 507 | unsigned long bit; |
| 499 | sector_t sector; | 508 | sector_t sector; |
| 500 | const sector_t capacity = drbd_get_capacity(mdev->this_bdev); | 509 | const sector_t capacity = drbd_get_capacity(mdev->this_bdev); |
| 501 | int max_segment_size; | 510 | int max_bio_size; |
| 502 | int number, rollback_i, size, pe, mx; | 511 | int number, rollback_i, size; |
| 503 | int align, queued, sndbuf; | 512 | int align, queued, sndbuf; |
| 504 | int i = 0; | 513 | int i = 0; |
| 505 | 514 | ||
| 506 | if (unlikely(cancel)) | 515 | if (unlikely(cancel)) |
| 507 | return 1; | 516 | return 1; |
| 508 | 517 | ||
| 509 | if (unlikely(mdev->state.conn < C_CONNECTED)) { | ||
| 510 | dev_err(DEV, "Confused in w_make_resync_request()! cstate < Connected"); | ||
| 511 | return 0; | ||
| 512 | } | ||
| 513 | |||
| 514 | if (mdev->state.conn != C_SYNC_TARGET) | ||
| 515 | dev_err(DEV, "%s in w_make_resync_request\n", | ||
| 516 | drbd_conn_str(mdev->state.conn)); | ||
| 517 | |||
| 518 | if (mdev->rs_total == 0) { | 518 | if (mdev->rs_total == 0) { |
| 519 | /* empty resync? */ | 519 | /* empty resync? */ |
| 520 | drbd_resync_finished(mdev); | 520 | drbd_resync_finished(mdev); |
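The new drbd_rs_number_requests() above converts the configured sync rate into a per-tick request budget. Assuming the 8.3-era constants (SLEEP_TIME is HZ/10, i.e. a 100 ms tick, and BM_BLOCK_SIZE is 4 KiB), the fixed-rate branch reduces to rate/40 requests per tick; a quick userspace check of the arithmetic:

    #include <stdio.h>

    #define HZ            250           /* cancels out of the formula */
    #define SLEEP_TIME    (HZ / 10)     /* one resync tick: 100 ms */
    #define BM_BLOCK_SIZE 4096          /* one resync request: 4 KiB */

    int main(void)
    {
        unsigned int rate = 10240;      /* configured rate in KiB/s */
        unsigned int number =
            SLEEP_TIME * rate / ((BM_BLOCK_SIZE / 1024) * HZ);

        /* (HZ/10)*rate / (4*HZ) == rate/40 == 256 requests per tick,
         * and 256 * 4 KiB every 100 ms is exactly 10240 KiB/s. */
        printf("%u requests per tick\n", number);   /* prints 256 */
        return 0;
    }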
| @@ -527,49 +527,19 @@ int w_make_resync_request(struct drbd_conf *mdev, | |||
| 527 | to continue resync with a broken disk makes no sense at | 527 | to continue resync with a broken disk makes no sense at |
| 528 | all */ | 528 | all */ |
| 529 | dev_err(DEV, "Disk broke down during resync!\n"); | 529 | dev_err(DEV, "Disk broke down during resync!\n"); |
| 530 | mdev->resync_work.cb = w_resync_inactive; | ||
| 531 | return 1; | 530 | return 1; |
| 532 | } | 531 | } |
| 533 | 532 | ||
| 534 | /* starting with drbd 8.3.8, we can handle multi-bio EEs, | 533 | /* starting with drbd 8.3.8, we can handle multi-bio EEs, |
| 535 | * if it should be necessary */ | 534 | * if it should be necessary */ |
| 536 | max_segment_size = | 535 | max_bio_size = |
| 537 | mdev->agreed_pro_version < 94 ? queue_max_segment_size(mdev->rq_queue) : | 536 | mdev->agreed_pro_version < 94 ? queue_max_hw_sectors(mdev->rq_queue) << 9 : |
| 538 | mdev->agreed_pro_version < 95 ? DRBD_MAX_SIZE_H80_PACKET : DRBD_MAX_SEGMENT_SIZE; | 537 | mdev->agreed_pro_version < 95 ? DRBD_MAX_SIZE_H80_PACKET : DRBD_MAX_BIO_SIZE; |
| 539 | 538 | ||
| 540 | if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */ | 539 | number = drbd_rs_number_requests(mdev); |
| 541 | number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9); | 540 | if (number == 0) |
| 542 | mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; | ||
| 543 | } else { | ||
| 544 | mdev->c_sync_rate = mdev->sync_conf.rate; | ||
| 545 | number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ); | ||
| 546 | } | ||
| 547 | |||
| 548 | /* Throttle resync on lower level disk activity, which may also be | ||
| 549 | * caused by application IO on Primary/SyncTarget. | ||
| 550 | * Keep this after the call to drbd_rs_controller, as that assumes | ||
| 551 | * to be called as precisely as possible every SLEEP_TIME, | ||
| 552 | * and would be confused otherwise. */ | ||
| 553 | if (drbd_rs_should_slow_down(mdev)) | ||
| 554 | goto requeue; | 541 | goto requeue; |
| 555 | 542 | ||
| 556 | mutex_lock(&mdev->data.mutex); | ||
| 557 | if (mdev->data.socket) | ||
| 558 | mx = mdev->data.socket->sk->sk_rcvbuf / sizeof(struct p_block_req); | ||
| 559 | else | ||
| 560 | mx = 1; | ||
| 561 | mutex_unlock(&mdev->data.mutex); | ||
| 562 | |||
| 563 | /* For resync rates >160MB/sec, allow more pending RS requests */ | ||
| 564 | if (number > mx) | ||
| 565 | mx = number; | ||
| 566 | |||
| 567 | /* Limit the number of pending RS requests to no more than the peer's receive buffer */ | ||
| 568 | pe = atomic_read(&mdev->rs_pending_cnt); | ||
| 569 | if ((pe + number) > mx) { | ||
| 570 | number = mx - pe; | ||
| 571 | } | ||
| 572 | |||
| 573 | for (i = 0; i < number; i++) { | 543 | for (i = 0; i < number; i++) { |
| 574 | /* Stop generating RS requests, when half of the send buffer is filled */ | 544 | /* Stop generating RS requests, when half of the send buffer is filled */ |
| 575 | mutex_lock(&mdev->data.mutex); | 545 | mutex_lock(&mdev->data.mutex); |
| @@ -588,16 +558,16 @@ next_sector: | |||
| 588 | size = BM_BLOCK_SIZE; | 558 | size = BM_BLOCK_SIZE; |
| 589 | bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo); | 559 | bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo); |
| 590 | 560 | ||
| 591 | if (bit == -1UL) { | 561 | if (bit == DRBD_END_OF_BITMAP) { |
| 592 | mdev->bm_resync_fo = drbd_bm_bits(mdev); | 562 | mdev->bm_resync_fo = drbd_bm_bits(mdev); |
| 593 | mdev->resync_work.cb = w_resync_inactive; | ||
| 594 | put_ldev(mdev); | 563 | put_ldev(mdev); |
| 595 | return 1; | 564 | return 1; |
| 596 | } | 565 | } |
| 597 | 566 | ||
| 598 | sector = BM_BIT_TO_SECT(bit); | 567 | sector = BM_BIT_TO_SECT(bit); |
| 599 | 568 | ||
| 600 | if (drbd_try_rs_begin_io(mdev, sector)) { | 569 | if (drbd_rs_should_slow_down(mdev, sector) || |
| 570 | drbd_try_rs_begin_io(mdev, sector)) { | ||
| 601 | mdev->bm_resync_fo = bit; | 571 | mdev->bm_resync_fo = bit; |
| 602 | goto requeue; | 572 | goto requeue; |
| 603 | } | 573 | } |
| @@ -608,7 +578,7 @@ next_sector: | |||
| 608 | goto next_sector; | 578 | goto next_sector; |
| 609 | } | 579 | } |
| 610 | 580 | ||
| 611 | #if DRBD_MAX_SEGMENT_SIZE > BM_BLOCK_SIZE | 581 | #if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE |
| 612 | /* try to find some adjacent bits. | 582 | /* try to find some adjacent bits. |
| 613 | * we stop if we have already the maximum req size. | 583 | * we stop if we have already the maximum req size. |
| 614 | * | 584 | * |
| @@ -618,7 +588,7 @@ next_sector: | |||
| 618 | align = 1; | 588 | align = 1; |
| 619 | rollback_i = i; | 589 | rollback_i = i; |
| 620 | for (;;) { | 590 | for (;;) { |
| 621 | if (size + BM_BLOCK_SIZE > max_segment_size) | 591 | if (size + BM_BLOCK_SIZE > max_bio_size) |
| 622 | break; | 592 | break; |
| 623 | 593 | ||
| 624 | /* Be always aligned */ | 594 | /* Be always aligned */ |
| @@ -685,7 +655,6 @@ next_sector: | |||
| 685 | * resync data block, and the last bit is cleared. | 655 | * resync data block, and the last bit is cleared. |
| 686 | * until then resync "work" is "inactive" ... | 656 | * until then resync "work" is "inactive" ... |
| 687 | */ | 657 | */ |
| 688 | mdev->resync_work.cb = w_resync_inactive; | ||
| 689 | put_ldev(mdev); | 658 | put_ldev(mdev); |
| 690 | return 1; | 659 | return 1; |
| 691 | } | 660 | } |
| @@ -706,27 +675,18 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca | |||
| 706 | if (unlikely(cancel)) | 675 | if (unlikely(cancel)) |
| 707 | return 1; | 676 | return 1; |
| 708 | 677 | ||
| 709 | if (unlikely(mdev->state.conn < C_CONNECTED)) { | 678 | number = drbd_rs_number_requests(mdev); |
| 710 | dev_err(DEV, "Confused in w_make_ov_request()! cstate < Connected"); | ||
| 711 | return 0; | ||
| 712 | } | ||
| 713 | |||
| 714 | number = SLEEP_TIME*mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ); | ||
| 715 | if (atomic_read(&mdev->rs_pending_cnt) > number) | ||
| 716 | goto requeue; | ||
| 717 | |||
| 718 | number -= atomic_read(&mdev->rs_pending_cnt); | ||
| 719 | 679 | ||
| 720 | sector = mdev->ov_position; | 680 | sector = mdev->ov_position; |
| 721 | for (i = 0; i < number; i++) { | 681 | for (i = 0; i < number; i++) { |
| 722 | if (sector >= capacity) { | 682 | if (sector >= capacity) { |
| 723 | mdev->resync_work.cb = w_resync_inactive; | ||
| 724 | return 1; | 683 | return 1; |
| 725 | } | 684 | } |
| 726 | 685 | ||
| 727 | size = BM_BLOCK_SIZE; | 686 | size = BM_BLOCK_SIZE; |
| 728 | 687 | ||
| 729 | if (drbd_try_rs_begin_io(mdev, sector)) { | 688 | if (drbd_rs_should_slow_down(mdev, sector) || |
| 689 | drbd_try_rs_begin_io(mdev, sector)) { | ||
| 730 | mdev->ov_position = sector; | 690 | mdev->ov_position = sector; |
| 731 | goto requeue; | 691 | goto requeue; |
| 732 | } | 692 | } |
| @@ -744,11 +704,33 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca | |||
| 744 | mdev->ov_position = sector; | 704 | mdev->ov_position = sector; |
| 745 | 705 | ||
| 746 | requeue: | 706 | requeue: |
| 707 | mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); | ||
| 747 | mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); | 708 | mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); |
| 748 | return 1; | 709 | return 1; |
| 749 | } | 710 | } |
| 750 | 711 | ||
| 751 | 712 | ||
| 713 | void start_resync_timer_fn(unsigned long data) | ||
| 714 | { | ||
| 715 | struct drbd_conf *mdev = (struct drbd_conf *) data; | ||
| 716 | |||
| 717 | drbd_queue_work(&mdev->data.work, &mdev->start_resync_work); | ||
| 718 | } | ||
| 719 | |||
| 720 | int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | ||
| 721 | { | ||
| 722 | if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) { | ||
| 723 | dev_warn(DEV, "w_start_resync later...\n"); | ||
| 724 | mdev->start_resync_timer.expires = jiffies + HZ/10; | ||
| 725 | add_timer(&mdev->start_resync_timer); | ||
| 726 | return 1; | ||
| 727 | } | ||
| 728 | |||
| 729 | drbd_start_resync(mdev, C_SYNC_SOURCE); | ||
| 730 | clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags); | ||
| 731 | return 1; | ||
| 732 | } | ||
| 733 | |||
| 752 | int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | 734 | int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) |
| 753 | { | 735 | { |
| 754 | kfree(w); | 736 | kfree(w); |
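w_start_resync() above shows the defer-and-retry idiom: when its preconditions (no outstanding acks or pending resync requests) are not yet met, it re-arms start_resync_timer for ~100 ms later and returns, rather than blocking the single worker thread. A stripped-down sketch with hypothetical names:

    #include <linux/timer.h>
    #include <linux/jiffies.h>
    #include <linux/atomic.h>

    static atomic_t demo_pending;               /* work still in flight */
    static struct timer_list demo_retry_timer;  /* re-queues the attempt */

    static int demo_try_start(void)
    {
        if (atomic_read(&demo_pending)) {
            /* not yet: poll again in ~100 ms, keep the worker free */
            mod_timer(&demo_retry_timer, jiffies + HZ / 10);
            return 1;
        }
        /* ... preconditions met, actually start the resync ... */
        return 1;
    }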
| @@ -782,6 +764,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) | |||
| 782 | union drbd_state os, ns; | 764 | union drbd_state os, ns; |
| 783 | struct drbd_work *w; | 765 | struct drbd_work *w; |
| 784 | char *khelper_cmd = NULL; | 766 | char *khelper_cmd = NULL; |
| 767 | int verify_done = 0; | ||
| 785 | 768 | ||
| 786 | /* Remove all elements from the resync LRU. Since future actions | 769 | /* Remove all elements from the resync LRU. Since future actions |
| 787 | * might set bits in the (main) bitmap, then the entries in the | 770 | * might set bits in the (main) bitmap, then the entries in the |
| @@ -792,8 +775,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) | |||
| 792 | * queue (or even the read operations for those packets | 775 | * queue (or even the read operations for those packets |
| 793 | * is not finished by now). Retry in 100ms. */ | 776 | * is not finished by now). Retry in 100ms. */ |
| 794 | 777 | ||
| 795 | __set_current_state(TASK_INTERRUPTIBLE); | 778 | schedule_timeout_interruptible(HZ / 10); |
| 796 | schedule_timeout(HZ / 10); | ||
| 797 | w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC); | 779 | w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC); |
| 798 | if (w) { | 780 | if (w) { |
| 799 | w->cb = w_resync_finished; | 781 | w->cb = w_resync_finished; |
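The two-line sleep above is folded into schedule_timeout_interruptible(), which (per kernel/timer.c) is essentially just:

    signed long schedule_timeout_interruptible(signed long timeout)
    {
        __set_current_state(TASK_INTERRUPTIBLE);
        return schedule_timeout(timeout);
    }

so the hunk is a pure readability cleanup with no behavioral change.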
| @@ -818,6 +800,8 @@ int drbd_resync_finished(struct drbd_conf *mdev) | |||
| 818 | spin_lock_irq(&mdev->req_lock); | 800 | spin_lock_irq(&mdev->req_lock); |
| 819 | os = mdev->state; | 801 | os = mdev->state; |
| 820 | 802 | ||
| 803 | verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T); | ||
| 804 | |||
| 821 | /* This protects us against multiple calls (that can happen in the presence | 805 | /* This protects us against multiple calls (that can happen in the presence |
| 822 | of application IO), and against connectivity loss just before we arrive here. */ | 806 | of application IO), and against connectivity loss just before we arrive here. */ |
| 823 | if (os.conn <= C_CONNECTED) | 807 | if (os.conn <= C_CONNECTED) |
| @@ -827,8 +811,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) | |||
| 827 | ns.conn = C_CONNECTED; | 811 | ns.conn = C_CONNECTED; |
| 828 | 812 | ||
| 829 | dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", | 813 | dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", |
| 830 | (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) ? | 814 | verify_done ? "Online verify " : "Resync", |
| 831 | "Online verify " : "Resync", | ||
| 832 | dt + mdev->rs_paused, mdev->rs_paused, dbdt); | 815 | dt + mdev->rs_paused, mdev->rs_paused, dbdt); |
| 833 | 816 | ||
| 834 | n_oos = drbd_bm_total_weight(mdev); | 817 | n_oos = drbd_bm_total_weight(mdev); |
| @@ -886,14 +869,18 @@ int drbd_resync_finished(struct drbd_conf *mdev) | |||
| 886 | } | 869 | } |
| 887 | } | 870 | } |
| 888 | 871 | ||
| 889 | drbd_uuid_set_bm(mdev, 0UL); | 872 | if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) { |
| 890 | 873 | /* for verify runs, we don't update uuids here, | |
| 891 | if (mdev->p_uuid) { | 874 | * so there would be nothing to report. */ |
| 892 | /* Now the two UUID sets are equal, update what we | 875 | drbd_uuid_set_bm(mdev, 0UL); |
| 893 | * know of the peer. */ | 876 | drbd_print_uuids(mdev, "updated UUIDs"); |
| 894 | int i; | 877 | if (mdev->p_uuid) { |
| 895 | for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++) | 878 | /* Now the two UUID sets are equal, update what we |
| 896 | mdev->p_uuid[i] = mdev->ldev->md.uuid[i]; | 879 | * know of the peer. */ |
| 880 | int i; | ||
| 881 | for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++) | ||
| 882 | mdev->p_uuid[i] = mdev->ldev->md.uuid[i]; | ||
| 883 | } | ||
| 897 | } | 884 | } |
| 898 | } | 885 | } |
| 899 | 886 | ||
| @@ -905,15 +892,11 @@ out: | |||
| 905 | mdev->rs_total = 0; | 892 | mdev->rs_total = 0; |
| 906 | mdev->rs_failed = 0; | 893 | mdev->rs_failed = 0; |
| 907 | mdev->rs_paused = 0; | 894 | mdev->rs_paused = 0; |
| 908 | mdev->ov_start_sector = 0; | 895 | if (verify_done) |
| 896 | mdev->ov_start_sector = 0; | ||
| 909 | 897 | ||
| 910 | drbd_md_sync(mdev); | 898 | drbd_md_sync(mdev); |
| 911 | 899 | ||
| 912 | if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) { | ||
| 913 | dev_info(DEV, "Writing the whole bitmap\n"); | ||
| 914 | drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished"); | ||
| 915 | } | ||
| 916 | |||
| 917 | if (khelper_cmd) | 900 | if (khelper_cmd) |
| 918 | drbd_khelper(mdev, khelper_cmd); | 901 | drbd_khelper(mdev, khelper_cmd); |
| 919 | 902 | ||
| @@ -994,7 +977,9 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
| 994 | put_ldev(mdev); | 977 | put_ldev(mdev); |
| 995 | } | 978 | } |
| 996 | 979 | ||
| 997 | if (likely((e->flags & EE_WAS_ERROR) == 0)) { | 980 | if (mdev->state.conn == C_AHEAD) { |
| 981 | ok = drbd_send_ack(mdev, P_RS_CANCEL, e); | ||
| 982 | } else if (likely((e->flags & EE_WAS_ERROR) == 0)) { | ||
| 998 | if (likely(mdev->state.pdsk >= D_INCONSISTENT)) { | 983 | if (likely(mdev->state.pdsk >= D_INCONSISTENT)) { |
| 999 | inc_rs_pending(mdev); | 984 | inc_rs_pending(mdev); |
| 1000 | ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e); | 985 | ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e); |
| @@ -1096,25 +1081,27 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
| 1096 | if (unlikely(cancel)) | 1081 | if (unlikely(cancel)) |
| 1097 | goto out; | 1082 | goto out; |
| 1098 | 1083 | ||
| 1099 | if (unlikely((e->flags & EE_WAS_ERROR) != 0)) | ||
| 1100 | goto out; | ||
| 1101 | |||
| 1102 | digest_size = crypto_hash_digestsize(mdev->verify_tfm); | 1084 | digest_size = crypto_hash_digestsize(mdev->verify_tfm); |
| 1103 | /* FIXME if this allocation fails, online verify will not terminate! */ | ||
| 1104 | digest = kmalloc(digest_size, GFP_NOIO); | 1085 | digest = kmalloc(digest_size, GFP_NOIO); |
| 1105 | if (digest) { | 1086 | if (!digest) { |
| 1106 | drbd_csum_ee(mdev, mdev->verify_tfm, e, digest); | 1087 | ok = 0; /* terminate the connection in case the allocation failed */ |
| 1107 | inc_rs_pending(mdev); | 1088 | goto out; |
| 1108 | ok = drbd_send_drequest_csum(mdev, e->sector, e->size, | ||
| 1109 | digest, digest_size, P_OV_REPLY); | ||
| 1110 | if (!ok) | ||
| 1111 | dec_rs_pending(mdev); | ||
| 1112 | kfree(digest); | ||
| 1113 | } | 1089 | } |
| 1114 | 1090 | ||
| 1091 | if (likely(!(e->flags & EE_WAS_ERROR))) | ||
| 1092 | drbd_csum_ee(mdev, mdev->verify_tfm, e, digest); | ||
| 1093 | else | ||
| 1094 | memset(digest, 0, digest_size); | ||
| 1095 | |||
| 1096 | inc_rs_pending(mdev); | ||
| 1097 | ok = drbd_send_drequest_csum(mdev, e->sector, e->size, | ||
| 1098 | digest, digest_size, P_OV_REPLY); | ||
| 1099 | if (!ok) | ||
| 1100 | dec_rs_pending(mdev); | ||
| 1101 | kfree(digest); | ||
| 1102 | |||
| 1115 | out: | 1103 | out: |
| 1116 | drbd_free_ee(mdev, e); | 1104 | drbd_free_ee(mdev, e); |
| 1117 | |||
| 1118 | dec_unacked(mdev); | 1105 | dec_unacked(mdev); |
| 1119 | 1106 | ||
| 1120 | return ok; | 1107 | return ok; |
| @@ -1129,7 +1116,6 @@ void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size) | |||
| 1129 | mdev->ov_last_oos_size = size>>9; | 1116 | mdev->ov_last_oos_size = size>>9; |
| 1130 | } | 1117 | } |
| 1131 | drbd_set_out_of_sync(mdev, sector, size); | 1118 | drbd_set_out_of_sync(mdev, sector, size); |
| 1132 | set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags); | ||
| 1133 | } | 1119 | } |
| 1134 | 1120 | ||
| 1135 | int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | 1121 | int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) |
| @@ -1165,10 +1151,6 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
| 1165 | eq = !memcmp(digest, di->digest, digest_size); | 1151 | eq = !memcmp(digest, di->digest, digest_size); |
| 1166 | kfree(digest); | 1152 | kfree(digest); |
| 1167 | } | 1153 | } |
| 1168 | } else { | ||
| 1169 | ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e); | ||
| 1170 | if (__ratelimit(&drbd_ratelimit_state)) | ||
| 1171 | dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n"); | ||
| 1172 | } | 1154 | } |
| 1173 | 1155 | ||
| 1174 | dec_unacked(mdev); | 1156 | dec_unacked(mdev); |
| @@ -1182,7 +1164,13 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
| 1182 | 1164 | ||
| 1183 | drbd_free_ee(mdev, e); | 1165 | drbd_free_ee(mdev, e); |
| 1184 | 1166 | ||
| 1185 | if (--mdev->ov_left == 0) { | 1167 | --mdev->ov_left; |
| 1168 | |||
| 1169 | /* let's advance progress step marks only for every other megabyte */ | ||
| 1170 | if ((mdev->ov_left & 0x200) == 0x200) | ||
| 1171 | drbd_advance_rs_marks(mdev, mdev->ov_left); | ||
| 1172 | |||
| 1173 | if (mdev->ov_left == 0) { | ||
| 1186 | ov_oos_print(mdev); | 1174 | ov_oos_print(mdev); |
| 1187 | drbd_resync_finished(mdev); | 1175 | drbd_resync_finished(mdev); |
| 1188 | } | 1176 | } |
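The new progress-mark guard above, (mdev->ov_left & 0x200) == 0x200, tests a single bit of the decrementing counter: it holds for runs of 512 consecutive values (whenever bit 9 is set), so mark updates happen in alternating 512-block windows rather than on every completed block, in line with the "every other megabyte" comment. A quick userspace check:

    #include <stdio.h>

    int main(void)
    {
        unsigned long v, hits = 0;

        for (v = 2048; v > 0; v--)
            if ((v & 0x200) == 0x200)   /* bit 9 set? */
                hits++;
        printf("%lu of 2048 completions hit the guard\n", hits);  /* 1024 */
        return 0;
    }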
| @@ -1235,6 +1223,22 @@ int w_send_write_hint(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
| 1235 | return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE); | 1223 | return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE); |
| 1236 | } | 1224 | } |
| 1237 | 1225 | ||
| 1226 | int w_send_oos(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | ||
| 1227 | { | ||
| 1228 | struct drbd_request *req = container_of(w, struct drbd_request, w); | ||
| 1229 | int ok; | ||
| 1230 | |||
| 1231 | if (unlikely(cancel)) { | ||
| 1232 | req_mod(req, send_canceled); | ||
| 1233 | return 1; | ||
| 1234 | } | ||
| 1235 | |||
| 1236 | ok = drbd_send_oos(mdev, req); | ||
| 1237 | req_mod(req, oos_handed_to_network); | ||
| 1238 | |||
| 1239 | return ok; | ||
| 1240 | } | ||
| 1241 | |||
| 1238 | /** | 1242 | /** |
| 1239 | * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request | 1243 | * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request |
| 1240 | * @mdev: DRBD device. | 1244 | * @mdev: DRBD device. |
| @@ -1430,6 +1434,17 @@ int drbd_alter_sa(struct drbd_conf *mdev, int na) | |||
| 1430 | return retcode; | 1434 | return retcode; |
| 1431 | } | 1435 | } |
| 1432 | 1436 | ||
| 1437 | void drbd_rs_controller_reset(struct drbd_conf *mdev) | ||
| 1438 | { | ||
| 1439 | atomic_set(&mdev->rs_sect_in, 0); | ||
| 1440 | atomic_set(&mdev->rs_sect_ev, 0); | ||
| 1441 | mdev->rs_in_flight = 0; | ||
| 1442 | mdev->rs_planed = 0; | ||
| 1443 | spin_lock(&mdev->peer_seq_lock); | ||
| 1444 | fifo_set(&mdev->rs_plan_s, 0); | ||
| 1445 | spin_unlock(&mdev->peer_seq_lock); | ||
| 1446 | } | ||
| 1447 | |||
| 1433 | /** | 1448 | /** |
| 1434 | * drbd_start_resync() - Start the resync process | 1449 | * drbd_start_resync() - Start the resync process |
| 1435 | * @mdev: DRBD device. | 1450 | * @mdev: DRBD device. |
| @@ -1443,13 +1458,18 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) | |||
| 1443 | union drbd_state ns; | 1458 | union drbd_state ns; |
| 1444 | int r; | 1459 | int r; |
| 1445 | 1460 | ||
| 1446 | if (mdev->state.conn >= C_SYNC_SOURCE) { | 1461 | if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn < C_AHEAD) { |
| 1447 | dev_err(DEV, "Resync already running!\n"); | 1462 | dev_err(DEV, "Resync already running!\n"); |
| 1448 | return; | 1463 | return; |
| 1449 | } | 1464 | } |
| 1450 | 1465 | ||
| 1451 | /* In case a previous resync run was aborted by an IO error/detach on the peer. */ | 1466 | if (mdev->state.conn < C_AHEAD) { |
| 1452 | drbd_rs_cancel_all(mdev); | 1467 | /* In case a previous resync run was aborted by an IO error/detach on the peer. */ |
| 1468 | drbd_rs_cancel_all(mdev); | ||
| 1469 | /* This should be done when we abort the resync. We definitely do not | ||
| 1470 | want to have this for connections going back and forth between | ||
| 1471 | Ahead/Behind and SyncSource/SyncTarget */ | ||
| 1472 | } | ||
| 1453 | 1473 | ||
| 1454 | if (side == C_SYNC_TARGET) { | 1474 | if (side == C_SYNC_TARGET) { |
| 1455 | /* Since application IO was locked out during C_WF_BITMAP_T and | 1475 | /* Since application IO was locked out during C_WF_BITMAP_T and |
| @@ -1463,6 +1483,20 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) | |||
| 1463 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); | 1483 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); |
| 1464 | return; | 1484 | return; |
| 1465 | } | 1485 | } |
| 1486 | } else /* C_SYNC_SOURCE */ { | ||
| 1487 | r = drbd_khelper(mdev, "before-resync-source"); | ||
| 1488 | r = (r >> 8) & 0xff; | ||
| 1489 | if (r > 0) { | ||
| 1490 | if (r == 3) { | ||
| 1491 | dev_info(DEV, "before-resync-source handler returned %d, " | ||
| 1492 | "ignoring. Old userland tools?", r); | ||
| 1493 | } else { | ||
| 1494 | dev_info(DEV, "before-resync-source handler returned %d, " | ||
| 1495 | "dropping connection.\n", r); | ||
| 1496 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); | ||
| 1497 | return; | ||
| 1498 | } | ||
| 1499 | } | ||
| 1466 | } | 1500 | } |
| 1467 | 1501 | ||
| 1468 | drbd_state_lock(mdev); | 1502 | drbd_state_lock(mdev); |
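The new before-resync-source branch decodes the helper's result with (r >> 8) & 0xff because drbd_khelper() appears to hand back a wait(2)-style status word (as call_usermodehelper() does when asked to wait for the process): the exit code lives in bits 8..15. The same unpacking in userspace, where WEXITSTATUS() is the portable spelling:

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/wait.h>

    int main(void)
    {
        int status = system("exit 3");  /* stand-in for the drbd helper */

        if (WIFEXITED(status))
            printf("handler returned %d (aka WEXITSTATUS %d)\n",
                   (status >> 8) & 0xff, WEXITSTATUS(status));
        return 0;
    }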
| @@ -1472,18 +1506,6 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) | |||
| 1472 | return; | 1506 | return; |
| 1473 | } | 1507 | } |
| 1474 | 1508 | ||
| 1475 | if (side == C_SYNC_TARGET) { | ||
| 1476 | mdev->bm_resync_fo = 0; | ||
| 1477 | } else /* side == C_SYNC_SOURCE */ { | ||
| 1478 | u64 uuid; | ||
| 1479 | |||
| 1480 | get_random_bytes(&uuid, sizeof(u64)); | ||
| 1481 | drbd_uuid_set(mdev, UI_BITMAP, uuid); | ||
| 1482 | drbd_send_sync_uuid(mdev, uuid); | ||
| 1483 | |||
| 1484 | D_ASSERT(mdev->state.disk == D_UP_TO_DATE); | ||
| 1485 | } | ||
| 1486 | |||
| 1487 | write_lock_irq(&global_state_lock); | 1509 | write_lock_irq(&global_state_lock); |
| 1488 | ns = mdev->state; | 1510 | ns = mdev->state; |
| 1489 | 1511 | ||
| @@ -1521,13 +1543,24 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) | |||
| 1521 | _drbd_pause_after(mdev); | 1543 | _drbd_pause_after(mdev); |
| 1522 | } | 1544 | } |
| 1523 | write_unlock_irq(&global_state_lock); | 1545 | write_unlock_irq(&global_state_lock); |
| 1524 | put_ldev(mdev); | ||
| 1525 | 1546 | ||
| 1526 | if (r == SS_SUCCESS) { | 1547 | if (r == SS_SUCCESS) { |
| 1527 | dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n", | 1548 | dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n", |
| 1528 | drbd_conn_str(ns.conn), | 1549 | drbd_conn_str(ns.conn), |
| 1529 | (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10), | 1550 | (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10), |
| 1530 | (unsigned long) mdev->rs_total); | 1551 | (unsigned long) mdev->rs_total); |
| 1552 | if (side == C_SYNC_TARGET) | ||
| 1553 | mdev->bm_resync_fo = 0; | ||
| 1554 | |||
| 1555 | /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid | ||
| 1556 | * with w_send_oos, or the sync target will get confused as to | ||
| 1557 | * how many bits to resync. We cannot do that always, because for an | ||

| 1558 | * empty resync and protocol < 95, we need to do it here, as we call | ||
| 1559 | * drbd_resync_finished from here in that case. | ||
| 1560 | * We drbd_gen_and_send_sync_uuid here for protocol < 96, | ||
| 1561 | * and from after_state_ch otherwise. */ | ||
| 1562 | if (side == C_SYNC_SOURCE && mdev->agreed_pro_version < 96) | ||
| 1563 | drbd_gen_and_send_sync_uuid(mdev); | ||
| 1531 | 1564 | ||
| 1532 | if (mdev->agreed_pro_version < 95 && mdev->rs_total == 0) { | 1565 | if (mdev->agreed_pro_version < 95 && mdev->rs_total == 0) { |
| 1533 | /* This still has a race (about when exactly the peers | 1566 | /* This still has a race (about when exactly the peers |
| @@ -1547,13 +1580,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) | |||
| 1547 | drbd_resync_finished(mdev); | 1580 | drbd_resync_finished(mdev); |
| 1548 | } | 1581 | } |
| 1549 | 1582 | ||
| 1550 | atomic_set(&mdev->rs_sect_in, 0); | 1583 | drbd_rs_controller_reset(mdev); |
| 1551 | atomic_set(&mdev->rs_sect_ev, 0); | ||
| 1552 | mdev->rs_in_flight = 0; | ||
| 1553 | mdev->rs_planed = 0; | ||
| 1554 | spin_lock(&mdev->peer_seq_lock); | ||
| 1555 | fifo_set(&mdev->rs_plan_s, 0); | ||
| 1556 | spin_unlock(&mdev->peer_seq_lock); | ||
| 1557 | /* ns.conn may already be != mdev->state.conn, | 1584 | /* ns.conn may already be != mdev->state.conn, |
| 1558 | * we may have been paused in between, or become paused until | 1585 | * we may have been paused in between, or become paused until |
| 1559 | * the timer triggers. | 1586 | * the timer triggers. |
| @@ -1563,6 +1590,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) | |||
| 1563 | 1590 | ||
| 1564 | drbd_md_sync(mdev); | 1591 | drbd_md_sync(mdev); |
| 1565 | } | 1592 | } |
| 1593 | put_ldev(mdev); | ||
| 1566 | drbd_state_unlock(mdev); | 1594 | drbd_state_unlock(mdev); |
| 1567 | } | 1595 | } |
| 1568 | 1596 | ||
diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h index 53586fa5ae1b..151f1a37478f 100644 --- a/drivers/block/drbd/drbd_wrappers.h +++ b/drivers/block/drbd/drbd_wrappers.h | |||
| @@ -39,7 +39,7 @@ static inline void drbd_generic_make_request(struct drbd_conf *mdev, | |||
| 39 | return; | 39 | return; |
| 40 | } | 40 | } |
| 41 | 41 | ||
| 42 | if (FAULT_ACTIVE(mdev, fault_type)) | 42 | if (drbd_insert_fault(mdev, fault_type)) |
| 43 | bio_endio(bio, -EIO); | 43 | bio_endio(bio, -EIO); |
| 44 | else | 44 | else |
| 45 | generic_make_request(bio); | 45 | generic_make_request(bio); |
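The rename above (the FAULT_ACTIVE macro becoming a drbd_insert_fault() function) keeps the same fault-injection gate: a per-bio predicate decides whether to complete the bio with -EIO instead of submitting it, which exercises error paths without a failing disk. A hedged sketch of the gate; the percentage knob is illustrative, as DRBD drives its real decision from module parameters:

    #include <linux/bio.h>
    #include <linux/blkdev.h>
    #include <linux/random.h>
    #include <linux/types.h>

    static unsigned int demo_fault_rate;   /* percent of bios to fail */

    static bool demo_insert_fault(void)
    {
        return demo_fault_rate && (random32() % 100) < demo_fault_rate;
    }

    static void demo_make_request(struct bio *bio)
    {
        if (demo_insert_fault())
            bio_endio(bio, -EIO);   /* fail it as a broken disk would */
        else
            generic_make_request(bio);
    }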
diff --git a/include/linux/drbd.h b/include/linux/drbd.h index ef44c7a0638c..d18d673ebc78 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h | |||
| @@ -53,10 +53,10 @@ | |||
| 53 | 53 | ||
| 54 | 54 | ||
| 55 | extern const char *drbd_buildtag(void); | 55 | extern const char *drbd_buildtag(void); |
| 56 | #define REL_VERSION "8.3.9" | 56 | #define REL_VERSION "8.3.10" |
| 57 | #define API_VERSION 88 | 57 | #define API_VERSION 88 |
| 58 | #define PRO_VERSION_MIN 86 | 58 | #define PRO_VERSION_MIN 86 |
| 59 | #define PRO_VERSION_MAX 95 | 59 | #define PRO_VERSION_MAX 96 |
| 60 | 60 | ||
| 61 | 61 | ||
| 62 | enum drbd_io_error_p { | 62 | enum drbd_io_error_p { |
| @@ -96,8 +96,14 @@ enum drbd_on_no_data { | |||
| 96 | OND_SUSPEND_IO | 96 | OND_SUSPEND_IO |
| 97 | }; | 97 | }; |
| 98 | 98 | ||
| 99 | enum drbd_on_congestion { | ||
| 100 | OC_BLOCK, | ||
| 101 | OC_PULL_AHEAD, | ||
| 102 | OC_DISCONNECT, | ||
| 103 | }; | ||
| 104 | |||
| 99 | /* KEEP the order, do not delete or insert. Only append. */ | 105 | /* KEEP the order, do not delete or insert. Only append. */ |
| 100 | enum drbd_ret_codes { | 106 | enum drbd_ret_code { |
| 101 | ERR_CODE_BASE = 100, | 107 | ERR_CODE_BASE = 100, |
| 102 | NO_ERROR = 101, | 108 | NO_ERROR = 101, |
| 103 | ERR_LOCAL_ADDR = 102, | 109 | ERR_LOCAL_ADDR = 102, |
| @@ -146,6 +152,9 @@ enum drbd_ret_codes { | |||
| 146 | ERR_PERM = 152, | 152 | ERR_PERM = 152, |
| 147 | ERR_NEED_APV_93 = 153, | 153 | ERR_NEED_APV_93 = 153, |
| 148 | ERR_STONITH_AND_PROT_A = 154, | 154 | ERR_STONITH_AND_PROT_A = 154, |
| 155 | ERR_CONG_NOT_PROTO_A = 155, | ||
| 156 | ERR_PIC_AFTER_DEP = 156, | ||
| 157 | ERR_PIC_PEER_DEP = 157, | ||
| 149 | 158 | ||
| 150 | /* insert new ones above this line */ | 159 | /* insert new ones above this line */ |
| 151 | AFTER_LAST_ERR_CODE | 160 | AFTER_LAST_ERR_CODE |
| @@ -199,6 +208,10 @@ enum drbd_conns { | |||
| 199 | C_VERIFY_T, | 208 | C_VERIFY_T, |
| 200 | C_PAUSED_SYNC_S, | 209 | C_PAUSED_SYNC_S, |
| 201 | C_PAUSED_SYNC_T, | 210 | C_PAUSED_SYNC_T, |
| 211 | |||
| 212 | C_AHEAD, | ||
| 213 | C_BEHIND, | ||
| 214 | |||
| 202 | C_MASK = 31 | 215 | C_MASK = 31 |
| 203 | }; | 216 | }; |
| 204 | 217 | ||
| @@ -259,7 +272,7 @@ union drbd_state { | |||
| 259 | unsigned int i; | 272 | unsigned int i; |
| 260 | }; | 273 | }; |
| 261 | 274 | ||
| 262 | enum drbd_state_ret_codes { | 275 | enum drbd_state_rv { |
| 263 | SS_CW_NO_NEED = 4, | 276 | SS_CW_NO_NEED = 4, |
| 264 | SS_CW_SUCCESS = 3, | 277 | SS_CW_SUCCESS = 3, |
| 265 | SS_NOTHING_TO_DO = 2, | 278 | SS_NOTHING_TO_DO = 2, |
| @@ -290,7 +303,7 @@ enum drbd_state_ret_codes { | |||
| 290 | extern const char *drbd_conn_str(enum drbd_conns); | 303 | extern const char *drbd_conn_str(enum drbd_conns); |
| 291 | extern const char *drbd_role_str(enum drbd_role); | 304 | extern const char *drbd_role_str(enum drbd_role); |
| 292 | extern const char *drbd_disk_str(enum drbd_disk_state); | 305 | extern const char *drbd_disk_str(enum drbd_disk_state); |
| 293 | extern const char *drbd_set_st_err_str(enum drbd_state_ret_codes); | 306 | extern const char *drbd_set_st_err_str(enum drbd_state_rv); |
| 294 | 307 | ||
| 295 | #define SHARED_SECRET_MAX 64 | 308 | #define SHARED_SECRET_MAX 64 |
| 296 | 309 | ||
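The "KEEP the order ... Only append" rule and C_MASK = 31 exist because the connection state is packed into a 5-bit field of the on-the-wire state union, making every enum value part of the wire ABI; C_AHEAD and C_BEHIND are therefore appended before C_MASK rather than inserted. A reduced userspace illustration of that packing (field widths mimic, but are not copied from, union drbd_state):

    #include <stdio.h>

    union demo_state {
        struct {
            unsigned role:2;
            unsigned conn:5;    /* must encode every state, <= 31 */
            unsigned disk:4;
        } f;
        unsigned int i;         /* the word that crosses the wire */
    };

    int main(void)
    {
        union demo_state s = { .f = { .conn = 21 } };

        /* the layout is ABI between the two peers; renumbering any
         * enum value silently changes the meaning of this word */
        printf("packed state word: %#x\n", s.i);
        return 0;
    }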
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 4ac33f34b77e..bb264a5732de 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h | |||
| @@ -16,7 +16,8 @@ | |||
| 16 | #define DEBUG_RANGE_CHECK 0 | 16 | #define DEBUG_RANGE_CHECK 0 |
| 17 | 17 | ||
| 18 | #define DRBD_MINOR_COUNT_MIN 1 | 18 | #define DRBD_MINOR_COUNT_MIN 1 |
| 19 | #define DRBD_MINOR_COUNT_MAX 255 | 19 | #define DRBD_MINOR_COUNT_MAX 256 |
| 20 | #define DRBD_MINOR_COUNT_DEF 32 | ||
| 20 | 21 | ||
| 21 | #define DRBD_DIALOG_REFRESH_MIN 0 | 22 | #define DRBD_DIALOG_REFRESH_MIN 0 |
| 22 | #define DRBD_DIALOG_REFRESH_MAX 600 | 23 | #define DRBD_DIALOG_REFRESH_MAX 600 |
| @@ -129,6 +130,7 @@ | |||
| 129 | #define DRBD_AFTER_SB_2P_DEF ASB_DISCONNECT | 130 | #define DRBD_AFTER_SB_2P_DEF ASB_DISCONNECT |
| 130 | #define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT | 131 | #define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT |
| 131 | #define DRBD_ON_NO_DATA_DEF OND_IO_ERROR | 132 | #define DRBD_ON_NO_DATA_DEF OND_IO_ERROR |
| 133 | #define DRBD_ON_CONGESTION_DEF OC_BLOCK | ||
| 132 | 134 | ||
| 133 | #define DRBD_MAX_BIO_BVECS_MIN 0 | 135 | #define DRBD_MAX_BIO_BVECS_MIN 0 |
| 134 | #define DRBD_MAX_BIO_BVECS_MAX 128 | 136 | #define DRBD_MAX_BIO_BVECS_MAX 128 |
| @@ -154,5 +156,13 @@ | |||
| 154 | #define DRBD_C_MIN_RATE_MAX (4 << 20) | 156 | #define DRBD_C_MIN_RATE_MAX (4 << 20) |
| 155 | #define DRBD_C_MIN_RATE_DEF 4096 | 157 | #define DRBD_C_MIN_RATE_DEF 4096 |
| 156 | 158 | ||
| 159 | #define DRBD_CONG_FILL_MIN 0 | ||
| 160 | #define DRBD_CONG_FILL_MAX (10<<21) /* 10GByte in sectors */ | ||
| 161 | #define DRBD_CONG_FILL_DEF 0 | ||
| 162 | |||
| 163 | #define DRBD_CONG_EXTENTS_MIN DRBD_AL_EXTENTS_MIN | ||
| 164 | #define DRBD_CONG_EXTENTS_MAX DRBD_AL_EXTENTS_MAX | ||
| 165 | #define DRBD_CONG_EXTENTS_DEF DRBD_AL_EXTENTS_DEF | ||
| 166 | |||
| 157 | #undef RANGE | 167 | #undef RANGE |
| 158 | #endif | 168 | #endif |
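A quick unit check on the new DRBD_CONG_FILL_MAX above: with 512-byte sectors, 1 GiB is 2^30 / 2^9 = 2^21 sectors, so (10<<21) sectors is indeed the 10 GByte the comment claims:

    #include <stdio.h>

    int main(void)
    {
        unsigned long long sectors = 10ULL << 21;   /* DRBD_CONG_FILL_MAX */

        printf("%llu sectors = %llu bytes = %llu GiB\n",
               sectors, sectors << 9, (sectors << 9) >> 30);
        /* 20971520 sectors = 10737418240 bytes = 10 GiB */
        return 0;
    }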
diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index ade91107c9a5..ab6159e4fcf0 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h | |||
| @@ -56,6 +56,9 @@ NL_PACKET(net_conf, 5, | |||
| 56 | NL_INTEGER( 39, T_MAY_IGNORE, rr_conflict) | 56 | NL_INTEGER( 39, T_MAY_IGNORE, rr_conflict) |
| 57 | NL_INTEGER( 40, T_MAY_IGNORE, ping_timeo) | 57 | NL_INTEGER( 40, T_MAY_IGNORE, ping_timeo) |
| 58 | NL_INTEGER( 67, T_MAY_IGNORE, rcvbuf_size) | 58 | NL_INTEGER( 67, T_MAY_IGNORE, rcvbuf_size) |
| 59 | NL_INTEGER( 81, T_MAY_IGNORE, on_congestion) | ||
| 60 | NL_INTEGER( 82, T_MAY_IGNORE, cong_fill) | ||
| 61 | NL_INTEGER( 83, T_MAY_IGNORE, cong_extents) | ||
| 59 | /* 59 addr_family was available in GIT, never released */ | 62 | /* 59 addr_family was available in GIT, never released */ |
| 60 | NL_BIT( 60, T_MANDATORY, mind_af) | 63 | NL_BIT( 60, T_MANDATORY, mind_af) |
| 61 | NL_BIT( 27, T_MAY_IGNORE, want_lose) | 64 | NL_BIT( 27, T_MAY_IGNORE, want_lose) |
| @@ -66,7 +69,9 @@ NL_PACKET(net_conf, 5, | |||
| 66 | NL_BIT( 70, T_MANDATORY, dry_run) | 69 | NL_BIT( 70, T_MANDATORY, dry_run) |
| 67 | ) | 70 | ) |
| 68 | 71 | ||
| 69 | NL_PACKET(disconnect, 6, ) | 72 | NL_PACKET(disconnect, 6, |
| 73 | NL_BIT( 84, T_MAY_IGNORE, force) | ||
| 74 | ) | ||
| 70 | 75 | ||
| 71 | NL_PACKET(resize, 7, | 76 | NL_PACKET(resize, 7, |
| 72 | NL_INT64( 29, T_MAY_IGNORE, resize_size) | 77 | NL_INT64( 29, T_MAY_IGNORE, resize_size) |
| @@ -143,9 +148,13 @@ NL_PACKET(new_c_uuid, 26, | |||
| 143 | NL_BIT( 63, T_MANDATORY, clear_bm) | 148 | NL_BIT( 63, T_MANDATORY, clear_bm) |
| 144 | ) | 149 | ) |
| 145 | 150 | ||
| 151 | #ifdef NL_RESPONSE | ||
| 152 | NL_RESPONSE(return_code_only, 27) | ||
| 153 | #endif | ||
| 154 | |||
| 146 | #undef NL_PACKET | 155 | #undef NL_PACKET |
| 147 | #undef NL_INTEGER | 156 | #undef NL_INTEGER |
| 148 | #undef NL_INT64 | 157 | #undef NL_INT64 |
| 149 | #undef NL_BIT | 158 | #undef NL_BIT |
| 150 | #undef NL_STRING | 159 | #undef NL_STRING |
| 151 | 160 | #undef NL_RESPONSE | |
diff --git a/include/linux/drbd_tag_magic.h b/include/linux/drbd_tag_magic.h index fcdff8410e99..f14a165e82dc 100644 --- a/include/linux/drbd_tag_magic.h +++ b/include/linux/drbd_tag_magic.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | /* declare packet_type enums */ | 7 | /* declare packet_type enums */ |
| 8 | enum packet_types { | 8 | enum packet_types { |
| 9 | #define NL_PACKET(name, number, fields) P_ ## name = number, | 9 | #define NL_PACKET(name, number, fields) P_ ## name = number, |
| 10 | #define NL_RESPONSE(name, number) P_ ## name = number, | ||
| 10 | #define NL_INTEGER(pn, pr, member) | 11 | #define NL_INTEGER(pn, pr, member) |
| 11 | #define NL_INT64(pn, pr, member) | 12 | #define NL_INT64(pn, pr, member) |
| 12 | #define NL_BIT(pn, pr, member) | 13 | #define NL_BIT(pn, pr, member) |
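Both drbd_nl.h and drbd_tag_magic.h above are X-macro files: each consumer defines NL_PACKET/NL_RESPONSE/etc. to mean something different, pulls in the packet list, and the trailing #undefs reset the macros for the next consumer; the #ifdef NL_RESPONSE guard added here keeps older consumers that never define the new hook compiling. A self-contained sketch of the technique (the real scheme re-#includes linux/drbd_nl.h; it is condensed here into a list macro so the example builds standalone):

    #include <stdio.h>

    #define DEMO_PACKET_LIST \
        NL_PACKET(primary, 1) \
        NL_PACKET(disconnect, 6) \
        NL_RESPONSE(return_code_only, 27)

    /* interpretation 1: declare the enum, as drbd_tag_magic.h does */
    #define NL_PACKET(name, number)   P_ ## name = number,
    #define NL_RESPONSE(name, number) P_ ## name = number,
    enum demo_packet_types { DEMO_PACKET_LIST };
    #undef NL_PACKET
    #undef NL_RESPONSE

    /* interpretation 2: emit the names, e.g. for debug output */
    #define NL_PACKET(name, number)   #name,
    #define NL_RESPONSE(name, number) #name,
    static const char *demo_packet_names[] = { DEMO_PACKET_LIST };
    #undef NL_PACKET
    #undef NL_RESPONSE

    int main(void)
    {
        printf("P_disconnect = %d, response packet: %s\n",
               P_disconnect, demo_packet_names[2]);
        return 0;
    }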
