diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-25 12:15:35 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-25 12:15:35 -0400 |
commit | 929cfdd5d3bdc772aff32e5a3fb4e3894394aa75 (patch) | |
tree | f67202d079eaf1f8d65b2e1bfac70b768ae34bc4 | |
parent | 798ce8f1cca29dcc3f4b55947f611f4ffb32ac2b (diff) | |
parent | a1c15c59feee36267c43142a41152fbf7402afb6 (diff) |
Merge branch 'for-2.6.40/drivers' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.40/drivers' of git://git.kernel.dk/linux-2.6-block: (110 commits)
loop: handle on-demand devices correctly
loop: limit 'max_part' module param to DISK_MAX_PARTS
drbd: fix warning
drbd: fix warning
drbd: Fix spelling
drbd: fix schedule in atomic
drbd: Take a more conservative approach when deciding max_bio_size
drbd: Fixed state transitions after async outdate-peer-handler returned
drbd: Disallow the peer_disk_state to be D_OUTDATED while connected
drbd: Fix for the connection problems on high latency links
drbd: fix potential activity log refcount imbalance in error path
drbd: Only downgrade the disk state in case of disk failures
drbd: fix disconnect/reconnect loop, if ping-timeout == ping-int
drbd: fix potential distributed deadlock
lru_cache.h: fix comments referring to ts_ instead of lc_
drbd: Fix for application IO with the on-io-error=pass-on policy
xen/p2m: Add EXPORT_SYMBOL_GPL to the M2P override functions.
xen/p2m/m2p/gnttab: Support GNTMAP_host_map in the M2P override.
xen/blkback: don't fail empty barrier requests
xen/blkback: fix xenbus_transaction_start() hang caused by double xenbus_transaction_end()
...
27 files changed, 2677 insertions, 307 deletions
diff --git a/Documentation/blockdev/cciss.txt b/Documentation/blockdev/cciss.txt index 89698e8df7d4..c00c6a5ab21f 100644 --- a/Documentation/blockdev/cciss.txt +++ b/Documentation/blockdev/cciss.txt | |||
@@ -169,3 +169,18 @@ is issued which positions the tape to a known position. Typically you | |||
169 | must rewind the tape (by issuing "mt -f /dev/st0 rewind" for example) | 169 | must rewind the tape (by issuing "mt -f /dev/st0 rewind" for example) |
170 | before i/o can proceed again to a tape drive which was reset. | 170 | before i/o can proceed again to a tape drive which was reset. |
171 | 171 | ||
172 | There is a cciss_tape_cmds module parameter which can be used to make cciss | ||
173 | allocate more commands for use by tape drives. Ordinarily only a few commands | ||
174 | (6) are allocated for tape drives because tape drives are slow and | ||
175 | infrequently used and the primary purpose of Smart Array controllers is to | ||
176 | act as a RAID controller for disk drives, so the vast majority of commands | ||
177 | are allocated for disk devices. However, if you have more than a few tape | ||
178 | drives attached to a smart array, the default number of commands may not be | ||
179 | enough (for example, if you have 8 tape drives, you could only rewind 6 | ||
180 | at one time with the default number of commands.) The cciss_tape_cmds module | ||
181 | parameter allows more commands (up to 16 more) to be allocated for use by | ||
182 | tape drives. For example: | ||
183 | |||
184 | insmod cciss.ko cciss_tape_cmds=16 | ||
185 | |||
186 | Or, as a kernel boot parameter passed in via grub: cciss.cciss_tape_cmds=8 | ||
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 83c32cb72582..717d6e4e18d3 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig | |||
@@ -470,6 +470,27 @@ config XEN_BLKDEV_FRONTEND | |||
470 | block device driver. It communicates with a back-end driver | 470 | block device driver. It communicates with a back-end driver |
471 | in another domain which drives the actual block device. | 471 | in another domain which drives the actual block device. |
472 | 472 | ||
473 | config XEN_BLKDEV_BACKEND | ||
474 | tristate "Block-device backend driver" | ||
475 | depends on XEN_BACKEND | ||
476 | help | ||
477 | The block-device backend driver allows the kernel to export its | ||
478 | block devices to other guests via a high-performance shared-memory | ||
479 | interface. | ||
480 | |||
481 | The corresponding Linux frontend driver is enabled by the | ||
482 | CONFIG_XEN_BLKDEV_FRONTEND configuration option. | ||
483 | |||
484 | The backend driver attaches itself to any block device specified | ||
485 | in the XenBus configuration. There are no limits to what the block | ||
486 | device can be as long as it has a major and minor. | ||
487 | |||
488 | If you are compiling a kernel to run in a Xen block backend driver | ||
489 | domain (often this is domain 0) you should say Y here. To | ||
490 | compile this driver as a module, choose M here: the module | ||
491 | will be called xen-blkback. | ||
492 | |||
493 | |||
473 | config VIRTIO_BLK | 494 | config VIRTIO_BLK |
474 | tristate "Virtio block driver (EXPERIMENTAL)" | 495 | tristate "Virtio block driver (EXPERIMENTAL)" |
475 | depends on EXPERIMENTAL && VIRTIO | 496 | depends on EXPERIMENTAL && VIRTIO |
diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 40528ba56d1b..76646e9a1c91 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile | |||
@@ -36,6 +36,7 @@ obj-$(CONFIG_BLK_DEV_UB) += ub.o | |||
36 | obj-$(CONFIG_BLK_DEV_HD) += hd.o | 36 | obj-$(CONFIG_BLK_DEV_HD) += hd.o |
37 | 37 | ||
38 | obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o | 38 | obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o |
39 | obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/ | ||
39 | obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ | 40 | obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ |
40 | obj-$(CONFIG_BLK_DEV_RBD) += rbd.o | 41 | obj-$(CONFIG_BLK_DEV_RBD) += rbd.o |
41 | 42 | ||
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 9bf13988f1a2..8f4ef656a1af 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c | |||
@@ -64,6 +64,10 @@ MODULE_DESCRIPTION("Driver for HP Smart Array Controllers"); | |||
64 | MODULE_SUPPORTED_DEVICE("HP Smart Array Controllers"); | 64 | MODULE_SUPPORTED_DEVICE("HP Smart Array Controllers"); |
65 | MODULE_VERSION("3.6.26"); | 65 | MODULE_VERSION("3.6.26"); |
66 | MODULE_LICENSE("GPL"); | 66 | MODULE_LICENSE("GPL"); |
67 | static int cciss_tape_cmds = 6; | ||
68 | module_param(cciss_tape_cmds, int, 0644); | ||
69 | MODULE_PARM_DESC(cciss_tape_cmds, | ||
70 | "number of commands to allocate for tape devices (default: 6)"); | ||
67 | 71 | ||
68 | static DEFINE_MUTEX(cciss_mutex); | 72 | static DEFINE_MUTEX(cciss_mutex); |
69 | static struct proc_dir_entry *proc_cciss; | 73 | static struct proc_dir_entry *proc_cciss; |
@@ -194,6 +198,8 @@ static int __devinit cciss_find_cfg_addrs(struct pci_dev *pdev, | |||
194 | static int __devinit cciss_pci_find_memory_BAR(struct pci_dev *pdev, | 198 | static int __devinit cciss_pci_find_memory_BAR(struct pci_dev *pdev, |
195 | unsigned long *memory_bar); | 199 | unsigned long *memory_bar); |
196 | static inline u32 cciss_tag_discard_error_bits(ctlr_info_t *h, u32 tag); | 200 | static inline u32 cciss_tag_discard_error_bits(ctlr_info_t *h, u32 tag); |
201 | static __devinit int write_driver_ver_to_cfgtable( | ||
202 | CfgTable_struct __iomem *cfgtable); | ||
197 | 203 | ||
198 | /* performant mode helper functions */ | 204 | /* performant mode helper functions */ |
199 | static void calc_bucket_map(int *bucket, int num_buckets, int nsgs, | 205 | static void calc_bucket_map(int *bucket, int num_buckets, int nsgs, |
@@ -556,7 +562,7 @@ static void __devinit cciss_procinit(ctlr_info_t *h) | |||
556 | #define to_hba(n) container_of(n, struct ctlr_info, dev) | 562 | #define to_hba(n) container_of(n, struct ctlr_info, dev) |
557 | #define to_drv(n) container_of(n, drive_info_struct, dev) | 563 | #define to_drv(n) container_of(n, drive_info_struct, dev) |
558 | 564 | ||
559 | /* List of controllers which cannot be reset on kexec with reset_devices */ | 565 | /* List of controllers which cannot be hard reset on kexec with reset_devices */ |
560 | static u32 unresettable_controller[] = { | 566 | static u32 unresettable_controller[] = { |
561 | 0x324a103C, /* Smart Array P712m */ | 567 | 0x324a103C, /* Smart Array P712m */ |
562 | 0x324b103C, /* SmartArray P711m */ | 568 | 0x324b103C, /* SmartArray P711m */ |
@@ -574,23 +580,45 @@ static u32 unresettable_controller[] = { | |||
574 | 0x409D0E11, /* Smart Array 6400 EM */ | 580 | 0x409D0E11, /* Smart Array 6400 EM */ |
575 | }; | 581 | }; |
576 | 582 | ||
577 | static int ctlr_is_resettable(struct ctlr_info *h) | 583 | /* List of controllers which cannot even be soft reset */ |
584 | static u32 soft_unresettable_controller[] = { | ||
585 | 0x409C0E11, /* Smart Array 6400 */ | ||
586 | 0x409D0E11, /* Smart Array 6400 EM */ | ||
587 | }; | ||
588 | |||
589 | static int ctlr_is_hard_resettable(u32 board_id) | ||
578 | { | 590 | { |
579 | int i; | 591 | int i; |
580 | 592 | ||
581 | for (i = 0; i < ARRAY_SIZE(unresettable_controller); i++) | 593 | for (i = 0; i < ARRAY_SIZE(unresettable_controller); i++) |
582 | if (unresettable_controller[i] == h->board_id) | 594 | if (unresettable_controller[i] == board_id) |
583 | return 0; | 595 | return 0; |
584 | return 1; | 596 | return 1; |
585 | } | 597 | } |
586 | 598 | ||
599 | static int ctlr_is_soft_resettable(u32 board_id) | ||
600 | { | ||
601 | int i; | ||
602 | |||
603 | for (i = 0; i < ARRAY_SIZE(soft_unresettable_controller); i++) | ||
604 | if (soft_unresettable_controller[i] == board_id) | ||
605 | return 0; | ||
606 | return 1; | ||
607 | } | ||
608 | |||
609 | static int ctlr_is_resettable(u32 board_id) | ||
610 | { | ||
611 | return ctlr_is_hard_resettable(board_id) || | ||
612 | ctlr_is_soft_resettable(board_id); | ||
613 | } | ||
614 | |||
587 | static ssize_t host_show_resettable(struct device *dev, | 615 | static ssize_t host_show_resettable(struct device *dev, |
588 | struct device_attribute *attr, | 616 | struct device_attribute *attr, |
589 | char *buf) | 617 | char *buf) |
590 | { | 618 | { |
591 | struct ctlr_info *h = to_hba(dev); | 619 | struct ctlr_info *h = to_hba(dev); |
592 | 620 | ||
593 | return snprintf(buf, 20, "%d\n", ctlr_is_resettable(h)); | 621 | return snprintf(buf, 20, "%d\n", ctlr_is_resettable(h->board_id)); |
594 | } | 622 | } |
595 | static DEVICE_ATTR(resettable, S_IRUGO, host_show_resettable, NULL); | 623 | static DEVICE_ATTR(resettable, S_IRUGO, host_show_resettable, NULL); |
596 | 624 | ||
@@ -2567,7 +2595,7 @@ static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff, | |||
2567 | } | 2595 | } |
2568 | } else if (cmd_type == TYPE_MSG) { | 2596 | } else if (cmd_type == TYPE_MSG) { |
2569 | switch (cmd) { | 2597 | switch (cmd) { |
2570 | case 0: /* ABORT message */ | 2598 | case CCISS_ABORT_MSG: |
2571 | c->Request.CDBLen = 12; | 2599 | c->Request.CDBLen = 12; |
2572 | c->Request.Type.Attribute = ATTR_SIMPLE; | 2600 | c->Request.Type.Attribute = ATTR_SIMPLE; |
2573 | c->Request.Type.Direction = XFER_WRITE; | 2601 | c->Request.Type.Direction = XFER_WRITE; |
@@ -2577,16 +2605,16 @@ static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff, | |||
2577 | /* buff contains the tag of the command to abort */ | 2605 | /* buff contains the tag of the command to abort */ |
2578 | memcpy(&c->Request.CDB[4], buff, 8); | 2606 | memcpy(&c->Request.CDB[4], buff, 8); |
2579 | break; | 2607 | break; |
2580 | case 1: /* RESET message */ | 2608 | case CCISS_RESET_MSG: |
2581 | c->Request.CDBLen = 16; | 2609 | c->Request.CDBLen = 16; |
2582 | c->Request.Type.Attribute = ATTR_SIMPLE; | 2610 | c->Request.Type.Attribute = ATTR_SIMPLE; |
2583 | c->Request.Type.Direction = XFER_NONE; | 2611 | c->Request.Type.Direction = XFER_NONE; |
2584 | c->Request.Timeout = 0; | 2612 | c->Request.Timeout = 0; |
2585 | memset(&c->Request.CDB[0], 0, sizeof(c->Request.CDB)); | 2613 | memset(&c->Request.CDB[0], 0, sizeof(c->Request.CDB)); |
2586 | c->Request.CDB[0] = cmd; /* reset */ | 2614 | c->Request.CDB[0] = cmd; /* reset */ |
2587 | c->Request.CDB[1] = 0x03; /* reset a target */ | 2615 | c->Request.CDB[1] = CCISS_RESET_TYPE_TARGET; |
2588 | break; | 2616 | break; |
2589 | case 3: /* No-Op message */ | 2617 | case CCISS_NOOP_MSG: |
2590 | c->Request.CDBLen = 1; | 2618 | c->Request.CDBLen = 1; |
2591 | c->Request.Type.Attribute = ATTR_SIMPLE; | 2619 | c->Request.Type.Attribute = ATTR_SIMPLE; |
2592 | c->Request.Type.Direction = XFER_WRITE; | 2620 | c->Request.Type.Direction = XFER_WRITE; |
@@ -2615,6 +2643,31 @@ static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff, | |||
2615 | return status; | 2643 | return status; |
2616 | } | 2644 | } |
2617 | 2645 | ||
2646 | static int __devinit cciss_send_reset(ctlr_info_t *h, unsigned char *scsi3addr, | ||
2647 | u8 reset_type) | ||
2648 | { | ||
2649 | CommandList_struct *c; | ||
2650 | int return_status; | ||
2651 | |||
2652 | c = cmd_alloc(h); | ||
2653 | if (!c) | ||
2654 | return -ENOMEM; | ||
2655 | return_status = fill_cmd(h, c, CCISS_RESET_MSG, NULL, 0, 0, | ||
2656 | CTLR_LUNID, TYPE_MSG); | ||
2657 | c->Request.CDB[1] = reset_type; /* fill_cmd defaults to target reset */ | ||
2658 | if (return_status != IO_OK) { | ||
2659 | cmd_special_free(h, c); | ||
2660 | return return_status; | ||
2661 | } | ||
2662 | c->waiting = NULL; | ||
2663 | enqueue_cmd_and_start_io(h, c); | ||
2664 | /* Don't wait for completion, the reset won't complete. Don't free | ||
2665 | * the command either. This is the last command we will send before | ||
2666 | * re-initializing everything, so it doesn't matter and won't leak. | ||
2667 | */ | ||
2668 | return 0; | ||
2669 | } | ||
2670 | |||
2618 | static int check_target_status(ctlr_info_t *h, CommandList_struct *c) | 2671 | static int check_target_status(ctlr_info_t *h, CommandList_struct *c) |
2619 | { | 2672 | { |
2620 | switch (c->err_info->ScsiStatus) { | 2673 | switch (c->err_info->ScsiStatus) { |
@@ -3461,6 +3514,63 @@ static inline u32 process_nonindexed_cmd(ctlr_info_t *h, u32 raw_tag) | |||
3461 | return next_command(h); | 3514 | return next_command(h); |
3462 | } | 3515 | } |
3463 | 3516 | ||
3517 | /* Some controllers, like p400, will give us one interrupt | ||
3518 | * after a soft reset, even if we turned interrupts off. | ||
3519 | * Only need to check for this in the cciss_xxx_discard_completions | ||
3520 | * functions. | ||
3521 | */ | ||
3522 | static int ignore_bogus_interrupt(ctlr_info_t *h) | ||
3523 | { | ||
3524 | if (likely(!reset_devices)) | ||
3525 | return 0; | ||
3526 | |||
3527 | if (likely(h->interrupts_enabled)) | ||
3528 | return 0; | ||
3529 | |||
3530 | dev_info(&h->pdev->dev, "Received interrupt while interrupts disabled " | ||
3531 | "(known firmware bug.) Ignoring.\n"); | ||
3532 | |||
3533 | return 1; | ||
3534 | } | ||
3535 | |||
3536 | static irqreturn_t cciss_intx_discard_completions(int irq, void *dev_id) | ||
3537 | { | ||
3538 | ctlr_info_t *h = dev_id; | ||
3539 | unsigned long flags; | ||
3540 | u32 raw_tag; | ||
3541 | |||
3542 | if (ignore_bogus_interrupt(h)) | ||
3543 | return IRQ_NONE; | ||
3544 | |||
3545 | if (interrupt_not_for_us(h)) | ||
3546 | return IRQ_NONE; | ||
3547 | spin_lock_irqsave(&h->lock, flags); | ||
3548 | while (interrupt_pending(h)) { | ||
3549 | raw_tag = get_next_completion(h); | ||
3550 | while (raw_tag != FIFO_EMPTY) | ||
3551 | raw_tag = next_command(h); | ||
3552 | } | ||
3553 | spin_unlock_irqrestore(&h->lock, flags); | ||
3554 | return IRQ_HANDLED; | ||
3555 | } | ||
3556 | |||
3557 | static irqreturn_t cciss_msix_discard_completions(int irq, void *dev_id) | ||
3558 | { | ||
3559 | ctlr_info_t *h = dev_id; | ||
3560 | unsigned long flags; | ||
3561 | u32 raw_tag; | ||
3562 | |||
3563 | if (ignore_bogus_interrupt(h)) | ||
3564 | return IRQ_NONE; | ||
3565 | |||
3566 | spin_lock_irqsave(&h->lock, flags); | ||
3567 | raw_tag = get_next_completion(h); | ||
3568 | while (raw_tag != FIFO_EMPTY) | ||
3569 | raw_tag = next_command(h); | ||
3570 | spin_unlock_irqrestore(&h->lock, flags); | ||
3571 | return IRQ_HANDLED; | ||
3572 | } | ||
3573 | |||
3464 | static irqreturn_t do_cciss_intx(int irq, void *dev_id) | 3574 | static irqreturn_t do_cciss_intx(int irq, void *dev_id) |
3465 | { | 3575 | { |
3466 | ctlr_info_t *h = dev_id; | 3576 | ctlr_info_t *h = dev_id; |
@@ -4078,6 +4188,9 @@ static int __devinit cciss_find_cfgtables(ctlr_info_t *h) | |||
4078 | cfg_base_addr_index) + cfg_offset, sizeof(h->cfgtable)); | 4188 | cfg_base_addr_index) + cfg_offset, sizeof(h->cfgtable)); |
4079 | if (!h->cfgtable) | 4189 | if (!h->cfgtable) |
4080 | return -ENOMEM; | 4190 | return -ENOMEM; |
4191 | rc = write_driver_ver_to_cfgtable(h->cfgtable); | ||
4192 | if (rc) | ||
4193 | return rc; | ||
4081 | /* Find performant mode table. */ | 4194 | /* Find performant mode table. */ |
4082 | trans_offset = readl(&h->cfgtable->TransMethodOffset); | 4195 | trans_offset = readl(&h->cfgtable->TransMethodOffset); |
4083 | h->transtable = remap_pci_mem(pci_resource_start(h->pdev, | 4196 | h->transtable = remap_pci_mem(pci_resource_start(h->pdev, |
@@ -4112,7 +4225,7 @@ static void __devinit cciss_get_max_perf_mode_cmds(struct ctlr_info *h) | |||
4112 | static void __devinit cciss_find_board_params(ctlr_info_t *h) | 4225 | static void __devinit cciss_find_board_params(ctlr_info_t *h) |
4113 | { | 4226 | { |
4114 | cciss_get_max_perf_mode_cmds(h); | 4227 | cciss_get_max_perf_mode_cmds(h); |
4115 | h->nr_cmds = h->max_commands - 4; /* Allow room for some ioctls */ | 4228 | h->nr_cmds = h->max_commands - 4 - cciss_tape_cmds; |
4116 | h->maxsgentries = readl(&(h->cfgtable->MaxSGElements)); | 4229 | h->maxsgentries = readl(&(h->cfgtable->MaxSGElements)); |
4117 | /* | 4230 | /* |
4118 | * Limit in-command s/g elements to 32 save dma'able memory. | 4231 | * Limit in-command s/g elements to 32 save dma'able memory. |
@@ -4348,7 +4461,7 @@ static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, u | |||
4348 | tag = readl(vaddr + SA5_REPLY_PORT_OFFSET); | 4461 | tag = readl(vaddr + SA5_REPLY_PORT_OFFSET); |
4349 | if ((tag & ~3) == paddr32) | 4462 | if ((tag & ~3) == paddr32) |
4350 | break; | 4463 | break; |
4351 | schedule_timeout_uninterruptible(HZ); | 4464 | msleep(CCISS_POST_RESET_NOOP_TIMEOUT_MSECS); |
4352 | } | 4465 | } |
4353 | 4466 | ||
4354 | iounmap(vaddr); | 4467 | iounmap(vaddr); |
@@ -4375,11 +4488,10 @@ static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, u | |||
4375 | return 0; | 4488 | return 0; |
4376 | } | 4489 | } |
4377 | 4490 | ||
4378 | #define cciss_soft_reset_controller(p) cciss_message(p, 1, 0) | ||
4379 | #define cciss_noop(p) cciss_message(p, 3, 0) | 4491 | #define cciss_noop(p) cciss_message(p, 3, 0) |
4380 | 4492 | ||
4381 | static int cciss_controller_hard_reset(struct pci_dev *pdev, | 4493 | static int cciss_controller_hard_reset(struct pci_dev *pdev, |
4382 | void * __iomem vaddr, bool use_doorbell) | 4494 | void * __iomem vaddr, u32 use_doorbell) |
4383 | { | 4495 | { |
4384 | u16 pmcsr; | 4496 | u16 pmcsr; |
4385 | int pos; | 4497 | int pos; |
@@ -4390,8 +4502,7 @@ static int cciss_controller_hard_reset(struct pci_dev *pdev, | |||
4390 | * other way using the doorbell register. | 4502 | * other way using the doorbell register. |
4391 | */ | 4503 | */ |
4392 | dev_info(&pdev->dev, "using doorbell to reset controller\n"); | 4504 | dev_info(&pdev->dev, "using doorbell to reset controller\n"); |
4393 | writel(DOORBELL_CTLR_RESET, vaddr + SA5_DOORBELL); | 4505 | writel(use_doorbell, vaddr + SA5_DOORBELL); |
4394 | msleep(1000); | ||
4395 | } else { /* Try to do it the PCI power state way */ | 4506 | } else { /* Try to do it the PCI power state way */ |
4396 | 4507 | ||
4397 | /* Quoting from the Open CISS Specification: "The Power | 4508 | /* Quoting from the Open CISS Specification: "The Power |
@@ -4422,12 +4533,64 @@ static int cciss_controller_hard_reset(struct pci_dev *pdev, | |||
4422 | pmcsr &= ~PCI_PM_CTRL_STATE_MASK; | 4533 | pmcsr &= ~PCI_PM_CTRL_STATE_MASK; |
4423 | pmcsr |= PCI_D0; | 4534 | pmcsr |= PCI_D0; |
4424 | pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr); | 4535 | pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr); |
4425 | |||
4426 | msleep(500); | ||
4427 | } | 4536 | } |
4428 | return 0; | 4537 | return 0; |
4429 | } | 4538 | } |
4430 | 4539 | ||
4540 | static __devinit void init_driver_version(char *driver_version, int len) | ||
4541 | { | ||
4542 | memset(driver_version, 0, len); | ||
4543 | strncpy(driver_version, "cciss " DRIVER_NAME, len - 1); | ||
4544 | } | ||
4545 | |||
4546 | static __devinit int write_driver_ver_to_cfgtable( | ||
4547 | CfgTable_struct __iomem *cfgtable) | ||
4548 | { | ||
4549 | char *driver_version; | ||
4550 | int i, size = sizeof(cfgtable->driver_version); | ||
4551 | |||
4552 | driver_version = kmalloc(size, GFP_KERNEL); | ||
4553 | if (!driver_version) | ||
4554 | return -ENOMEM; | ||
4555 | |||
4556 | init_driver_version(driver_version, size); | ||
4557 | for (i = 0; i < size; i++) | ||
4558 | writeb(driver_version[i], &cfgtable->driver_version[i]); | ||
4559 | kfree(driver_version); | ||
4560 | return 0; | ||
4561 | } | ||
4562 | |||
4563 | static __devinit void read_driver_ver_from_cfgtable( | ||
4564 | CfgTable_struct __iomem *cfgtable, unsigned char *driver_ver) | ||
4565 | { | ||
4566 | int i; | ||
4567 | |||
4568 | for (i = 0; i < sizeof(cfgtable->driver_version); i++) | ||
4569 | driver_ver[i] = readb(&cfgtable->driver_version[i]); | ||
4570 | } | ||
4571 | |||
4572 | static __devinit int controller_reset_failed( | ||
4573 | CfgTable_struct __iomem *cfgtable) | ||
4574 | { | ||
4575 | |||
4576 | char *driver_ver, *old_driver_ver; | ||
4577 | int rc, size = sizeof(cfgtable->driver_version); | ||
4578 | |||
4579 | old_driver_ver = kmalloc(2 * size, GFP_KERNEL); | ||
4580 | if (!old_driver_ver) | ||
4581 | return -ENOMEM; | ||
4582 | driver_ver = old_driver_ver + size; | ||
4583 | |||
4584 | /* After a reset, the 32 bytes of "driver version" in the cfgtable | ||
4585 | * should have been changed, otherwise we know the reset failed. | ||
4586 | */ | ||
4587 | init_driver_version(old_driver_ver, size); | ||
4588 | read_driver_ver_from_cfgtable(cfgtable, driver_ver); | ||
4589 | rc = !memcmp(driver_ver, old_driver_ver, size); | ||
4590 | kfree(old_driver_ver); | ||
4591 | return rc; | ||
4592 | } | ||
4593 | |||
4431 | /* This does a hard reset of the controller using PCI power management | 4594 | /* This does a hard reset of the controller using PCI power management |
4432 | * states or using the doorbell register. */ | 4595 | * states or using the doorbell register. */ |
4433 | static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) | 4596 | static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) |
@@ -4437,10 +4600,10 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) | |||
4437 | u64 cfg_base_addr_index; | 4600 | u64 cfg_base_addr_index; |
4438 | void __iomem *vaddr; | 4601 | void __iomem *vaddr; |
4439 | unsigned long paddr; | 4602 | unsigned long paddr; |
4440 | u32 misc_fw_support, active_transport; | 4603 | u32 misc_fw_support; |
4441 | int rc; | 4604 | int rc; |
4442 | CfgTable_struct __iomem *cfgtable; | 4605 | CfgTable_struct __iomem *cfgtable; |
4443 | bool use_doorbell; | 4606 | u32 use_doorbell; |
4444 | u32 board_id; | 4607 | u32 board_id; |
4445 | u16 command_register; | 4608 | u16 command_register; |
4446 | 4609 | ||
@@ -4464,12 +4627,16 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) | |||
4464 | * likely not be happy. Just forbid resetting this conjoined mess. | 4627 | * likely not be happy. Just forbid resetting this conjoined mess. |
4465 | */ | 4628 | */ |
4466 | cciss_lookup_board_id(pdev, &board_id); | 4629 | cciss_lookup_board_id(pdev, &board_id); |
4467 | if (board_id == 0x409C0E11 || board_id == 0x409D0E11) { | 4630 | if (!ctlr_is_resettable(board_id)) { |
4468 | dev_warn(&pdev->dev, "Cannot reset Smart Array 640x " | 4631 | dev_warn(&pdev->dev, "Cannot reset Smart Array 640x " |
4469 | "due to shared cache module."); | 4632 | "due to shared cache module."); |
4470 | return -ENODEV; | 4633 | return -ENODEV; |
4471 | } | 4634 | } |
4472 | 4635 | ||
4636 | /* if controller is soft- but not hard resettable... */ | ||
4637 | if (!ctlr_is_hard_resettable(board_id)) | ||
4638 | return -ENOTSUPP; /* try soft reset later. */ | ||
4639 | |||
4473 | /* Save the PCI command register */ | 4640 | /* Save the PCI command register */ |
4474 | pci_read_config_word(pdev, 4, &command_register); | 4641 | pci_read_config_word(pdev, 4, &command_register); |
4475 | /* Turn the board off. This is so that later pci_restore_state() | 4642 | /* Turn the board off. This is so that later pci_restore_state() |
@@ -4497,16 +4664,28 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) | |||
4497 | rc = -ENOMEM; | 4664 | rc = -ENOMEM; |
4498 | goto unmap_vaddr; | 4665 | goto unmap_vaddr; |
4499 | } | 4666 | } |
4667 | rc = write_driver_ver_to_cfgtable(cfgtable); | ||
4668 | if (rc) | ||
4669 | goto unmap_vaddr; | ||
4500 | 4670 | ||
4501 | /* If reset via doorbell register is supported, use that. */ | 4671 | /* If reset via doorbell register is supported, use that. |
4502 | misc_fw_support = readl(&cfgtable->misc_fw_support); | 4672 | * There are two such methods. Favor the newest method. |
4503 | use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET; | ||
4504 | |||
4505 | /* The doorbell reset seems to cause lockups on some Smart | ||
4506 | * Arrays (e.g. P410, P410i, maybe others). Until this is | ||
4507 | * fixed or at least isolated, avoid the doorbell reset. | ||
4508 | */ | 4673 | */ |
4509 | use_doorbell = 0; | 4674 | misc_fw_support = readl(&cfgtable->misc_fw_support); |
4675 | use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET2; | ||
4676 | if (use_doorbell) { | ||
4677 | use_doorbell = DOORBELL_CTLR_RESET2; | ||
4678 | } else { | ||
4679 | use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET; | ||
4680 | if (use_doorbell) { | ||
4681 | dev_warn(&pdev->dev, "Controller claims that " | ||
4682 | "'Bit 2 doorbell reset' is " | ||
4683 | "supported, but not 'bit 5 doorbell reset'. " | ||
4684 | "Firmware update is recommended.\n"); | ||
4685 | rc = -ENOTSUPP; /* use the soft reset */ | ||
4686 | goto unmap_cfgtable; | ||
4687 | } | ||
4688 | } | ||
4510 | 4689 | ||
4511 | rc = cciss_controller_hard_reset(pdev, vaddr, use_doorbell); | 4690 | rc = cciss_controller_hard_reset(pdev, vaddr, use_doorbell); |
4512 | if (rc) | 4691 | if (rc) |
@@ -4524,30 +4703,31 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) | |||
4524 | msleep(CCISS_POST_RESET_PAUSE_MSECS); | 4703 | msleep(CCISS_POST_RESET_PAUSE_MSECS); |
4525 | 4704 | ||
4526 | /* Wait for board to become not ready, then ready. */ | 4705 | /* Wait for board to become not ready, then ready. */ |
4527 | dev_info(&pdev->dev, "Waiting for board to become ready.\n"); | 4706 | dev_info(&pdev->dev, "Waiting for board to reset.\n"); |
4528 | rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_NOT_READY); | 4707 | rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_NOT_READY); |
4529 | if (rc) /* Don't bail, might be E500, etc. which can't be reset */ | 4708 | if (rc) { |
4530 | dev_warn(&pdev->dev, | 4709 | dev_warn(&pdev->dev, "Failed waiting for board to hard reset." |
4531 | "failed waiting for board to become not ready\n"); | 4710 | " Will try soft reset.\n"); |
4711 | rc = -ENOTSUPP; /* Not expected, but try soft reset later */ | ||
4712 | goto unmap_cfgtable; | ||
4713 | } | ||
4532 | rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_READY); | 4714 | rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_READY); |
4533 | if (rc) { | 4715 | if (rc) { |
4534 | dev_warn(&pdev->dev, | 4716 | dev_warn(&pdev->dev, |
4535 | "failed waiting for board to become ready\n"); | 4717 | "failed waiting for board to become ready " |
4718 | "after hard reset\n"); | ||
4536 | goto unmap_cfgtable; | 4719 | goto unmap_cfgtable; |
4537 | } | 4720 | } |
4538 | dev_info(&pdev->dev, "board ready.\n"); | ||
4539 | 4721 | ||
4540 | /* Controller should be in simple mode at this point. If it's not, | 4722 | rc = controller_reset_failed(vaddr); |
4541 | * It means we're on one of those controllers which doesn't support | 4723 | if (rc < 0) |
4542 | * the doorbell reset method and on which the PCI power management reset | 4724 | goto unmap_cfgtable; |
4543 | * method doesn't work (P800, for example.) | 4725 | if (rc) { |
4544 | * In those cases, don't try to proceed, as it generally doesn't work. | 4726 | dev_warn(&pdev->dev, "Unable to successfully hard reset " |
4545 | */ | 4727 | "controller. Will try soft reset.\n"); |
4546 | active_transport = readl(&cfgtable->TransportActive); | 4728 | rc = -ENOTSUPP; /* Not expected, but try soft reset later */ |
4547 | if (active_transport & PERFORMANT_MODE) { | 4729 | } else { |
4548 | dev_warn(&pdev->dev, "Unable to successfully reset controller," | 4730 | dev_info(&pdev->dev, "Board ready after hard reset.\n"); |
4549 | " Ignoring controller.\n"); | ||
4550 | rc = -ENODEV; | ||
4551 | } | 4731 | } |
4552 | 4732 | ||
4553 | unmap_cfgtable: | 4733 | unmap_cfgtable: |
@@ -4574,11 +4754,12 @@ static __devinit int cciss_init_reset_devices(struct pci_dev *pdev) | |||
4574 | * due to concerns about shared bbwc between 6402/6404 pair. | 4754 | * due to concerns about shared bbwc between 6402/6404 pair. |
4575 | */ | 4755 | */ |
4576 | if (rc == -ENOTSUPP) | 4756 | if (rc == -ENOTSUPP) |
4577 | return 0; /* just try to do the kdump anyhow. */ | 4757 | return rc; /* just try to do the kdump anyhow. */ |
4578 | if (rc) | 4758 | if (rc) |
4579 | return -ENODEV; | 4759 | return -ENODEV; |
4580 | 4760 | ||
4581 | /* Now try to get the controller to respond to a no-op */ | 4761 | /* Now try to get the controller to respond to a no-op */ |
4762 | dev_warn(&pdev->dev, "Waiting for controller to respond to no-op\n"); | ||
4582 | for (i = 0; i < CCISS_POST_RESET_NOOP_RETRIES; i++) { | 4763 | for (i = 0; i < CCISS_POST_RESET_NOOP_RETRIES; i++) { |
4583 | if (cciss_noop(pdev) == 0) | 4764 | if (cciss_noop(pdev) == 0) |
4584 | break; | 4765 | break; |
@@ -4591,6 +4772,148 @@ static __devinit int cciss_init_reset_devices(struct pci_dev *pdev) | |||
4591 | return 0; | 4772 | return 0; |
4592 | } | 4773 | } |
4593 | 4774 | ||
4775 | static __devinit int cciss_allocate_cmd_pool(ctlr_info_t *h) | ||
4776 | { | ||
4777 | h->cmd_pool_bits = kmalloc( | ||
4778 | DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG) * | ||
4779 | sizeof(unsigned long), GFP_KERNEL); | ||
4780 | h->cmd_pool = pci_alloc_consistent(h->pdev, | ||
4781 | h->nr_cmds * sizeof(CommandList_struct), | ||
4782 | &(h->cmd_pool_dhandle)); | ||
4783 | h->errinfo_pool = pci_alloc_consistent(h->pdev, | ||
4784 | h->nr_cmds * sizeof(ErrorInfo_struct), | ||
4785 | &(h->errinfo_pool_dhandle)); | ||
4786 | if ((h->cmd_pool_bits == NULL) | ||
4787 | || (h->cmd_pool == NULL) | ||
4788 | || (h->errinfo_pool == NULL)) { | ||
4789 | dev_err(&h->pdev->dev, "out of memory"); | ||
4790 | return -ENOMEM; | ||
4791 | } | ||
4792 | return 0; | ||
4793 | } | ||
4794 | |||
4795 | static __devinit int cciss_allocate_scatterlists(ctlr_info_t *h) | ||
4796 | { | ||
4797 | int i; | ||
4798 | |||
4799 | /* zero it, so that on free we need not know how many were alloc'ed */ | ||
4800 | h->scatter_list = kzalloc(h->max_commands * | ||
4801 | sizeof(struct scatterlist *), GFP_KERNEL); | ||
4802 | if (!h->scatter_list) | ||
4803 | return -ENOMEM; | ||
4804 | |||
4805 | for (i = 0; i < h->nr_cmds; i++) { | ||
4806 | h->scatter_list[i] = kmalloc(sizeof(struct scatterlist) * | ||
4807 | h->maxsgentries, GFP_KERNEL); | ||
4808 | if (h->scatter_list[i] == NULL) { | ||
4809 | dev_err(&h->pdev->dev, "could not allocate " | ||
4810 | "s/g lists\n"); | ||
4811 | return -ENOMEM; | ||
4812 | } | ||
4813 | } | ||
4814 | return 0; | ||
4815 | } | ||
4816 | |||
4817 | static void cciss_free_scatterlists(ctlr_info_t *h) | ||
4818 | { | ||
4819 | int i; | ||
4820 | |||
4821 | if (h->scatter_list) { | ||
4822 | for (i = 0; i < h->nr_cmds; i++) | ||
4823 | kfree(h->scatter_list[i]); | ||
4824 | kfree(h->scatter_list); | ||
4825 | } | ||
4826 | } | ||
4827 | |||
4828 | static void cciss_free_cmd_pool(ctlr_info_t *h) | ||
4829 | { | ||
4830 | kfree(h->cmd_pool_bits); | ||
4831 | if (h->cmd_pool) | ||
4832 | pci_free_consistent(h->pdev, | ||
4833 | h->nr_cmds * sizeof(CommandList_struct), | ||
4834 | h->cmd_pool, h->cmd_pool_dhandle); | ||
4835 | if (h->errinfo_pool) | ||
4836 | pci_free_consistent(h->pdev, | ||
4837 | h->nr_cmds * sizeof(ErrorInfo_struct), | ||
4838 | h->errinfo_pool, h->errinfo_pool_dhandle); | ||
4839 | } | ||
4840 | |||
4841 | static int cciss_request_irq(ctlr_info_t *h, | ||
4842 | irqreturn_t (*msixhandler)(int, void *), | ||
4843 | irqreturn_t (*intxhandler)(int, void *)) | ||
4844 | { | ||
4845 | if (h->msix_vector || h->msi_vector) { | ||
4846 | if (!request_irq(h->intr[PERF_MODE_INT], msixhandler, | ||
4847 | IRQF_DISABLED, h->devname, h)) | ||
4848 | return 0; | ||
4849 | dev_err(&h->pdev->dev, "Unable to get msi irq %d" | ||
4850 | " for %s\n", h->intr[PERF_MODE_INT], | ||
4851 | h->devname); | ||
4852 | return -1; | ||
4853 | } | ||
4854 | |||
4855 | if (!request_irq(h->intr[PERF_MODE_INT], intxhandler, | ||
4856 | IRQF_DISABLED, h->devname, h)) | ||
4857 | return 0; | ||
4858 | dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n", | ||
4859 | h->intr[PERF_MODE_INT], h->devname); | ||
4860 | return -1; | ||
4861 | } | ||
4862 | |||
4863 | static int __devinit cciss_kdump_soft_reset(ctlr_info_t *h) | ||
4864 | { | ||
4865 | if (cciss_send_reset(h, CTLR_LUNID, CCISS_RESET_TYPE_CONTROLLER)) { | ||
4866 | dev_warn(&h->pdev->dev, "Resetting array controller failed.\n"); | ||
4867 | return -EIO; | ||
4868 | } | ||
4869 | |||
4870 | dev_info(&h->pdev->dev, "Waiting for board to soft reset.\n"); | ||
4871 | if (cciss_wait_for_board_state(h->pdev, h->vaddr, BOARD_NOT_READY)) { | ||
4872 | dev_warn(&h->pdev->dev, "Soft reset had no effect.\n"); | ||
4873 | return -1; | ||
4874 | } | ||
4875 | |||
4876 | dev_info(&h->pdev->dev, "Board reset, awaiting READY status.\n"); | ||
4877 | if (cciss_wait_for_board_state(h->pdev, h->vaddr, BOARD_READY)) { | ||
4878 | dev_warn(&h->pdev->dev, "Board failed to become ready " | ||
4879 | "after soft reset.\n"); | ||
4880 | return -1; | ||
4881 | } | ||
4882 | |||
4883 | return 0; | ||
4884 | } | ||
4885 | |||
4886 | static void cciss_undo_allocations_after_kdump_soft_reset(ctlr_info_t *h) | ||
4887 | { | ||
4888 | int ctlr = h->ctlr; | ||
4889 | |||
4890 | free_irq(h->intr[PERF_MODE_INT], h); | ||
4891 | #ifdef CONFIG_PCI_MSI | ||
4892 | if (h->msix_vector) | ||
4893 | pci_disable_msix(h->pdev); | ||
4894 | else if (h->msi_vector) | ||
4895 | pci_disable_msi(h->pdev); | ||
4896 | #endif /* CONFIG_PCI_MSI */ | ||
4897 | cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds); | ||
4898 | cciss_free_scatterlists(h); | ||
4899 | cciss_free_cmd_pool(h); | ||
4900 | kfree(h->blockFetchTable); | ||
4901 | if (h->reply_pool) | ||
4902 | pci_free_consistent(h->pdev, h->max_commands * sizeof(__u64), | ||
4903 | h->reply_pool, h->reply_pool_dhandle); | ||
4904 | if (h->transtable) | ||
4905 | iounmap(h->transtable); | ||
4906 | if (h->cfgtable) | ||
4907 | iounmap(h->cfgtable); | ||
4908 | if (h->vaddr) | ||
4909 | iounmap(h->vaddr); | ||
4910 | unregister_blkdev(h->major, h->devname); | ||
4911 | cciss_destroy_hba_sysfs_entry(h); | ||
4912 | pci_release_regions(h->pdev); | ||
4913 | kfree(h); | ||
4914 | hba[ctlr] = NULL; | ||
4915 | } | ||
4916 | |||
4594 | /* | 4917 | /* |
4595 | * This is it. Find all the controllers and register them. I really hate | 4918 | * This is it. Find all the controllers and register them. I really hate |
4596 | * stealing all these major device numbers. | 4919 | * stealing all these major device numbers. |
@@ -4601,15 +4924,28 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, | |||
4601 | { | 4924 | { |
4602 | int i; | 4925 | int i; |
4603 | int j = 0; | 4926 | int j = 0; |
4604 | int k = 0; | ||
4605 | int rc; | 4927 | int rc; |
4928 | int try_soft_reset = 0; | ||
4606 | int dac, return_code; | 4929 | int dac, return_code; |
4607 | InquiryData_struct *inq_buff; | 4930 | InquiryData_struct *inq_buff; |
4608 | ctlr_info_t *h; | 4931 | ctlr_info_t *h; |
4932 | unsigned long flags; | ||
4609 | 4933 | ||
4610 | rc = cciss_init_reset_devices(pdev); | 4934 | rc = cciss_init_reset_devices(pdev); |
4611 | if (rc) | 4935 | if (rc) { |
4612 | return rc; | 4936 | if (rc != -ENOTSUPP) |
4937 | return rc; | ||
4938 | /* If the reset fails in a particular way (it has no way to do | ||
4939 | * a proper hard reset, so returns -ENOTSUPP) we can try to do | ||
4940 | * a soft reset once we get the controller configured up to the | ||
4941 | * point that it can accept a command. | ||
4942 | */ | ||
4943 | try_soft_reset = 1; | ||
4944 | rc = 0; | ||
4945 | } | ||
4946 | |||
4947 | reinit_after_soft_reset: | ||
4948 | |||
4613 | i = alloc_cciss_hba(pdev); | 4949 | i = alloc_cciss_hba(pdev); |
4614 | if (i < 0) | 4950 | if (i < 0) |
4615 | return -1; | 4951 | return -1; |
@@ -4627,6 +4963,11 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, | |||
4627 | sprintf(h->devname, "cciss%d", i); | 4963 | sprintf(h->devname, "cciss%d", i); |
4628 | h->ctlr = i; | 4964 | h->ctlr = i; |
4629 | 4965 | ||
4966 | if (cciss_tape_cmds < 2) | ||
4967 | cciss_tape_cmds = 2; | ||
4968 | if (cciss_tape_cmds > 16) | ||
4969 | cciss_tape_cmds = 16; | ||
4970 | |||
4630 | init_completion(&h->scan_wait); | 4971 | init_completion(&h->scan_wait); |
4631 | 4972 | ||
4632 | if (cciss_create_hba_sysfs_entry(h)) | 4973 | if (cciss_create_hba_sysfs_entry(h)) |
@@ -4662,62 +5003,20 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, | |||
4662 | 5003 | ||
4663 | /* make sure the board interrupts are off */ | 5004 | /* make sure the board interrupts are off */ |
4664 | h->access.set_intr_mask(h, CCISS_INTR_OFF); | 5005 | h->access.set_intr_mask(h, CCISS_INTR_OFF); |
4665 | if (h->msi_vector || h->msix_vector) { | 5006 | rc = cciss_request_irq(h, do_cciss_msix_intr, do_cciss_intx); |
4666 | if (request_irq(h->intr[PERF_MODE_INT], | 5007 | if (rc) |
4667 | do_cciss_msix_intr, | 5008 | goto clean2; |
4668 | IRQF_DISABLED, h->devname, h)) { | ||
4669 | dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n", | ||
4670 | h->intr[PERF_MODE_INT], h->devname); | ||
4671 | goto clean2; | ||
4672 | } | ||
4673 | } else { | ||
4674 | if (request_irq(h->intr[PERF_MODE_INT], do_cciss_intx, | ||
4675 | IRQF_DISABLED, h->devname, h)) { | ||
4676 | dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n", | ||
4677 | h->intr[PERF_MODE_INT], h->devname); | ||
4678 | goto clean2; | ||
4679 | } | ||
4680 | } | ||
4681 | 5009 | ||
4682 | dev_info(&h->pdev->dev, "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n", | 5010 | dev_info(&h->pdev->dev, "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n", |
4683 | h->devname, pdev->device, pci_name(pdev), | 5011 | h->devname, pdev->device, pci_name(pdev), |
4684 | h->intr[PERF_MODE_INT], dac ? "" : " not"); | 5012 | h->intr[PERF_MODE_INT], dac ? "" : " not"); |
4685 | 5013 | ||
4686 | h->cmd_pool_bits = | 5014 | if (cciss_allocate_cmd_pool(h)) |
4687 | kmalloc(DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG) | ||
4688 | * sizeof(unsigned long), GFP_KERNEL); | ||
4689 | h->cmd_pool = (CommandList_struct *) | ||
4690 | pci_alloc_consistent(h->pdev, | ||
4691 | h->nr_cmds * sizeof(CommandList_struct), | ||
4692 | &(h->cmd_pool_dhandle)); | ||
4693 | h->errinfo_pool = (ErrorInfo_struct *) | ||
4694 | pci_alloc_consistent(h->pdev, | ||
4695 | h->nr_cmds * sizeof(ErrorInfo_struct), | ||
4696 | &(h->errinfo_pool_dhandle)); | ||
4697 | if ((h->cmd_pool_bits == NULL) | ||
4698 | || (h->cmd_pool == NULL) | ||
4699 | || (h->errinfo_pool == NULL)) { | ||
4700 | dev_err(&h->pdev->dev, "out of memory"); | ||
4701 | goto clean4; | 5015 | goto clean4; |
4702 | } | ||
4703 | 5016 | ||
4704 | /* Need space for temp scatter list */ | 5017 | if (cciss_allocate_scatterlists(h)) |
4705 | h->scatter_list = kmalloc(h->max_commands * | ||
4706 | sizeof(struct scatterlist *), | ||
4707 | GFP_KERNEL); | ||
4708 | if (!h->scatter_list) | ||
4709 | goto clean4; | 5018 | goto clean4; |
4710 | 5019 | ||
4711 | for (k = 0; k < h->nr_cmds; k++) { | ||
4712 | h->scatter_list[k] = kmalloc(sizeof(struct scatterlist) * | ||
4713 | h->maxsgentries, | ||
4714 | GFP_KERNEL); | ||
4715 | if (h->scatter_list[k] == NULL) { | ||
4716 | dev_err(&h->pdev->dev, | ||
4717 | "could not allocate s/g lists\n"); | ||
4718 | goto clean4; | ||
4719 | } | ||
4720 | } | ||
4721 | h->cmd_sg_list = cciss_allocate_sg_chain_blocks(h, | 5020 | h->cmd_sg_list = cciss_allocate_sg_chain_blocks(h, |
4722 | h->chainsize, h->nr_cmds); | 5021 | h->chainsize, h->nr_cmds); |
4723 | if (!h->cmd_sg_list && h->chainsize > 0) | 5022 | if (!h->cmd_sg_list && h->chainsize > 0) |
@@ -4741,6 +5040,62 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, | |||
4741 | h->gendisk[j] = NULL; | 5040 | h->gendisk[j] = NULL; |
4742 | } | 5041 | } |
4743 | 5042 | ||
5043 | /* At this point, the controller is ready to take commands. | ||
5044 | * Now, if reset_devices and the hard reset didn't work, try | ||
5045 | * the soft reset and see if that works. | ||
5046 | */ | ||
5047 | if (try_soft_reset) { | ||
5048 | |||
5049 | /* This is kind of gross. We may or may not get a completion | ||
5050 | * from the soft reset command, and if we do, then the value | ||
5051 | * from the fifo may or may not be valid. So, we wait 10 secs | ||
5052 | * after the reset throwing away any completions we get during | ||
5053 | * that time. Unregister the interrupt handler and register | ||
5054 | * fake ones to scoop up any residual completions. | ||
5055 | */ | ||
5056 | spin_lock_irqsave(&h->lock, flags); | ||
5057 | h->access.set_intr_mask(h, CCISS_INTR_OFF); | ||
5058 | spin_unlock_irqrestore(&h->lock, flags); | ||
5059 | free_irq(h->intr[PERF_MODE_INT], h); | ||
5060 | rc = cciss_request_irq(h, cciss_msix_discard_completions, | ||
5061 | cciss_intx_discard_completions); | ||
5062 | if (rc) { | ||
5063 | dev_warn(&h->pdev->dev, "Failed to request_irq after " | ||
5064 | "soft reset.\n"); | ||
5065 | goto clean4; | ||
5066 | } | ||
5067 | |||
5068 | rc = cciss_kdump_soft_reset(h); | ||
5069 | if (rc) { | ||
5070 | dev_warn(&h->pdev->dev, "Soft reset failed.\n"); | ||
5071 | goto clean4; | ||
5072 | } | ||
5073 | |||
5074 | dev_info(&h->pdev->dev, "Board READY.\n"); | ||
5075 | dev_info(&h->pdev->dev, | ||
5076 | "Waiting for stale completions to drain.\n"); | ||
5077 | h->access.set_intr_mask(h, CCISS_INTR_ON); | ||
5078 | msleep(10000); | ||
5079 | h->access.set_intr_mask(h, CCISS_INTR_OFF); | ||
5080 | |||
5081 | rc = controller_reset_failed(h->cfgtable); | ||
5082 | if (rc) | ||
5083 | dev_info(&h->pdev->dev, | ||
5084 | "Soft reset appears to have failed.\n"); | ||
5085 | |||
5086 | /* since the controller's reset, we have to go back and re-init | ||
5087 | * everything. Easiest to just forget what we've done and do it | ||
5088 | * all over again. | ||
5089 | */ | ||
5090 | cciss_undo_allocations_after_kdump_soft_reset(h); | ||
5091 | try_soft_reset = 0; | ||
5092 | if (rc) | ||
5093 | /* don't go to clean4, we already unallocated */ | ||
5094 | return -ENODEV; | ||
5095 | |||
5096 | goto reinit_after_soft_reset; | ||
5097 | } | ||
5098 | |||
4744 | cciss_scsi_setup(h); | 5099 | cciss_scsi_setup(h); |
4745 | 5100 | ||
4746 | /* Turn the interrupts on so we can service requests */ | 5101 | /* Turn the interrupts on so we can service requests */ |
@@ -4775,21 +5130,9 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, | |||
4775 | return 1; | 5130 | return 1; |
4776 | 5131 | ||
4777 | clean4: | 5132 | clean4: |
4778 | kfree(h->cmd_pool_bits); | 5133 | cciss_free_cmd_pool(h); |
4779 | /* Free up sg elements */ | 5134 | cciss_free_scatterlists(h); |
4780 | for (k-- ; k >= 0; k--) | ||
4781 | kfree(h->scatter_list[k]); | ||
4782 | kfree(h->scatter_list); | ||
4783 | cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds); | 5135 | cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds); |
4784 | if (h->cmd_pool) | ||
4785 | pci_free_consistent(h->pdev, | ||
4786 | h->nr_cmds * sizeof(CommandList_struct), | ||
4787 | h->cmd_pool, h->cmd_pool_dhandle); | ||
4788 | if (h->errinfo_pool) | ||
4789 | pci_free_consistent(h->pdev, | ||
4790 | h->nr_cmds * sizeof(ErrorInfo_struct), | ||
4791 | h->errinfo_pool, | ||
4792 | h->errinfo_pool_dhandle); | ||
4793 | free_irq(h->intr[PERF_MODE_INT], h); | 5136 | free_irq(h->intr[PERF_MODE_INT], h); |
4794 | clean2: | 5137 | clean2: |
4795 | unregister_blkdev(h->major, h->devname); | 5138 | unregister_blkdev(h->major, h->devname); |
@@ -4887,16 +5230,16 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev) | |||
4887 | iounmap(h->cfgtable); | 5230 | iounmap(h->cfgtable); |
4888 | iounmap(h->vaddr); | 5231 | iounmap(h->vaddr); |
4889 | 5232 | ||
4890 | pci_free_consistent(h->pdev, h->nr_cmds * sizeof(CommandList_struct), | 5233 | cciss_free_cmd_pool(h); |
4891 | h->cmd_pool, h->cmd_pool_dhandle); | ||
4892 | pci_free_consistent(h->pdev, h->nr_cmds * sizeof(ErrorInfo_struct), | ||
4893 | h->errinfo_pool, h->errinfo_pool_dhandle); | ||
4894 | kfree(h->cmd_pool_bits); | ||
4895 | /* Free up sg elements */ | 5234 | /* Free up sg elements */ |
4896 | for (j = 0; j < h->nr_cmds; j++) | 5235 | for (j = 0; j < h->nr_cmds; j++) |
4897 | kfree(h->scatter_list[j]); | 5236 | kfree(h->scatter_list[j]); |
4898 | kfree(h->scatter_list); | 5237 | kfree(h->scatter_list); |
4899 | cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds); | 5238 | cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds); |
5239 | kfree(h->blockFetchTable); | ||
5240 | if (h->reply_pool) | ||
5241 | pci_free_consistent(h->pdev, h->max_commands * sizeof(__u64), | ||
5242 | h->reply_pool, h->reply_pool_dhandle); | ||
4900 | /* | 5243 | /* |
4901 | * Deliberately omit pci_disable_device(): it does something nasty to | 5244 | * Deliberately omit pci_disable_device(): it does something nasty to |
4902 | * Smart Array controllers that pci_enable_device does not undo | 5245 | * Smart Array controllers that pci_enable_device does not undo |
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h index 554bbd907d14..16b4d58d84dd 100644 --- a/drivers/block/cciss.h +++ b/drivers/block/cciss.h | |||
@@ -200,7 +200,7 @@ struct ctlr_info | |||
200 | * the above. | 200 | * the above. |
201 | */ | 201 | */ |
202 | #define CCISS_BOARD_READY_WAIT_SECS (120) | 202 | #define CCISS_BOARD_READY_WAIT_SECS (120) |
203 | #define CCISS_BOARD_NOT_READY_WAIT_SECS (10) | 203 | #define CCISS_BOARD_NOT_READY_WAIT_SECS (100) |
204 | #define CCISS_BOARD_READY_POLL_INTERVAL_MSECS (100) | 204 | #define CCISS_BOARD_READY_POLL_INTERVAL_MSECS (100) |
205 | #define CCISS_BOARD_READY_ITERATIONS \ | 205 | #define CCISS_BOARD_READY_ITERATIONS \ |
206 | ((CCISS_BOARD_READY_WAIT_SECS * 1000) / \ | 206 | ((CCISS_BOARD_READY_WAIT_SECS * 1000) / \ |
@@ -209,8 +209,9 @@ struct ctlr_info | |||
209 | ((CCISS_BOARD_NOT_READY_WAIT_SECS * 1000) / \ | 209 | ((CCISS_BOARD_NOT_READY_WAIT_SECS * 1000) / \ |
210 | CCISS_BOARD_READY_POLL_INTERVAL_MSECS) | 210 | CCISS_BOARD_READY_POLL_INTERVAL_MSECS) |
211 | #define CCISS_POST_RESET_PAUSE_MSECS (3000) | 211 | #define CCISS_POST_RESET_PAUSE_MSECS (3000) |
212 | #define CCISS_POST_RESET_NOOP_INTERVAL_MSECS (1000) | 212 | #define CCISS_POST_RESET_NOOP_INTERVAL_MSECS (4000) |
213 | #define CCISS_POST_RESET_NOOP_RETRIES (12) | 213 | #define CCISS_POST_RESET_NOOP_RETRIES (12) |
214 | #define CCISS_POST_RESET_NOOP_TIMEOUT_MSECS (10000) | ||
214 | 215 | ||
215 | /* | 216 | /* |
216 | Send the command to the hardware | 217 | Send the command to the hardware |
@@ -239,11 +240,13 @@ static void SA5_intr_mask(ctlr_info_t *h, unsigned long val) | |||
239 | { /* Turn interrupts on */ | 240 | { /* Turn interrupts on */ |
240 | h->interrupts_enabled = 1; | 241 | h->interrupts_enabled = 1; |
241 | writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); | 242 | writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); |
243 | (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); | ||
242 | } else /* Turn them off */ | 244 | } else /* Turn them off */ |
243 | { | 245 | { |
244 | h->interrupts_enabled = 0; | 246 | h->interrupts_enabled = 0; |
245 | writel( SA5_INTR_OFF, | 247 | writel( SA5_INTR_OFF, |
246 | h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); | 248 | h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); |
249 | (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); | ||
247 | } | 250 | } |
248 | } | 251 | } |
249 | /* | 252 | /* |
@@ -257,11 +260,13 @@ static void SA5B_intr_mask(ctlr_info_t *h, unsigned long val) | |||
257 | { /* Turn interrupts on */ | 260 | { /* Turn interrupts on */ |
258 | h->interrupts_enabled = 1; | 261 | h->interrupts_enabled = 1; |
259 | writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); | 262 | writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); |
263 | (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); | ||
260 | } else /* Turn them off */ | 264 | } else /* Turn them off */ |
261 | { | 265 | { |
262 | h->interrupts_enabled = 0; | 266 | h->interrupts_enabled = 0; |
263 | writel( SA5B_INTR_OFF, | 267 | writel( SA5B_INTR_OFF, |
264 | h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); | 268 | h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); |
269 | (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); | ||
265 | } | 270 | } |
266 | } | 271 | } |
267 | 272 | ||
@@ -271,10 +276,12 @@ static void SA5_performant_intr_mask(ctlr_info_t *h, unsigned long val) | |||
271 | if (val) { /* turn on interrupts */ | 276 | if (val) { /* turn on interrupts */ |
272 | h->interrupts_enabled = 1; | 277 | h->interrupts_enabled = 1; |
273 | writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); | 278 | writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); |
279 | (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); | ||
274 | } else { | 280 | } else { |
275 | h->interrupts_enabled = 0; | 281 | h->interrupts_enabled = 0; |
276 | writel(SA5_PERF_INTR_OFF, | 282 | writel(SA5_PERF_INTR_OFF, |
277 | h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); | 283 | h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); |
284 | (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); | ||
278 | } | 285 | } |
279 | } | 286 | } |
280 | 287 | ||
diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h index cd441bef031f..d9be6b4d49a6 100644 --- a/drivers/block/cciss_cmd.h +++ b/drivers/block/cciss_cmd.h | |||
@@ -53,6 +53,7 @@ | |||
53 | #define CFGTBL_ChangeReq 0x00000001l | 53 | #define CFGTBL_ChangeReq 0x00000001l |
54 | #define CFGTBL_AccCmds 0x00000001l | 54 | #define CFGTBL_AccCmds 0x00000001l |
55 | #define DOORBELL_CTLR_RESET 0x00000004l | 55 | #define DOORBELL_CTLR_RESET 0x00000004l |
56 | #define DOORBELL_CTLR_RESET2 0x00000020l | ||
56 | 57 | ||
57 | #define CFGTBL_Trans_Simple 0x00000002l | 58 | #define CFGTBL_Trans_Simple 0x00000002l |
58 | #define CFGTBL_Trans_Performant 0x00000004l | 59 | #define CFGTBL_Trans_Performant 0x00000004l |
@@ -142,6 +143,14 @@ typedef struct _ReadCapdata_struct_16 | |||
142 | #define BMIC_CACHE_FLUSH 0xc2 | 143 | #define BMIC_CACHE_FLUSH 0xc2 |
143 | #define CCISS_CACHE_FLUSH 0x01 /* C2 was already being used by CCISS */ | 144 | #define CCISS_CACHE_FLUSH 0x01 /* C2 was already being used by CCISS */ |
144 | 145 | ||
146 | #define CCISS_ABORT_MSG 0x00 | ||
147 | #define CCISS_RESET_MSG 0x01 | ||
148 | #define CCISS_RESET_TYPE_CONTROLLER 0x00 | ||
149 | #define CCISS_RESET_TYPE_BUS 0x01 | ||
150 | #define CCISS_RESET_TYPE_TARGET 0x03 | ||
151 | #define CCISS_RESET_TYPE_LUN 0x04 | ||
152 | #define CCISS_NOOP_MSG 0x03 | ||
153 | |||
145 | /* Command List Structure */ | 154 | /* Command List Structure */ |
146 | #define CTLR_LUNID "\0\0\0\0\0\0\0\0" | 155 | #define CTLR_LUNID "\0\0\0\0\0\0\0\0" |
147 | 156 | ||
@@ -235,6 +244,8 @@ typedef struct _CfgTable_struct { | |||
235 | u8 reserved[0x78 - 0x58]; | 244 | u8 reserved[0x78 - 0x58]; |
236 | u32 misc_fw_support; /* offset 0x78 */ | 245 | u32 misc_fw_support; /* offset 0x78 */ |
237 | #define MISC_FW_DOORBELL_RESET (0x02) | 246 | #define MISC_FW_DOORBELL_RESET (0x02) |
247 | #define MISC_FW_DOORBELL_RESET2 (0x10) | ||
248 | u8 driver_version[32]; | ||
238 | } CfgTable_struct; | 249 | } CfgTable_struct; |
239 | 250 | ||
240 | struct TransTable_struct { | 251 | struct TransTable_struct { |
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c index df793803f5ae..696100241a6f 100644 --- a/drivers/block/cciss_scsi.c +++ b/drivers/block/cciss_scsi.c | |||
@@ -84,7 +84,6 @@ static struct scsi_host_template cciss_driver_template = { | |||
84 | .proc_name = "cciss", | 84 | .proc_name = "cciss", |
85 | .proc_info = cciss_scsi_proc_info, | 85 | .proc_info = cciss_scsi_proc_info, |
86 | .queuecommand = cciss_scsi_queue_command, | 86 | .queuecommand = cciss_scsi_queue_command, |
87 | .can_queue = SCSI_CCISS_CAN_QUEUE, | ||
88 | .this_id = 7, | 87 | .this_id = 7, |
89 | .cmd_per_lun = 1, | 88 | .cmd_per_lun = 1, |
90 | .use_clustering = DISABLE_CLUSTERING, | 89 | .use_clustering = DISABLE_CLUSTERING, |
@@ -108,16 +107,13 @@ struct cciss_scsi_cmd_stack_elem_t { | |||
108 | 107 | ||
109 | #pragma pack() | 108 | #pragma pack() |
110 | 109 | ||
111 | #define CMD_STACK_SIZE (SCSI_CCISS_CAN_QUEUE * \ | ||
112 | CCISS_MAX_SCSI_DEVS_PER_HBA + 2) | ||
113 | // plus two for init time usage | ||
114 | |||
115 | #pragma pack(1) | 110 | #pragma pack(1) |
116 | struct cciss_scsi_cmd_stack_t { | 111 | struct cciss_scsi_cmd_stack_t { |
117 | struct cciss_scsi_cmd_stack_elem_t *pool; | 112 | struct cciss_scsi_cmd_stack_elem_t *pool; |
118 | struct cciss_scsi_cmd_stack_elem_t *elem[CMD_STACK_SIZE]; | 113 | struct cciss_scsi_cmd_stack_elem_t **elem; |
119 | dma_addr_t cmd_pool_handle; | 114 | dma_addr_t cmd_pool_handle; |
120 | int top; | 115 | int top; |
116 | int nelems; | ||
121 | }; | 117 | }; |
122 | #pragma pack() | 118 | #pragma pack() |
123 | 119 | ||
@@ -191,7 +187,7 @@ scsi_cmd_free(ctlr_info_t *h, CommandList_struct *c) | |||
191 | sa = h->scsi_ctlr; | 187 | sa = h->scsi_ctlr; |
192 | stk = &sa->cmd_stack; | 188 | stk = &sa->cmd_stack; |
193 | stk->top++; | 189 | stk->top++; |
194 | if (stk->top >= CMD_STACK_SIZE) { | 190 | if (stk->top >= stk->nelems) { |
195 | dev_err(&h->pdev->dev, | 191 | dev_err(&h->pdev->dev, |
196 | "scsi_cmd_free called too many times.\n"); | 192 | "scsi_cmd_free called too many times.\n"); |
197 | BUG(); | 193 | BUG(); |
@@ -206,13 +202,14 @@ scsi_cmd_stack_setup(ctlr_info_t *h, struct cciss_scsi_adapter_data_t *sa) | |||
206 | struct cciss_scsi_cmd_stack_t *stk; | 202 | struct cciss_scsi_cmd_stack_t *stk; |
207 | size_t size; | 203 | size_t size; |
208 | 204 | ||
205 | stk = &sa->cmd_stack; | ||
206 | stk->nelems = cciss_tape_cmds + 2; | ||
209 | sa->cmd_sg_list = cciss_allocate_sg_chain_blocks(h, | 207 | sa->cmd_sg_list = cciss_allocate_sg_chain_blocks(h, |
210 | h->chainsize, CMD_STACK_SIZE); | 208 | h->chainsize, stk->nelems); |
211 | if (!sa->cmd_sg_list && h->chainsize > 0) | 209 | if (!sa->cmd_sg_list && h->chainsize > 0) |
212 | return -ENOMEM; | 210 | return -ENOMEM; |
213 | 211 | ||
214 | stk = &sa->cmd_stack; | 212 | size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * stk->nelems; |
215 | size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * CMD_STACK_SIZE; | ||
216 | 213 | ||
217 | /* Check alignment, see cciss_cmd.h near CommandList_struct def. */ | 214 | /* Check alignment, see cciss_cmd.h near CommandList_struct def. */ |
218 | BUILD_BUG_ON((sizeof(*stk->pool) % COMMANDLIST_ALIGNMENT) != 0); | 215 | BUILD_BUG_ON((sizeof(*stk->pool) % COMMANDLIST_ALIGNMENT) != 0); |
@@ -221,18 +218,23 @@ scsi_cmd_stack_setup(ctlr_info_t *h, struct cciss_scsi_adapter_data_t *sa) | |||
221 | pci_alloc_consistent(h->pdev, size, &stk->cmd_pool_handle); | 218 | pci_alloc_consistent(h->pdev, size, &stk->cmd_pool_handle); |
222 | 219 | ||
223 | if (stk->pool == NULL) { | 220 | if (stk->pool == NULL) { |
224 | cciss_free_sg_chain_blocks(sa->cmd_sg_list, CMD_STACK_SIZE); | 221 | cciss_free_sg_chain_blocks(sa->cmd_sg_list, stk->nelems); |
225 | sa->cmd_sg_list = NULL; | 222 | sa->cmd_sg_list = NULL; |
226 | return -ENOMEM; | 223 | return -ENOMEM; |
227 | } | 224 | } |
228 | 225 | stk->elem = kmalloc(sizeof(stk->elem[0]) * stk->nelems, GFP_KERNEL); | |
229 | for (i=0; i<CMD_STACK_SIZE; i++) { | 226 | if (!stk->elem) { |
227 | pci_free_consistent(h->pdev, size, stk->pool, | ||
228 | stk->cmd_pool_handle); | ||
229 | return -1; | ||
230 | } | ||
231 | for (i = 0; i < stk->nelems; i++) { | ||
230 | stk->elem[i] = &stk->pool[i]; | 232 | stk->elem[i] = &stk->pool[i]; |
231 | stk->elem[i]->busaddr = (__u32) (stk->cmd_pool_handle + | 233 | stk->elem[i]->busaddr = (__u32) (stk->cmd_pool_handle + |
232 | (sizeof(struct cciss_scsi_cmd_stack_elem_t) * i)); | 234 | (sizeof(struct cciss_scsi_cmd_stack_elem_t) * i)); |
233 | stk->elem[i]->cmdindex = i; | 235 | stk->elem[i]->cmdindex = i; |
234 | } | 236 | } |
235 | stk->top = CMD_STACK_SIZE-1; | 237 | stk->top = stk->nelems-1; |
236 | return 0; | 238 | return 0; |
237 | } | 239 | } |
238 | 240 | ||
@@ -245,16 +247,18 @@ scsi_cmd_stack_free(ctlr_info_t *h) | |||
245 | 247 | ||
246 | sa = h->scsi_ctlr; | 248 | sa = h->scsi_ctlr; |
247 | stk = &sa->cmd_stack; | 249 | stk = &sa->cmd_stack; |
248 | if (stk->top != CMD_STACK_SIZE-1) { | 250 | if (stk->top != stk->nelems-1) { |
249 | dev_warn(&h->pdev->dev, | 251 | dev_warn(&h->pdev->dev, |
250 | "bug: %d scsi commands are still outstanding.\n", | 252 | "bug: %d scsi commands are still outstanding.\n", |
251 | CMD_STACK_SIZE - stk->top); | 253 | stk->nelems - stk->top); |
252 | } | 254 | } |
253 | size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * CMD_STACK_SIZE; | 255 | size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * stk->nelems; |
254 | 256 | ||
255 | pci_free_consistent(h->pdev, size, stk->pool, stk->cmd_pool_handle); | 257 | pci_free_consistent(h->pdev, size, stk->pool, stk->cmd_pool_handle); |
256 | stk->pool = NULL; | 258 | stk->pool = NULL; |
257 | cciss_free_sg_chain_blocks(sa->cmd_sg_list, CMD_STACK_SIZE); | 259 | cciss_free_sg_chain_blocks(sa->cmd_sg_list, stk->nelems); |
260 | kfree(stk->elem); | ||
261 | stk->elem = NULL; | ||
258 | } | 262 | } |
259 | 263 | ||
260 | #if 0 | 264 | #if 0 |
@@ -859,6 +863,7 @@ cciss_scsi_detect(ctlr_info_t *h) | |||
859 | sh->io_port = 0; // good enough? FIXME, | 863 | sh->io_port = 0; // good enough? FIXME, |
860 | sh->n_io_port = 0; // I don't think we use these two... | 864 | sh->n_io_port = 0; // I don't think we use these two... |
861 | sh->this_id = SELF_SCSI_ID; | 865 | sh->this_id = SELF_SCSI_ID; |
866 | sh->can_queue = cciss_tape_cmds; | ||
862 | sh->sg_tablesize = h->maxsgentries; | 867 | sh->sg_tablesize = h->maxsgentries; |
863 | sh->max_cmd_len = MAX_COMMAND_SIZE; | 868 | sh->max_cmd_len = MAX_COMMAND_SIZE; |
864 | 869 | ||
diff --git a/drivers/block/cciss_scsi.h b/drivers/block/cciss_scsi.h index 6d5822fe851a..e71d986727ca 100644 --- a/drivers/block/cciss_scsi.h +++ b/drivers/block/cciss_scsi.h | |||
@@ -36,13 +36,9 @@ | |||
36 | addressible natively, and may in fact turn | 36 | addressible natively, and may in fact turn |
37 | out to be not scsi at all. */ | 37 | out to be not scsi at all. */ |
38 | 38 | ||
39 | #define SCSI_CCISS_CAN_QUEUE 2 | ||
40 | 39 | ||
41 | /* | 40 | /* |
42 | 41 | ||
43 | Note, cmd_per_lun could give us some trouble, so I'm setting it very low. | ||
44 | Likewise, SCSI_CCISS_CAN_QUEUE is set very conservatively. | ||
45 | |||
46 | If the upper scsi layer tries to track how many commands we have | 42 | If the upper scsi layer tries to track how many commands we have |
47 | outstanding, it will be operating under the misapprehension that it is | 43 | outstanding, it will be operating under the misapprehension that it is |
48 | the only one sending us requests. We also have the block interface, | 44 | the only one sending us requests. We also have the block interface, |
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index c6828b68d77b..09ef9a878ef0 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c | |||
@@ -28,7 +28,7 @@ | |||
28 | #include "drbd_int.h" | 28 | #include "drbd_int.h" |
29 | #include "drbd_wrappers.h" | 29 | #include "drbd_wrappers.h" |
30 | 30 | ||
31 | /* We maintain a trivial check sum in our on disk activity log. | 31 | /* We maintain a trivial checksum in our on disk activity log. |
32 | * With that we can ensure correct operation even when the storage | 32 | * With that we can ensure correct operation even when the storage |
33 | * device might do a partial (last) sector write while losing power. | 33 | * device might do a partial (last) sector write while losing power. |
34 | */ | 34 | */ |
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 76210ba401ac..f440a02dfdb1 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c | |||
@@ -74,7 +74,7 @@ | |||
74 | * as we are "attached" to a local disk, which at 32 GiB for 1PiB storage | 74 | * as we are "attached" to a local disk, which at 32 GiB for 1PiB storage |
75 | * seems excessive. | 75 | * seems excessive. |
76 | * | 76 | * |
77 | * We plan to reduce the amount of in-core bitmap pages by pageing them in | 77 | * We plan to reduce the amount of in-core bitmap pages by paging them in |
78 | * and out against their on-disk location as necessary, but need to make | 78 | * and out against their on-disk location as necessary, but need to make |
79 | * sure we don't cause too much meta data IO, and must not deadlock in | 79 | * sure we don't cause too much meta data IO, and must not deadlock in |
80 | * tight memory situations. This needs some more work. | 80 | * tight memory situations. This needs some more work. |
@@ -200,7 +200,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev) | |||
200 | * we if bits have been cleared since last IO. */ | 200 | * we if bits have been cleared since last IO. */ |
201 | #define BM_PAGE_LAZY_WRITEOUT 28 | 201 | #define BM_PAGE_LAZY_WRITEOUT 28 |
202 | 202 | ||
203 | /* store_page_idx uses non-atomic assingment. It is only used directly after | 203 | /* store_page_idx uses non-atomic assignment. It is only used directly after |
204 | * allocating the page. All other bm_set_page_* and bm_clear_page_* need to | 204 | * allocating the page. All other bm_set_page_* and bm_clear_page_* need to |
205 | * use atomic bit manipulation, as set_out_of_sync (and therefore bitmap | 205 | * use atomic bit manipulation, as set_out_of_sync (and therefore bitmap |
206 | * changes) may happen from various contexts, and wait_on_bit/wake_up_bit | 206 | * changes) may happen from various contexts, and wait_on_bit/wake_up_bit |
@@ -318,7 +318,7 @@ static void bm_unmap(unsigned long *p_addr) | |||
318 | /* word offset from start of bitmap to word number _in_page_ | 318 | /* word offset from start of bitmap to word number _in_page_ |
319 | * modulo longs per page | 319 | * modulo longs per page |
320 | #define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long)) | 320 | #define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long)) |
321 | hm, well, Philipp thinks gcc might not optimze the % into & (... - 1) | 321 | hm, well, Philipp thinks gcc might not optimize the % into & (... - 1) |
322 | so do it explicitly: | 322 | so do it explicitly: |
323 | */ | 323 | */ |
324 | #define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1)) | 324 | #define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1)) |
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index d871b14ed5a1..ef2ceed3be4b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h | |||
@@ -700,7 +700,7 @@ struct drbd_request { | |||
700 | * see drbd_endio_pri(). */ | 700 | * see drbd_endio_pri(). */ |
701 | struct bio *private_bio; | 701 | struct bio *private_bio; |
702 | 702 | ||
703 | struct hlist_node colision; | 703 | struct hlist_node collision; |
704 | sector_t sector; | 704 | sector_t sector; |
705 | unsigned int size; | 705 | unsigned int size; |
706 | unsigned int epoch; /* barrier_nr */ | 706 | unsigned int epoch; /* barrier_nr */ |
@@ -766,7 +766,7 @@ struct digest_info { | |||
766 | 766 | ||
767 | struct drbd_epoch_entry { | 767 | struct drbd_epoch_entry { |
768 | struct drbd_work w; | 768 | struct drbd_work w; |
769 | struct hlist_node colision; | 769 | struct hlist_node collision; |
770 | struct drbd_epoch *epoch; /* for writes */ | 770 | struct drbd_epoch *epoch; /* for writes */ |
771 | struct drbd_conf *mdev; | 771 | struct drbd_conf *mdev; |
772 | struct page *pages; | 772 | struct page *pages; |
@@ -1129,6 +1129,8 @@ struct drbd_conf { | |||
1129 | int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */ | 1129 | int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */ |
1130 | int rs_planed; /* resync sectors already planned */ | 1130 | int rs_planed; /* resync sectors already planned */ |
1131 | atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */ | 1131 | atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */ |
1132 | int peer_max_bio_size; | ||
1133 | int local_max_bio_size; | ||
1132 | }; | 1134 | }; |
1133 | 1135 | ||
1134 | static inline struct drbd_conf *minor_to_mdev(unsigned int minor) | 1136 | static inline struct drbd_conf *minor_to_mdev(unsigned int minor) |
@@ -1218,8 +1220,6 @@ extern void drbd_free_resources(struct drbd_conf *mdev); | |||
1218 | extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, | 1220 | extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, |
1219 | unsigned int set_size); | 1221 | unsigned int set_size); |
1220 | extern void tl_clear(struct drbd_conf *mdev); | 1222 | extern void tl_clear(struct drbd_conf *mdev); |
1221 | enum drbd_req_event; | ||
1222 | extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what); | ||
1223 | extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *); | 1223 | extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *); |
1224 | extern void drbd_free_sock(struct drbd_conf *mdev); | 1224 | extern void drbd_free_sock(struct drbd_conf *mdev); |
1225 | extern int drbd_send(struct drbd_conf *mdev, struct socket *sock, | 1225 | extern int drbd_send(struct drbd_conf *mdev, struct socket *sock, |
@@ -1434,6 +1434,7 @@ struct bm_extent { | |||
1434 | * hash table. */ | 1434 | * hash table. */ |
1435 | #define HT_SHIFT 8 | 1435 | #define HT_SHIFT 8 |
1436 | #define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT)) | 1436 | #define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT)) |
1437 | #define DRBD_MAX_BIO_SIZE_SAFE (1 << 12) /* Works always = 4k */ | ||
1437 | 1438 | ||
1438 | #define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32 KiB of data */ | 1439 | #define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32 KiB of data */ |
1439 | 1440 | ||
@@ -1518,9 +1519,9 @@ extern void drbd_resume_io(struct drbd_conf *mdev); | |||
1518 | extern char *ppsize(char *buf, unsigned long long size); | 1519 | extern char *ppsize(char *buf, unsigned long long size); |
1519 | extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int); | 1520 | extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int); |
1520 | enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 }; | 1521 | enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 }; |
1521 | extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); | 1522 | extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); |
1522 | extern void resync_after_online_grow(struct drbd_conf *); | 1523 | extern void resync_after_online_grow(struct drbd_conf *); |
1523 | extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local); | 1524 | extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev); |
1524 | extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev, | 1525 | extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev, |
1525 | enum drbd_role new_role, | 1526 | enum drbd_role new_role, |
1526 | int force); | 1527 | int force); |
@@ -1828,6 +1829,8 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, | |||
1828 | if (!forcedetach) { | 1829 | if (!forcedetach) { |
1829 | if (__ratelimit(&drbd_ratelimit_state)) | 1830 | if (__ratelimit(&drbd_ratelimit_state)) |
1830 | dev_err(DEV, "Local IO failed in %s.\n", where); | 1831 | dev_err(DEV, "Local IO failed in %s.\n", where); |
1832 | if (mdev->state.disk > D_INCONSISTENT) | ||
1833 | _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL); | ||
1831 | break; | 1834 | break; |
1832 | } | 1835 | } |
1833 | /* NOTE fall through to detach case if forcedetach set */ | 1836 | /* NOTE fall through to detach case if forcedetach set */ |
@@ -2153,6 +2156,10 @@ static inline int get_net_conf(struct drbd_conf *mdev) | |||
2153 | static inline void put_ldev(struct drbd_conf *mdev) | 2156 | static inline void put_ldev(struct drbd_conf *mdev) |
2154 | { | 2157 | { |
2155 | int i = atomic_dec_return(&mdev->local_cnt); | 2158 | int i = atomic_dec_return(&mdev->local_cnt); |
2159 | |||
2160 | /* This may be called from some endio handler, | ||
2161 | * so we must not sleep here. */ | ||
2162 | |||
2156 | __release(local); | 2163 | __release(local); |
2157 | D_ASSERT(i >= 0); | 2164 | D_ASSERT(i >= 0); |
2158 | if (i == 0) { | 2165 | if (i == 0) { |
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 5b525c179f39..0358e55356c8 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c | |||
@@ -745,6 +745,9 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) | |||
745 | mdev->agreed_pro_version < 88) | 745 | mdev->agreed_pro_version < 88) |
746 | rv = SS_NOT_SUPPORTED; | 746 | rv = SS_NOT_SUPPORTED; |
747 | 747 | ||
748 | else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN) | ||
749 | rv = SS_CONNECTED_OUTDATES; | ||
750 | |||
748 | return rv; | 751 | return rv; |
749 | } | 752 | } |
750 | 753 | ||
@@ -1565,6 +1568,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1565 | put_ldev(mdev); | 1568 | put_ldev(mdev); |
1566 | } | 1569 | } |
1567 | 1570 | ||
1571 | /* Notify peer that I had a local IO error, and did not detach. */ | ||
1572 | if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT) | ||
1573 | drbd_send_state(mdev); | ||
1574 | |||
1568 | /* Disks got bigger while they were detached */ | 1575 | /* Disks got bigger while they were detached */ |
1569 | if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && | 1576 | if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && |
1570 | test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) { | 1577 | test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) { |
@@ -2064,7 +2071,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl | |||
2064 | { | 2071 | { |
2065 | struct p_sizes p; | 2072 | struct p_sizes p; |
2066 | sector_t d_size, u_size; | 2073 | sector_t d_size, u_size; |
2067 | int q_order_type; | 2074 | int q_order_type, max_bio_size; |
2068 | int ok; | 2075 | int ok; |
2069 | 2076 | ||
2070 | if (get_ldev_if_state(mdev, D_NEGOTIATING)) { | 2077 | if (get_ldev_if_state(mdev, D_NEGOTIATING)) { |
@@ -2072,17 +2079,20 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl | |||
2072 | d_size = drbd_get_max_capacity(mdev->ldev); | 2079 | d_size = drbd_get_max_capacity(mdev->ldev); |
2073 | u_size = mdev->ldev->dc.disk_size; | 2080 | u_size = mdev->ldev->dc.disk_size; |
2074 | q_order_type = drbd_queue_order_type(mdev); | 2081 | q_order_type = drbd_queue_order_type(mdev); |
2082 | max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9; | ||
2083 | max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE); | ||
2075 | put_ldev(mdev); | 2084 | put_ldev(mdev); |
2076 | } else { | 2085 | } else { |
2077 | d_size = 0; | 2086 | d_size = 0; |
2078 | u_size = 0; | 2087 | u_size = 0; |
2079 | q_order_type = QUEUE_ORDERED_NONE; | 2088 | q_order_type = QUEUE_ORDERED_NONE; |
2089 | max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */ | ||
2080 | } | 2090 | } |
2081 | 2091 | ||
2082 | p.d_size = cpu_to_be64(d_size); | 2092 | p.d_size = cpu_to_be64(d_size); |
2083 | p.u_size = cpu_to_be64(u_size); | 2093 | p.u_size = cpu_to_be64(u_size); |
2084 | p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); | 2094 | p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); |
2085 | p.max_bio_size = cpu_to_be32(queue_max_hw_sectors(mdev->rq_queue) << 9); | 2095 | p.max_bio_size = cpu_to_be32(max_bio_size); |
2086 | p.queue_order_type = cpu_to_be16(q_order_type); | 2096 | p.queue_order_type = cpu_to_be16(q_order_type); |
2087 | p.dds_flags = cpu_to_be16(flags); | 2097 | p.dds_flags = cpu_to_be16(flags); |
2088 | 2098 | ||
@@ -2722,7 +2732,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) | |||
2722 | 2732 | ||
2723 | /* double check digest, sometimes buffers have been modified in flight. */ | 2733 | /* double check digest, sometimes buffers have been modified in flight. */ |
2724 | if (dgs > 0 && dgs <= 64) { | 2734 | if (dgs > 0 && dgs <= 64) { |
2725 | /* 64 byte, 512 bit, is the larges digest size | 2735 | /* 64 byte, 512 bit, is the largest digest size |
2726 | * currently supported in kernel crypto. */ | 2736 | * currently supported in kernel crypto. */ |
2727 | unsigned char digest[64]; | 2737 | unsigned char digest[64]; |
2728 | drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest); | 2738 | drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest); |
@@ -3041,6 +3051,8 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) | |||
3041 | mdev->agreed_pro_version = PRO_VERSION_MAX; | 3051 | mdev->agreed_pro_version = PRO_VERSION_MAX; |
3042 | mdev->write_ordering = WO_bdev_flush; | 3052 | mdev->write_ordering = WO_bdev_flush; |
3043 | mdev->resync_wenr = LC_FREE; | 3053 | mdev->resync_wenr = LC_FREE; |
3054 | mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; | ||
3055 | mdev->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; | ||
3044 | } | 3056 | } |
3045 | 3057 | ||
3046 | void drbd_mdev_cleanup(struct drbd_conf *mdev) | 3058 | void drbd_mdev_cleanup(struct drbd_conf *mdev) |
@@ -3275,7 +3287,7 @@ static void drbd_delete_device(unsigned int minor) | |||
3275 | 3287 | ||
3276 | drbd_release_ee_lists(mdev); | 3288 | drbd_release_ee_lists(mdev); |
3277 | 3289 | ||
3278 | /* should be free'd on disconnect? */ | 3290 | /* should be freed on disconnect? */ |
3279 | kfree(mdev->ee_hash); | 3291 | kfree(mdev->ee_hash); |
3280 | /* | 3292 | /* |
3281 | mdev->ee_hash_s = 0; | 3293 | mdev->ee_hash_s = 0; |
@@ -3415,7 +3427,9 @@ struct drbd_conf *drbd_new_device(unsigned int minor) | |||
3415 | q->backing_dev_info.congested_data = mdev; | 3427 | q->backing_dev_info.congested_data = mdev; |
3416 | 3428 | ||
3417 | blk_queue_make_request(q, drbd_make_request); | 3429 | blk_queue_make_request(q, drbd_make_request); |
3418 | blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE >> 9); | 3430 | /* Setting the max_hw_sectors to an odd value of 8 KiB here. |
3431 | This triggers a max_bio_size message upon first attach or connect */ | ||
3432 | blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8); | ||
3419 | blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); | 3433 | blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); |
3420 | blk_queue_merge_bvec(q, drbd_merge_bvec); | 3434 | blk_queue_merge_bvec(q, drbd_merge_bvec); |
3421 | q->queue_lock = &mdev->req_lock; | 3435 | q->queue_lock = &mdev->req_lock; |
@@ -3627,7 +3641,8 @@ struct meta_data_on_disk { | |||
3627 | /* `-- act_log->nr_elements <-- sync_conf.al_extents */ | 3641 | /* `-- act_log->nr_elements <-- sync_conf.al_extents */ |
3628 | u32 bm_offset; /* offset to the bitmap, from here */ | 3642 | u32 bm_offset; /* offset to the bitmap, from here */ |
3629 | u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */ | 3643 | u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */ |
3630 | u32 reserved_u32[4]; | 3644 | u32 la_peer_max_bio_size; /* last peer max_bio_size */ |
3645 | u32 reserved_u32[3]; | ||
3631 | 3646 | ||
3632 | } __packed; | 3647 | } __packed; |
3633 | 3648 | ||
@@ -3668,6 +3683,7 @@ void drbd_md_sync(struct drbd_conf *mdev) | |||
3668 | buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid); | 3683 | buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid); |
3669 | 3684 | ||
3670 | buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset); | 3685 | buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset); |
3686 | buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size); | ||
3671 | 3687 | ||
3672 | D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset); | 3688 | D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset); |
3673 | sector = mdev->ldev->md.md_offset; | 3689 | sector = mdev->ldev->md.md_offset; |
@@ -3751,6 +3767,15 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) | |||
3751 | mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents); | 3767 | mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents); |
3752 | bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid); | 3768 | bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid); |
3753 | 3769 | ||
3770 | spin_lock_irq(&mdev->req_lock); | ||
3771 | if (mdev->state.conn < C_CONNECTED) { | ||
3772 | int peer; | ||
3773 | peer = be32_to_cpu(buffer->la_peer_max_bio_size); | ||
3774 | peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE); | ||
3775 | mdev->peer_max_bio_size = peer; | ||
3776 | } | ||
3777 | spin_unlock_irq(&mdev->req_lock); | ||
3778 | |||
3754 | if (mdev->sync_conf.al_extents < 7) | 3779 | if (mdev->sync_conf.al_extents < 7) |
3755 | mdev->sync_conf.al_extents = 127; | 3780 | mdev->sync_conf.al_extents = 127; |
3756 | 3781 | ||
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 03b29f78a37d..515bcd948a43 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c | |||
@@ -272,9 +272,28 @@ static int _try_outdate_peer_async(void *data) | |||
272 | { | 272 | { |
273 | struct drbd_conf *mdev = (struct drbd_conf *)data; | 273 | struct drbd_conf *mdev = (struct drbd_conf *)data; |
274 | enum drbd_disk_state nps; | 274 | enum drbd_disk_state nps; |
275 | union drbd_state ns; | ||
275 | 276 | ||
276 | nps = drbd_try_outdate_peer(mdev); | 277 | nps = drbd_try_outdate_peer(mdev); |
277 | drbd_request_state(mdev, NS(pdsk, nps)); | 278 | |
279 | /* Not using | ||
280 | drbd_request_state(mdev, NS(pdsk, nps)); | ||
281 | here, because we might have been able to re-establish the connection | ||
282 | in the meantime. This can only partially be solved in the state | ||
283 | engine's is_valid_state() and is_valid_state_transition() | ||
284 | functions. | ||
285 | |||
286 | nps can be D_INCONSISTENT, D_OUTDATED or D_UNKNOWN. | ||
287 | pdsk == D_INCONSISTENT while conn >= C_CONNECTED is valid, | ||
288 | therefore we have to have the pre state change check here. | ||
289 | */ | ||
290 | spin_lock_irq(&mdev->req_lock); | ||
291 | ns = mdev->state; | ||
292 | if (ns.conn < C_WF_REPORT_PARAMS) { | ||
293 | ns.pdsk = nps; | ||
294 | _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); | ||
295 | } | ||
296 | spin_unlock_irq(&mdev->req_lock); | ||
278 | 297 | ||
279 | return 0; | 298 | return 0; |
280 | } | 299 | } |
@@ -577,7 +596,7 @@ void drbd_resume_io(struct drbd_conf *mdev) | |||
577 | * Returns 0 on success, negative return values indicate errors. | 596 | * Returns 0 on success, negative return values indicate errors. |
578 | * You should call drbd_md_sync() after calling this function. | 597 | * You should call drbd_md_sync() after calling this function. |
579 | */ | 598 | */ |
580 | enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local) | 599 | enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local) |
581 | { | 600 | { |
582 | sector_t prev_first_sect, prev_size; /* previous meta location */ | 601 | sector_t prev_first_sect, prev_size; /* previous meta location */ |
583 | sector_t la_size; | 602 | sector_t la_size; |
@@ -773,30 +792,78 @@ static int drbd_check_al_size(struct drbd_conf *mdev) | |||
773 | return 0; | 792 | return 0; |
774 | } | 793 | } |
775 | 794 | ||
776 | void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size) __must_hold(local) | 795 | static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size) |
777 | { | 796 | { |
778 | struct request_queue * const q = mdev->rq_queue; | 797 | struct request_queue * const q = mdev->rq_queue; |
779 | struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; | 798 | int max_hw_sectors = max_bio_size >> 9; |
780 | int max_segments = mdev->ldev->dc.max_bio_bvecs; | 799 | int max_segments = 0; |
781 | int max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); | 800 | |
801 | if (get_ldev_if_state(mdev, D_ATTACHING)) { | ||
802 | struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; | ||
803 | |||
804 | max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); | ||
805 | max_segments = mdev->ldev->dc.max_bio_bvecs; | ||
806 | put_ldev(mdev); | ||
807 | } | ||
782 | 808 | ||
783 | blk_queue_logical_block_size(q, 512); | 809 | blk_queue_logical_block_size(q, 512); |
784 | blk_queue_max_hw_sectors(q, max_hw_sectors); | 810 | blk_queue_max_hw_sectors(q, max_hw_sectors); |
785 | /* This is the workaround for "bio would need to, but cannot, be split" */ | 811 | /* This is the workaround for "bio would need to, but cannot, be split" */ |
786 | blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); | 812 | blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); |
787 | blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1); | 813 | blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1); |
788 | blk_queue_stack_limits(q, b); | ||
789 | 814 | ||
790 | dev_info(DEV, "max BIO size = %u\n", queue_max_hw_sectors(q) << 9); | 815 | if (get_ldev_if_state(mdev, D_ATTACHING)) { |
816 | struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; | ||
817 | |||
818 | blk_queue_stack_limits(q, b); | ||
791 | 819 | ||
792 | if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { | 820 | if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { |
793 | dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", | 821 | dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", |
794 | q->backing_dev_info.ra_pages, | 822 | q->backing_dev_info.ra_pages, |
795 | b->backing_dev_info.ra_pages); | 823 | b->backing_dev_info.ra_pages); |
796 | q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; | 824 | q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; |
825 | } | ||
826 | put_ldev(mdev); | ||
797 | } | 827 | } |
798 | } | 828 | } |
799 | 829 | ||
830 | void drbd_reconsider_max_bio_size(struct drbd_conf *mdev) | ||
831 | { | ||
832 | int now, new, local, peer; | ||
833 | |||
834 | now = queue_max_hw_sectors(mdev->rq_queue) << 9; | ||
835 | local = mdev->local_max_bio_size; /* Possibly the last known value, from volatile memory */ | ||
836 | peer = mdev->peer_max_bio_size; /* Possibly the last known value, from meta data */ | ||
837 | |||
838 | if (get_ldev_if_state(mdev, D_ATTACHING)) { | ||
839 | local = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9; | ||
840 | mdev->local_max_bio_size = local; | ||
841 | put_ldev(mdev); | ||
842 | } | ||
843 | |||
844 | /* We may ignore peer limits if the peer is modern enough, | ||
845 | because from 8.3.8 onwards the peer can use multiple | ||
846 | BIOs for a single peer_request */ | ||
847 | if (mdev->state.conn >= C_CONNECTED) { | ||
848 | if (mdev->agreed_pro_version < 94) | ||
849 | peer = mdev->peer_max_bio_size; | ||
850 | else if (mdev->agreed_pro_version == 94) | ||
851 | peer = DRBD_MAX_SIZE_H80_PACKET; | ||
852 | else /* drbd 8.3.8 onwards */ | ||
853 | peer = DRBD_MAX_BIO_SIZE; | ||
854 | } | ||
855 | |||
856 | new = min_t(int, local, peer); | ||
857 | |||
858 | if (mdev->state.role == R_PRIMARY && new < now) | ||
859 | dev_err(DEV, "ASSERT FAILED new < now; (%d < %d)\n", new, now); | ||
860 | |||
861 | if (new != now) | ||
862 | dev_info(DEV, "max BIO size = %u\n", new); | ||
863 | |||
864 | drbd_setup_queue_param(mdev, new); | ||
865 | } | ||
866 | |||
800 | /* serialize deconfig (worker exiting, doing cleanup) | 867 | /* serialize deconfig (worker exiting, doing cleanup) |
801 | * and reconfig (drbdsetup disk, drbdsetup net) | 868 | * and reconfig (drbdsetup disk, drbdsetup net) |
802 | * | 869 | * |
@@ -865,7 +932,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp | |||
865 | struct block_device *bdev; | 932 | struct block_device *bdev; |
866 | struct lru_cache *resync_lru = NULL; | 933 | struct lru_cache *resync_lru = NULL; |
867 | union drbd_state ns, os; | 934 | union drbd_state ns, os; |
868 | unsigned int max_bio_size; | ||
869 | enum drbd_state_rv rv; | 935 | enum drbd_state_rv rv; |
870 | int cp_discovered = 0; | 936 | int cp_discovered = 0; |
871 | int logical_block_size; | 937 | int logical_block_size; |
@@ -1117,20 +1183,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp | |||
1117 | mdev->read_cnt = 0; | 1183 | mdev->read_cnt = 0; |
1118 | mdev->writ_cnt = 0; | 1184 | mdev->writ_cnt = 0; |
1119 | 1185 | ||
1120 | max_bio_size = DRBD_MAX_BIO_SIZE; | 1186 | drbd_reconsider_max_bio_size(mdev); |
1121 | if (mdev->state.conn == C_CONNECTED) { | ||
1122 | /* We are Primary, Connected, and now attach a new local | ||
1123 | * backing store. We must not increase the user visible maximum | ||
1124 | * bio size on this device to something the peer may not be | ||
1125 | * able to handle. */ | ||
1126 | if (mdev->agreed_pro_version < 94) | ||
1127 | max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9; | ||
1128 | else if (mdev->agreed_pro_version == 94) | ||
1129 | max_bio_size = DRBD_MAX_SIZE_H80_PACKET; | ||
1130 | /* else: drbd 8.3.9 and later, stay with default */ | ||
1131 | } | ||
1132 | |||
1133 | drbd_setup_queue_param(mdev, max_bio_size); | ||
1134 | 1187 | ||
1135 | /* If I am currently not R_PRIMARY, | 1188 | /* If I am currently not R_PRIMARY, |
1136 | * but meta data primary indicator is set, | 1189 | * but meta data primary indicator is set, |
@@ -1152,7 +1205,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp | |||
1152 | !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND)) | 1205 | !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND)) |
1153 | set_bit(USE_DEGR_WFC_T, &mdev->flags); | 1206 | set_bit(USE_DEGR_WFC_T, &mdev->flags); |
1154 | 1207 | ||
1155 | dd = drbd_determin_dev_size(mdev, 0); | 1208 | dd = drbd_determine_dev_size(mdev, 0); |
1156 | if (dd == dev_size_error) { | 1209 | if (dd == dev_size_error) { |
1157 | retcode = ERR_NOMEM_BITMAP; | 1210 | retcode = ERR_NOMEM_BITMAP; |
1158 | goto force_diskless_dec; | 1211 | goto force_diskless_dec; |
@@ -1281,11 +1334,19 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp | |||
1281 | static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, | 1334 | static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, |
1282 | struct drbd_nl_cfg_reply *reply) | 1335 | struct drbd_nl_cfg_reply *reply) |
1283 | { | 1336 | { |
1337 | enum drbd_ret_code retcode; | ||
1338 | int ret; | ||
1284 | drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ | 1339 | drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ |
1285 | reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS)); | 1340 | retcode = drbd_request_state(mdev, NS(disk, D_FAILED)); |
1286 | if (mdev->state.disk == D_DISKLESS) | 1341 | /* D_FAILED will transition to DISKLESS. */ |
1287 | wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); | 1342 | ret = wait_event_interruptible(mdev->misc_wait, |
1343 | mdev->state.disk != D_FAILED); | ||
1288 | drbd_resume_io(mdev); | 1344 | drbd_resume_io(mdev); |
1345 | if ((int)retcode == (int)SS_IS_DISKLESS) | ||
1346 | retcode = SS_NOTHING_TO_DO; | ||
1347 | if (ret) | ||
1348 | retcode = ERR_INTR; | ||
1349 | reply->ret_code = retcode; | ||
1289 | return 0; | 1350 | return 0; |
1290 | } | 1351 | } |
1291 | 1352 | ||
@@ -1658,7 +1719,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, | |||
1658 | 1719 | ||
1659 | mdev->ldev->dc.disk_size = (sector_t)rs.resize_size; | 1720 | mdev->ldev->dc.disk_size = (sector_t)rs.resize_size; |
1660 | ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0); | 1721 | ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0); |
1661 | dd = drbd_determin_dev_size(mdev, ddsf); | 1722 | dd = drbd_determine_dev_size(mdev, ddsf); |
1662 | drbd_md_sync(mdev); | 1723 | drbd_md_sync(mdev); |
1663 | put_ldev(mdev); | 1724 | put_ldev(mdev); |
1664 | if (dd == dev_size_error) { | 1725 | if (dd == dev_size_error) { |
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index fd26666c0b08..25d32c5aa50a 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c | |||
@@ -333,7 +333,7 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, | |||
333 | if (!page) | 333 | if (!page) |
334 | goto fail; | 334 | goto fail; |
335 | 335 | ||
336 | INIT_HLIST_NODE(&e->colision); | 336 | INIT_HLIST_NODE(&e->collision); |
337 | e->epoch = NULL; | 337 | e->epoch = NULL; |
338 | e->mdev = mdev; | 338 | e->mdev = mdev; |
339 | e->pages = page; | 339 | e->pages = page; |
@@ -356,7 +356,7 @@ void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int i | |||
356 | kfree(e->digest); | 356 | kfree(e->digest); |
357 | drbd_pp_free(mdev, e->pages, is_net); | 357 | drbd_pp_free(mdev, e->pages, is_net); |
358 | D_ASSERT(atomic_read(&e->pending_bios) == 0); | 358 | D_ASSERT(atomic_read(&e->pending_bios) == 0); |
359 | D_ASSERT(hlist_unhashed(&e->colision)); | 359 | D_ASSERT(hlist_unhashed(&e->collision)); |
360 | mempool_free(e, drbd_ee_mempool); | 360 | mempool_free(e, drbd_ee_mempool); |
361 | } | 361 | } |
362 | 362 | ||
@@ -787,7 +787,7 @@ static int drbd_connect(struct drbd_conf *mdev) | |||
787 | } | 787 | } |
788 | 788 | ||
789 | if (sock && msock) { | 789 | if (sock && msock) { |
790 | schedule_timeout_interruptible(HZ / 10); | 790 | schedule_timeout_interruptible(mdev->net_conf->ping_timeo*HZ/10); |
791 | ok = drbd_socket_okay(mdev, &sock); | 791 | ok = drbd_socket_okay(mdev, &sock); |
792 | ok = drbd_socket_okay(mdev, &msock) && ok; | 792 | ok = drbd_socket_okay(mdev, &msock) && ok; |
793 | if (ok) | 793 | if (ok) |
@@ -899,11 +899,6 @@ retry: | |||
899 | 899 | ||
900 | drbd_thread_start(&mdev->asender); | 900 | drbd_thread_start(&mdev->asender); |
901 | 901 | ||
902 | if (mdev->agreed_pro_version < 95 && get_ldev(mdev)) { | ||
903 | drbd_setup_queue_param(mdev, DRBD_MAX_SIZE_H80_PACKET); | ||
904 | put_ldev(mdev); | ||
905 | } | ||
906 | |||
907 | if (drbd_send_protocol(mdev) == -1) | 902 | if (drbd_send_protocol(mdev) == -1) |
908 | return -1; | 903 | return -1; |
909 | drbd_send_sync_param(mdev, &mdev->sync_conf); | 904 | drbd_send_sync_param(mdev, &mdev->sync_conf); |
@@ -1418,7 +1413,7 @@ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int u | |||
1418 | sector_t sector = e->sector; | 1413 | sector_t sector = e->sector; |
1419 | int ok; | 1414 | int ok; |
1420 | 1415 | ||
1421 | D_ASSERT(hlist_unhashed(&e->colision)); | 1416 | D_ASSERT(hlist_unhashed(&e->collision)); |
1422 | 1417 | ||
1423 | if (likely((e->flags & EE_WAS_ERROR) == 0)) { | 1418 | if (likely((e->flags & EE_WAS_ERROR) == 0)) { |
1424 | drbd_set_in_sync(mdev, sector, e->size); | 1419 | drbd_set_in_sync(mdev, sector, e->size); |
@@ -1487,7 +1482,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi | |||
1487 | return false; | 1482 | return false; |
1488 | } | 1483 | } |
1489 | 1484 | ||
1490 | /* hlist_del(&req->colision) is done in _req_may_be_done, to avoid | 1485 | /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid |
1491 | * special casing it there for the various failure cases. | 1486 | * special casing it there for the various failure cases. |
1492 | * still no race with drbd_fail_pending_reads */ | 1487 | * still no race with drbd_fail_pending_reads */ |
1493 | ok = recv_dless_read(mdev, req, sector, data_size); | 1488 | ok = recv_dless_read(mdev, req, sector, data_size); |
@@ -1558,11 +1553,11 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
1558 | * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */ | 1553 | * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */ |
1559 | if (mdev->net_conf->two_primaries) { | 1554 | if (mdev->net_conf->two_primaries) { |
1560 | spin_lock_irq(&mdev->req_lock); | 1555 | spin_lock_irq(&mdev->req_lock); |
1561 | D_ASSERT(!hlist_unhashed(&e->colision)); | 1556 | D_ASSERT(!hlist_unhashed(&e->collision)); |
1562 | hlist_del_init(&e->colision); | 1557 | hlist_del_init(&e->collision); |
1563 | spin_unlock_irq(&mdev->req_lock); | 1558 | spin_unlock_irq(&mdev->req_lock); |
1564 | } else { | 1559 | } else { |
1565 | D_ASSERT(hlist_unhashed(&e->colision)); | 1560 | D_ASSERT(hlist_unhashed(&e->collision)); |
1566 | } | 1561 | } |
1567 | 1562 | ||
1568 | drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0)); | 1563 | drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0)); |
@@ -1579,8 +1574,8 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u | |||
1579 | ok = drbd_send_ack(mdev, P_DISCARD_ACK, e); | 1574 | ok = drbd_send_ack(mdev, P_DISCARD_ACK, e); |
1580 | 1575 | ||
1581 | spin_lock_irq(&mdev->req_lock); | 1576 | spin_lock_irq(&mdev->req_lock); |
1582 | D_ASSERT(!hlist_unhashed(&e->colision)); | 1577 | D_ASSERT(!hlist_unhashed(&e->collision)); |
1583 | hlist_del_init(&e->colision); | 1578 | hlist_del_init(&e->collision); |
1584 | spin_unlock_irq(&mdev->req_lock); | 1579 | spin_unlock_irq(&mdev->req_lock); |
1585 | 1580 | ||
1586 | dec_unacked(mdev); | 1581 | dec_unacked(mdev); |
@@ -1755,7 +1750,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
1755 | 1750 | ||
1756 | spin_lock_irq(&mdev->req_lock); | 1751 | spin_lock_irq(&mdev->req_lock); |
1757 | 1752 | ||
1758 | hlist_add_head(&e->colision, ee_hash_slot(mdev, sector)); | 1753 | hlist_add_head(&e->collision, ee_hash_slot(mdev, sector)); |
1759 | 1754 | ||
1760 | #define OVERLAPS overlaps(i->sector, i->size, sector, size) | 1755 | #define OVERLAPS overlaps(i->sector, i->size, sector, size) |
1761 | slot = tl_hash_slot(mdev, sector); | 1756 | slot = tl_hash_slot(mdev, sector); |
@@ -1765,7 +1760,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
1765 | int have_conflict = 0; | 1760 | int have_conflict = 0; |
1766 | prepare_to_wait(&mdev->misc_wait, &wait, | 1761 | prepare_to_wait(&mdev->misc_wait, &wait, |
1767 | TASK_INTERRUPTIBLE); | 1762 | TASK_INTERRUPTIBLE); |
1768 | hlist_for_each_entry(i, n, slot, colision) { | 1763 | hlist_for_each_entry(i, n, slot, collision) { |
1769 | if (OVERLAPS) { | 1764 | if (OVERLAPS) { |
1770 | /* only ALERT on first iteration, | 1765 | /* only ALERT on first iteration, |
1771 | * we may be woken up early... */ | 1766 | * we may be woken up early... */ |
@@ -1804,7 +1799,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
1804 | } | 1799 | } |
1805 | 1800 | ||
1806 | if (signal_pending(current)) { | 1801 | if (signal_pending(current)) { |
1807 | hlist_del_init(&e->colision); | 1802 | hlist_del_init(&e->collision); |
1808 | 1803 | ||
1809 | spin_unlock_irq(&mdev->req_lock); | 1804 | spin_unlock_irq(&mdev->req_lock); |
1810 | 1805 | ||
@@ -1862,7 +1857,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
1862 | dev_err(DEV, "submit failed, triggering re-connect\n"); | 1857 | dev_err(DEV, "submit failed, triggering re-connect\n"); |
1863 | spin_lock_irq(&mdev->req_lock); | 1858 | spin_lock_irq(&mdev->req_lock); |
1864 | list_del(&e->w.list); | 1859 | list_del(&e->w.list); |
1865 | hlist_del_init(&e->colision); | 1860 | hlist_del_init(&e->collision); |
1866 | spin_unlock_irq(&mdev->req_lock); | 1861 | spin_unlock_irq(&mdev->req_lock); |
1867 | if (e->flags & EE_CALL_AL_COMPLETE_IO) | 1862 | if (e->flags & EE_CALL_AL_COMPLETE_IO) |
1868 | drbd_al_complete_io(mdev, e->sector); | 1863 | drbd_al_complete_io(mdev, e->sector); |
@@ -2916,12 +2911,6 @@ disconnect: | |||
2916 | return false; | 2911 | return false; |
2917 | } | 2912 | } |
2918 | 2913 | ||
2919 | static void drbd_setup_order_type(struct drbd_conf *mdev, int peer) | ||
2920 | { | ||
2921 | /* sorry, we currently have no working implementation | ||
2922 | * of distributed TCQ */ | ||
2923 | } | ||
2924 | |||
2925 | /* warn if the arguments differ by more than 12.5% */ | 2914 | /* warn if the arguments differ by more than 12.5% */ |
2926 | static void warn_if_differ_considerably(struct drbd_conf *mdev, | 2915 | static void warn_if_differ_considerably(struct drbd_conf *mdev, |
2927 | const char *s, sector_t a, sector_t b) | 2916 | const char *s, sector_t a, sector_t b) |
@@ -2939,7 +2928,6 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
2939 | { | 2928 | { |
2940 | struct p_sizes *p = &mdev->data.rbuf.sizes; | 2929 | struct p_sizes *p = &mdev->data.rbuf.sizes; |
2941 | enum determine_dev_size dd = unchanged; | 2930 | enum determine_dev_size dd = unchanged; |
2942 | unsigned int max_bio_size; | ||
2943 | sector_t p_size, p_usize, my_usize; | 2931 | sector_t p_size, p_usize, my_usize; |
2944 | int ldsc = 0; /* local disk size changed */ | 2932 | int ldsc = 0; /* local disk size changed */ |
2945 | enum dds_flags ddsf; | 2933 | enum dds_flags ddsf; |
@@ -2994,7 +2982,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
2994 | 2982 | ||
2995 | ddsf = be16_to_cpu(p->dds_flags); | 2983 | ddsf = be16_to_cpu(p->dds_flags); |
2996 | if (get_ldev(mdev)) { | 2984 | if (get_ldev(mdev)) { |
2997 | dd = drbd_determin_dev_size(mdev, ddsf); | 2985 | dd = drbd_determine_dev_size(mdev, ddsf); |
2998 | put_ldev(mdev); | 2986 | put_ldev(mdev); |
2999 | if (dd == dev_size_error) | 2987 | if (dd == dev_size_error) |
3000 | return false; | 2988 | return false; |
@@ -3004,23 +2992,15 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
3004 | drbd_set_my_capacity(mdev, p_size); | 2992 | drbd_set_my_capacity(mdev, p_size); |
3005 | } | 2993 | } |
3006 | 2994 | ||
2995 | mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size); | ||
2996 | drbd_reconsider_max_bio_size(mdev); | ||
2997 | |||
3007 | if (get_ldev(mdev)) { | 2998 | if (get_ldev(mdev)) { |
3008 | if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) { | 2999 | if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) { |
3009 | mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev); | 3000 | mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev); |
3010 | ldsc = 1; | 3001 | ldsc = 1; |
3011 | } | 3002 | } |
3012 | 3003 | ||
3013 | if (mdev->agreed_pro_version < 94) | ||
3014 | max_bio_size = be32_to_cpu(p->max_bio_size); | ||
3015 | else if (mdev->agreed_pro_version == 94) | ||
3016 | max_bio_size = DRBD_MAX_SIZE_H80_PACKET; | ||
3017 | else /* drbd 8.3.8 onwards */ | ||
3018 | max_bio_size = DRBD_MAX_BIO_SIZE; | ||
3019 | |||
3020 | if (max_bio_size != queue_max_hw_sectors(mdev->rq_queue) << 9) | ||
3021 | drbd_setup_queue_param(mdev, max_bio_size); | ||
3022 | |||
3023 | drbd_setup_order_type(mdev, be16_to_cpu(p->queue_order_type)); | ||
3024 | put_ldev(mdev); | 3004 | put_ldev(mdev); |
3025 | } | 3005 | } |
3026 | 3006 | ||
@@ -4275,7 +4255,7 @@ static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev, | |||
4275 | struct hlist_node *n; | 4255 | struct hlist_node *n; |
4276 | struct drbd_request *req; | 4256 | struct drbd_request *req; |
4277 | 4257 | ||
4278 | hlist_for_each_entry(req, n, slot, colision) { | 4258 | hlist_for_each_entry(req, n, slot, collision) { |
4279 | if ((unsigned long)req == (unsigned long)id) { | 4259 | if ((unsigned long)req == (unsigned long)id) { |
4280 | if (req->sector != sector) { | 4260 | if (req->sector != sector) { |
4281 | dev_err(DEV, "_ack_id_to_req: found req %p but it has " | 4261 | dev_err(DEV, "_ack_id_to_req: found req %p but it has " |
@@ -4554,6 +4534,7 @@ int drbd_asender(struct drbd_thread *thi) | |||
4554 | int received = 0; | 4534 | int received = 0; |
4555 | int expect = sizeof(struct p_header80); | 4535 | int expect = sizeof(struct p_header80); |
4556 | int empty; | 4536 | int empty; |
4537 | int ping_timeout_active = 0; | ||
4557 | 4538 | ||
4558 | sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); | 4539 | sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); |
4559 | 4540 | ||
@@ -4566,6 +4547,7 @@ int drbd_asender(struct drbd_thread *thi) | |||
4566 | ERR_IF(!drbd_send_ping(mdev)) goto reconnect; | 4547 | ERR_IF(!drbd_send_ping(mdev)) goto reconnect; |
4567 | mdev->meta.socket->sk->sk_rcvtimeo = | 4548 | mdev->meta.socket->sk->sk_rcvtimeo = |
4568 | mdev->net_conf->ping_timeo*HZ/10; | 4549 | mdev->net_conf->ping_timeo*HZ/10; |
4550 | ping_timeout_active = 1; | ||
4569 | } | 4551 | } |
4570 | 4552 | ||
4571 | /* conditionally cork; | 4553 | /* conditionally cork; |
@@ -4620,8 +4602,7 @@ int drbd_asender(struct drbd_thread *thi) | |||
4620 | dev_err(DEV, "meta connection shut down by peer.\n"); | 4602 | dev_err(DEV, "meta connection shut down by peer.\n"); |
4621 | goto reconnect; | 4603 | goto reconnect; |
4622 | } else if (rv == -EAGAIN) { | 4604 | } else if (rv == -EAGAIN) { |
4623 | if (mdev->meta.socket->sk->sk_rcvtimeo == | 4605 | if (ping_timeout_active) { |
4624 | mdev->net_conf->ping_timeo*HZ/10) { | ||
4625 | dev_err(DEV, "PingAck did not arrive in time.\n"); | 4606 | dev_err(DEV, "PingAck did not arrive in time.\n"); |
4626 | goto reconnect; | 4607 | goto reconnect; |
4627 | } | 4608 | } |
@@ -4660,6 +4641,11 @@ int drbd_asender(struct drbd_thread *thi) | |||
4660 | if (!cmd->process(mdev, h)) | 4641 | if (!cmd->process(mdev, h)) |
4661 | goto reconnect; | 4642 | goto reconnect; |
4662 | 4643 | ||
4644 | /* the idle_timeout (ping-int) | ||
4645 | * has been restored in got_PingAck() */ | ||
4646 | if (cmd == get_asender_cmd(P_PING_ACK)) | ||
4647 | ping_timeout_active = 0; | ||
4648 | |||
4663 | buf = h; | 4649 | buf = h; |
4664 | received = 0; | 4650 | received = 0; |
4665 | expect = sizeof(struct p_header80); | 4651 | expect = sizeof(struct p_header80); |
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 5c0c8be1bb0a..3424d675b769 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c | |||
@@ -163,7 +163,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, | |||
163 | * they must have been failed on the spot */ | 163 | * they must have been failed on the spot */ |
164 | #define OVERLAPS overlaps(sector, size, i->sector, i->size) | 164 | #define OVERLAPS overlaps(sector, size, i->sector, i->size) |
165 | slot = tl_hash_slot(mdev, sector); | 165 | slot = tl_hash_slot(mdev, sector); |
166 | hlist_for_each_entry(i, n, slot, colision) { | 166 | hlist_for_each_entry(i, n, slot, collision) { |
167 | if (OVERLAPS) { | 167 | if (OVERLAPS) { |
168 | dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; " | 168 | dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; " |
169 | "other: %p %llus +%u\n", | 169 | "other: %p %llus +%u\n", |
@@ -187,7 +187,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, | |||
187 | #undef OVERLAPS | 187 | #undef OVERLAPS |
188 | #define OVERLAPS overlaps(sector, size, e->sector, e->size) | 188 | #define OVERLAPS overlaps(sector, size, e->sector, e->size) |
189 | slot = ee_hash_slot(mdev, req->sector); | 189 | slot = ee_hash_slot(mdev, req->sector); |
190 | hlist_for_each_entry(e, n, slot, colision) { | 190 | hlist_for_each_entry(e, n, slot, collision) { |
191 | if (OVERLAPS) { | 191 | if (OVERLAPS) { |
192 | wake_up(&mdev->misc_wait); | 192 | wake_up(&mdev->misc_wait); |
193 | break; | 193 | break; |
@@ -260,8 +260,8 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) | |||
260 | 260 | ||
261 | /* remove the request from the conflict detection | 261 | /* remove the request from the conflict detection |
262 | * respective block_id verification hash */ | 262 | * respective block_id verification hash */ |
263 | if (!hlist_unhashed(&req->colision)) | 263 | if (!hlist_unhashed(&req->collision)) |
264 | hlist_del(&req->colision); | 264 | hlist_del(&req->collision); |
265 | else | 265 | else |
266 | D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0); | 266 | D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0); |
267 | 267 | ||
@@ -329,7 +329,7 @@ static int _req_conflicts(struct drbd_request *req) | |||
329 | struct hlist_node *n; | 329 | struct hlist_node *n; |
330 | struct hlist_head *slot; | 330 | struct hlist_head *slot; |
331 | 331 | ||
332 | D_ASSERT(hlist_unhashed(&req->colision)); | 332 | D_ASSERT(hlist_unhashed(&req->collision)); |
333 | 333 | ||
334 | if (!get_net_conf(mdev)) | 334 | if (!get_net_conf(mdev)) |
335 | return 0; | 335 | return 0; |
@@ -341,7 +341,7 @@ static int _req_conflicts(struct drbd_request *req) | |||
341 | 341 | ||
342 | #define OVERLAPS overlaps(i->sector, i->size, sector, size) | 342 | #define OVERLAPS overlaps(i->sector, i->size, sector, size) |
343 | slot = tl_hash_slot(mdev, sector); | 343 | slot = tl_hash_slot(mdev, sector); |
344 | hlist_for_each_entry(i, n, slot, colision) { | 344 | hlist_for_each_entry(i, n, slot, collision) { |
345 | if (OVERLAPS) { | 345 | if (OVERLAPS) { |
346 | dev_alert(DEV, "%s[%u] Concurrent local write detected! " | 346 | dev_alert(DEV, "%s[%u] Concurrent local write detected! " |
347 | "[DISCARD L] new: %llus +%u; " | 347 | "[DISCARD L] new: %llus +%u; " |
@@ -359,7 +359,7 @@ static int _req_conflicts(struct drbd_request *req) | |||
359 | #undef OVERLAPS | 359 | #undef OVERLAPS |
360 | #define OVERLAPS overlaps(e->sector, e->size, sector, size) | 360 | #define OVERLAPS overlaps(e->sector, e->size, sector, size) |
361 | slot = ee_hash_slot(mdev, sector); | 361 | slot = ee_hash_slot(mdev, sector); |
362 | hlist_for_each_entry(e, n, slot, colision) { | 362 | hlist_for_each_entry(e, n, slot, collision) { |
363 | if (OVERLAPS) { | 363 | if (OVERLAPS) { |
364 | dev_alert(DEV, "%s[%u] Concurrent remote write detected!" | 364 | dev_alert(DEV, "%s[%u] Concurrent remote write detected!" |
365 | " [DISCARD L] new: %llus +%u; " | 365 | " [DISCARD L] new: %llus +%u; " |
@@ -491,7 +491,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
491 | 491 | ||
492 | /* so we can verify the handle in the answer packet | 492 | /* so we can verify the handle in the answer packet |
493 | * corresponding hlist_del is in _req_may_be_done() */ | 493 | * corresponding hlist_del is in _req_may_be_done() */ |
494 | hlist_add_head(&req->colision, ar_hash_slot(mdev, req->sector)); | 494 | hlist_add_head(&req->collision, ar_hash_slot(mdev, req->sector)); |
495 | 495 | ||
496 | set_bit(UNPLUG_REMOTE, &mdev->flags); | 496 | set_bit(UNPLUG_REMOTE, &mdev->flags); |
497 | 497 | ||
@@ -507,7 +507,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
507 | /* assert something? */ | 507 | /* assert something? */ |
508 | /* from drbd_make_request_common only */ | 508 | /* from drbd_make_request_common only */ |
509 | 509 | ||
510 | hlist_add_head(&req->colision, tl_hash_slot(mdev, req->sector)); | 510 | hlist_add_head(&req->collision, tl_hash_slot(mdev, req->sector)); |
511 | /* corresponding hlist_del is in _req_may_be_done() */ | 511 | /* corresponding hlist_del is in _req_may_be_done() */ |
512 | 512 | ||
513 | /* NOTE | 513 | /* NOTE |
@@ -1033,7 +1033,7 @@ fail_conflicting: | |||
1033 | err = 0; | 1033 | err = 0; |
1034 | 1034 | ||
1035 | fail_free_complete: | 1035 | fail_free_complete: |
1036 | if (rw == WRITE && local) | 1036 | if (req->rq_state & RQ_IN_ACT_LOG) |
1037 | drbd_al_complete_io(mdev, sector); | 1037 | drbd_al_complete_io(mdev, sector); |
1038 | fail_and_free_req: | 1038 | fail_and_free_req: |
1039 | if (local) { | 1039 | if (local) { |
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 32e2c3e6a813..68a234a5fdc5 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h | |||
@@ -256,7 +256,7 @@ static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev, | |||
256 | struct hlist_node *n; | 256 | struct hlist_node *n; |
257 | struct drbd_request *req; | 257 | struct drbd_request *req; |
258 | 258 | ||
259 | hlist_for_each_entry(req, n, slot, colision) { | 259 | hlist_for_each_entry(req, n, slot, collision) { |
260 | if ((unsigned long)req == (unsigned long)id) { | 260 | if ((unsigned long)req == (unsigned long)id) { |
261 | D_ASSERT(req->sector == sector); | 261 | D_ASSERT(req->sector == sector); |
262 | return req; | 262 | return req; |
@@ -291,7 +291,7 @@ static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev, | |||
291 | req->epoch = 0; | 291 | req->epoch = 0; |
292 | req->sector = bio_src->bi_sector; | 292 | req->sector = bio_src->bi_sector; |
293 | req->size = bio_src->bi_size; | 293 | req->size = bio_src->bi_size; |
294 | INIT_HLIST_NODE(&req->colision); | 294 | INIT_HLIST_NODE(&req->collision); |
295 | INIT_LIST_HEAD(&req->tl_requests); | 295 | INIT_LIST_HEAD(&req->tl_requests); |
296 | INIT_LIST_HEAD(&req->w.list); | 296 | INIT_LIST_HEAD(&req->w.list); |
297 | } | 297 | } |
@@ -323,6 +323,7 @@ extern int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
323 | extern void complete_master_bio(struct drbd_conf *mdev, | 323 | extern void complete_master_bio(struct drbd_conf *mdev, |
324 | struct bio_and_error *m); | 324 | struct bio_and_error *m); |
325 | extern void request_timer_fn(unsigned long data); | 325 | extern void request_timer_fn(unsigned long data); |
326 | extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what); | ||
326 | 327 | ||
327 | /* use this if you don't want to deal with calling complete_master_bio() | 328 | /* use this if you don't want to deal with calling complete_master_bio() |
328 | * outside the spinlock, e.g. when walking some list on cleanup. */ | 329 | * outside the spinlock, e.g. when walking some list on cleanup. */ |
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index f7e6c92f8d03..4d76b06b6b20 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c | |||
@@ -126,7 +126,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo | |||
126 | list_del(&e->w.list); /* has been on active_ee or sync_ee */ | 126 | list_del(&e->w.list); /* has been on active_ee or sync_ee */ |
127 | list_add_tail(&e->w.list, &mdev->done_ee); | 127 | list_add_tail(&e->w.list, &mdev->done_ee); |
128 | 128 | ||
129 | /* No hlist_del_init(&e->colision) here, we did not send the Ack yet, | 129 | /* No hlist_del_init(&e->collision) here, we did not send the Ack yet, |
130 | * neither did we wake possibly waiting conflicting requests. | 130 | * neither did we wake possibly waiting conflicting requests. |
131 | * done from "drbd_process_done_ee" within the appropriate w.cb | 131 | * done from "drbd_process_done_ee" within the appropriate w.cb |
132 | * (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */ | 132 | * (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */ |
@@ -297,42 +297,48 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio * | |||
297 | crypto_hash_final(&desc, digest); | 297 | crypto_hash_final(&desc, digest); |
298 | } | 298 | } |
299 | 299 | ||
300 | static int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | 300 | /* TODO merge common code with w_e_end_ov_req */ |
301 | int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | ||
301 | { | 302 | { |
302 | struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); | 303 | struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); |
303 | int digest_size; | 304 | int digest_size; |
304 | void *digest; | 305 | void *digest; |
305 | int ok; | 306 | int ok = 1; |
306 | 307 | ||
307 | D_ASSERT(e->block_id == DRBD_MAGIC + 0xbeef); | 308 | D_ASSERT(e->block_id == DRBD_MAGIC + 0xbeef); |
308 | 309 | ||
309 | if (unlikely(cancel)) { | 310 | if (unlikely(cancel)) |
310 | drbd_free_ee(mdev, e); | 311 | goto out; |
311 | return 1; | ||
312 | } | ||
313 | 312 | ||
314 | if (likely((e->flags & EE_WAS_ERROR) == 0)) { | 313 | if (likely((e->flags & EE_WAS_ERROR) != 0)) |
315 | digest_size = crypto_hash_digestsize(mdev->csums_tfm); | 314 | goto out; |
316 | digest = kmalloc(digest_size, GFP_NOIO); | ||
317 | if (digest) { | ||
318 | drbd_csum_ee(mdev, mdev->csums_tfm, e, digest); | ||
319 | 315 | ||
320 | inc_rs_pending(mdev); | 316 | digest_size = crypto_hash_digestsize(mdev->csums_tfm); |
321 | ok = drbd_send_drequest_csum(mdev, | 317 | digest = kmalloc(digest_size, GFP_NOIO); |
322 | e->sector, | 318 | if (digest) { |
323 | e->size, | 319 | sector_t sector = e->sector; |
324 | digest, | 320 | unsigned int size = e->size; |
325 | digest_size, | 321 | drbd_csum_ee(mdev, mdev->csums_tfm, e, digest); |
326 | P_CSUM_RS_REQUEST); | 322 | /* Free e and pages before send. |
327 | kfree(digest); | 323 | * In case we block on congestion, we could otherwise run into |
328 | } else { | 324 | * some distributed deadlock, if the other side blocks on |
329 | dev_err(DEV, "kmalloc() of digest failed.\n"); | 325 | * congestion as well, because our receiver blocks in |
330 | ok = 0; | 326 | * drbd_pp_alloc due to pp_in_use > max_buffers. */ |
331 | } | 327 | drbd_free_ee(mdev, e); |
332 | } else | 328 | e = NULL; |
333 | ok = 1; | 329 | inc_rs_pending(mdev); |
330 | ok = drbd_send_drequest_csum(mdev, sector, size, | ||
331 | digest, digest_size, | ||
332 | P_CSUM_RS_REQUEST); | ||
333 | kfree(digest); | ||
334 | } else { | ||
335 | dev_err(DEV, "kmalloc() of digest failed.\n"); | ||
336 | ok = 0; | ||
337 | } | ||
334 | 338 | ||
335 | drbd_free_ee(mdev, e); | 339 | out: |
340 | if (e) | ||
341 | drbd_free_ee(mdev, e); | ||
336 | 342 | ||
337 | if (unlikely(!ok)) | 343 | if (unlikely(!ok)) |
338 | dev_err(DEV, "drbd_send_drequest(..., csum) failed\n"); | 344 | dev_err(DEV, "drbd_send_drequest(..., csum) failed\n"); |
@@ -834,7 +840,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) | |||
834 | const int ratio = | 840 | const int ratio = |
835 | (t == 0) ? 0 : | 841 | (t == 0) ? 0 : |
836 | (t < 100000) ? ((s*100)/t) : (s/(t/100)); | 842 | (t < 100000) ? ((s*100)/t) : (s/(t/100)); |
837 | dev_info(DEV, "%u %% had equal check sums, eliminated: %luK; " | 843 | dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; " |
838 | "transferred %luK total %luK\n", | 844 | "transferred %luK total %luK\n", |
839 | ratio, | 845 | ratio, |
840 | Bit2KB(mdev->rs_same_csum), | 846 | Bit2KB(mdev->rs_same_csum), |
@@ -1071,9 +1077,12 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
1071 | return ok; | 1077 | return ok; |
1072 | } | 1078 | } |
1073 | 1079 | ||
1080 | /* TODO merge common code with w_e_send_csum */ | ||
1074 | int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | 1081 | int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) |
1075 | { | 1082 | { |
1076 | struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); | 1083 | struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); |
1084 | sector_t sector = e->sector; | ||
1085 | unsigned int size = e->size; | ||
1077 | int digest_size; | 1086 | int digest_size; |
1078 | void *digest; | 1087 | void *digest; |
1079 | int ok = 1; | 1088 | int ok = 1; |
@@ -1093,17 +1102,25 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
1093 | else | 1102 | else |
1094 | memset(digest, 0, digest_size); | 1103 | memset(digest, 0, digest_size); |
1095 | 1104 | ||
1105 | /* Free e and pages before send. | ||
1106 | * In case we block on congestion, we could otherwise run into | ||
1107 | * some distributed deadlock, if the other side blocks on | ||
1108 | * congestion as well, because our receiver blocks in | ||
1109 | * drbd_pp_alloc due to pp_in_use > max_buffers. */ | ||
1110 | drbd_free_ee(mdev, e); | ||
1111 | e = NULL; | ||
1096 | inc_rs_pending(mdev); | 1112 | inc_rs_pending(mdev); |
1097 | ok = drbd_send_drequest_csum(mdev, e->sector, e->size, | 1113 | ok = drbd_send_drequest_csum(mdev, sector, size, |
1098 | digest, digest_size, P_OV_REPLY); | 1114 | digest, digest_size, |
1115 | P_OV_REPLY); | ||
1099 | if (!ok) | 1116 | if (!ok) |
1100 | dec_rs_pending(mdev); | 1117 | dec_rs_pending(mdev); |
1101 | kfree(digest); | 1118 | kfree(digest); |
1102 | 1119 | ||
1103 | out: | 1120 | out: |
1104 | drbd_free_ee(mdev, e); | 1121 | if (e) |
1122 | drbd_free_ee(mdev, e); | ||
1105 | dec_unacked(mdev); | 1123 | dec_unacked(mdev); |
1106 | |||
1107 | return ok; | 1124 | return ok; |
1108 | } | 1125 | } |
1109 | 1126 | ||
@@ -1122,8 +1139,10 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
1122 | { | 1139 | { |
1123 | struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); | 1140 | struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); |
1124 | struct digest_info *di; | 1141 | struct digest_info *di; |
1125 | int digest_size; | ||
1126 | void *digest; | 1142 | void *digest; |
1143 | sector_t sector = e->sector; | ||
1144 | unsigned int size = e->size; | ||
1145 | int digest_size; | ||
1127 | int ok, eq = 0; | 1146 | int ok, eq = 0; |
1128 | 1147 | ||
1129 | if (unlikely(cancel)) { | 1148 | if (unlikely(cancel)) { |
@@ -1153,16 +1172,21 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
1153 | } | 1172 | } |
1154 | } | 1173 | } |
1155 | 1174 | ||
1156 | dec_unacked(mdev); | 1175 | /* Free e and pages before send. |
1176 | * In case we block on congestion, we could otherwise run into | ||
1177 | * some distributed deadlock, if the other side blocks on | ||
1178 | * congestion as well, because our receiver blocks in | ||
1179 | * drbd_pp_alloc due to pp_in_use > max_buffers. */ | ||
1180 | drbd_free_ee(mdev, e); | ||
1157 | if (!eq) | 1181 | if (!eq) |
1158 | drbd_ov_oos_found(mdev, e->sector, e->size); | 1182 | drbd_ov_oos_found(mdev, sector, size); |
1159 | else | 1183 | else |
1160 | ov_oos_print(mdev); | 1184 | ov_oos_print(mdev); |
1161 | 1185 | ||
1162 | ok = drbd_send_ack_ex(mdev, P_OV_RESULT, e->sector, e->size, | 1186 | ok = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, |
1163 | eq ? ID_IN_SYNC : ID_OUT_OF_SYNC); | 1187 | eq ? ID_IN_SYNC : ID_OUT_OF_SYNC); |
1164 | 1188 | ||
1165 | drbd_free_ee(mdev, e); | 1189 | dec_unacked(mdev); |
1166 | 1190 | ||
1167 | --mdev->ov_left; | 1191 | --mdev->ov_left; |
1168 | 1192 | ||
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index a076a14ca72d..c59a672a3de0 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c | |||
@@ -1658,7 +1658,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data) | |||
1658 | struct kobject *kobj; | 1658 | struct kobject *kobj; |
1659 | 1659 | ||
1660 | mutex_lock(&loop_devices_mutex); | 1660 | mutex_lock(&loop_devices_mutex); |
1661 | lo = loop_init_one(dev & MINORMASK); | 1661 | lo = loop_init_one(MINOR(dev) >> part_shift); |
1662 | kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM); | 1662 | kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM); |
1663 | mutex_unlock(&loop_devices_mutex); | 1663 | mutex_unlock(&loop_devices_mutex); |
1664 | 1664 | ||
@@ -1691,15 +1691,18 @@ static int __init loop_init(void) | |||
1691 | if (max_part > 0) | 1691 | if (max_part > 0) |
1692 | part_shift = fls(max_part); | 1692 | part_shift = fls(max_part); |
1693 | 1693 | ||
1694 | if ((1UL << part_shift) > DISK_MAX_PARTS) | ||
1695 | return -EINVAL; | ||
1696 | |||
1694 | if (max_loop > 1UL << (MINORBITS - part_shift)) | 1697 | if (max_loop > 1UL << (MINORBITS - part_shift)) |
1695 | return -EINVAL; | 1698 | return -EINVAL; |
1696 | 1699 | ||
1697 | if (max_loop) { | 1700 | if (max_loop) { |
1698 | nr = max_loop; | 1701 | nr = max_loop; |
1699 | range = max_loop; | 1702 | range = max_loop << part_shift; |
1700 | } else { | 1703 | } else { |
1701 | nr = 8; | 1704 | nr = 8; |
1702 | range = 1UL << (MINORBITS - part_shift); | 1705 | range = 1UL << MINORBITS; |
1703 | } | 1706 | } |
1704 | 1707 | ||
1705 | if (register_blkdev(LOOP_MAJOR, "loop")) | 1708 | if (register_blkdev(LOOP_MAJOR, "loop")) |
@@ -1738,7 +1741,7 @@ static void __exit loop_exit(void) | |||
1738 | unsigned long range; | 1741 | unsigned long range; |
1739 | struct loop_device *lo, *next; | 1742 | struct loop_device *lo, *next; |
1740 | 1743 | ||
1741 | range = max_loop ? max_loop : 1UL << (MINORBITS - part_shift); | 1744 | range = max_loop ? max_loop << part_shift : 1UL << MINORBITS; |
1742 | 1745 | ||
1743 | list_for_each_entry_safe(lo, next, &loop_devices, lo_list) | 1746 | list_for_each_entry_safe(lo, next, &loop_devices, lo_list) |
1744 | loop_del_one(lo); | 1747 | loop_del_one(lo); |
diff --git a/drivers/block/xen-blkback/Makefile b/drivers/block/xen-blkback/Makefile new file mode 100644 index 000000000000..e491c1b76878 --- /dev/null +++ b/drivers/block/xen-blkback/Makefile | |||
@@ -0,0 +1,3 @@ | |||
1 | obj-$(CONFIG_XEN_BLKDEV_BACKEND) := xen-blkback.o | ||
2 | |||
3 | xen-blkback-y := blkback.o xenbus.o | ||
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c new file mode 100644 index 000000000000..c73910cc28c9 --- /dev/null +++ b/drivers/block/xen-blkback/blkback.c | |||
@@ -0,0 +1,824 @@ | |||
1 | /****************************************************************************** | ||
2 | * | ||
3 | * Back-end of the driver for virtual block devices. This portion of the | ||
4 | * driver exports a 'unified' block-device interface that can be accessed | ||
5 | * by any operating system that implements a compatible front end. A | ||
6 | * reference front-end implementation can be found in: | ||
7 | * drivers/block/xen-blkfront.c | ||
8 | * | ||
9 | * Copyright (c) 2003-2004, Keir Fraser & Steve Hand | ||
10 | * Copyright (c) 2005, Christopher Clark | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or | ||
13 | * modify it under the terms of the GNU General Public License version 2 | ||
14 | * as published by the Free Software Foundation; or, when distributed | ||
15 | * separately from the Linux kernel or incorporated into other | ||
16 | * software packages, subject to the following license: | ||
17 | * | ||
18 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
19 | * of this source file (the "Software"), to deal in the Software without | ||
20 | * restriction, including without limitation the rights to use, copy, modify, | ||
21 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
22 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
23 | * the following conditions: | ||
24 | * | ||
25 | * The above copyright notice and this permission notice shall be included in | ||
26 | * all copies or substantial portions of the Software. | ||
27 | * | ||
28 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
29 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
30 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
31 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
32 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
33 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
34 | * IN THE SOFTWARE. | ||
35 | */ | ||
36 | |||
37 | #include <linux/spinlock.h> | ||
38 | #include <linux/kthread.h> | ||
39 | #include <linux/list.h> | ||
40 | #include <linux/delay.h> | ||
41 | #include <linux/freezer.h> | ||
42 | |||
43 | #include <xen/events.h> | ||
44 | #include <xen/page.h> | ||
45 | #include <asm/xen/hypervisor.h> | ||
46 | #include <asm/xen/hypercall.h> | ||
47 | #include "common.h" | ||
48 | |||
49 | /* | ||
50 | * These are rather arbitrary. They are fairly large because adjacent requests | ||
51 | * pulled from a communication ring are quite likely to end up being part of | ||
52 | * the same scatter/gather request at the disc. | ||
53 | * | ||
54 | * ** TRY INCREASING 'xen_blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** | ||
55 | * | ||
56 | * This will increase the chances of being able to write whole tracks. | ||
57 | * 64 should be enough to keep us competitive with Linux. | ||
58 | */ | ||
59 | static int xen_blkif_reqs = 64; | ||
60 | module_param_named(reqs, xen_blkif_reqs, int, 0); | ||
61 | MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); | ||
62 | |||
63 | /* Run-time switchable: /sys/module/blkback/parameters/ */ | ||
64 | static unsigned int log_stats; | ||
65 | module_param(log_stats, int, 0644); | ||
66 | |||
/*
 * Each outstanding request that we've passed to the lower device layers has a
 * 'pending_req' allocated to it. Each bio that completes decrements
 * the pendcnt towards zero. When it hits zero, the specified domain has a
 * response queued for it, with the saved 'id' passed back.
 */
struct pending_req {
	struct xen_blkif *blkif;	/* interface the request arrived on */
	u64 id;				/* request id, echoed back in the response */
	int nr_pages;			/* number of segments used by the request */
	atomic_t pendcnt;		/* bios of this request still in flight */
	unsigned short operation;	/* operation copied from the ring request */
	int status;			/* BLKIF_RSP_* status to report at completion */
	struct list_head free_list;	/* link in blkbk->pending_free while unused */
};

/* Value stored in a pending_grant_handles[] slot that holds no mapping. */
#define BLKBACK_INVALID_HANDLE (~0)
84 | |||
/*
 * Driver-wide state shared by every interface: the pool of 'pending_req'
 * structures plus the pages and grant handles that back their segments.
 * The page and handle arrays are indexed through vaddr_pagenr().
 */
struct xen_blkbk {
	/* Array of all 'pending_req' structures (free or in use). */
	struct pending_req *pending_reqs;
	/* List of all 'pending_req' available */
	struct list_head pending_free;
	/* And its spinlock. */
	spinlock_t pending_free_lock;
	/* Woken when a 'pending_req' is returned to an empty free list. */
	wait_queue_head_t pending_free_wq;
	/* The list of all pages that are available. */
	struct page **pending_pages;
	/* And the grant handles that are available. */
	grant_handle_t *pending_grant_handles;
};

/* The single instance, allocated in xen_blkif_init(). */
static struct xen_blkbk *blkbk;
99 | |||
100 | /* | ||
101 | * Little helpful macro to figure out the index and virtual address of the | ||
102 | * pending_pages[..]. For each 'pending_req' we have have up to | ||
103 | * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through | ||
104 | * 10 and would index in the pending_pages[..]. | ||
105 | */ | ||
106 | static inline int vaddr_pagenr(struct pending_req *req, int seg) | ||
107 | { | ||
108 | return (req - blkbk->pending_reqs) * | ||
109 | BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; | ||
110 | } | ||
111 | |||
112 | #define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)] | ||
113 | |||
114 | static inline unsigned long vaddr(struct pending_req *req, int seg) | ||
115 | { | ||
116 | unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg)); | ||
117 | return (unsigned long)pfn_to_kaddr(pfn); | ||
118 | } | ||
119 | |||
120 | #define pending_handle(_req, _seg) \ | ||
121 | (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)]) | ||
122 | |||
123 | |||
124 | static int do_block_io_op(struct xen_blkif *blkif); | ||
125 | static int dispatch_rw_block_io(struct xen_blkif *blkif, | ||
126 | struct blkif_request *req, | ||
127 | struct pending_req *pending_req); | ||
128 | static void make_response(struct xen_blkif *blkif, u64 id, | ||
129 | unsigned short op, int st); | ||
130 | |||
131 | /* | ||
132 | * Retrieve from the 'pending_reqs' a free pending_req structure to be used. | ||
133 | */ | ||
134 | static struct pending_req *alloc_req(void) | ||
135 | { | ||
136 | struct pending_req *req = NULL; | ||
137 | unsigned long flags; | ||
138 | |||
139 | spin_lock_irqsave(&blkbk->pending_free_lock, flags); | ||
140 | if (!list_empty(&blkbk->pending_free)) { | ||
141 | req = list_entry(blkbk->pending_free.next, struct pending_req, | ||
142 | free_list); | ||
143 | list_del(&req->free_list); | ||
144 | } | ||
145 | spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); | ||
146 | return req; | ||
147 | } | ||
148 | |||
149 | /* | ||
150 | * Return the 'pending_req' structure back to the freepool. We also | ||
151 | * wake up the thread if it was waiting for a free page. | ||
152 | */ | ||
153 | static void free_req(struct pending_req *req) | ||
154 | { | ||
155 | unsigned long flags; | ||
156 | int was_empty; | ||
157 | |||
158 | spin_lock_irqsave(&blkbk->pending_free_lock, flags); | ||
159 | was_empty = list_empty(&blkbk->pending_free); | ||
160 | list_add(&req->free_list, &blkbk->pending_free); | ||
161 | spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); | ||
162 | if (was_empty) | ||
163 | wake_up(&blkbk->pending_free_wq); | ||
164 | } | ||
165 | |||
166 | /* | ||
167 | * Routines for managing virtual block devices (vbds). | ||
168 | */ | ||
169 | static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif, | ||
170 | int operation) | ||
171 | { | ||
172 | struct xen_vbd *vbd = &blkif->vbd; | ||
173 | int rc = -EACCES; | ||
174 | |||
175 | if ((operation != READ) && vbd->readonly) | ||
176 | goto out; | ||
177 | |||
178 | if (likely(req->nr_sects)) { | ||
179 | blkif_sector_t end = req->sector_number + req->nr_sects; | ||
180 | |||
181 | if (unlikely(end < req->sector_number)) | ||
182 | goto out; | ||
183 | if (unlikely(end > vbd_sz(vbd))) | ||
184 | goto out; | ||
185 | } | ||
186 | |||
187 | req->dev = vbd->pdevice; | ||
188 | req->bdev = vbd->bdev; | ||
189 | rc = 0; | ||
190 | |||
191 | out: | ||
192 | return rc; | ||
193 | } | ||
194 | |||
195 | static void xen_vbd_resize(struct xen_blkif *blkif) | ||
196 | { | ||
197 | struct xen_vbd *vbd = &blkif->vbd; | ||
198 | struct xenbus_transaction xbt; | ||
199 | int err; | ||
200 | struct xenbus_device *dev = xen_blkbk_xenbus(blkif->be); | ||
201 | unsigned long long new_size = vbd_sz(vbd); | ||
202 | |||
203 | pr_info(DRV_PFX "VBD Resize: Domid: %d, Device: (%d, %d)\n", | ||
204 | blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); | ||
205 | pr_info(DRV_PFX "VBD Resize: new size %llu\n", new_size); | ||
206 | vbd->size = new_size; | ||
207 | again: | ||
208 | err = xenbus_transaction_start(&xbt); | ||
209 | if (err) { | ||
210 | pr_warn(DRV_PFX "Error starting transaction"); | ||
211 | return; | ||
212 | } | ||
213 | err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", | ||
214 | (unsigned long long)vbd_sz(vbd)); | ||
215 | if (err) { | ||
216 | pr_warn(DRV_PFX "Error writing new size"); | ||
217 | goto abort; | ||
218 | } | ||
219 | /* | ||
220 | * Write the current state; we will use this to synchronize | ||
221 | * the front-end. If the current state is "connected" the | ||
222 | * front-end will get the new size information online. | ||
223 | */ | ||
224 | err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state); | ||
225 | if (err) { | ||
226 | pr_warn(DRV_PFX "Error writing the state"); | ||
227 | goto abort; | ||
228 | } | ||
229 | |||
230 | err = xenbus_transaction_end(xbt, 0); | ||
231 | if (err == -EAGAIN) | ||
232 | goto again; | ||
233 | if (err) | ||
234 | pr_warn(DRV_PFX "Error ending transaction"); | ||
235 | return; | ||
236 | abort: | ||
237 | xenbus_transaction_end(xbt, 1); | ||
238 | } | ||
239 | |||
/*
 * Notification from the guest OS.
 *
 * Flags that the interface has requests waiting and wakes whoever sleeps on
 * blkif->wq (xen_blkif_schedule() waits there for 'waiting_reqs').
 */
static void blkif_notify_work(struct xen_blkif *blkif)
{
	blkif->waiting_reqs = 1;
	wake_up(&blkif->wq);
}
248 | |||
/*
 * Interrupt handler for the interface: 'dev_id' is passed straight to
 * blkif_notify_work(), i.e. it is used as the struct xen_blkif to kick.
 * Always reports the interrupt as handled.
 */
irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
{
	blkif_notify_work(dev_id);
	return IRQ_HANDLED;
}
254 | |||
255 | /* | ||
256 | * SCHEDULER FUNCTIONS | ||
257 | */ | ||
258 | |||
259 | static void print_stats(struct xen_blkif *blkif) | ||
260 | { | ||
261 | pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n", | ||
262 | current->comm, blkif->st_oo_req, | ||
263 | blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req); | ||
264 | blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); | ||
265 | blkif->st_rd_req = 0; | ||
266 | blkif->st_wr_req = 0; | ||
267 | blkif->st_oo_req = 0; | ||
268 | } | ||
269 | |||
/*
 * Thread function servicing one interface; 'arg' is its struct xen_blkif.
 *
 * A reference on the interface is held for as long as the loop runs.  Each
 * iteration publishes a changed device size if needed, sleeps until there is
 * both work signalled for the interface and a free 'pending_req', and then
 * processes the request ring.  Returns 0 once the thread is asked to stop.
 */
int xen_blkif_schedule(void *arg)
{
	struct xen_blkif *blkif = arg;
	struct xen_vbd *vbd = &blkif->vbd;

	xen_blkif_get(blkif);

	while (!kthread_should_stop()) {
		if (try_to_freeze())
			continue;
		/* The cached size no longer matches the device: tell the frontend. */
		if (unlikely(vbd->size != vbd_sz(vbd)))
			xen_vbd_resize(blkif);

		/* Wait for work on this interface ... */
		wait_event_interruptible(
			blkif->wq,
			blkif->waiting_reqs || kthread_should_stop());
		/* ... and for a free 'pending_req' in the shared pool. */
		wait_event_interruptible(
			blkbk->pending_free_wq,
			!list_empty(&blkbk->pending_free) ||
			kthread_should_stop());

		blkif->waiting_reqs = 0;
		smp_mb(); /* clear flag *before* checking for work */

		/* Non-zero return: requests were left on the ring, so come back. */
		if (do_block_io_op(blkif))
			blkif->waiting_reqs = 1;

		if (log_stats && time_after(jiffies, blkif->st_print))
			print_stats(blkif);
	}

	/* Final statistics line on the way out. */
	if (log_stats)
		print_stats(blkif);

	blkif->xenblkd = NULL;
	xen_blkif_put(blkif);

	return 0;
}
309 | |||
/* Per-segment bookkeeping built while dispatching one request. */
struct seg_buf {
	/* Bus address of the mapped page OR'ed with the byte offset of first_sect. */
	unsigned long buf;
	/* Number of sectors in the segment (last_sect - first_sect + 1). */
	unsigned int nsec;
};
314 | /* | ||
315 | * Unmap the grant references, and also remove the M2P over-rides | ||
316 | * used in the 'pending_req'. | ||
317 | */ | ||
318 | static void xen_blkbk_unmap(struct pending_req *req) | ||
319 | { | ||
320 | struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | ||
321 | unsigned int i, invcount = 0; | ||
322 | grant_handle_t handle; | ||
323 | int ret; | ||
324 | |||
325 | for (i = 0; i < req->nr_pages; i++) { | ||
326 | handle = pending_handle(req, i); | ||
327 | if (handle == BLKBACK_INVALID_HANDLE) | ||
328 | continue; | ||
329 | gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), | ||
330 | GNTMAP_host_map, handle); | ||
331 | pending_handle(req, i) = BLKBACK_INVALID_HANDLE; | ||
332 | invcount++; | ||
333 | } | ||
334 | |||
335 | ret = HYPERVISOR_grant_table_op( | ||
336 | GNTTABOP_unmap_grant_ref, unmap, invcount); | ||
337 | BUG_ON(ret); | ||
338 | /* | ||
339 | * Note, we use invcount, so nr->pages, so we can't index | ||
340 | * using vaddr(req, i). | ||
341 | */ | ||
342 | for (i = 0; i < invcount; i++) { | ||
343 | ret = m2p_remove_override( | ||
344 | virt_to_page(unmap[i].host_addr), false); | ||
345 | if (ret) { | ||
346 | pr_alert(DRV_PFX "Failed to remove M2P override for %lx\n", | ||
347 | (unsigned long)unmap[i].host_addr); | ||
348 | continue; | ||
349 | } | ||
350 | } | ||
351 | } | ||
352 | |||
/*
 * Map the grant references of every segment of 'req' into the pages reserved
 * for 'pending_req', install the M2P overrides and fill in seg[i].buf.
 *
 * Returns 0 when every segment was mapped and overridden; non-zero otherwise.
 * The grant handles (or BLKBACK_INVALID_HANDLE for failed mappings) are
 * recorded for all segments even on failure, so the caller can clean up with
 * xen_blkbk_unmap().
 */
static int xen_blkbk_map(struct blkif_request *req,
			 struct pending_req *pending_req,
			 struct seg_buf seg[])
{
	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	int i;
	int nseg = req->nr_segments;
	int ret = 0;

	/*
	 * Set up map[..] with the address of the page in our domain and the
	 * corresponding grant reference for each segment.
	 */
	for (i = 0; i < nseg; i++) {
		uint32_t flags;

		flags = GNTMAP_host_map;
		/* Anything but a read only needs to read the granted page. */
		if (pending_req->operation != BLKIF_OP_READ)
			flags |= GNTMAP_readonly;
		gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
				  req->u.rw.seg[i].gref,
				  pending_req->blkif->domid);
	}

	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
	BUG_ON(ret);

	/*
	 * Now swizzle the MFN in our domain with the MFN from the other domain
	 * so that when we access vaddr(pending_req,i) it has the contents of
	 * the page from the other domain.
	 */
	for (i = 0; i < nseg; i++) {
		if (unlikely(map[i].status != 0)) {
			pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
			map[i].handle = BLKBACK_INVALID_HANDLE;
			ret |= 1;
		}

		pending_handle(pending_req, i) = map[i].handle;

		/*
		 * Once anything has failed, keep recording handles but do not
		 * install further overrides.
		 */
		if (ret)
			continue;

		ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr),
			blkbk->pending_page(pending_req, i), false);
		if (ret) {
			pr_alert(DRV_PFX "Failed to install M2P override for %lx (ret: %d)\n",
				 (unsigned long)map[i].dev_bus_addr, ret);
			/* We could switch over to GNTTABOP_copy */
			continue;
		}

		/* Bus address of the page plus the byte offset of the first sector. */
		seg[i].buf  = map[i].dev_bus_addr |
			(req->u.rw.seg[i].first_sect << 9);
	}
	return ret;
}
412 | |||
413 | /* | ||
414 | * Completion callback on the bio's. Called as bh->b_end_io() | ||
415 | */ | ||
416 | |||
417 | static void __end_block_io_op(struct pending_req *pending_req, int error) | ||
418 | { | ||
419 | /* An error fails the entire request. */ | ||
420 | if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) && | ||
421 | (error == -EOPNOTSUPP)) { | ||
422 | pr_debug(DRV_PFX "flush diskcache op failed, not supported\n"); | ||
423 | xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0); | ||
424 | pending_req->status = BLKIF_RSP_EOPNOTSUPP; | ||
425 | } else if (error) { | ||
426 | pr_debug(DRV_PFX "Buffer not up-to-date at end of operation," | ||
427 | " error=%d\n", error); | ||
428 | pending_req->status = BLKIF_RSP_ERROR; | ||
429 | } | ||
430 | |||
431 | /* | ||
432 | * If all of the bio's have completed it is time to unmap | ||
433 | * the grant references associated with 'request' and provide | ||
434 | * the proper response on the ring. | ||
435 | */ | ||
436 | if (atomic_dec_and_test(&pending_req->pendcnt)) { | ||
437 | xen_blkbk_unmap(pending_req); | ||
438 | make_response(pending_req->blkif, pending_req->id, | ||
439 | pending_req->operation, pending_req->status); | ||
440 | xen_blkif_put(pending_req->blkif); | ||
441 | free_req(pending_req); | ||
442 | } | ||
443 | } | ||
444 | |||
/*
 * bio callback (installed as bio->bi_end_io by dispatch_rw_block_io()).
 *
 * bio->bi_private carries the owning 'pending_req'; the completion is
 * accounted there and the bio reference is then dropped.
 */
static void end_block_io_op(struct bio *bio, int error)
{
	__end_block_io_op(bio->bi_private, error);
	bio_put(bio);
}
453 | |||
454 | |||
455 | |||
/*
 * Copy each 'struct blkif_request' (which has the sectors we want, number of
 * them, grant references, etc) from the ring buffer into a private copy and
 * hand it to dispatch_rw_block_io(), which turns it into block-layer I/O.
 *
 * Returns 1 if the loop stopped early with requests still on the ring
 * (thread asked to stop, or no free 'pending_req'), 0 otherwise.
 */
static int do_block_io_op(struct xen_blkif *blkif)
{
	union blkif_back_rings *blk_rings = &blkif->blk_rings;
	struct blkif_request req;
	struct pending_req *pending_req;
	RING_IDX rc, rp;
	int more_to_do = 0;

	rc = blk_rings->common.req_cons;
	rp = blk_rings->common.sring->req_prod;
	rmb(); /* Ensure we see queued requests up to 'rp'. */

	while (rc != rp) {

		if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
			break;

		if (kthread_should_stop()) {
			more_to_do = 1;
			break;
		}

		pending_req = alloc_req();
		if (NULL == pending_req) {
			/* Out of 'pending_req's: count it and retry later. */
			blkif->st_oo_req++;
			more_to_do = 1;
			break;
		}

		/* Take a private copy in the layout the frontend negotiated. */
		switch (blkif->blk_protocol) {
		case BLKIF_PROTOCOL_NATIVE:
			memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
			break;
		case BLKIF_PROTOCOL_X86_32:
			blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
			break;
		case BLKIF_PROTOCOL_X86_64:
			blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
			break;
		default:
			BUG();
		}
		blk_rings->common.req_cons = ++rc; /* before make_response() */

		/* Apply all sanity checks to /private copy/ of request. */
		barrier();

		/* On failure dispatch_rw_block_io() has already released pending_req. */
		if (dispatch_rw_block_io(blkif, &req, pending_req))
			break;

		/* Yield point for this unbounded loop. */
		cond_resched();
	}

	return more_to_do;
}
517 | |||
518 | /* | ||
519 | * Transmutation of the 'struct blkif_request' to a proper 'struct bio' | ||
520 | * and call the 'submit_bio' to pass it to the underlying storage. | ||
521 | */ | ||
522 | static int dispatch_rw_block_io(struct xen_blkif *blkif, | ||
523 | struct blkif_request *req, | ||
524 | struct pending_req *pending_req) | ||
525 | { | ||
526 | struct phys_req preq; | ||
527 | struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | ||
528 | unsigned int nseg; | ||
529 | struct bio *bio = NULL; | ||
530 | struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | ||
531 | int i, nbio = 0; | ||
532 | int operation; | ||
533 | struct blk_plug plug; | ||
534 | |||
535 | switch (req->operation) { | ||
536 | case BLKIF_OP_READ: | ||
537 | blkif->st_rd_req++; | ||
538 | operation = READ; | ||
539 | break; | ||
540 | case BLKIF_OP_WRITE: | ||
541 | blkif->st_wr_req++; | ||
542 | operation = WRITE_ODIRECT; | ||
543 | break; | ||
544 | case BLKIF_OP_FLUSH_DISKCACHE: | ||
545 | blkif->st_f_req++; | ||
546 | operation = WRITE_FLUSH; | ||
547 | break; | ||
548 | case BLKIF_OP_WRITE_BARRIER: | ||
549 | default: | ||
550 | operation = 0; /* make gcc happy */ | ||
551 | goto fail_response; | ||
552 | break; | ||
553 | } | ||
554 | |||
555 | /* Check that the number of segments is sane. */ | ||
556 | nseg = req->nr_segments; | ||
557 | if (unlikely(nseg == 0 && operation != WRITE_FLUSH) || | ||
558 | unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { | ||
559 | pr_debug(DRV_PFX "Bad number of segments in request (%d)\n", | ||
560 | nseg); | ||
561 | /* Haven't submitted any bio's yet. */ | ||
562 | goto fail_response; | ||
563 | } | ||
564 | |||
565 | preq.dev = req->handle; | ||
566 | preq.sector_number = req->u.rw.sector_number; | ||
567 | preq.nr_sects = 0; | ||
568 | |||
569 | pending_req->blkif = blkif; | ||
570 | pending_req->id = req->id; | ||
571 | pending_req->operation = req->operation; | ||
572 | pending_req->status = BLKIF_RSP_OKAY; | ||
573 | pending_req->nr_pages = nseg; | ||
574 | |||
575 | for (i = 0; i < nseg; i++) { | ||
576 | seg[i].nsec = req->u.rw.seg[i].last_sect - | ||
577 | req->u.rw.seg[i].first_sect + 1; | ||
578 | if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || | ||
579 | (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect)) | ||
580 | goto fail_response; | ||
581 | preq.nr_sects += seg[i].nsec; | ||
582 | |||
583 | } | ||
584 | |||
585 | if (xen_vbd_translate(&preq, blkif, operation) != 0) { | ||
586 | pr_debug(DRV_PFX "access denied: %s of [%llu,%llu] on dev=%04x\n", | ||
587 | operation == READ ? "read" : "write", | ||
588 | preq.sector_number, | ||
589 | preq.sector_number + preq.nr_sects, preq.dev); | ||
590 | goto fail_response; | ||
591 | } | ||
592 | |||
593 | /* | ||
594 | * This check _MUST_ be done after xen_vbd_translate as the preq.bdev | ||
595 | * is set there. | ||
596 | */ | ||
597 | for (i = 0; i < nseg; i++) { | ||
598 | if (((int)preq.sector_number|(int)seg[i].nsec) & | ||
599 | ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { | ||
600 | pr_debug(DRV_PFX "Misaligned I/O request from domain %d", | ||
601 | blkif->domid); | ||
602 | goto fail_response; | ||
603 | } | ||
604 | } | ||
605 | |||
606 | /* | ||
607 | * If we have failed at this point, we need to undo the M2P override, | ||
608 | * set gnttab_set_unmap_op on all of the grant references and perform | ||
609 | * the hypercall to unmap the grants - that is all done in | ||
610 | * xen_blkbk_unmap. | ||
611 | */ | ||
612 | if (xen_blkbk_map(req, pending_req, seg)) | ||
613 | goto fail_flush; | ||
614 | |||
615 | /* This corresponding xen_blkif_put is done in __end_block_io_op */ | ||
616 | xen_blkif_get(blkif); | ||
617 | |||
618 | for (i = 0; i < nseg; i++) { | ||
619 | while ((bio == NULL) || | ||
620 | (bio_add_page(bio, | ||
621 | blkbk->pending_page(pending_req, i), | ||
622 | seg[i].nsec << 9, | ||
623 | seg[i].buf & ~PAGE_MASK) == 0)) { | ||
624 | |||
625 | bio = bio_alloc(GFP_KERNEL, nseg-i); | ||
626 | if (unlikely(bio == NULL)) | ||
627 | goto fail_put_bio; | ||
628 | |||
629 | biolist[nbio++] = bio; | ||
630 | bio->bi_bdev = preq.bdev; | ||
631 | bio->bi_private = pending_req; | ||
632 | bio->bi_end_io = end_block_io_op; | ||
633 | bio->bi_sector = preq.sector_number; | ||
634 | } | ||
635 | |||
636 | preq.sector_number += seg[i].nsec; | ||
637 | } | ||
638 | |||
639 | /* This will be hit if the operation was a flush. */ | ||
640 | if (!bio) { | ||
641 | BUG_ON(operation != WRITE_FLUSH); | ||
642 | |||
643 | bio = bio_alloc(GFP_KERNEL, 0); | ||
644 | if (unlikely(bio == NULL)) | ||
645 | goto fail_put_bio; | ||
646 | |||
647 | biolist[nbio++] = bio; | ||
648 | bio->bi_bdev = preq.bdev; | ||
649 | bio->bi_private = pending_req; | ||
650 | bio->bi_end_io = end_block_io_op; | ||
651 | } | ||
652 | |||
653 | /* | ||
654 | * We set it one so that the last submit_bio does not have to call | ||
655 | * atomic_inc. | ||
656 | */ | ||
657 | atomic_set(&pending_req->pendcnt, nbio); | ||
658 | |||
659 | /* Get a reference count for the disk queue and start sending I/O */ | ||
660 | blk_start_plug(&plug); | ||
661 | |||
662 | for (i = 0; i < nbio; i++) | ||
663 | submit_bio(operation, biolist[i]); | ||
664 | |||
665 | /* Let the I/Os go.. */ | ||
666 | blk_finish_plug(&plug); | ||
667 | |||
668 | if (operation == READ) | ||
669 | blkif->st_rd_sect += preq.nr_sects; | ||
670 | else if (operation == WRITE || operation == WRITE_FLUSH) | ||
671 | blkif->st_wr_sect += preq.nr_sects; | ||
672 | |||
673 | return 0; | ||
674 | |||
675 | fail_flush: | ||
676 | xen_blkbk_unmap(pending_req); | ||
677 | fail_response: | ||
678 | /* Haven't submitted any bio's yet. */ | ||
679 | make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); | ||
680 | free_req(pending_req); | ||
681 | msleep(1); /* back off a bit */ | ||
682 | return -EIO; | ||
683 | |||
684 | fail_put_bio: | ||
685 | for (i = 0; i < nbio; i++) | ||
686 | bio_put(biolist[i]); | ||
687 | __end_block_io_op(pending_req, -EINVAL); | ||
688 | msleep(1); /* back off a bit */ | ||
689 | return -EIO; | ||
690 | } | ||
691 | |||
692 | |||
693 | |||
694 | /* | ||
695 | * Put a response on the ring on how the operation fared. | ||
696 | */ | ||
697 | static void make_response(struct xen_blkif *blkif, u64 id, | ||
698 | unsigned short op, int st) | ||
699 | { | ||
700 | struct blkif_response resp; | ||
701 | unsigned long flags; | ||
702 | union blkif_back_rings *blk_rings = &blkif->blk_rings; | ||
703 | int more_to_do = 0; | ||
704 | int notify; | ||
705 | |||
706 | resp.id = id; | ||
707 | resp.operation = op; | ||
708 | resp.status = st; | ||
709 | |||
710 | spin_lock_irqsave(&blkif->blk_ring_lock, flags); | ||
711 | /* Place on the response ring for the relevant domain. */ | ||
712 | switch (blkif->blk_protocol) { | ||
713 | case BLKIF_PROTOCOL_NATIVE: | ||
714 | memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt), | ||
715 | &resp, sizeof(resp)); | ||
716 | break; | ||
717 | case BLKIF_PROTOCOL_X86_32: | ||
718 | memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt), | ||
719 | &resp, sizeof(resp)); | ||
720 | break; | ||
721 | case BLKIF_PROTOCOL_X86_64: | ||
722 | memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt), | ||
723 | &resp, sizeof(resp)); | ||
724 | break; | ||
725 | default: | ||
726 | BUG(); | ||
727 | } | ||
728 | blk_rings->common.rsp_prod_pvt++; | ||
729 | RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify); | ||
730 | if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) { | ||
731 | /* | ||
732 | * Tail check for pending requests. Allows frontend to avoid | ||
733 | * notifications if requests are already in flight (lower | ||
734 | * overheads and promotes batching). | ||
735 | */ | ||
736 | RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do); | ||
737 | |||
738 | } else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) { | ||
739 | more_to_do = 1; | ||
740 | } | ||
741 | |||
742 | spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); | ||
743 | |||
744 | if (more_to_do) | ||
745 | blkif_notify_work(blkif); | ||
746 | if (notify) | ||
747 | notify_remote_via_irq(blkif->irq); | ||
748 | } | ||
749 | |||
750 | static int __init xen_blkif_init(void) | ||
751 | { | ||
752 | int i, mmap_pages; | ||
753 | int rc = 0; | ||
754 | |||
755 | if (!xen_pv_domain()) | ||
756 | return -ENODEV; | ||
757 | |||
758 | blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); | ||
759 | if (!blkbk) { | ||
760 | pr_alert(DRV_PFX "%s: out of memory!\n", __func__); | ||
761 | return -ENOMEM; | ||
762 | } | ||
763 | |||
764 | mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; | ||
765 | |||
766 | blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) * | ||
767 | xen_blkif_reqs, GFP_KERNEL); | ||
768 | blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) * | ||
769 | mmap_pages, GFP_KERNEL); | ||
770 | blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) * | ||
771 | mmap_pages, GFP_KERNEL); | ||
772 | |||
773 | if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || | ||
774 | !blkbk->pending_pages) { | ||
775 | rc = -ENOMEM; | ||
776 | goto out_of_memory; | ||
777 | } | ||
778 | |||
779 | for (i = 0; i < mmap_pages; i++) { | ||
780 | blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; | ||
781 | blkbk->pending_pages[i] = alloc_page(GFP_KERNEL); | ||
782 | if (blkbk->pending_pages[i] == NULL) { | ||
783 | rc = -ENOMEM; | ||
784 | goto out_of_memory; | ||
785 | } | ||
786 | } | ||
787 | rc = xen_blkif_interface_init(); | ||
788 | if (rc) | ||
789 | goto failed_init; | ||
790 | |||
791 | memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs)); | ||
792 | |||
793 | INIT_LIST_HEAD(&blkbk->pending_free); | ||
794 | spin_lock_init(&blkbk->pending_free_lock); | ||
795 | init_waitqueue_head(&blkbk->pending_free_wq); | ||
796 | |||
797 | for (i = 0; i < xen_blkif_reqs; i++) | ||
798 | list_add_tail(&blkbk->pending_reqs[i].free_list, | ||
799 | &blkbk->pending_free); | ||
800 | |||
801 | rc = xen_blkif_xenbus_init(); | ||
802 | if (rc) | ||
803 | goto failed_init; | ||
804 | |||
805 | return 0; | ||
806 | |||
807 | out_of_memory: | ||
808 | pr_alert(DRV_PFX "%s: out of memory\n", __func__); | ||
809 | failed_init: | ||
810 | kfree(blkbk->pending_reqs); | ||
811 | kfree(blkbk->pending_grant_handles); | ||
812 | for (i = 0; i < mmap_pages; i++) { | ||
813 | if (blkbk->pending_pages[i]) | ||
814 | __free_page(blkbk->pending_pages[i]); | ||
815 | } | ||
816 | kfree(blkbk->pending_pages); | ||
817 | kfree(blkbk); | ||
818 | blkbk = NULL; | ||
819 | return rc; | ||
820 | } | ||
821 | |||
822 | module_init(xen_blkif_init); | ||
823 | |||
824 | MODULE_LICENSE("Dual BSD/GPL"); | ||
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h new file mode 100644 index 000000000000..9e40b283a468 --- /dev/null +++ b/drivers/block/xen-blkback/common.h | |||
@@ -0,0 +1,233 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or | ||
3 | * modify it under the terms of the GNU General Public License version 2 | ||
4 | * as published by the Free Software Foundation; or, when distributed | ||
5 | * separately from the Linux kernel or incorporated into other | ||
6 | * software packages, subject to the following license: | ||
7 | * | ||
8 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
9 | * of this source file (the "Software"), to deal in the Software without | ||
10 | * restriction, including without limitation the rights to use, copy, modify, | ||
11 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
12 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
13 | * the following conditions: | ||
14 | * | ||
15 | * The above copyright notice and this permission notice shall be included in | ||
16 | * all copies or substantial portions of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
21 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
24 | * IN THE SOFTWARE. | ||
25 | */ | ||
26 | |||
27 | #ifndef __XEN_BLKIF__BACKEND__COMMON_H__ | ||
28 | #define __XEN_BLKIF__BACKEND__COMMON_H__ | ||
29 | |||
30 | #include <linux/version.h> | ||
31 | #include <linux/module.h> | ||
32 | #include <linux/interrupt.h> | ||
33 | #include <linux/slab.h> | ||
34 | #include <linux/blkdev.h> | ||
35 | #include <linux/vmalloc.h> | ||
36 | #include <linux/wait.h> | ||
37 | #include <linux/io.h> | ||
38 | #include <asm/setup.h> | ||
39 | #include <asm/pgalloc.h> | ||
40 | #include <asm/hypervisor.h> | ||
41 | #include <xen/grant_table.h> | ||
42 | #include <xen/xenbus.h> | ||
43 | #include <xen/interface/io/ring.h> | ||
44 | #include <xen/interface/io/blkif.h> | ||
45 | #include <xen/interface/io/protocols.h> | ||
46 | |||
47 | #define DRV_PFX "xen-blkback:" | ||
48 | #define DPRINTK(fmt, args...) \ | ||
49 | pr_debug(DRV_PFX "(%s:%d) " fmt ".\n", \ | ||
50 | __func__, __LINE__, ##args) | ||
51 | |||
52 | |||
53 | /* Not a real protocol. Used to generate ring structs which contain | ||
54 | * the elements common to all protocols only. This way we get a | ||
55 | * compiler-checkable way to use common struct elements, so we can | ||
56 | * avoid using switch(protocol) in a number of places. */ | ||
57 | struct blkif_common_request { | ||
58 | char dummy; | ||
59 | }; | ||
60 | struct blkif_common_response { | ||
61 | char dummy; | ||
62 | }; | ||
63 | |||
/*
 * i386 protocol version.
 *
 * The pack(push, 4) pragma caps member alignment at 4 bytes for the two
 * structures below, so the 64-bit 'id' member is placed on a 4-byte boundary
 * instead of being padded out to 8.
 * NOTE(review): presumably this reproduces the layout emitted by a 32-bit
 * frontend -- confirm against the public blkif interface definition.
 */
#pragma pack(push, 4)
struct blkif_x86_32_request {
	uint8_t        operation;    /* BLKIF_OP_???                         */
	uint8_t        nr_segments;  /* number of segments                   */
	blkif_vdev_t   handle;       /* only for read/write requests         */
	uint64_t       id;           /* private guest value, echoed in resp  */
	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
struct blkif_x86_32_response {
	uint64_t        id;              /* copied from request */
	uint8_t         operation;       /* copied from request */
	int16_t         status;          /* BLKIF_RSP_???       */
};
#pragma pack(pop)
80 | |||
/*
 * x86_64 protocol version.
 *
 * Same members as the i386 variant, but 'id' is explicitly aligned to
 * 8 bytes, so the offsets of 'id' and everything after it do not depend on
 * the default alignment rules of the compiler building the backend.
 */
struct blkif_x86_64_request {
	uint8_t        operation;    /* BLKIF_OP_???                         */
	uint8_t        nr_segments;  /* number of segments                   */
	blkif_vdev_t   handle;       /* only for read/write requests         */
	uint64_t       __attribute__((__aligned__(8))) id;
	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
struct blkif_x86_64_response {
	uint64_t       __attribute__((__aligned__(8))) id;
	uint8_t         operation;       /* copied from request */
	int16_t         status;          /* BLKIF_RSP_???       */
};
95 | |||
96 | DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, | ||
97 | struct blkif_common_response); | ||
98 | DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, | ||
99 | struct blkif_x86_32_response); | ||
100 | DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, | ||
101 | struct blkif_x86_64_response); | ||
102 | |||
103 | union blkif_back_rings { | ||
104 | struct blkif_back_ring native; | ||
105 | struct blkif_common_back_ring common; | ||
106 | struct blkif_x86_32_back_ring x86_32; | ||
107 | struct blkif_x86_64_back_ring x86_64; | ||
108 | }; | ||
109 | |||
110 | enum blkif_protocol { | ||
111 | BLKIF_PROTOCOL_NATIVE = 1, | ||
112 | BLKIF_PROTOCOL_X86_32 = 2, | ||
113 | BLKIF_PROTOCOL_X86_64 = 3, | ||
114 | }; | ||
115 | |||
116 | struct xen_vbd { | ||
117 | /* What the domain refers to this vbd as. */ | ||
118 | blkif_vdev_t handle; | ||
119 | /* Non-zero -> read-only */ | ||
120 | unsigned char readonly; | ||
121 | /* VDISK_xxx */ | ||
122 | unsigned char type; | ||
123 | /* phys device that this vbd maps to. */ | ||
124 | u32 pdevice; | ||
125 | struct block_device *bdev; | ||
126 | /* Cached size parameter. */ | ||
127 | sector_t size; | ||
128 | bool flush_support; | ||
129 | }; | ||
130 | |||
131 | struct backend_info; | ||
132 | |||
133 | struct xen_blkif { | ||
134 | /* Unique identifier for this interface. */ | ||
135 | domid_t domid; | ||
136 | unsigned int handle; | ||
137 | /* Physical parameters of the comms window. */ | ||
138 | unsigned int irq; | ||
139 | /* Comms information. */ | ||
140 | enum blkif_protocol blk_protocol; | ||
141 | union blkif_back_rings blk_rings; | ||
142 | struct vm_struct *blk_ring_area; | ||
143 | /* The VBD attached to this interface. */ | ||
144 | struct xen_vbd vbd; | ||
145 | /* Back pointer to the backend_info. */ | ||
146 | struct backend_info *be; | ||
147 | /* Private fields. */ | ||
148 | spinlock_t blk_ring_lock; | ||
149 | atomic_t refcnt; | ||
150 | |||
151 | wait_queue_head_t wq; | ||
152 | /* One thread per one blkif. */ | ||
153 | struct task_struct *xenblkd; | ||
154 | unsigned int waiting_reqs; | ||
155 | |||
156 | /* statistics */ | ||
157 | unsigned long st_print; | ||
158 | int st_rd_req; | ||
159 | int st_wr_req; | ||
160 | int st_oo_req; | ||
161 | int st_f_req; | ||
162 | int st_rd_sect; | ||
163 | int st_wr_sect; | ||
164 | |||
165 | wait_queue_head_t waiting_to_free; | ||
166 | |||
167 | grant_handle_t shmem_handle; | ||
168 | grant_ref_t shmem_ref; | ||
169 | }; | ||
170 | |||
171 | |||
172 | #define vbd_sz(_v) ((_v)->bdev->bd_part ? \ | ||
173 | (_v)->bdev->bd_part->nr_sects : \ | ||
174 | get_capacity((_v)->bdev->bd_disk)) | ||
175 | |||
176 | #define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt)) | ||
177 | #define xen_blkif_put(_b) \ | ||
178 | do { \ | ||
179 | if (atomic_dec_and_test(&(_b)->refcnt)) \ | ||
180 | wake_up(&(_b)->waiting_to_free);\ | ||
181 | } while (0) | ||
182 | |||
183 | struct phys_req { | ||
184 | unsigned short dev; | ||
185 | unsigned short nr_sects; | ||
186 | struct block_device *bdev; | ||
187 | blkif_sector_t sector_number; | ||
188 | }; | ||
189 | int xen_blkif_interface_init(void); | ||
190 | |||
191 | int xen_blkif_xenbus_init(void); | ||
192 | |||
193 | irqreturn_t xen_blkif_be_int(int irq, void *dev_id); | ||
194 | int xen_blkif_schedule(void *arg); | ||
195 | |||
196 | int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, | ||
197 | struct backend_info *be, int state); | ||
198 | |||
199 | struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); | ||
200 | |||
201 | static inline void blkif_get_x86_32_req(struct blkif_request *dst, | ||
202 | struct blkif_x86_32_request *src) | ||
203 | { | ||
204 | int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; | ||
205 | dst->operation = src->operation; | ||
206 | dst->nr_segments = src->nr_segments; | ||
207 | dst->handle = src->handle; | ||
208 | dst->id = src->id; | ||
209 | dst->u.rw.sector_number = src->sector_number; | ||
210 | barrier(); | ||
211 | if (n > dst->nr_segments) | ||
212 | n = dst->nr_segments; | ||
213 | for (i = 0; i < n; i++) | ||
214 | dst->u.rw.seg[i] = src->seg[i]; | ||
215 | } | ||
216 | |||
217 | static inline void blkif_get_x86_64_req(struct blkif_request *dst, | ||
218 | struct blkif_x86_64_request *src) | ||
219 | { | ||
220 | int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; | ||
221 | dst->operation = src->operation; | ||
222 | dst->nr_segments = src->nr_segments; | ||
223 | dst->handle = src->handle; | ||
224 | dst->id = src->id; | ||
225 | dst->u.rw.sector_number = src->sector_number; | ||
226 | barrier(); | ||
227 | if (n > dst->nr_segments) | ||
228 | n = dst->nr_segments; | ||
229 | for (i = 0; i < n; i++) | ||
230 | dst->u.rw.seg[i] = src->seg[i]; | ||
231 | } | ||
232 | |||
233 | #endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */ | ||
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c new file mode 100644 index 000000000000..34570823355b --- /dev/null +++ b/drivers/block/xen-blkback/xenbus.c | |||
@@ -0,0 +1,768 @@ | |||
1 | /* Xenbus code for blkif backend | ||
2 | Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au> | ||
3 | Copyright (C) 2005 XenSource Ltd | ||
4 | |||
5 | This program is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published by | ||
7 | the Free Software Foundation; either version 2 of the License, or | ||
8 | (at your option) any later version. | ||
9 | |||
10 | This program is distributed in the hope that it will be useful, | ||
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | GNU General Public License for more details. | ||
14 | |||
15 | */ | ||
16 | |||
17 | #include <stdarg.h> | ||
18 | #include <linux/module.h> | ||
19 | #include <linux/kthread.h> | ||
20 | #include <xen/events.h> | ||
21 | #include <xen/grant_table.h> | ||
22 | #include "common.h" | ||
23 | |||
24 | struct backend_info { | ||
25 | struct xenbus_device *dev; | ||
26 | struct xen_blkif *blkif; | ||
27 | struct xenbus_watch backend_watch; | ||
28 | unsigned major; | ||
29 | unsigned minor; | ||
30 | char *mode; | ||
31 | }; | ||
32 | |||
33 | static struct kmem_cache *xen_blkif_cachep; | ||
34 | static void connect(struct backend_info *); | ||
35 | static int connect_ring(struct backend_info *); | ||
36 | static void backend_changed(struct xenbus_watch *, const char **, | ||
37 | unsigned int); | ||
38 | |||
39 | struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be) | ||
40 | { | ||
41 | return be->dev; | ||
42 | } | ||
43 | |||
44 | static int blkback_name(struct xen_blkif *blkif, char *buf) | ||
45 | { | ||
46 | char *devpath, *devname; | ||
47 | struct xenbus_device *dev = blkif->be->dev; | ||
48 | |||
49 | devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL); | ||
50 | if (IS_ERR(devpath)) | ||
51 | return PTR_ERR(devpath); | ||
52 | |||
53 | devname = strstr(devpath, "/dev/"); | ||
54 | if (devname != NULL) | ||
55 | devname += strlen("/dev/"); | ||
56 | else | ||
57 | devname = devpath; | ||
58 | |||
59 | snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname); | ||
60 | kfree(devpath); | ||
61 | |||
62 | return 0; | ||
63 | } | ||
64 | |||
65 | static void xen_update_blkif_status(struct xen_blkif *blkif) | ||
66 | { | ||
67 | int err; | ||
68 | char name[TASK_COMM_LEN]; | ||
69 | |||
70 | /* Not ready to connect? */ | ||
71 | if (!blkif->irq || !blkif->vbd.bdev) | ||
72 | return; | ||
73 | |||
74 | /* Already connected? */ | ||
75 | if (blkif->be->dev->state == XenbusStateConnected) | ||
76 | return; | ||
77 | |||
78 | /* Attempt to connect: exit if we fail to. */ | ||
79 | connect(blkif->be); | ||
80 | if (blkif->be->dev->state != XenbusStateConnected) | ||
81 | return; | ||
82 | |||
83 | err = blkback_name(blkif, name); | ||
84 | if (err) { | ||
85 | xenbus_dev_error(blkif->be->dev, err, "get blkback dev name"); | ||
86 | return; | ||
87 | } | ||
88 | |||
89 | err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping); | ||
90 | if (err) { | ||
91 | xenbus_dev_error(blkif->be->dev, err, "block flush"); | ||
92 | return; | ||
93 | } | ||
94 | invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping); | ||
95 | |||
96 | blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, name); | ||
97 | if (IS_ERR(blkif->xenblkd)) { | ||
98 | err = PTR_ERR(blkif->xenblkd); | ||
99 | blkif->xenblkd = NULL; | ||
100 | xenbus_dev_error(blkif->be->dev, err, "start xenblkd"); | ||
101 | } | ||
102 | } | ||
103 | |||
104 | static struct xen_blkif *xen_blkif_alloc(domid_t domid) | ||
105 | { | ||
106 | struct xen_blkif *blkif; | ||
107 | |||
108 | blkif = kmem_cache_alloc(xen_blkif_cachep, GFP_KERNEL); | ||
109 | if (!blkif) | ||
110 | return ERR_PTR(-ENOMEM); | ||
111 | |||
112 | memset(blkif, 0, sizeof(*blkif)); | ||
113 | blkif->domid = domid; | ||
114 | spin_lock_init(&blkif->blk_ring_lock); | ||
115 | atomic_set(&blkif->refcnt, 1); | ||
116 | init_waitqueue_head(&blkif->wq); | ||
117 | blkif->st_print = jiffies; | ||
118 | init_waitqueue_head(&blkif->waiting_to_free); | ||
119 | |||
120 | return blkif; | ||
121 | } | ||
122 | |||
123 | static int map_frontend_page(struct xen_blkif *blkif, unsigned long shared_page) | ||
124 | { | ||
125 | struct gnttab_map_grant_ref op; | ||
126 | |||
127 | gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr, | ||
128 | GNTMAP_host_map, shared_page, blkif->domid); | ||
129 | |||
130 | if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) | ||
131 | BUG(); | ||
132 | |||
133 | if (op.status) { | ||
134 | DPRINTK("Grant table operation failure !\n"); | ||
135 | return op.status; | ||
136 | } | ||
137 | |||
138 | blkif->shmem_ref = shared_page; | ||
139 | blkif->shmem_handle = op.handle; | ||
140 | |||
141 | return 0; | ||
142 | } | ||
143 | |||
144 | static void unmap_frontend_page(struct xen_blkif *blkif) | ||
145 | { | ||
146 | struct gnttab_unmap_grant_ref op; | ||
147 | |||
148 | gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr, | ||
149 | GNTMAP_host_map, blkif->shmem_handle); | ||
150 | |||
151 | if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) | ||
152 | BUG(); | ||
153 | } | ||
154 | |||
155 | static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, | ||
156 | unsigned int evtchn) | ||
157 | { | ||
158 | int err; | ||
159 | |||
160 | /* Already connected through? */ | ||
161 | if (blkif->irq) | ||
162 | return 0; | ||
163 | |||
164 | blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE); | ||
165 | if (!blkif->blk_ring_area) | ||
166 | return -ENOMEM; | ||
167 | |||
168 | err = map_frontend_page(blkif, shared_page); | ||
169 | if (err) { | ||
170 | free_vm_area(blkif->blk_ring_area); | ||
171 | return err; | ||
172 | } | ||
173 | |||
174 | switch (blkif->blk_protocol) { | ||
175 | case BLKIF_PROTOCOL_NATIVE: | ||
176 | { | ||
177 | struct blkif_sring *sring; | ||
178 | sring = (struct blkif_sring *)blkif->blk_ring_area->addr; | ||
179 | BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); | ||
180 | break; | ||
181 | } | ||
182 | case BLKIF_PROTOCOL_X86_32: | ||
183 | { | ||
184 | struct blkif_x86_32_sring *sring_x86_32; | ||
185 | sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring_area->addr; | ||
186 | BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); | ||
187 | break; | ||
188 | } | ||
189 | case BLKIF_PROTOCOL_X86_64: | ||
190 | { | ||
191 | struct blkif_x86_64_sring *sring_x86_64; | ||
192 | sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr; | ||
193 | BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); | ||
194 | break; | ||
195 | } | ||
196 | default: | ||
197 | BUG(); | ||
198 | } | ||
199 | |||
200 | err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn, | ||
201 | xen_blkif_be_int, 0, | ||
202 | "blkif-backend", blkif); | ||
203 | if (err < 0) { | ||
204 | unmap_frontend_page(blkif); | ||
205 | free_vm_area(blkif->blk_ring_area); | ||
206 | blkif->blk_rings.common.sring = NULL; | ||
207 | return err; | ||
208 | } | ||
209 | blkif->irq = err; | ||
210 | |||
211 | return 0; | ||
212 | } | ||
213 | |||
214 | static void xen_blkif_disconnect(struct xen_blkif *blkif) | ||
215 | { | ||
216 | if (blkif->xenblkd) { | ||
217 | kthread_stop(blkif->xenblkd); | ||
218 | blkif->xenblkd = NULL; | ||
219 | } | ||
220 | |||
221 | atomic_dec(&blkif->refcnt); | ||
222 | wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); | ||
223 | atomic_inc(&blkif->refcnt); | ||
224 | |||
225 | if (blkif->irq) { | ||
226 | unbind_from_irqhandler(blkif->irq, blkif); | ||
227 | blkif->irq = 0; | ||
228 | } | ||
229 | |||
230 | if (blkif->blk_rings.common.sring) { | ||
231 | unmap_frontend_page(blkif); | ||
232 | free_vm_area(blkif->blk_ring_area); | ||
233 | blkif->blk_rings.common.sring = NULL; | ||
234 | } | ||
235 | } | ||
236 | |||
237 | void xen_blkif_free(struct xen_blkif *blkif) | ||
238 | { | ||
239 | if (!atomic_dec_and_test(&blkif->refcnt)) | ||
240 | BUG(); | ||
241 | kmem_cache_free(xen_blkif_cachep, blkif); | ||
242 | } | ||
243 | |||
244 | int __init xen_blkif_interface_init(void) | ||
245 | { | ||
246 | xen_blkif_cachep = kmem_cache_create("blkif_cache", | ||
247 | sizeof(struct xen_blkif), | ||
248 | 0, 0, NULL); | ||
249 | if (!xen_blkif_cachep) | ||
250 | return -ENOMEM; | ||
251 | |||
252 | return 0; | ||
253 | } | ||
254 | |||
255 | /* | ||
256 | * sysfs interface for VBD I/O requests | ||
257 | */ | ||
258 | |||
/*
 * VBD_SHOW(name, format, ...) - define a read-only device attribute.
 *
 * Expands to a show_<name>() callback plus the matching dev_attr_<name>
 * (mode S_IRUGO, no store callback).  The callback prints the trailing
 * arguments with @format; those arguments may refer to 'be', the
 * backend_info taken from the device's driver data.
 */
#define VBD_SHOW(name, format, ...)					\
	static ssize_t show_##name(struct device *_dev,			\
				   struct device_attribute *attr,	\
				   char *buf)				\
	{								\
		struct xenbus_device *dev = to_xenbus_device(_dev);	\
		struct backend_info *be = dev_get_drvdata(&dev->dev);	\
									\
		return sprintf(buf, format, ##__VA_ARGS__);		\
	}								\
	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
270 | |||
271 | VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); | ||
272 | VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); | ||
273 | VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); | ||
274 | VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req); | ||
275 | VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); | ||
276 | VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); | ||
277 | |||
278 | static struct attribute *xen_vbdstat_attrs[] = { | ||
279 | &dev_attr_oo_req.attr, | ||
280 | &dev_attr_rd_req.attr, | ||
281 | &dev_attr_wr_req.attr, | ||
282 | &dev_attr_f_req.attr, | ||
283 | &dev_attr_rd_sect.attr, | ||
284 | &dev_attr_wr_sect.attr, | ||
285 | NULL | ||
286 | }; | ||
287 | |||
288 | static struct attribute_group xen_vbdstat_group = { | ||
289 | .name = "statistics", | ||
290 | .attrs = xen_vbdstat_attrs, | ||
291 | }; | ||
292 | |||
293 | VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor); | ||
294 | VBD_SHOW(mode, "%s\n", be->mode); | ||
295 | |||
296 | int xenvbd_sysfs_addif(struct xenbus_device *dev) | ||
297 | { | ||
298 | int error; | ||
299 | |||
300 | error = device_create_file(&dev->dev, &dev_attr_physical_device); | ||
301 | if (error) | ||
302 | goto fail1; | ||
303 | |||
304 | error = device_create_file(&dev->dev, &dev_attr_mode); | ||
305 | if (error) | ||
306 | goto fail2; | ||
307 | |||
308 | error = sysfs_create_group(&dev->dev.kobj, &xen_vbdstat_group); | ||
309 | if (error) | ||
310 | goto fail3; | ||
311 | |||
312 | return 0; | ||
313 | |||
314 | fail3: sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group); | ||
315 | fail2: device_remove_file(&dev->dev, &dev_attr_mode); | ||
316 | fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); | ||
317 | return error; | ||
318 | } | ||
319 | |||
320 | void xenvbd_sysfs_delif(struct xenbus_device *dev) | ||
321 | { | ||
322 | sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group); | ||
323 | device_remove_file(&dev->dev, &dev_attr_mode); | ||
324 | device_remove_file(&dev->dev, &dev_attr_physical_device); | ||
325 | } | ||
326 | |||
327 | |||
328 | static void xen_vbd_free(struct xen_vbd *vbd) | ||
329 | { | ||
330 | if (vbd->bdev) | ||
331 | blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE); | ||
332 | vbd->bdev = NULL; | ||
333 | } | ||
334 | |||
335 | static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, | ||
336 | unsigned major, unsigned minor, int readonly, | ||
337 | int cdrom) | ||
338 | { | ||
339 | struct xen_vbd *vbd; | ||
340 | struct block_device *bdev; | ||
341 | struct request_queue *q; | ||
342 | |||
343 | vbd = &blkif->vbd; | ||
344 | vbd->handle = handle; | ||
345 | vbd->readonly = readonly; | ||
346 | vbd->type = 0; | ||
347 | |||
348 | vbd->pdevice = MKDEV(major, minor); | ||
349 | |||
350 | bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ? | ||
351 | FMODE_READ : FMODE_WRITE, NULL); | ||
352 | |||
353 | if (IS_ERR(bdev)) { | ||
354 | DPRINTK("xen_vbd_create: device %08x could not be opened.\n", | ||
355 | vbd->pdevice); | ||
356 | return -ENOENT; | ||
357 | } | ||
358 | |||
359 | vbd->bdev = bdev; | ||
360 | vbd->size = vbd_sz(vbd); | ||
361 | |||
362 | if (vbd->bdev->bd_disk == NULL) { | ||
363 | DPRINTK("xen_vbd_create: device %08x doesn't exist.\n", | ||
364 | vbd->pdevice); | ||
365 | xen_vbd_free(vbd); | ||
366 | return -ENOENT; | ||
367 | } | ||
368 | |||
369 | if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) | ||
370 | vbd->type |= VDISK_CDROM; | ||
371 | if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) | ||
372 | vbd->type |= VDISK_REMOVABLE; | ||
373 | |||
374 | q = bdev_get_queue(bdev); | ||
375 | if (q && q->flush_flags) | ||
376 | vbd->flush_support = true; | ||
377 | |||
378 | DPRINTK("Successful creation of handle=%04x (dom=%u)\n", | ||
379 | handle, blkif->domid); | ||
380 | return 0; | ||
381 | } | ||
382 | static int xen_blkbk_remove(struct xenbus_device *dev) | ||
383 | { | ||
384 | struct backend_info *be = dev_get_drvdata(&dev->dev); | ||
385 | |||
386 | DPRINTK(""); | ||
387 | |||
388 | if (be->major || be->minor) | ||
389 | xenvbd_sysfs_delif(dev); | ||
390 | |||
391 | if (be->backend_watch.node) { | ||
392 | unregister_xenbus_watch(&be->backend_watch); | ||
393 | kfree(be->backend_watch.node); | ||
394 | be->backend_watch.node = NULL; | ||
395 | } | ||
396 | |||
397 | if (be->blkif) { | ||
398 | xen_blkif_disconnect(be->blkif); | ||
399 | xen_vbd_free(&be->blkif->vbd); | ||
400 | xen_blkif_free(be->blkif); | ||
401 | be->blkif = NULL; | ||
402 | } | ||
403 | |||
404 | kfree(be); | ||
405 | dev_set_drvdata(&dev->dev, NULL); | ||
406 | return 0; | ||
407 | } | ||
408 | |||
409 | int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, | ||
410 | struct backend_info *be, int state) | ||
411 | { | ||
412 | struct xenbus_device *dev = be->dev; | ||
413 | int err; | ||
414 | |||
415 | err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache", | ||
416 | "%d", state); | ||
417 | if (err) | ||
418 | xenbus_dev_fatal(dev, err, "writing feature-flush-cache"); | ||
419 | |||
420 | return err; | ||
421 | } | ||
422 | |||
423 | /* | ||
424 | * Entry point to this code when a new device is created. Allocate the basic | ||
425 | * structures, and watch the store waiting for the hotplug scripts to tell us | ||
426 | * the device's physical major and minor numbers. Switch to InitWait. | ||
427 | */ | ||
428 | static int xen_blkbk_probe(struct xenbus_device *dev, | ||
429 | const struct xenbus_device_id *id) | ||
430 | { | ||
431 | int err; | ||
432 | struct backend_info *be = kzalloc(sizeof(struct backend_info), | ||
433 | GFP_KERNEL); | ||
434 | if (!be) { | ||
435 | xenbus_dev_fatal(dev, -ENOMEM, | ||
436 | "allocating backend structure"); | ||
437 | return -ENOMEM; | ||
438 | } | ||
439 | be->dev = dev; | ||
440 | dev_set_drvdata(&dev->dev, be); | ||
441 | |||
442 | be->blkif = xen_blkif_alloc(dev->otherend_id); | ||
443 | if (IS_ERR(be->blkif)) { | ||
444 | err = PTR_ERR(be->blkif); | ||
445 | be->blkif = NULL; | ||
446 | xenbus_dev_fatal(dev, err, "creating block interface"); | ||
447 | goto fail; | ||
448 | } | ||
449 | |||
450 | /* setup back pointer */ | ||
451 | be->blkif->be = be; | ||
452 | |||
453 | err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed, | ||
454 | "%s/%s", dev->nodename, "physical-device"); | ||
455 | if (err) | ||
456 | goto fail; | ||
457 | |||
458 | err = xenbus_switch_state(dev, XenbusStateInitWait); | ||
459 | if (err) | ||
460 | goto fail; | ||
461 | |||
462 | return 0; | ||
463 | |||
464 | fail: | ||
465 | DPRINTK("failed"); | ||
466 | xen_blkbk_remove(dev); | ||
467 | return err; | ||
468 | } | ||
469 | |||
470 | |||
471 | /* | ||
472 | * Callback received when the hotplug scripts have placed the physical-device | ||
473 | * node. Read it and the mode node, and create a vbd. If the frontend is | ||
474 | * ready, connect. | ||
475 | */ | ||
476 | static void backend_changed(struct xenbus_watch *watch, | ||
477 | const char **vec, unsigned int len) | ||
478 | { | ||
479 | int err; | ||
480 | unsigned major; | ||
481 | unsigned minor; | ||
482 | struct backend_info *be | ||
483 | = container_of(watch, struct backend_info, backend_watch); | ||
484 | struct xenbus_device *dev = be->dev; | ||
485 | int cdrom = 0; | ||
486 | char *device_type; | ||
487 | |||
488 | DPRINTK(""); | ||
489 | |||
490 | err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x", | ||
491 | &major, &minor); | ||
492 | if (XENBUS_EXIST_ERR(err)) { | ||
493 | /* | ||
494 | * Since this watch will fire once immediately after it is | ||
495 | * registered, we expect this. Ignore it, and wait for the | ||
496 | * hotplug scripts. | ||
497 | */ | ||
498 | return; | ||
499 | } | ||
500 | if (err != 2) { | ||
501 | xenbus_dev_fatal(dev, err, "reading physical-device"); | ||
502 | return; | ||
503 | } | ||
504 | |||
505 | if ((be->major || be->minor) && | ||
506 | ((be->major != major) || (be->minor != minor))) { | ||
507 | pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n", | ||
508 | be->major, be->minor, major, minor); | ||
509 | return; | ||
510 | } | ||
511 | |||
512 | be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL); | ||
513 | if (IS_ERR(be->mode)) { | ||
514 | err = PTR_ERR(be->mode); | ||
515 | be->mode = NULL; | ||
516 | xenbus_dev_fatal(dev, err, "reading mode"); | ||
517 | return; | ||
518 | } | ||
519 | |||
520 | device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL); | ||
521 | if (!IS_ERR(device_type)) { | ||
522 | cdrom = strcmp(device_type, "cdrom") == 0; | ||
523 | kfree(device_type); | ||
524 | } | ||
525 | |||
526 | if (be->major == 0 && be->minor == 0) { | ||
527 | /* Front end dir is a number, which is used as the handle. */ | ||
528 | |||
529 | char *p = strrchr(dev->otherend, '/') + 1; | ||
530 | long handle; | ||
531 | err = strict_strtoul(p, 0, &handle); | ||
532 | if (err) | ||
533 | return; | ||
534 | |||
535 | be->major = major; | ||
536 | be->minor = minor; | ||
537 | |||
538 | err = xen_vbd_create(be->blkif, handle, major, minor, | ||
539 | (NULL == strchr(be->mode, 'w')), cdrom); | ||
540 | if (err) { | ||
541 | be->major = 0; | ||
542 | be->minor = 0; | ||
543 | xenbus_dev_fatal(dev, err, "creating vbd structure"); | ||
544 | return; | ||
545 | } | ||
546 | |||
547 | err = xenvbd_sysfs_addif(dev); | ||
548 | if (err) { | ||
549 | xen_vbd_free(&be->blkif->vbd); | ||
550 | be->major = 0; | ||
551 | be->minor = 0; | ||
552 | xenbus_dev_fatal(dev, err, "creating sysfs entries"); | ||
553 | return; | ||
554 | } | ||
555 | |||
556 | /* We're potentially connected now */ | ||
557 | xen_update_blkif_status(be->blkif); | ||
558 | } | ||
559 | } | ||
560 | |||
561 | |||
562 | /* | ||
563 | * Callback received when the frontend's state changes. | ||
564 | */ | ||
565 | static void frontend_changed(struct xenbus_device *dev, | ||
566 | enum xenbus_state frontend_state) | ||
567 | { | ||
568 | struct backend_info *be = dev_get_drvdata(&dev->dev); | ||
569 | int err; | ||
570 | |||
571 | DPRINTK("%s", xenbus_strstate(frontend_state)); | ||
572 | |||
573 | switch (frontend_state) { | ||
574 | case XenbusStateInitialising: | ||
575 | if (dev->state == XenbusStateClosed) { | ||
576 | pr_info(DRV_PFX "%s: prepare for reconnect\n", | ||
577 | dev->nodename); | ||
578 | xenbus_switch_state(dev, XenbusStateInitWait); | ||
579 | } | ||
580 | break; | ||
581 | |||
582 | case XenbusStateInitialised: | ||
583 | case XenbusStateConnected: | ||
584 | /* | ||
585 | * Ensure we connect even when two watches fire in | ||
586 | * close successsion and we miss the intermediate value | ||
587 | * of frontend_state. | ||
588 | */ | ||
589 | if (dev->state == XenbusStateConnected) | ||
590 | break; | ||
591 | |||
592 | /* | ||
593 | * Enforce precondition before potential leak point. | ||
594 | * blkif_disconnect() is idempotent. | ||
595 | */ | ||
596 | xen_blkif_disconnect(be->blkif); | ||
597 | |||
598 | err = connect_ring(be); | ||
599 | if (err) | ||
600 | break; | ||
601 | xen_update_blkif_status(be->blkif); | ||
602 | break; | ||
603 | |||
604 | case XenbusStateClosing: | ||
605 | xen_blkif_disconnect(be->blkif); | ||
606 | xenbus_switch_state(dev, XenbusStateClosing); | ||
607 | break; | ||
608 | |||
609 | case XenbusStateClosed: | ||
610 | xenbus_switch_state(dev, XenbusStateClosed); | ||
611 | if (xenbus_dev_is_online(dev)) | ||
612 | break; | ||
613 | /* fall through if not online */ | ||
614 | case XenbusStateUnknown: | ||
615 | /* implies blkif_disconnect() via blkback_remove() */ | ||
616 | device_unregister(&dev->dev); | ||
617 | break; | ||
618 | |||
619 | default: | ||
620 | xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", | ||
621 | frontend_state); | ||
622 | break; | ||
623 | } | ||
624 | } | ||
625 | |||
626 | |||
627 | /* ** Connection ** */ | ||
628 | |||
629 | |||
630 | /* | ||
631 | * Write the physical details regarding the block device to the store, and | ||
632 | * switch to Connected state. | ||
633 | */ | ||
634 | static void connect(struct backend_info *be) | ||
635 | { | ||
636 | struct xenbus_transaction xbt; | ||
637 | int err; | ||
638 | struct xenbus_device *dev = be->dev; | ||
639 | |||
640 | DPRINTK("%s", dev->otherend); | ||
641 | |||
642 | /* Supply the information about the device the frontend needs */ | ||
643 | again: | ||
644 | err = xenbus_transaction_start(&xbt); | ||
645 | if (err) { | ||
646 | xenbus_dev_fatal(dev, err, "starting transaction"); | ||
647 | return; | ||
648 | } | ||
649 | |||
650 | err = xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support); | ||
651 | if (err) | ||
652 | goto abort; | ||
653 | |||
654 | err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", | ||
655 | (unsigned long long)vbd_sz(&be->blkif->vbd)); | ||
656 | if (err) { | ||
657 | xenbus_dev_fatal(dev, err, "writing %s/sectors", | ||
658 | dev->nodename); | ||
659 | goto abort; | ||
660 | } | ||
661 | |||
662 | /* FIXME: use a typename instead */ | ||
663 | err = xenbus_printf(xbt, dev->nodename, "info", "%u", | ||
664 | be->blkif->vbd.type | | ||
665 | (be->blkif->vbd.readonly ? VDISK_READONLY : 0)); | ||
666 | if (err) { | ||
667 | xenbus_dev_fatal(dev, err, "writing %s/info", | ||
668 | dev->nodename); | ||
669 | goto abort; | ||
670 | } | ||
671 | err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu", | ||
672 | (unsigned long) | ||
673 | bdev_logical_block_size(be->blkif->vbd.bdev)); | ||
674 | if (err) { | ||
675 | xenbus_dev_fatal(dev, err, "writing %s/sector-size", | ||
676 | dev->nodename); | ||
677 | goto abort; | ||
678 | } | ||
679 | |||
680 | err = xenbus_transaction_end(xbt, 0); | ||
681 | if (err == -EAGAIN) | ||
682 | goto again; | ||
683 | if (err) | ||
684 | xenbus_dev_fatal(dev, err, "ending transaction"); | ||
685 | |||
686 | err = xenbus_switch_state(dev, XenbusStateConnected); | ||
687 | if (err) | ||
688 | xenbus_dev_fatal(dev, err, "switching to Connected state", | ||
689 | dev->nodename); | ||
690 | |||
691 | return; | ||
692 | abort: | ||
693 | xenbus_transaction_end(xbt, 1); | ||
694 | } | ||
695 | |||
696 | |||
697 | static int connect_ring(struct backend_info *be) | ||
698 | { | ||
699 | struct xenbus_device *dev = be->dev; | ||
700 | unsigned long ring_ref; | ||
701 | unsigned int evtchn; | ||
702 | char protocol[64] = ""; | ||
703 | int err; | ||
704 | |||
705 | DPRINTK("%s", dev->otherend); | ||
706 | |||
707 | err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", | ||
708 | &ring_ref, "event-channel", "%u", &evtchn, NULL); | ||
709 | if (err) { | ||
710 | xenbus_dev_fatal(dev, err, | ||
711 | "reading %s/ring-ref and event-channel", | ||
712 | dev->otherend); | ||
713 | return err; | ||
714 | } | ||
715 | |||
716 | be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; | ||
717 | err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", | ||
718 | "%63s", protocol, NULL); | ||
719 | if (err) | ||
720 | strcpy(protocol, "unspecified, assuming native"); | ||
721 | else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) | ||
722 | be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; | ||
723 | else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32)) | ||
724 | be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32; | ||
725 | else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64)) | ||
726 | be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64; | ||
727 | else { | ||
728 | xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); | ||
729 | return -1; | ||
730 | } | ||
731 | pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n", | ||
732 | ring_ref, evtchn, be->blkif->blk_protocol, protocol); | ||
733 | |||
734 | /* Map the shared frame, irq etc. */ | ||
735 | err = xen_blkif_map(be->blkif, ring_ref, evtchn); | ||
736 | if (err) { | ||
737 | xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", | ||
738 | ring_ref, evtchn); | ||
739 | return err; | ||
740 | } | ||
741 | |||
742 | return 0; | ||
743 | } | ||
744 | |||
745 | |||
746 | /* ** Driver Registration ** */ | ||
747 | |||
748 | |||
749 | static const struct xenbus_device_id xen_blkbk_ids[] = { | ||
750 | { "vbd" }, | ||
751 | { "" } | ||
752 | }; | ||
753 | |||
754 | |||
755 | static struct xenbus_driver xen_blkbk = { | ||
756 | .name = "vbd", | ||
757 | .owner = THIS_MODULE, | ||
758 | .ids = xen_blkbk_ids, | ||
759 | .probe = xen_blkbk_probe, | ||
760 | .remove = xen_blkbk_remove, | ||
761 | .otherend_changed = frontend_changed | ||
762 | }; | ||
763 | |||
764 | |||
765 | int xen_blkif_xenbus_init(void) | ||
766 | { | ||
767 | return xenbus_register_backend(&xen_blkbk); | ||
768 | } | ||
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 9cb8668ff5f4..b536a9cef917 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c | |||
@@ -97,6 +97,7 @@ struct blkfront_info | |||
97 | struct blk_shadow shadow[BLK_RING_SIZE]; | 97 | struct blk_shadow shadow[BLK_RING_SIZE]; |
98 | unsigned long shadow_free; | 98 | unsigned long shadow_free; |
99 | unsigned int feature_flush; | 99 | unsigned int feature_flush; |
100 | unsigned int flush_op; | ||
100 | int is_ready; | 101 | int is_ready; |
101 | }; | 102 | }; |
102 | 103 | ||
@@ -250,8 +251,7 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode, | |||
250 | 251 | ||
251 | /* | 252 | /* |
252 | * Generate a Xen blkfront IO request from a blk layer request. Reads | 253 | * Generate a Xen blkfront IO request from a blk layer request. Reads |
253 | * and writes are handled as expected. Since we lack a loose flush | 254 | * and writes are handled as expected. |
254 | * request, we map flushes into a full ordered barrier. | ||
255 | * | 255 | * |
256 | * @req: a request struct | 256 | * @req: a request struct |
257 | */ | 257 | */ |
@@ -293,14 +293,13 @@ static int blkif_queue_request(struct request *req) | |||
293 | 293 | ||
294 | if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) { | 294 | if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) { |
295 | /* | 295 | /* |
296 | * Ideally we could just do an unordered | 296 | * Ideally we can do an unordered flush-to-disk. In case the |
297 | * flush-to-disk, but all we have is a full write | 297 | * backend only supports barriers, use that. A barrier request is |
298 | * barrier at the moment. However, a barrier write is | ||
299 | * a superset of FUA, so we can implement it the same | 298 | * a superset of FUA, so we can implement it the same |
300 | * way. (It's also a FLUSH+FUA, since it is | 299 | * way. (It's also a FLUSH+FUA, since it is |
301 | * guaranteed ordered WRT previous writes.) | 300 | * guaranteed ordered WRT previous writes.) |
302 | */ | 301 | */ |
303 | ring_req->operation = BLKIF_OP_WRITE_BARRIER; | 302 | ring_req->operation = info->flush_op; |
304 | } | 303 | } |
305 | 304 | ||
306 | ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); | 305 | ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); |
@@ -433,8 +432,11 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) | |||
433 | static void xlvbd_flush(struct blkfront_info *info) | 432 | static void xlvbd_flush(struct blkfront_info *info) |
434 | { | 433 | { |
435 | blk_queue_flush(info->rq, info->feature_flush); | 434 | blk_queue_flush(info->rq, info->feature_flush); |
436 | printk(KERN_INFO "blkfront: %s: barriers %s\n", | 435 | printk(KERN_INFO "blkfront: %s: %s: %s\n", |
437 | info->gd->disk_name, | 436 | info->gd->disk_name, |
437 | info->flush_op == BLKIF_OP_WRITE_BARRIER ? | ||
438 | "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ? | ||
439 | "flush diskcache" : "barrier or flush"), | ||
438 | info->feature_flush ? "enabled" : "disabled"); | 440 | info->feature_flush ? "enabled" : "disabled"); |
439 | } | 441 | } |
440 | 442 | ||
@@ -720,15 +722,20 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) | |||
720 | 722 | ||
721 | error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; | 723 | error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; |
722 | switch (bret->operation) { | 724 | switch (bret->operation) { |
725 | case BLKIF_OP_FLUSH_DISKCACHE: | ||
723 | case BLKIF_OP_WRITE_BARRIER: | 726 | case BLKIF_OP_WRITE_BARRIER: |
724 | if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { | 727 | if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { |
725 | printk(KERN_WARNING "blkfront: %s: write barrier op failed\n", | 728 | printk(KERN_WARNING "blkfront: %s: write %s op failed\n", |
729 | info->flush_op == BLKIF_OP_WRITE_BARRIER ? | ||
730 | "barrier" : "flush disk cache", | ||
726 | info->gd->disk_name); | 731 | info->gd->disk_name); |
727 | error = -EOPNOTSUPP; | 732 | error = -EOPNOTSUPP; |
728 | } | 733 | } |
729 | if (unlikely(bret->status == BLKIF_RSP_ERROR && | 734 | if (unlikely(bret->status == BLKIF_RSP_ERROR && |
730 | info->shadow[id].req.nr_segments == 0)) { | 735 | info->shadow[id].req.nr_segments == 0)) { |
731 | printk(KERN_WARNING "blkfront: %s: empty write barrier op failed\n", | 736 | printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n", |
737 | info->flush_op == BLKIF_OP_WRITE_BARRIER ? | ||
738 | "barrier" : "flush disk cache", | ||
732 | info->gd->disk_name); | 739 | info->gd->disk_name); |
733 | error = -EOPNOTSUPP; | 740 | error = -EOPNOTSUPP; |
734 | } | 741 | } |
@@ -736,6 +743,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) | |||
736 | if (error == -EOPNOTSUPP) | 743 | if (error == -EOPNOTSUPP) |
737 | error = 0; | 744 | error = 0; |
738 | info->feature_flush = 0; | 745 | info->feature_flush = 0; |
746 | info->flush_op = 0; | ||
739 | xlvbd_flush(info); | 747 | xlvbd_flush(info); |
740 | } | 748 | } |
741 | /* fall through */ | 749 | /* fall through */ |
@@ -1100,7 +1108,7 @@ static void blkfront_connect(struct blkfront_info *info) | |||
1100 | unsigned long sector_size; | 1108 | unsigned long sector_size; |
1101 | unsigned int binfo; | 1109 | unsigned int binfo; |
1102 | int err; | 1110 | int err; |
1103 | int barrier; | 1111 | int barrier, flush; |
1104 | 1112 | ||
1105 | switch (info->connected) { | 1113 | switch (info->connected) { |
1106 | case BLKIF_STATE_CONNECTED: | 1114 | case BLKIF_STATE_CONNECTED: |
@@ -1140,8 +1148,11 @@ static void blkfront_connect(struct blkfront_info *info) | |||
1140 | return; | 1148 | return; |
1141 | } | 1149 | } |
1142 | 1150 | ||
1151 | info->feature_flush = 0; | ||
1152 | info->flush_op = 0; | ||
1153 | |||
1143 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, | 1154 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, |
1144 | "feature-barrier", "%lu", &barrier, | 1155 | "feature-barrier", "%d", &barrier, |
1145 | NULL); | 1156 | NULL); |
1146 | 1157 | ||
1147 | /* | 1158 | /* |
@@ -1151,11 +1162,23 @@ static void blkfront_connect(struct blkfront_info *info) | |||
1151 | * | 1162 | * |
1152 | * If there are barriers, then we use flush. | 1163 | * If there are barriers, then we use flush. |
1153 | */ | 1164 | */ |
1154 | info->feature_flush = 0; | 1165 | if (!err && barrier) { |
1155 | |||
1156 | if (!err && barrier) | ||
1157 | info->feature_flush = REQ_FLUSH | REQ_FUA; | 1166 | info->feature_flush = REQ_FLUSH | REQ_FUA; |
1167 | info->flush_op = BLKIF_OP_WRITE_BARRIER; | ||
1168 | } | ||
1169 | /* | ||
1170 | * And if there is "feature-flush-cache", prefer that over | ||
1171 | * barriers. | ||
1172 | */ | ||
1173 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, | ||
1174 | "feature-flush-cache", "%d", &flush, | ||
1175 | NULL); | ||
1158 | 1176 | ||
1177 | if (!err && flush) { | ||
1178 | info->feature_flush = REQ_FLUSH; | ||
1179 | info->flush_op = BLKIF_OP_FLUSH_DISKCACHE; | ||
1180 | } | ||
1181 | |||
1159 | err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); | 1182 | err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); |
1160 | if (err) { | 1183 | if (err) { |
1161 | xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", | 1184 | xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", |
diff --git a/include/linux/drbd.h b/include/linux/drbd.h index cec467f5d676..9e5f5607eba3 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h | |||
@@ -38,7 +38,7 @@ | |||
38 | 38 | ||
39 | /* Although the Linux source code makes a difference between | 39 | /* Although the Linux source code makes a difference between |
40 | generic endianness and the bitfields' endianness, there is no | 40 | generic endianness and the bitfields' endianness, there is no |
41 | architecture as of Linux-2.6.24-rc4 where the bitfileds' endianness | 41 | architecture as of Linux-2.6.24-rc4 where the bitfields' endianness |
42 | does not match the generic endianness. */ | 42 | does not match the generic endianness. */ |
43 | 43 | ||
44 | #if __BYTE_ORDER == __LITTLE_ENDIAN | 44 | #if __BYTE_ORDER == __LITTLE_ENDIAN |
@@ -53,7 +53,7 @@ | |||
53 | 53 | ||
54 | 54 | ||
55 | extern const char *drbd_buildtag(void); | 55 | extern const char *drbd_buildtag(void); |
56 | #define REL_VERSION "8.3.10" | 56 | #define REL_VERSION "8.3.11" |
57 | #define API_VERSION 88 | 57 | #define API_VERSION 88 |
58 | #define PRO_VERSION_MIN 86 | 58 | #define PRO_VERSION_MIN 86 |
59 | #define PRO_VERSION_MAX 96 | 59 | #define PRO_VERSION_MAX 96 |
@@ -195,7 +195,7 @@ enum drbd_conns { | |||
195 | C_WF_REPORT_PARAMS, /* we have a socket */ | 195 | C_WF_REPORT_PARAMS, /* we have a socket */ |
196 | C_CONNECTED, /* we have introduced each other */ | 196 | C_CONNECTED, /* we have introduced each other */ |
197 | C_STARTING_SYNC_S, /* starting full sync by admin request. */ | 197 | C_STARTING_SYNC_S, /* starting full sync by admin request. */ |
198 | C_STARTING_SYNC_T, /* stariing full sync by admin request. */ | 198 | C_STARTING_SYNC_T, /* starting full sync by admin request. */ |
199 | C_WF_BITMAP_S, | 199 | C_WF_BITMAP_S, |
200 | C_WF_BITMAP_T, | 200 | C_WF_BITMAP_T, |
201 | C_WF_SYNC_UUID, | 201 | C_WF_SYNC_UUID, |
@@ -236,7 +236,7 @@ union drbd_state { | |||
236 | * pointed out by Maxim Uvarov q<muvarov@ru.mvista.com> | 236 | * pointed out by Maxim Uvarov q<muvarov@ru.mvista.com> |
237 | * even though we transmit as "cpu_to_be32(state)", | 237 | * even though we transmit as "cpu_to_be32(state)", |
238 | * the offsets of the bitfields still need to be swapped | 238 | * the offsets of the bitfields still need to be swapped |
239 | * on different endianess. | 239 | * on different endianness. |
240 | */ | 240 | */ |
241 | struct { | 241 | struct { |
242 | #if defined(__LITTLE_ENDIAN_BITFIELD) | 242 | #if defined(__LITTLE_ENDIAN_BITFIELD) |
@@ -266,7 +266,7 @@ union drbd_state { | |||
266 | unsigned peer:2 ; /* 3/4 primary/secondary/unknown */ | 266 | unsigned peer:2 ; /* 3/4 primary/secondary/unknown */ |
267 | unsigned role:2 ; /* 3/4 primary/secondary/unknown */ | 267 | unsigned role:2 ; /* 3/4 primary/secondary/unknown */ |
268 | #else | 268 | #else |
269 | # error "this endianess is not supported" | 269 | # error "this endianness is not supported" |
270 | #endif | 270 | #endif |
271 | }; | 271 | }; |
272 | unsigned int i; | 272 | unsigned int i; |
diff --git a/include/linux/drbd_tag_magic.h b/include/linux/drbd_tag_magic.h index f14a165e82dc..069543190516 100644 --- a/include/linux/drbd_tag_magic.h +++ b/include/linux/drbd_tag_magic.h | |||
@@ -30,7 +30,7 @@ enum packet_types { | |||
30 | int tag_and_len ## member; | 30 | int tag_and_len ## member; |
31 | #include "linux/drbd_nl.h" | 31 | #include "linux/drbd_nl.h" |
32 | 32 | ||
33 | /* declate tag-list-sizes */ | 33 | /* declare tag-list-sizes */ |
34 | static const int tag_list_sizes[] = { | 34 | static const int tag_list_sizes[] = { |
35 | #define NL_PACKET(name, number, fields) 2 fields , | 35 | #define NL_PACKET(name, number, fields) 2 fields , |
36 | #define NL_INTEGER(pn, pr, member) + 4 + 4 | 36 | #define NL_INTEGER(pn, pr, member) + 4 + 4 |
diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 6a4fab7c6e09..7a71ffad037c 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h | |||
@@ -139,9 +139,9 @@ write intent log information, three of which are mentioned here. | |||
139 | * .list is on one of three lists: | 139 | * .list is on one of three lists: |
140 | * in_use: currently in use (refcnt > 0, lc_number != LC_FREE) | 140 | * in_use: currently in use (refcnt > 0, lc_number != LC_FREE) |
141 | * lru: unused but ready to be reused or recycled | 141 | * lru: unused but ready to be reused or recycled |
142 | * (ts_refcnt == 0, lc_number != LC_FREE), | 142 | * (lc_refcnt == 0, lc_number != LC_FREE), |
143 | * free: unused but ready to be recycled | 143 | * free: unused but ready to be recycled |
144 | * (ts_refcnt == 0, lc_number == LC_FREE), | 144 | * (lc_refcnt == 0, lc_number == LC_FREE), |
145 | * | 145 | * |
146 | * an element is said to be "in the active set", | 146 | * an element is said to be "in the active set", |
147 | * if either on "in_use" or "lru", i.e. lc_number != LC_FREE. | 147 | * if either on "in_use" or "lru", i.e. lc_number != LC_FREE. |
@@ -160,8 +160,8 @@ struct lc_element { | |||
160 | struct hlist_node colision; | 160 | struct hlist_node colision; |
161 | struct list_head list; /* LRU list or free list */ | 161 | struct list_head list; /* LRU list or free list */ |
162 | unsigned refcnt; | 162 | unsigned refcnt; |
163 | /* back "pointer" into ts_cache->element[index], | 163 | /* back "pointer" into lc_cache->element[index], |
164 | * for paranoia, and for "ts_element_to_index" */ | 164 | * for paranoia, and for "lc_element_to_index" */ |
165 | unsigned lc_index; | 165 | unsigned lc_index; |
166 | /* if we want to track a larger set of objects, | 166 | /* if we want to track a larger set of objects, |
167 | * it needs to become arch independent u64 */ | 167 | * it needs to become arch independent u64 */ |
@@ -190,8 +190,8 @@ struct lru_cache { | |||
190 | /* Arbitrary limit on maximum tracked objects. Practical limit is much | 190 | /* Arbitrary limit on maximum tracked objects. Practical limit is much |
191 | * lower due to allocation failures, probably. For typical use cases, | 191 | * lower due to allocation failures, probably. For typical use cases, |
192 | * nr_elements should be a few thousand at most. | 192 | * nr_elements should be a few thousand at most. |
193 | * This also limits the maximum value of ts_element.ts_index, allowing the | 193 | * This also limits the maximum value of lc_element.lc_index, allowing the |
194 | * 8 high bits of .ts_index to be overloaded with flags in the future. */ | 194 | * 8 high bits of .lc_index to be overloaded with flags in the future. */ |
195 | #define LC_MAX_ACTIVE (1<<24) | 195 | #define LC_MAX_ACTIVE (1<<24) |
196 | 196 | ||
197 | /* statistics */ | 197 | /* statistics */ |
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h index 61e523af3c46..3d5d6db864fe 100644 --- a/include/xen/interface/io/blkif.h +++ b/include/xen/interface/io/blkif.h | |||
@@ -45,6 +45,19 @@ typedef uint64_t blkif_sector_t; | |||
45 | #define BLKIF_OP_WRITE_BARRIER 2 | 45 | #define BLKIF_OP_WRITE_BARRIER 2 |
46 | 46 | ||
47 | /* | 47 | /* |
48 | * Recognised if "feature-flush-cache" is present in backend xenbus | ||
49 | * info. A flush will ask the underlying storage hardware to flush its | ||
50 | * non-volatile caches as appropriate. The "feature-flush-cache" node | ||
51 | * contains a boolean indicating whether flush requests are likely to | ||
52 | * succeed or fail. Either way, a flush request may fail at any time | ||
53 | * with BLKIF_RSP_EOPNOTSUPP if it is unsupported by the underlying | ||
54 | * block-device hardware. The boolean simply indicates whether or not it | ||
55 | * is worthwhile for the frontend to attempt flushes. If a backend does | ||
56 | * not recognise BLKIF_OP_FLUSH_DISKCACHE, it should *not* create the | ||
57 | * "feature-flush-cache" node! | ||
58 | */ | ||
59 | #define BLKIF_OP_FLUSH_DISKCACHE 3 | ||
60 | /* | ||
48 | * Maximum scatter/gather segments per request. | 61 | * Maximum scatter/gather segments per request. |
49 | * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE. | 62 | * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE. |
50 | * NB. This could be 12 if the ring indexes weren't stored in the same page. | 63 | * NB. This could be 12 if the ring indexes weren't stored in the same page. |