 Documentation/ABI/testing/sysfs-bus-pci-devices-cciss |   7
 Documentation/blockdev/cciss.txt                      |  10
 block/genhd.c                                         |   4
 block/ioctl.c                                         |   2
 drivers/block/cciss.c                                 |  76
 drivers/block/cciss.h                                 |   1
 drivers/block/cpqarray.c                              |   2
 drivers/block/loop.c                                  | 111
 drivers/block/nbd.c                                   |  69
 drivers/block/xen-blkback/blkback.c                   | 130
 drivers/block/xen-blkback/common.h                    |  98
 drivers/block/xen-blkback/xenbus.c                    |  76
 drivers/block/xen-blkfront.c                          | 123
 drivers/scsi/hpsa.c                                   |   7
 fs/block_dev.c                                        |   2
 include/linux/genhd.h                                 |   6
 include/linux/loop.h                                  |   1
 include/xen/interface/io/blkif.h                      |  36
 18 files changed, 638 insertions(+), 123 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
index f5bb0a3bb8c0..53d99edd1d75 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
@@ -71,3 +71,10 @@ Description: Value of 1 indicates the controller can honor the reset_devices
 		a dump device, as kdump requires resetting the device in order
 		to work reliably.
 
+Where:		/sys/bus/pci/devices/<dev>/ccissX/transport_mode
+Date:		July 2011
+Kernel Version:	3.0
+Contact:	iss_storagedev@hp.com
+Description:	Value of "simple" indicates that the controller has been placed
+		in "simple mode". Value of "performant" indicates that the
+		controller has been placed in "performant mode".
diff --git a/Documentation/blockdev/cciss.txt b/Documentation/blockdev/cciss.txt
index c00c6a5ab21f..71464e09ec18 100644
--- a/Documentation/blockdev/cciss.txt
+++ b/Documentation/blockdev/cciss.txt
@@ -78,6 +78,16 @@ The device naming scheme is:
 /dev/cciss/c1d1p2	Controller 1, disk 1, partition 2
 /dev/cciss/c1d1p3	Controller 1, disk 1, partition 3
 
+CCISS simple mode support
+-------------------------
+
+The "cciss_simple_mode=1" boot parameter may be used to prevent the driver
+from putting the controller into "performant" mode. The difference is that
+with simple mode, each command completion requires an interrupt, while with
+"performant mode" (the default, and ordinarily better performing) it is
+possible to have multiple command completions indicated by a single
+interrupt.
+
 SCSI tape drive and medium changer support
 ------------------------------------------
 
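As a quick check of the two modes described above, the transport_mode sysfs attribute added by this series can be read from userspace. A minimal sketch, assuming at least one cciss controller is present and using glob(3) so the PCI address does not have to be hard-coded:

/* Print the transport mode of every cciss controller found. */
#include <glob.h>
#include <stdio.h>

int main(void)
{
        glob_t g;
        size_t i;

        if (glob("/sys/bus/pci/devices/*/cciss*/transport_mode", 0, NULL, &g)) {
                fprintf(stderr, "no cciss controllers found\n");
                return 1;
        }
        for (i = 0; i < g.gl_pathc; i++) {
                char buf[32];
                FILE *f = fopen(g.gl_pathv[i], "r");

                if (f && fgets(buf, sizeof(buf), f))
                        printf("%s: %s", g.gl_pathv[i], buf); /* "simple" or "performant" */
                if (f)
                        fclose(f);
        }
        globfree(&g);
        return 0;
}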
diff --git a/block/genhd.c b/block/genhd.c
index 024fc3944fb5..9253839714ff 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -537,7 +537,7 @@ void register_disk(struct gendisk *disk)
 	disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
 
 	/* No minors to use for partitions */
-	if (!disk_partitionable(disk))
+	if (!disk_part_scan_enabled(disk))
 		goto exit;
 
 	/* No such device (e.g., media were just removed) */
@@ -848,7 +848,7 @@ static int show_partition(struct seq_file *seqf, void *v)
 	char buf[BDEVNAME_SIZE];
 
 	/* Don't show non-partitionable removeable devices or empty devices */
-	if (!get_capacity(sgp) || (!disk_partitionable(sgp) &&
+	if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
 				   (sgp->flags & GENHD_FL_REMOVABLE)))
 		return 0;
 	if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
diff --git a/block/ioctl.c b/block/ioctl.c
index 1124cd297263..5c74efc01903 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -101,7 +101,7 @@ static int blkdev_reread_part(struct block_device *bdev)
 	struct gendisk *disk = bdev->bd_disk;
 	int res;
 
-	if (!disk_partitionable(disk) || bdev != bdev->bd_contains)
+	if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains)
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
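BLKRRPART is the ioctl gated by disk_part_scan_enabled() above; it is what tools such as "blockdev --rereadpt" issue. A minimal userspace sketch (the device path is an example; the caller needs CAP_SYS_ADMIN, and EINVAL comes back when partition scanning is disabled for the disk):

/* Ask the kernel to re-read a whole disk's partition table. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>           /* BLKRRPART */

int main(int argc, char **argv)
{
        int fd = open(argc > 1 ? argv[1] : "/dev/loop0", O_RDONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (ioctl(fd, BLKRRPART, 0) < 0) {
                perror("BLKRRPART");    /* EINVAL: scanning disabled, or not a whole disk */
                return 1;
        }
        close(fd);
        return 0;
}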
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 8f4ef656a1af..486f94ef24d4 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -68,6 +68,10 @@ static int cciss_tape_cmds = 6;
 module_param(cciss_tape_cmds, int, 0644);
 MODULE_PARM_DESC(cciss_tape_cmds,
 	"number of commands to allocate for tape devices (default: 6)");
+static int cciss_simple_mode;
+module_param(cciss_simple_mode, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(cciss_simple_mode,
+	"Use 'simple mode' rather than 'performant mode'");
 
 static DEFINE_MUTEX(cciss_mutex);
 static struct proc_dir_entry *proc_cciss;
@@ -176,6 +180,7 @@ static void cciss_geometry_inquiry(ctlr_info_t *h, int logvol,
 	unsigned int block_size, InquiryData_struct *inq_buff,
 	drive_info_struct *drv);
 static void __devinit cciss_interrupt_mode(ctlr_info_t *);
+static int __devinit cciss_enter_simple_mode(struct ctlr_info *h);
 static void start_io(ctlr_info_t *h);
 static int sendcmd_withirq(ctlr_info_t *h, __u8 cmd, void *buff, size_t size,
 	__u8 page_code, unsigned char scsi3addr[],
@@ -388,7 +393,7 @@ static void cciss_seq_show_header(struct seq_file *seq)
 		h->product_name,
 		(unsigned long)h->board_id,
 		h->firm_ver[0], h->firm_ver[1], h->firm_ver[2],
-		h->firm_ver[3], (unsigned int)h->intr[PERF_MODE_INT],
+		h->firm_ver[3], (unsigned int)h->intr[h->intr_mode],
 		h->num_luns,
 		h->Qdepth, h->commands_outstanding,
 		h->maxQsinceinit, h->max_outstanding, h->maxSG);
@@ -636,6 +641,18 @@ static ssize_t host_store_rescan(struct device *dev,
 }
 static DEVICE_ATTR(rescan, S_IWUSR, NULL, host_store_rescan);
 
+static ssize_t host_show_transport_mode(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct ctlr_info *h = to_hba(dev);
+
+	return snprintf(buf, 20, "%s\n",
+		h->transMethod & CFGTBL_Trans_Performant ?
+			"performant" : "simple");
+}
+static DEVICE_ATTR(transport_mode, S_IRUGO, host_show_transport_mode, NULL);
+
 static ssize_t dev_show_unique_id(struct device *dev,
 				struct device_attribute *attr,
 				char *buf)
@@ -808,6 +825,7 @@ static DEVICE_ATTR(usage_count, S_IRUGO, cciss_show_usage_count, NULL);
 static struct attribute *cciss_host_attrs[] = {
 	&dev_attr_rescan.attr,
 	&dev_attr_resettable.attr,
+	&dev_attr_transport_mode.attr,
 	NULL
 };
 
@@ -3984,6 +4002,9 @@ static void __devinit cciss_put_controller_into_performant_mode(ctlr_info_t *h)
 {
 	__u32 trans_support;
 
+	if (cciss_simple_mode)
+		return;
+
 	dev_dbg(&h->pdev->dev, "Trying to put board into Performant mode\n");
 	/* Attempt to put controller into performant mode if supported */
 	/* Does board support performant mode? */
@@ -4081,7 +4102,7 @@ static void __devinit cciss_interrupt_mode(ctlr_info_t *h)
 default_int_mode:
 #endif				/* CONFIG_PCI_MSI */
 	/* if we get here we're going to use the default interrupt mode */
-	h->intr[PERF_MODE_INT] = h->pdev->irq;
+	h->intr[h->intr_mode] = h->pdev->irq;
 	return;
 }
 
@@ -4341,6 +4362,9 @@ static int __devinit cciss_pci_init(ctlr_info_t *h)
 	}
 	cciss_enable_scsi_prefetch(h);
 	cciss_p600_dma_prefetch_quirk(h);
+	err = cciss_enter_simple_mode(h);
+	if (err)
+		goto err_out_free_res;
 	cciss_put_controller_into_performant_mode(h);
 	return 0;
 
@@ -4533,6 +4557,13 @@ static int cciss_controller_hard_reset(struct pci_dev *pdev,
 		pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
 		pmcsr |= PCI_D0;
 		pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
+
+		/*
+		 * The P600 requires a small delay when changing states.
+		 * Otherwise we may think the board did not reset and we bail.
+		 * This is for kdump only and is particular to the P600.
+		 */
+		msleep(500);
 	}
 	return 0;
 }
@@ -4843,20 +4874,20 @@ static int cciss_request_irq(ctlr_info_t *h,
 	irqreturn_t (*intxhandler)(int, void *))
 {
 	if (h->msix_vector || h->msi_vector) {
-		if (!request_irq(h->intr[PERF_MODE_INT], msixhandler,
+		if (!request_irq(h->intr[h->intr_mode], msixhandler,
 				IRQF_DISABLED, h->devname, h))
 			return 0;
 		dev_err(&h->pdev->dev, "Unable to get msi irq %d"
-			" for %s\n", h->intr[PERF_MODE_INT],
+			" for %s\n", h->intr[h->intr_mode],
 			h->devname);
 		return -1;
 	}
 
-	if (!request_irq(h->intr[PERF_MODE_INT], intxhandler,
+	if (!request_irq(h->intr[h->intr_mode], intxhandler,
 			IRQF_DISABLED, h->devname, h))
 		return 0;
 	dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n",
-		h->intr[PERF_MODE_INT], h->devname);
+		h->intr[h->intr_mode], h->devname);
 	return -1;
 }
 
@@ -4887,7 +4918,7 @@ static void cciss_undo_allocations_after_kdump_soft_reset(ctlr_info_t *h)
 {
 	int ctlr = h->ctlr;
 
-	free_irq(h->intr[PERF_MODE_INT], h);
+	free_irq(h->intr[h->intr_mode], h);
 #ifdef CONFIG_PCI_MSI
 	if (h->msix_vector)
 		pci_disable_msix(h->pdev);
@@ -4953,6 +4984,7 @@ reinit_after_soft_reset:
 	h = hba[i];
 	h->pdev = pdev;
 	h->busy_initializing = 1;
+	h->intr_mode = cciss_simple_mode ? SIMPLE_MODE_INT : PERF_MODE_INT;
 	INIT_LIST_HEAD(&h->cmpQ);
 	INIT_LIST_HEAD(&h->reqQ);
 	mutex_init(&h->busy_shutting_down);
@@ -5009,7 +5041,7 @@ reinit_after_soft_reset:
 
 	dev_info(&h->pdev->dev, "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n",
 		h->devname, pdev->device, pci_name(pdev),
-		h->intr[PERF_MODE_INT], dac ? "" : " not");
+		h->intr[h->intr_mode], dac ? "" : " not");
 
 	if (cciss_allocate_cmd_pool(h))
 		goto clean4;
@@ -5056,7 +5088,7 @@ reinit_after_soft_reset:
 		spin_lock_irqsave(&h->lock, flags);
 		h->access.set_intr_mask(h, CCISS_INTR_OFF);
 		spin_unlock_irqrestore(&h->lock, flags);
-		free_irq(h->intr[PERF_MODE_INT], h);
+		free_irq(h->intr[h->intr_mode], h);
 		rc = cciss_request_irq(h, cciss_msix_discard_completions,
 					cciss_intx_discard_completions);
 		if (rc) {
@@ -5133,7 +5165,7 @@ clean4:
 	cciss_free_cmd_pool(h);
 	cciss_free_scatterlists(h);
 	cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
-	free_irq(h->intr[PERF_MODE_INT], h);
+	free_irq(h->intr[h->intr_mode], h);
 clean2:
 	unregister_blkdev(h->major, h->devname);
 clean1:
@@ -5172,9 +5204,31 @@ static void cciss_shutdown(struct pci_dev *pdev)
 	if (return_code != IO_OK)
 		dev_warn(&h->pdev->dev, "Error flushing cache\n");
 	h->access.set_intr_mask(h, CCISS_INTR_OFF);
-	free_irq(h->intr[PERF_MODE_INT], h);
+	free_irq(h->intr[h->intr_mode], h);
+}
+
+static int __devinit cciss_enter_simple_mode(struct ctlr_info *h)
+{
+	u32 trans_support;
+
+	trans_support = readl(&(h->cfgtable->TransportSupport));
+	if (!(trans_support & SIMPLE_MODE))
+		return -ENOTSUPP;
+
+	h->max_commands = readl(&(h->cfgtable->CmdsOutMax));
+	writel(CFGTBL_Trans_Simple, &(h->cfgtable->HostWrite.TransportRequest));
+	writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
+	cciss_wait_for_mode_change_ack(h);
+	print_cfg_table(h);
+	if (!(readl(&(h->cfgtable->TransportActive)) & CFGTBL_Trans_Simple)) {
+		dev_warn(&h->pdev->dev, "unable to get board into simple mode\n");
+		return -ENODEV;
+	}
+	h->transMethod = CFGTBL_Trans_Simple;
+	return 0;
 }
 
+
 static void __devexit cciss_remove_one(struct pci_dev *pdev)
 {
 	ctlr_info_t *h;
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index c049548e68b7..7fda30e4a241 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -92,6 +92,7 @@ struct ctlr_info
 	unsigned int intr[4];
 	unsigned int msix_vector;
 	unsigned int msi_vector;
+	int	intr_mode;
 	int	cciss_max_sectors;
 	BYTE	cciss_read;
 	BYTE	cciss_write;
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index b2fceb53e809..9125bbeacd4d 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -620,6 +620,7 @@ static int cpqarray_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
 	}
 	vendor_id = pdev->vendor;
 	device_id = pdev->device;
+	revision = pdev->revision;
 	irq = pdev->irq;
 
 	for(i=0; i<6; i++)
@@ -632,7 +633,6 @@ static int cpqarray_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
 	}
 
 	pci_read_config_word(pdev, PCI_COMMAND, &command);
-	pci_read_config_byte(pdev, PCI_CLASS_REVISION, &revision);
 	pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache_line_size);
 	pci_read_config_byte(pdev, PCI_LATENCY_TIMER, &latency_timer);
 
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index c77983ea86c8..3d806820280e 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -76,6 +76,8 @@
 #include <linux/splice.h>
 #include <linux/sysfs.h>
 #include <linux/miscdevice.h>
+#include <linux/falloc.h>
+
 #include <asm/uaccess.h>
 
 static DEFINE_IDR(loop_index_idr);
@@ -407,6 +409,29 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
 		}
 	}
 
+	/*
+	 * We use punch hole to reclaim the free space used by the
+	 * image a.k.a. discard. However we do not support discard if
+	 * encryption is enabled, because it may give an attacker
+	 * useful information.
+	 */
+	if (bio->bi_rw & REQ_DISCARD) {
+		struct file *file = lo->lo_backing_file;
+		int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
+
+		if ((!file->f_op->fallocate) ||
+		    lo->lo_encrypt_key_size) {
+			ret = -EOPNOTSUPP;
+			goto out;
+		}
+		ret = file->f_op->fallocate(file, mode, pos,
+					    bio->bi_size);
+		if (unlikely(ret && ret != -EINVAL &&
+			     ret != -EOPNOTSUPP))
+			ret = -EIO;
+		goto out;
+	}
+
 	ret = lo_send(lo, bio, pos);
 
 	if ((bio->bi_rw & REQ_FUA) && !ret) {
@@ -622,7 +647,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 		goto out_putf;
 
 	fput(old_file);
-	if (max_part > 0)
+	if (lo->lo_flags & LO_FLAGS_PARTSCAN)
 		ioctl_by_bdev(bdev, BLKRRPART, 0);
 	return 0;
 
@@ -699,16 +724,25 @@ static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)
 	return sprintf(buf, "%s\n", autoclear ? "1" : "0");
 }
 
+static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf)
+{
+	int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN);
+
+	return sprintf(buf, "%s\n", partscan ? "1" : "0");
+}
+
 LOOP_ATTR_RO(backing_file);
 LOOP_ATTR_RO(offset);
 LOOP_ATTR_RO(sizelimit);
 LOOP_ATTR_RO(autoclear);
+LOOP_ATTR_RO(partscan);
 
 static struct attribute *loop_attrs[] = {
 	&loop_attr_backing_file.attr,
 	&loop_attr_offset.attr,
 	&loop_attr_sizelimit.attr,
 	&loop_attr_autoclear.attr,
+	&loop_attr_partscan.attr,
 	NULL,
 };
 
@@ -729,6 +763,35 @@ static void loop_sysfs_exit(struct loop_device *lo)
 			   &loop_attribute_group);
 }
 
+static void loop_config_discard(struct loop_device *lo)
+{
+	struct file *file = lo->lo_backing_file;
+	struct inode *inode = file->f_mapping->host;
+	struct request_queue *q = lo->lo_queue;
+
+	/*
+	 * We use punch hole to reclaim the free space used by the
+	 * image a.k.a. discard. However we do not support discard if
+	 * encryption is enabled, because it may give an attacker
+	 * useful information.
+	 */
+	if ((!file->f_op->fallocate) ||
+	    lo->lo_encrypt_key_size) {
+		q->limits.discard_granularity = 0;
+		q->limits.discard_alignment = 0;
+		q->limits.max_discard_sectors = 0;
+		q->limits.discard_zeroes_data = 0;
+		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
+		return;
+	}
+
+	q->limits.discard_granularity = inode->i_sb->s_blocksize;
+	q->limits.discard_alignment = inode->i_sb->s_blocksize;
+	q->limits.max_discard_sectors = UINT_MAX >> 9;
+	q->limits.discard_zeroes_data = 1;
+	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+}
+
 static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 		       struct block_device *bdev, unsigned int arg)
 {
@@ -829,7 +892,9 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	}
 	lo->lo_state = Lo_bound;
 	wake_up_process(lo->lo_thread);
-	if (max_part > 0)
+	if (part_shift)
+		lo->lo_flags |= LO_FLAGS_PARTSCAN;
+	if (lo->lo_flags & LO_FLAGS_PARTSCAN)
 		ioctl_by_bdev(bdev, BLKRRPART, 0);
 	return 0;
 
@@ -890,10 +955,11 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
 	return err;
 }
 
-static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
+static int loop_clr_fd(struct loop_device *lo)
 {
 	struct file *filp = lo->lo_backing_file;
 	gfp_t gfp = lo->old_gfp_mask;
+	struct block_device *bdev = lo->lo_device;
 
 	if (lo->lo_state != Lo_bound)
 		return -ENXIO;
@@ -922,7 +988,6 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
 	lo->lo_offset = 0;
 	lo->lo_sizelimit = 0;
 	lo->lo_encrypt_key_size = 0;
-	lo->lo_flags = 0;
 	lo->lo_thread = NULL;
 	memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
 	memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
@@ -940,8 +1005,11 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
 	lo->lo_state = Lo_unbound;
 	/* This is safe: open() is still holding a reference. */
 	module_put(THIS_MODULE);
-	if (max_part > 0 && bdev)
+	if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev)
 		ioctl_by_bdev(bdev, BLKRRPART, 0);
+	lo->lo_flags = 0;
+	if (!part_shift)
+		lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
 	mutex_unlock(&lo->lo_ctl_mutex);
 	/*
 	 * Need not hold lo_ctl_mutex to fput backing file.
@@ -995,6 +1063,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 		if (figure_loop_size(lo))
 			return -EFBIG;
 	}
+	loop_config_discard(lo);
 
 	memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
 	memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
@@ -1010,6 +1079,13 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 	    (info->lo_flags & LO_FLAGS_AUTOCLEAR))
 		lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;
 
+	if ((info->lo_flags & LO_FLAGS_PARTSCAN) &&
+	     !(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
+		lo->lo_flags |= LO_FLAGS_PARTSCAN;
+		lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
+		ioctl_by_bdev(lo->lo_device, BLKRRPART, 0);
+	}
+
 	lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
 	lo->lo_init[0] = info->lo_init[0];
 	lo->lo_init[1] = info->lo_init[1];
@@ -1203,7 +1279,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
 		break;
 	case LOOP_CLR_FD:
 		/* loop_clr_fd would have unlocked lo_ctl_mutex on success */
-		err = loop_clr_fd(lo, bdev);
+		err = loop_clr_fd(lo);
 		if (!err)
 			goto out_unlocked;
 		break;
@@ -1423,7 +1499,7 @@ static int lo_release(struct gendisk *disk, fmode_t mode)
 		 * In autoclear mode, stop the loop thread
 		 * and remove configuration after last close.
 		 */
-		err = loop_clr_fd(lo, NULL);
+		err = loop_clr_fd(lo);
 		if (!err)
 			goto out_unlocked;
 	} else {
@@ -1545,6 +1621,27 @@ static int loop_add(struct loop_device **l, int i)
 	if (!disk)
 		goto out_free_queue;
 
+	/*
+	 * Disable partition scanning by default. The in-kernel partition
+	 * scanning can be requested individually per-device during its
+	 * setup. Userspace can always add and remove partitions from all
+	 * devices. The needed partition minors are allocated from the
+	 * extended minor space, the main loop device numbers will continue
+	 * to match the loop minors, regardless of the number of partitions
+	 * used.
+	 *
+	 * If max_part is given, partition scanning is globally enabled for
+	 * all loop devices. The minors for the main loop devices will be
+	 * multiples of max_part.
+	 *
+	 * Note: Global-for-all-devices, set-only-at-init, read-only module
+	 * parameters like 'max_loop' and 'max_part' make things needlessly
+	 * complicated, are too static, inflexible and may surprise
+	 * userspace tools. Parameters like this in general should be avoided.
+	 */
+	if (!part_shift)
+		disk->flags |= GENHD_FL_NO_PART_SCAN;
+	disk->flags |= GENHD_FL_EXT_DEVT;
 	mutex_init(&lo->lo_ctl_mutex);
 	lo->lo_number		= i;
 	lo->lo_thread		= NULL;
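The discard support added above ends in a ->fallocate(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE) call on the backing file. The same operation can be exercised directly from userspace; a sketch, assuming a file on a filesystem that implements hole punching (the offset and length are arbitrary examples):

/* Punch a 1 MiB hole at offset 0, deallocating blocks while keeping
 * the file size - the request the loop driver issues for REQ_DISCARD. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/falloc.h>       /* FALLOC_FL_PUNCH_HOLE */

int main(int argc, char **argv)
{
        int fd = open(argc > 1 ? argv[1] : "backing.img", O_RDWR);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                      0, 1024 * 1024) < 0)
                perror("fallocate");    /* EOPNOTSUPP if unsupported */
        close(fd);
        return 0;
}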
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index f533f3375e24..c3f0ee16594d 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -127,8 +127,7 @@ static void sock_shutdown(struct nbd_device *lo, int lock)
 	if (lock)
 		mutex_lock(&lo->tx_lock);
 	if (lo->sock) {
-		printk(KERN_WARNING "%s: shutting down socket\n",
-			lo->disk->disk_name);
+		dev_warn(disk_to_dev(lo->disk), "shutting down socket\n");
 		kernel_sock_shutdown(lo->sock, SHUT_RDWR);
 		lo->sock = NULL;
 	}
@@ -158,8 +157,9 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
 	sigset_t blocked, oldset;
 
 	if (unlikely(!sock)) {
-		printk(KERN_ERR "%s: Attempted %s on closed socket in sock_xmit\n",
-		       lo->disk->disk_name, (send ? "send" : "recv"));
+		dev_err(disk_to_dev(lo->disk),
+			"Attempted %s on closed socket in sock_xmit\n",
+			(send ? "send" : "recv"));
 		return -EINVAL;
 	}
 
@@ -250,8 +250,8 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
 	result = sock_xmit(lo, 1, &request, sizeof(request),
 			(nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0);
 	if (result <= 0) {
-		printk(KERN_ERR "%s: Send control failed (result %d)\n",
-				lo->disk->disk_name, result);
+		dev_err(disk_to_dev(lo->disk),
+			"Send control failed (result %d)\n", result);
 		goto error_out;
 	}
 
@@ -270,8 +270,9 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
 				lo->disk->disk_name, req, bvec->bv_len);
 		result = sock_send_bvec(lo, bvec, flags);
 		if (result <= 0) {
-			printk(KERN_ERR "%s: Send data failed (result %d)\n",
-					lo->disk->disk_name, result);
+			dev_err(disk_to_dev(lo->disk),
+				"Send data failed (result %d)\n",
+				result);
 			goto error_out;
 		}
 	}
@@ -328,14 +329,13 @@ static struct request *nbd_read_stat(struct nbd_device *lo)
 	reply.magic = 0;
 	result = sock_xmit(lo, 0, &reply, sizeof(reply), MSG_WAITALL);
 	if (result <= 0) {
-		printk(KERN_ERR "%s: Receive control failed (result %d)\n",
-				lo->disk->disk_name, result);
+		dev_err(disk_to_dev(lo->disk),
+			"Receive control failed (result %d)\n", result);
 		goto harderror;
 	}
 
 	if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
-		printk(KERN_ERR "%s: Wrong magic (0x%lx)\n",
-				lo->disk->disk_name,
+		dev_err(disk_to_dev(lo->disk), "Wrong magic (0x%lx)\n",
 				(unsigned long)ntohl(reply.magic));
 		result = -EPROTO;
 		goto harderror;
@@ -347,15 +347,15 @@ static struct request *nbd_read_stat(struct nbd_device *lo)
 		if (result != -ENOENT)
 			goto harderror;
 
-		printk(KERN_ERR "%s: Unexpected reply (%p)\n",
-				lo->disk->disk_name, reply.handle);
+		dev_err(disk_to_dev(lo->disk), "Unexpected reply (%p)\n",
+			reply.handle);
 		result = -EBADR;
 		goto harderror;
 	}
 
 	if (ntohl(reply.error)) {
-		printk(KERN_ERR "%s: Other side returned error (%d)\n",
-				lo->disk->disk_name, ntohl(reply.error));
+		dev_err(disk_to_dev(lo->disk), "Other side returned error (%d)\n",
+			ntohl(reply.error));
 		req->errors++;
 		return req;
 	}
@@ -369,8 +369,8 @@ static struct request *nbd_read_stat(struct nbd_device *lo)
 		rq_for_each_segment(bvec, req, iter) {
 			result = sock_recv_bvec(lo, bvec);
 			if (result <= 0) {
-				printk(KERN_ERR "%s: Receive data failed (result %d)\n",
-						lo->disk->disk_name, result);
+				dev_err(disk_to_dev(lo->disk), "Receive data failed (result %d)\n",
+					result);
 				req->errors++;
 				return req;
 			}
@@ -405,10 +405,10 @@ static int nbd_do_it(struct nbd_device *lo)
 
 	BUG_ON(lo->magic != LO_MAGIC);
 
-	lo->pid = current->pid;
-	ret = sysfs_create_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr);
+	lo->pid = task_pid_nr(current);
+	ret = device_create_file(disk_to_dev(lo->disk), &pid_attr);
 	if (ret) {
-		printk(KERN_ERR "nbd: sysfs_create_file failed!");
+		dev_err(disk_to_dev(lo->disk), "device_create_file failed!\n");
 		lo->pid = 0;
 		return ret;
 	}
@@ -416,7 +416,7 @@ static int nbd_do_it(struct nbd_device *lo)
 	while ((req = nbd_read_stat(lo)) != NULL)
 		nbd_end_request(req);
 
-	sysfs_remove_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr);
+	device_remove_file(disk_to_dev(lo->disk), &pid_attr);
 	lo->pid = 0;
 	return 0;
 }
@@ -457,8 +457,8 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req)
 	if (rq_data_dir(req) == WRITE) {
 		nbd_cmd(req) = NBD_CMD_WRITE;
 		if (lo->flags & NBD_READ_ONLY) {
-			printk(KERN_ERR "%s: Write on read-only\n",
-					lo->disk->disk_name);
+			dev_err(disk_to_dev(lo->disk),
+				"Write on read-only\n");
 			goto error_out;
 		}
 	}
@@ -468,16 +468,15 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req)
 	mutex_lock(&lo->tx_lock);
 	if (unlikely(!lo->sock)) {
 		mutex_unlock(&lo->tx_lock);
-		printk(KERN_ERR "%s: Attempted send on closed socket\n",
-		       lo->disk->disk_name);
+		dev_err(disk_to_dev(lo->disk),
+			"Attempted send on closed socket\n");
 		goto error_out;
 	}
 
 	lo->active_req = req;
 
 	if (nbd_send_req(lo, req) != 0) {
-		printk(KERN_ERR "%s: Request send failed\n",
-				lo->disk->disk_name);
+		dev_err(disk_to_dev(lo->disk), "Request send failed\n");
 		req->errors++;
 		nbd_end_request(req);
 	} else {
@@ -549,8 +548,8 @@ static void do_nbd_request(struct request_queue *q)
 		BUG_ON(lo->magic != LO_MAGIC);
 
 		if (unlikely(!lo->sock)) {
-			printk(KERN_ERR "%s: Attempted send on closed socket\n",
-			       lo->disk->disk_name);
+			dev_err(disk_to_dev(lo->disk),
+				"Attempted send on closed socket\n");
 			req->errors++;
 			nbd_end_request(req);
 			spin_lock_irq(q->queue_lock);
@@ -576,7 +575,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
 	case NBD_DISCONNECT: {
 		struct request sreq;
 
-		printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name);
+		dev_info(disk_to_dev(lo->disk), "NBD_DISCONNECT\n");
 
 		blk_rq_init(NULL, &sreq);
 		sreq.cmd_type = REQ_TYPE_SPECIAL;
@@ -674,7 +673,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
 		file = lo->file;
 		lo->file = NULL;
 		nbd_clear_que(lo);
-		printk(KERN_WARNING "%s: queue cleared\n", lo->disk->disk_name);
+		dev_warn(disk_to_dev(lo->disk), "queue cleared\n");
 		if (file)
 			fput(file);
 		lo->bytesize = 0;
@@ -694,8 +693,8 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
 		return 0;
 
 	case NBD_PRINT_DEBUG:
-		printk(KERN_INFO "%s: next = %p, prev = %p, head = %p\n",
-			bdev->bd_disk->disk_name,
+		dev_info(disk_to_dev(lo->disk),
+			"next = %p, prev = %p, head = %p\n",
 			lo->queue_head.next, lo->queue_head.prev,
 			&lo->queue_head);
 		return 0;
@@ -745,7 +744,7 @@ static int __init nbd_init(void)
 	BUILD_BUG_ON(sizeof(struct nbd_request) != 28);
 
 	if (max_part < 0) {
-		printk(KERN_CRIT "nbd: max_part must be >= 0\n");
+		printk(KERN_ERR "nbd: max_part must be >= 0\n");
 		return -EINVAL;
 	}
 
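The device_create_file() call above is what publishes /sys/block/nbd<N>/pid while a device is connected. Reading it back is trivial; a sketch, assuming nbd0 currently has a client attached (the attribute is removed again when the client exits):

/* Print the pid of the process serving /dev/nbd0. */
#include <stdio.h>

int main(void)
{
        char buf[32];
        FILE *f = fopen("/sys/block/nbd0/pid", "r");

        if (!f) {
                perror("fopen");        /* attribute exists only while connected */
                return 1;
        }
        if (fgets(buf, sizeof(buf), f))
                printf("nbd0 served by pid %s", buf);
        fclose(f);
        return 0;
}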
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 1540792b1e54..15ec4db194d1 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -39,6 +39,9 @@
 #include <linux/list.h>
 #include <linux/delay.h>
 #include <linux/freezer.h>
+#include <linux/loop.h>
+#include <linux/falloc.h>
+#include <linux/fs.h>
 
 #include <xen/events.h>
 #include <xen/page.h>
@@ -258,13 +261,16 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
 
 static void print_stats(struct xen_blkif *blkif)
 {
-	pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n",
+	pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d"
+		" | ds %4d\n",
 		current->comm, blkif->st_oo_req,
-		blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req);
+		blkif->st_rd_req, blkif->st_wr_req,
+		blkif->st_f_req, blkif->st_ds_req);
 	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
 	blkif->st_rd_req = 0;
 	blkif->st_wr_req = 0;
 	blkif->st_oo_req = 0;
+	blkif->st_ds_req = 0;
 }
 
 int xen_blkif_schedule(void *arg)
@@ -410,6 +416,59 @@ static int xen_blkbk_map(struct blkif_request *req,
 	return ret;
 }
 
+static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req)
+{
+	int err = 0;
+	int status = BLKIF_RSP_OKAY;
+	struct block_device *bdev = blkif->vbd.bdev;
+
+	if (blkif->blk_backend_type == BLKIF_BACKEND_PHY)
+		/* just forward the discard request */
+		err = blkdev_issue_discard(bdev,
+				req->u.discard.sector_number,
+				req->u.discard.nr_sectors,
+				GFP_KERNEL, 0);
+	else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) {
+		/* punch a hole in the backing file */
+		struct loop_device *lo = bdev->bd_disk->private_data;
+		struct file *file = lo->lo_backing_file;
+
+		if (file->f_op->fallocate)
+			err = file->f_op->fallocate(file,
+				FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
+				req->u.discard.sector_number << 9,
+				req->u.discard.nr_sectors << 9);
+		else
+			err = -EOPNOTSUPP;
+	} else
+		err = -EOPNOTSUPP;
+
+	if (err == -EOPNOTSUPP) {
+		pr_debug(DRV_PFX "discard op failed, not supported\n");
+		status = BLKIF_RSP_EOPNOTSUPP;
+	} else if (err)
+		status = BLKIF_RSP_ERROR;
+
+	make_response(blkif, req->id, req->operation, status);
+}
+
+static void xen_blk_drain_io(struct xen_blkif *blkif)
+{
+	atomic_set(&blkif->drain, 1);
+	do {
+		/* The initial value is one, and one refcnt taken at the
+		 * start of the xen_blkif_schedule thread. */
+		if (atomic_read(&blkif->refcnt) <= 2)
+			break;
+		wait_for_completion_interruptible_timeout(
+				&blkif->drain_complete, HZ);
+
+		if (!atomic_read(&blkif->drain))
+			break;
+	} while (!kthread_should_stop());
+	atomic_set(&blkif->drain, 0);
+}
+
 /*
  * Completion callback on the bio's. Called as bh->b_end_io()
  */
@@ -422,6 +481,11 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
 		pr_debug(DRV_PFX "flush diskcache op failed, not supported\n");
 		xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0);
 		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
+	} else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
+		    (error == -EOPNOTSUPP)) {
+		pr_debug(DRV_PFX "write barrier op failed, not supported\n");
+		xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0);
+		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
 	} else if (error) {
 		pr_debug(DRV_PFX "Buffer not up-to-date at end of operation,"
 			 " error=%d\n", error);
@@ -438,6 +502,10 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
 		make_response(pending_req->blkif, pending_req->id,
 			      pending_req->operation, pending_req->status);
 		xen_blkif_put(pending_req->blkif);
+		if (atomic_read(&pending_req->blkif->refcnt) <= 2) {
+			if (atomic_read(&pending_req->blkif->drain))
+				complete(&pending_req->blkif->drain_complete);
+		}
 		free_req(pending_req);
 	}
 }
@@ -532,7 +600,6 @@ do_block_io_op(struct xen_blkif *blkif)
 
 	return more_to_do;
 }
-
 /*
  * Transmutation of the 'struct blkif_request' to a proper 'struct bio'
  * and call the 'submit_bio' to pass it to the underlying storage.
@@ -549,6 +616,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	int i, nbio = 0;
 	int operation;
 	struct blk_plug plug;
+	bool drain = false;
 
 	switch (req->operation) {
 	case BLKIF_OP_READ:
@@ -559,11 +627,16 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 		blkif->st_wr_req++;
 		operation = WRITE_ODIRECT;
 		break;
+	case BLKIF_OP_WRITE_BARRIER:
+		drain = true;
 	case BLKIF_OP_FLUSH_DISKCACHE:
 		blkif->st_f_req++;
 		operation = WRITE_FLUSH;
 		break;
-	case BLKIF_OP_WRITE_BARRIER:
+	case BLKIF_OP_DISCARD:
+		blkif->st_ds_req++;
+		operation = REQ_DISCARD;
+		break;
 	default:
 		operation = 0; /* make gcc happy */
 		goto fail_response;
@@ -572,7 +645,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 
 	/* Check that the number of segments is sane. */
 	nseg = req->nr_segments;
-	if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
+	if (unlikely(nseg == 0 && operation != WRITE_FLUSH &&
+				operation != REQ_DISCARD) ||
 	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
 		pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
 			 nseg);
@@ -621,16 +695,25 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 		}
 	}
 
+	/* Wait on all outstanding I/O's and once that has been completed
+	 * issue the WRITE_FLUSH.
+	 */
+	if (drain)
+		xen_blk_drain_io(pending_req->blkif);
+
 	/*
 	 * If we have failed at this point, we need to undo the M2P override,
 	 * set gnttab_set_unmap_op on all of the grant references and perform
 	 * the hypercall to unmap the grants - that is all done in
 	 * xen_blkbk_unmap.
 	 */
-	if (xen_blkbk_map(req, pending_req, seg))
+	if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg))
 		goto fail_flush;
 
-	/* This corresponding xen_blkif_put is done in __end_block_io_op */
+	/*
+	 * This corresponding xen_blkif_put is done in __end_block_io_op, or
+	 * below (in "!bio") if we are handling a BLKIF_OP_DISCARD.
+	 */
 	xen_blkif_get(blkif);
 
 	for (i = 0; i < nseg; i++) {
@@ -654,18 +737,25 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 		preq.sector_number += seg[i].nsec;
 	}
 
-	/* This will be hit if the operation was a flush. */
+	/* This will be hit if the operation was a flush or discard. */
 	if (!bio) {
-		BUG_ON(operation != WRITE_FLUSH);
+		BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD);
 
-		bio = bio_alloc(GFP_KERNEL, 0);
-		if (unlikely(bio == NULL))
-			goto fail_put_bio;
+		if (operation == WRITE_FLUSH) {
+			bio = bio_alloc(GFP_KERNEL, 0);
+			if (unlikely(bio == NULL))
+				goto fail_put_bio;
 
-		biolist[nbio++] = bio;
-		bio->bi_bdev = preq.bdev;
-		bio->bi_private = pending_req;
-		bio->bi_end_io = end_block_io_op;
+			biolist[nbio++] = bio;
+			bio->bi_bdev = preq.bdev;
+			bio->bi_private = pending_req;
+			bio->bi_end_io = end_block_io_op;
+		} else if (operation == REQ_DISCARD) {
+			xen_blk_discard(blkif, req);
+			xen_blkif_put(blkif);
+			free_req(pending_req);
+			return 0;
+		}
 	}
 
 	/*
@@ -685,7 +775,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 
 	if (operation == READ)
 		blkif->st_rd_sect += preq.nr_sects;
-	else if (operation == WRITE || operation == WRITE_FLUSH)
+	else if (operation & WRITE)
 		blkif->st_wr_sect += preq.nr_sects;
 
 	return 0;
@@ -765,9 +855,9 @@ static int __init xen_blkif_init(void)
 
 	mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
 
-	blkbk->pending_reqs          = kmalloc(sizeof(blkbk->pending_reqs[0]) *
+	blkbk->pending_reqs          = kzalloc(sizeof(blkbk->pending_reqs[0]) *
 					xen_blkif_reqs, GFP_KERNEL);
-	blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) *
+	blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) *
 					mmap_pages, GFP_KERNEL);
 	blkbk->pending_pages         = kzalloc(sizeof(blkbk->pending_pages[0]) *
 					mmap_pages, GFP_KERNEL);
@@ -790,8 +880,6 @@ static int __init xen_blkif_init(void)
 	if (rc)
 		goto failed_init;
 
-	memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs));
-
 	INIT_LIST_HEAD(&blkbk->pending_free);
 	spin_lock_init(&blkbk->pending_free_lock);
 	init_waitqueue_head(&blkbk->pending_free_wq);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index c4bd34063ecc..de09f525d6c1 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -62,13 +62,26 @@ struct blkif_common_response {
 
 /* i386 protocol version */
 #pragma pack(push, 4)
+
+struct blkif_x86_32_request_rw {
+	blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
+struct blkif_x86_32_request_discard {
+	blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+	uint64_t nr_sectors;
+};
+
 struct blkif_x86_32_request {
 	uint8_t        operation;    /* BLKIF_OP_??? */
 	uint8_t        nr_segments;  /* number of segments */
 	blkif_vdev_t   handle;       /* only for read/write requests */
 	uint64_t       id;           /* private guest value, echoed in resp */
-	blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
-	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	union {
+		struct blkif_x86_32_request_rw rw;
+		struct blkif_x86_32_request_discard discard;
+	} u;
 };
 struct blkif_x86_32_response {
 	uint64_t        id;              /* copied from request */
@@ -78,13 +91,26 @@ struct blkif_x86_32_response {
 #pragma pack(pop)
 
 /* x86_64 protocol version */
+
+struct blkif_x86_64_request_rw {
+	blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
+struct blkif_x86_64_request_discard {
+	blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+	uint64_t nr_sectors;
+};
+
 struct blkif_x86_64_request {
 	uint8_t        operation;    /* BLKIF_OP_??? */
 	uint8_t        nr_segments;  /* number of segments */
 	blkif_vdev_t   handle;       /* only for read/write requests */
 	uint64_t       __attribute__((__aligned__(8))) id;
-	blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
-	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	union {
+		struct blkif_x86_64_request_rw rw;
+		struct blkif_x86_64_request_discard discard;
+	} u;
 };
 struct blkif_x86_64_response {
 	uint64_t       __attribute__((__aligned__(8))) id;
@@ -112,6 +138,11 @@ enum blkif_protocol {
 	BLKIF_PROTOCOL_X86_64 = 3,
 };
 
+enum blkif_backend_type {
+	BLKIF_BACKEND_PHY  = 1,
+	BLKIF_BACKEND_FILE = 2,
+};
+
 struct xen_vbd {
 	/* What the domain refers to this vbd as. */
 	blkif_vdev_t		handle;
@@ -137,6 +168,7 @@ struct xen_blkif {
 	unsigned int		irq;
 	/* Comms information. */
 	enum blkif_protocol	blk_protocol;
+	enum blkif_backend_type	blk_backend_type;
 	union blkif_back_rings	blk_rings;
 	struct vm_struct	*blk_ring_area;
 	/* The VBD attached to this interface. */
@@ -148,6 +180,9 @@ struct xen_blkif {
 	atomic_t		refcnt;
 
 	wait_queue_head_t	wq;
+	/* for barrier (drain) requests */
+	struct completion	drain_complete;
+	atomic_t		drain;
 	/* One thread per one blkif. */
 	struct task_struct	*xenblkd;
 	unsigned int		waiting_reqs;
@@ -158,6 +193,7 @@ struct xen_blkif {
 	int			st_wr_req;
 	int			st_oo_req;
 	int			st_f_req;
+	int			st_ds_req;
 	int			st_rd_sect;
 	int			st_wr_sect;
 
@@ -181,7 +217,7 @@ struct xen_blkif {
 
 struct phys_req {
 	unsigned short		dev;
-	unsigned short		nr_sects;
+	blkif_sector_t		nr_sects;
 	struct block_device	*bdev;
 	blkif_sector_t		sector_number;
 };
@@ -195,6 +231,8 @@ int xen_blkif_schedule(void *arg);
 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
 			      struct backend_info *be, int state);
 
+int xen_blkbk_barrier(struct xenbus_transaction xbt,
+		      struct backend_info *be, int state);
 struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
 
 static inline void blkif_get_x86_32_req(struct blkif_request *dst,
@@ -205,12 +243,25 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
 	dst->nr_segments = src->nr_segments;
 	dst->handle = src->handle;
 	dst->id = src->id;
-	dst->u.rw.sector_number = src->sector_number;
-	barrier();
-	if (n > dst->nr_segments)
-		n = dst->nr_segments;
-	for (i = 0; i < n; i++)
-		dst->u.rw.seg[i] = src->seg[i];
+	switch (src->operation) {
+	case BLKIF_OP_READ:
+	case BLKIF_OP_WRITE:
+	case BLKIF_OP_WRITE_BARRIER:
+	case BLKIF_OP_FLUSH_DISKCACHE:
+		dst->u.rw.sector_number = src->u.rw.sector_number;
+		barrier();
+		if (n > dst->nr_segments)
+			n = dst->nr_segments;
+		for (i = 0; i < n; i++)
+			dst->u.rw.seg[i] = src->u.rw.seg[i];
+		break;
+	case BLKIF_OP_DISCARD:
+		dst->u.discard.sector_number = src->u.discard.sector_number;
+		dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+		break;
+	default:
+		break;
+	}
 }
 
 static inline void blkif_get_x86_64_req(struct blkif_request *dst,
@@ -221,12 +272,25 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
 	dst->nr_segments = src->nr_segments;
 	dst->handle = src->handle;
 	dst->id = src->id;
-	dst->u.rw.sector_number = src->sector_number;
-	barrier();
-	if (n > dst->nr_segments)
-		n = dst->nr_segments;
-	for (i = 0; i < n; i++)
-		dst->u.rw.seg[i] = src->seg[i];
+	switch (src->operation) {
+	case BLKIF_OP_READ:
+	case BLKIF_OP_WRITE:
+	case BLKIF_OP_WRITE_BARRIER:
+	case BLKIF_OP_FLUSH_DISKCACHE:
+		dst->u.rw.sector_number = src->u.rw.sector_number;
+		barrier();
+		if (n > dst->nr_segments)
+			n = dst->nr_segments;
+		for (i = 0; i < n; i++)
+			dst->u.rw.seg[i] = src->u.rw.seg[i];
+		break;
+	case BLKIF_OP_DISCARD:
+		dst->u.discard.sector_number = src->u.discard.sector_number;
+		dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+		break;
+	default:
+		break;
+	}
 }
 
 #endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */
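Because the ring pages are shared with guests, wrapping the request payload in a union must not move any field of the classic read/write layout. That is easy to sanity-check from userspace; a sketch using hand-written stand-ins for the Xen typedefs (uint64_t blkif_sector_t, uint16_t blkif_vdev_t, uint32_t grant_ref_t, 11 segments), so the printed offsets can be compared before and after the change:

/* Check that the union'd i386 request keeps id at offset 4 and the
 * payload (u) where sector_number used to start. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t blkif_sector_t;
typedef uint16_t blkif_vdev_t;
typedef uint32_t grant_ref_t;
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11

#pragma pack(push, 4)
struct blkif_request_segment {
        grant_ref_t gref;
        uint8_t first_sect, last_sect;
};
struct blkif_x86_32_request {
        uint8_t operation;
        uint8_t nr_segments;
        blkif_vdev_t handle;
        uint64_t id;
        union {
                struct {
                        blkif_sector_t sector_number;
                        struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
                } rw;
                struct {
                        blkif_sector_t sector_number;
                        uint64_t nr_sectors;
                } discard;
        } u;
};
#pragma pack(pop)

int main(void)
{
        printf("id at %zu, u at %zu, total %zu bytes\n",
               offsetof(struct blkif_x86_32_request, id),
               offsetof(struct blkif_x86_32_request, u),
               sizeof(struct blkif_x86_32_request));
        return 0;
}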
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 5fd2010f7d2b..2c008afe63d9 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -114,6 +114,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	spin_lock_init(&blkif->blk_ring_lock);
 	atomic_set(&blkif->refcnt, 1);
 	init_waitqueue_head(&blkif->wq);
+	init_completion(&blkif->drain_complete);
+	atomic_set(&blkif->drain, 0);
 	blkif->st_print = jiffies;
 	init_waitqueue_head(&blkif->waiting_to_free);
 
@@ -272,6 +274,7 @@ VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
 VBD_SHOW(rd_req,  "%d\n", be->blkif->st_rd_req);
 VBD_SHOW(wr_req,  "%d\n", be->blkif->st_wr_req);
 VBD_SHOW(f_req,  "%d\n", be->blkif->st_f_req);
+VBD_SHOW(ds_req,  "%d\n", be->blkif->st_ds_req);
 VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
 VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
 
@@ -280,6 +283,7 @@ static struct attribute *xen_vbdstat_attrs[] = {
280 &dev_attr_rd_req.attr, 283 &dev_attr_rd_req.attr,
281 &dev_attr_wr_req.attr, 284 &dev_attr_wr_req.attr,
282 &dev_attr_f_req.attr, 285 &dev_attr_f_req.attr,
286 &dev_attr_ds_req.attr,
283 &dev_attr_rd_sect.attr, 287 &dev_attr_rd_sect.attr,
284 &dev_attr_wr_sect.attr, 288 &dev_attr_wr_sect.attr,
285 NULL 289 NULL
@@ -419,6 +423,73 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
419 return err; 423 return err;
420} 424}
421 425
426int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
427{
428 struct xenbus_device *dev = be->dev;
429 struct xen_blkif *blkif = be->blkif;
430 char *type;
431 int err;
432 int state = 0;
433
434 type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
435 if (!IS_ERR(type)) {
436 if (strncmp(type, "file", 4) == 0) {
437 state = 1;
438 blkif->blk_backend_type = BLKIF_BACKEND_FILE;
439 }
440 if (strncmp(type, "phy", 3) == 0) {
441 struct block_device *bdev = be->blkif->vbd.bdev;
442 struct request_queue *q = bdev_get_queue(bdev);
443 if (blk_queue_discard(q)) {
444 err = xenbus_printf(xbt, dev->nodename,
445 "discard-granularity", "%u",
446 q->limits.discard_granularity);
447 if (err) {
448 xenbus_dev_fatal(dev, err,
449 "writing discard-granularity");
450 goto kfree;
451 }
452 err = xenbus_printf(xbt, dev->nodename,
453 "discard-alignment", "%u",
454 q->limits.discard_alignment);
455 if (err) {
456 xenbus_dev_fatal(dev, err,
457 "writing discard-alignment");
458 goto kfree;
459 }
460 state = 1;
461 blkif->blk_backend_type = BLKIF_BACKEND_PHY;
462 }
463 }
464 } else {
465 err = PTR_ERR(type);
466 xenbus_dev_fatal(dev, err, "reading type");
467 goto out;
468 }
469
470 err = xenbus_printf(xbt, dev->nodename, "feature-discard",
471 "%d", state);
472 if (err)
473 xenbus_dev_fatal(dev, err, "writing feature-discard");
474kfree:
475 kfree(type);
476out:
477 return err;
478}
479int xen_blkbk_barrier(struct xenbus_transaction xbt,
480 struct backend_info *be, int state)
481{
482 struct xenbus_device *dev = be->dev;
483 int err;
484
485 err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
486 "%d", state);
487 if (err)
488 xenbus_dev_fatal(dev, err, "writing feature-barrier");
489
490 return err;
491}
492
422/* 493/*
423 * Entry point to this code when a new device is created. Allocate the basic 494 * Entry point to this code when a new device is created. Allocate the basic
424 * structures, and watch the store waiting for the hotplug scripts to tell us 495 * structures, and watch the store waiting for the hotplug scripts to tell us
@@ -650,6 +721,11 @@ again:
650 if (err) 721 if (err)
651 goto abort; 722 goto abort;
652 723
724 err = xen_blkbk_discard(xbt, be);
725
 726 /* If we can't advertise it, that's OK. */
727 err = xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
728
653 err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", 729 err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
654 (unsigned long long)vbd_sz(&be->blkif->vbd)); 730 (unsigned long long)vbd_sz(&be->blkif->vbd));
655 if (err) { 731 if (err) {
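
For orientation, once this transaction commits, the backend's xenbus directory carries the new discard nodes alongside the existing ones. The values below are illustrative only: granularity and alignment come from the physical device's queue limits, and feature-barrier reflects vbd.flush_support.

    # xenstore-ls of one vbd backend directory (illustrative values)
    feature-flush-cache = "1"
    feature-discard = "1"
    discard-granularity = "4096"
    discard-alignment = "0"
    feature-barrier = "1"
    sectors = "20971520"
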
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 9ea8c2576c70..7b2ec5908413 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -98,6 +98,9 @@ struct blkfront_info
98 unsigned long shadow_free; 98 unsigned long shadow_free;
99 unsigned int feature_flush; 99 unsigned int feature_flush;
100 unsigned int flush_op; 100 unsigned int flush_op;
101 unsigned int feature_discard;
102 unsigned int discard_granularity;
103 unsigned int discard_alignment;
101 int is_ready; 104 int is_ready;
102}; 105};
103 106
@@ -302,29 +305,36 @@ static int blkif_queue_request(struct request *req)
302 ring_req->operation = info->flush_op; 305 ring_req->operation = info->flush_op;
303 } 306 }
304 307
305 ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); 308 if (unlikely(req->cmd_flags & REQ_DISCARD)) {
306 BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); 309 /* id, sector_number and handle are set above. */
310 ring_req->operation = BLKIF_OP_DISCARD;
311 ring_req->nr_segments = 0;
312 ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
313 } else {
314 ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
315 BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
307 316
308 for_each_sg(info->sg, sg, ring_req->nr_segments, i) { 317 for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
309 buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg))); 318 buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
310 fsect = sg->offset >> 9; 319 fsect = sg->offset >> 9;
311 lsect = fsect + (sg->length >> 9) - 1; 320 lsect = fsect + (sg->length >> 9) - 1;
312 /* install a grant reference. */ 321 /* install a grant reference. */
313 ref = gnttab_claim_grant_reference(&gref_head); 322 ref = gnttab_claim_grant_reference(&gref_head);
314 BUG_ON(ref == -ENOSPC); 323 BUG_ON(ref == -ENOSPC);
315 324
316 gnttab_grant_foreign_access_ref( 325 gnttab_grant_foreign_access_ref(
317 ref, 326 ref,
318 info->xbdev->otherend_id, 327 info->xbdev->otherend_id,
319 buffer_mfn, 328 buffer_mfn,
320 rq_data_dir(req) ); 329 rq_data_dir(req));
321 330
322 info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); 331 info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
323 ring_req->u.rw.seg[i] = 332 ring_req->u.rw.seg[i] =
324 (struct blkif_request_segment) { 333 (struct blkif_request_segment) {
325 .gref = ref, 334 .gref = ref,
326 .first_sect = fsect, 335 .first_sect = fsect,
327 .last_sect = lsect }; 336 .last_sect = lsect };
337 }
328 } 338 }
329 339
330 info->ring.req_prod_pvt++; 340 info->ring.req_prod_pvt++;
@@ -370,7 +380,9 @@ static void do_blkif_request(struct request_queue *rq)
370 380
371 blk_start_request(req); 381 blk_start_request(req);
372 382
373 if (req->cmd_type != REQ_TYPE_FS) { 383 if ((req->cmd_type != REQ_TYPE_FS) ||
384 ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) &&
385 !info->flush_op)) {
374 __blk_end_request_all(req, -EIO); 386 __blk_end_request_all(req, -EIO);
375 continue; 387 continue;
376 } 388 }
@@ -399,6 +411,7 @@ wait:
399static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) 411static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
400{ 412{
401 struct request_queue *rq; 413 struct request_queue *rq;
414 struct blkfront_info *info = gd->private_data;
402 415
403 rq = blk_init_queue(do_blkif_request, &blkif_io_lock); 416 rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
404 if (rq == NULL) 417 if (rq == NULL)
@@ -406,6 +419,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
406 419
407 queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); 420 queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
408 421
422 if (info->feature_discard) {
423 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
424 blk_queue_max_discard_sectors(rq, get_capacity(gd));
425 rq->limits.discard_granularity = info->discard_granularity;
426 rq->limits.discard_alignment = info->discard_alignment;
427 }
428
409 /* Hard sector size and max sectors impersonate the equiv. hardware. */ 429 /* Hard sector size and max sectors impersonate the equiv. hardware. */
410 blk_queue_logical_block_size(rq, sector_size); 430 blk_queue_logical_block_size(rq, sector_size);
411 blk_queue_max_hw_sectors(rq, 512); 431 blk_queue_max_hw_sectors(rq, 512);
@@ -722,6 +742,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
722 742
723 error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; 743 error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
724 switch (bret->operation) { 744 switch (bret->operation) {
745 case BLKIF_OP_DISCARD:
746 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
747 struct request_queue *rq = info->rq;
748 printk(KERN_WARNING "blkfront: %s: discard op failed\n",
749 info->gd->disk_name);
750 error = -EOPNOTSUPP;
751 info->feature_discard = 0;
752 queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
753 }
754 __blk_end_request_all(req, error);
755 break;
725 case BLKIF_OP_FLUSH_DISKCACHE: 756 case BLKIF_OP_FLUSH_DISKCACHE:
726 case BLKIF_OP_WRITE_BARRIER: 757 case BLKIF_OP_WRITE_BARRIER:
727 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { 758 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
@@ -1098,6 +1129,33 @@ blkfront_closing(struct blkfront_info *info)
1098 bdput(bdev); 1129 bdput(bdev);
1099} 1130}
1100 1131
1132static void blkfront_setup_discard(struct blkfront_info *info)
1133{
1134 int err;
1135 char *type;
1136 unsigned int discard_granularity;
1137 unsigned int discard_alignment;
1138
1139 type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL);
1140 if (IS_ERR(type))
1141 return;
1142
1143 if (strncmp(type, "phy", 3) == 0) {
1144 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1145 "discard-granularity", "%u", &discard_granularity,
1146 "discard-alignment", "%u", &discard_alignment,
1147 NULL);
1148 if (!err) {
1149 info->feature_discard = 1;
1150 info->discard_granularity = discard_granularity;
1151 info->discard_alignment = discard_alignment;
1152 }
1153 } else if (strncmp(type, "file", 4) == 0)
1154 info->feature_discard = 1;
1155
1156 kfree(type);
1157}
1158
1101/* 1159/*
 1102 * Invoked when the backend is finally 'ready' (and has produced 1160 * Invoked when the backend is finally 'ready' (and has produced
1103 * the details about the physical device - #sectors, size, etc). 1161 * the details about the physical device - #sectors, size, etc).
@@ -1108,7 +1166,7 @@ static void blkfront_connect(struct blkfront_info *info)
1108 unsigned long sector_size; 1166 unsigned long sector_size;
1109 unsigned int binfo; 1167 unsigned int binfo;
1110 int err; 1168 int err;
1111 int barrier, flush; 1169 int barrier, flush, discard;
1112 1170
1113 switch (info->connected) { 1171 switch (info->connected) {
1114 case BLKIF_STATE_CONNECTED: 1172 case BLKIF_STATE_CONNECTED:
@@ -1178,7 +1236,14 @@ static void blkfront_connect(struct blkfront_info *info)
1178 info->feature_flush = REQ_FLUSH; 1236 info->feature_flush = REQ_FLUSH;
1179 info->flush_op = BLKIF_OP_FLUSH_DISKCACHE; 1237 info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
1180 } 1238 }
1181 1239
1240 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1241 "feature-discard", "%d", &discard,
1242 NULL);
1243
1244 if (!err && discard)
1245 blkfront_setup_discard(info);
1246
1182 err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); 1247 err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
1183 if (err) { 1248 if (err) {
1184 xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", 1249 xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
@@ -1385,6 +1450,8 @@ static struct xenbus_driver blkfront = {
1385 1450
1386static int __init xlblk_init(void) 1451static int __init xlblk_init(void)
1387{ 1452{
1453 int ret;
1454
1388 if (!xen_domain()) 1455 if (!xen_domain())
1389 return -ENODEV; 1456 return -ENODEV;
1390 1457
@@ -1394,7 +1461,13 @@ static int __init xlblk_init(void)
1394 return -ENODEV; 1461 return -ENODEV;
1395 } 1462 }
1396 1463
1397 return xenbus_register_frontend(&blkfront); 1464 ret = xenbus_register_frontend(&blkfront);
1465 if (ret) {
1466 unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
1467 return ret;
1468 }
1469
1470 return 0;
1398} 1471}
1399module_init(xlblk_init); 1472module_init(xlblk_init);
1400 1473
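
Putting the frontend pieces together: the whole path can be exercised from userspace with the BLKDISCARD ioctl, which the block layer turns into a REQ_DISCARD request that blkif_queue_request() encodes as BLKIF_OP_DISCARD with zero segments. A hedged sketch - the device path and range are illustrative, and the backend must advertise "feature-discard":

#include <fcntl.h>
#include <linux/fs.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	uint64_t range[2] = { 0, 1 << 20 };	/* offset, length in bytes */
	int fd = open("/dev/xvda", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Becomes a REQ_DISCARD request, which blkif_queue_request()
	 * sends as BLKIF_OP_DISCARD with nr_segments == 0. */
	if (ioctl(fd, BLKDISCARD, &range))
		perror("BLKDISCARD");	/* EOPNOTSUPP if unsupported */
	close(fd);
	return 0;
}
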
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 9825ecf34957..bbdc9f960a66 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -3300,6 +3300,13 @@ static int hpsa_controller_hard_reset(struct pci_dev *pdev,
3300 pmcsr &= ~PCI_PM_CTRL_STATE_MASK; 3300 pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
3301 pmcsr |= PCI_D0; 3301 pmcsr |= PCI_D0;
3302 pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr); 3302 pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
3303
3304 /*
3305 * The P600 requires a small delay when changing states.
3306 * Otherwise we may think the board did not reset and we bail.
 3307 * This is for kdump only and is particular to the P600.
3308 */
3309 msleep(500);
3303 } 3310 }
3304 return 0; 3311 return 0;
3305} 3312}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1c44b8d54504..b07f1da1de4e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -971,7 +971,7 @@ static void flush_disk(struct block_device *bdev, bool kill_dirty)
971 971
972 if (!bdev->bd_disk) 972 if (!bdev->bd_disk)
973 return; 973 return;
974 if (disk_partitionable(bdev->bd_disk)) 974 if (disk_part_scan_enabled(bdev->bd_disk))
975 bdev->bd_invalidated = 1; 975 bdev->bd_invalidated = 1;
976} 976}
977 977
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 6957350e122f..9de31bc98c88 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -131,6 +131,7 @@ struct hd_struct {
131#define GENHD_FL_EXT_DEVT 64 /* allow extended devt */ 131#define GENHD_FL_EXT_DEVT 64 /* allow extended devt */
132#define GENHD_FL_NATIVE_CAPACITY 128 132#define GENHD_FL_NATIVE_CAPACITY 128
133#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE 256 133#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE 256
134#define GENHD_FL_NO_PART_SCAN 512
134 135
135enum { 136enum {
136 DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */ 137 DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */
@@ -238,9 +239,10 @@ static inline int disk_max_parts(struct gendisk *disk)
238 return disk->minors; 239 return disk->minors;
239} 240}
240 241
241static inline bool disk_partitionable(struct gendisk *disk) 242static inline bool disk_part_scan_enabled(struct gendisk *disk)
242{ 243{
243 return disk_max_parts(disk) > 1; 244 return disk_max_parts(disk) > 1 &&
245 !(disk->flags & GENHD_FL_NO_PART_SCAN);
244} 246}
245 247
246static inline dev_t disk_devt(struct gendisk *disk) 248static inline dev_t disk_devt(struct gendisk *disk)
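
The rename makes the intent explicit: a disk can keep minors reserved for partitions while automatic partition scanning is suppressed. A kernel-style sketch of how a driver opts out (illustrative fragment; it mirrors what the loop driver does with LO_FLAGS_PARTSCAN elsewhere in this series):

#include <linux/genhd.h>

/* Illustrative: reserve minors for partitions but keep the automatic
 * partition scan off until explicitly requested. */
static void example_add_disk(struct gendisk *disk)
{
	disk->flags |= GENHD_FL_NO_PART_SCAN;
	add_disk(disk);
	/* Clearing the flag later re-enables scanning on the next
	 * rescan (e.g. a BLKRRPART ioctl from userspace). */
}
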
diff --git a/include/linux/loop.h b/include/linux/loop.h
index a06880689115..11a41a8f08eb 100644
--- a/include/linux/loop.h
+++ b/include/linux/loop.h
@@ -74,6 +74,7 @@ struct loop_device {
74enum { 74enum {
75 LO_FLAGS_READ_ONLY = 1, 75 LO_FLAGS_READ_ONLY = 1,
76 LO_FLAGS_AUTOCLEAR = 4, 76 LO_FLAGS_AUTOCLEAR = 4,
77 LO_FLAGS_PARTSCAN = 8,
77}; 78};
78 79
79#include <asm/posix_types.h> /* for __kernel_old_dev_t */ 80#include <asm/posix_types.h> /* for __kernel_old_dev_t */
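
From userspace, the new flag is requested through the existing LOOP_SET_STATUS64 ioctl. A minimal sketch - the paths are illustrative, error handling is trimmed for brevity, and a kernel with this series applied is assumed:

#include <fcntl.h>
#include <linux/loop.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	int loopfd = open("/dev/loop0", O_RDWR);
	int filefd = open("disk.img", O_RDWR);
	struct loop_info64 info;

	ioctl(loopfd, LOOP_SET_FD, filefd);

	memset(&info, 0, sizeof(info));
	strncpy((char *)info.lo_file_name, "disk.img", LO_NAME_SIZE - 1);
	info.lo_flags = LO_FLAGS_PARTSCAN;	/* request a partition scan */
	ioctl(loopfd, LOOP_SET_STATUS64, &info);

	close(filefd);
	close(loopfd);
	return 0;
}
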
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
index 3d5d6db864fe..9324488f23f0 100644
--- a/include/xen/interface/io/blkif.h
+++ b/include/xen/interface/io/blkif.h
@@ -57,6 +57,36 @@ typedef uint64_t blkif_sector_t;
57 * "feature-flush-cache" node! 57 * "feature-flush-cache" node!
58 */ 58 */
59#define BLKIF_OP_FLUSH_DISKCACHE 3 59#define BLKIF_OP_FLUSH_DISKCACHE 3
60
61/*
62 * Recognised only if "feature-discard" is present in backend xenbus info.
63 * The "feature-discard" node contains a boolean indicating whether trim
64 * (ATA) or unmap (SCSI) - conviently called discard requests are likely
65 * to succeed or fail. Either way, a discard request
66 * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by
67 * the underlying block-device hardware. The boolean simply indicates whether
68 * or not it is worthwhile for the frontend to attempt discard requests.
69 * If a backend does not recognise BLKIF_OP_DISCARD, it should *not*
70 * create the "feature-discard" node!
71 *
 72 * A discard operation is a request for the underlying block device to mark
73 * extents to be erased. However, discard does not guarantee that the blocks
74 * will be erased from the device - it is just a hint to the device
75 * controller that these blocks are no longer in use. What the device
76 * controller does with that information is left to the controller.
77 * Discard operations are passed with sector_number as the
 78 * first sector to discard and nr_sectors as the number of
79 * sectors to be discarded. The specified sectors should be discarded if the
80 * underlying block device supports trim (ATA) or unmap (SCSI) operations,
81 * or a BLKIF_RSP_EOPNOTSUPP should be returned.
82 * More information about trim/unmap operations at:
83 * http://t13.org/Documents/UploadedDocuments/docs2008/
84 * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
85 * http://www.seagate.com/staticfiles/support/disc/manuals/
86 * Interface%20manuals/100293068c.pdf
87 */
88#define BLKIF_OP_DISCARD 5
89
60/* 90/*
61 * Maximum scatter/gather segments per request. 91 * Maximum scatter/gather segments per request.
62 * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE. 92 * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE.
@@ -74,6 +104,11 @@ struct blkif_request_rw {
74 } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 104 } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
75}; 105};
76 106
107struct blkif_request_discard {
108 blkif_sector_t sector_number;
109 uint64_t nr_sectors;
110};
111
77struct blkif_request { 112struct blkif_request {
78 uint8_t operation; /* BLKIF_OP_??? */ 113 uint8_t operation; /* BLKIF_OP_??? */
79 uint8_t nr_segments; /* number of segments */ 114 uint8_t nr_segments; /* number of segments */
@@ -81,6 +116,7 @@ struct blkif_request {
81 uint64_t id; /* private guest value, echoed in resp */ 116 uint64_t id; /* private guest value, echoed in resp */
82 union { 117 union {
83 struct blkif_request_rw rw; 118 struct blkif_request_rw rw;
119 struct blkif_request_discard discard;
84 } u; 120 } u;
85}; 121};
86 122
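
A closing sanity check on the ABI: blkif_request_discard shares the union with blkif_request_rw, so the sizing of existing requests is unaffected and sector_number stays at the same offset under both views. A quick userspace approximation (simplified stand-in structs, not the real arch-aligned headers):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the blkif structures above. */
struct seg { uint32_t gref; uint8_t first_sect, last_sect; };
struct rw { uint64_t sector_number; struct seg seg[11]; };
struct discard { uint64_t sector_number; uint64_t nr_sectors; };
union u { struct rw rw; struct discard discard; };

int main(void)
{
	printf("rw: %zu bytes, discard: %zu bytes, union: %zu bytes\n",
	       sizeof(struct rw), sizeof(struct discard), sizeof(union u));
	printf("sector_number offsets match: %d\n",
	       offsetof(struct rw, sector_number) ==
	       offsetof(struct discard, sector_number));
	return 0;
}
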