aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-11-04 20:22:14 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2011-11-04 20:22:14 -0400
commit3d0a8d10cfb4cc3d1877c29a866ee7d8a46aa2fa (patch)
tree11a85044d1472f5972ae47ce10a2f446ad981e9f /drivers/block
parentb4fdcb02f1e39c27058a885905bd0277370ba441 (diff)
parenta0eda62552eba4e1f92d5354bb65c68fb6b45f87 (diff)
Merge branch 'for-3.2/drivers' of git://git.kernel.dk/linux-block
* 'for-3.2/drivers' of git://git.kernel.dk/linux-block: (30 commits) virtio-blk: use ida to allocate disk index hpsa: add small delay when using PCI Power Management to reset for kump cciss: add small delay when using PCI Power Management to reset for kump xen/blkback: Fix two races in the handling of barrier requests. xen/blkback: Check for proper operation. xen/blkback: Fix the inhibition to map pages when discarding sector ranges. xen/blkback: Report VBD_WSECT (wr_sect) properly. xen/blkback: Support 'feature-barrier' aka old-style BARRIER requests. xen-blkfront: plug device number leak in xlblk_init() error path xen-blkfront: If no barrier or flush is supported, use invalid operation. xen-blkback: use kzalloc() in favor of kmalloc()+memset() xen-blkback: fixed indentation and comments xen-blkfront: fix a deadlock while handling discard response xen-blkfront: Handle discard requests. xen-blkback: Implement discard requests ('feature-discard') xen-blkfront: add BLKIF_OP_DISCARD and discard request struct drivers/block/loop.c: remove unnecessary bdev argument from loop_clr_fd() drivers/block/loop.c: emit uevent on auto release drivers/block/cpqarray.c: use pci_dev->revision loop: always allow userspace partitions and optionally support automatic scanning ... Fix up trivial header file inclusion conflict in drivers/block/loop.c
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/cciss.c76
-rw-r--r--drivers/block/cciss.h1
-rw-r--r--drivers/block/cpqarray.c2
-rw-r--r--drivers/block/loop.c111
-rw-r--r--drivers/block/nbd.c69
-rw-r--r--drivers/block/xen-blkback/blkback.c130
-rw-r--r--drivers/block/xen-blkback/common.h98
-rw-r--r--drivers/block/xen-blkback/xenbus.c76
-rw-r--r--drivers/block/xen-blkfront.c123
9 files changed, 569 insertions, 117 deletions
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 8f4ef656a1af..486f94ef24d4 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -68,6 +68,10 @@ static int cciss_tape_cmds = 6;
68module_param(cciss_tape_cmds, int, 0644); 68module_param(cciss_tape_cmds, int, 0644);
69MODULE_PARM_DESC(cciss_tape_cmds, 69MODULE_PARM_DESC(cciss_tape_cmds,
70 "number of commands to allocate for tape devices (default: 6)"); 70 "number of commands to allocate for tape devices (default: 6)");
71static int cciss_simple_mode;
72module_param(cciss_simple_mode, int, S_IRUGO|S_IWUSR);
73MODULE_PARM_DESC(cciss_simple_mode,
74 "Use 'simple mode' rather than 'performant mode'");
71 75
72static DEFINE_MUTEX(cciss_mutex); 76static DEFINE_MUTEX(cciss_mutex);
73static struct proc_dir_entry *proc_cciss; 77static struct proc_dir_entry *proc_cciss;
@@ -176,6 +180,7 @@ static void cciss_geometry_inquiry(ctlr_info_t *h, int logvol,
176 unsigned int block_size, InquiryData_struct *inq_buff, 180 unsigned int block_size, InquiryData_struct *inq_buff,
177 drive_info_struct *drv); 181 drive_info_struct *drv);
178static void __devinit cciss_interrupt_mode(ctlr_info_t *); 182static void __devinit cciss_interrupt_mode(ctlr_info_t *);
183static int __devinit cciss_enter_simple_mode(struct ctlr_info *h);
179static void start_io(ctlr_info_t *h); 184static void start_io(ctlr_info_t *h);
180static int sendcmd_withirq(ctlr_info_t *h, __u8 cmd, void *buff, size_t size, 185static int sendcmd_withirq(ctlr_info_t *h, __u8 cmd, void *buff, size_t size,
181 __u8 page_code, unsigned char scsi3addr[], 186 __u8 page_code, unsigned char scsi3addr[],
@@ -388,7 +393,7 @@ static void cciss_seq_show_header(struct seq_file *seq)
388 h->product_name, 393 h->product_name,
389 (unsigned long)h->board_id, 394 (unsigned long)h->board_id,
390 h->firm_ver[0], h->firm_ver[1], h->firm_ver[2], 395 h->firm_ver[0], h->firm_ver[1], h->firm_ver[2],
391 h->firm_ver[3], (unsigned int)h->intr[PERF_MODE_INT], 396 h->firm_ver[3], (unsigned int)h->intr[h->intr_mode],
392 h->num_luns, 397 h->num_luns,
393 h->Qdepth, h->commands_outstanding, 398 h->Qdepth, h->commands_outstanding,
394 h->maxQsinceinit, h->max_outstanding, h->maxSG); 399 h->maxQsinceinit, h->max_outstanding, h->maxSG);
@@ -636,6 +641,18 @@ static ssize_t host_store_rescan(struct device *dev,
636} 641}
637static DEVICE_ATTR(rescan, S_IWUSR, NULL, host_store_rescan); 642static DEVICE_ATTR(rescan, S_IWUSR, NULL, host_store_rescan);
638 643
644static ssize_t host_show_transport_mode(struct device *dev,
645 struct device_attribute *attr,
646 char *buf)
647{
648 struct ctlr_info *h = to_hba(dev);
649
650 return snprintf(buf, 20, "%s\n",
651 h->transMethod & CFGTBL_Trans_Performant ?
652 "performant" : "simple");
653}
654static DEVICE_ATTR(transport_mode, S_IRUGO, host_show_transport_mode, NULL);
655
639static ssize_t dev_show_unique_id(struct device *dev, 656static ssize_t dev_show_unique_id(struct device *dev,
640 struct device_attribute *attr, 657 struct device_attribute *attr,
641 char *buf) 658 char *buf)
@@ -808,6 +825,7 @@ static DEVICE_ATTR(usage_count, S_IRUGO, cciss_show_usage_count, NULL);
808static struct attribute *cciss_host_attrs[] = { 825static struct attribute *cciss_host_attrs[] = {
809 &dev_attr_rescan.attr, 826 &dev_attr_rescan.attr,
810 &dev_attr_resettable.attr, 827 &dev_attr_resettable.attr,
828 &dev_attr_transport_mode.attr,
811 NULL 829 NULL
812}; 830};
813 831
@@ -3984,6 +4002,9 @@ static void __devinit cciss_put_controller_into_performant_mode(ctlr_info_t *h)
3984{ 4002{
3985 __u32 trans_support; 4003 __u32 trans_support;
3986 4004
4005 if (cciss_simple_mode)
4006 return;
4007
3987 dev_dbg(&h->pdev->dev, "Trying to put board into Performant mode\n"); 4008 dev_dbg(&h->pdev->dev, "Trying to put board into Performant mode\n");
3988 /* Attempt to put controller into performant mode if supported */ 4009 /* Attempt to put controller into performant mode if supported */
3989 /* Does board support performant mode? */ 4010 /* Does board support performant mode? */
@@ -4081,7 +4102,7 @@ static void __devinit cciss_interrupt_mode(ctlr_info_t *h)
4081default_int_mode: 4102default_int_mode:
4082#endif /* CONFIG_PCI_MSI */ 4103#endif /* CONFIG_PCI_MSI */
4083 /* if we get here we're going to use the default interrupt mode */ 4104 /* if we get here we're going to use the default interrupt mode */
4084 h->intr[PERF_MODE_INT] = h->pdev->irq; 4105 h->intr[h->intr_mode] = h->pdev->irq;
4085 return; 4106 return;
4086} 4107}
4087 4108
@@ -4341,6 +4362,9 @@ static int __devinit cciss_pci_init(ctlr_info_t *h)
4341 } 4362 }
4342 cciss_enable_scsi_prefetch(h); 4363 cciss_enable_scsi_prefetch(h);
4343 cciss_p600_dma_prefetch_quirk(h); 4364 cciss_p600_dma_prefetch_quirk(h);
4365 err = cciss_enter_simple_mode(h);
4366 if (err)
4367 goto err_out_free_res;
4344 cciss_put_controller_into_performant_mode(h); 4368 cciss_put_controller_into_performant_mode(h);
4345 return 0; 4369 return 0;
4346 4370
@@ -4533,6 +4557,13 @@ static int cciss_controller_hard_reset(struct pci_dev *pdev,
4533 pmcsr &= ~PCI_PM_CTRL_STATE_MASK; 4557 pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
4534 pmcsr |= PCI_D0; 4558 pmcsr |= PCI_D0;
4535 pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr); 4559 pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
4560
4561 /*
4562 * The P600 requires a small delay when changing states.
4563 * Otherwise we may think the board did not reset and we bail.
4564 * This is for kdump only and is particular to the P600.
4565 */
4566 msleep(500);
4536 } 4567 }
4537 return 0; 4568 return 0;
4538} 4569}
@@ -4843,20 +4874,20 @@ static int cciss_request_irq(ctlr_info_t *h,
4843 irqreturn_t (*intxhandler)(int, void *)) 4874 irqreturn_t (*intxhandler)(int, void *))
4844{ 4875{
4845 if (h->msix_vector || h->msi_vector) { 4876 if (h->msix_vector || h->msi_vector) {
4846 if (!request_irq(h->intr[PERF_MODE_INT], msixhandler, 4877 if (!request_irq(h->intr[h->intr_mode], msixhandler,
4847 IRQF_DISABLED, h->devname, h)) 4878 IRQF_DISABLED, h->devname, h))
4848 return 0; 4879 return 0;
4849 dev_err(&h->pdev->dev, "Unable to get msi irq %d" 4880 dev_err(&h->pdev->dev, "Unable to get msi irq %d"
4850 " for %s\n", h->intr[PERF_MODE_INT], 4881 " for %s\n", h->intr[h->intr_mode],
4851 h->devname); 4882 h->devname);
4852 return -1; 4883 return -1;
4853 } 4884 }
4854 4885
4855 if (!request_irq(h->intr[PERF_MODE_INT], intxhandler, 4886 if (!request_irq(h->intr[h->intr_mode], intxhandler,
4856 IRQF_DISABLED, h->devname, h)) 4887 IRQF_DISABLED, h->devname, h))
4857 return 0; 4888 return 0;
4858 dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n", 4889 dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n",
4859 h->intr[PERF_MODE_INT], h->devname); 4890 h->intr[h->intr_mode], h->devname);
4860 return -1; 4891 return -1;
4861} 4892}
4862 4893
@@ -4887,7 +4918,7 @@ static void cciss_undo_allocations_after_kdump_soft_reset(ctlr_info_t *h)
4887{ 4918{
4888 int ctlr = h->ctlr; 4919 int ctlr = h->ctlr;
4889 4920
4890 free_irq(h->intr[PERF_MODE_INT], h); 4921 free_irq(h->intr[h->intr_mode], h);
4891#ifdef CONFIG_PCI_MSI 4922#ifdef CONFIG_PCI_MSI
4892 if (h->msix_vector) 4923 if (h->msix_vector)
4893 pci_disable_msix(h->pdev); 4924 pci_disable_msix(h->pdev);
@@ -4953,6 +4984,7 @@ reinit_after_soft_reset:
4953 h = hba[i]; 4984 h = hba[i];
4954 h->pdev = pdev; 4985 h->pdev = pdev;
4955 h->busy_initializing = 1; 4986 h->busy_initializing = 1;
4987 h->intr_mode = cciss_simple_mode ? SIMPLE_MODE_INT : PERF_MODE_INT;
4956 INIT_LIST_HEAD(&h->cmpQ); 4988 INIT_LIST_HEAD(&h->cmpQ);
4957 INIT_LIST_HEAD(&h->reqQ); 4989 INIT_LIST_HEAD(&h->reqQ);
4958 mutex_init(&h->busy_shutting_down); 4990 mutex_init(&h->busy_shutting_down);
@@ -5009,7 +5041,7 @@ reinit_after_soft_reset:
5009 5041
5010 dev_info(&h->pdev->dev, "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n", 5042 dev_info(&h->pdev->dev, "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n",
5011 h->devname, pdev->device, pci_name(pdev), 5043 h->devname, pdev->device, pci_name(pdev),
5012 h->intr[PERF_MODE_INT], dac ? "" : " not"); 5044 h->intr[h->intr_mode], dac ? "" : " not");
5013 5045
5014 if (cciss_allocate_cmd_pool(h)) 5046 if (cciss_allocate_cmd_pool(h))
5015 goto clean4; 5047 goto clean4;
@@ -5056,7 +5088,7 @@ reinit_after_soft_reset:
5056 spin_lock_irqsave(&h->lock, flags); 5088 spin_lock_irqsave(&h->lock, flags);
5057 h->access.set_intr_mask(h, CCISS_INTR_OFF); 5089 h->access.set_intr_mask(h, CCISS_INTR_OFF);
5058 spin_unlock_irqrestore(&h->lock, flags); 5090 spin_unlock_irqrestore(&h->lock, flags);
5059 free_irq(h->intr[PERF_MODE_INT], h); 5091 free_irq(h->intr[h->intr_mode], h);
5060 rc = cciss_request_irq(h, cciss_msix_discard_completions, 5092 rc = cciss_request_irq(h, cciss_msix_discard_completions,
5061 cciss_intx_discard_completions); 5093 cciss_intx_discard_completions);
5062 if (rc) { 5094 if (rc) {
@@ -5133,7 +5165,7 @@ clean4:
5133 cciss_free_cmd_pool(h); 5165 cciss_free_cmd_pool(h);
5134 cciss_free_scatterlists(h); 5166 cciss_free_scatterlists(h);
5135 cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds); 5167 cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
5136 free_irq(h->intr[PERF_MODE_INT], h); 5168 free_irq(h->intr[h->intr_mode], h);
5137clean2: 5169clean2:
5138 unregister_blkdev(h->major, h->devname); 5170 unregister_blkdev(h->major, h->devname);
5139clean1: 5171clean1:
@@ -5172,9 +5204,31 @@ static void cciss_shutdown(struct pci_dev *pdev)
5172 if (return_code != IO_OK) 5204 if (return_code != IO_OK)
5173 dev_warn(&h->pdev->dev, "Error flushing cache\n"); 5205 dev_warn(&h->pdev->dev, "Error flushing cache\n");
5174 h->access.set_intr_mask(h, CCISS_INTR_OFF); 5206 h->access.set_intr_mask(h, CCISS_INTR_OFF);
5175 free_irq(h->intr[PERF_MODE_INT], h); 5207 free_irq(h->intr[h->intr_mode], h);
5208}
5209
5210static int __devinit cciss_enter_simple_mode(struct ctlr_info *h)
5211{
5212 u32 trans_support;
5213
5214 trans_support = readl(&(h->cfgtable->TransportSupport));
5215 if (!(trans_support & SIMPLE_MODE))
5216 return -ENOTSUPP;
5217
5218 h->max_commands = readl(&(h->cfgtable->CmdsOutMax));
5219 writel(CFGTBL_Trans_Simple, &(h->cfgtable->HostWrite.TransportRequest));
5220 writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
5221 cciss_wait_for_mode_change_ack(h);
5222 print_cfg_table(h);
5223 if (!(readl(&(h->cfgtable->TransportActive)) & CFGTBL_Trans_Simple)) {
5224 dev_warn(&h->pdev->dev, "unable to get board into simple mode\n");
5225 return -ENODEV;
5226 }
5227 h->transMethod = CFGTBL_Trans_Simple;
5228 return 0;
5176} 5229}
5177 5230
5231
5178static void __devexit cciss_remove_one(struct pci_dev *pdev) 5232static void __devexit cciss_remove_one(struct pci_dev *pdev)
5179{ 5233{
5180 ctlr_info_t *h; 5234 ctlr_info_t *h;
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index c049548e68b7..7fda30e4a241 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -92,6 +92,7 @@ struct ctlr_info
92 unsigned int intr[4]; 92 unsigned int intr[4];
93 unsigned int msix_vector; 93 unsigned int msix_vector;
94 unsigned int msi_vector; 94 unsigned int msi_vector;
95 int intr_mode;
95 int cciss_max_sectors; 96 int cciss_max_sectors;
96 BYTE cciss_read; 97 BYTE cciss_read;
97 BYTE cciss_write; 98 BYTE cciss_write;
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index b2fceb53e809..9125bbeacd4d 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -620,6 +620,7 @@ static int cpqarray_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
620 } 620 }
621 vendor_id = pdev->vendor; 621 vendor_id = pdev->vendor;
622 device_id = pdev->device; 622 device_id = pdev->device;
623 revision = pdev->revision;
623 irq = pdev->irq; 624 irq = pdev->irq;
624 625
625 for(i=0; i<6; i++) 626 for(i=0; i<6; i++)
@@ -632,7 +633,6 @@ static int cpqarray_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
632 } 633 }
633 634
634 pci_read_config_word(pdev, PCI_COMMAND, &command); 635 pci_read_config_word(pdev, PCI_COMMAND, &command);
635 pci_read_config_byte(pdev, PCI_CLASS_REVISION, &revision);
636 pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache_line_size); 636 pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache_line_size);
637 pci_read_config_byte(pdev, PCI_LATENCY_TIMER, &latency_timer); 637 pci_read_config_byte(pdev, PCI_LATENCY_TIMER, &latency_timer);
638 638
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index c77983ea86c8..3d806820280e 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -76,6 +76,8 @@
76#include <linux/splice.h> 76#include <linux/splice.h>
77#include <linux/sysfs.h> 77#include <linux/sysfs.h>
78#include <linux/miscdevice.h> 78#include <linux/miscdevice.h>
79#include <linux/falloc.h>
80
79#include <asm/uaccess.h> 81#include <asm/uaccess.h>
80 82
81static DEFINE_IDR(loop_index_idr); 83static DEFINE_IDR(loop_index_idr);
@@ -407,6 +409,29 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
407 } 409 }
408 } 410 }
409 411
412 /*
413 * We use punch hole to reclaim the free space used by the
414 * image a.k.a. discard. However we do not support discard if
415 * encryption is enabled, because it may give an attacker
416 * useful information.
417 */
418 if (bio->bi_rw & REQ_DISCARD) {
419 struct file *file = lo->lo_backing_file;
420 int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
421
422 if ((!file->f_op->fallocate) ||
423 lo->lo_encrypt_key_size) {
424 ret = -EOPNOTSUPP;
425 goto out;
426 }
427 ret = file->f_op->fallocate(file, mode, pos,
428 bio->bi_size);
429 if (unlikely(ret && ret != -EINVAL &&
430 ret != -EOPNOTSUPP))
431 ret = -EIO;
432 goto out;
433 }
434
410 ret = lo_send(lo, bio, pos); 435 ret = lo_send(lo, bio, pos);
411 436
412 if ((bio->bi_rw & REQ_FUA) && !ret) { 437 if ((bio->bi_rw & REQ_FUA) && !ret) {
@@ -622,7 +647,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
622 goto out_putf; 647 goto out_putf;
623 648
624 fput(old_file); 649 fput(old_file);
625 if (max_part > 0) 650 if (lo->lo_flags & LO_FLAGS_PARTSCAN)
626 ioctl_by_bdev(bdev, BLKRRPART, 0); 651 ioctl_by_bdev(bdev, BLKRRPART, 0);
627 return 0; 652 return 0;
628 653
@@ -699,16 +724,25 @@ static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)
699 return sprintf(buf, "%s\n", autoclear ? "1" : "0"); 724 return sprintf(buf, "%s\n", autoclear ? "1" : "0");
700} 725}
701 726
727static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf)
728{
729 int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN);
730
731 return sprintf(buf, "%s\n", partscan ? "1" : "0");
732}
733
702LOOP_ATTR_RO(backing_file); 734LOOP_ATTR_RO(backing_file);
703LOOP_ATTR_RO(offset); 735LOOP_ATTR_RO(offset);
704LOOP_ATTR_RO(sizelimit); 736LOOP_ATTR_RO(sizelimit);
705LOOP_ATTR_RO(autoclear); 737LOOP_ATTR_RO(autoclear);
738LOOP_ATTR_RO(partscan);
706 739
707static struct attribute *loop_attrs[] = { 740static struct attribute *loop_attrs[] = {
708 &loop_attr_backing_file.attr, 741 &loop_attr_backing_file.attr,
709 &loop_attr_offset.attr, 742 &loop_attr_offset.attr,
710 &loop_attr_sizelimit.attr, 743 &loop_attr_sizelimit.attr,
711 &loop_attr_autoclear.attr, 744 &loop_attr_autoclear.attr,
745 &loop_attr_partscan.attr,
712 NULL, 746 NULL,
713}; 747};
714 748
@@ -729,6 +763,35 @@ static void loop_sysfs_exit(struct loop_device *lo)
729 &loop_attribute_group); 763 &loop_attribute_group);
730} 764}
731 765
766static void loop_config_discard(struct loop_device *lo)
767{
768 struct file *file = lo->lo_backing_file;
769 struct inode *inode = file->f_mapping->host;
770 struct request_queue *q = lo->lo_queue;
771
772 /*
773 * We use punch hole to reclaim the free space used by the
774 * image a.k.a. discard. However we do not support discard if
775 * encryption is enabled, because it may give an attacker
776 * useful information.
777 */
778 if ((!file->f_op->fallocate) ||
779 lo->lo_encrypt_key_size) {
780 q->limits.discard_granularity = 0;
781 q->limits.discard_alignment = 0;
782 q->limits.max_discard_sectors = 0;
783 q->limits.discard_zeroes_data = 0;
784 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
785 return;
786 }
787
788 q->limits.discard_granularity = inode->i_sb->s_blocksize;
789 q->limits.discard_alignment = inode->i_sb->s_blocksize;
790 q->limits.max_discard_sectors = UINT_MAX >> 9;
791 q->limits.discard_zeroes_data = 1;
792 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
793}
794
732static int loop_set_fd(struct loop_device *lo, fmode_t mode, 795static int loop_set_fd(struct loop_device *lo, fmode_t mode,
733 struct block_device *bdev, unsigned int arg) 796 struct block_device *bdev, unsigned int arg)
734{ 797{
@@ -829,7 +892,9 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
829 } 892 }
830 lo->lo_state = Lo_bound; 893 lo->lo_state = Lo_bound;
831 wake_up_process(lo->lo_thread); 894 wake_up_process(lo->lo_thread);
832 if (max_part > 0) 895 if (part_shift)
896 lo->lo_flags |= LO_FLAGS_PARTSCAN;
897 if (lo->lo_flags & LO_FLAGS_PARTSCAN)
833 ioctl_by_bdev(bdev, BLKRRPART, 0); 898 ioctl_by_bdev(bdev, BLKRRPART, 0);
834 return 0; 899 return 0;
835 900
@@ -890,10 +955,11 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
890 return err; 955 return err;
891} 956}
892 957
893static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) 958static int loop_clr_fd(struct loop_device *lo)
894{ 959{
895 struct file *filp = lo->lo_backing_file; 960 struct file *filp = lo->lo_backing_file;
896 gfp_t gfp = lo->old_gfp_mask; 961 gfp_t gfp = lo->old_gfp_mask;
962 struct block_device *bdev = lo->lo_device;
897 963
898 if (lo->lo_state != Lo_bound) 964 if (lo->lo_state != Lo_bound)
899 return -ENXIO; 965 return -ENXIO;
@@ -922,7 +988,6 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
922 lo->lo_offset = 0; 988 lo->lo_offset = 0;
923 lo->lo_sizelimit = 0; 989 lo->lo_sizelimit = 0;
924 lo->lo_encrypt_key_size = 0; 990 lo->lo_encrypt_key_size = 0;
925 lo->lo_flags = 0;
926 lo->lo_thread = NULL; 991 lo->lo_thread = NULL;
927 memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); 992 memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
928 memset(lo->lo_crypt_name, 0, LO_NAME_SIZE); 993 memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
@@ -940,8 +1005,11 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
940 lo->lo_state = Lo_unbound; 1005 lo->lo_state = Lo_unbound;
941 /* This is safe: open() is still holding a reference. */ 1006 /* This is safe: open() is still holding a reference. */
942 module_put(THIS_MODULE); 1007 module_put(THIS_MODULE);
943 if (max_part > 0 && bdev) 1008 if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev)
944 ioctl_by_bdev(bdev, BLKRRPART, 0); 1009 ioctl_by_bdev(bdev, BLKRRPART, 0);
1010 lo->lo_flags = 0;
1011 if (!part_shift)
1012 lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
945 mutex_unlock(&lo->lo_ctl_mutex); 1013 mutex_unlock(&lo->lo_ctl_mutex);
946 /* 1014 /*
947 * Need not hold lo_ctl_mutex to fput backing file. 1015 * Need not hold lo_ctl_mutex to fput backing file.
@@ -995,6 +1063,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
995 if (figure_loop_size(lo)) 1063 if (figure_loop_size(lo))
996 return -EFBIG; 1064 return -EFBIG;
997 } 1065 }
1066 loop_config_discard(lo);
998 1067
999 memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); 1068 memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
1000 memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); 1069 memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
@@ -1010,6 +1079,13 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
1010 (info->lo_flags & LO_FLAGS_AUTOCLEAR)) 1079 (info->lo_flags & LO_FLAGS_AUTOCLEAR))
1011 lo->lo_flags ^= LO_FLAGS_AUTOCLEAR; 1080 lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;
1012 1081
1082 if ((info->lo_flags & LO_FLAGS_PARTSCAN) &&
1083 !(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
1084 lo->lo_flags |= LO_FLAGS_PARTSCAN;
1085 lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
1086 ioctl_by_bdev(lo->lo_device, BLKRRPART, 0);
1087 }
1088
1013 lo->lo_encrypt_key_size = info->lo_encrypt_key_size; 1089 lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
1014 lo->lo_init[0] = info->lo_init[0]; 1090 lo->lo_init[0] = info->lo_init[0];
1015 lo->lo_init[1] = info->lo_init[1]; 1091 lo->lo_init[1] = info->lo_init[1];
@@ -1203,7 +1279,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
1203 break; 1279 break;
1204 case LOOP_CLR_FD: 1280 case LOOP_CLR_FD:
1205 /* loop_clr_fd would have unlocked lo_ctl_mutex on success */ 1281 /* loop_clr_fd would have unlocked lo_ctl_mutex on success */
1206 err = loop_clr_fd(lo, bdev); 1282 err = loop_clr_fd(lo);
1207 if (!err) 1283 if (!err)
1208 goto out_unlocked; 1284 goto out_unlocked;
1209 break; 1285 break;
@@ -1423,7 +1499,7 @@ static int lo_release(struct gendisk *disk, fmode_t mode)
1423 * In autoclear mode, stop the loop thread 1499 * In autoclear mode, stop the loop thread
1424 * and remove configuration after last close. 1500 * and remove configuration after last close.
1425 */ 1501 */
1426 err = loop_clr_fd(lo, NULL); 1502 err = loop_clr_fd(lo);
1427 if (!err) 1503 if (!err)
1428 goto out_unlocked; 1504 goto out_unlocked;
1429 } else { 1505 } else {
@@ -1545,6 +1621,27 @@ static int loop_add(struct loop_device **l, int i)
1545 if (!disk) 1621 if (!disk)
1546 goto out_free_queue; 1622 goto out_free_queue;
1547 1623
1624 /*
1625 * Disable partition scanning by default. The in-kernel partition
1626 * scanning can be requested individually per-device during its
1627 * setup. Userspace can always add and remove partitions from all
1628 * devices. The needed partition minors are allocated from the
1629 * extended minor space, the main loop device numbers will continue
1630 * to match the loop minors, regardless of the number of partitions
1631 * used.
1632 *
1633 * If max_part is given, partition scanning is globally enabled for
1634 * all loop devices. The minors for the main loop devices will be
1635 * multiples of max_part.
1636 *
1637 * Note: Global-for-all-devices, set-only-at-init, read-only module
1638 * parameters like 'max_loop' and 'max_part' make things needlessly
1639 * complicated, are too static, inflexible and may surprise
1640 * userspace tools. Parameters like this in general should be avoided.
1641 */
1642 if (!part_shift)
1643 disk->flags |= GENHD_FL_NO_PART_SCAN;
1644 disk->flags |= GENHD_FL_EXT_DEVT;
1548 mutex_init(&lo->lo_ctl_mutex); 1645 mutex_init(&lo->lo_ctl_mutex);
1549 lo->lo_number = i; 1646 lo->lo_number = i;
1550 lo->lo_thread = NULL; 1647 lo->lo_thread = NULL;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index f533f3375e24..c3f0ee16594d 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -127,8 +127,7 @@ static void sock_shutdown(struct nbd_device *lo, int lock)
127 if (lock) 127 if (lock)
128 mutex_lock(&lo->tx_lock); 128 mutex_lock(&lo->tx_lock);
129 if (lo->sock) { 129 if (lo->sock) {
130 printk(KERN_WARNING "%s: shutting down socket\n", 130 dev_warn(disk_to_dev(lo->disk), "shutting down socket\n");
131 lo->disk->disk_name);
132 kernel_sock_shutdown(lo->sock, SHUT_RDWR); 131 kernel_sock_shutdown(lo->sock, SHUT_RDWR);
133 lo->sock = NULL; 132 lo->sock = NULL;
134 } 133 }
@@ -158,8 +157,9 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
158 sigset_t blocked, oldset; 157 sigset_t blocked, oldset;
159 158
160 if (unlikely(!sock)) { 159 if (unlikely(!sock)) {
161 printk(KERN_ERR "%s: Attempted %s on closed socket in sock_xmit\n", 160 dev_err(disk_to_dev(lo->disk),
162 lo->disk->disk_name, (send ? "send" : "recv")); 161 "Attempted %s on closed socket in sock_xmit\n",
162 (send ? "send" : "recv"));
163 return -EINVAL; 163 return -EINVAL;
164 } 164 }
165 165
@@ -250,8 +250,8 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
250 result = sock_xmit(lo, 1, &request, sizeof(request), 250 result = sock_xmit(lo, 1, &request, sizeof(request),
251 (nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0); 251 (nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0);
252 if (result <= 0) { 252 if (result <= 0) {
253 printk(KERN_ERR "%s: Send control failed (result %d)\n", 253 dev_err(disk_to_dev(lo->disk),
254 lo->disk->disk_name, result); 254 "Send control failed (result %d)\n", result);
255 goto error_out; 255 goto error_out;
256 } 256 }
257 257
@@ -270,8 +270,9 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
270 lo->disk->disk_name, req, bvec->bv_len); 270 lo->disk->disk_name, req, bvec->bv_len);
271 result = sock_send_bvec(lo, bvec, flags); 271 result = sock_send_bvec(lo, bvec, flags);
272 if (result <= 0) { 272 if (result <= 0) {
273 printk(KERN_ERR "%s: Send data failed (result %d)\n", 273 dev_err(disk_to_dev(lo->disk),
274 lo->disk->disk_name, result); 274 "Send data failed (result %d)\n",
275 result);
275 goto error_out; 276 goto error_out;
276 } 277 }
277 } 278 }
@@ -328,14 +329,13 @@ static struct request *nbd_read_stat(struct nbd_device *lo)
328 reply.magic = 0; 329 reply.magic = 0;
329 result = sock_xmit(lo, 0, &reply, sizeof(reply), MSG_WAITALL); 330 result = sock_xmit(lo, 0, &reply, sizeof(reply), MSG_WAITALL);
330 if (result <= 0) { 331 if (result <= 0) {
331 printk(KERN_ERR "%s: Receive control failed (result %d)\n", 332 dev_err(disk_to_dev(lo->disk),
332 lo->disk->disk_name, result); 333 "Receive control failed (result %d)\n", result);
333 goto harderror; 334 goto harderror;
334 } 335 }
335 336
336 if (ntohl(reply.magic) != NBD_REPLY_MAGIC) { 337 if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
337 printk(KERN_ERR "%s: Wrong magic (0x%lx)\n", 338 dev_err(disk_to_dev(lo->disk), "Wrong magic (0x%lx)\n",
338 lo->disk->disk_name,
339 (unsigned long)ntohl(reply.magic)); 339 (unsigned long)ntohl(reply.magic));
340 result = -EPROTO; 340 result = -EPROTO;
341 goto harderror; 341 goto harderror;
@@ -347,15 +347,15 @@ static struct request *nbd_read_stat(struct nbd_device *lo)
347 if (result != -ENOENT) 347 if (result != -ENOENT)
348 goto harderror; 348 goto harderror;
349 349
350 printk(KERN_ERR "%s: Unexpected reply (%p)\n", 350 dev_err(disk_to_dev(lo->disk), "Unexpected reply (%p)\n",
351 lo->disk->disk_name, reply.handle); 351 reply.handle);
352 result = -EBADR; 352 result = -EBADR;
353 goto harderror; 353 goto harderror;
354 } 354 }
355 355
356 if (ntohl(reply.error)) { 356 if (ntohl(reply.error)) {
357 printk(KERN_ERR "%s: Other side returned error (%d)\n", 357 dev_err(disk_to_dev(lo->disk), "Other side returned error (%d)\n",
358 lo->disk->disk_name, ntohl(reply.error)); 358 ntohl(reply.error));
359 req->errors++; 359 req->errors++;
360 return req; 360 return req;
361 } 361 }
@@ -369,8 +369,8 @@ static struct request *nbd_read_stat(struct nbd_device *lo)
369 rq_for_each_segment(bvec, req, iter) { 369 rq_for_each_segment(bvec, req, iter) {
370 result = sock_recv_bvec(lo, bvec); 370 result = sock_recv_bvec(lo, bvec);
371 if (result <= 0) { 371 if (result <= 0) {
372 printk(KERN_ERR "%s: Receive data failed (result %d)\n", 372 dev_err(disk_to_dev(lo->disk), "Receive data failed (result %d)\n",
373 lo->disk->disk_name, result); 373 result);
374 req->errors++; 374 req->errors++;
375 return req; 375 return req;
376 } 376 }
@@ -405,10 +405,10 @@ static int nbd_do_it(struct nbd_device *lo)
405 405
406 BUG_ON(lo->magic != LO_MAGIC); 406 BUG_ON(lo->magic != LO_MAGIC);
407 407
408 lo->pid = current->pid; 408 lo->pid = task_pid_nr(current);
409 ret = sysfs_create_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr); 409 ret = device_create_file(disk_to_dev(lo->disk), &pid_attr);
410 if (ret) { 410 if (ret) {
411 printk(KERN_ERR "nbd: sysfs_create_file failed!"); 411 dev_err(disk_to_dev(lo->disk), "device_create_file failed!\n");
412 lo->pid = 0; 412 lo->pid = 0;
413 return ret; 413 return ret;
414 } 414 }
@@ -416,7 +416,7 @@ static int nbd_do_it(struct nbd_device *lo)
416 while ((req = nbd_read_stat(lo)) != NULL) 416 while ((req = nbd_read_stat(lo)) != NULL)
417 nbd_end_request(req); 417 nbd_end_request(req);
418 418
419 sysfs_remove_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr); 419 device_remove_file(disk_to_dev(lo->disk), &pid_attr);
420 lo->pid = 0; 420 lo->pid = 0;
421 return 0; 421 return 0;
422} 422}
@@ -457,8 +457,8 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req)
457 if (rq_data_dir(req) == WRITE) { 457 if (rq_data_dir(req) == WRITE) {
458 nbd_cmd(req) = NBD_CMD_WRITE; 458 nbd_cmd(req) = NBD_CMD_WRITE;
459 if (lo->flags & NBD_READ_ONLY) { 459 if (lo->flags & NBD_READ_ONLY) {
460 printk(KERN_ERR "%s: Write on read-only\n", 460 dev_err(disk_to_dev(lo->disk),
461 lo->disk->disk_name); 461 "Write on read-only\n");
462 goto error_out; 462 goto error_out;
463 } 463 }
464 } 464 }
@@ -468,16 +468,15 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req)
468 mutex_lock(&lo->tx_lock); 468 mutex_lock(&lo->tx_lock);
469 if (unlikely(!lo->sock)) { 469 if (unlikely(!lo->sock)) {
470 mutex_unlock(&lo->tx_lock); 470 mutex_unlock(&lo->tx_lock);
471 printk(KERN_ERR "%s: Attempted send on closed socket\n", 471 dev_err(disk_to_dev(lo->disk),
472 lo->disk->disk_name); 472 "Attempted send on closed socket\n");
473 goto error_out; 473 goto error_out;
474 } 474 }
475 475
476 lo->active_req = req; 476 lo->active_req = req;
477 477
478 if (nbd_send_req(lo, req) != 0) { 478 if (nbd_send_req(lo, req) != 0) {
479 printk(KERN_ERR "%s: Request send failed\n", 479 dev_err(disk_to_dev(lo->disk), "Request send failed\n");
480 lo->disk->disk_name);
481 req->errors++; 480 req->errors++;
482 nbd_end_request(req); 481 nbd_end_request(req);
483 } else { 482 } else {
@@ -549,8 +548,8 @@ static void do_nbd_request(struct request_queue *q)
549 BUG_ON(lo->magic != LO_MAGIC); 548 BUG_ON(lo->magic != LO_MAGIC);
550 549
551 if (unlikely(!lo->sock)) { 550 if (unlikely(!lo->sock)) {
552 printk(KERN_ERR "%s: Attempted send on closed socket\n", 551 dev_err(disk_to_dev(lo->disk),
553 lo->disk->disk_name); 552 "Attempted send on closed socket\n");
554 req->errors++; 553 req->errors++;
555 nbd_end_request(req); 554 nbd_end_request(req);
556 spin_lock_irq(q->queue_lock); 555 spin_lock_irq(q->queue_lock);
@@ -576,7 +575,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
576 case NBD_DISCONNECT: { 575 case NBD_DISCONNECT: {
577 struct request sreq; 576 struct request sreq;
578 577
579 printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name); 578 dev_info(disk_to_dev(lo->disk), "NBD_DISCONNECT\n");
580 579
581 blk_rq_init(NULL, &sreq); 580 blk_rq_init(NULL, &sreq);
582 sreq.cmd_type = REQ_TYPE_SPECIAL; 581 sreq.cmd_type = REQ_TYPE_SPECIAL;
@@ -674,7 +673,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
674 file = lo->file; 673 file = lo->file;
675 lo->file = NULL; 674 lo->file = NULL;
676 nbd_clear_que(lo); 675 nbd_clear_que(lo);
677 printk(KERN_WARNING "%s: queue cleared\n", lo->disk->disk_name); 676 dev_warn(disk_to_dev(lo->disk), "queue cleared\n");
678 if (file) 677 if (file)
679 fput(file); 678 fput(file);
680 lo->bytesize = 0; 679 lo->bytesize = 0;
@@ -694,8 +693,8 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
694 return 0; 693 return 0;
695 694
696 case NBD_PRINT_DEBUG: 695 case NBD_PRINT_DEBUG:
697 printk(KERN_INFO "%s: next = %p, prev = %p, head = %p\n", 696 dev_info(disk_to_dev(lo->disk),
698 bdev->bd_disk->disk_name, 697 "next = %p, prev = %p, head = %p\n",
699 lo->queue_head.next, lo->queue_head.prev, 698 lo->queue_head.next, lo->queue_head.prev,
700 &lo->queue_head); 699 &lo->queue_head);
701 return 0; 700 return 0;
@@ -745,7 +744,7 @@ static int __init nbd_init(void)
745 BUILD_BUG_ON(sizeof(struct nbd_request) != 28); 744 BUILD_BUG_ON(sizeof(struct nbd_request) != 28);
746 745
747 if (max_part < 0) { 746 if (max_part < 0) {
748 printk(KERN_CRIT "nbd: max_part must be >= 0\n"); 747 printk(KERN_ERR "nbd: max_part must be >= 0\n");
749 return -EINVAL; 748 return -EINVAL;
750 } 749 }
751 750
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 1540792b1e54..15ec4db194d1 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -39,6 +39,9 @@
39#include <linux/list.h> 39#include <linux/list.h>
40#include <linux/delay.h> 40#include <linux/delay.h>
41#include <linux/freezer.h> 41#include <linux/freezer.h>
42#include <linux/loop.h>
43#include <linux/falloc.h>
44#include <linux/fs.h>
42 45
43#include <xen/events.h> 46#include <xen/events.h>
44#include <xen/page.h> 47#include <xen/page.h>
@@ -258,13 +261,16 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
258 261
259static void print_stats(struct xen_blkif *blkif) 262static void print_stats(struct xen_blkif *blkif)
260{ 263{
261 pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n", 264 pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d"
265 " | ds %4d\n",
262 current->comm, blkif->st_oo_req, 266 current->comm, blkif->st_oo_req,
263 blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req); 267 blkif->st_rd_req, blkif->st_wr_req,
268 blkif->st_f_req, blkif->st_ds_req);
264 blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); 269 blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
265 blkif->st_rd_req = 0; 270 blkif->st_rd_req = 0;
266 blkif->st_wr_req = 0; 271 blkif->st_wr_req = 0;
267 blkif->st_oo_req = 0; 272 blkif->st_oo_req = 0;
273 blkif->st_ds_req = 0;
268} 274}
269 275
270int xen_blkif_schedule(void *arg) 276int xen_blkif_schedule(void *arg)
@@ -410,6 +416,59 @@ static int xen_blkbk_map(struct blkif_request *req,
410 return ret; 416 return ret;
411} 417}
412 418
419static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req)
420{
421 int err = 0;
422 int status = BLKIF_RSP_OKAY;
423 struct block_device *bdev = blkif->vbd.bdev;
424
425 if (blkif->blk_backend_type == BLKIF_BACKEND_PHY)
426 /* just forward the discard request */
427 err = blkdev_issue_discard(bdev,
428 req->u.discard.sector_number,
429 req->u.discard.nr_sectors,
430 GFP_KERNEL, 0);
431 else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) {
432 /* punch a hole in the backing file */
433 struct loop_device *lo = bdev->bd_disk->private_data;
434 struct file *file = lo->lo_backing_file;
435
436 if (file->f_op->fallocate)
437 err = file->f_op->fallocate(file,
438 FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
439 req->u.discard.sector_number << 9,
440 req->u.discard.nr_sectors << 9);
441 else
442 err = -EOPNOTSUPP;
443 } else
444 err = -EOPNOTSUPP;
445
446 if (err == -EOPNOTSUPP) {
447 pr_debug(DRV_PFX "discard op failed, not supported\n");
448 status = BLKIF_RSP_EOPNOTSUPP;
449 } else if (err)
450 status = BLKIF_RSP_ERROR;
451
452 make_response(blkif, req->id, req->operation, status);
453}
454
455static void xen_blk_drain_io(struct xen_blkif *blkif)
456{
457 atomic_set(&blkif->drain, 1);
458 do {
459 /* The initial value is one, and one refcnt taken at the
460 * start of the xen_blkif_schedule thread. */
461 if (atomic_read(&blkif->refcnt) <= 2)
462 break;
463 wait_for_completion_interruptible_timeout(
464 &blkif->drain_complete, HZ);
465
466 if (!atomic_read(&blkif->drain))
467 break;
468 } while (!kthread_should_stop());
469 atomic_set(&blkif->drain, 0);
470}
471
413/* 472/*
414 * Completion callback on the bio's. Called as bh->b_end_io() 473 * Completion callback on the bio's. Called as bh->b_end_io()
415 */ 474 */
@@ -422,6 +481,11 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
422 pr_debug(DRV_PFX "flush diskcache op failed, not supported\n"); 481 pr_debug(DRV_PFX "flush diskcache op failed, not supported\n");
423 xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0); 482 xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0);
424 pending_req->status = BLKIF_RSP_EOPNOTSUPP; 483 pending_req->status = BLKIF_RSP_EOPNOTSUPP;
484 } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
485 (error == -EOPNOTSUPP)) {
486 pr_debug(DRV_PFX "write barrier op failed, not supported\n");
487 xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0);
488 pending_req->status = BLKIF_RSP_EOPNOTSUPP;
425 } else if (error) { 489 } else if (error) {
426 pr_debug(DRV_PFX "Buffer not up-to-date at end of operation," 490 pr_debug(DRV_PFX "Buffer not up-to-date at end of operation,"
427 " error=%d\n", error); 491 " error=%d\n", error);
@@ -438,6 +502,10 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
438 make_response(pending_req->blkif, pending_req->id, 502 make_response(pending_req->blkif, pending_req->id,
439 pending_req->operation, pending_req->status); 503 pending_req->operation, pending_req->status);
440 xen_blkif_put(pending_req->blkif); 504 xen_blkif_put(pending_req->blkif);
505 if (atomic_read(&pending_req->blkif->refcnt) <= 2) {
506 if (atomic_read(&pending_req->blkif->drain))
507 complete(&pending_req->blkif->drain_complete);
508 }
441 free_req(pending_req); 509 free_req(pending_req);
442 } 510 }
443} 511}
@@ -532,7 +600,6 @@ do_block_io_op(struct xen_blkif *blkif)
532 600
533 return more_to_do; 601 return more_to_do;
534} 602}
535
536/* 603/*
537 * Transmutation of the 'struct blkif_request' to a proper 'struct bio' 604 * Transmutation of the 'struct blkif_request' to a proper 'struct bio'
538 * and call the 'submit_bio' to pass it to the underlying storage. 605 * and call the 'submit_bio' to pass it to the underlying storage.
@@ -549,6 +616,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
549 int i, nbio = 0; 616 int i, nbio = 0;
550 int operation; 617 int operation;
551 struct blk_plug plug; 618 struct blk_plug plug;
619 bool drain = false;
552 620
553 switch (req->operation) { 621 switch (req->operation) {
554 case BLKIF_OP_READ: 622 case BLKIF_OP_READ:
@@ -559,11 +627,16 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
559 blkif->st_wr_req++; 627 blkif->st_wr_req++;
560 operation = WRITE_ODIRECT; 628 operation = WRITE_ODIRECT;
561 break; 629 break;
630 case BLKIF_OP_WRITE_BARRIER:
631 drain = true;
562 case BLKIF_OP_FLUSH_DISKCACHE: 632 case BLKIF_OP_FLUSH_DISKCACHE:
563 blkif->st_f_req++; 633 blkif->st_f_req++;
564 operation = WRITE_FLUSH; 634 operation = WRITE_FLUSH;
565 break; 635 break;
566 case BLKIF_OP_WRITE_BARRIER: 636 case BLKIF_OP_DISCARD:
637 blkif->st_ds_req++;
638 operation = REQ_DISCARD;
639 break;
567 default: 640 default:
568 operation = 0; /* make gcc happy */ 641 operation = 0; /* make gcc happy */
569 goto fail_response; 642 goto fail_response;
@@ -572,7 +645,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
572 645
573 /* Check that the number of segments is sane. */ 646 /* Check that the number of segments is sane. */
574 nseg = req->nr_segments; 647 nseg = req->nr_segments;
575 if (unlikely(nseg == 0 && operation != WRITE_FLUSH) || 648 if (unlikely(nseg == 0 && operation != WRITE_FLUSH &&
649 operation != REQ_DISCARD) ||
576 unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { 650 unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
577 pr_debug(DRV_PFX "Bad number of segments in request (%d)\n", 651 pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
578 nseg); 652 nseg);
@@ -621,16 +695,25 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
621 } 695 }
622 } 696 }
623 697
698 /* Wait on all outstanding I/O's and once that has been completed
699 * issue the WRITE_FLUSH.
700 */
701 if (drain)
702 xen_blk_drain_io(pending_req->blkif);
703
624 /* 704 /*
625 * If we have failed at this point, we need to undo the M2P override, 705 * If we have failed at this point, we need to undo the M2P override,
626 * set gnttab_set_unmap_op on all of the grant references and perform 706 * set gnttab_set_unmap_op on all of the grant references and perform
627 * the hypercall to unmap the grants - that is all done in 707 * the hypercall to unmap the grants - that is all done in
628 * xen_blkbk_unmap. 708 * xen_blkbk_unmap.
629 */ 709 */
630 if (xen_blkbk_map(req, pending_req, seg)) 710 if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg))
631 goto fail_flush; 711 goto fail_flush;
632 712
633 /* This corresponding xen_blkif_put is done in __end_block_io_op */ 713 /*
714 * This corresponding xen_blkif_put is done in __end_block_io_op, or
715 * below (in "!bio") if we are handling a BLKIF_OP_DISCARD.
716 */
634 xen_blkif_get(blkif); 717 xen_blkif_get(blkif);
635 718
636 for (i = 0; i < nseg; i++) { 719 for (i = 0; i < nseg; i++) {
@@ -654,18 +737,25 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
654 preq.sector_number += seg[i].nsec; 737 preq.sector_number += seg[i].nsec;
655 } 738 }
656 739
657 /* This will be hit if the operation was a flush. */ 740 /* This will be hit if the operation was a flush or discard. */
658 if (!bio) { 741 if (!bio) {
659 BUG_ON(operation != WRITE_FLUSH); 742 BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD);
660 743
661 bio = bio_alloc(GFP_KERNEL, 0); 744 if (operation == WRITE_FLUSH) {
662 if (unlikely(bio == NULL)) 745 bio = bio_alloc(GFP_KERNEL, 0);
663 goto fail_put_bio; 746 if (unlikely(bio == NULL))
747 goto fail_put_bio;
664 748
665 biolist[nbio++] = bio; 749 biolist[nbio++] = bio;
666 bio->bi_bdev = preq.bdev; 750 bio->bi_bdev = preq.bdev;
667 bio->bi_private = pending_req; 751 bio->bi_private = pending_req;
668 bio->bi_end_io = end_block_io_op; 752 bio->bi_end_io = end_block_io_op;
753 } else if (operation == REQ_DISCARD) {
754 xen_blk_discard(blkif, req);
755 xen_blkif_put(blkif);
756 free_req(pending_req);
757 return 0;
758 }
669 } 759 }
670 760
671 /* 761 /*
@@ -685,7 +775,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
685 775
686 if (operation == READ) 776 if (operation == READ)
687 blkif->st_rd_sect += preq.nr_sects; 777 blkif->st_rd_sect += preq.nr_sects;
688 else if (operation == WRITE || operation == WRITE_FLUSH) 778 else if (operation & WRITE)
689 blkif->st_wr_sect += preq.nr_sects; 779 blkif->st_wr_sect += preq.nr_sects;
690 780
691 return 0; 781 return 0;
@@ -765,9 +855,9 @@ static int __init xen_blkif_init(void)
765 855
766 mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; 856 mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
767 857
768 blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) * 858 blkbk->pending_reqs = kzalloc(sizeof(blkbk->pending_reqs[0]) *
769 xen_blkif_reqs, GFP_KERNEL); 859 xen_blkif_reqs, GFP_KERNEL);
770 blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) * 860 blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) *
771 mmap_pages, GFP_KERNEL); 861 mmap_pages, GFP_KERNEL);
772 blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) * 862 blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) *
773 mmap_pages, GFP_KERNEL); 863 mmap_pages, GFP_KERNEL);
@@ -790,8 +880,6 @@ static int __init xen_blkif_init(void)
790 if (rc) 880 if (rc)
791 goto failed_init; 881 goto failed_init;
792 882
793 memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs));
794
795 INIT_LIST_HEAD(&blkbk->pending_free); 883 INIT_LIST_HEAD(&blkbk->pending_free);
796 spin_lock_init(&blkbk->pending_free_lock); 884 spin_lock_init(&blkbk->pending_free_lock);
797 init_waitqueue_head(&blkbk->pending_free_wq); 885 init_waitqueue_head(&blkbk->pending_free_wq);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index c4bd34063ecc..de09f525d6c1 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -62,13 +62,26 @@ struct blkif_common_response {
62 62
63/* i386 protocol version */ 63/* i386 protocol version */
64#pragma pack(push, 4) 64#pragma pack(push, 4)
65
66struct blkif_x86_32_request_rw {
67 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
68 struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
69};
70
71struct blkif_x86_32_request_discard {
72 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
73 uint64_t nr_sectors;
74};
75
65struct blkif_x86_32_request { 76struct blkif_x86_32_request {
66 uint8_t operation; /* BLKIF_OP_??? */ 77 uint8_t operation; /* BLKIF_OP_??? */
67 uint8_t nr_segments; /* number of segments */ 78 uint8_t nr_segments; /* number of segments */
68 blkif_vdev_t handle; /* only for read/write requests */ 79 blkif_vdev_t handle; /* only for read/write requests */
69 uint64_t id; /* private guest value, echoed in resp */ 80 uint64_t id; /* private guest value, echoed in resp */
70 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ 81 union {
71 struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 82 struct blkif_x86_32_request_rw rw;
83 struct blkif_x86_32_request_discard discard;
84 } u;
72}; 85};
73struct blkif_x86_32_response { 86struct blkif_x86_32_response {
74 uint64_t id; /* copied from request */ 87 uint64_t id; /* copied from request */
@@ -78,13 +91,26 @@ struct blkif_x86_32_response {
78#pragma pack(pop) 91#pragma pack(pop)
79 92
80/* x86_64 protocol version */ 93/* x86_64 protocol version */
94
95struct blkif_x86_64_request_rw {
96 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
97 struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
98};
99
100struct blkif_x86_64_request_discard {
101 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
102 uint64_t nr_sectors;
103};
104
81struct blkif_x86_64_request { 105struct blkif_x86_64_request {
82 uint8_t operation; /* BLKIF_OP_??? */ 106 uint8_t operation; /* BLKIF_OP_??? */
83 uint8_t nr_segments; /* number of segments */ 107 uint8_t nr_segments; /* number of segments */
84 blkif_vdev_t handle; /* only for read/write requests */ 108 blkif_vdev_t handle; /* only for read/write requests */
85 uint64_t __attribute__((__aligned__(8))) id; 109 uint64_t __attribute__((__aligned__(8))) id;
86 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ 110 union {
87 struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 111 struct blkif_x86_64_request_rw rw;
112 struct blkif_x86_64_request_discard discard;
113 } u;
88}; 114};
89struct blkif_x86_64_response { 115struct blkif_x86_64_response {
90 uint64_t __attribute__((__aligned__(8))) id; 116 uint64_t __attribute__((__aligned__(8))) id;
@@ -112,6 +138,11 @@ enum blkif_protocol {
112 BLKIF_PROTOCOL_X86_64 = 3, 138 BLKIF_PROTOCOL_X86_64 = 3,
113}; 139};
114 140
141enum blkif_backend_type {
142 BLKIF_BACKEND_PHY = 1,
143 BLKIF_BACKEND_FILE = 2,
144};
145
115struct xen_vbd { 146struct xen_vbd {
116 /* What the domain refers to this vbd as. */ 147 /* What the domain refers to this vbd as. */
117 blkif_vdev_t handle; 148 blkif_vdev_t handle;
@@ -137,6 +168,7 @@ struct xen_blkif {
137 unsigned int irq; 168 unsigned int irq;
138 /* Comms information. */ 169 /* Comms information. */
139 enum blkif_protocol blk_protocol; 170 enum blkif_protocol blk_protocol;
171 enum blkif_backend_type blk_backend_type;
140 union blkif_back_rings blk_rings; 172 union blkif_back_rings blk_rings;
141 struct vm_struct *blk_ring_area; 173 struct vm_struct *blk_ring_area;
142 /* The VBD attached to this interface. */ 174 /* The VBD attached to this interface. */
@@ -148,6 +180,9 @@ struct xen_blkif {
148 atomic_t refcnt; 180 atomic_t refcnt;
149 181
150 wait_queue_head_t wq; 182 wait_queue_head_t wq;
183 /* for barrier (drain) requests */
184 struct completion drain_complete;
185 atomic_t drain;
151 /* One thread per one blkif. */ 186 /* One thread per one blkif. */
152 struct task_struct *xenblkd; 187 struct task_struct *xenblkd;
153 unsigned int waiting_reqs; 188 unsigned int waiting_reqs;
@@ -158,6 +193,7 @@ struct xen_blkif {
158 int st_wr_req; 193 int st_wr_req;
159 int st_oo_req; 194 int st_oo_req;
160 int st_f_req; 195 int st_f_req;
196 int st_ds_req;
161 int st_rd_sect; 197 int st_rd_sect;
162 int st_wr_sect; 198 int st_wr_sect;
163 199
@@ -181,7 +217,7 @@ struct xen_blkif {
181 217
182struct phys_req { 218struct phys_req {
183 unsigned short dev; 219 unsigned short dev;
184 unsigned short nr_sects; 220 blkif_sector_t nr_sects;
185 struct block_device *bdev; 221 struct block_device *bdev;
186 blkif_sector_t sector_number; 222 blkif_sector_t sector_number;
187}; 223};
@@ -195,6 +231,8 @@ int xen_blkif_schedule(void *arg);
195int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, 231int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
196 struct backend_info *be, int state); 232 struct backend_info *be, int state);
197 233
234int xen_blkbk_barrier(struct xenbus_transaction xbt,
235 struct backend_info *be, int state);
198struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); 236struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
199 237
200static inline void blkif_get_x86_32_req(struct blkif_request *dst, 238static inline void blkif_get_x86_32_req(struct blkif_request *dst,
@@ -205,12 +243,25 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
205 dst->nr_segments = src->nr_segments; 243 dst->nr_segments = src->nr_segments;
206 dst->handle = src->handle; 244 dst->handle = src->handle;
207 dst->id = src->id; 245 dst->id = src->id;
208 dst->u.rw.sector_number = src->sector_number; 246 switch (src->operation) {
209 barrier(); 247 case BLKIF_OP_READ:
210 if (n > dst->nr_segments) 248 case BLKIF_OP_WRITE:
211 n = dst->nr_segments; 249 case BLKIF_OP_WRITE_BARRIER:
212 for (i = 0; i < n; i++) 250 case BLKIF_OP_FLUSH_DISKCACHE:
213 dst->u.rw.seg[i] = src->seg[i]; 251 dst->u.rw.sector_number = src->u.rw.sector_number;
252 barrier();
253 if (n > dst->nr_segments)
254 n = dst->nr_segments;
255 for (i = 0; i < n; i++)
256 dst->u.rw.seg[i] = src->u.rw.seg[i];
257 break;
258 case BLKIF_OP_DISCARD:
259 dst->u.discard.sector_number = src->u.discard.sector_number;
260 dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
261 break;
262 default:
263 break;
264 }
214} 265}
215 266
216static inline void blkif_get_x86_64_req(struct blkif_request *dst, 267static inline void blkif_get_x86_64_req(struct blkif_request *dst,
@@ -221,12 +272,25 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
221 dst->nr_segments = src->nr_segments; 272 dst->nr_segments = src->nr_segments;
222 dst->handle = src->handle; 273 dst->handle = src->handle;
223 dst->id = src->id; 274 dst->id = src->id;
224 dst->u.rw.sector_number = src->sector_number; 275 switch (src->operation) {
225 barrier(); 276 case BLKIF_OP_READ:
226 if (n > dst->nr_segments) 277 case BLKIF_OP_WRITE:
227 n = dst->nr_segments; 278 case BLKIF_OP_WRITE_BARRIER:
228 for (i = 0; i < n; i++) 279 case BLKIF_OP_FLUSH_DISKCACHE:
229 dst->u.rw.seg[i] = src->seg[i]; 280 dst->u.rw.sector_number = src->u.rw.sector_number;
281 barrier();
282 if (n > dst->nr_segments)
283 n = dst->nr_segments;
284 for (i = 0; i < n; i++)
285 dst->u.rw.seg[i] = src->u.rw.seg[i];
286 break;
287 case BLKIF_OP_DISCARD:
288 dst->u.discard.sector_number = src->u.discard.sector_number;
289 dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
290 break;
291 default:
292 break;
293 }
230} 294}
231 295
232#endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */ 296#endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 5fd2010f7d2b..2c008afe63d9 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -114,6 +114,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
114 spin_lock_init(&blkif->blk_ring_lock); 114 spin_lock_init(&blkif->blk_ring_lock);
115 atomic_set(&blkif->refcnt, 1); 115 atomic_set(&blkif->refcnt, 1);
116 init_waitqueue_head(&blkif->wq); 116 init_waitqueue_head(&blkif->wq);
117 init_completion(&blkif->drain_complete);
118 atomic_set(&blkif->drain, 0);
117 blkif->st_print = jiffies; 119 blkif->st_print = jiffies;
118 init_waitqueue_head(&blkif->waiting_to_free); 120 init_waitqueue_head(&blkif->waiting_to_free);
119 121
@@ -272,6 +274,7 @@ VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
272VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); 274VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
273VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); 275VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
274VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req); 276VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req);
277VBD_SHOW(ds_req, "%d\n", be->blkif->st_ds_req);
275VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); 278VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
276VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); 279VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
277 280
@@ -280,6 +283,7 @@ static struct attribute *xen_vbdstat_attrs[] = {
280 &dev_attr_rd_req.attr, 283 &dev_attr_rd_req.attr,
281 &dev_attr_wr_req.attr, 284 &dev_attr_wr_req.attr,
282 &dev_attr_f_req.attr, 285 &dev_attr_f_req.attr,
286 &dev_attr_ds_req.attr,
283 &dev_attr_rd_sect.attr, 287 &dev_attr_rd_sect.attr,
284 &dev_attr_wr_sect.attr, 288 &dev_attr_wr_sect.attr,
285 NULL 289 NULL
@@ -419,6 +423,73 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
419 return err; 423 return err;
420} 424}
421 425
426int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
427{
428 struct xenbus_device *dev = be->dev;
429 struct xen_blkif *blkif = be->blkif;
430 char *type;
431 int err;
432 int state = 0;
433
434 type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
435 if (!IS_ERR(type)) {
436 if (strncmp(type, "file", 4) == 0) {
437 state = 1;
438 blkif->blk_backend_type = BLKIF_BACKEND_FILE;
439 }
440 if (strncmp(type, "phy", 3) == 0) {
441 struct block_device *bdev = be->blkif->vbd.bdev;
442 struct request_queue *q = bdev_get_queue(bdev);
443 if (blk_queue_discard(q)) {
444 err = xenbus_printf(xbt, dev->nodename,
445 "discard-granularity", "%u",
446 q->limits.discard_granularity);
447 if (err) {
448 xenbus_dev_fatal(dev, err,
449 "writing discard-granularity");
450 goto kfree;
451 }
452 err = xenbus_printf(xbt, dev->nodename,
453 "discard-alignment", "%u",
454 q->limits.discard_alignment);
455 if (err) {
456 xenbus_dev_fatal(dev, err,
457 "writing discard-alignment");
458 goto kfree;
459 }
460 state = 1;
461 blkif->blk_backend_type = BLKIF_BACKEND_PHY;
462 }
463 }
464 } else {
465 err = PTR_ERR(type);
466 xenbus_dev_fatal(dev, err, "reading type");
467 goto out;
468 }
469
470 err = xenbus_printf(xbt, dev->nodename, "feature-discard",
471 "%d", state);
472 if (err)
473 xenbus_dev_fatal(dev, err, "writing feature-discard");
474kfree:
475 kfree(type);
476out:
477 return err;
478}
479int xen_blkbk_barrier(struct xenbus_transaction xbt,
480 struct backend_info *be, int state)
481{
482 struct xenbus_device *dev = be->dev;
483 int err;
484
485 err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
486 "%d", state);
487 if (err)
488 xenbus_dev_fatal(dev, err, "writing feature-barrier");
489
490 return err;
491}
492
422/* 493/*
423 * Entry point to this code when a new device is created. Allocate the basic 494 * Entry point to this code when a new device is created. Allocate the basic
424 * structures, and watch the store waiting for the hotplug scripts to tell us 495 * structures, and watch the store waiting for the hotplug scripts to tell us
@@ -650,6 +721,11 @@ again:
650 if (err) 721 if (err)
651 goto abort; 722 goto abort;
652 723
724 err = xen_blkbk_discard(xbt, be);
725
726 /* If we can't advertise it is OK. */
727 err = xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
728
653 err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", 729 err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
654 (unsigned long long)vbd_sz(&be->blkif->vbd)); 730 (unsigned long long)vbd_sz(&be->blkif->vbd));
655 if (err) { 731 if (err) {
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 9ea8c2576c70..7b2ec5908413 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -98,6 +98,9 @@ struct blkfront_info
98 unsigned long shadow_free; 98 unsigned long shadow_free;
99 unsigned int feature_flush; 99 unsigned int feature_flush;
100 unsigned int flush_op; 100 unsigned int flush_op;
101 unsigned int feature_discard;
102 unsigned int discard_granularity;
103 unsigned int discard_alignment;
101 int is_ready; 104 int is_ready;
102}; 105};
103 106
@@ -302,29 +305,36 @@ static int blkif_queue_request(struct request *req)
302 ring_req->operation = info->flush_op; 305 ring_req->operation = info->flush_op;
303 } 306 }
304 307
305 ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); 308 if (unlikely(req->cmd_flags & REQ_DISCARD)) {
306 BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); 309 /* id, sector_number and handle are set above. */
310 ring_req->operation = BLKIF_OP_DISCARD;
311 ring_req->nr_segments = 0;
312 ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
313 } else {
314 ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
315 BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
307 316
308 for_each_sg(info->sg, sg, ring_req->nr_segments, i) { 317 for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
309 buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg))); 318 buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
310 fsect = sg->offset >> 9; 319 fsect = sg->offset >> 9;
311 lsect = fsect + (sg->length >> 9) - 1; 320 lsect = fsect + (sg->length >> 9) - 1;
312 /* install a grant reference. */ 321 /* install a grant reference. */
313 ref = gnttab_claim_grant_reference(&gref_head); 322 ref = gnttab_claim_grant_reference(&gref_head);
314 BUG_ON(ref == -ENOSPC); 323 BUG_ON(ref == -ENOSPC);
315 324
316 gnttab_grant_foreign_access_ref( 325 gnttab_grant_foreign_access_ref(
317 ref, 326 ref,
318 info->xbdev->otherend_id, 327 info->xbdev->otherend_id,
319 buffer_mfn, 328 buffer_mfn,
320 rq_data_dir(req) ); 329 rq_data_dir(req));
321 330
322 info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); 331 info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
323 ring_req->u.rw.seg[i] = 332 ring_req->u.rw.seg[i] =
324 (struct blkif_request_segment) { 333 (struct blkif_request_segment) {
325 .gref = ref, 334 .gref = ref,
326 .first_sect = fsect, 335 .first_sect = fsect,
327 .last_sect = lsect }; 336 .last_sect = lsect };
337 }
328 } 338 }
329 339
330 info->ring.req_prod_pvt++; 340 info->ring.req_prod_pvt++;
@@ -370,7 +380,9 @@ static void do_blkif_request(struct request_queue *rq)
370 380
371 blk_start_request(req); 381 blk_start_request(req);
372 382
373 if (req->cmd_type != REQ_TYPE_FS) { 383 if ((req->cmd_type != REQ_TYPE_FS) ||
384 ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) &&
385 !info->flush_op)) {
374 __blk_end_request_all(req, -EIO); 386 __blk_end_request_all(req, -EIO);
375 continue; 387 continue;
376 } 388 }
@@ -399,6 +411,7 @@ wait:
399static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) 411static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
400{ 412{
401 struct request_queue *rq; 413 struct request_queue *rq;
414 struct blkfront_info *info = gd->private_data;
402 415
403 rq = blk_init_queue(do_blkif_request, &blkif_io_lock); 416 rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
404 if (rq == NULL) 417 if (rq == NULL)
@@ -406,6 +419,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
406 419
407 queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); 420 queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
408 421
422 if (info->feature_discard) {
423 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
424 blk_queue_max_discard_sectors(rq, get_capacity(gd));
425 rq->limits.discard_granularity = info->discard_granularity;
426 rq->limits.discard_alignment = info->discard_alignment;
427 }
428
409 /* Hard sector size and max sectors impersonate the equiv. hardware. */ 429 /* Hard sector size and max sectors impersonate the equiv. hardware. */
410 blk_queue_logical_block_size(rq, sector_size); 430 blk_queue_logical_block_size(rq, sector_size);
411 blk_queue_max_hw_sectors(rq, 512); 431 blk_queue_max_hw_sectors(rq, 512);
@@ -722,6 +742,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
722 742
723 error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; 743 error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
724 switch (bret->operation) { 744 switch (bret->operation) {
745 case BLKIF_OP_DISCARD:
746 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
747 struct request_queue *rq = info->rq;
748 printk(KERN_WARNING "blkfront: %s: discard op failed\n",
749 info->gd->disk_name);
750 error = -EOPNOTSUPP;
751 info->feature_discard = 0;
752 queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
753 }
754 __blk_end_request_all(req, error);
755 break;
725 case BLKIF_OP_FLUSH_DISKCACHE: 756 case BLKIF_OP_FLUSH_DISKCACHE:
726 case BLKIF_OP_WRITE_BARRIER: 757 case BLKIF_OP_WRITE_BARRIER:
727 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { 758 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
@@ -1098,6 +1129,33 @@ blkfront_closing(struct blkfront_info *info)
1098 bdput(bdev); 1129 bdput(bdev);
1099} 1130}
1100 1131
1132static void blkfront_setup_discard(struct blkfront_info *info)
1133{
1134 int err;
1135 char *type;
1136 unsigned int discard_granularity;
1137 unsigned int discard_alignment;
1138
1139 type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL);
1140 if (IS_ERR(type))
1141 return;
1142
1143 if (strncmp(type, "phy", 3) == 0) {
1144 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1145 "discard-granularity", "%u", &discard_granularity,
1146 "discard-alignment", "%u", &discard_alignment,
1147 NULL);
1148 if (!err) {
1149 info->feature_discard = 1;
1150 info->discard_granularity = discard_granularity;
1151 info->discard_alignment = discard_alignment;
1152 }
1153 } else if (strncmp(type, "file", 4) == 0)
1154 info->feature_discard = 1;
1155
1156 kfree(type);
1157}
1158
1101/* 1159/*
1102 * Invoked when the backend is finally 'ready' (and has told produced 1160 * Invoked when the backend is finally 'ready' (and has told produced
1103 * the details about the physical device - #sectors, size, etc). 1161 * the details about the physical device - #sectors, size, etc).
@@ -1108,7 +1166,7 @@ static void blkfront_connect(struct blkfront_info *info)
1108 unsigned long sector_size; 1166 unsigned long sector_size;
1109 unsigned int binfo; 1167 unsigned int binfo;
1110 int err; 1168 int err;
1111 int barrier, flush; 1169 int barrier, flush, discard;
1112 1170
1113 switch (info->connected) { 1171 switch (info->connected) {
1114 case BLKIF_STATE_CONNECTED: 1172 case BLKIF_STATE_CONNECTED:
@@ -1178,7 +1236,14 @@ static void blkfront_connect(struct blkfront_info *info)
1178 info->feature_flush = REQ_FLUSH; 1236 info->feature_flush = REQ_FLUSH;
1179 info->flush_op = BLKIF_OP_FLUSH_DISKCACHE; 1237 info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
1180 } 1238 }
1181 1239
1240 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1241 "feature-discard", "%d", &discard,
1242 NULL);
1243
1244 if (!err && discard)
1245 blkfront_setup_discard(info);
1246
1182 err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); 1247 err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
1183 if (err) { 1248 if (err) {
1184 xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", 1249 xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
@@ -1385,6 +1450,8 @@ static struct xenbus_driver blkfront = {
1385 1450
1386static int __init xlblk_init(void) 1451static int __init xlblk_init(void)
1387{ 1452{
1453 int ret;
1454
1388 if (!xen_domain()) 1455 if (!xen_domain())
1389 return -ENODEV; 1456 return -ENODEV;
1390 1457
@@ -1394,7 +1461,13 @@ static int __init xlblk_init(void)
1394 return -ENODEV; 1461 return -ENODEV;
1395 } 1462 }
1396 1463
1397 return xenbus_register_frontend(&blkfront); 1464 ret = xenbus_register_frontend(&blkfront);
1465 if (ret) {
1466 unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
1467 return ret;
1468 }
1469
1470 return 0;
1398} 1471}
1399module_init(xlblk_init); 1472module_init(xlblk_init);
1400 1473