Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/DAC960.c              |   18
-rw-r--r--  drivers/block/Kconfig               |   15
-rw-r--r--  drivers/block/Makefile              |    2
-rw-r--r--  drivers/block/brd.c                 |   20
-rw-r--r--  drivers/block/cciss.c               |    6
-rw-r--r--  drivers/block/drbd/drbd_bitmap.c    |   50
-rw-r--r--  drivers/block/drbd/drbd_int.h       |    4
-rw-r--r--  drivers/block/drbd/drbd_main.c      |    4
-rw-r--r--  drivers/block/drbd/drbd_nl.c        |    6
-rw-r--r--  drivers/block/floppy.c              |   56
-rw-r--r--  drivers/block/hd.c                  |    1
-rw-r--r--  drivers/block/loop.c                |   40
-rw-r--r--  drivers/block/mtip32xx/Kconfig      |    9
-rw-r--r--  drivers/block/mtip32xx/Makefile     |    5
-rw-r--r--  drivers/block/mtip32xx/mtip32xx.c   | 3650
-rw-r--r--  drivers/block/mtip32xx/mtip32xx.h   |  418
-rw-r--r--  drivers/block/nbd.c                 |  296
-rw-r--r--  drivers/block/nvme.c                | 1740
-rw-r--r--  drivers/block/paride/bpck6.c        |    5
-rw-r--r--  drivers/block/paride/pcd.c          |    2
-rw-r--r--  drivers/block/paride/pd.c           |    3
-rw-r--r--  drivers/block/paride/pf.c           |    4
-rw-r--r--  drivers/block/paride/pg.c           |    3
-rw-r--r--  drivers/block/paride/pt.c           |    4
-rw-r--r--  drivers/block/pktcdvd.c             |    8
-rw-r--r--  drivers/block/rbd.c                 |  731
-rw-r--r--  drivers/block/rbd_types.h           |    4
-rw-r--r--  drivers/block/sunvdc.c              |    5
-rw-r--r--  drivers/block/sx8.c                 |   14
-rw-r--r--  drivers/block/ub.c                  |   42
-rw-r--r--  drivers/block/viodasd.c             |  809
-rw-r--r--  drivers/block/virtio_blk.c          |   91
-rw-r--r--  drivers/block/xd.c                  |    3
-rw-r--r--  drivers/block/xen-blkback/blkback.c |   84
-rw-r--r--  drivers/block/xen-blkback/common.h  |   67
-rw-r--r--  drivers/block/xen-blkback/xenbus.c  |   12
-rw-r--r--  drivers/block/xen-blkfront.c        |   79
37 files changed, 6780 insertions(+), 1530 deletions(-)
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index e086fbbbe853..8db9089127c5 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -1177,7 +1177,8 @@ static bool DAC960_V1_EnableMemoryMailboxInterface(DAC960_Controller_T
   int TimeoutCounter;
   int i;
 
-
+  memset(&CommandMailbox, 0, sizeof(DAC960_V1_CommandMailbox_T));
+
   if (pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32)))
 	return DAC960_Failure(Controller, "DMA mask out of range");
   Controller->BounceBufferLimit = DMA_BIT_MASK(32);
@@ -4627,7 +4628,8 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command)
   DAC960_Controller_T *Controller = Command->Controller;
   DAC960_CommandType_T CommandType = Command->CommandType;
   DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox;
-  DAC960_V2_IOCTL_Opcode_T CommandOpcode = CommandMailbox->Common.IOCTL_Opcode;
+  DAC960_V2_IOCTL_Opcode_T IOCTLOpcode = CommandMailbox->Common.IOCTL_Opcode;
+  DAC960_V2_CommandOpcode_T CommandOpcode = CommandMailbox->SCSI_10.CommandOpcode;
   DAC960_V2_CommandStatus_T CommandStatus = Command->V2.CommandStatus;
 
   if (CommandType == DAC960_ReadCommand ||
@@ -4699,7 +4701,7 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command)
     {
       if (Controller->ShutdownMonitoringTimer)
 	      return;
-      if (CommandOpcode == DAC960_V2_GetControllerInfo)
+      if (IOCTLOpcode == DAC960_V2_GetControllerInfo)
 	{
 	  DAC960_V2_ControllerInfo_T *NewControllerInfo =
 	    Controller->V2.NewControllerInformation;
@@ -4719,14 +4721,14 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command)
 	  memcpy(ControllerInfo, NewControllerInfo,
 		 sizeof(DAC960_V2_ControllerInfo_T));
 	}
-      else if (CommandOpcode == DAC960_V2_GetEvent)
+      else if (IOCTLOpcode == DAC960_V2_GetEvent)
 	{
 	  if (CommandStatus == DAC960_V2_NormalCompletion) {
 	    DAC960_V2_ReportEvent(Controller, Controller->V2.Event);
 	  }
 	  Controller->V2.NextEventSequenceNumber++;
 	}
-      else if (CommandOpcode == DAC960_V2_GetPhysicalDeviceInfoValid &&
+      else if (IOCTLOpcode == DAC960_V2_GetPhysicalDeviceInfoValid &&
 	       CommandStatus == DAC960_V2_NormalCompletion)
 	{
 	  DAC960_V2_PhysicalDeviceInfo_T *NewPhysicalDeviceInfo =
@@ -4915,7 +4917,7 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command)
 	  NewPhysicalDeviceInfo->LogicalUnit++;
 	  Controller->V2.PhysicalDeviceIndex++;
 	}
-      else if (CommandOpcode == DAC960_V2_GetPhysicalDeviceInfoValid)
+      else if (IOCTLOpcode == DAC960_V2_GetPhysicalDeviceInfoValid)
 	{
 	  unsigned int DeviceIndex;
 	  for (DeviceIndex = Controller->V2.PhysicalDeviceIndex;
@@ -4938,7 +4940,7 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command)
 	}
 	  Controller->V2.NeedPhysicalDeviceInformation = false;
 	}
-      else if (CommandOpcode == DAC960_V2_GetLogicalDeviceInfoValid &&
+      else if (IOCTLOpcode == DAC960_V2_GetLogicalDeviceInfoValid &&
 	       CommandStatus == DAC960_V2_NormalCompletion)
 	{
 	  DAC960_V2_LogicalDeviceInfo_T *NewLogicalDeviceInfo =
@@ -5065,7 +5067,7 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command)
 	    [LogicalDeviceNumber] = true;
 	  NewLogicalDeviceInfo->LogicalDeviceNumber++;
 	}
-      else if (CommandOpcode == DAC960_V2_GetLogicalDeviceInfoValid)
+      else if (IOCTLOpcode == DAC960_V2_GetLogicalDeviceInfoValid)
 	{
 	  int LogicalDriveNumber;
 	  for (LogicalDriveNumber = 0;
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 6f07ec1c2f58..a796407123c7 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -116,6 +116,8 @@ config PARIDE
 
 source "drivers/block/paride/Kconfig"
 
+source "drivers/block/mtip32xx/Kconfig"
+
 config BLK_CPQ_DA
 	tristate "Compaq SMART2 support"
 	depends on PCI && VIRT_TO_BUS
@@ -315,6 +317,17 @@ config BLK_DEV_NBD
 
 	  If unsure, say N.
 
+config BLK_DEV_NVME
+	tristate "NVM Express block device"
+	depends on PCI
+	---help---
+	  The NVM Express driver is for solid state drives directly
+	  connected to the PCI or PCI Express bus.  If you know you
+	  don't have one of these, it is safe to answer N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called nvme.
+
 config BLK_DEV_OSD
 	tristate "OSD object-as-blkdev support"
 	depends on SCSI_OSD_ULD
@@ -341,7 +354,7 @@ config BLK_DEV_SX8
 	  Use devices /dev/sx8/$N and /dev/sx8/$Np$M.
 
 config BLK_DEV_UB
-	tristate "Low Performance USB Block driver"
+	tristate "Low Performance USB Block driver (deprecated)"
 	depends on USB
 	help
 	  This driver supports certain USB attached storage devices
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 76646e9a1c91..5b795059f8fb 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_XILINX_SYSACE) += xsysace.o
 obj-$(CONFIG_CDROM_PKTCDVD)	+= pktcdvd.o
 obj-$(CONFIG_MG_DISK)		+= mg_disk.o
 obj-$(CONFIG_SUNVDC)		+= sunvdc.o
+obj-$(CONFIG_BLK_DEV_NVME)	+= nvme.o
 obj-$(CONFIG_BLK_DEV_OSD)	+= osdblk.o
 
 obj-$(CONFIG_BLK_DEV_UMEM)	+= umem.o
@@ -39,5 +40,6 @@ obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
 obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= xen-blkback/
 obj-$(CONFIG_BLK_DEV_DRBD)		+= drbd/
 obj-$(CONFIG_BLK_DEV_RBD)		+= rbd.o
+obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX)	+= mtip32xx/
 
 swim_mod-y	:= swim.o swim_asm.o
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index ec246437f5a4..531ceb31d0ff 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -242,9 +242,9 @@ static void copy_to_brd(struct brd_device *brd, const void *src,
 	page = brd_lookup_page(brd, sector);
 	BUG_ON(!page);
 
-	dst = kmap_atomic(page, KM_USER1);
+	dst = kmap_atomic(page);
 	memcpy(dst + offset, src, copy);
-	kunmap_atomic(dst, KM_USER1);
+	kunmap_atomic(dst);
 
 	if (copy < n) {
 		src += copy;
@@ -253,9 +253,9 @@ static void copy_to_brd(struct brd_device *brd, const void *src,
 		page = brd_lookup_page(brd, sector);
 		BUG_ON(!page);
 
-		dst = kmap_atomic(page, KM_USER1);
+		dst = kmap_atomic(page);
 		memcpy(dst, src, copy);
-		kunmap_atomic(dst, KM_USER1);
+		kunmap_atomic(dst);
 	}
 }
 
@@ -273,9 +273,9 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
 	copy = min_t(size_t, n, PAGE_SIZE - offset);
 	page = brd_lookup_page(brd, sector);
 	if (page) {
-		src = kmap_atomic(page, KM_USER1);
+		src = kmap_atomic(page);
 		memcpy(dst, src + offset, copy);
-		kunmap_atomic(src, KM_USER1);
+		kunmap_atomic(src);
 	} else
 		memset(dst, 0, copy);
 
@@ -285,9 +285,9 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
 		copy = n - copy;
 		page = brd_lookup_page(brd, sector);
 		if (page) {
-			src = kmap_atomic(page, KM_USER1);
+			src = kmap_atomic(page);
 			memcpy(dst, src, copy);
-			kunmap_atomic(src, KM_USER1);
+			kunmap_atomic(src);
 		} else
 			memset(dst, 0, copy);
 	}
@@ -309,7 +309,7 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page,
 		goto out;
 	}
 
-	mem = kmap_atomic(page, KM_USER0);
+	mem = kmap_atomic(page);
 	if (rw == READ) {
 		copy_from_brd(mem + off, brd, sector, len);
 		flush_dcache_page(page);
@@ -317,7 +317,7 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page,
 		flush_dcache_page(page);
 		copy_to_brd(brd, mem + off, sector, len);
 	}
-	kunmap_atomic(mem, KM_USER0);
+	kunmap_atomic(mem);
 
 out:
 	return err;
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 587cce57adae..b0f553b26d0f 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1735,7 +1735,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 	case CCISS_BIG_PASSTHRU:
 		return cciss_bigpassthru(h, argp);
 
-	/* scsi_cmd_ioctl handles these, below, though some are not */
+	/* scsi_cmd_blk_ioctl handles these, below, though some are not */
 	/* very meaningful for cciss. SG_IO is the main one people want. */
 
 	case SG_GET_VERSION_NUM:
@@ -1746,9 +1746,9 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 	case SG_EMULATED_HOST:
 	case SG_IO:
 	case SCSI_IOCTL_SEND_COMMAND:
-		return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, argp);
+		return scsi_cmd_blk_ioctl(bdev, mode, cmd, argp);
 
-	/* scsi_cmd_ioctl would normally handle these, below, but */
+	/* scsi_cmd_blk_ioctl would normally handle these, below, but */
 	/* they aren't a good fit for cciss, as CD-ROMs are */
 	/* not supported, and we don't have any bus/target/lun */
 	/* which we present to the kernel. */
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 912f585a760f..3030201c69d8 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -289,25 +289,25 @@ static unsigned int bm_bit_to_page_idx(struct drbd_bitmap *b, u64 bitnr)
 	return page_nr;
 }
 
-static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx, const enum km_type km)
+static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx)
 {
 	struct page *page = b->bm_pages[idx];
-	return (unsigned long *) kmap_atomic(page, km);
+	return (unsigned long *) kmap_atomic(page);
 }
 
 static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx)
 {
-	return __bm_map_pidx(b, idx, KM_IRQ1);
+	return __bm_map_pidx(b, idx);
 }
 
-static void __bm_unmap(unsigned long *p_addr, const enum km_type km)
+static void __bm_unmap(unsigned long *p_addr)
 {
-	kunmap_atomic(p_addr, km);
+	kunmap_atomic(p_addr);
 };
 
 static void bm_unmap(unsigned long *p_addr)
 {
-	return __bm_unmap(p_addr, KM_IRQ1);
+	return __bm_unmap(p_addr);
 }
 
 /* long word offset of _bitmap_ sector */
@@ -543,15 +543,15 @@ static unsigned long bm_count_bits(struct drbd_bitmap *b)
 
 	/* all but last page */
 	for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) {
-		p_addr = __bm_map_pidx(b, idx, KM_USER0);
+		p_addr = __bm_map_pidx(b, idx);
 		for (i = 0; i < LWPP; i++)
 			bits += hweight_long(p_addr[i]);
-		__bm_unmap(p_addr, KM_USER0);
+		__bm_unmap(p_addr);
 		cond_resched();
 	}
 	/* last (or only) page */
 	last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL;
-	p_addr = __bm_map_pidx(b, idx, KM_USER0);
+	p_addr = __bm_map_pidx(b, idx);
 	for (i = 0; i < last_word; i++)
 		bits += hweight_long(p_addr[i]);
 	p_addr[last_word] &= cpu_to_lel(mask);
@@ -559,7 +559,7 @@ static unsigned long bm_count_bits(struct drbd_bitmap *b)
 	/* 32bit arch, may have an unused padding long */
 	if (BITS_PER_LONG == 32 && (last_word & 1) == 0)
 		p_addr[last_word+1] = 0;
-	__bm_unmap(p_addr, KM_USER0);
+	__bm_unmap(p_addr);
 	return bits;
 }
 
@@ -970,11 +970,11 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must
 		 * to use pre-allocated page pool */
 		void *src, *dest;
 		page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT);
-		dest = kmap_atomic(page, KM_USER0);
-		src = kmap_atomic(b->bm_pages[page_nr], KM_USER1);
+		dest = kmap_atomic(page);
+		src = kmap_atomic(b->bm_pages[page_nr]);
 		memcpy(dest, src, PAGE_SIZE);
-		kunmap_atomic(src, KM_USER1);
-		kunmap_atomic(dest, KM_USER0);
+		kunmap_atomic(src);
+		kunmap_atomic(dest);
 		bm_store_page_idx(page, page_nr);
 	} else
 		page = b->bm_pages[page_nr];
@@ -1163,7 +1163,7 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc
  * this returns a bit number, NOT a sector!
  */
 static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
-	const int find_zero_bit, const enum km_type km)
+	const int find_zero_bit)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
 	unsigned long *p_addr;
@@ -1178,7 +1178,7 @@ static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
 	while (bm_fo < b->bm_bits) {
 		/* bit offset of the first bit in the page */
 		bit_offset = bm_fo & ~BITS_PER_PAGE_MASK;
-		p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo), km);
+		p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo));
 
 		if (find_zero_bit)
 			i = find_next_zero_bit_le(p_addr,
@@ -1187,7 +1187,7 @@ static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
 			i = find_next_bit_le(p_addr,
 				PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK);
 
-		__bm_unmap(p_addr, km);
+		__bm_unmap(p_addr);
 		if (i < PAGE_SIZE*8) {
 			bm_fo = bit_offset + i;
 			if (bm_fo >= b->bm_bits)
@@ -1215,7 +1215,7 @@ static unsigned long bm_find_next(struct drbd_conf *mdev,
 	if (BM_DONT_TEST & b->bm_flags)
 		bm_print_lock_info(mdev);
 
-	i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1);
+	i = __bm_find_next(mdev, bm_fo, find_zero_bit);
 
 	spin_unlock_irq(&b->bm_lock);
 	return i;
@@ -1239,13 +1239,13 @@ unsigned long drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo
 unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
 {
 	/* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */
-	return __bm_find_next(mdev, bm_fo, 0, KM_USER1);
+	return __bm_find_next(mdev, bm_fo, 0);
 }
 
 unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo)
 {
 	/* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */
-	return __bm_find_next(mdev, bm_fo, 1, KM_USER1);
+	return __bm_find_next(mdev, bm_fo, 1);
 }
 
 /* returns number of bits actually changed.
@@ -1273,14 +1273,14 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
 		unsigned int page_nr = bm_bit_to_page_idx(b, bitnr);
 		if (page_nr != last_page_nr) {
 			if (p_addr)
-				__bm_unmap(p_addr, KM_IRQ1);
+				__bm_unmap(p_addr);
 			if (c < 0)
 				bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
 			else if (c > 0)
 				bm_set_page_need_writeout(b->bm_pages[last_page_nr]);
 			changed_total += c;
 			c = 0;
-			p_addr = __bm_map_pidx(b, page_nr, KM_IRQ1);
+			p_addr = __bm_map_pidx(b, page_nr);
 			last_page_nr = page_nr;
 		}
 		if (val)
@@ -1289,7 +1289,7 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
 			c -= (0 != __test_and_clear_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr));
 	}
 	if (p_addr)
-		__bm_unmap(p_addr, KM_IRQ1);
+		__bm_unmap(p_addr);
 	if (c < 0)
 		bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
 	else if (c > 0)
@@ -1342,13 +1342,13 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
 {
 	int i;
 	int bits;
-	unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_IRQ1);
+	unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]);
 	for (i = first_word; i < last_word; i++) {
 		bits = hweight_long(paddr[i]);
 		paddr[i] = ~0UL;
 		b->bm_set += BITS_PER_LONG - bits;
 	}
-	kunmap_atomic(paddr, KM_IRQ1);
+	kunmap_atomic(paddr);
 }
 
 /* Same thing as drbd_bm_set_bits,
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 9cf20355ceec..8d680562ba73 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -59,8 +59,8 @@
 
 /* module parameter, defined in drbd_main.c */
 extern unsigned int minor_count;
-extern int disable_sendpage;
-extern int allow_oos;
+extern bool disable_sendpage;
+extern bool allow_oos;
 extern unsigned int cn_idx;
 
 #ifdef CONFIG_DRBD_FAULT_INJECTION
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 0358e55356c8..211fc44f84be 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -117,8 +117,8 @@ module_param(fault_devs, int, 0644);
 
 /* module parameter, defined */
 unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
-int disable_sendpage;
-int allow_oos;
+bool disable_sendpage;
+bool allow_oos;
 unsigned int cn_idx = CN_IDX_DRBD;
 int proc_details; /* Detail level in proc drbd*/
 
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index af2a25049bce..abfaacaaf346 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -179,7 +179,7 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
 	dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
 
 	drbd_bcast_ev_helper(mdev, cmd);
-	ret = call_usermodehelper(usermode_helper, argv, envp, 1);
+	ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
 	if (ret)
 		dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
 				usermode_helper, cmd, mb,
@@ -2526,10 +2526,10 @@ void drbd_bcast_ee(struct drbd_conf *mdev,
 
 	page = e->pages;
 	page_chain_for_each(page) {
-		void *d = kmap_atomic(page, KM_USER0);
+		void *d = kmap_atomic(page);
 		unsigned l = min_t(unsigned, len, PAGE_SIZE);
 		memcpy(tl, d, l);
-		kunmap_atomic(d, KM_USER0);
+		kunmap_atomic(d);
 		tl = (unsigned short*)((char*)tl + l);
 		len -= l;
 		if (len == 0)
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 510fb10ec45a..b0b00d70c166 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -202,7 +202,6 @@ static int slow_floppy;
 
 #include <asm/dma.h>
 #include <asm/irq.h>
-#include <asm/system.h>
 
 static int FLOPPY_IRQ = 6;
 static int FLOPPY_DMA = 2;
@@ -1031,37 +1030,6 @@ static int fd_wait_for_completion(unsigned long delay, timeout_fn function)
 	return 0;
 }
 
-static DEFINE_SPINLOCK(floppy_hlt_lock);
-static int hlt_disabled;
-static void floppy_disable_hlt(void)
-{
-	unsigned long flags;
-
-	WARN_ONCE(1, "floppy_disable_hlt() scheduled for removal in 2012");
-	spin_lock_irqsave(&floppy_hlt_lock, flags);
-	if (!hlt_disabled) {
-		hlt_disabled = 1;
-#ifdef HAVE_DISABLE_HLT
-		disable_hlt();
-#endif
-	}
-	spin_unlock_irqrestore(&floppy_hlt_lock, flags);
-}
-
-static void floppy_enable_hlt(void)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&floppy_hlt_lock, flags);
-	if (hlt_disabled) {
-		hlt_disabled = 0;
-#ifdef HAVE_DISABLE_HLT
-		enable_hlt();
-#endif
-	}
-	spin_unlock_irqrestore(&floppy_hlt_lock, flags);
-}
-
 static void setup_DMA(void)
 {
 	unsigned long f;
@@ -1106,7 +1074,6 @@ static void setup_DMA(void)
 	fd_enable_dma();
 	release_dma_lock(f);
 #endif
-	floppy_disable_hlt();
 }
 
 static void show_floppy(void);
@@ -1708,7 +1675,6 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id)
 	fd_disable_dma();
 	release_dma_lock(f);
 
-	floppy_enable_hlt();
 	do_floppy = NULL;
 	if (fdc >= N_FDC || FDCS->address == -1) {
 		/* we don't even know which FDC is the culprit */
@@ -1857,8 +1823,6 @@ static void floppy_shutdown(unsigned long data)
 	show_floppy();
 	cancel_activity();
 
-	floppy_enable_hlt();
-
 	flags = claim_dma_lock();
 	fd_disable_dma();
 	release_dma_lock(flags);
@@ -3832,7 +3796,7 @@ static int __floppy_read_block_0(struct block_device *bdev)
 	bio.bi_size = size;
 	bio.bi_bdev = bdev;
 	bio.bi_sector = 0;
-	bio.bi_flags = BIO_QUIET;
+	bio.bi_flags = (1 << BIO_QUIET);
 	init_completion(&complete);
 	bio.bi_private = &complete;
 	bio.bi_end_io = floppy_rb0_complete;
@@ -4368,8 +4332,14 @@ out_unreg_blkdev:
 out_put_disk:
 	while (dr--) {
 		del_timer_sync(&motor_off_timer[dr]);
-		if (disks[dr]->queue)
+		if (disks[dr]->queue) {
 			blk_cleanup_queue(disks[dr]->queue);
+			/*
+			 * put_disk() is not paired with add_disk() and
+			 * will put queue reference one extra time. fix it.
+			 */
+			disks[dr]->queue = NULL;
+		}
 		put_disk(disks[dr]);
 	}
 	return err;
@@ -4503,7 +4473,6 @@ static void floppy_release_irq_and_dma(void)
 #if N_FDC > 1
 	set_dor(1, ~8, 0);
 #endif
-	floppy_enable_hlt();
 
 	if (floppy_track_buffer && max_buffer_sectors) {
 		tmpsize = max_buffer_sectors * 1024;
@@ -4579,6 +4548,15 @@ static void __exit floppy_module_exit(void)
 			platform_device_unregister(&floppy_device[drive]);
 		}
 		blk_cleanup_queue(disks[drive]->queue);
+
+		/*
+		 * These disks have not called add_disk(). Don't put down
+		 * queue reference in put_disk().
+		 */
+		if (!(allowed_drive_mask & (1 << drive)) ||
+		    fdc_state[FDC(drive)].version == FDC_NONE)
+			disks[drive]->queue = NULL;
+
 		put_disk(disks[drive]);
 	}
 
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index b52c9ca146fc..bf397bf108b7 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -44,7 +44,6 @@
 #define HD_IRQ 14
 
 #define REALLY_SLOW_IO
-#include <asm/system.h>
 #include <asm/io.h>
 #include <asm/uaccess.h>
 
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f00257782fcc..bbca966f8f66 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -93,16 +93,16 @@ static int transfer_none(struct loop_device *lo, int cmd,
 			 struct page *loop_page, unsigned loop_off,
 			 int size, sector_t real_block)
 {
-	char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
-	char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
+	char *raw_buf = kmap_atomic(raw_page) + raw_off;
+	char *loop_buf = kmap_atomic(loop_page) + loop_off;
 
 	if (cmd == READ)
 		memcpy(loop_buf, raw_buf, size);
 	else
 		memcpy(raw_buf, loop_buf, size);
 
-	kunmap_atomic(loop_buf, KM_USER1);
-	kunmap_atomic(raw_buf, KM_USER0);
+	kunmap_atomic(loop_buf);
+	kunmap_atomic(raw_buf);
 	cond_resched();
 	return 0;
 }
@@ -112,8 +112,8 @@ static int transfer_xor(struct loop_device *lo, int cmd,
 			struct page *loop_page, unsigned loop_off,
 			int size, sector_t real_block)
 {
-	char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
-	char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
+	char *raw_buf = kmap_atomic(raw_page) + raw_off;
+	char *loop_buf = kmap_atomic(loop_page) + loop_off;
 	char *in, *out, *key;
 	int i, keysize;
 
@@ -130,8 +130,8 @@ static int transfer_xor(struct loop_device *lo, int cmd,
 	for (i = 0; i < size; i++)
 		*out++ = *in++ ^ key[(i & 511) % keysize];
 
-	kunmap_atomic(loop_buf, KM_USER1);
-	kunmap_atomic(raw_buf, KM_USER0);
+	kunmap_atomic(loop_buf);
+	kunmap_atomic(raw_buf);
 	cond_resched();
 	return 0;
 }
@@ -356,14 +356,14 @@ lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd)
 	return __splice_from_pipe(pipe, sd, lo_splice_actor);
 }
 
-static int
+static ssize_t
 do_lo_receive(struct loop_device *lo,
 		struct bio_vec *bvec, int bsize, loff_t pos)
 {
 	struct lo_read_data cookie;
 	struct splice_desc sd;
 	struct file *file;
-	long retval;
+	ssize_t retval;
 
 	cookie.lo = lo;
 	cookie.page = bvec->bv_page;
@@ -379,26 +379,28 @@ do_lo_receive(struct loop_device *lo,
 	file = lo->lo_backing_file;
 	retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor);
 
-	if (retval < 0)
-		return retval;
-	if (retval != bvec->bv_len)
-		return -EIO;
-	return 0;
+	return retval;
 }
 
 static int
 lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
 {
 	struct bio_vec *bvec;
-	int i, ret = 0;
+	ssize_t s;
+	int i;
 
 	bio_for_each_segment(bvec, bio, i) {
-		ret = do_lo_receive(lo, bvec, bsize, pos);
-		if (ret < 0)
+		s = do_lo_receive(lo, bvec, bsize, pos);
+		if (s < 0)
+			return s;
+
+		if (s != bvec->bv_len) {
+			zero_fill_bio(bio);
 			break;
+		}
 		pos += bvec->bv_len;
 	}
-	return ret;
+	return 0;
 }
 
 static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
diff --git a/drivers/block/mtip32xx/Kconfig b/drivers/block/mtip32xx/Kconfig
new file mode 100644
index 000000000000..b5dd14e072f2
--- /dev/null
+++ b/drivers/block/mtip32xx/Kconfig
@@ -0,0 +1,9 @@
+#
+# mtip32xx device driver configuration
+#
+
+config BLK_DEV_PCIESSD_MTIP32XX
+	tristate "Block Device Driver for Micron PCIe SSDs"
+	depends on HOTPLUG_PCI_PCIE
+	help
+	  This enables the block driver for Micron PCIe SSDs.
diff --git a/drivers/block/mtip32xx/Makefile b/drivers/block/mtip32xx/Makefile
new file mode 100644
index 000000000000..4fbef8c8329b
--- /dev/null
+++ b/drivers/block/mtip32xx/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for Block device driver for Micron PCIe SSD
+#
+
+obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx.o
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
new file mode 100644
index 000000000000..8eb81c96608f
--- /dev/null
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -0,0 +1,3650 @@
1/*
2 * Driver for the Micron P320 SSD
3 * Copyright (C) 2011 Micron Technology, Inc.
4 *
5 * Portions of this code were derived from works subjected to the
6 * following copyright:
7 * Copyright (C) 2009 Integrated Device Technology, Inc.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 */
20
21#include <linux/pci.h>
22#include <linux/interrupt.h>
23#include <linux/ata.h>
24#include <linux/delay.h>
25#include <linux/hdreg.h>
26#include <linux/uaccess.h>
27#include <linux/random.h>
28#include <linux/smp.h>
29#include <linux/compat.h>
30#include <linux/fs.h>
31#include <linux/module.h>
32#include <linux/genhd.h>
33#include <linux/blkdev.h>
34#include <linux/bio.h>
35#include <linux/dma-mapping.h>
36#include <linux/idr.h>
37#include <linux/kthread.h>
38#include <../drivers/ata/ahci.h>
39#include "mtip32xx.h"
40
41#define HW_CMD_SLOT_SZ (MTIP_MAX_COMMAND_SLOTS * 32)
42#define HW_CMD_TBL_SZ (AHCI_CMD_TBL_HDR_SZ + (MTIP_MAX_SG * 16))
43#define HW_CMD_TBL_AR_SZ (HW_CMD_TBL_SZ * MTIP_MAX_COMMAND_SLOTS)
44#define HW_PORT_PRIV_DMA_SZ \
45 (HW_CMD_SLOT_SZ + HW_CMD_TBL_AR_SZ + AHCI_RX_FIS_SZ)
46
47#define HOST_HSORG 0xFC
48#define HSORG_DISABLE_SLOTGRP_INTR (1<<24)
49#define HSORG_DISABLE_SLOTGRP_PXIS (1<<16)
50#define HSORG_HWREV 0xFF00
51#define HSORG_STYLE 0x8
52#define HSORG_SLOTGROUPS 0x7
53
54#define PORT_COMMAND_ISSUE 0x38
55#define PORT_SDBV 0x7C
56
57#define PORT_OFFSET 0x100
58#define PORT_MEM_SIZE 0x80
59
60#define PORT_IRQ_ERR \
61 (PORT_IRQ_HBUS_ERR | PORT_IRQ_IF_ERR | PORT_IRQ_CONNECT | \
62 PORT_IRQ_PHYRDY | PORT_IRQ_UNK_FIS | PORT_IRQ_BAD_PMP | \
63 PORT_IRQ_TF_ERR | PORT_IRQ_HBUS_DATA_ERR | PORT_IRQ_IF_NONFATAL | \
64 PORT_IRQ_OVERFLOW)
65#define PORT_IRQ_LEGACY \
66 (PORT_IRQ_PIOS_FIS | PORT_IRQ_D2H_REG_FIS)
67#define PORT_IRQ_HANDLED \
68 (PORT_IRQ_SDB_FIS | PORT_IRQ_LEGACY | \
69 PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR | \
70 PORT_IRQ_CONNECT | PORT_IRQ_PHYRDY)
71#define DEF_PORT_IRQ \
72 (PORT_IRQ_ERR | PORT_IRQ_LEGACY | PORT_IRQ_SDB_FIS)
73
74/* product numbers */
75#define MTIP_PRODUCT_UNKNOWN 0x00
76#define MTIP_PRODUCT_ASICFPGA 0x11
77
78/* Device instance number, incremented each time a device is probed. */
79static int instance;
80
81/*
82 * Global variable used to hold the major block device number
83 * allocated in mtip_init().
84 */
85static int mtip_major;
86
87static DEFINE_SPINLOCK(rssd_index_lock);
88static DEFINE_IDA(rssd_index_ida);
89
90static int mtip_block_initialize(struct driver_data *dd);
91
92#ifdef CONFIG_COMPAT
93struct mtip_compat_ide_task_request_s {
94 __u8 io_ports[8];
95 __u8 hob_ports[8];
96 ide_reg_valid_t out_flags;
97 ide_reg_valid_t in_flags;
98 int data_phase;
99 int req_cmd;
100 compat_ulong_t out_size;
101 compat_ulong_t in_size;
102};
103#endif
104
105/*
106 * This function check_for_surprise_removal is called
107 * while card is removed from the system and it will
108 * read the vendor id from the configration space
109 *
110 * @pdev Pointer to the pci_dev structure.
111 *
112 * return value
113 * true if device removed, else false
114 */
115static bool mtip_check_surprise_removal(struct pci_dev *pdev)
116{
117 u16 vendor_id = 0;
118
119 /* Read the vendorID from the configuration space */
120 pci_read_config_word(pdev, 0x00, &vendor_id);
121 if (vendor_id == 0xFFFF)
122 return true; /* device removed */
123
124 return false; /* device present */
125}
126
127/*
128 * This function is called for clean the pending command in the
129 * command slot during the surprise removal of device and return
130 * error to the upper layer.
131 *
132 * @dd Pointer to the DRIVER_DATA structure.
133 *
134 * return value
135 * None
136 */
137static void mtip_command_cleanup(struct driver_data *dd)
138{
139 int group = 0, commandslot = 0, commandindex = 0;
140 struct mtip_cmd *command;
141 struct mtip_port *port = dd->port;
142
143 for (group = 0; group < 4; group++) {
144 for (commandslot = 0; commandslot < 32; commandslot++) {
145 if (!(port->allocated[group] & (1 << commandslot)))
146 continue;
147
148 commandindex = group << 5 | commandslot;
149 command = &port->commands[commandindex];
150
151 if (atomic_read(&command->active)
152 && (command->async_callback)) {
153 command->async_callback(command->async_data,
154 -ENODEV);
155 command->async_callback = NULL;
156 command->async_data = NULL;
157 }
158
159 dma_unmap_sg(&port->dd->pdev->dev,
160 command->sg,
161 command->scatter_ents,
162 command->direction);
163 }
164 }
165
166 up(&port->cmd_slot);
167
168 atomic_set(&dd->drv_cleanup_done, true);
169}
170
171/*
172 * Obtain an empty command slot.
173 *
174 * This function needs to be reentrant since it could be called
175 * at the same time on multiple CPUs. The allocation of the
176 * command slot must be atomic.
177 *
178 * @port Pointer to the port data structure.
179 *
180 * return value
181 * >= 0 Index of command slot obtained.
182 * -1 No command slots available.
183 */
184static int get_slot(struct mtip_port *port)
185{
186 int slot, i;
187 unsigned int num_command_slots = port->dd->slot_groups * 32;
188
189 /*
190 * Try 10 times, because there is a small race here.
191 * that's ok, because it's still cheaper than a lock.
192 *
193 * Race: Since this section is not protected by lock, same bit
194 * could be chosen by different process contexts running in
195 * different processor. So instead of costly lock, we are going
196 * with loop.
197 */
198 for (i = 0; i < 10; i++) {
199 slot = find_next_zero_bit(port->allocated,
200 num_command_slots, 1);
201 if ((slot < num_command_slots) &&
202 (!test_and_set_bit(slot, port->allocated)))
203 return slot;
204 }
205 dev_warn(&port->dd->pdev->dev, "Failed to get a tag.\n");
206
207 if (mtip_check_surprise_removal(port->dd->pdev)) {
208 /* Device not present, clean outstanding commands */
209 mtip_command_cleanup(port->dd);
210 }
211 return -1;
212}
213
214/*
215 * Release a command slot.
216 *
217 * @port Pointer to the port data structure.
218 * @tag Tag of command to release
219 *
220 * return value
221 * None
222 */
223static inline void release_slot(struct mtip_port *port, int tag)
224{
225 smp_mb__before_clear_bit();
226 clear_bit(tag, port->allocated);
227 smp_mb__after_clear_bit();
228}
229
230/*
231 * Reset the HBA (without sleeping)
232 *
233 * Just like hba_reset, except does not call sleep, so can be
234 * run from interrupt/tasklet context.
235 *
236 * @dd Pointer to the driver data structure.
237 *
238 * return value
239 * 0 The reset was successful.
240 * -1 The HBA Reset bit did not clear.
241 */
242static int hba_reset_nosleep(struct driver_data *dd)
243{
244 unsigned long timeout;
245
246 /* Chip quirk: quiesce any chip function */
247 mdelay(10);
248
249 /* Set the reset bit */
250 writel(HOST_RESET, dd->mmio + HOST_CTL);
251
252 /* Flush */
253 readl(dd->mmio + HOST_CTL);
254
255 /*
256 * Wait 10ms then spin for up to 1 second
257 * waiting for reset acknowledgement
258 */
259 timeout = jiffies + msecs_to_jiffies(1000);
260 mdelay(10);
261 while ((readl(dd->mmio + HOST_CTL) & HOST_RESET)
262 && time_before(jiffies, timeout))
263 mdelay(1);
264
265 if (readl(dd->mmio + HOST_CTL) & HOST_RESET)
266 return -1;
267
268 return 0;
269}
270
271/*
272 * Issue a command to the hardware.
273 *
274 * Set the appropriate bit in the s_active and Command Issue hardware
275 * registers, causing hardware command processing to begin.
276 *
277 * @port Pointer to the port structure.
278 * @tag The tag of the command to be issued.
279 *
280 * return value
281 * None
282 */
283static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag)
284{
285 unsigned long flags = 0;
286
287 atomic_set(&port->commands[tag].active, 1);
288
289 spin_lock_irqsave(&port->cmd_issue_lock, flags);
290
291 writel((1 << MTIP_TAG_BIT(tag)),
292 port->s_active[MTIP_TAG_INDEX(tag)]);
293 writel((1 << MTIP_TAG_BIT(tag)),
294 port->cmd_issue[MTIP_TAG_INDEX(tag)]);
295
296 spin_unlock_irqrestore(&port->cmd_issue_lock, flags);
297}
298
299/*
300 * Enable/disable the reception of FIS
301 *
302 * @port Pointer to the port data structure
303 * @enable 1 to enable, 0 to disable
304 *
305 * return value
306 * Previous state: 1 enabled, 0 disabled
307 */
308static int mtip_enable_fis(struct mtip_port *port, int enable)
309{
310 u32 tmp;
311
312 /* enable FIS reception */
313 tmp = readl(port->mmio + PORT_CMD);
314 if (enable)
315 writel(tmp | PORT_CMD_FIS_RX, port->mmio + PORT_CMD);
316 else
317 writel(tmp & ~PORT_CMD_FIS_RX, port->mmio + PORT_CMD);
318
319 /* Flush */
320 readl(port->mmio + PORT_CMD);
321
322 return (((tmp & PORT_CMD_FIS_RX) == PORT_CMD_FIS_RX));
323}
324
325/*
326 * Enable/disable the DMA engine
327 *
328 * @port Pointer to the port data structure
329 * @enable 1 to enable, 0 to disable
330 *
331 * return value
332 * Previous state: 1 enabled, 0 disabled.
333 */
334static int mtip_enable_engine(struct mtip_port *port, int enable)
335{
336 u32 tmp;
337
338 /* enable FIS reception */
339 tmp = readl(port->mmio + PORT_CMD);
340 if (enable)
341 writel(tmp | PORT_CMD_START, port->mmio + PORT_CMD);
342 else
343 writel(tmp & ~PORT_CMD_START, port->mmio + PORT_CMD);
344
345 readl(port->mmio + PORT_CMD);
346 return (((tmp & PORT_CMD_START) == PORT_CMD_START));
347}
348
349/*
350 * Enables the port DMA engine and FIS reception.
351 *
352 * return value
353 * None
354 */
355static inline void mtip_start_port(struct mtip_port *port)
356{
357 /* Enable FIS reception */
358 mtip_enable_fis(port, 1);
359
360 /* Enable the DMA engine */
361 mtip_enable_engine(port, 1);
362}
363
364/*
365 * Deinitialize a port by disabling port interrupts, the DMA engine,
366 * and FIS reception.
367 *
368 * @port Pointer to the port structure
369 *
370 * return value
371 * None
372 */
373static inline void mtip_deinit_port(struct mtip_port *port)
374{
375 /* Disable interrupts on this port */
376 writel(0, port->mmio + PORT_IRQ_MASK);
377
378 /* Disable the DMA engine */
379 mtip_enable_engine(port, 0);
380
381 /* Disable FIS reception */
382 mtip_enable_fis(port, 0);
383}
384
385/*
386 * Initialize a port.
387 *
388 * This function deinitializes the port by calling mtip_deinit_port() and
389 * then initializes it by setting the command header and RX FIS addresses,
390 * clearing the SError register and any pending port interrupts before
391 * re-enabling the default set of port interrupts.
392 *
393 * @port Pointer to the port structure.
394 *
395 * return value
396 * None
397 */
398static void mtip_init_port(struct mtip_port *port)
399{
400 int i;
401 mtip_deinit_port(port);
402
403 /* Program the command list base and FIS base addresses */
404 if (readl(port->dd->mmio + HOST_CAP) & HOST_CAP_64) {
405 writel((port->command_list_dma >> 16) >> 16,
406 port->mmio + PORT_LST_ADDR_HI);
407 writel((port->rxfis_dma >> 16) >> 16,
408 port->mmio + PORT_FIS_ADDR_HI);
409 }
410
411 writel(port->command_list_dma & 0xFFFFFFFF,
412 port->mmio + PORT_LST_ADDR);
413 writel(port->rxfis_dma & 0xFFFFFFFF, port->mmio + PORT_FIS_ADDR);
414
415 /* Clear SError */
416 writel(readl(port->mmio + PORT_SCR_ERR), port->mmio + PORT_SCR_ERR);
417
418 /* reset the completed registers.*/
419 for (i = 0; i < port->dd->slot_groups; i++)
420 writel(0xFFFFFFFF, port->completed[i]);
421
422 /* Clear any pending interrupts for this port */
423 writel(readl(port->mmio + PORT_IRQ_STAT), port->mmio + PORT_IRQ_STAT);
424
425 /* Enable port interrupts */
426 writel(DEF_PORT_IRQ, port->mmio + PORT_IRQ_MASK);
427}
428
429/*
430 * Restart a port
431 *
432 * @port Pointer to the port data structure.
433 *
434 * return value
435 * None
436 */
437static void mtip_restart_port(struct mtip_port *port)
438{
439 unsigned long timeout;
440
441 /* Disable the DMA engine */
442 mtip_enable_engine(port, 0);
443
444 /* Chip quirk: wait up to 500ms for PxCMD.CR == 0 */
445 timeout = jiffies + msecs_to_jiffies(500);
446 while ((readl(port->mmio + PORT_CMD) & PORT_CMD_LIST_ON)
447 && time_before(jiffies, timeout))
448 ;
449
450 /*
451 * Chip quirk: escalate to hba reset if
452 * PxCMD.CR not clear after 500 ms
453 */
454 if (readl(port->mmio + PORT_CMD) & PORT_CMD_LIST_ON) {
455 dev_warn(&port->dd->pdev->dev,
456 "PxCMD.CR not clear, escalating reset\n");
457
458 if (hba_reset_nosleep(port->dd))
459 dev_err(&port->dd->pdev->dev,
460 "HBA reset escalation failed.\n");
461
462 /* 30 ms delay before com reset to quiesce chip */
463 mdelay(30);
464 }
465
466 dev_warn(&port->dd->pdev->dev, "Issuing COM reset\n");
467
468 /* Set PxSCTL.DET */
469 writel(readl(port->mmio + PORT_SCR_CTL) |
470 1, port->mmio + PORT_SCR_CTL);
471 readl(port->mmio + PORT_SCR_CTL);
472
473 /* Wait 1 ms to quiesce chip function */
474 timeout = jiffies + msecs_to_jiffies(1);
475 while (time_before(jiffies, timeout))
476 ;
477
478 /* Clear PxSCTL.DET */
479 writel(readl(port->mmio + PORT_SCR_CTL) & ~1,
480 port->mmio + PORT_SCR_CTL);
481 readl(port->mmio + PORT_SCR_CTL);
482
483 /* Wait 500 ms for bit 0 of PORT_SCR_STS to be set */
484 timeout = jiffies + msecs_to_jiffies(500);
485 while (((readl(port->mmio + PORT_SCR_STAT) & 0x01) == 0)
486 && time_before(jiffies, timeout))
487 ;
488
489 if ((readl(port->mmio + PORT_SCR_STAT) & 0x01) == 0)
490 dev_warn(&port->dd->pdev->dev,
491 "COM reset failed\n");
492
493 /* Clear SError, the PxSERR.DIAG.x should be set so clear it */
494 writel(readl(port->mmio + PORT_SCR_ERR), port->mmio + PORT_SCR_ERR);
495
496 /* Enable the DMA engine */
497 mtip_enable_engine(port, 1);
498}
499
500/*
501 * Called periodically to see if any read/write commands are
502 * taking too long to complete.
503 *
504 * @data Pointer to the PORT data structure.
505 *
506 * return value
507 * None
508 */
509static void mtip_timeout_function(unsigned long int data)
510{
511 struct mtip_port *port = (struct mtip_port *) data;
512 struct host_to_dev_fis *fis;
513 struct mtip_cmd *command;
514 int tag, cmdto_cnt = 0;
515 unsigned int bit, group;
516 unsigned int num_command_slots = port->dd->slot_groups * 32;
517
518 if (unlikely(!port))
519 return;
520
521 if (atomic_read(&port->dd->resumeflag) == true) {
522 mod_timer(&port->cmd_timer,
523 jiffies + msecs_to_jiffies(30000));
524 return;
525 }
526
527 for (tag = 0; tag < num_command_slots; tag++) {
528 /*
529 * Skip internal command slot as it has
530 * its own timeout mechanism
531 */
532 if (tag == MTIP_TAG_INTERNAL)
533 continue;
534
535 if (atomic_read(&port->commands[tag].active) &&
536 (time_after(jiffies, port->commands[tag].comp_time))) {
537 group = tag >> 5;
538 bit = tag & 0x1F;
539
540 command = &port->commands[tag];
541 fis = (struct host_to_dev_fis *) command->command;
542
543 dev_warn(&port->dd->pdev->dev,
544 "Timeout for command tag %d\n", tag);
545
546 cmdto_cnt++;
547 if (cmdto_cnt == 1)
548 set_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags);
549
550 /*
551 * Clear the completed bit. This should prevent
552 * any interrupt handlers from trying to retire
553 * the command.
554 */
555 writel(1 << bit, port->completed[group]);
556
557 /* Call the async completion callback. */
558 if (likely(command->async_callback))
559 command->async_callback(command->async_data,
560 -EIO);
561 command->async_callback = NULL;
562 command->comp_func = NULL;
563
564 /* Unmap the DMA scatter list entries */
565 dma_unmap_sg(&port->dd->pdev->dev,
566 command->sg,
567 command->scatter_ents,
568 command->direction);
569
570 /*
571 * Clear the allocated bit and active tag for the
572 * command.
573 */
574 atomic_set(&port->commands[tag].active, 0);
575 release_slot(port, tag);
576
577 up(&port->cmd_slot);
578 }
579 }
580
581 if (cmdto_cnt) {
582 dev_warn(&port->dd->pdev->dev,
583 "%d commands timed out: restarting port",
584 cmdto_cnt);
585 mtip_restart_port(port);
586 clear_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags);
587 wake_up_interruptible(&port->svc_wait);
588 }
589
590 /* Restart the timer */
591 mod_timer(&port->cmd_timer,
592 jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
593}
594
595/*
596 * IO completion function.
597 *
598 * This completion function is called by the driver ISR when a
599 * command that was issued by the kernel completes. It first calls the
600 * asynchronous completion function which normally calls back into the block
601 * layer passing the asynchronous callback data, then unmaps the
602 * scatter list associated with the completed command, and finally
603 * clears the allocated bit associated with the completed command.
604 *
605 * @port Pointer to the port data structure.
606 * @tag Tag of the command.
607 * @data Pointer to driver_data.
608 * @status Completion status.
609 *
610 * return value
611 * None
612 */
613static void mtip_async_complete(struct mtip_port *port,
614 int tag,
615 void *data,
616 int status)
617{
618 struct mtip_cmd *command;
619 struct driver_data *dd = data;
620 int cb_status = status ? -EIO : 0;
621
622 if (unlikely(!dd) || unlikely(!port))
623 return;
624
625 command = &port->commands[tag];
626
627 if (unlikely(status == PORT_IRQ_TF_ERR)) {
628 dev_warn(&port->dd->pdev->dev,
629 "Command tag %d failed due to TFE\n", tag);
630 }
631
632 /* Upper layer callback */
633 if (likely(command->async_callback))
634 command->async_callback(command->async_data, cb_status);
635
636 command->async_callback = NULL;
637 command->comp_func = NULL;
638
639 /* Unmap the DMA scatter list entries */
640 dma_unmap_sg(&dd->pdev->dev,
641 command->sg,
642 command->scatter_ents,
643 command->direction);
644
645 /* Clear the allocated and active bits for the command */
646 atomic_set(&port->commands[tag].active, 0);
647 release_slot(port, tag);
648
649 up(&port->cmd_slot);
650}
651
652/*
653 * Internal command completion callback function.
654 *
655 * This function is normally called by the driver ISR when an internal
656 * command completed. This function signals the command completion by
657 * calling complete().
658 *
659 * @port Pointer to the port data structure.
660 * @tag Tag of the command that has completed.
661 * @data Pointer to a completion structure.
662 * @status Completion status.
663 *
664 * return value
665 * None
666 */
667static void mtip_completion(struct mtip_port *port,
668 int tag,
669 void *data,
670 int status)
671{
672 struct mtip_cmd *command = &port->commands[tag];
673 struct completion *waiting = data;
674 if (unlikely(status == PORT_IRQ_TF_ERR))
675 dev_warn(&port->dd->pdev->dev,
676 "Internal command %d completed with TFE\n", tag);
677
678 command->async_callback = NULL;
679 command->comp_func = NULL;
680
681 complete(waiting);
682}
683
684/*
685 * Helper function for tag logging
686 */
687static void print_tags(struct driver_data *dd,
688 char *msg,
689 unsigned long *tagbits)
690{
691 unsigned int tag, count = 0;
692
693 for (tag = 0; tag < (dd->slot_groups) * 32; tag++) {
694 if (test_bit(tag, tagbits))
695 count++;
696 }
697 if (count)
698 dev_info(&dd->pdev->dev, "%s [%i tags]\n", msg, count);
699}
700
701/*
702 * Handle an error.
703 *
704 * @dd Pointer to the DRIVER_DATA structure.
705 *
706 * return value
707 * None
708 */
709static void mtip_handle_tfe(struct driver_data *dd)
710{
711 int group, tag, bit, reissue;
712 struct mtip_port *port;
713 struct mtip_cmd *command;
714 u32 completed;
715 struct host_to_dev_fis *fis;
716 unsigned long tagaccum[SLOTBITS_IN_LONGS];
717
718 dev_warn(&dd->pdev->dev, "Taskfile error\n");
719
720 port = dd->port;
721
722 /* Stop the timer to prevent command timeouts. */
723 del_timer(&port->cmd_timer);
724
725 /* Set eh_active */
726 set_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags);
727
728 /* Loop through all the groups */
729 for (group = 0; group < dd->slot_groups; group++) {
730 completed = readl(port->completed[group]);
731
732 /* clear completed status register in the hardware.*/
733 writel(completed, port->completed[group]);
734
735 /* clear the tag accumulator */
736 memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
737
738 /* Process successfully completed commands */
739 for (bit = 0; bit < 32 && completed; bit++) {
740 if (!(completed & (1<<bit)))
741 continue;
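			/* Each slot group covers 32 tags: tag = (group * 32) + bit. */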
742 tag = (group << 5) + bit;
743
744 /* Skip the internal command slot */
745 if (tag == MTIP_TAG_INTERNAL)
746 continue;
747
748 command = &port->commands[tag];
749 if (likely(command->comp_func)) {
750 set_bit(tag, tagaccum);
751 atomic_set(&port->commands[tag].active, 0);
752 command->comp_func(port,
753 tag,
754 command->comp_data,
755 0);
756 } else {
757 dev_err(&port->dd->pdev->dev,
758 "Missing completion func for tag %d",
759 tag);
760 if (mtip_check_surprise_removal(dd->pdev)) {
761 mtip_command_cleanup(dd);
762 /* don't proceed further */
763 return;
764 }
765 }
766 }
767 }
768 print_tags(dd, "TFE tags completed:", tagaccum);
769
770 /* Restart the port */
771 mdelay(20);
772 mtip_restart_port(port);
773
774 /* clear the tag accumulator */
775 memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
776
777 /* Loop through all the groups */
778 for (group = 0; group < dd->slot_groups; group++) {
779 for (bit = 0; bit < 32; bit++) {
780 reissue = 1;
781 tag = (group << 5) + bit;
782
783 /* If the active bit is set re-issue the command */
784 if (atomic_read(&port->commands[tag].active) == 0)
785 continue;
786
787 fis = (struct host_to_dev_fis *)
788 port->commands[tag].command;
789
790 /* Should re-issue? */
791 if (tag == MTIP_TAG_INTERNAL ||
792 fis->command == ATA_CMD_SET_FEATURES)
793 reissue = 0;
794
795 /*
796 * First check if this command has
797 * exceeded its retries.
798 */
799 if (reissue &&
800 (port->commands[tag].retries-- > 0)) {
801
802 set_bit(tag, tagaccum);
803
804 /* Update the timeout value. */
805 port->commands[tag].comp_time =
806 jiffies + msecs_to_jiffies(
807 MTIP_NCQ_COMMAND_TIMEOUT_MS);
808 /* Re-issue the command. */
809 mtip_issue_ncq_command(port, tag);
810
811 continue;
812 }
813
814 /* Retire a command that will not be reissued */
815 dev_warn(&port->dd->pdev->dev,
816 "retiring tag %d\n", tag);
817 atomic_set(&port->commands[tag].active, 0);
818
819 if (port->commands[tag].comp_func)
820 port->commands[tag].comp_func(
821 port,
822 tag,
823 port->commands[tag].comp_data,
824 PORT_IRQ_TF_ERR);
825 else
826 dev_warn(&port->dd->pdev->dev,
827 "Bad completion for tag %d\n",
828 tag);
829 }
830 }
831 print_tags(dd, "TFE tags reissued:", tagaccum);
832
833 /* clear eh_active */
834 clear_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags);
835 wake_up_interruptible(&port->svc_wait);
836
837 mod_timer(&port->cmd_timer,
838 jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
839}
840
841/*
842 * Handle a set device bits interrupt
843 */
844static inline void mtip_process_sdbf(struct driver_data *dd)
845{
846 struct mtip_port *port = dd->port;
847 int group, tag, bit;
848 u32 completed;
849 struct mtip_cmd *command;
850
851 /* walk all bits in all slot groups */
852 for (group = 0; group < dd->slot_groups; group++) {
853 completed = readl(port->completed[group]);
854
855 /* clear completed status register in the hardware.*/
856 writel(completed, port->completed[group]);
857
858 /* Process completed commands. */
859 for (bit = 0;
860 (bit < 32) && completed;
861 bit++, completed >>= 1) {
862 if (completed & 0x01) {
863 tag = (group << 5) | bit;
864
865 /* skip internal command slot. */
866 if (unlikely(tag == MTIP_TAG_INTERNAL))
867 continue;
868
869 command = &port->commands[tag];
870 /* make internal callback */
871 if (likely(command->comp_func)) {
872 command->comp_func(
873 port,
874 tag,
875 command->comp_data,
876 0);
877 } else {
878 dev_warn(&dd->pdev->dev,
879 "Null completion "
880 "for tag %d",
881 tag);
882
883 if (mtip_check_surprise_removal(
884 dd->pdev)) {
885 mtip_command_cleanup(dd);
886 return;
887 }
888 }
889 }
890 }
891 }
892}
893
894/*
895 * Process legacy pio and d2h interrupts
896 */
897static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat)
898{
899 struct mtip_port *port = dd->port;
900 struct mtip_cmd *cmd = &port->commands[MTIP_TAG_INTERNAL];
901
902 if (test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) &&
903 (cmd != NULL) && !(readl(port->cmd_issue[MTIP_TAG_INTERNAL])
904 & (1 << MTIP_TAG_INTERNAL))) {
905 if (cmd->comp_func) {
906 cmd->comp_func(port,
907 MTIP_TAG_INTERNAL,
908 cmd->comp_data,
909 0);
910 return;
911 }
912 }
913
914 dev_warn(&dd->pdev->dev, "IRQ status 0x%x ignored.\n", port_stat);
915
916 return;
917}
918
919/*
920 * Demux and handle errors
921 */
922static inline void mtip_process_errors(struct driver_data *dd, u32 port_stat)
923{
924 if (likely(port_stat & (PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR)))
925 mtip_handle_tfe(dd);
926
927 if (unlikely(port_stat & PORT_IRQ_CONNECT)) {
928 dev_warn(&dd->pdev->dev,
929 "Clearing PxSERR.DIAG.x\n");
930 writel((1 << 26), dd->port->mmio + PORT_SCR_ERR);
931 }
932
933 if (unlikely(port_stat & PORT_IRQ_PHYRDY)) {
934 dev_warn(&dd->pdev->dev,
935 "Clearing PxSERR.DIAG.n\n");
936 writel((1 << 16), dd->port->mmio + PORT_SCR_ERR);
937 }
938
939 if (unlikely(port_stat & ~PORT_IRQ_HANDLED)) {
940 dev_warn(&dd->pdev->dev,
941 "Port stat errors %x unhandled\n",
942 (port_stat & ~PORT_IRQ_HANDLED));
943 }
944}
945
946static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
947{
948 struct driver_data *dd = (struct driver_data *) data;
949 struct mtip_port *port = dd->port;
950 u32 hba_stat, port_stat;
951 int rv = IRQ_NONE;
952
953 hba_stat = readl(dd->mmio + HOST_IRQ_STAT);
954 if (hba_stat) {
955 rv = IRQ_HANDLED;
956
957 /* Acknowledge the interrupt status on the port.*/
958 port_stat = readl(port->mmio + PORT_IRQ_STAT);
959 writel(port_stat, port->mmio + PORT_IRQ_STAT);
960
961 /* Demux port status */
962 if (likely(port_stat & PORT_IRQ_SDB_FIS))
963 mtip_process_sdbf(dd);
964
965 if (unlikely(port_stat & PORT_IRQ_ERR)) {
966 if (unlikely(mtip_check_surprise_removal(dd->pdev))) {
967 mtip_command_cleanup(dd);
968 /* don't proceed further */
969 return IRQ_HANDLED;
970 }
971
972 mtip_process_errors(dd, port_stat & PORT_IRQ_ERR);
973 }
974
975 if (unlikely(port_stat & PORT_IRQ_LEGACY))
976 mtip_process_legacy(dd, port_stat & PORT_IRQ_LEGACY);
977 }
978
979 /* acknowledge interrupt */
980 writel(hba_stat, dd->mmio + HOST_IRQ_STAT);
981
982 return rv;
983}
984
985/*
986 * Wrapper for mtip_handle_irq
987 * (ignores return code)
988 */
989static void mtip_tasklet(unsigned long data)
990{
991 mtip_handle_irq((struct driver_data *) data);
992}
993
994/*
995 * HBA interrupt subroutine.
996 *
997 * @irq IRQ number.
998 * @instance Pointer to the driver data structure.
999 *
1000 * return value
1001 *	IRQ_HANDLED	An HBA interrupt was pending and handled.
1002 * IRQ_NONE This interrupt was not for the HBA.
1003 */
1004static irqreturn_t mtip_irq_handler(int irq, void *instance)
1005{
1006 struct driver_data *dd = instance;
1007 tasklet_schedule(&dd->tasklet);
1008 return IRQ_HANDLED;
1009}
1010
1011static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag)
1012{
1013 atomic_set(&port->commands[tag].active, 1);
1014 writel(1 << MTIP_TAG_BIT(tag),
1015 port->cmd_issue[MTIP_TAG_INDEX(tag)]);
1016}
1017
1018/*
1019 * Wait for port to quiesce
1020 *
1021 * @port Pointer to port data structure
1022 * @timeout Max duration to wait (ms)
1023 *
1024 * return value
1025 * 0 Success
1026 * -EBUSY Commands still active
1027 */
1028static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
1029{
1030 unsigned long to;
1031 unsigned int n;
1032 unsigned int active = 1;
1033
1034 to = jiffies + msecs_to_jiffies(timeout);
1035 do {
1036 if (test_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags) &&
1037 test_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags)) {
1038 msleep(20);
1039 continue; /* svc thd is actively issuing commands */
1040 }
1041 /*
1042 * Ignore s_active bit 0 of array element 0.
1043 * This bit will always be set
1044 */
1045 active = readl(port->s_active[0]) & 0xFFFFFFFE;
1046 for (n = 1; n < port->dd->slot_groups; n++)
1047 active |= readl(port->s_active[n]);
1048
1049 if (!active)
1050 break;
1051
1052 msleep(20);
1053 } while (time_before(jiffies, to));
1054
1055 return active ? -EBUSY : 0;
1056}
1057
1058/*
1059 * Execute an internal command and wait for the completion.
1060 *
1061 * @port Pointer to the port data structure.
1062 * @fis Pointer to the FIS that describes the command.
1063 * @fis_len Length in WORDS of the FIS.
1064 * @buffer DMA accessible for command data.
1065 * @buf_len Length, in bytes, of the data buffer.
1066 * @opts Command header options, excluding the FIS length
1067 * and the number of PRD entries.
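 * @atomic	GFP_KERNEL if the caller may sleep; the command then waits
 *		on a completion. Any other value selects polling mode.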
1068 * @timeout Time in ms to wait for the command to complete.
1069 *
1070 * return value
1071 * 0 Command completed successfully.
1072 * -EFAULT The buffer address is not correctly aligned.
1073 * -EBUSY Internal command or other IO in progress.
1074 * -EAGAIN Time out waiting for command to complete.
1075 */
1076static int mtip_exec_internal_command(struct mtip_port *port,
1077 void *fis,
1078 int fis_len,
1079 dma_addr_t buffer,
1080 int buf_len,
1081 u32 opts,
1082 gfp_t atomic,
1083 unsigned long timeout)
1084{
1085 struct mtip_cmd_sg *command_sg;
1086 DECLARE_COMPLETION_ONSTACK(wait);
1087 int rv = 0;
1088 struct mtip_cmd *int_cmd = &port->commands[MTIP_TAG_INTERNAL];
1089
1090 /* Make sure the buffer is 8 byte aligned. This is asic specific. */
1091 if (buffer & 0x00000007) {
1092 dev_err(&port->dd->pdev->dev,
1093 "SG buffer is not 8 byte aligned\n");
1094 return -EFAULT;
1095 }
1096
1097 /* Only one internal command should be running at a time */
1098 if (test_and_set_bit(MTIP_TAG_INTERNAL, port->allocated)) {
1099 dev_warn(&port->dd->pdev->dev,
1100 "Internal command already active\n");
1101 return -EBUSY;
1102 }
1103 set_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags);
1104
1105 if (atomic == GFP_KERNEL) {
1106 /* wait for io to complete if non atomic */
1107 if (mtip_quiesce_io(port, 5000) < 0) {
1108 dev_warn(&port->dd->pdev->dev,
1109 "Failed to quiesce IO\n");
1110 release_slot(port, MTIP_TAG_INTERNAL);
1111 clear_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags);
1112 wake_up_interruptible(&port->svc_wait);
1113 return -EBUSY;
1114 }
1115
1116 /* Set the completion function and data for the command. */
1117 int_cmd->comp_data = &wait;
1118 int_cmd->comp_func = mtip_completion;
1119
1120 } else {
1121 /* Clear completion - we're going to poll */
1122 int_cmd->comp_data = NULL;
1123 int_cmd->comp_func = NULL;
1124 }
1125
1126 /* Copy the command to the command table */
1127 memcpy(int_cmd->command, fis, fis_len*4);
1128
1129 /* Populate the SG list */
1130 int_cmd->command_header->opts =
1131 __force_bit2int cpu_to_le32(opts | fis_len);
1132 if (buf_len) {
1133 command_sg = int_cmd->command + AHCI_CMD_TBL_HDR_SZ;
1134
1135 command_sg->info =
1136 __force_bit2int cpu_to_le32((buf_len-1) & 0x3FFFFF);
1137 command_sg->dba =
1138 __force_bit2int cpu_to_le32(buffer & 0xFFFFFFFF);
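		/* Two 16-bit shifts keep this well-defined when dma_addr_t
		 * is only 32 bits wide.
		 */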
1139 command_sg->dba_upper =
1140 __force_bit2int cpu_to_le32((buffer >> 16) >> 16);
1141
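		/* The PRD table length lives in bits 31:16 of the command
		 * header options; record the single entry built above.
		 */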
1142 int_cmd->command_header->opts |=
1143 __force_bit2int cpu_to_le32((1 << 16));
1144 }
1145
1146 /* Populate the command header */
1147 int_cmd->command_header->byte_count = 0;
1148
1149 /* Issue the command to the hardware */
1150 mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL);
1151
1152 /* Poll if atomic, wait_for_completion otherwise */
1153 if (atomic == GFP_KERNEL) {
1154 /* Wait for the command to complete or timeout. */
1155 if (wait_for_completion_timeout(
1156 &wait,
1157 msecs_to_jiffies(timeout)) == 0) {
1158 dev_err(&port->dd->pdev->dev,
1159 "Internal command did not complete [%d] "
1160 "within timeout of %lu ms\n",
1161 atomic, timeout);
1162 rv = -EAGAIN;
1163 }
1164
1165 if (readl(port->cmd_issue[MTIP_TAG_INTERNAL])
1166 & (1 << MTIP_TAG_INTERNAL)) {
1167 dev_warn(&port->dd->pdev->dev,
1168 "Retiring internal command but CI is 1.\n");
1169 }
1170
1171 } else {
1172 /* Spin for <timeout> checking if command still outstanding */
1173 timeout = jiffies + msecs_to_jiffies(timeout);
1174
1175 while ((readl(
1176 port->cmd_issue[MTIP_TAG_INTERNAL])
1177 & (1 << MTIP_TAG_INTERNAL))
1178 && time_before(jiffies, timeout))
1179 ;
1180
1181 if (readl(port->cmd_issue[MTIP_TAG_INTERNAL])
1182 & (1 << MTIP_TAG_INTERNAL)) {
1183 dev_err(&port->dd->pdev->dev,
1184 "Internal command did not complete [%d]\n",
1185 atomic);
1186 rv = -EAGAIN;
1187 }
1188 }
1189
1190 /* Clear the allocated and active bits for the internal command. */
1191 atomic_set(&int_cmd->active, 0);
1192 release_slot(port, MTIP_TAG_INTERNAL);
1193 clear_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags);
1194 wake_up_interruptible(&port->svc_wait);
1195
1196 return rv;
1197}
1198
1199/*
1200 * Byte-swap ATA ID strings.
1201 *
1202 * ATA identify data contains strings in byte-swapped 16-bit words.
1203 * They must be swapped (on all architectures) to be usable as C strings.
1204 * This function swaps bytes in-place.
1205 *
1206 * @buf The buffer location of the string
1207 * @len The number of bytes to swap
1208 *
1209 * return value
1210 * None
1211 */
1212static inline void ata_swap_string(u16 *buf, unsigned int len)
1213{
1214 int i;
1215 for (i = 0; i < (len/2); i++)
1216 be16_to_cpus(&buf[i]);
1217}
1218
1219/*
1220 * Request the device identity information.
1221 *
1222 * If a user space buffer is not specified, i.e. is NULL, the
1223 * identify information is still read from the drive and placed
1224 * into the identify data buffer (@e port->identify) in the
1225 * port data structure.
1226 * When the identify buffer contains valid identify information @e
1227 * port->identify_valid is non-zero.
1228 *
1229 * @port Pointer to the port structure.
1230 * @user_buffer A user space buffer where the identify data should be
1231 * copied.
1232 *
1233 * return value
1234 * 0 Command completed successfully.
1235 *	-EFAULT An error occurred while copying data to the user buffer.
1236 * -1 Command failed.
1237 */
1238static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer)
1239{
1240 int rv = 0;
1241 struct host_to_dev_fis fis;
1242
1243 /* Build the FIS. */
1244 memset(&fis, 0, sizeof(struct host_to_dev_fis));
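	/* 0x27 is the Register Host-to-Device FIS type;
	 * opts bit 7 is the Command (C) bit.
	 */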
1245 fis.type = 0x27;
1246 fis.opts = 1 << 7;
1247 fis.command = ATA_CMD_ID_ATA;
1248
1249 /* Set the identify information as invalid. */
1250 port->identify_valid = 0;
1251
1252 /* Clear the identify information. */
1253 memset(port->identify, 0, sizeof(u16) * ATA_ID_WORDS);
1254
1255 /* Execute the command. */
1256 if (mtip_exec_internal_command(port,
1257 &fis,
1258 5,
1259 port->identify_dma,
1260 sizeof(u16) * ATA_ID_WORDS,
1261 0,
1262 GFP_KERNEL,
1263 MTIP_INTERNAL_COMMAND_TIMEOUT_MS)
1264 < 0) {
1265 rv = -1;
1266 goto out;
1267 }
1268
1269 /*
1270 * Perform any necessary byte-swapping. Yes, the kernel does in fact
1271 * perform field-sensitive swapping on the string fields.
1272 * See the kernel use of ata_id_string() for proof of this.
1273 */
1274#ifdef __LITTLE_ENDIAN
1275 ata_swap_string(port->identify + 27, 40); /* model string*/
1276 ata_swap_string(port->identify + 23, 8); /* firmware string*/
1277 ata_swap_string(port->identify + 10, 20); /* serial# string*/
1278#else
1279 {
1280 int i;
1281 for (i = 0; i < ATA_ID_WORDS; i++)
1282 port->identify[i] = le16_to_cpu(port->identify[i]);
1283 }
1284#endif
1285
1286 /* Set the identify buffer as valid. */
1287 port->identify_valid = 1;
1288
1289 if (user_buffer) {
1290 if (copy_to_user(
1291 user_buffer,
1292 port->identify,
1293 ATA_ID_WORDS * sizeof(u16))) {
1294 rv = -EFAULT;
1295 goto out;
1296 }
1297 }
1298
1299out:
1300 return rv;
1301}
1302
1303/*
1304 * Issue a standby immediate command to the device.
1305 *
1306 * @port Pointer to the port structure.
1307 *
1308 * return value
1309 * 0 Command was executed successfully.
1310 * -1 An error occurred while executing the command.
1311 */
1312static int mtip_standby_immediate(struct mtip_port *port)
1313{
1314 int rv;
1315 struct host_to_dev_fis fis;
1316
1317 /* Build the FIS. */
1318 memset(&fis, 0, sizeof(struct host_to_dev_fis));
1319 fis.type = 0x27;
1320 fis.opts = 1 << 7;
1321 fis.command = ATA_CMD_STANDBYNOW1;
1322
1323 /* Execute the command. Use a 15-second timeout for large drives. */
1324 rv = mtip_exec_internal_command(port,
1325 &fis,
1326 5,
1327 0,
1328 0,
1329 0,
1330 GFP_KERNEL,
1331 15000);
1332
1333 return rv;
1334}
1335
1336/*
1337 * Get the drive capacity.
1338 *
1339 * @dd Pointer to the device data structure.
1340 * @sectors Pointer to the variable that will receive the sector count.
1341 *
1342 * return value
1343 * 1 Capacity was returned successfully.
1344 * 0 The identify information is invalid.
1345 */
1346static bool mtip_hw_get_capacity(struct driver_data *dd, sector_t *sectors)
1347{
1348 struct mtip_port *port = dd->port;
1349 u64 total, raw0, raw1, raw2, raw3;
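	/* IDENTIFY DEVICE words 100-103 hold the 48-bit count of
	 * user-addressable sectors, least significant word first.
	 */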
1350 raw0 = port->identify[100];
1351 raw1 = port->identify[101];
1352 raw2 = port->identify[102];
1353 raw3 = port->identify[103];
1354 total = raw0 | raw1<<16 | raw2<<32 | raw3<<48;
1355 *sectors = total;
1356 return (bool) !!port->identify_valid;
1357}
1358
1359/*
1360 * Reset the HBA.
1361 *
1362 * Resets the HBA by setting the HBA Reset bit in the Global
1363 * HBA Control register. After setting the HBA Reset bit the
1364 * function waits for 1 second before reading the HBA Reset
1365 * bit to make sure it has cleared. If HBA Reset is not clear
1366 * an error is returned. This function must not be called from an
1367 * atomic (non-blocking) context.
1368 *
1369 * @dd Pointer to the driver data structure.
1370 *
1371 * return value
1372 * 0 The reset was successful.
1373 * -1 The HBA Reset bit did not clear.
1374 */
1375static int mtip_hba_reset(struct driver_data *dd)
1376{
1377 mtip_deinit_port(dd->port);
1378
1379 /* Set the reset bit */
1380 writel(HOST_RESET, dd->mmio + HOST_CTL);
1381
1382 /* Flush */
1383 readl(dd->mmio + HOST_CTL);
1384
1385 /* Wait for reset to clear */
1386 ssleep(1);
1387
1388 /* Check the bit has cleared */
1389 if (readl(dd->mmio + HOST_CTL) & HOST_RESET) {
1390 dev_err(&dd->pdev->dev,
1391 "Reset bit did not clear.\n");
1392 return -1;
1393 }
1394
1395 return 0;
1396}
1397
1398/*
1399 * Display the identify command data.
1400 *
1401 * @port Pointer to the port data structure.
1402 *
1403 * return value
1404 * None
1405 */
1406static void mtip_dump_identify(struct mtip_port *port)
1407{
1408 sector_t sectors;
1409 unsigned short revid;
1410 char cbuf[42];
1411
1412 if (!port->identify_valid)
1413 return;
1414
1415 strlcpy(cbuf, (char *)(port->identify+10), 21);
1416 dev_info(&port->dd->pdev->dev,
1417 "Serial No.: %s\n", cbuf);
1418
1419 strlcpy(cbuf, (char *)(port->identify+23), 9);
1420 dev_info(&port->dd->pdev->dev,
1421 "Firmware Ver.: %s\n", cbuf);
1422
1423 strlcpy(cbuf, (char *)(port->identify+27), 41);
1424 dev_info(&port->dd->pdev->dev, "Model: %s\n", cbuf);
1425
1426 if (mtip_hw_get_capacity(port->dd, &sectors))
1427 dev_info(&port->dd->pdev->dev,
1428 "Capacity: %llu sectors (%llu MB)\n",
1429 (u64)sectors,
1430 ((u64)sectors) * ATA_SECT_SIZE >> 20);
1431
1432 pci_read_config_word(port->dd->pdev, PCI_REVISION_ID, &revid);
1433 switch (revid & 0xFF) {
1434 case 0x1:
1435 strlcpy(cbuf, "A0", 3);
1436 break;
1437 case 0x3:
1438 strlcpy(cbuf, "A2", 3);
1439 break;
1440 default:
1441 strlcpy(cbuf, "?", 2);
1442 break;
1443 }
1444 dev_info(&port->dd->pdev->dev,
1445 "Card Type: %s\n", cbuf);
1446}
1447
1448/*
1449 * Map the command's scatter list into the command table.
1450 *
1451 * @command Pointer to the command.
1452 * @nents Number of scatter list entries.
1453 *
1454 * return value
1455 * None
1456 */
1457static inline void fill_command_sg(struct driver_data *dd,
1458 struct mtip_cmd *command,
1459 int nents)
1460{
1461 int n;
1462 unsigned int dma_len;
1463 struct mtip_cmd_sg *command_sg;
1464 struct scatterlist *sg = command->sg;
1465
1466 command_sg = command->command + AHCI_CMD_TBL_HDR_SZ;
1467
1468 for (n = 0; n < nents; n++) {
1469 dma_len = sg_dma_len(sg);
1470 if (dma_len > 0x400000)
1471 dev_err(&dd->pdev->dev,
1472 "DMA segment length truncated\n");
1473 command_sg->info = __force_bit2int
1474 cpu_to_le32((dma_len-1) & 0x3FFFFF);
1475 command_sg->dba = __force_bit2int
1476 cpu_to_le32(sg_dma_address(sg));
1477 command_sg->dba_upper = __force_bit2int
1478 cpu_to_le32((sg_dma_address(sg) >> 16) >> 16);
1479 command_sg++;
1480 sg++;
1481 }
1482}
1483
1484/*
1485 * @brief Execute a drive command.
1486 *
1487 * return value 0 The command completed successfully.
1488 * return value -1 An error occurred while executing the command.
1489 */
1490static int exec_drive_task(struct mtip_port *port, u8 *command)
1491{
1492 struct host_to_dev_fis fis;
1493 struct host_to_dev_fis *reply = (port->rxfis + RX_FIS_D2H_REG);
1494
1495 /* Build the FIS. */
1496 memset(&fis, 0, sizeof(struct host_to_dev_fis));
1497 fis.type = 0x27;
1498 fis.opts = 1 << 7;
1499 fis.command = command[0];
1500 fis.features = command[1];
1501 fis.sect_count = command[2];
1502 fis.sector = command[3];
1503 fis.cyl_low = command[4];
1504 fis.cyl_hi = command[5];
1505 fis.device = command[6] & ~0x10; /* Clear the dev bit*/
1506
1507
1508 dbg_printk(MTIP_DRV_NAME "%s: User Command: cmd %x, feat %x, "
1509 "nsect %x, sect %x, lcyl %x, "
1510 "hcyl %x, sel %x\n",
1511 __func__,
1512 command[0],
1513 command[1],
1514 command[2],
1515 command[3],
1516 command[4],
1517 command[5],
1518 command[6]);
1519
1520 /* Execute the command. */
1521 if (mtip_exec_internal_command(port,
1522 &fis,
1523 5,
1524 0,
1525 0,
1526 0,
1527 GFP_KERNEL,
1528 MTIP_IOCTL_COMMAND_TIMEOUT_MS) < 0) {
1529 return -1;
1530 }
1531
1532 command[0] = reply->command; /* Status*/
1533 command[1] = reply->features; /* Error*/
1534 command[4] = reply->cyl_low;
1535 command[5] = reply->cyl_hi;
1536
1537 dbg_printk(MTIP_DRV_NAME "%s: Completion Status: stat %x, "
1538 "err %x , cyl_lo %x cyl_hi %x\n",
1539 __func__,
1540 command[0],
1541 command[1],
1542 command[4],
1543 command[5]);
1544
1545 return 0;
1546}
1547
1548/*
1549 * @brief Execute a drive command.
1550 *
1551 * @param port Pointer to the port data structure.
1552 * @param command Pointer to the user specified command parameters.
1553 * @param user_buffer Pointer to the user space buffer where read sector
1554 * data should be copied.
1555 *
1556 * return value 0 The command completed successfully.
1557 * return value -EFAULT An error occurred while copying the completion
1558 * data to the user space buffer.
1559 * return value -1 An error occurred while executing the command.
1560 */
1561static int exec_drive_command(struct mtip_port *port, u8 *command,
1562 void __user *user_buffer)
1563{
1564 struct host_to_dev_fis fis;
1565 struct host_to_dev_fis *reply = (port->rxfis + RX_FIS_D2H_REG);
1566
1567 /* Build the FIS. */
1568 memset(&fis, 0, sizeof(struct host_to_dev_fis));
1569 fis.type = 0x27;
1570 fis.opts = 1 << 7;
1571 fis.command = command[0];
1572 fis.features = command[2];
1573 fis.sect_count = command[3];
1574 if (fis.command == ATA_CMD_SMART) {
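		/* SMART commands require the 0x4F/0xC2 signature in the
		 * LBA mid/high (cyl) registers.
		 */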
1575 fis.sector = command[1];
1576 fis.cyl_low = 0x4F;
1577 fis.cyl_hi = 0xC2;
1578 }
1579
1580 dbg_printk(MTIP_DRV_NAME
1581 "%s: User Command: cmd %x, sect %x, "
1582 "feat %x, sectcnt %x\n",
1583 __func__,
1584 command[0],
1585 command[1],
1586 command[2],
1587 command[3]);
1588
1589 memset(port->sector_buffer, 0x00, ATA_SECT_SIZE);
1590
1591 /* Execute the command. */
1592 if (mtip_exec_internal_command(port,
1593 &fis,
1594 5,
1595 port->sector_buffer_dma,
1596 (command[3] != 0) ? ATA_SECT_SIZE : 0,
1597 0,
1598 GFP_KERNEL,
1599 MTIP_IOCTL_COMMAND_TIMEOUT_MS)
1600 < 0) {
1601 return -1;
1602 }
1603
1604 /* Collect the completion status. */
1605 command[0] = reply->command; /* Status*/
1606 command[1] = reply->features; /* Error*/
1607 command[2] = command[3];
1608
1609 dbg_printk(MTIP_DRV_NAME
1610 "%s: Completion Status: stat %x, "
1611 "err %x, cmd %x\n",
1612 __func__,
1613 command[0],
1614 command[1],
1615 command[2]);
1616
1617 if (user_buffer && command[3]) {
1618 if (copy_to_user(user_buffer,
1619 port->sector_buffer,
1620 ATA_SECT_SIZE * command[3])) {
1621 return -EFAULT;
1622 }
1623 }
1624
1625 return 0;
1626}
1627
1628/*
1629 * Indicates whether a command has a single sector payload.
1630 *
1631 * @command	Command register value passed to the device.
1632 * @features	Features register value passed to the device.
1633 *
1634 * return value
1635 * 1 command is one that always has a single sector payload,
1636 * regardless of the value in the Sector Count field.
1637 * 0 otherwise
1638 *
1639 */
1640static unsigned int implicit_sector(unsigned char command,
1641 unsigned char features)
1642{
1643 unsigned int rv = 0;
1644
1645 /* list of commands that have an implicit sector count of 1 */
1646 switch (command) {
1647 case ATA_CMD_SEC_SET_PASS:
1648 case ATA_CMD_SEC_UNLOCK:
1649 case ATA_CMD_SEC_ERASE_PREP:
1650 case ATA_CMD_SEC_ERASE_UNIT:
1651 case ATA_CMD_SEC_FREEZE_LOCK:
1652 case ATA_CMD_SEC_DISABLE_PASS:
1653 case ATA_CMD_PMP_READ:
1654 case ATA_CMD_PMP_WRITE:
1655 rv = 1;
1656 break;
1657 case ATA_CMD_SET_MAX:
1658 if (features == ATA_SET_MAX_UNLOCK)
1659 rv = 1;
1660 break;
1661 case ATA_CMD_SMART:
1662 if ((features == ATA_SMART_READ_VALUES) ||
1663 (features == ATA_SMART_READ_THRESHOLDS))
1664 rv = 1;
1665 break;
1666 case ATA_CMD_CONF_OVERLAY:
1667 if ((features == ATA_DCO_IDENTIFY) ||
1668 (features == ATA_DCO_SET))
1669 rv = 1;
1670 break;
1671 }
1672 return rv;
1673}
1674
1675/*
1676 * Executes a taskfile
1677 * See ide_taskfile_ioctl() for derivation
1678 */
1679static int exec_drive_taskfile(struct driver_data *dd,
1680 void __user *buf,
1681 ide_task_request_t *req_task,
1682 int outtotal)
1683{
1684 struct host_to_dev_fis fis;
1685 struct host_to_dev_fis *reply;
1686 u8 *outbuf = NULL;
1687 u8 *inbuf = NULL;
1688 dma_addr_t outbuf_dma = 0;
1689 dma_addr_t inbuf_dma = 0;
1690 dma_addr_t dma_buffer = 0;
1691 int err = 0;
1692 unsigned int taskin = 0;
1693 unsigned int taskout = 0;
1694 u8 nsect = 0;
1695 unsigned int timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS;
1696 unsigned int force_single_sector;
1697 unsigned int transfer_size;
1698 unsigned long task_file_data;
1699 int intotal = outtotal + req_task->out_size;
1700
1701 taskout = req_task->out_size;
1702 taskin = req_task->in_size;
1703 /* 130560 = 512 * 0xFF*/
1704 if (taskin > 130560 || taskout > 130560) {
1705 err = -EINVAL;
1706 goto abort;
1707 }
1708
1709 if (taskout) {
1710 outbuf = kzalloc(taskout, GFP_KERNEL);
1711 if (outbuf == NULL) {
1712 err = -ENOMEM;
1713 goto abort;
1714 }
1715 if (copy_from_user(outbuf, buf + outtotal, taskout)) {
1716 err = -EFAULT;
1717 goto abort;
1718 }
1719 outbuf_dma = pci_map_single(dd->pdev,
1720 outbuf,
1721 taskout,
1722 DMA_TO_DEVICE);
1723 if (outbuf_dma == 0) {
1724 err = -ENOMEM;
1725 goto abort;
1726 }
1727 dma_buffer = outbuf_dma;
1728 }
1729
1730 if (taskin) {
1731 inbuf = kzalloc(taskin, GFP_KERNEL);
1732 if (inbuf == NULL) {
1733 err = -ENOMEM;
1734 goto abort;
1735 }
1736
1737 if (copy_from_user(inbuf, buf + intotal, taskin)) {
1738 err = -EFAULT;
1739 goto abort;
1740 }
1741 inbuf_dma = pci_map_single(dd->pdev,
1742 inbuf,
1743 taskin, DMA_FROM_DEVICE);
1744 if (inbuf_dma == 0) {
1745 err = -ENOMEM;
1746 goto abort;
1747 }
1748 dma_buffer = inbuf_dma;
1749 }
1750
1751 /* only supports PIO and non-data commands from this ioctl. */
1752 switch (req_task->data_phase) {
1753 case TASKFILE_OUT:
1754 nsect = taskout / ATA_SECT_SIZE;
1755 reply = (dd->port->rxfis + RX_FIS_PIO_SETUP);
1756 break;
1757 case TASKFILE_IN:
1758 reply = (dd->port->rxfis + RX_FIS_PIO_SETUP);
1759 break;
1760 case TASKFILE_NO_DATA:
1761 reply = (dd->port->rxfis + RX_FIS_D2H_REG);
1762 break;
1763 default:
1764 err = -EINVAL;
1765 goto abort;
1766 }
1767
1768 /* Build the FIS. */
1769 memset(&fis, 0, sizeof(struct host_to_dev_fis));
1770
1771 fis.type = 0x27;
1772 fis.opts = 1 << 7;
1773 fis.command = req_task->io_ports[7];
1774 fis.features = req_task->io_ports[1];
1775 fis.sect_count = req_task->io_ports[2];
1776 fis.lba_low = req_task->io_ports[3];
1777 fis.lba_mid = req_task->io_ports[4];
1778 fis.lba_hi = req_task->io_ports[5];
1779 /* Clear the dev bit*/
1780 fis.device = req_task->io_ports[6] & ~0x10;
1781
1782 if ((req_task->in_flags.all == 0) && (req_task->out_flags.all & 1)) {
1783 req_task->in_flags.all =
1784 IDE_TASKFILE_STD_IN_FLAGS |
1785 (IDE_HOB_STD_IN_FLAGS << 8);
1786 fis.lba_low_ex = req_task->hob_ports[3];
1787 fis.lba_mid_ex = req_task->hob_ports[4];
1788 fis.lba_hi_ex = req_task->hob_ports[5];
1789 fis.features_ex = req_task->hob_ports[1];
1790 fis.sect_cnt_ex = req_task->hob_ports[2];
1791
1792 } else {
1793 req_task->in_flags.all = IDE_TASKFILE_STD_IN_FLAGS;
1794 }
1795
1796 force_single_sector = implicit_sector(fis.command, fis.features);
1797
1798 if ((taskin || taskout) && (!fis.sect_count)) {
1799 if (nsect)
1800 fis.sect_count = nsect;
1801 else {
1802 if (!force_single_sector) {
1803 dev_warn(&dd->pdev->dev,
1804 "data movement but "
1805 "sect_count is 0\n");
1806 err = -EINVAL;
1807 goto abort;
1808 }
1809 }
1810 }
1811
1812 dbg_printk(MTIP_DRV_NAME
1813 "taskfile: cmd %x, feat %x, nsect %x,"
1814 " sect/lbal %x, lcyl/lbam %x, hcyl/lbah %x,"
1815 " head/dev %x\n",
1816 fis.command,
1817 fis.features,
1818 fis.sect_count,
1819 fis.lba_low,
1820 fis.lba_mid,
1821 fis.lba_hi,
1822 fis.device);
1823
1824 switch (fis.command) {
1825 case ATA_CMD_DOWNLOAD_MICRO:
1826 /* Change timeout for Download Microcode to 60 seconds.*/
1827 timeout = 60000;
1828 break;
1829 case ATA_CMD_SEC_ERASE_UNIT:
1830 /* Change timeout for Security Erase Unit to 4 minutes.*/
1831 timeout = 240000;
1832 break;
1833 case ATA_CMD_STANDBYNOW1:
1834 /* Change timeout for standby immediate to 10 seconds.*/
1835 timeout = 10000;
1836 break;
1837 case 0xF7:
1838 case 0xFA:
1839 /* Change timeout for vendor unique command to 10 secs */
1840 timeout = 10000;
1841 break;
1842 case ATA_CMD_SMART:
1843		/* Change timeout for SMART command to 10 seconds */
1844 timeout = 10000;
1845 break;
1846 default:
1847 timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS;
1848 break;
1849 }
1850
1851 /* Determine the correct transfer size.*/
1852 if (force_single_sector)
1853 transfer_size = ATA_SECT_SIZE;
1854 else
1855 transfer_size = ATA_SECT_SIZE * fis.sect_count;
1856
1857 /* Execute the command.*/
1858 if (mtip_exec_internal_command(dd->port,
1859 &fis,
1860 5,
1861 dma_buffer,
1862 transfer_size,
1863 0,
1864 GFP_KERNEL,
1865 timeout) < 0) {
1866 err = -EIO;
1867 goto abort;
1868 }
1869
1870 task_file_data = readl(dd->port->mmio+PORT_TFDATA);
1871
1872 if ((req_task->data_phase == TASKFILE_IN) && !(task_file_data & 1)) {
1873 reply = dd->port->rxfis + RX_FIS_PIO_SETUP;
1874 req_task->io_ports[7] = reply->control;
1875 } else {
1876 reply = dd->port->rxfis + RX_FIS_D2H_REG;
1877 req_task->io_ports[7] = reply->command;
1878 }
1879
1880 /* reclaim the DMA buffers.*/
1881 if (inbuf_dma)
1882 pci_unmap_single(dd->pdev, inbuf_dma,
1883 taskin, DMA_FROM_DEVICE);
1884 if (outbuf_dma)
1885 pci_unmap_single(dd->pdev, outbuf_dma,
1886 taskout, DMA_TO_DEVICE);
1887 inbuf_dma = 0;
1888 outbuf_dma = 0;
1889
1890 /* return the ATA registers to the caller.*/
1891 req_task->io_ports[1] = reply->features;
1892 req_task->io_ports[2] = reply->sect_count;
1893 req_task->io_ports[3] = reply->lba_low;
1894 req_task->io_ports[4] = reply->lba_mid;
1895 req_task->io_ports[5] = reply->lba_hi;
1896 req_task->io_ports[6] = reply->device;
1897
1898 if (req_task->out_flags.all & 1) {
1899
1900 req_task->hob_ports[3] = reply->lba_low_ex;
1901 req_task->hob_ports[4] = reply->lba_mid_ex;
1902 req_task->hob_ports[5] = reply->lba_hi_ex;
1903 req_task->hob_ports[1] = reply->features_ex;
1904 req_task->hob_ports[2] = reply->sect_cnt_ex;
1905 }
1906
1907	/* COMRESET after a secure erase or low-level format */
1908 if (((fis.command == ATA_CMD_SEC_ERASE_UNIT) ||
1909 ((fis.command == 0xFC) &&
1910 (fis.features == 0x27 || fis.features == 0x72 ||
1911 fis.features == 0x62 || fis.features == 0x26))) &&
1912 !(reply->command & 1)) {
1913 mtip_restart_port(dd->port);
1914 }
1915
1916 dbg_printk(MTIP_DRV_NAME
1917 "%s: Completion: stat %x,"
1918 "err %x, sect_cnt %x, lbalo %x,"
1919 "lbamid %x, lbahi %x, dev %x\n",
1920 __func__,
1921 req_task->io_ports[7],
1922 req_task->io_ports[1],
1923 req_task->io_ports[2],
1924 req_task->io_ports[3],
1925 req_task->io_ports[4],
1926 req_task->io_ports[5],
1927 req_task->io_ports[6]);
1928
1929 if (taskout) {
1930 if (copy_to_user(buf + outtotal, outbuf, taskout)) {
1931 err = -EFAULT;
1932 goto abort;
1933 }
1934 }
1935 if (taskin) {
1936 if (copy_to_user(buf + intotal, inbuf, taskin)) {
1937 err = -EFAULT;
1938 goto abort;
1939 }
1940 }
1941abort:
1942 if (inbuf_dma)
1943 pci_unmap_single(dd->pdev, inbuf_dma,
1944 taskin, DMA_FROM_DEVICE);
1945 if (outbuf_dma)
1946 pci_unmap_single(dd->pdev, outbuf_dma,
1947 taskout, DMA_TO_DEVICE);
1948 kfree(outbuf);
1949 kfree(inbuf);
1950
1951 return err;
1952}
1953
1954/*
1955 * Handle IOCTL calls from the Block Layer.
1956 *
1957 * This function is called by the Block Layer when it receives an IOCTL
1958 * command that it does not understand. If the IOCTL command is not supported
1959 * this function returns -EINVAL.
1960 *
1961 * @dd Pointer to the driver data structure.
1962 * @cmd IOCTL command passed from the Block Layer.
1963 * @arg IOCTL argument passed from the Block Layer.
1964 *
1965 * return value
1966 * 0 The IOCTL completed successfully.
1967 *	-EINVAL The specified command is not supported.
1968 * -EFAULT An error occurred copying data to a user space buffer.
1969 * -EIO An error occurred while executing the command.
1970 */
1971static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd,
1972 unsigned long arg)
1973{
1974 switch (cmd) {
1975 case HDIO_GET_IDENTITY:
1976 if (mtip_get_identify(dd->port, (void __user *) arg) < 0) {
1977 dev_warn(&dd->pdev->dev,
1978 "Unable to read identity\n");
1979 return -EIO;
1980 }
1981
1982 break;
1983 case HDIO_DRIVE_CMD:
1984 {
1985 u8 drive_command[4];
1986
1987 /* Copy the user command info to our buffer. */
1988 if (copy_from_user(drive_command,
1989 (void __user *) arg,
1990 sizeof(drive_command)))
1991 return -EFAULT;
1992
1993 /* Execute the drive command. */
1994 if (exec_drive_command(dd->port,
1995 drive_command,
1996 (void __user *) (arg+4)))
1997 return -EIO;
1998
1999 /* Copy the status back to the users buffer. */
2000 if (copy_to_user((void __user *) arg,
2001 drive_command,
2002 sizeof(drive_command)))
2003 return -EFAULT;
2004
2005 break;
2006 }
2007 case HDIO_DRIVE_TASK:
2008 {
2009 u8 drive_command[7];
2010
2011 /* Copy the user command info to our buffer. */
2012 if (copy_from_user(drive_command,
2013 (void __user *) arg,
2014 sizeof(drive_command)))
2015 return -EFAULT;
2016
2017 /* Execute the drive command. */
2018 if (exec_drive_task(dd->port, drive_command))
2019 return -EIO;
2020
2021 /* Copy the status back to the users buffer. */
2022 if (copy_to_user((void __user *) arg,
2023 drive_command,
2024 sizeof(drive_command)))
2025 return -EFAULT;
2026
2027 break;
2028 }
2029 case HDIO_DRIVE_TASKFILE: {
2030 ide_task_request_t req_task;
2031 int ret, outtotal;
2032
2033 if (copy_from_user(&req_task, (void __user *) arg,
2034 sizeof(req_task)))
2035 return -EFAULT;
2036
2037 outtotal = sizeof(req_task);
2038
2039 ret = exec_drive_taskfile(dd, (void __user *) arg,
2040 &req_task, outtotal);
2041
2042 if (copy_to_user((void __user *) arg, &req_task,
2043 sizeof(req_task)))
2044 return -EFAULT;
2045
2046 return ret;
2047 }
2048
2049 default:
2050 return -EINVAL;
2051 }
2052 return 0;
2053}
2054
2055/*
2056 * Submit an IO to the hw
2057 *
2058 * This function is called by the block layer to issue an io
2059 * to the device. Upon completion, the callback function will
2060 * be called with the data parameter passed as the callback data.
2061 *
2062 * @dd Pointer to the driver data structure.
2063 * @start    First sector to transfer.
2064 * @nsect    Number of sectors to transfer.
2065 * @nents    Number of entries in the scatter list for this command.
2066 * @tag      The tag of this command.
2067 * @callback Pointer to the function that should be called
2068 *	      when the transfer completes.
2069 * @data     Callback data passed to the callback function
2070 *	      when the transfer completes.
2071 * @dir Direction (read or write)
2072 *
2073 * return value
2074 * None
2075 */
2076static void mtip_hw_submit_io(struct driver_data *dd, sector_t start,
2077 int nsect, int nents, int tag, void *callback,
2078 void *data, int dir)
2079{
2080 struct host_to_dev_fis *fis;
2081 struct mtip_port *port = dd->port;
2082 struct mtip_cmd *command = &port->commands[tag];
2083
2084 /* Map the scatter list for DMA access */
2085 if (dir == READ)
2086 nents = dma_map_sg(&dd->pdev->dev, command->sg,
2087 nents, DMA_FROM_DEVICE);
2088 else
2089 nents = dma_map_sg(&dd->pdev->dev, command->sg,
2090 nents, DMA_TO_DEVICE);
2091
2092 command->scatter_ents = nents;
2093
2094 /*
2095 * The number of retries for this command before it is
2096 * reported as a failure to the upper layers.
2097 */
2098 command->retries = MTIP_MAX_RETRIES;
2099
2100 /* Fill out fis */
2101 fis = command->command;
2102 fis->type = 0x27;
2103 fis->opts = 1 << 7;
2104 fis->command =
2105 (dir == READ ? ATA_CMD_FPDMA_READ : ATA_CMD_FPDMA_WRITE);
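	/* 48-bit LBA: the low 24 bits fill lba_low/mid/hi, the
	 * upper 24 bits fill the corresponding _ex fields.
	 */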
2106 *((unsigned int *) &fis->lba_low) = (start & 0xFFFFFF);
2107 *((unsigned int *) &fis->lba_low_ex) = ((start >> 24) & 0xFFFFFF);
2108 fis->device = 1 << 6;
2109 fis->features = nsect & 0xFF;
2110 fis->features_ex = (nsect >> 8) & 0xFF;
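	/* For NCQ (FPDMA) commands the transfer length travels in the
	 * features fields above; the queue tag is encoded in the
	 * sector count field, with the low tag bits in bits 7:3.
	 */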
2111 fis->sect_count = ((tag << 3) | (tag >> 5));
2112 fis->sect_cnt_ex = 0;
2113 fis->control = 0;
2114 fis->res2 = 0;
2115 fis->res3 = 0;
2116 fill_command_sg(dd, command, nents);
2117
2118 /* Populate the command header */
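	/* PRD table length in bits 31:16, command FIS length (5 dwords)
	 * in bits 4:0, plus the prefetch hint.
	 */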
2119 command->command_header->opts =
2120 __force_bit2int cpu_to_le32(
2121 (nents << 16) | 5 | AHCI_CMD_PREFETCH);
2122 command->command_header->byte_count = 0;
2123
2124 /*
2125 * Set the completion function and data for the command
2126 * within this layer.
2127 */
2128 command->comp_data = dd;
2129 command->comp_func = mtip_async_complete;
2130 command->direction = (dir == READ ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
2131
2132 /*
2133 * Set the completion function and data for the command passed
2134 * from the upper layer.
2135 */
2136 command->async_data = data;
2137 command->async_callback = callback;
2138
2139 /*
2140	 * Hold off issuing this command if an internal command is in
2141	 * progress or error handling is active; queue it instead.
2142 */
2143 if (unlikely(test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) ||
2144 test_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags))) {
2145 set_bit(tag, port->cmds_to_issue);
2146 set_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags);
2147 return;
2148 }
2149
2150 /* Issue the command to the hardware */
2151 mtip_issue_ncq_command(port, tag);
2152
2153 /* Set the command's timeout value.*/
2154 port->commands[tag].comp_time = jiffies + msecs_to_jiffies(
2155 MTIP_NCQ_COMMAND_TIMEOUT_MS);
2156}
2157
2158/*
2159 * Release a command slot.
2160 *
2161 * @dd Pointer to the driver data structure.
2162 * @tag Slot tag
2163 *
2164 * return value
2165 * None
2166 */
2167static void mtip_hw_release_scatterlist(struct driver_data *dd, int tag)
2168{
2169 release_slot(dd->port, tag);
2170}
2171
2172/*
2173 * Obtain a command slot and return its associated scatter list.
2174 *
2175 * @dd Pointer to the driver data structure.
2176 * @tag Pointer to an int that will receive the allocated command
2177 * slot tag.
2178 *
2179 * return value
2180 * Pointer to the scatter list for the allocated command slot
2181 * or NULL if no command slots are available.
2182 */
2183static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd,
2184 int *tag)
2185{
2186 /*
2187 * It is possible that, even with this semaphore, a thread
2188 * may think that no command slots are available. Therefore, we
2189 * need to make an attempt to get_slot().
2190 */
2191 down(&dd->port->cmd_slot);
2192 *tag = get_slot(dd->port);
2193
2194 if (unlikely(*tag < 0))
2195 return NULL;
2196
2197 return dd->port->commands[*tag].sg;
2198}
2199
2200/*
2201 * Sysfs register/status dump.
2202 *
2203 * @dev  Pointer to the device structure, passed by the kernel.
2204 * @attr Pointer to the device_attribute structure passed by the kernel.
2205 * @buf Pointer to the char buffer that will receive the stats info.
2206 *
2207 * return value
2208 * The size, in bytes, of the data copied into buf.
2209 */
2210static ssize_t hw_show_registers(struct device *dev,
2211 struct device_attribute *attr,
2212 char *buf)
2213{
2214 u32 group_allocated;
2215 struct driver_data *dd = dev_to_disk(dev)->private_data;
2216 int size = 0;
2217 int n;
2218
2219 size += sprintf(&buf[size], "%s:\ns_active:\n", __func__);
2220
2221 for (n = 0; n < dd->slot_groups; n++)
2222 size += sprintf(&buf[size], "0x%08x\n",
2223 readl(dd->port->s_active[n]));
2224
2225 size += sprintf(&buf[size], "Command Issue:\n");
2226
2227 for (n = 0; n < dd->slot_groups; n++)
2228 size += sprintf(&buf[size], "0x%08x\n",
2229 readl(dd->port->cmd_issue[n]));
2230
2231 size += sprintf(&buf[size], "Allocated:\n");
2232
2233 for (n = 0; n < dd->slot_groups; n++) {
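		/* 'allocated' is an array of unsigned long, so on 64-bit
		 * kernels each element holds two 32-slot groups.
		 */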
2234 if (sizeof(long) > sizeof(u32))
2235 group_allocated =
2236 dd->port->allocated[n/2] >> (32*(n&1));
2237 else
2238 group_allocated = dd->port->allocated[n];
2239 size += sprintf(&buf[size], "0x%08x\n",
2240 group_allocated);
2241 }
2242
2243 size += sprintf(&buf[size], "completed:\n");
2244
2245 for (n = 0; n < dd->slot_groups; n++)
2246 size += sprintf(&buf[size], "0x%08x\n",
2247 readl(dd->port->completed[n]));
2248
2249 size += sprintf(&buf[size], "PORT_IRQ_STAT 0x%08x\n",
2250 readl(dd->port->mmio + PORT_IRQ_STAT));
2251 size += sprintf(&buf[size], "HOST_IRQ_STAT 0x%08x\n",
2252 readl(dd->mmio + HOST_IRQ_STAT));
2253
2254 return size;
2255}
2256static DEVICE_ATTR(registers, S_IRUGO, hw_show_registers, NULL);
2257
2258/*
2259 * Create the sysfs related attributes.
2260 *
2261 * @dd Pointer to the driver data structure.
2262 * @kobj Pointer to the kobj for the block device.
2263 *
2264 * return value
2265 * 0 Operation completed successfully.
2266 * -EINVAL Invalid parameter.
2267 */
2268static int mtip_hw_sysfs_init(struct driver_data *dd, struct kobject *kobj)
2269{
2270 if (!kobj || !dd)
2271 return -EINVAL;
2272
2273 if (sysfs_create_file(kobj, &dev_attr_registers.attr))
2274 dev_warn(&dd->pdev->dev,
2275 "Error creating registers sysfs entry\n");
2276 return 0;
2277}
2278
2279/*
2280 * Remove the sysfs related attributes.
2281 *
2282 * @dd Pointer to the driver data structure.
2283 * @kobj Pointer to the kobj for the block device.
2284 *
2285 * return value
2286 * 0 Operation completed successfully.
2287 * -EINVAL Invalid parameter.
2288 */
2289static int mtip_hw_sysfs_exit(struct driver_data *dd, struct kobject *kobj)
2290{
2291 if (!kobj || !dd)
2292 return -EINVAL;
2293
2294 sysfs_remove_file(kobj, &dev_attr_registers.attr);
2295
2296 return 0;
2297}
2298
2299/*
2300 * Perform any init/resume time hardware setup
2301 *
2302 * @dd Pointer to the driver data structure.
2303 *
2304 * return value
2305 * None
2306 */
2307static inline void hba_setup(struct driver_data *dd)
2308{
2309 u32 hwdata;
2310 hwdata = readl(dd->mmio + HOST_HSORG);
2311
2312 /* interrupt bug workaround: use only 1 IS bit.*/
2313 writel(hwdata |
2314 HSORG_DISABLE_SLOTGRP_INTR |
2315 HSORG_DISABLE_SLOTGRP_PXIS,
2316 dd->mmio + HOST_HSORG);
2317}
2318
2319/*
2320 * Detect the details of the product, and store anything needed
2321 * into the driver data structure. This includes product type and
2322 * version and number of slot groups.
2323 *
2324 * @dd Pointer to the driver data structure.
2325 *
2326 * return value
2327 * None
2328 */
2329static void mtip_detect_product(struct driver_data *dd)
2330{
2331 u32 hwdata;
2332 unsigned int rev, slotgroups;
2333
2334 /*
2335 * HBA base + 0xFC [15:0] - vendor-specific hardware interface
2336 * info register:
2337 * [15:8] hardware/software interface rev#
2338 * [ 3] asic-style interface
2339 * [ 2:0] number of slot groups, minus 1 (only valid for asic-style).
2340 */
2341 hwdata = readl(dd->mmio + HOST_HSORG);
2342
2343 dd->product_type = MTIP_PRODUCT_UNKNOWN;
2344 dd->slot_groups = 1;
2345
2346 if (hwdata & 0x8) {
2347 dd->product_type = MTIP_PRODUCT_ASICFPGA;
2348 rev = (hwdata & HSORG_HWREV) >> 8;
2349 slotgroups = (hwdata & HSORG_SLOTGROUPS) + 1;
2350 dev_info(&dd->pdev->dev,
2351 "ASIC-FPGA design, HS rev 0x%x, "
2352 "%i slot groups [%i slots]\n",
2353 rev,
2354 slotgroups,
2355 slotgroups * 32);
2356
2357 if (slotgroups > MTIP_MAX_SLOT_GROUPS) {
2358 dev_warn(&dd->pdev->dev,
2359 "Warning: driver only supports "
2360 "%i slot groups.\n", MTIP_MAX_SLOT_GROUPS);
2361 slotgroups = MTIP_MAX_SLOT_GROUPS;
2362 }
2363 dd->slot_groups = slotgroups;
2364 return;
2365 }
2366
2367 dev_warn(&dd->pdev->dev, "Unrecognized product id\n");
2368}
2369
2370/*
2371 * Blocking wait for FTL rebuild to complete
2372 *
2373 * @dd Pointer to the DRIVER_DATA structure.
2374 *
2375 * return value
2376 * 0 FTL rebuild completed successfully
2377 * -EFAULT FTL rebuild error/timeout/interruption
2378 */
2379static int mtip_ftl_rebuild_poll(struct driver_data *dd)
2380{
2381 unsigned long timeout, cnt = 0, start;
2382
2383 dev_warn(&dd->pdev->dev,
2384 "FTL rebuild in progress. Polling for completion.\n");
2385
2386 start = jiffies;
2387 dd->ftlrebuildflag = 1;
2388 timeout = jiffies + msecs_to_jiffies(MTIP_FTL_REBUILD_TIMEOUT_MS);
2389
2390 do {
2391 if (mtip_check_surprise_removal(dd->pdev))
2392 return -EFAULT;
2393
2394 if (mtip_get_identify(dd->port, NULL) < 0)
2395 return -EFAULT;
2396
2397 if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) ==
2398 MTIP_FTL_REBUILD_MAGIC) {
2399 ssleep(1);
2400			/* Print a progress message periodically */
2401 if (cnt++ >= 180) {
2402 dev_warn(&dd->pdev->dev,
2403 "FTL rebuild in progress (%d secs).\n",
2404 jiffies_to_msecs(jiffies - start) / 1000);
2405 cnt = 0;
2406 }
2407 } else {
2408 dev_warn(&dd->pdev->dev,
2409 "FTL rebuild complete (%d secs).\n",
2410 jiffies_to_msecs(jiffies - start) / 1000);
2411 dd->ftlrebuildflag = 0;
2412 mtip_block_initialize(dd);
2413 break;
2414 }
2415 ssleep(10);
2416 } while (time_before(jiffies, timeout));
2417
2418 /* Check for timeout */
2419 if (dd->ftlrebuildflag) {
2420 dev_err(&dd->pdev->dev,
2421 "Timed out waiting for FTL rebuild to complete (%d secs).\n",
2422 jiffies_to_msecs(jiffies - start) / 1000);
2423 return -EFAULT;
2424 }
2425
2426 return 0;
2427}
2428
2429/*
2430 * service thread to issue queued commands
2431 *
2432 * @data Pointer to the driver data structure.
2433 *
2434 * return value
2435 * 0
2436 */
2437
2438static int mtip_service_thread(void *data)
2439{
2440 struct driver_data *dd = (struct driver_data *)data;
2441 unsigned long slot, slot_start, slot_wrap;
2442 unsigned int num_cmd_slots = dd->slot_groups * 32;
2443 struct mtip_port *port = dd->port;
2444
2445 while (1) {
2446 /*
2447		 * Sleep until there is work to do and neither an internal
2448		 * command nor error handling is active.
2449 */
2450 wait_event_interruptible(port->svc_wait, (port->flags) &&
2451 !test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) &&
2452 !test_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags));
2453
2454 if (kthread_should_stop())
2455 break;
2456
2457 set_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags);
2458 if (test_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags)) {
2459 slot = 1;
2460 /* used to restrict the loop to one iteration */
2461 slot_start = num_cmd_slots;
2462 slot_wrap = 0;
2463 while (1) {
2464 slot = find_next_bit(port->cmds_to_issue,
2465 num_cmd_slots, slot);
2466 if (slot_wrap == 1) {
2467 if ((slot_start >= slot) ||
2468 (slot >= num_cmd_slots))
2469 break;
2470 }
2471 if (unlikely(slot_start == num_cmd_slots))
2472 slot_start = slot;
2473
2474 if (unlikely(slot == num_cmd_slots)) {
2475 slot = 1;
2476 slot_wrap = 1;
2477 continue;
2478 }
2479
2480 /* Issue the command to the hardware */
2481 mtip_issue_ncq_command(port, slot);
2482
2483 /* Set the command's timeout value.*/
2484 port->commands[slot].comp_time = jiffies +
2485 msecs_to_jiffies(MTIP_NCQ_COMMAND_TIMEOUT_MS);
2486
2487 clear_bit(slot, port->cmds_to_issue);
2488 }
2489
2490 clear_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags);
2491 } else if (test_bit(MTIP_FLAG_REBUILD_BIT, &port->flags)) {
2492 mtip_ftl_rebuild_poll(dd);
2493 clear_bit(MTIP_FLAG_REBUILD_BIT, &port->flags);
2494 }
2495 clear_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags);
2496
2497 if (test_bit(MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT, &port->flags))
2498 break;
2499 }
2500 return 0;
2501}
2502
2503/*
2504 * Called once for each card.
2505 *
2506 * @dd Pointer to the driver data structure.
2507 *
2508 * return value
2509 * 0 on success, else an error code.
2510 */
2511static int mtip_hw_init(struct driver_data *dd)
2512{
2513 int i;
2514 int rv;
2515 unsigned int num_command_slots;
2516
2517 dd->mmio = pcim_iomap_table(dd->pdev)[MTIP_ABAR];
2518
2519 mtip_detect_product(dd);
2520 if (dd->product_type == MTIP_PRODUCT_UNKNOWN) {
2521 rv = -EIO;
2522 goto out1;
2523 }
2524 num_command_slots = dd->slot_groups * 32;
2525
2526 hba_setup(dd);
2527
2528 tasklet_init(&dd->tasklet, mtip_tasklet, (unsigned long)dd);
2529
2530 dd->port = kzalloc(sizeof(struct mtip_port), GFP_KERNEL);
2531 if (!dd->port) {
2532 dev_err(&dd->pdev->dev,
2533 "Memory allocation: port structure\n");
2534 return -ENOMEM;
2535 }
2536
2537 /* Counting semaphore to track command slot usage */
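	/* One slot is reserved for internal commands (MTIP_TAG_INTERNAL),
	 * hence the initial count of num_command_slots - 1.
	 */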
2538 sema_init(&dd->port->cmd_slot, num_command_slots - 1);
2539
2540 /* Spinlock to prevent concurrent issue */
2541 spin_lock_init(&dd->port->cmd_issue_lock);
2542
2543 /* Set the port mmio base address. */
2544 dd->port->mmio = dd->mmio + PORT_OFFSET;
2545 dd->port->dd = dd;
2546
2547 /* Allocate memory for the command list. */
2548 dd->port->command_list =
2549 dmam_alloc_coherent(&dd->pdev->dev,
2550 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2),
2551 &dd->port->command_list_dma,
2552 GFP_KERNEL);
2553 if (!dd->port->command_list) {
2554 dev_err(&dd->pdev->dev,
2555 "Memory allocation: command list\n");
2556 rv = -ENOMEM;
2557 goto out1;
2558 }
2559
2560 /* Clear the memory we have allocated. */
2561 memset(dd->port->command_list,
2562 0,
2563 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2));
2564
2565	/* Setup the address of the RX FIS. */
2566 dd->port->rxfis = dd->port->command_list + HW_CMD_SLOT_SZ;
2567 dd->port->rxfis_dma = dd->port->command_list_dma + HW_CMD_SLOT_SZ;
2568
2569 /* Setup the address of the command tables. */
2570 dd->port->command_table = dd->port->rxfis + AHCI_RX_FIS_SZ;
2571 dd->port->command_tbl_dma = dd->port->rxfis_dma + AHCI_RX_FIS_SZ;
2572
2573 /* Setup the address of the identify data. */
2574 dd->port->identify = dd->port->command_table +
2575 HW_CMD_TBL_AR_SZ;
2576 dd->port->identify_dma = dd->port->command_tbl_dma +
2577 HW_CMD_TBL_AR_SZ;
2578
2579 /* Setup the address of the sector buffer. */
2580 dd->port->sector_buffer = (void *) dd->port->identify + ATA_SECT_SIZE;
2581 dd->port->sector_buffer_dma = dd->port->identify_dma + ATA_SECT_SIZE;
2582
2583 /* Point the command headers at the command tables. */
2584 for (i = 0; i < num_command_slots; i++) {
2585 dd->port->commands[i].command_header =
2586 dd->port->command_list +
2587 (sizeof(struct mtip_cmd_hdr) * i);
2588 dd->port->commands[i].command_header_dma =
2589 dd->port->command_list_dma +
2590 (sizeof(struct mtip_cmd_hdr) * i);
2591
2592 dd->port->commands[i].command =
2593 dd->port->command_table + (HW_CMD_TBL_SZ * i);
2594 dd->port->commands[i].command_dma =
2595 dd->port->command_tbl_dma + (HW_CMD_TBL_SZ * i);
2596
2597 if (readl(dd->mmio + HOST_CAP) & HOST_CAP_64)
2598 dd->port->commands[i].command_header->ctbau =
2599 __force_bit2int cpu_to_le32(
2600 (dd->port->commands[i].command_dma >> 16) >> 16);
2601 dd->port->commands[i].command_header->ctba =
2602 __force_bit2int cpu_to_le32(
2603 dd->port->commands[i].command_dma & 0xFFFFFFFF);
2604
2605 /*
2606		 * If this is not done, a bug is reported by the stock
2607		 * FC11 i386 kernel, due to the fact that it has lots of
2608		 * kernel debugging enabled.
2609 */
2610 sg_init_table(dd->port->commands[i].sg, MTIP_MAX_SG);
2611
2612 /* Mark all commands as currently inactive.*/
2613 atomic_set(&dd->port->commands[i].active, 0);
2614 }
2615
2616 /* Setup the pointers to the extended s_active and CI registers. */
2617 for (i = 0; i < dd->slot_groups; i++) {
2618 dd->port->s_active[i] =
2619 dd->port->mmio + i*0x80 + PORT_SCR_ACT;
2620 dd->port->cmd_issue[i] =
2621 dd->port->mmio + i*0x80 + PORT_COMMAND_ISSUE;
2622 dd->port->completed[i] =
2623 dd->port->mmio + i*0x80 + PORT_SDBV;
2624 }
2625
2626 /* Reset the HBA. */
2627 if (mtip_hba_reset(dd) < 0) {
2628 dev_err(&dd->pdev->dev,
2629 "Card did not reset within timeout\n");
2630 rv = -EIO;
2631 goto out2;
2632 }
2633
2634 mtip_init_port(dd->port);
2635 mtip_start_port(dd->port);
2636
2637 /* Setup the ISR and enable interrupts. */
2638 rv = devm_request_irq(&dd->pdev->dev,
2639 dd->pdev->irq,
2640 mtip_irq_handler,
2641 IRQF_SHARED,
2642 dev_driver_string(&dd->pdev->dev),
2643 dd);
2644
2645 if (rv) {
2646 dev_err(&dd->pdev->dev,
2647 "Unable to allocate IRQ %d\n", dd->pdev->irq);
2648 goto out2;
2649 }
2650
2651 /* Enable interrupts on the HBA. */
2652 writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN,
2653 dd->mmio + HOST_CTL);
2654
2655 init_timer(&dd->port->cmd_timer);
2656 init_waitqueue_head(&dd->port->svc_wait);
2657
2658 dd->port->cmd_timer.data = (unsigned long int) dd->port;
2659 dd->port->cmd_timer.function = mtip_timeout_function;
2660 mod_timer(&dd->port->cmd_timer,
2661 jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
2662
2663 if (mtip_get_identify(dd->port, NULL) < 0) {
2664 rv = -EFAULT;
2665 goto out3;
2666 }
2667
2668 if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) ==
2669 MTIP_FTL_REBUILD_MAGIC) {
2670 set_bit(MTIP_FLAG_REBUILD_BIT, &dd->port->flags);
2671 return MTIP_FTL_REBUILD_MAGIC;
2672 }
2673 mtip_dump_identify(dd->port);
2674 return rv;
2675
2676out3:
2677 del_timer_sync(&dd->port->cmd_timer);
2678
2679 /* Disable interrupts on the HBA. */
2680 writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
2681 dd->mmio + HOST_CTL);
2682
2683 /*Release the IRQ. */
2684 devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
2685
2686out2:
2687 mtip_deinit_port(dd->port);
2688
2689 /* Free the command/command header memory. */
2690 dmam_free_coherent(&dd->pdev->dev,
2691 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2),
2692 dd->port->command_list,
2693 dd->port->command_list_dma);
2694out1:
2695	/* Free the memory allocated for the port structure. */
2696 kfree(dd->port);
2697
2698 return rv;
2699}
2700
2701/*
2702 * Called to deinitialize an interface.
2703 *
2704 * @dd Pointer to the driver data structure.
2705 *
2706 * return value
2707 * 0
2708 */
2709static int mtip_hw_exit(struct driver_data *dd)
2710{
2711 /*
2712 * Send standby immediate (E0h) to the drive so that it
2713 * saves its state.
2714 */
2715 if (atomic_read(&dd->drv_cleanup_done) != true) {
2716
2717 mtip_standby_immediate(dd->port);
2718
2719 /* de-initialize the port. */
2720 mtip_deinit_port(dd->port);
2721
2722 /* Disable interrupts on the HBA. */
2723 writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
2724 dd->mmio + HOST_CTL);
2725 }
2726
2727 del_timer_sync(&dd->port->cmd_timer);
2728
2729 /* Release the IRQ. */
2730 devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
2731
2732 /* Stop the bottom half tasklet. */
2733 tasklet_kill(&dd->tasklet);
2734
2735 /* Free the command/command header memory. */
2736 dmam_free_coherent(&dd->pdev->dev,
2737 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2),
2738 dd->port->command_list,
2739 dd->port->command_list_dma);
2740	/* Free the memory allocated for the port structure. */
2741 kfree(dd->port);
2742
2743 return 0;
2744}
2745
2746/*
2747 * Issue a Standby Immediate command to the device.
2748 *
2749 * This function is called by the Block Layer just before the
2750 * system powers off during a shutdown.
2751 *
2752 * @dd Pointer to the driver data structure.
2753 *
2754 * return value
2755 * 0
2756 */
2757static int mtip_hw_shutdown(struct driver_data *dd)
2758{
2759 /*
2760 * Send standby immediate (E0h) to the drive so that it
2761 * saves its state.
2762 */
2763 mtip_standby_immediate(dd->port);
2764
2765 return 0;
2766}
2767
2768/*
2769 * Suspend function
2770 *
2771 * This function is called by the Block Layer just before the
2772 * system hibernates.
2773 *
2774 * @dd Pointer to the driver data structure.
2775 *
2776 * return value
2777 * 0 Suspend was successful
2778 * -EFAULT Suspend was not successful
2779 */
2780static int mtip_hw_suspend(struct driver_data *dd)
2781{
2782 /*
2783 * Send standby immediate (E0h) to the drive
2784 * so that it saves its state.
2785 */
2786 if (mtip_standby_immediate(dd->port) != 0) {
2787 dev_err(&dd->pdev->dev,
2788 "Failed standby-immediate command\n");
2789 return -EFAULT;
2790 }
2791
2792 /* Disable interrupts on the HBA.*/
2793 writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
2794 dd->mmio + HOST_CTL);
2795 mtip_deinit_port(dd->port);
2796
2797 return 0;
2798}
2799
2800/*
2801 * Resume function
2802 *
2803 * This function is called by the Block Layer as the
2804 * system resumes.
2805 *
2806 * @dd Pointer to the driver data structure.
2807 *
2808 * return value
2809 * 0 Resume was successful
2810 * -EFAULT Resume was not successful
2811 */
2812static int mtip_hw_resume(struct driver_data *dd)
2813{
2814 /* Perform any needed hardware setup steps */
2815 hba_setup(dd);
2816
2817 /* Reset the HBA */
2818 if (mtip_hba_reset(dd) != 0) {
2819 dev_err(&dd->pdev->dev,
2820 "Unable to reset the HBA\n");
2821 return -EFAULT;
2822 }
2823
2824 /*
2825 * Enable the port, DMA engine, and FIS reception specific
2826 * h/w in controller.
2827 */
2828 mtip_init_port(dd->port);
2829 mtip_start_port(dd->port);
2830
2831 /* Enable interrupts on the HBA.*/
2832 writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN,
2833 dd->mmio + HOST_CTL);
2834
2835 return 0;
2836}
2837
2838/*
2839 * Helper function for reusing disk name
2840 * upon hot insertion.
2841 */
2842static int rssd_disk_name_format(char *prefix,
2843 int index,
2844 char *buf,
2845 int buflen)
2846{
2847 const int base = 'z' - 'a' + 1;
2848 char *begin = buf + strlen(prefix);
2849 char *end = buf + buflen;
2850 char *p;
2851 int unit;
2852
2853 p = end - 1;
2854 *p = '\0';
2855 unit = base;
2856 do {
2857 if (p == begin)
2858 return -EINVAL;
2859 *--p = 'a' + (index % unit);
2860 index = (index / unit) - 1;
2861 } while (index >= 0);
2862
2863 memmove(begin, p, end - p);
2864 memcpy(buf, prefix, strlen(prefix));
2865
2866 return 0;
2867}
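The helper above uses the same base-26 encoding sd.c uses for sdX names: index 0 maps to "rssda", 25 to "rssdz", 26 to "rssdaa", and so on. A minimal user-space sketch of that mapping (the helper and main() below are illustrative, not part of the driver):

        #include <stdio.h>
        #include <string.h>

        /* Sketch of the base-26 naming used by rssd_disk_name_format(). */
        static int format_disk_name(const char *prefix, int index, char *buf, int buflen)
        {
                const int base = 'z' - 'a' + 1;
                char *begin = buf + strlen(prefix);
                char *end = buf + buflen;
                char *p = end - 1;

                *p = '\0';
                do {
                        if (p == begin)
                                return -1;              /* name does not fit into buf */
                        *--p = 'a' + (index % base);
                        index = (index / base) - 1;
                } while (index >= 0);

                memmove(begin, p, end - p);
                memcpy(buf, prefix, strlen(prefix));
                return 0;
        }

        int main(void)
        {
                int idx[] = { 0, 1, 25, 26, 27 };
                char name[32];

                for (unsigned int i = 0; i < sizeof(idx) / sizeof(idx[0]); i++) {
                        format_disk_name("rssd", idx[i], name, sizeof(name));
                        printf("%s\n", name);   /* rssda rssdb rssdz rssdaa rssdab */
                }
                return 0;
        }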
2868
2869/*
2870 * Block layer IOCTL handler.
2871 *
2872 * @dev Pointer to the block_device structure.
2873 * @mode ignored
2874 * @cmd IOCTL command passed from the user application.
2875 * @arg Argument passed from the user application.
2876 *
2877 * return value
2878 * 0 IOCTL completed successfully.
2879 * -ENOTTY IOCTL not supported or invalid driver data
2880 * structure pointer.
2881 */
2882static int mtip_block_ioctl(struct block_device *dev,
2883 fmode_t mode,
2884 unsigned cmd,
2885 unsigned long arg)
2886{
2887 struct driver_data *dd = dev->bd_disk->private_data;
2888
2889 if (!capable(CAP_SYS_ADMIN))
2890 return -EACCES;
2891
2892 if (!dd)
2893 return -ENOTTY;
2894
2895 switch (cmd) {
2896 case BLKFLSBUF:
2897 return -ENOTTY;
2898 default:
2899 return mtip_hw_ioctl(dd, cmd, arg);
2900 }
2901}
2902
2903#ifdef CONFIG_COMPAT
2904/*
2905 * Block layer compat IOCTL handler.
2906 *
2907 * @dev Pointer to the block_device structure.
2908 * @mode ignored
2909 * @cmd IOCTL command passed from the user application.
2910 * @arg Argument passed from the user application.
2911 *
2912 * return value
2913 * 0 IOCTL completed successfully.
2914 * -ENOTTY IOCTL not supported or invalid driver data
2915 * structure pointer.
2916 */
2917static int mtip_block_compat_ioctl(struct block_device *dev,
2918 fmode_t mode,
2919 unsigned cmd,
2920 unsigned long arg)
2921{
2922 struct driver_data *dd = dev->bd_disk->private_data;
2923
2924 if (!capable(CAP_SYS_ADMIN))
2925 return -EACCES;
2926
2927 if (!dd)
2928 return -ENOTTY;
2929
2930 switch (cmd) {
2931 case BLKFLSBUF:
2932 return -ENOTTY;
2933 case HDIO_DRIVE_TASKFILE: {
2934 struct mtip_compat_ide_task_request_s __user *compat_req_task;
2935 ide_task_request_t req_task;
2936 int compat_tasksize, outtotal, ret;
2937
2938 compat_tasksize =
2939 sizeof(struct mtip_compat_ide_task_request_s);
2940
2941 compat_req_task =
2942 (struct mtip_compat_ide_task_request_s __user *) arg;
2943
2944 if (copy_from_user(&req_task, (void __user *) arg,
2945 compat_tasksize - (2 * sizeof(compat_long_t))))
2946 return -EFAULT;
2947
2948 if (get_user(req_task.out_size, &compat_req_task->out_size))
2949 return -EFAULT;
2950
2951 if (get_user(req_task.in_size, &compat_req_task->in_size))
2952 return -EFAULT;
2953
2954 outtotal = sizeof(struct mtip_compat_ide_task_request_s);
2955
2956 ret = exec_drive_taskfile(dd, (void __user *) arg,
2957 &req_task, outtotal);
2958
2959 if (copy_to_user((void __user *) arg, &req_task,
2960 compat_tasksize -
2961 (2 * sizeof(compat_long_t))))
2962 return -EFAULT;
2963
2964 if (put_user(req_task.out_size, &compat_req_task->out_size))
2965 return -EFAULT;
2966
2967 if (put_user(req_task.in_size, &compat_req_task->in_size))
2968 return -EFAULT;
2969
2970 return ret;
2971 }
2972 default:
2973 return mtip_hw_ioctl(dd, cmd, arg);
2974 }
2975}
2976#endif
2977
2978/*
2979 * Obtain the geometry of the device.
2980 *
2981 * You may think that this function is obsolete, but some applications,
2982 * fdisk for example, still use CHS values. This function describes the
2983 * device as having 224 heads and 56 sectors per cylinder. These values are
2984 * chosen so that each cylinder is aligned on a 4KB boundary. Since a
2985 * partition is described in terms of a start and end cylinder, this means
2986 * that each partition is also 4KB aligned. Non-aligned partitions adversely
2987 * affect performance.
2988 *
2989 * @dev Pointer to the block_device structure.
2990 * @geo Pointer to a hd_geometry structure.
2991 *
2992 * return value
2993 * 0 Operation completed successfully.
2994 * -ENOTTY An error occurred while reading the drive capacity.
2995 */
2996static int mtip_block_getgeo(struct block_device *dev,
2997 struct hd_geometry *geo)
2998{
2999 struct driver_data *dd = dev->bd_disk->private_data;
3000 sector_t capacity;
3001
3002 if (!dd)
3003 return -ENOTTY;
3004
3005 if (!(mtip_hw_get_capacity(dd, &capacity))) {
3006 dev_warn(&dd->pdev->dev,
3007 "Could not get drive capacity.\n");
3008 return -ENOTTY;
3009 }
3010
3011 geo->heads = 224;
3012 geo->sectors = 56;
3013 sector_div(capacity, (geo->heads * geo->sectors));
3014 geo->cylinders = capacity;
3015 return 0;
3016}
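As a quick check of the geometry comment above: 224 heads x 56 sectors = 12,544 sectors per cylinder, and 12,544 x 512 bytes = 6,422,528 bytes = 1,568 x 4,096 bytes, so every cylinder boundary (and therefore every cylinder-aligned partition) falls on a 4KB boundary. The sector_div() call then reports cylinders = capacity / 12,544, with any remainder discarded.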
3017
3018/*
3019 * Block device operation function.
3020 *
3021 * This structure contains pointers to the functions required by the block
3022 * layer.
3023 */
3024static const struct block_device_operations mtip_block_ops = {
3025 .ioctl = mtip_block_ioctl,
3026#ifdef CONFIG_COMPAT
3027 .compat_ioctl = mtip_block_compat_ioctl,
3028#endif
3029 .getgeo = mtip_block_getgeo,
3030 .owner = THIS_MODULE
3031};
3032
3033/*
3034 * Block layer make request function.
3035 *
3036 * This function is called by the kernel to process a BIO for
3037 * the P320 device.
3038 *
3039 * @queue Pointer to the request queue. Unused other than to obtain
3040 * the driver data structure.
3041 * @bio Pointer to the BIO.
3042 *
3043 */
3044static void mtip_make_request(struct request_queue *queue, struct bio *bio)
3045{
3046 struct driver_data *dd = queue->queuedata;
3047 struct scatterlist *sg;
3048 struct bio_vec *bvec;
3049 int nents = 0;
3050 int tag = 0;
3051
3052 if (unlikely(!bio_has_data(bio))) {
3053 blk_queue_flush(queue, 0);
3054 bio_endio(bio, 0);
3055 return;
3056 }
3057
3058 sg = mtip_hw_get_scatterlist(dd, &tag);
3059 if (likely(sg != NULL)) {
3060 blk_queue_bounce(queue, &bio);
3061
3062 if (unlikely((bio)->bi_vcnt > MTIP_MAX_SG)) {
3063 dev_warn(&dd->pdev->dev,
3064 "Maximum number of SGL entries exceeded");
3065 bio_io_error(bio);
3066 mtip_hw_release_scatterlist(dd, tag);
3067 return;
3068 }
3069
3070 /* Create the scatter list for this bio. */
3071 bio_for_each_segment(bvec, bio, nents) {
3072 sg_set_page(&sg[nents],
3073 bvec->bv_page,
3074 bvec->bv_len,
3075 bvec->bv_offset);
3076 }
3077
3078 /* Issue the read/write. */
3079 mtip_hw_submit_io(dd,
3080 bio->bi_sector,
3081 bio_sectors(bio),
3082 nents,
3083 tag,
3084 bio_endio,
3085 bio,
3086 bio_data_dir(bio));
3087 } else
3088 bio_io_error(bio);
3089}
3090
3091/*
3092 * Block layer initialization function.
3093 *
3094 * This function is called once by the PCI layer for each P320
3095 * device that is connected to the system.
3096 *
3097 * @dd Pointer to the driver data structure.
3098 *
3099 * return value
3100 * 0 on success else an error code.
3101 */
3102static int mtip_block_initialize(struct driver_data *dd)
3103{
3104 int rv = 0, wait_for_rebuild = 0;
3105 sector_t capacity;
3106 unsigned int index = 0;
3107 struct kobject *kobj;
3108 unsigned char thd_name[16];
3109
3110 if (dd->disk)
3111 goto skip_create_disk; /* hw init done, before rebuild */
3112
3113 /* Initialize the protocol layer. */
3114 wait_for_rebuild = mtip_hw_init(dd);
3115 if (wait_for_rebuild < 0) {
3116 dev_err(&dd->pdev->dev,
3117 "Protocol layer initialization failed\n");
3118 rv = -EINVAL;
3119 goto protocol_init_error;
3120 }
3121
3122 dd->disk = alloc_disk(MTIP_MAX_MINORS);
3123 if (dd->disk == NULL) {
3124 dev_err(&dd->pdev->dev,
3125 "Unable to allocate gendisk structure\n");
3126 rv = -EINVAL;
3127 goto alloc_disk_error;
3128 }
3129
3130 /* Generate the disk name, implemented the same way as in sd.c */
3131 do {
3132 if (!ida_pre_get(&rssd_index_ida, GFP_KERNEL))
3133 goto ida_get_error;
3134
3135 spin_lock(&rssd_index_lock);
3136 rv = ida_get_new(&rssd_index_ida, &index);
3137 spin_unlock(&rssd_index_lock);
3138 } while (rv == -EAGAIN);
3139
3140 if (rv)
3141 goto ida_get_error;
3142
3143 rv = rssd_disk_name_format("rssd",
3144 index,
3145 dd->disk->disk_name,
3146 DISK_NAME_LEN);
3147 if (rv)
3148 goto disk_index_error;
3149
3150 dd->disk->driverfs_dev = &dd->pdev->dev;
3151 dd->disk->major = dd->major;
3152 dd->disk->first_minor = dd->instance * MTIP_MAX_MINORS;
3153 dd->disk->fops = &mtip_block_ops;
3154 dd->disk->private_data = dd;
3155 dd->index = index;
3156
3157 /*
3158 * If a rebuild is pending, start the service thread and delay the
3159 * block queue creation and add_disk().
3160 */
3161 if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC)
3162 goto start_service_thread;
3163
3164skip_create_disk:
3165 /* Allocate the request queue. */
3166 dd->queue = blk_alloc_queue(GFP_KERNEL);
3167 if (dd->queue == NULL) {
3168 dev_err(&dd->pdev->dev,
3169 "Unable to allocate request queue\n");
3170 rv = -ENOMEM;
3171 goto block_queue_alloc_init_error;
3172 }
3173
3174 /* Attach our request function to the request queue. */
3175 blk_queue_make_request(dd->queue, mtip_make_request);
3176
3177 dd->disk->queue = dd->queue;
3178 dd->queue->queuedata = dd;
3179
3180 /* Set device limits. */
3181 set_bit(QUEUE_FLAG_NONROT, &dd->queue->queue_flags);
3182 blk_queue_max_segments(dd->queue, MTIP_MAX_SG);
3183 blk_queue_physical_block_size(dd->queue, 4096);
3184 blk_queue_io_min(dd->queue, 4096);
3185 /*
3186 * Write-back caching is not supported by the device. FUA depends on
3187 * write-back cache support, hence flush support is set to zero.
3188 */
3189 blk_queue_flush(dd->queue, 0);
3190
3191 /* Set the capacity of the device in 512 byte sectors. */
3192 if (!(mtip_hw_get_capacity(dd, &capacity))) {
3193 dev_warn(&dd->pdev->dev,
3194 "Could not read drive capacity\n");
3195 rv = -EIO;
3196 goto read_capacity_error;
3197 }
3198 set_capacity(dd->disk, capacity);
3199
3200 /* Enable the block device and add it to /dev */
3201 add_disk(dd->disk);
3202
3203 /*
3204 * Now that the disk is active, initialize any sysfs attributes
3205 * managed by the protocol layer.
3206 */
3207 kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
3208 if (kobj) {
3209 mtip_hw_sysfs_init(dd, kobj);
3210 kobject_put(kobj);
3211 }
3212
3213 if (dd->mtip_svc_handler)
3214 return rv; /* service thread created for handling rebuild */
3215
3216start_service_thread:
3217 sprintf(thd_name, "mtip_svc_thd_%02d", index);
3218
3219 dd->mtip_svc_handler = kthread_run(mtip_service_thread,
3220 dd, thd_name);
3221
3222 if (IS_ERR(dd->mtip_svc_handler)) {
3223 printk(KERN_ERR "mtip32xx: service thread failed to start\n");
3224 dd->mtip_svc_handler = NULL;
3225 rv = -EFAULT;
3226 goto kthread_run_error;
3227 }
3228
3229 return rv;
3230
3231kthread_run_error:
3232 /* Delete our gendisk. This also removes the device from /dev */
3233 del_gendisk(dd->disk);
3234
3235read_capacity_error:
3236 blk_cleanup_queue(dd->queue);
3237
3238block_queue_alloc_init_error:
3239disk_index_error:
3240 spin_lock(&rssd_index_lock);
3241 ida_remove(&rssd_index_ida, index);
3242 spin_unlock(&rssd_index_lock);
3243
3244ida_get_error:
3245 put_disk(dd->disk);
3246
3247alloc_disk_error:
3248 mtip_hw_exit(dd); /* De-initialize the protocol layer. */
3249
3250protocol_init_error:
3251 return rv;
3252}
3253
3254/*
3255 * Block layer deinitialization function.
3256 *
3257 * Called by the PCI layer as each P320 device is removed.
3258 *
3259 * @dd Pointer to the driver data structure.
3260 *
3261 * return value
3262 * 0
3263 */
3264static int mtip_block_remove(struct driver_data *dd)
3265{
3266 struct kobject *kobj;
3267
3268 if (dd->mtip_svc_handler) {
3269 set_bit(MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT, &dd->port->flags);
3270 wake_up_interruptible(&dd->port->svc_wait);
3271 kthread_stop(dd->mtip_svc_handler);
3272 }
3273
3274 /* Clean up the sysfs attributes managed by the protocol layer. */
3275 kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
3276 if (kobj) {
3277 mtip_hw_sysfs_exit(dd, kobj);
3278 kobject_put(kobj);
3279 }
3280
3281 /*
3282 * Delete our gendisk structure. This also removes the device
3283 * from /dev
3284 */
3285 del_gendisk(dd->disk);
3286 blk_cleanup_queue(dd->queue);
3287 dd->disk = NULL;
3288 dd->queue = NULL;
3289
3290 /* De-initialize the protocol layer. */
3291 mtip_hw_exit(dd);
3292
3293 return 0;
3294}
3295
3296/*
3297 * Function called by the PCI layer just before the
3298 * machine shuts down.
3299 *
3300 * If a protocol layer shutdown function is present it will be called
3301 * by this function.
3302 *
3303 * @dd Pointer to the driver data structure.
3304 *
3305 * return value
3306 * 0
3307 */
3308static int mtip_block_shutdown(struct driver_data *dd)
3309{
3310 dev_info(&dd->pdev->dev,
3311 "Shutting down %s ...\n", dd->disk->disk_name);
3312
3313 /* Delete our gendisk structure, and cleanup the blk queue. */
3314 del_gendisk(dd->disk);
3315 blk_cleanup_queue(dd->queue);
3316 dd->disk = NULL;
3317 dd->queue = NULL;
3318
3319 mtip_hw_shutdown(dd);
3320 return 0;
3321}
3322
3323static int mtip_block_suspend(struct driver_data *dd)
3324{
3325 dev_info(&dd->pdev->dev,
3326 "Suspending %s ...\n", dd->disk->disk_name);
3327 mtip_hw_suspend(dd);
3328 return 0;
3329}
3330
3331static int mtip_block_resume(struct driver_data *dd)
3332{
3333 dev_info(&dd->pdev->dev, "Resuming %s ...\n",
3334 dd->disk->disk_name);
3335 mtip_hw_resume(dd);
3336 return 0;
3337}
3338
3339/*
3340 * Called for each supported PCI device detected.
3341 *
3342 * This function allocates the private data structure, enables the
3343 * PCI device and then calls the block layer initialization function.
3344 *
3345 * return value
3346 * 0 on success else an error code.
3347 */
3348static int mtip_pci_probe(struct pci_dev *pdev,
3349 const struct pci_device_id *ent)
3350{
3351 int rv = 0;
3352 struct driver_data *dd = NULL;
3353
3354 /* Allocate memory for this device's private data. */
3355 dd = kzalloc(sizeof(struct driver_data), GFP_KERNEL);
3356 if (dd == NULL) {
3357 dev_err(&pdev->dev,
3358 "Unable to allocate memory for driver data\n");
3359 return -ENOMEM;
3360 }
3361
3362 /* Set the atomic variable as 1 in case of SRSI */
3363 atomic_set(&dd->drv_cleanup_done, true);
3364
3365 atomic_set(&dd->resumeflag, false);
3366
3367 /* Attach the private data to this PCI device. */
3368 pci_set_drvdata(pdev, dd);
3369
3370 rv = pcim_enable_device(pdev);
3371 if (rv < 0) {
3372 dev_err(&pdev->dev, "Unable to enable device\n");
3373 goto iomap_err;
3374 }
3375
3376 /* Map BAR5 to memory. */
3377 rv = pcim_iomap_regions(pdev, 1 << MTIP_ABAR, MTIP_DRV_NAME);
3378 if (rv < 0) {
3379 dev_err(&pdev->dev, "Unable to map regions\n");
3380 goto iomap_err;
3381 }
3382
3383 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
3384 rv = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
3385
3386 if (rv) {
3387 rv = pci_set_consistent_dma_mask(pdev,
3388 DMA_BIT_MASK(32));
3389 if (rv) {
3390 dev_warn(&pdev->dev,
3391 "64-bit DMA enable failed\n");
3392 goto setmask_err;
3393 }
3394 }
3395 }
3396
3397 pci_set_master(pdev);
3398
3399 if (pci_enable_msi(pdev)) {
3400 dev_warn(&pdev->dev,
3401 "Unable to enable MSI interrupt.\n");
3402 goto block_initialize_err;
3403 }
3404
3405 /* Copy the info we may need later into the private data structure. */
3406 dd->major = mtip_major;
3407 dd->instance = instance;
3408 dd->pdev = pdev;
3409
3410 /* Initialize the block layer. */
3411 rv = mtip_block_initialize(dd);
3412 if (rv < 0) {
3413 dev_err(&pdev->dev,
3414 "Unable to initialize block layer\n");
3415 goto block_initialize_err;
3416 }
3417
3418 /*
3419 * Increment the instance count so that each device has a unique
3420 * instance number.
3421 */
3422 instance++;
3423
3424 goto done;
3425
3426block_initialize_err:
3427 pci_disable_msi(pdev);
3428
3429setmask_err:
3430 pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
3431
3432iomap_err:
3433 kfree(dd);
3434 pci_set_drvdata(pdev, NULL);
3435 return rv;
3436done:
3437 /* Clear the cleanup-done flag so surprise removal (SRSI) will wait. */
3438 atomic_set(&dd->drv_cleanup_done, false);
3439
3440 return rv;
3441}
3442
3443/*
3444 * Called for each probed device when the device is removed or the
3445 * driver is unloaded.
3446 *
3447 * return value
3448 * None
3449 */
3450static void mtip_pci_remove(struct pci_dev *pdev)
3451{
3452 struct driver_data *dd = pci_get_drvdata(pdev);
3453 int counter = 0;
3454
3455 if (mtip_check_surprise_removal(pdev)) {
3456 while (atomic_read(&dd->drv_cleanup_done) == false) {
3457 counter++;
3458 msleep(20);
3459 if (counter == 10) {
3460 /* Cleanup the outstanding commands */
3461 mtip_command_cleanup(dd);
3462 break;
3463 }
3464 }
3465 }
3466 /* Set the atomic variable as 1 in case of SRSI */
3467 atomic_set(&dd->drv_cleanup_done, true);
3468
3469 /* Clean up the block layer. */
3470 mtip_block_remove(dd);
3471
3472 pci_disable_msi(pdev);
3473
3474 kfree(dd);
3475 pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
3476}
3477
3478/*
3479 * Called for each probed device when the device is suspended.
3480 *
3481 * return value
3482 * 0 Success
3483 * <0 Error
3484 */
3485static int mtip_pci_suspend(struct pci_dev *pdev, pm_message_t mesg)
3486{
3487 int rv = 0;
3488 struct driver_data *dd = pci_get_drvdata(pdev);
3489
3490 if (!dd) {
3491 dev_err(&pdev->dev,
3492 "Driver private data structure is NULL\n");
3493 return -EFAULT;
3494 }
3495
3496 atomic_set(&dd->resumeflag, true);
3497
3498 /* Disable ports & interrupts then send standby immediate */
3499 rv = mtip_block_suspend(dd);
3500 if (rv < 0) {
3501 dev_err(&pdev->dev,
3502 "Failed to suspend controller\n");
3503 return rv;
3504 }
3505
3506 /*
3507 * Save the pci config space to pdev structure &
3508 * disable the device
3509 */
3510 pci_save_state(pdev);
3511 pci_disable_device(pdev);
3512
3513 /* Move to low power state. */
3514 pci_set_power_state(pdev, PCI_D3hot);
3515
3516 return rv;
3517}
3518
3519/*
3520 * Called for each probed device when the device is resumed.
3521 *
3522 * return value
3523 * 0 Success
3524 * <0 Error
3525 */
3526static int mtip_pci_resume(struct pci_dev *pdev)
3527{
3528 int rv = 0;
3529 struct driver_data *dd;
3530
3531 dd = pci_get_drvdata(pdev);
3532 if (!dd) {
3533 dev_err(&pdev->dev,
3534 "Driver private data structure is NULL\n");
3535 return -EFAULT;
3536 }
3537
3538 /* Move the device to active State */
3539 pci_set_power_state(pdev, PCI_D0);
3540
3541 /* Restore PCI configuration space */
3542 pci_restore_state(pdev);
3543
3544 /* Enable the PCI device. */
3545 rv = pcim_enable_device(pdev);
3546 if (rv < 0) {
3547 dev_err(&pdev->dev,
3548 "Failed to enable card during resume\n");
3549 goto err;
3550 }
3551 pci_set_master(pdev);
3552
3553 /*
3554 * Resets the HBA, re-initializes and starts the port,
3555 * then enables interrupts (see mtip_hw_resume()).
3556 */
3557 rv = mtip_block_resume(dd);
3558 if (rv < 0)
3559 dev_err(&pdev->dev, "Unable to resume\n");
3560
3561err:
3562 atomic_set(&dd->resumeflag, false);
3563
3564 return rv;
3565}
3566
3567/*
3568 * Shutdown routine
3569 *
3570 * return value
3571 * None
3572 */
3573static void mtip_pci_shutdown(struct pci_dev *pdev)
3574{
3575 struct driver_data *dd = pci_get_drvdata(pdev);
3576 if (dd)
3577 mtip_block_shutdown(dd);
3578}
3579
3580/* Table of device ids supported by this driver. */
3581static DEFINE_PCI_DEVICE_TABLE(mtip_pci_tbl) = {
3582 { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320_DEVICE_ID) },
3583 { 0 }
3584};
3585
3586/* Structure that describes the PCI driver functions. */
3587static struct pci_driver mtip_pci_driver = {
3588 .name = MTIP_DRV_NAME,
3589 .id_table = mtip_pci_tbl,
3590 .probe = mtip_pci_probe,
3591 .remove = mtip_pci_remove,
3592 .suspend = mtip_pci_suspend,
3593 .resume = mtip_pci_resume,
3594 .shutdown = mtip_pci_shutdown,
3595};
3596
3597MODULE_DEVICE_TABLE(pci, mtip_pci_tbl);
3598
3599/*
3600 * Module initialization function.
3601 *
3602 * Called once when the module is loaded. This function allocates a major
3603 * block device number to the Cyclone devices and registers the PCI layer
3604 * of the driver.
3605 *
3606 * Return value
3607 * 0 on success else error code.
3608 */
3609static int __init mtip_init(void)
3610{
3611 printk(KERN_INFO MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n");
3612
3613 /* Allocate a major block device number to use with this driver. */
3614 mtip_major = register_blkdev(0, MTIP_DRV_NAME);
3615 if (mtip_major < 0) {
3616 printk(KERN_ERR "Unable to register block device (%d)\n",
3617 mtip_major);
3618 return -EBUSY;
3619 }
3620
3621 /* Register our PCI operations. */
3622 return pci_register_driver(&mtip_pci_driver);
3623}
3624
3625/*
3626 * Module de-initialization function.
3627 *
3628 * Called once when the module is unloaded. This function deallocates
3629 * the major block device number allocated by mtip_init() and
3630 * unregisters the PCI layer of the driver.
3631 *
3632 * Return value
3633 * none
3634 */
3635static void __exit mtip_exit(void)
3636{
3637 /* Release the allocated major block device number. */
3638 unregister_blkdev(mtip_major, MTIP_DRV_NAME);
3639
3640 /* Unregister the PCI driver. */
3641 pci_unregister_driver(&mtip_pci_driver);
3642}
3643
3644MODULE_AUTHOR("Micron Technology, Inc");
3645MODULE_DESCRIPTION("Micron RealSSD PCIe Block Driver");
3646MODULE_LICENSE("GPL");
3647MODULE_VERSION(MTIP_DRV_VERSION);
3648
3649module_init(mtip_init);
3650module_exit(mtip_exit);
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
new file mode 100644
index 000000000000..e0554a8f2233
--- /dev/null
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -0,0 +1,418 @@
1/*
2 * mtip32xx.h - Header file for the P320 SSD Block Driver
3 * Copyright (C) 2011 Micron Technology, Inc.
4 *
5 * Portions of this code were derived from works subjected to the
6 * following copyright:
7 * Copyright (C) 2009 Integrated Device Technology, Inc.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 */
20
21#ifndef __MTIP32XX_H__
22#define __MTIP32XX_H__
23
24#include <linux/spinlock.h>
25#include <linux/rwsem.h>
26#include <linux/ata.h>
27#include <linux/interrupt.h>
28#include <linux/genhd.h>
29#include <linux/version.h>
30
31/* Offset of Subsystem Device ID in PCI configuration space */
32#define PCI_SUBSYSTEM_DEVICEID 0x2E
33
34/* Offset of Device Control register in PCIe extended capabilities space */
35#define PCIE_CONFIG_EXT_DEVICE_CONTROL_OFFSET 0x48
36
37/* # of times to retry timed out IOs */
38#define MTIP_MAX_RETRIES 5
39
40/* Various timeout values in ms */
41#define MTIP_NCQ_COMMAND_TIMEOUT_MS 5000
42#define MTIP_IOCTL_COMMAND_TIMEOUT_MS 5000
43#define MTIP_INTERNAL_COMMAND_TIMEOUT_MS 5000
44
45/* check for timeouts every 500ms */
46#define MTIP_TIMEOUT_CHECK_PERIOD 500
47
48/* ftl rebuild */
49#define MTIP_FTL_REBUILD_OFFSET 142
50#define MTIP_FTL_REBUILD_MAGIC 0xED51
51#define MTIP_FTL_REBUILD_TIMEOUT_MS 2400000
52
53/* Macro to extract the tag bit number from a tag value. */
54#define MTIP_TAG_BIT(tag) (tag & 0x1F)
55
56/*
57 * Macro to extract the tag index from a tag value. The index
58 * is used to access the correct s_active/Command Issue register based
59 * on the tag value.
60 */
61#define MTIP_TAG_INDEX(tag) (tag >> 5)
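Worked example for the two macros above: tag 37 gives MTIP_TAG_INDEX(37) = 37 >> 5 = 1 and MTIP_TAG_BIT(37) = 37 & 0x1F = 5, i.e. bit 5 of the second s_active/Command Issue register. A hedged sketch of the typical usage (the register array comes from struct mtip_port later in this header; this is not a verbatim excerpt of the driver):

        /* Illustrative only: raise the issue bit for a tag. */
        static inline void example_issue_tag(void __iomem *cmd_issue[], int tag)
        {
                writel(1 << MTIP_TAG_BIT(tag), cmd_issue[MTIP_TAG_INDEX(tag)]);
        }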
62
63/*
64 * Maximum number of scatter gather entries
65 * a single command may have.
66 */
67#define MTIP_MAX_SG 128
68
69/*
70 * Maximum number of slot groups (Command Issue & s_active registers)
71 * NOTE: This is the driver maximum; check dd->slot_groups for actual value.
72 */
73#define MTIP_MAX_SLOT_GROUPS 8
74
75/* Internal command tag. */
76#define MTIP_TAG_INTERNAL 0
77
78/* Micron Vendor ID & P320x SSD Device ID */
79#define PCI_VENDOR_ID_MICRON 0x1344
80#define P320_DEVICE_ID 0x5150
81
82/* Driver name and version strings */
83#define MTIP_DRV_NAME "mtip32xx"
84#define MTIP_DRV_VERSION "1.2.6os3"
85
86/* Maximum number of minor device numbers per device. */
87#define MTIP_MAX_MINORS 16
88
89/* Maximum number of supported command slots. */
90#define MTIP_MAX_COMMAND_SLOTS (MTIP_MAX_SLOT_GROUPS * 32)
91
92/*
93 * Per-tag bitfield size in longs.
94 * Linux bit manipulation functions
95 * (i.e. test_and_set_bit, find_next_zero_bit)
96 * manipulate memory in longs, so we try to make the math work.
97 * take the slot groups and find the number of longs, rounding up.
98 * Careful! i386 and x86_64 use different size longs!
99 */
100#define U32_PER_LONG (sizeof(long) / sizeof(u32))
101#define SLOTBITS_IN_LONGS ((MTIP_MAX_SLOT_GROUPS + \
102 (U32_PER_LONG-1))/U32_PER_LONG)
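For concreteness: on x86_64 sizeof(long) is 8, so U32_PER_LONG = 2 and SLOTBITS_IN_LONGS = (8 + 1) / 2 = 4 longs (4 x 64 = 256 bits); on i386 sizeof(long) is 4, so U32_PER_LONG = 1 and SLOTBITS_IN_LONGS = 8 longs (8 x 32 = 256 bits). Either way the per-port bitmaps below cover MTIP_MAX_SLOT_GROUPS * 32 = 256 command slots.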
103
104/* BAR number used to access the HBA registers. */
105#define MTIP_ABAR 5
106
107#ifdef DEBUG
108 #define dbg_printk(format, arg...) \
109 printk(pr_fmt(format), ##arg);
110#else
111 #define dbg_printk(format, arg...)
112#endif
113
114#define __force_bit2int (unsigned int __force)
115
116/* below are bit numbers in 'flags' defined in mtip_port */
117#define MTIP_FLAG_IC_ACTIVE_BIT 0
118#define MTIP_FLAG_EH_ACTIVE_BIT 1
119#define MTIP_FLAG_SVC_THD_ACTIVE_BIT 2
120#define MTIP_FLAG_ISSUE_CMDS_BIT 4
121#define MTIP_FLAG_REBUILD_BIT 5
122#define MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT 8
123
124/* Register Frame Information Structure (FIS), host to device. */
125struct host_to_dev_fis {
126 /*
127 * FIS type.
128 * - 27h Register FIS, host to device.
129 * - 34h Register FIS, device to host.
130 * - 39h DMA Activate FIS, device to host.
131 * - 41h DMA Setup FIS, bi-directional.
132 * - 46h Data FIS, bi-directional.
133 * - 58h BIST Activate FIS, bi-directional.
134 * - 5Fh PIO Setup FIS, device to host.
135 * - A1h Set Device Bits FIS, device to host.
136 */
137 unsigned char type;
138 unsigned char opts;
139 unsigned char command;
140 unsigned char features;
141
142 union {
143 unsigned char lba_low;
144 unsigned char sector;
145 };
146 union {
147 unsigned char lba_mid;
148 unsigned char cyl_low;
149 };
150 union {
151 unsigned char lba_hi;
152 unsigned char cyl_hi;
153 };
154 union {
155 unsigned char device;
156 unsigned char head;
157 };
158
159 union {
160 unsigned char lba_low_ex;
161 unsigned char sector_ex;
162 };
163 union {
164 unsigned char lba_mid_ex;
165 unsigned char cyl_low_ex;
166 };
167 union {
168 unsigned char lba_hi_ex;
169 unsigned char cyl_hi_ex;
170 };
171 unsigned char features_ex;
172
173 unsigned char sect_count;
174 unsigned char sect_cnt_ex;
175 unsigned char res2;
176 unsigned char control;
177
178 unsigned int res3;
179};
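A minimal sketch of how a non-data command such as Standby Immediate (E0h, used by mtip_hw_exit()/mtip_hw_shutdown() above) could be packed into this FIS. The 0x27 type comes from the comment above; treating bit 7 of 'opts' as the SATA "C" (command register update) flag is an assumption drawn from the register-FIS layout, not from code shown in this hunk:

        /* Illustrative sketch, not driver code. */
        static void example_build_standby_fis(struct host_to_dev_fis *fis)
        {
                memset(fis, 0, sizeof(*fis));           /* assumes <linux/string.h> */
                fis->type    = 0x27;                    /* Register FIS, host to device */
                fis->opts    = 1 << 7;                  /* "C" bit: command register update */
                fis->command = ATA_CMD_STANDBYNOW1;     /* Standby Immediate, E0h (<linux/ata.h>) */
        }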
180
181/* Command header structure. */
182struct mtip_cmd_hdr {
183 /*
184 * Command options.
185 * - Bits 31:16 Number of PRD entries.
186 * - Bits 15:8 Unused in this implementation.
187 * - Bit 7 Prefetch bit, informs the drive to prefetch PRD entries.
188 * - Bit 6 Write bit, should be set when writing data to the device.
189 * - Bit 5 Unused in this implementation.
190 * - Bits 4:0 Length of the command FIS in DWords (DWord = 4 bytes).
191 */
192 unsigned int opts;
193 /* This field is unused when using NCQ. */
194 union {
195 unsigned int byte_count;
196 unsigned int status;
197 };
198 /*
199 * Lower 32 bits of the command table address associated with this
200 * header. The command table addresses must be 128 byte aligned.
201 */
202 unsigned int ctba;
203 /*
204 * If 64 bit addressing is used this field is the upper 32 bits
205 * of the command table address associated with this command.
206 */
207 unsigned int ctbau;
208 /* Reserved and unused. */
209 unsigned int res[4];
210};
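Based purely on the bit layout documented for 'opts' above, a header for a transfer with nents PRD entries and a 5-DWord register FIS (sizeof(struct host_to_dev_fis) / 4 = 20 / 4 = 5) could be composed roughly as follows; endianness conversion and the prefetch bit are omitted for brevity:

        /* Illustrative sketch of composing mtip_cmd_hdr.opts. */
        static unsigned int example_cmd_hdr_opts(unsigned int nents, int is_write)
        {
                unsigned int opts;

                opts  = nents << 16;                                    /* bits 31:16: PRD count  */
                opts |= is_write ? (1 << 6) : 0;                        /* bit 6: write direction */
                opts |= (sizeof(struct host_to_dev_fis) / 4) & 0x1F;    /* bits 4:0: FIS DWords   */
                return opts;
        }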
211
212/* Command scatter gather structure (PRD). */
213struct mtip_cmd_sg {
214 /*
215 * Low 32 bits of the data buffer address. For P320 this
216 * address must be 8 byte aligned signified by bits 2:0 being
217 * set to 0.
218 */
219 unsigned int dba;
220 /*
221 * When 64 bit addressing is used this field is the upper
222 * 32 bits of the data buffer address.
223 */
224 unsigned int dba_upper;
225 /* Unused. */
226 unsigned int reserved;
227 /*
228 * Bit 31: interrupt when this data block has been transferred.
229 * Bits 30..22: reserved
230 * Bits 21..0: byte count (minus 1). For P320 the byte count must be
231 * 8 byte aligned signified by bits 2:0 being set to 1.
232 */
233 unsigned int info;
234};
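A simplified sketch of filling one PRD entry from a DMA-mapped scatterlist element, following the field descriptions above (the little-endian conversion and the 22-bit mask are left out; sg_dma_address()/sg_dma_len() and upper/lower_32_bits() are standard kernel helpers):

        /* Illustrative sketch, not the driver's fill routine. */
        static void example_fill_prd(struct mtip_cmd_sg *prd, struct scatterlist *sg)
        {
                dma_addr_t addr = sg_dma_address(sg);

                prd->dba       = lower_32_bits(addr);
                prd->dba_upper = upper_32_bits(addr);
                prd->info      = sg_dma_len(sg) - 1;    /* bits 21:0: byte count minus 1 */
        }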
235struct mtip_port;
236
237/* Structure used to describe a command. */
238struct mtip_cmd {
239
240 struct mtip_cmd_hdr *command_header; /* ptr to command header entry */
241
242 dma_addr_t command_header_dma; /* corresponding physical address */
243
244 void *command; /* ptr to command table entry */
245
246 dma_addr_t command_dma; /* corresponding physical address */
247
248 void *comp_data; /* data passed to completion function comp_func() */
249 /*
250 * Completion function called by the ISR upon completion of
251 * a command.
252 */
253 void (*comp_func)(struct mtip_port *port,
254 int tag,
255 void *data,
256 int status);
257 /* Additional callback function that may be called by comp_func() */
258 void (*async_callback)(void *data, int status);
259
260 void *async_data; /* Addl. data passed to async_callback() */
261
262 int scatter_ents; /* Number of scatter list entries used */
263
264 struct scatterlist sg[MTIP_MAX_SG]; /* Scatter list entries */
265
266 int retries; /* The number of retries left for this command. */
267
268 int direction; /* Data transfer direction */
269
270 unsigned long comp_time; /* command completion time, in jiffies */
271
272 atomic_t active; /* indicates whether this command has been sent to the drive. */
273};
274
275/* Structure used to describe a port. */
276struct mtip_port {
277 /* Pointer back to the driver data for this port. */
278 struct driver_data *dd;
279 /*
280 * Used to determine if the data pointed to by the
281 * identify field is valid.
282 */
283 unsigned long identify_valid;
284 /* Base address of the memory mapped IO for the port. */
285 void __iomem *mmio;
286 /* Array of pointers to the memory mapped s_active registers. */
287 void __iomem *s_active[MTIP_MAX_SLOT_GROUPS];
288 /* Array of pointers to the memory mapped completed registers. */
289 void __iomem *completed[MTIP_MAX_SLOT_GROUPS];
290 /* Array of pointers to the memory mapped Command Issue registers. */
291 void __iomem *cmd_issue[MTIP_MAX_SLOT_GROUPS];
292 /*
293 * Pointer to the beginning of the command header memory as used
294 * by the driver.
295 */
296 void *command_list;
297 /*
298 * Pointer to the beginning of the command header memory as used
299 * by the DMA.
300 */
301 dma_addr_t command_list_dma;
302 /*
303 * Pointer to the beginning of the RX FIS memory as used
304 * by the driver.
305 */
306 void *rxfis;
307 /*
308 * Pointer to the beginning of the RX FIS memory as used
309 * by the DMA.
310 */
311 dma_addr_t rxfis_dma;
312 /*
313 * Pointer to the beginning of the command table memory as used
314 * by the driver.
315 */
316 void *command_table;
317 /*
318 * Pointer to the beginning of the command table memory as used
319 * by the DMA.
320 */
321 dma_addr_t command_tbl_dma;
322 /*
323 * Pointer to the beginning of the identify data memory as used
324 * by the driver.
325 */
326 u16 *identify;
327 /*
328 * Pointer to the beginning of the identify data memory as used
329 * by the DMA.
330 */
331 dma_addr_t identify_dma;
332 /*
333 * Pointer to the beginning of a sector buffer that is used
334 * by the driver when issuing internal commands.
335 */
336 u16 *sector_buffer;
337 /*
338 * Pointer to the beginning of a sector buffer that is used
339 * by the DMA when the driver issues internal commands.
340 */
341 dma_addr_t sector_buffer_dma;
342 /*
343 * Bit significant, used to determine if a command slot has
344 * been allocated. i.e. the slot is in use. Bits are cleared
345 * when the command slot and all associated data structures
346 * are no longer needed.
347 */
348 unsigned long allocated[SLOTBITS_IN_LONGS];
349 /*
350 * used to queue commands when an internal command is in progress
351 * or error handling is active
352 */
353 unsigned long cmds_to_issue[SLOTBITS_IN_LONGS];
354 /*
355 * Array of command slots. Structure includes pointers to the
356 * command header and command table, and completion function and data
357 * pointers.
358 */
359 struct mtip_cmd commands[MTIP_MAX_COMMAND_SLOTS];
360 /* Used by mtip_service_thread to wait for an event */
361 wait_queue_head_t svc_wait;
362 /*
363 * indicates the state of the port. Also, helps the service thread
364 * to determine its action on wake up.
365 */
366 unsigned long flags;
367 /*
368 * Timer used to complete commands that have been active for too long.
369 */
370 struct timer_list cmd_timer;
371 /*
372 * Semaphore used to block threads if there are no
373 * command slots available.
374 */
375 struct semaphore cmd_slot;
376 /* Spinlock for working around command-issue bug. */
377 spinlock_t cmd_issue_lock;
378};
379
380/*
381 * Driver private data structure.
382 *
383 * One structure is allocated per probed device.
384 */
385struct driver_data {
386 void __iomem *mmio; /* Base address of the HBA registers. */
387
388 int major; /* Major device number. */
389
390 int instance; /* Instance number. First device probed is 0, ... */
391
392 struct gendisk *disk; /* Pointer to our gendisk structure. */
393
394 struct pci_dev *pdev; /* Pointer to the PCI device structure. */
395
396 struct request_queue *queue; /* Our request queue. */
397
398 struct mtip_port *port; /* Pointer to the port data structure. */
399
400 /* Tasklet used to process the bottom half of the ISR. */
401 struct tasklet_struct tasklet;
402
403 unsigned product_type; /* magic value declaring the product type */
404
405 unsigned slot_groups; /* number of slot groups the product supports */
406
407 atomic_t drv_cleanup_done; /* Atomic variable for SRSI */
408
409 unsigned long index; /* Index to determine the disk name */
410
411 unsigned int ftlrebuildflag; /* FTL rebuild flag */
412
413 atomic_t resumeflag; /* Atomic variable to track suspend/resume */
414
415 struct task_struct *mtip_svc_handler; /* task_struct of svc thd */
416};
417
418#endif
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index c3f0ee16594d..061427a75d37 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -34,12 +34,11 @@
34#include <linux/kthread.h> 34#include <linux/kthread.h>
35 35
36#include <asm/uaccess.h> 36#include <asm/uaccess.h>
37#include <asm/system.h>
38#include <asm/types.h> 37#include <asm/types.h>
39 38
40#include <linux/nbd.h> 39#include <linux/nbd.h>
41 40
42#define LO_MAGIC 0x68797548 41#define NBD_MAGIC 0x68797548
43 42
44#ifdef NDEBUG 43#ifdef NDEBUG
45#define dprintk(flags, fmt...) 44#define dprintk(flags, fmt...)
@@ -116,7 +115,7 @@ static void nbd_end_request(struct request *req)
116 spin_unlock_irqrestore(q->queue_lock, flags); 115 spin_unlock_irqrestore(q->queue_lock, flags);
117} 116}
118 117
119static void sock_shutdown(struct nbd_device *lo, int lock) 118static void sock_shutdown(struct nbd_device *nbd, int lock)
120{ 119{
121 /* Forcibly shutdown the socket causing all listeners 120 /* Forcibly shutdown the socket causing all listeners
122 * to error 121 * to error
@@ -125,14 +124,14 @@ static void sock_shutdown(struct nbd_device *lo, int lock)
125 * there should be a more generic interface rather than 124 * there should be a more generic interface rather than
126 * calling socket ops directly here */ 125 * calling socket ops directly here */
127 if (lock) 126 if (lock)
128 mutex_lock(&lo->tx_lock); 127 mutex_lock(&nbd->tx_lock);
129 if (lo->sock) { 128 if (nbd->sock) {
130 dev_warn(disk_to_dev(lo->disk), "shutting down socket\n"); 129 dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
131 kernel_sock_shutdown(lo->sock, SHUT_RDWR); 130 kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
132 lo->sock = NULL; 131 nbd->sock = NULL;
133 } 132 }
134 if (lock) 133 if (lock)
135 mutex_unlock(&lo->tx_lock); 134 mutex_unlock(&nbd->tx_lock);
136} 135}
137 136
138static void nbd_xmit_timeout(unsigned long arg) 137static void nbd_xmit_timeout(unsigned long arg)
@@ -147,17 +146,17 @@ static void nbd_xmit_timeout(unsigned long arg)
147/* 146/*
148 * Send or receive packet. 147 * Send or receive packet.
149 */ 148 */
150static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size, 149static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
151 int msg_flags) 150 int msg_flags)
152{ 151{
153 struct socket *sock = lo->sock; 152 struct socket *sock = nbd->sock;
154 int result; 153 int result;
155 struct msghdr msg; 154 struct msghdr msg;
156 struct kvec iov; 155 struct kvec iov;
157 sigset_t blocked, oldset; 156 sigset_t blocked, oldset;
158 157
159 if (unlikely(!sock)) { 158 if (unlikely(!sock)) {
160 dev_err(disk_to_dev(lo->disk), 159 dev_err(disk_to_dev(nbd->disk),
161 "Attempted %s on closed socket in sock_xmit\n", 160 "Attempted %s on closed socket in sock_xmit\n",
162 (send ? "send" : "recv")); 161 (send ? "send" : "recv"));
163 return -EINVAL; 162 return -EINVAL;
@@ -181,15 +180,15 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
181 if (send) { 180 if (send) {
182 struct timer_list ti; 181 struct timer_list ti;
183 182
184 if (lo->xmit_timeout) { 183 if (nbd->xmit_timeout) {
185 init_timer(&ti); 184 init_timer(&ti);
186 ti.function = nbd_xmit_timeout; 185 ti.function = nbd_xmit_timeout;
187 ti.data = (unsigned long)current; 186 ti.data = (unsigned long)current;
188 ti.expires = jiffies + lo->xmit_timeout; 187 ti.expires = jiffies + nbd->xmit_timeout;
189 add_timer(&ti); 188 add_timer(&ti);
190 } 189 }
191 result = kernel_sendmsg(sock, &msg, &iov, 1, size); 190 result = kernel_sendmsg(sock, &msg, &iov, 1, size);
192 if (lo->xmit_timeout) 191 if (nbd->xmit_timeout)
193 del_timer_sync(&ti); 192 del_timer_sync(&ti);
194 } else 193 } else
195 result = kernel_recvmsg(sock, &msg, &iov, 1, size, 194 result = kernel_recvmsg(sock, &msg, &iov, 1, size,
@@ -201,7 +200,7 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
201 task_pid_nr(current), current->comm, 200 task_pid_nr(current), current->comm,
202 dequeue_signal_lock(current, &current->blocked, &info)); 201 dequeue_signal_lock(current, &current->blocked, &info));
203 result = -EINTR; 202 result = -EINTR;
204 sock_shutdown(lo, !send); 203 sock_shutdown(nbd, !send);
205 break; 204 break;
206 } 205 }
207 206
@@ -219,18 +218,19 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
219 return result; 218 return result;
220} 219}
221 220
222static inline int sock_send_bvec(struct nbd_device *lo, struct bio_vec *bvec, 221static inline int sock_send_bvec(struct nbd_device *nbd, struct bio_vec *bvec,
223 int flags) 222 int flags)
224{ 223{
225 int result; 224 int result;
226 void *kaddr = kmap(bvec->bv_page); 225 void *kaddr = kmap(bvec->bv_page);
227 result = sock_xmit(lo, 1, kaddr + bvec->bv_offset, bvec->bv_len, flags); 226 result = sock_xmit(nbd, 1, kaddr + bvec->bv_offset,
227 bvec->bv_len, flags);
228 kunmap(bvec->bv_page); 228 kunmap(bvec->bv_page);
229 return result; 229 return result;
230} 230}
231 231
232/* always call with the tx_lock held */ 232/* always call with the tx_lock held */
233static int nbd_send_req(struct nbd_device *lo, struct request *req) 233static int nbd_send_req(struct nbd_device *nbd, struct request *req)
234{ 234{
235 int result, flags; 235 int result, flags;
236 struct nbd_request request; 236 struct nbd_request request;
@@ -243,14 +243,14 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
243 memcpy(request.handle, &req, sizeof(req)); 243 memcpy(request.handle, &req, sizeof(req));
244 244
245 dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n", 245 dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n",
246 lo->disk->disk_name, req, 246 nbd->disk->disk_name, req,
247 nbdcmd_to_ascii(nbd_cmd(req)), 247 nbdcmd_to_ascii(nbd_cmd(req)),
248 (unsigned long long)blk_rq_pos(req) << 9, 248 (unsigned long long)blk_rq_pos(req) << 9,
249 blk_rq_bytes(req)); 249 blk_rq_bytes(req));
250 result = sock_xmit(lo, 1, &request, sizeof(request), 250 result = sock_xmit(nbd, 1, &request, sizeof(request),
251 (nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0); 251 (nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0);
252 if (result <= 0) { 252 if (result <= 0) {
253 dev_err(disk_to_dev(lo->disk), 253 dev_err(disk_to_dev(nbd->disk),
254 "Send control failed (result %d)\n", result); 254 "Send control failed (result %d)\n", result);
255 goto error_out; 255 goto error_out;
256 } 256 }
@@ -267,10 +267,10 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
267 if (!rq_iter_last(req, iter)) 267 if (!rq_iter_last(req, iter))
268 flags = MSG_MORE; 268 flags = MSG_MORE;
269 dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n", 269 dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n",
270 lo->disk->disk_name, req, bvec->bv_len); 270 nbd->disk->disk_name, req, bvec->bv_len);
271 result = sock_send_bvec(lo, bvec, flags); 271 result = sock_send_bvec(nbd, bvec, flags);
272 if (result <= 0) { 272 if (result <= 0) {
273 dev_err(disk_to_dev(lo->disk), 273 dev_err(disk_to_dev(nbd->disk),
274 "Send data failed (result %d)\n", 274 "Send data failed (result %d)\n",
275 result); 275 result);
276 goto error_out; 276 goto error_out;
@@ -283,25 +283,25 @@ error_out:
283 return -EIO; 283 return -EIO;
284} 284}
285 285
286static struct request *nbd_find_request(struct nbd_device *lo, 286static struct request *nbd_find_request(struct nbd_device *nbd,
287 struct request *xreq) 287 struct request *xreq)
288{ 288{
289 struct request *req, *tmp; 289 struct request *req, *tmp;
290 int err; 290 int err;
291 291
292 err = wait_event_interruptible(lo->active_wq, lo->active_req != xreq); 292 err = wait_event_interruptible(nbd->active_wq, nbd->active_req != xreq);
293 if (unlikely(err)) 293 if (unlikely(err))
294 goto out; 294 goto out;
295 295
296 spin_lock(&lo->queue_lock); 296 spin_lock(&nbd->queue_lock);
297 list_for_each_entry_safe(req, tmp, &lo->queue_head, queuelist) { 297 list_for_each_entry_safe(req, tmp, &nbd->queue_head, queuelist) {
298 if (req != xreq) 298 if (req != xreq)
299 continue; 299 continue;
300 list_del_init(&req->queuelist); 300 list_del_init(&req->queuelist);
301 spin_unlock(&lo->queue_lock); 301 spin_unlock(&nbd->queue_lock);
302 return req; 302 return req;
303 } 303 }
304 spin_unlock(&lo->queue_lock); 304 spin_unlock(&nbd->queue_lock);
305 305
306 err = -ENOENT; 306 err = -ENOENT;
307 307
@@ -309,78 +309,78 @@ out:
309 return ERR_PTR(err); 309 return ERR_PTR(err);
310} 310}
311 311
312static inline int sock_recv_bvec(struct nbd_device *lo, struct bio_vec *bvec) 312static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec)
313{ 313{
314 int result; 314 int result;
315 void *kaddr = kmap(bvec->bv_page); 315 void *kaddr = kmap(bvec->bv_page);
316 result = sock_xmit(lo, 0, kaddr + bvec->bv_offset, bvec->bv_len, 316 result = sock_xmit(nbd, 0, kaddr + bvec->bv_offset, bvec->bv_len,
317 MSG_WAITALL); 317 MSG_WAITALL);
318 kunmap(bvec->bv_page); 318 kunmap(bvec->bv_page);
319 return result; 319 return result;
320} 320}
321 321
322/* NULL returned = something went wrong, inform userspace */ 322/* NULL returned = something went wrong, inform userspace */
323static struct request *nbd_read_stat(struct nbd_device *lo) 323static struct request *nbd_read_stat(struct nbd_device *nbd)
324{ 324{
325 int result; 325 int result;
326 struct nbd_reply reply; 326 struct nbd_reply reply;
327 struct request *req; 327 struct request *req;
328 328
329 reply.magic = 0; 329 reply.magic = 0;
330 result = sock_xmit(lo, 0, &reply, sizeof(reply), MSG_WAITALL); 330 result = sock_xmit(nbd, 0, &reply, sizeof(reply), MSG_WAITALL);
331 if (result <= 0) { 331 if (result <= 0) {
332 dev_err(disk_to_dev(lo->disk), 332 dev_err(disk_to_dev(nbd->disk),
333 "Receive control failed (result %d)\n", result); 333 "Receive control failed (result %d)\n", result);
334 goto harderror; 334 goto harderror;
335 } 335 }
336 336
337 if (ntohl(reply.magic) != NBD_REPLY_MAGIC) { 337 if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
338 dev_err(disk_to_dev(lo->disk), "Wrong magic (0x%lx)\n", 338 dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n",
339 (unsigned long)ntohl(reply.magic)); 339 (unsigned long)ntohl(reply.magic));
340 result = -EPROTO; 340 result = -EPROTO;
341 goto harderror; 341 goto harderror;
342 } 342 }
343 343
344 req = nbd_find_request(lo, *(struct request **)reply.handle); 344 req = nbd_find_request(nbd, *(struct request **)reply.handle);
345 if (IS_ERR(req)) { 345 if (IS_ERR(req)) {
346 result = PTR_ERR(req); 346 result = PTR_ERR(req);
347 if (result != -ENOENT) 347 if (result != -ENOENT)
348 goto harderror; 348 goto harderror;
349 349
350 dev_err(disk_to_dev(lo->disk), "Unexpected reply (%p)\n", 350 dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%p)\n",
351 reply.handle); 351 reply.handle);
352 result = -EBADR; 352 result = -EBADR;
353 goto harderror; 353 goto harderror;
354 } 354 }
355 355
356 if (ntohl(reply.error)) { 356 if (ntohl(reply.error)) {
357 dev_err(disk_to_dev(lo->disk), "Other side returned error (%d)\n", 357 dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
358 ntohl(reply.error)); 358 ntohl(reply.error));
359 req->errors++; 359 req->errors++;
360 return req; 360 return req;
361 } 361 }
362 362
363 dprintk(DBG_RX, "%s: request %p: got reply\n", 363 dprintk(DBG_RX, "%s: request %p: got reply\n",
364 lo->disk->disk_name, req); 364 nbd->disk->disk_name, req);
365 if (nbd_cmd(req) == NBD_CMD_READ) { 365 if (nbd_cmd(req) == NBD_CMD_READ) {
366 struct req_iterator iter; 366 struct req_iterator iter;
367 struct bio_vec *bvec; 367 struct bio_vec *bvec;
368 368
369 rq_for_each_segment(bvec, req, iter) { 369 rq_for_each_segment(bvec, req, iter) {
370 result = sock_recv_bvec(lo, bvec); 370 result = sock_recv_bvec(nbd, bvec);
371 if (result <= 0) { 371 if (result <= 0) {
372 dev_err(disk_to_dev(lo->disk), "Receive data failed (result %d)\n", 372 dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
373 result); 373 result);
374 req->errors++; 374 req->errors++;
375 return req; 375 return req;
376 } 376 }
377 dprintk(DBG_RX, "%s: request %p: got %d bytes data\n", 377 dprintk(DBG_RX, "%s: request %p: got %d bytes data\n",
378 lo->disk->disk_name, req, bvec->bv_len); 378 nbd->disk->disk_name, req, bvec->bv_len);
379 } 379 }
380 } 380 }
381 return req; 381 return req;
382harderror: 382harderror:
383 lo->harderror = result; 383 nbd->harderror = result;
384 return NULL; 384 return NULL;
385} 385}
386 386
@@ -398,48 +398,48 @@ static struct device_attribute pid_attr = {
398 .show = pid_show, 398 .show = pid_show,
399}; 399};
400 400
401static int nbd_do_it(struct nbd_device *lo) 401static int nbd_do_it(struct nbd_device *nbd)
402{ 402{
403 struct request *req; 403 struct request *req;
404 int ret; 404 int ret;
405 405
406 BUG_ON(lo->magic != LO_MAGIC); 406 BUG_ON(nbd->magic != NBD_MAGIC);
407 407
408 lo->pid = task_pid_nr(current); 408 nbd->pid = task_pid_nr(current);
409 ret = device_create_file(disk_to_dev(lo->disk), &pid_attr); 409 ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
410 if (ret) { 410 if (ret) {
411 dev_err(disk_to_dev(lo->disk), "device_create_file failed!\n"); 411 dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
412 lo->pid = 0; 412 nbd->pid = 0;
413 return ret; 413 return ret;
414 } 414 }
415 415
416 while ((req = nbd_read_stat(lo)) != NULL) 416 while ((req = nbd_read_stat(nbd)) != NULL)
417 nbd_end_request(req); 417 nbd_end_request(req);
418 418
419 device_remove_file(disk_to_dev(lo->disk), &pid_attr); 419 device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
420 lo->pid = 0; 420 nbd->pid = 0;
421 return 0; 421 return 0;
422} 422}
423 423
424static void nbd_clear_que(struct nbd_device *lo) 424static void nbd_clear_que(struct nbd_device *nbd)
425{ 425{
426 struct request *req; 426 struct request *req;
427 427
428 BUG_ON(lo->magic != LO_MAGIC); 428 BUG_ON(nbd->magic != NBD_MAGIC);
429 429
430 /* 430 /*
431 * Because we have set lo->sock to NULL under the tx_lock, all 431 * Because we have set nbd->sock to NULL under the tx_lock, all
432 * modifications to the list must have completed by now. For 432 * modifications to the list must have completed by now. For
433 * the same reason, the active_req must be NULL. 433 * the same reason, the active_req must be NULL.
434 * 434 *
435 * As a consequence, we don't need to take the spin lock while 435 * As a consequence, we don't need to take the spin lock while
436 * purging the list here. 436 * purging the list here.
437 */ 437 */
438 BUG_ON(lo->sock); 438 BUG_ON(nbd->sock);
439 BUG_ON(lo->active_req); 439 BUG_ON(nbd->active_req);
440 440
441 while (!list_empty(&lo->queue_head)) { 441 while (!list_empty(&nbd->queue_head)) {
442 req = list_entry(lo->queue_head.next, struct request, 442 req = list_entry(nbd->queue_head.next, struct request,
443 queuelist); 443 queuelist);
444 list_del_init(&req->queuelist); 444 list_del_init(&req->queuelist);
445 req->errors++; 445 req->errors++;
@@ -448,7 +448,7 @@ static void nbd_clear_que(struct nbd_device *lo)
448} 448}
449 449
450 450
451static void nbd_handle_req(struct nbd_device *lo, struct request *req) 451static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
452{ 452{
453 if (req->cmd_type != REQ_TYPE_FS) 453 if (req->cmd_type != REQ_TYPE_FS)
454 goto error_out; 454 goto error_out;
@@ -456,8 +456,8 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req)
456 nbd_cmd(req) = NBD_CMD_READ; 456 nbd_cmd(req) = NBD_CMD_READ;
457 if (rq_data_dir(req) == WRITE) { 457 if (rq_data_dir(req) == WRITE) {
458 nbd_cmd(req) = NBD_CMD_WRITE; 458 nbd_cmd(req) = NBD_CMD_WRITE;
459 if (lo->flags & NBD_READ_ONLY) { 459 if (nbd->flags & NBD_READ_ONLY) {
460 dev_err(disk_to_dev(lo->disk), 460 dev_err(disk_to_dev(nbd->disk),
461 "Write on read-only\n"); 461 "Write on read-only\n");
462 goto error_out; 462 goto error_out;
463 } 463 }
@@ -465,29 +465,29 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req)
465 465
466 req->errors = 0; 466 req->errors = 0;
467 467
468 mutex_lock(&lo->tx_lock); 468 mutex_lock(&nbd->tx_lock);
469 if (unlikely(!lo->sock)) { 469 if (unlikely(!nbd->sock)) {
470 mutex_unlock(&lo->tx_lock); 470 mutex_unlock(&nbd->tx_lock);
471 dev_err(disk_to_dev(lo->disk), 471 dev_err(disk_to_dev(nbd->disk),
472 "Attempted send on closed socket\n"); 472 "Attempted send on closed socket\n");
473 goto error_out; 473 goto error_out;
474 } 474 }
475 475
476 lo->active_req = req; 476 nbd->active_req = req;
477 477
478 if (nbd_send_req(lo, req) != 0) { 478 if (nbd_send_req(nbd, req) != 0) {
479 dev_err(disk_to_dev(lo->disk), "Request send failed\n"); 479 dev_err(disk_to_dev(nbd->disk), "Request send failed\n");
480 req->errors++; 480 req->errors++;
481 nbd_end_request(req); 481 nbd_end_request(req);
482 } else { 482 } else {
483 spin_lock(&lo->queue_lock); 483 spin_lock(&nbd->queue_lock);
484 list_add(&req->queuelist, &lo->queue_head); 484 list_add(&req->queuelist, &nbd->queue_head);
485 spin_unlock(&lo->queue_lock); 485 spin_unlock(&nbd->queue_lock);
486 } 486 }
487 487
488 lo->active_req = NULL; 488 nbd->active_req = NULL;
489 mutex_unlock(&lo->tx_lock); 489 mutex_unlock(&nbd->tx_lock);
490 wake_up_all(&lo->active_wq); 490 wake_up_all(&nbd->active_wq);
491 491
492 return; 492 return;
493 493
@@ -498,28 +498,28 @@ error_out:
498 498
499static int nbd_thread(void *data) 499static int nbd_thread(void *data)
500{ 500{
501 struct nbd_device *lo = data; 501 struct nbd_device *nbd = data;
502 struct request *req; 502 struct request *req;
503 503
504 set_user_nice(current, -20); 504 set_user_nice(current, -20);
505 while (!kthread_should_stop() || !list_empty(&lo->waiting_queue)) { 505 while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
506 /* wait for something to do */ 506 /* wait for something to do */
507 wait_event_interruptible(lo->waiting_wq, 507 wait_event_interruptible(nbd->waiting_wq,
508 kthread_should_stop() || 508 kthread_should_stop() ||
509 !list_empty(&lo->waiting_queue)); 509 !list_empty(&nbd->waiting_queue));
510 510
511 /* extract request */ 511 /* extract request */
512 if (list_empty(&lo->waiting_queue)) 512 if (list_empty(&nbd->waiting_queue))
513 continue; 513 continue;
514 514
515 spin_lock_irq(&lo->queue_lock); 515 spin_lock_irq(&nbd->queue_lock);
516 req = list_entry(lo->waiting_queue.next, struct request, 516 req = list_entry(nbd->waiting_queue.next, struct request,
517 queuelist); 517 queuelist);
518 list_del_init(&req->queuelist); 518 list_del_init(&req->queuelist);
519 spin_unlock_irq(&lo->queue_lock); 519 spin_unlock_irq(&nbd->queue_lock);
520 520
521 /* handle request */ 521 /* handle request */
522 nbd_handle_req(lo, req); 522 nbd_handle_req(nbd, req);
523 } 523 }
524 return 0; 524 return 0;
525} 525}
@@ -527,7 +527,7 @@ static int nbd_thread(void *data)
527/* 527/*
528 * We always wait for result of write, for now. It would be nice to make it optional 528 * We always wait for result of write, for now. It would be nice to make it optional
529 * in future 529 * in future
530 * if ((rq_data_dir(req) == WRITE) && (lo->flags & NBD_WRITE_NOCHK)) 530 * if ((rq_data_dir(req) == WRITE) && (nbd->flags & NBD_WRITE_NOCHK))
531 * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); } 531 * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
532 */ 532 */
533 533
@@ -536,19 +536,19 @@ static void do_nbd_request(struct request_queue *q)
536 struct request *req; 536 struct request *req;
537 537
538 while ((req = blk_fetch_request(q)) != NULL) { 538 while ((req = blk_fetch_request(q)) != NULL) {
539 struct nbd_device *lo; 539 struct nbd_device *nbd;
540 540
541 spin_unlock_irq(q->queue_lock); 541 spin_unlock_irq(q->queue_lock);
542 542
543 dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n", 543 dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n",
544 req->rq_disk->disk_name, req, req->cmd_type); 544 req->rq_disk->disk_name, req, req->cmd_type);
545 545
546 lo = req->rq_disk->private_data; 546 nbd = req->rq_disk->private_data;
547 547
548 BUG_ON(lo->magic != LO_MAGIC); 548 BUG_ON(nbd->magic != NBD_MAGIC);
549 549
550 if (unlikely(!lo->sock)) { 550 if (unlikely(!nbd->sock)) {
551 dev_err(disk_to_dev(lo->disk), 551 dev_err(disk_to_dev(nbd->disk),
552 "Attempted send on closed socket\n"); 552 "Attempted send on closed socket\n");
553 req->errors++; 553 req->errors++;
554 nbd_end_request(req); 554 nbd_end_request(req);
@@ -556,11 +556,11 @@ static void do_nbd_request(struct request_queue *q)
556 continue; 556 continue;
557 } 557 }
558 558
559 spin_lock_irq(&lo->queue_lock); 559 spin_lock_irq(&nbd->queue_lock);
560 list_add_tail(&req->queuelist, &lo->waiting_queue); 560 list_add_tail(&req->queuelist, &nbd->waiting_queue);
561 spin_unlock_irq(&lo->queue_lock); 561 spin_unlock_irq(&nbd->queue_lock);
562 562
563 wake_up(&lo->waiting_wq); 563 wake_up(&nbd->waiting_wq);
564 564
565 spin_lock_irq(q->queue_lock); 565 spin_lock_irq(q->queue_lock);
566 } 566 }
@@ -568,32 +568,32 @@ static void do_nbd_request(struct request_queue *q)
568 568
569/* Must be called with tx_lock held */ 569/* Must be called with tx_lock held */
570 570
571static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo, 571static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
572 unsigned int cmd, unsigned long arg) 572 unsigned int cmd, unsigned long arg)
573{ 573{
574 switch (cmd) { 574 switch (cmd) {
575 case NBD_DISCONNECT: { 575 case NBD_DISCONNECT: {
576 struct request sreq; 576 struct request sreq;
577 577
578 dev_info(disk_to_dev(lo->disk), "NBD_DISCONNECT\n"); 578 dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
579 579
580 blk_rq_init(NULL, &sreq); 580 blk_rq_init(NULL, &sreq);
581 sreq.cmd_type = REQ_TYPE_SPECIAL; 581 sreq.cmd_type = REQ_TYPE_SPECIAL;
582 nbd_cmd(&sreq) = NBD_CMD_DISC; 582 nbd_cmd(&sreq) = NBD_CMD_DISC;
583 if (!lo->sock) 583 if (!nbd->sock)
584 return -EINVAL; 584 return -EINVAL;
585 nbd_send_req(lo, &sreq); 585 nbd_send_req(nbd, &sreq);
586 return 0; 586 return 0;
587 } 587 }
588 588
589 case NBD_CLEAR_SOCK: { 589 case NBD_CLEAR_SOCK: {
590 struct file *file; 590 struct file *file;
591 591
592 lo->sock = NULL; 592 nbd->sock = NULL;
593 file = lo->file; 593 file = nbd->file;
594 lo->file = NULL; 594 nbd->file = NULL;
595 nbd_clear_que(lo); 595 nbd_clear_que(nbd);
596 BUG_ON(!list_empty(&lo->queue_head)); 596 BUG_ON(!list_empty(&nbd->queue_head));
597 if (file) 597 if (file)
598 fput(file); 598 fput(file);
599 return 0; 599 return 0;
@@ -601,14 +601,14 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
601 601
602 case NBD_SET_SOCK: { 602 case NBD_SET_SOCK: {
603 struct file *file; 603 struct file *file;
604 if (lo->file) 604 if (nbd->file)
605 return -EBUSY; 605 return -EBUSY;
606 file = fget(arg); 606 file = fget(arg);
607 if (file) { 607 if (file) {
608 struct inode *inode = file->f_path.dentry->d_inode; 608 struct inode *inode = file->f_path.dentry->d_inode;
609 if (S_ISSOCK(inode->i_mode)) { 609 if (S_ISSOCK(inode->i_mode)) {
610 lo->file = file; 610 nbd->file = file;
611 lo->sock = SOCKET_I(inode); 611 nbd->sock = SOCKET_I(inode);
612 if (max_part > 0) 612 if (max_part > 0)
613 bdev->bd_invalidated = 1; 613 bdev->bd_invalidated = 1;
614 return 0; 614 return 0;
@@ -620,29 +620,29 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
620 } 620 }
621 621
622 case NBD_SET_BLKSIZE: 622 case NBD_SET_BLKSIZE:
623 lo->blksize = arg; 623 nbd->blksize = arg;
624 lo->bytesize &= ~(lo->blksize-1); 624 nbd->bytesize &= ~(nbd->blksize-1);
625 bdev->bd_inode->i_size = lo->bytesize; 625 bdev->bd_inode->i_size = nbd->bytesize;
626 set_blocksize(bdev, lo->blksize); 626 set_blocksize(bdev, nbd->blksize);
627 set_capacity(lo->disk, lo->bytesize >> 9); 627 set_capacity(nbd->disk, nbd->bytesize >> 9);
628 return 0; 628 return 0;
629 629
630 case NBD_SET_SIZE: 630 case NBD_SET_SIZE:
631 lo->bytesize = arg & ~(lo->blksize-1); 631 nbd->bytesize = arg & ~(nbd->blksize-1);
632 bdev->bd_inode->i_size = lo->bytesize; 632 bdev->bd_inode->i_size = nbd->bytesize;
633 set_blocksize(bdev, lo->blksize); 633 set_blocksize(bdev, nbd->blksize);
634 set_capacity(lo->disk, lo->bytesize >> 9); 634 set_capacity(nbd->disk, nbd->bytesize >> 9);
635 return 0; 635 return 0;
636 636
637 case NBD_SET_TIMEOUT: 637 case NBD_SET_TIMEOUT:
638 lo->xmit_timeout = arg * HZ; 638 nbd->xmit_timeout = arg * HZ;
639 return 0; 639 return 0;
640 640
641 case NBD_SET_SIZE_BLOCKS: 641 case NBD_SET_SIZE_BLOCKS:
642 lo->bytesize = ((u64) arg) * lo->blksize; 642 nbd->bytesize = ((u64) arg) * nbd->blksize;
643 bdev->bd_inode->i_size = lo->bytesize; 643 bdev->bd_inode->i_size = nbd->bytesize;
644 set_blocksize(bdev, lo->blksize); 644 set_blocksize(bdev, nbd->blksize);
645 set_capacity(lo->disk, lo->bytesize >> 9); 645 set_capacity(nbd->disk, nbd->bytesize >> 9);
646 return 0; 646 return 0;
647 647
648 case NBD_DO_IT: { 648 case NBD_DO_IT: {
@@ -650,38 +650,38 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
650 struct file *file; 650 struct file *file;
651 int error; 651 int error;
652 652
653 if (lo->pid) 653 if (nbd->pid)
654 return -EBUSY; 654 return -EBUSY;
655 if (!lo->file) 655 if (!nbd->file)
656 return -EINVAL; 656 return -EINVAL;
657 657
658 mutex_unlock(&lo->tx_lock); 658 mutex_unlock(&nbd->tx_lock);
659 659
660 thread = kthread_create(nbd_thread, lo, lo->disk->disk_name); 660 thread = kthread_create(nbd_thread, nbd, nbd->disk->disk_name);
661 if (IS_ERR(thread)) { 661 if (IS_ERR(thread)) {
662 mutex_lock(&lo->tx_lock); 662 mutex_lock(&nbd->tx_lock);
663 return PTR_ERR(thread); 663 return PTR_ERR(thread);
664 } 664 }
665 wake_up_process(thread); 665 wake_up_process(thread);
666 error = nbd_do_it(lo); 666 error = nbd_do_it(nbd);
667 kthread_stop(thread); 667 kthread_stop(thread);
668 668
669 mutex_lock(&lo->tx_lock); 669 mutex_lock(&nbd->tx_lock);
670 if (error) 670 if (error)
671 return error; 671 return error;
672 sock_shutdown(lo, 0); 672 sock_shutdown(nbd, 0);
673 file = lo->file; 673 file = nbd->file;
674 lo->file = NULL; 674 nbd->file = NULL;
675 nbd_clear_que(lo); 675 nbd_clear_que(nbd);
676 dev_warn(disk_to_dev(lo->disk), "queue cleared\n"); 676 dev_warn(disk_to_dev(nbd->disk), "queue cleared\n");
677 if (file) 677 if (file)
678 fput(file); 678 fput(file);
679 lo->bytesize = 0; 679 nbd->bytesize = 0;
680 bdev->bd_inode->i_size = 0; 680 bdev->bd_inode->i_size = 0;
681 set_capacity(lo->disk, 0); 681 set_capacity(nbd->disk, 0);
682 if (max_part > 0) 682 if (max_part > 0)
683 ioctl_by_bdev(bdev, BLKRRPART, 0); 683 ioctl_by_bdev(bdev, BLKRRPART, 0);
684 return lo->harderror; 684 return nbd->harderror;
685 } 685 }
686 686
687 case NBD_CLEAR_QUE: 687 case NBD_CLEAR_QUE:
@@ -689,14 +689,14 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
689 * This is for compatibility only. The queue is always cleared 689 * This is for compatibility only. The queue is always cleared
690 * by NBD_DO_IT or NBD_CLEAR_SOCK. 690 * by NBD_DO_IT or NBD_CLEAR_SOCK.
691 */ 691 */
692 BUG_ON(!lo->sock && !list_empty(&lo->queue_head)); 692 BUG_ON(!nbd->sock && !list_empty(&nbd->queue_head));
693 return 0; 693 return 0;
694 694
695 case NBD_PRINT_DEBUG: 695 case NBD_PRINT_DEBUG:
696 dev_info(disk_to_dev(lo->disk), 696 dev_info(disk_to_dev(nbd->disk),
697 "next = %p, prev = %p, head = %p\n", 697 "next = %p, prev = %p, head = %p\n",
698 lo->queue_head.next, lo->queue_head.prev, 698 nbd->queue_head.next, nbd->queue_head.prev,
699 &lo->queue_head); 699 &nbd->queue_head);
700 return 0; 700 return 0;
701 } 701 }
702 return -ENOTTY; 702 return -ENOTTY;
@@ -705,21 +705,21 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
705static int nbd_ioctl(struct block_device *bdev, fmode_t mode, 705static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
706 unsigned int cmd, unsigned long arg) 706 unsigned int cmd, unsigned long arg)
707{ 707{
708 struct nbd_device *lo = bdev->bd_disk->private_data; 708 struct nbd_device *nbd = bdev->bd_disk->private_data;
709 int error; 709 int error;
710 710
711 if (!capable(CAP_SYS_ADMIN)) 711 if (!capable(CAP_SYS_ADMIN))
712 return -EPERM; 712 return -EPERM;
713 713
714 BUG_ON(lo->magic != LO_MAGIC); 714 BUG_ON(nbd->magic != NBD_MAGIC);
715 715
716 /* Anyone capable of this syscall can do *real bad* things */ 716 /* Anyone capable of this syscall can do *real bad* things */
717 dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n", 717 dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n",
718 lo->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg); 718 nbd->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg);
719 719
720 mutex_lock(&lo->tx_lock); 720 mutex_lock(&nbd->tx_lock);
721 error = __nbd_ioctl(bdev, lo, cmd, arg); 721 error = __nbd_ioctl(bdev, nbd, cmd, arg);
722 mutex_unlock(&lo->tx_lock); 722 mutex_unlock(&nbd->tx_lock);
723 723
724 return error; 724 return error;
725} 725}
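
The NBD_SET_BLKSIZE, NBD_SET_SIZE and NBD_SET_SIZE_BLOCKS cases above all keep bytesize an exact multiple of the (power-of-two) block size and republish it as a capacity counted in 512-byte sectors. A quick worked example with illustrative values:

/*
 * Sketch only: NBD_SET_BLKSIZE 1024 followed by NBD_SET_SIZE 1000000.
 *   blksize  = 1024
 *   bytesize = 1000000 & ~(1024 - 1) = 999424    (rounded down to a block)
 *   capacity = 999424 >> 9           = 1952 sectors of 512 bytes
 * NBD_SET_SIZE_BLOCKS 976 would reach the same bytesize via 976 * 1024.
 */
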
@@ -805,7 +805,7 @@ static int __init nbd_init(void)
805 for (i = 0; i < nbds_max; i++) { 805 for (i = 0; i < nbds_max; i++) {
806 struct gendisk *disk = nbd_dev[i].disk; 806 struct gendisk *disk = nbd_dev[i].disk;
807 nbd_dev[i].file = NULL; 807 nbd_dev[i].file = NULL;
808 nbd_dev[i].magic = LO_MAGIC; 808 nbd_dev[i].magic = NBD_MAGIC;
809 nbd_dev[i].flags = 0; 809 nbd_dev[i].flags = 0;
810 INIT_LIST_HEAD(&nbd_dev[i].waiting_queue); 810 INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
811 spin_lock_init(&nbd_dev[i].queue_lock); 811 spin_lock_init(&nbd_dev[i].queue_lock);
diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
new file mode 100644
index 000000000000..38a2d0631882
--- /dev/null
+++ b/drivers/block/nvme.c
@@ -0,0 +1,1740 @@
1/*
2 * NVM Express device driver
3 * Copyright (c) 2011, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17 */
18
19#include <linux/nvme.h>
20#include <linux/bio.h>
21#include <linux/bitops.h>
22#include <linux/blkdev.h>
23#include <linux/delay.h>
24#include <linux/errno.h>
25#include <linux/fs.h>
26#include <linux/genhd.h>
27#include <linux/idr.h>
28#include <linux/init.h>
29#include <linux/interrupt.h>
30#include <linux/io.h>
31#include <linux/kdev_t.h>
32#include <linux/kthread.h>
33#include <linux/kernel.h>
34#include <linux/mm.h>
35#include <linux/module.h>
36#include <linux/moduleparam.h>
37#include <linux/pci.h>
38#include <linux/poison.h>
39#include <linux/sched.h>
40#include <linux/slab.h>
41#include <linux/types.h>
42
43#include <asm-generic/io-64-nonatomic-lo-hi.h>
44
45#define NVME_Q_DEPTH 1024
46#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
47#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion))
48#define NVME_MINORS 64
49#define NVME_IO_TIMEOUT (5 * HZ)
50#define ADMIN_TIMEOUT (60 * HZ)
51
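
For scale, a quick worked example of what these macros produce at the default depth (using the 64-byte submission entry that _nvme_check_size() below enforces, and the 16-byte completion entry defined by the NVMe spec):

/*
 * Sketch only: ring sizes at NVME_Q_DEPTH == 1024.
 *   SQ_SIZE(1024) = 1024 * sizeof(struct nvme_command)    = 1024 * 64 = 64 KiB
 *   CQ_SIZE(1024) = 1024 * sizeof(struct nvme_completion) = 1024 * 16 = 16 KiB
 * nvme_alloc_queue() further down allocates both rings with dma_alloc_coherent().
 */
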
52static int nvme_major;
53module_param(nvme_major, int, 0);
54
55static int use_threaded_interrupts;
56module_param(use_threaded_interrupts, int, 0);
57
58static DEFINE_SPINLOCK(dev_list_lock);
59static LIST_HEAD(dev_list);
60static struct task_struct *nvme_thread;
61
62/*
63 * Represents an NVM Express device. Each nvme_dev is a PCI function.
64 */
65struct nvme_dev {
66 struct list_head node;
67 struct nvme_queue **queues;
68 u32 __iomem *dbs;
69 struct pci_dev *pci_dev;
70 struct dma_pool *prp_page_pool;
71 struct dma_pool *prp_small_pool;
72 int instance;
73 int queue_count;
74 int db_stride;
75 u32 ctrl_config;
76 struct msix_entry *entry;
77 struct nvme_bar __iomem *bar;
78 struct list_head namespaces;
79 char serial[20];
80 char model[40];
81 char firmware_rev[8];
82};
83
84/*
85 * An NVM Express namespace is equivalent to a SCSI LUN
86 */
87struct nvme_ns {
88 struct list_head list;
89
90 struct nvme_dev *dev;
91 struct request_queue *queue;
92 struct gendisk *disk;
93
94 int ns_id;
95 int lba_shift;
96};
97
98/*
99 * An NVM Express queue. Each device has at least two (one for admin
100 * commands and one for I/O commands).
101 */
102struct nvme_queue {
103 struct device *q_dmadev;
104 struct nvme_dev *dev;
105 spinlock_t q_lock;
106 struct nvme_command *sq_cmds;
107 volatile struct nvme_completion *cqes;
108 dma_addr_t sq_dma_addr;
109 dma_addr_t cq_dma_addr;
110 wait_queue_head_t sq_full;
111 wait_queue_t sq_cong_wait;
112 struct bio_list sq_cong;
113 u32 __iomem *q_db;
114 u16 q_depth;
115 u16 cq_vector;
116 u16 sq_head;
117 u16 sq_tail;
118 u16 cq_head;
119 u16 cq_phase;
120 unsigned long cmdid_data[];
121};
122
123/*
124 * Check we didn't inadvertently grow the command struct
125 */
126static inline void _nvme_check_size(void)
127{
128 BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64);
129 BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64);
130 BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
131 BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
132 BUILD_BUG_ON(sizeof(struct nvme_features) != 64);
133 BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
134 BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096);
135 BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096);
136 BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
137}
138
139typedef void (*nvme_completion_fn)(struct nvme_dev *, void *,
140 struct nvme_completion *);
141
142struct nvme_cmd_info {
143 nvme_completion_fn fn;
144 void *ctx;
145 unsigned long timeout;
146};
147
148static struct nvme_cmd_info *nvme_cmd_info(struct nvme_queue *nvmeq)
149{
150 return (void *)&nvmeq->cmdid_data[BITS_TO_LONGS(nvmeq->q_depth)];
151}
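
The cmdid_data[] flexible array at the end of struct nvme_queue holds two things back to back: a bitmap of in-flight command IDs and one nvme_cmd_info per slot, which is exactly the "extra" space nvme_alloc_queue() adds to its kzalloc() further down. A rough layout sketch for a depth of 1024 on a 64-bit build:

/*
 * Sketch only: trailing storage of one nvme_queue, q_depth == 1024.
 *   cmdid_data[0 .. 15]        1024-bit allocation bitmap (1024 / 64 longs)
 *   nvme_cmd_info[0 .. 1023]   fn / ctx / timeout for each outstanding cmdid
 * nvme_cmd_info(nvmeq) just skips past the bitmap:
 *   (void *)&cmdid_data[BITS_TO_LONGS(1024)] == (void *)&cmdid_data[16]
 */
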
152
153/**
154 * alloc_cmdid() - Allocate a Command ID
155 * @nvmeq: The queue that will be used for this command
156 * @ctx: A pointer that will be passed to the handler
157 * @handler: The function to call on completion
158 *
159 * Allocate a Command ID for a queue. The data passed in will
160 * be passed to the completion handler. This is implemented by using
161 * the bottom two bits of the ctx pointer to store the handler ID.
162 * Passing in a pointer that's not 4-byte aligned will cause a BUG.
163 * We can change this if it becomes a problem.
164 *
165 * May be called with local interrupts disabled and the q_lock held,
166 * or with interrupts enabled and no locks held.
167 */
168static int alloc_cmdid(struct nvme_queue *nvmeq, void *ctx,
169 nvme_completion_fn handler, unsigned timeout)
170{
171 int depth = nvmeq->q_depth - 1;
172 struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
173 int cmdid;
174
175 do {
176 cmdid = find_first_zero_bit(nvmeq->cmdid_data, depth);
177 if (cmdid >= depth)
178 return -EBUSY;
179 } while (test_and_set_bit(cmdid, nvmeq->cmdid_data));
180
181 info[cmdid].fn = handler;
182 info[cmdid].ctx = ctx;
183 info[cmdid].timeout = jiffies + timeout;
184 return cmdid;
185}
186
187static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx,
188 nvme_completion_fn handler, unsigned timeout)
189{
190 int cmdid;
191 wait_event_killable(nvmeq->sq_full,
192 (cmdid = alloc_cmdid(nvmeq, ctx, handler, timeout)) >= 0);
193 return (cmdid < 0) ? -EINTR : cmdid;
194}
195
196/* Special values must be less than 0x1000 */
197#define CMD_CTX_BASE ((void *)POISON_POINTER_DELTA)
198#define CMD_CTX_CANCELLED (0x30C + CMD_CTX_BASE)
199#define CMD_CTX_COMPLETED (0x310 + CMD_CTX_BASE)
200#define CMD_CTX_INVALID (0x314 + CMD_CTX_BASE)
201#define CMD_CTX_FLUSH (0x318 + CMD_CTX_BASE)
202
203static void special_completion(struct nvme_dev *dev, void *ctx,
204 struct nvme_completion *cqe)
205{
206 if (ctx == CMD_CTX_CANCELLED)
207 return;
208 if (ctx == CMD_CTX_FLUSH)
209 return;
210 if (ctx == CMD_CTX_COMPLETED) {
211 dev_warn(&dev->pci_dev->dev,
212 "completed id %d twice on queue %d\n",
213 cqe->command_id, le16_to_cpup(&cqe->sq_id));
214 return;
215 }
216 if (ctx == CMD_CTX_INVALID) {
217 dev_warn(&dev->pci_dev->dev,
218 "invalid id %d completed on queue %d\n",
219 cqe->command_id, le16_to_cpup(&cqe->sq_id));
220 return;
221 }
222
223 dev_warn(&dev->pci_dev->dev, "Unknown special completion %p\n", ctx);
224}
225
226/*
227 * Called with local interrupts disabled and the q_lock held. May not sleep.
228 */
229static void *free_cmdid(struct nvme_queue *nvmeq, int cmdid,
230 nvme_completion_fn *fn)
231{
232 void *ctx;
233 struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
234
235 if (cmdid >= nvmeq->q_depth) {
236 *fn = special_completion;
237 return CMD_CTX_INVALID;
238 }
239 *fn = info[cmdid].fn;
240 ctx = info[cmdid].ctx;
241 info[cmdid].fn = special_completion;
242 info[cmdid].ctx = CMD_CTX_COMPLETED;
243 clear_bit(cmdid, nvmeq->cmdid_data);
244 wake_up(&nvmeq->sq_full);
245 return ctx;
246}
247
248static void *cancel_cmdid(struct nvme_queue *nvmeq, int cmdid,
249 nvme_completion_fn *fn)
250{
251 void *ctx;
252 struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
253 if (fn)
254 *fn = info[cmdid].fn;
255 ctx = info[cmdid].ctx;
256 info[cmdid].fn = special_completion;
257 info[cmdid].ctx = CMD_CTX_CANCELLED;
258 return ctx;
259}
260
261static struct nvme_queue *get_nvmeq(struct nvme_dev *dev)
262{
263 return dev->queues[get_cpu() + 1];
264}
265
266static void put_nvmeq(struct nvme_queue *nvmeq)
267{
268 put_cpu();
269}
270
271/**
272 * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
273 * @nvmeq: The queue to use
274 * @cmd: The command to send
275 *
276 * Safe to use from interrupt context
277 */
278static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
279{
280 unsigned long flags;
281 u16 tail;
282 spin_lock_irqsave(&nvmeq->q_lock, flags);
283 tail = nvmeq->sq_tail;
284 memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
285 if (++tail == nvmeq->q_depth)
286 tail = 0;
287 writel(tail, nvmeq->q_db);
288 nvmeq->sq_tail = tail;
289 spin_unlock_irqrestore(&nvmeq->q_lock, flags);
290
291 return 0;
292}
293
294/*
295 * The nvme_iod describes the data in an I/O, including the list of PRP
296 * entries. You can't see it in this data structure because C doesn't let
297 * me express that. Use nvme_alloc_iod to ensure there's enough space
298 * allocated to store the PRP list.
299 */
300struct nvme_iod {
301 void *private; /* For the use of the submitter of the I/O */
302 int npages; /* In the PRP list. 0 means small pool in use */
303 int offset; /* Of PRP list */
304 int nents; /* Used in scatterlist */
305 int length; /* Of data, in bytes */
306 dma_addr_t first_dma;
307 struct scatterlist sg[0];
308};
309
310static __le64 **iod_list(struct nvme_iod *iod)
311{
312 return ((void *)iod) + iod->offset;
313}
314
315/*
316 * Will slightly overestimate the number of pages needed. This is OK
317 * as it only leads to a small amount of wasted memory for the lifetime of
318 * the I/O.
319 */
320static int nvme_npages(unsigned size)
321{
322 unsigned nprps = DIV_ROUND_UP(size + PAGE_SIZE, PAGE_SIZE);
323 return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8);
324}
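
A concrete instance of the overestimate described above, assuming 4 KiB pages:

/*
 * Sketch only: nvme_npages(128 * 1024) with PAGE_SIZE == 4096.
 *   nprps = DIV_ROUND_UP(131072 + 4096, 4096)  = 33 PRP entries
 *   pages = DIV_ROUND_UP(8 * 33, 4096 - 8)     = 1 PRP list page
 * The "+ PAGE_SIZE" term allows for an unaligned start, and the "- 8" in
 * the divisor reserves the chain slot at the end of each list page.
 */
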
325
326static struct nvme_iod *
327nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp)
328{
329 struct nvme_iod *iod = kmalloc(sizeof(struct nvme_iod) +
330 sizeof(__le64 *) * nvme_npages(nbytes) +
331 sizeof(struct scatterlist) * nseg, gfp);
332
333 if (iod) {
334 iod->offset = offsetof(struct nvme_iod, sg[nseg]);
335 iod->npages = -1;
336 iod->length = nbytes;
337 }
338
339 return iod;
340}
341
342static void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
343{
344 const int last_prp = PAGE_SIZE / 8 - 1;
345 int i;
346 __le64 **list = iod_list(iod);
347 dma_addr_t prp_dma = iod->first_dma;
348
349 if (iod->npages == 0)
350 dma_pool_free(dev->prp_small_pool, list[0], prp_dma);
351 for (i = 0; i < iod->npages; i++) {
352 __le64 *prp_list = list[i];
353 dma_addr_t next_prp_dma = le64_to_cpu(prp_list[last_prp]);
354 dma_pool_free(dev->prp_page_pool, prp_list, prp_dma);
355 prp_dma = next_prp_dma;
356 }
357 kfree(iod);
358}
359
360static void requeue_bio(struct nvme_dev *dev, struct bio *bio)
361{
362 struct nvme_queue *nvmeq = get_nvmeq(dev);
363 if (bio_list_empty(&nvmeq->sq_cong))
364 add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
365 bio_list_add(&nvmeq->sq_cong, bio);
366 put_nvmeq(nvmeq);
367 wake_up_process(nvme_thread);
368}
369
370static void bio_completion(struct nvme_dev *dev, void *ctx,
371 struct nvme_completion *cqe)
372{
373 struct nvme_iod *iod = ctx;
374 struct bio *bio = iod->private;
375 u16 status = le16_to_cpup(&cqe->status) >> 1;
376
377 dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents,
378 bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
379 nvme_free_iod(dev, iod);
380 if (status) {
381 bio_endio(bio, -EIO);
382 } else if (bio->bi_vcnt > bio->bi_idx) {
383 requeue_bio(dev, bio);
384 } else {
385 bio_endio(bio, 0);
386 }
387}
388
389/* length is in bytes. gfp flags indicates whether we may sleep. */
390static int nvme_setup_prps(struct nvme_dev *dev,
391 struct nvme_common_command *cmd, struct nvme_iod *iod,
392 int total_len, gfp_t gfp)
393{
394 struct dma_pool *pool;
395 int length = total_len;
396 struct scatterlist *sg = iod->sg;
397 int dma_len = sg_dma_len(sg);
398 u64 dma_addr = sg_dma_address(sg);
399 int offset = offset_in_page(dma_addr);
400 __le64 *prp_list;
401 __le64 **list = iod_list(iod);
402 dma_addr_t prp_dma;
403 int nprps, i;
404
405 cmd->prp1 = cpu_to_le64(dma_addr);
406 length -= (PAGE_SIZE - offset);
407 if (length <= 0)
408 return total_len;
409
410 dma_len -= (PAGE_SIZE - offset);
411 if (dma_len) {
412 dma_addr += (PAGE_SIZE - offset);
413 } else {
414 sg = sg_next(sg);
415 dma_addr = sg_dma_address(sg);
416 dma_len = sg_dma_len(sg);
417 }
418
419 if (length <= PAGE_SIZE) {
420 cmd->prp2 = cpu_to_le64(dma_addr);
421 return total_len;
422 }
423
424 nprps = DIV_ROUND_UP(length, PAGE_SIZE);
425 if (nprps <= (256 / 8)) {
426 pool = dev->prp_small_pool;
427 iod->npages = 0;
428 } else {
429 pool = dev->prp_page_pool;
430 iod->npages = 1;
431 }
432
433 prp_list = dma_pool_alloc(pool, gfp, &prp_dma);
434 if (!prp_list) {
435 cmd->prp2 = cpu_to_le64(dma_addr);
436 iod->npages = -1;
437 return (total_len - length) + PAGE_SIZE;
438 }
439 list[0] = prp_list;
440 iod->first_dma = prp_dma;
441 cmd->prp2 = cpu_to_le64(prp_dma);
442 i = 0;
443 for (;;) {
444 if (i == PAGE_SIZE / 8) {
445 __le64 *old_prp_list = prp_list;
446 prp_list = dma_pool_alloc(pool, gfp, &prp_dma);
447 if (!prp_list)
448 return total_len - length;
449 list[iod->npages++] = prp_list;
450 prp_list[0] = old_prp_list[i - 1];
451 old_prp_list[i - 1] = cpu_to_le64(prp_dma);
452 i = 1;
453 }
454 prp_list[i++] = cpu_to_le64(dma_addr);
455 dma_len -= PAGE_SIZE;
456 dma_addr += PAGE_SIZE;
457 length -= PAGE_SIZE;
458 if (length <= 0)
459 break;
460 if (dma_len > 0)
461 continue;
462 BUG_ON(dma_len < 0);
463 sg = sg_next(sg);
464 dma_addr = sg_dma_address(sg);
465 dma_len = sg_dma_len(sg);
466 }
467
468 return total_len;
469}
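
When a transfer needs more than one PRP list page, the loop above chains the pages together: the last slot of the full page is overwritten with the DMA address of the next page, and nvme_free_iod() walks that chain when the I/O completes. A rough capacity sketch, assuming 4 KiB pages:

/*
 * Sketch only: PRP list capacity with PAGE_SIZE == 4096.
 *   slots per list page         = 4096 / 8        = 512
 *   data slots when chained     = 512 - 1         = 511
 *   data per chained list page  = 511 * 4096      = 2093056 bytes (just under 2 MiB)
 * prp1 covers the first, possibly unaligned, chunk; prp2 holds either the
 * second data pointer or, once a list is needed, the first list page.
 */
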
470
471/* NVMe scatterlists require no holes in the virtual address */
472#define BIOVEC_NOT_VIRT_MERGEABLE(vec1, vec2) ((vec2)->bv_offset || \
473 (((vec1)->bv_offset + (vec1)->bv_len) % PAGE_SIZE))
474
475static int nvme_map_bio(struct device *dev, struct nvme_iod *iod,
476 struct bio *bio, enum dma_data_direction dma_dir, int psegs)
477{
478 struct bio_vec *bvec, *bvprv = NULL;
479 struct scatterlist *sg = NULL;
480 int i, old_idx, length = 0, nsegs = 0;
481
482 sg_init_table(iod->sg, psegs);
483 old_idx = bio->bi_idx;
484 bio_for_each_segment(bvec, bio, i) {
485 if (bvprv && BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) {
486 sg->length += bvec->bv_len;
487 } else {
488 if (bvprv && BIOVEC_NOT_VIRT_MERGEABLE(bvprv, bvec))
489 break;
490 sg = sg ? sg + 1 : iod->sg;
491 sg_set_page(sg, bvec->bv_page, bvec->bv_len,
492 bvec->bv_offset);
493 nsegs++;
494 }
495 length += bvec->bv_len;
496 bvprv = bvec;
497 }
498 bio->bi_idx = i;
499 iod->nents = nsegs;
500 sg_mark_end(sg);
501 if (dma_map_sg(dev, iod->sg, iod->nents, dma_dir) == 0) {
502 bio->bi_idx = old_idx;
503 return -ENOMEM;
504 }
505 return length;
506}
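
BIOVEC_NOT_VIRT_MERGEABLE enforces the PRP rule that every data pointer after the first must begin on a page boundary, so nvme_map_bio() can only fold consecutive bio_vecs into one request while the run stays page-contiguous. A small illustration with hypothetical vectors and 4 KiB pages:

/*
 * Sketch only: a pair of bio_vecs that cannot be merged for NVMe.
 *   vec1 = { .bv_offset = 0, .bv_len = 2048 }   ends in the middle of a page
 *   vec2 = { .bv_offset = 0, .bv_len = 4096 }
 * (0 + 2048) % 4096 != 0, so BIOVEC_NOT_VIRT_MERGEABLE(vec1, vec2) is true;
 * nvme_map_bio() stops at vec2, and the unmapped tail of the bio is later
 * resubmitted from bio_completion() via requeue_bio().
 */
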
507
508static int nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns,
509 int cmdid)
510{
511 struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
512
513 memset(cmnd, 0, sizeof(*cmnd));
514 cmnd->common.opcode = nvme_cmd_flush;
515 cmnd->common.command_id = cmdid;
516 cmnd->common.nsid = cpu_to_le32(ns->ns_id);
517
518 if (++nvmeq->sq_tail == nvmeq->q_depth)
519 nvmeq->sq_tail = 0;
520 writel(nvmeq->sq_tail, nvmeq->q_db);
521
522 return 0;
523}
524
525static int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns)
526{
527 int cmdid = alloc_cmdid(nvmeq, (void *)CMD_CTX_FLUSH,
528 special_completion, NVME_IO_TIMEOUT);
529 if (unlikely(cmdid < 0))
530 return cmdid;
531
532 return nvme_submit_flush(nvmeq, ns, cmdid);
533}
534
535/*
536 * Called with local interrupts disabled and the q_lock held. May not sleep.
537 */
538static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
539 struct bio *bio)
540{
541 struct nvme_command *cmnd;
542 struct nvme_iod *iod;
543 enum dma_data_direction dma_dir;
544 int cmdid, length, result = -ENOMEM;
545 u16 control;
546 u32 dsmgmt;
547 int psegs = bio_phys_segments(ns->queue, bio);
548
549 if ((bio->bi_rw & REQ_FLUSH) && psegs) {
550 result = nvme_submit_flush_data(nvmeq, ns);
551 if (result)
552 return result;
553 }
554
555 iod = nvme_alloc_iod(psegs, bio->bi_size, GFP_ATOMIC);
556 if (!iod)
557 goto nomem;
558 iod->private = bio;
559
560 result = -EBUSY;
561 cmdid = alloc_cmdid(nvmeq, iod, bio_completion, NVME_IO_TIMEOUT);
562 if (unlikely(cmdid < 0))
563 goto free_iod;
564
565 if ((bio->bi_rw & REQ_FLUSH) && !psegs)
566 return nvme_submit_flush(nvmeq, ns, cmdid);
567
568 control = 0;
569 if (bio->bi_rw & REQ_FUA)
570 control |= NVME_RW_FUA;
571 if (bio->bi_rw & (REQ_FAILFAST_DEV | REQ_RAHEAD))
572 control |= NVME_RW_LR;
573
574 dsmgmt = 0;
575 if (bio->bi_rw & REQ_RAHEAD)
576 dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
577
578 cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
579
580 memset(cmnd, 0, sizeof(*cmnd));
581 if (bio_data_dir(bio)) {
582 cmnd->rw.opcode = nvme_cmd_write;
583 dma_dir = DMA_TO_DEVICE;
584 } else {
585 cmnd->rw.opcode = nvme_cmd_read;
586 dma_dir = DMA_FROM_DEVICE;
587 }
588
589 result = nvme_map_bio(nvmeq->q_dmadev, iod, bio, dma_dir, psegs);
590 if (result < 0)
591 goto free_iod;
592 length = result;
593
594 cmnd->rw.command_id = cmdid;
595 cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
596 length = nvme_setup_prps(nvmeq->dev, &cmnd->common, iod, length,
597 GFP_ATOMIC);
598 cmnd->rw.slba = cpu_to_le64(bio->bi_sector >> (ns->lba_shift - 9));
599 cmnd->rw.length = cpu_to_le16((length >> ns->lba_shift) - 1);
600 cmnd->rw.control = cpu_to_le16(control);
601 cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
602
603 bio->bi_sector += length >> 9;
604
605 if (++nvmeq->sq_tail == nvmeq->q_depth)
606 nvmeq->sq_tail = 0;
607 writel(nvmeq->sq_tail, nvmeq->q_db);
608
609 return 0;
610
611 free_iod:
612 nvme_free_iod(nvmeq->dev, iod);
613 nomem:
614 return result;
615}
616
617static void nvme_make_request(struct request_queue *q, struct bio *bio)
618{
619 struct nvme_ns *ns = q->queuedata;
620 struct nvme_queue *nvmeq = get_nvmeq(ns->dev);
621 int result = -EBUSY;
622
623 spin_lock_irq(&nvmeq->q_lock);
624 if (bio_list_empty(&nvmeq->sq_cong))
625 result = nvme_submit_bio_queue(nvmeq, ns, bio);
626 if (unlikely(result)) {
627 if (bio_list_empty(&nvmeq->sq_cong))
628 add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
629 bio_list_add(&nvmeq->sq_cong, bio);
630 }
631
632 spin_unlock_irq(&nvmeq->q_lock);
633 put_nvmeq(nvmeq);
634}
635
636static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq)
637{
638 u16 head, phase;
639
640 head = nvmeq->cq_head;
641 phase = nvmeq->cq_phase;
642
643 for (;;) {
644 void *ctx;
645 nvme_completion_fn fn;
646 struct nvme_completion cqe = nvmeq->cqes[head];
647 if ((le16_to_cpu(cqe.status) & 1) != phase)
648 break;
649 nvmeq->sq_head = le16_to_cpu(cqe.sq_head);
650 if (++head == nvmeq->q_depth) {
651 head = 0;
652 phase = !phase;
653 }
654
655 ctx = free_cmdid(nvmeq, cqe.command_id, &fn);
656 fn(nvmeq->dev, ctx, &cqe);
657 }
658
659 /* If the controller ignores the cq head doorbell and continuously
660 * writes to the queue, it is theoretically possible to wrap around
661 * the queue twice and mistakenly return IRQ_NONE. Linux only
662 * requires that 0.1% of your interrupts are handled, so this isn't
663 * a big problem.
664 */
665 if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
666 return IRQ_NONE;
667
668 writel(head, nvmeq->q_db + (1 << nvmeq->dev->db_stride));
669 nvmeq->cq_head = head;
670 nvmeq->cq_phase = phase;
671
672 return IRQ_HANDLED;
673}
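
The phase bit is how the driver tells new completions from stale ones without a producer index from the hardware: the controller writes each entry with the current phase in bit 0 of the status field and flips that value every time it wraps the queue, while the driver flips cq_phase whenever cq_head wraps. A short sketch of the resulting rule:

/*
 * Sketch only: phase progression for a small queue.
 *   pass 1: entries arrive with phase bit 1, driver starts with cq_phase == 1
 *   cq_head wraps to 0: driver flips cq_phase to 0
 *   pass 2: new entries arrive with phase bit 0, and so on.
 * An entry at cq_head is therefore fresh exactly when
 *   (le16_to_cpu(cqe.status) & 1) == nvmeq->cq_phase
 * which is also the cheap test nvme_irq_check() uses below.
 */
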
674
675static irqreturn_t nvme_irq(int irq, void *data)
676{
677 irqreturn_t result;
678 struct nvme_queue *nvmeq = data;
679 spin_lock(&nvmeq->q_lock);
680 result = nvme_process_cq(nvmeq);
681 spin_unlock(&nvmeq->q_lock);
682 return result;
683}
684
685static irqreturn_t nvme_irq_check(int irq, void *data)
686{
687 struct nvme_queue *nvmeq = data;
688 struct nvme_completion cqe = nvmeq->cqes[nvmeq->cq_head];
689 if ((le16_to_cpu(cqe.status) & 1) != nvmeq->cq_phase)
690 return IRQ_NONE;
691 return IRQ_WAKE_THREAD;
692}
693
694static void nvme_abort_command(struct nvme_queue *nvmeq, int cmdid)
695{
696 spin_lock_irq(&nvmeq->q_lock);
697 cancel_cmdid(nvmeq, cmdid, NULL);
698 spin_unlock_irq(&nvmeq->q_lock);
699}
700
701struct sync_cmd_info {
702 struct task_struct *task;
703 u32 result;
704 int status;
705};
706
707static void sync_completion(struct nvme_dev *dev, void *ctx,
708 struct nvme_completion *cqe)
709{
710 struct sync_cmd_info *cmdinfo = ctx;
711 cmdinfo->result = le32_to_cpup(&cqe->result);
712 cmdinfo->status = le16_to_cpup(&cqe->status) >> 1;
713 wake_up_process(cmdinfo->task);
714}
715
716/*
717 * Returns 0 on success. If the result is negative, it's a Linux error code;
718 * if the result is positive, it's an NVM Express status code
719 */
720static int nvme_submit_sync_cmd(struct nvme_queue *nvmeq,
721 struct nvme_command *cmd, u32 *result, unsigned timeout)
722{
723 int cmdid;
724 struct sync_cmd_info cmdinfo;
725
726 cmdinfo.task = current;
727 cmdinfo.status = -EINTR;
728
729 cmdid = alloc_cmdid_killable(nvmeq, &cmdinfo, sync_completion,
730 timeout);
731 if (cmdid < 0)
732 return cmdid;
733 cmd->common.command_id = cmdid;
734
735 set_current_state(TASK_KILLABLE);
736 nvme_submit_cmd(nvmeq, cmd);
737 schedule();
738
739 if (cmdinfo.status == -EINTR) {
740 nvme_abort_command(nvmeq, cmdid);
741 return -EINTR;
742 }
743
744 if (result)
745 *result = cmdinfo.result;
746
747 return cmdinfo.status;
748}
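
The tri-state return convention documented above (negative Linux errno, zero for success, positive NVM Express status code) is what the admin helpers below rely on when they collapse any failure into -EIO. A caller that wants to keep the distinction might look like the following; the helper name is hypothetical and only illustrates the contract:

/* Hypothetical caller, sketch only: preserve the errno / NVMe-status split. */
static int nvme_sync_cmd_example(struct nvme_queue *nvmeq, struct nvme_command *cmd)
{
	u32 result;
	int status = nvme_submit_sync_cmd(nvmeq, cmd, &result, ADMIN_TIMEOUT);

	if (status < 0)		/* Linux error, e.g. -EINTR after a signal */
		return status;
	if (status > 0)		/* NVM Express status code from the device */
		return -EIO;
	return 0;		/* success; "result" holds dword 0 of the CQE */
}
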
749
750static int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
751 u32 *result)
752{
753 return nvme_submit_sync_cmd(dev->queues[0], cmd, result, ADMIN_TIMEOUT);
754}
755
756static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
757{
758 int status;
759 struct nvme_command c;
760
761 memset(&c, 0, sizeof(c));
762 c.delete_queue.opcode = opcode;
763 c.delete_queue.qid = cpu_to_le16(id);
764
765 status = nvme_submit_admin_cmd(dev, &c, NULL);
766 if (status)
767 return -EIO;
768 return 0;
769}
770
771static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
772 struct nvme_queue *nvmeq)
773{
774 int status;
775 struct nvme_command c;
776 int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;
777
778 memset(&c, 0, sizeof(c));
779 c.create_cq.opcode = nvme_admin_create_cq;
780 c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr);
781 c.create_cq.cqid = cpu_to_le16(qid);
782 c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
783 c.create_cq.cq_flags = cpu_to_le16(flags);
784 c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);
785
786 status = nvme_submit_admin_cmd(dev, &c, NULL);
787 if (status)
788 return -EIO;
789 return 0;
790}
791
792static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
793 struct nvme_queue *nvmeq)
794{
795 int status;
796 struct nvme_command c;
797 int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;
798
799 memset(&c, 0, sizeof(c));
800 c.create_sq.opcode = nvme_admin_create_sq;
801 c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr);
802 c.create_sq.sqid = cpu_to_le16(qid);
803 c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
804 c.create_sq.sq_flags = cpu_to_le16(flags);
805 c.create_sq.cqid = cpu_to_le16(qid);
806
807 status = nvme_submit_admin_cmd(dev, &c, NULL);
808 if (status)
809 return -EIO;
810 return 0;
811}
812
813static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
814{
815 return adapter_delete_queue(dev, nvme_admin_delete_cq, cqid);
816}
817
818static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
819{
820 return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
821}
822
823static int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns,
824 dma_addr_t dma_addr)
825{
826 struct nvme_command c;
827
828 memset(&c, 0, sizeof(c));
829 c.identify.opcode = nvme_admin_identify;
830 c.identify.nsid = cpu_to_le32(nsid);
831 c.identify.prp1 = cpu_to_le64(dma_addr);
832 c.identify.cns = cpu_to_le32(cns);
833
834 return nvme_submit_admin_cmd(dev, &c, NULL);
835}
836
837static int nvme_get_features(struct nvme_dev *dev, unsigned fid,
838 unsigned dword11, dma_addr_t dma_addr)
839{
840 struct nvme_command c;
841
842 memset(&c, 0, sizeof(c));
843 c.features.opcode = nvme_admin_get_features;
844 c.features.prp1 = cpu_to_le64(dma_addr);
845 c.features.fid = cpu_to_le32(fid);
846 c.features.dword11 = cpu_to_le32(dword11);
847
848 return nvme_submit_admin_cmd(dev, &c, NULL);
849}
850
851static int nvme_set_features(struct nvme_dev *dev, unsigned fid,
852 unsigned dword11, dma_addr_t dma_addr, u32 *result)
853{
854 struct nvme_command c;
855
856 memset(&c, 0, sizeof(c));
857 c.features.opcode = nvme_admin_set_features;
858 c.features.prp1 = cpu_to_le64(dma_addr);
859 c.features.fid = cpu_to_le32(fid);
860 c.features.dword11 = cpu_to_le32(dword11);
861
862 return nvme_submit_admin_cmd(dev, &c, result);
863}
864
865static void nvme_free_queue(struct nvme_dev *dev, int qid)
866{
867 struct nvme_queue *nvmeq = dev->queues[qid];
868 int vector = dev->entry[nvmeq->cq_vector].vector;
869
870 irq_set_affinity_hint(vector, NULL);
871 free_irq(vector, nvmeq);
872
873 /* Don't tell the adapter to delete the admin queue */
874 if (qid) {
875 adapter_delete_sq(dev, qid);
876 adapter_delete_cq(dev, qid);
877 }
878
879 dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
880 (void *)nvmeq->cqes, nvmeq->cq_dma_addr);
881 dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
882 nvmeq->sq_cmds, nvmeq->sq_dma_addr);
883 kfree(nvmeq);
884}
885
886static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
887 int depth, int vector)
888{
889 struct device *dmadev = &dev->pci_dev->dev;
890 unsigned extra = (depth / 8) + (depth * sizeof(struct nvme_cmd_info));
891 struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL);
892 if (!nvmeq)
893 return NULL;
894
895 nvmeq->cqes = dma_alloc_coherent(dmadev, CQ_SIZE(depth),
896 &nvmeq->cq_dma_addr, GFP_KERNEL);
897 if (!nvmeq->cqes)
898 goto free_nvmeq;
899 memset((void *)nvmeq->cqes, 0, CQ_SIZE(depth));
900
901 nvmeq->sq_cmds = dma_alloc_coherent(dmadev, SQ_SIZE(depth),
902 &nvmeq->sq_dma_addr, GFP_KERNEL);
903 if (!nvmeq->sq_cmds)
904 goto free_cqdma;
905
906 nvmeq->q_dmadev = dmadev;
907 nvmeq->dev = dev;
908 spin_lock_init(&nvmeq->q_lock);
909 nvmeq->cq_head = 0;
910 nvmeq->cq_phase = 1;
911 init_waitqueue_head(&nvmeq->sq_full);
912 init_waitqueue_entry(&nvmeq->sq_cong_wait, nvme_thread);
913 bio_list_init(&nvmeq->sq_cong);
914 nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)];
915 nvmeq->q_depth = depth;
916 nvmeq->cq_vector = vector;
917
918 return nvmeq;
919
920 free_cqdma:
921 dma_free_coherent(dmadev, CQ_SIZE(nvmeq->q_depth), (void *)nvmeq->cqes,
922 nvmeq->cq_dma_addr);
923 free_nvmeq:
924 kfree(nvmeq);
925 return NULL;
926}
927
928static int queue_request_irq(struct nvme_dev *dev, struct nvme_queue *nvmeq,
929 const char *name)
930{
931 if (use_threaded_interrupts)
932 return request_threaded_irq(dev->entry[nvmeq->cq_vector].vector,
933 nvme_irq_check, nvme_irq,
934 IRQF_DISABLED | IRQF_SHARED,
935 name, nvmeq);
936 return request_irq(dev->entry[nvmeq->cq_vector].vector, nvme_irq,
937 IRQF_DISABLED | IRQF_SHARED, name, nvmeq);
938}
939
940static __devinit struct nvme_queue *nvme_create_queue(struct nvme_dev *dev,
941 int qid, int cq_size, int vector)
942{
943 int result;
944 struct nvme_queue *nvmeq = nvme_alloc_queue(dev, qid, cq_size, vector);
945
946 if (!nvmeq)
947 return ERR_PTR(-ENOMEM);
948
949 result = adapter_alloc_cq(dev, qid, nvmeq);
950 if (result < 0)
951 goto free_nvmeq;
952
953 result = adapter_alloc_sq(dev, qid, nvmeq);
954 if (result < 0)
955 goto release_cq;
956
957 result = queue_request_irq(dev, nvmeq, "nvme");
958 if (result < 0)
959 goto release_sq;
960
961 return nvmeq;
962
963 release_sq:
964 adapter_delete_sq(dev, qid);
965 release_cq:
966 adapter_delete_cq(dev, qid);
967 free_nvmeq:
968 dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
969 (void *)nvmeq->cqes, nvmeq->cq_dma_addr);
970 dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
971 nvmeq->sq_cmds, nvmeq->sq_dma_addr);
972 kfree(nvmeq);
973 return ERR_PTR(result);
974}
975
976static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
977{
978 int result;
979 u32 aqa;
980 u64 cap;
981 unsigned long timeout;
982 struct nvme_queue *nvmeq;
983
984 dev->dbs = ((void __iomem *)dev->bar) + 4096;
985
986 nvmeq = nvme_alloc_queue(dev, 0, 64, 0);
987 if (!nvmeq)
988 return -ENOMEM;
989
990 aqa = nvmeq->q_depth - 1;
991 aqa |= aqa << 16;
992
993 dev->ctrl_config = NVME_CC_ENABLE | NVME_CC_CSS_NVM;
994 dev->ctrl_config |= (PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
995 dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
996 dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
997
998 writel(0, &dev->bar->cc);
999 writel(aqa, &dev->bar->aqa);
1000 writeq(nvmeq->sq_dma_addr, &dev->bar->asq);
1001 writeq(nvmeq->cq_dma_addr, &dev->bar->acq);
1002 writel(dev->ctrl_config, &dev->bar->cc);
1003
1004 cap = readq(&dev->bar->cap);
1005 timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
1006 dev->db_stride = NVME_CAP_STRIDE(cap);
1007
1008 while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) {
1009 msleep(100);
1010 if (fatal_signal_pending(current))
1011 return -EINTR;
1012 if (time_after(jiffies, timeout)) {
1013 dev_err(&dev->pci_dev->dev,
1014 "Device not ready; aborting initialisation\n");
1015 return -ENODEV;
1016 }
1017 }
1018
1019 result = queue_request_irq(dev, nvmeq, "nvme admin");
1020 dev->queues[0] = nvmeq;
1021 return result;
1022}
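
nvme_configure_admin_queue() packs the zero-based admin queue size into both halves of the AQA register (submission size in the low 16 bits, completion size in the high 16 bits) before writing the enable bit. For the 64-entry admin queue allocated above, and reading CAP the same way the code does:

/*
 * Sketch only: register values for the admin queue bring-up.
 *   aqa  = 64 - 1          = 0x003f      (ASQS / ACQS are zero-based)
 *   aqa |= aqa << 16       = 0x003f003f
 * CAP.TO counts 500 ms units, hence the ((TO + 1) * HZ / 2) ready timeout,
 * and CAP.DSTRD is kept as dev->db_stride for the doorbell layout later on.
 */
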
1023
1024static struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
1025 unsigned long addr, unsigned length)
1026{
1027 int i, err, count, nents, offset;
1028 struct scatterlist *sg;
1029 struct page **pages;
1030 struct nvme_iod *iod;
1031
1032 if (addr & 3)
1033 return ERR_PTR(-EINVAL);
1034 if (!length)
1035 return ERR_PTR(-EINVAL);
1036
1037 offset = offset_in_page(addr);
1038 count = DIV_ROUND_UP(offset + length, PAGE_SIZE);
1039 pages = kcalloc(count, sizeof(*pages), GFP_KERNEL);
1040
1041 err = get_user_pages_fast(addr, count, 1, pages);
1042 if (err < count) {
1043 count = err;
1044 err = -EFAULT;
1045 goto put_pages;
1046 }
1047
1048 iod = nvme_alloc_iod(count, length, GFP_KERNEL);
1049 sg = iod->sg;
1050 sg_init_table(sg, count);
1051 for (i = 0; i < count; i++) {
1052 sg_set_page(&sg[i], pages[i],
1053 min_t(int, length, PAGE_SIZE - offset), offset);
1054 length -= (PAGE_SIZE - offset);
1055 offset = 0;
1056 }
1057 sg_mark_end(&sg[i - 1]);
1058 iod->nents = count;
1059
1060 err = -ENOMEM;
1061 nents = dma_map_sg(&dev->pci_dev->dev, sg, count,
1062 write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
1063 if (!nents)
1064 goto free_iod;
1065
1066 kfree(pages);
1067 return iod;
1068
1069 free_iod:
1070 kfree(iod);
1071 put_pages:
1072 for (i = 0; i < count; i++)
1073 put_page(pages[i]);
1074 kfree(pages);
1075 return ERR_PTR(err);
1076}
1077
1078static void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
1079 struct nvme_iod *iod)
1080{
1081 int i;
1082
1083 dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents,
1084 write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
1085
1086 for (i = 0; i < iod->nents; i++)
1087 put_page(sg_page(&iod->sg[i]));
1088}
1089
1090static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
1091{
1092 struct nvme_dev *dev = ns->dev;
1093 struct nvme_queue *nvmeq;
1094 struct nvme_user_io io;
1095 struct nvme_command c;
1096 unsigned length;
1097 int status;
1098 struct nvme_iod *iod;
1099
1100 if (copy_from_user(&io, uio, sizeof(io)))
1101 return -EFAULT;
1102 length = (io.nblocks + 1) << ns->lba_shift;
1103
1104 switch (io.opcode) {
1105 case nvme_cmd_write:
1106 case nvme_cmd_read:
1107 case nvme_cmd_compare:
1108 iod = nvme_map_user_pages(dev, io.opcode & 1, io.addr, length);
1109 break;
1110 default:
1111 return -EINVAL;
1112 }
1113
1114 if (IS_ERR(iod))
1115 return PTR_ERR(iod);
1116
1117 memset(&c, 0, sizeof(c));
1118 c.rw.opcode = io.opcode;
1119 c.rw.flags = io.flags;
1120 c.rw.nsid = cpu_to_le32(ns->ns_id);
1121 c.rw.slba = cpu_to_le64(io.slba);
1122 c.rw.length = cpu_to_le16(io.nblocks);
1123 c.rw.control = cpu_to_le16(io.control);
1124 c.rw.dsmgmt = cpu_to_le16(io.dsmgmt);
1125 c.rw.reftag = io.reftag;
1126 c.rw.apptag = io.apptag;
1127 c.rw.appmask = io.appmask;
1128 /* XXX: metadata */
1129 length = nvme_setup_prps(dev, &c.common, iod, length, GFP_KERNEL);
1130
1131 nvmeq = get_nvmeq(dev);
1132 /*
1133 * Since nvme_submit_sync_cmd sleeps, we can't keep preemption
1134 * disabled. We may be preempted at any point, and be rescheduled
1135 * to a different CPU. That will cause cacheline bouncing, but no
1136 * additional races since q_lock already protects against other CPUs.
1137 */
1138 put_nvmeq(nvmeq);
1139 if (length != (io.nblocks + 1) << ns->lba_shift)
1140 status = -ENOMEM;
1141 else
1142 status = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);
1143
1144 nvme_unmap_user_pages(dev, io.opcode & 1, iod);
1145 nvme_free_iod(dev, iod);
1146 return status;
1147}
1148
1149static int nvme_user_admin_cmd(struct nvme_ns *ns,
1150 struct nvme_admin_cmd __user *ucmd)
1151{
1152 struct nvme_dev *dev = ns->dev;
1153 struct nvme_admin_cmd cmd;
1154 struct nvme_command c;
1155 int status, length;
1156 struct nvme_iod *iod;
1157
1158 if (!capable(CAP_SYS_ADMIN))
1159 return -EACCES;
1160 if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
1161 return -EFAULT;
1162
1163 memset(&c, 0, sizeof(c));
1164 c.common.opcode = cmd.opcode;
1165 c.common.flags = cmd.flags;
1166 c.common.nsid = cpu_to_le32(cmd.nsid);
1167 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
1168 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
1169 c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
1170 c.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
1171 c.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
1172 c.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
1173 c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
1174 c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);
1175
1176 length = cmd.data_len;
1177 if (cmd.data_len) {
1178 iod = nvme_map_user_pages(dev, cmd.opcode & 1, cmd.addr,
1179 length);
1180 if (IS_ERR(iod))
1181 return PTR_ERR(iod);
1182 length = nvme_setup_prps(dev, &c.common, iod, length,
1183 GFP_KERNEL);
1184 }
1185
1186 if (length != cmd.data_len)
1187 status = -ENOMEM;
1188 else
1189 status = nvme_submit_admin_cmd(dev, &c, NULL);
1190
1191 if (cmd.data_len) {
1192 nvme_unmap_user_pages(dev, cmd.opcode & 1, iod);
1193 nvme_free_iod(dev, iod);
1194 }
1195 return status;
1196}
1197
1198static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
1199 unsigned long arg)
1200{
1201 struct nvme_ns *ns = bdev->bd_disk->private_data;
1202
1203 switch (cmd) {
1204 case NVME_IOCTL_ID:
1205 return ns->ns_id;
1206 case NVME_IOCTL_ADMIN_CMD:
1207 return nvme_user_admin_cmd(ns, (void __user *)arg);
1208 case NVME_IOCTL_SUBMIT_IO:
1209 return nvme_submit_io(ns, (void __user *)arg);
1210 default:
1211 return -ENOTTY;
1212 }
1213}
1214
1215static const struct block_device_operations nvme_fops = {
1216 .owner = THIS_MODULE,
1217 .ioctl = nvme_ioctl,
1218 .compat_ioctl = nvme_ioctl,
1219};
1220
1221static void nvme_timeout_ios(struct nvme_queue *nvmeq)
1222{
1223 int depth = nvmeq->q_depth - 1;
1224 struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
1225 unsigned long now = jiffies;
1226 int cmdid;
1227
1228 for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) {
1229 void *ctx;
1230 nvme_completion_fn fn;
1231 static struct nvme_completion cqe = { .status = cpu_to_le16(NVME_SC_ABORT_REQ) << 1, };
1232
1233 if (!time_after(now, info[cmdid].timeout))
1234 continue;
1235 dev_warn(nvmeq->q_dmadev, "Timing out I/O %d\n", cmdid);
1236 ctx = cancel_cmdid(nvmeq, cmdid, &fn);
1237 fn(nvmeq->dev, ctx, &cqe);
1238 }
1239}
1240
1241static void nvme_resubmit_bios(struct nvme_queue *nvmeq)
1242{
1243 while (bio_list_peek(&nvmeq->sq_cong)) {
1244 struct bio *bio = bio_list_pop(&nvmeq->sq_cong);
1245 struct nvme_ns *ns = bio->bi_bdev->bd_disk->private_data;
1246 if (nvme_submit_bio_queue(nvmeq, ns, bio)) {
1247 bio_list_add_head(&nvmeq->sq_cong, bio);
1248 break;
1249 }
1250 if (bio_list_empty(&nvmeq->sq_cong))
1251 remove_wait_queue(&nvmeq->sq_full,
1252 &nvmeq->sq_cong_wait);
1253 }
1254}
1255
1256static int nvme_kthread(void *data)
1257{
1258 struct nvme_dev *dev;
1259
1260 while (!kthread_should_stop()) {
1261 __set_current_state(TASK_RUNNING);
1262 spin_lock(&dev_list_lock);
1263 list_for_each_entry(dev, &dev_list, node) {
1264 int i;
1265 for (i = 0; i < dev->queue_count; i++) {
1266 struct nvme_queue *nvmeq = dev->queues[i];
1267 if (!nvmeq)
1268 continue;
1269 spin_lock_irq(&nvmeq->q_lock);
1270 if (nvme_process_cq(nvmeq))
1271 printk("process_cq did something\n");
1272 nvme_timeout_ios(nvmeq);
1273 nvme_resubmit_bios(nvmeq);
1274 spin_unlock_irq(&nvmeq->q_lock);
1275 }
1276 }
1277 spin_unlock(&dev_list_lock);
1278 set_current_state(TASK_INTERRUPTIBLE);
1279 schedule_timeout(HZ);
1280 }
1281 return 0;
1282}
1283
1284static DEFINE_IDA(nvme_index_ida);
1285
1286static int nvme_get_ns_idx(void)
1287{
1288 int index, error;
1289
1290 do {
1291 if (!ida_pre_get(&nvme_index_ida, GFP_KERNEL))
1292 return -1;
1293
1294 spin_lock(&dev_list_lock);
1295 error = ida_get_new(&nvme_index_ida, &index);
1296 spin_unlock(&dev_list_lock);
1297 } while (error == -EAGAIN);
1298
1299 if (error)
1300 index = -1;
1301 return index;
1302}
1303
1304static void nvme_put_ns_idx(int index)
1305{
1306 spin_lock(&dev_list_lock);
1307 ida_remove(&nvme_index_ida, index);
1308 spin_unlock(&dev_list_lock);
1309}
1310
1311static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid,
1312 struct nvme_id_ns *id, struct nvme_lba_range_type *rt)
1313{
1314 struct nvme_ns *ns;
1315 struct gendisk *disk;
1316 int lbaf;
1317
1318 if (rt->attributes & NVME_LBART_ATTRIB_HIDE)
1319 return NULL;
1320
1321 ns = kzalloc(sizeof(*ns), GFP_KERNEL);
1322 if (!ns)
1323 return NULL;
1324 ns->queue = blk_alloc_queue(GFP_KERNEL);
1325 if (!ns->queue)
1326 goto out_free_ns;
1327 ns->queue->queue_flags = QUEUE_FLAG_DEFAULT;
1328 queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue);
1329 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
1330/* queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); */
1331 blk_queue_make_request(ns->queue, nvme_make_request);
1332 ns->dev = dev;
1333 ns->queue->queuedata = ns;
1334
1335 disk = alloc_disk(NVME_MINORS);
1336 if (!disk)
1337 goto out_free_queue;
1338 ns->ns_id = nsid;
1339 ns->disk = disk;
1340 lbaf = id->flbas & 0xf;
1341 ns->lba_shift = id->lbaf[lbaf].ds;
1342
1343 disk->major = nvme_major;
1344 disk->minors = NVME_MINORS;
1345 disk->first_minor = NVME_MINORS * nvme_get_ns_idx();
1346 disk->fops = &nvme_fops;
1347 disk->private_data = ns;
1348 disk->queue = ns->queue;
1349 disk->driverfs_dev = &dev->pci_dev->dev;
1350 sprintf(disk->disk_name, "nvme%dn%d", dev->instance, nsid);
1351 set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
1352
1353 return ns;
1354
1355 out_free_queue:
1356 blk_cleanup_queue(ns->queue);
1357 out_free_ns:
1358 kfree(ns);
1359 return NULL;
1360}
1361
1362static void nvme_ns_free(struct nvme_ns *ns)
1363{
1364 int index = ns->disk->first_minor / NVME_MINORS;
1365 put_disk(ns->disk);
1366 nvme_put_ns_idx(index);
1367 blk_cleanup_queue(ns->queue);
1368 kfree(ns);
1369}
1370
1371static int set_queue_count(struct nvme_dev *dev, int count)
1372{
1373 int status;
1374 u32 result;
1375 u32 q_count = (count - 1) | ((count - 1) << 16);
1376
1377 status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0,
1378 &result);
1379 if (status)
1380 return -EIO;
1381 return min(result & 0xffff, result >> 16) + 1;
1382}
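
The Number of Queues feature works on zero-based 16-bit counts packed into one dword, both in the request and in dword 0 of the completion, which is why set_queue_count() builds q_count this way and finishes with min(...) + 1. A worked example with illustrative numbers:

/*
 * Sketch only: requesting 8 I/O queue pairs.
 *   q_count = (8 - 1) | ((8 - 1) << 16)       = 0x00070007
 * If the controller only grants 4 submission and 4 completion queues,
 *   result  = 0x00030003
 *   min(result & 0xffff, result >> 16) + 1    = 4
 * and 4 becomes nr_io_queues in nvme_setup_io_queues() below.
 */
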
1383
1384static int __devinit nvme_setup_io_queues(struct nvme_dev *dev)
1385{
1386 int result, cpu, i, nr_io_queues, db_bar_size;
1387
1388 nr_io_queues = num_online_cpus();
1389 result = set_queue_count(dev, nr_io_queues);
1390 if (result < 0)
1391 return result;
1392 if (result < nr_io_queues)
1393 nr_io_queues = result;
1394
1395 /* Deregister the admin queue's interrupt */
1396 free_irq(dev->entry[0].vector, dev->queues[0]);
1397
1398 db_bar_size = 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3));
1399 if (db_bar_size > 8192) {
1400 iounmap(dev->bar);
1401 dev->bar = ioremap(pci_resource_start(dev->pci_dev, 0),
1402 db_bar_size);
1403 dev->dbs = ((void __iomem *)dev->bar) + 4096;
1404 dev->queues[0]->q_db = dev->dbs;
1405 }
1406
1407 for (i = 0; i < nr_io_queues; i++)
1408 dev->entry[i].entry = i;
1409 for (;;) {
1410 result = pci_enable_msix(dev->pci_dev, dev->entry,
1411 nr_io_queues);
1412 if (result == 0) {
1413 break;
1414 } else if (result > 0) {
1415 nr_io_queues = result;
1416 continue;
1417 } else {
1418 nr_io_queues = 1;
1419 break;
1420 }
1421 }
1422
1423 result = queue_request_irq(dev, dev->queues[0], "nvme admin");
1424 /* XXX: handle failure here */
1425
1426 cpu = cpumask_first(cpu_online_mask);
1427 for (i = 0; i < nr_io_queues; i++) {
1428 irq_set_affinity_hint(dev->entry[i].vector, get_cpu_mask(cpu));
1429 cpu = cpumask_next(cpu, cpu_online_mask);
1430 }
1431
1432 for (i = 0; i < nr_io_queues; i++) {
1433 dev->queues[i + 1] = nvme_create_queue(dev, i + 1,
1434 NVME_Q_DEPTH, i);
1435 if (IS_ERR(dev->queues[i + 1]))
1436 return PTR_ERR(dev->queues[i + 1]);
1437 dev->queue_count++;
1438 }
1439
1440 for (; i < num_possible_cpus(); i++) {
1441 int target = i % rounddown_pow_of_two(dev->queue_count - 1);
1442 dev->queues[i + 1] = dev->queues[target + 1];
1443 }
1444
1445 return 0;
1446}
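
The db_bar_size arithmetic above follows from the doorbell layout: the doorbell area starts 4 KiB into BAR 0, and every queue (admin plus each I/O queue) owns a submission-tail and a completion-head doorbell spaced (4 << db_stride) bytes apart. With illustrative numbers:

/*
 * Sketch only: doorbell region size for db_stride == 0 and 8 I/O queues.
 *   db_bar_size = 4096 + ((8 + 1) << (0 + 3)) = 4096 + 72 = 4168 bytes
 * That still fits inside the initial 8 KiB ioremap() from nvme_probe(),
 * so the BAR is not remapped; a large stride or queue count would force
 * the iounmap()/ioremap() path above.
 */
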
1447
1448static void nvme_free_queues(struct nvme_dev *dev)
1449{
1450 int i;
1451
1452 for (i = dev->queue_count - 1; i >= 0; i--)
1453 nvme_free_queue(dev, i);
1454}
1455
1456static int __devinit nvme_dev_add(struct nvme_dev *dev)
1457{
1458 int res, nn, i;
1459 struct nvme_ns *ns, *next;
1460 struct nvme_id_ctrl *ctrl;
1461 struct nvme_id_ns *id_ns;
1462 void *mem;
1463 dma_addr_t dma_addr;
1464
1465 res = nvme_setup_io_queues(dev);
1466 if (res)
1467 return res;
1468
1469 mem = dma_alloc_coherent(&dev->pci_dev->dev, 8192, &dma_addr,
1470 GFP_KERNEL);
1471
1472 res = nvme_identify(dev, 0, 1, dma_addr);
1473 if (res) {
1474 res = -EIO;
1475 goto out_free;
1476 }
1477
1478 ctrl = mem;
1479 nn = le32_to_cpup(&ctrl->nn);
1480 memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
1481 memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
1482 memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
1483
1484 id_ns = mem;
1485 for (i = 1; i <= nn; i++) {
1486 res = nvme_identify(dev, i, 0, dma_addr);
1487 if (res)
1488 continue;
1489
1490 if (id_ns->ncap == 0)
1491 continue;
1492
1493 res = nvme_get_features(dev, NVME_FEAT_LBA_RANGE, i,
1494 dma_addr + 4096);
1495 if (res)
1496 continue;
1497
1498 ns = nvme_alloc_ns(dev, i, mem, mem + 4096);
1499 if (ns)
1500 list_add_tail(&ns->list, &dev->namespaces);
1501 }
1502 list_for_each_entry(ns, &dev->namespaces, list)
1503 add_disk(ns->disk);
1504
1505 goto out;
1506
1507 out_free:
1508 list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
1509 list_del(&ns->list);
1510 nvme_ns_free(ns);
1511 }
1512
1513 out:
1514 dma_free_coherent(&dev->pci_dev->dev, 8192, mem, dma_addr);
1515 return res;
1516}
1517
1518static int nvme_dev_remove(struct nvme_dev *dev)
1519{
1520 struct nvme_ns *ns, *next;
1521
1522 spin_lock(&dev_list_lock);
1523 list_del(&dev->node);
1524 spin_unlock(&dev_list_lock);
1525
1526 /* TODO: wait all I/O finished or cancel them */
1527
1528 list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
1529 list_del(&ns->list);
1530 del_gendisk(ns->disk);
1531 nvme_ns_free(ns);
1532 }
1533
1534 nvme_free_queues(dev);
1535
1536 return 0;
1537}
1538
1539static int nvme_setup_prp_pools(struct nvme_dev *dev)
1540{
1541 struct device *dmadev = &dev->pci_dev->dev;
1542 dev->prp_page_pool = dma_pool_create("prp list page", dmadev,
1543 PAGE_SIZE, PAGE_SIZE, 0);
1544 if (!dev->prp_page_pool)
1545 return -ENOMEM;
1546
1547 /* Optimisation for I/Os between 4k and 128k */
1548 dev->prp_small_pool = dma_pool_create("prp list 256", dmadev,
1549 256, 256, 0);
1550 if (!dev->prp_small_pool) {
1551 dma_pool_destroy(dev->prp_page_pool);
1552 return -ENOMEM;
1553 }
1554 return 0;
1555}
1556
1557static void nvme_release_prp_pools(struct nvme_dev *dev)
1558{
1559 dma_pool_destroy(dev->prp_page_pool);
1560 dma_pool_destroy(dev->prp_small_pool);
1561}
1562
1563/* XXX: Use an ida or something to let remove / add work correctly */
1564static void nvme_set_instance(struct nvme_dev *dev)
1565{
1566 static int instance;
1567 dev->instance = instance++;
1568}
1569
1570static void nvme_release_instance(struct nvme_dev *dev)
1571{
1572}
1573
1574static int __devinit nvme_probe(struct pci_dev *pdev,
1575 const struct pci_device_id *id)
1576{
1577 int bars, result = -ENOMEM;
1578 struct nvme_dev *dev;
1579
1580 dev = kzalloc(sizeof(*dev), GFP_KERNEL);
1581 if (!dev)
1582 return -ENOMEM;
1583 dev->entry = kcalloc(num_possible_cpus(), sizeof(*dev->entry),
1584 GFP_KERNEL);
1585 if (!dev->entry)
1586 goto free;
1587 dev->queues = kcalloc(num_possible_cpus() + 1, sizeof(void *),
1588 GFP_KERNEL);
1589 if (!dev->queues)
1590 goto free;
1591
1592 if (pci_enable_device_mem(pdev))
1593 goto free;
1594 pci_set_master(pdev);
1595 bars = pci_select_bars(pdev, IORESOURCE_MEM);
1596 if (pci_request_selected_regions(pdev, bars, "nvme"))
1597 goto disable;
1598
1599 INIT_LIST_HEAD(&dev->namespaces);
1600 dev->pci_dev = pdev;
1601 pci_set_drvdata(pdev, dev);
1602 dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1603 dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1604 nvme_set_instance(dev);
1605 dev->entry[0].vector = pdev->irq;
1606
1607 result = nvme_setup_prp_pools(dev);
1608 if (result)
1609 goto disable_msix;
1610
1611 dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
1612 if (!dev->bar) {
1613 result = -ENOMEM;
1614 goto disable_msix;
1615 }
1616
1617 result = nvme_configure_admin_queue(dev);
1618 if (result)
1619 goto unmap;
1620 dev->queue_count++;
1621
1622 spin_lock(&dev_list_lock);
1623 list_add(&dev->node, &dev_list);
1624 spin_unlock(&dev_list_lock);
1625
1626 result = nvme_dev_add(dev);
1627 if (result)
1628 goto delete;
1629
1630 return 0;
1631
1632 delete:
1633 spin_lock(&dev_list_lock);
1634 list_del(&dev->node);
1635 spin_unlock(&dev_list_lock);
1636
1637 nvme_free_queues(dev);
1638 unmap:
1639 iounmap(dev->bar);
1640 disable_msix:
1641 pci_disable_msix(pdev);
1642 nvme_release_instance(dev);
1643 nvme_release_prp_pools(dev);
1644 disable:
1645 pci_disable_device(pdev);
1646 pci_release_regions(pdev);
1647 free:
1648 kfree(dev->queues);
1649 kfree(dev->entry);
1650 kfree(dev);
1651 return result;
1652}
1653
1654static void __devexit nvme_remove(struct pci_dev *pdev)
1655{
1656 struct nvme_dev *dev = pci_get_drvdata(pdev);
1657 nvme_dev_remove(dev);
1658 pci_disable_msix(pdev);
1659 iounmap(dev->bar);
1660 nvme_release_instance(dev);
1661 nvme_release_prp_pools(dev);
1662 pci_disable_device(pdev);
1663 pci_release_regions(pdev);
1664 kfree(dev->queues);
1665 kfree(dev->entry);
1666 kfree(dev);
1667}
1668
1669/* These functions are yet to be implemented */
1670#define nvme_error_detected NULL
1671#define nvme_dump_registers NULL
1672#define nvme_link_reset NULL
1673#define nvme_slot_reset NULL
1674#define nvme_error_resume NULL
1675#define nvme_suspend NULL
1676#define nvme_resume NULL
1677
1678static struct pci_error_handlers nvme_err_handler = {
1679 .error_detected = nvme_error_detected,
1680 .mmio_enabled = nvme_dump_registers,
1681 .link_reset = nvme_link_reset,
1682 .slot_reset = nvme_slot_reset,
1683 .resume = nvme_error_resume,
1684};
1685
1686/* Move to pci_ids.h later */
1687#define PCI_CLASS_STORAGE_EXPRESS 0x010802
1688
1689static DEFINE_PCI_DEVICE_TABLE(nvme_id_table) = {
1690 { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
1691 { 0, }
1692};
1693MODULE_DEVICE_TABLE(pci, nvme_id_table);
1694
1695static struct pci_driver nvme_driver = {
1696 .name = "nvme",
1697 .id_table = nvme_id_table,
1698 .probe = nvme_probe,
1699 .remove = __devexit_p(nvme_remove),
1700 .suspend = nvme_suspend,
1701 .resume = nvme_resume,
1702 .err_handler = &nvme_err_handler,
1703};
1704
1705static int __init nvme_init(void)
1706{
1707 int result = -EBUSY;
1708
1709 nvme_thread = kthread_run(nvme_kthread, NULL, "nvme");
1710 if (IS_ERR(nvme_thread))
1711 return PTR_ERR(nvme_thread);
1712
1713 nvme_major = register_blkdev(nvme_major, "nvme");
1714 if (nvme_major <= 0)
1715 goto kill_kthread;
1716
1717 result = pci_register_driver(&nvme_driver);
1718 if (result)
1719 goto unregister_blkdev;
1720 return 0;
1721
1722 unregister_blkdev:
1723 unregister_blkdev(nvme_major, "nvme");
1724 kill_kthread:
1725 kthread_stop(nvme_thread);
1726 return result;
1727}
1728
1729static void __exit nvme_exit(void)
1730{
1731 pci_unregister_driver(&nvme_driver);
1732 unregister_blkdev(nvme_major, "nvme");
1733 kthread_stop(nvme_thread);
1734}
1735
1736MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>");
1737MODULE_LICENSE("GPL");
1738MODULE_VERSION("0.8");
1739module_init(nvme_init);
1740module_exit(nvme_exit);
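
The error unwinding at the end of nvme_probe() above follows the kernel's usual goto-label pattern: each acquired resource gets a label, and a failure jumps to the label that releases everything acquired so far, in reverse order, while the success path returns before reaching any label. A minimal user-space sketch of the same idiom (the resources and names here are made up, not taken from the driver):

#include <stdio.h>
#include <stdlib.h>

/* Acquire three resources; on failure release only what was already
 * acquired, in reverse order, mirroring the delete/unmap/disable_msix
 * label chain in nvme_probe() above. */
static int setup(void)
{
	char *a, *b, *c;

	a = malloc(64);
	if (!a)
		goto err;
	b = malloc(64);
	if (!b)
		goto err_free_a;
	c = malloc(64);
	if (!c)
		goto err_free_b;

	printf("all three resources acquired\n");
	/* A real probe would keep the resources; the demo frees them
	 * here only so it has no leaks. */
	free(c);
	free(b);
	free(a);
	return 0;

err_free_b:
	free(b);
err_free_a:
	free(a);
err:
	return -1;
}

int main(void)
{
	return setup() ? EXIT_FAILURE : EXIT_SUCCESS;
}
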
diff --git a/drivers/block/paride/bpck6.c b/drivers/block/paride/bpck6.c
index ad124525ac23..ec64e7f5d1ce 100644
--- a/drivers/block/paride/bpck6.c
+++ b/drivers/block/paride/bpck6.c
@@ -20,9 +20,6 @@
20*/ 20*/
21 21
22 22
23/* PARAMETERS */
24static int verbose; /* set this to 1 to see debugging messages and whatnot */
25
26#define BACKPACK_VERSION "2.0.2" 23#define BACKPACK_VERSION "2.0.2"
27 24
28#include <linux/module.h> 25#include <linux/module.h>
@@ -36,6 +33,8 @@ static int verbose; /* set this to 1 to see debugging messages and whatnot */
36#include "ppc6lnx.c" 33#include "ppc6lnx.c"
37#include "paride.h" 34#include "paride.h"
38 35
36/* PARAMETERS */
37static bool verbose; /* set this to 1 to see debugging messages and whatnot */
39 38
40 39
41#define PPCSTRUCT(pi) ((Interface *)(pi->private)) 40#define PPCSTRUCT(pi) ((Interface *)(pi->private))
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 46b8136c31bb..ba2b6b5e5910 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -144,7 +144,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_DLY};
144static DEFINE_MUTEX(pcd_mutex); 144static DEFINE_MUTEX(pcd_mutex);
145static DEFINE_SPINLOCK(pcd_lock); 145static DEFINE_SPINLOCK(pcd_lock);
146 146
147module_param(verbose, bool, 0644); 147module_param(verbose, int, 0644);
148module_param(major, int, 0); 148module_param(major, int, 0);
149module_param(name, charp, 0); 149module_param(name, charp, 0);
150module_param(nice, int, 0); 150module_param(nice, int, 0);
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 869e7676d46f..831e3ac156e6 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -124,8 +124,9 @@
124 by default. 124 by default.
125 125
126*/ 126*/
127#include <linux/types.h>
127 128
128static int verbose = 0; 129static bool verbose = 0;
129static int major = PD_MAJOR; 130static int major = PD_MAJOR;
130static char *name = PD_NAME; 131static char *name = PD_NAME;
131static int cluster = 64; 132static int cluster = 64;
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index f21b520ef419..ec8f9ed6326e 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -118,13 +118,15 @@
118#define PF_NAME "pf" 118#define PF_NAME "pf"
119#define PF_UNITS 4 119#define PF_UNITS 4
120 120
121#include <linux/types.h>
122
121/* Here are things one can override from the insmod command. 123/* Here are things one can override from the insmod command.
122 Most are autoprobed by paride unless set here. Verbose is off 124 Most are autoprobed by paride unless set here. Verbose is off
123 by default. 125 by default.
124 126
125*/ 127*/
126 128
127static int verbose = 0; 129static bool verbose = 0;
128static int major = PF_MAJOR; 130static int major = PF_MAJOR;
129static char *name = PF_NAME; 131static char *name = PF_NAME;
130static int cluster = 64; 132static int cluster = 64;
diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c
index a79fb4f7ff62..4a27b1de5fcb 100644
--- a/drivers/block/paride/pg.c
+++ b/drivers/block/paride/pg.c
@@ -130,13 +130,14 @@
130#define PI_PG 4 130#define PI_PG 4
131#endif 131#endif
132 132
133#include <linux/types.h>
133/* Here are things one can override from the insmod command. 134/* Here are things one can override from the insmod command.
134 Most are autoprobed by paride unless set here. Verbose is 0 135 Most are autoprobed by paride unless set here. Verbose is 0
135 by default. 136 by default.
136 137
137*/ 138*/
138 139
139static int verbose = 0; 140static bool verbose = 0;
140static int major = PG_MAJOR; 141static int major = PG_MAJOR;
141static char *name = PG_NAME; 142static char *name = PG_NAME;
142static int disable = 0; 143static int disable = 0;
diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c
index 7179f79d7468..2596042eb987 100644
--- a/drivers/block/paride/pt.c
+++ b/drivers/block/paride/pt.c
@@ -109,13 +109,15 @@
109#define PT_NAME "pt" 109#define PT_NAME "pt"
110#define PT_UNITS 4 110#define PT_UNITS 4
111 111
112#include <linux/types.h>
113
112/* Here are things one can override from the insmod command. 114/* Here are things one can override from the insmod command.
113 Most are autoprobed by paride unless set here. Verbose is on 115 Most are autoprobed by paride unless set here. Verbose is on
114 by default. 116 by default.
115 117
116*/ 118*/
117 119
118static int verbose = 0; 120static bool verbose = 0;
119static int major = PT_MAJOR; 121static int major = PT_MAJOR;
120static char *name = PT_NAME; 122static char *name = PT_NAME;
121static int disable = 0; 123static int disable = 0;
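
The paride hunks above are part of the tree-wide switch to giving module_param(..., bool, ...) parameters a real bool backing variable (pcd.c goes the other way because its verbose is a verbosity level rather than a flag). A minimal module sketch of the matched declaration/parameter pair, assuming the usual module boilerplate; the names here are illustrative, not from these drivers:

/* Sketch only: the C type of the variable must match the type named
 * in module_param(). */
#include <linux/module.h>
#include <linux/moduleparam.h>

static bool verbose;			/* matches the "bool" below */
module_param(verbose, bool, 0644);
MODULE_PARM_DESC(verbose, "enable extra debugging output");

static int __init demo_init(void)
{
	if (verbose)
		pr_info("demo: verbose mode on\n");
	return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
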
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index d59edeabd93f..ba66e4445f41 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -987,14 +987,14 @@ static void pkt_copy_bio_data(struct bio *src_bio, int seg, int offs, struct pag
987 987
988 while (copy_size > 0) { 988 while (copy_size > 0) {
989 struct bio_vec *src_bvl = bio_iovec_idx(src_bio, seg); 989 struct bio_vec *src_bvl = bio_iovec_idx(src_bio, seg);
990 void *vfrom = kmap_atomic(src_bvl->bv_page, KM_USER0) + 990 void *vfrom = kmap_atomic(src_bvl->bv_page) +
991 src_bvl->bv_offset + offs; 991 src_bvl->bv_offset + offs;
992 void *vto = page_address(dst_page) + dst_offs; 992 void *vto = page_address(dst_page) + dst_offs;
993 int len = min_t(int, copy_size, src_bvl->bv_len - offs); 993 int len = min_t(int, copy_size, src_bvl->bv_len - offs);
994 994
995 BUG_ON(len < 0); 995 BUG_ON(len < 0);
996 memcpy(vto, vfrom, len); 996 memcpy(vto, vfrom, len);
997 kunmap_atomic(vfrom, KM_USER0); 997 kunmap_atomic(vfrom);
998 998
999 seg++; 999 seg++;
1000 offs = 0; 1000 offs = 0;
@@ -1019,10 +1019,10 @@ static void pkt_make_local_copy(struct packet_data *pkt, struct bio_vec *bvec)
1019 offs = 0; 1019 offs = 0;
1020 for (f = 0; f < pkt->frames; f++) { 1020 for (f = 0; f < pkt->frames; f++) {
1021 if (bvec[f].bv_page != pkt->pages[p]) { 1021 if (bvec[f].bv_page != pkt->pages[p]) {
1022 void *vfrom = kmap_atomic(bvec[f].bv_page, KM_USER0) + bvec[f].bv_offset; 1022 void *vfrom = kmap_atomic(bvec[f].bv_page) + bvec[f].bv_offset;
1023 void *vto = page_address(pkt->pages[p]) + offs; 1023 void *vto = page_address(pkt->pages[p]) + offs;
1024 memcpy(vto, vfrom, CD_FRAMESIZE); 1024 memcpy(vto, vfrom, CD_FRAMESIZE);
1025 kunmap_atomic(vfrom, KM_USER0); 1025 kunmap_atomic(vfrom);
1026 bvec[f].bv_page = pkt->pages[p]; 1026 bvec[f].bv_page = pkt->pages[p];
1027 bvec[f].bv_offset = offs; 1027 bvec[f].bv_offset = offs;
1028 } else { 1028 } else {
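
The pktcdvd hunks drop the KM_USER0 argument because kmap_atomic()/kunmap_atomic() no longer take an explicit slot type; the mapping slot is managed implicitly. A small illustrative helper showing the resulting calling convention (the helper itself is not part of this patch):

#include <linux/highmem.h>
#include <linux/string.h>

/* Illustrative only: copy one page into a buffer using the
 * single-argument kmap_atomic() API. */
static void copy_page_to_buffer(struct page *page, void *buf)
{
	void *vaddr = kmap_atomic(page);

	memcpy(buf, vaddr, PAGE_SIZE);
	kunmap_atomic(vaddr);
}
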
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 148ab944378d..013c7a549fb6 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -41,19 +41,35 @@
41 41
42#include "rbd_types.h" 42#include "rbd_types.h"
43 43
44#define DRV_NAME "rbd" 44/*
45#define DRV_NAME_LONG "rbd (rados block device)" 45 * The basic unit of block I/O is a sector. It is interpreted in a
46 * number of contexts in Linux (blk, bio, genhd), but the default is
47 * universally 512 bytes. These symbols are just slightly more
48 * meaningful than the bare numbers they represent.
49 */
50#define SECTOR_SHIFT 9
51#define SECTOR_SIZE (1ULL << SECTOR_SHIFT)
52
53#define RBD_DRV_NAME "rbd"
54#define RBD_DRV_NAME_LONG "rbd (rados block device)"
46 55
47#define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ 56#define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */
48 57
49#define RBD_MAX_MD_NAME_LEN (96 + sizeof(RBD_SUFFIX)) 58#define RBD_MAX_MD_NAME_LEN (RBD_MAX_OBJ_NAME_LEN + sizeof(RBD_SUFFIX))
50#define RBD_MAX_POOL_NAME_LEN 64 59#define RBD_MAX_POOL_NAME_LEN 64
51#define RBD_MAX_SNAP_NAME_LEN 32 60#define RBD_MAX_SNAP_NAME_LEN 32
52#define RBD_MAX_OPT_LEN 1024 61#define RBD_MAX_OPT_LEN 1024
53 62
54#define RBD_SNAP_HEAD_NAME "-" 63#define RBD_SNAP_HEAD_NAME "-"
55 64
65/*
66 * An RBD device name will be "rbd#", where the "rbd" comes from
67 * RBD_DRV_NAME above, and # is a unique integer identifier.
68 * MAX_INT_FORMAT_WIDTH is used in ensuring DEV_NAME_LEN is big
69 * enough to hold all possible device names.
70 */
56#define DEV_NAME_LEN 32 71#define DEV_NAME_LEN 32
72#define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1)
57 73
58#define RBD_NOTIFY_TIMEOUT_DEFAULT 10 74#define RBD_NOTIFY_TIMEOUT_DEFAULT 10
59 75
@@ -66,7 +82,6 @@ struct rbd_image_header {
66 __u8 obj_order; 82 __u8 obj_order;
67 __u8 crypt_type; 83 __u8 crypt_type;
68 __u8 comp_type; 84 __u8 comp_type;
69 struct rw_semaphore snap_rwsem;
70 struct ceph_snap_context *snapc; 85 struct ceph_snap_context *snapc;
71 size_t snap_names_len; 86 size_t snap_names_len;
72 u64 snap_seq; 87 u64 snap_seq;
@@ -83,7 +98,7 @@ struct rbd_options {
83}; 98};
84 99
85/* 100/*
86 * an instance of the client. multiple devices may share a client. 101 * an instance of the client. multiple devices may share an rbd client.
87 */ 102 */
88struct rbd_client { 103struct rbd_client {
89 struct ceph_client *client; 104 struct ceph_client *client;
@@ -92,20 +107,9 @@ struct rbd_client {
92 struct list_head node; 107 struct list_head node;
93}; 108};
94 109
95struct rbd_req_coll;
96
97/* 110/*
98 * a single io request 111 * a request completion status
99 */ 112 */
100struct rbd_request {
101 struct request *rq; /* blk layer request */
102 struct bio *bio; /* cloned bio */
103 struct page **pages; /* list of used pages */
104 u64 len;
105 int coll_index;
106 struct rbd_req_coll *coll;
107};
108
109struct rbd_req_status { 113struct rbd_req_status {
110 int done; 114 int done;
111 int rc; 115 int rc;
@@ -122,6 +126,18 @@ struct rbd_req_coll {
122 struct rbd_req_status status[0]; 126 struct rbd_req_status status[0];
123}; 127};
124 128
129/*
130 * a single io request
131 */
132struct rbd_request {
133 struct request *rq; /* blk layer request */
134 struct bio *bio; /* cloned bio */
135 struct page **pages; /* list of used pages */
136 u64 len;
137 int coll_index;
138 struct rbd_req_coll *coll;
139};
140
125struct rbd_snap { 141struct rbd_snap {
126 struct device dev; 142 struct device dev;
127 const char *name; 143 const char *name;
@@ -140,7 +156,6 @@ struct rbd_device {
140 struct gendisk *disk; /* blkdev's gendisk and rq */ 156 struct gendisk *disk; /* blkdev's gendisk and rq */
141 struct request_queue *q; 157 struct request_queue *q;
142 158
143 struct ceph_client *client;
144 struct rbd_client *rbd_client; 159 struct rbd_client *rbd_client;
145 160
146 char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ 161 char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */
@@ -157,6 +172,8 @@ struct rbd_device {
157 struct ceph_osd_event *watch_event; 172 struct ceph_osd_event *watch_event;
158 struct ceph_osd_request *watch_request; 173 struct ceph_osd_request *watch_request;
159 174
175 /* protects updating the header */
176 struct rw_semaphore header_rwsem;
160 char snap_name[RBD_MAX_SNAP_NAME_LEN]; 177 char snap_name[RBD_MAX_SNAP_NAME_LEN];
161 u32 cur_snap; /* index+1 of current snapshot within snap context 178 u32 cur_snap; /* index+1 of current snapshot within snap context
162 0 - for the head */ 179 0 - for the head */
@@ -171,15 +188,13 @@ struct rbd_device {
171 struct device dev; 188 struct device dev;
172}; 189};
173 190
174static struct bus_type rbd_bus_type = {
175 .name = "rbd",
176};
177
178static spinlock_t node_lock; /* protects client get/put */
179
180static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ 191static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */
192
181static LIST_HEAD(rbd_dev_list); /* devices */ 193static LIST_HEAD(rbd_dev_list); /* devices */
182static LIST_HEAD(rbd_client_list); /* clients */ 194static DEFINE_SPINLOCK(rbd_dev_list_lock);
195
196static LIST_HEAD(rbd_client_list); /* clients */
197static DEFINE_SPINLOCK(rbd_client_list_lock);
183 198
184static int __rbd_init_snaps_header(struct rbd_device *rbd_dev); 199static int __rbd_init_snaps_header(struct rbd_device *rbd_dev);
185static void rbd_dev_release(struct device *dev); 200static void rbd_dev_release(struct device *dev);
@@ -190,12 +205,32 @@ static ssize_t rbd_snap_add(struct device *dev,
190static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev, 205static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev,
191 struct rbd_snap *snap); 206 struct rbd_snap *snap);
192 207
208static ssize_t rbd_add(struct bus_type *bus, const char *buf,
209 size_t count);
210static ssize_t rbd_remove(struct bus_type *bus, const char *buf,
211 size_t count);
212
213static struct bus_attribute rbd_bus_attrs[] = {
214 __ATTR(add, S_IWUSR, NULL, rbd_add),
215 __ATTR(remove, S_IWUSR, NULL, rbd_remove),
216 __ATTR_NULL
217};
193 218
194static struct rbd_device *dev_to_rbd(struct device *dev) 219static struct bus_type rbd_bus_type = {
220 .name = "rbd",
221 .bus_attrs = rbd_bus_attrs,
222};
223
224static void rbd_root_dev_release(struct device *dev)
195{ 225{
196 return container_of(dev, struct rbd_device, dev);
197} 226}
198 227
228static struct device rbd_root_dev = {
229 .init_name = "rbd",
230 .release = rbd_root_dev_release,
231};
232
233
199static struct device *rbd_get_dev(struct rbd_device *rbd_dev) 234static struct device *rbd_get_dev(struct rbd_device *rbd_dev)
200{ 235{
201 return get_device(&rbd_dev->dev); 236 return get_device(&rbd_dev->dev);
@@ -210,8 +245,7 @@ static int __rbd_update_snaps(struct rbd_device *rbd_dev);
210 245
211static int rbd_open(struct block_device *bdev, fmode_t mode) 246static int rbd_open(struct block_device *bdev, fmode_t mode)
212{ 247{
213 struct gendisk *disk = bdev->bd_disk; 248 struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
214 struct rbd_device *rbd_dev = disk->private_data;
215 249
216 rbd_get_dev(rbd_dev); 250 rbd_get_dev(rbd_dev);
217 251
@@ -256,9 +290,11 @@ static struct rbd_client *rbd_client_create(struct ceph_options *opt,
256 kref_init(&rbdc->kref); 290 kref_init(&rbdc->kref);
257 INIT_LIST_HEAD(&rbdc->node); 291 INIT_LIST_HEAD(&rbdc->node);
258 292
293 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
294
259 rbdc->client = ceph_create_client(opt, rbdc, 0, 0); 295 rbdc->client = ceph_create_client(opt, rbdc, 0, 0);
260 if (IS_ERR(rbdc->client)) 296 if (IS_ERR(rbdc->client))
261 goto out_rbdc; 297 goto out_mutex;
262 opt = NULL; /* Now rbdc->client is responsible for opt */ 298 opt = NULL; /* Now rbdc->client is responsible for opt */
263 299
264 ret = ceph_open_session(rbdc->client); 300 ret = ceph_open_session(rbdc->client);
@@ -267,16 +303,19 @@ static struct rbd_client *rbd_client_create(struct ceph_options *opt,
267 303
268 rbdc->rbd_opts = rbd_opts; 304 rbdc->rbd_opts = rbd_opts;
269 305
270 spin_lock(&node_lock); 306 spin_lock(&rbd_client_list_lock);
271 list_add_tail(&rbdc->node, &rbd_client_list); 307 list_add_tail(&rbdc->node, &rbd_client_list);
272 spin_unlock(&node_lock); 308 spin_unlock(&rbd_client_list_lock);
309
310 mutex_unlock(&ctl_mutex);
273 311
274 dout("rbd_client_create created %p\n", rbdc); 312 dout("rbd_client_create created %p\n", rbdc);
275 return rbdc; 313 return rbdc;
276 314
277out_err: 315out_err:
278 ceph_destroy_client(rbdc->client); 316 ceph_destroy_client(rbdc->client);
279out_rbdc: 317out_mutex:
318 mutex_unlock(&ctl_mutex);
280 kfree(rbdc); 319 kfree(rbdc);
281out_opt: 320out_opt:
282 if (opt) 321 if (opt)
@@ -324,7 +363,7 @@ static int parse_rbd_opts_token(char *c, void *private)
324 substring_t argstr[MAX_OPT_ARGS]; 363 substring_t argstr[MAX_OPT_ARGS];
325 int token, intval, ret; 364 int token, intval, ret;
326 365
327 token = match_token((char *)c, rbdopt_tokens, argstr); 366 token = match_token(c, rbdopt_tokens, argstr);
328 if (token < 0) 367 if (token < 0)
329 return -EINVAL; 368 return -EINVAL;
330 369
@@ -357,64 +396,61 @@ static int parse_rbd_opts_token(char *c, void *private)
357 * Get a ceph client with specific addr and configuration, if one does 396 * Get a ceph client with specific addr and configuration, if one does
358 * not exist create it. 397 * not exist create it.
359 */ 398 */
360static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr, 399static struct rbd_client *rbd_get_client(const char *mon_addr,
361 char *options) 400 size_t mon_addr_len,
401 char *options)
362{ 402{
363 struct rbd_client *rbdc; 403 struct rbd_client *rbdc;
364 struct ceph_options *opt; 404 struct ceph_options *opt;
365 int ret;
366 struct rbd_options *rbd_opts; 405 struct rbd_options *rbd_opts;
367 406
368 rbd_opts = kzalloc(sizeof(*rbd_opts), GFP_KERNEL); 407 rbd_opts = kzalloc(sizeof(*rbd_opts), GFP_KERNEL);
369 if (!rbd_opts) 408 if (!rbd_opts)
370 return -ENOMEM; 409 return ERR_PTR(-ENOMEM);
371 410
372 rbd_opts->notify_timeout = RBD_NOTIFY_TIMEOUT_DEFAULT; 411 rbd_opts->notify_timeout = RBD_NOTIFY_TIMEOUT_DEFAULT;
373 412
374 ret = ceph_parse_options(&opt, options, mon_addr, 413 opt = ceph_parse_options(options, mon_addr,
375 mon_addr + strlen(mon_addr), parse_rbd_opts_token, rbd_opts); 414 mon_addr + mon_addr_len,
376 if (ret < 0) 415 parse_rbd_opts_token, rbd_opts);
377 goto done_err; 416 if (IS_ERR(opt)) {
417 kfree(rbd_opts);
418 return ERR_CAST(opt);
419 }
378 420
379 spin_lock(&node_lock); 421 spin_lock(&rbd_client_list_lock);
380 rbdc = __rbd_client_find(opt); 422 rbdc = __rbd_client_find(opt);
381 if (rbdc) { 423 if (rbdc) {
382 ceph_destroy_options(opt);
383
384 /* using an existing client */ 424 /* using an existing client */
385 kref_get(&rbdc->kref); 425 kref_get(&rbdc->kref);
386 rbd_dev->rbd_client = rbdc; 426 spin_unlock(&rbd_client_list_lock);
387 rbd_dev->client = rbdc->client; 427
388 spin_unlock(&node_lock); 428 ceph_destroy_options(opt);
389 return 0; 429 kfree(rbd_opts);
430
431 return rbdc;
390 } 432 }
391 spin_unlock(&node_lock); 433 spin_unlock(&rbd_client_list_lock);
392 434
393 rbdc = rbd_client_create(opt, rbd_opts); 435 rbdc = rbd_client_create(opt, rbd_opts);
394 if (IS_ERR(rbdc)) {
395 ret = PTR_ERR(rbdc);
396 goto done_err;
397 }
398 436
399 rbd_dev->rbd_client = rbdc; 437 if (IS_ERR(rbdc))
400 rbd_dev->client = rbdc->client; 438 kfree(rbd_opts);
401 return 0; 439
402done_err: 440 return rbdc;
403 kfree(rbd_opts);
404 return ret;
405} 441}
406 442
407/* 443/*
408 * Destroy ceph client 444 * Destroy ceph client
445 *
446 * Caller must hold rbd_client_list_lock.
409 */ 447 */
410static void rbd_client_release(struct kref *kref) 448static void rbd_client_release(struct kref *kref)
411{ 449{
412 struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); 450 struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);
413 451
414 dout("rbd_release_client %p\n", rbdc); 452 dout("rbd_release_client %p\n", rbdc);
415 spin_lock(&node_lock);
416 list_del(&rbdc->node); 453 list_del(&rbdc->node);
417 spin_unlock(&node_lock);
418 454
419 ceph_destroy_client(rbdc->client); 455 ceph_destroy_client(rbdc->client);
420 kfree(rbdc->rbd_opts); 456 kfree(rbdc->rbd_opts);
@@ -427,9 +463,10 @@ static void rbd_client_release(struct kref *kref)
427 */ 463 */
428static void rbd_put_client(struct rbd_device *rbd_dev) 464static void rbd_put_client(struct rbd_device *rbd_dev)
429{ 465{
466 spin_lock(&rbd_client_list_lock);
430 kref_put(&rbd_dev->rbd_client->kref, rbd_client_release); 467 kref_put(&rbd_dev->rbd_client->kref, rbd_client_release);
468 spin_unlock(&rbd_client_list_lock);
431 rbd_dev->rbd_client = NULL; 469 rbd_dev->rbd_client = NULL;
432 rbd_dev->client = NULL;
433} 470}
434 471
435/* 472/*
@@ -454,21 +491,19 @@ static int rbd_header_from_disk(struct rbd_image_header *header,
454 gfp_t gfp_flags) 491 gfp_t gfp_flags)
455{ 492{
456 int i; 493 int i;
457 u32 snap_count = le32_to_cpu(ondisk->snap_count); 494 u32 snap_count;
458 int ret = -ENOMEM;
459 495
460 if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT))) { 496 if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT)))
461 return -ENXIO; 497 return -ENXIO;
462 }
463 498
464 init_rwsem(&header->snap_rwsem); 499 snap_count = le32_to_cpu(ondisk->snap_count);
465 header->snap_names_len = le64_to_cpu(ondisk->snap_names_len);
466 header->snapc = kmalloc(sizeof(struct ceph_snap_context) + 500 header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
467 snap_count * 501 snap_count * sizeof (*ondisk),
468 sizeof(struct rbd_image_snap_ondisk),
469 gfp_flags); 502 gfp_flags);
470 if (!header->snapc) 503 if (!header->snapc)
471 return -ENOMEM; 504 return -ENOMEM;
505
506 header->snap_names_len = le64_to_cpu(ondisk->snap_names_len);
472 if (snap_count) { 507 if (snap_count) {
473 header->snap_names = kmalloc(header->snap_names_len, 508 header->snap_names = kmalloc(header->snap_names_len,
474 GFP_KERNEL); 509 GFP_KERNEL);
@@ -495,8 +530,7 @@ static int rbd_header_from_disk(struct rbd_image_header *header,
495 header->snapc->num_snaps = snap_count; 530 header->snapc->num_snaps = snap_count;
496 header->total_snaps = snap_count; 531 header->total_snaps = snap_count;
497 532
498 if (snap_count && 533 if (snap_count && allocated_snaps == snap_count) {
499 allocated_snaps == snap_count) {
500 for (i = 0; i < snap_count; i++) { 534 for (i = 0; i < snap_count; i++) {
501 header->snapc->snaps[i] = 535 header->snapc->snaps[i] =
502 le64_to_cpu(ondisk->snaps[i].id); 536 le64_to_cpu(ondisk->snaps[i].id);
@@ -515,7 +549,7 @@ err_names:
515 kfree(header->snap_names); 549 kfree(header->snap_names);
516err_snapc: 550err_snapc:
517 kfree(header->snapc); 551 kfree(header->snapc);
518 return ret; 552 return -ENOMEM;
519} 553}
520 554
521static int snap_index(struct rbd_image_header *header, int snap_num) 555static int snap_index(struct rbd_image_header *header, int snap_num)
@@ -539,35 +573,34 @@ static int snap_by_name(struct rbd_image_header *header, const char *snap_name,
539 int i; 573 int i;
540 char *p = header->snap_names; 574 char *p = header->snap_names;
541 575
542 for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) { 576 for (i = 0; i < header->total_snaps; i++) {
543 if (strcmp(snap_name, p) == 0) 577 if (!strcmp(snap_name, p)) {
544 break;
545 }
546 if (i == header->total_snaps)
547 return -ENOENT;
548 if (seq)
549 *seq = header->snapc->snaps[i];
550 578
551 if (size) 579 /* Found it. Pass back its id and/or size */
552 *size = header->snap_sizes[i];
553 580
554 return i; 581 if (seq)
582 *seq = header->snapc->snaps[i];
583 if (size)
584 *size = header->snap_sizes[i];
585 return i;
586 }
587 p += strlen(p) + 1; /* Skip ahead to the next name */
588 }
589 return -ENOENT;
555} 590}
556 591
557static int rbd_header_set_snap(struct rbd_device *dev, 592static int rbd_header_set_snap(struct rbd_device *dev, u64 *size)
558 const char *snap_name,
559 u64 *size)
560{ 593{
561 struct rbd_image_header *header = &dev->header; 594 struct rbd_image_header *header = &dev->header;
562 struct ceph_snap_context *snapc = header->snapc; 595 struct ceph_snap_context *snapc = header->snapc;
563 int ret = -ENOENT; 596 int ret = -ENOENT;
564 597
565 down_write(&header->snap_rwsem); 598 BUILD_BUG_ON(sizeof (dev->snap_name) < sizeof (RBD_SNAP_HEAD_NAME));
566 599
567 if (!snap_name || 600 down_write(&dev->header_rwsem);
568 !*snap_name || 601
569 strcmp(snap_name, "-") == 0 || 602 if (!memcmp(dev->snap_name, RBD_SNAP_HEAD_NAME,
570 strcmp(snap_name, RBD_SNAP_HEAD_NAME) == 0) { 603 sizeof (RBD_SNAP_HEAD_NAME))) {
571 if (header->total_snaps) 604 if (header->total_snaps)
572 snapc->seq = header->snap_seq; 605 snapc->seq = header->snap_seq;
573 else 606 else
@@ -577,7 +610,7 @@ static int rbd_header_set_snap(struct rbd_device *dev,
577 if (size) 610 if (size)
578 *size = header->image_size; 611 *size = header->image_size;
579 } else { 612 } else {
580 ret = snap_by_name(header, snap_name, &snapc->seq, size); 613 ret = snap_by_name(header, dev->snap_name, &snapc->seq, size);
581 if (ret < 0) 614 if (ret < 0)
582 goto done; 615 goto done;
583 616
@@ -587,7 +620,7 @@ static int rbd_header_set_snap(struct rbd_device *dev,
587 620
588 ret = 0; 621 ret = 0;
589done: 622done:
590 up_write(&header->snap_rwsem); 623 up_write(&dev->header_rwsem);
591 return ret; 624 return ret;
592} 625}
593 626
@@ -714,7 +747,7 @@ static struct bio *bio_chain_clone(struct bio **old, struct bio **next,
714 747
715 /* split the bio. We'll release it either in the next 748 /* split the bio. We'll release it either in the next
716 call, or it will have to be released outside */ 749 call, or it will have to be released outside */
717 bp = bio_split(old_chain, (len - total) / 512ULL); 750 bp = bio_split(old_chain, (len - total) / SECTOR_SIZE);
718 if (!bp) 751 if (!bp)
719 goto err_out; 752 goto err_out;
720 753
@@ -854,7 +887,7 @@ static int rbd_do_request(struct request *rq,
854 struct timespec mtime = CURRENT_TIME; 887 struct timespec mtime = CURRENT_TIME;
855 struct rbd_request *req_data; 888 struct rbd_request *req_data;
856 struct ceph_osd_request_head *reqhead; 889 struct ceph_osd_request_head *reqhead;
857 struct rbd_image_header *header = &dev->header; 890 struct ceph_osd_client *osdc;
858 891
859 req_data = kzalloc(sizeof(*req_data), GFP_NOIO); 892 req_data = kzalloc(sizeof(*req_data), GFP_NOIO);
860 if (!req_data) { 893 if (!req_data) {
@@ -871,15 +904,13 @@ static int rbd_do_request(struct request *rq,
871 904
872 dout("rbd_do_request obj=%s ofs=%lld len=%lld\n", obj, len, ofs); 905 dout("rbd_do_request obj=%s ofs=%lld len=%lld\n", obj, len, ofs);
873 906
874 down_read(&header->snap_rwsem); 907 down_read(&dev->header_rwsem);
875 908
876 req = ceph_osdc_alloc_request(&dev->client->osdc, flags, 909 osdc = &dev->rbd_client->client->osdc;
877 snapc, 910 req = ceph_osdc_alloc_request(osdc, flags, snapc, ops,
878 ops, 911 false, GFP_NOIO, pages, bio);
879 false,
880 GFP_NOIO, pages, bio);
881 if (!req) { 912 if (!req) {
882 up_read(&header->snap_rwsem); 913 up_read(&dev->header_rwsem);
883 ret = -ENOMEM; 914 ret = -ENOMEM;
884 goto done_pages; 915 goto done_pages;
885 } 916 }
@@ -906,27 +937,27 @@ static int rbd_do_request(struct request *rq,
906 layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); 937 layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
907 layout->fl_pg_preferred = cpu_to_le32(-1); 938 layout->fl_pg_preferred = cpu_to_le32(-1);
908 layout->fl_pg_pool = cpu_to_le32(dev->poolid); 939 layout->fl_pg_pool = cpu_to_le32(dev->poolid);
909 ceph_calc_raw_layout(&dev->client->osdc, layout, snapid, 940 ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno,
910 ofs, &len, &bno, req, ops); 941 req, ops);
911 942
912 ceph_osdc_build_request(req, ofs, &len, 943 ceph_osdc_build_request(req, ofs, &len,
913 ops, 944 ops,
914 snapc, 945 snapc,
915 &mtime, 946 &mtime,
916 req->r_oid, req->r_oid_len); 947 req->r_oid, req->r_oid_len);
917 up_read(&header->snap_rwsem); 948 up_read(&dev->header_rwsem);
918 949
919 if (linger_req) { 950 if (linger_req) {
920 ceph_osdc_set_request_linger(&dev->client->osdc, req); 951 ceph_osdc_set_request_linger(osdc, req);
921 *linger_req = req; 952 *linger_req = req;
922 } 953 }
923 954
924 ret = ceph_osdc_start_request(&dev->client->osdc, req, false); 955 ret = ceph_osdc_start_request(osdc, req, false);
925 if (ret < 0) 956 if (ret < 0)
926 goto done_err; 957 goto done_err;
927 958
928 if (!rbd_cb) { 959 if (!rbd_cb) {
929 ret = ceph_osdc_wait_request(&dev->client->osdc, req); 960 ret = ceph_osdc_wait_request(osdc, req);
930 if (ver) 961 if (ver)
931 *ver = le64_to_cpu(req->r_reassert_version.version); 962 *ver = le64_to_cpu(req->r_reassert_version.version);
932 dout("reassert_ver=%lld\n", 963 dout("reassert_ver=%lld\n",
@@ -1210,8 +1241,8 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
1210 rc = __rbd_update_snaps(dev); 1241 rc = __rbd_update_snaps(dev);
1211 mutex_unlock(&ctl_mutex); 1242 mutex_unlock(&ctl_mutex);
1212 if (rc) 1243 if (rc)
1213 pr_warning(DRV_NAME "%d got notification but failed to update" 1244 pr_warning(RBD_DRV_NAME "%d got notification but failed to "
1214 " snaps: %d\n", dev->major, rc); 1245 " update snaps: %d\n", dev->major, rc);
1215 1246
1216 rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name); 1247 rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name);
1217} 1248}
@@ -1224,7 +1255,7 @@ static int rbd_req_sync_watch(struct rbd_device *dev,
1224 u64 ver) 1255 u64 ver)
1225{ 1256{
1226 struct ceph_osd_req_op *ops; 1257 struct ceph_osd_req_op *ops;
1227 struct ceph_osd_client *osdc = &dev->client->osdc; 1258 struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc;
1228 1259
1229 int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0); 1260 int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0);
1230 if (ret < 0) 1261 if (ret < 0)
@@ -1311,7 +1342,7 @@ static int rbd_req_sync_notify(struct rbd_device *dev,
1311 const char *obj) 1342 const char *obj)
1312{ 1343{
1313 struct ceph_osd_req_op *ops; 1344 struct ceph_osd_req_op *ops;
1314 struct ceph_osd_client *osdc = &dev->client->osdc; 1345 struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc;
1315 struct ceph_osd_event *event; 1346 struct ceph_osd_event *event;
1316 struct rbd_notify_info info; 1347 struct rbd_notify_info info;
1317 int payload_len = sizeof(u32) + sizeof(u32); 1348 int payload_len = sizeof(u32) + sizeof(u32);
@@ -1418,9 +1449,7 @@ static void rbd_rq_fn(struct request_queue *q)
1418 struct request *rq; 1449 struct request *rq;
1419 struct bio_pair *bp = NULL; 1450 struct bio_pair *bp = NULL;
1420 1451
1421 rq = blk_fetch_request(q); 1452 while ((rq = blk_fetch_request(q))) {
1422
1423 while (1) {
1424 struct bio *bio; 1453 struct bio *bio;
1425 struct bio *rq_bio, *next_bio = NULL; 1454 struct bio *rq_bio, *next_bio = NULL;
1426 bool do_write; 1455 bool do_write;
@@ -1438,32 +1467,32 @@ static void rbd_rq_fn(struct request_queue *q)
1438 /* filter out block requests we don't understand */ 1467 /* filter out block requests we don't understand */
1439 if ((rq->cmd_type != REQ_TYPE_FS)) { 1468 if ((rq->cmd_type != REQ_TYPE_FS)) {
1440 __blk_end_request_all(rq, 0); 1469 __blk_end_request_all(rq, 0);
1441 goto next; 1470 continue;
1442 } 1471 }
1443 1472
1444 /* deduce our operation (read, write) */ 1473 /* deduce our operation (read, write) */
1445 do_write = (rq_data_dir(rq) == WRITE); 1474 do_write = (rq_data_dir(rq) == WRITE);
1446 1475
1447 size = blk_rq_bytes(rq); 1476 size = blk_rq_bytes(rq);
1448 ofs = blk_rq_pos(rq) * 512ULL; 1477 ofs = blk_rq_pos(rq) * SECTOR_SIZE;
1449 rq_bio = rq->bio; 1478 rq_bio = rq->bio;
1450 if (do_write && rbd_dev->read_only) { 1479 if (do_write && rbd_dev->read_only) {
1451 __blk_end_request_all(rq, -EROFS); 1480 __blk_end_request_all(rq, -EROFS);
1452 goto next; 1481 continue;
1453 } 1482 }
1454 1483
1455 spin_unlock_irq(q->queue_lock); 1484 spin_unlock_irq(q->queue_lock);
1456 1485
1457 dout("%s 0x%x bytes at 0x%llx\n", 1486 dout("%s 0x%x bytes at 0x%llx\n",
1458 do_write ? "write" : "read", 1487 do_write ? "write" : "read",
1459 size, blk_rq_pos(rq) * 512ULL); 1488 size, blk_rq_pos(rq) * SECTOR_SIZE);
1460 1489
1461 num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); 1490 num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size);
1462 coll = rbd_alloc_coll(num_segs); 1491 coll = rbd_alloc_coll(num_segs);
1463 if (!coll) { 1492 if (!coll) {
1464 spin_lock_irq(q->queue_lock); 1493 spin_lock_irq(q->queue_lock);
1465 __blk_end_request_all(rq, -ENOMEM); 1494 __blk_end_request_all(rq, -ENOMEM);
1466 goto next; 1495 continue;
1467 } 1496 }
1468 1497
1469 do { 1498 do {
@@ -1509,8 +1538,6 @@ next_seg:
1509 if (bp) 1538 if (bp)
1510 bio_pair_release(bp); 1539 bio_pair_release(bp);
1511 spin_lock_irq(q->queue_lock); 1540 spin_lock_irq(q->queue_lock);
1512next:
1513 rq = blk_fetch_request(q);
1514 } 1541 }
1515} 1542}
1516 1543
@@ -1523,13 +1550,17 @@ static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
1523 struct bio_vec *bvec) 1550 struct bio_vec *bvec)
1524{ 1551{
1525 struct rbd_device *rbd_dev = q->queuedata; 1552 struct rbd_device *rbd_dev = q->queuedata;
1526 unsigned int chunk_sectors = 1 << (rbd_dev->header.obj_order - 9); 1553 unsigned int chunk_sectors;
1527 sector_t sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev); 1554 sector_t sector;
1528 unsigned int bio_sectors = bmd->bi_size >> 9; 1555 unsigned int bio_sectors;
1529 int max; 1556 int max;
1530 1557
1558 chunk_sectors = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT);
1559 sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev);
1560 bio_sectors = bmd->bi_size >> SECTOR_SHIFT;
1561
1531 max = (chunk_sectors - ((sector & (chunk_sectors - 1)) 1562 max = (chunk_sectors - ((sector & (chunk_sectors - 1))
1532 + bio_sectors)) << 9; 1563 + bio_sectors)) << SECTOR_SHIFT;
1533 if (max < 0) 1564 if (max < 0)
1534 max = 0; /* bio_add cannot handle a negative return */ 1565 max = 0; /* bio_add cannot handle a negative return */
1535 if (max <= bvec->bv_len && bio_sectors == 0) 1566 if (max <= bvec->bv_len && bio_sectors == 0)
@@ -1562,15 +1593,16 @@ static int rbd_read_header(struct rbd_device *rbd_dev,
1562 ssize_t rc; 1593 ssize_t rc;
1563 struct rbd_image_header_ondisk *dh; 1594 struct rbd_image_header_ondisk *dh;
1564 int snap_count = 0; 1595 int snap_count = 0;
1565 u64 snap_names_len = 0;
1566 u64 ver; 1596 u64 ver;
1597 size_t len;
1567 1598
1599 /*
1600 * First reads the fixed-size header to determine the number
1601 * of snapshots, then re-reads it, along with all snapshot
1602 * records as well as their stored names.
1603 */
1604 len = sizeof (*dh);
1568 while (1) { 1605 while (1) {
1569 int len = sizeof(*dh) +
1570 snap_count * sizeof(struct rbd_image_snap_ondisk) +
1571 snap_names_len;
1572
1573 rc = -ENOMEM;
1574 dh = kmalloc(len, GFP_KERNEL); 1606 dh = kmalloc(len, GFP_KERNEL);
1575 if (!dh) 1607 if (!dh)
1576 return -ENOMEM; 1608 return -ENOMEM;
@@ -1585,21 +1617,22 @@ static int rbd_read_header(struct rbd_device *rbd_dev,
1585 1617
1586 rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL); 1618 rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL);
1587 if (rc < 0) { 1619 if (rc < 0) {
1588 if (rc == -ENXIO) { 1620 if (rc == -ENXIO)
1589 pr_warning("unrecognized header format" 1621 pr_warning("unrecognized header format"
1590 " for image %s", rbd_dev->obj); 1622 " for image %s", rbd_dev->obj);
1591 }
1592 goto out_dh; 1623 goto out_dh;
1593 } 1624 }
1594 1625
1595 if (snap_count != header->total_snaps) { 1626 if (snap_count == header->total_snaps)
1596 snap_count = header->total_snaps; 1627 break;
1597 snap_names_len = header->snap_names_len; 1628
1598 rbd_header_free(header); 1629 snap_count = header->total_snaps;
1599 kfree(dh); 1630 len = sizeof (*dh) +
1600 continue; 1631 snap_count * sizeof(struct rbd_image_snap_ondisk) +
1601 } 1632 header->snap_names_len;
1602 break; 1633
1634 rbd_header_free(header);
1635 kfree(dh);
1603 } 1636 }
1604 header->obj_version = ver; 1637 header->obj_version = ver;
1605 1638
@@ -1620,13 +1653,14 @@ static int rbd_header_add_snap(struct rbd_device *dev,
1620 int ret; 1653 int ret;
1621 void *data, *p, *e; 1654 void *data, *p, *e;
1622 u64 ver; 1655 u64 ver;
1656 struct ceph_mon_client *monc;
1623 1657
1624 /* we should create a snapshot only if we're pointing at the head */ 1658 /* we should create a snapshot only if we're pointing at the head */
1625 if (dev->cur_snap) 1659 if (dev->cur_snap)
1626 return -EINVAL; 1660 return -EINVAL;
1627 1661
1628 ret = ceph_monc_create_snapid(&dev->client->monc, dev->poolid, 1662 monc = &dev->rbd_client->client->monc;
1629 &new_snapid); 1663 ret = ceph_monc_create_snapid(monc, dev->poolid, &new_snapid);
1630 dout("created snapid=%lld\n", new_snapid); 1664 dout("created snapid=%lld\n", new_snapid);
1631 if (ret < 0) 1665 if (ret < 0)
1632 return ret; 1666 return ret;
@@ -1681,9 +1715,9 @@ static int __rbd_update_snaps(struct rbd_device *rbd_dev)
1681 return ret; 1715 return ret;
1682 1716
1683 /* resized? */ 1717 /* resized? */
1684 set_capacity(rbd_dev->disk, h.image_size / 512ULL); 1718 set_capacity(rbd_dev->disk, h.image_size / SECTOR_SIZE);
1685 1719
1686 down_write(&rbd_dev->header.snap_rwsem); 1720 down_write(&rbd_dev->header_rwsem);
1687 1721
1688 snap_seq = rbd_dev->header.snapc->seq; 1722 snap_seq = rbd_dev->header.snapc->seq;
1689 if (rbd_dev->header.total_snaps && 1723 if (rbd_dev->header.total_snaps &&
@@ -1708,7 +1742,7 @@ static int __rbd_update_snaps(struct rbd_device *rbd_dev)
1708 1742
1709 ret = __rbd_init_snaps_header(rbd_dev); 1743 ret = __rbd_init_snaps_header(rbd_dev);
1710 1744
1711 up_write(&rbd_dev->header.snap_rwsem); 1745 up_write(&rbd_dev->header_rwsem);
1712 1746
1713 return ret; 1747 return ret;
1714} 1748}
@@ -1718,6 +1752,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
1718 struct gendisk *disk; 1752 struct gendisk *disk;
1719 struct request_queue *q; 1753 struct request_queue *q;
1720 int rc; 1754 int rc;
1755 u64 segment_size;
1721 u64 total_size = 0; 1756 u64 total_size = 0;
1722 1757
1723 /* contact OSD, request size info about the object being mapped */ 1758 /* contact OSD, request size info about the object being mapped */
@@ -1730,7 +1765,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
1730 if (rc) 1765 if (rc)
1731 return rc; 1766 return rc;
1732 1767
1733 rc = rbd_header_set_snap(rbd_dev, rbd_dev->snap_name, &total_size); 1768 rc = rbd_header_set_snap(rbd_dev, &total_size);
1734 if (rc) 1769 if (rc)
1735 return rc; 1770 return rc;
1736 1771
@@ -1740,7 +1775,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
1740 if (!disk) 1775 if (!disk)
1741 goto out; 1776 goto out;
1742 1777
1743 snprintf(disk->disk_name, sizeof(disk->disk_name), DRV_NAME "%d", 1778 snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
1744 rbd_dev->id); 1779 rbd_dev->id);
1745 disk->major = rbd_dev->major; 1780 disk->major = rbd_dev->major;
1746 disk->first_minor = 0; 1781 disk->first_minor = 0;
@@ -1753,11 +1788,15 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
1753 if (!q) 1788 if (!q)
1754 goto out_disk; 1789 goto out_disk;
1755 1790
1791 /* We use the default size, but let's be explicit about it. */
1792 blk_queue_physical_block_size(q, SECTOR_SIZE);
1793
1756 /* set io sizes to object size */ 1794 /* set io sizes to object size */
1757 blk_queue_max_hw_sectors(q, rbd_obj_bytes(&rbd_dev->header) / 512ULL); 1795 segment_size = rbd_obj_bytes(&rbd_dev->header);
1758 blk_queue_max_segment_size(q, rbd_obj_bytes(&rbd_dev->header)); 1796 blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
1759 blk_queue_io_min(q, rbd_obj_bytes(&rbd_dev->header)); 1797 blk_queue_max_segment_size(q, segment_size);
1760 blk_queue_io_opt(q, rbd_obj_bytes(&rbd_dev->header)); 1798 blk_queue_io_min(q, segment_size);
1799 blk_queue_io_opt(q, segment_size);
1761 1800
1762 blk_queue_merge_bvec(q, rbd_merge_bvec); 1801 blk_queue_merge_bvec(q, rbd_merge_bvec);
1763 disk->queue = q; 1802 disk->queue = q;
@@ -1768,7 +1807,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
1768 rbd_dev->q = q; 1807 rbd_dev->q = q;
1769 1808
1770 /* finally, announce the disk to the world */ 1809 /* finally, announce the disk to the world */
1771 set_capacity(disk, total_size / 512ULL); 1810 set_capacity(disk, total_size / SECTOR_SIZE);
1772 add_disk(disk); 1811 add_disk(disk);
1773 1812
1774 pr_info("%s: added with size 0x%llx\n", 1813 pr_info("%s: added with size 0x%llx\n",
@@ -1785,10 +1824,15 @@ out:
1785 sysfs 1824 sysfs
1786*/ 1825*/
1787 1826
1827static struct rbd_device *dev_to_rbd_dev(struct device *dev)
1828{
1829 return container_of(dev, struct rbd_device, dev);
1830}
1831
1788static ssize_t rbd_size_show(struct device *dev, 1832static ssize_t rbd_size_show(struct device *dev,
1789 struct device_attribute *attr, char *buf) 1833 struct device_attribute *attr, char *buf)
1790{ 1834{
1791 struct rbd_device *rbd_dev = dev_to_rbd(dev); 1835 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1792 1836
1793 return sprintf(buf, "%llu\n", (unsigned long long)rbd_dev->header.image_size); 1837 return sprintf(buf, "%llu\n", (unsigned long long)rbd_dev->header.image_size);
1794} 1838}
@@ -1796,7 +1840,7 @@ static ssize_t rbd_size_show(struct device *dev,
1796static ssize_t rbd_major_show(struct device *dev, 1840static ssize_t rbd_major_show(struct device *dev,
1797 struct device_attribute *attr, char *buf) 1841 struct device_attribute *attr, char *buf)
1798{ 1842{
1799 struct rbd_device *rbd_dev = dev_to_rbd(dev); 1843 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1800 1844
1801 return sprintf(buf, "%d\n", rbd_dev->major); 1845 return sprintf(buf, "%d\n", rbd_dev->major);
1802} 1846}
@@ -1804,15 +1848,16 @@ static ssize_t rbd_major_show(struct device *dev,
1804static ssize_t rbd_client_id_show(struct device *dev, 1848static ssize_t rbd_client_id_show(struct device *dev,
1805 struct device_attribute *attr, char *buf) 1849 struct device_attribute *attr, char *buf)
1806{ 1850{
1807 struct rbd_device *rbd_dev = dev_to_rbd(dev); 1851 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1808 1852
1809 return sprintf(buf, "client%lld\n", ceph_client_id(rbd_dev->client)); 1853 return sprintf(buf, "client%lld\n",
1854 ceph_client_id(rbd_dev->rbd_client->client));
1810} 1855}
1811 1856
1812static ssize_t rbd_pool_show(struct device *dev, 1857static ssize_t rbd_pool_show(struct device *dev,
1813 struct device_attribute *attr, char *buf) 1858 struct device_attribute *attr, char *buf)
1814{ 1859{
1815 struct rbd_device *rbd_dev = dev_to_rbd(dev); 1860 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1816 1861
1817 return sprintf(buf, "%s\n", rbd_dev->pool_name); 1862 return sprintf(buf, "%s\n", rbd_dev->pool_name);
1818} 1863}
@@ -1820,7 +1865,7 @@ static ssize_t rbd_pool_show(struct device *dev,
1820static ssize_t rbd_name_show(struct device *dev, 1865static ssize_t rbd_name_show(struct device *dev,
1821 struct device_attribute *attr, char *buf) 1866 struct device_attribute *attr, char *buf)
1822{ 1867{
1823 struct rbd_device *rbd_dev = dev_to_rbd(dev); 1868 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1824 1869
1825 return sprintf(buf, "%s\n", rbd_dev->obj); 1870 return sprintf(buf, "%s\n", rbd_dev->obj);
1826} 1871}
@@ -1829,7 +1874,7 @@ static ssize_t rbd_snap_show(struct device *dev,
1829 struct device_attribute *attr, 1874 struct device_attribute *attr,
1830 char *buf) 1875 char *buf)
1831{ 1876{
1832 struct rbd_device *rbd_dev = dev_to_rbd(dev); 1877 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1833 1878
1834 return sprintf(buf, "%s\n", rbd_dev->snap_name); 1879 return sprintf(buf, "%s\n", rbd_dev->snap_name);
1835} 1880}
@@ -1839,7 +1884,7 @@ static ssize_t rbd_image_refresh(struct device *dev,
1839 const char *buf, 1884 const char *buf,
1840 size_t size) 1885 size_t size)
1841{ 1886{
1842 struct rbd_device *rbd_dev = dev_to_rbd(dev); 1887 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1843 int rc; 1888 int rc;
1844 int ret = size; 1889 int ret = size;
1845 1890
@@ -1904,7 +1949,7 @@ static ssize_t rbd_snap_size_show(struct device *dev,
1904{ 1949{
1905 struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 1950 struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
1906 1951
1907 return sprintf(buf, "%lld\n", (long long)snap->size); 1952 return sprintf(buf, "%zd\n", snap->size);
1908} 1953}
1909 1954
1910static ssize_t rbd_snap_id_show(struct device *dev, 1955static ssize_t rbd_snap_id_show(struct device *dev,
@@ -1913,7 +1958,7 @@ static ssize_t rbd_snap_id_show(struct device *dev,
1913{ 1958{
1914 struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); 1959 struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
1915 1960
1916 return sprintf(buf, "%lld\n", (long long)snap->id); 1961 return sprintf(buf, "%llu\n", (unsigned long long) snap->id);
1917} 1962}
1918 1963
1919static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL); 1964static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL);
@@ -2085,19 +2130,9 @@ static int __rbd_init_snaps_header(struct rbd_device *rbd_dev)
2085 return 0; 2130 return 0;
2086} 2131}
2087 2132
2088
2089static void rbd_root_dev_release(struct device *dev)
2090{
2091}
2092
2093static struct device rbd_root_dev = {
2094 .init_name = "rbd",
2095 .release = rbd_root_dev_release,
2096};
2097
2098static int rbd_bus_add_dev(struct rbd_device *rbd_dev) 2133static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
2099{ 2134{
2100 int ret = -ENOMEM; 2135 int ret;
2101 struct device *dev; 2136 struct device *dev;
2102 struct rbd_snap *snap; 2137 struct rbd_snap *snap;
2103 2138
@@ -2111,7 +2146,7 @@ static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
2111 dev_set_name(dev, "%d", rbd_dev->id); 2146 dev_set_name(dev, "%d", rbd_dev->id);
2112 ret = device_register(dev); 2147 ret = device_register(dev);
2113 if (ret < 0) 2148 if (ret < 0)
2114 goto done_free; 2149 goto out;
2115 2150
2116 list_for_each_entry(snap, &rbd_dev->snaps, node) { 2151 list_for_each_entry(snap, &rbd_dev->snaps, node) {
2117 ret = rbd_register_snap_dev(rbd_dev, snap, 2152 ret = rbd_register_snap_dev(rbd_dev, snap,
@@ -2119,10 +2154,7 @@ static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
2119 if (ret < 0) 2154 if (ret < 0)
2120 break; 2155 break;
2121 } 2156 }
2122 2157out:
2123 mutex_unlock(&ctl_mutex);
2124 return 0;
2125done_free:
2126 mutex_unlock(&ctl_mutex); 2158 mutex_unlock(&ctl_mutex);
2127 return ret; 2159 return ret;
2128} 2160}
@@ -2151,102 +2183,250 @@ static int rbd_init_watch_dev(struct rbd_device *rbd_dev)
2151 return ret; 2183 return ret;
2152} 2184}
2153 2185
2186static atomic64_t rbd_id_max = ATOMIC64_INIT(0);
2187
2188/*
2189 * Get a unique rbd identifier for the given new rbd_dev, and add
2190 * the rbd_dev to the global list. The minimum rbd id is 1.
2191 */
2192static void rbd_id_get(struct rbd_device *rbd_dev)
2193{
2194 rbd_dev->id = atomic64_inc_return(&rbd_id_max);
2195
2196 spin_lock(&rbd_dev_list_lock);
2197 list_add_tail(&rbd_dev->node, &rbd_dev_list);
2198 spin_unlock(&rbd_dev_list_lock);
2199}
2200
2201/*
2202 * Remove an rbd_dev from the global list, and record that its
2203 * identifier is no longer in use.
2204 */
2205static void rbd_id_put(struct rbd_device *rbd_dev)
2206{
2207 struct list_head *tmp;
2208 int rbd_id = rbd_dev->id;
2209 int max_id;
2210
2211 BUG_ON(rbd_id < 1);
2212
2213 spin_lock(&rbd_dev_list_lock);
2214 list_del_init(&rbd_dev->node);
2215
2216 /*
2217 * If the id being "put" is not the current maximum, there
2218 * is nothing special we need to do.
2219 */
2220 if (rbd_id != atomic64_read(&rbd_id_max)) {
2221 spin_unlock(&rbd_dev_list_lock);
2222 return;
2223 }
2224
2225 /*
2226 * We need to update the current maximum id. Search the
2227 * list to find out what it is. We're more likely to find
2228 * the maximum at the end, so search the list backward.
2229 */
2230 max_id = 0;
2231 list_for_each_prev(tmp, &rbd_dev_list) {
2232 struct rbd_device *rbd_dev;
2233
2234 rbd_dev = list_entry(tmp, struct rbd_device, node);
2235 if (rbd_id > max_id)
2236 max_id = rbd_id;
2237 }
2238 spin_unlock(&rbd_dev_list_lock);
2239
2240 /*
2241 * The max id could have been updated by rbd_id_get(), in
2242 * which case it now accurately reflects the new maximum.
2243 * Be careful not to overwrite the maximum value in that
2244 * case.
2245 */
2246 atomic64_cmpxchg(&rbd_id_max, rbd_id, max_id);
2247}
2248
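/*
 * Rough user-space sketch (not part of this patch) of the id scheme
 * implemented by rbd_id_get()/rbd_id_put() above: hand out ids by
 * bumping an atomic maximum and, on release, roll the maximum back
 * only when the released id was the current maximum.  C11 atomics
 * stand in for the kernel's atomic64_t; all names are illustrative.
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_long id_max;		/* zero-initialized; ids start at 1 */

static long id_get(void)
{
	return atomic_fetch_add(&id_max, 1) + 1;
}

static void id_put(long id, long new_max)
{
	long expected = id;

	/* Only roll the maximum back if the id being released is still
	 * the current maximum, mirroring the atomic64_cmpxchg() above. */
	atomic_compare_exchange_strong(&id_max, &expected, new_max);
}

int main(void)
{
	long a = id_get(), b = id_get();

	printf("got ids %ld and %ld\n", a, b);	/* 1 and 2 */
	id_put(b, a);				/* b was the max; fall back to a */
	printf("max is now %ld\n", atomic_load(&id_max));	/* 1 */
	return 0;
}
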
2249/*
2250 * Skips over white space at *buf, and updates *buf to point to the
2251 * first found non-space character (if any). Returns the length of
2252 * the token (string of non-white space characters) found. Note
2253 * that *buf must be terminated with '\0'.
2254 */
2255static inline size_t next_token(const char **buf)
2256{
2257 /*
2258 * These are the characters that produce nonzero for
2259 * isspace() in the "C" and "POSIX" locales.
2260 */
2261 const char *spaces = " \f\n\r\t\v";
2262
2263 *buf += strspn(*buf, spaces); /* Find start of token */
2264
2265 return strcspn(*buf, spaces); /* Return token length */
2266}
2267
2268/*
2269 * Finds the next token in *buf, and if the provided token buffer is
2270 * big enough, copies the found token into it. The result, if
2271 * copied, is guaranteed to be terminated with '\0'. Note that *buf
2272 * must be terminated with '\0' on entry.
2273 *
2274 * Returns the length of the token found (not including the '\0').
2275 * Return value will be 0 if no token is found, and it will be >=
2276 * token_size if the token would not fit.
2277 *
2278 * The *buf pointer will be updated to point beyond the end of the
2279 * found token. Note that this occurs even if the token buffer is
2280 * too small to hold it.
2281 */
2282static inline size_t copy_token(const char **buf,
2283 char *token,
2284 size_t token_size)
2285{
2286 size_t len;
2287
2288 len = next_token(buf);
2289 if (len < token_size) {
2290 memcpy(token, *buf, len);
2291 *(token + len) = '\0';
2292 }
2293 *buf += len;
2294
2295 return len;
2296}
2297
2298/*
 2299 * This fills in the pool_name, obj, obj_len, snap_name,
2300 * rbd_dev, rbd_md_name, and name fields of the given rbd_dev, based
2301 * on the list of monitor addresses and other options provided via
2302 * /sys/bus/rbd/add.
2303 */
2304static int rbd_add_parse_args(struct rbd_device *rbd_dev,
2305 const char *buf,
2306 const char **mon_addrs,
2307 size_t *mon_addrs_size,
2308 char *options,
2309 size_t options_size)
2310{
2311 size_t len;
2312
2313 /* The first four tokens are required */
2314
2315 len = next_token(&buf);
2316 if (!len)
2317 return -EINVAL;
2318 *mon_addrs_size = len + 1;
2319 *mon_addrs = buf;
2320
2321 buf += len;
2322
2323 len = copy_token(&buf, options, options_size);
2324 if (!len || len >= options_size)
2325 return -EINVAL;
2326
2327 len = copy_token(&buf, rbd_dev->pool_name, sizeof (rbd_dev->pool_name));
2328 if (!len || len >= sizeof (rbd_dev->pool_name))
2329 return -EINVAL;
2330
2331 len = copy_token(&buf, rbd_dev->obj, sizeof (rbd_dev->obj));
2332 if (!len || len >= sizeof (rbd_dev->obj))
2333 return -EINVAL;
2334
2335 /* We have the object length in hand, save it. */
2336
2337 rbd_dev->obj_len = len;
2338
2339 BUILD_BUG_ON(RBD_MAX_MD_NAME_LEN
2340 < RBD_MAX_OBJ_NAME_LEN + sizeof (RBD_SUFFIX));
2341 sprintf(rbd_dev->obj_md_name, "%s%s", rbd_dev->obj, RBD_SUFFIX);
2342
2343 /*
2344 * The snapshot name is optional, but it's an error if it's
2345 * too long. If no snapshot is supplied, fill in the default.
2346 */
2347 len = copy_token(&buf, rbd_dev->snap_name, sizeof (rbd_dev->snap_name));
2348 if (!len)
2349 memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME,
2350 sizeof (RBD_SNAP_HEAD_NAME));
2351 else if (len >= sizeof (rbd_dev->snap_name))
2352 return -EINVAL;
2353
2354 return 0;
2355}
2356
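/*
 * Stand-alone sketch (not part of this patch) of the strspn()/strcspn()
 * tokenizing idiom used by next_token()/copy_token() above, as applied
 * by rbd_add_parse_args() to the /sys/bus/rbd/add input; the sample
 * input string and names below are illustrative.
 */
#include <stdio.h>
#include <string.h>

static size_t next_token(const char **buf)
{
	const char *spaces = " \f\n\r\t\v";

	*buf += strspn(*buf, spaces);	/* skip leading whitespace */
	return strcspn(*buf, spaces);	/* length of the next token */
}

static size_t copy_token(const char **buf, char *token, size_t token_size)
{
	size_t len = next_token(buf);

	if (len < token_size) {
		memcpy(token, *buf, len);
		token[len] = '\0';
	}
	*buf += len;			/* advance past the token regardless */

	return len;
}

int main(void)
{
	const char *args = "1.2.3.4:6789 name=admin,secret=key rbd myimage snap1";
	char word[64];
	size_t len;

	while ((len = copy_token(&args, word, sizeof (word))) != 0)
		printf("token (%zu bytes): %s\n", len, word);

	return 0;
}
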
2154static ssize_t rbd_add(struct bus_type *bus, 2357static ssize_t rbd_add(struct bus_type *bus,
2155 const char *buf, 2358 const char *buf,
2156 size_t count) 2359 size_t count)
2157{ 2360{
2158 struct ceph_osd_client *osdc;
2159 struct rbd_device *rbd_dev; 2361 struct rbd_device *rbd_dev;
2160 ssize_t rc = -ENOMEM; 2362 const char *mon_addrs = NULL;
2161 int irc, new_id = 0; 2363 size_t mon_addrs_size = 0;
2162 struct list_head *tmp; 2364 char *options = NULL;
2163 char *mon_dev_name; 2365 struct ceph_osd_client *osdc;
2164 char *options; 2366 int rc = -ENOMEM;
2165 2367
2166 if (!try_module_get(THIS_MODULE)) 2368 if (!try_module_get(THIS_MODULE))
2167 return -ENODEV; 2369 return -ENODEV;
2168 2370
2169 mon_dev_name = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL);
2170 if (!mon_dev_name)
2171 goto err_out_mod;
2172
2173 options = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL);
2174 if (!options)
2175 goto err_mon_dev;
2176
2177 /* new rbd_device object */
2178 rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); 2371 rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL);
2179 if (!rbd_dev) 2372 if (!rbd_dev)
2180 goto err_out_opt; 2373 goto err_nomem;
2374 options = kmalloc(count, GFP_KERNEL);
2375 if (!options)
2376 goto err_nomem;
2181 2377
2182 /* static rbd_device initialization */ 2378 /* static rbd_device initialization */
2183 spin_lock_init(&rbd_dev->lock); 2379 spin_lock_init(&rbd_dev->lock);
2184 INIT_LIST_HEAD(&rbd_dev->node); 2380 INIT_LIST_HEAD(&rbd_dev->node);
2185 INIT_LIST_HEAD(&rbd_dev->snaps); 2381 INIT_LIST_HEAD(&rbd_dev->snaps);
2382 init_rwsem(&rbd_dev->header_rwsem);
2186 2383
2187 /* generate unique id: find highest unique id, add one */ 2384 init_rwsem(&rbd_dev->header_rwsem);
2188 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
2189
2190 list_for_each(tmp, &rbd_dev_list) {
2191 struct rbd_device *rbd_dev;
2192 2385
2193 rbd_dev = list_entry(tmp, struct rbd_device, node); 2386 /* generate unique id: find highest unique id, add one */
2194 if (rbd_dev->id >= new_id) 2387 rbd_id_get(rbd_dev);
2195 new_id = rbd_dev->id + 1;
2196 }
2197
2198 rbd_dev->id = new_id;
2199 2388
2200 /* add to global list */ 2389 /* Fill in the device name, now that we have its id. */
2201 list_add_tail(&rbd_dev->node, &rbd_dev_list); 2390 BUILD_BUG_ON(DEV_NAME_LEN
2391 < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH);
2392 sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->id);
2202 2393
2203 /* parse add command */ 2394 /* parse add command */
2204 if (sscanf(buf, "%" __stringify(RBD_MAX_OPT_LEN) "s " 2395 rc = rbd_add_parse_args(rbd_dev, buf, &mon_addrs, &mon_addrs_size,
2205 "%" __stringify(RBD_MAX_OPT_LEN) "s " 2396 options, count);
2206 "%" __stringify(RBD_MAX_POOL_NAME_LEN) "s " 2397 if (rc)
2207 "%" __stringify(RBD_MAX_OBJ_NAME_LEN) "s" 2398 goto err_put_id;
2208 "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s",
2209 mon_dev_name, options, rbd_dev->pool_name,
2210 rbd_dev->obj, rbd_dev->snap_name) < 4) {
2211 rc = -EINVAL;
2212 goto err_out_slot;
2213 }
2214
2215 if (rbd_dev->snap_name[0] == 0)
2216 rbd_dev->snap_name[0] = '-';
2217
2218 rbd_dev->obj_len = strlen(rbd_dev->obj);
2219 snprintf(rbd_dev->obj_md_name, sizeof(rbd_dev->obj_md_name), "%s%s",
2220 rbd_dev->obj, RBD_SUFFIX);
2221
2222 /* initialize rest of new object */
2223 snprintf(rbd_dev->name, DEV_NAME_LEN, DRV_NAME "%d", rbd_dev->id);
2224 rc = rbd_get_client(rbd_dev, mon_dev_name, options);
2225 if (rc < 0)
2226 goto err_out_slot;
2227 2399
2228 mutex_unlock(&ctl_mutex); 2400 rbd_dev->rbd_client = rbd_get_client(mon_addrs, mon_addrs_size - 1,
2401 options);
2402 if (IS_ERR(rbd_dev->rbd_client)) {
2403 rc = PTR_ERR(rbd_dev->rbd_client);
2404 goto err_put_id;
2405 }
2229 2406
2230 /* pick the pool */ 2407 /* pick the pool */
2231 osdc = &rbd_dev->client->osdc; 2408 osdc = &rbd_dev->rbd_client->client->osdc;
2232 rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name); 2409 rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name);
2233 if (rc < 0) 2410 if (rc < 0)
2234 goto err_out_client; 2411 goto err_out_client;
2235 rbd_dev->poolid = rc; 2412 rbd_dev->poolid = rc;
2236 2413
2237 /* register our block device */ 2414 /* register our block device */
2238 irc = register_blkdev(0, rbd_dev->name); 2415 rc = register_blkdev(0, rbd_dev->name);
2239 if (irc < 0) { 2416 if (rc < 0)
2240 rc = irc;
2241 goto err_out_client; 2417 goto err_out_client;
2242 } 2418 rbd_dev->major = rc;
2243 rbd_dev->major = irc;
2244 2419
2245 rc = rbd_bus_add_dev(rbd_dev); 2420 rc = rbd_bus_add_dev(rbd_dev);
2246 if (rc) 2421 if (rc)
2247 goto err_out_blkdev; 2422 goto err_out_blkdev;
2248 2423
2249 /* set up and announce blkdev mapping */ 2424 /*
2425 * At this point cleanup in the event of an error is the job
2426 * of the sysfs code (initiated by rbd_bus_del_dev()).
2427 *
2428 * Set up and announce blkdev mapping.
2429 */
2250 rc = rbd_init_disk(rbd_dev); 2430 rc = rbd_init_disk(rbd_dev);
2251 if (rc) 2431 if (rc)
2252 goto err_out_bus; 2432 goto err_out_bus;
@@ -2258,35 +2438,26 @@ static ssize_t rbd_add(struct bus_type *bus,
2258 return count; 2438 return count;
2259 2439
2260err_out_bus: 2440err_out_bus:
2261 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
2262 list_del_init(&rbd_dev->node);
2263 mutex_unlock(&ctl_mutex);
2264
2265 /* this will also clean up rest of rbd_dev stuff */ 2441 /* this will also clean up rest of rbd_dev stuff */
2266 2442
2267 rbd_bus_del_dev(rbd_dev); 2443 rbd_bus_del_dev(rbd_dev);
2268 kfree(options); 2444 kfree(options);
2269 kfree(mon_dev_name);
2270 return rc; 2445 return rc;
2271 2446
2272err_out_blkdev: 2447err_out_blkdev:
2273 unregister_blkdev(rbd_dev->major, rbd_dev->name); 2448 unregister_blkdev(rbd_dev->major, rbd_dev->name);
2274err_out_client: 2449err_out_client:
2275 rbd_put_client(rbd_dev); 2450 rbd_put_client(rbd_dev);
2276 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 2451err_put_id:
2277err_out_slot: 2452 rbd_id_put(rbd_dev);
2278 list_del_init(&rbd_dev->node); 2453err_nomem:
2279 mutex_unlock(&ctl_mutex);
2280
2281 kfree(rbd_dev);
2282err_out_opt:
2283 kfree(options); 2454 kfree(options);
2284err_mon_dev: 2455 kfree(rbd_dev);
2285 kfree(mon_dev_name); 2456
2286err_out_mod:
2287 dout("Error adding device %s\n", buf); 2457 dout("Error adding device %s\n", buf);
2288 module_put(THIS_MODULE); 2458 module_put(THIS_MODULE);
2289 return rc; 2459
2460 return (ssize_t) rc;
2290} 2461}
2291 2462
2292static struct rbd_device *__rbd_get_dev(unsigned long id) 2463static struct rbd_device *__rbd_get_dev(unsigned long id)
@@ -2294,22 +2465,28 @@ static struct rbd_device *__rbd_get_dev(unsigned long id)
2294 struct list_head *tmp; 2465 struct list_head *tmp;
2295 struct rbd_device *rbd_dev; 2466 struct rbd_device *rbd_dev;
2296 2467
2468 spin_lock(&rbd_dev_list_lock);
2297 list_for_each(tmp, &rbd_dev_list) { 2469 list_for_each(tmp, &rbd_dev_list) {
2298 rbd_dev = list_entry(tmp, struct rbd_device, node); 2470 rbd_dev = list_entry(tmp, struct rbd_device, node);
2299 if (rbd_dev->id == id) 2471 if (rbd_dev->id == id) {
2472 spin_unlock(&rbd_dev_list_lock);
2300 return rbd_dev; 2473 return rbd_dev;
2474 }
2301 } 2475 }
2476 spin_unlock(&rbd_dev_list_lock);
2302 return NULL; 2477 return NULL;
2303} 2478}
2304 2479
2305static void rbd_dev_release(struct device *dev) 2480static void rbd_dev_release(struct device *dev)
2306{ 2481{
2307 struct rbd_device *rbd_dev = 2482 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
2308 container_of(dev, struct rbd_device, dev);
2309 2483
2310 if (rbd_dev->watch_request) 2484 if (rbd_dev->watch_request) {
2311 ceph_osdc_unregister_linger_request(&rbd_dev->client->osdc, 2485 struct ceph_client *client = rbd_dev->rbd_client->client;
2486
2487 ceph_osdc_unregister_linger_request(&client->osdc,
2312 rbd_dev->watch_request); 2488 rbd_dev->watch_request);
2489 }
2313 if (rbd_dev->watch_event) 2490 if (rbd_dev->watch_event)
2314 rbd_req_sync_unwatch(rbd_dev, rbd_dev->obj_md_name); 2491 rbd_req_sync_unwatch(rbd_dev, rbd_dev->obj_md_name);
2315 2492
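
The __rbd_get_dev() change above moves the device-list walk under rbd_dev_list_lock so a concurrent add or remove cannot race the lookup. A minimal sketch of the same spinlock-protected lookup pattern using the generic list helpers (everything other than the list/spinlock APIs is illustrative):

	#include <linux/list.h>
	#include <linux/spinlock.h>

	struct example_dev {
		unsigned long id;
		struct list_head node;
	};

	static LIST_HEAD(example_dev_list);
	static DEFINE_SPINLOCK(example_dev_list_lock);

	static struct example_dev *example_get_dev(unsigned long id)
	{
		struct example_dev *dev;

		spin_lock(&example_dev_list_lock);
		list_for_each_entry(dev, &example_dev_list, node) {
			if (dev->id == id) {
				spin_unlock(&example_dev_list_lock);
				return dev;
			}
		}
		spin_unlock(&example_dev_list_lock);

		return NULL;
	}
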
@@ -2318,6 +2495,9 @@ static void rbd_dev_release(struct device *dev)
2318 /* clean up and free blkdev */ 2495 /* clean up and free blkdev */
2319 rbd_free_disk(rbd_dev); 2496 rbd_free_disk(rbd_dev);
2320 unregister_blkdev(rbd_dev->major, rbd_dev->name); 2497 unregister_blkdev(rbd_dev->major, rbd_dev->name);
2498
2499 /* done with the id, and with the rbd_dev */
2500 rbd_id_put(rbd_dev);
2321 kfree(rbd_dev); 2501 kfree(rbd_dev);
2322 2502
2323 /* release module ref */ 2503 /* release module ref */
@@ -2350,8 +2530,6 @@ static ssize_t rbd_remove(struct bus_type *bus,
2350 goto done; 2530 goto done;
2351 } 2531 }
2352 2532
2353 list_del_init(&rbd_dev->node);
2354
2355 __rbd_remove_all_snaps(rbd_dev); 2533 __rbd_remove_all_snaps(rbd_dev);
2356 rbd_bus_del_dev(rbd_dev); 2534 rbd_bus_del_dev(rbd_dev);
2357 2535
@@ -2365,7 +2543,7 @@ static ssize_t rbd_snap_add(struct device *dev,
2365 const char *buf, 2543 const char *buf,
2366 size_t count) 2544 size_t count)
2367{ 2545{
2368 struct rbd_device *rbd_dev = dev_to_rbd(dev); 2546 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
2369 int ret; 2547 int ret;
2370 char *name = kmalloc(count + 1, GFP_KERNEL); 2548 char *name = kmalloc(count + 1, GFP_KERNEL);
2371 if (!name) 2549 if (!name)
@@ -2401,12 +2579,6 @@ err_unlock:
2401 return ret; 2579 return ret;
2402} 2580}
2403 2581
2404static struct bus_attribute rbd_bus_attrs[] = {
2405 __ATTR(add, S_IWUSR, NULL, rbd_add),
2406 __ATTR(remove, S_IWUSR, NULL, rbd_remove),
2407 __ATTR_NULL
2408};
2409
2410/* 2582/*
2411 * create control files in sysfs 2583 * create control files in sysfs
2412 * /sys/bus/rbd/... 2584 * /sys/bus/rbd/...
@@ -2415,21 +2587,21 @@ static int rbd_sysfs_init(void)
2415{ 2587{
2416 int ret; 2588 int ret;
2417 2589
2418 rbd_bus_type.bus_attrs = rbd_bus_attrs; 2590 ret = device_register(&rbd_root_dev);
2419 2591 if (ret < 0)
2420 ret = bus_register(&rbd_bus_type);
2421 if (ret < 0)
2422 return ret; 2592 return ret;
2423 2593
2424 ret = device_register(&rbd_root_dev); 2594 ret = bus_register(&rbd_bus_type);
2595 if (ret < 0)
2596 device_unregister(&rbd_root_dev);
2425 2597
2426 return ret; 2598 return ret;
2427} 2599}
2428 2600
2429static void rbd_sysfs_cleanup(void) 2601static void rbd_sysfs_cleanup(void)
2430{ 2602{
2431 device_unregister(&rbd_root_dev);
2432 bus_unregister(&rbd_bus_type); 2603 bus_unregister(&rbd_bus_type);
2604 device_unregister(&rbd_root_dev);
2433} 2605}
2434 2606
2435int __init rbd_init(void) 2607int __init rbd_init(void)
@@ -2439,8 +2611,7 @@ int __init rbd_init(void)
2439 rc = rbd_sysfs_init(); 2611 rc = rbd_sysfs_init();
2440 if (rc) 2612 if (rc)
2441 return rc; 2613 return rc;
2442 spin_lock_init(&node_lock); 2614 pr_info("loaded " RBD_DRV_NAME_LONG "\n");
2443 pr_info("loaded " DRV_NAME_LONG "\n");
2444 return 0; 2615 return 0;
2445} 2616}
2446 2617
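
The rbd_sysfs_init()/rbd_sysfs_cleanup() reordering above registers the root device before the bus, unwinds the device if bus registration fails, and tears the pair down in reverse order so bus devices never outlive their parent. A standalone sketch of that register/unwind shape (the bus and device objects here are placeholders, not the rbd definitions):

	#include <linux/device.h>
	#include <linux/init.h>

	static struct bus_type example_bus_type = {
		.name = "example",
	};

	static void example_root_dev_release(struct device *dev)
	{
	}

	static struct device example_root_dev = {
		.init_name = "example_root",
		.release   = example_root_dev_release,
	};

	static int __init example_sysfs_init(void)
	{
		int ret;

		ret = device_register(&example_root_dev);
		if (ret < 0)
			return ret;

		ret = bus_register(&example_bus_type);
		if (ret < 0)
			device_unregister(&example_root_dev);	/* unwind on failure */

		return ret;
	}

	static void example_sysfs_cleanup(void)
	{
		bus_unregister(&example_bus_type);
		device_unregister(&example_root_dev);	/* parent goes last */
	}
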
diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h
index fc6c678aa2cb..950708688f17 100644
--- a/drivers/block/rbd_types.h
+++ b/drivers/block/rbd_types.h
@@ -41,10 +41,6 @@
41#define RBD_HEADER_SIGNATURE "RBD" 41#define RBD_HEADER_SIGNATURE "RBD"
42#define RBD_HEADER_VERSION "001.005" 42#define RBD_HEADER_VERSION "001.005"
43 43
44struct rbd_info {
45 __le64 max_id;
46} __attribute__ ((packed));
47
48struct rbd_image_snap_ondisk { 44struct rbd_image_snap_ondisk {
49 __le64 id; 45 __le64 id;
50 __le64 image_size; 46 __le64 image_size;
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 48e8fee9f2d4..9dcf76a10bb6 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -839,10 +839,7 @@ static struct vio_driver vdc_port_driver = {
839 .id_table = vdc_port_match, 839 .id_table = vdc_port_match,
840 .probe = vdc_port_probe, 840 .probe = vdc_port_probe,
841 .remove = vdc_port_remove, 841 .remove = vdc_port_remove,
842 .driver = { 842 .name = "vdc_port",
843 .name = "vdc_port",
844 .owner = THIS_MODULE,
845 }
846}; 843};
847 844
848static int __init vdc_init(void) 845static int __init vdc_init(void)
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index b70f0fca9a42..3fb6ab4c8b4e 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -619,8 +619,10 @@ static int carm_array_info (struct carm_host *host, unsigned int array_idx)
619 host->state == HST_DEV_SCAN); 619 host->state == HST_DEV_SCAN);
620 spin_unlock_irq(&host->lock); 620 spin_unlock_irq(&host->lock);
621 621
622 DPRINTK("blk_insert_request, tag == %u\n", idx); 622 DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx);
623 blk_insert_request(host->oob_q, crq->rq, 1, crq); 623 crq->rq->cmd_type = REQ_TYPE_SPECIAL;
624 crq->rq->special = crq;
625 blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL);
624 626
625 return 0; 627 return 0;
626 628
@@ -658,8 +660,10 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func)
658 BUG_ON(rc < 0); 660 BUG_ON(rc < 0);
659 crq->msg_bucket = (u32) rc; 661 crq->msg_bucket = (u32) rc;
660 662
661 DPRINTK("blk_insert_request, tag == %u\n", idx); 663 DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx);
662 blk_insert_request(host->oob_q, crq->rq, 1, crq); 664 crq->rq->cmd_type = REQ_TYPE_SPECIAL;
665 crq->rq->special = crq;
666 blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL);
663 667
664 return 0; 668 return 0;
665} 669}
@@ -1116,7 +1120,7 @@ static inline void carm_handle_resp(struct carm_host *host,
1116 break; 1120 break;
1117 case MISC_GET_FW_VER: { 1121 case MISC_GET_FW_VER: {
1118 struct carm_fw_ver *ver = (struct carm_fw_ver *) 1122 struct carm_fw_ver *ver = (struct carm_fw_ver *)
1119 mem + sizeof(struct carm_msg_get_fw_ver); 1123 (mem + sizeof(struct carm_msg_get_fw_ver));
1120 if (!error) { 1124 if (!error) {
1121 host->fw_ver = le32_to_cpu(ver->version); 1125 host->fw_ver = le32_to_cpu(ver->version);
1122 host->flags |= (ver->features & FL_FW_VER_MASK); 1126 host->flags |= (ver->features & FL_FW_VER_MASK);
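
The sx8 hunks above replace the removed blk_insert_request() helper: the driver now marks the request as a driver-private command and pushes it to the head of the queue with blk_execute_rq_nowait(). A minimal sketch of that submission pattern against the request API of this kernel generation (queue, request and payload come from the caller; nothing here is sx8-specific):

	#include <linux/blkdev.h>

	/* Queue an out-of-band command without waiting for it to complete. */
	static void example_submit_special(struct request_queue *q,
					   struct request *rq, void *payload)
	{
		rq->cmd_type = REQ_TYPE_SPECIAL;	/* not a filesystem request */
		rq->special = payload;			/* recovered by the driver later */

		/* at_head = true: run ahead of already-queued fs requests */
		blk_execute_rq_nowait(q, NULL, rq, true, NULL);
	}
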
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 0e376d46bdd1..fcec0225ac76 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -119,43 +119,6 @@
119 119
120/* 120/*
121 */ 121 */
122
123/* command block wrapper */
124struct bulk_cb_wrap {
125 __le32 Signature; /* contains 'USBC' */
126 u32 Tag; /* unique per command id */
127 __le32 DataTransferLength; /* size of data */
128 u8 Flags; /* direction in bit 0 */
129 u8 Lun; /* LUN */
130 u8 Length; /* of the CDB */
131 u8 CDB[UB_MAX_CDB_SIZE]; /* max command */
132};
133
134#define US_BULK_CB_WRAP_LEN 31
135#define US_BULK_CB_SIGN 0x43425355 /* spells out 'USBC' */
136#define US_BULK_FLAG_IN 1
137#define US_BULK_FLAG_OUT 0
138
139/* command status wrapper */
140struct bulk_cs_wrap {
141 __le32 Signature; /* should = 'USBS' */
142 u32 Tag; /* same as original command */
143 __le32 Residue; /* amount not transferred */
144 u8 Status; /* see below */
145};
146
147#define US_BULK_CS_WRAP_LEN 13
148#define US_BULK_CS_SIGN 0x53425355 /* spells out 'USBS' */
149#define US_BULK_STAT_OK 0
150#define US_BULK_STAT_FAIL 1
151#define US_BULK_STAT_PHASE 2
152
153/* bulk-only class specific requests */
154#define US_BULK_RESET_REQUEST 0xff
155#define US_BULK_GET_MAX_LUN 0xfe
156
157/*
158 */
159struct ub_dev; 122struct ub_dev;
160 123
161#define UB_MAX_REQ_SG 9 /* cdrecord requires 32KB and maybe a header */ 124#define UB_MAX_REQ_SG 9 /* cdrecord requires 32KB and maybe a header */
@@ -1744,12 +1707,11 @@ static int ub_bd_release(struct gendisk *disk, fmode_t mode)
1744static int ub_bd_ioctl(struct block_device *bdev, fmode_t mode, 1707static int ub_bd_ioctl(struct block_device *bdev, fmode_t mode,
1745 unsigned int cmd, unsigned long arg) 1708 unsigned int cmd, unsigned long arg)
1746{ 1709{
1747 struct gendisk *disk = bdev->bd_disk;
1748 void __user *usermem = (void __user *) arg; 1710 void __user *usermem = (void __user *) arg;
1749 int ret; 1711 int ret;
1750 1712
1751 mutex_lock(&ub_mutex); 1713 mutex_lock(&ub_mutex);
1752 ret = scsi_cmd_ioctl(disk->queue, disk, mode, cmd, usermem); 1714 ret = scsi_cmd_blk_ioctl(bdev, mode, cmd, usermem);
1753 mutex_unlock(&ub_mutex); 1715 mutex_unlock(&ub_mutex);
1754 1716
1755 return ret; 1717 return ret;
@@ -2478,6 +2440,8 @@ static int __init ub_init(void)
2478 int rc; 2440 int rc;
2479 int i; 2441 int i;
2480 2442
2443 pr_info("'Low Performance USB Block' driver is deprecated. "
2444 "Please switch to usb-storage\n");
2481 for (i = 0; i < UB_QLOCK_NUM; i++) 2445 for (i = 0; i < UB_QLOCK_NUM; i++)
2482 spin_lock_init(&ub_qlockv[i]); 2446 spin_lock_init(&ub_qlockv[i]);
2483 2447
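
The ub ioctl change above (and the matching one in virtio_blk further down) switches from scsi_cmd_ioctl(), which needed the queue and gendisk passed in, to scsi_cmd_blk_ioctl(), which takes the block_device and applies the partition checks itself. A minimal sketch of an ioctl method using the new helper (the method name is illustrative):

	#include <linux/blkdev.h>

	static int example_bd_ioctl(struct block_device *bdev, fmode_t mode,
				    unsigned int cmd, unsigned long arg)
	{
		/* scsi_cmd_blk_ioctl() derives the queue and disk from bdev */
		return scsi_cmd_blk_ioctl(bdev, mode, cmd, (void __user *)arg);
	}
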
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
deleted file mode 100644
index 9a5b2a2d616d..000000000000
--- a/drivers/block/viodasd.c
+++ /dev/null
@@ -1,809 +0,0 @@
1/* -*- linux-c -*-
2 * viodasd.c
3 * Authors: Dave Boutcher <boutcher@us.ibm.com>
4 * Ryan Arnold <ryanarn@us.ibm.com>
5 * Colin Devilbiss <devilbis@us.ibm.com>
6 * Stephen Rothwell
7 *
8 * (C) Copyright 2000-2004 IBM Corporation
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License as
12 * published by the Free Software Foundation; either version 2 of the
13 * License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 *
24 * This routine provides access to disk space (termed "DASD" in historical
25 * IBM terms) owned and managed by an OS/400 partition running on the
26 * same box as this Linux partition.
27 *
28 * All disk operations are performed by sending messages back and forth to
29 * the OS/400 partition.
30 */
31
32#define pr_fmt(fmt) "viod: " fmt
33
34#include <linux/major.h>
35#include <linux/fs.h>
36#include <linux/module.h>
37#include <linux/kernel.h>
38#include <linux/blkdev.h>
39#include <linux/genhd.h>
40#include <linux/hdreg.h>
41#include <linux/errno.h>
42#include <linux/init.h>
43#include <linux/string.h>
44#include <linux/mutex.h>
45#include <linux/dma-mapping.h>
46#include <linux/completion.h>
47#include <linux/device.h>
48#include <linux/scatterlist.h>
49
50#include <asm/uaccess.h>
51#include <asm/vio.h>
52#include <asm/iseries/hv_types.h>
53#include <asm/iseries/hv_lp_event.h>
54#include <asm/iseries/hv_lp_config.h>
55#include <asm/iseries/vio.h>
56#include <asm/firmware.h>
57
58MODULE_DESCRIPTION("iSeries Virtual DASD");
59MODULE_AUTHOR("Dave Boutcher");
60MODULE_LICENSE("GPL");
61
62/*
63 * We only support 7 partitions per physical disk....so with minor
64 * numbers 0-255 we get a maximum of 32 disks.
65 */
66#define VIOD_GENHD_NAME "iseries/vd"
67
68#define VIOD_VERS "1.64"
69
70enum {
71 PARTITION_SHIFT = 3,
72 MAX_DISKNO = HVMAXARCHITECTEDVIRTUALDISKS,
73 MAX_DISK_NAME = FIELD_SIZEOF(struct gendisk, disk_name)
74};
75
76static DEFINE_MUTEX(viodasd_mutex);
77static DEFINE_SPINLOCK(viodasd_spinlock);
78
79#define VIOMAXREQ 16
80
81#define DEVICE_NO(cell) ((struct viodasd_device *)(cell) - &viodasd_devices[0])
82
83struct viodasd_waitevent {
84 struct completion com;
85 int rc;
86 u16 sub_result;
87 int max_disk; /* open */
88};
89
90static const struct vio_error_entry viodasd_err_table[] = {
91 { 0x0201, EINVAL, "Invalid Range" },
92 { 0x0202, EINVAL, "Invalid Token" },
93 { 0x0203, EIO, "DMA Error" },
94 { 0x0204, EIO, "Use Error" },
95 { 0x0205, EIO, "Release Error" },
96 { 0x0206, EINVAL, "Invalid Disk" },
97 { 0x0207, EBUSY, "Can't Lock" },
98 { 0x0208, EIO, "Already Locked" },
99 { 0x0209, EIO, "Already Unlocked" },
100 { 0x020A, EIO, "Invalid Arg" },
101 { 0x020B, EIO, "Bad IFS File" },
102 { 0x020C, EROFS, "Read Only Device" },
103 { 0x02FF, EIO, "Internal Error" },
104 { 0x0000, 0, NULL },
105};
106
107/*
108 * Figure out the biggest I/O request (in sectors) we can accept
109 */
110#define VIODASD_MAXSECTORS (4096 / 512 * VIOMAXBLOCKDMA)
111
112/*
113 * Number of disk I/O requests we've sent to OS/400
114 */
115static int num_req_outstanding;
116
117/*
118 * This is our internal structure for keeping track of disk devices
119 */
120struct viodasd_device {
121 u16 cylinders;
122 u16 tracks;
123 u16 sectors;
124 u16 bytes_per_sector;
125 u64 size;
126 int read_only;
127 spinlock_t q_lock;
128 struct gendisk *disk;
129 struct device *dev;
130} viodasd_devices[MAX_DISKNO];
131
132/*
133 * External open entry point.
134 */
135static int viodasd_open(struct block_device *bdev, fmode_t mode)
136{
137 struct viodasd_device *d = bdev->bd_disk->private_data;
138 HvLpEvent_Rc hvrc;
139 struct viodasd_waitevent we;
140 u16 flags = 0;
141
142 if (d->read_only) {
143 if (mode & FMODE_WRITE)
144 return -EROFS;
145 flags = vioblockflags_ro;
146 }
147
148 init_completion(&we.com);
149
150 /* Send the open event to OS/400 */
151 hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
152 HvLpEvent_Type_VirtualIo,
153 viomajorsubtype_blockio | vioblockopen,
154 HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck,
155 viopath_sourceinst(viopath_hostLp),
156 viopath_targetinst(viopath_hostLp),
157 (u64)(unsigned long)&we, VIOVERSION << 16,
158 ((u64)DEVICE_NO(d) << 48) | ((u64)flags << 32),
159 0, 0, 0);
160 if (hvrc != 0) {
161 pr_warning("HV open failed %d\n", (int)hvrc);
162 return -EIO;
163 }
164
165 wait_for_completion(&we.com);
166
167 /* Check the return code */
168 if (we.rc != 0) {
169 const struct vio_error_entry *err =
170 vio_lookup_rc(viodasd_err_table, we.sub_result);
171
172 pr_warning("bad rc opening disk: %d:0x%04x (%s)\n",
173 (int)we.rc, we.sub_result, err->msg);
174 return -EIO;
175 }
176
177 return 0;
178}
179
180static int viodasd_unlocked_open(struct block_device *bdev, fmode_t mode)
181{
182 int ret;
183
184 mutex_lock(&viodasd_mutex);
185 ret = viodasd_open(bdev, mode);
186 mutex_unlock(&viodasd_mutex);
187
188 return ret;
189}
190
191
192/*
193 * External release entry point.
194 */
195static int viodasd_release(struct gendisk *disk, fmode_t mode)
196{
197 struct viodasd_device *d = disk->private_data;
198 HvLpEvent_Rc hvrc;
199
200 mutex_lock(&viodasd_mutex);
201 /* Send the event to OS/400. We DON'T expect a response */
202 hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
203 HvLpEvent_Type_VirtualIo,
204 viomajorsubtype_blockio | vioblockclose,
205 HvLpEvent_AckInd_NoAck, HvLpEvent_AckType_ImmediateAck,
206 viopath_sourceinst(viopath_hostLp),
207 viopath_targetinst(viopath_hostLp),
208 0, VIOVERSION << 16,
209 ((u64)DEVICE_NO(d) << 48) /* | ((u64)flags << 32) */,
210 0, 0, 0);
211 if (hvrc != 0)
212 pr_warning("HV close call failed %d\n", (int)hvrc);
213
214 mutex_unlock(&viodasd_mutex);
215
216 return 0;
217}
218
219
220/* External ioctl entry point.
221 */
222static int viodasd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
223{
224 struct gendisk *disk = bdev->bd_disk;
225 struct viodasd_device *d = disk->private_data;
226
227 geo->sectors = d->sectors ? d->sectors : 32;
228 geo->heads = d->tracks ? d->tracks : 64;
229 geo->cylinders = d->cylinders ? d->cylinders :
230 get_capacity(disk) / (geo->sectors * geo->heads);
231
232 return 0;
233}
234
235/*
236 * Our file operations table
237 */
238static const struct block_device_operations viodasd_fops = {
239 .owner = THIS_MODULE,
240 .open = viodasd_unlocked_open,
241 .release = viodasd_release,
242 .getgeo = viodasd_getgeo,
243};
244
245/*
246 * End a request
247 */
248static void viodasd_end_request(struct request *req, int error,
249 int num_sectors)
250{
251 __blk_end_request(req, error, num_sectors << 9);
252}
253
254/*
255 * Send an actual I/O request to OS/400
256 */
257static int send_request(struct request *req)
258{
259 u64 start;
260 int direction;
261 int nsg;
262 u16 viocmd;
263 HvLpEvent_Rc hvrc;
264 struct vioblocklpevent *bevent;
265 struct HvLpEvent *hev;
266 struct scatterlist sg[VIOMAXBLOCKDMA];
267 int sgindex;
268 struct viodasd_device *d;
269 unsigned long flags;
270
271 start = (u64)blk_rq_pos(req) << 9;
272
273 if (rq_data_dir(req) == READ) {
274 direction = DMA_FROM_DEVICE;
275 viocmd = viomajorsubtype_blockio | vioblockread;
276 } else {
277 direction = DMA_TO_DEVICE;
278 viocmd = viomajorsubtype_blockio | vioblockwrite;
279 }
280
281 d = req->rq_disk->private_data;
282
283 /* Now build the scatter-gather list */
284 sg_init_table(sg, VIOMAXBLOCKDMA);
285 nsg = blk_rq_map_sg(req->q, req, sg);
286 nsg = dma_map_sg(d->dev, sg, nsg, direction);
287
288 spin_lock_irqsave(&viodasd_spinlock, flags);
289 num_req_outstanding++;
290
291 /* This optimization handles a single DMA block */
292 if (nsg == 1)
293 hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
294 HvLpEvent_Type_VirtualIo, viocmd,
295 HvLpEvent_AckInd_DoAck,
296 HvLpEvent_AckType_ImmediateAck,
297 viopath_sourceinst(viopath_hostLp),
298 viopath_targetinst(viopath_hostLp),
299 (u64)(unsigned long)req, VIOVERSION << 16,
300 ((u64)DEVICE_NO(d) << 48), start,
301 ((u64)sg_dma_address(&sg[0])) << 32,
302 sg_dma_len(&sg[0]));
303 else {
304 bevent = (struct vioblocklpevent *)
305 vio_get_event_buffer(viomajorsubtype_blockio);
306 if (bevent == NULL) {
307 pr_warning("error allocating disk event buffer\n");
308 goto error_ret;
309 }
310
311 /*
312 * Now build up the actual request. Note that we store
313 * the pointer to the request in the correlation
314 * token so we can match the response up later
315 */
316 memset(bevent, 0, sizeof(struct vioblocklpevent));
317 hev = &bevent->event;
318 hev->flags = HV_LP_EVENT_VALID | HV_LP_EVENT_DO_ACK |
319 HV_LP_EVENT_INT;
320 hev->xType = HvLpEvent_Type_VirtualIo;
321 hev->xSubtype = viocmd;
322 hev->xSourceLp = HvLpConfig_getLpIndex();
323 hev->xTargetLp = viopath_hostLp;
324 hev->xSizeMinus1 =
325 offsetof(struct vioblocklpevent, u.rw_data.dma_info) +
326 (sizeof(bevent->u.rw_data.dma_info[0]) * nsg) - 1;
327 hev->xSourceInstanceId = viopath_sourceinst(viopath_hostLp);
328 hev->xTargetInstanceId = viopath_targetinst(viopath_hostLp);
329 hev->xCorrelationToken = (u64)req;
330 bevent->version = VIOVERSION;
331 bevent->disk = DEVICE_NO(d);
332 bevent->u.rw_data.offset = start;
333
334 /*
335 * Copy just the dma information from the sg list
336 * into the request
337 */
338 for (sgindex = 0; sgindex < nsg; sgindex++) {
339 bevent->u.rw_data.dma_info[sgindex].token =
340 sg_dma_address(&sg[sgindex]);
341 bevent->u.rw_data.dma_info[sgindex].len =
342 sg_dma_len(&sg[sgindex]);
343 }
344
345 /* Send the request */
346 hvrc = HvCallEvent_signalLpEvent(&bevent->event);
347 vio_free_event_buffer(viomajorsubtype_blockio, bevent);
348 }
349
350 if (hvrc != HvLpEvent_Rc_Good) {
351 pr_warning("error sending disk event to OS/400 (rc %d)\n",
352 (int)hvrc);
353 goto error_ret;
354 }
355 spin_unlock_irqrestore(&viodasd_spinlock, flags);
356 return 0;
357
358error_ret:
359 num_req_outstanding--;
360 spin_unlock_irqrestore(&viodasd_spinlock, flags);
361 dma_unmap_sg(d->dev, sg, nsg, direction);
362 return -1;
363}
364
365/*
366 * This is the external request processing routine
367 */
368static void do_viodasd_request(struct request_queue *q)
369{
370 struct request *req;
371
372 /*
373 * If we already have the maximum number of requests
374 * outstanding to OS/400 just bail out. We'll come
375 * back later.
376 */
377 while (num_req_outstanding < VIOMAXREQ) {
378 req = blk_fetch_request(q);
379 if (req == NULL)
380 return;
381 /* check that request contains a valid command */
382 if (req->cmd_type != REQ_TYPE_FS) {
383 viodasd_end_request(req, -EIO, blk_rq_sectors(req));
384 continue;
385 }
386 /* Try sending the request */
387 if (send_request(req) != 0)
388 viodasd_end_request(req, -EIO, blk_rq_sectors(req));
389 }
390}
391
392/*
393 * Probe a single disk and fill in the viodasd_device structure
394 * for it.
395 */
396static int probe_disk(struct viodasd_device *d)
397{
398 HvLpEvent_Rc hvrc;
399 struct viodasd_waitevent we;
400 int dev_no = DEVICE_NO(d);
401 struct gendisk *g;
402 struct request_queue *q;
403 u16 flags = 0;
404
405retry:
406 init_completion(&we.com);
407
408 /* Send the open event to OS/400 */
409 hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
410 HvLpEvent_Type_VirtualIo,
411 viomajorsubtype_blockio | vioblockopen,
412 HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck,
413 viopath_sourceinst(viopath_hostLp),
414 viopath_targetinst(viopath_hostLp),
415 (u64)(unsigned long)&we, VIOVERSION << 16,
416 ((u64)dev_no << 48) | ((u64)flags<< 32),
417 0, 0, 0);
418 if (hvrc != 0) {
419 pr_warning("bad rc on HV open %d\n", (int)hvrc);
420 return 0;
421 }
422
423 wait_for_completion(&we.com);
424
425 if (we.rc != 0) {
426 if (flags != 0)
427 return 0;
428 /* try again with read only flag set */
429 flags = vioblockflags_ro;
430 goto retry;
431 }
432 if (we.max_disk > (MAX_DISKNO - 1)) {
433 printk_once(KERN_INFO pr_fmt("Only examining the first %d of %d disks connected\n"),
434 MAX_DISKNO, we.max_disk + 1);
435 }
436
437 /* Send the close event to OS/400. We DON'T expect a response */
438 hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
439 HvLpEvent_Type_VirtualIo,
440 viomajorsubtype_blockio | vioblockclose,
441 HvLpEvent_AckInd_NoAck, HvLpEvent_AckType_ImmediateAck,
442 viopath_sourceinst(viopath_hostLp),
443 viopath_targetinst(viopath_hostLp),
444 0, VIOVERSION << 16,
445 ((u64)dev_no << 48) | ((u64)flags << 32),
446 0, 0, 0);
447 if (hvrc != 0) {
448 pr_warning("bad rc sending event to OS/400 %d\n", (int)hvrc);
449 return 0;
450 }
451
452 if (d->dev == NULL) {
453 /* this is when we reprobe for new disks */
454 if (vio_create_viodasd(dev_no) == NULL) {
455 pr_warning("cannot allocate virtual device for disk %d\n",
456 dev_no);
457 return 0;
458 }
459 /*
460 * The vio_create_viodasd will have recursed into this
461 * routine with d->dev set to the new vio device and
462 * will finish the setup of the disk below.
463 */
464 return 1;
465 }
466
467 /* create the request queue for the disk */
468 spin_lock_init(&d->q_lock);
469 q = blk_init_queue(do_viodasd_request, &d->q_lock);
470 if (q == NULL) {
471 pr_warning("cannot allocate queue for disk %d\n", dev_no);
472 return 0;
473 }
474 g = alloc_disk(1 << PARTITION_SHIFT);
475 if (g == NULL) {
476 pr_warning("cannot allocate disk structure for disk %d\n",
477 dev_no);
478 blk_cleanup_queue(q);
479 return 0;
480 }
481
482 d->disk = g;
483 blk_queue_max_segments(q, VIOMAXBLOCKDMA);
484 blk_queue_max_hw_sectors(q, VIODASD_MAXSECTORS);
485 g->major = VIODASD_MAJOR;
486 g->first_minor = dev_no << PARTITION_SHIFT;
487 if (dev_no >= 26)
488 snprintf(g->disk_name, sizeof(g->disk_name),
489 VIOD_GENHD_NAME "%c%c",
490 'a' + (dev_no / 26) - 1, 'a' + (dev_no % 26));
491 else
492 snprintf(g->disk_name, sizeof(g->disk_name),
493 VIOD_GENHD_NAME "%c", 'a' + (dev_no % 26));
494 g->fops = &viodasd_fops;
495 g->queue = q;
496 g->private_data = d;
497 g->driverfs_dev = d->dev;
498 set_capacity(g, d->size >> 9);
499
500 pr_info("disk %d: %lu sectors (%lu MB) CHS=%d/%d/%d sector size %d%s\n",
501 dev_no, (unsigned long)(d->size >> 9),
502 (unsigned long)(d->size >> 20),
503 (int)d->cylinders, (int)d->tracks,
504 (int)d->sectors, (int)d->bytes_per_sector,
505 d->read_only ? " (RO)" : "");
506
507 /* register us in the global list */
508 add_disk(g);
509 return 1;
510}
511
512/* returns the total number of scatterlist elements converted */
513static int block_event_to_scatterlist(const struct vioblocklpevent *bevent,
514 struct scatterlist *sg, int *total_len)
515{
516 int i, numsg;
517 const struct rw_data *rw_data = &bevent->u.rw_data;
518 static const int offset =
519 offsetof(struct vioblocklpevent, u.rw_data.dma_info);
520 static const int element_size = sizeof(rw_data->dma_info[0]);
521
522 numsg = ((bevent->event.xSizeMinus1 + 1) - offset) / element_size;
523 if (numsg > VIOMAXBLOCKDMA)
524 numsg = VIOMAXBLOCKDMA;
525
526 *total_len = 0;
527 sg_init_table(sg, VIOMAXBLOCKDMA);
528 for (i = 0; (i < numsg) && (rw_data->dma_info[i].len > 0); ++i) {
529 sg_dma_address(&sg[i]) = rw_data->dma_info[i].token;
530 sg_dma_len(&sg[i]) = rw_data->dma_info[i].len;
531 *total_len += rw_data->dma_info[i].len;
532 }
533 return i;
534}
535
536/*
537 * Restart all queues, starting with the one _after_ the disk given,
538 * thus reducing the chance of starvation of higher numbered disks.
539 */
540static void viodasd_restart_all_queues_starting_from(int first_index)
541{
542 int i;
543
544 for (i = first_index + 1; i < MAX_DISKNO; ++i)
545 if (viodasd_devices[i].disk)
546 blk_run_queue(viodasd_devices[i].disk->queue);
547 for (i = 0; i <= first_index; ++i)
548 if (viodasd_devices[i].disk)
549 blk_run_queue(viodasd_devices[i].disk->queue);
550}
551
552/*
553 * For read and write requests, decrement the number of outstanding requests,
554 * Free the DMA buffers we allocated.
555 */
556static int viodasd_handle_read_write(struct vioblocklpevent *bevent)
557{
558 int num_sg, num_sect, pci_direction, total_len;
559 struct request *req;
560 struct scatterlist sg[VIOMAXBLOCKDMA];
561 struct HvLpEvent *event = &bevent->event;
562 unsigned long irq_flags;
563 struct viodasd_device *d;
564 int error;
565 spinlock_t *qlock;
566
567 num_sg = block_event_to_scatterlist(bevent, sg, &total_len);
568 num_sect = total_len >> 9;
569 if (event->xSubtype == (viomajorsubtype_blockio | vioblockread))
570 pci_direction = DMA_FROM_DEVICE;
571 else
572 pci_direction = DMA_TO_DEVICE;
573 req = (struct request *)bevent->event.xCorrelationToken;
574 d = req->rq_disk->private_data;
575
576 dma_unmap_sg(d->dev, sg, num_sg, pci_direction);
577
578 /*
579 * Since this is running in interrupt mode, we need to make sure
580 * we're not stepping on any global I/O operations
581 */
582 spin_lock_irqsave(&viodasd_spinlock, irq_flags);
583 num_req_outstanding--;
584 spin_unlock_irqrestore(&viodasd_spinlock, irq_flags);
585
586 error = (event->xRc == HvLpEvent_Rc_Good) ? 0 : -EIO;
587 if (error) {
588 const struct vio_error_entry *err;
589 err = vio_lookup_rc(viodasd_err_table, bevent->sub_result);
590 pr_warning("read/write error %d:0x%04x (%s)\n",
591 event->xRc, bevent->sub_result, err->msg);
592 num_sect = blk_rq_sectors(req);
593 }
594 qlock = req->q->queue_lock;
595 spin_lock_irqsave(qlock, irq_flags);
596 viodasd_end_request(req, error, num_sect);
597 spin_unlock_irqrestore(qlock, irq_flags);
598
599 /* Finally, try to get more requests off of this device's queue */
600 viodasd_restart_all_queues_starting_from(DEVICE_NO(d));
601
602 return 0;
603}
604
605/* This routine handles incoming block LP events */
606static void handle_block_event(struct HvLpEvent *event)
607{
608 struct vioblocklpevent *bevent = (struct vioblocklpevent *)event;
609 struct viodasd_waitevent *pwe;
610
611 if (event == NULL)
612 /* Notification that a partition went away! */
613 return;
614 /* First, we should NEVER get an int here...only acks */
615 if (hvlpevent_is_int(event)) {
616 pr_warning("Yikes! got an int in viodasd event handler!\n");
617 if (hvlpevent_need_ack(event)) {
618 event->xRc = HvLpEvent_Rc_InvalidSubtype;
619 HvCallEvent_ackLpEvent(event);
620 }
621 }
622
623 switch (event->xSubtype & VIOMINOR_SUBTYPE_MASK) {
624 case vioblockopen:
625 /*
626 * Handle a response to an open request. We get all the
627 * disk information in the response, so update it. The
628 * correlation token contains a pointer to a waitevent
629 * structure that has a completion in it. update the
630 * return code in the waitevent structure and post the
631 * completion to wake up the guy who sent the request
632 */
633 pwe = (struct viodasd_waitevent *)event->xCorrelationToken;
634 pwe->rc = event->xRc;
635 pwe->sub_result = bevent->sub_result;
636 if (event->xRc == HvLpEvent_Rc_Good) {
637 const struct open_data *data = &bevent->u.open_data;
638 struct viodasd_device *device =
639 &viodasd_devices[bevent->disk];
640 device->read_only =
641 bevent->flags & vioblockflags_ro;
642 device->size = data->disk_size;
643 device->cylinders = data->cylinders;
644 device->tracks = data->tracks;
645 device->sectors = data->sectors;
646 device->bytes_per_sector = data->bytes_per_sector;
647 pwe->max_disk = data->max_disk;
648 }
649 complete(&pwe->com);
650 break;
651 case vioblockclose:
652 break;
653 case vioblockread:
654 case vioblockwrite:
655 viodasd_handle_read_write(bevent);
656 break;
657
658 default:
659 pr_warning("invalid subtype!");
660 if (hvlpevent_need_ack(event)) {
661 event->xRc = HvLpEvent_Rc_InvalidSubtype;
662 HvCallEvent_ackLpEvent(event);
663 }
664 }
665}
666
667/*
668 * Get the driver to reprobe for more disks.
669 */
670static ssize_t probe_disks(struct device_driver *drv, const char *buf,
671 size_t count)
672{
673 struct viodasd_device *d;
674
675 for (d = viodasd_devices; d < &viodasd_devices[MAX_DISKNO]; d++) {
676 if (d->disk == NULL)
677 probe_disk(d);
678 }
679 return count;
680}
681static DRIVER_ATTR(probe, S_IWUSR, NULL, probe_disks);
682
683static int viodasd_probe(struct vio_dev *vdev, const struct vio_device_id *id)
684{
685 struct viodasd_device *d = &viodasd_devices[vdev->unit_address];
686
687 d->dev = &vdev->dev;
688 if (!probe_disk(d))
689 return -ENODEV;
690 return 0;
691}
692
693static int viodasd_remove(struct vio_dev *vdev)
694{
695 struct viodasd_device *d;
696
697 d = &viodasd_devices[vdev->unit_address];
698 if (d->disk) {
699 del_gendisk(d->disk);
700 blk_cleanup_queue(d->disk->queue);
701 put_disk(d->disk);
702 d->disk = NULL;
703 }
704 d->dev = NULL;
705 return 0;
706}
707
708/**
709 * viodasd_device_table: Used by vio.c to match devices that we
710 * support.
711 */
712static struct vio_device_id viodasd_device_table[] __devinitdata = {
713 { "block", "IBM,iSeries-viodasd" },
714 { "", "" }
715};
716MODULE_DEVICE_TABLE(vio, viodasd_device_table);
717
718static struct vio_driver viodasd_driver = {
719 .id_table = viodasd_device_table,
720 .probe = viodasd_probe,
721 .remove = viodasd_remove,
722 .driver = {
723 .name = "viodasd",
724 .owner = THIS_MODULE,
725 }
726};
727
728static int need_delete_probe;
729
730/*
731 * Initialize the whole device driver. Handle module and non-module
732 * versions
733 */
734static int __init viodasd_init(void)
735{
736 int rc;
737
738 if (!firmware_has_feature(FW_FEATURE_ISERIES)) {
739 rc = -ENODEV;
740 goto early_fail;
741 }
742
743 /* Try to open to our host lp */
744 if (viopath_hostLp == HvLpIndexInvalid)
745 vio_set_hostlp();
746
747 if (viopath_hostLp == HvLpIndexInvalid) {
748 pr_warning("invalid hosting partition\n");
749 rc = -EIO;
750 goto early_fail;
751 }
752
753 pr_info("vers " VIOD_VERS ", hosting partition %d\n", viopath_hostLp);
754
755 /* register the block device */
756 rc = register_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME);
757 if (rc) {
758 pr_warning("Unable to get major number %d for %s\n",
759 VIODASD_MAJOR, VIOD_GENHD_NAME);
760 goto early_fail;
761 }
762 /* Actually open the path to the hosting partition */
763 rc = viopath_open(viopath_hostLp, viomajorsubtype_blockio,
764 VIOMAXREQ + 2);
765 if (rc) {
766 pr_warning("error opening path to host partition %d\n",
767 viopath_hostLp);
768 goto unregister_blk;
769 }
770
771 /* Initialize our request handler */
772 vio_setHandler(viomajorsubtype_blockio, handle_block_event);
773
774 rc = vio_register_driver(&viodasd_driver);
775 if (rc) {
776 pr_warning("vio_register_driver failed\n");
777 goto unset_handler;
778 }
779
780 /*
781 * If this call fails, it just means that we cannot dynamically
782 * add virtual disks, but the driver will still work fine for
783 * all existing disks, so ignore the failure.
784 */
785 if (!driver_create_file(&viodasd_driver.driver, &driver_attr_probe))
786 need_delete_probe = 1;
787
788 return 0;
789
790unset_handler:
791 vio_clearHandler(viomajorsubtype_blockio);
792 viopath_close(viopath_hostLp, viomajorsubtype_blockio, VIOMAXREQ + 2);
793unregister_blk:
794 unregister_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME);
795early_fail:
796 return rc;
797}
798module_init(viodasd_init);
799
800void __exit viodasd_exit(void)
801{
802 if (need_delete_probe)
803 driver_remove_file(&viodasd_driver.driver, &driver_attr_probe);
804 vio_unregister_driver(&viodasd_driver);
805 vio_clearHandler(viomajorsubtype_blockio);
806 viopath_close(viopath_hostLp, viomajorsubtype_blockio, VIOMAXREQ + 2);
807 unregister_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME);
808}
809module_exit(viodasd_exit);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 4d0b70adf5f7..c4a60badf252 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -4,6 +4,7 @@
4#include <linux/blkdev.h> 4#include <linux/blkdev.h>
5#include <linux/hdreg.h> 5#include <linux/hdreg.h>
6#include <linux/module.h> 6#include <linux/module.h>
7#include <linux/mutex.h>
7#include <linux/virtio.h> 8#include <linux/virtio.h>
8#include <linux/virtio_blk.h> 9#include <linux/virtio_blk.h>
9#include <linux/scatterlist.h> 10#include <linux/scatterlist.h>
@@ -36,6 +37,12 @@ struct virtio_blk
36 /* Process context for config space updates */ 37 /* Process context for config space updates */
37 struct work_struct config_work; 38 struct work_struct config_work;
38 39
40 /* Lock for config space updates */
41 struct mutex config_lock;
42
43 /* enable config space updates */
44 bool config_enable;
45
39 /* What host tells us, plus 2 for header & trailer. */ 46 /* What host tells us, plus 2 for header & trailer. */
40 unsigned int sg_elems; 47 unsigned int sg_elems;
41 48
@@ -172,7 +179,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
172 } 179 }
173 } 180 }
174 181
175 if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) { 182 if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr, GFP_ATOMIC)<0) {
176 mempool_free(vbr, vblk->pool); 183 mempool_free(vbr, vblk->pool);
177 return false; 184 return false;
178 } 185 }
@@ -243,8 +250,8 @@ static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
243 if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI)) 250 if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
244 return -ENOTTY; 251 return -ENOTTY;
245 252
246 return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, 253 return scsi_cmd_blk_ioctl(bdev, mode, cmd,
247 (void __user *)data); 254 (void __user *)data);
248} 255}
249 256
250/* We provide getgeo only to please some old bootloader/partitioning tools */ 257/* We provide getgeo only to please some old bootloader/partitioning tools */
@@ -318,6 +325,10 @@ static void virtblk_config_changed_work(struct work_struct *work)
318 char cap_str_2[10], cap_str_10[10]; 325 char cap_str_2[10], cap_str_10[10];
319 u64 capacity, size; 326 u64 capacity, size;
320 327
328 mutex_lock(&vblk->config_lock);
329 if (!vblk->config_enable)
330 goto done;
331
321 /* Host must always specify the capacity. */ 332 /* Host must always specify the capacity. */
322 vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity), 333 vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
323 &capacity, sizeof(capacity)); 334 &capacity, sizeof(capacity));
@@ -340,6 +351,8 @@ static void virtblk_config_changed_work(struct work_struct *work)
340 cap_str_10, cap_str_2); 351 cap_str_10, cap_str_2);
341 352
342 set_capacity(vblk->disk, capacity); 353 set_capacity(vblk->disk, capacity);
354done:
355 mutex_unlock(&vblk->config_lock);
343} 356}
344 357
345static void virtblk_config_changed(struct virtio_device *vdev) 358static void virtblk_config_changed(struct virtio_device *vdev)
@@ -349,6 +362,18 @@ static void virtblk_config_changed(struct virtio_device *vdev)
349 queue_work(virtblk_wq, &vblk->config_work); 362 queue_work(virtblk_wq, &vblk->config_work);
350} 363}
351 364
365static int init_vq(struct virtio_blk *vblk)
366{
367 int err = 0;
368
369 /* We expect one virtqueue, for output. */
370 vblk->vq = virtio_find_single_vq(vblk->vdev, blk_done, "requests");
371 if (IS_ERR(vblk->vq))
372 err = PTR_ERR(vblk->vq);
373
374 return err;
375}
376
352static int __devinit virtblk_probe(struct virtio_device *vdev) 377static int __devinit virtblk_probe(struct virtio_device *vdev)
353{ 378{
354 struct virtio_blk *vblk; 379 struct virtio_blk *vblk;
@@ -388,14 +413,13 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
388 vblk->vdev = vdev; 413 vblk->vdev = vdev;
389 vblk->sg_elems = sg_elems; 414 vblk->sg_elems = sg_elems;
390 sg_init_table(vblk->sg, vblk->sg_elems); 415 sg_init_table(vblk->sg, vblk->sg_elems);
416 mutex_init(&vblk->config_lock);
391 INIT_WORK(&vblk->config_work, virtblk_config_changed_work); 417 INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
418 vblk->config_enable = true;
392 419
393 /* We expect one virtqueue, for output. */ 420 err = init_vq(vblk);
394 vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests"); 421 if (err)
395 if (IS_ERR(vblk->vq)) {
396 err = PTR_ERR(vblk->vq);
397 goto out_free_vblk; 422 goto out_free_vblk;
398 }
399 423
400 vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req)); 424 vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req));
401 if (!vblk->pool) { 425 if (!vblk->pool) {
@@ -542,7 +566,10 @@ static void __devexit virtblk_remove(struct virtio_device *vdev)
542 struct virtio_blk *vblk = vdev->priv; 566 struct virtio_blk *vblk = vdev->priv;
543 int index = vblk->index; 567 int index = vblk->index;
544 568
545 flush_work(&vblk->config_work); 569 /* Prevent config work handler from accessing the device. */
570 mutex_lock(&vblk->config_lock);
571 vblk->config_enable = false;
572 mutex_unlock(&vblk->config_lock);
546 573
547 /* Nothing should be pending. */ 574 /* Nothing should be pending. */
548 BUG_ON(!list_empty(&vblk->reqs)); 575 BUG_ON(!list_empty(&vblk->reqs));
@@ -550,6 +577,8 @@ static void __devexit virtblk_remove(struct virtio_device *vdev)
550 /* Stop all the virtqueues. */ 577 /* Stop all the virtqueues. */
551 vdev->config->reset(vdev); 578 vdev->config->reset(vdev);
552 579
580 flush_work(&vblk->config_work);
581
553 del_gendisk(vblk->disk); 582 del_gendisk(vblk->disk);
554 blk_cleanup_queue(vblk->disk->queue); 583 blk_cleanup_queue(vblk->disk->queue);
555 put_disk(vblk->disk); 584 put_disk(vblk->disk);
@@ -559,6 +588,46 @@ static void __devexit virtblk_remove(struct virtio_device *vdev)
559 ida_simple_remove(&vd_index_ida, index); 588 ida_simple_remove(&vd_index_ida, index);
560} 589}
561 590
591#ifdef CONFIG_PM
592static int virtblk_freeze(struct virtio_device *vdev)
593{
594 struct virtio_blk *vblk = vdev->priv;
595
596 /* Ensure we don't receive any more interrupts */
597 vdev->config->reset(vdev);
598
599 /* Prevent config work handler from accessing the device. */
600 mutex_lock(&vblk->config_lock);
601 vblk->config_enable = false;
602 mutex_unlock(&vblk->config_lock);
603
604 flush_work(&vblk->config_work);
605
606 spin_lock_irq(vblk->disk->queue->queue_lock);
607 blk_stop_queue(vblk->disk->queue);
608 spin_unlock_irq(vblk->disk->queue->queue_lock);
609 blk_sync_queue(vblk->disk->queue);
610
611 vdev->config->del_vqs(vdev);
612 return 0;
613}
614
615static int virtblk_restore(struct virtio_device *vdev)
616{
617 struct virtio_blk *vblk = vdev->priv;
618 int ret;
619
620 vblk->config_enable = true;
621 ret = init_vq(vdev->priv);
622 if (!ret) {
623 spin_lock_irq(vblk->disk->queue->queue_lock);
624 blk_start_queue(vblk->disk->queue);
625 spin_unlock_irq(vblk->disk->queue->queue_lock);
626 }
627 return ret;
628}
629#endif
630
562static const struct virtio_device_id id_table[] = { 631static const struct virtio_device_id id_table[] = {
563 { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID }, 632 { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
564 { 0 }, 633 { 0 },
@@ -584,6 +653,10 @@ static struct virtio_driver __refdata virtio_blk = {
584 .probe = virtblk_probe, 653 .probe = virtblk_probe,
585 .remove = __devexit_p(virtblk_remove), 654 .remove = __devexit_p(virtblk_remove),
586 .config_changed = virtblk_config_changed, 655 .config_changed = virtblk_config_changed,
656#ifdef CONFIG_PM
657 .freeze = virtblk_freeze,
658 .restore = virtblk_restore,
659#endif
587}; 660};
588 661
589static int __init init(void) 662static int __init init(void)
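
The virtio_blk hunks above serialize the asynchronous config work against remove and freeze with a mutex plus a config_enable flag: the handler checks the flag under config_lock, and teardown clears it under the same lock before flushing the work, so a late work invocation can never touch a dead device. A minimal sketch of that pattern in isolation (the struct and function names are placeholders):

	#include <linux/kernel.h>
	#include <linux/mutex.h>
	#include <linux/workqueue.h>

	struct example_dev {
		struct mutex		config_lock;
		bool			config_enable;
		struct work_struct	config_work;
	};

	static void example_config_work(struct work_struct *work)
	{
		struct example_dev *d =
			container_of(work, struct example_dev, config_work);

		mutex_lock(&d->config_lock);
		if (!d->config_enable)
			goto done;		/* device is going away: do nothing */

		/* ... safe to read config space and resize here ... */
	done:
		mutex_unlock(&d->config_lock);
	}

	static void example_disable_config(struct example_dev *d)
	{
		/* Stop future work invocations from touching the device ... */
		mutex_lock(&d->config_lock);
		d->config_enable = false;
		mutex_unlock(&d->config_lock);

		/* ... and wait for any handler that is already running. */
		flush_work(&d->config_work);
	}
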
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index 4abd2bcd20fb..ff540520bada 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -52,7 +52,6 @@
52#include <linux/io.h> 52#include <linux/io.h>
53#include <linux/gfp.h> 53#include <linux/gfp.h>
54 54
55#include <asm/system.h>
56#include <asm/uaccess.h> 55#include <asm/uaccess.h>
57#include <asm/dma.h> 56#include <asm/dma.h>
58 57
@@ -148,7 +147,7 @@ static volatile int xdc_busy;
148static struct timer_list xd_watchdog_int; 147static struct timer_list xd_watchdog_int;
149 148
150static volatile u_char xd_error; 149static volatile u_char xd_error;
151static int nodma = XD_DONT_USE_DMA; 150static bool nodma = XD_DONT_USE_DMA;
152 151
153static struct request_queue *xd_queue; 152static struct request_queue *xd_queue;
154 153
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 15ec4db194d1..0088bf60f368 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -39,9 +39,6 @@
39#include <linux/list.h> 39#include <linux/list.h>
40#include <linux/delay.h> 40#include <linux/delay.h>
41#include <linux/freezer.h> 41#include <linux/freezer.h>
42#include <linux/loop.h>
43#include <linux/falloc.h>
44#include <linux/fs.h>
45 42
46#include <xen/events.h> 43#include <xen/events.h>
47#include <xen/page.h> 44#include <xen/page.h>
@@ -362,7 +359,7 @@ static int xen_blkbk_map(struct blkif_request *req,
362{ 359{
363 struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 360 struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
364 int i; 361 int i;
365 int nseg = req->nr_segments; 362 int nseg = req->u.rw.nr_segments;
366 int ret = 0; 363 int ret = 0;
367 364
368 /* 365 /*
@@ -416,30 +413,25 @@ static int xen_blkbk_map(struct blkif_request *req,
416 return ret; 413 return ret;
417} 414}
418 415
419static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req) 416static int dispatch_discard_io(struct xen_blkif *blkif,
417 struct blkif_request *req)
420{ 418{
421 int err = 0; 419 int err = 0;
422 int status = BLKIF_RSP_OKAY; 420 int status = BLKIF_RSP_OKAY;
423 struct block_device *bdev = blkif->vbd.bdev; 421 struct block_device *bdev = blkif->vbd.bdev;
424 422
425 if (blkif->blk_backend_type == BLKIF_BACKEND_PHY) 423 blkif->st_ds_req++;
426 /* just forward the discard request */ 424
425 xen_blkif_get(blkif);
426 if (blkif->blk_backend_type == BLKIF_BACKEND_PHY ||
427 blkif->blk_backend_type == BLKIF_BACKEND_FILE) {
428 unsigned long secure = (blkif->vbd.discard_secure &&
429 (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
430 BLKDEV_DISCARD_SECURE : 0;
427 err = blkdev_issue_discard(bdev, 431 err = blkdev_issue_discard(bdev,
428 req->u.discard.sector_number, 432 req->u.discard.sector_number,
429 req->u.discard.nr_sectors, 433 req->u.discard.nr_sectors,
430 GFP_KERNEL, 0); 434 GFP_KERNEL, secure);
431 else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) {
432 /* punch a hole in the backing file */
433 struct loop_device *lo = bdev->bd_disk->private_data;
434 struct file *file = lo->lo_backing_file;
435
436 if (file->f_op->fallocate)
437 err = file->f_op->fallocate(file,
438 FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
439 req->u.discard.sector_number << 9,
440 req->u.discard.nr_sectors << 9);
441 else
442 err = -EOPNOTSUPP;
443 } else 435 } else
444 err = -EOPNOTSUPP; 436 err = -EOPNOTSUPP;
445 437
@@ -449,7 +441,9 @@ static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req)
449 } else if (err) 441 } else if (err)
450 status = BLKIF_RSP_ERROR; 442 status = BLKIF_RSP_ERROR;
451 443
452 make_response(blkif, req->id, req->operation, status); 444 make_response(blkif, req->u.discard.id, req->operation, status);
445 xen_blkif_put(blkif);
446 return err;
453} 447}
454 448
455static void xen_blk_drain_io(struct xen_blkif *blkif) 449static void xen_blk_drain_io(struct xen_blkif *blkif)
@@ -573,8 +567,11 @@ __do_block_io_op(struct xen_blkif *blkif)
573 567
574 /* Apply all sanity checks to /private copy/ of request. */ 568 /* Apply all sanity checks to /private copy/ of request. */
575 barrier(); 569 barrier();
576 570 if (unlikely(req.operation == BLKIF_OP_DISCARD)) {
577 if (dispatch_rw_block_io(blkif, &req, pending_req)) 571 free_req(pending_req);
572 if (dispatch_discard_io(blkif, &req))
573 break;
574 } else if (dispatch_rw_block_io(blkif, &req, pending_req))
578 break; 575 break;
579 576
580 /* Yield point for this unbounded loop. */ 577 /* Yield point for this unbounded loop. */
@@ -633,10 +630,6 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
633 blkif->st_f_req++; 630 blkif->st_f_req++;
634 operation = WRITE_FLUSH; 631 operation = WRITE_FLUSH;
635 break; 632 break;
636 case BLKIF_OP_DISCARD:
637 blkif->st_ds_req++;
638 operation = REQ_DISCARD;
639 break;
640 default: 633 default:
641 operation = 0; /* make gcc happy */ 634 operation = 0; /* make gcc happy */
642 goto fail_response; 635 goto fail_response;
@@ -644,9 +637,9 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
644 } 637 }
645 638
646 /* Check that the number of segments is sane. */ 639 /* Check that the number of segments is sane. */
647 nseg = req->nr_segments; 640 nseg = req->u.rw.nr_segments;
648 if (unlikely(nseg == 0 && operation != WRITE_FLUSH && 641
649 operation != REQ_DISCARD) || 642 if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
650 unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { 643 unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
651 pr_debug(DRV_PFX "Bad number of segments in request (%d)\n", 644 pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
652 nseg); 645 nseg);
@@ -654,12 +647,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
654 goto fail_response; 647 goto fail_response;
655 } 648 }
656 649
657 preq.dev = req->handle; 650 preq.dev = req->u.rw.handle;
658 preq.sector_number = req->u.rw.sector_number; 651 preq.sector_number = req->u.rw.sector_number;
659 preq.nr_sects = 0; 652 preq.nr_sects = 0;
660 653
661 pending_req->blkif = blkif; 654 pending_req->blkif = blkif;
662 pending_req->id = req->id; 655 pending_req->id = req->u.rw.id;
663 pending_req->operation = req->operation; 656 pending_req->operation = req->operation;
664 pending_req->status = BLKIF_RSP_OKAY; 657 pending_req->status = BLKIF_RSP_OKAY;
665 pending_req->nr_pages = nseg; 658 pending_req->nr_pages = nseg;
@@ -707,7 +700,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
707 * the hypercall to unmap the grants - that is all done in 700 * the hypercall to unmap the grants - that is all done in
708 * xen_blkbk_unmap. 701 * xen_blkbk_unmap.
709 */ 702 */
710 if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg)) 703 if (xen_blkbk_map(req, pending_req, seg))
711 goto fail_flush; 704 goto fail_flush;
712 705
713 /* 706 /*
@@ -739,23 +732,16 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
739 732
740 /* This will be hit if the operation was a flush or discard. */ 733 /* This will be hit if the operation was a flush or discard. */
741 if (!bio) { 734 if (!bio) {
742 BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD); 735 BUG_ON(operation != WRITE_FLUSH);
743 736
744 if (operation == WRITE_FLUSH) { 737 bio = bio_alloc(GFP_KERNEL, 0);
745 bio = bio_alloc(GFP_KERNEL, 0); 738 if (unlikely(bio == NULL))
746 if (unlikely(bio == NULL)) 739 goto fail_put_bio;
747 goto fail_put_bio;
748 740
749 biolist[nbio++] = bio; 741 biolist[nbio++] = bio;
750 bio->bi_bdev = preq.bdev; 742 bio->bi_bdev = preq.bdev;
751 bio->bi_private = pending_req; 743 bio->bi_private = pending_req;
752 bio->bi_end_io = end_block_io_op; 744 bio->bi_end_io = end_block_io_op;
753 } else if (operation == REQ_DISCARD) {
754 xen_blk_discard(blkif, req);
755 xen_blkif_put(blkif);
756 free_req(pending_req);
757 return 0;
758 }
759 } 745 }
760 746
761 /* 747 /*
@@ -784,7 +770,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
784 xen_blkbk_unmap(pending_req); 770 xen_blkbk_unmap(pending_req);
785 fail_response: 771 fail_response:
786 /* Haven't submitted any bio's yet. */ 772 /* Haven't submitted any bio's yet. */
787 make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); 773 make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR);
788 free_req(pending_req); 774 free_req(pending_req);
789 msleep(1); /* back off a bit */ 775 msleep(1); /* back off a bit */
790 return -EIO; 776 return -EIO;
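
With the blkback rework above, discard requests are split out in __do_block_io_op(), which frees the pre-allocated pending_req and calls dispatch_discard_io() instead of the read/write path; that helper maps BLKIF_DISCARD_SECURE onto BLKDEV_DISCARD_SECURE when the backing device supports secure discard. A minimal sketch of just that flag translation and issue step (the boolean parameters stand in for the vbd and request state checked in the real code):

	#include <linux/blkdev.h>

	static int example_issue_discard(struct block_device *bdev,
					 sector_t sector, sector_t nr_sects,
					 bool secure_supported,
					 bool secure_requested)
	{
		unsigned long flags = (secure_supported && secure_requested) ?
				      BLKDEV_DISCARD_SECURE : 0;

		return blkdev_issue_discard(bdev, sector, nr_sects,
					    GFP_KERNEL, flags);
	}
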
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index dfb1b3a43a5d..d0ee7edc9be8 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -60,58 +60,66 @@ struct blkif_common_response {
60 char dummy; 60 char dummy;
61}; 61};
62 62
63/* i386 protocol version */
64#pragma pack(push, 4)
65
66struct blkif_x86_32_request_rw { 63struct blkif_x86_32_request_rw {
64 uint8_t nr_segments; /* number of segments */
65 blkif_vdev_t handle; /* only for read/write requests */
66 uint64_t id; /* private guest value, echoed in resp */
67 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ 67 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
68 struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 68 struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
69}; 69} __attribute__((__packed__));
70 70
71struct blkif_x86_32_request_discard { 71struct blkif_x86_32_request_discard {
72 uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */
73 blkif_vdev_t _pad1; /* was "handle" for read/write requests */
74 uint64_t id; /* private guest value, echoed in resp */
72 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ 75 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
73 uint64_t nr_sectors; 76 uint64_t nr_sectors;
74}; 77} __attribute__((__packed__));
75 78
76struct blkif_x86_32_request { 79struct blkif_x86_32_request {
77 uint8_t operation; /* BLKIF_OP_??? */ 80 uint8_t operation; /* BLKIF_OP_??? */
78 uint8_t nr_segments; /* number of segments */
79 blkif_vdev_t handle; /* only for read/write requests */
80 uint64_t id; /* private guest value, echoed in resp */
81 union { 81 union {
82 struct blkif_x86_32_request_rw rw; 82 struct blkif_x86_32_request_rw rw;
83 struct blkif_x86_32_request_discard discard; 83 struct blkif_x86_32_request_discard discard;
84 } u; 84 } u;
85}; 85} __attribute__((__packed__));
86
87/* i386 protocol version */
88#pragma pack(push, 4)
86struct blkif_x86_32_response { 89struct blkif_x86_32_response {
87 uint64_t id; /* copied from request */ 90 uint64_t id; /* copied from request */
88 uint8_t operation; /* copied from request */ 91 uint8_t operation; /* copied from request */
89 int16_t status; /* BLKIF_RSP_??? */ 92 int16_t status; /* BLKIF_RSP_??? */
90}; 93};
91#pragma pack(pop) 94#pragma pack(pop)
92
93/* x86_64 protocol version */ 95/* x86_64 protocol version */
94 96
95struct blkif_x86_64_request_rw { 97struct blkif_x86_64_request_rw {
98 uint8_t nr_segments; /* number of segments */
99 blkif_vdev_t handle; /* only for read/write requests */
100 uint32_t _pad1; /* offsetof(blkif_request..,u.rw.id)==8 */
101 uint64_t id;
96 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ 102 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
97 struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 103 struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
98}; 104} __attribute__((__packed__));
99 105
100struct blkif_x86_64_request_discard { 106struct blkif_x86_64_request_discard {
107 uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */
108 blkif_vdev_t _pad1; /* was "handle" for read/write requests */
109 uint32_t _pad2; /* offsetof(blkif_..,u.discard.id)==8 */
110 uint64_t id;
101 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ 111 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
102 uint64_t nr_sectors; 112 uint64_t nr_sectors;
103}; 113} __attribute__((__packed__));
104 114
105struct blkif_x86_64_request { 115struct blkif_x86_64_request {
106 uint8_t operation; /* BLKIF_OP_??? */ 116 uint8_t operation; /* BLKIF_OP_??? */
107 uint8_t nr_segments; /* number of segments */
108 blkif_vdev_t handle; /* only for read/write requests */
109 uint64_t __attribute__((__aligned__(8))) id;
110 union { 117 union {
111 struct blkif_x86_64_request_rw rw; 118 struct blkif_x86_64_request_rw rw;
112 struct blkif_x86_64_request_discard discard; 119 struct blkif_x86_64_request_discard discard;
113 } u; 120 } u;
114}; 121} __attribute__((__packed__));
122
115struct blkif_x86_64_response { 123struct blkif_x86_64_response {
116 uint64_t __attribute__((__aligned__(8))) id; 124 uint64_t __attribute__((__aligned__(8))) id;
117 uint8_t operation; /* copied from request */ 125 uint8_t operation; /* copied from request */
@@ -156,6 +164,7 @@ struct xen_vbd {
156 /* Cached size parameter. */ 164 /* Cached size parameter. */
157 sector_t size; 165 sector_t size;
158 bool flush_support; 166 bool flush_support;
167 bool discard_secure;
159}; 168};
160 169
161struct backend_info; 170struct backend_info;
@@ -237,22 +246,23 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
237{ 246{
238 int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; 247 int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
239 dst->operation = src->operation; 248 dst->operation = src->operation;
240 dst->nr_segments = src->nr_segments;
241 dst->handle = src->handle;
242 dst->id = src->id;
243 switch (src->operation) { 249 switch (src->operation) {
244 case BLKIF_OP_READ: 250 case BLKIF_OP_READ:
245 case BLKIF_OP_WRITE: 251 case BLKIF_OP_WRITE:
246 case BLKIF_OP_WRITE_BARRIER: 252 case BLKIF_OP_WRITE_BARRIER:
247 case BLKIF_OP_FLUSH_DISKCACHE: 253 case BLKIF_OP_FLUSH_DISKCACHE:
254 dst->u.rw.nr_segments = src->u.rw.nr_segments;
255 dst->u.rw.handle = src->u.rw.handle;
256 dst->u.rw.id = src->u.rw.id;
248 dst->u.rw.sector_number = src->u.rw.sector_number; 257 dst->u.rw.sector_number = src->u.rw.sector_number;
249 barrier(); 258 barrier();
250 if (n > dst->nr_segments) 259 if (n > dst->u.rw.nr_segments)
251 n = dst->nr_segments; 260 n = dst->u.rw.nr_segments;
252 for (i = 0; i < n; i++) 261 for (i = 0; i < n; i++)
253 dst->u.rw.seg[i] = src->u.rw.seg[i]; 262 dst->u.rw.seg[i] = src->u.rw.seg[i];
254 break; 263 break;
255 case BLKIF_OP_DISCARD: 264 case BLKIF_OP_DISCARD:
265 dst->u.discard.flag = src->u.discard.flag;
256 dst->u.discard.sector_number = src->u.discard.sector_number; 266 dst->u.discard.sector_number = src->u.discard.sector_number;
257 dst->u.discard.nr_sectors = src->u.discard.nr_sectors; 267 dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
258 break; 268 break;
@@ -266,22 +276,23 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
266{ 276{
267 int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; 277 int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
268 dst->operation = src->operation; 278 dst->operation = src->operation;
269 dst->nr_segments = src->nr_segments;
270 dst->handle = src->handle;
271 dst->id = src->id;
272 switch (src->operation) { 279 switch (src->operation) {
273 case BLKIF_OP_READ: 280 case BLKIF_OP_READ:
274 case BLKIF_OP_WRITE: 281 case BLKIF_OP_WRITE:
275 case BLKIF_OP_WRITE_BARRIER: 282 case BLKIF_OP_WRITE_BARRIER:
276 case BLKIF_OP_FLUSH_DISKCACHE: 283 case BLKIF_OP_FLUSH_DISKCACHE:
284 dst->u.rw.nr_segments = src->u.rw.nr_segments;
285 dst->u.rw.handle = src->u.rw.handle;
286 dst->u.rw.id = src->u.rw.id;
277 dst->u.rw.sector_number = src->u.rw.sector_number; 287 dst->u.rw.sector_number = src->u.rw.sector_number;
278 barrier(); 288 barrier();
279 if (n > dst->nr_segments) 289 if (n > dst->u.rw.nr_segments)
280 n = dst->nr_segments; 290 n = dst->u.rw.nr_segments;
281 for (i = 0; i < n; i++) 291 for (i = 0; i < n; i++)
282 dst->u.rw.seg[i] = src->u.rw.seg[i]; 292 dst->u.rw.seg[i] = src->u.rw.seg[i];
283 break; 293 break;
284 case BLKIF_OP_DISCARD: 294 case BLKIF_OP_DISCARD:
295 dst->u.discard.flag = src->u.discard.flag;
285 dst->u.discard.sector_number = src->u.discard.sector_number; 296 dst->u.discard.sector_number = src->u.discard.sector_number;
286 dst->u.discard.nr_sectors = src->u.discard.nr_sectors; 297 dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
287 break; 298 break;
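
The point of the reworked layouts above is that the 64-bit request id now sits at a fixed offset (8) for every operation, on both the 32-bit and 64-bit guest ABIs, so the backend can pick the id out of a ring slot before it has decoded the operation; that is what the explicit _pad fields and the __packed__ attribute buy. Below is a minimal, compile-checkable user-space sketch of the 64-bit layout, assuming GCC-style packed structs and C11 static asserts, with simplified stand-in names instead of the kernel's types and the trailing segment array omitted (it sits after id and does not change the offsets):

#include <stddef.h>
#include <stdint.h>

typedef uint16_t vdev_t;                 /* stand-in for blkif_vdev_t */

struct rw_req {                          /* cf. blkif_x86_64_request_rw */
        uint8_t  nr_segments;
        vdev_t   handle;
        uint32_t _pad1;                  /* keeps id at offset 8 overall */
        uint64_t id;
        uint64_t sector_number;
        /* segment array omitted; it follows id */
} __attribute__((__packed__));

struct discard_req {                     /* cf. blkif_x86_64_request_discard */
        uint8_t  flag;                   /* BLKIF_DISCARD_SECURE or zero */
        vdev_t   _pad1;                  /* was "handle" for read/write */
        uint32_t _pad2;                  /* keeps id at offset 8 overall */
        uint64_t id;
        uint64_t sector_number;
        uint64_t nr_sectors;
} __attribute__((__packed__));

struct req {                             /* cf. blkif_x86_64_request */
        uint8_t operation;
        union {
                struct rw_req      rw;
                struct discard_req discard;
        } u;
} __attribute__((__packed__));

/* The invariant the padding exists to preserve: id is reachable
 * without knowing which operation the slot carries. */
_Static_assert(offsetof(struct req, u.rw.id) == 8, "rw id offset");
_Static_assert(offsetof(struct req, u.discard.id) == 8, "discard id offset");

int main(void) { return 0; }

Compiling this (e.g. gcc -std=c11) only succeeds if both ids land at offset 8, which is exactly the invariant the offsetof() comments in the header assert.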
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 37c794d31264..24a2fb57e5d0 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -338,6 +338,9 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
338 if (q && q->flush_flags) 338 if (q && q->flush_flags)
339 vbd->flush_support = true; 339 vbd->flush_support = true;
340 340
341 if (q && blk_queue_secdiscard(q))
342 vbd->discard_secure = true;
343
341 DPRINTK("Successful creation of handle=%04x (dom=%u)\n", 344 DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
342 handle, blkif->domid); 345 handle, blkif->domid);
343 return 0; 346 return 0;
@@ -420,6 +423,15 @@ int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
420 state = 1; 423 state = 1;
421 blkif->blk_backend_type = BLKIF_BACKEND_PHY; 424 blkif->blk_backend_type = BLKIF_BACKEND_PHY;
422 } 425 }
426 /* Optional. */
427 err = xenbus_printf(xbt, dev->nodename,
428 "discard-secure", "%d",
429 blkif->vbd.discard_secure);
430 if (err) {
431 xenbus_dev_fatal(dev, err,
432 "writting discard-secure");
433 goto kfree;
434 }
423 } 435 }
424 } else { 436 } else {
425 err = PTR_ERR(type); 437 err = PTR_ERR(type);
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 19b6005a323e..98cbeba8cd53 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -98,7 +98,8 @@ struct blkfront_info
98 unsigned long shadow_free; 98 unsigned long shadow_free;
99 unsigned int feature_flush; 99 unsigned int feature_flush;
100 unsigned int flush_op; 100 unsigned int flush_op;
101 unsigned int feature_discard; 101 unsigned int feature_discard:1;
102 unsigned int feature_secdiscard:1;
102 unsigned int discard_granularity; 103 unsigned int discard_granularity;
103 unsigned int discard_alignment; 104 unsigned int discard_alignment;
104 int is_ready; 105 int is_ready;
@@ -135,15 +136,15 @@ static int get_id_from_freelist(struct blkfront_info *info)
135{ 136{
136 unsigned long free = info->shadow_free; 137 unsigned long free = info->shadow_free;
137 BUG_ON(free >= BLK_RING_SIZE); 138 BUG_ON(free >= BLK_RING_SIZE);
138 info->shadow_free = info->shadow[free].req.id; 139 info->shadow_free = info->shadow[free].req.u.rw.id;
139 info->shadow[free].req.id = 0x0fffffee; /* debug */ 140 info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
140 return free; 141 return free;
141} 142}
142 143
143static void add_id_to_freelist(struct blkfront_info *info, 144static void add_id_to_freelist(struct blkfront_info *info,
144 unsigned long id) 145 unsigned long id)
145{ 146{
146 info->shadow[id].req.id = info->shadow_free; 147 info->shadow[id].req.u.rw.id = info->shadow_free;
147 info->shadow[id].request = NULL; 148 info->shadow[id].request = NULL;
148 info->shadow_free = id; 149 info->shadow_free = id;
149} 150}
@@ -156,7 +157,7 @@ static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
156 if (end > nr_minors) { 157 if (end > nr_minors) {
157 unsigned long *bitmap, *old; 158 unsigned long *bitmap, *old;
158 159
159 bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap), 160 bitmap = kcalloc(BITS_TO_LONGS(end), sizeof(*bitmap),
160 GFP_KERNEL); 161 GFP_KERNEL);
161 if (bitmap == NULL) 162 if (bitmap == NULL)
162 return -ENOMEM; 163 return -ENOMEM;
@@ -287,9 +288,9 @@ static int blkif_queue_request(struct request *req)
287 id = get_id_from_freelist(info); 288 id = get_id_from_freelist(info);
288 info->shadow[id].request = req; 289 info->shadow[id].request = req;
289 290
290 ring_req->id = id; 291 ring_req->u.rw.id = id;
291 ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req); 292 ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
292 ring_req->handle = info->handle; 293 ring_req->u.rw.handle = info->handle;
293 294
294 ring_req->operation = rq_data_dir(req) ? 295 ring_req->operation = rq_data_dir(req) ?
295 BLKIF_OP_WRITE : BLKIF_OP_READ; 296 BLKIF_OP_WRITE : BLKIF_OP_READ;
@@ -305,16 +306,21 @@ static int blkif_queue_request(struct request *req)
305 ring_req->operation = info->flush_op; 306 ring_req->operation = info->flush_op;
306 } 307 }
307 308
308 if (unlikely(req->cmd_flags & REQ_DISCARD)) { 309 if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) {
309 /* id, sector_number and handle are set above. */ 310 /* id, sector_number and handle are set above. */
310 ring_req->operation = BLKIF_OP_DISCARD; 311 ring_req->operation = BLKIF_OP_DISCARD;
311 ring_req->nr_segments = 0;
312 ring_req->u.discard.nr_sectors = blk_rq_sectors(req); 312 ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
313 if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard)
314 ring_req->u.discard.flag = BLKIF_DISCARD_SECURE;
315 else
316 ring_req->u.discard.flag = 0;
313 } else { 317 } else {
314 ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); 318 ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req,
315 BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); 319 info->sg);
320 BUG_ON(ring_req->u.rw.nr_segments >
321 BLKIF_MAX_SEGMENTS_PER_REQUEST);
316 322
317 for_each_sg(info->sg, sg, ring_req->nr_segments, i) { 323 for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
318 buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg))); 324 buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
319 fsect = sg->offset >> 9; 325 fsect = sg->offset >> 9;
320 lsect = fsect + (sg->length >> 9) - 1; 326 lsect = fsect + (sg->length >> 9) - 1;
@@ -424,6 +430,8 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
424 blk_queue_max_discard_sectors(rq, get_capacity(gd)); 430 blk_queue_max_discard_sectors(rq, get_capacity(gd));
425 rq->limits.discard_granularity = info->discard_granularity; 431 rq->limits.discard_granularity = info->discard_granularity;
426 rq->limits.discard_alignment = info->discard_alignment; 432 rq->limits.discard_alignment = info->discard_alignment;
433 if (info->feature_secdiscard)
434 queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq);
427 } 435 }
428 436
429 /* Hard sector size and max sectors impersonate the equiv. hardware. */ 437 /* Hard sector size and max sectors impersonate the equiv. hardware. */
@@ -705,7 +713,9 @@ static void blkif_free(struct blkfront_info *info, int suspend)
705static void blkif_completion(struct blk_shadow *s) 713static void blkif_completion(struct blk_shadow *s)
706{ 714{
707 int i; 715 int i;
 708	for (i = 0; i < s->req.nr_segments; i++) 716	/* Do not call this for BLKIF_OP_DISCARD: nr_segments occupies
 717	 * the same location as the discard flag. */
718 for (i = 0; i < s->req.u.rw.nr_segments; i++)
709 gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL); 719 gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
710} 720}
711 721
@@ -736,7 +746,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
736 id = bret->id; 746 id = bret->id;
737 req = info->shadow[id].request; 747 req = info->shadow[id].request;
738 748
739 blkif_completion(&info->shadow[id]); 749 if (bret->operation != BLKIF_OP_DISCARD)
750 blkif_completion(&info->shadow[id]);
740 751
741 add_id_to_freelist(info, id); 752 add_id_to_freelist(info, id);
742 753
@@ -749,7 +760,9 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
749 info->gd->disk_name); 760 info->gd->disk_name);
750 error = -EOPNOTSUPP; 761 error = -EOPNOTSUPP;
751 info->feature_discard = 0; 762 info->feature_discard = 0;
763 info->feature_secdiscard = 0;
752 queue_flag_clear(QUEUE_FLAG_DISCARD, rq); 764 queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
765 queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq);
753 } 766 }
754 __blk_end_request_all(req, error); 767 __blk_end_request_all(req, error);
755 break; 768 break;
@@ -763,7 +776,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
763 error = -EOPNOTSUPP; 776 error = -EOPNOTSUPP;
764 } 777 }
765 if (unlikely(bret->status == BLKIF_RSP_ERROR && 778 if (unlikely(bret->status == BLKIF_RSP_ERROR &&
766 info->shadow[id].req.nr_segments == 0)) { 779 info->shadow[id].req.u.rw.nr_segments == 0)) {
767 printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n", 780 printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n",
768 info->flush_op == BLKIF_OP_WRITE_BARRIER ? 781 info->flush_op == BLKIF_OP_WRITE_BARRIER ?
769 "barrier" : "flush disk cache", 782 "barrier" : "flush disk cache",
@@ -984,8 +997,8 @@ static int blkfront_probe(struct xenbus_device *dev,
984 INIT_WORK(&info->work, blkif_restart_queue); 997 INIT_WORK(&info->work, blkif_restart_queue);
985 998
986 for (i = 0; i < BLK_RING_SIZE; i++) 999 for (i = 0; i < BLK_RING_SIZE; i++)
987 info->shadow[i].req.id = i+1; 1000 info->shadow[i].req.u.rw.id = i+1;
988 info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 1001 info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
989 1002
990 /* Front end dir is a number, which is used as the id. */ 1003 /* Front end dir is a number, which is used as the id. */
991 info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); 1004 info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
@@ -1019,9 +1032,9 @@ static int blkif_recover(struct blkfront_info *info)
1019 /* Stage 2: Set up free list. */ 1032 /* Stage 2: Set up free list. */
1020 memset(&info->shadow, 0, sizeof(info->shadow)); 1033 memset(&info->shadow, 0, sizeof(info->shadow));
1021 for (i = 0; i < BLK_RING_SIZE; i++) 1034 for (i = 0; i < BLK_RING_SIZE; i++)
1022 info->shadow[i].req.id = i+1; 1035 info->shadow[i].req.u.rw.id = i+1;
1023 info->shadow_free = info->ring.req_prod_pvt; 1036 info->shadow_free = info->ring.req_prod_pvt;
1024 info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 1037 info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
1025 1038
1026 /* Stage 3: Find pending requests and requeue them. */ 1039 /* Stage 3: Find pending requests and requeue them. */
1027 for (i = 0; i < BLK_RING_SIZE; i++) { 1040 for (i = 0; i < BLK_RING_SIZE; i++) {
@@ -1034,17 +1047,19 @@ static int blkif_recover(struct blkfront_info *info)
1034 *req = copy[i].req; 1047 *req = copy[i].req;
1035 1048
1036 /* We get a new request id, and must reset the shadow state. */ 1049 /* We get a new request id, and must reset the shadow state. */
1037 req->id = get_id_from_freelist(info); 1050 req->u.rw.id = get_id_from_freelist(info);
1038 memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i])); 1051 memcpy(&info->shadow[req->u.rw.id], &copy[i], sizeof(copy[i]));
1039 1052
1053 if (req->operation != BLKIF_OP_DISCARD) {
1040 /* Rewrite any grant references invalidated by susp/resume. */ 1054 /* Rewrite any grant references invalidated by susp/resume. */
1041 for (j = 0; j < req->nr_segments; j++) 1055 for (j = 0; j < req->u.rw.nr_segments; j++)
1042 gnttab_grant_foreign_access_ref( 1056 gnttab_grant_foreign_access_ref(
1043 req->u.rw.seg[j].gref, 1057 req->u.rw.seg[j].gref,
1044 info->xbdev->otherend_id, 1058 info->xbdev->otherend_id,
1045 pfn_to_mfn(info->shadow[req->id].frame[j]), 1059 pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]),
1046 rq_data_dir(info->shadow[req->id].request)); 1060 rq_data_dir(info->shadow[req->u.rw.id].request));
1047 info->shadow[req->id].req = *req; 1061 }
1062 info->shadow[req->u.rw.id].req = *req;
1048 1063
1049 info->ring.req_prod_pvt++; 1064 info->ring.req_prod_pvt++;
1050 } 1065 }
@@ -1135,11 +1150,13 @@ static void blkfront_setup_discard(struct blkfront_info *info)
1135 char *type; 1150 char *type;
1136 unsigned int discard_granularity; 1151 unsigned int discard_granularity;
1137 unsigned int discard_alignment; 1152 unsigned int discard_alignment;
1153 unsigned int discard_secure;
1138 1154
1139 type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL); 1155 type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL);
1140 if (IS_ERR(type)) 1156 if (IS_ERR(type))
1141 return; 1157 return;
1142 1158
1159 info->feature_secdiscard = 0;
1143 if (strncmp(type, "phy", 3) == 0) { 1160 if (strncmp(type, "phy", 3) == 0) {
1144 err = xenbus_gather(XBT_NIL, info->xbdev->otherend, 1161 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1145 "discard-granularity", "%u", &discard_granularity, 1162 "discard-granularity", "%u", &discard_granularity,
@@ -1150,6 +1167,12 @@ static void blkfront_setup_discard(struct blkfront_info *info)
1150 info->discard_granularity = discard_granularity; 1167 info->discard_granularity = discard_granularity;
1151 info->discard_alignment = discard_alignment; 1168 info->discard_alignment = discard_alignment;
1152 } 1169 }
1170 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1171 "discard-secure", "%d", &discard_secure,
1172 NULL);
1173 if (!err)
1174 info->feature_secdiscard = discard_secure;
1175
1153 } else if (strncmp(type, "file", 4) == 0) 1176 } else if (strncmp(type, "file", 4) == 0)
1154 info->feature_discard = 1; 1177 info->feature_discard = 1;
1155 1178
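
The "discard-secure" node is optional, so the probe above has to treat a missing key (or a failed read) as "secure discard not supported" rather than as an error. A user-space sketch of that probe-with-default pattern, with lookup_key() as a purely hypothetical stand-in (backed here by a tiny mock store) for the real xenbus_gather() call:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-in for reading a backend xenstore node;
 * returns NULL when the key does not exist. */
static const char *lookup_key(const char *key)
{
        static const char *store[][2] = {
                { "discard-granularity", "4096" },
                { "discard-alignment",   "0"    },
                /* "discard-secure" intentionally absent */
        };
        for (size_t i = 0; i < sizeof(store) / sizeof(store[0]); i++)
                if (strcmp(store[i][0], key) == 0)
                        return store[i][1];
        return NULL;
}

/* Optional feature key: absence (or an unparsable value) means "off". */
static bool optional_flag(const char *key)
{
        const char *val = lookup_key(key);

        return val ? strtoul(val, NULL, 10) != 0 : false;
}

int main(void)
{
        printf("feature_secdiscard = %d\n", optional_flag("discard-secure"));
        return 0;
}

Running it prints feature_secdiscard = 0 because the mock store omits the key; adding a { "discard-secure", "1" } entry flips the result, mirroring how blkfront_setup_discard() only sets the feature bit when the gather succeeds.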