Diffstat (limited to 'drivers/block')
37 files changed, 6780 insertions, 1530 deletions
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index e086fbbbe853..8db9089127c5 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -1177,7 +1177,8 @@ static bool DAC960_V1_EnableMemoryMailboxInterface(DAC960_Controller_T
1177 | int TimeoutCounter; | 1177 | int TimeoutCounter; |
1178 | int i; | 1178 | int i; |
1179 | 1179 | ||
1180 | 1180 | memset(&CommandMailbox, 0, sizeof(DAC960_V1_CommandMailbox_T)); | |
1181 | |||
1181 | if (pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32))) | 1182 | if (pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32))) |
1182 | return DAC960_Failure(Controller, "DMA mask out of range"); | 1183 | return DAC960_Failure(Controller, "DMA mask out of range"); |
1183 | Controller->BounceBufferLimit = DMA_BIT_MASK(32); | 1184 | Controller->BounceBufferLimit = DMA_BIT_MASK(32); |
@@ -4627,7 +4628,8 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command)
4627 | DAC960_Controller_T *Controller = Command->Controller; | 4628 | DAC960_Controller_T *Controller = Command->Controller; |
4628 | DAC960_CommandType_T CommandType = Command->CommandType; | 4629 | DAC960_CommandType_T CommandType = Command->CommandType; |
4629 | DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; | 4630 | DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; |
4630 | DAC960_V2_IOCTL_Opcode_T CommandOpcode = CommandMailbox->Common.IOCTL_Opcode; | 4631 | DAC960_V2_IOCTL_Opcode_T IOCTLOpcode = CommandMailbox->Common.IOCTL_Opcode; |
4632 | DAC960_V2_CommandOpcode_T CommandOpcode = CommandMailbox->SCSI_10.CommandOpcode; | ||
4631 | DAC960_V2_CommandStatus_T CommandStatus = Command->V2.CommandStatus; | 4633 | DAC960_V2_CommandStatus_T CommandStatus = Command->V2.CommandStatus; |
4632 | 4634 | ||
4633 | if (CommandType == DAC960_ReadCommand || | 4635 | if (CommandType == DAC960_ReadCommand || |
@@ -4699,7 +4701,7 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command)
4699 | { | 4701 | { |
4700 | if (Controller->ShutdownMonitoringTimer) | 4702 | if (Controller->ShutdownMonitoringTimer) |
4701 | return; | 4703 | return; |
4702 | if (CommandOpcode == DAC960_V2_GetControllerInfo) | 4704 | if (IOCTLOpcode == DAC960_V2_GetControllerInfo) |
4703 | { | 4705 | { |
4704 | DAC960_V2_ControllerInfo_T *NewControllerInfo = | 4706 | DAC960_V2_ControllerInfo_T *NewControllerInfo = |
4705 | Controller->V2.NewControllerInformation; | 4707 | Controller->V2.NewControllerInformation; |
@@ -4719,14 +4721,14 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command)
4719 | memcpy(ControllerInfo, NewControllerInfo, | 4721 | memcpy(ControllerInfo, NewControllerInfo, |
4720 | sizeof(DAC960_V2_ControllerInfo_T)); | 4722 | sizeof(DAC960_V2_ControllerInfo_T)); |
4721 | } | 4723 | } |
4722 | else if (CommandOpcode == DAC960_V2_GetEvent) | 4724 | else if (IOCTLOpcode == DAC960_V2_GetEvent) |
4723 | { | 4725 | { |
4724 | if (CommandStatus == DAC960_V2_NormalCompletion) { | 4726 | if (CommandStatus == DAC960_V2_NormalCompletion) { |
4725 | DAC960_V2_ReportEvent(Controller, Controller->V2.Event); | 4727 | DAC960_V2_ReportEvent(Controller, Controller->V2.Event); |
4726 | } | 4728 | } |
4727 | Controller->V2.NextEventSequenceNumber++; | 4729 | Controller->V2.NextEventSequenceNumber++; |
4728 | } | 4730 | } |
4729 | else if (CommandOpcode == DAC960_V2_GetPhysicalDeviceInfoValid && | 4731 | else if (IOCTLOpcode == DAC960_V2_GetPhysicalDeviceInfoValid && |
4730 | CommandStatus == DAC960_V2_NormalCompletion) | 4732 | CommandStatus == DAC960_V2_NormalCompletion) |
4731 | { | 4733 | { |
4732 | DAC960_V2_PhysicalDeviceInfo_T *NewPhysicalDeviceInfo = | 4734 | DAC960_V2_PhysicalDeviceInfo_T *NewPhysicalDeviceInfo = |
@@ -4915,7 +4917,7 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command)
4915 | NewPhysicalDeviceInfo->LogicalUnit++; | 4917 | NewPhysicalDeviceInfo->LogicalUnit++; |
4916 | Controller->V2.PhysicalDeviceIndex++; | 4918 | Controller->V2.PhysicalDeviceIndex++; |
4917 | } | 4919 | } |
4918 | else if (CommandOpcode == DAC960_V2_GetPhysicalDeviceInfoValid) | 4920 | else if (IOCTLOpcode == DAC960_V2_GetPhysicalDeviceInfoValid) |
4919 | { | 4921 | { |
4920 | unsigned int DeviceIndex; | 4922 | unsigned int DeviceIndex; |
4921 | for (DeviceIndex = Controller->V2.PhysicalDeviceIndex; | 4923 | for (DeviceIndex = Controller->V2.PhysicalDeviceIndex; |
@@ -4938,7 +4940,7 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command)
4938 | } | 4940 | } |
4939 | Controller->V2.NeedPhysicalDeviceInformation = false; | 4941 | Controller->V2.NeedPhysicalDeviceInformation = false; |
4940 | } | 4942 | } |
4941 | else if (CommandOpcode == DAC960_V2_GetLogicalDeviceInfoValid && | 4943 | else if (IOCTLOpcode == DAC960_V2_GetLogicalDeviceInfoValid && |
4942 | CommandStatus == DAC960_V2_NormalCompletion) | 4944 | CommandStatus == DAC960_V2_NormalCompletion) |
4943 | { | 4945 | { |
4944 | DAC960_V2_LogicalDeviceInfo_T *NewLogicalDeviceInfo = | 4946 | DAC960_V2_LogicalDeviceInfo_T *NewLogicalDeviceInfo = |
@@ -5065,7 +5067,7 @@ static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command)
5065 | [LogicalDeviceNumber] = true; | 5067 | [LogicalDeviceNumber] = true; |
5066 | NewLogicalDeviceInfo->LogicalDeviceNumber++; | 5068 | NewLogicalDeviceInfo->LogicalDeviceNumber++; |
5067 | } | 5069 | } |
5068 | else if (CommandOpcode == DAC960_V2_GetLogicalDeviceInfoValid) | 5070 | else if (IOCTLOpcode == DAC960_V2_GetLogicalDeviceInfoValid) |
5069 | { | 5071 | { |
5070 | int LogicalDriveNumber; | 5072 | int LogicalDriveNumber; |
5071 | for (LogicalDriveNumber = 0; | 5073 | for (LogicalDriveNumber = 0; |
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 6f07ec1c2f58..a796407123c7 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -116,6 +116,8 @@ config PARIDE
116 | 116 | ||
117 | source "drivers/block/paride/Kconfig" | 117 | source "drivers/block/paride/Kconfig" |
118 | 118 | ||
119 | source "drivers/block/mtip32xx/Kconfig" | ||
120 | |||
119 | config BLK_CPQ_DA | 121 | config BLK_CPQ_DA |
120 | tristate "Compaq SMART2 support" | 122 | tristate "Compaq SMART2 support" |
121 | depends on PCI && VIRT_TO_BUS | 123 | depends on PCI && VIRT_TO_BUS |
@@ -315,6 +317,17 @@ config BLK_DEV_NBD
315 | 317 | ||
316 | If unsure, say N. | 318 | If unsure, say N. |
317 | 319 | ||
320 | config BLK_DEV_NVME | ||
321 | tristate "NVM Express block device" | ||
322 | depends on PCI | ||
323 | ---help--- | ||
324 | The NVM Express driver is for solid state drives directly | ||
325 | connected to the PCI or PCI Express bus. If you know you | ||
326 | don't have one of these, it is safe to answer N. | ||
327 | |||
328 | To compile this driver as a module, choose M here: the | ||
329 | module will be called nvme. | ||
330 | |||
318 | config BLK_DEV_OSD | 331 | config BLK_DEV_OSD |
319 | tristate "OSD object-as-blkdev support" | 332 | tristate "OSD object-as-blkdev support" |
320 | depends on SCSI_OSD_ULD | 333 | depends on SCSI_OSD_ULD |
@@ -341,7 +354,7 @@ config BLK_DEV_SX8
341 | Use devices /dev/sx8/$N and /dev/sx8/$Np$M. | 354 | Use devices /dev/sx8/$N and /dev/sx8/$Np$M. |
342 | 355 | ||
343 | config BLK_DEV_UB | 356 | config BLK_DEV_UB |
344 | tristate "Low Performance USB Block driver" | 357 | tristate "Low Performance USB Block driver (deprecated)" |
345 | depends on USB | 358 | depends on USB |
346 | help | 359 | help |
347 | This driver supports certain USB attached storage devices | 360 | This driver supports certain USB attached storage devices |
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 76646e9a1c91..5b795059f8fb 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_XILINX_SYSACE) += xsysace.o
23 | obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o | 23 | obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o |
24 | obj-$(CONFIG_MG_DISK) += mg_disk.o | 24 | obj-$(CONFIG_MG_DISK) += mg_disk.o |
25 | obj-$(CONFIG_SUNVDC) += sunvdc.o | 25 | obj-$(CONFIG_SUNVDC) += sunvdc.o |
26 | obj-$(CONFIG_BLK_DEV_NVME) += nvme.o | ||
26 | obj-$(CONFIG_BLK_DEV_OSD) += osdblk.o | 27 | obj-$(CONFIG_BLK_DEV_OSD) += osdblk.o |
27 | 28 | ||
28 | obj-$(CONFIG_BLK_DEV_UMEM) += umem.o | 29 | obj-$(CONFIG_BLK_DEV_UMEM) += umem.o |
@@ -39,5 +40,6 @@ obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
39 | obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/ | 40 | obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/ |
40 | obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ | 41 | obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ |
41 | obj-$(CONFIG_BLK_DEV_RBD) += rbd.o | 42 | obj-$(CONFIG_BLK_DEV_RBD) += rbd.o |
43 | obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ | ||
42 | 44 | ||
43 | swim_mod-y := swim.o swim_asm.o | 45 | swim_mod-y := swim.o swim_asm.o |
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index ec246437f5a4..531ceb31d0ff 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -242,9 +242,9 @@ static void copy_to_brd(struct brd_device *brd, const void *src,
242 | page = brd_lookup_page(brd, sector); | 242 | page = brd_lookup_page(brd, sector); |
243 | BUG_ON(!page); | 243 | BUG_ON(!page); |
244 | 244 | ||
245 | dst = kmap_atomic(page, KM_USER1); | 245 | dst = kmap_atomic(page); |
246 | memcpy(dst + offset, src, copy); | 246 | memcpy(dst + offset, src, copy); |
247 | kunmap_atomic(dst, KM_USER1); | 247 | kunmap_atomic(dst); |
248 | 248 | ||
249 | if (copy < n) { | 249 | if (copy < n) { |
250 | src += copy; | 250 | src += copy; |
@@ -253,9 +253,9 @@ static void copy_to_brd(struct brd_device *brd, const void *src,
253 | page = brd_lookup_page(brd, sector); | 253 | page = brd_lookup_page(brd, sector); |
254 | BUG_ON(!page); | 254 | BUG_ON(!page); |
255 | 255 | ||
256 | dst = kmap_atomic(page, KM_USER1); | 256 | dst = kmap_atomic(page); |
257 | memcpy(dst, src, copy); | 257 | memcpy(dst, src, copy); |
258 | kunmap_atomic(dst, KM_USER1); | 258 | kunmap_atomic(dst); |
259 | } | 259 | } |
260 | } | 260 | } |
261 | 261 | ||
@@ -273,9 +273,9 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
273 | copy = min_t(size_t, n, PAGE_SIZE - offset); | 273 | copy = min_t(size_t, n, PAGE_SIZE - offset); |
274 | page = brd_lookup_page(brd, sector); | 274 | page = brd_lookup_page(brd, sector); |
275 | if (page) { | 275 | if (page) { |
276 | src = kmap_atomic(page, KM_USER1); | 276 | src = kmap_atomic(page); |
277 | memcpy(dst, src + offset, copy); | 277 | memcpy(dst, src + offset, copy); |
278 | kunmap_atomic(src, KM_USER1); | 278 | kunmap_atomic(src); |
279 | } else | 279 | } else |
280 | memset(dst, 0, copy); | 280 | memset(dst, 0, copy); |
281 | 281 | ||
@@ -285,9 +285,9 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
285 | copy = n - copy; | 285 | copy = n - copy; |
286 | page = brd_lookup_page(brd, sector); | 286 | page = brd_lookup_page(brd, sector); |
287 | if (page) { | 287 | if (page) { |
288 | src = kmap_atomic(page, KM_USER1); | 288 | src = kmap_atomic(page); |
289 | memcpy(dst, src, copy); | 289 | memcpy(dst, src, copy); |
290 | kunmap_atomic(src, KM_USER1); | 290 | kunmap_atomic(src); |
291 | } else | 291 | } else |
292 | memset(dst, 0, copy); | 292 | memset(dst, 0, copy); |
293 | } | 293 | } |
@@ -309,7 +309,7 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page,
309 | goto out; | 309 | goto out; |
310 | } | 310 | } |
311 | 311 | ||
312 | mem = kmap_atomic(page, KM_USER0); | 312 | mem = kmap_atomic(page); |
313 | if (rw == READ) { | 313 | if (rw == READ) { |
314 | copy_from_brd(mem + off, brd, sector, len); | 314 | copy_from_brd(mem + off, brd, sector, len); |
315 | flush_dcache_page(page); | 315 | flush_dcache_page(page); |
@@ -317,7 +317,7 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page,
317 | flush_dcache_page(page); | 317 | flush_dcache_page(page); |
318 | copy_to_brd(brd, mem + off, sector, len); | 318 | copy_to_brd(brd, mem + off, sector, len); |
319 | } | 319 | } |
320 | kunmap_atomic(mem, KM_USER0); | 320 | kunmap_atomic(mem); |
321 | 321 | ||
322 | out: | 322 | out: |
323 | return err; | 323 | return err; |
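The brd.c hunks above are part of the kernel-wide removal of the KM_type argument from kmap_atomic()/kunmap_atomic(): the mapping slot is now managed implicitly per CPU, so callers pass only the page (and later the mapped address). A minimal sketch of the new calling convention, using a hypothetical helper name:

#include <linux/highmem.h>
#include <linux/string.h>

/* Hypothetical helper: copy a buffer into a (possibly highmem) page. */
static void copy_buf_to_page(struct page *page, const void *src, size_t len)
{
	void *dst = kmap_atomic(page);	/* no KM_USERn slot argument anymore */

	memcpy(dst, src, len);
	kunmap_atomic(dst);		/* pass only the mapped address */
}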
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 587cce57adae..b0f553b26d0f 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1735,7 +1735,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
1735 | case CCISS_BIG_PASSTHRU: | 1735 | case CCISS_BIG_PASSTHRU: |
1736 | return cciss_bigpassthru(h, argp); | 1736 | return cciss_bigpassthru(h, argp); |
1737 | 1737 | ||
1738 | /* scsi_cmd_ioctl handles these, below, though some are not */ | 1738 | /* scsi_cmd_blk_ioctl handles these, below, though some are not */ |
1739 | /* very meaningful for cciss. SG_IO is the main one people want. */ | 1739 | /* very meaningful for cciss. SG_IO is the main one people want. */ |
1740 | 1740 | ||
1741 | case SG_GET_VERSION_NUM: | 1741 | case SG_GET_VERSION_NUM: |
@@ -1746,9 +1746,9 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
1746 | case SG_EMULATED_HOST: | 1746 | case SG_EMULATED_HOST: |
1747 | case SG_IO: | 1747 | case SG_IO: |
1748 | case SCSI_IOCTL_SEND_COMMAND: | 1748 | case SCSI_IOCTL_SEND_COMMAND: |
1749 | return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, argp); | 1749 | return scsi_cmd_blk_ioctl(bdev, mode, cmd, argp); |
1750 | 1750 | ||
1751 | /* scsi_cmd_ioctl would normally handle these, below, but */ | 1751 | /* scsi_cmd_blk_ioctl would normally handle these, below, but */ |
1752 | /* they aren't a good fit for cciss, as CD-ROMs are */ | 1752 | /* they aren't a good fit for cciss, as CD-ROMs are */ |
1753 | /* not supported, and we don't have any bus/target/lun */ | 1753 | /* not supported, and we don't have any bus/target/lun */ |
1754 | /* which we present to the kernel. */ | 1754 | /* which we present to the kernel. */ |
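The cciss change above switches from scsi_cmd_ioctl() to scsi_cmd_blk_ioctl(), which takes the block_device itself, checks that the command is permitted on that device, and then forwards to scsi_cmd_ioctl() with the disk's queue. A hedged sketch of how a block driver's ioctl handler typically forwards SG_IO and related commands (the handler name is illustrative):

#include <linux/blkdev.h>
#include <linux/fs.h>
#include <scsi/sg.h>
#include <scsi/scsi_ioctl.h>

/* Hypothetical ioctl handler fragment for a block driver. */
static int example_blk_ioctl(struct block_device *bdev, fmode_t mode,
			     unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case SG_IO:
	case SCSI_IOCTL_SEND_COMMAND:
		/* The helper derives the gendisk and queue from bdev. */
		return scsi_cmd_blk_ioctl(bdev, mode, cmd, (void __user *)arg);
	default:
		return -ENOTTY;
	}
}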
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 912f585a760f..3030201c69d8 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -289,25 +289,25 @@ static unsigned int bm_bit_to_page_idx(struct drbd_bitmap *b, u64 bitnr)
289 | return page_nr; | 289 | return page_nr; |
290 | } | 290 | } |
291 | 291 | ||
292 | static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx, const enum km_type km) | 292 | static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx) |
293 | { | 293 | { |
294 | struct page *page = b->bm_pages[idx]; | 294 | struct page *page = b->bm_pages[idx]; |
295 | return (unsigned long *) kmap_atomic(page, km); | 295 | return (unsigned long *) kmap_atomic(page); |
296 | } | 296 | } |
297 | 297 | ||
298 | static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx) | 298 | static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx) |
299 | { | 299 | { |
300 | return __bm_map_pidx(b, idx, KM_IRQ1); | 300 | return __bm_map_pidx(b, idx); |
301 | } | 301 | } |
302 | 302 | ||
303 | static void __bm_unmap(unsigned long *p_addr, const enum km_type km) | 303 | static void __bm_unmap(unsigned long *p_addr) |
304 | { | 304 | { |
305 | kunmap_atomic(p_addr, km); | 305 | kunmap_atomic(p_addr); |
306 | }; | 306 | }; |
307 | 307 | ||
308 | static void bm_unmap(unsigned long *p_addr) | 308 | static void bm_unmap(unsigned long *p_addr) |
309 | { | 309 | { |
310 | return __bm_unmap(p_addr, KM_IRQ1); | 310 | return __bm_unmap(p_addr); |
311 | } | 311 | } |
312 | 312 | ||
313 | /* long word offset of _bitmap_ sector */ | 313 | /* long word offset of _bitmap_ sector */ |
@@ -543,15 +543,15 @@ static unsigned long bm_count_bits(struct drbd_bitmap *b)
543 | 543 | ||
544 | /* all but last page */ | 544 | /* all but last page */ |
545 | for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) { | 545 | for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) { |
546 | p_addr = __bm_map_pidx(b, idx, KM_USER0); | 546 | p_addr = __bm_map_pidx(b, idx); |
547 | for (i = 0; i < LWPP; i++) | 547 | for (i = 0; i < LWPP; i++) |
548 | bits += hweight_long(p_addr[i]); | 548 | bits += hweight_long(p_addr[i]); |
549 | __bm_unmap(p_addr, KM_USER0); | 549 | __bm_unmap(p_addr); |
550 | cond_resched(); | 550 | cond_resched(); |
551 | } | 551 | } |
552 | /* last (or only) page */ | 552 | /* last (or only) page */ |
553 | last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL; | 553 | last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL; |
554 | p_addr = __bm_map_pidx(b, idx, KM_USER0); | 554 | p_addr = __bm_map_pidx(b, idx); |
555 | for (i = 0; i < last_word; i++) | 555 | for (i = 0; i < last_word; i++) |
556 | bits += hweight_long(p_addr[i]); | 556 | bits += hweight_long(p_addr[i]); |
557 | p_addr[last_word] &= cpu_to_lel(mask); | 557 | p_addr[last_word] &= cpu_to_lel(mask); |
@@ -559,7 +559,7 @@ static unsigned long bm_count_bits(struct drbd_bitmap *b)
559 | /* 32bit arch, may have an unused padding long */ | 559 | /* 32bit arch, may have an unused padding long */ |
560 | if (BITS_PER_LONG == 32 && (last_word & 1) == 0) | 560 | if (BITS_PER_LONG == 32 && (last_word & 1) == 0) |
561 | p_addr[last_word+1] = 0; | 561 | p_addr[last_word+1] = 0; |
562 | __bm_unmap(p_addr, KM_USER0); | 562 | __bm_unmap(p_addr); |
563 | return bits; | 563 | return bits; |
564 | } | 564 | } |
565 | 565 | ||
@@ -970,11 +970,11 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must
970 | * to use pre-allocated page pool */ | 970 | * to use pre-allocated page pool */ |
971 | void *src, *dest; | 971 | void *src, *dest; |
972 | page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT); | 972 | page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT); |
973 | dest = kmap_atomic(page, KM_USER0); | 973 | dest = kmap_atomic(page); |
974 | src = kmap_atomic(b->bm_pages[page_nr], KM_USER1); | 974 | src = kmap_atomic(b->bm_pages[page_nr]); |
975 | memcpy(dest, src, PAGE_SIZE); | 975 | memcpy(dest, src, PAGE_SIZE); |
976 | kunmap_atomic(src, KM_USER1); | 976 | kunmap_atomic(src); |
977 | kunmap_atomic(dest, KM_USER0); | 977 | kunmap_atomic(dest); |
978 | bm_store_page_idx(page, page_nr); | 978 | bm_store_page_idx(page, page_nr); |
979 | } else | 979 | } else |
980 | page = b->bm_pages[page_nr]; | 980 | page = b->bm_pages[page_nr]; |
@@ -1163,7 +1163,7 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc
1163 | * this returns a bit number, NOT a sector! | 1163 | * this returns a bit number, NOT a sector! |
1164 | */ | 1164 | */ |
1165 | static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, | 1165 | static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, |
1166 | const int find_zero_bit, const enum km_type km) | 1166 | const int find_zero_bit) |
1167 | { | 1167 | { |
1168 | struct drbd_bitmap *b = mdev->bitmap; | 1168 | struct drbd_bitmap *b = mdev->bitmap; |
1169 | unsigned long *p_addr; | 1169 | unsigned long *p_addr; |
@@ -1178,7 +1178,7 @@ static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
1178 | while (bm_fo < b->bm_bits) { | 1178 | while (bm_fo < b->bm_bits) { |
1179 | /* bit offset of the first bit in the page */ | 1179 | /* bit offset of the first bit in the page */ |
1180 | bit_offset = bm_fo & ~BITS_PER_PAGE_MASK; | 1180 | bit_offset = bm_fo & ~BITS_PER_PAGE_MASK; |
1181 | p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo), km); | 1181 | p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo)); |
1182 | 1182 | ||
1183 | if (find_zero_bit) | 1183 | if (find_zero_bit) |
1184 | i = find_next_zero_bit_le(p_addr, | 1184 | i = find_next_zero_bit_le(p_addr, |
@@ -1187,7 +1187,7 @@ static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
1187 | i = find_next_bit_le(p_addr, | 1187 | i = find_next_bit_le(p_addr, |
1188 | PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK); | 1188 | PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK); |
1189 | 1189 | ||
1190 | __bm_unmap(p_addr, km); | 1190 | __bm_unmap(p_addr); |
1191 | if (i < PAGE_SIZE*8) { | 1191 | if (i < PAGE_SIZE*8) { |
1192 | bm_fo = bit_offset + i; | 1192 | bm_fo = bit_offset + i; |
1193 | if (bm_fo >= b->bm_bits) | 1193 | if (bm_fo >= b->bm_bits) |
@@ -1215,7 +1215,7 @@ static unsigned long bm_find_next(struct drbd_conf *mdev,
1215 | if (BM_DONT_TEST & b->bm_flags) | 1215 | if (BM_DONT_TEST & b->bm_flags) |
1216 | bm_print_lock_info(mdev); | 1216 | bm_print_lock_info(mdev); |
1217 | 1217 | ||
1218 | i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1); | 1218 | i = __bm_find_next(mdev, bm_fo, find_zero_bit); |
1219 | 1219 | ||
1220 | spin_unlock_irq(&b->bm_lock); | 1220 | spin_unlock_irq(&b->bm_lock); |
1221 | return i; | 1221 | return i; |
@@ -1239,13 +1239,13 @@ unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
1239 | unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo) | 1239 | unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo) |
1240 | { | 1240 | { |
1241 | /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */ | 1241 | /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */ |
1242 | return __bm_find_next(mdev, bm_fo, 0, KM_USER1); | 1242 | return __bm_find_next(mdev, bm_fo, 0); |
1243 | } | 1243 | } |
1244 | 1244 | ||
1245 | unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo) | 1245 | unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo) |
1246 | { | 1246 | { |
1247 | /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */ | 1247 | /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */ |
1248 | return __bm_find_next(mdev, bm_fo, 1, KM_USER1); | 1248 | return __bm_find_next(mdev, bm_fo, 1); |
1249 | } | 1249 | } |
1250 | 1250 | ||
1251 | /* returns number of bits actually changed. | 1251 | /* returns number of bits actually changed. |
@@ -1273,14 +1273,14 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
1273 | unsigned int page_nr = bm_bit_to_page_idx(b, bitnr); | 1273 | unsigned int page_nr = bm_bit_to_page_idx(b, bitnr); |
1274 | if (page_nr != last_page_nr) { | 1274 | if (page_nr != last_page_nr) { |
1275 | if (p_addr) | 1275 | if (p_addr) |
1276 | __bm_unmap(p_addr, KM_IRQ1); | 1276 | __bm_unmap(p_addr); |
1277 | if (c < 0) | 1277 | if (c < 0) |
1278 | bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); | 1278 | bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); |
1279 | else if (c > 0) | 1279 | else if (c > 0) |
1280 | bm_set_page_need_writeout(b->bm_pages[last_page_nr]); | 1280 | bm_set_page_need_writeout(b->bm_pages[last_page_nr]); |
1281 | changed_total += c; | 1281 | changed_total += c; |
1282 | c = 0; | 1282 | c = 0; |
1283 | p_addr = __bm_map_pidx(b, page_nr, KM_IRQ1); | 1283 | p_addr = __bm_map_pidx(b, page_nr); |
1284 | last_page_nr = page_nr; | 1284 | last_page_nr = page_nr; |
1285 | } | 1285 | } |
1286 | if (val) | 1286 | if (val) |
@@ -1289,7 +1289,7 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
1289 | c -= (0 != __test_and_clear_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr)); | 1289 | c -= (0 != __test_and_clear_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr)); |
1290 | } | 1290 | } |
1291 | if (p_addr) | 1291 | if (p_addr) |
1292 | __bm_unmap(p_addr, KM_IRQ1); | 1292 | __bm_unmap(p_addr); |
1293 | if (c < 0) | 1293 | if (c < 0) |
1294 | bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); | 1294 | bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); |
1295 | else if (c > 0) | 1295 | else if (c > 0) |
@@ -1342,13 +1342,13 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
1342 | { | 1342 | { |
1343 | int i; | 1343 | int i; |
1344 | int bits; | 1344 | int bits; |
1345 | unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_IRQ1); | 1345 | unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]); |
1346 | for (i = first_word; i < last_word; i++) { | 1346 | for (i = first_word; i < last_word; i++) { |
1347 | bits = hweight_long(paddr[i]); | 1347 | bits = hweight_long(paddr[i]); |
1348 | paddr[i] = ~0UL; | 1348 | paddr[i] = ~0UL; |
1349 | b->bm_set += BITS_PER_LONG - bits; | 1349 | b->bm_set += BITS_PER_LONG - bits; |
1350 | } | 1350 | } |
1351 | kunmap_atomic(paddr, KM_IRQ1); | 1351 | kunmap_atomic(paddr); |
1352 | } | 1352 | } |
1353 | 1353 | ||
1354 | /* Same thing as drbd_bm_set_bits, | 1354 | /* Same thing as drbd_bm_set_bits, |
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 9cf20355ceec..8d680562ba73 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -59,8 +59,8 @@
59 | 59 | ||
60 | /* module parameter, defined in drbd_main.c */ | 60 | /* module parameter, defined in drbd_main.c */ |
61 | extern unsigned int minor_count; | 61 | extern unsigned int minor_count; |
62 | extern int disable_sendpage; | 62 | extern bool disable_sendpage; |
63 | extern int allow_oos; | 63 | extern bool allow_oos; |
64 | extern unsigned int cn_idx; | 64 | extern unsigned int cn_idx; |
65 | 65 | ||
66 | #ifdef CONFIG_DRBD_FAULT_INJECTION | 66 | #ifdef CONFIG_DRBD_FAULT_INJECTION |
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 0358e55356c8..211fc44f84be 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -117,8 +117,8 @@ module_param(fault_devs, int, 0644);
117 | 117 | ||
118 | /* module parameter, defined */ | 118 | /* module parameter, defined */ |
119 | unsigned int minor_count = DRBD_MINOR_COUNT_DEF; | 119 | unsigned int minor_count = DRBD_MINOR_COUNT_DEF; |
120 | int disable_sendpage; | 120 | bool disable_sendpage; |
121 | int allow_oos; | 121 | bool allow_oos; |
122 | unsigned int cn_idx = CN_IDX_DRBD; | 122 | unsigned int cn_idx = CN_IDX_DRBD; |
123 | int proc_details; /* Detail level in proc drbd*/ | 123 | int proc_details; /* Detail level in proc drbd*/ |
124 | 124 | ||
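The int-to-bool switch for disable_sendpage and allow_oos above matches the module-parameter core, which expects parameters declared with the 'bool' type to point at a real bool variable. A hedged sketch of the pattern (the variable name, permissions, and description are illustrative, not copied from drbd):

#include <linux/module.h>
#include <linux/moduleparam.h>

static bool example_disable_sendpage;	/* backing storage must be bool */
module_param(example_disable_sendpage, bool, 0644);
MODULE_PARM_DESC(example_disable_sendpage,
		 "illustrative flag; set with example_disable_sendpage=1");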
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index af2a25049bce..abfaacaaf346 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -179,7 +179,7 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
179 | dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb); | 179 | dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb); |
180 | 180 | ||
181 | drbd_bcast_ev_helper(mdev, cmd); | 181 | drbd_bcast_ev_helper(mdev, cmd); |
182 | ret = call_usermodehelper(usermode_helper, argv, envp, 1); | 182 | ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC); |
183 | if (ret) | 183 | if (ret) |
184 | dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n", | 184 | dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n", |
185 | usermode_helper, cmd, mb, | 185 | usermode_helper, cmd, mb, |
@@ -2526,10 +2526,10 @@ void drbd_bcast_ee(struct drbd_conf *mdev,
2526 | 2526 | ||
2527 | page = e->pages; | 2527 | page = e->pages; |
2528 | page_chain_for_each(page) { | 2528 | page_chain_for_each(page) { |
2529 | void *d = kmap_atomic(page, KM_USER0); | 2529 | void *d = kmap_atomic(page); |
2530 | unsigned l = min_t(unsigned, len, PAGE_SIZE); | 2530 | unsigned l = min_t(unsigned, len, PAGE_SIZE); |
2531 | memcpy(tl, d, l); | 2531 | memcpy(tl, d, l); |
2532 | kunmap_atomic(d, KM_USER0); | 2532 | kunmap_atomic(d); |
2533 | tl = (unsigned short*)((char*)tl + l); | 2533 | tl = (unsigned short*)((char*)tl + l); |
2534 | len -= l; | 2534 | len -= l; |
2535 | if (len == 0) | 2535 | if (len == 0) |
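In drbd_khelper() above, the bare constant 1 passed to call_usermodehelper() is replaced by the named flag UMH_WAIT_PROC (wait for the helper process to exit, as opposed to UMH_WAIT_EXEC or UMH_NO_WAIT). A small sketch of the call, with a made-up helper path:

#include <linux/kmod.h>

static int run_example_helper(void)
{
	char *argv[] = { "/usr/sbin/example-helper", "notify", NULL };	/* hypothetical */
	char *envp[] = { "HOME=/", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };

	/* Block until the helper process itself has exited and return its
	 * exit status, rather than only waiting for the exec to succeed. */
	return call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
}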
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 510fb10ec45a..b0b00d70c166 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -202,7 +202,6 @@ static int slow_floppy;
202 | 202 | ||
203 | #include <asm/dma.h> | 203 | #include <asm/dma.h> |
204 | #include <asm/irq.h> | 204 | #include <asm/irq.h> |
205 | #include <asm/system.h> | ||
206 | 205 | ||
207 | static int FLOPPY_IRQ = 6; | 206 | static int FLOPPY_IRQ = 6; |
208 | static int FLOPPY_DMA = 2; | 207 | static int FLOPPY_DMA = 2; |
@@ -1031,37 +1030,6 @@ static int fd_wait_for_completion(unsigned long delay, timeout_fn function)
1031 | return 0; | 1030 | return 0; |
1032 | } | 1031 | } |
1033 | 1032 | ||
1034 | static DEFINE_SPINLOCK(floppy_hlt_lock); | ||
1035 | static int hlt_disabled; | ||
1036 | static void floppy_disable_hlt(void) | ||
1037 | { | ||
1038 | unsigned long flags; | ||
1039 | |||
1040 | WARN_ONCE(1, "floppy_disable_hlt() scheduled for removal in 2012"); | ||
1041 | spin_lock_irqsave(&floppy_hlt_lock, flags); | ||
1042 | if (!hlt_disabled) { | ||
1043 | hlt_disabled = 1; | ||
1044 | #ifdef HAVE_DISABLE_HLT | ||
1045 | disable_hlt(); | ||
1046 | #endif | ||
1047 | } | ||
1048 | spin_unlock_irqrestore(&floppy_hlt_lock, flags); | ||
1049 | } | ||
1050 | |||
1051 | static void floppy_enable_hlt(void) | ||
1052 | { | ||
1053 | unsigned long flags; | ||
1054 | |||
1055 | spin_lock_irqsave(&floppy_hlt_lock, flags); | ||
1056 | if (hlt_disabled) { | ||
1057 | hlt_disabled = 0; | ||
1058 | #ifdef HAVE_DISABLE_HLT | ||
1059 | enable_hlt(); | ||
1060 | #endif | ||
1061 | } | ||
1062 | spin_unlock_irqrestore(&floppy_hlt_lock, flags); | ||
1063 | } | ||
1064 | |||
1065 | static void setup_DMA(void) | 1033 | static void setup_DMA(void) |
1066 | { | 1034 | { |
1067 | unsigned long f; | 1035 | unsigned long f; |
@@ -1106,7 +1074,6 @@ static void setup_DMA(void)
1106 | fd_enable_dma(); | 1074 | fd_enable_dma(); |
1107 | release_dma_lock(f); | 1075 | release_dma_lock(f); |
1108 | #endif | 1076 | #endif |
1109 | floppy_disable_hlt(); | ||
1110 | } | 1077 | } |
1111 | 1078 | ||
1112 | static void show_floppy(void); | 1079 | static void show_floppy(void); |
@@ -1708,7 +1675,6 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id)
1708 | fd_disable_dma(); | 1675 | fd_disable_dma(); |
1709 | release_dma_lock(f); | 1676 | release_dma_lock(f); |
1710 | 1677 | ||
1711 | floppy_enable_hlt(); | ||
1712 | do_floppy = NULL; | 1678 | do_floppy = NULL; |
1713 | if (fdc >= N_FDC || FDCS->address == -1) { | 1679 | if (fdc >= N_FDC || FDCS->address == -1) { |
1714 | /* we don't even know which FDC is the culprit */ | 1680 | /* we don't even know which FDC is the culprit */ |
@@ -1857,8 +1823,6 @@ static void floppy_shutdown(unsigned long data)
1857 | show_floppy(); | 1823 | show_floppy(); |
1858 | cancel_activity(); | 1824 | cancel_activity(); |
1859 | 1825 | ||
1860 | floppy_enable_hlt(); | ||
1861 | |||
1862 | flags = claim_dma_lock(); | 1826 | flags = claim_dma_lock(); |
1863 | fd_disable_dma(); | 1827 | fd_disable_dma(); |
1864 | release_dma_lock(flags); | 1828 | release_dma_lock(flags); |
@@ -3832,7 +3796,7 @@ static int __floppy_read_block_0(struct block_device *bdev)
3832 | bio.bi_size = size; | 3796 | bio.bi_size = size; |
3833 | bio.bi_bdev = bdev; | 3797 | bio.bi_bdev = bdev; |
3834 | bio.bi_sector = 0; | 3798 | bio.bi_sector = 0; |
3835 | bio.bi_flags = BIO_QUIET; | 3799 | bio.bi_flags = (1 << BIO_QUIET); |
3836 | init_completion(&complete); | 3800 | init_completion(&complete); |
3837 | bio.bi_private = &complete; | 3801 | bio.bi_private = &complete; |
3838 | bio.bi_end_io = floppy_rb0_complete; | 3802 | bio.bi_end_io = floppy_rb0_complete; |
@@ -4368,8 +4332,14 @@ out_unreg_blkdev:
4368 | out_put_disk: | 4332 | out_put_disk: |
4369 | while (dr--) { | 4333 | while (dr--) { |
4370 | del_timer_sync(&motor_off_timer[dr]); | 4334 | del_timer_sync(&motor_off_timer[dr]); |
4371 | if (disks[dr]->queue) | 4335 | if (disks[dr]->queue) { |
4372 | blk_cleanup_queue(disks[dr]->queue); | 4336 | blk_cleanup_queue(disks[dr]->queue); |
4337 | /* | ||
4338 | * put_disk() is not paired with add_disk() and | ||
4339 | * will put queue reference one extra time. fix it. | ||
4340 | */ | ||
4341 | disks[dr]->queue = NULL; | ||
4342 | } | ||
4373 | put_disk(disks[dr]); | 4343 | put_disk(disks[dr]); |
4374 | } | 4344 | } |
4375 | return err; | 4345 | return err; |
@@ -4503,7 +4473,6 @@ static void floppy_release_irq_and_dma(void)
4503 | #if N_FDC > 1 | 4473 | #if N_FDC > 1 |
4504 | set_dor(1, ~8, 0); | 4474 | set_dor(1, ~8, 0); |
4505 | #endif | 4475 | #endif |
4506 | floppy_enable_hlt(); | ||
4507 | 4476 | ||
4508 | if (floppy_track_buffer && max_buffer_sectors) { | 4477 | if (floppy_track_buffer && max_buffer_sectors) { |
4509 | tmpsize = max_buffer_sectors * 1024; | 4478 | tmpsize = max_buffer_sectors * 1024; |
@@ -4579,6 +4548,15 @@ static void __exit floppy_module_exit(void)
4579 | platform_device_unregister(&floppy_device[drive]); | 4548 | platform_device_unregister(&floppy_device[drive]); |
4580 | } | 4549 | } |
4581 | blk_cleanup_queue(disks[drive]->queue); | 4550 | blk_cleanup_queue(disks[drive]->queue); |
4551 | |||
4552 | /* | ||
4553 | * These disks have not called add_disk(). Don't put down | ||
4554 | * queue reference in put_disk(). | ||
4555 | */ | ||
4556 | if (!(allowed_drive_mask & (1 << drive)) || | ||
4557 | fdc_state[FDC(drive)].version == FDC_NONE) | ||
4558 | disks[drive]->queue = NULL; | ||
4559 | |||
4582 | put_disk(disks[drive]); | 4560 | put_disk(disks[drive]); |
4583 | } | 4561 | } |
4584 | 4562 | ||
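Both floppy.c error/exit-path hunks above deal with the same reference-counting detail: put_disk() on a disk that never went through add_disk() would drop one request-queue reference too many, so the driver detaches the queue (disk->queue = NULL) after blk_cleanup_queue() for such disks. A hedged sketch of that unwind pattern with generic names:

#include <linux/genhd.h>
#include <linux/blkdev.h>

/* Hypothetical error unwind for a gendisk that never reached add_disk(). */
static void example_destroy_unadded_disk(struct gendisk *disk)
{
	if (disk->queue) {
		blk_cleanup_queue(disk->queue);
		/*
		 * put_disk() is not paired with add_disk() here and would
		 * put the queue reference one extra time, so drop it first.
		 */
		disk->queue = NULL;
	}
	put_disk(disk);
}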
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index b52c9ca146fc..bf397bf108b7 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -44,7 +44,6 @@
44 | #define HD_IRQ 14 | 44 | #define HD_IRQ 14 |
45 | 45 | ||
46 | #define REALLY_SLOW_IO | 46 | #define REALLY_SLOW_IO |
47 | #include <asm/system.h> | ||
48 | #include <asm/io.h> | 47 | #include <asm/io.h> |
49 | #include <asm/uaccess.h> | 48 | #include <asm/uaccess.h> |
50 | 49 | ||
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f00257782fcc..bbca966f8f66 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -93,16 +93,16 @@ static int transfer_none(struct loop_device *lo, int cmd,
93 | struct page *loop_page, unsigned loop_off, | 93 | struct page *loop_page, unsigned loop_off, |
94 | int size, sector_t real_block) | 94 | int size, sector_t real_block) |
95 | { | 95 | { |
96 | char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off; | 96 | char *raw_buf = kmap_atomic(raw_page) + raw_off; |
97 | char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off; | 97 | char *loop_buf = kmap_atomic(loop_page) + loop_off; |
98 | 98 | ||
99 | if (cmd == READ) | 99 | if (cmd == READ) |
100 | memcpy(loop_buf, raw_buf, size); | 100 | memcpy(loop_buf, raw_buf, size); |
101 | else | 101 | else |
102 | memcpy(raw_buf, loop_buf, size); | 102 | memcpy(raw_buf, loop_buf, size); |
103 | 103 | ||
104 | kunmap_atomic(loop_buf, KM_USER1); | 104 | kunmap_atomic(loop_buf); |
105 | kunmap_atomic(raw_buf, KM_USER0); | 105 | kunmap_atomic(raw_buf); |
106 | cond_resched(); | 106 | cond_resched(); |
107 | return 0; | 107 | return 0; |
108 | } | 108 | } |
@@ -112,8 +112,8 @@ static int transfer_xor(struct loop_device *lo, int cmd,
112 | struct page *loop_page, unsigned loop_off, | 112 | struct page *loop_page, unsigned loop_off, |
113 | int size, sector_t real_block) | 113 | int size, sector_t real_block) |
114 | { | 114 | { |
115 | char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off; | 115 | char *raw_buf = kmap_atomic(raw_page) + raw_off; |
116 | char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off; | 116 | char *loop_buf = kmap_atomic(loop_page) + loop_off; |
117 | char *in, *out, *key; | 117 | char *in, *out, *key; |
118 | int i, keysize; | 118 | int i, keysize; |
119 | 119 | ||
@@ -130,8 +130,8 @@ static int transfer_xor(struct loop_device *lo, int cmd,
130 | for (i = 0; i < size; i++) | 130 | for (i = 0; i < size; i++) |
131 | *out++ = *in++ ^ key[(i & 511) % keysize]; | 131 | *out++ = *in++ ^ key[(i & 511) % keysize]; |
132 | 132 | ||
133 | kunmap_atomic(loop_buf, KM_USER1); | 133 | kunmap_atomic(loop_buf); |
134 | kunmap_atomic(raw_buf, KM_USER0); | 134 | kunmap_atomic(raw_buf); |
135 | cond_resched(); | 135 | cond_resched(); |
136 | return 0; | 136 | return 0; |
137 | } | 137 | } |
@@ -356,14 +356,14 @@ lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd)
356 | return __splice_from_pipe(pipe, sd, lo_splice_actor); | 356 | return __splice_from_pipe(pipe, sd, lo_splice_actor); |
357 | } | 357 | } |
358 | 358 | ||
359 | static int | 359 | static ssize_t |
360 | do_lo_receive(struct loop_device *lo, | 360 | do_lo_receive(struct loop_device *lo, |
361 | struct bio_vec *bvec, int bsize, loff_t pos) | 361 | struct bio_vec *bvec, int bsize, loff_t pos) |
362 | { | 362 | { |
363 | struct lo_read_data cookie; | 363 | struct lo_read_data cookie; |
364 | struct splice_desc sd; | 364 | struct splice_desc sd; |
365 | struct file *file; | 365 | struct file *file; |
366 | long retval; | 366 | ssize_t retval; |
367 | 367 | ||
368 | cookie.lo = lo; | 368 | cookie.lo = lo; |
369 | cookie.page = bvec->bv_page; | 369 | cookie.page = bvec->bv_page; |
@@ -379,26 +379,28 @@ do_lo_receive(struct loop_device *lo,
379 | file = lo->lo_backing_file; | 379 | file = lo->lo_backing_file; |
380 | retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor); | 380 | retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor); |
381 | 381 | ||
382 | if (retval < 0) | 382 | return retval; |
383 | return retval; | ||
384 | if (retval != bvec->bv_len) | ||
385 | return -EIO; | ||
386 | return 0; | ||
387 | } | 383 | } |
388 | 384 | ||
389 | static int | 385 | static int |
390 | lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) | 386 | lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) |
391 | { | 387 | { |
392 | struct bio_vec *bvec; | 388 | struct bio_vec *bvec; |
393 | int i, ret = 0; | 389 | ssize_t s; |
390 | int i; | ||
394 | 391 | ||
395 | bio_for_each_segment(bvec, bio, i) { | 392 | bio_for_each_segment(bvec, bio, i) { |
396 | ret = do_lo_receive(lo, bvec, bsize, pos); | 393 | s = do_lo_receive(lo, bvec, bsize, pos); |
397 | if (ret < 0) | 394 | if (s < 0) |
395 | return s; | ||
396 | |||
397 | if (s != bvec->bv_len) { | ||
398 | zero_fill_bio(bio); | ||
398 | break; | 399 | break; |
400 | } | ||
399 | pos += bvec->bv_len; | 401 | pos += bvec->bv_len; |
400 | } | 402 | } |
401 | return ret; | 403 | return 0; |
402 | } | 404 | } |
403 | 405 | ||
404 | static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) | 406 | static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) |
diff --git a/drivers/block/mtip32xx/Kconfig b/drivers/block/mtip32xx/Kconfig
new file mode 100644
index 000000000000..b5dd14e072f2
--- /dev/null
+++ b/drivers/block/mtip32xx/Kconfig
@@ -0,0 +1,9 @@
1 | # | ||
2 | # mtip32xx device driver configuration | ||
3 | # | ||
4 | |||
5 | config BLK_DEV_PCIESSD_MTIP32XX | ||
6 | tristate "Block Device Driver for Micron PCIe SSDs" | ||
7 | depends on HOTPLUG_PCI_PCIE | ||
8 | help | ||
9 | This enables the block driver for Micron PCIe SSDs. | ||
diff --git a/drivers/block/mtip32xx/Makefile b/drivers/block/mtip32xx/Makefile
new file mode 100644
index 000000000000..4fbef8c8329b
--- /dev/null
+++ b/drivers/block/mtip32xx/Makefile
@@ -0,0 +1,5 @@
1 | # | ||
2 | # Makefile for Block device driver for Micron PCIe SSD | ||
3 | # | ||
4 | |||
5 | obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx.o | ||
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
new file mode 100644
index 000000000000..8eb81c96608f
--- /dev/null
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -0,0 +1,3650 @@
1 | /* | ||
2 | * Driver for the Micron P320 SSD | ||
3 | * Copyright (C) 2011 Micron Technology, Inc. | ||
4 | * | ||
5 | * Portions of this code were derived from works subjected to the | ||
6 | * following copyright: | ||
7 | * Copyright (C) 2009 Integrated Device Technology, Inc. | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | */ | ||
20 | |||
21 | #include <linux/pci.h> | ||
22 | #include <linux/interrupt.h> | ||
23 | #include <linux/ata.h> | ||
24 | #include <linux/delay.h> | ||
25 | #include <linux/hdreg.h> | ||
26 | #include <linux/uaccess.h> | ||
27 | #include <linux/random.h> | ||
28 | #include <linux/smp.h> | ||
29 | #include <linux/compat.h> | ||
30 | #include <linux/fs.h> | ||
31 | #include <linux/module.h> | ||
32 | #include <linux/genhd.h> | ||
33 | #include <linux/blkdev.h> | ||
34 | #include <linux/bio.h> | ||
35 | #include <linux/dma-mapping.h> | ||
36 | #include <linux/idr.h> | ||
37 | #include <linux/kthread.h> | ||
38 | #include <../drivers/ata/ahci.h> | ||
39 | #include "mtip32xx.h" | ||
40 | |||
41 | #define HW_CMD_SLOT_SZ (MTIP_MAX_COMMAND_SLOTS * 32) | ||
42 | #define HW_CMD_TBL_SZ (AHCI_CMD_TBL_HDR_SZ + (MTIP_MAX_SG * 16)) | ||
43 | #define HW_CMD_TBL_AR_SZ (HW_CMD_TBL_SZ * MTIP_MAX_COMMAND_SLOTS) | ||
44 | #define HW_PORT_PRIV_DMA_SZ \ | ||
45 | (HW_CMD_SLOT_SZ + HW_CMD_TBL_AR_SZ + AHCI_RX_FIS_SZ) | ||
46 | |||
47 | #define HOST_HSORG 0xFC | ||
48 | #define HSORG_DISABLE_SLOTGRP_INTR (1<<24) | ||
49 | #define HSORG_DISABLE_SLOTGRP_PXIS (1<<16) | ||
50 | #define HSORG_HWREV 0xFF00 | ||
51 | #define HSORG_STYLE 0x8 | ||
52 | #define HSORG_SLOTGROUPS 0x7 | ||
53 | |||
54 | #define PORT_COMMAND_ISSUE 0x38 | ||
55 | #define PORT_SDBV 0x7C | ||
56 | |||
57 | #define PORT_OFFSET 0x100 | ||
58 | #define PORT_MEM_SIZE 0x80 | ||
59 | |||
60 | #define PORT_IRQ_ERR \ | ||
61 | (PORT_IRQ_HBUS_ERR | PORT_IRQ_IF_ERR | PORT_IRQ_CONNECT | \ | ||
62 | PORT_IRQ_PHYRDY | PORT_IRQ_UNK_FIS | PORT_IRQ_BAD_PMP | \ | ||
63 | PORT_IRQ_TF_ERR | PORT_IRQ_HBUS_DATA_ERR | PORT_IRQ_IF_NONFATAL | \ | ||
64 | PORT_IRQ_OVERFLOW) | ||
65 | #define PORT_IRQ_LEGACY \ | ||
66 | (PORT_IRQ_PIOS_FIS | PORT_IRQ_D2H_REG_FIS) | ||
67 | #define PORT_IRQ_HANDLED \ | ||
68 | (PORT_IRQ_SDB_FIS | PORT_IRQ_LEGACY | \ | ||
69 | PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR | \ | ||
70 | PORT_IRQ_CONNECT | PORT_IRQ_PHYRDY) | ||
71 | #define DEF_PORT_IRQ \ | ||
72 | (PORT_IRQ_ERR | PORT_IRQ_LEGACY | PORT_IRQ_SDB_FIS) | ||
73 | |||
74 | /* product numbers */ | ||
75 | #define MTIP_PRODUCT_UNKNOWN 0x00 | ||
76 | #define MTIP_PRODUCT_ASICFPGA 0x11 | ||
77 | |||
78 | /* Device instance number, incremented each time a device is probed. */ | ||
79 | static int instance; | ||
80 | |||
81 | /* | ||
82 | * Global variable used to hold the major block device number | ||
83 | * allocated in mtip_init(). | ||
84 | */ | ||
85 | static int mtip_major; | ||
86 | |||
87 | static DEFINE_SPINLOCK(rssd_index_lock); | ||
88 | static DEFINE_IDA(rssd_index_ida); | ||
89 | |||
90 | static int mtip_block_initialize(struct driver_data *dd); | ||
91 | |||
92 | #ifdef CONFIG_COMPAT | ||
93 | struct mtip_compat_ide_task_request_s { | ||
94 | __u8 io_ports[8]; | ||
95 | __u8 hob_ports[8]; | ||
96 | ide_reg_valid_t out_flags; | ||
97 | ide_reg_valid_t in_flags; | ||
98 | int data_phase; | ||
99 | int req_cmd; | ||
100 | compat_ulong_t out_size; | ||
101 | compat_ulong_t in_size; | ||
102 | }; | ||
103 | #endif | ||
104 | |||
105 | /* | ||
106 | * This function, check_for_surprise_removal, is called | ||
107 | * while the card is being removed from the system; it | ||
108 | * reads the vendor ID from the configuration space. | ||
109 | * | ||
110 | * @pdev Pointer to the pci_dev structure. | ||
111 | * | ||
112 | * return value | ||
113 | * true if device removed, else false | ||
114 | */ | ||
115 | static bool mtip_check_surprise_removal(struct pci_dev *pdev) | ||
116 | { | ||
117 | u16 vendor_id = 0; | ||
118 | |||
119 | /* Read the vendorID from the configuration space */ | ||
120 | pci_read_config_word(pdev, 0x00, &vendor_id); | ||
121 | if (vendor_id == 0xFFFF) | ||
122 | return true; /* device removed */ | ||
123 | |||
124 | return false; /* device present */ | ||
125 | } | ||
126 | |||
127 | /* | ||
128 | * This function is called to clean up the pending commands in the | ||
129 | * command slots during surprise removal of the device and to | ||
130 | * return an error to the upper layer. | ||
131 | * | ||
132 | * @dd Pointer to the DRIVER_DATA structure. | ||
133 | * | ||
134 | * return value | ||
135 | * None | ||
136 | */ | ||
137 | static void mtip_command_cleanup(struct driver_data *dd) | ||
138 | { | ||
139 | int group = 0, commandslot = 0, commandindex = 0; | ||
140 | struct mtip_cmd *command; | ||
141 | struct mtip_port *port = dd->port; | ||
142 | |||
143 | for (group = 0; group < 4; group++) { | ||
144 | for (commandslot = 0; commandslot < 32; commandslot++) { | ||
145 | if (!(port->allocated[group] & (1 << commandslot))) | ||
146 | continue; | ||
147 | |||
148 | commandindex = group << 5 | commandslot; | ||
149 | command = &port->commands[commandindex]; | ||
150 | |||
151 | if (atomic_read(&command->active) | ||
152 | && (command->async_callback)) { | ||
153 | command->async_callback(command->async_data, | ||
154 | -ENODEV); | ||
155 | command->async_callback = NULL; | ||
156 | command->async_data = NULL; | ||
157 | } | ||
158 | |||
159 | dma_unmap_sg(&port->dd->pdev->dev, | ||
160 | command->sg, | ||
161 | command->scatter_ents, | ||
162 | command->direction); | ||
163 | } | ||
164 | } | ||
165 | |||
166 | up(&port->cmd_slot); | ||
167 | |||
168 | atomic_set(&dd->drv_cleanup_done, true); | ||
169 | } | ||
170 | |||
171 | /* | ||
172 | * Obtain an empty command slot. | ||
173 | * | ||
174 | * This function needs to be reentrant since it could be called | ||
175 | * at the same time on multiple CPUs. The allocation of the | ||
176 | * command slot must be atomic. | ||
177 | * | ||
178 | * @port Pointer to the port data structure. | ||
179 | * | ||
180 | * return value | ||
181 | * >= 0 Index of command slot obtained. | ||
182 | * -1 No command slots available. | ||
183 | */ | ||
184 | static int get_slot(struct mtip_port *port) | ||
185 | { | ||
186 | int slot, i; | ||
187 | unsigned int num_command_slots = port->dd->slot_groups * 32; | ||
188 | |||
189 | /* | ||
190 | * Try 10 times, because there is a small race here. | ||
191 | * That's OK, because it's still cheaper than a lock. | ||
192 | * | ||
193 | * Race: since this section is not protected by a lock, the same bit | ||
194 | * could be chosen by different process contexts running on | ||
195 | * different processors. So instead of a costly lock, we go | ||
196 | * with a loop. | ||
197 | */ | ||
198 | for (i = 0; i < 10; i++) { | ||
199 | slot = find_next_zero_bit(port->allocated, | ||
200 | num_command_slots, 1); | ||
201 | if ((slot < num_command_slots) && | ||
202 | (!test_and_set_bit(slot, port->allocated))) | ||
203 | return slot; | ||
204 | } | ||
205 | dev_warn(&port->dd->pdev->dev, "Failed to get a tag.\n"); | ||
206 | |||
207 | if (mtip_check_surprise_removal(port->dd->pdev)) { | ||
208 | /* Device not present, clean outstanding commands */ | ||
209 | mtip_command_cleanup(port->dd); | ||
210 | } | ||
211 | return -1; | ||
212 | } | ||
213 | |||
214 | /* | ||
215 | * Release a command slot. | ||
216 | * | ||
217 | * @port Pointer to the port data structure. | ||
218 | * @tag Tag of command to release | ||
219 | * | ||
220 | * return value | ||
221 | * None | ||
222 | */ | ||
223 | static inline void release_slot(struct mtip_port *port, int tag) | ||
224 | { | ||
225 | smp_mb__before_clear_bit(); | ||
226 | clear_bit(tag, port->allocated); | ||
227 | smp_mb__after_clear_bit(); | ||
228 | } | ||
229 | |||
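get_slot() and release_slot() above form a lock-free tag allocator: find_next_zero_bit() proposes a free tag and test_and_set_bit() claims it atomically, retrying a bounded number of times instead of taking a spinlock. A self-contained sketch of the same pattern follows (the tag-space size is arbitrary; the search starts at bit 1 as in the driver):

#include <linux/bitops.h>
#include <linux/bitmap.h>

#define EXAMPLE_NR_TAGS	128			/* illustrative tag space */
static DECLARE_BITMAP(example_tags, EXAMPLE_NR_TAGS);

/* Returns a claimed tag on success, or -1 if none could be grabbed. */
static int example_get_tag(void)
{
	int tag, tries;

	for (tries = 0; tries < 10; tries++) {
		/* Propose a candidate free tag... */
		tag = find_next_zero_bit(example_tags, EXAMPLE_NR_TAGS, 1);
		/* ...and claim it atomically; retry if someone beat us to it. */
		if (tag < EXAMPLE_NR_TAGS &&
		    !test_and_set_bit(tag, example_tags))
			return tag;
	}
	return -1;
}

static void example_put_tag(int tag)
{
	clear_bit(tag, example_tags);		/* clear_bit() is atomic */
}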
230 | /* | ||
231 | * Reset the HBA (without sleeping) | ||
232 | * | ||
233 | * Just like hba_reset, except does not call sleep, so can be | ||
234 | * run from interrupt/tasklet context. | ||
235 | * | ||
236 | * @dd Pointer to the driver data structure. | ||
237 | * | ||
238 | * return value | ||
239 | * 0 The reset was successful. | ||
240 | * -1 The HBA Reset bit did not clear. | ||
241 | */ | ||
242 | static int hba_reset_nosleep(struct driver_data *dd) | ||
243 | { | ||
244 | unsigned long timeout; | ||
245 | |||
246 | /* Chip quirk: quiesce any chip function */ | ||
247 | mdelay(10); | ||
248 | |||
249 | /* Set the reset bit */ | ||
250 | writel(HOST_RESET, dd->mmio + HOST_CTL); | ||
251 | |||
252 | /* Flush */ | ||
253 | readl(dd->mmio + HOST_CTL); | ||
254 | |||
255 | /* | ||
256 | * Wait 10ms then spin for up to 1 second | ||
257 | * waiting for reset acknowledgement | ||
258 | */ | ||
259 | timeout = jiffies + msecs_to_jiffies(1000); | ||
260 | mdelay(10); | ||
261 | while ((readl(dd->mmio + HOST_CTL) & HOST_RESET) | ||
262 | && time_before(jiffies, timeout)) | ||
263 | mdelay(1); | ||
264 | |||
265 | if (readl(dd->mmio + HOST_CTL) & HOST_RESET) | ||
266 | return -1; | ||
267 | |||
268 | return 0; | ||
269 | } | ||
270 | |||
271 | /* | ||
272 | * Issue a command to the hardware. | ||
273 | * | ||
274 | * Set the appropriate bit in the s_active and Command Issue hardware | ||
275 | * registers, causing hardware command processing to begin. | ||
276 | * | ||
277 | * @port Pointer to the port structure. | ||
278 | * @tag The tag of the command to be issued. | ||
279 | * | ||
280 | * return value | ||
281 | * None | ||
282 | */ | ||
283 | static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag) | ||
284 | { | ||
285 | unsigned long flags = 0; | ||
286 | |||
287 | atomic_set(&port->commands[tag].active, 1); | ||
288 | |||
289 | spin_lock_irqsave(&port->cmd_issue_lock, flags); | ||
290 | |||
291 | writel((1 << MTIP_TAG_BIT(tag)), | ||
292 | port->s_active[MTIP_TAG_INDEX(tag)]); | ||
293 | writel((1 << MTIP_TAG_BIT(tag)), | ||
294 | port->cmd_issue[MTIP_TAG_INDEX(tag)]); | ||
295 | |||
296 | spin_unlock_irqrestore(&port->cmd_issue_lock, flags); | ||
297 | } | ||
298 | |||
299 | /* | ||
300 | * Enable/disable the reception of FIS | ||
301 | * | ||
302 | * @port Pointer to the port data structure | ||
303 | * @enable 1 to enable, 0 to disable | ||
304 | * | ||
305 | * return value | ||
306 | * Previous state: 1 enabled, 0 disabled | ||
307 | */ | ||
308 | static int mtip_enable_fis(struct mtip_port *port, int enable) | ||
309 | { | ||
310 | u32 tmp; | ||
311 | |||
312 | /* enable FIS reception */ | ||
313 | tmp = readl(port->mmio + PORT_CMD); | ||
314 | if (enable) | ||
315 | writel(tmp | PORT_CMD_FIS_RX, port->mmio + PORT_CMD); | ||
316 | else | ||
317 | writel(tmp & ~PORT_CMD_FIS_RX, port->mmio + PORT_CMD); | ||
318 | |||
319 | /* Flush */ | ||
320 | readl(port->mmio + PORT_CMD); | ||
321 | |||
322 | return (((tmp & PORT_CMD_FIS_RX) == PORT_CMD_FIS_RX)); | ||
323 | } | ||
324 | |||
325 | /* | ||
326 | * Enable/disable the DMA engine | ||
327 | * | ||
328 | * @port Pointer to the port data structure | ||
329 | * @enable 1 to enable, 0 to disable | ||
330 | * | ||
331 | * return value | ||
332 | * Previous state: 1 enabled, 0 disabled. | ||
333 | */ | ||
334 | static int mtip_enable_engine(struct mtip_port *port, int enable) | ||
335 | { | ||
336 | u32 tmp; | ||
337 | |||
338 | /* enable FIS reception */ | ||
339 | tmp = readl(port->mmio + PORT_CMD); | ||
340 | if (enable) | ||
341 | writel(tmp | PORT_CMD_START, port->mmio + PORT_CMD); | ||
342 | else | ||
343 | writel(tmp & ~PORT_CMD_START, port->mmio + PORT_CMD); | ||
344 | |||
345 | readl(port->mmio + PORT_CMD); | ||
346 | return (((tmp & PORT_CMD_START) == PORT_CMD_START)); | ||
347 | } | ||
348 | |||
349 | /* | ||
350 | * Enables the port DMA engine and FIS reception. | ||
351 | * | ||
352 | * return value | ||
353 | * None | ||
354 | */ | ||
355 | static inline void mtip_start_port(struct mtip_port *port) | ||
356 | { | ||
357 | /* Enable FIS reception */ | ||
358 | mtip_enable_fis(port, 1); | ||
359 | |||
360 | /* Enable the DMA engine */ | ||
361 | mtip_enable_engine(port, 1); | ||
362 | } | ||
363 | |||
364 | /* | ||
365 | * Deinitialize a port by disabling port interrupts, the DMA engine, | ||
366 | * and FIS reception. | ||
367 | * | ||
368 | * @port Pointer to the port structure | ||
369 | * | ||
370 | * return value | ||
371 | * None | ||
372 | */ | ||
373 | static inline void mtip_deinit_port(struct mtip_port *port) | ||
374 | { | ||
375 | /* Disable interrupts on this port */ | ||
376 | writel(0, port->mmio + PORT_IRQ_MASK); | ||
377 | |||
378 | /* Disable the DMA engine */ | ||
379 | mtip_enable_engine(port, 0); | ||
380 | |||
381 | /* Disable FIS reception */ | ||
382 | mtip_enable_fis(port, 0); | ||
383 | } | ||
384 | |||
385 | /* | ||
386 | * Initialize a port. | ||
387 | * | ||
388 | * This function deinitializes the port by calling mtip_deinit_port() and | ||
389 | * then initializes it by setting the command header and RX FIS addresses, | ||
390 | * clearing the SError register and any pending port interrupts before | ||
391 | * re-enabling the default set of port interrupts. | ||
392 | * | ||
393 | * @port Pointer to the port structure. | ||
394 | * | ||
395 | * return value | ||
396 | * None | ||
397 | */ | ||
398 | static void mtip_init_port(struct mtip_port *port) | ||
399 | { | ||
400 | int i; | ||
401 | mtip_deinit_port(port); | ||
402 | |||
403 | /* Program the command list base and FIS base addresses */ | ||
404 | if (readl(port->dd->mmio + HOST_CAP) & HOST_CAP_64) { | ||
405 | writel((port->command_list_dma >> 16) >> 16, | ||
406 | port->mmio + PORT_LST_ADDR_HI); | ||
407 | writel((port->rxfis_dma >> 16) >> 16, | ||
408 | port->mmio + PORT_FIS_ADDR_HI); | ||
409 | } | ||
410 | |||
411 | writel(port->command_list_dma & 0xFFFFFFFF, | ||
412 | port->mmio + PORT_LST_ADDR); | ||
413 | writel(port->rxfis_dma & 0xFFFFFFFF, port->mmio + PORT_FIS_ADDR); | ||
414 | |||
415 | /* Clear SError */ | ||
416 | writel(readl(port->mmio + PORT_SCR_ERR), port->mmio + PORT_SCR_ERR); | ||
417 | |||
418 | /* reset the completed registers.*/ | ||
419 | for (i = 0; i < port->dd->slot_groups; i++) | ||
420 | writel(0xFFFFFFFF, port->completed[i]); | ||
421 | |||
422 | /* Clear any pending interrupts for this port */ | ||
423 | writel(readl(port->mmio + PORT_IRQ_STAT), port->mmio + PORT_IRQ_STAT); | ||
424 | |||
425 | /* Enable port interrupts */ | ||
426 | writel(DEF_PORT_IRQ, port->mmio + PORT_IRQ_MASK); | ||
427 | } | ||
428 | |||
429 | /* | ||
430 | * Restart a port | ||
431 | * | ||
432 | * @port Pointer to the port data structure. | ||
433 | * | ||
434 | * return value | ||
435 | * None | ||
436 | */ | ||
437 | static void mtip_restart_port(struct mtip_port *port) | ||
438 | { | ||
439 | unsigned long timeout; | ||
440 | |||
441 | /* Disable the DMA engine */ | ||
442 | mtip_enable_engine(port, 0); | ||
443 | |||
444 | /* Chip quirk: wait up to 500ms for PxCMD.CR == 0 */ | ||
445 | timeout = jiffies + msecs_to_jiffies(500); | ||
446 | while ((readl(port->mmio + PORT_CMD) & PORT_CMD_LIST_ON) | ||
447 | && time_before(jiffies, timeout)) | ||
448 | ; | ||
449 | |||
450 | /* | ||
451 | * Chip quirk: escalate to hba reset if | ||
452 | * PxCMD.CR not clear after 500 ms | ||
453 | */ | ||
454 | if (readl(port->mmio + PORT_CMD) & PORT_CMD_LIST_ON) { | ||
455 | dev_warn(&port->dd->pdev->dev, | ||
456 | "PxCMD.CR not clear, escalating reset\n"); | ||
457 | |||
458 | if (hba_reset_nosleep(port->dd)) | ||
459 | dev_err(&port->dd->pdev->dev, | ||
460 | "HBA reset escalation failed.\n"); | ||
461 | |||
462 | /* 30 ms delay before com reset to quiesce chip */ | ||
463 | mdelay(30); | ||
464 | } | ||
465 | |||
466 | dev_warn(&port->dd->pdev->dev, "Issuing COM reset\n"); | ||
467 | |||
468 | /* Set PxSCTL.DET */ | ||
469 | writel(readl(port->mmio + PORT_SCR_CTL) | | ||
470 | 1, port->mmio + PORT_SCR_CTL); | ||
471 | readl(port->mmio + PORT_SCR_CTL); | ||
472 | |||
473 | /* Wait 1 ms to quiesce chip function */ | ||
474 | timeout = jiffies + msecs_to_jiffies(1); | ||
475 | while (time_before(jiffies, timeout)) | ||
476 | ; | ||
477 | |||
478 | /* Clear PxSCTL.DET */ | ||
479 | writel(readl(port->mmio + PORT_SCR_CTL) & ~1, | ||
480 | port->mmio + PORT_SCR_CTL); | ||
481 | readl(port->mmio + PORT_SCR_CTL); | ||
482 | |||
483 | /* Wait 500 ms for bit 0 of PORT_SCR_STS to be set */ | ||
484 | timeout = jiffies + msecs_to_jiffies(500); | ||
485 | while (((readl(port->mmio + PORT_SCR_STAT) & 0x01) == 0) | ||
486 | && time_before(jiffies, timeout)) | ||
487 | ; | ||
488 | |||
489 | if ((readl(port->mmio + PORT_SCR_STAT) & 0x01) == 0) | ||
490 | dev_warn(&port->dd->pdev->dev, | ||
491 | "COM reset failed\n"); | ||
492 | |||
493 | /* Clear SError; the COM reset will have set PxSERR.DIAG.X, so clear it */ | ||
494 | writel(readl(port->mmio + PORT_SCR_ERR), port->mmio + PORT_SCR_ERR); | ||
495 | |||
496 | /* Enable the DMA engine */ | ||
497 | mtip_enable_engine(port, 1); | ||
498 | } | ||
499 | |||
500 | /* | ||
501 | * Called periodically to see if any read/write commands are | ||
502 | * taking too long to complete. | ||
503 | * | ||
504 | * @data Pointer to the PORT data structure. | ||
505 | * | ||
506 | * return value | ||
507 | * None | ||
508 | */ | ||
509 | static void mtip_timeout_function(unsigned long int data) | ||
510 | { | ||
511 | struct mtip_port *port = (struct mtip_port *) data; | ||
512 | struct host_to_dev_fis *fis; | ||
513 | struct mtip_cmd *command; | ||
514 | int tag, cmdto_cnt = 0; | ||
515 | unsigned int bit, group; | ||
516 | unsigned int num_command_slots; | ||
517 | |||
518 | if (unlikely(!port)) | ||
519 | return; | ||
520 | num_command_slots = port->dd->slot_groups * 32; | ||
521 | if (atomic_read(&port->dd->resumeflag) == true) { | ||
522 | mod_timer(&port->cmd_timer, | ||
523 | jiffies + msecs_to_jiffies(30000)); | ||
524 | return; | ||
525 | } | ||
526 | |||
527 | for (tag = 0; tag < num_command_slots; tag++) { | ||
528 | /* | ||
529 | * Skip internal command slot as it has | ||
530 | * its own timeout mechanism | ||
531 | */ | ||
532 | if (tag == MTIP_TAG_INTERNAL) | ||
533 | continue; | ||
534 | |||
535 | if (atomic_read(&port->commands[tag].active) && | ||
536 | (time_after(jiffies, port->commands[tag].comp_time))) { | ||
537 | group = tag >> 5; | ||
538 | bit = tag & 0x1F; | ||
539 | |||
540 | command = &port->commands[tag]; | ||
541 | fis = (struct host_to_dev_fis *) command->command; | ||
542 | |||
543 | dev_warn(&port->dd->pdev->dev, | ||
544 | "Timeout for command tag %d\n", tag); | ||
545 | |||
546 | cmdto_cnt++; | ||
547 | if (cmdto_cnt == 1) | ||
548 | set_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags); | ||
549 | |||
550 | /* | ||
551 | * Clear the completed bit. This should prevent | ||
552 | * any interrupt handlers from trying to retire | ||
553 | * the command. | ||
554 | */ | ||
555 | writel(1 << bit, port->completed[group]); | ||
556 | |||
557 | /* Call the async completion callback. */ | ||
558 | if (likely(command->async_callback)) | ||
559 | command->async_callback(command->async_data, | ||
560 | -EIO); | ||
561 | command->async_callback = NULL; | ||
562 | command->comp_func = NULL; | ||
563 | |||
564 | /* Unmap the DMA scatter list entries */ | ||
565 | dma_unmap_sg(&port->dd->pdev->dev, | ||
566 | command->sg, | ||
567 | command->scatter_ents, | ||
568 | command->direction); | ||
569 | |||
570 | /* | ||
571 | * Clear the allocated bit and active tag for the | ||
572 | * command. | ||
573 | */ | ||
574 | atomic_set(&port->commands[tag].active, 0); | ||
575 | release_slot(port, tag); | ||
576 | |||
577 | up(&port->cmd_slot); | ||
578 | } | ||
579 | } | ||
580 | |||
581 | if (cmdto_cnt) { | ||
582 | dev_warn(&port->dd->pdev->dev, | ||
583 | "%d commands timed out: restarting port", | ||
584 | cmdto_cnt); | ||
585 | mtip_restart_port(port); | ||
586 | clear_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags); | ||
587 | wake_up_interruptible(&port->svc_wait); | ||
588 | } | ||
589 | |||
590 | /* Restart the timer */ | ||
591 | mod_timer(&port->cmd_timer, | ||
592 | jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD)); | ||
593 | } | ||
594 | |||
595 | /* | ||
596 | * IO completion function. | ||
597 | * | ||
598 | * This completion function is called by the driver ISR when a | ||
599 | * command that was issued by the kernel completes. It first calls the | ||
600 | * asynchronous completion function which normally calls back into the block | ||
601 | * layer passing the asynchronous callback data, then unmaps the | ||
602 | * scatter list associated with the completed command, and finally | ||
603 | * clears the allocated bit associated with the completed command. | ||
604 | * | ||
605 | * @port Pointer to the port data structure. | ||
606 | * @tag Tag of the command. | ||
607 | * @data Pointer to driver_data. | ||
608 | * @status Completion status. | ||
609 | * | ||
610 | * return value | ||
611 | * None | ||
612 | */ | ||
613 | static void mtip_async_complete(struct mtip_port *port, | ||
614 | int tag, | ||
615 | void *data, | ||
616 | int status) | ||
617 | { | ||
618 | struct mtip_cmd *command; | ||
619 | struct driver_data *dd = data; | ||
620 | int cb_status = status ? -EIO : 0; | ||
621 | |||
622 | if (unlikely(!dd) || unlikely(!port)) | ||
623 | return; | ||
624 | |||
625 | command = &port->commands[tag]; | ||
626 | |||
627 | if (unlikely(status == PORT_IRQ_TF_ERR)) { | ||
628 | dev_warn(&port->dd->pdev->dev, | ||
629 | "Command tag %d failed due to TFE\n", tag); | ||
630 | } | ||
631 | |||
632 | /* Upper layer callback */ | ||
633 | if (likely(command->async_callback)) | ||
634 | command->async_callback(command->async_data, cb_status); | ||
635 | |||
636 | command->async_callback = NULL; | ||
637 | command->comp_func = NULL; | ||
638 | |||
639 | /* Unmap the DMA scatter list entries */ | ||
640 | dma_unmap_sg(&dd->pdev->dev, | ||
641 | command->sg, | ||
642 | command->scatter_ents, | ||
643 | command->direction); | ||
644 | |||
645 | /* Clear the allocated and active bits for the command */ | ||
646 | atomic_set(&port->commands[tag].active, 0); | ||
647 | release_slot(port, tag); | ||
648 | |||
649 | up(&port->cmd_slot); | ||
650 | } | ||
651 | |||
652 | /* | ||
653 | * Internal command completion callback function. | ||
654 | * | ||
655 | * This function is normally called by the driver ISR when an internal | ||
656 | * command has completed. It signals the command completion by | ||
657 | * calling complete(). | ||
658 | * | ||
659 | * @port Pointer to the port data structure. | ||
660 | * @tag Tag of the command that has completed. | ||
661 | * @data Pointer to a completion structure. | ||
662 | * @status Completion status. | ||
663 | * | ||
664 | * return value | ||
665 | * None | ||
666 | */ | ||
667 | static void mtip_completion(struct mtip_port *port, | ||
668 | int tag, | ||
669 | void *data, | ||
670 | int status) | ||
671 | { | ||
672 | struct mtip_cmd *command = &port->commands[tag]; | ||
673 | struct completion *waiting = data; | ||
674 | if (unlikely(status == PORT_IRQ_TF_ERR)) | ||
675 | dev_warn(&port->dd->pdev->dev, | ||
676 | "Internal command %d completed with TFE\n", tag); | ||
677 | |||
678 | command->async_callback = NULL; | ||
679 | command->comp_func = NULL; | ||
680 | |||
681 | complete(waiting); | ||
682 | } | ||
683 | |||
684 | /* | ||
685 | * Helper function for tag logging | ||
686 | */ | ||
687 | static void print_tags(struct driver_data *dd, | ||
688 | char *msg, | ||
689 | unsigned long *tagbits) | ||
690 | { | ||
691 | unsigned int tag, count = 0; | ||
692 | |||
693 | for (tag = 0; tag < (dd->slot_groups) * 32; tag++) { | ||
694 | if (test_bit(tag, tagbits)) | ||
695 | count++; | ||
696 | } | ||
697 | if (count) | ||
698 | dev_info(&dd->pdev->dev, "%s [%i tags]\n", msg, count); | ||
699 | } | ||
700 | |||
701 | /* | ||
702 | * Handle an error. | ||
703 | * | ||
704 | * @dd Pointer to the DRIVER_DATA structure. | ||
705 | * | ||
706 | * return value | ||
707 | * None | ||
708 | */ | ||
709 | static void mtip_handle_tfe(struct driver_data *dd) | ||
710 | { | ||
711 | int group, tag, bit, reissue; | ||
712 | struct mtip_port *port; | ||
713 | struct mtip_cmd *command; | ||
714 | u32 completed; | ||
715 | struct host_to_dev_fis *fis; | ||
716 | unsigned long tagaccum[SLOTBITS_IN_LONGS]; | ||
717 | |||
718 | dev_warn(&dd->pdev->dev, "Taskfile error\n"); | ||
719 | |||
720 | port = dd->port; | ||
721 | |||
722 | /* Stop the timer to prevent command timeouts. */ | ||
723 | del_timer(&port->cmd_timer); | ||
724 | |||
725 | /* Set eh_active */ | ||
726 | set_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags); | ||
727 | |||
728 | /* Loop through all the groups */ | ||
729 | for (group = 0; group < dd->slot_groups; group++) { | ||
730 | completed = readl(port->completed[group]); | ||
731 | |||
732 | /* clear completed status register in the hardware.*/ | ||
733 | writel(completed, port->completed[group]); | ||
734 | |||
735 | /* clear the tag accumulator */ | ||
736 | memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long)); | ||
737 | |||
738 | /* Process successfully completed commands */ | ||
739 | for (bit = 0; bit < 32 && completed; bit++) { | ||
740 | if (!(completed & (1<<bit))) | ||
741 | continue; | ||
742 | tag = (group << 5) + bit; | ||
743 | |||
744 | /* Skip the internal command slot */ | ||
745 | if (tag == MTIP_TAG_INTERNAL) | ||
746 | continue; | ||
747 | |||
748 | command = &port->commands[tag]; | ||
749 | if (likely(command->comp_func)) { | ||
750 | set_bit(tag, tagaccum); | ||
751 | atomic_set(&port->commands[tag].active, 0); | ||
752 | command->comp_func(port, | ||
753 | tag, | ||
754 | command->comp_data, | ||
755 | 0); | ||
756 | } else { | ||
757 | dev_err(&port->dd->pdev->dev, | ||
758 | "Missing completion func for tag %d", | ||
759 | tag); | ||
760 | if (mtip_check_surprise_removal(dd->pdev)) { | ||
761 | mtip_command_cleanup(dd); | ||
762 | /* don't proceed further */ | ||
763 | return; | ||
764 | } | ||
765 | } | ||
766 | } | ||
767 | } | ||
768 | print_tags(dd, "TFE tags completed:", tagaccum); | ||
769 | |||
770 | /* Restart the port */ | ||
771 | mdelay(20); | ||
772 | mtip_restart_port(port); | ||
773 | |||
774 | /* clear the tag accumulator */ | ||
775 | memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long)); | ||
776 | |||
777 | /* Loop through all the groups */ | ||
778 | for (group = 0; group < dd->slot_groups; group++) { | ||
779 | for (bit = 0; bit < 32; bit++) { | ||
780 | reissue = 1; | ||
781 | tag = (group << 5) + bit; | ||
782 | |||
783 | /* If the active bit is set re-issue the command */ | ||
784 | if (atomic_read(&port->commands[tag].active) == 0) | ||
785 | continue; | ||
786 | |||
787 | fis = (struct host_to_dev_fis *) | ||
788 | port->commands[tag].command; | ||
789 | |||
790 | /* Should re-issue? */ | ||
791 | if (tag == MTIP_TAG_INTERNAL || | ||
792 | fis->command == ATA_CMD_SET_FEATURES) | ||
793 | reissue = 0; | ||
794 | |||
795 | /* | ||
796 | * First check if this command has | ||
797 | * exceeded its retries. | ||
798 | */ | ||
799 | if (reissue && | ||
800 | (port->commands[tag].retries-- > 0)) { | ||
801 | |||
802 | set_bit(tag, tagaccum); | ||
803 | |||
804 | /* Update the timeout value. */ | ||
805 | port->commands[tag].comp_time = | ||
806 | jiffies + msecs_to_jiffies( | ||
807 | MTIP_NCQ_COMMAND_TIMEOUT_MS); | ||
808 | /* Re-issue the command. */ | ||
809 | mtip_issue_ncq_command(port, tag); | ||
810 | |||
811 | continue; | ||
812 | } | ||
813 | |||
814 | /* Retire a command that will not be reissued */ | ||
815 | dev_warn(&port->dd->pdev->dev, | ||
816 | "retiring tag %d\n", tag); | ||
817 | atomic_set(&port->commands[tag].active, 0); | ||
818 | |||
819 | if (port->commands[tag].comp_func) | ||
820 | port->commands[tag].comp_func( | ||
821 | port, | ||
822 | tag, | ||
823 | port->commands[tag].comp_data, | ||
824 | PORT_IRQ_TF_ERR); | ||
825 | else | ||
826 | dev_warn(&port->dd->pdev->dev, | ||
827 | "Bad completion for tag %d\n", | ||
828 | tag); | ||
829 | } | ||
830 | } | ||
831 | print_tags(dd, "TFE tags reissued:", tagaccum); | ||
832 | |||
833 | /* clear eh_active */ | ||
834 | clear_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags); | ||
835 | wake_up_interruptible(&port->svc_wait); | ||
836 | |||
837 | mod_timer(&port->cmd_timer, | ||
838 | jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD)); | ||
839 | } | ||
840 | |||
841 | /* | ||
842 | * Handle a set device bits interrupt | ||
843 | */ | ||
844 | static inline void mtip_process_sdbf(struct driver_data *dd) | ||
845 | { | ||
846 | struct mtip_port *port = dd->port; | ||
847 | int group, tag, bit; | ||
848 | u32 completed; | ||
849 | struct mtip_cmd *command; | ||
850 | |||
851 | /* walk all bits in all slot groups */ | ||
852 | for (group = 0; group < dd->slot_groups; group++) { | ||
853 | completed = readl(port->completed[group]); | ||
854 | |||
855 | /* clear completed status register in the hardware.*/ | ||
856 | writel(completed, port->completed[group]); | ||
857 | |||
858 | /* Process completed commands. */ | ||
859 | for (bit = 0; | ||
860 | (bit < 32) && completed; | ||
861 | bit++, completed >>= 1) { | ||
862 | if (completed & 0x01) { | ||
863 | tag = (group << 5) | bit; | ||
864 | |||
865 | /* skip internal command slot. */ | ||
866 | if (unlikely(tag == MTIP_TAG_INTERNAL)) | ||
867 | continue; | ||
868 | |||
869 | command = &port->commands[tag]; | ||
870 | /* make internal callback */ | ||
871 | if (likely(command->comp_func)) { | ||
872 | command->comp_func( | ||
873 | port, | ||
874 | tag, | ||
875 | command->comp_data, | ||
876 | 0); | ||
877 | } else { | ||
878 | dev_warn(&dd->pdev->dev, | ||
879 | "Null completion " | ||
880 | "for tag %d", | ||
881 | tag); | ||
882 | |||
883 | if (mtip_check_surprise_removal( | ||
884 | dd->pdev)) { | ||
885 | mtip_command_cleanup(dd); | ||
886 | return; | ||
887 | } | ||
888 | } | ||
889 | } | ||
890 | } | ||
891 | } | ||
892 | } | ||
893 | |||
894 | /* | ||
895 | * Process legacy PIO and D2H interrupts | ||
896 | */ | ||
897 | static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat) | ||
898 | { | ||
899 | struct mtip_port *port = dd->port; | ||
900 | struct mtip_cmd *cmd = &port->commands[MTIP_TAG_INTERNAL]; | ||
901 | |||
902 | if (test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) && | ||
903 | (cmd != NULL) && !(readl(port->cmd_issue[MTIP_TAG_INTERNAL]) | ||
904 | & (1 << MTIP_TAG_INTERNAL))) { | ||
905 | if (cmd->comp_func) { | ||
906 | cmd->comp_func(port, | ||
907 | MTIP_TAG_INTERNAL, | ||
908 | cmd->comp_data, | ||
909 | 0); | ||
910 | return; | ||
911 | } | ||
912 | } | ||
913 | |||
914 | dev_warn(&dd->pdev->dev, "IRQ status 0x%x ignored.\n", port_stat); | ||
915 | |||
916 | return; | ||
917 | } | ||
918 | |||
919 | /* | ||
920 | * Demux and handle errors | ||
921 | */ | ||
922 | static inline void mtip_process_errors(struct driver_data *dd, u32 port_stat) | ||
923 | { | ||
924 | if (likely(port_stat & (PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR))) | ||
925 | mtip_handle_tfe(dd); | ||
926 | |||
927 | if (unlikely(port_stat & PORT_IRQ_CONNECT)) { | ||
928 | dev_warn(&dd->pdev->dev, | ||
929 | "Clearing PxSERR.DIAG.x\n"); | ||
930 | writel((1 << 26), dd->port->mmio + PORT_SCR_ERR); | ||
931 | } | ||
932 | |||
933 | if (unlikely(port_stat & PORT_IRQ_PHYRDY)) { | ||
934 | dev_warn(&dd->pdev->dev, | ||
935 | "Clearing PxSERR.DIAG.n\n"); | ||
936 | writel((1 << 16), dd->port->mmio + PORT_SCR_ERR); | ||
937 | } | ||
938 | |||
939 | if (unlikely(port_stat & ~PORT_IRQ_HANDLED)) { | ||
940 | dev_warn(&dd->pdev->dev, | ||
941 | "Port stat errors %x unhandled\n", | ||
942 | (port_stat & ~PORT_IRQ_HANDLED)); | ||
943 | } | ||
944 | } | ||
945 | |||
946 | static inline irqreturn_t mtip_handle_irq(struct driver_data *data) | ||
947 | { | ||
948 | struct driver_data *dd = (struct driver_data *) data; | ||
949 | struct mtip_port *port = dd->port; | ||
950 | u32 hba_stat, port_stat; | ||
951 | int rv = IRQ_NONE; | ||
952 | |||
953 | hba_stat = readl(dd->mmio + HOST_IRQ_STAT); | ||
954 | if (hba_stat) { | ||
955 | rv = IRQ_HANDLED; | ||
956 | |||
957 | /* Acknowledge the interrupt status on the port.*/ | ||
958 | port_stat = readl(port->mmio + PORT_IRQ_STAT); | ||
959 | writel(port_stat, port->mmio + PORT_IRQ_STAT); | ||
960 | |||
961 | /* Demux port status */ | ||
962 | if (likely(port_stat & PORT_IRQ_SDB_FIS)) | ||
963 | mtip_process_sdbf(dd); | ||
964 | |||
965 | if (unlikely(port_stat & PORT_IRQ_ERR)) { | ||
966 | if (unlikely(mtip_check_surprise_removal(dd->pdev))) { | ||
967 | mtip_command_cleanup(dd); | ||
968 | /* don't proceed further */ | ||
969 | return IRQ_HANDLED; | ||
970 | } | ||
971 | |||
972 | mtip_process_errors(dd, port_stat & PORT_IRQ_ERR); | ||
973 | } | ||
974 | |||
975 | if (unlikely(port_stat & PORT_IRQ_LEGACY)) | ||
976 | mtip_process_legacy(dd, port_stat & PORT_IRQ_LEGACY); | ||
977 | } | ||
978 | |||
979 | /* acknowledge interrupt */ | ||
980 | writel(hba_stat, dd->mmio + HOST_IRQ_STAT); | ||
981 | |||
982 | return rv; | ||
983 | } | ||
984 | |||
985 | /* | ||
986 | * Wrapper for mtip_handle_irq | ||
987 | * (ignores return code) | ||
988 | */ | ||
989 | static void mtip_tasklet(unsigned long data) | ||
990 | { | ||
991 | mtip_handle_irq((struct driver_data *) data); | ||
992 | } | ||
993 | |||
994 | /* | ||
995 | * HBA interrupt subroutine. | ||
996 | * | ||
997 | * @irq IRQ number. | ||
998 | * @instance Pointer to the driver data structure. | ||
999 | * | ||
1000 | * return value | ||
1001 | * IRQ_HANDLED An HBA interrupt was pending and handled. | ||
1002 | * IRQ_NONE This interrupt was not for the HBA. | ||
1003 | */ | ||
1004 | static irqreturn_t mtip_irq_handler(int irq, void *instance) | ||
1005 | { | ||
1006 | struct driver_data *dd = instance; | ||
1007 | tasklet_schedule(&dd->tasklet); | ||
1008 | return IRQ_HANDLED; | ||
1009 | } | ||
1010 | |||
1011 | static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag) | ||
1012 | { | ||
1013 | atomic_set(&port->commands[tag].active, 1); | ||
1014 | writel(1 << MTIP_TAG_BIT(tag), | ||
1015 | port->cmd_issue[MTIP_TAG_INDEX(tag)]); | ||
1016 | } | ||
1017 | |||
1018 | /* | ||
1019 | * Wait for port to quiesce | ||
1020 | * | ||
1021 | * @port Pointer to port data structure | ||
1022 | * @timeout Max duration to wait (ms) | ||
1023 | * | ||
1024 | * return value | ||
1025 | * 0 Success | ||
1026 | * -EBUSY Commands still active | ||
1027 | */ | ||
1028 | static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout) | ||
1029 | { | ||
1030 | unsigned long to; | ||
1031 | unsigned int n; | ||
1032 | unsigned int active = 1; | ||
1033 | |||
1034 | to = jiffies + msecs_to_jiffies(timeout); | ||
1035 | do { | ||
1036 | if (test_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags) && | ||
1037 | test_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags)) { | ||
1038 | msleep(20); | ||
1039 | continue; /* svc thd is actively issuing commands */ | ||
1040 | } | ||
1041 | /* | ||
1042 | * Ignore s_active bit 0 of array element 0. | ||
1043 | * This bit will always be set | ||
1044 | */ | ||
1045 | active = readl(port->s_active[0]) & 0xFFFFFFFE; | ||
1046 | for (n = 1; n < port->dd->slot_groups; n++) | ||
1047 | active |= readl(port->s_active[n]); | ||
1048 | |||
1049 | if (!active) | ||
1050 | break; | ||
1051 | |||
1052 | msleep(20); | ||
1053 | } while (time_before(jiffies, to)); | ||
1054 | |||
1055 | return active ? -EBUSY : 0; | ||
1056 | } | ||
1057 | |||
1058 | /* | ||
1059 | * Execute an internal command and wait for the completion. | ||
1060 | * | ||
1061 | * @port Pointer to the port data structure. | ||
1062 | * @fis Pointer to the FIS that describes the command. | ||
1063 | * @fis_len Length of the FIS in DWORDS (32-bit words). | ||
1064 | * @buffer DMA accessible for command data. | ||
1065 | * @buf_len Length, in bytes, of the data buffer. | ||
1066 | * @opts Command header options, excluding the FIS length | ||
1067 | * and the number of PRD entries. | ||
1068 | * @timeout Time in ms to wait for the command to complete. | ||
1069 | * | ||
1070 | * return value | ||
1071 | * 0 Command completed successfully. | ||
1072 | * -EFAULT The buffer address is not correctly aligned. | ||
1073 | * -EBUSY Internal command or other IO in progress. | ||
1074 | * -EAGAIN Time out waiting for command to complete. | ||
1075 | */ | ||
1076 | static int mtip_exec_internal_command(struct mtip_port *port, | ||
1077 | void *fis, | ||
1078 | int fis_len, | ||
1079 | dma_addr_t buffer, | ||
1080 | int buf_len, | ||
1081 | u32 opts, | ||
1082 | gfp_t atomic, | ||
1083 | unsigned long timeout) | ||
1084 | { | ||
1085 | struct mtip_cmd_sg *command_sg; | ||
1086 | DECLARE_COMPLETION_ONSTACK(wait); | ||
1087 | int rv = 0; | ||
1088 | struct mtip_cmd *int_cmd = &port->commands[MTIP_TAG_INTERNAL]; | ||
1089 | |||
1090 | /* Make sure the buffer is 8-byte aligned. This is ASIC specific. */ | ||
1091 | if (buffer & 0x00000007) { | ||
1092 | dev_err(&port->dd->pdev->dev, | ||
1093 | "SG buffer is not 8 byte aligned\n"); | ||
1094 | return -EFAULT; | ||
1095 | } | ||
1096 | |||
1097 | /* Only one internal command should be running at a time */ | ||
1098 | if (test_and_set_bit(MTIP_TAG_INTERNAL, port->allocated)) { | ||
1099 | dev_warn(&port->dd->pdev->dev, | ||
1100 | "Internal command already active\n"); | ||
1101 | return -EBUSY; | ||
1102 | } | ||
1103 | set_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags); | ||
1104 | |||
1105 | if (atomic == GFP_KERNEL) { | ||
1106 | /* wait for io to complete if non atomic */ | ||
1107 | if (mtip_quiesce_io(port, 5000) < 0) { | ||
1108 | dev_warn(&port->dd->pdev->dev, | ||
1109 | "Failed to quiesce IO\n"); | ||
1110 | release_slot(port, MTIP_TAG_INTERNAL); | ||
1111 | clear_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags); | ||
1112 | wake_up_interruptible(&port->svc_wait); | ||
1113 | return -EBUSY; | ||
1114 | } | ||
1115 | |||
1116 | /* Set the completion function and data for the command. */ | ||
1117 | int_cmd->comp_data = &wait; | ||
1118 | int_cmd->comp_func = mtip_completion; | ||
1119 | |||
1120 | } else { | ||
1121 | /* Clear completion - we're going to poll */ | ||
1122 | int_cmd->comp_data = NULL; | ||
1123 | int_cmd->comp_func = NULL; | ||
1124 | } | ||
1125 | |||
1126 | /* Copy the command to the command table */ | ||
1127 | memcpy(int_cmd->command, fis, fis_len*4); | ||
1128 | |||
1129 | /* Populate the SG list */ | ||
1130 | int_cmd->command_header->opts = | ||
1131 | __force_bit2int cpu_to_le32(opts | fis_len); | ||
1132 | if (buf_len) { | ||
1133 | command_sg = int_cmd->command + AHCI_CMD_TBL_HDR_SZ; | ||
1134 | |||
1135 | command_sg->info = | ||
1136 | __force_bit2int cpu_to_le32((buf_len-1) & 0x3FFFFF); | ||
1137 | command_sg->dba = | ||
1138 | __force_bit2int cpu_to_le32(buffer & 0xFFFFFFFF); | ||
1139 | command_sg->dba_upper = | ||
1140 | __force_bit2int cpu_to_le32((buffer >> 16) >> 16); | ||
1141 | |||
1142 | int_cmd->command_header->opts |= | ||
1143 | __force_bit2int cpu_to_le32((1 << 16)); | ||
1144 | } | ||
1145 | |||
1146 | /* Populate the command header */ | ||
1147 | int_cmd->command_header->byte_count = 0; | ||
1148 | |||
1149 | /* Issue the command to the hardware */ | ||
1150 | mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL); | ||
1151 | |||
1152 | /* Poll if atomic, wait_for_completion otherwise */ | ||
1153 | if (atomic == GFP_KERNEL) { | ||
1154 | /* Wait for the command to complete or timeout. */ | ||
1155 | if (wait_for_completion_timeout( | ||
1156 | &wait, | ||
1157 | msecs_to_jiffies(timeout)) == 0) { | ||
1158 | dev_err(&port->dd->pdev->dev, | ||
1159 | "Internal command did not complete [%d] " | ||
1160 | "within timeout of %lu ms\n", | ||
1161 | atomic, timeout); | ||
1162 | rv = -EAGAIN; | ||
1163 | } | ||
1164 | |||
1165 | if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) | ||
1166 | & (1 << MTIP_TAG_INTERNAL)) { | ||
1167 | dev_warn(&port->dd->pdev->dev, | ||
1168 | "Retiring internal command but CI is 1.\n"); | ||
1169 | } | ||
1170 | |||
1171 | } else { | ||
1172 | /* Spin for <timeout> checking if command still outstanding */ | ||
1173 | timeout = jiffies + msecs_to_jiffies(timeout); | ||
1174 | |||
1175 | while ((readl( | ||
1176 | port->cmd_issue[MTIP_TAG_INTERNAL]) | ||
1177 | & (1 << MTIP_TAG_INTERNAL)) | ||
1178 | && time_before(jiffies, timeout)) | ||
1179 | ; | ||
1180 | |||
1181 | if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) | ||
1182 | & (1 << MTIP_TAG_INTERNAL)) { | ||
1183 | dev_err(&port->dd->pdev->dev, | ||
1184 | "Internal command did not complete [%d]\n", | ||
1185 | atomic); | ||
1186 | rv = -EAGAIN; | ||
1187 | } | ||
1188 | } | ||
1189 | |||
1190 | /* Clear the allocated and active bits for the internal command. */ | ||
1191 | atomic_set(&int_cmd->active, 0); | ||
1192 | release_slot(port, MTIP_TAG_INTERNAL); | ||
1193 | clear_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags); | ||
1194 | wake_up_interruptible(&port->svc_wait); | ||
1195 | |||
1196 | return rv; | ||
1197 | } | ||
1198 | |||
1199 | /* | ||
1200 | * Byte-swap ATA ID strings. | ||
1201 | * | ||
1202 | * ATA identify data contains strings in byte-swapped 16-bit words. | ||
1203 | * They must be swapped (on all architectures) to be usable as C strings. | ||
1204 | * This function swaps bytes in-place. | ||
1205 | * | ||
1206 | * @buf The buffer location of the string | ||
1207 | * @len The number of bytes to swap | ||
1208 | * | ||
1209 | * return value | ||
1210 | * None | ||
1211 | */ | ||
1212 | static inline void ata_swap_string(u16 *buf, unsigned int len) | ||
1213 | { | ||
1214 | int i; | ||
1215 | for (i = 0; i < (len/2); i++) | ||
1216 | be16_to_cpus(&buf[i]); | ||
1217 | } | ||
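/*
 * Example: a model string beginning with "Micron" is returned by the
 * drive as the byte sequence "iMrcno"; swapping each 16-bit word in
 * place, as above, restores the readable "Micron".
 */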
1218 | |||
1219 | /* | ||
1220 | * Request the device identity information. | ||
1221 | * | ||
1222 | * If a user space buffer is not specified, i.e. is NULL, the | ||
1223 | * identify information is still read from the drive and placed | ||
1224 | * into the identify data buffer (@e port->identify) in the | ||
1225 | * port data structure. | ||
1226 | * When the identify buffer contains valid identify information @e | ||
1227 | * port->identify_valid is non-zero. | ||
1228 | * | ||
1229 | * @port Pointer to the port structure. | ||
1230 | * @user_buffer A user space buffer where the identify data should be | ||
1231 | * copied. | ||
1232 | * | ||
1233 | * return value | ||
1234 | * 0 Command completed successfully. | ||
1235 | * -EFAULT An error occurred while copying data to the user buffer. | ||
1236 | * -1 Command failed. | ||
1237 | */ | ||
1238 | static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer) | ||
1239 | { | ||
1240 | int rv = 0; | ||
1241 | struct host_to_dev_fis fis; | ||
1242 | |||
1243 | /* Build the FIS. */ | ||
1244 | memset(&fis, 0, sizeof(struct host_to_dev_fis)); | ||
1245 | fis.type = 0x27; | ||
1246 | fis.opts = 1 << 7; | ||
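/*
 * 0x27 is the Register Host-to-Device FIS type; bit 7 of the opts
 * byte is the C (command) bit, marking this FIS as a new command.
 */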
1247 | fis.command = ATA_CMD_ID_ATA; | ||
1248 | |||
1249 | /* Set the identify information as invalid. */ | ||
1250 | port->identify_valid = 0; | ||
1251 | |||
1252 | /* Clear the identify information. */ | ||
1253 | memset(port->identify, 0, sizeof(u16) * ATA_ID_WORDS); | ||
1254 | |||
1255 | /* Execute the command. */ | ||
1256 | if (mtip_exec_internal_command(port, | ||
1257 | &fis, | ||
1258 | 5, | ||
1259 | port->identify_dma, | ||
1260 | sizeof(u16) * ATA_ID_WORDS, | ||
1261 | 0, | ||
1262 | GFP_KERNEL, | ||
1263 | MTIP_INTERNAL_COMMAND_TIMEOUT_MS) | ||
1264 | < 0) { | ||
1265 | rv = -1; | ||
1266 | goto out; | ||
1267 | } | ||
1268 | |||
1269 | /* | ||
1270 | * Perform any necessary byte-swapping. Yes, the kernel does in fact | ||
1271 | * perform field-sensitive swapping on the string fields. | ||
1272 | * See the kernel use of ata_id_string() for proof of this. | ||
1273 | */ | ||
1274 | #ifdef __LITTLE_ENDIAN | ||
1275 | ata_swap_string(port->identify + 27, 40); /* model string*/ | ||
1276 | ata_swap_string(port->identify + 23, 8); /* firmware string*/ | ||
1277 | ata_swap_string(port->identify + 10, 20); /* serial# string*/ | ||
1278 | #else | ||
1279 | { | ||
1280 | int i; | ||
1281 | for (i = 0; i < ATA_ID_WORDS; i++) | ||
1282 | port->identify[i] = le16_to_cpu(port->identify[i]); | ||
1283 | } | ||
1284 | #endif | ||
1285 | |||
1286 | /* Set the identify buffer as valid. */ | ||
1287 | port->identify_valid = 1; | ||
1288 | |||
1289 | if (user_buffer) { | ||
1290 | if (copy_to_user( | ||
1291 | user_buffer, | ||
1292 | port->identify, | ||
1293 | ATA_ID_WORDS * sizeof(u16))) { | ||
1294 | rv = -EFAULT; | ||
1295 | goto out; | ||
1296 | } | ||
1297 | } | ||
1298 | |||
1299 | out: | ||
1300 | return rv; | ||
1301 | } | ||
1302 | |||
1303 | /* | ||
1304 | * Issue a standby immediate command to the device. | ||
1305 | * | ||
1306 | * @port Pointer to the port structure. | ||
1307 | * | ||
1308 | * return value | ||
1309 | * 0 Command was executed successfully. | ||
1310 | * -1 An error occurred while executing the command. | ||
1311 | */ | ||
1312 | static int mtip_standby_immediate(struct mtip_port *port) | ||
1313 | { | ||
1314 | int rv; | ||
1315 | struct host_to_dev_fis fis; | ||
1316 | |||
1317 | /* Build the FIS. */ | ||
1318 | memset(&fis, 0, sizeof(struct host_to_dev_fis)); | ||
1319 | fis.type = 0x27; | ||
1320 | fis.opts = 1 << 7; | ||
1321 | fis.command = ATA_CMD_STANDBYNOW1; | ||
1322 | |||
1323 | /* Execute the command. Use a 15-second timeout for large drives. */ | ||
1324 | rv = mtip_exec_internal_command(port, | ||
1325 | &fis, | ||
1326 | 5, | ||
1327 | 0, | ||
1328 | 0, | ||
1329 | 0, | ||
1330 | GFP_KERNEL, | ||
1331 | 15000); | ||
1332 | |||
1333 | return rv; | ||
1334 | } | ||
1335 | |||
1336 | /* | ||
1337 | * Get the drive capacity. | ||
1338 | * | ||
1339 | * @dd Pointer to the device data structure. | ||
1340 | * @sectors Pointer to the variable that will receive the sector count. | ||
1341 | * | ||
1342 | * return value | ||
1343 | * 1 Capacity was returned successfully. | ||
1344 | * 0 The identify information is invalid. | ||
1345 | */ | ||
1346 | static bool mtip_hw_get_capacity(struct driver_data *dd, sector_t *sectors) | ||
1347 | { | ||
1348 | struct mtip_port *port = dd->port; | ||
1349 | u64 total, raw0, raw1, raw2, raw3; | ||
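/*
 * ATA IDENTIFY DEVICE words 100-103 hold the 48-bit count of
 * user-addressable sectors (the LBA48 capacity).
 */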
1350 | raw0 = port->identify[100]; | ||
1351 | raw1 = port->identify[101]; | ||
1352 | raw2 = port->identify[102]; | ||
1353 | raw3 = port->identify[103]; | ||
1354 | total = raw0 | raw1<<16 | raw2<<32 | raw3<<48; | ||
1355 | *sectors = total; | ||
1356 | return (bool) !!port->identify_valid; | ||
1357 | } | ||
1358 | |||
1359 | /* | ||
1360 | * Reset the HBA. | ||
1361 | * | ||
1362 | * Resets the HBA by setting the HBA Reset bit in the Global | ||
1363 | * HBA Control register. After setting the HBA Reset bit the | ||
1364 | * function waits for 1 second before reading the HBA Reset | ||
1365 | * bit to make sure it has cleared. If HBA Reset is not clear | ||
1366 | * an error is returned. Cannot be used in non-blockable | ||
1367 | * an error is returned. This function sleeps, so it must not be | ||
1368 | * called from atomic context. | ||
1369 | * @dd Pointer to the driver data structure. | ||
1370 | * | ||
1371 | * return value | ||
1372 | * 0 The reset was successful. | ||
1373 | * -1 The HBA Reset bit did not clear. | ||
1374 | */ | ||
1375 | static int mtip_hba_reset(struct driver_data *dd) | ||
1376 | { | ||
1377 | mtip_deinit_port(dd->port); | ||
1378 | |||
1379 | /* Set the reset bit */ | ||
1380 | writel(HOST_RESET, dd->mmio + HOST_CTL); | ||
1381 | |||
1382 | /* Flush */ | ||
1383 | readl(dd->mmio + HOST_CTL); | ||
1384 | |||
1385 | /* Wait for reset to clear */ | ||
1386 | ssleep(1); | ||
1387 | |||
1388 | /* Check the bit has cleared */ | ||
1389 | if (readl(dd->mmio + HOST_CTL) & HOST_RESET) { | ||
1390 | dev_err(&dd->pdev->dev, | ||
1391 | "Reset bit did not clear.\n"); | ||
1392 | return -1; | ||
1393 | } | ||
1394 | |||
1395 | return 0; | ||
1396 | } | ||
1397 | |||
1398 | /* | ||
1399 | * Display the identify command data. | ||
1400 | * | ||
1401 | * @port Pointer to the port data structure. | ||
1402 | * | ||
1403 | * return value | ||
1404 | * None | ||
1405 | */ | ||
1406 | static void mtip_dump_identify(struct mtip_port *port) | ||
1407 | { | ||
1408 | sector_t sectors; | ||
1409 | unsigned short revid; | ||
1410 | char cbuf[42]; | ||
1411 | |||
1412 | if (!port->identify_valid) | ||
1413 | return; | ||
1414 | |||
1415 | strlcpy(cbuf, (char *)(port->identify+10), 21); | ||
1416 | dev_info(&port->dd->pdev->dev, | ||
1417 | "Serial No.: %s\n", cbuf); | ||
1418 | |||
1419 | strlcpy(cbuf, (char *)(port->identify+23), 9); | ||
1420 | dev_info(&port->dd->pdev->dev, | ||
1421 | "Firmware Ver.: %s\n", cbuf); | ||
1422 | |||
1423 | strlcpy(cbuf, (char *)(port->identify+27), 41); | ||
1424 | dev_info(&port->dd->pdev->dev, "Model: %s\n", cbuf); | ||
1425 | |||
1426 | if (mtip_hw_get_capacity(port->dd, §ors)) | ||
1427 | dev_info(&port->dd->pdev->dev, | ||
1428 | "Capacity: %llu sectors (%llu MB)\n", | ||
1429 | (u64)sectors, | ||
1430 | ((u64)sectors) * ATA_SECT_SIZE >> 20); | ||
1431 | |||
1432 | pci_read_config_word(port->dd->pdev, PCI_REVISION_ID, &revid); | ||
1433 | switch (revid & 0xFF) { | ||
1434 | case 0x1: | ||
1435 | strlcpy(cbuf, "A0", 3); | ||
1436 | break; | ||
1437 | case 0x3: | ||
1438 | strlcpy(cbuf, "A2", 3); | ||
1439 | break; | ||
1440 | default: | ||
1441 | strlcpy(cbuf, "?", 2); | ||
1442 | break; | ||
1443 | } | ||
1444 | dev_info(&port->dd->pdev->dev, | ||
1445 | "Card Type: %s\n", cbuf); | ||
1446 | } | ||
1447 | |||
1448 | /* | ||
1449 | * Map the command's scatter list into the command table. | ||
1450 | * | ||
1451 | * @command Pointer to the command. | ||
1452 | * @nents Number of scatter list entries. | ||
1453 | * | ||
1454 | * return value | ||
1455 | * None | ||
1456 | */ | ||
1457 | static inline void fill_command_sg(struct driver_data *dd, | ||
1458 | struct mtip_cmd *command, | ||
1459 | int nents) | ||
1460 | { | ||
1461 | int n; | ||
1462 | unsigned int dma_len; | ||
1463 | struct mtip_cmd_sg *command_sg; | ||
1464 | struct scatterlist *sg = command->sg; | ||
1465 | |||
1466 | command_sg = command->command + AHCI_CMD_TBL_HDR_SZ; | ||
1467 | |||
1468 | for (n = 0; n < nents; n++) { | ||
1469 | dma_len = sg_dma_len(sg); | ||
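/*
 * Each scatter/gather entry stores (length - 1) in a 22-bit field,
 * so a single entry can describe at most 4 MB (0x400000 bytes).
 */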
1470 | if (dma_len > 0x400000) | ||
1471 | dev_err(&dd->pdev->dev, | ||
1472 | "DMA segment length truncated\n"); | ||
1473 | command_sg->info = __force_bit2int | ||
1474 | cpu_to_le32((dma_len-1) & 0x3FFFFF); | ||
1475 | command_sg->dba = __force_bit2int | ||
1476 | cpu_to_le32(sg_dma_address(sg)); | ||
1477 | command_sg->dba_upper = __force_bit2int | ||
1478 | cpu_to_le32((sg_dma_address(sg) >> 16) >> 16); | ||
1479 | command_sg++; | ||
1480 | sg++; | ||
1481 | } | ||
1482 | } | ||
1483 | |||
1484 | /* | ||
1485 | * @brief Execute a drive task command (HDIO_DRIVE_TASK). | ||
1486 | * | ||
1487 | * return value 0 The command completed successfully. | ||
1488 | * return value -1 An error occurred while executing the command. | ||
1489 | */ | ||
1490 | static int exec_drive_task(struct mtip_port *port, u8 *command) | ||
1491 | { | ||
1492 | struct host_to_dev_fis fis; | ||
1493 | struct host_to_dev_fis *reply = (port->rxfis + RX_FIS_D2H_REG); | ||
1494 | |||
1495 | /* Build the FIS. */ | ||
1496 | memset(&fis, 0, sizeof(struct host_to_dev_fis)); | ||
1497 | fis.type = 0x27; | ||
1498 | fis.opts = 1 << 7; | ||
1499 | fis.command = command[0]; | ||
1500 | fis.features = command[1]; | ||
1501 | fis.sect_count = command[2]; | ||
1502 | fis.sector = command[3]; | ||
1503 | fis.cyl_low = command[4]; | ||
1504 | fis.cyl_hi = command[5]; | ||
1505 | fis.device = command[6] & ~0x10; /* Clear the dev bit*/ | ||
1506 | |||
1507 | |||
1508 | dbg_printk(MTIP_DRV_NAME "%s: User Command: cmd %x, feat %x, " | ||
1509 | "nsect %x, sect %x, lcyl %x, " | ||
1510 | "hcyl %x, sel %x\n", | ||
1511 | __func__, | ||
1512 | command[0], | ||
1513 | command[1], | ||
1514 | command[2], | ||
1515 | command[3], | ||
1516 | command[4], | ||
1517 | command[5], | ||
1518 | command[6]); | ||
1519 | |||
1520 | /* Execute the command. */ | ||
1521 | if (mtip_exec_internal_command(port, | ||
1522 | &fis, | ||
1523 | 5, | ||
1524 | 0, | ||
1525 | 0, | ||
1526 | 0, | ||
1527 | GFP_KERNEL, | ||
1528 | MTIP_IOCTL_COMMAND_TIMEOUT_MS) < 0) { | ||
1529 | return -1; | ||
1530 | } | ||
1531 | |||
1532 | command[0] = reply->command; /* Status*/ | ||
1533 | command[1] = reply->features; /* Error*/ | ||
1534 | command[4] = reply->cyl_low; | ||
1535 | command[5] = reply->cyl_hi; | ||
1536 | |||
1537 | dbg_printk(MTIP_DRV_NAME "%s: Completion Status: stat %x, " | ||
1538 | "err %x , cyl_lo %x cyl_hi %x\n", | ||
1539 | __func__, | ||
1540 | command[0], | ||
1541 | command[1], | ||
1542 | command[4], | ||
1543 | command[5]); | ||
1544 | |||
1545 | return 0; | ||
1546 | } | ||
1547 | |||
1548 | /* | ||
1549 | * @brief Execute a drive command (HDIO_DRIVE_CMD). | ||
1550 | * | ||
1551 | * @param port Pointer to the port data structure. | ||
1552 | * @param command Pointer to the user specified command parameters. | ||
1553 | * @param user_buffer Pointer to the user space buffer where read sector | ||
1554 | * data should be copied. | ||
1555 | * | ||
1556 | * return value 0 The command completed successfully. | ||
1557 | * return value -EFAULT An error occurred while copying the completion | ||
1558 | * data to the user space buffer. | ||
1559 | * return value -1 An error occurred while executing the command. | ||
1560 | */ | ||
1561 | static int exec_drive_command(struct mtip_port *port, u8 *command, | ||
1562 | void __user *user_buffer) | ||
1563 | { | ||
1564 | struct host_to_dev_fis fis; | ||
1565 | struct host_to_dev_fis *reply = (port->rxfis + RX_FIS_D2H_REG); | ||
1566 | |||
1567 | /* Build the FIS. */ | ||
1568 | memset(&fis, 0, sizeof(struct host_to_dev_fis)); | ||
1569 | fis.type = 0x27; | ||
1570 | fis.opts = 1 << 7; | ||
1571 | fis.command = command[0]; | ||
1572 | fis.features = command[2]; | ||
1573 | fis.sect_count = command[3]; | ||
1574 | if (fis.command == ATA_CMD_SMART) { | ||
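/*
 * SMART commands require the signature value 0xC24F in the LBA
 * mid/high (cylinder) registers.
 */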
1575 | fis.sector = command[1]; | ||
1576 | fis.cyl_low = 0x4F; | ||
1577 | fis.cyl_hi = 0xC2; | ||
1578 | } | ||
1579 | |||
1580 | dbg_printk(MTIP_DRV_NAME | ||
1581 | "%s: User Command: cmd %x, sect %x, " | ||
1582 | "feat %x, sectcnt %x\n", | ||
1583 | __func__, | ||
1584 | command[0], | ||
1585 | command[1], | ||
1586 | command[2], | ||
1587 | command[3]); | ||
1588 | |||
1589 | memset(port->sector_buffer, 0x00, ATA_SECT_SIZE); | ||
1590 | |||
1591 | /* Execute the command. */ | ||
1592 | if (mtip_exec_internal_command(port, | ||
1593 | &fis, | ||
1594 | 5, | ||
1595 | port->sector_buffer_dma, | ||
1596 | (command[3] != 0) ? ATA_SECT_SIZE : 0, | ||
1597 | 0, | ||
1598 | GFP_KERNEL, | ||
1599 | MTIP_IOCTL_COMMAND_TIMEOUT_MS) | ||
1600 | < 0) { | ||
1601 | return -1; | ||
1602 | } | ||
1603 | |||
1604 | /* Collect the completion status. */ | ||
1605 | command[0] = reply->command; /* Status*/ | ||
1606 | command[1] = reply->features; /* Error*/ | ||
1607 | command[2] = command[3]; | ||
1608 | |||
1609 | dbg_printk(MTIP_DRV_NAME | ||
1610 | "%s: Completion Status: stat %x, " | ||
1611 | "err %x, cmd %x\n", | ||
1612 | __func__, | ||
1613 | command[0], | ||
1614 | command[1], | ||
1615 | command[2]); | ||
1616 | |||
1617 | if (user_buffer && command[3]) { | ||
1618 | if (copy_to_user(user_buffer, | ||
1619 | port->sector_buffer, | ||
1620 | ATA_SECT_SIZE * command[3])) { | ||
1621 | return -EFAULT; | ||
1622 | } | ||
1623 | } | ||
1624 | |||
1625 | return 0; | ||
1626 | } | ||
1627 | |||
1628 | /* | ||
1629 | * Indicates whether a command has a single sector payload. | ||
1630 | * | ||
1631 | * @command Command opcode sent to the device. | ||
1632 | * @features Features register value accompanying the command. | ||
1633 | * | ||
1634 | * return value | ||
1635 | * 1 command is one that always has a single sector payload, | ||
1636 | * regardless of the value in the Sector Count field. | ||
1637 | * 0 otherwise | ||
1638 | * | ||
1639 | */ | ||
1640 | static unsigned int implicit_sector(unsigned char command, | ||
1641 | unsigned char features) | ||
1642 | { | ||
1643 | unsigned int rv = 0; | ||
1644 | |||
1645 | /* list of commands that have an implicit sector count of 1 */ | ||
1646 | switch (command) { | ||
1647 | case ATA_CMD_SEC_SET_PASS: | ||
1648 | case ATA_CMD_SEC_UNLOCK: | ||
1649 | case ATA_CMD_SEC_ERASE_PREP: | ||
1650 | case ATA_CMD_SEC_ERASE_UNIT: | ||
1651 | case ATA_CMD_SEC_FREEZE_LOCK: | ||
1652 | case ATA_CMD_SEC_DISABLE_PASS: | ||
1653 | case ATA_CMD_PMP_READ: | ||
1654 | case ATA_CMD_PMP_WRITE: | ||
1655 | rv = 1; | ||
1656 | break; | ||
1657 | case ATA_CMD_SET_MAX: | ||
1658 | if (features == ATA_SET_MAX_UNLOCK) | ||
1659 | rv = 1; | ||
1660 | break; | ||
1661 | case ATA_CMD_SMART: | ||
1662 | if ((features == ATA_SMART_READ_VALUES) || | ||
1663 | (features == ATA_SMART_READ_THRESHOLDS)) | ||
1664 | rv = 1; | ||
1665 | break; | ||
1666 | case ATA_CMD_CONF_OVERLAY: | ||
1667 | if ((features == ATA_DCO_IDENTIFY) || | ||
1668 | (features == ATA_DCO_SET)) | ||
1669 | rv = 1; | ||
1670 | break; | ||
1671 | } | ||
1672 | return rv; | ||
1673 | } | ||
1674 | |||
1675 | /* | ||
1676 | * Executes a taskfile | ||
1677 | * See ide_taskfile_ioctl() for derivation | ||
1678 | */ | ||
1679 | static int exec_drive_taskfile(struct driver_data *dd, | ||
1680 | void __user *buf, | ||
1681 | ide_task_request_t *req_task, | ||
1682 | int outtotal) | ||
1683 | { | ||
1684 | struct host_to_dev_fis fis; | ||
1685 | struct host_to_dev_fis *reply; | ||
1686 | u8 *outbuf = NULL; | ||
1687 | u8 *inbuf = NULL; | ||
1688 | dma_addr_t outbuf_dma = 0; | ||
1689 | dma_addr_t inbuf_dma = 0; | ||
1690 | dma_addr_t dma_buffer = 0; | ||
1691 | int err = 0; | ||
1692 | unsigned int taskin = 0; | ||
1693 | unsigned int taskout = 0; | ||
1694 | u8 nsect = 0; | ||
1695 | unsigned int timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS; | ||
1696 | unsigned int force_single_sector; | ||
1697 | unsigned int transfer_size; | ||
1698 | unsigned long task_file_data; | ||
1699 | int intotal = outtotal + req_task->out_size; | ||
1700 | |||
1701 | taskout = req_task->out_size; | ||
1702 | taskin = req_task->in_size; | ||
1703 | /* 130560 = 512 * 0xFF*/ | ||
1704 | if (taskin > 130560 || taskout > 130560) { | ||
1705 | err = -EINVAL; | ||
1706 | goto abort; | ||
1707 | } | ||
1708 | |||
1709 | if (taskout) { | ||
1710 | outbuf = kzalloc(taskout, GFP_KERNEL); | ||
1711 | if (outbuf == NULL) { | ||
1712 | err = -ENOMEM; | ||
1713 | goto abort; | ||
1714 | } | ||
1715 | if (copy_from_user(outbuf, buf + outtotal, taskout)) { | ||
1716 | err = -EFAULT; | ||
1717 | goto abort; | ||
1718 | } | ||
1719 | outbuf_dma = pci_map_single(dd->pdev, | ||
1720 | outbuf, | ||
1721 | taskout, | ||
1722 | DMA_TO_DEVICE); | ||
1723 | if (outbuf_dma == 0) { | ||
1724 | err = -ENOMEM; | ||
1725 | goto abort; | ||
1726 | } | ||
1727 | dma_buffer = outbuf_dma; | ||
1728 | } | ||
1729 | |||
1730 | if (taskin) { | ||
1731 | inbuf = kzalloc(taskin, GFP_KERNEL); | ||
1732 | if (inbuf == NULL) { | ||
1733 | err = -ENOMEM; | ||
1734 | goto abort; | ||
1735 | } | ||
1736 | |||
1737 | if (copy_from_user(inbuf, buf + intotal, taskin)) { | ||
1738 | err = -EFAULT; | ||
1739 | goto abort; | ||
1740 | } | ||
1741 | inbuf_dma = pci_map_single(dd->pdev, | ||
1742 | inbuf, | ||
1743 | taskin, DMA_FROM_DEVICE); | ||
1744 | if (inbuf_dma == 0) { | ||
1745 | err = -ENOMEM; | ||
1746 | goto abort; | ||
1747 | } | ||
1748 | dma_buffer = inbuf_dma; | ||
1749 | } | ||
1750 | |||
1751 | /* Only PIO and non-data commands are supported by this ioctl. */ | ||
1752 | switch (req_task->data_phase) { | ||
1753 | case TASKFILE_OUT: | ||
1754 | nsect = taskout / ATA_SECT_SIZE; | ||
1755 | reply = (dd->port->rxfis + RX_FIS_PIO_SETUP); | ||
1756 | break; | ||
1757 | case TASKFILE_IN: | ||
1758 | reply = (dd->port->rxfis + RX_FIS_PIO_SETUP); | ||
1759 | break; | ||
1760 | case TASKFILE_NO_DATA: | ||
1761 | reply = (dd->port->rxfis + RX_FIS_D2H_REG); | ||
1762 | break; | ||
1763 | default: | ||
1764 | err = -EINVAL; | ||
1765 | goto abort; | ||
1766 | } | ||
1767 | |||
1768 | /* Build the FIS. */ | ||
1769 | memset(&fis, 0, sizeof(struct host_to_dev_fis)); | ||
1770 | |||
1771 | fis.type = 0x27; | ||
1772 | fis.opts = 1 << 7; | ||
1773 | fis.command = req_task->io_ports[7]; | ||
1774 | fis.features = req_task->io_ports[1]; | ||
1775 | fis.sect_count = req_task->io_ports[2]; | ||
1776 | fis.lba_low = req_task->io_ports[3]; | ||
1777 | fis.lba_mid = req_task->io_ports[4]; | ||
1778 | fis.lba_hi = req_task->io_ports[5]; | ||
1779 | /* Clear the dev bit*/ | ||
1780 | fis.device = req_task->io_ports[6] & ~0x10; | ||
1781 | |||
1782 | if ((req_task->in_flags.all == 0) && (req_task->out_flags.all & 1)) { | ||
1783 | req_task->in_flags.all = | ||
1784 | IDE_TASKFILE_STD_IN_FLAGS | | ||
1785 | (IDE_HOB_STD_IN_FLAGS << 8); | ||
1786 | fis.lba_low_ex = req_task->hob_ports[3]; | ||
1787 | fis.lba_mid_ex = req_task->hob_ports[4]; | ||
1788 | fis.lba_hi_ex = req_task->hob_ports[5]; | ||
1789 | fis.features_ex = req_task->hob_ports[1]; | ||
1790 | fis.sect_cnt_ex = req_task->hob_ports[2]; | ||
1791 | |||
1792 | } else { | ||
1793 | req_task->in_flags.all = IDE_TASKFILE_STD_IN_FLAGS; | ||
1794 | } | ||
1795 | |||
1796 | force_single_sector = implicit_sector(fis.command, fis.features); | ||
1797 | |||
1798 | if ((taskin || taskout) && (!fis.sect_count)) { | ||
1799 | if (nsect) | ||
1800 | fis.sect_count = nsect; | ||
1801 | else { | ||
1802 | if (!force_single_sector) { | ||
1803 | dev_warn(&dd->pdev->dev, | ||
1804 | "data movement but " | ||
1805 | "sect_count is 0\n"); | ||
1806 | err = -EINVAL; | ||
1807 | goto abort; | ||
1808 | } | ||
1809 | } | ||
1810 | } | ||
1811 | |||
1812 | dbg_printk(MTIP_DRV_NAME | ||
1813 | "taskfile: cmd %x, feat %x, nsect %x," | ||
1814 | " sect/lbal %x, lcyl/lbam %x, hcyl/lbah %x," | ||
1815 | " head/dev %x\n", | ||
1816 | fis.command, | ||
1817 | fis.features, | ||
1818 | fis.sect_count, | ||
1819 | fis.lba_low, | ||
1820 | fis.lba_mid, | ||
1821 | fis.lba_hi, | ||
1822 | fis.device); | ||
1823 | |||
1824 | switch (fis.command) { | ||
1825 | case ATA_CMD_DOWNLOAD_MICRO: | ||
1826 | /* Change timeout for Download Microcode to 60 seconds.*/ | ||
1827 | timeout = 60000; | ||
1828 | break; | ||
1829 | case ATA_CMD_SEC_ERASE_UNIT: | ||
1830 | /* Change timeout for Security Erase Unit to 4 minutes.*/ | ||
1831 | timeout = 240000; | ||
1832 | break; | ||
1833 | case ATA_CMD_STANDBYNOW1: | ||
1834 | /* Change timeout for standby immediate to 10 seconds.*/ | ||
1835 | timeout = 10000; | ||
1836 | break; | ||
1837 | case 0xF7: | ||
1838 | case 0xFA: | ||
1839 | /* Change timeout for vendor unique command to 10 secs */ | ||
1840 | timeout = 10000; | ||
1841 | break; | ||
1842 | case ATA_CMD_SMART: | ||
1843 | /* Change timeout for SMART command to 10 seconds */ | ||
1844 | timeout = 10000; | ||
1845 | break; | ||
1846 | default: | ||
1847 | timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS; | ||
1848 | break; | ||
1849 | } | ||
1850 | |||
1851 | /* Determine the correct transfer size.*/ | ||
1852 | if (force_single_sector) | ||
1853 | transfer_size = ATA_SECT_SIZE; | ||
1854 | else | ||
1855 | transfer_size = ATA_SECT_SIZE * fis.sect_count; | ||
1856 | |||
1857 | /* Execute the command.*/ | ||
1858 | if (mtip_exec_internal_command(dd->port, | ||
1859 | &fis, | ||
1860 | 5, | ||
1861 | dma_buffer, | ||
1862 | transfer_size, | ||
1863 | 0, | ||
1864 | GFP_KERNEL, | ||
1865 | timeout) < 0) { | ||
1866 | err = -EIO; | ||
1867 | goto abort; | ||
1868 | } | ||
1869 | |||
1870 | task_file_data = readl(dd->port->mmio+PORT_TFDATA); | ||
1871 | |||
1872 | if ((req_task->data_phase == TASKFILE_IN) && !(task_file_data & 1)) { | ||
1873 | reply = dd->port->rxfis + RX_FIS_PIO_SETUP; | ||
1874 | req_task->io_ports[7] = reply->control; | ||
1875 | } else { | ||
1876 | reply = dd->port->rxfis + RX_FIS_D2H_REG; | ||
1877 | req_task->io_ports[7] = reply->command; | ||
1878 | } | ||
1879 | |||
1880 | /* reclaim the DMA buffers.*/ | ||
1881 | if (inbuf_dma) | ||
1882 | pci_unmap_single(dd->pdev, inbuf_dma, | ||
1883 | taskin, DMA_FROM_DEVICE); | ||
1884 | if (outbuf_dma) | ||
1885 | pci_unmap_single(dd->pdev, outbuf_dma, | ||
1886 | taskout, DMA_TO_DEVICE); | ||
1887 | inbuf_dma = 0; | ||
1888 | outbuf_dma = 0; | ||
1889 | |||
1890 | /* return the ATA registers to the caller.*/ | ||
1891 | req_task->io_ports[1] = reply->features; | ||
1892 | req_task->io_ports[2] = reply->sect_count; | ||
1893 | req_task->io_ports[3] = reply->lba_low; | ||
1894 | req_task->io_ports[4] = reply->lba_mid; | ||
1895 | req_task->io_ports[5] = reply->lba_hi; | ||
1896 | req_task->io_ports[6] = reply->device; | ||
1897 | |||
1898 | if (req_task->out_flags.all & 1) { | ||
1899 | |||
1900 | req_task->hob_ports[3] = reply->lba_low_ex; | ||
1901 | req_task->hob_ports[4] = reply->lba_mid_ex; | ||
1902 | req_task->hob_ports[5] = reply->lba_hi_ex; | ||
1903 | req_task->hob_ports[1] = reply->features_ex; | ||
1904 | req_task->hob_ports[2] = reply->sect_cnt_ex; | ||
1905 | } | ||
1906 | |||
1907 | /* COM reset after secure erase or low-level format */ | ||
1908 | if (((fis.command == ATA_CMD_SEC_ERASE_UNIT) || | ||
1909 | ((fis.command == 0xFC) && | ||
1910 | (fis.features == 0x27 || fis.features == 0x72 || | ||
1911 | fis.features == 0x62 || fis.features == 0x26))) && | ||
1912 | !(reply->command & 1)) { | ||
1913 | mtip_restart_port(dd->port); | ||
1914 | } | ||
1915 | |||
1916 | dbg_printk(MTIP_DRV_NAME | ||
1917 | "%s: Completion: stat %x," | ||
1918 | "err %x, sect_cnt %x, lbalo %x," | ||
1919 | "lbamid %x, lbahi %x, dev %x\n", | ||
1920 | __func__, | ||
1921 | req_task->io_ports[7], | ||
1922 | req_task->io_ports[1], | ||
1923 | req_task->io_ports[2], | ||
1924 | req_task->io_ports[3], | ||
1925 | req_task->io_ports[4], | ||
1926 | req_task->io_ports[5], | ||
1927 | req_task->io_ports[6]); | ||
1928 | |||
1929 | if (taskout) { | ||
1930 | if (copy_to_user(buf + outtotal, outbuf, taskout)) { | ||
1931 | err = -EFAULT; | ||
1932 | goto abort; | ||
1933 | } | ||
1934 | } | ||
1935 | if (taskin) { | ||
1936 | if (copy_to_user(buf + intotal, inbuf, taskin)) { | ||
1937 | err = -EFAULT; | ||
1938 | goto abort; | ||
1939 | } | ||
1940 | } | ||
1941 | abort: | ||
1942 | if (inbuf_dma) | ||
1943 | pci_unmap_single(dd->pdev, inbuf_dma, | ||
1944 | taskin, DMA_FROM_DEVICE); | ||
1945 | if (outbuf_dma) | ||
1946 | pci_unmap_single(dd->pdev, outbuf_dma, | ||
1947 | taskout, DMA_TO_DEVICE); | ||
1948 | kfree(outbuf); | ||
1949 | kfree(inbuf); | ||
1950 | |||
1951 | return err; | ||
1952 | } | ||
1953 | |||
1954 | /* | ||
1955 | * Handle IOCTL calls from the Block Layer. | ||
1956 | * | ||
1957 | * This function is called by the Block Layer when it receives an IOCTL | ||
1958 | * command that it does not understand. If the IOCTL command is not supported | ||
1959 | * this function returns -EINVAL. | ||
1960 | * | ||
1961 | * @dd Pointer to the driver data structure. | ||
1962 | * @cmd IOCTL command passed from the Block Layer. | ||
1963 | * @arg IOCTL argument passed from the Block Layer. | ||
1964 | * | ||
1965 | * return value | ||
1966 | * 0 The IOCTL completed successfully. | ||
1967 | * -EINVAL The specified command is not supported. | ||
1968 | * -EFAULT An error occurred copying data to a user space buffer. | ||
1969 | * -EIO An error occurred while executing the command. | ||
1970 | */ | ||
1971 | static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd, | ||
1972 | unsigned long arg) | ||
1973 | { | ||
1974 | switch (cmd) { | ||
1975 | case HDIO_GET_IDENTITY: | ||
1976 | if (mtip_get_identify(dd->port, (void __user *) arg) < 0) { | ||
1977 | dev_warn(&dd->pdev->dev, | ||
1978 | "Unable to read identity\n"); | ||
1979 | return -EIO; | ||
1980 | } | ||
1981 | |||
1982 | break; | ||
1983 | case HDIO_DRIVE_CMD: | ||
1984 | { | ||
1985 | u8 drive_command[4]; | ||
1986 | |||
1987 | /* Copy the user command info to our buffer. */ | ||
1988 | if (copy_from_user(drive_command, | ||
1989 | (void __user *) arg, | ||
1990 | sizeof(drive_command))) | ||
1991 | return -EFAULT; | ||
1992 | |||
1993 | /* Execute the drive command. */ | ||
1994 | if (exec_drive_command(dd->port, | ||
1995 | drive_command, | ||
1996 | (void __user *) (arg+4))) | ||
1997 | return -EIO; | ||
1998 | |||
1999 | /* Copy the status back to the users buffer. */ | ||
2000 | if (copy_to_user((void __user *) arg, | ||
2001 | drive_command, | ||
2002 | sizeof(drive_command))) | ||
2003 | return -EFAULT; | ||
2004 | |||
2005 | break; | ||
2006 | } | ||
2007 | case HDIO_DRIVE_TASK: | ||
2008 | { | ||
2009 | u8 drive_command[7]; | ||
2010 | |||
2011 | /* Copy the user command info to our buffer. */ | ||
2012 | if (copy_from_user(drive_command, | ||
2013 | (void __user *) arg, | ||
2014 | sizeof(drive_command))) | ||
2015 | return -EFAULT; | ||
2016 | |||
2017 | /* Execute the drive command. */ | ||
2018 | if (exec_drive_task(dd->port, drive_command)) | ||
2019 | return -EIO; | ||
2020 | |||
2021 | /* Copy the status back to the users buffer. */ | ||
2022 | if (copy_to_user((void __user *) arg, | ||
2023 | drive_command, | ||
2024 | sizeof(drive_command))) | ||
2025 | return -EFAULT; | ||
2026 | |||
2027 | break; | ||
2028 | } | ||
2029 | case HDIO_DRIVE_TASKFILE: { | ||
2030 | ide_task_request_t req_task; | ||
2031 | int ret, outtotal; | ||
2032 | |||
2033 | if (copy_from_user(&req_task, (void __user *) arg, | ||
2034 | sizeof(req_task))) | ||
2035 | return -EFAULT; | ||
2036 | |||
2037 | outtotal = sizeof(req_task); | ||
2038 | |||
2039 | ret = exec_drive_taskfile(dd, (void __user *) arg, | ||
2040 | &req_task, outtotal); | ||
2041 | |||
2042 | if (copy_to_user((void __user *) arg, &req_task, | ||
2043 | sizeof(req_task))) | ||
2044 | return -EFAULT; | ||
2045 | |||
2046 | return ret; | ||
2047 | } | ||
2048 | |||
2049 | default: | ||
2050 | return -EINVAL; | ||
2051 | } | ||
2052 | return 0; | ||
2053 | } | ||
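/*
 * Usage sketch (user space, not part of this driver): the HDIO_DRIVE_CMD
 * case above follows the standard <linux/hdreg.h> argument layout:
 * args[0] = command, args[1] = sector/LBA low, args[2] = features,
 * args[3] = sector count, with any returned sector data placed at
 * args + 4. The example below issues SMART READ DATA. The /dev/rssda
 * node name is an assumption; 0xB0 and 0xD0 are the standard ATA
 * command and feature values for SMART READ DATA.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/hdreg.h>

int main(void)
{
	unsigned char args[4 + 512];	/* 4 control bytes + one sector */
	int fd = open("/dev/rssda", O_RDONLY);	/* assumed device node */

	if (fd < 0)
		return 1;

	memset(args, 0, sizeof(args));
	args[0] = 0xB0;		/* SMART */
	args[2] = 0xD0;		/* SMART READ DATA */
	args[3] = 1;		/* one sector of attribute data */

	if (ioctl(fd, HDIO_DRIVE_CMD, args) == 0)
		printf("status %02x error %02x\n", args[0], args[1]);

	close(fd);
	return 0;
}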
2054 | |||
2055 | /* | ||
2056 | * Submit an IO to the hw | ||
2057 | * | ||
2058 | * This function is called by the block layer to issue an io | ||
2059 | * to the device. Upon completion, the callback function will | ||
2060 | * be called with the data parameter passed as the callback data. | ||
2061 | * | ||
2062 | * @dd Pointer to the driver data structure. | ||
2063 | * @start First sector of the transfer. | ||
2064 | * @nsect Number of sectors to transfer. | ||
2065 | * @nents Number of entries in the scatter list for this command. | ||
2066 | * @tag The tag of this command. | ||
2067 | * @callback Pointer to the function that should be called | ||
2068 | * when the command completes. | ||
2069 | * @data Callback data passed to the callback function | ||
2070 | * when the command completes. | ||
2071 | * @dir Direction (read or write) | ||
2072 | * | ||
2073 | * return value | ||
2074 | * None | ||
2075 | */ | ||
2076 | static void mtip_hw_submit_io(struct driver_data *dd, sector_t start, | ||
2077 | int nsect, int nents, int tag, void *callback, | ||
2078 | void *data, int dir) | ||
2079 | { | ||
2080 | struct host_to_dev_fis *fis; | ||
2081 | struct mtip_port *port = dd->port; | ||
2082 | struct mtip_cmd *command = &port->commands[tag]; | ||
2083 | |||
2084 | /* Map the scatter list for DMA access */ | ||
2085 | if (dir == READ) | ||
2086 | nents = dma_map_sg(&dd->pdev->dev, command->sg, | ||
2087 | nents, DMA_FROM_DEVICE); | ||
2088 | else | ||
2089 | nents = dma_map_sg(&dd->pdev->dev, command->sg, | ||
2090 | nents, DMA_TO_DEVICE); | ||
2091 | |||
2092 | command->scatter_ents = nents; | ||
2093 | |||
2094 | /* | ||
2095 | * The number of retries for this command before it is | ||
2096 | * reported as a failure to the upper layers. | ||
2097 | */ | ||
2098 | command->retries = MTIP_MAX_RETRIES; | ||
2099 | |||
2100 | /* Fill out fis */ | ||
2101 | fis = command->command; | ||
2102 | fis->type = 0x27; | ||
2103 | fis->opts = 1 << 7; | ||
2104 | fis->command = | ||
2105 | (dir == READ ? ATA_CMD_FPDMA_READ : ATA_CMD_FPDMA_WRITE); | ||
2106 | *((unsigned int *) &fis->lba_low) = (start & 0xFFFFFF); | ||
2107 | *((unsigned int *) &fis->lba_low_ex) = ((start >> 24) & 0xFFFFFF); | ||
2108 | fis->device = 1 << 6; | ||
2109 | fis->features = nsect & 0xFF; | ||
2110 | fis->features_ex = (nsect >> 8) & 0xFF; | ||
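/*
 * For FPDMA (NCQ) commands the sector count travels in the features
 * registers, set above, while bits 7:3 of the count field carry the
 * NCQ tag.
 */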
2111 | fis->sect_count = ((tag << 3) | (tag >> 5)); | ||
2112 | fis->sect_cnt_ex = 0; | ||
2113 | fis->control = 0; | ||
2114 | fis->res2 = 0; | ||
2115 | fis->res3 = 0; | ||
2116 | fill_command_sg(dd, command, nents); | ||
2117 | |||
2118 | /* Populate the command header */ | ||
2119 | command->command_header->opts = | ||
2120 | __force_bit2int cpu_to_le32( | ||
2121 | (nents << 16) | 5 | AHCI_CMD_PREFETCH); | ||
2122 | command->command_header->byte_count = 0; | ||
2123 | |||
2124 | /* | ||
2125 | * Set the completion function and data for the command | ||
2126 | * within this layer. | ||
2127 | */ | ||
2128 | command->comp_data = dd; | ||
2129 | command->comp_func = mtip_async_complete; | ||
2130 | command->direction = (dir == READ ? DMA_FROM_DEVICE : DMA_TO_DEVICE); | ||
2131 | |||
2132 | /* | ||
2133 | * Set the completion function and data for the command passed | ||
2134 | * from the upper layer. | ||
2135 | */ | ||
2136 | command->async_data = data; | ||
2137 | command->async_callback = callback; | ||
2138 | |||
2139 | /* | ||
2140 | * Defer issuing this command if an internal command is in progress | ||
2141 | * or error handling is active; queue it for the service thread instead. | ||
2142 | */ | ||
2143 | if (unlikely(test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) || | ||
2144 | test_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags))) { | ||
2145 | set_bit(tag, port->cmds_to_issue); | ||
2146 | set_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags); | ||
2147 | return; | ||
2148 | } | ||
2149 | |||
2150 | /* Issue the command to the hardware */ | ||
2151 | mtip_issue_ncq_command(port, tag); | ||
2152 | |||
2153 | /* Set the command's timeout value.*/ | ||
2154 | port->commands[tag].comp_time = jiffies + msecs_to_jiffies( | ||
2155 | MTIP_NCQ_COMMAND_TIMEOUT_MS); | ||
2156 | } | ||
2157 | |||
2158 | /* | ||
2159 | * Release a command slot. | ||
2160 | * | ||
2161 | * @dd Pointer to the driver data structure. | ||
2162 | * @tag Slot tag | ||
2163 | * | ||
2164 | * return value | ||
2165 | * None | ||
2166 | */ | ||
2167 | static void mtip_hw_release_scatterlist(struct driver_data *dd, int tag) | ||
2168 | { | ||
2169 | release_slot(dd->port, tag); | ||
2170 | } | ||
2171 | |||
2172 | /* | ||
2173 | * Obtain a command slot and return its associated scatter list. | ||
2174 | * | ||
2175 | * @dd Pointer to the driver data structure. | ||
2176 | * @tag Pointer to an int that will receive the allocated command | ||
2177 | * slot tag. | ||
2178 | * | ||
2179 | * return value | ||
2180 | * Pointer to the scatter list for the allocated command slot | ||
2181 | * or NULL if no command slots are available. | ||
2182 | */ | ||
2183 | static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd, | ||
2184 | int *tag) | ||
2185 | { | ||
2186 | /* | ||
2187 | * It is possible that, even with this semaphore, a thread | ||
2188 | * may find that no command slots are available, so the return | ||
2189 | * value of get_slot() must still be checked. | ||
2190 | */ | ||
2191 | down(&dd->port->cmd_slot); | ||
2192 | *tag = get_slot(dd->port); | ||
2193 | |||
2194 | if (unlikely(*tag < 0)) | ||
2195 | return NULL; | ||
2196 | |||
2197 | return dd->port->commands[*tag].sg; | ||
2198 | } | ||
2199 | |||
2200 | /* | ||
2201 | * Sysfs register/status dump. | ||
2202 | * | ||
2203 | * @dev Pointer to the device structure, passed by the kernel. | ||
2204 | * @attr Pointer to the device_attribute structure passed by the kernel. | ||
2205 | * @buf Pointer to the char buffer that will receive the stats info. | ||
2206 | * | ||
2207 | * return value | ||
2208 | * The size, in bytes, of the data copied into buf. | ||
2209 | */ | ||
2210 | static ssize_t hw_show_registers(struct device *dev, | ||
2211 | struct device_attribute *attr, | ||
2212 | char *buf) | ||
2213 | { | ||
2214 | u32 group_allocated; | ||
2215 | struct driver_data *dd = dev_to_disk(dev)->private_data; | ||
2216 | int size = 0; | ||
2217 | int n; | ||
2218 | |||
2219 | size += sprintf(&buf[size], "%s:\ns_active:\n", __func__); | ||
2220 | |||
2221 | for (n = 0; n < dd->slot_groups; n++) | ||
2222 | size += sprintf(&buf[size], "0x%08x\n", | ||
2223 | readl(dd->port->s_active[n])); | ||
2224 | |||
2225 | size += sprintf(&buf[size], "Command Issue:\n"); | ||
2226 | |||
2227 | for (n = 0; n < dd->slot_groups; n++) | ||
2228 | size += sprintf(&buf[size], "0x%08x\n", | ||
2229 | readl(dd->port->cmd_issue[n])); | ||
2230 | |||
2231 | size += sprintf(&buf[size], "Allocated:\n"); | ||
2232 | |||
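| /* | ||
| * The allocated bitmap is kept in longs; on 64-bit kernels each | ||
| * long holds two 32-slot groups, so group n sits in the lower or | ||
| * upper half of allocated[n/2]. | ||
| */ | ||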
2233 | for (n = 0; n < dd->slot_groups; n++) { | ||
2234 | if (sizeof(long) > sizeof(u32)) | ||
2235 | group_allocated = | ||
2236 | dd->port->allocated[n/2] >> (32*(n&1)); | ||
2237 | else | ||
2238 | group_allocated = dd->port->allocated[n]; | ||
2239 | size += sprintf(&buf[size], "0x%08x\n", | ||
2240 | group_allocated); | ||
2241 | } | ||
2242 | |||
2243 | size += sprintf(&buf[size], "completed:\n"); | ||
2244 | |||
2245 | for (n = 0; n < dd->slot_groups; n++) | ||
2246 | size += sprintf(&buf[size], "0x%08x\n", | ||
2247 | readl(dd->port->completed[n])); | ||
2248 | |||
2249 | size += sprintf(&buf[size], "PORT_IRQ_STAT 0x%08x\n", | ||
2250 | readl(dd->port->mmio + PORT_IRQ_STAT)); | ||
2251 | size += sprintf(&buf[size], "HOST_IRQ_STAT 0x%08x\n", | ||
2252 | readl(dd->mmio + HOST_IRQ_STAT)); | ||
2253 | |||
2254 | return size; | ||
2255 | } | ||
2256 | static DEVICE_ATTR(registers, S_IRUGO, hw_show_registers, NULL); | ||
2257 | |||
2258 | /* | ||
2259 | * Create the sysfs related attributes. | ||
2260 | * | ||
2261 | * @dd Pointer to the driver data structure. | ||
2262 | * @kobj Pointer to the kobj for the block device. | ||
2263 | * | ||
2264 | * return value | ||
2265 | * 0 Operation completed successfully. | ||
2266 | * -EINVAL Invalid parameter. | ||
2267 | */ | ||
2268 | static int mtip_hw_sysfs_init(struct driver_data *dd, struct kobject *kobj) | ||
2269 | { | ||
2270 | if (!kobj || !dd) | ||
2271 | return -EINVAL; | ||
2272 | |||
2273 | if (sysfs_create_file(kobj, &dev_attr_registers.attr)) | ||
2274 | dev_warn(&dd->pdev->dev, | ||
2275 | "Error creating registers sysfs entry\n"); | ||
2276 | return 0; | ||
2277 | } | ||
2278 | |||
2279 | /* | ||
2280 | * Remove the sysfs related attributes. | ||
2281 | * | ||
2282 | * @dd Pointer to the driver data structure. | ||
2283 | * @kobj Pointer to the kobj for the block device. | ||
2284 | * | ||
2285 | * return value | ||
2286 | * 0 Operation completed successfully. | ||
2287 | * -EINVAL Invalid parameter. | ||
2288 | */ | ||
2289 | static int mtip_hw_sysfs_exit(struct driver_data *dd, struct kobject *kobj) | ||
2290 | { | ||
2291 | if (!kobj || !dd) | ||
2292 | return -EINVAL; | ||
2293 | |||
2294 | sysfs_remove_file(kobj, &dev_attr_registers.attr); | ||
2295 | |||
2296 | return 0; | ||
2297 | } | ||
2298 | |||
2299 | /* | ||
2300 | * Perform any init/resume time hardware setup | ||
2301 | * | ||
2302 | * @dd Pointer to the driver data structure. | ||
2303 | * | ||
2304 | * return value | ||
2305 | * None | ||
2306 | */ | ||
2307 | static inline void hba_setup(struct driver_data *dd) | ||
2308 | { | ||
2309 | u32 hwdata; | ||
2310 | hwdata = readl(dd->mmio + HOST_HSORG); | ||
2311 | |||
2312 | /* interrupt bug workaround: use only 1 IS bit.*/ | ||
2313 | writel(hwdata | | ||
2314 | HSORG_DISABLE_SLOTGRP_INTR | | ||
2315 | HSORG_DISABLE_SLOTGRP_PXIS, | ||
2316 | dd->mmio + HOST_HSORG); | ||
2317 | } | ||
2318 | |||
2319 | /* | ||
2320 | * Detect the details of the product, and store anything needed | ||
2321 | * into the driver data structure. This includes the product type, | ||
2322 | * version, and number of slot groups. | ||
2323 | * | ||
2324 | * @dd Pointer to the driver data structure. | ||
2325 | * | ||
2326 | * return value | ||
2327 | * None | ||
2328 | */ | ||
2329 | static void mtip_detect_product(struct driver_data *dd) | ||
2330 | { | ||
2331 | u32 hwdata; | ||
2332 | unsigned int rev, slotgroups; | ||
2333 | |||
2334 | /* | ||
2335 | * HBA base + 0xFC [15:0] - vendor-specific hardware interface | ||
2336 | * info register: | ||
2337 | * [15:8] hardware/software interface rev# | ||
2338 | * [ 3] asic-style interface | ||
2339 | * [ 2:0] number of slot groups, minus 1 (only valid for asic-style). | ||
2340 | */ | ||
2341 | hwdata = readl(dd->mmio + HOST_HSORG); | ||
2342 | |||
2343 | dd->product_type = MTIP_PRODUCT_UNKNOWN; | ||
2344 | dd->slot_groups = 1; | ||
2345 | |||
2346 | if (hwdata & 0x8) { | ||
2347 | dd->product_type = MTIP_PRODUCT_ASICFPGA; | ||
2348 | rev = (hwdata & HSORG_HWREV) >> 8; | ||
2349 | slotgroups = (hwdata & HSORG_SLOTGROUPS) + 1; | ||
2350 | dev_info(&dd->pdev->dev, | ||
2351 | "ASIC-FPGA design, HS rev 0x%x, " | ||
2352 | "%i slot groups [%i slots]\n", | ||
2353 | rev, | ||
2354 | slotgroups, | ||
2355 | slotgroups * 32); | ||
2356 | |||
2357 | if (slotgroups > MTIP_MAX_SLOT_GROUPS) { | ||
2358 | dev_warn(&dd->pdev->dev, | ||
2359 | "Warning: driver only supports " | ||
2360 | "%i slot groups.\n", MTIP_MAX_SLOT_GROUPS); | ||
2361 | slotgroups = MTIP_MAX_SLOT_GROUPS; | ||
2362 | } | ||
2363 | dd->slot_groups = slotgroups; | ||
2364 | return; | ||
2365 | } | ||
2366 | |||
2367 | dev_warn(&dd->pdev->dev, "Unrecognized product id\n"); | ||
2368 | } | ||
2369 | |||
2370 | /* | ||
2371 | * Blocking wait for FTL rebuild to complete | ||
2372 | * | ||
2373 | * @dd Pointer to the driver data structure. | ||
2374 | * | ||
2375 | * return value | ||
2376 | * 0 FTL rebuild completed successfully | ||
2377 | * -EFAULT FTL rebuild error/timeout/interruption | ||
2378 | */ | ||
2379 | static int mtip_ftl_rebuild_poll(struct driver_data *dd) | ||
2380 | { | ||
2381 | unsigned long timeout, cnt = 0, start; | ||
2382 | |||
2383 | dev_warn(&dd->pdev->dev, | ||
2384 | "FTL rebuild in progress. Polling for completion.\n"); | ||
2385 | |||
2386 | start = jiffies; | ||
2387 | dd->ftlrebuildflag = 1; | ||
2388 | timeout = jiffies + msecs_to_jiffies(MTIP_FTL_REBUILD_TIMEOUT_MS); | ||
2389 | |||
2390 | do { | ||
2391 | if (mtip_check_surprise_removal(dd->pdev)) | ||
2392 | return -EFAULT; | ||
2393 | |||
2394 | if (mtip_get_identify(dd->port, NULL) < 0) | ||
2395 | return -EFAULT; | ||
2396 | |||
2397 | if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) == | ||
2398 | MTIP_FTL_REBUILD_MAGIC) { | ||
2399 | ssleep(1); | ||
2400 | /* Print message every 3 minutes */ | ||
2401 | if (cnt++ >= 180) { | ||
2402 | dev_warn(&dd->pdev->dev, | ||
2403 | "FTL rebuild in progress (%d secs).\n", | ||
2404 | jiffies_to_msecs(jiffies - start) / 1000); | ||
2405 | cnt = 0; | ||
2406 | } | ||
2407 | } else { | ||
2408 | dev_warn(&dd->pdev->dev, | ||
2409 | "FTL rebuild complete (%d secs).\n", | ||
2410 | jiffies_to_msecs(jiffies - start) / 1000); | ||
2411 | dd->ftlrebuildflag = 0; | ||
2412 | mtip_block_initialize(dd); | ||
2413 | break; | ||
2414 | } | ||
2415 | ssleep(10); | ||
2416 | } while (time_before(jiffies, timeout)); | ||
2417 | |||
2418 | /* Check for timeout */ | ||
2419 | if (dd->ftlrebuildflag) { | ||
2420 | dev_err(&dd->pdev->dev, | ||
2421 | "Timed out waiting for FTL rebuild to complete (%d secs).\n", | ||
2422 | jiffies_to_msecs(jiffies - start) / 1000); | ||
2423 | return -EFAULT; | ||
2424 | } | ||
2425 | |||
2426 | return 0; | ||
2427 | } | ||
2428 | |||
2429 | /* | ||
2430 | * Service thread to issue queued commands. | ||
2431 | * | ||
2432 | * @data Pointer to the driver data structure. | ||
2433 | * | ||
2434 | * return value | ||
2435 | * 0 | ||
2436 | */ | ||
2437 | |||
2438 | static int mtip_service_thread(void *data) | ||
2439 | { | ||
2440 | struct driver_data *dd = (struct driver_data *)data; | ||
2441 | unsigned long slot, slot_start, slot_wrap; | ||
2442 | unsigned int num_cmd_slots = dd->slot_groups * 32; | ||
2443 | struct mtip_port *port = dd->port; | ||
2444 | |||
2445 | while (1) { | ||
2446 | /* | ||
2447 | * Wait until there is work pending and neither an internal | ||
2448 | * command is in progress nor error handling is active. | ||
2449 | */ | ||
2450 | wait_event_interruptible(port->svc_wait, (port->flags) && | ||
2451 | !test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) && | ||
2452 | !test_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags)); | ||
2453 | |||
2454 | if (kthread_should_stop()) | ||
2455 | break; | ||
2456 | |||
2457 | set_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags); | ||
2458 | if (test_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags)) { | ||
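| /* | ||
| * Start the search at slot 1; slot 0 (MTIP_TAG_INTERNAL) is | ||
| * reserved for internal commands. | ||
| */ | ||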
2459 | slot = 1; | ||
2460 | /* sentinel; set to the first slot found so the search wraps at most once */ | ||
2461 | slot_start = num_cmd_slots; | ||
2462 | slot_wrap = 0; | ||
2463 | while (1) { | ||
2464 | slot = find_next_bit(port->cmds_to_issue, | ||
2465 | num_cmd_slots, slot); | ||
2466 | if (slot_wrap == 1) { | ||
2467 | if ((slot_start >= slot) || | ||
2468 | (slot >= num_cmd_slots)) | ||
2469 | break; | ||
2470 | } | ||
2471 | if (unlikely(slot_start == num_cmd_slots)) | ||
2472 | slot_start = slot; | ||
2473 | |||
2474 | if (unlikely(slot == num_cmd_slots)) { | ||
2475 | slot = 1; | ||
2476 | slot_wrap = 1; | ||
2477 | continue; | ||
2478 | } | ||
2479 | |||
2480 | /* Issue the command to the hardware */ | ||
2481 | mtip_issue_ncq_command(port, slot); | ||
2482 | |||
2483 | /* Set the command's timeout value.*/ | ||
2484 | port->commands[slot].comp_time = jiffies + | ||
2485 | msecs_to_jiffies(MTIP_NCQ_COMMAND_TIMEOUT_MS); | ||
2486 | |||
2487 | clear_bit(slot, port->cmds_to_issue); | ||
2488 | } | ||
2489 | |||
2490 | clear_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags); | ||
2491 | } else if (test_bit(MTIP_FLAG_REBUILD_BIT, &port->flags)) { | ||
2492 | mtip_ftl_rebuild_poll(dd); | ||
2493 | clear_bit(MTIP_FLAG_REBUILD_BIT, &port->flags); | ||
2494 | } | ||
2495 | clear_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags); | ||
2496 | |||
2497 | if (test_bit(MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT, &port->flags)) | ||
2498 | break; | ||
2499 | } | ||
2500 | return 0; | ||
2501 | } | ||
2502 | |||
2503 | /* | ||
2504 | * Called once for each card. | ||
2505 | * | ||
2506 | * @dd Pointer to the driver data structure. | ||
2507 | * | ||
2508 | * return value | ||
2509 | * 0 on success, else an error code. | ||
2510 | */ | ||
2511 | static int mtip_hw_init(struct driver_data *dd) | ||
2512 | { | ||
2513 | int i; | ||
2514 | int rv; | ||
2515 | unsigned int num_command_slots; | ||
2516 | |||
2517 | dd->mmio = pcim_iomap_table(dd->pdev)[MTIP_ABAR]; | ||
2518 | |||
2519 | mtip_detect_product(dd); | ||
2520 | if (dd->product_type == MTIP_PRODUCT_UNKNOWN) { | ||
2521 | rv = -EIO; | ||
2522 | goto out1; | ||
2523 | } | ||
2524 | num_command_slots = dd->slot_groups * 32; | ||
2525 | |||
2526 | hba_setup(dd); | ||
2527 | |||
2528 | tasklet_init(&dd->tasklet, mtip_tasklet, (unsigned long)dd); | ||
2529 | |||
2530 | dd->port = kzalloc(sizeof(struct mtip_port), GFP_KERNEL); | ||
2531 | if (!dd->port) { | ||
2532 | dev_err(&dd->pdev->dev, | ||
2533 | "Memory allocation: port structure\n"); | ||
2534 | return -ENOMEM; | ||
2535 | } | ||
2536 | |||
2537 | /* Counting semaphore to track command slot usage */ | ||
2538 | sema_init(&dd->port->cmd_slot, num_command_slots - 1); | ||
2539 | |||
2540 | /* Spinlock to prevent concurrent issue */ | ||
2541 | spin_lock_init(&dd->port->cmd_issue_lock); | ||
2542 | |||
2543 | /* Set the port mmio base address. */ | ||
2544 | dd->port->mmio = dd->mmio + PORT_OFFSET; | ||
2545 | dd->port->dd = dd; | ||
2546 | |||
2547 | /* Allocate memory for the command list. */ | ||
2548 | dd->port->command_list = | ||
2549 | dmam_alloc_coherent(&dd->pdev->dev, | ||
2550 | HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2), | ||
2551 | &dd->port->command_list_dma, | ||
2552 | GFP_KERNEL); | ||
2553 | if (!dd->port->command_list) { | ||
2554 | dev_err(&dd->pdev->dev, | ||
2555 | "Memory allocation: command list\n"); | ||
2556 | rv = -ENOMEM; | ||
2557 | goto out1; | ||
2558 | } | ||
2559 | |||
2560 | /* Clear the memory we have allocated. */ | ||
2561 | memset(dd->port->command_list, | ||
2562 | 0, | ||
2563 | HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2)); | ||
2564 | |||
2565 | /* Setup the address of the RX FIS. */ | ||
2566 | dd->port->rxfis = dd->port->command_list + HW_CMD_SLOT_SZ; | ||
2567 | dd->port->rxfis_dma = dd->port->command_list_dma + HW_CMD_SLOT_SZ; | ||
2568 | |||
2569 | /* Setup the address of the command tables. */ | ||
2570 | dd->port->command_table = dd->port->rxfis + AHCI_RX_FIS_SZ; | ||
2571 | dd->port->command_tbl_dma = dd->port->rxfis_dma + AHCI_RX_FIS_SZ; | ||
2572 | |||
2573 | /* Setup the address of the identify data. */ | ||
2574 | dd->port->identify = dd->port->command_table + | ||
2575 | HW_CMD_TBL_AR_SZ; | ||
2576 | dd->port->identify_dma = dd->port->command_tbl_dma + | ||
2577 | HW_CMD_TBL_AR_SZ; | ||
2578 | |||
2579 | /* Setup the address of the sector buffer. */ | ||
2580 | dd->port->sector_buffer = (void *) dd->port->identify + ATA_SECT_SIZE; | ||
2581 | dd->port->sector_buffer_dma = dd->port->identify_dma + ATA_SECT_SIZE; | ||
2582 | |||
2583 | /* Point the command headers at the command tables. */ | ||
2584 | for (i = 0; i < num_command_slots; i++) { | ||
2585 | dd->port->commands[i].command_header = | ||
2586 | dd->port->command_list + | ||
2587 | (sizeof(struct mtip_cmd_hdr) * i); | ||
2588 | dd->port->commands[i].command_header_dma = | ||
2589 | dd->port->command_list_dma + | ||
2590 | (sizeof(struct mtip_cmd_hdr) * i); | ||
2591 | |||
2592 | dd->port->commands[i].command = | ||
2593 | dd->port->command_table + (HW_CMD_TBL_SZ * i); | ||
2594 | dd->port->commands[i].command_dma = | ||
2595 | dd->port->command_tbl_dma + (HW_CMD_TBL_SZ * i); | ||
2596 | |||
2597 | if (readl(dd->mmio + HOST_CAP) & HOST_CAP_64) | ||
2598 | dd->port->commands[i].command_header->ctbau = | ||
2599 | __force_bit2int cpu_to_le32( | ||
2600 | (dd->port->commands[i].command_dma >> 16) >> 16); | ||
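| /* | ||
| * The double 16-bit shift extracts the upper 32 bits of the DMA | ||
| * address without shifting a (possibly 32-bit) dma_addr_t by 32, | ||
| * which would be undefined on 32-bit builds. | ||
| */ | ||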
2601 | dd->port->commands[i].command_header->ctba = | ||
2602 | __force_bit2int cpu_to_le32( | ||
2603 | dd->port->commands[i].command_dma & 0xFFFFFFFF); | ||
2604 | |||
2605 | /* | ||
2606 | * If this is not done, a bug is reported by the stock | ||
2607 | * FC11 i386 kernel, because it has lots of kernel | ||
2608 | * debugging enabled. | ||
2609 | */ | ||
2610 | sg_init_table(dd->port->commands[i].sg, MTIP_MAX_SG); | ||
2611 | |||
2612 | /* Mark all commands as currently inactive.*/ | ||
2613 | atomic_set(&dd->port->commands[i].active, 0); | ||
2614 | } | ||
2615 | |||
2616 | /* Setup the pointers to the extended s_active and CI registers. */ | ||
2617 | for (i = 0; i < dd->slot_groups; i++) { | ||
2618 | dd->port->s_active[i] = | ||
2619 | dd->port->mmio + i*0x80 + PORT_SCR_ACT; | ||
2620 | dd->port->cmd_issue[i] = | ||
2621 | dd->port->mmio + i*0x80 + PORT_COMMAND_ISSUE; | ||
2622 | dd->port->completed[i] = | ||
2623 | dd->port->mmio + i*0x80 + PORT_SDBV; | ||
2624 | } | ||
2625 | |||
2626 | /* Reset the HBA. */ | ||
2627 | if (mtip_hba_reset(dd) < 0) { | ||
2628 | dev_err(&dd->pdev->dev, | ||
2629 | "Card did not reset within timeout\n"); | ||
2630 | rv = -EIO; | ||
2631 | goto out2; | ||
2632 | } | ||
2633 | |||
2634 | mtip_init_port(dd->port); | ||
2635 | mtip_start_port(dd->port); | ||
2636 | |||
2637 | /* Setup the ISR and enable interrupts. */ | ||
2638 | rv = devm_request_irq(&dd->pdev->dev, | ||
2639 | dd->pdev->irq, | ||
2640 | mtip_irq_handler, | ||
2641 | IRQF_SHARED, | ||
2642 | dev_driver_string(&dd->pdev->dev), | ||
2643 | dd); | ||
2644 | |||
2645 | if (rv) { | ||
2646 | dev_err(&dd->pdev->dev, | ||
2647 | "Unable to allocate IRQ %d\n", dd->pdev->irq); | ||
2648 | goto out2; | ||
2649 | } | ||
2650 | |||
2651 | /* Enable interrupts on the HBA. */ | ||
2652 | writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN, | ||
2653 | dd->mmio + HOST_CTL); | ||
2654 | |||
2655 | init_timer(&dd->port->cmd_timer); | ||
2656 | init_waitqueue_head(&dd->port->svc_wait); | ||
2657 | |||
2658 | dd->port->cmd_timer.data = (unsigned long int) dd->port; | ||
2659 | dd->port->cmd_timer.function = mtip_timeout_function; | ||
2660 | mod_timer(&dd->port->cmd_timer, | ||
2661 | jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD)); | ||
2662 | |||
2663 | if (mtip_get_identify(dd->port, NULL) < 0) { | ||
2664 | rv = -EFAULT; | ||
2665 | goto out3; | ||
2666 | } | ||
2667 | |||
2668 | if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) == | ||
2669 | MTIP_FTL_REBUILD_MAGIC) { | ||
2670 | set_bit(MTIP_FLAG_REBUILD_BIT, &dd->port->flags); | ||
2671 | return MTIP_FTL_REBUILD_MAGIC; | ||
2672 | } | ||
2673 | mtip_dump_identify(dd->port); | ||
2674 | return rv; | ||
2675 | |||
2676 | out3: | ||
2677 | del_timer_sync(&dd->port->cmd_timer); | ||
2678 | |||
2679 | /* Disable interrupts on the HBA. */ | ||
2680 | writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN, | ||
2681 | dd->mmio + HOST_CTL); | ||
2682 | |||
2683 | /* Release the IRQ. */ | ||
2684 | devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd); | ||
2685 | |||
2686 | out2: | ||
2687 | mtip_deinit_port(dd->port); | ||
2688 | |||
2689 | /* Free the command/command header memory. */ | ||
2690 | dmam_free_coherent(&dd->pdev->dev, | ||
2691 | HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2), | ||
2692 | dd->port->command_list, | ||
2693 | dd->port->command_list_dma); | ||
2694 | out1: | ||
2695 | /* Free the memory allocated for the port structure. */ | ||
2696 | kfree(dd->port); | ||
2697 | |||
2698 | return rv; | ||
2699 | } | ||
2700 | |||
2701 | /* | ||
2702 | * Called to deinitialize an interface. | ||
2703 | * | ||
2704 | * @dd Pointer to the driver data structure. | ||
2705 | * | ||
2706 | * return value | ||
2707 | * 0 | ||
2708 | */ | ||
2709 | static int mtip_hw_exit(struct driver_data *dd) | ||
2710 | { | ||
2711 | /* | ||
2712 | * Send standby immediate (E0h) to the drive so that it | ||
2713 | * saves its state. | ||
2714 | */ | ||
2715 | if (atomic_read(&dd->drv_cleanup_done) != true) { | ||
2716 | |||
2717 | mtip_standby_immediate(dd->port); | ||
2718 | |||
2719 | /* de-initialize the port. */ | ||
2720 | mtip_deinit_port(dd->port); | ||
2721 | |||
2722 | /* Disable interrupts on the HBA. */ | ||
2723 | writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN, | ||
2724 | dd->mmio + HOST_CTL); | ||
2725 | } | ||
2726 | |||
2727 | del_timer_sync(&dd->port->cmd_timer); | ||
2728 | |||
2729 | /* Release the IRQ. */ | ||
2730 | devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd); | ||
2731 | |||
2732 | /* Stop the bottom half tasklet. */ | ||
2733 | tasklet_kill(&dd->tasklet); | ||
2734 | |||
2735 | /* Free the command/command header memory. */ | ||
2736 | dmam_free_coherent(&dd->pdev->dev, | ||
2737 | HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2), | ||
2738 | dd->port->command_list, | ||
2739 | dd->port->command_list_dma); | ||
2740 | /* Free the memory allocated for the port structure. */ | ||
2741 | kfree(dd->port); | ||
2742 | |||
2743 | return 0; | ||
2744 | } | ||
2745 | |||
2746 | /* | ||
2747 | * Issue a Standby Immediate command to the device. | ||
2748 | * | ||
2749 | * This function is called by the Block Layer just before the | ||
2750 | * system powers off during a shutdown. | ||
2751 | * | ||
2752 | * @dd Pointer to the driver data structure. | ||
2753 | * | ||
2754 | * return value | ||
2755 | * 0 | ||
2756 | */ | ||
2757 | static int mtip_hw_shutdown(struct driver_data *dd) | ||
2758 | { | ||
2759 | /* | ||
2760 | * Send standby immediate (E0h) to the drive so that it | ||
2761 | * saves its state. | ||
2762 | */ | ||
2763 | mtip_standby_immediate(dd->port); | ||
2764 | |||
2765 | return 0; | ||
2766 | } | ||
2767 | |||
2768 | /* | ||
2769 | * Suspend function | ||
2770 | * | ||
2771 | * This function is called by the Block Layer just before the | ||
2772 | * system hibernates. | ||
2773 | * | ||
2774 | * @dd Pointer to the driver data structure. | ||
2775 | * | ||
2776 | * return value | ||
2777 | * 0 Suspend was successful | ||
2778 | * -EFAULT Suspend was not successful | ||
2779 | */ | ||
2780 | static int mtip_hw_suspend(struct driver_data *dd) | ||
2781 | { | ||
2782 | /* | ||
2783 | * Send standby immediate (E0h) to the drive | ||
2784 | * so that it saves its state. | ||
2785 | */ | ||
2786 | if (mtip_standby_immediate(dd->port) != 0) { | ||
2787 | dev_err(&dd->pdev->dev, | ||
2788 | "Failed standby-immediate command\n"); | ||
2789 | return -EFAULT; | ||
2790 | } | ||
2791 | |||
2792 | /* Disable interrupts on the HBA.*/ | ||
2793 | writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN, | ||
2794 | dd->mmio + HOST_CTL); | ||
2795 | mtip_deinit_port(dd->port); | ||
2796 | |||
2797 | return 0; | ||
2798 | } | ||
2799 | |||
2800 | /* | ||
2801 | * Resume function | ||
2802 | * | ||
2803 | * This function is called by the Block Layer as the | ||
2804 | * system resumes. | ||
2805 | * | ||
2806 | * @dd Pointer to the driver data structure. | ||
2807 | * | ||
2808 | * return value | ||
2809 | * 0 Resume was successful | ||
2810 | * -EFAULT Resume was not successful | ||
2811 | */ | ||
2812 | static int mtip_hw_resume(struct driver_data *dd) | ||
2813 | { | ||
2814 | /* Perform any needed hardware setup steps */ | ||
2815 | hba_setup(dd); | ||
2816 | |||
2817 | /* Reset the HBA */ | ||
2818 | if (mtip_hba_reset(dd) != 0) { | ||
2819 | dev_err(&dd->pdev->dev, | ||
2820 | "Unable to reset the HBA\n"); | ||
2821 | return -EFAULT; | ||
2822 | } | ||
2823 | |||
2824 | /* | ||
2825 | * Enable the port, DMA engine, and FIS reception specific | ||
2826 | * h/w in the controller. | ||
2827 | */ | ||
2828 | mtip_init_port(dd->port); | ||
2829 | mtip_start_port(dd->port); | ||
2830 | |||
2831 | /* Enable interrupts on the HBA.*/ | ||
2832 | writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN, | ||
2833 | dd->mmio + HOST_CTL); | ||
2834 | |||
2835 | return 0; | ||
2836 | } | ||
2837 | |||
2838 | /* | ||
2839 | * Helper function for reusing disk name | ||
2840 | * upon hot insertion. | ||
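| * With the "rssd" prefix this yields, for example, index 0 -> "rssda", | ||
| * index 25 -> "rssdz" and index 26 -> "rssdaa" (same scheme as sd.c). | ||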
2841 | */ | ||
2842 | static int rssd_disk_name_format(char *prefix, | ||
2843 | int index, | ||
2844 | char *buf, | ||
2845 | int buflen) | ||
2846 | { | ||
2847 | const int base = 'z' - 'a' + 1; | ||
2848 | char *begin = buf + strlen(prefix); | ||
2849 | char *end = buf + buflen; | ||
2850 | char *p; | ||
2851 | int unit; | ||
2852 | |||
2853 | p = end - 1; | ||
2854 | *p = '\0'; | ||
2855 | unit = base; | ||
2856 | do { | ||
2857 | if (p == begin) | ||
2858 | return -EINVAL; | ||
2859 | *--p = 'a' + (index % unit); | ||
2860 | index = (index / unit) - 1; | ||
2861 | } while (index >= 0); | ||
2862 | |||
2863 | memmove(begin, p, end - p); | ||
2864 | memcpy(buf, prefix, strlen(prefix)); | ||
2865 | |||
2866 | return 0; | ||
2867 | } | ||
2868 | |||
2869 | /* | ||
2870 | * Block layer IOCTL handler. | ||
2871 | * | ||
2872 | * @dev Pointer to the block_device structure. | ||
2873 | * @mode ignored | ||
2874 | * @cmd IOCTL command passed from the user application. | ||
2875 | * @arg Argument passed from the user application. | ||
2876 | * | ||
2877 | * return value | ||
2878 | * 0 IOCTL completed successfully. | ||
2879 | * -ENOTTY IOCTL not supported or invalid driver data | ||
2880 | * structure pointer. | ||
2881 | */ | ||
2882 | static int mtip_block_ioctl(struct block_device *dev, | ||
2883 | fmode_t mode, | ||
2884 | unsigned cmd, | ||
2885 | unsigned long arg) | ||
2886 | { | ||
2887 | struct driver_data *dd = dev->bd_disk->private_data; | ||
2888 | |||
2889 | if (!capable(CAP_SYS_ADMIN)) | ||
2890 | return -EACCES; | ||
2891 | |||
2892 | if (!dd) | ||
2893 | return -ENOTTY; | ||
2894 | |||
2895 | switch (cmd) { | ||
2896 | case BLKFLSBUF: | ||
2897 | return -ENOTTY; | ||
2898 | default: | ||
2899 | return mtip_hw_ioctl(dd, cmd, arg); | ||
2900 | } | ||
2901 | } | ||
2902 | |||
2903 | #ifdef CONFIG_COMPAT | ||
2904 | /* | ||
2905 | * Block layer compat IOCTL handler. | ||
2906 | * | ||
2907 | * @dev Pointer to the block_device structure. | ||
2908 | * @mode ignored | ||
2909 | * @cmd IOCTL command passed from the user application. | ||
2910 | * @arg Argument passed from the user application. | ||
2911 | * | ||
2912 | * return value | ||
2913 | * 0 IOCTL completed successfully. | ||
2914 | * -ENOTTY IOCTL not supported or invalid driver data | ||
2915 | * structure pointer. | ||
2916 | */ | ||
2917 | static int mtip_block_compat_ioctl(struct block_device *dev, | ||
2918 | fmode_t mode, | ||
2919 | unsigned cmd, | ||
2920 | unsigned long arg) | ||
2921 | { | ||
2922 | struct driver_data *dd = dev->bd_disk->private_data; | ||
2923 | |||
2924 | if (!capable(CAP_SYS_ADMIN)) | ||
2925 | return -EACCES; | ||
2926 | |||
2927 | if (!dd) | ||
2928 | return -ENOTTY; | ||
2929 | |||
2930 | switch (cmd) { | ||
2931 | case BLKFLSBUF: | ||
2932 | return -ENOTTY; | ||
2933 | case HDIO_DRIVE_TASKFILE: { | ||
2934 | struct mtip_compat_ide_task_request_s __user *compat_req_task; | ||
2935 | ide_task_request_t req_task; | ||
2936 | int compat_tasksize, outtotal, ret; | ||
2937 | |||
2938 | compat_tasksize = | ||
2939 | sizeof(struct mtip_compat_ide_task_request_s); | ||
2940 | |||
2941 | compat_req_task = | ||
2942 | (struct mtip_compat_ide_task_request_s __user *) arg; | ||
2943 | |||
2944 | if (copy_from_user(&req_task, (void __user *) arg, | ||
2945 | compat_tasksize - (2 * sizeof(compat_long_t)))) | ||
2946 | return -EFAULT; | ||
2947 | |||
2948 | if (get_user(req_task.out_size, &compat_req_task->out_size)) | ||
2949 | return -EFAULT; | ||
2950 | |||
2951 | if (get_user(req_task.in_size, &compat_req_task->in_size)) | ||
2952 | return -EFAULT; | ||
2953 | |||
2954 | outtotal = sizeof(struct mtip_compat_ide_task_request_s); | ||
2955 | |||
2956 | ret = exec_drive_taskfile(dd, (void __user *) arg, | ||
2957 | &req_task, outtotal); | ||
2958 | |||
2959 | if (copy_to_user((void __user *) arg, &req_task, | ||
2960 | compat_tasksize - | ||
2961 | (2 * sizeof(compat_long_t)))) | ||
2962 | return -EFAULT; | ||
2963 | |||
2964 | if (put_user(req_task.out_size, &compat_req_task->out_size)) | ||
2965 | return -EFAULT; | ||
2966 | |||
2967 | if (put_user(req_task.in_size, &compat_req_task->in_size)) | ||
2968 | return -EFAULT; | ||
2969 | |||
2970 | return ret; | ||
2971 | } | ||
2972 | default: | ||
2973 | return mtip_hw_ioctl(dd, cmd, arg); | ||
2974 | } | ||
2975 | } | ||
2976 | #endif | ||
2977 | |||
2978 | /* | ||
2979 | * Obtain the geometry of the device. | ||
2980 | * | ||
2981 | * You may think that this function is obsolete, but some applications, | ||
2982 | * fdisk for example, still use CHS values. This function describes the | ||
2983 | * device as having 224 heads and 56 sectors per cylinder. These values are | ||
2984 | * chosen so that each cylinder is aligned on a 4KB boundary. Since a | ||
2985 | * partition is described in terms of a start and end cylinder this means | ||
2986 | * that each partition is also 4KB aligned. Non-aligned partitions adversely | ||
2987 | * affect performance. | ||
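| * (224 heads * 56 sectors = 12544 sectors = 6422528 bytes per | ||
| * cylinder, a multiple of 4096.) | ||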
2988 | * | ||
2989 | * @dev Pointer to the block_device structure. | ||
2990 | * @geo Pointer to a hd_geometry structure. | ||
2991 | * | ||
2992 | * return value | ||
2993 | * 0 Operation completed successfully. | ||
2994 | * -ENOTTY An error occurred while reading the drive capacity. | ||
2995 | */ | ||
2996 | static int mtip_block_getgeo(struct block_device *dev, | ||
2997 | struct hd_geometry *geo) | ||
2998 | { | ||
2999 | struct driver_data *dd = dev->bd_disk->private_data; | ||
3000 | sector_t capacity; | ||
3001 | |||
3002 | if (!dd) | ||
3003 | return -ENOTTY; | ||
3004 | |||
3005 | if (!(mtip_hw_get_capacity(dd, &capacity))) { | ||
3006 | dev_warn(&dd->pdev->dev, | ||
3007 | "Could not get drive capacity.\n"); | ||
3008 | return -ENOTTY; | ||
3009 | } | ||
3010 | |||
3011 | geo->heads = 224; | ||
3012 | geo->sectors = 56; | ||
3013 | sector_div(capacity, (geo->heads * geo->sectors)); | ||
3014 | geo->cylinders = capacity; | ||
3015 | return 0; | ||
3016 | } | ||
3017 | |||
3018 | /* | ||
3019 | * Block device operations structure. | ||
3020 | * | ||
3021 | * This structure contains pointers to the functions required by the block | ||
3022 | * layer. | ||
3023 | */ | ||
3024 | static const struct block_device_operations mtip_block_ops = { | ||
3025 | .ioctl = mtip_block_ioctl, | ||
3026 | #ifdef CONFIG_COMPAT | ||
3027 | .compat_ioctl = mtip_block_compat_ioctl, | ||
3028 | #endif | ||
3029 | .getgeo = mtip_block_getgeo, | ||
3030 | .owner = THIS_MODULE | ||
3031 | }; | ||
3032 | |||
3033 | /* | ||
3034 | * Block layer make request function. | ||
3035 | * | ||
3036 | * This function is called by the kernel to process a BIO for | ||
3037 | * the P320 device. | ||
3038 | * | ||
3039 | * @queue Pointer to the request queue. Unused other than to obtain | ||
3040 | * the driver data structure. | ||
3041 | * @bio Pointer to the BIO. | ||
3042 | * | ||
3043 | */ | ||
3044 | static void mtip_make_request(struct request_queue *queue, struct bio *bio) | ||
3045 | { | ||
3046 | struct driver_data *dd = queue->queuedata; | ||
3047 | struct scatterlist *sg; | ||
3048 | struct bio_vec *bvec; | ||
3049 | int nents = 0; | ||
3050 | int tag = 0; | ||
3051 | |||
3052 | if (unlikely(!bio_has_data(bio))) { | ||
3053 | blk_queue_flush(queue, 0); | ||
3054 | bio_endio(bio, 0); | ||
3055 | return; | ||
3056 | } | ||
3057 | |||
3058 | sg = mtip_hw_get_scatterlist(dd, &tag); | ||
3059 | if (likely(sg != NULL)) { | ||
3060 | blk_queue_bounce(queue, &bio); | ||
3061 | |||
3062 | if (unlikely((bio)->bi_vcnt > MTIP_MAX_SG)) { | ||
3063 | dev_warn(&dd->pdev->dev, | ||
3064 | "Maximum number of SGL entries exceeded"); | ||
3065 | bio_io_error(bio); | ||
3066 | mtip_hw_release_scatterlist(dd, tag); | ||
3067 | return; | ||
3068 | } | ||
3069 | |||
3070 | /* Create the scatter list for this bio. */ | ||
3071 | bio_for_each_segment(bvec, bio, nents) { | ||
3072 | sg_set_page(&sg[nents], | ||
3073 | bvec->bv_page, | ||
3074 | bvec->bv_len, | ||
3075 | bvec->bv_offset); | ||
3076 | } | ||
3077 | |||
3078 | /* Issue the read/write. */ | ||
3079 | mtip_hw_submit_io(dd, | ||
3080 | bio->bi_sector, | ||
3081 | bio_sectors(bio), | ||
3082 | nents, | ||
3083 | tag, | ||
3084 | bio_endio, | ||
3085 | bio, | ||
3086 | bio_data_dir(bio)); | ||
3087 | } else | ||
3088 | bio_io_error(bio); | ||
3089 | } | ||
3090 | |||
3091 | /* | ||
3092 | * Block layer initialization function. | ||
3093 | * | ||
3094 | * This function is called once by the PCI layer for each P320 | ||
3095 | * device that is connected to the system. | ||
3096 | * | ||
3097 | * @dd Pointer to the driver data structure. | ||
3098 | * | ||
3099 | * return value | ||
3100 | * 0 on success else an error code. | ||
3101 | */ | ||
3102 | static int mtip_block_initialize(struct driver_data *dd) | ||
3103 | { | ||
3104 | int rv = 0, wait_for_rebuild = 0; | ||
3105 | sector_t capacity; | ||
3106 | unsigned int index = 0; | ||
3107 | struct kobject *kobj; | ||
3108 | unsigned char thd_name[16]; | ||
3109 | |||
3110 | if (dd->disk) | ||
3111 | goto skip_create_disk; /* hw init done, before rebuild */ | ||
3112 | |||
3113 | /* Initialize the protocol layer. */ | ||
3114 | wait_for_rebuild = mtip_hw_init(dd); | ||
3115 | if (wait_for_rebuild < 0) { | ||
3116 | dev_err(&dd->pdev->dev, | ||
3117 | "Protocol layer initialization failed\n"); | ||
3118 | rv = -EINVAL; | ||
3119 | goto protocol_init_error; | ||
3120 | } | ||
3121 | |||
3122 | dd->disk = alloc_disk(MTIP_MAX_MINORS); | ||
3123 | if (dd->disk == NULL) { | ||
3124 | dev_err(&dd->pdev->dev, | ||
3125 | "Unable to allocate gendisk structure\n"); | ||
3126 | rv = -EINVAL; | ||
3127 | goto alloc_disk_error; | ||
3128 | } | ||
3129 | |||
3130 | /* Generate the disk name, implemented the same way as in sd.c */ | ||
3131 | do { | ||
3132 | if (!ida_pre_get(&rssd_index_ida, GFP_KERNEL)) | ||
3133 | goto ida_get_error; | ||
3134 | |||
3135 | spin_lock(&rssd_index_lock); | ||
3136 | rv = ida_get_new(&rssd_index_ida, &index); | ||
3137 | spin_unlock(&rssd_index_lock); | ||
3138 | } while (rv == -EAGAIN); | ||
3139 | |||
3140 | if (rv) | ||
3141 | goto ida_get_error; | ||
3142 | |||
3143 | rv = rssd_disk_name_format("rssd", | ||
3144 | index, | ||
3145 | dd->disk->disk_name, | ||
3146 | DISK_NAME_LEN); | ||
3147 | if (rv) | ||
3148 | goto disk_index_error; | ||
3149 | |||
3150 | dd->disk->driverfs_dev = &dd->pdev->dev; | ||
3151 | dd->disk->major = dd->major; | ||
3152 | dd->disk->first_minor = dd->instance * MTIP_MAX_MINORS; | ||
3153 | dd->disk->fops = &mtip_block_ops; | ||
3154 | dd->disk->private_data = dd; | ||
3155 | dd->index = index; | ||
3156 | |||
3157 | /* | ||
3158 | * If a rebuild is pending, start the service thread and delay the | ||
3159 | * block queue creation and add_disk(). | ||
3160 | */ | ||
3161 | if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC) | ||
3162 | goto start_service_thread; | ||
3163 | |||
3164 | skip_create_disk: | ||
3165 | /* Allocate the request queue. */ | ||
3166 | dd->queue = blk_alloc_queue(GFP_KERNEL); | ||
3167 | if (dd->queue == NULL) { | ||
3168 | dev_err(&dd->pdev->dev, | ||
3169 | "Unable to allocate request queue\n"); | ||
3170 | rv = -ENOMEM; | ||
3171 | goto block_queue_alloc_init_error; | ||
3172 | } | ||
3173 | |||
3174 | /* Attach our request function to the request queue. */ | ||
3175 | blk_queue_make_request(dd->queue, mtip_make_request); | ||
3176 | |||
3177 | dd->disk->queue = dd->queue; | ||
3178 | dd->queue->queuedata = dd; | ||
3179 | |||
3180 | /* Set device limits. */ | ||
3181 | set_bit(QUEUE_FLAG_NONROT, &dd->queue->queue_flags); | ||
3182 | blk_queue_max_segments(dd->queue, MTIP_MAX_SG); | ||
3183 | blk_queue_physical_block_size(dd->queue, 4096); | ||
3184 | blk_queue_io_min(dd->queue, 4096); | ||
3185 | /* | ||
3186 | * Write-back cache is not supported by the device. FUA depends on | ||
3187 | * write-back cache support, so flush support is set to zero. | ||
3188 | */ | ||
3189 | blk_queue_flush(dd->queue, 0); | ||
3190 | |||
3191 | /* Set the capacity of the device in 512 byte sectors. */ | ||
3192 | if (!(mtip_hw_get_capacity(dd, &capacity))) { | ||
3193 | dev_warn(&dd->pdev->dev, | ||
3194 | "Could not read drive capacity\n"); | ||
3195 | rv = -EIO; | ||
3196 | goto read_capacity_error; | ||
3197 | } | ||
3198 | set_capacity(dd->disk, capacity); | ||
3199 | |||
3200 | /* Enable the block device and add it to /dev */ | ||
3201 | add_disk(dd->disk); | ||
3202 | |||
3203 | /* | ||
3204 | * Now that the disk is active, initialize any sysfs attributes | ||
3205 | * managed by the protocol layer. | ||
3206 | */ | ||
3207 | kobj = kobject_get(&disk_to_dev(dd->disk)->kobj); | ||
3208 | if (kobj) { | ||
3209 | mtip_hw_sysfs_init(dd, kobj); | ||
3210 | kobject_put(kobj); | ||
3211 | } | ||
3212 | |||
3213 | if (dd->mtip_svc_handler) | ||
3214 | return rv; /* service thread created for handling rebuild */ | ||
3215 | |||
3216 | start_service_thread: | ||
3217 | sprintf(thd_name, "mtip_svc_thd_%02d", index); | ||
3218 | |||
3219 | dd->mtip_svc_handler = kthread_run(mtip_service_thread, | ||
3220 | dd, thd_name); | ||
3221 | |||
3222 | if (IS_ERR(dd->mtip_svc_handler)) { | ||
3223 | printk(KERN_ERR "mtip32xx: service thread failed to start\n"); | ||
3224 | dd->mtip_svc_handler = NULL; | ||
3225 | rv = -EFAULT; | ||
3226 | goto kthread_run_error; | ||
3227 | } | ||
3228 | |||
3229 | return rv; | ||
3230 | |||
3231 | kthread_run_error: | ||
3232 | /* Delete our gendisk. This also removes the device from /dev */ | ||
3233 | del_gendisk(dd->disk); | ||
3234 | |||
3235 | read_capacity_error: | ||
3236 | blk_cleanup_queue(dd->queue); | ||
3237 | |||
3238 | block_queue_alloc_init_error: | ||
3239 | disk_index_error: | ||
3240 | spin_lock(&rssd_index_lock); | ||
3241 | ida_remove(&rssd_index_ida, index); | ||
3242 | spin_unlock(&rssd_index_lock); | ||
3243 | |||
3244 | ida_get_error: | ||
3245 | put_disk(dd->disk); | ||
3246 | |||
3247 | alloc_disk_error: | ||
3248 | mtip_hw_exit(dd); /* De-initialize the protocol layer. */ | ||
3249 | |||
3250 | protocol_init_error: | ||
3251 | return rv; | ||
3252 | } | ||
3253 | |||
3254 | /* | ||
3255 | * Block layer deinitialization function. | ||
3256 | * | ||
3257 | * Called by the PCI layer as each P320 device is removed. | ||
3258 | * | ||
3259 | * @dd Pointer to the driver data structure. | ||
3260 | * | ||
3261 | * return value | ||
3262 | * 0 | ||
3263 | */ | ||
3264 | static int mtip_block_remove(struct driver_data *dd) | ||
3265 | { | ||
3266 | struct kobject *kobj; | ||
3267 | |||
3268 | if (dd->mtip_svc_handler) { | ||
3269 | set_bit(MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT, &dd->port->flags); | ||
3270 | wake_up_interruptible(&dd->port->svc_wait); | ||
3271 | kthread_stop(dd->mtip_svc_handler); | ||
3272 | } | ||
3273 | |||
3274 | /* Clean up the sysfs attributes managed by the protocol layer. */ | ||
3275 | kobj = kobject_get(&disk_to_dev(dd->disk)->kobj); | ||
3276 | if (kobj) { | ||
3277 | mtip_hw_sysfs_exit(dd, kobj); | ||
3278 | kobject_put(kobj); | ||
3279 | } | ||
3280 | |||
3281 | /* | ||
3282 | * Delete our gendisk structure. This also removes the device | ||
3283 | * from /dev | ||
3284 | */ | ||
3285 | del_gendisk(dd->disk); | ||
3286 | blk_cleanup_queue(dd->queue); | ||
3287 | dd->disk = NULL; | ||
3288 | dd->queue = NULL; | ||
3289 | |||
3290 | /* De-initialize the protocol layer. */ | ||
3291 | mtip_hw_exit(dd); | ||
3292 | |||
3293 | return 0; | ||
3294 | } | ||
3295 | |||
3296 | /* | ||
3297 | * Function called by the PCI layer just before the | ||
3298 | * machine shuts down. | ||
3299 | * | ||
3300 | * If a protocol layer shutdown function is present it will be called | ||
3301 | * by this function. | ||
3302 | * | ||
3303 | * @dd Pointer to the driver data structure. | ||
3304 | * | ||
3305 | * return value | ||
3306 | * 0 | ||
3307 | */ | ||
3308 | static int mtip_block_shutdown(struct driver_data *dd) | ||
3309 | { | ||
3310 | dev_info(&dd->pdev->dev, | ||
3311 | "Shutting down %s ...\n", dd->disk->disk_name); | ||
3312 | |||
3313 | /* Delete our gendisk structure, and cleanup the blk queue. */ | ||
3314 | del_gendisk(dd->disk); | ||
3315 | blk_cleanup_queue(dd->queue); | ||
3316 | dd->disk = NULL; | ||
3317 | dd->queue = NULL; | ||
3318 | |||
3319 | mtip_hw_shutdown(dd); | ||
3320 | return 0; | ||
3321 | } | ||
3322 | |||
3323 | static int mtip_block_suspend(struct driver_data *dd) | ||
3324 | { | ||
3325 | dev_info(&dd->pdev->dev, | ||
3326 | "Suspending %s ...\n", dd->disk->disk_name); | ||
3327 | mtip_hw_suspend(dd); | ||
3328 | return 0; | ||
3329 | } | ||
3330 | |||
3331 | static int mtip_block_resume(struct driver_data *dd) | ||
3332 | { | ||
3333 | dev_info(&dd->pdev->dev, "Resuming %s ...\n", | ||
3334 | dd->disk->disk_name); | ||
3335 | mtip_hw_resume(dd); | ||
3336 | return 0; | ||
3337 | } | ||
3338 | |||
3339 | /* | ||
3340 | * Called for each supported PCI device detected. | ||
3341 | * | ||
3342 | * This function allocates the private data structure, enables the | ||
3343 | * PCI device and then calls the block layer initialization function. | ||
3344 | * | ||
3345 | * return value | ||
3346 | * 0 on success else an error code. | ||
3347 | */ | ||
3348 | static int mtip_pci_probe(struct pci_dev *pdev, | ||
3349 | const struct pci_device_id *ent) | ||
3350 | { | ||
3351 | int rv = 0; | ||
3352 | struct driver_data *dd = NULL; | ||
3353 | |||
3354 | /* Allocate memory for this device's private data. */ | ||
3355 | dd = kzalloc(sizeof(struct driver_data), GFP_KERNEL); | ||
3356 | if (dd == NULL) { | ||
3357 | dev_err(&pdev->dev, | ||
3358 | "Unable to allocate memory for driver data\n"); | ||
3359 | return -ENOMEM; | ||
3360 | } | ||
3361 | |||
3362 | /* Set the atomic variable as 1 in case of SRSI */ | ||
3363 | atomic_set(&dd->drv_cleanup_done, true); | ||
3364 | |||
3365 | atomic_set(&dd->resumeflag, false); | ||
3366 | |||
3367 | /* Attach the private data to this PCI device. */ | ||
3368 | pci_set_drvdata(pdev, dd); | ||
3369 | |||
3370 | rv = pcim_enable_device(pdev); | ||
3371 | if (rv < 0) { | ||
3372 | dev_err(&pdev->dev, "Unable to enable device\n"); | ||
3373 | goto iomap_err; | ||
3374 | } | ||
3375 | |||
3376 | /* Map BAR5 to memory. */ | ||
3377 | rv = pcim_iomap_regions(pdev, 1 << MTIP_ABAR, MTIP_DRV_NAME); | ||
3378 | if (rv < 0) { | ||
3379 | dev_err(&pdev->dev, "Unable to map regions\n"); | ||
3380 | goto iomap_err; | ||
3381 | } | ||
3382 | |||
3383 | if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { | ||
3384 | rv = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); | ||
3385 | |||
3386 | if (rv) { | ||
3387 | rv = pci_set_consistent_dma_mask(pdev, | ||
3388 | DMA_BIT_MASK(32)); | ||
3389 | if (rv) { | ||
3390 | dev_warn(&pdev->dev, | ||
3391 | "64-bit DMA enable failed\n"); | ||
3392 | goto setmask_err; | ||
3393 | } | ||
3394 | } | ||
3395 | } | ||
3396 | |||
3397 | pci_set_master(pdev); | ||
3398 | |||
3399 | if (pci_enable_msi(pdev)) { | ||
3400 | dev_warn(&pdev->dev, | ||
3401 | "Unable to enable MSI interrupt.\n"); | ||
3402 | goto block_initialize_err; | ||
3403 | } | ||
3404 | |||
3405 | /* Copy the info we may need later into the private data structure. */ | ||
3406 | dd->major = mtip_major; | ||
3407 | dd->instance = instance; | ||
3408 | dd->pdev = pdev; | ||
3409 | |||
3410 | /* Initialize the block layer. */ | ||
3411 | rv = mtip_block_initialize(dd); | ||
3412 | if (rv < 0) { | ||
3413 | dev_err(&pdev->dev, | ||
3414 | "Unable to initialize block layer\n"); | ||
3415 | goto block_initialize_err; | ||
3416 | } | ||
3417 | |||
3418 | /* | ||
3419 | * Increment the instance count so that each device has a unique | ||
3420 | * instance number. | ||
3421 | */ | ||
3422 | instance++; | ||
3423 | |||
3424 | goto done; | ||
3425 | |||
3426 | block_initialize_err: | ||
3427 | pci_disable_msi(pdev); | ||
3428 | |||
3429 | setmask_err: | ||
3430 | pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); | ||
3431 | |||
3432 | iomap_err: | ||
3433 | kfree(dd); | ||
3434 | pci_set_drvdata(pdev, NULL); | ||
3435 | return rv; | ||
3436 | done: | ||
3437 | /* Set the atomic variable as 0 in case of SRSI */ | ||
3438 | atomic_set(&dd->drv_cleanup_done, true); | ||
3439 | |||
3440 | return rv; | ||
3441 | } | ||
3442 | |||
3443 | /* | ||
3444 | * Called for each probed device when the device is removed or the | ||
3445 | * driver is unloaded. | ||
3446 | * | ||
3447 | * return value | ||
3448 | * None | ||
3449 | */ | ||
3450 | static void mtip_pci_remove(struct pci_dev *pdev) | ||
3451 | { | ||
3452 | struct driver_data *dd = pci_get_drvdata(pdev); | ||
3453 | int counter = 0; | ||
3454 | |||
3455 | if (mtip_check_surprise_removal(pdev)) { | ||
3456 | while (atomic_read(&dd->drv_cleanup_done) == false) { | ||
3457 | counter++; | ||
3458 | msleep(20); | ||
3459 | if (counter == 10) { | ||
3460 | /* Cleanup the outstanding commands */ | ||
3461 | mtip_command_cleanup(dd); | ||
3462 | break; | ||
3463 | } | ||
3464 | } | ||
3465 | } | ||
3466 | /* Set the atomic variable as 1 in case of SRSI */ | ||
3467 | atomic_set(&dd->drv_cleanup_done, true); | ||
3468 | |||
3469 | /* Clean up the block layer. */ | ||
3470 | mtip_block_remove(dd); | ||
3471 | |||
3472 | pci_disable_msi(pdev); | ||
3473 | |||
3474 | kfree(dd); | ||
3475 | pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); | ||
3476 | } | ||
3477 | |||
3478 | /* | ||
3479 | * Called for each probed device when the device is suspended. | ||
3480 | * | ||
3481 | * return value | ||
3482 | * 0 Success | ||
3483 | * <0 Error | ||
3484 | */ | ||
3485 | static int mtip_pci_suspend(struct pci_dev *pdev, pm_message_t mesg) | ||
3486 | { | ||
3487 | int rv = 0; | ||
3488 | struct driver_data *dd = pci_get_drvdata(pdev); | ||
3489 | |||
3490 | if (!dd) { | ||
3491 | dev_err(&pdev->dev, | ||
3492 | "Driver private datastructure is NULL\n"); | ||
3493 | return -EFAULT; | ||
3494 | } | ||
3495 | |||
3496 | atomic_set(&dd->resumeflag, true); | ||
3497 | |||
3498 | /* Disable ports & interrupts then send standby immediate */ | ||
3499 | rv = mtip_block_suspend(dd); | ||
3500 | if (rv < 0) { | ||
3501 | dev_err(&pdev->dev, | ||
3502 | "Failed to suspend controller\n"); | ||
3503 | return rv; | ||
3504 | } | ||
3505 | |||
3506 | /* | ||
3507 | * Save the pci config space to pdev structure & | ||
3508 | * disable the device | ||
3509 | */ | ||
3510 | pci_save_state(pdev); | ||
3511 | pci_disable_device(pdev); | ||
3512 | |||
3513 | /* Move to Low power state*/ | ||
3514 | pci_set_power_state(pdev, PCI_D3hot); | ||
3515 | |||
3516 | return rv; | ||
3517 | } | ||
3518 | |||
3519 | /* | ||
3520 | * Called for each probed device when the device is resumed. | ||
3521 | * | ||
3522 | * return value | ||
3523 | * 0 Success | ||
3524 | * <0 Error | ||
3525 | */ | ||
3526 | static int mtip_pci_resume(struct pci_dev *pdev) | ||
3527 | { | ||
3528 | int rv = 0; | ||
3529 | struct driver_data *dd; | ||
3530 | |||
3531 | dd = pci_get_drvdata(pdev); | ||
3532 | if (!dd) { | ||
3533 | dev_err(&pdev->dev, | ||
3534 | "Driver private datastructure is NULL\n"); | ||
3535 | return -EFAULT; | ||
3536 | } | ||
3537 | |||
3538 | /* Move the device to active State */ | ||
3539 | pci_set_power_state(pdev, PCI_D0); | ||
3540 | |||
3541 | /* Restore PCI configuration space */ | ||
3542 | pci_restore_state(pdev); | ||
3543 | |||
3544 | /* Enable the PCI device*/ | ||
3545 | rv = pcim_enable_device(pdev); | ||
3546 | if (rv < 0) { | ||
3547 | dev_err(&pdev->dev, | ||
3548 | "Failed to enable card during resume\n"); | ||
3549 | goto err; | ||
3550 | } | ||
3551 | pci_set_master(pdev); | ||
3552 | |||
3553 | /* | ||
3554 | * Calls the HBA reset, port init, and port start functions, | ||
3555 | * then enables interrupts. | ||
3556 | */ | ||
3557 | rv = mtip_block_resume(dd); | ||
3558 | if (rv < 0) | ||
3559 | dev_err(&pdev->dev, "Unable to resume\n"); | ||
3560 | |||
3561 | err: | ||
3562 | atomic_set(&dd->resumeflag, false); | ||
3563 | |||
3564 | return rv; | ||
3565 | } | ||
3566 | |||
3567 | /* | ||
3568 | * Shutdown routine | ||
3569 | * | ||
3570 | * return value | ||
3571 | * None | ||
3572 | */ | ||
3573 | static void mtip_pci_shutdown(struct pci_dev *pdev) | ||
3574 | { | ||
3575 | struct driver_data *dd = pci_get_drvdata(pdev); | ||
3576 | if (dd) | ||
3577 | mtip_block_shutdown(dd); | ||
3578 | } | ||
3579 | |||
3580 | /* Table of device ids supported by this driver. */ | ||
3581 | static DEFINE_PCI_DEVICE_TABLE(mtip_pci_tbl) = { | ||
3582 | { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320_DEVICE_ID) }, | ||
3583 | { 0 } | ||
3584 | }; | ||
3585 | |||
3586 | /* Structure that describes the PCI driver functions. */ | ||
3587 | static struct pci_driver mtip_pci_driver = { | ||
3588 | .name = MTIP_DRV_NAME, | ||
3589 | .id_table = mtip_pci_tbl, | ||
3590 | .probe = mtip_pci_probe, | ||
3591 | .remove = mtip_pci_remove, | ||
3592 | .suspend = mtip_pci_suspend, | ||
3593 | .resume = mtip_pci_resume, | ||
3594 | .shutdown = mtip_pci_shutdown, | ||
3595 | }; | ||
3596 | |||
3597 | MODULE_DEVICE_TABLE(pci, mtip_pci_tbl); | ||
3598 | |||
3599 | /* | ||
3600 | * Module initialization function. | ||
3601 | * | ||
3602 | * Called once when the module is loaded. This function allocates a major | ||
3603 | * block device number to the Cyclone devices and registers the PCI layer | ||
3604 | * of the driver. | ||
3605 | * | ||
3606 | * Return value | ||
3607 | * 0 on success else error code. | ||
3608 | */ | ||
3609 | static int __init mtip_init(void) | ||
3610 | { | ||
3611 | printk(KERN_INFO MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n"); | ||
3612 | |||
3613 | /* Allocate a major block device number to use with this driver. */ | ||
3614 | mtip_major = register_blkdev(0, MTIP_DRV_NAME); | ||
3615 | if (mtip_major < 0) { | ||
3616 | printk(KERN_ERR "Unable to register block device (%d)\n", | ||
3617 | mtip_major); | ||
3618 | return -EBUSY; | ||
3619 | } | ||
3620 | |||
3621 | /* Register our PCI operations. */ | ||
3622 | return pci_register_driver(&mtip_pci_driver); | ||
3623 | } | ||
3624 | |||
3625 | /* | ||
3626 | * Module de-initialization function. | ||
3627 | * | ||
3628 | * Called once when the module is unloaded. This function deallocates | ||
3629 | * the major block device number allocated by mtip_init() and | ||
3630 | * unregisters the PCI layer of the driver. | ||
3631 | * | ||
3632 | * Return value | ||
3633 | * none | ||
3634 | */ | ||
3635 | static void __exit mtip_exit(void) | ||
3636 | { | ||
3637 | /* Release the allocated major block device number. */ | ||
3638 | unregister_blkdev(mtip_major, MTIP_DRV_NAME); | ||
3639 | |||
3640 | /* Unregister the PCI driver. */ | ||
3641 | pci_unregister_driver(&mtip_pci_driver); | ||
3642 | } | ||
3643 | |||
3644 | MODULE_AUTHOR("Micron Technology, Inc"); | ||
3645 | MODULE_DESCRIPTION("Micron RealSSD PCIe Block Driver"); | ||
3646 | MODULE_LICENSE("GPL"); | ||
3647 | MODULE_VERSION(MTIP_DRV_VERSION); | ||
3648 | |||
3649 | module_init(mtip_init); | ||
3650 | module_exit(mtip_exit); | ||
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h new file mode 100644 index 000000000000..e0554a8f2233 --- /dev/null +++ b/drivers/block/mtip32xx/mtip32xx.h | |||
@@ -0,0 +1,418 @@ | |||
1 | /* | ||
2 | * mtip32xx.h - Header file for the P320 SSD Block Driver | ||
3 | * Copyright (C) 2011 Micron Technology, Inc. | ||
4 | * | ||
5 | * Portions of this code were derived from works subjected to the | ||
6 | * following copyright: | ||
7 | * Copyright (C) 2009 Integrated Device Technology, Inc. | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | */ | ||
20 | |||
21 | #ifndef __MTIP32XX_H__ | ||
22 | #define __MTIP32XX_H__ | ||
23 | |||
24 | #include <linux/spinlock.h> | ||
25 | #include <linux/rwsem.h> | ||
26 | #include <linux/ata.h> | ||
27 | #include <linux/interrupt.h> | ||
28 | #include <linux/genhd.h> | ||
29 | #include <linux/version.h> | ||
30 | |||
31 | /* Offset of Subsystem Device ID in PCI configuration space */ | ||
32 | #define PCI_SUBSYSTEM_DEVICEID 0x2E | ||
33 | |||
34 | /* Offset of Device Control register in PCIe extended capabilities space */ | ||
35 | #define PCIE_CONFIG_EXT_DEVICE_CONTROL_OFFSET 0x48 | ||
36 | |||
37 | /* # of times to retry timed out IOs */ | ||
38 | #define MTIP_MAX_RETRIES 5 | ||
39 | |||
40 | /* Various timeout values in ms */ | ||
41 | #define MTIP_NCQ_COMMAND_TIMEOUT_MS 5000 | ||
42 | #define MTIP_IOCTL_COMMAND_TIMEOUT_MS 5000 | ||
43 | #define MTIP_INTERNAL_COMMAND_TIMEOUT_MS 5000 | ||
44 | |||
45 | /* check for timeouts every 500ms */ | ||
46 | #define MTIP_TIMEOUT_CHECK_PERIOD 500 | ||
47 | |||
48 | /* ftl rebuild */ | ||
49 | #define MTIP_FTL_REBUILD_OFFSET 142 | ||
50 | #define MTIP_FTL_REBUILD_MAGIC 0xED51 | ||
51 | #define MTIP_FTL_REBUILD_TIMEOUT_MS 2400000 | ||
52 | |||
53 | /* Macro to extract the tag bit number from a tag value. */ | ||
54 | #define MTIP_TAG_BIT(tag) (tag & 0x1F) | ||
55 | |||
56 | /* | ||
57 | * Macro to extract the tag index from a tag value. The index | ||
58 | * is used to access the correct s_active/Command Issue register based | ||
59 | * on the tag value. | ||
60 | */ | ||
61 | #define MTIP_TAG_INDEX(tag) (tag >> 5) | ||
62 | |||
63 | /* | ||
64 | * Maximum number of scatter gather entries | ||
65 | * a single command may have. | ||
66 | */ | ||
67 | #define MTIP_MAX_SG 128 | ||
68 | |||
69 | /* | ||
70 | * Maximum number of slot groups (Command Issue & s_active registers) | ||
71 | * NOTE: This is the driver maximum; check dd->slot_groups for actual value. | ||
72 | */ | ||
73 | #define MTIP_MAX_SLOT_GROUPS 8 | ||
74 | |||
75 | /* Internal command tag. */ | ||
76 | #define MTIP_TAG_INTERNAL 0 | ||
77 | |||
78 | /* Micron Vendor ID & P320x SSD Device ID */ | ||
79 | #define PCI_VENDOR_ID_MICRON 0x1344 | ||
80 | #define P320_DEVICE_ID 0x5150 | ||
81 | |||
82 | /* Driver name and version strings */ | ||
83 | #define MTIP_DRV_NAME "mtip32xx" | ||
84 | #define MTIP_DRV_VERSION "1.2.6os3" | ||
85 | |||
86 | /* Maximum number of minor device numbers per device. */ | ||
87 | #define MTIP_MAX_MINORS 16 | ||
88 | |||
89 | /* Maximum number of supported command slots. */ | ||
90 | #define MTIP_MAX_COMMAND_SLOTS (MTIP_MAX_SLOT_GROUPS * 32) | ||
91 | |||
92 | /* | ||
93 | * Per-tag bitfield size in longs. | ||
94 | * Linux bit manipulation functions | ||
95 | * (e.g. test_and_set_bit, find_next_zero_bit) | ||
96 | * manipulate memory in longs, so we try to make the math work: | ||
97 | * take the slot groups and find the number of longs, rounding up. | ||
98 | * Careful! i386 and x86_64 use different size longs! | ||
99 | */ | ||
100 | #define U32_PER_LONG (sizeof(long) / sizeof(u32)) | ||
101 | #define SLOTBITS_IN_LONGS ((MTIP_MAX_SLOT_GROUPS + \ | ||
102 | (U32_PER_LONG-1))/U32_PER_LONG) | ||
103 | |||
104 | /* BAR number used to access the HBA registers. */ | ||
105 | #define MTIP_ABAR 5 | ||
106 | |||
107 | #ifdef DEBUG | ||
108 | #define dbg_printk(format, arg...) \ | ||
109 | printk(pr_fmt(format), ##arg); | ||
110 | #else | ||
111 | #define dbg_printk(format, arg...) | ||
112 | #endif | ||
113 | |||
114 | #define __force_bit2int (unsigned int __force) | ||
115 | |||
116 | /* below are bit numbers in 'flags' defined in mtip_port */ | ||
117 | #define MTIP_FLAG_IC_ACTIVE_BIT 0 | ||
118 | #define MTIP_FLAG_EH_ACTIVE_BIT 1 | ||
119 | #define MTIP_FLAG_SVC_THD_ACTIVE_BIT 2 | ||
120 | #define MTIP_FLAG_ISSUE_CMDS_BIT 4 | ||
121 | #define MTIP_FLAG_REBUILD_BIT 5 | ||
122 | #define MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT 8 | ||
123 | |||
124 | /* Register Frame Information Structure (FIS), host to device. */ | ||
125 | struct host_to_dev_fis { | ||
126 | /* | ||
127 | * FIS type. | ||
128 | * - 27h Register FIS, host to device. | ||
129 | * - 34h Register FIS, device to host. | ||
130 | * - 39h DMA Activate FIS, device to host. | ||
131 | * - 41h DMA Setup FIS, bi-directional. | ||
132 | * - 46h Data FIS, bi-directional. | ||
133 | * - 58h BIST Activate FIS, bi-directional. | ||
134 | * - 5Fh PIO Setup FIS, device to host. | ||
135 | * - A1h Set Device Bits FIS, device to host. | ||
136 | */ | ||
137 | unsigned char type; | ||
138 | unsigned char opts; | ||
139 | unsigned char command; | ||
140 | unsigned char features; | ||
141 | |||
142 | union { | ||
143 | unsigned char lba_low; | ||
144 | unsigned char sector; | ||
145 | }; | ||
146 | union { | ||
147 | unsigned char lba_mid; | ||
148 | unsigned char cyl_low; | ||
149 | }; | ||
150 | union { | ||
151 | unsigned char lba_hi; | ||
152 | unsigned char cyl_hi; | ||
153 | }; | ||
154 | union { | ||
155 | unsigned char device; | ||
156 | unsigned char head; | ||
157 | }; | ||
158 | |||
159 | union { | ||
160 | unsigned char lba_low_ex; | ||
161 | unsigned char sector_ex; | ||
162 | }; | ||
163 | union { | ||
164 | unsigned char lba_mid_ex; | ||
165 | unsigned char cyl_low_ex; | ||
166 | }; | ||
167 | union { | ||
168 | unsigned char lba_hi_ex; | ||
169 | unsigned char cyl_hi_ex; | ||
170 | }; | ||
171 | unsigned char features_ex; | ||
172 | |||
173 | unsigned char sect_count; | ||
174 | unsigned char sect_cnt_ex; | ||
175 | unsigned char res2; | ||
176 | unsigned char control; | ||
177 | |||
178 | unsigned int res3; | ||
179 | }; | ||
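A minimal sketch of how the FIS fields above compose a command (the helper name and the READ DMA EXT opcode are assumptions for illustration; they are not asserted to be what this driver issues):

	static void example_fill_read_fis(struct host_to_dev_fis *fis,
					  u64 lba, u16 nsect)
	{
		memset(fis, 0, sizeof(*fis));
		fis->type       = 0x27;		/* Register FIS, host to device */
		fis->opts       = 1 << 7;	/* command-update ("C") bit */
		fis->command    = 0x25;		/* READ DMA EXT (assumed opcode) */
		fis->device     = 1 << 6;	/* LBA mode */
		fis->lba_low    = lba & 0xff;
		fis->lba_mid    = (lba >> 8) & 0xff;
		fis->lba_hi     = (lba >> 16) & 0xff;
		fis->lba_low_ex = (lba >> 24) & 0xff;
		fis->lba_mid_ex = (lba >> 32) & 0xff;
		fis->lba_hi_ex  = (lba >> 40) & 0xff;
		fis->sect_count  = nsect & 0xff;
		fis->sect_cnt_ex = nsect >> 8;
	}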
180 | |||
181 | /* Command header structure. */ | ||
182 | struct mtip_cmd_hdr { | ||
183 | /* | ||
184 | * Command options. | ||
185 | * - Bits 31:16 Number of PRD entries. | ||
186 | * - Bits 15:8 Unused in this implementation. | ||
187 | * - Bit 7 Prefetch bit, informs the drive to prefetch PRD entries. | ||
188 | * - Bit 6 Write bit, should be set when writing data to the device. | ||
189 | * - Bit 5 Unused in this implementation. | ||
190 | * - Bits 4:0 Length of the command FIS in DWords (DWord = 4 bytes). | ||
191 | */ | ||
192 | unsigned int opts; | ||
192 | /* This field is unused when using NCQ. */ | ||
194 | union { | ||
195 | unsigned int byte_count; | ||
196 | unsigned int status; | ||
197 | }; | ||
198 | /* | ||
199 | * Lower 32 bits of the command table address associated with this | ||
200 | * header. The command table addresses must be 128 byte aligned. | ||
201 | */ | ||
202 | unsigned int ctba; | ||
203 | /* | ||
204 | * If 64 bit addressing is used this field is the upper 32 bits | ||
205 | * of the command table address associated with this command. | ||
206 | */ | ||
207 | unsigned int ctbau; | ||
208 | /* Reserved and unused. */ | ||
209 | unsigned int res[4]; | ||
210 | }; | ||
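A hedged sketch of encoding the opts word according to the bit layout documented above (the helper and its parameters are illustrative only):

	static unsigned int example_cmd_hdr_opts(int nents, int is_write)
	{
		unsigned int opts = (nents << 16) | 5;	/* 5-DWord command FIS */

		if (is_write)
			opts |= 1 << 6;			/* write bit */
		return __force_bit2int cpu_to_le32(opts);
	}

The FIS length of 5 DWords matches sizeof(struct host_to_dev_fis) = 20 bytes.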
211 | |||
212 | /* Command scatter gather structure (PRD). */ | ||
213 | struct mtip_cmd_sg { | ||
214 | /* | ||
215 | * Low 32 bits of the data buffer address. For P320 this | ||
216 | * address must be 8 byte aligned signified by bits 2:0 being | ||
217 | * set to 0. | ||
218 | */ | ||
219 | unsigned int dba; | ||
220 | /* | ||
221 | * When 64 bit addressing is used this field is the upper | ||
222 | * 32 bits of the data buffer address. | ||
223 | */ | ||
224 | unsigned int dba_upper; | ||
225 | /* Unused. */ | ||
226 | unsigned int reserved; | ||
227 | /* | ||
228 | * Bit 31: interrupt when this data block has been transferred. | ||
229 | * Bits 30..22: reserved | ||
230 | * Bits 21..0: byte count (minus 1). For P320 the byte count must be | ||
231 | * 8 byte aligned, signified by bits 2:0 of this field all being set to 1. | ||
232 | */ | ||
233 | unsigned int info; | ||
234 | }; | ||
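A minimal sketch (helper name and parameters assumed) of filling one PRD entry from a DMA-mapped segment of len bytes; the info word carries the byte count minus one, and bit 31 could additionally be set to request an interrupt when the block completes:

	static void example_fill_prd(struct mtip_cmd_sg *prd,
				     dma_addr_t dma, unsigned int len)
	{
		prd->dba       = __force_bit2int cpu_to_le32(dma & 0xffffffff);
		prd->dba_upper = __force_bit2int cpu_to_le32((u64)dma >> 32);
		prd->reserved  = 0;
		prd->info      = __force_bit2int cpu_to_le32((len - 1) & 0x3fffff);
	}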
235 | struct mtip_port; | ||
236 | |||
237 | /* Structure used to describe a command. */ | ||
238 | struct mtip_cmd { | ||
239 | |||
240 | struct mtip_cmd_hdr *command_header; /* ptr to command header entry */ | ||
241 | |||
242 | dma_addr_t command_header_dma; /* corresponding physical address */ | ||
243 | |||
244 | void *command; /* ptr to command table entry */ | ||
245 | |||
246 | dma_addr_t command_dma; /* corresponding physical address */ | ||
247 | |||
248 | void *comp_data; /* data passed to completion function comp_func() */ | ||
249 | /* | ||
250 | * Completion function called by the ISR upon completion of | ||
251 | * a command. | ||
252 | */ | ||
253 | void (*comp_func)(struct mtip_port *port, | ||
254 | int tag, | ||
255 | void *data, | ||
256 | int status); | ||
257 | /* Additional callback function that may be called by comp_func() */ | ||
258 | void (*async_callback)(void *data, int status); | ||
259 | |||
260 | void *async_data; /* Addl. data passed to async_callback() */ | ||
261 | |||
262 | int scatter_ents; /* Number of scatter list entries used */ | ||
263 | |||
264 | struct scatterlist sg[MTIP_MAX_SG]; /* Scatter list entries */ | ||
265 | |||
266 | int retries; /* The number of retries left for this command. */ | ||
267 | |||
268 | int direction; /* Data transfer direction */ | ||
269 | |||
270 | unsigned long comp_time; /* command completion time, in jiffies */ | ||
271 | |||
272 | atomic_t active; /* declares if this command has been sent to the drive. */ | ||
273 | }; | ||
274 | |||
275 | /* Structure used to describe a port. */ | ||
276 | struct mtip_port { | ||
277 | /* Pointer back to the driver data for this port. */ | ||
278 | struct driver_data *dd; | ||
279 | /* | ||
280 | * Used to determine if the data pointed to by the | ||
281 | * identify field is valid. | ||
282 | */ | ||
283 | unsigned long identify_valid; | ||
284 | /* Base address of the memory mapped IO for the port. */ | ||
285 | void __iomem *mmio; | ||
286 | /* Array of pointers to the memory mapped s_active registers. */ | ||
287 | void __iomem *s_active[MTIP_MAX_SLOT_GROUPS]; | ||
288 | /* Array of pointers to the memory mapped completed registers. */ | ||
289 | void __iomem *completed[MTIP_MAX_SLOT_GROUPS]; | ||
290 | /* Array of pointers to the memory mapped Command Issue registers. */ | ||
291 | void __iomem *cmd_issue[MTIP_MAX_SLOT_GROUPS]; | ||
292 | /* | ||
293 | * Pointer to the beginning of the command header memory as used | ||
294 | * by the driver. | ||
295 | */ | ||
296 | void *command_list; | ||
297 | /* | ||
298 | * Pointer to the beginning of the command header memory as used | ||
299 | * by the DMA. | ||
300 | */ | ||
301 | dma_addr_t command_list_dma; | ||
302 | /* | ||
303 | * Pointer to the beginning of the RX FIS memory as used | ||
304 | * by the driver. | ||
305 | */ | ||
306 | void *rxfis; | ||
307 | /* | ||
308 | * Pointer to the beginning of the RX FIS memory as used | ||
309 | * by the DMA. | ||
310 | */ | ||
311 | dma_addr_t rxfis_dma; | ||
312 | /* | ||
313 | * Pointer to the beginning of the command table memory as used | ||
314 | * by the driver. | ||
315 | */ | ||
316 | void *command_table; | ||
317 | /* | ||
318 | * Pointer to the beginning of the command table memory as used | ||
319 | * by the DMA. | ||
320 | */ | ||
321 | dma_addr_t command_tbl_dma; | ||
322 | /* | ||
323 | * Pointer to the beginning of the identify data memory as used | ||
324 | * by the driver. | ||
325 | */ | ||
326 | u16 *identify; | ||
327 | /* | ||
328 | * Pointer to the beginning of the identify data memory as used | ||
329 | * by the DMA. | ||
330 | */ | ||
331 | dma_addr_t identify_dma; | ||
332 | /* | ||
333 | * Pointer to the beginning of a sector buffer that is used | ||
334 | * by the driver when issuing internal commands. | ||
335 | */ | ||
336 | u16 *sector_buffer; | ||
337 | /* | ||
338 | * Pointer to the beginning of a sector buffer that is used | ||
339 | * by the DMA when the driver issues internal commands. | ||
340 | */ | ||
341 | dma_addr_t sector_buffer_dma; | ||
342 | /* | ||
343 | * Bit significant, used to determine if a command slot has | ||
344 | * been allocated, i.e. the slot is in use. Bits are cleared | ||
345 | * when the command slot and all associated data structures | ||
346 | * are no longer needed. | ||
347 | */ | ||
348 | unsigned long allocated[SLOTBITS_IN_LONGS]; | ||
349 | /* | ||
350 | * used to queue commands when an internal command is in progress | ||
351 | * or error handling is active | ||
352 | */ | ||
353 | unsigned long cmds_to_issue[SLOTBITS_IN_LONGS]; | ||
354 | /* | ||
355 | * Array of command slots. Structure includes pointers to the | ||
356 | * command header and command table, and completion function and data | ||
357 | * pointers. | ||
358 | */ | ||
359 | struct mtip_cmd commands[MTIP_MAX_COMMAND_SLOTS]; | ||
360 | /* Used by mtip_service_thread to wait for an event */ | ||
361 | wait_queue_head_t svc_wait; | ||
362 | /* | ||
363 | * indicates the state of the port. Also helps the service thread | ||
364 | * to determine its action on wake up. | ||
365 | */ | ||
366 | unsigned long flags; | ||
367 | /* | ||
368 | * Timer used to complete commands that have been active for too long. | ||
369 | */ | ||
370 | struct timer_list cmd_timer; | ||
371 | /* | ||
372 | * Semaphore used to block threads if there are no | ||
373 | * command slots available. | ||
374 | */ | ||
375 | struct semaphore cmd_slot; | ||
376 | /* Spinlock for working around command-issue bug. */ | ||
377 | spinlock_t cmd_issue_lock; | ||
378 | }; | ||
379 | |||
380 | /* | ||
381 | * Driver private data structure. | ||
382 | * | ||
383 | * One structure is allocated per probed device. | ||
384 | */ | ||
385 | struct driver_data { | ||
386 | void __iomem *mmio; /* Base address of the HBA registers. */ | ||
387 | |||
388 | int major; /* Major device number. */ | ||
389 | |||
390 | int instance; /* Instance number. First device probed is 0, ... */ | ||
391 | |||
392 | struct gendisk *disk; /* Pointer to our gendisk structure. */ | ||
393 | |||
394 | struct pci_dev *pdev; /* Pointer to the PCI device structure. */ | ||
395 | |||
396 | struct request_queue *queue; /* Our request queue. */ | ||
397 | |||
398 | struct mtip_port *port; /* Pointer to the port data structure. */ | ||
399 | |||
400 | /* Tasklet used to process the bottom half of the ISR. */ | ||
401 | struct tasklet_struct tasklet; | ||
402 | |||
403 | unsigned product_type; /* magic value declaring the product type */ | ||
404 | |||
405 | unsigned slot_groups; /* number of slot groups the product supports */ | ||
406 | |||
407 | atomic_t drv_cleanup_done; /* Atomic variable for SRSI */ | ||
408 | |||
409 | unsigned long index; /* Index to determine the disk name */ | ||
410 | |||
411 | unsigned int ftlrebuildflag; /* FTL rebuild flag */ | ||
412 | |||
413 | atomic_t resumeflag; /* Atomic variable to track suspend/resume */ | ||
414 | |||
415 | struct task_struct *mtip_svc_handler; /* task_struct of svc thd */ | ||
416 | }; | ||
417 | |||
418 | #endif | ||
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index c3f0ee16594d..061427a75d37 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c | |||
@@ -34,12 +34,11 @@ | |||
34 | #include <linux/kthread.h> | 34 | #include <linux/kthread.h> |
35 | 35 | ||
36 | #include <asm/uaccess.h> | 36 | #include <asm/uaccess.h> |
37 | #include <asm/system.h> | ||
38 | #include <asm/types.h> | 37 | #include <asm/types.h> |
39 | 38 | ||
40 | #include <linux/nbd.h> | 39 | #include <linux/nbd.h> |
41 | 40 | ||
42 | #define LO_MAGIC 0x68797548 | 41 | #define NBD_MAGIC 0x68797548 |
43 | 42 | ||
44 | #ifdef NDEBUG | 43 | #ifdef NDEBUG |
45 | #define dprintk(flags, fmt...) | 44 | #define dprintk(flags, fmt...) |
@@ -116,7 +115,7 @@ static void nbd_end_request(struct request *req) | |||
116 | spin_unlock_irqrestore(q->queue_lock, flags); | 115 | spin_unlock_irqrestore(q->queue_lock, flags); |
117 | } | 116 | } |
118 | 117 | ||
119 | static void sock_shutdown(struct nbd_device *lo, int lock) | 118 | static void sock_shutdown(struct nbd_device *nbd, int lock) |
120 | { | 119 | { |
121 | /* Forcibly shutdown the socket causing all listeners | 120 | /* Forcibly shutdown the socket causing all listeners |
122 | * to error | 121 | * to error |
@@ -125,14 +124,14 @@ static void sock_shutdown(struct nbd_device *lo, int lock) | |||
125 | * there should be a more generic interface rather than | 124 | * there should be a more generic interface rather than |
126 | * calling socket ops directly here */ | 125 | * calling socket ops directly here */ |
127 | if (lock) | 126 | if (lock) |
128 | mutex_lock(&lo->tx_lock); | 127 | mutex_lock(&nbd->tx_lock); |
129 | if (lo->sock) { | 128 | if (nbd->sock) { |
130 | dev_warn(disk_to_dev(lo->disk), "shutting down socket\n"); | 129 | dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n"); |
131 | kernel_sock_shutdown(lo->sock, SHUT_RDWR); | 130 | kernel_sock_shutdown(nbd->sock, SHUT_RDWR); |
132 | lo->sock = NULL; | 131 | nbd->sock = NULL; |
133 | } | 132 | } |
134 | if (lock) | 133 | if (lock) |
135 | mutex_unlock(&lo->tx_lock); | 134 | mutex_unlock(&nbd->tx_lock); |
136 | } | 135 | } |
137 | 136 | ||
138 | static void nbd_xmit_timeout(unsigned long arg) | 137 | static void nbd_xmit_timeout(unsigned long arg) |
@@ -147,17 +146,17 @@ static void nbd_xmit_timeout(unsigned long arg) | |||
147 | /* | 146 | /* |
148 | * Send or receive packet. | 147 | * Send or receive packet. |
149 | */ | 148 | */ |
150 | static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size, | 149 | static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size, |
151 | int msg_flags) | 150 | int msg_flags) |
152 | { | 151 | { |
153 | struct socket *sock = lo->sock; | 152 | struct socket *sock = nbd->sock; |
154 | int result; | 153 | int result; |
155 | struct msghdr msg; | 154 | struct msghdr msg; |
156 | struct kvec iov; | 155 | struct kvec iov; |
157 | sigset_t blocked, oldset; | 156 | sigset_t blocked, oldset; |
158 | 157 | ||
159 | if (unlikely(!sock)) { | 158 | if (unlikely(!sock)) { |
160 | dev_err(disk_to_dev(lo->disk), | 159 | dev_err(disk_to_dev(nbd->disk), |
161 | "Attempted %s on closed socket in sock_xmit\n", | 160 | "Attempted %s on closed socket in sock_xmit\n", |
162 | (send ? "send" : "recv")); | 161 | (send ? "send" : "recv")); |
163 | return -EINVAL; | 162 | return -EINVAL; |
@@ -181,15 +180,15 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size, | |||
181 | if (send) { | 180 | if (send) { |
182 | struct timer_list ti; | 181 | struct timer_list ti; |
183 | 182 | ||
184 | if (lo->xmit_timeout) { | 183 | if (nbd->xmit_timeout) { |
185 | init_timer(&ti); | 184 | init_timer(&ti); |
186 | ti.function = nbd_xmit_timeout; | 185 | ti.function = nbd_xmit_timeout; |
187 | ti.data = (unsigned long)current; | 186 | ti.data = (unsigned long)current; |
188 | ti.expires = jiffies + lo->xmit_timeout; | 187 | ti.expires = jiffies + nbd->xmit_timeout; |
189 | add_timer(&ti); | 188 | add_timer(&ti); |
190 | } | 189 | } |
191 | result = kernel_sendmsg(sock, &msg, &iov, 1, size); | 190 | result = kernel_sendmsg(sock, &msg, &iov, 1, size); |
192 | if (lo->xmit_timeout) | 191 | if (nbd->xmit_timeout) |
193 | del_timer_sync(&ti); | 192 | del_timer_sync(&ti); |
194 | } else | 193 | } else |
195 | result = kernel_recvmsg(sock, &msg, &iov, 1, size, | 194 | result = kernel_recvmsg(sock, &msg, &iov, 1, size, |
@@ -201,7 +200,7 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size, | |||
201 | task_pid_nr(current), current->comm, | 200 | task_pid_nr(current), current->comm, |
202 | dequeue_signal_lock(current, ¤t->blocked, &info)); | 201 | dequeue_signal_lock(current, ¤t->blocked, &info)); |
203 | result = -EINTR; | 202 | result = -EINTR; |
204 | sock_shutdown(lo, !send); | 203 | sock_shutdown(nbd, !send); |
205 | break; | 204 | break; |
206 | } | 205 | } |
207 | 206 | ||
@@ -219,18 +218,19 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size, | |||
219 | return result; | 218 | return result; |
220 | } | 219 | } |
221 | 220 | ||
222 | static inline int sock_send_bvec(struct nbd_device *lo, struct bio_vec *bvec, | 221 | static inline int sock_send_bvec(struct nbd_device *nbd, struct bio_vec *bvec, |
223 | int flags) | 222 | int flags) |
224 | { | 223 | { |
225 | int result; | 224 | int result; |
226 | void *kaddr = kmap(bvec->bv_page); | 225 | void *kaddr = kmap(bvec->bv_page); |
227 | result = sock_xmit(lo, 1, kaddr + bvec->bv_offset, bvec->bv_len, flags); | 226 | result = sock_xmit(nbd, 1, kaddr + bvec->bv_offset, |
227 | bvec->bv_len, flags); | ||
228 | kunmap(bvec->bv_page); | 228 | kunmap(bvec->bv_page); |
229 | return result; | 229 | return result; |
230 | } | 230 | } |
231 | 231 | ||
232 | /* always call with the tx_lock held */ | 232 | /* always call with the tx_lock held */ |
233 | static int nbd_send_req(struct nbd_device *lo, struct request *req) | 233 | static int nbd_send_req(struct nbd_device *nbd, struct request *req) |
234 | { | 234 | { |
235 | int result, flags; | 235 | int result, flags; |
236 | struct nbd_request request; | 236 | struct nbd_request request; |
@@ -243,14 +243,14 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req) | |||
243 | memcpy(request.handle, &req, sizeof(req)); | 243 | memcpy(request.handle, &req, sizeof(req)); |
244 | 244 | ||
245 | dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n", | 245 | dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n", |
246 | lo->disk->disk_name, req, | 246 | nbd->disk->disk_name, req, |
247 | nbdcmd_to_ascii(nbd_cmd(req)), | 247 | nbdcmd_to_ascii(nbd_cmd(req)), |
248 | (unsigned long long)blk_rq_pos(req) << 9, | 248 | (unsigned long long)blk_rq_pos(req) << 9, |
249 | blk_rq_bytes(req)); | 249 | blk_rq_bytes(req)); |
250 | result = sock_xmit(lo, 1, &request, sizeof(request), | 250 | result = sock_xmit(nbd, 1, &request, sizeof(request), |
251 | (nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0); | 251 | (nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0); |
252 | if (result <= 0) { | 252 | if (result <= 0) { |
253 | dev_err(disk_to_dev(lo->disk), | 253 | dev_err(disk_to_dev(nbd->disk), |
254 | "Send control failed (result %d)\n", result); | 254 | "Send control failed (result %d)\n", result); |
255 | goto error_out; | 255 | goto error_out; |
256 | } | 256 | } |
@@ -267,10 +267,10 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req) | |||
267 | if (!rq_iter_last(req, iter)) | 267 | if (!rq_iter_last(req, iter)) |
268 | flags = MSG_MORE; | 268 | flags = MSG_MORE; |
269 | dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n", | 269 | dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n", |
270 | lo->disk->disk_name, req, bvec->bv_len); | 270 | nbd->disk->disk_name, req, bvec->bv_len); |
271 | result = sock_send_bvec(lo, bvec, flags); | 271 | result = sock_send_bvec(nbd, bvec, flags); |
272 | if (result <= 0) { | 272 | if (result <= 0) { |
273 | dev_err(disk_to_dev(lo->disk), | 273 | dev_err(disk_to_dev(nbd->disk), |
274 | "Send data failed (result %d)\n", | 274 | "Send data failed (result %d)\n", |
275 | result); | 275 | result); |
276 | goto error_out; | 276 | goto error_out; |
@@ -283,25 +283,25 @@ error_out: | |||
283 | return -EIO; | 283 | return -EIO; |
284 | } | 284 | } |
285 | 285 | ||
286 | static struct request *nbd_find_request(struct nbd_device *lo, | 286 | static struct request *nbd_find_request(struct nbd_device *nbd, |
287 | struct request *xreq) | 287 | struct request *xreq) |
288 | { | 288 | { |
289 | struct request *req, *tmp; | 289 | struct request *req, *tmp; |
290 | int err; | 290 | int err; |
291 | 291 | ||
292 | err = wait_event_interruptible(lo->active_wq, lo->active_req != xreq); | 292 | err = wait_event_interruptible(nbd->active_wq, nbd->active_req != xreq); |
293 | if (unlikely(err)) | 293 | if (unlikely(err)) |
294 | goto out; | 294 | goto out; |
295 | 295 | ||
296 | spin_lock(&lo->queue_lock); | 296 | spin_lock(&nbd->queue_lock); |
297 | list_for_each_entry_safe(req, tmp, &lo->queue_head, queuelist) { | 297 | list_for_each_entry_safe(req, tmp, &nbd->queue_head, queuelist) { |
298 | if (req != xreq) | 298 | if (req != xreq) |
299 | continue; | 299 | continue; |
300 | list_del_init(&req->queuelist); | 300 | list_del_init(&req->queuelist); |
301 | spin_unlock(&lo->queue_lock); | 301 | spin_unlock(&nbd->queue_lock); |
302 | return req; | 302 | return req; |
303 | } | 303 | } |
304 | spin_unlock(&lo->queue_lock); | 304 | spin_unlock(&nbd->queue_lock); |
305 | 305 | ||
306 | err = -ENOENT; | 306 | err = -ENOENT; |
307 | 307 | ||
@@ -309,78 +309,78 @@ out: | |||
309 | return ERR_PTR(err); | 309 | return ERR_PTR(err); |
310 | } | 310 | } |
311 | 311 | ||
312 | static inline int sock_recv_bvec(struct nbd_device *lo, struct bio_vec *bvec) | 312 | static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec) |
313 | { | 313 | { |
314 | int result; | 314 | int result; |
315 | void *kaddr = kmap(bvec->bv_page); | 315 | void *kaddr = kmap(bvec->bv_page); |
316 | result = sock_xmit(lo, 0, kaddr + bvec->bv_offset, bvec->bv_len, | 316 | result = sock_xmit(nbd, 0, kaddr + bvec->bv_offset, bvec->bv_len, |
317 | MSG_WAITALL); | 317 | MSG_WAITALL); |
318 | kunmap(bvec->bv_page); | 318 | kunmap(bvec->bv_page); |
319 | return result; | 319 | return result; |
320 | } | 320 | } |
321 | 321 | ||
322 | /* NULL returned = something went wrong, inform userspace */ | 322 | /* NULL returned = something went wrong, inform userspace */ |
323 | static struct request *nbd_read_stat(struct nbd_device *lo) | 323 | static struct request *nbd_read_stat(struct nbd_device *nbd) |
324 | { | 324 | { |
325 | int result; | 325 | int result; |
326 | struct nbd_reply reply; | 326 | struct nbd_reply reply; |
327 | struct request *req; | 327 | struct request *req; |
328 | 328 | ||
329 | reply.magic = 0; | 329 | reply.magic = 0; |
330 | result = sock_xmit(lo, 0, &reply, sizeof(reply), MSG_WAITALL); | 330 | result = sock_xmit(nbd, 0, &reply, sizeof(reply), MSG_WAITALL); |
331 | if (result <= 0) { | 331 | if (result <= 0) { |
332 | dev_err(disk_to_dev(lo->disk), | 332 | dev_err(disk_to_dev(nbd->disk), |
333 | "Receive control failed (result %d)\n", result); | 333 | "Receive control failed (result %d)\n", result); |
334 | goto harderror; | 334 | goto harderror; |
335 | } | 335 | } |
336 | 336 | ||
337 | if (ntohl(reply.magic) != NBD_REPLY_MAGIC) { | 337 | if (ntohl(reply.magic) != NBD_REPLY_MAGIC) { |
338 | dev_err(disk_to_dev(lo->disk), "Wrong magic (0x%lx)\n", | 338 | dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n", |
339 | (unsigned long)ntohl(reply.magic)); | 339 | (unsigned long)ntohl(reply.magic)); |
340 | result = -EPROTO; | 340 | result = -EPROTO; |
341 | goto harderror; | 341 | goto harderror; |
342 | } | 342 | } |
343 | 343 | ||
344 | req = nbd_find_request(lo, *(struct request **)reply.handle); | 344 | req = nbd_find_request(nbd, *(struct request **)reply.handle); |
345 | if (IS_ERR(req)) { | 345 | if (IS_ERR(req)) { |
346 | result = PTR_ERR(req); | 346 | result = PTR_ERR(req); |
347 | if (result != -ENOENT) | 347 | if (result != -ENOENT) |
348 | goto harderror; | 348 | goto harderror; |
349 | 349 | ||
350 | dev_err(disk_to_dev(lo->disk), "Unexpected reply (%p)\n", | 350 | dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%p)\n", |
351 | reply.handle); | 351 | reply.handle); |
352 | result = -EBADR; | 352 | result = -EBADR; |
353 | goto harderror; | 353 | goto harderror; |
354 | } | 354 | } |
355 | 355 | ||
356 | if (ntohl(reply.error)) { | 356 | if (ntohl(reply.error)) { |
357 | dev_err(disk_to_dev(lo->disk), "Other side returned error (%d)\n", | 357 | dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", |
358 | ntohl(reply.error)); | 358 | ntohl(reply.error)); |
359 | req->errors++; | 359 | req->errors++; |
360 | return req; | 360 | return req; |
361 | } | 361 | } |
362 | 362 | ||
363 | dprintk(DBG_RX, "%s: request %p: got reply\n", | 363 | dprintk(DBG_RX, "%s: request %p: got reply\n", |
364 | lo->disk->disk_name, req); | 364 | nbd->disk->disk_name, req); |
365 | if (nbd_cmd(req) == NBD_CMD_READ) { | 365 | if (nbd_cmd(req) == NBD_CMD_READ) { |
366 | struct req_iterator iter; | 366 | struct req_iterator iter; |
367 | struct bio_vec *bvec; | 367 | struct bio_vec *bvec; |
368 | 368 | ||
369 | rq_for_each_segment(bvec, req, iter) { | 369 | rq_for_each_segment(bvec, req, iter) { |
370 | result = sock_recv_bvec(lo, bvec); | 370 | result = sock_recv_bvec(nbd, bvec); |
371 | if (result <= 0) { | 371 | if (result <= 0) { |
372 | dev_err(disk_to_dev(lo->disk), "Receive data failed (result %d)\n", | 372 | dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", |
373 | result); | 373 | result); |
374 | req->errors++; | 374 | req->errors++; |
375 | return req; | 375 | return req; |
376 | } | 376 | } |
377 | dprintk(DBG_RX, "%s: request %p: got %d bytes data\n", | 377 | dprintk(DBG_RX, "%s: request %p: got %d bytes data\n", |
378 | lo->disk->disk_name, req, bvec->bv_len); | 378 | nbd->disk->disk_name, req, bvec->bv_len); |
379 | } | 379 | } |
380 | } | 380 | } |
381 | return req; | 381 | return req; |
382 | harderror: | 382 | harderror: |
383 | lo->harderror = result; | 383 | nbd->harderror = result; |
384 | return NULL; | 384 | return NULL; |
385 | } | 385 | } |
386 | 386 | ||
@@ -398,48 +398,48 @@ static struct device_attribute pid_attr = { | |||
398 | .show = pid_show, | 398 | .show = pid_show, |
399 | }; | 399 | }; |
400 | 400 | ||
401 | static int nbd_do_it(struct nbd_device *lo) | 401 | static int nbd_do_it(struct nbd_device *nbd) |
402 | { | 402 | { |
403 | struct request *req; | 403 | struct request *req; |
404 | int ret; | 404 | int ret; |
405 | 405 | ||
406 | BUG_ON(lo->magic != LO_MAGIC); | 406 | BUG_ON(nbd->magic != NBD_MAGIC); |
407 | 407 | ||
408 | lo->pid = task_pid_nr(current); | 408 | nbd->pid = task_pid_nr(current); |
409 | ret = device_create_file(disk_to_dev(lo->disk), &pid_attr); | 409 | ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr); |
410 | if (ret) { | 410 | if (ret) { |
411 | dev_err(disk_to_dev(lo->disk), "device_create_file failed!\n"); | 411 | dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); |
412 | lo->pid = 0; | 412 | nbd->pid = 0; |
413 | return ret; | 413 | return ret; |
414 | } | 414 | } |
415 | 415 | ||
416 | while ((req = nbd_read_stat(lo)) != NULL) | 416 | while ((req = nbd_read_stat(nbd)) != NULL) |
417 | nbd_end_request(req); | 417 | nbd_end_request(req); |
418 | 418 | ||
419 | device_remove_file(disk_to_dev(lo->disk), &pid_attr); | 419 | device_remove_file(disk_to_dev(nbd->disk), &pid_attr); |
420 | lo->pid = 0; | 420 | nbd->pid = 0; |
421 | return 0; | 421 | return 0; |
422 | } | 422 | } |
423 | 423 | ||
424 | static void nbd_clear_que(struct nbd_device *lo) | 424 | static void nbd_clear_que(struct nbd_device *nbd) |
425 | { | 425 | { |
426 | struct request *req; | 426 | struct request *req; |
427 | 427 | ||
428 | BUG_ON(lo->magic != LO_MAGIC); | 428 | BUG_ON(nbd->magic != NBD_MAGIC); |
429 | 429 | ||
430 | /* | 430 | /* |
431 | * Because we have set lo->sock to NULL under the tx_lock, all | 431 | * Because we have set nbd->sock to NULL under the tx_lock, all |
432 | * modifications to the list must have completed by now. For | 432 | * modifications to the list must have completed by now. For |
433 | * the same reason, the active_req must be NULL. | 433 | * the same reason, the active_req must be NULL. |
434 | * | 434 | * |
435 | * As a consequence, we don't need to take the spin lock while | 435 | * As a consequence, we don't need to take the spin lock while |
436 | * purging the list here. | 436 | * purging the list here. |
437 | */ | 437 | */ |
438 | BUG_ON(lo->sock); | 438 | BUG_ON(nbd->sock); |
439 | BUG_ON(lo->active_req); | 439 | BUG_ON(nbd->active_req); |
440 | 440 | ||
441 | while (!list_empty(&lo->queue_head)) { | 441 | while (!list_empty(&nbd->queue_head)) { |
442 | req = list_entry(lo->queue_head.next, struct request, | 442 | req = list_entry(nbd->queue_head.next, struct request, |
443 | queuelist); | 443 | queuelist); |
444 | list_del_init(&req->queuelist); | 444 | list_del_init(&req->queuelist); |
445 | req->errors++; | 445 | req->errors++; |
@@ -448,7 +448,7 @@ static void nbd_clear_que(struct nbd_device *lo) | |||
448 | } | 448 | } |
449 | 449 | ||
450 | 450 | ||
451 | static void nbd_handle_req(struct nbd_device *lo, struct request *req) | 451 | static void nbd_handle_req(struct nbd_device *nbd, struct request *req) |
452 | { | 452 | { |
453 | if (req->cmd_type != REQ_TYPE_FS) | 453 | if (req->cmd_type != REQ_TYPE_FS) |
454 | goto error_out; | 454 | goto error_out; |
@@ -456,8 +456,8 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req) | |||
456 | nbd_cmd(req) = NBD_CMD_READ; | 456 | nbd_cmd(req) = NBD_CMD_READ; |
457 | if (rq_data_dir(req) == WRITE) { | 457 | if (rq_data_dir(req) == WRITE) { |
458 | nbd_cmd(req) = NBD_CMD_WRITE; | 458 | nbd_cmd(req) = NBD_CMD_WRITE; |
459 | if (lo->flags & NBD_READ_ONLY) { | 459 | if (nbd->flags & NBD_READ_ONLY) { |
460 | dev_err(disk_to_dev(lo->disk), | 460 | dev_err(disk_to_dev(nbd->disk), |
461 | "Write on read-only\n"); | 461 | "Write on read-only\n"); |
462 | goto error_out; | 462 | goto error_out; |
463 | } | 463 | } |
@@ -465,29 +465,29 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req) | |||
465 | 465 | ||
466 | req->errors = 0; | 466 | req->errors = 0; |
467 | 467 | ||
468 | mutex_lock(&lo->tx_lock); | 468 | mutex_lock(&nbd->tx_lock); |
469 | if (unlikely(!lo->sock)) { | 469 | if (unlikely(!nbd->sock)) { |
470 | mutex_unlock(&lo->tx_lock); | 470 | mutex_unlock(&nbd->tx_lock); |
471 | dev_err(disk_to_dev(lo->disk), | 471 | dev_err(disk_to_dev(nbd->disk), |
472 | "Attempted send on closed socket\n"); | 472 | "Attempted send on closed socket\n"); |
473 | goto error_out; | 473 | goto error_out; |
474 | } | 474 | } |
475 | 475 | ||
476 | lo->active_req = req; | 476 | nbd->active_req = req; |
477 | 477 | ||
478 | if (nbd_send_req(lo, req) != 0) { | 478 | if (nbd_send_req(nbd, req) != 0) { |
479 | dev_err(disk_to_dev(lo->disk), "Request send failed\n"); | 479 | dev_err(disk_to_dev(nbd->disk), "Request send failed\n"); |
480 | req->errors++; | 480 | req->errors++; |
481 | nbd_end_request(req); | 481 | nbd_end_request(req); |
482 | } else { | 482 | } else { |
483 | spin_lock(&lo->queue_lock); | 483 | spin_lock(&nbd->queue_lock); |
484 | list_add(&req->queuelist, &lo->queue_head); | 484 | list_add(&req->queuelist, &nbd->queue_head); |
485 | spin_unlock(&lo->queue_lock); | 485 | spin_unlock(&nbd->queue_lock); |
486 | } | 486 | } |
487 | 487 | ||
488 | lo->active_req = NULL; | 488 | nbd->active_req = NULL; |
489 | mutex_unlock(&lo->tx_lock); | 489 | mutex_unlock(&nbd->tx_lock); |
490 | wake_up_all(&lo->active_wq); | 490 | wake_up_all(&nbd->active_wq); |
491 | 491 | ||
492 | return; | 492 | return; |
493 | 493 | ||
@@ -498,28 +498,28 @@ error_out: | |||
498 | 498 | ||
499 | static int nbd_thread(void *data) | 499 | static int nbd_thread(void *data) |
500 | { | 500 | { |
501 | struct nbd_device *lo = data; | 501 | struct nbd_device *nbd = data; |
502 | struct request *req; | 502 | struct request *req; |
503 | 503 | ||
504 | set_user_nice(current, -20); | 504 | set_user_nice(current, -20); |
505 | while (!kthread_should_stop() || !list_empty(&lo->waiting_queue)) { | 505 | while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) { |
506 | /* wait for something to do */ | 506 | /* wait for something to do */ |
507 | wait_event_interruptible(lo->waiting_wq, | 507 | wait_event_interruptible(nbd->waiting_wq, |
508 | kthread_should_stop() || | 508 | kthread_should_stop() || |
509 | !list_empty(&lo->waiting_queue)); | 509 | !list_empty(&nbd->waiting_queue)); |
510 | 510 | ||
511 | /* extract request */ | 511 | /* extract request */ |
512 | if (list_empty(&lo->waiting_queue)) | 512 | if (list_empty(&nbd->waiting_queue)) |
513 | continue; | 513 | continue; |
514 | 514 | ||
515 | spin_lock_irq(&lo->queue_lock); | 515 | spin_lock_irq(&nbd->queue_lock); |
516 | req = list_entry(lo->waiting_queue.next, struct request, | 516 | req = list_entry(nbd->waiting_queue.next, struct request, |
517 | queuelist); | 517 | queuelist); |
518 | list_del_init(&req->queuelist); | 518 | list_del_init(&req->queuelist); |
519 | spin_unlock_irq(&lo->queue_lock); | 519 | spin_unlock_irq(&nbd->queue_lock); |
520 | 520 | ||
521 | /* handle request */ | 521 | /* handle request */ |
522 | nbd_handle_req(lo, req); | 522 | nbd_handle_req(nbd, req); |
523 | } | 523 | } |
524 | return 0; | 524 | return 0; |
525 | } | 525 | } |
@@ -527,7 +527,7 @@ static int nbd_thread(void *data) | |||
527 | /* | 527 | /* |
528 | * We always wait for result of write, for now. It would be nice to make it optional | 528 | * We always wait for result of write, for now. It would be nice to make it optional |
529 | * in future | 529 | * in future |
530 | * if ((rq_data_dir(req) == WRITE) && (lo->flags & NBD_WRITE_NOCHK)) | 530 | * if ((rq_data_dir(req) == WRITE) && (nbd->flags & NBD_WRITE_NOCHK)) |
531 | * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); } | 531 | * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); } |
532 | */ | 532 | */ |
533 | 533 | ||
@@ -536,19 +536,19 @@ static void do_nbd_request(struct request_queue *q) | |||
536 | struct request *req; | 536 | struct request *req; |
537 | 537 | ||
538 | while ((req = blk_fetch_request(q)) != NULL) { | 538 | while ((req = blk_fetch_request(q)) != NULL) { |
539 | struct nbd_device *lo; | 539 | struct nbd_device *nbd; |
540 | 540 | ||
541 | spin_unlock_irq(q->queue_lock); | 541 | spin_unlock_irq(q->queue_lock); |
542 | 542 | ||
543 | dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n", | 543 | dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n", |
544 | req->rq_disk->disk_name, req, req->cmd_type); | 544 | req->rq_disk->disk_name, req, req->cmd_type); |
545 | 545 | ||
546 | lo = req->rq_disk->private_data; | 546 | nbd = req->rq_disk->private_data; |
547 | 547 | ||
548 | BUG_ON(lo->magic != LO_MAGIC); | 548 | BUG_ON(nbd->magic != NBD_MAGIC); |
549 | 549 | ||
550 | if (unlikely(!lo->sock)) { | 550 | if (unlikely(!nbd->sock)) { |
551 | dev_err(disk_to_dev(lo->disk), | 551 | dev_err(disk_to_dev(nbd->disk), |
552 | "Attempted send on closed socket\n"); | 552 | "Attempted send on closed socket\n"); |
553 | req->errors++; | 553 | req->errors++; |
554 | nbd_end_request(req); | 554 | nbd_end_request(req); |
@@ -556,11 +556,11 @@ static void do_nbd_request(struct request_queue *q) | |||
556 | continue; | 556 | continue; |
557 | } | 557 | } |
558 | 558 | ||
559 | spin_lock_irq(&lo->queue_lock); | 559 | spin_lock_irq(&nbd->queue_lock); |
560 | list_add_tail(&req->queuelist, &lo->waiting_queue); | 560 | list_add_tail(&req->queuelist, &nbd->waiting_queue); |
561 | spin_unlock_irq(&lo->queue_lock); | 561 | spin_unlock_irq(&nbd->queue_lock); |
562 | 562 | ||
563 | wake_up(&lo->waiting_wq); | 563 | wake_up(&nbd->waiting_wq); |
564 | 564 | ||
565 | spin_lock_irq(q->queue_lock); | 565 | spin_lock_irq(q->queue_lock); |
566 | } | 566 | } |
@@ -568,32 +568,32 @@ static void do_nbd_request(struct request_queue *q) | |||
568 | 568 | ||
569 | /* Must be called with tx_lock held */ | 569 | /* Must be called with tx_lock held */ |
570 | 570 | ||
571 | static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo, | 571 | static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, |
572 | unsigned int cmd, unsigned long arg) | 572 | unsigned int cmd, unsigned long arg) |
573 | { | 573 | { |
574 | switch (cmd) { | 574 | switch (cmd) { |
575 | case NBD_DISCONNECT: { | 575 | case NBD_DISCONNECT: { |
576 | struct request sreq; | 576 | struct request sreq; |
577 | 577 | ||
578 | dev_info(disk_to_dev(lo->disk), "NBD_DISCONNECT\n"); | 578 | dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n"); |
579 | 579 | ||
580 | blk_rq_init(NULL, &sreq); | 580 | blk_rq_init(NULL, &sreq); |
581 | sreq.cmd_type = REQ_TYPE_SPECIAL; | 581 | sreq.cmd_type = REQ_TYPE_SPECIAL; |
582 | nbd_cmd(&sreq) = NBD_CMD_DISC; | 582 | nbd_cmd(&sreq) = NBD_CMD_DISC; |
583 | if (!lo->sock) | 583 | if (!nbd->sock) |
584 | return -EINVAL; | 584 | return -EINVAL; |
585 | nbd_send_req(lo, &sreq); | 585 | nbd_send_req(nbd, &sreq); |
586 | return 0; | 586 | return 0; |
587 | } | 587 | } |
588 | 588 | ||
589 | case NBD_CLEAR_SOCK: { | 589 | case NBD_CLEAR_SOCK: { |
590 | struct file *file; | 590 | struct file *file; |
591 | 591 | ||
592 | lo->sock = NULL; | 592 | nbd->sock = NULL; |
593 | file = lo->file; | 593 | file = nbd->file; |
594 | lo->file = NULL; | 594 | nbd->file = NULL; |
595 | nbd_clear_que(lo); | 595 | nbd_clear_que(nbd); |
596 | BUG_ON(!list_empty(&lo->queue_head)); | 596 | BUG_ON(!list_empty(&nbd->queue_head)); |
597 | if (file) | 597 | if (file) |
598 | fput(file); | 598 | fput(file); |
599 | return 0; | 599 | return 0; |
@@ -601,14 +601,14 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo, | |||
601 | 601 | ||
602 | case NBD_SET_SOCK: { | 602 | case NBD_SET_SOCK: { |
603 | struct file *file; | 603 | struct file *file; |
604 | if (lo->file) | 604 | if (nbd->file) |
605 | return -EBUSY; | 605 | return -EBUSY; |
606 | file = fget(arg); | 606 | file = fget(arg); |
607 | if (file) { | 607 | if (file) { |
608 | struct inode *inode = file->f_path.dentry->d_inode; | 608 | struct inode *inode = file->f_path.dentry->d_inode; |
609 | if (S_ISSOCK(inode->i_mode)) { | 609 | if (S_ISSOCK(inode->i_mode)) { |
610 | lo->file = file; | 610 | nbd->file = file; |
611 | lo->sock = SOCKET_I(inode); | 611 | nbd->sock = SOCKET_I(inode); |
612 | if (max_part > 0) | 612 | if (max_part > 0) |
613 | bdev->bd_invalidated = 1; | 613 | bdev->bd_invalidated = 1; |
614 | return 0; | 614 | return 0; |
@@ -620,29 +620,29 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo, | |||
620 | } | 620 | } |
621 | 621 | ||
622 | case NBD_SET_BLKSIZE: | 622 | case NBD_SET_BLKSIZE: |
623 | lo->blksize = arg; | 623 | nbd->blksize = arg; |
624 | lo->bytesize &= ~(lo->blksize-1); | 624 | nbd->bytesize &= ~(nbd->blksize-1); |
625 | bdev->bd_inode->i_size = lo->bytesize; | 625 | bdev->bd_inode->i_size = nbd->bytesize; |
626 | set_blocksize(bdev, lo->blksize); | 626 | set_blocksize(bdev, nbd->blksize); |
627 | set_capacity(lo->disk, lo->bytesize >> 9); | 627 | set_capacity(nbd->disk, nbd->bytesize >> 9); |
628 | return 0; | 628 | return 0; |
629 | 629 | ||
630 | case NBD_SET_SIZE: | 630 | case NBD_SET_SIZE: |
631 | lo->bytesize = arg & ~(lo->blksize-1); | 631 | nbd->bytesize = arg & ~(nbd->blksize-1); |
632 | bdev->bd_inode->i_size = lo->bytesize; | 632 | bdev->bd_inode->i_size = nbd->bytesize; |
633 | set_blocksize(bdev, lo->blksize); | 633 | set_blocksize(bdev, nbd->blksize); |
634 | set_capacity(lo->disk, lo->bytesize >> 9); | 634 | set_capacity(nbd->disk, nbd->bytesize >> 9); |
635 | return 0; | 635 | return 0; |
636 | 636 | ||
637 | case NBD_SET_TIMEOUT: | 637 | case NBD_SET_TIMEOUT: |
638 | lo->xmit_timeout = arg * HZ; | 638 | nbd->xmit_timeout = arg * HZ; |
639 | return 0; | 639 | return 0; |
640 | 640 | ||
641 | case NBD_SET_SIZE_BLOCKS: | 641 | case NBD_SET_SIZE_BLOCKS: |
642 | lo->bytesize = ((u64) arg) * lo->blksize; | 642 | nbd->bytesize = ((u64) arg) * nbd->blksize; |
643 | bdev->bd_inode->i_size = lo->bytesize; | 643 | bdev->bd_inode->i_size = nbd->bytesize; |
644 | set_blocksize(bdev, lo->blksize); | 644 | set_blocksize(bdev, nbd->blksize); |
645 | set_capacity(lo->disk, lo->bytesize >> 9); | 645 | set_capacity(nbd->disk, nbd->bytesize >> 9); |
646 | return 0; | 646 | return 0; |
647 | 647 | ||
648 | case NBD_DO_IT: { | 648 | case NBD_DO_IT: { |
@@ -650,38 +650,38 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo, | |||
650 | struct file *file; | 650 | struct file *file; |
651 | int error; | 651 | int error; |
652 | 652 | ||
653 | if (lo->pid) | 653 | if (nbd->pid) |
654 | return -EBUSY; | 654 | return -EBUSY; |
655 | if (!lo->file) | 655 | if (!nbd->file) |
656 | return -EINVAL; | 656 | return -EINVAL; |
657 | 657 | ||
658 | mutex_unlock(&lo->tx_lock); | 658 | mutex_unlock(&nbd->tx_lock); |
659 | 659 | ||
660 | thread = kthread_create(nbd_thread, lo, lo->disk->disk_name); | 660 | thread = kthread_create(nbd_thread, nbd, nbd->disk->disk_name); |
661 | if (IS_ERR(thread)) { | 661 | if (IS_ERR(thread)) { |
662 | mutex_lock(&lo->tx_lock); | 662 | mutex_lock(&nbd->tx_lock); |
663 | return PTR_ERR(thread); | 663 | return PTR_ERR(thread); |
664 | } | 664 | } |
665 | wake_up_process(thread); | 665 | wake_up_process(thread); |
666 | error = nbd_do_it(lo); | 666 | error = nbd_do_it(nbd); |
667 | kthread_stop(thread); | 667 | kthread_stop(thread); |
668 | 668 | ||
669 | mutex_lock(&lo->tx_lock); | 669 | mutex_lock(&nbd->tx_lock); |
670 | if (error) | 670 | if (error) |
671 | return error; | 671 | return error; |
672 | sock_shutdown(lo, 0); | 672 | sock_shutdown(nbd, 0); |
673 | file = lo->file; | 673 | file = nbd->file; |
674 | lo->file = NULL; | 674 | nbd->file = NULL; |
675 | nbd_clear_que(lo); | 675 | nbd_clear_que(nbd); |
676 | dev_warn(disk_to_dev(lo->disk), "queue cleared\n"); | 676 | dev_warn(disk_to_dev(nbd->disk), "queue cleared\n"); |
677 | if (file) | 677 | if (file) |
678 | fput(file); | 678 | fput(file); |
679 | lo->bytesize = 0; | 679 | nbd->bytesize = 0; |
680 | bdev->bd_inode->i_size = 0; | 680 | bdev->bd_inode->i_size = 0; |
681 | set_capacity(lo->disk, 0); | 681 | set_capacity(nbd->disk, 0); |
682 | if (max_part > 0) | 682 | if (max_part > 0) |
683 | ioctl_by_bdev(bdev, BLKRRPART, 0); | 683 | ioctl_by_bdev(bdev, BLKRRPART, 0); |
684 | return lo->harderror; | 684 | return nbd->harderror; |
685 | } | 685 | } |
686 | 686 | ||
687 | case NBD_CLEAR_QUE: | 687 | case NBD_CLEAR_QUE: |
@@ -689,14 +689,14 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo, | |||
689 | * This is for compatibility only. The queue is always cleared | 689 | * This is for compatibility only. The queue is always cleared |
690 | * by NBD_DO_IT or NBD_CLEAR_SOCK. | 690 | * by NBD_DO_IT or NBD_CLEAR_SOCK. |
691 | */ | 691 | */ |
692 | BUG_ON(!lo->sock && !list_empty(&lo->queue_head)); | 692 | BUG_ON(!nbd->sock && !list_empty(&nbd->queue_head)); |
693 | return 0; | 693 | return 0; |
694 | 694 | ||
695 | case NBD_PRINT_DEBUG: | 695 | case NBD_PRINT_DEBUG: |
696 | dev_info(disk_to_dev(lo->disk), | 696 | dev_info(disk_to_dev(nbd->disk), |
697 | "next = %p, prev = %p, head = %p\n", | 697 | "next = %p, prev = %p, head = %p\n", |
698 | lo->queue_head.next, lo->queue_head.prev, | 698 | nbd->queue_head.next, nbd->queue_head.prev, |
699 | &lo->queue_head); | 699 | &nbd->queue_head); |
700 | return 0; | 700 | return 0; |
701 | } | 701 | } |
702 | return -ENOTTY; | 702 | return -ENOTTY; |
@@ -705,21 +705,21 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo, | |||
705 | static int nbd_ioctl(struct block_device *bdev, fmode_t mode, | 705 | static int nbd_ioctl(struct block_device *bdev, fmode_t mode, |
706 | unsigned int cmd, unsigned long arg) | 706 | unsigned int cmd, unsigned long arg) |
707 | { | 707 | { |
708 | struct nbd_device *lo = bdev->bd_disk->private_data; | 708 | struct nbd_device *nbd = bdev->bd_disk->private_data; |
709 | int error; | 709 | int error; |
710 | 710 | ||
711 | if (!capable(CAP_SYS_ADMIN)) | 711 | if (!capable(CAP_SYS_ADMIN)) |
712 | return -EPERM; | 712 | return -EPERM; |
713 | 713 | ||
714 | BUG_ON(lo->magic != LO_MAGIC); | 714 | BUG_ON(nbd->magic != NBD_MAGIC); |
715 | 715 | ||
716 | /* Anyone capable of this syscall can do *real bad* things */ | 716 | /* Anyone capable of this syscall can do *real bad* things */ |
717 | dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n", | 717 | dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n", |
718 | lo->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg); | 718 | nbd->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg); |
719 | 719 | ||
720 | mutex_lock(&lo->tx_lock); | 720 | mutex_lock(&nbd->tx_lock); |
721 | error = __nbd_ioctl(bdev, lo, cmd, arg); | 721 | error = __nbd_ioctl(bdev, nbd, cmd, arg); |
722 | mutex_unlock(&lo->tx_lock); | 722 | mutex_unlock(&nbd->tx_lock); |
723 | 723 | ||
724 | return error; | 724 | return error; |
725 | } | 725 | } |
@@ -805,7 +805,7 @@ static int __init nbd_init(void) | |||
805 | for (i = 0; i < nbds_max; i++) { | 805 | for (i = 0; i < nbds_max; i++) { |
806 | struct gendisk *disk = nbd_dev[i].disk; | 806 | struct gendisk *disk = nbd_dev[i].disk; |
807 | nbd_dev[i].file = NULL; | 807 | nbd_dev[i].file = NULL; |
808 | nbd_dev[i].magic = LO_MAGIC; | 808 | nbd_dev[i].magic = NBD_MAGIC; |
809 | nbd_dev[i].flags = 0; | 809 | nbd_dev[i].flags = 0; |
810 | INIT_LIST_HEAD(&nbd_dev[i].waiting_queue); | 810 | INIT_LIST_HEAD(&nbd_dev[i].waiting_queue); |
811 | spin_lock_init(&nbd_dev[i].queue_lock); | 811 | spin_lock_init(&nbd_dev[i].queue_lock); |
diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c new file mode 100644 index 000000000000..38a2d0631882 --- /dev/null +++ b/drivers/block/nvme.c | |||
@@ -0,0 +1,1740 @@ | |||
1 | /* | ||
2 | * NVM Express device driver | ||
3 | * Copyright (c) 2011, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along with | ||
15 | * this program; if not, write to the Free Software Foundation, Inc., | ||
16 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/nvme.h> | ||
20 | #include <linux/bio.h> | ||
21 | #include <linux/bitops.h> | ||
22 | #include <linux/blkdev.h> | ||
23 | #include <linux/delay.h> | ||
24 | #include <linux/errno.h> | ||
25 | #include <linux/fs.h> | ||
26 | #include <linux/genhd.h> | ||
27 | #include <linux/idr.h> | ||
28 | #include <linux/init.h> | ||
29 | #include <linux/interrupt.h> | ||
30 | #include <linux/io.h> | ||
31 | #include <linux/kdev_t.h> | ||
32 | #include <linux/kthread.h> | ||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/mm.h> | ||
35 | #include <linux/module.h> | ||
36 | #include <linux/moduleparam.h> | ||
37 | #include <linux/pci.h> | ||
38 | #include <linux/poison.h> | ||
39 | #include <linux/sched.h> | ||
40 | #include <linux/slab.h> | ||
41 | #include <linux/types.h> | ||
42 | |||
43 | #include <asm-generic/io-64-nonatomic-lo-hi.h> | ||
44 | |||
45 | #define NVME_Q_DEPTH 1024 | ||
46 | #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) | ||
47 | #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) | ||
48 | #define NVME_MINORS 64 | ||
49 | #define NVME_IO_TIMEOUT (5 * HZ) | ||
50 | #define ADMIN_TIMEOUT (60 * HZ) | ||
51 | |||
52 | static int nvme_major; | ||
53 | module_param(nvme_major, int, 0); | ||
54 | |||
55 | static int use_threaded_interrupts; | ||
56 | module_param(use_threaded_interrupts, int, 0); | ||
57 | |||
58 | static DEFINE_SPINLOCK(dev_list_lock); | ||
59 | static LIST_HEAD(dev_list); | ||
60 | static struct task_struct *nvme_thread; | ||
61 | |||
62 | /* | ||
63 | * Represents an NVM Express device. Each nvme_dev is a PCI function. | ||
64 | */ | ||
65 | struct nvme_dev { | ||
66 | struct list_head node; | ||
67 | struct nvme_queue **queues; | ||
68 | u32 __iomem *dbs; | ||
69 | struct pci_dev *pci_dev; | ||
70 | struct dma_pool *prp_page_pool; | ||
71 | struct dma_pool *prp_small_pool; | ||
72 | int instance; | ||
73 | int queue_count; | ||
74 | int db_stride; | ||
75 | u32 ctrl_config; | ||
76 | struct msix_entry *entry; | ||
77 | struct nvme_bar __iomem *bar; | ||
78 | struct list_head namespaces; | ||
79 | char serial[20]; | ||
80 | char model[40]; | ||
81 | char firmware_rev[8]; | ||
82 | }; | ||
83 | |||
84 | /* | ||
85 | * An NVM Express namespace is equivalent to a SCSI LUN | ||
86 | */ | ||
87 | struct nvme_ns { | ||
88 | struct list_head list; | ||
89 | |||
90 | struct nvme_dev *dev; | ||
91 | struct request_queue *queue; | ||
92 | struct gendisk *disk; | ||
93 | |||
94 | int ns_id; | ||
95 | int lba_shift; | ||
96 | }; | ||
97 | |||
98 | /* | ||
99 | * An NVM Express queue. Each device has at least two (one for admin | ||
100 | * commands and one for I/O commands). | ||
101 | */ | ||
102 | struct nvme_queue { | ||
103 | struct device *q_dmadev; | ||
104 | struct nvme_dev *dev; | ||
105 | spinlock_t q_lock; | ||
106 | struct nvme_command *sq_cmds; | ||
107 | volatile struct nvme_completion *cqes; | ||
108 | dma_addr_t sq_dma_addr; | ||
109 | dma_addr_t cq_dma_addr; | ||
110 | wait_queue_head_t sq_full; | ||
111 | wait_queue_t sq_cong_wait; | ||
112 | struct bio_list sq_cong; | ||
113 | u32 __iomem *q_db; | ||
114 | u16 q_depth; | ||
115 | u16 cq_vector; | ||
116 | u16 sq_head; | ||
117 | u16 sq_tail; | ||
118 | u16 cq_head; | ||
119 | u16 cq_phase; | ||
120 | unsigned long cmdid_data[]; | ||
121 | }; | ||
122 | |||
123 | /* | ||
124 | * Check we didn't inadvertently grow the command struct | ||
125 | */ | ||
126 | static inline void _nvme_check_size(void) | ||
127 | { | ||
128 | BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64); | ||
129 | BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64); | ||
130 | BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64); | ||
131 | BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); | ||
132 | BUILD_BUG_ON(sizeof(struct nvme_features) != 64); | ||
133 | BUILD_BUG_ON(sizeof(struct nvme_command) != 64); | ||
134 | BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096); | ||
135 | BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096); | ||
136 | BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); | ||
137 | } | ||
138 | |||
139 | typedef void (*nvme_completion_fn)(struct nvme_dev *, void *, | ||
140 | struct nvme_completion *); | ||
141 | |||
142 | struct nvme_cmd_info { | ||
143 | nvme_completion_fn fn; | ||
144 | void *ctx; | ||
145 | unsigned long timeout; | ||
146 | }; | ||
147 | |||
148 | static struct nvme_cmd_info *nvme_cmd_info(struct nvme_queue *nvmeq) | ||
149 | { | ||
150 | return (void *)&nvmeq->cmdid_data[BITS_TO_LONGS(nvmeq->q_depth)]; | ||
151 | } | ||
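Layout note: cmdid_data[] at the end of struct nvme_queue is split in two. The first BITS_TO_LONGS(q_depth) longs are the command-id allocation bitmap; nvme_cmd_info() returns the address just past that bitmap, where an array of q_depth struct nvme_cmd_info entries (one per command id) lives. Both regions are sized together when the queue is allocated (not visible in this hunk).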
152 | |||
153 | /** | ||
154 | * alloc_cmdid() - Allocate a Command ID | ||
155 | * @nvmeq: The queue that will be used for this command | ||
156 | * @ctx: A pointer that will be passed to the handler | ||
157 | * @handler: The function to call on completion | ||
158 | * | ||
159 | * Allocate a Command ID for a queue. The data passed in will | ||
160 | * be passed to the completion handler. The handler and context are | ||
161 | * stored in the nvme_cmd_info array that follows the command-id | ||
162 | * bitmap (see nvme_cmd_info() above), so free_cmdid() can return | ||
163 | * them to the caller when the command completes. | ||
164 | * | ||
165 | * May be called with local interrupts disabled and the q_lock held, | ||
166 | * or with interrupts enabled and no locks held. | ||
167 | */ | ||
168 | static int alloc_cmdid(struct nvme_queue *nvmeq, void *ctx, | ||
169 | nvme_completion_fn handler, unsigned timeout) | ||
170 | { | ||
171 | int depth = nvmeq->q_depth - 1; | ||
172 | struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); | ||
173 | int cmdid; | ||
174 | |||
175 | do { | ||
176 | cmdid = find_first_zero_bit(nvmeq->cmdid_data, depth); | ||
177 | if (cmdid >= depth) | ||
178 | return -EBUSY; | ||
179 | } while (test_and_set_bit(cmdid, nvmeq->cmdid_data)); | ||
180 | |||
181 | info[cmdid].fn = handler; | ||
182 | info[cmdid].ctx = ctx; | ||
183 | info[cmdid].timeout = jiffies + timeout; | ||
184 | return cmdid; | ||
185 | } | ||
186 | |||
187 | static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx, | ||
188 | nvme_completion_fn handler, unsigned timeout) | ||
189 | { | ||
190 | int cmdid; | ||
191 | wait_event_killable(nvmeq->sq_full, | ||
192 | (cmdid = alloc_cmdid(nvmeq, ctx, handler, timeout)) >= 0); | ||
193 | return (cmdid < 0) ? -EINTR : cmdid; | ||
194 | } | ||
195 | |||
196 | /* Special values must be less than 0x1000 */ | ||
197 | #define CMD_CTX_BASE ((void *)POISON_POINTER_DELTA) | ||
198 | #define CMD_CTX_CANCELLED (0x30C + CMD_CTX_BASE) | ||
199 | #define CMD_CTX_COMPLETED (0x310 + CMD_CTX_BASE) | ||
200 | #define CMD_CTX_INVALID (0x314 + CMD_CTX_BASE) | ||
201 | #define CMD_CTX_FLUSH (0x318 + CMD_CTX_BASE) | ||
202 | |||
203 | static void special_completion(struct nvme_dev *dev, void *ctx, | ||
204 | struct nvme_completion *cqe) | ||
205 | { | ||
206 | if (ctx == CMD_CTX_CANCELLED) | ||
207 | return; | ||
208 | if (ctx == CMD_CTX_FLUSH) | ||
209 | return; | ||
210 | if (ctx == CMD_CTX_COMPLETED) { | ||
211 | dev_warn(&dev->pci_dev->dev, | ||
212 | "completed id %d twice on queue %d\n", | ||
213 | cqe->command_id, le16_to_cpup(&cqe->sq_id)); | ||
214 | return; | ||
215 | } | ||
216 | if (ctx == CMD_CTX_INVALID) { | ||
217 | dev_warn(&dev->pci_dev->dev, | ||
218 | "invalid id %d completed on queue %d\n", | ||
219 | cqe->command_id, le16_to_cpup(&cqe->sq_id)); | ||
220 | return; | ||
221 | } | ||
222 | |||
223 | dev_warn(&dev->pci_dev->dev, "Unknown special completion %p\n", ctx); | ||
224 | } | ||
225 | |||
226 | /* | ||
227 | * Called with local interrupts disabled and the q_lock held. May not sleep. | ||
228 | */ | ||
229 | static void *free_cmdid(struct nvme_queue *nvmeq, int cmdid, | ||
230 | nvme_completion_fn *fn) | ||
231 | { | ||
232 | void *ctx; | ||
233 | struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); | ||
234 | |||
235 | if (cmdid >= nvmeq->q_depth) { | ||
236 | *fn = special_completion; | ||
237 | return CMD_CTX_INVALID; | ||
238 | } | ||
239 | *fn = info[cmdid].fn; | ||
240 | ctx = info[cmdid].ctx; | ||
241 | info[cmdid].fn = special_completion; | ||
242 | info[cmdid].ctx = CMD_CTX_COMPLETED; | ||
243 | clear_bit(cmdid, nvmeq->cmdid_data); | ||
244 | wake_up(&nvmeq->sq_full); | ||
245 | return ctx; | ||
246 | } | ||
247 | |||
248 | static void *cancel_cmdid(struct nvme_queue *nvmeq, int cmdid, | ||
249 | nvme_completion_fn *fn) | ||
250 | { | ||
251 | void *ctx; | ||
252 | struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); | ||
253 | if (fn) | ||
254 | *fn = info[cmdid].fn; | ||
255 | ctx = info[cmdid].ctx; | ||
256 | info[cmdid].fn = special_completion; | ||
257 | info[cmdid].ctx = CMD_CTX_CANCELLED; | ||
258 | return ctx; | ||
259 | } | ||
260 | |||
261 | static struct nvme_queue *get_nvmeq(struct nvme_dev *dev) | ||
262 | { | ||
263 | return dev->queues[get_cpu() + 1]; | ||
264 | } | ||
265 | |||
266 | static void put_nvmeq(struct nvme_queue *nvmeq) | ||
267 | { | ||
268 | put_cpu(); | ||
269 | } | ||
270 | |||
271 | /** | ||
272 | * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell | ||
273 | * @nvmeq: The queue to use | ||
274 | * @cmd: The command to send | ||
275 | * | ||
276 | * Safe to use from interrupt context | ||
277 | */ | ||
278 | static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd) | ||
279 | { | ||
280 | unsigned long flags; | ||
281 | u16 tail; | ||
282 | spin_lock_irqsave(&nvmeq->q_lock, flags); | ||
283 | tail = nvmeq->sq_tail; | ||
284 | memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd)); | ||
285 | if (++tail == nvmeq->q_depth) | ||
286 | tail = 0; | ||
287 | writel(tail, nvmeq->q_db); | ||
288 | nvmeq->sq_tail = tail; | ||
289 | spin_unlock_irqrestore(&nvmeq->q_lock, flags); | ||
290 | |||
291 | return 0; | ||
292 | } | ||
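A hedged sketch of a caller (the helper name is an assumption; nvme_cmd_flush and the struct nvme_command layout come from <linux/nvme.h>): queueing a flush for a namespace once a command id has been allocated:

	static int example_submit_flush(struct nvme_queue *nvmeq,
					struct nvme_ns *ns, int cmdid)
	{
		struct nvme_command c;

		memset(&c, 0, sizeof(c));
		c.common.opcode = nvme_cmd_flush;
		c.common.command_id = cmdid;
		c.common.nsid = cpu_to_le32(ns->ns_id);

		return nvme_submit_cmd(nvmeq, &c);
	}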
293 | |||
294 | /* | ||
295 | * The nvme_iod describes the data in an I/O, including the list of PRP | ||
296 | * entries. You can't see it in this data structure because C doesn't let | ||
297 | * me express that. Use nvme_alloc_iod to ensure there's enough space | ||
298 | * allocated to store the PRP list. | ||
299 | */ | ||
300 | struct nvme_iod { | ||
301 | void *private; /* For the use of the submitter of the I/O */ | ||
302 | int npages; /* In the PRP list. 0 means small pool in use */ | ||
303 | int offset; /* Of PRP list */ | ||
304 | int nents; /* Used in scatterlist */ | ||
305 | int length; /* Of data, in bytes */ | ||
306 | dma_addr_t first_dma; | ||
307 | struct scatterlist sg[0]; | ||
308 | }; | ||
309 | |||
310 | static __le64 **iod_list(struct nvme_iod *iod) | ||
311 | { | ||
312 | return ((void *)iod) + iod->offset; | ||
313 | } | ||
314 | |||
315 | /* | ||
316 | * Will slightly overestimate the number of pages needed. This is OK | ||
317 | * as it only leads to a small amount of wasted memory for the lifetime of | ||
318 | * the I/O. | ||
319 | */ | ||
320 | static int nvme_npages(unsigned size) | ||
321 | { | ||
322 | unsigned nprps = DIV_ROUND_UP(size + PAGE_SIZE, PAGE_SIZE); | ||
323 | return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8); | ||
324 | } | ||
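Worked example: a 64 KiB transfer with 4 KiB pages gives nprps = DIV_ROUND_UP(65536 + 4096, 4096) = 17 PRP entries, and the list then needs DIV_ROUND_UP(8 * 17, 4096 - 8) = 1 page. The "+ PAGE_SIZE" term and the "- 8" (one chained-pointer slot per PRP-list page) are what make this a safe overestimate.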
325 | |||
326 | static struct nvme_iod * | ||
327 | nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp) | ||
328 | { | ||
329 | struct nvme_iod *iod = kmalloc(sizeof(struct nvme_iod) + | ||
330 | sizeof(__le64 *) * nvme_npages(nbytes) + | ||
331 | sizeof(struct scatterlist) * nseg, gfp); | ||
332 | |||
333 | if (iod) { | ||
334 | iod->offset = offsetof(struct nvme_iod, sg[nseg]); | ||
335 | iod->npages = -1; | ||
336 | iod->length = nbytes; | ||
337 | } | ||
338 | |||
339 | return iod; | ||
340 | } | ||
341 | |||
342 | static void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod) | ||
343 | { | ||
344 | const int last_prp = PAGE_SIZE / 8 - 1; | ||
345 | int i; | ||
346 | __le64 **list = iod_list(iod); | ||
347 | dma_addr_t prp_dma = iod->first_dma; | ||
348 | |||
349 | if (iod->npages == 0) | ||
350 | dma_pool_free(dev->prp_small_pool, list[0], prp_dma); | ||
351 | for (i = 0; i < iod->npages; i++) { | ||
352 | __le64 *prp_list = list[i]; | ||
353 | dma_addr_t next_prp_dma = le64_to_cpu(prp_list[last_prp]); | ||
354 | dma_pool_free(dev->prp_page_pool, prp_list, prp_dma); | ||
355 | prp_dma = next_prp_dma; | ||
356 | } | ||
357 | kfree(iod); | ||
358 | } | ||
359 | |||
360 | static void requeue_bio(struct nvme_dev *dev, struct bio *bio) | ||
361 | { | ||
362 | struct nvme_queue *nvmeq = get_nvmeq(dev); | ||
363 | if (bio_list_empty(&nvmeq->sq_cong)) | ||
364 | add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait); | ||
365 | bio_list_add(&nvmeq->sq_cong, bio); | ||
366 | put_nvmeq(nvmeq); | ||
367 | wake_up_process(nvme_thread); | ||
368 | } | ||
369 | |||
370 | static void bio_completion(struct nvme_dev *dev, void *ctx, | ||
371 | struct nvme_completion *cqe) | ||
372 | { | ||
373 | struct nvme_iod *iod = ctx; | ||
374 | struct bio *bio = iod->private; | ||
375 | u16 status = le16_to_cpup(&cqe->status) >> 1; | ||
376 | |||
377 | dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents, | ||
378 | bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); | ||
379 | nvme_free_iod(dev, iod); | ||
380 | if (status) { | ||
381 | bio_endio(bio, -EIO); | ||
382 | } else if (bio->bi_vcnt > bio->bi_idx) { | ||
383 | requeue_bio(dev, bio); | ||
384 | } else { | ||
385 | bio_endio(bio, 0); | ||
386 | } | ||
387 | } | ||
388 | |||
389 | /* length is in bytes. gfp flags indicates whether we may sleep. */ | ||
390 | static int nvme_setup_prps(struct nvme_dev *dev, | ||
391 | struct nvme_common_command *cmd, struct nvme_iod *iod, | ||
392 | int total_len, gfp_t gfp) | ||
393 | { | ||
394 | struct dma_pool *pool; | ||
395 | int length = total_len; | ||
396 | struct scatterlist *sg = iod->sg; | ||
397 | int dma_len = sg_dma_len(sg); | ||
398 | u64 dma_addr = sg_dma_address(sg); | ||
399 | int offset = offset_in_page(dma_addr); | ||
400 | __le64 *prp_list; | ||
401 | __le64 **list = iod_list(iod); | ||
402 | dma_addr_t prp_dma; | ||
403 | int nprps, i; | ||
404 | |||
405 | cmd->prp1 = cpu_to_le64(dma_addr); | ||
406 | length -= (PAGE_SIZE - offset); | ||
407 | if (length <= 0) | ||
408 | return total_len; | ||
409 | |||
410 | dma_len -= (PAGE_SIZE - offset); | ||
411 | if (dma_len) { | ||
412 | dma_addr += (PAGE_SIZE - offset); | ||
413 | } else { | ||
414 | sg = sg_next(sg); | ||
415 | dma_addr = sg_dma_address(sg); | ||
416 | dma_len = sg_dma_len(sg); | ||
417 | } | ||
418 | |||
419 | if (length <= PAGE_SIZE) { | ||
420 | cmd->prp2 = cpu_to_le64(dma_addr); | ||
421 | return total_len; | ||
422 | } | ||
423 | |||
424 | nprps = DIV_ROUND_UP(length, PAGE_SIZE); | ||
425 | if (nprps <= (256 / 8)) { | ||
426 | pool = dev->prp_small_pool; | ||
427 | iod->npages = 0; | ||
428 | } else { | ||
429 | pool = dev->prp_page_pool; | ||
430 | iod->npages = 1; | ||
431 | } | ||
432 | |||
433 | prp_list = dma_pool_alloc(pool, gfp, &prp_dma); | ||
434 | if (!prp_list) { | ||
435 | cmd->prp2 = cpu_to_le64(dma_addr); | ||
436 | iod->npages = -1; | ||
437 | return (total_len - length) + PAGE_SIZE; | ||
438 | } | ||
439 | list[0] = prp_list; | ||
440 | iod->first_dma = prp_dma; | ||
441 | cmd->prp2 = cpu_to_le64(prp_dma); | ||
442 | i = 0; | ||
443 | for (;;) { | ||
444 | if (i == PAGE_SIZE / 8) { | ||
445 | __le64 *old_prp_list = prp_list; | ||
446 | prp_list = dma_pool_alloc(pool, gfp, &prp_dma); | ||
447 | if (!prp_list) | ||
448 | return total_len - length; | ||
449 | list[iod->npages++] = prp_list; | ||
450 | prp_list[0] = old_prp_list[i - 1]; | ||
451 | old_prp_list[i - 1] = cpu_to_le64(prp_dma); | ||
452 | i = 1; | ||
453 | } | ||
454 | prp_list[i++] = cpu_to_le64(dma_addr); | ||
455 | dma_len -= PAGE_SIZE; | ||
456 | dma_addr += PAGE_SIZE; | ||
457 | length -= PAGE_SIZE; | ||
458 | if (length <= 0) | ||
459 | break; | ||
460 | if (dma_len > 0) | ||
461 | continue; | ||
462 | BUG_ON(dma_len < 0); | ||
463 | sg = sg_next(sg); | ||
464 | dma_addr = sg_dma_address(sg); | ||
465 | dma_len = sg_dma_len(sg); | ||
466 | } | ||
467 | |||
468 | return total_len; | ||
469 | } | ||
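In other words, this routine implements the NVMe data-pointer convention: PRP1 always points at the first, possibly unaligned, chunk; if what remains fits in a single further page, PRP2 points straight at it; otherwise PRP2 points at a PRP list, and when a list page fills up its last slot is repurposed as a chain pointer to the next list page. As a worked example, assuming 4 KiB pages: a page-aligned 16 KiB write uses PRP1 for page 0 and a three-entry list for pages 1-3, so the 256-byte pool suffices and iod->npages stays 0. A standalone sketch of that pool choice (an illustration, not driver code):

    #include <stdio.h>

    #define PAGE_SIZE 4096
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
            int total_len = 16 * 1024;                 /* page-aligned transfer      */
            int remaining = total_len - PAGE_SIZE;     /* already covered by PRP1    */
            int nprps = DIV_ROUND_UP(remaining, PAGE_SIZE);

            printf("%d list entries -> %s pool\n", nprps,
                   nprps <= 256 / 8 ? "small (256-byte)" : "full-page");
            return 0;
    }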
470 | |||
471 | /* NVMe scatterlists require no holes in the virtual address */ | ||
472 | #define BIOVEC_NOT_VIRT_MERGEABLE(vec1, vec2) ((vec2)->bv_offset || \ | ||
473 | (((vec1)->bv_offset + (vec1)->bv_len) % PAGE_SIZE)) | ||
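Put concretely: when two consecutive bio_vecs are not physically contiguous, the PRP model can still describe them with separate scatterlist entries only if there is no hole in the virtual layout, i.e. the first vec ends exactly on a page boundary ((bv_offset + bv_len) % PAGE_SIZE == 0) and the second starts at offset 0. A 512-byte vec at offset 0 followed by a vec on another page fails that test (512 % 4096 != 0), so nvme_map_bio() below stops at that point, records its progress in bio->bi_idx, and the remainder of the bio is submitted later (see bio_completion(), which requeues the bio while bi_idx has not reached bi_vcnt).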
474 | |||
475 | static int nvme_map_bio(struct device *dev, struct nvme_iod *iod, | ||
476 | struct bio *bio, enum dma_data_direction dma_dir, int psegs) | ||
477 | { | ||
478 | struct bio_vec *bvec, *bvprv = NULL; | ||
479 | struct scatterlist *sg = NULL; | ||
480 | int i, old_idx, length = 0, nsegs = 0; | ||
481 | |||
482 | sg_init_table(iod->sg, psegs); | ||
483 | old_idx = bio->bi_idx; | ||
484 | bio_for_each_segment(bvec, bio, i) { | ||
485 | if (bvprv && BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) { | ||
486 | sg->length += bvec->bv_len; | ||
487 | } else { | ||
488 | if (bvprv && BIOVEC_NOT_VIRT_MERGEABLE(bvprv, bvec)) | ||
489 | break; | ||
490 | sg = sg ? sg + 1 : iod->sg; | ||
491 | sg_set_page(sg, bvec->bv_page, bvec->bv_len, | ||
492 | bvec->bv_offset); | ||
493 | nsegs++; | ||
494 | } | ||
495 | length += bvec->bv_len; | ||
496 | bvprv = bvec; | ||
497 | } | ||
498 | bio->bi_idx = i; | ||
499 | iod->nents = nsegs; | ||
500 | sg_mark_end(sg); | ||
501 | if (dma_map_sg(dev, iod->sg, iod->nents, dma_dir) == 0) { | ||
502 | bio->bi_idx = old_idx; | ||
503 | return -ENOMEM; | ||
504 | } | ||
505 | return length; | ||
506 | } | ||
507 | |||
508 | static int nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns, | ||
509 | int cmdid) | ||
510 | { | ||
511 | struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail]; | ||
512 | |||
513 | memset(cmnd, 0, sizeof(*cmnd)); | ||
514 | cmnd->common.opcode = nvme_cmd_flush; | ||
515 | cmnd->common.command_id = cmdid; | ||
516 | cmnd->common.nsid = cpu_to_le32(ns->ns_id); | ||
517 | |||
518 | if (++nvmeq->sq_tail == nvmeq->q_depth) | ||
519 | nvmeq->sq_tail = 0; | ||
520 | writel(nvmeq->sq_tail, nvmeq->q_db); | ||
521 | |||
522 | return 0; | ||
523 | } | ||
524 | |||
525 | static int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns) | ||
526 | { | ||
527 | int cmdid = alloc_cmdid(nvmeq, (void *)CMD_CTX_FLUSH, | ||
528 | special_completion, NVME_IO_TIMEOUT); | ||
529 | if (unlikely(cmdid < 0)) | ||
530 | return cmdid; | ||
531 | |||
532 | return nvme_submit_flush(nvmeq, ns, cmdid); | ||
533 | } | ||
534 | |||
535 | /* | ||
536 | * Called with local interrupts disabled and the q_lock held. May not sleep. | ||
537 | */ | ||
538 | static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, | ||
539 | struct bio *bio) | ||
540 | { | ||
541 | struct nvme_command *cmnd; | ||
542 | struct nvme_iod *iod; | ||
543 | enum dma_data_direction dma_dir; | ||
544 | int cmdid, length, result = -ENOMEM; | ||
545 | u16 control; | ||
546 | u32 dsmgmt; | ||
547 | int psegs = bio_phys_segments(ns->queue, bio); | ||
548 | |||
549 | if ((bio->bi_rw & REQ_FLUSH) && psegs) { | ||
550 | result = nvme_submit_flush_data(nvmeq, ns); | ||
551 | if (result) | ||
552 | return result; | ||
553 | } | ||
554 | |||
555 | iod = nvme_alloc_iod(psegs, bio->bi_size, GFP_ATOMIC); | ||
556 | if (!iod) | ||
557 | goto nomem; | ||
558 | iod->private = bio; | ||
559 | |||
560 | result = -EBUSY; | ||
561 | cmdid = alloc_cmdid(nvmeq, iod, bio_completion, NVME_IO_TIMEOUT); | ||
562 | if (unlikely(cmdid < 0)) | ||
563 | goto free_iod; | ||
564 | |||
565 | if ((bio->bi_rw & REQ_FLUSH) && !psegs) | ||
566 | return nvme_submit_flush(nvmeq, ns, cmdid); | ||
567 | |||
568 | control = 0; | ||
569 | if (bio->bi_rw & REQ_FUA) | ||
570 | control |= NVME_RW_FUA; | ||
571 | if (bio->bi_rw & (REQ_FAILFAST_DEV | REQ_RAHEAD)) | ||
572 | control |= NVME_RW_LR; | ||
573 | |||
574 | dsmgmt = 0; | ||
575 | if (bio->bi_rw & REQ_RAHEAD) | ||
576 | dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH; | ||
577 | |||
578 | cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail]; | ||
579 | |||
580 | memset(cmnd, 0, sizeof(*cmnd)); | ||
581 | if (bio_data_dir(bio)) { | ||
582 | cmnd->rw.opcode = nvme_cmd_write; | ||
583 | dma_dir = DMA_TO_DEVICE; | ||
584 | } else { | ||
585 | cmnd->rw.opcode = nvme_cmd_read; | ||
586 | dma_dir = DMA_FROM_DEVICE; | ||
587 | } | ||
588 | |||
589 | result = nvme_map_bio(nvmeq->q_dmadev, iod, bio, dma_dir, psegs); | ||
590 | if (result < 0) | ||
591 | goto free_iod; | ||
592 | length = result; | ||
593 | |||
594 | cmnd->rw.command_id = cmdid; | ||
595 | cmnd->rw.nsid = cpu_to_le32(ns->ns_id); | ||
596 | length = nvme_setup_prps(nvmeq->dev, &cmnd->common, iod, length, | ||
597 | GFP_ATOMIC); | ||
598 | cmnd->rw.slba = cpu_to_le64(bio->bi_sector >> (ns->lba_shift - 9)); | ||
599 | cmnd->rw.length = cpu_to_le16((length >> ns->lba_shift) - 1); | ||
600 | cmnd->rw.control = cpu_to_le16(control); | ||
601 | cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt); | ||
602 | |||
603 | bio->bi_sector += length >> 9; | ||
604 | |||
605 | if (++nvmeq->sq_tail == nvmeq->q_depth) | ||
606 | nvmeq->sq_tail = 0; | ||
607 | writel(nvmeq->sq_tail, nvmeq->q_db); | ||
608 | |||
609 | return 0; | ||
610 | |||
611 | free_iod: | ||
612 | nvme_free_iod(nvmeq->dev, iod); | ||
613 | nomem: | ||
614 | return result; | ||
615 | } | ||
616 | |||
617 | static void nvme_make_request(struct request_queue *q, struct bio *bio) | ||
618 | { | ||
619 | struct nvme_ns *ns = q->queuedata; | ||
620 | struct nvme_queue *nvmeq = get_nvmeq(ns->dev); | ||
621 | int result = -EBUSY; | ||
622 | |||
623 | spin_lock_irq(&nvmeq->q_lock); | ||
624 | if (bio_list_empty(&nvmeq->sq_cong)) | ||
625 | result = nvme_submit_bio_queue(nvmeq, ns, bio); | ||
626 | if (unlikely(result)) { | ||
627 | if (bio_list_empty(&nvmeq->sq_cong)) | ||
628 | add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait); | ||
629 | bio_list_add(&nvmeq->sq_cong, bio); | ||
630 | } | ||
631 | |||
632 | spin_unlock_irq(&nvmeq->q_lock); | ||
633 | put_nvmeq(nvmeq); | ||
634 | } | ||
635 | |||
636 | static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq) | ||
637 | { | ||
638 | u16 head, phase; | ||
639 | |||
640 | head = nvmeq->cq_head; | ||
641 | phase = nvmeq->cq_phase; | ||
642 | |||
643 | for (;;) { | ||
644 | void *ctx; | ||
645 | nvme_completion_fn fn; | ||
646 | struct nvme_completion cqe = nvmeq->cqes[head]; | ||
647 | if ((le16_to_cpu(cqe.status) & 1) != phase) | ||
648 | break; | ||
649 | nvmeq->sq_head = le16_to_cpu(cqe.sq_head); | ||
650 | if (++head == nvmeq->q_depth) { | ||
651 | head = 0; | ||
652 | phase = !phase; | ||
653 | } | ||
654 | |||
655 | ctx = free_cmdid(nvmeq, cqe.command_id, &fn); | ||
656 | fn(nvmeq->dev, ctx, &cqe); | ||
657 | } | ||
658 | |||
659 | /* If the controller ignores the cq head doorbell and continuously | ||
660 | * writes to the queue, it is theoretically possible to wrap around | ||
661 | * the queue twice and mistakenly return IRQ_NONE. Linux only | ||
662 | * requires that 0.1% of your interrupts are handled, so this isn't | ||
663 | * a big problem. | ||
664 | */ | ||
665 | if (head == nvmeq->cq_head && phase == nvmeq->cq_phase) | ||
666 | return IRQ_NONE; | ||
667 | |||
668 | writel(head, nvmeq->q_db + (1 << nvmeq->dev->db_stride)); | ||
669 | nvmeq->cq_head = head; | ||
670 | nvmeq->cq_phase = phase; | ||
671 | |||
672 | return IRQ_HANDLED; | ||
673 | } | ||
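The phase-bit protocol the loop above relies on: the CQ memory starts zeroed and cq_phase starts at 1, so an entry only looks new once the controller has written it with the phase bit set; each time the consumer's head wraps it flips its expected phase, which is exactly what the controller does on its next pass, so stale entries left over from the previous lap are ignored until they are overwritten. A small user-space simulation of that handshake, assuming a 4-entry queue and modelling only bit 0 of the status word (an illustration, not driver code):

    #include <stdio.h>

    #define QD 4

    static unsigned cq[QD];                  /* zeroed at init               */
    static unsigned head, phase = 1;         /* consumer state               */
    static unsigned tail, pphase = 1;        /* producer (controller) state  */

    static void post(void)                   /* controller writes one CQE    */
    {
            cq[tail] = pphase;
            if (++tail == QD) { tail = 0; pphase = !pphase; }
    }

    static void drain(void)                  /* mirrors nvme_process_cq()    */
    {
            while ((cq[head] & 1) == phase) {
                    printf("consume slot %u (expected phase %u)\n", head, phase);
                    if (++head == QD) { head = 0; phase = !phase; }
            }
    }

    int main(void)
    {
            for (int i = 0; i < 3; i++) post();
            drain();        /* slots 0-2 with phase 1                         */
            for (int i = 0; i < 3; i++) post();
            drain();        /* slot 3 (phase 1), then 0-1 (phase 0); slot 2,
                               still carrying the old phase, is skipped       */
            return 0;
    }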
674 | |||
675 | static irqreturn_t nvme_irq(int irq, void *data) | ||
676 | { | ||
677 | irqreturn_t result; | ||
678 | struct nvme_queue *nvmeq = data; | ||
679 | spin_lock(&nvmeq->q_lock); | ||
680 | result = nvme_process_cq(nvmeq); | ||
681 | spin_unlock(&nvmeq->q_lock); | ||
682 | return result; | ||
683 | } | ||
684 | |||
685 | static irqreturn_t nvme_irq_check(int irq, void *data) | ||
686 | { | ||
687 | struct nvme_queue *nvmeq = data; | ||
688 | struct nvme_completion cqe = nvmeq->cqes[nvmeq->cq_head]; | ||
689 | if ((le16_to_cpu(cqe.status) & 1) != nvmeq->cq_phase) | ||
690 | return IRQ_NONE; | ||
691 | return IRQ_WAKE_THREAD; | ||
692 | } | ||
693 | |||
694 | static void nvme_abort_command(struct nvme_queue *nvmeq, int cmdid) | ||
695 | { | ||
696 | spin_lock_irq(&nvmeq->q_lock); | ||
697 | cancel_cmdid(nvmeq, cmdid, NULL); | ||
698 | spin_unlock_irq(&nvmeq->q_lock); | ||
699 | } | ||
700 | |||
701 | struct sync_cmd_info { | ||
702 | struct task_struct *task; | ||
703 | u32 result; | ||
704 | int status; | ||
705 | }; | ||
706 | |||
707 | static void sync_completion(struct nvme_dev *dev, void *ctx, | ||
708 | struct nvme_completion *cqe) | ||
709 | { | ||
710 | struct sync_cmd_info *cmdinfo = ctx; | ||
711 | cmdinfo->result = le32_to_cpup(&cqe->result); | ||
712 | cmdinfo->status = le16_to_cpup(&cqe->status) >> 1; | ||
713 | wake_up_process(cmdinfo->task); | ||
714 | } | ||
715 | |||
716 | /* | ||
717 | * Returns 0 on success. If the result is negative, it's a Linux error code; | ||
718 | * if the result is positive, it's an NVM Express status code | ||
719 | */ | ||
720 | static int nvme_submit_sync_cmd(struct nvme_queue *nvmeq, | ||
721 | struct nvme_command *cmd, u32 *result, unsigned timeout) | ||
722 | { | ||
723 | int cmdid; | ||
724 | struct sync_cmd_info cmdinfo; | ||
725 | |||
726 | cmdinfo.task = current; | ||
727 | cmdinfo.status = -EINTR; | ||
728 | |||
729 | cmdid = alloc_cmdid_killable(nvmeq, &cmdinfo, sync_completion, | ||
730 | timeout); | ||
731 | if (cmdid < 0) | ||
732 | return cmdid; | ||
733 | cmd->common.command_id = cmdid; | ||
734 | |||
735 | set_current_state(TASK_KILLABLE); | ||
736 | nvme_submit_cmd(nvmeq, cmd); | ||
737 | schedule(); | ||
738 | |||
739 | if (cmdinfo.status == -EINTR) { | ||
740 | nvme_abort_command(nvmeq, cmdid); | ||
741 | return -EINTR; | ||
742 | } | ||
743 | |||
744 | if (result) | ||
745 | *result = cmdinfo.result; | ||
746 | |||
747 | return cmdinfo.status; | ||
748 | } | ||
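The ordering here is what makes the sleep race-free: the task marks itself TASK_KILLABLE before the command is submitted, so if the completion interrupt fires, and sync_completion() calls wake_up_process(), in the window before schedule() runs, the task is simply put back to TASK_RUNNING and schedule() returns immediately instead of sleeping forever. Pre-seeding cmdinfo.status with -EINTR is how a fatal signal is told apart from a real completion: if the wait ends with the status still -EINTR, the command id is cancelled and -EINTR is returned to the caller.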
749 | |||
750 | static int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd, | ||
751 | u32 *result) | ||
752 | { | ||
753 | return nvme_submit_sync_cmd(dev->queues[0], cmd, result, ADMIN_TIMEOUT); | ||
754 | } | ||
755 | |||
756 | static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) | ||
757 | { | ||
758 | int status; | ||
759 | struct nvme_command c; | ||
760 | |||
761 | memset(&c, 0, sizeof(c)); | ||
762 | c.delete_queue.opcode = opcode; | ||
763 | c.delete_queue.qid = cpu_to_le16(id); | ||
764 | |||
765 | status = nvme_submit_admin_cmd(dev, &c, NULL); | ||
766 | if (status) | ||
767 | return -EIO; | ||
768 | return 0; | ||
769 | } | ||
770 | |||
771 | static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, | ||
772 | struct nvme_queue *nvmeq) | ||
773 | { | ||
774 | int status; | ||
775 | struct nvme_command c; | ||
776 | int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED; | ||
777 | |||
778 | memset(&c, 0, sizeof(c)); | ||
779 | c.create_cq.opcode = nvme_admin_create_cq; | ||
780 | c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr); | ||
781 | c.create_cq.cqid = cpu_to_le16(qid); | ||
782 | c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1); | ||
783 | c.create_cq.cq_flags = cpu_to_le16(flags); | ||
784 | c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector); | ||
785 | |||
786 | status = nvme_submit_admin_cmd(dev, &c, NULL); | ||
787 | if (status) | ||
788 | return -EIO; | ||
789 | return 0; | ||
790 | } | ||
791 | |||
792 | static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, | ||
793 | struct nvme_queue *nvmeq) | ||
794 | { | ||
795 | int status; | ||
796 | struct nvme_command c; | ||
797 | int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM; | ||
798 | |||
799 | memset(&c, 0, sizeof(c)); | ||
800 | c.create_sq.opcode = nvme_admin_create_sq; | ||
801 | c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr); | ||
802 | c.create_sq.sqid = cpu_to_le16(qid); | ||
803 | c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1); | ||
804 | c.create_sq.sq_flags = cpu_to_le16(flags); | ||
805 | c.create_sq.cqid = cpu_to_le16(qid); | ||
806 | |||
807 | status = nvme_submit_admin_cmd(dev, &c, NULL); | ||
808 | if (status) | ||
809 | return -EIO; | ||
810 | return 0; | ||
811 | } | ||
812 | |||
813 | static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid) | ||
814 | { | ||
815 | return adapter_delete_queue(dev, nvme_admin_delete_cq, cqid); | ||
816 | } | ||
817 | |||
818 | static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid) | ||
819 | { | ||
820 | return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid); | ||
821 | } | ||
822 | |||
823 | static int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns, | ||
824 | dma_addr_t dma_addr) | ||
825 | { | ||
826 | struct nvme_command c; | ||
827 | |||
828 | memset(&c, 0, sizeof(c)); | ||
829 | c.identify.opcode = nvme_admin_identify; | ||
830 | c.identify.nsid = cpu_to_le32(nsid); | ||
831 | c.identify.prp1 = cpu_to_le64(dma_addr); | ||
832 | c.identify.cns = cpu_to_le32(cns); | ||
833 | |||
834 | return nvme_submit_admin_cmd(dev, &c, NULL); | ||
835 | } | ||
836 | |||
837 | static int nvme_get_features(struct nvme_dev *dev, unsigned fid, | ||
838 | unsigned dword11, dma_addr_t dma_addr) | ||
839 | { | ||
840 | struct nvme_command c; | ||
841 | |||
842 | memset(&c, 0, sizeof(c)); | ||
843 | c.features.opcode = nvme_admin_get_features; | ||
844 | c.features.prp1 = cpu_to_le64(dma_addr); | ||
845 | c.features.fid = cpu_to_le32(fid); | ||
846 | c.features.dword11 = cpu_to_le32(dword11); | ||
847 | |||
848 | return nvme_submit_admin_cmd(dev, &c, NULL); | ||
849 | } | ||
850 | |||
851 | static int nvme_set_features(struct nvme_dev *dev, unsigned fid, | ||
852 | unsigned dword11, dma_addr_t dma_addr, u32 *result) | ||
853 | { | ||
854 | struct nvme_command c; | ||
855 | |||
856 | memset(&c, 0, sizeof(c)); | ||
857 | c.features.opcode = nvme_admin_set_features; | ||
858 | c.features.prp1 = cpu_to_le64(dma_addr); | ||
859 | c.features.fid = cpu_to_le32(fid); | ||
860 | c.features.dword11 = cpu_to_le32(dword11); | ||
861 | |||
862 | return nvme_submit_admin_cmd(dev, &c, result); | ||
863 | } | ||
864 | |||
865 | static void nvme_free_queue(struct nvme_dev *dev, int qid) | ||
866 | { | ||
867 | struct nvme_queue *nvmeq = dev->queues[qid]; | ||
868 | int vector = dev->entry[nvmeq->cq_vector].vector; | ||
869 | |||
870 | irq_set_affinity_hint(vector, NULL); | ||
871 | free_irq(vector, nvmeq); | ||
872 | |||
873 | /* Don't tell the adapter to delete the admin queue */ | ||
874 | if (qid) { | ||
875 | adapter_delete_sq(dev, qid); | ||
876 | adapter_delete_cq(dev, qid); | ||
877 | } | ||
878 | |||
879 | dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), | ||
880 | (void *)nvmeq->cqes, nvmeq->cq_dma_addr); | ||
881 | dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), | ||
882 | nvmeq->sq_cmds, nvmeq->sq_dma_addr); | ||
883 | kfree(nvmeq); | ||
884 | } | ||
885 | |||
886 | static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, | ||
887 | int depth, int vector) | ||
888 | { | ||
889 | struct device *dmadev = &dev->pci_dev->dev; | ||
890 | unsigned extra = (depth / 8) + (depth * sizeof(struct nvme_cmd_info)); | ||
891 | struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL); | ||
892 | if (!nvmeq) | ||
893 | return NULL; | ||
894 | |||
895 | nvmeq->cqes = dma_alloc_coherent(dmadev, CQ_SIZE(depth), | ||
896 | &nvmeq->cq_dma_addr, GFP_KERNEL); | ||
897 | if (!nvmeq->cqes) | ||
898 | goto free_nvmeq; | ||
899 | memset((void *)nvmeq->cqes, 0, CQ_SIZE(depth)); | ||
900 | |||
901 | nvmeq->sq_cmds = dma_alloc_coherent(dmadev, SQ_SIZE(depth), | ||
902 | &nvmeq->sq_dma_addr, GFP_KERNEL); | ||
903 | if (!nvmeq->sq_cmds) | ||
904 | goto free_cqdma; | ||
905 | |||
906 | nvmeq->q_dmadev = dmadev; | ||
907 | nvmeq->dev = dev; | ||
908 | spin_lock_init(&nvmeq->q_lock); | ||
909 | nvmeq->cq_head = 0; | ||
910 | nvmeq->cq_phase = 1; | ||
911 | init_waitqueue_head(&nvmeq->sq_full); | ||
912 | init_waitqueue_entry(&nvmeq->sq_cong_wait, nvme_thread); | ||
913 | bio_list_init(&nvmeq->sq_cong); | ||
914 | nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)]; | ||
915 | nvmeq->q_depth = depth; | ||
916 | nvmeq->cq_vector = vector; | ||
917 | |||
918 | return nvmeq; | ||
919 | |||
920 | free_cqdma: | ||
921 | dma_free_coherent(dmadev, CQ_SIZE(nvmeq->q_depth), (void *)nvmeq->cqes, | ||
922 | nvmeq->cq_dma_addr); | ||
923 | free_nvmeq: | ||
924 | kfree(nvmeq); | ||
925 | return NULL; | ||
926 | } | ||
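The q_db assignment above encodes the NVMe doorbell layout: dev->dbs is treated as an array of 32-bit registers starting 4096 bytes into BAR 0, and each submission/completion queue pair owns two of them spaced by the controller's doorbell stride, so queue qid's SQ-tail doorbell is register qid << (db_stride + 1) and its CQ-head doorbell sits 1 << db_stride registers later (which is where nvme_process_cq() writes). A standalone sketch of the resulting byte offsets, assuming a stride field of 0 (an illustration, not driver code):

    #include <stdio.h>

    int main(void)
    {
            unsigned db_stride = 0;                            /* CAP.DSTRD field   */
            for (unsigned qid = 0; qid < 3; qid++) {
                    unsigned sq_idx = qid << (db_stride + 1);  /* 32-bit reg index  */
                    unsigned cq_idx = sq_idx + (1u << db_stride);
                    printf("queue %u: SQ tail doorbell @ 0x%x, CQ head @ 0x%x\n",
                           qid, 4096 + sq_idx * 4, 4096 + cq_idx * 4);
            }
            return 0;
    }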
927 | |||
928 | static int queue_request_irq(struct nvme_dev *dev, struct nvme_queue *nvmeq, | ||
929 | const char *name) | ||
930 | { | ||
931 | if (use_threaded_interrupts) | ||
932 | return request_threaded_irq(dev->entry[nvmeq->cq_vector].vector, | ||
933 | nvme_irq_check, nvme_irq, | ||
934 | IRQF_DISABLED | IRQF_SHARED, | ||
935 | name, nvmeq); | ||
936 | return request_irq(dev->entry[nvmeq->cq_vector].vector, nvme_irq, | ||
937 | IRQF_DISABLED | IRQF_SHARED, name, nvmeq); | ||
938 | } | ||
939 | |||
940 | static __devinit struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, | ||
941 | int qid, int cq_size, int vector) | ||
942 | { | ||
943 | int result; | ||
944 | struct nvme_queue *nvmeq = nvme_alloc_queue(dev, qid, cq_size, vector); | ||
945 | |||
946 | if (!nvmeq) | ||
947 | return ERR_PTR(-ENOMEM); | ||
948 | |||
949 | result = adapter_alloc_cq(dev, qid, nvmeq); | ||
950 | if (result < 0) | ||
951 | goto free_nvmeq; | ||
952 | |||
953 | result = adapter_alloc_sq(dev, qid, nvmeq); | ||
954 | if (result < 0) | ||
955 | goto release_cq; | ||
956 | |||
957 | result = queue_request_irq(dev, nvmeq, "nvme"); | ||
958 | if (result < 0) | ||
959 | goto release_sq; | ||
960 | |||
961 | return nvmeq; | ||
962 | |||
963 | release_sq: | ||
964 | adapter_delete_sq(dev, qid); | ||
965 | release_cq: | ||
966 | adapter_delete_cq(dev, qid); | ||
967 | free_nvmeq: | ||
968 | dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), | ||
969 | (void *)nvmeq->cqes, nvmeq->cq_dma_addr); | ||
970 | dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), | ||
971 | nvmeq->sq_cmds, nvmeq->sq_dma_addr); | ||
972 | kfree(nvmeq); | ||
973 | return ERR_PTR(result); | ||
974 | } | ||
975 | |||
976 | static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev) | ||
977 | { | ||
978 | int result; | ||
979 | u32 aqa; | ||
980 | u64 cap; | ||
981 | unsigned long timeout; | ||
982 | struct nvme_queue *nvmeq; | ||
983 | |||
984 | dev->dbs = ((void __iomem *)dev->bar) + 4096; | ||
985 | |||
986 | nvmeq = nvme_alloc_queue(dev, 0, 64, 0); | ||
987 | if (!nvmeq) | ||
988 | return -ENOMEM; | ||
989 | |||
990 | aqa = nvmeq->q_depth - 1; | ||
991 | aqa |= aqa << 16; | ||
992 | |||
993 | dev->ctrl_config = NVME_CC_ENABLE | NVME_CC_CSS_NVM; | ||
994 | dev->ctrl_config |= (PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT; | ||
995 | dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE; | ||
996 | dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; | ||
997 | |||
998 | writel(0, &dev->bar->cc); | ||
999 | writel(aqa, &dev->bar->aqa); | ||
1000 | writeq(nvmeq->sq_dma_addr, &dev->bar->asq); | ||
1001 | writeq(nvmeq->cq_dma_addr, &dev->bar->acq); | ||
1002 | writel(dev->ctrl_config, &dev->bar->cc); | ||
1003 | |||
1004 | cap = readq(&dev->bar->cap); | ||
1005 | timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies; | ||
1006 | dev->db_stride = NVME_CAP_STRIDE(cap); | ||
1007 | |||
1008 | while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) { | ||
1009 | msleep(100); | ||
1010 | if (fatal_signal_pending(current)) | ||
1011 | return -EINTR; | ||
1012 | if (time_after(jiffies, timeout)) { | ||
1013 | dev_err(&dev->pci_dev->dev, | ||
1014 | "Device not ready; aborting initialisation\n"); | ||
1015 | return -ENODEV; | ||
1016 | } | ||
1017 | } | ||
1018 | |||
1019 | result = queue_request_irq(dev, nvmeq, "nvme admin"); | ||
1020 | dev->queues[0] = nvmeq; | ||
1021 | return result; | ||
1022 | } | ||
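For the 64-entry admin queue allocated here, aqa works out to 63 | (63 << 16) = 0x003f003f: the AQA register carries the admin submission-queue size in bits 0-11 and the admin completion-queue size in bits 16-27, both zero-based, so a value of 63 in each field asks for 64 slots.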
1023 | |||
1024 | static struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, | ||
1025 | unsigned long addr, unsigned length) | ||
1026 | { | ||
1027 | int i, err, count, nents, offset; | ||
1028 | struct scatterlist *sg; | ||
1029 | struct page **pages; | ||
1030 | struct nvme_iod *iod; | ||
1031 | |||
1032 | if (addr & 3) | ||
1033 | return ERR_PTR(-EINVAL); | ||
1034 | if (!length) | ||
1035 | return ERR_PTR(-EINVAL); | ||
1036 | |||
1037 | offset = offset_in_page(addr); | ||
1038 | count = DIV_ROUND_UP(offset + length, PAGE_SIZE); | ||
1039 | pages = kcalloc(count, sizeof(*pages), GFP_KERNEL); | ||
1040 | |||
1041 | err = get_user_pages_fast(addr, count, 1, pages); | ||
1042 | if (err < count) { | ||
1043 | count = err; | ||
1044 | err = -EFAULT; | ||
1045 | goto put_pages; | ||
1046 | } | ||
1047 | |||
1048 | iod = nvme_alloc_iod(count, length, GFP_KERNEL); | ||
1049 | sg = iod->sg; | ||
1050 | sg_init_table(sg, count); | ||
1051 | for (i = 0; i < count; i++) { | ||
1052 | sg_set_page(&sg[i], pages[i], | ||
1053 | min_t(int, length, PAGE_SIZE - offset), offset); | ||
1054 | length -= (PAGE_SIZE - offset); | ||
1055 | offset = 0; | ||
1056 | } | ||
1057 | sg_mark_end(&sg[i - 1]); | ||
1058 | iod->nents = count; | ||
1059 | |||
1060 | err = -ENOMEM; | ||
1061 | nents = dma_map_sg(&dev->pci_dev->dev, sg, count, | ||
1062 | write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); | ||
1063 | if (!nents) | ||
1064 | goto free_iod; | ||
1065 | |||
1066 | kfree(pages); | ||
1067 | return iod; | ||
1068 | |||
1069 | free_iod: | ||
1070 | kfree(iod); | ||
1071 | put_pages: | ||
1072 | for (i = 0; i < count; i++) | ||
1073 | put_page(pages[i]); | ||
1074 | kfree(pages); | ||
1075 | return ERR_PTR(err); | ||
1076 | } | ||
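The page-count arithmetic at the top copes with a user buffer that is not page-aligned. For example, 8200 bytes starting 0x200 bytes into a page give count = DIV_ROUND_UP(0x200 + 8200, 4096) = 3 pinned pages, and the loop then builds scatterlist entries of 3584, 4096 and 520 bytes; the addr & 3 check above rejects anything that is not at least 4-byte aligned.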
1077 | |||
1078 | static void nvme_unmap_user_pages(struct nvme_dev *dev, int write, | ||
1079 | struct nvme_iod *iod) | ||
1080 | { | ||
1081 | int i; | ||
1082 | |||
1083 | dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents, | ||
1084 | write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); | ||
1085 | |||
1086 | for (i = 0; i < iod->nents; i++) | ||
1087 | put_page(sg_page(&iod->sg[i])); | ||
1088 | } | ||
1089 | |||
1090 | static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) | ||
1091 | { | ||
1092 | struct nvme_dev *dev = ns->dev; | ||
1093 | struct nvme_queue *nvmeq; | ||
1094 | struct nvme_user_io io; | ||
1095 | struct nvme_command c; | ||
1096 | unsigned length; | ||
1097 | int status; | ||
1098 | struct nvme_iod *iod; | ||
1099 | |||
1100 | if (copy_from_user(&io, uio, sizeof(io))) | ||
1101 | return -EFAULT; | ||
1102 | length = (io.nblocks + 1) << ns->lba_shift; | ||
1103 | |||
1104 | switch (io.opcode) { | ||
1105 | case nvme_cmd_write: | ||
1106 | case nvme_cmd_read: | ||
1107 | case nvme_cmd_compare: | ||
1108 | iod = nvme_map_user_pages(dev, io.opcode & 1, io.addr, length); | ||
1109 | break; | ||
1110 | default: | ||
1111 | return -EINVAL; | ||
1112 | } | ||
1113 | |||
1114 | if (IS_ERR(iod)) | ||
1115 | return PTR_ERR(iod); | ||
1116 | |||
1117 | memset(&c, 0, sizeof(c)); | ||
1118 | c.rw.opcode = io.opcode; | ||
1119 | c.rw.flags = io.flags; | ||
1120 | c.rw.nsid = cpu_to_le32(ns->ns_id); | ||
1121 | c.rw.slba = cpu_to_le64(io.slba); | ||
1122 | c.rw.length = cpu_to_le16(io.nblocks); | ||
1123 | c.rw.control = cpu_to_le16(io.control); | ||
1124 | c.rw.dsmgmt = cpu_to_le16(io.dsmgmt); | ||
1125 | c.rw.reftag = io.reftag; | ||
1126 | c.rw.apptag = io.apptag; | ||
1127 | c.rw.appmask = io.appmask; | ||
1128 | /* XXX: metadata */ | ||
1129 | length = nvme_setup_prps(dev, &c.common, iod, length, GFP_KERNEL); | ||
1130 | |||
1131 | nvmeq = get_nvmeq(dev); | ||
1132 | /* | ||
1133 | * Since nvme_submit_sync_cmd sleeps, we can't keep preemption | ||
1134 | * disabled. We may be preempted at any point, and be rescheduled | ||
1135 | * to a different CPU. That will cause cacheline bouncing, but no | ||
1136 | * additional races since q_lock already protects against other CPUs. | ||
1137 | */ | ||
1138 | put_nvmeq(nvmeq); | ||
1139 | if (length != (io.nblocks + 1) << ns->lba_shift) | ||
1140 | status = -ENOMEM; | ||
1141 | else | ||
1142 | status = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT); | ||
1143 | |||
1144 | nvme_unmap_user_pages(dev, io.opcode & 1, iod); | ||
1145 | nvme_free_iod(dev, iod); | ||
1146 | return status; | ||
1147 | } | ||
1148 | |||
1149 | static int nvme_user_admin_cmd(struct nvme_ns *ns, | ||
1150 | struct nvme_admin_cmd __user *ucmd) | ||
1151 | { | ||
1152 | struct nvme_dev *dev = ns->dev; | ||
1153 | struct nvme_admin_cmd cmd; | ||
1154 | struct nvme_command c; | ||
1155 | int status, length; | ||
1156 | struct nvme_iod *iod; | ||
1157 | |||
1158 | if (!capable(CAP_SYS_ADMIN)) | ||
1159 | return -EACCES; | ||
1160 | if (copy_from_user(&cmd, ucmd, sizeof(cmd))) | ||
1161 | return -EFAULT; | ||
1162 | |||
1163 | memset(&c, 0, sizeof(c)); | ||
1164 | c.common.opcode = cmd.opcode; | ||
1165 | c.common.flags = cmd.flags; | ||
1166 | c.common.nsid = cpu_to_le32(cmd.nsid); | ||
1167 | c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); | ||
1168 | c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); | ||
1169 | c.common.cdw10[0] = cpu_to_le32(cmd.cdw10); | ||
1170 | c.common.cdw10[1] = cpu_to_le32(cmd.cdw11); | ||
1171 | c.common.cdw10[2] = cpu_to_le32(cmd.cdw12); | ||
1172 | c.common.cdw10[3] = cpu_to_le32(cmd.cdw13); | ||
1173 | c.common.cdw10[4] = cpu_to_le32(cmd.cdw14); | ||
1174 | c.common.cdw10[5] = cpu_to_le32(cmd.cdw15); | ||
1175 | |||
1176 | length = cmd.data_len; | ||
1177 | if (cmd.data_len) { | ||
1178 | iod = nvme_map_user_pages(dev, cmd.opcode & 1, cmd.addr, | ||
1179 | length); | ||
1180 | if (IS_ERR(iod)) | ||
1181 | return PTR_ERR(iod); | ||
1182 | length = nvme_setup_prps(dev, &c.common, iod, length, | ||
1183 | GFP_KERNEL); | ||
1184 | } | ||
1185 | |||
1186 | if (length != cmd.data_len) | ||
1187 | status = -ENOMEM; | ||
1188 | else | ||
1189 | status = nvme_submit_admin_cmd(dev, &c, NULL); | ||
1190 | |||
1191 | if (cmd.data_len) { | ||
1192 | nvme_unmap_user_pages(dev, cmd.opcode & 1, iod); | ||
1193 | nvme_free_iod(dev, iod); | ||
1194 | } | ||
1195 | return status; | ||
1196 | } | ||
1197 | |||
1198 | static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, | ||
1199 | unsigned long arg) | ||
1200 | { | ||
1201 | struct nvme_ns *ns = bdev->bd_disk->private_data; | ||
1202 | |||
1203 | switch (cmd) { | ||
1204 | case NVME_IOCTL_ID: | ||
1205 | return ns->ns_id; | ||
1206 | case NVME_IOCTL_ADMIN_CMD: | ||
1207 | return nvme_user_admin_cmd(ns, (void __user *)arg); | ||
1208 | case NVME_IOCTL_SUBMIT_IO: | ||
1209 | return nvme_submit_io(ns, (void __user *)arg); | ||
1210 | default: | ||
1211 | return -ENOTTY; | ||
1212 | } | ||
1213 | } | ||
1214 | |||
1215 | static const struct block_device_operations nvme_fops = { | ||
1216 | .owner = THIS_MODULE, | ||
1217 | .ioctl = nvme_ioctl, | ||
1218 | .compat_ioctl = nvme_ioctl, | ||
1219 | }; | ||
1220 | |||
1221 | static void nvme_timeout_ios(struct nvme_queue *nvmeq) | ||
1222 | { | ||
1223 | int depth = nvmeq->q_depth - 1; | ||
1224 | struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); | ||
1225 | unsigned long now = jiffies; | ||
1226 | int cmdid; | ||
1227 | |||
1228 | for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) { | ||
1229 | void *ctx; | ||
1230 | nvme_completion_fn fn; | ||
1231 | static struct nvme_completion cqe = { .status = cpu_to_le16(NVME_SC_ABORT_REQ) << 1, }; | ||
1232 | |||
1233 | if (!time_after(now, info[cmdid].timeout)) | ||
1234 | continue; | ||
1235 | dev_warn(nvmeq->q_dmadev, "Timing out I/O %d\n", cmdid); | ||
1236 | ctx = cancel_cmdid(nvmeq, cmdid, &fn); | ||
1237 | fn(nvmeq->dev, ctx, &cqe); | ||
1238 | } | ||
1239 | } | ||
1240 | |||
1241 | static void nvme_resubmit_bios(struct nvme_queue *nvmeq) | ||
1242 | { | ||
1243 | while (bio_list_peek(&nvmeq->sq_cong)) { | ||
1244 | struct bio *bio = bio_list_pop(&nvmeq->sq_cong); | ||
1245 | struct nvme_ns *ns = bio->bi_bdev->bd_disk->private_data; | ||
1246 | if (nvme_submit_bio_queue(nvmeq, ns, bio)) { | ||
1247 | bio_list_add_head(&nvmeq->sq_cong, bio); | ||
1248 | break; | ||
1249 | } | ||
1250 | if (bio_list_empty(&nvmeq->sq_cong)) | ||
1251 | remove_wait_queue(&nvmeq->sq_full, | ||
1252 | &nvmeq->sq_cong_wait); | ||
1253 | } | ||
1254 | } | ||
1255 | |||
1256 | static int nvme_kthread(void *data) | ||
1257 | { | ||
1258 | struct nvme_dev *dev; | ||
1259 | |||
1260 | while (!kthread_should_stop()) { | ||
1261 | __set_current_state(TASK_RUNNING); | ||
1262 | spin_lock(&dev_list_lock); | ||
1263 | list_for_each_entry(dev, &dev_list, node) { | ||
1264 | int i; | ||
1265 | for (i = 0; i < dev->queue_count; i++) { | ||
1266 | struct nvme_queue *nvmeq = dev->queues[i]; | ||
1267 | if (!nvmeq) | ||
1268 | continue; | ||
1269 | spin_lock_irq(&nvmeq->q_lock); | ||
1270 | if (nvme_process_cq(nvmeq)) | ||
1271 | printk("process_cq did something\n"); | ||
1272 | nvme_timeout_ios(nvmeq); | ||
1273 | nvme_resubmit_bios(nvmeq); | ||
1274 | spin_unlock_irq(&nvmeq->q_lock); | ||
1275 | } | ||
1276 | } | ||
1277 | spin_unlock(&dev_list_lock); | ||
1278 | set_current_state(TASK_INTERRUPTIBLE); | ||
1279 | schedule_timeout(HZ); | ||
1280 | } | ||
1281 | return 0; | ||
1282 | } | ||
1283 | |||
1284 | static DEFINE_IDA(nvme_index_ida); | ||
1285 | |||
1286 | static int nvme_get_ns_idx(void) | ||
1287 | { | ||
1288 | int index, error; | ||
1289 | |||
1290 | do { | ||
1291 | if (!ida_pre_get(&nvme_index_ida, GFP_KERNEL)) | ||
1292 | return -1; | ||
1293 | |||
1294 | spin_lock(&dev_list_lock); | ||
1295 | error = ida_get_new(&nvme_index_ida, &index); | ||
1296 | spin_unlock(&dev_list_lock); | ||
1297 | } while (error == -EAGAIN); | ||
1298 | |||
1299 | if (error) | ||
1300 | index = -1; | ||
1301 | return index; | ||
1302 | } | ||
1303 | |||
1304 | static void nvme_put_ns_idx(int index) | ||
1305 | { | ||
1306 | spin_lock(&dev_list_lock); | ||
1307 | ida_remove(&nvme_index_ida, index); | ||
1308 | spin_unlock(&dev_list_lock); | ||
1309 | } | ||
1310 | |||
1311 | static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid, | ||
1312 | struct nvme_id_ns *id, struct nvme_lba_range_type *rt) | ||
1313 | { | ||
1314 | struct nvme_ns *ns; | ||
1315 | struct gendisk *disk; | ||
1316 | int lbaf; | ||
1317 | |||
1318 | if (rt->attributes & NVME_LBART_ATTRIB_HIDE) | ||
1319 | return NULL; | ||
1320 | |||
1321 | ns = kzalloc(sizeof(*ns), GFP_KERNEL); | ||
1322 | if (!ns) | ||
1323 | return NULL; | ||
1324 | ns->queue = blk_alloc_queue(GFP_KERNEL); | ||
1325 | if (!ns->queue) | ||
1326 | goto out_free_ns; | ||
1327 | ns->queue->queue_flags = QUEUE_FLAG_DEFAULT; | ||
1328 | queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue); | ||
1329 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue); | ||
1330 | /* queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); */ | ||
1331 | blk_queue_make_request(ns->queue, nvme_make_request); | ||
1332 | ns->dev = dev; | ||
1333 | ns->queue->queuedata = ns; | ||
1334 | |||
1335 | disk = alloc_disk(NVME_MINORS); | ||
1336 | if (!disk) | ||
1337 | goto out_free_queue; | ||
1338 | ns->ns_id = nsid; | ||
1339 | ns->disk = disk; | ||
1340 | lbaf = id->flbas & 0xf; | ||
1341 | ns->lba_shift = id->lbaf[lbaf].ds; | ||
1342 | |||
1343 | disk->major = nvme_major; | ||
1344 | disk->minors = NVME_MINORS; | ||
1345 | disk->first_minor = NVME_MINORS * nvme_get_ns_idx(); | ||
1346 | disk->fops = &nvme_fops; | ||
1347 | disk->private_data = ns; | ||
1348 | disk->queue = ns->queue; | ||
1349 | disk->driverfs_dev = &dev->pci_dev->dev; | ||
1350 | sprintf(disk->disk_name, "nvme%dn%d", dev->instance, nsid); | ||
1351 | set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); | ||
1352 | |||
1353 | return ns; | ||
1354 | |||
1355 | out_free_queue: | ||
1356 | blk_cleanup_queue(ns->queue); | ||
1357 | out_free_ns: | ||
1358 | kfree(ns); | ||
1359 | return NULL; | ||
1360 | } | ||
1361 | |||
1362 | static void nvme_ns_free(struct nvme_ns *ns) | ||
1363 | { | ||
1364 | int index = ns->disk->first_minor / NVME_MINORS; | ||
1365 | put_disk(ns->disk); | ||
1366 | nvme_put_ns_idx(index); | ||
1367 | blk_cleanup_queue(ns->queue); | ||
1368 | kfree(ns); | ||
1369 | } | ||
1370 | |||
1371 | static int set_queue_count(struct nvme_dev *dev, int count) | ||
1372 | { | ||
1373 | int status; | ||
1374 | u32 result; | ||
1375 | u32 q_count = (count - 1) | ((count - 1) << 16); | ||
1376 | |||
1377 | status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0, | ||
1378 | &result); | ||
1379 | if (status) | ||
1380 | return -EIO; | ||
1381 | return min(result & 0xffff, result >> 16) + 1; | ||
1382 | } | ||
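The Number of Queues feature works in zero-based counts packed into one dword: bits 0-15 carry the number of submission queues minus one and bits 16-31 the number of completion queues minus one, in both the request and the result. Asking for four queues therefore sends q_count = 0x00030003; if the controller grants, say, only two completion queues the result could come back as 0x00010003, and min(result & 0xffff, result >> 16) + 1 = 2 is what the caller gets.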
1383 | |||
1384 | static int __devinit nvme_setup_io_queues(struct nvme_dev *dev) | ||
1385 | { | ||
1386 | int result, cpu, i, nr_io_queues, db_bar_size; | ||
1387 | |||
1388 | nr_io_queues = num_online_cpus(); | ||
1389 | result = set_queue_count(dev, nr_io_queues); | ||
1390 | if (result < 0) | ||
1391 | return result; | ||
1392 | if (result < nr_io_queues) | ||
1393 | nr_io_queues = result; | ||
1394 | |||
1395 | /* Deregister the admin queue's interrupt */ | ||
1396 | free_irq(dev->entry[0].vector, dev->queues[0]); | ||
1397 | |||
1398 | db_bar_size = 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3)); | ||
1399 | if (db_bar_size > 8192) { | ||
1400 | iounmap(dev->bar); | ||
1401 | dev->bar = ioremap(pci_resource_start(dev->pci_dev, 0), | ||
1402 | db_bar_size); | ||
1403 | dev->dbs = ((void __iomem *)dev->bar) + 4096; | ||
1404 | dev->queues[0]->q_db = dev->dbs; | ||
1405 | } | ||
1406 | |||
1407 | for (i = 0; i < nr_io_queues; i++) | ||
1408 | dev->entry[i].entry = i; | ||
1409 | for (;;) { | ||
1410 | result = pci_enable_msix(dev->pci_dev, dev->entry, | ||
1411 | nr_io_queues); | ||
1412 | if (result == 0) { | ||
1413 | break; | ||
1414 | } else if (result > 0) { | ||
1415 | nr_io_queues = result; | ||
1416 | continue; | ||
1417 | } else { | ||
1418 | nr_io_queues = 1; | ||
1419 | break; | ||
1420 | } | ||
1421 | } | ||
1422 | |||
1423 | result = queue_request_irq(dev, dev->queues[0], "nvme admin"); | ||
1424 | /* XXX: handle failure here */ | ||
1425 | |||
1426 | cpu = cpumask_first(cpu_online_mask); | ||
1427 | for (i = 0; i < nr_io_queues; i++) { | ||
1428 | irq_set_affinity_hint(dev->entry[i].vector, get_cpu_mask(cpu)); | ||
1429 | cpu = cpumask_next(cpu, cpu_online_mask); | ||
1430 | } | ||
1431 | |||
1432 | for (i = 0; i < nr_io_queues; i++) { | ||
1433 | dev->queues[i + 1] = nvme_create_queue(dev, i + 1, | ||
1434 | NVME_Q_DEPTH, i); | ||
1435 | if (IS_ERR(dev->queues[i + 1])) | ||
1436 | return PTR_ERR(dev->queues[i + 1]); | ||
1437 | dev->queue_count++; | ||
1438 | } | ||
1439 | |||
1440 | for (; i < num_possible_cpus(); i++) { | ||
1441 | int target = i % rounddown_pow_of_two(dev->queue_count - 1); | ||
1442 | dev->queues[i + 1] = dev->queues[target + 1]; | ||
1443 | } | ||
1444 | |||
1445 | return 0; | ||
1446 | } | ||
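The trailing loop covers machines with more possible CPUs than granted queues by letting them share: with four I/O queues created (queue_count becomes 5) and eight possible CPUs, rounddown_pow_of_two(4) = 4, so dev->queues[5] simply aliases dev->queues[1] for CPU 4, queues[6] aliases queues[2] for CPU 5, and so on, keeping get_nvmeq()'s cpu + 1 lookup valid for every CPU. The earlier db_bar_size check serves a similar sizing purpose: with a stride field of 0, four I/O queues plus the admin queue need 4096 + (5 << 3) = 4136 bytes of doorbell space, comfortably inside the 8192 bytes mapped at probe time, so no remap is needed.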
1447 | |||
1448 | static void nvme_free_queues(struct nvme_dev *dev) | ||
1449 | { | ||
1450 | int i; | ||
1451 | |||
1452 | for (i = dev->queue_count - 1; i >= 0; i--) | ||
1453 | nvme_free_queue(dev, i); | ||
1454 | } | ||
1455 | |||
1456 | static int __devinit nvme_dev_add(struct nvme_dev *dev) | ||
1457 | { | ||
1458 | int res, nn, i; | ||
1459 | struct nvme_ns *ns, *next; | ||
1460 | struct nvme_id_ctrl *ctrl; | ||
1461 | struct nvme_id_ns *id_ns; | ||
1462 | void *mem; | ||
1463 | dma_addr_t dma_addr; | ||
1464 | |||
1465 | res = nvme_setup_io_queues(dev); | ||
1466 | if (res) | ||
1467 | return res; | ||
1468 | |||
1469 | mem = dma_alloc_coherent(&dev->pci_dev->dev, 8192, &dma_addr, | ||
1470 | GFP_KERNEL); | ||
1471 | |||
1472 | res = nvme_identify(dev, 0, 1, dma_addr); | ||
1473 | if (res) { | ||
1474 | res = -EIO; | ||
1475 | goto out_free; | ||
1476 | } | ||
1477 | |||
1478 | ctrl = mem; | ||
1479 | nn = le32_to_cpup(&ctrl->nn); | ||
1480 | memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn)); | ||
1481 | memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn)); | ||
1482 | memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); | ||
1483 | |||
1484 | id_ns = mem; | ||
1485 | for (i = 1; i <= nn; i++) { | ||
1486 | res = nvme_identify(dev, i, 0, dma_addr); | ||
1487 | if (res) | ||
1488 | continue; | ||
1489 | |||
1490 | if (id_ns->ncap == 0) | ||
1491 | continue; | ||
1492 | |||
1493 | res = nvme_get_features(dev, NVME_FEAT_LBA_RANGE, i, | ||
1494 | dma_addr + 4096); | ||
1495 | if (res) | ||
1496 | continue; | ||
1497 | |||
1498 | ns = nvme_alloc_ns(dev, i, mem, mem + 4096); | ||
1499 | if (ns) | ||
1500 | list_add_tail(&ns->list, &dev->namespaces); | ||
1501 | } | ||
1502 | list_for_each_entry(ns, &dev->namespaces, list) | ||
1503 | add_disk(ns->disk); | ||
1504 | |||
1505 | goto out; | ||
1506 | |||
1507 | out_free: | ||
1508 | list_for_each_entry_safe(ns, next, &dev->namespaces, list) { | ||
1509 | list_del(&ns->list); | ||
1510 | nvme_ns_free(ns); | ||
1511 | } | ||
1512 | |||
1513 | out: | ||
1514 | dma_free_coherent(&dev->pci_dev->dev, 8192, mem, dma_addr); | ||
1515 | return res; | ||
1516 | } | ||
1517 | |||
1518 | static int nvme_dev_remove(struct nvme_dev *dev) | ||
1519 | { | ||
1520 | struct nvme_ns *ns, *next; | ||
1521 | |||
1522 | spin_lock(&dev_list_lock); | ||
1523 | list_del(&dev->node); | ||
1524 | spin_unlock(&dev_list_lock); | ||
1525 | |||
1526 | /* TODO: wait for all outstanding I/O to finish, or cancel it */ | ||
1527 | |||
1528 | list_for_each_entry_safe(ns, next, &dev->namespaces, list) { | ||
1529 | list_del(&ns->list); | ||
1530 | del_gendisk(ns->disk); | ||
1531 | nvme_ns_free(ns); | ||
1532 | } | ||
1533 | |||
1534 | nvme_free_queues(dev); | ||
1535 | |||
1536 | return 0; | ||
1537 | } | ||
1538 | |||
1539 | static int nvme_setup_prp_pools(struct nvme_dev *dev) | ||
1540 | { | ||
1541 | struct device *dmadev = &dev->pci_dev->dev; | ||
1542 | dev->prp_page_pool = dma_pool_create("prp list page", dmadev, | ||
1543 | PAGE_SIZE, PAGE_SIZE, 0); | ||
1544 | if (!dev->prp_page_pool) | ||
1545 | return -ENOMEM; | ||
1546 | |||
1547 | /* Optimisation for I/Os between 4k and 128k */ | ||
1548 | dev->prp_small_pool = dma_pool_create("prp list 256", dmadev, | ||
1549 | 256, 256, 0); | ||
1550 | if (!dev->prp_small_pool) { | ||
1551 | dma_pool_destroy(dev->prp_page_pool); | ||
1552 | return -ENOMEM; | ||
1553 | } | ||
1554 | return 0; | ||
1555 | } | ||
1556 | |||
1557 | static void nvme_release_prp_pools(struct nvme_dev *dev) | ||
1558 | { | ||
1559 | dma_pool_destroy(dev->prp_page_pool); | ||
1560 | dma_pool_destroy(dev->prp_small_pool); | ||
1561 | } | ||
1562 | |||
1563 | /* XXX: Use an ida or something to let remove / add work correctly */ | ||
1564 | static void nvme_set_instance(struct nvme_dev *dev) | ||
1565 | { | ||
1566 | static int instance; | ||
1567 | dev->instance = instance++; | ||
1568 | } | ||
1569 | |||
1570 | static void nvme_release_instance(struct nvme_dev *dev) | ||
1571 | { | ||
1572 | } | ||
1573 | |||
1574 | static int __devinit nvme_probe(struct pci_dev *pdev, | ||
1575 | const struct pci_device_id *id) | ||
1576 | { | ||
1577 | int bars, result = -ENOMEM; | ||
1578 | struct nvme_dev *dev; | ||
1579 | |||
1580 | dev = kzalloc(sizeof(*dev), GFP_KERNEL); | ||
1581 | if (!dev) | ||
1582 | return -ENOMEM; | ||
1583 | dev->entry = kcalloc(num_possible_cpus(), sizeof(*dev->entry), | ||
1584 | GFP_KERNEL); | ||
1585 | if (!dev->entry) | ||
1586 | goto free; | ||
1587 | dev->queues = kcalloc(num_possible_cpus() + 1, sizeof(void *), | ||
1588 | GFP_KERNEL); | ||
1589 | if (!dev->queues) | ||
1590 | goto free; | ||
1591 | |||
1592 | if (pci_enable_device_mem(pdev)) | ||
1593 | goto free; | ||
1594 | pci_set_master(pdev); | ||
1595 | bars = pci_select_bars(pdev, IORESOURCE_MEM); | ||
1596 | if (pci_request_selected_regions(pdev, bars, "nvme")) | ||
1597 | goto disable; | ||
1598 | |||
1599 | INIT_LIST_HEAD(&dev->namespaces); | ||
1600 | dev->pci_dev = pdev; | ||
1601 | pci_set_drvdata(pdev, dev); | ||
1602 | dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); | ||
1603 | dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); | ||
1604 | nvme_set_instance(dev); | ||
1605 | dev->entry[0].vector = pdev->irq; | ||
1606 | |||
1607 | result = nvme_setup_prp_pools(dev); | ||
1608 | if (result) | ||
1609 | goto disable_msix; | ||
1610 | |||
1611 | dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); | ||
1612 | if (!dev->bar) { | ||
1613 | result = -ENOMEM; | ||
1614 | goto disable_msix; | ||
1615 | } | ||
1616 | |||
1617 | result = nvme_configure_admin_queue(dev); | ||
1618 | if (result) | ||
1619 | goto unmap; | ||
1620 | dev->queue_count++; | ||
1621 | |||
1622 | spin_lock(&dev_list_lock); | ||
1623 | list_add(&dev->node, &dev_list); | ||
1624 | spin_unlock(&dev_list_lock); | ||
1625 | |||
1626 | result = nvme_dev_add(dev); | ||
1627 | if (result) | ||
1628 | goto delete; | ||
1629 | |||
1630 | return 0; | ||
1631 | |||
1632 | delete: | ||
1633 | spin_lock(&dev_list_lock); | ||
1634 | list_del(&dev->node); | ||
1635 | spin_unlock(&dev_list_lock); | ||
1636 | |||
1637 | nvme_free_queues(dev); | ||
1638 | unmap: | ||
1639 | iounmap(dev->bar); | ||
1640 | disable_msix: | ||
1641 | pci_disable_msix(pdev); | ||
1642 | nvme_release_instance(dev); | ||
1643 | nvme_release_prp_pools(dev); | ||
1644 | disable: | ||
1645 | pci_disable_device(pdev); | ||
1646 | pci_release_regions(pdev); | ||
1647 | free: | ||
1648 | kfree(dev->queues); | ||
1649 | kfree(dev->entry); | ||
1650 | kfree(dev); | ||
1651 | return result; | ||
1652 | } | ||
1653 | |||
1654 | static void __devexit nvme_remove(struct pci_dev *pdev) | ||
1655 | { | ||
1656 | struct nvme_dev *dev = pci_get_drvdata(pdev); | ||
1657 | nvme_dev_remove(dev); | ||
1658 | pci_disable_msix(pdev); | ||
1659 | iounmap(dev->bar); | ||
1660 | nvme_release_instance(dev); | ||
1661 | nvme_release_prp_pools(dev); | ||
1662 | pci_disable_device(pdev); | ||
1663 | pci_release_regions(pdev); | ||
1664 | kfree(dev->queues); | ||
1665 | kfree(dev->entry); | ||
1666 | kfree(dev); | ||
1667 | } | ||
1668 | |||
1669 | /* These functions are yet to be implemented */ | ||
1670 | #define nvme_error_detected NULL | ||
1671 | #define nvme_dump_registers NULL | ||
1672 | #define nvme_link_reset NULL | ||
1673 | #define nvme_slot_reset NULL | ||
1674 | #define nvme_error_resume NULL | ||
1675 | #define nvme_suspend NULL | ||
1676 | #define nvme_resume NULL | ||
1677 | |||
1678 | static struct pci_error_handlers nvme_err_handler = { | ||
1679 | .error_detected = nvme_error_detected, | ||
1680 | .mmio_enabled = nvme_dump_registers, | ||
1681 | .link_reset = nvme_link_reset, | ||
1682 | .slot_reset = nvme_slot_reset, | ||
1683 | .resume = nvme_error_resume, | ||
1684 | }; | ||
1685 | |||
1686 | /* Move to pci_ids.h later */ | ||
1687 | #define PCI_CLASS_STORAGE_EXPRESS 0x010802 | ||
1688 | |||
1689 | static DEFINE_PCI_DEVICE_TABLE(nvme_id_table) = { | ||
1690 | { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, | ||
1691 | { 0, } | ||
1692 | }; | ||
1693 | MODULE_DEVICE_TABLE(pci, nvme_id_table); | ||
1694 | |||
1695 | static struct pci_driver nvme_driver = { | ||
1696 | .name = "nvme", | ||
1697 | .id_table = nvme_id_table, | ||
1698 | .probe = nvme_probe, | ||
1699 | .remove = __devexit_p(nvme_remove), | ||
1700 | .suspend = nvme_suspend, | ||
1701 | .resume = nvme_resume, | ||
1702 | .err_handler = &nvme_err_handler, | ||
1703 | }; | ||
1704 | |||
1705 | static int __init nvme_init(void) | ||
1706 | { | ||
1707 | int result = -EBUSY; | ||
1708 | |||
1709 | nvme_thread = kthread_run(nvme_kthread, NULL, "nvme"); | ||
1710 | if (IS_ERR(nvme_thread)) | ||
1711 | return PTR_ERR(nvme_thread); | ||
1712 | |||
1713 | nvme_major = register_blkdev(nvme_major, "nvme"); | ||
1714 | if (nvme_major <= 0) | ||
1715 | goto kill_kthread; | ||
1716 | |||
1717 | result = pci_register_driver(&nvme_driver); | ||
1718 | if (result) | ||
1719 | goto unregister_blkdev; | ||
1720 | return 0; | ||
1721 | |||
1722 | unregister_blkdev: | ||
1723 | unregister_blkdev(nvme_major, "nvme"); | ||
1724 | kill_kthread: | ||
1725 | kthread_stop(nvme_thread); | ||
1726 | return result; | ||
1727 | } | ||
1728 | |||
1729 | static void __exit nvme_exit(void) | ||
1730 | { | ||
1731 | pci_unregister_driver(&nvme_driver); | ||
1732 | unregister_blkdev(nvme_major, "nvme"); | ||
1733 | kthread_stop(nvme_thread); | ||
1734 | } | ||
1735 | |||
1736 | MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>"); | ||
1737 | MODULE_LICENSE("GPL"); | ||
1738 | MODULE_VERSION("0.8"); | ||
1739 | module_init(nvme_init); | ||
1740 | module_exit(nvme_exit); | ||
diff --git a/drivers/block/paride/bpck6.c b/drivers/block/paride/bpck6.c index ad124525ac23..ec64e7f5d1ce 100644 --- a/drivers/block/paride/bpck6.c +++ b/drivers/block/paride/bpck6.c | |||
@@ -20,9 +20,6 @@ | |||
20 | */ | 20 | */ |
21 | 21 | ||
22 | 22 | ||
23 | /* PARAMETERS */ | ||
24 | static int verbose; /* set this to 1 to see debugging messages and whatnot */ | ||
25 | |||
26 | #define BACKPACK_VERSION "2.0.2" | 23 | #define BACKPACK_VERSION "2.0.2" |
27 | 24 | ||
28 | #include <linux/module.h> | 25 | #include <linux/module.h> |
@@ -36,6 +33,8 @@ static int verbose; /* set this to 1 to see debugging messages and whatnot */ | |||
36 | #include "ppc6lnx.c" | 33 | #include "ppc6lnx.c" |
37 | #include "paride.h" | 34 | #include "paride.h" |
38 | 35 | ||
36 | /* PARAMETERS */ | ||
37 | static bool verbose; /* set this to 1 to see debugging messages and whatnot */ | ||
39 | 38 | ||
40 | 39 | ||
41 | #define PPCSTRUCT(pi) ((Interface *)(pi->private)) | 40 | #define PPCSTRUCT(pi) ((Interface *)(pi->private)) |
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 46b8136c31bb..ba2b6b5e5910 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c | |||
@@ -144,7 +144,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_DLY}; | |||
144 | static DEFINE_MUTEX(pcd_mutex); | 144 | static DEFINE_MUTEX(pcd_mutex); |
145 | static DEFINE_SPINLOCK(pcd_lock); | 145 | static DEFINE_SPINLOCK(pcd_lock); |
146 | 146 | ||
147 | module_param(verbose, bool, 0644); | 147 | module_param(verbose, int, 0644); |
148 | module_param(major, int, 0); | 148 | module_param(major, int, 0); |
149 | module_param(name, charp, 0); | 149 | module_param(name, charp, 0); |
150 | module_param(nice, int, 0); | 150 | module_param(nice, int, 0); |
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 869e7676d46f..831e3ac156e6 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c | |||
@@ -124,8 +124,9 @@ | |||
124 | by default. | 124 | by default. |
125 | 125 | ||
126 | */ | 126 | */ |
127 | #include <linux/types.h> | ||
127 | 128 | ||
128 | static int verbose = 0; | 129 | static bool verbose = 0; |
129 | static int major = PD_MAJOR; | 130 | static int major = PD_MAJOR; |
130 | static char *name = PD_NAME; | 131 | static char *name = PD_NAME; |
131 | static int cluster = 64; | 132 | static int cluster = 64; |
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index f21b520ef419..ec8f9ed6326e 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c | |||
@@ -118,13 +118,15 @@ | |||
118 | #define PF_NAME "pf" | 118 | #define PF_NAME "pf" |
119 | #define PF_UNITS 4 | 119 | #define PF_UNITS 4 |
120 | 120 | ||
121 | #include <linux/types.h> | ||
122 | |||
121 | /* Here are things one can override from the insmod command. | 123 | /* Here are things one can override from the insmod command. |
122 | Most are autoprobed by paride unless set here. Verbose is off | 124 | Most are autoprobed by paride unless set here. Verbose is off |
123 | by default. | 125 | by default. |
124 | 126 | ||
125 | */ | 127 | */ |
126 | 128 | ||
127 | static int verbose = 0; | 129 | static bool verbose = 0; |
128 | static int major = PF_MAJOR; | 130 | static int major = PF_MAJOR; |
129 | static char *name = PF_NAME; | 131 | static char *name = PF_NAME; |
130 | static int cluster = 64; | 132 | static int cluster = 64; |
diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c index a79fb4f7ff62..4a27b1de5fcb 100644 --- a/drivers/block/paride/pg.c +++ b/drivers/block/paride/pg.c | |||
@@ -130,13 +130,14 @@ | |||
130 | #define PI_PG 4 | 130 | #define PI_PG 4 |
131 | #endif | 131 | #endif |
132 | 132 | ||
133 | #include <linux/types.h> | ||
133 | /* Here are things one can override from the insmod command. | 134 | /* Here are things one can override from the insmod command. |
134 | Most are autoprobed by paride unless set here. Verbose is 0 | 135 | Most are autoprobed by paride unless set here. Verbose is 0 |
135 | by default. | 136 | by default. |
136 | 137 | ||
137 | */ | 138 | */ |
138 | 139 | ||
139 | static int verbose = 0; | 140 | static bool verbose = 0; |
140 | static int major = PG_MAJOR; | 141 | static int major = PG_MAJOR; |
141 | static char *name = PG_NAME; | 142 | static char *name = PG_NAME; |
142 | static int disable = 0; | 143 | static int disable = 0; |
diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c index 7179f79d7468..2596042eb987 100644 --- a/drivers/block/paride/pt.c +++ b/drivers/block/paride/pt.c | |||
@@ -109,13 +109,15 @@ | |||
109 | #define PT_NAME "pt" | 109 | #define PT_NAME "pt" |
110 | #define PT_UNITS 4 | 110 | #define PT_UNITS 4 |
111 | 111 | ||
112 | #include <linux/types.h> | ||
113 | |||
112 | /* Here are things one can override from the insmod command. | 114 | /* Here are things one can override from the insmod command. |
113 | Most are autoprobed by paride unless set here. Verbose is on | 115 | Most are autoprobed by paride unless set here. Verbose is on |
114 | by default. | 116 | by default. |
115 | 117 | ||
116 | */ | 118 | */ |
117 | 119 | ||
118 | static int verbose = 0; | 120 | static bool verbose = 0; |
119 | static int major = PT_MAJOR; | 121 | static int major = PT_MAJOR; |
120 | static char *name = PT_NAME; | 122 | static char *name = PT_NAME; |
121 | static int disable = 0; | 123 | static int disable = 0; |
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index d59edeabd93f..ba66e4445f41 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c | |||
@@ -987,14 +987,14 @@ static void pkt_copy_bio_data(struct bio *src_bio, int seg, int offs, struct pag | |||
987 | 987 | ||
988 | while (copy_size > 0) { | 988 | while (copy_size > 0) { |
989 | struct bio_vec *src_bvl = bio_iovec_idx(src_bio, seg); | 989 | struct bio_vec *src_bvl = bio_iovec_idx(src_bio, seg); |
990 | void *vfrom = kmap_atomic(src_bvl->bv_page, KM_USER0) + | 990 | void *vfrom = kmap_atomic(src_bvl->bv_page) + |
991 | src_bvl->bv_offset + offs; | 991 | src_bvl->bv_offset + offs; |
992 | void *vto = page_address(dst_page) + dst_offs; | 992 | void *vto = page_address(dst_page) + dst_offs; |
993 | int len = min_t(int, copy_size, src_bvl->bv_len - offs); | 993 | int len = min_t(int, copy_size, src_bvl->bv_len - offs); |
994 | 994 | ||
995 | BUG_ON(len < 0); | 995 | BUG_ON(len < 0); |
996 | memcpy(vto, vfrom, len); | 996 | memcpy(vto, vfrom, len); |
997 | kunmap_atomic(vfrom, KM_USER0); | 997 | kunmap_atomic(vfrom); |
998 | 998 | ||
999 | seg++; | 999 | seg++; |
1000 | offs = 0; | 1000 | offs = 0; |
@@ -1019,10 +1019,10 @@ static void pkt_make_local_copy(struct packet_data *pkt, struct bio_vec *bvec) | |||
1019 | offs = 0; | 1019 | offs = 0; |
1020 | for (f = 0; f < pkt->frames; f++) { | 1020 | for (f = 0; f < pkt->frames; f++) { |
1021 | if (bvec[f].bv_page != pkt->pages[p]) { | 1021 | if (bvec[f].bv_page != pkt->pages[p]) { |
1022 | void *vfrom = kmap_atomic(bvec[f].bv_page, KM_USER0) + bvec[f].bv_offset; | 1022 | void *vfrom = kmap_atomic(bvec[f].bv_page) + bvec[f].bv_offset; |
1023 | void *vto = page_address(pkt->pages[p]) + offs; | 1023 | void *vto = page_address(pkt->pages[p]) + offs; |
1024 | memcpy(vto, vfrom, CD_FRAMESIZE); | 1024 | memcpy(vto, vfrom, CD_FRAMESIZE); |
1025 | kunmap_atomic(vfrom, KM_USER0); | 1025 | kunmap_atomic(vfrom); |
1026 | bvec[f].bv_page = pkt->pages[p]; | 1026 | bvec[f].bv_page = pkt->pages[p]; |
1027 | bvec[f].bv_offset = offs; | 1027 | bvec[f].bv_offset = offs; |
1028 | } else { | 1028 | } else { |
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 148ab944378d..013c7a549fb6 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c | |||
@@ -41,19 +41,35 @@ | |||
41 | 41 | ||
42 | #include "rbd_types.h" | 42 | #include "rbd_types.h" |
43 | 43 | ||
44 | #define DRV_NAME "rbd" | 44 | /* |
45 | #define DRV_NAME_LONG "rbd (rados block device)" | 45 | * The basic unit of block I/O is a sector. It is interpreted in a |
46 | * number of contexts in Linux (blk, bio, genhd), but the default is | ||
47 | * universally 512 bytes. These symbols are just slightly more | ||
48 | * meaningful than the bare numbers they represent. | ||
49 | */ | ||
50 | #define SECTOR_SHIFT 9 | ||
51 | #define SECTOR_SIZE (1ULL << SECTOR_SHIFT) | ||
52 | |||
53 | #define RBD_DRV_NAME "rbd" | ||
54 | #define RBD_DRV_NAME_LONG "rbd (rados block device)" | ||
46 | 55 | ||
47 | #define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ | 56 | #define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ |
48 | 57 | ||
49 | #define RBD_MAX_MD_NAME_LEN (96 + sizeof(RBD_SUFFIX)) | 58 | #define RBD_MAX_MD_NAME_LEN (RBD_MAX_OBJ_NAME_LEN + sizeof(RBD_SUFFIX)) |
50 | #define RBD_MAX_POOL_NAME_LEN 64 | 59 | #define RBD_MAX_POOL_NAME_LEN 64 |
51 | #define RBD_MAX_SNAP_NAME_LEN 32 | 60 | #define RBD_MAX_SNAP_NAME_LEN 32 |
52 | #define RBD_MAX_OPT_LEN 1024 | 61 | #define RBD_MAX_OPT_LEN 1024 |
53 | 62 | ||
54 | #define RBD_SNAP_HEAD_NAME "-" | 63 | #define RBD_SNAP_HEAD_NAME "-" |
55 | 64 | ||
65 | /* | ||
66 | * An RBD device name will be "rbd#", where the "rbd" comes from | ||
67 | * RBD_DRV_NAME above, and # is a unique integer identifier. | ||
68 | * MAX_INT_FORMAT_WIDTH is used in ensuring DEV_NAME_LEN is big | ||
69 | * enough to hold all possible device names. | ||
70 | */ | ||
56 | #define DEV_NAME_LEN 32 | 71 | #define DEV_NAME_LEN 32 |
72 | #define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1) | ||
57 | 73 | ||
58 | #define RBD_NOTIFY_TIMEOUT_DEFAULT 10 | 74 | #define RBD_NOTIFY_TIMEOUT_DEFAULT 10 |
59 | 75 | ||
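The two comment blocks added in this hunk each carry a small arithmetic claim: 1 << 9 is the conventional 512-byte sector, and (5 * sizeof(int)) / 2 + 1 bounds the printed width of an int so that DEV_NAME_LEN can hold any "rbd#" name. A userspace sketch that spells the numbers out (assumes a 4-byte int; illustrative only):

    #include <limits.h>
    #include <stdio.h>

    #define SECTOR_SHIFT            9
    #define SECTOR_SIZE             (1ULL << SECTOR_SHIFT)
    #define MAX_INT_FORMAT_WIDTH    ((5 * sizeof (int)) / 2 + 1)

    int main(void)
    {
            /* 1 << 9 = 512, the default sector size assumed by blk/bio/genhd */
            printf("SECTOR_SIZE = %llu\n", SECTOR_SIZE);

            /* Each byte of an int holds log10(256) ~= 2.41 decimal digits, so
             * (5 * sizeof(int)) / 2 + 1 = 11 for a 4-byte int, covering the
             * 10 digits of INT_MAX (or the sign plus digits of INT_MIN). */
            printf("MAX_INT_FORMAT_WIDTH = %zu (INT_MAX = %d)\n",
                   (size_t) MAX_INT_FORMAT_WIDTH, INT_MAX);
            return 0;
    }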
@@ -66,7 +82,6 @@ struct rbd_image_header { | |||
66 | __u8 obj_order; | 82 | __u8 obj_order; |
67 | __u8 crypt_type; | 83 | __u8 crypt_type; |
68 | __u8 comp_type; | 84 | __u8 comp_type; |
69 | struct rw_semaphore snap_rwsem; | ||
70 | struct ceph_snap_context *snapc; | 85 | struct ceph_snap_context *snapc; |
71 | size_t snap_names_len; | 86 | size_t snap_names_len; |
72 | u64 snap_seq; | 87 | u64 snap_seq; |
@@ -83,7 +98,7 @@ struct rbd_options { | |||
83 | }; | 98 | }; |
84 | 99 | ||
85 | /* | 100 | /* |
86 | * an instance of the client. multiple devices may share a client. | 101 | * an instance of the client. multiple devices may share an rbd client. |
87 | */ | 102 | */ |
88 | struct rbd_client { | 103 | struct rbd_client { |
89 | struct ceph_client *client; | 104 | struct ceph_client *client; |
@@ -92,20 +107,9 @@ struct rbd_client { | |||
92 | struct list_head node; | 107 | struct list_head node; |
93 | }; | 108 | }; |
94 | 109 | ||
95 | struct rbd_req_coll; | ||
96 | |||
97 | /* | 110 | /* |
98 | * a single io request | 111 | * a request completion status |
99 | */ | 112 | */ |
100 | struct rbd_request { | ||
101 | struct request *rq; /* blk layer request */ | ||
102 | struct bio *bio; /* cloned bio */ | ||
103 | struct page **pages; /* list of used pages */ | ||
104 | u64 len; | ||
105 | int coll_index; | ||
106 | struct rbd_req_coll *coll; | ||
107 | }; | ||
108 | |||
109 | struct rbd_req_status { | 113 | struct rbd_req_status { |
110 | int done; | 114 | int done; |
111 | int rc; | 115 | int rc; |
@@ -122,6 +126,18 @@ struct rbd_req_coll { | |||
122 | struct rbd_req_status status[0]; | 126 | struct rbd_req_status status[0]; |
123 | }; | 127 | }; |
124 | 128 | ||
129 | /* | ||
130 | * a single io request | ||
131 | */ | ||
132 | struct rbd_request { | ||
133 | struct request *rq; /* blk layer request */ | ||
134 | struct bio *bio; /* cloned bio */ | ||
135 | struct page **pages; /* list of used pages */ | ||
136 | u64 len; | ||
137 | int coll_index; | ||
138 | struct rbd_req_coll *coll; | ||
139 | }; | ||
140 | |||
125 | struct rbd_snap { | 141 | struct rbd_snap { |
126 | struct device dev; | 142 | struct device dev; |
127 | const char *name; | 143 | const char *name; |
@@ -140,7 +156,6 @@ struct rbd_device { | |||
140 | struct gendisk *disk; /* blkdev's gendisk and rq */ | 156 | struct gendisk *disk; /* blkdev's gendisk and rq */ |
141 | struct request_queue *q; | 157 | struct request_queue *q; |
142 | 158 | ||
143 | struct ceph_client *client; | ||
144 | struct rbd_client *rbd_client; | 159 | struct rbd_client *rbd_client; |
145 | 160 | ||
146 | char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ | 161 | char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ |
@@ -157,6 +172,8 @@ struct rbd_device { | |||
157 | struct ceph_osd_event *watch_event; | 172 | struct ceph_osd_event *watch_event; |
158 | struct ceph_osd_request *watch_request; | 173 | struct ceph_osd_request *watch_request; |
159 | 174 | ||
175 | /* protects updating the header */ | ||
176 | struct rw_semaphore header_rwsem; | ||
160 | char snap_name[RBD_MAX_SNAP_NAME_LEN]; | 177 | char snap_name[RBD_MAX_SNAP_NAME_LEN]; |
161 | u32 cur_snap; /* index+1 of current snapshot within snap context | 178 | u32 cur_snap; /* index+1 of current snapshot within snap context |
162 | 0 - for the head */ | 179 | 0 - for the head */ |
@@ -171,15 +188,13 @@ struct rbd_device { | |||
171 | struct device dev; | 188 | struct device dev; |
172 | }; | 189 | }; |
173 | 190 | ||
174 | static struct bus_type rbd_bus_type = { | ||
175 | .name = "rbd", | ||
176 | }; | ||
177 | |||
178 | static spinlock_t node_lock; /* protects client get/put */ | ||
179 | |||
180 | static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ | 191 | static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ |
192 | |||
181 | static LIST_HEAD(rbd_dev_list); /* devices */ | 193 | static LIST_HEAD(rbd_dev_list); /* devices */ |
182 | static LIST_HEAD(rbd_client_list); /* clients */ | 194 | static DEFINE_SPINLOCK(rbd_dev_list_lock); |
195 | |||
196 | static LIST_HEAD(rbd_client_list); /* clients */ | ||
197 | static DEFINE_SPINLOCK(rbd_client_list_lock); | ||
183 | 198 | ||
184 | static int __rbd_init_snaps_header(struct rbd_device *rbd_dev); | 199 | static int __rbd_init_snaps_header(struct rbd_device *rbd_dev); |
185 | static void rbd_dev_release(struct device *dev); | 200 | static void rbd_dev_release(struct device *dev); |
@@ -190,12 +205,32 @@ static ssize_t rbd_snap_add(struct device *dev, | |||
190 | static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev, | 205 | static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev, |
191 | struct rbd_snap *snap); | 206 | struct rbd_snap *snap); |
192 | 207 | ||
208 | static ssize_t rbd_add(struct bus_type *bus, const char *buf, | ||
209 | size_t count); | ||
210 | static ssize_t rbd_remove(struct bus_type *bus, const char *buf, | ||
211 | size_t count); | ||
212 | |||
213 | static struct bus_attribute rbd_bus_attrs[] = { | ||
214 | __ATTR(add, S_IWUSR, NULL, rbd_add), | ||
215 | __ATTR(remove, S_IWUSR, NULL, rbd_remove), | ||
216 | __ATTR_NULL | ||
217 | }; | ||
193 | 218 | ||
194 | static struct rbd_device *dev_to_rbd(struct device *dev) | 219 | static struct bus_type rbd_bus_type = { |
220 | .name = "rbd", | ||
221 | .bus_attrs = rbd_bus_attrs, | ||
222 | }; | ||
223 | |||
224 | static void rbd_root_dev_release(struct device *dev) | ||
195 | { | 225 | { |
196 | return container_of(dev, struct rbd_device, dev); | ||
197 | } | 226 | } |
198 | 227 | ||
228 | static struct device rbd_root_dev = { | ||
229 | .init_name = "rbd", | ||
230 | .release = rbd_root_dev_release, | ||
231 | }; | ||
232 | |||
233 | |||
199 | static struct device *rbd_get_dev(struct rbd_device *rbd_dev) | 234 | static struct device *rbd_get_dev(struct rbd_device *rbd_dev) |
200 | { | 235 | { |
201 | return get_device(&rbd_dev->dev); | 236 | return get_device(&rbd_dev->dev); |
@@ -210,8 +245,7 @@ static int __rbd_update_snaps(struct rbd_device *rbd_dev); | |||
210 | 245 | ||
211 | static int rbd_open(struct block_device *bdev, fmode_t mode) | 246 | static int rbd_open(struct block_device *bdev, fmode_t mode) |
212 | { | 247 | { |
213 | struct gendisk *disk = bdev->bd_disk; | 248 | struct rbd_device *rbd_dev = bdev->bd_disk->private_data; |
214 | struct rbd_device *rbd_dev = disk->private_data; | ||
215 | 249 | ||
216 | rbd_get_dev(rbd_dev); | 250 | rbd_get_dev(rbd_dev); |
217 | 251 | ||
@@ -256,9 +290,11 @@ static struct rbd_client *rbd_client_create(struct ceph_options *opt, | |||
256 | kref_init(&rbdc->kref); | 290 | kref_init(&rbdc->kref); |
257 | INIT_LIST_HEAD(&rbdc->node); | 291 | INIT_LIST_HEAD(&rbdc->node); |
258 | 292 | ||
293 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
294 | |||
259 | rbdc->client = ceph_create_client(opt, rbdc, 0, 0); | 295 | rbdc->client = ceph_create_client(opt, rbdc, 0, 0); |
260 | if (IS_ERR(rbdc->client)) | 296 | if (IS_ERR(rbdc->client)) |
261 | goto out_rbdc; | 297 | goto out_mutex; |
262 | opt = NULL; /* Now rbdc->client is responsible for opt */ | 298 | opt = NULL; /* Now rbdc->client is responsible for opt */ |
263 | 299 | ||
264 | ret = ceph_open_session(rbdc->client); | 300 | ret = ceph_open_session(rbdc->client); |
@@ -267,16 +303,19 @@ static struct rbd_client *rbd_client_create(struct ceph_options *opt, | |||
267 | 303 | ||
268 | rbdc->rbd_opts = rbd_opts; | 304 | rbdc->rbd_opts = rbd_opts; |
269 | 305 | ||
270 | spin_lock(&node_lock); | 306 | spin_lock(&rbd_client_list_lock); |
271 | list_add_tail(&rbdc->node, &rbd_client_list); | 307 | list_add_tail(&rbdc->node, &rbd_client_list); |
272 | spin_unlock(&node_lock); | 308 | spin_unlock(&rbd_client_list_lock); |
309 | |||
310 | mutex_unlock(&ctl_mutex); | ||
273 | 311 | ||
274 | dout("rbd_client_create created %p\n", rbdc); | 312 | dout("rbd_client_create created %p\n", rbdc); |
275 | return rbdc; | 313 | return rbdc; |
276 | 314 | ||
277 | out_err: | 315 | out_err: |
278 | ceph_destroy_client(rbdc->client); | 316 | ceph_destroy_client(rbdc->client); |
279 | out_rbdc: | 317 | out_mutex: |
318 | mutex_unlock(&ctl_mutex); | ||
280 | kfree(rbdc); | 319 | kfree(rbdc); |
281 | out_opt: | 320 | out_opt: |
282 | if (opt) | 321 | if (opt) |
@@ -324,7 +363,7 @@ static int parse_rbd_opts_token(char *c, void *private) | |||
324 | substring_t argstr[MAX_OPT_ARGS]; | 363 | substring_t argstr[MAX_OPT_ARGS]; |
325 | int token, intval, ret; | 364 | int token, intval, ret; |
326 | 365 | ||
327 | token = match_token((char *)c, rbdopt_tokens, argstr); | 366 | token = match_token(c, rbdopt_tokens, argstr); |
328 | if (token < 0) | 367 | if (token < 0) |
329 | return -EINVAL; | 368 | return -EINVAL; |
330 | 369 | ||
@@ -357,64 +396,61 @@ static int parse_rbd_opts_token(char *c, void *private) | |||
357 | * Get a ceph client with specific addr and configuration, if one does | 396 | * Get a ceph client with specific addr and configuration, if one does |
358 | * not exist create it. | 397 | * not exist create it. |
359 | */ | 398 | */ |
360 | static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr, | 399 | static struct rbd_client *rbd_get_client(const char *mon_addr, |
361 | char *options) | 400 | size_t mon_addr_len, |
401 | char *options) | ||
362 | { | 402 | { |
363 | struct rbd_client *rbdc; | 403 | struct rbd_client *rbdc; |
364 | struct ceph_options *opt; | 404 | struct ceph_options *opt; |
365 | int ret; | ||
366 | struct rbd_options *rbd_opts; | 405 | struct rbd_options *rbd_opts; |
367 | 406 | ||
368 | rbd_opts = kzalloc(sizeof(*rbd_opts), GFP_KERNEL); | 407 | rbd_opts = kzalloc(sizeof(*rbd_opts), GFP_KERNEL); |
369 | if (!rbd_opts) | 408 | if (!rbd_opts) |
370 | return -ENOMEM; | 409 | return ERR_PTR(-ENOMEM); |
371 | 410 | ||
372 | rbd_opts->notify_timeout = RBD_NOTIFY_TIMEOUT_DEFAULT; | 411 | rbd_opts->notify_timeout = RBD_NOTIFY_TIMEOUT_DEFAULT; |
373 | 412 | ||
374 | ret = ceph_parse_options(&opt, options, mon_addr, | 413 | opt = ceph_parse_options(options, mon_addr, |
375 | mon_addr + strlen(mon_addr), parse_rbd_opts_token, rbd_opts); | 414 | mon_addr + mon_addr_len, |
376 | if (ret < 0) | 415 | parse_rbd_opts_token, rbd_opts); |
377 | goto done_err; | 416 | if (IS_ERR(opt)) { |
417 | kfree(rbd_opts); | ||
418 | return ERR_CAST(opt); | ||
419 | } | ||
378 | 420 | ||
379 | spin_lock(&node_lock); | 421 | spin_lock(&rbd_client_list_lock); |
380 | rbdc = __rbd_client_find(opt); | 422 | rbdc = __rbd_client_find(opt); |
381 | if (rbdc) { | 423 | if (rbdc) { |
382 | ceph_destroy_options(opt); | ||
383 | |||
384 | /* using an existing client */ | 424 | /* using an existing client */ |
385 | kref_get(&rbdc->kref); | 425 | kref_get(&rbdc->kref); |
386 | rbd_dev->rbd_client = rbdc; | 426 | spin_unlock(&rbd_client_list_lock); |
387 | rbd_dev->client = rbdc->client; | 427 | |
388 | spin_unlock(&node_lock); | 428 | ceph_destroy_options(opt); |
389 | return 0; | 429 | kfree(rbd_opts); |
430 | |||
431 | return rbdc; | ||
390 | } | 432 | } |
391 | spin_unlock(&node_lock); | 433 | spin_unlock(&rbd_client_list_lock); |
392 | 434 | ||
393 | rbdc = rbd_client_create(opt, rbd_opts); | 435 | rbdc = rbd_client_create(opt, rbd_opts); |
394 | if (IS_ERR(rbdc)) { | ||
395 | ret = PTR_ERR(rbdc); | ||
396 | goto done_err; | ||
397 | } | ||
398 | 436 | ||
399 | rbd_dev->rbd_client = rbdc; | 437 | if (IS_ERR(rbdc)) |
400 | rbd_dev->client = rbdc->client; | 438 | kfree(rbd_opts); |
401 | return 0; | 439 | |
402 | done_err: | 440 | return rbdc; |
403 | kfree(rbd_opts); | ||
404 | return ret; | ||
405 | } | 441 | } |
406 | 442 | ||
407 | /* | 443 | /* |
408 | * Destroy ceph client | 444 | * Destroy ceph client |
445 | * | ||
446 | * Caller must hold rbd_client_list_lock. | ||
409 | */ | 447 | */ |
410 | static void rbd_client_release(struct kref *kref) | 448 | static void rbd_client_release(struct kref *kref) |
411 | { | 449 | { |
412 | struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); | 450 | struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); |
413 | 451 | ||
414 | dout("rbd_release_client %p\n", rbdc); | 452 | dout("rbd_release_client %p\n", rbdc); |
415 | spin_lock(&node_lock); | ||
416 | list_del(&rbdc->node); | 453 | list_del(&rbdc->node); |
417 | spin_unlock(&node_lock); | ||
418 | 454 | ||
419 | ceph_destroy_client(rbdc->client); | 455 | ceph_destroy_client(rbdc->client); |
420 | kfree(rbdc->rbd_opts); | 456 | kfree(rbdc->rbd_opts); |
@@ -427,9 +463,10 @@ static void rbd_client_release(struct kref *kref) | |||
427 | */ | 463 | */ |
428 | static void rbd_put_client(struct rbd_device *rbd_dev) | 464 | static void rbd_put_client(struct rbd_device *rbd_dev) |
429 | { | 465 | { |
466 | spin_lock(&rbd_client_list_lock); | ||
430 | kref_put(&rbd_dev->rbd_client->kref, rbd_client_release); | 467 | kref_put(&rbd_dev->rbd_client->kref, rbd_client_release); |
468 | spin_unlock(&rbd_client_list_lock); | ||
431 | rbd_dev->rbd_client = NULL; | 469 | rbd_dev->rbd_client = NULL; |
432 | rbd_dev->client = NULL; | ||
433 | } | 470 | } |
434 | 471 | ||
435 | /* | 472 | /* |
@@ -454,21 +491,19 @@ static int rbd_header_from_disk(struct rbd_image_header *header, | |||
454 | gfp_t gfp_flags) | 491 | gfp_t gfp_flags) |
455 | { | 492 | { |
456 | int i; | 493 | int i; |
457 | u32 snap_count = le32_to_cpu(ondisk->snap_count); | 494 | u32 snap_count; |
458 | int ret = -ENOMEM; | ||
459 | 495 | ||
460 | if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT))) { | 496 | if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT))) |
461 | return -ENXIO; | 497 | return -ENXIO; |
462 | } | ||
463 | 498 | ||
464 | init_rwsem(&header->snap_rwsem); | 499 | snap_count = le32_to_cpu(ondisk->snap_count); |
465 | header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); | ||
466 | header->snapc = kmalloc(sizeof(struct ceph_snap_context) + | 500 | header->snapc = kmalloc(sizeof(struct ceph_snap_context) + |
467 | snap_count * | 501 | snap_count * sizeof (*ondisk), |
468 | sizeof(struct rbd_image_snap_ondisk), | ||
469 | gfp_flags); | 502 | gfp_flags); |
470 | if (!header->snapc) | 503 | if (!header->snapc) |
471 | return -ENOMEM; | 504 | return -ENOMEM; |
505 | |||
506 | header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); | ||
472 | if (snap_count) { | 507 | if (snap_count) { |
473 | header->snap_names = kmalloc(header->snap_names_len, | 508 | header->snap_names = kmalloc(header->snap_names_len, |
474 | GFP_KERNEL); | 509 | GFP_KERNEL); |
@@ -495,8 +530,7 @@ static int rbd_header_from_disk(struct rbd_image_header *header, | |||
495 | header->snapc->num_snaps = snap_count; | 530 | header->snapc->num_snaps = snap_count; |
496 | header->total_snaps = snap_count; | 531 | header->total_snaps = snap_count; |
497 | 532 | ||
498 | if (snap_count && | 533 | if (snap_count && allocated_snaps == snap_count) { |
499 | allocated_snaps == snap_count) { | ||
500 | for (i = 0; i < snap_count; i++) { | 534 | for (i = 0; i < snap_count; i++) { |
501 | header->snapc->snaps[i] = | 535 | header->snapc->snaps[i] = |
502 | le64_to_cpu(ondisk->snaps[i].id); | 536 | le64_to_cpu(ondisk->snaps[i].id); |
@@ -515,7 +549,7 @@ err_names: | |||
515 | kfree(header->snap_names); | 549 | kfree(header->snap_names); |
516 | err_snapc: | 550 | err_snapc: |
517 | kfree(header->snapc); | 551 | kfree(header->snapc); |
518 | return ret; | 552 | return -ENOMEM; |
519 | } | 553 | } |
520 | 554 | ||
521 | static int snap_index(struct rbd_image_header *header, int snap_num) | 555 | static int snap_index(struct rbd_image_header *header, int snap_num) |
@@ -539,35 +573,34 @@ static int snap_by_name(struct rbd_image_header *header, const char *snap_name, | |||
539 | int i; | 573 | int i; |
540 | char *p = header->snap_names; | 574 | char *p = header->snap_names; |
541 | 575 | ||
542 | for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) { | 576 | for (i = 0; i < header->total_snaps; i++) { |
543 | if (strcmp(snap_name, p) == 0) | 577 | if (!strcmp(snap_name, p)) { |
544 | break; | ||
545 | } | ||
546 | if (i == header->total_snaps) | ||
547 | return -ENOENT; | ||
548 | if (seq) | ||
549 | *seq = header->snapc->snaps[i]; | ||
550 | 578 | ||
551 | if (size) | 579 | /* Found it. Pass back its id and/or size */ |
552 | *size = header->snap_sizes[i]; | ||
553 | 580 | ||
554 | return i; | 581 | if (seq) |
582 | *seq = header->snapc->snaps[i]; | ||
583 | if (size) | ||
584 | *size = header->snap_sizes[i]; | ||
585 | return i; | ||
586 | } | ||
587 | p += strlen(p) + 1; /* Skip ahead to the next name */ | ||
588 | } | ||
589 | return -ENOENT; | ||
555 | } | 590 | } |
556 | 591 | ||
557 | static int rbd_header_set_snap(struct rbd_device *dev, | 592 | static int rbd_header_set_snap(struct rbd_device *dev, u64 *size) |
558 | const char *snap_name, | ||
559 | u64 *size) | ||
560 | { | 593 | { |
561 | struct rbd_image_header *header = &dev->header; | 594 | struct rbd_image_header *header = &dev->header; |
562 | struct ceph_snap_context *snapc = header->snapc; | 595 | struct ceph_snap_context *snapc = header->snapc; |
563 | int ret = -ENOENT; | 596 | int ret = -ENOENT; |
564 | 597 | ||
565 | down_write(&header->snap_rwsem); | 598 | BUILD_BUG_ON(sizeof (dev->snap_name) < sizeof (RBD_SNAP_HEAD_NAME)); |
566 | 599 | ||
567 | if (!snap_name || | 600 | down_write(&dev->header_rwsem); |
568 | !*snap_name || | 601 | |
569 | strcmp(snap_name, "-") == 0 || | 602 | if (!memcmp(dev->snap_name, RBD_SNAP_HEAD_NAME, |
570 | strcmp(snap_name, RBD_SNAP_HEAD_NAME) == 0) { | 603 | sizeof (RBD_SNAP_HEAD_NAME))) { |
571 | if (header->total_snaps) | 604 | if (header->total_snaps) |
572 | snapc->seq = header->snap_seq; | 605 | snapc->seq = header->snap_seq; |
573 | else | 606 | else |
@@ -577,7 +610,7 @@ static int rbd_header_set_snap(struct rbd_device *dev, | |||
577 | if (size) | 610 | if (size) |
578 | *size = header->image_size; | 611 | *size = header->image_size; |
579 | } else { | 612 | } else { |
580 | ret = snap_by_name(header, snap_name, &snapc->seq, size); | 613 | ret = snap_by_name(header, dev->snap_name, &snapc->seq, size); |
581 | if (ret < 0) | 614 | if (ret < 0) |
582 | goto done; | 615 | goto done; |
583 | 616 | ||
@@ -587,7 +620,7 @@ static int rbd_header_set_snap(struct rbd_device *dev, | |||
587 | 620 | ||
588 | ret = 0; | 621 | ret = 0; |
589 | done: | 622 | done: |
590 | up_write(&header->snap_rwsem); | 623 | up_write(&dev->header_rwsem); |
591 | return ret; | 624 | return ret; |
592 | } | 625 | } |
593 | 626 | ||
@@ -714,7 +747,7 @@ static struct bio *bio_chain_clone(struct bio **old, struct bio **next, | |||
714 | 747 | ||
715 | /* split the bio. We'll release it either in the next | 748 | /* split the bio. We'll release it either in the next |
716 | call, or it will have to be released outside */ | 749 | call, or it will have to be released outside */ |
717 | bp = bio_split(old_chain, (len - total) / 512ULL); | 750 | bp = bio_split(old_chain, (len - total) / SECTOR_SIZE); |
718 | if (!bp) | 751 | if (!bp) |
719 | goto err_out; | 752 | goto err_out; |
720 | 753 | ||
@@ -854,7 +887,7 @@ static int rbd_do_request(struct request *rq, | |||
854 | struct timespec mtime = CURRENT_TIME; | 887 | struct timespec mtime = CURRENT_TIME; |
855 | struct rbd_request *req_data; | 888 | struct rbd_request *req_data; |
856 | struct ceph_osd_request_head *reqhead; | 889 | struct ceph_osd_request_head *reqhead; |
857 | struct rbd_image_header *header = &dev->header; | 890 | struct ceph_osd_client *osdc; |
858 | 891 | ||
859 | req_data = kzalloc(sizeof(*req_data), GFP_NOIO); | 892 | req_data = kzalloc(sizeof(*req_data), GFP_NOIO); |
860 | if (!req_data) { | 893 | if (!req_data) { |
@@ -871,15 +904,13 @@ static int rbd_do_request(struct request *rq, | |||
871 | 904 | ||
872 | dout("rbd_do_request obj=%s ofs=%lld len=%lld\n", obj, len, ofs); | 905 | dout("rbd_do_request obj=%s ofs=%lld len=%lld\n", obj, len, ofs); |
873 | 906 | ||
874 | down_read(&header->snap_rwsem); | 907 | down_read(&dev->header_rwsem); |
875 | 908 | ||
876 | req = ceph_osdc_alloc_request(&dev->client->osdc, flags, | 909 | osdc = &dev->rbd_client->client->osdc; |
877 | snapc, | 910 | req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, |
878 | ops, | 911 | false, GFP_NOIO, pages, bio); |
879 | false, | ||
880 | GFP_NOIO, pages, bio); | ||
881 | if (!req) { | 912 | if (!req) { |
882 | up_read(&header->snap_rwsem); | 913 | up_read(&dev->header_rwsem); |
883 | ret = -ENOMEM; | 914 | ret = -ENOMEM; |
884 | goto done_pages; | 915 | goto done_pages; |
885 | } | 916 | } |
@@ -906,27 +937,27 @@ static int rbd_do_request(struct request *rq, | |||
906 | layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); | 937 | layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); |
907 | layout->fl_pg_preferred = cpu_to_le32(-1); | 938 | layout->fl_pg_preferred = cpu_to_le32(-1); |
908 | layout->fl_pg_pool = cpu_to_le32(dev->poolid); | 939 | layout->fl_pg_pool = cpu_to_le32(dev->poolid); |
909 | ceph_calc_raw_layout(&dev->client->osdc, layout, snapid, | 940 | ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, |
910 | ofs, &len, &bno, req, ops); | 941 | req, ops); |
911 | 942 | ||
912 | ceph_osdc_build_request(req, ofs, &len, | 943 | ceph_osdc_build_request(req, ofs, &len, |
913 | ops, | 944 | ops, |
914 | snapc, | 945 | snapc, |
915 | &mtime, | 946 | &mtime, |
916 | req->r_oid, req->r_oid_len); | 947 | req->r_oid, req->r_oid_len); |
917 | up_read(&header->snap_rwsem); | 948 | up_read(&dev->header_rwsem); |
918 | 949 | ||
919 | if (linger_req) { | 950 | if (linger_req) { |
920 | ceph_osdc_set_request_linger(&dev->client->osdc, req); | 951 | ceph_osdc_set_request_linger(osdc, req); |
921 | *linger_req = req; | 952 | *linger_req = req; |
922 | } | 953 | } |
923 | 954 | ||
924 | ret = ceph_osdc_start_request(&dev->client->osdc, req, false); | 955 | ret = ceph_osdc_start_request(osdc, req, false); |
925 | if (ret < 0) | 956 | if (ret < 0) |
926 | goto done_err; | 957 | goto done_err; |
927 | 958 | ||
928 | if (!rbd_cb) { | 959 | if (!rbd_cb) { |
929 | ret = ceph_osdc_wait_request(&dev->client->osdc, req); | 960 | ret = ceph_osdc_wait_request(osdc, req); |
930 | if (ver) | 961 | if (ver) |
931 | *ver = le64_to_cpu(req->r_reassert_version.version); | 962 | *ver = le64_to_cpu(req->r_reassert_version.version); |
932 | dout("reassert_ver=%lld\n", | 963 | dout("reassert_ver=%lld\n", |
@@ -1210,8 +1241,8 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) | |||
1210 | rc = __rbd_update_snaps(dev); | 1241 | rc = __rbd_update_snaps(dev); |
1211 | mutex_unlock(&ctl_mutex); | 1242 | mutex_unlock(&ctl_mutex); |
1212 | if (rc) | 1243 | if (rc) |
1213 | pr_warning(DRV_NAME "%d got notification but failed to update" | 1244 | pr_warning(RBD_DRV_NAME "%d got notification but failed to " |
1214 | " snaps: %d\n", dev->major, rc); | 1245 | " update snaps: %d\n", dev->major, rc); |
1215 | 1246 | ||
1216 | rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name); | 1247 | rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name); |
1217 | } | 1248 | } |
@@ -1224,7 +1255,7 @@ static int rbd_req_sync_watch(struct rbd_device *dev, | |||
1224 | u64 ver) | 1255 | u64 ver) |
1225 | { | 1256 | { |
1226 | struct ceph_osd_req_op *ops; | 1257 | struct ceph_osd_req_op *ops; |
1227 | struct ceph_osd_client *osdc = &dev->client->osdc; | 1258 | struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc; |
1228 | 1259 | ||
1229 | int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0); | 1260 | int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0); |
1230 | if (ret < 0) | 1261 | if (ret < 0) |
@@ -1311,7 +1342,7 @@ static int rbd_req_sync_notify(struct rbd_device *dev, | |||
1311 | const char *obj) | 1342 | const char *obj) |
1312 | { | 1343 | { |
1313 | struct ceph_osd_req_op *ops; | 1344 | struct ceph_osd_req_op *ops; |
1314 | struct ceph_osd_client *osdc = &dev->client->osdc; | 1345 | struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc; |
1315 | struct ceph_osd_event *event; | 1346 | struct ceph_osd_event *event; |
1316 | struct rbd_notify_info info; | 1347 | struct rbd_notify_info info; |
1317 | int payload_len = sizeof(u32) + sizeof(u32); | 1348 | int payload_len = sizeof(u32) + sizeof(u32); |
@@ -1418,9 +1449,7 @@ static void rbd_rq_fn(struct request_queue *q) | |||
1418 | struct request *rq; | 1449 | struct request *rq; |
1419 | struct bio_pair *bp = NULL; | 1450 | struct bio_pair *bp = NULL; |
1420 | 1451 | ||
1421 | rq = blk_fetch_request(q); | 1452 | while ((rq = blk_fetch_request(q))) { |
1422 | |||
1423 | while (1) { | ||
1424 | struct bio *bio; | 1453 | struct bio *bio; |
1425 | struct bio *rq_bio, *next_bio = NULL; | 1454 | struct bio *rq_bio, *next_bio = NULL; |
1426 | bool do_write; | 1455 | bool do_write; |
@@ -1438,32 +1467,32 @@ static void rbd_rq_fn(struct request_queue *q) | |||
1438 | /* filter out block requests we don't understand */ | 1467 | /* filter out block requests we don't understand */ |
1439 | if ((rq->cmd_type != REQ_TYPE_FS)) { | 1468 | if ((rq->cmd_type != REQ_TYPE_FS)) { |
1440 | __blk_end_request_all(rq, 0); | 1469 | __blk_end_request_all(rq, 0); |
1441 | goto next; | 1470 | continue; |
1442 | } | 1471 | } |
1443 | 1472 | ||
1444 | /* deduce our operation (read, write) */ | 1473 | /* deduce our operation (read, write) */ |
1445 | do_write = (rq_data_dir(rq) == WRITE); | 1474 | do_write = (rq_data_dir(rq) == WRITE); |
1446 | 1475 | ||
1447 | size = blk_rq_bytes(rq); | 1476 | size = blk_rq_bytes(rq); |
1448 | ofs = blk_rq_pos(rq) * 512ULL; | 1477 | ofs = blk_rq_pos(rq) * SECTOR_SIZE; |
1449 | rq_bio = rq->bio; | 1478 | rq_bio = rq->bio; |
1450 | if (do_write && rbd_dev->read_only) { | 1479 | if (do_write && rbd_dev->read_only) { |
1451 | __blk_end_request_all(rq, -EROFS); | 1480 | __blk_end_request_all(rq, -EROFS); |
1452 | goto next; | 1481 | continue; |
1453 | } | 1482 | } |
1454 | 1483 | ||
1455 | spin_unlock_irq(q->queue_lock); | 1484 | spin_unlock_irq(q->queue_lock); |
1456 | 1485 | ||
1457 | dout("%s 0x%x bytes at 0x%llx\n", | 1486 | dout("%s 0x%x bytes at 0x%llx\n", |
1458 | do_write ? "write" : "read", | 1487 | do_write ? "write" : "read", |
1459 | size, blk_rq_pos(rq) * 512ULL); | 1488 | size, blk_rq_pos(rq) * SECTOR_SIZE); |
1460 | 1489 | ||
1461 | num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); | 1490 | num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); |
1462 | coll = rbd_alloc_coll(num_segs); | 1491 | coll = rbd_alloc_coll(num_segs); |
1463 | if (!coll) { | 1492 | if (!coll) { |
1464 | spin_lock_irq(q->queue_lock); | 1493 | spin_lock_irq(q->queue_lock); |
1465 | __blk_end_request_all(rq, -ENOMEM); | 1494 | __blk_end_request_all(rq, -ENOMEM); |
1466 | goto next; | 1495 | continue; |
1467 | } | 1496 | } |
1468 | 1497 | ||
1469 | do { | 1498 | do { |
@@ -1509,8 +1538,6 @@ next_seg: | |||
1509 | if (bp) | 1538 | if (bp) |
1510 | bio_pair_release(bp); | 1539 | bio_pair_release(bp); |
1511 | spin_lock_irq(q->queue_lock); | 1540 | spin_lock_irq(q->queue_lock); |
1512 | next: | ||
1513 | rq = blk_fetch_request(q); | ||
1514 | } | 1541 | } |
1515 | } | 1542 | } |
1516 | 1543 | ||
@@ -1523,13 +1550,17 @@ static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, | |||
1523 | struct bio_vec *bvec) | 1550 | struct bio_vec *bvec) |
1524 | { | 1551 | { |
1525 | struct rbd_device *rbd_dev = q->queuedata; | 1552 | struct rbd_device *rbd_dev = q->queuedata; |
1526 | unsigned int chunk_sectors = 1 << (rbd_dev->header.obj_order - 9); | 1553 | unsigned int chunk_sectors; |
1527 | sector_t sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev); | 1554 | sector_t sector; |
1528 | unsigned int bio_sectors = bmd->bi_size >> 9; | 1555 | unsigned int bio_sectors; |
1529 | int max; | 1556 | int max; |
1530 | 1557 | ||
1558 | chunk_sectors = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT); | ||
1559 | sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev); | ||
1560 | bio_sectors = bmd->bi_size >> SECTOR_SHIFT; | ||
1561 | |||
1531 | max = (chunk_sectors - ((sector & (chunk_sectors - 1)) | 1562 | max = (chunk_sectors - ((sector & (chunk_sectors - 1)) |
1532 | + bio_sectors)) << 9; | 1563 | + bio_sectors)) << SECTOR_SHIFT; |
1533 | if (max < 0) | 1564 | if (max < 0) |
1534 | max = 0; /* bio_add cannot handle a negative return */ | 1565 | max = 0; /* bio_add cannot handle a negative return */ |
1535 | if (max <= bvec->bv_len && bio_sectors == 0) | 1566 | if (max <= bvec->bv_len && bio_sectors == 0) |
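rbd_merge_bvec() above computes how many bytes may still be added to a bio before it would cross an object (chunk) boundary. A worked example in userspace, using the rbd default of 4 MiB objects (obj_order = 22) and made-up offsets:

    #include <stdio.h>

    #define SECTOR_SHIFT    9

    int main(void)
    {
            unsigned int obj_order = 22;                    /* 4 MiB objects */
            unsigned int chunk_sectors = 1u << (obj_order - SECTOR_SHIFT);
            unsigned long long sector = 100;        /* sector offset of the bio */
            unsigned int bio_sectors = 4;           /* sectors already in the bio */
            long long in_chunk = (long long)(sector & (chunk_sectors - 1));
            long long max;

            /* Room left in the current 8192-sector chunk, in bytes:
             * (8192 - (100 + 4)) << 9 = 8088 * 512 = 4141056 */
            max = ((long long)chunk_sectors - (in_chunk + bio_sectors))
                  << SECTOR_SHIFT;
            if (max < 0)
                    max = 0;        /* bio_add cannot handle a negative return */

            printf("bytes before the object boundary: %lld\n", max);
            return 0;
    }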
@@ -1562,15 +1593,16 @@ static int rbd_read_header(struct rbd_device *rbd_dev, | |||
1562 | ssize_t rc; | 1593 | ssize_t rc; |
1563 | struct rbd_image_header_ondisk *dh; | 1594 | struct rbd_image_header_ondisk *dh; |
1564 | int snap_count = 0; | 1595 | int snap_count = 0; |
1565 | u64 snap_names_len = 0; | ||
1566 | u64 ver; | 1596 | u64 ver; |
1597 | size_t len; | ||
1567 | 1598 | ||
1599 | /* | ||
1600 | * First reads the fixed-size header to determine the number | ||
1601 | * of snapshots, then re-reads it, along with all snapshot | ||
1602 | * records as well as their stored names. | ||
1603 | */ | ||
1604 | len = sizeof (*dh); | ||
1568 | while (1) { | 1605 | while (1) { |
1569 | int len = sizeof(*dh) + | ||
1570 | snap_count * sizeof(struct rbd_image_snap_ondisk) + | ||
1571 | snap_names_len; | ||
1572 | |||
1573 | rc = -ENOMEM; | ||
1574 | dh = kmalloc(len, GFP_KERNEL); | 1606 | dh = kmalloc(len, GFP_KERNEL); |
1575 | if (!dh) | 1607 | if (!dh) |
1576 | return -ENOMEM; | 1608 | return -ENOMEM; |
@@ -1585,21 +1617,22 @@ static int rbd_read_header(struct rbd_device *rbd_dev, | |||
1585 | 1617 | ||
1586 | rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL); | 1618 | rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL); |
1587 | if (rc < 0) { | 1619 | if (rc < 0) { |
1588 | if (rc == -ENXIO) { | 1620 | if (rc == -ENXIO) |
1589 | pr_warning("unrecognized header format" | 1621 | pr_warning("unrecognized header format" |
1590 | " for image %s", rbd_dev->obj); | 1622 | " for image %s", rbd_dev->obj); |
1591 | } | ||
1592 | goto out_dh; | 1623 | goto out_dh; |
1593 | } | 1624 | } |
1594 | 1625 | ||
1595 | if (snap_count != header->total_snaps) { | 1626 | if (snap_count == header->total_snaps) |
1596 | snap_count = header->total_snaps; | 1627 | break; |
1597 | snap_names_len = header->snap_names_len; | 1628 | |
1598 | rbd_header_free(header); | 1629 | snap_count = header->total_snaps; |
1599 | kfree(dh); | 1630 | len = sizeof (*dh) + |
1600 | continue; | 1631 | snap_count * sizeof(struct rbd_image_snap_ondisk) + |
1601 | } | 1632 | header->snap_names_len; |
1602 | break; | 1633 | |
1634 | rbd_header_free(header); | ||
1635 | kfree(dh); | ||
1603 | } | 1636 | } |
1604 | header->obj_version = ver; | 1637 | header->obj_version = ver; |
1605 | 1638 | ||
@@ -1620,13 +1653,14 @@ static int rbd_header_add_snap(struct rbd_device *dev, | |||
1620 | int ret; | 1653 | int ret; |
1621 | void *data, *p, *e; | 1654 | void *data, *p, *e; |
1622 | u64 ver; | 1655 | u64 ver; |
1656 | struct ceph_mon_client *monc; | ||
1623 | 1657 | ||
1624 | /* we should create a snapshot only if we're pointing at the head */ | 1658 | /* we should create a snapshot only if we're pointing at the head */ |
1625 | if (dev->cur_snap) | 1659 | if (dev->cur_snap) |
1626 | return -EINVAL; | 1660 | return -EINVAL; |
1627 | 1661 | ||
1628 | ret = ceph_monc_create_snapid(&dev->client->monc, dev->poolid, | 1662 | monc = &dev->rbd_client->client->monc; |
1629 | &new_snapid); | 1663 | ret = ceph_monc_create_snapid(monc, dev->poolid, &new_snapid); |
1630 | dout("created snapid=%lld\n", new_snapid); | 1664 | dout("created snapid=%lld\n", new_snapid); |
1631 | if (ret < 0) | 1665 | if (ret < 0) |
1632 | return ret; | 1666 | return ret; |
@@ -1681,9 +1715,9 @@ static int __rbd_update_snaps(struct rbd_device *rbd_dev) | |||
1681 | return ret; | 1715 | return ret; |
1682 | 1716 | ||
1683 | /* resized? */ | 1717 | /* resized? */ |
1684 | set_capacity(rbd_dev->disk, h.image_size / 512ULL); | 1718 | set_capacity(rbd_dev->disk, h.image_size / SECTOR_SIZE); |
1685 | 1719 | ||
1686 | down_write(&rbd_dev->header.snap_rwsem); | 1720 | down_write(&rbd_dev->header_rwsem); |
1687 | 1721 | ||
1688 | snap_seq = rbd_dev->header.snapc->seq; | 1722 | snap_seq = rbd_dev->header.snapc->seq; |
1689 | if (rbd_dev->header.total_snaps && | 1723 | if (rbd_dev->header.total_snaps && |
@@ -1708,7 +1742,7 @@ static int __rbd_update_snaps(struct rbd_device *rbd_dev) | |||
1708 | 1742 | ||
1709 | ret = __rbd_init_snaps_header(rbd_dev); | 1743 | ret = __rbd_init_snaps_header(rbd_dev); |
1710 | 1744 | ||
1711 | up_write(&rbd_dev->header.snap_rwsem); | 1745 | up_write(&rbd_dev->header_rwsem); |
1712 | 1746 | ||
1713 | return ret; | 1747 | return ret; |
1714 | } | 1748 | } |
@@ -1718,6 +1752,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) | |||
1718 | struct gendisk *disk; | 1752 | struct gendisk *disk; |
1719 | struct request_queue *q; | 1753 | struct request_queue *q; |
1720 | int rc; | 1754 | int rc; |
1755 | u64 segment_size; | ||
1721 | u64 total_size = 0; | 1756 | u64 total_size = 0; |
1722 | 1757 | ||
1723 | /* contact OSD, request size info about the object being mapped */ | 1758 | /* contact OSD, request size info about the object being mapped */ |
@@ -1730,7 +1765,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) | |||
1730 | if (rc) | 1765 | if (rc) |
1731 | return rc; | 1766 | return rc; |
1732 | 1767 | ||
1733 | rc = rbd_header_set_snap(rbd_dev, rbd_dev->snap_name, &total_size); | 1768 | rc = rbd_header_set_snap(rbd_dev, &total_size); |
1734 | if (rc) | 1769 | if (rc) |
1735 | return rc; | 1770 | return rc; |
1736 | 1771 | ||
@@ -1740,7 +1775,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) | |||
1740 | if (!disk) | 1775 | if (!disk) |
1741 | goto out; | 1776 | goto out; |
1742 | 1777 | ||
1743 | snprintf(disk->disk_name, sizeof(disk->disk_name), DRV_NAME "%d", | 1778 | snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d", |
1744 | rbd_dev->id); | 1779 | rbd_dev->id); |
1745 | disk->major = rbd_dev->major; | 1780 | disk->major = rbd_dev->major; |
1746 | disk->first_minor = 0; | 1781 | disk->first_minor = 0; |
@@ -1753,11 +1788,15 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) | |||
1753 | if (!q) | 1788 | if (!q) |
1754 | goto out_disk; | 1789 | goto out_disk; |
1755 | 1790 | ||
1791 | /* We use the default size, but let's be explicit about it. */ | ||
1792 | blk_queue_physical_block_size(q, SECTOR_SIZE); | ||
1793 | |||
1756 | /* set io sizes to object size */ | 1794 | /* set io sizes to object size */ |
1757 | blk_queue_max_hw_sectors(q, rbd_obj_bytes(&rbd_dev->header) / 512ULL); | 1795 | segment_size = rbd_obj_bytes(&rbd_dev->header); |
1758 | blk_queue_max_segment_size(q, rbd_obj_bytes(&rbd_dev->header)); | 1796 | blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); |
1759 | blk_queue_io_min(q, rbd_obj_bytes(&rbd_dev->header)); | 1797 | blk_queue_max_segment_size(q, segment_size); |
1760 | blk_queue_io_opt(q, rbd_obj_bytes(&rbd_dev->header)); | 1798 | blk_queue_io_min(q, segment_size); |
1799 | blk_queue_io_opt(q, segment_size); | ||
1761 | 1800 | ||
1762 | blk_queue_merge_bvec(q, rbd_merge_bvec); | 1801 | blk_queue_merge_bvec(q, rbd_merge_bvec); |
1763 | disk->queue = q; | 1802 | disk->queue = q; |
@@ -1768,7 +1807,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) | |||
1768 | rbd_dev->q = q; | 1807 | rbd_dev->q = q; |
1769 | 1808 | ||
1770 | /* finally, announce the disk to the world */ | 1809 | /* finally, announce the disk to the world */ |
1771 | set_capacity(disk, total_size / 512ULL); | 1810 | set_capacity(disk, total_size / SECTOR_SIZE); |
1772 | add_disk(disk); | 1811 | add_disk(disk); |
1773 | 1812 | ||
1774 | pr_info("%s: added with size 0x%llx\n", | 1813 | pr_info("%s: added with size 0x%llx\n", |
@@ -1785,10 +1824,15 @@ out: | |||
1785 | sysfs | 1824 | sysfs |
1786 | */ | 1825 | */ |
1787 | 1826 | ||
1827 | static struct rbd_device *dev_to_rbd_dev(struct device *dev) | ||
1828 | { | ||
1829 | return container_of(dev, struct rbd_device, dev); | ||
1830 | } | ||
1831 | |||
1788 | static ssize_t rbd_size_show(struct device *dev, | 1832 | static ssize_t rbd_size_show(struct device *dev, |
1789 | struct device_attribute *attr, char *buf) | 1833 | struct device_attribute *attr, char *buf) |
1790 | { | 1834 | { |
1791 | struct rbd_device *rbd_dev = dev_to_rbd(dev); | 1835 | struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); |
1792 | 1836 | ||
1793 | return sprintf(buf, "%llu\n", (unsigned long long)rbd_dev->header.image_size); | 1837 | return sprintf(buf, "%llu\n", (unsigned long long)rbd_dev->header.image_size); |
1794 | } | 1838 | } |
@@ -1796,7 +1840,7 @@ static ssize_t rbd_size_show(struct device *dev, | |||
1796 | static ssize_t rbd_major_show(struct device *dev, | 1840 | static ssize_t rbd_major_show(struct device *dev, |
1797 | struct device_attribute *attr, char *buf) | 1841 | struct device_attribute *attr, char *buf) |
1798 | { | 1842 | { |
1799 | struct rbd_device *rbd_dev = dev_to_rbd(dev); | 1843 | struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); |
1800 | 1844 | ||
1801 | return sprintf(buf, "%d\n", rbd_dev->major); | 1845 | return sprintf(buf, "%d\n", rbd_dev->major); |
1802 | } | 1846 | } |
@@ -1804,15 +1848,16 @@ static ssize_t rbd_major_show(struct device *dev, | |||
1804 | static ssize_t rbd_client_id_show(struct device *dev, | 1848 | static ssize_t rbd_client_id_show(struct device *dev, |
1805 | struct device_attribute *attr, char *buf) | 1849 | struct device_attribute *attr, char *buf) |
1806 | { | 1850 | { |
1807 | struct rbd_device *rbd_dev = dev_to_rbd(dev); | 1851 | struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); |
1808 | 1852 | ||
1809 | return sprintf(buf, "client%lld\n", ceph_client_id(rbd_dev->client)); | 1853 | return sprintf(buf, "client%lld\n", |
1854 | ceph_client_id(rbd_dev->rbd_client->client)); | ||
1810 | } | 1855 | } |
1811 | 1856 | ||
1812 | static ssize_t rbd_pool_show(struct device *dev, | 1857 | static ssize_t rbd_pool_show(struct device *dev, |
1813 | struct device_attribute *attr, char *buf) | 1858 | struct device_attribute *attr, char *buf) |
1814 | { | 1859 | { |
1815 | struct rbd_device *rbd_dev = dev_to_rbd(dev); | 1860 | struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); |
1816 | 1861 | ||
1817 | return sprintf(buf, "%s\n", rbd_dev->pool_name); | 1862 | return sprintf(buf, "%s\n", rbd_dev->pool_name); |
1818 | } | 1863 | } |
@@ -1820,7 +1865,7 @@ static ssize_t rbd_pool_show(struct device *dev, | |||
1820 | static ssize_t rbd_name_show(struct device *dev, | 1865 | static ssize_t rbd_name_show(struct device *dev, |
1821 | struct device_attribute *attr, char *buf) | 1866 | struct device_attribute *attr, char *buf) |
1822 | { | 1867 | { |
1823 | struct rbd_device *rbd_dev = dev_to_rbd(dev); | 1868 | struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); |
1824 | 1869 | ||
1825 | return sprintf(buf, "%s\n", rbd_dev->obj); | 1870 | return sprintf(buf, "%s\n", rbd_dev->obj); |
1826 | } | 1871 | } |
@@ -1829,7 +1874,7 @@ static ssize_t rbd_snap_show(struct device *dev, | |||
1829 | struct device_attribute *attr, | 1874 | struct device_attribute *attr, |
1830 | char *buf) | 1875 | char *buf) |
1831 | { | 1876 | { |
1832 | struct rbd_device *rbd_dev = dev_to_rbd(dev); | 1877 | struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); |
1833 | 1878 | ||
1834 | return sprintf(buf, "%s\n", rbd_dev->snap_name); | 1879 | return sprintf(buf, "%s\n", rbd_dev->snap_name); |
1835 | } | 1880 | } |
@@ -1839,7 +1884,7 @@ static ssize_t rbd_image_refresh(struct device *dev, | |||
1839 | const char *buf, | 1884 | const char *buf, |
1840 | size_t size) | 1885 | size_t size) |
1841 | { | 1886 | { |
1842 | struct rbd_device *rbd_dev = dev_to_rbd(dev); | 1887 | struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); |
1843 | int rc; | 1888 | int rc; |
1844 | int ret = size; | 1889 | int ret = size; |
1845 | 1890 | ||
@@ -1904,7 +1949,7 @@ static ssize_t rbd_snap_size_show(struct device *dev, | |||
1904 | { | 1949 | { |
1905 | struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); | 1950 | struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); |
1906 | 1951 | ||
1907 | return sprintf(buf, "%lld\n", (long long)snap->size); | 1952 | return sprintf(buf, "%zd\n", snap->size); |
1908 | } | 1953 | } |
1909 | 1954 | ||
1910 | static ssize_t rbd_snap_id_show(struct device *dev, | 1955 | static ssize_t rbd_snap_id_show(struct device *dev, |
@@ -1913,7 +1958,7 @@ static ssize_t rbd_snap_id_show(struct device *dev, | |||
1913 | { | 1958 | { |
1914 | struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); | 1959 | struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); |
1915 | 1960 | ||
1916 | return sprintf(buf, "%lld\n", (long long)snap->id); | 1961 | return sprintf(buf, "%llu\n", (unsigned long long) snap->id); |
1917 | } | 1962 | } |
1918 | 1963 | ||
1919 | static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL); | 1964 | static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL); |
@@ -2085,19 +2130,9 @@ static int __rbd_init_snaps_header(struct rbd_device *rbd_dev) | |||
2085 | return 0; | 2130 | return 0; |
2086 | } | 2131 | } |
2087 | 2132 | ||
2088 | |||
2089 | static void rbd_root_dev_release(struct device *dev) | ||
2090 | { | ||
2091 | } | ||
2092 | |||
2093 | static struct device rbd_root_dev = { | ||
2094 | .init_name = "rbd", | ||
2095 | .release = rbd_root_dev_release, | ||
2096 | }; | ||
2097 | |||
2098 | static int rbd_bus_add_dev(struct rbd_device *rbd_dev) | 2133 | static int rbd_bus_add_dev(struct rbd_device *rbd_dev) |
2099 | { | 2134 | { |
2100 | int ret = -ENOMEM; | 2135 | int ret; |
2101 | struct device *dev; | 2136 | struct device *dev; |
2102 | struct rbd_snap *snap; | 2137 | struct rbd_snap *snap; |
2103 | 2138 | ||
@@ -2111,7 +2146,7 @@ static int rbd_bus_add_dev(struct rbd_device *rbd_dev) | |||
2111 | dev_set_name(dev, "%d", rbd_dev->id); | 2146 | dev_set_name(dev, "%d", rbd_dev->id); |
2112 | ret = device_register(dev); | 2147 | ret = device_register(dev); |
2113 | if (ret < 0) | 2148 | if (ret < 0) |
2114 | goto done_free; | 2149 | goto out; |
2115 | 2150 | ||
2116 | list_for_each_entry(snap, &rbd_dev->snaps, node) { | 2151 | list_for_each_entry(snap, &rbd_dev->snaps, node) { |
2117 | ret = rbd_register_snap_dev(rbd_dev, snap, | 2152 | ret = rbd_register_snap_dev(rbd_dev, snap, |
@@ -2119,10 +2154,7 @@ static int rbd_bus_add_dev(struct rbd_device *rbd_dev) | |||
2119 | if (ret < 0) | 2154 | if (ret < 0) |
2120 | break; | 2155 | break; |
2121 | } | 2156 | } |
2122 | 2157 | out: | |
2123 | mutex_unlock(&ctl_mutex); | ||
2124 | return 0; | ||
2125 | done_free: | ||
2126 | mutex_unlock(&ctl_mutex); | 2158 | mutex_unlock(&ctl_mutex); |
2127 | return ret; | 2159 | return ret; |
2128 | } | 2160 | } |
@@ -2151,102 +2183,250 @@ static int rbd_init_watch_dev(struct rbd_device *rbd_dev) | |||
2151 | return ret; | 2183 | return ret; |
2152 | } | 2184 | } |
2153 | 2185 | ||
2186 | static atomic64_t rbd_id_max = ATOMIC64_INIT(0); | ||
2187 | |||
2188 | /* | ||
2189 | * Get a unique rbd identifier for the given new rbd_dev, and add | ||
2190 | * the rbd_dev to the global list. The minimum rbd id is 1. | ||
2191 | */ | ||
2192 | static void rbd_id_get(struct rbd_device *rbd_dev) | ||
2193 | { | ||
2194 | rbd_dev->id = atomic64_inc_return(&rbd_id_max); | ||
2195 | |||
2196 | spin_lock(&rbd_dev_list_lock); | ||
2197 | list_add_tail(&rbd_dev->node, &rbd_dev_list); | ||
2198 | spin_unlock(&rbd_dev_list_lock); | ||
2199 | } | ||
2200 | |||
2201 | /* | ||
2202 | * Remove an rbd_dev from the global list, and record that its | ||
2203 | * identifier is no longer in use. | ||
2204 | */ | ||
2205 | static void rbd_id_put(struct rbd_device *rbd_dev) | ||
2206 | { | ||
2207 | struct list_head *tmp; | ||
2208 | int rbd_id = rbd_dev->id; | ||
2209 | int max_id; | ||
2210 | |||
2211 | BUG_ON(rbd_id < 1); | ||
2212 | |||
2213 | spin_lock(&rbd_dev_list_lock); | ||
2214 | list_del_init(&rbd_dev->node); | ||
2215 | |||
2216 | /* | ||
2217 | * If the id being "put" is not the current maximum, there | ||
2218 | * is nothing special we need to do. | ||
2219 | */ | ||
2220 | if (rbd_id != atomic64_read(&rbd_id_max)) { | ||
2221 | spin_unlock(&rbd_dev_list_lock); | ||
2222 | return; | ||
2223 | } | ||
2224 | |||
2225 | /* | ||
2226 | * We need to update the current maximum id. Search the | ||
2227 | * list to find out what it is. We're more likely to find | ||
2228 | * the maximum at the end, so search the list backward. | ||
2229 | */ | ||
2230 | max_id = 0; | ||
2231 | list_for_each_prev(tmp, &rbd_dev_list) { | ||
2232 | struct rbd_device *rbd_dev; | ||
2233 | |||
2234 | rbd_dev = list_entry(tmp, struct rbd_device, node); | ||
2235 | if (rbd_dev->id > max_id) | ||
2236 | max_id = rbd_dev->id; | ||
2237 | } | ||
2238 | spin_unlock(&rbd_dev_list_lock); | ||
2239 | |||
2240 | /* | ||
2241 | * The max id could have been updated by rbd_id_get(), in | ||
2242 | * which case it now accurately reflects the new maximum. | ||
2243 | * Be careful not to overwrite the maximum value in that | ||
2244 | * case. | ||
2245 | */ | ||
2246 | atomic64_cmpxchg(&rbd_id_max, rbd_id, max_id); | ||
2247 | } | ||
2248 | |||
2249 | /* | ||
2250 | * Skips over white space at *buf, and updates *buf to point to the | ||
2251 | * first found non-space character (if any). Returns the length of | ||
2252 | * the token (string of non-white space characters) found. Note | ||
2253 | * that *buf must be terminated with '\0'. | ||
2254 | */ | ||
2255 | static inline size_t next_token(const char **buf) | ||
2256 | { | ||
2257 | /* | ||
2258 | * These are the characters that produce nonzero for | ||
2259 | * isspace() in the "C" and "POSIX" locales. | ||
2260 | */ | ||
2261 | const char *spaces = " \f\n\r\t\v"; | ||
2262 | |||
2263 | *buf += strspn(*buf, spaces); /* Find start of token */ | ||
2264 | |||
2265 | return strcspn(*buf, spaces); /* Return token length */ | ||
2266 | } | ||
2267 | |||
2268 | /* | ||
2269 | * Finds the next token in *buf, and if the provided token buffer is | ||
2270 | * big enough, copies the found token into it. The result, if | ||
2271 | * copied, is guaranteed to be terminated with '\0'. Note that *buf | ||
2272 | * must be terminated with '\0' on entry. | ||
2273 | * | ||
2274 | * Returns the length of the token found (not including the '\0'). | ||
2275 | * Return value will be 0 if no token is found, and it will be >= | ||
2276 | * token_size if the token would not fit. | ||
2277 | * | ||
2278 | * The *buf pointer will be updated to point beyond the end of the | ||
2279 | * found token. Note that this occurs even if the token buffer is | ||
2280 | * too small to hold it. | ||
2281 | */ | ||
2282 | static inline size_t copy_token(const char **buf, | ||
2283 | char *token, | ||
2284 | size_t token_size) | ||
2285 | { | ||
2286 | size_t len; | ||
2287 | |||
2288 | len = next_token(buf); | ||
2289 | if (len < token_size) { | ||
2290 | memcpy(token, *buf, len); | ||
2291 | *(token + len) = '\0'; | ||
2292 | } | ||
2293 | *buf += len; | ||
2294 | |||
2295 | return len; | ||
2296 | } | ||
2297 | |||
2298 | /* | ||
2299 | * This fills in the pool_name, obj, obj_len, snap_name, | ||
2300 | * obj_md_name, and name fields of the given rbd_dev, based | ||
2301 | * on the list of monitor addresses and other options provided via | ||
2302 | * /sys/bus/rbd/add. | ||
2303 | */ | ||
2304 | static int rbd_add_parse_args(struct rbd_device *rbd_dev, | ||
2305 | const char *buf, | ||
2306 | const char **mon_addrs, | ||
2307 | size_t *mon_addrs_size, | ||
2308 | char *options, | ||
2309 | size_t options_size) | ||
2310 | { | ||
2311 | size_t len; | ||
2312 | |||
2313 | /* The first four tokens are required */ | ||
2314 | |||
2315 | len = next_token(&buf); | ||
2316 | if (!len) | ||
2317 | return -EINVAL; | ||
2318 | *mon_addrs_size = len + 1; | ||
2319 | *mon_addrs = buf; | ||
2320 | |||
2321 | buf += len; | ||
2322 | |||
2323 | len = copy_token(&buf, options, options_size); | ||
2324 | if (!len || len >= options_size) | ||
2325 | return -EINVAL; | ||
2326 | |||
2327 | len = copy_token(&buf, rbd_dev->pool_name, sizeof (rbd_dev->pool_name)); | ||
2328 | if (!len || len >= sizeof (rbd_dev->pool_name)) | ||
2329 | return -EINVAL; | ||
2330 | |||
2331 | len = copy_token(&buf, rbd_dev->obj, sizeof (rbd_dev->obj)); | ||
2332 | if (!len || len >= sizeof (rbd_dev->obj)) | ||
2333 | return -EINVAL; | ||
2334 | |||
2335 | /* We have the object length in hand, save it. */ | ||
2336 | |||
2337 | rbd_dev->obj_len = len; | ||
2338 | |||
2339 | BUILD_BUG_ON(RBD_MAX_MD_NAME_LEN | ||
2340 | < RBD_MAX_OBJ_NAME_LEN + sizeof (RBD_SUFFIX)); | ||
2341 | sprintf(rbd_dev->obj_md_name, "%s%s", rbd_dev->obj, RBD_SUFFIX); | ||
2342 | |||
2343 | /* | ||
2344 | * The snapshot name is optional, but it's an error if it's | ||
2345 | * too long. If no snapshot is supplied, fill in the default. | ||
2346 | */ | ||
2347 | len = copy_token(&buf, rbd_dev->snap_name, sizeof (rbd_dev->snap_name)); | ||
2348 | if (!len) | ||
2349 | memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME, | ||
2350 | sizeof (RBD_SNAP_HEAD_NAME)); | ||
2351 | else if (len >= sizeof (rbd_dev->snap_name)) | ||
2352 | return -EINVAL; | ||
2353 | |||
2354 | return 0; | ||
2355 | } | ||
2356 | |||
2154 | static ssize_t rbd_add(struct bus_type *bus, | 2357 | static ssize_t rbd_add(struct bus_type *bus, |
2155 | const char *buf, | 2358 | const char *buf, |
2156 | size_t count) | 2359 | size_t count) |
2157 | { | 2360 | { |
2158 | struct ceph_osd_client *osdc; | ||
2159 | struct rbd_device *rbd_dev; | 2361 | struct rbd_device *rbd_dev; |
2160 | ssize_t rc = -ENOMEM; | 2362 | const char *mon_addrs = NULL; |
2161 | int irc, new_id = 0; | 2363 | size_t mon_addrs_size = 0; |
2162 | struct list_head *tmp; | 2364 | char *options = NULL; |
2163 | char *mon_dev_name; | 2365 | struct ceph_osd_client *osdc; |
2164 | char *options; | 2366 | int rc = -ENOMEM; |
2165 | 2367 | ||
2166 | if (!try_module_get(THIS_MODULE)) | 2368 | if (!try_module_get(THIS_MODULE)) |
2167 | return -ENODEV; | 2369 | return -ENODEV; |
2168 | 2370 | ||
2169 | mon_dev_name = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL); | ||
2170 | if (!mon_dev_name) | ||
2171 | goto err_out_mod; | ||
2172 | |||
2173 | options = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL); | ||
2174 | if (!options) | ||
2175 | goto err_mon_dev; | ||
2176 | |||
2177 | /* new rbd_device object */ | ||
2178 | rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); | 2371 | rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); |
2179 | if (!rbd_dev) | 2372 | if (!rbd_dev) |
2180 | goto err_out_opt; | 2373 | goto err_nomem; |
2374 | options = kmalloc(count, GFP_KERNEL); | ||
2375 | if (!options) | ||
2376 | goto err_nomem; | ||
2181 | 2377 | ||
2182 | /* static rbd_device initialization */ | 2378 | /* static rbd_device initialization */ |
2183 | spin_lock_init(&rbd_dev->lock); | 2379 | spin_lock_init(&rbd_dev->lock); |
2184 | INIT_LIST_HEAD(&rbd_dev->node); | 2380 | INIT_LIST_HEAD(&rbd_dev->node); |
2185 | INIT_LIST_HEAD(&rbd_dev->snaps); | 2381 | INIT_LIST_HEAD(&rbd_dev->snaps); |
2382 | init_rwsem(&rbd_dev->header_rwsem); | ||
2186 | 2383 | ||
2187 | /* generate unique id: find highest unique id, add one */ | 2384 | init_rwsem(&rbd_dev->header_rwsem); |
2188 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
2189 | |||
2190 | list_for_each(tmp, &rbd_dev_list) { | ||
2191 | struct rbd_device *rbd_dev; | ||
2192 | 2385 | ||
2193 | rbd_dev = list_entry(tmp, struct rbd_device, node); | 2386 | /* generate unique id: find highest unique id, add one */ |
2194 | if (rbd_dev->id >= new_id) | 2387 | rbd_id_get(rbd_dev); |
2195 | new_id = rbd_dev->id + 1; | ||
2196 | } | ||
2197 | |||
2198 | rbd_dev->id = new_id; | ||
2199 | 2388 | ||
2200 | /* add to global list */ | 2389 | /* Fill in the device name, now that we have its id. */ |
2201 | list_add_tail(&rbd_dev->node, &rbd_dev_list); | 2390 | BUILD_BUG_ON(DEV_NAME_LEN |
2391 | < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH); | ||
2392 | sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->id); | ||
2202 | 2393 | ||
2203 | /* parse add command */ | 2394 | /* parse add command */ |
2204 | if (sscanf(buf, "%" __stringify(RBD_MAX_OPT_LEN) "s " | 2395 | rc = rbd_add_parse_args(rbd_dev, buf, &mon_addrs, &mon_addrs_size, |
2205 | "%" __stringify(RBD_MAX_OPT_LEN) "s " | 2396 | options, count); |
2206 | "%" __stringify(RBD_MAX_POOL_NAME_LEN) "s " | 2397 | if (rc) |
2207 | "%" __stringify(RBD_MAX_OBJ_NAME_LEN) "s" | 2398 | goto err_put_id; |
2208 | "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s", | ||
2209 | mon_dev_name, options, rbd_dev->pool_name, | ||
2210 | rbd_dev->obj, rbd_dev->snap_name) < 4) { | ||
2211 | rc = -EINVAL; | ||
2212 | goto err_out_slot; | ||
2213 | } | ||
2214 | |||
2215 | if (rbd_dev->snap_name[0] == 0) | ||
2216 | rbd_dev->snap_name[0] = '-'; | ||
2217 | |||
2218 | rbd_dev->obj_len = strlen(rbd_dev->obj); | ||
2219 | snprintf(rbd_dev->obj_md_name, sizeof(rbd_dev->obj_md_name), "%s%s", | ||
2220 | rbd_dev->obj, RBD_SUFFIX); | ||
2221 | |||
2222 | /* initialize rest of new object */ | ||
2223 | snprintf(rbd_dev->name, DEV_NAME_LEN, DRV_NAME "%d", rbd_dev->id); | ||
2224 | rc = rbd_get_client(rbd_dev, mon_dev_name, options); | ||
2225 | if (rc < 0) | ||
2226 | goto err_out_slot; | ||
2227 | 2399 | ||
2228 | mutex_unlock(&ctl_mutex); | 2400 | rbd_dev->rbd_client = rbd_get_client(mon_addrs, mon_addrs_size - 1, |
2401 | options); | ||
2402 | if (IS_ERR(rbd_dev->rbd_client)) { | ||
2403 | rc = PTR_ERR(rbd_dev->rbd_client); | ||
2404 | goto err_put_id; | ||
2405 | } | ||
2229 | 2406 | ||
2230 | /* pick the pool */ | 2407 | /* pick the pool */ |
2231 | osdc = &rbd_dev->client->osdc; | 2408 | osdc = &rbd_dev->rbd_client->client->osdc; |
2232 | rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name); | 2409 | rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name); |
2233 | if (rc < 0) | 2410 | if (rc < 0) |
2234 | goto err_out_client; | 2411 | goto err_out_client; |
2235 | rbd_dev->poolid = rc; | 2412 | rbd_dev->poolid = rc; |
2236 | 2413 | ||
2237 | /* register our block device */ | 2414 | /* register our block device */ |
2238 | irc = register_blkdev(0, rbd_dev->name); | 2415 | rc = register_blkdev(0, rbd_dev->name); |
2239 | if (irc < 0) { | 2416 | if (rc < 0) |
2240 | rc = irc; | ||
2241 | goto err_out_client; | 2417 | goto err_out_client; |
2242 | } | 2418 | rbd_dev->major = rc; |
2243 | rbd_dev->major = irc; | ||
2244 | 2419 | ||
2245 | rc = rbd_bus_add_dev(rbd_dev); | 2420 | rc = rbd_bus_add_dev(rbd_dev); |
2246 | if (rc) | 2421 | if (rc) |
2247 | goto err_out_blkdev; | 2422 | goto err_out_blkdev; |
2248 | 2423 | ||
2249 | /* set up and announce blkdev mapping */ | 2424 | /* |
2425 | * At this point cleanup in the event of an error is the job | ||
2426 | * of the sysfs code (initiated by rbd_bus_del_dev()). | ||
2427 | * | ||
2428 | * Set up and announce blkdev mapping. | ||
2429 | */ | ||
2250 | rc = rbd_init_disk(rbd_dev); | 2430 | rc = rbd_init_disk(rbd_dev); |
2251 | if (rc) | 2431 | if (rc) |
2252 | goto err_out_bus; | 2432 | goto err_out_bus; |
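The next_token()/copy_token() helpers added earlier in this hunk are what give /sys/bus/rbd/add its whitespace-separated syntax: monitor addresses, options, pool name, image name, and an optional snapshot name, in that order (see rbd_add_parse_args()). A userspace transcription of the two helpers, fed a made-up add string, shows the tokenization:

    #include <stdio.h>
    #include <string.h>

    /* Userspace copies of the kernel helpers above, identical in logic. */
    static size_t next_token(const char **buf)
    {
            const char *spaces = " \f\n\r\t\v";

            *buf += strspn(*buf, spaces);   /* skip leading whitespace */
            return strcspn(*buf, spaces);   /* length of the token found */
    }

    static size_t copy_token(const char **buf, char *token, size_t token_size)
    {
            size_t len = next_token(buf);

            if (len < token_size) {
                    memcpy(token, *buf, len);
                    token[len] = '\0';
            }
            *buf += len;
            return len;
    }

    int main(void)
    {
            /* Illustrative add string: mon addrs, options, pool, image, snap */
            const char *cmd = "1.2.3.4:6789 name=admin rbd myimage mysnap";
            char tok[64];

            while (copy_token(&cmd, tok, sizeof(tok)))
                    printf("token: %s\n", tok);
            return 0;
    }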
@@ -2258,35 +2438,26 @@ static ssize_t rbd_add(struct bus_type *bus, | |||
2258 | return count; | 2438 | return count; |
2259 | 2439 | ||
2260 | err_out_bus: | 2440 | err_out_bus: |
2261 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
2262 | list_del_init(&rbd_dev->node); | ||
2263 | mutex_unlock(&ctl_mutex); | ||
2264 | |||
2265 | /* this will also clean up rest of rbd_dev stuff */ | 2441 | /* this will also clean up rest of rbd_dev stuff */ |
2266 | 2442 | ||
2267 | rbd_bus_del_dev(rbd_dev); | 2443 | rbd_bus_del_dev(rbd_dev); |
2268 | kfree(options); | 2444 | kfree(options); |
2269 | kfree(mon_dev_name); | ||
2270 | return rc; | 2445 | return rc; |
2271 | 2446 | ||
2272 | err_out_blkdev: | 2447 | err_out_blkdev: |
2273 | unregister_blkdev(rbd_dev->major, rbd_dev->name); | 2448 | unregister_blkdev(rbd_dev->major, rbd_dev->name); |
2274 | err_out_client: | 2449 | err_out_client: |
2275 | rbd_put_client(rbd_dev); | 2450 | rbd_put_client(rbd_dev); |
2276 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | 2451 | err_put_id: |
2277 | err_out_slot: | 2452 | rbd_id_put(rbd_dev); |
2278 | list_del_init(&rbd_dev->node); | 2453 | err_nomem: |
2279 | mutex_unlock(&ctl_mutex); | ||
2280 | |||
2281 | kfree(rbd_dev); | ||
2282 | err_out_opt: | ||
2283 | kfree(options); | 2454 | kfree(options); |
2284 | err_mon_dev: | 2455 | kfree(rbd_dev); |
2285 | kfree(mon_dev_name); | 2456 | |
2286 | err_out_mod: | ||
2287 | dout("Error adding device %s\n", buf); | 2457 | dout("Error adding device %s\n", buf); |
2288 | module_put(THIS_MODULE); | 2458 | module_put(THIS_MODULE); |
2289 | return rc; | 2459 | |
2460 | return (ssize_t) rc; | ||
2290 | } | 2461 | } |
2291 | 2462 | ||
2292 | static struct rbd_device *__rbd_get_dev(unsigned long id) | 2463 | static struct rbd_device *__rbd_get_dev(unsigned long id) |
@@ -2294,22 +2465,28 @@ static struct rbd_device *__rbd_get_dev(unsigned long id) | |||
2294 | struct list_head *tmp; | 2465 | struct list_head *tmp; |
2295 | struct rbd_device *rbd_dev; | 2466 | struct rbd_device *rbd_dev; |
2296 | 2467 | ||
2468 | spin_lock(&rbd_dev_list_lock); | ||
2297 | list_for_each(tmp, &rbd_dev_list) { | 2469 | list_for_each(tmp, &rbd_dev_list) { |
2298 | rbd_dev = list_entry(tmp, struct rbd_device, node); | 2470 | rbd_dev = list_entry(tmp, struct rbd_device, node); |
2299 | if (rbd_dev->id == id) | 2471 | if (rbd_dev->id == id) { |
2472 | spin_unlock(&rbd_dev_list_lock); | ||
2300 | return rbd_dev; | 2473 | return rbd_dev; |
2474 | } | ||
2301 | } | 2475 | } |
2476 | spin_unlock(&rbd_dev_list_lock); | ||
2302 | return NULL; | 2477 | return NULL; |
2303 | } | 2478 | } |
2304 | 2479 | ||
2305 | static void rbd_dev_release(struct device *dev) | 2480 | static void rbd_dev_release(struct device *dev) |
2306 | { | 2481 | { |
2307 | struct rbd_device *rbd_dev = | 2482 | struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); |
2308 | container_of(dev, struct rbd_device, dev); | ||
2309 | 2483 | ||
2310 | if (rbd_dev->watch_request) | 2484 | if (rbd_dev->watch_request) { |
2311 | ceph_osdc_unregister_linger_request(&rbd_dev->client->osdc, | 2485 | struct ceph_client *client = rbd_dev->rbd_client->client; |
2486 | |||
2487 | ceph_osdc_unregister_linger_request(&client->osdc, | ||
2312 | rbd_dev->watch_request); | 2488 | rbd_dev->watch_request); |
2489 | } | ||
2313 | if (rbd_dev->watch_event) | 2490 | if (rbd_dev->watch_event) |
2314 | rbd_req_sync_unwatch(rbd_dev, rbd_dev->obj_md_name); | 2491 | rbd_req_sync_unwatch(rbd_dev, rbd_dev->obj_md_name); |
2315 | 2492 | ||
@@ -2318,6 +2495,9 @@ static void rbd_dev_release(struct device *dev) | |||
2318 | /* clean up and free blkdev */ | 2495 | /* clean up and free blkdev */ |
2319 | rbd_free_disk(rbd_dev); | 2496 | rbd_free_disk(rbd_dev); |
2320 | unregister_blkdev(rbd_dev->major, rbd_dev->name); | 2497 | unregister_blkdev(rbd_dev->major, rbd_dev->name); |
2498 | |||
2499 | /* done with the id, and with the rbd_dev */ | ||
2500 | rbd_id_put(rbd_dev); | ||
2321 | kfree(rbd_dev); | 2501 | kfree(rbd_dev); |
2322 | 2502 | ||
2323 | /* release module ref */ | 2503 | /* release module ref */ |
@@ -2350,8 +2530,6 @@ static ssize_t rbd_remove(struct bus_type *bus, | |||
2350 | goto done; | 2530 | goto done; |
2351 | } | 2531 | } |
2352 | 2532 | ||
2353 | list_del_init(&rbd_dev->node); | ||
2354 | |||
2355 | __rbd_remove_all_snaps(rbd_dev); | 2533 | __rbd_remove_all_snaps(rbd_dev); |
2356 | rbd_bus_del_dev(rbd_dev); | 2534 | rbd_bus_del_dev(rbd_dev); |
2357 | 2535 | ||
@@ -2365,7 +2543,7 @@ static ssize_t rbd_snap_add(struct device *dev, | |||
2365 | const char *buf, | 2543 | const char *buf, |
2366 | size_t count) | 2544 | size_t count) |
2367 | { | 2545 | { |
2368 | struct rbd_device *rbd_dev = dev_to_rbd(dev); | 2546 | struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); |
2369 | int ret; | 2547 | int ret; |
2370 | char *name = kmalloc(count + 1, GFP_KERNEL); | 2548 | char *name = kmalloc(count + 1, GFP_KERNEL); |
2371 | if (!name) | 2549 | if (!name) |
@@ -2401,12 +2579,6 @@ err_unlock: | |||
2401 | return ret; | 2579 | return ret; |
2402 | } | 2580 | } |
2403 | 2581 | ||
2404 | static struct bus_attribute rbd_bus_attrs[] = { | ||
2405 | __ATTR(add, S_IWUSR, NULL, rbd_add), | ||
2406 | __ATTR(remove, S_IWUSR, NULL, rbd_remove), | ||
2407 | __ATTR_NULL | ||
2408 | }; | ||
2409 | |||
2410 | /* | 2582 | /* |
2411 | * create control files in sysfs | 2583 | * create control files in sysfs |
2412 | * /sys/bus/rbd/... | 2584 | * /sys/bus/rbd/... |
@@ -2415,21 +2587,21 @@ static int rbd_sysfs_init(void) | |||
2415 | { | 2587 | { |
2416 | int ret; | 2588 | int ret; |
2417 | 2589 | ||
2418 | rbd_bus_type.bus_attrs = rbd_bus_attrs; | 2590 | ret = device_register(&rbd_root_dev); |
2419 | 2591 | if (ret < 0) | |
2420 | ret = bus_register(&rbd_bus_type); | ||
2421 | if (ret < 0) | ||
2422 | return ret; | 2592 | return ret; |
2423 | 2593 | ||
2424 | ret = device_register(&rbd_root_dev); | 2594 | ret = bus_register(&rbd_bus_type); |
2595 | if (ret < 0) | ||
2596 | device_unregister(&rbd_root_dev); | ||
2425 | 2597 | ||
2426 | return ret; | 2598 | return ret; |
2427 | } | 2599 | } |
2428 | 2600 | ||
2429 | static void rbd_sysfs_cleanup(void) | 2601 | static void rbd_sysfs_cleanup(void) |
2430 | { | 2602 | { |
2431 | device_unregister(&rbd_root_dev); | ||
2432 | bus_unregister(&rbd_bus_type); | 2603 | bus_unregister(&rbd_bus_type); |
2604 | device_unregister(&rbd_root_dev); | ||
2433 | } | 2605 | } |
2434 | 2606 | ||
2435 | int __init rbd_init(void) | 2607 | int __init rbd_init(void) |
@@ -2439,8 +2611,7 @@ int __init rbd_init(void) | |||
2439 | rc = rbd_sysfs_init(); | 2611 | rc = rbd_sysfs_init(); |
2440 | if (rc) | 2612 | if (rc) |
2441 | return rc; | 2613 | return rc; |
2442 | spin_lock_init(&node_lock); | 2614 | pr_info("loaded " RBD_DRV_NAME_LONG "\n"); |
2443 | pr_info("loaded " DRV_NAME_LONG "\n"); | ||
2444 | return 0; | 2615 | return 0; |
2445 | } | 2616 | } |
2446 | 2617 | ||
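The __rbd_get_dev() hunk above takes rbd_dev_list_lock around the device-list walk so a concurrent add or remove cannot race the lookup. As a rough illustration only, the same locking pattern written with list_for_each_entry() — assuming the rbd_dev_list and rbd_dev_list_lock globals the rest of rbd.c already uses — would look like:

        static struct rbd_device *__rbd_get_dev(unsigned long id)
        {
                struct rbd_device *rbd_dev;

                /* Hold the list lock for the whole walk; add/remove take it too. */
                spin_lock(&rbd_dev_list_lock);
                list_for_each_entry(rbd_dev, &rbd_dev_list, node) {
                        if (rbd_dev->id == id) {
                                spin_unlock(&rbd_dev_list_lock);
                                return rbd_dev;
                        }
                }
                spin_unlock(&rbd_dev_list_lock);

                return NULL;
        }

The lock is dropped before the pointer is used, so this appears to rely on the caller serialising against removal (the sysfs remove path still runs under ctl_mutex).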
diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h index fc6c678aa2cb..950708688f17 100644 --- a/drivers/block/rbd_types.h +++ b/drivers/block/rbd_types.h | |||
@@ -41,10 +41,6 @@ | |||
41 | #define RBD_HEADER_SIGNATURE "RBD" | 41 | #define RBD_HEADER_SIGNATURE "RBD" |
42 | #define RBD_HEADER_VERSION "001.005" | 42 | #define RBD_HEADER_VERSION "001.005" |
43 | 43 | ||
44 | struct rbd_info { | ||
45 | __le64 max_id; | ||
46 | } __attribute__ ((packed)); | ||
47 | |||
48 | struct rbd_image_snap_ondisk { | 44 | struct rbd_image_snap_ondisk { |
49 | __le64 id; | 45 | __le64 id; |
50 | __le64 image_size; | 46 | __le64 image_size; |
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 48e8fee9f2d4..9dcf76a10bb6 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c | |||
@@ -839,10 +839,7 @@ static struct vio_driver vdc_port_driver = { | |||
839 | .id_table = vdc_port_match, | 839 | .id_table = vdc_port_match, |
840 | .probe = vdc_port_probe, | 840 | .probe = vdc_port_probe, |
841 | .remove = vdc_port_remove, | 841 | .remove = vdc_port_remove, |
842 | .driver = { | 842 | .name = "vdc_port", |
843 | .name = "vdc_port", | ||
844 | .owner = THIS_MODULE, | ||
845 | } | ||
846 | }; | 843 | }; |
847 | 844 | ||
848 | static int __init vdc_init(void) | 845 | static int __init vdc_init(void) |
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index b70f0fca9a42..3fb6ab4c8b4e 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c | |||
@@ -619,8 +619,10 @@ static int carm_array_info (struct carm_host *host, unsigned int array_idx) | |||
619 | host->state == HST_DEV_SCAN); | 619 | host->state == HST_DEV_SCAN); |
620 | spin_unlock_irq(&host->lock); | 620 | spin_unlock_irq(&host->lock); |
621 | 621 | ||
622 | DPRINTK("blk_insert_request, tag == %u\n", idx); | 622 | DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx); |
623 | blk_insert_request(host->oob_q, crq->rq, 1, crq); | 623 | crq->rq->cmd_type = REQ_TYPE_SPECIAL; |
624 | crq->rq->special = crq; | ||
625 | blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL); | ||
624 | 626 | ||
625 | return 0; | 627 | return 0; |
626 | 628 | ||
@@ -658,8 +660,10 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func) | |||
658 | BUG_ON(rc < 0); | 660 | BUG_ON(rc < 0); |
659 | crq->msg_bucket = (u32) rc; | 661 | crq->msg_bucket = (u32) rc; |
660 | 662 | ||
661 | DPRINTK("blk_insert_request, tag == %u\n", idx); | 663 | DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx); |
662 | blk_insert_request(host->oob_q, crq->rq, 1, crq); | 664 | crq->rq->cmd_type = REQ_TYPE_SPECIAL; |
665 | crq->rq->special = crq; | ||
666 | blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL); | ||
663 | 667 | ||
664 | return 0; | 668 | return 0; |
665 | } | 669 | } |
@@ -1116,7 +1120,7 @@ static inline void carm_handle_resp(struct carm_host *host, | |||
1116 | break; | 1120 | break; |
1117 | case MISC_GET_FW_VER: { | 1121 | case MISC_GET_FW_VER: { |
1118 | struct carm_fw_ver *ver = (struct carm_fw_ver *) | 1122 | struct carm_fw_ver *ver = (struct carm_fw_ver *) |
1119 | mem + sizeof(struct carm_msg_get_fw_ver); | 1123 | (mem + sizeof(struct carm_msg_get_fw_ver)); |
1120 | if (!error) { | 1124 | if (!error) { |
1121 | host->fw_ver = le32_to_cpu(ver->version); | 1125 | host->fw_ver = le32_to_cpu(ver->version); |
1122 | host->flags |= (ver->features & FL_FW_VER_MASK); | 1126 | host->flags |= (ver->features & FL_FW_VER_MASK); |
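The two sx8 hunks replace the removed blk_insert_request() with blk_execute_rq_nowait(): the request is marked REQ_TYPE_SPECIAL, the driver context goes in ->special, and the request is queued at the head with no completion callback. A hypothetical helper (not part of the patch) capturing that pattern, assuming crq->rq was already allocated from host->oob_q as in the surrounding code, might read:

        static void carm_queue_oob(struct carm_host *host, struct carm_request *crq)
        {
                /* Not a filesystem request; the completion path finds us via ->special. */
                crq->rq->cmd_type = REQ_TYPE_SPECIAL;
                crq->rq->special = crq;

                /* at_head == true mirrors the old blk_insert_request(..., 1, ...). */
                blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL);
        }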
diff --git a/drivers/block/ub.c b/drivers/block/ub.c index 0e376d46bdd1..fcec0225ac76 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c | |||
@@ -119,43 +119,6 @@ | |||
119 | 119 | ||
120 | /* | 120 | /* |
121 | */ | 121 | */ |
122 | |||
123 | /* command block wrapper */ | ||
124 | struct bulk_cb_wrap { | ||
125 | __le32 Signature; /* contains 'USBC' */ | ||
126 | u32 Tag; /* unique per command id */ | ||
127 | __le32 DataTransferLength; /* size of data */ | ||
128 | u8 Flags; /* direction in bit 0 */ | ||
129 | u8 Lun; /* LUN */ | ||
130 | u8 Length; /* length of the CDB */ | ||
131 | u8 CDB[UB_MAX_CDB_SIZE]; /* max command */ | ||
132 | }; | ||
133 | |||
134 | #define US_BULK_CB_WRAP_LEN 31 | ||
135 | #define US_BULK_CB_SIGN 0x43425355 /*spells out USBC */ | ||
136 | #define US_BULK_FLAG_IN 1 | ||
137 | #define US_BULK_FLAG_OUT 0 | ||
138 | |||
139 | /* command status wrapper */ | ||
140 | struct bulk_cs_wrap { | ||
141 | __le32 Signature; /* should = 'USBS' */ | ||
142 | u32 Tag; /* same as original command */ | ||
143 | __le32 Residue; /* amount not transferred */ | ||
144 | u8 Status; /* see below */ | ||
145 | }; | ||
146 | |||
147 | #define US_BULK_CS_WRAP_LEN 13 | ||
148 | #define US_BULK_CS_SIGN 0x53425355 /* spells out 'USBS' */ | ||
149 | #define US_BULK_STAT_OK 0 | ||
150 | #define US_BULK_STAT_FAIL 1 | ||
151 | #define US_BULK_STAT_PHASE 2 | ||
152 | |||
153 | /* bulk-only class specific requests */ | ||
154 | #define US_BULK_RESET_REQUEST 0xff | ||
155 | #define US_BULK_GET_MAX_LUN 0xfe | ||
156 | |||
157 | /* | ||
158 | */ | ||
159 | struct ub_dev; | 122 | struct ub_dev; |
160 | 123 | ||
161 | #define UB_MAX_REQ_SG 9 /* cdrecord requires 32KB and maybe a header */ | 124 | #define UB_MAX_REQ_SG 9 /* cdrecord requires 32KB and maybe a header */ |
@@ -1744,12 +1707,11 @@ static int ub_bd_release(struct gendisk *disk, fmode_t mode) | |||
1744 | static int ub_bd_ioctl(struct block_device *bdev, fmode_t mode, | 1707 | static int ub_bd_ioctl(struct block_device *bdev, fmode_t mode, |
1745 | unsigned int cmd, unsigned long arg) | 1708 | unsigned int cmd, unsigned long arg) |
1746 | { | 1709 | { |
1747 | struct gendisk *disk = bdev->bd_disk; | ||
1748 | void __user *usermem = (void __user *) arg; | 1710 | void __user *usermem = (void __user *) arg; |
1749 | int ret; | 1711 | int ret; |
1750 | 1712 | ||
1751 | mutex_lock(&ub_mutex); | 1713 | mutex_lock(&ub_mutex); |
1752 | ret = scsi_cmd_ioctl(disk->queue, disk, mode, cmd, usermem); | 1714 | ret = scsi_cmd_blk_ioctl(bdev, mode, cmd, usermem); |
1753 | mutex_unlock(&ub_mutex); | 1715 | mutex_unlock(&ub_mutex); |
1754 | 1716 | ||
1755 | return ret; | 1717 | return ret; |
@@ -2478,6 +2440,8 @@ static int __init ub_init(void) | |||
2478 | int rc; | 2440 | int rc; |
2479 | int i; | 2441 | int i; |
2480 | 2442 | ||
2443 | pr_info("'Low Performance USB Block' driver is deprecated. " | ||
2444 | "Please switch to usb-storage\n"); | ||
2481 | for (i = 0; i < UB_QLOCK_NUM; i++) | 2445 | for (i = 0; i < UB_QLOCK_NUM; i++) |
2482 | spin_lock_init(&ub_qlockv[i]); | 2446 | spin_lock_init(&ub_qlockv[i]); |
2483 | 2447 | ||
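The ub.c hunks drop the driver's private copies of the USB bulk-only command/status wrappers; in mainline of this period those definitions live in a shared header (include/linux/usb/storage.h), which is presumably what the driver now includes. Purely as an illustrative sketch of those shared definitions in use — the helper below is hypothetical, not part of the patch — a command block wrapper would be filled roughly like this:

        #include <linux/usb/storage.h>

        static void ub_fill_cbw(struct bulk_cb_wrap *cbw, u32 tag, u32 data_len,
                                bool data_in, u8 lun, const u8 *cdb, u8 cdb_len)
        {
                cbw->Signature = cpu_to_le32(US_BULK_CB_SIGN);
                cbw->Tag = tag;                         /* echoed back in the CSW */
                cbw->DataTransferLength = cpu_to_le32(data_len);
                cbw->Flags = data_in ? 0x80 : 0;        /* bit 7: device-to-host */
                cbw->Lun = lun;
                cbw->Length = cdb_len;                  /* valid bytes in the CDB */
                memcpy(cbw->CDB, cdb, min_t(u8, cdb_len, sizeof(cbw->CDB)));
        }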
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c deleted file mode 100644 index 9a5b2a2d616d..000000000000 --- a/drivers/block/viodasd.c +++ /dev/null | |||
@@ -1,809 +0,0 @@ | |||
1 | /* -*- linux-c -*- | ||
2 | * viodasd.c | ||
3 | * Authors: Dave Boutcher <boutcher@us.ibm.com> | ||
4 | * Ryan Arnold <ryanarn@us.ibm.com> | ||
5 | * Colin Devilbiss <devilbis@us.ibm.com> | ||
6 | * Stephen Rothwell | ||
7 | * | ||
8 | * (C) Copyright 2000-2004 IBM Corporation | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License as | ||
12 | * published by the Free Software Foundation; either version 2 of the | ||
13 | * License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
23 | * | ||
24 | * This routine provides access to disk space (termed "DASD" in historical | ||
25 | * IBM terms) owned and managed by an OS/400 partition running on the | ||
26 | * same box as this Linux partition. | ||
27 | * | ||
28 | * All disk operations are performed by sending messages back and forth to | ||
29 | * the OS/400 partition. | ||
30 | */ | ||
31 | |||
32 | #define pr_fmt(fmt) "viod: " fmt | ||
33 | |||
34 | #include <linux/major.h> | ||
35 | #include <linux/fs.h> | ||
36 | #include <linux/module.h> | ||
37 | #include <linux/kernel.h> | ||
38 | #include <linux/blkdev.h> | ||
39 | #include <linux/genhd.h> | ||
40 | #include <linux/hdreg.h> | ||
41 | #include <linux/errno.h> | ||
42 | #include <linux/init.h> | ||
43 | #include <linux/string.h> | ||
44 | #include <linux/mutex.h> | ||
45 | #include <linux/dma-mapping.h> | ||
46 | #include <linux/completion.h> | ||
47 | #include <linux/device.h> | ||
48 | #include <linux/scatterlist.h> | ||
49 | |||
50 | #include <asm/uaccess.h> | ||
51 | #include <asm/vio.h> | ||
52 | #include <asm/iseries/hv_types.h> | ||
53 | #include <asm/iseries/hv_lp_event.h> | ||
54 | #include <asm/iseries/hv_lp_config.h> | ||
55 | #include <asm/iseries/vio.h> | ||
56 | #include <asm/firmware.h> | ||
57 | |||
58 | MODULE_DESCRIPTION("iSeries Virtual DASD"); | ||
59 | MODULE_AUTHOR("Dave Boutcher"); | ||
60 | MODULE_LICENSE("GPL"); | ||
61 | |||
62 | /* | ||
63 | * We only support 7 partitions per physical disk....so with minor | ||
64 | * numbers 0-255 we get a maximum of 32 disks. | ||
65 | */ | ||
66 | #define VIOD_GENHD_NAME "iseries/vd" | ||
67 | |||
68 | #define VIOD_VERS "1.64" | ||
69 | |||
70 | enum { | ||
71 | PARTITION_SHIFT = 3, | ||
72 | MAX_DISKNO = HVMAXARCHITECTEDVIRTUALDISKS, | ||
73 | MAX_DISK_NAME = FIELD_SIZEOF(struct gendisk, disk_name) | ||
74 | }; | ||
75 | |||
76 | static DEFINE_MUTEX(viodasd_mutex); | ||
77 | static DEFINE_SPINLOCK(viodasd_spinlock); | ||
78 | |||
79 | #define VIOMAXREQ 16 | ||
80 | |||
81 | #define DEVICE_NO(cell) ((struct viodasd_device *)(cell) - &viodasd_devices[0]) | ||
82 | |||
83 | struct viodasd_waitevent { | ||
84 | struct completion com; | ||
85 | int rc; | ||
86 | u16 sub_result; | ||
87 | int max_disk; /* open */ | ||
88 | }; | ||
89 | |||
90 | static const struct vio_error_entry viodasd_err_table[] = { | ||
91 | { 0x0201, EINVAL, "Invalid Range" }, | ||
92 | { 0x0202, EINVAL, "Invalid Token" }, | ||
93 | { 0x0203, EIO, "DMA Error" }, | ||
94 | { 0x0204, EIO, "Use Error" }, | ||
95 | { 0x0205, EIO, "Release Error" }, | ||
96 | { 0x0206, EINVAL, "Invalid Disk" }, | ||
97 | { 0x0207, EBUSY, "Can't Lock" }, | ||
98 | { 0x0208, EIO, "Already Locked" }, | ||
99 | { 0x0209, EIO, "Already Unlocked" }, | ||
100 | { 0x020A, EIO, "Invalid Arg" }, | ||
101 | { 0x020B, EIO, "Bad IFS File" }, | ||
102 | { 0x020C, EROFS, "Read Only Device" }, | ||
103 | { 0x02FF, EIO, "Internal Error" }, | ||
104 | { 0x0000, 0, NULL }, | ||
105 | }; | ||
106 | |||
107 | /* | ||
108 | * Figure out the biggest I/O request (in sectors) we can accept | ||
109 | */ | ||
110 | #define VIODASD_MAXSECTORS (4096 / 512 * VIOMAXBLOCKDMA) | ||
111 | |||
112 | /* | ||
113 | * Number of disk I/O requests we've sent to OS/400 | ||
114 | */ | ||
115 | static int num_req_outstanding; | ||
116 | |||
117 | /* | ||
118 | * This is our internal structure for keeping track of disk devices | ||
119 | */ | ||
120 | struct viodasd_device { | ||
121 | u16 cylinders; | ||
122 | u16 tracks; | ||
123 | u16 sectors; | ||
124 | u16 bytes_per_sector; | ||
125 | u64 size; | ||
126 | int read_only; | ||
127 | spinlock_t q_lock; | ||
128 | struct gendisk *disk; | ||
129 | struct device *dev; | ||
130 | } viodasd_devices[MAX_DISKNO]; | ||
131 | |||
132 | /* | ||
133 | * External open entry point. | ||
134 | */ | ||
135 | static int viodasd_open(struct block_device *bdev, fmode_t mode) | ||
136 | { | ||
137 | struct viodasd_device *d = bdev->bd_disk->private_data; | ||
138 | HvLpEvent_Rc hvrc; | ||
139 | struct viodasd_waitevent we; | ||
140 | u16 flags = 0; | ||
141 | |||
142 | if (d->read_only) { | ||
143 | if (mode & FMODE_WRITE) | ||
144 | return -EROFS; | ||
145 | flags = vioblockflags_ro; | ||
146 | } | ||
147 | |||
148 | init_completion(&we.com); | ||
149 | |||
150 | /* Send the open event to OS/400 */ | ||
151 | hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, | ||
152 | HvLpEvent_Type_VirtualIo, | ||
153 | viomajorsubtype_blockio | vioblockopen, | ||
154 | HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck, | ||
155 | viopath_sourceinst(viopath_hostLp), | ||
156 | viopath_targetinst(viopath_hostLp), | ||
157 | (u64)(unsigned long)&we, VIOVERSION << 16, | ||
158 | ((u64)DEVICE_NO(d) << 48) | ((u64)flags << 32), | ||
159 | 0, 0, 0); | ||
160 | if (hvrc != 0) { | ||
161 | pr_warning("HV open failed %d\n", (int)hvrc); | ||
162 | return -EIO; | ||
163 | } | ||
164 | |||
165 | wait_for_completion(&we.com); | ||
166 | |||
167 | /* Check the return code */ | ||
168 | if (we.rc != 0) { | ||
169 | const struct vio_error_entry *err = | ||
170 | vio_lookup_rc(viodasd_err_table, we.sub_result); | ||
171 | |||
172 | pr_warning("bad rc opening disk: %d:0x%04x (%s)\n", | ||
173 | (int)we.rc, we.sub_result, err->msg); | ||
174 | return -EIO; | ||
175 | } | ||
176 | |||
177 | return 0; | ||
178 | } | ||
179 | |||
180 | static int viodasd_unlocked_open(struct block_device *bdev, fmode_t mode) | ||
181 | { | ||
182 | int ret; | ||
183 | |||
184 | mutex_lock(&viodasd_mutex); | ||
185 | ret = viodasd_open(bdev, mode); | ||
186 | mutex_unlock(&viodasd_mutex); | ||
187 | |||
188 | return ret; | ||
189 | } | ||
190 | |||
191 | |||
192 | /* | ||
193 | * External release entry point. | ||
194 | */ | ||
195 | static int viodasd_release(struct gendisk *disk, fmode_t mode) | ||
196 | { | ||
197 | struct viodasd_device *d = disk->private_data; | ||
198 | HvLpEvent_Rc hvrc; | ||
199 | |||
200 | mutex_lock(&viodasd_mutex); | ||
201 | /* Send the event to OS/400. We DON'T expect a response */ | ||
202 | hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, | ||
203 | HvLpEvent_Type_VirtualIo, | ||
204 | viomajorsubtype_blockio | vioblockclose, | ||
205 | HvLpEvent_AckInd_NoAck, HvLpEvent_AckType_ImmediateAck, | ||
206 | viopath_sourceinst(viopath_hostLp), | ||
207 | viopath_targetinst(viopath_hostLp), | ||
208 | 0, VIOVERSION << 16, | ||
209 | ((u64)DEVICE_NO(d) << 48) /* | ((u64)flags << 32) */, | ||
210 | 0, 0, 0); | ||
211 | if (hvrc != 0) | ||
212 | pr_warning("HV close call failed %d\n", (int)hvrc); | ||
213 | |||
214 | mutex_unlock(&viodasd_mutex); | ||
215 | |||
216 | return 0; | ||
217 | } | ||
218 | |||
219 | |||
220 | /* External ioctl entry point. | ||
221 | */ | ||
222 | static int viodasd_getgeo(struct block_device *bdev, struct hd_geometry *geo) | ||
223 | { | ||
224 | struct gendisk *disk = bdev->bd_disk; | ||
225 | struct viodasd_device *d = disk->private_data; | ||
226 | |||
227 | geo->sectors = d->sectors ? d->sectors : 32; | ||
228 | geo->heads = d->tracks ? d->tracks : 64; | ||
229 | geo->cylinders = d->cylinders ? d->cylinders : | ||
230 | get_capacity(disk) / (geo->sectors * geo->heads); | ||
231 | |||
232 | return 0; | ||
233 | } | ||
234 | |||
235 | /* | ||
236 | * Our file operations table | ||
237 | */ | ||
238 | static const struct block_device_operations viodasd_fops = { | ||
239 | .owner = THIS_MODULE, | ||
240 | .open = viodasd_unlocked_open, | ||
241 | .release = viodasd_release, | ||
242 | .getgeo = viodasd_getgeo, | ||
243 | }; | ||
244 | |||
245 | /* | ||
246 | * End a request | ||
247 | */ | ||
248 | static void viodasd_end_request(struct request *req, int error, | ||
249 | int num_sectors) | ||
250 | { | ||
251 | __blk_end_request(req, error, num_sectors << 9); | ||
252 | } | ||
253 | |||
254 | /* | ||
255 | * Send an actual I/O request to OS/400 | ||
256 | */ | ||
257 | static int send_request(struct request *req) | ||
258 | { | ||
259 | u64 start; | ||
260 | int direction; | ||
261 | int nsg; | ||
262 | u16 viocmd; | ||
263 | HvLpEvent_Rc hvrc; | ||
264 | struct vioblocklpevent *bevent; | ||
265 | struct HvLpEvent *hev; | ||
266 | struct scatterlist sg[VIOMAXBLOCKDMA]; | ||
267 | int sgindex; | ||
268 | struct viodasd_device *d; | ||
269 | unsigned long flags; | ||
270 | |||
271 | start = (u64)blk_rq_pos(req) << 9; | ||
272 | |||
273 | if (rq_data_dir(req) == READ) { | ||
274 | direction = DMA_FROM_DEVICE; | ||
275 | viocmd = viomajorsubtype_blockio | vioblockread; | ||
276 | } else { | ||
277 | direction = DMA_TO_DEVICE; | ||
278 | viocmd = viomajorsubtype_blockio | vioblockwrite; | ||
279 | } | ||
280 | |||
281 | d = req->rq_disk->private_data; | ||
282 | |||
283 | /* Now build the scatter-gather list */ | ||
284 | sg_init_table(sg, VIOMAXBLOCKDMA); | ||
285 | nsg = blk_rq_map_sg(req->q, req, sg); | ||
286 | nsg = dma_map_sg(d->dev, sg, nsg, direction); | ||
287 | |||
288 | spin_lock_irqsave(&viodasd_spinlock, flags); | ||
289 | num_req_outstanding++; | ||
290 | |||
291 | /* This optimization handles a single DMA block */ | ||
292 | if (nsg == 1) | ||
293 | hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, | ||
294 | HvLpEvent_Type_VirtualIo, viocmd, | ||
295 | HvLpEvent_AckInd_DoAck, | ||
296 | HvLpEvent_AckType_ImmediateAck, | ||
297 | viopath_sourceinst(viopath_hostLp), | ||
298 | viopath_targetinst(viopath_hostLp), | ||
299 | (u64)(unsigned long)req, VIOVERSION << 16, | ||
300 | ((u64)DEVICE_NO(d) << 48), start, | ||
301 | ((u64)sg_dma_address(&sg[0])) << 32, | ||
302 | sg_dma_len(&sg[0])); | ||
303 | else { | ||
304 | bevent = (struct vioblocklpevent *) | ||
305 | vio_get_event_buffer(viomajorsubtype_blockio); | ||
306 | if (bevent == NULL) { | ||
307 | pr_warning("error allocating disk event buffer\n"); | ||
308 | goto error_ret; | ||
309 | } | ||
310 | |||
311 | /* | ||
312 | * Now build up the actual request. Note that we store | ||
313 | * the pointer to the request in the correlation | ||
314 | * token so we can match the response up later | ||
315 | */ | ||
316 | memset(bevent, 0, sizeof(struct vioblocklpevent)); | ||
317 | hev = &bevent->event; | ||
318 | hev->flags = HV_LP_EVENT_VALID | HV_LP_EVENT_DO_ACK | | ||
319 | HV_LP_EVENT_INT; | ||
320 | hev->xType = HvLpEvent_Type_VirtualIo; | ||
321 | hev->xSubtype = viocmd; | ||
322 | hev->xSourceLp = HvLpConfig_getLpIndex(); | ||
323 | hev->xTargetLp = viopath_hostLp; | ||
324 | hev->xSizeMinus1 = | ||
325 | offsetof(struct vioblocklpevent, u.rw_data.dma_info) + | ||
326 | (sizeof(bevent->u.rw_data.dma_info[0]) * nsg) - 1; | ||
327 | hev->xSourceInstanceId = viopath_sourceinst(viopath_hostLp); | ||
328 | hev->xTargetInstanceId = viopath_targetinst(viopath_hostLp); | ||
329 | hev->xCorrelationToken = (u64)req; | ||
330 | bevent->version = VIOVERSION; | ||
331 | bevent->disk = DEVICE_NO(d); | ||
332 | bevent->u.rw_data.offset = start; | ||
333 | |||
334 | /* | ||
335 | * Copy just the dma information from the sg list | ||
336 | * into the request | ||
337 | */ | ||
338 | for (sgindex = 0; sgindex < nsg; sgindex++) { | ||
339 | bevent->u.rw_data.dma_info[sgindex].token = | ||
340 | sg_dma_address(&sg[sgindex]); | ||
341 | bevent->u.rw_data.dma_info[sgindex].len = | ||
342 | sg_dma_len(&sg[sgindex]); | ||
343 | } | ||
344 | |||
345 | /* Send the request */ | ||
346 | hvrc = HvCallEvent_signalLpEvent(&bevent->event); | ||
347 | vio_free_event_buffer(viomajorsubtype_blockio, bevent); | ||
348 | } | ||
349 | |||
350 | if (hvrc != HvLpEvent_Rc_Good) { | ||
351 | pr_warning("error sending disk event to OS/400 (rc %d)\n", | ||
352 | (int)hvrc); | ||
353 | goto error_ret; | ||
354 | } | ||
355 | spin_unlock_irqrestore(&viodasd_spinlock, flags); | ||
356 | return 0; | ||
357 | |||
358 | error_ret: | ||
359 | num_req_outstanding--; | ||
360 | spin_unlock_irqrestore(&viodasd_spinlock, flags); | ||
361 | dma_unmap_sg(d->dev, sg, nsg, direction); | ||
362 | return -1; | ||
363 | } | ||
364 | |||
365 | /* | ||
366 | * This is the external request processing routine | ||
367 | */ | ||
368 | static void do_viodasd_request(struct request_queue *q) | ||
369 | { | ||
370 | struct request *req; | ||
371 | |||
372 | /* | ||
373 | * If we already have the maximum number of requests | ||
374 | * outstanding to OS/400 just bail out. We'll come | ||
375 | * back later. | ||
376 | */ | ||
377 | while (num_req_outstanding < VIOMAXREQ) { | ||
378 | req = blk_fetch_request(q); | ||
379 | if (req == NULL) | ||
380 | return; | ||
381 | /* check that request contains a valid command */ | ||
382 | if (req->cmd_type != REQ_TYPE_FS) { | ||
383 | viodasd_end_request(req, -EIO, blk_rq_sectors(req)); | ||
384 | continue; | ||
385 | } | ||
386 | /* Try sending the request */ | ||
387 | if (send_request(req) != 0) | ||
388 | viodasd_end_request(req, -EIO, blk_rq_sectors(req)); | ||
389 | } | ||
390 | } | ||
391 | |||
392 | /* | ||
393 | * Probe a single disk and fill in the viodasd_device structure | ||
394 | * for it. | ||
395 | */ | ||
396 | static int probe_disk(struct viodasd_device *d) | ||
397 | { | ||
398 | HvLpEvent_Rc hvrc; | ||
399 | struct viodasd_waitevent we; | ||
400 | int dev_no = DEVICE_NO(d); | ||
401 | struct gendisk *g; | ||
402 | struct request_queue *q; | ||
403 | u16 flags = 0; | ||
404 | |||
405 | retry: | ||
406 | init_completion(&we.com); | ||
407 | |||
408 | /* Send the open event to OS/400 */ | ||
409 | hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, | ||
410 | HvLpEvent_Type_VirtualIo, | ||
411 | viomajorsubtype_blockio | vioblockopen, | ||
412 | HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck, | ||
413 | viopath_sourceinst(viopath_hostLp), | ||
414 | viopath_targetinst(viopath_hostLp), | ||
415 | (u64)(unsigned long)&we, VIOVERSION << 16, | ||
416 | ((u64)dev_no << 48) | ((u64)flags<< 32), | ||
417 | 0, 0, 0); | ||
418 | if (hvrc != 0) { | ||
419 | pr_warning("bad rc on HV open %d\n", (int)hvrc); | ||
420 | return 0; | ||
421 | } | ||
422 | |||
423 | wait_for_completion(&we.com); | ||
424 | |||
425 | if (we.rc != 0) { | ||
426 | if (flags != 0) | ||
427 | return 0; | ||
428 | /* try again with read only flag set */ | ||
429 | flags = vioblockflags_ro; | ||
430 | goto retry; | ||
431 | } | ||
432 | if (we.max_disk > (MAX_DISKNO - 1)) { | ||
433 | printk_once(KERN_INFO pr_fmt("Only examining the first %d of %d disks connected\n"), | ||
434 | MAX_DISKNO, we.max_disk + 1); | ||
435 | } | ||
436 | |||
437 | /* Send the close event to OS/400. We DON'T expect a response */ | ||
438 | hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, | ||
439 | HvLpEvent_Type_VirtualIo, | ||
440 | viomajorsubtype_blockio | vioblockclose, | ||
441 | HvLpEvent_AckInd_NoAck, HvLpEvent_AckType_ImmediateAck, | ||
442 | viopath_sourceinst(viopath_hostLp), | ||
443 | viopath_targetinst(viopath_hostLp), | ||
444 | 0, VIOVERSION << 16, | ||
445 | ((u64)dev_no << 48) | ((u64)flags << 32), | ||
446 | 0, 0, 0); | ||
447 | if (hvrc != 0) { | ||
448 | pr_warning("bad rc sending event to OS/400 %d\n", (int)hvrc); | ||
449 | return 0; | ||
450 | } | ||
451 | |||
452 | if (d->dev == NULL) { | ||
453 | /* this is when we reprobe for new disks */ | ||
454 | if (vio_create_viodasd(dev_no) == NULL) { | ||
455 | pr_warning("cannot allocate virtual device for disk %d\n", | ||
456 | dev_no); | ||
457 | return 0; | ||
458 | } | ||
459 | /* | ||
460 | * The vio_create_viodasd will have recursed into this | ||
461 | * routine with d->dev set to the new vio device and | ||
462 | * will finish the setup of the disk below. | ||
463 | */ | ||
464 | return 1; | ||
465 | } | ||
466 | |||
467 | /* create the request queue for the disk */ | ||
468 | spin_lock_init(&d->q_lock); | ||
469 | q = blk_init_queue(do_viodasd_request, &d->q_lock); | ||
470 | if (q == NULL) { | ||
471 | pr_warning("cannot allocate queue for disk %d\n", dev_no); | ||
472 | return 0; | ||
473 | } | ||
474 | g = alloc_disk(1 << PARTITION_SHIFT); | ||
475 | if (g == NULL) { | ||
476 | pr_warning("cannot allocate disk structure for disk %d\n", | ||
477 | dev_no); | ||
478 | blk_cleanup_queue(q); | ||
479 | return 0; | ||
480 | } | ||
481 | |||
482 | d->disk = g; | ||
483 | blk_queue_max_segments(q, VIOMAXBLOCKDMA); | ||
484 | blk_queue_max_hw_sectors(q, VIODASD_MAXSECTORS); | ||
485 | g->major = VIODASD_MAJOR; | ||
486 | g->first_minor = dev_no << PARTITION_SHIFT; | ||
487 | if (dev_no >= 26) | ||
488 | snprintf(g->disk_name, sizeof(g->disk_name), | ||
489 | VIOD_GENHD_NAME "%c%c", | ||
490 | 'a' + (dev_no / 26) - 1, 'a' + (dev_no % 26)); | ||
491 | else | ||
492 | snprintf(g->disk_name, sizeof(g->disk_name), | ||
493 | VIOD_GENHD_NAME "%c", 'a' + (dev_no % 26)); | ||
494 | g->fops = &viodasd_fops; | ||
495 | g->queue = q; | ||
496 | g->private_data = d; | ||
497 | g->driverfs_dev = d->dev; | ||
498 | set_capacity(g, d->size >> 9); | ||
499 | |||
500 | pr_info("disk %d: %lu sectors (%lu MB) CHS=%d/%d/%d sector size %d%s\n", | ||
501 | dev_no, (unsigned long)(d->size >> 9), | ||
502 | (unsigned long)(d->size >> 20), | ||
503 | (int)d->cylinders, (int)d->tracks, | ||
504 | (int)d->sectors, (int)d->bytes_per_sector, | ||
505 | d->read_only ? " (RO)" : ""); | ||
506 | |||
507 | /* register us in the global list */ | ||
508 | add_disk(g); | ||
509 | return 1; | ||
510 | } | ||
511 | |||
512 | /* returns the total number of scatterlist elements converted */ | ||
513 | static int block_event_to_scatterlist(const struct vioblocklpevent *bevent, | ||
514 | struct scatterlist *sg, int *total_len) | ||
515 | { | ||
516 | int i, numsg; | ||
517 | const struct rw_data *rw_data = &bevent->u.rw_data; | ||
518 | static const int offset = | ||
519 | offsetof(struct vioblocklpevent, u.rw_data.dma_info); | ||
520 | static const int element_size = sizeof(rw_data->dma_info[0]); | ||
521 | |||
522 | numsg = ((bevent->event.xSizeMinus1 + 1) - offset) / element_size; | ||
523 | if (numsg > VIOMAXBLOCKDMA) | ||
524 | numsg = VIOMAXBLOCKDMA; | ||
525 | |||
526 | *total_len = 0; | ||
527 | sg_init_table(sg, VIOMAXBLOCKDMA); | ||
528 | for (i = 0; (i < numsg) && (rw_data->dma_info[i].len > 0); ++i) { | ||
529 | sg_dma_address(&sg[i]) = rw_data->dma_info[i].token; | ||
530 | sg_dma_len(&sg[i]) = rw_data->dma_info[i].len; | ||
531 | *total_len += rw_data->dma_info[i].len; | ||
532 | } | ||
533 | return i; | ||
534 | } | ||
535 | |||
536 | /* | ||
537 | * Restart all queues, starting with the one _after_ the disk given, | ||
538 | * thus reducing the chance of starvation of higher numbered disks. | ||
539 | */ | ||
540 | static void viodasd_restart_all_queues_starting_from(int first_index) | ||
541 | { | ||
542 | int i; | ||
543 | |||
544 | for (i = first_index + 1; i < MAX_DISKNO; ++i) | ||
545 | if (viodasd_devices[i].disk) | ||
546 | blk_run_queue(viodasd_devices[i].disk->queue); | ||
547 | for (i = 0; i <= first_index; ++i) | ||
548 | if (viodasd_devices[i].disk) | ||
549 | blk_run_queue(viodasd_devices[i].disk->queue); | ||
550 | } | ||
551 | |||
552 | /* | ||
553 | * For read and write requests, decrement the number of outstanding requests, | ||
554 | * Free the DMA buffers we allocated. | ||
555 | */ | ||
556 | static int viodasd_handle_read_write(struct vioblocklpevent *bevent) | ||
557 | { | ||
558 | int num_sg, num_sect, pci_direction, total_len; | ||
559 | struct request *req; | ||
560 | struct scatterlist sg[VIOMAXBLOCKDMA]; | ||
561 | struct HvLpEvent *event = &bevent->event; | ||
562 | unsigned long irq_flags; | ||
563 | struct viodasd_device *d; | ||
564 | int error; | ||
565 | spinlock_t *qlock; | ||
566 | |||
567 | num_sg = block_event_to_scatterlist(bevent, sg, &total_len); | ||
568 | num_sect = total_len >> 9; | ||
569 | if (event->xSubtype == (viomajorsubtype_blockio | vioblockread)) | ||
570 | pci_direction = DMA_FROM_DEVICE; | ||
571 | else | ||
572 | pci_direction = DMA_TO_DEVICE; | ||
573 | req = (struct request *)bevent->event.xCorrelationToken; | ||
574 | d = req->rq_disk->private_data; | ||
575 | |||
576 | dma_unmap_sg(d->dev, sg, num_sg, pci_direction); | ||
577 | |||
578 | /* | ||
579 | * Since this is running in interrupt mode, we need to make sure | ||
580 | * we're not stepping on any global I/O operations | ||
581 | */ | ||
582 | spin_lock_irqsave(&viodasd_spinlock, irq_flags); | ||
583 | num_req_outstanding--; | ||
584 | spin_unlock_irqrestore(&viodasd_spinlock, irq_flags); | ||
585 | |||
586 | error = (event->xRc == HvLpEvent_Rc_Good) ? 0 : -EIO; | ||
587 | if (error) { | ||
588 | const struct vio_error_entry *err; | ||
589 | err = vio_lookup_rc(viodasd_err_table, bevent->sub_result); | ||
590 | pr_warning("read/write error %d:0x%04x (%s)\n", | ||
591 | event->xRc, bevent->sub_result, err->msg); | ||
592 | num_sect = blk_rq_sectors(req); | ||
593 | } | ||
594 | qlock = req->q->queue_lock; | ||
595 | spin_lock_irqsave(qlock, irq_flags); | ||
596 | viodasd_end_request(req, error, num_sect); | ||
597 | spin_unlock_irqrestore(qlock, irq_flags); | ||
598 | |||
599 | /* Finally, try to get more requests off of this device's queue */ | ||
600 | viodasd_restart_all_queues_starting_from(DEVICE_NO(d)); | ||
601 | |||
602 | return 0; | ||
603 | } | ||
604 | |||
605 | /* This routine handles incoming block LP events */ | ||
606 | static void handle_block_event(struct HvLpEvent *event) | ||
607 | { | ||
608 | struct vioblocklpevent *bevent = (struct vioblocklpevent *)event; | ||
609 | struct viodasd_waitevent *pwe; | ||
610 | |||
611 | if (event == NULL) | ||
612 | /* Notification that a partition went away! */ | ||
613 | return; | ||
614 | /* First, we should NEVER get an int here...only acks */ | ||
615 | if (hvlpevent_is_int(event)) { | ||
616 | pr_warning("Yikes! got an int in viodasd event handler!\n"); | ||
617 | if (hvlpevent_need_ack(event)) { | ||
618 | event->xRc = HvLpEvent_Rc_InvalidSubtype; | ||
619 | HvCallEvent_ackLpEvent(event); | ||
620 | } | ||
621 | } | ||
622 | |||
623 | switch (event->xSubtype & VIOMINOR_SUBTYPE_MASK) { | ||
624 | case vioblockopen: | ||
625 | /* | ||
626 | * Handle a response to an open request. We get all the | ||
627 | * disk information in the response, so update it. The | ||
628 | * correlation token contains a pointer to a waitevent | ||
629 | * structure that has a completion in it. update the | ||
630 | * return code in the waitevent structure and post the | ||
631 | * completion to wake up the guy who sent the request | ||
632 | */ | ||
633 | pwe = (struct viodasd_waitevent *)event->xCorrelationToken; | ||
634 | pwe->rc = event->xRc; | ||
635 | pwe->sub_result = bevent->sub_result; | ||
636 | if (event->xRc == HvLpEvent_Rc_Good) { | ||
637 | const struct open_data *data = &bevent->u.open_data; | ||
638 | struct viodasd_device *device = | ||
639 | &viodasd_devices[bevent->disk]; | ||
640 | device->read_only = | ||
641 | bevent->flags & vioblockflags_ro; | ||
642 | device->size = data->disk_size; | ||
643 | device->cylinders = data->cylinders; | ||
644 | device->tracks = data->tracks; | ||
645 | device->sectors = data->sectors; | ||
646 | device->bytes_per_sector = data->bytes_per_sector; | ||
647 | pwe->max_disk = data->max_disk; | ||
648 | } | ||
649 | complete(&pwe->com); | ||
650 | break; | ||
651 | case vioblockclose: | ||
652 | break; | ||
653 | case vioblockread: | ||
654 | case vioblockwrite: | ||
655 | viodasd_handle_read_write(bevent); | ||
656 | break; | ||
657 | |||
658 | default: | ||
659 | pr_warning("invalid subtype!"); | ||
660 | if (hvlpevent_need_ack(event)) { | ||
661 | event->xRc = HvLpEvent_Rc_InvalidSubtype; | ||
662 | HvCallEvent_ackLpEvent(event); | ||
663 | } | ||
664 | } | ||
665 | } | ||
666 | |||
667 | /* | ||
668 | * Get the driver to reprobe for more disks. | ||
669 | */ | ||
670 | static ssize_t probe_disks(struct device_driver *drv, const char *buf, | ||
671 | size_t count) | ||
672 | { | ||
673 | struct viodasd_device *d; | ||
674 | |||
675 | for (d = viodasd_devices; d < &viodasd_devices[MAX_DISKNO]; d++) { | ||
676 | if (d->disk == NULL) | ||
677 | probe_disk(d); | ||
678 | } | ||
679 | return count; | ||
680 | } | ||
681 | static DRIVER_ATTR(probe, S_IWUSR, NULL, probe_disks); | ||
682 | |||
683 | static int viodasd_probe(struct vio_dev *vdev, const struct vio_device_id *id) | ||
684 | { | ||
685 | struct viodasd_device *d = &viodasd_devices[vdev->unit_address]; | ||
686 | |||
687 | d->dev = &vdev->dev; | ||
688 | if (!probe_disk(d)) | ||
689 | return -ENODEV; | ||
690 | return 0; | ||
691 | } | ||
692 | |||
693 | static int viodasd_remove(struct vio_dev *vdev) | ||
694 | { | ||
695 | struct viodasd_device *d; | ||
696 | |||
697 | d = &viodasd_devices[vdev->unit_address]; | ||
698 | if (d->disk) { | ||
699 | del_gendisk(d->disk); | ||
700 | blk_cleanup_queue(d->disk->queue); | ||
701 | put_disk(d->disk); | ||
702 | d->disk = NULL; | ||
703 | } | ||
704 | d->dev = NULL; | ||
705 | return 0; | ||
706 | } | ||
707 | |||
708 | /** | ||
709 | * viodasd_device_table: Used by vio.c to match devices that we | ||
710 | * support. | ||
711 | */ | ||
712 | static struct vio_device_id viodasd_device_table[] __devinitdata = { | ||
713 | { "block", "IBM,iSeries-viodasd" }, | ||
714 | { "", "" } | ||
715 | }; | ||
716 | MODULE_DEVICE_TABLE(vio, viodasd_device_table); | ||
717 | |||
718 | static struct vio_driver viodasd_driver = { | ||
719 | .id_table = viodasd_device_table, | ||
720 | .probe = viodasd_probe, | ||
721 | .remove = viodasd_remove, | ||
722 | .driver = { | ||
723 | .name = "viodasd", | ||
724 | .owner = THIS_MODULE, | ||
725 | } | ||
726 | }; | ||
727 | |||
728 | static int need_delete_probe; | ||
729 | |||
730 | /* | ||
731 | * Initialize the whole device driver. Handle module and non-module | ||
732 | * versions | ||
733 | */ | ||
734 | static int __init viodasd_init(void) | ||
735 | { | ||
736 | int rc; | ||
737 | |||
738 | if (!firmware_has_feature(FW_FEATURE_ISERIES)) { | ||
739 | rc = -ENODEV; | ||
740 | goto early_fail; | ||
741 | } | ||
742 | |||
743 | /* Try to open to our host lp */ | ||
744 | if (viopath_hostLp == HvLpIndexInvalid) | ||
745 | vio_set_hostlp(); | ||
746 | |||
747 | if (viopath_hostLp == HvLpIndexInvalid) { | ||
748 | pr_warning("invalid hosting partition\n"); | ||
749 | rc = -EIO; | ||
750 | goto early_fail; | ||
751 | } | ||
752 | |||
753 | pr_info("vers " VIOD_VERS ", hosting partition %d\n", viopath_hostLp); | ||
754 | |||
755 | /* register the block device */ | ||
756 | rc = register_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME); | ||
757 | if (rc) { | ||
758 | pr_warning("Unable to get major number %d for %s\n", | ||
759 | VIODASD_MAJOR, VIOD_GENHD_NAME); | ||
760 | goto early_fail; | ||
761 | } | ||
762 | /* Actually open the path to the hosting partition */ | ||
763 | rc = viopath_open(viopath_hostLp, viomajorsubtype_blockio, | ||
764 | VIOMAXREQ + 2); | ||
765 | if (rc) { | ||
766 | pr_warning("error opening path to host partition %d\n", | ||
767 | viopath_hostLp); | ||
768 | goto unregister_blk; | ||
769 | } | ||
770 | |||
771 | /* Initialize our request handler */ | ||
772 | vio_setHandler(viomajorsubtype_blockio, handle_block_event); | ||
773 | |||
774 | rc = vio_register_driver(&viodasd_driver); | ||
775 | if (rc) { | ||
776 | pr_warning("vio_register_driver failed\n"); | ||
777 | goto unset_handler; | ||
778 | } | ||
779 | |||
780 | /* | ||
781 | * If this call fails, it just means that we cannot dynamically | ||
782 | * add virtual disks, but the driver will still work fine for | ||
783 | * all existing disk, so ignore the failure. | ||
784 | */ | ||
785 | if (!driver_create_file(&viodasd_driver.driver, &driver_attr_probe)) | ||
786 | need_delete_probe = 1; | ||
787 | |||
788 | return 0; | ||
789 | |||
790 | unset_handler: | ||
791 | vio_clearHandler(viomajorsubtype_blockio); | ||
792 | viopath_close(viopath_hostLp, viomajorsubtype_blockio, VIOMAXREQ + 2); | ||
793 | unregister_blk: | ||
794 | unregister_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME); | ||
795 | early_fail: | ||
796 | return rc; | ||
797 | } | ||
798 | module_init(viodasd_init); | ||
799 | |||
800 | void __exit viodasd_exit(void) | ||
801 | { | ||
802 | if (need_delete_probe) | ||
803 | driver_remove_file(&viodasd_driver.driver, &driver_attr_probe); | ||
804 | vio_unregister_driver(&viodasd_driver); | ||
805 | vio_clearHandler(viomajorsubtype_blockio); | ||
806 | viopath_close(viopath_hostLp, viomajorsubtype_blockio, VIOMAXREQ + 2); | ||
807 | unregister_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME); | ||
808 | } | ||
809 | module_exit(viodasd_exit); | ||
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 4d0b70adf5f7..c4a60badf252 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/blkdev.h> | 4 | #include <linux/blkdev.h> |
5 | #include <linux/hdreg.h> | 5 | #include <linux/hdreg.h> |
6 | #include <linux/module.h> | 6 | #include <linux/module.h> |
7 | #include <linux/mutex.h> | ||
7 | #include <linux/virtio.h> | 8 | #include <linux/virtio.h> |
8 | #include <linux/virtio_blk.h> | 9 | #include <linux/virtio_blk.h> |
9 | #include <linux/scatterlist.h> | 10 | #include <linux/scatterlist.h> |
@@ -36,6 +37,12 @@ struct virtio_blk | |||
36 | /* Process context for config space updates */ | 37 | /* Process context for config space updates */ |
37 | struct work_struct config_work; | 38 | struct work_struct config_work; |
38 | 39 | ||
40 | /* Lock for config space updates */ | ||
41 | struct mutex config_lock; | ||
42 | |||
43 | /* enable config space updates */ | ||
44 | bool config_enable; | ||
45 | |||
39 | /* What host tells us, plus 2 for header & tailer. */ | 46 | /* What host tells us, plus 2 for header & tailer. */ |
40 | unsigned int sg_elems; | 47 | unsigned int sg_elems; |
41 | 48 | ||
@@ -172,7 +179,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, | |||
172 | } | 179 | } |
173 | } | 180 | } |
174 | 181 | ||
175 | if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) { | 182 | if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr, GFP_ATOMIC)<0) { |
176 | mempool_free(vbr, vblk->pool); | 183 | mempool_free(vbr, vblk->pool); |
177 | return false; | 184 | return false; |
178 | } | 185 | } |
@@ -243,8 +250,8 @@ static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, | |||
243 | if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI)) | 250 | if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI)) |
244 | return -ENOTTY; | 251 | return -ENOTTY; |
245 | 252 | ||
246 | return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, | 253 | return scsi_cmd_blk_ioctl(bdev, mode, cmd, |
247 | (void __user *)data); | 254 | (void __user *)data); |
248 | } | 255 | } |
249 | 256 | ||
250 | /* We provide getgeo only to please some old bootloader/partitioning tools */ | 257 | /* We provide getgeo only to please some old bootloader/partitioning tools */ |
@@ -318,6 +325,10 @@ static void virtblk_config_changed_work(struct work_struct *work) | |||
318 | char cap_str_2[10], cap_str_10[10]; | 325 | char cap_str_2[10], cap_str_10[10]; |
319 | u64 capacity, size; | 326 | u64 capacity, size; |
320 | 327 | ||
328 | mutex_lock(&vblk->config_lock); | ||
329 | if (!vblk->config_enable) | ||
330 | goto done; | ||
331 | |||
321 | /* Host must always specify the capacity. */ | 332 | /* Host must always specify the capacity. */ |
322 | vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity), | 333 | vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity), |
323 | &capacity, sizeof(capacity)); | 334 | &capacity, sizeof(capacity)); |
@@ -340,6 +351,8 @@ static void virtblk_config_changed_work(struct work_struct *work) | |||
340 | cap_str_10, cap_str_2); | 351 | cap_str_10, cap_str_2); |
341 | 352 | ||
342 | set_capacity(vblk->disk, capacity); | 353 | set_capacity(vblk->disk, capacity); |
354 | done: | ||
355 | mutex_unlock(&vblk->config_lock); | ||
343 | } | 356 | } |
344 | 357 | ||
345 | static void virtblk_config_changed(struct virtio_device *vdev) | 358 | static void virtblk_config_changed(struct virtio_device *vdev) |
@@ -349,6 +362,18 @@ static void virtblk_config_changed(struct virtio_device *vdev) | |||
349 | queue_work(virtblk_wq, &vblk->config_work); | 362 | queue_work(virtblk_wq, &vblk->config_work); |
350 | } | 363 | } |
351 | 364 | ||
365 | static int init_vq(struct virtio_blk *vblk) | ||
366 | { | ||
367 | int err = 0; | ||
368 | |||
369 | /* We expect one virtqueue, for output. */ | ||
370 | vblk->vq = virtio_find_single_vq(vblk->vdev, blk_done, "requests"); | ||
371 | if (IS_ERR(vblk->vq)) | ||
372 | err = PTR_ERR(vblk->vq); | ||
373 | |||
374 | return err; | ||
375 | } | ||
376 | |||
352 | static int __devinit virtblk_probe(struct virtio_device *vdev) | 377 | static int __devinit virtblk_probe(struct virtio_device *vdev) |
353 | { | 378 | { |
354 | struct virtio_blk *vblk; | 379 | struct virtio_blk *vblk; |
@@ -388,14 +413,13 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) | |||
388 | vblk->vdev = vdev; | 413 | vblk->vdev = vdev; |
389 | vblk->sg_elems = sg_elems; | 414 | vblk->sg_elems = sg_elems; |
390 | sg_init_table(vblk->sg, vblk->sg_elems); | 415 | sg_init_table(vblk->sg, vblk->sg_elems); |
416 | mutex_init(&vblk->config_lock); | ||
391 | INIT_WORK(&vblk->config_work, virtblk_config_changed_work); | 417 | INIT_WORK(&vblk->config_work, virtblk_config_changed_work); |
418 | vblk->config_enable = true; | ||
392 | 419 | ||
393 | /* We expect one virtqueue, for output. */ | 420 | err = init_vq(vblk); |
394 | vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests"); | 421 | if (err) |
395 | if (IS_ERR(vblk->vq)) { | ||
396 | err = PTR_ERR(vblk->vq); | ||
397 | goto out_free_vblk; | 422 | goto out_free_vblk; |
398 | } | ||
399 | 423 | ||
400 | vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req)); | 424 | vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req)); |
401 | if (!vblk->pool) { | 425 | if (!vblk->pool) { |
@@ -542,7 +566,10 @@ static void __devexit virtblk_remove(struct virtio_device *vdev) | |||
542 | struct virtio_blk *vblk = vdev->priv; | 566 | struct virtio_blk *vblk = vdev->priv; |
543 | int index = vblk->index; | 567 | int index = vblk->index; |
544 | 568 | ||
545 | flush_work(&vblk->config_work); | 569 | /* Prevent config work handler from accessing the device. */ |
570 | mutex_lock(&vblk->config_lock); | ||
571 | vblk->config_enable = false; | ||
572 | mutex_unlock(&vblk->config_lock); | ||
546 | 573 | ||
547 | /* Nothing should be pending. */ | 574 | /* Nothing should be pending. */ |
548 | BUG_ON(!list_empty(&vblk->reqs)); | 575 | BUG_ON(!list_empty(&vblk->reqs)); |
@@ -550,6 +577,8 @@ static void __devexit virtblk_remove(struct virtio_device *vdev) | |||
550 | /* Stop all the virtqueues. */ | 577 | /* Stop all the virtqueues. */ |
551 | vdev->config->reset(vdev); | 578 | vdev->config->reset(vdev); |
552 | 579 | ||
580 | flush_work(&vblk->config_work); | ||
581 | |||
553 | del_gendisk(vblk->disk); | 582 | del_gendisk(vblk->disk); |
554 | blk_cleanup_queue(vblk->disk->queue); | 583 | blk_cleanup_queue(vblk->disk->queue); |
555 | put_disk(vblk->disk); | 584 | put_disk(vblk->disk); |
@@ -559,6 +588,46 @@ static void __devexit virtblk_remove(struct virtio_device *vdev) | |||
559 | ida_simple_remove(&vd_index_ida, index); | 588 | ida_simple_remove(&vd_index_ida, index); |
560 | } | 589 | } |
561 | 590 | ||
591 | #ifdef CONFIG_PM | ||
592 | static int virtblk_freeze(struct virtio_device *vdev) | ||
593 | { | ||
594 | struct virtio_blk *vblk = vdev->priv; | ||
595 | |||
596 | /* Ensure we don't receive any more interrupts */ | ||
597 | vdev->config->reset(vdev); | ||
598 | |||
599 | /* Prevent config work handler from accessing the device. */ | ||
600 | mutex_lock(&vblk->config_lock); | ||
601 | vblk->config_enable = false; | ||
602 | mutex_unlock(&vblk->config_lock); | ||
603 | |||
604 | flush_work(&vblk->config_work); | ||
605 | |||
606 | spin_lock_irq(vblk->disk->queue->queue_lock); | ||
607 | blk_stop_queue(vblk->disk->queue); | ||
608 | spin_unlock_irq(vblk->disk->queue->queue_lock); | ||
609 | blk_sync_queue(vblk->disk->queue); | ||
610 | |||
611 | vdev->config->del_vqs(vdev); | ||
612 | return 0; | ||
613 | } | ||
614 | |||
615 | static int virtblk_restore(struct virtio_device *vdev) | ||
616 | { | ||
617 | struct virtio_blk *vblk = vdev->priv; | ||
618 | int ret; | ||
619 | |||
620 | vblk->config_enable = true; | ||
621 | ret = init_vq(vdev->priv); | ||
622 | if (!ret) { | ||
623 | spin_lock_irq(vblk->disk->queue->queue_lock); | ||
624 | blk_start_queue(vblk->disk->queue); | ||
625 | spin_unlock_irq(vblk->disk->queue->queue_lock); | ||
626 | } | ||
627 | return ret; | ||
628 | } | ||
629 | #endif | ||
630 | |||
562 | static const struct virtio_device_id id_table[] = { | 631 | static const struct virtio_device_id id_table[] = { |
563 | { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID }, | 632 | { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID }, |
564 | { 0 }, | 633 | { 0 }, |
@@ -584,6 +653,10 @@ static struct virtio_driver __refdata virtio_blk = { | |||
584 | .probe = virtblk_probe, | 653 | .probe = virtblk_probe, |
585 | .remove = __devexit_p(virtblk_remove), | 654 | .remove = __devexit_p(virtblk_remove), |
586 | .config_changed = virtblk_config_changed, | 655 | .config_changed = virtblk_config_changed, |
656 | #ifdef CONFIG_PM | ||
657 | .freeze = virtblk_freeze, | ||
658 | .restore = virtblk_restore, | ||
659 | #endif | ||
587 | }; | 660 | }; |
588 | 661 | ||
589 | static int __init init(void) | 662 | static int __init init(void) |
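The virtio_blk changes add a config_lock/config_enable pair so the config-change work item can be fenced off before remove() and freeze() touch the device, and only drained with flush_work() once the device has been reset. A hypothetical helper — not in the patch, just a sketch of the quiesce pattern using the fields added above — would be:

        static void virtblk_disable_config(struct virtio_blk *vblk)
        {
                /*
                 * New runs of virtblk_config_changed_work() bail out early
                 * once config_enable is false...
                 */
                mutex_lock(&vblk->config_lock);
                vblk->config_enable = false;
                mutex_unlock(&vblk->config_lock);

                /* ...and any handler already running is waited for here. */
                flush_work(&vblk->config_work);
        }

In virtblk_freeze() the flush happens after vdev->config->reset(), so no further config interrupts can requeue the work while the queues are torn down; virtblk_restore() then sets config_enable back to true before re-creating the virtqueue.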
diff --git a/drivers/block/xd.c b/drivers/block/xd.c index 4abd2bcd20fb..ff540520bada 100644 --- a/drivers/block/xd.c +++ b/drivers/block/xd.c | |||
@@ -52,7 +52,6 @@ | |||
52 | #include <linux/io.h> | 52 | #include <linux/io.h> |
53 | #include <linux/gfp.h> | 53 | #include <linux/gfp.h> |
54 | 54 | ||
55 | #include <asm/system.h> | ||
56 | #include <asm/uaccess.h> | 55 | #include <asm/uaccess.h> |
57 | #include <asm/dma.h> | 56 | #include <asm/dma.h> |
58 | 57 | ||
@@ -148,7 +147,7 @@ static volatile int xdc_busy; | |||
148 | static struct timer_list xd_watchdog_int; | 147 | static struct timer_list xd_watchdog_int; |
149 | 148 | ||
150 | static volatile u_char xd_error; | 149 | static volatile u_char xd_error; |
151 | static int nodma = XD_DONT_USE_DMA; | 150 | static bool nodma = XD_DONT_USE_DMA; |
152 | 151 | ||
153 | static struct request_queue *xd_queue; | 152 | static struct request_queue *xd_queue; |
154 | 153 | ||
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 15ec4db194d1..0088bf60f368 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c | |||
@@ -39,9 +39,6 @@ | |||
39 | #include <linux/list.h> | 39 | #include <linux/list.h> |
40 | #include <linux/delay.h> | 40 | #include <linux/delay.h> |
41 | #include <linux/freezer.h> | 41 | #include <linux/freezer.h> |
42 | #include <linux/loop.h> | ||
43 | #include <linux/falloc.h> | ||
44 | #include <linux/fs.h> | ||
45 | 42 | ||
46 | #include <xen/events.h> | 43 | #include <xen/events.h> |
47 | #include <xen/page.h> | 44 | #include <xen/page.h> |
@@ -362,7 +359,7 @@ static int xen_blkbk_map(struct blkif_request *req, | |||
362 | { | 359 | { |
363 | struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | 360 | struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
364 | int i; | 361 | int i; |
365 | int nseg = req->nr_segments; | 362 | int nseg = req->u.rw.nr_segments; |
366 | int ret = 0; | 363 | int ret = 0; |
367 | 364 | ||
368 | /* | 365 | /* |
@@ -416,30 +413,25 @@ static int xen_blkbk_map(struct blkif_request *req, | |||
416 | return ret; | 413 | return ret; |
417 | } | 414 | } |
418 | 415 | ||
419 | static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req) | 416 | static int dispatch_discard_io(struct xen_blkif *blkif, |
417 | struct blkif_request *req) | ||
420 | { | 418 | { |
421 | int err = 0; | 419 | int err = 0; |
422 | int status = BLKIF_RSP_OKAY; | 420 | int status = BLKIF_RSP_OKAY; |
423 | struct block_device *bdev = blkif->vbd.bdev; | 421 | struct block_device *bdev = blkif->vbd.bdev; |
424 | 422 | ||
425 | if (blkif->blk_backend_type == BLKIF_BACKEND_PHY) | 423 | blkif->st_ds_req++; |
426 | /* just forward the discard request */ | 424 | |
425 | xen_blkif_get(blkif); | ||
426 | if (blkif->blk_backend_type == BLKIF_BACKEND_PHY || | ||
427 | blkif->blk_backend_type == BLKIF_BACKEND_FILE) { | ||
428 | unsigned long secure = (blkif->vbd.discard_secure && | ||
429 | (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ? | ||
430 | BLKDEV_DISCARD_SECURE : 0; | ||
427 | err = blkdev_issue_discard(bdev, | 431 | err = blkdev_issue_discard(bdev, |
428 | req->u.discard.sector_number, | 432 | req->u.discard.sector_number, |
429 | req->u.discard.nr_sectors, | 433 | req->u.discard.nr_sectors, |
430 | GFP_KERNEL, 0); | 434 | GFP_KERNEL, secure); |
431 | else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) { | ||
432 | /* punch a hole in the backing file */ | ||
433 | struct loop_device *lo = bdev->bd_disk->private_data; | ||
434 | struct file *file = lo->lo_backing_file; | ||
435 | |||
436 | if (file->f_op->fallocate) | ||
437 | err = file->f_op->fallocate(file, | ||
438 | FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, | ||
439 | req->u.discard.sector_number << 9, | ||
440 | req->u.discard.nr_sectors << 9); | ||
441 | else | ||
442 | err = -EOPNOTSUPP; | ||
443 | } else | 435 | } else |
444 | err = -EOPNOTSUPP; | 436 | err = -EOPNOTSUPP; |
445 | 437 | ||
@@ -449,7 +441,9 @@ static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req) | |||
449 | } else if (err) | 441 | } else if (err) |
450 | status = BLKIF_RSP_ERROR; | 442 | status = BLKIF_RSP_ERROR; |
451 | 443 | ||
452 | make_response(blkif, req->id, req->operation, status); | 444 | make_response(blkif, req->u.discard.id, req->operation, status); |
445 | xen_blkif_put(blkif); | ||
446 | return err; | ||
453 | } | 447 | } |
454 | 448 | ||
455 | static void xen_blk_drain_io(struct xen_blkif *blkif) | 449 | static void xen_blk_drain_io(struct xen_blkif *blkif) |
@@ -573,8 +567,11 @@ __do_block_io_op(struct xen_blkif *blkif) | |||
573 | 567 | ||
574 | /* Apply all sanity checks to /private copy/ of request. */ | 568 | /* Apply all sanity checks to /private copy/ of request. */ |
575 | barrier(); | 569 | barrier(); |
576 | 570 | if (unlikely(req.operation == BLKIF_OP_DISCARD)) { | |
577 | if (dispatch_rw_block_io(blkif, &req, pending_req)) | 571 | free_req(pending_req); |
572 | if (dispatch_discard_io(blkif, &req)) | ||
573 | break; | ||
574 | } else if (dispatch_rw_block_io(blkif, &req, pending_req)) | ||
578 | break; | 575 | break; |
579 | 576 | ||
580 | /* Yield point for this unbounded loop. */ | 577 | /* Yield point for this unbounded loop. */ |
@@ -633,10 +630,6 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, | |||
633 | blkif->st_f_req++; | 630 | blkif->st_f_req++; |
634 | operation = WRITE_FLUSH; | 631 | operation = WRITE_FLUSH; |
635 | break; | 632 | break; |
636 | case BLKIF_OP_DISCARD: | ||
637 | blkif->st_ds_req++; | ||
638 | operation = REQ_DISCARD; | ||
639 | break; | ||
640 | default: | 633 | default: |
641 | operation = 0; /* make gcc happy */ | 634 | operation = 0; /* make gcc happy */ |
642 | goto fail_response; | 635 | goto fail_response; |
@@ -644,9 +637,9 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, | |||
644 | } | 637 | } |
645 | 638 | ||
646 | /* Check that the number of segments is sane. */ | 639 | /* Check that the number of segments is sane. */ |
647 | nseg = req->nr_segments; | 640 | nseg = req->u.rw.nr_segments; |
648 | if (unlikely(nseg == 0 && operation != WRITE_FLUSH && | 641 | |
649 | operation != REQ_DISCARD) || | 642 | if (unlikely(nseg == 0 && operation != WRITE_FLUSH) || |
650 | unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { | 643 | unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { |
651 | pr_debug(DRV_PFX "Bad number of segments in request (%d)\n", | 644 | pr_debug(DRV_PFX "Bad number of segments in request (%d)\n", |
652 | nseg); | 645 | nseg); |
@@ -654,12 +647,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, | |||
654 | goto fail_response; | 647 | goto fail_response; |
655 | } | 648 | } |
656 | 649 | ||
657 | preq.dev = req->handle; | 650 | preq.dev = req->u.rw.handle; |
658 | preq.sector_number = req->u.rw.sector_number; | 651 | preq.sector_number = req->u.rw.sector_number; |
659 | preq.nr_sects = 0; | 652 | preq.nr_sects = 0; |
660 | 653 | ||
661 | pending_req->blkif = blkif; | 654 | pending_req->blkif = blkif; |
662 | pending_req->id = req->id; | 655 | pending_req->id = req->u.rw.id; |
663 | pending_req->operation = req->operation; | 656 | pending_req->operation = req->operation; |
664 | pending_req->status = BLKIF_RSP_OKAY; | 657 | pending_req->status = BLKIF_RSP_OKAY; |
665 | pending_req->nr_pages = nseg; | 658 | pending_req->nr_pages = nseg; |
@@ -707,7 +700,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, | |||
707 | * the hypercall to unmap the grants - that is all done in | 700 | * the hypercall to unmap the grants - that is all done in |
708 | * xen_blkbk_unmap. | 701 | * xen_blkbk_unmap. |
709 | */ | 702 | */ |
710 | if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg)) | 703 | if (xen_blkbk_map(req, pending_req, seg)) |
711 | goto fail_flush; | 704 | goto fail_flush; |
712 | 705 | ||
713 | /* | 706 | /* |
@@ -739,23 +732,16 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, | |||
739 | 732 | ||
740 | /* This will be hit if the operation was a flush or discard. */ | 733 | /* This will be hit if the operation was a flush or discard. */ |
741 | if (!bio) { | 734 | if (!bio) { |
742 | BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD); | 735 | BUG_ON(operation != WRITE_FLUSH); |
743 | 736 | ||
744 | if (operation == WRITE_FLUSH) { | 737 | bio = bio_alloc(GFP_KERNEL, 0); |
745 | bio = bio_alloc(GFP_KERNEL, 0); | 738 | if (unlikely(bio == NULL)) |
746 | if (unlikely(bio == NULL)) | 739 | goto fail_put_bio; |
747 | goto fail_put_bio; | ||
748 | 740 | ||
749 | biolist[nbio++] = bio; | 741 | biolist[nbio++] = bio; |
750 | bio->bi_bdev = preq.bdev; | 742 | bio->bi_bdev = preq.bdev; |
751 | bio->bi_private = pending_req; | 743 | bio->bi_private = pending_req; |
752 | bio->bi_end_io = end_block_io_op; | 744 | bio->bi_end_io = end_block_io_op; |
753 | } else if (operation == REQ_DISCARD) { | ||
754 | xen_blk_discard(blkif, req); | ||
755 | xen_blkif_put(blkif); | ||
756 | free_req(pending_req); | ||
757 | return 0; | ||
758 | } | ||
759 | } | 745 | } |
760 | 746 | ||
761 | /* | 747 | /* |
@@ -784,7 +770,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, | |||
784 | xen_blkbk_unmap(pending_req); | 770 | xen_blkbk_unmap(pending_req); |
785 | fail_response: | 771 | fail_response: |
786 | /* Haven't submitted any bio's yet. */ | 772 | /* Haven't submitted any bio's yet. */ |
787 | make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); | 773 | make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR); |
788 | free_req(pending_req); | 774 | free_req(pending_req); |
789 | msleep(1); /* back off a bit */ | 775 | msleep(1); /* back off a bit */ |
790 | return -EIO; | 776 | return -EIO; |
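
The hunks above move discard handling out of the read/write path: __do_block_io_op() frees the pending request and calls dispatch_discard_io() directly, which forwards the request with blkdev_issue_discard() and passes a secure-erase flag only when both the vbd and the guest request ask for it. The following userspace sketch mirrors only that decision logic; the mock_* types, helpers and flag values are stand-ins, not the kernel code.

    /*
     * Hedged userspace sketch of the dispatch split above: discards take their
     * own path (no pending_req, no grant mapping) and the secure-erase flag is
     * set only when both the vbd and the guest request ask for it.  The mock_*
     * types, helpers and flag values are stand-ins, not the kernel's.
     */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define BLKIF_OP_DISCARD       5         /* as in the Xen blkif interface */
    #define BLKIF_DISCARD_SECURE   (1u << 0)
    #define BLKDEV_DISCARD_SECURE  (1u << 1) /* placeholder for the kernel flag */

    struct mock_request {
        uint8_t operation;
        uint8_t discard_flag;                /* req->u.discard.flag in the patch */
    };

    struct mock_blkif {
        bool vbd_discard_secure;             /* blkif->vbd.discard_secure */
    };

    static int dispatch_discard(struct mock_blkif *blkif, struct mock_request *req)
    {
        unsigned long secure = (blkif->vbd_discard_secure &&
                                (req->discard_flag & BLKIF_DISCARD_SECURE)) ?
                                BLKDEV_DISCARD_SECURE : 0;

        /* The real code calls blkdev_issue_discard(bdev, ..., secure) here. */
        printf("discard: secure flags = %lu\n", secure);
        return 0;
    }

    static int dispatch_rw(struct mock_blkif *blkif, struct mock_request *req)
    {
        (void)blkif;
        printf("rw path (op %u): map grants, build and submit bios\n",
               req->operation);
        return 0;
    }

    int main(void)
    {
        struct mock_blkif blkif = { .vbd_discard_secure = true };
        struct mock_request req = { .operation = BLKIF_OP_DISCARD,
                                    .discard_flag = BLKIF_DISCARD_SECURE };

        /* Mirrors __do_block_io_op(): discards never reach the rw path. */
        if (req.operation == BLKIF_OP_DISCARD)
            return dispatch_discard(&blkif, &req);
        return dispatch_rw(&blkif, &req);
    }
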
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index dfb1b3a43a5d..d0ee7edc9be8 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h | |||
@@ -60,58 +60,66 @@ struct blkif_common_response { | |||
60 | char dummy; | 60 | char dummy; |
61 | }; | 61 | }; |
62 | 62 | ||
63 | /* i386 protocol version */ | ||
64 | #pragma pack(push, 4) | ||
65 | |||
66 | struct blkif_x86_32_request_rw { | 63 | struct blkif_x86_32_request_rw { |
64 | uint8_t nr_segments; /* number of segments */ | ||
65 | blkif_vdev_t handle; /* only for read/write requests */ | ||
66 | uint64_t id; /* private guest value, echoed in resp */ | ||
67 | blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ | 67 | blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ |
68 | struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | 68 | struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
69 | }; | 69 | } __attribute__((__packed__)); |
70 | 70 | ||
71 | struct blkif_x86_32_request_discard { | 71 | struct blkif_x86_32_request_discard { |
72 | uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */ | ||
73 | blkif_vdev_t _pad1; /* was "handle" for read/write requests */ | ||
74 | uint64_t id; /* private guest value, echoed in resp */ | ||
72 | blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ | 75 | blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ |
73 | uint64_t nr_sectors; | 76 | uint64_t nr_sectors; |
74 | }; | 77 | } __attribute__((__packed__)); |
75 | 78 | ||
76 | struct blkif_x86_32_request { | 79 | struct blkif_x86_32_request { |
77 | uint8_t operation; /* BLKIF_OP_??? */ | 80 | uint8_t operation; /* BLKIF_OP_??? */ |
78 | uint8_t nr_segments; /* number of segments */ | ||
79 | blkif_vdev_t handle; /* only for read/write requests */ | ||
80 | uint64_t id; /* private guest value, echoed in resp */ | ||
81 | union { | 81 | union { |
82 | struct blkif_x86_32_request_rw rw; | 82 | struct blkif_x86_32_request_rw rw; |
83 | struct blkif_x86_32_request_discard discard; | 83 | struct blkif_x86_32_request_discard discard; |
84 | } u; | 84 | } u; |
85 | }; | 85 | } __attribute__((__packed__)); |
86 | |||
87 | /* i386 protocol version */ | ||
88 | #pragma pack(push, 4) | ||
86 | struct blkif_x86_32_response { | 89 | struct blkif_x86_32_response { |
87 | uint64_t id; /* copied from request */ | 90 | uint64_t id; /* copied from request */ |
88 | uint8_t operation; /* copied from request */ | 91 | uint8_t operation; /* copied from request */ |
89 | int16_t status; /* BLKIF_RSP_??? */ | 92 | int16_t status; /* BLKIF_RSP_??? */ |
90 | }; | 93 | }; |
91 | #pragma pack(pop) | 94 | #pragma pack(pop) |
92 | |||
93 | /* x86_64 protocol version */ | 95 | /* x86_64 protocol version */ |
94 | 96 | ||
95 | struct blkif_x86_64_request_rw { | 97 | struct blkif_x86_64_request_rw { |
98 | uint8_t nr_segments; /* number of segments */ | ||
99 | blkif_vdev_t handle; /* only for read/write requests */ | ||
100 | uint32_t _pad1; /* offsetof(blkif_request..,u.rw.id)==8 */ | ||
101 | uint64_t id; | ||
96 | blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ | 102 | blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ |
97 | struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | 103 | struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
98 | }; | 104 | } __attribute__((__packed__)); |
99 | 105 | ||
100 | struct blkif_x86_64_request_discard { | 106 | struct blkif_x86_64_request_discard { |
107 | uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */ | ||
108 | blkif_vdev_t _pad1; /* was "handle" for read/write requests */ | ||
109 | uint32_t _pad2; /* offsetof(blkif_..,u.discard.id)==8 */ | ||
110 | uint64_t id; | ||
101 | blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ | 111 | blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ |
102 | uint64_t nr_sectors; | 112 | uint64_t nr_sectors; |
103 | }; | 113 | } __attribute__((__packed__)); |
104 | 114 | ||
105 | struct blkif_x86_64_request { | 115 | struct blkif_x86_64_request { |
106 | uint8_t operation; /* BLKIF_OP_??? */ | 116 | uint8_t operation; /* BLKIF_OP_??? */ |
107 | uint8_t nr_segments; /* number of segments */ | ||
108 | blkif_vdev_t handle; /* only for read/write requests */ | ||
109 | uint64_t __attribute__((__aligned__(8))) id; | ||
110 | union { | 117 | union { |
111 | struct blkif_x86_64_request_rw rw; | 118 | struct blkif_x86_64_request_rw rw; |
112 | struct blkif_x86_64_request_discard discard; | 119 | struct blkif_x86_64_request_discard discard; |
113 | } u; | 120 | } u; |
114 | }; | 121 | } __attribute__((__packed__)); |
122 | |||
115 | struct blkif_x86_64_response { | 123 | struct blkif_x86_64_response { |
116 | uint64_t __attribute__((__aligned__(8))) id; | 124 | uint64_t __attribute__((__aligned__(8))) id; |
117 | uint8_t operation; /* copied from request */ | 125 | uint8_t operation; /* copied from request */ |
@@ -156,6 +164,7 @@ struct xen_vbd { | |||
156 | /* Cached size parameter. */ | 164 | /* Cached size parameter. */ |
157 | sector_t size; | 165 | sector_t size; |
158 | bool flush_support; | 166 | bool flush_support; |
167 | bool discard_secure; | ||
159 | }; | 168 | }; |
160 | 169 | ||
161 | struct backend_info; | 170 | struct backend_info; |
@@ -237,22 +246,23 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst, | |||
237 | { | 246 | { |
238 | int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; | 247 | int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; |
239 | dst->operation = src->operation; | 248 | dst->operation = src->operation; |
240 | dst->nr_segments = src->nr_segments; | ||
241 | dst->handle = src->handle; | ||
242 | dst->id = src->id; | ||
243 | switch (src->operation) { | 249 | switch (src->operation) { |
244 | case BLKIF_OP_READ: | 250 | case BLKIF_OP_READ: |
245 | case BLKIF_OP_WRITE: | 251 | case BLKIF_OP_WRITE: |
246 | case BLKIF_OP_WRITE_BARRIER: | 252 | case BLKIF_OP_WRITE_BARRIER: |
247 | case BLKIF_OP_FLUSH_DISKCACHE: | 253 | case BLKIF_OP_FLUSH_DISKCACHE: |
254 | dst->u.rw.nr_segments = src->u.rw.nr_segments; | ||
255 | dst->u.rw.handle = src->u.rw.handle; | ||
256 | dst->u.rw.id = src->u.rw.id; | ||
248 | dst->u.rw.sector_number = src->u.rw.sector_number; | 257 | dst->u.rw.sector_number = src->u.rw.sector_number; |
249 | barrier(); | 258 | barrier(); |
250 | if (n > dst->nr_segments) | 259 | if (n > dst->u.rw.nr_segments) |
251 | n = dst->nr_segments; | 260 | n = dst->u.rw.nr_segments; |
252 | for (i = 0; i < n; i++) | 261 | for (i = 0; i < n; i++) |
253 | dst->u.rw.seg[i] = src->u.rw.seg[i]; | 262 | dst->u.rw.seg[i] = src->u.rw.seg[i]; |
254 | break; | 263 | break; |
255 | case BLKIF_OP_DISCARD: | 264 | case BLKIF_OP_DISCARD: |
265 | dst->u.discard.flag = src->u.discard.flag; | ||
256 | dst->u.discard.sector_number = src->u.discard.sector_number; | 266 | dst->u.discard.sector_number = src->u.discard.sector_number; |
257 | dst->u.discard.nr_sectors = src->u.discard.nr_sectors; | 267 | dst->u.discard.nr_sectors = src->u.discard.nr_sectors; |
258 | break; | 268 | break; |
@@ -266,22 +276,23 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst, | |||
266 | { | 276 | { |
267 | int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; | 277 | int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; |
268 | dst->operation = src->operation; | 278 | dst->operation = src->operation; |
269 | dst->nr_segments = src->nr_segments; | ||
270 | dst->handle = src->handle; | ||
271 | dst->id = src->id; | ||
272 | switch (src->operation) { | 279 | switch (src->operation) { |
273 | case BLKIF_OP_READ: | 280 | case BLKIF_OP_READ: |
274 | case BLKIF_OP_WRITE: | 281 | case BLKIF_OP_WRITE: |
275 | case BLKIF_OP_WRITE_BARRIER: | 282 | case BLKIF_OP_WRITE_BARRIER: |
276 | case BLKIF_OP_FLUSH_DISKCACHE: | 283 | case BLKIF_OP_FLUSH_DISKCACHE: |
284 | dst->u.rw.nr_segments = src->u.rw.nr_segments; | ||
285 | dst->u.rw.handle = src->u.rw.handle; | ||
286 | dst->u.rw.id = src->u.rw.id; | ||
277 | dst->u.rw.sector_number = src->u.rw.sector_number; | 287 | dst->u.rw.sector_number = src->u.rw.sector_number; |
278 | barrier(); | 288 | barrier(); |
279 | if (n > dst->nr_segments) | 289 | if (n > dst->u.rw.nr_segments) |
280 | n = dst->nr_segments; | 290 | n = dst->u.rw.nr_segments; |
281 | for (i = 0; i < n; i++) | 291 | for (i = 0; i < n; i++) |
282 | dst->u.rw.seg[i] = src->u.rw.seg[i]; | 292 | dst->u.rw.seg[i] = src->u.rw.seg[i]; |
283 | break; | 293 | break; |
284 | case BLKIF_OP_DISCARD: | 294 | case BLKIF_OP_DISCARD: |
295 | dst->u.discard.flag = src->u.discard.flag; | ||
285 | dst->u.discard.sector_number = src->u.discard.sector_number; | 296 | dst->u.discard.sector_number = src->u.discard.sector_number; |
286 | dst->u.discard.nr_sectors = src->u.discard.nr_sectors; | 297 | dst->u.discard.nr_sectors = src->u.discard.nr_sectors; |
287 | break; | 298 | break; |
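
The common.h changes above re-pack the guest request formats so each union member carries its own nr_segments/handle/id (or flag/_pad/id) fields, keeping the id the backend echoes back at a fixed offset regardless of operation. Below is a minimal layout sketch for the x86_64 variant; the typedefs and segment count are assumptions taken from the Xen interface headers, and the program only checks where the explicit padding puts u.rw.id and u.discard.id.

    /*
     * Layout sketch for the re-packed x86_64 guest request above.  The typedefs
     * and segment count are assumptions (blkif_vdev_t = uint16_t,
     * blkif_sector_t = uint64_t, grant_ref_t = uint32_t, 11 segments); the
     * point is only that the explicit _pad fields keep the guest-visible 'id'
     * at offset 8 for both union members.
     */
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uint16_t blkif_vdev_t;
    typedef uint64_t blkif_sector_t;
    typedef uint32_t grant_ref_t;
    #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11

    struct blkif_request_segment {
        grant_ref_t gref;
        uint8_t first_sect, last_sect;
    };

    struct blkif_x86_64_request_rw {
        uint8_t nr_segments;
        blkif_vdev_t handle;
        uint32_t _pad1;                       /* pushes id to outer offset 8 */
        uint64_t id;
        blkif_sector_t sector_number;
        struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    } __attribute__((__packed__));

    struct blkif_x86_64_request_discard {
        uint8_t flag;
        blkif_vdev_t _pad1;
        uint32_t _pad2;                       /* same trick for the discard id */
        uint64_t id;
        blkif_sector_t sector_number;
        uint64_t nr_sectors;
    } __attribute__((__packed__));

    struct blkif_x86_64_request {
        uint8_t operation;
        union {
            struct blkif_x86_64_request_rw rw;
            struct blkif_x86_64_request_discard discard;
        } u;
    } __attribute__((__packed__));

    int main(void)
    {
        size_t rw_id = offsetof(struct blkif_x86_64_request, u.rw.id);
        size_t ds_id = offsetof(struct blkif_x86_64_request, u.discard.id);

        /* Both must be 8, so the response can echo id regardless of operation. */
        printf("u.rw.id at %zu, u.discard.id at %zu\n", rw_id, ds_id);
        return (rw_id == 8 && ds_id == 8) ? 0 : 1;
    }
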
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 37c794d31264..24a2fb57e5d0 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c | |||
@@ -338,6 +338,9 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, | |||
338 | if (q && q->flush_flags) | 338 | if (q && q->flush_flags) |
339 | vbd->flush_support = true; | 339 | vbd->flush_support = true; |
340 | 340 | ||
341 | if (q && blk_queue_secdiscard(q)) | ||
342 | vbd->discard_secure = true; | ||
343 | |||
341 | DPRINTK("Successful creation of handle=%04x (dom=%u)\n", | 344 | DPRINTK("Successful creation of handle=%04x (dom=%u)\n", |
342 | handle, blkif->domid); | 345 | handle, blkif->domid); |
343 | return 0; | 346 | return 0; |
@@ -420,6 +423,15 @@ int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be) | |||
420 | state = 1; | 423 | state = 1; |
421 | blkif->blk_backend_type = BLKIF_BACKEND_PHY; | 424 | blkif->blk_backend_type = BLKIF_BACKEND_PHY; |
422 | } | 425 | } |
426 | /* Optional. */ | ||
427 | err = xenbus_printf(xbt, dev->nodename, | ||
428 | "discard-secure", "%d", | ||
429 | blkif->vbd.discard_secure); | ||
430 | if (err) { | ||
431 | xenbus_dev_fatal(dev, err, | ||
432 | "writting discard-secure"); | ||
433 | goto kfree; | ||
434 | } | ||
423 | } | 435 | } |
424 | } else { | 436 | } else { |
425 | err = PTR_ERR(type); | 437 | err = PTR_ERR(type); |
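
The xenbus changes above make the backend advertise an optional "discard-secure" key only when the underlying queue reports secure-discard support (blk_queue_secdiscard). A rough userspace sketch of that handshake follows; a plain struct stands in for the xenstore node and snprintf()/sscanf() stand in for xenbus_printf()/xenbus_gather(), so only the decision logic mirrors the patch.

    /*
     * Rough userspace sketch of the optional "discard-secure" handshake above.
     * The fake_xenstore struct and the stdio calls are stand-ins for xenstore
     * and the xenbus helpers.
     */
    #include <stdbool.h>
    #include <stdio.h>

    struct fake_xenstore {
        char discard_secure[4];               /* "" means the key is absent */
    };

    /* Backend: advertise the key only when the queue supports secure discard. */
    static void backend_advertise(struct fake_xenstore *xs, bool queue_secdiscard)
    {
        snprintf(xs->discard_secure, sizeof(xs->discard_secure), "%d",
                 queue_secdiscard ? 1 : 0);
    }

    /* Frontend: an absent or zero key means no secure discard support. */
    static bool frontend_probe(const struct fake_xenstore *xs)
    {
        int val = 0;

        if (sscanf(xs->discard_secure, "%d", &val) != 1)
            return false;
        return val != 0;
    }

    int main(void)
    {
        struct fake_xenstore xs = { "" };

        backend_advertise(&xs, true);
        printf("feature_secdiscard = %d\n", frontend_probe(&xs));
        return 0;
    }
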
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 19b6005a323e..98cbeba8cd53 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c | |||
@@ -98,7 +98,8 @@ struct blkfront_info | |||
98 | unsigned long shadow_free; | 98 | unsigned long shadow_free; |
99 | unsigned int feature_flush; | 99 | unsigned int feature_flush; |
100 | unsigned int flush_op; | 100 | unsigned int flush_op; |
101 | unsigned int feature_discard; | 101 | unsigned int feature_discard:1; |
102 | unsigned int feature_secdiscard:1; | ||
102 | unsigned int discard_granularity; | 103 | unsigned int discard_granularity; |
103 | unsigned int discard_alignment; | 104 | unsigned int discard_alignment; |
104 | int is_ready; | 105 | int is_ready; |
@@ -135,15 +136,15 @@ static int get_id_from_freelist(struct blkfront_info *info) | |||
135 | { | 136 | { |
136 | unsigned long free = info->shadow_free; | 137 | unsigned long free = info->shadow_free; |
137 | BUG_ON(free >= BLK_RING_SIZE); | 138 | BUG_ON(free >= BLK_RING_SIZE); |
138 | info->shadow_free = info->shadow[free].req.id; | 139 | info->shadow_free = info->shadow[free].req.u.rw.id; |
139 | info->shadow[free].req.id = 0x0fffffee; /* debug */ | 140 | info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */ |
140 | return free; | 141 | return free; |
141 | } | 142 | } |
142 | 143 | ||
143 | static void add_id_to_freelist(struct blkfront_info *info, | 144 | static void add_id_to_freelist(struct blkfront_info *info, |
144 | unsigned long id) | 145 | unsigned long id) |
145 | { | 146 | { |
146 | info->shadow[id].req.id = info->shadow_free; | 147 | info->shadow[id].req.u.rw.id = info->shadow_free; |
147 | info->shadow[id].request = NULL; | 148 | info->shadow[id].request = NULL; |
148 | info->shadow_free = id; | 149 | info->shadow_free = id; |
149 | } | 150 | } |
@@ -156,7 +157,7 @@ static int xlbd_reserve_minors(unsigned int minor, unsigned int nr) | |||
156 | if (end > nr_minors) { | 157 | if (end > nr_minors) { |
157 | unsigned long *bitmap, *old; | 158 | unsigned long *bitmap, *old; |
158 | 159 | ||
159 | bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap), | 160 | bitmap = kcalloc(BITS_TO_LONGS(end), sizeof(*bitmap), |
160 | GFP_KERNEL); | 161 | GFP_KERNEL); |
161 | if (bitmap == NULL) | 162 | if (bitmap == NULL) |
162 | return -ENOMEM; | 163 | return -ENOMEM; |
@@ -287,9 +288,9 @@ static int blkif_queue_request(struct request *req) | |||
287 | id = get_id_from_freelist(info); | 288 | id = get_id_from_freelist(info); |
288 | info->shadow[id].request = req; | 289 | info->shadow[id].request = req; |
289 | 290 | ||
290 | ring_req->id = id; | 291 | ring_req->u.rw.id = id; |
291 | ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req); | 292 | ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req); |
292 | ring_req->handle = info->handle; | 293 | ring_req->u.rw.handle = info->handle; |
293 | 294 | ||
294 | ring_req->operation = rq_data_dir(req) ? | 295 | ring_req->operation = rq_data_dir(req) ? |
295 | BLKIF_OP_WRITE : BLKIF_OP_READ; | 296 | BLKIF_OP_WRITE : BLKIF_OP_READ; |
@@ -305,16 +306,21 @@ static int blkif_queue_request(struct request *req) | |||
305 | ring_req->operation = info->flush_op; | 306 | ring_req->operation = info->flush_op; |
306 | } | 307 | } |
307 | 308 | ||
308 | if (unlikely(req->cmd_flags & REQ_DISCARD)) { | 309 | if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) { |
309 | /* id, sector_number and handle are set above. */ | 310 | /* id, sector_number and handle are set above. */ |
310 | ring_req->operation = BLKIF_OP_DISCARD; | 311 | ring_req->operation = BLKIF_OP_DISCARD; |
311 | ring_req->nr_segments = 0; | ||
312 | ring_req->u.discard.nr_sectors = blk_rq_sectors(req); | 312 | ring_req->u.discard.nr_sectors = blk_rq_sectors(req); |
313 | if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard) | ||
314 | ring_req->u.discard.flag = BLKIF_DISCARD_SECURE; | ||
315 | else | ||
316 | ring_req->u.discard.flag = 0; | ||
313 | } else { | 317 | } else { |
314 | ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); | 318 | ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req, |
315 | BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); | 319 | info->sg); |
320 | BUG_ON(ring_req->u.rw.nr_segments > | ||
321 | BLKIF_MAX_SEGMENTS_PER_REQUEST); | ||
316 | 322 | ||
317 | for_each_sg(info->sg, sg, ring_req->nr_segments, i) { | 323 | for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) { |
318 | buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg))); | 324 | buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg))); |
319 | fsect = sg->offset >> 9; | 325 | fsect = sg->offset >> 9; |
320 | lsect = fsect + (sg->length >> 9) - 1; | 326 | lsect = fsect + (sg->length >> 9) - 1; |
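
In the blkif_queue_request() hunk above, the frontend turns a secure discard request into a BLKIF_OP_DISCARD ring request with BLKIF_DISCARD_SECURE set, but only if the backend advertised the feature. A small sketch of just that mapping is below; the flag values are illustrative placeholders, not the kernel's.

    /*
     * Sketch of the flag mapping the frontend does when building a
     * BLKIF_OP_DISCARD ring request: REQ_SECURE becomes BLKIF_DISCARD_SECURE
     * only if the backend advertised secure discard.  Flag values are
     * placeholders.
     */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define REQ_SECURE            (1u << 10)  /* placeholder for the kernel bit */
    #define BLKIF_DISCARD_SECURE  (1u << 0)

    static uint8_t discard_flag(unsigned int cmd_flags, bool feature_secdiscard)
    {
        if ((cmd_flags & REQ_SECURE) && feature_secdiscard)
            return BLKIF_DISCARD_SECURE;
        return 0;
    }

    int main(void)
    {
        printf("secure, supported   -> %u\n", discard_flag(REQ_SECURE, true));
        printf("secure, unsupported -> %u\n", discard_flag(REQ_SECURE, false));
        printf("plain discard       -> %u\n", discard_flag(0, true));
        return 0;
    }
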
@@ -424,6 +430,8 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) | |||
424 | blk_queue_max_discard_sectors(rq, get_capacity(gd)); | 430 | blk_queue_max_discard_sectors(rq, get_capacity(gd)); |
425 | rq->limits.discard_granularity = info->discard_granularity; | 431 | rq->limits.discard_granularity = info->discard_granularity; |
426 | rq->limits.discard_alignment = info->discard_alignment; | 432 | rq->limits.discard_alignment = info->discard_alignment; |
433 | if (info->feature_secdiscard) | ||
434 | queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq); | ||
427 | } | 435 | } |
428 | 436 | ||
429 | /* Hard sector size and max sectors impersonate the equiv. hardware. */ | 437 | /* Hard sector size and max sectors impersonate the equiv. hardware. */ |
@@ -705,7 +713,9 @@ static void blkif_free(struct blkfront_info *info, int suspend) | |||
705 | static void blkif_completion(struct blk_shadow *s) | 713 | static void blkif_completion(struct blk_shadow *s) |
706 | { | 714 | { |
707 | int i; | 715 | int i; |
708 | for (i = 0; i < s->req.nr_segments; i++) | 716 | /* Do not call this for BLKIF_OP_DISCARD; nr_segments shares |
717 | * its storage with the discard flag. */ |
718 | for (i = 0; i < s->req.u.rw.nr_segments; i++) | ||
709 | gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL); | 719 | gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL); |
710 | } | 720 | } |
711 | 721 | ||
@@ -736,7 +746,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) | |||
736 | id = bret->id; | 746 | id = bret->id; |
737 | req = info->shadow[id].request; | 747 | req = info->shadow[id].request; |
738 | 748 | ||
739 | blkif_completion(&info->shadow[id]); | 749 | if (bret->operation != BLKIF_OP_DISCARD) |
750 | blkif_completion(&info->shadow[id]); | ||
740 | 751 | ||
741 | add_id_to_freelist(info, id); | 752 | add_id_to_freelist(info, id); |
742 | 753 | ||
@@ -749,7 +760,9 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) | |||
749 | info->gd->disk_name); | 760 | info->gd->disk_name); |
750 | error = -EOPNOTSUPP; | 761 | error = -EOPNOTSUPP; |
751 | info->feature_discard = 0; | 762 | info->feature_discard = 0; |
763 | info->feature_secdiscard = 0; | ||
752 | queue_flag_clear(QUEUE_FLAG_DISCARD, rq); | 764 | queue_flag_clear(QUEUE_FLAG_DISCARD, rq); |
765 | queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq); | ||
753 | } | 766 | } |
754 | __blk_end_request_all(req, error); | 767 | __blk_end_request_all(req, error); |
755 | break; | 768 | break; |
@@ -763,7 +776,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) | |||
763 | error = -EOPNOTSUPP; | 776 | error = -EOPNOTSUPP; |
764 | } | 777 | } |
765 | if (unlikely(bret->status == BLKIF_RSP_ERROR && | 778 | if (unlikely(bret->status == BLKIF_RSP_ERROR && |
766 | info->shadow[id].req.nr_segments == 0)) { | 779 | info->shadow[id].req.u.rw.nr_segments == 0)) { |
767 | printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n", | 780 | printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n", |
768 | info->flush_op == BLKIF_OP_WRITE_BARRIER ? | 781 | info->flush_op == BLKIF_OP_WRITE_BARRIER ? |
769 | "barrier" : "flush disk cache", | 782 | "barrier" : "flush disk cache", |
@@ -984,8 +997,8 @@ static int blkfront_probe(struct xenbus_device *dev, | |||
984 | INIT_WORK(&info->work, blkif_restart_queue); | 997 | INIT_WORK(&info->work, blkif_restart_queue); |
985 | 998 | ||
986 | for (i = 0; i < BLK_RING_SIZE; i++) | 999 | for (i = 0; i < BLK_RING_SIZE; i++) |
987 | info->shadow[i].req.id = i+1; | 1000 | info->shadow[i].req.u.rw.id = i+1; |
988 | info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; | 1001 | info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; |
989 | 1002 | ||
990 | /* Front end dir is a number, which is used as the id. */ | 1003 | /* Front end dir is a number, which is used as the id. */ |
991 | info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); | 1004 | info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); |
@@ -1019,9 +1032,9 @@ static int blkif_recover(struct blkfront_info *info) | |||
1019 | /* Stage 2: Set up free list. */ | 1032 | /* Stage 2: Set up free list. */ |
1020 | memset(&info->shadow, 0, sizeof(info->shadow)); | 1033 | memset(&info->shadow, 0, sizeof(info->shadow)); |
1021 | for (i = 0; i < BLK_RING_SIZE; i++) | 1034 | for (i = 0; i < BLK_RING_SIZE; i++) |
1022 | info->shadow[i].req.id = i+1; | 1035 | info->shadow[i].req.u.rw.id = i+1; |
1023 | info->shadow_free = info->ring.req_prod_pvt; | 1036 | info->shadow_free = info->ring.req_prod_pvt; |
1024 | info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; | 1037 | info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; |
1025 | 1038 | ||
1026 | /* Stage 3: Find pending requests and requeue them. */ | 1039 | /* Stage 3: Find pending requests and requeue them. */ |
1027 | for (i = 0; i < BLK_RING_SIZE; i++) { | 1040 | for (i = 0; i < BLK_RING_SIZE; i++) { |
@@ -1034,17 +1047,19 @@ static int blkif_recover(struct blkfront_info *info) | |||
1034 | *req = copy[i].req; | 1047 | *req = copy[i].req; |
1035 | 1048 | ||
1036 | /* We get a new request id, and must reset the shadow state. */ | 1049 | /* We get a new request id, and must reset the shadow state. */ |
1037 | req->id = get_id_from_freelist(info); | 1050 | req->u.rw.id = get_id_from_freelist(info); |
1038 | memcpy(&info->shadow[req->id], ©[i], sizeof(copy[i])); | 1051 | memcpy(&info->shadow[req->u.rw.id], ©[i], sizeof(copy[i])); |
1039 | 1052 | ||
1053 | if (req->operation != BLKIF_OP_DISCARD) { | ||
1040 | /* Rewrite any grant references invalidated by susp/resume. */ | 1054 | /* Rewrite any grant references invalidated by susp/resume. */ |
1041 | for (j = 0; j < req->nr_segments; j++) | 1055 | for (j = 0; j < req->u.rw.nr_segments; j++) |
1042 | gnttab_grant_foreign_access_ref( | 1056 | gnttab_grant_foreign_access_ref( |
1043 | req->u.rw.seg[j].gref, | 1057 | req->u.rw.seg[j].gref, |
1044 | info->xbdev->otherend_id, | 1058 | info->xbdev->otherend_id, |
1045 | pfn_to_mfn(info->shadow[req->id].frame[j]), | 1059 | pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]), |
1046 | rq_data_dir(info->shadow[req->id].request)); | 1060 | rq_data_dir(info->shadow[req->u.rw.id].request)); |
1047 | info->shadow[req->id].req = *req; | 1061 | } |
1062 | info->shadow[req->u.rw.id].req = *req; | ||
1048 | 1063 | ||
1049 | info->ring.req_prod_pvt++; | 1064 | info->ring.req_prod_pvt++; |
1050 | } | 1065 | } |
@@ -1135,11 +1150,13 @@ static void blkfront_setup_discard(struct blkfront_info *info) | |||
1135 | char *type; | 1150 | char *type; |
1136 | unsigned int discard_granularity; | 1151 | unsigned int discard_granularity; |
1137 | unsigned int discard_alignment; | 1152 | unsigned int discard_alignment; |
1153 | unsigned int discard_secure; | ||
1138 | 1154 | ||
1139 | type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL); | 1155 | type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL); |
1140 | if (IS_ERR(type)) | 1156 | if (IS_ERR(type)) |
1141 | return; | 1157 | return; |
1142 | 1158 | ||
1159 | info->feature_secdiscard = 0; | ||
1143 | if (strncmp(type, "phy", 3) == 0) { | 1160 | if (strncmp(type, "phy", 3) == 0) { |
1144 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, | 1161 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, |
1145 | "discard-granularity", "%u", &discard_granularity, | 1162 | "discard-granularity", "%u", &discard_granularity, |
@@ -1150,6 +1167,12 @@ static void blkfront_setup_discard(struct blkfront_info *info) | |||
1150 | info->discard_granularity = discard_granularity; | 1167 | info->discard_granularity = discard_granularity; |
1151 | info->discard_alignment = discard_alignment; | 1168 | info->discard_alignment = discard_alignment; |
1152 | } | 1169 | } |
1170 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, | ||
1171 | "discard-secure", "%d", &discard_secure, | ||
1172 | NULL); | ||
1173 | if (!err) | ||
1174 | info->feature_secdiscard = discard_secure; | ||
1175 | |||
1153 | } else if (strncmp(type, "file", 4) == 0) | 1176 | } else if (strncmp(type, "file", 4) == 0) |
1154 | info->feature_discard = 1; | 1177 | info->feature_discard = 1; |
1155 | 1178 | ||