author     Jiri Kosina <jkosina@suse.cz>    2013-12-19 09:08:03 -0500
committer  Jiri Kosina <jkosina@suse.cz>    2013-12-19 09:08:32 -0500
commit     e23c34bb41da65f354fb7eee04300c56ee48f60c (patch)
tree       549fbe449d55273b81ef104a9755109bf4ae7817 /drivers/block
parent     b481c2cb3534c85dca625973b33eba15f9af3e4c (diff)
parent     319e2e3f63c348a9b66db4667efa73178e18b17d (diff)
Merge branch 'master' into for-next
Sync with Linus' tree to be able to apply fixes on top of newer things in tree (efi-stub).

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
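For reference, a minimal way to inspect this merge from a local clone (this assumes a kernel tree that already contains both parent commits; the exact remote is not named on this page):

    # Metadata of the merge commit itself, without the diff
    $ git show -s e23c34bb41da65f354fb7eee04300c56ee48f60c

    # Commits brought in from Linus' tree (second parent) that were not yet in the
    # previous for-next tip (first parent), limited to drivers/block
    $ git log --oneline b481c2cb3534c85dca625973b33eba15f9af3e4c..319e2e3f63c348a9b66db4667efa73178e18b17d -- drivers/block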
Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/Kconfig                |   17
-rw-r--r--  drivers/block/Makefile               |    3
-rw-r--r--  drivers/block/amiflop.c              |    2
-rw-r--r--  drivers/block/aoe/aoe.h              |    4
-rw-r--r--  drivers/block/aoe/aoeblk.c           |  100
-rw-r--r--  drivers/block/aoe/aoecmd.c           |    4
-rw-r--r--  drivers/block/aoe/aoedev.c           |   10
-rw-r--r--  drivers/block/brd.c                  |    2
-rw-r--r--  drivers/block/cciss.c                |   14
-rw-r--r--  drivers/block/cpqarray.c             |    1
-rw-r--r--  drivers/block/drbd/drbd_int.h        |    3
-rw-r--r--  drivers/block/drbd/drbd_main.c       |   19
-rw-r--r--  drivers/block/drbd/drbd_nl.c         |    6
-rw-r--r--  drivers/block/drbd/drbd_receiver.c   |   45
-rw-r--r--  drivers/block/drbd/drbd_req.c        |    3
-rw-r--r--  drivers/block/floppy.c               |    4
-rw-r--r--  drivers/block/loop.c                 |   21
-rw-r--r--  drivers/block/mg_disk.c              |    4
-rw-r--r--  drivers/block/mtip32xx/Kconfig       |    2
-rw-r--r--  drivers/block/mtip32xx/mtip32xx.c    |  500
-rw-r--r--  drivers/block/mtip32xx/mtip32xx.h    |   18
-rw-r--r--  drivers/block/null_blk.c             |  635
-rw-r--r--  drivers/block/nvme-core.c            |  581
-rw-r--r--  drivers/block/nvme-scsi.c            |   24
-rw-r--r--  drivers/block/osdblk.c               |    2
-rw-r--r--  drivers/block/pktcdvd.c              |  300
-rw-r--r--  drivers/block/rbd.c                  |  115
-rw-r--r--  drivers/block/rsxx/core.c            |    8
-rw-r--r--  drivers/block/rsxx/dev.c             |    8
-rw-r--r--  drivers/block/rsxx/dma.c             |  119
-rw-r--r--  drivers/block/rsxx/rsxx_priv.h       |   11
-rw-r--r--  drivers/block/skd_main.c             | 5432
-rw-r--r--  drivers/block/skd_s1120.h            |  330
-rw-r--r--  drivers/block/swim.c                 |    2
-rw-r--r--  drivers/block/virtio_blk.c           |  404
-rw-r--r--  drivers/block/xen-blkback/blkback.c  |    3
-rw-r--r--  drivers/block/xen-blkback/xenbus.c   |    2
-rw-r--r--  drivers/block/xen-blkfront.c         |  217
38 files changed, 7862 insertions(+), 1113 deletions(-)
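The diffstat above can be regenerated locally by diffing the merge against its first parent (an assumption about how the page was produced; cgit normally renders a merge's diff against the first parent, so the numbers may differ if another parent was selected):

    $ git diff --stat b481c2cb3534c85dca625973b33eba15f9af3e4c \
          e23c34bb41da65f354fb7eee04300c56ee48f60c -- drivers/block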
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index e07a5fd58ad7..86b9f37d102e 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -15,6 +15,9 @@ menuconfig BLK_DEV
15 15
16if BLK_DEV 16if BLK_DEV
17 17
18config BLK_DEV_NULL_BLK
19 tristate "Null test block driver"
20
18config BLK_DEV_FD 21config BLK_DEV_FD
19 tristate "Normal floppy disk support" 22 tristate "Normal floppy disk support"
20 depends on ARCH_MAY_HAVE_PC_FDC 23 depends on ARCH_MAY_HAVE_PC_FDC
@@ -107,7 +110,7 @@ source "drivers/block/mtip32xx/Kconfig"
107 110
108config BLK_CPQ_DA 111config BLK_CPQ_DA
109 tristate "Compaq SMART2 support" 112 tristate "Compaq SMART2 support"
110 depends on PCI && VIRT_TO_BUS 113 depends on PCI && VIRT_TO_BUS && 0
111 help 114 help
112 This is the driver for Compaq Smart Array controllers. Everyone 115 This is the driver for Compaq Smart Array controllers. Everyone
113 using these boards should say Y here. See the file 116 using these boards should say Y here. See the file
@@ -316,6 +319,16 @@ config BLK_DEV_NVME
316 To compile this driver as a module, choose M here: the 319 To compile this driver as a module, choose M here: the
317 module will be called nvme. 320 module will be called nvme.
318 321
322config BLK_DEV_SKD
323 tristate "STEC S1120 Block Driver"
324 depends on PCI
325 depends on 64BIT
326 ---help---
327 Saying Y or M here will enable support for the
328 STEC, Inc. S1120 PCIe SSD.
329
330 Use device /dev/skd$N amd /dev/skd$Np$M.
331
319config BLK_DEV_OSD 332config BLK_DEV_OSD
320 tristate "OSD object-as-blkdev support" 333 tristate "OSD object-as-blkdev support"
321 depends on SCSI_OSD_ULD 334 depends on SCSI_OSD_ULD
@@ -505,7 +518,7 @@ config VIRTIO_BLK
505config BLK_DEV_HD 518config BLK_DEV_HD
506 bool "Very old hard disk (MFM/RLL/IDE) driver" 519 bool "Very old hard disk (MFM/RLL/IDE) driver"
507 depends on HAVE_IDE 520 depends on HAVE_IDE
508 depends on !ARM || ARCH_RPC || ARCH_SHARK || BROKEN 521 depends on !ARM || ARCH_RPC || BROKEN
509 help 522 help
510 This is a very old hard disk driver that lacks the enhanced 523 This is a very old hard disk driver that lacks the enhanced
511 functionality of the newer ones. 524 functionality of the newer ones.
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index d521b5a081b9..816d979c3266 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o
23obj-$(CONFIG_MG_DISK) += mg_disk.o 23obj-$(CONFIG_MG_DISK) += mg_disk.o
24obj-$(CONFIG_SUNVDC) += sunvdc.o 24obj-$(CONFIG_SUNVDC) += sunvdc.o
25obj-$(CONFIG_BLK_DEV_NVME) += nvme.o 25obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
26obj-$(CONFIG_BLK_DEV_SKD) += skd.o
26obj-$(CONFIG_BLK_DEV_OSD) += osdblk.o 27obj-$(CONFIG_BLK_DEV_OSD) += osdblk.o
27 28
28obj-$(CONFIG_BLK_DEV_UMEM) += umem.o 29obj-$(CONFIG_BLK_DEV_UMEM) += umem.o
@@ -40,6 +41,8 @@ obj-$(CONFIG_BLK_DEV_RBD) += rbd.o
40obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ 41obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/
41 42
42obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ 43obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/
44obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o
43 45
44nvme-y := nvme-core.o nvme-scsi.o 46nvme-y := nvme-core.o nvme-scsi.o
47skd-y := skd_main.o
45swim_mod-y := swim.o swim_asm.o 48swim_mod-y := swim.o swim_asm.o
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 4ff85b8785ee..748dea4f34dc 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -343,7 +343,7 @@ static int fd_motor_on(int nr)
343 unit[nr].motor = 1; 343 unit[nr].motor = 1;
344 fd_select(nr); 344 fd_select(nr);
345 345
346 INIT_COMPLETION(motor_on_completion); 346 reinit_completion(&motor_on_completion);
347 motor_on_timer.data = nr; 347 motor_on_timer.data = nr;
348 mod_timer(&motor_on_timer, jiffies + HZ/2); 348 mod_timer(&motor_on_timer, jiffies + HZ/2);
349 349
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 025c41d3cb33..14a9d1912318 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -1,5 +1,5 @@
1/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */ 1/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */
2#define VERSION "83" 2#define VERSION "85"
3#define AOE_MAJOR 152 3#define AOE_MAJOR 152
4#define DEVICE_NAME "aoe" 4#define DEVICE_NAME "aoe"
5 5
@@ -169,6 +169,7 @@ struct aoedev {
169 ulong ref; 169 ulong ref;
170 struct work_struct work;/* disk create work struct */ 170 struct work_struct work;/* disk create work struct */
171 struct gendisk *gd; 171 struct gendisk *gd;
172 struct dentry *debugfs;
172 struct request_queue *blkq; 173 struct request_queue *blkq;
173 struct hd_geometry geo; 174 struct hd_geometry geo;
174 sector_t ssize; 175 sector_t ssize;
@@ -206,6 +207,7 @@ struct ktstate {
206int aoeblk_init(void); 207int aoeblk_init(void);
207void aoeblk_exit(void); 208void aoeblk_exit(void);
208void aoeblk_gdalloc(void *); 209void aoeblk_gdalloc(void *);
210void aoedisk_rm_debugfs(struct aoedev *d);
209void aoedisk_rm_sysfs(struct aoedev *d); 211void aoedisk_rm_sysfs(struct aoedev *d);
210 212
211int aoechr_init(void); 213int aoechr_init(void);
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 916d9ed5c8aa..dd73e1ff1759 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -1,4 +1,4 @@
1/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ 1/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */
2/* 2/*
3 * aoeblk.c 3 * aoeblk.c
4 * block device routines 4 * block device routines
@@ -17,11 +17,13 @@
17#include <linux/mutex.h> 17#include <linux/mutex.h>
18#include <linux/export.h> 18#include <linux/export.h>
19#include <linux/moduleparam.h> 19#include <linux/moduleparam.h>
20#include <linux/debugfs.h>
20#include <scsi/sg.h> 21#include <scsi/sg.h>
21#include "aoe.h" 22#include "aoe.h"
22 23
23static DEFINE_MUTEX(aoeblk_mutex); 24static DEFINE_MUTEX(aoeblk_mutex);
24static struct kmem_cache *buf_pool_cache; 25static struct kmem_cache *buf_pool_cache;
26static struct dentry *aoe_debugfs_dir;
25 27
26/* GPFS needs a larger value than the default. */ 28/* GPFS needs a larger value than the default. */
27static int aoe_maxsectors; 29static int aoe_maxsectors;
@@ -108,6 +110,55 @@ static ssize_t aoedisk_show_payload(struct device *dev,
108 return snprintf(page, PAGE_SIZE, "%lu\n", d->maxbcnt); 110 return snprintf(page, PAGE_SIZE, "%lu\n", d->maxbcnt);
109} 111}
110 112
113static int aoedisk_debugfs_show(struct seq_file *s, void *ignored)
114{
115 struct aoedev *d;
116 struct aoetgt **t, **te;
117 struct aoeif *ifp, *ife;
118 unsigned long flags;
119 char c;
120
121 d = s->private;
122 seq_printf(s, "rttavg: %d rttdev: %d\n",
123 d->rttavg >> RTTSCALE,
124 d->rttdev >> RTTDSCALE);
125 seq_printf(s, "nskbpool: %d\n", skb_queue_len(&d->skbpool));
126 seq_printf(s, "kicked: %ld\n", d->kicked);
127 seq_printf(s, "maxbcnt: %ld\n", d->maxbcnt);
128 seq_printf(s, "ref: %ld\n", d->ref);
129
130 spin_lock_irqsave(&d->lock, flags);
131 t = d->targets;
132 te = t + d->ntargets;
133 for (; t < te && *t; t++) {
134 c = '\t';
135 seq_printf(s, "falloc: %ld\n", (*t)->falloc);
136 seq_printf(s, "ffree: %p\n",
137 list_empty(&(*t)->ffree) ? NULL : (*t)->ffree.next);
138 seq_printf(s, "%pm:%d:%d:%d\n", (*t)->addr, (*t)->nout,
139 (*t)->maxout, (*t)->nframes);
140 seq_printf(s, "\tssthresh:%d\n", (*t)->ssthresh);
141 seq_printf(s, "\ttaint:%d\n", (*t)->taint);
142 seq_printf(s, "\tr:%d\n", (*t)->rpkts);
143 seq_printf(s, "\tw:%d\n", (*t)->wpkts);
144 ifp = (*t)->ifs;
145 ife = ifp + ARRAY_SIZE((*t)->ifs);
146 for (; ifp->nd && ifp < ife; ifp++) {
147 seq_printf(s, "%c%s", c, ifp->nd->name);
148 c = ',';
149 }
150 seq_puts(s, "\n");
151 }
152 spin_unlock_irqrestore(&d->lock, flags);
153
154 return 0;
155}
156
157static int aoe_debugfs_open(struct inode *inode, struct file *file)
158{
159 return single_open(file, aoedisk_debugfs_show, inode->i_private);
160}
161
111static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL); 162static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL);
112static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL); 163static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL);
113static DEVICE_ATTR(netif, S_IRUGO, aoedisk_show_netif, NULL); 164static DEVICE_ATTR(netif, S_IRUGO, aoedisk_show_netif, NULL);
@@ -130,6 +181,44 @@ static const struct attribute_group attr_group = {
130 .attrs = aoe_attrs, 181 .attrs = aoe_attrs,
131}; 182};
132 183
184static const struct file_operations aoe_debugfs_fops = {
185 .open = aoe_debugfs_open,
186 .read = seq_read,
187 .llseek = seq_lseek,
188 .release = single_release,
189};
190
191static void
192aoedisk_add_debugfs(struct aoedev *d)
193{
194 struct dentry *entry;
195 char *p;
196
197 if (aoe_debugfs_dir == NULL)
198 return;
199 p = strchr(d->gd->disk_name, '/');
200 if (p == NULL)
201 p = d->gd->disk_name;
202 else
203 p++;
204 BUG_ON(*p == '\0');
205 entry = debugfs_create_file(p, 0444, aoe_debugfs_dir, d,
206 &aoe_debugfs_fops);
207 if (IS_ERR_OR_NULL(entry)) {
208 pr_info("aoe: cannot create debugfs file for %s\n",
209 d->gd->disk_name);
210 return;
211 }
212 BUG_ON(d->debugfs);
213 d->debugfs = entry;
214}
215void
216aoedisk_rm_debugfs(struct aoedev *d)
217{
218 debugfs_remove(d->debugfs);
219 d->debugfs = NULL;
220}
221
133static int 222static int
134aoedisk_add_sysfs(struct aoedev *d) 223aoedisk_add_sysfs(struct aoedev *d)
135{ 224{
@@ -330,6 +419,7 @@ aoeblk_gdalloc(void *vp)
330 419
331 add_disk(gd); 420 add_disk(gd);
332 aoedisk_add_sysfs(d); 421 aoedisk_add_sysfs(d);
422 aoedisk_add_debugfs(d);
333 423
334 spin_lock_irqsave(&d->lock, flags); 424 spin_lock_irqsave(&d->lock, flags);
335 WARN_ON(!(d->flags & DEVFL_GD_NOW)); 425 WARN_ON(!(d->flags & DEVFL_GD_NOW));
@@ -351,6 +441,8 @@ err:
351void 441void
352aoeblk_exit(void) 442aoeblk_exit(void)
353{ 443{
444 debugfs_remove_recursive(aoe_debugfs_dir);
445 aoe_debugfs_dir = NULL;
354 kmem_cache_destroy(buf_pool_cache); 446 kmem_cache_destroy(buf_pool_cache);
355} 447}
356 448
@@ -362,7 +454,11 @@ aoeblk_init(void)
362 0, 0, NULL); 454 0, 0, NULL);
363 if (buf_pool_cache == NULL) 455 if (buf_pool_cache == NULL)
364 return -ENOMEM; 456 return -ENOMEM;
365 457 aoe_debugfs_dir = debugfs_create_dir("aoe", NULL);
458 if (IS_ERR_OR_NULL(aoe_debugfs_dir)) {
459 pr_info("aoe: cannot create debugfs directory\n");
460 aoe_debugfs_dir = NULL;
461 }
366 return 0; 462 return 0;
367} 463}
368 464
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 4d45dba7fb8f..d2515435e23f 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -380,7 +380,6 @@ aoecmd_ata_rw(struct aoedev *d)
380{ 380{
381 struct frame *f; 381 struct frame *f;
382 struct buf *buf; 382 struct buf *buf;
383 struct aoetgt *t;
384 struct sk_buff *skb; 383 struct sk_buff *skb;
385 struct sk_buff_head queue; 384 struct sk_buff_head queue;
386 ulong bcnt, fbcnt; 385 ulong bcnt, fbcnt;
@@ -391,7 +390,6 @@ aoecmd_ata_rw(struct aoedev *d)
391 f = newframe(d); 390 f = newframe(d);
392 if (f == NULL) 391 if (f == NULL)
393 return 0; 392 return 0;
394 t = *d->tgt;
395 bcnt = d->maxbcnt; 393 bcnt = d->maxbcnt;
396 if (bcnt == 0) 394 if (bcnt == 0)
397 bcnt = DEFAULTBCNT; 395 bcnt = DEFAULTBCNT;
@@ -485,7 +483,6 @@ resend(struct aoedev *d, struct frame *f)
485 struct sk_buff *skb; 483 struct sk_buff *skb;
486 struct sk_buff_head queue; 484 struct sk_buff_head queue;
487 struct aoe_hdr *h; 485 struct aoe_hdr *h;
488 struct aoe_atahdr *ah;
489 struct aoetgt *t; 486 struct aoetgt *t;
490 char buf[128]; 487 char buf[128];
491 u32 n; 488 u32 n;
@@ -500,7 +497,6 @@ resend(struct aoedev *d, struct frame *f)
500 return; 497 return;
501 } 498 }
502 h = (struct aoe_hdr *) skb_mac_header(skb); 499 h = (struct aoe_hdr *) skb_mac_header(skb);
503 ah = (struct aoe_atahdr *) (h+1);
504 500
505 if (!(f->flags & FFL_PROBE)) { 501 if (!(f->flags & FFL_PROBE)) {
506 snprintf(buf, sizeof(buf), 502 snprintf(buf, sizeof(buf),
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 784c92e038d1..e774c50b6842 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -12,6 +12,7 @@
12#include <linux/bitmap.h> 12#include <linux/bitmap.h>
13#include <linux/kdev_t.h> 13#include <linux/kdev_t.h>
14#include <linux/moduleparam.h> 14#include <linux/moduleparam.h>
15#include <linux/string.h>
15#include "aoe.h" 16#include "aoe.h"
16 17
17static void dummy_timer(ulong); 18static void dummy_timer(ulong);
@@ -241,16 +242,12 @@ aoedev_downdev(struct aoedev *d)
241static int 242static int
242user_req(char *s, size_t slen, struct aoedev *d) 243user_req(char *s, size_t slen, struct aoedev *d)
243{ 244{
244 char *p; 245 const char *p;
245 size_t lim; 246 size_t lim;
246 247
247 if (!d->gd) 248 if (!d->gd)
248 return 0; 249 return 0;
249 p = strrchr(d->gd->disk_name, '/'); 250 p = kbasename(d->gd->disk_name);
250 if (!p)
251 p = d->gd->disk_name;
252 else
253 p += 1;
254 lim = sizeof(d->gd->disk_name); 251 lim = sizeof(d->gd->disk_name);
255 lim -= p - d->gd->disk_name; 252 lim -= p - d->gd->disk_name;
256 if (slen < lim) 253 if (slen < lim)
@@ -278,6 +275,7 @@ freedev(struct aoedev *d)
278 275
279 del_timer_sync(&d->timer); 276 del_timer_sync(&d->timer);
280 if (d->gd) { 277 if (d->gd) {
278 aoedisk_rm_debugfs(d);
281 aoedisk_rm_sysfs(d); 279 aoedisk_rm_sysfs(d);
282 del_gendisk(d->gd); 280 del_gendisk(d->gd);
283 put_disk(d->gd); 281 put_disk(d->gd);
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 9bf4371755f2..d91f1a56e861 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -545,7 +545,7 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data)
545 545
546 mutex_lock(&brd_devices_mutex); 546 mutex_lock(&brd_devices_mutex);
547 brd = brd_init_one(MINOR(dev) >> part_shift); 547 brd = brd_init_one(MINOR(dev) >> part_shift);
548 kobj = brd ? get_disk(brd->brd_disk) : ERR_PTR(-ENOMEM); 548 kobj = brd ? get_disk(brd->brd_disk) : NULL;
549 mutex_unlock(&brd_devices_mutex); 549 mutex_unlock(&brd_devices_mutex);
550 550
551 *part = 0; 551 *part = 0;
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 62b6c2cc80b5..b35fc4f5237c 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1189,6 +1189,7 @@ static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode,
1189 int err; 1189 int err;
1190 u32 cp; 1190 u32 cp;
1191 1191
1192 memset(&arg64, 0, sizeof(arg64));
1192 err = 0; 1193 err = 0;
1193 err |= 1194 err |=
1194 copy_from_user(&arg64.LUN_info, &arg32->LUN_info, 1195 copy_from_user(&arg64.LUN_info, &arg32->LUN_info,
@@ -2807,7 +2808,7 @@ resend_cmd2:
2807 /* erase the old error information */ 2808 /* erase the old error information */
2808 memset(c->err_info, 0, sizeof(ErrorInfo_struct)); 2809 memset(c->err_info, 0, sizeof(ErrorInfo_struct));
2809 return_status = IO_OK; 2810 return_status = IO_OK;
2810 INIT_COMPLETION(wait); 2811 reinit_completion(&wait);
2811 goto resend_cmd2; 2812 goto resend_cmd2;
2812 } 2813 }
2813 2814
@@ -3668,7 +3669,7 @@ static int add_to_scan_list(struct ctlr_info *h)
3668 } 3669 }
3669 } 3670 }
3670 if (!found && !h->busy_scanning) { 3671 if (!found && !h->busy_scanning) {
3671 INIT_COMPLETION(h->scan_wait); 3672 reinit_completion(&h->scan_wait);
3672 list_add_tail(&h->scan_list, &scan_q); 3673 list_add_tail(&h->scan_list, &scan_q);
3673 ret = 1; 3674 ret = 1;
3674 } 3675 }
@@ -4258,6 +4259,13 @@ static void cciss_find_board_params(ctlr_info_t *h)
4258 h->nr_cmds = h->max_commands - 4 - cciss_tape_cmds; 4259 h->nr_cmds = h->max_commands - 4 - cciss_tape_cmds;
4259 h->maxsgentries = readl(&(h->cfgtable->MaxSGElements)); 4260 h->maxsgentries = readl(&(h->cfgtable->MaxSGElements));
4260 /* 4261 /*
4262 * The P600 may exhibit poor performnace under some workloads
4263 * if we use the value in the configuration table. Limit this
4264 * controller to MAXSGENTRIES (32) instead.
4265 */
4266 if (h->board_id == 0x3225103C)
4267 h->maxsgentries = MAXSGENTRIES;
4268 /*
4261 * Limit in-command s/g elements to 32 save dma'able memory. 4269 * Limit in-command s/g elements to 32 save dma'able memory.
4262 * Howvever spec says if 0, use 31 4270 * Howvever spec says if 0, use 31
4263 */ 4271 */
@@ -5175,7 +5183,7 @@ reinit_after_soft_reset:
5175 rebuild_lun_table(h, 1, 0); 5183 rebuild_lun_table(h, 1, 0);
5176 cciss_engage_scsi(h); 5184 cciss_engage_scsi(h);
5177 h->busy_initializing = 0; 5185 h->busy_initializing = 0;
5178 return 1; 5186 return 0;
5179 5187
5180clean4: 5188clean4:
5181 cciss_free_cmd_pool(h); 5189 cciss_free_cmd_pool(h);
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 639d26b90b91..2b9440384536 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -1193,6 +1193,7 @@ out_passthru:
1193 ida_pci_info_struct pciinfo; 1193 ida_pci_info_struct pciinfo;
1194 1194
1195 if (!arg) return -EINVAL; 1195 if (!arg) return -EINVAL;
1196 memset(&pciinfo, 0, sizeof(pciinfo));
1196 pciinfo.bus = host->pci_dev->bus->number; 1197 pciinfo.bus = host->pci_dev->bus->number;
1197 pciinfo.dev_fn = host->pci_dev->devfn; 1198 pciinfo.dev_fn = host->pci_dev->devfn;
1198 pciinfo.board_id = host->board_id; 1199 pciinfo.board_id = host->board_id;
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 2d7f608d181c..0e06f0c5dd1e 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1474,7 +1474,8 @@ enum determine_dev_size {
1474 DS_ERROR = -1, 1474 DS_ERROR = -1,
1475 DS_UNCHANGED = 0, 1475 DS_UNCHANGED = 0,
1476 DS_SHRUNK = 1, 1476 DS_SHRUNK = 1,
1477 DS_GREW = 2 1477 DS_GREW = 2,
1478 DS_GREW_FROM_ZERO = 3,
1478}; 1479};
1479extern enum determine_dev_size 1480extern enum determine_dev_size
1480drbd_determine_dev_size(struct drbd_conf *, enum dds_flags, struct resize_parms *) __must_hold(local); 1481drbd_determine_dev_size(struct drbd_conf *, enum dds_flags, struct resize_parms *) __must_hold(local);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 55635edf563b..9e3818b1bc83 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2750,13 +2750,6 @@ int __init drbd_init(void)
2750 return err; 2750 return err;
2751 } 2751 }
2752 2752
2753 err = drbd_genl_register();
2754 if (err) {
2755 printk(KERN_ERR "drbd: unable to register generic netlink family\n");
2756 goto fail;
2757 }
2758
2759
2760 register_reboot_notifier(&drbd_notifier); 2753 register_reboot_notifier(&drbd_notifier);
2761 2754
2762 /* 2755 /*
@@ -2767,6 +2760,15 @@ int __init drbd_init(void)
2767 drbd_proc = NULL; /* play safe for drbd_cleanup */ 2760 drbd_proc = NULL; /* play safe for drbd_cleanup */
2768 idr_init(&minors); 2761 idr_init(&minors);
2769 2762
2763 rwlock_init(&global_state_lock);
2764 INIT_LIST_HEAD(&drbd_tconns);
2765
2766 err = drbd_genl_register();
2767 if (err) {
2768 printk(KERN_ERR "drbd: unable to register generic netlink family\n");
2769 goto fail;
2770 }
2771
2770 err = drbd_create_mempools(); 2772 err = drbd_create_mempools();
2771 if (err) 2773 if (err)
2772 goto fail; 2774 goto fail;
@@ -2778,9 +2780,6 @@ int __init drbd_init(void)
2778 goto fail; 2780 goto fail;
2779 } 2781 }
2780 2782
2781 rwlock_init(&global_state_lock);
2782 INIT_LIST_HEAD(&drbd_tconns);
2783
2784 retry.wq = create_singlethread_workqueue("drbd-reissue"); 2783 retry.wq = create_singlethread_workqueue("drbd-reissue");
2785 if (!retry.wq) { 2784 if (!retry.wq) {
2786 printk(KERN_ERR "drbd: unable to create retry workqueue\n"); 2785 printk(KERN_ERR "drbd: unable to create retry workqueue\n");
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 8cc1e640f485..c706d50a8b06 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -955,7 +955,7 @@ drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags, struct res
955 } 955 }
956 956
957 if (size > la_size_sect) 957 if (size > la_size_sect)
958 rv = DS_GREW; 958 rv = la_size_sect ? DS_GREW : DS_GREW_FROM_ZERO;
959 if (size < la_size_sect) 959 if (size < la_size_sect)
960 rv = DS_SHRUNK; 960 rv = DS_SHRUNK;
961 961
@@ -1132,9 +1132,9 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
1132 /* We may ignore peer limits if the peer is modern enough. 1132 /* We may ignore peer limits if the peer is modern enough.
1133 Because new from 8.3.8 onwards the peer can use multiple 1133 Because new from 8.3.8 onwards the peer can use multiple
1134 BIOs for a single peer_request */ 1134 BIOs for a single peer_request */
1135 if (mdev->state.conn >= C_CONNECTED) { 1135 if (mdev->state.conn >= C_WF_REPORT_PARAMS) {
1136 if (mdev->tconn->agreed_pro_version < 94) 1136 if (mdev->tconn->agreed_pro_version < 94)
1137 peer = min( mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET); 1137 peer = min(mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
1138 /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */ 1138 /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
1139 else if (mdev->tconn->agreed_pro_version == 94) 1139 else if (mdev->tconn->agreed_pro_version == 94)
1140 peer = DRBD_MAX_SIZE_H80_PACKET; 1140 peer = DRBD_MAX_SIZE_H80_PACKET;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index cc29cd3bf78b..6fa6673b36b3 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1890,29 +1890,11 @@ static u32 seq_max(u32 a, u32 b)
1890 return seq_greater(a, b) ? a : b; 1890 return seq_greater(a, b) ? a : b;
1891} 1891}
1892 1892
1893static bool need_peer_seq(struct drbd_conf *mdev)
1894{
1895 struct drbd_tconn *tconn = mdev->tconn;
1896 int tp;
1897
1898 /*
1899 * We only need to keep track of the last packet_seq number of our peer
1900 * if we are in dual-primary mode and we have the resolve-conflicts flag set; see
1901 * handle_write_conflicts().
1902 */
1903
1904 rcu_read_lock();
1905 tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
1906 rcu_read_unlock();
1907
1908 return tp && test_bit(RESOLVE_CONFLICTS, &tconn->flags);
1909}
1910
1911static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq) 1893static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
1912{ 1894{
1913 unsigned int newest_peer_seq; 1895 unsigned int newest_peer_seq;
1914 1896
1915 if (need_peer_seq(mdev)) { 1897 if (test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)) {
1916 spin_lock(&mdev->peer_seq_lock); 1898 spin_lock(&mdev->peer_seq_lock);
1917 newest_peer_seq = seq_max(mdev->peer_seq, peer_seq); 1899 newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
1918 mdev->peer_seq = newest_peer_seq; 1900 mdev->peer_seq = newest_peer_seq;
@@ -1972,22 +1954,31 @@ static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_s
1972{ 1954{
1973 DEFINE_WAIT(wait); 1955 DEFINE_WAIT(wait);
1974 long timeout; 1956 long timeout;
1975 int ret; 1957 int ret = 0, tp;
1976 1958
1977 if (!need_peer_seq(mdev)) 1959 if (!test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags))
1978 return 0; 1960 return 0;
1979 1961
1980 spin_lock(&mdev->peer_seq_lock); 1962 spin_lock(&mdev->peer_seq_lock);
1981 for (;;) { 1963 for (;;) {
1982 if (!seq_greater(peer_seq - 1, mdev->peer_seq)) { 1964 if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
1983 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq); 1965 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
1984 ret = 0;
1985 break; 1966 break;
1986 } 1967 }
1968
1987 if (signal_pending(current)) { 1969 if (signal_pending(current)) {
1988 ret = -ERESTARTSYS; 1970 ret = -ERESTARTSYS;
1989 break; 1971 break;
1990 } 1972 }
1973
1974 rcu_read_lock();
1975 tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
1976 rcu_read_unlock();
1977
1978 if (!tp)
1979 break;
1980
1981 /* Only need to wait if two_primaries is enabled */
1991 prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE); 1982 prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
1992 spin_unlock(&mdev->peer_seq_lock); 1983 spin_unlock(&mdev->peer_seq_lock);
1993 rcu_read_lock(); 1984 rcu_read_lock();
@@ -2228,8 +2219,10 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi)
2228 } 2219 }
2229 goto out_interrupted; 2220 goto out_interrupted;
2230 } 2221 }
2231 } else 2222 } else {
2223 update_peer_seq(mdev, peer_seq);
2232 spin_lock_irq(&mdev->tconn->req_lock); 2224 spin_lock_irq(&mdev->tconn->req_lock);
2225 }
2233 list_add(&peer_req->w.list, &mdev->active_ee); 2226 list_add(&peer_req->w.list, &mdev->active_ee);
2234 spin_unlock_irq(&mdev->tconn->req_lock); 2227 spin_unlock_irq(&mdev->tconn->req_lock);
2235 2228
@@ -4132,7 +4125,11 @@ recv_bm_rle_bits(struct drbd_conf *mdev,
4132 (unsigned int)bs.buf_len); 4125 (unsigned int)bs.buf_len);
4133 return -EIO; 4126 return -EIO;
4134 } 4127 }
4135 look_ahead >>= bits; 4128 /* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
4129 if (likely(bits < 64))
4130 look_ahead >>= bits;
4131 else
4132 look_ahead = 0;
4136 have -= bits; 4133 have -= bits;
4137 4134
4138 bits = bitstream_get_bits(&bs, &tmp, 64 - have); 4135 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index c24379ffd4e3..fec7bef44994 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1306,6 +1306,7 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct
1306 int backing_limit; 1306 int backing_limit;
1307 1307
1308 if (bio_size && get_ldev(mdev)) { 1308 if (bio_size && get_ldev(mdev)) {
1309 unsigned int max_hw_sectors = queue_max_hw_sectors(q);
1309 struct request_queue * const b = 1310 struct request_queue * const b =
1310 mdev->ldev->backing_bdev->bd_disk->queue; 1311 mdev->ldev->backing_bdev->bd_disk->queue;
1311 if (b->merge_bvec_fn) { 1312 if (b->merge_bvec_fn) {
@@ -1313,6 +1314,8 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct
1313 limit = min(limit, backing_limit); 1314 limit = min(limit, backing_limit);
1314 } 1315 }
1315 put_ldev(mdev); 1316 put_ldev(mdev);
1317 if ((limit >> 9) > max_hw_sectors)
1318 limit = max_hw_sectors << 9;
1316 } 1319 }
1317 return limit; 1320 return limit;
1318} 1321}
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 04ceb7e2fadd..000abe2f105c 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2886,9 +2886,9 @@ static void do_fd_request(struct request_queue *q)
2886 return; 2886 return;
2887 2887
2888 if (WARN(atomic_read(&usage_count) == 0, 2888 if (WARN(atomic_read(&usage_count) == 0,
2889 "warning: usage count=0, current_req=%p sect=%ld type=%x flags=%x\n", 2889 "warning: usage count=0, current_req=%p sect=%ld type=%x flags=%llx\n",
2890 current_req, (long)blk_rq_pos(current_req), current_req->cmd_type, 2890 current_req, (long)blk_rq_pos(current_req), current_req->cmd_type,
2891 current_req->cmd_flags)) 2891 (unsigned long long) current_req->cmd_flags))
2892 return; 2892 return;
2893 2893
2894 if (test_and_set_bit(0, &fdc_busy)) { 2894 if (test_and_set_bit(0, &fdc_busy)) {
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 40e715531aa6..c8dac7305244 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -894,13 +894,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
894 894
895 bio_list_init(&lo->lo_bio_list); 895 bio_list_init(&lo->lo_bio_list);
896 896
897 /*
898 * set queue make_request_fn, and add limits based on lower level
899 * device
900 */
901 blk_queue_make_request(lo->lo_queue, loop_make_request);
902 lo->lo_queue->queuedata = lo;
903
904 if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) 897 if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
905 blk_queue_flush(lo->lo_queue, REQ_FLUSH); 898 blk_queue_flush(lo->lo_queue, REQ_FLUSH);
906 899
@@ -1618,6 +1611,8 @@ static int loop_add(struct loop_device **l, int i)
1618 if (!lo) 1611 if (!lo)
1619 goto out; 1612 goto out;
1620 1613
1614 lo->lo_state = Lo_unbound;
1615
1621 /* allocate id, if @id >= 0, we're requesting that specific id */ 1616 /* allocate id, if @id >= 0, we're requesting that specific id */
1622 if (i >= 0) { 1617 if (i >= 0) {
1623 err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_KERNEL); 1618 err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_KERNEL);
@@ -1633,7 +1628,13 @@ static int loop_add(struct loop_device **l, int i)
1633 err = -ENOMEM; 1628 err = -ENOMEM;
1634 lo->lo_queue = blk_alloc_queue(GFP_KERNEL); 1629 lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
1635 if (!lo->lo_queue) 1630 if (!lo->lo_queue)
1636 goto out_free_dev; 1631 goto out_free_idr;
1632
1633 /*
1634 * set queue make_request_fn
1635 */
1636 blk_queue_make_request(lo->lo_queue, loop_make_request);
1637 lo->lo_queue->queuedata = lo;
1637 1638
1638 disk = lo->lo_disk = alloc_disk(1 << part_shift); 1639 disk = lo->lo_disk = alloc_disk(1 << part_shift);
1639 if (!disk) 1640 if (!disk)
@@ -1678,6 +1679,8 @@ static int loop_add(struct loop_device **l, int i)
1678 1679
1679out_free_queue: 1680out_free_queue:
1680 blk_cleanup_queue(lo->lo_queue); 1681 blk_cleanup_queue(lo->lo_queue);
1682out_free_idr:
1683 idr_remove(&loop_index_idr, i);
1681out_free_dev: 1684out_free_dev:
1682 kfree(lo); 1685 kfree(lo);
1683out: 1686out:
@@ -1741,7 +1744,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data)
1741 if (err < 0) 1744 if (err < 0)
1742 err = loop_add(&lo, MINOR(dev) >> part_shift); 1745 err = loop_add(&lo, MINOR(dev) >> part_shift);
1743 if (err < 0) 1746 if (err < 0)
1744 kobj = ERR_PTR(err); 1747 kobj = NULL;
1745 else 1748 else
1746 kobj = get_disk(lo->lo_disk); 1749 kobj = get_disk(lo->lo_disk);
1747 mutex_unlock(&loop_index_mutex); 1750 mutex_unlock(&loop_index_mutex);
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index a56cfcd5d648..7bc363f1ee82 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -636,7 +636,7 @@ ok_to_write:
636 mg_request(host->breq); 636 mg_request(host->breq);
637} 637}
638 638
639void mg_times_out(unsigned long data) 639static void mg_times_out(unsigned long data)
640{ 640{
641 struct mg_host *host = (struct mg_host *)data; 641 struct mg_host *host = (struct mg_host *)data;
642 char *name; 642 char *name;
@@ -936,7 +936,7 @@ static int mg_probe(struct platform_device *plat_dev)
936 goto probe_err_3b; 936 goto probe_err_3b;
937 } 937 }
938 err = request_irq(host->irq, mg_irq, 938 err = request_irq(host->irq, mg_irq,
939 IRQF_DISABLED | IRQF_TRIGGER_RISING, 939 IRQF_TRIGGER_RISING,
940 MG_DEV_NAME, host); 940 MG_DEV_NAME, host);
941 if (err) { 941 if (err) {
942 printk(KERN_ERR "%s:%d fail (request_irq err=%d)\n", 942 printk(KERN_ERR "%s:%d fail (request_irq err=%d)\n",
diff --git a/drivers/block/mtip32xx/Kconfig b/drivers/block/mtip32xx/Kconfig
index 1fca1f996b45..0ba837fc62a8 100644
--- a/drivers/block/mtip32xx/Kconfig
+++ b/drivers/block/mtip32xx/Kconfig
@@ -4,6 +4,6 @@
4 4
5config BLK_DEV_PCIESSD_MTIP32XX 5config BLK_DEV_PCIESSD_MTIP32XX
6 tristate "Block Device Driver for Micron PCIe SSDs" 6 tristate "Block Device Driver for Micron PCIe SSDs"
7 depends on PCI && GENERIC_HARDIRQS 7 depends on PCI
8 help 8 help
9 This enables the block driver for Micron PCIe SSDs. 9 This enables the block driver for Micron PCIe SSDs.
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 952dbfe22126..050c71267f14 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -126,64 +126,30 @@ struct mtip_compat_ide_task_request_s {
126static bool mtip_check_surprise_removal(struct pci_dev *pdev) 126static bool mtip_check_surprise_removal(struct pci_dev *pdev)
127{ 127{
128 u16 vendor_id = 0; 128 u16 vendor_id = 0;
129 struct driver_data *dd = pci_get_drvdata(pdev);
130
131 if (dd->sr)
132 return true;
129 133
130 /* Read the vendorID from the configuration space */ 134 /* Read the vendorID from the configuration space */
131 pci_read_config_word(pdev, 0x00, &vendor_id); 135 pci_read_config_word(pdev, 0x00, &vendor_id);
132 if (vendor_id == 0xFFFF) 136 if (vendor_id == 0xFFFF) {
137 dd->sr = true;
138 if (dd->queue)
139 set_bit(QUEUE_FLAG_DEAD, &dd->queue->queue_flags);
140 else
141 dev_warn(&dd->pdev->dev,
142 "%s: dd->queue is NULL\n", __func__);
143 if (dd->port) {
144 set_bit(MTIP_PF_SR_CLEANUP_BIT, &dd->port->flags);
145 wake_up_interruptible(&dd->port->svc_wait);
146 } else
147 dev_warn(&dd->pdev->dev,
148 "%s: dd->port is NULL\n", __func__);
133 return true; /* device removed */ 149 return true; /* device removed */
134
135 return false; /* device present */
136}
137
138/*
139 * This function is called for clean the pending command in the
140 * command slot during the surprise removal of device and return
141 * error to the upper layer.
142 *
143 * @dd Pointer to the DRIVER_DATA structure.
144 *
145 * return value
146 * None
147 */
148static void mtip_command_cleanup(struct driver_data *dd)
149{
150 int group = 0, commandslot = 0, commandindex = 0;
151 struct mtip_cmd *command;
152 struct mtip_port *port = dd->port;
153 static int in_progress;
154
155 if (in_progress)
156 return;
157
158 in_progress = 1;
159
160 for (group = 0; group < 4; group++) {
161 for (commandslot = 0; commandslot < 32; commandslot++) {
162 if (!(port->allocated[group] & (1 << commandslot)))
163 continue;
164
165 commandindex = group << 5 | commandslot;
166 command = &port->commands[commandindex];
167
168 if (atomic_read(&command->active)
169 && (command->async_callback)) {
170 command->async_callback(command->async_data,
171 -ENODEV);
172 command->async_callback = NULL;
173 command->async_data = NULL;
174 }
175
176 dma_unmap_sg(&port->dd->pdev->dev,
177 command->sg,
178 command->scatter_ents,
179 command->direction);
180 }
181 } 150 }
182 151
183 up(&port->cmd_slot); 152 return false; /* device present */
184
185 set_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag);
186 in_progress = 0;
187} 153}
188 154
189/* 155/*
@@ -222,10 +188,7 @@ static int get_slot(struct mtip_port *port)
222 } 188 }
223 dev_warn(&port->dd->pdev->dev, "Failed to get a tag.\n"); 189 dev_warn(&port->dd->pdev->dev, "Failed to get a tag.\n");
224 190
225 if (mtip_check_surprise_removal(port->dd->pdev)) { 191 mtip_check_surprise_removal(port->dd->pdev);
226 /* Device not present, clean outstanding commands */
227 mtip_command_cleanup(port->dd);
228 }
229 return -1; 192 return -1;
230} 193}
231 194
@@ -246,6 +209,107 @@ static inline void release_slot(struct mtip_port *port, int tag)
246} 209}
247 210
248/* 211/*
212 * IO completion function.
213 *
214 * This completion function is called by the driver ISR when a
215 * command that was issued by the kernel completes. It first calls the
216 * asynchronous completion function which normally calls back into the block
217 * layer passing the asynchronous callback data, then unmaps the
218 * scatter list associated with the completed command, and finally
219 * clears the allocated bit associated with the completed command.
220 *
221 * @port Pointer to the port data structure.
222 * @tag Tag of the command.
223 * @data Pointer to driver_data.
224 * @status Completion status.
225 *
226 * return value
227 * None
228 */
229static void mtip_async_complete(struct mtip_port *port,
230 int tag,
231 void *data,
232 int status)
233{
234 struct mtip_cmd *command;
235 struct driver_data *dd = data;
236 int cb_status = status ? -EIO : 0;
237
238 if (unlikely(!dd) || unlikely(!port))
239 return;
240
241 command = &port->commands[tag];
242
243 if (unlikely(status == PORT_IRQ_TF_ERR)) {
244 dev_warn(&port->dd->pdev->dev,
245 "Command tag %d failed due to TFE\n", tag);
246 }
247
248 /* Upper layer callback */
249 if (likely(command->async_callback))
250 command->async_callback(command->async_data, cb_status);
251
252 command->async_callback = NULL;
253 command->comp_func = NULL;
254
255 /* Unmap the DMA scatter list entries */
256 dma_unmap_sg(&dd->pdev->dev,
257 command->sg,
258 command->scatter_ents,
259 command->direction);
260
261 /* Clear the allocated and active bits for the command */
262 atomic_set(&port->commands[tag].active, 0);
263 release_slot(port, tag);
264
265 up(&port->cmd_slot);
266}
267
268/*
269 * This function is called for clean the pending command in the
270 * command slot during the surprise removal of device and return
271 * error to the upper layer.
272 *
273 * @dd Pointer to the DRIVER_DATA structure.
274 *
275 * return value
276 * None
277 */
278static void mtip_command_cleanup(struct driver_data *dd)
279{
280 int tag = 0;
281 struct mtip_cmd *cmd;
282 struct mtip_port *port = dd->port;
283 unsigned int num_cmd_slots = dd->slot_groups * 32;
284
285 if (!test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag))
286 return;
287
288 if (!port)
289 return;
290
291 cmd = &port->commands[MTIP_TAG_INTERNAL];
292 if (atomic_read(&cmd->active))
293 if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) &
294 (1 << MTIP_TAG_INTERNAL))
295 if (cmd->comp_func)
296 cmd->comp_func(port, MTIP_TAG_INTERNAL,
297 cmd->comp_data, -ENODEV);
298
299 while (1) {
300 tag = find_next_bit(port->allocated, num_cmd_slots, tag);
301 if (tag >= num_cmd_slots)
302 break;
303
304 cmd = &port->commands[tag];
305 if (atomic_read(&cmd->active))
306 mtip_async_complete(port, tag, dd, -ENODEV);
307 }
308
309 set_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag);
310}
311
312/*
249 * Reset the HBA (without sleeping) 313 * Reset the HBA (without sleeping)
250 * 314 *
251 * @dd Pointer to the driver data structure. 315 * @dd Pointer to the driver data structure.
@@ -584,6 +648,9 @@ static void mtip_timeout_function(unsigned long int data)
584 if (unlikely(!port)) 648 if (unlikely(!port))
585 return; 649 return;
586 650
651 if (unlikely(port->dd->sr))
652 return;
653
587 if (test_bit(MTIP_DDF_RESUME_BIT, &port->dd->dd_flag)) { 654 if (test_bit(MTIP_DDF_RESUME_BIT, &port->dd->dd_flag)) {
588 mod_timer(&port->cmd_timer, 655 mod_timer(&port->cmd_timer,
589 jiffies + msecs_to_jiffies(30000)); 656 jiffies + msecs_to_jiffies(30000));
@@ -675,66 +742,6 @@ static void mtip_timeout_function(unsigned long int data)
675} 742}
676 743
677/* 744/*
678 * IO completion function.
679 *
680 * This completion function is called by the driver ISR when a
681 * command that was issued by the kernel completes. It first calls the
682 * asynchronous completion function which normally calls back into the block
683 * layer passing the asynchronous callback data, then unmaps the
684 * scatter list associated with the completed command, and finally
685 * clears the allocated bit associated with the completed command.
686 *
687 * @port Pointer to the port data structure.
688 * @tag Tag of the command.
689 * @data Pointer to driver_data.
690 * @status Completion status.
691 *
692 * return value
693 * None
694 */
695static void mtip_async_complete(struct mtip_port *port,
696 int tag,
697 void *data,
698 int status)
699{
700 struct mtip_cmd *command;
701 struct driver_data *dd = data;
702 int cb_status = status ? -EIO : 0;
703
704 if (unlikely(!dd) || unlikely(!port))
705 return;
706
707 command = &port->commands[tag];
708
709 if (unlikely(status == PORT_IRQ_TF_ERR)) {
710 dev_warn(&port->dd->pdev->dev,
711 "Command tag %d failed due to TFE\n", tag);
712 }
713
714 /* Upper layer callback */
715 if (likely(command->async_callback))
716 command->async_callback(command->async_data, cb_status);
717
718 command->async_callback = NULL;
719 command->comp_func = NULL;
720
721 /* Unmap the DMA scatter list entries */
722 dma_unmap_sg(&dd->pdev->dev,
723 command->sg,
724 command->scatter_ents,
725 command->direction);
726
727 /* Clear the allocated and active bits for the command */
728 atomic_set(&port->commands[tag].active, 0);
729 release_slot(port, tag);
730
731 if (unlikely(command->unaligned))
732 up(&port->cmd_slot_unal);
733 else
734 up(&port->cmd_slot);
735}
736
737/*
738 * Internal command completion callback function. 745 * Internal command completion callback function.
739 * 746 *
740 * This function is normally called by the driver ISR when an internal 747 * This function is normally called by the driver ISR when an internal
@@ -854,7 +861,6 @@ static void mtip_handle_tfe(struct driver_data *dd)
854 "Missing completion func for tag %d", 861 "Missing completion func for tag %d",
855 tag); 862 tag);
856 if (mtip_check_surprise_removal(dd->pdev)) { 863 if (mtip_check_surprise_removal(dd->pdev)) {
857 mtip_command_cleanup(dd);
858 /* don't proceed further */ 864 /* don't proceed further */
859 return; 865 return;
860 } 866 }
@@ -1018,14 +1024,12 @@ static inline void mtip_workq_sdbfx(struct mtip_port *port, int group,
1018 command->comp_data, 1024 command->comp_data,
1019 0); 1025 0);
1020 } else { 1026 } else {
1021 dev_warn(&dd->pdev->dev, 1027 dev_dbg(&dd->pdev->dev,
1022 "Null completion " 1028 "Null completion for tag %d",
1023 "for tag %d",
1024 tag); 1029 tag);
1025 1030
1026 if (mtip_check_surprise_removal( 1031 if (mtip_check_surprise_removal(
1027 dd->pdev)) { 1032 dd->pdev)) {
1028 mtip_command_cleanup(dd);
1029 return; 1033 return;
1030 } 1034 }
1031 } 1035 }
@@ -1145,7 +1149,6 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
1145 1149
1146 if (unlikely(port_stat & PORT_IRQ_ERR)) { 1150 if (unlikely(port_stat & PORT_IRQ_ERR)) {
1147 if (unlikely(mtip_check_surprise_removal(dd->pdev))) { 1151 if (unlikely(mtip_check_surprise_removal(dd->pdev))) {
1148 mtip_command_cleanup(dd);
1149 /* don't proceed further */ 1152 /* don't proceed further */
1150 return IRQ_HANDLED; 1153 return IRQ_HANDLED;
1151 } 1154 }
@@ -2806,34 +2809,51 @@ static ssize_t show_device_status(struct device_driver *drv, char *buf)
2806static ssize_t mtip_hw_read_device_status(struct file *f, char __user *ubuf, 2809static ssize_t mtip_hw_read_device_status(struct file *f, char __user *ubuf,
2807 size_t len, loff_t *offset) 2810 size_t len, loff_t *offset)
2808{ 2811{
2812 struct driver_data *dd = (struct driver_data *)f->private_data;
2809 int size = *offset; 2813 int size = *offset;
2810 char buf[MTIP_DFS_MAX_BUF_SIZE]; 2814 char *buf;
2815 int rv = 0;
2811 2816
2812 if (!len || *offset) 2817 if (!len || *offset)
2813 return 0; 2818 return 0;
2814 2819
2820 buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL);
2821 if (!buf) {
2822 dev_err(&dd->pdev->dev,
2823 "Memory allocation: status buffer\n");
2824 return -ENOMEM;
2825 }
2826
2815 size += show_device_status(NULL, buf); 2827 size += show_device_status(NULL, buf);
2816 2828
2817 *offset = size <= len ? size : len; 2829 *offset = size <= len ? size : len;
2818 size = copy_to_user(ubuf, buf, *offset); 2830 size = copy_to_user(ubuf, buf, *offset);
2819 if (size) 2831 if (size)
2820 return -EFAULT; 2832 rv = -EFAULT;
2821 2833
2822 return *offset; 2834 kfree(buf);
2835 return rv ? rv : *offset;
2823} 2836}
2824 2837
2825static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf, 2838static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf,
2826 size_t len, loff_t *offset) 2839 size_t len, loff_t *offset)
2827{ 2840{
2828 struct driver_data *dd = (struct driver_data *)f->private_data; 2841 struct driver_data *dd = (struct driver_data *)f->private_data;
2829 char buf[MTIP_DFS_MAX_BUF_SIZE]; 2842 char *buf;
2830 u32 group_allocated; 2843 u32 group_allocated;
2831 int size = *offset; 2844 int size = *offset;
2832 int n; 2845 int n, rv = 0;
2833 2846
2834 if (!len || size) 2847 if (!len || size)
2835 return 0; 2848 return 0;
2836 2849
2850 buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL);
2851 if (!buf) {
2852 dev_err(&dd->pdev->dev,
2853 "Memory allocation: register buffer\n");
2854 return -ENOMEM;
2855 }
2856
2837 size += sprintf(&buf[size], "H/ S ACTive : [ 0x"); 2857 size += sprintf(&buf[size], "H/ S ACTive : [ 0x");
2838 2858
2839 for (n = dd->slot_groups-1; n >= 0; n--) 2859 for (n = dd->slot_groups-1; n >= 0; n--)
@@ -2888,21 +2908,30 @@ static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf,
2888 *offset = size <= len ? size : len; 2908 *offset = size <= len ? size : len;
2889 size = copy_to_user(ubuf, buf, *offset); 2909 size = copy_to_user(ubuf, buf, *offset);
2890 if (size) 2910 if (size)
2891 return -EFAULT; 2911 rv = -EFAULT;
2892 2912
2893 return *offset; 2913 kfree(buf);
2914 return rv ? rv : *offset;
2894} 2915}
2895 2916
2896static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf, 2917static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf,
2897 size_t len, loff_t *offset) 2918 size_t len, loff_t *offset)
2898{ 2919{
2899 struct driver_data *dd = (struct driver_data *)f->private_data; 2920 struct driver_data *dd = (struct driver_data *)f->private_data;
2900 char buf[MTIP_DFS_MAX_BUF_SIZE]; 2921 char *buf;
2901 int size = *offset; 2922 int size = *offset;
2923 int rv = 0;
2902 2924
2903 if (!len || size) 2925 if (!len || size)
2904 return 0; 2926 return 0;
2905 2927
2928 buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL);
2929 if (!buf) {
2930 dev_err(&dd->pdev->dev,
2931 "Memory allocation: flag buffer\n");
2932 return -ENOMEM;
2933 }
2934
2906 size += sprintf(&buf[size], "Flag-port : [ %08lX ]\n", 2935 size += sprintf(&buf[size], "Flag-port : [ %08lX ]\n",
2907 dd->port->flags); 2936 dd->port->flags);
2908 size += sprintf(&buf[size], "Flag-dd : [ %08lX ]\n", 2937 size += sprintf(&buf[size], "Flag-dd : [ %08lX ]\n",
@@ -2911,9 +2940,10 @@ static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf,
2911 *offset = size <= len ? size : len; 2940 *offset = size <= len ? size : len;
2912 size = copy_to_user(ubuf, buf, *offset); 2941 size = copy_to_user(ubuf, buf, *offset);
2913 if (size) 2942 if (size)
2914 return -EFAULT; 2943 rv = -EFAULT;
2915 2944
2916 return *offset; 2945 kfree(buf);
2946 return rv ? rv : *offset;
2917} 2947}
2918 2948
2919static const struct file_operations mtip_device_status_fops = { 2949static const struct file_operations mtip_device_status_fops = {
@@ -3006,6 +3036,46 @@ static void mtip_hw_debugfs_exit(struct driver_data *dd)
3006 debugfs_remove_recursive(dd->dfs_node); 3036 debugfs_remove_recursive(dd->dfs_node);
3007} 3037}
3008 3038
3039static int mtip_free_orphan(struct driver_data *dd)
3040{
3041 struct kobject *kobj;
3042
3043 if (dd->bdev) {
3044 if (dd->bdev->bd_holders >= 1)
3045 return -2;
3046
3047 bdput(dd->bdev);
3048 dd->bdev = NULL;
3049 }
3050
3051 mtip_hw_debugfs_exit(dd);
3052
3053 spin_lock(&rssd_index_lock);
3054 ida_remove(&rssd_index_ida, dd->index);
3055 spin_unlock(&rssd_index_lock);
3056
3057 if (!test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag) &&
3058 test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)) {
3059 put_disk(dd->disk);
3060 } else {
3061 if (dd->disk) {
3062 kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
3063 if (kobj) {
3064 mtip_hw_sysfs_exit(dd, kobj);
3065 kobject_put(kobj);
3066 }
3067 del_gendisk(dd->disk);
3068 dd->disk = NULL;
3069 }
3070 if (dd->queue) {
3071 dd->queue->queuedata = NULL;
3072 blk_cleanup_queue(dd->queue);
3073 dd->queue = NULL;
3074 }
3075 }
3076 kfree(dd);
3077 return 0;
3078}
3009 3079
3010/* 3080/*
3011 * Perform any init/resume time hardware setup 3081 * Perform any init/resume time hardware setup
@@ -3154,6 +3224,7 @@ static int mtip_service_thread(void *data)
3154 unsigned long slot, slot_start, slot_wrap; 3224 unsigned long slot, slot_start, slot_wrap;
3155 unsigned int num_cmd_slots = dd->slot_groups * 32; 3225 unsigned int num_cmd_slots = dd->slot_groups * 32;
3156 struct mtip_port *port = dd->port; 3226 struct mtip_port *port = dd->port;
3227 int ret;
3157 3228
3158 while (1) { 3229 while (1) {
3159 /* 3230 /*
@@ -3164,13 +3235,18 @@ static int mtip_service_thread(void *data)
3164 !(port->flags & MTIP_PF_PAUSE_IO)); 3235 !(port->flags & MTIP_PF_PAUSE_IO));
3165 3236
3166 if (kthread_should_stop()) 3237 if (kthread_should_stop())
3238 goto st_out;
3239
3240 set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
3241
3242 /* If I am an orphan, start self cleanup */
3243 if (test_bit(MTIP_PF_SR_CLEANUP_BIT, &port->flags))
3167 break; 3244 break;
3168 3245
3169 if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, 3246 if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
3170 &dd->dd_flag))) 3247 &dd->dd_flag)))
3171 break; 3248 goto st_out;
3172 3249
3173 set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
3174 if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) { 3250 if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
3175 slot = 1; 3251 slot = 1;
3176 /* used to restrict the loop to one iteration */ 3252 /* used to restrict the loop to one iteration */
@@ -3201,7 +3277,7 @@ static int mtip_service_thread(void *data)
3201 3277
3202 clear_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags); 3278 clear_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags);
3203 } else if (test_bit(MTIP_PF_REBUILD_BIT, &port->flags)) { 3279 } else if (test_bit(MTIP_PF_REBUILD_BIT, &port->flags)) {
3204 if (!mtip_ftl_rebuild_poll(dd)) 3280 if (mtip_ftl_rebuild_poll(dd) < 0)
3205 set_bit(MTIP_DDF_REBUILD_FAILED_BIT, 3281 set_bit(MTIP_DDF_REBUILD_FAILED_BIT,
3206 &dd->dd_flag); 3282 &dd->dd_flag);
3207 clear_bit(MTIP_PF_REBUILD_BIT, &port->flags); 3283 clear_bit(MTIP_PF_REBUILD_BIT, &port->flags);
@@ -3209,8 +3285,30 @@ static int mtip_service_thread(void *data)
3209 clear_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags); 3285 clear_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
3210 3286
3211 if (test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags)) 3287 if (test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags))
3288 goto st_out;
3289 }
3290
3291 /* wait for pci remove to exit */
3292 while (1) {
3293 if (test_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag))
3212 break; 3294 break;
3295 msleep_interruptible(1000);
3296 if (kthread_should_stop())
3297 goto st_out;
3298 }
3299
3300 while (1) {
3301 ret = mtip_free_orphan(dd);
3302 if (!ret) {
3303 /* NOTE: All data structures are invalid, do not
3304 * access any here */
3305 return 0;
3306 }
3307 msleep_interruptible(1000);
3308 if (kthread_should_stop())
3309 goto st_out;
3213 } 3310 }
3311st_out:
3214 return 0; 3312 return 0;
3215} 3313}
3216 3314
@@ -3437,13 +3535,13 @@ static int mtip_hw_init(struct driver_data *dd)
3437 rv = -EFAULT; 3535 rv = -EFAULT;
3438 goto out3; 3536 goto out3;
3439 } 3537 }
3538 mtip_dump_identify(dd->port);
3440 3539
3441 if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) == 3540 if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) ==
3442 MTIP_FTL_REBUILD_MAGIC) { 3541 MTIP_FTL_REBUILD_MAGIC) {
3443 set_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags); 3542 set_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags);
3444 return MTIP_FTL_REBUILD_MAGIC; 3543 return MTIP_FTL_REBUILD_MAGIC;
3445 } 3544 }
3446 mtip_dump_identify(dd->port);
3447 3545
3448 /* check write protect, over temp and rebuild statuses */ 3546 /* check write protect, over temp and rebuild statuses */
3449 rv = mtip_read_log_page(dd->port, ATA_LOG_SATA_NCQ, 3547 rv = mtip_read_log_page(dd->port, ATA_LOG_SATA_NCQ,
@@ -3467,8 +3565,8 @@ static int mtip_hw_init(struct driver_data *dd)
3467 } 3565 }
3468 if (buf[288] == 0xBF) { 3566 if (buf[288] == 0xBF) {
3469 dev_info(&dd->pdev->dev, 3567 dev_info(&dd->pdev->dev,
3470 "Drive indicates rebuild has failed.\n"); 3568 "Drive is in security locked state.\n");
3471 /* TODO */ 3569 set_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag);
3472 } 3570 }
3473 } 3571 }
3474 3572
@@ -3523,9 +3621,8 @@ static int mtip_hw_exit(struct driver_data *dd)
3523 * Send standby immediate (E0h) to the drive so that it 3621 * Send standby immediate (E0h) to the drive so that it
3524 * saves its state. 3622 * saves its state.
3525 */ 3623 */
3526 if (!test_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag)) { 3624 if (!dd->sr) {
3527 3625 if (!test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag))
3528 if (!test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags))
3529 if (mtip_standby_immediate(dd->port)) 3626 if (mtip_standby_immediate(dd->port))
3530 dev_warn(&dd->pdev->dev, 3627 dev_warn(&dd->pdev->dev,
3531 "STANDBY IMMEDIATE failed\n"); 3628 "STANDBY IMMEDIATE failed\n");
@@ -3551,6 +3648,7 @@ static int mtip_hw_exit(struct driver_data *dd)
3551 dd->port->command_list_dma); 3648 dd->port->command_list_dma);
3552 /* Free the memory allocated for the for structure. */ 3649 /* Free the memory allocated for the for structure. */
3553 kfree(dd->port); 3650 kfree(dd->port);
3651 dd->port = NULL;
3554 3652
3555 return 0; 3653 return 0;
3556} 3654}
@@ -3572,7 +3670,8 @@ static int mtip_hw_shutdown(struct driver_data *dd)
3572 * Send standby immediate (E0h) to the drive so that it 3670 * Send standby immediate (E0h) to the drive so that it
3573 * saves its state. 3671 * saves its state.
3574 */ 3672 */
3575 mtip_standby_immediate(dd->port); 3673 if (!dd->sr && dd->port)
3674 mtip_standby_immediate(dd->port);
3576 3675
3577 return 0; 3676 return 0;
3578} 3677}
@@ -3887,6 +3986,10 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
3887 bio_endio(bio, -ENODATA); 3986 bio_endio(bio, -ENODATA);
3888 return; 3987 return;
3889 } 3988 }
3989 if (test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)) {
3990 bio_endio(bio, -ENXIO);
3991 return;
3992 }
3890 } 3993 }
3891 3994
3892 if (unlikely(bio->bi_rw & REQ_DISCARD)) { 3995 if (unlikely(bio->bi_rw & REQ_DISCARD)) {
@@ -4010,6 +4113,8 @@ static int mtip_block_initialize(struct driver_data *dd)
4010 dd->disk->private_data = dd; 4113 dd->disk->private_data = dd;
4011 dd->index = index; 4114 dd->index = index;
4012 4115
4116 mtip_hw_debugfs_init(dd);
4117
4013 /* 4118 /*
4014 * if rebuild pending, start the service thread, and delay the block 4119 * if rebuild pending, start the service thread, and delay the block
4015 * queue creation and add_disk() 4120 * queue creation and add_disk()
@@ -4068,6 +4173,7 @@ skip_create_disk:
4068 /* Enable the block device and add it to /dev */ 4173 /* Enable the block device and add it to /dev */
4069 add_disk(dd->disk); 4174 add_disk(dd->disk);
4070 4175
4176 dd->bdev = bdget_disk(dd->disk, 0);
4071 /* 4177 /*
4072 * Now that the disk is active, initialize any sysfs attributes 4178 * Now that the disk is active, initialize any sysfs attributes
4073 * managed by the protocol layer. 4179 * managed by the protocol layer.
@@ -4077,7 +4183,6 @@ skip_create_disk:
4077 mtip_hw_sysfs_init(dd, kobj); 4183 mtip_hw_sysfs_init(dd, kobj);
4078 kobject_put(kobj); 4184 kobject_put(kobj);
4079 } 4185 }
4080 mtip_hw_debugfs_init(dd);
4081 4186
4082 if (dd->mtip_svc_handler) { 4187 if (dd->mtip_svc_handler) {
4083 set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag); 4188 set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag);
@@ -4103,7 +4208,8 @@ start_service_thread:
4103 return rv; 4208 return rv;
4104 4209
4105kthread_run_error: 4210kthread_run_error:
4106 mtip_hw_debugfs_exit(dd); 4211 bdput(dd->bdev);
4212 dd->bdev = NULL;
4107 4213
4108 /* Delete our gendisk. This also removes the device from /dev */ 4214 /* Delete our gendisk. This also removes the device from /dev */
4109 del_gendisk(dd->disk); 4215 del_gendisk(dd->disk);
@@ -4112,6 +4218,7 @@ read_capacity_error:
4112 blk_cleanup_queue(dd->queue); 4218 blk_cleanup_queue(dd->queue);
4113 4219
4114block_queue_alloc_init_error: 4220block_queue_alloc_init_error:
4221 mtip_hw_debugfs_exit(dd);
4115disk_index_error: 4222disk_index_error:
4116 spin_lock(&rssd_index_lock); 4223 spin_lock(&rssd_index_lock);
4117 ida_remove(&rssd_index_ida, index); 4224 ida_remove(&rssd_index_ida, index);
@@ -4141,40 +4248,48 @@ static int mtip_block_remove(struct driver_data *dd)
4141{ 4248{
4142 struct kobject *kobj; 4249 struct kobject *kobj;
4143 4250
4144 if (dd->mtip_svc_handler) { 4251 if (!dd->sr) {
4145 set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags); 4252 mtip_hw_debugfs_exit(dd);
4146 wake_up_interruptible(&dd->port->svc_wait);
4147 kthread_stop(dd->mtip_svc_handler);
4148 }
4149 4253
4150 /* Clean up the sysfs attributes, if created */ 4254 if (dd->mtip_svc_handler) {
4151 if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) { 4255 set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags);
4152 kobj = kobject_get(&disk_to_dev(dd->disk)->kobj); 4256 wake_up_interruptible(&dd->port->svc_wait);
4153 if (kobj) { 4257 kthread_stop(dd->mtip_svc_handler);
4154 mtip_hw_sysfs_exit(dd, kobj);
4155 kobject_put(kobj);
4156 } 4258 }
4157 }
4158 mtip_hw_debugfs_exit(dd);
4159 4259
4160 /* 4260 /* Clean up the sysfs attributes, if created */
4161 * Delete our gendisk structure. This also removes the device 4261 if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) {
4162 * from /dev 4262 kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
4163 */ 4263 if (kobj) {
4164 if (dd->disk) { 4264 mtip_hw_sysfs_exit(dd, kobj);
4165 if (dd->disk->queue) 4265 kobject_put(kobj);
4166 del_gendisk(dd->disk); 4266 }
4167 else 4267 }
4168 put_disk(dd->disk); 4268 /*
4169 } 4269 * Delete our gendisk structure. This also removes the device
4170 4270 * from /dev
4171 spin_lock(&rssd_index_lock); 4271 */
4172 ida_remove(&rssd_index_ida, dd->index); 4272 if (dd->bdev) {
4173 spin_unlock(&rssd_index_lock); 4273 bdput(dd->bdev);
4274 dd->bdev = NULL;
4275 }
4276 if (dd->disk) {
4277 if (dd->disk->queue) {
4278 del_gendisk(dd->disk);
4279 blk_cleanup_queue(dd->queue);
4280 dd->queue = NULL;
4281 } else
4282 put_disk(dd->disk);
4283 }
4284 dd->disk = NULL;
4174 4285
4175 blk_cleanup_queue(dd->queue); 4286 spin_lock(&rssd_index_lock);
4176 dd->disk = NULL; 4287 ida_remove(&rssd_index_ida, dd->index);
4177 dd->queue = NULL; 4288 spin_unlock(&rssd_index_lock);
4289 } else {
4290 dev_info(&dd->pdev->dev, "device %s surprise removal\n",
4291 dd->disk->disk_name);
4292 }
4178 4293
4179 /* De-initialize the protocol layer. */ 4294 /* De-initialize the protocol layer. */
4180 mtip_hw_exit(dd); 4295 mtip_hw_exit(dd);
@@ -4490,8 +4605,7 @@ done:
4490static void mtip_pci_remove(struct pci_dev *pdev) 4605static void mtip_pci_remove(struct pci_dev *pdev)
4491{ 4606{
4492 struct driver_data *dd = pci_get_drvdata(pdev); 4607 struct driver_data *dd = pci_get_drvdata(pdev);
4493 int counter = 0; 4608 unsigned long flags, to;
4494 unsigned long flags;
4495 4609
4496 set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag); 4610 set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag);
4497 4611
@@ -4500,17 +4614,22 @@ static void mtip_pci_remove(struct pci_dev *pdev)
4500 list_add(&dd->remove_list, &removing_list); 4614 list_add(&dd->remove_list, &removing_list);
4501 spin_unlock_irqrestore(&dev_lock, flags); 4615 spin_unlock_irqrestore(&dev_lock, flags);
4502 4616
4503 if (mtip_check_surprise_removal(pdev)) { 4617 mtip_check_surprise_removal(pdev);
4504 while (!test_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag)) { 4618 synchronize_irq(dd->pdev->irq);
4505 counter++; 4619
4506 msleep(20); 4620 /* Spin until workers are done */
4507 if (counter == 10) { 4621 to = jiffies + msecs_to_jiffies(4000);
4508 /* Cleanup the outstanding commands */ 4622 do {
4509 mtip_command_cleanup(dd); 4623 msleep(20);
4510 break; 4624 } while (atomic_read(&dd->irq_workers_active) != 0 &&
4511 } 4625 time_before(jiffies, to));
4512 } 4626
4627 if (atomic_read(&dd->irq_workers_active) != 0) {
4628 dev_warn(&dd->pdev->dev,
4629 "Completion workers still active!\n");
4513 } 4630 }
4631 /* Cleanup the outstanding commands */
4632 mtip_command_cleanup(dd);
4514 4633
4515 /* Clean up the block layer. */ 4634 /* Clean up the block layer. */
4516 mtip_block_remove(dd); 4635 mtip_block_remove(dd);
@@ -4529,8 +4648,15 @@ static void mtip_pci_remove(struct pci_dev *pdev)
4529 list_del_init(&dd->remove_list); 4648 list_del_init(&dd->remove_list);
4530 spin_unlock_irqrestore(&dev_lock, flags); 4649 spin_unlock_irqrestore(&dev_lock, flags);
4531 4650
4532 kfree(dd); 4651 if (!dd->sr)
4652 kfree(dd);
4653 else
4654 set_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag);
4655
4533 pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); 4656 pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
4657 pci_set_drvdata(pdev, NULL);
4658 pci_dev_put(pdev);
4659
4534} 4660}
4535 4661
4536/* 4662/*
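
The removal path above trades the old fixed 10 x 20 ms wait (followed by an unconditional command cleanup) for a jiffies deadline: it polls dd->irq_workers_active for up to four seconds, warns if the completion workers never drained, and only then cleans up outstanding commands. Below is a minimal userspace sketch of that bounded-wait idiom, assuming nothing beyond what the hunk shows; the atomic counter and its simulated decrement are stand-ins, not driver code.

```c
/*
 * Userspace analog of the bounded wait in mtip_pci_remove() above: poll a
 * worker counter, but give up after a fixed deadline so removal can never
 * hang forever.  The counter and its decrement are simulated here.
 */
#define _POSIX_C_SOURCE 200809L
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static atomic_int workers_active = 2;	/* pretend two completion workers run */

static bool deadline_passed(const struct timespec *deadline)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	return now.tv_sec > deadline->tv_sec ||
	       (now.tv_sec == deadline->tv_sec && now.tv_nsec >= deadline->tv_nsec);
}

int main(void)
{
	struct timespec deadline;
	struct timespec tick = { .tv_sec = 0, .tv_nsec = 20 * 1000 * 1000 };

	clock_gettime(CLOCK_MONOTONIC, &deadline);
	deadline.tv_sec += 4;			/* 4 s budget, as in the hunk */

	while (atomic_load(&workers_active) != 0 && !deadline_passed(&deadline)) {
		nanosleep(&tick, NULL);		/* stands in for msleep(20) */
		atomic_fetch_sub(&workers_active, 1);	/* a worker finishes */
	}

	if (atomic_load(&workers_active) != 0)
		fprintf(stderr, "completion workers still active!\n");
	return 0;
}
```

Either way removal proceeds; the deadline only bounds how long it politely waits, and the warning records that workers were abandoned.
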
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 3bb8a295fbe4..9be7a1582ad3 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -140,6 +140,7 @@ enum {
140 MTIP_PF_SVC_THD_ACTIVE_BIT = 4, 140 MTIP_PF_SVC_THD_ACTIVE_BIT = 4,
141 MTIP_PF_ISSUE_CMDS_BIT = 5, 141 MTIP_PF_ISSUE_CMDS_BIT = 5,
142 MTIP_PF_REBUILD_BIT = 6, 142 MTIP_PF_REBUILD_BIT = 6,
143 MTIP_PF_SR_CLEANUP_BIT = 7,
143 MTIP_PF_SVC_THD_STOP_BIT = 8, 144 MTIP_PF_SVC_THD_STOP_BIT = 8,
144 145
145 /* below are bit numbers in 'dd_flag' defined in driver_data */ 146 /* below are bit numbers in 'dd_flag' defined in driver_data */
@@ -147,15 +148,18 @@ enum {
147 MTIP_DDF_REMOVE_PENDING_BIT = 1, 148 MTIP_DDF_REMOVE_PENDING_BIT = 1,
148 MTIP_DDF_OVER_TEMP_BIT = 2, 149 MTIP_DDF_OVER_TEMP_BIT = 2,
149 MTIP_DDF_WRITE_PROTECT_BIT = 3, 150 MTIP_DDF_WRITE_PROTECT_BIT = 3,
150 MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) | 151 MTIP_DDF_REMOVE_DONE_BIT = 4,
151 (1 << MTIP_DDF_SEC_LOCK_BIT) |
152 (1 << MTIP_DDF_OVER_TEMP_BIT) |
153 (1 << MTIP_DDF_WRITE_PROTECT_BIT)),
154
155 MTIP_DDF_CLEANUP_BIT = 5, 152 MTIP_DDF_CLEANUP_BIT = 5,
156 MTIP_DDF_RESUME_BIT = 6, 153 MTIP_DDF_RESUME_BIT = 6,
157 MTIP_DDF_INIT_DONE_BIT = 7, 154 MTIP_DDF_INIT_DONE_BIT = 7,
158 MTIP_DDF_REBUILD_FAILED_BIT = 8, 155 MTIP_DDF_REBUILD_FAILED_BIT = 8,
156
157 MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) |
158 (1 << MTIP_DDF_SEC_LOCK_BIT) |
159 (1 << MTIP_DDF_OVER_TEMP_BIT) |
160 (1 << MTIP_DDF_WRITE_PROTECT_BIT) |
161 (1 << MTIP_DDF_REBUILD_FAILED_BIT)),
162
159}; 163};
160 164
161struct smart_attr { 165struct smart_attr {
@@ -499,6 +503,8 @@ struct driver_data {
499 503
500 bool trim_supp; /* flag indicating trim support */ 504 bool trim_supp; /* flag indicating trim support */
501 505
506 bool sr;
507
502 int numa_node; /* NUMA support */ 508 int numa_node; /* NUMA support */
503 509
504 char workq_name[32]; 510 char workq_name[32];
@@ -511,6 +517,8 @@ struct driver_data {
511 517
512 int isr_binding; 518 int isr_binding;
513 519
520 struct block_device *bdev;
521
514 int unal_qdepth; /* qdepth of unaligned IO queue */ 522 int unal_qdepth; /* qdepth of unaligned IO queue */
515 523
516 struct list_head online_list; /* linkage for online list */ 524 struct list_head online_list; /* linkage for online list */
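
The header changes above fold MTIP_DDF_REBUILD_FAILED_BIT into the combined MTIP_DDF_STOP_IO mask (now placed below the individual bit definitions so it can reference all of them), so code that checks dd_flag against MTIP_DDF_STOP_IO treats a failed rebuild like the other stop conditions. A minimal userspace sketch of that idiom follows; the bit values match the hunk except MTIP_DDF_SEC_LOCK_BIT, whose value (assumed 0 here) lies outside the quoted context, and io_allowed() is an invented helper, not a driver function.

```c
/*
 * Userspace sketch of the flag-mask idiom in mtip32xx.h above: each stop
 * condition is a bit number, and the STOP_IO mask is the OR of their masks,
 * so one AND decides whether I/O must be refused.  DDF_SEC_LOCK_BIT's value
 * is assumed; io_allowed() is invented for this sketch.
 */
#include <stdbool.h>
#include <stdio.h>

enum {
	DDF_SEC_LOCK_BIT	= 0,	/* assumed value */
	DDF_REMOVE_PENDING_BIT	= 1,
	DDF_OVER_TEMP_BIT	= 2,
	DDF_WRITE_PROTECT_BIT	= 3,
	DDF_REBUILD_FAILED_BIT	= 8,

	DDF_STOP_IO = (1 << DDF_REMOVE_PENDING_BIT) |
		      (1 << DDF_SEC_LOCK_BIT) |
		      (1 << DDF_OVER_TEMP_BIT) |
		      (1 << DDF_WRITE_PROTECT_BIT) |
		      (1 << DDF_REBUILD_FAILED_BIT),
};

static bool io_allowed(unsigned long dd_flag)
{
	return !(dd_flag & DDF_STOP_IO);
}

int main(void)
{
	unsigned long dd_flag = 0;

	printf("clean drive:    I/O %s\n", io_allowed(dd_flag) ? "allowed" : "refused");
	dd_flag |= 1UL << DDF_REBUILD_FAILED_BIT;	/* rebuild failed */
	printf("rebuild failed: I/O %s\n", io_allowed(dd_flag) ? "allowed" : "refused");
	return 0;
}
```
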
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
new file mode 100644
index 000000000000..f370fc13aea5
--- /dev/null
+++ b/drivers/block/null_blk.c
@@ -0,0 +1,635 @@
1#include <linux/module.h>
2#include <linux/moduleparam.h>
3#include <linux/sched.h>
4#include <linux/fs.h>
5#include <linux/blkdev.h>
6#include <linux/init.h>
7#include <linux/slab.h>
8#include <linux/blk-mq.h>
9#include <linux/hrtimer.h>
10
11struct nullb_cmd {
12 struct list_head list;
13 struct llist_node ll_list;
14 struct call_single_data csd;
15 struct request *rq;
16 struct bio *bio;
17 unsigned int tag;
18 struct nullb_queue *nq;
19};
20
21struct nullb_queue {
22 unsigned long *tag_map;
23 wait_queue_head_t wait;
24 unsigned int queue_depth;
25
26 struct nullb_cmd *cmds;
27};
28
29struct nullb {
30 struct list_head list;
31 unsigned int index;
32 struct request_queue *q;
33 struct gendisk *disk;
34 struct hrtimer timer;
35 unsigned int queue_depth;
36 spinlock_t lock;
37
38 struct nullb_queue *queues;
39 unsigned int nr_queues;
40};
41
42static LIST_HEAD(nullb_list);
43static struct mutex lock;
44static int null_major;
45static int nullb_indexes;
46
47struct completion_queue {
48 struct llist_head list;
49 struct hrtimer timer;
50};
51
52/*
53 * These are per-cpu for now, they will need to be configured by the
54 * complete_queues parameter and appropriately mapped.
55 */
56static DEFINE_PER_CPU(struct completion_queue, completion_queues);
57
58enum {
59 NULL_IRQ_NONE = 0,
60 NULL_IRQ_SOFTIRQ = 1,
61 NULL_IRQ_TIMER = 2,
62
63 NULL_Q_BIO = 0,
64 NULL_Q_RQ = 1,
65 NULL_Q_MQ = 2,
66};
67
68static int submit_queues = 1;
69module_param(submit_queues, int, S_IRUGO);
70MODULE_PARM_DESC(submit_queues, "Number of submission queues");
71
72static int home_node = NUMA_NO_NODE;
73module_param(home_node, int, S_IRUGO);
74MODULE_PARM_DESC(home_node, "Home node for the device");
75
76static int queue_mode = NULL_Q_MQ;
77module_param(queue_mode, int, S_IRUGO);
 78MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
79
80static int gb = 250;
81module_param(gb, int, S_IRUGO);
82MODULE_PARM_DESC(gb, "Size in GB");
83
84static int bs = 512;
85module_param(bs, int, S_IRUGO);
86MODULE_PARM_DESC(bs, "Block size (in bytes)");
87
88static int nr_devices = 2;
89module_param(nr_devices, int, S_IRUGO);
90MODULE_PARM_DESC(nr_devices, "Number of devices to register");
91
92static int irqmode = NULL_IRQ_SOFTIRQ;
93module_param(irqmode, int, S_IRUGO);
94MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
95
96static int completion_nsec = 10000;
97module_param(completion_nsec, int, S_IRUGO);
98MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");
99
100static int hw_queue_depth = 64;
101module_param(hw_queue_depth, int, S_IRUGO);
102MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");
103
104static bool use_per_node_hctx = true;
105module_param(use_per_node_hctx, bool, S_IRUGO);
106MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: true");
107
108static void put_tag(struct nullb_queue *nq, unsigned int tag)
109{
110 clear_bit_unlock(tag, nq->tag_map);
111
112 if (waitqueue_active(&nq->wait))
113 wake_up(&nq->wait);
114}
115
116static unsigned int get_tag(struct nullb_queue *nq)
117{
118 unsigned int tag;
119
120 do {
121 tag = find_first_zero_bit(nq->tag_map, nq->queue_depth);
122 if (tag >= nq->queue_depth)
123 return -1U;
124 } while (test_and_set_bit_lock(tag, nq->tag_map));
125
126 return tag;
127}
128
129static void free_cmd(struct nullb_cmd *cmd)
130{
131 put_tag(cmd->nq, cmd->tag);
132}
133
134static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
135{
136 struct nullb_cmd *cmd;
137 unsigned int tag;
138
139 tag = get_tag(nq);
140 if (tag != -1U) {
141 cmd = &nq->cmds[tag];
142 cmd->tag = tag;
143 cmd->nq = nq;
144 return cmd;
145 }
146
147 return NULL;
148}
149
150static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
151{
152 struct nullb_cmd *cmd;
153 DEFINE_WAIT(wait);
154
155 cmd = __alloc_cmd(nq);
156 if (cmd || !can_wait)
157 return cmd;
158
159 do {
160 prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
161 cmd = __alloc_cmd(nq);
162 if (cmd)
163 break;
164
165 io_schedule();
166 } while (1);
167
168 finish_wait(&nq->wait, &wait);
169 return cmd;
170}
171
172static void end_cmd(struct nullb_cmd *cmd)
173{
174 if (cmd->rq) {
175 if (queue_mode == NULL_Q_MQ)
176 blk_mq_end_io(cmd->rq, 0);
177 else {
178 INIT_LIST_HEAD(&cmd->rq->queuelist);
179 blk_end_request_all(cmd->rq, 0);
180 }
181 } else if (cmd->bio)
182 bio_endio(cmd->bio, 0);
183
184 if (queue_mode != NULL_Q_MQ)
185 free_cmd(cmd);
186}
187
188static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
189{
190 struct completion_queue *cq;
191 struct llist_node *entry;
192 struct nullb_cmd *cmd;
193
194 cq = &per_cpu(completion_queues, smp_processor_id());
195
196 while ((entry = llist_del_all(&cq->list)) != NULL) {
197 do {
198 cmd = container_of(entry, struct nullb_cmd, ll_list);
199 end_cmd(cmd);
200 entry = entry->next;
201 } while (entry);
202 }
203
204 return HRTIMER_NORESTART;
205}
206
207static void null_cmd_end_timer(struct nullb_cmd *cmd)
208{
209 struct completion_queue *cq = &per_cpu(completion_queues, get_cpu());
210
211 cmd->ll_list.next = NULL;
212 if (llist_add(&cmd->ll_list, &cq->list)) {
213 ktime_t kt = ktime_set(0, completion_nsec);
214
215 hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL);
216 }
217
218 put_cpu();
219}
220
221static void null_softirq_done_fn(struct request *rq)
222{
223 blk_end_request_all(rq, 0);
224}
225
226#ifdef CONFIG_SMP
227
228static void null_ipi_cmd_end_io(void *data)
229{
230 struct completion_queue *cq;
231 struct llist_node *entry, *next;
232 struct nullb_cmd *cmd;
233
234 cq = &per_cpu(completion_queues, smp_processor_id());
235
236 entry = llist_del_all(&cq->list);
237
238 while (entry) {
239 next = entry->next;
240 cmd = llist_entry(entry, struct nullb_cmd, ll_list);
241 end_cmd(cmd);
242 entry = next;
243 }
244}
245
246static void null_cmd_end_ipi(struct nullb_cmd *cmd)
247{
248 struct call_single_data *data = &cmd->csd;
249 int cpu = get_cpu();
250 struct completion_queue *cq = &per_cpu(completion_queues, cpu);
251
252 cmd->ll_list.next = NULL;
253
254 if (llist_add(&cmd->ll_list, &cq->list)) {
255 data->func = null_ipi_cmd_end_io;
256 data->flags = 0;
257 __smp_call_function_single(cpu, data, 0);
258 }
259
260 put_cpu();
261}
262
263#endif /* CONFIG_SMP */
264
265static inline void null_handle_cmd(struct nullb_cmd *cmd)
266{
267 /* Complete IO by inline, softirq or timer */
268 switch (irqmode) {
269 case NULL_IRQ_NONE:
270 end_cmd(cmd);
271 break;
272 case NULL_IRQ_SOFTIRQ:
273#ifdef CONFIG_SMP
274 null_cmd_end_ipi(cmd);
275#else
276 end_cmd(cmd);
277#endif
278 break;
279 case NULL_IRQ_TIMER:
280 null_cmd_end_timer(cmd);
281 break;
282 }
283}
284
285static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
286{
287 int index = 0;
288
289 if (nullb->nr_queues != 1)
290 index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues);
291
292 return &nullb->queues[index];
293}
294
295static void null_queue_bio(struct request_queue *q, struct bio *bio)
296{
297 struct nullb *nullb = q->queuedata;
298 struct nullb_queue *nq = nullb_to_queue(nullb);
299 struct nullb_cmd *cmd;
300
301 cmd = alloc_cmd(nq, 1);
302 cmd->bio = bio;
303
304 null_handle_cmd(cmd);
305}
306
307static int null_rq_prep_fn(struct request_queue *q, struct request *req)
308{
309 struct nullb *nullb = q->queuedata;
310 struct nullb_queue *nq = nullb_to_queue(nullb);
311 struct nullb_cmd *cmd;
312
313 cmd = alloc_cmd(nq, 0);
314 if (cmd) {
315 cmd->rq = req;
316 req->special = cmd;
317 return BLKPREP_OK;
318 }
319
320 return BLKPREP_DEFER;
321}
322
323static void null_request_fn(struct request_queue *q)
324{
325 struct request *rq;
326
327 while ((rq = blk_fetch_request(q)) != NULL) {
328 struct nullb_cmd *cmd = rq->special;
329
330 spin_unlock_irq(q->queue_lock);
331 null_handle_cmd(cmd);
332 spin_lock_irq(q->queue_lock);
333 }
334}
335
336static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
337{
338 struct nullb_cmd *cmd = rq->special;
339
340 cmd->rq = rq;
341 cmd->nq = hctx->driver_data;
342
343 null_handle_cmd(cmd);
344 return BLK_MQ_RQ_QUEUE_OK;
345}
346
347static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_reg *reg, unsigned int hctx_index)
348{
349 return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL,
350 hctx_index);
351}
352
353static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index)
354{
355 kfree(hctx);
356}
357
358static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
359 unsigned int index)
360{
361 struct nullb *nullb = data;
362 struct nullb_queue *nq = &nullb->queues[index];
363
364 init_waitqueue_head(&nq->wait);
365 nq->queue_depth = nullb->queue_depth;
366 nullb->nr_queues++;
367 hctx->driver_data = nq;
368
369 return 0;
370}
371
372static struct blk_mq_ops null_mq_ops = {
373 .queue_rq = null_queue_rq,
374 .map_queue = blk_mq_map_queue,
375 .init_hctx = null_init_hctx,
376};
377
378static struct blk_mq_reg null_mq_reg = {
379 .ops = &null_mq_ops,
380 .queue_depth = 64,
381 .cmd_size = sizeof(struct nullb_cmd),
382 .flags = BLK_MQ_F_SHOULD_MERGE,
383};
384
385static void null_del_dev(struct nullb *nullb)
386{
387 list_del_init(&nullb->list);
388
389 del_gendisk(nullb->disk);
390 if (queue_mode == NULL_Q_MQ)
391 blk_mq_free_queue(nullb->q);
392 else
393 blk_cleanup_queue(nullb->q);
394 put_disk(nullb->disk);
395 kfree(nullb);
396}
397
398static int null_open(struct block_device *bdev, fmode_t mode)
399{
400 return 0;
401}
402
403static void null_release(struct gendisk *disk, fmode_t mode)
404{
405}
406
407static const struct block_device_operations null_fops = {
408 .owner = THIS_MODULE,
409 .open = null_open,
410 .release = null_release,
411};
412
413static int setup_commands(struct nullb_queue *nq)
414{
415 struct nullb_cmd *cmd;
416 int i, tag_size;
417
418 nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL);
419 if (!nq->cmds)
420 return 1;
421
422 tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
423 nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL);
424 if (!nq->tag_map) {
425 kfree(nq->cmds);
426 return 1;
427 }
428
429 for (i = 0; i < nq->queue_depth; i++) {
430 cmd = &nq->cmds[i];
431 INIT_LIST_HEAD(&cmd->list);
432 cmd->ll_list.next = NULL;
433 cmd->tag = -1U;
434 }
435
436 return 0;
437}
438
439static void cleanup_queue(struct nullb_queue *nq)
440{
441 kfree(nq->tag_map);
442 kfree(nq->cmds);
443}
444
445static void cleanup_queues(struct nullb *nullb)
446{
447 int i;
448
449 for (i = 0; i < nullb->nr_queues; i++)
450 cleanup_queue(&nullb->queues[i]);
451
452 kfree(nullb->queues);
453}
454
455static int setup_queues(struct nullb *nullb)
456{
457 struct nullb_queue *nq;
458 int i;
459
460 nullb->queues = kzalloc(submit_queues * sizeof(*nq), GFP_KERNEL);
461 if (!nullb->queues)
462 return 1;
463
464 nullb->nr_queues = 0;
465 nullb->queue_depth = hw_queue_depth;
466
467 if (queue_mode == NULL_Q_MQ)
468 return 0;
469
470 for (i = 0; i < submit_queues; i++) {
471 nq = &nullb->queues[i];
472 init_waitqueue_head(&nq->wait);
473 nq->queue_depth = hw_queue_depth;
474 if (setup_commands(nq))
475 break;
476 nullb->nr_queues++;
477 }
478
479 if (i == submit_queues)
480 return 0;
481
482 cleanup_queues(nullb);
483 return 1;
484}
485
486static int null_add_dev(void)
487{
488 struct gendisk *disk;
489 struct nullb *nullb;
490 sector_t size;
491
492 nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node);
493 if (!nullb)
494 return -ENOMEM;
495
496 spin_lock_init(&nullb->lock);
497
498 if (queue_mode == NULL_Q_MQ && use_per_node_hctx)
499 submit_queues = nr_online_nodes;
500
501 if (setup_queues(nullb))
502 goto err;
503
504 if (queue_mode == NULL_Q_MQ) {
505 null_mq_reg.numa_node = home_node;
506 null_mq_reg.queue_depth = hw_queue_depth;
507 null_mq_reg.nr_hw_queues = submit_queues;
508
509 if (use_per_node_hctx) {
510 null_mq_reg.ops->alloc_hctx = null_alloc_hctx;
511 null_mq_reg.ops->free_hctx = null_free_hctx;
512 } else {
513 null_mq_reg.ops->alloc_hctx = blk_mq_alloc_single_hw_queue;
514 null_mq_reg.ops->free_hctx = blk_mq_free_single_hw_queue;
515 }
516
517 nullb->q = blk_mq_init_queue(&null_mq_reg, nullb);
518 } else if (queue_mode == NULL_Q_BIO) {
519 nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
520 blk_queue_make_request(nullb->q, null_queue_bio);
521 } else {
522 nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
523 blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
524 if (nullb->q)
525 blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
526 }
527
528 if (!nullb->q)
529 goto queue_fail;
530
531 nullb->q->queuedata = nullb;
532 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
533
534 disk = nullb->disk = alloc_disk_node(1, home_node);
535 if (!disk) {
536queue_fail:
537 if (queue_mode == NULL_Q_MQ)
538 blk_mq_free_queue(nullb->q);
539 else
540 blk_cleanup_queue(nullb->q);
541 cleanup_queues(nullb);
542err:
543 kfree(nullb);
544 return -ENOMEM;
545 }
546
547 mutex_lock(&lock);
548 list_add_tail(&nullb->list, &nullb_list);
549 nullb->index = nullb_indexes++;
550 mutex_unlock(&lock);
551
552 blk_queue_logical_block_size(nullb->q, bs);
553 blk_queue_physical_block_size(nullb->q, bs);
554
555 size = gb * 1024 * 1024 * 1024ULL;
556 sector_div(size, bs);
557 set_capacity(disk, size);
558
559 disk->flags |= GENHD_FL_EXT_DEVT;
560 disk->major = null_major;
561 disk->first_minor = nullb->index;
562 disk->fops = &null_fops;
563 disk->private_data = nullb;
564 disk->queue = nullb->q;
565 sprintf(disk->disk_name, "nullb%d", nullb->index);
566 add_disk(disk);
567 return 0;
568}
569
570static int __init null_init(void)
571{
572 unsigned int i;
573
574#if !defined(CONFIG_SMP)
575 if (irqmode == NULL_IRQ_SOFTIRQ) {
576 pr_warn("null_blk: softirq completions not available.\n");
577 pr_warn("null_blk: using direct completions.\n");
578 irqmode = NULL_IRQ_NONE;
579 }
580#endif
581
582 if (submit_queues > nr_cpu_ids)
583 submit_queues = nr_cpu_ids;
584 else if (!submit_queues)
585 submit_queues = 1;
586
587 mutex_init(&lock);
588
589 /* Initialize a separate list for each CPU for issuing softirqs */
590 for_each_possible_cpu(i) {
591 struct completion_queue *cq = &per_cpu(completion_queues, i);
592
593 init_llist_head(&cq->list);
594
595 if (irqmode != NULL_IRQ_TIMER)
596 continue;
597
598 hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
599 cq->timer.function = null_cmd_timer_expired;
600 }
601
602 null_major = register_blkdev(0, "nullb");
603 if (null_major < 0)
604 return null_major;
605
606 for (i = 0; i < nr_devices; i++) {
607 if (null_add_dev()) {
608 unregister_blkdev(null_major, "nullb");
609 return -EINVAL;
610 }
611 }
612
613 pr_info("null: module loaded\n");
614 return 0;
615}
616
617static void __exit null_exit(void)
618{
619 struct nullb *nullb;
620
621 unregister_blkdev(null_major, "nullb");
622
623 mutex_lock(&lock);
624 while (!list_empty(&nullb_list)) {
625 nullb = list_entry(nullb_list.next, struct nullb, list);
626 null_del_dev(nullb);
627 }
628 mutex_unlock(&lock);
629}
630
631module_init(null_init);
632module_exit(null_exit);
633
634MODULE_AUTHOR("Jens Axboe <jaxboe@fusionio.com>");
635MODULE_LICENSE("GPL");
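
null_blk's per-queue tag allocator above (get_tag()/put_tag()) scans tag_map for a clear bit with find_first_zero_bit(), claims it with test_and_set_bit_lock() and retries if it lost the race, and frees it with clear_bit_unlock() plus a waitqueue wake-up. Here is a deliberately small userspace analog, assuming a single-word bitmap and skipping the waitqueue; it uses C11 atomics and a GCC/Clang builtin in place of the kernel bitops.

```c
/*
 * Userspace sketch of null_blk's per-queue tag allocator above: find a clear
 * bit, set it atomically, retry on races; -1U signals "no tag".  One word of
 * bits stands in for the driver's tag_map; there is no waitqueue here.
 */
#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned long tag_map;

static unsigned int get_tag(void)
{
	unsigned long map;
	unsigned int tag;

	do {
		map = atomic_load(&tag_map);
		if (~map == 0)
			return -1U;			/* every tag in the word is taken */
		tag = (unsigned int)__builtin_ctzl(~map);	/* lowest clear bit */
	} while (atomic_fetch_or(&tag_map, 1UL << tag) & (1UL << tag));

	return tag;
}

static void put_tag(unsigned int tag)
{
	atomic_fetch_and(&tag_map, ~(1UL << tag));
}

int main(void)
{
	unsigned int a = get_tag();
	unsigned int b = get_tag();

	printf("got tags %u and %u\n", a, b);
	put_tag(a);
	printf("after freeing %u, next tag is %u\n", a, get_tag());
	return 0;
}
```

In the driver, alloc_cmd() layers the optional blocking behaviour on top of exactly this lock-free fast path by parking on nq->wait until put_tag() wakes it.
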
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index ce79a590b45b..26d03fa0bf26 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -36,6 +36,7 @@
36#include <linux/moduleparam.h> 36#include <linux/moduleparam.h>
37#include <linux/pci.h> 37#include <linux/pci.h>
38#include <linux/poison.h> 38#include <linux/poison.h>
39#include <linux/ptrace.h>
39#include <linux/sched.h> 40#include <linux/sched.h>
40#include <linux/slab.h> 41#include <linux/slab.h>
41#include <linux/types.h> 42#include <linux/types.h>
@@ -79,7 +80,9 @@ struct nvme_queue {
79 u16 sq_head; 80 u16 sq_head;
80 u16 sq_tail; 81 u16 sq_tail;
81 u16 cq_head; 82 u16 cq_head;
82 u16 cq_phase; 83 u8 cq_phase;
84 u8 cqe_seen;
85 u8 q_suspended;
83 unsigned long cmdid_data[]; 86 unsigned long cmdid_data[];
84}; 87};
85 88
@@ -115,6 +118,11 @@ static struct nvme_cmd_info *nvme_cmd_info(struct nvme_queue *nvmeq)
115 return (void *)&nvmeq->cmdid_data[BITS_TO_LONGS(nvmeq->q_depth)]; 118 return (void *)&nvmeq->cmdid_data[BITS_TO_LONGS(nvmeq->q_depth)];
116} 119}
117 120
121static unsigned nvme_queue_extra(int depth)
122{
123 return DIV_ROUND_UP(depth, 8) + (depth * sizeof(struct nvme_cmd_info));
124}
125
118/** 126/**
119 * alloc_cmdid() - Allocate a Command ID 127 * alloc_cmdid() - Allocate a Command ID
120 * @nvmeq: The queue that will be used for this command 128 * @nvmeq: The queue that will be used for this command
@@ -285,6 +293,7 @@ nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp)
285 iod->npages = -1; 293 iod->npages = -1;
286 iod->length = nbytes; 294 iod->length = nbytes;
287 iod->nents = 0; 295 iod->nents = 0;
296 iod->start_time = jiffies;
288 } 297 }
289 298
290 return iod; 299 return iod;
@@ -308,6 +317,30 @@ void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
308 kfree(iod); 317 kfree(iod);
309} 318}
310 319
320static void nvme_start_io_acct(struct bio *bio)
321{
322 struct gendisk *disk = bio->bi_bdev->bd_disk;
323 const int rw = bio_data_dir(bio);
324 int cpu = part_stat_lock();
325 part_round_stats(cpu, &disk->part0);
326 part_stat_inc(cpu, &disk->part0, ios[rw]);
327 part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio));
328 part_inc_in_flight(&disk->part0, rw);
329 part_stat_unlock();
330}
331
332static void nvme_end_io_acct(struct bio *bio, unsigned long start_time)
333{
334 struct gendisk *disk = bio->bi_bdev->bd_disk;
335 const int rw = bio_data_dir(bio);
336 unsigned long duration = jiffies - start_time;
337 int cpu = part_stat_lock();
338 part_stat_add(cpu, &disk->part0, ticks[rw], duration);
339 part_round_stats(cpu, &disk->part0);
340 part_dec_in_flight(&disk->part0, rw);
341 part_stat_unlock();
342}
343
311static void bio_completion(struct nvme_dev *dev, void *ctx, 344static void bio_completion(struct nvme_dev *dev, void *ctx,
312 struct nvme_completion *cqe) 345 struct nvme_completion *cqe)
313{ 346{
@@ -315,9 +348,11 @@ static void bio_completion(struct nvme_dev *dev, void *ctx,
315 struct bio *bio = iod->private; 348 struct bio *bio = iod->private;
316 u16 status = le16_to_cpup(&cqe->status) >> 1; 349 u16 status = le16_to_cpup(&cqe->status) >> 1;
317 350
318 if (iod->nents) 351 if (iod->nents) {
319 dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents, 352 dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents,
320 bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); 353 bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
354 nvme_end_io_acct(bio, iod->start_time);
355 }
321 nvme_free_iod(dev, iod); 356 nvme_free_iod(dev, iod);
322 if (status) 357 if (status)
323 bio_endio(bio, -EIO); 358 bio_endio(bio, -EIO);
@@ -422,10 +457,8 @@ static void nvme_bio_pair_endio(struct bio *bio, int err)
422 457
423 if (atomic_dec_and_test(&bp->cnt)) { 458 if (atomic_dec_and_test(&bp->cnt)) {
424 bio_endio(bp->parent, bp->err); 459 bio_endio(bp->parent, bp->err);
425 if (bp->bv1) 460 kfree(bp->bv1);
426 kfree(bp->bv1); 461 kfree(bp->bv2);
427 if (bp->bv2)
428 kfree(bp->bv2);
429 kfree(bp); 462 kfree(bp);
430 } 463 }
431} 464}
@@ -695,6 +728,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
695 cmnd->rw.control = cpu_to_le16(control); 728 cmnd->rw.control = cpu_to_le16(control);
696 cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt); 729 cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
697 730
731 nvme_start_io_acct(bio);
698 if (++nvmeq->sq_tail == nvmeq->q_depth) 732 if (++nvmeq->sq_tail == nvmeq->q_depth)
699 nvmeq->sq_tail = 0; 733 nvmeq->sq_tail = 0;
700 writel(nvmeq->sq_tail, nvmeq->q_db); 734 writel(nvmeq->sq_tail, nvmeq->q_db);
@@ -709,26 +743,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
709 return result; 743 return result;
710} 744}
711 745
712static void nvme_make_request(struct request_queue *q, struct bio *bio) 746static int nvme_process_cq(struct nvme_queue *nvmeq)
713{
714 struct nvme_ns *ns = q->queuedata;
715 struct nvme_queue *nvmeq = get_nvmeq(ns->dev);
716 int result = -EBUSY;
717
718 spin_lock_irq(&nvmeq->q_lock);
719 if (bio_list_empty(&nvmeq->sq_cong))
720 result = nvme_submit_bio_queue(nvmeq, ns, bio);
721 if (unlikely(result)) {
722 if (bio_list_empty(&nvmeq->sq_cong))
723 add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
724 bio_list_add(&nvmeq->sq_cong, bio);
725 }
726
727 spin_unlock_irq(&nvmeq->q_lock);
728 put_nvmeq(nvmeq);
729}
730
731static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq)
732{ 747{
733 u16 head, phase; 748 u16 head, phase;
734 749
@@ -758,13 +773,40 @@ static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq)
758 * a big problem. 773 * a big problem.
759 */ 774 */
760 if (head == nvmeq->cq_head && phase == nvmeq->cq_phase) 775 if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
761 return IRQ_NONE; 776 return 0;
762 777
763 writel(head, nvmeq->q_db + (1 << nvmeq->dev->db_stride)); 778 writel(head, nvmeq->q_db + (1 << nvmeq->dev->db_stride));
764 nvmeq->cq_head = head; 779 nvmeq->cq_head = head;
765 nvmeq->cq_phase = phase; 780 nvmeq->cq_phase = phase;
766 781
767 return IRQ_HANDLED; 782 nvmeq->cqe_seen = 1;
783 return 1;
784}
785
786static void nvme_make_request(struct request_queue *q, struct bio *bio)
787{
788 struct nvme_ns *ns = q->queuedata;
789 struct nvme_queue *nvmeq = get_nvmeq(ns->dev);
790 int result = -EBUSY;
791
792 if (!nvmeq) {
793 put_nvmeq(NULL);
794 bio_endio(bio, -EIO);
795 return;
796 }
797
798 spin_lock_irq(&nvmeq->q_lock);
799 if (!nvmeq->q_suspended && bio_list_empty(&nvmeq->sq_cong))
800 result = nvme_submit_bio_queue(nvmeq, ns, bio);
801 if (unlikely(result)) {
802 if (bio_list_empty(&nvmeq->sq_cong))
803 add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
804 bio_list_add(&nvmeq->sq_cong, bio);
805 }
806
807 nvme_process_cq(nvmeq);
808 spin_unlock_irq(&nvmeq->q_lock);
809 put_nvmeq(nvmeq);
768} 810}
769 811
770static irqreturn_t nvme_irq(int irq, void *data) 812static irqreturn_t nvme_irq(int irq, void *data)
@@ -772,7 +814,9 @@ static irqreturn_t nvme_irq(int irq, void *data)
772 irqreturn_t result; 814 irqreturn_t result;
773 struct nvme_queue *nvmeq = data; 815 struct nvme_queue *nvmeq = data;
774 spin_lock(&nvmeq->q_lock); 816 spin_lock(&nvmeq->q_lock);
775 result = nvme_process_cq(nvmeq); 817 nvme_process_cq(nvmeq);
818 result = nvmeq->cqe_seen ? IRQ_HANDLED : IRQ_NONE;
819 nvmeq->cqe_seen = 0;
776 spin_unlock(&nvmeq->q_lock); 820 spin_unlock(&nvmeq->q_lock);
777 return result; 821 return result;
778} 822}
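
The hunk above turns nvme_process_cq() into a plain reaper and has nvme_irq() report IRQ_HANDLED based on a cqe_seen flag. The reaping relies on the NVMe phase tag: an entry is new when its phase bit matches the expected phase, and the expectation flips each time the head wraps, so no producer/consumer index ever has to be shared. The sketch below is a single-threaded toy of that idiom; the producer side and the entry layout are stand-ins, not the NVMe completion-queue format.

```c
/*
 * Toy, single-threaded illustration of the phase-tag consumption idiom used
 * by nvme_process_cq() above.  The consumer keeps only a head index and an
 * expected phase; an entry is "new" when its phase matches, and the
 * expectation flips on every wrap.  produce() is a stand-in, not NVMe.
 */
#include <stdint.h>
#include <stdio.h>

#define CQ_DEPTH 4

struct cqe {
	unsigned int data;
	uint8_t phase;
};

static struct cqe cq[CQ_DEPTH];		/* zeroed, so phase 1 means "new" */
static unsigned int head;
static uint8_t expected_phase = 1;
static unsigned int prod_tail;
static uint8_t prod_phase = 1;

static void produce(unsigned int data)
{
	cq[prod_tail].data = data;
	cq[prod_tail].phase = prod_phase;	/* written last by real hardware */
	if (++prod_tail == CQ_DEPTH) {
		prod_tail = 0;
		prod_phase = !prod_phase;
	}
}

static int process_cq(void)
{
	int cqe_seen = 0;

	while (cq[head].phase == expected_phase) {
		printf("completed slot %u (data %u)\n", head, cq[head].data);
		if (++head == CQ_DEPTH) {
			head = 0;
			expected_phase = !expected_phase;	/* flip on wrap */
		}
		cqe_seen = 1;
	}
	return cqe_seen;	/* mirrors nvmeq->cqe_seen */
}

int main(void)
{
	produce(100); produce(101); produce(102);
	process_cq();			/* reaps slots 0..2 */
	produce(103); produce(104);	/* 104 wraps to slot 0 with flipped phase */
	process_cq();			/* reaps 103, wraps, reaps 104 */
	printf("idle pass saw work: %d\n", process_cq());
	return 0;
}
```

Deferring the IRQ_HANDLED decision to cqe_seen matters because nvme_make_request() now also reaps the queue inline under q_lock, so an interrupt may legitimately find the queue already drained.
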
@@ -986,8 +1030,15 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout)
986 } 1030 }
987} 1031}
988 1032
989static void nvme_free_queue_mem(struct nvme_queue *nvmeq) 1033static void nvme_free_queue(struct nvme_queue *nvmeq)
990{ 1034{
1035 spin_lock_irq(&nvmeq->q_lock);
1036 while (bio_list_peek(&nvmeq->sq_cong)) {
1037 struct bio *bio = bio_list_pop(&nvmeq->sq_cong);
1038 bio_endio(bio, -EIO);
1039 }
1040 spin_unlock_irq(&nvmeq->q_lock);
1041
991 dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), 1042 dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
992 (void *)nvmeq->cqes, nvmeq->cq_dma_addr); 1043 (void *)nvmeq->cqes, nvmeq->cq_dma_addr);
993 dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), 1044 dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
@@ -995,17 +1046,28 @@ static void nvme_free_queue_mem(struct nvme_queue *nvmeq)
995 kfree(nvmeq); 1046 kfree(nvmeq);
996} 1047}
997 1048
998static void nvme_free_queue(struct nvme_dev *dev, int qid) 1049static void nvme_free_queues(struct nvme_dev *dev)
1050{
1051 int i;
1052
1053 for (i = dev->queue_count - 1; i >= 0; i--) {
1054 nvme_free_queue(dev->queues[i]);
1055 dev->queue_count--;
1056 dev->queues[i] = NULL;
1057 }
1058}
1059
1060static void nvme_disable_queue(struct nvme_dev *dev, int qid)
999{ 1061{
1000 struct nvme_queue *nvmeq = dev->queues[qid]; 1062 struct nvme_queue *nvmeq = dev->queues[qid];
1001 int vector = dev->entry[nvmeq->cq_vector].vector; 1063 int vector = dev->entry[nvmeq->cq_vector].vector;
1002 1064
1003 spin_lock_irq(&nvmeq->q_lock); 1065 spin_lock_irq(&nvmeq->q_lock);
1004 nvme_cancel_ios(nvmeq, false); 1066 if (nvmeq->q_suspended) {
1005 while (bio_list_peek(&nvmeq->sq_cong)) { 1067 spin_unlock_irq(&nvmeq->q_lock);
1006 struct bio *bio = bio_list_pop(&nvmeq->sq_cong); 1068 return;
1007 bio_endio(bio, -EIO);
1008 } 1069 }
1070 nvmeq->q_suspended = 1;
1009 spin_unlock_irq(&nvmeq->q_lock); 1071 spin_unlock_irq(&nvmeq->q_lock);
1010 1072
1011 irq_set_affinity_hint(vector, NULL); 1073 irq_set_affinity_hint(vector, NULL);
@@ -1017,15 +1079,17 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid)
1017 adapter_delete_cq(dev, qid); 1079 adapter_delete_cq(dev, qid);
1018 } 1080 }
1019 1081
1020 nvme_free_queue_mem(nvmeq); 1082 spin_lock_irq(&nvmeq->q_lock);
1083 nvme_process_cq(nvmeq);
1084 nvme_cancel_ios(nvmeq, false);
1085 spin_unlock_irq(&nvmeq->q_lock);
1021} 1086}
1022 1087
1023static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, 1088static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
1024 int depth, int vector) 1089 int depth, int vector)
1025{ 1090{
1026 struct device *dmadev = &dev->pci_dev->dev; 1091 struct device *dmadev = &dev->pci_dev->dev;
1027 unsigned extra = DIV_ROUND_UP(depth, 8) + (depth * 1092 unsigned extra = nvme_queue_extra(depth);
1028 sizeof(struct nvme_cmd_info));
1029 struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL); 1093 struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL);
1030 if (!nvmeq) 1094 if (!nvmeq)
1031 return NULL; 1095 return NULL;
@@ -1052,6 +1116,8 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
1052 nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)]; 1116 nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)];
1053 nvmeq->q_depth = depth; 1117 nvmeq->q_depth = depth;
1054 nvmeq->cq_vector = vector; 1118 nvmeq->cq_vector = vector;
1119 nvmeq->q_suspended = 1;
1120 dev->queue_count++;
1055 1121
1056 return nvmeq; 1122 return nvmeq;
1057 1123
@@ -1075,18 +1141,29 @@ static int queue_request_irq(struct nvme_dev *dev, struct nvme_queue *nvmeq,
1075 IRQF_DISABLED | IRQF_SHARED, name, nvmeq); 1141 IRQF_DISABLED | IRQF_SHARED, name, nvmeq);
1076} 1142}
1077 1143
1078static struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, int qid, 1144static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
1079 int cq_size, int vector)
1080{ 1145{
1081 int result; 1146 struct nvme_dev *dev = nvmeq->dev;
1082 struct nvme_queue *nvmeq = nvme_alloc_queue(dev, qid, cq_size, vector); 1147 unsigned extra = nvme_queue_extra(nvmeq->q_depth);
1083 1148
1084 if (!nvmeq) 1149 nvmeq->sq_tail = 0;
1085 return ERR_PTR(-ENOMEM); 1150 nvmeq->cq_head = 0;
1151 nvmeq->cq_phase = 1;
1152 nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)];
1153 memset(nvmeq->cmdid_data, 0, extra);
1154 memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
1155 nvme_cancel_ios(nvmeq, false);
1156 nvmeq->q_suspended = 0;
1157}
1158
1159static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
1160{
1161 struct nvme_dev *dev = nvmeq->dev;
1162 int result;
1086 1163
1087 result = adapter_alloc_cq(dev, qid, nvmeq); 1164 result = adapter_alloc_cq(dev, qid, nvmeq);
1088 if (result < 0) 1165 if (result < 0)
1089 goto free_nvmeq; 1166 return result;
1090 1167
1091 result = adapter_alloc_sq(dev, qid, nvmeq); 1168 result = adapter_alloc_sq(dev, qid, nvmeq);
1092 if (result < 0) 1169 if (result < 0)
@@ -1096,19 +1173,17 @@ static struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, int qid,
1096 if (result < 0) 1173 if (result < 0)
1097 goto release_sq; 1174 goto release_sq;
1098 1175
1099 return nvmeq; 1176 spin_lock(&nvmeq->q_lock);
1177 nvme_init_queue(nvmeq, qid);
1178 spin_unlock(&nvmeq->q_lock);
1179
1180 return result;
1100 1181
1101 release_sq: 1182 release_sq:
1102 adapter_delete_sq(dev, qid); 1183 adapter_delete_sq(dev, qid);
1103 release_cq: 1184 release_cq:
1104 adapter_delete_cq(dev, qid); 1185 adapter_delete_cq(dev, qid);
1105 free_nvmeq: 1186 return result;
1106 dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
1107 (void *)nvmeq->cqes, nvmeq->cq_dma_addr);
1108 dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
1109 nvmeq->sq_cmds, nvmeq->sq_dma_addr);
1110 kfree(nvmeq);
1111 return ERR_PTR(result);
1112} 1187}
1113 1188
1114static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled) 1189static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled)
@@ -1152,6 +1227,30 @@ static int nvme_enable_ctrl(struct nvme_dev *dev, u64 cap)
1152 return nvme_wait_ready(dev, cap, true); 1227 return nvme_wait_ready(dev, cap, true);
1153} 1228}
1154 1229
1230static int nvme_shutdown_ctrl(struct nvme_dev *dev)
1231{
1232 unsigned long timeout;
1233 u32 cc;
1234
1235 cc = (readl(&dev->bar->cc) & ~NVME_CC_SHN_MASK) | NVME_CC_SHN_NORMAL;
1236 writel(cc, &dev->bar->cc);
1237
1238 timeout = 2 * HZ + jiffies;
1239 while ((readl(&dev->bar->csts) & NVME_CSTS_SHST_MASK) !=
1240 NVME_CSTS_SHST_CMPLT) {
1241 msleep(100);
1242 if (fatal_signal_pending(current))
1243 return -EINTR;
1244 if (time_after(jiffies, timeout)) {
1245 dev_err(&dev->pci_dev->dev,
1246 "Device shutdown incomplete; abort shutdown\n");
1247 return -ENODEV;
1248 }
1249 }
1250
1251 return 0;
1252}
1253
1155static int nvme_configure_admin_queue(struct nvme_dev *dev) 1254static int nvme_configure_admin_queue(struct nvme_dev *dev)
1156{ 1255{
1157 int result; 1256 int result;
@@ -1159,16 +1258,17 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
1159 u64 cap = readq(&dev->bar->cap); 1258 u64 cap = readq(&dev->bar->cap);
1160 struct nvme_queue *nvmeq; 1259 struct nvme_queue *nvmeq;
1161 1260
1162 dev->dbs = ((void __iomem *)dev->bar) + 4096;
1163 dev->db_stride = NVME_CAP_STRIDE(cap);
1164
1165 result = nvme_disable_ctrl(dev, cap); 1261 result = nvme_disable_ctrl(dev, cap);
1166 if (result < 0) 1262 if (result < 0)
1167 return result; 1263 return result;
1168 1264
1169 nvmeq = nvme_alloc_queue(dev, 0, 64, 0); 1265 nvmeq = dev->queues[0];
1170 if (!nvmeq) 1266 if (!nvmeq) {
1171 return -ENOMEM; 1267 nvmeq = nvme_alloc_queue(dev, 0, 64, 0);
1268 if (!nvmeq)
1269 return -ENOMEM;
1270 dev->queues[0] = nvmeq;
1271 }
1172 1272
1173 aqa = nvmeq->q_depth - 1; 1273 aqa = nvmeq->q_depth - 1;
1174 aqa |= aqa << 16; 1274 aqa |= aqa << 16;
@@ -1185,17 +1285,15 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
1185 1285
1186 result = nvme_enable_ctrl(dev, cap); 1286 result = nvme_enable_ctrl(dev, cap);
1187 if (result) 1287 if (result)
1188 goto free_q; 1288 return result;
1189 1289
1190 result = queue_request_irq(dev, nvmeq, "nvme admin"); 1290 result = queue_request_irq(dev, nvmeq, "nvme admin");
1191 if (result) 1291 if (result)
1192 goto free_q; 1292 return result;
1193
1194 dev->queues[0] = nvmeq;
1195 return result;
1196 1293
1197 free_q: 1294 spin_lock(&nvmeq->q_lock);
1198 nvme_free_queue_mem(nvmeq); 1295 nvme_init_queue(nvmeq, 0);
1296 spin_unlock(&nvmeq->q_lock);
1199 return result; 1297 return result;
1200} 1298}
1201 1299
@@ -1314,7 +1412,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
1314 c.rw.appmask = cpu_to_le16(io.appmask); 1412 c.rw.appmask = cpu_to_le16(io.appmask);
1315 1413
1316 if (meta_len) { 1414 if (meta_len) {
1317 meta_iod = nvme_map_user_pages(dev, io.opcode & 1, io.metadata, meta_len); 1415 meta_iod = nvme_map_user_pages(dev, io.opcode & 1, io.metadata,
1416 meta_len);
1318 if (IS_ERR(meta_iod)) { 1417 if (IS_ERR(meta_iod)) {
1319 status = PTR_ERR(meta_iod); 1418 status = PTR_ERR(meta_iod);
1320 meta_iod = NULL; 1419 meta_iod = NULL;
@@ -1356,6 +1455,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
1356 put_nvmeq(nvmeq); 1455 put_nvmeq(nvmeq);
1357 if (length != (io.nblocks + 1) << ns->lba_shift) 1456 if (length != (io.nblocks + 1) << ns->lba_shift)
1358 status = -ENOMEM; 1457 status = -ENOMEM;
1458 else if (!nvmeq || nvmeq->q_suspended)
1459 status = -EBUSY;
1359 else 1460 else
1360 status = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT); 1461 status = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);
1361 1462
@@ -1453,6 +1554,7 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
1453 1554
1454 switch (cmd) { 1555 switch (cmd) {
1455 case NVME_IOCTL_ID: 1556 case NVME_IOCTL_ID:
1557 force_successful_syscall_return();
1456 return ns->ns_id; 1558 return ns->ns_id;
1457 case NVME_IOCTL_ADMIN_CMD: 1559 case NVME_IOCTL_ADMIN_CMD:
1458 return nvme_user_admin_cmd(ns->dev, (void __user *)arg); 1560 return nvme_user_admin_cmd(ns->dev, (void __user *)arg);
@@ -1506,10 +1608,12 @@ static int nvme_kthread(void *data)
1506 if (!nvmeq) 1608 if (!nvmeq)
1507 continue; 1609 continue;
1508 spin_lock_irq(&nvmeq->q_lock); 1610 spin_lock_irq(&nvmeq->q_lock);
1509 if (nvme_process_cq(nvmeq)) 1611 if (nvmeq->q_suspended)
1510 printk("process_cq did something\n"); 1612 goto unlock;
1613 nvme_process_cq(nvmeq);
1511 nvme_cancel_ios(nvmeq, true); 1614 nvme_cancel_ios(nvmeq, true);
1512 nvme_resubmit_bios(nvmeq); 1615 nvme_resubmit_bios(nvmeq);
1616 unlock:
1513 spin_unlock_irq(&nvmeq->q_lock); 1617 spin_unlock_irq(&nvmeq->q_lock);
1514 } 1618 }
1515 } 1619 }
@@ -1556,7 +1660,7 @@ static void nvme_config_discard(struct nvme_ns *ns)
1556 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); 1660 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
1557} 1661}
1558 1662
1559static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid, 1663static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid,
1560 struct nvme_id_ns *id, struct nvme_lba_range_type *rt) 1664 struct nvme_id_ns *id, struct nvme_lba_range_type *rt)
1561{ 1665{
1562 struct nvme_ns *ns; 1666 struct nvme_ns *ns;
@@ -1631,14 +1735,19 @@ static int set_queue_count(struct nvme_dev *dev, int count)
1631 status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0, 1735 status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0,
1632 &result); 1736 &result);
1633 if (status) 1737 if (status)
1634 return -EIO; 1738 return status < 0 ? -EIO : -EBUSY;
1635 return min(result & 0xffff, result >> 16) + 1; 1739 return min(result & 0xffff, result >> 16) + 1;
1636} 1740}
1637 1741
1742static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
1743{
1744 return 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3));
1745}
1746
1638static int nvme_setup_io_queues(struct nvme_dev *dev) 1747static int nvme_setup_io_queues(struct nvme_dev *dev)
1639{ 1748{
1640 struct pci_dev *pdev = dev->pci_dev; 1749 struct pci_dev *pdev = dev->pci_dev;
1641 int result, cpu, i, nr_io_queues, db_bar_size, q_depth, q_count; 1750 int result, cpu, i, vecs, nr_io_queues, size, q_depth;
1642 1751
1643 nr_io_queues = num_online_cpus(); 1752 nr_io_queues = num_online_cpus();
1644 result = set_queue_count(dev, nr_io_queues); 1753 result = set_queue_count(dev, nr_io_queues);
@@ -1647,53 +1756,80 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
1647 if (result < nr_io_queues) 1756 if (result < nr_io_queues)
1648 nr_io_queues = result; 1757 nr_io_queues = result;
1649 1758
1650 q_count = nr_io_queues; 1759 size = db_bar_size(dev, nr_io_queues);
1651 /* Deregister the admin queue's interrupt */ 1760 if (size > 8192) {
1652 free_irq(dev->entry[0].vector, dev->queues[0]);
1653
1654 db_bar_size = 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3));
1655 if (db_bar_size > 8192) {
1656 iounmap(dev->bar); 1761 iounmap(dev->bar);
1657 dev->bar = ioremap(pci_resource_start(pdev, 0), db_bar_size); 1762 do {
1763 dev->bar = ioremap(pci_resource_start(pdev, 0), size);
1764 if (dev->bar)
1765 break;
1766 if (!--nr_io_queues)
1767 return -ENOMEM;
1768 size = db_bar_size(dev, nr_io_queues);
1769 } while (1);
1658 dev->dbs = ((void __iomem *)dev->bar) + 4096; 1770 dev->dbs = ((void __iomem *)dev->bar) + 4096;
1659 dev->queues[0]->q_db = dev->dbs; 1771 dev->queues[0]->q_db = dev->dbs;
1660 } 1772 }
1661 1773
1662 for (i = 0; i < nr_io_queues; i++) 1774 /* Deregister the admin queue's interrupt */
1775 free_irq(dev->entry[0].vector, dev->queues[0]);
1776
1777 vecs = nr_io_queues;
1778 for (i = 0; i < vecs; i++)
1663 dev->entry[i].entry = i; 1779 dev->entry[i].entry = i;
1664 for (;;) { 1780 for (;;) {
1665 result = pci_enable_msix(pdev, dev->entry, nr_io_queues); 1781 result = pci_enable_msix(pdev, dev->entry, vecs);
1666 if (result == 0) { 1782 if (result <= 0)
1667 break;
1668 } else if (result > 0) {
1669 nr_io_queues = result;
1670 continue;
1671 } else {
1672 nr_io_queues = 0;
1673 break; 1783 break;
1674 } 1784 vecs = result;
1675 } 1785 }
1676 1786
1677 if (nr_io_queues == 0) { 1787 if (result < 0) {
1678 nr_io_queues = q_count; 1788 vecs = nr_io_queues;
1789 if (vecs > 32)
1790 vecs = 32;
1679 for (;;) { 1791 for (;;) {
1680 result = pci_enable_msi_block(pdev, nr_io_queues); 1792 result = pci_enable_msi_block(pdev, vecs);
1681 if (result == 0) { 1793 if (result == 0) {
1682 for (i = 0; i < nr_io_queues; i++) 1794 for (i = 0; i < vecs; i++)
1683 dev->entry[i].vector = i + pdev->irq; 1795 dev->entry[i].vector = i + pdev->irq;
1684 break; 1796 break;
1685 } else if (result > 0) { 1797 } else if (result < 0) {
1686 nr_io_queues = result; 1798 vecs = 1;
1687 continue;
1688 } else {
1689 nr_io_queues = 1;
1690 break; 1799 break;
1691 } 1800 }
1801 vecs = result;
1692 } 1802 }
1693 } 1803 }
1694 1804
1805 /*
1806 * Should investigate if there's a performance win from allocating
1807 * more queues than interrupt vectors; it might allow the submission
1808 * path to scale better, even if the receive path is limited by the
1809 * number of interrupts.
1810 */
1811 nr_io_queues = vecs;
1812
1695 result = queue_request_irq(dev, dev->queues[0], "nvme admin"); 1813 result = queue_request_irq(dev, dev->queues[0], "nvme admin");
1696 /* XXX: handle failure here */ 1814 if (result) {
1815 dev->queues[0]->q_suspended = 1;
1816 goto free_queues;
1817 }
1818
1819 /* Free previously allocated queues that are no longer usable */
1820 spin_lock(&dev_list_lock);
1821 for (i = dev->queue_count - 1; i > nr_io_queues; i--) {
1822 struct nvme_queue *nvmeq = dev->queues[i];
1823
1824 spin_lock(&nvmeq->q_lock);
1825 nvme_cancel_ios(nvmeq, false);
1826 spin_unlock(&nvmeq->q_lock);
1827
1828 nvme_free_queue(nvmeq);
1829 dev->queue_count--;
1830 dev->queues[i] = NULL;
1831 }
1832 spin_unlock(&dev_list_lock);
1697 1833
1698 cpu = cpumask_first(cpu_online_mask); 1834 cpu = cpumask_first(cpu_online_mask);
1699 for (i = 0; i < nr_io_queues; i++) { 1835 for (i = 0; i < nr_io_queues; i++) {
@@ -1703,11 +1839,12 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
1703 1839
1704 q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1, 1840 q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1,
1705 NVME_Q_DEPTH); 1841 NVME_Q_DEPTH);
1706 for (i = 0; i < nr_io_queues; i++) { 1842 for (i = dev->queue_count - 1; i < nr_io_queues; i++) {
1707 dev->queues[i + 1] = nvme_create_queue(dev, i + 1, q_depth, i); 1843 dev->queues[i + 1] = nvme_alloc_queue(dev, i + 1, q_depth, i);
1708 if (IS_ERR(dev->queues[i + 1])) 1844 if (!dev->queues[i + 1]) {
1709 return PTR_ERR(dev->queues[i + 1]); 1845 result = -ENOMEM;
1710 dev->queue_count++; 1846 goto free_queues;
1847 }
1711 } 1848 }
1712 1849
1713 for (; i < num_possible_cpus(); i++) { 1850 for (; i < num_possible_cpus(); i++) {
@@ -1715,15 +1852,20 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
1715 dev->queues[i + 1] = dev->queues[target + 1]; 1852 dev->queues[i + 1] = dev->queues[target + 1];
1716 } 1853 }
1717 1854
1718 return 0; 1855 for (i = 1; i < dev->queue_count; i++) {
1719} 1856 result = nvme_create_queue(dev->queues[i], i);
1857 if (result) {
1858 for (--i; i > 0; i--)
1859 nvme_disable_queue(dev, i);
1860 goto free_queues;
1861 }
1862 }
1720 1863
1721static void nvme_free_queues(struct nvme_dev *dev) 1864 return 0;
1722{
1723 int i;
1724 1865
1725 for (i = dev->queue_count - 1; i >= 0; i--) 1866 free_queues:
1726 nvme_free_queue(dev, i); 1867 nvme_free_queues(dev);
1868 return result;
1727} 1869}
1728 1870
1729/* 1871/*
@@ -1734,7 +1876,8 @@ static void nvme_free_queues(struct nvme_dev *dev)
1734 */ 1876 */
1735static int nvme_dev_add(struct nvme_dev *dev) 1877static int nvme_dev_add(struct nvme_dev *dev)
1736{ 1878{
1737 int res, nn, i; 1879 int res;
1880 unsigned nn, i;
1738 struct nvme_ns *ns; 1881 struct nvme_ns *ns;
1739 struct nvme_id_ctrl *ctrl; 1882 struct nvme_id_ctrl *ctrl;
1740 struct nvme_id_ns *id_ns; 1883 struct nvme_id_ns *id_ns;
@@ -1742,10 +1885,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
1742 dma_addr_t dma_addr; 1885 dma_addr_t dma_addr;
1743 int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12; 1886 int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
1744 1887
1745 res = nvme_setup_io_queues(dev);
1746 if (res)
1747 return res;
1748
1749 mem = dma_alloc_coherent(&dev->pci_dev->dev, 8192, &dma_addr, 1888 mem = dma_alloc_coherent(&dev->pci_dev->dev, 8192, &dma_addr,
1750 GFP_KERNEL); 1889 GFP_KERNEL);
1751 if (!mem) 1890 if (!mem)
@@ -1796,23 +1935,83 @@ static int nvme_dev_add(struct nvme_dev *dev)
1796 return res; 1935 return res;
1797} 1936}
1798 1937
1799static int nvme_dev_remove(struct nvme_dev *dev) 1938static int nvme_dev_map(struct nvme_dev *dev)
1800{ 1939{
1801 struct nvme_ns *ns, *next; 1940 int bars, result = -ENOMEM;
1941 struct pci_dev *pdev = dev->pci_dev;
1942
1943 if (pci_enable_device_mem(pdev))
1944 return result;
1945
1946 dev->entry[0].vector = pdev->irq;
1947 pci_set_master(pdev);
1948 bars = pci_select_bars(pdev, IORESOURCE_MEM);
1949 if (pci_request_selected_regions(pdev, bars, "nvme"))
1950 goto disable_pci;
1951
1952 if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) &&
1953 dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
1954 goto disable;
1955
1956 pci_set_drvdata(pdev, dev);
1957 dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
1958 if (!dev->bar)
1959 goto disable;
1960
1961 dev->db_stride = NVME_CAP_STRIDE(readq(&dev->bar->cap));
1962 dev->dbs = ((void __iomem *)dev->bar) + 4096;
1963
1964 return 0;
1965
1966 disable:
1967 pci_release_regions(pdev);
1968 disable_pci:
1969 pci_disable_device(pdev);
1970 return result;
1971}
1972
1973static void nvme_dev_unmap(struct nvme_dev *dev)
1974{
1975 if (dev->pci_dev->msi_enabled)
1976 pci_disable_msi(dev->pci_dev);
1977 else if (dev->pci_dev->msix_enabled)
1978 pci_disable_msix(dev->pci_dev);
1979
1980 if (dev->bar) {
1981 iounmap(dev->bar);
1982 dev->bar = NULL;
1983 }
1984
1985 pci_release_regions(dev->pci_dev);
1986 if (pci_is_enabled(dev->pci_dev))
1987 pci_disable_device(dev->pci_dev);
1988}
1989
1990static void nvme_dev_shutdown(struct nvme_dev *dev)
1991{
1992 int i;
1993
1994 for (i = dev->queue_count - 1; i >= 0; i--)
1995 nvme_disable_queue(dev, i);
1802 1996
1803 spin_lock(&dev_list_lock); 1997 spin_lock(&dev_list_lock);
1804 list_del(&dev->node); 1998 list_del_init(&dev->node);
1805 spin_unlock(&dev_list_lock); 1999 spin_unlock(&dev_list_lock);
1806 2000
2001 if (dev->bar)
2002 nvme_shutdown_ctrl(dev);
2003 nvme_dev_unmap(dev);
2004}
2005
2006static void nvme_dev_remove(struct nvme_dev *dev)
2007{
2008 struct nvme_ns *ns, *next;
2009
1807 list_for_each_entry_safe(ns, next, &dev->namespaces, list) { 2010 list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
1808 list_del(&ns->list); 2011 list_del(&ns->list);
1809 del_gendisk(ns->disk); 2012 del_gendisk(ns->disk);
1810 nvme_ns_free(ns); 2013 nvme_ns_free(ns);
1811 } 2014 }
1812
1813 nvme_free_queues(dev);
1814
1815 return 0;
1816} 2015}
1817 2016
1818static int nvme_setup_prp_pools(struct nvme_dev *dev) 2017static int nvme_setup_prp_pools(struct nvme_dev *dev)
@@ -1872,15 +2071,10 @@ static void nvme_free_dev(struct kref *kref)
1872{ 2071{
1873 struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref); 2072 struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
1874 nvme_dev_remove(dev); 2073 nvme_dev_remove(dev);
1875 if (dev->pci_dev->msi_enabled) 2074 nvme_dev_shutdown(dev);
1876 pci_disable_msi(dev->pci_dev); 2075 nvme_free_queues(dev);
1877 else if (dev->pci_dev->msix_enabled)
1878 pci_disable_msix(dev->pci_dev);
1879 iounmap(dev->bar);
1880 nvme_release_instance(dev); 2076 nvme_release_instance(dev);
1881 nvme_release_prp_pools(dev); 2077 nvme_release_prp_pools(dev);
1882 pci_disable_device(dev->pci_dev);
1883 pci_release_regions(dev->pci_dev);
1884 kfree(dev->queues); 2078 kfree(dev->queues);
1885 kfree(dev->entry); 2079 kfree(dev->entry);
1886 kfree(dev); 2080 kfree(dev);
@@ -1921,9 +2115,40 @@ static const struct file_operations nvme_dev_fops = {
1921 .compat_ioctl = nvme_dev_ioctl, 2115 .compat_ioctl = nvme_dev_ioctl,
1922}; 2116};
1923 2117
2118static int nvme_dev_start(struct nvme_dev *dev)
2119{
2120 int result;
2121
2122 result = nvme_dev_map(dev);
2123 if (result)
2124 return result;
2125
2126 result = nvme_configure_admin_queue(dev);
2127 if (result)
2128 goto unmap;
2129
2130 spin_lock(&dev_list_lock);
2131 list_add(&dev->node, &dev_list);
2132 spin_unlock(&dev_list_lock);
2133
2134 result = nvme_setup_io_queues(dev);
2135 if (result && result != -EBUSY)
2136 goto disable;
2137
2138 return result;
2139
2140 disable:
2141 spin_lock(&dev_list_lock);
2142 list_del_init(&dev->node);
2143 spin_unlock(&dev_list_lock);
2144 unmap:
2145 nvme_dev_unmap(dev);
2146 return result;
2147}
2148
1924static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) 2149static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1925{ 2150{
1926 int bars, result = -ENOMEM; 2151 int result = -ENOMEM;
1927 struct nvme_dev *dev; 2152 struct nvme_dev *dev;
1928 2153
1929 dev = kzalloc(sizeof(*dev), GFP_KERNEL); 2154 dev = kzalloc(sizeof(*dev), GFP_KERNEL);
@@ -1938,53 +2163,29 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1938 if (!dev->queues) 2163 if (!dev->queues)
1939 goto free; 2164 goto free;
1940 2165
1941 if (pci_enable_device_mem(pdev))
1942 goto free;
1943 pci_set_master(pdev);
1944 bars = pci_select_bars(pdev, IORESOURCE_MEM);
1945 if (pci_request_selected_regions(pdev, bars, "nvme"))
1946 goto disable;
1947
1948 INIT_LIST_HEAD(&dev->namespaces); 2166 INIT_LIST_HEAD(&dev->namespaces);
1949 dev->pci_dev = pdev; 2167 dev->pci_dev = pdev;
1950 pci_set_drvdata(pdev, dev);
1951
1952 if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)))
1953 dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1954 else if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)))
1955 dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1956 else
1957 goto disable;
1958 2168
1959 result = nvme_set_instance(dev); 2169 result = nvme_set_instance(dev);
1960 if (result) 2170 if (result)
1961 goto disable; 2171 goto free;
1962
1963 dev->entry[0].vector = pdev->irq;
1964 2172
1965 result = nvme_setup_prp_pools(dev); 2173 result = nvme_setup_prp_pools(dev);
1966 if (result) 2174 if (result)
1967 goto disable_msix; 2175 goto release;
1968 2176
1969 dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); 2177 result = nvme_dev_start(dev);
1970 if (!dev->bar) { 2178 if (result) {
1971 result = -ENOMEM; 2179 if (result == -EBUSY)
1972 goto disable_msix; 2180 goto create_cdev;
2181 goto release_pools;
1973 } 2182 }
1974 2183
1975 result = nvme_configure_admin_queue(dev);
1976 if (result)
1977 goto unmap;
1978 dev->queue_count++;
1979
1980 spin_lock(&dev_list_lock);
1981 list_add(&dev->node, &dev_list);
1982 spin_unlock(&dev_list_lock);
1983
1984 result = nvme_dev_add(dev); 2184 result = nvme_dev_add(dev);
1985 if (result) 2185 if (result)
1986 goto delete; 2186 goto shutdown;
1987 2187
2188 create_cdev:
1988 scnprintf(dev->name, sizeof(dev->name), "nvme%d", dev->instance); 2189 scnprintf(dev->name, sizeof(dev->name), "nvme%d", dev->instance);
1989 dev->miscdev.minor = MISC_DYNAMIC_MINOR; 2190 dev->miscdev.minor = MISC_DYNAMIC_MINOR;
1990 dev->miscdev.parent = &pdev->dev; 2191 dev->miscdev.parent = &pdev->dev;
@@ -1999,24 +2200,13 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1999 2200
2000 remove: 2201 remove:
2001 nvme_dev_remove(dev); 2202 nvme_dev_remove(dev);
2002 delete: 2203 shutdown:
2003 spin_lock(&dev_list_lock); 2204 nvme_dev_shutdown(dev);
2004 list_del(&dev->node); 2205 release_pools:
2005 spin_unlock(&dev_list_lock);
2006
2007 nvme_free_queues(dev); 2206 nvme_free_queues(dev);
2008 unmap:
2009 iounmap(dev->bar);
2010 disable_msix:
2011 if (dev->pci_dev->msi_enabled)
2012 pci_disable_msi(dev->pci_dev);
2013 else if (dev->pci_dev->msix_enabled)
2014 pci_disable_msix(dev->pci_dev);
2015 nvme_release_instance(dev);
2016 nvme_release_prp_pools(dev); 2207 nvme_release_prp_pools(dev);
2017 disable: 2208 release:
2018 pci_disable_device(pdev); 2209 nvme_release_instance(dev);
2019 pci_release_regions(pdev);
2020 free: 2210 free:
2021 kfree(dev->queues); 2211 kfree(dev->queues);
2022 kfree(dev->entry); 2212 kfree(dev->entry);
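
The reworked nvme_probe() error path above follows the usual kernel goto-unwind idiom: each setup step that can fail jumps to a label that undoes only what has already been set up, and the labels run in reverse order of the setup sequence. A minimal sketch of the pattern, using hypothetical foo_* names rather than the real nvme helpers:

#include <linux/slab.h>

static int foo_probe(void)
{
	void *a, *b;
	int ret = -ENOMEM;

	a = kzalloc(16, GFP_KERNEL);
	if (!a)
		goto out;		/* nothing to undo yet */

	b = kzalloc(16, GFP_KERNEL);
	if (!b)
		goto free_a;		/* undo only the first step */

	return 0;			/* success: a real driver stashes a and b */

free_a:
	kfree(a);
out:
	return ret;
}
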
@@ -2037,8 +2227,30 @@ static void nvme_remove(struct pci_dev *pdev)
2037#define nvme_link_reset NULL 2227#define nvme_link_reset NULL
2038#define nvme_slot_reset NULL 2228#define nvme_slot_reset NULL
2039#define nvme_error_resume NULL 2229#define nvme_error_resume NULL
2040#define nvme_suspend NULL 2230
2041#define nvme_resume NULL 2231static int nvme_suspend(struct device *dev)
2232{
2233 struct pci_dev *pdev = to_pci_dev(dev);
2234 struct nvme_dev *ndev = pci_get_drvdata(pdev);
2235
2236 nvme_dev_shutdown(ndev);
2237 return 0;
2238}
2239
2240static int nvme_resume(struct device *dev)
2241{
2242 struct pci_dev *pdev = to_pci_dev(dev);
2243 struct nvme_dev *ndev = pci_get_drvdata(pdev);
2244 int ret;
2245
2246 ret = nvme_dev_start(ndev);
2247 /* XXX: should remove gendisks if resume fails */
2248 if (ret)
2249 nvme_free_queues(ndev);
2250 return ret;
2251}
2252
2253static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume);
2042 2254
2043static const struct pci_error_handlers nvme_err_handler = { 2255static const struct pci_error_handlers nvme_err_handler = {
2044 .error_detected = nvme_error_detected, 2256 .error_detected = nvme_error_detected,
@@ -2062,8 +2274,9 @@ static struct pci_driver nvme_driver = {
2062 .id_table = nvme_id_table, 2274 .id_table = nvme_id_table,
2063 .probe = nvme_probe, 2275 .probe = nvme_probe,
2064 .remove = nvme_remove, 2276 .remove = nvme_remove,
2065 .suspend = nvme_suspend, 2277 .driver = {
2066 .resume = nvme_resume, 2278 .pm = &nvme_dev_pm_ops,
2279 },
2067 .err_handler = &nvme_err_handler, 2280 .err_handler = &nvme_err_handler,
2068}; 2281};
2069 2282
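
Taken together, the nvme-core.c changes above drop the legacy .suspend/.resume members of struct pci_driver in favour of dev_pm_ops. A minimal sketch of that wiring for a hypothetical PCI driver (foo_* names are placeholders, not the real nvme symbols):

#include <linux/device.h>
#include <linux/pci.h>
#include <linux/pm.h>

static int foo_suspend(struct device *dev)
{
	/* to_pci_dev(dev) yields the struct pci_dev if it is needed */
	return 0;	/* quiesce the hardware here */
}

static int foo_resume(struct device *dev)
{
	return 0;	/* re-initialise the hardware here */
}

static SIMPLE_DEV_PM_OPS(foo_pm_ops, foo_suspend, foo_resume);

static struct pci_driver foo_driver = {
	.name		= "foo",
	.driver		= {
		.pm	= &foo_pm_ops,	/* replaces .suspend/.resume */
	},
	/* .id_table, .probe and .remove omitted for brevity */
};
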
diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 102de2f52b5c..4a4ff4eb8e23 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -933,13 +933,12 @@ static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
933 int res = SNTI_TRANSLATION_SUCCESS; 933 int res = SNTI_TRANSLATION_SUCCESS;
934 int xfer_len; 934 int xfer_len;
935 935
936 inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL); 936 inq_response = kzalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
937 if (inq_response == NULL) { 937 if (inq_response == NULL) {
938 res = -ENOMEM; 938 res = -ENOMEM;
939 goto out_mem; 939 goto out_mem;
940 } 940 }
941 941
942 memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
943 inq_response[1] = INQ_BDEV_CHARACTERISTICS_PAGE; /* Page Code */ 942 inq_response[1] = INQ_BDEV_CHARACTERISTICS_PAGE; /* Page Code */
944 inq_response[2] = 0x00; /* Page Length MSB */ 943 inq_response[2] = 0x00; /* Page Length MSB */
945 inq_response[3] = 0x3C; /* Page Length LSB */ 944 inq_response[3] = 0x3C; /* Page Length LSB */
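
This hunk and the nvme-scsi.c hunks that follow are the same mechanical conversion: kzalloc() returns memory that is already zeroed, so the explicit memset() after kmalloc() is redundant. Sketched side by side:

#include <linux/slab.h>
#include <linux/string.h>

static void *alloc_response_old(size_t len)
{
	void *buf = kmalloc(len, GFP_KERNEL);	/* may hold stale data */

	if (buf)
		memset(buf, 0, len);
	return buf;
}

static void *alloc_response_new(size_t len)
{
	return kzalloc(len, GFP_KERNEL);	/* allocated and zeroed in one call */
}
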
@@ -964,12 +963,11 @@ static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
964 int xfer_len; 963 int xfer_len;
965 u8 *log_response; 964 u8 *log_response;
966 965
967 log_response = kmalloc(LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH, GFP_KERNEL); 966 log_response = kzalloc(LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH, GFP_KERNEL);
968 if (log_response == NULL) { 967 if (log_response == NULL) {
969 res = -ENOMEM; 968 res = -ENOMEM;
970 goto out_mem; 969 goto out_mem;
971 } 970 }
972 memset(log_response, 0, LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH);
973 971
974 log_response[0] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE; 972 log_response[0] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE;
975 /* Subpage=0x00, Page Length MSB=0 */ 973 /* Subpage=0x00, Page Length MSB=0 */
@@ -1000,12 +998,11 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
1000 u8 temp_c; 998 u8 temp_c;
1001 u16 temp_k; 999 u16 temp_k;
1002 1000
1003 log_response = kmalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL); 1001 log_response = kzalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL);
1004 if (log_response == NULL) { 1002 if (log_response == NULL) {
1005 res = -ENOMEM; 1003 res = -ENOMEM;
1006 goto out_mem; 1004 goto out_mem;
1007 } 1005 }
1008 memset(log_response, 0, LOG_INFO_EXCP_PAGE_LENGTH);
1009 1006
1010 mem = dma_alloc_coherent(&dev->pci_dev->dev, 1007 mem = dma_alloc_coherent(&dev->pci_dev->dev,
1011 sizeof(struct nvme_smart_log), 1008 sizeof(struct nvme_smart_log),
@@ -1069,12 +1066,11 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1069 u8 temp_c_cur, temp_c_thresh; 1066 u8 temp_c_cur, temp_c_thresh;
1070 u16 temp_k; 1067 u16 temp_k;
1071 1068
1072 log_response = kmalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL); 1069 log_response = kzalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL);
1073 if (log_response == NULL) { 1070 if (log_response == NULL) {
1074 res = -ENOMEM; 1071 res = -ENOMEM;
1075 goto out_mem; 1072 goto out_mem;
1076 } 1073 }
1077 memset(log_response, 0, LOG_TEMP_PAGE_LENGTH);
1078 1074
1079 mem = dma_alloc_coherent(&dev->pci_dev->dev, 1075 mem = dma_alloc_coherent(&dev->pci_dev->dev,
1080 sizeof(struct nvme_smart_log), 1076 sizeof(struct nvme_smart_log),
@@ -1380,12 +1376,11 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns,
1380 blk_desc_offset = mph_size; 1376 blk_desc_offset = mph_size;
1381 mode_pages_offset_1 = blk_desc_offset + blk_desc_len; 1377 mode_pages_offset_1 = blk_desc_offset + blk_desc_len;
1382 1378
1383 response = kmalloc(resp_size, GFP_KERNEL); 1379 response = kzalloc(resp_size, GFP_KERNEL);
1384 if (response == NULL) { 1380 if (response == NULL) {
1385 res = -ENOMEM; 1381 res = -ENOMEM;
1386 goto out_mem; 1382 goto out_mem;
1387 } 1383 }
1388 memset(response, 0, resp_size);
1389 1384
1390 res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10, 1385 res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10,
1391 llbaa, mode_data_length, blk_desc_len); 1386 llbaa, mode_data_length, blk_desc_len);
@@ -2480,12 +2475,11 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
2480 } 2475 }
2481 id_ns = mem; 2476 id_ns = mem;
2482 2477
2483 response = kmalloc(resp_size, GFP_KERNEL); 2478 response = kzalloc(resp_size, GFP_KERNEL);
2484 if (response == NULL) { 2479 if (response == NULL) {
2485 res = -ENOMEM; 2480 res = -ENOMEM;
2486 goto out_dma; 2481 goto out_dma;
2487 } 2482 }
2488 memset(response, 0, resp_size);
2489 nvme_trans_fill_read_cap(response, id_ns, cdb16); 2483 nvme_trans_fill_read_cap(response, id_ns, cdb16);
2490 2484
2491 xfer_len = min(alloc_len, resp_size); 2485 xfer_len = min(alloc_len, resp_size);
@@ -2554,12 +2548,11 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
2554 goto out_dma; 2548 goto out_dma;
2555 } 2549 }
2556 2550
2557 response = kmalloc(resp_size, GFP_KERNEL); 2551 response = kzalloc(resp_size, GFP_KERNEL);
2558 if (response == NULL) { 2552 if (response == NULL) {
2559 res = -ENOMEM; 2553 res = -ENOMEM;
2560 goto out_dma; 2554 goto out_dma;
2561 } 2555 }
2562 memset(response, 0, resp_size);
2563 2556
2564 /* The first LUN ID will always be 0 per the SAM spec */ 2557 /* The first LUN ID will always be 0 per the SAM spec */
2565 for (lun_id = 0; lun_id < le32_to_cpu(id_ctrl->nn); lun_id++) { 2558 for (lun_id = 0; lun_id < le32_to_cpu(id_ctrl->nn); lun_id++) {
@@ -2600,12 +2593,11 @@ static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
2600 2593
2601 resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) : 2594 resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) :
2602 (FIXED_FMT_SENSE_DATA_SIZE)); 2595 (FIXED_FMT_SENSE_DATA_SIZE));
2603 response = kmalloc(resp_size, GFP_KERNEL); 2596 response = kzalloc(resp_size, GFP_KERNEL);
2604 if (response == NULL) { 2597 if (response == NULL) {
2605 res = -ENOMEM; 2598 res = -ENOMEM;
2606 goto out; 2599 goto out;
2607 } 2600 }
2608 memset(response, 0, resp_size);
2609 2601
2610 if (desc_format == DESCRIPTOR_FORMAT_SENSE_DATA_TYPE) { 2602 if (desc_format == DESCRIPTOR_FORMAT_SENSE_DATA_TYPE) {
2611 /* Descriptor Format Sense Data */ 2603 /* Descriptor Format Sense Data */
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c
index 1bbc681688e4..79aa179305b5 100644
--- a/drivers/block/osdblk.c
+++ b/drivers/block/osdblk.c
@@ -598,7 +598,7 @@ static ssize_t class_osdblk_remove(struct class *c,
598 unsigned long ul; 598 unsigned long ul;
599 struct list_head *tmp; 599 struct list_head *tmp;
600 600
601 rc = strict_strtoul(buf, 10, &ul); 601 rc = kstrtoul(buf, 10, &ul);
602 if (rc) 602 if (rc)
603 return rc; 603 return rc;
604 604
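
strict_strtoul() is the older name for what is now kstrtoul(): parse a string in the given base into an unsigned long and return 0 on success or a negative errno (for example -EINVAL on malformed input, -ERANGE on overflow). A small sketch of sysfs-store-style usage with a hypothetical helper:

#include <linux/kernel.h>	/* kstrtoul() */

static int foo_parse_index(const char *buf, unsigned long *out)
{
	int rc;

	rc = kstrtoul(buf, 10, out);	/* base 10 */
	if (rc)
		return rc;		/* propagate -EINVAL / -ERANGE */
	return 0;
}
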
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index f5d0ea11d9fd..ff8668c5efb1 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -44,6 +44,8 @@
44 * 44 *
45 *************************************************************************/ 45 *************************************************************************/
46 46
47#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
48
47#include <linux/pktcdvd.h> 49#include <linux/pktcdvd.h>
48#include <linux/module.h> 50#include <linux/module.h>
49#include <linux/types.h> 51#include <linux/types.h>
@@ -69,23 +71,24 @@
69 71
70#define DRIVER_NAME "pktcdvd" 72#define DRIVER_NAME "pktcdvd"
71 73
72#if PACKET_DEBUG 74#define pkt_err(pd, fmt, ...) \
73#define DPRINTK(fmt, args...) printk(KERN_NOTICE fmt, ##args) 75 pr_err("%s: " fmt, pd->name, ##__VA_ARGS__)
74#else 76#define pkt_notice(pd, fmt, ...) \
75#define DPRINTK(fmt, args...) 77 pr_notice("%s: " fmt, pd->name, ##__VA_ARGS__)
76#endif 78#define pkt_info(pd, fmt, ...) \
77 79 pr_info("%s: " fmt, pd->name, ##__VA_ARGS__)
78#if PACKET_DEBUG > 1 80
79#define VPRINTK(fmt, args...) printk(KERN_NOTICE fmt, ##args) 81#define pkt_dbg(level, pd, fmt, ...) \
80#else 82do { \
81#define VPRINTK(fmt, args...) 83 if (level == 2 && PACKET_DEBUG >= 2) \
82#endif 84 pr_notice("%s: %s():" fmt, \
85 pd->name, __func__, ##__VA_ARGS__); \
86 else if (level == 1 && PACKET_DEBUG >= 1) \
87 pr_notice("%s: " fmt, pd->name, ##__VA_ARGS__); \
88} while (0)
83 89
84#define MAX_SPEED 0xffff 90#define MAX_SPEED 0xffff
85 91
86#define ZONE(sector, pd) (((sector) + (pd)->offset) & \
87 ~(sector_t)((pd)->settings.size - 1))
88
89static DEFINE_MUTEX(pktcdvd_mutex); 92static DEFINE_MUTEX(pktcdvd_mutex);
90static struct pktcdvd_device *pkt_devs[MAX_WRITERS]; 93static struct pktcdvd_device *pkt_devs[MAX_WRITERS];
91static struct proc_dir_entry *pkt_proc; 94static struct proc_dir_entry *pkt_proc;
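
With pr_fmt defined before the includes, every pr_*() call in this file is prefixed with the module name, and the pkt_err/pkt_notice/pkt_info/pkt_dbg wrappers introduced above add the per-device name, so call sites no longer repeat DRIVER_NAME or pd->name (the PACKET_DEBUG comparison in pkt_dbg is against a compile-time constant, so disabled levels compile away). Roughly, assuming a device named pktcdvd0:

	/* call site */
	pkt_err(pd, "wrong bio size\n");

	/* expands, via pr_err() and pr_fmt(), to approximately */
	printk(KERN_ERR KBUILD_MODNAME ": " "%s: " "wrong bio size\n", pd->name);

	/* and ends up in the log as:
	 *   pktcdvd: pktcdvd0: wrong bio size
	 */
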
@@ -103,7 +106,10 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev);
103static int pkt_remove_dev(dev_t pkt_dev); 106static int pkt_remove_dev(dev_t pkt_dev);
104static int pkt_seq_show(struct seq_file *m, void *p); 107static int pkt_seq_show(struct seq_file *m, void *p);
105 108
106 109static sector_t get_zone(sector_t sector, struct pktcdvd_device *pd)
110{
111 return (sector + pd->offset) & ~(sector_t)(pd->settings.size - 1);
112}
107 113
108/* 114/*
109 * create and register a pktcdvd kernel object. 115 * create and register a pktcdvd kernel object.
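
The new get_zone() helper keeps the old ZONE() arithmetic: add the track offset and mask off the low bits to round the sector down to the start of its packet zone. The mask only rounds correctly because the packet size in sectors is treated as a power of two here. A standalone sketch with a worked value:

#include <linux/types.h>

/* assumes zone_sectors is a power of two, as the zone mask requires */
static sector_t zone_start(sector_t sector, sector_t offset,
			   sector_t zone_sectors)
{
	return (sector + offset) & ~(zone_sectors - 1);
}

/* e.g. zone_start(75, 0, 32) == 64 and zone_start(95, 0, 32) == 64,
 * while zone_start(96, 0, 32) == 96 starts the next zone */
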
@@ -424,7 +430,7 @@ static int pkt_sysfs_init(void)
424 if (ret) { 430 if (ret) {
425 kfree(class_pktcdvd); 431 kfree(class_pktcdvd);
426 class_pktcdvd = NULL; 432 class_pktcdvd = NULL;
427 printk(DRIVER_NAME": failed to create class pktcdvd\n"); 433 pr_err("failed to create class pktcdvd\n");
428 return ret; 434 return ret;
429 } 435 }
430 return 0; 436 return 0;
@@ -467,45 +473,31 @@ static void pkt_debugfs_dev_new(struct pktcdvd_device *pd)
467{ 473{
468 if (!pkt_debugfs_root) 474 if (!pkt_debugfs_root)
469 return; 475 return;
470 pd->dfs_f_info = NULL;
471 pd->dfs_d_root = debugfs_create_dir(pd->name, pkt_debugfs_root); 476 pd->dfs_d_root = debugfs_create_dir(pd->name, pkt_debugfs_root);
472 if (IS_ERR(pd->dfs_d_root)) { 477 if (!pd->dfs_d_root)
473 pd->dfs_d_root = NULL;
474 return; 478 return;
475 } 479
476 pd->dfs_f_info = debugfs_create_file("info", S_IRUGO, 480 pd->dfs_f_info = debugfs_create_file("info", S_IRUGO,
477 pd->dfs_d_root, pd, &debug_fops); 481 pd->dfs_d_root, pd, &debug_fops);
478 if (IS_ERR(pd->dfs_f_info)) {
479 pd->dfs_f_info = NULL;
480 return;
481 }
482} 482}
483 483
484static void pkt_debugfs_dev_remove(struct pktcdvd_device *pd) 484static void pkt_debugfs_dev_remove(struct pktcdvd_device *pd)
485{ 485{
486 if (!pkt_debugfs_root) 486 if (!pkt_debugfs_root)
487 return; 487 return;
488 if (pd->dfs_f_info) 488 debugfs_remove(pd->dfs_f_info);
489 debugfs_remove(pd->dfs_f_info); 489 debugfs_remove(pd->dfs_d_root);
490 pd->dfs_f_info = NULL; 490 pd->dfs_f_info = NULL;
491 if (pd->dfs_d_root)
492 debugfs_remove(pd->dfs_d_root);
493 pd->dfs_d_root = NULL; 491 pd->dfs_d_root = NULL;
494} 492}
495 493
496static void pkt_debugfs_init(void) 494static void pkt_debugfs_init(void)
497{ 495{
498 pkt_debugfs_root = debugfs_create_dir(DRIVER_NAME, NULL); 496 pkt_debugfs_root = debugfs_create_dir(DRIVER_NAME, NULL);
499 if (IS_ERR(pkt_debugfs_root)) {
500 pkt_debugfs_root = NULL;
501 return;
502 }
503} 497}
504 498
505static void pkt_debugfs_cleanup(void) 499static void pkt_debugfs_cleanup(void)
506{ 500{
507 if (!pkt_debugfs_root)
508 return;
509 debugfs_remove(pkt_debugfs_root); 501 debugfs_remove(pkt_debugfs_root);
510 pkt_debugfs_root = NULL; 502 pkt_debugfs_root = NULL;
511} 503}
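
The simplified teardown above leans on debugfs_remove() being safe to call with a NULL dentry, so creation failures no longer need to be tracked with separate IS_ERR()/NULL checks. A minimal sketch of the resulting pattern, with hypothetical fields (it mirrors the driver's usage rather than documenting the debugfs API exhaustively):

#include <linux/debugfs.h>

struct foo_dev {
	struct dentry *dfs_dir;
	struct dentry *dfs_info;
};

static void foo_debugfs_remove(struct foo_dev *d)
{
	/* no-ops if the corresponding create call failed and left NULL */
	debugfs_remove(d->dfs_info);
	debugfs_remove(d->dfs_dir);
	d->dfs_info = NULL;
	d->dfs_dir = NULL;
}
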
@@ -517,7 +509,7 @@ static void pkt_bio_finished(struct pktcdvd_device *pd)
517{ 509{
518 BUG_ON(atomic_read(&pd->cdrw.pending_bios) <= 0); 510 BUG_ON(atomic_read(&pd->cdrw.pending_bios) <= 0);
519 if (atomic_dec_and_test(&pd->cdrw.pending_bios)) { 511 if (atomic_dec_and_test(&pd->cdrw.pending_bios)) {
520 VPRINTK(DRIVER_NAME": queue empty\n"); 512 pkt_dbg(2, pd, "queue empty\n");
521 atomic_set(&pd->iosched.attention, 1); 513 atomic_set(&pd->iosched.attention, 1);
522 wake_up(&pd->wqueue); 514 wake_up(&pd->wqueue);
523 } 515 }
@@ -734,36 +726,33 @@ out:
734 return ret; 726 return ret;
735} 727}
736 728
729static const char *sense_key_string(__u8 index)
730{
731 static const char * const info[] = {
732 "No sense", "Recovered error", "Not ready",
733 "Medium error", "Hardware error", "Illegal request",
734 "Unit attention", "Data protect", "Blank check",
735 };
736
737 return index < ARRAY_SIZE(info) ? info[index] : "INVALID";
738}
739
737/* 740/*
738 * A generic sense dump / resolve mechanism should be implemented across 741 * A generic sense dump / resolve mechanism should be implemented across
739 * all ATAPI + SCSI devices. 742 * all ATAPI + SCSI devices.
740 */ 743 */
741static void pkt_dump_sense(struct packet_command *cgc) 744static void pkt_dump_sense(struct pktcdvd_device *pd,
745 struct packet_command *cgc)
742{ 746{
743 static char *info[9] = { "No sense", "Recovered error", "Not ready",
744 "Medium error", "Hardware error", "Illegal request",
745 "Unit attention", "Data protect", "Blank check" };
746 int i;
747 struct request_sense *sense = cgc->sense; 747 struct request_sense *sense = cgc->sense;
748 748
749 printk(DRIVER_NAME":"); 749 if (sense)
750 for (i = 0; i < CDROM_PACKET_SIZE; i++) 750 pkt_err(pd, "%*ph - sense %02x.%02x.%02x (%s)\n",
751 printk(" %02x", cgc->cmd[i]); 751 CDROM_PACKET_SIZE, cgc->cmd,
752 printk(" - "); 752 sense->sense_key, sense->asc, sense->ascq,
753 753 sense_key_string(sense->sense_key));
754 if (sense == NULL) { 754 else
755 printk("no sense\n"); 755 pkt_err(pd, "%*ph - no sense\n", CDROM_PACKET_SIZE, cgc->cmd);
756 return;
757 }
758
759 printk("sense %02x.%02x.%02x", sense->sense_key, sense->asc, sense->ascq);
760
761 if (sense->sense_key > 8) {
762 printk(" (INVALID)\n");
763 return;
764 }
765
766 printk(" (%s)\n", info[sense->sense_key]);
767} 756}
768 757
769/* 758/*
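
pkt_dump_sense() now relies on the %*ph printk extension, which hex-dumps a small buffer whose length is passed as the field width, instead of looping over printk(" %02x", ...). A hedged sketch of that format specifier:

#include <linux/printk.h>
#include <linux/types.h>

static void foo_dump_cdb(const u8 *cdb, int len)
{
	/* for len == 6 this prints something like:
	 *   cdb: 2a 00 00 00 10 00
	 */
	pr_info("cdb: %*ph\n", len, cdb);
}
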
@@ -806,7 +795,7 @@ static noinline_for_stack int pkt_set_speed(struct pktcdvd_device *pd,
806 cgc.cmd[5] = write_speed & 0xff; 795 cgc.cmd[5] = write_speed & 0xff;
807 796
808 if ((ret = pkt_generic_packet(pd, &cgc))) 797 if ((ret = pkt_generic_packet(pd, &cgc)))
809 pkt_dump_sense(&cgc); 798 pkt_dump_sense(pd, &cgc);
810 799
811 return ret; 800 return ret;
812} 801}
@@ -872,7 +861,7 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
872 need_write_seek = 0; 861 need_write_seek = 0;
873 if (need_write_seek && reads_queued) { 862 if (need_write_seek && reads_queued) {
874 if (atomic_read(&pd->cdrw.pending_bios) > 0) { 863 if (atomic_read(&pd->cdrw.pending_bios) > 0) {
875 VPRINTK(DRIVER_NAME": write, waiting\n"); 864 pkt_dbg(2, pd, "write, waiting\n");
876 break; 865 break;
877 } 866 }
878 pkt_flush_cache(pd); 867 pkt_flush_cache(pd);
@@ -881,7 +870,7 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
881 } else { 870 } else {
882 if (!reads_queued && writes_queued) { 871 if (!reads_queued && writes_queued) {
883 if (atomic_read(&pd->cdrw.pending_bios) > 0) { 872 if (atomic_read(&pd->cdrw.pending_bios) > 0) {
884 VPRINTK(DRIVER_NAME": read, waiting\n"); 873 pkt_dbg(2, pd, "read, waiting\n");
885 break; 874 break;
886 } 875 }
887 pd->iosched.writing = 1; 876 pd->iosched.writing = 1;
@@ -943,7 +932,7 @@ static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_que
943 set_bit(PACKET_MERGE_SEGS, &pd->flags); 932 set_bit(PACKET_MERGE_SEGS, &pd->flags);
944 return 0; 933 return 0;
945 } else { 934 } else {
946 printk(DRIVER_NAME": cdrom max_phys_segments too small\n"); 935 pkt_err(pd, "cdrom max_phys_segments too small\n");
947 return -EIO; 936 return -EIO;
948 } 937 }
949} 938}
@@ -987,8 +976,9 @@ static void pkt_end_io_read(struct bio *bio, int err)
987 struct pktcdvd_device *pd = pkt->pd; 976 struct pktcdvd_device *pd = pkt->pd;
988 BUG_ON(!pd); 977 BUG_ON(!pd);
989 978
990 VPRINTK("pkt_end_io_read: bio=%p sec0=%llx sec=%llx err=%d\n", bio, 979 pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n",
991 (unsigned long long)pkt->sector, (unsigned long long)bio->bi_sector, err); 980 bio, (unsigned long long)pkt->sector,
981 (unsigned long long)bio->bi_sector, err);
992 982
993 if (err) 983 if (err)
994 atomic_inc(&pkt->io_errors); 984 atomic_inc(&pkt->io_errors);
@@ -1005,7 +995,7 @@ static void pkt_end_io_packet_write(struct bio *bio, int err)
1005 struct pktcdvd_device *pd = pkt->pd; 995 struct pktcdvd_device *pd = pkt->pd;
1006 BUG_ON(!pd); 996 BUG_ON(!pd);
1007 997
1008 VPRINTK("pkt_end_io_packet_write: id=%d, err=%d\n", pkt->id, err); 998 pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, err);
1009 999
1010 pd->stats.pkt_ended++; 1000 pd->stats.pkt_ended++;
1011 1001
@@ -1047,7 +1037,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
1047 spin_unlock(&pkt->lock); 1037 spin_unlock(&pkt->lock);
1048 1038
1049 if (pkt->cache_valid) { 1039 if (pkt->cache_valid) {
1050 VPRINTK("pkt_gather_data: zone %llx cached\n", 1040 pkt_dbg(2, pd, "zone %llx cached\n",
1051 (unsigned long long)pkt->sector); 1041 (unsigned long long)pkt->sector);
1052 goto out_account; 1042 goto out_account;
1053 } 1043 }
@@ -1070,7 +1060,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
1070 1060
1071 p = (f * CD_FRAMESIZE) / PAGE_SIZE; 1061 p = (f * CD_FRAMESIZE) / PAGE_SIZE;
1072 offset = (f * CD_FRAMESIZE) % PAGE_SIZE; 1062 offset = (f * CD_FRAMESIZE) % PAGE_SIZE;
1073 VPRINTK("pkt_gather_data: Adding frame %d, page:%p offs:%d\n", 1063 pkt_dbg(2, pd, "Adding frame %d, page:%p offs:%d\n",
1074 f, pkt->pages[p], offset); 1064 f, pkt->pages[p], offset);
1075 if (!bio_add_page(bio, pkt->pages[p], CD_FRAMESIZE, offset)) 1065 if (!bio_add_page(bio, pkt->pages[p], CD_FRAMESIZE, offset))
1076 BUG(); 1066 BUG();
@@ -1082,7 +1072,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
1082 } 1072 }
1083 1073
1084out_account: 1074out_account:
1085 VPRINTK("pkt_gather_data: need %d frames for zone %llx\n", 1075 pkt_dbg(2, pd, "need %d frames for zone %llx\n",
1086 frames_read, (unsigned long long)pkt->sector); 1076 frames_read, (unsigned long long)pkt->sector);
1087 pd->stats.pkt_started++; 1077 pd->stats.pkt_started++;
1088 pd->stats.secs_rg += frames_read * (CD_FRAMESIZE >> 9); 1078 pd->stats.secs_rg += frames_read * (CD_FRAMESIZE >> 9);
@@ -1183,7 +1173,8 @@ static inline void pkt_set_state(struct packet_data *pkt, enum packet_data_state
1183 "IDLE", "WAITING", "READ_WAIT", "WRITE_WAIT", "RECOVERY", "FINISHED" 1173 "IDLE", "WAITING", "READ_WAIT", "WRITE_WAIT", "RECOVERY", "FINISHED"
1184 }; 1174 };
1185 enum packet_data_state old_state = pkt->state; 1175 enum packet_data_state old_state = pkt->state;
1186 VPRINTK("pkt %2d : s=%6llx %s -> %s\n", pkt->id, (unsigned long long)pkt->sector, 1176 pkt_dbg(2, pd, "pkt %2d : s=%6llx %s -> %s\n",
1177 pkt->id, (unsigned long long)pkt->sector,
1187 state_name[old_state], state_name[state]); 1178 state_name[old_state], state_name[state]);
1188#endif 1179#endif
1189 pkt->state = state; 1180 pkt->state = state;
@@ -1202,12 +1193,10 @@ static int pkt_handle_queue(struct pktcdvd_device *pd)
1202 struct rb_node *n; 1193 struct rb_node *n;
1203 int wakeup; 1194 int wakeup;
1204 1195
1205 VPRINTK("handle_queue\n");
1206
1207 atomic_set(&pd->scan_queue, 0); 1196 atomic_set(&pd->scan_queue, 0);
1208 1197
1209 if (list_empty(&pd->cdrw.pkt_free_list)) { 1198 if (list_empty(&pd->cdrw.pkt_free_list)) {
1210 VPRINTK("handle_queue: no pkt\n"); 1199 pkt_dbg(2, pd, "no pkt\n");
1211 return 0; 1200 return 0;
1212 } 1201 }
1213 1202
@@ -1224,7 +1213,7 @@ static int pkt_handle_queue(struct pktcdvd_device *pd)
1224 node = first_node; 1213 node = first_node;
1225 while (node) { 1214 while (node) {
1226 bio = node->bio; 1215 bio = node->bio;
1227 zone = ZONE(bio->bi_sector, pd); 1216 zone = get_zone(bio->bi_sector, pd);
1228 list_for_each_entry(p, &pd->cdrw.pkt_active_list, list) { 1217 list_for_each_entry(p, &pd->cdrw.pkt_active_list, list) {
1229 if (p->sector == zone) { 1218 if (p->sector == zone) {
1230 bio = NULL; 1219 bio = NULL;
@@ -1244,7 +1233,7 @@ try_next_bio:
1244 } 1233 }
1245 spin_unlock(&pd->lock); 1234 spin_unlock(&pd->lock);
1246 if (!bio) { 1235 if (!bio) {
1247 VPRINTK("handle_queue: no bio\n"); 1236 pkt_dbg(2, pd, "no bio\n");
1248 return 0; 1237 return 0;
1249 } 1238 }
1250 1239
@@ -1260,12 +1249,12 @@ try_next_bio:
1260 * to this packet. 1249 * to this packet.
1261 */ 1250 */
1262 spin_lock(&pd->lock); 1251 spin_lock(&pd->lock);
1263 VPRINTK("pkt_handle_queue: looking for zone %llx\n", (unsigned long long)zone); 1252 pkt_dbg(2, pd, "looking for zone %llx\n", (unsigned long long)zone);
1264 while ((node = pkt_rbtree_find(pd, zone)) != NULL) { 1253 while ((node = pkt_rbtree_find(pd, zone)) != NULL) {
1265 bio = node->bio; 1254 bio = node->bio;
1266 VPRINTK("pkt_handle_queue: found zone=%llx\n", 1255 pkt_dbg(2, pd, "found zone=%llx\n",
1267 (unsigned long long)ZONE(bio->bi_sector, pd)); 1256 (unsigned long long)get_zone(bio->bi_sector, pd));
1268 if (ZONE(bio->bi_sector, pd) != zone) 1257 if (get_zone(bio->bi_sector, pd) != zone)
1269 break; 1258 break;
1270 pkt_rbtree_erase(pd, node); 1259 pkt_rbtree_erase(pd, node);
1271 spin_lock(&pkt->lock); 1260 spin_lock(&pkt->lock);
@@ -1316,7 +1305,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
1316 if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset)) 1305 if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset))
1317 BUG(); 1306 BUG();
1318 } 1307 }
1319 VPRINTK(DRIVER_NAME": vcnt=%d\n", pkt->w_bio->bi_vcnt); 1308 pkt_dbg(2, pd, "vcnt=%d\n", pkt->w_bio->bi_vcnt);
1320 1309
1321 /* 1310 /*
1322 * Fill-in bvec with data from orig_bios. 1311 * Fill-in bvec with data from orig_bios.
@@ -1327,7 +1316,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
1327 pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE); 1316 pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE);
1328 spin_unlock(&pkt->lock); 1317 spin_unlock(&pkt->lock);
1329 1318
1330 VPRINTK("pkt_start_write: Writing %d frames for zone %llx\n", 1319 pkt_dbg(2, pd, "Writing %d frames for zone %llx\n",
1331 pkt->write_size, (unsigned long long)pkt->sector); 1320 pkt->write_size, (unsigned long long)pkt->sector);
1332 1321
1333 if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames)) { 1322 if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames)) {
@@ -1359,7 +1348,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
1359{ 1348{
1360 int uptodate; 1349 int uptodate;
1361 1350
1362 VPRINTK("run_state_machine: pkt %d\n", pkt->id); 1351 pkt_dbg(2, pd, "pkt %d\n", pkt->id);
1363 1352
1364 for (;;) { 1353 for (;;) {
1365 switch (pkt->state) { 1354 switch (pkt->state) {
@@ -1398,7 +1387,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
1398 if (pkt_start_recovery(pkt)) { 1387 if (pkt_start_recovery(pkt)) {
1399 pkt_start_write(pd, pkt); 1388 pkt_start_write(pd, pkt);
1400 } else { 1389 } else {
1401 VPRINTK("No recovery possible\n"); 1390 pkt_dbg(2, pd, "No recovery possible\n");
1402 pkt_set_state(pkt, PACKET_FINISHED_STATE); 1391 pkt_set_state(pkt, PACKET_FINISHED_STATE);
1403 } 1392 }
1404 break; 1393 break;
@@ -1419,8 +1408,6 @@ static void pkt_handle_packets(struct pktcdvd_device *pd)
1419{ 1408{
1420 struct packet_data *pkt, *next; 1409 struct packet_data *pkt, *next;
1421 1410
1422 VPRINTK("pkt_handle_packets\n");
1423
1424 /* 1411 /*
1425 * Run state machine for active packets 1412 * Run state machine for active packets
1426 */ 1413 */
@@ -1502,9 +1489,9 @@ static int kcdrwd(void *foobar)
1502 if (PACKET_DEBUG > 1) { 1489 if (PACKET_DEBUG > 1) {
1503 int states[PACKET_NUM_STATES]; 1490 int states[PACKET_NUM_STATES];
1504 pkt_count_states(pd, states); 1491 pkt_count_states(pd, states);
1505 VPRINTK("kcdrwd: i:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n", 1492 pkt_dbg(2, pd, "i:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n",
1506 states[0], states[1], states[2], states[3], 1493 states[0], states[1], states[2],
1507 states[4], states[5]); 1494 states[3], states[4], states[5]);
1508 } 1495 }
1509 1496
1510 min_sleep_time = MAX_SCHEDULE_TIMEOUT; 1497 min_sleep_time = MAX_SCHEDULE_TIMEOUT;
@@ -1513,9 +1500,9 @@ static int kcdrwd(void *foobar)
1513 min_sleep_time = pkt->sleep_time; 1500 min_sleep_time = pkt->sleep_time;
1514 } 1501 }
1515 1502
1516 VPRINTK("kcdrwd: sleeping\n"); 1503 pkt_dbg(2, pd, "sleeping\n");
1517 residue = schedule_timeout(min_sleep_time); 1504 residue = schedule_timeout(min_sleep_time);
1518 VPRINTK("kcdrwd: wake up\n"); 1505 pkt_dbg(2, pd, "wake up\n");
1519 1506
1520 /* make swsusp happy with our thread */ 1507 /* make swsusp happy with our thread */
1521 try_to_freeze(); 1508 try_to_freeze();
@@ -1563,9 +1550,10 @@ work_to_do:
1563 1550
1564static void pkt_print_settings(struct pktcdvd_device *pd) 1551static void pkt_print_settings(struct pktcdvd_device *pd)
1565{ 1552{
1566 printk(DRIVER_NAME": %s packets, ", pd->settings.fp ? "Fixed" : "Variable"); 1553 pkt_info(pd, "%s packets, %u blocks, Mode-%c disc\n",
1567 printk("%u blocks, ", pd->settings.size >> 2); 1554 pd->settings.fp ? "Fixed" : "Variable",
1568 printk("Mode-%c disc\n", pd->settings.block_mode == 8 ? '1' : '2'); 1555 pd->settings.size >> 2,
1556 pd->settings.block_mode == 8 ? '1' : '2');
1569} 1557}
1570 1558
1571static int pkt_mode_sense(struct pktcdvd_device *pd, struct packet_command *cgc, int page_code, int page_control) 1559static int pkt_mode_sense(struct pktcdvd_device *pd, struct packet_command *cgc, int page_code, int page_control)
@@ -1699,7 +1687,7 @@ static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd)
1699 init_cdrom_command(&cgc, buffer, sizeof(*wp), CGC_DATA_READ); 1687 init_cdrom_command(&cgc, buffer, sizeof(*wp), CGC_DATA_READ);
1700 cgc.sense = &sense; 1688 cgc.sense = &sense;
1701 if ((ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0))) { 1689 if ((ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0))) {
1702 pkt_dump_sense(&cgc); 1690 pkt_dump_sense(pd, &cgc);
1703 return ret; 1691 return ret;
1704 } 1692 }
1705 1693
@@ -1714,7 +1702,7 @@ static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd)
1714 init_cdrom_command(&cgc, buffer, size, CGC_DATA_READ); 1702 init_cdrom_command(&cgc, buffer, size, CGC_DATA_READ);
1715 cgc.sense = &sense; 1703 cgc.sense = &sense;
1716 if ((ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0))) { 1704 if ((ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0))) {
1717 pkt_dump_sense(&cgc); 1705 pkt_dump_sense(pd, &cgc);
1718 return ret; 1706 return ret;
1719 } 1707 }
1720 1708
@@ -1749,14 +1737,14 @@ static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd)
1749 /* 1737 /*
1750 * paranoia 1738 * paranoia
1751 */ 1739 */
1752 printk(DRIVER_NAME": write mode wrong %d\n", wp->data_block_type); 1740 pkt_err(pd, "write mode wrong %d\n", wp->data_block_type);
1753 return 1; 1741 return 1;
1754 } 1742 }
1755 wp->packet_size = cpu_to_be32(pd->settings.size >> 2); 1743 wp->packet_size = cpu_to_be32(pd->settings.size >> 2);
1756 1744
1757 cgc.buflen = cgc.cmd[8] = size; 1745 cgc.buflen = cgc.cmd[8] = size;
1758 if ((ret = pkt_mode_select(pd, &cgc))) { 1746 if ((ret = pkt_mode_select(pd, &cgc))) {
1759 pkt_dump_sense(&cgc); 1747 pkt_dump_sense(pd, &cgc);
1760 return ret; 1748 return ret;
1761 } 1749 }
1762 1750
@@ -1793,7 +1781,7 @@ static int pkt_writable_track(struct pktcdvd_device *pd, track_information *ti)
1793 if (ti->rt == 1 && ti->blank == 0) 1781 if (ti->rt == 1 && ti->blank == 0)
1794 return 1; 1782 return 1;
1795 1783
1796 printk(DRIVER_NAME": bad state %d-%d-%d\n", ti->rt, ti->blank, ti->packet); 1784 pkt_err(pd, "bad state %d-%d-%d\n", ti->rt, ti->blank, ti->packet);
1797 return 0; 1785 return 0;
1798} 1786}
1799 1787
@@ -1811,7 +1799,8 @@ static int pkt_writable_disc(struct pktcdvd_device *pd, disc_information *di)
1811 case 0x12: /* DVD-RAM */ 1799 case 0x12: /* DVD-RAM */
1812 return 1; 1800 return 1;
1813 default: 1801 default:
1814 VPRINTK(DRIVER_NAME": Wrong disc profile (%x)\n", pd->mmc3_profile); 1802 pkt_dbg(2, pd, "Wrong disc profile (%x)\n",
1803 pd->mmc3_profile);
1815 return 0; 1804 return 0;
1816 } 1805 }
1817 1806
@@ -1820,22 +1809,22 @@ static int pkt_writable_disc(struct pktcdvd_device *pd, disc_information *di)
1820 * but i'm not sure, should we leave this to user apps? probably. 1809 * but i'm not sure, should we leave this to user apps? probably.
1821 */ 1810 */
1822 if (di->disc_type == 0xff) { 1811 if (di->disc_type == 0xff) {
1823 printk(DRIVER_NAME": Unknown disc. No track?\n"); 1812 pkt_notice(pd, "unknown disc - no track?\n");
1824 return 0; 1813 return 0;
1825 } 1814 }
1826 1815
1827 if (di->disc_type != 0x20 && di->disc_type != 0) { 1816 if (di->disc_type != 0x20 && di->disc_type != 0) {
1828 printk(DRIVER_NAME": Wrong disc type (%x)\n", di->disc_type); 1817 pkt_err(pd, "wrong disc type (%x)\n", di->disc_type);
1829 return 0; 1818 return 0;
1830 } 1819 }
1831 1820
1832 if (di->erasable == 0) { 1821 if (di->erasable == 0) {
1833 printk(DRIVER_NAME": Disc not erasable\n"); 1822 pkt_notice(pd, "disc not erasable\n");
1834 return 0; 1823 return 0;
1835 } 1824 }
1836 1825
1837 if (di->border_status == PACKET_SESSION_RESERVED) { 1826 if (di->border_status == PACKET_SESSION_RESERVED) {
1838 printk(DRIVER_NAME": Can't write to last track (reserved)\n"); 1827 pkt_err(pd, "can't write to last track (reserved)\n");
1839 return 0; 1828 return 0;
1840 } 1829 }
1841 1830
@@ -1860,7 +1849,7 @@ static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd)
1860 memset(&ti, 0, sizeof(track_information)); 1849 memset(&ti, 0, sizeof(track_information));
1861 1850
1862 if ((ret = pkt_get_disc_info(pd, &di))) { 1851 if ((ret = pkt_get_disc_info(pd, &di))) {
1863 printk("failed get_disc\n"); 1852 pkt_err(pd, "failed get_disc\n");
1864 return ret; 1853 return ret;
1865 } 1854 }
1866 1855
@@ -1871,12 +1860,12 @@ static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd)
1871 1860
1872 track = 1; /* (di.last_track_msb << 8) | di.last_track_lsb; */ 1861 track = 1; /* (di.last_track_msb << 8) | di.last_track_lsb; */
1873 if ((ret = pkt_get_track_info(pd, track, 1, &ti))) { 1862 if ((ret = pkt_get_track_info(pd, track, 1, &ti))) {
1874 printk(DRIVER_NAME": failed get_track\n"); 1863 pkt_err(pd, "failed get_track\n");
1875 return ret; 1864 return ret;
1876 } 1865 }
1877 1866
1878 if (!pkt_writable_track(pd, &ti)) { 1867 if (!pkt_writable_track(pd, &ti)) {
1879 printk(DRIVER_NAME": can't write to this track\n"); 1868 pkt_err(pd, "can't write to this track\n");
1880 return -EROFS; 1869 return -EROFS;
1881 } 1870 }
1882 1871
@@ -1886,11 +1875,11 @@ static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd)
1886 */ 1875 */
1887 pd->settings.size = be32_to_cpu(ti.fixed_packet_size) << 2; 1876 pd->settings.size = be32_to_cpu(ti.fixed_packet_size) << 2;
1888 if (pd->settings.size == 0) { 1877 if (pd->settings.size == 0) {
1889 printk(DRIVER_NAME": detected zero packet size!\n"); 1878 pkt_notice(pd, "detected zero packet size!\n");
1890 return -ENXIO; 1879 return -ENXIO;
1891 } 1880 }
1892 if (pd->settings.size > PACKET_MAX_SECTORS) { 1881 if (pd->settings.size > PACKET_MAX_SECTORS) {
1893 printk(DRIVER_NAME": packet size is too big\n"); 1882 pkt_err(pd, "packet size is too big\n");
1894 return -EROFS; 1883 return -EROFS;
1895 } 1884 }
1896 pd->settings.fp = ti.fp; 1885 pd->settings.fp = ti.fp;
@@ -1932,7 +1921,7 @@ static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd)
1932 pd->settings.block_mode = PACKET_BLOCK_MODE2; 1921 pd->settings.block_mode = PACKET_BLOCK_MODE2;
1933 break; 1922 break;
1934 default: 1923 default:
1935 printk(DRIVER_NAME": unknown data mode\n"); 1924 pkt_err(pd, "unknown data mode\n");
1936 return -EROFS; 1925 return -EROFS;
1937 } 1926 }
1938 return 0; 1927 return 0;
@@ -1966,10 +1955,10 @@ static noinline_for_stack int pkt_write_caching(struct pktcdvd_device *pd,
1966 cgc.buflen = cgc.cmd[8] = 2 + ((buf[0] << 8) | (buf[1] & 0xff)); 1955 cgc.buflen = cgc.cmd[8] = 2 + ((buf[0] << 8) | (buf[1] & 0xff));
1967 ret = pkt_mode_select(pd, &cgc); 1956 ret = pkt_mode_select(pd, &cgc);
1968 if (ret) { 1957 if (ret) {
1969 printk(DRIVER_NAME": write caching control failed\n"); 1958 pkt_err(pd, "write caching control failed\n");
1970 pkt_dump_sense(&cgc); 1959 pkt_dump_sense(pd, &cgc);
1971 } else if (!ret && set) 1960 } else if (!ret && set)
1972 printk(DRIVER_NAME": enabled write caching on %s\n", pd->name); 1961 pkt_notice(pd, "enabled write caching\n");
1973 return ret; 1962 return ret;
1974} 1963}
1975 1964
@@ -2005,7 +1994,7 @@ static noinline_for_stack int pkt_get_max_speed(struct pktcdvd_device *pd,
2005 sizeof(struct mode_page_header); 1994 sizeof(struct mode_page_header);
2006 ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0); 1995 ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0);
2007 if (ret) { 1996 if (ret) {
2008 pkt_dump_sense(&cgc); 1997 pkt_dump_sense(pd, &cgc);
2009 return ret; 1998 return ret;
2010 } 1999 }
2011 } 2000 }
@@ -2064,7 +2053,7 @@ static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd,
2064 cgc.cmd[8] = 2; 2053 cgc.cmd[8] = 2;
2065 ret = pkt_generic_packet(pd, &cgc); 2054 ret = pkt_generic_packet(pd, &cgc);
2066 if (ret) { 2055 if (ret) {
2067 pkt_dump_sense(&cgc); 2056 pkt_dump_sense(pd, &cgc);
2068 return ret; 2057 return ret;
2069 } 2058 }
2070 size = ((unsigned int) buf[0]<<8) + buf[1] + 2; 2059 size = ((unsigned int) buf[0]<<8) + buf[1] + 2;
@@ -2079,16 +2068,16 @@ static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd,
2079 cgc.cmd[8] = size; 2068 cgc.cmd[8] = size;
2080 ret = pkt_generic_packet(pd, &cgc); 2069 ret = pkt_generic_packet(pd, &cgc);
2081 if (ret) { 2070 if (ret) {
2082 pkt_dump_sense(&cgc); 2071 pkt_dump_sense(pd, &cgc);
2083 return ret; 2072 return ret;
2084 } 2073 }
2085 2074
2086 if (!(buf[6] & 0x40)) { 2075 if (!(buf[6] & 0x40)) {
2087 printk(DRIVER_NAME": Disc type is not CD-RW\n"); 2076 pkt_notice(pd, "disc type is not CD-RW\n");
2088 return 1; 2077 return 1;
2089 } 2078 }
2090 if (!(buf[6] & 0x4)) { 2079 if (!(buf[6] & 0x4)) {
2091 printk(DRIVER_NAME": A1 values on media are not valid, maybe not CDRW?\n"); 2080 pkt_notice(pd, "A1 values on media are not valid, maybe not CDRW?\n");
2092 return 1; 2081 return 1;
2093 } 2082 }
2094 2083
@@ -2108,14 +2097,14 @@ static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd,
2108 *speed = us_clv_to_speed[sp]; 2097 *speed = us_clv_to_speed[sp];
2109 break; 2098 break;
2110 default: 2099 default:
2111 printk(DRIVER_NAME": Unknown disc sub-type %d\n",st); 2100 pkt_notice(pd, "unknown disc sub-type %d\n", st);
2112 return 1; 2101 return 1;
2113 } 2102 }
2114 if (*speed) { 2103 if (*speed) {
2115 printk(DRIVER_NAME": Max. media speed: %d\n",*speed); 2104 pkt_info(pd, "maximum media speed: %d\n", *speed);
2116 return 0; 2105 return 0;
2117 } else { 2106 } else {
2118 printk(DRIVER_NAME": Unknown speed %d for sub-type %d\n",sp,st); 2107 pkt_notice(pd, "unknown speed %d for sub-type %d\n", sp, st);
2119 return 1; 2108 return 1;
2120 } 2109 }
2121} 2110}
@@ -2126,7 +2115,7 @@ static noinline_for_stack int pkt_perform_opc(struct pktcdvd_device *pd)
2126 struct request_sense sense; 2115 struct request_sense sense;
2127 int ret; 2116 int ret;
2128 2117
2129 VPRINTK(DRIVER_NAME": Performing OPC\n"); 2118 pkt_dbg(2, pd, "Performing OPC\n");
2130 2119
2131 init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE); 2120 init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
2132 cgc.sense = &sense; 2121 cgc.sense = &sense;
@@ -2134,7 +2123,7 @@ static noinline_for_stack int pkt_perform_opc(struct pktcdvd_device *pd)
2134 cgc.cmd[0] = GPCMD_SEND_OPC; 2123 cgc.cmd[0] = GPCMD_SEND_OPC;
2135 cgc.cmd[1] = 1; 2124 cgc.cmd[1] = 1;
2136 if ((ret = pkt_generic_packet(pd, &cgc))) 2125 if ((ret = pkt_generic_packet(pd, &cgc)))
2137 pkt_dump_sense(&cgc); 2126 pkt_dump_sense(pd, &cgc);
2138 return ret; 2127 return ret;
2139} 2128}
2140 2129
@@ -2144,12 +2133,12 @@ static int pkt_open_write(struct pktcdvd_device *pd)
2144 unsigned int write_speed, media_write_speed, read_speed; 2133 unsigned int write_speed, media_write_speed, read_speed;
2145 2134
2146 if ((ret = pkt_probe_settings(pd))) { 2135 if ((ret = pkt_probe_settings(pd))) {
2147 VPRINTK(DRIVER_NAME": %s failed probe\n", pd->name); 2136 pkt_dbg(2, pd, "failed probe\n");
2148 return ret; 2137 return ret;
2149 } 2138 }
2150 2139
2151 if ((ret = pkt_set_write_settings(pd))) { 2140 if ((ret = pkt_set_write_settings(pd))) {
2152 DPRINTK(DRIVER_NAME": %s failed saving write settings\n", pd->name); 2141 pkt_dbg(1, pd, "failed saving write settings\n");
2153 return -EIO; 2142 return -EIO;
2154 } 2143 }
2155 2144
@@ -2161,26 +2150,26 @@ static int pkt_open_write(struct pktcdvd_device *pd)
2161 case 0x13: /* DVD-RW */ 2150 case 0x13: /* DVD-RW */
2162 case 0x1a: /* DVD+RW */ 2151 case 0x1a: /* DVD+RW */
2163 case 0x12: /* DVD-RAM */ 2152 case 0x12: /* DVD-RAM */
2164 DPRINTK(DRIVER_NAME": write speed %ukB/s\n", write_speed); 2153 pkt_dbg(1, pd, "write speed %ukB/s\n", write_speed);
2165 break; 2154 break;
2166 default: 2155 default:
2167 if ((ret = pkt_media_speed(pd, &media_write_speed))) 2156 if ((ret = pkt_media_speed(pd, &media_write_speed)))
2168 media_write_speed = 16; 2157 media_write_speed = 16;
2169 write_speed = min(write_speed, media_write_speed * 177); 2158 write_speed = min(write_speed, media_write_speed * 177);
2170 DPRINTK(DRIVER_NAME": write speed %ux\n", write_speed / 176); 2159 pkt_dbg(1, pd, "write speed %ux\n", write_speed / 176);
2171 break; 2160 break;
2172 } 2161 }
2173 read_speed = write_speed; 2162 read_speed = write_speed;
2174 2163
2175 if ((ret = pkt_set_speed(pd, write_speed, read_speed))) { 2164 if ((ret = pkt_set_speed(pd, write_speed, read_speed))) {
2176 DPRINTK(DRIVER_NAME": %s couldn't set write speed\n", pd->name); 2165 pkt_dbg(1, pd, "couldn't set write speed\n");
2177 return -EIO; 2166 return -EIO;
2178 } 2167 }
2179 pd->write_speed = write_speed; 2168 pd->write_speed = write_speed;
2180 pd->read_speed = read_speed; 2169 pd->read_speed = read_speed;
2181 2170
2182 if ((ret = pkt_perform_opc(pd))) { 2171 if ((ret = pkt_perform_opc(pd))) {
2183 DPRINTK(DRIVER_NAME": %s Optimum Power Calibration failed\n", pd->name); 2172 pkt_dbg(1, pd, "Optimum Power Calibration failed\n");
2184 } 2173 }
2185 2174
2186 return 0; 2175 return 0;
@@ -2205,7 +2194,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
2205 goto out; 2194 goto out;
2206 2195
2207 if ((ret = pkt_get_last_written(pd, &lba))) { 2196 if ((ret = pkt_get_last_written(pd, &lba))) {
2208 printk(DRIVER_NAME": pkt_get_last_written failed\n"); 2197 pkt_err(pd, "pkt_get_last_written failed\n");
2209 goto out_putdev; 2198 goto out_putdev;
2210 } 2199 }
2211 2200
@@ -2235,11 +2224,11 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
2235 2224
2236 if (write) { 2225 if (write) {
2237 if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) { 2226 if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) {
2238 printk(DRIVER_NAME": not enough memory for buffers\n"); 2227 pkt_err(pd, "not enough memory for buffers\n");
2239 ret = -ENOMEM; 2228 ret = -ENOMEM;
2240 goto out_putdev; 2229 goto out_putdev;
2241 } 2230 }
2242 printk(DRIVER_NAME": %lukB available on disc\n", lba << 1); 2231 pkt_info(pd, "%lukB available on disc\n", lba << 1);
2243 } 2232 }
2244 2233
2245 return 0; 2234 return 0;
@@ -2257,7 +2246,7 @@ out:
2257static void pkt_release_dev(struct pktcdvd_device *pd, int flush) 2246static void pkt_release_dev(struct pktcdvd_device *pd, int flush)
2258{ 2247{
2259 if (flush && pkt_flush_cache(pd)) 2248 if (flush && pkt_flush_cache(pd))
2260 DPRINTK(DRIVER_NAME": %s not flushing cache\n", pd->name); 2249 pkt_dbg(1, pd, "not flushing cache\n");
2261 2250
2262 pkt_lock_door(pd, 0); 2251 pkt_lock_door(pd, 0);
2263 2252
@@ -2279,8 +2268,6 @@ static int pkt_open(struct block_device *bdev, fmode_t mode)
2279 struct pktcdvd_device *pd = NULL; 2268 struct pktcdvd_device *pd = NULL;
2280 int ret; 2269 int ret;
2281 2270
2282 VPRINTK(DRIVER_NAME": entering open\n");
2283
2284 mutex_lock(&pktcdvd_mutex); 2271 mutex_lock(&pktcdvd_mutex);
2285 mutex_lock(&ctl_mutex); 2272 mutex_lock(&ctl_mutex);
2286 pd = pkt_find_dev_from_minor(MINOR(bdev->bd_dev)); 2273 pd = pkt_find_dev_from_minor(MINOR(bdev->bd_dev));
@@ -2315,7 +2302,6 @@ static int pkt_open(struct block_device *bdev, fmode_t mode)
2315out_dec: 2302out_dec:
2316 pd->refcnt--; 2303 pd->refcnt--;
2317out: 2304out:
2318 VPRINTK(DRIVER_NAME": failed open (%d)\n", ret);
2319 mutex_unlock(&ctl_mutex); 2305 mutex_unlock(&ctl_mutex);
2320 mutex_unlock(&pktcdvd_mutex); 2306 mutex_unlock(&pktcdvd_mutex);
2321 return ret; 2307 return ret;
@@ -2360,7 +2346,8 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
2360 2346
2361 pd = q->queuedata; 2347 pd = q->queuedata;
2362 if (!pd) { 2348 if (!pd) {
2363 printk(DRIVER_NAME": %s incorrect request queue\n", bdevname(bio->bi_bdev, b)); 2349 pr_err("%s incorrect request queue\n",
2350 bdevname(bio->bi_bdev, b));
2364 goto end_io; 2351 goto end_io;
2365 } 2352 }
2366 2353
@@ -2382,20 +2369,20 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
2382 } 2369 }
2383 2370
2384 if (!test_bit(PACKET_WRITABLE, &pd->flags)) { 2371 if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
2385 printk(DRIVER_NAME": WRITE for ro device %s (%llu)\n", 2372 pkt_notice(pd, "WRITE for ro device (%llu)\n",
2386 pd->name, (unsigned long long)bio->bi_sector); 2373 (unsigned long long)bio->bi_sector);
2387 goto end_io; 2374 goto end_io;
2388 } 2375 }
2389 2376
2390 if (!bio->bi_size || (bio->bi_size % CD_FRAMESIZE)) { 2377 if (!bio->bi_size || (bio->bi_size % CD_FRAMESIZE)) {
2391 printk(DRIVER_NAME": wrong bio size\n"); 2378 pkt_err(pd, "wrong bio size\n");
2392 goto end_io; 2379 goto end_io;
2393 } 2380 }
2394 2381
2395 blk_queue_bounce(q, &bio); 2382 blk_queue_bounce(q, &bio);
2396 2383
2397 zone = ZONE(bio->bi_sector, pd); 2384 zone = get_zone(bio->bi_sector, pd);
2398 VPRINTK("pkt_make_request: start = %6llx stop = %6llx\n", 2385 pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
2399 (unsigned long long)bio->bi_sector, 2386 (unsigned long long)bio->bi_sector,
2400 (unsigned long long)bio_end_sector(bio)); 2387 (unsigned long long)bio_end_sector(bio));
2401 2388
@@ -2405,7 +2392,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
2405 sector_t last_zone; 2392 sector_t last_zone;
2406 int first_sectors; 2393 int first_sectors;
2407 2394
2408 last_zone = ZONE(bio_end_sector(bio) - 1, pd); 2395 last_zone = get_zone(bio_end_sector(bio) - 1, pd);
2409 if (last_zone != zone) { 2396 if (last_zone != zone) {
2410 BUG_ON(last_zone != zone + pd->settings.size); 2397 BUG_ON(last_zone != zone + pd->settings.size);
2411 first_sectors = last_zone - bio->bi_sector; 2398 first_sectors = last_zone - bio->bi_sector;
@@ -2500,7 +2487,7 @@ static int pkt_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
2500 struct bio_vec *bvec) 2487 struct bio_vec *bvec)
2501{ 2488{
2502 struct pktcdvd_device *pd = q->queuedata; 2489 struct pktcdvd_device *pd = q->queuedata;
2503 sector_t zone = ZONE(bmd->bi_sector, pd); 2490 sector_t zone = get_zone(bmd->bi_sector, pd);
2504 int used = ((bmd->bi_sector - zone) << 9) + bmd->bi_size; 2491 int used = ((bmd->bi_sector - zone) << 9) + bmd->bi_size;
2505 int remaining = (pd->settings.size << 9) - used; 2492 int remaining = (pd->settings.size << 9) - used;
2506 int remaining2; 2493 int remaining2;
@@ -2609,7 +2596,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
2609 struct block_device *bdev; 2596 struct block_device *bdev;
2610 2597
2611 if (pd->pkt_dev == dev) { 2598 if (pd->pkt_dev == dev) {
2612 printk(DRIVER_NAME": Recursive setup not allowed\n"); 2599 pkt_err(pd, "recursive setup not allowed\n");
2613 return -EBUSY; 2600 return -EBUSY;
2614 } 2601 }
2615 for (i = 0; i < MAX_WRITERS; i++) { 2602 for (i = 0; i < MAX_WRITERS; i++) {
@@ -2617,11 +2604,12 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
2617 if (!pd2) 2604 if (!pd2)
2618 continue; 2605 continue;
2619 if (pd2->bdev->bd_dev == dev) { 2606 if (pd2->bdev->bd_dev == dev) {
2620 printk(DRIVER_NAME": %s already setup\n", bdevname(pd2->bdev, b)); 2607 pkt_err(pd, "%s already setup\n",
2608 bdevname(pd2->bdev, b));
2621 return -EBUSY; 2609 return -EBUSY;
2622 } 2610 }
2623 if (pd2->pkt_dev == dev) { 2611 if (pd2->pkt_dev == dev) {
2624 printk(DRIVER_NAME": Can't chain pktcdvd devices\n"); 2612 pkt_err(pd, "can't chain pktcdvd devices\n");
2625 return -EBUSY; 2613 return -EBUSY;
2626 } 2614 }
2627 } 2615 }
@@ -2644,13 +2632,13 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
2644 atomic_set(&pd->cdrw.pending_bios, 0); 2632 atomic_set(&pd->cdrw.pending_bios, 0);
2645 pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->name); 2633 pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->name);
2646 if (IS_ERR(pd->cdrw.thread)) { 2634 if (IS_ERR(pd->cdrw.thread)) {
2647 printk(DRIVER_NAME": can't start kernel thread\n"); 2635 pkt_err(pd, "can't start kernel thread\n");
2648 ret = -ENOMEM; 2636 ret = -ENOMEM;
2649 goto out_mem; 2637 goto out_mem;
2650 } 2638 }
2651 2639
2652 proc_create_data(pd->name, 0, pkt_proc, &pkt_proc_fops, pd); 2640 proc_create_data(pd->name, 0, pkt_proc, &pkt_proc_fops, pd);
2653 DPRINTK(DRIVER_NAME": writer %s mapped to %s\n", pd->name, bdevname(bdev, b)); 2641 pkt_dbg(1, pd, "writer mapped to %s\n", bdevname(bdev, b));
2654 return 0; 2642 return 0;
2655 2643
2656out_mem: 2644out_mem:
@@ -2665,8 +2653,8 @@ static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
2665 struct pktcdvd_device *pd = bdev->bd_disk->private_data; 2653 struct pktcdvd_device *pd = bdev->bd_disk->private_data;
2666 int ret; 2654 int ret;
2667 2655
2668 VPRINTK("pkt_ioctl: cmd %x, dev %d:%d\n", cmd, 2656 pkt_dbg(2, pd, "cmd %x, dev %d:%d\n",
2669 MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev)); 2657 cmd, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
2670 2658
2671 mutex_lock(&pktcdvd_mutex); 2659 mutex_lock(&pktcdvd_mutex);
2672 switch (cmd) { 2660 switch (cmd) {
@@ -2690,7 +2678,7 @@ static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
2690 break; 2678 break;
2691 2679
2692 default: 2680 default:
2693 VPRINTK(DRIVER_NAME": Unknown ioctl for %s (%x)\n", pd->name, cmd); 2681 pkt_dbg(2, pd, "Unknown ioctl (%x)\n", cmd);
2694 ret = -ENOTTY; 2682 ret = -ENOTTY;
2695 } 2683 }
2696 mutex_unlock(&pktcdvd_mutex); 2684 mutex_unlock(&pktcdvd_mutex);
@@ -2743,7 +2731,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
2743 if (!pkt_devs[idx]) 2731 if (!pkt_devs[idx])
2744 break; 2732 break;
2745 if (idx == MAX_WRITERS) { 2733 if (idx == MAX_WRITERS) {
2746 printk(DRIVER_NAME": max %d writers supported\n", MAX_WRITERS); 2734 pr_err("max %d writers supported\n", MAX_WRITERS);
2747 ret = -EBUSY; 2735 ret = -EBUSY;
2748 goto out_mutex; 2736 goto out_mutex;
2749 } 2737 }
@@ -2818,7 +2806,7 @@ out_mem:
2818 kfree(pd); 2806 kfree(pd);
2819out_mutex: 2807out_mutex:
2820 mutex_unlock(&ctl_mutex); 2808 mutex_unlock(&ctl_mutex);
2821 printk(DRIVER_NAME": setup of pktcdvd device failed\n"); 2809 pr_err("setup of pktcdvd device failed\n");
2822 return ret; 2810 return ret;
2823} 2811}
2824 2812
@@ -2839,7 +2827,7 @@ static int pkt_remove_dev(dev_t pkt_dev)
2839 break; 2827 break;
2840 } 2828 }
2841 if (idx == MAX_WRITERS) { 2829 if (idx == MAX_WRITERS) {
2842 DPRINTK(DRIVER_NAME": dev not setup\n"); 2830 pr_debug("dev not setup\n");
2843 ret = -ENXIO; 2831 ret = -ENXIO;
2844 goto out; 2832 goto out;
2845 } 2833 }
@@ -2859,7 +2847,7 @@ static int pkt_remove_dev(dev_t pkt_dev)
2859 blkdev_put(pd->bdev, FMODE_READ | FMODE_NDELAY); 2847 blkdev_put(pd->bdev, FMODE_READ | FMODE_NDELAY);
2860 2848
2861 remove_proc_entry(pd->name, pkt_proc); 2849 remove_proc_entry(pd->name, pkt_proc);
2862 DPRINTK(DRIVER_NAME": writer %s unmapped\n", pd->name); 2850 pkt_dbg(1, pd, "writer unmapped\n");
2863 2851
2864 del_gendisk(pd->disk); 2852 del_gendisk(pd->disk);
2865 blk_cleanup_queue(pd->disk->queue); 2853 blk_cleanup_queue(pd->disk->queue);
@@ -2969,7 +2957,7 @@ static int __init pkt_init(void)
2969 2957
2970 ret = register_blkdev(pktdev_major, DRIVER_NAME); 2958 ret = register_blkdev(pktdev_major, DRIVER_NAME);
2971 if (ret < 0) { 2959 if (ret < 0) {
2972 printk(DRIVER_NAME": Unable to register block device\n"); 2960 pr_err("unable to register block device\n");
2973 goto out2; 2961 goto out2;
2974 } 2962 }
2975 if (!pktdev_major) 2963 if (!pktdev_major)
@@ -2983,7 +2971,7 @@ static int __init pkt_init(void)
2983 2971
2984 ret = misc_register(&pkt_misc); 2972 ret = misc_register(&pkt_misc);
2985 if (ret) { 2973 if (ret) {
2986 printk(DRIVER_NAME": Unable to register misc device\n"); 2974 pr_err("unable to register misc device\n");
2987 goto out_misc; 2975 goto out_misc;
2988 } 2976 }
2989 2977
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 191cd177fef2..cb1db2979d3d 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -931,12 +931,14 @@ static const char *rbd_dev_v1_snap_name(struct rbd_device *rbd_dev,
931 u64 snap_id) 931 u64 snap_id)
932{ 932{
933 u32 which; 933 u32 which;
934 const char *snap_name;
934 935
935 which = rbd_dev_snap_index(rbd_dev, snap_id); 936 which = rbd_dev_snap_index(rbd_dev, snap_id);
936 if (which == BAD_SNAP_INDEX) 937 if (which == BAD_SNAP_INDEX)
937 return NULL; 938 return ERR_PTR(-ENOENT);
938 939
939 return _rbd_dev_v1_snap_name(rbd_dev, which); 940 snap_name = _rbd_dev_v1_snap_name(rbd_dev, which);
941 return snap_name ? snap_name : ERR_PTR(-ENOMEM);
940} 942}
941 943
942static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id) 944static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id)
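
Returning ERR_PTR(-ENOENT) or ERR_PTR(-ENOMEM) instead of a bare NULL lets callers distinguish "no such snapshot" from an allocation failure. The standard caller-side idiom, sketched with a hypothetical function around the returned pointer:

#include <linux/err.h>
#include <linux/printk.h>

/* name comes from a lookup that returns a pointer or ERR_PTR() on failure */
static int foo_use_snap_name(const char *name)
{
	if (IS_ERR(name))
		return PTR_ERR(name);	/* -ENOENT vs -ENOMEM is preserved */

	pr_info("snapshot name: %s\n", name);
	return 0;
}
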
@@ -1561,11 +1563,12 @@ rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request)
1561 obj_request, obj_request->img_request, obj_request->result, 1563 obj_request, obj_request->img_request, obj_request->result,
1562 xferred, length); 1564 xferred, length);
1563 /* 1565 /*
1564 * ENOENT means a hole in the image. We zero-fill the 1566 * ENOENT means a hole in the image. We zero-fill the entire
1565 * entire length of the request. A short read also implies 1567 * length of the request. A short read also implies zero-fill
1566 * zero-fill to the end of the request. Either way we 1568 * to the end of the request. An error requires the whole
1567 * update the xferred count to indicate the whole request 1569 * length of the request to be reported finished with an error
1568 * was satisfied. 1570 * to the block layer. In each case we update the xferred
1571 * count to indicate the whole request was satisfied.
1569 */ 1572 */
1570 rbd_assert(obj_request->type != OBJ_REQUEST_NODATA); 1573 rbd_assert(obj_request->type != OBJ_REQUEST_NODATA);
1571 if (obj_request->result == -ENOENT) { 1574 if (obj_request->result == -ENOENT) {
@@ -1574,14 +1577,13 @@ rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request)
1574 else 1577 else
1575 zero_pages(obj_request->pages, 0, length); 1578 zero_pages(obj_request->pages, 0, length);
1576 obj_request->result = 0; 1579 obj_request->result = 0;
1577 obj_request->xferred = length;
1578 } else if (xferred < length && !obj_request->result) { 1580 } else if (xferred < length && !obj_request->result) {
1579 if (obj_request->type == OBJ_REQUEST_BIO) 1581 if (obj_request->type == OBJ_REQUEST_BIO)
1580 zero_bio_chain(obj_request->bio_list, xferred); 1582 zero_bio_chain(obj_request->bio_list, xferred);
1581 else 1583 else
1582 zero_pages(obj_request->pages, xferred, length); 1584 zero_pages(obj_request->pages, xferred, length);
1583 obj_request->xferred = length;
1584 } 1585 }
1586 obj_request->xferred = length;
1585 obj_request_done_set(obj_request); 1587 obj_request_done_set(obj_request);
1586} 1588}
1587 1589
@@ -2167,9 +2169,9 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
2167 struct rbd_obj_request *obj_request = NULL; 2169 struct rbd_obj_request *obj_request = NULL;
2168 struct rbd_obj_request *next_obj_request; 2170 struct rbd_obj_request *next_obj_request;
2169 bool write_request = img_request_write_test(img_request); 2171 bool write_request = img_request_write_test(img_request);
2170 struct bio *bio_list = 0; 2172 struct bio *bio_list = NULL;
2171 unsigned int bio_offset = 0; 2173 unsigned int bio_offset = 0;
2172 struct page **pages = 0; 2174 struct page **pages = NULL;
2173 u64 img_offset; 2175 u64 img_offset;
2174 u64 resid; 2176 u64 resid;
2175 u16 opcode; 2177 u16 opcode;
@@ -2207,6 +2209,11 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
2207 rbd_segment_name_free(object_name); 2209 rbd_segment_name_free(object_name);
2208 if (!obj_request) 2210 if (!obj_request)
2209 goto out_unwind; 2211 goto out_unwind;
2212 /*
2213 * set obj_request->img_request before creating the
2214 * osd_request so that it gets the right snapc
2215 */
2216 rbd_img_obj_request_add(img_request, obj_request);
2210 2217
2211 if (type == OBJ_REQUEST_BIO) { 2218 if (type == OBJ_REQUEST_BIO) {
2212 unsigned int clone_size; 2219 unsigned int clone_size;
@@ -2248,11 +2255,6 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
2248 obj_request->pages, length, 2255 obj_request->pages, length,
2249 offset & ~PAGE_MASK, false, false); 2256 offset & ~PAGE_MASK, false, false);
2250 2257
2251 /*
2252 * set obj_request->img_request before formatting
2253 * the osd_request so that it gets the right snapc
2254 */
2255 rbd_img_obj_request_add(img_request, obj_request);
2256 if (write_request) 2258 if (write_request)
2257 rbd_osd_req_format_write(obj_request); 2259 rbd_osd_req_format_write(obj_request);
2258 else 2260 else
@@ -2812,7 +2814,7 @@ out_err:
2812 obj_request_done_set(obj_request); 2814 obj_request_done_set(obj_request);
2813} 2815}
2814 2816
2815static int rbd_obj_notify_ack(struct rbd_device *rbd_dev, u64 notify_id) 2817static int rbd_obj_notify_ack_sync(struct rbd_device *rbd_dev, u64 notify_id)
2816{ 2818{
2817 struct rbd_obj_request *obj_request; 2819 struct rbd_obj_request *obj_request;
2818 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 2820 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
@@ -2827,16 +2829,17 @@ static int rbd_obj_notify_ack(struct rbd_device *rbd_dev, u64 notify_id)
2827 obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, obj_request); 2829 obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, obj_request);
2828 if (!obj_request->osd_req) 2830 if (!obj_request->osd_req)
2829 goto out; 2831 goto out;
2830 obj_request->callback = rbd_obj_request_put;
2831 2832
2832 osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_NOTIFY_ACK, 2833 osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_NOTIFY_ACK,
2833 notify_id, 0, 0); 2834 notify_id, 0, 0);
2834 rbd_osd_req_format_read(obj_request); 2835 rbd_osd_req_format_read(obj_request);
2835 2836
2836 ret = rbd_obj_request_submit(osdc, obj_request); 2837 ret = rbd_obj_request_submit(osdc, obj_request);
2837out:
2838 if (ret) 2838 if (ret)
2839 rbd_obj_request_put(obj_request); 2839 goto out;
2840 ret = rbd_obj_request_wait(obj_request);
2841out:
2842 rbd_obj_request_put(obj_request);
2840 2843
2841 return ret; 2844 return ret;
2842} 2845}
@@ -2856,7 +2859,7 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
2856 if (ret) 2859 if (ret)
2857 rbd_warn(rbd_dev, "header refresh error (%d)\n", ret); 2860 rbd_warn(rbd_dev, "header refresh error (%d)\n", ret);
2858 2861
2859 rbd_obj_notify_ack(rbd_dev, notify_id); 2862 rbd_obj_notify_ack_sync(rbd_dev, notify_id);
2860} 2863}
2861 2864
2862/* 2865/*
@@ -3328,6 +3331,31 @@ static void rbd_exists_validate(struct rbd_device *rbd_dev)
3328 clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); 3331 clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
3329} 3332}
3330 3333
3334static void rbd_dev_update_size(struct rbd_device *rbd_dev)
3335{
3336 sector_t size;
3337 bool removing;
3338
3339 /*
3340 * Don't hold the lock while doing disk operations,
3341 * or lock ordering will conflict with the bdev mutex via:
3342 * rbd_add() -> blkdev_get() -> rbd_open()
3343 */
3344 spin_lock_irq(&rbd_dev->lock);
3345 removing = test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags);
3346 spin_unlock_irq(&rbd_dev->lock);
3347 /*
3348 * If the device is being removed, rbd_dev->disk has
3349 * been destroyed, so don't try to update its size
3350 */
3351 if (!removing) {
3352 size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE;
3353 dout("setting size to %llu sectors", (unsigned long long)size);
3354 set_capacity(rbd_dev->disk, size);
3355 revalidate_disk(rbd_dev->disk);
3356 }
3357}
3358
3331static int rbd_dev_refresh(struct rbd_device *rbd_dev) 3359static int rbd_dev_refresh(struct rbd_device *rbd_dev)
3332{ 3360{
3333 u64 mapping_size; 3361 u64 mapping_size;
@@ -3347,12 +3375,7 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
3347 up_write(&rbd_dev->header_rwsem); 3375 up_write(&rbd_dev->header_rwsem);
3348 3376
3349 if (mapping_size != rbd_dev->mapping.size) { 3377 if (mapping_size != rbd_dev->mapping.size) {
3350 sector_t size; 3378 rbd_dev_update_size(rbd_dev);
3351
3352 size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE;
3353 dout("setting size to %llu sectors", (unsigned long long)size);
3354 set_capacity(rbd_dev->disk, size);
3355 revalidate_disk(rbd_dev->disk);
3356 } 3379 }
3357 3380
3358 return ret; 3381 return ret;
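The new rbd_dev_update_size() helper samples the REMOVING flag under rbd_dev->lock and then performs the gendisk update with the lock dropped, which is what avoids the bdev-mutex ordering cycle described in its comment. A minimal sketch of the conversion it performs, assuming rbd's 512-byte SECTOR_SIZE (the example number is illustrative only):

	/* sketch: mapping.size is in bytes, gendisk capacity is in 512-byte sectors */
	sector_t size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE;	/* 1 GiB -> 2097152 sectors */

	if (!removing) {			/* sampled under the spinlock, used without it */
		set_capacity(rbd_dev->disk, size);
		revalidate_disk(rbd_dev->disk);
	}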
@@ -3706,12 +3729,14 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
3706 if (ret < sizeof (size_buf)) 3729 if (ret < sizeof (size_buf))
3707 return -ERANGE; 3730 return -ERANGE;
3708 3731
3709 if (order) 3732 if (order) {
3710 *order = size_buf.order; 3733 *order = size_buf.order;
3734 dout(" order %u", (unsigned int)*order);
3735 }
3711 *snap_size = le64_to_cpu(size_buf.size); 3736 *snap_size = le64_to_cpu(size_buf.size);
3712 3737
3713 dout(" snap_id 0x%016llx order = %u, snap_size = %llu\n", 3738 dout(" snap_id 0x%016llx snap_size = %llu\n",
3714 (unsigned long long)snap_id, (unsigned int)*order, 3739 (unsigned long long)snap_id,
3715 (unsigned long long)*snap_size); 3740 (unsigned long long)*snap_size);
3716 3741
3717 return 0; 3742 return 0;
@@ -4059,8 +4084,13 @@ static u64 rbd_v2_snap_id_by_name(struct rbd_device *rbd_dev, const char *name)
4059 4084
4060 snap_id = snapc->snaps[which]; 4085 snap_id = snapc->snaps[which];
4061 snap_name = rbd_dev_v2_snap_name(rbd_dev, snap_id); 4086 snap_name = rbd_dev_v2_snap_name(rbd_dev, snap_id);
4062 if (IS_ERR(snap_name)) 4087 if (IS_ERR(snap_name)) {
4063 break; 4088 /* ignore no-longer existing snapshots */
4089 if (PTR_ERR(snap_name) == -ENOENT)
4090 continue;
4091 else
4092 break;
4093 }
4064 found = !strcmp(name, snap_name); 4094 found = !strcmp(name, snap_name);
4065 kfree(snap_name); 4095 kfree(snap_name);
4066 } 4096 }
@@ -4139,8 +4169,8 @@ static int rbd_dev_spec_update(struct rbd_device *rbd_dev)
4139 /* Look up the snapshot name, and make a copy */ 4169 /* Look up the snapshot name, and make a copy */
4140 4170
4141 snap_name = rbd_snap_name(rbd_dev, spec->snap_id); 4171 snap_name = rbd_snap_name(rbd_dev, spec->snap_id);
4142 if (!snap_name) { 4172 if (IS_ERR(snap_name)) {
4143 ret = -ENOMEM; 4173 ret = PTR_ERR(snap_name);
4144 goto out_err; 4174 goto out_err;
4145 } 4175 }
4146 4176
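The hunk above switches rbd_dev_spec_update() from a NULL test to the ERR_PTR convention, since rbd_snap_name() reports failure as an encoded errno rather than NULL. A minimal sketch of that convention (lookup_name() is a hypothetical stand-in):

#include <linux/err.h>

static char *lookup_name(void);	/* hypothetical: returns a valid pointer or ERR_PTR(-errno) */

static int use_name(void)
{
	char *name = lookup_name();

	if (IS_ERR(name))		/* true only for ERR_PTR()-encoded errors, not for NULL */
		return PTR_ERR(name);	/* recover the negative errno, e.g. -ENOENT */

	/* ... use name ... */
	return 0;
}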
@@ -5130,7 +5160,7 @@ static ssize_t rbd_remove(struct bus_type *bus,
5130 bool already = false; 5160 bool already = false;
5131 int ret; 5161 int ret;
5132 5162
5133 ret = strict_strtoul(buf, 10, &ul); 5163 ret = kstrtoul(buf, 10, &ul);
5134 if (ret) 5164 if (ret)
5135 return ret; 5165 return ret;
5136 5166
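The deprecated strict_strtoul() is replaced by kstrtoul(), which does the same job here: parse the sysfs buffer as a base-10 unsigned long and reject malformed input. A minimal usage sketch:

	unsigned long ul;
	int ret;

	ret = kstrtoul(buf, 10, &ul);	/* 0 on success, or a negative errno */
	if (ret)
		return ret;		/* e.g. -EINVAL for non-numeric input, -ERANGE on overflow */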
@@ -5161,10 +5191,23 @@ static ssize_t rbd_remove(struct bus_type *bus,
5161 if (ret < 0 || already) 5191 if (ret < 0 || already)
5162 return ret; 5192 return ret;
5163 5193
5164 rbd_bus_del_dev(rbd_dev);
5165 ret = rbd_dev_header_watch_sync(rbd_dev, false); 5194 ret = rbd_dev_header_watch_sync(rbd_dev, false);
5166 if (ret) 5195 if (ret)
5167 rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret); 5196 rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret);
5197
5198 /*
5199 * flush remaining watch callbacks - these must be complete
5200 * before the osd_client is shutdown
5201 */
5202 dout("%s: flushing notifies", __func__);
5203 ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);
5204 /*
5205 * Don't free anything from rbd_dev->disk until after all
5206 * notifies are completely processed. Otherwise
5207 * rbd_bus_del_dev() will race with rbd_watch_cb(), resulting
5208 * in a potential use after free of rbd_dev->disk or rbd_dev.
5209 */
5210 rbd_bus_del_dev(rbd_dev);
5168 rbd_dev_image_release(rbd_dev); 5211 rbd_dev_image_release(rbd_dev);
5169 module_put(THIS_MODULE); 5212 module_put(THIS_MODULE);
5170 5213
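The reordering above makes rbd_remove() tear the device down in dependency order so that rbd_watch_cb() can never run against a freed disk. Condensed, the sequence the hunk establishes is (a sketch, error handling omitted):

	rbd_dev_header_watch_sync(rbd_dev, false);			/* stop generating new notifies */
	ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);	/* drain callbacks already in flight */
	rbd_bus_del_dev(rbd_dev);					/* only now tear down the disk */
	rbd_dev_image_release(rbd_dev);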
diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
index 6e85e21445eb..a8de2eec6ff3 100644
--- a/drivers/block/rsxx/core.c
+++ b/drivers/block/rsxx/core.c
@@ -654,7 +654,8 @@ static void rsxx_eeh_failure(struct pci_dev *dev)
654 for (i = 0; i < card->n_targets; i++) { 654 for (i = 0; i < card->n_targets; i++) {
655 spin_lock_bh(&card->ctrl[i].queue_lock); 655 spin_lock_bh(&card->ctrl[i].queue_lock);
656 cnt = rsxx_cleanup_dma_queue(&card->ctrl[i], 656 cnt = rsxx_cleanup_dma_queue(&card->ctrl[i],
657 &card->ctrl[i].queue); 657 &card->ctrl[i].queue,
658 COMPLETE_DMA);
658 spin_unlock_bh(&card->ctrl[i].queue_lock); 659 spin_unlock_bh(&card->ctrl[i].queue_lock);
659 660
660 cnt += rsxx_dma_cancel(&card->ctrl[i]); 661 cnt += rsxx_dma_cancel(&card->ctrl[i]);
@@ -748,10 +749,6 @@ static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev)
748 749
749 card->eeh_state = 0; 750 card->eeh_state = 0;
750 751
751 st = rsxx_eeh_remap_dmas(card);
752 if (st)
753 goto failed_remap_dmas;
754
755 spin_lock_irqsave(&card->irq_lock, flags); 752 spin_lock_irqsave(&card->irq_lock, flags);
756 if (card->n_targets & RSXX_MAX_TARGETS) 753 if (card->n_targets & RSXX_MAX_TARGETS)
757 rsxx_enable_ier_and_isr(card, CR_INTR_ALL_G); 754 rsxx_enable_ier_and_isr(card, CR_INTR_ALL_G);
@@ -778,7 +775,6 @@ static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev)
778 return PCI_ERS_RESULT_RECOVERED; 775 return PCI_ERS_RESULT_RECOVERED;
779 776
780failed_hw_buffers_init: 777failed_hw_buffers_init:
781failed_remap_dmas:
782 for (i = 0; i < card->n_targets; i++) { 778 for (i = 0; i < card->n_targets; i++) {
783 if (card->ctrl[i].status.buf) 779 if (card->ctrl[i].status.buf)
784 pci_free_consistent(card->dev, 780 pci_free_consistent(card->dev,
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index d7af441880be..2284f5d3a54a 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -295,13 +295,15 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
295 return -ENOMEM; 295 return -ENOMEM;
296 } 296 }
297 297
298 blk_size = card->config.data.block_size; 298 if (card->config_valid) {
299 blk_size = card->config.data.block_size;
300 blk_queue_dma_alignment(card->queue, blk_size - 1);
301 blk_queue_logical_block_size(card->queue, blk_size);
302 }
299 303
300 blk_queue_make_request(card->queue, rsxx_make_request); 304 blk_queue_make_request(card->queue, rsxx_make_request);
301 blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY); 305 blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY);
302 blk_queue_dma_alignment(card->queue, blk_size - 1);
303 blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors); 306 blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors);
304 blk_queue_logical_block_size(card->queue, blk_size);
305 blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE); 307 blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE);
306 308
307 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, card->queue); 309 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, card->queue);
diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
index bed32f16b084..fc88ba3e1bd2 100644
--- a/drivers/block/rsxx/dma.c
+++ b/drivers/block/rsxx/dma.c
@@ -221,6 +221,21 @@ static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card)
221} 221}
222 222
223/*----------------- RSXX DMA Handling -------------------*/ 223/*----------------- RSXX DMA Handling -------------------*/
224static void rsxx_free_dma(struct rsxx_dma_ctrl *ctrl, struct rsxx_dma *dma)
225{
226 if (dma->cmd != HW_CMD_BLK_DISCARD) {
227 if (!pci_dma_mapping_error(ctrl->card->dev, dma->dma_addr)) {
228 pci_unmap_page(ctrl->card->dev, dma->dma_addr,
229 get_dma_size(dma),
230 dma->cmd == HW_CMD_BLK_WRITE ?
231 PCI_DMA_TODEVICE :
232 PCI_DMA_FROMDEVICE);
233 }
234 }
235
236 kmem_cache_free(rsxx_dma_pool, dma);
237}
238
224static void rsxx_complete_dma(struct rsxx_dma_ctrl *ctrl, 239static void rsxx_complete_dma(struct rsxx_dma_ctrl *ctrl,
225 struct rsxx_dma *dma, 240 struct rsxx_dma *dma,
226 unsigned int status) 241 unsigned int status)
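rsxx_free_dma() above centralizes the unmap: discard commands are never mapped, and a mapping that failed must not be unmapped. A minimal sketch of the pairing it assumes (generic code, not part of rsxx; pdev, page, offset and len are placeholders):

	dma_addr_t addr;

	addr = pci_map_page(pdev, page, offset, len, PCI_DMA_TODEVICE);
	if (pci_dma_mapping_error(pdev, addr))	/* the reliable failure test; a plain zero check is not */
		return -ENOMEM;

	/* ... hardware uses addr ... */

	pci_unmap_page(pdev, addr, len, PCI_DMA_TODEVICE);	/* same size and direction as the map */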
@@ -232,21 +247,14 @@ static void rsxx_complete_dma(struct rsxx_dma_ctrl *ctrl,
232 if (status & DMA_CANCELLED) 247 if (status & DMA_CANCELLED)
233 ctrl->stats.dma_cancelled++; 248 ctrl->stats.dma_cancelled++;
234 249
235 if (dma->dma_addr)
236 pci_unmap_page(ctrl->card->dev, dma->dma_addr,
237 get_dma_size(dma),
238 dma->cmd == HW_CMD_BLK_WRITE ?
239 PCI_DMA_TODEVICE :
240 PCI_DMA_FROMDEVICE);
241
242 if (dma->cb) 250 if (dma->cb)
243 dma->cb(ctrl->card, dma->cb_data, status ? 1 : 0); 251 dma->cb(ctrl->card, dma->cb_data, status ? 1 : 0);
244 252
245 kmem_cache_free(rsxx_dma_pool, dma); 253 rsxx_free_dma(ctrl, dma);
246} 254}
247 255
248int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl, 256int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl,
249 struct list_head *q) 257 struct list_head *q, unsigned int done)
250{ 258{
251 struct rsxx_dma *dma; 259 struct rsxx_dma *dma;
252 struct rsxx_dma *tmp; 260 struct rsxx_dma *tmp;
@@ -254,7 +262,10 @@ int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl,
254 262
255 list_for_each_entry_safe(dma, tmp, q, list) { 263 list_for_each_entry_safe(dma, tmp, q, list) {
256 list_del(&dma->list); 264 list_del(&dma->list);
257 rsxx_complete_dma(ctrl, dma, DMA_CANCELLED); 265 if (done & COMPLETE_DMA)
266 rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
267 else
268 rsxx_free_dma(ctrl, dma);
258 cnt++; 269 cnt++;
259 } 270 }
260 271
@@ -370,7 +381,7 @@ static void dma_engine_stalled(unsigned long data)
370 381
371 /* Clean up the DMA queue */ 382 /* Clean up the DMA queue */
372 spin_lock(&ctrl->queue_lock); 383 spin_lock(&ctrl->queue_lock);
373 cnt = rsxx_cleanup_dma_queue(ctrl, &ctrl->queue); 384 cnt = rsxx_cleanup_dma_queue(ctrl, &ctrl->queue, COMPLETE_DMA);
374 spin_unlock(&ctrl->queue_lock); 385 spin_unlock(&ctrl->queue_lock);
375 386
376 cnt += rsxx_dma_cancel(ctrl); 387 cnt += rsxx_dma_cancel(ctrl);
@@ -388,6 +399,7 @@ static void rsxx_issue_dmas(struct rsxx_dma_ctrl *ctrl)
388 int tag; 399 int tag;
389 int cmds_pending = 0; 400 int cmds_pending = 0;
390 struct hw_cmd *hw_cmd_buf; 401 struct hw_cmd *hw_cmd_buf;
402 int dir;
391 403
392 hw_cmd_buf = ctrl->cmd.buf; 404 hw_cmd_buf = ctrl->cmd.buf;
393 405
@@ -424,6 +436,31 @@ static void rsxx_issue_dmas(struct rsxx_dma_ctrl *ctrl)
424 continue; 436 continue;
425 } 437 }
426 438
439 if (dma->cmd != HW_CMD_BLK_DISCARD) {
440 if (dma->cmd == HW_CMD_BLK_WRITE)
441 dir = PCI_DMA_TODEVICE;
442 else
443 dir = PCI_DMA_FROMDEVICE;
444
445 /*
446 * The function pci_map_page is placed here because we
447 * can only, by design, issue up to 255 commands to the
448 * hardware at one time per DMA channel. So the maximum
449 * amount of mapped memory would be 255 * 4 channels *
450 * 4096 Bytes which is less than 2GB, the limit of a x8
451 * Non-HWWD PCIe slot. This way the pci_map_page
452 * function should never fail because of a lack of
453 * mappable memory.
454 */
455 dma->dma_addr = pci_map_page(ctrl->card->dev, dma->page,
456 dma->pg_off, dma->sub_page.cnt << 9, dir);
457 if (pci_dma_mapping_error(ctrl->card->dev, dma->dma_addr)) {
458 push_tracker(ctrl->trackers, tag);
459 rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
460 continue;
461 }
462 }
463
427 set_tracker_dma(ctrl->trackers, tag, dma); 464 set_tracker_dma(ctrl->trackers, tag, dma);
428 hw_cmd_buf[ctrl->cmd.idx].command = dma->cmd; 465 hw_cmd_buf[ctrl->cmd.idx].command = dma->cmd;
429 hw_cmd_buf[ctrl->cmd.idx].tag = tag; 466 hw_cmd_buf[ctrl->cmd.idx].tag = tag;
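The comment above bounds how much memory can be mapped at once; the worked numbers, as a sketch:

	/*
	 * 255 commands/channel * 4 channels * 4096 bytes/command
	 *   = 4,177,920 bytes (roughly 4 MiB) of concurrently mapped
	 * memory, far below the 2 GiB window quoted for an x8
	 * non-HWWD PCIe slot, so pci_map_page() is not expected to
	 * fail for lack of mapping space.
	 */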
@@ -620,14 +657,6 @@ static int rsxx_queue_dma(struct rsxx_cardinfo *card,
620 if (!dma) 657 if (!dma)
621 return -ENOMEM; 658 return -ENOMEM;
622 659
623 dma->dma_addr = pci_map_page(card->dev, page, pg_off, dma_len,
624 dir ? PCI_DMA_TODEVICE :
625 PCI_DMA_FROMDEVICE);
626 if (!dma->dma_addr) {
627 kmem_cache_free(rsxx_dma_pool, dma);
628 return -ENOMEM;
629 }
630
631 dma->cmd = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ; 660 dma->cmd = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ;
632 dma->laddr = laddr; 661 dma->laddr = laddr;
633 dma->sub_page.off = (dma_off >> 9); 662 dma->sub_page.off = (dma_off >> 9);
@@ -736,11 +765,9 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
736 return 0; 765 return 0;
737 766
738bvec_err: 767bvec_err:
739 for (i = 0; i < card->n_targets; i++) { 768 for (i = 0; i < card->n_targets; i++)
740 spin_lock_bh(&card->ctrl[i].queue_lock); 769 rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i],
741 rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i]); 770 FREE_DMA);
742 spin_unlock_bh(&card->ctrl[i].queue_lock);
743 }
744 771
745 return st; 772 return st;
746} 773}
@@ -990,7 +1017,7 @@ void rsxx_dma_destroy(struct rsxx_cardinfo *card)
990 1017
991 /* Clean up the DMA queue */ 1018 /* Clean up the DMA queue */
992 spin_lock_bh(&ctrl->queue_lock); 1019 spin_lock_bh(&ctrl->queue_lock);
993 rsxx_cleanup_dma_queue(ctrl, &ctrl->queue); 1020 rsxx_cleanup_dma_queue(ctrl, &ctrl->queue, COMPLETE_DMA);
994 spin_unlock_bh(&ctrl->queue_lock); 1021 spin_unlock_bh(&ctrl->queue_lock);
995 1022
996 rsxx_dma_cancel(ctrl); 1023 rsxx_dma_cancel(ctrl);
@@ -1032,6 +1059,14 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
1032 else 1059 else
1033 card->ctrl[i].stats.reads_issued--; 1060 card->ctrl[i].stats.reads_issued--;
1034 1061
1062 if (dma->cmd != HW_CMD_BLK_DISCARD) {
1063 pci_unmap_page(card->dev, dma->dma_addr,
1064 get_dma_size(dma),
1065 dma->cmd == HW_CMD_BLK_WRITE ?
1066 PCI_DMA_TODEVICE :
1067 PCI_DMA_FROMDEVICE);
1068 }
1069
1035 list_add_tail(&dma->list, &issued_dmas[i]); 1070 list_add_tail(&dma->list, &issued_dmas[i]);
1036 push_tracker(card->ctrl[i].trackers, j); 1071 push_tracker(card->ctrl[i].trackers, j);
1037 cnt++; 1072 cnt++;
@@ -1043,15 +1078,6 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
1043 atomic_sub(cnt, &card->ctrl[i].stats.hw_q_depth); 1078 atomic_sub(cnt, &card->ctrl[i].stats.hw_q_depth);
1044 card->ctrl[i].stats.sw_q_depth += cnt; 1079 card->ctrl[i].stats.sw_q_depth += cnt;
1045 card->ctrl[i].e_cnt = 0; 1080 card->ctrl[i].e_cnt = 0;
1046
1047 list_for_each_entry(dma, &card->ctrl[i].queue, list) {
1048 if (dma->dma_addr)
1049 pci_unmap_page(card->dev, dma->dma_addr,
1050 get_dma_size(dma),
1051 dma->cmd == HW_CMD_BLK_WRITE ?
1052 PCI_DMA_TODEVICE :
1053 PCI_DMA_FROMDEVICE);
1054 }
1055 spin_unlock_bh(&card->ctrl[i].queue_lock); 1081 spin_unlock_bh(&card->ctrl[i].queue_lock);
1056 } 1082 }
1057 1083
@@ -1060,31 +1086,6 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
1060 return 0; 1086 return 0;
1061} 1087}
1062 1088
1063int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card)
1064{
1065 struct rsxx_dma *dma;
1066 int i;
1067
1068 for (i = 0; i < card->n_targets; i++) {
1069 spin_lock_bh(&card->ctrl[i].queue_lock);
1070 list_for_each_entry(dma, &card->ctrl[i].queue, list) {
1071 dma->dma_addr = pci_map_page(card->dev, dma->page,
1072 dma->pg_off, get_dma_size(dma),
1073 dma->cmd == HW_CMD_BLK_WRITE ?
1074 PCI_DMA_TODEVICE :
1075 PCI_DMA_FROMDEVICE);
1076 if (!dma->dma_addr) {
1077 spin_unlock_bh(&card->ctrl[i].queue_lock);
1078 kmem_cache_free(rsxx_dma_pool, dma);
1079 return -ENOMEM;
1080 }
1081 }
1082 spin_unlock_bh(&card->ctrl[i].queue_lock);
1083 }
1084
1085 return 0;
1086}
1087
1088int rsxx_dma_init(void) 1089int rsxx_dma_init(void)
1089{ 1090{
1090 rsxx_dma_pool = KMEM_CACHE(rsxx_dma, SLAB_HWCACHE_ALIGN); 1091 rsxx_dma_pool = KMEM_CACHE(rsxx_dma, SLAB_HWCACHE_ALIGN);
diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h
index 5ad5055a4104..6bbc64d0f690 100644
--- a/drivers/block/rsxx/rsxx_priv.h
+++ b/drivers/block/rsxx/rsxx_priv.h
@@ -52,7 +52,7 @@ struct proc_cmd;
52#define RS70_PCI_REV_SUPPORTED 4 52#define RS70_PCI_REV_SUPPORTED 4
53 53
54#define DRIVER_NAME "rsxx" 54#define DRIVER_NAME "rsxx"
55#define DRIVER_VERSION "4.0" 55#define DRIVER_VERSION "4.0.3.2516"
56 56
57/* Block size is 4096 */ 57/* Block size is 4096 */
58#define RSXX_HW_BLK_SHIFT 12 58#define RSXX_HW_BLK_SHIFT 12
@@ -345,6 +345,11 @@ enum rsxx_creg_stat {
345 CREG_STAT_TAG_MASK = 0x0000ff00, 345 CREG_STAT_TAG_MASK = 0x0000ff00,
346}; 346};
347 347
348enum rsxx_dma_finish {
349 FREE_DMA = 0x0,
350 COMPLETE_DMA = 0x1,
351};
352
348static inline unsigned int CREG_DATA(int N) 353static inline unsigned int CREG_DATA(int N)
349{ 354{
350 return CREG_DATA0 + (N << 2); 355 return CREG_DATA0 + (N << 2);
@@ -379,7 +384,9 @@ typedef void (*rsxx_dma_cb)(struct rsxx_cardinfo *card,
379int rsxx_dma_setup(struct rsxx_cardinfo *card); 384int rsxx_dma_setup(struct rsxx_cardinfo *card);
380void rsxx_dma_destroy(struct rsxx_cardinfo *card); 385void rsxx_dma_destroy(struct rsxx_cardinfo *card);
381int rsxx_dma_init(void); 386int rsxx_dma_init(void);
382int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl, struct list_head *q); 387int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl,
388 struct list_head *q,
389 unsigned int done);
383int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl); 390int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl);
384void rsxx_dma_cleanup(void); 391void rsxx_dma_cleanup(void);
385void rsxx_dma_queue_reset(struct rsxx_cardinfo *card); 392void rsxx_dma_queue_reset(struct rsxx_cardinfo *card);
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
new file mode 100644
index 000000000000..9199c93be926
--- /dev/null
+++ b/drivers/block/skd_main.c
@@ -0,0 +1,5432 @@
1/* Copyright 2012 STEC, Inc.
2 *
3 * This file is licensed under the terms of the 3-clause
4 * BSD License (http://opensource.org/licenses/BSD-3-Clause)
5 * or the GNU GPL-2.0 (http://www.gnu.org/licenses/gpl-2.0.html),
6 * at your option. Both licenses are also available in the LICENSE file
7 * distributed with this project. This file may not be copied, modified,
8 * or distributed except in accordance with those terms.
9 * Gordoni Waidhofer <gwaidhofer@stec-inc.com>
10 * Initial Driver Design!
11 * Thomas Swann <tswann@stec-inc.com>
12 * Interrupt handling.
13 * Ramprasad Chinthekindi <rchinthekindi@stec-inc.com>
14 * biomode implementation.
15 * Akhil Bhansali <abhansali@stec-inc.com>
16 * Added support for DISCARD / FLUSH and FUA.
17 */
18
19#include <linux/kernel.h>
20#include <linux/module.h>
21#include <linux/init.h>
22#include <linux/pci.h>
23#include <linux/slab.h>
24#include <linux/spinlock.h>
25#include <linux/blkdev.h>
26#include <linux/sched.h>
27#include <linux/interrupt.h>
28#include <linux/compiler.h>
29#include <linux/workqueue.h>
30#include <linux/bitops.h>
31#include <linux/delay.h>
32#include <linux/time.h>
33#include <linux/hdreg.h>
34#include <linux/dma-mapping.h>
35#include <linux/completion.h>
36#include <linux/scatterlist.h>
37#include <linux/version.h>
38#include <linux/err.h>
39#include <linux/scatterlist.h>
40#include <linux/aer.h>
41#include <linux/ctype.h>
42#include <linux/wait.h>
43#include <linux/uio.h>
44#include <scsi/scsi.h>
45#include <scsi/sg.h>
46#include <linux/io.h>
47#include <linux/uaccess.h>
48#include <asm/unaligned.h>
49
50#include "skd_s1120.h"
51
52static int skd_dbg_level;
53static int skd_isr_comp_limit = 4;
54
55enum {
56 STEC_LINK_2_5GTS = 0,
57 STEC_LINK_5GTS = 1,
58 STEC_LINK_8GTS = 2,
59 STEC_LINK_UNKNOWN = 0xFF
60};
61
62enum {
63 SKD_FLUSH_INITIALIZER,
64 SKD_FLUSH_ZERO_SIZE_FIRST,
65 SKD_FLUSH_DATA_SECOND,
66};
67
68#define SKD_ASSERT(expr) \
69 do { \
70 if (unlikely(!(expr))) { \
71 pr_err("Assertion failed! %s,%s,%s,line=%d\n", \
72 # expr, __FILE__, __func__, __LINE__); \
73 } \
74 } while (0)
75
76#define DRV_NAME "skd"
77#define DRV_VERSION "2.2.1"
78#define DRV_BUILD_ID "0260"
79#define PFX DRV_NAME ": "
80#define DRV_BIN_VERSION 0x100
81#define DRV_VER_COMPL "2.2.1." DRV_BUILD_ID
82
83MODULE_AUTHOR("bug-reports: support@stec-inc.com");
84MODULE_LICENSE("Dual BSD/GPL");
85
86MODULE_DESCRIPTION("STEC s1120 PCIe SSD block driver (b" DRV_BUILD_ID ")");
87MODULE_VERSION(DRV_VERSION "-" DRV_BUILD_ID);
88
89#define PCI_VENDOR_ID_STEC 0x1B39
90#define PCI_DEVICE_ID_S1120 0x0001
91
92#define SKD_FUA_NV (1 << 1)
93#define SKD_MINORS_PER_DEVICE 16
94
95#define SKD_MAX_QUEUE_DEPTH 200u
96
97#define SKD_PAUSE_TIMEOUT (5 * 1000)
98
99#define SKD_N_FITMSG_BYTES (512u)
100
101#define SKD_N_SPECIAL_CONTEXT 32u
102#define SKD_N_SPECIAL_FITMSG_BYTES (128u)
103
104/* SG elements are 32 bytes, so we can make this 4096 and still be under the
105 * 128KB limit. That allows 4096*4K = 16M xfer size
106 */
107#define SKD_N_SG_PER_REQ_DEFAULT 256u
108#define SKD_N_SG_PER_SPECIAL 256u
109
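Worked numbers for the sizing comment above (a sketch; the shipped default of 256 descriptors can be raised via the skd_sgs_per_request module parameter defined later in this file):

	/*
	 * 4096 SG descriptors * 32 bytes each = 131,072 bytes = 128 KiB of
	 * descriptor space (the stated limit), while 4096 descriptors * 4 KiB
	 * pages allows a 16 MiB transfer.
	 */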
110#define SKD_N_COMPLETION_ENTRY 256u
111#define SKD_N_READ_CAP_BYTES (8u)
112
113#define SKD_N_INTERNAL_BYTES (512u)
114
115/* 5 bits of uniquifier, 0xF800 */
116#define SKD_ID_INCR (0x400)
117#define SKD_ID_TABLE_MASK (3u << 8u)
118#define SKD_ID_RW_REQUEST (0u << 8u)
119#define SKD_ID_INTERNAL (1u << 8u)
120#define SKD_ID_SPECIAL_REQUEST (2u << 8u)
121#define SKD_ID_FIT_MSG (3u << 8u)
122#define SKD_ID_SLOT_MASK 0x00FFu
123#define SKD_ID_SLOT_AND_TABLE_MASK 0x03FFu
124
125#define SKD_N_TIMEOUT_SLOT 4u
126#define SKD_TIMEOUT_SLOT_MASK 3u
127
128#define SKD_N_MAX_SECTORS 2048u
129
130#define SKD_MAX_RETRIES 2u
131
132#define SKD_TIMER_SECONDS(seconds) (seconds)
133#define SKD_TIMER_MINUTES(minutes) ((minutes) * (60))
134
135#define INQ_STD_NBYTES 36
136#define SKD_DISCARD_CDB_LENGTH 24
137
138enum skd_drvr_state {
139 SKD_DRVR_STATE_LOAD,
140 SKD_DRVR_STATE_IDLE,
141 SKD_DRVR_STATE_BUSY,
142 SKD_DRVR_STATE_STARTING,
143 SKD_DRVR_STATE_ONLINE,
144 SKD_DRVR_STATE_PAUSING,
145 SKD_DRVR_STATE_PAUSED,
146 SKD_DRVR_STATE_DRAINING_TIMEOUT,
147 SKD_DRVR_STATE_RESTARTING,
148 SKD_DRVR_STATE_RESUMING,
149 SKD_DRVR_STATE_STOPPING,
150 SKD_DRVR_STATE_FAULT,
151 SKD_DRVR_STATE_DISAPPEARED,
152 SKD_DRVR_STATE_PROTOCOL_MISMATCH,
153 SKD_DRVR_STATE_BUSY_ERASE,
154 SKD_DRVR_STATE_BUSY_SANITIZE,
155 SKD_DRVR_STATE_BUSY_IMMINENT,
156 SKD_DRVR_STATE_WAIT_BOOT,
157 SKD_DRVR_STATE_SYNCING,
158};
159
160#define SKD_WAIT_BOOT_TIMO SKD_TIMER_SECONDS(90u)
161#define SKD_STARTING_TIMO SKD_TIMER_SECONDS(8u)
162#define SKD_RESTARTING_TIMO SKD_TIMER_MINUTES(4u)
163#define SKD_DRAINING_TIMO SKD_TIMER_SECONDS(6u)
164#define SKD_BUSY_TIMO SKD_TIMER_MINUTES(20u)
165#define SKD_STARTED_BUSY_TIMO SKD_TIMER_SECONDS(60u)
166#define SKD_START_WAIT_SECONDS 90u
167
168enum skd_req_state {
169 SKD_REQ_STATE_IDLE,
170 SKD_REQ_STATE_SETUP,
171 SKD_REQ_STATE_BUSY,
172 SKD_REQ_STATE_COMPLETED,
173 SKD_REQ_STATE_TIMEOUT,
174 SKD_REQ_STATE_ABORTED,
175};
176
177enum skd_fit_msg_state {
178 SKD_MSG_STATE_IDLE,
179 SKD_MSG_STATE_BUSY,
180};
181
182enum skd_check_status_action {
183 SKD_CHECK_STATUS_REPORT_GOOD,
184 SKD_CHECK_STATUS_REPORT_SMART_ALERT,
185 SKD_CHECK_STATUS_REQUEUE_REQUEST,
186 SKD_CHECK_STATUS_REPORT_ERROR,
187 SKD_CHECK_STATUS_BUSY_IMMINENT,
188};
189
190struct skd_fitmsg_context {
191 enum skd_fit_msg_state state;
192
193 struct skd_fitmsg_context *next;
194
195 u32 id;
196 u16 outstanding;
197
198 u32 length;
199 u32 offset;
200
201 u8 *msg_buf;
202 dma_addr_t mb_dma_address;
203};
204
205struct skd_request_context {
206 enum skd_req_state state;
207
208 struct skd_request_context *next;
209
210 u16 id;
211 u32 fitmsg_id;
212
213 struct request *req;
214 u8 flush_cmd;
215 u8 discard_page;
216
217 u32 timeout_stamp;
218 u8 sg_data_dir;
219 struct scatterlist *sg;
220 u32 n_sg;
221 u32 sg_byte_count;
222
223 struct fit_sg_descriptor *sksg_list;
224 dma_addr_t sksg_dma_address;
225
226 struct fit_completion_entry_v1 completion;
227
228 struct fit_comp_error_info err_info;
229
230};
231#define SKD_DATA_DIR_HOST_TO_CARD 1
232#define SKD_DATA_DIR_CARD_TO_HOST 2
233#define SKD_DATA_DIR_NONE 3 /* especially for DISCARD requests. */
234
235struct skd_special_context {
236 struct skd_request_context req;
237
238 u8 orphaned;
239
240 void *data_buf;
241 dma_addr_t db_dma_address;
242
243 u8 *msg_buf;
244 dma_addr_t mb_dma_address;
245};
246
247struct skd_sg_io {
248 fmode_t mode;
249 void __user *argp;
250
251 struct sg_io_hdr sg;
252
253 u8 cdb[16];
254
255 u32 dxfer_len;
256 u32 iovcnt;
257 struct sg_iovec *iov;
258 struct sg_iovec no_iov_iov;
259
260 struct skd_special_context *skspcl;
261};
262
263typedef enum skd_irq_type {
264 SKD_IRQ_LEGACY,
265 SKD_IRQ_MSI,
266 SKD_IRQ_MSIX
267} skd_irq_type_t;
268
269#define SKD_MAX_BARS 2
270
271struct skd_device {
272 volatile void __iomem *mem_map[SKD_MAX_BARS];
273 resource_size_t mem_phys[SKD_MAX_BARS];
274 u32 mem_size[SKD_MAX_BARS];
275
276 skd_irq_type_t irq_type;
277 u32 msix_count;
278 struct skd_msix_entry *msix_entries;
279
280 struct pci_dev *pdev;
281 int pcie_error_reporting_is_enabled;
282
283 spinlock_t lock;
284 struct gendisk *disk;
285 struct request_queue *queue;
286 struct device *class_dev;
287 int gendisk_on;
288 int sync_done;
289
290 atomic_t device_count;
291 u32 devno;
292 u32 major;
293 char name[32];
294 char isr_name[30];
295
296 enum skd_drvr_state state;
297 u32 drive_state;
298
299 u32 in_flight;
300 u32 cur_max_queue_depth;
301 u32 queue_low_water_mark;
302 u32 dev_max_queue_depth;
303
304 u32 num_fitmsg_context;
305 u32 num_req_context;
306
307 u32 timeout_slot[SKD_N_TIMEOUT_SLOT];
308 u32 timeout_stamp;
309 struct skd_fitmsg_context *skmsg_free_list;
310 struct skd_fitmsg_context *skmsg_table;
311
312 struct skd_request_context *skreq_free_list;
313 struct skd_request_context *skreq_table;
314
315 struct skd_special_context *skspcl_free_list;
316 struct skd_special_context *skspcl_table;
317
318 struct skd_special_context internal_skspcl;
319 u32 read_cap_blocksize;
320 u32 read_cap_last_lba;
321 int read_cap_is_valid;
322 int inquiry_is_valid;
323 u8 inq_serial_num[13]; /*12 chars plus null term */
324 u8 id_str[80]; /* holds a composite name (pci + sernum) */
325
326 u8 skcomp_cycle;
327 u32 skcomp_ix;
328 struct fit_completion_entry_v1 *skcomp_table;
329 struct fit_comp_error_info *skerr_table;
330 dma_addr_t cq_dma_address;
331
332 wait_queue_head_t waitq;
333
334 struct timer_list timer;
335 u32 timer_countdown;
336 u32 timer_substate;
337
338 int n_special;
339 int sgs_per_request;
340 u32 last_mtd;
341
342 u32 proto_ver;
343
344 int dbg_level;
345 u32 connect_time_stamp;
346 int connect_retries;
347#define SKD_MAX_CONNECT_RETRIES 16
348 u32 drive_jiffies;
349
350 u32 timo_slot;
351
352
353 struct work_struct completion_worker;
354};
355
356#define SKD_WRITEL(DEV, VAL, OFF) skd_reg_write32(DEV, VAL, OFF)
357#define SKD_READL(DEV, OFF) skd_reg_read32(DEV, OFF)
358#define SKD_WRITEQ(DEV, VAL, OFF) skd_reg_write64(DEV, VAL, OFF)
359
360static inline u32 skd_reg_read32(struct skd_device *skdev, u32 offset)
361{
362 u32 val;
363
364 if (likely(skdev->dbg_level < 2))
365 return readl(skdev->mem_map[1] + offset);
366 else {
367 barrier();
368 val = readl(skdev->mem_map[1] + offset);
369 barrier();
370 pr_debug("%s:%s:%d offset %x = %x\n",
371 skdev->name, __func__, __LINE__, offset, val);
372 return val;
373 }
374
375}
376
377static inline void skd_reg_write32(struct skd_device *skdev, u32 val,
378 u32 offset)
379{
380 if (likely(skdev->dbg_level < 2)) {
381 writel(val, skdev->mem_map[1] + offset);
382 barrier();
383 } else {
384 barrier();
385 writel(val, skdev->mem_map[1] + offset);
386 barrier();
387 pr_debug("%s:%s:%d offset %x = %x\n",
388 skdev->name, __func__, __LINE__, offset, val);
389 }
390}
391
392static inline void skd_reg_write64(struct skd_device *skdev, u64 val,
393 u32 offset)
394{
395 if (likely(skdev->dbg_level < 2)) {
396 writeq(val, skdev->mem_map[1] + offset);
397 barrier();
398 } else {
399 barrier();
400 writeq(val, skdev->mem_map[1] + offset);
401 barrier();
402 pr_debug("%s:%s:%d offset %x = %016llx\n",
403 skdev->name, __func__, __LINE__, offset, val);
404 }
405}
406
407
408#define SKD_IRQ_DEFAULT SKD_IRQ_MSI
409static int skd_isr_type = SKD_IRQ_DEFAULT;
410
411module_param(skd_isr_type, int, 0444);
412MODULE_PARM_DESC(skd_isr_type, "Interrupt type capability."
413 " (0==legacy, 1==MSI, 2==MSI-X, default==1)");
414
415#define SKD_MAX_REQ_PER_MSG_DEFAULT 1
416static int skd_max_req_per_msg = SKD_MAX_REQ_PER_MSG_DEFAULT;
417
418module_param(skd_max_req_per_msg, int, 0444);
419MODULE_PARM_DESC(skd_max_req_per_msg,
420 "Maximum SCSI requests packed in a single message."
421 " (1-14, default==1)");
422
423#define SKD_MAX_QUEUE_DEPTH_DEFAULT 64
424#define SKD_MAX_QUEUE_DEPTH_DEFAULT_STR "64"
425static int skd_max_queue_depth = SKD_MAX_QUEUE_DEPTH_DEFAULT;
426
427module_param(skd_max_queue_depth, int, 0444);
428MODULE_PARM_DESC(skd_max_queue_depth,
429 "Maximum SCSI requests issued to s1120."
430 " (1-200, default==" SKD_MAX_QUEUE_DEPTH_DEFAULT_STR ")");
431
432static int skd_sgs_per_request = SKD_N_SG_PER_REQ_DEFAULT;
433module_param(skd_sgs_per_request, int, 0444);
434MODULE_PARM_DESC(skd_sgs_per_request,
435 "Maximum SG elements per block request."
436 " (1-4096, default==256)");
437
438static int skd_max_pass_thru = SKD_N_SPECIAL_CONTEXT;
439module_param(skd_max_pass_thru, int, 0444);
440MODULE_PARM_DESC(skd_max_pass_thru,
441 "Maximum SCSI pass-thru at a time." " (1-50, default==32)");
442
443module_param(skd_dbg_level, int, 0444);
444MODULE_PARM_DESC(skd_dbg_level, "s1120 debug level (0,1,2)");
445
446module_param(skd_isr_comp_limit, int, 0444);
447MODULE_PARM_DESC(skd_isr_comp_limit, "s1120 isr comp limit (0=none) default=4");
448
449/* Major device number dynamically assigned. */
450static u32 skd_major;
451
452static void skd_destruct(struct skd_device *skdev);
453static const struct block_device_operations skd_blockdev_ops;
454static void skd_send_fitmsg(struct skd_device *skdev,
455 struct skd_fitmsg_context *skmsg);
456static void skd_send_special_fitmsg(struct skd_device *skdev,
457 struct skd_special_context *skspcl);
458static void skd_request_fn(struct request_queue *rq);
459static void skd_end_request(struct skd_device *skdev,
460 struct skd_request_context *skreq, int error);
461static int skd_preop_sg_list(struct skd_device *skdev,
462 struct skd_request_context *skreq);
463static void skd_postop_sg_list(struct skd_device *skdev,
464 struct skd_request_context *skreq);
465
466static void skd_restart_device(struct skd_device *skdev);
467static int skd_quiesce_dev(struct skd_device *skdev);
468static int skd_unquiesce_dev(struct skd_device *skdev);
469static void skd_release_special(struct skd_device *skdev,
470 struct skd_special_context *skspcl);
471static void skd_disable_interrupts(struct skd_device *skdev);
472static void skd_isr_fwstate(struct skd_device *skdev);
473static void skd_recover_requests(struct skd_device *skdev, int requeue);
474static void skd_soft_reset(struct skd_device *skdev);
475
476static const char *skd_name(struct skd_device *skdev);
477const char *skd_drive_state_to_str(int state);
478const char *skd_skdev_state_to_str(enum skd_drvr_state state);
479static void skd_log_skdev(struct skd_device *skdev, const char *event);
480static void skd_log_skmsg(struct skd_device *skdev,
481 struct skd_fitmsg_context *skmsg, const char *event);
482static void skd_log_skreq(struct skd_device *skdev,
483 struct skd_request_context *skreq, const char *event);
484
485/*
486 *****************************************************************************
487 * READ/WRITE REQUESTS
488 *****************************************************************************
489 */
490static void skd_fail_all_pending(struct skd_device *skdev)
491{
492 struct request_queue *q = skdev->queue;
493 struct request *req;
494
495 for (;; ) {
496 req = blk_peek_request(q);
497 if (req == NULL)
498 break;
499 blk_start_request(req);
500 __blk_end_request_all(req, -EIO);
501 }
502}
503
504static void
505skd_prep_rw_cdb(struct skd_scsi_request *scsi_req,
506 int data_dir, unsigned lba,
507 unsigned count)
508{
509 if (data_dir == READ)
510 scsi_req->cdb[0] = 0x28;
511 else
512 scsi_req->cdb[0] = 0x2a;
513
514 scsi_req->cdb[1] = 0;
515 scsi_req->cdb[2] = (lba & 0xff000000) >> 24;
516 scsi_req->cdb[3] = (lba & 0xff0000) >> 16;
517 scsi_req->cdb[4] = (lba & 0xff00) >> 8;
518 scsi_req->cdb[5] = (lba & 0xff);
519 scsi_req->cdb[6] = 0;
520 scsi_req->cdb[7] = (count & 0xff00) >> 8;
521 scsi_req->cdb[8] = count & 0xff;
522 scsi_req->cdb[9] = 0;
523}
524
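skd_prep_rw_cdb() above hand-packs a READ(10)/WRITE(10) CDB, placing the LBA and the transfer length in big-endian byte order. The same packing can be expressed with the put_unaligned_be*() helpers this file already uses for discards; a minimal sketch (assumes the CDB buffer is pre-zeroed, as it is here):

#include <linux/types.h>
#include <asm/unaligned.h>

static void pack_rw10_cdb(u8 *cdb, int data_dir, u32 lba, u16 count)
{
	cdb[0] = (data_dir == READ) ? 0x28 : 0x2a;	/* READ(10) / WRITE(10) opcode */
	put_unaligned_be32(lba, &cdb[2]);		/* bytes 2..5: logical block address */
	put_unaligned_be16(count, &cdb[7]);		/* bytes 7..8: transfer length in blocks */
}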
525static void
526skd_prep_zerosize_flush_cdb(struct skd_scsi_request *scsi_req,
527 struct skd_request_context *skreq)
528{
529 skreq->flush_cmd = 1;
530
531 scsi_req->cdb[0] = 0x35;
532 scsi_req->cdb[1] = 0;
533 scsi_req->cdb[2] = 0;
534 scsi_req->cdb[3] = 0;
535 scsi_req->cdb[4] = 0;
536 scsi_req->cdb[5] = 0;
537 scsi_req->cdb[6] = 0;
538 scsi_req->cdb[7] = 0;
539 scsi_req->cdb[8] = 0;
540 scsi_req->cdb[9] = 0;
541}
542
543static void
544skd_prep_discard_cdb(struct skd_scsi_request *scsi_req,
545 struct skd_request_context *skreq,
546 struct page *page,
547 u32 lba, u32 count)
548{
549 char *buf;
550 unsigned long len;
551 struct request *req;
552
553 buf = page_address(page);
554 len = SKD_DISCARD_CDB_LENGTH;
555
556 scsi_req->cdb[0] = UNMAP;
557 scsi_req->cdb[8] = len;
558
559 put_unaligned_be16(6 + 16, &buf[0]);
560 put_unaligned_be16(16, &buf[2]);
561 put_unaligned_be64(lba, &buf[8]);
562 put_unaligned_be32(count, &buf[16]);
563
564 req = skreq->req;
565 blk_add_request_payload(req, page, len);
566 req->buffer = buf;
567}
568
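The page that skd_prep_discard_cdb() fills above is an SBC UNMAP parameter list carrying a single block descriptor; annotated as a sketch, the layout the put_unaligned_be*() calls produce is:

	/*
	 * bytes  0-1   UNMAP data length         = 6 + 16 (rest of header plus one descriptor)
	 * bytes  2-3   block descriptor length   = 16
	 * bytes  4-7   reserved
	 * bytes  8-15  descriptor: starting LBA  (big-endian 64-bit)
	 * bytes 16-19  descriptor: block count   (big-endian 32-bit)
	 * bytes 20-23  descriptor: reserved
	 */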
569static void skd_request_fn_not_online(struct request_queue *q);
570
571static void skd_request_fn(struct request_queue *q)
572{
573 struct skd_device *skdev = q->queuedata;
574 struct skd_fitmsg_context *skmsg = NULL;
575 struct fit_msg_hdr *fmh = NULL;
576 struct skd_request_context *skreq;
577 struct request *req = NULL;
578 struct skd_scsi_request *scsi_req;
579 struct page *page;
580 unsigned long io_flags;
581 int error;
582 u32 lba;
583 u32 count;
584 int data_dir;
585 u32 be_lba;
586 u32 be_count;
587 u64 be_dmaa;
588 u64 cmdctxt;
589 u32 timo_slot;
590 void *cmd_ptr;
591 int flush, fua;
592
593 if (skdev->state != SKD_DRVR_STATE_ONLINE) {
594 skd_request_fn_not_online(q);
595 return;
596 }
597
598 if (blk_queue_stopped(skdev->queue)) {
599 if (skdev->skmsg_free_list == NULL ||
600 skdev->skreq_free_list == NULL ||
601 skdev->in_flight >= skdev->queue_low_water_mark)
602 /* There is still some kind of shortage */
603 return;
604
605 queue_flag_clear(QUEUE_FLAG_STOPPED, skdev->queue);
606 }
607
608 /*
609 * Stop conditions:
610 * - There are no more native requests
611 * - There are already the maximum number of requests in progress
612 * - There are no more skd_request_context entries
613 * - There are no more FIT msg buffers
614 */
615 for (;; ) {
616
617 flush = fua = 0;
618
619 req = blk_peek_request(q);
620
621 /* Are there any native requests to start? */
622 if (req == NULL)
623 break;
624
625 lba = (u32)blk_rq_pos(req);
626 count = blk_rq_sectors(req);
627 data_dir = rq_data_dir(req);
628 io_flags = req->cmd_flags;
629
630 if (io_flags & REQ_FLUSH)
631 flush++;
632
633 if (io_flags & REQ_FUA)
634 fua++;
635
636 pr_debug("%s:%s:%d new req=%p lba=%u(0x%x) "
637 "count=%u(0x%x) dir=%d\n",
638 skdev->name, __func__, __LINE__,
639 req, lba, lba, count, count, data_dir);
640
641 /* At this point we know there is a request */
642
643 /* Are too many requests already in progress? */
644 if (skdev->in_flight >= skdev->cur_max_queue_depth) {
645 pr_debug("%s:%s:%d qdepth %d, limit %d\n",
646 skdev->name, __func__, __LINE__,
647 skdev->in_flight, skdev->cur_max_queue_depth);
648 break;
649 }
650
651 /* Is a skd_request_context available? */
652 skreq = skdev->skreq_free_list;
653 if (skreq == NULL) {
654 pr_debug("%s:%s:%d Out of req=%p\n",
655 skdev->name, __func__, __LINE__, q);
656 break;
657 }
658 SKD_ASSERT(skreq->state == SKD_REQ_STATE_IDLE);
659 SKD_ASSERT((skreq->id & SKD_ID_INCR) == 0);
660
661 /* Now we check to see if we can get a fit msg */
662 if (skmsg == NULL) {
663 if (skdev->skmsg_free_list == NULL) {
664 pr_debug("%s:%s:%d Out of msg\n",
665 skdev->name, __func__, __LINE__);
666 break;
667 }
668 }
669
670 skreq->flush_cmd = 0;
671 skreq->n_sg = 0;
672 skreq->sg_byte_count = 0;
673 skreq->discard_page = 0;
674
675 /*
676 * OK to now dequeue request from q.
677 *
678 * At this point we are committed to either start or reject
679 * the native request. Note that skd_request_context is
680 * available but is still at the head of the free list.
681 */
682 blk_start_request(req);
683 skreq->req = req;
684 skreq->fitmsg_id = 0;
685
686 /* Either a FIT msg is in progress or we have to start one. */
687 if (skmsg == NULL) {
688 /* Are there any FIT msg buffers available? */
689 skmsg = skdev->skmsg_free_list;
690 if (skmsg == NULL) {
691 pr_debug("%s:%s:%d Out of msg skdev=%p\n",
692 skdev->name, __func__, __LINE__,
693 skdev);
694 break;
695 }
696 SKD_ASSERT(skmsg->state == SKD_MSG_STATE_IDLE);
697 SKD_ASSERT((skmsg->id & SKD_ID_INCR) == 0);
698
699 skdev->skmsg_free_list = skmsg->next;
700
701 skmsg->state = SKD_MSG_STATE_BUSY;
702 skmsg->id += SKD_ID_INCR;
703
704 /* Initialize the FIT msg header */
705 fmh = (struct fit_msg_hdr *)skmsg->msg_buf;
706 memset(fmh, 0, sizeof(*fmh));
707 fmh->protocol_id = FIT_PROTOCOL_ID_SOFIT;
708 skmsg->length = sizeof(*fmh);
709 }
710
711 skreq->fitmsg_id = skmsg->id;
712
713 /*
714 * Note that a FIT msg may have just been started
715 * but contains no SoFIT requests yet.
716 */
717
718 /*
719 * Transcode the request, checking as we go. The outcome of
720 * the transcoding is represented by the error variable.
721 */
722 cmd_ptr = &skmsg->msg_buf[skmsg->length];
723 memset(cmd_ptr, 0, 32);
724
725 be_lba = cpu_to_be32(lba);
726 be_count = cpu_to_be32(count);
727 be_dmaa = cpu_to_be64((u64)skreq->sksg_dma_address);
728 cmdctxt = skreq->id + SKD_ID_INCR;
729
730 scsi_req = cmd_ptr;
731 scsi_req->hdr.tag = cmdctxt;
732 scsi_req->hdr.sg_list_dma_address = be_dmaa;
733
734 if (data_dir == READ)
735 skreq->sg_data_dir = SKD_DATA_DIR_CARD_TO_HOST;
736 else
737 skreq->sg_data_dir = SKD_DATA_DIR_HOST_TO_CARD;
738
739 if (io_flags & REQ_DISCARD) {
740 page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
741 if (!page) {
742 pr_err("request_fn:Page allocation failed.\n");
743 skd_end_request(skdev, skreq, -ENOMEM);
744 break;
745 }
746 skreq->discard_page = 1;
747 skd_prep_discard_cdb(scsi_req, skreq, page, lba, count);
748
749 } else if (flush == SKD_FLUSH_ZERO_SIZE_FIRST) {
750 skd_prep_zerosize_flush_cdb(scsi_req, skreq);
751 SKD_ASSERT(skreq->flush_cmd == 1);
752
753 } else {
754 skd_prep_rw_cdb(scsi_req, data_dir, lba, count);
755 }
756
757 if (fua)
758 scsi_req->cdb[1] |= SKD_FUA_NV;
759
760 if (!req->bio)
761 goto skip_sg;
762
763 error = skd_preop_sg_list(skdev, skreq);
764
765 if (error != 0) {
766 /*
767 * Complete the native request with error.
768 * Note that the request context is still at the
769 * head of the free list, and that the SoFIT request
770 * was encoded into the FIT msg buffer but the FIT
771 * msg length has not been updated. In short, the
772 * only resource that has been allocated but might
773 * not be used is that the FIT msg could be empty.
774 */
775 pr_debug("%s:%s:%d error Out\n",
776 skdev->name, __func__, __LINE__);
777 skd_end_request(skdev, skreq, error);
778 continue;
779 }
780
781skip_sg:
782 scsi_req->hdr.sg_list_len_bytes =
783 cpu_to_be32(skreq->sg_byte_count);
784
785 /* Complete resource allocations. */
786 skdev->skreq_free_list = skreq->next;
787 skreq->state = SKD_REQ_STATE_BUSY;
788 skreq->id += SKD_ID_INCR;
789
790 skmsg->length += sizeof(struct skd_scsi_request);
791 fmh->num_protocol_cmds_coalesced++;
792
793 /*
794 * Update the active request counts.
795 * Capture the timeout timestamp.
796 */
797 skreq->timeout_stamp = skdev->timeout_stamp;
798 timo_slot = skreq->timeout_stamp & SKD_TIMEOUT_SLOT_MASK;
799 skdev->timeout_slot[timo_slot]++;
800 skdev->in_flight++;
801 pr_debug("%s:%s:%d req=0x%x busy=%d\n",
802 skdev->name, __func__, __LINE__,
803 skreq->id, skdev->in_flight);
804
805 /*
806 * If the FIT msg buffer is full send it.
807 */
808 if (skmsg->length >= SKD_N_FITMSG_BYTES ||
809 fmh->num_protocol_cmds_coalesced >= skd_max_req_per_msg) {
810 skd_send_fitmsg(skdev, skmsg);
811 skmsg = NULL;
812 fmh = NULL;
813 }
814 }
815
816 /*
817 * Is a FIT msg in progress? If it is empty put the buffer back
818 * on the free list. If it is non-empty send what we got.
819 * This minimizes latency when there are fewer requests than
820 * what fits in a FIT msg.
821 */
822 if (skmsg != NULL) {
823 /* Bigger than just a FIT msg header? */
824 if (skmsg->length > sizeof(struct fit_msg_hdr)) {
825 pr_debug("%s:%s:%d sending msg=%p, len %d\n",
826 skdev->name, __func__, __LINE__,
827 skmsg, skmsg->length);
828 skd_send_fitmsg(skdev, skmsg);
829 } else {
830 /*
831 * The FIT msg is empty. It means we got started
832 * on the msg, but the requests were rejected.
833 */
834 skmsg->state = SKD_MSG_STATE_IDLE;
835 skmsg->id += SKD_ID_INCR;
836 skmsg->next = skdev->skmsg_free_list;
837 skdev->skmsg_free_list = skmsg;
838 }
839 skmsg = NULL;
840 fmh = NULL;
841 }
842
843 /*
844 * If req is non-NULL it means there is something to do but
845 * we are out of a resource.
846 */
847 if (req)
848 blk_stop_queue(skdev->queue);
849}
850
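A FIT message is flushed above either when the buffer is full or when it already holds skd_max_req_per_msg requests. Assuming struct skd_scsi_request occupies the 32 bytes the loop clears for each slot, the budget works out as a rough sketch:

	/*
	 * 512-byte FIT message / 32-byte request slots = 16 slots, minus the
	 * space taken by the fit_msg_hdr, which is consistent with the 1-14
	 * range documented for the skd_max_req_per_msg module parameter.
	 */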
851static void skd_end_request(struct skd_device *skdev,
852 struct skd_request_context *skreq, int error)
853{
854 struct request *req = skreq->req;
855 unsigned int io_flags = req->cmd_flags;
856
857 if ((io_flags & REQ_DISCARD) &&
858 (skreq->discard_page == 1)) {
859 pr_debug("%s:%s:%d, free the page!",
860 skdev->name, __func__, __LINE__);
861 free_page((unsigned long)req->buffer);
862 req->buffer = NULL;
863 }
864
865 if (unlikely(error)) {
866 struct request *req = skreq->req;
867 char *cmd = (rq_data_dir(req) == READ) ? "read" : "write";
868 u32 lba = (u32)blk_rq_pos(req);
869 u32 count = blk_rq_sectors(req);
870
871 pr_err("(%s): Error cmd=%s sect=%u count=%u id=0x%x\n",
872 skd_name(skdev), cmd, lba, count, skreq->id);
873 } else
874 pr_debug("%s:%s:%d id=0x%x error=%d\n",
875 skdev->name, __func__, __LINE__, skreq->id, error);
876
877 __blk_end_request_all(skreq->req, error);
878}
879
880static int skd_preop_sg_list(struct skd_device *skdev,
881 struct skd_request_context *skreq)
882{
883 struct request *req = skreq->req;
884 int writing = skreq->sg_data_dir == SKD_DATA_DIR_HOST_TO_CARD;
885 int pci_dir = writing ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE;
886 struct scatterlist *sg = &skreq->sg[0];
887 int n_sg;
888 int i;
889
890 skreq->sg_byte_count = 0;
891
892 /* SKD_ASSERT(skreq->sg_data_dir == SKD_DATA_DIR_HOST_TO_CARD ||
893 skreq->sg_data_dir == SKD_DATA_DIR_CARD_TO_HOST); */
894
895 n_sg = blk_rq_map_sg(skdev->queue, req, sg);
896 if (n_sg <= 0)
897 return -EINVAL;
898
899 /*
900 * Map scatterlist to PCI bus addresses.
901 * Note PCI might change the number of entries.
902 */
903 n_sg = pci_map_sg(skdev->pdev, sg, n_sg, pci_dir);
904 if (n_sg <= 0)
905 return -EINVAL;
906
907 SKD_ASSERT(n_sg <= skdev->sgs_per_request);
908
909 skreq->n_sg = n_sg;
910
911 for (i = 0; i < n_sg; i++) {
912 struct fit_sg_descriptor *sgd = &skreq->sksg_list[i];
913 u32 cnt = sg_dma_len(&sg[i]);
914 uint64_t dma_addr = sg_dma_address(&sg[i]);
915
916 sgd->control = FIT_SGD_CONTROL_NOT_LAST;
917 sgd->byte_count = cnt;
918 skreq->sg_byte_count += cnt;
919 sgd->host_side_addr = dma_addr;
920 sgd->dev_side_addr = 0;
921 }
922
923 skreq->sksg_list[n_sg - 1].next_desc_ptr = 0LL;
924 skreq->sksg_list[n_sg - 1].control = FIT_SGD_CONTROL_LAST;
925
926 if (unlikely(skdev->dbg_level > 1)) {
927 pr_debug("%s:%s:%d skreq=%x sksg_list=%p sksg_dma=%llx\n",
928 skdev->name, __func__, __LINE__,
929 skreq->id, skreq->sksg_list, skreq->sksg_dma_address);
930 for (i = 0; i < n_sg; i++) {
931 struct fit_sg_descriptor *sgd = &skreq->sksg_list[i];
932 pr_debug("%s:%s:%d sg[%d] count=%u ctrl=0x%x "
933 "addr=0x%llx next=0x%llx\n",
934 skdev->name, __func__, __LINE__,
935 i, sgd->byte_count, sgd->control,
936 sgd->host_side_addr, sgd->next_desc_ptr);
937 }
938 }
939
940 return 0;
941}
942
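skd_preop_sg_list() above follows the usual two-step mapping: build a scatterlist from the request, then DMA-map it, remembering that the mapped count may shrink if entries are coalesced. A minimal sketch of the pattern (q, req, sg, pdev and pci_dir stand in for the driver's own fields):

	int n_sg;

	n_sg = blk_rq_map_sg(q, req, sg);		/* fill sg[] from the request's bio segments */
	if (n_sg <= 0)
		return -EINVAL;

	n_sg = pci_map_sg(pdev, sg, n_sg, pci_dir);	/* returns the mapped count, possibly smaller */
	if (n_sg <= 0)
		return -EINVAL;

	/* pci_unmap_sg() must later be called with the same sg, the same
	 * nents that were handed to pci_map_sg(), and the same direction.
	 */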
943static void skd_postop_sg_list(struct skd_device *skdev,
944 struct skd_request_context *skreq)
945{
946 int writing = skreq->sg_data_dir == SKD_DATA_DIR_HOST_TO_CARD;
947 int pci_dir = writing ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE;
948
949 /*
950 * restore the next ptr for next IO request so we
951 * don't have to set it every time.
952 */
953 skreq->sksg_list[skreq->n_sg - 1].next_desc_ptr =
954 skreq->sksg_dma_address +
955 ((skreq->n_sg) * sizeof(struct fit_sg_descriptor));
956 pci_unmap_sg(skdev->pdev, &skreq->sg[0], skreq->n_sg, pci_dir);
957}
958
959static void skd_request_fn_not_online(struct request_queue *q)
960{
961 struct skd_device *skdev = q->queuedata;
962 int error;
963
964 SKD_ASSERT(skdev->state != SKD_DRVR_STATE_ONLINE);
965
966 skd_log_skdev(skdev, "req_not_online");
967 switch (skdev->state) {
968 case SKD_DRVR_STATE_PAUSING:
969 case SKD_DRVR_STATE_PAUSED:
970 case SKD_DRVR_STATE_STARTING:
971 case SKD_DRVR_STATE_RESTARTING:
972 case SKD_DRVR_STATE_WAIT_BOOT:
973 /* In case of starting, we haven't started the queue,
974 * so we can't get here... but requests are
975 * possibly hanging out waiting for us because we
976 * reported the dev/skd0 already. They'll wait
977 * forever if connect doesn't complete.
978 * What to do??? delay dev/skd0 ??
979 */
980 case SKD_DRVR_STATE_BUSY:
981 case SKD_DRVR_STATE_BUSY_IMMINENT:
982 case SKD_DRVR_STATE_BUSY_ERASE:
983 case SKD_DRVR_STATE_DRAINING_TIMEOUT:
984 return;
985
986 case SKD_DRVR_STATE_BUSY_SANITIZE:
987 case SKD_DRVR_STATE_STOPPING:
988 case SKD_DRVR_STATE_SYNCING:
989 case SKD_DRVR_STATE_FAULT:
990 case SKD_DRVR_STATE_DISAPPEARED:
991 default:
992 error = -EIO;
993 break;
994 }
995
996 /* If we get here, terminate all pending block requests
997 * with EIO and any scsi pass thru with appropriate sense
998 */
999
1000 skd_fail_all_pending(skdev);
1001}
1002
1003/*
1004 *****************************************************************************
1005 * TIMER
1006 *****************************************************************************
1007 */
1008
1009static void skd_timer_tick_not_online(struct skd_device *skdev);
1010
1011static void skd_timer_tick(ulong arg)
1012{
1013 struct skd_device *skdev = (struct skd_device *)arg;
1014
1015 u32 timo_slot;
1016 u32 overdue_timestamp;
1017 unsigned long reqflags;
1018 u32 state;
1019
1020 if (skdev->state == SKD_DRVR_STATE_FAULT)
1021 /* The driver has declared fault, and we want it to
1022 * stay that way until driver is reloaded.
1023 */
1024 return;
1025
1026 spin_lock_irqsave(&skdev->lock, reqflags);
1027
1028 state = SKD_READL(skdev, FIT_STATUS);
1029 state &= FIT_SR_DRIVE_STATE_MASK;
1030 if (state != skdev->drive_state)
1031 skd_isr_fwstate(skdev);
1032
1033 if (skdev->state != SKD_DRVR_STATE_ONLINE) {
1034 skd_timer_tick_not_online(skdev);
1035 goto timer_func_out;
1036 }
1037 skdev->timeout_stamp++;
1038 timo_slot = skdev->timeout_stamp & SKD_TIMEOUT_SLOT_MASK;
1039
1040 /*
1041 * All requests that happened during the previous use of
1042 * this slot should be done by now. The previous use was
1043 * over 7 seconds ago.
1044 */
1045 if (skdev->timeout_slot[timo_slot] == 0)
1046 goto timer_func_out;
1047
1048 /* Something is overdue */
1049 overdue_timestamp = skdev->timeout_stamp - SKD_N_TIMEOUT_SLOT;
1050
1051 pr_debug("%s:%s:%d found %d timeouts, draining busy=%d\n",
1052 skdev->name, __func__, __LINE__,
1053 skdev->timeout_slot[timo_slot], skdev->in_flight);
1054 pr_err("(%s): Overdue IOs (%d), busy %d\n",
1055 skd_name(skdev), skdev->timeout_slot[timo_slot],
1056 skdev->in_flight);
1057
1058 skdev->timer_countdown = SKD_DRAINING_TIMO;
1059 skdev->state = SKD_DRVR_STATE_DRAINING_TIMEOUT;
1060 skdev->timo_slot = timo_slot;
1061 blk_stop_queue(skdev->queue);
1062
1063timer_func_out:
1064 mod_timer(&skdev->timer, (jiffies + HZ));
1065
1066 spin_unlock_irqrestore(&skdev->lock, reqflags);
1067}
1068
1069static void skd_timer_tick_not_online(struct skd_device *skdev)
1070{
1071 switch (skdev->state) {
1072 case SKD_DRVR_STATE_IDLE:
1073 case SKD_DRVR_STATE_LOAD:
1074 break;
1075 case SKD_DRVR_STATE_BUSY_SANITIZE:
1076 pr_debug("%s:%s:%d drive busy sanitize[%x], driver[%x]\n",
1077 skdev->name, __func__, __LINE__,
1078 skdev->drive_state, skdev->state);
1079 /* If we've been in sanitize for 3 seconds, we figure we're not
1080 * going to get any more completions, so recover requests now
1081 */
1082 if (skdev->timer_countdown > 0) {
1083 skdev->timer_countdown--;
1084 return;
1085 }
1086 skd_recover_requests(skdev, 0);
1087 break;
1088
1089 case SKD_DRVR_STATE_BUSY:
1090 case SKD_DRVR_STATE_BUSY_IMMINENT:
1091 case SKD_DRVR_STATE_BUSY_ERASE:
1092 pr_debug("%s:%s:%d busy[%x], countdown=%d\n",
1093 skdev->name, __func__, __LINE__,
1094 skdev->state, skdev->timer_countdown);
1095 if (skdev->timer_countdown > 0) {
1096 skdev->timer_countdown--;
1097 return;
1098 }
1099 pr_debug("%s:%s:%d busy[%x], timedout=%d, restarting device.",
1100 skdev->name, __func__, __LINE__,
1101 skdev->state, skdev->timer_countdown);
1102 skd_restart_device(skdev);
1103 break;
1104
1105 case SKD_DRVR_STATE_WAIT_BOOT:
1106 case SKD_DRVR_STATE_STARTING:
1107 if (skdev->timer_countdown > 0) {
1108 skdev->timer_countdown--;
1109 return;
1110 }
1111 /* For now, we fault the drive. Could attempt resets to
1112 * recover at some point. */
1113 skdev->state = SKD_DRVR_STATE_FAULT;
1114
1115 pr_err("(%s): DriveFault Connect Timeout (%x)\n",
1116 skd_name(skdev), skdev->drive_state);
1117
1118 /*start the queue so we can respond with error to requests */
1119 /* wakeup anyone waiting for startup complete */
1120 blk_start_queue(skdev->queue);
1121 skdev->gendisk_on = -1;
1122 wake_up_interruptible(&skdev->waitq);
1123 break;
1124
1125 case SKD_DRVR_STATE_ONLINE:
1126 /* shouldn't get here. */
1127 break;
1128
1129 case SKD_DRVR_STATE_PAUSING:
1130 case SKD_DRVR_STATE_PAUSED:
1131 break;
1132
1133 case SKD_DRVR_STATE_DRAINING_TIMEOUT:
1134 pr_debug("%s:%s:%d "
1135 "draining busy [%d] tick[%d] qdb[%d] tmls[%d]\n",
1136 skdev->name, __func__, __LINE__,
1137 skdev->timo_slot,
1138 skdev->timer_countdown,
1139 skdev->in_flight,
1140 skdev->timeout_slot[skdev->timo_slot]);
1141 /* if the slot has cleared we can let the I/O continue */
1142 if (skdev->timeout_slot[skdev->timo_slot] == 0) {
1143 pr_debug("%s:%s:%d Slot drained, starting queue.\n",
1144 skdev->name, __func__, __LINE__);
1145 skdev->state = SKD_DRVR_STATE_ONLINE;
1146 blk_start_queue(skdev->queue);
1147 return;
1148 }
1149 if (skdev->timer_countdown > 0) {
1150 skdev->timer_countdown--;
1151 return;
1152 }
1153 skd_restart_device(skdev);
1154 break;
1155
1156 case SKD_DRVR_STATE_RESTARTING:
1157 if (skdev->timer_countdown > 0) {
1158 skdev->timer_countdown--;
1159 return;
1160 }
1161 /* For now, we fault the drive. Could attempt resets to
1162 * recover at some point. */
1163 skdev->state = SKD_DRVR_STATE_FAULT;
1164 pr_err("(%s): DriveFault Reconnect Timeout (%x)\n",
1165 skd_name(skdev), skdev->drive_state);
1166
1167 /*
1168 * Recovering does two things:
1169 * 1. completes IO with error
1170 * 2. reclaims dma resources
1171 * When is it safe to recover requests?
1172 * - if the drive state is faulted
1173 * - if the state is still soft reset after our timeout
1174 * - if the drive registers are dead (state = FF)
1175 * If it is "unsafe", we still need to recover, so we will
1176 * disable pci bus mastering and disable our interrupts.
1177 */
1178
1179 if ((skdev->drive_state == FIT_SR_DRIVE_SOFT_RESET) ||
1180 (skdev->drive_state == FIT_SR_DRIVE_FAULT) ||
1181 (skdev->drive_state == FIT_SR_DRIVE_STATE_MASK))
1182 /* It never came out of soft reset. Try to
1183 * recover the requests and then let them
1184 * fail. This is to mitigate hung processes. */
1185 skd_recover_requests(skdev, 0);
1186 else {
1187 pr_err("(%s): Disable BusMaster (%x)\n",
1188 skd_name(skdev), skdev->drive_state);
1189 pci_disable_device(skdev->pdev);
1190 skd_disable_interrupts(skdev);
1191 skd_recover_requests(skdev, 0);
1192 }
1193
1194 /*start the queue so we can respond with error to requests */
1195 /* wakeup anyone waiting for startup complete */
1196 blk_start_queue(skdev->queue);
1197 skdev->gendisk_on = -1;
1198 wake_up_interruptible(&skdev->waitq);
1199 break;
1200
1201 case SKD_DRVR_STATE_RESUMING:
1202 case SKD_DRVR_STATE_STOPPING:
1203 case SKD_DRVR_STATE_SYNCING:
1204 case SKD_DRVR_STATE_FAULT:
1205 case SKD_DRVR_STATE_DISAPPEARED:
1206 default:
1207 break;
1208 }
1209}
1210
1211static int skd_start_timer(struct skd_device *skdev)
1212{
1213 int rc;
1214
1215 init_timer(&skdev->timer);
1216 setup_timer(&skdev->timer, skd_timer_tick, (ulong)skdev);
1217
1218 rc = mod_timer(&skdev->timer, (jiffies + HZ));
1219 if (rc)
1220 pr_err("%s: failed to start timer %d\n",
1221 __func__, rc);
1222 return rc;
1223}
1224
1225static void skd_kill_timer(struct skd_device *skdev)
1226{
1227 del_timer_sync(&skdev->timer);
1228}
1229
1230/*
1231 *****************************************************************************
1232 * IOCTL
1233 *****************************************************************************
1234 */
1235static int skd_ioctl_sg_io(struct skd_device *skdev,
1236 fmode_t mode, void __user *argp);
1237static int skd_sg_io_get_and_check_args(struct skd_device *skdev,
1238 struct skd_sg_io *sksgio);
1239static int skd_sg_io_obtain_skspcl(struct skd_device *skdev,
1240 struct skd_sg_io *sksgio);
1241static int skd_sg_io_prep_buffering(struct skd_device *skdev,
1242 struct skd_sg_io *sksgio);
1243static int skd_sg_io_copy_buffer(struct skd_device *skdev,
1244 struct skd_sg_io *sksgio, int dxfer_dir);
1245static int skd_sg_io_send_fitmsg(struct skd_device *skdev,
1246 struct skd_sg_io *sksgio);
1247static int skd_sg_io_await(struct skd_device *skdev, struct skd_sg_io *sksgio);
1248static int skd_sg_io_release_skspcl(struct skd_device *skdev,
1249 struct skd_sg_io *sksgio);
1250static int skd_sg_io_put_status(struct skd_device *skdev,
1251 struct skd_sg_io *sksgio);
1252
1253static void skd_complete_special(struct skd_device *skdev,
1254 volatile struct fit_completion_entry_v1
1255 *skcomp,
1256 volatile struct fit_comp_error_info *skerr,
1257 struct skd_special_context *skspcl);
1258
1259static int skd_bdev_ioctl(struct block_device *bdev, fmode_t mode,
1260 uint cmd_in, ulong arg)
1261{
1262 int rc = 0;
1263 struct gendisk *disk = bdev->bd_disk;
1264 struct skd_device *skdev = disk->private_data;
1265 void __user *p = (void *)arg;
1266
1267 pr_debug("%s:%s:%d %s: CMD[%s] ioctl mode 0x%x, cmd 0x%x arg %0lx\n",
1268 skdev->name, __func__, __LINE__,
1269 disk->disk_name, current->comm, mode, cmd_in, arg);
1270
1271 if (!capable(CAP_SYS_ADMIN))
1272 return -EPERM;
1273
1274 switch (cmd_in) {
1275 case SG_SET_TIMEOUT:
1276 case SG_GET_TIMEOUT:
1277 case SG_GET_VERSION_NUM:
1278 rc = scsi_cmd_ioctl(disk->queue, disk, mode, cmd_in, p);
1279 break;
1280 case SG_IO:
1281 rc = skd_ioctl_sg_io(skdev, mode, p);
1282 break;
1283
1284 default:
1285 rc = -ENOTTY;
1286 break;
1287 }
1288
1289 pr_debug("%s:%s:%d %s: completion rc %d\n",
1290 skdev->name, __func__, __LINE__, disk->disk_name, rc);
1291 return rc;
1292}
1293
1294static int skd_ioctl_sg_io(struct skd_device *skdev, fmode_t mode,
1295 void __user *argp)
1296{
1297 int rc;
1298 struct skd_sg_io sksgio;
1299
1300 memset(&sksgio, 0, sizeof(sksgio));
1301 sksgio.mode = mode;
1302 sksgio.argp = argp;
1303 sksgio.iov = &sksgio.no_iov_iov;
1304
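	/*
	 * SG_IO pipeline: validate the user's sg_io_hdr, grab a special
	 * request context, allocate bounce pages, copy data in, send the
	 * FIT message, wait for completion, copy data out, then write the
	 * status back to user space.  Each step below maps to one helper.
	 */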
1305 switch (skdev->state) {
1306 case SKD_DRVR_STATE_ONLINE:
1307 case SKD_DRVR_STATE_BUSY_IMMINENT:
1308 break;
1309
1310 default:
1311 pr_debug("%s:%s:%d drive not online\n",
1312 skdev->name, __func__, __LINE__);
1313 rc = -ENXIO;
1314 goto out;
1315 }
1316
1317 rc = skd_sg_io_get_and_check_args(skdev, &sksgio);
1318 if (rc)
1319 goto out;
1320
1321 rc = skd_sg_io_obtain_skspcl(skdev, &sksgio);
1322 if (rc)
1323 goto out;
1324
1325 rc = skd_sg_io_prep_buffering(skdev, &sksgio);
1326 if (rc)
1327 goto out;
1328
1329 rc = skd_sg_io_copy_buffer(skdev, &sksgio, SG_DXFER_TO_DEV);
1330 if (rc)
1331 goto out;
1332
1333 rc = skd_sg_io_send_fitmsg(skdev, &sksgio);
1334 if (rc)
1335 goto out;
1336
1337 rc = skd_sg_io_await(skdev, &sksgio);
1338 if (rc)
1339 goto out;
1340
1341 rc = skd_sg_io_copy_buffer(skdev, &sksgio, SG_DXFER_FROM_DEV);
1342 if (rc)
1343 goto out;
1344
1345 rc = skd_sg_io_put_status(skdev, &sksgio);
1346 if (rc)
1347 goto out;
1348
1349 rc = 0;
1350
1351out:
1352 skd_sg_io_release_skspcl(skdev, &sksgio);
1353
1354 if (sksgio.iov != NULL && sksgio.iov != &sksgio.no_iov_iov)
1355 kfree(sksgio.iov);
1356 return rc;
1357}
1358
1359static int skd_sg_io_get_and_check_args(struct skd_device *skdev,
1360 struct skd_sg_io *sksgio)
1361{
1362 struct sg_io_hdr *sgp = &sksgio->sg;
1363 int i, acc;
1364
1365 if (!access_ok(VERIFY_WRITE, sksgio->argp, sizeof(sg_io_hdr_t))) {
1366 pr_debug("%s:%s:%d access sg failed %p\n",
1367 skdev->name, __func__, __LINE__, sksgio->argp);
1368 return -EFAULT;
1369 }
1370
1371 if (__copy_from_user(sgp, sksgio->argp, sizeof(sg_io_hdr_t))) {
1372 pr_debug("%s:%s:%d copy_from_user sg failed %p\n",
1373 skdev->name, __func__, __LINE__, sksgio->argp);
1374 return -EFAULT;
1375 }
1376
1377 if (sgp->interface_id != SG_INTERFACE_ID_ORIG) {
1378 pr_debug("%s:%s:%d interface_id invalid 0x%x\n",
1379 skdev->name, __func__, __LINE__, sgp->interface_id);
1380 return -EINVAL;
1381 }
1382
1383 if (sgp->cmd_len > sizeof(sksgio->cdb)) {
1384 pr_debug("%s:%s:%d cmd_len invalid %d\n",
1385 skdev->name, __func__, __LINE__, sgp->cmd_len);
1386 return -EINVAL;
1387 }
1388
1389 if (sgp->iovec_count > 256) {
1390 pr_debug("%s:%s:%d iovec_count invalid %d\n",
1391 skdev->name, __func__, __LINE__, sgp->iovec_count);
1392 return -EINVAL;
1393 }
1394
1395 if (sgp->dxfer_len > (PAGE_SIZE * SKD_N_SG_PER_SPECIAL)) {
1396 pr_debug("%s:%s:%d dxfer_len invalid %d\n",
1397 skdev->name, __func__, __LINE__, sgp->dxfer_len);
1398 return -EINVAL;
1399 }
1400
1401 switch (sgp->dxfer_direction) {
1402 case SG_DXFER_NONE:
1403 acc = -1;
1404 break;
1405
1406 case SG_DXFER_TO_DEV:
1407 acc = VERIFY_READ;
1408 break;
1409
1410 case SG_DXFER_FROM_DEV:
1411 case SG_DXFER_TO_FROM_DEV:
1412 acc = VERIFY_WRITE;
1413 break;
1414
1415 default:
1416 pr_debug("%s:%s:%d dxfer_dir invalid %d\n",
1417 skdev->name, __func__, __LINE__, sgp->dxfer_direction);
1418 return -EINVAL;
1419 }
1420
1421 if (copy_from_user(sksgio->cdb, sgp->cmdp, sgp->cmd_len)) {
1422 pr_debug("%s:%s:%d copy_from_user cmdp failed %p\n",
1423 skdev->name, __func__, __LINE__, sgp->cmdp);
1424 return -EFAULT;
1425 }
1426
1427 if (sgp->mx_sb_len != 0) {
1428 if (!access_ok(VERIFY_WRITE, sgp->sbp, sgp->mx_sb_len)) {
1429 pr_debug("%s:%s:%d access sbp failed %p\n",
1430 skdev->name, __func__, __LINE__, sgp->sbp);
1431 return -EFAULT;
1432 }
1433 }
1434
1435 if (sgp->iovec_count == 0) {
1436 sksgio->iov[0].iov_base = sgp->dxferp;
1437 sksgio->iov[0].iov_len = sgp->dxfer_len;
1438 sksgio->iovcnt = 1;
1439 sksgio->dxfer_len = sgp->dxfer_len;
1440 } else {
1441 struct sg_iovec *iov;
1442 uint nbytes = sizeof(*iov) * sgp->iovec_count;
1443 size_t iov_data_len;
1444
1445 iov = kmalloc(nbytes, GFP_KERNEL);
1446 if (iov == NULL) {
1447 pr_debug("%s:%s:%d alloc iovec failed %d\n",
1448 skdev->name, __func__, __LINE__,
1449 sgp->iovec_count);
1450 return -ENOMEM;
1451 }
1452 sksgio->iov = iov;
1453 sksgio->iovcnt = sgp->iovec_count;
1454
1455 if (copy_from_user(iov, sgp->dxferp, nbytes)) {
1456 pr_debug("%s:%s:%d copy_from_user iovec failed %p\n",
1457 skdev->name, __func__, __LINE__, sgp->dxferp);
1458 return -EFAULT;
1459 }
1460
1461 /*
1462 * Sum up the vecs, making sure they don't overflow
1463 */
1464 iov_data_len = 0;
1465 for (i = 0; i < sgp->iovec_count; i++) {
1466 if (iov_data_len + iov[i].iov_len < iov_data_len)
1467 return -EINVAL;
1468 iov_data_len += iov[i].iov_len;
1469 }
1470
1471 /* SG_IO howto says that the shorter of the two wins */
1472 if (sgp->dxfer_len < iov_data_len) {
1473 sksgio->iovcnt = iov_shorten((struct iovec *)iov,
1474 sgp->iovec_count,
1475 sgp->dxfer_len);
1476 sksgio->dxfer_len = sgp->dxfer_len;
1477 } else
1478 sksgio->dxfer_len = iov_data_len;
1479 }
1480
1481 if (sgp->dxfer_direction != SG_DXFER_NONE) {
1482 struct sg_iovec *iov = sksgio->iov;
1483 for (i = 0; i < sksgio->iovcnt; i++, iov++) {
1484 if (!access_ok(acc, iov->iov_base, iov->iov_len)) {
1485 pr_debug("%s:%s:%d access data failed %p/%d\n",
1486 skdev->name, __func__, __LINE__,
1487 iov->iov_base, (int)iov->iov_len);
1488 return -EFAULT;
1489 }
1490 }
1491 }
1492
1493 return 0;
1494}
1495
1496static int skd_sg_io_obtain_skspcl(struct skd_device *skdev,
1497 struct skd_sg_io *sksgio)
1498{
1499 struct skd_special_context *skspcl = NULL;
1500 int rc;
1501
1502 for (;;) {
1503 ulong flags;
1504
1505 spin_lock_irqsave(&skdev->lock, flags);
1506 skspcl = skdev->skspcl_free_list;
1507 if (skspcl != NULL) {
1508 skdev->skspcl_free_list =
1509 (struct skd_special_context *)skspcl->req.next;
1510 skspcl->req.id += SKD_ID_INCR;
1511 skspcl->req.state = SKD_REQ_STATE_SETUP;
1512 skspcl->orphaned = 0;
1513 skspcl->req.n_sg = 0;
1514 }
1515 spin_unlock_irqrestore(&skdev->lock, flags);
1516
1517 if (skspcl != NULL) {
1518 rc = 0;
1519 break;
1520 }
1521
1522 pr_debug("%s:%s:%d blocking\n",
1523 skdev->name, __func__, __LINE__);
1524
1525 rc = wait_event_interruptible_timeout(
1526 skdev->waitq,
1527 (skdev->skspcl_free_list != NULL),
1528 msecs_to_jiffies(sksgio->sg.timeout));
1529
1530 pr_debug("%s:%s:%d unblocking, rc=%d\n",
1531 skdev->name, __func__, __LINE__, rc);
1532
1533 if (rc <= 0) {
1534 if (rc == 0)
1535 rc = -ETIMEDOUT;
1536 else
1537 rc = -EINTR;
1538 break;
1539 }
1540		/*
1541		 * If we get here, rc > 0, meaning
1542		 * wait_event_interruptible_timeout() returned with time left
1543		 * and the awaited event -- a non-empty free list -- occurred.
1544		 * Retry the allocation.
1545		 */
1546 }
1547 sksgio->skspcl = skspcl;
1548
1549 return rc;
1550}
1551
1552static int skd_skreq_prep_buffering(struct skd_device *skdev,
1553 struct skd_request_context *skreq,
1554 u32 dxfer_len)
1555{
1556 u32 resid = dxfer_len;
1557
1558 /*
1559 * The DMA engine must have aligned addresses and byte counts.
1560 */
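	/* "(-resid) & 3" rounds the byte count up to the next multiple of 4. */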
1561 resid += (-resid) & 3;
1562 skreq->sg_byte_count = resid;
1563
1564 skreq->n_sg = 0;
1565
1566 while (resid > 0) {
1567 u32 nbytes = PAGE_SIZE;
1568 u32 ix = skreq->n_sg;
1569 struct scatterlist *sg = &skreq->sg[ix];
1570 struct fit_sg_descriptor *sksg = &skreq->sksg_list[ix];
1571 struct page *page;
1572
1573 if (nbytes > resid)
1574 nbytes = resid;
1575
1576 page = alloc_page(GFP_KERNEL);
1577 if (page == NULL)
1578 return -ENOMEM;
1579
1580 sg_set_page(sg, page, nbytes, 0);
1581
1582 /* TODO: This should be going through a pci_???()
1583 * routine to do proper mapping. */
1584 sksg->control = FIT_SGD_CONTROL_NOT_LAST;
1585 sksg->byte_count = nbytes;
1586
1587 sksg->host_side_addr = sg_phys(sg);
1588
1589 sksg->dev_side_addr = 0;
1590 sksg->next_desc_ptr = skreq->sksg_dma_address +
1591 (ix + 1) * sizeof(*sksg);
1592
1593 skreq->n_sg++;
1594 resid -= nbytes;
1595 }
1596
1597 if (skreq->n_sg > 0) {
1598 u32 ix = skreq->n_sg - 1;
1599 struct fit_sg_descriptor *sksg = &skreq->sksg_list[ix];
1600
1601 sksg->control = FIT_SGD_CONTROL_LAST;
1602 sksg->next_desc_ptr = 0;
1603 }
1604
1605 if (unlikely(skdev->dbg_level > 1)) {
1606 u32 i;
1607
1608 pr_debug("%s:%s:%d skreq=%x sksg_list=%p sksg_dma=%llx\n",
1609 skdev->name, __func__, __LINE__,
1610 skreq->id, skreq->sksg_list, skreq->sksg_dma_address);
1611 for (i = 0; i < skreq->n_sg; i++) {
1612 struct fit_sg_descriptor *sgd = &skreq->sksg_list[i];
1613
1614 pr_debug("%s:%s:%d sg[%d] count=%u ctrl=0x%x "
1615 "addr=0x%llx next=0x%llx\n",
1616 skdev->name, __func__, __LINE__,
1617 i, sgd->byte_count, sgd->control,
1618 sgd->host_side_addr, sgd->next_desc_ptr);
1619 }
1620 }
1621
1622 return 0;
1623}
1624
1625static int skd_sg_io_prep_buffering(struct skd_device *skdev,
1626 struct skd_sg_io *sksgio)
1627{
1628 struct skd_special_context *skspcl = sksgio->skspcl;
1629 struct skd_request_context *skreq = &skspcl->req;
1630 u32 dxfer_len = sksgio->dxfer_len;
1631 int rc;
1632
1633 rc = skd_skreq_prep_buffering(skdev, skreq, dxfer_len);
1634 /*
1635 * Eventually, errors or not, skd_release_special() is called
1636 * to recover allocations including partial allocations.
1637 */
1638 return rc;
1639}
1640
1641static int skd_sg_io_copy_buffer(struct skd_device *skdev,
1642 struct skd_sg_io *sksgio, int dxfer_dir)
1643{
1644 struct skd_special_context *skspcl = sksgio->skspcl;
1645 u32 iov_ix = 0;
1646 struct sg_iovec curiov;
1647 u32 sksg_ix = 0;
1648 u8 *bufp = NULL;
1649 u32 buf_len = 0;
1650 u32 resid = sksgio->dxfer_len;
1651 int rc;
1652
1653 curiov.iov_len = 0;
1654 curiov.iov_base = NULL;
1655
1656 if (dxfer_dir != sksgio->sg.dxfer_direction) {
1657 if (dxfer_dir != SG_DXFER_TO_DEV ||
1658 sksgio->sg.dxfer_direction != SG_DXFER_TO_FROM_DEV)
1659 return 0;
1660 }
1661
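	/*
	 * Walk the user iovec and the driver's bounce pages in lockstep,
	 * copying min(iovec remainder, page remainder, resid) per pass.
	 */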
1662 while (resid > 0) {
1663 u32 nbytes = PAGE_SIZE;
1664
1665 if (curiov.iov_len == 0) {
1666 curiov = sksgio->iov[iov_ix++];
1667 continue;
1668 }
1669
1670 if (buf_len == 0) {
1671 struct page *page;
1672 page = sg_page(&skspcl->req.sg[sksg_ix++]);
1673 bufp = page_address(page);
1674 buf_len = PAGE_SIZE;
1675 }
1676
1677 nbytes = min_t(u32, nbytes, resid);
1678 nbytes = min_t(u32, nbytes, curiov.iov_len);
1679 nbytes = min_t(u32, nbytes, buf_len);
1680
1681 if (dxfer_dir == SG_DXFER_TO_DEV)
1682 rc = __copy_from_user(bufp, curiov.iov_base, nbytes);
1683 else
1684 rc = __copy_to_user(curiov.iov_base, bufp, nbytes);
1685
1686 if (rc)
1687 return -EFAULT;
1688
1689 resid -= nbytes;
1690 curiov.iov_len -= nbytes;
1691 curiov.iov_base += nbytes;
1692 buf_len -= nbytes;
1693 }
1694
1695 return 0;
1696}
1697
1698static int skd_sg_io_send_fitmsg(struct skd_device *skdev,
1699 struct skd_sg_io *sksgio)
1700{
1701 struct skd_special_context *skspcl = sksgio->skspcl;
1702 struct fit_msg_hdr *fmh = (struct fit_msg_hdr *)skspcl->msg_buf;
1703 struct skd_scsi_request *scsi_req = (struct skd_scsi_request *)&fmh[1];
1704
1705 memset(skspcl->msg_buf, 0, SKD_N_SPECIAL_FITMSG_BYTES);
1706
1707 /* Initialize the FIT msg header */
1708 fmh->protocol_id = FIT_PROTOCOL_ID_SOFIT;
1709 fmh->num_protocol_cmds_coalesced = 1;
1710
1711 /* Initialize the SCSI request */
1712 if (sksgio->sg.dxfer_direction != SG_DXFER_NONE)
1713 scsi_req->hdr.sg_list_dma_address =
1714 cpu_to_be64(skspcl->req.sksg_dma_address);
1715 scsi_req->hdr.tag = skspcl->req.id;
1716 scsi_req->hdr.sg_list_len_bytes =
1717 cpu_to_be32(skspcl->req.sg_byte_count);
1718 memcpy(scsi_req->cdb, sksgio->cdb, sizeof(scsi_req->cdb));
1719
1720 skspcl->req.state = SKD_REQ_STATE_BUSY;
1721 skd_send_special_fitmsg(skdev, skspcl);
1722
1723 return 0;
1724}
1725
1726static int skd_sg_io_await(struct skd_device *skdev, struct skd_sg_io *sksgio)
1727{
1728 unsigned long flags;
1729 int rc;
1730
1731 rc = wait_event_interruptible_timeout(skdev->waitq,
1732 (sksgio->skspcl->req.state !=
1733 SKD_REQ_STATE_BUSY),
1734					       msecs_to_jiffies(
1735							sksgio->sg.timeout));
1736
1737 spin_lock_irqsave(&skdev->lock, flags);
1738
1739 if (sksgio->skspcl->req.state == SKD_REQ_STATE_ABORTED) {
1740 pr_debug("%s:%s:%d skspcl %p aborted\n",
1741 skdev->name, __func__, __LINE__, sksgio->skspcl);
1742
1743 /* Build check cond, sense and let command finish. */
1744 /* For a timeout, we must fabricate completion and sense
1745 * data to complete the command */
1746 sksgio->skspcl->req.completion.status =
1747 SAM_STAT_CHECK_CONDITION;
1748
1749 memset(&sksgio->skspcl->req.err_info, 0,
1750 sizeof(sksgio->skspcl->req.err_info));
1751 sksgio->skspcl->req.err_info.type = 0x70;
1752 sksgio->skspcl->req.err_info.key = ABORTED_COMMAND;
1753 sksgio->skspcl->req.err_info.code = 0x44;
1754 sksgio->skspcl->req.err_info.qual = 0;
1755 rc = 0;
1756 } else if (sksgio->skspcl->req.state != SKD_REQ_STATE_BUSY)
1757 /* No longer on the adapter. We finish. */
1758 rc = 0;
1759 else {
1760 /* Something's gone wrong. Still busy. Timeout or
1761 * user interrupted (control-C). Mark as an orphan
1762 * so it will be disposed when completed. */
1763 sksgio->skspcl->orphaned = 1;
1764 sksgio->skspcl = NULL;
1765 if (rc == 0) {
1766 pr_debug("%s:%s:%d timed out %p (%u ms)\n",
1767 skdev->name, __func__, __LINE__,
1768 sksgio, sksgio->sg.timeout);
1769 rc = -ETIMEDOUT;
1770 } else {
1771 pr_debug("%s:%s:%d cntlc %p\n",
1772 skdev->name, __func__, __LINE__, sksgio);
1773 rc = -EINTR;
1774 }
1775 }
1776
1777 spin_unlock_irqrestore(&skdev->lock, flags);
1778
1779 return rc;
1780}
1781
1782static int skd_sg_io_put_status(struct skd_device *skdev,
1783 struct skd_sg_io *sksgio)
1784{
1785 struct sg_io_hdr *sgp = &sksgio->sg;
1786 struct skd_special_context *skspcl = sksgio->skspcl;
1787 int resid = 0;
1788
1789 u32 nb = be32_to_cpu(skspcl->req.completion.num_returned_bytes);
1790
1791 sgp->status = skspcl->req.completion.status;
1792 resid = sksgio->dxfer_len - nb;
1793
1794 sgp->masked_status = sgp->status & STATUS_MASK;
1795 sgp->msg_status = 0;
1796 sgp->host_status = 0;
1797 sgp->driver_status = 0;
1798 sgp->resid = resid;
1799 if (sgp->masked_status || sgp->host_status || sgp->driver_status)
1800 sgp->info |= SG_INFO_CHECK;
1801
1802 pr_debug("%s:%s:%d status %x masked %x resid 0x%x\n",
1803 skdev->name, __func__, __LINE__,
1804 sgp->status, sgp->masked_status, sgp->resid);
1805
1806 if (sgp->masked_status == SAM_STAT_CHECK_CONDITION) {
1807 if (sgp->mx_sb_len > 0) {
1808 struct fit_comp_error_info *ei = &skspcl->req.err_info;
1809 u32 nbytes = sizeof(*ei);
1810
1811 nbytes = min_t(u32, nbytes, sgp->mx_sb_len);
1812
1813 sgp->sb_len_wr = nbytes;
1814
1815 if (__copy_to_user(sgp->sbp, ei, nbytes)) {
1816 pr_debug("%s:%s:%d copy_to_user sense failed %p\n",
1817 skdev->name, __func__, __LINE__,
1818 sgp->sbp);
1819 return -EFAULT;
1820 }
1821 }
1822 }
1823
1824 if (__copy_to_user(sksgio->argp, sgp, sizeof(sg_io_hdr_t))) {
1825 pr_debug("%s:%s:%d copy_to_user sg failed %p\n",
1826 skdev->name, __func__, __LINE__, sksgio->argp);
1827 return -EFAULT;
1828 }
1829
1830 return 0;
1831}
1832
1833static int skd_sg_io_release_skspcl(struct skd_device *skdev,
1834 struct skd_sg_io *sksgio)
1835{
1836 struct skd_special_context *skspcl = sksgio->skspcl;
1837
1838 if (skspcl != NULL) {
1839 ulong flags;
1840
1841 sksgio->skspcl = NULL;
1842
1843 spin_lock_irqsave(&skdev->lock, flags);
1844 skd_release_special(skdev, skspcl);
1845 spin_unlock_irqrestore(&skdev->lock, flags);
1846 }
1847
1848 return 0;
1849}
1850
1851/*
1852 *****************************************************************************
1853 * INTERNAL REQUESTS -- generated by driver itself
1854 *****************************************************************************
1855 */
1856
1857static int skd_format_internal_skspcl(struct skd_device *skdev)
1858{
1859 struct skd_special_context *skspcl = &skdev->internal_skspcl;
1860 struct fit_sg_descriptor *sgd = &skspcl->req.sksg_list[0];
1861 struct fit_msg_hdr *fmh;
1862 uint64_t dma_address;
1863 struct skd_scsi_request *scsi;
1864
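	/*
	 * Pre-build the internal FIT message: a 64-byte FIT header followed
	 * by one SCSI request, with a single SG descriptor pointing at the
	 * internal data buffer.  Internal commands reuse this layout and
	 * only patch the CDB and byte counts.
	 */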
1865 fmh = (struct fit_msg_hdr *)&skspcl->msg_buf[0];
1866 fmh->protocol_id = FIT_PROTOCOL_ID_SOFIT;
1867 fmh->num_protocol_cmds_coalesced = 1;
1868
1869 scsi = (struct skd_scsi_request *)&skspcl->msg_buf[64];
1870 memset(scsi, 0, sizeof(*scsi));
1871 dma_address = skspcl->req.sksg_dma_address;
1872 scsi->hdr.sg_list_dma_address = cpu_to_be64(dma_address);
1873 sgd->control = FIT_SGD_CONTROL_LAST;
1874 sgd->byte_count = 0;
1875 sgd->host_side_addr = skspcl->db_dma_address;
1876 sgd->dev_side_addr = 0;
1877 sgd->next_desc_ptr = 0LL;
1878
1879 return 1;
1880}
1881
1882#define WR_BUF_SIZE SKD_N_INTERNAL_BYTES
1883
1884static void skd_send_internal_skspcl(struct skd_device *skdev,
1885 struct skd_special_context *skspcl,
1886 u8 opcode)
1887{
1888 struct fit_sg_descriptor *sgd = &skspcl->req.sksg_list[0];
1889 struct skd_scsi_request *scsi;
1890 unsigned char *buf = skspcl->data_buf;
1891 int i;
1892
1893 if (skspcl->req.state != SKD_REQ_STATE_IDLE)
1894 /*
1895 * A refresh is already in progress.
1896 * Just wait for it to finish.
1897 */
1898 return;
1899
1900 SKD_ASSERT((skspcl->req.id & SKD_ID_INCR) == 0);
1901 skspcl->req.state = SKD_REQ_STATE_BUSY;
1902 skspcl->req.id += SKD_ID_INCR;
1903
1904 scsi = (struct skd_scsi_request *)&skspcl->msg_buf[64];
1905 scsi->hdr.tag = skspcl->req.id;
1906
1907 memset(scsi->cdb, 0, sizeof(scsi->cdb));
1908
1909 switch (opcode) {
1910 case TEST_UNIT_READY:
1911 scsi->cdb[0] = TEST_UNIT_READY;
1912 sgd->byte_count = 0;
1913 scsi->hdr.sg_list_len_bytes = 0;
1914 break;
1915
1916 case READ_CAPACITY:
1917 scsi->cdb[0] = READ_CAPACITY;
1918 sgd->byte_count = SKD_N_READ_CAP_BYTES;
1919 scsi->hdr.sg_list_len_bytes = cpu_to_be32(sgd->byte_count);
1920 break;
1921
1922 case INQUIRY:
1923 scsi->cdb[0] = INQUIRY;
1924 scsi->cdb[1] = 0x01; /* evpd */
1925 scsi->cdb[2] = 0x80; /* serial number page */
1926 scsi->cdb[4] = 0x10;
1927 sgd->byte_count = 16;
1928 scsi->hdr.sg_list_len_bytes = cpu_to_be32(sgd->byte_count);
1929 break;
1930
1931 case SYNCHRONIZE_CACHE:
1932 scsi->cdb[0] = SYNCHRONIZE_CACHE;
1933 sgd->byte_count = 0;
1934 scsi->hdr.sg_list_len_bytes = 0;
1935 break;
1936
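	/*
	 * WRITE_BUFFER/READ_BUFFER implement a loopback data-integrity
	 * check: write an incrementing byte pattern, read it back, and
	 * compare the result in skd_chk_read_buf().
	 */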
1937 case WRITE_BUFFER:
1938 scsi->cdb[0] = WRITE_BUFFER;
1939 scsi->cdb[1] = 0x02;
1940 scsi->cdb[7] = (WR_BUF_SIZE & 0xFF00) >> 8;
1941 scsi->cdb[8] = WR_BUF_SIZE & 0xFF;
1942 sgd->byte_count = WR_BUF_SIZE;
1943 scsi->hdr.sg_list_len_bytes = cpu_to_be32(sgd->byte_count);
1944 /* fill incrementing byte pattern */
1945 for (i = 0; i < sgd->byte_count; i++)
1946 buf[i] = i & 0xFF;
1947 break;
1948
1949 case READ_BUFFER:
1950 scsi->cdb[0] = READ_BUFFER;
1951 scsi->cdb[1] = 0x02;
1952 scsi->cdb[7] = (WR_BUF_SIZE & 0xFF00) >> 8;
1953 scsi->cdb[8] = WR_BUF_SIZE & 0xFF;
1954 sgd->byte_count = WR_BUF_SIZE;
1955 scsi->hdr.sg_list_len_bytes = cpu_to_be32(sgd->byte_count);
1956 memset(skspcl->data_buf, 0, sgd->byte_count);
1957 break;
1958
1959 default:
1960 SKD_ASSERT("Don't know what to send");
1961 return;
1962
1963 }
1964 skd_send_special_fitmsg(skdev, skspcl);
1965}
1966
1967static void skd_refresh_device_data(struct skd_device *skdev)
1968{
1969 struct skd_special_context *skspcl = &skdev->internal_skspcl;
1970
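	/*
	 * Kick off the internal command chain.  skd_complete_internal()
	 * advances it: TEST_UNIT_READY -> WRITE_BUFFER -> READ_BUFFER ->
	 * READ_CAPACITY -> INQUIRY, then brings the device online.
	 */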
1971 skd_send_internal_skspcl(skdev, skspcl, TEST_UNIT_READY);
1972}
1973
1974static int skd_chk_read_buf(struct skd_device *skdev,
1975 struct skd_special_context *skspcl)
1976{
1977 unsigned char *buf = skspcl->data_buf;
1978 int i;
1979
1980 /* check for incrementing byte pattern */
1981 for (i = 0; i < WR_BUF_SIZE; i++)
1982 if (buf[i] != (i & 0xFF))
1983 return 1;
1984
1985 return 0;
1986}
1987
1988static void skd_log_check_status(struct skd_device *skdev, u8 status, u8 key,
1989 u8 code, u8 qual, u8 fruc)
1990{
1991 /* If the check condition is of special interest, log a message */
1992 if ((status == SAM_STAT_CHECK_CONDITION) && (key == 0x02)
1993 && (code == 0x04) && (qual == 0x06)) {
1994 pr_err("(%s): *** LOST_WRITE_DATA ERROR *** key/asc/"
1995 "ascq/fruc %02x/%02x/%02x/%02x\n",
1996 skd_name(skdev), key, code, qual, fruc);
1997 }
1998}
1999
2000static void skd_complete_internal(struct skd_device *skdev,
2001 volatile struct fit_completion_entry_v1
2002 *skcomp,
2003 volatile struct fit_comp_error_info *skerr,
2004 struct skd_special_context *skspcl)
2005{
2006 u8 *buf = skspcl->data_buf;
2007 u8 status;
2008 int i;
2009 struct skd_scsi_request *scsi =
2010 (struct skd_scsi_request *)&skspcl->msg_buf[64];
2011
2012 SKD_ASSERT(skspcl == &skdev->internal_skspcl);
2013
2014 pr_debug("%s:%s:%d complete internal %x\n",
2015 skdev->name, __func__, __LINE__, scsi->cdb[0]);
2016
2017 skspcl->req.completion = *skcomp;
2018 skspcl->req.state = SKD_REQ_STATE_IDLE;
2019 skspcl->req.id += SKD_ID_INCR;
2020
2021 status = skspcl->req.completion.status;
2022
2023 skd_log_check_status(skdev, status, skerr->key, skerr->code,
2024 skerr->qual, skerr->fruc);
2025
2026 switch (scsi->cdb[0]) {
2027 case TEST_UNIT_READY:
2028 if (status == SAM_STAT_GOOD)
2029 skd_send_internal_skspcl(skdev, skspcl, WRITE_BUFFER);
2030 else if ((status == SAM_STAT_CHECK_CONDITION) &&
2031 (skerr->key == MEDIUM_ERROR))
2032 skd_send_internal_skspcl(skdev, skspcl, WRITE_BUFFER);
2033 else {
2034 if (skdev->state == SKD_DRVR_STATE_STOPPING) {
2035 pr_debug("%s:%s:%d TUR failed, don't send anymore state 0x%x\n",
2036 skdev->name, __func__, __LINE__,
2037 skdev->state);
2038 return;
2039 }
2040 pr_debug("%s:%s:%d **** TUR failed, retry skerr\n",
2041 skdev->name, __func__, __LINE__);
2042 skd_send_internal_skspcl(skdev, skspcl, 0x00);
2043 }
2044 break;
2045
2046 case WRITE_BUFFER:
2047 if (status == SAM_STAT_GOOD)
2048 skd_send_internal_skspcl(skdev, skspcl, READ_BUFFER);
2049 else {
2050 if (skdev->state == SKD_DRVR_STATE_STOPPING) {
2051 pr_debug("%s:%s:%d write buffer failed, don't send anymore state 0x%x\n",
2052 skdev->name, __func__, __LINE__,
2053 skdev->state);
2054 return;
2055 }
2056 pr_debug("%s:%s:%d **** write buffer failed, retry skerr\n",
2057 skdev->name, __func__, __LINE__);
2058 skd_send_internal_skspcl(skdev, skspcl, 0x00);
2059 }
2060 break;
2061
2062 case READ_BUFFER:
2063 if (status == SAM_STAT_GOOD) {
2064 if (skd_chk_read_buf(skdev, skspcl) == 0)
2065 skd_send_internal_skspcl(skdev, skspcl,
2066 READ_CAPACITY);
2067 else {
2068 pr_err(
2069 "(%s):*** W/R Buffer mismatch %d ***\n",
2070 skd_name(skdev), skdev->connect_retries);
2071 if (skdev->connect_retries <
2072 SKD_MAX_CONNECT_RETRIES) {
2073 skdev->connect_retries++;
2074 skd_soft_reset(skdev);
2075 } else {
2076 pr_err(
2077 "(%s): W/R Buffer Connect Error\n",
2078 skd_name(skdev));
2079 return;
2080 }
2081 }
2082
2083 } else {
2084 if (skdev->state == SKD_DRVR_STATE_STOPPING) {
2085 pr_debug("%s:%s:%d "
2086 "read buffer failed, don't send anymore state 0x%x\n",
2087 skdev->name, __func__, __LINE__,
2088 skdev->state);
2089 return;
2090 }
2091 pr_debug("%s:%s:%d "
2092 "**** read buffer failed, retry skerr\n",
2093 skdev->name, __func__, __LINE__);
2094 skd_send_internal_skspcl(skdev, skspcl, 0x00);
2095 }
2096 break;
2097
2098 case READ_CAPACITY:
2099 skdev->read_cap_is_valid = 0;
2100 if (status == SAM_STAT_GOOD) {
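			/* READ CAPACITY(10): bytes 0-3 are the last LBA,
			 * bytes 4-7 the block size, both big-endian. */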
2101 skdev->read_cap_last_lba =
2102 (buf[0] << 24) | (buf[1] << 16) |
2103 (buf[2] << 8) | buf[3];
2104 skdev->read_cap_blocksize =
2105 (buf[4] << 24) | (buf[5] << 16) |
2106 (buf[6] << 8) | buf[7];
2107
2108 pr_debug("%s:%s:%d last lba %d, bs %d\n",
2109 skdev->name, __func__, __LINE__,
2110 skdev->read_cap_last_lba,
2111 skdev->read_cap_blocksize);
2112
2113 set_capacity(skdev->disk, skdev->read_cap_last_lba + 1);
2114
2115 skdev->read_cap_is_valid = 1;
2116
2117 skd_send_internal_skspcl(skdev, skspcl, INQUIRY);
2118 } else if ((status == SAM_STAT_CHECK_CONDITION) &&
2119 (skerr->key == MEDIUM_ERROR)) {
2120 skdev->read_cap_last_lba = ~0;
2121 set_capacity(skdev->disk, skdev->read_cap_last_lba + 1);
2122 pr_debug("%s:%s:%d "
2123 "**** MEDIUM ERROR caused READCAP to fail, ignore failure and continue to inquiry\n",
2124 skdev->name, __func__, __LINE__);
2125 skd_send_internal_skspcl(skdev, skspcl, INQUIRY);
2126 } else {
2127 pr_debug("%s:%s:%d **** READCAP failed, retry TUR\n",
2128 skdev->name, __func__, __LINE__);
2129 skd_send_internal_skspcl(skdev, skspcl,
2130 TEST_UNIT_READY);
2131 }
2132 break;
2133
2134 case INQUIRY:
2135 skdev->inquiry_is_valid = 0;
2136 if (status == SAM_STAT_GOOD) {
2137 skdev->inquiry_is_valid = 1;
2138
2139 for (i = 0; i < 12; i++)
2140 skdev->inq_serial_num[i] = buf[i + 4];
2141 skdev->inq_serial_num[12] = 0;
2142 }
2143
2144 if (skd_unquiesce_dev(skdev) < 0)
2145 pr_debug("%s:%s:%d **** failed, to ONLINE device\n",
2146 skdev->name, __func__, __LINE__);
2147 /* connection is complete */
2148 skdev->connect_retries = 0;
2149 break;
2150
2151 case SYNCHRONIZE_CACHE:
2152 if (status == SAM_STAT_GOOD)
2153 skdev->sync_done = 1;
2154 else
2155 skdev->sync_done = -1;
2156 wake_up_interruptible(&skdev->waitq);
2157 break;
2158
2159 default:
2160 SKD_ASSERT("we didn't send this");
2161 }
2162}
2163
2164/*
2165 *****************************************************************************
2166 * FIT MESSAGES
2167 *****************************************************************************
2168 */
2169
2170static void skd_send_fitmsg(struct skd_device *skdev,
2171 struct skd_fitmsg_context *skmsg)
2172{
2173 u64 qcmd;
2174 struct fit_msg_hdr *fmh;
2175
2176 pr_debug("%s:%s:%d dma address 0x%llx, busy=%d\n",
2177 skdev->name, __func__, __LINE__,
2178 skmsg->mb_dma_address, skdev->in_flight);
2179 pr_debug("%s:%s:%d msg_buf 0x%p, offset %x\n",
2180 skdev->name, __func__, __LINE__,
2181 skmsg->msg_buf, skmsg->offset);
2182
2183 qcmd = skmsg->mb_dma_address;
2184 qcmd |= FIT_QCMD_QID_NORMAL;
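	/*
	 * The queue id and message size flags are OR'ed into the low bits
	 * of the message buffer's DMA address, which is presumably aligned
	 * enough that those bits are otherwise zero.
	 */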
2185
2186 fmh = (struct fit_msg_hdr *)skmsg->msg_buf;
2187 skmsg->outstanding = fmh->num_protocol_cmds_coalesced;
2188
2189 if (unlikely(skdev->dbg_level > 1)) {
2190 u8 *bp = (u8 *)skmsg->msg_buf;
2191 int i;
2192 for (i = 0; i < skmsg->length; i += 8) {
2193 pr_debug("%s:%s:%d msg[%2d] %02x %02x %02x %02x "
2194 "%02x %02x %02x %02x\n",
2195 skdev->name, __func__, __LINE__,
2196 i, bp[i + 0], bp[i + 1], bp[i + 2],
2197 bp[i + 3], bp[i + 4], bp[i + 5],
2198 bp[i + 6], bp[i + 7]);
2199 if (i == 0)
2200 i = 64 - 8;
2201 }
2202 }
2203
2204 if (skmsg->length > 256)
2205 qcmd |= FIT_QCMD_MSGSIZE_512;
2206 else if (skmsg->length > 128)
2207 qcmd |= FIT_QCMD_MSGSIZE_256;
2208 else if (skmsg->length > 64)
2209 qcmd |= FIT_QCMD_MSGSIZE_128;
2210 else
2211 /*
2212 * This makes no sense because the FIT msg header is
2213 * 64 bytes. If the msg is only 64 bytes long it has
2214 * no payload.
2215 */
2216 qcmd |= FIT_QCMD_MSGSIZE_64;
2217
2218 SKD_WRITEQ(skdev, qcmd, FIT_Q_COMMAND);
2219
2220}
2221
2222static void skd_send_special_fitmsg(struct skd_device *skdev,
2223 struct skd_special_context *skspcl)
2224{
2225 u64 qcmd;
2226
2227 if (unlikely(skdev->dbg_level > 1)) {
2228 u8 *bp = (u8 *)skspcl->msg_buf;
2229 int i;
2230
2231 for (i = 0; i < SKD_N_SPECIAL_FITMSG_BYTES; i += 8) {
2232 pr_debug("%s:%s:%d spcl[%2d] %02x %02x %02x %02x "
2233 "%02x %02x %02x %02x\n",
2234 skdev->name, __func__, __LINE__, i,
2235 bp[i + 0], bp[i + 1], bp[i + 2], bp[i + 3],
2236 bp[i + 4], bp[i + 5], bp[i + 6], bp[i + 7]);
2237 if (i == 0)
2238 i = 64 - 8;
2239 }
2240
2241 pr_debug("%s:%s:%d skspcl=%p id=%04x sksg_list=%p sksg_dma=%llx\n",
2242 skdev->name, __func__, __LINE__,
2243 skspcl, skspcl->req.id, skspcl->req.sksg_list,
2244 skspcl->req.sksg_dma_address);
2245 for (i = 0; i < skspcl->req.n_sg; i++) {
2246 struct fit_sg_descriptor *sgd =
2247 &skspcl->req.sksg_list[i];
2248
2249 pr_debug("%s:%s:%d sg[%d] count=%u ctrl=0x%x "
2250 "addr=0x%llx next=0x%llx\n",
2251 skdev->name, __func__, __LINE__,
2252 i, sgd->byte_count, sgd->control,
2253 sgd->host_side_addr, sgd->next_desc_ptr);
2254 }
2255 }
2256
2257 /*
2258 * Special FIT msgs are always 128 bytes: a 64-byte FIT hdr
2259 * and one 64-byte SSDI command.
2260 */
2261 qcmd = skspcl->mb_dma_address;
2262 qcmd |= FIT_QCMD_QID_NORMAL + FIT_QCMD_MSGSIZE_128;
2263
2264 SKD_WRITEQ(skdev, qcmd, FIT_Q_COMMAND);
2265}
2266
2267/*
2268 *****************************************************************************
2269 * COMPLETION QUEUE
2270 *****************************************************************************
2271 */
2272
2273static void skd_complete_other(struct skd_device *skdev,
2274 volatile struct fit_completion_entry_v1 *skcomp,
2275 volatile struct fit_comp_error_info *skerr);
2276
2277struct sns_info {
2278 u8 type;
2279 u8 stat;
2280 u8 key;
2281 u8 asc;
2282 u8 ascq;
2283 u8 mask;
2284 enum skd_check_status_action action;
2285};
2286
2287static struct sns_info skd_chkstat_table[] = {
2288 /* Good */
2289 { 0x70, 0x02, RECOVERED_ERROR, 0, 0, 0x1c,
2290 SKD_CHECK_STATUS_REPORT_GOOD },
2291
2292 /* Smart alerts */
2293 { 0x70, 0x02, NO_SENSE, 0x0B, 0x00, 0x1E, /* warnings */
2294 SKD_CHECK_STATUS_REPORT_SMART_ALERT },
2295 { 0x70, 0x02, NO_SENSE, 0x5D, 0x00, 0x1E, /* thresholds */
2296 SKD_CHECK_STATUS_REPORT_SMART_ALERT },
2297 { 0x70, 0x02, RECOVERED_ERROR, 0x0B, 0x01, 0x1F, /* temperature over trigger */
2298 SKD_CHECK_STATUS_REPORT_SMART_ALERT },
2299
2300 /* Retry (with limits) */
2301 { 0x70, 0x02, 0x0B, 0, 0, 0x1C, /* This one is for DMA ERROR */
2302 SKD_CHECK_STATUS_REQUEUE_REQUEST },
2303 { 0x70, 0x02, 0x06, 0x0B, 0x00, 0x1E, /* warnings */
2304 SKD_CHECK_STATUS_REQUEUE_REQUEST },
2305 { 0x70, 0x02, 0x06, 0x5D, 0x00, 0x1E, /* thresholds */
2306 SKD_CHECK_STATUS_REQUEUE_REQUEST },
2307 { 0x70, 0x02, 0x06, 0x80, 0x30, 0x1F, /* backup power */
2308 SKD_CHECK_STATUS_REQUEUE_REQUEST },
2309
2310 /* Busy (or about to be) */
2311 { 0x70, 0x02, 0x06, 0x3f, 0x01, 0x1F, /* fw changed */
2312 SKD_CHECK_STATUS_BUSY_IMMINENT },
2313};
2314
2315/*
2316 * Look up status and sense data to decide how to handle the error
2317 * from the device.
2318 * mask says which fields must match, e.g. mask=0x18 means check
2319 * type and stat; ignore key, asc, ascq.
2320 */
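/*
 * Mask bits: 0x10 = type, 0x08 = stat, 0x04 = key, 0x02 = asc, 0x01 = ascq.
 */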
2321
2322static enum skd_check_status_action
2323skd_check_status(struct skd_device *skdev,
2324 u8 cmp_status, volatile struct fit_comp_error_info *skerr)
2325{
2326 int i, n;
2327
2328 pr_err("(%s): key/asc/ascq/fruc %02x/%02x/%02x/%02x\n",
2329 skd_name(skdev), skerr->key, skerr->code, skerr->qual,
2330 skerr->fruc);
2331
2332 pr_debug("%s:%s:%d stat: t=%02x stat=%02x k=%02x c=%02x q=%02x fruc=%02x\n",
2333 skdev->name, __func__, __LINE__, skerr->type, cmp_status,
2334 skerr->key, skerr->code, skerr->qual, skerr->fruc);
2335
2336 /* Does the info match an entry in the good category? */
2337 n = sizeof(skd_chkstat_table) / sizeof(skd_chkstat_table[0]);
2338 for (i = 0; i < n; i++) {
2339 struct sns_info *sns = &skd_chkstat_table[i];
2340
2341 if (sns->mask & 0x10)
2342 if (skerr->type != sns->type)
2343 continue;
2344
2345 if (sns->mask & 0x08)
2346 if (cmp_status != sns->stat)
2347 continue;
2348
2349 if (sns->mask & 0x04)
2350 if (skerr->key != sns->key)
2351 continue;
2352
2353 if (sns->mask & 0x02)
2354 if (skerr->code != sns->asc)
2355 continue;
2356
2357 if (sns->mask & 0x01)
2358 if (skerr->qual != sns->ascq)
2359 continue;
2360
2361 if (sns->action == SKD_CHECK_STATUS_REPORT_SMART_ALERT) {
2362 pr_err("(%s): SMART Alert: sense key/asc/ascq "
2363 "%02x/%02x/%02x\n",
2364 skd_name(skdev), skerr->key,
2365 skerr->code, skerr->qual);
2366 }
2367 return sns->action;
2368 }
2369
2370 /* No other match, so nonzero status means error,
2371 * zero status means good
2372 */
2373 if (cmp_status) {
2374 pr_debug("%s:%s:%d status check: error\n",
2375 skdev->name, __func__, __LINE__);
2376 return SKD_CHECK_STATUS_REPORT_ERROR;
2377 }
2378
2379 pr_debug("%s:%s:%d status check good default\n",
2380 skdev->name, __func__, __LINE__);
2381 return SKD_CHECK_STATUS_REPORT_GOOD;
2382}
2383
2384static void skd_resolve_req_exception(struct skd_device *skdev,
2385 struct skd_request_context *skreq)
2386{
2387 u8 cmp_status = skreq->completion.status;
2388
2389 switch (skd_check_status(skdev, cmp_status, &skreq->err_info)) {
2390 case SKD_CHECK_STATUS_REPORT_GOOD:
2391 case SKD_CHECK_STATUS_REPORT_SMART_ALERT:
2392 skd_end_request(skdev, skreq, 0);
2393 break;
2394
2395 case SKD_CHECK_STATUS_BUSY_IMMINENT:
2396 skd_log_skreq(skdev, skreq, "retry(busy)");
2397 blk_requeue_request(skdev->queue, skreq->req);
2398 pr_info("(%s) drive BUSY imminent\n", skd_name(skdev));
2399 skdev->state = SKD_DRVR_STATE_BUSY_IMMINENT;
2400 skdev->timer_countdown = SKD_TIMER_MINUTES(20);
2401 skd_quiesce_dev(skdev);
2402 break;
2403
2404 case SKD_CHECK_STATUS_REQUEUE_REQUEST:
2405 if ((unsigned long) ++skreq->req->special < SKD_MAX_RETRIES) {
2406 skd_log_skreq(skdev, skreq, "retry");
2407 blk_requeue_request(skdev->queue, skreq->req);
2408 break;
2409 }
2410 /* fall through to report error */
2411
2412 case SKD_CHECK_STATUS_REPORT_ERROR:
2413 default:
2414 skd_end_request(skdev, skreq, -EIO);
2415 break;
2416 }
2417}
2418
2419/* assume spinlock is already held */
2420static void skd_release_skreq(struct skd_device *skdev,
2421 struct skd_request_context *skreq)
2422{
2423 u32 msg_slot;
2424 struct skd_fitmsg_context *skmsg;
2425
2426 u32 timo_slot;
2427
2428 /*
2429 * Reclaim the FIT msg buffer if this is
2430 * the first of the requests it carried to
2431 * be completed. The FIT msg buffer used to
2432 * send this request cannot be reused until
2433 * we are sure the s1120 card has copied
2434 * it to its memory. The FIT msg might have
2435 * contained several requests. As soon as
2436 * any of them are completed we know that
2437 * the entire FIT msg was transferred.
2438 * Only the first completed request will
2439 * match the FIT msg buffer id. The FIT
2440 * msg buffer id is immediately updated.
2441 * When subsequent requests complete the FIT
2442 * msg buffer id won't match, so we know
2443 * quite cheaply that it is already done.
2444 */
2445 msg_slot = skreq->fitmsg_id & SKD_ID_SLOT_MASK;
2446 SKD_ASSERT(msg_slot < skdev->num_fitmsg_context);
2447
2448 skmsg = &skdev->skmsg_table[msg_slot];
2449 if (skmsg->id == skreq->fitmsg_id) {
2450 SKD_ASSERT(skmsg->state == SKD_MSG_STATE_BUSY);
2451 SKD_ASSERT(skmsg->outstanding > 0);
2452 skmsg->outstanding--;
2453 if (skmsg->outstanding == 0) {
2454 skmsg->state = SKD_MSG_STATE_IDLE;
2455 skmsg->id += SKD_ID_INCR;
2456 skmsg->next = skdev->skmsg_free_list;
2457 skdev->skmsg_free_list = skmsg;
2458 }
2459 }
2460
2461	/*
2462	 * Decrement the number of active requests.
2463	 * Also decrement the count in the timeout slot.
2464	 */
2465 SKD_ASSERT(skdev->in_flight > 0);
2466 skdev->in_flight -= 1;
2467
2468 timo_slot = skreq->timeout_stamp & SKD_TIMEOUT_SLOT_MASK;
2469 SKD_ASSERT(skdev->timeout_slot[timo_slot] > 0);
2470 skdev->timeout_slot[timo_slot] -= 1;
2471
2472 /*
2473 * Reset backpointer
2474 */
2475 skreq->req = NULL;
2476
2477 /*
2478 * Reclaim the skd_request_context
2479 */
2480 skreq->state = SKD_REQ_STATE_IDLE;
2481 skreq->id += SKD_ID_INCR;
2482 skreq->next = skdev->skreq_free_list;
2483 skdev->skreq_free_list = skreq;
2484}
2485
2486#define DRIVER_INQ_EVPD_PAGE_CODE 0xDA
2487
2488static void skd_do_inq_page_00(struct skd_device *skdev,
2489 volatile struct fit_completion_entry_v1 *skcomp,
2490 volatile struct fit_comp_error_info *skerr,
2491 uint8_t *cdb, uint8_t *buf)
2492{
2493 uint16_t insert_pt, max_bytes, drive_pages, drive_bytes, new_size;
2494
2495 /* Caller requested "supported pages". The driver needs to insert
2496 * its page.
2497 */
2498 pr_debug("%s:%s:%d skd_do_driver_inquiry: modify supported pages.\n",
2499 skdev->name, __func__, __LINE__);
2500
2501 /* If the device rejected the request because the CDB was
2502 * improperly formed, then just leave.
2503 */
2504 if (skcomp->status == SAM_STAT_CHECK_CONDITION &&
2505 skerr->key == ILLEGAL_REQUEST && skerr->code == 0x24)
2506 return;
2507
2508 /* Get the amount of space the caller allocated */
2509 max_bytes = (cdb[3] << 8) | cdb[4];
2510
2511 /* Get the number of pages actually returned by the device */
2512 drive_pages = (buf[2] << 8) | buf[3];
2513 drive_bytes = drive_pages + 4;
2514 new_size = drive_pages + 1;
2515
2516 /* Supported pages must be in numerical order, so find where
2517 * the driver page needs to be inserted into the list of
2518 * pages returned by the device.
2519 */
2520 for (insert_pt = 4; insert_pt < drive_bytes; insert_pt++) {
2521 if (buf[insert_pt] == DRIVER_INQ_EVPD_PAGE_CODE)
2522			return; /* Device is using this page code; abort. */
2523 else if (buf[insert_pt] > DRIVER_INQ_EVPD_PAGE_CODE)
2524 break;
2525 }
2526
2527 if (insert_pt < max_bytes) {
2528 uint16_t u;
2529
2530 /* Shift everything up one byte to make room. */
2531 for (u = new_size + 3; u > insert_pt; u--)
2532 buf[u] = buf[u - 1];
2533 buf[insert_pt] = DRIVER_INQ_EVPD_PAGE_CODE;
2534
2535		/* Increment num_returned_bytes, preserving its big-endian byte order */
2536		skcomp->num_returned_bytes =
2537			be32_to_cpu(skcomp->num_returned_bytes) + 1;
2538		skcomp->num_returned_bytes =
2539			cpu_to_be32(skcomp->num_returned_bytes);
2540 }
2541
2542 /* update page length field to reflect the driver's page too */
2543 buf[2] = (uint8_t)((new_size >> 8) & 0xFF);
2544 buf[3] = (uint8_t)((new_size >> 0) & 0xFF);
2545}
2546
2547static void skd_get_link_info(struct pci_dev *pdev, u8 *speed, u8 *width)
2548{
2549 int pcie_reg;
2550 u16 pci_bus_speed;
2551 u8 pci_lanes;
2552
2553 pcie_reg = pci_find_capability(pdev, PCI_CAP_ID_EXP);
2554 if (pcie_reg) {
2555 u16 linksta;
2556 pci_read_config_word(pdev, pcie_reg + PCI_EXP_LNKSTA, &linksta);
2557
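		/* Link Status: current speed in bits 3:0, negotiated width in bits 9:4. */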
2558 pci_bus_speed = linksta & 0xF;
2559 pci_lanes = (linksta & 0x3F0) >> 4;
2560 } else {
2561 *speed = STEC_LINK_UNKNOWN;
2562 *width = 0xFF;
2563 return;
2564 }
2565
2566 switch (pci_bus_speed) {
2567 case 1:
2568 *speed = STEC_LINK_2_5GTS;
2569 break;
2570 case 2:
2571 *speed = STEC_LINK_5GTS;
2572 break;
2573 case 3:
2574 *speed = STEC_LINK_8GTS;
2575 break;
2576 default:
2577 *speed = STEC_LINK_UNKNOWN;
2578 break;
2579 }
2580
2581 if (pci_lanes <= 0x20)
2582 *width = pci_lanes;
2583 else
2584 *width = 0xFF;
2585}
2586
2587static void skd_do_inq_page_da(struct skd_device *skdev,
2588 volatile struct fit_completion_entry_v1 *skcomp,
2589 volatile struct fit_comp_error_info *skerr,
2590 uint8_t *cdb, uint8_t *buf)
2591{
2592 struct pci_dev *pdev = skdev->pdev;
2593 unsigned max_bytes;
2594 struct driver_inquiry_data inq;
2595 u16 val;
2596
2597 pr_debug("%s:%s:%d skd_do_driver_inquiry: return driver page\n",
2598 skdev->name, __func__, __LINE__);
2599
2600 memset(&inq, 0, sizeof(inq));
2601
2602 inq.page_code = DRIVER_INQ_EVPD_PAGE_CODE;
2603
2604 skd_get_link_info(pdev, &inq.pcie_link_speed, &inq.pcie_link_lanes);
2605 inq.pcie_bus_number = cpu_to_be16(pdev->bus->number);
2606 inq.pcie_device_number = PCI_SLOT(pdev->devfn);
2607 inq.pcie_function_number = PCI_FUNC(pdev->devfn);
2608
2609 pci_read_config_word(pdev, PCI_VENDOR_ID, &val);
2610 inq.pcie_vendor_id = cpu_to_be16(val);
2611
2612 pci_read_config_word(pdev, PCI_DEVICE_ID, &val);
2613 inq.pcie_device_id = cpu_to_be16(val);
2614
2615 pci_read_config_word(pdev, PCI_SUBSYSTEM_VENDOR_ID, &val);
2616 inq.pcie_subsystem_vendor_id = cpu_to_be16(val);
2617
2618 pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &val);
2619 inq.pcie_subsystem_device_id = cpu_to_be16(val);
2620
2621	/* Driver version, fixed length, padded with spaces on the right */
2622 inq.driver_version_length = sizeof(inq.driver_version);
2623 memset(&inq.driver_version, ' ', sizeof(inq.driver_version));
2624 memcpy(inq.driver_version, DRV_VER_COMPL,
2625 min(sizeof(inq.driver_version), strlen(DRV_VER_COMPL)));
2626
2627 inq.page_length = cpu_to_be16((sizeof(inq) - 4));
2628
2629 /* Clear the error set by the device */
2630 skcomp->status = SAM_STAT_GOOD;
2631 memset((void *)skerr, 0, sizeof(*skerr));
2632
2633 /* copy response into output buffer */
2634 max_bytes = (cdb[3] << 8) | cdb[4];
2635 memcpy(buf, &inq, min_t(unsigned, max_bytes, sizeof(inq)));
2636
2637	skcomp->num_returned_bytes =
2638		cpu_to_be32(min_t(uint16_t, max_bytes, sizeof(inq)));
2639}
2640
2641static void skd_do_driver_inq(struct skd_device *skdev,
2642 volatile struct fit_completion_entry_v1 *skcomp,
2643 volatile struct fit_comp_error_info *skerr,
2644 uint8_t *cdb, uint8_t *buf)
2645{
2646 if (!buf)
2647 return;
2648 else if (cdb[0] != INQUIRY)
2649 return; /* Not an INQUIRY */
2650 else if ((cdb[1] & 1) == 0)
2651 return; /* EVPD not set */
2652 else if (cdb[2] == 0)
2653 /* Need to add driver's page to supported pages list */
2654 skd_do_inq_page_00(skdev, skcomp, skerr, cdb, buf);
2655 else if (cdb[2] == DRIVER_INQ_EVPD_PAGE_CODE)
2656 /* Caller requested driver's page */
2657 skd_do_inq_page_da(skdev, skcomp, skerr, cdb, buf);
2658}
2659
2660static unsigned char *skd_sg_1st_page_ptr(struct scatterlist *sg)
2661{
2662 if (!sg)
2663 return NULL;
2664 if (!sg_page(sg))
2665 return NULL;
2666 return sg_virt(sg);
2667}
2668
2669static void skd_process_scsi_inq(struct skd_device *skdev,
2670 volatile struct fit_completion_entry_v1
2671 *skcomp,
2672 volatile struct fit_comp_error_info *skerr,
2673 struct skd_special_context *skspcl)
2674{
2675 uint8_t *buf;
2676 struct fit_msg_hdr *fmh = (struct fit_msg_hdr *)skspcl->msg_buf;
2677 struct skd_scsi_request *scsi_req = (struct skd_scsi_request *)&fmh[1];
2678
2679 dma_sync_sg_for_cpu(skdev->class_dev, skspcl->req.sg, skspcl->req.n_sg,
2680 skspcl->req.sg_data_dir);
2681 buf = skd_sg_1st_page_ptr(skspcl->req.sg);
2682
2683 if (buf)
2684 skd_do_driver_inq(skdev, skcomp, skerr, scsi_req->cdb, buf);
2685}
2686
2687
2688static int skd_isr_completion_posted(struct skd_device *skdev,
2689 int limit, int *enqueued)
2690{
2691 volatile struct fit_completion_entry_v1 *skcmp = NULL;
2692 volatile struct fit_comp_error_info *skerr;
2693 u16 req_id;
2694 u32 req_slot;
2695 struct skd_request_context *skreq;
2696 u16 cmp_cntxt = 0;
2697 u8 cmp_status = 0;
2698 u8 cmp_cycle = 0;
2699 u32 cmp_bytes = 0;
2700 int rc = 0;
2701 int processed = 0;
2702
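	/*
	 * Consume completion entries until the cycle value of the next
	 * entry no longer matches the expected cycle.  The expected value
	 * is advanced each time the ring index wraps, so stale entries
	 * from the previous pass are never mistaken for new ones.
	 */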
2703 for (;; ) {
2704 SKD_ASSERT(skdev->skcomp_ix < SKD_N_COMPLETION_ENTRY);
2705
2706 skcmp = &skdev->skcomp_table[skdev->skcomp_ix];
2707 cmp_cycle = skcmp->cycle;
2708 cmp_cntxt = skcmp->tag;
2709 cmp_status = skcmp->status;
2710 cmp_bytes = be32_to_cpu(skcmp->num_returned_bytes);
2711
2712 skerr = &skdev->skerr_table[skdev->skcomp_ix];
2713
2714 pr_debug("%s:%s:%d "
2715 "cycle=%d ix=%d got cycle=%d cmdctxt=0x%x stat=%d "
2716 "busy=%d rbytes=0x%x proto=%d\n",
2717 skdev->name, __func__, __LINE__, skdev->skcomp_cycle,
2718 skdev->skcomp_ix, cmp_cycle, cmp_cntxt, cmp_status,
2719 skdev->in_flight, cmp_bytes, skdev->proto_ver);
2720
2721 if (cmp_cycle != skdev->skcomp_cycle) {
2722 pr_debug("%s:%s:%d end of completions\n",
2723 skdev->name, __func__, __LINE__);
2724 break;
2725 }
2726 /*
2727 * Update the completion queue head index and possibly
2728 * the completion cycle count. 8-bit wrap-around.
2729 */
2730 skdev->skcomp_ix++;
2731 if (skdev->skcomp_ix >= SKD_N_COMPLETION_ENTRY) {
2732 skdev->skcomp_ix = 0;
2733 skdev->skcomp_cycle++;
2734 }
2735
2736 /*
2737 * The command context is a unique 32-bit ID. The low order
2738 * bits help locate the request. The request is usually a
2739 * r/w request (see skd_start() above) or a special request.
2740 */
2741 req_id = cmp_cntxt;
2742 req_slot = req_id & SKD_ID_SLOT_AND_TABLE_MASK;
2743
2744 /* Is this other than a r/w request? */
2745 if (req_slot >= skdev->num_req_context) {
2746 /*
2747 * This is not a completion for a r/w request.
2748 */
2749 skd_complete_other(skdev, skcmp, skerr);
2750 continue;
2751 }
2752
2753 skreq = &skdev->skreq_table[req_slot];
2754
2755 /*
2756 * Make sure the request ID for the slot matches.
2757 */
2758 if (skreq->id != req_id) {
2759 pr_debug("%s:%s:%d mismatch comp_id=0x%x req_id=0x%x\n",
2760 skdev->name, __func__, __LINE__,
2761 req_id, skreq->id);
2762 {
2763 u16 new_id = cmp_cntxt;
2764 pr_err("(%s): Completion mismatch "
2765 "comp_id=0x%04x skreq=0x%04x new=0x%04x\n",
2766 skd_name(skdev), req_id,
2767 skreq->id, new_id);
2768
2769 continue;
2770 }
2771 }
2772
2773 SKD_ASSERT(skreq->state == SKD_REQ_STATE_BUSY);
2774
2775 if (skreq->state == SKD_REQ_STATE_ABORTED) {
2776 pr_debug("%s:%s:%d reclaim req %p id=%04x\n",
2777 skdev->name, __func__, __LINE__,
2778 skreq, skreq->id);
2779 /* a previously timed out command can
2780 * now be cleaned up */
2781 skd_release_skreq(skdev, skreq);
2782 continue;
2783 }
2784
2785 skreq->completion = *skcmp;
2786 if (unlikely(cmp_status == SAM_STAT_CHECK_CONDITION)) {
2787 skreq->err_info = *skerr;
2788 skd_log_check_status(skdev, cmp_status, skerr->key,
2789 skerr->code, skerr->qual,
2790 skerr->fruc);
2791 }
2792 /* Release DMA resources for the request. */
2793 if (skreq->n_sg > 0)
2794 skd_postop_sg_list(skdev, skreq);
2795
2796 if (!skreq->req) {
2797 pr_debug("%s:%s:%d NULL backptr skdreq %p, "
2798 "req=0x%x req_id=0x%x\n",
2799 skdev->name, __func__, __LINE__,
2800 skreq, skreq->id, req_id);
2801 } else {
2802 /*
2803 * Capture the outcome and post it back to the
2804 * native request.
2805 */
2806 if (likely(cmp_status == SAM_STAT_GOOD))
2807 skd_end_request(skdev, skreq, 0);
2808 else
2809 skd_resolve_req_exception(skdev, skreq);
2810 }
2811
2812 /*
2813 * Release the skreq, its FIT msg (if one), timeout slot,
2814 * and queue depth.
2815 */
2816 skd_release_skreq(skdev, skreq);
2817
2818 /* skd_isr_comp_limit equal zero means no limit */
2819 if (limit) {
2820 if (++processed >= limit) {
2821 rc = 1;
2822 break;
2823 }
2824 }
2825 }
2826
2827 if ((skdev->state == SKD_DRVR_STATE_PAUSING)
2828 && (skdev->in_flight) == 0) {
2829 skdev->state = SKD_DRVR_STATE_PAUSED;
2830 wake_up_interruptible(&skdev->waitq);
2831 }
2832
2833 return rc;
2834}
2835
2836static void skd_complete_other(struct skd_device *skdev,
2837 volatile struct fit_completion_entry_v1 *skcomp,
2838 volatile struct fit_comp_error_info *skerr)
2839{
2840 u32 req_id = 0;
2841 u32 req_table;
2842 u32 req_slot;
2843 struct skd_special_context *skspcl;
2844
2845 req_id = skcomp->tag;
2846 req_table = req_id & SKD_ID_TABLE_MASK;
2847 req_slot = req_id & SKD_ID_SLOT_MASK;
2848
2849 pr_debug("%s:%s:%d table=0x%x id=0x%x slot=%d\n",
2850 skdev->name, __func__, __LINE__,
2851 req_table, req_id, req_slot);
2852
2853 /*
2854 * Based on the request id, determine how to dispatch this completion.
2855 * This switch/case finds the good cases and forwards the
2856 * completion entry. Errors are reported below the switch.
2857 */
2858 switch (req_table) {
2859 case SKD_ID_RW_REQUEST:
2860 /*
2861 * The caller, skd_completion_posted_isr() above,
2862 * handles r/w requests. The only way we get here
2863 * is if the req_slot is out of bounds.
2864 */
2865 break;
2866
2867 case SKD_ID_SPECIAL_REQUEST:
2868 /*
2869 * Make sure the req_slot is in bounds and that the id
2870 * matches.
2871 */
2872 if (req_slot < skdev->n_special) {
2873 skspcl = &skdev->skspcl_table[req_slot];
2874 if (skspcl->req.id == req_id &&
2875 skspcl->req.state == SKD_REQ_STATE_BUSY) {
2876 skd_complete_special(skdev,
2877 skcomp, skerr, skspcl);
2878 return;
2879 }
2880 }
2881 break;
2882
2883 case SKD_ID_INTERNAL:
2884 if (req_slot == 0) {
2885 skspcl = &skdev->internal_skspcl;
2886 if (skspcl->req.id == req_id &&
2887 skspcl->req.state == SKD_REQ_STATE_BUSY) {
2888 skd_complete_internal(skdev,
2889 skcomp, skerr, skspcl);
2890 return;
2891 }
2892 }
2893 break;
2894
2895 case SKD_ID_FIT_MSG:
2896 /*
2897 * These id's should never appear in a completion record.
2898 */
2899 break;
2900
2901 default:
2902 /*
2903		 * These ids should never appear anywhere.
2904 */
2905 break;
2906 }
2907
2908 /*
2909 * If we get here it is a bad or stale id.
2910 */
2911}
2912
2913static void skd_complete_special(struct skd_device *skdev,
2914 volatile struct fit_completion_entry_v1
2915 *skcomp,
2916 volatile struct fit_comp_error_info *skerr,
2917 struct skd_special_context *skspcl)
2918{
2919 pr_debug("%s:%s:%d completing special request %p\n",
2920 skdev->name, __func__, __LINE__, skspcl);
2921 if (skspcl->orphaned) {
2922 /* Discard orphaned request */
2923 /* ?: Can this release directly or does it need
2924 * to use a worker? */
2925 pr_debug("%s:%s:%d release orphaned %p\n",
2926 skdev->name, __func__, __LINE__, skspcl);
2927 skd_release_special(skdev, skspcl);
2928 return;
2929 }
2930
2931 skd_process_scsi_inq(skdev, skcomp, skerr, skspcl);
2932
2933 skspcl->req.state = SKD_REQ_STATE_COMPLETED;
2934 skspcl->req.completion = *skcomp;
2935 skspcl->req.err_info = *skerr;
2936
2937 skd_log_check_status(skdev, skspcl->req.completion.status, skerr->key,
2938 skerr->code, skerr->qual, skerr->fruc);
2939
2940 wake_up_interruptible(&skdev->waitq);
2941}
2942
2943/* assume spinlock is already held */
2944static void skd_release_special(struct skd_device *skdev,
2945 struct skd_special_context *skspcl)
2946{
2947 int i, was_depleted;
2948
2949 for (i = 0; i < skspcl->req.n_sg; i++) {
2950 struct page *page = sg_page(&skspcl->req.sg[i]);
2951 __free_page(page);
2952 }
2953
2954 was_depleted = (skdev->skspcl_free_list == NULL);
2955
2956 skspcl->req.state = SKD_REQ_STATE_IDLE;
2957 skspcl->req.id += SKD_ID_INCR;
2958 skspcl->req.next =
2959 (struct skd_request_context *)skdev->skspcl_free_list;
2960 skdev->skspcl_free_list = (struct skd_special_context *)skspcl;
2961
2962 if (was_depleted) {
2963 pr_debug("%s:%s:%d skspcl was depleted\n",
2964 skdev->name, __func__, __LINE__);
2965		/* Free list was depleted. There might be waiters. */
2966 wake_up_interruptible(&skdev->waitq);
2967 }
2968}
2969
2970static void skd_reset_skcomp(struct skd_device *skdev)
2971{
2972 u32 nbytes;
2973 struct fit_completion_entry_v1 *skcomp;
2974
2975 nbytes = sizeof(*skcomp) * SKD_N_COMPLETION_ENTRY;
2976 nbytes += sizeof(struct fit_comp_error_info) * SKD_N_COMPLETION_ENTRY;
2977
2978 memset(skdev->skcomp_table, 0, nbytes);
2979
2980 skdev->skcomp_ix = 0;
2981 skdev->skcomp_cycle = 1;
2982}
2983
2984/*
2985 *****************************************************************************
2986 * INTERRUPTS
2987 *****************************************************************************
2988 */
2989static void skd_completion_worker(struct work_struct *work)
2990{
2991 struct skd_device *skdev =
2992 container_of(work, struct skd_device, completion_worker);
2993 unsigned long flags;
2994 int flush_enqueued = 0;
2995
2996 spin_lock_irqsave(&skdev->lock, flags);
2997
2998	/*
2999	 * Pass in limit=0, which means no limit:
3000	 * process everything in the completion queue.
3001	 */
3002 skd_isr_completion_posted(skdev, 0, &flush_enqueued);
3003 skd_request_fn(skdev->queue);
3004
3005 spin_unlock_irqrestore(&skdev->lock, flags);
3006}
3007
3008static void skd_isr_msg_from_dev(struct skd_device *skdev);
3009
3010static irqreturn_t
3011skd_isr(int irq, void *ptr)
3012{
3013 struct skd_device *skdev;
3014 u32 intstat;
3015 u32 ack;
3016 int rc = 0;
3017 int deferred = 0;
3018 int flush_enqueued = 0;
3019
3020 skdev = (struct skd_device *)ptr;
3021 spin_lock(&skdev->lock);
3022
3023 for (;; ) {
3024 intstat = SKD_READL(skdev, FIT_INT_STATUS_HOST);
3025
3026 ack = FIT_INT_DEF_MASK;
3027 ack &= intstat;
3028
3029 pr_debug("%s:%s:%d intstat=0x%x ack=0x%x\n",
3030 skdev->name, __func__, __LINE__, intstat, ack);
3031
3032		/* As long as an interrupt is pending on the device, keep
3033		 * running the loop. When none remain, get out; if we never
3034		 * did any processing, still defer to the completion handler.
3035		 */
3036 if (ack == 0) {
3037 /* No interrupts on device, but run the completion
3038 * processor anyway?
3039 */
3040 if (rc == 0)
3041 if (likely (skdev->state
3042 == SKD_DRVR_STATE_ONLINE))
3043 deferred = 1;
3044 break;
3045 }
3046
3047 rc = IRQ_HANDLED;
3048
3049 SKD_WRITEL(skdev, ack, FIT_INT_STATUS_HOST);
3050
3051 if (likely((skdev->state != SKD_DRVR_STATE_LOAD) &&
3052 (skdev->state != SKD_DRVR_STATE_STOPPING))) {
3053 if (intstat & FIT_ISH_COMPLETION_POSTED) {
3054 /*
3055 * If we have already deferred completion
3056 * processing, don't bother running it again
3057 */
3058 if (deferred == 0)
3059 deferred =
3060 skd_isr_completion_posted(skdev,
3061 skd_isr_comp_limit, &flush_enqueued);
3062 }
3063
3064 if (intstat & FIT_ISH_FW_STATE_CHANGE) {
3065 skd_isr_fwstate(skdev);
3066 if (skdev->state == SKD_DRVR_STATE_FAULT ||
3067 skdev->state ==
3068 SKD_DRVR_STATE_DISAPPEARED) {
3069 spin_unlock(&skdev->lock);
3070 return rc;
3071 }
3072 }
3073
3074 if (intstat & FIT_ISH_MSG_FROM_DEV)
3075 skd_isr_msg_from_dev(skdev);
3076 }
3077 }
3078
3079 if (unlikely(flush_enqueued))
3080 skd_request_fn(skdev->queue);
3081
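	/* Any remaining completion work is handed to the worker thread
	 * rather than being done here in hard interrupt context. */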
3082 if (deferred)
3083 schedule_work(&skdev->completion_worker);
3084 else if (!flush_enqueued)
3085 skd_request_fn(skdev->queue);
3086
3087 spin_unlock(&skdev->lock);
3088
3089 return rc;
3090}
3091
3092static void skd_drive_fault(struct skd_device *skdev)
3093{
3094 skdev->state = SKD_DRVR_STATE_FAULT;
3095 pr_err("(%s): Drive FAULT\n", skd_name(skdev));
3096}
3097
3098static void skd_drive_disappeared(struct skd_device *skdev)
3099{
3100 skdev->state = SKD_DRVR_STATE_DISAPPEARED;
3101 pr_err("(%s): Drive DISAPPEARED\n", skd_name(skdev));
3102}
3103
3104static void skd_isr_fwstate(struct skd_device *skdev)
3105{
3106 u32 sense;
3107 u32 state;
3108 u32 mtd;
3109 int prev_driver_state = skdev->state;
3110
3111 sense = SKD_READL(skdev, FIT_STATUS);
3112 state = sense & FIT_SR_DRIVE_STATE_MASK;
3113
3114 pr_err("(%s): s1120 state %s(%d)=>%s(%d)\n",
3115 skd_name(skdev),
3116 skd_drive_state_to_str(skdev->drive_state), skdev->drive_state,
3117 skd_drive_state_to_str(state), state);
3118
3119 skdev->drive_state = state;
3120
3121 switch (skdev->drive_state) {
3122 case FIT_SR_DRIVE_INIT:
3123 if (skdev->state == SKD_DRVR_STATE_PROTOCOL_MISMATCH) {
3124 skd_disable_interrupts(skdev);
3125 break;
3126 }
3127 if (skdev->state == SKD_DRVR_STATE_RESTARTING)
3128 skd_recover_requests(skdev, 0);
3129 if (skdev->state == SKD_DRVR_STATE_WAIT_BOOT) {
3130 skdev->timer_countdown = SKD_STARTING_TIMO;
3131 skdev->state = SKD_DRVR_STATE_STARTING;
3132 skd_soft_reset(skdev);
3133 break;
3134 }
3135 mtd = FIT_MXD_CONS(FIT_MTD_FITFW_INIT, 0, 0);
3136 SKD_WRITEL(skdev, mtd, FIT_MSG_TO_DEVICE);
3137 skdev->last_mtd = mtd;
3138 break;
3139
3140 case FIT_SR_DRIVE_ONLINE:
3141 skdev->cur_max_queue_depth = skd_max_queue_depth;
3142 if (skdev->cur_max_queue_depth > skdev->dev_max_queue_depth)
3143 skdev->cur_max_queue_depth = skdev->dev_max_queue_depth;
3144
3145 skdev->queue_low_water_mark =
3146 skdev->cur_max_queue_depth * 2 / 3 + 1;
3147 if (skdev->queue_low_water_mark < 1)
3148 skdev->queue_low_water_mark = 1;
3149 pr_info(
3150 "(%s): Queue depth limit=%d dev=%d lowat=%d\n",
3151 skd_name(skdev),
3152 skdev->cur_max_queue_depth,
3153 skdev->dev_max_queue_depth, skdev->queue_low_water_mark);
3154
3155 skd_refresh_device_data(skdev);
3156 break;
3157
3158 case FIT_SR_DRIVE_BUSY:
3159 skdev->state = SKD_DRVR_STATE_BUSY;
3160 skdev->timer_countdown = SKD_BUSY_TIMO;
3161 skd_quiesce_dev(skdev);
3162 break;
3163 case FIT_SR_DRIVE_BUSY_SANITIZE:
3164		/* set the timer for 3 seconds; we'll abort any unfinished
3165		 * commands after it expires
3166		 */
3167 skdev->state = SKD_DRVR_STATE_BUSY_SANITIZE;
3168 skdev->timer_countdown = SKD_TIMER_SECONDS(3);
3169 blk_start_queue(skdev->queue);
3170 break;
3171 case FIT_SR_DRIVE_BUSY_ERASE:
3172 skdev->state = SKD_DRVR_STATE_BUSY_ERASE;
3173 skdev->timer_countdown = SKD_BUSY_TIMO;
3174 break;
3175 case FIT_SR_DRIVE_OFFLINE:
3176 skdev->state = SKD_DRVR_STATE_IDLE;
3177 break;
3178 case FIT_SR_DRIVE_SOFT_RESET:
3179 switch (skdev->state) {
3180 case SKD_DRVR_STATE_STARTING:
3181 case SKD_DRVR_STATE_RESTARTING:
3182 /* Expected by a caller of skd_soft_reset() */
3183 break;
3184 default:
3185 skdev->state = SKD_DRVR_STATE_RESTARTING;
3186 break;
3187 }
3188 break;
3189 case FIT_SR_DRIVE_FW_BOOTING:
3190 pr_debug("%s:%s:%d ISR FIT_SR_DRIVE_FW_BOOTING %s\n",
3191 skdev->name, __func__, __LINE__, skdev->name);
3192 skdev->state = SKD_DRVR_STATE_WAIT_BOOT;
3193 skdev->timer_countdown = SKD_WAIT_BOOT_TIMO;
3194 break;
3195
3196 case FIT_SR_DRIVE_DEGRADED:
3197 case FIT_SR_PCIE_LINK_DOWN:
3198 case FIT_SR_DRIVE_NEED_FW_DOWNLOAD:
3199 break;
3200
3201 case FIT_SR_DRIVE_FAULT:
3202 skd_drive_fault(skdev);
3203 skd_recover_requests(skdev, 0);
3204 blk_start_queue(skdev->queue);
3205 break;
3206
3207 /* PCIe bus returned all Fs? */
3208 case 0xFF:
3209 pr_info("(%s): state=0x%x sense=0x%x\n",
3210 skd_name(skdev), state, sense);
3211 skd_drive_disappeared(skdev);
3212 skd_recover_requests(skdev, 0);
3213 blk_start_queue(skdev->queue);
3214 break;
3215 default:
3216 /*
3217		 * Unknown FW state. Wait for a state we recognize.
3218 */
3219 break;
3220 }
3221 pr_err("(%s): Driver state %s(%d)=>%s(%d)\n",
3222 skd_name(skdev),
3223 skd_skdev_state_to_str(prev_driver_state), prev_driver_state,
3224 skd_skdev_state_to_str(skdev->state), skdev->state);
3225}
3226
3227static void skd_recover_requests(struct skd_device *skdev, int requeue)
3228{
3229 int i;
3230
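	/*
	 * Walk the request, FIT message and special-request tables, failing
	 * or requeueing anything still busy, then rebuild the free lists
	 * and clear the timeout accounting.
	 */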
3231 for (i = 0; i < skdev->num_req_context; i++) {
3232 struct skd_request_context *skreq = &skdev->skreq_table[i];
3233
3234 if (skreq->state == SKD_REQ_STATE_BUSY) {
3235 skd_log_skreq(skdev, skreq, "recover");
3236
3237 SKD_ASSERT((skreq->id & SKD_ID_INCR) != 0);
3238 SKD_ASSERT(skreq->req != NULL);
3239
3240 /* Release DMA resources for the request. */
3241 if (skreq->n_sg > 0)
3242 skd_postop_sg_list(skdev, skreq);
3243
3244 if (requeue &&
3245 (unsigned long) ++skreq->req->special <
3246 SKD_MAX_RETRIES)
3247 blk_requeue_request(skdev->queue, skreq->req);
3248 else
3249 skd_end_request(skdev, skreq, -EIO);
3250
3251 skreq->req = NULL;
3252
3253 skreq->state = SKD_REQ_STATE_IDLE;
3254 skreq->id += SKD_ID_INCR;
3255 }
3256 if (i > 0)
3257 skreq[-1].next = skreq;
3258 skreq->next = NULL;
3259 }
3260 skdev->skreq_free_list = skdev->skreq_table;
3261
3262 for (i = 0; i < skdev->num_fitmsg_context; i++) {
3263 struct skd_fitmsg_context *skmsg = &skdev->skmsg_table[i];
3264
3265 if (skmsg->state == SKD_MSG_STATE_BUSY) {
3266 skd_log_skmsg(skdev, skmsg, "salvaged");
3267 SKD_ASSERT((skmsg->id & SKD_ID_INCR) != 0);
3268 skmsg->state = SKD_MSG_STATE_IDLE;
3269 skmsg->id += SKD_ID_INCR;
3270 }
3271 if (i > 0)
3272 skmsg[-1].next = skmsg;
3273 skmsg->next = NULL;
3274 }
3275 skdev->skmsg_free_list = skdev->skmsg_table;
3276
3277 for (i = 0; i < skdev->n_special; i++) {
3278 struct skd_special_context *skspcl = &skdev->skspcl_table[i];
3279
3280		/* If orphaned, reclaim it: it has already been reported to
3281		 * the process as an error (it was waiting for a completion
3282		 * that never came, and now it never will).
3283		 * If busy, change it to a state that makes it error out in
3284		 * the wait routine, which does the normal reporting and
3285		 * reclaiming.
3286 */
3287 if (skspcl->req.state == SKD_REQ_STATE_BUSY) {
3288 if (skspcl->orphaned) {
3289 pr_debug("%s:%s:%d orphaned %p\n",
3290 skdev->name, __func__, __LINE__,
3291 skspcl);
3292 skd_release_special(skdev, skspcl);
3293 } else {
3294 pr_debug("%s:%s:%d not orphaned %p\n",
3295 skdev->name, __func__, __LINE__,
3296 skspcl);
3297 skspcl->req.state = SKD_REQ_STATE_ABORTED;
3298 }
3299 }
3300 }
3301 skdev->skspcl_free_list = skdev->skspcl_table;
3302
3303 for (i = 0; i < SKD_N_TIMEOUT_SLOT; i++)
3304 skdev->timeout_slot[i] = 0;
3305
3306 skdev->in_flight = 0;
3307}
3308
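/*
 * Handle the FIT "message from device" doorbell. During startup this
 * drives the init handshake implemented by the switch below:
 * FITFW_INIT -> GET_CMDQ_DEPTH -> SET_COMPQ_DEPTH -> SET_COMPQ_ADDR ->
 * CMD_LOG_HOST_ID -> CMD_LOG_TIME_STAMP_LO/HI -> ARM_QUEUE, with each
 * acknowledgment triggering the next message to the device. Acks that
 * do not match the last message we sent are ignored.
 */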
3309static void skd_isr_msg_from_dev(struct skd_device *skdev)
3310{
3311 u32 mfd;
3312 u32 mtd;
3313 u32 data;
3314
3315 mfd = SKD_READL(skdev, FIT_MSG_FROM_DEVICE);
3316
3317 pr_debug("%s:%s:%d mfd=0x%x last_mtd=0x%x\n",
3318 skdev->name, __func__, __LINE__, mfd, skdev->last_mtd);
3319
3320 /* ignore any mtd that is an ack for something we didn't send */
3321 if (FIT_MXD_TYPE(mfd) != FIT_MXD_TYPE(skdev->last_mtd))
3322 return;
3323
3324 switch (FIT_MXD_TYPE(mfd)) {
3325 case FIT_MTD_FITFW_INIT:
3326 skdev->proto_ver = FIT_PROTOCOL_MAJOR_VER(mfd);
3327
3328 if (skdev->proto_ver != FIT_PROTOCOL_VERSION_1) {
3329 pr_err("(%s): protocol mismatch\n",
3330 skdev->name);
3331 pr_err("(%s): got=%d support=%d\n",
3332 skdev->name, skdev->proto_ver,
3333 FIT_PROTOCOL_VERSION_1);
3334 pr_err("(%s): please upgrade driver\n",
3335 skdev->name);
3336 skdev->state = SKD_DRVR_STATE_PROTOCOL_MISMATCH;
3337 skd_soft_reset(skdev);
3338 break;
3339 }
3340 mtd = FIT_MXD_CONS(FIT_MTD_GET_CMDQ_DEPTH, 0, 0);
3341 SKD_WRITEL(skdev, mtd, FIT_MSG_TO_DEVICE);
3342 skdev->last_mtd = mtd;
3343 break;
3344
3345 case FIT_MTD_GET_CMDQ_DEPTH:
3346 skdev->dev_max_queue_depth = FIT_MXD_DATA(mfd);
3347 mtd = FIT_MXD_CONS(FIT_MTD_SET_COMPQ_DEPTH, 0,
3348 SKD_N_COMPLETION_ENTRY);
3349 SKD_WRITEL(skdev, mtd, FIT_MSG_TO_DEVICE);
3350 skdev->last_mtd = mtd;
3351 break;
3352
3353 case FIT_MTD_SET_COMPQ_DEPTH:
3354 SKD_WRITEQ(skdev, skdev->cq_dma_address, FIT_MSG_TO_DEVICE_ARG);
3355 mtd = FIT_MXD_CONS(FIT_MTD_SET_COMPQ_ADDR, 0, 0);
3356 SKD_WRITEL(skdev, mtd, FIT_MSG_TO_DEVICE);
3357 skdev->last_mtd = mtd;
3358 break;
3359
3360 case FIT_MTD_SET_COMPQ_ADDR:
3361 skd_reset_skcomp(skdev);
3362 mtd = FIT_MXD_CONS(FIT_MTD_CMD_LOG_HOST_ID, 0, skdev->devno);
3363 SKD_WRITEL(skdev, mtd, FIT_MSG_TO_DEVICE);
3364 skdev->last_mtd = mtd;
3365 break;
3366
3367 case FIT_MTD_CMD_LOG_HOST_ID:
3368 skdev->connect_time_stamp = get_seconds();
3369 data = skdev->connect_time_stamp & 0xFFFF;
3370 mtd = FIT_MXD_CONS(FIT_MTD_CMD_LOG_TIME_STAMP_LO, 0, data);
3371 SKD_WRITEL(skdev, mtd, FIT_MSG_TO_DEVICE);
3372 skdev->last_mtd = mtd;
3373 break;
3374
3375 case FIT_MTD_CMD_LOG_TIME_STAMP_LO:
3376 skdev->drive_jiffies = FIT_MXD_DATA(mfd);
3377 data = (skdev->connect_time_stamp >> 16) & 0xFFFF;
3378 mtd = FIT_MXD_CONS(FIT_MTD_CMD_LOG_TIME_STAMP_HI, 0, data);
3379 SKD_WRITEL(skdev, mtd, FIT_MSG_TO_DEVICE);
3380 skdev->last_mtd = mtd;
3381 break;
3382
3383 case FIT_MTD_CMD_LOG_TIME_STAMP_HI:
3384 skdev->drive_jiffies |= (FIT_MXD_DATA(mfd) << 16);
3385 mtd = FIT_MXD_CONS(FIT_MTD_ARM_QUEUE, 0, 0);
3386 SKD_WRITEL(skdev, mtd, FIT_MSG_TO_DEVICE);
3387 skdev->last_mtd = mtd;
3388
3389 pr_err("(%s): Time sync driver=0x%x device=0x%x\n",
3390 skd_name(skdev),
3391 skdev->connect_time_stamp, skdev->drive_jiffies);
3392 break;
3393
3394 case FIT_MTD_ARM_QUEUE:
3395 skdev->last_mtd = 0;
3396 /*
3397 * State should be, or soon will be, FIT_SR_DRIVE_ONLINE.
3398 */
3399 break;
3400
3401 default:
3402 break;
3403 }
3404}
3405
3406static void skd_disable_interrupts(struct skd_device *skdev)
3407{
3408 u32 sense;
3409
3410 sense = SKD_READL(skdev, FIT_CONTROL);
3411 sense &= ~FIT_CR_ENABLE_INTERRUPTS;
3412 SKD_WRITEL(skdev, sense, FIT_CONTROL);
3413 pr_debug("%s:%s:%d sense 0x%x\n",
3414 skdev->name, __func__, __LINE__, sense);
3415
3416	/* Note that all 1s are written. A 1-bit means
3417	 * disable, a 0-bit means enable.
3418 */
3419 SKD_WRITEL(skdev, ~0, FIT_INT_MASK_HOST);
3420}
3421
3422static void skd_enable_interrupts(struct skd_device *skdev)
3423{
3424 u32 val;
3425
3426 /* unmask interrupts first */
3427 val = FIT_ISH_FW_STATE_CHANGE +
3428 FIT_ISH_COMPLETION_POSTED + FIT_ISH_MSG_FROM_DEV;
3429
3430	/* Note that the complement of the mask is written. A 1-bit means
3431	 * disable, a 0-bit means enable. */
3432 SKD_WRITEL(skdev, ~val, FIT_INT_MASK_HOST);
3433 pr_debug("%s:%s:%d interrupt mask=0x%x\n",
3434 skdev->name, __func__, __LINE__, ~val);
3435
3436 val = SKD_READL(skdev, FIT_CONTROL);
3437 val |= FIT_CR_ENABLE_INTERRUPTS;
3438 pr_debug("%s:%s:%d control=0x%x\n",
3439 skdev->name, __func__, __LINE__, val);
3440 SKD_WRITEL(skdev, val, FIT_CONTROL);
3441}
3442
3443/*
3444 *****************************************************************************
3445 * START, STOP, RESTART, QUIESCE, UNQUIESCE
3446 *****************************************************************************
3447 */
3448
3449static void skd_soft_reset(struct skd_device *skdev)
3450{
3451 u32 val;
3452
3453 val = SKD_READL(skdev, FIT_CONTROL);
3454 val |= (FIT_CR_SOFT_RESET);
3455 pr_debug("%s:%s:%d control=0x%x\n",
3456 skdev->name, __func__, __LINE__, val);
3457 SKD_WRITEL(skdev, val, FIT_CONTROL);
3458}
3459
3460static void skd_start_device(struct skd_device *skdev)
3461{
3462 unsigned long flags;
3463 u32 sense;
3464 u32 state;
3465
3466 spin_lock_irqsave(&skdev->lock, flags);
3467
3468 /* ack all ghost interrupts */
3469 SKD_WRITEL(skdev, FIT_INT_DEF_MASK, FIT_INT_STATUS_HOST);
3470
3471 sense = SKD_READL(skdev, FIT_STATUS);
3472
3473 pr_debug("%s:%s:%d initial status=0x%x\n",
3474 skdev->name, __func__, __LINE__, sense);
3475
3476 state = sense & FIT_SR_DRIVE_STATE_MASK;
3477 skdev->drive_state = state;
3478 skdev->last_mtd = 0;
3479
3480 skdev->state = SKD_DRVR_STATE_STARTING;
3481 skdev->timer_countdown = SKD_STARTING_TIMO;
3482
3483 skd_enable_interrupts(skdev);
3484
3485 switch (skdev->drive_state) {
3486 case FIT_SR_DRIVE_OFFLINE:
3487 pr_err("(%s): Drive offline...\n", skd_name(skdev));
3488 break;
3489
3490 case FIT_SR_DRIVE_FW_BOOTING:
3491 pr_debug("%s:%s:%d FIT_SR_DRIVE_FW_BOOTING %s\n",
3492 skdev->name, __func__, __LINE__, skdev->name);
3493 skdev->state = SKD_DRVR_STATE_WAIT_BOOT;
3494 skdev->timer_countdown = SKD_WAIT_BOOT_TIMO;
3495 break;
3496
3497 case FIT_SR_DRIVE_BUSY_SANITIZE:
3498 pr_info("(%s): Start: BUSY_SANITIZE\n",
3499 skd_name(skdev));
3500 skdev->state = SKD_DRVR_STATE_BUSY_SANITIZE;
3501 skdev->timer_countdown = SKD_STARTED_BUSY_TIMO;
3502 break;
3503
3504 case FIT_SR_DRIVE_BUSY_ERASE:
3505 pr_info("(%s): Start: BUSY_ERASE\n", skd_name(skdev));
3506 skdev->state = SKD_DRVR_STATE_BUSY_ERASE;
3507 skdev->timer_countdown = SKD_STARTED_BUSY_TIMO;
3508 break;
3509
3510 case FIT_SR_DRIVE_INIT:
3511 case FIT_SR_DRIVE_ONLINE:
3512 skd_soft_reset(skdev);
3513 break;
3514
3515 case FIT_SR_DRIVE_BUSY:
3516 pr_err("(%s): Drive Busy...\n", skd_name(skdev));
3517 skdev->state = SKD_DRVR_STATE_BUSY;
3518 skdev->timer_countdown = SKD_STARTED_BUSY_TIMO;
3519 break;
3520
3521 case FIT_SR_DRIVE_SOFT_RESET:
3522 pr_err("(%s) drive soft reset in prog\n",
3523 skd_name(skdev));
3524 break;
3525
3526 case FIT_SR_DRIVE_FAULT:
3527		/* Fault state is bad: a soft reset won't clear it.
3528		 * A hard reset might, but it is unclear whether the device
3529		 * supports one. For now, just fault so the system doesn't hang.
3530 */
3531 skd_drive_fault(skdev);
3532		/* start the queue so we can respond to requests with an error */
3533 pr_debug("%s:%s:%d starting %s queue\n",
3534 skdev->name, __func__, __LINE__, skdev->name);
3535 blk_start_queue(skdev->queue);
3536 skdev->gendisk_on = -1;
3537 wake_up_interruptible(&skdev->waitq);
3538 break;
3539
3540 case 0xFF:
3541 /* Most likely the device isn't there or isn't responding
3542 * to the BAR1 addresses. */
3543 skd_drive_disappeared(skdev);
3544		/* start the queue so we can respond to requests with an error */
3545 pr_debug("%s:%s:%d starting %s queue to error-out reqs\n",
3546 skdev->name, __func__, __LINE__, skdev->name);
3547 blk_start_queue(skdev->queue);
3548 skdev->gendisk_on = -1;
3549 wake_up_interruptible(&skdev->waitq);
3550 break;
3551
3552 default:
3553 pr_err("(%s) Start: unknown state %x\n",
3554 skd_name(skdev), skdev->drive_state);
3555 break;
3556 }
3557
3558 state = SKD_READL(skdev, FIT_CONTROL);
3559 pr_debug("%s:%s:%d FIT Control Status=0x%x\n",
3560 skdev->name, __func__, __LINE__, state);
3561
3562 state = SKD_READL(skdev, FIT_INT_STATUS_HOST);
3563 pr_debug("%s:%s:%d Intr Status=0x%x\n",
3564 skdev->name, __func__, __LINE__, state);
3565
3566 state = SKD_READL(skdev, FIT_INT_MASK_HOST);
3567 pr_debug("%s:%s:%d Intr Mask=0x%x\n",
3568 skdev->name, __func__, __LINE__, state);
3569
3570 state = SKD_READL(skdev, FIT_MSG_FROM_DEVICE);
3571 pr_debug("%s:%s:%d Msg from Dev=0x%x\n",
3572 skdev->name, __func__, __LINE__, state);
3573
3574 state = SKD_READL(skdev, FIT_HW_VERSION);
3575 pr_debug("%s:%s:%d HW version=0x%x\n",
3576 skdev->name, __func__, __LINE__, state);
3577
3578 spin_unlock_irqrestore(&skdev->lock, flags);
3579}
3580
3581static void skd_stop_device(struct skd_device *skdev)
3582{
3583 unsigned long flags;
3584 struct skd_special_context *skspcl = &skdev->internal_skspcl;
3585 u32 dev_state;
3586 int i;
3587
3588 spin_lock_irqsave(&skdev->lock, flags);
3589
3590 if (skdev->state != SKD_DRVR_STATE_ONLINE) {
3591 pr_err("(%s): skd_stop_device not online no sync\n",
3592 skd_name(skdev));
3593 goto stop_out;
3594 }
3595
3596 if (skspcl->req.state != SKD_REQ_STATE_IDLE) {
3597 pr_err("(%s): skd_stop_device no special\n",
3598 skd_name(skdev));
3599 goto stop_out;
3600 }
3601
3602 skdev->state = SKD_DRVR_STATE_SYNCING;
3603 skdev->sync_done = 0;
3604
3605 skd_send_internal_skspcl(skdev, skspcl, SYNCHRONIZE_CACHE);
3606
3607 spin_unlock_irqrestore(&skdev->lock, flags);
3608
3609 wait_event_interruptible_timeout(skdev->waitq,
3610 (skdev->sync_done), (10 * HZ));
3611
3612 spin_lock_irqsave(&skdev->lock, flags);
3613
3614 switch (skdev->sync_done) {
3615 case 0:
3616 pr_err("(%s): skd_stop_device no sync\n",
3617 skd_name(skdev));
3618 break;
3619 case 1:
3620 pr_err("(%s): skd_stop_device sync done\n",
3621 skd_name(skdev));
3622 break;
3623 default:
3624 pr_err("(%s): skd_stop_device sync error\n",
3625 skd_name(skdev));
3626 }
3627
3628stop_out:
3629 skdev->state = SKD_DRVR_STATE_STOPPING;
3630 spin_unlock_irqrestore(&skdev->lock, flags);
3631
3632 skd_kill_timer(skdev);
3633
3634 spin_lock_irqsave(&skdev->lock, flags);
3635 skd_disable_interrupts(skdev);
3636
3637 /* ensure all ints on device are cleared */
3638 /* soft reset the device to unload with a clean slate */
3639 SKD_WRITEL(skdev, FIT_INT_DEF_MASK, FIT_INT_STATUS_HOST);
3640 SKD_WRITEL(skdev, FIT_CR_SOFT_RESET, FIT_CONTROL);
3641
3642 spin_unlock_irqrestore(&skdev->lock, flags);
3643
3644 /* poll every 100ms, 1 second timeout */
3645 for (i = 0; i < 10; i++) {
3646 dev_state =
3647 SKD_READL(skdev, FIT_STATUS) & FIT_SR_DRIVE_STATE_MASK;
3648 if (dev_state == FIT_SR_DRIVE_INIT)
3649 break;
3650 set_current_state(TASK_INTERRUPTIBLE);
3651 schedule_timeout(msecs_to_jiffies(100));
3652 }
3653
3654 if (dev_state != FIT_SR_DRIVE_INIT)
3655 pr_err("(%s): skd_stop_device state error 0x%02x\n",
3656 skd_name(skdev), dev_state);
3657}
3658
3659/* assume spinlock is held */
3660static void skd_restart_device(struct skd_device *skdev)
3661{
3662 u32 state;
3663
3664 /* ack all ghost interrupts */
3665 SKD_WRITEL(skdev, FIT_INT_DEF_MASK, FIT_INT_STATUS_HOST);
3666
3667 state = SKD_READL(skdev, FIT_STATUS);
3668
3669 pr_debug("%s:%s:%d drive status=0x%x\n",
3670 skdev->name, __func__, __LINE__, state);
3671
3672 state &= FIT_SR_DRIVE_STATE_MASK;
3673 skdev->drive_state = state;
3674 skdev->last_mtd = 0;
3675
3676 skdev->state = SKD_DRVR_STATE_RESTARTING;
3677 skdev->timer_countdown = SKD_RESTARTING_TIMO;
3678
3679 skd_soft_reset(skdev);
3680}
3681
3682/* assume spinlock is held */
3683static int skd_quiesce_dev(struct skd_device *skdev)
3684{
3685 int rc = 0;
3686
3687 switch (skdev->state) {
3688 case SKD_DRVR_STATE_BUSY:
3689 case SKD_DRVR_STATE_BUSY_IMMINENT:
3690 pr_debug("%s:%s:%d stopping %s queue\n",
3691 skdev->name, __func__, __LINE__, skdev->name);
3692 blk_stop_queue(skdev->queue);
3693 break;
3694 case SKD_DRVR_STATE_ONLINE:
3695 case SKD_DRVR_STATE_STOPPING:
3696 case SKD_DRVR_STATE_SYNCING:
3697 case SKD_DRVR_STATE_PAUSING:
3698 case SKD_DRVR_STATE_PAUSED:
3699 case SKD_DRVR_STATE_STARTING:
3700 case SKD_DRVR_STATE_RESTARTING:
3701 case SKD_DRVR_STATE_RESUMING:
3702 default:
3703 rc = -EINVAL;
3704 pr_debug("%s:%s:%d state [%d] not implemented\n",
3705 skdev->name, __func__, __LINE__, skdev->state);
3706 }
3707 return rc;
3708}
3709
3710/* assume spinlock is held */
3711static int skd_unquiesce_dev(struct skd_device *skdev)
3712{
3713 int prev_driver_state = skdev->state;
3714
3715 skd_log_skdev(skdev, "unquiesce");
3716 if (skdev->state == SKD_DRVR_STATE_ONLINE) {
3717 pr_debug("%s:%s:%d **** device already ONLINE\n",
3718 skdev->name, __func__, __LINE__);
3719 return 0;
3720 }
3721 if (skdev->drive_state != FIT_SR_DRIVE_ONLINE) {
3722 /*
3723		 * If there has been a state change to something other than
3724		 * ONLINE, rely on a later controller state change to bring
3725		 * the drive back online and restart the queue.
3726		 * The BUSY state means the driver is ready to continue
3727		 * normal processing but is waiting for the controller to
3728		 * become available.
3729 */
3730 skdev->state = SKD_DRVR_STATE_BUSY;
3731 pr_debug("%s:%s:%d drive BUSY state\n",
3732 skdev->name, __func__, __LINE__);
3733 return 0;
3734 }
3735
3736 /*
3737	 * The drive has just come online; the driver is either starting up,
3738	 * paused performing a task, or busy waiting for hardware.
3739 */
3740 switch (skdev->state) {
3741 case SKD_DRVR_STATE_PAUSED:
3742 case SKD_DRVR_STATE_BUSY:
3743 case SKD_DRVR_STATE_BUSY_IMMINENT:
3744 case SKD_DRVR_STATE_BUSY_ERASE:
3745 case SKD_DRVR_STATE_STARTING:
3746 case SKD_DRVR_STATE_RESTARTING:
3747 case SKD_DRVR_STATE_FAULT:
3748 case SKD_DRVR_STATE_IDLE:
3749 case SKD_DRVR_STATE_LOAD:
3750 skdev->state = SKD_DRVR_STATE_ONLINE;
3751 pr_err("(%s): Driver state %s(%d)=>%s(%d)\n",
3752 skd_name(skdev),
3753 skd_skdev_state_to_str(prev_driver_state),
3754 prev_driver_state, skd_skdev_state_to_str(skdev->state),
3755 skdev->state);
3756 pr_debug("%s:%s:%d **** device ONLINE...starting block queue\n",
3757 skdev->name, __func__, __LINE__);
3758 pr_debug("%s:%s:%d starting %s queue\n",
3759 skdev->name, __func__, __LINE__, skdev->name);
3760 pr_info("(%s): STEC s1120 ONLINE\n", skd_name(skdev));
3761 blk_start_queue(skdev->queue);
3762 skdev->gendisk_on = 1;
3763 wake_up_interruptible(&skdev->waitq);
3764 break;
3765
3766 case SKD_DRVR_STATE_DISAPPEARED:
3767 default:
3768		pr_debug("%s:%s:%d **** driver state %d, not implemented\n",
3769 skdev->name, __func__, __LINE__,
3770 skdev->state);
3771 return -EBUSY;
3772 }
3773 return 0;
3774}
3775
3776/*
3777 *****************************************************************************
3778 * PCIe MSI/MSI-X INTERRUPT HANDLERS
3779 *****************************************************************************
3780 */
3781
3782static irqreturn_t skd_reserved_isr(int irq, void *skd_host_data)
3783{
3784 struct skd_device *skdev = skd_host_data;
3785 unsigned long flags;
3786
3787 spin_lock_irqsave(&skdev->lock, flags);
3788 pr_debug("%s:%s:%d MSIX = 0x%x\n",
3789 skdev->name, __func__, __LINE__,
3790 SKD_READL(skdev, FIT_INT_STATUS_HOST));
3791 pr_err("(%s): MSIX reserved irq %d = 0x%x\n", skd_name(skdev),
3792 irq, SKD_READL(skdev, FIT_INT_STATUS_HOST));
3793 SKD_WRITEL(skdev, FIT_INT_RESERVED_MASK, FIT_INT_STATUS_HOST);
3794 spin_unlock_irqrestore(&skdev->lock, flags);
3795 return IRQ_HANDLED;
3796}
3797
3798static irqreturn_t skd_statec_isr(int irq, void *skd_host_data)
3799{
3800 struct skd_device *skdev = skd_host_data;
3801 unsigned long flags;
3802
3803 spin_lock_irqsave(&skdev->lock, flags);
3804 pr_debug("%s:%s:%d MSIX = 0x%x\n",
3805 skdev->name, __func__, __LINE__,
3806 SKD_READL(skdev, FIT_INT_STATUS_HOST));
3807 SKD_WRITEL(skdev, FIT_ISH_FW_STATE_CHANGE, FIT_INT_STATUS_HOST);
3808 skd_isr_fwstate(skdev);
3809 spin_unlock_irqrestore(&skdev->lock, flags);
3810 return IRQ_HANDLED;
3811}
3812
3813static irqreturn_t skd_comp_q(int irq, void *skd_host_data)
3814{
3815 struct skd_device *skdev = skd_host_data;
3816 unsigned long flags;
3817 int flush_enqueued = 0;
3818 int deferred;
3819
3820 spin_lock_irqsave(&skdev->lock, flags);
3821 pr_debug("%s:%s:%d MSIX = 0x%x\n",
3822 skdev->name, __func__, __LINE__,
3823 SKD_READL(skdev, FIT_INT_STATUS_HOST));
3824 SKD_WRITEL(skdev, FIT_ISH_COMPLETION_POSTED, FIT_INT_STATUS_HOST);
3825 deferred = skd_isr_completion_posted(skdev, skd_isr_comp_limit,
3826 &flush_enqueued);
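	/* If completion processing queued a flush request, run the request
	 * function now so it is issued promptly. If the completion limit
	 * was reached, hand the remaining work to the completion worker;
	 * otherwise kick the request function once to keep the queue
	 * moving.
	 */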
3827 if (flush_enqueued)
3828 skd_request_fn(skdev->queue);
3829
3830 if (deferred)
3831 schedule_work(&skdev->completion_worker);
3832 else if (!flush_enqueued)
3833 skd_request_fn(skdev->queue);
3834
3835 spin_unlock_irqrestore(&skdev->lock, flags);
3836
3837 return IRQ_HANDLED;
3838}
3839
3840static irqreturn_t skd_msg_isr(int irq, void *skd_host_data)
3841{
3842 struct skd_device *skdev = skd_host_data;
3843 unsigned long flags;
3844
3845 spin_lock_irqsave(&skdev->lock, flags);
3846 pr_debug("%s:%s:%d MSIX = 0x%x\n",
3847 skdev->name, __func__, __LINE__,
3848 SKD_READL(skdev, FIT_INT_STATUS_HOST));
3849 SKD_WRITEL(skdev, FIT_ISH_MSG_FROM_DEV, FIT_INT_STATUS_HOST);
3850 skd_isr_msg_from_dev(skdev);
3851 spin_unlock_irqrestore(&skdev->lock, flags);
3852 return IRQ_HANDLED;
3853}
3854
3855static irqreturn_t skd_qfull_isr(int irq, void *skd_host_data)
3856{
3857 struct skd_device *skdev = skd_host_data;
3858 unsigned long flags;
3859
3860 spin_lock_irqsave(&skdev->lock, flags);
3861 pr_debug("%s:%s:%d MSIX = 0x%x\n",
3862 skdev->name, __func__, __LINE__,
3863 SKD_READL(skdev, FIT_INT_STATUS_HOST));
3864 SKD_WRITEL(skdev, FIT_INT_QUEUE_FULL, FIT_INT_STATUS_HOST);
3865 spin_unlock_irqrestore(&skdev->lock, flags);
3866 return IRQ_HANDLED;
3867}
3868
3869/*
3870 *****************************************************************************
3871 * PCIe MSI/MSI-X SETUP
3872 *****************************************************************************
3873 */
3874
3875struct skd_msix_entry {
3876 int have_irq;
3877 u32 vector;
3878 u32 entry;
3879 struct skd_device *rsp;
3880 char isr_name[30];
3881};
3882
3883struct skd_init_msix_entry {
3884 const char *name;
3885 irq_handler_t handler;
3886};
3887
3888#define SKD_MAX_MSIX_COUNT 13
3889#define SKD_MIN_MSIX_COUNT 7
3890#define SKD_BASE_MSIX_IRQ 4
3891
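/*
 * Per-vector handlers for the 13 MSI-X entries: four reserved DMA
 * vectors, one state-change vector, one completion-queue vector, one
 * message-from-device vector, two reserved vectors, and four
 * queue-full vectors. The reserved handler just acknowledges and logs.
 */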
3892static struct skd_init_msix_entry msix_entries[SKD_MAX_MSIX_COUNT] = {
3893 { "(DMA 0)", skd_reserved_isr },
3894 { "(DMA 1)", skd_reserved_isr },
3895 { "(DMA 2)", skd_reserved_isr },
3896 { "(DMA 3)", skd_reserved_isr },
3897 { "(State Change)", skd_statec_isr },
3898 { "(COMPL_Q)", skd_comp_q },
3899 { "(MSG)", skd_msg_isr },
3900 { "(Reserved)", skd_reserved_isr },
3901 { "(Reserved)", skd_reserved_isr },
3902 { "(Queue Full 0)", skd_qfull_isr },
3903 { "(Queue Full 1)", skd_qfull_isr },
3904 { "(Queue Full 2)", skd_qfull_isr },
3905 { "(Queue Full 3)", skd_qfull_isr },
3906};
3907
3908static void skd_release_msix(struct skd_device *skdev)
3909{
3910 struct skd_msix_entry *qentry;
3911 int i;
3912
3913 if (skdev->msix_entries == NULL)
3914 return;
3915 for (i = 0; i < skdev->msix_count; i++) {
3916 qentry = &skdev->msix_entries[i];
3917 skdev = qentry->rsp;
3918
3919 if (qentry->have_irq)
3920 devm_free_irq(&skdev->pdev->dev,
3921 qentry->vector, qentry->rsp);
3922 }
3923 pci_disable_msix(skdev->pdev);
3924 kfree(skdev->msix_entries);
3925 skdev->msix_count = 0;
3926 skdev->msix_entries = NULL;
3927}
3928
3929static int skd_acquire_msix(struct skd_device *skdev)
3930{
3931 int i, rc;
3932 struct pci_dev *pdev;
3933 struct msix_entry *entries = NULL;
3934 struct skd_msix_entry *qentry;
3935
3936 pdev = skdev->pdev;
3937 skdev->msix_count = SKD_MAX_MSIX_COUNT;
3938 entries = kzalloc(sizeof(struct msix_entry) * SKD_MAX_MSIX_COUNT,
3939 GFP_KERNEL);
3940 if (!entries)
3941 return -ENOMEM;
3942
3943 for (i = 0; i < SKD_MAX_MSIX_COUNT; i++)
3944 entries[i].entry = i;
3945
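	/* With the old pci_enable_msix() API, a positive return value is
	 * the number of vectors actually available; retry with that count
	 * as long as it is at least SKD_MIN_MSIX_COUNT.
	 */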
3946 rc = pci_enable_msix(pdev, entries, SKD_MAX_MSIX_COUNT);
3947 if (rc < 0)
3948 goto msix_out;
3949 if (rc) {
3950 if (rc < SKD_MIN_MSIX_COUNT) {
3951 pr_err("(%s): failed to enable MSI-X %d\n",
3952 skd_name(skdev), rc);
3953 goto msix_out;
3954 }
3955 pr_debug("%s:%s:%d %s: <%s> allocated %d MSI-X vectors\n",
3956 skdev->name, __func__, __LINE__,
3957 pci_name(pdev), skdev->name, rc);
3958
3959 skdev->msix_count = rc;
3960 rc = pci_enable_msix(pdev, entries, skdev->msix_count);
3961 if (rc) {
3962 pr_err("(%s): failed to enable MSI-X "
3963 "support (%d) %d\n",
3964 skd_name(skdev), skdev->msix_count, rc);
3965 goto msix_out;
3966 }
3967 }
3968 skdev->msix_entries = kzalloc(sizeof(struct skd_msix_entry) *
3969 skdev->msix_count, GFP_KERNEL);
3970 if (!skdev->msix_entries) {
3971 rc = -ENOMEM;
3972 skdev->msix_count = 0;
3973 pr_err("(%s): msix table allocation error\n",
3974 skd_name(skdev));
3975 goto msix_out;
3976 }
3977
3978 qentry = skdev->msix_entries;
3979 for (i = 0; i < skdev->msix_count; i++) {
3980 qentry->vector = entries[i].vector;
3981 qentry->entry = entries[i].entry;
3982 qentry->rsp = NULL;
3983 qentry->have_irq = 0;
3984 pr_debug("%s:%s:%d %s: <%s> msix (%d) vec %d, entry %x\n",
3985 skdev->name, __func__, __LINE__,
3986 pci_name(pdev), skdev->name,
3987 i, qentry->vector, qentry->entry);
3988 qentry++;
3989 }
3990
3991 /* Enable MSI-X vectors for the base queue */
3992 for (i = 0; i < SKD_MAX_MSIX_COUNT; i++) {
3993 qentry = &skdev->msix_entries[i];
3994 snprintf(qentry->isr_name, sizeof(qentry->isr_name),
3995 "%s%d-msix %s", DRV_NAME, skdev->devno,
3996 msix_entries[i].name);
3997 rc = devm_request_irq(&skdev->pdev->dev, qentry->vector,
3998 msix_entries[i].handler, 0,
3999 qentry->isr_name, skdev);
4000 if (rc) {
4001 pr_err("(%s): Unable to register(%d) MSI-X "
4002 "handler %d: %s\n",
4003 skd_name(skdev), rc, i, qentry->isr_name);
4004 goto msix_out;
4005 } else {
4006 qentry->have_irq = 1;
4007 qentry->rsp = skdev;
4008 }
4009 }
4010 pr_debug("%s:%s:%d %s: <%s> msix %d irq(s) enabled\n",
4011 skdev->name, __func__, __LINE__,
4012 pci_name(pdev), skdev->name, skdev->msix_count);
4013 return 0;
4014
4015msix_out:
4016 if (entries)
4017 kfree(entries);
4018 skd_release_msix(skdev);
4019 return rc;
4020}
4021
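/*
 * Acquire interrupts using the configured irq_type, falling back from
 * MSI-X to MSI to a shared legacy INTx line if the preferred mode
 * cannot be enabled (see the RETRY_IRQ_TYPE / RETRY_IRQ_LEGACY gotos
 * below).
 */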
4022static int skd_acquire_irq(struct skd_device *skdev)
4023{
4024 int rc;
4025 struct pci_dev *pdev;
4026
4027 pdev = skdev->pdev;
4028 skdev->msix_count = 0;
4029
4030RETRY_IRQ_TYPE:
4031 switch (skdev->irq_type) {
4032 case SKD_IRQ_MSIX:
4033 rc = skd_acquire_msix(skdev);
4034 if (!rc)
4035 pr_info("(%s): MSI-X %d irqs enabled\n",
4036 skd_name(skdev), skdev->msix_count);
4037 else {
4038 pr_err(
4039 "(%s): failed to enable MSI-X, re-trying with MSI %d\n",
4040 skd_name(skdev), rc);
4041 skdev->irq_type = SKD_IRQ_MSI;
4042 goto RETRY_IRQ_TYPE;
4043 }
4044 break;
4045 case SKD_IRQ_MSI:
4046 snprintf(skdev->isr_name, sizeof(skdev->isr_name), "%s%d-msi",
4047 DRV_NAME, skdev->devno);
4048 rc = pci_enable_msi(pdev);
4049 if (!rc) {
4050 rc = devm_request_irq(&pdev->dev, pdev->irq, skd_isr, 0,
4051 skdev->isr_name, skdev);
4052 if (rc) {
4053 pci_disable_msi(pdev);
4054 pr_err(
4055 "(%s): failed to allocate the MSI interrupt %d\n",
4056 skd_name(skdev), rc);
4057 goto RETRY_IRQ_LEGACY;
4058 }
4059 pr_info("(%s): MSI irq %d enabled\n",
4060 skd_name(skdev), pdev->irq);
4061 } else {
4062RETRY_IRQ_LEGACY:
4063 pr_err(
4064 "(%s): failed to enable MSI, re-trying with LEGACY %d\n",
4065 skd_name(skdev), rc);
4066 skdev->irq_type = SKD_IRQ_LEGACY;
4067 goto RETRY_IRQ_TYPE;
4068 }
4069 break;
4070 case SKD_IRQ_LEGACY:
4071 snprintf(skdev->isr_name, sizeof(skdev->isr_name),
4072 "%s%d-legacy", DRV_NAME, skdev->devno);
4073 rc = devm_request_irq(&pdev->dev, pdev->irq, skd_isr,
4074 IRQF_SHARED, skdev->isr_name, skdev);
4075 if (!rc)
4076 pr_info("(%s): LEGACY irq %d enabled\n",
4077 skd_name(skdev), pdev->irq);
4078 else
4079 pr_err("(%s): request LEGACY irq error %d\n",
4080 skd_name(skdev), rc);
4081 break;
4082 default:
4083 pr_info("(%s): irq_type %d invalid, re-set to %d\n",
4084 skd_name(skdev), skdev->irq_type, SKD_IRQ_DEFAULT);
4085 skdev->irq_type = SKD_IRQ_LEGACY;
4086 goto RETRY_IRQ_TYPE;
4087 }
4088 return rc;
4089}
4090
4091static void skd_release_irq(struct skd_device *skdev)
4092{
4093 switch (skdev->irq_type) {
4094 case SKD_IRQ_MSIX:
4095 skd_release_msix(skdev);
4096 break;
4097 case SKD_IRQ_MSI:
4098 devm_free_irq(&skdev->pdev->dev, skdev->pdev->irq, skdev);
4099 pci_disable_msi(skdev->pdev);
4100 break;
4101 case SKD_IRQ_LEGACY:
4102 devm_free_irq(&skdev->pdev->dev, skdev->pdev->irq, skdev);
4103 break;
4104 default:
4105		pr_err("(%s): wrong irq type %d!\n",
4106 skd_name(skdev), skdev->irq_type);
4107 break;
4108 }
4109}
4110
4111/*
4112 *****************************************************************************
4113 * CONSTRUCT
4114 *****************************************************************************
4115 */
4116
4117static int skd_cons_skcomp(struct skd_device *skdev)
4118{
4119 int rc = 0;
4120 struct fit_completion_entry_v1 *skcomp;
4121 u32 nbytes;
4122
4123 nbytes = sizeof(*skcomp) * SKD_N_COMPLETION_ENTRY;
4124 nbytes += sizeof(struct fit_comp_error_info) * SKD_N_COMPLETION_ENTRY;
4125
4126 pr_debug("%s:%s:%d comp pci_alloc, total bytes %d entries %d\n",
4127 skdev->name, __func__, __LINE__,
4128 nbytes, SKD_N_COMPLETION_ENTRY);
4129
4130 skcomp = pci_alloc_consistent(skdev->pdev, nbytes,
4131 &skdev->cq_dma_address);
4132
4133 if (skcomp == NULL) {
4134 rc = -ENOMEM;
4135 goto err_out;
4136 }
4137
4138 memset(skcomp, 0, nbytes);
4139
4140 skdev->skcomp_table = skcomp;
4141 skdev->skerr_table = (struct fit_comp_error_info *)((char *)skcomp +
4142 sizeof(*skcomp) *
4143 SKD_N_COMPLETION_ENTRY);
4144
4145err_out:
4146 return rc;
4147}
4148
4149static int skd_cons_skmsg(struct skd_device *skdev)
4150{
4151 int rc = 0;
4152 u32 i;
4153
4154 pr_debug("%s:%s:%d skmsg_table kzalloc, struct %lu, count %u total %lu\n",
4155 skdev->name, __func__, __LINE__,
4156 sizeof(struct skd_fitmsg_context),
4157 skdev->num_fitmsg_context,
4158 sizeof(struct skd_fitmsg_context) * skdev->num_fitmsg_context);
4159
4160 skdev->skmsg_table = kzalloc(sizeof(struct skd_fitmsg_context)
4161 *skdev->num_fitmsg_context, GFP_KERNEL);
4162 if (skdev->skmsg_table == NULL) {
4163 rc = -ENOMEM;
4164 goto err_out;
4165 }
4166
4167 for (i = 0; i < skdev->num_fitmsg_context; i++) {
4168 struct skd_fitmsg_context *skmsg;
4169
4170 skmsg = &skdev->skmsg_table[i];
4171
4172 skmsg->id = i + SKD_ID_FIT_MSG;
4173
4174 skmsg->state = SKD_MSG_STATE_IDLE;
4175 skmsg->msg_buf = pci_alloc_consistent(skdev->pdev,
4176 SKD_N_FITMSG_BYTES + 64,
4177 &skmsg->mb_dma_address);
4178
4179 if (skmsg->msg_buf == NULL) {
4180 rc = -ENOMEM;
4181 goto err_out;
4182 }
4183
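		/* The message buffer was allocated with 64 extra bytes;
		 * round the virtual address and the DMA address up to the
		 * next FIT_QCMD_BASE_ADDRESS_MASK boundary, recording the
		 * offset so skd_free_skmsg() can adjust the pointers before
		 * freeing.
		 */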
4184 skmsg->offset = (u32)((u64)skmsg->msg_buf &
4185 (~FIT_QCMD_BASE_ADDRESS_MASK));
4186 skmsg->msg_buf += ~FIT_QCMD_BASE_ADDRESS_MASK;
4187 skmsg->msg_buf = (u8 *)((u64)skmsg->msg_buf &
4188 FIT_QCMD_BASE_ADDRESS_MASK);
4189 skmsg->mb_dma_address += ~FIT_QCMD_BASE_ADDRESS_MASK;
4190 skmsg->mb_dma_address &= FIT_QCMD_BASE_ADDRESS_MASK;
4191 memset(skmsg->msg_buf, 0, SKD_N_FITMSG_BYTES);
4192
4193 skmsg->next = &skmsg[1];
4194 }
4195
4196 /* Free list is in order starting with the 0th entry. */
4197 skdev->skmsg_table[i - 1].next = NULL;
4198 skdev->skmsg_free_list = skdev->skmsg_table;
4199
4200err_out:
4201 return rc;
4202}
4203
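/*
 * Allocate a coherent array of n_sg FIT SG descriptors and chain them:
 * each descriptor's next_desc_ptr holds the bus address of the next
 * entry, and the final descriptor is terminated with 0.
 */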
4204static struct fit_sg_descriptor *skd_cons_sg_list(struct skd_device *skdev,
4205 u32 n_sg,
4206 dma_addr_t *ret_dma_addr)
4207{
4208 struct fit_sg_descriptor *sg_list;
4209 u32 nbytes;
4210
4211 nbytes = sizeof(*sg_list) * n_sg;
4212
4213 sg_list = pci_alloc_consistent(skdev->pdev, nbytes, ret_dma_addr);
4214
4215 if (sg_list != NULL) {
4216 uint64_t dma_address = *ret_dma_addr;
4217 u32 i;
4218
4219 memset(sg_list, 0, nbytes);
4220
4221 for (i = 0; i < n_sg - 1; i++) {
4222 uint64_t ndp_off;
4223 ndp_off = (i + 1) * sizeof(struct fit_sg_descriptor);
4224
4225 sg_list[i].next_desc_ptr = dma_address + ndp_off;
4226 }
4227 sg_list[i].next_desc_ptr = 0LL;
4228 }
4229
4230 return sg_list;
4231}
4232
4233static int skd_cons_skreq(struct skd_device *skdev)
4234{
4235 int rc = 0;
4236 u32 i;
4237
4238 pr_debug("%s:%s:%d skreq_table kzalloc, struct %lu, count %u total %lu\n",
4239 skdev->name, __func__, __LINE__,
4240 sizeof(struct skd_request_context),
4241 skdev->num_req_context,
4242 sizeof(struct skd_request_context) * skdev->num_req_context);
4243
4244 skdev->skreq_table = kzalloc(sizeof(struct skd_request_context)
4245 * skdev->num_req_context, GFP_KERNEL);
4246 if (skdev->skreq_table == NULL) {
4247 rc = -ENOMEM;
4248 goto err_out;
4249 }
4250
4251 pr_debug("%s:%s:%d alloc sg_table sg_per_req %u scatlist %lu total %lu\n",
4252 skdev->name, __func__, __LINE__,
4253 skdev->sgs_per_request, sizeof(struct scatterlist),
4254 skdev->sgs_per_request * sizeof(struct scatterlist));
4255
4256 for (i = 0; i < skdev->num_req_context; i++) {
4257 struct skd_request_context *skreq;
4258
4259 skreq = &skdev->skreq_table[i];
4260
4261 skreq->id = i + SKD_ID_RW_REQUEST;
4262 skreq->state = SKD_REQ_STATE_IDLE;
4263
4264 skreq->sg = kzalloc(sizeof(struct scatterlist) *
4265 skdev->sgs_per_request, GFP_KERNEL);
4266 if (skreq->sg == NULL) {
4267 rc = -ENOMEM;
4268 goto err_out;
4269 }
4270 sg_init_table(skreq->sg, skdev->sgs_per_request);
4271
4272 skreq->sksg_list = skd_cons_sg_list(skdev,
4273 skdev->sgs_per_request,
4274 &skreq->sksg_dma_address);
4275
4276 if (skreq->sksg_list == NULL) {
4277 rc = -ENOMEM;
4278 goto err_out;
4279 }
4280
4281 skreq->next = &skreq[1];
4282 }
4283
4284 /* Free list is in order starting with the 0th entry. */
4285 skdev->skreq_table[i - 1].next = NULL;
4286 skdev->skreq_free_list = skdev->skreq_table;
4287
4288err_out:
4289 return rc;
4290}
4291
4292static int skd_cons_skspcl(struct skd_device *skdev)
4293{
4294 int rc = 0;
4295 u32 i, nbytes;
4296
4297 pr_debug("%s:%s:%d skspcl_table kzalloc, struct %lu, count %u total %lu\n",
4298 skdev->name, __func__, __LINE__,
4299 sizeof(struct skd_special_context),
4300 skdev->n_special,
4301 sizeof(struct skd_special_context) * skdev->n_special);
4302
4303 skdev->skspcl_table = kzalloc(sizeof(struct skd_special_context)
4304 * skdev->n_special, GFP_KERNEL);
4305 if (skdev->skspcl_table == NULL) {
4306 rc = -ENOMEM;
4307 goto err_out;
4308 }
4309
4310 for (i = 0; i < skdev->n_special; i++) {
4311 struct skd_special_context *skspcl;
4312
4313 skspcl = &skdev->skspcl_table[i];
4314
4315 skspcl->req.id = i + SKD_ID_SPECIAL_REQUEST;
4316 skspcl->req.state = SKD_REQ_STATE_IDLE;
4317
4318 skspcl->req.next = &skspcl[1].req;
4319
4320 nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
4321
4322 skspcl->msg_buf = pci_alloc_consistent(skdev->pdev, nbytes,
4323 &skspcl->mb_dma_address);
4324 if (skspcl->msg_buf == NULL) {
4325 rc = -ENOMEM;
4326 goto err_out;
4327 }
4328
4329 memset(skspcl->msg_buf, 0, nbytes);
4330
4331 skspcl->req.sg = kzalloc(sizeof(struct scatterlist) *
4332 SKD_N_SG_PER_SPECIAL, GFP_KERNEL);
4333 if (skspcl->req.sg == NULL) {
4334 rc = -ENOMEM;
4335 goto err_out;
4336 }
4337
4338 skspcl->req.sksg_list = skd_cons_sg_list(skdev,
4339 SKD_N_SG_PER_SPECIAL,
4340 &skspcl->req.
4341 sksg_dma_address);
4342 if (skspcl->req.sksg_list == NULL) {
4343 rc = -ENOMEM;
4344 goto err_out;
4345 }
4346 }
4347
4348 /* Free list is in order starting with the 0th entry. */
4349 skdev->skspcl_table[i - 1].req.next = NULL;
4350 skdev->skspcl_free_list = skdev->skspcl_table;
4351
4352 return rc;
4353
4354err_out:
4355 return rc;
4356}
4357
4358static int skd_cons_sksb(struct skd_device *skdev)
4359{
4360 int rc = 0;
4361 struct skd_special_context *skspcl;
4362 u32 nbytes;
4363
4364 skspcl = &skdev->internal_skspcl;
4365
4366 skspcl->req.id = 0 + SKD_ID_INTERNAL;
4367 skspcl->req.state = SKD_REQ_STATE_IDLE;
4368
4369 nbytes = SKD_N_INTERNAL_BYTES;
4370
4371 skspcl->data_buf = pci_alloc_consistent(skdev->pdev, nbytes,
4372 &skspcl->db_dma_address);
4373 if (skspcl->data_buf == NULL) {
4374 rc = -ENOMEM;
4375 goto err_out;
4376 }
4377
4378 memset(skspcl->data_buf, 0, nbytes);
4379
4380 nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
4381 skspcl->msg_buf = pci_alloc_consistent(skdev->pdev, nbytes,
4382 &skspcl->mb_dma_address);
4383 if (skspcl->msg_buf == NULL) {
4384 rc = -ENOMEM;
4385 goto err_out;
4386 }
4387
4388 memset(skspcl->msg_buf, 0, nbytes);
4389
4390 skspcl->req.sksg_list = skd_cons_sg_list(skdev, 1,
4391 &skspcl->req.sksg_dma_address);
4392 if (skspcl->req.sksg_list == NULL) {
4393 rc = -ENOMEM;
4394 goto err_out;
4395 }
4396
4397 if (!skd_format_internal_skspcl(skdev)) {
4398 rc = -EINVAL;
4399 goto err_out;
4400 }
4401
4402err_out:
4403 return rc;
4404}
4405
4406static int skd_cons_disk(struct skd_device *skdev)
4407{
4408 int rc = 0;
4409 struct gendisk *disk;
4410 struct request_queue *q;
4411 unsigned long flags;
4412
4413 disk = alloc_disk(SKD_MINORS_PER_DEVICE);
4414 if (!disk) {
4415 rc = -ENOMEM;
4416 goto err_out;
4417 }
4418
4419 skdev->disk = disk;
4420 sprintf(disk->disk_name, DRV_NAME "%u", skdev->devno);
4421
4422 disk->major = skdev->major;
4423 disk->first_minor = skdev->devno * SKD_MINORS_PER_DEVICE;
4424 disk->fops = &skd_blockdev_ops;
4425 disk->private_data = skdev;
4426
4427 q = blk_init_queue(skd_request_fn, &skdev->lock);
4428 if (!q) {
4429 rc = -ENOMEM;
4430 goto err_out;
4431 }
4432
4433 skdev->queue = q;
4434 disk->queue = q;
4435 q->queuedata = skdev;
4436
4437 blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
4438 blk_queue_max_segments(q, skdev->sgs_per_request);
4439 blk_queue_max_hw_sectors(q, SKD_N_MAX_SECTORS);
4440
4441	/* set sysfs optimal_io_size to 8K */
4442 blk_queue_io_opt(q, 8192);
4443
4444 /* DISCARD Flag initialization. */
4445 q->limits.discard_granularity = 8192;
4446 q->limits.discard_alignment = 0;
4447 q->limits.max_discard_sectors = UINT_MAX >> 9;
4448 q->limits.discard_zeroes_data = 1;
4449 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
4450 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
4451
4452 spin_lock_irqsave(&skdev->lock, flags);
4453 pr_debug("%s:%s:%d stopping %s queue\n",
4454 skdev->name, __func__, __LINE__, skdev->name);
4455 blk_stop_queue(skdev->queue);
4456 spin_unlock_irqrestore(&skdev->lock, flags);
4457
4458err_out:
4459 return rc;
4460}
4461
4462#define SKD_N_DEV_TABLE 16u
4463static u32 skd_next_devno;
4464
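/*
 * Allocate and initialize one skd_device: the completion queue
 * (skcomp), FIT message contexts (skmsg), request contexts (skreq),
 * special/pass-through contexts (skspcl), the internal special buffer
 * (sksb), and finally the gendisk and request queue. On any failure
 * skd_destruct() tears down whatever was built.
 */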
4465static struct skd_device *skd_construct(struct pci_dev *pdev)
4466{
4467 struct skd_device *skdev;
4468 int blk_major = skd_major;
4469 int rc;
4470
4471 skdev = kzalloc(sizeof(*skdev), GFP_KERNEL);
4472
4473 if (!skdev) {
4474 pr_err(PFX "(%s): memory alloc failure\n",
4475 pci_name(pdev));
4476 return NULL;
4477 }
4478
4479 skdev->state = SKD_DRVR_STATE_LOAD;
4480 skdev->pdev = pdev;
4481 skdev->devno = skd_next_devno++;
4482 skdev->major = blk_major;
4483 skdev->irq_type = skd_isr_type;
4484 sprintf(skdev->name, DRV_NAME "%d", skdev->devno);
4485 skdev->dev_max_queue_depth = 0;
4486
4487 skdev->num_req_context = skd_max_queue_depth;
4488 skdev->num_fitmsg_context = skd_max_queue_depth;
4489 skdev->n_special = skd_max_pass_thru;
4490 skdev->cur_max_queue_depth = 1;
4491 skdev->queue_low_water_mark = 1;
4492 skdev->proto_ver = 99;
4493 skdev->sgs_per_request = skd_sgs_per_request;
4494 skdev->dbg_level = skd_dbg_level;
4495
4496 atomic_set(&skdev->device_count, 0);
4497
4498 spin_lock_init(&skdev->lock);
4499
4500 INIT_WORK(&skdev->completion_worker, skd_completion_worker);
4501
4502 pr_debug("%s:%s:%d skcomp\n", skdev->name, __func__, __LINE__);
4503 rc = skd_cons_skcomp(skdev);
4504 if (rc < 0)
4505 goto err_out;
4506
4507 pr_debug("%s:%s:%d skmsg\n", skdev->name, __func__, __LINE__);
4508 rc = skd_cons_skmsg(skdev);
4509 if (rc < 0)
4510 goto err_out;
4511
4512 pr_debug("%s:%s:%d skreq\n", skdev->name, __func__, __LINE__);
4513 rc = skd_cons_skreq(skdev);
4514 if (rc < 0)
4515 goto err_out;
4516
4517 pr_debug("%s:%s:%d skspcl\n", skdev->name, __func__, __LINE__);
4518 rc = skd_cons_skspcl(skdev);
4519 if (rc < 0)
4520 goto err_out;
4521
4522 pr_debug("%s:%s:%d sksb\n", skdev->name, __func__, __LINE__);
4523 rc = skd_cons_sksb(skdev);
4524 if (rc < 0)
4525 goto err_out;
4526
4527 pr_debug("%s:%s:%d disk\n", skdev->name, __func__, __LINE__);
4528 rc = skd_cons_disk(skdev);
4529 if (rc < 0)
4530 goto err_out;
4531
4532 pr_debug("%s:%s:%d VICTORY\n", skdev->name, __func__, __LINE__);
4533 return skdev;
4534
4535err_out:
4536 pr_debug("%s:%s:%d construct failed\n",
4537 skdev->name, __func__, __LINE__);
4538 skd_destruct(skdev);
4539 return NULL;
4540}
4541
4542/*
4543 *****************************************************************************
4544 * DESTRUCT (FREE)
4545 *****************************************************************************
4546 */
4547
4548static void skd_free_skcomp(struct skd_device *skdev)
4549{
4550 if (skdev->skcomp_table != NULL) {
4551 u32 nbytes;
4552
4553 nbytes = sizeof(skdev->skcomp_table[0]) *
4554 SKD_N_COMPLETION_ENTRY;
4555 pci_free_consistent(skdev->pdev, nbytes,
4556 skdev->skcomp_table, skdev->cq_dma_address);
4557 }
4558
4559 skdev->skcomp_table = NULL;
4560 skdev->cq_dma_address = 0;
4561}
4562
4563static void skd_free_skmsg(struct skd_device *skdev)
4564{
4565 u32 i;
4566
4567 if (skdev->skmsg_table == NULL)
4568 return;
4569
4570 for (i = 0; i < skdev->num_fitmsg_context; i++) {
4571 struct skd_fitmsg_context *skmsg;
4572
4573 skmsg = &skdev->skmsg_table[i];
4574
4575 if (skmsg->msg_buf != NULL) {
4576 skmsg->msg_buf += skmsg->offset;
4577 skmsg->mb_dma_address += skmsg->offset;
4578 pci_free_consistent(skdev->pdev, SKD_N_FITMSG_BYTES,
4579 skmsg->msg_buf,
4580 skmsg->mb_dma_address);
4581 }
4582 skmsg->msg_buf = NULL;
4583 skmsg->mb_dma_address = 0;
4584 }
4585
4586 kfree(skdev->skmsg_table);
4587 skdev->skmsg_table = NULL;
4588}
4589
4590static void skd_free_sg_list(struct skd_device *skdev,
4591 struct fit_sg_descriptor *sg_list,
4592 u32 n_sg, dma_addr_t dma_addr)
4593{
4594 if (sg_list != NULL) {
4595 u32 nbytes;
4596
4597 nbytes = sizeof(*sg_list) * n_sg;
4598
4599 pci_free_consistent(skdev->pdev, nbytes, sg_list, dma_addr);
4600 }
4601}
4602
4603static void skd_free_skreq(struct skd_device *skdev)
4604{
4605 u32 i;
4606
4607 if (skdev->skreq_table == NULL)
4608 return;
4609
4610 for (i = 0; i < skdev->num_req_context; i++) {
4611 struct skd_request_context *skreq;
4612
4613 skreq = &skdev->skreq_table[i];
4614
4615 skd_free_sg_list(skdev, skreq->sksg_list,
4616 skdev->sgs_per_request,
4617 skreq->sksg_dma_address);
4618
4619 skreq->sksg_list = NULL;
4620 skreq->sksg_dma_address = 0;
4621
4622 kfree(skreq->sg);
4623 }
4624
4625 kfree(skdev->skreq_table);
4626 skdev->skreq_table = NULL;
4627}
4628
4629static void skd_free_skspcl(struct skd_device *skdev)
4630{
4631 u32 i;
4632 u32 nbytes;
4633
4634 if (skdev->skspcl_table == NULL)
4635 return;
4636
4637 for (i = 0; i < skdev->n_special; i++) {
4638 struct skd_special_context *skspcl;
4639
4640 skspcl = &skdev->skspcl_table[i];
4641
4642 if (skspcl->msg_buf != NULL) {
4643 nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
4644 pci_free_consistent(skdev->pdev, nbytes,
4645 skspcl->msg_buf,
4646 skspcl->mb_dma_address);
4647 }
4648
4649 skspcl->msg_buf = NULL;
4650 skspcl->mb_dma_address = 0;
4651
4652 skd_free_sg_list(skdev, skspcl->req.sksg_list,
4653 SKD_N_SG_PER_SPECIAL,
4654 skspcl->req.sksg_dma_address);
4655
4656 skspcl->req.sksg_list = NULL;
4657 skspcl->req.sksg_dma_address = 0;
4658
4659 kfree(skspcl->req.sg);
4660 }
4661
4662 kfree(skdev->skspcl_table);
4663 skdev->skspcl_table = NULL;
4664}
4665
4666static void skd_free_sksb(struct skd_device *skdev)
4667{
4668 struct skd_special_context *skspcl;
4669 u32 nbytes;
4670
4671 skspcl = &skdev->internal_skspcl;
4672
4673 if (skspcl->data_buf != NULL) {
4674 nbytes = SKD_N_INTERNAL_BYTES;
4675
4676 pci_free_consistent(skdev->pdev, nbytes,
4677 skspcl->data_buf, skspcl->db_dma_address);
4678 }
4679
4680 skspcl->data_buf = NULL;
4681 skspcl->db_dma_address = 0;
4682
4683 if (skspcl->msg_buf != NULL) {
4684 nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
4685 pci_free_consistent(skdev->pdev, nbytes,
4686 skspcl->msg_buf, skspcl->mb_dma_address);
4687 }
4688
4689 skspcl->msg_buf = NULL;
4690 skspcl->mb_dma_address = 0;
4691
4692 skd_free_sg_list(skdev, skspcl->req.sksg_list, 1,
4693 skspcl->req.sksg_dma_address);
4694
4695 skspcl->req.sksg_list = NULL;
4696 skspcl->req.sksg_dma_address = 0;
4697}
4698
4699static void skd_free_disk(struct skd_device *skdev)
4700{
4701 struct gendisk *disk = skdev->disk;
4702
4703 if (disk != NULL) {
4704 struct request_queue *q = disk->queue;
4705
4706 if (disk->flags & GENHD_FL_UP)
4707 del_gendisk(disk);
4708 if (q)
4709 blk_cleanup_queue(q);
4710 put_disk(disk);
4711 }
4712 skdev->disk = NULL;
4713}
4714
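/*
 * Free everything skd_construct() built, in the reverse order of
 * construction: disk, sksb, skspcl, skreq, skmsg, skcomp, then the
 * skd_device itself.
 */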
4715static void skd_destruct(struct skd_device *skdev)
4716{
4717 if (skdev == NULL)
4718 return;
4719
4720
4721 pr_debug("%s:%s:%d disk\n", skdev->name, __func__, __LINE__);
4722 skd_free_disk(skdev);
4723
4724 pr_debug("%s:%s:%d sksb\n", skdev->name, __func__, __LINE__);
4725 skd_free_sksb(skdev);
4726
4727 pr_debug("%s:%s:%d skspcl\n", skdev->name, __func__, __LINE__);
4728 skd_free_skspcl(skdev);
4729
4730 pr_debug("%s:%s:%d skreq\n", skdev->name, __func__, __LINE__);
4731 skd_free_skreq(skdev);
4732
4733 pr_debug("%s:%s:%d skmsg\n", skdev->name, __func__, __LINE__);
4734 skd_free_skmsg(skdev);
4735
4736 pr_debug("%s:%s:%d skcomp\n", skdev->name, __func__, __LINE__);
4737 skd_free_skcomp(skdev);
4738
4739 pr_debug("%s:%s:%d skdev\n", skdev->name, __func__, __LINE__);
4740 kfree(skdev);
4741}
4742
4743/*
4744 *****************************************************************************
4745 * BLOCK DEVICE (BDEV) GLUE
4746 *****************************************************************************
4747 */
4748
4749static int skd_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo)
4750{
4751 struct skd_device *skdev;
4752 u64 capacity;
4753
4754 skdev = bdev->bd_disk->private_data;
4755
4756 pr_debug("%s:%s:%d %s: CMD[%s] getgeo device\n",
4757 skdev->name, __func__, __LINE__,
4758 bdev->bd_disk->disk_name, current->comm);
4759
4760 if (skdev->read_cap_is_valid) {
4761 capacity = get_capacity(skdev->disk);
4762 geo->heads = 64;
4763 geo->sectors = 255;
4764 geo->cylinders = (capacity) / (255 * 64);
4765
4766 return 0;
4767 }
4768 return -EIO;
4769}
4770
4771static int skd_bdev_attach(struct skd_device *skdev)
4772{
4773 pr_debug("%s:%s:%d add_disk\n", skdev->name, __func__, __LINE__);
4774 add_disk(skdev->disk);
4775 return 0;
4776}
4777
4778static const struct block_device_operations skd_blockdev_ops = {
4779 .owner = THIS_MODULE,
4780 .ioctl = skd_bdev_ioctl,
4781 .getgeo = skd_bdev_getgeo,
4782};
4783
4784
4785/*
4786 *****************************************************************************
4787 * PCIe DRIVER GLUE
4788 *****************************************************************************
4789 */
4790
4791static DEFINE_PCI_DEVICE_TABLE(skd_pci_tbl) = {
4792 { PCI_VENDOR_ID_STEC, PCI_DEVICE_ID_S1120,
4793 PCI_ANY_ID, PCI_ANY_ID, 0, 0, },
4794 { 0 } /* terminate list */
4795};
4796
4797MODULE_DEVICE_TABLE(pci, skd_pci_tbl);
4798
4799static char *skd_pci_info(struct skd_device *skdev, char *str)
4800{
4801 int pcie_reg;
4802
4803 strcpy(str, "PCIe (");
4804 pcie_reg = pci_find_capability(skdev->pdev, PCI_CAP_ID_EXP);
4805
4806 if (pcie_reg) {
4807
4808 char lwstr[6];
4809 uint16_t pcie_lstat, lspeed, lwidth;
4810
4811 pcie_reg += 0x12;
4812 pci_read_config_word(skdev->pdev, pcie_reg, &pcie_lstat);
4813 lspeed = pcie_lstat & (0xF);
4814 lwidth = (pcie_lstat & 0x3F0) >> 4;
4815
4816 if (lspeed == 1)
4817 strcat(str, "2.5GT/s ");
4818 else if (lspeed == 2)
4819 strcat(str, "5.0GT/s ");
4820 else
4821 strcat(str, "<unknown> ");
4822 snprintf(lwstr, sizeof(lwstr), "%dX)", lwidth);
4823 strcat(str, lwstr);
4824 }
4825 return str;
4826}
4827
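/*
 * PCI probe: enable the device, claim its regions, select a 64-bit
 * (or 32-bit fallback) DMA mask, register the block major on first
 * use, construct the skd_device, map the BARs, acquire interrupts,
 * start the timer and the device, then wait up to
 * SKD_START_WAIT_SECONDS for the drive to come online before adding
 * the gendisk.
 */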
4828static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
4829{
4830 int i;
4831 int rc = 0;
4832 char pci_str[32];
4833 struct skd_device *skdev;
4834
4835 pr_info("STEC s1120 Driver(%s) version %s-b%s\n",
4836 DRV_NAME, DRV_VERSION, DRV_BUILD_ID);
4837 pr_info("(skd?:??:[%s]): vendor=%04X device=%04x\n",
4838 pci_name(pdev), pdev->vendor, pdev->device);
4839
4840 rc = pci_enable_device(pdev);
4841 if (rc)
4842 return rc;
4843 rc = pci_request_regions(pdev, DRV_NAME);
4844 if (rc)
4845 goto err_out;
4846 rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
4847 if (!rc) {
4848 if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) {
4849
4850 pr_err("(%s): consistent DMA mask error %d\n",
4851 pci_name(pdev), rc);
4852 }
4853 } else {
4854		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
4855 if (rc) {
4856
4857 pr_err("(%s): DMA mask error %d\n",
4858 pci_name(pdev), rc);
4859 goto err_out_regions;
4860 }
4861 }
4862
4863 if (!skd_major) {
4864 rc = register_blkdev(0, DRV_NAME);
4865 if (rc < 0)
4866 goto err_out_regions;
4867 BUG_ON(!rc);
4868 skd_major = rc;
4869 }
4870
4871 skdev = skd_construct(pdev);
4872 if (skdev == NULL) {
4873 rc = -ENOMEM;
4874 goto err_out_regions;
4875 }
4876
4877 skd_pci_info(skdev, pci_str);
4878 pr_info("(%s): %s 64bit\n", skd_name(skdev), pci_str);
4879
4880 pci_set_master(pdev);
4881 rc = pci_enable_pcie_error_reporting(pdev);
4882 if (rc) {
4883 pr_err(
4884 "(%s): bad enable of PCIe error reporting rc=%d\n",
4885 skd_name(skdev), rc);
4886 skdev->pcie_error_reporting_is_enabled = 0;
4887 } else
4888 skdev->pcie_error_reporting_is_enabled = 1;
4889
4890
4891 pci_set_drvdata(pdev, skdev);
4892
4893 skdev->disk->driverfs_dev = &pdev->dev;
4894
4895 for (i = 0; i < SKD_MAX_BARS; i++) {
4896 skdev->mem_phys[i] = pci_resource_start(pdev, i);
4897 skdev->mem_size[i] = (u32)pci_resource_len(pdev, i);
4898 skdev->mem_map[i] = ioremap(skdev->mem_phys[i],
4899 skdev->mem_size[i]);
4900 if (!skdev->mem_map[i]) {
4901 pr_err("(%s): Unable to map adapter memory!\n",
4902 skd_name(skdev));
4903 rc = -ENODEV;
4904 goto err_out_iounmap;
4905 }
4906 pr_debug("%s:%s:%d mem_map=%p, phyd=%016llx, size=%d\n",
4907 skdev->name, __func__, __LINE__,
4908 skdev->mem_map[i],
4909 (uint64_t)skdev->mem_phys[i], skdev->mem_size[i]);
4910 }
4911
4912 rc = skd_acquire_irq(skdev);
4913 if (rc) {
4914 pr_err("(%s): interrupt resource error %d\n",
4915 skd_name(skdev), rc);
4916 goto err_out_iounmap;
4917 }
4918
4919 rc = skd_start_timer(skdev);
4920 if (rc)
4921 goto err_out_timer;
4922
4923 init_waitqueue_head(&skdev->waitq);
4924
4925 skd_start_device(skdev);
4926
4927 rc = wait_event_interruptible_timeout(skdev->waitq,
4928 (skdev->gendisk_on),
4929 (SKD_START_WAIT_SECONDS * HZ));
4930 if (skdev->gendisk_on > 0) {
4931 /* device came on-line after reset */
4932 skd_bdev_attach(skdev);
4933 rc = 0;
4934 } else {
4935 /* we timed out, something is wrong with the device,
4936 don't add the disk structure */
4937 pr_err(
4938 "(%s): error: waiting for s1120 timed out %d!\n",
4939 skd_name(skdev), rc);
4940		/* no error was reported but we timed out, so return -ENXIO */
4941 if (!rc)
4942 rc = -ENXIO;
4943 goto err_out_timer;
4944 }
4945
4946
4947#ifdef SKD_VMK_POLL_HANDLER
4948 if (skdev->irq_type == SKD_IRQ_MSIX) {
4949 /* MSIX completion handler is being used for coredump */
4950 vmklnx_scsi_register_poll_handler(skdev->scsi_host,
4951 skdev->msix_entries[5].vector,
4952 skd_comp_q, skdev);
4953 } else {
4954 vmklnx_scsi_register_poll_handler(skdev->scsi_host,
4955 skdev->pdev->irq, skd_isr,
4956 skdev);
4957 }
4958#endif /* SKD_VMK_POLL_HANDLER */
4959
4960 return rc;
4961
4962err_out_timer:
4963 skd_stop_device(skdev);
4964 skd_release_irq(skdev);
4965
4966err_out_iounmap:
4967 for (i = 0; i < SKD_MAX_BARS; i++)
4968 if (skdev->mem_map[i])
4969 iounmap(skdev->mem_map[i]);
4970
4971 if (skdev->pcie_error_reporting_is_enabled)
4972 pci_disable_pcie_error_reporting(pdev);
4973
4974 skd_destruct(skdev);
4975
4976err_out_regions:
4977 pci_release_regions(pdev);
4978
4979err_out:
4980 pci_disable_device(pdev);
4981 pci_set_drvdata(pdev, NULL);
4982 return rc;
4983}
4984
4985static void skd_pci_remove(struct pci_dev *pdev)
4986{
4987 int i;
4988 struct skd_device *skdev;
4989
4990 skdev = pci_get_drvdata(pdev);
4991 if (!skdev) {
4992 pr_err("%s: no device data for PCI\n", pci_name(pdev));
4993 return;
4994 }
4995 skd_stop_device(skdev);
4996 skd_release_irq(skdev);
4997
4998 for (i = 0; i < SKD_MAX_BARS; i++)
4999 if (skdev->mem_map[i])
5000 iounmap((u32 *)skdev->mem_map[i]);
5001
5002 if (skdev->pcie_error_reporting_is_enabled)
5003 pci_disable_pcie_error_reporting(pdev);
5004
5005 skd_destruct(skdev);
5006
5007 pci_release_regions(pdev);
5008 pci_disable_device(pdev);
5009 pci_set_drvdata(pdev, NULL);
5010
5011 return;
5012}
5013
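/*
 * Suspend stops the device, releases interrupts, unmaps the BARs and
 * powers the device down; resume rebuilds the same resources and
 * restarts the device.
 */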
5014static int skd_pci_suspend(struct pci_dev *pdev, pm_message_t state)
5015{
5016 int i;
5017 struct skd_device *skdev;
5018
5019 skdev = pci_get_drvdata(pdev);
5020 if (!skdev) {
5021 pr_err("%s: no device data for PCI\n", pci_name(pdev));
5022 return -EIO;
5023 }
5024
5025 skd_stop_device(skdev);
5026
5027 skd_release_irq(skdev);
5028
5029 for (i = 0; i < SKD_MAX_BARS; i++)
5030 if (skdev->mem_map[i])
5031 iounmap((u32 *)skdev->mem_map[i]);
5032
5033 if (skdev->pcie_error_reporting_is_enabled)
5034 pci_disable_pcie_error_reporting(pdev);
5035
5036 pci_release_regions(pdev);
5037 pci_save_state(pdev);
5038 pci_disable_device(pdev);
5039 pci_set_power_state(pdev, pci_choose_state(pdev, state));
5040 return 0;
5041}
5042
5043static int skd_pci_resume(struct pci_dev *pdev)
5044{
5045 int i;
5046 int rc = 0;
5047 struct skd_device *skdev;
5048
5049 skdev = pci_get_drvdata(pdev);
5050 if (!skdev) {
5051 pr_err("%s: no device data for PCI\n", pci_name(pdev));
5052 return -1;
5053 }
5054
5055 pci_set_power_state(pdev, PCI_D0);
5056 pci_enable_wake(pdev, PCI_D0, 0);
5057 pci_restore_state(pdev);
5058
5059 rc = pci_enable_device(pdev);
5060 if (rc)
5061 return rc;
5062 rc = pci_request_regions(pdev, DRV_NAME);
5063 if (rc)
5064 goto err_out;
5065 rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
5066 if (!rc) {
5067 if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) {
5068
5069 pr_err("(%s): consistent DMA mask error %d\n",
5070 pci_name(pdev), rc);
5071 }
5072 } else {
5073 rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
5074 if (rc) {
5075
5076 pr_err("(%s): DMA mask error %d\n",
5077 pci_name(pdev), rc);
5078 goto err_out_regions;
5079 }
5080 }
5081
5082 pci_set_master(pdev);
5083 rc = pci_enable_pcie_error_reporting(pdev);
5084 if (rc) {
5085 pr_err("(%s): bad enable of PCIe error reporting rc=%d\n",
5086 skdev->name, rc);
5087 skdev->pcie_error_reporting_is_enabled = 0;
5088 } else
5089 skdev->pcie_error_reporting_is_enabled = 1;
5090
5091 for (i = 0; i < SKD_MAX_BARS; i++) {
5092
5093 skdev->mem_phys[i] = pci_resource_start(pdev, i);
5094 skdev->mem_size[i] = (u32)pci_resource_len(pdev, i);
5095 skdev->mem_map[i] = ioremap(skdev->mem_phys[i],
5096 skdev->mem_size[i]);
5097 if (!skdev->mem_map[i]) {
5098 pr_err("(%s): Unable to map adapter memory!\n",
5099 skd_name(skdev));
5100 rc = -ENODEV;
5101 goto err_out_iounmap;
5102 }
5103 pr_debug("%s:%s:%d mem_map=%p, phyd=%016llx, size=%d\n",
5104 skdev->name, __func__, __LINE__,
5105 skdev->mem_map[i],
5106 (uint64_t)skdev->mem_phys[i], skdev->mem_size[i]);
5107 }
5108 rc = skd_acquire_irq(skdev);
5109 if (rc) {
5110
5111 pr_err("(%s): interrupt resource error %d\n",
5112 pci_name(pdev), rc);
5113 goto err_out_iounmap;
5114 }
5115
5116 rc = skd_start_timer(skdev);
5117 if (rc)
5118 goto err_out_timer;
5119
5120 init_waitqueue_head(&skdev->waitq);
5121
5122 skd_start_device(skdev);
5123
5124 return rc;
5125
5126err_out_timer:
5127 skd_stop_device(skdev);
5128 skd_release_irq(skdev);
5129
5130err_out_iounmap:
5131 for (i = 0; i < SKD_MAX_BARS; i++)
5132 if (skdev->mem_map[i])
5133 iounmap(skdev->mem_map[i]);
5134
5135 if (skdev->pcie_error_reporting_is_enabled)
5136 pci_disable_pcie_error_reporting(pdev);
5137
5138err_out_regions:
5139 pci_release_regions(pdev);
5140
5141err_out:
5142 pci_disable_device(pdev);
5143 return rc;
5144}
5145
5146static void skd_pci_shutdown(struct pci_dev *pdev)
5147{
5148 struct skd_device *skdev;
5149
5150 pr_err("skd_pci_shutdown called\n");
5151
5152 skdev = pci_get_drvdata(pdev);
5153 if (!skdev) {
5154 pr_err("%s: no device data for PCI\n", pci_name(pdev));
5155 return;
5156 }
5157
5158 pr_err("%s: calling stop\n", skd_name(skdev));
5159 skd_stop_device(skdev);
5160}
5161
5162static struct pci_driver skd_driver = {
5163 .name = DRV_NAME,
5164 .id_table = skd_pci_tbl,
5165 .probe = skd_pci_probe,
5166 .remove = skd_pci_remove,
5167 .suspend = skd_pci_suspend,
5168 .resume = skd_pci_resume,
5169 .shutdown = skd_pci_shutdown,
5170};
5171
5172/*
5173 *****************************************************************************
5174 * LOGGING SUPPORT
5175 *****************************************************************************
5176 */
5177
5178static const char *skd_name(struct skd_device *skdev)
5179{
5180 memset(skdev->id_str, 0, sizeof(skdev->id_str));
5181
5182 if (skdev->inquiry_is_valid)
5183 snprintf(skdev->id_str, sizeof(skdev->id_str), "%s:%s:[%s]",
5184 skdev->name, skdev->inq_serial_num,
5185 pci_name(skdev->pdev));
5186 else
5187 snprintf(skdev->id_str, sizeof(skdev->id_str), "%s:??:[%s]",
5188 skdev->name, pci_name(skdev->pdev));
5189
5190 return skdev->id_str;
5191}
5192
5193const char *skd_drive_state_to_str(int state)
5194{
5195 switch (state) {
5196 case FIT_SR_DRIVE_OFFLINE:
5197 return "OFFLINE";
5198 case FIT_SR_DRIVE_INIT:
5199 return "INIT";
5200 case FIT_SR_DRIVE_ONLINE:
5201 return "ONLINE";
5202 case FIT_SR_DRIVE_BUSY:
5203 return "BUSY";
5204 case FIT_SR_DRIVE_FAULT:
5205 return "FAULT";
5206 case FIT_SR_DRIVE_DEGRADED:
5207 return "DEGRADED";
5208 case FIT_SR_PCIE_LINK_DOWN:
5209		return "LINK_DOWN";
5210 case FIT_SR_DRIVE_SOFT_RESET:
5211 return "SOFT_RESET";
5212 case FIT_SR_DRIVE_NEED_FW_DOWNLOAD:
5213 return "NEED_FW";
5214 case FIT_SR_DRIVE_INIT_FAULT:
5215 return "INIT_FAULT";
5216 case FIT_SR_DRIVE_BUSY_SANITIZE:
5217 return "BUSY_SANITIZE";
5218 case FIT_SR_DRIVE_BUSY_ERASE:
5219 return "BUSY_ERASE";
5220 case FIT_SR_DRIVE_FW_BOOTING:
5221 return "FW_BOOTING";
5222 default:
5223 return "???";
5224 }
5225}
5226
5227const char *skd_skdev_state_to_str(enum skd_drvr_state state)
5228{
5229 switch (state) {
5230 case SKD_DRVR_STATE_LOAD:
5231 return "LOAD";
5232 case SKD_DRVR_STATE_IDLE:
5233 return "IDLE";
5234 case SKD_DRVR_STATE_BUSY:
5235 return "BUSY";
5236 case SKD_DRVR_STATE_STARTING:
5237 return "STARTING";
5238 case SKD_DRVR_STATE_ONLINE:
5239 return "ONLINE";
5240 case SKD_DRVR_STATE_PAUSING:
5241 return "PAUSING";
5242 case SKD_DRVR_STATE_PAUSED:
5243 return "PAUSED";
5244 case SKD_DRVR_STATE_DRAINING_TIMEOUT:
5245 return "DRAINING_TIMEOUT";
5246 case SKD_DRVR_STATE_RESTARTING:
5247 return "RESTARTING";
5248 case SKD_DRVR_STATE_RESUMING:
5249 return "RESUMING";
5250 case SKD_DRVR_STATE_STOPPING:
5251 return "STOPPING";
5252 case SKD_DRVR_STATE_SYNCING:
5253 return "SYNCING";
5254 case SKD_DRVR_STATE_FAULT:
5255 return "FAULT";
5256 case SKD_DRVR_STATE_DISAPPEARED:
5257 return "DISAPPEARED";
5258 case SKD_DRVR_STATE_BUSY_ERASE:
5259 return "BUSY_ERASE";
5260 case SKD_DRVR_STATE_BUSY_SANITIZE:
5261 return "BUSY_SANITIZE";
5262 case SKD_DRVR_STATE_BUSY_IMMINENT:
5263 return "BUSY_IMMINENT";
5264 case SKD_DRVR_STATE_WAIT_BOOT:
5265 return "WAIT_BOOT";
5266
5267 default:
5268 return "???";
5269 }
5270}
5271
5272const char *skd_skmsg_state_to_str(enum skd_fit_msg_state state)
5273{
5274 switch (state) {
5275 case SKD_MSG_STATE_IDLE:
5276 return "IDLE";
5277 case SKD_MSG_STATE_BUSY:
5278 return "BUSY";
5279 default:
5280 return "???";
5281 }
5282}
5283
5284const char *skd_skreq_state_to_str(enum skd_req_state state)
5285{
5286 switch (state) {
5287 case SKD_REQ_STATE_IDLE:
5288 return "IDLE";
5289 case SKD_REQ_STATE_SETUP:
5290 return "SETUP";
5291 case SKD_REQ_STATE_BUSY:
5292 return "BUSY";
5293 case SKD_REQ_STATE_COMPLETED:
5294 return "COMPLETED";
5295 case SKD_REQ_STATE_TIMEOUT:
5296 return "TIMEOUT";
5297 case SKD_REQ_STATE_ABORTED:
5298 return "ABORTED";
5299 default:
5300 return "???";
5301 }
5302}
5303
5304static void skd_log_skdev(struct skd_device *skdev, const char *event)
5305{
5306 pr_debug("%s:%s:%d (%s) skdev=%p event='%s'\n",
5307 skdev->name, __func__, __LINE__, skdev->name, skdev, event);
5308 pr_debug("%s:%s:%d drive_state=%s(%d) driver_state=%s(%d)\n",
5309 skdev->name, __func__, __LINE__,
5310 skd_drive_state_to_str(skdev->drive_state), skdev->drive_state,
5311 skd_skdev_state_to_str(skdev->state), skdev->state);
5312 pr_debug("%s:%s:%d busy=%d limit=%d dev=%d lowat=%d\n",
5313 skdev->name, __func__, __LINE__,
5314 skdev->in_flight, skdev->cur_max_queue_depth,
5315 skdev->dev_max_queue_depth, skdev->queue_low_water_mark);
5316 pr_debug("%s:%s:%d timestamp=0x%x cycle=%d cycle_ix=%d\n",
5317 skdev->name, __func__, __LINE__,
5318 skdev->timeout_stamp, skdev->skcomp_cycle, skdev->skcomp_ix);
5319}
5320
5321static void skd_log_skmsg(struct skd_device *skdev,
5322 struct skd_fitmsg_context *skmsg, const char *event)
5323{
5324 pr_debug("%s:%s:%d (%s) skmsg=%p event='%s'\n",
5325 skdev->name, __func__, __LINE__, skdev->name, skmsg, event);
5326 pr_debug("%s:%s:%d state=%s(%d) id=0x%04x length=%d\n",
5327 skdev->name, __func__, __LINE__,
5328 skd_skmsg_state_to_str(skmsg->state), skmsg->state,
5329 skmsg->id, skmsg->length);
5330}
5331
5332static void skd_log_skreq(struct skd_device *skdev,
5333 struct skd_request_context *skreq, const char *event)
5334{
5335 pr_debug("%s:%s:%d (%s) skreq=%p event='%s'\n",
5336 skdev->name, __func__, __LINE__, skdev->name, skreq, event);
5337 pr_debug("%s:%s:%d state=%s(%d) id=0x%04x fitmsg=0x%04x\n",
5338 skdev->name, __func__, __LINE__,
5339 skd_skreq_state_to_str(skreq->state), skreq->state,
5340 skreq->id, skreq->fitmsg_id);
5341 pr_debug("%s:%s:%d timo=0x%x sg_dir=%d n_sg=%d\n",
5342 skdev->name, __func__, __LINE__,
5343 skreq->timeout_stamp, skreq->sg_data_dir, skreq->n_sg);
5344
5345 if (skreq->req != NULL) {
5346 struct request *req = skreq->req;
5347 u32 lba = (u32)blk_rq_pos(req);
5348 u32 count = blk_rq_sectors(req);
5349
5350 pr_debug("%s:%s:%d "
5351 "req=%p lba=%u(0x%x) count=%u(0x%x) dir=%d\n",
5352 skdev->name, __func__, __LINE__,
5353 req, lba, lba, count, count,
5354 (int)rq_data_dir(req));
5355 } else
5356 pr_debug("%s:%s:%d req=NULL\n",
5357 skdev->name, __func__, __LINE__);
5358}
5359
5360/*
5361 *****************************************************************************
5362 * MODULE GLUE
5363 *****************************************************************************
5364 */
5365
5366static int __init skd_init(void)
5367{
5368 pr_info(PFX " v%s-b%s loaded\n", DRV_VERSION, DRV_BUILD_ID);
5369
5370 switch (skd_isr_type) {
5371 case SKD_IRQ_LEGACY:
5372 case SKD_IRQ_MSI:
5373 case SKD_IRQ_MSIX:
5374 break;
5375 default:
5376 pr_err(PFX "skd_isr_type %d invalid, re-set to %d\n",
5377 skd_isr_type, SKD_IRQ_DEFAULT);
5378 skd_isr_type = SKD_IRQ_DEFAULT;
5379 }
5380
5381 if (skd_max_queue_depth < 1 ||
5382 skd_max_queue_depth > SKD_MAX_QUEUE_DEPTH) {
5383 pr_err(PFX "skd_max_queue_depth %d invalid, re-set to %d\n",
5384 skd_max_queue_depth, SKD_MAX_QUEUE_DEPTH_DEFAULT);
5385 skd_max_queue_depth = SKD_MAX_QUEUE_DEPTH_DEFAULT;
5386 }
5387
5388 if (skd_max_req_per_msg < 1 || skd_max_req_per_msg > 14) {
5389 pr_err(PFX "skd_max_req_per_msg %d invalid, re-set to %d\n",
5390 skd_max_req_per_msg, SKD_MAX_REQ_PER_MSG_DEFAULT);
5391 skd_max_req_per_msg = SKD_MAX_REQ_PER_MSG_DEFAULT;
5392 }
5393
5394 if (skd_sgs_per_request < 1 || skd_sgs_per_request > 4096) {
5395		pr_err(PFX "skd_sgs_per_request %d invalid, re-set to %d\n",
5396 skd_sgs_per_request, SKD_N_SG_PER_REQ_DEFAULT);
5397 skd_sgs_per_request = SKD_N_SG_PER_REQ_DEFAULT;
5398 }
5399
5400 if (skd_dbg_level < 0 || skd_dbg_level > 2) {
5401 pr_err(PFX "skd_dbg_level %d invalid, re-set to %d\n",
5402 skd_dbg_level, 0);
5403 skd_dbg_level = 0;
5404 }
5405
5406 if (skd_isr_comp_limit < 0) {
5407 pr_err(PFX "skd_isr_comp_limit %d invalid, set to %d\n",
5408 skd_isr_comp_limit, 0);
5409 skd_isr_comp_limit = 0;
5410 }
5411
5412 if (skd_max_pass_thru < 1 || skd_max_pass_thru > 50) {
5413 pr_err(PFX "skd_max_pass_thru %d invalid, re-set to %d\n",
5414 skd_max_pass_thru, SKD_N_SPECIAL_CONTEXT);
5415 skd_max_pass_thru = SKD_N_SPECIAL_CONTEXT;
5416 }
5417
5418 return pci_register_driver(&skd_driver);
5419}
5420
5421static void __exit skd_exit(void)
5422{
5423 pr_info(PFX " v%s-b%s unloading\n", DRV_VERSION, DRV_BUILD_ID);
5424
5425 pci_unregister_driver(&skd_driver);
5426
5427 if (skd_major)
5428 unregister_blkdev(skd_major, DRV_NAME);
5429}
5430
5431module_init(skd_init);
5432module_exit(skd_exit);
diff --git a/drivers/block/skd_s1120.h b/drivers/block/skd_s1120.h
new file mode 100644
index 000000000000..61c757ff0161
--- /dev/null
+++ b/drivers/block/skd_s1120.h
@@ -0,0 +1,330 @@
1/* Copyright 2012 STEC, Inc.
2 *
3 * This file is licensed under the terms of the 3-clause
4 * BSD License (http://opensource.org/licenses/BSD-3-Clause)
5 * or the GNU GPL-2.0 (http://www.gnu.org/licenses/gpl-2.0.html),
6 * at your option. Both licenses are also available in the LICENSE file
7 * distributed with this project. This file may not be copied, modified,
8 * or distributed except in accordance with those terms.
9 */
10
11
12#ifndef SKD_S1120_H
13#define SKD_S1120_H
14
15#pragma pack(push, s1120_h, 1)
16
17/*
18 * Q-channel, 64-bit r/w
19 */
20#define FIT_Q_COMMAND 0x400u
21#define FIT_QCMD_QID_MASK (0x3 << 1)
22#define FIT_QCMD_QID0 (0x0 << 1)
23#define FIT_QCMD_QID_NORMAL FIT_QCMD_QID0
24#define FIT_QCMD_QID1 (0x1 << 1)
25#define FIT_QCMD_QID2 (0x2 << 1)
26#define FIT_QCMD_QID3 (0x3 << 1)
27#define FIT_QCMD_FLUSH_QUEUE (0ull) /* add QID */
28#define FIT_QCMD_MSGSIZE_MASK (0x3 << 4)
29#define FIT_QCMD_MSGSIZE_64 (0x0 << 4)
30#define FIT_QCMD_MSGSIZE_128 (0x1 << 4)
31#define FIT_QCMD_MSGSIZE_256 (0x2 << 4)
32#define FIT_QCMD_MSGSIZE_512 (0x3 << 4)
33#define FIT_QCMD_BASE_ADDRESS_MASK (0xFFFFFFFFFFFFFFC0ull)
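Because a FIT message must be 64-byte aligned, the low six bits of its bus address are free to carry the queue ID and message-size flags defined above, and the composed 64-bit value is written to FIT_Q_COMMAND in one shot. A minimal sketch, assuming an ioremap'd register BAR: fit_post_msg, mmio and msg_dma are illustrative names, not part of this header, and writeq presumes native 64-bit MMIO writes.

    #include <linux/io.h>
    #include <linux/types.h>
    #include "skd_s1120.h"

    static void fit_post_msg(void __iomem *mmio, dma_addr_t msg_dma)
    {
            /* bits 63:6 carry the 64-byte-aligned FIT message address */
            u64 qcmd = msg_dma & FIT_QCMD_BASE_ADDRESS_MASK;

            qcmd |= FIT_QCMD_QID_NORMAL;    /* post to queue 0 */
            qcmd |= FIT_QCMD_MSGSIZE_64;    /* a single 64-byte FIT message */
            writeq(qcmd, mmio + FIT_Q_COMMAND);
    }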
34
35/*
36 * Control, 32-bit r/w
37 */
38#define FIT_CONTROL 0x500u
39#define FIT_CR_HARD_RESET (1u << 0u)
40#define FIT_CR_SOFT_RESET (1u << 1u)
41#define FIT_CR_DIS_TIMESTAMPS (1u << 6u)
42#define FIT_CR_ENABLE_INTERRUPTS (1u << 7u)
43
44/*
45 * Status, 32-bit, r/o
46 */
47#define FIT_STATUS 0x510u
48#define FIT_SR_DRIVE_STATE_MASK 0x000000FFu
49#define FIT_SR_SIGNATURE (0xFF << 8)
50#define FIT_SR_PIO_DMA (1 << 16)
51#define FIT_SR_DRIVE_OFFLINE 0x00
52#define FIT_SR_DRIVE_INIT 0x01
53/* #define FIT_SR_DRIVE_READY 0x02 */
54#define FIT_SR_DRIVE_ONLINE 0x03
55#define FIT_SR_DRIVE_BUSY 0x04
56#define FIT_SR_DRIVE_FAULT 0x05
57#define FIT_SR_DRIVE_DEGRADED 0x06
58#define FIT_SR_PCIE_LINK_DOWN 0x07
59#define FIT_SR_DRIVE_SOFT_RESET 0x08
60#define FIT_SR_DRIVE_INIT_FAULT 0x09
61#define FIT_SR_DRIVE_BUSY_SANITIZE 0x0A
62#define FIT_SR_DRIVE_BUSY_ERASE 0x0B
63#define FIT_SR_DRIVE_FW_BOOTING 0x0C
64#define FIT_SR_DRIVE_NEED_FW_DOWNLOAD 0xFE
65#define FIT_SR_DEVICE_MISSING 0xFF
66#define FIT_SR__RESERVED 0xFFFFFF00u
67
68/*
69 * FIT_STATUS - Status register data definition
70 */
71#define FIT_SR_STATE_MASK (0xFF << 0)
72#define FIT_SR_SIGNATURE (0xFF << 8)
73#define FIT_SR_PIO_DMA (1 << 16)
74
75/*
76 * Interrupt status, 32-bit r/w1c (w1c ==> write 1 to clear)
77 */
78#define FIT_INT_STATUS_HOST 0x520u
79#define FIT_ISH_FW_STATE_CHANGE (1u << 0u)
80#define FIT_ISH_COMPLETION_POSTED (1u << 1u)
81#define FIT_ISH_MSG_FROM_DEV (1u << 2u)
82#define FIT_ISH_UNDEFINED_3 (1u << 3u)
83#define FIT_ISH_UNDEFINED_4 (1u << 4u)
84#define FIT_ISH_Q0_FULL (1u << 5u)
85#define FIT_ISH_Q1_FULL (1u << 6u)
86#define FIT_ISH_Q2_FULL (1u << 7u)
87#define FIT_ISH_Q3_FULL (1u << 8u)
88#define FIT_ISH_QCMD_FIFO_OVERRUN (1u << 9u)
89#define FIT_ISH_BAD_EXP_ROM_READ (1u << 10u)
90
91#define FIT_INT_DEF_MASK \
92 (FIT_ISH_FW_STATE_CHANGE | \
93 FIT_ISH_COMPLETION_POSTED | \
94 FIT_ISH_MSG_FROM_DEV | \
95 FIT_ISH_Q0_FULL | \
96 FIT_ISH_Q1_FULL | \
97 FIT_ISH_Q2_FULL | \
98 FIT_ISH_Q3_FULL | \
99 FIT_ISH_QCMD_FIFO_OVERRUN | \
100 FIT_ISH_BAD_EXP_ROM_READ)
101
102#define FIT_INT_QUEUE_FULL \
103 (FIT_ISH_Q0_FULL | \
104 FIT_ISH_Q1_FULL | \
105 FIT_ISH_Q2_FULL | \
106 FIT_ISH_Q3_FULL)
107
108#define MSI_MSG_NWL_ERROR_0 0x00000000
109#define MSI_MSG_NWL_ERROR_1 0x00000001
110#define MSI_MSG_NWL_ERROR_2 0x00000002
111#define MSI_MSG_NWL_ERROR_3 0x00000003
112#define MSI_MSG_STATE_CHANGE 0x00000004
113#define MSI_MSG_COMPLETION_POSTED 0x00000005
114#define MSI_MSG_MSG_FROM_DEV 0x00000006
115#define MSI_MSG_RESERVED_0 0x00000007
116#define MSI_MSG_RESERVED_1 0x00000008
117#define MSI_MSG_QUEUE_0_FULL 0x00000009
118#define MSI_MSG_QUEUE_1_FULL 0x0000000A
119#define MSI_MSG_QUEUE_2_FULL 0x0000000B
120#define MSI_MSG_QUEUE_3_FULL 0x0000000C
121
122#define FIT_INT_RESERVED_MASK \
123 (FIT_ISH_UNDEFINED_3 | \
124 FIT_ISH_UNDEFINED_4)
125
126/*
127 * Interrupt mask, 32-bit r/w
128 * Bit definitions are the same as FIT_INT_STATUS_HOST
129 */
130#define FIT_INT_MASK_HOST 0x528u
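FIT_INT_STATUS_HOST is write-1-to-clear and FIT_INT_MASK_HOST shares its bit layout, so a handler can acknowledge exactly what it observed by writing the read value back. A hedged sketch (mmio is an assumed ioremap'd BAR pointer; a real ISR would then dispatch on the individual FIT_ISH_* bits):

    #include <linux/io.h>
    #include "skd_s1120.h"

    static u32 fit_ack_interrupts(void __iomem *mmio)
    {
            u32 intstat = readl(mmio + FIT_INT_STATUS_HOST);

            if (intstat)    /* write 1s back to clear only the bits we saw */
                    writel(intstat, mmio + FIT_INT_STATUS_HOST);
            return intstat;
    }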
131
132/*
133 * Message to device, 32-bit r/w
134 */
135#define FIT_MSG_TO_DEVICE 0x540u
136
137/*
138 * Message from device, 32-bit, r/o
139 */
140#define FIT_MSG_FROM_DEVICE 0x548u
141
142/*
143 * 32-bit messages to/from device, composition/extraction macros
144 */
145#define FIT_MXD_CONS(TYPE, PARAM, DATA) \
146 ((((TYPE) & 0xFFu) << 24u) | \
147 (((PARAM) & 0xFFu) << 16u) | \
148 (((DATA) & 0xFFFFu) << 0u))
149#define FIT_MXD_TYPE(MXD) (((MXD) >> 24u) & 0xFFu)
150#define FIT_MXD_PARAM(MXD) (((MXD) >> 16u) & 0xFFu)
151#define FIT_MXD_DATA(MXD) (((MXD) >> 0u) & 0xFFFFu)
152
153/*
154 * Types of messages to/from device
155 */
156#define FIT_MTD_FITFW_INIT 0x01u
157#define FIT_MTD_GET_CMDQ_DEPTH 0x02u
158#define FIT_MTD_SET_COMPQ_DEPTH 0x03u
159#define FIT_MTD_SET_COMPQ_ADDR 0x04u
160#define FIT_MTD_ARM_QUEUE 0x05u
161#define FIT_MTD_CMD_LOG_HOST_ID 0x07u
162#define FIT_MTD_CMD_LOG_TIME_STAMP_LO 0x08u
163#define FIT_MTD_CMD_LOG_TIME_STAMP_HI 0x09u
164#define FIT_MFD_SMART_EXCEEDED 0x10u
165#define FIT_MFD_POWER_DOWN 0x11u
166#define FIT_MFD_OFFLINE 0x12u
167#define FIT_MFD_ONLINE 0x13u
168#define FIT_MFD_FW_RESTARTING 0x14u
169#define FIT_MFD_PM_ACTIVE 0x15u
170#define FIT_MFD_PM_STANDBY 0x16u
171#define FIT_MFD_PM_SLEEP 0x17u
172#define FIT_MFD_CMD_PROGRESS 0x18u
173
174#define FIT_MTD_DEBUG 0xFEu
175#define FIT_MFD_DEBUG 0xFFu
176
177#define FIT_MFD_MASK (0xFFu)
178#define FIT_MFD_DATA_MASK (0xFFu)
179#define FIT_MFD_MSG(x) (((x) >> 24) & FIT_MFD_MASK)
180#define FIT_MFD_DATA(x) ((x) & FIT_MFD_MASK)
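As a hedged illustration of the composition/extraction macros, the sketch below posts a FIT_MTD_FITFW_INIT message whose data field is assumed to carry the protocol version the host speaks, then decodes one reply word. In practice this exchange is interrupt-driven rather than read back immediately; fit_handshake_sketch and mmio are illustrative names.

    #include <linux/io.h>
    #include <linux/printk.h>
    #include "skd_s1120.h"

    static void fit_handshake_sketch(void __iomem *mmio)
    {
            u32 mtd, mfd;

            /* type = FITFW_INIT, param = 0, data = protocol version (assumed) */
            mtd = FIT_MXD_CONS(FIT_MTD_FITFW_INIT, 0, FIT_PROTOCOL_VERSION_CURRENT);
            writel(mtd, mmio + FIT_MSG_TO_DEVICE);

            mfd = readl(mmio + FIT_MSG_FROM_DEVICE);
            if (FIT_MXD_TYPE(mfd) == FIT_MFD_ONLINE)
                    pr_info("drive online, data=0x%x\n", FIT_MXD_DATA(mfd));
    }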
181
182/*
183 * Extra arg to FIT_MSG_TO_DEVICE, 64-bit r/w
184 * Used to set completion queue address (FIT_MTD_SET_COMPQ_ADDR)
185 * (was Response buffer in docs)
186 */
187#define FIT_MSG_TO_DEVICE_ARG 0x580u
188
189/*
190 * Hardware (ASIC) version, 32-bit r/o
191 */
192#define FIT_HW_VERSION 0x588u
193
194/*
195 * Scatter/gather list descriptor.
196 * 32-bytes and must be aligned on a 32-byte boundary.
197 * All fields are in little endian order.
198 */
199struct fit_sg_descriptor {
200 uint32_t control;
201 uint32_t byte_count;
202 uint64_t host_side_addr;
203 uint64_t dev_side_addr;
204 uint64_t next_desc_ptr;
205};
206
207#define FIT_SGD_CONTROL_NOT_LAST 0x000u
208#define FIT_SGD_CONTROL_LAST 0x40Eu
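A minimal sketch of filling one final descriptor for a host-to-device buffer; fit_fill_single_sgd, buf_dma and len are illustrative. The fields are declared as plain integers, so a big-endian host would additionally need explicit cpu_to_le conversions, and the descriptor itself must live in 32-byte-aligned, DMA-visible memory.

    #include <linux/types.h>
    #include "skd_s1120.h"

    static void fit_fill_single_sgd(struct fit_sg_descriptor *sgd,
                                    dma_addr_t buf_dma, u32 len)
    {
            sgd->control = FIT_SGD_CONTROL_LAST;    /* only and last entry */
            sgd->byte_count = len;
            sgd->host_side_addr = buf_dma;
            sgd->dev_side_addr = 0;
            sgd->next_desc_ptr = 0;                 /* nothing chained after this */
    }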
209
210/*
211 * Header at the beginning of a FIT message. The header
212 * is followed by SSDI requests each 64 bytes.
213 * A FIT message can be up to 512 bytes long and must start
214 * on a 64-byte boundary.
215 */
216struct fit_msg_hdr {
217 uint8_t protocol_id;
218 uint8_t num_protocol_cmds_coalesced;
219 uint8_t _reserved[62];
220};
221
222#define FIT_PROTOCOL_ID_FIT 1
223#define FIT_PROTOCOL_ID_SSDI 2
224#define FIT_PROTOCOL_ID_SOFIT 3
225
226
227#define FIT_PROTOCOL_MINOR_VER(mtd_val) ((mtd_val >> 16) & 0xF)
228#define FIT_PROTOCOL_MAJOR_VER(mtd_val) ((mtd_val >> 20) & 0xF)
229
230/*
231 * Format of a completion entry. The completion queue is circular
232 * and must have at least as many entries as the maximum number
233 * of commands that may be issued to the device.
234 *
235 * There are no head/tail pointers. The cycle value is used to
236 * infer the presence of new completion records.
237 * Initially the cycle in all entries is 0, the index is 0, and
238 * the cycle value to expect is 1. When completions are added
239 * their cycle values are set to 1. When the index wraps the
240 * cycle value to expect is incremented.
241 *
242 * Command_context is opaque and taken verbatim from the SSDI command.
243 * All other fields are big endian.
244 */
245#define FIT_PROTOCOL_VERSION_0 0
246
247/*
248 * Protocol major version 1 completion entry.
249 * The major protocol version is found in bits
250 * 20-23 of the FIT_MTD_FITFW_INIT response.
251 */
252struct fit_completion_entry_v1 {
253 uint32_t num_returned_bytes;
254 uint16_t tag;
255 uint8_t status; /* SCSI status */
256 uint8_t cycle;
257};
258#define FIT_PROTOCOL_VERSION_1 1
259#define FIT_PROTOCOL_VERSION_CURRENT FIT_PROTOCOL_VERSION_1
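A hedged sketch of the cycle-driven consumer described above: entries are drained until one whose cycle differs from the expected value is found, and the expected cycle is bumped whenever the index wraps. fit_drain_completions and its parameters are illustrative; a real handler would also hand ent->tag and ent->status to per-command completion code and bound the work done per interrupt.

    #include <linux/types.h>
    #include "skd_s1120.h"

    static void fit_drain_completions(struct fit_completion_entry_v1 *cq,
                                      u32 nentries, u32 *ix, u8 *expect_cycle)
    {
            for (;;) {
                    struct fit_completion_entry_v1 *ent = &cq[*ix];

                    if (ent->cycle != *expect_cycle)
                            break;          /* no new completion posted here yet */

                    /* ... complete the command identified by ent->tag ... */

                    if (++(*ix) >= nentries) {
                            *ix = 0;
                            (*expect_cycle)++;      /* expect the next cycle after a wrap */
                    }
            }
    }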
260
261struct fit_comp_error_info {
262	uint8_t type:7;	/* 00: Bits0-6 indicate the type of sense data. */
263 uint8_t valid:1; /* 00: Bit 7 := 1 ==> info field is valid. */
264 uint8_t reserved0; /* 01: Obsolete field */
265 uint8_t key:4; /* 02: Bits0-3 indicate the sense key. */
266 uint8_t reserved2:1; /* 02: Reserved bit. */
267 uint8_t bad_length:1; /* 02: Incorrect Length Indicator */
268 uint8_t end_medium:1; /* 02: End of Medium */
269 uint8_t file_mark:1; /* 02: Filemark */
270 uint8_t info[4]; /* 03: */
271 uint8_t reserved1; /* 07: Additional Sense Length */
272 uint8_t cmd_spec[4]; /* 08: Command Specific Information */
273 uint8_t code; /* 0C: Additional Sense Code */
274 uint8_t qual; /* 0D: Additional Sense Code Qualifier */
275 uint8_t fruc; /* 0E: Field Replaceable Unit Code */
276 uint8_t sks_high:7; /* 0F: Sense Key Specific (MSB) */
277 uint8_t sks_valid:1; /* 0F: Sense Key Specific Valid */
278 uint16_t sks_low; /* 10: Sense Key Specific (LSW) */
279 uint16_t reserved3; /* 12: Part of additional sense bytes (unused) */
280 uint16_t uec; /* 14: Additional Sense Bytes */
281 uint64_t per; /* 16: Additional Sense Bytes */
282 uint8_t reserved4[2]; /* 1E: Additional Sense Bytes (unused) */
283};
284
285
286/* Task management constants */
287#define SOFT_TASK_SIMPLE 0x00
288#define SOFT_TASK_HEAD_OF_QUEUE 0x01
289#define SOFT_TASK_ORDERED 0x02
290
291/* Version zero has the last 32 bits reserved;
292 * version one uses them for sg_list_len_bytes.
293 */
294struct skd_command_header {
295 uint64_t sg_list_dma_address;
296 uint16_t tag;
297 uint8_t attribute;
298 uint8_t add_cdb_len; /* In 32 bit words */
299 uint32_t sg_list_len_bytes;
300};
301
302struct skd_scsi_request {
303 struct skd_command_header hdr;
304 unsigned char cdb[16];
305/* unsigned char _reserved[16]; */
306};
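Tying the layout together: per the fit_msg_hdr comment earlier in this file, a FIT message is a 64-byte header followed by up to seven 64-byte SSDI request slots within a 512-byte, 64-byte-aligned buffer. A minimal sketch of locating a slot; fit_msg_slot, msg_buf and FIT_MSG_SLOT_BYTES are illustrative, and bounds checking is omitted.

    #include <linux/types.h>
    #include "skd_s1120.h"

    #define FIT_MSG_SLOT_BYTES 64   /* illustrative: header and each request take 64 bytes */

    static struct skd_scsi_request *fit_msg_slot(void *msg_buf, u8 slot)
    {
            struct fit_msg_hdr *hdr = msg_buf;

            hdr->protocol_id = FIT_PROTOCOL_ID_SSDI;
            hdr->num_protocol_cmds_coalesced = slot + 1;    /* requests queued so far */

            /* slot 0 begins immediately after the 64-byte header */
            return (struct skd_scsi_request *)
                    ((u8 *)msg_buf + FIT_MSG_SLOT_BYTES * (slot + 1));
    }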
307
308struct driver_inquiry_data {
309 uint8_t peripheral_device_type:5;
310 uint8_t qualifier:3;
311 uint8_t page_code;
312 uint16_t page_length;
313 uint16_t pcie_bus_number;
314 uint8_t pcie_device_number;
315 uint8_t pcie_function_number;
316 uint8_t pcie_link_speed;
317 uint8_t pcie_link_lanes;
318 uint16_t pcie_vendor_id;
319 uint16_t pcie_device_id;
320 uint16_t pcie_subsystem_vendor_id;
321 uint16_t pcie_subsystem_device_id;
322 uint8_t reserved1[2];
323 uint8_t reserved2[3];
324 uint8_t driver_version_length;
325 uint8_t driver_version[0x14];
326};
327
328#pragma pack(pop, s1120_h)
329
330#endif /* SKD_S1120_H */
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 8ed6ccb748cf..b02d53a399f3 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -924,7 +924,6 @@ static int swim_probe(struct platform_device *dev)
924 return 0; 924 return 0;
925 925
926out_kfree: 926out_kfree:
927 platform_set_drvdata(dev, NULL);
928 kfree(swd); 927 kfree(swd);
929out_iounmap: 928out_iounmap:
930 iounmap(swim_base); 929 iounmap(swim_base);
@@ -962,7 +961,6 @@ static int swim_remove(struct platform_device *dev)
962 if (res) 961 if (res)
963 release_mem_region(res->start, resource_size(res)); 962 release_mem_region(res->start, resource_size(res));
964 963
965 platform_set_drvdata(dev, NULL);
966 kfree(swd); 964 kfree(swd);
967 965
968 return 0; 966 return 0;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 5cdf88b7ad9e..6a680d4de7f1 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -11,12 +11,11 @@
11#include <linux/string_helpers.h> 11#include <linux/string_helpers.h>
12#include <scsi/scsi_cmnd.h> 12#include <scsi/scsi_cmnd.h>
13#include <linux/idr.h> 13#include <linux/idr.h>
14#include <linux/blk-mq.h>
15#include <linux/numa.h>
14 16
15#define PART_BITS 4 17#define PART_BITS 4
16 18
17static bool use_bio;
18module_param(use_bio, bool, S_IRUGO);
19
20static int major; 19static int major;
21static DEFINE_IDA(vd_index_ida); 20static DEFINE_IDA(vd_index_ida);
22 21
@@ -26,13 +25,11 @@ struct virtio_blk
26{ 25{
27 struct virtio_device *vdev; 26 struct virtio_device *vdev;
28 struct virtqueue *vq; 27 struct virtqueue *vq;
29 wait_queue_head_t queue_wait; 28 spinlock_t vq_lock;
30 29
31 /* The disk structure for the kernel. */ 30 /* The disk structure for the kernel. */
32 struct gendisk *disk; 31 struct gendisk *disk;
33 32
34 mempool_t *pool;
35
36 /* Process context for config space updates */ 33 /* Process context for config space updates */
37 struct work_struct config_work; 34 struct work_struct config_work;
38 35
@@ -47,31 +44,17 @@ struct virtio_blk
47 44
48 /* Ida index - used to track minor number allocations. */ 45 /* Ida index - used to track minor number allocations. */
49 int index; 46 int index;
50
51 /* Scatterlist: can be too big for stack. */
52 struct scatterlist sg[/*sg_elems*/];
53}; 47};
54 48
55struct virtblk_req 49struct virtblk_req
56{ 50{
57 struct request *req; 51 struct request *req;
58 struct bio *bio;
59 struct virtio_blk_outhdr out_hdr; 52 struct virtio_blk_outhdr out_hdr;
60 struct virtio_scsi_inhdr in_hdr; 53 struct virtio_scsi_inhdr in_hdr;
61 struct work_struct work;
62 struct virtio_blk *vblk;
63 int flags;
64 u8 status; 54 u8 status;
65 struct scatterlist sg[]; 55 struct scatterlist sg[];
66}; 56};
67 57
68enum {
69 VBLK_IS_FLUSH = 1,
70 VBLK_REQ_FLUSH = 2,
71 VBLK_REQ_DATA = 4,
72 VBLK_REQ_FUA = 8,
73};
74
75static inline int virtblk_result(struct virtblk_req *vbr) 58static inline int virtblk_result(struct virtblk_req *vbr)
76{ 59{
77 switch (vbr->status) { 60 switch (vbr->status) {
@@ -84,22 +67,6 @@ static inline int virtblk_result(struct virtblk_req *vbr)
84 } 67 }
85} 68}
86 69
87static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
88 gfp_t gfp_mask)
89{
90 struct virtblk_req *vbr;
91
92 vbr = mempool_alloc(vblk->pool, gfp_mask);
93 if (!vbr)
94 return NULL;
95
96 vbr->vblk = vblk;
97 if (use_bio)
98 sg_init_table(vbr->sg, vblk->sg_elems);
99
100 return vbr;
101}
102
103static int __virtblk_add_req(struct virtqueue *vq, 70static int __virtblk_add_req(struct virtqueue *vq,
104 struct virtblk_req *vbr, 71 struct virtblk_req *vbr,
105 struct scatterlist *data_sg, 72 struct scatterlist *data_sg,
@@ -143,83 +110,8 @@ static int __virtblk_add_req(struct virtqueue *vq,
143 return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); 110 return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
144} 111}
145 112
146static void virtblk_add_req(struct virtblk_req *vbr, bool have_data)
147{
148 struct virtio_blk *vblk = vbr->vblk;
149 DEFINE_WAIT(wait);
150 int ret;
151
152 spin_lock_irq(vblk->disk->queue->queue_lock);
153 while (unlikely((ret = __virtblk_add_req(vblk->vq, vbr, vbr->sg,
154 have_data)) < 0)) {
155 prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
156 TASK_UNINTERRUPTIBLE);
157
158 spin_unlock_irq(vblk->disk->queue->queue_lock);
159 io_schedule();
160 spin_lock_irq(vblk->disk->queue->queue_lock);
161
162 finish_wait(&vblk->queue_wait, &wait);
163 }
164
165 virtqueue_kick(vblk->vq);
166 spin_unlock_irq(vblk->disk->queue->queue_lock);
167}
168
169static void virtblk_bio_send_flush(struct virtblk_req *vbr)
170{
171 vbr->flags |= VBLK_IS_FLUSH;
172 vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
173 vbr->out_hdr.sector = 0;
174 vbr->out_hdr.ioprio = 0;
175
176 virtblk_add_req(vbr, false);
177}
178
179static void virtblk_bio_send_data(struct virtblk_req *vbr)
180{
181 struct virtio_blk *vblk = vbr->vblk;
182 struct bio *bio = vbr->bio;
183 bool have_data;
184
185 vbr->flags &= ~VBLK_IS_FLUSH;
186 vbr->out_hdr.type = 0;
187 vbr->out_hdr.sector = bio->bi_sector;
188 vbr->out_hdr.ioprio = bio_prio(bio);
189
190 if (blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg)) {
191 have_data = true;
192 if (bio->bi_rw & REQ_WRITE)
193 vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
194 else
195 vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
196 } else
197 have_data = false;
198
199 virtblk_add_req(vbr, have_data);
200}
201
202static void virtblk_bio_send_data_work(struct work_struct *work)
203{
204 struct virtblk_req *vbr;
205
206 vbr = container_of(work, struct virtblk_req, work);
207
208 virtblk_bio_send_data(vbr);
209}
210
211static void virtblk_bio_send_flush_work(struct work_struct *work)
212{
213 struct virtblk_req *vbr;
214
215 vbr = container_of(work, struct virtblk_req, work);
216
217 virtblk_bio_send_flush(vbr);
218}
219
220static inline void virtblk_request_done(struct virtblk_req *vbr) 113static inline void virtblk_request_done(struct virtblk_req *vbr)
221{ 114{
222 struct virtio_blk *vblk = vbr->vblk;
223 struct request *req = vbr->req; 115 struct request *req = vbr->req;
224 int error = virtblk_result(vbr); 116 int error = virtblk_result(vbr);
225 117
@@ -231,90 +123,45 @@ static inline void virtblk_request_done(struct virtblk_req *vbr)
231 req->errors = (error != 0); 123 req->errors = (error != 0);
232 } 124 }
233 125
234 __blk_end_request_all(req, error); 126 blk_mq_end_io(req, error);
235 mempool_free(vbr, vblk->pool);
236}
237
238static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
239{
240 struct virtio_blk *vblk = vbr->vblk;
241
242 if (vbr->flags & VBLK_REQ_DATA) {
243 /* Send out the actual write data */
244 INIT_WORK(&vbr->work, virtblk_bio_send_data_work);
245 queue_work(virtblk_wq, &vbr->work);
246 } else {
247 bio_endio(vbr->bio, virtblk_result(vbr));
248 mempool_free(vbr, vblk->pool);
249 }
250}
251
252static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
253{
254 struct virtio_blk *vblk = vbr->vblk;
255
256 if (unlikely(vbr->flags & VBLK_REQ_FUA)) {
257 /* Send out a flush before end the bio */
258 vbr->flags &= ~VBLK_REQ_DATA;
259 INIT_WORK(&vbr->work, virtblk_bio_send_flush_work);
260 queue_work(virtblk_wq, &vbr->work);
261 } else {
262 bio_endio(vbr->bio, virtblk_result(vbr));
263 mempool_free(vbr, vblk->pool);
264 }
265}
266
267static inline void virtblk_bio_done(struct virtblk_req *vbr)
268{
269 if (unlikely(vbr->flags & VBLK_IS_FLUSH))
270 virtblk_bio_flush_done(vbr);
271 else
272 virtblk_bio_data_done(vbr);
273} 127}
274 128
275static void virtblk_done(struct virtqueue *vq) 129static void virtblk_done(struct virtqueue *vq)
276{ 130{
277 struct virtio_blk *vblk = vq->vdev->priv; 131 struct virtio_blk *vblk = vq->vdev->priv;
278 bool bio_done = false, req_done = false; 132 bool req_done = false;
279 struct virtblk_req *vbr; 133 struct virtblk_req *vbr;
280 unsigned long flags; 134 unsigned long flags;
281 unsigned int len; 135 unsigned int len;
282 136
283 spin_lock_irqsave(vblk->disk->queue->queue_lock, flags); 137 spin_lock_irqsave(&vblk->vq_lock, flags);
284 do { 138 do {
285 virtqueue_disable_cb(vq); 139 virtqueue_disable_cb(vq);
286 while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { 140 while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
287 if (vbr->bio) { 141 virtblk_request_done(vbr);
288 virtblk_bio_done(vbr); 142 req_done = true;
289 bio_done = true;
290 } else {
291 virtblk_request_done(vbr);
292 req_done = true;
293 }
294 } 143 }
144 if (unlikely(virtqueue_is_broken(vq)))
145 break;
295 } while (!virtqueue_enable_cb(vq)); 146 } while (!virtqueue_enable_cb(vq));
147 spin_unlock_irqrestore(&vblk->vq_lock, flags);
148
296 /* In case queue is stopped waiting for more buffers. */ 149 /* In case queue is stopped waiting for more buffers. */
297 if (req_done) 150 if (req_done)
298 blk_start_queue(vblk->disk->queue); 151 blk_mq_start_stopped_hw_queues(vblk->disk->queue);
299 spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
300
301 if (bio_done)
302 wake_up(&vblk->queue_wait);
303} 152}
304 153
305static bool do_req(struct request_queue *q, struct virtio_blk *vblk, 154static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
306 struct request *req)
307{ 155{
156 struct virtio_blk *vblk = hctx->queue->queuedata;
157 struct virtblk_req *vbr = req->special;
158 unsigned long flags;
308 unsigned int num; 159 unsigned int num;
309 struct virtblk_req *vbr; 160 const bool last = (req->cmd_flags & REQ_END) != 0;
310 161
311 vbr = virtblk_alloc_req(vblk, GFP_ATOMIC); 162 BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
312 if (!vbr)
313 /* When another request finishes we'll try again. */
314 return false;
315 163
316 vbr->req = req; 164 vbr->req = req;
317 vbr->bio = NULL;
318 if (req->cmd_flags & REQ_FLUSH) { 165 if (req->cmd_flags & REQ_FLUSH) {
319 vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH; 166 vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
320 vbr->out_hdr.sector = 0; 167 vbr->out_hdr.sector = 0;
@@ -342,7 +189,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
342 } 189 }
343 } 190 }
344 191
345 num = blk_rq_map_sg(q, vbr->req, vblk->sg); 192 num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg);
346 if (num) { 193 if (num) {
347 if (rq_data_dir(vbr->req) == WRITE) 194 if (rq_data_dir(vbr->req) == WRITE)
348 vbr->out_hdr.type |= VIRTIO_BLK_T_OUT; 195 vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
@@ -350,63 +197,19 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
350 vbr->out_hdr.type |= VIRTIO_BLK_T_IN; 197 vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
351 } 198 }
352 199
353 if (__virtblk_add_req(vblk->vq, vbr, vblk->sg, num) < 0) { 200 spin_lock_irqsave(&vblk->vq_lock, flags);
354 mempool_free(vbr, vblk->pool); 201 if (__virtblk_add_req(vblk->vq, vbr, vbr->sg, num) < 0) {
355 return false; 202 virtqueue_kick(vblk->vq);
356 } 203 spin_unlock_irqrestore(&vblk->vq_lock, flags);
357 204 blk_mq_stop_hw_queue(hctx);
358 return true; 205 return BLK_MQ_RQ_QUEUE_BUSY;
359}
360
361static void virtblk_request(struct request_queue *q)
362{
363 struct virtio_blk *vblk = q->queuedata;
364 struct request *req;
365 unsigned int issued = 0;
366
367 while ((req = blk_peek_request(q)) != NULL) {
368 BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
369
370 /* If this request fails, stop queue and wait for something to
371 finish to restart it. */
372 if (!do_req(q, vblk, req)) {
373 blk_stop_queue(q);
374 break;
375 }
376 blk_start_request(req);
377 issued++;
378 } 206 }
379 207
380 if (issued) 208 if (last)
381 virtqueue_kick(vblk->vq); 209 virtqueue_kick(vblk->vq);
382}
383 210
384static void virtblk_make_request(struct request_queue *q, struct bio *bio) 211 spin_unlock_irqrestore(&vblk->vq_lock, flags);
385{ 212 return BLK_MQ_RQ_QUEUE_OK;
386 struct virtio_blk *vblk = q->queuedata;
387 struct virtblk_req *vbr;
388
389 BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems);
390
391 vbr = virtblk_alloc_req(vblk, GFP_NOIO);
392 if (!vbr) {
393 bio_endio(bio, -ENOMEM);
394 return;
395 }
396
397 vbr->bio = bio;
398 vbr->flags = 0;
399 if (bio->bi_rw & REQ_FLUSH)
400 vbr->flags |= VBLK_REQ_FLUSH;
401 if (bio->bi_rw & REQ_FUA)
402 vbr->flags |= VBLK_REQ_FUA;
403 if (bio->bi_size)
404 vbr->flags |= VBLK_REQ_DATA;
405
406 if (unlikely(vbr->flags & VBLK_REQ_FLUSH))
407 virtblk_bio_send_flush(vbr);
408 else
409 virtblk_bio_send_data(vbr);
410} 213}
411 214
412/* return id (s/n) string for *disk to *id_str 215/* return id (s/n) string for *disk to *id_str
@@ -456,18 +259,15 @@ static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
456static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) 259static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
457{ 260{
458 struct virtio_blk *vblk = bd->bd_disk->private_data; 261 struct virtio_blk *vblk = bd->bd_disk->private_data;
459 struct virtio_blk_geometry vgeo;
460 int err;
461 262
462 /* see if the host passed in geometry config */ 263 /* see if the host passed in geometry config */
463 err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY, 264 if (virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_GEOMETRY)) {
464 offsetof(struct virtio_blk_config, geometry), 265 virtio_cread(vblk->vdev, struct virtio_blk_config,
465 &vgeo); 266 geometry.cylinders, &geo->cylinders);
466 267 virtio_cread(vblk->vdev, struct virtio_blk_config,
467 if (!err) { 268 geometry.heads, &geo->heads);
468 geo->heads = vgeo.heads; 269 virtio_cread(vblk->vdev, struct virtio_blk_config,
469 geo->sectors = vgeo.sectors; 270 geometry.sectors, &geo->sectors);
470 geo->cylinders = vgeo.cylinders;
471 } else { 271 } else {
472 /* some standard values, similar to sd */ 272 /* some standard values, similar to sd */
473 geo->heads = 1 << 6; 273 geo->heads = 1 << 6;
@@ -529,8 +329,7 @@ static void virtblk_config_changed_work(struct work_struct *work)
529 goto done; 329 goto done;
530 330
531 /* Host must always specify the capacity. */ 331 /* Host must always specify the capacity. */
532 vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity), 332 virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity);
533 &capacity, sizeof(capacity));
534 333
535 /* If capacity is too big, truncate with warning. */ 334 /* If capacity is too big, truncate with warning. */
536 if ((sector_t)capacity != capacity) { 335 if ((sector_t)capacity != capacity) {
@@ -608,9 +407,9 @@ static int virtblk_get_cache_mode(struct virtio_device *vdev)
608 u8 writeback; 407 u8 writeback;
609 int err; 408 int err;
610 409
611 err = virtio_config_val(vdev, VIRTIO_BLK_F_CONFIG_WCE, 410 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE,
612 offsetof(struct virtio_blk_config, wce), 411 struct virtio_blk_config, wce,
613 &writeback); 412 &writeback);
614 if (err) 413 if (err)
615 writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE); 414 writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE);
616 415
@@ -642,7 +441,6 @@ virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
642 struct virtio_blk *vblk = disk->private_data; 441 struct virtio_blk *vblk = disk->private_data;
643 struct virtio_device *vdev = vblk->vdev; 442 struct virtio_device *vdev = vblk->vdev;
644 int i; 443 int i;
645 u8 writeback;
646 444
647 BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE)); 445 BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
648 for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; ) 446 for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; )
@@ -652,11 +450,7 @@ virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
652 if (i < 0) 450 if (i < 0)
653 return -EINVAL; 451 return -EINVAL;
654 452
655 writeback = i; 453 virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i);
656 vdev->config->set(vdev,
657 offsetof(struct virtio_blk_config, wce),
658 &writeback, sizeof(writeback));
659
660 virtblk_update_cache_mode(vdev); 454 virtblk_update_cache_mode(vdev);
661 return count; 455 return count;
662} 456}
@@ -680,12 +474,35 @@ static const struct device_attribute dev_attr_cache_type_rw =
680 __ATTR(cache_type, S_IRUGO|S_IWUSR, 474 __ATTR(cache_type, S_IRUGO|S_IWUSR,
681 virtblk_cache_type_show, virtblk_cache_type_store); 475 virtblk_cache_type_show, virtblk_cache_type_store);
682 476
477static struct blk_mq_ops virtio_mq_ops = {
478 .queue_rq = virtio_queue_rq,
479 .map_queue = blk_mq_map_queue,
480 .alloc_hctx = blk_mq_alloc_single_hw_queue,
481 .free_hctx = blk_mq_free_single_hw_queue,
482};
483
484static struct blk_mq_reg virtio_mq_reg = {
485 .ops = &virtio_mq_ops,
486 .nr_hw_queues = 1,
487 .queue_depth = 64,
488 .numa_node = NUMA_NO_NODE,
489 .flags = BLK_MQ_F_SHOULD_MERGE,
490};
491
492static void virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx,
493 struct request *rq, unsigned int nr)
494{
495 struct virtio_blk *vblk = data;
496 struct virtblk_req *vbr = rq->special;
497
498 sg_init_table(vbr->sg, vblk->sg_elems);
499}
500
683static int virtblk_probe(struct virtio_device *vdev) 501static int virtblk_probe(struct virtio_device *vdev)
684{ 502{
685 struct virtio_blk *vblk; 503 struct virtio_blk *vblk;
686 struct request_queue *q; 504 struct request_queue *q;
687 int err, index; 505 int err, index;
688 int pool_size;
689 506
690 u64 cap; 507 u64 cap;
691 u32 v, blk_size, sg_elems, opt_io_size; 508 u32 v, blk_size, sg_elems, opt_io_size;
@@ -699,9 +516,9 @@ static int virtblk_probe(struct virtio_device *vdev)
699 index = err; 516 index = err;
700 517
701 /* We need to know how many segments before we allocate. */ 518 /* We need to know how many segments before we allocate. */
702 err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX, 519 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX,
703 offsetof(struct virtio_blk_config, seg_max), 520 struct virtio_blk_config, seg_max,
704 &sg_elems); 521 &sg_elems);
705 522
706 /* We need at least one SG element, whatever they say. */ 523 /* We need at least one SG element, whatever they say. */
707 if (err || !sg_elems) 524 if (err || !sg_elems)
@@ -709,17 +526,14 @@ static int virtblk_probe(struct virtio_device *vdev)
709 526
710 /* We need an extra sg elements at head and tail. */ 527 /* We need an extra sg elements at head and tail. */
711 sg_elems += 2; 528 sg_elems += 2;
712 vdev->priv = vblk = kmalloc(sizeof(*vblk) + 529 vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
713 sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
714 if (!vblk) { 530 if (!vblk) {
715 err = -ENOMEM; 531 err = -ENOMEM;
716 goto out_free_index; 532 goto out_free_index;
717 } 533 }
718 534
719 init_waitqueue_head(&vblk->queue_wait);
720 vblk->vdev = vdev; 535 vblk->vdev = vdev;
721 vblk->sg_elems = sg_elems; 536 vblk->sg_elems = sg_elems;
722 sg_init_table(vblk->sg, vblk->sg_elems);
723 mutex_init(&vblk->config_lock); 537 mutex_init(&vblk->config_lock);
724 538
725 INIT_WORK(&vblk->config_work, virtblk_config_changed_work); 539 INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
@@ -728,31 +542,27 @@ static int virtblk_probe(struct virtio_device *vdev)
728 err = init_vq(vblk); 542 err = init_vq(vblk);
729 if (err) 543 if (err)
730 goto out_free_vblk; 544 goto out_free_vblk;
731 545 spin_lock_init(&vblk->vq_lock);
732 pool_size = sizeof(struct virtblk_req);
733 if (use_bio)
734 pool_size += sizeof(struct scatterlist) * sg_elems;
735 vblk->pool = mempool_create_kmalloc_pool(1, pool_size);
736 if (!vblk->pool) {
737 err = -ENOMEM;
738 goto out_free_vq;
739 }
740 546
741 /* FIXME: How many partitions? How long is a piece of string? */ 547 /* FIXME: How many partitions? How long is a piece of string? */
742 vblk->disk = alloc_disk(1 << PART_BITS); 548 vblk->disk = alloc_disk(1 << PART_BITS);
743 if (!vblk->disk) { 549 if (!vblk->disk) {
744 err = -ENOMEM; 550 err = -ENOMEM;
745 goto out_mempool; 551 goto out_free_vq;
746 } 552 }
747 553
748 q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL); 554 virtio_mq_reg.cmd_size =
555 sizeof(struct virtblk_req) +
556 sizeof(struct scatterlist) * sg_elems;
557
558 q = vblk->disk->queue = blk_mq_init_queue(&virtio_mq_reg, vblk);
749 if (!q) { 559 if (!q) {
750 err = -ENOMEM; 560 err = -ENOMEM;
751 goto out_put_disk; 561 goto out_put_disk;
752 } 562 }
753 563
754 if (use_bio) 564 blk_mq_init_commands(q, virtblk_init_vbr, vblk);
755 blk_queue_make_request(q, virtblk_make_request); 565
756 q->queuedata = vblk; 566 q->queuedata = vblk;
757 567
758 virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); 568 virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
@@ -772,8 +582,7 @@ static int virtblk_probe(struct virtio_device *vdev)
772 set_disk_ro(vblk->disk, 1); 582 set_disk_ro(vblk->disk, 1);
773 583
774 /* Host must always specify the capacity. */ 584 /* Host must always specify the capacity. */
775 vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity), 585 virtio_cread(vdev, struct virtio_blk_config, capacity, &cap);
776 &cap, sizeof(cap));
777 586
778 /* If capacity is too big, truncate with warning. */ 587 /* If capacity is too big, truncate with warning. */
779 if ((sector_t)cap != cap) { 588 if ((sector_t)cap != cap) {
@@ -794,46 +603,45 @@ static int virtblk_probe(struct virtio_device *vdev)
794 603
795 /* Host can optionally specify maximum segment size and number of 604 /* Host can optionally specify maximum segment size and number of
796 * segments. */ 605 * segments. */
797 err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX, 606 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX,
798 offsetof(struct virtio_blk_config, size_max), 607 struct virtio_blk_config, size_max, &v);
799 &v);
800 if (!err) 608 if (!err)
801 blk_queue_max_segment_size(q, v); 609 blk_queue_max_segment_size(q, v);
802 else 610 else
803 blk_queue_max_segment_size(q, -1U); 611 blk_queue_max_segment_size(q, -1U);
804 612
805 /* Host can optionally specify the block size of the device */ 613 /* Host can optionally specify the block size of the device */
806 err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE, 614 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
807 offsetof(struct virtio_blk_config, blk_size), 615 struct virtio_blk_config, blk_size,
808 &blk_size); 616 &blk_size);
809 if (!err) 617 if (!err)
810 blk_queue_logical_block_size(q, blk_size); 618 blk_queue_logical_block_size(q, blk_size);
811 else 619 else
812 blk_size = queue_logical_block_size(q); 620 blk_size = queue_logical_block_size(q);
813 621
814 /* Use topology information if available */ 622 /* Use topology information if available */
815 err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, 623 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
816 offsetof(struct virtio_blk_config, physical_block_exp), 624 struct virtio_blk_config, physical_block_exp,
817 &physical_block_exp); 625 &physical_block_exp);
818 if (!err && physical_block_exp) 626 if (!err && physical_block_exp)
819 blk_queue_physical_block_size(q, 627 blk_queue_physical_block_size(q,
820 blk_size * (1 << physical_block_exp)); 628 blk_size * (1 << physical_block_exp));
821 629
822 err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, 630 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
823 offsetof(struct virtio_blk_config, alignment_offset), 631 struct virtio_blk_config, alignment_offset,
824 &alignment_offset); 632 &alignment_offset);
825 if (!err && alignment_offset) 633 if (!err && alignment_offset)
826 blk_queue_alignment_offset(q, blk_size * alignment_offset); 634 blk_queue_alignment_offset(q, blk_size * alignment_offset);
827 635
828 err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, 636 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
829 offsetof(struct virtio_blk_config, min_io_size), 637 struct virtio_blk_config, min_io_size,
830 &min_io_size); 638 &min_io_size);
831 if (!err && min_io_size) 639 if (!err && min_io_size)
832 blk_queue_io_min(q, blk_size * min_io_size); 640 blk_queue_io_min(q, blk_size * min_io_size);
833 641
834 err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, 642 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
835 offsetof(struct virtio_blk_config, opt_io_size), 643 struct virtio_blk_config, opt_io_size,
836 &opt_io_size); 644 &opt_io_size);
837 if (!err && opt_io_size) 645 if (!err && opt_io_size)
838 blk_queue_io_opt(q, blk_size * opt_io_size); 646 blk_queue_io_opt(q, blk_size * opt_io_size);
839 647
@@ -857,8 +665,6 @@ out_del_disk:
857 blk_cleanup_queue(vblk->disk->queue); 665 blk_cleanup_queue(vblk->disk->queue);
858out_put_disk: 666out_put_disk:
859 put_disk(vblk->disk); 667 put_disk(vblk->disk);
860out_mempool:
861 mempool_destroy(vblk->pool);
862out_free_vq: 668out_free_vq:
863 vdev->config->del_vqs(vdev); 669 vdev->config->del_vqs(vdev);
864out_free_vblk: 670out_free_vblk:
@@ -890,7 +696,6 @@ static void virtblk_remove(struct virtio_device *vdev)
890 696
891 refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount); 697 refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
892 put_disk(vblk->disk); 698 put_disk(vblk->disk);
893 mempool_destroy(vblk->pool);
894 vdev->config->del_vqs(vdev); 699 vdev->config->del_vqs(vdev);
895 kfree(vblk); 700 kfree(vblk);
896 701
@@ -899,7 +704,7 @@ static void virtblk_remove(struct virtio_device *vdev)
899 ida_simple_remove(&vd_index_ida, index); 704 ida_simple_remove(&vd_index_ida, index);
900} 705}
901 706
902#ifdef CONFIG_PM 707#ifdef CONFIG_PM_SLEEP
903static int virtblk_freeze(struct virtio_device *vdev) 708static int virtblk_freeze(struct virtio_device *vdev)
904{ 709{
905 struct virtio_blk *vblk = vdev->priv; 710 struct virtio_blk *vblk = vdev->priv;
@@ -914,10 +719,7 @@ static int virtblk_freeze(struct virtio_device *vdev)
914 719
915 flush_work(&vblk->config_work); 720 flush_work(&vblk->config_work);
916 721
917 spin_lock_irq(vblk->disk->queue->queue_lock); 722 blk_mq_stop_hw_queues(vblk->disk->queue);
918 blk_stop_queue(vblk->disk->queue);
919 spin_unlock_irq(vblk->disk->queue->queue_lock);
920 blk_sync_queue(vblk->disk->queue);
921 723
922 vdev->config->del_vqs(vdev); 724 vdev->config->del_vqs(vdev);
923 return 0; 725 return 0;
@@ -930,11 +732,9 @@ static int virtblk_restore(struct virtio_device *vdev)
930 732
931 vblk->config_enable = true; 733 vblk->config_enable = true;
932 ret = init_vq(vdev->priv); 734 ret = init_vq(vdev->priv);
933 if (!ret) { 735 if (!ret)
934 spin_lock_irq(vblk->disk->queue->queue_lock); 736 blk_mq_start_stopped_hw_queues(vblk->disk->queue);
935 blk_start_queue(vblk->disk->queue); 737
936 spin_unlock_irq(vblk->disk->queue->queue_lock);
937 }
938 return ret; 738 return ret;
939} 739}
940#endif 740#endif
@@ -959,7 +759,7 @@ static struct virtio_driver virtio_blk = {
959 .probe = virtblk_probe, 759 .probe = virtblk_probe,
960 .remove = virtblk_remove, 760 .remove = virtblk_remove,
961 .config_changed = virtblk_config_changed, 761 .config_changed = virtblk_config_changed,
962#ifdef CONFIG_PM 762#ifdef CONFIG_PM_SLEEP
963 .freeze = virtblk_freeze, 763 .freeze = virtblk_freeze,
964 .restore = virtblk_restore, 764 .restore = virtblk_restore,
965#endif 765#endif
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index bf4b9d282c04..6620b73d0490 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -887,6 +887,8 @@ static int dispatch_discard_io(struct xen_blkif *blkif,
887 unsigned long secure; 887 unsigned long secure;
888 struct phys_req preq; 888 struct phys_req preq;
889 889
890 xen_blkif_get(blkif);
891
890 preq.sector_number = req->u.discard.sector_number; 892 preq.sector_number = req->u.discard.sector_number;
891 preq.nr_sects = req->u.discard.nr_sectors; 893 preq.nr_sects = req->u.discard.nr_sectors;
892 894
@@ -899,7 +901,6 @@ static int dispatch_discard_io(struct xen_blkif *blkif,
899 } 901 }
900 blkif->st_ds_req++; 902 blkif->st_ds_req++;
901 903
902 xen_blkif_get(blkif);
903 secure = (blkif->vbd.discard_secure && 904 secure = (blkif->vbd.discard_secure &&
904 (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ? 905 (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
905 BLKDEV_DISCARD_SECURE : 0; 906 BLKDEV_DISCARD_SECURE : 0;
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index fe5c3cd10c34..c2014a0aa206 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -620,7 +620,7 @@ static void backend_changed(struct xenbus_watch *watch,
620 } 620 }
621 621
622 /* Front end dir is a number, which is used as the handle. */ 622 /* Front end dir is a number, which is used as the handle. */
623 err = strict_strtoul(strrchr(dev->otherend, '/') + 1, 0, &handle); 623 err = kstrtoul(strrchr(dev->otherend, '/') + 1, 0, &handle);
624 if (err) 624 if (err)
625 return; 625 return;
626 626
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index a4660bbee8a6..c4a4c9006288 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -121,7 +121,8 @@ struct blkfront_info
121 struct work_struct work; 121 struct work_struct work;
122 struct gnttab_free_callback callback; 122 struct gnttab_free_callback callback;
123 struct blk_shadow shadow[BLK_RING_SIZE]; 123 struct blk_shadow shadow[BLK_RING_SIZE];
124 struct list_head persistent_gnts; 124 struct list_head grants;
125 struct list_head indirect_pages;
125 unsigned int persistent_gnts_c; 126 unsigned int persistent_gnts_c;
126 unsigned long shadow_free; 127 unsigned long shadow_free;
127 unsigned int feature_flush; 128 unsigned int feature_flush;
@@ -200,15 +201,17 @@ static int fill_grant_buffer(struct blkfront_info *info, int num)
200 if (!gnt_list_entry) 201 if (!gnt_list_entry)
201 goto out_of_memory; 202 goto out_of_memory;
202 203
203 granted_page = alloc_page(GFP_NOIO); 204 if (info->feature_persistent) {
204 if (!granted_page) { 205 granted_page = alloc_page(GFP_NOIO);
205 kfree(gnt_list_entry); 206 if (!granted_page) {
206 goto out_of_memory; 207 kfree(gnt_list_entry);
208 goto out_of_memory;
209 }
210 gnt_list_entry->pfn = page_to_pfn(granted_page);
207 } 211 }
208 212
209 gnt_list_entry->pfn = page_to_pfn(granted_page);
210 gnt_list_entry->gref = GRANT_INVALID_REF; 213 gnt_list_entry->gref = GRANT_INVALID_REF;
211 list_add(&gnt_list_entry->node, &info->persistent_gnts); 214 list_add(&gnt_list_entry->node, &info->grants);
212 i++; 215 i++;
213 } 216 }
214 217
@@ -216,9 +219,10 @@ static int fill_grant_buffer(struct blkfront_info *info, int num)
216 219
217out_of_memory: 220out_of_memory:
218 list_for_each_entry_safe(gnt_list_entry, n, 221 list_for_each_entry_safe(gnt_list_entry, n,
219 &info->persistent_gnts, node) { 222 &info->grants, node) {
220 list_del(&gnt_list_entry->node); 223 list_del(&gnt_list_entry->node);
221 __free_page(pfn_to_page(gnt_list_entry->pfn)); 224 if (info->feature_persistent)
225 __free_page(pfn_to_page(gnt_list_entry->pfn));
222 kfree(gnt_list_entry); 226 kfree(gnt_list_entry);
223 i--; 227 i--;
224 } 228 }
@@ -227,13 +231,14 @@ out_of_memory:
227} 231}
228 232
229static struct grant *get_grant(grant_ref_t *gref_head, 233static struct grant *get_grant(grant_ref_t *gref_head,
234 unsigned long pfn,
230 struct blkfront_info *info) 235 struct blkfront_info *info)
231{ 236{
232 struct grant *gnt_list_entry; 237 struct grant *gnt_list_entry;
233 unsigned long buffer_mfn; 238 unsigned long buffer_mfn;
234 239
235 BUG_ON(list_empty(&info->persistent_gnts)); 240 BUG_ON(list_empty(&info->grants));
236 gnt_list_entry = list_first_entry(&info->persistent_gnts, struct grant, 241 gnt_list_entry = list_first_entry(&info->grants, struct grant,
237 node); 242 node);
238 list_del(&gnt_list_entry->node); 243 list_del(&gnt_list_entry->node);
239 244
@@ -245,6 +250,10 @@ static struct grant *get_grant(grant_ref_t *gref_head,
245 /* Assign a gref to this page */ 250 /* Assign a gref to this page */
246 gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head); 251 gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
247 BUG_ON(gnt_list_entry->gref == -ENOSPC); 252 BUG_ON(gnt_list_entry->gref == -ENOSPC);
253 if (!info->feature_persistent) {
254 BUG_ON(!pfn);
255 gnt_list_entry->pfn = pfn;
256 }
248 buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn); 257 buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
249 gnttab_grant_foreign_access_ref(gnt_list_entry->gref, 258 gnttab_grant_foreign_access_ref(gnt_list_entry->gref,
250 info->xbdev->otherend_id, 259 info->xbdev->otherend_id,
@@ -400,10 +409,13 @@ static int blkif_queue_request(struct request *req)
400 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) 409 if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
401 return 1; 410 return 1;
402 411
403 max_grefs = info->max_indirect_segments ? 412 max_grefs = req->nr_phys_segments;
404 info->max_indirect_segments + 413 if (max_grefs > BLKIF_MAX_SEGMENTS_PER_REQUEST)
405 INDIRECT_GREFS(info->max_indirect_segments) : 414 /*
406 BLKIF_MAX_SEGMENTS_PER_REQUEST; 415 * If we are using indirect segments we need to account
416 * for the indirect grefs used in the request.
417 */
418 max_grefs += INDIRECT_GREFS(req->nr_phys_segments);
407 419
408 /* Check if we have enough grants to allocate a requests */ 420 /* Check if we have enough grants to allocate a requests */
409 if (info->persistent_gnts_c < max_grefs) { 421 if (info->persistent_gnts_c < max_grefs) {
@@ -477,22 +489,34 @@ static int blkif_queue_request(struct request *req)
477 489
478 if ((ring_req->operation == BLKIF_OP_INDIRECT) && 490 if ((ring_req->operation == BLKIF_OP_INDIRECT) &&
479 (i % SEGS_PER_INDIRECT_FRAME == 0)) { 491 (i % SEGS_PER_INDIRECT_FRAME == 0)) {
492 unsigned long uninitialized_var(pfn);
493
480 if (segments) 494 if (segments)
481 kunmap_atomic(segments); 495 kunmap_atomic(segments);
482 496
483 n = i / SEGS_PER_INDIRECT_FRAME; 497 n = i / SEGS_PER_INDIRECT_FRAME;
484 gnt_list_entry = get_grant(&gref_head, info); 498 if (!info->feature_persistent) {
499 struct page *indirect_page;
500
501 /* Fetch a pre-allocated page to use for indirect grefs */
502 BUG_ON(list_empty(&info->indirect_pages));
503 indirect_page = list_first_entry(&info->indirect_pages,
504 struct page, lru);
505 list_del(&indirect_page->lru);
506 pfn = page_to_pfn(indirect_page);
507 }
508 gnt_list_entry = get_grant(&gref_head, pfn, info);
485 info->shadow[id].indirect_grants[n] = gnt_list_entry; 509 info->shadow[id].indirect_grants[n] = gnt_list_entry;
486 segments = kmap_atomic(pfn_to_page(gnt_list_entry->pfn)); 510 segments = kmap_atomic(pfn_to_page(gnt_list_entry->pfn));
487 ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref; 511 ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref;
488 } 512 }
489 513
490 gnt_list_entry = get_grant(&gref_head, info); 514 gnt_list_entry = get_grant(&gref_head, page_to_pfn(sg_page(sg)), info);
491 ref = gnt_list_entry->gref; 515 ref = gnt_list_entry->gref;
492 516
493 info->shadow[id].grants_used[i] = gnt_list_entry; 517 info->shadow[id].grants_used[i] = gnt_list_entry;
494 518
495 if (rq_data_dir(req)) { 519 if (rq_data_dir(req) && info->feature_persistent) {
496 char *bvec_data; 520 char *bvec_data;
497 void *shared_data; 521 void *shared_data;
498 522
@@ -904,21 +928,36 @@ static void blkif_free(struct blkfront_info *info, int suspend)
904 blk_stop_queue(info->rq); 928 blk_stop_queue(info->rq);
905 929
906 /* Remove all persistent grants */ 930 /* Remove all persistent grants */
907 if (!list_empty(&info->persistent_gnts)) { 931 if (!list_empty(&info->grants)) {
908 list_for_each_entry_safe(persistent_gnt, n, 932 list_for_each_entry_safe(persistent_gnt, n,
909 &info->persistent_gnts, node) { 933 &info->grants, node) {
910 list_del(&persistent_gnt->node); 934 list_del(&persistent_gnt->node);
911 if (persistent_gnt->gref != GRANT_INVALID_REF) { 935 if (persistent_gnt->gref != GRANT_INVALID_REF) {
912 gnttab_end_foreign_access(persistent_gnt->gref, 936 gnttab_end_foreign_access(persistent_gnt->gref,
913 0, 0UL); 937 0, 0UL);
914 info->persistent_gnts_c--; 938 info->persistent_gnts_c--;
915 } 939 }
916 __free_page(pfn_to_page(persistent_gnt->pfn)); 940 if (info->feature_persistent)
941 __free_page(pfn_to_page(persistent_gnt->pfn));
917 kfree(persistent_gnt); 942 kfree(persistent_gnt);
918 } 943 }
919 } 944 }
920 BUG_ON(info->persistent_gnts_c != 0); 945 BUG_ON(info->persistent_gnts_c != 0);
921 946
947 /*
948	 * Remove indirect pages; this only happens when using indirect
949 * descriptors but not persistent grants
950 */
951 if (!list_empty(&info->indirect_pages)) {
952 struct page *indirect_page, *n;
953
954 BUG_ON(info->feature_persistent);
955 list_for_each_entry_safe(indirect_page, n, &info->indirect_pages, lru) {
956 list_del(&indirect_page->lru);
957 __free_page(indirect_page);
958 }
959 }
960
922 for (i = 0; i < BLK_RING_SIZE; i++) { 961 for (i = 0; i < BLK_RING_SIZE; i++) {
923 /* 962 /*
924 * Clear persistent grants present in requests already 963 * Clear persistent grants present in requests already
@@ -933,7 +972,8 @@ static void blkif_free(struct blkfront_info *info, int suspend)
933 for (j = 0; j < segs; j++) { 972 for (j = 0; j < segs; j++) {
934 persistent_gnt = info->shadow[i].grants_used[j]; 973 persistent_gnt = info->shadow[i].grants_used[j];
935 gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); 974 gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
936 __free_page(pfn_to_page(persistent_gnt->pfn)); 975 if (info->feature_persistent)
976 __free_page(pfn_to_page(persistent_gnt->pfn));
937 kfree(persistent_gnt); 977 kfree(persistent_gnt);
938 } 978 }
939 979
@@ -992,7 +1032,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
992 nseg = s->req.operation == BLKIF_OP_INDIRECT ? 1032 nseg = s->req.operation == BLKIF_OP_INDIRECT ?
993 s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments; 1033 s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments;
994 1034
995 if (bret->operation == BLKIF_OP_READ) { 1035 if (bret->operation == BLKIF_OP_READ && info->feature_persistent) {
996 /* 1036 /*
997 * Copy the data received from the backend into the bvec. 1037 * Copy the data received from the backend into the bvec.
998 * Since bv_offset can be different than 0, and bv_len different 1038 * Since bv_offset can be different than 0, and bv_len different
@@ -1013,13 +1053,51 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
1013 } 1053 }
1014 /* Add the persistent grant into the list of free grants */ 1054 /* Add the persistent grant into the list of free grants */
1015 for (i = 0; i < nseg; i++) { 1055 for (i = 0; i < nseg; i++) {
1016 list_add(&s->grants_used[i]->node, &info->persistent_gnts); 1056 if (gnttab_query_foreign_access(s->grants_used[i]->gref)) {
1017 info->persistent_gnts_c++; 1057 /*
1058 * If the grant is still mapped by the backend (the
1059 * backend has chosen to make this grant persistent)
1060 * we add it at the head of the list, so it will be
1061 * reused first.
1062 */
1063 if (!info->feature_persistent)
1064				pr_alert_ratelimited("backend has not unmapped grant: %u\n",
1065 s->grants_used[i]->gref);
1066 list_add(&s->grants_used[i]->node, &info->grants);
1067 info->persistent_gnts_c++;
1068 } else {
1069 /*
1070 * If the grant is not mapped by the backend we end the
1071 * foreign access and add it to the tail of the list,
1072 * so it will not be picked again unless we run out of
1073 * persistent grants.
1074 */
1075 gnttab_end_foreign_access(s->grants_used[i]->gref, 0, 0UL);
1076 s->grants_used[i]->gref = GRANT_INVALID_REF;
1077 list_add_tail(&s->grants_used[i]->node, &info->grants);
1078 }
1018 } 1079 }
1019 if (s->req.operation == BLKIF_OP_INDIRECT) { 1080 if (s->req.operation == BLKIF_OP_INDIRECT) {
1020 for (i = 0; i < INDIRECT_GREFS(nseg); i++) { 1081 for (i = 0; i < INDIRECT_GREFS(nseg); i++) {
1021 list_add(&s->indirect_grants[i]->node, &info->persistent_gnts); 1082 if (gnttab_query_foreign_access(s->indirect_grants[i]->gref)) {
1022 info->persistent_gnts_c++; 1083 if (!info->feature_persistent)
1084					pr_alert_ratelimited("backend has not unmapped grant: %u\n",
1085 s->indirect_grants[i]->gref);
1086 list_add(&s->indirect_grants[i]->node, &info->grants);
1087 info->persistent_gnts_c++;
1088 } else {
1089 struct page *indirect_page;
1090
1091 gnttab_end_foreign_access(s->indirect_grants[i]->gref, 0, 0UL);
1092 /*
1093 * Add the used indirect page back to the list of
1094 * available pages for indirect grefs.
1095 */
1096 indirect_page = pfn_to_page(s->indirect_grants[i]->pfn);
1097 list_add(&indirect_page->lru, &info->indirect_pages);
1098 s->indirect_grants[i]->gref = GRANT_INVALID_REF;
1099 list_add_tail(&s->indirect_grants[i]->node, &info->grants);
1100 }
1023 } 1101 }
1024 } 1102 }
1025} 1103}
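
The reworked completion path decides per segment whether a grant can be recycled: if the backend still has it mapped, it goes to the head of the free list so it is reused first and is counted as persistent; otherwise foreign access is ended and the grant is parked at the tail. A self-contained sketch of that policy follows, under simplified assumptions: grant_still_mapped() and release_grant() are stand-ins for gnttab_query_foreign_access() and gnttab_end_foreign_access(), and the hand-rolled singly linked list replaces the kernel's list_head.

#include <stdbool.h>
#include <stddef.h>

#define GRANT_INVALID (~0u)

struct grant {
	unsigned int gref;
	struct grant *next;
};

struct grant_pool {
	struct grant *head;
	struct grant *tail;
	unsigned int persistent_count;	/* grants the backend kept mapped */
};

/* Placeholders for the grant-table calls, not the real interfaces. */
static bool grant_still_mapped(unsigned int gref) { (void)gref; return false; }
static void release_grant(unsigned int gref) { (void)gref; }

static void pool_add_head(struct grant_pool *p, struct grant *g)
{
	g->next = p->head;
	p->head = g;
	if (!p->tail)
		p->tail = g;
}

static void pool_add_tail(struct grant_pool *p, struct grant *g)
{
	g->next = NULL;
	if (p->tail)
		p->tail->next = g;
	else
		p->head = g;
	p->tail = g;
}

/* Mirror of the per-segment loop: reuse still-mapped grants first, retire the rest. */
static void recycle_grant(struct grant_pool *pool, struct grant *g)
{
	if (grant_still_mapped(g->gref)) {
		/* Backend kept the mapping; the driver additionally warns here
		 * if persistent grants were never negotiated. */
		pool_add_head(pool, g);
		pool->persistent_count++;
	} else {
		/* Backend dropped it: end foreign access and park it at the tail. */
		release_grant(g->gref);
		g->gref = GRANT_INVALID;
		pool_add_tail(pool, g);
	}
}

Keeping still-mapped grants at the head means the frontend preferentially reuses grants the backend already treats as persistent, which is the point of the change.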
@@ -1313,7 +1391,8 @@ static int blkfront_probe(struct xenbus_device *dev,
1313 spin_lock_init(&info->io_lock); 1391 spin_lock_init(&info->io_lock);
1314 info->xbdev = dev; 1392 info->xbdev = dev;
1315 info->vdevice = vdevice; 1393 info->vdevice = vdevice;
1316 INIT_LIST_HEAD(&info->persistent_gnts); 1394 INIT_LIST_HEAD(&info->grants);
1395 INIT_LIST_HEAD(&info->indirect_pages);
1317 info->persistent_gnts_c = 0; 1396 info->persistent_gnts_c = 0;
1318 info->connected = BLKIF_STATE_DISCONNECTED; 1397 info->connected = BLKIF_STATE_DISCONNECTED;
1319 INIT_WORK(&info->work, blkif_restart_queue); 1398 INIT_WORK(&info->work, blkif_restart_queue);
@@ -1336,57 +1415,6 @@ static int blkfront_probe(struct xenbus_device *dev,
1336 return 0; 1415 return 0;
1337} 1416}
1338 1417
1339/*
1340 * This is a clone of md_trim_bio, used to split a bio into smaller ones
1341 */
1342static void trim_bio(struct bio *bio, int offset, int size)
1343{
1344 /* 'bio' is a cloned bio which we need to trim to match
1345 * the given offset and size.
1346 * This requires adjusting bi_sector, bi_size, and bi_io_vec
1347 */
1348 int i;
1349 struct bio_vec *bvec;
1350 int sofar = 0;
1351
1352 size <<= 9;
1353 if (offset == 0 && size == bio->bi_size)
1354 return;
1355
1356 bio->bi_sector += offset;
1357 bio->bi_size = size;
1358 offset <<= 9;
1359 clear_bit(BIO_SEG_VALID, &bio->bi_flags);
1360
1361 while (bio->bi_idx < bio->bi_vcnt &&
1362 bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
1363 /* remove this whole bio_vec */
1364 offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
1365 bio->bi_idx++;
1366 }
1367 if (bio->bi_idx < bio->bi_vcnt) {
1368 bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
1369 bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
1370 }
1371 /* avoid any complications with bi_idx being non-zero*/
1372 if (bio->bi_idx) {
1373 memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
1374 (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
1375 bio->bi_vcnt -= bio->bi_idx;
1376 bio->bi_idx = 0;
1377 }
1378 /* Make sure vcnt and last bv are not too big */
1379 bio_for_each_segment(bvec, bio, i) {
1380 if (sofar + bvec->bv_len > size)
1381 bvec->bv_len = size - sofar;
1382 if (bvec->bv_len == 0) {
1383 bio->bi_vcnt = i;
1384 break;
1385 }
1386 sofar += bvec->bv_len;
1387 }
1388}
1389
1390static void split_bio_end(struct bio *bio, int error) 1418static void split_bio_end(struct bio *bio, int error)
1391{ 1419{
1392 struct split_bio *split_bio = bio->bi_private; 1420 struct split_bio *split_bio = bio->bi_private;
@@ -1522,7 +1550,7 @@ static int blkif_recover(struct blkfront_info *info)
1522 (unsigned int)(bio->bi_size >> 9) - offset); 1550 (unsigned int)(bio->bi_size >> 9) - offset);
1523 cloned_bio = bio_clone(bio, GFP_NOIO); 1551 cloned_bio = bio_clone(bio, GFP_NOIO);
1524 BUG_ON(cloned_bio == NULL); 1552 BUG_ON(cloned_bio == NULL);
1525 trim_bio(cloned_bio, offset, size); 1553 bio_trim(cloned_bio, offset, size);
1526 cloned_bio->bi_private = split_bio; 1554 cloned_bio->bi_private = split_bio;
1527 cloned_bio->bi_end_io = split_bio_end; 1555 cloned_bio->bi_end_io = split_bio_end;
1528 submit_bio(cloned_bio->bi_rw, cloned_bio); 1556 submit_bio(cloned_bio->bi_rw, cloned_bio);
@@ -1660,6 +1688,23 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
1660 if (err) 1688 if (err)
1661 goto out_of_memory; 1689 goto out_of_memory;
1662 1690
1691 if (!info->feature_persistent && info->max_indirect_segments) {
1692 /*
1693 * We are using indirect descriptors but not persistent
1694 * grants, we need to allocate a set of pages that can be
1695 * used for mapping indirect grefs
1696 */
1697 int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE;
1698
1699 BUG_ON(!list_empty(&info->indirect_pages));
1700 for (i = 0; i < num; i++) {
1701 struct page *indirect_page = alloc_page(GFP_NOIO);
1702 if (!indirect_page)
1703 goto out_of_memory;
1704 list_add(&indirect_page->lru, &info->indirect_pages);
1705 }
1706 }
1707
1663 for (i = 0; i < BLK_RING_SIZE; i++) { 1708 for (i = 0; i < BLK_RING_SIZE; i++) {
1664 info->shadow[i].grants_used = kzalloc( 1709 info->shadow[i].grants_used = kzalloc(
1665 sizeof(info->shadow[i].grants_used[0]) * segs, 1710 sizeof(info->shadow[i].grants_used[0]) * segs,
@@ -1690,6 +1735,13 @@ out_of_memory:
1690 kfree(info->shadow[i].indirect_grants); 1735 kfree(info->shadow[i].indirect_grants);
1691 info->shadow[i].indirect_grants = NULL; 1736 info->shadow[i].indirect_grants = NULL;
1692 } 1737 }
1738 if (!list_empty(&info->indirect_pages)) {
1739 struct page *indirect_page, *n;
1740 list_for_each_entry_safe(indirect_page, n, &info->indirect_pages, lru) {
1741 list_del(&indirect_page->lru);
1742 __free_page(indirect_page);
1743 }
1744 }
1693 return -ENOMEM; 1745 return -ENOMEM;
1694} 1746}
1695 1747
@@ -1959,6 +2011,10 @@ static void blkif_release(struct gendisk *disk, fmode_t mode)
1959 2011
1960 bdev = bdget_disk(disk, 0); 2012 bdev = bdget_disk(disk, 0);
1961 2013
2014 if (!bdev) {
2015 WARN(1, "Block device %s yanked out from us!\n", disk->disk_name);
2016 goto out_mutex;
2017 }
1962 if (bdev->bd_openers) 2018 if (bdev->bd_openers)
1963 goto out; 2019 goto out;
1964 2020
@@ -1989,6 +2045,7 @@ static void blkif_release(struct gendisk *disk, fmode_t mode)
1989 2045
1990out: 2046out:
1991 bdput(bdev); 2047 bdput(bdev);
2048out_mutex:
1992 mutex_unlock(&blkfront_mutex); 2049 mutex_unlock(&blkfront_mutex);
1993} 2050}
1994 2051
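
blkif_release() now tolerates bdget_disk() returning NULL: it warns and jumps to the new out_mutex: label, which only drops blkfront_mutex, while the existing out: label continues to put the bdev first. A small userspace illustration of that staged unwind, using pthread primitives and placeholder helpers (get_resource/put_resource) rather than the driver's functions:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void *get_resource(void)
{
	return NULL;	/* simulate the device node vanishing underneath us */
}

static void put_resource(void *r)
{
	(void)r;
}

static void release_path(void)
{
	void *res;

	pthread_mutex_lock(&lock);
	res = get_resource();
	if (!res) {
		fprintf(stderr, "resource disappeared before release\n");
		goto out_mutex;		/* nothing to put back, only unlock */
	}
	/* ... normal teardown work on res ... */
	put_resource(res);
out_mutex:
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	release_path();
	return 0;
}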