author	Linus Torvalds <torvalds@linux-foundation.org>	2015-09-02 16:14:58 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-09-02 16:14:58 -0400
commit	52b084d31cbc8e90cb6fc1ac4061d9a24375c89d (patch)
tree	b0ebeae6ae724d6cc1c79702d8201e7767b6f3de
parent	1081230b748de8f03f37f80c53dfa89feda9b8de (diff)
parent	e19b127f5b76ec03b9c52b64f117dc75bb39eda1 (diff)
Merge branch 'for-4.3/drivers' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe:
 "On top of the 4.3 core block IO changes, here are the driver related
  changes for 4.3. Basically just NVMe and nbd this time around:

   - NVMe:
       - PRACT PI improvement from Alok Pandey.
       - Cleanups and improvements on submission queue doorbell and
         writing, using CMB if available. From Jon Derrick.
       - From Keith, support for setting queue maximum segments, and
         reset support.
       - Also from Jon, fixup of u64 division issue on 32-bit archs and
         wiring up of the reset support through an ioctl.
       - Two small cleanups from Matias and Sunad

   - Various code cleanups and fixes from Markus Pargmann"

* 'for-4.3/drivers' of git://git.kernel.dk/linux-block:
  NVMe: Using PRACT bit to generate and verify PI by controller
  NVMe:Remove unreachable code in nvme_abort_req
  NVMe: Add nvme subsystem reset IOCTL
  NVMe: Add nvme subsystem reset support
  NVMe: removed unused nn var from nvme_dev_add
  NVMe: Set queue max segments
  nbd: flags is a u32 variable
  nbd: Rename functions for clearness of recv/send path
  nbd: Change 'disconnect' to be boolean
  nbd: Add debugfs entries
  nbd: Remove variable 'pid'
  nbd: Move clear queue debug message
  nbd: Remove 'harderror' and propagate error properly
  nbd: restructure sock_shutdown
  nbd: sock_shutdown, remove conditional lock
  nbd: Fix timeout detection
  nvme: Fixes u64 division which breaks i386 builds
  NVMe: Use CMB for the IO SQes if available
  NVMe: Unify SQ entry writing and doorbell ringing
-rw-r--r--	drivers/block/nbd.c		362
-rw-r--r--	drivers/block/nvme-core.c	267
-rw-r--r--	include/linux/nvme.h		 22
-rw-r--r--	include/uapi/linux/nvme.h	  1
4 files changed, 506 insertions(+), 146 deletions(-)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index f169faf9838a..293495a75d3d 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -33,6 +33,7 @@
 #include <linux/net.h>
 #include <linux/kthread.h>
 #include <linux/types.h>
+#include <linux/debugfs.h>
 
 #include <asm/uaccess.h>
 #include <asm/types.h>
@@ -40,8 +41,7 @@
 #include <linux/nbd.h>
 
 struct nbd_device {
-	int flags;
-	int harderror;		/* Code of hard error			*/
+	u32 flags;
 	struct socket * sock;	/* If == NULL, device is not ready, yet	*/
 	int magic;
 
@@ -56,11 +56,24 @@ struct nbd_device {
 	struct gendisk *disk;
 	int blksize;
 	loff_t bytesize;
-	pid_t pid; /* pid of nbd-client, if attached */
 	int xmit_timeout;
-	int disconnect; /* a disconnect has been requested by user */
+	bool disconnect; /* a disconnect has been requested by user */
+
+	struct timer_list timeout_timer;
+	struct task_struct *task_recv;
+	struct task_struct *task_send;
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+	struct dentry *dbg_dir;
+#endif
 };
 
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+static struct dentry *nbd_dbg_dir;
+#endif
+
+#define nbd_name(nbd) ((nbd)->disk->disk_name)
+
 #define NBD_MAGIC 0x68797548
 
 static unsigned int nbds_max = 16;
@@ -113,26 +126,36 @@ static void nbd_end_request(struct nbd_device *nbd, struct request *req)
 /*
  * Forcibly shutdown the socket causing all listeners to error
  */
-static void sock_shutdown(struct nbd_device *nbd, int lock)
+static void sock_shutdown(struct nbd_device *nbd)
 {
-	if (lock)
-		mutex_lock(&nbd->tx_lock);
-	if (nbd->sock) {
-		dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
-		kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
-		nbd->sock = NULL;
-	}
-	if (lock)
-		mutex_unlock(&nbd->tx_lock);
+	if (!nbd->sock)
+		return;
+
+	dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
+	kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
+	nbd->sock = NULL;
+	del_timer_sync(&nbd->timeout_timer);
 }
 
 static void nbd_xmit_timeout(unsigned long arg)
 {
-	struct task_struct *task = (struct task_struct *)arg;
+	struct nbd_device *nbd = (struct nbd_device *)arg;
+	struct task_struct *task;
+
+	if (list_empty(&nbd->queue_head))
+		return;
+
+	nbd->disconnect = true;
+
+	task = READ_ONCE(nbd->task_recv);
+	if (task)
+		force_sig(SIGKILL, task);
 
-	printk(KERN_WARNING "nbd: killing hung xmit (%s, pid: %d)\n",
-		task->comm, task->pid);
-	force_sig(SIGKILL, task);
+	task = READ_ONCE(nbd->task_send);
+	if (task)
+		force_sig(SIGKILL, nbd->task_send);
+
+	dev_err(nbd_to_dev(nbd), "Connection timed out, killed receiver and sender, shutting down connection\n");
 }
 
 /*
@@ -171,33 +194,12 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
 	msg.msg_controllen = 0;
 	msg.msg_flags = msg_flags | MSG_NOSIGNAL;
 
-	if (send) {
-		struct timer_list ti;
-
-		if (nbd->xmit_timeout) {
-			init_timer(&ti);
-			ti.function = nbd_xmit_timeout;
-			ti.data = (unsigned long)current;
-			ti.expires = jiffies + nbd->xmit_timeout;
-			add_timer(&ti);
-		}
+	if (send)
 		result = kernel_sendmsg(sock, &msg, &iov, 1, size);
-		if (nbd->xmit_timeout)
-			del_timer_sync(&ti);
-	} else
+	else
 		result = kernel_recvmsg(sock, &msg, &iov, 1, size,
 					msg.msg_flags);
 
-	if (signal_pending(current)) {
-		siginfo_t info;
-		printk(KERN_WARNING "nbd (pid %d: %s) got signal %d\n",
-			task_pid_nr(current), current->comm,
-			dequeue_signal_lock(current, &current->blocked, &info));
-		result = -EINTR;
-		sock_shutdown(nbd, !send);
-		break;
-	}
-
 	if (result <= 0) {
 		if (result == 0)
 			result = -EPIPE; /* short read */
@@ -210,6 +212,9 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
 	sigprocmask(SIG_SETMASK, &oldset, NULL);
 	tsk_restore_flags(current, pflags, PF_MEMALLOC);
 
+	if (!send && nbd->xmit_timeout)
+		mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
+
 	return result;
 }
 
@@ -333,26 +338,24 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
 	if (result <= 0) {
 		dev_err(disk_to_dev(nbd->disk),
 			"Receive control failed (result %d)\n", result);
-		goto harderror;
+		return ERR_PTR(result);
 	}
 
 	if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
 		dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n",
 				(unsigned long)ntohl(reply.magic));
-		result = -EPROTO;
-		goto harderror;
+		return ERR_PTR(-EPROTO);
 	}
 
 	req = nbd_find_request(nbd, *(struct request **)reply.handle);
 	if (IS_ERR(req)) {
 		result = PTR_ERR(req);
 		if (result != -ENOENT)
-			goto harderror;
+			return ERR_PTR(result);
 
 		dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%p)\n",
 			reply.handle);
-		result = -EBADR;
-		goto harderror;
+		return ERR_PTR(-EBADR);
 	}
 
 	if (ntohl(reply.error)) {
@@ -380,18 +383,15 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
 		}
 	}
 	return req;
-harderror:
-	nbd->harderror = result;
-	return NULL;
 }
 
 static ssize_t pid_show(struct device *dev,
 			struct device_attribute *attr, char *buf)
 {
 	struct gendisk *disk = dev_to_disk(dev);
+	struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
 
-	return sprintf(buf, "%ld\n",
-		(long) ((struct nbd_device *)disk->private_data)->pid);
+	return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
 }
 
 static struct device_attribute pid_attr = {
@@ -399,7 +399,7 @@ static struct device_attribute pid_attr = {
 	.show = pid_show,
 };
 
-static int nbd_do_it(struct nbd_device *nbd)
+static int nbd_thread_recv(struct nbd_device *nbd)
 {
 	struct request *req;
 	int ret;
@@ -407,20 +407,43 @@ static int nbd_do_it(struct nbd_device *nbd)
 	BUG_ON(nbd->magic != NBD_MAGIC);
 
 	sk_set_memalloc(nbd->sock->sk);
-	nbd->pid = task_pid_nr(current);
+
+	nbd->task_recv = current;
+
 	ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
 	if (ret) {
 		dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
-		nbd->pid = 0;
+		nbd->task_recv = NULL;
 		return ret;
 	}
 
-	while ((req = nbd_read_stat(nbd)) != NULL)
+	while (1) {
+		req = nbd_read_stat(nbd);
+		if (IS_ERR(req)) {
+			ret = PTR_ERR(req);
+			break;
+		}
+
 		nbd_end_request(nbd, req);
+	}
 
 	device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
-	nbd->pid = 0;
-	return 0;
+
+	nbd->task_recv = NULL;
+
+	if (signal_pending(current)) {
+		siginfo_t info;
+
+		ret = dequeue_signal_lock(current, &current->blocked, &info);
+		dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n",
+			 task_pid_nr(current), current->comm, ret);
+		mutex_lock(&nbd->tx_lock);
+		sock_shutdown(nbd);
+		mutex_unlock(&nbd->tx_lock);
+		ret = -ETIMEDOUT;
+	}
+
+	return ret;
 }
 
 static void nbd_clear_que(struct nbd_device *nbd)
@@ -455,6 +478,7 @@ static void nbd_clear_que(struct nbd_device *nbd)
 		req->errors++;
 		nbd_end_request(nbd, req);
 	}
+	dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
 }
 
 
@@ -482,6 +506,9 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
 
 	nbd->active_req = req;
 
+	if (nbd->xmit_timeout && list_empty_careful(&nbd->queue_head))
+		mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
+
 	if (nbd_send_req(nbd, req) != 0) {
 		dev_err(disk_to_dev(nbd->disk), "Request send failed\n");
 		req->errors++;
@@ -503,11 +530,13 @@ error_out:
 	nbd_end_request(nbd, req);
 }
 
-static int nbd_thread(void *data)
+static int nbd_thread_send(void *data)
 {
 	struct nbd_device *nbd = data;
 	struct request *req;
 
+	nbd->task_send = current;
+
 	set_user_nice(current, MIN_NICE);
 	while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
 		/* wait for something to do */
@@ -515,6 +544,20 @@ static int nbd_thread(void *data)
 					 kthread_should_stop() ||
 					 !list_empty(&nbd->waiting_queue));
 
+		if (signal_pending(current)) {
+			siginfo_t info;
+			int ret;
+
+			ret = dequeue_signal_lock(current, &current->blocked,
+						  &info);
+			dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n",
+				 task_pid_nr(current), current->comm, ret);
+			mutex_lock(&nbd->tx_lock);
+			sock_shutdown(nbd);
+			mutex_unlock(&nbd->tx_lock);
+			break;
+		}
+
 		/* extract request */
 		if (list_empty(&nbd->waiting_queue))
 			continue;
@@ -528,6 +571,9 @@ static int nbd_thread(void *data)
 		/* handle request */
 		nbd_handle_req(nbd, req);
 	}
+
+	nbd->task_send = NULL;
+
 	return 0;
 }
 
@@ -538,7 +584,7 @@ static int nbd_thread(void *data)
  *   { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
  */
 
-static void do_nbd_request(struct request_queue *q)
+static void nbd_request_handler(struct request_queue *q)
 		__releases(q->queue_lock) __acquires(q->queue_lock)
 {
 	struct request *req;
@@ -574,6 +620,9 @@ static void do_nbd_request(struct request_queue *q)
 	}
 }
 
+static int nbd_dev_dbg_init(struct nbd_device *nbd);
+static void nbd_dev_dbg_close(struct nbd_device *nbd);
+
 /* Must be called with tx_lock held */
 
 static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
@@ -597,7 +646,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		if (!nbd->sock)
 			return -EINVAL;
 
-		nbd->disconnect = 1;
+		nbd->disconnect = true;
 
 		nbd_send_req(nbd, &sreq);
 		return 0;
@@ -625,7 +674,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 			nbd->sock = sock;
 			if (max_part > 0)
 				bdev->bd_invalidated = 1;
-			nbd->disconnect = 0; /* we're connected now */
+			nbd->disconnect = false; /* we're connected now */
 			return 0;
 		}
 		return -EINVAL;
@@ -648,6 +697,12 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 
 	case NBD_SET_TIMEOUT:
 		nbd->xmit_timeout = arg * HZ;
+		if (arg)
+			mod_timer(&nbd->timeout_timer,
+				  jiffies + nbd->xmit_timeout);
+		else
+			del_timer_sync(&nbd->timeout_timer);
+
 		return 0;
 
 	case NBD_SET_FLAGS:
@@ -666,7 +721,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		struct socket *sock;
 		int error;
 
-		if (nbd->pid)
+		if (nbd->task_recv)
 			return -EBUSY;
 		if (!nbd->sock)
 			return -EINVAL;
@@ -683,24 +738,24 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		else
 			blk_queue_flush(nbd->disk->queue, 0);
 
-		thread = kthread_run(nbd_thread, nbd, "%s",
-				     nbd->disk->disk_name);
+		thread = kthread_run(nbd_thread_send, nbd, "%s",
+				     nbd_name(nbd));
 		if (IS_ERR(thread)) {
 			mutex_lock(&nbd->tx_lock);
 			return PTR_ERR(thread);
 		}
 
-		error = nbd_do_it(nbd);
+		nbd_dev_dbg_init(nbd);
+		error = nbd_thread_recv(nbd);
+		nbd_dev_dbg_close(nbd);
 		kthread_stop(thread);
 
 		mutex_lock(&nbd->tx_lock);
-		if (error)
-			return error;
-		sock_shutdown(nbd, 0);
+
+		sock_shutdown(nbd);
 		sock = nbd->sock;
 		nbd->sock = NULL;
 		nbd_clear_que(nbd);
-		dev_warn(disk_to_dev(nbd->disk), "queue cleared\n");
 		kill_bdev(bdev);
 		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
 		set_device_ro(bdev, false);
@@ -714,7 +769,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		blkdev_reread_part(bdev);
 		if (nbd->disconnect) /* user requested, ignore socket errors */
 			return 0;
-		return nbd->harderror;
+		return error;
 	}
 
 	case NBD_CLEAR_QUE:
@@ -758,6 +813,161 @@ static const struct block_device_operations nbd_fops =
 	.ioctl =	nbd_ioctl,
 };
 
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+
+static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
+{
+	struct nbd_device *nbd = s->private;
+
+	if (nbd->task_recv)
+		seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv));
+	if (nbd->task_send)
+		seq_printf(s, "send: %d\n", task_pid_nr(nbd->task_send));
+
+	return 0;
+}
+
+static int nbd_dbg_tasks_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, nbd_dbg_tasks_show, inode->i_private);
+}
+
+static const struct file_operations nbd_dbg_tasks_ops = {
+	.open = nbd_dbg_tasks_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
+{
+	struct nbd_device *nbd = s->private;
+	u32 flags = nbd->flags;
+
+	seq_printf(s, "Hex: 0x%08x\n\n", flags);
+
+	seq_puts(s, "Known flags:\n");
+
+	if (flags & NBD_FLAG_HAS_FLAGS)
+		seq_puts(s, "NBD_FLAG_HAS_FLAGS\n");
+	if (flags & NBD_FLAG_READ_ONLY)
+		seq_puts(s, "NBD_FLAG_READ_ONLY\n");
+	if (flags & NBD_FLAG_SEND_FLUSH)
+		seq_puts(s, "NBD_FLAG_SEND_FLUSH\n");
+	if (flags & NBD_FLAG_SEND_TRIM)
+		seq_puts(s, "NBD_FLAG_SEND_TRIM\n");
+
+	return 0;
+}
+
+static int nbd_dbg_flags_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, nbd_dbg_flags_show, inode->i_private);
+}
+
+static const struct file_operations nbd_dbg_flags_ops = {
+	.open = nbd_dbg_flags_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int nbd_dev_dbg_init(struct nbd_device *nbd)
+{
+	struct dentry *dir;
+	struct dentry *f;
+
+	dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir);
+	if (IS_ERR_OR_NULL(dir)) {
+		dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s' (%ld)\n",
+			nbd_name(nbd), PTR_ERR(dir));
+		return PTR_ERR(dir);
+	}
+	nbd->dbg_dir = dir;
+
+	f = debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops);
+	if (IS_ERR_OR_NULL(f)) {
+		dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'tasks', %ld\n",
+			PTR_ERR(f));
+		return PTR_ERR(f);
+	}
+
+	f = debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize);
+	if (IS_ERR_OR_NULL(f)) {
+		dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'size_bytes', %ld\n",
+			PTR_ERR(f));
+		return PTR_ERR(f);
+	}
+
+	f = debugfs_create_u32("timeout", 0444, dir, &nbd->xmit_timeout);
+	if (IS_ERR_OR_NULL(f)) {
+		dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'timeout', %ld\n",
+			PTR_ERR(f));
+		return PTR_ERR(f);
+	}
+
+	f = debugfs_create_u32("blocksize", 0444, dir, &nbd->blksize);
+	if (IS_ERR_OR_NULL(f)) {
+		dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'blocksize', %ld\n",
+			PTR_ERR(f));
+		return PTR_ERR(f);
+	}
+
+	f = debugfs_create_file("flags", 0444, dir, &nbd, &nbd_dbg_flags_ops);
+	if (IS_ERR_OR_NULL(f)) {
+		dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'flags', %ld\n",
+			PTR_ERR(f));
+		return PTR_ERR(f);
+	}
+
+	return 0;
+}
+
+static void nbd_dev_dbg_close(struct nbd_device *nbd)
+{
+	debugfs_remove_recursive(nbd->dbg_dir);
+}
+
+static int nbd_dbg_init(void)
+{
+	struct dentry *dbg_dir;
+
+	dbg_dir = debugfs_create_dir("nbd", NULL);
+	if (IS_ERR(dbg_dir))
+		return PTR_ERR(dbg_dir);
+
+	nbd_dbg_dir = dbg_dir;
+
+	return 0;
+}
+
+static void nbd_dbg_close(void)
+{
+	debugfs_remove_recursive(nbd_dbg_dir);
+}
+
+#else  /* IS_ENABLED(CONFIG_DEBUG_FS) */
+
+static int nbd_dev_dbg_init(struct nbd_device *nbd)
+{
+	return 0;
+}
+
+static void nbd_dev_dbg_close(struct nbd_device *nbd)
+{
+}
+
+static int nbd_dbg_init(void)
+{
+	return 0;
+}
+
+static void nbd_dbg_close(void)
+{
+}
+
+#endif
+
 /*
  * And here should be modules and kernel interface
  *  (Just smiley confuses emacs :-)
@@ -811,7 +1021,7 @@ static int __init nbd_init(void)
 		 * every gendisk to have its very own request_queue struct.
 		 * These structs are big so we dynamically allocate them.
 		 */
-		disk->queue = blk_init_queue(do_nbd_request, &nbd_lock);
+		disk->queue = blk_init_queue(nbd_request_handler, &nbd_lock);
 		if (!disk->queue) {
 			put_disk(disk);
 			goto out;
@@ -835,6 +1045,8 @@ static int __init nbd_init(void)
 
 	printk(KERN_INFO "nbd: registered device at major %d\n", NBD_MAJOR);
 
+	nbd_dbg_init();
+
 	for (i = 0; i < nbds_max; i++) {
 		struct gendisk *disk = nbd_dev[i].disk;
 		nbd_dev[i].magic = NBD_MAGIC;
@@ -842,6 +1054,9 @@ static int __init nbd_init(void)
 		spin_lock_init(&nbd_dev[i].queue_lock);
 		INIT_LIST_HEAD(&nbd_dev[i].queue_head);
 		mutex_init(&nbd_dev[i].tx_lock);
+		init_timer(&nbd_dev[i].timeout_timer);
+		nbd_dev[i].timeout_timer.function = nbd_xmit_timeout;
+		nbd_dev[i].timeout_timer.data = (unsigned long)&nbd_dev[i];
 		init_waitqueue_head(&nbd_dev[i].active_wq);
 		init_waitqueue_head(&nbd_dev[i].waiting_wq);
 		nbd_dev[i].blksize = 1024;
@@ -868,6 +1083,9 @@ out:
 static void __exit nbd_cleanup(void)
 {
 	int i;
+
+	nbd_dbg_close();
+
 	for (i = 0; i < nbds_max; i++) {
 		struct gendisk *disk = nbd_dev[i].disk;
 		nbd_dev[i].magic = 0;
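
With the nbd changes above, a connected device gets a per-device debugfs directory from nbd_dev_dbg_init(), exposing 'tasks', 'size_bytes', 'timeout', 'blocksize' and 'flags'. Below is a minimal userspace sketch that dumps those files; the mount point /sys/kernel/debug and the device name nbd0 are assumptions for illustration, not part of the patch.

/* Sketch: read the debugfs files created by nbd_dev_dbg_init() for one device.
 * Assumes debugfs is mounted at /sys/kernel/debug and nbd0 has a client attached. */
#include <stdio.h>

int main(void)
{
	const char *files[] = { "tasks", "size_bytes", "timeout", "blocksize", "flags" };
	char path[128], buf[256];
	unsigned i;

	for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
		FILE *f;

		snprintf(path, sizeof(path), "/sys/kernel/debug/nbd/nbd0/%s", files[i]);
		f = fopen(path, "r");
		if (!f) {
			perror(path);
			continue;
		}
		printf("== %s ==\n", files[i]);
		while (fgets(buf, sizeof(buf), f))
			fputs(buf, stdout);
		fclose(f);
	}
	return 0;
}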
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 2f694d78da55..b97fc3fe0916 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -72,6 +72,10 @@ module_param(nvme_char_major, int, 0);
 static int use_threaded_interrupts;
 module_param(use_threaded_interrupts, int, 0);
 
+static bool use_cmb_sqes = true;
+module_param(use_cmb_sqes, bool, 0644);
+MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
+
 static DEFINE_SPINLOCK(dev_list_lock);
 static LIST_HEAD(dev_list);
 static struct task_struct *nvme_thread;
@@ -103,6 +107,7 @@ struct nvme_queue {
 	char irqname[24];	/* nvme4294967295-65535\0 */
 	spinlock_t q_lock;
 	struct nvme_command *sq_cmds;
+	struct nvme_command __iomem *sq_cmds_io;
 	volatile struct nvme_completion *cqes;
 	struct blk_mq_tags **tags;
 	dma_addr_t sq_dma_addr;
@@ -379,27 +384,28 @@ static void *nvme_finish_cmd(struct nvme_queue *nvmeq, int tag,
  *
  * Safe to use from interrupt context
  */
-static int __nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
+static void __nvme_submit_cmd(struct nvme_queue *nvmeq,
+						struct nvme_command *cmd)
 {
 	u16 tail = nvmeq->sq_tail;
 
-	memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
+	if (nvmeq->sq_cmds_io)
+		memcpy_toio(&nvmeq->sq_cmds_io[tail], cmd, sizeof(*cmd));
+	else
+		memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
+
 	if (++tail == nvmeq->q_depth)
 		tail = 0;
 	writel(tail, nvmeq->q_db);
 	nvmeq->sq_tail = tail;
-
-	return 0;
 }
 
-static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
+static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
 {
 	unsigned long flags;
-	int ret;
 	spin_lock_irqsave(&nvmeq->q_lock, flags);
-	ret = __nvme_submit_cmd(nvmeq, cmd);
+	__nvme_submit_cmd(nvmeq, cmd);
 	spin_unlock_irqrestore(&nvmeq->q_lock, flags);
-	return ret;
 }
 
 static __le64 **iod_list(struct nvme_iod *iod)
@@ -730,18 +736,16 @@ static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
 static void nvme_submit_priv(struct nvme_queue *nvmeq, struct request *req,
 		struct nvme_iod *iod)
 {
-	struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
+	struct nvme_command cmnd;
 
-	memcpy(cmnd, req->cmd, sizeof(struct nvme_command));
-	cmnd->rw.command_id = req->tag;
+	memcpy(&cmnd, req->cmd, sizeof(cmnd));
+	cmnd.rw.command_id = req->tag;
 	if (req->nr_phys_segments) {
-		cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
-		cmnd->rw.prp2 = cpu_to_le64(iod->first_dma);
+		cmnd.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+		cmnd.rw.prp2 = cpu_to_le64(iod->first_dma);
 	}
 
-	if (++nvmeq->sq_tail == nvmeq->q_depth)
-		nvmeq->sq_tail = 0;
-	writel(nvmeq->sq_tail, nvmeq->q_db);
+	__nvme_submit_cmd(nvmeq, &cmnd);
 }
 
 /*
@@ -754,45 +758,41 @@ static void nvme_submit_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 {
 	struct nvme_dsm_range *range =
 				(struct nvme_dsm_range *)iod_list(iod)[0];
-	struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
+	struct nvme_command cmnd;
 
 	range->cattr = cpu_to_le32(0);
 	range->nlb = cpu_to_le32(blk_rq_bytes(req) >> ns->lba_shift);
 	range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
 
-	memset(cmnd, 0, sizeof(*cmnd));
-	cmnd->dsm.opcode = nvme_cmd_dsm;
-	cmnd->dsm.command_id = req->tag;
-	cmnd->dsm.nsid = cpu_to_le32(ns->ns_id);
-	cmnd->dsm.prp1 = cpu_to_le64(iod->first_dma);
-	cmnd->dsm.nr = 0;
-	cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
+	memset(&cmnd, 0, sizeof(cmnd));
+	cmnd.dsm.opcode = nvme_cmd_dsm;
+	cmnd.dsm.command_id = req->tag;
+	cmnd.dsm.nsid = cpu_to_le32(ns->ns_id);
+	cmnd.dsm.prp1 = cpu_to_le64(iod->first_dma);
+	cmnd.dsm.nr = 0;
+	cmnd.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
 
-	if (++nvmeq->sq_tail == nvmeq->q_depth)
-		nvmeq->sq_tail = 0;
-	writel(nvmeq->sq_tail, nvmeq->q_db);
+	__nvme_submit_cmd(nvmeq, &cmnd);
 }
 
 static void nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 								int cmdid)
 {
-	struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
+	struct nvme_command cmnd;
 
-	memset(cmnd, 0, sizeof(*cmnd));
-	cmnd->common.opcode = nvme_cmd_flush;
-	cmnd->common.command_id = cmdid;
-	cmnd->common.nsid = cpu_to_le32(ns->ns_id);
+	memset(&cmnd, 0, sizeof(cmnd));
+	cmnd.common.opcode = nvme_cmd_flush;
+	cmnd.common.command_id = cmdid;
+	cmnd.common.nsid = cpu_to_le32(ns->ns_id);
 
-	if (++nvmeq->sq_tail == nvmeq->q_depth)
-		nvmeq->sq_tail = 0;
-	writel(nvmeq->sq_tail, nvmeq->q_db);
+	__nvme_submit_cmd(nvmeq, &cmnd);
 }
 
 static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
 							struct nvme_ns *ns)
 {
 	struct request *req = iod_get_private(iod);
-	struct nvme_command *cmnd;
+	struct nvme_command cmnd;
 	u16 control = 0;
 	u32 dsmgmt = 0;
 
@@ -804,19 +804,16 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
 	if (req->cmd_flags & REQ_RAHEAD)
 		dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
 
-	cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
-	memset(cmnd, 0, sizeof(*cmnd));
+	memset(&cmnd, 0, sizeof(cmnd));
+	cmnd.rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
+	cmnd.rw.command_id = req->tag;
+	cmnd.rw.nsid = cpu_to_le32(ns->ns_id);
+	cmnd.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+	cmnd.rw.prp2 = cpu_to_le64(iod->first_dma);
+	cmnd.rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+	cmnd.rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
 
-	cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
-	cmnd->rw.command_id = req->tag;
-	cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
-	cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
-	cmnd->rw.prp2 = cpu_to_le64(iod->first_dma);
-	cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
-	cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
-
-	if (blk_integrity_rq(req)) {
-		cmnd->rw.metadata = cpu_to_le64(sg_dma_address(iod->meta_sg));
+	if (ns->ms) {
 		switch (ns->pi_type) {
 		case NVME_NS_DPS_PI_TYPE3:
 			control |= NVME_RW_PRINFO_PRCHK_GUARD;
@@ -825,19 +822,21 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
 		case NVME_NS_DPS_PI_TYPE2:
 			control |= NVME_RW_PRINFO_PRCHK_GUARD |
 					NVME_RW_PRINFO_PRCHK_REF;
-			cmnd->rw.reftag = cpu_to_le32(
+			cmnd.rw.reftag = cpu_to_le32(
 					nvme_block_nr(ns, blk_rq_pos(req)));
 			break;
 		}
-	} else if (ns->ms)
-		control |= NVME_RW_PRINFO_PRACT;
+		if (blk_integrity_rq(req))
+			cmnd.rw.metadata =
+				cpu_to_le64(sg_dma_address(iod->meta_sg));
+		else
+			control |= NVME_RW_PRINFO_PRACT;
+	}
 
-	cmnd->rw.control = cpu_to_le16(control);
-	cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
+	cmnd.rw.control = cpu_to_le16(control);
+	cmnd.rw.dsmgmt = cpu_to_le32(dsmgmt);
 
-	if (++nvmeq->sq_tail == nvmeq->q_depth)
-		nvmeq->sq_tail = 0;
-	writel(nvmeq->sq_tail, nvmeq->q_db);
+	__nvme_submit_cmd(nvmeq, &cmnd);
 
 	return 0;
 }
@@ -1080,7 +1079,8 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev)
 	c.common.command_id = req->tag;
 
 	blk_mq_free_request(req);
-	return __nvme_submit_cmd(nvmeq, &c);
+	__nvme_submit_cmd(nvmeq, &c);
+	return 0;
 }
 
 static int nvme_submit_admin_async_cmd(struct nvme_dev *dev,
@@ -1103,7 +1103,8 @@ static int nvme_submit_admin_async_cmd(struct nvme_dev *dev,
 
 	cmd->common.command_id = req->tag;
 
-	return nvme_submit_cmd(nvmeq, cmd);
+	nvme_submit_cmd(nvmeq, cmd);
+	return 0;
 }
 
 static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
@@ -1315,12 +1316,7 @@ static void nvme_abort_req(struct request *req)
 
 	dev_warn(nvmeq->q_dmadev, "Aborting I/O %d QID %d\n", req->tag,
 							nvmeq->qid);
-	if (nvme_submit_cmd(dev->queues[0], &cmd) < 0) {
-		dev_warn(nvmeq->q_dmadev,
-				"Could not abort I/O %d QID %d",
-				req->tag, nvmeq->qid);
-		blk_mq_free_request(abort_req);
-	}
+	nvme_submit_cmd(dev->queues[0], &cmd);
 }
 
 static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved)
@@ -1374,7 +1370,8 @@ static void nvme_free_queue(struct nvme_queue *nvmeq)
 {
 	dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
 				(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
-	dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
+	if (nvmeq->sq_cmds)
+		dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
 					nvmeq->sq_cmds, nvmeq->sq_dma_addr);
 	kfree(nvmeq);
 }
@@ -1447,6 +1444,47 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid)
 	spin_unlock_irq(&nvmeq->q_lock);
 }
 
+static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
+				int entry_size)
+{
+	int q_depth = dev->q_depth;
+	unsigned q_size_aligned = roundup(q_depth * entry_size, dev->page_size);
+
+	if (q_size_aligned * nr_io_queues > dev->cmb_size) {
+		u64 mem_per_q = div_u64(dev->cmb_size, nr_io_queues);
+		mem_per_q = round_down(mem_per_q, dev->page_size);
+		q_depth = div_u64(mem_per_q, entry_size);
+
+		/*
+		 * Ensure the reduced q_depth is above some threshold where it
+		 * would be better to map queues in system memory with the
+		 * original depth
+		 */
+		if (q_depth < 64)
+			return -ENOMEM;
+	}
+
+	return q_depth;
+}
+
+static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
+				int qid, int depth)
+{
+	if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) {
+		unsigned offset = (qid - 1) *
+					roundup(SQ_SIZE(depth), dev->page_size);
+		nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset;
+		nvmeq->sq_cmds_io = dev->cmb + offset;
+	} else {
+		nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
+					&nvmeq->sq_dma_addr, GFP_KERNEL);
+		if (!nvmeq->sq_cmds)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
 static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 							int depth)
 {
@@ -1459,9 +1497,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 	if (!nvmeq->cqes)
 		goto free_nvmeq;
 
-	nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
-					&nvmeq->sq_dma_addr, GFP_KERNEL);
-	if (!nvmeq->sq_cmds)
+	if (nvme_alloc_sq_cmds(dev, nvmeq, qid, depth))
 		goto free_cqdma;
 
 	nvmeq->q_dmadev = dev->dev;
@@ -1696,6 +1732,12 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 		page_shift = dev_page_max;
 	}
 
+	dev->subsystem = readl(&dev->bar->vs) >= NVME_VS(1, 1) ?
+						NVME_CAP_NSSRC(cap) : 0;
+
+	if (dev->subsystem && (readl(&dev->bar->csts) & NVME_CSTS_NSSRO))
+		writel(NVME_CSTS_NSSRO, &dev->bar->csts);
+
 	result = nvme_disable_ctrl(dev, cap);
 	if (result < 0)
 		return result;
@@ -1856,6 +1898,15 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
 	return status;
 }
 
+static int nvme_subsys_reset(struct nvme_dev *dev)
+{
+	if (!dev->subsystem)
+		return -ENOTTY;
+
+	writel(0x4E564D65, &dev->bar->nssr); /* "NVMe" */
+	return 0;
+}
+
 static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
 							unsigned long arg)
 {
@@ -1989,7 +2040,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 								!ns->ext)
 		nvme_init_integrity(ns);
 
-	if (ns->ms && !blk_get_integrity(disk))
+	if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
 		set_capacity(disk, 0);
 	else
 		set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
@@ -2020,7 +2071,10 @@ static int nvme_kthread(void *data)
 		spin_lock(&dev_list_lock);
 		list_for_each_entry_safe(dev, next, &dev_list, node) {
 			int i;
-			if (readl(&dev->bar->csts) & NVME_CSTS_CFS) {
+			u32 csts = readl(&dev->bar->csts);
+
+			if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) ||
+							csts & NVME_CSTS_CFS) {
 				if (work_busy(&dev->reset_work))
 					continue;
 				list_del_init(&dev->node);
@@ -2080,8 +2134,11 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
 	list_add_tail(&ns->list, &dev->namespaces);
 
 	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
-	if (dev->max_hw_sectors)
+	if (dev->max_hw_sectors) {
 		blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
+		blk_queue_max_segments(ns->queue,
+			((dev->max_hw_sectors << 9) / dev->page_size) + 1);
+	}
 	if (dev->stripe_size)
 		blk_queue_chunk_sectors(ns->queue, dev->stripe_size >> 9);
 	if (dev->vwc & NVME_CTRL_VWC_PRESENT)
@@ -2159,6 +2216,58 @@ static int set_queue_count(struct nvme_dev *dev, int count)
 	return min(result & 0xffff, result >> 16) + 1;
 }
 
+static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
+{
+	u64 szu, size, offset;
+	u32 cmbloc;
+	resource_size_t bar_size;
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
+	void __iomem *cmb;
+	dma_addr_t dma_addr;
+
+	if (!use_cmb_sqes)
+		return NULL;
+
+	dev->cmbsz = readl(&dev->bar->cmbsz);
+	if (!(NVME_CMB_SZ(dev->cmbsz)))
+		return NULL;
+
+	cmbloc = readl(&dev->bar->cmbloc);
+
+	szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz));
+	size = szu * NVME_CMB_SZ(dev->cmbsz);
+	offset = szu * NVME_CMB_OFST(cmbloc);
+	bar_size = pci_resource_len(pdev, NVME_CMB_BIR(cmbloc));
+
+	if (offset > bar_size)
+		return NULL;
+
+	/*
+	 * Controllers may support a CMB size larger than their BAR,
+	 * for example, due to being behind a bridge. Reduce the CMB to
+	 * the reported size of the BAR
+	 */
+	if (size > bar_size - offset)
+		size = bar_size - offset;
+
+	dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(cmbloc)) + offset;
+	cmb = ioremap_wc(dma_addr, size);
+	if (!cmb)
+		return NULL;
+
+	dev->cmb_dma_addr = dma_addr;
+	dev->cmb_size = size;
+	return cmb;
+}
+
+static inline void nvme_release_cmb(struct nvme_dev *dev)
+{
+	if (dev->cmb) {
+		iounmap(dev->cmb);
+		dev->cmb = NULL;
+	}
+}
+
 static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
 {
 	return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
@@ -2177,6 +2286,15 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	if (result < nr_io_queues)
 		nr_io_queues = result;
 
+	if (dev->cmb && NVME_CMB_SQS(dev->cmbsz)) {
+		result = nvme_cmb_qdepth(dev, nr_io_queues,
+				sizeof(struct nvme_command));
+		if (result > 0)
+			dev->q_depth = result;
+		else
+			nvme_release_cmb(dev);
+	}
+
 	size = db_bar_size(dev, nr_io_queues);
 	if (size > 8192) {
 		iounmap(dev->bar);
@@ -2344,7 +2462,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 	int res;
-	unsigned nn;
 	struct nvme_id_ctrl *ctrl;
 	int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
 
@@ -2354,7 +2471,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
 		return -EIO;
 	}
 
-	nn = le32_to_cpup(&ctrl->nn);
 	dev->oncs = le16_to_cpup(&ctrl->oncs);
 	dev->abort_limit = ctrl->acl + 1;
 	dev->vwc = ctrl->vwc;
@@ -2440,6 +2556,8 @@ static int nvme_dev_map(struct nvme_dev *dev)
 	dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
 	dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
 	dev->dbs = ((void __iomem *)dev->bar) + 4096;
+	if (readl(&dev->bar->vs) >= NVME_VS(1, 2))
+		dev->cmb = nvme_map_cmb(dev);
 
 	return 0;
 
@@ -2820,6 +2938,8 @@ static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 	case NVME_IOCTL_RESET:
 		dev_warn(dev->dev, "resetting controller\n");
 		return nvme_reset(dev);
+	case NVME_IOCTL_SUBSYS_RESET:
+		return nvme_subsys_reset(dev);
 	default:
 		return -ENOTTY;
 	}
@@ -3145,6 +3265,7 @@ static void nvme_remove(struct pci_dev *pdev)
 	nvme_dev_remove_admin(dev);
 	device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
 	nvme_free_queues(dev, 0);
+	nvme_release_cmb(dev);
 	nvme_release_prp_pools(dev);
 	kref_put(&dev->kref, nvme_free_dev);
 }
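
The nvme-core.c changes above scale the per-queue depth down when the I/O submission queues live in the controller memory buffer (nvme_cmb_qdepth()). A standalone restatement of that arithmetic is sketched below; the CMB size, page size and queue count are made-up example numbers, only the 64-byte SQ entry size comes from the NVMe command format.

/* Sketch: the nvme_cmb_qdepth() arithmetic with illustrative numbers. */
#include <stdio.h>

int main(void)
{
	unsigned long long cmb_size = 256 * 1024;	/* assume a 256 KiB CMB */
	unsigned page_size = 4096;
	unsigned entry_size = 64;			/* NVMe SQ entry size */
	int nr_io_queues = 8;
	int q_depth = 1024;				/* depth before scaling */

	/* roundup(q_depth * entry_size, page_size) */
	unsigned long long q_size_aligned =
		((unsigned long long)q_depth * entry_size + page_size - 1)
			/ page_size * page_size;

	if (q_size_aligned * nr_io_queues > cmb_size) {
		unsigned long long mem_per_q = cmb_size / nr_io_queues;

		mem_per_q -= mem_per_q % page_size;	/* round_down to page size */
		q_depth = mem_per_q / entry_size;
	}

	/* Below 64 entries the driver gives up and keeps the SQs in host memory. */
	if (q_depth < 64)
		printf("CMB too small, fall back to host-memory SQs\n");
	else
		printf("per-queue depth with SQs in the CMB: %d\n", q_depth);
	return 0;
}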
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index c0d94ed8ce9a..b5812c395351 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -28,18 +28,32 @@ struct nvme_bar {
 	__u32			cc;	/* Controller Configuration */
 	__u32			rsvd1;	/* Reserved */
 	__u32			csts;	/* Controller Status */
-	__u32			rsvd2;	/* Reserved */
+	__u32			nssr;	/* Subsystem Reset */
 	__u32			aqa;	/* Admin Queue Attributes */
 	__u64			asq;	/* Admin SQ Base Address */
 	__u64			acq;	/* Admin CQ Base Address */
+	__u32			cmbloc; /* Controller Memory Buffer Location */
+	__u32			cmbsz;  /* Controller Memory Buffer Size */
 };
 
 #define NVME_CAP_MQES(cap)	((cap) & 0xffff)
 #define NVME_CAP_TIMEOUT(cap)	(((cap) >> 24) & 0xff)
 #define NVME_CAP_STRIDE(cap)	(((cap) >> 32) & 0xf)
+#define NVME_CAP_NSSRC(cap)	(((cap) >> 36) & 0x1)
 #define NVME_CAP_MPSMIN(cap)	(((cap) >> 48) & 0xf)
 #define NVME_CAP_MPSMAX(cap)	(((cap) >> 52) & 0xf)
 
+#define NVME_CMB_BIR(cmbloc)	((cmbloc) & 0x7)
+#define NVME_CMB_OFST(cmbloc)	(((cmbloc) >> 12) & 0xfffff)
+#define NVME_CMB_SZ(cmbsz)	(((cmbsz) >> 12) & 0xfffff)
+#define NVME_CMB_SZU(cmbsz)	(((cmbsz) >> 8) & 0xf)
+
+#define NVME_CMB_WDS(cmbsz)	((cmbsz) & 0x10)
+#define NVME_CMB_RDS(cmbsz)	((cmbsz) & 0x8)
+#define NVME_CMB_LISTS(cmbsz)	((cmbsz) & 0x4)
+#define NVME_CMB_CQS(cmbsz)	((cmbsz) & 0x2)
+#define NVME_CMB_SQS(cmbsz)	((cmbsz) & 0x1)
+
 enum {
 	NVME_CC_ENABLE		= 1 << 0,
 	NVME_CC_CSS_NVM		= 0 << 4,
@@ -55,6 +69,7 @@ enum {
 	NVME_CC_IOCQES		= 4 << 20,
 	NVME_CSTS_RDY		= 1 << 0,
 	NVME_CSTS_CFS		= 1 << 1,
+	NVME_CSTS_NSSRO		= 1 << 4,
 	NVME_CSTS_SHST_NORMAL	= 0 << 2,
 	NVME_CSTS_SHST_OCCUR	= 1 << 2,
 	NVME_CSTS_SHST_CMPLT	= 2 << 2,
@@ -97,9 +112,14 @@ struct nvme_dev {
 	char serial[20];
 	char model[40];
 	char firmware_rev[8];
+	bool subsystem;
 	u32 max_hw_sectors;
 	u32 stripe_size;
 	u32 page_size;
+	void __iomem *cmb;
+	dma_addr_t cmb_dma_addr;
+	u64 cmb_size;
+	u32 cmbsz;
 	u16 oncs;
 	u16 abort_limit;
 	u8 event_limit;
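
The NVME_CMB_* accessors added above are used by nvme_map_cmb() to locate and size the controller memory buffer. A minimal sketch of the same decoding in isolation follows; the register values are invented purely to show the bit layout, and the macros are copied from the patch so the sketch compiles on its own.

/* Sketch: decode CMBLOC/CMBSZ the way nvme_map_cmb() does.
 * The register values below are made up for illustration. */
#include <stdio.h>
#include <stdint.h>

#define NVME_CMB_BIR(cmbloc)	((cmbloc) & 0x7)
#define NVME_CMB_OFST(cmbloc)	(((cmbloc) >> 12) & 0xfffff)
#define NVME_CMB_SZ(cmbsz)	(((cmbsz) >> 12) & 0xfffff)
#define NVME_CMB_SZU(cmbsz)	(((cmbsz) >> 8) & 0xf)
#define NVME_CMB_SQS(cmbsz)	((cmbsz) & 0x1)

int main(void)
{
	uint32_t cmbloc = 0x00001002;	/* offset 1 unit, BAR indicator 2 (made up) */
	uint32_t cmbsz  = 0x00002001;	/* size 2 units, SZU 0 (4 KiB units), SQS set (made up) */

	uint64_t szu = (uint64_t)1 << (12 + 4 * NVME_CMB_SZU(cmbsz));
	uint64_t size = szu * NVME_CMB_SZ(cmbsz);
	uint64_t offset = szu * NVME_CMB_OFST(cmbloc);

	printf("CMB in BAR%u, offset %llu bytes, size %llu bytes, SQs %s\n",
	       (unsigned)NVME_CMB_BIR(cmbloc),
	       (unsigned long long)offset, (unsigned long long)size,
	       NVME_CMB_SQS(cmbsz) ? "supported" : "not supported");
	return 0;
}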
diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h
index 732b32e92b02..8864194a4151 100644
--- a/include/uapi/linux/nvme.h
+++ b/include/uapi/linux/nvme.h
@@ -584,5 +584,6 @@ struct nvme_passthru_cmd {
 #define NVME_IOCTL_SUBMIT_IO	_IOW('N', 0x42, struct nvme_user_io)
 #define NVME_IOCTL_IO_CMD	_IOWR('N', 0x43, struct nvme_passthru_cmd)
 #define NVME_IOCTL_RESET	_IO('N', 0x44)
+#define NVME_IOCTL_SUBSYS_RESET	_IO('N', 0x45)
 
 #endif /* _UAPI_LINUX_NVME_H */
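
With the uapi define above and the NVME_IOCTL_SUBSYS_RESET case wired into nvme_dev_ioctl(), the subsystem reset can be issued from userspace against the controller character device. A minimal sketch, assuming the controller node is /dev/nvme0 and the running kernel headers already carry the new define:

/* Sketch: trigger an NVMe subsystem reset via the controller char device. */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvme.h>		/* NVME_IOCTL_SUBSYS_RESET */

int main(void)
{
	int fd = open("/dev/nvme0", O_RDWR);

	if (fd < 0) {
		perror("open /dev/nvme0");
		return 1;
	}
	/* Fails with ENOTTY if the controller does not advertise NSSRS in CAP. */
	if (ioctl(fd, NVME_IOCTL_SUBSYS_RESET) < 0)
		perror("NVME_IOCTL_SUBSYS_RESET");
	close(fd);
	return 0;
}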