| author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-13 17:22:26 -0500 |
| --- | --- | --- |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-13 17:22:26 -0500 |
| commit | 9ea18f8cab5f1c36cdd0f09717e35ceb48c36a87 (patch) | |
| tree | 0c8da7ac47cb59fe39f177ab0407f554aff77194 /drivers | |
| parent | caf292ae5bb9d57198ce001d8b762f7abae3a94d (diff) | |
| parent | 849c6e7746e4f6317ace6aa7d2fcdcd844e99ddb (diff) | |
Merge branch 'for-3.19/drivers' of git://git.kernel.dk/linux-block
Pull block layer driver updates from Jens Axboe:

- NVMe updates:
  - The blk-mq conversion from Matias (and others)
  - A stack of NVMe bug fixes from the nvme tree, mostly from Keith.
  - Various bug fixes from me, fixing issues in both the blk-mq conversion and generic bugs.
  - Abort and CPU online fix from Sam.
  - Hot add/remove fix from Indraneel.
- A couple of drbd fixes from the drbd team (Andreas, Lars, Philipp)
- With the generic IO stat accounting from 3.19/core, converting md, bcache, and rsxx to use those. From Gu Zheng.
- Boundary check for queue/irq mode for null_blk from Matias. Fixes cases where invalid values could be given, causing the device to hang.
- The xen blkfront pull request, with two bug fixes from Vitaly.
* 'for-3.19/drivers' of git://git.kernel.dk/linux-block: (56 commits)
NVMe: fix race condition in nvme_submit_sync_cmd()
NVMe: fix retry/error logic in nvme_queue_rq()
NVMe: Fix FS mount issue (hot-remove followed by hot-add)
NVMe: fix error return checking from blk_mq_alloc_request()
NVMe: fix freeing of wrong request in abort path
xen/blkfront: remove redundant flush_op
xen/blkfront: improve protection against issuing unsupported REQ_FUA
NVMe: Fix command setup on IO retry
null_blk: boundary check queue_mode and irqmode
block/rsxx: use generic io stats accounting functions to simplify io stat accounting
md: use generic io stats accounting functions to simplify io stat accounting
drbd: use generic io stats accounting functions to simplify io stat accounting
md/bcache: use generic io stats accounting functions to simplify io stat accounting
NVMe: Update module version major number
NVMe: fail pci initialization if the device doesn't have any BARs
NVMe: add ->exit_hctx() hook
NVMe: make setup work for devices that don't do INTx
NVMe: enable IO stats by default
NVMe: nvme_submit_async_admin_req() must use atomic rq allocation
NVMe: replace blk_put_request() with blk_mq_free_request()
...
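Several of the commits above (the md, bcache, rsxx, and drbd ones) are mechanical conversions to the generic IO stat accounting helpers that came in through the 3.19/core branch: each driver's open-coded part_stat bookkeeping collapses into one call at submission and one at completion. A minimal sketch of the shape, assuming the 3.19-era signatures that the drbd_req.c hunk further down also uses (the example_* wrapper names are made up for illustration):

```c
#include <linux/bio.h>
#include <linux/genhd.h>

/* Hypothetical wrappers showing the generic accounting calls a driver
 * makes for a bio it owns (3.19-era helper signatures). */
static void example_start_acct(struct gendisk *disk, struct bio *bio)
{
	/* direction, sector count, partition to charge */
	generic_start_io_acct(bio_data_dir(bio), bio_sectors(bio),
			      &disk->part0);
}

static void example_end_acct(struct gendisk *disk, struct bio *bio,
			     unsigned long start_jiffies)
{
	/* completion side: direction, partition, submission time in jiffies */
	generic_end_io_acct(bio_data_dir(bio), &disk->part0, start_jiffies);
}
```

The drbd_req.c hunk below is exactly this transformation applied to DRBD's _drbd_start_io_acct() and _drbd_end_io_acct().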
Diffstat (limited to 'drivers')
| mode | file | lines changed |
| --- | --- | --- |
| -rw-r--r-- | drivers/block/drbd/drbd_actlog.c | 3 |
| -rw-r--r-- | drivers/block/drbd/drbd_int.h | 39 |
| -rw-r--r-- | drivers/block/drbd/drbd_main.c | 23 |
| -rw-r--r-- | drivers/block/drbd/drbd_nl.c | 64 |
| -rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 2 |
| -rw-r--r-- | drivers/block/drbd/drbd_req.c | 25 |
| -rw-r--r-- | drivers/block/drbd/drbd_state.c | 42 |
| -rw-r--r-- | drivers/block/drbd/drbd_state.h | 5 |
| -rw-r--r-- | drivers/block/drbd/drbd_worker.c | 5 |
| -rw-r--r-- | drivers/block/null_blk.c | 42 |
| -rw-r--r-- | drivers/block/nvme-core.c | 1594 |
| -rw-r--r-- | drivers/block/nvme-scsi.c | 162 |
| -rw-r--r-- | drivers/block/rsxx/dev.c | 29 |
| -rw-r--r-- | drivers/block/xen-blkfront.c | 65 |
| -rw-r--r-- | drivers/md/bcache/request.c | 23 |
| -rw-r--r-- | drivers/md/dm.c | 13 |
| -rw-r--r-- | drivers/md/md.c | 6 |
17 files changed, 952 insertions, 1190 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index a2dfa169237d..1318e3217cb0 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -827,8 +827,7 @@ static int update_sync_bits(struct drbd_device *device, | |||
827 | * | 827 | * |
828 | */ | 828 | */ |
829 | int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size, | 829 | int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size, |
830 | enum update_sync_bits_mode mode, | 830 | enum update_sync_bits_mode mode) |
831 | const char *file, const unsigned int line) | ||
832 | { | 831 | { |
833 | /* Is called from worker and receiver context _only_ */ | 832 | /* Is called from worker and receiver context _only_ */ |
834 | unsigned long sbnr, ebnr, lbnr; | 833 | unsigned long sbnr, ebnr, lbnr; |
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 9b22f8f01b57..b905e9888b88 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1454,7 +1454,6 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t); | |||
1454 | 1454 | ||
1455 | 1455 | ||
1456 | /* drbd_nl.c */ | 1456 | /* drbd_nl.c */ |
1457 | extern int drbd_msg_put_info(struct sk_buff *skb, const char *info); | ||
1458 | extern void drbd_suspend_io(struct drbd_device *device); | 1457 | extern void drbd_suspend_io(struct drbd_device *device); |
1459 | extern void drbd_resume_io(struct drbd_device *device); | 1458 | extern void drbd_resume_io(struct drbd_device *device); |
1460 | extern char *ppsize(char *buf, unsigned long long size); | 1459 | extern char *ppsize(char *buf, unsigned long long size); |
@@ -1558,52 +1557,31 @@ extern void drbd_set_recv_tcq(struct drbd_device *device, int tcq_enabled); | |||
1558 | extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed); | 1557 | extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed); |
1559 | extern int drbd_connected(struct drbd_peer_device *); | 1558 | extern int drbd_connected(struct drbd_peer_device *); |
1560 | 1559 | ||
1561 | /* Yes, there is kernel_setsockopt, but only since 2.6.18. | ||
1562 | * So we have our own copy of it here. */ | ||
1563 | static inline int drbd_setsockopt(struct socket *sock, int level, int optname, | ||
1564 | char *optval, int optlen) | ||
1565 | { | ||
1566 | mm_segment_t oldfs = get_fs(); | ||
1567 | char __user *uoptval; | ||
1568 | int err; | ||
1569 | |||
1570 | uoptval = (char __user __force *)optval; | ||
1571 | |||
1572 | set_fs(KERNEL_DS); | ||
1573 | if (level == SOL_SOCKET) | ||
1574 | err = sock_setsockopt(sock, level, optname, uoptval, optlen); | ||
1575 | else | ||
1576 | err = sock->ops->setsockopt(sock, level, optname, uoptval, | ||
1577 | optlen); | ||
1578 | set_fs(oldfs); | ||
1579 | return err; | ||
1580 | } | ||
1581 | |||
1582 | static inline void drbd_tcp_cork(struct socket *sock) | 1560 | static inline void drbd_tcp_cork(struct socket *sock) |
1583 | { | 1561 | { |
1584 | int val = 1; | 1562 | int val = 1; |
1585 | (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK, | 1563 | (void) kernel_setsockopt(sock, SOL_TCP, TCP_CORK, |
1586 | (char*)&val, sizeof(val)); | 1564 | (char*)&val, sizeof(val)); |
1587 | } | 1565 | } |
1588 | 1566 | ||
1589 | static inline void drbd_tcp_uncork(struct socket *sock) | 1567 | static inline void drbd_tcp_uncork(struct socket *sock) |
1590 | { | 1568 | { |
1591 | int val = 0; | 1569 | int val = 0; |
1592 | (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK, | 1570 | (void) kernel_setsockopt(sock, SOL_TCP, TCP_CORK, |
1593 | (char*)&val, sizeof(val)); | 1571 | (char*)&val, sizeof(val)); |
1594 | } | 1572 | } |
1595 | 1573 | ||
1596 | static inline void drbd_tcp_nodelay(struct socket *sock) | 1574 | static inline void drbd_tcp_nodelay(struct socket *sock) |
1597 | { | 1575 | { |
1598 | int val = 1; | 1576 | int val = 1; |
1599 | (void) drbd_setsockopt(sock, SOL_TCP, TCP_NODELAY, | 1577 | (void) kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, |
1600 | (char*)&val, sizeof(val)); | 1578 | (char*)&val, sizeof(val)); |
1601 | } | 1579 | } |
1602 | 1580 | ||
1603 | static inline void drbd_tcp_quickack(struct socket *sock) | 1581 | static inline void drbd_tcp_quickack(struct socket *sock) |
1604 | { | 1582 | { |
1605 | int val = 2; | 1583 | int val = 2; |
1606 | (void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK, | 1584 | (void) kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK, |
1607 | (char*)&val, sizeof(val)); | 1585 | (char*)&val, sizeof(val)); |
1608 | } | 1586 | } |
1609 | 1587 | ||
@@ -1662,14 +1640,13 @@ extern void drbd_advance_rs_marks(struct drbd_device *device, unsigned long stil | |||
1662 | 1640 | ||
1663 | enum update_sync_bits_mode { RECORD_RS_FAILED, SET_OUT_OF_SYNC, SET_IN_SYNC }; | 1641 | enum update_sync_bits_mode { RECORD_RS_FAILED, SET_OUT_OF_SYNC, SET_IN_SYNC }; |
1664 | extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size, | 1642 | extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size, |
1665 | enum update_sync_bits_mode mode, | 1643 | enum update_sync_bits_mode mode); |
1666 | const char *file, const unsigned int line); | ||
1667 | #define drbd_set_in_sync(device, sector, size) \ | 1644 | #define drbd_set_in_sync(device, sector, size) \ |
1668 | __drbd_change_sync(device, sector, size, SET_IN_SYNC, __FILE__, __LINE__) | 1645 | __drbd_change_sync(device, sector, size, SET_IN_SYNC) |
1669 | #define drbd_set_out_of_sync(device, sector, size) \ | 1646 | #define drbd_set_out_of_sync(device, sector, size) \ |
1670 | __drbd_change_sync(device, sector, size, SET_OUT_OF_SYNC, __FILE__, __LINE__) | 1647 | __drbd_change_sync(device, sector, size, SET_OUT_OF_SYNC) |
1671 | #define drbd_rs_failed_io(device, sector, size) \ | 1648 | #define drbd_rs_failed_io(device, sector, size) \ |
1672 | __drbd_change_sync(device, sector, size, RECORD_RS_FAILED, __FILE__, __LINE__) | 1649 | __drbd_change_sync(device, sector, size, RECORD_RS_FAILED) |
1673 | extern void drbd_al_shrink(struct drbd_device *device); | 1650 | extern void drbd_al_shrink(struct drbd_device *device); |
1674 | extern int drbd_initialize_al(struct drbd_device *, void *); | 1651 | extern int drbd_initialize_al(struct drbd_device *, void *); |
1675 | 1652 | ||
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 973c185c9cfe..1fc83427199c 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2532,10 +2532,6 @@ int set_resource_options(struct drbd_resource *resource, struct res_opts *res_op | |||
2532 | 2532 | ||
2533 | if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) | 2533 | if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) |
2534 | return -ENOMEM; | 2534 | return -ENOMEM; |
2535 | /* | ||
2536 | retcode = ERR_NOMEM; | ||
2537 | drbd_msg_put_info("unable to allocate cpumask"); | ||
2538 | */ | ||
2539 | 2535 | ||
2540 | /* silently ignore cpu mask on UP kernel */ | 2536 | /* silently ignore cpu mask on UP kernel */ |
2541 | if (nr_cpu_ids > 1 && res_opts->cpu_mask[0] != 0) { | 2537 | if (nr_cpu_ids > 1 && res_opts->cpu_mask[0] != 0) { |
@@ -2731,7 +2727,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig | |||
2731 | 2727 | ||
2732 | device = minor_to_device(minor); | 2728 | device = minor_to_device(minor); |
2733 | if (device) | 2729 | if (device) |
2734 | return ERR_MINOR_EXISTS; | 2730 | return ERR_MINOR_OR_VOLUME_EXISTS; |
2735 | 2731 | ||
2736 | /* GFP_KERNEL, we are outside of all write-out paths */ | 2732 | /* GFP_KERNEL, we are outside of all write-out paths */ |
2737 | device = kzalloc(sizeof(struct drbd_device), GFP_KERNEL); | 2733 | device = kzalloc(sizeof(struct drbd_device), GFP_KERNEL); |
@@ -2793,20 +2789,16 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig | |||
2793 | 2789 | ||
2794 | id = idr_alloc(&drbd_devices, device, minor, minor + 1, GFP_KERNEL); | 2790 | id = idr_alloc(&drbd_devices, device, minor, minor + 1, GFP_KERNEL); |
2795 | if (id < 0) { | 2791 | if (id < 0) { |
2796 | if (id == -ENOSPC) { | 2792 | if (id == -ENOSPC) |
2797 | err = ERR_MINOR_EXISTS; | 2793 | err = ERR_MINOR_OR_VOLUME_EXISTS; |
2798 | drbd_msg_put_info(adm_ctx->reply_skb, "requested minor exists already"); | ||
2799 | } | ||
2800 | goto out_no_minor_idr; | 2794 | goto out_no_minor_idr; |
2801 | } | 2795 | } |
2802 | kref_get(&device->kref); | 2796 | kref_get(&device->kref); |
2803 | 2797 | ||
2804 | id = idr_alloc(&resource->devices, device, vnr, vnr + 1, GFP_KERNEL); | 2798 | id = idr_alloc(&resource->devices, device, vnr, vnr + 1, GFP_KERNEL); |
2805 | if (id < 0) { | 2799 | if (id < 0) { |
2806 | if (id == -ENOSPC) { | 2800 | if (id == -ENOSPC) |
2807 | err = ERR_MINOR_EXISTS; | 2801 | err = ERR_MINOR_OR_VOLUME_EXISTS; |
2808 | drbd_msg_put_info(adm_ctx->reply_skb, "requested minor exists already"); | ||
2809 | } | ||
2810 | goto out_idr_remove_minor; | 2802 | goto out_idr_remove_minor; |
2811 | } | 2803 | } |
2812 | kref_get(&device->kref); | 2804 | kref_get(&device->kref); |
@@ -2825,10 +2817,8 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig | |||
2825 | 2817 | ||
2826 | id = idr_alloc(&connection->peer_devices, peer_device, vnr, vnr + 1, GFP_KERNEL); | 2818 | id = idr_alloc(&connection->peer_devices, peer_device, vnr, vnr + 1, GFP_KERNEL); |
2827 | if (id < 0) { | 2819 | if (id < 0) { |
2828 | if (id == -ENOSPC) { | 2820 | if (id == -ENOSPC) |
2829 | err = ERR_INVALID_REQUEST; | 2821 | err = ERR_INVALID_REQUEST; |
2830 | drbd_msg_put_info(adm_ctx->reply_skb, "requested volume exists already"); | ||
2831 | } | ||
2832 | goto out_idr_remove_from_resource; | 2822 | goto out_idr_remove_from_resource; |
2833 | } | 2823 | } |
2834 | kref_get(&connection->kref); | 2824 | kref_get(&connection->kref); |
@@ -2836,7 +2826,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig | |||
2836 | 2826 | ||
2837 | if (init_submitter(device)) { | 2827 | if (init_submitter(device)) { |
2838 | err = ERR_NOMEM; | 2828 | err = ERR_NOMEM; |
2839 | drbd_msg_put_info(adm_ctx->reply_skb, "unable to create submit workqueue"); | ||
2840 | goto out_idr_remove_vol; | 2829 | goto out_idr_remove_vol; |
2841 | } | 2830 | } |
2842 | 2831 | ||
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 1cd47df44bda..74df8cfad414 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -92,7 +92,7 @@ static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info) | |||
92 | 92 | ||
93 | /* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only | 93 | /* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only |
94 | * reason it could fail was no space in skb, and there are 4k available. */ | 94 | * reason it could fail was no space in skb, and there are 4k available. */ |
95 | int drbd_msg_put_info(struct sk_buff *skb, const char *info) | 95 | static int drbd_msg_put_info(struct sk_buff *skb, const char *info) |
96 | { | 96 | { |
97 | struct nlattr *nla; | 97 | struct nlattr *nla; |
98 | int err = -EMSGSIZE; | 98 | int err = -EMSGSIZE; |
@@ -588,7 +588,7 @@ drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int for | |||
588 | val.i = 0; val.role = new_role; | 588 | val.i = 0; val.role = new_role; |
589 | 589 | ||
590 | while (try++ < max_tries) { | 590 | while (try++ < max_tries) { |
591 | rv = _drbd_request_state(device, mask, val, CS_WAIT_COMPLETE); | 591 | rv = _drbd_request_state_holding_state_mutex(device, mask, val, CS_WAIT_COMPLETE); |
592 | 592 | ||
593 | /* in case we first succeeded to outdate, | 593 | /* in case we first succeeded to outdate, |
594 | * but now suddenly could establish a connection */ | 594 | * but now suddenly could establish a connection */ |
@@ -2052,7 +2052,7 @@ check_net_options(struct drbd_connection *connection, struct net_conf *new_net_c | |||
2052 | rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf); | 2052 | rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf); |
2053 | rcu_read_unlock(); | 2053 | rcu_read_unlock(); |
2054 | 2054 | ||
2055 | /* connection->volumes protected by genl_lock() here */ | 2055 | /* connection->peer_devices protected by genl_lock() here */ |
2056 | idr_for_each_entry(&connection->peer_devices, peer_device, i) { | 2056 | idr_for_each_entry(&connection->peer_devices, peer_device, i) { |
2057 | struct drbd_device *device = peer_device->device; | 2057 | struct drbd_device *device = peer_device->device; |
2058 | if (!device->bitmap) { | 2058 | if (!device->bitmap) { |
@@ -3483,7 +3483,7 @@ int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info) | |||
3483 | * that first_peer_device(device)->connection and device->vnr match the request. */ | 3483 | * that first_peer_device(device)->connection and device->vnr match the request. */ |
3484 | if (adm_ctx.device) { | 3484 | if (adm_ctx.device) { |
3485 | if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) | 3485 | if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) |
3486 | retcode = ERR_MINOR_EXISTS; | 3486 | retcode = ERR_MINOR_OR_VOLUME_EXISTS; |
3487 | /* else: still NO_ERROR */ | 3487 | /* else: still NO_ERROR */ |
3488 | goto out; | 3488 | goto out; |
3489 | } | 3489 | } |
@@ -3530,6 +3530,27 @@ out: | |||
3530 | return 0; | 3530 | return 0; |
3531 | } | 3531 | } |
3532 | 3532 | ||
3533 | static int adm_del_resource(struct drbd_resource *resource) | ||
3534 | { | ||
3535 | struct drbd_connection *connection; | ||
3536 | |||
3537 | for_each_connection(connection, resource) { | ||
3538 | if (connection->cstate > C_STANDALONE) | ||
3539 | return ERR_NET_CONFIGURED; | ||
3540 | } | ||
3541 | if (!idr_is_empty(&resource->devices)) | ||
3542 | return ERR_RES_IN_USE; | ||
3543 | |||
3544 | list_del_rcu(&resource->resources); | ||
3545 | /* Make sure all threads have actually stopped: state handling only | ||
3546 | * does drbd_thread_stop_nowait(). */ | ||
3547 | list_for_each_entry(connection, &resource->connections, connections) | ||
3548 | drbd_thread_stop(&connection->worker); | ||
3549 | synchronize_rcu(); | ||
3550 | drbd_free_resource(resource); | ||
3551 | return NO_ERROR; | ||
3552 | } | ||
3553 | |||
3533 | int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) | 3554 | int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) |
3534 | { | 3555 | { |
3535 | struct drbd_config_context adm_ctx; | 3556 | struct drbd_config_context adm_ctx; |
@@ -3575,14 +3596,6 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) | |||
3575 | } | 3596 | } |
3576 | } | 3597 | } |
3577 | 3598 | ||
3578 | /* If we reach this, all volumes (of this connection) are Secondary, | ||
3579 | * Disconnected, Diskless, aka Unconfigured. Make sure all threads have | ||
3580 | * actually stopped, state handling only does drbd_thread_stop_nowait(). */ | ||
3581 | for_each_connection(connection, resource) | ||
3582 | drbd_thread_stop(&connection->worker); | ||
3583 | |||
3584 | /* Now, nothing can fail anymore */ | ||
3585 | |||
3586 | /* delete volumes */ | 3599 | /* delete volumes */ |
3587 | idr_for_each_entry(&resource->devices, device, i) { | 3600 | idr_for_each_entry(&resource->devices, device, i) { |
3588 | retcode = adm_del_minor(device); | 3601 | retcode = adm_del_minor(device); |
@@ -3593,10 +3606,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) | |||
3593 | } | 3606 | } |
3594 | } | 3607 | } |
3595 | 3608 | ||
3596 | list_del_rcu(&resource->resources); | 3609 | retcode = adm_del_resource(resource); |
3597 | synchronize_rcu(); | ||
3598 | drbd_free_resource(resource); | ||
3599 | retcode = NO_ERROR; | ||
3600 | out: | 3610 | out: |
3601 | mutex_unlock(&resource->adm_mutex); | 3611 | mutex_unlock(&resource->adm_mutex); |
3602 | finish: | 3612 | finish: |
@@ -3608,7 +3618,6 @@ int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info) | |||
3608 | { | 3618 | { |
3609 | struct drbd_config_context adm_ctx; | 3619 | struct drbd_config_context adm_ctx; |
3610 | struct drbd_resource *resource; | 3620 | struct drbd_resource *resource; |
3611 | struct drbd_connection *connection; | ||
3612 | enum drbd_ret_code retcode; | 3621 | enum drbd_ret_code retcode; |
3613 | 3622 | ||
3614 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE); | 3623 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE); |
@@ -3616,27 +3625,10 @@ int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info) | |||
3616 | return retcode; | 3625 | return retcode; |
3617 | if (retcode != NO_ERROR) | 3626 | if (retcode != NO_ERROR) |
3618 | goto finish; | 3627 | goto finish; |
3619 | |||
3620 | resource = adm_ctx.resource; | 3628 | resource = adm_ctx.resource; |
3621 | mutex_lock(&resource->adm_mutex); | ||
3622 | for_each_connection(connection, resource) { | ||
3623 | if (connection->cstate > C_STANDALONE) { | ||
3624 | retcode = ERR_NET_CONFIGURED; | ||
3625 | goto out; | ||
3626 | } | ||
3627 | } | ||
3628 | if (!idr_is_empty(&resource->devices)) { | ||
3629 | retcode = ERR_RES_IN_USE; | ||
3630 | goto out; | ||
3631 | } | ||
3632 | 3629 | ||
3633 | list_del_rcu(&resource->resources); | 3630 | mutex_lock(&resource->adm_mutex); |
3634 | for_each_connection(connection, resource) | 3631 | retcode = adm_del_resource(resource); |
3635 | drbd_thread_stop(&connection->worker); | ||
3636 | synchronize_rcu(); | ||
3637 | drbd_free_resource(resource); | ||
3638 | retcode = NO_ERROR; | ||
3639 | out: | ||
3640 | mutex_unlock(&resource->adm_mutex); | 3632 | mutex_unlock(&resource->adm_mutex); |
3641 | finish: | 3633 | finish: |
3642 | drbd_adm_finish(&adm_ctx, info, retcode); | 3634 | drbd_adm_finish(&adm_ctx, info, retcode); |
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 6960fb064731..d169b4a79267 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -2482,7 +2482,7 @@ bool drbd_rs_c_min_rate_throttle(struct drbd_device *device) | |||
2482 | atomic_read(&device->rs_sect_ev); | 2482 | atomic_read(&device->rs_sect_ev); |
2483 | 2483 | ||
2484 | if (atomic_read(&device->ap_actlog_cnt) | 2484 | if (atomic_read(&device->ap_actlog_cnt) |
2485 | || !device->rs_last_events || curr_events - device->rs_last_events > 64) { | 2485 | || curr_events - device->rs_last_events > 64) { |
2486 | unsigned long rs_left; | 2486 | unsigned long rs_left; |
2487 | int i; | 2487 | int i; |
2488 | 2488 | ||
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 5a01c53dddeb..34f2f0ba409b 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -36,29 +36,15 @@ static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector, | |||
36 | /* Update disk stats at start of I/O request */ | 36 | /* Update disk stats at start of I/O request */ |
37 | static void _drbd_start_io_acct(struct drbd_device *device, struct drbd_request *req) | 37 | static void _drbd_start_io_acct(struct drbd_device *device, struct drbd_request *req) |
38 | { | 38 | { |
39 | const int rw = bio_data_dir(req->master_bio); | 39 | generic_start_io_acct(bio_data_dir(req->master_bio), req->i.size >> 9, |
40 | int cpu; | 40 | &device->vdisk->part0); |
41 | cpu = part_stat_lock(); | ||
42 | part_round_stats(cpu, &device->vdisk->part0); | ||
43 | part_stat_inc(cpu, &device->vdisk->part0, ios[rw]); | ||
44 | part_stat_add(cpu, &device->vdisk->part0, sectors[rw], req->i.size >> 9); | ||
45 | (void) cpu; /* The macro invocations above want the cpu argument, I do not like | ||
46 | the compiler warning about cpu only assigned but never used... */ | ||
47 | part_inc_in_flight(&device->vdisk->part0, rw); | ||
48 | part_stat_unlock(); | ||
49 | } | 41 | } |
50 | 42 | ||
51 | /* Update disk stats when completing request upwards */ | 43 | /* Update disk stats when completing request upwards */ |
52 | static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *req) | 44 | static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *req) |
53 | { | 45 | { |
54 | int rw = bio_data_dir(req->master_bio); | 46 | generic_end_io_acct(bio_data_dir(req->master_bio), |
55 | unsigned long duration = jiffies - req->start_jif; | 47 | &device->vdisk->part0, req->start_jif); |
56 | int cpu; | ||
57 | cpu = part_stat_lock(); | ||
58 | part_stat_add(cpu, &device->vdisk->part0, ticks[rw], duration); | ||
59 | part_round_stats(cpu, &device->vdisk->part0); | ||
60 | part_dec_in_flight(&device->vdisk->part0, rw); | ||
61 | part_stat_unlock(); | ||
62 | } | 48 | } |
63 | 49 | ||
64 | static struct drbd_request *drbd_req_new(struct drbd_device *device, | 50 | static struct drbd_request *drbd_req_new(struct drbd_device *device, |
@@ -1545,6 +1531,7 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct | |||
1545 | struct request_queue * const b = | 1531 | struct request_queue * const b = |
1546 | device->ldev->backing_bdev->bd_disk->queue; | 1532 | device->ldev->backing_bdev->bd_disk->queue; |
1547 | if (b->merge_bvec_fn) { | 1533 | if (b->merge_bvec_fn) { |
1534 | bvm->bi_bdev = device->ldev->backing_bdev; | ||
1548 | backing_limit = b->merge_bvec_fn(b, bvm, bvec); | 1535 | backing_limit = b->merge_bvec_fn(b, bvm, bvec); |
1549 | limit = min(limit, backing_limit); | 1536 | limit = min(limit, backing_limit); |
1550 | } | 1537 | } |
@@ -1628,7 +1615,7 @@ void request_timer_fn(unsigned long data) | |||
1628 | time_after(now, req_peer->pre_send_jif + ent) && | 1615 | time_after(now, req_peer->pre_send_jif + ent) && |
1629 | !time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) { | 1616 | !time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) { |
1630 | drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n"); | 1617 | drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n"); |
1631 | _drbd_set_state(_NS(device, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); | 1618 | _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_VERBOSE | CS_HARD); |
1632 | } | 1619 | } |
1633 | if (dt && oldest_submit_jif != now && | 1620 | if (dt && oldest_submit_jif != now && |
1634 | time_after(now, oldest_submit_jif + dt) && | 1621 | time_after(now, oldest_submit_jif + dt) && |
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 84b11f887d73..2d7dd269b6a8 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -215,6 +215,18 @@ static bool no_peer_wf_report_params(struct drbd_connection *connection) | |||
215 | return rv; | 215 | return rv; |
216 | } | 216 | } |
217 | 217 | ||
218 | static void wake_up_all_devices(struct drbd_connection *connection) | ||
219 | { | ||
220 | struct drbd_peer_device *peer_device; | ||
221 | int vnr; | ||
222 | |||
223 | rcu_read_lock(); | ||
224 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) | ||
225 | wake_up(&peer_device->device->state_wait); | ||
226 | rcu_read_unlock(); | ||
227 | |||
228 | } | ||
229 | |||
218 | 230 | ||
219 | /** | 231 | /** |
220 | * cl_wide_st_chg() - true if the state change is a cluster wide one | 232 | * cl_wide_st_chg() - true if the state change is a cluster wide one |
@@ -410,6 +422,22 @@ _drbd_request_state(struct drbd_device *device, union drbd_state mask, | |||
410 | return rv; | 422 | return rv; |
411 | } | 423 | } |
412 | 424 | ||
425 | enum drbd_state_rv | ||
426 | _drbd_request_state_holding_state_mutex(struct drbd_device *device, union drbd_state mask, | ||
427 | union drbd_state val, enum chg_state_flags f) | ||
428 | { | ||
429 | enum drbd_state_rv rv; | ||
430 | |||
431 | BUG_ON(f & CS_SERIALIZE); | ||
432 | |||
433 | wait_event_cmd(device->state_wait, | ||
434 | (rv = drbd_req_state(device, mask, val, f)) != SS_IN_TRANSIENT_STATE, | ||
435 | mutex_unlock(device->state_mutex), | ||
436 | mutex_lock(device->state_mutex)); | ||
437 | |||
438 | return rv; | ||
439 | } | ||
440 | |||
413 | static void print_st(struct drbd_device *device, const char *name, union drbd_state ns) | 441 | static void print_st(struct drbd_device *device, const char *name, union drbd_state ns) |
414 | { | 442 | { |
415 | drbd_err(device, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n", | 443 | drbd_err(device, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n", |
@@ -629,14 +657,11 @@ is_valid_soft_transition(union drbd_state os, union drbd_state ns, struct drbd_c | |||
629 | if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED) | 657 | if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED) |
630 | rv = SS_IN_TRANSIENT_STATE; | 658 | rv = SS_IN_TRANSIENT_STATE; |
631 | 659 | ||
632 | /* if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS) | ||
633 | rv = SS_IN_TRANSIENT_STATE; */ | ||
634 | |||
635 | /* While establishing a connection only allow cstate to change. | 660 | /* While establishing a connection only allow cstate to change. |
636 | Delay/refuse role changes, detach attach etc... */ | 661 | Delay/refuse role changes, detach attach etc... (they do not touch cstate) */ |
637 | if (test_bit(STATE_SENT, &connection->flags) && | 662 | if (test_bit(STATE_SENT, &connection->flags) && |
638 | !(os.conn == C_WF_REPORT_PARAMS || | 663 | !((ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION) || |
639 | (ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION))) | 664 | (ns.conn >= C_CONNECTED && os.conn == C_WF_REPORT_PARAMS))) |
640 | rv = SS_IN_TRANSIENT_STATE; | 665 | rv = SS_IN_TRANSIENT_STATE; |
641 | 666 | ||
642 | if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) | 667 | if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) |
@@ -1032,8 +1057,10 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, | |||
1032 | 1057 | ||
1033 | /* Wake up role changes, that were delayed because of connection establishing */ | 1058 | /* Wake up role changes, that were delayed because of connection establishing */ |
1034 | if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS && | 1059 | if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS && |
1035 | no_peer_wf_report_params(connection)) | 1060 | no_peer_wf_report_params(connection)) { |
1036 | clear_bit(STATE_SENT, &connection->flags); | 1061 | clear_bit(STATE_SENT, &connection->flags); |
1062 | wake_up_all_devices(connection); | ||
1063 | } | ||
1037 | 1064 | ||
1038 | wake_up(&device->misc_wait); | 1065 | wake_up(&device->misc_wait); |
1039 | wake_up(&device->state_wait); | 1066 | wake_up(&device->state_wait); |
@@ -1072,7 +1099,6 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, | |||
1072 | 1099 | ||
1073 | set_ov_position(device, ns.conn); | 1100 | set_ov_position(device, ns.conn); |
1074 | device->rs_start = now; | 1101 | device->rs_start = now; |
1075 | device->rs_last_events = 0; | ||
1076 | device->rs_last_sect_ev = 0; | 1102 | device->rs_last_sect_ev = 0; |
1077 | device->ov_last_oos_size = 0; | 1103 | device->ov_last_oos_size = 0; |
1078 | device->ov_last_oos_start = 0; | 1104 | device->ov_last_oos_start = 0; |
diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h
index cc41605ba21c..7f53c40823cd 100644
--- a/drivers/block/drbd/drbd_state.h
+++ b/drivers/block/drbd/drbd_state.h
@@ -117,6 +117,11 @@ extern enum drbd_state_rv _drbd_request_state(struct drbd_device *, | |||
117 | union drbd_state, | 117 | union drbd_state, |
118 | union drbd_state, | 118 | union drbd_state, |
119 | enum chg_state_flags); | 119 | enum chg_state_flags); |
120 | |||
121 | extern enum drbd_state_rv | ||
122 | _drbd_request_state_holding_state_mutex(struct drbd_device *, union drbd_state, | ||
123 | union drbd_state, enum chg_state_flags); | ||
124 | |||
120 | extern enum drbd_state_rv __drbd_set_state(struct drbd_device *, union drbd_state, | 125 | extern enum drbd_state_rv __drbd_set_state(struct drbd_device *, union drbd_state, |
121 | enum chg_state_flags, | 126 | enum chg_state_flags, |
122 | struct completion *done); | 127 | struct completion *done); |
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index d2d1f97511bd..d0fae55d871d 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1592,11 +1592,15 @@ void drbd_resync_after_changed(struct drbd_device *device) | |||
1592 | 1592 | ||
1593 | void drbd_rs_controller_reset(struct drbd_device *device) | 1593 | void drbd_rs_controller_reset(struct drbd_device *device) |
1594 | { | 1594 | { |
1595 | struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk; | ||
1595 | struct fifo_buffer *plan; | 1596 | struct fifo_buffer *plan; |
1596 | 1597 | ||
1597 | atomic_set(&device->rs_sect_in, 0); | 1598 | atomic_set(&device->rs_sect_in, 0); |
1598 | atomic_set(&device->rs_sect_ev, 0); | 1599 | atomic_set(&device->rs_sect_ev, 0); |
1599 | device->rs_in_flight = 0; | 1600 | device->rs_in_flight = 0; |
1601 | device->rs_last_events = | ||
1602 | (int)part_stat_read(&disk->part0, sectors[0]) + | ||
1603 | (int)part_stat_read(&disk->part0, sectors[1]); | ||
1600 | 1604 | ||
1601 | /* Updating the RCU protected object in place is necessary since | 1605 | /* Updating the RCU protected object in place is necessary since |
1602 | this function gets called from atomic context. | 1606 | this function gets called from atomic context. |
@@ -1743,7 +1747,6 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) | |||
1743 | device->rs_failed = 0; | 1747 | device->rs_failed = 0; |
1744 | device->rs_paused = 0; | 1748 | device->rs_paused = 0; |
1745 | device->rs_same_csum = 0; | 1749 | device->rs_same_csum = 0; |
1746 | device->rs_last_events = 0; | ||
1747 | device->rs_last_sect_ev = 0; | 1750 | device->rs_last_sect_ev = 0; |
1748 | device->rs_total = tw; | 1751 | device->rs_total = tw; |
1749 | device->rs_start = now; | 1752 | device->rs_start = now; |
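The drbd_rs_controller_reset() hunk above works together with the drbd_receiver.c change earlier: instead of zeroing rs_last_events and special-casing a zero value in the throttle check, the controller now seeds rs_last_events from the backing disk's cumulative sector counters. The throttle decision in drbd_rs_c_min_rate_throttle() then boils down to something like this sketch (not the exact drbd code; resync_sectors stands for atomic_read(&device->rs_sect_ev), last_events for the previously sampled rs_last_events):

```c
#include <linux/genhd.h>

/* Sketch: has application IO moved more than 64 sectors (32 KiB) since
 * the last sample, once the resync engine's own traffic is subtracted? */
static bool app_io_seen_recently(struct gendisk *disk,
				 int resync_sectors, int last_events)
{
	int curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
			  (int)part_stat_read(&disk->part0, sectors[1]) -
			  resync_sectors;

	return curr_events - last_events > 64;
}
```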
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index caa61212fdb5..ae9f615382f6 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -78,7 +78,33 @@ module_param(home_node, int, S_IRUGO); | |||
78 | MODULE_PARM_DESC(home_node, "Home node for the device"); | 78 | MODULE_PARM_DESC(home_node, "Home node for the device"); |
79 | 79 | ||
80 | static int queue_mode = NULL_Q_MQ; | 80 | static int queue_mode = NULL_Q_MQ; |
81 | module_param(queue_mode, int, S_IRUGO); | 81 | |
82 | static int null_param_store_val(const char *str, int *val, int min, int max) | ||
83 | { | ||
84 | int ret, new_val; | ||
85 | |||
86 | ret = kstrtoint(str, 10, &new_val); | ||
87 | if (ret) | ||
88 | return -EINVAL; | ||
89 | |||
90 | if (new_val < min || new_val > max) | ||
91 | return -EINVAL; | ||
92 | |||
93 | *val = new_val; | ||
94 | return 0; | ||
95 | } | ||
96 | |||
97 | static int null_set_queue_mode(const char *str, const struct kernel_param *kp) | ||
98 | { | ||
99 | return null_param_store_val(str, &queue_mode, NULL_Q_BIO, NULL_Q_MQ); | ||
100 | } | ||
101 | |||
102 | static struct kernel_param_ops null_queue_mode_param_ops = { | ||
103 | .set = null_set_queue_mode, | ||
104 | .get = param_get_int, | ||
105 | }; | ||
106 | |||
107 | device_param_cb(queue_mode, &null_queue_mode_param_ops, &queue_mode, S_IRUGO); | ||
82 | MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)"); | 108 | MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)"); |
83 | 109 | ||
84 | static int gb = 250; | 110 | static int gb = 250; |
@@ -94,7 +120,19 @@ module_param(nr_devices, int, S_IRUGO); | |||
94 | MODULE_PARM_DESC(nr_devices, "Number of devices to register"); | 120 | MODULE_PARM_DESC(nr_devices, "Number of devices to register"); |
95 | 121 | ||
96 | static int irqmode = NULL_IRQ_SOFTIRQ; | 122 | static int irqmode = NULL_IRQ_SOFTIRQ; |
97 | module_param(irqmode, int, S_IRUGO); | 123 | |
124 | static int null_set_irqmode(const char *str, const struct kernel_param *kp) | ||
125 | { | ||
126 | return null_param_store_val(str, &irqmode, NULL_IRQ_NONE, | ||
127 | NULL_IRQ_TIMER); | ||
128 | } | ||
129 | |||
130 | static struct kernel_param_ops null_irqmode_param_ops = { | ||
131 | .set = null_set_irqmode, | ||
132 | .get = param_get_int, | ||
133 | }; | ||
134 | |||
135 | device_param_cb(irqmode, &null_irqmode_param_ops, &irqmode, S_IRUGO); | ||
98 | MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer"); | 136 | MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer"); |
99 | 137 | ||
100 | static int completion_nsec = 10000; | 138 | static int completion_nsec = 10000; |
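The null_blk hunks above are the "boundary check queue_mode and irqmode" fix from the pull summary: both knobs move from a plain module_param() to kernel_param_ops with a range-checked .set handler, so an out-of-range value is refused with -EINVAL at module load instead of configuring a mode the driver cannot service. The same pattern in isolation (a sketch with a made-up my_mode parameter; null_blk itself uses device_param_cb() and shares one null_param_store_val() helper for both parameters):

```c
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/stat.h>

static int my_mode = 1;

/* Accept only values in [0, 2]; anything else is rejected. */
static int my_mode_set(const char *val, const struct kernel_param *kp)
{
	int new_val;

	if (kstrtoint(val, 10, &new_val))
		return -EINVAL;
	if (new_val < 0 || new_val > 2)
		return -EINVAL;

	*(int *)kp->arg = new_val;
	return 0;
}

static const struct kernel_param_ops my_mode_ops = {
	.set = my_mode_set,
	.get = param_get_int,
};

module_param_cb(my_mode, &my_mode_ops, &my_mode, S_IRUGO);
MODULE_PARM_DESC(my_mode, "example bounded parameter (0-2)");
```

With the bounded setter in place, loading the module with an invalid value (say queue_mode=5 for null_blk) fails up front rather than creating a device in an unusable state.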
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index e2bb8afbeae5..b1d5d8797315 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -13,9 +13,9 @@ | |||
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include <linux/nvme.h> | 15 | #include <linux/nvme.h> |
16 | #include <linux/bio.h> | ||
17 | #include <linux/bitops.h> | 16 | #include <linux/bitops.h> |
18 | #include <linux/blkdev.h> | 17 | #include <linux/blkdev.h> |
18 | #include <linux/blk-mq.h> | ||
19 | #include <linux/cpu.h> | 19 | #include <linux/cpu.h> |
20 | #include <linux/delay.h> | 20 | #include <linux/delay.h> |
21 | #include <linux/errno.h> | 21 | #include <linux/errno.h> |
@@ -33,7 +33,6 @@ | |||
33 | #include <linux/module.h> | 33 | #include <linux/module.h> |
34 | #include <linux/moduleparam.h> | 34 | #include <linux/moduleparam.h> |
35 | #include <linux/pci.h> | 35 | #include <linux/pci.h> |
36 | #include <linux/percpu.h> | ||
37 | #include <linux/poison.h> | 36 | #include <linux/poison.h> |
38 | #include <linux/ptrace.h> | 37 | #include <linux/ptrace.h> |
39 | #include <linux/sched.h> | 38 | #include <linux/sched.h> |
@@ -42,12 +41,12 @@ | |||
42 | #include <scsi/sg.h> | 41 | #include <scsi/sg.h> |
43 | #include <asm-generic/io-64-nonatomic-lo-hi.h> | 42 | #include <asm-generic/io-64-nonatomic-lo-hi.h> |
44 | 43 | ||
45 | #include <trace/events/block.h> | ||
46 | |||
47 | #define NVME_Q_DEPTH 1024 | 44 | #define NVME_Q_DEPTH 1024 |
45 | #define NVME_AQ_DEPTH 64 | ||
48 | #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) | 46 | #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) |
49 | #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) | 47 | #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) |
50 | #define ADMIN_TIMEOUT (admin_timeout * HZ) | 48 | #define ADMIN_TIMEOUT (admin_timeout * HZ) |
49 | #define SHUTDOWN_TIMEOUT (shutdown_timeout * HZ) | ||
51 | #define IOD_TIMEOUT (retry_time * HZ) | 50 | #define IOD_TIMEOUT (retry_time * HZ) |
52 | 51 | ||
53 | static unsigned char admin_timeout = 60; | 52 | static unsigned char admin_timeout = 60; |
@@ -62,6 +61,10 @@ static unsigned char retry_time = 30; | |||
62 | module_param(retry_time, byte, 0644); | 61 | module_param(retry_time, byte, 0644); |
63 | MODULE_PARM_DESC(retry_time, "time in seconds to retry failed I/O"); | 62 | MODULE_PARM_DESC(retry_time, "time in seconds to retry failed I/O"); |
64 | 63 | ||
64 | static unsigned char shutdown_timeout = 5; | ||
65 | module_param(shutdown_timeout, byte, 0644); | ||
66 | MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown"); | ||
67 | |||
65 | static int nvme_major; | 68 | static int nvme_major; |
66 | module_param(nvme_major, int, 0); | 69 | module_param(nvme_major, int, 0); |
67 | 70 | ||
@@ -76,10 +79,12 @@ static wait_queue_head_t nvme_kthread_wait; | |||
76 | static struct notifier_block nvme_nb; | 79 | static struct notifier_block nvme_nb; |
77 | 80 | ||
78 | static void nvme_reset_failed_dev(struct work_struct *ws); | 81 | static void nvme_reset_failed_dev(struct work_struct *ws); |
82 | static int nvme_process_cq(struct nvme_queue *nvmeq); | ||
79 | 83 | ||
80 | struct async_cmd_info { | 84 | struct async_cmd_info { |
81 | struct kthread_work work; | 85 | struct kthread_work work; |
82 | struct kthread_worker *worker; | 86 | struct kthread_worker *worker; |
87 | struct request *req; | ||
83 | u32 result; | 88 | u32 result; |
84 | int status; | 89 | int status; |
85 | void *ctx; | 90 | void *ctx; |
@@ -90,7 +95,7 @@ struct async_cmd_info { | |||
90 | * commands and one for I/O commands). | 95 | * commands and one for I/O commands). |
91 | */ | 96 | */ |
92 | struct nvme_queue { | 97 | struct nvme_queue { |
93 | struct rcu_head r_head; | 98 | struct llist_node node; |
94 | struct device *q_dmadev; | 99 | struct device *q_dmadev; |
95 | struct nvme_dev *dev; | 100 | struct nvme_dev *dev; |
96 | char irqname[24]; /* nvme4294967295-65535\0 */ | 101 | char irqname[24]; /* nvme4294967295-65535\0 */ |
@@ -99,10 +104,6 @@ struct nvme_queue { | |||
99 | volatile struct nvme_completion *cqes; | 104 | volatile struct nvme_completion *cqes; |
100 | dma_addr_t sq_dma_addr; | 105 | dma_addr_t sq_dma_addr; |
101 | dma_addr_t cq_dma_addr; | 106 | dma_addr_t cq_dma_addr; |
102 | wait_queue_head_t sq_full; | ||
103 | wait_queue_t sq_cong_wait; | ||
104 | struct bio_list sq_cong; | ||
105 | struct list_head iod_bio; | ||
106 | u32 __iomem *q_db; | 107 | u32 __iomem *q_db; |
107 | u16 q_depth; | 108 | u16 q_depth; |
108 | u16 cq_vector; | 109 | u16 cq_vector; |
@@ -112,10 +113,8 @@ struct nvme_queue { | |||
112 | u16 qid; | 113 | u16 qid; |
113 | u8 cq_phase; | 114 | u8 cq_phase; |
114 | u8 cqe_seen; | 115 | u8 cqe_seen; |
115 | u8 q_suspended; | ||
116 | cpumask_var_t cpu_mask; | ||
117 | struct async_cmd_info cmdinfo; | 116 | struct async_cmd_info cmdinfo; |
118 | unsigned long cmdid_data[]; | 117 | struct blk_mq_hw_ctx *hctx; |
119 | }; | 118 | }; |
120 | 119 | ||
121 | /* | 120 | /* |
@@ -143,62 +142,79 @@ typedef void (*nvme_completion_fn)(struct nvme_queue *, void *, | |||
143 | struct nvme_cmd_info { | 142 | struct nvme_cmd_info { |
144 | nvme_completion_fn fn; | 143 | nvme_completion_fn fn; |
145 | void *ctx; | 144 | void *ctx; |
146 | unsigned long timeout; | ||
147 | int aborted; | 145 | int aborted; |
146 | struct nvme_queue *nvmeq; | ||
148 | }; | 147 | }; |
149 | 148 | ||
150 | static struct nvme_cmd_info *nvme_cmd_info(struct nvme_queue *nvmeq) | 149 | static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, |
150 | unsigned int hctx_idx) | ||
151 | { | 151 | { |
152 | return (void *)&nvmeq->cmdid_data[BITS_TO_LONGS(nvmeq->q_depth)]; | 152 | struct nvme_dev *dev = data; |
153 | struct nvme_queue *nvmeq = dev->queues[0]; | ||
154 | |||
155 | WARN_ON(nvmeq->hctx); | ||
156 | nvmeq->hctx = hctx; | ||
157 | hctx->driver_data = nvmeq; | ||
158 | return 0; | ||
153 | } | 159 | } |
154 | 160 | ||
155 | static unsigned nvme_queue_extra(int depth) | 161 | static int nvme_admin_init_request(void *data, struct request *req, |
162 | unsigned int hctx_idx, unsigned int rq_idx, | ||
163 | unsigned int numa_node) | ||
156 | { | 164 | { |
157 | return DIV_ROUND_UP(depth, 8) + (depth * sizeof(struct nvme_cmd_info)); | 165 | struct nvme_dev *dev = data; |
166 | struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req); | ||
167 | struct nvme_queue *nvmeq = dev->queues[0]; | ||
168 | |||
169 | BUG_ON(!nvmeq); | ||
170 | cmd->nvmeq = nvmeq; | ||
171 | return 0; | ||
158 | } | 172 | } |
159 | 173 | ||
160 | /** | 174 | static void nvme_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) |
161 | * alloc_cmdid() - Allocate a Command ID | ||
162 | * @nvmeq: The queue that will be used for this command | ||
163 | * @ctx: A pointer that will be passed to the handler | ||
164 | * @handler: The function to call on completion | ||
165 | * | ||
166 | * Allocate a Command ID for a queue. The data passed in will | ||
167 | * be passed to the completion handler. This is implemented by using | ||
168 | * the bottom two bits of the ctx pointer to store the handler ID. | ||
169 | * Passing in a pointer that's not 4-byte aligned will cause a BUG. | ||
170 | * We can change this if it becomes a problem. | ||
171 | * | ||
172 | * May be called with local interrupts disabled and the q_lock held, | ||
173 | * or with interrupts enabled and no locks held. | ||
174 | */ | ||
175 | static int alloc_cmdid(struct nvme_queue *nvmeq, void *ctx, | ||
176 | nvme_completion_fn handler, unsigned timeout) | ||
177 | { | 175 | { |
178 | int depth = nvmeq->q_depth - 1; | 176 | struct nvme_queue *nvmeq = hctx->driver_data; |
179 | struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); | ||
180 | int cmdid; | ||
181 | 177 | ||
182 | do { | 178 | nvmeq->hctx = NULL; |
183 | cmdid = find_first_zero_bit(nvmeq->cmdid_data, depth); | 179 | } |
184 | if (cmdid >= depth) | 180 | |
185 | return -EBUSY; | 181 | static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, |
186 | } while (test_and_set_bit(cmdid, nvmeq->cmdid_data)); | 182 | unsigned int hctx_idx) |
183 | { | ||
184 | struct nvme_dev *dev = data; | ||
185 | struct nvme_queue *nvmeq = dev->queues[ | ||
186 | (hctx_idx % dev->queue_count) + 1]; | ||
187 | |||
188 | if (!nvmeq->hctx) | ||
189 | nvmeq->hctx = hctx; | ||
187 | 190 | ||
188 | info[cmdid].fn = handler; | 191 | /* nvmeq queues are shared between namespaces. We assume here that |
189 | info[cmdid].ctx = ctx; | 192 | * blk-mq map the tags so they match up with the nvme queue tags. */ |
190 | info[cmdid].timeout = jiffies + timeout; | 193 | WARN_ON(nvmeq->hctx->tags != hctx->tags); |
191 | info[cmdid].aborted = 0; | 194 | |
192 | return cmdid; | 195 | hctx->driver_data = nvmeq; |
196 | return 0; | ||
193 | } | 197 | } |
194 | 198 | ||
195 | static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx, | 199 | static int nvme_init_request(void *data, struct request *req, |
196 | nvme_completion_fn handler, unsigned timeout) | 200 | unsigned int hctx_idx, unsigned int rq_idx, |
201 | unsigned int numa_node) | ||
197 | { | 202 | { |
198 | int cmdid; | 203 | struct nvme_dev *dev = data; |
199 | wait_event_killable(nvmeq->sq_full, | 204 | struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req); |
200 | (cmdid = alloc_cmdid(nvmeq, ctx, handler, timeout)) >= 0); | 205 | struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1]; |
201 | return (cmdid < 0) ? -EINTR : cmdid; | 206 | |
207 | BUG_ON(!nvmeq); | ||
208 | cmd->nvmeq = nvmeq; | ||
209 | return 0; | ||
210 | } | ||
211 | |||
212 | static void nvme_set_info(struct nvme_cmd_info *cmd, void *ctx, | ||
213 | nvme_completion_fn handler) | ||
214 | { | ||
215 | cmd->fn = handler; | ||
216 | cmd->ctx = ctx; | ||
217 | cmd->aborted = 0; | ||
202 | } | 218 | } |
203 | 219 | ||
204 | /* Special values must be less than 0x1000 */ | 220 | /* Special values must be less than 0x1000 */ |
@@ -206,17 +222,12 @@ static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx, | |||
206 | #define CMD_CTX_CANCELLED (0x30C + CMD_CTX_BASE) | 222 | #define CMD_CTX_CANCELLED (0x30C + CMD_CTX_BASE) |
207 | #define CMD_CTX_COMPLETED (0x310 + CMD_CTX_BASE) | 223 | #define CMD_CTX_COMPLETED (0x310 + CMD_CTX_BASE) |
208 | #define CMD_CTX_INVALID (0x314 + CMD_CTX_BASE) | 224 | #define CMD_CTX_INVALID (0x314 + CMD_CTX_BASE) |
209 | #define CMD_CTX_ABORT (0x318 + CMD_CTX_BASE) | ||
210 | 225 | ||
211 | static void special_completion(struct nvme_queue *nvmeq, void *ctx, | 226 | static void special_completion(struct nvme_queue *nvmeq, void *ctx, |
212 | struct nvme_completion *cqe) | 227 | struct nvme_completion *cqe) |
213 | { | 228 | { |
214 | if (ctx == CMD_CTX_CANCELLED) | 229 | if (ctx == CMD_CTX_CANCELLED) |
215 | return; | 230 | return; |
216 | if (ctx == CMD_CTX_ABORT) { | ||
217 | ++nvmeq->dev->abort_limit; | ||
218 | return; | ||
219 | } | ||
220 | if (ctx == CMD_CTX_COMPLETED) { | 231 | if (ctx == CMD_CTX_COMPLETED) { |
221 | dev_warn(nvmeq->q_dmadev, | 232 | dev_warn(nvmeq->q_dmadev, |
222 | "completed id %d twice on queue %d\n", | 233 | "completed id %d twice on queue %d\n", |
@@ -229,99 +240,89 @@ static void special_completion(struct nvme_queue *nvmeq, void *ctx, | |||
229 | cqe->command_id, le16_to_cpup(&cqe->sq_id)); | 240 | cqe->command_id, le16_to_cpup(&cqe->sq_id)); |
230 | return; | 241 | return; |
231 | } | 242 | } |
232 | |||
233 | dev_warn(nvmeq->q_dmadev, "Unknown special completion %p\n", ctx); | 243 | dev_warn(nvmeq->q_dmadev, "Unknown special completion %p\n", ctx); |
234 | } | 244 | } |
235 | 245 | ||
236 | static void async_completion(struct nvme_queue *nvmeq, void *ctx, | 246 | static void *cancel_cmd_info(struct nvme_cmd_info *cmd, nvme_completion_fn *fn) |
237 | struct nvme_completion *cqe) | ||
238 | { | ||
239 | struct async_cmd_info *cmdinfo = ctx; | ||
240 | cmdinfo->result = le32_to_cpup(&cqe->result); | ||
241 | cmdinfo->status = le16_to_cpup(&cqe->status) >> 1; | ||
242 | queue_kthread_work(cmdinfo->worker, &cmdinfo->work); | ||
243 | } | ||
244 | |||
245 | /* | ||
246 | * Called with local interrupts disabled and the q_lock held. May not sleep. | ||
247 | */ | ||
248 | static void *free_cmdid(struct nvme_queue *nvmeq, int cmdid, | ||
249 | nvme_completion_fn *fn) | ||
250 | { | 247 | { |
251 | void *ctx; | 248 | void *ctx; |
252 | struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); | ||
253 | 249 | ||
254 | if (cmdid >= nvmeq->q_depth || !info[cmdid].fn) { | ||
255 | if (fn) | ||
256 | *fn = special_completion; | ||
257 | return CMD_CTX_INVALID; | ||
258 | } | ||
259 | if (fn) | 250 | if (fn) |
260 | *fn = info[cmdid].fn; | 251 | *fn = cmd->fn; |
261 | ctx = info[cmdid].ctx; | 252 | ctx = cmd->ctx; |
262 | info[cmdid].fn = special_completion; | 253 | cmd->fn = special_completion; |
263 | info[cmdid].ctx = CMD_CTX_COMPLETED; | 254 | cmd->ctx = CMD_CTX_CANCELLED; |
264 | clear_bit(cmdid, nvmeq->cmdid_data); | ||
265 | wake_up(&nvmeq->sq_full); | ||
266 | return ctx; | 255 | return ctx; |
267 | } | 256 | } |
268 | 257 | ||
269 | static void *cancel_cmdid(struct nvme_queue *nvmeq, int cmdid, | 258 | static void async_req_completion(struct nvme_queue *nvmeq, void *ctx, |
270 | nvme_completion_fn *fn) | 259 | struct nvme_completion *cqe) |
271 | { | 260 | { |
272 | void *ctx; | 261 | struct request *req = ctx; |
273 | struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); | ||
274 | if (fn) | ||
275 | *fn = info[cmdid].fn; | ||
276 | ctx = info[cmdid].ctx; | ||
277 | info[cmdid].fn = special_completion; | ||
278 | info[cmdid].ctx = CMD_CTX_CANCELLED; | ||
279 | return ctx; | ||
280 | } | ||
281 | 262 | ||
282 | static struct nvme_queue *raw_nvmeq(struct nvme_dev *dev, int qid) | 263 | u32 result = le32_to_cpup(&cqe->result); |
283 | { | 264 | u16 status = le16_to_cpup(&cqe->status) >> 1; |
284 | return rcu_dereference_raw(dev->queues[qid]); | 265 | |
266 | if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ) | ||
267 | ++nvmeq->dev->event_limit; | ||
268 | if (status == NVME_SC_SUCCESS) | ||
269 | dev_warn(nvmeq->q_dmadev, | ||
270 | "async event result %08x\n", result); | ||
271 | |||
272 | blk_mq_free_hctx_request(nvmeq->hctx, req); | ||
285 | } | 273 | } |
286 | 274 | ||
287 | static struct nvme_queue *get_nvmeq(struct nvme_dev *dev) __acquires(RCU) | 275 | static void abort_completion(struct nvme_queue *nvmeq, void *ctx, |
276 | struct nvme_completion *cqe) | ||
288 | { | 277 | { |
289 | struct nvme_queue *nvmeq; | 278 | struct request *req = ctx; |
290 | unsigned queue_id = get_cpu_var(*dev->io_queue); | ||
291 | 279 | ||
292 | rcu_read_lock(); | 280 | u16 status = le16_to_cpup(&cqe->status) >> 1; |
293 | nvmeq = rcu_dereference(dev->queues[queue_id]); | 281 | u32 result = le32_to_cpup(&cqe->result); |
294 | if (nvmeq) | ||
295 | return nvmeq; | ||
296 | 282 | ||
297 | rcu_read_unlock(); | 283 | blk_mq_free_hctx_request(nvmeq->hctx, req); |
298 | put_cpu_var(*dev->io_queue); | 284 | |
299 | return NULL; | 285 | dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result); |
286 | ++nvmeq->dev->abort_limit; | ||
300 | } | 287 | } |
301 | 288 | ||
302 | static void put_nvmeq(struct nvme_queue *nvmeq) __releases(RCU) | 289 | static void async_completion(struct nvme_queue *nvmeq, void *ctx, |
290 | struct nvme_completion *cqe) | ||
303 | { | 291 | { |
304 | rcu_read_unlock(); | 292 | struct async_cmd_info *cmdinfo = ctx; |
305 | put_cpu_var(nvmeq->dev->io_queue); | 293 | cmdinfo->result = le32_to_cpup(&cqe->result); |
294 | cmdinfo->status = le16_to_cpup(&cqe->status) >> 1; | ||
295 | queue_kthread_work(cmdinfo->worker, &cmdinfo->work); | ||
296 | blk_mq_free_hctx_request(nvmeq->hctx, cmdinfo->req); | ||
306 | } | 297 | } |
307 | 298 | ||
308 | static struct nvme_queue *lock_nvmeq(struct nvme_dev *dev, int q_idx) | 299 | static inline struct nvme_cmd_info *get_cmd_from_tag(struct nvme_queue *nvmeq, |
309 | __acquires(RCU) | 300 | unsigned int tag) |
310 | { | 301 | { |
311 | struct nvme_queue *nvmeq; | 302 | struct blk_mq_hw_ctx *hctx = nvmeq->hctx; |
303 | struct request *req = blk_mq_tag_to_rq(hctx->tags, tag); | ||
312 | 304 | ||
313 | rcu_read_lock(); | 305 | return blk_mq_rq_to_pdu(req); |
314 | nvmeq = rcu_dereference(dev->queues[q_idx]); | ||
315 | if (nvmeq) | ||
316 | return nvmeq; | ||
317 | |||
318 | rcu_read_unlock(); | ||
319 | return NULL; | ||
320 | } | 306 | } |
321 | 307 | ||
322 | static void unlock_nvmeq(struct nvme_queue *nvmeq) __releases(RCU) | 308 | /* |
309 | * Called with local interrupts disabled and the q_lock held. May not sleep. | ||
310 | */ | ||
311 | static void *nvme_finish_cmd(struct nvme_queue *nvmeq, int tag, | ||
312 | nvme_completion_fn *fn) | ||
323 | { | 313 | { |
324 | rcu_read_unlock(); | 314 | struct nvme_cmd_info *cmd = get_cmd_from_tag(nvmeq, tag); |
315 | void *ctx; | ||
316 | if (tag >= nvmeq->q_depth) { | ||
317 | *fn = special_completion; | ||
318 | return CMD_CTX_INVALID; | ||
319 | } | ||
320 | if (fn) | ||
321 | *fn = cmd->fn; | ||
322 | ctx = cmd->ctx; | ||
323 | cmd->fn = special_completion; | ||
324 | cmd->ctx = CMD_CTX_COMPLETED; | ||
325 | return ctx; | ||
325 | } | 326 | } |
326 | 327 | ||
327 | /** | 328 | /** |
@@ -331,26 +332,29 @@ static void unlock_nvmeq(struct nvme_queue *nvmeq) __releases(RCU) | |||
331 | * | 332 | * |
332 | * Safe to use from interrupt context | 333 | * Safe to use from interrupt context |
333 | */ | 334 | */ |
334 | static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd) | 335 | static int __nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd) |
335 | { | 336 | { |
336 | unsigned long flags; | 337 | u16 tail = nvmeq->sq_tail; |
337 | u16 tail; | 338 | |
338 | spin_lock_irqsave(&nvmeq->q_lock, flags); | ||
339 | if (nvmeq->q_suspended) { | ||
340 | spin_unlock_irqrestore(&nvmeq->q_lock, flags); | ||
341 | return -EBUSY; | ||
342 | } | ||
343 | tail = nvmeq->sq_tail; | ||
344 | memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd)); | 339 | memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd)); |
345 | if (++tail == nvmeq->q_depth) | 340 | if (++tail == nvmeq->q_depth) |
346 | tail = 0; | 341 | tail = 0; |
347 | writel(tail, nvmeq->q_db); | 342 | writel(tail, nvmeq->q_db); |
348 | nvmeq->sq_tail = tail; | 343 | nvmeq->sq_tail = tail; |
349 | spin_unlock_irqrestore(&nvmeq->q_lock, flags); | ||
350 | 344 | ||
351 | return 0; | 345 | return 0; |
352 | } | 346 | } |
353 | 347 | ||
348 | static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd) | ||
349 | { | ||
350 | unsigned long flags; | ||
351 | int ret; | ||
352 | spin_lock_irqsave(&nvmeq->q_lock, flags); | ||
353 | ret = __nvme_submit_cmd(nvmeq, cmd); | ||
354 | spin_unlock_irqrestore(&nvmeq->q_lock, flags); | ||
355 | return ret; | ||
356 | } | ||
357 | |||
354 | static __le64 **iod_list(struct nvme_iod *iod) | 358 | static __le64 **iod_list(struct nvme_iod *iod) |
355 | { | 359 | { |
356 | return ((void *)iod) + iod->offset; | 360 | return ((void *)iod) + iod->offset; |
@@ -361,17 +365,17 @@ static __le64 **iod_list(struct nvme_iod *iod) | |||
361 | * as it only leads to a small amount of wasted memory for the lifetime of | 365 | * as it only leads to a small amount of wasted memory for the lifetime of |
362 | * the I/O. | 366 | * the I/O. |
363 | */ | 367 | */ |
364 | static int nvme_npages(unsigned size) | 368 | static int nvme_npages(unsigned size, struct nvme_dev *dev) |
365 | { | 369 | { |
366 | unsigned nprps = DIV_ROUND_UP(size + PAGE_SIZE, PAGE_SIZE); | 370 | unsigned nprps = DIV_ROUND_UP(size + dev->page_size, dev->page_size); |
367 | return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8); | 371 | return DIV_ROUND_UP(8 * nprps, dev->page_size - 8); |
368 | } | 372 | } |
369 | 373 | ||
370 | static struct nvme_iod * | 374 | static struct nvme_iod * |
371 | nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp) | 375 | nvme_alloc_iod(unsigned nseg, unsigned nbytes, struct nvme_dev *dev, gfp_t gfp) |
372 | { | 376 | { |
373 | struct nvme_iod *iod = kmalloc(sizeof(struct nvme_iod) + | 377 | struct nvme_iod *iod = kmalloc(sizeof(struct nvme_iod) + |
374 | sizeof(__le64 *) * nvme_npages(nbytes) + | 378 | sizeof(__le64 *) * nvme_npages(nbytes, dev) + |
375 | sizeof(struct scatterlist) * nseg, gfp); | 379 | sizeof(struct scatterlist) * nseg, gfp); |
376 | 380 | ||
377 | if (iod) { | 381 | if (iod) { |
@@ -380,7 +384,6 @@ nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp) | |||
380 | iod->length = nbytes; | 384 | iod->length = nbytes; |
381 | iod->nents = 0; | 385 | iod->nents = 0; |
382 | iod->first_dma = 0ULL; | 386 | iod->first_dma = 0ULL; |
383 | iod->start_time = jiffies; | ||
384 | } | 387 | } |
385 | 388 | ||
386 | return iod; | 389 | return iod; |
@@ -388,7 +391,7 @@ nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp) | |||
388 | 391 | ||
389 | void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod) | 392 | void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod) |
390 | { | 393 | { |
391 | const int last_prp = PAGE_SIZE / 8 - 1; | 394 | const int last_prp = dev->page_size / 8 - 1; |
392 | int i; | 395 | int i; |
393 | __le64 **list = iod_list(iod); | 396 | __le64 **list = iod_list(iod); |
394 | dma_addr_t prp_dma = iod->first_dma; | 397 | dma_addr_t prp_dma = iod->first_dma; |
@@ -404,65 +407,49 @@ void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod) | |||
404 | kfree(iod); | 407 | kfree(iod); |
405 | } | 408 | } |
406 | 409 | ||
407 | static void nvme_start_io_acct(struct bio *bio) | 410 | static int nvme_error_status(u16 status) |
408 | { | ||
409 | struct gendisk *disk = bio->bi_bdev->bd_disk; | ||
410 | if (blk_queue_io_stat(disk->queue)) { | ||
411 | const int rw = bio_data_dir(bio); | ||
412 | int cpu = part_stat_lock(); | ||
413 | part_round_stats(cpu, &disk->part0); | ||
414 | part_stat_inc(cpu, &disk->part0, ios[rw]); | ||
415 | part_stat_add(cpu, &disk->part0, sectors[rw], | ||
416 | bio_sectors(bio)); | ||
417 | part_inc_in_flight(&disk->part0, rw); | ||
418 | part_stat_unlock(); | ||
419 | } | ||
420 | } | ||
421 | |||
422 | static void nvme_end_io_acct(struct bio *bio, unsigned long start_time) | ||
423 | { | 411 | { |
424 | struct gendisk *disk = bio->bi_bdev->bd_disk; | 412 | switch (status & 0x7ff) { |
425 | if (blk_queue_io_stat(disk->queue)) { | 413 | case NVME_SC_SUCCESS: |
426 | const int rw = bio_data_dir(bio); | 414 | return 0; |
427 | unsigned long duration = jiffies - start_time; | 415 | case NVME_SC_CAP_EXCEEDED: |
428 | int cpu = part_stat_lock(); | 416 | return -ENOSPC; |
429 | part_stat_add(cpu, &disk->part0, ticks[rw], duration); | 417 | default: |
430 | part_round_stats(cpu, &disk->part0); | 418 | return -EIO; |
431 | part_dec_in_flight(&disk->part0, rw); | ||
432 | part_stat_unlock(); | ||
433 | } | 419 | } |
434 | } | 420 | } |
435 | 421 | ||
436 | static void bio_completion(struct nvme_queue *nvmeq, void *ctx, | 422 | static void req_completion(struct nvme_queue *nvmeq, void *ctx, |
437 | struct nvme_completion *cqe) | 423 | struct nvme_completion *cqe) |
438 | { | 424 | { |
439 | struct nvme_iod *iod = ctx; | 425 | struct nvme_iod *iod = ctx; |
440 | struct bio *bio = iod->private; | 426 | struct request *req = iod->private; |
427 | struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req); | ||
428 | |||
441 | u16 status = le16_to_cpup(&cqe->status) >> 1; | 429 | u16 status = le16_to_cpup(&cqe->status) >> 1; |
442 | int error = 0; | ||
443 | 430 | ||
444 | if (unlikely(status)) { | 431 | if (unlikely(status)) { |
445 | if (!(status & NVME_SC_DNR || | 432 | if (!(status & NVME_SC_DNR || blk_noretry_request(req)) |
446 | bio->bi_rw & REQ_FAILFAST_MASK) && | 433 | && (jiffies - req->start_time) < req->timeout) { |
447 | (jiffies - iod->start_time) < IOD_TIMEOUT) { | 434 | blk_mq_requeue_request(req); |
448 | if (!waitqueue_active(&nvmeq->sq_full)) | 435 | blk_mq_kick_requeue_list(req->q); |
449 | add_wait_queue(&nvmeq->sq_full, | ||
450 | &nvmeq->sq_cong_wait); | ||
451 | list_add_tail(&iod->node, &nvmeq->iod_bio); | ||
452 | wake_up(&nvmeq->sq_full); | ||
453 | return; | 436 | return; |
454 | } | 437 | } |
455 | error = -EIO; | 438 | req->errors = nvme_error_status(status); |
456 | } | 439 | } else |
457 | if (iod->nents) { | 440 | req->errors = 0; |
458 | dma_unmap_sg(nvmeq->q_dmadev, iod->sg, iod->nents, | 441 | |
459 | bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); | 442 | if (cmd_rq->aborted) |
460 | nvme_end_io_acct(bio, iod->start_time); | 443 | dev_warn(&nvmeq->dev->pci_dev->dev, |
461 | } | 444 | "completing aborted command with status:%04x\n", |
445 | status); | ||
446 | |||
447 | if (iod->nents) | ||
448 | dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg, iod->nents, | ||
449 | rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); | ||
462 | nvme_free_iod(nvmeq->dev, iod); | 450 | nvme_free_iod(nvmeq->dev, iod); |
463 | 451 | ||
464 | trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio, error); | 452 | blk_mq_complete_request(req); |
465 | bio_endio(bio, error); | ||
466 | } | 453 | } |
467 | 454 | ||
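
req_completion() above replaces the old bio congestion path with blk-mq requeueing: a failed request goes back on the requeue list only while it is retryable (no DNR bit, not a no-retry request) and still inside its timeout, and is otherwise completed with an errno derived from nvme_error_status(). A small model of that decision, with the status and retry conditions reduced to plain flags for illustration:

#include <stdbool.h>

enum outcome { COMPLETE_OK, REQUEUE, COMPLETE_ERROR };

/*
 * 'dnr' models the Do Not Retry bit in the completion status, 'noretry'
 * models blk_noretry_request(), and the times are in jiffies-like units.
 */
static enum outcome complete_or_retry(unsigned status, bool dnr, bool noretry,
                                      unsigned long now, unsigned long start,
                                      unsigned long timeout)
{
        if (!status)
                return COMPLETE_OK;
        if (!dnr && !noretry && (now - start) < timeout)
                return REQUEUE;
        return COMPLETE_ERROR;
}

int main(void)
{
        /* transient failure seen halfway through a 30-tick timeout */
        return complete_or_retry(1, false, false, 15, 0, 30) == REQUEUE ? 0 : 1;
}
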
468 | /* length is in bytes. gfp flags indicates whether we may sleep. */ | 455 | /* length is in bytes. gfp flags indicates whether we may sleep. */ |
@@ -479,26 +466,27 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len, | |||
479 | __le64 **list = iod_list(iod); | 466 | __le64 **list = iod_list(iod); |
480 | dma_addr_t prp_dma; | 467 | dma_addr_t prp_dma; |
481 | int nprps, i; | 468 | int nprps, i; |
469 | u32 page_size = dev->page_size; | ||
482 | 470 | ||
483 | length -= (PAGE_SIZE - offset); | 471 | length -= (page_size - offset); |
484 | if (length <= 0) | 472 | if (length <= 0) |
485 | return total_len; | 473 | return total_len; |
486 | 474 | ||
487 | dma_len -= (PAGE_SIZE - offset); | 475 | dma_len -= (page_size - offset); |
488 | if (dma_len) { | 476 | if (dma_len) { |
489 | dma_addr += (PAGE_SIZE - offset); | 477 | dma_addr += (page_size - offset); |
490 | } else { | 478 | } else { |
491 | sg = sg_next(sg); | 479 | sg = sg_next(sg); |
492 | dma_addr = sg_dma_address(sg); | 480 | dma_addr = sg_dma_address(sg); |
493 | dma_len = sg_dma_len(sg); | 481 | dma_len = sg_dma_len(sg); |
494 | } | 482 | } |
495 | 483 | ||
496 | if (length <= PAGE_SIZE) { | 484 | if (length <= page_size) { |
497 | iod->first_dma = dma_addr; | 485 | iod->first_dma = dma_addr; |
498 | return total_len; | 486 | return total_len; |
499 | } | 487 | } |
500 | 488 | ||
501 | nprps = DIV_ROUND_UP(length, PAGE_SIZE); | 489 | nprps = DIV_ROUND_UP(length, page_size); |
502 | if (nprps <= (256 / 8)) { | 490 | if (nprps <= (256 / 8)) { |
503 | pool = dev->prp_small_pool; | 491 | pool = dev->prp_small_pool; |
504 | iod->npages = 0; | 492 | iod->npages = 0; |
@@ -511,13 +499,13 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len, | |||
511 | if (!prp_list) { | 499 | if (!prp_list) { |
512 | iod->first_dma = dma_addr; | 500 | iod->first_dma = dma_addr; |
513 | iod->npages = -1; | 501 | iod->npages = -1; |
514 | return (total_len - length) + PAGE_SIZE; | 502 | return (total_len - length) + page_size; |
515 | } | 503 | } |
516 | list[0] = prp_list; | 504 | list[0] = prp_list; |
517 | iod->first_dma = prp_dma; | 505 | iod->first_dma = prp_dma; |
518 | i = 0; | 506 | i = 0; |
519 | for (;;) { | 507 | for (;;) { |
520 | if (i == PAGE_SIZE / 8) { | 508 | if (i == page_size >> 3) { |
521 | __le64 *old_prp_list = prp_list; | 509 | __le64 *old_prp_list = prp_list; |
522 | prp_list = dma_pool_alloc(pool, gfp, &prp_dma); | 510 | prp_list = dma_pool_alloc(pool, gfp, &prp_dma); |
523 | if (!prp_list) | 511 | if (!prp_list) |
@@ -528,9 +516,9 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len, | |||
528 | i = 1; | 516 | i = 1; |
529 | } | 517 | } |
530 | prp_list[i++] = cpu_to_le64(dma_addr); | 518 | prp_list[i++] = cpu_to_le64(dma_addr); |
531 | dma_len -= PAGE_SIZE; | 519 | dma_len -= page_size; |
532 | dma_addr += PAGE_SIZE; | 520 | dma_addr += page_size; |
533 | length -= PAGE_SIZE; | 521 | length -= page_size; |
534 | if (length <= 0) | 522 | if (length <= 0) |
535 | break; | 523 | break; |
536 | if (dma_len > 0) | 524 | if (dma_len > 0) |
@@ -544,88 +532,25 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len, | |||
544 | return total_len; | 532 | return total_len; |
545 | } | 533 | } |
546 | 534 | ||
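
nvme_setup_prps() above now walks the mapped scatterlist in units of the device page size: data that fits in the first page needs no PRP2, one further page is addressed directly by PRP2, and anything larger gets a chained PRP list, drawn from the small pool when 32 or fewer entries suffice. A standalone sketch of that sizing decision; the enum and helper are illustrative, not the driver's interface:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

enum prp_layout { PRP2_UNUSED, PRP2_ADDR, PRP2_SMALL_LIST, PRP2_PAGE_LIST };

/*
 * Decide how PRP2 is used for a transfer of 'len' bytes starting at
 * byte 'offset' into the first device page.
 */
static enum prp_layout prp_layout(int len, unsigned offset, unsigned page_size)
{
        int remaining = len - (int)(page_size - offset);
        unsigned nprps;

        if (remaining <= 0)
                return PRP2_UNUSED;             /* fits in the first page */
        if (remaining <= (int)page_size)
                return PRP2_ADDR;               /* PRP2 points at data */

        nprps = DIV_ROUND_UP(remaining, page_size);
        return nprps <= 256 / 8 ? PRP2_SMALL_LIST : PRP2_PAGE_LIST;
}

int main(void)
{
        /* 64 KiB at page offset 0 with 4 KiB pages: needs a (small) PRP list */
        printf("%d\n", prp_layout(64 * 1024, 0, 4096));
        return 0;
}
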
547 | static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq, | 535 | /* |
548 | int len) | 536 | * We reuse the small pool to allocate the 16-byte range here as it is not |
549 | { | 537 | * worth having a special pool for these or additional cases to handle freeing |
550 | struct bio *split = bio_split(bio, len >> 9, GFP_ATOMIC, NULL); | 538 | * the iod. |
551 | if (!split) | 539 | */ |
552 | return -ENOMEM; | 540 | static void nvme_submit_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns, |
553 | 541 | struct request *req, struct nvme_iod *iod) | |
554 | trace_block_split(bdev_get_queue(bio->bi_bdev), bio, | ||
555 | split->bi_iter.bi_sector); | ||
556 | bio_chain(split, bio); | ||
557 | |||
558 | if (!waitqueue_active(&nvmeq->sq_full)) | ||
559 | add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait); | ||
560 | bio_list_add(&nvmeq->sq_cong, split); | ||
561 | bio_list_add(&nvmeq->sq_cong, bio); | ||
562 | wake_up(&nvmeq->sq_full); | ||
563 | |||
564 | return 0; | ||
565 | } | ||
566 | |||
567 | /* NVMe scatterlists require no holes in the virtual address */ | ||
568 | #define BIOVEC_NOT_VIRT_MERGEABLE(vec1, vec2) ((vec2)->bv_offset || \ | ||
569 | (((vec1)->bv_offset + (vec1)->bv_len) % PAGE_SIZE)) | ||
570 | |||
571 | static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod, | ||
572 | struct bio *bio, enum dma_data_direction dma_dir, int psegs) | ||
573 | { | ||
574 | struct bio_vec bvec, bvprv; | ||
575 | struct bvec_iter iter; | ||
576 | struct scatterlist *sg = NULL; | ||
577 | int length = 0, nsegs = 0, split_len = bio->bi_iter.bi_size; | ||
578 | int first = 1; | ||
579 | |||
580 | if (nvmeq->dev->stripe_size) | ||
581 | split_len = nvmeq->dev->stripe_size - | ||
582 | ((bio->bi_iter.bi_sector << 9) & | ||
583 | (nvmeq->dev->stripe_size - 1)); | ||
584 | |||
585 | sg_init_table(iod->sg, psegs); | ||
586 | bio_for_each_segment(bvec, bio, iter) { | ||
587 | if (!first && BIOVEC_PHYS_MERGEABLE(&bvprv, &bvec)) { | ||
588 | sg->length += bvec.bv_len; | ||
589 | } else { | ||
590 | if (!first && BIOVEC_NOT_VIRT_MERGEABLE(&bvprv, &bvec)) | ||
591 | return nvme_split_and_submit(bio, nvmeq, | ||
592 | length); | ||
593 | |||
594 | sg = sg ? sg + 1 : iod->sg; | ||
595 | sg_set_page(sg, bvec.bv_page, | ||
596 | bvec.bv_len, bvec.bv_offset); | ||
597 | nsegs++; | ||
598 | } | ||
599 | |||
600 | if (split_len - length < bvec.bv_len) | ||
601 | return nvme_split_and_submit(bio, nvmeq, split_len); | ||
602 | length += bvec.bv_len; | ||
603 | bvprv = bvec; | ||
604 | first = 0; | ||
605 | } | ||
606 | iod->nents = nsegs; | ||
607 | sg_mark_end(sg); | ||
608 | if (dma_map_sg(nvmeq->q_dmadev, iod->sg, iod->nents, dma_dir) == 0) | ||
609 | return -ENOMEM; | ||
610 | |||
611 | BUG_ON(length != bio->bi_iter.bi_size); | ||
612 | return length; | ||
613 | } | ||
614 | |||
615 | static int nvme_submit_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns, | ||
616 | struct bio *bio, struct nvme_iod *iod, int cmdid) | ||
617 | { | 542 | { |
618 | struct nvme_dsm_range *range = | 543 | struct nvme_dsm_range *range = |
619 | (struct nvme_dsm_range *)iod_list(iod)[0]; | 544 | (struct nvme_dsm_range *)iod_list(iod)[0]; |
620 | struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail]; | 545 | struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail]; |
621 | 546 | ||
622 | range->cattr = cpu_to_le32(0); | 547 | range->cattr = cpu_to_le32(0); |
623 | range->nlb = cpu_to_le32(bio->bi_iter.bi_size >> ns->lba_shift); | 548 | range->nlb = cpu_to_le32(blk_rq_bytes(req) >> ns->lba_shift); |
624 | range->slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_iter.bi_sector)); | 549 | range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); |
625 | 550 | ||
626 | memset(cmnd, 0, sizeof(*cmnd)); | 551 | memset(cmnd, 0, sizeof(*cmnd)); |
627 | cmnd->dsm.opcode = nvme_cmd_dsm; | 552 | cmnd->dsm.opcode = nvme_cmd_dsm; |
628 | cmnd->dsm.command_id = cmdid; | 553 | cmnd->dsm.command_id = req->tag; |
629 | cmnd->dsm.nsid = cpu_to_le32(ns->ns_id); | 554 | cmnd->dsm.nsid = cpu_to_le32(ns->ns_id); |
630 | cmnd->dsm.prp1 = cpu_to_le64(iod->first_dma); | 555 | cmnd->dsm.prp1 = cpu_to_le64(iod->first_dma); |
631 | cmnd->dsm.nr = 0; | 556 | cmnd->dsm.nr = 0; |
@@ -634,11 +559,9 @@ static int nvme_submit_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns, | |||
634 | if (++nvmeq->sq_tail == nvmeq->q_depth) | 559 | if (++nvmeq->sq_tail == nvmeq->q_depth) |
635 | nvmeq->sq_tail = 0; | 560 | nvmeq->sq_tail = 0; |
636 | writel(nvmeq->sq_tail, nvmeq->q_db); | 561 | writel(nvmeq->sq_tail, nvmeq->q_db); |
637 | |||
638 | return 0; | ||
639 | } | 562 | } |
640 | 563 | ||
641 | static int nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns, | 564 | static void nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns, |
642 | int cmdid) | 565 | int cmdid) |
643 | { | 566 | { |
644 | struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail]; | 567 | struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail]; |
@@ -651,49 +574,34 @@ static int nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns, | |||
651 | if (++nvmeq->sq_tail == nvmeq->q_depth) | 574 | if (++nvmeq->sq_tail == nvmeq->q_depth) |
652 | nvmeq->sq_tail = 0; | 575 | nvmeq->sq_tail = 0; |
653 | writel(nvmeq->sq_tail, nvmeq->q_db); | 576 | writel(nvmeq->sq_tail, nvmeq->q_db); |
654 | |||
655 | return 0; | ||
656 | } | 577 | } |
657 | 578 | ||
658 | static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod) | 579 | static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod, |
580 | struct nvme_ns *ns) | ||
659 | { | 581 | { |
660 | struct bio *bio = iod->private; | 582 | struct request *req = iod->private; |
661 | struct nvme_ns *ns = bio->bi_bdev->bd_disk->private_data; | ||
662 | struct nvme_command *cmnd; | 583 | struct nvme_command *cmnd; |
663 | int cmdid; | 584 | u16 control = 0; |
664 | u16 control; | 585 | u32 dsmgmt = 0; |
665 | u32 dsmgmt; | ||
666 | |||
667 | cmdid = alloc_cmdid(nvmeq, iod, bio_completion, NVME_IO_TIMEOUT); | ||
668 | if (unlikely(cmdid < 0)) | ||
669 | return cmdid; | ||
670 | 586 | ||
671 | if (bio->bi_rw & REQ_DISCARD) | 587 | if (req->cmd_flags & REQ_FUA) |
672 | return nvme_submit_discard(nvmeq, ns, bio, iod, cmdid); | ||
673 | if (bio->bi_rw & REQ_FLUSH) | ||
674 | return nvme_submit_flush(nvmeq, ns, cmdid); | ||
675 | |||
676 | control = 0; | ||
677 | if (bio->bi_rw & REQ_FUA) | ||
678 | control |= NVME_RW_FUA; | 588 | control |= NVME_RW_FUA; |
679 | if (bio->bi_rw & (REQ_FAILFAST_DEV | REQ_RAHEAD)) | 589 | if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD)) |
680 | control |= NVME_RW_LR; | 590 | control |= NVME_RW_LR; |
681 | 591 | ||
682 | dsmgmt = 0; | 592 | if (req->cmd_flags & REQ_RAHEAD) |
683 | if (bio->bi_rw & REQ_RAHEAD) | ||
684 | dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH; | 593 | dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH; |
685 | 594 | ||
686 | cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail]; | 595 | cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail]; |
687 | memset(cmnd, 0, sizeof(*cmnd)); | 596 | memset(cmnd, 0, sizeof(*cmnd)); |
688 | 597 | ||
689 | cmnd->rw.opcode = bio_data_dir(bio) ? nvme_cmd_write : nvme_cmd_read; | 598 | cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read); |
690 | cmnd->rw.command_id = cmdid; | 599 | cmnd->rw.command_id = req->tag; |
691 | cmnd->rw.nsid = cpu_to_le32(ns->ns_id); | 600 | cmnd->rw.nsid = cpu_to_le32(ns->ns_id); |
692 | cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); | 601 | cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); |
693 | cmnd->rw.prp2 = cpu_to_le64(iod->first_dma); | 602 | cmnd->rw.prp2 = cpu_to_le64(iod->first_dma); |
694 | cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_iter.bi_sector)); | 603 | cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); |
695 | cmnd->rw.length = | 604 | cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); |
696 | cpu_to_le16((bio->bi_iter.bi_size >> ns->lba_shift) - 1); | ||
697 | cmnd->rw.control = cpu_to_le16(control); | 605 | cmnd->rw.control = cpu_to_le16(control); |
698 | cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt); | 606 | cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt); |
699 | 607 | ||
@@ -704,45 +612,26 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod) | |||
704 | return 0; | 612 | return 0; |
705 | } | 613 | } |
706 | 614 | ||
707 | static int nvme_split_flush_data(struct nvme_queue *nvmeq, struct bio *bio) | 615 | static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, |
708 | { | 616 | const struct blk_mq_queue_data *bd) |
709 | struct bio *split = bio_clone(bio, GFP_ATOMIC); | ||
710 | if (!split) | ||
711 | return -ENOMEM; | ||
712 | |||
713 | split->bi_iter.bi_size = 0; | ||
714 | split->bi_phys_segments = 0; | ||
715 | bio->bi_rw &= ~REQ_FLUSH; | ||
716 | bio_chain(split, bio); | ||
717 | |||
718 | if (!waitqueue_active(&nvmeq->sq_full)) | ||
719 | add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait); | ||
720 | bio_list_add(&nvmeq->sq_cong, split); | ||
721 | bio_list_add(&nvmeq->sq_cong, bio); | ||
722 | wake_up_process(nvme_thread); | ||
723 | |||
724 | return 0; | ||
725 | } | ||
726 | |||
727 | /* | ||
728 | * Called with local interrupts disabled and the q_lock held. May not sleep. | ||
729 | */ | ||
730 | static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, | ||
731 | struct bio *bio) | ||
732 | { | 617 | { |
618 | struct nvme_ns *ns = hctx->queue->queuedata; | ||
619 | struct nvme_queue *nvmeq = hctx->driver_data; | ||
620 | struct request *req = bd->rq; | ||
621 | struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req); | ||
733 | struct nvme_iod *iod; | 622 | struct nvme_iod *iod; |
734 | int psegs = bio_phys_segments(ns->queue, bio); | 623 | int psegs = req->nr_phys_segments; |
735 | int result; | 624 | enum dma_data_direction dma_dir; |
625 | unsigned size = !(req->cmd_flags & REQ_DISCARD) ? blk_rq_bytes(req) : | ||
626 | sizeof(struct nvme_dsm_range); | ||
736 | 627 | ||
737 | if ((bio->bi_rw & REQ_FLUSH) && psegs) | 628 | iod = nvme_alloc_iod(psegs, size, ns->dev, GFP_ATOMIC); |
738 | return nvme_split_flush_data(nvmeq, bio); | ||
739 | |||
740 | iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC); | ||
741 | if (!iod) | 629 | if (!iod) |
742 | return -ENOMEM; | 630 | return BLK_MQ_RQ_QUEUE_BUSY; |
743 | 631 | ||
744 | iod->private = bio; | 632 | iod->private = req; |
745 | if (bio->bi_rw & REQ_DISCARD) { | 633 | |
634 | if (req->cmd_flags & REQ_DISCARD) { | ||
746 | void *range; | 635 | void *range; |
747 | /* | 636 | /* |
748 | * We reuse the small pool to allocate the 16-byte range here | 637 | * We reuse the small pool to allocate the 16-byte range here |
@@ -752,35 +641,50 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, | |||
752 | range = dma_pool_alloc(nvmeq->dev->prp_small_pool, | 641 | range = dma_pool_alloc(nvmeq->dev->prp_small_pool, |
753 | GFP_ATOMIC, | 642 | GFP_ATOMIC, |
754 | &iod->first_dma); | 643 | &iod->first_dma); |
755 | if (!range) { | 644 | if (!range) |
756 | result = -ENOMEM; | 645 | goto retry_cmd; |
757 | goto free_iod; | ||
758 | } | ||
759 | iod_list(iod)[0] = (__le64 *)range; | 646 | iod_list(iod)[0] = (__le64 *)range; |
760 | iod->npages = 0; | 647 | iod->npages = 0; |
761 | } else if (psegs) { | 648 | } else if (psegs) { |
762 | result = nvme_map_bio(nvmeq, iod, bio, | 649 | dma_dir = rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE; |
763 | bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, | 650 | |
764 | psegs); | 651 | sg_init_table(iod->sg, psegs); |
765 | if (result <= 0) | 652 | iod->nents = blk_rq_map_sg(req->q, req, iod->sg); |
766 | goto free_iod; | 653 | if (!iod->nents) |
767 | if (nvme_setup_prps(nvmeq->dev, iod, result, GFP_ATOMIC) != | 654 | goto error_cmd; |
768 | result) { | 655 | |
769 | result = -ENOMEM; | 656 | if (!dma_map_sg(nvmeq->q_dmadev, iod->sg, iod->nents, dma_dir)) |
770 | goto free_iod; | 657 | goto retry_cmd; |
658 | |||
659 | if (blk_rq_bytes(req) != | ||
660 | nvme_setup_prps(nvmeq->dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) { | ||
661 | dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg, | ||
662 | iod->nents, dma_dir); | ||
663 | goto retry_cmd; | ||
771 | } | 664 | } |
772 | nvme_start_io_acct(bio); | ||
773 | } | 665 | } |
774 | if (unlikely(nvme_submit_iod(nvmeq, iod))) { | ||
775 | if (!waitqueue_active(&nvmeq->sq_full)) | ||
776 | add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait); | ||
777 | list_add_tail(&iod->node, &nvmeq->iod_bio); | ||
778 | } | ||
779 | return 0; | ||
780 | 666 | ||
781 | free_iod: | 667 | blk_mq_start_request(req); |
668 | |||
669 | nvme_set_info(cmd, iod, req_completion); | ||
670 | spin_lock_irq(&nvmeq->q_lock); | ||
671 | if (req->cmd_flags & REQ_DISCARD) | ||
672 | nvme_submit_discard(nvmeq, ns, req, iod); | ||
673 | else if (req->cmd_flags & REQ_FLUSH) | ||
674 | nvme_submit_flush(nvmeq, ns, req->tag); | ||
675 | else | ||
676 | nvme_submit_iod(nvmeq, iod, ns); | ||
677 | |||
678 | nvme_process_cq(nvmeq); | ||
679 | spin_unlock_irq(&nvmeq->q_lock); | ||
680 | return BLK_MQ_RQ_QUEUE_OK; | ||
681 | |||
682 | error_cmd: | ||
782 | nvme_free_iod(nvmeq->dev, iod); | 683 | nvme_free_iod(nvmeq->dev, iod); |
783 | return result; | 684 | return BLK_MQ_RQ_QUEUE_ERROR; |
685 | retry_cmd: | ||
686 | nvme_free_iod(nvmeq->dev, iod); | ||
687 | return BLK_MQ_RQ_QUEUE_BUSY; | ||
784 | } | 688 | } |
785 | 689 | ||
786 | static int nvme_process_cq(struct nvme_queue *nvmeq) | 690 | static int nvme_process_cq(struct nvme_queue *nvmeq) |
@@ -801,8 +705,7 @@ static int nvme_process_cq(struct nvme_queue *nvmeq) | |||
801 | head = 0; | 705 | head = 0; |
802 | phase = !phase; | 706 | phase = !phase; |
803 | } | 707 | } |
804 | 708 | ctx = nvme_finish_cmd(nvmeq, cqe.command_id, &fn); | |
805 | ctx = free_cmdid(nvmeq, cqe.command_id, &fn); | ||
806 | fn(nvmeq, ctx, &cqe); | 709 | fn(nvmeq, ctx, &cqe); |
807 | } | 710 | } |
808 | 711 | ||
@@ -823,29 +726,13 @@ static int nvme_process_cq(struct nvme_queue *nvmeq) | |||
823 | return 1; | 726 | return 1; |
824 | } | 727 | } |
825 | 728 | ||
826 | static void nvme_make_request(struct request_queue *q, struct bio *bio) | 729 | /* Admin queue isn't initialized as a request queue. If at some point this |
730 | * happens anyway, make sure to notify the user */ | ||
731 | static int nvme_admin_queue_rq(struct blk_mq_hw_ctx *hctx, | ||
732 | const struct blk_mq_queue_data *bd) | ||
827 | { | 733 | { |
828 | struct nvme_ns *ns = q->queuedata; | 734 | WARN_ON_ONCE(1); |
829 | struct nvme_queue *nvmeq = get_nvmeq(ns->dev); | 735 | return BLK_MQ_RQ_QUEUE_ERROR; |
830 | int result = -EBUSY; | ||
831 | |||
832 | if (!nvmeq) { | ||
833 | bio_endio(bio, -EIO); | ||
834 | return; | ||
835 | } | ||
836 | |||
837 | spin_lock_irq(&nvmeq->q_lock); | ||
838 | if (!nvmeq->q_suspended && bio_list_empty(&nvmeq->sq_cong)) | ||
839 | result = nvme_submit_bio_queue(nvmeq, ns, bio); | ||
840 | if (unlikely(result)) { | ||
841 | if (!waitqueue_active(&nvmeq->sq_full)) | ||
842 | add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait); | ||
843 | bio_list_add(&nvmeq->sq_cong, bio); | ||
844 | } | ||
845 | |||
846 | nvme_process_cq(nvmeq); | ||
847 | spin_unlock_irq(&nvmeq->q_lock); | ||
848 | put_nvmeq(nvmeq); | ||
849 | } | 736 | } |
850 | 737 | ||
851 | static irqreturn_t nvme_irq(int irq, void *data) | 738 | static irqreturn_t nvme_irq(int irq, void *data) |
@@ -869,10 +756,11 @@ static irqreturn_t nvme_irq_check(int irq, void *data) | |||
869 | return IRQ_WAKE_THREAD; | 756 | return IRQ_WAKE_THREAD; |
870 | } | 757 | } |
871 | 758 | ||
872 | static void nvme_abort_command(struct nvme_queue *nvmeq, int cmdid) | 759 | static void nvme_abort_cmd_info(struct nvme_queue *nvmeq, struct nvme_cmd_info * |
760 | cmd_info) | ||
873 | { | 761 | { |
874 | spin_lock_irq(&nvmeq->q_lock); | 762 | spin_lock_irq(&nvmeq->q_lock); |
875 | cancel_cmdid(nvmeq, cmdid, NULL); | 763 | cancel_cmd_info(cmd_info, NULL); |
876 | spin_unlock_irq(&nvmeq->q_lock); | 764 | spin_unlock_irq(&nvmeq->q_lock); |
877 | } | 765 | } |
878 | 766 | ||
@@ -895,47 +783,40 @@ static void sync_completion(struct nvme_queue *nvmeq, void *ctx, | |||
895 | * Returns 0 on success. If the result is negative, it's a Linux error code; | 783 | * Returns 0 on success. If the result is negative, it's a Linux error code; |
896 | * if the result is positive, it's an NVM Express status code | 784 | * if the result is positive, it's an NVM Express status code |
897 | */ | 785 | */ |
898 | static int nvme_submit_sync_cmd(struct nvme_dev *dev, int q_idx, | 786 | static int nvme_submit_sync_cmd(struct request *req, struct nvme_command *cmd, |
899 | struct nvme_command *cmd, | ||
900 | u32 *result, unsigned timeout) | 787 | u32 *result, unsigned timeout) |
901 | { | 788 | { |
902 | int cmdid, ret; | 789 | int ret; |
903 | struct sync_cmd_info cmdinfo; | 790 | struct sync_cmd_info cmdinfo; |
904 | struct nvme_queue *nvmeq; | 791 | struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req); |
905 | 792 | struct nvme_queue *nvmeq = cmd_rq->nvmeq; | |
906 | nvmeq = lock_nvmeq(dev, q_idx); | ||
907 | if (!nvmeq) | ||
908 | return -ENODEV; | ||
909 | 793 | ||
910 | cmdinfo.task = current; | 794 | cmdinfo.task = current; |
911 | cmdinfo.status = -EINTR; | 795 | cmdinfo.status = -EINTR; |
912 | 796 | ||
913 | cmdid = alloc_cmdid(nvmeq, &cmdinfo, sync_completion, timeout); | 797 | cmd->common.command_id = req->tag; |
914 | if (cmdid < 0) { | 798 | |
915 | unlock_nvmeq(nvmeq); | 799 | nvme_set_info(cmd_rq, &cmdinfo, sync_completion); |
916 | return cmdid; | ||
917 | } | ||
918 | cmd->common.command_id = cmdid; | ||
919 | 800 | ||
920 | set_current_state(TASK_KILLABLE); | 801 | set_current_state(TASK_KILLABLE); |
921 | ret = nvme_submit_cmd(nvmeq, cmd); | 802 | ret = nvme_submit_cmd(nvmeq, cmd); |
922 | if (ret) { | 803 | if (ret) { |
923 | free_cmdid(nvmeq, cmdid, NULL); | 804 | nvme_finish_cmd(nvmeq, req->tag, NULL); |
924 | unlock_nvmeq(nvmeq); | ||
925 | set_current_state(TASK_RUNNING); | 805 | set_current_state(TASK_RUNNING); |
926 | return ret; | ||
927 | } | 806 | } |
928 | unlock_nvmeq(nvmeq); | 807 | ret = schedule_timeout(timeout); |
929 | schedule_timeout(timeout); | 808 | |
930 | 809 | /* | |
931 | if (cmdinfo.status == -EINTR) { | 810 | * Ensure that sync_completion has either run, or that it will |
932 | nvmeq = lock_nvmeq(dev, q_idx); | 811 | * never run. |
933 | if (nvmeq) { | 812 | */ |
934 | nvme_abort_command(nvmeq, cmdid); | 813 | nvme_abort_cmd_info(nvmeq, blk_mq_rq_to_pdu(req)); |
935 | unlock_nvmeq(nvmeq); | 814 | |
936 | } | 815 | /* |
816 | * We never got the completion | ||
817 | */ | ||
818 | if (cmdinfo.status == -EINTR) | ||
937 | return -EINTR; | 819 | return -EINTR; |
938 | } | ||
939 | 820 | ||
940 | if (result) | 821 | if (result) |
941 | *result = cmdinfo.result; | 822 | *result = cmdinfo.result; |
@@ -943,59 +824,99 @@ static int nvme_submit_sync_cmd(struct nvme_dev *dev, int q_idx, | |||
943 | return cmdinfo.status; | 824 | return cmdinfo.status; |
944 | } | 825 | } |
945 | 826 | ||
946 | static int nvme_submit_async_cmd(struct nvme_queue *nvmeq, | 827 | static int nvme_submit_async_admin_req(struct nvme_dev *dev) |
828 | { | ||
829 | struct nvme_queue *nvmeq = dev->queues[0]; | ||
830 | struct nvme_command c; | ||
831 | struct nvme_cmd_info *cmd_info; | ||
832 | struct request *req; | ||
833 | |||
834 | req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, false); | ||
835 | if (IS_ERR(req)) | ||
836 | return PTR_ERR(req); | ||
837 | |||
838 | cmd_info = blk_mq_rq_to_pdu(req); | ||
839 | nvme_set_info(cmd_info, req, async_req_completion); | ||
840 | |||
841 | memset(&c, 0, sizeof(c)); | ||
842 | c.common.opcode = nvme_admin_async_event; | ||
843 | c.common.command_id = req->tag; | ||
844 | |||
845 | return __nvme_submit_cmd(nvmeq, &c); | ||
846 | } | ||
847 | |||
848 | static int nvme_submit_admin_async_cmd(struct nvme_dev *dev, | ||
947 | struct nvme_command *cmd, | 849 | struct nvme_command *cmd, |
948 | struct async_cmd_info *cmdinfo, unsigned timeout) | 850 | struct async_cmd_info *cmdinfo, unsigned timeout) |
949 | { | 851 | { |
950 | int cmdid; | 852 | struct nvme_queue *nvmeq = dev->queues[0]; |
853 | struct request *req; | ||
854 | struct nvme_cmd_info *cmd_rq; | ||
855 | |||
856 | req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_KERNEL, false); | ||
857 | if (IS_ERR(req)) | ||
858 | return PTR_ERR(req); | ||
951 | 859 | ||
952 | cmdid = alloc_cmdid_killable(nvmeq, cmdinfo, async_completion, timeout); | 860 | req->timeout = timeout; |
953 | if (cmdid < 0) | 861 | cmd_rq = blk_mq_rq_to_pdu(req); |
954 | return cmdid; | 862 | cmdinfo->req = req; |
863 | nvme_set_info(cmd_rq, cmdinfo, async_completion); | ||
955 | cmdinfo->status = -EINTR; | 864 | cmdinfo->status = -EINTR; |
956 | cmd->common.command_id = cmdid; | 865 | |
866 | cmd->common.command_id = req->tag; | ||
867 | |||
957 | return nvme_submit_cmd(nvmeq, cmd); | 868 | return nvme_submit_cmd(nvmeq, cmd); |
958 | } | 869 | } |
959 | 870 | ||
960 | int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd, | 871 | static int __nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd, |
961 | u32 *result) | 872 | u32 *result, unsigned timeout) |
962 | { | 873 | { |
963 | return nvme_submit_sync_cmd(dev, 0, cmd, result, ADMIN_TIMEOUT); | 874 | int res; |
875 | struct request *req; | ||
876 | |||
877 | req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_KERNEL, false); | ||
878 | if (IS_ERR(req)) | ||
879 | return PTR_ERR(req); | ||
880 | res = nvme_submit_sync_cmd(req, cmd, result, timeout); | ||
881 | blk_mq_free_request(req); | ||
882 | return res; | ||
964 | } | 883 | } |
965 | 884 | ||
966 | int nvme_submit_io_cmd(struct nvme_dev *dev, struct nvme_command *cmd, | 885 | int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd, |
967 | u32 *result) | 886 | u32 *result) |
968 | { | 887 | { |
969 | return nvme_submit_sync_cmd(dev, smp_processor_id() + 1, cmd, result, | 888 | return __nvme_submit_admin_cmd(dev, cmd, result, ADMIN_TIMEOUT); |
970 | NVME_IO_TIMEOUT); | ||
971 | } | 889 | } |
972 | 890 | ||
973 | static int nvme_submit_admin_cmd_async(struct nvme_dev *dev, | 891 | int nvme_submit_io_cmd(struct nvme_dev *dev, struct nvme_ns *ns, |
974 | struct nvme_command *cmd, struct async_cmd_info *cmdinfo) | 892 | struct nvme_command *cmd, u32 *result) |
975 | { | 893 | { |
976 | return nvme_submit_async_cmd(raw_nvmeq(dev, 0), cmd, cmdinfo, | 894 | int res; |
977 | ADMIN_TIMEOUT); | 895 | struct request *req; |
896 | |||
897 | req = blk_mq_alloc_request(ns->queue, WRITE, (GFP_KERNEL|__GFP_WAIT), | ||
898 | false); | ||
899 | if (IS_ERR(req)) | ||
900 | return PTR_ERR(req); | ||
901 | res = nvme_submit_sync_cmd(req, cmd, result, NVME_IO_TIMEOUT); | ||
902 | blk_mq_free_request(req); | ||
903 | return res; | ||
978 | } | 904 | } |
979 | 905 | ||
980 | static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) | 906 | static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) |
981 | { | 907 | { |
982 | int status; | ||
983 | struct nvme_command c; | 908 | struct nvme_command c; |
984 | 909 | ||
985 | memset(&c, 0, sizeof(c)); | 910 | memset(&c, 0, sizeof(c)); |
986 | c.delete_queue.opcode = opcode; | 911 | c.delete_queue.opcode = opcode; |
987 | c.delete_queue.qid = cpu_to_le16(id); | 912 | c.delete_queue.qid = cpu_to_le16(id); |
988 | 913 | ||
989 | status = nvme_submit_admin_cmd(dev, &c, NULL); | 914 | return nvme_submit_admin_cmd(dev, &c, NULL); |
990 | if (status) | ||
991 | return -EIO; | ||
992 | return 0; | ||
993 | } | 915 | } |
994 | 916 | ||
995 | static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, | 917 | static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, |
996 | struct nvme_queue *nvmeq) | 918 | struct nvme_queue *nvmeq) |
997 | { | 919 | { |
998 | int status; | ||
999 | struct nvme_command c; | 920 | struct nvme_command c; |
1000 | int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED; | 921 | int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED; |
1001 | 922 | ||
@@ -1007,16 +928,12 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, | |||
1007 | c.create_cq.cq_flags = cpu_to_le16(flags); | 928 | c.create_cq.cq_flags = cpu_to_le16(flags); |
1008 | c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector); | 929 | c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector); |
1009 | 930 | ||
1010 | status = nvme_submit_admin_cmd(dev, &c, NULL); | 931 | return nvme_submit_admin_cmd(dev, &c, NULL); |
1011 | if (status) | ||
1012 | return -EIO; | ||
1013 | return 0; | ||
1014 | } | 932 | } |
1015 | 933 | ||
1016 | static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, | 934 | static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, |
1017 | struct nvme_queue *nvmeq) | 935 | struct nvme_queue *nvmeq) |
1018 | { | 936 | { |
1019 | int status; | ||
1020 | struct nvme_command c; | 937 | struct nvme_command c; |
1021 | int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM; | 938 | int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM; |
1022 | 939 | ||
@@ -1028,10 +945,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, | |||
1028 | c.create_sq.sq_flags = cpu_to_le16(flags); | 945 | c.create_sq.sq_flags = cpu_to_le16(flags); |
1029 | c.create_sq.cqid = cpu_to_le16(qid); | 946 | c.create_sq.cqid = cpu_to_le16(qid); |
1030 | 947 | ||
1031 | status = nvme_submit_admin_cmd(dev, &c, NULL); | 948 | return nvme_submit_admin_cmd(dev, &c, NULL); |
1032 | if (status) | ||
1033 | return -EIO; | ||
1034 | return 0; | ||
1035 | } | 949 | } |
1036 | 950 | ||
1037 | static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid) | 951 | static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid) |
@@ -1087,28 +1001,27 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, | |||
1087 | } | 1001 | } |
1088 | 1002 | ||
1089 | /** | 1003 | /** |
1090 | * nvme_abort_cmd - Attempt aborting a command | 1004 | * nvme_abort_req - Attempt aborting a request |
1091 | * @cmdid: Command id of a timed out IO | ||
1092 | * @queue: The queue with timed out IO | ||
1093 | * | 1005 | * |
1094 | * Schedule controller reset if the command was already aborted once before and | 1006 | * Schedule controller reset if the command was already aborted once before and |
1095 | * still hasn't been returned to the driver, or if this is the admin queue. | 1007 | * still hasn't been returned to the driver, or if this is the admin queue. |
1096 | */ | 1008 | */ |
1097 | static void nvme_abort_cmd(int cmdid, struct nvme_queue *nvmeq) | 1009 | static void nvme_abort_req(struct request *req) |
1098 | { | 1010 | { |
1099 | int a_cmdid; | 1011 | struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req); |
1100 | struct nvme_command cmd; | 1012 | struct nvme_queue *nvmeq = cmd_rq->nvmeq; |
1101 | struct nvme_dev *dev = nvmeq->dev; | 1013 | struct nvme_dev *dev = nvmeq->dev; |
1102 | struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); | 1014 | struct request *abort_req; |
1103 | struct nvme_queue *adminq; | 1015 | struct nvme_cmd_info *abort_cmd; |
1016 | struct nvme_command cmd; | ||
1104 | 1017 | ||
1105 | if (!nvmeq->qid || info[cmdid].aborted) { | 1018 | if (!nvmeq->qid || cmd_rq->aborted) { |
1106 | if (work_busy(&dev->reset_work)) | 1019 | if (work_busy(&dev->reset_work)) |
1107 | return; | 1020 | return; |
1108 | list_del_init(&dev->node); | 1021 | list_del_init(&dev->node); |
1109 | dev_warn(&dev->pci_dev->dev, | 1022 | dev_warn(&dev->pci_dev->dev, |
1110 | "I/O %d QID %d timeout, reset controller\n", cmdid, | 1023 | "I/O %d QID %d timeout, reset controller\n", |
1111 | nvmeq->qid); | 1024 | req->tag, nvmeq->qid); |
1112 | dev->reset_workfn = nvme_reset_failed_dev; | 1025 | dev->reset_workfn = nvme_reset_failed_dev; |
1113 | queue_work(nvme_workq, &dev->reset_work); | 1026 | queue_work(nvme_workq, &dev->reset_work); |
1114 | return; | 1027 | return; |
@@ -1117,120 +1030,110 @@ static void nvme_abort_cmd(int cmdid, struct nvme_queue *nvmeq) | |||
1117 | if (!dev->abort_limit) | 1030 | if (!dev->abort_limit) |
1118 | return; | 1031 | return; |
1119 | 1032 | ||
1120 | adminq = rcu_dereference(dev->queues[0]); | 1033 | abort_req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, |
1121 | a_cmdid = alloc_cmdid(adminq, CMD_CTX_ABORT, special_completion, | 1034 | false); |
1122 | ADMIN_TIMEOUT); | 1035 | if (IS_ERR(abort_req)) |
1123 | if (a_cmdid < 0) | ||
1124 | return; | 1036 | return; |
1125 | 1037 | ||
1038 | abort_cmd = blk_mq_rq_to_pdu(abort_req); | ||
1039 | nvme_set_info(abort_cmd, abort_req, abort_completion); | ||
1040 | |||
1126 | memset(&cmd, 0, sizeof(cmd)); | 1041 | memset(&cmd, 0, sizeof(cmd)); |
1127 | cmd.abort.opcode = nvme_admin_abort_cmd; | 1042 | cmd.abort.opcode = nvme_admin_abort_cmd; |
1128 | cmd.abort.cid = cmdid; | 1043 | cmd.abort.cid = req->tag; |
1129 | cmd.abort.sqid = cpu_to_le16(nvmeq->qid); | 1044 | cmd.abort.sqid = cpu_to_le16(nvmeq->qid); |
1130 | cmd.abort.command_id = a_cmdid; | 1045 | cmd.abort.command_id = abort_req->tag; |
1131 | 1046 | ||
1132 | --dev->abort_limit; | 1047 | --dev->abort_limit; |
1133 | info[cmdid].aborted = 1; | 1048 | cmd_rq->aborted = 1; |
1134 | info[cmdid].timeout = jiffies + ADMIN_TIMEOUT; | ||
1135 | 1049 | ||
1136 | dev_warn(nvmeq->q_dmadev, "Aborting I/O %d QID %d\n", cmdid, | 1050 | dev_warn(nvmeq->q_dmadev, "Aborting I/O %d QID %d\n", req->tag, |
1137 | nvmeq->qid); | 1051 | nvmeq->qid); |
1138 | nvme_submit_cmd(adminq, &cmd); | 1052 | if (nvme_submit_cmd(dev->queues[0], &cmd) < 0) { |
1053 | dev_warn(nvmeq->q_dmadev, | ||
1054 | "Could not abort I/O %d QID %d", | ||
1055 | req->tag, nvmeq->qid); | ||
1056 | blk_mq_free_request(abort_req); | ||
1057 | } | ||
1139 | } | 1058 | } |
1140 | 1059 | ||
1141 | /** | 1060 | static void nvme_cancel_queue_ios(struct blk_mq_hw_ctx *hctx, |
1142 | * nvme_cancel_ios - Cancel outstanding I/Os | 1061 | struct request *req, void *data, bool reserved) |
1143 | * @queue: The queue to cancel I/Os on | ||
1144 | * @timeout: True to only cancel I/Os which have timed out | ||
1145 | */ | ||
1146 | static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout) | ||
1147 | { | 1062 | { |
1148 | int depth = nvmeq->q_depth - 1; | 1063 | struct nvme_queue *nvmeq = data; |
1149 | struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); | 1064 | void *ctx; |
1150 | unsigned long now = jiffies; | 1065 | nvme_completion_fn fn; |
1151 | int cmdid; | 1066 | struct nvme_cmd_info *cmd; |
1067 | static struct nvme_completion cqe = { | ||
1068 | .status = cpu_to_le16(NVME_SC_ABORT_REQ << 1), | ||
1069 | }; | ||
1152 | 1070 | ||
1153 | for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) { | 1071 | cmd = blk_mq_rq_to_pdu(req); |
1154 | void *ctx; | ||
1155 | nvme_completion_fn fn; | ||
1156 | static struct nvme_completion cqe = { | ||
1157 | .status = cpu_to_le16(NVME_SC_ABORT_REQ << 1), | ||
1158 | }; | ||
1159 | 1072 | ||
1160 | if (timeout && !time_after(now, info[cmdid].timeout)) | 1073 | if (cmd->ctx == CMD_CTX_CANCELLED) |
1161 | continue; | 1074 | return; |
1162 | if (info[cmdid].ctx == CMD_CTX_CANCELLED) | 1075 | |
1163 | continue; | 1076 | dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d QID %d\n", |
1164 | if (timeout && nvmeq->dev->initialized) { | 1077 | req->tag, nvmeq->qid); |
1165 | nvme_abort_cmd(cmdid, nvmeq); | 1078 | ctx = cancel_cmd_info(cmd, &fn); |
1166 | continue; | 1079 | fn(nvmeq, ctx, &cqe); |
1167 | } | ||
1168 | dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d QID %d\n", cmdid, | ||
1169 | nvmeq->qid); | ||
1170 | ctx = cancel_cmdid(nvmeq, cmdid, &fn); | ||
1171 | fn(nvmeq, ctx, &cqe); | ||
1172 | } | ||
1173 | } | 1080 | } |
1174 | 1081 | ||
1175 | static void nvme_free_queue(struct rcu_head *r) | 1082 | static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) |
1176 | { | 1083 | { |
1177 | struct nvme_queue *nvmeq = container_of(r, struct nvme_queue, r_head); | 1084 | struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req); |
1085 | struct nvme_queue *nvmeq = cmd->nvmeq; | ||
1178 | 1086 | ||
1179 | spin_lock_irq(&nvmeq->q_lock); | 1087 | dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag, |
1180 | while (bio_list_peek(&nvmeq->sq_cong)) { | 1088 | nvmeq->qid); |
1181 | struct bio *bio = bio_list_pop(&nvmeq->sq_cong); | 1089 | if (nvmeq->dev->initialized) |
1182 | bio_endio(bio, -EIO); | 1090 | nvme_abort_req(req); |
1183 | } | 1091 | |
1184 | while (!list_empty(&nvmeq->iod_bio)) { | 1092 | /* |
1185 | static struct nvme_completion cqe = { | 1093 | * The aborted req will be completed on receiving the abort req. |
1186 | .status = cpu_to_le16( | 1094 | * We enable the timer again. If hit twice, it'll cause a device reset, |
1187 | (NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1), | 1095 | * as the device then is in a faulty state. |
1188 | }; | 1096 | */ |
1189 | struct nvme_iod *iod = list_first_entry(&nvmeq->iod_bio, | 1097 | return BLK_EH_RESET_TIMER; |
1190 | struct nvme_iod, | 1098 | } |
1191 | node); | ||
1192 | list_del(&iod->node); | ||
1193 | bio_completion(nvmeq, iod, &cqe); | ||
1194 | } | ||
1195 | spin_unlock_irq(&nvmeq->q_lock); | ||
1196 | 1099 | ||
1100 | static void nvme_free_queue(struct nvme_queue *nvmeq) | ||
1101 | { | ||
1197 | dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), | 1102 | dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), |
1198 | (void *)nvmeq->cqes, nvmeq->cq_dma_addr); | 1103 | (void *)nvmeq->cqes, nvmeq->cq_dma_addr); |
1199 | dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), | 1104 | dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), |
1200 | nvmeq->sq_cmds, nvmeq->sq_dma_addr); | 1105 | nvmeq->sq_cmds, nvmeq->sq_dma_addr); |
1201 | if (nvmeq->qid) | ||
1202 | free_cpumask_var(nvmeq->cpu_mask); | ||
1203 | kfree(nvmeq); | 1106 | kfree(nvmeq); |
1204 | } | 1107 | } |
1205 | 1108 | ||
1206 | static void nvme_free_queues(struct nvme_dev *dev, int lowest) | 1109 | static void nvme_free_queues(struct nvme_dev *dev, int lowest) |
1207 | { | 1110 | { |
1111 | LLIST_HEAD(q_list); | ||
1112 | struct nvme_queue *nvmeq, *next; | ||
1113 | struct llist_node *entry; | ||
1208 | int i; | 1114 | int i; |
1209 | 1115 | ||
1210 | for (i = dev->queue_count - 1; i >= lowest; i--) { | 1116 | for (i = dev->queue_count - 1; i >= lowest; i--) { |
1211 | struct nvme_queue *nvmeq = raw_nvmeq(dev, i); | 1117 | struct nvme_queue *nvmeq = dev->queues[i]; |
1212 | rcu_assign_pointer(dev->queues[i], NULL); | 1118 | llist_add(&nvmeq->node, &q_list); |
1213 | call_rcu(&nvmeq->r_head, nvme_free_queue); | ||
1214 | dev->queue_count--; | 1119 | dev->queue_count--; |
1120 | dev->queues[i] = NULL; | ||
1215 | } | 1121 | } |
1122 | synchronize_rcu(); | ||
1123 | entry = llist_del_all(&q_list); | ||
1124 | llist_for_each_entry_safe(nvmeq, next, entry, node) | ||
1125 | nvme_free_queue(nvmeq); | ||
1216 | } | 1126 | } |
1217 | 1127 | ||
1218 | /** | 1128 | /** |
1219 | * nvme_suspend_queue - put queue into suspended state | 1129 | * nvme_suspend_queue - put queue into suspended state |
1220 | * @nvmeq - queue to suspend | 1130 | * @nvmeq - queue to suspend |
1221 | * | ||
1222 | * Returns 1 if already suspended, 0 otherwise. | ||
1223 | */ | 1131 | */ |
1224 | static int nvme_suspend_queue(struct nvme_queue *nvmeq) | 1132 | static int nvme_suspend_queue(struct nvme_queue *nvmeq) |
1225 | { | 1133 | { |
1226 | int vector = nvmeq->dev->entry[nvmeq->cq_vector].vector; | 1134 | int vector = nvmeq->dev->entry[nvmeq->cq_vector].vector; |
1227 | 1135 | ||
1228 | spin_lock_irq(&nvmeq->q_lock); | 1136 | spin_lock_irq(&nvmeq->q_lock); |
1229 | if (nvmeq->q_suspended) { | ||
1230 | spin_unlock_irq(&nvmeq->q_lock); | ||
1231 | return 1; | ||
1232 | } | ||
1233 | nvmeq->q_suspended = 1; | ||
1234 | nvmeq->dev->online_queues--; | 1137 | nvmeq->dev->online_queues--; |
1235 | spin_unlock_irq(&nvmeq->q_lock); | 1138 | spin_unlock_irq(&nvmeq->q_lock); |
1236 | 1139 | ||
@@ -1242,15 +1145,18 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) | |||
1242 | 1145 | ||
1243 | static void nvme_clear_queue(struct nvme_queue *nvmeq) | 1146 | static void nvme_clear_queue(struct nvme_queue *nvmeq) |
1244 | { | 1147 | { |
1148 | struct blk_mq_hw_ctx *hctx = nvmeq->hctx; | ||
1149 | |||
1245 | spin_lock_irq(&nvmeq->q_lock); | 1150 | spin_lock_irq(&nvmeq->q_lock); |
1246 | nvme_process_cq(nvmeq); | 1151 | nvme_process_cq(nvmeq); |
1247 | nvme_cancel_ios(nvmeq, false); | 1152 | if (hctx && hctx->tags) |
1153 | blk_mq_tag_busy_iter(hctx, nvme_cancel_queue_ios, nvmeq); | ||
1248 | spin_unlock_irq(&nvmeq->q_lock); | 1154 | spin_unlock_irq(&nvmeq->q_lock); |
1249 | } | 1155 | } |
1250 | 1156 | ||
1251 | static void nvme_disable_queue(struct nvme_dev *dev, int qid) | 1157 | static void nvme_disable_queue(struct nvme_dev *dev, int qid) |
1252 | { | 1158 | { |
1253 | struct nvme_queue *nvmeq = raw_nvmeq(dev, qid); | 1159 | struct nvme_queue *nvmeq = dev->queues[qid]; |
1254 | 1160 | ||
1255 | if (!nvmeq) | 1161 | if (!nvmeq) |
1256 | return; | 1162 | return; |
@@ -1270,25 +1176,20 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, | |||
1270 | int depth, int vector) | 1176 | int depth, int vector) |
1271 | { | 1177 | { |
1272 | struct device *dmadev = &dev->pci_dev->dev; | 1178 | struct device *dmadev = &dev->pci_dev->dev; |
1273 | unsigned extra = nvme_queue_extra(depth); | 1179 | struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL); |
1274 | struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL); | ||
1275 | if (!nvmeq) | 1180 | if (!nvmeq) |
1276 | return NULL; | 1181 | return NULL; |
1277 | 1182 | ||
1278 | nvmeq->cqes = dma_alloc_coherent(dmadev, CQ_SIZE(depth), | 1183 | nvmeq->cqes = dma_zalloc_coherent(dmadev, CQ_SIZE(depth), |
1279 | &nvmeq->cq_dma_addr, GFP_KERNEL); | 1184 | &nvmeq->cq_dma_addr, GFP_KERNEL); |
1280 | if (!nvmeq->cqes) | 1185 | if (!nvmeq->cqes) |
1281 | goto free_nvmeq; | 1186 | goto free_nvmeq; |
1282 | memset((void *)nvmeq->cqes, 0, CQ_SIZE(depth)); | ||
1283 | 1187 | ||
1284 | nvmeq->sq_cmds = dma_alloc_coherent(dmadev, SQ_SIZE(depth), | 1188 | nvmeq->sq_cmds = dma_alloc_coherent(dmadev, SQ_SIZE(depth), |
1285 | &nvmeq->sq_dma_addr, GFP_KERNEL); | 1189 | &nvmeq->sq_dma_addr, GFP_KERNEL); |
1286 | if (!nvmeq->sq_cmds) | 1190 | if (!nvmeq->sq_cmds) |
1287 | goto free_cqdma; | 1191 | goto free_cqdma; |
1288 | 1192 | ||
1289 | if (qid && !zalloc_cpumask_var(&nvmeq->cpu_mask, GFP_KERNEL)) | ||
1290 | goto free_sqdma; | ||
1291 | |||
1292 | nvmeq->q_dmadev = dmadev; | 1193 | nvmeq->q_dmadev = dmadev; |
1293 | nvmeq->dev = dev; | 1194 | nvmeq->dev = dev; |
1294 | snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d", | 1195 | snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d", |
@@ -1296,23 +1197,15 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, | |||
1296 | spin_lock_init(&nvmeq->q_lock); | 1197 | spin_lock_init(&nvmeq->q_lock); |
1297 | nvmeq->cq_head = 0; | 1198 | nvmeq->cq_head = 0; |
1298 | nvmeq->cq_phase = 1; | 1199 | nvmeq->cq_phase = 1; |
1299 | init_waitqueue_head(&nvmeq->sq_full); | ||
1300 | init_waitqueue_entry(&nvmeq->sq_cong_wait, nvme_thread); | ||
1301 | bio_list_init(&nvmeq->sq_cong); | ||
1302 | INIT_LIST_HEAD(&nvmeq->iod_bio); | ||
1303 | nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; | 1200 | nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; |
1304 | nvmeq->q_depth = depth; | 1201 | nvmeq->q_depth = depth; |
1305 | nvmeq->cq_vector = vector; | 1202 | nvmeq->cq_vector = vector; |
1306 | nvmeq->qid = qid; | 1203 | nvmeq->qid = qid; |
1307 | nvmeq->q_suspended = 1; | ||
1308 | dev->queue_count++; | 1204 | dev->queue_count++; |
1309 | rcu_assign_pointer(dev->queues[qid], nvmeq); | 1205 | dev->queues[qid] = nvmeq; |
1310 | 1206 | ||
1311 | return nvmeq; | 1207 | return nvmeq; |
1312 | 1208 | ||
1313 | free_sqdma: | ||
1314 | dma_free_coherent(dmadev, SQ_SIZE(depth), (void *)nvmeq->sq_cmds, | ||
1315 | nvmeq->sq_dma_addr); | ||
1316 | free_cqdma: | 1209 | free_cqdma: |
1317 | dma_free_coherent(dmadev, CQ_SIZE(depth), (void *)nvmeq->cqes, | 1210 | dma_free_coherent(dmadev, CQ_SIZE(depth), (void *)nvmeq->cqes, |
1318 | nvmeq->cq_dma_addr); | 1211 | nvmeq->cq_dma_addr); |
@@ -1335,17 +1228,15 @@ static int queue_request_irq(struct nvme_dev *dev, struct nvme_queue *nvmeq, | |||
1335 | static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) | 1228 | static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) |
1336 | { | 1229 | { |
1337 | struct nvme_dev *dev = nvmeq->dev; | 1230 | struct nvme_dev *dev = nvmeq->dev; |
1338 | unsigned extra = nvme_queue_extra(nvmeq->q_depth); | ||
1339 | 1231 | ||
1232 | spin_lock_irq(&nvmeq->q_lock); | ||
1340 | nvmeq->sq_tail = 0; | 1233 | nvmeq->sq_tail = 0; |
1341 | nvmeq->cq_head = 0; | 1234 | nvmeq->cq_head = 0; |
1342 | nvmeq->cq_phase = 1; | 1235 | nvmeq->cq_phase = 1; |
1343 | nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; | 1236 | nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; |
1344 | memset(nvmeq->cmdid_data, 0, extra); | ||
1345 | memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth)); | 1237 | memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth)); |
1346 | nvme_cancel_ios(nvmeq, false); | ||
1347 | nvmeq->q_suspended = 0; | ||
1348 | dev->online_queues++; | 1238 | dev->online_queues++; |
1239 | spin_unlock_irq(&nvmeq->q_lock); | ||
1349 | } | 1240 | } |
1350 | 1241 | ||
1351 | static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) | 1242 | static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) |
@@ -1365,10 +1256,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) | |||
1365 | if (result < 0) | 1256 | if (result < 0) |
1366 | goto release_sq; | 1257 | goto release_sq; |
1367 | 1258 | ||
1368 | spin_lock_irq(&nvmeq->q_lock); | ||
1369 | nvme_init_queue(nvmeq, qid); | 1259 | nvme_init_queue(nvmeq, qid); |
1370 | spin_unlock_irq(&nvmeq->q_lock); | ||
1371 | |||
1372 | return result; | 1260 | return result; |
1373 | 1261 | ||
1374 | release_sq: | 1262 | release_sq: |
@@ -1408,27 +1296,32 @@ static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled) | |||
1408 | */ | 1296 | */ |
1409 | static int nvme_disable_ctrl(struct nvme_dev *dev, u64 cap) | 1297 | static int nvme_disable_ctrl(struct nvme_dev *dev, u64 cap) |
1410 | { | 1298 | { |
1411 | u32 cc = readl(&dev->bar->cc); | 1299 | dev->ctrl_config &= ~NVME_CC_SHN_MASK; |
1300 | dev->ctrl_config &= ~NVME_CC_ENABLE; | ||
1301 | writel(dev->ctrl_config, &dev->bar->cc); | ||
1412 | 1302 | ||
1413 | if (cc & NVME_CC_ENABLE) | ||
1414 | writel(cc & ~NVME_CC_ENABLE, &dev->bar->cc); | ||
1415 | return nvme_wait_ready(dev, cap, false); | 1303 | return nvme_wait_ready(dev, cap, false); |
1416 | } | 1304 | } |
1417 | 1305 | ||
1418 | static int nvme_enable_ctrl(struct nvme_dev *dev, u64 cap) | 1306 | static int nvme_enable_ctrl(struct nvme_dev *dev, u64 cap) |
1419 | { | 1307 | { |
1308 | dev->ctrl_config &= ~NVME_CC_SHN_MASK; | ||
1309 | dev->ctrl_config |= NVME_CC_ENABLE; | ||
1310 | writel(dev->ctrl_config, &dev->bar->cc); | ||
1311 | |||
1420 | return nvme_wait_ready(dev, cap, true); | 1312 | return nvme_wait_ready(dev, cap, true); |
1421 | } | 1313 | } |
1422 | 1314 | ||
1423 | static int nvme_shutdown_ctrl(struct nvme_dev *dev) | 1315 | static int nvme_shutdown_ctrl(struct nvme_dev *dev) |
1424 | { | 1316 | { |
1425 | unsigned long timeout; | 1317 | unsigned long timeout; |
1426 | u32 cc; | ||
1427 | 1318 | ||
1428 | cc = (readl(&dev->bar->cc) & ~NVME_CC_SHN_MASK) | NVME_CC_SHN_NORMAL; | 1319 | dev->ctrl_config &= ~NVME_CC_SHN_MASK; |
1429 | writel(cc, &dev->bar->cc); | 1320 | dev->ctrl_config |= NVME_CC_SHN_NORMAL; |
1430 | 1321 | ||
1431 | timeout = 2 * HZ + jiffies; | 1322 | writel(dev->ctrl_config, &dev->bar->cc); |
1323 | |||
1324 | timeout = SHUTDOWN_TIMEOUT + jiffies; | ||
1432 | while ((readl(&dev->bar->csts) & NVME_CSTS_SHST_MASK) != | 1325 | while ((readl(&dev->bar->csts) & NVME_CSTS_SHST_MASK) != |
1433 | NVME_CSTS_SHST_CMPLT) { | 1326 | NVME_CSTS_SHST_CMPLT) { |
1434 | msleep(100); | 1327 | msleep(100); |
@@ -1444,20 +1337,86 @@ static int nvme_shutdown_ctrl(struct nvme_dev *dev) | |||
1444 | return 0; | 1337 | return 0; |
1445 | } | 1338 | } |
1446 | 1339 | ||
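
nvme_shutdown_ctrl() above now requests a normal shutdown through the cached ctrl_config value and polls CSTS.SHST until the controller reports completion or SHUTDOWN_TIMEOUT expires. The same deadline-based polling shape, modelled in plain userspace C; the register accessor and the timeout value here are stand-ins:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

#define SHUTDOWN_TIMEOUT_SEC    20      /* illustrative; not the driver's value */

/* Stand-in for reading CSTS.SHST from BAR0. */
static bool shutdown_complete(void)
{
        return true;    /* a real implementation would read the register */
}

static int wait_for_shutdown(void)
{
        time_t deadline = time(NULL) + SHUTDOWN_TIMEOUT_SEC;

        while (!shutdown_complete()) {
                usleep(100 * 1000);             /* the driver msleep()s 100 ms */
                if (time(NULL) > deadline) {
                        fprintf(stderr, "device shutdown incomplete\n");
                        return -1;
                }
        }
        return 0;
}

int main(void)
{
        return wait_for_shutdown();
}
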
1340 | static struct blk_mq_ops nvme_mq_admin_ops = { | ||
1341 | .queue_rq = nvme_admin_queue_rq, | ||
1342 | .map_queue = blk_mq_map_queue, | ||
1343 | .init_hctx = nvme_admin_init_hctx, | ||
1344 | .exit_hctx = nvme_exit_hctx, | ||
1345 | .init_request = nvme_admin_init_request, | ||
1346 | .timeout = nvme_timeout, | ||
1347 | }; | ||
1348 | |||
1349 | static struct blk_mq_ops nvme_mq_ops = { | ||
1350 | .queue_rq = nvme_queue_rq, | ||
1351 | .map_queue = blk_mq_map_queue, | ||
1352 | .init_hctx = nvme_init_hctx, | ||
1353 | .exit_hctx = nvme_exit_hctx, | ||
1354 | .init_request = nvme_init_request, | ||
1355 | .timeout = nvme_timeout, | ||
1356 | }; | ||
1357 | |||
1358 | static int nvme_alloc_admin_tags(struct nvme_dev *dev) | ||
1359 | { | ||
1360 | if (!dev->admin_q) { | ||
1361 | dev->admin_tagset.ops = &nvme_mq_admin_ops; | ||
1362 | dev->admin_tagset.nr_hw_queues = 1; | ||
1363 | dev->admin_tagset.queue_depth = NVME_AQ_DEPTH - 1; | ||
1364 | dev->admin_tagset.timeout = ADMIN_TIMEOUT; | ||
1365 | dev->admin_tagset.numa_node = dev_to_node(&dev->pci_dev->dev); | ||
1366 | dev->admin_tagset.cmd_size = sizeof(struct nvme_cmd_info); | ||
1367 | dev->admin_tagset.driver_data = dev; | ||
1368 | |||
1369 | if (blk_mq_alloc_tag_set(&dev->admin_tagset)) | ||
1370 | return -ENOMEM; | ||
1371 | |||
1372 | dev->admin_q = blk_mq_init_queue(&dev->admin_tagset); | ||
1373 | if (!dev->admin_q) { | ||
1374 | blk_mq_free_tag_set(&dev->admin_tagset); | ||
1375 | return -ENOMEM; | ||
1376 | } | ||
1377 | } | ||
1378 | |||
1379 | return 0; | ||
1380 | } | ||
1381 | |||
1382 | static void nvme_free_admin_tags(struct nvme_dev *dev) | ||
1383 | { | ||
1384 | if (dev->admin_q) | ||
1385 | blk_mq_free_tag_set(&dev->admin_tagset); | ||
1386 | } | ||
1387 | |||
1447 | static int nvme_configure_admin_queue(struct nvme_dev *dev) | 1388 | static int nvme_configure_admin_queue(struct nvme_dev *dev) |
1448 | { | 1389 | { |
1449 | int result; | 1390 | int result; |
1450 | u32 aqa; | 1391 | u32 aqa; |
1451 | u64 cap = readq(&dev->bar->cap); | 1392 | u64 cap = readq(&dev->bar->cap); |
1452 | struct nvme_queue *nvmeq; | 1393 | struct nvme_queue *nvmeq; |
1394 | unsigned page_shift = PAGE_SHIFT; | ||
1395 | unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12; | ||
1396 | unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12; | ||
1397 | |||
1398 | if (page_shift < dev_page_min) { | ||
1399 | dev_err(&dev->pci_dev->dev, | ||
1400 | "Minimum device page size (%u) too large for " | ||
1401 | "host (%u)\n", 1 << dev_page_min, | ||
1402 | 1 << page_shift); | ||
1403 | return -ENODEV; | ||
1404 | } | ||
1405 | if (page_shift > dev_page_max) { | ||
1406 | dev_info(&dev->pci_dev->dev, | ||
1407 | "Device maximum page size (%u) smaller than " | ||
1408 | "host (%u); enabling work-around\n", | ||
1409 | 1 << dev_page_max, 1 << page_shift); | ||
1410 | page_shift = dev_page_max; | ||
1411 | } | ||
1453 | 1412 | ||
1454 | result = nvme_disable_ctrl(dev, cap); | 1413 | result = nvme_disable_ctrl(dev, cap); |
1455 | if (result < 0) | 1414 | if (result < 0) |
1456 | return result; | 1415 | return result; |
1457 | 1416 | ||
1458 | nvmeq = raw_nvmeq(dev, 0); | 1417 | nvmeq = dev->queues[0]; |
1459 | if (!nvmeq) { | 1418 | if (!nvmeq) { |
1460 | nvmeq = nvme_alloc_queue(dev, 0, 64, 0); | 1419 | nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH, 0); |
1461 | if (!nvmeq) | 1420 | if (!nvmeq) |
1462 | return -ENOMEM; | 1421 | return -ENOMEM; |
1463 | } | 1422 | } |
@@ -1465,27 +1424,35 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) | |||
1465 | aqa = nvmeq->q_depth - 1; | 1424 | aqa = nvmeq->q_depth - 1; |
1466 | aqa |= aqa << 16; | 1425 | aqa |= aqa << 16; |
1467 | 1426 | ||
1468 | dev->ctrl_config = NVME_CC_ENABLE | NVME_CC_CSS_NVM; | 1427 | dev->page_size = 1 << page_shift; |
1469 | dev->ctrl_config |= (PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT; | 1428 | |
1429 | dev->ctrl_config = NVME_CC_CSS_NVM; | ||
1430 | dev->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT; | ||
1470 | dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE; | 1431 | dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE; |
1471 | dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; | 1432 | dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; |
1472 | 1433 | ||
1473 | writel(aqa, &dev->bar->aqa); | 1434 | writel(aqa, &dev->bar->aqa); |
1474 | writeq(nvmeq->sq_dma_addr, &dev->bar->asq); | 1435 | writeq(nvmeq->sq_dma_addr, &dev->bar->asq); |
1475 | writeq(nvmeq->cq_dma_addr, &dev->bar->acq); | 1436 | writeq(nvmeq->cq_dma_addr, &dev->bar->acq); |
1476 | writel(dev->ctrl_config, &dev->bar->cc); | ||
1477 | 1437 | ||
1478 | result = nvme_enable_ctrl(dev, cap); | 1438 | result = nvme_enable_ctrl(dev, cap); |
1479 | if (result) | 1439 | if (result) |
1480 | return result; | 1440 | goto free_nvmeq; |
1441 | |||
1442 | result = nvme_alloc_admin_tags(dev); | ||
1443 | if (result) | ||
1444 | goto free_nvmeq; | ||
1481 | 1445 | ||
1482 | result = queue_request_irq(dev, nvmeq, nvmeq->irqname); | 1446 | result = queue_request_irq(dev, nvmeq, nvmeq->irqname); |
1483 | if (result) | 1447 | if (result) |
1484 | return result; | 1448 | goto free_tags; |
1485 | 1449 | ||
1486 | spin_lock_irq(&nvmeq->q_lock); | 1450 | return result; |
1487 | nvme_init_queue(nvmeq, 0); | 1451 | |
1488 | spin_unlock_irq(&nvmeq->q_lock); | 1452 | free_tags: |
1453 | nvme_free_admin_tags(dev); | ||
1454 | free_nvmeq: | ||
1455 | nvme_free_queues(dev, 0); | ||
1489 | return result; | 1456 | return result; |
1490 | } | 1457 | } |
1491 | 1458 | ||
@@ -1516,7 +1483,7 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, | |||
1516 | } | 1483 | } |
1517 | 1484 | ||
1518 | err = -ENOMEM; | 1485 | err = -ENOMEM; |
1519 | iod = nvme_alloc_iod(count, length, GFP_KERNEL); | 1486 | iod = nvme_alloc_iod(count, length, dev, GFP_KERNEL); |
1520 | if (!iod) | 1487 | if (!iod) |
1521 | goto put_pages; | 1488 | goto put_pages; |
1522 | 1489 | ||
@@ -1644,7 +1611,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) | |||
1644 | if (length != (io.nblocks + 1) << ns->lba_shift) | 1611 | if (length != (io.nblocks + 1) << ns->lba_shift) |
1645 | status = -ENOMEM; | 1612 | status = -ENOMEM; |
1646 | else | 1613 | else |
1647 | status = nvme_submit_io_cmd(dev, &c, NULL); | 1614 | status = nvme_submit_io_cmd(dev, ns, &c, NULL); |
1648 | 1615 | ||
1649 | if (meta_len) { | 1616 | if (meta_len) { |
1650 | if (status == NVME_SC_SUCCESS && !(io.opcode & 1)) { | 1617 | if (status == NVME_SC_SUCCESS && !(io.opcode & 1)) { |
@@ -1676,10 +1643,10 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) | |||
1676 | return status; | 1643 | return status; |
1677 | } | 1644 | } |
1678 | 1645 | ||
1679 | static int nvme_user_admin_cmd(struct nvme_dev *dev, | 1646 | static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns, |
1680 | struct nvme_admin_cmd __user *ucmd) | 1647 | struct nvme_passthru_cmd __user *ucmd) |
1681 | { | 1648 | { |
1682 | struct nvme_admin_cmd cmd; | 1649 | struct nvme_passthru_cmd cmd; |
1683 | struct nvme_command c; | 1650 | struct nvme_command c; |
1684 | int status, length; | 1651 | int status, length; |
1685 | struct nvme_iod *uninitialized_var(iod); | 1652 | struct nvme_iod *uninitialized_var(iod); |
@@ -1716,10 +1683,23 @@ static int nvme_user_admin_cmd(struct nvme_dev *dev, | |||
1716 | 1683 | ||
1717 | timeout = cmd.timeout_ms ? msecs_to_jiffies(cmd.timeout_ms) : | 1684 | timeout = cmd.timeout_ms ? msecs_to_jiffies(cmd.timeout_ms) : |
1718 | ADMIN_TIMEOUT; | 1685 | ADMIN_TIMEOUT; |
1686 | |||
1719 | if (length != cmd.data_len) | 1687 | if (length != cmd.data_len) |
1720 | status = -ENOMEM; | 1688 | status = -ENOMEM; |
1721 | else | 1689 | else if (ns) { |
1722 | status = nvme_submit_sync_cmd(dev, 0, &c, &cmd.result, timeout); | 1690 | struct request *req; |
1691 | |||
1692 | req = blk_mq_alloc_request(ns->queue, WRITE, | ||
1693 | (GFP_KERNEL|__GFP_WAIT), false); | ||
1694 | if (IS_ERR(req)) | ||
1695 | status = PTR_ERR(req); | ||
1696 | else { | ||
1697 | status = nvme_submit_sync_cmd(req, &c, &cmd.result, | ||
1698 | timeout); | ||
1699 | blk_mq_free_request(req); | ||
1700 | } | ||
1701 | } else | ||
1702 | status = __nvme_submit_admin_cmd(dev, &c, &cmd.result, timeout); | ||
1723 | 1703 | ||
1724 | if (cmd.data_len) { | 1704 | if (cmd.data_len) { |
1725 | nvme_unmap_user_pages(dev, cmd.opcode & 1, iod); | 1705 | nvme_unmap_user_pages(dev, cmd.opcode & 1, iod); |
@@ -1743,7 +1723,9 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, | |||
1743 | force_successful_syscall_return(); | 1723 | force_successful_syscall_return(); |
1744 | return ns->ns_id; | 1724 | return ns->ns_id; |
1745 | case NVME_IOCTL_ADMIN_CMD: | 1725 | case NVME_IOCTL_ADMIN_CMD: |
1746 | return nvme_user_admin_cmd(ns->dev, (void __user *)arg); | 1726 | return nvme_user_cmd(ns->dev, NULL, (void __user *)arg); |
1727 | case NVME_IOCTL_IO_CMD: | ||
1728 | return nvme_user_cmd(ns->dev, ns, (void __user *)arg); | ||
1747 | case NVME_IOCTL_SUBMIT_IO: | 1729 | case NVME_IOCTL_SUBMIT_IO: |
1748 | return nvme_submit_io(ns, (void __user *)arg); | 1730 | return nvme_submit_io(ns, (void __user *)arg); |
1749 | case SG_GET_VERSION_NUM: | 1731 | case SG_GET_VERSION_NUM: |
@@ -1759,11 +1741,9 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, | |||
1759 | static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, | 1741 | static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, |
1760 | unsigned int cmd, unsigned long arg) | 1742 | unsigned int cmd, unsigned long arg) |
1761 | { | 1743 | { |
1762 | struct nvme_ns *ns = bdev->bd_disk->private_data; | ||
1763 | |||
1764 | switch (cmd) { | 1744 | switch (cmd) { |
1765 | case SG_IO: | 1745 | case SG_IO: |
1766 | return nvme_sg_io32(ns, arg); | 1746 | return -ENOIOCTLCMD; |
1767 | } | 1747 | } |
1768 | return nvme_ioctl(bdev, mode, cmd, arg); | 1748 | return nvme_ioctl(bdev, mode, cmd, arg); |
1769 | } | 1749 | } |
@@ -1773,11 +1753,18 @@ static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, | |||
1773 | 1753 | ||
1774 | static int nvme_open(struct block_device *bdev, fmode_t mode) | 1754 | static int nvme_open(struct block_device *bdev, fmode_t mode) |
1775 | { | 1755 | { |
1776 | struct nvme_ns *ns = bdev->bd_disk->private_data; | 1756 | int ret = 0; |
1777 | struct nvme_dev *dev = ns->dev; | 1757 | struct nvme_ns *ns; |
1778 | 1758 | ||
1779 | kref_get(&dev->kref); | 1759 | spin_lock(&dev_list_lock); |
1780 | return 0; | 1760 | ns = bdev->bd_disk->private_data; |
1761 | if (!ns) | ||
1762 | ret = -ENXIO; | ||
1763 | else if (!kref_get_unless_zero(&ns->dev->kref)) | ||
1764 | ret = -ENXIO; | ||
1765 | spin_unlock(&dev_list_lock); | ||
1766 | |||
1767 | return ret; | ||
1781 | } | 1768 | } |
1782 | 1769 | ||
1783 | static void nvme_free_dev(struct kref *kref); | 1770 | static void nvme_free_dev(struct kref *kref); |
@@ -1799,6 +1786,35 @@ static int nvme_getgeo(struct block_device *bd, struct hd_geometry *geo) | |||
1799 | return 0; | 1786 | return 0; |
1800 | } | 1787 | } |
1801 | 1788 | ||
1789 | static int nvme_revalidate_disk(struct gendisk *disk) | ||
1790 | { | ||
1791 | struct nvme_ns *ns = disk->private_data; | ||
1792 | struct nvme_dev *dev = ns->dev; | ||
1793 | struct nvme_id_ns *id; | ||
1794 | dma_addr_t dma_addr; | ||
1795 | int lbaf; | ||
1796 | |||
1797 | id = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr, | ||
1798 | GFP_KERNEL); | ||
1799 | if (!id) { | ||
1800 | dev_warn(&dev->pci_dev->dev, "%s: Memory allocation failure\n", ||
1801 | __func__); | ||
1802 | return 0; | ||
1803 | } | ||
1804 | |||
1805 | if (nvme_identify(dev, ns->ns_id, 0, dma_addr)) | ||
1806 | goto free; | ||
1807 | |||
1808 | lbaf = id->flbas & 0xf; | ||
1809 | ns->lba_shift = id->lbaf[lbaf].ds; | ||
1810 | |||
1811 | blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); | ||
1812 | set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); | ||
1813 | free: | ||
1814 | dma_free_coherent(&dev->pci_dev->dev, 4096, id, dma_addr); | ||
1815 | return 0; | ||
1816 | } | ||
1817 | |||
1802 | static const struct block_device_operations nvme_fops = { | 1818 | static const struct block_device_operations nvme_fops = { |
1803 | .owner = THIS_MODULE, | 1819 | .owner = THIS_MODULE, |
1804 | .ioctl = nvme_ioctl, | 1820 | .ioctl = nvme_ioctl, |
@@ -1806,43 +1822,9 @@ static const struct block_device_operations nvme_fops = { | |||
1806 | .open = nvme_open, | 1822 | .open = nvme_open, |
1807 | .release = nvme_release, | 1823 | .release = nvme_release, |
1808 | .getgeo = nvme_getgeo, | 1824 | .getgeo = nvme_getgeo, |
1825 | .revalidate_disk= nvme_revalidate_disk, | ||
1809 | }; | 1826 | }; |
1810 | 1827 | ||
1811 | static void nvme_resubmit_iods(struct nvme_queue *nvmeq) | ||
1812 | { | ||
1813 | struct nvme_iod *iod, *next; | ||
1814 | |||
1815 | list_for_each_entry_safe(iod, next, &nvmeq->iod_bio, node) { | ||
1816 | if (unlikely(nvme_submit_iod(nvmeq, iod))) | ||
1817 | break; | ||
1818 | list_del(&iod->node); | ||
1819 | if (bio_list_empty(&nvmeq->sq_cong) && | ||
1820 | list_empty(&nvmeq->iod_bio)) | ||
1821 | remove_wait_queue(&nvmeq->sq_full, | ||
1822 | &nvmeq->sq_cong_wait); | ||
1823 | } | ||
1824 | } | ||
1825 | |||
1826 | static void nvme_resubmit_bios(struct nvme_queue *nvmeq) | ||
1827 | { | ||
1828 | while (bio_list_peek(&nvmeq->sq_cong)) { | ||
1829 | struct bio *bio = bio_list_pop(&nvmeq->sq_cong); | ||
1830 | struct nvme_ns *ns = bio->bi_bdev->bd_disk->private_data; | ||
1831 | |||
1832 | if (bio_list_empty(&nvmeq->sq_cong) && | ||
1833 | list_empty(&nvmeq->iod_bio)) | ||
1834 | remove_wait_queue(&nvmeq->sq_full, | ||
1835 | &nvmeq->sq_cong_wait); | ||
1836 | if (nvme_submit_bio_queue(nvmeq, ns, bio)) { | ||
1837 | if (!waitqueue_active(&nvmeq->sq_full)) | ||
1838 | add_wait_queue(&nvmeq->sq_full, | ||
1839 | &nvmeq->sq_cong_wait); | ||
1840 | bio_list_add_head(&nvmeq->sq_cong, bio); | ||
1841 | break; | ||
1842 | } | ||
1843 | } | ||
1844 | } | ||
1845 | |||
1846 | static int nvme_kthread(void *data) | 1828 | static int nvme_kthread(void *data) |
1847 | { | 1829 | { |
1848 | struct nvme_dev *dev, *next; | 1830 | struct nvme_dev *dev, *next; |
@@ -1858,28 +1840,26 @@ static int nvme_kthread(void *data) | |||
1858 | continue; | 1840 | continue; |
1859 | list_del_init(&dev->node); | 1841 | list_del_init(&dev->node); |
1860 | dev_warn(&dev->pci_dev->dev, | 1842 | dev_warn(&dev->pci_dev->dev, |
1861 | "Failed status, reset controller\n"); | 1843 | "Failed status: %x, reset controller\n", |
1844 | readl(&dev->bar->csts)); | ||
1862 | dev->reset_workfn = nvme_reset_failed_dev; | 1845 | dev->reset_workfn = nvme_reset_failed_dev; |
1863 | queue_work(nvme_workq, &dev->reset_work); | 1846 | queue_work(nvme_workq, &dev->reset_work); |
1864 | continue; | 1847 | continue; |
1865 | } | 1848 | } |
1866 | rcu_read_lock(); | ||
1867 | for (i = 0; i < dev->queue_count; i++) { | 1849 | for (i = 0; i < dev->queue_count; i++) { |
1868 | struct nvme_queue *nvmeq = | 1850 | struct nvme_queue *nvmeq = dev->queues[i]; |
1869 | rcu_dereference(dev->queues[i]); | ||
1870 | if (!nvmeq) | 1851 | if (!nvmeq) |
1871 | continue; | 1852 | continue; |
1872 | spin_lock_irq(&nvmeq->q_lock); | 1853 | spin_lock_irq(&nvmeq->q_lock); |
1873 | if (nvmeq->q_suspended) | ||
1874 | goto unlock; | ||
1875 | nvme_process_cq(nvmeq); | 1854 | nvme_process_cq(nvmeq); |
1876 | nvme_cancel_ios(nvmeq, true); | 1855 | |
1877 | nvme_resubmit_bios(nvmeq); | 1856 | while ((i == 0) && (dev->event_limit > 0)) { |
1878 | nvme_resubmit_iods(nvmeq); | 1857 | if (nvme_submit_async_admin_req(dev)) |
1879 | unlock: | 1858 | break; |
1859 | dev->event_limit--; | ||
1860 | } | ||
1880 | spin_unlock_irq(&nvmeq->q_lock); | 1861 | spin_unlock_irq(&nvmeq->q_lock); |
1881 | } | 1862 | } |
1882 | rcu_read_unlock(); | ||
1883 | } | 1863 | } |
1884 | spin_unlock(&dev_list_lock); | 1864 | spin_unlock(&dev_list_lock); |
1885 | schedule_timeout(round_jiffies_relative(HZ)); | 1865 | schedule_timeout(round_jiffies_relative(HZ)); |
@@ -1902,28 +1882,28 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid, | |||
1902 | { | 1882 | { |
1903 | struct nvme_ns *ns; | 1883 | struct nvme_ns *ns; |
1904 | struct gendisk *disk; | 1884 | struct gendisk *disk; |
1885 | int node = dev_to_node(&dev->pci_dev->dev); | ||
1905 | int lbaf; | 1886 | int lbaf; |
1906 | 1887 | ||
1907 | if (rt->attributes & NVME_LBART_ATTRIB_HIDE) | 1888 | if (rt->attributes & NVME_LBART_ATTRIB_HIDE) |
1908 | return NULL; | 1889 | return NULL; |
1909 | 1890 | ||
1910 | ns = kzalloc(sizeof(*ns), GFP_KERNEL); | 1891 | ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); |
1911 | if (!ns) | 1892 | if (!ns) |
1912 | return NULL; | 1893 | return NULL; |
1913 | ns->queue = blk_alloc_queue(GFP_KERNEL); | 1894 | ns->queue = blk_mq_init_queue(&dev->tagset); |
1914 | if (!ns->queue) | 1895 | if (IS_ERR(ns->queue)) |
1915 | goto out_free_ns; | 1896 | goto out_free_ns; |
1916 | ns->queue->queue_flags = QUEUE_FLAG_DEFAULT; | ||
1917 | queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue); | 1897 | queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue); |
1918 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue); | 1898 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue); |
1919 | queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, ns->queue); | 1899 | queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, ns->queue); |
1920 | blk_queue_make_request(ns->queue, nvme_make_request); | ||
1921 | ns->dev = dev; | 1900 | ns->dev = dev; |
1922 | ns->queue->queuedata = ns; | 1901 | ns->queue->queuedata = ns; |
1923 | 1902 | ||
1924 | disk = alloc_disk(0); | 1903 | disk = alloc_disk_node(0, node); |
1925 | if (!disk) | 1904 | if (!disk) |
1926 | goto out_free_queue; | 1905 | goto out_free_queue; |
1906 | |||
1927 | ns->ns_id = nsid; | 1907 | ns->ns_id = nsid; |
1928 | ns->disk = disk; | 1908 | ns->disk = disk; |
1929 | lbaf = id->flbas & 0xf; | 1909 | lbaf = id->flbas & 0xf; |
@@ -1932,6 +1912,8 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid, | |||
1932 | blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); | 1912 | blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); |
1933 | if (dev->max_hw_sectors) | 1913 | if (dev->max_hw_sectors) |
1934 | blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors); | 1914 | blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors); |
1915 | if (dev->stripe_size) | ||
1916 | blk_queue_chunk_sectors(ns->queue, dev->stripe_size >> 9); | ||
1935 | if (dev->vwc & NVME_CTRL_VWC_PRESENT) | 1917 | if (dev->vwc & NVME_CTRL_VWC_PRESENT) |
1936 | blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA); | 1918 | blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA); |
1937 | 1919 | ||
@@ -1957,143 +1939,19 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid, | |||
1957 | return NULL; | 1939 | return NULL; |
1958 | } | 1940 | } |
1959 | 1941 | ||
1960 | static int nvme_find_closest_node(int node) | ||
1961 | { | ||
1962 | int n, val, min_val = INT_MAX, best_node = node; | ||
1963 | |||
1964 | for_each_online_node(n) { | ||
1965 | if (n == node) | ||
1966 | continue; | ||
1967 | val = node_distance(node, n); | ||
1968 | if (val < min_val) { | ||
1969 | min_val = val; | ||
1970 | best_node = n; | ||
1971 | } | ||
1972 | } | ||
1973 | return best_node; | ||
1974 | } | ||
1975 | |||
1976 | static void nvme_set_queue_cpus(cpumask_t *qmask, struct nvme_queue *nvmeq, | ||
1977 | int count) | ||
1978 | { | ||
1979 | int cpu; | ||
1980 | for_each_cpu(cpu, qmask) { | ||
1981 | if (cpumask_weight(nvmeq->cpu_mask) >= count) | ||
1982 | break; | ||
1983 | if (!cpumask_test_and_set_cpu(cpu, nvmeq->cpu_mask)) | ||
1984 | *per_cpu_ptr(nvmeq->dev->io_queue, cpu) = nvmeq->qid; | ||
1985 | } | ||
1986 | } | ||
1987 | |||
1988 | static void nvme_add_cpus(cpumask_t *mask, const cpumask_t *unassigned_cpus, | ||
1989 | const cpumask_t *new_mask, struct nvme_queue *nvmeq, int cpus_per_queue) | ||
1990 | { | ||
1991 | int next_cpu; | ||
1992 | for_each_cpu(next_cpu, new_mask) { | ||
1993 | cpumask_or(mask, mask, get_cpu_mask(next_cpu)); | ||
1994 | cpumask_or(mask, mask, topology_thread_cpumask(next_cpu)); | ||
1995 | cpumask_and(mask, mask, unassigned_cpus); | ||
1996 | nvme_set_queue_cpus(mask, nvmeq, cpus_per_queue); | ||
1997 | } | ||
1998 | } | ||
1999 | |||
2000 | static void nvme_create_io_queues(struct nvme_dev *dev) | 1942 | static void nvme_create_io_queues(struct nvme_dev *dev) |
2001 | { | 1943 | { |
2002 | unsigned i, max; | 1944 | unsigned i; |
2003 | 1945 | ||
2004 | max = min(dev->max_qid, num_online_cpus()); | 1946 | for (i = dev->queue_count; i <= dev->max_qid; i++) |
2005 | for (i = dev->queue_count; i <= max; i++) | ||
2006 | if (!nvme_alloc_queue(dev, i, dev->q_depth, i - 1)) | 1947 | if (!nvme_alloc_queue(dev, i, dev->q_depth, i - 1)) |
2007 | break; | 1948 | break; |
2008 | 1949 | ||
2009 | max = min(dev->queue_count - 1, num_online_cpus()); | 1950 | for (i = dev->online_queues; i <= dev->queue_count - 1; i++) |
2010 | for (i = dev->online_queues; i <= max; i++) | 1951 | if (nvme_create_queue(dev->queues[i], i)) |
2011 | if (nvme_create_queue(raw_nvmeq(dev, i), i)) | ||
2012 | break; | 1952 | break; |
2013 | } | 1953 | } |
2014 | 1954 | ||
2015 | /* | ||
2016 | * If there are fewer queues than online cpus, this will try to optimally | ||
2017 | * assign a queue to multiple cpus by grouping cpus that are "close" together: | ||
2018 | * thread siblings, core, socket, closest node, then whatever else is | ||
2019 | * available. | ||
2020 | */ | ||
2021 | static void nvme_assign_io_queues(struct nvme_dev *dev) | ||
2022 | { | ||
2023 | unsigned cpu, cpus_per_queue, queues, remainder, i; | ||
2024 | cpumask_var_t unassigned_cpus; | ||
2025 | |||
2026 | nvme_create_io_queues(dev); | ||
2027 | |||
2028 | queues = min(dev->online_queues - 1, num_online_cpus()); | ||
2029 | if (!queues) | ||
2030 | return; | ||
2031 | |||
2032 | cpus_per_queue = num_online_cpus() / queues; | ||
2033 | remainder = queues - (num_online_cpus() - queues * cpus_per_queue); | ||
2034 | |||
2035 | if (!alloc_cpumask_var(&unassigned_cpus, GFP_KERNEL)) | ||
2036 | return; | ||
2037 | |||
2038 | cpumask_copy(unassigned_cpus, cpu_online_mask); | ||
2039 | cpu = cpumask_first(unassigned_cpus); | ||
2040 | for (i = 1; i <= queues; i++) { | ||
2041 | struct nvme_queue *nvmeq = lock_nvmeq(dev, i); | ||
2042 | cpumask_t mask; | ||
2043 | |||
2044 | cpumask_clear(nvmeq->cpu_mask); | ||
2045 | if (!cpumask_weight(unassigned_cpus)) { | ||
2046 | unlock_nvmeq(nvmeq); | ||
2047 | break; | ||
2048 | } | ||
2049 | |||
2050 | mask = *get_cpu_mask(cpu); | ||
2051 | nvme_set_queue_cpus(&mask, nvmeq, cpus_per_queue); | ||
2052 | if (cpus_weight(mask) < cpus_per_queue) | ||
2053 | nvme_add_cpus(&mask, unassigned_cpus, | ||
2054 | topology_thread_cpumask(cpu), | ||
2055 | nvmeq, cpus_per_queue); | ||
2056 | if (cpus_weight(mask) < cpus_per_queue) | ||
2057 | nvme_add_cpus(&mask, unassigned_cpus, | ||
2058 | topology_core_cpumask(cpu), | ||
2059 | nvmeq, cpus_per_queue); | ||
2060 | if (cpus_weight(mask) < cpus_per_queue) | ||
2061 | nvme_add_cpus(&mask, unassigned_cpus, | ||
2062 | cpumask_of_node(cpu_to_node(cpu)), | ||
2063 | nvmeq, cpus_per_queue); | ||
2064 | if (cpus_weight(mask) < cpus_per_queue) | ||
2065 | nvme_add_cpus(&mask, unassigned_cpus, | ||
2066 | cpumask_of_node( | ||
2067 | nvme_find_closest_node( | ||
2068 | cpu_to_node(cpu))), | ||
2069 | nvmeq, cpus_per_queue); | ||
2070 | if (cpus_weight(mask) < cpus_per_queue) | ||
2071 | nvme_add_cpus(&mask, unassigned_cpus, | ||
2072 | unassigned_cpus, | ||
2073 | nvmeq, cpus_per_queue); | ||
2074 | |||
2075 | WARN(cpumask_weight(nvmeq->cpu_mask) != cpus_per_queue, | ||
2076 | "nvme%d qid:%d mis-matched queue-to-cpu assignment\n", | ||
2077 | dev->instance, i); | ||
2078 | |||
2079 | irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector, | ||
2080 | nvmeq->cpu_mask); | ||
2081 | cpumask_andnot(unassigned_cpus, unassigned_cpus, | ||
2082 | nvmeq->cpu_mask); | ||
2083 | cpu = cpumask_next(cpu, unassigned_cpus); | ||
2084 | if (remainder && !--remainder) | ||
2085 | cpus_per_queue++; | ||
2086 | unlock_nvmeq(nvmeq); | ||
2087 | } | ||
2088 | WARN(cpumask_weight(unassigned_cpus), "nvme%d unassigned online cpus\n", | ||
2089 | dev->instance); | ||
2090 | i = 0; | ||
2091 | cpumask_andnot(unassigned_cpus, cpu_possible_mask, cpu_online_mask); | ||
2092 | for_each_cpu(cpu, unassigned_cpus) | ||
2093 | *per_cpu_ptr(dev->io_queue, cpu) = (i++ % queues) + 1; | ||
2094 | free_cpumask_var(unassigned_cpus); | ||
2095 | } | ||
2096 | |||
2097 | static int set_queue_count(struct nvme_dev *dev, int count) | 1955 | static int set_queue_count(struct nvme_dev *dev, int count) |
2098 | { | 1956 | { |
2099 | int status; | 1957 | int status; |
@@ -2107,7 +1965,7 @@ static int set_queue_count(struct nvme_dev *dev, int count) | |||
2107 | if (status > 0) { | 1965 | if (status > 0) { |
2108 | dev_err(&dev->pci_dev->dev, "Could not set queue count (%d)\n", | 1966 | dev_err(&dev->pci_dev->dev, "Could not set queue count (%d)\n", |
2109 | status); | 1967 | status); |
2110 | return -EBUSY; | 1968 | return 0; |
2111 | } | 1969 | } |
2112 | return min(result & 0xffff, result >> 16) + 1; | 1970 | return min(result & 0xffff, result >> 16) + 1; |
2113 | } | 1971 | } |
@@ -2117,39 +1975,15 @@ static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) | |||
2117 | return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride); | 1975 | return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride); |
2118 | } | 1976 | } |
2119 | 1977 | ||
2120 | static void nvme_cpu_workfn(struct work_struct *work) | ||
2121 | { | ||
2122 | struct nvme_dev *dev = container_of(work, struct nvme_dev, cpu_work); | ||
2123 | if (dev->initialized) | ||
2124 | nvme_assign_io_queues(dev); | ||
2125 | } | ||
2126 | |||
2127 | static int nvme_cpu_notify(struct notifier_block *self, | ||
2128 | unsigned long action, void *hcpu) | ||
2129 | { | ||
2130 | struct nvme_dev *dev; | ||
2131 | |||
2132 | switch (action) { | ||
2133 | case CPU_ONLINE: | ||
2134 | case CPU_DEAD: | ||
2135 | spin_lock(&dev_list_lock); | ||
2136 | list_for_each_entry(dev, &dev_list, node) | ||
2137 | schedule_work(&dev->cpu_work); | ||
2138 | spin_unlock(&dev_list_lock); | ||
2139 | break; | ||
2140 | } | ||
2141 | return NOTIFY_OK; | ||
2142 | } | ||
2143 | |||
2144 | static int nvme_setup_io_queues(struct nvme_dev *dev) | 1978 | static int nvme_setup_io_queues(struct nvme_dev *dev) |
2145 | { | 1979 | { |
2146 | struct nvme_queue *adminq = raw_nvmeq(dev, 0); | 1980 | struct nvme_queue *adminq = dev->queues[0]; |
2147 | struct pci_dev *pdev = dev->pci_dev; | 1981 | struct pci_dev *pdev = dev->pci_dev; |
2148 | int result, i, vecs, nr_io_queues, size; | 1982 | int result, i, vecs, nr_io_queues, size; |
2149 | 1983 | ||
2150 | nr_io_queues = num_possible_cpus(); | 1984 | nr_io_queues = num_possible_cpus(); |
2151 | result = set_queue_count(dev, nr_io_queues); | 1985 | result = set_queue_count(dev, nr_io_queues); |
2152 | if (result < 0) | 1986 | if (result <= 0) |
2153 | return result; | 1987 | return result; |
2154 | if (result < nr_io_queues) | 1988 | if (result < nr_io_queues) |
2155 | nr_io_queues = result; | 1989 | nr_io_queues = result; |
@@ -2172,6 +2006,13 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) | |||
2172 | /* Deregister the admin queue's interrupt */ | 2006 | /* Deregister the admin queue's interrupt */ |
2173 | free_irq(dev->entry[0].vector, adminq); | 2007 | free_irq(dev->entry[0].vector, adminq); |
2174 | 2008 | ||
2009 | /* | ||
2010 | * If we enabled MSI-X early because the device has no INTx support, ||
2011 | * disable it again before setting up the full range we need. ||
2012 | */ | ||
2013 | if (!pdev->irq) | ||
2014 | pci_disable_msix(pdev); | ||
2015 | |||
2175 | for (i = 0; i < nr_io_queues; i++) | 2016 | for (i = 0; i < nr_io_queues; i++) |
2176 | dev->entry[i].entry = i; | 2017 | dev->entry[i].entry = i; |
2177 | vecs = pci_enable_msix_range(pdev, dev->entry, 1, nr_io_queues); | 2018 | vecs = pci_enable_msix_range(pdev, dev->entry, 1, nr_io_queues); |
@@ -2195,14 +2036,12 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) | |||
2195 | dev->max_qid = nr_io_queues; | 2036 | dev->max_qid = nr_io_queues; |
2196 | 2037 | ||
2197 | result = queue_request_irq(dev, adminq, adminq->irqname); | 2038 | result = queue_request_irq(dev, adminq, adminq->irqname); |
2198 | if (result) { | 2039 | if (result) |
2199 | adminq->q_suspended = 1; | ||
2200 | goto free_queues; | 2040 | goto free_queues; |
2201 | } | ||
2202 | 2041 | ||
2203 | /* Free previously allocated queues that are no longer usable */ | 2042 | /* Free previously allocated queues that are no longer usable */ |
2204 | nvme_free_queues(dev, nr_io_queues + 1); | 2043 | nvme_free_queues(dev, nr_io_queues + 1); |
2205 | nvme_assign_io_queues(dev); | 2044 | nvme_create_io_queues(dev); |
2206 | 2045 | ||
2207 | return 0; | 2046 | return 0; |
2208 | 2047 | ||
@@ -2245,14 +2084,37 @@ static int nvme_dev_add(struct nvme_dev *dev) | |||
2245 | dev->oncs = le16_to_cpup(&ctrl->oncs); | 2084 | dev->oncs = le16_to_cpup(&ctrl->oncs); |
2246 | dev->abort_limit = ctrl->acl + 1; | 2085 | dev->abort_limit = ctrl->acl + 1; |
2247 | dev->vwc = ctrl->vwc; | 2086 | dev->vwc = ctrl->vwc; |
2087 | dev->event_limit = min(ctrl->aerl + 1, 8); | ||
2248 | memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn)); | 2088 | memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn)); |
2249 | memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn)); | 2089 | memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn)); |
2250 | memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); | 2090 | memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); |
2251 | if (ctrl->mdts) | 2091 | if (ctrl->mdts) |
2252 | dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9); | 2092 | dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9); |
2253 | if ((pdev->vendor == PCI_VENDOR_ID_INTEL) && | 2093 | if ((pdev->vendor == PCI_VENDOR_ID_INTEL) && |
2254 | (pdev->device == 0x0953) && ctrl->vs[3]) | 2094 | (pdev->device == 0x0953) && ctrl->vs[3]) { |
2095 | unsigned int max_hw_sectors; | ||
2096 | |||
2255 | dev->stripe_size = 1 << (ctrl->vs[3] + shift); | 2097 | dev->stripe_size = 1 << (ctrl->vs[3] + shift); |
2098 | max_hw_sectors = dev->stripe_size >> (shift - 9); | ||
2099 | if (dev->max_hw_sectors) { | ||
2100 | dev->max_hw_sectors = min(max_hw_sectors, | ||
2101 | dev->max_hw_sectors); | ||
2102 | } else | ||
2103 | dev->max_hw_sectors = max_hw_sectors; | ||
2104 | } | ||
2105 | |||
2106 | dev->tagset.ops = &nvme_mq_ops; | ||
2107 | dev->tagset.nr_hw_queues = dev->online_queues - 1; | ||
2108 | dev->tagset.timeout = NVME_IO_TIMEOUT; | ||
2109 | dev->tagset.numa_node = dev_to_node(&dev->pci_dev->dev); | ||
2110 | dev->tagset.queue_depth = | ||
2111 | min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1; | ||
2112 | dev->tagset.cmd_size = sizeof(struct nvme_cmd_info); | ||
2113 | dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE; | ||
2114 | dev->tagset.driver_data = dev; | ||
2115 | |||
2116 | if (blk_mq_alloc_tag_set(&dev->tagset)) | ||
2117 | goto out; | ||
2256 | 2118 | ||
2257 | id_ns = mem; | 2119 | id_ns = mem; |
2258 | for (i = 1; i <= nn; i++) { | 2120 | for (i = 1; i <= nn; i++) { |
@@ -2293,6 +2155,9 @@ static int nvme_dev_map(struct nvme_dev *dev) | |||
2293 | dev->entry[0].vector = pdev->irq; | 2155 | dev->entry[0].vector = pdev->irq; |
2294 | pci_set_master(pdev); | 2156 | pci_set_master(pdev); |
2295 | bars = pci_select_bars(pdev, IORESOURCE_MEM); | 2157 | bars = pci_select_bars(pdev, IORESOURCE_MEM); |
2158 | if (!bars) | ||
2159 | goto disable_pci; | ||
2160 | |||
2296 | if (pci_request_selected_regions(pdev, bars, "nvme")) | 2161 | if (pci_request_selected_regions(pdev, bars, "nvme")) |
2297 | goto disable_pci; | 2162 | goto disable_pci; |
2298 | 2163 | ||
@@ -2303,10 +2168,22 @@ static int nvme_dev_map(struct nvme_dev *dev) | |||
2303 | dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); | 2168 | dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); |
2304 | if (!dev->bar) | 2169 | if (!dev->bar) |
2305 | goto disable; | 2170 | goto disable; |
2171 | |||
2306 | if (readl(&dev->bar->csts) == -1) { | 2172 | if (readl(&dev->bar->csts) == -1) { |
2307 | result = -ENODEV; | 2173 | result = -ENODEV; |
2308 | goto unmap; | 2174 | goto unmap; |
2309 | } | 2175 | } |
2176 | |||
2177 | /* | ||
2178 | * Some devices don't advertise INTx interrupts; pre-enable a single ||
2179 | * MSI-X vector for setup. We'll adjust this later. ||
2180 | */ | ||
2181 | if (!pdev->irq) { | ||
2182 | result = pci_enable_msix(pdev, dev->entry, 1); | ||
2183 | if (result < 0) | ||
2184 | goto unmap; | ||
2185 | } | ||
2186 | |||
2310 | cap = readq(&dev->bar->cap); | 2187 | cap = readq(&dev->bar->cap); |
2311 | dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH); | 2188 | dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH); |
2312 | dev->db_stride = 1 << NVME_CAP_STRIDE(cap); | 2189 | dev->db_stride = 1 << NVME_CAP_STRIDE(cap); |
@@ -2402,7 +2279,8 @@ static int adapter_async_del_queue(struct nvme_queue *nvmeq, u8 opcode, | |||
2402 | c.delete_queue.qid = cpu_to_le16(nvmeq->qid); | 2279 | c.delete_queue.qid = cpu_to_le16(nvmeq->qid); |
2403 | 2280 | ||
2404 | init_kthread_work(&nvmeq->cmdinfo.work, fn); | 2281 | init_kthread_work(&nvmeq->cmdinfo.work, fn); |
2405 | return nvme_submit_admin_cmd_async(nvmeq->dev, &c, &nvmeq->cmdinfo); | 2282 | return nvme_submit_admin_async_cmd(nvmeq->dev, &c, &nvmeq->cmdinfo, |
2283 | ADMIN_TIMEOUT); | ||
2406 | } | 2284 | } |
2407 | 2285 | ||
2408 | static void nvme_del_cq_work_handler(struct kthread_work *work) | 2286 | static void nvme_del_cq_work_handler(struct kthread_work *work) |
@@ -2465,7 +2343,7 @@ static void nvme_disable_io_queues(struct nvme_dev *dev) | |||
2465 | atomic_set(&dq.refcount, 0); | 2343 | atomic_set(&dq.refcount, 0); |
2466 | dq.worker = &worker; | 2344 | dq.worker = &worker; |
2467 | for (i = dev->queue_count - 1; i > 0; i--) { | 2345 | for (i = dev->queue_count - 1; i > 0; i--) { |
2468 | struct nvme_queue *nvmeq = raw_nvmeq(dev, i); | 2346 | struct nvme_queue *nvmeq = dev->queues[i]; |
2469 | 2347 | ||
2470 | if (nvme_suspend_queue(nvmeq)) | 2348 | if (nvme_suspend_queue(nvmeq)) |
2471 | continue; | 2349 | continue; |
@@ -2501,13 +2379,16 @@ static void nvme_dev_list_remove(struct nvme_dev *dev) | |||
2501 | static void nvme_dev_shutdown(struct nvme_dev *dev) | 2379 | static void nvme_dev_shutdown(struct nvme_dev *dev) |
2502 | { | 2380 | { |
2503 | int i; | 2381 | int i; |
2382 | u32 csts = -1; | ||
2504 | 2383 | ||
2505 | dev->initialized = 0; | 2384 | dev->initialized = 0; |
2506 | nvme_dev_list_remove(dev); | 2385 | nvme_dev_list_remove(dev); |
2507 | 2386 | ||
2508 | if (!dev->bar || (dev->bar && readl(&dev->bar->csts) == -1)) { | 2387 | if (dev->bar) |
2388 | csts = readl(&dev->bar->csts); | ||
2389 | if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) { | ||
2509 | for (i = dev->queue_count - 1; i >= 0; i--) { | 2390 | for (i = dev->queue_count - 1; i >= 0; i--) { |
2510 | struct nvme_queue *nvmeq = raw_nvmeq(dev, i); | 2391 | struct nvme_queue *nvmeq = dev->queues[i]; |
2511 | nvme_suspend_queue(nvmeq); | 2392 | nvme_suspend_queue(nvmeq); |
2512 | nvme_clear_queue(nvmeq); | 2393 | nvme_clear_queue(nvmeq); |
2513 | } | 2394 | } |
@@ -2519,6 +2400,12 @@ static void nvme_dev_shutdown(struct nvme_dev *dev) | |||
2519 | nvme_dev_unmap(dev); | 2400 | nvme_dev_unmap(dev); |
2520 | } | 2401 | } |
2521 | 2402 | ||
2403 | static void nvme_dev_remove_admin(struct nvme_dev *dev) | ||
2404 | { | ||
2405 | if (dev->admin_q && !blk_queue_dying(dev->admin_q)) | ||
2406 | blk_cleanup_queue(dev->admin_q); | ||
2407 | } | ||
2408 | |||
2522 | static void nvme_dev_remove(struct nvme_dev *dev) | 2409 | static void nvme_dev_remove(struct nvme_dev *dev) |
2523 | { | 2410 | { |
2524 | struct nvme_ns *ns; | 2411 | struct nvme_ns *ns; |
@@ -2590,6 +2477,11 @@ static void nvme_free_namespaces(struct nvme_dev *dev) | |||
2590 | 2477 | ||
2591 | list_for_each_entry_safe(ns, next, &dev->namespaces, list) { | 2478 | list_for_each_entry_safe(ns, next, &dev->namespaces, list) { |
2592 | list_del(&ns->list); | 2479 | list_del(&ns->list); |
2480 | |||
2481 | spin_lock(&dev_list_lock); | ||
2482 | ns->disk->private_data = NULL; | ||
2483 | spin_unlock(&dev_list_lock); | ||
2484 | |||
2593 | put_disk(ns->disk); | 2485 | put_disk(ns->disk); |
2594 | kfree(ns); | 2486 | kfree(ns); |
2595 | } | 2487 | } |
@@ -2599,8 +2491,10 @@ static void nvme_free_dev(struct kref *kref) | |||
2599 | { | 2491 | { |
2600 | struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref); | 2492 | struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref); |
2601 | 2493 | ||
2494 | pci_dev_put(dev->pci_dev); | ||
2602 | nvme_free_namespaces(dev); | 2495 | nvme_free_namespaces(dev); |
2603 | free_percpu(dev->io_queue); | 2496 | nvme_release_instance(dev); |
2497 | blk_mq_free_tag_set(&dev->tagset); | ||
2604 | kfree(dev->queues); | 2498 | kfree(dev->queues); |
2605 | kfree(dev->entry); | 2499 | kfree(dev->entry); |
2606 | kfree(dev); | 2500 | kfree(dev); |
@@ -2625,9 +2519,16 @@ static int nvme_dev_release(struct inode *inode, struct file *f) | |||
2625 | static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg) | 2519 | static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg) |
2626 | { | 2520 | { |
2627 | struct nvme_dev *dev = f->private_data; | 2521 | struct nvme_dev *dev = f->private_data; |
2522 | struct nvme_ns *ns; | ||
2523 | |||
2628 | switch (cmd) { | 2524 | switch (cmd) { |
2629 | case NVME_IOCTL_ADMIN_CMD: | 2525 | case NVME_IOCTL_ADMIN_CMD: |
2630 | return nvme_user_admin_cmd(dev, (void __user *)arg); | 2526 | return nvme_user_cmd(dev, NULL, (void __user *)arg); |
2527 | case NVME_IOCTL_IO_CMD: | ||
2528 | if (list_empty(&dev->namespaces)) | ||
2529 | return -ENOTTY; | ||
2530 | ns = list_first_entry(&dev->namespaces, struct nvme_ns, list); | ||
2531 | return nvme_user_cmd(dev, ns, (void __user *)arg); | ||
2631 | default: | 2532 | default: |
2632 | return -ENOTTY; | 2533 | return -ENOTTY; |
2633 | } | 2534 | } |
@@ -2641,6 +2542,22 @@ static const struct file_operations nvme_dev_fops = { | |||
2641 | .compat_ioctl = nvme_dev_ioctl, | 2542 | .compat_ioctl = nvme_dev_ioctl, |
2642 | }; | 2543 | }; |
2643 | 2544 | ||
2545 | static void nvme_set_irq_hints(struct nvme_dev *dev) | ||
2546 | { | ||
2547 | struct nvme_queue *nvmeq; | ||
2548 | int i; | ||
2549 | |||
2550 | for (i = 0; i < dev->online_queues; i++) { | ||
2551 | nvmeq = dev->queues[i]; | ||
2552 | |||
2553 | if (!nvmeq->hctx) | ||
2554 | continue; | ||
2555 | |||
2556 | irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector, | ||
2557 | nvmeq->hctx->cpumask); | ||
2558 | } | ||
2559 | } | ||
2560 | |||
2644 | static int nvme_dev_start(struct nvme_dev *dev) | 2561 | static int nvme_dev_start(struct nvme_dev *dev) |
2645 | { | 2562 | { |
2646 | int result; | 2563 | int result; |
@@ -2664,7 +2581,7 @@ static int nvme_dev_start(struct nvme_dev *dev) | |||
2664 | 2581 | ||
2665 | if (start_thread) { | 2582 | if (start_thread) { |
2666 | nvme_thread = kthread_run(nvme_kthread, NULL, "nvme"); | 2583 | nvme_thread = kthread_run(nvme_kthread, NULL, "nvme"); |
2667 | wake_up(&nvme_kthread_wait); | 2584 | wake_up_all(&nvme_kthread_wait); |
2668 | } else | 2585 | } else |
2669 | wait_event_killable(nvme_kthread_wait, nvme_thread); | 2586 | wait_event_killable(nvme_kthread_wait, nvme_thread); |
2670 | 2587 | ||
@@ -2673,10 +2590,14 @@ static int nvme_dev_start(struct nvme_dev *dev) | |||
2673 | goto disable; | 2590 | goto disable; |
2674 | } | 2591 | } |
2675 | 2592 | ||
2593 | nvme_init_queue(dev->queues[0], 0); | ||
2594 | |||
2676 | result = nvme_setup_io_queues(dev); | 2595 | result = nvme_setup_io_queues(dev); |
2677 | if (result && result != -EBUSY) | 2596 | if (result) |
2678 | goto disable; | 2597 | goto disable; |
2679 | 2598 | ||
2599 | nvme_set_irq_hints(dev); | ||
2600 | |||
2680 | return result; | 2601 | return result; |
2681 | 2602 | ||
2682 | disable: | 2603 | disable: |
@@ -2693,7 +2614,7 @@ static int nvme_remove_dead_ctrl(void *arg) | |||
2693 | struct pci_dev *pdev = dev->pci_dev; | 2614 | struct pci_dev *pdev = dev->pci_dev; |
2694 | 2615 | ||
2695 | if (pci_get_drvdata(pdev)) | 2616 | if (pci_get_drvdata(pdev)) |
2696 | pci_stop_and_remove_bus_device(pdev); | 2617 | pci_stop_and_remove_bus_device_locked(pdev); |
2697 | kref_put(&dev->kref, nvme_free_dev); | 2618 | kref_put(&dev->kref, nvme_free_dev); |
2698 | return 0; | 2619 | return 0; |
2699 | } | 2620 | } |
@@ -2702,8 +2623,8 @@ static void nvme_remove_disks(struct work_struct *ws) | |||
2702 | { | 2623 | { |
2703 | struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work); | 2624 | struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work); |
2704 | 2625 | ||
2705 | nvme_dev_remove(dev); | ||
2706 | nvme_free_queues(dev, 1); | 2626 | nvme_free_queues(dev, 1); |
2627 | nvme_dev_remove(dev); | ||
2707 | } | 2628 | } |
2708 | 2629 | ||
2709 | static int nvme_dev_resume(struct nvme_dev *dev) | 2630 | static int nvme_dev_resume(struct nvme_dev *dev) |
@@ -2711,9 +2632,9 @@ static int nvme_dev_resume(struct nvme_dev *dev) | |||
2711 | int ret; | 2632 | int ret; |
2712 | 2633 | ||
2713 | ret = nvme_dev_start(dev); | 2634 | ret = nvme_dev_start(dev); |
2714 | if (ret && ret != -EBUSY) | 2635 | if (ret) |
2715 | return ret; | 2636 | return ret; |
2716 | if (ret == -EBUSY) { | 2637 | if (dev->online_queues < 2) { |
2717 | spin_lock(&dev_list_lock); | 2638 | spin_lock(&dev_list_lock); |
2718 | dev->reset_workfn = nvme_remove_disks; | 2639 | dev->reset_workfn = nvme_remove_disks; |
2719 | queue_work(nvme_workq, &dev->reset_work); | 2640 | queue_work(nvme_workq, &dev->reset_work); |
@@ -2727,7 +2648,7 @@ static void nvme_dev_reset(struct nvme_dev *dev) | |||
2727 | { | 2648 | { |
2728 | nvme_dev_shutdown(dev); | 2649 | nvme_dev_shutdown(dev); |
2729 | if (nvme_dev_resume(dev)) { | 2650 | if (nvme_dev_resume(dev)) { |
2730 | dev_err(&dev->pci_dev->dev, "Device failed to resume\n"); | 2651 | dev_warn(&dev->pci_dev->dev, "Device failed to resume\n"); |
2731 | kref_get(&dev->kref); | 2652 | kref_get(&dev->kref); |
2732 | if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d", | 2653 | if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d", |
2733 | dev->instance))) { | 2654 | dev->instance))) { |
@@ -2752,33 +2673,33 @@ static void nvme_reset_workfn(struct work_struct *work) | |||
2752 | 2673 | ||
2753 | static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) | 2674 | static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) |
2754 | { | 2675 | { |
2755 | int result = -ENOMEM; | 2676 | int node, result = -ENOMEM; |
2756 | struct nvme_dev *dev; | 2677 | struct nvme_dev *dev; |
2757 | 2678 | ||
2758 | dev = kzalloc(sizeof(*dev), GFP_KERNEL); | 2679 | node = dev_to_node(&pdev->dev); |
2680 | if (node == NUMA_NO_NODE) | ||
2681 | set_dev_node(&pdev->dev, 0); | ||
2682 | |||
2683 | dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node); | ||
2759 | if (!dev) | 2684 | if (!dev) |
2760 | return -ENOMEM; | 2685 | return -ENOMEM; |
2761 | dev->entry = kcalloc(num_possible_cpus(), sizeof(*dev->entry), | 2686 | dev->entry = kzalloc_node(num_possible_cpus() * sizeof(*dev->entry), |
2762 | GFP_KERNEL); | 2687 | GFP_KERNEL, node); |
2763 | if (!dev->entry) | 2688 | if (!dev->entry) |
2764 | goto free; | 2689 | goto free; |
2765 | dev->queues = kcalloc(num_possible_cpus() + 1, sizeof(void *), | 2690 | dev->queues = kzalloc_node((num_possible_cpus() + 1) * sizeof(void *), |
2766 | GFP_KERNEL); | 2691 | GFP_KERNEL, node); |
2767 | if (!dev->queues) | 2692 | if (!dev->queues) |
2768 | goto free; | 2693 | goto free; |
2769 | dev->io_queue = alloc_percpu(unsigned short); | ||
2770 | if (!dev->io_queue) | ||
2771 | goto free; | ||
2772 | 2694 | ||
2773 | INIT_LIST_HEAD(&dev->namespaces); | 2695 | INIT_LIST_HEAD(&dev->namespaces); |
2774 | dev->reset_workfn = nvme_reset_failed_dev; | 2696 | dev->reset_workfn = nvme_reset_failed_dev; |
2775 | INIT_WORK(&dev->reset_work, nvme_reset_workfn); | 2697 | INIT_WORK(&dev->reset_work, nvme_reset_workfn); |
2776 | INIT_WORK(&dev->cpu_work, nvme_cpu_workfn); | 2698 | dev->pci_dev = pci_dev_get(pdev); |
2777 | dev->pci_dev = pdev; | ||
2778 | pci_set_drvdata(pdev, dev); | 2699 | pci_set_drvdata(pdev, dev); |
2779 | result = nvme_set_instance(dev); | 2700 | result = nvme_set_instance(dev); |
2780 | if (result) | 2701 | if (result) |
2781 | goto free; | 2702 | goto put_pci; |
2782 | 2703 | ||
2783 | result = nvme_setup_prp_pools(dev); | 2704 | result = nvme_setup_prp_pools(dev); |
2784 | if (result) | 2705 | if (result) |
@@ -2786,17 +2707,14 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) | |||
2786 | 2707 | ||
2787 | kref_init(&dev->kref); | 2708 | kref_init(&dev->kref); |
2788 | result = nvme_dev_start(dev); | 2709 | result = nvme_dev_start(dev); |
2789 | if (result) { | 2710 | if (result) |
2790 | if (result == -EBUSY) | ||
2791 | goto create_cdev; | ||
2792 | goto release_pools; | 2711 | goto release_pools; |
2793 | } | ||
2794 | 2712 | ||
2795 | result = nvme_dev_add(dev); | 2713 | if (dev->online_queues > 1) |
2714 | result = nvme_dev_add(dev); | ||
2796 | if (result) | 2715 | if (result) |
2797 | goto shutdown; | 2716 | goto shutdown; |
2798 | 2717 | ||
2799 | create_cdev: | ||
2800 | scnprintf(dev->name, sizeof(dev->name), "nvme%d", dev->instance); | 2718 | scnprintf(dev->name, sizeof(dev->name), "nvme%d", dev->instance); |
2801 | dev->miscdev.minor = MISC_DYNAMIC_MINOR; | 2719 | dev->miscdev.minor = MISC_DYNAMIC_MINOR; |
2802 | dev->miscdev.parent = &pdev->dev; | 2720 | dev->miscdev.parent = &pdev->dev; |
@@ -2806,11 +2724,14 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) | |||
2806 | if (result) | 2724 | if (result) |
2807 | goto remove; | 2725 | goto remove; |
2808 | 2726 | ||
2727 | nvme_set_irq_hints(dev); | ||
2728 | |||
2809 | dev->initialized = 1; | 2729 | dev->initialized = 1; |
2810 | return 0; | 2730 | return 0; |
2811 | 2731 | ||
2812 | remove: | 2732 | remove: |
2813 | nvme_dev_remove(dev); | 2733 | nvme_dev_remove(dev); |
2734 | nvme_dev_remove_admin(dev); | ||
2814 | nvme_free_namespaces(dev); | 2735 | nvme_free_namespaces(dev); |
2815 | shutdown: | 2736 | shutdown: |
2816 | nvme_dev_shutdown(dev); | 2737 | nvme_dev_shutdown(dev); |
@@ -2819,8 +2740,9 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) | |||
2819 | nvme_release_prp_pools(dev); | 2740 | nvme_release_prp_pools(dev); |
2820 | release: | 2741 | release: |
2821 | nvme_release_instance(dev); | 2742 | nvme_release_instance(dev); |
2743 | put_pci: | ||
2744 | pci_dev_put(dev->pci_dev); | ||
2822 | free: | 2745 | free: |
2823 | free_percpu(dev->io_queue); | ||
2824 | kfree(dev->queues); | 2746 | kfree(dev->queues); |
2825 | kfree(dev->entry); | 2747 | kfree(dev->entry); |
2826 | kfree(dev); | 2748 | kfree(dev); |
@@ -2829,12 +2751,12 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) | |||
2829 | 2751 | ||
2830 | static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) | 2752 | static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) |
2831 | { | 2753 | { |
2832 | struct nvme_dev *dev = pci_get_drvdata(pdev); | 2754 | struct nvme_dev *dev = pci_get_drvdata(pdev); |
2833 | 2755 | ||
2834 | if (prepare) | 2756 | if (prepare) |
2835 | nvme_dev_shutdown(dev); | 2757 | nvme_dev_shutdown(dev); |
2836 | else | 2758 | else |
2837 | nvme_dev_resume(dev); | 2759 | nvme_dev_resume(dev); |
2838 | } | 2760 | } |
2839 | 2761 | ||
2840 | static void nvme_shutdown(struct pci_dev *pdev) | 2762 | static void nvme_shutdown(struct pci_dev *pdev) |
@@ -2853,13 +2775,12 @@ static void nvme_remove(struct pci_dev *pdev) | |||
2853 | 2775 | ||
2854 | pci_set_drvdata(pdev, NULL); | 2776 | pci_set_drvdata(pdev, NULL); |
2855 | flush_work(&dev->reset_work); | 2777 | flush_work(&dev->reset_work); |
2856 | flush_work(&dev->cpu_work); | ||
2857 | misc_deregister(&dev->miscdev); | 2778 | misc_deregister(&dev->miscdev); |
2858 | nvme_dev_remove(dev); | 2779 | nvme_dev_remove(dev); |
2859 | nvme_dev_shutdown(dev); | 2780 | nvme_dev_shutdown(dev); |
2781 | nvme_dev_remove_admin(dev); | ||
2860 | nvme_free_queues(dev, 0); | 2782 | nvme_free_queues(dev, 0); |
2861 | rcu_barrier(); | 2783 | nvme_free_admin_tags(dev); |
2862 | nvme_release_instance(dev); | ||
2863 | nvme_release_prp_pools(dev); | 2784 | nvme_release_prp_pools(dev); |
2864 | kref_put(&dev->kref, nvme_free_dev); | 2785 | kref_put(&dev->kref, nvme_free_dev); |
2865 | } | 2786 | } |
@@ -2942,18 +2863,11 @@ static int __init nvme_init(void) | |||
2942 | else if (result > 0) | 2863 | else if (result > 0) |
2943 | nvme_major = result; | 2864 | nvme_major = result; |
2944 | 2865 | ||
2945 | nvme_nb.notifier_call = &nvme_cpu_notify; | ||
2946 | result = register_hotcpu_notifier(&nvme_nb); | ||
2947 | if (result) | ||
2948 | goto unregister_blkdev; | ||
2949 | |||
2950 | result = pci_register_driver(&nvme_driver); | 2866 | result = pci_register_driver(&nvme_driver); |
2951 | if (result) | 2867 | if (result) |
2952 | goto unregister_hotcpu; | 2868 | goto unregister_blkdev; |
2953 | return 0; | 2869 | return 0; |
2954 | 2870 | ||
2955 | unregister_hotcpu: | ||
2956 | unregister_hotcpu_notifier(&nvme_nb); | ||
2957 | unregister_blkdev: | 2871 | unregister_blkdev: |
2958 | unregister_blkdev(nvme_major, "nvme"); | 2872 | unregister_blkdev(nvme_major, "nvme"); |
2959 | kill_workq: | 2873 | kill_workq: |
@@ -2973,6 +2887,6 @@ static void __exit nvme_exit(void) | |||
2973 | 2887 | ||
2974 | MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>"); | 2888 | MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>"); |
2975 | MODULE_LICENSE("GPL"); | 2889 | MODULE_LICENSE("GPL"); |
2976 | MODULE_VERSION("0.9"); | 2890 | MODULE_VERSION("1.0"); |
2977 | module_init(nvme_init); | 2891 | module_init(nvme_init); |
2978 | module_exit(nvme_exit); | 2892 | module_exit(nvme_exit); |
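The blk-mq conversion above registers one tag set for the admin queue and a second, shared tag set from which every namespace's request queue is derived. Below is a condensed sketch of that registration shape, limited to the calls that appear in this diff; register_tagset_sketch is a hypothetical helper, nvme_mq_ops and the other fields simply stand in for the driver's real values, and error handling is reduced to the minimum.

#include <linux/blk-mq.h>
#include <linux/err.h>

static int register_tagset_sketch(struct nvme_dev *dev)
{
	struct request_queue *q;

	dev->tagset.ops = &nvme_mq_ops;		/* driver's queue_rq/init_hctx callbacks */
	dev->tagset.nr_hw_queues = dev->online_queues - 1;
	dev->tagset.queue_depth = dev->q_depth - 1;
	dev->tagset.cmd_size = sizeof(struct nvme_cmd_info);	/* per-request driver context */
	dev->tagset.numa_node = dev_to_node(&dev->pci_dev->dev);
	dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
	dev->tagset.driver_data = dev;

	if (blk_mq_alloc_tag_set(&dev->tagset))	/* preallocates tags and request storage */
		return -ENOMEM;

	q = blk_mq_init_queue(&dev->tagset);	/* a request_queue backed by the tag set */
	if (IS_ERR_OR_NULL(q)) {
		blk_mq_free_tag_set(&dev->tagset);
		return -ENOMEM;
	}
	return 0;
}

The same shape appears twice in the driver: once for dev->admin_tagset with a single hardware queue, and once for dev->tagset, which nvme_alloc_ns() reuses for every gendisk it creates.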
diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 0b4b2775600e..5e78568026c3 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c | |||
@@ -2105,7 +2105,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, | |||
2105 | 2105 | ||
2106 | nvme_offset += unit_num_blocks; | 2106 | nvme_offset += unit_num_blocks; |
2107 | 2107 | ||
2108 | nvme_sc = nvme_submit_io_cmd(dev, &c, NULL); | 2108 | nvme_sc = nvme_submit_io_cmd(dev, ns, &c, NULL); |
2109 | if (nvme_sc != NVME_SC_SUCCESS) { | 2109 | if (nvme_sc != NVME_SC_SUCCESS) { |
2110 | nvme_unmap_user_pages(dev, | 2110 | nvme_unmap_user_pages(dev, |
2111 | (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, | 2111 | (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, |
@@ -2658,7 +2658,7 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, | |||
2658 | c.common.opcode = nvme_cmd_flush; | 2658 | c.common.opcode = nvme_cmd_flush; |
2659 | c.common.nsid = cpu_to_le32(ns->ns_id); | 2659 | c.common.nsid = cpu_to_le32(ns->ns_id); |
2660 | 2660 | ||
2661 | nvme_sc = nvme_submit_io_cmd(ns->dev, &c, NULL); | 2661 | nvme_sc = nvme_submit_io_cmd(ns->dev, ns, &c, NULL); |
2662 | res = nvme_trans_status_code(hdr, nvme_sc); | 2662 | res = nvme_trans_status_code(hdr, nvme_sc); |
2663 | if (res) | 2663 | if (res) |
2664 | goto out; | 2664 | goto out; |
@@ -2686,7 +2686,7 @@ static int nvme_trans_synchronize_cache(struct nvme_ns *ns, | |||
2686 | c.common.opcode = nvme_cmd_flush; | 2686 | c.common.opcode = nvme_cmd_flush; |
2687 | c.common.nsid = cpu_to_le32(ns->ns_id); | 2687 | c.common.nsid = cpu_to_le32(ns->ns_id); |
2688 | 2688 | ||
2689 | nvme_sc = nvme_submit_io_cmd(ns->dev, &c, NULL); | 2689 | nvme_sc = nvme_submit_io_cmd(ns->dev, ns, &c, NULL); |
2690 | 2690 | ||
2691 | res = nvme_trans_status_code(hdr, nvme_sc); | 2691 | res = nvme_trans_status_code(hdr, nvme_sc); |
2692 | if (res) | 2692 | if (res) |
@@ -2894,7 +2894,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, | |||
2894 | c.dsm.nr = cpu_to_le32(ndesc - 1); | 2894 | c.dsm.nr = cpu_to_le32(ndesc - 1); |
2895 | c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); | 2895 | c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); |
2896 | 2896 | ||
2897 | nvme_sc = nvme_submit_io_cmd(dev, &c, NULL); | 2897 | nvme_sc = nvme_submit_io_cmd(dev, ns, &c, NULL); |
2898 | res = nvme_trans_status_code(hdr, nvme_sc); | 2898 | res = nvme_trans_status_code(hdr, nvme_sc); |
2899 | 2899 | ||
2900 | dma_free_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range), | 2900 | dma_free_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range), |
@@ -2915,6 +2915,14 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr) | |||
2915 | if (copy_from_user(cmd, hdr->cmdp, hdr->cmd_len)) | 2915 | if (copy_from_user(cmd, hdr->cmdp, hdr->cmd_len)) |
2916 | return -EFAULT; | 2916 | return -EFAULT; |
2917 | 2917 | ||
2918 | /* | ||
2919 | * Prime the hdr with good status for scsi commands that don't require | ||
2920 | * an nvme command for translation. | ||
2921 | */ | ||
2922 | retcode = nvme_trans_status_code(hdr, NVME_SC_SUCCESS); | ||
2923 | if (retcode) | ||
2924 | return retcode; | ||
2925 | |||
2918 | opcode = cmd[0]; | 2926 | opcode = cmd[0]; |
2919 | 2927 | ||
2920 | switch (opcode) { | 2928 | switch (opcode) { |
@@ -3016,152 +3024,6 @@ int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr) | |||
3016 | return retcode; | 3024 | return retcode; |
3017 | } | 3025 | } |
3018 | 3026 | ||
3019 | #ifdef CONFIG_COMPAT | ||
3020 | typedef struct sg_io_hdr32 { | ||
3021 | compat_int_t interface_id; /* [i] 'S' for SCSI generic (required) */ | ||
3022 | compat_int_t dxfer_direction; /* [i] data transfer direction */ | ||
3023 | unsigned char cmd_len; /* [i] SCSI command length ( <= 16 bytes) */ | ||
3024 | unsigned char mx_sb_len; /* [i] max length to write to sbp */ | ||
3025 | unsigned short iovec_count; /* [i] 0 implies no scatter gather */ | ||
3026 | compat_uint_t dxfer_len; /* [i] byte count of data transfer */ | ||
3027 | compat_uint_t dxferp; /* [i], [*io] points to data transfer memory | ||
3028 | or scatter gather list */ | ||
3029 | compat_uptr_t cmdp; /* [i], [*i] points to command to perform */ | ||
3030 | compat_uptr_t sbp; /* [i], [*o] points to sense_buffer memory */ | ||
3031 | compat_uint_t timeout; /* [i] MAX_UINT->no timeout (unit: millisec) */ | ||
3032 | compat_uint_t flags; /* [i] 0 -> default, see SG_FLAG... */ | ||
3033 | compat_int_t pack_id; /* [i->o] unused internally (normally) */ | ||
3034 | compat_uptr_t usr_ptr; /* [i->o] unused internally */ | ||
3035 | unsigned char status; /* [o] scsi status */ | ||
3036 | unsigned char masked_status; /* [o] shifted, masked scsi status */ | ||
3037 | unsigned char msg_status; /* [o] messaging level data (optional) */ | ||
3038 | unsigned char sb_len_wr; /* [o] byte count actually written to sbp */ | ||
3039 | unsigned short host_status; /* [o] errors from host adapter */ | ||
3040 | unsigned short driver_status; /* [o] errors from software driver */ | ||
3041 | compat_int_t resid; /* [o] dxfer_len - actual_transferred */ | ||
3042 | compat_uint_t duration; /* [o] time taken by cmd (unit: millisec) */ | ||
3043 | compat_uint_t info; /* [o] auxiliary information */ | ||
3044 | } sg_io_hdr32_t; /* 64 bytes long (on sparc32) */ | ||
3045 | |||
3046 | typedef struct sg_iovec32 { | ||
3047 | compat_uint_t iov_base; | ||
3048 | compat_uint_t iov_len; | ||
3049 | } sg_iovec32_t; | ||
3050 | |||
3051 | static int sg_build_iovec(sg_io_hdr_t __user *sgio, void __user *dxferp, u16 iovec_count) | ||
3052 | { | ||
3053 | sg_iovec_t __user *iov = (sg_iovec_t __user *) (sgio + 1); | ||
3054 | sg_iovec32_t __user *iov32 = dxferp; | ||
3055 | int i; | ||
3056 | |||
3057 | for (i = 0; i < iovec_count; i++) { | ||
3058 | u32 base, len; | ||
3059 | |||
3060 | if (get_user(base, &iov32[i].iov_base) || | ||
3061 | get_user(len, &iov32[i].iov_len) || | ||
3062 | put_user(compat_ptr(base), &iov[i].iov_base) || | ||
3063 | put_user(len, &iov[i].iov_len)) | ||
3064 | return -EFAULT; | ||
3065 | } | ||
3066 | |||
3067 | if (put_user(iov, &sgio->dxferp)) | ||
3068 | return -EFAULT; | ||
3069 | return 0; | ||
3070 | } | ||
3071 | |||
3072 | int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg) | ||
3073 | { | ||
3074 | sg_io_hdr32_t __user *sgio32 = (sg_io_hdr32_t __user *)arg; | ||
3075 | sg_io_hdr_t __user *sgio; | ||
3076 | u16 iovec_count; | ||
3077 | u32 data; | ||
3078 | void __user *dxferp; | ||
3079 | int err; | ||
3080 | int interface_id; | ||
3081 | |||
3082 | if (get_user(interface_id, &sgio32->interface_id)) | ||
3083 | return -EFAULT; | ||
3084 | if (interface_id != 'S') | ||
3085 | return -EINVAL; | ||
3086 | |||
3087 | if (get_user(iovec_count, &sgio32->iovec_count)) | ||
3088 | return -EFAULT; | ||
3089 | |||
3090 | { | ||
3091 | void __user *top = compat_alloc_user_space(0); | ||
3092 | void __user *new = compat_alloc_user_space(sizeof(sg_io_hdr_t) + | ||
3093 | (iovec_count * sizeof(sg_iovec_t))); | ||
3094 | if (new > top) | ||
3095 | return -EINVAL; | ||
3096 | |||
3097 | sgio = new; | ||
3098 | } | ||
3099 | |||
3100 | /* Ok, now construct. */ | ||
3101 | if (copy_in_user(&sgio->interface_id, &sgio32->interface_id, | ||
3102 | (2 * sizeof(int)) + | ||
3103 | (2 * sizeof(unsigned char)) + | ||
3104 | (1 * sizeof(unsigned short)) + | ||
3105 | (1 * sizeof(unsigned int)))) | ||
3106 | return -EFAULT; | ||
3107 | |||
3108 | if (get_user(data, &sgio32->dxferp)) | ||
3109 | return -EFAULT; | ||
3110 | dxferp = compat_ptr(data); | ||
3111 | if (iovec_count) { | ||
3112 | if (sg_build_iovec(sgio, dxferp, iovec_count)) | ||
3113 | return -EFAULT; | ||
3114 | } else { | ||
3115 | if (put_user(dxferp, &sgio->dxferp)) | ||
3116 | return -EFAULT; | ||
3117 | } | ||
3118 | |||
3119 | { | ||
3120 | unsigned char __user *cmdp; | ||
3121 | unsigned char __user *sbp; | ||
3122 | |||
3123 | if (get_user(data, &sgio32->cmdp)) | ||
3124 | return -EFAULT; | ||
3125 | cmdp = compat_ptr(data); | ||
3126 | |||
3127 | if (get_user(data, &sgio32->sbp)) | ||
3128 | return -EFAULT; | ||
3129 | sbp = compat_ptr(data); | ||
3130 | |||
3131 | if (put_user(cmdp, &sgio->cmdp) || | ||
3132 | put_user(sbp, &sgio->sbp)) | ||
3133 | return -EFAULT; | ||
3134 | } | ||
3135 | |||
3136 | if (copy_in_user(&sgio->timeout, &sgio32->timeout, | ||
3137 | 3 * sizeof(int))) | ||
3138 | return -EFAULT; | ||
3139 | |||
3140 | if (get_user(data, &sgio32->usr_ptr)) | ||
3141 | return -EFAULT; | ||
3142 | if (put_user(compat_ptr(data), &sgio->usr_ptr)) | ||
3143 | return -EFAULT; | ||
3144 | |||
3145 | err = nvme_sg_io(ns, sgio); | ||
3146 | if (err >= 0) { | ||
3147 | void __user *datap; | ||
3148 | |||
3149 | if (copy_in_user(&sgio32->pack_id, &sgio->pack_id, | ||
3150 | sizeof(int)) || | ||
3151 | get_user(datap, &sgio->usr_ptr) || | ||
3152 | put_user((u32)(unsigned long)datap, | ||
3153 | &sgio32->usr_ptr) || | ||
3154 | copy_in_user(&sgio32->status, &sgio->status, | ||
3155 | (4 * sizeof(unsigned char)) + | ||
3156 | (2 * sizeof(unsigned short)) + | ||
3157 | (3 * sizeof(int)))) | ||
3158 | err = -EFAULT; | ||
3159 | } | ||
3160 | |||
3161 | return err; | ||
3162 | } | ||
3163 | #endif | ||
3164 | |||
3165 | int nvme_sg_get_version_num(int __user *ip) | 3027 | int nvme_sg_get_version_num(int __user *ip) |
3166 | { | 3028 | { |
3167 | return put_user(sg_version_num, ip); | 3029 | return put_user(sg_version_num, ip); |
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index 40ee7705df63..ac8c62cb4875 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c | |||
@@ -112,37 +112,16 @@ static const struct block_device_operations rsxx_fops = { | |||
112 | 112 | ||
113 | static void disk_stats_start(struct rsxx_cardinfo *card, struct bio *bio) | 113 | static void disk_stats_start(struct rsxx_cardinfo *card, struct bio *bio) |
114 | { | 114 | { |
115 | struct hd_struct *part0 = &card->gendisk->part0; | 115 | generic_start_io_acct(bio_data_dir(bio), bio_sectors(bio), |
116 | int rw = bio_data_dir(bio); | 116 | &card->gendisk->part0); |
117 | int cpu; | ||
118 | |||
119 | cpu = part_stat_lock(); | ||
120 | |||
121 | part_round_stats(cpu, part0); | ||
122 | part_inc_in_flight(part0, rw); | ||
123 | |||
124 | part_stat_unlock(); | ||
125 | } | 117 | } |
126 | 118 | ||
127 | static void disk_stats_complete(struct rsxx_cardinfo *card, | 119 | static void disk_stats_complete(struct rsxx_cardinfo *card, |
128 | struct bio *bio, | 120 | struct bio *bio, |
129 | unsigned long start_time) | 121 | unsigned long start_time) |
130 | { | 122 | { |
131 | struct hd_struct *part0 = &card->gendisk->part0; | 123 | generic_end_io_acct(bio_data_dir(bio), &card->gendisk->part0, |
132 | unsigned long duration = jiffies - start_time; | 124 | start_time); |
133 | int rw = bio_data_dir(bio); | ||
134 | int cpu; | ||
135 | |||
136 | cpu = part_stat_lock(); | ||
137 | |||
138 | part_stat_add(cpu, part0, sectors[rw], bio_sectors(bio)); | ||
139 | part_stat_inc(cpu, part0, ios[rw]); | ||
140 | part_stat_add(cpu, part0, ticks[rw], duration); | ||
141 | |||
142 | part_round_stats(cpu, part0); | ||
143 | part_dec_in_flight(part0, rw); | ||
144 | |||
145 | part_stat_unlock(); | ||
146 | } | 125 | } |
147 | 126 | ||
148 | static void bio_dma_done_cb(struct rsxx_cardinfo *card, | 127 | static void bio_dma_done_cb(struct rsxx_cardinfo *card, |
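The rsxx hunk above is the first of several conversions in this section (bcache, dm and md below follow the same shape): the open-coded part_stat_lock()/part_stat_*()/part_stat_unlock() sequences are replaced by the generic accounting helpers from 3.19/core. A minimal sketch of the resulting pattern in a hypothetical bio-based driver ("mydrv" is an illustrative name, not from this series):

static void mydrv_submit_bio(struct gendisk *disk, struct bio *bio)
{
	unsigned long start_time = jiffies;

	/* accounts ios[rw], sectors[rw] and the in-flight count for part0 */
	generic_start_io_acct(bio_data_dir(bio), bio_sectors(bio),
			      &disk->part0);

	/* ... hand the bio to the hardware ... */

	/* on completion: accounts ticks[rw] and drops the in-flight count */
	generic_end_io_acct(bio_data_dir(bio), &disk->part0, start_time);
}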
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 5ac312f6e0be..2236c6f31608 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c | |||
@@ -126,7 +126,6 @@ struct blkfront_info | |||
126 | unsigned int persistent_gnts_c; | 126 | unsigned int persistent_gnts_c; |
127 | unsigned long shadow_free; | 127 | unsigned long shadow_free; |
128 | unsigned int feature_flush; | 128 | unsigned int feature_flush; |
129 | unsigned int flush_op; | ||
130 | unsigned int feature_discard:1; | 129 | unsigned int feature_discard:1; |
131 | unsigned int feature_secdiscard:1; | 130 | unsigned int feature_secdiscard:1; |
132 | unsigned int discard_granularity; | 131 | unsigned int discard_granularity; |
@@ -479,7 +478,19 @@ static int blkif_queue_request(struct request *req) | |||
479 | * way. (It's also a FLUSH+FUA, since it is | 478 | * way. (It's also a FLUSH+FUA, since it is |
480 | * guaranteed ordered WRT previous writes.) | 479 | * guaranteed ordered WRT previous writes.) |
481 | */ | 480 | */ |
482 | ring_req->operation = info->flush_op; | 481 | switch (info->feature_flush & |
482 | ((REQ_FLUSH|REQ_FUA))) { | ||
483 | case REQ_FLUSH|REQ_FUA: | ||
484 | ring_req->operation = | ||
485 | BLKIF_OP_WRITE_BARRIER; | ||
486 | break; | ||
487 | case REQ_FLUSH: | ||
488 | ring_req->operation = | ||
489 | BLKIF_OP_FLUSH_DISKCACHE; | ||
490 | break; | ||
491 | default: | ||
492 | ring_req->operation = 0; | ||
493 | } | ||
483 | } | 494 | } |
484 | ring_req->u.rw.nr_segments = nseg; | 495 | ring_req->u.rw.nr_segments = nseg; |
485 | } | 496 | } |
@@ -582,12 +593,14 @@ static inline void flush_requests(struct blkfront_info *info) | |||
582 | notify_remote_via_irq(info->irq); | 593 | notify_remote_via_irq(info->irq); |
583 | } | 594 | } |
584 | 595 | ||
585 | static inline bool blkif_request_flush_valid(struct request *req, | 596 | static inline bool blkif_request_flush_invalid(struct request *req, |
586 | struct blkfront_info *info) | 597 | struct blkfront_info *info) |
587 | { | 598 | { |
588 | return ((req->cmd_type != REQ_TYPE_FS) || | 599 | return ((req->cmd_type != REQ_TYPE_FS) || |
589 | ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) && | 600 | ((req->cmd_flags & REQ_FLUSH) && |
590 | !info->flush_op)); | 601 | !(info->feature_flush & REQ_FLUSH)) || |
602 | ((req->cmd_flags & REQ_FUA) && | ||
603 | !(info->feature_flush & REQ_FUA))); | ||
591 | } | 604 | } |
592 | 605 | ||
593 | /* | 606 | /* |
@@ -612,8 +625,8 @@ static void do_blkif_request(struct request_queue *rq) | |||
612 | 625 | ||
613 | blk_start_request(req); | 626 | blk_start_request(req); |
614 | 627 | ||
615 | if (blkif_request_flush_valid(req, info)) { | 628 | if (blkif_request_flush_invalid(req, info)) { |
616 | __blk_end_request_all(req, -EIO); | 629 | __blk_end_request_all(req, -EOPNOTSUPP); |
617 | continue; | 630 | continue; |
618 | } | 631 | } |
619 | 632 | ||
@@ -683,20 +696,26 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, | |||
683 | return 0; | 696 | return 0; |
684 | } | 697 | } |
685 | 698 | ||
699 | static const char *flush_info(unsigned int feature_flush) | ||
700 | { | ||
701 | switch (feature_flush & ((REQ_FLUSH | REQ_FUA))) { | ||
702 | case REQ_FLUSH|REQ_FUA: | ||
703 | return "barrier: enabled;"; | ||
704 | case REQ_FLUSH: | ||
705 | return "flush diskcache: enabled;"; | ||
706 | default: | ||
707 | return "barrier or flush: disabled;"; | ||
708 | } | ||
709 | } | ||
686 | 710 | ||
687 | static void xlvbd_flush(struct blkfront_info *info) | 711 | static void xlvbd_flush(struct blkfront_info *info) |
688 | { | 712 | { |
689 | blk_queue_flush(info->rq, info->feature_flush); | 713 | blk_queue_flush(info->rq, info->feature_flush); |
690 | printk(KERN_INFO "blkfront: %s: %s: %s %s %s %s %s\n", | 714 | pr_info("blkfront: %s: %s %s %s %s %s\n", |
691 | info->gd->disk_name, | 715 | info->gd->disk_name, flush_info(info->feature_flush), |
692 | info->flush_op == BLKIF_OP_WRITE_BARRIER ? | 716 | "persistent grants:", info->feature_persistent ? |
693 | "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ? | 717 | "enabled;" : "disabled;", "indirect descriptors:", |
694 | "flush diskcache" : "barrier or flush"), | 718 | info->max_indirect_segments ? "enabled;" : "disabled;"); |
695 | info->feature_flush ? "enabled;" : "disabled;", | ||
696 | "persistent grants:", | ||
697 | info->feature_persistent ? "enabled;" : "disabled;", | ||
698 | "indirect descriptors:", | ||
699 | info->max_indirect_segments ? "enabled;" : "disabled;"); | ||
700 | } | 719 | } |
701 | 720 | ||
702 | static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset) | 721 | static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset) |
@@ -1188,7 +1207,6 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) | |||
1188 | if (error == -EOPNOTSUPP) | 1207 | if (error == -EOPNOTSUPP) |
1189 | error = 0; | 1208 | error = 0; |
1190 | info->feature_flush = 0; | 1209 | info->feature_flush = 0; |
1191 | info->flush_op = 0; | ||
1192 | xlvbd_flush(info); | 1210 | xlvbd_flush(info); |
1193 | } | 1211 | } |
1194 | /* fall through */ | 1212 | /* fall through */ |
@@ -1808,7 +1826,6 @@ static void blkfront_connect(struct blkfront_info *info) | |||
1808 | physical_sector_size = sector_size; | 1826 | physical_sector_size = sector_size; |
1809 | 1827 | ||
1810 | info->feature_flush = 0; | 1828 | info->feature_flush = 0; |
1811 | info->flush_op = 0; | ||
1812 | 1829 | ||
1813 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, | 1830 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, |
1814 | "feature-barrier", "%d", &barrier, | 1831 | "feature-barrier", "%d", &barrier, |
@@ -1821,10 +1838,8 @@ static void blkfront_connect(struct blkfront_info *info) | |||
1821 | * | 1838 | * |
1822 | * If there are barriers, then we use flush. | 1839 | * If there are barriers, then we use flush. |
1823 | */ | 1840 | */ |
1824 | if (!err && barrier) { | 1841 | if (!err && barrier) |
1825 | info->feature_flush = REQ_FLUSH | REQ_FUA; | 1842 | info->feature_flush = REQ_FLUSH | REQ_FUA; |
1826 | info->flush_op = BLKIF_OP_WRITE_BARRIER; | ||
1827 | } | ||
1828 | /* | 1843 | /* |
1829 | * And if there is "feature-flush-cache" use that above | 1844 | * And if there is "feature-flush-cache" use that above |
1830 | * barriers. | 1845 | * barriers. |
@@ -1833,10 +1848,8 @@ static void blkfront_connect(struct blkfront_info *info) | |||
1833 | "feature-flush-cache", "%d", &flush, | 1848 | "feature-flush-cache", "%d", &flush, |
1834 | NULL); | 1849 | NULL); |
1835 | 1850 | ||
1836 | if (!err && flush) { | 1851 | if (!err && flush) |
1837 | info->feature_flush = REQ_FLUSH; | 1852 | info->feature_flush = REQ_FLUSH; |
1838 | info->flush_op = BLKIF_OP_FLUSH_DISKCACHE; | ||
1839 | } | ||
1840 | 1853 | ||
1841 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, | 1854 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, |
1842 | "feature-discard", "%d", &discard, | 1855 | "feature-discard", "%d", &discard, |
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 62e6e98186b5..ab43faddb447 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c | |||
@@ -601,13 +601,8 @@ static void request_endio(struct bio *bio, int error) | |||
601 | static void bio_complete(struct search *s) | 601 | static void bio_complete(struct search *s) |
602 | { | 602 | { |
603 | if (s->orig_bio) { | 603 | if (s->orig_bio) { |
604 | int cpu, rw = bio_data_dir(s->orig_bio); | 604 | generic_end_io_acct(bio_data_dir(s->orig_bio), |
605 | unsigned long duration = jiffies - s->start_time; | 605 | &s->d->disk->part0, s->start_time); |
606 | |||
607 | cpu = part_stat_lock(); | ||
608 | part_round_stats(cpu, &s->d->disk->part0); | ||
609 | part_stat_add(cpu, &s->d->disk->part0, ticks[rw], duration); | ||
610 | part_stat_unlock(); | ||
611 | 606 | ||
612 | trace_bcache_request_end(s->d, s->orig_bio); | 607 | trace_bcache_request_end(s->d, s->orig_bio); |
613 | bio_endio(s->orig_bio, s->iop.error); | 608 | bio_endio(s->orig_bio, s->iop.error); |
@@ -959,12 +954,9 @@ static void cached_dev_make_request(struct request_queue *q, struct bio *bio) | |||
959 | struct search *s; | 954 | struct search *s; |
960 | struct bcache_device *d = bio->bi_bdev->bd_disk->private_data; | 955 | struct bcache_device *d = bio->bi_bdev->bd_disk->private_data; |
961 | struct cached_dev *dc = container_of(d, struct cached_dev, disk); | 956 | struct cached_dev *dc = container_of(d, struct cached_dev, disk); |
962 | int cpu, rw = bio_data_dir(bio); | 957 | int rw = bio_data_dir(bio); |
963 | 958 | ||
964 | cpu = part_stat_lock(); | 959 | generic_start_io_acct(rw, bio_sectors(bio), &d->disk->part0); |
965 | part_stat_inc(cpu, &d->disk->part0, ios[rw]); | ||
966 | part_stat_add(cpu, &d->disk->part0, sectors[rw], bio_sectors(bio)); | ||
967 | part_stat_unlock(); | ||
968 | 960 | ||
969 | bio->bi_bdev = dc->bdev; | 961 | bio->bi_bdev = dc->bdev; |
970 | bio->bi_iter.bi_sector += dc->sb.data_offset; | 962 | bio->bi_iter.bi_sector += dc->sb.data_offset; |
@@ -1074,12 +1066,9 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio) | |||
1074 | struct search *s; | 1066 | struct search *s; |
1075 | struct closure *cl; | 1067 | struct closure *cl; |
1076 | struct bcache_device *d = bio->bi_bdev->bd_disk->private_data; | 1068 | struct bcache_device *d = bio->bi_bdev->bd_disk->private_data; |
1077 | int cpu, rw = bio_data_dir(bio); | 1069 | int rw = bio_data_dir(bio); |
1078 | 1070 | ||
1079 | cpu = part_stat_lock(); | 1071 | generic_start_io_acct(rw, bio_sectors(bio), &d->disk->part0); |
1080 | part_stat_inc(cpu, &d->disk->part0, ios[rw]); | ||
1081 | part_stat_add(cpu, &d->disk->part0, sectors[rw], bio_sectors(bio)); | ||
1082 | part_stat_unlock(); | ||
1083 | 1072 | ||
1084 | s = search_alloc(bio, d); | 1073 | s = search_alloc(bio, d); |
1085 | cl = &s->cl; | 1074 | cl = &s->cl; |
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8f37ed215b19..4c06585bf165 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -605,13 +605,10 @@ static void end_io_acct(struct dm_io *io) | |||
605 | struct mapped_device *md = io->md; | 605 | struct mapped_device *md = io->md; |
606 | struct bio *bio = io->bio; | 606 | struct bio *bio = io->bio; |
607 | unsigned long duration = jiffies - io->start_time; | 607 | unsigned long duration = jiffies - io->start_time; |
608 | int pending, cpu; | 608 | int pending; |
609 | int rw = bio_data_dir(bio); | 609 | int rw = bio_data_dir(bio); |
610 | 610 | ||
611 | cpu = part_stat_lock(); | 611 | generic_end_io_acct(rw, &dm_disk(md)->part0, io->start_time); |
612 | part_round_stats(cpu, &dm_disk(md)->part0); | ||
613 | part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration); | ||
614 | part_stat_unlock(); | ||
615 | 612 | ||
616 | if (unlikely(dm_stats_used(&md->stats))) | 613 | if (unlikely(dm_stats_used(&md->stats))) |
617 | dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_iter.bi_sector, | 614 | dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_iter.bi_sector, |
@@ -1651,16 +1648,12 @@ static void _dm_request(struct request_queue *q, struct bio *bio) | |||
1651 | { | 1648 | { |
1652 | int rw = bio_data_dir(bio); | 1649 | int rw = bio_data_dir(bio); |
1653 | struct mapped_device *md = q->queuedata; | 1650 | struct mapped_device *md = q->queuedata; |
1654 | int cpu; | ||
1655 | int srcu_idx; | 1651 | int srcu_idx; |
1656 | struct dm_table *map; | 1652 | struct dm_table *map; |
1657 | 1653 | ||
1658 | map = dm_get_live_table(md, &srcu_idx); | 1654 | map = dm_get_live_table(md, &srcu_idx); |
1659 | 1655 | ||
1660 | cpu = part_stat_lock(); | 1656 | generic_start_io_acct(rw, bio_sectors(bio), &dm_disk(md)->part0); |
1661 | part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]); | ||
1662 | part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); | ||
1663 | part_stat_unlock(); | ||
1664 | 1657 | ||
1665 | /* if we're suspended, we have to queue this io for later */ | 1658 | /* if we're suspended, we have to queue this io for later */ |
1666 | if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { | 1659 | if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { |
diff --git a/drivers/md/md.c b/drivers/md/md.c index 9233c71138f1..056ccd28c037 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -247,7 +247,6 @@ static void md_make_request(struct request_queue *q, struct bio *bio) | |||
247 | { | 247 | { |
248 | const int rw = bio_data_dir(bio); | 248 | const int rw = bio_data_dir(bio); |
249 | struct mddev *mddev = q->queuedata; | 249 | struct mddev *mddev = q->queuedata; |
250 | int cpu; | ||
251 | unsigned int sectors; | 250 | unsigned int sectors; |
252 | 251 | ||
253 | if (mddev == NULL || mddev->pers == NULL | 252 | if (mddev == NULL || mddev->pers == NULL |
@@ -284,10 +283,7 @@ static void md_make_request(struct request_queue *q, struct bio *bio) | |||
284 | sectors = bio_sectors(bio); | 283 | sectors = bio_sectors(bio); |
285 | mddev->pers->make_request(mddev, bio); | 284 | mddev->pers->make_request(mddev, bio); |
286 | 285 | ||
287 | cpu = part_stat_lock(); | 286 | generic_start_io_acct(rw, sectors, &mddev->gendisk->part0); |
288 | part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); | ||
289 | part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors); | ||
290 | part_stat_unlock(); | ||
291 | 287 | ||
292 | if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended) | 288 | if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended) |
293 | wake_up(&mddev->sb_wait); | 289 | wake_up(&mddev->sb_wait); |