diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-11-23 14:20:14 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-11-23 14:20:14 -0500 |
| commit | 3381918fec9278d14f776d1dabd68da85fd6822e (patch) | |
| tree | 9ef5806b5ab9f7b941d5069fa9c7acd6d4f901c3 /drivers | |
| parent | d88783b9c8849d88c3a75b7b9071cba072b47eba (diff) | |
| parent | 14b04063cc994effc86f976625bf8f806d8d44cb (diff) | |
Merge tag 'for-linus-20181123' of git://git.kernel.dk/linux-block

Pull block fix from Jens Axboe:
 "Just a single fix for this week, fixing an issue with nvme-fc"

* tag 'for-linus-20181123' of git://git.kernel.dk/linux-block:
  nvme-fc: resolve io failures during connect

Diffstat (limited to 'drivers')
| -rw-r--r-- | drivers/nvme/host/fc.c | 73 |
1 files changed, 63 insertions, 10 deletions
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 0b70c8bab045..54032c466636 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
| @@ -152,6 +152,7 @@ struct nvme_fc_ctrl { | |||
| 152 | 152 | ||
| 153 | bool ioq_live; | 153 | bool ioq_live; |
| 154 | bool assoc_active; | 154 | bool assoc_active; |
| 155 | atomic_t err_work_active; | ||
| 155 | u64 association_id; | 156 | u64 association_id; |
| 156 | 157 | ||
| 157 | struct list_head ctrl_list; /* rport->ctrl_list */ | 158 | struct list_head ctrl_list; /* rport->ctrl_list */ |
| @@ -160,6 +161,7 @@ struct nvme_fc_ctrl { | |||
| 160 | struct blk_mq_tag_set tag_set; | 161 | struct blk_mq_tag_set tag_set; |
| 161 | 162 | ||
| 162 | struct delayed_work connect_work; | 163 | struct delayed_work connect_work; |
| 164 | struct work_struct err_work; | ||
| 163 | 165 | ||
| 164 | struct kref ref; | 166 | struct kref ref; |
| 165 | u32 flags; | 167 | u32 flags; |
| @@ -1531,6 +1533,10 @@ nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl) | |||
| 1531 | struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops; | 1533 | struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops; |
| 1532 | int i; | 1534 | int i; |
| 1533 | 1535 | ||
| 1536 | /* ensure we've initialized the ops once */ | ||
| 1537 | if (!(aen_op->flags & FCOP_FLAGS_AEN)) | ||
| 1538 | return; | ||
| 1539 | |||
| 1534 | for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) | 1540 | for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) |
| 1535 | __nvme_fc_abort_op(ctrl, aen_op); | 1541 | __nvme_fc_abort_op(ctrl, aen_op); |
| 1536 | } | 1542 | } |
| @@ -2049,7 +2055,25 @@ nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl) | |||
| 2049 | static void | 2055 | static void |
| 2050 | nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) | 2056 | nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) |
| 2051 | { | 2057 | { |
| 2052 | /* only proceed if in LIVE state - e.g. on first error */ | 2058 | int active; |
| 2059 | |||
| 2060 | /* | ||
| 2061 | * if an error (io timeout, etc) while (re)connecting, | ||
| 2062 | * it's an error on creating the new association. | ||
| 2063 | * Start the error recovery thread if it hasn't already | ||
| 2064 | * been started. It is expected there could be multiple | ||
| 2065 | * ios hitting this path before things are cleaned up. | ||
| 2066 | */ | ||
| 2067 | if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { | ||
| 2068 | active = atomic_xchg(&ctrl->err_work_active, 1); | ||
| 2069 | if (!active && !schedule_work(&ctrl->err_work)) { | ||
| 2070 | atomic_set(&ctrl->err_work_active, 0); | ||
| 2071 | WARN_ON(1); | ||
| 2072 | } | ||
| 2073 | return; | ||
| 2074 | } | ||
| 2075 | |||
| 2076 | /* Otherwise, only proceed if in LIVE state - e.g. on first error */ | ||
| 2053 | if (ctrl->ctrl.state != NVME_CTRL_LIVE) | 2077 | if (ctrl->ctrl.state != NVME_CTRL_LIVE) |
| 2054 | return; | 2078 | return; |
| 2055 | 2079 | ||
| @@ -2814,6 +2838,7 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) | |||
| 2814 | { | 2838 | { |
| 2815 | struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); | 2839 | struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); |
| 2816 | 2840 | ||
| 2841 | cancel_work_sync(&ctrl->err_work); | ||
| 2817 | cancel_delayed_work_sync(&ctrl->connect_work); | 2842 | cancel_delayed_work_sync(&ctrl->connect_work); |
| 2818 | /* | 2843 | /* |
| 2819 | * kill the association on the link side. this will block | 2844 | * kill the association on the link side. this will block |
| @@ -2866,23 +2891,30 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) | |||
| 2866 | } | 2891 | } |
| 2867 | 2892 | ||
| 2868 | static void | 2893 | static void |
| 2869 | nvme_fc_reset_ctrl_work(struct work_struct *work) | 2894 | __nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl) |
| 2870 | { | 2895 | { |
| 2871 | struct nvme_fc_ctrl *ctrl = | 2896 | nvme_stop_keep_alive(&ctrl->ctrl); |
| 2872 | container_of(work, struct nvme_fc_ctrl, ctrl.reset_work); | ||
| 2873 | int ret; | ||
| 2874 | |||
| 2875 | nvme_stop_ctrl(&ctrl->ctrl); | ||
| 2876 | 2897 | ||
| 2877 | /* will block will waiting for io to terminate */ | 2898 | /* will block will waiting for io to terminate */ |
| 2878 | nvme_fc_delete_association(ctrl); | 2899 | nvme_fc_delete_association(ctrl); |
| 2879 | 2900 | ||
| 2880 | if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { | 2901 | if (ctrl->ctrl.state != NVME_CTRL_CONNECTING && |
| 2902 | !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) | ||
| 2881 | dev_err(ctrl->ctrl.device, | 2903 | dev_err(ctrl->ctrl.device, |
| 2882 | "NVME-FC{%d}: error_recovery: Couldn't change state " | 2904 | "NVME-FC{%d}: error_recovery: Couldn't change state " |
| 2883 | "to CONNECTING\n", ctrl->cnum); | 2905 | "to CONNECTING\n", ctrl->cnum); |
| 2884 | return; | 2906 | } |
| 2885 | } | 2907 | |
| 2908 | static void | ||
| 2909 | nvme_fc_reset_ctrl_work(struct work_struct *work) | ||
| 2910 | { | ||
| 2911 | struct nvme_fc_ctrl *ctrl = | ||
| 2912 | container_of(work, struct nvme_fc_ctrl, ctrl.reset_work); | ||
| 2913 | int ret; | ||
| 2914 | |||
| 2915 | __nvme_fc_terminate_io(ctrl); | ||
| 2916 | |||
| 2917 | nvme_stop_ctrl(&ctrl->ctrl); | ||
| 2886 | 2918 | ||
| 2887 | if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) | 2919 | if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) |
| 2888 | ret = nvme_fc_create_association(ctrl); | 2920 | ret = nvme_fc_create_association(ctrl); |
| @@ -2897,6 +2929,24 @@ nvme_fc_reset_ctrl_work(struct work_struct *work) | |||
| 2897 | ctrl->cnum); | 2929 | ctrl->cnum); |
| 2898 | } | 2930 | } |
| 2899 | 2931 | ||
| 2932 | static void | ||
| 2933 | nvme_fc_connect_err_work(struct work_struct *work) | ||
| 2934 | { | ||
| 2935 | struct nvme_fc_ctrl *ctrl = | ||
| 2936 | container_of(work, struct nvme_fc_ctrl, err_work); | ||
| 2937 | |||
| 2938 | __nvme_fc_terminate_io(ctrl); | ||
| 2939 | |||
| 2940 | atomic_set(&ctrl->err_work_active, 0); | ||
| 2941 | |||
| 2942 | /* | ||
| 2943 | * Rescheduling the connection after recovering | ||
| 2944 | * from the io error is left to the reconnect work | ||
| 2945 | * item, which is what should have stalled waiting on | ||
| 2946 | * the io that had the error that scheduled this work. | ||
| 2947 | */ | ||
| 2948 | } | ||
| 2949 | |||
| 2900 | static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { | 2950 | static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { |
| 2901 | .name = "fc", | 2951 | .name = "fc", |
| 2902 | .module = THIS_MODULE, | 2952 | .module = THIS_MODULE, |
| @@ -3007,6 +3057,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, | |||
| 3007 | ctrl->cnum = idx; | 3057 | ctrl->cnum = idx; |
| 3008 | ctrl->ioq_live = false; | 3058 | ctrl->ioq_live = false; |
| 3009 | ctrl->assoc_active = false; | 3059 | ctrl->assoc_active = false; |
| 3060 | atomic_set(&ctrl->err_work_active, 0); | ||
| 3010 | init_waitqueue_head(&ctrl->ioabort_wait); | 3061 | init_waitqueue_head(&ctrl->ioabort_wait); |
| 3011 | 3062 | ||
| 3012 | get_device(ctrl->dev); | 3063 | get_device(ctrl->dev); |
| @@ -3014,6 +3065,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, | |||
| 3014 | 3065 | ||
| 3015 | INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work); | 3066 | INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work); |
| 3016 | INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); | 3067 | INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); |
| 3068 | INIT_WORK(&ctrl->err_work, nvme_fc_connect_err_work); | ||
| 3017 | spin_lock_init(&ctrl->lock); | 3069 | spin_lock_init(&ctrl->lock); |
| 3018 | 3070 | ||
| 3019 | /* io queue count */ | 3071 | /* io queue count */ |
| @@ -3103,6 +3155,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, | |||
| 3103 | fail_ctrl: | 3155 | fail_ctrl: |
| 3104 | nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING); | 3156 | nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING); |
| 3105 | cancel_work_sync(&ctrl->ctrl.reset_work); | 3157 | cancel_work_sync(&ctrl->ctrl.reset_work); |
| 3158 | cancel_work_sync(&ctrl->err_work); | ||
| 3106 | cancel_delayed_work_sync(&ctrl->connect_work); | 3159 | cancel_delayed_work_sync(&ctrl->connect_work); |
| 3107 | 3160 | ||
| 3108 | ctrl->ctrl.opts = NULL; | 3161 | ctrl->ctrl.opts = NULL; |
