author    Dan Williams <dan.j.williams@intel.com>  2018-04-05 19:18:55 -0400
committer Dan Williams <dan.j.williams@intel.com>  2018-04-07 10:55:05 -0400
commit    bc6ba8085842164f2a8dc2e78e23a7167872abbe (patch)
tree      fcb0a7821f2ebdec33968cc6600b72cd1ee2d123
parent    459d0ddb079c869c986e1bb871c91564a4b8ccfe (diff)
nfit, address-range-scrub: rework and simplify ARS state machine
ARS is an operation that can take 10s to 100s of seconds to find media
errors that should rarely be present. If the platform crashes due to
media errors in persistent memory, the expectation is that the BIOS will
report those known errors in a 'short' ARS request.

A 'short' ARS request asks platform firmware to return an ARS payload
with all known errors, but without issuing a 'long' scrub. At driver
init a short request is issued to all PMEM ranges before registering
regions. Then, in the background, a long ARS is scheduled for each
region.

The ARS implementation is simplified to centralize ARS completion work
in the ars_complete() helper. The timeout is removed since there is no
facility to cancel ARS, and this otherwise arranges for system init to
never be blocked waiting for a 'long' ARS. The ars_state flags are used
to coordinate ARS requests from driver init, ARS requests from
userspace, and ARS requests in response to media error notifications.

Given that there is no notification of ARS completion the
implementation still needs to poll. It backs off exponentially to a
maximum poll period of 30 minutes.

Suggested-by: Toshi Kani <toshi.kani@hpe.com>
Co-developed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
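[ Illustration, not part of the patch: the poll/backoff policy described
above and implemented by __acpi_nfit_scrub() below boils down to the
sketch that follows. The helper name ars_next_poll_secs() is
hypothetical; the driver computes this inline and re-arms its delayed
work with queue_delayed_work(nfit_wq, &acpi_desc->dwork, secs * HZ). ]

    /*
     * Hypothetical helper, for illustration only: next poll interval in
     * seconds given the result of the latest ARS status query.
     */
    static unsigned int ars_next_poll_secs(unsigned int prev_secs, int query_rc)
    {
            if (query_rc == -EBUSY) {
                    /* ARS still running: double the interval, cap at 30 minutes */
                    unsigned int next = prev_secs * 2;

                    return next > 30 * 60 ? 30 * 60 : next;
            }
            if (query_rc == -ENOSPC)
                    return 1;       /* status buffer overflowed: continue, re-poll soon */
            /*
             * 0 stands in for "no further polling needed"; the real code may
             * instead kick off the next pending range before finishing.
             */
            return 0;
    }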
 drivers/acpi/nfit/core.c | 482 ++++++++++++++++++++----------------------
 drivers/acpi/nfit/nfit.h |   4 +-
 2 files changed, 218 insertions(+), 268 deletions(-)
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 866853abebea..2532294bbd68 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -35,16 +35,6 @@ static bool force_enable_dimms;
 module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
 
-static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT;
-module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds");
-
-/* after three payloads of overflow, it's dead jim */
-static unsigned int scrub_overflow_abort = 3;
-module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_overflow_abort,
-		"Number of times we overflow ARS results before abort");
-
 static bool disable_vendor_specific;
 module_param(disable_vendor_specific, bool, S_IRUGO);
 MODULE_PARM_DESC(disable_vendor_specific,
@@ -1251,7 +1241,7 @@ static ssize_t scrub_show(struct device *dev,
 
 		mutex_lock(&acpi_desc->init_mutex);
 		rc = sprintf(buf, "%d%s", acpi_desc->scrub_count,
-				work_busy(&acpi_desc->work)
+				work_busy(&acpi_desc->dwork.work)
 				&& !acpi_desc->cancel ? "+\n" : "\n");
 		mutex_unlock(&acpi_desc->init_mutex);
 	}
@@ -2452,7 +2442,8 @@ static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa
 	memset(&ars_start, 0, sizeof(ars_start));
 	ars_start.address = spa->address;
 	ars_start.length = spa->length;
-	ars_start.flags = acpi_desc->ars_start_flags;
+	if (test_bit(ARS_SHORT, &nfit_spa->ars_state))
+		ars_start.flags = ND_ARS_RETURN_PREV_DATA;
 	if (nfit_spa_type(spa) == NFIT_SPA_PM)
 		ars_start.type = ND_ARS_PERSISTENT;
 	else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE)
@@ -2500,6 +2491,52 @@ static int ars_get_status(struct acpi_nfit_desc *acpi_desc)
 	return cmd_rc;
 }
 
+static void ars_complete(struct acpi_nfit_desc *acpi_desc,
+		struct nfit_spa *nfit_spa)
+{
+	struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
+	struct acpi_nfit_system_address *spa = nfit_spa->spa;
+	struct nd_region *nd_region = nfit_spa->nd_region;
+	struct device *dev;
+
+	if ((ars_status->address >= spa->address && ars_status->address
+				< spa->address + spa->length)
+			|| (ars_status->address < spa->address)) {
+		/*
+		 * Assume that if a scrub starts at an offset from the
+		 * start of nfit_spa that we are in the continuation
+		 * case.
+		 *
+		 * Otherwise, if the scrub covers the spa range, mark
+		 * any pending request complete.
+		 */
+		if (ars_status->address + ars_status->length
+				>= spa->address + spa->length)
+			/* complete */;
+		else
+			return;
+	} else
+		return;
+
+	if (test_bit(ARS_DONE, &nfit_spa->ars_state))
+		return;
+
+	if (!test_and_clear_bit(ARS_REQ, &nfit_spa->ars_state))
+		return;
+
+	if (nd_region) {
+		dev = nd_region_dev(nd_region);
+		nvdimm_region_notify(nd_region, NVDIMM_REVALIDATE_POISON);
+	} else
+		dev = acpi_desc->dev;
+
+	dev_dbg(dev, "ARS: range %d %s complete\n", spa->range_index,
+			test_bit(ARS_SHORT, &nfit_spa->ars_state)
+			? "short" : "long");
+	clear_bit(ARS_SHORT, &nfit_spa->ars_state);
+	set_bit(ARS_DONE, &nfit_spa->ars_state);
+}
+
 static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc)
 {
 	struct nvdimm_bus *nvdimm_bus = acpi_desc->nvdimm_bus;
@@ -2764,6 +2801,7 @@ static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc)
 		return -ENOMEM;
 
 	rc = ars_get_status(acpi_desc);
+
 	if (rc < 0 && rc != -ENOSPC)
 		return rc;
 
@@ -2773,223 +2811,125 @@ static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc)
 	return 0;
 }
 
-static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
-		struct nfit_spa *nfit_spa)
-{
-	struct acpi_nfit_system_address *spa = nfit_spa->spa;
-	unsigned int overflow_retry = scrub_overflow_abort;
-	u64 init_ars_start = 0, init_ars_len = 0;
-	struct device *dev = acpi_desc->dev;
-	unsigned int tmo = scrub_timeout;
-	int rc;
-
-	if (!test_bit(ARS_REQ, &nfit_spa->ars_state) || !nfit_spa->nd_region)
-		return;
-
-	rc = ars_start(acpi_desc, nfit_spa);
-	/*
-	 * If we timed out the initial scan we'll still be busy here,
-	 * and will wait another timeout before giving up permanently.
-	 */
-	if (rc < 0 && rc != -EBUSY)
-		return;
-
-	do {
-		u64 ars_start, ars_len;
-
-		if (acpi_desc->cancel)
-			break;
-		rc = acpi_nfit_query_poison(acpi_desc);
-		if (rc == -ENOTTY)
-			break;
-		if (rc == -EBUSY && !tmo) {
-			dev_warn(dev, "range %d ars timeout, aborting\n",
-					spa->range_index);
-			break;
-		}
-
-		if (rc == -EBUSY) {
-			/*
-			 * Note, entries may be appended to the list
-			 * while the lock is dropped, but the workqueue
-			 * being active prevents entries being deleted /
-			 * freed.
-			 */
-			mutex_unlock(&acpi_desc->init_mutex);
-			ssleep(1);
-			tmo--;
-			mutex_lock(&acpi_desc->init_mutex);
-			continue;
-		}
-
-		/* we got some results, but there are more pending... */
-		if (rc == -ENOSPC && overflow_retry--) {
-			if (!init_ars_len) {
-				init_ars_len = acpi_desc->ars_status->length;
-				init_ars_start = acpi_desc->ars_status->address;
-			}
-			rc = ars_continue(acpi_desc);
-		}
-
-		if (rc < 0) {
-			dev_warn(dev, "range %d ars continuation failed\n",
-					spa->range_index);
-			break;
-		}
-
-		if (init_ars_len) {
-			ars_start = init_ars_start;
-			ars_len = init_ars_len;
-		} else {
-			ars_start = acpi_desc->ars_status->address;
-			ars_len = acpi_desc->ars_status->length;
-		}
-		dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n",
-				spa->range_index, ars_start, ars_len);
-		/* notify the region about new poison entries */
-		nvdimm_region_notify(nfit_spa->nd_region,
-				NVDIMM_REVALIDATE_POISON);
-		break;
-	} while (1);
-}
-
-static void acpi_nfit_scrub(struct work_struct *work)
-{
-	struct device *dev;
-	u64 init_scrub_length = 0;
-	struct nfit_spa *nfit_spa;
-	u64 init_scrub_address = 0;
-	bool init_ars_done = false;
-	struct acpi_nfit_desc *acpi_desc;
-	unsigned int tmo = scrub_timeout;
-	unsigned int overflow_retry = scrub_overflow_abort;
-
-	acpi_desc = container_of(work, typeof(*acpi_desc), work);
-	dev = acpi_desc->dev;
-
-	/*
-	 * We scrub in 2 phases. The first phase waits for any platform
-	 * firmware initiated scrubs to complete and then we go search for the
-	 * affected spa regions to mark them scanned. In the second phase we
-	 * initiate a directed scrub for every range that was not scrubbed in
-	 * phase 1. If we're called for a 'rescan', we harmlessly pass through
-	 * the first phase, but really only care about running phase 2, where
-	 * regions can be notified of new poison.
-	 */
-
-	/* process platform firmware initiated scrubs */
- retry:
-	mutex_lock(&acpi_desc->init_mutex);
-	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		struct nd_cmd_ars_status *ars_status;
-		struct acpi_nfit_system_address *spa;
-		u64 ars_start, ars_len;
-		int rc;
-
-		if (acpi_desc->cancel)
-			break;
-
-		if (nfit_spa->nd_region)
-			continue;
-
-		if (init_ars_done) {
-			/*
-			 * No need to re-query, we're now just
-			 * reconciling all the ranges covered by the
-			 * initial scrub
-			 */
-			rc = 0;
-		} else
-			rc = acpi_nfit_query_poison(acpi_desc);
-
-		if (rc == -ENOTTY) {
-			/* no ars capability, just register spa and move on */
-			acpi_nfit_register_region(acpi_desc, nfit_spa);
-			continue;
-		}
-
-		if (rc == -EBUSY && !tmo) {
-			/* fallthrough to directed scrub in phase 2 */
-			dev_warn(dev, "timeout awaiting ars results, continuing...\n");
-			break;
-		} else if (rc == -EBUSY) {
-			mutex_unlock(&acpi_desc->init_mutex);
-			ssleep(1);
-			tmo--;
-			goto retry;
-		}
-
-		/* we got some results, but there are more pending... */
-		if (rc == -ENOSPC && overflow_retry--) {
-			ars_status = acpi_desc->ars_status;
-			/*
-			 * Record the original scrub range, so that we
-			 * can recall all the ranges impacted by the
-			 * initial scrub.
-			 */
-			if (!init_scrub_length) {
-				init_scrub_length = ars_status->length;
-				init_scrub_address = ars_status->address;
-			}
-			rc = ars_continue(acpi_desc);
-			if (rc == 0) {
-				mutex_unlock(&acpi_desc->init_mutex);
-				goto retry;
-			}
-		}
-
-		if (rc < 0) {
-			/*
-			 * Initial scrub failed, we'll give it one more
-			 * try below...
-			 */
-			break;
-		}
-
-		/* We got some final results, record completed ranges */
-		ars_status = acpi_desc->ars_status;
-		if (init_scrub_length) {
-			ars_start = init_scrub_address;
-			ars_len = ars_start + init_scrub_length;
-		} else {
-			ars_start = ars_status->address;
-			ars_len = ars_status->length;
-		}
-		spa = nfit_spa->spa;
-
-		if (!init_ars_done) {
-			init_ars_done = true;
-			dev_dbg(dev, "init scrub %#llx + %#llx complete\n",
-					ars_start, ars_len);
-		}
-		if (ars_start <= spa->address && ars_start + ars_len
-				>= spa->address + spa->length)
-			acpi_nfit_register_region(acpi_desc, nfit_spa);
-	}
-
-	/*
-	 * For all the ranges not covered by an initial scrub we still
-	 * want to see if there are errors, but it's ok to discover them
-	 * asynchronously.
-	 */
-	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		/*
-		 * Flag all the ranges that still need scrubbing, but
-		 * register them now to make data available.
-		 */
-		if (!nfit_spa->nd_region) {
-			set_bit(ARS_REQ, &nfit_spa->ars_state);
-			acpi_nfit_register_region(acpi_desc, nfit_spa);
-		}
-	}
-	acpi_desc->init_complete = 1;
-
-	list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
-		acpi_nfit_async_scrub(acpi_desc, nfit_spa);
-	acpi_desc->scrub_count++;
-	acpi_desc->ars_start_flags = 0;
-	if (acpi_desc->scrub_count_state)
-		sysfs_notify_dirent(acpi_desc->scrub_count_state);
+static int ars_register(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa,
+		int *query_rc)
+{
+	int rc = *query_rc;
+
+	set_bit(ARS_REQ, &nfit_spa->ars_state);
+	set_bit(ARS_SHORT, &nfit_spa->ars_state);
+
+	switch (rc) {
+	case 0:
+	case -EAGAIN:
+		rc = ars_start(acpi_desc, nfit_spa);
+		if (rc == -EBUSY) {
+			*query_rc = rc;
+			break;
+		} else if (rc == 0) {
+			rc = acpi_nfit_query_poison(acpi_desc);
+		} else {
+			set_bit(ARS_FAILED, &nfit_spa->ars_state);
+			break;
+		}
+		if (rc == -EAGAIN)
+			clear_bit(ARS_SHORT, &nfit_spa->ars_state);
+		else if (rc == 0)
+			ars_complete(acpi_desc, nfit_spa);
+		break;
+	case -EBUSY:
+	case -ENOSPC:
+		break;
+	default:
+		set_bit(ARS_FAILED, &nfit_spa->ars_state);
+		break;
+	}
+
+	if (test_and_clear_bit(ARS_DONE, &nfit_spa->ars_state))
+		set_bit(ARS_REQ, &nfit_spa->ars_state);
+
+	return acpi_nfit_register_region(acpi_desc, nfit_spa);
+}
+
+static void ars_complete_all(struct acpi_nfit_desc *acpi_desc)
+{
+	struct nfit_spa *nfit_spa;
+
+	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
+			continue;
+		ars_complete(acpi_desc, nfit_spa);
+	}
+}
+
+static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc,
+		int query_rc)
+{
+	unsigned int tmo = acpi_desc->scrub_tmo;
+	struct device *dev = acpi_desc->dev;
+	struct nfit_spa *nfit_spa;
+
+	if (acpi_desc->cancel)
+		return 0;
+
+	if (query_rc == -EBUSY) {
+		dev_dbg(dev, "ARS: ARS busy\n");
+		return min(30U * 60U, tmo * 2);
+	}
+	if (query_rc == -ENOSPC) {
+		dev_dbg(dev, "ARS: ARS continue\n");
+		ars_continue(acpi_desc);
+		return 1;
+	}
+	if (query_rc && query_rc != -EAGAIN) {
+		unsigned long long addr, end;
+
+		addr = acpi_desc->ars_status->address;
+		end = addr + acpi_desc->ars_status->length;
+		dev_dbg(dev, "ARS: %llx-%llx failed (%d)\n", addr, end,
+				query_rc);
+	}
+
+	ars_complete_all(acpi_desc);
+	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
+			continue;
+		if (test_bit(ARS_REQ, &nfit_spa->ars_state)) {
+			int rc = ars_start(acpi_desc, nfit_spa);
+
+			clear_bit(ARS_DONE, &nfit_spa->ars_state);
+			dev = nd_region_dev(nfit_spa->nd_region);
+			dev_dbg(dev, "ARS: range %d ARS start (%d)\n",
+					nfit_spa->spa->range_index, rc);
+			if (rc == 0 || rc == -EBUSY)
+				return 1;
+			dev_err(dev, "ARS: range %d ARS failed (%d)\n",
+					nfit_spa->spa->range_index, rc);
+			set_bit(ARS_FAILED, &nfit_spa->ars_state);
+		}
+	}
+	return 0;
+}
+
+static void acpi_nfit_scrub(struct work_struct *work)
+{
+	struct acpi_nfit_desc *acpi_desc;
+	unsigned int tmo;
+	int query_rc;
+
+	acpi_desc = container_of(work, typeof(*acpi_desc), dwork.work);
+	mutex_lock(&acpi_desc->init_mutex);
+	query_rc = acpi_nfit_query_poison(acpi_desc);
+	tmo = __acpi_nfit_scrub(acpi_desc, query_rc);
+	if (tmo) {
+		queue_delayed_work(nfit_wq, &acpi_desc->dwork, tmo * HZ);
+		acpi_desc->scrub_tmo = tmo;
+	} else {
+		acpi_desc->scrub_count++;
+		if (acpi_desc->scrub_count_state)
+			sysfs_notify_dirent(acpi_desc->scrub_count_state);
+	}
+	memset(acpi_desc->ars_status, 0, acpi_desc->max_ars);
 	mutex_unlock(&acpi_desc->init_mutex);
 }
 
@@ -3015,33 +2955,61 @@ static void acpi_nfit_init_ars(struct acpi_nfit_desc *acpi_desc,
 	nfit_spa->max_ars = ars_cap.max_ars_out;
 	nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
 	acpi_desc->max_ars = max(nfit_spa->max_ars, acpi_desc->max_ars);
+	clear_bit(ARS_FAILED, &nfit_spa->ars_state);
+	set_bit(ARS_REQ, &nfit_spa->ars_state);
 }
 
-
 static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
 {
 	struct nfit_spa *nfit_spa;
+	int rc, query_rc;
 
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		int rc, type = nfit_spa_type(nfit_spa->spa);
-
-		/* PMEM and VMEM will be registered by the ARS workqueue */
-		if (type == NFIT_SPA_PM || type == NFIT_SPA_VOLATILE) {
+		set_bit(ARS_FAILED, &nfit_spa->ars_state);
+		switch (nfit_spa_type(nfit_spa->spa)) {
+		case NFIT_SPA_VOLATILE:
+		case NFIT_SPA_PM:
 			acpi_nfit_init_ars(acpi_desc, nfit_spa);
-			continue;
+			break;
 		}
-		/* BLK apertures belong to BLK region registration below */
-		if (type == NFIT_SPA_BDW)
-			continue;
-		/* BLK regions don't need to wait for ARS results */
-		rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
-		if (rc)
-			return rc;
 	}
 
-	acpi_desc->ars_start_flags = 0;
-	if (!acpi_desc->cancel)
-		queue_work(nfit_wq, &acpi_desc->work);
+	/*
+	 * Reap any results that might be pending before starting new
+	 * short requests.
+	 */
+	query_rc = acpi_nfit_query_poison(acpi_desc);
+	if (query_rc == 0)
+		ars_complete_all(acpi_desc);
+
+	list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
+		switch (nfit_spa_type(nfit_spa->spa)) {
+		case NFIT_SPA_VOLATILE:
+		case NFIT_SPA_PM:
+			/* register regions and kick off initial ARS run */
+			rc = ars_register(acpi_desc, nfit_spa, &query_rc);
+			if (rc)
+				return rc;
+			break;
+		case NFIT_SPA_BDW:
+			/* nothing to register */
+			break;
+		case NFIT_SPA_DCR:
+		case NFIT_SPA_VDISK:
+		case NFIT_SPA_VCD:
+		case NFIT_SPA_PDISK:
+		case NFIT_SPA_PCD:
+			/* register known regions that don't support ARS */
+			rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
+			if (rc)
+				return rc;
+			break;
+		default:
+			/* don't register unknown regions */
+			break;
+		}
+
+	queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0);
 	return 0;
 }
 
@@ -3176,49 +3144,20 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
 }
 EXPORT_SYMBOL_GPL(acpi_nfit_init);
 
-struct acpi_nfit_flush_work {
-	struct work_struct work;
-	struct completion cmp;
-};
-
-static void flush_probe(struct work_struct *work)
-{
-	struct acpi_nfit_flush_work *flush;
-
-	flush = container_of(work, typeof(*flush), work);
-	complete(&flush->cmp);
-}
-
 static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
 {
 	struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
 	struct device *dev = acpi_desc->dev;
-	struct acpi_nfit_flush_work flush;
-	int rc;
 
-	/* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
+	/* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
 	device_lock(dev);
 	device_unlock(dev);
 
-	/* bounce the init_mutex to make init_complete valid */
+	/* Bounce the init_mutex to complete initial registration */
 	mutex_lock(&acpi_desc->init_mutex);
-	if (acpi_desc->cancel || acpi_desc->init_complete) {
-		mutex_unlock(&acpi_desc->init_mutex);
-		return 0;
-	}
-
-	/*
-	 * Scrub work could take 10s of seconds, userspace may give up so we
-	 * need to be interruptible while waiting.
-	 */
-	INIT_WORK_ONSTACK(&flush.work, flush_probe);
-	init_completion(&flush.cmp);
-	queue_work(nfit_wq, &flush.work);
 	mutex_unlock(&acpi_desc->init_mutex);
 
-	rc = wait_for_completion_interruptible(&flush.cmp);
-	cancel_work_sync(&flush.work);
-	return rc;
+	return 0;
 }
 
 static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
@@ -3237,7 +3176,7 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
 	 * just needs guarantees that any ars it initiates are not
 	 * interrupted by any intervening start reqeusts from userspace.
 	 */
-	if (work_busy(&acpi_desc->work))
+	if (work_busy(&acpi_desc->dwork.work))
 		return -EBUSY;
 
 	return 0;
@@ -3246,11 +3185,9 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
 int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags)
 {
 	struct device *dev = acpi_desc->dev;
+	int scheduled = 0, busy = 0;
 	struct nfit_spa *nfit_spa;
 
-	if (work_busy(&acpi_desc->work))
-		return -EBUSY;
-
 	mutex_lock(&acpi_desc->init_mutex);
 	if (acpi_desc->cancel) {
 		mutex_unlock(&acpi_desc->init_mutex);
@@ -3258,21 +3195,32 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags)
 	}
 
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		struct acpi_nfit_system_address *spa = nfit_spa->spa;
+		int type = nfit_spa_type(nfit_spa->spa);
 
-		if (nfit_spa_type(spa) != NFIT_SPA_PM)
+		if (type != NFIT_SPA_PM && type != NFIT_SPA_VOLATILE)
+			continue;
+		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
 			continue;
 
-		set_bit(ARS_REQ, &nfit_spa->ars_state);
+		if (test_and_set_bit(ARS_REQ, &nfit_spa->ars_state))
+			busy++;
+		else {
+			if (test_bit(ARS_SHORT, &flags))
+				set_bit(ARS_SHORT, &nfit_spa->ars_state);
+			scheduled++;
+		}
+	}
+	if (scheduled) {
+		queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0);
+		dev_dbg(dev, "ars_scan triggered\n");
 	}
-	acpi_desc->ars_start_flags = 0;
-	if (test_bit(ARS_SHORT, &flags))
-		acpi_desc->ars_start_flags |= ND_ARS_RETURN_PREV_DATA;
-	queue_work(nfit_wq, &acpi_desc->work);
-	dev_dbg(dev, "ars_scan triggered\n");
 	mutex_unlock(&acpi_desc->init_mutex);
 
-	return 0;
+	if (scheduled)
+		return 0;
+	if (busy)
+		return -EBUSY;
+	return -ENOTTY;
 }
 
 void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
@@ -3299,7 +3247,8 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
 	INIT_LIST_HEAD(&acpi_desc->dimms);
 	INIT_LIST_HEAD(&acpi_desc->list);
 	mutex_init(&acpi_desc->init_mutex);
-	INIT_WORK(&acpi_desc->work, acpi_nfit_scrub);
+	acpi_desc->scrub_tmo = 1;
+	INIT_DELAYED_WORK(&acpi_desc->dwork, acpi_nfit_scrub);
 }
 EXPORT_SYMBOL_GPL(acpi_nfit_desc_init);
 
@@ -3323,6 +3272,7 @@ void acpi_nfit_shutdown(void *data)
 
 	mutex_lock(&acpi_desc->init_mutex);
 	acpi_desc->cancel = 1;
+	cancel_delayed_work_sync(&acpi_desc->dwork);
 	mutex_unlock(&acpi_desc->init_mutex);
 
 	/*
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index 45e7949986a8..7d15856a739f 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -197,18 +197,18 @@ struct acpi_nfit_desc {
 	struct device *dev;
 	u8 ars_start_flags;
 	struct nd_cmd_ars_status *ars_status;
-	struct work_struct work;
+	struct delayed_work dwork;
 	struct list_head list;
 	struct kernfs_node *scrub_count_state;
 	unsigned int max_ars;
 	unsigned int scrub_count;
 	unsigned int scrub_mode;
 	unsigned int cancel:1;
-	unsigned int init_complete:1;
 	unsigned long dimm_cmd_force_en;
 	unsigned long bus_cmd_force_en;
 	unsigned long bus_nfit_cmd_force_en;
 	unsigned int platform_cap;
+	unsigned int scrub_tmo;
 	int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
 			void *iobuf, u64 len, int rw);
 };
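
[ For context: the nfit.h change above is what lets the scrub state
machine carry its poll interval across runs, by replacing the bare
work_struct with a delayed_work plus a scrub_tmo field. The generic
init/queue/cancel pattern the patch relies on (INIT_DELAYED_WORK,
queue_delayed_work, cancel_delayed_work_sync) looks roughly like the
sketch below; names such as my_desc and my_scrub are placeholders, not
driver code. ]

    #include <linux/workqueue.h>

    struct my_desc {
            struct delayed_work dwork;      /* was: struct work_struct work */
            unsigned int scrub_tmo;         /* last poll interval, in seconds */
    };

    static void my_scrub(struct work_struct *work)
    {
            struct my_desc *d = container_of(work, struct my_desc, dwork.work);

            /*
             * ... poll hardware, then re-arm with a possibly larger delay.
             * The real driver also checks a 'cancel' flag before re-arming.
             */
            queue_delayed_work(system_wq, &d->dwork, d->scrub_tmo * HZ);
    }

    static void my_init(struct my_desc *d)
    {
            d->scrub_tmo = 1;
            INIT_DELAYED_WORK(&d->dwork, my_scrub);
            queue_delayed_work(system_wq, &d->dwork, 0);    /* kick first run now */
    }

    static void my_shutdown(struct my_desc *d)
    {
            cancel_delayed_work_sync(&d->dwork);    /* flush any in-flight run */
    }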