commit:    6aec21f6a8322fa8d43df3ea7f051dfd8967f1b9
tree:      de572339272023be68f0d5c2512cd719fb033f40
parent:    832a6b06ab5e13c228fc27e333ad360aa03ace6f
author:    Haggai Eran <haggaie@mellanox.com>      2014-12-11 10:04:23 -0500
committer: Roland Dreier <roland@purestorage.com>  2014-12-15 21:19:03 -0500
IB/mlx5: Page faults handling infrastructure
* Refactor MR registration and cleanup, and fix reg_pages accounting.
* Create a work queue to handle page fault events in a kthread context.
* Register a fault handler to get events from the core for each QP.
The fault handler registered in this patch is an empty stub; a later
patch implements it. (A short illustrative sketch of the dispatch
pattern follows the diffstat below.)
Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Shachar Raindel <raindel@mellanox.com>
Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat:
 drivers/infiniband/hw/mlx5/main.c    |  31
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  67
 drivers/infiniband/hw/mlx5/mr.c      |  45
 drivers/infiniband/hw/mlx5/odp.c     | 145
 drivers/infiniband/hw/mlx5/qp.c      |  26
 include/linux/mlx5/driver.h          |   2
 6 files changed, 294 insertions, 22 deletions
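For orientation before the diff itself: the core pattern of this patch is a per-QP array of work items (one per page-fault context), an event handler that runs in interrupt context and queues the matching work item only while page faults are enabled for that QP, and a drain step (a flush of a single-threaded workqueue) before the QP is reset or destroyed. Below is a minimal, self-contained sketch of that pattern, not the patch code: all sketch_* names and the reduced structures are hypothetical stand-ins, and the work handler body is the empty stub this patch introduces.

/*
 * Minimal sketch of the page-fault dispatch pattern. The sketch_* names,
 * reduced structures, and stub handler body are assumptions for
 * illustration, not the driver's real definitions.
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/printk.h>

#define SKETCH_PAGEFAULT_CONTEXTS 4	/* responder/requestor x read/write */

struct sketch_pagefault {
	u32 flags;		/* requestor/write bits select the context */
	u8  event_subtype;
};

struct sketch_pfault_work {
	struct work_struct work;
	struct sketch_pagefault mpfault;
};

struct sketch_qp {
	int disable_page_faults;	/* set while the QP must not fault */
	spinlock_t disable_page_faults_lock;
	struct sketch_pfault_work pagefaults[SKETCH_PAGEFAULT_CONTEXTS];
};

static struct workqueue_struct *sketch_page_fault_wq;

/* Runs in workqueue (kthread) context; the real handler resolves the fault. */
static void sketch_pfault_action(struct work_struct *work)
{
	struct sketch_pfault_work *pfault =
		container_of(work, struct sketch_pfault_work, work);

	pr_warn("page fault subtype 0x%x: stub handler\n",
		pfault->mpfault.event_subtype);
}

/* Event path (interrupt context): copy the fault, queue it if allowed. */
static void sketch_event(struct sketch_qp *qp, int context,
			 const struct sketch_pagefault *pfault)
{
	struct sketch_pfault_work *w = &qp->pagefaults[context];

	w->mpfault = *pfault;
	spin_lock(&qp->disable_page_faults_lock);
	if (!qp->disable_page_faults)
		queue_work(sketch_page_fault_wq, &w->work);
	spin_unlock(&qp->disable_page_faults_lock);
}

/* Before reset/destroy: stop new work, then drain anything in flight. */
static void sketch_disable_pagefaults(struct sketch_qp *qp)
{
	unsigned long flags;

	spin_lock_irqsave(&qp->disable_page_faults_lock, flags);
	qp->disable_page_faults = 1;
	spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags);
	flush_workqueue(sketch_page_fault_wq);
}

static int sketch_init(struct sketch_qp *qp)
{
	int i;

	sketch_page_fault_wq = create_singlethread_workqueue("sketch_pfaults");
	if (!sketch_page_fault_wq)
		return -ENOMEM;

	/* Faults stay disabled until the QP reaches a fault-capable state. */
	qp->disable_page_faults = 1;
	spin_lock_init(&qp->disable_page_faults_lock);
	for (i = 0; i < SKETCH_PAGEFAULT_CONTEXTS; i++)
		INIT_WORK(&qp->pagefaults[i].work, sketch_pfault_action);
	return 0;
}

In the patch below the same fields live in struct mlx5_ib_qp, the handler is registered via qp->mqp.pfault_handler, and the queue is the module-wide mlx5_ib_page_fault_wq created in mlx5_ib_odp_init().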
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index e6d775f2446d..a801baa79c8e 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -864,7 +864,7 @@ static ssize_t show_reg_pages(struct device *device,
 	struct mlx5_ib_dev *dev =
 		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
 
-	return sprintf(buf, "%d\n", dev->mdev->priv.reg_pages);
+	return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
 }
 
 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
@@ -1389,16 +1389,19 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 		goto err_eqs;
 
 	mutex_init(&dev->cap_mask_mutex);
-	spin_lock_init(&dev->mr_lock);
 
 	err = create_dev_resources(&dev->devr);
 	if (err)
 		goto err_eqs;
 
-	err = ib_register_device(&dev->ib_dev, NULL);
+	err = mlx5_ib_odp_init_one(dev);
 	if (err)
 		goto err_rsrc;
 
+	err = ib_register_device(&dev->ib_dev, NULL);
+	if (err)
+		goto err_odp;
+
 	err = create_umr_res(dev);
 	if (err)
 		goto err_dev;
@@ -1420,6 +1423,9 @@ err_umrc:
 err_dev:
 	ib_unregister_device(&dev->ib_dev);
 
+err_odp:
+	mlx5_ib_odp_remove_one(dev);
+
 err_rsrc:
 	destroy_dev_resources(&dev->devr);
 
@@ -1435,8 +1441,10 @@ err_dealloc:
 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
 {
 	struct mlx5_ib_dev *dev = context;
+
 	ib_unregister_device(&dev->ib_dev);
 	destroy_umrc_res(dev);
+	mlx5_ib_odp_remove_one(dev);
 	destroy_dev_resources(&dev->devr);
 	free_comp_eqs(dev);
 	ib_dealloc_device(&dev->ib_dev);
@@ -1450,15 +1458,30 @@ static struct mlx5_interface mlx5_ib_interface = {
 
 static int __init mlx5_ib_init(void)
 {
+	int err;
+
 	if (deprecated_prof_sel != 2)
 		pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
 
-	return mlx5_register_interface(&mlx5_ib_interface);
+	err = mlx5_ib_odp_init();
+	if (err)
+		return err;
+
+	err = mlx5_register_interface(&mlx5_ib_interface);
+	if (err)
+		goto clean_odp;
+
+	return err;
+
+clean_odp:
+	mlx5_ib_odp_cleanup();
+	return err;
 }
 
 static void __exit mlx5_ib_cleanup(void)
 {
 	mlx5_unregister_interface(&mlx5_ib_interface);
+	mlx5_ib_odp_cleanup();
 }
 
 module_init(mlx5_ib_init);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 6856e27bfb6a..c6ceec3e3d6a 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -149,6 +149,29 @@ enum {
 	MLX5_QP_EMPTY
 };
 
+/*
+ * Connect-IB can trigger up to four concurrent pagefaults
+ * per-QP.
+ */
+enum mlx5_ib_pagefault_context {
+	MLX5_IB_PAGEFAULT_RESPONDER_READ,
+	MLX5_IB_PAGEFAULT_REQUESTOR_READ,
+	MLX5_IB_PAGEFAULT_RESPONDER_WRITE,
+	MLX5_IB_PAGEFAULT_REQUESTOR_WRITE,
+	MLX5_IB_PAGEFAULT_CONTEXTS
+};
+
+static inline enum mlx5_ib_pagefault_context
+	mlx5_ib_get_pagefault_context(struct mlx5_pagefault *pagefault)
+{
+	return pagefault->flags & (MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE);
+}
+
+struct mlx5_ib_pfault {
+	struct work_struct	work;
+	struct mlx5_pagefault	mpfault;
+};
+
 struct mlx5_ib_qp {
 	struct ib_qp		ibqp;
 	struct mlx5_core_qp	mqp;
@@ -194,6 +217,21 @@ struct mlx5_ib_qp {
 
 	/* Store signature errors */
 	bool			signature_en;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	/*
+	 * A flag that is true for QP's that are in a state that doesn't
+	 * allow page faults, and shouldn't schedule any more faults.
+	 */
+	int			disable_page_faults;
+	/*
+	 * The disable_page_faults_lock protects a QP's disable_page_faults
+	 * field, allowing for a thread to atomically check whether the QP
+	 * allows page faults, and if so schedule a page fault.
+	 */
+	spinlock_t		disable_page_faults_lock;
+	struct mlx5_ib_pfault	pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS];
+#endif
 };
 
 struct mlx5_ib_cq_buf {
@@ -392,13 +430,17 @@ struct mlx5_ib_dev {
 	struct umr_common		umrc;
 	/* sync used page count stats
 	 */
-	spinlock_t			mr_lock;
 	struct mlx5_ib_resources	devr;
 	struct mlx5_mr_cache		cache;
 	struct timer_list		delay_timer;
 	int				fill_delay;
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 	struct ib_odp_caps	odp_caps;
+	/*
+	 * Sleepable RCU that prevents destruction of MRs while they are still
+	 * being used by a page fault handler.
+	 */
+	struct srcu_struct	mr_srcu;
 #endif
 };
 
@@ -575,12 +617,33 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
 			    struct ib_mr_status *mr_status);
 
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+extern struct workqueue_struct *mlx5_ib_page_fault_wq;
+
 int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev);
-#else
+void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
+			       struct mlx5_ib_pfault *pfault);
+void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp);
+int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
+void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev);
+int __init mlx5_ib_odp_init(void);
+void mlx5_ib_odp_cleanup(void);
+void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp);
+void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp);
+
+#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 static inline int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev)
 {
 	return 0;
 }
+
+static inline void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp) {}
+static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
+static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {}
+static inline int mlx5_ib_odp_init(void) { return 0; }
+static inline void mlx5_ib_odp_cleanup(void) {}
+static inline void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp) {}
+static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {}
+
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
 static inline void init_query_mad(struct ib_smp *mad)
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 38b06267798e..922ac85b7198 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -52,6 +52,8 @@ static __be64 mlx5_ib_update_mtt_emergency_buffer[
 static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
 #endif
 
+static int clean_mr(struct mlx5_ib_mr *mr);
+
 static int order2idx(struct mlx5_ib_dev *dev, int order)
 {
 	struct mlx5_mr_cache *cache = &dev->cache;
@@ -1049,6 +1051,10 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 			mlx5_ib_dbg(dev, "cache empty for order %d", order);
 			mr = NULL;
 		}
+	} else if (access_flags & IB_ACCESS_ON_DEMAND) {
+		err = -EINVAL;
+		pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB");
+		goto error;
 	}
 
 	if (!mr)
@@ -1064,9 +1070,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
 	mr->umem = umem;
 	mr->npages = npages;
-	spin_lock(&dev->mr_lock);
-	dev->mdev->priv.reg_pages += npages;
-	spin_unlock(&dev->mr_lock);
+	atomic_add(npages, &dev->mdev->priv.reg_pages);
 	mr->ibmr.lkey = mr->mmr.key;
 	mr->ibmr.rkey = mr->mmr.key;
 
@@ -1110,12 +1114,9 @@ error:
 	return err;
 }
 
-int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+static int clean_mr(struct mlx5_ib_mr *mr)
 {
-	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
-	struct mlx5_ib_mr *mr = to_mmr(ibmr);
-	struct ib_umem *umem = mr->umem;
-	int npages = mr->npages;
+	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
 	int umred = mr->umred;
 	int err;
 
@@ -1135,16 +1136,32 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
 		free_cached_mr(dev, mr);
 	}
 
+	if (!umred)
+		kfree(mr);
+
+	return 0;
+}
+
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
+	struct mlx5_ib_mr *mr = to_mmr(ibmr);
+	int npages = mr->npages;
+	struct ib_umem *umem = mr->umem;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	if (umem)
+		/* Wait for all running page-fault handlers to finish. */
+		synchronize_srcu(&dev->mr_srcu);
+#endif
+
+	clean_mr(mr);
+
 	if (umem) {
 		ib_umem_release(umem);
-		spin_lock(&dev->mr_lock);
-		dev->mdev->priv.reg_pages -= npages;
-		spin_unlock(&dev->mr_lock);
+		atomic_sub(npages, &dev->mdev->priv.reg_pages);
 	}
 
-	if (!umred)
-		kfree(mr);
-
 	return 0;
 }
 
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 66c39ee16aff..63bbdba396f1 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -32,6 +32,8 @@
 
 #include "mlx5_ib.h"
 
+struct workqueue_struct *mlx5_ib_page_fault_wq;
+
 #define COPY_ODP_BIT_MLX_TO_IB(reg, ib_caps, field_name, bit_name) do {	\
 	if (be32_to_cpu(reg.field_name) & MLX5_ODP_SUPPORT_##bit_name)	\
 		ib_caps->field_name |= IB_ODP_SUPPORT_##bit_name;	\
@@ -58,3 +60,146 @@ int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev)
 out:
 	return err;
 }
+
+static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
+						   u32 key)
+{
+	u32 base_key = mlx5_base_mkey(key);
+	struct mlx5_core_mr *mmr = __mlx5_mr_lookup(dev->mdev, base_key);
+
+	if (!mmr || mmr->key != key)
+		return NULL;
+
+	return container_of(mmr, struct mlx5_ib_mr, mmr);
+}
+
+static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp,
+				      struct mlx5_ib_pfault *pfault,
+				      int error) {
+	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
+	int ret = mlx5_core_page_fault_resume(dev->mdev, qp->mqp.qpn,
+					      pfault->mpfault.flags,
+					      error);
+	if (ret)
+		pr_err("Failed to resolve the page fault on QP 0x%x\n",
+		       qp->mqp.qpn);
+}
+
+void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
+			       struct mlx5_ib_pfault *pfault)
+{
+	u8 event_subtype = pfault->mpfault.event_subtype;
+
+	switch (event_subtype) {
+	default:
+		pr_warn("Invalid page fault event subtype: 0x%x\n",
+			event_subtype);
+		mlx5_ib_page_fault_resume(qp, pfault, 1);
+		break;
+	}
+}
+
+static void mlx5_ib_qp_pfault_action(struct work_struct *work)
+{
+	struct mlx5_ib_pfault *pfault = container_of(work,
+						     struct mlx5_ib_pfault,
+						     work);
+	enum mlx5_ib_pagefault_context context =
+		mlx5_ib_get_pagefault_context(&pfault->mpfault);
+	struct mlx5_ib_qp *qp = container_of(pfault, struct mlx5_ib_qp,
+					     pagefaults[context]);
+	mlx5_ib_mr_pfault_handler(qp, pfault);
+}
+
+void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&qp->disable_page_faults_lock, flags);
+	qp->disable_page_faults = 1;
+	spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags);
+
+	/*
+	 * Note that at this point, we are guarenteed that no more
+	 * work queue elements will be posted to the work queue with
+	 * the QP we are closing.
+	 */
+	flush_workqueue(mlx5_ib_page_fault_wq);
+}
+
+void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&qp->disable_page_faults_lock, flags);
+	qp->disable_page_faults = 0;
+	spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags);
+}
+
+static void mlx5_ib_pfault_handler(struct mlx5_core_qp *qp,
+				   struct mlx5_pagefault *pfault)
+{
+	/*
+	 * Note that we will only get one fault event per QP per context
+	 * (responder/initiator, read/write), until we resolve the page fault
+	 * with the mlx5_ib_page_fault_resume command. Since this function is
+	 * called from within the work element, there is no risk of missing
+	 * events.
+	 */
+	struct mlx5_ib_qp *mibqp = to_mibqp(qp);
+	enum mlx5_ib_pagefault_context context =
+		mlx5_ib_get_pagefault_context(pfault);
+	struct mlx5_ib_pfault *qp_pfault = &mibqp->pagefaults[context];
+
+	qp_pfault->mpfault = *pfault;
+
+	/* No need to stop interrupts here since we are in an interrupt */
+	spin_lock(&mibqp->disable_page_faults_lock);
+	if (!mibqp->disable_page_faults)
+		queue_work(mlx5_ib_page_fault_wq, &qp_pfault->work);
+	spin_unlock(&mibqp->disable_page_faults_lock);
+}
+
+void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp)
+{
+	int i;
+
+	qp->disable_page_faults = 1;
+	spin_lock_init(&qp->disable_page_faults_lock);
+
+	qp->mqp.pfault_handler = mlx5_ib_pfault_handler;
+
+	for (i = 0; i < MLX5_IB_PAGEFAULT_CONTEXTS; ++i)
+		INIT_WORK(&qp->pagefaults[i].work, mlx5_ib_qp_pfault_action);
+}
+
+int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev)
+{
+	int ret;
+
+	ret = init_srcu_struct(&ibdev->mr_srcu);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev)
+{
+	cleanup_srcu_struct(&ibdev->mr_srcu);
+}
+
+int __init mlx5_ib_odp_init(void)
+{
+	mlx5_ib_page_fault_wq =
+		create_singlethread_workqueue("mlx5_ib_page_faults");
+	if (!mlx5_ib_page_fault_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void mlx5_ib_odp_cleanup(void)
+{
+	destroy_workqueue(mlx5_ib_page_fault_wq);
+}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 9783c3342dbf..be0cd358b080 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -876,6 +876,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	int inlen = sizeof(*in);
 	int err;
 
+	mlx5_ib_odp_create_qp(qp);
+
 	gen = &dev->mdev->caps.gen;
 	mutex_init(&qp->mutex);
 	spin_lock_init(&qp->sq.lock);
@@ -1160,11 +1162,13 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
 	in = kzalloc(sizeof(*in), GFP_KERNEL);
 	if (!in)
 		return;
-	if (qp->state != IB_QPS_RESET)
+	if (qp->state != IB_QPS_RESET) {
+		mlx5_ib_qp_disable_pagefaults(qp);
 		if (mlx5_core_qp_modify(dev->mdev, to_mlx5_state(qp->state),
 					MLX5_QP_STATE_RST, in, sizeof(*in), &qp->mqp))
 			mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n",
 				     qp->mqp.qpn);
+	}
 
 	get_cqs(qp, &send_cq, &recv_cq);
 
@@ -1712,6 +1716,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 	if (mlx5_st < 0)
 		goto out;
 
+	/* If moving to a reset or error state, we must disable page faults on
+	 * this QP and flush all current page faults. Otherwise a stale page
+	 * fault may attempt to work on this QP after it is reset and moved
+	 * again to RTS, and may cause the driver and the device to get out of
+	 * sync. */
+	if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
+	    (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
+		mlx5_ib_qp_disable_pagefaults(qp);
+
 	optpar = ib_mask_to_mlx5_opt(attr_mask);
 	optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
 	in->optparam = cpu_to_be32(optpar);
@@ -1721,6 +1734,9 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 	if (err)
 		goto out;
 
+	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+		mlx5_ib_qp_enable_pagefaults(qp);
+
 	qp->state = new_state;
 
 	if (attr_mask & IB_QP_ACCESS_FLAGS)
@@ -3026,6 +3042,14 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
 	int mlx5_state;
 	int err = 0;
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	/*
+	 * Wait for any outstanding page faults, in case the user frees memory
+	 * based upon this query's result.
+	 */
+	flush_workqueue(mlx5_ib_page_fault_wq);
+#endif
+
 	mutex_lock(&qp->mutex);
 	outb = kzalloc(sizeof(*outb), GFP_KERNEL);
 	if (!outb) {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 7088dcd19214..166d9315fe4b 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -474,7 +474,7 @@ struct mlx5_priv {
 	struct workqueue_struct *pg_wq;
 	struct rb_root		page_root;
 	int			fw_pages;
-	int			reg_pages;
+	atomic_t		reg_pages;
 	struct list_head	free_list;
 
 	struct mlx5_core_health health;