author     Haggai Eran <haggaie@mellanox.com>      2014-12-11 10:04:18 -0500
committer  Roland Dreier <roland@purestorage.com>  2014-12-15 21:13:36 -0500
commit     882214e2b12860bff1ccff15a3ec2bbb29d58c02 (patch)
tree       a3609ca71cec22f0c80b4f1b3d5bebf8024051bb /include/rdma
parent     8ada2c1c0c1d75a60723cd2ca7d49c594a146af6 (diff)
IB/core: Implement support for MMU notifiers regarding on demand paging regions
* Add an interval tree implementation for ODP umems. Create an interval tree
  for each ucontext (including a count of the number of ODP MRs in this
  context, semaphore, etc.), and register ODP umems in the interval tree.
* Add MMU notifiers handling functions, using the interval tree to notify only
  the relevant umems and underlying MRs.
* Register to receive MMU notifier events from the MM subsystem upon ODP MR
  registration (and unregister accordingly).
* Add a completion object to synchronize the destruction of ODP umems.
* Add mechanism to abort page faults when there's a concurrent invalidation.

The way we synchronize between concurrent invalidations and page faults is by
keeping a counter of currently running invalidations, and a sequence number
that is incremented whenever an invalidation is caught. The page fault code
checks the counter and also verifies that the sequence number hasn't
progressed before it updates the umem's page tables. This is similar to what
the kvm module does.

In order to prevent the case where we register a umem in the middle of an
ongoing notifier, we also keep a per ucontext counter of the total number of
active mmu notifiers. We only enable new umems when all the running notifiers
complete.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Shachar Raindel <raindel@mellanox.com>
Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Yuval Dagan <yuvalda@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
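[Editor's sketch, not part of the patch] To illustrate the retry scheme described above, here is a minimal, hypothetical page-fault path using the sequence number together with ib_umem_mmu_notifier_retry() from the header below. The function name odp_fault_page() and the surrounding driver logic are assumptions for illustration only; the real consumers live in the HW drivers and in the core ODP code.

#include <rdma/ib_umem_odp.h>

/* Illustrative only: snapshot the sequence number, do the sleepable work of
 * bringing the page in, then re-check under umem_mutex before publishing the
 * mapping, following the kvm-style retry scheme described in the changelog. */
static int odp_fault_page(struct ib_umem *umem, u64 addr)
{
	unsigned long current_seq;
	int ret = 0;

	/* Snapshot before faulting the page in (no locks held yet). */
	current_seq = ACCESS_ONCE(umem->odp_data->notifiers_seq);

	/* ... get_user_pages()/DMA-map the page backing 'addr' here ... */

	mutex_lock(&umem->odp_data->umem_mutex);
	if (ib_umem_mmu_notifier_retry(umem, current_seq)) {
		/* An invalidation is in flight or ran since the snapshot:
		 * discard the result and ask the caller to retry the fault. */
		ret = -EAGAIN;
	} else {
		/* Safe to install the page in page_list[]/dma_list[]. */
	}
	mutex_unlock(&umem->odp_data->umem_mutex);

	return ret;
}

The invalidation side increments notifiers_count before tearing down mappings and advances notifiers_seq when it finishes, so a fault that raced with it observes either a non-zero count or a changed sequence number and backs off.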
Diffstat (limited to 'include/rdma')
-rw-r--r--   include/rdma/ib_umem_odp.h   65
-rw-r--r--   include/rdma/ib_verbs.h      19
2 files changed, 83 insertions(+), 1 deletion(-)
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index b5a2df1923b7..3da0b167041b 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -34,6 +34,13 @@
 #define IB_UMEM_ODP_H
 
 #include <rdma/ib_umem.h>
+#include <rdma/ib_verbs.h>
+#include <linux/interval_tree.h>
+
+struct umem_odp_node {
+	u64 __subtree_last;
+	struct rb_node rb;
+};
 
 struct ib_umem_odp {
 	/*
@@ -51,10 +58,27 @@ struct ib_umem_odp {
 	dma_addr_t		*dma_list;
 	/*
 	 * The umem_mutex protects the page_list and dma_list fields of an ODP
-	 * umem, allowing only a single thread to map/unmap pages.
+	 * umem, allowing only a single thread to map/unmap pages. The mutex
+	 * also protects access to the mmu notifier counters.
 	 */
 	struct mutex		umem_mutex;
 	void			*private; /* for the HW driver to use. */
+
+	/* When false, use the notifier counter in the ucontext struct. */
+	bool	mn_counters_active;
+	int	notifiers_seq;
+	int	notifiers_count;
+
+	/* A linked list of umems that don't have private mmu notifier
+	 * counters yet. */
+	struct list_head	no_private_counters;
+	struct ib_umem		*umem;
+
+	/* Tree tracking */
+	struct umem_odp_node	interval_tree;
+
+	struct completion	notifier_completion;
+	int			dying;
 };
 
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
@@ -82,6 +106,45 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt,
 void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset,
 				 u64 bound);
 
+void rbt_ib_umem_insert(struct umem_odp_node *node, struct rb_root *root);
+void rbt_ib_umem_remove(struct umem_odp_node *node, struct rb_root *root);
+typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end,
+			      void *cookie);
+/*
+ * Call the callback on each ib_umem in the range. Returns the logical or of
+ * the return values of the functions called.
+ */
+int rbt_ib_umem_for_each_in_range(struct rb_root *root, u64 start, u64 end,
+				  umem_call_back cb, void *cookie);
+
+struct umem_odp_node *rbt_ib_umem_iter_first(struct rb_root *root,
+					     u64 start, u64 last);
+struct umem_odp_node *rbt_ib_umem_iter_next(struct umem_odp_node *node,
+					    u64 start, u64 last);
+
+static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item,
+					     unsigned long mmu_seq)
+{
+	/*
+	 * This code is strongly based on the KVM code from
+	 * mmu_notifier_retry. Should be called with
+	 * the relevant locks taken (item->odp_data->umem_mutex
+	 * and the ucontext umem_mutex semaphore locked for read).
+	 */
+
+	/* Do not allow page faults while the new ib_umem hasn't seen a state
+	 * with zero notifiers yet, and doesn't have its own valid set of
+	 * private counters. */
+	if (!item->odp_data->mn_counters_active)
+		return 1;
+
+	if (unlikely(item->odp_data->notifiers_count))
+		return 1;
+	if (item->odp_data->notifiers_seq != mmu_seq)
+		return 1;
+	return 0;
+}
+
 #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
 static inline int ib_umem_odp_get(struct ib_ucontext *context,
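[Editor's sketch, not part of the patch] The mmu_notifier handlers that drive the interval-tree helpers declared above live in the core ODP code, outside this include/rdma-limited view. A rough, hypothetical sketch of the intended usage follows; the handler and callback names are placeholders, and only the invalidate-start side is shown.

#include <linux/kernel.h>
#include <linux/mmu_notifier.h>
#include <rdma/ib_umem_odp.h>
#include <rdma/ib_verbs.h>

/* Placeholder callback: handle one ODP umem overlapping the invalidated range. */
static int invalidate_one_umem(struct ib_umem *umem, u64 start, u64 end,
			       void *cookie)
{
	/* Let the HW driver tear down its mappings for [start, end). */
	umem->context->invalidate_range(umem, start, end);
	return 0;
}

/* Placeholder notifier hook: thanks to the interval tree, only the umems
 * overlapping [start, end) are visited. */
static void example_invalidate_range_start(struct mmu_notifier *mn,
					   struct mm_struct *mm,
					   unsigned long start,
					   unsigned long end)
{
	struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);

	/* Make concurrent page faults back off until the notifier completes. */
	atomic_inc(&context->notifier_count);
	down_read(&context->umem_rwsem);
	rbt_ib_umem_for_each_in_range(&context->umem_tree, start, end,
				      invalidate_one_umem, NULL);
	up_read(&context->umem_rwsem);
}

The matching invalidate-end side would decrement notifier_count and advance the per-umem sequence numbers, per the scheme described in the changelog.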
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 3af5dcad1b69..0d74f1de99aa 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -51,6 +51,7 @@
 #include <uapi/linux/if_ether.h>
 
 #include <linux/atomic.h>
+#include <linux/mmu_notifier.h>
 #include <asm/uaccess.h>
 
 extern struct workqueue_struct *ib_wq;
@@ -1139,6 +1140,8 @@ struct ib_fmr_attr {
 	u8	page_shift;
 };
 
+struct ib_umem;
+
 struct ib_ucontext {
 	struct ib_device	*device;
 	struct list_head	pd_list;
@@ -1153,6 +1156,22 @@ struct ib_ucontext {
 	int			closing;
 
 	struct pid		*tgid;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	struct rb_root		umem_tree;
+	/*
+	 * Protects .umem_rbroot and tree, as well as odp_mrs_count and
+	 * mmu notifiers registration.
+	 */
+	struct rw_semaphore	umem_rwsem;
+	void (*invalidate_range)(struct ib_umem *umem,
+				 unsigned long start, unsigned long end);
+
+	struct mmu_notifier	mn;
+	atomic_t		notifier_count;
+	/* A list of umems that don't have private mmu notifier counters yet. */
+	struct list_head	no_private_counters;
+	int			odp_mrs_count;
+#endif
 };
 
 struct ib_uobject {