author    Mitko Haralanov <mitko.haralanov@intel.com>  2016-04-12 13:45:57 -0400
committer Doug Ledford <dledford@redhat.com>           2016-04-28 12:00:38 -0400
commit    f19bd643dbded8672bfeffe9e51322464e4a9239
tree      38b79afd324c01c2090ddc21e03d48c82010ff1f
parent    e7d2c25d94bf4bb6f73d185e5514414a15a56f46
IB/hfi1: Prevent NULL pointer dereferences in caching code
There is a potential kernel crash when the MMU notifier calls the
invalidation routines in the hfi1 pinned page caching code for SDMA.

The invalidation routine could call the remove callback for the node,
which in turn ends up dereferencing the current task_struct to get a
pointer to the mm_struct. However, the mm_struct pointer could be NULL,
resulting in the following backtrace:

BUG: unable to handle kernel NULL pointer dereference at 00000000000000a8
IP: [<ffffffffa041f75a>] sdma_rb_remove+0xaa/0x100 [hfi1]
task: ffff88085e66e080 ti: ffff88085c244000 task.ti: ffff88085c244000
RIP: 0010:[<ffffffffa041f75a>] [<ffffffffa041f75a>] sdma_rb_remove+0xaa/0x100 [hfi1]
RSP: 0000:ffff88085c245878  EFLAGS: 00010002
RAX: 0000000000000000 RBX: ffff88105b9bbd40 RCX: ffffea003931a830
RDX: 0000000000000004 RSI: ffff88105754a9c0 RDI: ffff88105754a9c0
RBP: ffff88085c245890 R08: ffff88105b9bbd70 R09: 00000000fffffffb
R10: ffff88105b9bbd58 R11: 0000000000000013 R12: ffff88105754a9c0
R13: 0000000000000001 R14: 0000000000000001 R15: ffff88105b9bbd40
FS:  0000000000000000(0000) GS:ffff88107ef40000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000000000a8 CR3: 0000000001a0b000 CR4: 00000000001407e0
Stack:
 ffff88105b9bbd40 ffff88080ec481a8 ffff88080ec481b8 ffff88085c2458c0
 ffffffffa03fa00e ffff88080ec48190 ffff88080ed9cd00 0000000001024000
 0000000000000000 ffff88085c245920 ffffffffa03fa0e7 0000000000000282
Call Trace:
 [<ffffffffa03fa00e>] __mmu_rb_remove.isra.5+0x5e/0x70 [hfi1]
 [<ffffffffa03fa0e7>] mmu_notifier_mem_invalidate+0xc7/0xf0 [hfi1]
 [<ffffffffa03fa143>] mmu_notifier_page+0x13/0x20 [hfi1]
 [<ffffffff81156dd0>] __mmu_notifier_invalidate_page+0x50/0x70
 [<ffffffff81140bbb>] try_to_unmap_one+0x20b/0x470
 [<ffffffff81141ee7>] try_to_unmap_anon+0xa7/0x120
 [<ffffffff81141fad>] try_to_unmap+0x4d/0x60
 [<ffffffff8111fd7b>] shrink_page_list+0x2eb/0x9d0
 [<ffffffff81120ab3>] shrink_inactive_list+0x243/0x490
 [<ffffffff81121491>] shrink_lruvec+0x4c1/0x640
 [<ffffffff81121641>] shrink_zone+0x31/0x100
 [<ffffffff81121b0f>] kswapd_shrink_zone.constprop.62+0xef/0x1c0
 [<ffffffff811229e3>] kswapd+0x403/0x7e0
 [<ffffffff811225e0>] ? shrink_all_memory+0xf0/0xf0
 [<ffffffff81068ac0>] kthread+0xc0/0xd0
 [<ffffffff81068a00>] ? insert_kthread_work+0x40/0x40
 [<ffffffff814ff8ec>] ret_from_fork+0x7c/0xb0
 [<ffffffff81068a00>] ? insert_kthread_work+0x40/0x40

To correct this, use the mm_struct passed in by the MMU notifier (which
is what should have been done to begin with). This avoids the broken
dereferences and ensures that the correct mm_struct is used.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Reviewed-by: Dean Luick <dean.luick@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
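The crash path above runs in kswapd, a kernel thread with no user address
space, so current->mm is NULL there. The sketch below is illustration only
(plain user-space C with stand-in types and hypothetical names such as
remove_node_old/remove_node_new, not the driver's actual code); it shows why
dereferencing current->mm faults in that context and how passing the
notifier's mm_struct through the remove callback avoids it:

  #include <stdio.h>
  #include <stddef.h>

  struct mm_struct { long pinned_vm; };
  struct task_struct { struct mm_struct *mm; };

  /* Kernel threads such as kswapd have no mm of their own: current->mm == NULL. */
  static struct task_struct kswapd_task = { .mm = NULL };
  static struct task_struct *current_task = &kswapd_task;

  /* Old shape of the callback: assumes current->mm is valid; faults when run from kswapd. */
  void remove_node_old(long npages)
  {
          current_task->mm->pinned_vm -= npages;  /* NULL dereference in the notifier path */
  }

  /* New shape: the mm handed to the MMU notifier is passed down and used instead. */
  void remove_node_new(struct mm_struct *mm, long npages)
  {
          if (mm)  /* non-NULL only when invoked via the notifier */
                  mm->pinned_vm -= npages;
  }

  int main(void)
  {
          struct mm_struct victim = { .pinned_vm = 16 };

          /* The notifier already knows which address space is being invalidated. */
          remove_node_new(&victim, 4);
          printf("pinned_vm now %ld\n", victim.pinned_vm);
          return 0;
  }

In the actual patch, a non-NULL mm also tells sdma_rb_remove() not to hand an
mm to unpin_vector_pages(), since the notifier already holds mmap_sem.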
-rw-r--r--  drivers/staging/rdma/hfi1/mmu_rb.c        | 24
-rw-r--r--  drivers/staging/rdma/hfi1/mmu_rb.h        |  3
-rw-r--r--  drivers/staging/rdma/hfi1/user_exp_rcv.c  |  9
-rw-r--r--  drivers/staging/rdma/hfi1/user_sdma.c     | 24
4 files changed, 37 insertions, 23 deletions
diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/staging/rdma/hfi1/mmu_rb.c
index c7ad0164ea9a..eac4d041d351 100644
--- a/drivers/staging/rdma/hfi1/mmu_rb.c
+++ b/drivers/staging/rdma/hfi1/mmu_rb.c
@@ -71,6 +71,7 @@ static inline void mmu_notifier_range_start(struct mmu_notifier *,
 				     struct mm_struct *,
 				     unsigned long, unsigned long);
 static void mmu_notifier_mem_invalidate(struct mmu_notifier *,
+					struct mm_struct *,
 					unsigned long, unsigned long);
 static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
 					   unsigned long, unsigned long);
@@ -137,7 +138,7 @@ void hfi1_mmu_rb_unregister(struct rb_root *root)
 		rbnode = rb_entry(node, struct mmu_rb_node, node);
 		rb_erase(node, root);
 		if (handler->ops->remove)
-			handler->ops->remove(root, rbnode, false);
+			handler->ops->remove(root, rbnode, NULL);
 	}
 }
 
@@ -201,14 +202,14 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
 }
 
 static void __mmu_rb_remove(struct mmu_rb_handler *handler,
-			    struct mmu_rb_node *node, bool arg)
+			    struct mmu_rb_node *node, struct mm_struct *mm)
 {
 	/* Validity of handler and node pointers has been checked by caller. */
 	hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr,
 		  node->len);
 	__mmu_int_rb_remove(node, handler->root);
 	if (handler->ops->remove)
-		handler->ops->remove(handler->root, node, arg);
+		handler->ops->remove(handler->root, node, mm);
 }
 
 struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr,
@@ -237,7 +238,7 @@ void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node)
 		return;
 
 	spin_lock_irqsave(&handler->lock, flags);
-	__mmu_rb_remove(handler, node, false);
+	__mmu_rb_remove(handler, node, NULL);
 	spin_unlock_irqrestore(&handler->lock, flags);
 }
 
@@ -260,7 +261,7 @@ unlock:
 static inline void mmu_notifier_page(struct mmu_notifier *mn,
 				     struct mm_struct *mm, unsigned long addr)
 {
-	mmu_notifier_mem_invalidate(mn, addr, addr + PAGE_SIZE);
+	mmu_notifier_mem_invalidate(mn, mm, addr, addr + PAGE_SIZE);
 }
 
 static inline void mmu_notifier_range_start(struct mmu_notifier *mn,
@@ -268,25 +269,28 @@ static inline void mmu_notifier_range_start(struct mmu_notifier *mn,
 					    unsigned long start,
 					    unsigned long end)
 {
-	mmu_notifier_mem_invalidate(mn, start, end);
+	mmu_notifier_mem_invalidate(mn, mm, start, end);
 }
 
 static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn,
+					struct mm_struct *mm,
 					unsigned long start, unsigned long end)
 {
 	struct mmu_rb_handler *handler =
 		container_of(mn, struct mmu_rb_handler, mn);
 	struct rb_root *root = handler->root;
-	struct mmu_rb_node *node;
+	struct mmu_rb_node *node, *ptr = NULL;
 	unsigned long flags;
 
 	spin_lock_irqsave(&handler->lock, flags);
-	for (node = __mmu_int_rb_iter_first(root, start, end - 1); node;
-	     node = __mmu_int_rb_iter_next(node, start, end - 1)) {
+	for (node = __mmu_int_rb_iter_first(root, start, end - 1);
+	     node; node = ptr) {
+		/* Guard against node removal. */
+		ptr = __mmu_int_rb_iter_next(node, start, end - 1);
 		hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u",
 			  node->addr, node->len);
 		if (handler->ops->invalidate(root, node))
-			__mmu_rb_remove(handler, node, true);
+			__mmu_rb_remove(handler, node, mm);
 	}
 	spin_unlock_irqrestore(&handler->lock, flags);
 }
diff --git a/drivers/staging/rdma/hfi1/mmu_rb.h b/drivers/staging/rdma/hfi1/mmu_rb.h
index f8523fdb8a18..19a306e83c7d 100644
--- a/drivers/staging/rdma/hfi1/mmu_rb.h
+++ b/drivers/staging/rdma/hfi1/mmu_rb.h
@@ -59,7 +59,8 @@ struct mmu_rb_node {
 struct mmu_rb_ops {
 	bool (*filter)(struct mmu_rb_node *, unsigned long, unsigned long);
 	int (*insert)(struct rb_root *, struct mmu_rb_node *);
-	void (*remove)(struct rb_root *, struct mmu_rb_node *, bool);
+	void (*remove)(struct rb_root *, struct mmu_rb_node *,
+		       struct mm_struct *);
 	int (*invalidate)(struct rb_root *, struct mmu_rb_node *);
 };
 
diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c
index 0861e095df8d..5b72849bbd71 100644
--- a/drivers/staging/rdma/hfi1/user_exp_rcv.c
+++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c
@@ -87,7 +87,8 @@ static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *);
 static int set_rcvarray_entry(struct file *, unsigned long, u32,
 			      struct tid_group *, struct page **, unsigned);
 static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *);
-static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *, bool);
+static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *,
+			  struct mm_struct *);
 static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
 static int program_rcvarray(struct file *, unsigned long, struct tid_group *,
 			    struct tid_pageset *, unsigned, u16, struct page **,
@@ -899,7 +900,7 @@ static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
 	if (!node || node->rcventry != (uctxt->expected_base + rcventry))
 		return -EBADF;
 	if (HFI1_CAP_IS_USET(TID_UNMAP))
-		mmu_rb_remove(&fd->tid_rb_root, &node->mmu, false);
+		mmu_rb_remove(&fd->tid_rb_root, &node->mmu, NULL);
 	else
 		hfi1_mmu_rb_remove(&fd->tid_rb_root, &node->mmu);
 
@@ -965,7 +966,7 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
 				continue;
 			if (HFI1_CAP_IS_USET(TID_UNMAP))
 				mmu_rb_remove(&fd->tid_rb_root,
-					      &node->mmu, false);
+					      &node->mmu, NULL);
 			else
 				hfi1_mmu_rb_remove(&fd->tid_rb_root,
 						   &node->mmu);
@@ -1032,7 +1033,7 @@ static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node)
 }
 
 static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node,
-			  bool notifier)
+			  struct mm_struct *mm)
 {
 	struct hfi1_filedata *fdata =
 		container_of(root, struct hfi1_filedata, tid_rb_root);
diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c
index ab6b6a42000f..e08c74fe4c6b 100644
--- a/drivers/staging/rdma/hfi1/user_sdma.c
+++ b/drivers/staging/rdma/hfi1/user_sdma.c
@@ -299,7 +299,8 @@ static int defer_packet_queue(
 static void activate_packet_queue(struct iowait *, int);
 static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long);
 static int sdma_rb_insert(struct rb_root *, struct mmu_rb_node *);
-static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *, bool);
+static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *,
+			   struct mm_struct *);
 static int sdma_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
 
 static struct mmu_rb_ops sdma_rb_ops = {
@@ -1063,8 +1064,10 @@ static int pin_vector_pages(struct user_sdma_request *req,
 	rb_node = hfi1_mmu_rb_search(&pq->sdma_rb_root,
 				     (unsigned long)iovec->iov.iov_base,
 				     iovec->iov.iov_len);
-	if (rb_node)
+	if (rb_node && !IS_ERR(rb_node))
 		node = container_of(rb_node, struct sdma_mmu_node, rb);
+	else
+		rb_node = NULL;
 
 	if (!node) {
 		node = kzalloc(sizeof(*node), GFP_KERNEL);
@@ -1502,7 +1505,7 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
 				&req->pq->sdma_rb_root,
 				(unsigned long)req->iovs[i].iov.iov_base,
 				req->iovs[i].iov.iov_len);
-		if (!mnode)
+		if (!mnode || IS_ERR(mnode))
 			continue;
 
 		node = container_of(mnode, struct sdma_mmu_node, rb);
@@ -1547,7 +1550,7 @@ static int sdma_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
 }
 
 static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode,
-			   bool notifier)
+			   struct mm_struct *mm)
 {
 	struct sdma_mmu_node *node =
 		container_of(mnode, struct sdma_mmu_node, rb);
@@ -1557,14 +1560,19 @@ static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode,
 	node->pq->n_locked -= node->npages;
 	spin_unlock(&node->pq->evict_lock);
 
-	unpin_vector_pages(notifier ? NULL : current->mm, node->pages,
-			   node->npages);
+	/*
+	 * If mm is set, we are being called by the MMU notifier and we
+	 * should not pass a mm_struct to unpin_vector_page(). This is to
+	 * prevent a deadlock when hfi1_release_user_pages() attempts to
+	 * take the mmap_sem, which the MMU notifier has already taken.
+	 */
+	unpin_vector_pages(mm ? NULL : current->mm, node->pages, node->npages);
 	/*
 	 * If called by the MMU notifier, we have to adjust the pinned
 	 * page count ourselves.
 	 */
-	if (notifier)
-		current->mm->pinned_vm -= node->npages;
+	if (mm)
+		mm->pinned_vm -= node->npages;
 	kfree(node);
 }
 