diff options
Diffstat (limited to 'drivers/dax')
-rw-r--r-- | drivers/dax/Kconfig | 1 | ||||
-rw-r--r-- | drivers/dax/dax.c | 322 |
2 files changed, 323 insertions, 0 deletions
diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index 86ffbaa891ad..cedab7572de3 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig | |||
@@ -1,6 +1,7 @@ | |||
1 | menuconfig DEV_DAX | 1 | menuconfig DEV_DAX |
2 | tristate "DAX: direct access to differentiated memory" | 2 | tristate "DAX: direct access to differentiated memory" |
3 | default m if NVDIMM_DAX | 3 | default m if NVDIMM_DAX |
4 | depends on TRANSPARENT_HUGEPAGE | ||
4 | help | 5 | help |
5 | Support raw access to differentiated (persistence, bandwidth, | 6 | Support raw access to differentiated (persistence, bandwidth, |
6 | latency...) memory via an mmap(2) capable character | 7 | latency...) memory via an mmap(2) capable character |
diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c index 4c22a40f2335..b891a129b275 100644 --- a/drivers/dax/dax.c +++ b/drivers/dax/dax.c | |||
@@ -49,6 +49,7 @@ struct dax_region { | |||
49 | * @region - parent region | 49 | * @region - parent region |
50 | * @dev - device backing the character device | 50 | * @dev - device backing the character device |
51 | * @kref - enable this data to be tracked in filp->private_data | 51 | * @kref - enable this data to be tracked in filp->private_data |
52 | * @alive - !alive + rcu grace period == no new mappings can be established | ||
52 | * @id - child id in the region | 53 | * @id - child id in the region |
53 | * @num_resources - number of physical address extents in this device | 54 | * @num_resources - number of physical address extents in this device |
54 | * @res - array of physical address ranges | 55 | * @res - array of physical address ranges |
@@ -57,6 +58,7 @@ struct dax_dev { | |||
57 | struct dax_region *region; | 58 | struct dax_region *region; |
58 | struct device *dev; | 59 | struct device *dev; |
59 | struct kref kref; | 60 | struct kref kref; |
61 | bool alive; | ||
60 | int id; | 62 | int id; |
61 | int num_resources; | 63 | int num_resources; |
62 | struct resource res[0]; | 64 | struct resource res[0]; |
@@ -150,6 +152,16 @@ static void unregister_dax_dev(void *_dev) | |||
150 | 152 | ||
151 | dev_dbg(dev, "%s\n", __func__); | 153 | dev_dbg(dev, "%s\n", __func__); |
152 | 154 | ||
155 | /* | ||
156 | * Note, rcu is not protecting the liveness of dax_dev, rcu is | ||
157 | * ensuring that any fault handlers that might have seen | ||
158 | * dax_dev->alive == true, have completed. Any fault handlers | ||
159 | * that start after synchronize_rcu() has started will abort | ||
160 | * upon seeing dax_dev->alive == false. | ||
161 | */ | ||
162 | dax_dev->alive = false; | ||
163 | synchronize_rcu(); | ||
164 | |||
153 | get_device(dev); | 165 | get_device(dev); |
154 | device_unregister(dev); | 166 | device_unregister(dev); |
155 | ida_simple_remove(&dax_region->ida, dax_dev->id); | 167 | ida_simple_remove(&dax_region->ida, dax_dev->id); |
@@ -173,6 +185,7 @@ int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res, | |||
173 | memcpy(dax_dev->res, res, sizeof(*res) * count); | 185 | memcpy(dax_dev->res, res, sizeof(*res) * count); |
174 | dax_dev->num_resources = count; | 186 | dax_dev->num_resources = count; |
175 | kref_init(&dax_dev->kref); | 187 | kref_init(&dax_dev->kref); |
188 | dax_dev->alive = true; | ||
176 | dax_dev->region = dax_region; | 189 | dax_dev->region = dax_region; |
177 | kref_get(&dax_region->kref); | 190 | kref_get(&dax_region->kref); |
178 | 191 | ||
@@ -217,9 +230,318 @@ int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res, | |||
217 | } | 230 | } |
218 | EXPORT_SYMBOL_GPL(devm_create_dax_dev); | 231 | EXPORT_SYMBOL_GPL(devm_create_dax_dev); |
219 | 232 | ||
233 | /* return an unmapped area aligned to the dax region specified alignment */ | ||
234 | static unsigned long dax_dev_get_unmapped_area(struct file *filp, | ||
235 | unsigned long addr, unsigned long len, unsigned long pgoff, | ||
236 | unsigned long flags) | ||
237 | { | ||
238 | unsigned long off, off_end, off_align, len_align, addr_align, align; | ||
239 | struct dax_dev *dax_dev = filp ? filp->private_data : NULL; | ||
240 | struct dax_region *dax_region; | ||
241 | |||
242 | if (!dax_dev || addr) | ||
243 | goto out; | ||
244 | |||
245 | dax_region = dax_dev->region; | ||
246 | align = dax_region->align; | ||
247 | off = pgoff << PAGE_SHIFT; | ||
248 | off_end = off + len; | ||
249 | off_align = round_up(off, align); | ||
250 | |||
251 | if ((off_end <= off_align) || ((off_end - off_align) < align)) | ||
252 | goto out; | ||
253 | |||
254 | len_align = len + align; | ||
255 | if ((off + len_align) < off) | ||
256 | goto out; | ||
257 | |||
258 | addr_align = current->mm->get_unmapped_area(filp, addr, len_align, | ||
259 | pgoff, flags); | ||
260 | if (!IS_ERR_VALUE(addr_align)) { | ||
261 | addr_align += (off - addr_align) & (align - 1); | ||
262 | return addr_align; | ||
263 | } | ||
264 | out: | ||
265 | return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); | ||
266 | } | ||
267 | |||
268 | static int __match_devt(struct device *dev, const void *data) | ||
269 | { | ||
270 | const dev_t *devt = data; | ||
271 | |||
272 | return dev->devt == *devt; | ||
273 | } | ||
274 | |||
275 | static struct device *dax_dev_find(dev_t dev_t) | ||
276 | { | ||
277 | return class_find_device(dax_class, NULL, &dev_t, __match_devt); | ||
278 | } | ||
279 | |||
280 | static int dax_dev_open(struct inode *inode, struct file *filp) | ||
281 | { | ||
282 | struct dax_dev *dax_dev = NULL; | ||
283 | struct device *dev; | ||
284 | |||
285 | dev = dax_dev_find(inode->i_rdev); | ||
286 | if (!dev) | ||
287 | return -ENXIO; | ||
288 | |||
289 | device_lock(dev); | ||
290 | dax_dev = dev_get_drvdata(dev); | ||
291 | if (dax_dev) { | ||
292 | dev_dbg(dev, "%s\n", __func__); | ||
293 | filp->private_data = dax_dev; | ||
294 | kref_get(&dax_dev->kref); | ||
295 | inode->i_flags = S_DAX; | ||
296 | } | ||
297 | device_unlock(dev); | ||
298 | |||
299 | if (!dax_dev) { | ||
300 | put_device(dev); | ||
301 | return -ENXIO; | ||
302 | } | ||
303 | return 0; | ||
304 | } | ||
305 | |||
306 | static int dax_dev_release(struct inode *inode, struct file *filp) | ||
307 | { | ||
308 | struct dax_dev *dax_dev = filp->private_data; | ||
309 | struct device *dev = dax_dev->dev; | ||
310 | |||
311 | dev_dbg(dax_dev->dev, "%s\n", __func__); | ||
312 | dax_dev_put(dax_dev); | ||
313 | put_device(dev); | ||
314 | |||
315 | return 0; | ||
316 | } | ||
317 | |||
318 | static int check_vma(struct dax_dev *dax_dev, struct vm_area_struct *vma, | ||
319 | const char *func) | ||
320 | { | ||
321 | struct dax_region *dax_region = dax_dev->region; | ||
322 | struct device *dev = dax_dev->dev; | ||
323 | unsigned long mask; | ||
324 | |||
325 | if (!dax_dev->alive) | ||
326 | return -ENXIO; | ||
327 | |||
328 | /* prevent private / writable mappings from being established */ | ||
329 | if ((vma->vm_flags & (VM_NORESERVE|VM_SHARED|VM_WRITE)) == VM_WRITE) { | ||
330 | dev_info(dev, "%s: %s: fail, attempted private mapping\n", | ||
331 | current->comm, func); | ||
332 | return -EINVAL; | ||
333 | } | ||
334 | |||
335 | mask = dax_region->align - 1; | ||
336 | if (vma->vm_start & mask || vma->vm_end & mask) { | ||
337 | dev_info(dev, "%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n", | ||
338 | current->comm, func, vma->vm_start, vma->vm_end, | ||
339 | mask); | ||
340 | return -EINVAL; | ||
341 | } | ||
342 | |||
343 | if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) == PFN_DEV | ||
344 | && (vma->vm_flags & VM_DONTCOPY) == 0) { | ||
345 | dev_info(dev, "%s: %s: fail, dax range requires MADV_DONTFORK\n", | ||
346 | current->comm, func); | ||
347 | return -EINVAL; | ||
348 | } | ||
349 | |||
350 | if (!vma_is_dax(vma)) { | ||
351 | dev_info(dev, "%s: %s: fail, vma is not DAX capable\n", | ||
352 | current->comm, func); | ||
353 | return -EINVAL; | ||
354 | } | ||
355 | |||
356 | return 0; | ||
357 | } | ||
358 | |||
359 | static phys_addr_t pgoff_to_phys(struct dax_dev *dax_dev, pgoff_t pgoff, | ||
360 | unsigned long size) | ||
361 | { | ||
362 | struct resource *res; | ||
363 | phys_addr_t phys; | ||
364 | int i; | ||
365 | |||
366 | for (i = 0; i < dax_dev->num_resources; i++) { | ||
367 | res = &dax_dev->res[i]; | ||
368 | phys = pgoff * PAGE_SIZE + res->start; | ||
369 | if (phys >= res->start && phys <= res->end) | ||
370 | break; | ||
371 | pgoff -= PHYS_PFN(resource_size(res)); | ||
372 | } | ||
373 | |||
374 | if (i < dax_dev->num_resources) { | ||
375 | res = &dax_dev->res[i]; | ||
376 | if (phys + size - 1 <= res->end) | ||
377 | return phys; | ||
378 | } | ||
379 | |||
380 | return -1; | ||
381 | } | ||
382 | |||
383 | static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma, | ||
384 | struct vm_fault *vmf) | ||
385 | { | ||
386 | unsigned long vaddr = (unsigned long) vmf->virtual_address; | ||
387 | struct device *dev = dax_dev->dev; | ||
388 | struct dax_region *dax_region; | ||
389 | int rc = VM_FAULT_SIGBUS; | ||
390 | phys_addr_t phys; | ||
391 | pfn_t pfn; | ||
392 | |||
393 | if (check_vma(dax_dev, vma, __func__)) | ||
394 | return VM_FAULT_SIGBUS; | ||
395 | |||
396 | dax_region = dax_dev->region; | ||
397 | if (dax_region->align > PAGE_SIZE) { | ||
398 | dev_dbg(dev, "%s: alignment > fault size\n", __func__); | ||
399 | return VM_FAULT_SIGBUS; | ||
400 | } | ||
401 | |||
402 | phys = pgoff_to_phys(dax_dev, vmf->pgoff, PAGE_SIZE); | ||
403 | if (phys == -1) { | ||
404 | dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__, | ||
405 | vmf->pgoff); | ||
406 | return VM_FAULT_SIGBUS; | ||
407 | } | ||
408 | |||
409 | pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); | ||
410 | |||
411 | rc = vm_insert_mixed(vma, vaddr, pfn); | ||
412 | |||
413 | if (rc == -ENOMEM) | ||
414 | return VM_FAULT_OOM; | ||
415 | if (rc < 0 && rc != -EBUSY) | ||
416 | return VM_FAULT_SIGBUS; | ||
417 | |||
418 | return VM_FAULT_NOPAGE; | ||
419 | } | ||
420 | |||
421 | static int dax_dev_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
422 | { | ||
423 | int rc; | ||
424 | struct file *filp = vma->vm_file; | ||
425 | struct dax_dev *dax_dev = filp->private_data; | ||
426 | |||
427 | dev_dbg(dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__, | ||
428 | current->comm, (vmf->flags & FAULT_FLAG_WRITE) | ||
429 | ? "write" : "read", vma->vm_start, vma->vm_end); | ||
430 | rcu_read_lock(); | ||
431 | rc = __dax_dev_fault(dax_dev, vma, vmf); | ||
432 | rcu_read_unlock(); | ||
433 | |||
434 | return rc; | ||
435 | } | ||
436 | |||
437 | static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, | ||
438 | struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, | ||
439 | unsigned int flags) | ||
440 | { | ||
441 | unsigned long pmd_addr = addr & PMD_MASK; | ||
442 | struct device *dev = dax_dev->dev; | ||
443 | struct dax_region *dax_region; | ||
444 | phys_addr_t phys; | ||
445 | pgoff_t pgoff; | ||
446 | pfn_t pfn; | ||
447 | |||
448 | if (check_vma(dax_dev, vma, __func__)) | ||
449 | return VM_FAULT_SIGBUS; | ||
450 | |||
451 | dax_region = dax_dev->region; | ||
452 | if (dax_region->align > PMD_SIZE) { | ||
453 | dev_dbg(dev, "%s: alignment > fault size\n", __func__); | ||
454 | return VM_FAULT_SIGBUS; | ||
455 | } | ||
456 | |||
457 | /* dax pmd mappings require pfn_t_devmap() */ | ||
458 | if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) { | ||
459 | dev_dbg(dev, "%s: alignment > fault size\n", __func__); | ||
460 | return VM_FAULT_SIGBUS; | ||
461 | } | ||
462 | |||
463 | pgoff = linear_page_index(vma, pmd_addr); | ||
464 | phys = pgoff_to_phys(dax_dev, pgoff, PAGE_SIZE); | ||
465 | if (phys == -1) { | ||
466 | dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__, | ||
467 | pgoff); | ||
468 | return VM_FAULT_SIGBUS; | ||
469 | } | ||
470 | |||
471 | pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); | ||
472 | |||
473 | return vmf_insert_pfn_pmd(vma, addr, pmd, pfn, | ||
474 | flags & FAULT_FLAG_WRITE); | ||
475 | } | ||
476 | |||
477 | static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr, | ||
478 | pmd_t *pmd, unsigned int flags) | ||
479 | { | ||
480 | int rc; | ||
481 | struct file *filp = vma->vm_file; | ||
482 | struct dax_dev *dax_dev = filp->private_data; | ||
483 | |||
484 | dev_dbg(dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__, | ||
485 | current->comm, (flags & FAULT_FLAG_WRITE) | ||
486 | ? "write" : "read", vma->vm_start, vma->vm_end); | ||
487 | |||
488 | rcu_read_lock(); | ||
489 | rc = __dax_dev_pmd_fault(dax_dev, vma, addr, pmd, flags); | ||
490 | rcu_read_unlock(); | ||
491 | |||
492 | return rc; | ||
493 | } | ||
494 | |||
495 | static void dax_dev_vm_open(struct vm_area_struct *vma) | ||
496 | { | ||
497 | struct file *filp = vma->vm_file; | ||
498 | struct dax_dev *dax_dev = filp->private_data; | ||
499 | |||
500 | dev_dbg(dax_dev->dev, "%s\n", __func__); | ||
501 | kref_get(&dax_dev->kref); | ||
502 | } | ||
503 | |||
504 | static void dax_dev_vm_close(struct vm_area_struct *vma) | ||
505 | { | ||
506 | struct file *filp = vma->vm_file; | ||
507 | struct dax_dev *dax_dev = filp->private_data; | ||
508 | |||
509 | dev_dbg(dax_dev->dev, "%s\n", __func__); | ||
510 | dax_dev_put(dax_dev); | ||
511 | } | ||
512 | |||
513 | static const struct vm_operations_struct dax_dev_vm_ops = { | ||
514 | .fault = dax_dev_fault, | ||
515 | .pmd_fault = dax_dev_pmd_fault, | ||
516 | .open = dax_dev_vm_open, | ||
517 | .close = dax_dev_vm_close, | ||
518 | }; | ||
519 | |||
520 | static int dax_dev_mmap(struct file *filp, struct vm_area_struct *vma) | ||
521 | { | ||
522 | struct dax_dev *dax_dev = filp->private_data; | ||
523 | int rc; | ||
524 | |||
525 | dev_dbg(dax_dev->dev, "%s\n", __func__); | ||
526 | |||
527 | rc = check_vma(dax_dev, vma, __func__); | ||
528 | if (rc) | ||
529 | return rc; | ||
530 | |||
531 | kref_get(&dax_dev->kref); | ||
532 | vma->vm_ops = &dax_dev_vm_ops; | ||
533 | vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; | ||
534 | return 0; | ||
535 | |||
536 | } | ||
537 | |||
220 | static const struct file_operations dax_fops = { | 538 | static const struct file_operations dax_fops = { |
221 | .llseek = noop_llseek, | 539 | .llseek = noop_llseek, |
222 | .owner = THIS_MODULE, | 540 | .owner = THIS_MODULE, |
541 | .open = dax_dev_open, | ||
542 | .release = dax_dev_release, | ||
543 | .get_unmapped_area = dax_dev_get_unmapped_area, | ||
544 | .mmap = dax_dev_mmap, | ||
223 | }; | 545 | }; |
224 | 546 | ||
225 | static int __init dax_init(void) | 547 | static int __init dax_init(void) |