diff options
author | Hugh Dickins <hughd@google.com> | 2016-07-26 18:26:15 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-26 19:19:19 -0400 |
commit | c01d5b300774d130a24d787825b01eb24e6e20cb (patch) | |
tree | e56b92aa5cf8f25ab812da30db5b36a6126e877b | |
parent | 5a6e75f8110c97e2a5488894d4e922187e6cb343 (diff) |
shmem: get_unmapped_area align huge page
Provide a shmem_get_unmapped_area method in file_operations, called at
mmap time to decide the mapping address. It could be conditional on
CONFIG_TRANSPARENT_HUGEPAGE, but save #ifdefs in other places by making
it unconditional.
shmem_get_unmapped_area() first calls the usual mm->get_unmapped_area
(which we treat as a black box, highly dependent on architecture and
config and executable layout). Lots of conditions, and in most cases it
just goes with the address that it chose; but when our huge stars are
rightly aligned, yet that did not provide a suitable address, go back to
ask for a larger arena, within which to align the mapping suitably.
There have to be some direct calls to shmem_get_unmapped_area(), not via
the file_operations: because of the way shmem_zero_setup() is called to
create a shmem object late in the mmap sequence, when MAP_SHARED is
requested with MAP_ANONYMOUS or /dev/zero. Though this only matters
when /proc/sys/vm/shmem_huge has been set.
Link: http://lkml.kernel.org/r/1466021202-61880-29-git-send-email-kirill.shutemov@linux.intel.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | drivers/char/mem.c | 24 | ||||
-rw-r--r-- | include/linux/shmem_fs.h | 2 | ||||
-rw-r--r-- | ipc/shm.c | 6 | ||||
-rw-r--r-- | mm/mmap.c | 16 | ||||
-rw-r--r-- | mm/shmem.c | 98 |
5 files changed, 142 insertions, 4 deletions
diff --git a/drivers/char/mem.c b/drivers/char/mem.c index d633974e7f8b..a33163dbb913 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/device.h> | 22 | #include <linux/device.h> |
23 | #include <linux/highmem.h> | 23 | #include <linux/highmem.h> |
24 | #include <linux/backing-dev.h> | 24 | #include <linux/backing-dev.h> |
25 | #include <linux/shmem_fs.h> | ||
25 | #include <linux/splice.h> | 26 | #include <linux/splice.h> |
26 | #include <linux/pfn.h> | 27 | #include <linux/pfn.h> |
27 | #include <linux/export.h> | 28 | #include <linux/export.h> |
@@ -657,6 +658,28 @@ static int mmap_zero(struct file *file, struct vm_area_struct *vma) | |||
657 | return 0; | 658 | return 0; |
658 | } | 659 | } |
659 | 660 | ||
661 | static unsigned long get_unmapped_area_zero(struct file *file, | ||
662 | unsigned long addr, unsigned long len, | ||
663 | unsigned long pgoff, unsigned long flags) | ||
664 | { | ||
665 | #ifdef CONFIG_MMU | ||
666 | if (flags & MAP_SHARED) { | ||
667 | /* | ||
668 | * mmap_zero() will call shmem_zero_setup() to create a file, | ||
669 | * so use shmem's get_unmapped_area in case it can be huge; | ||
670 | * and pass NULL for file as in mmap.c's get_unmapped_area(), | ||
671 | * so as not to confuse shmem with our handle on "/dev/zero". | ||
672 | */ | ||
673 | return shmem_get_unmapped_area(NULL, addr, len, pgoff, flags); | ||
674 | } | ||
675 | |||
676 | /* Otherwise flags & MAP_PRIVATE: with no shmem object beneath it */ | ||
677 | return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); | ||
678 | #else | ||
679 | return -ENOSYS; | ||
680 | #endif | ||
681 | } | ||
682 | |||
660 | static ssize_t write_full(struct file *file, const char __user *buf, | 683 | static ssize_t write_full(struct file *file, const char __user *buf, |
661 | size_t count, loff_t *ppos) | 684 | size_t count, loff_t *ppos) |
662 | { | 685 | { |
@@ -764,6 +787,7 @@ static const struct file_operations zero_fops = { | |||
764 | .read_iter = read_iter_zero, | 787 | .read_iter = read_iter_zero, |
765 | .write_iter = write_iter_zero, | 788 | .write_iter = write_iter_zero, |
766 | .mmap = mmap_zero, | 789 | .mmap = mmap_zero, |
790 | .get_unmapped_area = get_unmapped_area_zero, | ||
767 | #ifndef CONFIG_MMU | 791 | #ifndef CONFIG_MMU |
768 | .mmap_capabilities = zero_mmap_capabilities, | 792 | .mmap_capabilities = zero_mmap_capabilities, |
769 | #endif | 793 | #endif |
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 466f18c73a49..ff2de4bab61f 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h | |||
@@ -50,6 +50,8 @@ extern struct file *shmem_file_setup(const char *name, | |||
50 | extern struct file *shmem_kernel_file_setup(const char *name, loff_t size, | 50 | extern struct file *shmem_kernel_file_setup(const char *name, loff_t size, |
51 | unsigned long flags); | 51 | unsigned long flags); |
52 | extern int shmem_zero_setup(struct vm_area_struct *); | 52 | extern int shmem_zero_setup(struct vm_area_struct *); |
53 | extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr, | ||
54 | unsigned long len, unsigned long pgoff, unsigned long flags); | ||
53 | extern int shmem_lock(struct file *file, int lock, struct user_struct *user); | 55 | extern int shmem_lock(struct file *file, int lock, struct user_struct *user); |
54 | extern bool shmem_mapping(struct address_space *mapping); | 56 | extern bool shmem_mapping(struct address_space *mapping); |
55 | extern void shmem_unlock_mapping(struct address_space *mapping); | 57 | extern void shmem_unlock_mapping(struct address_space *mapping); |
@@ -476,13 +476,15 @@ static const struct file_operations shm_file_operations = { | |||
476 | .mmap = shm_mmap, | 476 | .mmap = shm_mmap, |
477 | .fsync = shm_fsync, | 477 | .fsync = shm_fsync, |
478 | .release = shm_release, | 478 | .release = shm_release, |
479 | #ifndef CONFIG_MMU | ||
480 | .get_unmapped_area = shm_get_unmapped_area, | 479 | .get_unmapped_area = shm_get_unmapped_area, |
481 | #endif | ||
482 | .llseek = noop_llseek, | 480 | .llseek = noop_llseek, |
483 | .fallocate = shm_fallocate, | 481 | .fallocate = shm_fallocate, |
484 | }; | 482 | }; |
485 | 483 | ||
484 | /* | ||
485 | * shm_file_operations_huge is now identical to shm_file_operations, | ||
486 | * but we keep it distinct for the sake of is_file_shm_hugepages(). | ||
487 | */ | ||
486 | static const struct file_operations shm_file_operations_huge = { | 488 | static const struct file_operations shm_file_operations_huge = { |
487 | .mmap = shm_mmap, | 489 | .mmap = shm_mmap, |
488 | .fsync = shm_fsync, | 490 | .fsync = shm_fsync, |
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/personality.h> | 25 | #include <linux/personality.h> |
26 | #include <linux/security.h> | 26 | #include <linux/security.h> |
27 | #include <linux/hugetlb.h> | 27 | #include <linux/hugetlb.h> |
28 | #include <linux/shmem_fs.h> | ||
28 | #include <linux/profile.h> | 29 | #include <linux/profile.h> |
29 | #include <linux/export.h> | 30 | #include <linux/export.h> |
30 | #include <linux/mount.h> | 31 | #include <linux/mount.h> |
@@ -1897,8 +1898,19 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, | |||
1897 | return -ENOMEM; | 1898 | return -ENOMEM; |
1898 | 1899 | ||
1899 | get_area = current->mm->get_unmapped_area; | 1900 | get_area = current->mm->get_unmapped_area; |
1900 | if (file && file->f_op->get_unmapped_area) | 1901 | if (file) { |
1901 | get_area = file->f_op->get_unmapped_area; | 1902 | if (file->f_op->get_unmapped_area) |
1903 | get_area = file->f_op->get_unmapped_area; | ||
1904 | } else if (flags & MAP_SHARED) { | ||
1905 | /* | ||
1906 | * mmap_region() will call shmem_zero_setup() to create a file, | ||
1907 | * so use shmem's get_unmapped_area in case it can be huge. | ||
1908 | * do_mmap_pgoff() will clear pgoff, so match alignment. | ||
1909 | */ | ||
1910 | pgoff = 0; | ||
1911 | get_area = shmem_get_unmapped_area; | ||
1912 | } | ||
1913 | |||
1902 | addr = get_area(file, addr, len, pgoff, flags); | 1914 | addr = get_area(file, addr, len, pgoff, flags); |
1903 | if (IS_ERR_VALUE(addr)) | 1915 | if (IS_ERR_VALUE(addr)) |
1904 | return addr; | 1916 | return addr; |
diff --git a/mm/shmem.c b/mm/shmem.c index fd374f74d99f..ab02b5bb5553 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -1513,6 +1513,94 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1513 | return ret; | 1513 | return ret; |
1514 | } | 1514 | } |
1515 | 1515 | ||
1516 | unsigned long shmem_get_unmapped_area(struct file *file, | ||
1517 | unsigned long uaddr, unsigned long len, | ||
1518 | unsigned long pgoff, unsigned long flags) | ||
1519 | { | ||
1520 | unsigned long (*get_area)(struct file *, | ||
1521 | unsigned long, unsigned long, unsigned long, unsigned long); | ||
1522 | unsigned long addr; | ||
1523 | unsigned long offset; | ||
1524 | unsigned long inflated_len; | ||
1525 | unsigned long inflated_addr; | ||
1526 | unsigned long inflated_offset; | ||
1527 | |||
1528 | if (len > TASK_SIZE) | ||
1529 | return -ENOMEM; | ||
1530 | |||
1531 | get_area = current->mm->get_unmapped_area; | ||
1532 | addr = get_area(file, uaddr, len, pgoff, flags); | ||
1533 | |||
1534 | if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) | ||
1535 | return addr; | ||
1536 | if (IS_ERR_VALUE(addr)) | ||
1537 | return addr; | ||
1538 | if (addr & ~PAGE_MASK) | ||
1539 | return addr; | ||
1540 | if (addr > TASK_SIZE - len) | ||
1541 | return addr; | ||
1542 | |||
1543 | if (shmem_huge == SHMEM_HUGE_DENY) | ||
1544 | return addr; | ||
1545 | if (len < HPAGE_PMD_SIZE) | ||
1546 | return addr; | ||
1547 | if (flags & MAP_FIXED) | ||
1548 | return addr; | ||
1549 | /* | ||
1550 | * Our priority is to support MAP_SHARED mapped hugely; | ||
1551 | * and support MAP_PRIVATE mapped hugely too, until it is COWed. | ||
1552 | * But if caller specified an address hint, respect that as before. | ||
1553 | */ | ||
1554 | if (uaddr) | ||
1555 | return addr; | ||
1556 | |||
1557 | if (shmem_huge != SHMEM_HUGE_FORCE) { | ||
1558 | struct super_block *sb; | ||
1559 | |||
1560 | if (file) { | ||
1561 | VM_BUG_ON(file->f_op != &shmem_file_operations); | ||
1562 | sb = file_inode(file)->i_sb; | ||
1563 | } else { | ||
1564 | /* | ||
1565 | * Called directly from mm/mmap.c, or drivers/char/mem.c | ||
1566 | * for "/dev/zero", to create a shared anonymous object. | ||
1567 | */ | ||
1568 | if (IS_ERR(shm_mnt)) | ||
1569 | return addr; | ||
1570 | sb = shm_mnt->mnt_sb; | ||
1571 | } | ||
1572 | if (SHMEM_SB(sb)->huge != SHMEM_HUGE_NEVER) | ||
1573 | return addr; | ||
1574 | } | ||
1575 | |||
1576 | offset = (pgoff << PAGE_SHIFT) & (HPAGE_PMD_SIZE-1); | ||
1577 | if (offset && offset + len < 2 * HPAGE_PMD_SIZE) | ||
1578 | return addr; | ||
1579 | if ((addr & (HPAGE_PMD_SIZE-1)) == offset) | ||
1580 | return addr; | ||
1581 | |||
1582 | inflated_len = len + HPAGE_PMD_SIZE - PAGE_SIZE; | ||
1583 | if (inflated_len > TASK_SIZE) | ||
1584 | return addr; | ||
1585 | if (inflated_len < len) | ||
1586 | return addr; | ||
1587 | |||
1588 | inflated_addr = get_area(NULL, 0, inflated_len, 0, flags); | ||
1589 | if (IS_ERR_VALUE(inflated_addr)) | ||
1590 | return addr; | ||
1591 | if (inflated_addr & ~PAGE_MASK) | ||
1592 | return addr; | ||
1593 | |||
1594 | inflated_offset = inflated_addr & (HPAGE_PMD_SIZE-1); | ||
1595 | inflated_addr += offset - inflated_offset; | ||
1596 | if (inflated_offset > offset) | ||
1597 | inflated_addr += HPAGE_PMD_SIZE; | ||
1598 | |||
1599 | if (inflated_addr > TASK_SIZE - len) | ||
1600 | return addr; | ||
1601 | return inflated_addr; | ||
1602 | } | ||
1603 | |||
1516 | #ifdef CONFIG_NUMA | 1604 | #ifdef CONFIG_NUMA |
1517 | static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol) | 1605 | static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol) |
1518 | { | 1606 | { |
@@ -3261,6 +3349,7 @@ static const struct address_space_operations shmem_aops = { | |||
3261 | 3349 | ||
3262 | static const struct file_operations shmem_file_operations = { | 3350 | static const struct file_operations shmem_file_operations = { |
3263 | .mmap = shmem_mmap, | 3351 | .mmap = shmem_mmap, |
3352 | .get_unmapped_area = shmem_get_unmapped_area, | ||
3264 | #ifdef CONFIG_TMPFS | 3353 | #ifdef CONFIG_TMPFS |
3265 | .llseek = shmem_file_llseek, | 3354 | .llseek = shmem_file_llseek, |
3266 | .read_iter = shmem_file_read_iter, | 3355 | .read_iter = shmem_file_read_iter, |
@@ -3496,6 +3585,15 @@ void shmem_unlock_mapping(struct address_space *mapping) | |||
3496 | { | 3585 | { |
3497 | } | 3586 | } |
3498 | 3587 | ||
3588 | #ifdef CONFIG_MMU | ||
3589 | unsigned long shmem_get_unmapped_area(struct file *file, | ||
3590 | unsigned long addr, unsigned long len, | ||
3591 | unsigned long pgoff, unsigned long flags) | ||
3592 | { | ||
3593 | return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); | ||
3594 | } | ||
3595 | #endif | ||
3596 | |||
3499 | void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) | 3597 | void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) |
3500 | { | 3598 | { |
3501 | truncate_inode_pages_range(inode->i_mapping, lstart, lend); | 3599 | truncate_inode_pages_range(inode->i_mapping, lstart, lend); |