summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHugh Dickins <hughd@google.com>2016-07-26 18:26:15 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-07-26 19:19:19 -0400
commitc01d5b300774d130a24d787825b01eb24e6e20cb (patch)
treee56b92aa5cf8f25ab812da30db5b36a6126e877b
parent5a6e75f8110c97e2a5488894d4e922187e6cb343 (diff)
shmem: get_unmapped_area align huge page
Provide a shmem_get_unmapped_area method in file_operations, called at mmap time to decide the mapping address. It could be conditional on CONFIG_TRANSPARENT_HUGEPAGE, but save #ifdefs in other places by making it unconditional. shmem_get_unmapped_area() first calls the usual mm->get_unmapped_area (which we treat as a black box, highly dependent on architecture and config and executable layout). Lots of conditions, and in most cases it just goes with the address that it chose; but when our huge stars are rightly aligned, yet that did not provide a suitable address, go back to ask for a larger arena, within which to align the mapping suitably. There have to be some direct calls to shmem_get_unmapped_area(), not via the file_operations: because of the way shmem_zero_setup() is called to create a shmem object late in the mmap sequence, when MAP_SHARED is requested with MAP_ANONYMOUS or /dev/zero. Though this only matters when /proc/sys/vm/shmem_huge has been set. Link: http://lkml.kernel.org/r/1466021202-61880-29-git-send-email-kirill.shutemov@linux.intel.com Signed-off-by: Hugh Dickins <hughd@google.com> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--drivers/char/mem.c24
-rw-r--r--include/linux/shmem_fs.h2
-rw-r--r--ipc/shm.c6
-rw-r--r--mm/mmap.c16
-rw-r--r--mm/shmem.c98
5 files changed, 142 insertions, 4 deletions
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index d633974e7f8b..a33163dbb913 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -22,6 +22,7 @@
22#include <linux/device.h> 22#include <linux/device.h>
23#include <linux/highmem.h> 23#include <linux/highmem.h>
24#include <linux/backing-dev.h> 24#include <linux/backing-dev.h>
25#include <linux/shmem_fs.h>
25#include <linux/splice.h> 26#include <linux/splice.h>
26#include <linux/pfn.h> 27#include <linux/pfn.h>
27#include <linux/export.h> 28#include <linux/export.h>
@@ -657,6 +658,28 @@ static int mmap_zero(struct file *file, struct vm_area_struct *vma)
657 return 0; 658 return 0;
658} 659}
659 660
661static unsigned long get_unmapped_area_zero(struct file *file,
662 unsigned long addr, unsigned long len,
663 unsigned long pgoff, unsigned long flags)
664{
665#ifdef CONFIG_MMU
666 if (flags & MAP_SHARED) {
667 /*
668 * mmap_zero() will call shmem_zero_setup() to create a file,
669 * so use shmem's get_unmapped_area in case it can be huge;
670 * and pass NULL for file as in mmap.c's get_unmapped_area(),
671 * so as not to confuse shmem with our handle on "/dev/zero".
672 */
673 return shmem_get_unmapped_area(NULL, addr, len, pgoff, flags);
674 }
675
676 /* Otherwise flags & MAP_PRIVATE: with no shmem object beneath it */
677 return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
678#else
679 return -ENOSYS;
680#endif
681}
682
660static ssize_t write_full(struct file *file, const char __user *buf, 683static ssize_t write_full(struct file *file, const char __user *buf,
661 size_t count, loff_t *ppos) 684 size_t count, loff_t *ppos)
662{ 685{
@@ -764,6 +787,7 @@ static const struct file_operations zero_fops = {
764 .read_iter = read_iter_zero, 787 .read_iter = read_iter_zero,
765 .write_iter = write_iter_zero, 788 .write_iter = write_iter_zero,
766 .mmap = mmap_zero, 789 .mmap = mmap_zero,
790 .get_unmapped_area = get_unmapped_area_zero,
767#ifndef CONFIG_MMU 791#ifndef CONFIG_MMU
768 .mmap_capabilities = zero_mmap_capabilities, 792 .mmap_capabilities = zero_mmap_capabilities,
769#endif 793#endif
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 466f18c73a49..ff2de4bab61f 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -50,6 +50,8 @@ extern struct file *shmem_file_setup(const char *name,
50extern struct file *shmem_kernel_file_setup(const char *name, loff_t size, 50extern struct file *shmem_kernel_file_setup(const char *name, loff_t size,
51 unsigned long flags); 51 unsigned long flags);
52extern int shmem_zero_setup(struct vm_area_struct *); 52extern int shmem_zero_setup(struct vm_area_struct *);
53extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
54 unsigned long len, unsigned long pgoff, unsigned long flags);
53extern int shmem_lock(struct file *file, int lock, struct user_struct *user); 55extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
54extern bool shmem_mapping(struct address_space *mapping); 56extern bool shmem_mapping(struct address_space *mapping);
55extern void shmem_unlock_mapping(struct address_space *mapping); 57extern void shmem_unlock_mapping(struct address_space *mapping);
diff --git a/ipc/shm.c b/ipc/shm.c
index 13282510bc0d..7fa5cbebbf19 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -476,13 +476,15 @@ static const struct file_operations shm_file_operations = {
476 .mmap = shm_mmap, 476 .mmap = shm_mmap,
477 .fsync = shm_fsync, 477 .fsync = shm_fsync,
478 .release = shm_release, 478 .release = shm_release,
479#ifndef CONFIG_MMU
480 .get_unmapped_area = shm_get_unmapped_area, 479 .get_unmapped_area = shm_get_unmapped_area,
481#endif
482 .llseek = noop_llseek, 480 .llseek = noop_llseek,
483 .fallocate = shm_fallocate, 481 .fallocate = shm_fallocate,
484}; 482};
485 483
484/*
485 * shm_file_operations_huge is now identical to shm_file_operations,
486 * but we keep it distinct for the sake of is_file_shm_hugepages().
487 */
486static const struct file_operations shm_file_operations_huge = { 488static const struct file_operations shm_file_operations_huge = {
487 .mmap = shm_mmap, 489 .mmap = shm_mmap,
488 .fsync = shm_fsync, 490 .fsync = shm_fsync,
diff --git a/mm/mmap.c b/mm/mmap.c
index a41872c8f2af..86b18f334f4f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -25,6 +25,7 @@
25#include <linux/personality.h> 25#include <linux/personality.h>
26#include <linux/security.h> 26#include <linux/security.h>
27#include <linux/hugetlb.h> 27#include <linux/hugetlb.h>
28#include <linux/shmem_fs.h>
28#include <linux/profile.h> 29#include <linux/profile.h>
29#include <linux/export.h> 30#include <linux/export.h>
30#include <linux/mount.h> 31#include <linux/mount.h>
@@ -1897,8 +1898,19 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
1897 return -ENOMEM; 1898 return -ENOMEM;
1898 1899
1899 get_area = current->mm->get_unmapped_area; 1900 get_area = current->mm->get_unmapped_area;
1900 if (file && file->f_op->get_unmapped_area) 1901 if (file) {
1901 get_area = file->f_op->get_unmapped_area; 1902 if (file->f_op->get_unmapped_area)
1903 get_area = file->f_op->get_unmapped_area;
1904 } else if (flags & MAP_SHARED) {
1905 /*
1906 * mmap_region() will call shmem_zero_setup() to create a file,
1907 * so use shmem's get_unmapped_area in case it can be huge.
1908 * do_mmap_pgoff() will clear pgoff, so match alignment.
1909 */
1910 pgoff = 0;
1911 get_area = shmem_get_unmapped_area;
1912 }
1913
1902 addr = get_area(file, addr, len, pgoff, flags); 1914 addr = get_area(file, addr, len, pgoff, flags);
1903 if (IS_ERR_VALUE(addr)) 1915 if (IS_ERR_VALUE(addr))
1904 return addr; 1916 return addr;
diff --git a/mm/shmem.c b/mm/shmem.c
index fd374f74d99f..ab02b5bb5553 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1513,6 +1513,94 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1513 return ret; 1513 return ret;
1514} 1514}
1515 1515
1516unsigned long shmem_get_unmapped_area(struct file *file,
1517 unsigned long uaddr, unsigned long len,
1518 unsigned long pgoff, unsigned long flags)
1519{
1520 unsigned long (*get_area)(struct file *,
1521 unsigned long, unsigned long, unsigned long, unsigned long);
1522 unsigned long addr;
1523 unsigned long offset;
1524 unsigned long inflated_len;
1525 unsigned long inflated_addr;
1526 unsigned long inflated_offset;
1527
1528 if (len > TASK_SIZE)
1529 return -ENOMEM;
1530
1531 get_area = current->mm->get_unmapped_area;
1532 addr = get_area(file, uaddr, len, pgoff, flags);
1533
1534 if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
1535 return addr;
1536 if (IS_ERR_VALUE(addr))
1537 return addr;
1538 if (addr & ~PAGE_MASK)
1539 return addr;
1540 if (addr > TASK_SIZE - len)
1541 return addr;
1542
1543 if (shmem_huge == SHMEM_HUGE_DENY)
1544 return addr;
1545 if (len < HPAGE_PMD_SIZE)
1546 return addr;
1547 if (flags & MAP_FIXED)
1548 return addr;
1549 /*
1550 * Our priority is to support MAP_SHARED mapped hugely;
1551 * and support MAP_PRIVATE mapped hugely too, until it is COWed.
1552 * But if caller specified an address hint, respect that as before.
1553 */
1554 if (uaddr)
1555 return addr;
1556
1557 if (shmem_huge != SHMEM_HUGE_FORCE) {
1558 struct super_block *sb;
1559
1560 if (file) {
1561 VM_BUG_ON(file->f_op != &shmem_file_operations);
1562 sb = file_inode(file)->i_sb;
1563 } else {
1564 /*
1565 * Called directly from mm/mmap.c, or drivers/char/mem.c
1566 * for "/dev/zero", to create a shared anonymous object.
1567 */
1568 if (IS_ERR(shm_mnt))
1569 return addr;
1570 sb = shm_mnt->mnt_sb;
1571 }
1572 if (SHMEM_SB(sb)->huge != SHMEM_HUGE_NEVER)
1573 return addr;
1574 }
1575
1576 offset = (pgoff << PAGE_SHIFT) & (HPAGE_PMD_SIZE-1);
1577 if (offset && offset + len < 2 * HPAGE_PMD_SIZE)
1578 return addr;
1579 if ((addr & (HPAGE_PMD_SIZE-1)) == offset)
1580 return addr;
1581
1582 inflated_len = len + HPAGE_PMD_SIZE - PAGE_SIZE;
1583 if (inflated_len > TASK_SIZE)
1584 return addr;
1585 if (inflated_len < len)
1586 return addr;
1587
1588 inflated_addr = get_area(NULL, 0, inflated_len, 0, flags);
1589 if (IS_ERR_VALUE(inflated_addr))
1590 return addr;
1591 if (inflated_addr & ~PAGE_MASK)
1592 return addr;
1593
1594 inflated_offset = inflated_addr & (HPAGE_PMD_SIZE-1);
1595 inflated_addr += offset - inflated_offset;
1596 if (inflated_offset > offset)
1597 inflated_addr += HPAGE_PMD_SIZE;
1598
1599 if (inflated_addr > TASK_SIZE - len)
1600 return addr;
1601 return inflated_addr;
1602}
1603
1516#ifdef CONFIG_NUMA 1604#ifdef CONFIG_NUMA
1517static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol) 1605static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
1518{ 1606{
@@ -3261,6 +3349,7 @@ static const struct address_space_operations shmem_aops = {
3261 3349
3262static const struct file_operations shmem_file_operations = { 3350static const struct file_operations shmem_file_operations = {
3263 .mmap = shmem_mmap, 3351 .mmap = shmem_mmap,
3352 .get_unmapped_area = shmem_get_unmapped_area,
3264#ifdef CONFIG_TMPFS 3353#ifdef CONFIG_TMPFS
3265 .llseek = shmem_file_llseek, 3354 .llseek = shmem_file_llseek,
3266 .read_iter = shmem_file_read_iter, 3355 .read_iter = shmem_file_read_iter,
@@ -3496,6 +3585,15 @@ void shmem_unlock_mapping(struct address_space *mapping)
3496{ 3585{
3497} 3586}
3498 3587
#ifdef CONFIG_MMU
/*
 * Fallback when shmem cannot do huge-page alignment: simply defer to the
 * mm's default get_unmapped_area.
 */
unsigned long shmem_get_unmapped_area(struct file *file,
				      unsigned long addr, unsigned long len,
				      unsigned long pgoff, unsigned long flags)
{
	unsigned long (*get_area)(struct file *, unsigned long,
			unsigned long, unsigned long, unsigned long);

	get_area = current->mm->get_unmapped_area;
	return get_area(file, addr, len, pgoff, flags);
}
#endif
3596
3499void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) 3597void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
3500{ 3598{
3501 truncate_inode_pages_range(inode->i_mapping, lstart, lend); 3599 truncate_inode_pages_range(inode->i_mapping, lstart, lend);