Diffstat (limited to 'mm')
 -rw-r--r--  mm/filemap.c    | 103
 -rw-r--r--  mm/hugetlb.c    |   9
 -rw-r--r--  mm/maccess.c    |  11
 -rw-r--r--  mm/memcontrol.c |  11
 -rw-r--r--  mm/migrate.c    |  39
 -rw-r--r--  mm/mmap.c       |  40
 -rw-r--r--  mm/nommu.c      | 144
 -rw-r--r--  mm/oom_kill.c   |   2
 -rw-r--r--  mm/page_alloc.c |   9
 -rw-r--r--  mm/percpu.c     |   4
 -rw-r--r--  mm/slab.c       |   4
 -rw-r--r--  mm/truncate.c   |  30
 -rw-r--r--  mm/util.c       |  46
 -rw-r--r--  mm/vmalloc.c    | 114
 -rw-r--r--  mm/vmscan.c     |   3
 15 files changed, 371 insertions(+), 198 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c index 96ac6b0eb6cb..698ea80f2102 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
| @@ -1634,14 +1634,15 @@ EXPORT_SYMBOL(generic_file_readonly_mmap); | |||
| 1634 | static struct page *__read_cache_page(struct address_space *mapping, | 1634 | static struct page *__read_cache_page(struct address_space *mapping, |
| 1635 | pgoff_t index, | 1635 | pgoff_t index, |
| 1636 | int (*filler)(void *,struct page*), | 1636 | int (*filler)(void *,struct page*), |
| 1637 | void *data) | 1637 | void *data, |
| 1638 | gfp_t gfp) | ||
| 1638 | { | 1639 | { |
| 1639 | struct page *page; | 1640 | struct page *page; |
| 1640 | int err; | 1641 | int err; |
| 1641 | repeat: | 1642 | repeat: |
| 1642 | page = find_get_page(mapping, index); | 1643 | page = find_get_page(mapping, index); |
| 1643 | if (!page) { | 1644 | if (!page) { |
| 1644 | page = page_cache_alloc_cold(mapping); | 1645 | page = __page_cache_alloc(gfp | __GFP_COLD); |
| 1645 | if (!page) | 1646 | if (!page) |
| 1646 | return ERR_PTR(-ENOMEM); | 1647 | return ERR_PTR(-ENOMEM); |
| 1647 | err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); | 1648 | err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); |
| @@ -1661,31 +1662,18 @@ repeat: | |||
| 1661 | return page; | 1662 | return page; |
| 1662 | } | 1663 | } |
| 1663 | 1664 | ||
| 1664 | /** | 1665 | static struct page *do_read_cache_page(struct address_space *mapping, |
| 1665 | * read_cache_page_async - read into page cache, fill it if needed | ||
| 1666 | * @mapping: the page's address_space | ||
| 1667 | * @index: the page index | ||
| 1668 | * @filler: function to perform the read | ||
| 1669 | * @data: destination for read data | ||
| 1670 | * | ||
| 1671 | * Same as read_cache_page, but don't wait for page to become unlocked | ||
| 1672 | * after submitting it to the filler. | ||
| 1673 | * | ||
| 1674 | * Read into the page cache. If a page already exists, and PageUptodate() is | ||
| 1675 | * not set, try to fill the page but don't wait for it to become unlocked. | ||
| 1676 | * | ||
| 1677 | * If the page does not get brought uptodate, return -EIO. | ||
| 1678 | */ | ||
| 1679 | struct page *read_cache_page_async(struct address_space *mapping, | ||
| 1680 | pgoff_t index, | 1666 | pgoff_t index, |
| 1681 | int (*filler)(void *,struct page*), | 1667 | int (*filler)(void *,struct page*), |
| 1682 | void *data) | 1668 | void *data, |
| 1669 | gfp_t gfp) | ||
| 1670 | |||
| 1683 | { | 1671 | { |
| 1684 | struct page *page; | 1672 | struct page *page; |
| 1685 | int err; | 1673 | int err; |
| 1686 | 1674 | ||
| 1687 | retry: | 1675 | retry: |
| 1688 | page = __read_cache_page(mapping, index, filler, data); | 1676 | page = __read_cache_page(mapping, index, filler, data, gfp); |
| 1689 | if (IS_ERR(page)) | 1677 | if (IS_ERR(page)) |
| 1690 | return page; | 1678 | return page; |
| 1691 | if (PageUptodate(page)) | 1679 | if (PageUptodate(page)) |
| @@ -1710,8 +1698,67 @@ out: | |||
| 1710 | mark_page_accessed(page); | 1698 | mark_page_accessed(page); |
| 1711 | return page; | 1699 | return page; |
| 1712 | } | 1700 | } |
| 1701 | |||
| 1702 | /** | ||
| 1703 | * read_cache_page_async - read into page cache, fill it if needed | ||
| 1704 | * @mapping: the page's address_space | ||
| 1705 | * @index: the page index | ||
| 1706 | * @filler: function to perform the read | ||
| 1707 | * @data: destination for read data | ||
| 1708 | * | ||
| 1709 | * Same as read_cache_page, but don't wait for page to become unlocked | ||
| 1710 | * after submitting it to the filler. | ||
| 1711 | * | ||
| 1712 | * Read into the page cache. If a page already exists, and PageUptodate() is | ||
| 1713 | * not set, try to fill the page but don't wait for it to become unlocked. | ||
| 1714 | * | ||
| 1715 | * If the page does not get brought uptodate, return -EIO. | ||
| 1716 | */ | ||
| 1717 | struct page *read_cache_page_async(struct address_space *mapping, | ||
| 1718 | pgoff_t index, | ||
| 1719 | int (*filler)(void *,struct page*), | ||
| 1720 | void *data) | ||
| 1721 | { | ||
| 1722 | return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping)); | ||
| 1723 | } | ||
| 1713 | EXPORT_SYMBOL(read_cache_page_async); | 1724 | EXPORT_SYMBOL(read_cache_page_async); |
| 1714 | 1725 | ||
| 1726 | static struct page *wait_on_page_read(struct page *page) | ||
| 1727 | { | ||
| 1728 | if (!IS_ERR(page)) { | ||
| 1729 | wait_on_page_locked(page); | ||
| 1730 | if (!PageUptodate(page)) { | ||
| 1731 | page_cache_release(page); | ||
| 1732 | page = ERR_PTR(-EIO); | ||
| 1733 | } | ||
| 1734 | } | ||
| 1735 | return page; | ||
| 1736 | } | ||
| 1737 | |||
| 1738 | /** | ||
| 1739 | * read_cache_page_gfp - read into page cache, using specified page allocation flags. | ||
| 1740 | * @mapping: the page's address_space | ||
| 1741 | * @index: the page index | ||
| 1742 | * @gfp: the page allocator flags to use if allocating | ||
| 1743 | * | ||
| 1744 | * This is the same as "read_mapping_page(mapping, index, NULL)", but with | ||
| 1745 | * any new page allocations done using the specified allocation flags. Note | ||
| 1746 | * that the Radix tree operations will still use GFP_KERNEL, so you can't | ||
| 1747 | * expect to do this atomically or anything like that - but you can pass in | ||
| 1748 | * other page requirements. | ||
| 1749 | * | ||
| 1750 | * If the page does not get brought uptodate, return -EIO. | ||
| 1751 | */ | ||
| 1752 | struct page *read_cache_page_gfp(struct address_space *mapping, | ||
| 1753 | pgoff_t index, | ||
| 1754 | gfp_t gfp) | ||
| 1755 | { | ||
| 1756 | filler_t *filler = (filler_t *)mapping->a_ops->readpage; | ||
| 1757 | |||
| 1758 | return wait_on_page_read(do_read_cache_page(mapping, index, filler, NULL, gfp)); | ||
| 1759 | } | ||
| 1760 | EXPORT_SYMBOL(read_cache_page_gfp); | ||
| 1761 | |||
| 1715 | /** | 1762 | /** |
| 1716 | * read_cache_page - read into page cache, fill it if needed | 1763 | * read_cache_page - read into page cache, fill it if needed |
| 1717 | * @mapping: the page's address_space | 1764 | * @mapping: the page's address_space |
| @@ -1729,18 +1776,7 @@ struct page *read_cache_page(struct address_space *mapping, | |||
| 1729 | int (*filler)(void *,struct page*), | 1776 | int (*filler)(void *,struct page*), |
| 1730 | void *data) | 1777 | void *data) |
| 1731 | { | 1778 | { |
| 1732 | struct page *page; | 1779 | return wait_on_page_read(read_cache_page_async(mapping, index, filler, data)); |
| 1733 | |||
| 1734 | page = read_cache_page_async(mapping, index, filler, data); | ||
| 1735 | if (IS_ERR(page)) | ||
| 1736 | goto out; | ||
| 1737 | wait_on_page_locked(page); | ||
| 1738 | if (!PageUptodate(page)) { | ||
| 1739 | page_cache_release(page); | ||
| 1740 | page = ERR_PTR(-EIO); | ||
| 1741 | } | ||
| 1742 | out: | ||
| 1743 | return page; | ||
| 1744 | } | 1780 | } |
| 1745 | EXPORT_SYMBOL(read_cache_page); | 1781 | EXPORT_SYMBOL(read_cache_page); |
| 1746 | 1782 | ||
| @@ -2196,6 +2232,9 @@ again: | |||
| 2196 | if (unlikely(status)) | 2232 | if (unlikely(status)) |
| 2197 | break; | 2233 | break; |
| 2198 | 2234 | ||
| 2235 | if (mapping_writably_mapped(mapping)) | ||
| 2236 | flush_dcache_page(page); | ||
| 2237 | |||
| 2199 | pagefault_disable(); | 2238 | pagefault_disable(); |
| 2200 | copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); | 2239 | copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); |
| 2201 | pagefault_enable(); | 2240 | pagefault_enable(); |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 65f38c218207..2d16fa6b8c2d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
| @@ -402,7 +402,7 @@ static void clear_huge_page(struct page *page, | |||
| 402 | { | 402 | { |
| 403 | int i; | 403 | int i; |
| 404 | 404 | ||
| 405 | if (unlikely(sz > MAX_ORDER_NR_PAGES)) { | 405 | if (unlikely(sz/PAGE_SIZE > MAX_ORDER_NR_PAGES)) { |
| 406 | clear_gigantic_page(page, addr, sz); | 406 | clear_gigantic_page(page, addr, sz); |
| 407 | return; | 407 | return; |
| 408 | } | 408 | } |
| @@ -1515,10 +1515,9 @@ static struct attribute_group hstate_attr_group = { | |||
| 1515 | .attrs = hstate_attrs, | 1515 | .attrs = hstate_attrs, |
| 1516 | }; | 1516 | }; |
| 1517 | 1517 | ||
| 1518 | static int __init hugetlb_sysfs_add_hstate(struct hstate *h, | 1518 | static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent, |
| 1519 | struct kobject *parent, | 1519 | struct kobject **hstate_kobjs, |
| 1520 | struct kobject **hstate_kobjs, | 1520 | struct attribute_group *hstate_attr_group) |
| 1521 | struct attribute_group *hstate_attr_group) | ||
| 1522 | { | 1521 | { |
| 1523 | int retval; | 1522 | int retval; |
| 1524 | int hi = h - hstates; | 1523 | int hi = h - hstates; |
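The clear_huge_page() fix above changes a byte count into a page count before comparing with MAX_ORDER_NR_PAGES. A quick worked check of the comparison, assuming common x86-64 values (PAGE_SIZE = 4096, MAX_ORDER = 11, so MAX_ORDER_NR_PAGES = 1024; these constants are assumptions, not part of the diff):

#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096;			/* assumed PAGE_SIZE */
	unsigned long max_order_nr_pages = 1024;	/* assumed 1 << (MAX_ORDER - 1) */
	unsigned long huge_2m = 2UL << 20;		/* ordinary 2 MiB huge page */
	unsigned long giga_1g = 1UL << 30;		/* 1 GiB gigantic page */

	/* old test compared bytes with a page count: true even for 2 MiB pages,
	 * so every huge page was cleared via clear_gigantic_page() */
	printf("old test, 2 MiB page: %d\n", huge_2m > max_order_nr_pages);
	/* new test: 512 pages <= 1024, so 2 MiB pages use the normal loop,
	 * while 262144 pages > 1024 keeps 1 GiB pages on the gigantic path */
	printf("new test, 2 MiB page: %d\n", huge_2m / page_size > max_order_nr_pages);
	printf("new test, 1 GiB page: %d\n", giga_1g / page_size > max_order_nr_pages);
	return 0;
}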
diff --git a/mm/maccess.c b/mm/maccess.c index 9073695ff25f..4e348dbaecd7 100644 --- a/mm/maccess.c +++ b/mm/maccess.c | |||
| @@ -14,7 +14,11 @@ | |||
| 14 | * Safely read from address @src to the buffer at @dst. If a kernel fault | 14 | * Safely read from address @src to the buffer at @dst. If a kernel fault |
| 15 | * happens, handle that and return -EFAULT. | 15 | * happens, handle that and return -EFAULT. |
| 16 | */ | 16 | */ |
| 17 | long probe_kernel_read(void *dst, void *src, size_t size) | 17 | |
| 18 | long __weak probe_kernel_read(void *dst, void *src, size_t size) | ||
| 19 | __attribute__((alias("__probe_kernel_read"))); | ||
| 20 | |||
| 21 | long __probe_kernel_read(void *dst, void *src, size_t size) | ||
| 18 | { | 22 | { |
| 19 | long ret; | 23 | long ret; |
| 20 | mm_segment_t old_fs = get_fs(); | 24 | mm_segment_t old_fs = get_fs(); |
| @@ -39,7 +43,10 @@ EXPORT_SYMBOL_GPL(probe_kernel_read); | |||
| 39 | * Safely write to address @dst from the buffer at @src. If a kernel fault | 43 | * Safely write to address @dst from the buffer at @src. If a kernel fault |
| 40 | * happens, handle that and return -EFAULT. | 44 | * happens, handle that and return -EFAULT. |
| 41 | */ | 45 | */ |
| 42 | long notrace __weak probe_kernel_write(void *dst, void *src, size_t size) | 46 | long __weak probe_kernel_write(void *dst, void *src, size_t size) |
| 47 | __attribute__((alias("__probe_kernel_write"))); | ||
| 48 | |||
| 49 | long __probe_kernel_write(void *dst, void *src, size_t size) | ||
| 43 | { | 50 | { |
| 44 | long ret; | 51 | long ret; |
| 45 | mm_segment_t old_fs = get_fs(); | 52 | mm_segment_t old_fs = get_fs(); |
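The __weak alias pattern above lets an architecture supply strong probe_kernel_read()/probe_kernel_write() definitions while still reaching the generic fault-catching copies through their double-underscore names. A hedged sketch of such an override; the address window and helper below are hypothetical, not from any real architecture:

#include <linux/uaccess.h>

#define EXAMPLE_SPECIAL_START	0xff000000UL	/* hypothetical I/O window */
#define EXAMPLE_SPECIAL_END	0xff100000UL

long example_special_read(void *dst, void *src, size_t size);	/* hypothetical helper */

long probe_kernel_read(void *dst, void *src, size_t size)
{
	unsigned long addr = (unsigned long)src;

	/* route reads of the special window through the arch-specific helper */
	if (addr >= EXAMPLE_SPECIAL_START && addr + size <= EXAMPLE_SPECIAL_END)
		return example_special_read(dst, src, size);

	/* everything else falls back to the generic implementation above */
	return __probe_kernel_read(dst, src, size);
}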
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 488b644e0e8e..954032b80bed 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
| @@ -2586,7 +2586,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all) | |||
| 2586 | if (free_all) | 2586 | if (free_all) |
| 2587 | goto try_to_free; | 2587 | goto try_to_free; |
| 2588 | move_account: | 2588 | move_account: |
| 2589 | while (mem->res.usage > 0) { | 2589 | do { |
| 2590 | ret = -EBUSY; | 2590 | ret = -EBUSY; |
| 2591 | if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children)) | 2591 | if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children)) |
| 2592 | goto out; | 2592 | goto out; |
| @@ -2614,8 +2614,8 @@ move_account: | |||
| 2614 | if (ret == -ENOMEM) | 2614 | if (ret == -ENOMEM) |
| 2615 | goto try_to_free; | 2615 | goto try_to_free; |
| 2616 | cond_resched(); | 2616 | cond_resched(); |
| 2617 | } | 2617 | /* "ret" should also be checked to ensure all lists are empty. */ |
| 2618 | ret = 0; | 2618 | } while (mem->res.usage > 0 || ret); |
| 2619 | out: | 2619 | out: |
| 2620 | css_put(&mem->css); | 2620 | css_put(&mem->css); |
| 2621 | return ret; | 2621 | return ret; |
| @@ -2648,10 +2648,7 @@ try_to_free: | |||
| 2648 | } | 2648 | } |
| 2649 | lru_add_drain(); | 2649 | lru_add_drain(); |
| 2650 | /* try move_account...there may be some *locked* pages. */ | 2650 | /* try move_account...there may be some *locked* pages. */ |
| 2651 | if (mem->res.usage) | 2651 | goto move_account; |
| 2652 | goto move_account; | ||
| 2653 | ret = 0; | ||
| 2654 | goto out; | ||
| 2655 | } | 2652 | } |
| 2656 | 2653 | ||
| 2657 | int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) | 2654 | int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) |
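The rewritten loop above runs when userspace writes to a memory cgroup's force_empty control file; the do/while now retries until both res.usage and the per-list accounting have drained. A hedged userspace illustration of triggering it (the mount point is an assumption about the local cgroup setup, and the written value is ignored by the kernel):

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	/* assumed v1 memcg hierarchy mounted at /sys/fs/cgroup/memory */
	int fd = open("/sys/fs/cgroup/memory/example/memory.force_empty", O_WRONLY);

	if (fd < 0)
		return 1;
	(void)write(fd, "0", 1);	/* kicks mem_cgroup_force_empty() */
	close(fd);
	return 0;
}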
diff --git a/mm/migrate.c b/mm/migrate.c index efddbf0926b2..880bd592d38e 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
| @@ -912,6 +912,9 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task, | |||
| 912 | goto out_pm; | 912 | goto out_pm; |
| 913 | 913 | ||
| 914 | err = -ENODEV; | 914 | err = -ENODEV; |
| 915 | if (node < 0 || node >= MAX_NUMNODES) | ||
| 916 | goto out_pm; | ||
| 917 | |||
| 915 | if (!node_state(node, N_HIGH_MEMORY)) | 918 | if (!node_state(node, N_HIGH_MEMORY)) |
| 916 | goto out_pm; | 919 | goto out_pm; |
| 917 | 920 | ||
| @@ -999,33 +1002,27 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages, | |||
| 999 | #define DO_PAGES_STAT_CHUNK_NR 16 | 1002 | #define DO_PAGES_STAT_CHUNK_NR 16 |
| 1000 | const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR]; | 1003 | const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR]; |
| 1001 | int chunk_status[DO_PAGES_STAT_CHUNK_NR]; | 1004 | int chunk_status[DO_PAGES_STAT_CHUNK_NR]; |
| 1002 | unsigned long i, chunk_nr = DO_PAGES_STAT_CHUNK_NR; | ||
| 1003 | int err; | ||
| 1004 | 1005 | ||
| 1005 | for (i = 0; i < nr_pages; i += chunk_nr) { | 1006 | while (nr_pages) { |
| 1006 | if (chunk_nr > nr_pages - i) | 1007 | unsigned long chunk_nr; |
| 1007 | chunk_nr = nr_pages - i; | ||
| 1008 | 1008 | ||
| 1009 | err = copy_from_user(chunk_pages, &pages[i], | 1009 | chunk_nr = nr_pages; |
| 1010 | chunk_nr * sizeof(*chunk_pages)); | 1010 | if (chunk_nr > DO_PAGES_STAT_CHUNK_NR) |
| 1011 | if (err) { | 1011 | chunk_nr = DO_PAGES_STAT_CHUNK_NR; |
| 1012 | err = -EFAULT; | 1012 | |
| 1013 | goto out; | 1013 | if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages))) |
| 1014 | } | 1014 | break; |
| 1015 | 1015 | ||
| 1016 | do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status); | 1016 | do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status); |
| 1017 | 1017 | ||
| 1018 | err = copy_to_user(&status[i], chunk_status, | 1018 | if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status))) |
| 1019 | chunk_nr * sizeof(*chunk_status)); | 1019 | break; |
| 1020 | if (err) { | ||
| 1021 | err = -EFAULT; | ||
| 1022 | goto out; | ||
| 1023 | } | ||
| 1024 | } | ||
| 1025 | err = 0; | ||
| 1026 | 1020 | ||
| 1027 | out: | 1021 | pages += chunk_nr; |
| 1028 | return err; | 1022 | status += chunk_nr; |
| 1023 | nr_pages -= chunk_nr; | ||
| 1024 | } | ||
| 1025 | return nr_pages ? -EFAULT : 0; | ||
| 1029 | } | 1026 | } |
| 1030 | 1027 | ||
| 1031 | /* | 1028 | /* |
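do_pages_stat() above is the kernel side of move_pages(2) when no target nodes are passed: the user's page array is walked in DO_PAGES_STAT_CHUNK_NR-sized chunks and the status array is filled with the node each page currently sits on. A hedged userspace sketch of that query mode, assuming libnuma's numaif.h wrapper:

#include <numaif.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	long page_size = sysconf(_SC_PAGESIZE);
	char *buf = malloc(4 * page_size);
	void *pages[4];
	int status[4];
	int i;

	for (i = 0; i < 4; i++) {
		buf[i * page_size] = 1;		/* fault each page in */
		pages[i] = buf + i * page_size;
	}

	/* nodes == NULL: just report where each page currently lives */
	if (move_pages(0, 4, pages, NULL, status, 0) == 0)
		for (i = 0; i < 4; i++)
			printf("page %d is on node %d\n", i, status[i]);

	free(buf);
	return 0;
}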
diff --git a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c +++ b/mm/mmap.c | |||
| @@ -1043,6 +1043,46 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, | |||
| 1043 | } | 1043 | } |
| 1044 | EXPORT_SYMBOL(do_mmap_pgoff); | 1044 | EXPORT_SYMBOL(do_mmap_pgoff); |
| 1045 | 1045 | ||
| 1046 | SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, | ||
| 1047 | unsigned long, prot, unsigned long, flags, | ||
| 1048 | unsigned long, fd, unsigned long, pgoff) | ||
| 1049 | { | ||
| 1050 | struct file *file = NULL; | ||
| 1051 | unsigned long retval = -EBADF; | ||
| 1052 | |||
| 1053 | if (!(flags & MAP_ANONYMOUS)) { | ||
| 1054 | if (unlikely(flags & MAP_HUGETLB)) | ||
| 1055 | return -EINVAL; | ||
| 1056 | file = fget(fd); | ||
| 1057 | if (!file) | ||
| 1058 | goto out; | ||
| 1059 | } else if (flags & MAP_HUGETLB) { | ||
| 1060 | struct user_struct *user = NULL; | ||
| 1061 | /* | ||
| 1062 | * VM_NORESERVE is used because the reservations will be | ||
| 1063 | * taken when vm_ops->mmap() is called | ||
| 1064 | * A dummy user value is used because we are not locking | ||
| 1065 | * memory so no accounting is necessary | ||
| 1066 | */ | ||
| 1067 | len = ALIGN(len, huge_page_size(&default_hstate)); | ||
| 1068 | file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE, | ||
| 1069 | &user, HUGETLB_ANONHUGE_INODE); | ||
| 1070 | if (IS_ERR(file)) | ||
| 1071 | return PTR_ERR(file); | ||
| 1072 | } | ||
| 1073 | |||
| 1074 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); | ||
| 1075 | |||
| 1076 | down_write(¤t->mm->mmap_sem); | ||
| 1077 | retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); | ||
| 1078 | up_write(¤t->mm->mmap_sem); | ||
| 1079 | |||
| 1080 | if (file) | ||
| 1081 | fput(file); | ||
| 1082 | out: | ||
| 1083 | return retval; | ||
| 1084 | } | ||
| 1085 | |||
| 1046 | /* | 1086 | /* |
| 1047 | * Some shared mappigns will want the pages marked read-only | 1087 | * Some shared mappigns will want the pages marked read-only |
| 1048 | * to track write events. If so, we'll downgrade vm_page_prot | 1088 | * to track write events. If so, we'll downgrade vm_page_prot |
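The sys_mmap_pgoff() definition added above (relocated from mm/util.c, whose copy is removed further down) is where MAP_ANONYMOUS | MAP_HUGETLB requests get an anonymous hugetlbfs file behind them. A hedged userspace sketch of exercising that path; the 2 MiB length and the MAP_HUGETLB fallback value are assumptions:

#include <sys/mman.h>
#include <stdio.h>

#ifndef MAP_HUGETLB
#define MAP_HUGETLB 0x40000	/* assumed value; check your asm/mman.h */
#endif

int main(void)
{
	size_t len = 2UL << 20;	/* assumed default huge page size */
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");	/* e.g. no huge pages reserved in the pool */
		return 1;
	}
	p[0] = 1;		/* touching it faults in a huge page */
	munmap(p, len);
	return 0;
}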
diff --git a/mm/nommu.c b/mm/nommu.c index 8687973462bb..48a2ecfaf059 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
| @@ -432,6 +432,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) | |||
| 432 | /* | 432 | /* |
| 433 | * Ok, looks good - let it rip. | 433 | * Ok, looks good - let it rip. |
| 434 | */ | 434 | */ |
| 435 | flush_icache_range(mm->brk, brk); | ||
| 435 | return mm->brk = brk; | 436 | return mm->brk = brk; |
| 436 | } | 437 | } |
| 437 | 438 | ||
| @@ -551,11 +552,11 @@ static void free_page_series(unsigned long from, unsigned long to) | |||
| 551 | static void __put_nommu_region(struct vm_region *region) | 552 | static void __put_nommu_region(struct vm_region *region) |
| 552 | __releases(nommu_region_sem) | 553 | __releases(nommu_region_sem) |
| 553 | { | 554 | { |
| 554 | kenter("%p{%d}", region, atomic_read(®ion->vm_usage)); | 555 | kenter("%p{%d}", region, region->vm_usage); |
| 555 | 556 | ||
| 556 | BUG_ON(!nommu_region_tree.rb_node); | 557 | BUG_ON(!nommu_region_tree.rb_node); |
| 557 | 558 | ||
| 558 | if (atomic_dec_and_test(®ion->vm_usage)) { | 559 | if (--region->vm_usage == 0) { |
| 559 | if (region->vm_top > region->vm_start) | 560 | if (region->vm_top > region->vm_start) |
| 560 | delete_nommu_region(region); | 561 | delete_nommu_region(region); |
| 561 | up_write(&nommu_region_sem); | 562 | up_write(&nommu_region_sem); |
| @@ -1204,7 +1205,7 @@ unsigned long do_mmap_pgoff(struct file *file, | |||
| 1204 | if (!vma) | 1205 | if (!vma) |
| 1205 | goto error_getting_vma; | 1206 | goto error_getting_vma; |
| 1206 | 1207 | ||
| 1207 | atomic_set(®ion->vm_usage, 1); | 1208 | region->vm_usage = 1; |
| 1208 | region->vm_flags = vm_flags; | 1209 | region->vm_flags = vm_flags; |
| 1209 | region->vm_pgoff = pgoff; | 1210 | region->vm_pgoff = pgoff; |
| 1210 | 1211 | ||
| @@ -1271,7 +1272,7 @@ unsigned long do_mmap_pgoff(struct file *file, | |||
| 1271 | } | 1272 | } |
| 1272 | 1273 | ||
| 1273 | /* we've found a region we can share */ | 1274 | /* we've found a region we can share */ |
| 1274 | atomic_inc(&pregion->vm_usage); | 1275 | pregion->vm_usage++; |
| 1275 | vma->vm_region = pregion; | 1276 | vma->vm_region = pregion; |
| 1276 | start = pregion->vm_start; | 1277 | start = pregion->vm_start; |
| 1277 | start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT; | 1278 | start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT; |
| @@ -1288,7 +1289,7 @@ unsigned long do_mmap_pgoff(struct file *file, | |||
| 1288 | vma->vm_region = NULL; | 1289 | vma->vm_region = NULL; |
| 1289 | vma->vm_start = 0; | 1290 | vma->vm_start = 0; |
| 1290 | vma->vm_end = 0; | 1291 | vma->vm_end = 0; |
| 1291 | atomic_dec(&pregion->vm_usage); | 1292 | pregion->vm_usage--; |
| 1292 | pregion = NULL; | 1293 | pregion = NULL; |
| 1293 | goto error_just_free; | 1294 | goto error_just_free; |
| 1294 | } | 1295 | } |
| @@ -1353,10 +1354,14 @@ unsigned long do_mmap_pgoff(struct file *file, | |||
| 1353 | share: | 1354 | share: |
| 1354 | add_vma_to_mm(current->mm, vma); | 1355 | add_vma_to_mm(current->mm, vma); |
| 1355 | 1356 | ||
| 1356 | up_write(&nommu_region_sem); | 1357 | /* we flush the region from the icache only when the first executable |
| 1358 | * mapping of it is made */ | ||
| 1359 | if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) { | ||
| 1360 | flush_icache_range(region->vm_start, region->vm_end); | ||
| 1361 | region->vm_icache_flushed = true; | ||
| 1362 | } | ||
| 1357 | 1363 | ||
| 1358 | if (prot & PROT_EXEC) | 1364 | up_write(&nommu_region_sem); |
| 1359 | flush_icache_range(result, result + len); | ||
| 1360 | 1365 | ||
| 1361 | kleave(" = %lx", result); | 1366 | kleave(" = %lx", result); |
| 1362 | return result; | 1367 | return result; |
| @@ -1398,6 +1403,31 @@ error_getting_region: | |||
| 1398 | } | 1403 | } |
| 1399 | EXPORT_SYMBOL(do_mmap_pgoff); | 1404 | EXPORT_SYMBOL(do_mmap_pgoff); |
| 1400 | 1405 | ||
| 1406 | SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, | ||
| 1407 | unsigned long, prot, unsigned long, flags, | ||
| 1408 | unsigned long, fd, unsigned long, pgoff) | ||
| 1409 | { | ||
| 1410 | struct file *file = NULL; | ||
| 1411 | unsigned long retval = -EBADF; | ||
| 1412 | |||
| 1413 | if (!(flags & MAP_ANONYMOUS)) { | ||
| 1414 | file = fget(fd); | ||
| 1415 | if (!file) | ||
| 1416 | goto out; | ||
| 1417 | } | ||
| 1418 | |||
| 1419 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); | ||
| 1420 | |||
| 1421 | down_write(¤t->mm->mmap_sem); | ||
| 1422 | retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); | ||
| 1423 | up_write(¤t->mm->mmap_sem); | ||
| 1424 | |||
| 1425 | if (file) | ||
| 1426 | fput(file); | ||
| 1427 | out: | ||
| 1428 | return retval; | ||
| 1429 | } | ||
| 1430 | |||
| 1401 | /* | 1431 | /* |
| 1402 | * split a vma into two pieces at address 'addr', a new vma is allocated either | 1432 | * split a vma into two pieces at address 'addr', a new vma is allocated either |
| 1403 | * for the first part or the tail. | 1433 | * for the first part or the tail. |
| @@ -1411,10 +1441,9 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 1411 | 1441 | ||
| 1412 | kenter(""); | 1442 | kenter(""); |
| 1413 | 1443 | ||
| 1414 | /* we're only permitted to split anonymous regions that have a single | 1444 | /* we're only permitted to split anonymous regions (these should have |
| 1415 | * owner */ | 1445 | * only a single usage on the region) */ |
| 1416 | if (vma->vm_file || | 1446 | if (vma->vm_file) |
| 1417 | atomic_read(&vma->vm_region->vm_usage) != 1) | ||
| 1418 | return -ENOMEM; | 1447 | return -ENOMEM; |
| 1419 | 1448 | ||
| 1420 | if (mm->map_count >= sysctl_max_map_count) | 1449 | if (mm->map_count >= sysctl_max_map_count) |
| @@ -1488,7 +1517,7 @@ static int shrink_vma(struct mm_struct *mm, | |||
| 1488 | 1517 | ||
| 1489 | /* cut the backing region down to size */ | 1518 | /* cut the backing region down to size */ |
| 1490 | region = vma->vm_region; | 1519 | region = vma->vm_region; |
| 1491 | BUG_ON(atomic_read(®ion->vm_usage) != 1); | 1520 | BUG_ON(region->vm_usage != 1); |
| 1492 | 1521 | ||
| 1493 | down_write(&nommu_region_sem); | 1522 | down_write(&nommu_region_sem); |
| 1494 | delete_nommu_region(region); | 1523 | delete_nommu_region(region); |
| @@ -1732,27 +1761,6 @@ void unmap_mapping_range(struct address_space *mapping, | |||
| 1732 | EXPORT_SYMBOL(unmap_mapping_range); | 1761 | EXPORT_SYMBOL(unmap_mapping_range); |
| 1733 | 1762 | ||
| 1734 | /* | 1763 | /* |
| 1735 | * ask for an unmapped area at which to create a mapping on a file | ||
| 1736 | */ | ||
| 1737 | unsigned long get_unmapped_area(struct file *file, unsigned long addr, | ||
| 1738 | unsigned long len, unsigned long pgoff, | ||
| 1739 | unsigned long flags) | ||
| 1740 | { | ||
| 1741 | unsigned long (*get_area)(struct file *, unsigned long, unsigned long, | ||
| 1742 | unsigned long, unsigned long); | ||
| 1743 | |||
| 1744 | get_area = current->mm->get_unmapped_area; | ||
| 1745 | if (file && file->f_op && file->f_op->get_unmapped_area) | ||
| 1746 | get_area = file->f_op->get_unmapped_area; | ||
| 1747 | |||
| 1748 | if (!get_area) | ||
| 1749 | return -ENOSYS; | ||
| 1750 | |||
| 1751 | return get_area(file, addr, len, pgoff, flags); | ||
| 1752 | } | ||
| 1753 | EXPORT_SYMBOL(get_unmapped_area); | ||
| 1754 | |||
| 1755 | /* | ||
| 1756 | * Check that a process has enough memory to allocate a new virtual | 1764 | * Check that a process has enough memory to allocate a new virtual |
| 1757 | * mapping. 0 means there is enough memory for the allocation to | 1765 | * mapping. 0 means there is enough memory for the allocation to |
| 1758 | * succeed and -ENOMEM implies there is not. | 1766 | * succeed and -ENOMEM implies there is not. |
| @@ -1891,9 +1899,11 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in | |||
| 1891 | 1899 | ||
| 1892 | /* only read or write mappings where it is permitted */ | 1900 | /* only read or write mappings where it is permitted */ |
| 1893 | if (write && vma->vm_flags & VM_MAYWRITE) | 1901 | if (write && vma->vm_flags & VM_MAYWRITE) |
| 1894 | len -= copy_to_user((void *) addr, buf, len); | 1902 | copy_to_user_page(vma, NULL, addr, |
| 1903 | (void *) addr, buf, len); | ||
| 1895 | else if (!write && vma->vm_flags & VM_MAYREAD) | 1904 | else if (!write && vma->vm_flags & VM_MAYREAD) |
| 1896 | len -= copy_from_user(buf, (void *) addr, len); | 1905 | copy_from_user_page(vma, NULL, addr, |
| 1906 | buf, (void *) addr, len); | ||
| 1897 | else | 1907 | else |
| 1898 | len = 0; | 1908 | len = 0; |
| 1899 | } else { | 1909 | } else { |
| @@ -1904,3 +1914,65 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in | |||
| 1904 | mmput(mm); | 1914 | mmput(mm); |
| 1905 | return len; | 1915 | return len; |
| 1906 | } | 1916 | } |
| 1917 | |||
| 1918 | /** | ||
| 1919 | * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode | ||
| 1920 | * @inode: The inode to check | ||
| 1921 | * @size: The current filesize of the inode | ||
| 1922 | * @newsize: The proposed filesize of the inode | ||
| 1923 | * | ||
| 1924 | * Check the shared mappings on an inode on behalf of a shrinking truncate to | ||
| 1925 | * make sure that that any outstanding VMAs aren't broken and then shrink the | ||
| 1926 | * vm_regions that extend that beyond so that do_mmap_pgoff() doesn't | ||
| 1927 | * automatically grant mappings that are too large. | ||
| 1928 | */ | ||
| 1929 | int nommu_shrink_inode_mappings(struct inode *inode, size_t size, | ||
| 1930 | size_t newsize) | ||
| 1931 | { | ||
| 1932 | struct vm_area_struct *vma; | ||
| 1933 | struct prio_tree_iter iter; | ||
| 1934 | struct vm_region *region; | ||
| 1935 | pgoff_t low, high; | ||
| 1936 | size_t r_size, r_top; | ||
| 1937 | |||
| 1938 | low = newsize >> PAGE_SHIFT; | ||
| 1939 | high = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
| 1940 | |||
| 1941 | down_write(&nommu_region_sem); | ||
| 1942 | |||
| 1943 | /* search for VMAs that fall within the dead zone */ | ||
| 1944 | vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, | ||
| 1945 | low, high) { | ||
| 1946 | /* found one - only interested if it's shared out of the page | ||
| 1947 | * cache */ | ||
| 1948 | if (vma->vm_flags & VM_SHARED) { | ||
| 1949 | up_write(&nommu_region_sem); | ||
| 1950 | return -ETXTBSY; /* not quite true, but near enough */ | ||
| 1951 | } | ||
| 1952 | } | ||
| 1953 | |||
| 1954 | /* reduce any regions that overlap the dead zone - if in existence, | ||
| 1955 | * these will be pointed to by VMAs that don't overlap the dead zone | ||
| 1956 | * | ||
| 1957 | * we don't check for any regions that start beyond the EOF as there | ||
| 1958 | * shouldn't be any | ||
| 1959 | */ | ||
| 1960 | vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, | ||
| 1961 | 0, ULONG_MAX) { | ||
| 1962 | if (!(vma->vm_flags & VM_SHARED)) | ||
| 1963 | continue; | ||
| 1964 | |||
| 1965 | region = vma->vm_region; | ||
| 1966 | r_size = region->vm_top - region->vm_start; | ||
| 1967 | r_top = (region->vm_pgoff << PAGE_SHIFT) + r_size; | ||
| 1968 | |||
| 1969 | if (r_top > newsize) { | ||
| 1970 | region->vm_top -= r_top - newsize; | ||
| 1971 | if (region->vm_end > region->vm_top) | ||
| 1972 | region->vm_end = region->vm_top; | ||
| 1973 | } | ||
| 1974 | } | ||
| 1975 | |||
| 1976 | up_write(&nommu_region_sem); | ||
| 1977 | return 0; | ||
| 1978 | } | ||
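nommu_shrink_inode_mappings() above is meant to be called by a nommu-capable filesystem before it shrinks an inode, so a shared mapping over the doomed tail makes the truncate fail instead of being silently broken. A hedged sketch of such a caller; the function names are illustrative rather than taken from a real filesystem:

#include <linux/fs.h>
#include <linux/mm.h>

static int example_do_truncate(struct inode *inode, loff_t newsize);	/* hypothetical */

static int example_setattr_size(struct inode *inode, loff_t newsize)
{
	int ret;

	/* refuse the shrink if a shared mapping still covers the doomed tail */
	ret = nommu_shrink_inode_mappings(inode, inode->i_size, newsize);
	if (ret)
		return ret;	/* typically -ETXTBSY, as noted above */

	/* any vm_regions overlapping the tail have now been trimmed,
	 * so the filesystem can carry on with its normal truncate path */
	return example_do_truncate(inode, newsize);
}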
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index f52481b1c1e5..237050478f28 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
| @@ -459,6 +459,8 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, | |||
| 459 | list_for_each_entry(c, &p->children, sibling) { | 459 | list_for_each_entry(c, &p->children, sibling) { |
| 460 | if (c->mm == p->mm) | 460 | if (c->mm == p->mm) |
| 461 | continue; | 461 | continue; |
| 462 | if (mem && !task_in_mem_cgroup(c, mem)) | ||
| 463 | continue; | ||
| 462 | if (!oom_kill_task(c)) | 464 | if (!oom_kill_task(c)) |
| 463 | return 0; | 465 | return 0; |
| 464 | } | 466 | } |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4e9f5cc5fb59..8deb9d0fd5b1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
| @@ -556,8 +556,9 @@ static void free_pcppages_bulk(struct zone *zone, int count, | |||
| 556 | page = list_entry(list->prev, struct page, lru); | 556 | page = list_entry(list->prev, struct page, lru); |
| 557 | /* must delete as __free_one_page list manipulates */ | 557 | /* must delete as __free_one_page list manipulates */ |
| 558 | list_del(&page->lru); | 558 | list_del(&page->lru); |
| 559 | __free_one_page(page, zone, 0, migratetype); | 559 | /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ |
| 560 | trace_mm_page_pcpu_drain(page, 0, migratetype); | 560 | __free_one_page(page, zone, 0, page_private(page)); |
| 561 | trace_mm_page_pcpu_drain(page, 0, page_private(page)); | ||
| 561 | } while (--count && --batch_free && !list_empty(list)); | 562 | } while (--count && --batch_free && !list_empty(list)); |
| 562 | } | 563 | } |
| 563 | spin_unlock(&zone->lock); | 564 | spin_unlock(&zone->lock); |
| @@ -1222,10 +1223,10 @@ again: | |||
| 1222 | } | 1223 | } |
| 1223 | spin_lock_irqsave(&zone->lock, flags); | 1224 | spin_lock_irqsave(&zone->lock, flags); |
| 1224 | page = __rmqueue(zone, order, migratetype); | 1225 | page = __rmqueue(zone, order, migratetype); |
| 1225 | __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order)); | ||
| 1226 | spin_unlock(&zone->lock); | 1226 | spin_unlock(&zone->lock); |
| 1227 | if (!page) | 1227 | if (!page) |
| 1228 | goto failed; | 1228 | goto failed; |
| 1229 | __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order)); | ||
| 1229 | } | 1230 | } |
| 1230 | 1231 | ||
| 1231 | __count_zone_vm_events(PGALLOC, zone, 1 << order); | 1232 | __count_zone_vm_events(PGALLOC, zone, 1 << order); |
| @@ -3998,7 +3999,7 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn, | |||
| 3998 | } | 3999 | } |
| 3999 | 4000 | ||
| 4000 | /* Merge backward if suitable */ | 4001 | /* Merge backward if suitable */ |
| 4001 | if (start_pfn < early_node_map[i].end_pfn && | 4002 | if (start_pfn < early_node_map[i].start_pfn && |
| 4002 | end_pfn >= early_node_map[i].start_pfn) { | 4003 | end_pfn >= early_node_map[i].start_pfn) { |
| 4003 | early_node_map[i].start_pfn = start_pfn; | 4004 | early_node_map[i].start_pfn = start_pfn; |
| 4004 | return; | 4005 | return; |
diff --git a/mm/percpu.c b/mm/percpu.c index 442010cc91c6..083e7c91e5f6 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
| @@ -1271,7 +1271,7 @@ static void pcpu_reclaim(struct work_struct *work) | |||
| 1271 | */ | 1271 | */ |
| 1272 | void free_percpu(void *ptr) | 1272 | void free_percpu(void *ptr) |
| 1273 | { | 1273 | { |
| 1274 | void *addr = __pcpu_ptr_to_addr(ptr); | 1274 | void *addr; |
| 1275 | struct pcpu_chunk *chunk; | 1275 | struct pcpu_chunk *chunk; |
| 1276 | unsigned long flags; | 1276 | unsigned long flags; |
| 1277 | int off; | 1277 | int off; |
| @@ -1279,6 +1279,8 @@ void free_percpu(void *ptr) | |||
| 1279 | if (!ptr) | 1279 | if (!ptr) |
| 1280 | return; | 1280 | return; |
| 1281 | 1281 | ||
| 1282 | addr = __pcpu_ptr_to_addr(ptr); | ||
| 1283 | |||
| 1282 | spin_lock_irqsave(&pcpu_lock, flags); | 1284 | spin_lock_irqsave(&pcpu_lock, flags); |
| 1283 | 1285 | ||
| 1284 | chunk = pcpu_chunk_addr_search(addr); | 1286 | chunk = pcpu_chunk_addr_search(addr); |
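The reordering above matters because free_percpu(NULL) is expected to be a harmless no-op, so nothing may be derived from the pointer before the NULL check. A minimal hedged sketch of the calling pattern that relies on that guarantee:

#include <linux/percpu.h>
#include <linux/errno.h>

static int *example_counters;	/* hypothetical per-CPU allocation */

static int example_init(void)
{
	example_counters = alloc_percpu(int);
	return example_counters ? 0 : -ENOMEM;
}

static void example_exit(void)
{
	/* safe even if example_init() failed and this is still NULL */
	free_percpu(example_counters);
	example_counters = NULL;
}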
diff --git a/mm/slab.c b/mm/slab.c --- a/mm/slab.c +++ b/mm/slab.c | |||
| @@ -654,7 +654,7 @@ static void init_node_lock_keys(int q) | |||
| 654 | 654 | ||
| 655 | l3 = s->cs_cachep->nodelists[q]; | 655 | l3 = s->cs_cachep->nodelists[q]; |
| 656 | if (!l3 || OFF_SLAB(s->cs_cachep)) | 656 | if (!l3 || OFF_SLAB(s->cs_cachep)) |
| 657 | return; | 657 | continue; |
| 658 | lockdep_set_class(&l3->list_lock, &on_slab_l3_key); | 658 | lockdep_set_class(&l3->list_lock, &on_slab_l3_key); |
| 659 | alc = l3->alien; | 659 | alc = l3->alien; |
| 660 | /* | 660 | /* |
| @@ -665,7 +665,7 @@ static void init_node_lock_keys(int q) | |||
| 665 | * for alloc_alien_cache, | 665 | * for alloc_alien_cache, |
| 666 | */ | 666 | */ |
| 667 | if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) | 667 | if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) |
| 668 | return; | 668 | continue; |
| 669 | for_each_node(r) { | 669 | for_each_node(r) { |
| 670 | if (alc[r]) | 670 | if (alc[r]) |
| 671 | lockdep_set_class(&alc[r]->lock, | 671 | lockdep_set_class(&alc[r]->lock, |
diff --git a/mm/truncate.c b/mm/truncate.c index 342deee22684..e87e37244829 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
| @@ -522,22 +522,20 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2); | |||
| 522 | */ | 522 | */ |
| 523 | void truncate_pagecache(struct inode *inode, loff_t old, loff_t new) | 523 | void truncate_pagecache(struct inode *inode, loff_t old, loff_t new) |
| 524 | { | 524 | { |
| 525 | if (new < old) { | 525 | struct address_space *mapping = inode->i_mapping; |
| 526 | struct address_space *mapping = inode->i_mapping; | 526 | |
| 527 | 527 | /* | |
| 528 | /* | 528 | * unmap_mapping_range is called twice, first simply for |
| 529 | * unmap_mapping_range is called twice, first simply for | 529 | * efficiency so that truncate_inode_pages does fewer |
| 530 | * efficiency so that truncate_inode_pages does fewer | 530 | * single-page unmaps. However after this first call, and |
| 531 | * single-page unmaps. However after this first call, and | 531 | * before truncate_inode_pages finishes, it is possible for |
| 532 | * before truncate_inode_pages finishes, it is possible for | 532 | * private pages to be COWed, which remain after |
| 533 | * private pages to be COWed, which remain after | 533 | * truncate_inode_pages finishes, hence the second |
| 534 | * truncate_inode_pages finishes, hence the second | 534 | * unmap_mapping_range call must be made for correctness. |
| 535 | * unmap_mapping_range call must be made for correctness. | 535 | */ |
| 536 | */ | 536 | unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); |
| 537 | unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); | 537 | truncate_inode_pages(mapping, new); |
| 538 | truncate_inode_pages(mapping, new); | 538 | unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); |
| 539 | unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); | ||
| 540 | } | ||
| 541 | } | 539 | } |
| 542 | EXPORT_SYMBOL(truncate_pagecache); | 540 | EXPORT_SYMBOL(truncate_pagecache); |
| 543 | 541 | ||
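With the size check dropped, truncate_pagecache() above now does its unmap/truncate/unmap dance unconditionally; the usual caller publishes the new i_size first and then lets it drop whatever lies beyond it. A hedged sketch of that caller pattern (the function name is illustrative):

#include <linux/fs.h>
#include <linux/mm.h>

static int example_setsize(struct inode *inode, loff_t newsize)
{
	loff_t oldsize = inode->i_size;

	/* publish the new size first ... */
	i_size_write(inode, newsize);
	/* ... then unmap and drop any page cache beyond it; little work is
	 * left to do when the file is growing rather than shrinking */
	truncate_pagecache(inode, oldsize, newsize);
	return 0;
}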
diff --git a/mm/util.c b/mm/util.c --- a/mm/util.c +++ b/mm/util.c | |||
| @@ -4,10 +4,6 @@ | |||
| 4 | #include <linux/module.h> | 4 | #include <linux/module.h> |
| 5 | #include <linux/err.h> | 5 | #include <linux/err.h> |
| 6 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
| 7 | #include <linux/hugetlb.h> | ||
| 8 | #include <linux/syscalls.h> | ||
| 9 | #include <linux/mman.h> | ||
| 10 | #include <linux/file.h> | ||
| 11 | #include <asm/uaccess.h> | 7 | #include <asm/uaccess.h> |
| 12 | 8 | ||
| 13 | #define CREATE_TRACE_POINTS | 9 | #define CREATE_TRACE_POINTS |
| @@ -224,7 +220,7 @@ char *strndup_user(const char __user *s, long n) | |||
| 224 | } | 220 | } |
| 225 | EXPORT_SYMBOL(strndup_user); | 221 | EXPORT_SYMBOL(strndup_user); |
| 226 | 222 | ||
| 227 | #ifndef HAVE_ARCH_PICK_MMAP_LAYOUT | 223 | #if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT) |
| 228 | void arch_pick_mmap_layout(struct mm_struct *mm) | 224 | void arch_pick_mmap_layout(struct mm_struct *mm) |
| 229 | { | 225 | { |
| 230 | mm->mmap_base = TASK_UNMAPPED_BASE; | 226 | mm->mmap_base = TASK_UNMAPPED_BASE; |
| @@ -272,46 +268,6 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start, | |||
| 272 | } | 268 | } |
| 273 | EXPORT_SYMBOL_GPL(get_user_pages_fast); | 269 | EXPORT_SYMBOL_GPL(get_user_pages_fast); |
| 274 | 270 | ||
| 275 | SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, | ||
| 276 | unsigned long, prot, unsigned long, flags, | ||
| 277 | unsigned long, fd, unsigned long, pgoff) | ||
| 278 | { | ||
| 279 | struct file * file = NULL; | ||
| 280 | unsigned long retval = -EBADF; | ||
| 281 | |||
| 282 | if (!(flags & MAP_ANONYMOUS)) { | ||
| 283 | if (unlikely(flags & MAP_HUGETLB)) | ||
| 284 | return -EINVAL; | ||
| 285 | file = fget(fd); | ||
| 286 | if (!file) | ||
| 287 | goto out; | ||
| 288 | } else if (flags & MAP_HUGETLB) { | ||
| 289 | struct user_struct *user = NULL; | ||
| 290 | /* | ||
| 291 | * VM_NORESERVE is used because the reservations will be | ||
| 292 | * taken when vm_ops->mmap() is called | ||
| 293 | * A dummy user value is used because we are not locking | ||
| 294 | * memory so no accounting is necessary | ||
| 295 | */ | ||
| 296 | len = ALIGN(len, huge_page_size(&default_hstate)); | ||
| 297 | file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE, | ||
| 298 | &user, HUGETLB_ANONHUGE_INODE); | ||
| 299 | if (IS_ERR(file)) | ||
| 300 | return PTR_ERR(file); | ||
| 301 | } | ||
| 302 | |||
| 303 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); | ||
| 304 | |||
| 305 | down_write(¤t->mm->mmap_sem); | ||
| 306 | retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); | ||
| 307 | up_write(¤t->mm->mmap_sem); | ||
| 308 | |||
| 309 | if (file) | ||
| 310 | fput(file); | ||
| 311 | out: | ||
| 312 | return retval; | ||
| 313 | } | ||
| 314 | |||
| 315 | /* Tracepoints definitions. */ | 271 | /* Tracepoints definitions. */ |
| 316 | EXPORT_TRACEPOINT_SYMBOL(kmalloc); | 272 | EXPORT_TRACEPOINT_SYMBOL(kmalloc); |
| 317 | EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); | 273 | EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); |
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 37e69295f250..ae007462b7f6 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
| @@ -509,6 +509,9 @@ static unsigned long lazy_max_pages(void) | |||
| 509 | 509 | ||
| 510 | static atomic_t vmap_lazy_nr = ATOMIC_INIT(0); | 510 | static atomic_t vmap_lazy_nr = ATOMIC_INIT(0); |
| 511 | 511 | ||
| 512 | /* for per-CPU blocks */ | ||
| 513 | static void purge_fragmented_blocks_allcpus(void); | ||
| 514 | |||
| 512 | /* | 515 | /* |
| 513 | * Purges all lazily-freed vmap areas. | 516 | * Purges all lazily-freed vmap areas. |
| 514 | * | 517 | * |
| @@ -539,6 +542,9 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, | |||
| 539 | } else | 542 | } else |
| 540 | spin_lock(&purge_lock); | 543 | spin_lock(&purge_lock); |
| 541 | 544 | ||
| 545 | if (sync) | ||
| 546 | purge_fragmented_blocks_allcpus(); | ||
| 547 | |||
| 542 | rcu_read_lock(); | 548 | rcu_read_lock(); |
| 543 | list_for_each_entry_rcu(va, &vmap_area_list, list) { | 549 | list_for_each_entry_rcu(va, &vmap_area_list, list) { |
| 544 | if (va->flags & VM_LAZY_FREE) { | 550 | if (va->flags & VM_LAZY_FREE) { |
| @@ -555,10 +561,8 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, | |||
| 555 | } | 561 | } |
| 556 | rcu_read_unlock(); | 562 | rcu_read_unlock(); |
| 557 | 563 | ||
| 558 | if (nr) { | 564 | if (nr) |
| 559 | BUG_ON(nr > atomic_read(&vmap_lazy_nr)); | ||
| 560 | atomic_sub(nr, &vmap_lazy_nr); | 565 | atomic_sub(nr, &vmap_lazy_nr); |
| 561 | } | ||
| 562 | 566 | ||
| 563 | if (nr || force_flush) | 567 | if (nr || force_flush) |
| 564 | flush_tlb_kernel_range(*start, *end); | 568 | flush_tlb_kernel_range(*start, *end); |
| @@ -669,8 +673,6 @@ static bool vmap_initialized __read_mostly = false; | |||
| 669 | struct vmap_block_queue { | 673 | struct vmap_block_queue { |
| 670 | spinlock_t lock; | 674 | spinlock_t lock; |
| 671 | struct list_head free; | 675 | struct list_head free; |
| 672 | struct list_head dirty; | ||
| 673 | unsigned int nr_dirty; | ||
| 674 | }; | 676 | }; |
| 675 | 677 | ||
| 676 | struct vmap_block { | 678 | struct vmap_block { |
| @@ -680,10 +682,9 @@ struct vmap_block { | |||
| 680 | unsigned long free, dirty; | 682 | unsigned long free, dirty; |
| 681 | DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS); | 683 | DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS); |
| 682 | DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS); | 684 | DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS); |
| 683 | union { | 685 | struct list_head free_list; |
| 684 | struct list_head free_list; | 686 | struct rcu_head rcu_head; |
| 685 | struct rcu_head rcu_head; | 687 | struct list_head purge; |
| 686 | }; | ||
| 687 | }; | 688 | }; |
| 688 | 689 | ||
| 689 | /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */ | 690 | /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */ |
| @@ -759,7 +760,7 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask) | |||
| 759 | vbq = &get_cpu_var(vmap_block_queue); | 760 | vbq = &get_cpu_var(vmap_block_queue); |
| 760 | vb->vbq = vbq; | 761 | vb->vbq = vbq; |
| 761 | spin_lock(&vbq->lock); | 762 | spin_lock(&vbq->lock); |
| 762 | list_add(&vb->free_list, &vbq->free); | 763 | list_add_rcu(&vb->free_list, &vbq->free); |
| 763 | spin_unlock(&vbq->lock); | 764 | spin_unlock(&vbq->lock); |
| 764 | put_cpu_var(vmap_block_queue); | 765 | put_cpu_var(vmap_block_queue); |
| 765 | 766 | ||
| @@ -778,8 +779,6 @@ static void free_vmap_block(struct vmap_block *vb) | |||
| 778 | struct vmap_block *tmp; | 779 | struct vmap_block *tmp; |
| 779 | unsigned long vb_idx; | 780 | unsigned long vb_idx; |
| 780 | 781 | ||
| 781 | BUG_ON(!list_empty(&vb->free_list)); | ||
| 782 | |||
| 783 | vb_idx = addr_to_vb_idx(vb->va->va_start); | 782 | vb_idx = addr_to_vb_idx(vb->va->va_start); |
| 784 | spin_lock(&vmap_block_tree_lock); | 783 | spin_lock(&vmap_block_tree_lock); |
| 785 | tmp = radix_tree_delete(&vmap_block_tree, vb_idx); | 784 | tmp = radix_tree_delete(&vmap_block_tree, vb_idx); |
| @@ -790,12 +789,61 @@ static void free_vmap_block(struct vmap_block *vb) | |||
| 790 | call_rcu(&vb->rcu_head, rcu_free_vb); | 789 | call_rcu(&vb->rcu_head, rcu_free_vb); |
| 791 | } | 790 | } |
| 792 | 791 | ||
| 792 | static void purge_fragmented_blocks(int cpu) | ||
| 793 | { | ||
| 794 | LIST_HEAD(purge); | ||
| 795 | struct vmap_block *vb; | ||
| 796 | struct vmap_block *n_vb; | ||
| 797 | struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); | ||
| 798 | |||
| 799 | rcu_read_lock(); | ||
| 800 | list_for_each_entry_rcu(vb, &vbq->free, free_list) { | ||
| 801 | |||
| 802 | if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS)) | ||
| 803 | continue; | ||
| 804 | |||
| 805 | spin_lock(&vb->lock); | ||
| 806 | if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) { | ||
| 807 | vb->free = 0; /* prevent further allocs after releasing lock */ | ||
| 808 | vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */ | ||
| 809 | bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS); | ||
| 810 | bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS); | ||
| 811 | spin_lock(&vbq->lock); | ||
| 812 | list_del_rcu(&vb->free_list); | ||
| 813 | spin_unlock(&vbq->lock); | ||
| 814 | spin_unlock(&vb->lock); | ||
| 815 | list_add_tail(&vb->purge, &purge); | ||
| 816 | } else | ||
| 817 | spin_unlock(&vb->lock); | ||
| 818 | } | ||
| 819 | rcu_read_unlock(); | ||
| 820 | |||
| 821 | list_for_each_entry_safe(vb, n_vb, &purge, purge) { | ||
| 822 | list_del(&vb->purge); | ||
| 823 | free_vmap_block(vb); | ||
| 824 | } | ||
| 825 | } | ||
| 826 | |||
| 827 | static void purge_fragmented_blocks_thiscpu(void) | ||
| 828 | { | ||
| 829 | purge_fragmented_blocks(smp_processor_id()); | ||
| 830 | } | ||
| 831 | |||
| 832 | static void purge_fragmented_blocks_allcpus(void) | ||
| 833 | { | ||
| 834 | int cpu; | ||
| 835 | |||
| 836 | for_each_possible_cpu(cpu) | ||
| 837 | purge_fragmented_blocks(cpu); | ||
| 838 | } | ||
| 839 | |||
| 793 | static void *vb_alloc(unsigned long size, gfp_t gfp_mask) | 840 | static void *vb_alloc(unsigned long size, gfp_t gfp_mask) |
| 794 | { | 841 | { |
| 795 | struct vmap_block_queue *vbq; | 842 | struct vmap_block_queue *vbq; |
| 796 | struct vmap_block *vb; | 843 | struct vmap_block *vb; |
| 797 | unsigned long addr = 0; | 844 | unsigned long addr = 0; |
| 798 | unsigned int order; | 845 | unsigned int order; |
| 846 | int purge = 0; | ||
| 799 | 847 | ||
| 800 | BUG_ON(size & ~PAGE_MASK); | 848 | BUG_ON(size & ~PAGE_MASK); |
| 801 | BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); | 849 | BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); |
| @@ -808,24 +856,38 @@ again: | |||
| 808 | int i; | 856 | int i; |
| 809 | 857 | ||
| 810 | spin_lock(&vb->lock); | 858 | spin_lock(&vb->lock); |
| 859 | if (vb->free < 1UL << order) | ||
| 860 | goto next; | ||
| 861 | |||
| 811 | i = bitmap_find_free_region(vb->alloc_map, | 862 | i = bitmap_find_free_region(vb->alloc_map, |
| 812 | VMAP_BBMAP_BITS, order); | 863 | VMAP_BBMAP_BITS, order); |
| 813 | 864 | ||
| 814 | if (i >= 0) { | 865 | if (i < 0) { |
| 815 | addr = vb->va->va_start + (i << PAGE_SHIFT); | 866 | if (vb->free + vb->dirty == VMAP_BBMAP_BITS) { |
| 816 | BUG_ON(addr_to_vb_idx(addr) != | 867 | /* fragmented and no outstanding allocations */ |
| 817 | addr_to_vb_idx(vb->va->va_start)); | 868 | BUG_ON(vb->dirty != VMAP_BBMAP_BITS); |
| 818 | vb->free -= 1UL << order; | 869 | purge = 1; |
| 819 | if (vb->free == 0) { | ||
| 820 | spin_lock(&vbq->lock); | ||
| 821 | list_del_init(&vb->free_list); | ||
| 822 | spin_unlock(&vbq->lock); | ||
| 823 | } | 870 | } |
| 824 | spin_unlock(&vb->lock); | 871 | goto next; |
| 825 | break; | 872 | } |
| 873 | addr = vb->va->va_start + (i << PAGE_SHIFT); | ||
| 874 | BUG_ON(addr_to_vb_idx(addr) != | ||
| 875 | addr_to_vb_idx(vb->va->va_start)); | ||
| 876 | vb->free -= 1UL << order; | ||
| 877 | if (vb->free == 0) { | ||
| 878 | spin_lock(&vbq->lock); | ||
| 879 | list_del_rcu(&vb->free_list); | ||
| 880 | spin_unlock(&vbq->lock); | ||
| 826 | } | 881 | } |
| 827 | spin_unlock(&vb->lock); | 882 | spin_unlock(&vb->lock); |
| 883 | break; | ||
| 884 | next: | ||
| 885 | spin_unlock(&vb->lock); | ||
| 828 | } | 886 | } |
| 887 | |||
| 888 | if (purge) | ||
| 889 | purge_fragmented_blocks_thiscpu(); | ||
| 890 | |||
| 829 | put_cpu_var(vmap_block_queue); | 891 | put_cpu_var(vmap_block_queue); |
| 830 | rcu_read_unlock(); | 892 | rcu_read_unlock(); |
| 831 | 893 | ||
| @@ -862,11 +924,11 @@ static void vb_free(const void *addr, unsigned long size) | |||
| 862 | BUG_ON(!vb); | 924 | BUG_ON(!vb); |
| 863 | 925 | ||
| 864 | spin_lock(&vb->lock); | 926 | spin_lock(&vb->lock); |
| 865 | bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order); | 927 | BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order)); |
| 866 | 928 | ||
| 867 | vb->dirty += 1UL << order; | 929 | vb->dirty += 1UL << order; |
| 868 | if (vb->dirty == VMAP_BBMAP_BITS) { | 930 | if (vb->dirty == VMAP_BBMAP_BITS) { |
| 869 | BUG_ON(vb->free || !list_empty(&vb->free_list)); | 931 | BUG_ON(vb->free); |
| 870 | spin_unlock(&vb->lock); | 932 | spin_unlock(&vb->lock); |
| 871 | free_vmap_block(vb); | 933 | free_vmap_block(vb); |
| 872 | } else | 934 | } else |
| @@ -1035,8 +1097,6 @@ void __init vmalloc_init(void) | |||
| 1035 | vbq = &per_cpu(vmap_block_queue, i); | 1097 | vbq = &per_cpu(vmap_block_queue, i); |
| 1036 | spin_lock_init(&vbq->lock); | 1098 | spin_lock_init(&vbq->lock); |
| 1037 | INIT_LIST_HEAD(&vbq->free); | 1099 | INIT_LIST_HEAD(&vbq->free); |
| 1038 | INIT_LIST_HEAD(&vbq->dirty); | ||
| 1039 | vbq->nr_dirty = 0; | ||
| 1040 | } | 1100 | } |
| 1041 | 1101 | ||
| 1042 | /* Import existing vmlist entries. */ | 1102 | /* Import existing vmlist entries. */ |
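The purge_fragmented_blocks*() machinery added above reclaims per-CPU vmap blocks whose space has all been freed but never reused; those blocks back the small-mapping fast path of vm_map_ram(). A hedged sketch of that interface, with hypothetical pages supplied by the caller:

#include <linux/vmalloc.h>
#include <linux/mm.h>

/* map two caller-provided pages contiguously in vmalloc space */
static void *example_map_two_pages(struct page *pages[2])
{
	/* -1: allocate the mapping on the local node; small counts like this
	 * are served from the per-CPU vmap blocks shown above */
	return vm_map_ram(pages, 2, -1, PAGE_KERNEL);
}

static void example_unmap_two_pages(void *addr)
{
	/* marks the block space dirty; the lazy purge paths above flush it */
	vm_unmap_ram(addr, 2);
}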
diff --git a/mm/vmscan.c b/mm/vmscan.c index 885207a6b6b7..c26986c85ce0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
| @@ -1922,6 +1922,9 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining) | |||
| 1922 | if (!populated_zone(zone)) | 1922 | if (!populated_zone(zone)) |
| 1923 | continue; | 1923 | continue; |
| 1924 | 1924 | ||
| 1925 | if (zone_is_all_unreclaimable(zone)) | ||
| 1926 | continue; | ||
| 1927 | |||
| 1925 | if (!zone_watermark_ok(zone, order, high_wmark_pages(zone), | 1928 | if (!zone_watermark_ok(zone, order, high_wmark_pages(zone), |
| 1926 | 0, 0)) | 1929 | 0, 0)) |
| 1927 | return 1; | 1930 | return 1; |
