Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--	mm/hugetlb.c	129
1 file changed, 59 insertions(+), 70 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7a0a73d2fcff..eeceeeb09019 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -35,7 +35,6 @@
 #include <linux/node.h>
 #include "internal.h"
 
-const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
 unsigned long hugepages_treat_as_movable;
 
 int hugetlb_max_hstate __read_mostly;
@@ -1089,6 +1088,9 @@ void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
 	unsigned long pfn;
 	struct hstate *h;
 
+	if (!hugepages_supported())
+		return;
+
 	/* Set scan step to minimum hugepage size */
 	for_each_hstate(h)
 		if (order > huge_page_order(h))
@@ -1734,21 +1736,13 @@ static ssize_t nr_hugepages_show_common(struct kobject *kobj,
 	return sprintf(buf, "%lu\n", nr_huge_pages);
 }
 
-static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
-			struct kobject *kobj, struct kobj_attribute *attr,
-			const char *buf, size_t len)
+static ssize_t __nr_hugepages_store_common(bool obey_mempolicy,
+					   struct hstate *h, int nid,
+					   unsigned long count, size_t len)
 {
 	int err;
-	int nid;
-	unsigned long count;
-	struct hstate *h;
 	NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);
 
-	err = kstrtoul(buf, 10, &count);
-	if (err)
-		goto out;
-
-	h = kobj_to_hstate(kobj, &nid);
 	if (hstate_is_gigantic(h) && !gigantic_page_supported()) {
 		err = -EINVAL;
 		goto out;
@@ -1784,6 +1778,23 @@ out:
 	return err;
 }
 
+static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
+					 struct kobject *kobj, const char *buf,
+					 size_t len)
+{
+	struct hstate *h;
+	unsigned long count;
+	int nid;
+	int err;
+
+	err = kstrtoul(buf, 10, &count);
+	if (err)
+		return err;
+
+	h = kobj_to_hstate(kobj, &nid);
+	return __nr_hugepages_store_common(obey_mempolicy, h, nid, count, len);
+}
+
 static ssize_t nr_hugepages_show(struct kobject *kobj,
 				 struct kobj_attribute *attr, char *buf)
 {
@@ -1793,7 +1804,7 @@ static ssize_t nr_hugepages_show(struct kobject *kobj,
 static ssize_t nr_hugepages_store(struct kobject *kobj,
 	       struct kobj_attribute *attr, const char *buf, size_t len)
 {
-	return nr_hugepages_store_common(false, kobj, attr, buf, len);
+	return nr_hugepages_store_common(false, kobj, buf, len);
 }
 HSTATE_ATTR(nr_hugepages);
 
@@ -1812,7 +1823,7 @@ static ssize_t nr_hugepages_mempolicy_show(struct kobject *kobj,
 static ssize_t nr_hugepages_mempolicy_store(struct kobject *kobj,
 	       struct kobj_attribute *attr, const char *buf, size_t len)
 {
-	return nr_hugepages_store_common(true, kobj, attr, buf, len);
+	return nr_hugepages_store_common(true, kobj, buf, len);
 }
 HSTATE_ATTR(nr_hugepages_mempolicy);
 #endif
@@ -2248,36 +2259,21 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
 			 void __user *buffer, size_t *length, loff_t *ppos)
 {
 	struct hstate *h = &default_hstate;
-	unsigned long tmp;
+	unsigned long tmp = h->max_huge_pages;
 	int ret;
 
 	if (!hugepages_supported())
 		return -ENOTSUPP;
 
-	tmp = h->max_huge_pages;
-
-	if (write && hstate_is_gigantic(h) && !gigantic_page_supported())
-		return -EINVAL;
-
 	table->data = &tmp;
 	table->maxlen = sizeof(unsigned long);
 	ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
 	if (ret)
 		goto out;
 
-	if (write) {
-		NODEMASK_ALLOC(nodemask_t, nodes_allowed,
-						GFP_KERNEL | __GFP_NORETRY);
-		if (!(obey_mempolicy &&
-			       init_nodemask_of_mempolicy(nodes_allowed))) {
-			NODEMASK_FREE(nodes_allowed);
-			nodes_allowed = &node_states[N_MEMORY];
-		}
-		h->max_huge_pages = set_max_huge_pages(h, tmp, nodes_allowed);
-
-		if (nodes_allowed != &node_states[N_MEMORY])
-			NODEMASK_FREE(nodes_allowed);
-	}
+	if (write)
+		ret = __nr_hugepages_store_common(obey_mempolicy, h,
+						  NUMA_NO_NODE, tmp, *length);
 out:
 	return ret;
 }
@@ -2754,8 +2750,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
  * from other VMAs and let the children be SIGKILLed if they are faulting the
  * same region.
  */
-static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
-				struct page *page, unsigned long address)
+static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
+			      struct page *page, unsigned long address)
 {
 	struct hstate *h = hstate_vma(vma);
 	struct vm_area_struct *iter_vma;
@@ -2794,8 +2790,6 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 				     address + huge_page_size(h), page);
 	}
 	mutex_unlock(&mapping->i_mmap_mutex);
-
-	return 1;
 }
 
 /*
@@ -2810,7 +2804,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct hstate *h = hstate_vma(vma);
 	struct page *old_page, *new_page;
-	int outside_reserve = 0;
+	int ret = 0, outside_reserve = 0;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;	/* For mmu_notifiers */
 
@@ -2840,14 +2834,14 @@ retry_avoidcopy:
 
 	page_cache_get(old_page);
 
-	/* Drop page table lock as buddy allocator may be called */
+	/*
+	 * Drop page table lock as buddy allocator may be called. It will
+	 * be acquired again before returning to the caller, as expected.
+	 */
 	spin_unlock(ptl);
 	new_page = alloc_huge_page(vma, address, outside_reserve);
 
 	if (IS_ERR(new_page)) {
-		long err = PTR_ERR(new_page);
-		page_cache_release(old_page);
-
 		/*
 		 * If a process owning a MAP_PRIVATE mapping fails to COW,
 		 * it is due to references held by a child and an insufficient
@@ -2856,29 +2850,25 @@ retry_avoidcopy:
 		 * may get SIGKILLed if it later faults.
 		 */
 		if (outside_reserve) {
+			page_cache_release(old_page);
 			BUG_ON(huge_pte_none(pte));
-			if (unmap_ref_private(mm, vma, old_page, address)) {
-				BUG_ON(huge_pte_none(pte));
-				spin_lock(ptl);
-				ptep = huge_pte_offset(mm, address & huge_page_mask(h));
-				if (likely(ptep &&
-					   pte_same(huge_ptep_get(ptep), pte)))
-					goto retry_avoidcopy;
-				/*
-				 * race occurs while re-acquiring page table
-				 * lock, and our job is done.
-				 */
-				return 0;
-			}
-			WARN_ON_ONCE(1);
+			unmap_ref_private(mm, vma, old_page, address);
+			BUG_ON(huge_pte_none(pte));
+			spin_lock(ptl);
+			ptep = huge_pte_offset(mm, address & huge_page_mask(h));
+			if (likely(ptep &&
+				   pte_same(huge_ptep_get(ptep), pte)))
+				goto retry_avoidcopy;
+			/*
+			 * race occurs while re-acquiring page table
+			 * lock, and our job is done.
+			 */
+			return 0;
 		}
 
-		/* Caller expects lock to be held */
-		spin_lock(ptl);
-		if (err == -ENOMEM)
-			return VM_FAULT_OOM;
-		else
-			return VM_FAULT_SIGBUS;
+		ret = (PTR_ERR(new_page) == -ENOMEM) ?
+			VM_FAULT_OOM : VM_FAULT_SIGBUS;
+		goto out_release_old;
 	}
 
 	/*
@@ -2886,11 +2876,8 @@ retry_avoidcopy:
 	 * anon_vma prepared.
 	 */
 	if (unlikely(anon_vma_prepare(vma))) {
-		page_cache_release(new_page);
-		page_cache_release(old_page);
-		/* Caller expects lock to be held */
-		spin_lock(ptl);
-		return VM_FAULT_OOM;
+		ret = VM_FAULT_OOM;
+		goto out_release_all;
 	}
 
 	copy_user_huge_page(new_page, old_page, address, vma,
@@ -2900,6 +2887,7 @@ retry_avoidcopy:
 	mmun_start = address & huge_page_mask(h);
 	mmun_end = mmun_start + huge_page_size(h);
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
 	/*
 	 * Retake the page table lock to check for racing updates
 	 * before the page tables are altered
@@ -2920,12 +2908,13 @@ retry_avoidcopy:
 	}
 	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+out_release_all:
 	page_cache_release(new_page);
+out_release_old:
 	page_cache_release(old_page);
 
-	/* Caller expects lock to be held */
-	spin_lock(ptl);
-	return 0;
+	spin_lock(ptl);	/* Caller expects lock to be held */
+	return ret;
 }
 
 /* Return the pagecache page at a given address within a VMA */
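Note on the hugetlb_cow() hunks above: the patch replaces several inline release/relock/return sequences with a single exit path built on the out_release_all and out_release_old labels, so the "re-acquire the page table lock before returning" invariant lives in exactly one place. A minimal, self-contained sketch of that goto-based cleanup pattern is below; struct page, get_page_ref(), put_page_ref() and cow_like_operation() are illustrative stand-ins, not the kernel APIs used in the patch.

	#include <stdio.h>
	#include <stdlib.h>

	/* Illustrative stand-in for the page references hugetlb_cow() holds. */
	struct page { int id; };

	static struct page *get_page_ref(int id)
	{
		struct page *p = malloc(sizeof(*p));
		if (p)
			p->id = id;
		return p;
	}

	static void put_page_ref(struct page *p)
	{
		free(p);		/* free(NULL) is a no-op, like dropping no reference */
	}

	/*
	 * Take references up front; on failure, jump to the label that releases
	 * only what has been acquired so far. All exits share one tail.
	 */
	static int cow_like_operation(int fail_at)
	{
		int ret = 0;
		struct page *old_page, *new_page = NULL;

		old_page = get_page_ref(1);
		if (!old_page)
			return -1;

		if (fail_at == 1) {		/* e.g. allocating the copy failed */
			ret = -1;
			goto out_release_old;
		}

		new_page = get_page_ref(2);
		if (!new_page || fail_at == 2) {	/* e.g. preparing the mapping failed */
			ret = -1;
			goto out_release_all;
		}

		/* ... the actual copy and mapping update would happen here ... */

	out_release_all:
		put_page_ref(new_page);
	out_release_old:
		put_page_ref(old_page);
		/* single place to restore caller-expected state before returning */
		return ret;
	}

	int main(void)
	{
		printf("%d %d %d\n", cow_like_operation(0),
		       cow_like_operation(1), cow_like_operation(2));
		return 0;
	}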