diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-06-25 06:16:51 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-06-25 06:16:51 -0400 |
commit | d02859ecb321c8c0f74cb9bbe3f51a59e58822b0 (patch) | |
tree | 05dd5bdc55081c0a90bf0afc35c07d0d6e25d015 /mm | |
parent | a987b16cc6123af2c9414032701bab5f73c54c89 (diff) | |
parent | 543cf4cb3fe6f6cae3651ba918b9c56200b257d0 (diff) |
Merge commit 'v2.6.26-rc8' into x86/xen
Conflicts:
arch/x86/xen/enlighten.c
arch/x86/xen/mmu.c
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/bootmem.c | 6 | ||||
-rw-r--r-- | mm/memory.c | 67 | ||||
-rw-r--r-- | mm/migrate.c | 10 | ||||
-rw-r--r-- | mm/slab.c | 5 |
4 files changed, 78 insertions, 10 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c index e8fb927392b9..8d9f60e06f62 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c | |||
@@ -442,15 +442,17 @@ unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn, | |||
442 | return init_bootmem_core(pgdat, freepfn, startpfn, endpfn); | 442 | return init_bootmem_core(pgdat, freepfn, startpfn, endpfn); |
443 | } | 443 | } |
444 | 444 | ||
445 | void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | 445 | int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, |
446 | unsigned long size, int flags) | 446 | unsigned long size, int flags) |
447 | { | 447 | { |
448 | int ret; | 448 | int ret; |
449 | 449 | ||
450 | ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); | 450 | ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); |
451 | if (ret < 0) | 451 | if (ret < 0) |
452 | return; | 452 | return -ENOMEM; |
453 | reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); | 453 | reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); |
454 | |||
455 | return 0; | ||
454 | } | 456 | } |
455 | 457 | ||
456 | void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | 458 | void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, |
diff --git a/mm/memory.c b/mm/memory.c index 19e0ae9beecb..d14b251a25a6 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -999,17 +999,15 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, | |||
999 | goto no_page_table; | 999 | goto no_page_table; |
1000 | 1000 | ||
1001 | ptep = pte_offset_map_lock(mm, pmd, address, &ptl); | 1001 | ptep = pte_offset_map_lock(mm, pmd, address, &ptl); |
1002 | if (!ptep) | ||
1003 | goto out; | ||
1004 | 1002 | ||
1005 | pte = *ptep; | 1003 | pte = *ptep; |
1006 | if (!pte_present(pte)) | 1004 | if (!pte_present(pte)) |
1007 | goto unlock; | 1005 | goto no_page; |
1008 | if ((flags & FOLL_WRITE) && !pte_write(pte)) | 1006 | if ((flags & FOLL_WRITE) && !pte_write(pte)) |
1009 | goto unlock; | 1007 | goto unlock; |
1010 | page = vm_normal_page(vma, address, pte); | 1008 | page = vm_normal_page(vma, address, pte); |
1011 | if (unlikely(!page)) | 1009 | if (unlikely(!page)) |
1012 | goto unlock; | 1010 | goto bad_page; |
1013 | 1011 | ||
1014 | if (flags & FOLL_GET) | 1012 | if (flags & FOLL_GET) |
1015 | get_page(page); | 1013 | get_page(page); |
@@ -1024,6 +1022,15 @@ unlock: | |||
1024 | out: | 1022 | out: |
1025 | return page; | 1023 | return page; |
1026 | 1024 | ||
1025 | bad_page: | ||
1026 | pte_unmap_unlock(ptep, ptl); | ||
1027 | return ERR_PTR(-EFAULT); | ||
1028 | |||
1029 | no_page: | ||
1030 | pte_unmap_unlock(ptep, ptl); | ||
1031 | if (!pte_none(pte)) | ||
1032 | return page; | ||
1033 | /* Fall through to ZERO_PAGE handling */ | ||
1027 | no_page_table: | 1034 | no_page_table: |
1028 | /* | 1035 | /* |
1029 | * When core dumping an enormous anonymous area that nobody | 1036 | * When core dumping an enormous anonymous area that nobody |
@@ -1038,6 +1045,26 @@ no_page_table: | |||
1038 | return page; | 1045 | return page; |
1039 | } | 1046 | } |
1040 | 1047 | ||
1048 | /* Can we do the FOLL_ANON optimization? */ | ||
1049 | static inline int use_zero_page(struct vm_area_struct *vma) | ||
1050 | { | ||
1051 | /* | ||
1052 | * We don't want to optimize FOLL_ANON for make_pages_present() | ||
1053 | * when it tries to page in a VM_LOCKED region. As to VM_SHARED, | ||
1054 | * we want to get the page from the page tables to make sure | ||
1055 | * that we serialize and update with any other user of that | ||
1056 | * mapping. | ||
1057 | */ | ||
1058 | if (vma->vm_flags & (VM_LOCKED | VM_SHARED)) | ||
1059 | return 0; | ||
1060 | /* | ||
1061 | * And if we have a fault or a nopfn routine, it's not an | ||
1062 | * anonymous region. | ||
1063 | */ | ||
1064 | return !vma->vm_ops || | ||
1065 | (!vma->vm_ops->fault && !vma->vm_ops->nopfn); | ||
1066 | } | ||
1067 | |||
1041 | int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | 1068 | int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, |
1042 | unsigned long start, int len, int write, int force, | 1069 | unsigned long start, int len, int write, int force, |
1043 | struct page **pages, struct vm_area_struct **vmas) | 1070 | struct page **pages, struct vm_area_struct **vmas) |
@@ -1112,8 +1139,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1112 | foll_flags = FOLL_TOUCH; | 1139 | foll_flags = FOLL_TOUCH; |
1113 | if (pages) | 1140 | if (pages) |
1114 | foll_flags |= FOLL_GET; | 1141 | foll_flags |= FOLL_GET; |
1115 | if (!write && !(vma->vm_flags & VM_LOCKED) && | 1142 | if (!write && use_zero_page(vma)) |
1116 | (!vma->vm_ops || !vma->vm_ops->fault)) | ||
1117 | foll_flags |= FOLL_ANON; | 1143 | foll_flags |= FOLL_ANON; |
1118 | 1144 | ||
1119 | do { | 1145 | do { |
@@ -1159,6 +1185,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1159 | 1185 | ||
1160 | cond_resched(); | 1186 | cond_resched(); |
1161 | } | 1187 | } |
1188 | if (IS_ERR(page)) | ||
1189 | return i ? i : PTR_ERR(page); | ||
1162 | if (pages) { | 1190 | if (pages) { |
1163 | pages[i] = page; | 1191 | pages[i] = page; |
1164 | 1192 | ||
@@ -1757,7 +1785,6 @@ gotten: | |||
1757 | page_table = pte_offset_map_lock(mm, pmd, address, &ptl); | 1785 | page_table = pte_offset_map_lock(mm, pmd, address, &ptl); |
1758 | if (likely(pte_same(*page_table, orig_pte))) { | 1786 | if (likely(pte_same(*page_table, orig_pte))) { |
1759 | if (old_page) { | 1787 | if (old_page) { |
1760 | page_remove_rmap(old_page, vma); | ||
1761 | if (!PageAnon(old_page)) { | 1788 | if (!PageAnon(old_page)) { |
1762 | dec_mm_counter(mm, file_rss); | 1789 | dec_mm_counter(mm, file_rss); |
1763 | inc_mm_counter(mm, anon_rss); | 1790 | inc_mm_counter(mm, anon_rss); |
@@ -1779,6 +1806,32 @@ gotten: | |||
1779 | lru_cache_add_active(new_page); | 1806 | lru_cache_add_active(new_page); |
1780 | page_add_new_anon_rmap(new_page, vma, address); | 1807 | page_add_new_anon_rmap(new_page, vma, address); |
1781 | 1808 | ||
1809 | if (old_page) { | ||
1810 | /* | ||
1811 | * Only after switching the pte to the new page may | ||
1812 | * we remove the mapcount here. Otherwise another | ||
1813 | * process may come and find the rmap count decremented | ||
1814 | * before the pte is switched to the new page, and | ||
1815 | * "reuse" the old page writing into it while our pte | ||
1816 | * here still points into it and can be read by other | ||
1817 | * threads. | ||
1818 | * | ||
1819 | * The critical issue is to order this | ||
1820 | * page_remove_rmap with the ptp_clear_flush above. | ||
1821 | * Those stores are ordered by (if nothing else,) | ||
1822 | * the barrier present in the atomic_add_negative | ||
1823 | * in page_remove_rmap. | ||
1824 | * | ||
1825 | * Then the TLB flush in ptep_clear_flush ensures that | ||
1826 | * no process can access the old page before the | ||
1827 | * decremented mapcount is visible. And the old page | ||
1828 | * cannot be reused until after the decremented | ||
1829 | * mapcount is visible. So transitively, TLBs to | ||
1830 | * old page will be flushed before it can be reused. | ||
1831 | */ | ||
1832 | page_remove_rmap(old_page, vma); | ||
1833 | } | ||
1834 | |||
1782 | /* Free the old page.. */ | 1835 | /* Free the old page.. */ |
1783 | new_page = old_page; | 1836 | new_page = old_page; |
1784 | ret |= VM_FAULT_WRITE; | 1837 | ret |= VM_FAULT_WRITE; |
diff --git a/mm/migrate.c b/mm/migrate.c index 449d77d409f5..112bcaeaa104 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -865,6 +865,11 @@ static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm, | |||
865 | goto set_status; | 865 | goto set_status; |
866 | 866 | ||
867 | page = follow_page(vma, pp->addr, FOLL_GET); | 867 | page = follow_page(vma, pp->addr, FOLL_GET); |
868 | |||
869 | err = PTR_ERR(page); | ||
870 | if (IS_ERR(page)) | ||
871 | goto set_status; | ||
872 | |||
868 | err = -ENOENT; | 873 | err = -ENOENT; |
869 | if (!page) | 874 | if (!page) |
870 | goto set_status; | 875 | goto set_status; |
@@ -928,6 +933,11 @@ static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm) | |||
928 | goto set_status; | 933 | goto set_status; |
929 | 934 | ||
930 | page = follow_page(vma, pm->addr, 0); | 935 | page = follow_page(vma, pm->addr, 0); |
936 | |||
937 | err = PTR_ERR(page); | ||
938 | if (IS_ERR(page)) | ||
939 | goto set_status; | ||
940 | |||
931 | err = -ENOENT; | 941 | err = -ENOENT; |
932 | /* Use PageReserved to check for zero page */ | 942 | /* Use PageReserved to check for zero page */ |
933 | if (!page || PageReserved(page)) | 943 | if (!page || PageReserved(page)) |
@@ -3263,9 +3263,12 @@ retry: | |||
3263 | 3263 | ||
3264 | if (cpuset_zone_allowed_hardwall(zone, flags) && | 3264 | if (cpuset_zone_allowed_hardwall(zone, flags) && |
3265 | cache->nodelists[nid] && | 3265 | cache->nodelists[nid] && |
3266 | cache->nodelists[nid]->free_objects) | 3266 | cache->nodelists[nid]->free_objects) { |
3267 | obj = ____cache_alloc_node(cache, | 3267 | obj = ____cache_alloc_node(cache, |
3268 | flags | GFP_THISNODE, nid); | 3268 | flags | GFP_THISNODE, nid); |
3269 | if (obj) | ||
3270 | break; | ||
3271 | } | ||
3269 | } | 3272 | } |
3270 | 3273 | ||
3271 | if (!obj) { | 3274 | if (!obj) { |