| author | Steven Whitehouse <swhiteho@redhat.com> | 2006-03-20 12:47:40 -0500 |
|---|---|---|
| committer | Steven Whitehouse <swhiteho@redhat.com> | 2006-03-20 12:47:40 -0500 |
| commit | 9a21247181d93fdf99255911845ecdb041d21583 (patch) | |
| tree | 6d69be36f8a2bd8c76bf02ead2f0121a511c0a92 /mm | |
| parent | c752666c17f870fa8ae9f16804dd457e9e6daaec (diff) | |
| parent | 7705a8792b0fc82fd7d4dd923724606bbfd9fb20 (diff) | |

Merge branch 'master'
Diffstat (limited to 'mm')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | mm/memory.c | 5 |
| -rw-r--r-- | mm/memory_hotplug.c | 1 |
| -rw-r--r-- | mm/mempolicy.c | 152 |
| -rw-r--r-- | mm/nommu.c | 8 |
| -rw-r--r-- | mm/oom_kill.c | 5 |
| -rw-r--r-- | mm/page_alloc.c | 17 |
| -rw-r--r-- | mm/rmap.c | 21 |
| -rw-r--r-- | mm/slab.c | 122 |
| -rw-r--r-- | mm/swap.c | 28 |
| -rw-r--r-- | mm/vmscan.c | 21 |

10 files changed, 287 insertions, 93 deletions
diff --git a/mm/memory.c b/mm/memory.c index 9abc6008544b..85e80a57db29 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -623,11 +623,12 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, | |||
623 | (*zap_work)--; | 623 | (*zap_work)--; |
624 | continue; | 624 | continue; |
625 | } | 625 | } |
626 | |||
627 | (*zap_work) -= PAGE_SIZE; | ||
628 | |||
626 | if (pte_present(ptent)) { | 629 | if (pte_present(ptent)) { |
627 | struct page *page; | 630 | struct page *page; |
628 | 631 | ||
629 | (*zap_work) -= PAGE_SIZE; | ||
630 | |||
631 | page = vm_normal_page(vma, addr, ptent); | 632 | page = vm_normal_page(vma, addr, ptent); |
632 | if (unlikely(details) && page) { | 633 | if (unlikely(details) && page) { |
633 | /* | 634 | /* |
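Note on the zap_pte_range() hunk above: the `(*zap_work) -= PAGE_SIZE;` charge moves ahead of the pte_present() check, so empty and swap ptes now consume the scan budget as well and a large sparse mapping cannot keep the loop from yielding. A minimal user-space sketch of that budgeting idea, with made-up names and sizes rather than kernel API:

```c
#include <stdio.h>

#define SLOT_COST   4096             /* stand-in for PAGE_SIZE */
#define SCAN_BUDGET (64 * SLOT_COST)

/* Scan a run of slots, charging the budget for every slot visited,
 * populated or not, and report where the scan had to stop. */
static int scan_range(const int *slots, int nslots, long *budget)
{
    int i;

    for (i = 0; i < nslots && *budget > 0; i++) {
        *budget -= SLOT_COST;        /* charged before looking at the slot */
        if (!slots[i])
            continue;                /* empty slot: still costs budget */
        /* ... work on a populated slot would go here ... */
    }
    return i;                        /* first slot left unscanned */
}

int main(void)
{
    int slots[256] = { 0 };          /* mostly empty, like a sparse mapping */
    long budget = SCAN_BUDGET;
    int stopped_at = scan_range(slots, 256, &budget);

    printf("stopped at slot %d, budget left %ld\n", stopped_at, budget);
    return 0;
}
```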
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a918f77f02f3..1fe76d963ac2 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -130,6 +130,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) | |||
130 | onlined_pages++; | 130 | onlined_pages++; |
131 | } | 131 | } |
132 | zone->present_pages += onlined_pages; | 132 | zone->present_pages += onlined_pages; |
133 | zone->zone_pgdat->node_present_pages += onlined_pages; | ||
133 | 134 | ||
134 | setup_per_zone_pages_min(); | 135 | setup_per_zone_pages_min(); |
135 | 136 | ||
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 67af4cea1e23..b21869a39f0b 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -197,7 +197,7 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes) | |||
197 | return policy; | 197 | return policy; |
198 | } | 198 | } |
199 | 199 | ||
200 | static void gather_stats(struct page *, void *); | 200 | static void gather_stats(struct page *, void *, int pte_dirty); |
201 | static void migrate_page_add(struct page *page, struct list_head *pagelist, | 201 | static void migrate_page_add(struct page *page, struct list_head *pagelist, |
202 | unsigned long flags); | 202 | unsigned long flags); |
203 | 203 | ||
@@ -239,7 +239,7 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
239 | continue; | 239 | continue; |
240 | 240 | ||
241 | if (flags & MPOL_MF_STATS) | 241 | if (flags & MPOL_MF_STATS) |
242 | gather_stats(page, private); | 242 | gather_stats(page, private, pte_dirty(*pte)); |
243 | else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) | 243 | else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) |
244 | migrate_page_add(page, private, flags); | 244 | migrate_page_add(page, private, flags); |
245 | else | 245 | else |
@@ -330,9 +330,19 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, | |||
330 | int err; | 330 | int err; |
331 | struct vm_area_struct *first, *vma, *prev; | 331 | struct vm_area_struct *first, *vma, *prev; |
332 | 332 | ||
333 | /* Clear the LRU lists so pages can be isolated */ | 333 | if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { |
334 | if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) | 334 | /* Must have swap device for migration */ |
335 | if (nr_swap_pages <= 0) | ||
336 | return ERR_PTR(-ENODEV); | ||
337 | |||
338 | /* | ||
339 | * Clear the LRU lists so pages can be isolated. | ||
340 | * Note that pages may be moved off the LRU after we have | ||
341 | * drained them. Those pages will fail to migrate like other | ||
342 | * pages that may be busy. | ||
343 | */ | ||
335 | lru_add_drain_all(); | 344 | lru_add_drain_all(); |
345 | } | ||
336 | 346 | ||
337 | first = find_vma(mm, start); | 347 | first = find_vma(mm, start); |
338 | if (!first) | 348 | if (!first) |
@@ -748,7 +758,7 @@ long do_mbind(unsigned long start, unsigned long len, | |||
748 | MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) | 758 | MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) |
749 | || mode > MPOL_MAX) | 759 | || mode > MPOL_MAX) |
750 | return -EINVAL; | 760 | return -EINVAL; |
751 | if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_RESOURCE)) | 761 | if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE)) |
752 | return -EPERM; | 762 | return -EPERM; |
753 | 763 | ||
754 | if (start & ~PAGE_MASK) | 764 | if (start & ~PAGE_MASK) |
@@ -942,19 +952,20 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, | |||
942 | */ | 952 | */ |
943 | if ((current->euid != task->suid) && (current->euid != task->uid) && | 953 | if ((current->euid != task->suid) && (current->euid != task->uid) && |
944 | (current->uid != task->suid) && (current->uid != task->uid) && | 954 | (current->uid != task->suid) && (current->uid != task->uid) && |
945 | !capable(CAP_SYS_ADMIN)) { | 955 | !capable(CAP_SYS_NICE)) { |
946 | err = -EPERM; | 956 | err = -EPERM; |
947 | goto out; | 957 | goto out; |
948 | } | 958 | } |
949 | 959 | ||
950 | task_nodes = cpuset_mems_allowed(task); | 960 | task_nodes = cpuset_mems_allowed(task); |
951 | /* Is the user allowed to access the target nodes? */ | 961 | /* Is the user allowed to access the target nodes? */ |
952 | if (!nodes_subset(new, task_nodes) && !capable(CAP_SYS_ADMIN)) { | 962 | if (!nodes_subset(new, task_nodes) && !capable(CAP_SYS_NICE)) { |
953 | err = -EPERM; | 963 | err = -EPERM; |
954 | goto out; | 964 | goto out; |
955 | } | 965 | } |
956 | 966 | ||
957 | err = do_migrate_pages(mm, &old, &new, MPOL_MF_MOVE); | 967 | err = do_migrate_pages(mm, &old, &new, |
968 | capable(CAP_SYS_NICE) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE); | ||
958 | out: | 969 | out: |
959 | mmput(mm); | 970 | mmput(mm); |
960 | return err; | 971 | return err; |
@@ -1752,66 +1763,145 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) | |||
1752 | struct numa_maps { | 1763 | struct numa_maps { |
1753 | unsigned long pages; | 1764 | unsigned long pages; |
1754 | unsigned long anon; | 1765 | unsigned long anon; |
1755 | unsigned long mapped; | 1766 | unsigned long active; |
1767 | unsigned long writeback; | ||
1756 | unsigned long mapcount_max; | 1768 | unsigned long mapcount_max; |
1769 | unsigned long dirty; | ||
1770 | unsigned long swapcache; | ||
1757 | unsigned long node[MAX_NUMNODES]; | 1771 | unsigned long node[MAX_NUMNODES]; |
1758 | }; | 1772 | }; |
1759 | 1773 | ||
1760 | static void gather_stats(struct page *page, void *private) | 1774 | static void gather_stats(struct page *page, void *private, int pte_dirty) |
1761 | { | 1775 | { |
1762 | struct numa_maps *md = private; | 1776 | struct numa_maps *md = private; |
1763 | int count = page_mapcount(page); | 1777 | int count = page_mapcount(page); |
1764 | 1778 | ||
1765 | if (count) | 1779 | md->pages++; |
1766 | md->mapped++; | 1780 | if (pte_dirty || PageDirty(page)) |
1781 | md->dirty++; | ||
1767 | 1782 | ||
1768 | if (count > md->mapcount_max) | 1783 | if (PageSwapCache(page)) |
1769 | md->mapcount_max = count; | 1784 | md->swapcache++; |
1770 | 1785 | ||
1771 | md->pages++; | 1786 | if (PageActive(page)) |
1787 | md->active++; | ||
1788 | |||
1789 | if (PageWriteback(page)) | ||
1790 | md->writeback++; | ||
1772 | 1791 | ||
1773 | if (PageAnon(page)) | 1792 | if (PageAnon(page)) |
1774 | md->anon++; | 1793 | md->anon++; |
1775 | 1794 | ||
1795 | if (count > md->mapcount_max) | ||
1796 | md->mapcount_max = count; | ||
1797 | |||
1776 | md->node[page_to_nid(page)]++; | 1798 | md->node[page_to_nid(page)]++; |
1777 | cond_resched(); | 1799 | cond_resched(); |
1778 | } | 1800 | } |
1779 | 1801 | ||
1802 | #ifdef CONFIG_HUGETLB_PAGE | ||
1803 | static void check_huge_range(struct vm_area_struct *vma, | ||
1804 | unsigned long start, unsigned long end, | ||
1805 | struct numa_maps *md) | ||
1806 | { | ||
1807 | unsigned long addr; | ||
1808 | struct page *page; | ||
1809 | |||
1810 | for (addr = start; addr < end; addr += HPAGE_SIZE) { | ||
1811 | pte_t *ptep = huge_pte_offset(vma->vm_mm, addr & HPAGE_MASK); | ||
1812 | pte_t pte; | ||
1813 | |||
1814 | if (!ptep) | ||
1815 | continue; | ||
1816 | |||
1817 | pte = *ptep; | ||
1818 | if (pte_none(pte)) | ||
1819 | continue; | ||
1820 | |||
1821 | page = pte_page(pte); | ||
1822 | if (!page) | ||
1823 | continue; | ||
1824 | |||
1825 | gather_stats(page, md, pte_dirty(*ptep)); | ||
1826 | } | ||
1827 | } | ||
1828 | #else | ||
1829 | static inline void check_huge_range(struct vm_area_struct *vma, | ||
1830 | unsigned long start, unsigned long end, | ||
1831 | struct numa_maps *md) | ||
1832 | { | ||
1833 | } | ||
1834 | #endif | ||
1835 | |||
1780 | int show_numa_map(struct seq_file *m, void *v) | 1836 | int show_numa_map(struct seq_file *m, void *v) |
1781 | { | 1837 | { |
1782 | struct task_struct *task = m->private; | 1838 | struct task_struct *task = m->private; |
1783 | struct vm_area_struct *vma = v; | 1839 | struct vm_area_struct *vma = v; |
1784 | struct numa_maps *md; | 1840 | struct numa_maps *md; |
1841 | struct file *file = vma->vm_file; | ||
1842 | struct mm_struct *mm = vma->vm_mm; | ||
1785 | int n; | 1843 | int n; |
1786 | char buffer[50]; | 1844 | char buffer[50]; |
1787 | 1845 | ||
1788 | if (!vma->vm_mm) | 1846 | if (!mm) |
1789 | return 0; | 1847 | return 0; |
1790 | 1848 | ||
1791 | md = kzalloc(sizeof(struct numa_maps), GFP_KERNEL); | 1849 | md = kzalloc(sizeof(struct numa_maps), GFP_KERNEL); |
1792 | if (!md) | 1850 | if (!md) |
1793 | return 0; | 1851 | return 0; |
1794 | 1852 | ||
1795 | check_pgd_range(vma, vma->vm_start, vma->vm_end, | 1853 | mpol_to_str(buffer, sizeof(buffer), |
1796 | &node_online_map, MPOL_MF_STATS, md); | 1854 | get_vma_policy(task, vma, vma->vm_start)); |
1797 | 1855 | ||
1798 | if (md->pages) { | 1856 | seq_printf(m, "%08lx %s", vma->vm_start, buffer); |
1799 | mpol_to_str(buffer, sizeof(buffer), | ||
1800 | get_vma_policy(task, vma, vma->vm_start)); | ||
1801 | 1857 | ||
1802 | seq_printf(m, "%08lx %s pages=%lu mapped=%lu maxref=%lu", | 1858 | if (file) { |
1803 | vma->vm_start, buffer, md->pages, | 1859 | seq_printf(m, " file="); |
1804 | md->mapped, md->mapcount_max); | 1860 | seq_path(m, file->f_vfsmnt, file->f_dentry, "\n\t= "); |
1861 | } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { | ||
1862 | seq_printf(m, " heap"); | ||
1863 | } else if (vma->vm_start <= mm->start_stack && | ||
1864 | vma->vm_end >= mm->start_stack) { | ||
1865 | seq_printf(m, " stack"); | ||
1866 | } | ||
1805 | 1867 | ||
1806 | if (md->anon) | 1868 | if (is_vm_hugetlb_page(vma)) { |
1807 | seq_printf(m," anon=%lu",md->anon); | 1869 | check_huge_range(vma, vma->vm_start, vma->vm_end, md); |
1870 | seq_printf(m, " huge"); | ||
1871 | } else { | ||
1872 | check_pgd_range(vma, vma->vm_start, vma->vm_end, | ||
1873 | &node_online_map, MPOL_MF_STATS, md); | ||
1874 | } | ||
1808 | 1875 | ||
1809 | for_each_online_node(n) | 1876 | if (!md->pages) |
1810 | if (md->node[n]) | 1877 | goto out; |
1811 | seq_printf(m, " N%d=%lu", n, md->node[n]); | ||
1812 | 1878 | ||
1813 | seq_putc(m, '\n'); | 1879 | if (md->anon) |
1814 | } | 1880 | seq_printf(m," anon=%lu",md->anon); |
1881 | |||
1882 | if (md->dirty) | ||
1883 | seq_printf(m," dirty=%lu",md->dirty); | ||
1884 | |||
1885 | if (md->pages != md->anon && md->pages != md->dirty) | ||
1886 | seq_printf(m, " mapped=%lu", md->pages); | ||
1887 | |||
1888 | if (md->mapcount_max > 1) | ||
1889 | seq_printf(m, " mapmax=%lu", md->mapcount_max); | ||
1890 | |||
1891 | if (md->swapcache) | ||
1892 | seq_printf(m," swapcache=%lu", md->swapcache); | ||
1893 | |||
1894 | if (md->active < md->pages && !is_vm_hugetlb_page(vma)) | ||
1895 | seq_printf(m," active=%lu", md->active); | ||
1896 | |||
1897 | if (md->writeback) | ||
1898 | seq_printf(m," writeback=%lu", md->writeback); | ||
1899 | |||
1900 | for_each_online_node(n) | ||
1901 | if (md->node[n]) | ||
1902 | seq_printf(m, " N%d=%lu", n, md->node[n]); | ||
1903 | out: | ||
1904 | seq_putc(m, '\n'); | ||
1815 | kfree(md); | 1905 | kfree(md); |
1816 | 1906 | ||
1817 | if (m->count < m->size) | 1907 | if (m->count < m->size) |
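The show_numa_map() rewrite prints one richer line per VMA in /proc/<pid>/numa_maps: the policy string, an optional file/heap/stack/huge tag, and only the counters that are non-trivial. Assuming mpol_to_str() yields "default", the output would look roughly like the lines below; the addresses, library path and counts are invented for illustration:

```
2aaaaac00000 default file=/lib/libc-2.3.6.so mapped=24 mapmax=5 active=20 N0=14 N1=10
2aaaaad80000 default heap anon=6 dirty=6 swapcache=1 N0=6
```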
diff --git a/mm/nommu.c b/mm/nommu.c index 99d21020ec9d..4951f4786f28 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
@@ -53,7 +53,6 @@ DECLARE_RWSEM(nommu_vma_sem); | |||
53 | struct vm_operations_struct generic_file_vm_ops = { | 53 | struct vm_operations_struct generic_file_vm_ops = { |
54 | }; | 54 | }; |
55 | 55 | ||
56 | EXPORT_SYMBOL(vmalloc); | ||
57 | EXPORT_SYMBOL(vfree); | 56 | EXPORT_SYMBOL(vfree); |
58 | EXPORT_SYMBOL(vmalloc_to_page); | 57 | EXPORT_SYMBOL(vmalloc_to_page); |
59 | EXPORT_SYMBOL(vmalloc_32); | 58 | EXPORT_SYMBOL(vmalloc_32); |
@@ -205,6 +204,13 @@ void *vmalloc(unsigned long size) | |||
205 | { | 204 | { |
206 | return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL); | 205 | return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL); |
207 | } | 206 | } |
207 | EXPORT_SYMBOL(vmalloc); | ||
208 | |||
209 | void *vmalloc_node(unsigned long size, int node) | ||
210 | { | ||
211 | return vmalloc(size); | ||
212 | } | ||
213 | EXPORT_SYMBOL(vmalloc_node); | ||
208 | 214 | ||
209 | /* | 215 | /* |
210 | * vmalloc_32 - allocate virtually continguos memory (32bit addressable) | 216 | * vmalloc_32 - allocate virtually continguos memory (32bit addressable) |
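The nommu.c hunk gives !MMU builds a vmalloc_node() that simply ignores the node hint and calls vmalloc(), and moves EXPORT_SYMBOL(vmalloc) next to the definition. A tiny user-space analogue of that fallback pattern, using hypothetical names (alloc_node and HAVE_NUMA_ALLOC are not kernel symbols):

```c
#include <stdlib.h>
#include <stdio.h>

/* When there is no real node-aware allocator, the _node variant
 * just drops the hint, mirroring the nommu vmalloc_node() fallback. */
#ifndef HAVE_NUMA_ALLOC
static void *alloc_node(size_t size, int node)
{
    (void)node;                       /* no NUMA topology: hint is meaningless */
    return malloc(size);
}
#endif

int main(void)
{
    void *buf = alloc_node(4096, 1);  /* node hint silently ignored */

    printf("allocated %p\n", buf);
    free(buf);
    return 0;
}
```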
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 8123fad5a485..78747afad6b0 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
@@ -302,7 +302,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) | |||
302 | { | 302 | { |
303 | struct mm_struct *mm = NULL; | 303 | struct mm_struct *mm = NULL; |
304 | task_t *p; | 304 | task_t *p; |
305 | unsigned long points; | 305 | unsigned long points = 0; |
306 | 306 | ||
307 | if (printk_ratelimit()) { | 307 | if (printk_ratelimit()) { |
308 | printk("oom-killer: gfp_mask=0x%x, order=%d\n", | 308 | printk("oom-killer: gfp_mask=0x%x, order=%d\n", |
@@ -355,6 +355,7 @@ retry: | |||
355 | } | 355 | } |
356 | 356 | ||
357 | out: | 357 | out: |
358 | read_unlock(&tasklist_lock); | ||
358 | cpuset_unlock(); | 359 | cpuset_unlock(); |
359 | if (mm) | 360 | if (mm) |
360 | mmput(mm); | 361 | mmput(mm); |
@@ -364,5 +365,5 @@ out: | |||
364 | * retry to allocate memory unless "p" is current | 365 | * retry to allocate memory unless "p" is current |
365 | */ | 366 | */ |
366 | if (!test_thread_flag(TIF_MEMDIE)) | 367 | if (!test_thread_flag(TIF_MEMDIE)) |
367 | schedule_timeout_interruptible(1); | 368 | schedule_timeout_uninterruptible(1); |
368 | } | 369 | } |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 791690d7d3fa..234bd4895d14 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -590,21 +590,20 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, | |||
590 | } | 590 | } |
591 | 591 | ||
592 | #ifdef CONFIG_NUMA | 592 | #ifdef CONFIG_NUMA |
593 | /* Called from the slab reaper to drain remote pagesets */ | 593 | /* |
594 | void drain_remote_pages(void) | 594 | * Called from the slab reaper to drain pagesets on a particular node that |
595 | * belong to the currently executing processor. | ||
596 | */ | ||
597 | void drain_node_pages(int nodeid) | ||
595 | { | 598 | { |
596 | struct zone *zone; | 599 | int i, z; |
597 | int i; | ||
598 | unsigned long flags; | 600 | unsigned long flags; |
599 | 601 | ||
600 | local_irq_save(flags); | 602 | local_irq_save(flags); |
601 | for_each_zone(zone) { | 603 | for (z = 0; z < MAX_NR_ZONES; z++) { |
604 | struct zone *zone = NODE_DATA(nodeid)->node_zones + z; | ||
602 | struct per_cpu_pageset *pset; | 605 | struct per_cpu_pageset *pset; |
603 | 606 | ||
604 | /* Do not drain local pagesets */ | ||
605 | if (zone->zone_pgdat->node_id == numa_node_id()) | ||
606 | continue; | ||
607 | |||
608 | pset = zone_pcp(zone, smp_processor_id()); | 607 | pset = zone_pcp(zone, smp_processor_id()); |
609 | for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) { | 608 | for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) { |
610 | struct per_cpu_pages *pcp; | 609 | struct per_cpu_pages *pcp; |
diff --git a/mm/rmap.c b/mm/rmap.c --- a/mm/rmap.c +++ b/mm/rmap.c | |||
@@ -212,25 +212,33 @@ out: | |||
212 | * through real pte's pointing to valid pages and then releasing | 212 | * through real pte's pointing to valid pages and then releasing |
213 | * the page from the swap cache. | 213 | * the page from the swap cache. |
214 | * | 214 | * |
215 | * Must hold page lock on page. | 215 | * Must hold page lock on page and mmap_sem of one vma that contains |
216 | * the page. | ||
216 | */ | 217 | */ |
217 | void remove_from_swap(struct page *page) | 218 | void remove_from_swap(struct page *page) |
218 | { | 219 | { |
219 | struct anon_vma *anon_vma; | 220 | struct anon_vma *anon_vma; |
220 | struct vm_area_struct *vma; | 221 | struct vm_area_struct *vma; |
222 | unsigned long mapping; | ||
221 | 223 | ||
222 | if (!PageAnon(page) || !PageSwapCache(page)) | 224 | if (!PageSwapCache(page)) |
223 | return; | 225 | return; |
224 | 226 | ||
225 | anon_vma = page_lock_anon_vma(page); | 227 | mapping = (unsigned long)page->mapping; |
226 | if (!anon_vma) | 228 | |
229 | if (!mapping || (mapping & PAGE_MAPPING_ANON) == 0) | ||
227 | return; | 230 | return; |
228 | 231 | ||
232 | /* | ||
233 | * We hold the mmap_sem lock. So no need to call page_lock_anon_vma. | ||
234 | */ | ||
235 | anon_vma = (struct anon_vma *) (mapping - PAGE_MAPPING_ANON); | ||
236 | spin_lock(&anon_vma->lock); | ||
237 | |||
229 | list_for_each_entry(vma, &anon_vma->head, anon_vma_node) | 238 | list_for_each_entry(vma, &anon_vma->head, anon_vma_node) |
230 | remove_vma_swap(vma, page); | 239 | remove_vma_swap(vma, page); |
231 | 240 | ||
232 | spin_unlock(&anon_vma->lock); | 241 | spin_unlock(&anon_vma->lock); |
233 | |||
234 | delete_from_swap_cache(page); | 242 | delete_from_swap_cache(page); |
235 | } | 243 | } |
236 | EXPORT_SYMBOL(remove_from_swap); | 244 | EXPORT_SYMBOL(remove_from_swap); |
@@ -529,9 +537,6 @@ void page_add_new_anon_rmap(struct page *page, | |||
529 | */ | 537 | */ |
530 | void page_add_file_rmap(struct page *page) | 538 | void page_add_file_rmap(struct page *page) |
531 | { | 539 | { |
532 | BUG_ON(PageAnon(page)); | ||
533 | BUG_ON(!pfn_valid(page_to_pfn(page))); | ||
534 | |||
535 | if (atomic_inc_and_test(&page->_mapcount)) | 540 | if (atomic_inc_and_test(&page->_mapcount)) |
536 | __inc_page_state(nr_mapped); | 541 | __inc_page_state(nr_mapped); |
537 | } | 542 | } |
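In the remove_from_swap() hunk above, the anon_vma is now decoded straight from page->mapping by masking off the PAGE_MAPPING_ANON tag bit instead of calling page_lock_anon_vma(), which is safe here because mmap_sem is already held. A standalone sketch of that low-bit pointer tagging trick, with illustrative names rather than the kernel's:

```c
#include <stdint.h>
#include <stdio.h>

#define MAPPING_ANON 0x1UL            /* low tag bit, like PAGE_MAPPING_ANON */

struct anon_ctx { int id; };

/* Store a pointer with its low bit set as an "anonymous" tag. */
static uintptr_t tag_anon(struct anon_ctx *ctx)
{
    return (uintptr_t)ctx | MAPPING_ANON;
}

/* Recover the pointer only if the tag bit says it is anonymous. */
static struct anon_ctx *untag_anon(uintptr_t mapping)
{
    if (!mapping || !(mapping & MAPPING_ANON))
        return NULL;                  /* file-backed or unmapped */
    return (struct anon_ctx *)(mapping - MAPPING_ANON);
}

int main(void)
{
    struct anon_ctx ctx = { .id = 42 };
    uintptr_t mapping = tag_anon(&ctx);
    struct anon_ctx *back = untag_anon(mapping);

    printf("recovered id %d\n", back ? back->id : -1);
    return 0;
}
```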
diff --git a/mm/slab.c b/mm/slab.c --- a/mm/slab.c +++ b/mm/slab.c | |||
@@ -789,6 +789,47 @@ static void __slab_error(const char *function, struct kmem_cache *cachep, char * | |||
789 | dump_stack(); | 789 | dump_stack(); |
790 | } | 790 | } |
791 | 791 | ||
792 | #ifdef CONFIG_NUMA | ||
793 | /* | ||
794 | * Special reaping functions for NUMA systems called from cache_reap(). | ||
795 | * These take care of doing round robin flushing of alien caches (containing | ||
796 | * objects freed on different nodes from which they were allocated) and the | ||
797 | * flushing of remote pcps by calling drain_node_pages. | ||
798 | */ | ||
799 | static DEFINE_PER_CPU(unsigned long, reap_node); | ||
800 | |||
801 | static void init_reap_node(int cpu) | ||
802 | { | ||
803 | int node; | ||
804 | |||
805 | node = next_node(cpu_to_node(cpu), node_online_map); | ||
806 | if (node == MAX_NUMNODES) | ||
807 | node = 0; | ||
808 | |||
809 | __get_cpu_var(reap_node) = node; | ||
810 | } | ||
811 | |||
812 | static void next_reap_node(void) | ||
813 | { | ||
814 | int node = __get_cpu_var(reap_node); | ||
815 | |||
816 | /* | ||
817 | * Also drain per cpu pages on remote zones | ||
818 | */ | ||
819 | if (node != numa_node_id()) | ||
820 | drain_node_pages(node); | ||
821 | |||
822 | node = next_node(node, node_online_map); | ||
823 | if (unlikely(node >= MAX_NUMNODES)) | ||
824 | node = first_node(node_online_map); | ||
825 | __get_cpu_var(reap_node) = node; | ||
826 | } | ||
827 | |||
828 | #else | ||
829 | #define init_reap_node(cpu) do { } while (0) | ||
830 | #define next_reap_node(void) do { } while (0) | ||
831 | #endif | ||
832 | |||
792 | /* | 833 | /* |
793 | * Initiate the reap timer running on the target CPU. We run at around 1 to 2Hz | 834 | * Initiate the reap timer running on the target CPU. We run at around 1 to 2Hz |
794 | * via the workqueue/eventd. | 835 | * via the workqueue/eventd. |
@@ -806,6 +847,7 @@ static void __devinit start_cpu_timer(int cpu) | |||
806 | * at that time. | 847 | * at that time. |
807 | */ | 848 | */ |
808 | if (keventd_up() && reap_work->func == NULL) { | 849 | if (keventd_up() && reap_work->func == NULL) { |
850 | init_reap_node(cpu); | ||
809 | INIT_WORK(reap_work, cache_reap, NULL); | 851 | INIT_WORK(reap_work, cache_reap, NULL); |
810 | schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu); | 852 | schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu); |
811 | } | 853 | } |
@@ -884,6 +926,23 @@ static void __drain_alien_cache(struct kmem_cache *cachep, | |||
884 | } | 926 | } |
885 | } | 927 | } |
886 | 928 | ||
929 | /* | ||
930 | * Called from cache_reap() to regularly drain alien caches round robin. | ||
931 | */ | ||
932 | static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) | ||
933 | { | ||
934 | int node = __get_cpu_var(reap_node); | ||
935 | |||
936 | if (l3->alien) { | ||
937 | struct array_cache *ac = l3->alien[node]; | ||
938 | if (ac && ac->avail) { | ||
939 | spin_lock_irq(&ac->lock); | ||
940 | __drain_alien_cache(cachep, ac, node); | ||
941 | spin_unlock_irq(&ac->lock); | ||
942 | } | ||
943 | } | ||
944 | } | ||
945 | |||
887 | static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien) | 946 | static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien) |
888 | { | 947 | { |
889 | int i = 0; | 948 | int i = 0; |
@@ -902,6 +961,7 @@ static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **al | |||
902 | #else | 961 | #else |
903 | 962 | ||
904 | #define drain_alien_cache(cachep, alien) do { } while (0) | 963 | #define drain_alien_cache(cachep, alien) do { } while (0) |
964 | #define reap_alien(cachep, l3) do { } while (0) | ||
905 | 965 | ||
906 | static inline struct array_cache **alloc_alien_cache(int node, int limit) | 966 | static inline struct array_cache **alloc_alien_cache(int node, int limit) |
907 | { | 967 | { |
@@ -1124,6 +1184,7 @@ void __init kmem_cache_init(void) | |||
1124 | struct cache_sizes *sizes; | 1184 | struct cache_sizes *sizes; |
1125 | struct cache_names *names; | 1185 | struct cache_names *names; |
1126 | int i; | 1186 | int i; |
1187 | int order; | ||
1127 | 1188 | ||
1128 | for (i = 0; i < NUM_INIT_LISTS; i++) { | 1189 | for (i = 0; i < NUM_INIT_LISTS; i++) { |
1129 | kmem_list3_init(&initkmem_list3[i]); | 1190 | kmem_list3_init(&initkmem_list3[i]); |
@@ -1167,11 +1228,15 @@ void __init kmem_cache_init(void) | |||
1167 | 1228 | ||
1168 | cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, cache_line_size()); | 1229 | cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, cache_line_size()); |
1169 | 1230 | ||
1170 | cache_estimate(0, cache_cache.buffer_size, cache_line_size(), 0, | 1231 | for (order = 0; order < MAX_ORDER; order++) { |
1171 | &left_over, &cache_cache.num); | 1232 | cache_estimate(order, cache_cache.buffer_size, |
1233 | cache_line_size(), 0, &left_over, &cache_cache.num); | ||
1234 | if (cache_cache.num) | ||
1235 | break; | ||
1236 | } | ||
1172 | if (!cache_cache.num) | 1237 | if (!cache_cache.num) |
1173 | BUG(); | 1238 | BUG(); |
1174 | 1239 | cache_cache.gfporder = order; | |
1175 | cache_cache.colour = left_over / cache_cache.colour_off; | 1240 | cache_cache.colour = left_over / cache_cache.colour_off; |
1176 | cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) + | 1241 | cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) + |
1177 | sizeof(struct slab), cache_line_size()); | 1242 | sizeof(struct slab), cache_line_size()); |
@@ -1628,36 +1693,44 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep, | |||
1628 | size_t size, size_t align, unsigned long flags) | 1693 | size_t size, size_t align, unsigned long flags) |
1629 | { | 1694 | { |
1630 | size_t left_over = 0; | 1695 | size_t left_over = 0; |
1696 | int gfporder; | ||
1631 | 1697 | ||
1632 | for (;; cachep->gfporder++) { | 1698 | for (gfporder = 0 ; gfporder <= MAX_GFP_ORDER; gfporder++) { |
1633 | unsigned int num; | 1699 | unsigned int num; |
1634 | size_t remainder; | 1700 | size_t remainder; |
1635 | 1701 | ||
1636 | if (cachep->gfporder > MAX_GFP_ORDER) { | 1702 | cache_estimate(gfporder, size, align, flags, &remainder, &num); |
1637 | cachep->num = 0; | ||
1638 | break; | ||
1639 | } | ||
1640 | |||
1641 | cache_estimate(cachep->gfporder, size, align, flags, | ||
1642 | &remainder, &num); | ||
1643 | if (!num) | 1703 | if (!num) |
1644 | continue; | 1704 | continue; |
1705 | |||
1645 | /* More than offslab_limit objects will cause problems */ | 1706 | /* More than offslab_limit objects will cause problems */ |
1646 | if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) | 1707 | if ((flags & CFLGS_OFF_SLAB) && num > offslab_limit) |
1647 | break; | 1708 | break; |
1648 | 1709 | ||
1710 | /* Found something acceptable - save it away */ | ||
1649 | cachep->num = num; | 1711 | cachep->num = num; |
1712 | cachep->gfporder = gfporder; | ||
1650 | left_over = remainder; | 1713 | left_over = remainder; |
1651 | 1714 | ||
1652 | /* | 1715 | /* |
1716 | * A VFS-reclaimable slab tends to have most allocations | ||
1717 | * as GFP_NOFS and we really don't want to have to be allocating | ||
1718 | * higher-order pages when we are unable to shrink dcache. | ||
1719 | */ | ||
1720 | if (flags & SLAB_RECLAIM_ACCOUNT) | ||
1721 | break; | ||
1722 | |||
1723 | /* | ||
1653 | * Large number of objects is good, but very large slabs are | 1724 | * Large number of objects is good, but very large slabs are |
1654 | * currently bad for the gfp()s. | 1725 | * currently bad for the gfp()s. |
1655 | */ | 1726 | */ |
1656 | if (cachep->gfporder >= slab_break_gfp_order) | 1727 | if (gfporder >= slab_break_gfp_order) |
1657 | break; | 1728 | break; |
1658 | 1729 | ||
1659 | if ((left_over * 8) <= (PAGE_SIZE << cachep->gfporder)) | 1730 | /* |
1660 | /* Acceptable internal fragmentation */ | 1731 | * Acceptable internal fragmentation? |
1732 | */ | ||
1733 | if ((left_over * 8) <= (PAGE_SIZE << gfporder)) | ||
1661 | break; | 1734 | break; |
1662 | } | 1735 | } |
1663 | return left_over; | 1736 | return left_over; |
@@ -1869,17 +1942,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1869 | 1942 | ||
1870 | size = ALIGN(size, align); | 1943 | size = ALIGN(size, align); |
1871 | 1944 | ||
1872 | if ((flags & SLAB_RECLAIM_ACCOUNT) && size <= PAGE_SIZE) { | 1945 | left_over = calculate_slab_order(cachep, size, align, flags); |
1873 | /* | ||
1874 | * A VFS-reclaimable slab tends to have most allocations | ||
1875 | * as GFP_NOFS and we really don't want to have to be allocating | ||
1876 | * higher-order pages when we are unable to shrink dcache. | ||
1877 | */ | ||
1878 | cachep->gfporder = 0; | ||
1879 | cache_estimate(cachep->gfporder, size, align, flags, | ||
1880 | &left_over, &cachep->num); | ||
1881 | } else | ||
1882 | left_over = calculate_slab_order(cachep, size, align, flags); | ||
1883 | 1946 | ||
1884 | if (!cachep->num) { | 1947 | if (!cachep->num) { |
1885 | printk("kmem_cache_create: couldn't create cache %s.\n", name); | 1948 | printk("kmem_cache_create: couldn't create cache %s.\n", name); |
@@ -2554,7 +2617,7 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp) | |||
2554 | "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n", | 2617 | "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n", |
2555 | cachep->name, cachep->num, slabp, slabp->inuse); | 2618 | cachep->name, cachep->num, slabp, slabp->inuse); |
2556 | for (i = 0; | 2619 | for (i = 0; |
2557 | i < sizeof(slabp) + cachep->num * sizeof(kmem_bufctl_t); | 2620 | i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t); |
2558 | i++) { | 2621 | i++) { |
2559 | if ((i % 16) == 0) | 2622 | if ((i % 16) == 0) |
2560 | printk("\n%03x:", i); | 2623 | printk("\n%03x:", i); |
@@ -3494,8 +3557,7 @@ static void cache_reap(void *unused) | |||
3494 | check_irq_on(); | 3557 | check_irq_on(); |
3495 | 3558 | ||
3496 | l3 = searchp->nodelists[numa_node_id()]; | 3559 | l3 = searchp->nodelists[numa_node_id()]; |
3497 | if (l3->alien) | 3560 | reap_alien(searchp, l3); |
3498 | drain_alien_cache(searchp, l3->alien); | ||
3499 | spin_lock_irq(&l3->list_lock); | 3561 | spin_lock_irq(&l3->list_lock); |
3500 | 3562 | ||
3501 | drain_array_locked(searchp, cpu_cache_get(searchp), 0, | 3563 | drain_array_locked(searchp, cpu_cache_get(searchp), 0, |
@@ -3545,7 +3607,7 @@ static void cache_reap(void *unused) | |||
3545 | } | 3607 | } |
3546 | check_irq_on(); | 3608 | check_irq_on(); |
3547 | mutex_unlock(&cache_chain_mutex); | 3609 | mutex_unlock(&cache_chain_mutex); |
3548 | drain_remote_pages(); | 3610 | next_reap_node(); |
3549 | /* Setup the next iteration */ | 3611 | /* Setup the next iteration */ |
3550 | schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); | 3612 | schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); |
3551 | } | 3613 | } |
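The calculate_slab_order() rework walks gfporder up from zero, keeps the first order that holds at least one object, bails out early for SLAB_RECLAIM_ACCOUNT caches, and otherwise accepts an order once the leftover space is at most 1/8 of the slab. A simplified user-space model of that loop follows; slab management overhead and the early-exit cases are ignored, so the numbers are only indicative:

```c
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define MAX_ORDER 11

/* Pick the smallest page order whose leftover space is at most 1/8
 * of the slab, mirroring the shape of calculate_slab_order(). */
static int pick_order(unsigned long obj_size, unsigned long *leftover)
{
    int order;

    for (order = 0; order < MAX_ORDER; order++) {
        unsigned long slab = PAGE_SIZE << order;
        unsigned long num = slab / obj_size;   /* management overhead ignored */
        unsigned long rem = slab - num * obj_size;

        if (!num)
            continue;               /* object does not even fit yet */
        *leftover = rem;
        if (rem * 8 <= slab)
            break;                  /* acceptable internal fragmentation */
    }
    return order;
}

int main(void)
{
    unsigned long left = 0;
    int order = pick_order(1100, &left);

    printf("order %d, %lu bytes left over per slab\n", order, left);
    return 0;
}
```

With a 1100-byte object this settles on order 1, trading a slightly larger allocation for much less wasted space per slab.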
diff --git a/mm/swap.c b/mm/swap.c --- a/mm/swap.c +++ b/mm/swap.c | |||
@@ -393,7 +393,8 @@ void pagevec_strip(struct pagevec *pvec) | |||
393 | struct page *page = pvec->pages[i]; | 393 | struct page *page = pvec->pages[i]; |
394 | 394 | ||
395 | if (PagePrivate(page) && !TestSetPageLocked(page)) { | 395 | if (PagePrivate(page) && !TestSetPageLocked(page)) { |
396 | try_to_release_page(page, 0); | 396 | if (PagePrivate(page)) |
397 | try_to_release_page(page, 0); | ||
397 | unlock_page(page); | 398 | unlock_page(page); |
398 | } | 399 | } |
399 | } | 400 | } |
@@ -489,13 +490,34 @@ void percpu_counter_mod(struct percpu_counter *fbc, long amount) | |||
489 | if (count >= FBC_BATCH || count <= -FBC_BATCH) { | 490 | if (count >= FBC_BATCH || count <= -FBC_BATCH) { |
490 | spin_lock(&fbc->lock); | 491 | spin_lock(&fbc->lock); |
491 | fbc->count += count; | 492 | fbc->count += count; |
493 | *pcount = 0; | ||
492 | spin_unlock(&fbc->lock); | 494 | spin_unlock(&fbc->lock); |
493 | count = 0; | 495 | } else { |
496 | *pcount = count; | ||
494 | } | 497 | } |
495 | *pcount = count; | ||
496 | put_cpu(); | 498 | put_cpu(); |
497 | } | 499 | } |
498 | EXPORT_SYMBOL(percpu_counter_mod); | 500 | EXPORT_SYMBOL(percpu_counter_mod); |
501 | |||
502 | /* | ||
503 | * Add up all the per-cpu counts, return the result. This is a more accurate | ||
504 | * but much slower version of percpu_counter_read_positive() | ||
505 | */ | ||
506 | long percpu_counter_sum(struct percpu_counter *fbc) | ||
507 | { | ||
508 | long ret; | ||
509 | int cpu; | ||
510 | |||
511 | spin_lock(&fbc->lock); | ||
512 | ret = fbc->count; | ||
513 | for_each_cpu(cpu) { | ||
514 | long *pcount = per_cpu_ptr(fbc->counters, cpu); | ||
515 | ret += *pcount; | ||
516 | } | ||
517 | spin_unlock(&fbc->lock); | ||
518 | return ret < 0 ? 0 : ret; | ||
519 | } | ||
520 | EXPORT_SYMBOL(percpu_counter_sum); | ||
499 | #endif | 521 | #endif |
500 | 522 | ||
501 | /* | 523 | /* |
diff --git a/mm/vmscan.c b/mm/vmscan.c index b0af7593d01e..4fe7e3aa02e2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -700,7 +700,7 @@ int migrate_page_remove_references(struct page *newpage, | |||
700 | * the page. | 700 | * the page. |
701 | */ | 701 | */ |
702 | if (!mapping || page_mapcount(page) + nr_refs != page_count(page)) | 702 | if (!mapping || page_mapcount(page) + nr_refs != page_count(page)) |
703 | return 1; | 703 | return -EAGAIN; |
704 | 704 | ||
705 | /* | 705 | /* |
706 | * Establish swap ptes for anonymous pages or destroy pte | 706 | * Establish swap ptes for anonymous pages or destroy pte |
@@ -721,13 +721,15 @@ int migrate_page_remove_references(struct page *newpage, | |||
721 | * If the page was not migrated then the PageSwapCache bit | 721 | * If the page was not migrated then the PageSwapCache bit |
722 | * is still set and the operation may continue. | 722 | * is still set and the operation may continue. |
723 | */ | 723 | */ |
724 | try_to_unmap(page, 1); | 724 | if (try_to_unmap(page, 1) == SWAP_FAIL) |
725 | /* A vma has VM_LOCKED set -> Permanent failure */ | ||
726 | return -EPERM; | ||
725 | 727 | ||
726 | /* | 728 | /* |
727 | * Give up if we were unable to remove all mappings. | 729 | * Give up if we were unable to remove all mappings. |
728 | */ | 730 | */ |
729 | if (page_mapcount(page)) | 731 | if (page_mapcount(page)) |
730 | return 1; | 732 | return -EAGAIN; |
731 | 733 | ||
732 | write_lock_irq(&mapping->tree_lock); | 734 | write_lock_irq(&mapping->tree_lock); |
733 | 735 | ||
@@ -738,7 +740,7 @@ int migrate_page_remove_references(struct page *newpage, | |||
738 | if (!page_mapping(page) || page_count(page) != nr_refs || | 740 | if (!page_mapping(page) || page_count(page) != nr_refs || |
739 | *radix_pointer != page) { | 741 | *radix_pointer != page) { |
740 | write_unlock_irq(&mapping->tree_lock); | 742 | write_unlock_irq(&mapping->tree_lock); |
741 | return 1; | 743 | return -EAGAIN; |
742 | } | 744 | } |
743 | 745 | ||
744 | /* | 746 | /* |
@@ -813,10 +815,14 @@ EXPORT_SYMBOL(migrate_page_copy); | |||
813 | */ | 815 | */ |
814 | int migrate_page(struct page *newpage, struct page *page) | 816 | int migrate_page(struct page *newpage, struct page *page) |
815 | { | 817 | { |
818 | int rc; | ||
819 | |||
816 | BUG_ON(PageWriteback(page)); /* Writeback must be complete */ | 820 | BUG_ON(PageWriteback(page)); /* Writeback must be complete */ |
817 | 821 | ||
818 | if (migrate_page_remove_references(newpage, page, 2)) | 822 | rc = migrate_page_remove_references(newpage, page, 2); |
819 | return -EAGAIN; | 823 | |
824 | if (rc) | ||
825 | return rc; | ||
820 | 826 | ||
821 | migrate_page_copy(newpage, page); | 827 | migrate_page_copy(newpage, page); |
822 | 828 | ||
@@ -1883,7 +1889,8 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
1883 | 1889 | ||
1884 | if (!(gfp_mask & __GFP_WAIT) || | 1890 | if (!(gfp_mask & __GFP_WAIT) || |
1885 | zone->all_unreclaimable || | 1891 | zone->all_unreclaimable || |
1886 | atomic_read(&zone->reclaim_in_progress) > 0) | 1892 | atomic_read(&zone->reclaim_in_progress) > 0 || |
1893 | (p->flags & PF_MEMALLOC)) | ||
1887 | return 0; | 1894 | return 0; |
1888 | 1895 | ||
1889 | node_id = zone->zone_pgdat->node_id; | 1896 | node_id = zone->zone_pgdat->node_id; |
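The migration paths in vmscan.c now return errno-style codes instead of a bare 1: -EAGAIN for transient races on the reference counts or radix tree, and -EPERM when try_to_unmap() fails against a VM_LOCKED vma. A hypothetical caller, with made-up helper names, would retry only the transient case:

```c
#include <errno.h>
#include <stdio.h>

/* Stand-in for a migration attempt; real code would call migrate_page(). */
static int try_migrate(int attempt)
{
    return attempt < 2 ? -EAGAIN : 0;   /* fail twice, then succeed */
}

/* Retry transient failures, give up immediately on permanent ones. */
static int migrate_with_retry(int max_retries)
{
    int attempt, rc = -EAGAIN;

    for (attempt = 0; attempt < max_retries && rc == -EAGAIN; attempt++)
        rc = try_migrate(attempt);

    if (rc == -EPERM)
        fprintf(stderr, "page is pinned by a locked vma, not retrying\n");
    return rc;
}

int main(void)
{
    printf("result: %d\n", migrate_with_retry(5));
    return 0;
}
```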