Diffstat (limited to 'mm')
-rw-r--r--   mm/mempolicy.c | 132
-rw-r--r--   mm/nommu.c     |   8
-rw-r--r--   mm/oom_kill.c  |   5
-rw-r--r--   mm/rmap.c      |  18
-rw-r--r--   mm/slab.c      |  57
-rw-r--r--   mm/swap.c      |  25
6 files changed, 182 insertions, 63 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 67af4cea1e23..954981b14303 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -197,7 +197,7 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
         return policy;
 }
 
-static void gather_stats(struct page *, void *);
+static void gather_stats(struct page *, void *, int pte_dirty);
 static void migrate_page_add(struct page *page, struct list_head *pagelist,
                 unsigned long flags);
 
@@ -239,7 +239,7 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                 continue;
 
                 if (flags & MPOL_MF_STATS)
-                        gather_stats(page, private);
+                        gather_stats(page, private, pte_dirty(*pte));
                 else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
                         migrate_page_add(page, private, flags);
                 else
@@ -954,7 +954,8 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
                 goto out;
         }
 
-        err = do_migrate_pages(mm, &old, &new, MPOL_MF_MOVE);
+        err = do_migrate_pages(mm, &old, &new,
+                capable(CAP_SYS_ADMIN) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE);
 out:
         mmput(mm);
         return err;
@@ -1752,66 +1753,145 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 struct numa_maps {
         unsigned long pages;
         unsigned long anon;
-        unsigned long mapped;
+        unsigned long active;
+        unsigned long writeback;
         unsigned long mapcount_max;
+        unsigned long dirty;
+        unsigned long swapcache;
         unsigned long node[MAX_NUMNODES];
 };
 
-static void gather_stats(struct page *page, void *private)
+static void gather_stats(struct page *page, void *private, int pte_dirty)
 {
         struct numa_maps *md = private;
         int count = page_mapcount(page);
 
-        if (count)
-                md->mapped++;
+        md->pages++;
+        if (pte_dirty || PageDirty(page))
+                md->dirty++;
 
-        if (count > md->mapcount_max)
-                md->mapcount_max = count;
+        if (PageSwapCache(page))
+                md->swapcache++;
 
-        md->pages++;
+        if (PageActive(page))
+                md->active++;
+
+        if (PageWriteback(page))
+                md->writeback++;
 
         if (PageAnon(page))
                 md->anon++;
 
+        if (count > md->mapcount_max)
+                md->mapcount_max = count;
+
         md->node[page_to_nid(page)]++;
         cond_resched();
 }
 
+#ifdef CONFIG_HUGETLB_PAGE
+static void check_huge_range(struct vm_area_struct *vma,
+                unsigned long start, unsigned long end,
+                struct numa_maps *md)
+{
+        unsigned long addr;
+        struct page *page;
+
+        for (addr = start; addr < end; addr += HPAGE_SIZE) {
+                pte_t *ptep = huge_pte_offset(vma->vm_mm, addr & HPAGE_MASK);
+                pte_t pte;
+
+                if (!ptep)
+                        continue;
+
+                pte = *ptep;
+                if (pte_none(pte))
+                        continue;
+
+                page = pte_page(pte);
+                if (!page)
+                        continue;
+
+                gather_stats(page, md, pte_dirty(*ptep));
+        }
+}
+#else
+static inline void check_huge_range(struct vm_area_struct *vma,
+                unsigned long start, unsigned long end,
+                struct numa_maps *md)
+{
+}
+#endif
+
 int show_numa_map(struct seq_file *m, void *v)
 {
         struct task_struct *task = m->private;
         struct vm_area_struct *vma = v;
         struct numa_maps *md;
+        struct file *file = vma->vm_file;
+        struct mm_struct *mm = vma->vm_mm;
         int n;
         char buffer[50];
 
-        if (!vma->vm_mm)
+        if (!mm)
                 return 0;
 
         md = kzalloc(sizeof(struct numa_maps), GFP_KERNEL);
         if (!md)
                 return 0;
 
-        check_pgd_range(vma, vma->vm_start, vma->vm_end,
-                        &node_online_map, MPOL_MF_STATS, md);
+        mpol_to_str(buffer, sizeof(buffer),
+                        get_vma_policy(task, vma, vma->vm_start));
 
-        if (md->pages) {
-                mpol_to_str(buffer, sizeof(buffer),
-                        get_vma_policy(task, vma, vma->vm_start));
+        seq_printf(m, "%08lx %s", vma->vm_start, buffer);
 
-                seq_printf(m, "%08lx %s pages=%lu mapped=%lu maxref=%lu",
-                        vma->vm_start, buffer, md->pages,
-                        md->mapped, md->mapcount_max);
+        if (file) {
+                seq_printf(m, " file=");
+                seq_path(m, file->f_vfsmnt, file->f_dentry, "\n\t= ");
+        } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
+                seq_printf(m, " heap");
+        } else if (vma->vm_start <= mm->start_stack &&
+                        vma->vm_end >= mm->start_stack) {
+                seq_printf(m, " stack");
+        }
 
-                if (md->anon)
-                        seq_printf(m," anon=%lu",md->anon);
+        if (is_vm_hugetlb_page(vma)) {
+                check_huge_range(vma, vma->vm_start, vma->vm_end, md);
+                seq_printf(m, " huge");
+        } else {
+                check_pgd_range(vma, vma->vm_start, vma->vm_end,
+                        &node_online_map, MPOL_MF_STATS, md);
+        }
 
-                for_each_online_node(n)
-                        if (md->node[n])
-                                seq_printf(m, " N%d=%lu", n, md->node[n]);
+        if (!md->pages)
+                goto out;
 
-                seq_putc(m, '\n');
-        }
+        if (md->anon)
+                seq_printf(m," anon=%lu",md->anon);
+
+        if (md->dirty)
+                seq_printf(m," dirty=%lu",md->dirty);
+
+        if (md->pages != md->anon && md->pages != md->dirty)
+                seq_printf(m, " mapped=%lu", md->pages);
+
+        if (md->mapcount_max > 1)
+                seq_printf(m, " mapmax=%lu", md->mapcount_max);
+
+        if (md->swapcache)
+                seq_printf(m," swapcache=%lu", md->swapcache);
+
+        if (md->active < md->pages && !is_vm_hugetlb_page(vma))
+                seq_printf(m," active=%lu", md->active);
+
+        if (md->writeback)
+                seq_printf(m," writeback=%lu", md->writeback);
+
+        for_each_online_node(n)
+                if (md->node[n])
+                        seq_printf(m, " N%d=%lu", n, md->node[n]);
+out:
+        seq_putc(m, '\n');
         kfree(md);
 
         if (m->count < m->size)
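
Note: the reworked show_numa_map() above emits one line per VMA: start address, policy string, an optional file/heap/stack/huge tag, the non-zero counters, and per-node page counts. A minimal user-space sketch for reading the result follows; the file path handling is standard procfs, but the sample line in the comment is only illustrative, not taken from the patch.

/*
 * Dump /proc/<pid>/numa_maps as produced by the reworked show_numa_map().
 * A line might look like (illustrative only):
 *   00400000 default file=/bin/cat mapped=3 mapmax=2 N0=3
 */
#include <stdio.h>

int main(int argc, char **argv)
{
        char path[64], line[1024];
        FILE *f;

        snprintf(path, sizeof(path), "/proc/%s/numa_maps",
                 argc > 1 ? argv[1] : "self");
        f = fopen(path, "r");
        if (!f) {
                perror(path);
                return 1;
        }
        while (fgets(line, sizeof(line), f))
                fputs(line, stdout);    /* each line: one VMA's NUMA statistics */
        fclose(f);
        return 0;
}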
diff --git a/mm/nommu.c b/mm/nommu.c
index 99d21020ec9d..4951f4786f28 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -53,7 +53,6 @@ DECLARE_RWSEM(nommu_vma_sem);
 struct vm_operations_struct generic_file_vm_ops = {
 };
 
-EXPORT_SYMBOL(vmalloc);
 EXPORT_SYMBOL(vfree);
 EXPORT_SYMBOL(vmalloc_to_page);
 EXPORT_SYMBOL(vmalloc_32);
@@ -205,6 +204,13 @@ void *vmalloc(unsigned long size)
 {
         return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
 }
+EXPORT_SYMBOL(vmalloc);
+
+void *vmalloc_node(unsigned long size, int node)
+{
+        return vmalloc(size);
+}
+EXPORT_SYMBOL(vmalloc_node);
 
 /*
  * vmalloc_32 - allocate virtually continguos memory (32bit addressable)
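
Note: with the hunk above, !MMU kernels gain a vmalloc_node() that ignores the node hint and falls back to vmalloc(), so node-aware callers compile unchanged. A user-space analogue of that fallback pattern is sketched below; the function names are hypothetical and not part of the patch.

#include <stdio.h>
#include <stdlib.h>

/*
 * Hypothetical analogue of the nommu fallback: the node-aware entry
 * point always exists, but without NUMA support it simply forwards
 * to the plain allocator, as vmalloc_node() forwards to vmalloc().
 */
static void *buf_alloc(size_t size)
{
        return malloc(size);
}

static void *buf_alloc_node(size_t size, int node)
{
        (void)node;                     /* placement hint ignored, as on nommu */
        return buf_alloc(size);
}

int main(void)
{
        void *p = buf_alloc_node(4096, 0);

        if (!p)
                return 1;
        printf("buf_alloc_node(4096, 0) -> %p\n", p);
        free(p);
        return 0;
}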
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 8123fad5a485..78747afad6b0 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -302,7 +302,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 {
         struct mm_struct *mm = NULL;
         task_t *p;
-        unsigned long points;
+        unsigned long points = 0;
 
         if (printk_ratelimit()) {
                 printk("oom-killer: gfp_mask=0x%x, order=%d\n",
@@ -355,6 +355,7 @@ retry:
         }
 
 out:
+        read_unlock(&tasklist_lock);
         cpuset_unlock();
         if (mm)
                 mmput(mm);
@@ -364,5 +365,5 @@ out:
          * retry to allocate memory unless "p" is current
          */
         if (!test_thread_flag(TIF_MEMDIE))
-                schedule_timeout_interruptible(1);
+                schedule_timeout_uninterruptible(1);
 }
diff --git a/mm/rmap.c b/mm/rmap.c
index df2c41c2a9a2..d8ce5ff61454 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -212,25 +212,33 @@ out:
  * through real pte's pointing to valid pages and then releasing
  * the page from the swap cache.
  *
- * Must hold page lock on page.
+ * Must hold page lock on page and mmap_sem of one vma that contains
+ * the page.
  */
 void remove_from_swap(struct page *page)
 {
         struct anon_vma *anon_vma;
         struct vm_area_struct *vma;
+        unsigned long mapping;
 
-        if (!PageAnon(page) || !PageSwapCache(page))
+        if (!PageSwapCache(page))
                 return;
 
-        anon_vma = page_lock_anon_vma(page);
-        if (!anon_vma)
+        mapping = (unsigned long)page->mapping;
+
+        if (!mapping || (mapping & PAGE_MAPPING_ANON) == 0)
                 return;
 
+        /*
+         * We hold the mmap_sem lock. So no need to call page_lock_anon_vma.
+         */
+        anon_vma = (struct anon_vma *) (mapping - PAGE_MAPPING_ANON);
+        spin_lock(&anon_vma->lock);
+
         list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
                 remove_vma_swap(vma, page);
 
         spin_unlock(&anon_vma->lock);
-
         delete_from_swap_cache(page);
 }
 EXPORT_SYMBOL(remove_from_swap);
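
Note: the new remove_from_swap() above decodes page->mapping by hand: for anonymous pages the kernel stores the anon_vma pointer with the low PAGE_MAPPING_ANON bit set, so checking and subtracting that bit recovers the pointer without page_lock_anon_vma(). A small stand-alone sketch of that pointer-tagging scheme follows; the struct name and test harness are illustrative, not kernel code.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define MAPPING_ANON 1UL        /* low tag bit, like PAGE_MAPPING_ANON */

struct anon_region { int id; }; /* stand-in for struct anon_vma */

/* Tag a pointer by setting the low bit; alignment guarantees it is free. */
static uintptr_t tag_anon(struct anon_region *ar)
{
        return (uintptr_t)ar + MAPPING_ANON;
}

/* Recover the pointer exactly as the hunk does: subtract the tag bit. */
static struct anon_region *untag_anon(uintptr_t mapping)
{
        if (!mapping || (mapping & MAPPING_ANON) == 0)
                return NULL;            /* not an anonymous mapping */
        return (struct anon_region *)(mapping - MAPPING_ANON);
}

int main(void)
{
        struct anon_region ar = { .id = 42 };
        uintptr_t mapping = tag_anon(&ar);

        assert(untag_anon(mapping) == &ar);
        assert(untag_anon(0) == NULL);
        printf("decoded anon_region id=%d\n", untag_anon(mapping)->id);
        return 0;
}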
diff --git a/mm/slab.c b/mm/slab.c
index add05d808a4a..61800b88e241 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1124,6 +1124,7 @@ void __init kmem_cache_init(void)
         struct cache_sizes *sizes;
         struct cache_names *names;
         int i;
+        int order;
 
         for (i = 0; i < NUM_INIT_LISTS; i++) {
                 kmem_list3_init(&initkmem_list3[i]);
@@ -1167,11 +1168,15 @@ void __init kmem_cache_init(void)
 
         cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, cache_line_size());
 
-        cache_estimate(0, cache_cache.buffer_size, cache_line_size(), 0,
-                        &left_over, &cache_cache.num);
+        for (order = 0; order < MAX_ORDER; order++) {
+                cache_estimate(order, cache_cache.buffer_size,
+                        cache_line_size(), 0, &left_over, &cache_cache.num);
+                if (cache_cache.num)
+                        break;
+        }
         if (!cache_cache.num)
                 BUG();
-
+        cache_cache.gfporder = order;
         cache_cache.colour = left_over / cache_cache.colour_off;
         cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
                         sizeof(struct slab), cache_line_size());
@@ -1628,36 +1633,44 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep,
                         size_t size, size_t align, unsigned long flags)
 {
         size_t left_over = 0;
+        int gfporder;
 
-        for (;; cachep->gfporder++) {
+        for (gfporder = 0 ; gfporder <= MAX_GFP_ORDER; gfporder++) {
                 unsigned int num;
                 size_t remainder;
 
-                if (cachep->gfporder > MAX_GFP_ORDER) {
-                        cachep->num = 0;
-                        break;
-                }
-
-                cache_estimate(cachep->gfporder, size, align, flags,
-                                &remainder, &num);
+                cache_estimate(gfporder, size, align, flags, &remainder, &num);
                 if (!num)
                         continue;
+
                 /* More than offslab_limit objects will cause problems */
-                if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit)
+                if ((flags & CFLGS_OFF_SLAB) && num > offslab_limit)
                         break;
 
+                /* Found something acceptable - save it away */
                 cachep->num = num;
+                cachep->gfporder = gfporder;
                 left_over = remainder;
 
                 /*
+                 * A VFS-reclaimable slab tends to have most allocations
+                 * as GFP_NOFS and we really don't want to have to be allocating
+                 * higher-order pages when we are unable to shrink dcache.
+                 */
+                if (flags & SLAB_RECLAIM_ACCOUNT)
+                        break;
+
+                /*
                  * Large number of objects is good, but very large slabs are
                  * currently bad for the gfp()s.
                  */
-                if (cachep->gfporder >= slab_break_gfp_order)
+                if (gfporder >= slab_break_gfp_order)
                         break;
 
-                if ((left_over * 8) <= (PAGE_SIZE << cachep->gfporder))
-                        /* Acceptable internal fragmentation */
+                /*
+                 * Acceptable internal fragmentation?
+                 */
+                if ((left_over * 8) <= (PAGE_SIZE << gfporder))
                         break;
         }
         return left_over;
@@ -1869,17 +1882,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 
         size = ALIGN(size, align);
 
-        if ((flags & SLAB_RECLAIM_ACCOUNT) && size <= PAGE_SIZE) {
-                /*
-                 * A VFS-reclaimable slab tends to have most allocations
-                 * as GFP_NOFS and we really don't want to have to be allocating
-                 * higher-order pages when we are unable to shrink dcache.
-                 */
-                cachep->gfporder = 0;
-                cache_estimate(cachep->gfporder, size, align, flags,
-                                &left_over, &cachep->num);
-        } else
-                left_over = calculate_slab_order(cachep, size, align, flags);
+        left_over = calculate_slab_order(cachep, size, align, flags);
 
         if (!cachep->num) {
                 printk("kmem_cache_create: couldn't create cache %s.\n", name);
@@ -2554,7 +2557,7 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
2554 "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n", 2557 "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n",
2555 cachep->name, cachep->num, slabp, slabp->inuse); 2558 cachep->name, cachep->num, slabp, slabp->inuse);
2556 for (i = 0; 2559 for (i = 0;
2557 i < sizeof(slabp) + cachep->num * sizeof(kmem_bufctl_t); 2560 i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);
2558 i++) { 2561 i++) {
2559 if ((i % 16) == 0) 2562 if ((i % 16) == 0)
2560 printk("\n%03x:", i); 2563 printk("\n%03x:", i);
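
Note: calculate_slab_order() now walks the page orders itself: for each order it asks cache_estimate() how many objects fit, records the first acceptable fit, and stops early for SLAB_RECLAIM_ACCOUNT caches, at slab_break_gfp_order, or once wasted space drops to 1/8 of the slab. Below is a simplified stand-alone model of that search; it ignores alignment, colouring, off-slab overhead and the two early-exit heuristics, so the numbers are only illustrative.

#include <stddef.h>
#include <stdio.h>

#define MODEL_PAGE_SIZE 4096UL
#define MODEL_MAX_ORDER 5       /* assumed cap for this sketch */

/* Crude stand-in for cache_estimate(): objects per slab and leftover bytes. */
static void estimate(int order, size_t size, unsigned int *num, size_t *left)
{
        size_t slab_bytes = MODEL_PAGE_SIZE << order;

        *num = slab_bytes / size;
        *left = slab_bytes - *num * size;
}

/* Mirror of the order-search loop in calculate_slab_order(). */
static int pick_order(size_t size, unsigned int *num_out)
{
        int order;

        for (order = 0; order <= MODEL_MAX_ORDER; order++) {
                unsigned int num;
                size_t left;

                estimate(order, size, &num, &left);
                if (!num)
                        continue;       /* object does not fit yet */

                *num_out = num;
                /* Acceptable internal fragmentation: waste <= 1/8 of slab. */
                if (left * 8 <= (MODEL_PAGE_SIZE << order))
                        return order;
        }
        return MODEL_MAX_ORDER;
}

int main(void)
{
        size_t sizes[] = { 32, 1500, 5000, 9000 };
        unsigned int i;

        for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
                unsigned int num = 0;
                int order = pick_order(sizes[i], &num);

                printf("object %5zu bytes -> order %d, %u objects per slab\n",
                       sizes[i], order, num);
        }
        return 0;
}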
diff --git a/mm/swap.c b/mm/swap.c
index cce3dda59c59..e9ec06d845e8 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -489,13 +489,34 @@ void percpu_counter_mod(struct percpu_counter *fbc, long amount)
         if (count >= FBC_BATCH || count <= -FBC_BATCH) {
                 spin_lock(&fbc->lock);
                 fbc->count += count;
+                *pcount = 0;
                 spin_unlock(&fbc->lock);
-                count = 0;
+        } else {
+                *pcount = count;
         }
-        *pcount = count;
         put_cpu();
 }
 EXPORT_SYMBOL(percpu_counter_mod);
+
+/*
+ * Add up all the per-cpu counts, return the result. This is a more accurate
+ * but much slower version of percpu_counter_read_positive()
+ */
+long percpu_counter_sum(struct percpu_counter *fbc)
+{
+        long ret;
+        int cpu;
+
+        spin_lock(&fbc->lock);
+        ret = fbc->count;
+        for_each_cpu(cpu) {
+                long *pcount = per_cpu_ptr(fbc->counters, cpu);
+                ret += *pcount;
+        }
+        spin_unlock(&fbc->lock);
+        return ret < 0 ? 0 : ret;
+}
+EXPORT_SYMBOL(percpu_counter_sum);
 #endif
 
 /*