Diffstat (limited to 'mm')
-rw-r--r--  mm/hugetlb.c    |  10
-rw-r--r--  mm/mempolicy.c  |   2
-rw-r--r--  mm/page_alloc.c |  10
-rw-r--r--  mm/slab.c       | 186
-rw-r--r--  mm/slob.c       |   2
-rw-r--r--  mm/swap.c       |  32
-rw-r--r--  mm/vmscan.c     | 106
7 files changed, 220 insertions, 128 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b21d78c941b5..67f29516662a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -107,7 +107,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr)
 	set_page_count(page, 1);
 	page[1].mapping = (void *)free_huge_page;
 	for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i)
-		clear_highpage(&page[i]);
+		clear_user_highpage(&page[i], addr);
 	return page;
 }
 
@@ -391,12 +391,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	if (!new_page) {
 		page_cache_release(old_page);
-
-		/* Logically this is OOM, not a SIGBUS, but an OOM
-		 * could cause the kernel to go killing other
-		 * processes which won't help the hugepage situation
-		 * at all (?) */
-		return VM_FAULT_SIGBUS;
+		return VM_FAULT_OOM;
 	}
 
 	spin_unlock(&mm->page_table_lock);
@@ -444,6 +439,7 @@ retry:
 		page = alloc_huge_page(vma, address);
 		if (!page) {
 			hugetlb_put_quota(mapping);
+			ret = VM_FAULT_OOM;
 			goto out;
 		}
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 27da6d5c77ba..3bd7fb7e4b75 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1159,6 +1159,7 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
 	return interleave_nodes(pol);
 }
 
+#ifdef CONFIG_HUGETLBFS
 /* Return a zonelist suitable for a huge page allocation. */
 struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr)
 {
@@ -1172,6 +1173,7 @@ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr)
 	}
 	return zonelist_policy(GFP_HIGHUSER, pol);
 }
+#endif
 
 /* Allocate a page in interleaved policy.
    Own path because it needs to do special accounting. */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 44b4eb4202d9..dde04ff4be31 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1213,18 +1213,21 @@ static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
 {
 	int cpu = 0;
 
-	memset(ret, 0, sizeof(*ret));
+	memset(ret, 0, nr * sizeof(unsigned long));
 	cpus_and(*cpumask, *cpumask, cpu_online_map);
 
 	cpu = first_cpu(*cpumask);
 	while (cpu < NR_CPUS) {
 		unsigned long *in, *out, off;
 
+		if (!cpu_isset(cpu, *cpumask))
+			continue;
+
 		in = (unsigned long *)&per_cpu(page_states, cpu);
 
 		cpu = next_cpu(cpu, *cpumask);
 
-		if (cpu < NR_CPUS)
+		if (likely(cpu < NR_CPUS))
 			prefetch(&per_cpu(page_states, cpu));
 
 		out = (unsigned long *)ret;
@@ -1886,8 +1889,7 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p,
  * not check if the processor is online before following the pageset pointer.
  * Other parts of the kernel may not check if the zone is available.
  */
-static struct per_cpu_pageset
-	boot_pageset[NR_CPUS];
+static struct per_cpu_pageset boot_pageset[NR_CPUS];
 
 /*
  * Dynamically allocate memory for the
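
The __get_page_state() change above relies on struct page_state being usable as a flat array of unsigned long counters, so only the first nr counters need to be cleared and summed. Below is a stand-alone sketch of that idea in plain user-space C; the struct and function names (counters, cpu_state, get_state) are invented for the illustration and are not kernel APIs.

#include <stdio.h>
#include <string.h>

struct counters { unsigned long pgalloc, pgfree, pgfault, unused; };

#define NCPU 4
static struct counters cpu_state[NCPU] = {
	{ 1, 2, 3, 99 }, { 4, 5, 6, 99 }, { 7, 8, 9, 99 }, { 0, 0, 0, 99 },
};

static void get_state(struct counters *ret, int nr)
{
	unsigned long *out = (unsigned long *)ret;
	int cpu, off;

	/* like the patched code: clear only the fields the caller asked for */
	memset(ret, 0, nr * sizeof(unsigned long));
	for (cpu = 0; cpu < NCPU; cpu++) {
		unsigned long *in = (unsigned long *)&cpu_state[cpu];
		for (off = 0; off < nr; off++)
			out[off] += in[off];
	}
}

int main(void)
{
	struct counters sum;

	get_state(&sum, 3);	/* the 'unused' field is never touched */
	printf("%lu %lu %lu\n", sum.pgalloc, sum.pgfree, sum.pgfault);
	return 0;
}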
diff --git a/mm/slab.c b/mm/slab.c
index 71370256a7eb..add05d808a4a 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -294,6 +294,7 @@ struct kmem_list3 {
 	unsigned long next_reap;
 	int free_touched;
 	unsigned int free_limit;
+	unsigned int colour_next;	/* Per-node cache coloring */
 	spinlock_t list_lock;
 	struct array_cache *shared;	/* shared per node */
 	struct array_cache **alien;	/* on other nodes */
@@ -344,6 +345,7 @@ static void kmem_list3_init(struct kmem_list3 *parent)
 	INIT_LIST_HEAD(&parent->slabs_free);
 	parent->shared = NULL;
 	parent->alien = NULL;
+	parent->colour_next = 0;
 	spin_lock_init(&parent->list_lock);
 	parent->free_objects = 0;
 	parent->free_touched = 0;
@@ -390,7 +392,6 @@ struct kmem_cache {
 
 	size_t colour;			/* cache colouring range */
 	unsigned int colour_off;	/* colour offset */
-	unsigned int colour_next;	/* cache colouring */
 	struct kmem_cache *slabp_cache;
 	unsigned int slab_size;
 	unsigned int dflags;		/* dynamic flags */
@@ -883,14 +884,14 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
 	}
 }
 
-static void drain_alien_cache(struct kmem_cache *cachep, struct kmem_list3 *l3)
+static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien)
 {
 	int i = 0;
 	struct array_cache *ac;
 	unsigned long flags;
 
 	for_each_online_node(i) {
-		ac = l3->alien[i];
+		ac = alien[i];
 		if (ac) {
 			spin_lock_irqsave(&ac->lock, flags);
 			__drain_alien_cache(cachep, ac, i);
@@ -899,9 +900,18 @@ static void drain_alien_cache(struct kmem_cache *cachep, struct kmem_list3 *l3)
 	}
 }
 #else
-#define alloc_alien_cache(node, limit) do { } while (0)
-#define free_alien_cache(ac_ptr) do { } while (0)
-#define drain_alien_cache(cachep, l3) do { } while (0)
+
+#define drain_alien_cache(cachep, alien) do { } while (0)
+
+static inline struct array_cache **alloc_alien_cache(int node, int limit)
+{
+	return (struct array_cache **) 0x01020304ul;
+}
+
+static inline void free_alien_cache(struct array_cache **ac_ptr)
+{
+}
+
 #endif
 
 static int __devinit cpuup_callback(struct notifier_block *nfb,
@@ -935,6 +945,11 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 				l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
 				    ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
 
+				/*
+				 * The l3s don't come and go as CPUs come and
+				 * go. cache_chain_mutex is sufficient
+				 * protection here.
+				 */
 				cachep->nodelists[node] = l3;
 			}
 
@@ -949,26 +964,46 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 		   & array cache's */
 		list_for_each_entry(cachep, &cache_chain, next) {
 			struct array_cache *nc;
+			struct array_cache *shared;
+			struct array_cache **alien;
 
 			nc = alloc_arraycache(node, cachep->limit,
 						cachep->batchcount);
 			if (!nc)
 				goto bad;
+			shared = alloc_arraycache(node,
+					cachep->shared * cachep->batchcount,
+					0xbaadf00d);
+			if (!shared)
+				goto bad;
+
+			alien = alloc_alien_cache(node, cachep->limit);
+			if (!alien)
+				goto bad;
 			cachep->array[cpu] = nc;
 
 			l3 = cachep->nodelists[node];
 			BUG_ON(!l3);
-			if (!l3->shared) {
-				if (!(nc = alloc_arraycache(node,
-					cachep->shared *
-					cachep->batchcount,
-					0xbaadf00d)))
-					goto bad;
 
-				/* we are serialised from CPU_DEAD or
-				  CPU_UP_CANCELLED by the cpucontrol lock */
-				l3->shared = nc;
+			spin_lock_irq(&l3->list_lock);
+			if (!l3->shared) {
+				/*
+				 * We are serialised from CPU_DEAD or
+				 * CPU_UP_CANCELLED by the cpucontrol lock
+				 */
+				l3->shared = shared;
+				shared = NULL;
 			}
+#ifdef CONFIG_NUMA
+			if (!l3->alien) {
+				l3->alien = alien;
+				alien = NULL;
+			}
+#endif
+			spin_unlock_irq(&l3->list_lock);
+
+			kfree(shared);
+			free_alien_cache(alien);
 		}
 		mutex_unlock(&cache_chain_mutex);
 		break;
@@ -977,25 +1012,34 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
 	case CPU_DEAD:
+		/*
+		 * Even if all the cpus of a node are down, we don't free the
+		 * kmem_list3 of any cache. This to avoid a race between
+		 * cpu_down, and a kmalloc allocation from another cpu for
+		 * memory from the node of the cpu going down. The list3
+		 * structure is usually allocated from kmem_cache_create() and
+		 * gets destroyed at kmem_cache_destroy().
+		 */
 		/* fall thru */
 	case CPU_UP_CANCELED:
 		mutex_lock(&cache_chain_mutex);
 
 		list_for_each_entry(cachep, &cache_chain, next) {
 			struct array_cache *nc;
+			struct array_cache *shared;
+			struct array_cache **alien;
 			cpumask_t mask;
 
 			mask = node_to_cpumask(node);
-			spin_lock_irq(&cachep->spinlock);
 			/* cpu is dead; no one can alloc from it. */
 			nc = cachep->array[cpu];
 			cachep->array[cpu] = NULL;
 			l3 = cachep->nodelists[node];
 
 			if (!l3)
-				goto unlock_cache;
+				goto free_array_cache;
 
-			spin_lock(&l3->list_lock);
+			spin_lock_irq(&l3->list_lock);
 
 			/* Free limit for this kmem_list3 */
 			l3->free_limit -= cachep->batchcount;
@@ -1003,34 +1047,44 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 				free_block(cachep, nc->entry, nc->avail, node);
 
 			if (!cpus_empty(mask)) {
-				spin_unlock(&l3->list_lock);
-				goto unlock_cache;
+				spin_unlock_irq(&l3->list_lock);
+				goto free_array_cache;
 			}
 
-			if (l3->shared) {
+			shared = l3->shared;
+			if (shared) {
 				free_block(cachep, l3->shared->entry,
 					   l3->shared->avail, node);
-				kfree(l3->shared);
 				l3->shared = NULL;
 			}
-			if (l3->alien) {
-				drain_alien_cache(cachep, l3);
-				free_alien_cache(l3->alien);
-				l3->alien = NULL;
-			}
 
-			/* free slabs belonging to this node */
-			if (__node_shrink(cachep, node)) {
-				cachep->nodelists[node] = NULL;
-				spin_unlock(&l3->list_lock);
-				kfree(l3);
-			} else {
-				spin_unlock(&l3->list_lock);
+			alien = l3->alien;
+			l3->alien = NULL;
+
+			spin_unlock_irq(&l3->list_lock);
+
+			kfree(shared);
+			if (alien) {
+				drain_alien_cache(cachep, alien);
+				free_alien_cache(alien);
 			}
-unlock_cache:
-			spin_unlock_irq(&cachep->spinlock);
+free_array_cache:
 			kfree(nc);
 		}
+		/*
+		 * In the previous loop, all the objects were freed to
+		 * the respective cache's slabs, now we can go ahead and
+		 * shrink each nodelist to its limit.
+		 */
+		list_for_each_entry(cachep, &cache_chain, next) {
+			l3 = cachep->nodelists[node];
+			if (!l3)
+				continue;
+			spin_lock_irq(&l3->list_lock);
+			/* free slabs belonging to this node */
+			__node_shrink(cachep, node);
+			spin_unlock_irq(&l3->list_lock);
+		}
 		mutex_unlock(&cache_chain_mutex);
 		break;
 #endif
@@ -1119,7 +1173,6 @@ void __init kmem_cache_init(void)
 		BUG();
 
 	cache_cache.colour = left_over / cache_cache.colour_off;
-	cache_cache.colour_next = 0;
 	cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
 				      sizeof(struct slab), cache_line_size());
 
@@ -1664,6 +1717,12 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 		BUG();
 	}
 
+	/*
+	 * Prevent CPUs from coming and going.
+	 * lock_cpu_hotplug() nests outside cache_chain_mutex
+	 */
+	lock_cpu_hotplug();
+
 	mutex_lock(&cache_chain_mutex);
 
 	list_for_each(p, &cache_chain) {
@@ -1865,8 +1924,6 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	cachep->dtor = dtor;
 	cachep->name = name;
 
-	/* Don't let CPUs to come and go */
-	lock_cpu_hotplug();
 
 	if (g_cpucache_up == FULL) {
 		enable_cpucache(cachep);
@@ -1925,12 +1982,12 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 
 	/* cache setup completed, link it into the list */
 	list_add(&cachep->next, &cache_chain);
-	unlock_cpu_hotplug();
 oops:
 	if (!cachep && (flags & SLAB_PANIC))
 		panic("kmem_cache_create(): failed to create slab `%s'\n",
 		      name);
 	mutex_unlock(&cache_chain_mutex);
+	unlock_cpu_hotplug();
 	return cachep;
 }
 EXPORT_SYMBOL(kmem_cache_create);
@@ -2011,18 +2068,16 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
 
 	smp_call_function_all_cpus(do_drain, cachep);
 	check_irq_on();
-	spin_lock_irq(&cachep->spinlock);
 	for_each_online_node(node) {
 		l3 = cachep->nodelists[node];
 		if (l3) {
-			spin_lock(&l3->list_lock);
+			spin_lock_irq(&l3->list_lock);
 			drain_array_locked(cachep, l3->shared, 1, node);
-			spin_unlock(&l3->list_lock);
+			spin_unlock_irq(&l3->list_lock);
 			if (l3->alien)
-				drain_alien_cache(cachep, l3);
+				drain_alien_cache(cachep, l3->alien);
 		}
 	}
-	spin_unlock_irq(&cachep->spinlock);
 }
 
 static int __node_shrink(struct kmem_cache *cachep, int node)
@@ -2324,20 +2379,20 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	 */
 	ctor_flags |= SLAB_CTOR_ATOMIC;
 
-	/* About to mess with non-constant members - lock. */
+	/* Take the l3 list lock to change the colour_next on this node */
 	check_irq_off();
-	spin_lock(&cachep->spinlock);
+	l3 = cachep->nodelists[nodeid];
+	spin_lock(&l3->list_lock);
 
 	/* Get colour for the slab, and cal the next value. */
-	offset = cachep->colour_next;
-	cachep->colour_next++;
-	if (cachep->colour_next >= cachep->colour)
-		cachep->colour_next = 0;
-	offset *= cachep->colour_off;
+	offset = l3->colour_next;
+	l3->colour_next++;
+	if (l3->colour_next >= cachep->colour)
+		l3->colour_next = 0;
+	spin_unlock(&l3->list_lock);
 
-	spin_unlock(&cachep->spinlock);
+	offset *= cachep->colour_off;
 
-	check_irq_off();
 	if (local_flags & __GFP_WAIT)
 		local_irq_enable();
 
@@ -2367,7 +2422,6 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	if (local_flags & __GFP_WAIT)
 		local_irq_disable();
 	check_irq_off();
-	l3 = cachep->nodelists[nodeid];
 	spin_lock(&l3->list_lock);
 
 	/* Make slab active. */
@@ -2725,6 +2779,7 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node
 	BUG_ON(!l3);
 
 retry:
+	check_irq_off();
 	spin_lock(&l3->list_lock);
 	entry = l3->slabs_partial.next;
 	if (entry == &l3->slabs_partial) {
@@ -3304,11 +3359,11 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount
 	smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
 
 	check_irq_on();
-	spin_lock_irq(&cachep->spinlock);
+	spin_lock(&cachep->spinlock);
 	cachep->batchcount = batchcount;
 	cachep->limit = limit;
 	cachep->shared = shared;
-	spin_unlock_irq(&cachep->spinlock);
+	spin_unlock(&cachep->spinlock);
 
 	for_each_online_cpu(i) {
 		struct array_cache *ccold = new.new[i];
@@ -3440,7 +3495,7 @@ static void cache_reap(void *unused)
 
 		l3 = searchp->nodelists[numa_node_id()];
 		if (l3->alien)
-			drain_alien_cache(searchp, l3);
+			drain_alien_cache(searchp, l3->alien);
 		spin_lock_irq(&l3->list_lock);
 
 		drain_array_locked(searchp, cpu_cache_get(searchp), 0,
@@ -3564,8 +3619,7 @@ static int s_show(struct seq_file *m, void *p)
 	int node;
 	struct kmem_list3 *l3;
 
-	check_irq_on();
-	spin_lock_irq(&cachep->spinlock);
+	spin_lock(&cachep->spinlock);
 	active_objs = 0;
 	num_slabs = 0;
 	for_each_online_node(node) {
@@ -3573,7 +3627,8 @@ static int s_show(struct seq_file *m, void *p)
 		if (!l3)
 			continue;
 
-		spin_lock(&l3->list_lock);
+		check_irq_on();
+		spin_lock_irq(&l3->list_lock);
 
 		list_for_each(q, &l3->slabs_full) {
 			slabp = list_entry(q, struct slab, list);
@@ -3598,9 +3653,10 @@ static int s_show(struct seq_file *m, void *p)
 			num_slabs++;
 		}
 		free_objects += l3->free_objects;
-		shared_avail += l3->shared->avail;
+		if (l3->shared)
+			shared_avail += l3->shared->avail;
 
-		spin_unlock(&l3->list_lock);
+		spin_unlock_irq(&l3->list_lock);
 	}
 	num_slabs += active_slabs;
 	num_objs = num_slabs * cachep->num;
@@ -3644,7 +3700,7 @@ static int s_show(struct seq_file *m, void *p)
 	}
 #endif
 	seq_putc(m, '\n');
-	spin_unlock_irq(&cachep->spinlock);
+	spin_unlock(&cachep->spinlock);
 	return 0;
 }
 
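
The slab.c changes above move the colour cursor from struct kmem_cache into the per-node kmem_list3, so cache_grow() now advances it under l3->list_lock rather than cachep->spinlock and each node cycles its own slab colour offset. Below is a stand-alone sketch of that per-node cycling in user-space C; the types and function name (cache_params, node_state, next_colour_offset) are invented for the example and are not the kernel's.

#include <stdio.h>

struct cache_params {
	unsigned int colour;		/* number of distinct offsets */
	unsigned int colour_off;	/* bytes per colour step */
};

struct node_state {
	unsigned int colour_next;	/* per-node, was per-cache before the patch */
};

static unsigned int next_colour_offset(const struct cache_params *c,
				       struct node_state *n)
{
	unsigned int offset = n->colour_next;

	n->colour_next++;
	if (n->colour_next >= c->colour)
		n->colour_next = 0;		/* wrap around */
	return offset * c->colour_off;
}

int main(void)
{
	struct cache_params c = { 3, 64 };
	struct node_state node0 = { 0 }, node1 = { 0 };
	int i;

	for (i = 0; i < 4; i++)
		printf("node0 slab %d: offset %u\n", i,
		       next_colour_offset(&c, &node0));
	/* node1 starts its own cycle at 0, independent of node0 */
	printf("node1 slab 0: offset %u\n", next_colour_offset(&c, &node1));
	return 0;
}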
diff --git a/mm/slob.c b/mm/slob.c
index 1c240c4b71d9..a1f42bdc0245 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -336,7 +336,7 @@ EXPORT_SYMBOL(slab_reclaim_pages);
 
 #ifdef CONFIG_SMP
 
-void *__alloc_percpu(size_t size, size_t align)
+void *__alloc_percpu(size_t size)
 {
 	int i;
 	struct percpu_data *pdata = kmalloc(sizeof (*pdata), GFP_KERNEL);
diff --git a/mm/swap.c b/mm/swap.c
index bc2442a7b0ee..76247424dea1 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -34,19 +34,22 @@
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
 
-void put_page(struct page *page)
+static void put_compound_page(struct page *page)
 {
-	if (unlikely(PageCompound(page))) {
-		page = (struct page *)page_private(page);
-		if (put_page_testzero(page)) {
-			void (*dtor)(struct page *page);
+	page = (struct page *)page_private(page);
+	if (put_page_testzero(page)) {
+		void (*dtor)(struct page *page);
 
-			dtor = (void (*)(struct page *))page[1].mapping;
-			(*dtor)(page);
-		}
-		return;
+		dtor = (void (*)(struct page *))page[1].mapping;
+		(*dtor)(page);
 	}
-	if (put_page_testzero(page))
+}
+
+void put_page(struct page *page)
+{
+	if (unlikely(PageCompound(page)))
+		put_compound_page(page);
+	else if (put_page_testzero(page))
 		__page_cache_release(page);
 }
 EXPORT_SYMBOL(put_page);
@@ -244,6 +247,15 @@ void release_pages(struct page **pages, int nr, int cold)
 		struct page *page = pages[i];
 		struct zone *pagezone;
 
+		if (unlikely(PageCompound(page))) {
+			if (zone) {
+				spin_unlock_irq(&zone->lru_lock);
+				zone = NULL;
+			}
+			put_compound_page(page);
+			continue;
+		}
+
 		if (!put_page_testzero(page))
 			continue;
 
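
put_compound_page(), factored out above, finds the head page through page_private(), drops the head's reference count, and runs the destructor stashed in page[1].mapping once it reaches zero. Below is a toy user-space model of that release path; all names (toy_page, toy_put_compound_page, toy_free_huge_page) are invented and the struct bears no relation to the real struct page layout.

#include <stdio.h>

struct toy_page {
	int count;				/* head page refcount */
	struct toy_page *head;			/* set on every subpage */
	void (*dtor)(struct toy_page *head);	/* stored in slot 1 of the compound */
};

static void toy_free_huge_page(struct toy_page *head)
{
	printf("freeing compound page at %p\n", (void *)head);
}

static void toy_put_compound_page(struct toy_page *page)
{
	page = page->head;			/* like page_private(page) */
	if (--page->count == 0)
		(*page[1].dtor)(page);		/* like page[1].mapping */
}

int main(void)
{
	struct toy_page huge[4];
	int i;

	for (i = 0; i < 4; i++)
		huge[i].head = &huge[0];
	huge[0].count = 2;
	huge[1].dtor = toy_free_huge_page;

	toy_put_compound_page(&huge[3]);	/* drops the head refcount to 1 */
	toy_put_compound_page(&huge[0]);	/* last reference: destructor runs */
	return 0;
}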
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5a610804cd06..1838c15ca4fd 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -443,6 +443,10 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
 		BUG_ON(PageActive(page));
 
 		sc->nr_scanned++;
+
+		if (!sc->may_swap && page_mapped(page))
+			goto keep_locked;
+
 		/* Double the slab pressure for mapped and swapcache pages */
 		if (page_mapped(page) || PageSwapCache(page))
 			sc->nr_scanned++;
@@ -632,7 +636,7 @@ static int swap_page(struct page *page)
 	struct address_space *mapping = page_mapping(page);
 
 	if (page_mapped(page) && mapping)
-		if (try_to_unmap(page, 0) != SWAP_SUCCESS)
+		if (try_to_unmap(page, 1) != SWAP_SUCCESS)
 			goto unlock_retry;
 
 	if (PageDirty(page)) {
@@ -839,7 +843,7 @@ EXPORT_SYMBOL(migrate_page);
  * pages are swapped out.
  *
  * The function returns after 10 attempts or if no pages
- * are movable anymore because t has become empty
+ * are movable anymore because to has become empty
  * or no retryable pages exist anymore.
  *
  * Return: Number of pages not migrated when "to" ran empty.
@@ -928,12 +932,21 @@ redo:
 			goto unlock_both;
 
 		if (mapping->a_ops->migratepage) {
+			/*
+			 * Most pages have a mapping and most filesystems
+			 * should provide a migration function. Anonymous
+			 * pages are part of swap space which also has its
+			 * own migration function. This is the most common
+			 * path for page migration.
+			 */
 			rc = mapping->a_ops->migratepage(newpage, page);
 			goto unlock_both;
 		}
 
 		/*
-		 * Trigger writeout if page is dirty
+		 * Default handling if a filesystem does not provide
+		 * a migration function. We can only migrate clean
+		 * pages so try to write out any dirty pages first.
 		 */
 		if (PageDirty(page)) {
 			switch (pageout(page, mapping)) {
@@ -949,9 +962,10 @@ redo:
 				; /* try to migrate the page below */
 			}
 		}
+
 		/*
-		 * If we have no buffer or can release the buffer
-		 * then do a simple migration.
+		 * Buffers are managed in a filesystem specific way.
+		 * We must have no buffers or drop them.
 		 */
 		if (!page_has_buffers(page) ||
 		    try_to_release_page(page, GFP_KERNEL)) {
@@ -966,6 +980,11 @@ redo:
 		 * swap them out.
 		 */
 		if (pass > 4) {
+			/*
+			 * Persistently unable to drop buffers..... As a
+			 * measure of last resort we fall back to
+			 * swap_page().
+			 */
 			unlock_page(newpage);
 			newpage = NULL;
 			rc = swap_page(page);
@@ -1176,9 +1195,47 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
 	struct page *page;
 	struct pagevec pvec;
 	int reclaim_mapped = 0;
-	long mapped_ratio;
-	long distress;
-	long swap_tendency;
+
+	if (unlikely(sc->may_swap)) {
+		long mapped_ratio;
+		long distress;
+		long swap_tendency;
+
+		/*
+		 * `distress' is a measure of how much trouble we're having
+		 * reclaiming pages. 0 -> no problems. 100 -> great trouble.
+		 */
+		distress = 100 >> zone->prev_priority;
+
+		/*
+		 * The point of this algorithm is to decide when to start
+		 * reclaiming mapped memory instead of just pagecache. Work out
+		 * how much memory
+		 * is mapped.
+		 */
+		mapped_ratio = (sc->nr_mapped * 100) / total_memory;
+
+		/*
+		 * Now decide how much we really want to unmap some pages. The
+		 * mapped ratio is downgraded - just because there's a lot of
+		 * mapped memory doesn't necessarily mean that page reclaim
+		 * isn't succeeding.
+		 *
+		 * The distress ratio is important - we don't want to start
+		 * going oom.
+		 *
+		 * A 100% value of vm_swappiness overrides this algorithm
+		 * altogether.
+		 */
+		swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;
+
+		/*
+		 * Now use this metric to decide whether to start moving mapped
+		 * memory onto the inactive list.
+		 */
+		if (swap_tendency >= 100)
+			reclaim_mapped = 1;
+	}
 
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
@@ -1188,37 +1245,6 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
 	zone->nr_active -= pgmoved;
 	spin_unlock_irq(&zone->lru_lock);
 
-	/*
-	 * `distress' is a measure of how much trouble we're having reclaiming
-	 * pages. 0 -> no problems. 100 -> great trouble.
-	 */
-	distress = 100 >> zone->prev_priority;
-
-	/*
-	 * The point of this algorithm is to decide when to start reclaiming
-	 * mapped memory instead of just pagecache. Work out how much memory
-	 * is mapped.
-	 */
-	mapped_ratio = (sc->nr_mapped * 100) / total_memory;
-
-	/*
-	 * Now decide how much we really want to unmap some pages. The mapped
-	 * ratio is downgraded - just because there's a lot of mapped memory
-	 * doesn't necessarily mean that page reclaim isn't succeeding.
-	 *
-	 * The distress ratio is important - we don't want to start going oom.
-	 *
-	 * A 100% value of vm_swappiness overrides this algorithm altogether.
-	 */
-	swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;
-
-	/*
-	 * Now use this metric to decide whether to start moving mapped memory
-	 * onto the inactive list.
-	 */
-	if (swap_tendency >= 100)
-		reclaim_mapped = 1;
-
 	while (!list_empty(&l_hold)) {
 		cond_resched();
 		page = lru_to_page(&l_hold);
@@ -1595,9 +1621,7 @@ scan:
 			sc.nr_reclaimed = 0;
 			sc.priority = priority;
 			sc.swap_cluster_max = nr_pages? nr_pages : SWAP_CLUSTER_MAX;
-			atomic_inc(&zone->reclaim_in_progress);
 			shrink_zone(zone, &sc);
-			atomic_dec(&zone->reclaim_in_progress);
 			reclaim_state->reclaimed_slab = 0;
 			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
 						lru_pages);
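
For reference, the swap_tendency heuristic that refill_inactive_zone() now evaluates only under the sc->may_swap check works out as distress + mapped_ratio/2 + vm_swappiness, and mapped pages start being reclaimed once it reaches 100. The stand-alone C example below just walks through that arithmetic; the function name and the numbers are invented for illustration and are not kernel code.

#include <stdio.h>

static int reclaim_mapped(long nr_mapped, long total_memory,
			  int prev_priority, long vm_swappiness)
{
	long distress = 100 >> prev_priority;
	long mapped_ratio = nr_mapped * 100 / total_memory;
	long swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;

	return swap_tendency >= 100;
}

int main(void)
{
	/* 50% of memory mapped, no reclaim trouble (priority 12), swappiness 60:
	 * 25 + 0 + 60 = 85 -> leave mapped pages alone. */
	printf("%d\n", reclaim_mapped(512, 1024, 12, 60));

	/* Same load but reclaim is struggling (prev_priority 0):
	 * 25 + 100 + 60 = 185 -> start unmapping. */
	printf("%d\n", reclaim_mapped(512, 1024, 0, 60));
	return 0;
}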