Diffstat (limited to 'mm')
-rw-r--r--	mm/hugetlb.c	 10
-rw-r--r--	mm/mempolicy.c	  2
-rw-r--r--	mm/page_alloc.c	 10
-rw-r--r--	mm/slab.c	186
-rw-r--r--	mm/slob.c	  2
-rw-r--r--	mm/swap.c	 32
-rw-r--r--	mm/vmscan.c	106
7 files changed, 220 insertions, 128 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b21d78c941b5..67f29516662a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -107,7 +107,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr)
 	set_page_count(page, 1);
 	page[1].mapping = (void *)free_huge_page;
 	for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i)
-		clear_highpage(&page[i]);
+		clear_user_highpage(&page[i], addr);
 	return page;
 }
 
@@ -391,12 +391,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	if (!new_page) {
 		page_cache_release(old_page);
-
-		/* Logically this is OOM, not a SIGBUS, but an OOM
-		 * could cause the kernel to go killing other
-		 * processes which won't help the hugepage situation
-		 * at all (?) */
-		return VM_FAULT_SIGBUS;
+		return VM_FAULT_OOM;
 	}
 
 	spin_unlock(&mm->page_table_lock);
@@ -444,6 +439,7 @@ retry:
 		page = alloc_huge_page(vma, address);
 		if (!page) {
 			hugetlb_put_quota(mapping);
+			ret = VM_FAULT_OOM;
 			goto out;
 		}
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 27da6d5c77ba..3bd7fb7e4b75 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1159,6 +1159,7 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
 	return interleave_nodes(pol);
 }
 
+#ifdef CONFIG_HUGETLBFS
 /* Return a zonelist suitable for a huge page allocation. */
 struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr)
 {
@@ -1172,6 +1173,7 @@ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr)
 	}
 	return zonelist_policy(GFP_HIGHUSER, pol);
 }
+#endif
 
 /* Allocate a page in interleaved policy.
    Own path because it needs to do special accounting. */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 44b4eb4202d9..dde04ff4be31 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1213,18 +1213,21 @@ static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
 {
 	int cpu = 0;
 
-	memset(ret, 0, sizeof(*ret));
+	memset(ret, 0, nr * sizeof(unsigned long));
 	cpus_and(*cpumask, *cpumask, cpu_online_map);
 
 	cpu = first_cpu(*cpumask);
 	while (cpu < NR_CPUS) {
 		unsigned long *in, *out, off;
 
+		if (!cpu_isset(cpu, *cpumask))
+			continue;
+
 		in = (unsigned long *)&per_cpu(page_states, cpu);
 
 		cpu = next_cpu(cpu, *cpumask);
 
-		if (cpu < NR_CPUS)
+		if (likely(cpu < NR_CPUS))
 			prefetch(&per_cpu(page_states, cpu));
 
 		out = (unsigned long *)ret;
@@ -1886,8 +1889,7 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p,
  * not check if the processor is online before following the pageset pointer.
  * Other parts of the kernel may not check if the zone is available.
  */
-static struct per_cpu_pageset
-	boot_pageset[NR_CPUS];
+static struct per_cpu_pageset boot_pageset[NR_CPUS];
 
 /*
  * Dynamically allocate memory for the
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -294,6 +294,7 @@ struct kmem_list3 {
 	unsigned long next_reap;
 	int free_touched;
 	unsigned int free_limit;
+	unsigned int colour_next;	/* Per-node cache coloring */
 	spinlock_t list_lock;
 	struct array_cache *shared;	/* shared per node */
 	struct array_cache **alien;	/* on other nodes */
@@ -344,6 +345,7 @@ static void kmem_list3_init(struct kmem_list3 *parent)
 	INIT_LIST_HEAD(&parent->slabs_free);
 	parent->shared = NULL;
 	parent->alien = NULL;
+	parent->colour_next = 0;
 	spin_lock_init(&parent->list_lock);
 	parent->free_objects = 0;
 	parent->free_touched = 0;
@@ -390,7 +392,6 @@ struct kmem_cache {
 
 	size_t colour;			/* cache colouring range */
 	unsigned int colour_off;	/* colour offset */
-	unsigned int colour_next;	/* cache colouring */
 	struct kmem_cache *slabp_cache;
 	unsigned int slab_size;
 	unsigned int dflags;		/* dynamic flags */
@@ -883,14 +884,14 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
 	}
 }
 
-static void drain_alien_cache(struct kmem_cache *cachep, struct kmem_list3 *l3)
+static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien)
 {
 	int i = 0;
 	struct array_cache *ac;
 	unsigned long flags;
 
 	for_each_online_node(i) {
-		ac = l3->alien[i];
+		ac = alien[i];
 		if (ac) {
 			spin_lock_irqsave(&ac->lock, flags);
 			__drain_alien_cache(cachep, ac, i);
@@ -899,9 +900,18 @@ static void drain_alien_cache(struct kmem_cache *cachep, struct kmem_list3 *l3)
 	}
 }
 #else
-#define alloc_alien_cache(node, limit) do { } while (0)
-#define free_alien_cache(ac_ptr) do { } while (0)
-#define drain_alien_cache(cachep, l3) do { } while (0)
+
+#define drain_alien_cache(cachep, alien) do { } while (0)
+
+static inline struct array_cache **alloc_alien_cache(int node, int limit)
+{
+	return (struct array_cache **) 0x01020304ul;
+}
+
+static inline void free_alien_cache(struct array_cache **ac_ptr)
+{
+}
+
 #endif
 
 static int __devinit cpuup_callback(struct notifier_block *nfb,
@@ -935,6 +945,11 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 			l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
 			  ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
 
+			/*
+			 * The l3s don't come and go as CPUs come and
+			 * go.  cache_chain_mutex is sufficient
+			 * protection here.
+			 */
 			cachep->nodelists[node] = l3;
 		}
 
@@ -949,26 +964,46 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 		   & array cache's */
 		list_for_each_entry(cachep, &cache_chain, next) {
 			struct array_cache *nc;
+			struct array_cache *shared;
+			struct array_cache **alien;
 
 			nc = alloc_arraycache(node, cachep->limit,
 						cachep->batchcount);
 			if (!nc)
 				goto bad;
+			shared = alloc_arraycache(node,
+					cachep->shared * cachep->batchcount,
+					0xbaadf00d);
+			if (!shared)
+				goto bad;
+
+			alien = alloc_alien_cache(node, cachep->limit);
+			if (!alien)
+				goto bad;
 			cachep->array[cpu] = nc;
 
 			l3 = cachep->nodelists[node];
 			BUG_ON(!l3);
-			if (!l3->shared) {
-				if (!(nc = alloc_arraycache(node,
-					cachep->shared *
-					cachep->batchcount,
-					0xbaadf00d)))
-					goto bad;
 
-				/* we are serialised from CPU_DEAD or
-				  CPU_UP_CANCELLED by the cpucontrol lock */
-				l3->shared = nc;
+			spin_lock_irq(&l3->list_lock);
+			if (!l3->shared) {
+				/*
+				 * We are serialised from CPU_DEAD or
+				 * CPU_UP_CANCELLED by the cpucontrol lock
+				 */
+				l3->shared = shared;
+				shared = NULL;
 			}
+#ifdef CONFIG_NUMA
+			if (!l3->alien) {
+				l3->alien = alien;
+				alien = NULL;
+			}
+#endif
+			spin_unlock_irq(&l3->list_lock);
+
+			kfree(shared);
+			free_alien_cache(alien);
 		}
 		mutex_unlock(&cache_chain_mutex);
 		break;
@@ -977,25 +1012,34 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
 	case CPU_DEAD:
+		/*
+		 * Even if all the cpus of a node are down, we don't free the
+		 * kmem_list3 of any cache. This to avoid a race between
+		 * cpu_down, and a kmalloc allocation from another cpu for
+		 * memory from the node of the cpu going down.  The list3
+		 * structure is usually allocated from kmem_cache_create() and
+		 * gets destroyed at kmem_cache_destroy().
+		 */
 		/* fall thru */
 	case CPU_UP_CANCELED:
 		mutex_lock(&cache_chain_mutex);
 
 		list_for_each_entry(cachep, &cache_chain, next) {
 			struct array_cache *nc;
+			struct array_cache *shared;
+			struct array_cache **alien;
 			cpumask_t mask;
 
 			mask = node_to_cpumask(node);
-			spin_lock_irq(&cachep->spinlock);
 			/* cpu is dead; no one can alloc from it. */
 			nc = cachep->array[cpu];
 			cachep->array[cpu] = NULL;
 			l3 = cachep->nodelists[node];
 
 			if (!l3)
-				goto unlock_cache;
+				goto free_array_cache;
 
-			spin_lock(&l3->list_lock);
+			spin_lock_irq(&l3->list_lock);
 
 			/* Free limit for this kmem_list3 */
 			l3->free_limit -= cachep->batchcount;
@@ -1003,34 +1047,44 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 				free_block(cachep, nc->entry, nc->avail, node);
 
 			if (!cpus_empty(mask)) {
-				spin_unlock(&l3->list_lock);
-				goto unlock_cache;
+				spin_unlock_irq(&l3->list_lock);
+				goto free_array_cache;
 			}
 
-			if (l3->shared) {
+			shared = l3->shared;
+			if (shared) {
 				free_block(cachep, l3->shared->entry,
 					   l3->shared->avail, node);
-				kfree(l3->shared);
 				l3->shared = NULL;
 			}
-			if (l3->alien) {
-				drain_alien_cache(cachep, l3);
-				free_alien_cache(l3->alien);
-				l3->alien = NULL;
-			}
 
-			/* free slabs belonging to this node */
-			if (__node_shrink(cachep, node)) {
-				cachep->nodelists[node] = NULL;
-				spin_unlock(&l3->list_lock);
-				kfree(l3);
-			} else {
-				spin_unlock(&l3->list_lock);
+			alien = l3->alien;
+			l3->alien = NULL;
+
+			spin_unlock_irq(&l3->list_lock);
+
+			kfree(shared);
+			if (alien) {
+				drain_alien_cache(cachep, alien);
+				free_alien_cache(alien);
 			}
-		unlock_cache:
-			spin_unlock_irq(&cachep->spinlock);
+		free_array_cache:
 			kfree(nc);
 		}
+		/*
+		 * In the previous loop, all the objects were freed to
+		 * the respective cache's slabs,  now we can go ahead and
+		 * shrink each nodelist to its limit.
+		 */
+		list_for_each_entry(cachep, &cache_chain, next) {
+			l3 = cachep->nodelists[node];
+			if (!l3)
+				continue;
+			spin_lock_irq(&l3->list_lock);
+			/* free slabs belonging to this node */
+			__node_shrink(cachep, node);
+			spin_unlock_irq(&l3->list_lock);
+		}
 		mutex_unlock(&cache_chain_mutex);
 		break;
 #endif
@@ -1119,7 +1173,6 @@ void __init kmem_cache_init(void)
 		BUG();
 
 	cache_cache.colour = left_over / cache_cache.colour_off;
-	cache_cache.colour_next = 0;
 	cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
 				      sizeof(struct slab), cache_line_size());
 
@@ -1664,6 +1717,12 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 		BUG();
 	}
 
+	/*
+	 * Prevent CPUs from coming and going.
+	 * lock_cpu_hotplug() nests outside cache_chain_mutex
+	 */
+	lock_cpu_hotplug();
+
 	mutex_lock(&cache_chain_mutex);
 
 	list_for_each(p, &cache_chain) {
@@ -1865,8 +1924,6 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	cachep->dtor = dtor;
 	cachep->name = name;
 
-	/* Don't let CPUs to come and go */
-	lock_cpu_hotplug();
 
 	if (g_cpucache_up == FULL) {
 		enable_cpucache(cachep);
@@ -1925,12 +1982,12 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 
 	/* cache setup completed, link it into the list */
 	list_add(&cachep->next, &cache_chain);
-	unlock_cpu_hotplug();
 oops:
 	if (!cachep && (flags & SLAB_PANIC))
 		panic("kmem_cache_create(): failed to create slab `%s'\n",
 		      name);
 	mutex_unlock(&cache_chain_mutex);
+	unlock_cpu_hotplug();
 	return cachep;
 }
 EXPORT_SYMBOL(kmem_cache_create);
@@ -2011,18 +2068,16 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
 
 	smp_call_function_all_cpus(do_drain, cachep);
 	check_irq_on();
-	spin_lock_irq(&cachep->spinlock);
 	for_each_online_node(node) {
 		l3 = cachep->nodelists[node];
 		if (l3) {
-			spin_lock(&l3->list_lock);
+			spin_lock_irq(&l3->list_lock);
 			drain_array_locked(cachep, l3->shared, 1, node);
-			spin_unlock(&l3->list_lock);
+			spin_unlock_irq(&l3->list_lock);
 			if (l3->alien)
-				drain_alien_cache(cachep, l3);
+				drain_alien_cache(cachep, l3->alien);
 		}
 	}
-	spin_unlock_irq(&cachep->spinlock);
 }
 
 static int __node_shrink(struct kmem_cache *cachep, int node)
@@ -2324,20 +2379,20 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	 */
 	ctor_flags |= SLAB_CTOR_ATOMIC;
 
-	/* About to mess with non-constant members - lock. */
+	/* Take the l3 list lock to change the colour_next on this node */
 	check_irq_off();
-	spin_lock(&cachep->spinlock);
+	l3 = cachep->nodelists[nodeid];
+	spin_lock(&l3->list_lock);
 
 	/* Get colour for the slab, and cal the next value. */
-	offset = cachep->colour_next;
-	cachep->colour_next++;
-	if (cachep->colour_next >= cachep->colour)
-		cachep->colour_next = 0;
-	offset *= cachep->colour_off;
+	offset = l3->colour_next;
+	l3->colour_next++;
+	if (l3->colour_next >= cachep->colour)
+		l3->colour_next = 0;
+	spin_unlock(&l3->list_lock);
 
-	spin_unlock(&cachep->spinlock);
+	offset *= cachep->colour_off;
 
-	check_irq_off();
 	if (local_flags & __GFP_WAIT)
 		local_irq_enable();
 
@@ -2367,7 +2422,6 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	if (local_flags & __GFP_WAIT)
 		local_irq_disable();
 	check_irq_off();
-	l3 = cachep->nodelists[nodeid];
 	spin_lock(&l3->list_lock);
 
 	/* Make slab active. */
@@ -2725,6 +2779,7 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node
 	BUG_ON(!l3);
 
 retry:
+	check_irq_off();
 	spin_lock(&l3->list_lock);
 	entry = l3->slabs_partial.next;
 	if (entry == &l3->slabs_partial) {
@@ -3304,11 +3359,11 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount
 	smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
 
 	check_irq_on();
-	spin_lock_irq(&cachep->spinlock);
+	spin_lock(&cachep->spinlock);
 	cachep->batchcount = batchcount;
 	cachep->limit = limit;
 	cachep->shared = shared;
-	spin_unlock_irq(&cachep->spinlock);
+	spin_unlock(&cachep->spinlock);
 
 	for_each_online_cpu(i) {
 		struct array_cache *ccold = new.new[i];
@@ -3440,7 +3495,7 @@ static void cache_reap(void *unused)
 
 		l3 = searchp->nodelists[numa_node_id()];
 		if (l3->alien)
-			drain_alien_cache(searchp, l3);
+			drain_alien_cache(searchp, l3->alien);
 		spin_lock_irq(&l3->list_lock);
 
 		drain_array_locked(searchp, cpu_cache_get(searchp), 0,
@@ -3564,8 +3619,7 @@ static int s_show(struct seq_file *m, void *p)
 	int node;
 	struct kmem_list3 *l3;
 
-	check_irq_on();
-	spin_lock_irq(&cachep->spinlock);
+	spin_lock(&cachep->spinlock);
 	active_objs = 0;
 	num_slabs = 0;
 	for_each_online_node(node) {
@@ -3573,7 +3627,8 @@ static int s_show(struct seq_file *m, void *p)
 		if (!l3)
 			continue;
 
-		spin_lock(&l3->list_lock);
+		check_irq_on();
+		spin_lock_irq(&l3->list_lock);
 
 		list_for_each(q, &l3->slabs_full) {
 			slabp = list_entry(q, struct slab, list);
@@ -3598,9 +3653,10 @@ static int s_show(struct seq_file *m, void *p)
 			num_slabs++;
 		}
 		free_objects += l3->free_objects;
-		shared_avail += l3->shared->avail;
+		if (l3->shared)
+			shared_avail += l3->shared->avail;
 
-		spin_unlock(&l3->list_lock);
+		spin_unlock_irq(&l3->list_lock);
 	}
 	num_slabs += active_slabs;
 	num_objs = num_slabs * cachep->num;
@@ -3644,7 +3700,7 @@ static int s_show(struct seq_file *m, void *p)
 	}
 #endif
 	seq_putc(m, '\n');
-	spin_unlock_irq(&cachep->spinlock);
+	spin_unlock(&cachep->spinlock);
 	return 0;
 }
 
diff --git a/mm/slob.c b/mm/slob.c
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -336,7 +336,7 @@ EXPORT_SYMBOL(slab_reclaim_pages);
 
 #ifdef CONFIG_SMP
 
-void *__alloc_percpu(size_t size, size_t align)
+void *__alloc_percpu(size_t size)
 {
 	int i;
 	struct percpu_data *pdata = kmalloc(sizeof (*pdata), GFP_KERNEL);
diff --git a/mm/swap.c b/mm/swap.c
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -34,19 +34,22 @@
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
 
-void put_page(struct page *page)
+static void put_compound_page(struct page *page)
 {
-	if (unlikely(PageCompound(page))) {
-		page = (struct page *)page_private(page);
-		if (put_page_testzero(page)) {
-			void (*dtor)(struct page *page);
+	page = (struct page *)page_private(page);
+	if (put_page_testzero(page)) {
+		void (*dtor)(struct page *page);
 
-			dtor = (void (*)(struct page *))page[1].mapping;
-			(*dtor)(page);
-		}
-		return;
-	}
-	if (put_page_testzero(page))
+		dtor = (void (*)(struct page *))page[1].mapping;
+		(*dtor)(page);
+	}
+}
+
+void put_page(struct page *page)
+{
+	if (unlikely(PageCompound(page)))
+		put_compound_page(page);
+	else if (put_page_testzero(page))
 		__page_cache_release(page);
 }
 EXPORT_SYMBOL(put_page);
@@ -244,6 +247,15 @@ void release_pages(struct page **pages, int nr, int cold)
 		struct page *page = pages[i];
 		struct zone *pagezone;
 
+		if (unlikely(PageCompound(page))) {
+			if (zone) {
+				spin_unlock_irq(&zone->lru_lock);
+				zone = NULL;
+			}
+			put_compound_page(page);
+			continue;
+		}
+
 		if (!put_page_testzero(page))
 			continue;
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5a610804cd06..1838c15ca4fd 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -443,6 +443,10 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
 		BUG_ON(PageActive(page));
 
 		sc->nr_scanned++;
+
+		if (!sc->may_swap && page_mapped(page))
+			goto keep_locked;
+
 		/* Double the slab pressure for mapped and swapcache pages */
 		if (page_mapped(page) || PageSwapCache(page))
 			sc->nr_scanned++;
@@ -632,7 +636,7 @@ static int swap_page(struct page *page)
 	struct address_space *mapping = page_mapping(page);
 
 	if (page_mapped(page) && mapping)
-		if (try_to_unmap(page, 0) != SWAP_SUCCESS)
+		if (try_to_unmap(page, 1) != SWAP_SUCCESS)
 			goto unlock_retry;
 
 	if (PageDirty(page)) {
@@ -839,7 +843,7 @@ EXPORT_SYMBOL(migrate_page);
  * pages are swapped out.
  *
  * The function returns after 10 attempts or if no pages
- * are movable anymore because t has become empty
+ * are movable anymore because to has become empty
  * or no retryable pages exist anymore.
  *
  * Return: Number of pages not migrated when "to" ran empty.
@@ -928,12 +932,21 @@ redo:
 		goto unlock_both;
 
 	if (mapping->a_ops->migratepage) {
+		/*
+		 * Most pages have a mapping and most filesystems
+		 * should provide a migration function. Anonymous
+		 * pages are part of swap space which also has its
+		 * own migration function. This is the most common
+		 * path for page migration.
+		 */
 		rc = mapping->a_ops->migratepage(newpage, page);
 		goto unlock_both;
 	}
 
 	/*
-	 * Trigger writeout if page is dirty
+	 * Default handling if a filesystem does not provide
+	 * a migration function. We can only migrate clean
+	 * pages so try to write out any dirty pages first.
 	 */
 	if (PageDirty(page)) {
 		switch (pageout(page, mapping)) {
@@ -949,9 +962,10 @@ redo:
 			; /* try to migrate the page below */
 		}
 	}
+
 	/*
-	 * If we have no buffer or can release the buffer
-	 * then do a simple migration.
+	 * Buffers are managed in a filesystem specific way.
+	 * We must have no buffers or drop them.
 	 */
 	if (!page_has_buffers(page) ||
 	    try_to_release_page(page, GFP_KERNEL)) {
@@ -966,6 +980,11 @@ redo:
 	 * swap them out.
 	 */
 	if (pass > 4) {
+		/*
+		 * Persistently unable to drop buffers..... As a
+		 * measure of last resort we fall back to
+		 * swap_page().
+		 */
 		unlock_page(newpage);
 		newpage = NULL;
 		rc = swap_page(page);
@@ -1176,9 +1195,47 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
 	struct page *page;
 	struct pagevec pvec;
 	int reclaim_mapped = 0;
-	long mapped_ratio;
-	long distress;
-	long swap_tendency;
+
+	if (unlikely(sc->may_swap)) {
+		long mapped_ratio;
+		long distress;
+		long swap_tendency;
+
+		/*
+		 * `distress' is a measure of how much trouble we're having
+		 * reclaiming pages.  0 -> no problems.  100 -> great trouble.
+		 */
+		distress = 100 >> zone->prev_priority;
+
+		/*
+		 * The point of this algorithm is to decide when to start
+		 * reclaiming mapped memory instead of just pagecache. Work out
+		 * how much memory
+		 * is mapped.
+		 */
+		mapped_ratio = (sc->nr_mapped * 100) / total_memory;
+
+		/*
+		 * Now decide how much we really want to unmap some pages. The
+		 * mapped ratio is downgraded - just because there's a lot of
+		 * mapped memory doesn't necessarily mean that page reclaim
+		 * isn't succeeding.
+		 *
+		 * The distress ratio is important - we don't want to start
+		 * going oom.
+		 *
+		 * A 100% value of vm_swappiness overrides this algorithm
+		 * altogether.
+		 */
+		swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;
+
+		/*
+		 * Now use this metric to decide whether to start moving mapped
+		 * memory onto the inactive list.
+		 */
+		if (swap_tendency >= 100)
+			reclaim_mapped = 1;
+	}
 
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
@@ -1188,37 +1245,6 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
 	zone->nr_active -= pgmoved;
 	spin_unlock_irq(&zone->lru_lock);
 
-	/*
-	 * `distress' is a measure of how much trouble we're having reclaiming
-	 * pages.  0 -> no problems.  100 -> great trouble.
-	 */
-	distress = 100 >> zone->prev_priority;
-
-	/*
-	 * The point of this algorithm is to decide when to start reclaiming
-	 * mapped memory instead of just pagecache. Work out how much memory
-	 * is mapped.
-	 */
-	mapped_ratio = (sc->nr_mapped * 100) / total_memory;
-
-	/*
-	 * Now decide how much we really want to unmap some pages.  The mapped
-	 * ratio is downgraded - just because there's a lot of mapped memory
-	 * doesn't necessarily mean that page reclaim isn't succeeding.
-	 *
-	 * The distress ratio is important - we don't want to start going oom.
-	 *
-	 * A 100% value of vm_swappiness overrides this algorithm altogether.
-	 */
-	swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;
-
-	/*
-	 * Now use this metric to decide whether to start moving mapped memory
-	 * onto the inactive list.
-	 */
-	if (swap_tendency >= 100)
-		reclaim_mapped = 1;
-
 	while (!list_empty(&l_hold)) {
 		cond_resched();
 		page = lru_to_page(&l_hold);
@@ -1595,9 +1621,7 @@ scan:
 			sc.nr_reclaimed = 0;
 			sc.priority = priority;
 			sc.swap_cluster_max = nr_pages? nr_pages : SWAP_CLUSTER_MAX;
-			atomic_inc(&zone->reclaim_in_progress);
 			shrink_zone(zone, &sc);
-			atomic_dec(&zone->reclaim_in_progress);
 			reclaim_state->reclaimed_slab = 0;
 			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
 						lru_pages);
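
The reclaim_mapped decision that the vmscan.c hunks above move under the sc->may_swap check is plain integer arithmetic, so it can be exercised outside the kernel. The following is a minimal standalone C sketch of that heuristic; the function name reclaim_mapped_decision and the sample input values are illustrative only and are not part of this patch.

#include <stdio.h>

/*
 * Standalone restatement of the swap_tendency heuristic used in
 * refill_inactive_zone(): returns 1 when mapped pages should be
 * considered for deactivation, 0 when only pagecache is reclaimed.
 * All names and sample values below are illustrative, not kernel state.
 */
static int reclaim_mapped_decision(int prev_priority, long nr_mapped,
				   long total_memory, long vm_swappiness)
{
	/* 0 -> no reclaim trouble, 100 -> great trouble */
	long distress = 100 >> prev_priority;
	/* share of memory that is mapped, in percent */
	long mapped_ratio = (nr_mapped * 100) / total_memory;
	long swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;

	return swap_tendency >= 100;
}

int main(void)
{
	/* sample values: moderate pressure, 40% mapped, default swappiness 60 */
	printf("reclaim_mapped = %d\n",
	       reclaim_mapped_decision(2, 40000, 100000, 60));
	return 0;
}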