about | summary | refs | log | tree | commit | diff | stats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r-- mm/memory.c 2
-rw-r--r-- mm/mremap.c 2
-rw-r--r-- mm/oom_kill.c 8
-rw-r--r-- mm/page_alloc.c 22
-rw-r--r-- mm/slab.c 59
-rw-r--r-- mm/swap_state.c 2
-rw-r--r-- mm/vmalloc.c 2
-rw-r--r-- mm/vmscan.c 27
8 files changed, 93 insertions, 31 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 7e2a4b1580e3..c1e14c9e67e4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -503,7 +503,7 @@ again:
503 return -ENOMEM; 503 return -ENOMEM;
504 src_pte = pte_offset_map_nested(src_pmd, addr); 504 src_pte = pte_offset_map_nested(src_pmd, addr);
505 src_ptl = pte_lockptr(src_mm, src_pmd); 505 src_ptl = pte_lockptr(src_mm, src_pmd);
506 spin_lock(src_ptl); 506 spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
507 507
508 do { 508 do {
509 /* 509 /*
diff --git a/mm/mremap.c b/mm/mremap.c
index 1903bdf65e42..7c15cf3373ad 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -97,7 +97,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
97 new_pte = pte_offset_map_nested(new_pmd, new_addr); 97 new_pte = pte_offset_map_nested(new_pmd, new_addr);
98 new_ptl = pte_lockptr(mm, new_pmd); 98 new_ptl = pte_lockptr(mm, new_pmd);
99 if (new_ptl != old_ptl) 99 if (new_ptl != old_ptl)
100 spin_lock(new_ptl); 100 spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
101 101
102 for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE, 102 for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
103 new_pte++, new_addr += PAGE_SIZE) { 103 new_pte++, new_addr += PAGE_SIZE) {
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index d46ed0f1dc06..b9af136e5cfa 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -225,7 +225,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
225 * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that 225 * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that
226 * we select a process with CAP_SYS_RAW_IO set). 226 * we select a process with CAP_SYS_RAW_IO set).
227 */ 227 */
228static void __oom_kill_task(task_t *p, const char *message) 228static void __oom_kill_task(struct task_struct *p, const char *message)
229{ 229{
230 if (p->pid == 1) { 230 if (p->pid == 1) {
231 WARN_ON(1); 231 WARN_ON(1);
@@ -255,10 +255,10 @@ static void __oom_kill_task(task_t *p, const char *message)
255 force_sig(SIGKILL, p); 255 force_sig(SIGKILL, p);
256} 256}
257 257
258static int oom_kill_task(task_t *p, const char *message) 258static int oom_kill_task(struct task_struct *p, const char *message)
259{ 259{
260 struct mm_struct *mm; 260 struct mm_struct *mm;
261 task_t * g, * q; 261 struct task_struct *g, *q;
262 262
263 mm = p->mm; 263 mm = p->mm;
264 264
@@ -316,7 +316,7 @@ static int oom_kill_process(struct task_struct *p, unsigned long points,
316 */ 316 */
317void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) 317void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
318{ 318{
319 task_t *p; 319 struct task_struct *p;
320 unsigned long points = 0; 320 unsigned long points = 0;
321 321
322 if (printk_ratelimit()) { 322 if (printk_ratelimit()) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3e792a583f3b..54a4f5375bba 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2005,6 +2005,10 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
2005 2005
2006 zone->spanned_pages = size; 2006 zone->spanned_pages = size;
2007 zone->present_pages = realsize; 2007 zone->present_pages = realsize;
2008#ifdef CONFIG_NUMA
2009 zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio)
2010 / 100;
2011#endif
2008 zone->name = zone_names[j]; 2012 zone->name = zone_names[j];
2009 spin_lock_init(&zone->lock); 2013 spin_lock_init(&zone->lock);
2010 spin_lock_init(&zone->lru_lock); 2014 spin_lock_init(&zone->lru_lock);
@@ -2298,6 +2302,24 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
2298 return 0; 2302 return 0;
2299} 2303}
2300 2304
2305#ifdef CONFIG_NUMA
2306int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
2307 struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
2308{
2309 struct zone *zone;
2310 int rc;
2311
2312 rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
2313 if (rc)
2314 return rc;
2315
2316 for_each_zone(zone)
2317 zone->min_unmapped_ratio = (zone->present_pages *
2318 sysctl_min_unmapped_ratio) / 100;
2319 return 0;
2320}
2321#endif
2322
2301/* 2323/*
2302 * lowmem_reserve_ratio_sysctl_handler - just a wrapper around 2324 * lowmem_reserve_ratio_sysctl_handler - just a wrapper around
2303 * proc_dointvec() so that we can call setup_per_zone_lowmem_reserve() 2325 * proc_dointvec() so that we can call setup_per_zone_lowmem_reserve()
diff --git a/mm/slab.c b/mm/slab.c
index 3936af344542..85c2e03098a7 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1021,7 +1021,8 @@ static void drain_alien_cache(struct kmem_cache *cachep,
1021 } 1021 }
1022} 1022}
1023 1023
1024static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) 1024static inline int cache_free_alien(struct kmem_cache *cachep, void *objp,
1025 int nesting)
1025{ 1026{
1026 struct slab *slabp = virt_to_slab(objp); 1027 struct slab *slabp = virt_to_slab(objp);
1027 int nodeid = slabp->nodeid; 1028 int nodeid = slabp->nodeid;
@@ -1039,7 +1040,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1039 STATS_INC_NODEFREES(cachep); 1040 STATS_INC_NODEFREES(cachep);
1040 if (l3->alien && l3->alien[nodeid]) { 1041 if (l3->alien && l3->alien[nodeid]) {
1041 alien = l3->alien[nodeid]; 1042 alien = l3->alien[nodeid];
1042 spin_lock(&alien->lock); 1043 spin_lock_nested(&alien->lock, nesting);
1043 if (unlikely(alien->avail == alien->limit)) { 1044 if (unlikely(alien->avail == alien->limit)) {
1044 STATS_INC_ACOVERFLOW(cachep); 1045 STATS_INC_ACOVERFLOW(cachep);
1045 __drain_alien_cache(cachep, alien, nodeid); 1046 __drain_alien_cache(cachep, alien, nodeid);
@@ -1068,7 +1069,8 @@ static inline void free_alien_cache(struct array_cache **ac_ptr)
1068{ 1069{
1069} 1070}
1070 1071
1071static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) 1072static inline int cache_free_alien(struct kmem_cache *cachep, void *objp,
1073 int nesting)
1072{ 1074{
1073 return 0; 1075 return 0;
1074} 1076}
@@ -1272,6 +1274,11 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
1272 1274
1273 local_irq_disable(); 1275 local_irq_disable();
1274 memcpy(ptr, list, sizeof(struct kmem_list3)); 1276 memcpy(ptr, list, sizeof(struct kmem_list3));
1277 /*
1278 * Do not assume that spinlocks can be initialized via memcpy:
1279 */
1280 spin_lock_init(&ptr->list_lock);
1281
1275 MAKE_ALL_LISTS(cachep, ptr, nodeid); 1282 MAKE_ALL_LISTS(cachep, ptr, nodeid);
1276 cachep->nodelists[nodeid] = ptr; 1283 cachep->nodelists[nodeid] = ptr;
1277 local_irq_enable(); 1284 local_irq_enable();
@@ -1398,7 +1405,7 @@ void __init kmem_cache_init(void)
1398 } 1405 }
1399 /* 4) Replace the bootstrap head arrays */ 1406 /* 4) Replace the bootstrap head arrays */
1400 { 1407 {
1401 void *ptr; 1408 struct array_cache *ptr;
1402 1409
1403 ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); 1410 ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
1404 1411
@@ -1406,6 +1413,11 @@ void __init kmem_cache_init(void)
1406 BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); 1413 BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
1407 memcpy(ptr, cpu_cache_get(&cache_cache), 1414 memcpy(ptr, cpu_cache_get(&cache_cache),
1408 sizeof(struct arraycache_init)); 1415 sizeof(struct arraycache_init));
1416 /*
1417 * Do not assume that spinlocks can be initialized via memcpy:
1418 */
1419 spin_lock_init(&ptr->lock);
1420
1409 cache_cache.array[smp_processor_id()] = ptr; 1421 cache_cache.array[smp_processor_id()] = ptr;
1410 local_irq_enable(); 1422 local_irq_enable();
1411 1423
@@ -1416,6 +1428,11 @@ void __init kmem_cache_init(void)
1416 != &initarray_generic.cache); 1428 != &initarray_generic.cache);
1417 memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), 1429 memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
1418 sizeof(struct arraycache_init)); 1430 sizeof(struct arraycache_init));
1431 /*
1432 * Do not assume that spinlocks can be initialized via memcpy:
1433 */
1434 spin_lock_init(&ptr->lock);
1435
1419 malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = 1436 malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
1420 ptr; 1437 ptr;
1421 local_irq_enable(); 1438 local_irq_enable();
@@ -1743,6 +1760,8 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
1743} 1760}
1744#endif 1761#endif
1745 1762
1763static void __cache_free(struct kmem_cache *cachep, void *objp, int nesting);
1764
1746/** 1765/**
1747 * slab_destroy - destroy and release all objects in a slab 1766 * slab_destroy - destroy and release all objects in a slab
1748 * @cachep: cache pointer being destroyed 1767 * @cachep: cache pointer being destroyed
@@ -1766,8 +1785,17 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
1766 call_rcu(&slab_rcu->head, kmem_rcu_free); 1785 call_rcu(&slab_rcu->head, kmem_rcu_free);
1767 } else { 1786 } else {
1768 kmem_freepages(cachep, addr); 1787 kmem_freepages(cachep, addr);
1769 if (OFF_SLAB(cachep)) 1788 if (OFF_SLAB(cachep)) {
1770 kmem_cache_free(cachep->slabp_cache, slabp); 1789 unsigned long flags;
1790
1791 /*
1792 * lockdep: we may nest inside an already held
1793 * ac->lock, so pass in a nesting flag:
1794 */
1795 local_irq_save(flags);
1796 __cache_free(cachep->slabp_cache, slabp, 1);
1797 local_irq_restore(flags);
1798 }
1771 } 1799 }
1772} 1800}
1773 1801
@@ -3072,7 +3100,16 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3072 if (slabp->inuse == 0) { 3100 if (slabp->inuse == 0) {
3073 if (l3->free_objects > l3->free_limit) { 3101 if (l3->free_objects > l3->free_limit) {
3074 l3->free_objects -= cachep->num; 3102 l3->free_objects -= cachep->num;
3103 /*
3104 * It is safe to drop the lock. The slab is
3105 * no longer linked to the cache. cachep
3106 * cannot disappear - we are using it and
3107 * all destruction of caches must be
3108 * serialized properly by the user.
3109 */
3110 spin_unlock(&l3->list_lock);
3075 slab_destroy(cachep, slabp); 3111 slab_destroy(cachep, slabp);
3112 spin_lock(&l3->list_lock);
3076 } else { 3113 } else {
3077 list_add(&slabp->list, &l3->slabs_free); 3114 list_add(&slabp->list, &l3->slabs_free);
3078 } 3115 }
@@ -3098,7 +3135,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
3098#endif 3135#endif
3099 check_irq_off(); 3136 check_irq_off();
3100 l3 = cachep->nodelists[node]; 3137 l3 = cachep->nodelists[node];
3101 spin_lock(&l3->list_lock); 3138 spin_lock_nested(&l3->list_lock, SINGLE_DEPTH_NESTING);
3102 if (l3->shared) { 3139 if (l3->shared) {
3103 struct array_cache *shared_array = l3->shared; 3140 struct array_cache *shared_array = l3->shared;
3104 int max = shared_array->limit - shared_array->avail; 3141 int max = shared_array->limit - shared_array->avail;
@@ -3141,14 +3178,14 @@ free_done:
3141 * Release an obj back to its cache. If the obj has a constructed state, it must 3178 * Release an obj back to its cache. If the obj has a constructed state, it must
3142 * be in this state _before_ it is released. Called with disabled ints. 3179 * be in this state _before_ it is released. Called with disabled ints.
3143 */ 3180 */
3144static inline void __cache_free(struct kmem_cache *cachep, void *objp) 3181static void __cache_free(struct kmem_cache *cachep, void *objp, int nesting)
3145{ 3182{
3146 struct array_cache *ac = cpu_cache_get(cachep); 3183 struct array_cache *ac = cpu_cache_get(cachep);
3147 3184
3148 check_irq_off(); 3185 check_irq_off();
3149 objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); 3186 objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
3150 3187
3151 if (cache_free_alien(cachep, objp)) 3188 if (cache_free_alien(cachep, objp, nesting))
3152 return; 3189 return;
3153 3190
3154 if (likely(ac->avail < ac->limit)) { 3191 if (likely(ac->avail < ac->limit)) {
@@ -3387,7 +3424,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
3387 BUG_ON(virt_to_cache(objp) != cachep); 3424 BUG_ON(virt_to_cache(objp) != cachep);
3388 3425
3389 local_irq_save(flags); 3426 local_irq_save(flags);
3390 __cache_free(cachep, objp); 3427 __cache_free(cachep, objp, 0);
3391 local_irq_restore(flags); 3428 local_irq_restore(flags);
3392} 3429}
3393EXPORT_SYMBOL(kmem_cache_free); 3430EXPORT_SYMBOL(kmem_cache_free);
@@ -3412,7 +3449,7 @@ void kfree(const void *objp)
3412 kfree_debugcheck(objp); 3449 kfree_debugcheck(objp);
3413 c = virt_to_cache(objp); 3450 c = virt_to_cache(objp);
3414 debug_check_no_locks_freed(objp, obj_size(c)); 3451 debug_check_no_locks_freed(objp, obj_size(c));
3415 __cache_free(c, (void *)objp); 3452 __cache_free(c, (void *)objp, 0);
3416 local_irq_restore(flags); 3453 local_irq_restore(flags);
3417} 3454}
3418EXPORT_SYMBOL(kfree); 3455EXPORT_SYMBOL(kfree);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index fccbd9bba77b..5f7cf2a4cb55 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -38,7 +38,7 @@ static struct backing_dev_info swap_backing_dev_info = {
38 38
39struct address_space swapper_space = { 39struct address_space swapper_space = {
40 .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), 40 .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
41 .tree_lock = RW_LOCK_UNLOCKED, 41 .tree_lock = __RW_LOCK_UNLOCKED(swapper_space.tree_lock),
42 .a_ops = &swap_aops, 42 .a_ops = &swap_aops,
43 .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), 43 .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
44 .backing_dev_info = &swap_backing_dev_info, 44 .backing_dev_info = &swap_backing_dev_info,
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 35f8553f893a..7b450798b458 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -330,6 +330,8 @@ void __vunmap(void *addr, int deallocate_pages)
330 return; 330 return;
331 } 331 }
332 332
333 debug_check_no_locks_freed(addr, area->size);
334
333 if (deallocate_pages) { 335 if (deallocate_pages) {
334 int i; 336 int i;
335 337
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ff2ebe9458a3..5d4c4d02254d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1503,10 +1503,6 @@ module_init(kswapd_init)
1503 * 1503 *
1504 * If non-zero call zone_reclaim when the number of free pages falls below 1504 * If non-zero call zone_reclaim when the number of free pages falls below
1505 * the watermarks. 1505 * the watermarks.
1506 *
1507 * In the future we may add flags to the mode. However, the page allocator
1508 * should only have to check that zone_reclaim_mode != 0 before calling
1509 * zone_reclaim().
1510 */ 1506 */
1511int zone_reclaim_mode __read_mostly; 1507int zone_reclaim_mode __read_mostly;
1512 1508
@@ -1524,6 +1520,12 @@ int zone_reclaim_mode __read_mostly;
1524#define ZONE_RECLAIM_PRIORITY 4 1520#define ZONE_RECLAIM_PRIORITY 4
1525 1521
1526/* 1522/*
1523 * Percentage of pages in a zone that must be unmapped for zone_reclaim to
1524 * occur.
1525 */
1526int sysctl_min_unmapped_ratio = 1;
1527
1528/*
1527 * Try to free up some pages from this zone through reclaim. 1529 * Try to free up some pages from this zone through reclaim.
1528 */ 1530 */
1529static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) 1531static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
@@ -1590,18 +1592,17 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
1590 int node_id; 1592 int node_id;
1591 1593
1592 /* 1594 /*
1593 * Do not reclaim if there are not enough reclaimable pages in this 1595 * Zone reclaim reclaims unmapped file backed pages.
1594 * zone that would satify this allocations.
1595 * 1596 *
1596 * All unmapped pagecache pages are reclaimable. 1597 * A small portion of unmapped file backed pages is needed for
1597 * 1598 * file I/O otherwise pages read by file I/O will be immediately
1598 * Both counters may be temporarily off a bit so we use 1599 * thrown out if the zone is overallocated. So we do not reclaim
1599 * SWAP_CLUSTER_MAX as the boundary. It may also be good to 1600 * if less than a specified percentage of the zone is used by
1600 * leave a few frequently used unmapped pagecache pages around. 1601 * unmapped file backed pages.
1601 */ 1602 */
1602 if (zone_page_state(zone, NR_FILE_PAGES) - 1603 if (zone_page_state(zone, NR_FILE_PAGES) -
1603 zone_page_state(zone, NR_FILE_MAPPED) < SWAP_CLUSTER_MAX) 1604 zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_ratio)
1604 return 0; 1605 return 0;
1605 1606
1606 /* 1607 /*
1607 * Avoid concurrent zone reclaims, do not reclaim in a zone that does 1608 * Avoid concurrent zone reclaims, do not reclaim in a zone that does