aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/backing-dev.c7
-rw-r--r--mm/bootmem.c6
-rw-r--r--mm/filemap.c1
-rw-r--r--mm/hugetlb.c2
-rw-r--r--mm/kmemleak.c240
-rw-r--r--mm/memcontrol.c25
-rw-r--r--mm/memory.c11
-rw-r--r--mm/mempolicy.c84
-rw-r--r--mm/page-writeback.c8
-rw-r--r--mm/page_alloc.c41
-rw-r--r--mm/slab.c8
-rw-r--r--mm/slob.c2
-rw-r--r--mm/slub.c12
-rw-r--r--mm/swapfile.c4
-rw-r--r--mm/vmscan.c8
15 files changed, 307 insertions, 152 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 493b468a5035..c86edd244294 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -283,7 +283,6 @@ static wait_queue_head_t congestion_wqh[2] = {
283 __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) 283 __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
284 }; 284 };
285 285
286
287void clear_bdi_congested(struct backing_dev_info *bdi, int sync) 286void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
288{ 287{
289 enum bdi_state bit; 288 enum bdi_state bit;
@@ -308,18 +307,18 @@ EXPORT_SYMBOL(set_bdi_congested);
308 307
309/** 308/**
310 * congestion_wait - wait for a backing_dev to become uncongested 309 * congestion_wait - wait for a backing_dev to become uncongested
311 * @rw: READ or WRITE 310 * @sync: SYNC or ASYNC IO
312 * @timeout: timeout in jiffies 311 * @timeout: timeout in jiffies
313 * 312 *
314 * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit 313 * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
315 * write congestion. If no backing_devs are congested then just wait for the 314 * write congestion. If no backing_devs are congested then just wait for the
316 * next write to be completed. 315 * next write to be completed.
317 */ 316 */
318long congestion_wait(int rw, long timeout) 317long congestion_wait(int sync, long timeout)
319{ 318{
320 long ret; 319 long ret;
321 DEFINE_WAIT(wait); 320 DEFINE_WAIT(wait);
322 wait_queue_head_t *wqh = &congestion_wqh[rw]; 321 wait_queue_head_t *wqh = &congestion_wqh[sync];
323 322
324 prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); 323 prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
325 ret = io_schedule_timeout(timeout); 324 ret = io_schedule_timeout(timeout);
diff --git a/mm/bootmem.c b/mm/bootmem.c
index d2a9ce952768..701740c9e81b 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -12,6 +12,7 @@
12#include <linux/pfn.h> 12#include <linux/pfn.h>
13#include <linux/bootmem.h> 13#include <linux/bootmem.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/kmemleak.h>
15 16
16#include <asm/bug.h> 17#include <asm/bug.h>
17#include <asm/io.h> 18#include <asm/io.h>
@@ -335,6 +336,8 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
335{ 336{
336 unsigned long start, end; 337 unsigned long start, end;
337 338
339 kmemleak_free_part(__va(physaddr), size);
340
338 start = PFN_UP(physaddr); 341 start = PFN_UP(physaddr);
339 end = PFN_DOWN(physaddr + size); 342 end = PFN_DOWN(physaddr + size);
340 343
@@ -354,6 +357,8 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
354{ 357{
355 unsigned long start, end; 358 unsigned long start, end;
356 359
360 kmemleak_free_part(__va(addr), size);
361
357 start = PFN_UP(addr); 362 start = PFN_UP(addr);
358 end = PFN_DOWN(addr + size); 363 end = PFN_DOWN(addr + size);
359 364
@@ -516,6 +521,7 @@ find_block:
516 region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) + 521 region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
517 start_off); 522 start_off);
518 memset(region, 0, size); 523 memset(region, 0, size);
524 kmemleak_alloc(region, size, 1, 0);
519 return region; 525 return region;
520 } 526 }
521 527
diff --git a/mm/filemap.c b/mm/filemap.c
index 22396713feb9..ccea3b665c12 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2272,6 +2272,7 @@ again:
2272 pagefault_enable(); 2272 pagefault_enable();
2273 flush_dcache_page(page); 2273 flush_dcache_page(page);
2274 2274
2275 mark_page_accessed(page);
2275 status = a_ops->write_end(file, mapping, pos, bytes, copied, 2276 status = a_ops->write_end(file, mapping, pos, bytes, copied,
2276 page, fsdata); 2277 page, fsdata);
2277 if (unlikely(status < 0)) 2278 if (unlikely(status < 0))
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d0351e31f474..cafdcee154e8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2370,7 +2370,7 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
2370 long chg = region_truncate(&inode->i_mapping->private_list, offset); 2370 long chg = region_truncate(&inode->i_mapping->private_list, offset);
2371 2371
2372 spin_lock(&inode->i_lock); 2372 spin_lock(&inode->i_lock);
2373 inode->i_blocks -= blocks_per_huge_page(h); 2373 inode->i_blocks -= (blocks_per_huge_page(h) * freed);
2374 spin_unlock(&inode->i_lock); 2374 spin_unlock(&inode->i_lock);
2375 2375
2376 hugetlb_put_quota(inode->i_mapping, (chg - freed)); 2376 hugetlb_put_quota(inode->i_mapping, (chg - freed));
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index e766e1da09d2..487267310a84 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -103,10 +103,10 @@
103 * Kmemleak configuration and common defines. 103 * Kmemleak configuration and common defines.
104 */ 104 */
105#define MAX_TRACE 16 /* stack trace length */ 105#define MAX_TRACE 16 /* stack trace length */
106#define REPORTS_NR 50 /* maximum number of reported leaks */
107#define MSECS_MIN_AGE 5000 /* minimum object age for reporting */ 106#define MSECS_MIN_AGE 5000 /* minimum object age for reporting */
108#define SECS_FIRST_SCAN 60 /* delay before the first scan */ 107#define SECS_FIRST_SCAN 60 /* delay before the first scan */
109#define SECS_SCAN_WAIT 600 /* subsequent auto scanning delay */ 108#define SECS_SCAN_WAIT 600 /* subsequent auto scanning delay */
109#define GRAY_LIST_PASSES 25 /* maximum number of gray list scans */
110 110
111#define BYTES_PER_POINTER sizeof(void *) 111#define BYTES_PER_POINTER sizeof(void *)
112 112
@@ -158,6 +158,8 @@ struct kmemleak_object {
158#define OBJECT_REPORTED (1 << 1) 158#define OBJECT_REPORTED (1 << 1)
159/* flag set to not scan the object */ 159/* flag set to not scan the object */
160#define OBJECT_NO_SCAN (1 << 2) 160#define OBJECT_NO_SCAN (1 << 2)
161/* flag set on newly allocated objects */
162#define OBJECT_NEW (1 << 3)
161 163
162/* the list of all allocated objects */ 164/* the list of all allocated objects */
163static LIST_HEAD(object_list); 165static LIST_HEAD(object_list);
@@ -196,9 +198,6 @@ static int kmemleak_stack_scan = 1;
196/* protects the memory scanning, parameters and debug/kmemleak file access */ 198/* protects the memory scanning, parameters and debug/kmemleak file access */
197static DEFINE_MUTEX(scan_mutex); 199static DEFINE_MUTEX(scan_mutex);
198 200
199/* number of leaks reported (for limitation purposes) */
200static int reported_leaks;
201
202/* 201/*
203 * Early object allocation/freeing logging. Kmemleak is initialized after the 202 * Early object allocation/freeing logging. Kmemleak is initialized after the
204 * kernel allocator. However, both the kernel allocator and kmemleak may 203 * kernel allocator. However, both the kernel allocator and kmemleak may
@@ -211,6 +210,7 @@ static int reported_leaks;
211enum { 210enum {
212 KMEMLEAK_ALLOC, 211 KMEMLEAK_ALLOC,
213 KMEMLEAK_FREE, 212 KMEMLEAK_FREE,
213 KMEMLEAK_FREE_PART,
214 KMEMLEAK_NOT_LEAK, 214 KMEMLEAK_NOT_LEAK,
215 KMEMLEAK_IGNORE, 215 KMEMLEAK_IGNORE,
216 KMEMLEAK_SCAN_AREA, 216 KMEMLEAK_SCAN_AREA,
@@ -274,6 +274,11 @@ static int color_gray(const struct kmemleak_object *object)
274 return object->min_count != -1 && object->count >= object->min_count; 274 return object->min_count != -1 && object->count >= object->min_count;
275} 275}
276 276
277static int color_black(const struct kmemleak_object *object)
278{
279 return object->min_count == -1;
280}
281
277/* 282/*
278 * Objects are considered unreferenced only if their color is white, they have 283 * Objects are considered unreferenced only if their color is white, they have
279 * not be deleted and have a minimum age to avoid false positives caused by 284 * not be deleted and have a minimum age to avoid false positives caused by
@@ -451,7 +456,7 @@ static void create_object(unsigned long ptr, size_t size, int min_count,
451 INIT_HLIST_HEAD(&object->area_list); 456 INIT_HLIST_HEAD(&object->area_list);
452 spin_lock_init(&object->lock); 457 spin_lock_init(&object->lock);
453 atomic_set(&object->use_count, 1); 458 atomic_set(&object->use_count, 1);
454 object->flags = OBJECT_ALLOCATED; 459 object->flags = OBJECT_ALLOCATED | OBJECT_NEW;
455 object->pointer = ptr; 460 object->pointer = ptr;
456 object->size = size; 461 object->size = size;
457 object->min_count = min_count; 462 object->min_count = min_count;
@@ -519,27 +524,17 @@ out:
519 * Remove the metadata (struct kmemleak_object) for a memory block from the 524 * Remove the metadata (struct kmemleak_object) for a memory block from the
520 * object_list and object_tree_root and decrement its use_count. 525 * object_list and object_tree_root and decrement its use_count.
521 */ 526 */
522static void delete_object(unsigned long ptr) 527static void __delete_object(struct kmemleak_object *object)
523{ 528{
524 unsigned long flags; 529 unsigned long flags;
525 struct kmemleak_object *object;
526 530
527 write_lock_irqsave(&kmemleak_lock, flags); 531 write_lock_irqsave(&kmemleak_lock, flags);
528 object = lookup_object(ptr, 0);
529 if (!object) {
530#ifdef DEBUG
531 kmemleak_warn("Freeing unknown object at 0x%08lx\n",
532 ptr);
533#endif
534 write_unlock_irqrestore(&kmemleak_lock, flags);
535 return;
536 }
537 prio_tree_remove(&object_tree_root, &object->tree_node); 532 prio_tree_remove(&object_tree_root, &object->tree_node);
538 list_del_rcu(&object->object_list); 533 list_del_rcu(&object->object_list);
539 write_unlock_irqrestore(&kmemleak_lock, flags); 534 write_unlock_irqrestore(&kmemleak_lock, flags);
540 535
541 WARN_ON(!(object->flags & OBJECT_ALLOCATED)); 536 WARN_ON(!(object->flags & OBJECT_ALLOCATED));
542 WARN_ON(atomic_read(&object->use_count) < 1); 537 WARN_ON(atomic_read(&object->use_count) < 2);
543 538
544 /* 539 /*
545 * Locking here also ensures that the corresponding memory block 540 * Locking here also ensures that the corresponding memory block
@@ -552,6 +547,64 @@ static void delete_object(unsigned long ptr)
552} 547}
553 548
554/* 549/*
550 * Look up the metadata (struct kmemleak_object) corresponding to ptr and
551 * delete it.
552 */
553static void delete_object_full(unsigned long ptr)
554{
555 struct kmemleak_object *object;
556
557 object = find_and_get_object(ptr, 0);
558 if (!object) {
559#ifdef DEBUG
560 kmemleak_warn("Freeing unknown object at 0x%08lx\n",
561 ptr);
562#endif
563 return;
564 }
565 __delete_object(object);
566 put_object(object);
567}
568
569/*
570 * Look up the metadata (struct kmemleak_object) corresponding to ptr and
571 * delete it. If the memory block is partially freed, the function may create
572 * additional metadata for the remaining parts of the block.
573 */
574static void delete_object_part(unsigned long ptr, size_t size)
575{
576 struct kmemleak_object *object;
577 unsigned long start, end;
578
579 object = find_and_get_object(ptr, 1);
580 if (!object) {
581#ifdef DEBUG
582 kmemleak_warn("Partially freeing unknown object at 0x%08lx "
583 "(size %zu)\n", ptr, size);
584#endif
585 return;
586 }
587 __delete_object(object);
588
589 /*
590 * Create one or two objects that may result from the memory block
591 * split. Note that partial freeing is only done by free_bootmem() and
592 * this happens before kmemleak_init() is called. The path below is
593 * only executed during early log recording in kmemleak_init(), so
594 * GFP_KERNEL is enough.
595 */
596 start = object->pointer;
597 end = object->pointer + object->size;
598 if (ptr > start)
599 create_object(start, ptr - start, object->min_count,
600 GFP_KERNEL);
601 if (ptr + size < end)
602 create_object(ptr + size, end - ptr - size, object->min_count,
603 GFP_KERNEL);
604
605 put_object(object);
606}
607/*
555 * Make a object permanently as gray-colored so that it can no longer be 608 * Make a object permanently as gray-colored so that it can no longer be
556 * reported as a leak. This is used in general to mark a false positive. 609 * reported as a leak. This is used in general to mark a false positive.
557 */ 610 */
@@ -715,13 +768,28 @@ void kmemleak_free(const void *ptr)
715 pr_debug("%s(0x%p)\n", __func__, ptr); 768 pr_debug("%s(0x%p)\n", __func__, ptr);
716 769
717 if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) 770 if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
718 delete_object((unsigned long)ptr); 771 delete_object_full((unsigned long)ptr);
719 else if (atomic_read(&kmemleak_early_log)) 772 else if (atomic_read(&kmemleak_early_log))
720 log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0); 773 log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0);
721} 774}
722EXPORT_SYMBOL_GPL(kmemleak_free); 775EXPORT_SYMBOL_GPL(kmemleak_free);
723 776
724/* 777/*
778 * Partial memory freeing function callback. This function is usually called
779 * from bootmem allocator when (part of) a memory block is freed.
780 */
781void kmemleak_free_part(const void *ptr, size_t size)
782{
783 pr_debug("%s(0x%p)\n", __func__, ptr);
784
785 if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
786 delete_object_part((unsigned long)ptr, size);
787 else if (atomic_read(&kmemleak_early_log))
788 log_early(KMEMLEAK_FREE_PART, ptr, size, 0, 0, 0);
789}
790EXPORT_SYMBOL_GPL(kmemleak_free_part);
791
792/*
725 * Mark an already allocated memory block as a false positive. This will cause 793 * Mark an already allocated memory block as a false positive. This will cause
726 * the block to no longer be reported as leak and always be scanned. 794 * the block to no longer be reported as leak and always be scanned.
727 */ 795 */
@@ -807,7 +875,7 @@ static int scan_should_stop(void)
807 * found to the gray list. 875 * found to the gray list.
808 */ 876 */
809static void scan_block(void *_start, void *_end, 877static void scan_block(void *_start, void *_end,
810 struct kmemleak_object *scanned) 878 struct kmemleak_object *scanned, int allow_resched)
811{ 879{
812 unsigned long *ptr; 880 unsigned long *ptr;
813 unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER); 881 unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER);
@@ -818,6 +886,8 @@ static void scan_block(void *_start, void *_end,
818 unsigned long pointer = *ptr; 886 unsigned long pointer = *ptr;
819 struct kmemleak_object *object; 887 struct kmemleak_object *object;
820 888
889 if (allow_resched)
890 cond_resched();
821 if (scan_should_stop()) 891 if (scan_should_stop())
822 break; 892 break;
823 893
@@ -881,12 +951,12 @@ static void scan_object(struct kmemleak_object *object)
881 goto out; 951 goto out;
882 if (hlist_empty(&object->area_list)) 952 if (hlist_empty(&object->area_list))
883 scan_block((void *)object->pointer, 953 scan_block((void *)object->pointer,
884 (void *)(object->pointer + object->size), object); 954 (void *)(object->pointer + object->size), object, 0);
885 else 955 else
886 hlist_for_each_entry(area, elem, &object->area_list, node) 956 hlist_for_each_entry(area, elem, &object->area_list, node)
887 scan_block((void *)(object->pointer + area->offset), 957 scan_block((void *)(object->pointer + area->offset),
888 (void *)(object->pointer + area->offset 958 (void *)(object->pointer + area->offset
889 + area->length), object); 959 + area->length), object, 0);
890out: 960out:
891 spin_unlock_irqrestore(&object->lock, flags); 961 spin_unlock_irqrestore(&object->lock, flags);
892} 962}
@@ -903,6 +973,7 @@ static void kmemleak_scan(void)
903 struct task_struct *task; 973 struct task_struct *task;
904 int i; 974 int i;
905 int new_leaks = 0; 975 int new_leaks = 0;
976 int gray_list_pass = 0;
906 977
907 jiffies_last_scan = jiffies; 978 jiffies_last_scan = jiffies;
908 979
@@ -923,6 +994,7 @@ static void kmemleak_scan(void)
923#endif 994#endif
924 /* reset the reference count (whiten the object) */ 995 /* reset the reference count (whiten the object) */
925 object->count = 0; 996 object->count = 0;
997 object->flags &= ~OBJECT_NEW;
926 if (color_gray(object) && get_object(object)) 998 if (color_gray(object) && get_object(object))
927 list_add_tail(&object->gray_list, &gray_list); 999 list_add_tail(&object->gray_list, &gray_list);
928 1000
@@ -931,14 +1003,14 @@ static void kmemleak_scan(void)
931 rcu_read_unlock(); 1003 rcu_read_unlock();
932 1004
933 /* data/bss scanning */ 1005 /* data/bss scanning */
934 scan_block(_sdata, _edata, NULL); 1006 scan_block(_sdata, _edata, NULL, 1);
935 scan_block(__bss_start, __bss_stop, NULL); 1007 scan_block(__bss_start, __bss_stop, NULL, 1);
936 1008
937#ifdef CONFIG_SMP 1009#ifdef CONFIG_SMP
938 /* per-cpu sections scanning */ 1010 /* per-cpu sections scanning */
939 for_each_possible_cpu(i) 1011 for_each_possible_cpu(i)
940 scan_block(__per_cpu_start + per_cpu_offset(i), 1012 scan_block(__per_cpu_start + per_cpu_offset(i),
941 __per_cpu_end + per_cpu_offset(i), NULL); 1013 __per_cpu_end + per_cpu_offset(i), NULL, 1);
942#endif 1014#endif
943 1015
944 /* 1016 /*
@@ -960,7 +1032,7 @@ static void kmemleak_scan(void)
960 /* only scan if page is in use */ 1032 /* only scan if page is in use */
961 if (page_count(page) == 0) 1033 if (page_count(page) == 0)
962 continue; 1034 continue;
963 scan_block(page, page + 1, NULL); 1035 scan_block(page, page + 1, NULL, 1);
964 } 1036 }
965 } 1037 }
966 1038
@@ -972,7 +1044,8 @@ static void kmemleak_scan(void)
972 read_lock(&tasklist_lock); 1044 read_lock(&tasklist_lock);
973 for_each_process(task) 1045 for_each_process(task)
974 scan_block(task_stack_page(task), 1046 scan_block(task_stack_page(task),
975 task_stack_page(task) + THREAD_SIZE, NULL); 1047 task_stack_page(task) + THREAD_SIZE,
1048 NULL, 0);
976 read_unlock(&tasklist_lock); 1049 read_unlock(&tasklist_lock);
977 } 1050 }
978 1051
@@ -984,6 +1057,7 @@ static void kmemleak_scan(void)
984 * kmemleak objects cannot be freed from outside the loop because their 1057 * kmemleak objects cannot be freed from outside the loop because their
985 * use_count was increased. 1058 * use_count was increased.
986 */ 1059 */
1060repeat:
987 object = list_entry(gray_list.next, typeof(*object), gray_list); 1061 object = list_entry(gray_list.next, typeof(*object), gray_list);
988 while (&object->gray_list != &gray_list) { 1062 while (&object->gray_list != &gray_list) {
989 cond_resched(); 1063 cond_resched();
@@ -1001,12 +1075,38 @@ static void kmemleak_scan(void)
1001 1075
1002 object = tmp; 1076 object = tmp;
1003 } 1077 }
1078
1079 if (scan_should_stop() || ++gray_list_pass >= GRAY_LIST_PASSES)
1080 goto scan_end;
1081
1082 /*
1083 * Check for new objects allocated during this scanning and add them
1084 * to the gray list.
1085 */
1086 rcu_read_lock();
1087 list_for_each_entry_rcu(object, &object_list, object_list) {
1088 spin_lock_irqsave(&object->lock, flags);
1089 if ((object->flags & OBJECT_NEW) && !color_black(object) &&
1090 get_object(object)) {
1091 object->flags &= ~OBJECT_NEW;
1092 list_add_tail(&object->gray_list, &gray_list);
1093 }
1094 spin_unlock_irqrestore(&object->lock, flags);
1095 }
1096 rcu_read_unlock();
1097
1098 if (!list_empty(&gray_list))
1099 goto repeat;
1100
1101scan_end:
1004 WARN_ON(!list_empty(&gray_list)); 1102 WARN_ON(!list_empty(&gray_list));
1005 1103
1006 /* 1104 /*
1007 * If scanning was stopped do not report any new unreferenced objects. 1105 * If scanning was stopped or new objects were being allocated at a
1106 * higher rate than gray list scanning, do not report any new
1107 * unreferenced objects.
1008 */ 1108 */
1009 if (scan_should_stop()) 1109 if (scan_should_stop() || gray_list_pass >= GRAY_LIST_PASSES)
1010 return; 1110 return;
1011 1111
1012 /* 1112 /*
@@ -1039,6 +1139,7 @@ static int kmemleak_scan_thread(void *arg)
1039 static int first_run = 1; 1139 static int first_run = 1;
1040 1140
1041 pr_info("Automatic memory scanning thread started\n"); 1141 pr_info("Automatic memory scanning thread started\n");
1142 set_user_nice(current, 10);
1042 1143
1043 /* 1144 /*
1044 * Wait before the first scan to allow the system to fully initialize. 1145 * Wait before the first scan to allow the system to fully initialize.
@@ -1101,11 +1202,11 @@ static void *kmemleak_seq_start(struct seq_file *seq, loff_t *pos)
1101{ 1202{
1102 struct kmemleak_object *object; 1203 struct kmemleak_object *object;
1103 loff_t n = *pos; 1204 loff_t n = *pos;
1205 int err;
1104 1206
1105 if (!n) 1207 err = mutex_lock_interruptible(&scan_mutex);
1106 reported_leaks = 0; 1208 if (err < 0)
1107 if (reported_leaks >= REPORTS_NR) 1209 return ERR_PTR(err);
1108 return NULL;
1109 1210
1110 rcu_read_lock(); 1211 rcu_read_lock();
1111 list_for_each_entry_rcu(object, &object_list, object_list) { 1212 list_for_each_entry_rcu(object, &object_list, object_list) {
@@ -1116,7 +1217,6 @@ static void *kmemleak_seq_start(struct seq_file *seq, loff_t *pos)
1116 } 1217 }
1117 object = NULL; 1218 object = NULL;
1118out: 1219out:
1119 rcu_read_unlock();
1120 return object; 1220 return object;
1121} 1221}
1122 1222
@@ -1131,17 +1231,13 @@ static void *kmemleak_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1131 struct list_head *n = &prev_obj->object_list; 1231 struct list_head *n = &prev_obj->object_list;
1132 1232
1133 ++(*pos); 1233 ++(*pos);
1134 if (reported_leaks >= REPORTS_NR)
1135 goto out;
1136 1234
1137 rcu_read_lock();
1138 list_for_each_continue_rcu(n, &object_list) { 1235 list_for_each_continue_rcu(n, &object_list) {
1139 next_obj = list_entry(n, struct kmemleak_object, object_list); 1236 next_obj = list_entry(n, struct kmemleak_object, object_list);
1140 if (get_object(next_obj)) 1237 if (get_object(next_obj))
1141 break; 1238 break;
1142 } 1239 }
1143 rcu_read_unlock(); 1240
1144out:
1145 put_object(prev_obj); 1241 put_object(prev_obj);
1146 return next_obj; 1242 return next_obj;
1147} 1243}
@@ -1151,8 +1247,16 @@ out:
1151 */ 1247 */
1152static void kmemleak_seq_stop(struct seq_file *seq, void *v) 1248static void kmemleak_seq_stop(struct seq_file *seq, void *v)
1153{ 1249{
1154 if (v) 1250 if (!IS_ERR(v)) {
1155 put_object(v); 1251 /*
1252 * kmemleak_seq_start may return ERR_PTR if the scan_mutex
1253 * waiting was interrupted, so only release it if !IS_ERR.
1254 */
1255 rcu_read_unlock();
1256 mutex_unlock(&scan_mutex);
1257 if (v)
1258 put_object(v);
1259 }
1156} 1260}
1157 1261
1158/* 1262/*
@@ -1164,10 +1268,8 @@ static int kmemleak_seq_show(struct seq_file *seq, void *v)
1164 unsigned long flags; 1268 unsigned long flags;
1165 1269
1166 spin_lock_irqsave(&object->lock, flags); 1270 spin_lock_irqsave(&object->lock, flags);
1167 if ((object->flags & OBJECT_REPORTED) && unreferenced_object(object)) { 1271 if ((object->flags & OBJECT_REPORTED) && unreferenced_object(object))
1168 print_unreferenced(seq, object); 1272 print_unreferenced(seq, object);
1169 reported_leaks++;
1170 }
1171 spin_unlock_irqrestore(&object->lock, flags); 1273 spin_unlock_irqrestore(&object->lock, flags);
1172 return 0; 1274 return 0;
1173} 1275}
@@ -1181,36 +1283,15 @@ static const struct seq_operations kmemleak_seq_ops = {
1181 1283
1182static int kmemleak_open(struct inode *inode, struct file *file) 1284static int kmemleak_open(struct inode *inode, struct file *file)
1183{ 1285{
1184 int ret = 0;
1185
1186 if (!atomic_read(&kmemleak_enabled)) 1286 if (!atomic_read(&kmemleak_enabled))
1187 return -EBUSY; 1287 return -EBUSY;
1188 1288
1189 ret = mutex_lock_interruptible(&scan_mutex); 1289 return seq_open(file, &kmemleak_seq_ops);
1190 if (ret < 0)
1191 goto out;
1192 if (file->f_mode & FMODE_READ) {
1193 ret = seq_open(file, &kmemleak_seq_ops);
1194 if (ret < 0)
1195 goto scan_unlock;
1196 }
1197 return ret;
1198
1199scan_unlock:
1200 mutex_unlock(&scan_mutex);
1201out:
1202 return ret;
1203} 1290}
1204 1291
1205static int kmemleak_release(struct inode *inode, struct file *file) 1292static int kmemleak_release(struct inode *inode, struct file *file)
1206{ 1293{
1207 int ret = 0; 1294 return seq_release(inode, file);
1208
1209 if (file->f_mode & FMODE_READ)
1210 seq_release(inode, file);
1211 mutex_unlock(&scan_mutex);
1212
1213 return ret;
1214} 1295}
1215 1296
1216/* 1297/*
@@ -1230,15 +1311,17 @@ static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
1230{ 1311{
1231 char buf[64]; 1312 char buf[64];
1232 int buf_size; 1313 int buf_size;
1233 1314 int ret;
1234 if (!atomic_read(&kmemleak_enabled))
1235 return -EBUSY;
1236 1315
1237 buf_size = min(size, (sizeof(buf) - 1)); 1316 buf_size = min(size, (sizeof(buf) - 1));
1238 if (strncpy_from_user(buf, user_buf, buf_size) < 0) 1317 if (strncpy_from_user(buf, user_buf, buf_size) < 0)
1239 return -EFAULT; 1318 return -EFAULT;
1240 buf[buf_size] = 0; 1319 buf[buf_size] = 0;
1241 1320
1321 ret = mutex_lock_interruptible(&scan_mutex);
1322 if (ret < 0)
1323 return ret;
1324
1242 if (strncmp(buf, "off", 3) == 0) 1325 if (strncmp(buf, "off", 3) == 0)
1243 kmemleak_disable(); 1326 kmemleak_disable();
1244 else if (strncmp(buf, "stack=on", 8) == 0) 1327 else if (strncmp(buf, "stack=on", 8) == 0)
@@ -1251,11 +1334,10 @@ static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
1251 stop_scan_thread(); 1334 stop_scan_thread();
1252 else if (strncmp(buf, "scan=", 5) == 0) { 1335 else if (strncmp(buf, "scan=", 5) == 0) {
1253 unsigned long secs; 1336 unsigned long secs;
1254 int err;
1255 1337
1256 err = strict_strtoul(buf + 5, 0, &secs); 1338 ret = strict_strtoul(buf + 5, 0, &secs);
1257 if (err < 0) 1339 if (ret < 0)
1258 return err; 1340 goto out;
1259 stop_scan_thread(); 1341 stop_scan_thread();
1260 if (secs) { 1342 if (secs) {
1261 jiffies_scan_wait = msecs_to_jiffies(secs * 1000); 1343 jiffies_scan_wait = msecs_to_jiffies(secs * 1000);
@@ -1264,7 +1346,12 @@ static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
1264 } else if (strncmp(buf, "scan", 4) == 0) 1346 } else if (strncmp(buf, "scan", 4) == 0)
1265 kmemleak_scan(); 1347 kmemleak_scan();
1266 else 1348 else
1267 return -EINVAL; 1349 ret = -EINVAL;
1350
1351out:
1352 mutex_unlock(&scan_mutex);
1353 if (ret < 0)
1354 return ret;
1268 1355
1269 /* ignore the rest of the buffer, only one command at a time */ 1356 /* ignore the rest of the buffer, only one command at a time */
1270 *ppos += size; 1357 *ppos += size;
@@ -1293,7 +1380,7 @@ static int kmemleak_cleanup_thread(void *arg)
1293 1380
1294 rcu_read_lock(); 1381 rcu_read_lock();
1295 list_for_each_entry_rcu(object, &object_list, object_list) 1382 list_for_each_entry_rcu(object, &object_list, object_list)
1296 delete_object(object->pointer); 1383 delete_object_full(object->pointer);
1297 rcu_read_unlock(); 1384 rcu_read_unlock();
1298 mutex_unlock(&scan_mutex); 1385 mutex_unlock(&scan_mutex);
1299 1386
@@ -1388,6 +1475,9 @@ void __init kmemleak_init(void)
1388 case KMEMLEAK_FREE: 1475 case KMEMLEAK_FREE:
1389 kmemleak_free(log->ptr); 1476 kmemleak_free(log->ptr);
1390 break; 1477 break;
1478 case KMEMLEAK_FREE_PART:
1479 kmemleak_free_part(log->ptr, log->size);
1480 break;
1391 case KMEMLEAK_NOT_LEAK: 1481 case KMEMLEAK_NOT_LEAK:
1392 kmemleak_not_leak(log->ptr); 1482 kmemleak_not_leak(log->ptr);
1393 break; 1483 break;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e2fa20dadf40..fd4529d86de5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1207,6 +1207,12 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
1207 ret = 0; 1207 ret = 0;
1208out: 1208out:
1209 unlock_page_cgroup(pc); 1209 unlock_page_cgroup(pc);
1210 /*
1211 * We charges against "to" which may not have any tasks. Then, "to"
1212 * can be under rmdir(). But in current implementation, caller of
1213 * this function is just force_empty() and it's garanteed that
1214 * "to" is never removed. So, we don't check rmdir status here.
1215 */
1210 return ret; 1216 return ret;
1211} 1217}
1212 1218
@@ -1428,6 +1434,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
1428 return; 1434 return;
1429 if (!ptr) 1435 if (!ptr)
1430 return; 1436 return;
1437 cgroup_exclude_rmdir(&ptr->css);
1431 pc = lookup_page_cgroup(page); 1438 pc = lookup_page_cgroup(page);
1432 mem_cgroup_lru_del_before_commit_swapcache(page); 1439 mem_cgroup_lru_del_before_commit_swapcache(page);
1433 __mem_cgroup_commit_charge(ptr, pc, ctype); 1440 __mem_cgroup_commit_charge(ptr, pc, ctype);
@@ -1457,8 +1464,12 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
1457 } 1464 }
1458 rcu_read_unlock(); 1465 rcu_read_unlock();
1459 } 1466 }
1460 /* add this page(page_cgroup) to the LRU we want. */ 1467 /*
1461 1468 * At swapin, we may charge account against cgroup which has no tasks.
1469 * So, rmdir()->pre_destroy() can be called while we do this charge.
1470 * In that case, we need to call pre_destroy() again. check it here.
1471 */
1472 cgroup_release_and_wakeup_rmdir(&ptr->css);
1462} 1473}
1463 1474
1464void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr) 1475void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
@@ -1664,7 +1675,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
1664 1675
1665 if (!mem) 1676 if (!mem)
1666 return; 1677 return;
1667 1678 cgroup_exclude_rmdir(&mem->css);
1668 /* at migration success, oldpage->mapping is NULL. */ 1679 /* at migration success, oldpage->mapping is NULL. */
1669 if (oldpage->mapping) { 1680 if (oldpage->mapping) {
1670 target = oldpage; 1681 target = oldpage;
@@ -1704,6 +1715,12 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
1704 */ 1715 */
1705 if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED) 1716 if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
1706 mem_cgroup_uncharge_page(target); 1717 mem_cgroup_uncharge_page(target);
1718 /*
1719 * At migration, we may charge account against cgroup which has no tasks
1720 * So, rmdir()->pre_destroy() can be called while we do this charge.
1721 * In that case, we need to call pre_destroy() again. check it here.
1722 */
1723 cgroup_release_and_wakeup_rmdir(&mem->css);
1707} 1724}
1708 1725
1709/* 1726/*
@@ -1973,7 +1990,7 @@ try_to_free:
1973 if (!progress) { 1990 if (!progress) {
1974 nr_retries--; 1991 nr_retries--;
1975 /* maybe some writeback is necessary */ 1992 /* maybe some writeback is necessary */
1976 congestion_wait(WRITE, HZ/10); 1993 congestion_wait(BLK_RW_ASYNC, HZ/10);
1977 } 1994 }
1978 1995
1979 } 1996 }
diff --git a/mm/memory.c b/mm/memory.c
index 65216194eb8d..aede2ce3aba4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -135,11 +135,12 @@ void pmd_clear_bad(pmd_t *pmd)
135 * Note: this doesn't free the actual pages themselves. That 135 * Note: this doesn't free the actual pages themselves. That
136 * has been handled earlier when unmapping all the memory regions. 136 * has been handled earlier when unmapping all the memory regions.
137 */ 137 */
138static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd) 138static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
139 unsigned long addr)
139{ 140{
140 pgtable_t token = pmd_pgtable(*pmd); 141 pgtable_t token = pmd_pgtable(*pmd);
141 pmd_clear(pmd); 142 pmd_clear(pmd);
142 pte_free_tlb(tlb, token); 143 pte_free_tlb(tlb, token, addr);
143 tlb->mm->nr_ptes--; 144 tlb->mm->nr_ptes--;
144} 145}
145 146
@@ -157,7 +158,7 @@ static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
157 next = pmd_addr_end(addr, end); 158 next = pmd_addr_end(addr, end);
158 if (pmd_none_or_clear_bad(pmd)) 159 if (pmd_none_or_clear_bad(pmd))
159 continue; 160 continue;
160 free_pte_range(tlb, pmd); 161 free_pte_range(tlb, pmd, addr);
161 } while (pmd++, addr = next, addr != end); 162 } while (pmd++, addr = next, addr != end);
162 163
163 start &= PUD_MASK; 164 start &= PUD_MASK;
@@ -173,7 +174,7 @@ static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
173 174
174 pmd = pmd_offset(pud, start); 175 pmd = pmd_offset(pud, start);
175 pud_clear(pud); 176 pud_clear(pud);
176 pmd_free_tlb(tlb, pmd); 177 pmd_free_tlb(tlb, pmd, start);
177} 178}
178 179
179static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, 180static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
@@ -206,7 +207,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
206 207
207 pud = pud_offset(pgd, start); 208 pud = pud_offset(pgd, start);
208 pgd_clear(pgd); 209 pgd_clear(pgd);
209 pud_free_tlb(tlb, pud); 210 pud_free_tlb(tlb, pud, start);
210} 211}
211 212
212/* 213/*
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e08e2c4da63a..7dd9d9f80694 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -191,25 +191,27 @@ static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes)
191 * Must be called holding task's alloc_lock to protect task's mems_allowed 191 * Must be called holding task's alloc_lock to protect task's mems_allowed
192 * and mempolicy. May also be called holding the mmap_semaphore for write. 192 * and mempolicy. May also be called holding the mmap_semaphore for write.
193 */ 193 */
194static int mpol_set_nodemask(struct mempolicy *pol, const nodemask_t *nodes) 194static int mpol_set_nodemask(struct mempolicy *pol,
195 const nodemask_t *nodes, struct nodemask_scratch *nsc)
195{ 196{
196 nodemask_t cpuset_context_nmask;
197 int ret; 197 int ret;
198 198
199 /* if mode is MPOL_DEFAULT, pol is NULL. This is right. */ 199 /* if mode is MPOL_DEFAULT, pol is NULL. This is right. */
200 if (pol == NULL) 200 if (pol == NULL)
201 return 0; 201 return 0;
202 /* Check N_HIGH_MEMORY */
203 nodes_and(nsc->mask1,
204 cpuset_current_mems_allowed, node_states[N_HIGH_MEMORY]);
202 205
203 VM_BUG_ON(!nodes); 206 VM_BUG_ON(!nodes);
204 if (pol->mode == MPOL_PREFERRED && nodes_empty(*nodes)) 207 if (pol->mode == MPOL_PREFERRED && nodes_empty(*nodes))
205 nodes = NULL; /* explicit local allocation */ 208 nodes = NULL; /* explicit local allocation */
206 else { 209 else {
207 if (pol->flags & MPOL_F_RELATIVE_NODES) 210 if (pol->flags & MPOL_F_RELATIVE_NODES)
208 mpol_relative_nodemask(&cpuset_context_nmask, nodes, 211 mpol_relative_nodemask(&nsc->mask2, nodes,&nsc->mask1);
209 &cpuset_current_mems_allowed);
210 else 212 else
211 nodes_and(cpuset_context_nmask, *nodes, 213 nodes_and(nsc->mask2, *nodes, nsc->mask1);
212 cpuset_current_mems_allowed); 214
213 if (mpol_store_user_nodemask(pol)) 215 if (mpol_store_user_nodemask(pol))
214 pol->w.user_nodemask = *nodes; 216 pol->w.user_nodemask = *nodes;
215 else 217 else
@@ -217,8 +219,10 @@ static int mpol_set_nodemask(struct mempolicy *pol, const nodemask_t *nodes)
217 cpuset_current_mems_allowed; 219 cpuset_current_mems_allowed;
218 } 220 }
219 221
220 ret = mpol_ops[pol->mode].create(pol, 222 if (nodes)
221 nodes ? &cpuset_context_nmask : NULL); 223 ret = mpol_ops[pol->mode].create(pol, &nsc->mask2);
224 else
225 ret = mpol_ops[pol->mode].create(pol, NULL);
222 return ret; 226 return ret;
223} 227}
224 228
@@ -620,12 +624,17 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
620{ 624{
621 struct mempolicy *new, *old; 625 struct mempolicy *new, *old;
622 struct mm_struct *mm = current->mm; 626 struct mm_struct *mm = current->mm;
627 NODEMASK_SCRATCH(scratch);
623 int ret; 628 int ret;
624 629
625 new = mpol_new(mode, flags, nodes); 630 if (!scratch)
626 if (IS_ERR(new)) 631 return -ENOMEM;
627 return PTR_ERR(new);
628 632
633 new = mpol_new(mode, flags, nodes);
634 if (IS_ERR(new)) {
635 ret = PTR_ERR(new);
636 goto out;
637 }
629 /* 638 /*
630 * prevent changing our mempolicy while show_numa_maps() 639 * prevent changing our mempolicy while show_numa_maps()
631 * is using it. 640 * is using it.
@@ -635,13 +644,13 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
635 if (mm) 644 if (mm)
636 down_write(&mm->mmap_sem); 645 down_write(&mm->mmap_sem);
637 task_lock(current); 646 task_lock(current);
638 ret = mpol_set_nodemask(new, nodes); 647 ret = mpol_set_nodemask(new, nodes, scratch);
639 if (ret) { 648 if (ret) {
640 task_unlock(current); 649 task_unlock(current);
641 if (mm) 650 if (mm)
642 up_write(&mm->mmap_sem); 651 up_write(&mm->mmap_sem);
643 mpol_put(new); 652 mpol_put(new);
644 return ret; 653 goto out;
645 } 654 }
646 old = current->mempolicy; 655 old = current->mempolicy;
647 current->mempolicy = new; 656 current->mempolicy = new;
@@ -654,7 +663,10 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
654 up_write(&mm->mmap_sem); 663 up_write(&mm->mmap_sem);
655 664
656 mpol_put(old); 665 mpol_put(old);
657 return 0; 666 ret = 0;
667out:
668 NODEMASK_SCRATCH_FREE(scratch);
669 return ret;
658} 670}
659 671
660/* 672/*
@@ -1014,12 +1026,20 @@ static long do_mbind(unsigned long start, unsigned long len,
1014 if (err) 1026 if (err)
1015 return err; 1027 return err;
1016 } 1028 }
1017 down_write(&mm->mmap_sem); 1029 {
1018 task_lock(current); 1030 NODEMASK_SCRATCH(scratch);
1019 err = mpol_set_nodemask(new, nmask); 1031 if (scratch) {
1020 task_unlock(current); 1032 down_write(&mm->mmap_sem);
1033 task_lock(current);
1034 err = mpol_set_nodemask(new, nmask, scratch);
1035 task_unlock(current);
1036 if (err)
1037 up_write(&mm->mmap_sem);
1038 } else
1039 err = -ENOMEM;
1040 NODEMASK_SCRATCH_FREE(scratch);
1041 }
1021 if (err) { 1042 if (err) {
1022 up_write(&mm->mmap_sem);
1023 mpol_put(new); 1043 mpol_put(new);
1024 return err; 1044 return err;
1025 } 1045 }
@@ -1891,6 +1911,7 @@ restart:
1891 * Install non-NULL @mpol in inode's shared policy rb-tree. 1911 * Install non-NULL @mpol in inode's shared policy rb-tree.
1892 * On entry, the current task has a reference on a non-NULL @mpol. 1912 * On entry, the current task has a reference on a non-NULL @mpol.
1893 * This must be released on exit. 1913 * This must be released on exit.
1914 * This is called at get_inode() calls and we can use GFP_KERNEL.
1894 */ 1915 */
1895void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) 1916void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
1896{ 1917{
@@ -1902,19 +1923,24 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
1902 if (mpol) { 1923 if (mpol) {
1903 struct vm_area_struct pvma; 1924 struct vm_area_struct pvma;
1904 struct mempolicy *new; 1925 struct mempolicy *new;
1926 NODEMASK_SCRATCH(scratch);
1905 1927
1928 if (!scratch)
1929 return;
1906 /* contextualize the tmpfs mount point mempolicy */ 1930 /* contextualize the tmpfs mount point mempolicy */
1907 new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask); 1931 new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask);
1908 if (IS_ERR(new)) { 1932 if (IS_ERR(new)) {
1909 mpol_put(mpol); /* drop our ref on sb mpol */ 1933 mpol_put(mpol); /* drop our ref on sb mpol */
1934 NODEMASK_SCRATCH_FREE(scratch);
1910 return; /* no valid nodemask intersection */ 1935 return; /* no valid nodemask intersection */
1911 } 1936 }
1912 1937
1913 task_lock(current); 1938 task_lock(current);
1914 ret = mpol_set_nodemask(new, &mpol->w.user_nodemask); 1939 ret = mpol_set_nodemask(new, &mpol->w.user_nodemask, scratch);
1915 task_unlock(current); 1940 task_unlock(current);
1916 mpol_put(mpol); /* drop our ref on sb mpol */ 1941 mpol_put(mpol); /* drop our ref on sb mpol */
1917 if (ret) { 1942 if (ret) {
1943 NODEMASK_SCRATCH_FREE(scratch);
1918 mpol_put(new); 1944 mpol_put(new);
1919 return; 1945 return;
1920 } 1946 }
@@ -1924,6 +1950,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
1924 pvma.vm_end = TASK_SIZE; /* policy covers entire file */ 1950 pvma.vm_end = TASK_SIZE; /* policy covers entire file */
1925 mpol_set_shared_policy(sp, &pvma, new); /* adds ref */ 1951 mpol_set_shared_policy(sp, &pvma, new); /* adds ref */
1926 mpol_put(new); /* drop initial ref */ 1952 mpol_put(new); /* drop initial ref */
1953 NODEMASK_SCRATCH_FREE(scratch);
1927 } 1954 }
1928} 1955}
1929 1956
@@ -2140,13 +2167,18 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
2140 err = 1; 2167 err = 1;
2141 else { 2168 else {
2142 int ret; 2169 int ret;
2143 2170 NODEMASK_SCRATCH(scratch);
2144 task_lock(current); 2171 if (scratch) {
2145 ret = mpol_set_nodemask(new, &nodes); 2172 task_lock(current);
2146 task_unlock(current); 2173 ret = mpol_set_nodemask(new, &nodes, scratch);
2147 if (ret) 2174 task_unlock(current);
2175 } else
2176 ret = -ENOMEM;
2177 NODEMASK_SCRATCH_FREE(scratch);
2178 if (ret) {
2148 err = 1; 2179 err = 1;
2149 else if (no_context) { 2180 mpol_put(new);
2181 } else if (no_context) {
2150 /* save for contextualization */ 2182 /* save for contextualization */
2151 new->w.user_nodemask = nodes; 2183 new->w.user_nodemask = nodes;
2152 } 2184 }
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 7687879253b9..81627ebcd313 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -575,7 +575,7 @@ static void balance_dirty_pages(struct address_space *mapping)
575 if (pages_written >= write_chunk) 575 if (pages_written >= write_chunk)
576 break; /* We've done our duty */ 576 break; /* We've done our duty */
577 577
578 congestion_wait(WRITE, HZ/10); 578 congestion_wait(BLK_RW_ASYNC, HZ/10);
579 } 579 }
580 580
581 if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh && 581 if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh &&
@@ -669,7 +669,7 @@ void throttle_vm_writeout(gfp_t gfp_mask)
669 if (global_page_state(NR_UNSTABLE_NFS) + 669 if (global_page_state(NR_UNSTABLE_NFS) +
670 global_page_state(NR_WRITEBACK) <= dirty_thresh) 670 global_page_state(NR_WRITEBACK) <= dirty_thresh)
671 break; 671 break;
672 congestion_wait(WRITE, HZ/10); 672 congestion_wait(BLK_RW_ASYNC, HZ/10);
673 673
674 /* 674 /*
675 * The caller might hold locks which can prevent IO completion 675 * The caller might hold locks which can prevent IO completion
@@ -715,7 +715,7 @@ static void background_writeout(unsigned long _min_pages)
715 if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { 715 if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
716 /* Wrote less than expected */ 716 /* Wrote less than expected */
717 if (wbc.encountered_congestion || wbc.more_io) 717 if (wbc.encountered_congestion || wbc.more_io)
718 congestion_wait(WRITE, HZ/10); 718 congestion_wait(BLK_RW_ASYNC, HZ/10);
719 else 719 else
720 break; 720 break;
721 } 721 }
@@ -787,7 +787,7 @@ static void wb_kupdate(unsigned long arg)
787 writeback_inodes(&wbc); 787 writeback_inodes(&wbc);
788 if (wbc.nr_to_write > 0) { 788 if (wbc.nr_to_write > 0) {
789 if (wbc.encountered_congestion || wbc.more_io) 789 if (wbc.encountered_congestion || wbc.more_io)
790 congestion_wait(WRITE, HZ/10); 790 congestion_wait(BLK_RW_ASYNC, HZ/10);
791 else 791 else
792 break; /* All the old data is written */ 792 break; /* All the old data is written */
793 } 793 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e0f2cdf9d8b1..d052abbe3063 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -882,7 +882,7 @@ retry_reserve:
882 */ 882 */
883static int rmqueue_bulk(struct zone *zone, unsigned int order, 883static int rmqueue_bulk(struct zone *zone, unsigned int order,
884 unsigned long count, struct list_head *list, 884 unsigned long count, struct list_head *list,
885 int migratetype) 885 int migratetype, int cold)
886{ 886{
887 int i; 887 int i;
888 888
@@ -901,7 +901,10 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
901 * merge IO requests if the physical pages are ordered 901 * merge IO requests if the physical pages are ordered
902 * properly. 902 * properly.
903 */ 903 */
904 list_add(&page->lru, list); 904 if (likely(cold == 0))
905 list_add(&page->lru, list);
906 else
907 list_add_tail(&page->lru, list);
905 set_page_private(page, migratetype); 908 set_page_private(page, migratetype);
906 list = &page->lru; 909 list = &page->lru;
907 } 910 }
@@ -1119,7 +1122,8 @@ again:
1119 local_irq_save(flags); 1122 local_irq_save(flags);
1120 if (!pcp->count) { 1123 if (!pcp->count) {
1121 pcp->count = rmqueue_bulk(zone, 0, 1124 pcp->count = rmqueue_bulk(zone, 0,
1122 pcp->batch, &pcp->list, migratetype); 1125 pcp->batch, &pcp->list,
1126 migratetype, cold);
1123 if (unlikely(!pcp->count)) 1127 if (unlikely(!pcp->count))
1124 goto failed; 1128 goto failed;
1125 } 1129 }
@@ -1138,7 +1142,8 @@ again:
1138 /* Allocate more to the pcp list if necessary */ 1142 /* Allocate more to the pcp list if necessary */
1139 if (unlikely(&page->lru == &pcp->list)) { 1143 if (unlikely(&page->lru == &pcp->list)) {
1140 pcp->count += rmqueue_bulk(zone, 0, 1144 pcp->count += rmqueue_bulk(zone, 0,
1141 pcp->batch, &pcp->list, migratetype); 1145 pcp->batch, &pcp->list,
1146 migratetype, cold);
1142 page = list_entry(pcp->list.next, struct page, lru); 1147 page = list_entry(pcp->list.next, struct page, lru);
1143 } 1148 }
1144 1149
@@ -1666,7 +1671,7 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
1666 preferred_zone, migratetype); 1671 preferred_zone, migratetype);
1667 1672
1668 if (!page && gfp_mask & __GFP_NOFAIL) 1673 if (!page && gfp_mask & __GFP_NOFAIL)
1669 congestion_wait(WRITE, HZ/50); 1674 congestion_wait(BLK_RW_ASYNC, HZ/50);
1670 } while (!page && (gfp_mask & __GFP_NOFAIL)); 1675 } while (!page && (gfp_mask & __GFP_NOFAIL));
1671 1676
1672 return page; 1677 return page;
@@ -1740,8 +1745,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
1740 * be using allocators in order of preference for an area that is 1745 * be using allocators in order of preference for an area that is
1741 * too large. 1746 * too large.
1742 */ 1747 */
1743 if (WARN_ON_ONCE(order >= MAX_ORDER)) 1748 if (order >= MAX_ORDER) {
1749 WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
1744 return NULL; 1750 return NULL;
1751 }
1745 1752
1746 /* 1753 /*
1747 * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and 1754 * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and
@@ -1789,6 +1796,10 @@ rebalance:
1789 if (p->flags & PF_MEMALLOC) 1796 if (p->flags & PF_MEMALLOC)
1790 goto nopage; 1797 goto nopage;
1791 1798
1799 /* Avoid allocations with no watermarks from looping endlessly */
1800 if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL))
1801 goto nopage;
1802
1792 /* Try direct reclaim and then allocating */ 1803 /* Try direct reclaim and then allocating */
1793 page = __alloc_pages_direct_reclaim(gfp_mask, order, 1804 page = __alloc_pages_direct_reclaim(gfp_mask, order,
1794 zonelist, high_zoneidx, 1805 zonelist, high_zoneidx,
@@ -1831,7 +1842,7 @@ rebalance:
1831 pages_reclaimed += did_some_progress; 1842 pages_reclaimed += did_some_progress;
1832 if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) { 1843 if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) {
1833 /* Wait for some write requests to complete then retry */ 1844 /* Wait for some write requests to complete then retry */
1834 congestion_wait(WRITE, HZ/50); 1845 congestion_wait(BLK_RW_ASYNC, HZ/50);
1835 goto rebalance; 1846 goto rebalance;
1836 } 1847 }
1837 1848
@@ -1983,7 +1994,7 @@ void *alloc_pages_exact(size_t size, gfp_t gfp_mask)
1983 unsigned long alloc_end = addr + (PAGE_SIZE << order); 1994 unsigned long alloc_end = addr + (PAGE_SIZE << order);
1984 unsigned long used = addr + PAGE_ALIGN(size); 1995 unsigned long used = addr + PAGE_ALIGN(size);
1985 1996
1986 split_page(virt_to_page(addr), order); 1997 split_page(virt_to_page((void *)addr), order);
1987 while (used < alloc_end) { 1998 while (used < alloc_end) {
1988 free_page(used); 1999 free_page(used);
1989 used += PAGE_SIZE; 2000 used += PAGE_SIZE;
@@ -4745,8 +4756,10 @@ void *__init alloc_large_system_hash(const char *tablename,
4745 * some pages at the end of hash table which 4756 * some pages at the end of hash table which
4746 * alloc_pages_exact() automatically does 4757 * alloc_pages_exact() automatically does
4747 */ 4758 */
4748 if (get_order(size) < MAX_ORDER) 4759 if (get_order(size) < MAX_ORDER) {
4749 table = alloc_pages_exact(size, GFP_ATOMIC); 4760 table = alloc_pages_exact(size, GFP_ATOMIC);
4761 kmemleak_alloc(table, size, 1, GFP_ATOMIC);
4762 }
4750 } 4763 }
4751 } while (!table && size > PAGE_SIZE && --log2qty); 4764 } while (!table && size > PAGE_SIZE && --log2qty);
4752 4765
@@ -4764,16 +4777,6 @@ void *__init alloc_large_system_hash(const char *tablename,
4764 if (_hash_mask) 4777 if (_hash_mask)
4765 *_hash_mask = (1 << log2qty) - 1; 4778 *_hash_mask = (1 << log2qty) - 1;
4766 4779
4767 /*
4768 * If hashdist is set, the table allocation is done with __vmalloc()
4769 * which invokes the kmemleak_alloc() callback. This function may also
4770 * be called before the slab and kmemleak are initialised when
4771 * kmemleak simply buffers the request to be executed later
4772 * (GFP_ATOMIC flag ignored in this case).
4773 */
4774 if (!hashdist)
4775 kmemleak_alloc(table, size, 1, GFP_ATOMIC);
4776
4777 return table; 4780 return table;
4778} 4781}
4779 4782
diff --git a/mm/slab.c b/mm/slab.c
index e74a16e4ced6..7b5d4deacfcd 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1544,9 +1544,6 @@ void __init kmem_cache_init(void)
1544 } 1544 }
1545 1545
1546 g_cpucache_up = EARLY; 1546 g_cpucache_up = EARLY;
1547
1548 /* Annotate slab for lockdep -- annotate the malloc caches */
1549 init_lock_keys();
1550} 1547}
1551 1548
1552void __init kmem_cache_init_late(void) 1549void __init kmem_cache_init_late(void)
@@ -1563,6 +1560,9 @@ void __init kmem_cache_init_late(void)
1563 /* Done! */ 1560 /* Done! */
1564 g_cpucache_up = FULL; 1561 g_cpucache_up = FULL;
1565 1562
1563 /* Annotate slab for lockdep -- annotate the malloc caches */
1564 init_lock_keys();
1565
1566 /* 1566 /*
1567 * Register a cpu startup notifier callback that initializes 1567 * Register a cpu startup notifier callback that initializes
1568 * cpu_cache_get for all new cpus 1568 * cpu_cache_get for all new cpus
@@ -2547,7 +2547,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep)
2547 } 2547 }
2548 2548
2549 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) 2549 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
2550 synchronize_rcu(); 2550 rcu_barrier();
2551 2551
2552 __kmem_cache_destroy(cachep); 2552 __kmem_cache_destroy(cachep);
2553 mutex_unlock(&cache_chain_mutex); 2553 mutex_unlock(&cache_chain_mutex);
diff --git a/mm/slob.c b/mm/slob.c
index c78742defdc6..9641da3d5e58 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -595,6 +595,8 @@ EXPORT_SYMBOL(kmem_cache_create);
595void kmem_cache_destroy(struct kmem_cache *c) 595void kmem_cache_destroy(struct kmem_cache *c)
596{ 596{
597 kmemleak_free(c); 597 kmemleak_free(c);
598 if (c->flags & SLAB_DESTROY_BY_RCU)
599 rcu_barrier();
598 slob_free(c, sizeof(struct kmem_cache)); 600 slob_free(c, sizeof(struct kmem_cache));
599} 601}
600EXPORT_SYMBOL(kmem_cache_destroy); 602EXPORT_SYMBOL(kmem_cache_destroy);
diff --git a/mm/slub.c b/mm/slub.c
index 819f056b39c6..b9f1491a58a1 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -21,7 +21,6 @@
21#include <linux/kmemcheck.h> 21#include <linux/kmemcheck.h>
22#include <linux/cpu.h> 22#include <linux/cpu.h>
23#include <linux/cpuset.h> 23#include <linux/cpuset.h>
24#include <linux/kmemleak.h>
25#include <linux/mempolicy.h> 24#include <linux/mempolicy.h>
26#include <linux/ctype.h> 25#include <linux/ctype.h>
27#include <linux/debugobjects.h> 26#include <linux/debugobjects.h>
@@ -2595,6 +2594,8 @@ static inline int kmem_cache_close(struct kmem_cache *s)
2595 */ 2594 */
2596void kmem_cache_destroy(struct kmem_cache *s) 2595void kmem_cache_destroy(struct kmem_cache *s)
2597{ 2596{
2597 if (s->flags & SLAB_DESTROY_BY_RCU)
2598 rcu_barrier();
2598 down_write(&slub_lock); 2599 down_write(&slub_lock);
2599 s->refcount--; 2600 s->refcount--;
2600 if (!s->refcount) { 2601 if (!s->refcount) {
@@ -2833,13 +2834,15 @@ EXPORT_SYMBOL(__kmalloc);
2833static void *kmalloc_large_node(size_t size, gfp_t flags, int node) 2834static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
2834{ 2835{
2835 struct page *page; 2836 struct page *page;
2837 void *ptr = NULL;
2836 2838
2837 flags |= __GFP_COMP | __GFP_NOTRACK; 2839 flags |= __GFP_COMP | __GFP_NOTRACK;
2838 page = alloc_pages_node(node, flags, get_order(size)); 2840 page = alloc_pages_node(node, flags, get_order(size));
2839 if (page) 2841 if (page)
2840 return page_address(page); 2842 ptr = page_address(page);
2841 else 2843
2842 return NULL; 2844 kmemleak_alloc(ptr, size, 1, flags);
2845 return ptr;
2843} 2846}
2844 2847
2845#ifdef CONFIG_NUMA 2848#ifdef CONFIG_NUMA
@@ -2924,6 +2927,7 @@ void kfree(const void *x)
2924 page = virt_to_head_page(x); 2927 page = virt_to_head_page(x);
2925 if (unlikely(!PageSlab(page))) { 2928 if (unlikely(!PageSlab(page))) {
2926 BUG_ON(!PageCompound(page)); 2929 BUG_ON(!PageCompound(page));
2930 kmemleak_free(x);
2927 put_page(page); 2931 put_page(page);
2928 return; 2932 return;
2929 } 2933 }
diff --git a/mm/swapfile.c b/mm/swapfile.c
index d1ade1a48ee7..8ffdc0d23c53 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -753,7 +753,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
753 753
754 if (!bdev) { 754 if (!bdev) {
755 if (bdev_p) 755 if (bdev_p)
756 *bdev_p = bdget(sis->bdev->bd_dev); 756 *bdev_p = bdgrab(sis->bdev);
757 757
758 spin_unlock(&swap_lock); 758 spin_unlock(&swap_lock);
759 return i; 759 return i;
@@ -765,7 +765,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
765 struct swap_extent, list); 765 struct swap_extent, list);
766 if (se->start_block == offset) { 766 if (se->start_block == offset) {
767 if (bdev_p) 767 if (bdev_p)
768 *bdev_p = bdget(sis->bdev->bd_dev); 768 *bdev_p = bdgrab(sis->bdev);
769 769
770 spin_unlock(&swap_lock); 770 spin_unlock(&swap_lock);
771 bdput(bdev); 771 bdput(bdev);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 54155268dfca..dea7abd31098 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1104,7 +1104,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
1104 */ 1104 */
1105 if (nr_freed < nr_taken && !current_is_kswapd() && 1105 if (nr_freed < nr_taken && !current_is_kswapd() &&
1106 lumpy_reclaim) { 1106 lumpy_reclaim) {
1107 congestion_wait(WRITE, HZ/10); 1107 congestion_wait(BLK_RW_ASYNC, HZ/10);
1108 1108
1109 /* 1109 /*
1110 * The attempt at page out may have made some 1110 * The attempt at page out may have made some
@@ -1721,7 +1721,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
1721 1721
1722 /* Take a nap, wait for some writeback to complete */ 1722 /* Take a nap, wait for some writeback to complete */
1723 if (sc->nr_scanned && priority < DEF_PRIORITY - 2) 1723 if (sc->nr_scanned && priority < DEF_PRIORITY - 2)
1724 congestion_wait(WRITE, HZ/10); 1724 congestion_wait(BLK_RW_ASYNC, HZ/10);
1725 } 1725 }
1726 /* top priority shrink_zones still had more to do? don't OOM, then */ 1726 /* top priority shrink_zones still had more to do? don't OOM, then */
1727 if (!sc->all_unreclaimable && scanning_global_lru(sc)) 1727 if (!sc->all_unreclaimable && scanning_global_lru(sc))
@@ -1960,7 +1960,7 @@ loop_again:
1960 * another pass across the zones. 1960 * another pass across the zones.
1961 */ 1961 */
1962 if (total_scanned && priority < DEF_PRIORITY - 2) 1962 if (total_scanned && priority < DEF_PRIORITY - 2)
1963 congestion_wait(WRITE, HZ/10); 1963 congestion_wait(BLK_RW_ASYNC, HZ/10);
1964 1964
1965 /* 1965 /*
1966 * We do this so kswapd doesn't build up large priorities for 1966 * We do this so kswapd doesn't build up large priorities for
@@ -2233,7 +2233,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
2233 goto out; 2233 goto out;
2234 2234
2235 if (sc.nr_scanned && prio < DEF_PRIORITY - 2) 2235 if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
2236 congestion_wait(WRITE, HZ / 10); 2236 congestion_wait(BLK_RW_ASYNC, HZ / 10);
2237 } 2237 }
2238 } 2238 }
2239 2239