aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/bootmem.c6
-rw-r--r--mm/hugetlb.c2
-rw-r--r--mm/kmemleak.c240
-rw-r--r--mm/memcontrol.c23
-rw-r--r--mm/memory.c11
-rw-r--r--mm/page_alloc.c35
-rw-r--r--mm/slub.c10
-rw-r--r--mm/swapfile.c4
8 files changed, 225 insertions, 106 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c
index d2a9ce952768..701740c9e81b 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -12,6 +12,7 @@
12#include <linux/pfn.h> 12#include <linux/pfn.h>
13#include <linux/bootmem.h> 13#include <linux/bootmem.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/kmemleak.h>
15 16
16#include <asm/bug.h> 17#include <asm/bug.h>
17#include <asm/io.h> 18#include <asm/io.h>
@@ -335,6 +336,8 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
335{ 336{
336 unsigned long start, end; 337 unsigned long start, end;
337 338
339 kmemleak_free_part(__va(physaddr), size);
340
338 start = PFN_UP(physaddr); 341 start = PFN_UP(physaddr);
339 end = PFN_DOWN(physaddr + size); 342 end = PFN_DOWN(physaddr + size);
340 343
@@ -354,6 +357,8 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
354{ 357{
355 unsigned long start, end; 358 unsigned long start, end;
356 359
360 kmemleak_free_part(__va(addr), size);
361
357 start = PFN_UP(addr); 362 start = PFN_UP(addr);
358 end = PFN_DOWN(addr + size); 363 end = PFN_DOWN(addr + size);
359 364
@@ -516,6 +521,7 @@ find_block:
516 region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) + 521 region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
517 start_off); 522 start_off);
518 memset(region, 0, size); 523 memset(region, 0, size);
524 kmemleak_alloc(region, size, 1, 0);
519 return region; 525 return region;
520 } 526 }
521 527
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d0351e31f474..cafdcee154e8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2370,7 +2370,7 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
2370 long chg = region_truncate(&inode->i_mapping->private_list, offset); 2370 long chg = region_truncate(&inode->i_mapping->private_list, offset);
2371 2371
2372 spin_lock(&inode->i_lock); 2372 spin_lock(&inode->i_lock);
2373 inode->i_blocks -= blocks_per_huge_page(h); 2373 inode->i_blocks -= (blocks_per_huge_page(h) * freed);
2374 spin_unlock(&inode->i_lock); 2374 spin_unlock(&inode->i_lock);
2375 2375
2376 hugetlb_put_quota(inode->i_mapping, (chg - freed)); 2376 hugetlb_put_quota(inode->i_mapping, (chg - freed));
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index e766e1da09d2..487267310a84 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -103,10 +103,10 @@
103 * Kmemleak configuration and common defines. 103 * Kmemleak configuration and common defines.
104 */ 104 */
105#define MAX_TRACE 16 /* stack trace length */ 105#define MAX_TRACE 16 /* stack trace length */
106#define REPORTS_NR 50 /* maximum number of reported leaks */
107#define MSECS_MIN_AGE 5000 /* minimum object age for reporting */ 106#define MSECS_MIN_AGE 5000 /* minimum object age for reporting */
108#define SECS_FIRST_SCAN 60 /* delay before the first scan */ 107#define SECS_FIRST_SCAN 60 /* delay before the first scan */
109#define SECS_SCAN_WAIT 600 /* subsequent auto scanning delay */ 108#define SECS_SCAN_WAIT 600 /* subsequent auto scanning delay */
109#define GRAY_LIST_PASSES 25 /* maximum number of gray list scans */
110 110
111#define BYTES_PER_POINTER sizeof(void *) 111#define BYTES_PER_POINTER sizeof(void *)
112 112
@@ -158,6 +158,8 @@ struct kmemleak_object {
158#define OBJECT_REPORTED (1 << 1) 158#define OBJECT_REPORTED (1 << 1)
159/* flag set to not scan the object */ 159/* flag set to not scan the object */
160#define OBJECT_NO_SCAN (1 << 2) 160#define OBJECT_NO_SCAN (1 << 2)
161/* flag set on newly allocated objects */
162#define OBJECT_NEW (1 << 3)
161 163
162/* the list of all allocated objects */ 164/* the list of all allocated objects */
163static LIST_HEAD(object_list); 165static LIST_HEAD(object_list);
@@ -196,9 +198,6 @@ static int kmemleak_stack_scan = 1;
196/* protects the memory scanning, parameters and debug/kmemleak file access */ 198/* protects the memory scanning, parameters and debug/kmemleak file access */
197static DEFINE_MUTEX(scan_mutex); 199static DEFINE_MUTEX(scan_mutex);
198 200
199/* number of leaks reported (for limitation purposes) */
200static int reported_leaks;
201
202/* 201/*
203 * Early object allocation/freeing logging. Kmemleak is initialized after the 202 * Early object allocation/freeing logging. Kmemleak is initialized after the
204 * kernel allocator. However, both the kernel allocator and kmemleak may 203 * kernel allocator. However, both the kernel allocator and kmemleak may
@@ -211,6 +210,7 @@ static int reported_leaks;
211enum { 210enum {
212 KMEMLEAK_ALLOC, 211 KMEMLEAK_ALLOC,
213 KMEMLEAK_FREE, 212 KMEMLEAK_FREE,
213 KMEMLEAK_FREE_PART,
214 KMEMLEAK_NOT_LEAK, 214 KMEMLEAK_NOT_LEAK,
215 KMEMLEAK_IGNORE, 215 KMEMLEAK_IGNORE,
216 KMEMLEAK_SCAN_AREA, 216 KMEMLEAK_SCAN_AREA,
@@ -274,6 +274,11 @@ static int color_gray(const struct kmemleak_object *object)
274 return object->min_count != -1 && object->count >= object->min_count; 274 return object->min_count != -1 && object->count >= object->min_count;
275} 275}
276 276
277static int color_black(const struct kmemleak_object *object)
278{
279 return object->min_count == -1;
280}
281
277/* 282/*
278 * Objects are considered unreferenced only if their color is white, they have 283 * Objects are considered unreferenced only if their color is white, they have
279 * not be deleted and have a minimum age to avoid false positives caused by 284 * not be deleted and have a minimum age to avoid false positives caused by
@@ -451,7 +456,7 @@ static void create_object(unsigned long ptr, size_t size, int min_count,
451 INIT_HLIST_HEAD(&object->area_list); 456 INIT_HLIST_HEAD(&object->area_list);
452 spin_lock_init(&object->lock); 457 spin_lock_init(&object->lock);
453 atomic_set(&object->use_count, 1); 458 atomic_set(&object->use_count, 1);
454 object->flags = OBJECT_ALLOCATED; 459 object->flags = OBJECT_ALLOCATED | OBJECT_NEW;
455 object->pointer = ptr; 460 object->pointer = ptr;
456 object->size = size; 461 object->size = size;
457 object->min_count = min_count; 462 object->min_count = min_count;
@@ -519,27 +524,17 @@ out:
519 * Remove the metadata (struct kmemleak_object) for a memory block from the 524 * Remove the metadata (struct kmemleak_object) for a memory block from the
520 * object_list and object_tree_root and decrement its use_count. 525 * object_list and object_tree_root and decrement its use_count.
521 */ 526 */
522static void delete_object(unsigned long ptr) 527static void __delete_object(struct kmemleak_object *object)
523{ 528{
524 unsigned long flags; 529 unsigned long flags;
525 struct kmemleak_object *object;
526 530
527 write_lock_irqsave(&kmemleak_lock, flags); 531 write_lock_irqsave(&kmemleak_lock, flags);
528 object = lookup_object(ptr, 0);
529 if (!object) {
530#ifdef DEBUG
531 kmemleak_warn("Freeing unknown object at 0x%08lx\n",
532 ptr);
533#endif
534 write_unlock_irqrestore(&kmemleak_lock, flags);
535 return;
536 }
537 prio_tree_remove(&object_tree_root, &object->tree_node); 532 prio_tree_remove(&object_tree_root, &object->tree_node);
538 list_del_rcu(&object->object_list); 533 list_del_rcu(&object->object_list);
539 write_unlock_irqrestore(&kmemleak_lock, flags); 534 write_unlock_irqrestore(&kmemleak_lock, flags);
540 535
541 WARN_ON(!(object->flags & OBJECT_ALLOCATED)); 536 WARN_ON(!(object->flags & OBJECT_ALLOCATED));
542 WARN_ON(atomic_read(&object->use_count) < 1); 537 WARN_ON(atomic_read(&object->use_count) < 2);
543 538
544 /* 539 /*
545 * Locking here also ensures that the corresponding memory block 540 * Locking here also ensures that the corresponding memory block
@@ -552,6 +547,64 @@ static void delete_object(unsigned long ptr)
552} 547}
553 548
554/* 549/*
550 * Look up the metadata (struct kmemleak_object) corresponding to ptr and
551 * delete it.
552 */
553static void delete_object_full(unsigned long ptr)
554{
555 struct kmemleak_object *object;
556
557 object = find_and_get_object(ptr, 0);
558 if (!object) {
559#ifdef DEBUG
560 kmemleak_warn("Freeing unknown object at 0x%08lx\n",
561 ptr);
562#endif
563 return;
564 }
565 __delete_object(object);
566 put_object(object);
567}
568
569/*
570 * Look up the metadata (struct kmemleak_object) corresponding to ptr and
571 * delete it. If the memory block is partially freed, the function may create
572 * additional metadata for the remaining parts of the block.
573 */
574static void delete_object_part(unsigned long ptr, size_t size)
575{
576 struct kmemleak_object *object;
577 unsigned long start, end;
578
579 object = find_and_get_object(ptr, 1);
580 if (!object) {
581#ifdef DEBUG
582 kmemleak_warn("Partially freeing unknown object at 0x%08lx "
583 "(size %zu)\n", ptr, size);
584#endif
585 return;
586 }
587 __delete_object(object);
588
589 /*
590 * Create one or two objects that may result from the memory block
591 * split. Note that partial freeing is only done by free_bootmem() and
592 * this happens before kmemleak_init() is called. The path below is
593 * only executed during early log recording in kmemleak_init(), so
594 * GFP_KERNEL is enough.
595 */
596 start = object->pointer;
597 end = object->pointer + object->size;
598 if (ptr > start)
599 create_object(start, ptr - start, object->min_count,
600 GFP_KERNEL);
601 if (ptr + size < end)
602 create_object(ptr + size, end - ptr - size, object->min_count,
603 GFP_KERNEL);
604
605 put_object(object);
606}
607/*
555 * Make a object permanently as gray-colored so that it can no longer be 608 * Make a object permanently as gray-colored so that it can no longer be
556 * reported as a leak. This is used in general to mark a false positive. 609 * reported as a leak. This is used in general to mark a false positive.
557 */ 610 */
@@ -715,13 +768,28 @@ void kmemleak_free(const void *ptr)
715 pr_debug("%s(0x%p)\n", __func__, ptr); 768 pr_debug("%s(0x%p)\n", __func__, ptr);
716 769
717 if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) 770 if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
718 delete_object((unsigned long)ptr); 771 delete_object_full((unsigned long)ptr);
719 else if (atomic_read(&kmemleak_early_log)) 772 else if (atomic_read(&kmemleak_early_log))
720 log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0); 773 log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0);
721} 774}
722EXPORT_SYMBOL_GPL(kmemleak_free); 775EXPORT_SYMBOL_GPL(kmemleak_free);
723 776
724/* 777/*
778 * Partial memory freeing function callback. This function is usually called
779 * from bootmem allocator when (part of) a memory block is freed.
780 */
781void kmemleak_free_part(const void *ptr, size_t size)
782{
783 pr_debug("%s(0x%p)\n", __func__, ptr);
784
785 if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
786 delete_object_part((unsigned long)ptr, size);
787 else if (atomic_read(&kmemleak_early_log))
788 log_early(KMEMLEAK_FREE_PART, ptr, size, 0, 0, 0);
789}
790EXPORT_SYMBOL_GPL(kmemleak_free_part);
791
792/*
725 * Mark an already allocated memory block as a false positive. This will cause 793 * Mark an already allocated memory block as a false positive. This will cause
726 * the block to no longer be reported as leak and always be scanned. 794 * the block to no longer be reported as leak and always be scanned.
727 */ 795 */
@@ -807,7 +875,7 @@ static int scan_should_stop(void)
807 * found to the gray list. 875 * found to the gray list.
808 */ 876 */
809static void scan_block(void *_start, void *_end, 877static void scan_block(void *_start, void *_end,
810 struct kmemleak_object *scanned) 878 struct kmemleak_object *scanned, int allow_resched)
811{ 879{
812 unsigned long *ptr; 880 unsigned long *ptr;
813 unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER); 881 unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER);
@@ -818,6 +886,8 @@ static void scan_block(void *_start, void *_end,
818 unsigned long pointer = *ptr; 886 unsigned long pointer = *ptr;
819 struct kmemleak_object *object; 887 struct kmemleak_object *object;
820 888
889 if (allow_resched)
890 cond_resched();
821 if (scan_should_stop()) 891 if (scan_should_stop())
822 break; 892 break;
823 893
@@ -881,12 +951,12 @@ static void scan_object(struct kmemleak_object *object)
881 goto out; 951 goto out;
882 if (hlist_empty(&object->area_list)) 952 if (hlist_empty(&object->area_list))
883 scan_block((void *)object->pointer, 953 scan_block((void *)object->pointer,
884 (void *)(object->pointer + object->size), object); 954 (void *)(object->pointer + object->size), object, 0);
885 else 955 else
886 hlist_for_each_entry(area, elem, &object->area_list, node) 956 hlist_for_each_entry(area, elem, &object->area_list, node)
887 scan_block((void *)(object->pointer + area->offset), 957 scan_block((void *)(object->pointer + area->offset),
888 (void *)(object->pointer + area->offset 958 (void *)(object->pointer + area->offset
889 + area->length), object); 959 + area->length), object, 0);
890out: 960out:
891 spin_unlock_irqrestore(&object->lock, flags); 961 spin_unlock_irqrestore(&object->lock, flags);
892} 962}
@@ -903,6 +973,7 @@ static void kmemleak_scan(void)
903 struct task_struct *task; 973 struct task_struct *task;
904 int i; 974 int i;
905 int new_leaks = 0; 975 int new_leaks = 0;
976 int gray_list_pass = 0;
906 977
907 jiffies_last_scan = jiffies; 978 jiffies_last_scan = jiffies;
908 979
@@ -923,6 +994,7 @@ static void kmemleak_scan(void)
923#endif 994#endif
924 /* reset the reference count (whiten the object) */ 995 /* reset the reference count (whiten the object) */
925 object->count = 0; 996 object->count = 0;
997 object->flags &= ~OBJECT_NEW;
926 if (color_gray(object) && get_object(object)) 998 if (color_gray(object) && get_object(object))
927 list_add_tail(&object->gray_list, &gray_list); 999 list_add_tail(&object->gray_list, &gray_list);
928 1000
@@ -931,14 +1003,14 @@ static void kmemleak_scan(void)
931 rcu_read_unlock(); 1003 rcu_read_unlock();
932 1004
933 /* data/bss scanning */ 1005 /* data/bss scanning */
934 scan_block(_sdata, _edata, NULL); 1006 scan_block(_sdata, _edata, NULL, 1);
935 scan_block(__bss_start, __bss_stop, NULL); 1007 scan_block(__bss_start, __bss_stop, NULL, 1);
936 1008
937#ifdef CONFIG_SMP 1009#ifdef CONFIG_SMP
938 /* per-cpu sections scanning */ 1010 /* per-cpu sections scanning */
939 for_each_possible_cpu(i) 1011 for_each_possible_cpu(i)
940 scan_block(__per_cpu_start + per_cpu_offset(i), 1012 scan_block(__per_cpu_start + per_cpu_offset(i),
941 __per_cpu_end + per_cpu_offset(i), NULL); 1013 __per_cpu_end + per_cpu_offset(i), NULL, 1);
942#endif 1014#endif
943 1015
944 /* 1016 /*
@@ -960,7 +1032,7 @@ static void kmemleak_scan(void)
960 /* only scan if page is in use */ 1032 /* only scan if page is in use */
961 if (page_count(page) == 0) 1033 if (page_count(page) == 0)
962 continue; 1034 continue;
963 scan_block(page, page + 1, NULL); 1035 scan_block(page, page + 1, NULL, 1);
964 } 1036 }
965 } 1037 }
966 1038
@@ -972,7 +1044,8 @@ static void kmemleak_scan(void)
972 read_lock(&tasklist_lock); 1044 read_lock(&tasklist_lock);
973 for_each_process(task) 1045 for_each_process(task)
974 scan_block(task_stack_page(task), 1046 scan_block(task_stack_page(task),
975 task_stack_page(task) + THREAD_SIZE, NULL); 1047 task_stack_page(task) + THREAD_SIZE,
1048 NULL, 0);
976 read_unlock(&tasklist_lock); 1049 read_unlock(&tasklist_lock);
977 } 1050 }
978 1051
@@ -984,6 +1057,7 @@ static void kmemleak_scan(void)
984 * kmemleak objects cannot be freed from outside the loop because their 1057 * kmemleak objects cannot be freed from outside the loop because their
985 * use_count was increased. 1058 * use_count was increased.
986 */ 1059 */
1060repeat:
987 object = list_entry(gray_list.next, typeof(*object), gray_list); 1061 object = list_entry(gray_list.next, typeof(*object), gray_list);
988 while (&object->gray_list != &gray_list) { 1062 while (&object->gray_list != &gray_list) {
989 cond_resched(); 1063 cond_resched();
@@ -1001,12 +1075,38 @@ static void kmemleak_scan(void)
1001 1075
1002 object = tmp; 1076 object = tmp;
1003 } 1077 }
1078
1079 if (scan_should_stop() || ++gray_list_pass >= GRAY_LIST_PASSES)
1080 goto scan_end;
1081
1082 /*
1083 * Check for new objects allocated during this scanning and add them
1084 * to the gray list.
1085 */
1086 rcu_read_lock();
1087 list_for_each_entry_rcu(object, &object_list, object_list) {
1088 spin_lock_irqsave(&object->lock, flags);
1089 if ((object->flags & OBJECT_NEW) && !color_black(object) &&
1090 get_object(object)) {
1091 object->flags &= ~OBJECT_NEW;
1092 list_add_tail(&object->gray_list, &gray_list);
1093 }
1094 spin_unlock_irqrestore(&object->lock, flags);
1095 }
1096 rcu_read_unlock();
1097
1098 if (!list_empty(&gray_list))
1099 goto repeat;
1100
1101scan_end:
1004 WARN_ON(!list_empty(&gray_list)); 1102 WARN_ON(!list_empty(&gray_list));
1005 1103
1006 /* 1104 /*
1007 * If scanning was stopped do not report any new unreferenced objects. 1105 * If scanning was stopped or new objects were being allocated at a
1106 * higher rate than gray list scanning, do not report any new
1107 * unreferenced objects.
1008 */ 1108 */
1009 if (scan_should_stop()) 1109 if (scan_should_stop() || gray_list_pass >= GRAY_LIST_PASSES)
1010 return; 1110 return;
1011 1111
1012 /* 1112 /*
@@ -1039,6 +1139,7 @@ static int kmemleak_scan_thread(void *arg)
1039 static int first_run = 1; 1139 static int first_run = 1;
1040 1140
1041 pr_info("Automatic memory scanning thread started\n"); 1141 pr_info("Automatic memory scanning thread started\n");
1142 set_user_nice(current, 10);
1042 1143
1043 /* 1144 /*
1044 * Wait before the first scan to allow the system to fully initialize. 1145 * Wait before the first scan to allow the system to fully initialize.
@@ -1101,11 +1202,11 @@ static void *kmemleak_seq_start(struct seq_file *seq, loff_t *pos)
1101{ 1202{
1102 struct kmemleak_object *object; 1203 struct kmemleak_object *object;
1103 loff_t n = *pos; 1204 loff_t n = *pos;
1205 int err;
1104 1206
1105 if (!n) 1207 err = mutex_lock_interruptible(&scan_mutex);
1106 reported_leaks = 0; 1208 if (err < 0)
1107 if (reported_leaks >= REPORTS_NR) 1209 return ERR_PTR(err);
1108 return NULL;
1109 1210
1110 rcu_read_lock(); 1211 rcu_read_lock();
1111 list_for_each_entry_rcu(object, &object_list, object_list) { 1212 list_for_each_entry_rcu(object, &object_list, object_list) {
@@ -1116,7 +1217,6 @@ static void *kmemleak_seq_start(struct seq_file *seq, loff_t *pos)
1116 } 1217 }
1117 object = NULL; 1218 object = NULL;
1118out: 1219out:
1119 rcu_read_unlock();
1120 return object; 1220 return object;
1121} 1221}
1122 1222
@@ -1131,17 +1231,13 @@ static void *kmemleak_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1131 struct list_head *n = &prev_obj->object_list; 1231 struct list_head *n = &prev_obj->object_list;
1132 1232
1133 ++(*pos); 1233 ++(*pos);
1134 if (reported_leaks >= REPORTS_NR)
1135 goto out;
1136 1234
1137 rcu_read_lock();
1138 list_for_each_continue_rcu(n, &object_list) { 1235 list_for_each_continue_rcu(n, &object_list) {
1139 next_obj = list_entry(n, struct kmemleak_object, object_list); 1236 next_obj = list_entry(n, struct kmemleak_object, object_list);
1140 if (get_object(next_obj)) 1237 if (get_object(next_obj))
1141 break; 1238 break;
1142 } 1239 }
1143 rcu_read_unlock(); 1240
1144out:
1145 put_object(prev_obj); 1241 put_object(prev_obj);
1146 return next_obj; 1242 return next_obj;
1147} 1243}
@@ -1151,8 +1247,16 @@ out:
1151 */ 1247 */
1152static void kmemleak_seq_stop(struct seq_file *seq, void *v) 1248static void kmemleak_seq_stop(struct seq_file *seq, void *v)
1153{ 1249{
1154 if (v) 1250 if (!IS_ERR(v)) {
1155 put_object(v); 1251 /*
1252 * kmemleak_seq_start may return ERR_PTR if the scan_mutex
1253 * waiting was interrupted, so only release it if !IS_ERR.
1254 */
1255 rcu_read_unlock();
1256 mutex_unlock(&scan_mutex);
1257 if (v)
1258 put_object(v);
1259 }
1156} 1260}
1157 1261
1158/* 1262/*
@@ -1164,10 +1268,8 @@ static int kmemleak_seq_show(struct seq_file *seq, void *v)
1164 unsigned long flags; 1268 unsigned long flags;
1165 1269
1166 spin_lock_irqsave(&object->lock, flags); 1270 spin_lock_irqsave(&object->lock, flags);
1167 if ((object->flags & OBJECT_REPORTED) && unreferenced_object(object)) { 1271 if ((object->flags & OBJECT_REPORTED) && unreferenced_object(object))
1168 print_unreferenced(seq, object); 1272 print_unreferenced(seq, object);
1169 reported_leaks++;
1170 }
1171 spin_unlock_irqrestore(&object->lock, flags); 1273 spin_unlock_irqrestore(&object->lock, flags);
1172 return 0; 1274 return 0;
1173} 1275}
@@ -1181,36 +1283,15 @@ static const struct seq_operations kmemleak_seq_ops = {
1181 1283
1182static int kmemleak_open(struct inode *inode, struct file *file) 1284static int kmemleak_open(struct inode *inode, struct file *file)
1183{ 1285{
1184 int ret = 0;
1185
1186 if (!atomic_read(&kmemleak_enabled)) 1286 if (!atomic_read(&kmemleak_enabled))
1187 return -EBUSY; 1287 return -EBUSY;
1188 1288
1189 ret = mutex_lock_interruptible(&scan_mutex); 1289 return seq_open(file, &kmemleak_seq_ops);
1190 if (ret < 0)
1191 goto out;
1192 if (file->f_mode & FMODE_READ) {
1193 ret = seq_open(file, &kmemleak_seq_ops);
1194 if (ret < 0)
1195 goto scan_unlock;
1196 }
1197 return ret;
1198
1199scan_unlock:
1200 mutex_unlock(&scan_mutex);
1201out:
1202 return ret;
1203} 1290}
1204 1291
1205static int kmemleak_release(struct inode *inode, struct file *file) 1292static int kmemleak_release(struct inode *inode, struct file *file)
1206{ 1293{
1207 int ret = 0; 1294 return seq_release(inode, file);
1208
1209 if (file->f_mode & FMODE_READ)
1210 seq_release(inode, file);
1211 mutex_unlock(&scan_mutex);
1212
1213 return ret;
1214} 1295}
1215 1296
1216/* 1297/*
@@ -1230,15 +1311,17 @@ static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
1230{ 1311{
1231 char buf[64]; 1312 char buf[64];
1232 int buf_size; 1313 int buf_size;
1233 1314 int ret;
1234 if (!atomic_read(&kmemleak_enabled))
1235 return -EBUSY;
1236 1315
1237 buf_size = min(size, (sizeof(buf) - 1)); 1316 buf_size = min(size, (sizeof(buf) - 1));
1238 if (strncpy_from_user(buf, user_buf, buf_size) < 0) 1317 if (strncpy_from_user(buf, user_buf, buf_size) < 0)
1239 return -EFAULT; 1318 return -EFAULT;
1240 buf[buf_size] = 0; 1319 buf[buf_size] = 0;
1241 1320
1321 ret = mutex_lock_interruptible(&scan_mutex);
1322 if (ret < 0)
1323 return ret;
1324
1242 if (strncmp(buf, "off", 3) == 0) 1325 if (strncmp(buf, "off", 3) == 0)
1243 kmemleak_disable(); 1326 kmemleak_disable();
1244 else if (strncmp(buf, "stack=on", 8) == 0) 1327 else if (strncmp(buf, "stack=on", 8) == 0)
@@ -1251,11 +1334,10 @@ static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
1251 stop_scan_thread(); 1334 stop_scan_thread();
1252 else if (strncmp(buf, "scan=", 5) == 0) { 1335 else if (strncmp(buf, "scan=", 5) == 0) {
1253 unsigned long secs; 1336 unsigned long secs;
1254 int err;
1255 1337
1256 err = strict_strtoul(buf + 5, 0, &secs); 1338 ret = strict_strtoul(buf + 5, 0, &secs);
1257 if (err < 0) 1339 if (ret < 0)
1258 return err; 1340 goto out;
1259 stop_scan_thread(); 1341 stop_scan_thread();
1260 if (secs) { 1342 if (secs) {
1261 jiffies_scan_wait = msecs_to_jiffies(secs * 1000); 1343 jiffies_scan_wait = msecs_to_jiffies(secs * 1000);
@@ -1264,7 +1346,12 @@ static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
1264 } else if (strncmp(buf, "scan", 4) == 0) 1346 } else if (strncmp(buf, "scan", 4) == 0)
1265 kmemleak_scan(); 1347 kmemleak_scan();
1266 else 1348 else
1267 return -EINVAL; 1349 ret = -EINVAL;
1350
1351out:
1352 mutex_unlock(&scan_mutex);
1353 if (ret < 0)
1354 return ret;
1268 1355
1269 /* ignore the rest of the buffer, only one command at a time */ 1356 /* ignore the rest of the buffer, only one command at a time */
1270 *ppos += size; 1357 *ppos += size;
@@ -1293,7 +1380,7 @@ static int kmemleak_cleanup_thread(void *arg)
1293 1380
1294 rcu_read_lock(); 1381 rcu_read_lock();
1295 list_for_each_entry_rcu(object, &object_list, object_list) 1382 list_for_each_entry_rcu(object, &object_list, object_list)
1296 delete_object(object->pointer); 1383 delete_object_full(object->pointer);
1297 rcu_read_unlock(); 1384 rcu_read_unlock();
1298 mutex_unlock(&scan_mutex); 1385 mutex_unlock(&scan_mutex);
1299 1386
@@ -1388,6 +1475,9 @@ void __init kmemleak_init(void)
1388 case KMEMLEAK_FREE: 1475 case KMEMLEAK_FREE:
1389 kmemleak_free(log->ptr); 1476 kmemleak_free(log->ptr);
1390 break; 1477 break;
1478 case KMEMLEAK_FREE_PART:
1479 kmemleak_free_part(log->ptr, log->size);
1480 break;
1391 case KMEMLEAK_NOT_LEAK: 1481 case KMEMLEAK_NOT_LEAK:
1392 kmemleak_not_leak(log->ptr); 1482 kmemleak_not_leak(log->ptr);
1393 break; 1483 break;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e717964cb5a0..fd4529d86de5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1207,6 +1207,12 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
1207 ret = 0; 1207 ret = 0;
1208out: 1208out:
1209 unlock_page_cgroup(pc); 1209 unlock_page_cgroup(pc);
1210 /*
1211 * We charges against "to" which may not have any tasks. Then, "to"
1212 * can be under rmdir(). But in current implementation, caller of
1213 * this function is just force_empty() and it's garanteed that
1214 * "to" is never removed. So, we don't check rmdir status here.
1215 */
1210 return ret; 1216 return ret;
1211} 1217}
1212 1218
@@ -1428,6 +1434,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
1428 return; 1434 return;
1429 if (!ptr) 1435 if (!ptr)
1430 return; 1436 return;
1437 cgroup_exclude_rmdir(&ptr->css);
1431 pc = lookup_page_cgroup(page); 1438 pc = lookup_page_cgroup(page);
1432 mem_cgroup_lru_del_before_commit_swapcache(page); 1439 mem_cgroup_lru_del_before_commit_swapcache(page);
1433 __mem_cgroup_commit_charge(ptr, pc, ctype); 1440 __mem_cgroup_commit_charge(ptr, pc, ctype);
@@ -1457,8 +1464,12 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
1457 } 1464 }
1458 rcu_read_unlock(); 1465 rcu_read_unlock();
1459 } 1466 }
1460 /* add this page(page_cgroup) to the LRU we want. */ 1467 /*
1461 1468 * At swapin, we may charge account against cgroup which has no tasks.
1469 * So, rmdir()->pre_destroy() can be called while we do this charge.
1470 * In that case, we need to call pre_destroy() again. check it here.
1471 */
1472 cgroup_release_and_wakeup_rmdir(&ptr->css);
1462} 1473}
1463 1474
1464void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr) 1475void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
@@ -1664,7 +1675,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
1664 1675
1665 if (!mem) 1676 if (!mem)
1666 return; 1677 return;
1667 1678 cgroup_exclude_rmdir(&mem->css);
1668 /* at migration success, oldpage->mapping is NULL. */ 1679 /* at migration success, oldpage->mapping is NULL. */
1669 if (oldpage->mapping) { 1680 if (oldpage->mapping) {
1670 target = oldpage; 1681 target = oldpage;
@@ -1704,6 +1715,12 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
1704 */ 1715 */
1705 if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED) 1716 if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
1706 mem_cgroup_uncharge_page(target); 1717 mem_cgroup_uncharge_page(target);
1718 /*
1719 * At migration, we may charge account against cgroup which has no tasks
1720 * So, rmdir()->pre_destroy() can be called while we do this charge.
1721 * In that case, we need to call pre_destroy() again. check it here.
1722 */
1723 cgroup_release_and_wakeup_rmdir(&mem->css);
1707} 1724}
1708 1725
1709/* 1726/*
diff --git a/mm/memory.c b/mm/memory.c
index 65216194eb8d..aede2ce3aba4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -135,11 +135,12 @@ void pmd_clear_bad(pmd_t *pmd)
135 * Note: this doesn't free the actual pages themselves. That 135 * Note: this doesn't free the actual pages themselves. That
136 * has been handled earlier when unmapping all the memory regions. 136 * has been handled earlier when unmapping all the memory regions.
137 */ 137 */
138static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd) 138static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
139 unsigned long addr)
139{ 140{
140 pgtable_t token = pmd_pgtable(*pmd); 141 pgtable_t token = pmd_pgtable(*pmd);
141 pmd_clear(pmd); 142 pmd_clear(pmd);
142 pte_free_tlb(tlb, token); 143 pte_free_tlb(tlb, token, addr);
143 tlb->mm->nr_ptes--; 144 tlb->mm->nr_ptes--;
144} 145}
145 146
@@ -157,7 +158,7 @@ static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
157 next = pmd_addr_end(addr, end); 158 next = pmd_addr_end(addr, end);
158 if (pmd_none_or_clear_bad(pmd)) 159 if (pmd_none_or_clear_bad(pmd))
159 continue; 160 continue;
160 free_pte_range(tlb, pmd); 161 free_pte_range(tlb, pmd, addr);
161 } while (pmd++, addr = next, addr != end); 162 } while (pmd++, addr = next, addr != end);
162 163
163 start &= PUD_MASK; 164 start &= PUD_MASK;
@@ -173,7 +174,7 @@ static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
173 174
174 pmd = pmd_offset(pud, start); 175 pmd = pmd_offset(pud, start);
175 pud_clear(pud); 176 pud_clear(pud);
176 pmd_free_tlb(tlb, pmd); 177 pmd_free_tlb(tlb, pmd, start);
177} 178}
178 179
179static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, 180static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
@@ -206,7 +207,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
206 207
207 pud = pud_offset(pgd, start); 208 pud = pud_offset(pgd, start);
208 pgd_clear(pgd); 209 pgd_clear(pgd);
209 pud_free_tlb(tlb, pud); 210 pud_free_tlb(tlb, pud, start);
210} 211}
211 212
212/* 213/*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a35eeab2724c..d052abbe3063 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -882,7 +882,7 @@ retry_reserve:
882 */ 882 */
883static int rmqueue_bulk(struct zone *zone, unsigned int order, 883static int rmqueue_bulk(struct zone *zone, unsigned int order,
884 unsigned long count, struct list_head *list, 884 unsigned long count, struct list_head *list,
885 int migratetype) 885 int migratetype, int cold)
886{ 886{
887 int i; 887 int i;
888 888
@@ -901,7 +901,10 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
901 * merge IO requests if the physical pages are ordered 901 * merge IO requests if the physical pages are ordered
902 * properly. 902 * properly.
903 */ 903 */
904 list_add(&page->lru, list); 904 if (likely(cold == 0))
905 list_add(&page->lru, list);
906 else
907 list_add_tail(&page->lru, list);
905 set_page_private(page, migratetype); 908 set_page_private(page, migratetype);
906 list = &page->lru; 909 list = &page->lru;
907 } 910 }
@@ -1119,7 +1122,8 @@ again:
1119 local_irq_save(flags); 1122 local_irq_save(flags);
1120 if (!pcp->count) { 1123 if (!pcp->count) {
1121 pcp->count = rmqueue_bulk(zone, 0, 1124 pcp->count = rmqueue_bulk(zone, 0,
1122 pcp->batch, &pcp->list, migratetype); 1125 pcp->batch, &pcp->list,
1126 migratetype, cold);
1123 if (unlikely(!pcp->count)) 1127 if (unlikely(!pcp->count))
1124 goto failed; 1128 goto failed;
1125 } 1129 }
@@ -1138,7 +1142,8 @@ again:
1138 /* Allocate more to the pcp list if necessary */ 1142 /* Allocate more to the pcp list if necessary */
1139 if (unlikely(&page->lru == &pcp->list)) { 1143 if (unlikely(&page->lru == &pcp->list)) {
1140 pcp->count += rmqueue_bulk(zone, 0, 1144 pcp->count += rmqueue_bulk(zone, 0,
1141 pcp->batch, &pcp->list, migratetype); 1145 pcp->batch, &pcp->list,
1146 migratetype, cold);
1142 page = list_entry(pcp->list.next, struct page, lru); 1147 page = list_entry(pcp->list.next, struct page, lru);
1143 } 1148 }
1144 1149
@@ -1740,8 +1745,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
1740 * be using allocators in order of preference for an area that is 1745 * be using allocators in order of preference for an area that is
1741 * too large. 1746 * too large.
1742 */ 1747 */
1743 if (WARN_ON_ONCE(order >= MAX_ORDER)) 1748 if (order >= MAX_ORDER) {
1749 WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
1744 return NULL; 1750 return NULL;
1751 }
1745 1752
1746 /* 1753 /*
1747 * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and 1754 * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and
@@ -1789,6 +1796,10 @@ rebalance:
1789 if (p->flags & PF_MEMALLOC) 1796 if (p->flags & PF_MEMALLOC)
1790 goto nopage; 1797 goto nopage;
1791 1798
1799 /* Avoid allocations with no watermarks from looping endlessly */
1800 if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL))
1801 goto nopage;
1802
1792 /* Try direct reclaim and then allocating */ 1803 /* Try direct reclaim and then allocating */
1793 page = __alloc_pages_direct_reclaim(gfp_mask, order, 1804 page = __alloc_pages_direct_reclaim(gfp_mask, order,
1794 zonelist, high_zoneidx, 1805 zonelist, high_zoneidx,
@@ -4745,8 +4756,10 @@ void *__init alloc_large_system_hash(const char *tablename,
4745 * some pages at the end of hash table which 4756 * some pages at the end of hash table which
4746 * alloc_pages_exact() automatically does 4757 * alloc_pages_exact() automatically does
4747 */ 4758 */
4748 if (get_order(size) < MAX_ORDER) 4759 if (get_order(size) < MAX_ORDER) {
4749 table = alloc_pages_exact(size, GFP_ATOMIC); 4760 table = alloc_pages_exact(size, GFP_ATOMIC);
4761 kmemleak_alloc(table, size, 1, GFP_ATOMIC);
4762 }
4750 } 4763 }
4751 } while (!table && size > PAGE_SIZE && --log2qty); 4764 } while (!table && size > PAGE_SIZE && --log2qty);
4752 4765
@@ -4764,16 +4777,6 @@ void *__init alloc_large_system_hash(const char *tablename,
4764 if (_hash_mask) 4777 if (_hash_mask)
4765 *_hash_mask = (1 << log2qty) - 1; 4778 *_hash_mask = (1 << log2qty) - 1;
4766 4779
4767 /*
4768 * If hashdist is set, the table allocation is done with __vmalloc()
4769 * which invokes the kmemleak_alloc() callback. This function may also
4770 * be called before the slab and kmemleak are initialised when
4771 * kmemleak simply buffers the request to be executed later
4772 * (GFP_ATOMIC flag ignored in this case).
4773 */
4774 if (!hashdist)
4775 kmemleak_alloc(table, size, 1, GFP_ATOMIC);
4776
4777 return table; 4780 return table;
4778} 4781}
4779 4782
diff --git a/mm/slub.c b/mm/slub.c
index a9201d83178b..b9f1491a58a1 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -21,7 +21,6 @@
21#include <linux/kmemcheck.h> 21#include <linux/kmemcheck.h>
22#include <linux/cpu.h> 22#include <linux/cpu.h>
23#include <linux/cpuset.h> 23#include <linux/cpuset.h>
24#include <linux/kmemleak.h>
25#include <linux/mempolicy.h> 24#include <linux/mempolicy.h>
26#include <linux/ctype.h> 25#include <linux/ctype.h>
27#include <linux/debugobjects.h> 26#include <linux/debugobjects.h>
@@ -2835,13 +2834,15 @@ EXPORT_SYMBOL(__kmalloc);
2835static void *kmalloc_large_node(size_t size, gfp_t flags, int node) 2834static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
2836{ 2835{
2837 struct page *page; 2836 struct page *page;
2837 void *ptr = NULL;
2838 2838
2839 flags |= __GFP_COMP | __GFP_NOTRACK; 2839 flags |= __GFP_COMP | __GFP_NOTRACK;
2840 page = alloc_pages_node(node, flags, get_order(size)); 2840 page = alloc_pages_node(node, flags, get_order(size));
2841 if (page) 2841 if (page)
2842 return page_address(page); 2842 ptr = page_address(page);
2843 else 2843
2844 return NULL; 2844 kmemleak_alloc(ptr, size, 1, flags);
2845 return ptr;
2845} 2846}
2846 2847
2847#ifdef CONFIG_NUMA 2848#ifdef CONFIG_NUMA
@@ -2926,6 +2927,7 @@ void kfree(const void *x)
2926 page = virt_to_head_page(x); 2927 page = virt_to_head_page(x);
2927 if (unlikely(!PageSlab(page))) { 2928 if (unlikely(!PageSlab(page))) {
2928 BUG_ON(!PageCompound(page)); 2929 BUG_ON(!PageCompound(page));
2930 kmemleak_free(x);
2929 put_page(page); 2931 put_page(page);
2930 return; 2932 return;
2931 } 2933 }
diff --git a/mm/swapfile.c b/mm/swapfile.c
index d1ade1a48ee7..8ffdc0d23c53 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -753,7 +753,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
753 753
754 if (!bdev) { 754 if (!bdev) {
755 if (bdev_p) 755 if (bdev_p)
756 *bdev_p = bdget(sis->bdev->bd_dev); 756 *bdev_p = bdgrab(sis->bdev);
757 757
758 spin_unlock(&swap_lock); 758 spin_unlock(&swap_lock);
759 return i; 759 return i;
@@ -765,7 +765,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
765 struct swap_extent, list); 765 struct swap_extent, list);
766 if (se->start_block == offset) { 766 if (se->start_block == offset) {
767 if (bdev_p) 767 if (bdev_p)
768 *bdev_p = bdget(sis->bdev->bd_dev); 768 *bdev_p = bdgrab(sis->bdev);
769 769
770 spin_unlock(&swap_lock); 770 spin_unlock(&swap_lock);
771 bdput(bdev); 771 bdput(bdev);