author      Ingo Molnar <mingo@elte.hu>    2010-01-13 03:58:37 -0500
committer   Ingo Molnar <mingo@elte.hu>    2010-01-13 04:08:50 -0500
commit      61405fea92c42d072d9b8bd189689f1502a838af (patch)
tree        013ea3e7ed71f4114004d5852d40b6e89e128f76 /mm
parent      9c443dfdd31eddea6cbe6ee0ca469fbcc4e1dc3b (diff)
parent      1703f2c321a8a531c393e137a82602e16c6061cb (diff)
Merge branch 'perf/urgent' into perf/core
Merge reason: queue up dependent patch, update to -rc4
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig           |   3
-rw-r--r--  mm/hugetlb.c         |   2
-rw-r--r--  mm/kmemleak.c        | 188
-rw-r--r--  mm/maccess.c         |  11
-rw-r--r--  mm/memory-failure.c  |   9
-rw-r--r--  mm/mmap.c            |  40
-rw-r--r--  mm/nommu.c           |  42
-rw-r--r--  mm/page_alloc.c      |  72
-rw-r--r--  mm/percpu.c          |   4
-rw-r--r--  mm/readahead.c       |  12
-rw-r--r--  mm/slab.c            |  16
-rw-r--r--  mm/util.c            |  44
12 files changed, 282 insertions, 161 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 43ea8c3a2bbf..17b8947aa7da 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -221,6 +221,7 @@ config KSM
 
 config DEFAULT_MMAP_MIN_ADDR
 	int "Low address space to protect from user allocation"
+	depends on MMU
 	default 4096
 	help
 	  This is the portion of low virtual memory which should be protected
@@ -252,7 +253,7 @@ config MEMORY_FAILURE
 
 config HWPOISON_INJECT
 	tristate "HWPoison pages injector"
-	depends on MEMORY_FAILURE && DEBUG_KERNEL
+	depends on MEMORY_FAILURE && DEBUG_KERNEL && PROC_FS
 	select PROC_PAGE_MONITOR
 
 config NOMMU_INITIAL_TRIM_EXCESS
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 65f38c218207..e91b81b63670 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -402,7 +402,7 @@ static void clear_huge_page(struct page *page,
 {
 	int i;
 
-	if (unlikely(sz > MAX_ORDER_NR_PAGES)) {
+	if (unlikely(sz/PAGE_SIZE > MAX_ORDER_NR_PAGES)) {
 		clear_gigantic_page(page, addr, sz);
 		return;
 	}
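
The hugetlb change above is a units fix: sz is a size in bytes while MAX_ORDER_NR_PAGES counts pages, so the two are only comparable after dividing by the page size. A small standalone C sketch of the arithmetic; the PAGE_SIZE and MAX_ORDER_NR_PAGES values below are assumptions for the example, not taken from any particular kernel configuration.

#include <stdio.h>

#define PAGE_SIZE          4096UL	/* assumed page size for the example */
#define MAX_ORDER_NR_PAGES 2048UL	/* assumed largest buddy block, in pages */

int main(void)
{
	unsigned long sz = 4UL * 1024 * 1024;	/* a 4 MiB huge page, in bytes */

	/* buggy form: compares bytes against a page count, almost always true */
	printf("bytes vs pages: %d\n", sz > MAX_ORDER_NR_PAGES);
	/* fixed form: convert bytes to pages before comparing */
	printf("pages vs pages: %d\n", sz / PAGE_SIZE > MAX_ORDER_NR_PAGES);
	return 0;
}

With these example values the buggy comparison is true (4194304 > 2048) even though the huge page only spans 1024 pages, so the gigantic-page path would be taken needlessly.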
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 13f33b3081ec..5b069e4f5e48 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -93,6 +93,7 @@
 #include <linux/nodemask.h>
 #include <linux/mm.h>
 #include <linux/workqueue.h>
+#include <linux/crc32.h>
 
 #include <asm/sections.h>
 #include <asm/processor.h>
@@ -108,7 +109,6 @@
 #define MSECS_MIN_AGE		5000	/* minimum object age for reporting */
 #define SECS_FIRST_SCAN		60	/* delay before the first scan */
 #define SECS_SCAN_WAIT		600	/* subsequent auto scanning delay */
-#define GRAY_LIST_PASSES	25	/* maximum number of gray list scans */
 #define MAX_SCAN_SIZE		4096	/* maximum size of a scanned block */
 
 #define BYTES_PER_POINTER	sizeof(void *)
@@ -119,8 +119,8 @@
 /* scanning area inside a memory block */
 struct kmemleak_scan_area {
 	struct hlist_node node;
-	unsigned long offset;
-	size_t length;
+	unsigned long start;
+	size_t size;
 };
 
 #define KMEMLEAK_GREY	0
@@ -149,6 +149,8 @@ struct kmemleak_object {
 	int min_count;
 	/* the total number of pointers found pointing to this object */
 	int count;
+	/* checksum for detecting modified objects */
+	u32 checksum;
 	/* memory ranges to be scanned inside an object (empty for all) */
 	struct hlist_head area_list;
 	unsigned long trace[MAX_TRACE];
@@ -164,8 +166,6 @@ struct kmemleak_object {
 #define OBJECT_REPORTED		(1 << 1)
 /* flag set to not scan the object */
 #define OBJECT_NO_SCAN		(1 << 2)
-/* flag set on newly allocated objects */
-#define OBJECT_NEW		(1 << 3)
 
 /* number of bytes to print per line; must be 16 or 32 */
 #define HEX_ROW_SIZE		16
@@ -241,8 +241,6 @@ struct early_log {
 	const void *ptr;		/* allocated/freed memory block */
 	size_t size;			/* memory block size */
 	int min_count;			/* minimum reference count */
-	unsigned long offset;		/* scan area offset */
-	size_t length;			/* scan area length */
 	unsigned long trace[MAX_TRACE];	/* stack trace */
 	unsigned int trace_len;		/* stack trace length */
 };
@@ -323,11 +321,6 @@ static bool color_gray(const struct kmemleak_object *object)
 		object->count >= object->min_count;
 }
 
-static bool color_black(const struct kmemleak_object *object)
-{
-	return object->min_count == KMEMLEAK_BLACK;
-}
-
 /*
  * Objects are considered unreferenced only if their color is white, they have
  * not be deleted and have a minimum age to avoid false positives caused by
@@ -335,7 +328,7 @@ static bool color_black(const struct kmemleak_object *object)
  */
 static bool unreferenced_object(struct kmemleak_object *object)
 {
-	return (object->flags & OBJECT_ALLOCATED) && color_white(object) &&
+	return (color_white(object) && object->flags & OBJECT_ALLOCATED) &&
 		time_before_eq(object->jiffies + jiffies_min_age,
 			       jiffies_last_scan);
 }
@@ -348,11 +341,13 @@ static void print_unreferenced(struct seq_file *seq,
 			       struct kmemleak_object *object)
 {
 	int i;
+	unsigned int msecs_age = jiffies_to_msecs(jiffies - object->jiffies);
 
 	seq_printf(seq, "unreferenced object 0x%08lx (size %zu):\n",
 		   object->pointer, object->size);
-	seq_printf(seq, "  comm \"%s\", pid %d, jiffies %lu\n",
-		   object->comm, object->pid, object->jiffies);
+	seq_printf(seq, "  comm \"%s\", pid %d, jiffies %lu (age %d.%03ds)\n",
+		   object->comm, object->pid, object->jiffies,
+		   msecs_age / 1000, msecs_age % 1000);
 	hex_dump_object(seq, object);
 	seq_printf(seq, "  backtrace:\n");
 
@@ -381,6 +376,7 @@ static void dump_object_info(struct kmemleak_object *object)
 	pr_notice("  min_count = %d\n", object->min_count);
 	pr_notice("  count = %d\n", object->count);
 	pr_notice("  flags = 0x%lx\n", object->flags);
+	pr_notice("  checksum = %d\n", object->checksum);
 	pr_notice("  backtrace:\n");
 	print_stack_trace(&trace, 4);
 }
@@ -522,12 +518,13 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
 	INIT_HLIST_HEAD(&object->area_list);
 	spin_lock_init(&object->lock);
 	atomic_set(&object->use_count, 1);
-	object->flags = OBJECT_ALLOCATED | OBJECT_NEW;
+	object->flags = OBJECT_ALLOCATED;
 	object->pointer = ptr;
 	object->size = size;
 	object->min_count = min_count;
-	object->count = -1;			/* no color initially */
+	object->count = 0;			/* white color initially */
 	object->jiffies = jiffies;
+	object->checksum = 0;
 
 	/* task information */
 	if (in_irq()) {
@@ -720,14 +717,13 @@ static void make_black_object(unsigned long ptr)
  * Add a scanning area to the object. If at least one such area is added,
  * kmemleak will only scan these ranges rather than the whole memory block.
  */
-static void add_scan_area(unsigned long ptr, unsigned long offset,
-			  size_t length, gfp_t gfp)
+static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
 {
 	unsigned long flags;
 	struct kmemleak_object *object;
 	struct kmemleak_scan_area *area;
 
-	object = find_and_get_object(ptr, 0);
+	object = find_and_get_object(ptr, 1);
 	if (!object) {
 		kmemleak_warn("Adding scan area to unknown object at 0x%08lx\n",
 			      ptr);
@@ -741,7 +737,7 @@ static void add_scan_area(unsigned long ptr, unsigned long offset,
 	}
 
 	spin_lock_irqsave(&object->lock, flags);
-	if (offset + length > object->size) {
+	if (ptr + size > object->pointer + object->size) {
 		kmemleak_warn("Scan area larger than object 0x%08lx\n", ptr);
 		dump_object_info(object);
 		kmem_cache_free(scan_area_cache, area);
@@ -749,8 +745,8 @@ static void add_scan_area(unsigned long ptr, unsigned long offset,
 	}
 
 	INIT_HLIST_NODE(&area->node);
-	area->offset = offset;
-	area->length = length;
+	area->start = ptr;
+	area->size = size;
 
 	hlist_add_head(&area->node, &object->area_list);
 out_unlock:
@@ -786,7 +782,7 @@ static void object_no_scan(unsigned long ptr)
  * processed later once kmemleak is fully initialized.
  */
 static void __init log_early(int op_type, const void *ptr, size_t size,
-			     int min_count, unsigned long offset, size_t length)
+			     int min_count)
 {
 	unsigned long flags;
 	struct early_log *log;
@@ -808,8 +804,6 @@ static void __init log_early(int op_type, const void *ptr, size_t size,
 	log->ptr = ptr;
 	log->size = size;
 	log->min_count = min_count;
-	log->offset = offset;
-	log->length = length;
 	if (op_type == KMEMLEAK_ALLOC)
 		log->trace_len = __save_stack_trace(log->trace);
 	crt_early_log++;
@@ -858,7 +852,7 @@ void __ref kmemleak_alloc(const void *ptr, size_t size, int min_count,
 	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
 		create_object((unsigned long)ptr, size, min_count, gfp);
 	else if (atomic_read(&kmemleak_early_log))
-		log_early(KMEMLEAK_ALLOC, ptr, size, min_count, 0, 0);
+		log_early(KMEMLEAK_ALLOC, ptr, size, min_count);
 }
 EXPORT_SYMBOL_GPL(kmemleak_alloc);
 
@@ -873,7 +867,7 @@ void __ref kmemleak_free(const void *ptr)
 	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
 		delete_object_full((unsigned long)ptr);
 	else if (atomic_read(&kmemleak_early_log))
-		log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0);
+		log_early(KMEMLEAK_FREE, ptr, 0, 0);
 }
 EXPORT_SYMBOL_GPL(kmemleak_free);
 
@@ -888,7 +882,7 @@ void __ref kmemleak_free_part(const void *ptr, size_t size)
 	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
 		delete_object_part((unsigned long)ptr, size);
 	else if (atomic_read(&kmemleak_early_log))
-		log_early(KMEMLEAK_FREE_PART, ptr, size, 0, 0, 0);
+		log_early(KMEMLEAK_FREE_PART, ptr, size, 0);
 }
 EXPORT_SYMBOL_GPL(kmemleak_free_part);
 
@@ -903,7 +897,7 @@ void __ref kmemleak_not_leak(const void *ptr)
 	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
 		make_gray_object((unsigned long)ptr);
 	else if (atomic_read(&kmemleak_early_log))
-		log_early(KMEMLEAK_NOT_LEAK, ptr, 0, 0, 0, 0);
+		log_early(KMEMLEAK_NOT_LEAK, ptr, 0, 0);
 }
 EXPORT_SYMBOL(kmemleak_not_leak);
 
@@ -919,22 +913,21 @@ void __ref kmemleak_ignore(const void *ptr)
 	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
 		make_black_object((unsigned long)ptr);
 	else if (atomic_read(&kmemleak_early_log))
-		log_early(KMEMLEAK_IGNORE, ptr, 0, 0, 0, 0);
+		log_early(KMEMLEAK_IGNORE, ptr, 0, 0);
 }
 EXPORT_SYMBOL(kmemleak_ignore);
 
 /*
  * Limit the range to be scanned in an allocated memory block.
  */
-void __ref kmemleak_scan_area(const void *ptr, unsigned long offset,
-			      size_t length, gfp_t gfp)
+void __ref kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp)
 {
 	pr_debug("%s(0x%p)\n", __func__, ptr);
 
 	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
-		add_scan_area((unsigned long)ptr, offset, length, gfp);
+		add_scan_area((unsigned long)ptr, size, gfp);
 	else if (atomic_read(&kmemleak_early_log))
-		log_early(KMEMLEAK_SCAN_AREA, ptr, 0, 0, offset, length);
+		log_early(KMEMLEAK_SCAN_AREA, ptr, size, 0);
 }
 EXPORT_SYMBOL(kmemleak_scan_area);
 
@@ -948,11 +941,25 @@ void __ref kmemleak_no_scan(const void *ptr)
 	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
 		object_no_scan((unsigned long)ptr);
 	else if (atomic_read(&kmemleak_early_log))
-		log_early(KMEMLEAK_NO_SCAN, ptr, 0, 0, 0, 0);
+		log_early(KMEMLEAK_NO_SCAN, ptr, 0, 0);
 }
 EXPORT_SYMBOL(kmemleak_no_scan);
 
 /*
+ * Update an object's checksum and return true if it was modified.
+ */
+static bool update_checksum(struct kmemleak_object *object)
+{
+	u32 old_csum = object->checksum;
+
+	if (!kmemcheck_is_obj_initialized(object->pointer, object->size))
+		return false;
+
+	object->checksum = crc32(0, (void *)object->pointer, object->size);
+	return object->checksum != old_csum;
+}
+
+/*
  * Memory scanning is a long process and it needs to be interruptable. This
  * function checks whether such interrupt condition occured.
  */
@@ -1031,11 +1038,14 @@ static void scan_block(void *_start, void *_end,
 		 * added to the gray_list.
 		 */
 		object->count++;
-		if (color_gray(object))
+		if (color_gray(object)) {
 			list_add_tail(&object->gray_list, &gray_list);
-		else
-			put_object(object);
+			spin_unlock_irqrestore(&object->lock, flags);
+			continue;
+		}
+
 		spin_unlock_irqrestore(&object->lock, flags);
+		put_object(object);
 	}
 }
 
@@ -1075,14 +1085,47 @@ static void scan_object(struct kmemleak_object *object)
 		}
 	} else
 		hlist_for_each_entry(area, elem, &object->area_list, node)
-			scan_block((void *)(object->pointer + area->offset),
-				   (void *)(object->pointer + area->offset
-					    + area->length), object, 0);
+			scan_block((void *)area->start,
+				   (void *)(area->start + area->size),
+				   object, 0);
 out:
 	spin_unlock_irqrestore(&object->lock, flags);
 }
 
 /*
+ * Scan the objects already referenced (gray objects). More objects will be
+ * referenced and, if there are no memory leaks, all the objects are scanned.
+ */
+static void scan_gray_list(void)
+{
+	struct kmemleak_object *object, *tmp;
+
+	/*
+	 * The list traversal is safe for both tail additions and removals
+	 * from inside the loop. The kmemleak objects cannot be freed from
+	 * outside the loop because their use_count was incremented.
+	 */
+	object = list_entry(gray_list.next, typeof(*object), gray_list);
+	while (&object->gray_list != &gray_list) {
+		cond_resched();
+
+		/* may add new objects to the list */
+		if (!scan_should_stop())
+			scan_object(object);
+
+		tmp = list_entry(object->gray_list.next, typeof(*object),
+				 gray_list);
+
+		/* remove the object from the list and release it */
+		list_del(&object->gray_list);
+		put_object(object);
+
+		object = tmp;
+	}
+	WARN_ON(!list_empty(&gray_list));
+}
+
+/*
  * Scan data sections and all the referenced memory blocks allocated via the
  * kernel's standard allocators. This function must be called with the
  * scan_mutex held.
@@ -1090,10 +1133,9 @@ out:
 static void kmemleak_scan(void)
 {
 	unsigned long flags;
-	struct kmemleak_object *object, *tmp;
+	struct kmemleak_object *object;
 	int i;
 	int new_leaks = 0;
-	int gray_list_pass = 0;
 
 	jiffies_last_scan = jiffies;
 
@@ -1114,7 +1156,6 @@ static void kmemleak_scan(void)
 #endif
 		/* reset the reference count (whiten the object) */
 		object->count = 0;
-		object->flags &= ~OBJECT_NEW;
 		if (color_gray(object) && get_object(object))
 			list_add_tail(&object->gray_list, &gray_list);
 
@@ -1172,62 +1213,36 @@
 
 	/*
 	 * Scan the objects already referenced from the sections scanned
-	 * above. More objects will be referenced and, if there are no memory
-	 * leaks, all the objects will be scanned. The list traversal is safe
-	 * for both tail additions and removals from inside the loop. The
-	 * kmemleak objects cannot be freed from outside the loop because their
-	 * use_count was increased.
+	 * above.
 	 */
-repeat:
-	object = list_entry(gray_list.next, typeof(*object), gray_list);
-	while (&object->gray_list != &gray_list) {
-		cond_resched();
-
-		/* may add new objects to the list */
-		if (!scan_should_stop())
-			scan_object(object);
-
-		tmp = list_entry(object->gray_list.next, typeof(*object),
-				 gray_list);
-
-		/* remove the object from the list and release it */
-		list_del(&object->gray_list);
-		put_object(object);
-
-		object = tmp;
-	}
-
-	if (scan_should_stop() || ++gray_list_pass >= GRAY_LIST_PASSES)
-		goto scan_end;
+	scan_gray_list();
 
 	/*
-	 * Check for new objects allocated during this scanning and add them
-	 * to the gray list.
+	 * Check for new or unreferenced objects modified since the previous
+	 * scan and color them gray until the next scan.
 	 */
 	rcu_read_lock();
 	list_for_each_entry_rcu(object, &object_list, object_list) {
 		spin_lock_irqsave(&object->lock, flags);
-		if ((object->flags & OBJECT_NEW) && !color_black(object) &&
-		    get_object(object)) {
-			object->flags &= ~OBJECT_NEW;
+		if (color_white(object) && (object->flags & OBJECT_ALLOCATED)
+		    && update_checksum(object) && get_object(object)) {
+			/* color it gray temporarily */
+			object->count = object->min_count;
 			list_add_tail(&object->gray_list, &gray_list);
 		}
 		spin_unlock_irqrestore(&object->lock, flags);
 	}
 	rcu_read_unlock();
 
-	if (!list_empty(&gray_list))
-		goto repeat;
-
-scan_end:
-	WARN_ON(!list_empty(&gray_list));
+	/*
+	 * Re-scan the gray list for modified unreferenced objects.
+	 */
+	scan_gray_list();
 
 	/*
-	 * If scanning was stopped or new objects were being allocated at a
-	 * higher rate than gray list scanning, do not report any new
-	 * unreferenced objects.
+	 * If scanning was stopped do not report any new unreferenced objects.
 	 */
-	if (scan_should_stop() || gray_list_pass >= GRAY_LIST_PASSES)
+	if (scan_should_stop())
 		return;
 
 	/*
@@ -1642,8 +1657,7 @@ void __init kmemleak_init(void)
 			kmemleak_ignore(log->ptr);
 			break;
 		case KMEMLEAK_SCAN_AREA:
-			kmemleak_scan_area(log->ptr, log->offset, log->length,
-					   GFP_KERNEL);
+			kmemleak_scan_area(log->ptr, log->size, GFP_KERNEL);
 			break;
 		case KMEMLEAK_NO_SCAN:
 			kmemleak_no_scan(log->ptr);
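
The new update_checksum() above is what replaces the fixed GRAY_LIST_PASSES loop: an unreferenced object whose contents changed since the last scan is temporarily colored gray and re-scanned, instead of repeatedly walking the whole gray list. A minimal userspace sketch of that checksum idea, using zlib's crc32() in place of the kernel's crc32(); the tracked_object struct and all names are invented for the example.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <zlib.h>

struct tracked_object {
	void *ptr;		/* start of the tracked memory block */
	size_t size;		/* block size in bytes */
	uint32_t checksum;	/* CRC of the contents at the last scan */
};

/* Recompute the CRC; return true if the block changed since the last call. */
static bool update_checksum(struct tracked_object *obj)
{
	uint32_t old_csum = obj->checksum;

	obj->checksum = (uint32_t)crc32(0L, obj->ptr, (uInt)obj->size);
	return obj->checksum != old_csum;
}

int main(void)
{
	char buf[32] = "hello";
	struct tracked_object obj = { buf, sizeof(buf), 0 };

	update_checksum(&obj);		/* establish the baseline CRC */
	strcpy(buf, "world");		/* simulate a store into the object */
	printf("modified: %d\n", update_checksum(&obj));	/* prints 1 */
	return 0;
}

Build with -lz. A scanner using this check only revisits blocks whose checksum moved, which is the cheap stand-in for "a new pointer may have been written into this object".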
diff --git a/mm/maccess.c b/mm/maccess.c
index 9073695ff25f..4e348dbaecd7 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c
@@ -14,7 +14,11 @@
  * Safely read from address @src to the buffer at @dst. If a kernel fault
  * happens, handle that and return -EFAULT.
  */
-long probe_kernel_read(void *dst, void *src, size_t size)
+
+long __weak probe_kernel_read(void *dst, void *src, size_t size)
+    __attribute__((alias("__probe_kernel_read")));
+
+long __probe_kernel_read(void *dst, void *src, size_t size)
 {
 	long ret;
 	mm_segment_t old_fs = get_fs();
@@ -39,7 +43,10 @@ EXPORT_SYMBOL_GPL(probe_kernel_read);
  * Safely write to address @dst from the buffer at @src. If a kernel fault
  * happens, handle that and return -EFAULT.
  */
-long notrace __weak probe_kernel_write(void *dst, void *src, size_t size)
+long __weak probe_kernel_write(void *dst, void *src, size_t size)
+    __attribute__((alias("__probe_kernel_write")));
+
+long __probe_kernel_write(void *dst, void *src, size_t size)
 {
 	long ret;
 	mm_segment_t old_fs = get_fs();
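
The maccess.c change switches both probe helpers to a weak-alias layout: the generic body keeps its double-underscore name, and the public symbol is a weak alias that an architecture may replace with its own strong definition. A standalone GCC/Clang sketch of that pattern; the function names here are made up and not the kernel's.

#include <stdio.h>

/* Generic implementation, always available under its own name. */
long probe_value_generic(long x)
{
	return x + 1;	/* placeholder "generic" behaviour */
}

/* Public name: a weak alias. Another object file can override it simply by
 * providing a strong (non-weak) definition of probe_value(). */
long probe_value(long x) __attribute__((weak, alias("probe_value_generic")));

int main(void)
{
	printf("%ld\n", probe_value(41));	/* 42 unless overridden elsewhere */
	return 0;
}

The alias must appear in the same translation unit as, and after, the definition it points at, which is why the generic function is spelled out first.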
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 6a0466ed5bfd..17299fd4577c 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -52,6 +52,8 @@ int sysctl_memory_failure_recovery __read_mostly = 1;
 
 atomic_long_t mce_bad_pages __read_mostly = ATOMIC_LONG_INIT(0);
 
+#if defined(CONFIG_HWPOISON_INJECT) || defined(CONFIG_HWPOISON_INJECT_MODULE)
+
 u32 hwpoison_filter_enable = 0;
 u32 hwpoison_filter_dev_major = ~0U;
 u32 hwpoison_filter_dev_minor = ~0U;
@@ -164,6 +166,13 @@ int hwpoison_filter(struct page *p)
 
 	return 0;
 }
+#else
+int hwpoison_filter(struct page *p)
+{
+	return 0;
+}
+#endif
+
 EXPORT_SYMBOL_GPL(hwpoison_filter);
 
 /*
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1043,6 +1043,46 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 }
 EXPORT_SYMBOL(do_mmap_pgoff);
 
+SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
+		unsigned long, prot, unsigned long, flags,
+		unsigned long, fd, unsigned long, pgoff)
+{
+	struct file *file = NULL;
+	unsigned long retval = -EBADF;
+
+	if (!(flags & MAP_ANONYMOUS)) {
+		if (unlikely(flags & MAP_HUGETLB))
+			return -EINVAL;
+		file = fget(fd);
+		if (!file)
+			goto out;
+	} else if (flags & MAP_HUGETLB) {
+		struct user_struct *user = NULL;
+		/*
+		 * VM_NORESERVE is used because the reservations will be
+		 * taken when vm_ops->mmap() is called
+		 * A dummy user value is used because we are not locking
+		 * memory so no accounting is necessary
+		 */
+		len = ALIGN(len, huge_page_size(&default_hstate));
+		file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
+						&user, HUGETLB_ANONHUGE_INODE);
+		if (IS_ERR(file))
+			return PTR_ERR(file);
+	}
+
+	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+
+	down_write(&current->mm->mmap_sem);
+	retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+	up_write(&current->mm->mmap_sem);
+
+	if (file)
+		fput(file);
+out:
+	return retval;
+}
+
 /*
  * Some shared mappigns will want the pages marked read-only
  * to track write events. If so, we'll downgrade vm_page_prot
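
One path the sys_mmap_pgoff() definition above handles itself is an anonymous MAP_HUGETLB mapping, which it backs with hugetlb_file_setup() after rounding len up to the huge page size. A userspace sketch of what such a caller looks like; it assumes 2 MiB huge pages are available and reserved on the system, otherwise the mmap() call simply fails.

#define _GNU_SOURCE		/* MAP_ANONYMOUS / MAP_HUGETLB on some libcs */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#define MAP_LEN (2UL * 1024 * 1024)	/* assume a 2 MiB huge page size */

int main(void)
{
	void *p = mmap(NULL, MAP_LEN, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap(MAP_HUGETLB)");	/* e.g. no huge pages reserved */
		return 1;
	}
	memset(p, 0, MAP_LEN);			/* touch the mapping */
	munmap(p, MAP_LEN);
	return 0;
}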
diff --git a/mm/nommu.c b/mm/nommu.c
index 8687973462bb..17773862619b 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -432,6 +432,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	/*
 	 * Ok, looks good - let it rip.
 	 */
+	flush_icache_range(mm->brk, brk);
 	return mm->brk = brk;
 }
 
@@ -1353,10 +1354,14 @@ unsigned long do_mmap_pgoff(struct file *file,
 share:
 	add_vma_to_mm(current->mm, vma);
 
-	up_write(&nommu_region_sem);
+	/* we flush the region from the icache only when the first executable
+	 * mapping of it is made */
+	if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) {
+		flush_icache_range(region->vm_start, region->vm_end);
+		region->vm_icache_flushed = true;
+	}
 
-	if (prot & PROT_EXEC)
-		flush_icache_range(result, result + len);
+	up_write(&nommu_region_sem);
 
 	kleave(" = %lx", result);
 	return result;
@@ -1398,6 +1403,31 @@ error_getting_region:
 }
 EXPORT_SYMBOL(do_mmap_pgoff);
 
+SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
+		unsigned long, prot, unsigned long, flags,
+		unsigned long, fd, unsigned long, pgoff)
+{
+	struct file *file = NULL;
+	unsigned long retval = -EBADF;
+
+	if (!(flags & MAP_ANONYMOUS)) {
+		file = fget(fd);
+		if (!file)
+			goto out;
+	}
+
+	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+
+	down_write(&current->mm->mmap_sem);
+	retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+	up_write(&current->mm->mmap_sem);
+
+	if (file)
+		fput(file);
+out:
+	return retval;
+}
+
 /*
  * split a vma into two pieces at address 'addr', a new vma is allocated either
  * for the first part or the tail.
@@ -1891,9 +1921,11 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 
 	/* only read or write mappings where it is permitted */
 	if (write && vma->vm_flags & VM_MAYWRITE)
-		len -= copy_to_user((void *) addr, buf, len);
+		copy_to_user_page(vma, NULL, addr,
+				 (void *) addr, buf, len);
 	else if (!write && vma->vm_flags & VM_MAYREAD)
-		len -= copy_from_user(buf, (void *) addr, len);
+		copy_from_user_page(vma, NULL, addr,
+				    buf, (void *) addr, len);
 	else
 		len = 0;
 	} else {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 74af449b1f1d..4e9f5cc5fb59 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,6 +48,7 @@
 #include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 #include <linux/kmemleak.h>
+#include <linux/memory.h>
 #include <trace/events/kmem.h>
 
 #include <asm/tlbflush.h>
@@ -2401,13 +2402,14 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
 {
 	char saved_string[NUMA_ZONELIST_ORDER_LEN];
 	int ret;
+	static DEFINE_MUTEX(zl_order_mutex);
 
+	mutex_lock(&zl_order_mutex);
 	if (write)
-		strncpy(saved_string, (char*)table->data,
-			NUMA_ZONELIST_ORDER_LEN);
+		strcpy(saved_string, (char*)table->data);
 	ret = proc_dostring(table, write, buffer, length, ppos);
 	if (ret)
-		return ret;
+		goto out;
 	if (write) {
 		int oldval = user_zonelist_order;
 		if (__parse_numa_zonelist_order((char*)table->data)) {
@@ -2420,7 +2422,9 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
 		} else if (oldval != user_zonelist_order)
 			build_all_zonelists();
 	}
-	return 0;
+out:
+	mutex_unlock(&zl_order_mutex);
+	return ret;
 }
 
 
@@ -3579,7 +3583,7 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid,
  * Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
  * then all holes in the requested range will be accounted for.
  */
-static unsigned long __meminit __absent_pages_in_range(int nid,
+unsigned long __meminit __absent_pages_in_range(int nid,
 				unsigned long range_start_pfn,
 				unsigned long range_end_pfn)
 {
@@ -4108,7 +4112,7 @@ static int __init cmp_node_active_region(const void *a, const void *b)
 }
 
 /* sort the node_map by start_pfn */
-static void __init sort_node_map(void)
+void __init sort_node_map(void)
 {
 	sort(early_node_map, (size_t)nr_nodemap_entries,
 	     sizeof(struct node_active_region),
@@ -5008,23 +5012,65 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
 int set_migratetype_isolate(struct page *page)
 {
 	struct zone *zone;
-	unsigned long flags;
+	struct page *curr_page;
+	unsigned long flags, pfn, iter;
+	unsigned long immobile = 0;
+	struct memory_isolate_notify arg;
+	int notifier_ret;
 	int ret = -EBUSY;
 	int zone_idx;
 
 	zone = page_zone(page);
 	zone_idx = zone_idx(zone);
+
 	spin_lock_irqsave(&zone->lock, flags);
+	if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE ||
+	    zone_idx == ZONE_MOVABLE) {
+		ret = 0;
+		goto out;
+	}
+
+	pfn = page_to_pfn(page);
+	arg.start_pfn = pfn;
+	arg.nr_pages = pageblock_nr_pages;
+	arg.pages_found = 0;
+
 	/*
-	 * In future, more migrate types will be able to be isolation target.
+	 * It may be possible to isolate a pageblock even if the
+	 * migratetype is not MIGRATE_MOVABLE. The memory isolation
+	 * notifier chain is used by balloon drivers to return the
+	 * number of pages in a range that are held by the balloon
+	 * driver to shrink memory. If all the pages are accounted for
+	 * by balloons, are free, or on the LRU, isolation can continue.
+	 * Later, for example, when memory hotplug notifier runs, these
+	 * pages reported as "can be isolated" should be isolated(freed)
+	 * by the balloon driver through the memory notifier chain.
 	 */
-	if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE &&
-	    zone_idx != ZONE_MOVABLE)
+	notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
+	notifier_ret = notifier_to_errno(notifier_ret);
+	if (notifier_ret || !arg.pages_found)
 		goto out;
-	set_pageblock_migratetype(page, MIGRATE_ISOLATE);
-	move_freepages_block(zone, page, MIGRATE_ISOLATE);
-	ret = 0;
+
+	for (iter = pfn; iter < (pfn + pageblock_nr_pages); iter++) {
+		if (!pfn_valid_within(pfn))
+			continue;
+
+		curr_page = pfn_to_page(iter);
+		if (!page_count(curr_page) || PageLRU(curr_page))
+			continue;
+
+		immobile++;
+	}
+
+	if (arg.pages_found == immobile)
+		ret = 0;
+
 out:
+	if (!ret) {
+		set_pageblock_migratetype(page, MIGRATE_ISOLATE);
+		move_freepages_block(zone, page, MIGRATE_ISOLATE);
+	}
+
 	spin_unlock_irqrestore(&zone->lock, flags);
 	if (!ret)
 		drain_all_pages();
diff --git a/mm/percpu.c b/mm/percpu.c
index 442010cc91c6..083e7c91e5f6 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1271,7 +1271,7 @@ static void pcpu_reclaim(struct work_struct *work)
  */
 void free_percpu(void *ptr)
 {
-	void *addr = __pcpu_ptr_to_addr(ptr);
+	void *addr;
 	struct pcpu_chunk *chunk;
 	unsigned long flags;
 	int off;
@@ -1279,6 +1279,8 @@ void free_percpu(void *ptr)
 	if (!ptr)
 		return;
 
+	addr = __pcpu_ptr_to_addr(ptr);
+
 	spin_lock_irqsave(&pcpu_lock, flags);
 
 	chunk = pcpu_chunk_addr_search(addr);
diff --git a/mm/readahead.c b/mm/readahead.c
index aa1aa2345235..033bc135a41f 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -547,5 +547,17 @@ page_cache_async_readahead(struct address_space *mapping,
 
 	/* do read-ahead */
 	ondemand_readahead(mapping, ra, filp, true, offset, req_size);
+
+#ifdef CONFIG_BLOCK
+	/*
+	 * Normally the current page is !uptodate and lock_page() will be
+	 * immediately called to implicitly unplug the device. However this
+	 * is not always true for RAID conifgurations, where data arrives
+	 * not strictly in their submission order. In this case we need to
+	 * explicitly kick off the IO.
+	 */
+	if (PageUptodate(page))
+		blk_run_backing_dev(mapping->backing_dev_info, NULL);
+#endif
 }
 EXPORT_SYMBOL_GPL(page_cache_async_readahead);
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -654,7 +654,7 @@ static void init_node_lock_keys(int q)
 
 		l3 = s->cs_cachep->nodelists[q];
 		if (!l3 || OFF_SLAB(s->cs_cachep))
-			return;
+			continue;
 		lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
 		alc = l3->alien;
 		/*
@@ -665,7 +665,7 @@ static void init_node_lock_keys(int q)
 		 * for alloc_alien_cache,
 		 */
 		if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
-			return;
+			continue;
 		for_each_node(r) {
 			if (alc[r])
 				lockdep_set_class(&alc[r]->lock,
@@ -1132,7 +1132,7 @@ static void __cpuinit cpuup_canceled(long cpu)
 		if (nc)
 			free_block(cachep, nc->entry, nc->avail, node);
 
-		if (!cpus_empty(*mask)) {
+		if (!cpumask_empty(mask)) {
 			spin_unlock_irq(&l3->list_lock);
 			goto free_array_cache;
 		}
@@ -2275,9 +2275,11 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	/*
 	 * Determine if the slab management is 'on' or 'off' slab.
 	 * (bootstrapping cannot cope with offslab caches so don't do
-	 * it too early on.)
+	 * it too early on. Always use on-slab management when
+	 * SLAB_NOLEAKTRACE to avoid recursive calls into kmemleak)
 	 */
-	if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init)
+	if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init &&
+	    !(flags & SLAB_NOLEAKTRACE))
 		/*
 		 * Size is large, assume best to place the slab management obj
 		 * off-slab (should allow better packing of objs).
@@ -2596,8 +2598,8 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
 		 * kmemleak does not treat the ->s_mem pointer as a reference
 		 * to the object. Otherwise we will not report the leak.
 		 */
-		kmemleak_scan_area(slabp, offsetof(struct slab, list),
-				   sizeof(struct list_head), local_flags);
+		kmemleak_scan_area(&slabp->list, sizeof(struct list_head),
+				   local_flags);
 		if (!slabp)
 			return NULL;
 	} else {
diff --git a/mm/util.c b/mm/util.c
--- a/mm/util.c
+++ b/mm/util.c
@@ -4,10 +4,6 @@
 #include <linux/module.h>
 #include <linux/err.h>
 #include <linux/sched.h>
-#include <linux/hugetlb.h>
-#include <linux/syscalls.h>
-#include <linux/mman.h>
-#include <linux/file.h>
 #include <asm/uaccess.h>
 
 #define CREATE_TRACE_POINTS
@@ -272,46 +268,6 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start,
 }
 EXPORT_SYMBOL_GPL(get_user_pages_fast);
 
-SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
-		unsigned long, prot, unsigned long, flags,
-		unsigned long, fd, unsigned long, pgoff)
-{
-	struct file * file = NULL;
-	unsigned long retval = -EBADF;
-
-	if (!(flags & MAP_ANONYMOUS)) {
-		if (unlikely(flags & MAP_HUGETLB))
-			return -EINVAL;
-		file = fget(fd);
-		if (!file)
-			goto out;
-	} else if (flags & MAP_HUGETLB) {
-		struct user_struct *user = NULL;
-		/*
-		 * VM_NORESERVE is used because the reservations will be
-		 * taken when vm_ops->mmap() is called
-		 * A dummy user value is used because we are not locking
-		 * memory so no accounting is necessary
-		 */
-		len = ALIGN(len, huge_page_size(&default_hstate));
-		file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
-						&user, HUGETLB_ANONHUGE_INODE);
-		if (IS_ERR(file))
-			return PTR_ERR(file);
-	}
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-
-	down_write(&current->mm->mmap_sem);
-	retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-
-	if (file)
-		fput(file);
-out:
-	return retval;
-}
-
 /* Tracepoints definitions. */
 EXPORT_TRACEPOINT_SYMBOL(kmalloc);
 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);