diff options
author | Tejun Heo <tj@kernel.org> | 2009-08-14 01:41:02 -0400 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2009-08-14 01:45:31 -0400 |
commit | 384be2b18a5f9475eab9ca2bdfa95cc1a04ef59c (patch) | |
tree | 04c93f391a1b65c8bf8d7ba8643c07d26c26590a /mm | |
parent | a76761b621bcd8336065c4fe3a74f046858bc34c (diff) | |
parent | 142d44b0dd6741a64a7bdbe029110e7c1dcf1d23 (diff) |
Merge branch 'percpu-for-linus' into percpu-for-next
Conflicts:
arch/sparc/kernel/smp_64.c
arch/x86/kernel/cpu/perf_counter.c
arch/x86/kernel/setup_percpu.c
drivers/cpufreq/cpufreq_ondemand.c
mm/percpu.c
Conflicts in core and arch percpu codes are mostly from commit
ed78e1e078dd44249f88b1dd8c76dafb39567161 which substituted many
num_possible_cpus() with nr_cpu_ids. As for-next branch has moved all
the first chunk allocators into mm/percpu.c, the changes are moved
from arch code to mm/percpu.c.
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/backing-dev.c | 7 | ||||
-rw-r--r-- | mm/bootmem.c | 6 | ||||
-rw-r--r-- | mm/filemap.c | 1 | ||||
-rw-r--r-- | mm/hugetlb.c | 2 | ||||
-rw-r--r-- | mm/kmemleak.c | 240 | ||||
-rw-r--r-- | mm/memcontrol.c | 25 | ||||
-rw-r--r-- | mm/memory.c | 11 | ||||
-rw-r--r-- | mm/mempolicy.c | 84 | ||||
-rw-r--r-- | mm/mempool.c | 4 | ||||
-rw-r--r-- | mm/page-writeback.c | 8 | ||||
-rw-r--r-- | mm/page_alloc.c | 41 | ||||
-rw-r--r-- | mm/percpu.c | 23 | ||||
-rw-r--r-- | mm/slab.c | 8 | ||||
-rw-r--r-- | mm/slob.c | 2 | ||||
-rw-r--r-- | mm/slub.c | 12 | ||||
-rw-r--r-- | mm/swapfile.c | 4 | ||||
-rw-r--r-- | mm/vmscan.c | 8 |
17 files changed, 322 insertions, 164 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 493b468a5035..c86edd244294 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
@@ -283,7 +283,6 @@ static wait_queue_head_t congestion_wqh[2] = { | |||
283 | __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) | 283 | __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) |
284 | }; | 284 | }; |
285 | 285 | ||
286 | |||
287 | void clear_bdi_congested(struct backing_dev_info *bdi, int sync) | 286 | void clear_bdi_congested(struct backing_dev_info *bdi, int sync) |
288 | { | 287 | { |
289 | enum bdi_state bit; | 288 | enum bdi_state bit; |
@@ -308,18 +307,18 @@ EXPORT_SYMBOL(set_bdi_congested); | |||
308 | 307 | ||
309 | /** | 308 | /** |
310 | * congestion_wait - wait for a backing_dev to become uncongested | 309 | * congestion_wait - wait for a backing_dev to become uncongested |
311 | * @rw: READ or WRITE | 310 | * @sync: SYNC or ASYNC IO |
312 | * @timeout: timeout in jiffies | 311 | * @timeout: timeout in jiffies |
313 | * | 312 | * |
314 | * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit | 313 | * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit |
315 | * write congestion. If no backing_devs are congested then just wait for the | 314 | * write congestion. If no backing_devs are congested then just wait for the |
316 | * next write to be completed. | 315 | * next write to be completed. |
317 | */ | 316 | */ |
318 | long congestion_wait(int rw, long timeout) | 317 | long congestion_wait(int sync, long timeout) |
319 | { | 318 | { |
320 | long ret; | 319 | long ret; |
321 | DEFINE_WAIT(wait); | 320 | DEFINE_WAIT(wait); |
322 | wait_queue_head_t *wqh = &congestion_wqh[rw]; | 321 | wait_queue_head_t *wqh = &congestion_wqh[sync]; |
323 | 322 | ||
324 | prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); | 323 | prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); |
325 | ret = io_schedule_timeout(timeout); | 324 | ret = io_schedule_timeout(timeout); |
diff --git a/mm/bootmem.c b/mm/bootmem.c index d2a9ce952768..701740c9e81b 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/pfn.h> | 12 | #include <linux/pfn.h> |
13 | #include <linux/bootmem.h> | 13 | #include <linux/bootmem.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/kmemleak.h> | ||
15 | 16 | ||
16 | #include <asm/bug.h> | 17 | #include <asm/bug.h> |
17 | #include <asm/io.h> | 18 | #include <asm/io.h> |
@@ -335,6 +336,8 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | |||
335 | { | 336 | { |
336 | unsigned long start, end; | 337 | unsigned long start, end; |
337 | 338 | ||
339 | kmemleak_free_part(__va(physaddr), size); | ||
340 | |||
338 | start = PFN_UP(physaddr); | 341 | start = PFN_UP(physaddr); |
339 | end = PFN_DOWN(physaddr + size); | 342 | end = PFN_DOWN(physaddr + size); |
340 | 343 | ||
@@ -354,6 +357,8 @@ void __init free_bootmem(unsigned long addr, unsigned long size) | |||
354 | { | 357 | { |
355 | unsigned long start, end; | 358 | unsigned long start, end; |
356 | 359 | ||
360 | kmemleak_free_part(__va(addr), size); | ||
361 | |||
357 | start = PFN_UP(addr); | 362 | start = PFN_UP(addr); |
358 | end = PFN_DOWN(addr + size); | 363 | end = PFN_DOWN(addr + size); |
359 | 364 | ||
@@ -516,6 +521,7 @@ find_block: | |||
516 | region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) + | 521 | region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) + |
517 | start_off); | 522 | start_off); |
518 | memset(region, 0, size); | 523 | memset(region, 0, size); |
524 | kmemleak_alloc(region, size, 1, 0); | ||
519 | return region; | 525 | return region; |
520 | } | 526 | } |
521 | 527 | ||
diff --git a/mm/filemap.c b/mm/filemap.c index 22396713feb9..ccea3b665c12 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -2272,6 +2272,7 @@ again: | |||
2272 | pagefault_enable(); | 2272 | pagefault_enable(); |
2273 | flush_dcache_page(page); | 2273 | flush_dcache_page(page); |
2274 | 2274 | ||
2275 | mark_page_accessed(page); | ||
2275 | status = a_ops->write_end(file, mapping, pos, bytes, copied, | 2276 | status = a_ops->write_end(file, mapping, pos, bytes, copied, |
2276 | page, fsdata); | 2277 | page, fsdata); |
2277 | if (unlikely(status < 0)) | 2278 | if (unlikely(status < 0)) |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d0351e31f474..cafdcee154e8 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -2370,7 +2370,7 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) | |||
2370 | long chg = region_truncate(&inode->i_mapping->private_list, offset); | 2370 | long chg = region_truncate(&inode->i_mapping->private_list, offset); |
2371 | 2371 | ||
2372 | spin_lock(&inode->i_lock); | 2372 | spin_lock(&inode->i_lock); |
2373 | inode->i_blocks -= blocks_per_huge_page(h); | 2373 | inode->i_blocks -= (blocks_per_huge_page(h) * freed); |
2374 | spin_unlock(&inode->i_lock); | 2374 | spin_unlock(&inode->i_lock); |
2375 | 2375 | ||
2376 | hugetlb_put_quota(inode->i_mapping, (chg - freed)); | 2376 | hugetlb_put_quota(inode->i_mapping, (chg - freed)); |
diff --git a/mm/kmemleak.c b/mm/kmemleak.c index e766e1da09d2..487267310a84 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c | |||
@@ -103,10 +103,10 @@ | |||
103 | * Kmemleak configuration and common defines. | 103 | * Kmemleak configuration and common defines. |
104 | */ | 104 | */ |
105 | #define MAX_TRACE 16 /* stack trace length */ | 105 | #define MAX_TRACE 16 /* stack trace length */ |
106 | #define REPORTS_NR 50 /* maximum number of reported leaks */ | ||
107 | #define MSECS_MIN_AGE 5000 /* minimum object age for reporting */ | 106 | #define MSECS_MIN_AGE 5000 /* minimum object age for reporting */ |
108 | #define SECS_FIRST_SCAN 60 /* delay before the first scan */ | 107 | #define SECS_FIRST_SCAN 60 /* delay before the first scan */ |
109 | #define SECS_SCAN_WAIT 600 /* subsequent auto scanning delay */ | 108 | #define SECS_SCAN_WAIT 600 /* subsequent auto scanning delay */ |
109 | #define GRAY_LIST_PASSES 25 /* maximum number of gray list scans */ | ||
110 | 110 | ||
111 | #define BYTES_PER_POINTER sizeof(void *) | 111 | #define BYTES_PER_POINTER sizeof(void *) |
112 | 112 | ||
@@ -158,6 +158,8 @@ struct kmemleak_object { | |||
158 | #define OBJECT_REPORTED (1 << 1) | 158 | #define OBJECT_REPORTED (1 << 1) |
159 | /* flag set to not scan the object */ | 159 | /* flag set to not scan the object */ |
160 | #define OBJECT_NO_SCAN (1 << 2) | 160 | #define OBJECT_NO_SCAN (1 << 2) |
161 | /* flag set on newly allocated objects */ | ||
162 | #define OBJECT_NEW (1 << 3) | ||
161 | 163 | ||
162 | /* the list of all allocated objects */ | 164 | /* the list of all allocated objects */ |
163 | static LIST_HEAD(object_list); | 165 | static LIST_HEAD(object_list); |
@@ -196,9 +198,6 @@ static int kmemleak_stack_scan = 1; | |||
196 | /* protects the memory scanning, parameters and debug/kmemleak file access */ | 198 | /* protects the memory scanning, parameters and debug/kmemleak file access */ |
197 | static DEFINE_MUTEX(scan_mutex); | 199 | static DEFINE_MUTEX(scan_mutex); |
198 | 200 | ||
199 | /* number of leaks reported (for limitation purposes) */ | ||
200 | static int reported_leaks; | ||
201 | |||
202 | /* | 201 | /* |
203 | * Early object allocation/freeing logging. Kmemleak is initialized after the | 202 | * Early object allocation/freeing logging. Kmemleak is initialized after the |
204 | * kernel allocator. However, both the kernel allocator and kmemleak may | 203 | * kernel allocator. However, both the kernel allocator and kmemleak may |
@@ -211,6 +210,7 @@ static int reported_leaks; | |||
211 | enum { | 210 | enum { |
212 | KMEMLEAK_ALLOC, | 211 | KMEMLEAK_ALLOC, |
213 | KMEMLEAK_FREE, | 212 | KMEMLEAK_FREE, |
213 | KMEMLEAK_FREE_PART, | ||
214 | KMEMLEAK_NOT_LEAK, | 214 | KMEMLEAK_NOT_LEAK, |
215 | KMEMLEAK_IGNORE, | 215 | KMEMLEAK_IGNORE, |
216 | KMEMLEAK_SCAN_AREA, | 216 | KMEMLEAK_SCAN_AREA, |
@@ -274,6 +274,11 @@ static int color_gray(const struct kmemleak_object *object) | |||
274 | return object->min_count != -1 && object->count >= object->min_count; | 274 | return object->min_count != -1 && object->count >= object->min_count; |
275 | } | 275 | } |
276 | 276 | ||
277 | static int color_black(const struct kmemleak_object *object) | ||
278 | { | ||
279 | return object->min_count == -1; | ||
280 | } | ||
281 | |||
277 | /* | 282 | /* |
278 | * Objects are considered unreferenced only if their color is white, they have | 283 | * Objects are considered unreferenced only if their color is white, they have |
279 | * not be deleted and have a minimum age to avoid false positives caused by | 284 | * not be deleted and have a minimum age to avoid false positives caused by |
@@ -451,7 +456,7 @@ static void create_object(unsigned long ptr, size_t size, int min_count, | |||
451 | INIT_HLIST_HEAD(&object->area_list); | 456 | INIT_HLIST_HEAD(&object->area_list); |
452 | spin_lock_init(&object->lock); | 457 | spin_lock_init(&object->lock); |
453 | atomic_set(&object->use_count, 1); | 458 | atomic_set(&object->use_count, 1); |
454 | object->flags = OBJECT_ALLOCATED; | 459 | object->flags = OBJECT_ALLOCATED | OBJECT_NEW; |
455 | object->pointer = ptr; | 460 | object->pointer = ptr; |
456 | object->size = size; | 461 | object->size = size; |
457 | object->min_count = min_count; | 462 | object->min_count = min_count; |
@@ -519,27 +524,17 @@ out: | |||
519 | * Remove the metadata (struct kmemleak_object) for a memory block from the | 524 | * Remove the metadata (struct kmemleak_object) for a memory block from the |
520 | * object_list and object_tree_root and decrement its use_count. | 525 | * object_list and object_tree_root and decrement its use_count. |
521 | */ | 526 | */ |
522 | static void delete_object(unsigned long ptr) | 527 | static void __delete_object(struct kmemleak_object *object) |
523 | { | 528 | { |
524 | unsigned long flags; | 529 | unsigned long flags; |
525 | struct kmemleak_object *object; | ||
526 | 530 | ||
527 | write_lock_irqsave(&kmemleak_lock, flags); | 531 | write_lock_irqsave(&kmemleak_lock, flags); |
528 | object = lookup_object(ptr, 0); | ||
529 | if (!object) { | ||
530 | #ifdef DEBUG | ||
531 | kmemleak_warn("Freeing unknown object at 0x%08lx\n", | ||
532 | ptr); | ||
533 | #endif | ||
534 | write_unlock_irqrestore(&kmemleak_lock, flags); | ||
535 | return; | ||
536 | } | ||
537 | prio_tree_remove(&object_tree_root, &object->tree_node); | 532 | prio_tree_remove(&object_tree_root, &object->tree_node); |
538 | list_del_rcu(&object->object_list); | 533 | list_del_rcu(&object->object_list); |
539 | write_unlock_irqrestore(&kmemleak_lock, flags); | 534 | write_unlock_irqrestore(&kmemleak_lock, flags); |
540 | 535 | ||
541 | WARN_ON(!(object->flags & OBJECT_ALLOCATED)); | 536 | WARN_ON(!(object->flags & OBJECT_ALLOCATED)); |
542 | WARN_ON(atomic_read(&object->use_count) < 1); | 537 | WARN_ON(atomic_read(&object->use_count) < 2); |
543 | 538 | ||
544 | /* | 539 | /* |
545 | * Locking here also ensures that the corresponding memory block | 540 | * Locking here also ensures that the corresponding memory block |
@@ -552,6 +547,64 @@ static void delete_object(unsigned long ptr) | |||
552 | } | 547 | } |
553 | 548 | ||
554 | /* | 549 | /* |
550 | * Look up the metadata (struct kmemleak_object) corresponding to ptr and | ||
551 | * delete it. | ||
552 | */ | ||
553 | static void delete_object_full(unsigned long ptr) | ||
554 | { | ||
555 | struct kmemleak_object *object; | ||
556 | |||
557 | object = find_and_get_object(ptr, 0); | ||
558 | if (!object) { | ||
559 | #ifdef DEBUG | ||
560 | kmemleak_warn("Freeing unknown object at 0x%08lx\n", | ||
561 | ptr); | ||
562 | #endif | ||
563 | return; | ||
564 | } | ||
565 | __delete_object(object); | ||
566 | put_object(object); | ||
567 | } | ||
568 | |||
569 | /* | ||
570 | * Look up the metadata (struct kmemleak_object) corresponding to ptr and | ||
571 | * delete it. If the memory block is partially freed, the function may create | ||
572 | * additional metadata for the remaining parts of the block. | ||
573 | */ | ||
574 | static void delete_object_part(unsigned long ptr, size_t size) | ||
575 | { | ||
576 | struct kmemleak_object *object; | ||
577 | unsigned long start, end; | ||
578 | |||
579 | object = find_and_get_object(ptr, 1); | ||
580 | if (!object) { | ||
581 | #ifdef DEBUG | ||
582 | kmemleak_warn("Partially freeing unknown object at 0x%08lx " | ||
583 | "(size %zu)\n", ptr, size); | ||
584 | #endif | ||
585 | return; | ||
586 | } | ||
587 | __delete_object(object); | ||
588 | |||
589 | /* | ||
590 | * Create one or two objects that may result from the memory block | ||
591 | * split. Note that partial freeing is only done by free_bootmem() and | ||
592 | * this happens before kmemleak_init() is called. The path below is | ||
593 | * only executed during early log recording in kmemleak_init(), so | ||
594 | * GFP_KERNEL is enough. | ||
595 | */ | ||
596 | start = object->pointer; | ||
597 | end = object->pointer + object->size; | ||
598 | if (ptr > start) | ||
599 | create_object(start, ptr - start, object->min_count, | ||
600 | GFP_KERNEL); | ||
601 | if (ptr + size < end) | ||
602 | create_object(ptr + size, end - ptr - size, object->min_count, | ||
603 | GFP_KERNEL); | ||
604 | |||
605 | put_object(object); | ||
606 | } | ||
607 | /* | ||
555 | * Make a object permanently as gray-colored so that it can no longer be | 608 | * Make a object permanently as gray-colored so that it can no longer be |
556 | * reported as a leak. This is used in general to mark a false positive. | 609 | * reported as a leak. This is used in general to mark a false positive. |
557 | */ | 610 | */ |
@@ -715,13 +768,28 @@ void kmemleak_free(const void *ptr) | |||
715 | pr_debug("%s(0x%p)\n", __func__, ptr); | 768 | pr_debug("%s(0x%p)\n", __func__, ptr); |
716 | 769 | ||
717 | if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) | 770 | if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) |
718 | delete_object((unsigned long)ptr); | 771 | delete_object_full((unsigned long)ptr); |
719 | else if (atomic_read(&kmemleak_early_log)) | 772 | else if (atomic_read(&kmemleak_early_log)) |
720 | log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0); | 773 | log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0); |
721 | } | 774 | } |
722 | EXPORT_SYMBOL_GPL(kmemleak_free); | 775 | EXPORT_SYMBOL_GPL(kmemleak_free); |
723 | 776 | ||
724 | /* | 777 | /* |
778 | * Partial memory freeing function callback. This function is usually called | ||
779 | * from bootmem allocator when (part of) a memory block is freed. | ||
780 | */ | ||
781 | void kmemleak_free_part(const void *ptr, size_t size) | ||
782 | { | ||
783 | pr_debug("%s(0x%p)\n", __func__, ptr); | ||
784 | |||
785 | if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) | ||
786 | delete_object_part((unsigned long)ptr, size); | ||
787 | else if (atomic_read(&kmemleak_early_log)) | ||
788 | log_early(KMEMLEAK_FREE_PART, ptr, size, 0, 0, 0); | ||
789 | } | ||
790 | EXPORT_SYMBOL_GPL(kmemleak_free_part); | ||
791 | |||
792 | /* | ||
725 | * Mark an already allocated memory block as a false positive. This will cause | 793 | * Mark an already allocated memory block as a false positive. This will cause |
726 | * the block to no longer be reported as leak and always be scanned. | 794 | * the block to no longer be reported as leak and always be scanned. |
727 | */ | 795 | */ |
@@ -807,7 +875,7 @@ static int scan_should_stop(void) | |||
807 | * found to the gray list. | 875 | * found to the gray list. |
808 | */ | 876 | */ |
809 | static void scan_block(void *_start, void *_end, | 877 | static void scan_block(void *_start, void *_end, |
810 | struct kmemleak_object *scanned) | 878 | struct kmemleak_object *scanned, int allow_resched) |
811 | { | 879 | { |
812 | unsigned long *ptr; | 880 | unsigned long *ptr; |
813 | unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER); | 881 | unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER); |
@@ -818,6 +886,8 @@ static void scan_block(void *_start, void *_end, | |||
818 | unsigned long pointer = *ptr; | 886 | unsigned long pointer = *ptr; |
819 | struct kmemleak_object *object; | 887 | struct kmemleak_object *object; |
820 | 888 | ||
889 | if (allow_resched) | ||
890 | cond_resched(); | ||
821 | if (scan_should_stop()) | 891 | if (scan_should_stop()) |
822 | break; | 892 | break; |
823 | 893 | ||
@@ -881,12 +951,12 @@ static void scan_object(struct kmemleak_object *object) | |||
881 | goto out; | 951 | goto out; |
882 | if (hlist_empty(&object->area_list)) | 952 | if (hlist_empty(&object->area_list)) |
883 | scan_block((void *)object->pointer, | 953 | scan_block((void *)object->pointer, |
884 | (void *)(object->pointer + object->size), object); | 954 | (void *)(object->pointer + object->size), object, 0); |
885 | else | 955 | else |
886 | hlist_for_each_entry(area, elem, &object->area_list, node) | 956 | hlist_for_each_entry(area, elem, &object->area_list, node) |
887 | scan_block((void *)(object->pointer + area->offset), | 957 | scan_block((void *)(object->pointer + area->offset), |
888 | (void *)(object->pointer + area->offset | 958 | (void *)(object->pointer + area->offset |
889 | + area->length), object); | 959 | + area->length), object, 0); |
890 | out: | 960 | out: |
891 | spin_unlock_irqrestore(&object->lock, flags); | 961 | spin_unlock_irqrestore(&object->lock, flags); |
892 | } | 962 | } |
@@ -903,6 +973,7 @@ static void kmemleak_scan(void) | |||
903 | struct task_struct *task; | 973 | struct task_struct *task; |
904 | int i; | 974 | int i; |
905 | int new_leaks = 0; | 975 | int new_leaks = 0; |
976 | int gray_list_pass = 0; | ||
906 | 977 | ||
907 | jiffies_last_scan = jiffies; | 978 | jiffies_last_scan = jiffies; |
908 | 979 | ||
@@ -923,6 +994,7 @@ static void kmemleak_scan(void) | |||
923 | #endif | 994 | #endif |
924 | /* reset the reference count (whiten the object) */ | 995 | /* reset the reference count (whiten the object) */ |
925 | object->count = 0; | 996 | object->count = 0; |
997 | object->flags &= ~OBJECT_NEW; | ||
926 | if (color_gray(object) && get_object(object)) | 998 | if (color_gray(object) && get_object(object)) |
927 | list_add_tail(&object->gray_list, &gray_list); | 999 | list_add_tail(&object->gray_list, &gray_list); |
928 | 1000 | ||
@@ -931,14 +1003,14 @@ static void kmemleak_scan(void) | |||
931 | rcu_read_unlock(); | 1003 | rcu_read_unlock(); |
932 | 1004 | ||
933 | /* data/bss scanning */ | 1005 | /* data/bss scanning */ |
934 | scan_block(_sdata, _edata, NULL); | 1006 | scan_block(_sdata, _edata, NULL, 1); |
935 | scan_block(__bss_start, __bss_stop, NULL); | 1007 | scan_block(__bss_start, __bss_stop, NULL, 1); |
936 | 1008 | ||
937 | #ifdef CONFIG_SMP | 1009 | #ifdef CONFIG_SMP |
938 | /* per-cpu sections scanning */ | 1010 | /* per-cpu sections scanning */ |
939 | for_each_possible_cpu(i) | 1011 | for_each_possible_cpu(i) |
940 | scan_block(__per_cpu_start + per_cpu_offset(i), | 1012 | scan_block(__per_cpu_start + per_cpu_offset(i), |
941 | __per_cpu_end + per_cpu_offset(i), NULL); | 1013 | __per_cpu_end + per_cpu_offset(i), NULL, 1); |
942 | #endif | 1014 | #endif |
943 | 1015 | ||
944 | /* | 1016 | /* |
@@ -960,7 +1032,7 @@ static void kmemleak_scan(void) | |||
960 | /* only scan if page is in use */ | 1032 | /* only scan if page is in use */ |
961 | if (page_count(page) == 0) | 1033 | if (page_count(page) == 0) |
962 | continue; | 1034 | continue; |
963 | scan_block(page, page + 1, NULL); | 1035 | scan_block(page, page + 1, NULL, 1); |
964 | } | 1036 | } |
965 | } | 1037 | } |
966 | 1038 | ||
@@ -972,7 +1044,8 @@ static void kmemleak_scan(void) | |||
972 | read_lock(&tasklist_lock); | 1044 | read_lock(&tasklist_lock); |
973 | for_each_process(task) | 1045 | for_each_process(task) |
974 | scan_block(task_stack_page(task), | 1046 | scan_block(task_stack_page(task), |
975 | task_stack_page(task) + THREAD_SIZE, NULL); | 1047 | task_stack_page(task) + THREAD_SIZE, |
1048 | NULL, 0); | ||
976 | read_unlock(&tasklist_lock); | 1049 | read_unlock(&tasklist_lock); |
977 | } | 1050 | } |
978 | 1051 | ||
@@ -984,6 +1057,7 @@ static void kmemleak_scan(void) | |||
984 | * kmemleak objects cannot be freed from outside the loop because their | 1057 | * kmemleak objects cannot be freed from outside the loop because their |
985 | * use_count was increased. | 1058 | * use_count was increased. |
986 | */ | 1059 | */ |
1060 | repeat: | ||
987 | object = list_entry(gray_list.next, typeof(*object), gray_list); | 1061 | object = list_entry(gray_list.next, typeof(*object), gray_list); |
988 | while (&object->gray_list != &gray_list) { | 1062 | while (&object->gray_list != &gray_list) { |
989 | cond_resched(); | 1063 | cond_resched(); |
@@ -1001,12 +1075,38 @@ static void kmemleak_scan(void) | |||
1001 | 1075 | ||
1002 | object = tmp; | 1076 | object = tmp; |
1003 | } | 1077 | } |
1078 | |||
1079 | if (scan_should_stop() || ++gray_list_pass >= GRAY_LIST_PASSES) | ||
1080 | goto scan_end; | ||
1081 | |||
1082 | /* | ||
1083 | * Check for new objects allocated during this scanning and add them | ||
1084 | * to the gray list. | ||
1085 | */ | ||
1086 | rcu_read_lock(); | ||
1087 | list_for_each_entry_rcu(object, &object_list, object_list) { | ||
1088 | spin_lock_irqsave(&object->lock, flags); | ||
1089 | if ((object->flags & OBJECT_NEW) && !color_black(object) && | ||
1090 | get_object(object)) { | ||
1091 | object->flags &= ~OBJECT_NEW; | ||
1092 | list_add_tail(&object->gray_list, &gray_list); | ||
1093 | } | ||
1094 | spin_unlock_irqrestore(&object->lock, flags); | ||
1095 | } | ||
1096 | rcu_read_unlock(); | ||
1097 | |||
1098 | if (!list_empty(&gray_list)) | ||
1099 | goto repeat; | ||
1100 | |||
1101 | scan_end: | ||
1004 | WARN_ON(!list_empty(&gray_list)); | 1102 | WARN_ON(!list_empty(&gray_list)); |
1005 | 1103 | ||
1006 | /* | 1104 | /* |
1007 | * If scanning was stopped do not report any new unreferenced objects. | 1105 | * If scanning was stopped or new objects were being allocated at a |
1106 | * higher rate than gray list scanning, do not report any new | ||
1107 | * unreferenced objects. | ||
1008 | */ | 1108 | */ |
1009 | if (scan_should_stop()) | 1109 | if (scan_should_stop() || gray_list_pass >= GRAY_LIST_PASSES) |
1010 | return; | 1110 | return; |
1011 | 1111 | ||
1012 | /* | 1112 | /* |
@@ -1039,6 +1139,7 @@ static int kmemleak_scan_thread(void *arg) | |||
1039 | static int first_run = 1; | 1139 | static int first_run = 1; |
1040 | 1140 | ||
1041 | pr_info("Automatic memory scanning thread started\n"); | 1141 | pr_info("Automatic memory scanning thread started\n"); |
1142 | set_user_nice(current, 10); | ||
1042 | 1143 | ||
1043 | /* | 1144 | /* |
1044 | * Wait before the first scan to allow the system to fully initialize. | 1145 | * Wait before the first scan to allow the system to fully initialize. |
@@ -1101,11 +1202,11 @@ static void *kmemleak_seq_start(struct seq_file *seq, loff_t *pos) | |||
1101 | { | 1202 | { |
1102 | struct kmemleak_object *object; | 1203 | struct kmemleak_object *object; |
1103 | loff_t n = *pos; | 1204 | loff_t n = *pos; |
1205 | int err; | ||
1104 | 1206 | ||
1105 | if (!n) | 1207 | err = mutex_lock_interruptible(&scan_mutex); |
1106 | reported_leaks = 0; | 1208 | if (err < 0) |
1107 | if (reported_leaks >= REPORTS_NR) | 1209 | return ERR_PTR(err); |
1108 | return NULL; | ||
1109 | 1210 | ||
1110 | rcu_read_lock(); | 1211 | rcu_read_lock(); |
1111 | list_for_each_entry_rcu(object, &object_list, object_list) { | 1212 | list_for_each_entry_rcu(object, &object_list, object_list) { |
@@ -1116,7 +1217,6 @@ static void *kmemleak_seq_start(struct seq_file *seq, loff_t *pos) | |||
1116 | } | 1217 | } |
1117 | object = NULL; | 1218 | object = NULL; |
1118 | out: | 1219 | out: |
1119 | rcu_read_unlock(); | ||
1120 | return object; | 1220 | return object; |
1121 | } | 1221 | } |
1122 | 1222 | ||
@@ -1131,17 +1231,13 @@ static void *kmemleak_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
1131 | struct list_head *n = &prev_obj->object_list; | 1231 | struct list_head *n = &prev_obj->object_list; |
1132 | 1232 | ||
1133 | ++(*pos); | 1233 | ++(*pos); |
1134 | if (reported_leaks >= REPORTS_NR) | ||
1135 | goto out; | ||
1136 | 1234 | ||
1137 | rcu_read_lock(); | ||
1138 | list_for_each_continue_rcu(n, &object_list) { | 1235 | list_for_each_continue_rcu(n, &object_list) { |
1139 | next_obj = list_entry(n, struct kmemleak_object, object_list); | 1236 | next_obj = list_entry(n, struct kmemleak_object, object_list); |
1140 | if (get_object(next_obj)) | 1237 | if (get_object(next_obj)) |
1141 | break; | 1238 | break; |
1142 | } | 1239 | } |
1143 | rcu_read_unlock(); | 1240 | |
1144 | out: | ||
1145 | put_object(prev_obj); | 1241 | put_object(prev_obj); |
1146 | return next_obj; | 1242 | return next_obj; |
1147 | } | 1243 | } |
@@ -1151,8 +1247,16 @@ out: | |||
1151 | */ | 1247 | */ |
1152 | static void kmemleak_seq_stop(struct seq_file *seq, void *v) | 1248 | static void kmemleak_seq_stop(struct seq_file *seq, void *v) |
1153 | { | 1249 | { |
1154 | if (v) | 1250 | if (!IS_ERR(v)) { |
1155 | put_object(v); | 1251 | /* |
1252 | * kmemleak_seq_start may return ERR_PTR if the scan_mutex | ||
1253 | * waiting was interrupted, so only release it if !IS_ERR. | ||
1254 | */ | ||
1255 | rcu_read_unlock(); | ||
1256 | mutex_unlock(&scan_mutex); | ||
1257 | if (v) | ||
1258 | put_object(v); | ||
1259 | } | ||
1156 | } | 1260 | } |
1157 | 1261 | ||
1158 | /* | 1262 | /* |
@@ -1164,10 +1268,8 @@ static int kmemleak_seq_show(struct seq_file *seq, void *v) | |||
1164 | unsigned long flags; | 1268 | unsigned long flags; |
1165 | 1269 | ||
1166 | spin_lock_irqsave(&object->lock, flags); | 1270 | spin_lock_irqsave(&object->lock, flags); |
1167 | if ((object->flags & OBJECT_REPORTED) && unreferenced_object(object)) { | 1271 | if ((object->flags & OBJECT_REPORTED) && unreferenced_object(object)) |
1168 | print_unreferenced(seq, object); | 1272 | print_unreferenced(seq, object); |
1169 | reported_leaks++; | ||
1170 | } | ||
1171 | spin_unlock_irqrestore(&object->lock, flags); | 1273 | spin_unlock_irqrestore(&object->lock, flags); |
1172 | return 0; | 1274 | return 0; |
1173 | } | 1275 | } |
@@ -1181,36 +1283,15 @@ static const struct seq_operations kmemleak_seq_ops = { | |||
1181 | 1283 | ||
1182 | static int kmemleak_open(struct inode *inode, struct file *file) | 1284 | static int kmemleak_open(struct inode *inode, struct file *file) |
1183 | { | 1285 | { |
1184 | int ret = 0; | ||
1185 | |||
1186 | if (!atomic_read(&kmemleak_enabled)) | 1286 | if (!atomic_read(&kmemleak_enabled)) |
1187 | return -EBUSY; | 1287 | return -EBUSY; |
1188 | 1288 | ||
1189 | ret = mutex_lock_interruptible(&scan_mutex); | 1289 | return seq_open(file, &kmemleak_seq_ops); |
1190 | if (ret < 0) | ||
1191 | goto out; | ||
1192 | if (file->f_mode & FMODE_READ) { | ||
1193 | ret = seq_open(file, &kmemleak_seq_ops); | ||
1194 | if (ret < 0) | ||
1195 | goto scan_unlock; | ||
1196 | } | ||
1197 | return ret; | ||
1198 | |||
1199 | scan_unlock: | ||
1200 | mutex_unlock(&scan_mutex); | ||
1201 | out: | ||
1202 | return ret; | ||
1203 | } | 1290 | } |
1204 | 1291 | ||
1205 | static int kmemleak_release(struct inode *inode, struct file *file) | 1292 | static int kmemleak_release(struct inode *inode, struct file *file) |
1206 | { | 1293 | { |
1207 | int ret = 0; | 1294 | return seq_release(inode, file); |
1208 | |||
1209 | if (file->f_mode & FMODE_READ) | ||
1210 | seq_release(inode, file); | ||
1211 | mutex_unlock(&scan_mutex); | ||
1212 | |||
1213 | return ret; | ||
1214 | } | 1295 | } |
1215 | 1296 | ||
1216 | /* | 1297 | /* |
@@ -1230,15 +1311,17 @@ static ssize_t kmemleak_write(struct file *file, const char __user *user_buf, | |||
1230 | { | 1311 | { |
1231 | char buf[64]; | 1312 | char buf[64]; |
1232 | int buf_size; | 1313 | int buf_size; |
1233 | 1314 | int ret; | |
1234 | if (!atomic_read(&kmemleak_enabled)) | ||
1235 | return -EBUSY; | ||
1236 | 1315 | ||
1237 | buf_size = min(size, (sizeof(buf) - 1)); | 1316 | buf_size = min(size, (sizeof(buf) - 1)); |
1238 | if (strncpy_from_user(buf, user_buf, buf_size) < 0) | 1317 | if (strncpy_from_user(buf, user_buf, buf_size) < 0) |
1239 | return -EFAULT; | 1318 | return -EFAULT; |
1240 | buf[buf_size] = 0; | 1319 | buf[buf_size] = 0; |
1241 | 1320 | ||
1321 | ret = mutex_lock_interruptible(&scan_mutex); | ||
1322 | if (ret < 0) | ||
1323 | return ret; | ||
1324 | |||
1242 | if (strncmp(buf, "off", 3) == 0) | 1325 | if (strncmp(buf, "off", 3) == 0) |
1243 | kmemleak_disable(); | 1326 | kmemleak_disable(); |
1244 | else if (strncmp(buf, "stack=on", 8) == 0) | 1327 | else if (strncmp(buf, "stack=on", 8) == 0) |
@@ -1251,11 +1334,10 @@ static ssize_t kmemleak_write(struct file *file, const char __user *user_buf, | |||
1251 | stop_scan_thread(); | 1334 | stop_scan_thread(); |
1252 | else if (strncmp(buf, "scan=", 5) == 0) { | 1335 | else if (strncmp(buf, "scan=", 5) == 0) { |
1253 | unsigned long secs; | 1336 | unsigned long secs; |
1254 | int err; | ||
1255 | 1337 | ||
1256 | err = strict_strtoul(buf + 5, 0, &secs); | 1338 | ret = strict_strtoul(buf + 5, 0, &secs); |
1257 | if (err < 0) | 1339 | if (ret < 0) |
1258 | return err; | 1340 | goto out; |
1259 | stop_scan_thread(); | 1341 | stop_scan_thread(); |
1260 | if (secs) { | 1342 | if (secs) { |
1261 | jiffies_scan_wait = msecs_to_jiffies(secs * 1000); | 1343 | jiffies_scan_wait = msecs_to_jiffies(secs * 1000); |
@@ -1264,7 +1346,12 @@ static ssize_t kmemleak_write(struct file *file, const char __user *user_buf, | |||
1264 | } else if (strncmp(buf, "scan", 4) == 0) | 1346 | } else if (strncmp(buf, "scan", 4) == 0) |
1265 | kmemleak_scan(); | 1347 | kmemleak_scan(); |
1266 | else | 1348 | else |
1267 | return -EINVAL; | 1349 | ret = -EINVAL; |
1350 | |||
1351 | out: | ||
1352 | mutex_unlock(&scan_mutex); | ||
1353 | if (ret < 0) | ||
1354 | return ret; | ||
1268 | 1355 | ||
1269 | /* ignore the rest of the buffer, only one command at a time */ | 1356 | /* ignore the rest of the buffer, only one command at a time */ |
1270 | *ppos += size; | 1357 | *ppos += size; |
@@ -1293,7 +1380,7 @@ static int kmemleak_cleanup_thread(void *arg) | |||
1293 | 1380 | ||
1294 | rcu_read_lock(); | 1381 | rcu_read_lock(); |
1295 | list_for_each_entry_rcu(object, &object_list, object_list) | 1382 | list_for_each_entry_rcu(object, &object_list, object_list) |
1296 | delete_object(object->pointer); | 1383 | delete_object_full(object->pointer); |
1297 | rcu_read_unlock(); | 1384 | rcu_read_unlock(); |
1298 | mutex_unlock(&scan_mutex); | 1385 | mutex_unlock(&scan_mutex); |
1299 | 1386 | ||
@@ -1388,6 +1475,9 @@ void __init kmemleak_init(void) | |||
1388 | case KMEMLEAK_FREE: | 1475 | case KMEMLEAK_FREE: |
1389 | kmemleak_free(log->ptr); | 1476 | kmemleak_free(log->ptr); |
1390 | break; | 1477 | break; |
1478 | case KMEMLEAK_FREE_PART: | ||
1479 | kmemleak_free_part(log->ptr, log->size); | ||
1480 | break; | ||
1391 | case KMEMLEAK_NOT_LEAK: | 1481 | case KMEMLEAK_NOT_LEAK: |
1392 | kmemleak_not_leak(log->ptr); | 1482 | kmemleak_not_leak(log->ptr); |
1393 | break; | 1483 | break; |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e2fa20dadf40..fd4529d86de5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -1207,6 +1207,12 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, | |||
1207 | ret = 0; | 1207 | ret = 0; |
1208 | out: | 1208 | out: |
1209 | unlock_page_cgroup(pc); | 1209 | unlock_page_cgroup(pc); |
1210 | /* | ||
1211 | * We charges against "to" which may not have any tasks. Then, "to" | ||
1212 | * can be under rmdir(). But in current implementation, caller of | ||
1213 | * this function is just force_empty() and it's garanteed that | ||
1214 | * "to" is never removed. So, we don't check rmdir status here. | ||
1215 | */ | ||
1210 | return ret; | 1216 | return ret; |
1211 | } | 1217 | } |
1212 | 1218 | ||
@@ -1428,6 +1434,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, | |||
1428 | return; | 1434 | return; |
1429 | if (!ptr) | 1435 | if (!ptr) |
1430 | return; | 1436 | return; |
1437 | cgroup_exclude_rmdir(&ptr->css); | ||
1431 | pc = lookup_page_cgroup(page); | 1438 | pc = lookup_page_cgroup(page); |
1432 | mem_cgroup_lru_del_before_commit_swapcache(page); | 1439 | mem_cgroup_lru_del_before_commit_swapcache(page); |
1433 | __mem_cgroup_commit_charge(ptr, pc, ctype); | 1440 | __mem_cgroup_commit_charge(ptr, pc, ctype); |
@@ -1457,8 +1464,12 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, | |||
1457 | } | 1464 | } |
1458 | rcu_read_unlock(); | 1465 | rcu_read_unlock(); |
1459 | } | 1466 | } |
1460 | /* add this page(page_cgroup) to the LRU we want. */ | 1467 | /* |
1461 | 1468 | * At swapin, we may charge account against cgroup which has no tasks. | |
1469 | * So, rmdir()->pre_destroy() can be called while we do this charge. | ||
1470 | * In that case, we need to call pre_destroy() again. check it here. | ||
1471 | */ | ||
1472 | cgroup_release_and_wakeup_rmdir(&ptr->css); | ||
1462 | } | 1473 | } |
1463 | 1474 | ||
1464 | void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr) | 1475 | void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr) |
@@ -1664,7 +1675,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem, | |||
1664 | 1675 | ||
1665 | if (!mem) | 1676 | if (!mem) |
1666 | return; | 1677 | return; |
1667 | 1678 | cgroup_exclude_rmdir(&mem->css); | |
1668 | /* at migration success, oldpage->mapping is NULL. */ | 1679 | /* at migration success, oldpage->mapping is NULL. */ |
1669 | if (oldpage->mapping) { | 1680 | if (oldpage->mapping) { |
1670 | target = oldpage; | 1681 | target = oldpage; |
@@ -1704,6 +1715,12 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem, | |||
1704 | */ | 1715 | */ |
1705 | if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED) | 1716 | if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED) |
1706 | mem_cgroup_uncharge_page(target); | 1717 | mem_cgroup_uncharge_page(target); |
1718 | /* | ||
1719 | * At migration, we may charge account against cgroup which has no tasks | ||
1720 | * So, rmdir()->pre_destroy() can be called while we do this charge. | ||
1721 | * In that case, we need to call pre_destroy() again. check it here. | ||
1722 | */ | ||
1723 | cgroup_release_and_wakeup_rmdir(&mem->css); | ||
1707 | } | 1724 | } |
1708 | 1725 | ||
1709 | /* | 1726 | /* |
@@ -1973,7 +1990,7 @@ try_to_free: | |||
1973 | if (!progress) { | 1990 | if (!progress) { |
1974 | nr_retries--; | 1991 | nr_retries--; |
1975 | /* maybe some writeback is necessary */ | 1992 | /* maybe some writeback is necessary */ |
1976 | congestion_wait(WRITE, HZ/10); | 1993 | congestion_wait(BLK_RW_ASYNC, HZ/10); |
1977 | } | 1994 | } |
1978 | 1995 | ||
1979 | } | 1996 | } |
diff --git a/mm/memory.c b/mm/memory.c index 65216194eb8d..aede2ce3aba4 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -135,11 +135,12 @@ void pmd_clear_bad(pmd_t *pmd) | |||
135 | * Note: this doesn't free the actual pages themselves. That | 135 | * Note: this doesn't free the actual pages themselves. That |
136 | * has been handled earlier when unmapping all the memory regions. | 136 | * has been handled earlier when unmapping all the memory regions. |
137 | */ | 137 | */ |
138 | static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd) | 138 | static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, |
139 | unsigned long addr) | ||
139 | { | 140 | { |
140 | pgtable_t token = pmd_pgtable(*pmd); | 141 | pgtable_t token = pmd_pgtable(*pmd); |
141 | pmd_clear(pmd); | 142 | pmd_clear(pmd); |
142 | pte_free_tlb(tlb, token); | 143 | pte_free_tlb(tlb, token, addr); |
143 | tlb->mm->nr_ptes--; | 144 | tlb->mm->nr_ptes--; |
144 | } | 145 | } |
145 | 146 | ||
@@ -157,7 +158,7 @@ static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud, | |||
157 | next = pmd_addr_end(addr, end); | 158 | next = pmd_addr_end(addr, end); |
158 | if (pmd_none_or_clear_bad(pmd)) | 159 | if (pmd_none_or_clear_bad(pmd)) |
159 | continue; | 160 | continue; |
160 | free_pte_range(tlb, pmd); | 161 | free_pte_range(tlb, pmd, addr); |
161 | } while (pmd++, addr = next, addr != end); | 162 | } while (pmd++, addr = next, addr != end); |
162 | 163 | ||
163 | start &= PUD_MASK; | 164 | start &= PUD_MASK; |
@@ -173,7 +174,7 @@ static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud, | |||
173 | 174 | ||
174 | pmd = pmd_offset(pud, start); | 175 | pmd = pmd_offset(pud, start); |
175 | pud_clear(pud); | 176 | pud_clear(pud); |
176 | pmd_free_tlb(tlb, pmd); | 177 | pmd_free_tlb(tlb, pmd, start); |
177 | } | 178 | } |
178 | 179 | ||
179 | static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, | 180 | static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, |
@@ -206,7 +207,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, | |||
206 | 207 | ||
207 | pud = pud_offset(pgd, start); | 208 | pud = pud_offset(pgd, start); |
208 | pgd_clear(pgd); | 209 | pgd_clear(pgd); |
209 | pud_free_tlb(tlb, pud); | 210 | pud_free_tlb(tlb, pud, start); |
210 | } | 211 | } |
211 | 212 | ||
212 | /* | 213 | /* |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index e08e2c4da63a..7dd9d9f80694 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -191,25 +191,27 @@ static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes) | |||
191 | * Must be called holding task's alloc_lock to protect task's mems_allowed | 191 | * Must be called holding task's alloc_lock to protect task's mems_allowed |
192 | * and mempolicy. May also be called holding the mmap_semaphore for write. | 192 | * and mempolicy. May also be called holding the mmap_semaphore for write. |
193 | */ | 193 | */ |
194 | static int mpol_set_nodemask(struct mempolicy *pol, const nodemask_t *nodes) | 194 | static int mpol_set_nodemask(struct mempolicy *pol, |
195 | const nodemask_t *nodes, struct nodemask_scratch *nsc) | ||
195 | { | 196 | { |
196 | nodemask_t cpuset_context_nmask; | ||
197 | int ret; | 197 | int ret; |
198 | 198 | ||
199 | /* if mode is MPOL_DEFAULT, pol is NULL. This is right. */ | 199 | /* if mode is MPOL_DEFAULT, pol is NULL. This is right. */ |
200 | if (pol == NULL) | 200 | if (pol == NULL) |
201 | return 0; | 201 | return 0; |
202 | /* Check N_HIGH_MEMORY */ | ||
203 | nodes_and(nsc->mask1, | ||
204 | cpuset_current_mems_allowed, node_states[N_HIGH_MEMORY]); | ||
202 | 205 | ||
203 | VM_BUG_ON(!nodes); | 206 | VM_BUG_ON(!nodes); |
204 | if (pol->mode == MPOL_PREFERRED && nodes_empty(*nodes)) | 207 | if (pol->mode == MPOL_PREFERRED && nodes_empty(*nodes)) |
205 | nodes = NULL; /* explicit local allocation */ | 208 | nodes = NULL; /* explicit local allocation */ |
206 | else { | 209 | else { |
207 | if (pol->flags & MPOL_F_RELATIVE_NODES) | 210 | if (pol->flags & MPOL_F_RELATIVE_NODES) |
208 | mpol_relative_nodemask(&cpuset_context_nmask, nodes, | 211 | mpol_relative_nodemask(&nsc->mask2, nodes,&nsc->mask1); |
209 | &cpuset_current_mems_allowed); | ||
210 | else | 212 | else |
211 | nodes_and(cpuset_context_nmask, *nodes, | 213 | nodes_and(nsc->mask2, *nodes, nsc->mask1); |
212 | cpuset_current_mems_allowed); | 214 | |
213 | if (mpol_store_user_nodemask(pol)) | 215 | if (mpol_store_user_nodemask(pol)) |
214 | pol->w.user_nodemask = *nodes; | 216 | pol->w.user_nodemask = *nodes; |
215 | else | 217 | else |
@@ -217,8 +219,10 @@ static int mpol_set_nodemask(struct mempolicy *pol, const nodemask_t *nodes) | |||
217 | cpuset_current_mems_allowed; | 219 | cpuset_current_mems_allowed; |
218 | } | 220 | } |
219 | 221 | ||
220 | ret = mpol_ops[pol->mode].create(pol, | 222 | if (nodes) |
221 | nodes ? &cpuset_context_nmask : NULL); | 223 | ret = mpol_ops[pol->mode].create(pol, &nsc->mask2); |
224 | else | ||
225 | ret = mpol_ops[pol->mode].create(pol, NULL); | ||
222 | return ret; | 226 | return ret; |
223 | } | 227 | } |
224 | 228 | ||
@@ -620,12 +624,17 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags, | |||
620 | { | 624 | { |
621 | struct mempolicy *new, *old; | 625 | struct mempolicy *new, *old; |
622 | struct mm_struct *mm = current->mm; | 626 | struct mm_struct *mm = current->mm; |
627 | NODEMASK_SCRATCH(scratch); | ||
623 | int ret; | 628 | int ret; |
624 | 629 | ||
625 | new = mpol_new(mode, flags, nodes); | 630 | if (!scratch) |
626 | if (IS_ERR(new)) | 631 | return -ENOMEM; |
627 | return PTR_ERR(new); | ||
628 | 632 | ||
633 | new = mpol_new(mode, flags, nodes); | ||
634 | if (IS_ERR(new)) { | ||
635 | ret = PTR_ERR(new); | ||
636 | goto out; | ||
637 | } | ||
629 | /* | 638 | /* |
630 | * prevent changing our mempolicy while show_numa_maps() | 639 | * prevent changing our mempolicy while show_numa_maps() |
631 | * is using it. | 640 | * is using it. |
@@ -635,13 +644,13 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags, | |||
635 | if (mm) | 644 | if (mm) |
636 | down_write(&mm->mmap_sem); | 645 | down_write(&mm->mmap_sem); |
637 | task_lock(current); | 646 | task_lock(current); |
638 | ret = mpol_set_nodemask(new, nodes); | 647 | ret = mpol_set_nodemask(new, nodes, scratch); |
639 | if (ret) { | 648 | if (ret) { |
640 | task_unlock(current); | 649 | task_unlock(current); |
641 | if (mm) | 650 | if (mm) |
642 | up_write(&mm->mmap_sem); | 651 | up_write(&mm->mmap_sem); |
643 | mpol_put(new); | 652 | mpol_put(new); |
644 | return ret; | 653 | goto out; |
645 | } | 654 | } |
646 | old = current->mempolicy; | 655 | old = current->mempolicy; |
647 | current->mempolicy = new; | 656 | current->mempolicy = new; |
@@ -654,7 +663,10 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags, | |||
654 | up_write(&mm->mmap_sem); | 663 | up_write(&mm->mmap_sem); |
655 | 664 | ||
656 | mpol_put(old); | 665 | mpol_put(old); |
657 | return 0; | 666 | ret = 0; |
667 | out: | ||
668 | NODEMASK_SCRATCH_FREE(scratch); | ||
669 | return ret; | ||
658 | } | 670 | } |
659 | 671 | ||
660 | /* | 672 | /* |
@@ -1014,12 +1026,20 @@ static long do_mbind(unsigned long start, unsigned long len, | |||
1014 | if (err) | 1026 | if (err) |
1015 | return err; | 1027 | return err; |
1016 | } | 1028 | } |
1017 | down_write(&mm->mmap_sem); | 1029 | { |
1018 | task_lock(current); | 1030 | NODEMASK_SCRATCH(scratch); |
1019 | err = mpol_set_nodemask(new, nmask); | 1031 | if (scratch) { |
1020 | task_unlock(current); | 1032 | down_write(&mm->mmap_sem); |
1033 | task_lock(current); | ||
1034 | err = mpol_set_nodemask(new, nmask, scratch); | ||
1035 | task_unlock(current); | ||
1036 | if (err) | ||
1037 | up_write(&mm->mmap_sem); | ||
1038 | } else | ||
1039 | err = -ENOMEM; | ||
1040 | NODEMASK_SCRATCH_FREE(scratch); | ||
1041 | } | ||
1021 | if (err) { | 1042 | if (err) { |
1022 | up_write(&mm->mmap_sem); | ||
1023 | mpol_put(new); | 1043 | mpol_put(new); |
1024 | return err; | 1044 | return err; |
1025 | } | 1045 | } |
@@ -1891,6 +1911,7 @@ restart: | |||
1891 | * Install non-NULL @mpol in inode's shared policy rb-tree. | 1911 | * Install non-NULL @mpol in inode's shared policy rb-tree. |
1892 | * On entry, the current task has a reference on a non-NULL @mpol. | 1912 | * On entry, the current task has a reference on a non-NULL @mpol. |
1893 | * This must be released on exit. | 1913 | * This must be released on exit. |
1914 | * This is called at get_inode() calls and we can use GFP_KERNEL. | ||
1894 | */ | 1915 | */ |
1895 | void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) | 1916 | void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) |
1896 | { | 1917 | { |
@@ -1902,19 +1923,24 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) | |||
1902 | if (mpol) { | 1923 | if (mpol) { |
1903 | struct vm_area_struct pvma; | 1924 | struct vm_area_struct pvma; |
1904 | struct mempolicy *new; | 1925 | struct mempolicy *new; |
1926 | NODEMASK_SCRATCH(scratch); | ||
1905 | 1927 | ||
1928 | if (!scratch) | ||
1929 | return; | ||
1906 | /* contextualize the tmpfs mount point mempolicy */ | 1930 | /* contextualize the tmpfs mount point mempolicy */ |
1907 | new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask); | 1931 | new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask); |
1908 | if (IS_ERR(new)) { | 1932 | if (IS_ERR(new)) { |
1909 | mpol_put(mpol); /* drop our ref on sb mpol */ | 1933 | mpol_put(mpol); /* drop our ref on sb mpol */ |
1934 | NODEMASK_SCRATCH_FREE(scratch); | ||
1910 | return; /* no valid nodemask intersection */ | 1935 | return; /* no valid nodemask intersection */ |
1911 | } | 1936 | } |
1912 | 1937 | ||
1913 | task_lock(current); | 1938 | task_lock(current); |
1914 | ret = mpol_set_nodemask(new, &mpol->w.user_nodemask); | 1939 | ret = mpol_set_nodemask(new, &mpol->w.user_nodemask, scratch); |
1915 | task_unlock(current); | 1940 | task_unlock(current); |
1916 | mpol_put(mpol); /* drop our ref on sb mpol */ | 1941 | mpol_put(mpol); /* drop our ref on sb mpol */ |
1917 | if (ret) { | 1942 | if (ret) { |
1943 | NODEMASK_SCRATCH_FREE(scratch); | ||
1918 | mpol_put(new); | 1944 | mpol_put(new); |
1919 | return; | 1945 | return; |
1920 | } | 1946 | } |
@@ -1924,6 +1950,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) | |||
1924 | pvma.vm_end = TASK_SIZE; /* policy covers entire file */ | 1950 | pvma.vm_end = TASK_SIZE; /* policy covers entire file */ |
1925 | mpol_set_shared_policy(sp, &pvma, new); /* adds ref */ | 1951 | mpol_set_shared_policy(sp, &pvma, new); /* adds ref */ |
1926 | mpol_put(new); /* drop initial ref */ | 1952 | mpol_put(new); /* drop initial ref */ |
1953 | NODEMASK_SCRATCH_FREE(scratch); | ||
1927 | } | 1954 | } |
1928 | } | 1955 | } |
1929 | 1956 | ||
@@ -2140,13 +2167,18 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) | |||
2140 | err = 1; | 2167 | err = 1; |
2141 | else { | 2168 | else { |
2142 | int ret; | 2169 | int ret; |
2143 | 2170 | NODEMASK_SCRATCH(scratch); | |
2144 | task_lock(current); | 2171 | if (scratch) { |
2145 | ret = mpol_set_nodemask(new, &nodes); | 2172 | task_lock(current); |
2146 | task_unlock(current); | 2173 | ret = mpol_set_nodemask(new, &nodes, scratch); |
2147 | if (ret) | 2174 | task_unlock(current); |
2175 | } else | ||
2176 | ret = -ENOMEM; | ||
2177 | NODEMASK_SCRATCH_FREE(scratch); | ||
2178 | if (ret) { | ||
2148 | err = 1; | 2179 | err = 1; |
2149 | else if (no_context) { | 2180 | mpol_put(new); |
2181 | } else if (no_context) { | ||
2150 | /* save for contextualization */ | 2182 | /* save for contextualization */ |
2151 | new->w.user_nodemask = nodes; | 2183 | new->w.user_nodemask = nodes; |
2152 | } | 2184 | } |
diff --git a/mm/mempool.c b/mm/mempool.c index a46eb1b4bb66..32e75d400503 100644 --- a/mm/mempool.c +++ b/mm/mempool.c | |||
@@ -303,14 +303,14 @@ EXPORT_SYMBOL(mempool_free_slab); | |||
303 | */ | 303 | */ |
304 | void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data) | 304 | void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data) |
305 | { | 305 | { |
306 | size_t size = (size_t)(long)pool_data; | 306 | size_t size = (size_t)pool_data; |
307 | return kmalloc(size, gfp_mask); | 307 | return kmalloc(size, gfp_mask); |
308 | } | 308 | } |
309 | EXPORT_SYMBOL(mempool_kmalloc); | 309 | EXPORT_SYMBOL(mempool_kmalloc); |
310 | 310 | ||
311 | void *mempool_kzalloc(gfp_t gfp_mask, void *pool_data) | 311 | void *mempool_kzalloc(gfp_t gfp_mask, void *pool_data) |
312 | { | 312 | { |
313 | size_t size = (size_t) pool_data; | 313 | size_t size = (size_t)pool_data; |
314 | return kzalloc(size, gfp_mask); | 314 | return kzalloc(size, gfp_mask); |
315 | } | 315 | } |
316 | EXPORT_SYMBOL(mempool_kzalloc); | 316 | EXPORT_SYMBOL(mempool_kzalloc); |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 3c7f5e1afe5f..997186c0b519 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -575,7 +575,7 @@ static void balance_dirty_pages(struct address_space *mapping) | |||
575 | if (pages_written >= write_chunk) | 575 | if (pages_written >= write_chunk) |
576 | break; /* We've done our duty */ | 576 | break; /* We've done our duty */ |
577 | 577 | ||
578 | congestion_wait(WRITE, HZ/10); | 578 | congestion_wait(BLK_RW_ASYNC, HZ/10); |
579 | } | 579 | } |
580 | 580 | ||
581 | if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh && | 581 | if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh && |
@@ -670,7 +670,7 @@ void throttle_vm_writeout(gfp_t gfp_mask) | |||
670 | if (global_page_state(NR_UNSTABLE_NFS) + | 670 | if (global_page_state(NR_UNSTABLE_NFS) + |
671 | global_page_state(NR_WRITEBACK) <= dirty_thresh) | 671 | global_page_state(NR_WRITEBACK) <= dirty_thresh) |
672 | break; | 672 | break; |
673 | congestion_wait(WRITE, HZ/10); | 673 | congestion_wait(BLK_RW_ASYNC, HZ/10); |
674 | 674 | ||
675 | /* | 675 | /* |
676 | * The caller might hold locks which can prevent IO completion | 676 | * The caller might hold locks which can prevent IO completion |
@@ -716,7 +716,7 @@ static void background_writeout(unsigned long _min_pages) | |||
716 | if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { | 716 | if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { |
717 | /* Wrote less than expected */ | 717 | /* Wrote less than expected */ |
718 | if (wbc.encountered_congestion || wbc.more_io) | 718 | if (wbc.encountered_congestion || wbc.more_io) |
719 | congestion_wait(WRITE, HZ/10); | 719 | congestion_wait(BLK_RW_ASYNC, HZ/10); |
720 | else | 720 | else |
721 | break; | 721 | break; |
722 | } | 722 | } |
@@ -788,7 +788,7 @@ static void wb_kupdate(unsigned long arg) | |||
788 | writeback_inodes(&wbc); | 788 | writeback_inodes(&wbc); |
789 | if (wbc.nr_to_write > 0) { | 789 | if (wbc.nr_to_write > 0) { |
790 | if (wbc.encountered_congestion || wbc.more_io) | 790 | if (wbc.encountered_congestion || wbc.more_io) |
791 | congestion_wait(WRITE, HZ/10); | 791 | congestion_wait(BLK_RW_ASYNC, HZ/10); |
792 | else | 792 | else |
793 | break; /* All the old data is written */ | 793 | break; /* All the old data is written */ |
794 | } | 794 | } |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e0f2cdf9d8b1..d052abbe3063 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -882,7 +882,7 @@ retry_reserve: | |||
882 | */ | 882 | */ |
883 | static int rmqueue_bulk(struct zone *zone, unsigned int order, | 883 | static int rmqueue_bulk(struct zone *zone, unsigned int order, |
884 | unsigned long count, struct list_head *list, | 884 | unsigned long count, struct list_head *list, |
885 | int migratetype) | 885 | int migratetype, int cold) |
886 | { | 886 | { |
887 | int i; | 887 | int i; |
888 | 888 | ||
@@ -901,7 +901,10 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, | |||
901 | * merge IO requests if the physical pages are ordered | 901 | * merge IO requests if the physical pages are ordered |
902 | * properly. | 902 | * properly. |
903 | */ | 903 | */ |
904 | list_add(&page->lru, list); | 904 | if (likely(cold == 0)) |
905 | list_add(&page->lru, list); | ||
906 | else | ||
907 | list_add_tail(&page->lru, list); | ||
905 | set_page_private(page, migratetype); | 908 | set_page_private(page, migratetype); |
906 | list = &page->lru; | 909 | list = &page->lru; |
907 | } | 910 | } |
@@ -1119,7 +1122,8 @@ again: | |||
1119 | local_irq_save(flags); | 1122 | local_irq_save(flags); |
1120 | if (!pcp->count) { | 1123 | if (!pcp->count) { |
1121 | pcp->count = rmqueue_bulk(zone, 0, | 1124 | pcp->count = rmqueue_bulk(zone, 0, |
1122 | pcp->batch, &pcp->list, migratetype); | 1125 | pcp->batch, &pcp->list, |
1126 | migratetype, cold); | ||
1123 | if (unlikely(!pcp->count)) | 1127 | if (unlikely(!pcp->count)) |
1124 | goto failed; | 1128 | goto failed; |
1125 | } | 1129 | } |
@@ -1138,7 +1142,8 @@ again: | |||
1138 | /* Allocate more to the pcp list if necessary */ | 1142 | /* Allocate more to the pcp list if necessary */ |
1139 | if (unlikely(&page->lru == &pcp->list)) { | 1143 | if (unlikely(&page->lru == &pcp->list)) { |
1140 | pcp->count += rmqueue_bulk(zone, 0, | 1144 | pcp->count += rmqueue_bulk(zone, 0, |
1141 | pcp->batch, &pcp->list, migratetype); | 1145 | pcp->batch, &pcp->list, |
1146 | migratetype, cold); | ||
1142 | page = list_entry(pcp->list.next, struct page, lru); | 1147 | page = list_entry(pcp->list.next, struct page, lru); |
1143 | } | 1148 | } |
1144 | 1149 | ||
@@ -1666,7 +1671,7 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order, | |||
1666 | preferred_zone, migratetype); | 1671 | preferred_zone, migratetype); |
1667 | 1672 | ||
1668 | if (!page && gfp_mask & __GFP_NOFAIL) | 1673 | if (!page && gfp_mask & __GFP_NOFAIL) |
1669 | congestion_wait(WRITE, HZ/50); | 1674 | congestion_wait(BLK_RW_ASYNC, HZ/50); |
1670 | } while (!page && (gfp_mask & __GFP_NOFAIL)); | 1675 | } while (!page && (gfp_mask & __GFP_NOFAIL)); |
1671 | 1676 | ||
1672 | return page; | 1677 | return page; |
@@ -1740,8 +1745,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, | |||
1740 | * be using allocators in order of preference for an area that is | 1745 | * be using allocators in order of preference for an area that is |
1741 | * too large. | 1746 | * too large. |
1742 | */ | 1747 | */ |
1743 | if (WARN_ON_ONCE(order >= MAX_ORDER)) | 1748 | if (order >= MAX_ORDER) { |
1749 | WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN)); | ||
1744 | return NULL; | 1750 | return NULL; |
1751 | } | ||
1745 | 1752 | ||
1746 | /* | 1753 | /* |
1747 | * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and | 1754 | * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and |
@@ -1789,6 +1796,10 @@ rebalance: | |||
1789 | if (p->flags & PF_MEMALLOC) | 1796 | if (p->flags & PF_MEMALLOC) |
1790 | goto nopage; | 1797 | goto nopage; |
1791 | 1798 | ||
1799 | /* Avoid allocations with no watermarks from looping endlessly */ | ||
1800 | if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL)) | ||
1801 | goto nopage; | ||
1802 | |||
1792 | /* Try direct reclaim and then allocating */ | 1803 | /* Try direct reclaim and then allocating */ |
1793 | page = __alloc_pages_direct_reclaim(gfp_mask, order, | 1804 | page = __alloc_pages_direct_reclaim(gfp_mask, order, |
1794 | zonelist, high_zoneidx, | 1805 | zonelist, high_zoneidx, |
@@ -1831,7 +1842,7 @@ rebalance: | |||
1831 | pages_reclaimed += did_some_progress; | 1842 | pages_reclaimed += did_some_progress; |
1832 | if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) { | 1843 | if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) { |
1833 | /* Wait for some write requests to complete then retry */ | 1844 | /* Wait for some write requests to complete then retry */ |
1834 | congestion_wait(WRITE, HZ/50); | 1845 | congestion_wait(BLK_RW_ASYNC, HZ/50); |
1835 | goto rebalance; | 1846 | goto rebalance; |
1836 | } | 1847 | } |
1837 | 1848 | ||
@@ -1983,7 +1994,7 @@ void *alloc_pages_exact(size_t size, gfp_t gfp_mask) | |||
1983 | unsigned long alloc_end = addr + (PAGE_SIZE << order); | 1994 | unsigned long alloc_end = addr + (PAGE_SIZE << order); |
1984 | unsigned long used = addr + PAGE_ALIGN(size); | 1995 | unsigned long used = addr + PAGE_ALIGN(size); |
1985 | 1996 | ||
1986 | split_page(virt_to_page(addr), order); | 1997 | split_page(virt_to_page((void *)addr), order); |
1987 | while (used < alloc_end) { | 1998 | while (used < alloc_end) { |
1988 | free_page(used); | 1999 | free_page(used); |
1989 | used += PAGE_SIZE; | 2000 | used += PAGE_SIZE; |
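The cast added above only silences the unsigned long vs. pointer mismatch handed to virt_to_page(); the behaviour of alloc_pages_exact() is unchanged. For context, a short usage sketch of the alloc_pages_exact()/free_pages_exact() pair:

	#include <linux/mm.h>
	#include <linux/gfp.h>
	#include <linux/errno.h>

	static int demo_exact_alloc(void)
	{
		size_t sz = 5 * PAGE_SIZE;	/* not a power-of-two number of pages */
		void *buf = alloc_pages_exact(sz, GFP_KERNEL);

		if (!buf)
			return -ENOMEM;
		/* buf is page aligned; the surplus pages of the rounded-up order were freed. */
		free_pages_exact(buf, sz);
		return 0;
	}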
@@ -4745,8 +4756,10 @@ void *__init alloc_large_system_hash(const char *tablename, | |||
4745 | * some pages at the end of hash table which | 4756 | * some pages at the end of hash table which |
4746 | * alloc_pages_exact() automatically does | 4757 | * alloc_pages_exact() automatically does |
4747 | */ | 4758 | */ |
4748 | if (get_order(size) < MAX_ORDER) | 4759 | if (get_order(size) < MAX_ORDER) { |
4749 | table = alloc_pages_exact(size, GFP_ATOMIC); | 4760 | table = alloc_pages_exact(size, GFP_ATOMIC); |
4761 | kmemleak_alloc(table, size, 1, GFP_ATOMIC); | ||
4762 | } | ||
4750 | } | 4763 | } |
4751 | } while (!table && size > PAGE_SIZE && --log2qty); | 4764 | } while (!table && size > PAGE_SIZE && --log2qty); |
4752 | 4765 | ||
@@ -4764,16 +4777,6 @@ void *__init alloc_large_system_hash(const char *tablename, | |||
4764 | if (_hash_mask) | 4777 | if (_hash_mask) |
4765 | *_hash_mask = (1 << log2qty) - 1; | 4778 | *_hash_mask = (1 << log2qty) - 1; |
4766 | 4779 | ||
4767 | /* | ||
4768 | * If hashdist is set, the table allocation is done with __vmalloc() | ||
4769 | * which invokes the kmemleak_alloc() callback. This function may also | ||
4770 | * be called before the slab and kmemleak are initialised when | ||
4771 | * kmemleak simply buffers the request to be executed later | ||
4772 | * (GFP_ATOMIC flag ignored in this case). | ||
4773 | */ | ||
4774 | if (!hashdist) | ||
4775 | kmemleak_alloc(table, size, 1, GFP_ATOMIC); | ||
4776 | |||
4777 | return table; | 4780 | return table; |
4778 | } | 4781 | } |
4779 | 4782 | ||
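Moving kmemleak_alloc() next to the alloc_pages_exact() branch reflects that raw page-allocator memory is not instrumented automatically the way slab and vmalloc objects are (the hashdist/__vmalloc() path already reports through the vmalloc hook, as the removed comment noted). A hedged sketch of manually pairing the hooks around page-allocator memory (helper names invented):

	#include <linux/gfp.h>
	#include <linux/kmemleak.h>

	static void *demo_tracked_alloc(size_t size)
	{
		void *p = alloc_pages_exact(size, GFP_KERNEL);

		if (p)
			/* min_count = 1: warn if kmemleak later finds no reference to p. */
			kmemleak_alloc(p, size, 1, GFP_KERNEL);
		return p;
	}

	static void demo_tracked_free(void *p, size_t size)
	{
		kmemleak_free(p);
		free_pages_exact(p, size);
	}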
diff --git a/mm/percpu.c b/mm/percpu.c index b3d0bcff8c7c..3f9f182f9b44 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
@@ -1004,7 +1004,7 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) | |||
1004 | chunk->map_alloc = PCPU_DFL_MAP_ALLOC; | 1004 | chunk->map_alloc = PCPU_DFL_MAP_ALLOC; |
1005 | chunk->map[chunk->map_used++] = pcpu_unit_size; | 1005 | chunk->map[chunk->map_used++] = pcpu_unit_size; |
1006 | 1006 | ||
1007 | chunk->vm = get_vm_area(pcpu_chunk_size, GFP_KERNEL); | 1007 | chunk->vm = get_vm_area(pcpu_chunk_size, VM_ALLOC); |
1008 | if (!chunk->vm) { | 1008 | if (!chunk->vm) { |
1009 | free_pcpu_chunk(chunk); | 1009 | free_pcpu_chunk(chunk); |
1010 | return NULL; | 1010 | return NULL; |
@@ -1325,7 +1325,7 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, | |||
1325 | int *identity_map; | 1325 | int *identity_map; |
1326 | 1326 | ||
1327 | /* #units == #cpus, identity mapped */ | 1327 | /* #units == #cpus, identity mapped */ |
1328 | identity_map = alloc_bootmem(num_possible_cpus() * | 1328 | identity_map = alloc_bootmem(nr_cpu_ids * |
1329 | sizeof(identity_map[0])); | 1329 | sizeof(identity_map[0])); |
1330 | 1330 | ||
1331 | for_each_possible_cpu(cpu) | 1331 | for_each_possible_cpu(cpu) |
@@ -1333,7 +1333,7 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, | |||
1333 | 1333 | ||
1334 | pcpu_first_unit_cpu = 0; | 1334 | pcpu_first_unit_cpu = 0; |
1335 | pcpu_last_unit_cpu = pcpu_nr_units - 1; | 1335 | pcpu_last_unit_cpu = pcpu_nr_units - 1; |
1336 | pcpu_nr_units = num_possible_cpus(); | 1336 | pcpu_nr_units = nr_cpu_ids; |
1337 | pcpu_unit_map = identity_map; | 1337 | pcpu_unit_map = identity_map; |
1338 | } | 1338 | } |
1339 | 1339 | ||
@@ -1464,7 +1464,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, | |||
1464 | size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); | 1464 | size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); |
1465 | 1465 | ||
1466 | unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); | 1466 | unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); |
1467 | chunk_size = unit_size * num_possible_cpus(); | 1467 | chunk_size = unit_size * nr_cpu_ids; |
1468 | 1468 | ||
1469 | base = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, | 1469 | base = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, |
1470 | __pa(MAX_DMA_ADDRESS)); | 1470 | __pa(MAX_DMA_ADDRESS)); |
@@ -1475,11 +1475,15 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, | |||
1475 | } | 1475 | } |
1476 | 1476 | ||
1477 | /* return the leftover and copy */ | 1477 | /* return the leftover and copy */ |
1478 | for_each_possible_cpu(cpu) { | 1478 | for (cpu = 0; cpu < nr_cpu_ids; cpu++) { |
1479 | void *ptr = base + cpu * unit_size; | 1479 | void *ptr = base + cpu * unit_size; |
1480 | 1480 | ||
1481 | free_bootmem(__pa(ptr + size_sum), unit_size - size_sum); | 1481 | if (cpu_possible(cpu)) { |
1482 | memcpy(ptr, __per_cpu_load, static_size); | 1482 | free_bootmem(__pa(ptr + size_sum), |
1483 | unit_size - size_sum); | ||
1484 | memcpy(ptr, __per_cpu_load, static_size); | ||
1485 | } else | ||
1486 | free_bootmem(__pa(ptr), unit_size); | ||
1483 | } | 1487 | } |
1484 | 1488 | ||
1485 | /* we're ready, commit */ | 1489 | /* we're ready, commit */ |
@@ -1525,8 +1529,7 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, | |||
1525 | PCPU_MIN_UNIT_SIZE)); | 1529 | PCPU_MIN_UNIT_SIZE)); |
1526 | 1530 | ||
1527 | /* unaligned allocations can't be freed, round up to page size */ | 1531 | /* unaligned allocations can't be freed, round up to page size */ |
1528 | pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() * | 1532 | pages_size = PFN_ALIGN(unit_pages * nr_cpu_ids * sizeof(pages[0])); |
1529 | sizeof(pages[0])); | ||
1530 | pages = alloc_bootmem(pages_size); | 1533 | pages = alloc_bootmem(pages_size); |
1531 | 1534 | ||
1532 | /* allocate pages */ | 1535 | /* allocate pages */ |
@@ -1546,7 +1549,7 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, | |||
1546 | 1549 | ||
1547 | /* allocate vm area, map the pages and copy static data */ | 1550 | /* allocate vm area, map the pages and copy static data */ |
1548 | vm.flags = VM_ALLOC; | 1551 | vm.flags = VM_ALLOC; |
1549 | vm.size = num_possible_cpus() * unit_pages << PAGE_SHIFT; | 1552 | vm.size = nr_cpu_ids * unit_pages << PAGE_SHIFT; |
1550 | vm_area_register_early(&vm, PAGE_SIZE); | 1553 | vm_area_register_early(&vm, PAGE_SIZE); |
1551 | 1554 | ||
1552 | for_each_possible_cpu(cpu) { | 1555 | for_each_possible_cpu(cpu) { |
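The percpu changes above size the unit arrays by nr_cpu_ids rather than num_possible_cpus(): with an identity unit map the unit index is the CPU number itself, and num_possible_cpus() only counts set bits, so it can be smaller than the highest possible CPU id + 1 when cpu_possible_mask is sparse; the embed allocator then gives the slots of impossible CPUs back to bootmem. A minimal sketch of the distinction, assuming a hypothetical sparse mask such as {0, 2}:

	#include <linux/init.h>
	#include <linux/cpumask.h>
	#include <linux/bootmem.h>

	/*
	 * With cpu_possible_mask = {0, 2}:
	 *   num_possible_cpus() == 2   (bits set in the mask)
	 *   nr_cpu_ids          == 3   (highest possible id + 1)
	 * so an identity map indexed by CPU number needs nr_cpu_ids slots.
	 */
	static int __init demo_identity_map(void)
	{
		int *unit_map = alloc_bootmem(nr_cpu_ids * sizeof(unit_map[0]));
		unsigned int cpu;

		for_each_possible_cpu(cpu)
			unit_map[cpu] = cpu;	/* cpu may be as large as nr_cpu_ids - 1 */
		return 0;
	}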
diff --git a/mm/slab.c b/mm/slab.c --- a/mm/slab.c +++ b/mm/slab.c | |||
@@ -1544,9 +1544,6 @@ void __init kmem_cache_init(void) | |||
1544 | } | 1544 | } |
1545 | 1545 | ||
1546 | g_cpucache_up = EARLY; | 1546 | g_cpucache_up = EARLY; |
1547 | |||
1548 | /* Annotate slab for lockdep -- annotate the malloc caches */ | ||
1549 | init_lock_keys(); | ||
1550 | } | 1547 | } |
1551 | 1548 | ||
1552 | void __init kmem_cache_init_late(void) | 1549 | void __init kmem_cache_init_late(void) |
@@ -1563,6 +1560,9 @@ void __init kmem_cache_init_late(void) | |||
1563 | /* Done! */ | 1560 | /* Done! */ |
1564 | g_cpucache_up = FULL; | 1561 | g_cpucache_up = FULL; |
1565 | 1562 | ||
1563 | /* Annotate slab for lockdep -- annotate the malloc caches */ | ||
1564 | init_lock_keys(); | ||
1565 | |||
1566 | /* | 1566 | /* |
1567 | * Register a cpu startup notifier callback that initializes | 1567 | * Register a cpu startup notifier callback that initializes |
1568 | * cpu_cache_get for all new cpus | 1568 | * cpu_cache_get for all new cpus |
@@ -2547,7 +2547,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep) | |||
2547 | } | 2547 | } |
2548 | 2548 | ||
2549 | if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) | 2549 | if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) |
2550 | synchronize_rcu(); | 2550 | rcu_barrier(); |
2551 | 2551 | ||
2552 | __kmem_cache_destroy(cachep); | 2552 | __kmem_cache_destroy(cachep); |
2553 | mutex_unlock(&cache_chain_mutex); | 2553 | mutex_unlock(&cache_chain_mutex); |
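Switching kmem_cache_destroy() from synchronize_rcu() to rcu_barrier() matters for SLAB_DESTROY_BY_RCU caches: a grace period alone does not guarantee that already-queued call_rcu() callbacks (the deferred slab page frees) have run, while rcu_barrier() waits for them as well. A hedged sketch of the kind of cache this protects (struct and names invented for illustration):

	#include <linux/init.h>
	#include <linux/slab.h>
	#include <linux/errno.h>

	struct demo_obj {
		int value;
	};

	static struct kmem_cache *demo_cache;

	static int __init demo_cache_init(void)
	{
		/* Slab pages may only be reclaimed after an RCU grace period. */
		demo_cache = kmem_cache_create("demo_obj", sizeof(struct demo_obj),
					       0, SLAB_DESTROY_BY_RCU, NULL);
		return demo_cache ? 0 : -ENOMEM;
	}

	static void demo_cache_exit(void)
	{
		/*
		 * kmem_cache_destroy() now issues rcu_barrier() itself for
		 * SLAB_DESTROY_BY_RCU caches, so the RCU-deferred frees of the
		 * cache's pages have finished before the cache is torn down.
		 */
		kmem_cache_destroy(demo_cache);
	}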
diff --git a/mm/slob.c b/mm/slob.c --- a/mm/slob.c +++ b/mm/slob.c | |||
@@ -595,6 +595,8 @@ EXPORT_SYMBOL(kmem_cache_create); | |||
595 | void kmem_cache_destroy(struct kmem_cache *c) | 595 | void kmem_cache_destroy(struct kmem_cache *c) |
596 | { | 596 | { |
597 | kmemleak_free(c); | 597 | kmemleak_free(c); |
598 | if (c->flags & SLAB_DESTROY_BY_RCU) | ||
599 | rcu_barrier(); | ||
598 | slob_free(c, sizeof(struct kmem_cache)); | 600 | slob_free(c, sizeof(struct kmem_cache)); |
599 | } | 601 | } |
600 | EXPORT_SYMBOL(kmem_cache_destroy); | 602 | EXPORT_SYMBOL(kmem_cache_destroy); |
diff --git a/mm/slub.c b/mm/slub.c --- a/mm/slub.c +++ b/mm/slub.c | |||
@@ -21,7 +21,6 @@ | |||
21 | #include <linux/kmemcheck.h> | 21 | #include <linux/kmemcheck.h> |
22 | #include <linux/cpu.h> | 22 | #include <linux/cpu.h> |
23 | #include <linux/cpuset.h> | 23 | #include <linux/cpuset.h> |
24 | #include <linux/kmemleak.h> | ||
25 | #include <linux/mempolicy.h> | 24 | #include <linux/mempolicy.h> |
26 | #include <linux/ctype.h> | 25 | #include <linux/ctype.h> |
27 | #include <linux/debugobjects.h> | 26 | #include <linux/debugobjects.h> |
@@ -2595,6 +2594,8 @@ static inline int kmem_cache_close(struct kmem_cache *s) | |||
2595 | */ | 2594 | */ |
2596 | void kmem_cache_destroy(struct kmem_cache *s) | 2595 | void kmem_cache_destroy(struct kmem_cache *s) |
2597 | { | 2596 | { |
2597 | if (s->flags & SLAB_DESTROY_BY_RCU) | ||
2598 | rcu_barrier(); | ||
2598 | down_write(&slub_lock); | 2599 | down_write(&slub_lock); |
2599 | s->refcount--; | 2600 | s->refcount--; |
2600 | if (!s->refcount) { | 2601 | if (!s->refcount) { |
@@ -2833,13 +2834,15 @@ EXPORT_SYMBOL(__kmalloc); | |||
2833 | static void *kmalloc_large_node(size_t size, gfp_t flags, int node) | 2834 | static void *kmalloc_large_node(size_t size, gfp_t flags, int node) |
2834 | { | 2835 | { |
2835 | struct page *page; | 2836 | struct page *page; |
2837 | void *ptr = NULL; | ||
2836 | 2838 | ||
2837 | flags |= __GFP_COMP | __GFP_NOTRACK; | 2839 | flags |= __GFP_COMP | __GFP_NOTRACK; |
2838 | page = alloc_pages_node(node, flags, get_order(size)); | 2840 | page = alloc_pages_node(node, flags, get_order(size)); |
2839 | if (page) | 2841 | if (page) |
2840 | return page_address(page); | 2842 | ptr = page_address(page); |
2841 | else | 2843 | |
2842 | return NULL; | 2844 | kmemleak_alloc(ptr, size, 1, flags); |
2845 | return ptr; | ||
2843 | } | 2846 | } |
2844 | 2847 | ||
2845 | #ifdef CONFIG_NUMA | 2848 | #ifdef CONFIG_NUMA |
@@ -2924,6 +2927,7 @@ void kfree(const void *x) | |||
2924 | page = virt_to_head_page(x); | 2927 | page = virt_to_head_page(x); |
2925 | if (unlikely(!PageSlab(page))) { | 2928 | if (unlikely(!PageSlab(page))) { |
2926 | BUG_ON(!PageCompound(page)); | 2929 | BUG_ON(!PageCompound(page)); |
2930 | kmemleak_free(x); | ||
2927 | put_page(page); | 2931 | put_page(page); |
2928 | return; | 2932 | return; |
2929 | } | 2933 | } |
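In SLUB, kmalloc() requests above the largest kmalloc cache bypass the slab layer and come straight from the page allocator as compound pages, which is why the kmemleak hooks have to be added by hand in kmalloc_large_node() and in the !PageSlab branch of kfree() shown above. A small sketch of an allocation that takes this path (the size is only illustrative):

	#include <linux/slab.h>
	#include <linux/gfp.h>

	static void demo_large_kmalloc(void)
	{
		/* Far above the largest kmalloc cache: served as compound pages. */
		void *p = kmalloc(128 * 1024, GFP_KERNEL);

		if (p)
			kfree(p);	/* !PageSlab path: kmemleak_free() + put_page() */
	}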
diff --git a/mm/swapfile.c b/mm/swapfile.c index d1ade1a48ee7..8ffdc0d23c53 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -753,7 +753,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p) | |||
753 | 753 | ||
754 | if (!bdev) { | 754 | if (!bdev) { |
755 | if (bdev_p) | 755 | if (bdev_p) |
756 | *bdev_p = bdget(sis->bdev->bd_dev); | 756 | *bdev_p = bdgrab(sis->bdev); |
757 | 757 | ||
758 | spin_unlock(&swap_lock); | 758 | spin_unlock(&swap_lock); |
759 | return i; | 759 | return i; |
@@ -765,7 +765,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p) | |||
765 | struct swap_extent, list); | 765 | struct swap_extent, list); |
766 | if (se->start_block == offset) { | 766 | if (se->start_block == offset) { |
767 | if (bdev_p) | 767 | if (bdev_p) |
768 | *bdev_p = bdget(sis->bdev->bd_dev); | 768 | *bdev_p = bdgrab(sis->bdev); |
769 | 769 | ||
770 | spin_unlock(&swap_lock); | 770 | spin_unlock(&swap_lock); |
771 | bdput(bdev); | 771 | bdput(bdev); |
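bdgrab() takes an extra reference on a block_device the caller already holds, without re-looking the device up by dev_t the way bdget() does; that keeps these call sites safe under the spin_lock(&swap_lock) held here, where bdget() could block. A hedged sketch of the pairing, assuming the struct block_device *bdgrab(struct block_device *) prototype from <linux/fs.h>:

	#include <linux/fs.h>

	/*
	 * Sketch: hand out an extra reference to a block device the caller
	 * already holds; the receiver drops it later with bdput().
	 */
	static struct block_device *demo_share_bdev(struct block_device *bdev)
	{
		return bdgrab(bdev);
	}

	static void demo_release_bdev(struct block_device *bdev)
	{
		bdput(bdev);
	}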
diff --git a/mm/vmscan.c b/mm/vmscan.c index 54155268dfca..dea7abd31098 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -1104,7 +1104,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, | |||
1104 | */ | 1104 | */ |
1105 | if (nr_freed < nr_taken && !current_is_kswapd() && | 1105 | if (nr_freed < nr_taken && !current_is_kswapd() && |
1106 | lumpy_reclaim) { | 1106 | lumpy_reclaim) { |
1107 | congestion_wait(WRITE, HZ/10); | 1107 | congestion_wait(BLK_RW_ASYNC, HZ/10); |
1108 | 1108 | ||
1109 | /* | 1109 | /* |
1110 | * The attempt at page out may have made some | 1110 | * The attempt at page out may have made some |
@@ -1721,7 +1721,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
1721 | 1721 | ||
1722 | /* Take a nap, wait for some writeback to complete */ | 1722 | /* Take a nap, wait for some writeback to complete */ |
1723 | if (sc->nr_scanned && priority < DEF_PRIORITY - 2) | 1723 | if (sc->nr_scanned && priority < DEF_PRIORITY - 2) |
1724 | congestion_wait(WRITE, HZ/10); | 1724 | congestion_wait(BLK_RW_ASYNC, HZ/10); |
1725 | } | 1725 | } |
1726 | /* top priority shrink_zones still had more to do? don't OOM, then */ | 1726 | /* top priority shrink_zones still had more to do? don't OOM, then */ |
1727 | if (!sc->all_unreclaimable && scanning_global_lru(sc)) | 1727 | if (!sc->all_unreclaimable && scanning_global_lru(sc)) |
@@ -1960,7 +1960,7 @@ loop_again: | |||
1960 | * another pass across the zones. | 1960 | * another pass across the zones. |
1961 | */ | 1961 | */ |
1962 | if (total_scanned && priority < DEF_PRIORITY - 2) | 1962 | if (total_scanned && priority < DEF_PRIORITY - 2) |
1963 | congestion_wait(WRITE, HZ/10); | 1963 | congestion_wait(BLK_RW_ASYNC, HZ/10); |
1964 | 1964 | ||
1965 | /* | 1965 | /* |
1966 | * We do this so kswapd doesn't build up large priorities for | 1966 | * We do this so kswapd doesn't build up large priorities for |
@@ -2233,7 +2233,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages) | |||
2233 | goto out; | 2233 | goto out; |
2234 | 2234 | ||
2235 | if (sc.nr_scanned && prio < DEF_PRIORITY - 2) | 2235 | if (sc.nr_scanned && prio < DEF_PRIORITY - 2) |
2236 | congestion_wait(WRITE, HZ / 10); | 2236 | congestion_wait(BLK_RW_ASYNC, HZ / 10); |
2237 | } | 2237 | } |
2238 | } | 2238 | } |
2239 | 2239 | ||