diff options
author | J. Bruce Fields <bfields@citi.umich.edu> | 2009-08-21 11:27:29 -0400 |
---|---|---|
committer | J. Bruce Fields <bfields@citi.umich.edu> | 2009-08-21 11:27:29 -0400 |
commit | e9dc122166b8d863d3057a66ada04838e5548e52 (patch) | |
tree | 749e15bf719b64bf9113db7acd8e043d9742cb26 /mm | |
parent | 560ab42ef923aaf2e4347315bdfcc74b2708972c (diff) | |
parent | 405d8f8b1d936414da2093d4149ff790ff3f84a5 (diff) |
Merge branch 'nfs-for-2.6.32' of git://git.linux-nfs.org/projects/trondmy/nfs-2.6 into for-2.6.32-incoming
Conflicts:
net/sunrpc/cache.c
Diffstat (limited to 'mm')
-rw-r--r-- | mm/backing-dev.c | 7 | ||||
-rw-r--r-- | mm/bootmem.c | 6 | ||||
-rw-r--r-- | mm/dmapool.c | 2 | ||||
-rw-r--r-- | mm/filemap.c | 1 | ||||
-rw-r--r-- | mm/hugetlb.c | 2 | ||||
-rw-r--r-- | mm/kmemleak.c | 409 | ||||
-rw-r--r-- | mm/memcontrol.c | 25 | ||||
-rw-r--r-- | mm/memory.c | 37 | ||||
-rw-r--r-- | mm/mempolicy.c | 84 | ||||
-rw-r--r-- | mm/mempool.c | 4 | ||||
-rw-r--r-- | mm/nommu.c | 33 | ||||
-rw-r--r-- | mm/page-writeback.c | 13 | ||||
-rw-r--r-- | mm/page_alloc.c | 54 | ||||
-rw-r--r-- | mm/percpu.c | 24 | ||||
-rw-r--r-- | mm/slab.c | 8 | ||||
-rw-r--r-- | mm/slob.c | 2 | ||||
-rw-r--r-- | mm/slub.c | 12 | ||||
-rw-r--r-- | mm/swapfile.c | 4 | ||||
-rw-r--r-- | mm/vmscan.c | 8 |
19 files changed, 431 insertions, 304 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 493b468a5035..c86edd244294 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
@@ -283,7 +283,6 @@ static wait_queue_head_t congestion_wqh[2] = { | |||
283 | __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) | 283 | __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) |
284 | }; | 284 | }; |
285 | 285 | ||
286 | |||
287 | void clear_bdi_congested(struct backing_dev_info *bdi, int sync) | 286 | void clear_bdi_congested(struct backing_dev_info *bdi, int sync) |
288 | { | 287 | { |
289 | enum bdi_state bit; | 288 | enum bdi_state bit; |
@@ -308,18 +307,18 @@ EXPORT_SYMBOL(set_bdi_congested); | |||
308 | 307 | ||
309 | /** | 308 | /** |
310 | * congestion_wait - wait for a backing_dev to become uncongested | 309 | * congestion_wait - wait for a backing_dev to become uncongested |
311 | * @rw: READ or WRITE | 310 | * @sync: SYNC or ASYNC IO |
312 | * @timeout: timeout in jiffies | 311 | * @timeout: timeout in jiffies |
313 | * | 312 | * |
314 | * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit | 313 | * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit |
315 | * write congestion. If no backing_devs are congested then just wait for the | 314 | * write congestion. If no backing_devs are congested then just wait for the |
316 | * next write to be completed. | 315 | * next write to be completed. |
317 | */ | 316 | */ |
318 | long congestion_wait(int rw, long timeout) | 317 | long congestion_wait(int sync, long timeout) |
319 | { | 318 | { |
320 | long ret; | 319 | long ret; |
321 | DEFINE_WAIT(wait); | 320 | DEFINE_WAIT(wait); |
322 | wait_queue_head_t *wqh = &congestion_wqh[rw]; | 321 | wait_queue_head_t *wqh = &congestion_wqh[sync]; |
323 | 322 | ||
324 | prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); | 323 | prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); |
325 | ret = io_schedule_timeout(timeout); | 324 | ret = io_schedule_timeout(timeout); |
diff --git a/mm/bootmem.c b/mm/bootmem.c index d2a9ce952768..701740c9e81b 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/pfn.h> | 12 | #include <linux/pfn.h> |
13 | #include <linux/bootmem.h> | 13 | #include <linux/bootmem.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/kmemleak.h> | ||
15 | 16 | ||
16 | #include <asm/bug.h> | 17 | #include <asm/bug.h> |
17 | #include <asm/io.h> | 18 | #include <asm/io.h> |
@@ -335,6 +336,8 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | |||
335 | { | 336 | { |
336 | unsigned long start, end; | 337 | unsigned long start, end; |
337 | 338 | ||
339 | kmemleak_free_part(__va(physaddr), size); | ||
340 | |||
338 | start = PFN_UP(physaddr); | 341 | start = PFN_UP(physaddr); |
339 | end = PFN_DOWN(physaddr + size); | 342 | end = PFN_DOWN(physaddr + size); |
340 | 343 | ||
@@ -354,6 +357,8 @@ void __init free_bootmem(unsigned long addr, unsigned long size) | |||
354 | { | 357 | { |
355 | unsigned long start, end; | 358 | unsigned long start, end; |
356 | 359 | ||
360 | kmemleak_free_part(__va(addr), size); | ||
361 | |||
357 | start = PFN_UP(addr); | 362 | start = PFN_UP(addr); |
358 | end = PFN_DOWN(addr + size); | 363 | end = PFN_DOWN(addr + size); |
359 | 364 | ||
@@ -516,6 +521,7 @@ find_block: | |||
516 | region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) + | 521 | region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) + |
517 | start_off); | 522 | start_off); |
518 | memset(region, 0, size); | 523 | memset(region, 0, size); |
524 | kmemleak_alloc(region, size, 1, 0); | ||
519 | return region; | 525 | return region; |
520 | } | 526 | } |
521 | 527 | ||
diff --git a/mm/dmapool.c b/mm/dmapool.c index b1f0885dda22..3df063706f53 100644 --- a/mm/dmapool.c +++ b/mm/dmapool.c | |||
@@ -86,10 +86,12 @@ show_pools(struct device *dev, struct device_attribute *attr, char *buf) | |||
86 | unsigned pages = 0; | 86 | unsigned pages = 0; |
87 | unsigned blocks = 0; | 87 | unsigned blocks = 0; |
88 | 88 | ||
89 | spin_lock_irq(&pool->lock); | ||
89 | list_for_each_entry(page, &pool->page_list, page_list) { | 90 | list_for_each_entry(page, &pool->page_list, page_list) { |
90 | pages++; | 91 | pages++; |
91 | blocks += page->in_use; | 92 | blocks += page->in_use; |
92 | } | 93 | } |
94 | spin_unlock_irq(&pool->lock); | ||
93 | 95 | ||
94 | /* per-pool info, no real statistics yet */ | 96 | /* per-pool info, no real statistics yet */ |
95 | temp = scnprintf(next, size, "%-16s %4u %4Zu %4Zu %2u\n", | 97 | temp = scnprintf(next, size, "%-16s %4u %4Zu %4Zu %2u\n", |
diff --git a/mm/filemap.c b/mm/filemap.c index 22396713feb9..ccea3b665c12 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -2272,6 +2272,7 @@ again: | |||
2272 | pagefault_enable(); | 2272 | pagefault_enable(); |
2273 | flush_dcache_page(page); | 2273 | flush_dcache_page(page); |
2274 | 2274 | ||
2275 | mark_page_accessed(page); | ||
2275 | status = a_ops->write_end(file, mapping, pos, bytes, copied, | 2276 | status = a_ops->write_end(file, mapping, pos, bytes, copied, |
2276 | page, fsdata); | 2277 | page, fsdata); |
2277 | if (unlikely(status < 0)) | 2278 | if (unlikely(status < 0)) |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d0351e31f474..cafdcee154e8 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -2370,7 +2370,7 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) | |||
2370 | long chg = region_truncate(&inode->i_mapping->private_list, offset); | 2370 | long chg = region_truncate(&inode->i_mapping->private_list, offset); |
2371 | 2371 | ||
2372 | spin_lock(&inode->i_lock); | 2372 | spin_lock(&inode->i_lock); |
2373 | inode->i_blocks -= blocks_per_huge_page(h); | 2373 | inode->i_blocks -= (blocks_per_huge_page(h) * freed); |
2374 | spin_unlock(&inode->i_lock); | 2374 | spin_unlock(&inode->i_lock); |
2375 | 2375 | ||
2376 | hugetlb_put_quota(inode->i_mapping, (chg - freed)); | 2376 | hugetlb_put_quota(inode->i_mapping, (chg - freed)); |
diff --git a/mm/kmemleak.c b/mm/kmemleak.c index c96f2c8700aa..487267310a84 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c | |||
@@ -48,10 +48,10 @@ | |||
48 | * scanned. This list is only modified during a scanning episode when the | 48 | * scanned. This list is only modified during a scanning episode when the |
49 | * scan_mutex is held. At the end of a scan, the gray_list is always empty. | 49 | * scan_mutex is held. At the end of a scan, the gray_list is always empty. |
50 | * Note that the kmemleak_object.use_count is incremented when an object is | 50 | * Note that the kmemleak_object.use_count is incremented when an object is |
51 | * added to the gray_list and therefore cannot be freed | 51 | * added to the gray_list and therefore cannot be freed. This mutex also |
52 | * - kmemleak_mutex (mutex): prevents multiple users of the "kmemleak" debugfs | 52 | * prevents multiple users of the "kmemleak" debugfs file together with |
53 | * file together with modifications to the memory scanning parameters | 53 | * modifications to the memory scanning parameters including the scan_thread |
54 | * including the scan_thread pointer | 54 | * pointer |
55 | * | 55 | * |
56 | * The kmemleak_object structures have a use_count incremented or decremented | 56 | * The kmemleak_object structures have a use_count incremented or decremented |
57 | * using the get_object()/put_object() functions. When the use_count becomes | 57 | * using the get_object()/put_object() functions. When the use_count becomes |
@@ -103,11 +103,10 @@ | |||
103 | * Kmemleak configuration and common defines. | 103 | * Kmemleak configuration and common defines. |
104 | */ | 104 | */ |
105 | #define MAX_TRACE 16 /* stack trace length */ | 105 | #define MAX_TRACE 16 /* stack trace length */ |
106 | #define REPORTS_NR 50 /* maximum number of reported leaks */ | ||
107 | #define MSECS_MIN_AGE 5000 /* minimum object age for reporting */ | 106 | #define MSECS_MIN_AGE 5000 /* minimum object age for reporting */ |
108 | #define MSECS_SCAN_YIELD 10 /* CPU yielding period */ | ||
109 | #define SECS_FIRST_SCAN 60 /* delay before the first scan */ | 107 | #define SECS_FIRST_SCAN 60 /* delay before the first scan */ |
110 | #define SECS_SCAN_WAIT 600 /* subsequent auto scanning delay */ | 108 | #define SECS_SCAN_WAIT 600 /* subsequent auto scanning delay */ |
109 | #define GRAY_LIST_PASSES 25 /* maximum number of gray list scans */ | ||
111 | 110 | ||
112 | #define BYTES_PER_POINTER sizeof(void *) | 111 | #define BYTES_PER_POINTER sizeof(void *) |
113 | 112 | ||
@@ -159,6 +158,8 @@ struct kmemleak_object { | |||
159 | #define OBJECT_REPORTED (1 << 1) | 158 | #define OBJECT_REPORTED (1 << 1) |
160 | /* flag set to not scan the object */ | 159 | /* flag set to not scan the object */ |
161 | #define OBJECT_NO_SCAN (1 << 2) | 160 | #define OBJECT_NO_SCAN (1 << 2) |
161 | /* flag set on newly allocated objects */ | ||
162 | #define OBJECT_NEW (1 << 3) | ||
162 | 163 | ||
163 | /* the list of all allocated objects */ | 164 | /* the list of all allocated objects */ |
164 | static LIST_HEAD(object_list); | 165 | static LIST_HEAD(object_list); |
@@ -186,22 +187,16 @@ static atomic_t kmemleak_error = ATOMIC_INIT(0); | |||
186 | static unsigned long min_addr = ULONG_MAX; | 187 | static unsigned long min_addr = ULONG_MAX; |
187 | static unsigned long max_addr; | 188 | static unsigned long max_addr; |
188 | 189 | ||
189 | /* used for yielding the CPU to other tasks during scanning */ | ||
190 | static unsigned long next_scan_yield; | ||
191 | static struct task_struct *scan_thread; | 190 | static struct task_struct *scan_thread; |
192 | static unsigned long jiffies_scan_yield; | 191 | /* used to avoid reporting of recently allocated objects */ |
193 | static unsigned long jiffies_min_age; | 192 | static unsigned long jiffies_min_age; |
193 | static unsigned long jiffies_last_scan; | ||
194 | /* delay between automatic memory scannings */ | 194 | /* delay between automatic memory scannings */ |
195 | static signed long jiffies_scan_wait; | 195 | static signed long jiffies_scan_wait; |
196 | /* enables or disables the task stacks scanning */ | 196 | /* enables or disables the task stacks scanning */ |
197 | static int kmemleak_stack_scan; | 197 | static int kmemleak_stack_scan = 1; |
198 | /* mutex protecting the memory scanning */ | 198 | /* protects the memory scanning, parameters and debug/kmemleak file access */ |
199 | static DEFINE_MUTEX(scan_mutex); | 199 | static DEFINE_MUTEX(scan_mutex); |
200 | /* mutex protecting the access to the /sys/kernel/debug/kmemleak file */ | ||
201 | static DEFINE_MUTEX(kmemleak_mutex); | ||
202 | |||
203 | /* number of leaks reported (for limitation purposes) */ | ||
204 | static int reported_leaks; | ||
205 | 200 | ||
206 | /* | 201 | /* |
207 | * Early object allocation/freeing logging. Kmemleak is initialized after the | 202 | * Early object allocation/freeing logging. Kmemleak is initialized after the |
@@ -215,6 +210,7 @@ static int reported_leaks; | |||
215 | enum { | 210 | enum { |
216 | KMEMLEAK_ALLOC, | 211 | KMEMLEAK_ALLOC, |
217 | KMEMLEAK_FREE, | 212 | KMEMLEAK_FREE, |
213 | KMEMLEAK_FREE_PART, | ||
218 | KMEMLEAK_NOT_LEAK, | 214 | KMEMLEAK_NOT_LEAK, |
219 | KMEMLEAK_IGNORE, | 215 | KMEMLEAK_IGNORE, |
220 | KMEMLEAK_SCAN_AREA, | 216 | KMEMLEAK_SCAN_AREA, |
@@ -235,7 +231,7 @@ struct early_log { | |||
235 | }; | 231 | }; |
236 | 232 | ||
237 | /* early logging buffer and current position */ | 233 | /* early logging buffer and current position */ |
238 | static struct early_log early_log[200]; | 234 | static struct early_log early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE]; |
239 | static int crt_early_log; | 235 | static int crt_early_log; |
240 | 236 | ||
241 | static void kmemleak_disable(void); | 237 | static void kmemleak_disable(void); |
@@ -278,13 +274,9 @@ static int color_gray(const struct kmemleak_object *object) | |||
278 | return object->min_count != -1 && object->count >= object->min_count; | 274 | return object->min_count != -1 && object->count >= object->min_count; |
279 | } | 275 | } |
280 | 276 | ||
281 | /* | 277 | static int color_black(const struct kmemleak_object *object) |
282 | * Objects are considered referenced if their color is gray and they have not | ||
283 | * been deleted. | ||
284 | */ | ||
285 | static int referenced_object(struct kmemleak_object *object) | ||
286 | { | 278 | { |
287 | return (object->flags & OBJECT_ALLOCATED) && color_gray(object); | 279 | return object->min_count == -1; |
288 | } | 280 | } |
289 | 281 | ||
290 | /* | 282 | /* |
@@ -295,42 +287,28 @@ static int referenced_object(struct kmemleak_object *object) | |||
295 | static int unreferenced_object(struct kmemleak_object *object) | 287 | static int unreferenced_object(struct kmemleak_object *object) |
296 | { | 288 | { |
297 | return (object->flags & OBJECT_ALLOCATED) && color_white(object) && | 289 | return (object->flags & OBJECT_ALLOCATED) && color_white(object) && |
298 | time_is_before_eq_jiffies(object->jiffies + jiffies_min_age); | 290 | time_before_eq(object->jiffies + jiffies_min_age, |
291 | jiffies_last_scan); | ||
299 | } | 292 | } |
300 | 293 | ||
301 | /* | 294 | /* |
302 | * Printing of the (un)referenced objects information, either to the seq file | 295 | * Printing of the unreferenced objects information to the seq file. The |
303 | * or to the kernel log. The print_referenced/print_unreferenced functions | 296 | * print_unreferenced function must be called with the object->lock held. |
304 | * must be called with the object->lock held. | ||
305 | */ | 297 | */ |
306 | #define print_helper(seq, x...) do { \ | ||
307 | struct seq_file *s = (seq); \ | ||
308 | if (s) \ | ||
309 | seq_printf(s, x); \ | ||
310 | else \ | ||
311 | pr_info(x); \ | ||
312 | } while (0) | ||
313 | |||
314 | static void print_referenced(struct kmemleak_object *object) | ||
315 | { | ||
316 | pr_info("referenced object 0x%08lx (size %zu)\n", | ||
317 | object->pointer, object->size); | ||
318 | } | ||
319 | |||
320 | static void print_unreferenced(struct seq_file *seq, | 298 | static void print_unreferenced(struct seq_file *seq, |
321 | struct kmemleak_object *object) | 299 | struct kmemleak_object *object) |
322 | { | 300 | { |
323 | int i; | 301 | int i; |
324 | 302 | ||
325 | print_helper(seq, "unreferenced object 0x%08lx (size %zu):\n", | 303 | seq_printf(seq, "unreferenced object 0x%08lx (size %zu):\n", |
326 | object->pointer, object->size); | 304 | object->pointer, object->size); |
327 | print_helper(seq, " comm \"%s\", pid %d, jiffies %lu\n", | 305 | seq_printf(seq, " comm \"%s\", pid %d, jiffies %lu\n", |
328 | object->comm, object->pid, object->jiffies); | 306 | object->comm, object->pid, object->jiffies); |
329 | print_helper(seq, " backtrace:\n"); | 307 | seq_printf(seq, " backtrace:\n"); |
330 | 308 | ||
331 | for (i = 0; i < object->trace_len; i++) { | 309 | for (i = 0; i < object->trace_len; i++) { |
332 | void *ptr = (void *)object->trace[i]; | 310 | void *ptr = (void *)object->trace[i]; |
333 | print_helper(seq, " [<%p>] %pS\n", ptr, ptr); | 311 | seq_printf(seq, " [<%p>] %pS\n", ptr, ptr); |
334 | } | 312 | } |
335 | } | 313 | } |
336 | 314 | ||
@@ -478,7 +456,7 @@ static void create_object(unsigned long ptr, size_t size, int min_count, | |||
478 | INIT_HLIST_HEAD(&object->area_list); | 456 | INIT_HLIST_HEAD(&object->area_list); |
479 | spin_lock_init(&object->lock); | 457 | spin_lock_init(&object->lock); |
480 | atomic_set(&object->use_count, 1); | 458 | atomic_set(&object->use_count, 1); |
481 | object->flags = OBJECT_ALLOCATED; | 459 | object->flags = OBJECT_ALLOCATED | OBJECT_NEW; |
482 | object->pointer = ptr; | 460 | object->pointer = ptr; |
483 | object->size = size; | 461 | object->size = size; |
484 | object->min_count = min_count; | 462 | object->min_count = min_count; |
@@ -546,39 +524,87 @@ out: | |||
546 | * Remove the metadata (struct kmemleak_object) for a memory block from the | 524 | * Remove the metadata (struct kmemleak_object) for a memory block from the |
547 | * object_list and object_tree_root and decrement its use_count. | 525 | * object_list and object_tree_root and decrement its use_count. |
548 | */ | 526 | */ |
549 | static void delete_object(unsigned long ptr) | 527 | static void __delete_object(struct kmemleak_object *object) |
550 | { | 528 | { |
551 | unsigned long flags; | 529 | unsigned long flags; |
552 | struct kmemleak_object *object; | ||
553 | 530 | ||
554 | write_lock_irqsave(&kmemleak_lock, flags); | 531 | write_lock_irqsave(&kmemleak_lock, flags); |
555 | object = lookup_object(ptr, 0); | ||
556 | if (!object) { | ||
557 | kmemleak_warn("Freeing unknown object at 0x%08lx\n", | ||
558 | ptr); | ||
559 | write_unlock_irqrestore(&kmemleak_lock, flags); | ||
560 | return; | ||
561 | } | ||
562 | prio_tree_remove(&object_tree_root, &object->tree_node); | 532 | prio_tree_remove(&object_tree_root, &object->tree_node); |
563 | list_del_rcu(&object->object_list); | 533 | list_del_rcu(&object->object_list); |
564 | write_unlock_irqrestore(&kmemleak_lock, flags); | 534 | write_unlock_irqrestore(&kmemleak_lock, flags); |
565 | 535 | ||
566 | WARN_ON(!(object->flags & OBJECT_ALLOCATED)); | 536 | WARN_ON(!(object->flags & OBJECT_ALLOCATED)); |
567 | WARN_ON(atomic_read(&object->use_count) < 1); | 537 | WARN_ON(atomic_read(&object->use_count) < 2); |
568 | 538 | ||
569 | /* | 539 | /* |
570 | * Locking here also ensures that the corresponding memory block | 540 | * Locking here also ensures that the corresponding memory block |
571 | * cannot be freed when it is being scanned. | 541 | * cannot be freed when it is being scanned. |
572 | */ | 542 | */ |
573 | spin_lock_irqsave(&object->lock, flags); | 543 | spin_lock_irqsave(&object->lock, flags); |
574 | if (object->flags & OBJECT_REPORTED) | ||
575 | print_referenced(object); | ||
576 | object->flags &= ~OBJECT_ALLOCATED; | 544 | object->flags &= ~OBJECT_ALLOCATED; |
577 | spin_unlock_irqrestore(&object->lock, flags); | 545 | spin_unlock_irqrestore(&object->lock, flags); |
578 | put_object(object); | 546 | put_object(object); |
579 | } | 547 | } |
580 | 548 | ||
581 | /* | 549 | /* |
550 | * Look up the metadata (struct kmemleak_object) corresponding to ptr and | ||
551 | * delete it. | ||
552 | */ | ||
553 | static void delete_object_full(unsigned long ptr) | ||
554 | { | ||
555 | struct kmemleak_object *object; | ||
556 | |||
557 | object = find_and_get_object(ptr, 0); | ||
558 | if (!object) { | ||
559 | #ifdef DEBUG | ||
560 | kmemleak_warn("Freeing unknown object at 0x%08lx\n", | ||
561 | ptr); | ||
562 | #endif | ||
563 | return; | ||
564 | } | ||
565 | __delete_object(object); | ||
566 | put_object(object); | ||
567 | } | ||
568 | |||
569 | /* | ||
570 | * Look up the metadata (struct kmemleak_object) corresponding to ptr and | ||
571 | * delete it. If the memory block is partially freed, the function may create | ||
572 | * additional metadata for the remaining parts of the block. | ||
573 | */ | ||
574 | static void delete_object_part(unsigned long ptr, size_t size) | ||
575 | { | ||
576 | struct kmemleak_object *object; | ||
577 | unsigned long start, end; | ||
578 | |||
579 | object = find_and_get_object(ptr, 1); | ||
580 | if (!object) { | ||
581 | #ifdef DEBUG | ||
582 | kmemleak_warn("Partially freeing unknown object at 0x%08lx " | ||
583 | "(size %zu)\n", ptr, size); | ||
584 | #endif | ||
585 | return; | ||
586 | } | ||
587 | __delete_object(object); | ||
588 | |||
589 | /* | ||
590 | * Create one or two objects that may result from the memory block | ||
591 | * split. Note that partial freeing is only done by free_bootmem() and | ||
592 | * this happens before kmemleak_init() is called. The path below is | ||
593 | * only executed during early log recording in kmemleak_init(), so | ||
594 | * GFP_KERNEL is enough. | ||
595 | */ | ||
596 | start = object->pointer; | ||
597 | end = object->pointer + object->size; | ||
598 | if (ptr > start) | ||
599 | create_object(start, ptr - start, object->min_count, | ||
600 | GFP_KERNEL); | ||
601 | if (ptr + size < end) | ||
602 | create_object(ptr + size, end - ptr - size, object->min_count, | ||
603 | GFP_KERNEL); | ||
604 | |||
605 | put_object(object); | ||
606 | } | ||
607 | /* | ||
582 | * Make an object permanently gray-colored so that it can no longer be | 608 | * Make an object permanently gray-colored so that it can no longer be |
583 | * reported as a leak. This is used in general to mark a false positive. | 609 | * reported as a leak. This is used in general to mark a false positive. |
584 | */ | 610 | */ |
@@ -696,7 +722,8 @@ static void log_early(int op_type, const void *ptr, size_t size, | |||
696 | struct early_log *log; | 722 | struct early_log *log; |
697 | 723 | ||
698 | if (crt_early_log >= ARRAY_SIZE(early_log)) { | 724 | if (crt_early_log >= ARRAY_SIZE(early_log)) { |
699 | kmemleak_stop("Early log buffer exceeded\n"); | 725 | pr_warning("Early log buffer exceeded\n"); |
726 | kmemleak_disable(); | ||
700 | return; | 727 | return; |
701 | } | 728 | } |
702 | 729 | ||
@@ -741,13 +768,28 @@ void kmemleak_free(const void *ptr) | |||
741 | pr_debug("%s(0x%p)\n", __func__, ptr); | 768 | pr_debug("%s(0x%p)\n", __func__, ptr); |
742 | 769 | ||
743 | if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) | 770 | if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) |
744 | delete_object((unsigned long)ptr); | 771 | delete_object_full((unsigned long)ptr); |
745 | else if (atomic_read(&kmemleak_early_log)) | 772 | else if (atomic_read(&kmemleak_early_log)) |
746 | log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0); | 773 | log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0); |
747 | } | 774 | } |
748 | EXPORT_SYMBOL_GPL(kmemleak_free); | 775 | EXPORT_SYMBOL_GPL(kmemleak_free); |
749 | 776 | ||
750 | /* | 777 | /* |
778 | * Partial memory freeing function callback. This function is usually called | ||
779 | * from bootmem allocator when (part of) a memory block is freed. | ||
780 | */ | ||
781 | void kmemleak_free_part(const void *ptr, size_t size) | ||
782 | { | ||
783 | pr_debug("%s(0x%p)\n", __func__, ptr); | ||
784 | |||
785 | if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) | ||
786 | delete_object_part((unsigned long)ptr, size); | ||
787 | else if (atomic_read(&kmemleak_early_log)) | ||
788 | log_early(KMEMLEAK_FREE_PART, ptr, size, 0, 0, 0); | ||
789 | } | ||
790 | EXPORT_SYMBOL_GPL(kmemleak_free_part); | ||
791 | |||
792 | /* | ||
751 | * Mark an already allocated memory block as a false positive. This will cause | 793 | * Mark an already allocated memory block as a false positive. This will cause |
752 | * the block to no longer be reported as leak and always be scanned. | 794 | * the block to no longer be reported as leak and always be scanned. |
753 | */ | 795 | */ |
@@ -808,21 +850,6 @@ void kmemleak_no_scan(const void *ptr) | |||
808 | EXPORT_SYMBOL(kmemleak_no_scan); | 850 | EXPORT_SYMBOL(kmemleak_no_scan); |
809 | 851 | ||
810 | /* | 852 | /* |
811 | * Yield the CPU so that other tasks get a chance to run. The yielding is | ||
812 | * rate-limited to avoid excessive number of calls to the schedule() function | ||
813 | * during memory scanning. | ||
814 | */ | ||
815 | static void scan_yield(void) | ||
816 | { | ||
817 | might_sleep(); | ||
818 | |||
819 | if (time_is_before_eq_jiffies(next_scan_yield)) { | ||
820 | schedule(); | ||
821 | next_scan_yield = jiffies + jiffies_scan_yield; | ||
822 | } | ||
823 | } | ||
824 | |||
825 | /* | ||
826 | * Memory scanning is a long process and it needs to be interruptible. This | 853 | * Memory scanning is a long process and it needs to be interruptible. This |
827 | * function checks whether such interrupt condition occurred. | 854 | * function checks whether such interrupt condition occurred. |
828 | */ | 855 | */ |
@@ -848,7 +875,7 @@ static int scan_should_stop(void) | |||
848 | * found to the gray list. | 875 | * found to the gray list. |
849 | */ | 876 | */ |
850 | static void scan_block(void *_start, void *_end, | 877 | static void scan_block(void *_start, void *_end, |
851 | struct kmemleak_object *scanned) | 878 | struct kmemleak_object *scanned, int allow_resched) |
852 | { | 879 | { |
853 | unsigned long *ptr; | 880 | unsigned long *ptr; |
854 | unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER); | 881 | unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER); |
@@ -859,18 +886,11 @@ static void scan_block(void *_start, void *_end, | |||
859 | unsigned long pointer = *ptr; | 886 | unsigned long pointer = *ptr; |
860 | struct kmemleak_object *object; | 887 | struct kmemleak_object *object; |
861 | 888 | ||
889 | if (allow_resched) | ||
890 | cond_resched(); | ||
862 | if (scan_should_stop()) | 891 | if (scan_should_stop()) |
863 | break; | 892 | break; |
864 | 893 | ||
865 | /* | ||
866 | * When scanning a memory block with a corresponding | ||
867 | * kmemleak_object, the CPU yielding is handled in the calling | ||
868 | * code since it holds the object->lock to avoid the block | ||
869 | * freeing. | ||
870 | */ | ||
871 | if (!scanned) | ||
872 | scan_yield(); | ||
873 | |||
874 | object = find_and_get_object(pointer, 1); | 894 | object = find_and_get_object(pointer, 1); |
875 | if (!object) | 895 | if (!object) |
876 | continue; | 896 | continue; |
@@ -931,12 +951,12 @@ static void scan_object(struct kmemleak_object *object) | |||
931 | goto out; | 951 | goto out; |
932 | if (hlist_empty(&object->area_list)) | 952 | if (hlist_empty(&object->area_list)) |
933 | scan_block((void *)object->pointer, | 953 | scan_block((void *)object->pointer, |
934 | (void *)(object->pointer + object->size), object); | 954 | (void *)(object->pointer + object->size), object, 0); |
935 | else | 955 | else |
936 | hlist_for_each_entry(area, elem, &object->area_list, node) | 956 | hlist_for_each_entry(area, elem, &object->area_list, node) |
937 | scan_block((void *)(object->pointer + area->offset), | 957 | scan_block((void *)(object->pointer + area->offset), |
938 | (void *)(object->pointer + area->offset | 958 | (void *)(object->pointer + area->offset |
939 | + area->length), object); | 959 | + area->length), object, 0); |
940 | out: | 960 | out: |
941 | spin_unlock_irqrestore(&object->lock, flags); | 961 | spin_unlock_irqrestore(&object->lock, flags); |
942 | } | 962 | } |
@@ -952,6 +972,10 @@ static void kmemleak_scan(void) | |||
952 | struct kmemleak_object *object, *tmp; | 972 | struct kmemleak_object *object, *tmp; |
953 | struct task_struct *task; | 973 | struct task_struct *task; |
954 | int i; | 974 | int i; |
975 | int new_leaks = 0; | ||
976 | int gray_list_pass = 0; | ||
977 | |||
978 | jiffies_last_scan = jiffies; | ||
955 | 979 | ||
956 | /* prepare the kmemleak_object's */ | 980 | /* prepare the kmemleak_object's */ |
957 | rcu_read_lock(); | 981 | rcu_read_lock(); |
@@ -970,6 +994,7 @@ static void kmemleak_scan(void) | |||
970 | #endif | 994 | #endif |
971 | /* reset the reference count (whiten the object) */ | 995 | /* reset the reference count (whiten the object) */ |
972 | object->count = 0; | 996 | object->count = 0; |
997 | object->flags &= ~OBJECT_NEW; | ||
973 | if (color_gray(object) && get_object(object)) | 998 | if (color_gray(object) && get_object(object)) |
974 | list_add_tail(&object->gray_list, &gray_list); | 999 | list_add_tail(&object->gray_list, &gray_list); |
975 | 1000 | ||
@@ -978,14 +1003,14 @@ static void kmemleak_scan(void) | |||
978 | rcu_read_unlock(); | 1003 | rcu_read_unlock(); |
979 | 1004 | ||
980 | /* data/bss scanning */ | 1005 | /* data/bss scanning */ |
981 | scan_block(_sdata, _edata, NULL); | 1006 | scan_block(_sdata, _edata, NULL, 1); |
982 | scan_block(__bss_start, __bss_stop, NULL); | 1007 | scan_block(__bss_start, __bss_stop, NULL, 1); |
983 | 1008 | ||
984 | #ifdef CONFIG_SMP | 1009 | #ifdef CONFIG_SMP |
985 | /* per-cpu sections scanning */ | 1010 | /* per-cpu sections scanning */ |
986 | for_each_possible_cpu(i) | 1011 | for_each_possible_cpu(i) |
987 | scan_block(__per_cpu_start + per_cpu_offset(i), | 1012 | scan_block(__per_cpu_start + per_cpu_offset(i), |
988 | __per_cpu_end + per_cpu_offset(i), NULL); | 1013 | __per_cpu_end + per_cpu_offset(i), NULL, 1); |
989 | #endif | 1014 | #endif |
990 | 1015 | ||
991 | /* | 1016 | /* |
@@ -1007,7 +1032,7 @@ static void kmemleak_scan(void) | |||
1007 | /* only scan if page is in use */ | 1032 | /* only scan if page is in use */ |
1008 | if (page_count(page) == 0) | 1033 | if (page_count(page) == 0) |
1009 | continue; | 1034 | continue; |
1010 | scan_block(page, page + 1, NULL); | 1035 | scan_block(page, page + 1, NULL, 1); |
1011 | } | 1036 | } |
1012 | } | 1037 | } |
1013 | 1038 | ||
@@ -1019,7 +1044,8 @@ static void kmemleak_scan(void) | |||
1019 | read_lock(&tasklist_lock); | 1044 | read_lock(&tasklist_lock); |
1020 | for_each_process(task) | 1045 | for_each_process(task) |
1021 | scan_block(task_stack_page(task), | 1046 | scan_block(task_stack_page(task), |
1022 | task_stack_page(task) + THREAD_SIZE, NULL); | 1047 | task_stack_page(task) + THREAD_SIZE, |
1048 | NULL, 0); | ||
1023 | read_unlock(&tasklist_lock); | 1049 | read_unlock(&tasklist_lock); |
1024 | } | 1050 | } |
1025 | 1051 | ||
@@ -1031,9 +1057,10 @@ static void kmemleak_scan(void) | |||
1031 | * kmemleak objects cannot be freed from outside the loop because their | 1057 | * kmemleak objects cannot be freed from outside the loop because their |
1032 | * use_count was increased. | 1058 | * use_count was increased. |
1033 | */ | 1059 | */ |
1060 | repeat: | ||
1034 | object = list_entry(gray_list.next, typeof(*object), gray_list); | 1061 | object = list_entry(gray_list.next, typeof(*object), gray_list); |
1035 | while (&object->gray_list != &gray_list) { | 1062 | while (&object->gray_list != &gray_list) { |
1036 | scan_yield(); | 1063 | cond_resched(); |
1037 | 1064 | ||
1038 | /* may add new objects to the list */ | 1065 | /* may add new objects to the list */ |
1039 | if (!scan_should_stop()) | 1066 | if (!scan_should_stop()) |
@@ -1048,7 +1075,59 @@ static void kmemleak_scan(void) | |||
1048 | 1075 | ||
1049 | object = tmp; | 1076 | object = tmp; |
1050 | } | 1077 | } |
1078 | |||
1079 | if (scan_should_stop() || ++gray_list_pass >= GRAY_LIST_PASSES) | ||
1080 | goto scan_end; | ||
1081 | |||
1082 | /* | ||
1083 | * Check for new objects allocated during this scanning and add them | ||
1084 | * to the gray list. | ||
1085 | */ | ||
1086 | rcu_read_lock(); | ||
1087 | list_for_each_entry_rcu(object, &object_list, object_list) { | ||
1088 | spin_lock_irqsave(&object->lock, flags); | ||
1089 | if ((object->flags & OBJECT_NEW) && !color_black(object) && | ||
1090 | get_object(object)) { | ||
1091 | object->flags &= ~OBJECT_NEW; | ||
1092 | list_add_tail(&object->gray_list, &gray_list); | ||
1093 | } | ||
1094 | spin_unlock_irqrestore(&object->lock, flags); | ||
1095 | } | ||
1096 | rcu_read_unlock(); | ||
1097 | |||
1098 | if (!list_empty(&gray_list)) | ||
1099 | goto repeat; | ||
1100 | |||
1101 | scan_end: | ||
1051 | WARN_ON(!list_empty(&gray_list)); | 1102 | WARN_ON(!list_empty(&gray_list)); |
1103 | |||
1104 | /* | ||
1105 | * If scanning was stopped or new objects were being allocated at a | ||
1106 | * higher rate than gray list scanning, do not report any new | ||
1107 | * unreferenced objects. | ||
1108 | */ | ||
1109 | if (scan_should_stop() || gray_list_pass >= GRAY_LIST_PASSES) | ||
1110 | return; | ||
1111 | |||
1112 | /* | ||
1113 | * Scanning result reporting. | ||
1114 | */ | ||
1115 | rcu_read_lock(); | ||
1116 | list_for_each_entry_rcu(object, &object_list, object_list) { | ||
1117 | spin_lock_irqsave(&object->lock, flags); | ||
1118 | if (unreferenced_object(object) && | ||
1119 | !(object->flags & OBJECT_REPORTED)) { | ||
1120 | object->flags |= OBJECT_REPORTED; | ||
1121 | new_leaks++; | ||
1122 | } | ||
1123 | spin_unlock_irqrestore(&object->lock, flags); | ||
1124 | } | ||
1125 | rcu_read_unlock(); | ||
1126 | |||
1127 | if (new_leaks) | ||
1128 | pr_info("%d new suspected memory leaks (see " | ||
1129 | "/sys/kernel/debug/kmemleak)\n", new_leaks); | ||
1130 | |||
1052 | } | 1131 | } |
1053 | 1132 | ||
1054 | /* | 1133 | /* |
@@ -1060,6 +1139,7 @@ static int kmemleak_scan_thread(void *arg) | |||
1060 | static int first_run = 1; | 1139 | static int first_run = 1; |
1061 | 1140 | ||
1062 | pr_info("Automatic memory scanning thread started\n"); | 1141 | pr_info("Automatic memory scanning thread started\n"); |
1142 | set_user_nice(current, 10); | ||
1063 | 1143 | ||
1064 | /* | 1144 | /* |
1065 | * Wait before the first scan to allow the system to fully initialize. | 1145 | * Wait before the first scan to allow the system to fully initialize. |
@@ -1070,36 +1150,12 @@ static int kmemleak_scan_thread(void *arg) | |||
1070 | } | 1150 | } |
1071 | 1151 | ||
1072 | while (!kthread_should_stop()) { | 1152 | while (!kthread_should_stop()) { |
1073 | struct kmemleak_object *object; | ||
1074 | signed long timeout = jiffies_scan_wait; | 1153 | signed long timeout = jiffies_scan_wait; |
1075 | 1154 | ||
1076 | mutex_lock(&scan_mutex); | 1155 | mutex_lock(&scan_mutex); |
1077 | |||
1078 | kmemleak_scan(); | 1156 | kmemleak_scan(); |
1079 | reported_leaks = 0; | ||
1080 | |||
1081 | rcu_read_lock(); | ||
1082 | list_for_each_entry_rcu(object, &object_list, object_list) { | ||
1083 | unsigned long flags; | ||
1084 | |||
1085 | if (reported_leaks >= REPORTS_NR) | ||
1086 | break; | ||
1087 | spin_lock_irqsave(&object->lock, flags); | ||
1088 | if (!(object->flags & OBJECT_REPORTED) && | ||
1089 | unreferenced_object(object)) { | ||
1090 | print_unreferenced(NULL, object); | ||
1091 | object->flags |= OBJECT_REPORTED; | ||
1092 | reported_leaks++; | ||
1093 | } else if ((object->flags & OBJECT_REPORTED) && | ||
1094 | referenced_object(object)) { | ||
1095 | print_referenced(object); | ||
1096 | object->flags &= ~OBJECT_REPORTED; | ||
1097 | } | ||
1098 | spin_unlock_irqrestore(&object->lock, flags); | ||
1099 | } | ||
1100 | rcu_read_unlock(); | ||
1101 | |||
1102 | mutex_unlock(&scan_mutex); | 1157 | mutex_unlock(&scan_mutex); |
1158 | |||
1103 | /* wait before the next scan */ | 1159 | /* wait before the next scan */ |
1104 | while (timeout && !kthread_should_stop()) | 1160 | while (timeout && !kthread_should_stop()) |
1105 | timeout = schedule_timeout_interruptible(timeout); | 1161 | timeout = schedule_timeout_interruptible(timeout); |
@@ -1112,7 +1168,7 @@ static int kmemleak_scan_thread(void *arg) | |||
1112 | 1168 | ||
1113 | /* | 1169 | /* |
1114 | * Start the automatic memory scanning thread. This function must be called | 1170 | * Start the automatic memory scanning thread. This function must be called |
1115 | * with the kmemleak_mutex held. | 1171 | * with the scan_mutex held. |
1116 | */ | 1172 | */ |
1117 | void start_scan_thread(void) | 1173 | void start_scan_thread(void) |
1118 | { | 1174 | { |
@@ -1127,7 +1183,7 @@ void start_scan_thread(void) | |||
1127 | 1183 | ||
1128 | /* | 1184 | /* |
1129 | * Stop the automatic memory scanning thread. This function must be called | 1185 | * Stop the automatic memory scanning thread. This function must be called |
1130 | * with the kmemleak_mutex held. | 1186 | * with the scan_mutex held. |
1131 | */ | 1187 | */ |
1132 | void stop_scan_thread(void) | 1188 | void stop_scan_thread(void) |
1133 | { | 1189 | { |
@@ -1146,13 +1202,11 @@ static void *kmemleak_seq_start(struct seq_file *seq, loff_t *pos) | |||
1146 | { | 1202 | { |
1147 | struct kmemleak_object *object; | 1203 | struct kmemleak_object *object; |
1148 | loff_t n = *pos; | 1204 | loff_t n = *pos; |
1205 | int err; | ||
1149 | 1206 | ||
1150 | if (!n) { | 1207 | err = mutex_lock_interruptible(&scan_mutex); |
1151 | kmemleak_scan(); | 1208 | if (err < 0) |
1152 | reported_leaks = 0; | 1209 | return ERR_PTR(err); |
1153 | } | ||
1154 | if (reported_leaks >= REPORTS_NR) | ||
1155 | return NULL; | ||
1156 | 1210 | ||
1157 | rcu_read_lock(); | 1211 | rcu_read_lock(); |
1158 | list_for_each_entry_rcu(object, &object_list, object_list) { | 1212 | list_for_each_entry_rcu(object, &object_list, object_list) { |
@@ -1163,7 +1217,6 @@ static void *kmemleak_seq_start(struct seq_file *seq, loff_t *pos) | |||
1163 | } | 1217 | } |
1164 | object = NULL; | 1218 | object = NULL; |
1165 | out: | 1219 | out: |
1166 | rcu_read_unlock(); | ||
1167 | return object; | 1220 | return object; |
1168 | } | 1221 | } |
1169 | 1222 | ||
@@ -1178,17 +1231,13 @@ static void *kmemleak_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
1178 | struct list_head *n = &prev_obj->object_list; | 1231 | struct list_head *n = &prev_obj->object_list; |
1179 | 1232 | ||
1180 | ++(*pos); | 1233 | ++(*pos); |
1181 | if (reported_leaks >= REPORTS_NR) | ||
1182 | goto out; | ||
1183 | 1234 | ||
1184 | rcu_read_lock(); | ||
1185 | list_for_each_continue_rcu(n, &object_list) { | 1235 | list_for_each_continue_rcu(n, &object_list) { |
1186 | next_obj = list_entry(n, struct kmemleak_object, object_list); | 1236 | next_obj = list_entry(n, struct kmemleak_object, object_list); |
1187 | if (get_object(next_obj)) | 1237 | if (get_object(next_obj)) |
1188 | break; | 1238 | break; |
1189 | } | 1239 | } |
1190 | rcu_read_unlock(); | 1240 | |
1191 | out: | ||
1192 | put_object(prev_obj); | 1241 | put_object(prev_obj); |
1193 | return next_obj; | 1242 | return next_obj; |
1194 | } | 1243 | } |
@@ -1198,8 +1247,16 @@ out: | |||
1198 | */ | 1247 | */ |
1199 | static void kmemleak_seq_stop(struct seq_file *seq, void *v) | 1248 | static void kmemleak_seq_stop(struct seq_file *seq, void *v) |
1200 | { | 1249 | { |
1201 | if (v) | 1250 | if (!IS_ERR(v)) { |
1202 | put_object(v); | 1251 | /* |
1252 | * kmemleak_seq_start may return ERR_PTR if the scan_mutex | ||
1253 | * waiting was interrupted, so only release it if !IS_ERR. | ||
1254 | */ | ||
1255 | rcu_read_unlock(); | ||
1256 | mutex_unlock(&scan_mutex); | ||
1257 | if (v) | ||
1258 | put_object(v); | ||
1259 | } | ||
1203 | } | 1260 | } |
1204 | 1261 | ||
1205 | /* | 1262 | /* |
@@ -1211,11 +1268,8 @@ static int kmemleak_seq_show(struct seq_file *seq, void *v) | |||
1211 | unsigned long flags; | 1268 | unsigned long flags; |
1212 | 1269 | ||
1213 | spin_lock_irqsave(&object->lock, flags); | 1270 | spin_lock_irqsave(&object->lock, flags); |
1214 | if (!unreferenced_object(object)) | 1271 | if ((object->flags & OBJECT_REPORTED) && unreferenced_object(object)) |
1215 | goto out; | 1272 | print_unreferenced(seq, object); |
1216 | print_unreferenced(seq, object); | ||
1217 | reported_leaks++; | ||
1218 | out: | ||
1219 | spin_unlock_irqrestore(&object->lock, flags); | 1273 | spin_unlock_irqrestore(&object->lock, flags); |
1220 | return 0; | 1274 | return 0; |
1221 | } | 1275 | } |
@@ -1229,43 +1283,15 @@ static const struct seq_operations kmemleak_seq_ops = { | |||
1229 | 1283 | ||
1230 | static int kmemleak_open(struct inode *inode, struct file *file) | 1284 | static int kmemleak_open(struct inode *inode, struct file *file) |
1231 | { | 1285 | { |
1232 | int ret = 0; | ||
1233 | |||
1234 | if (!atomic_read(&kmemleak_enabled)) | 1286 | if (!atomic_read(&kmemleak_enabled)) |
1235 | return -EBUSY; | 1287 | return -EBUSY; |
1236 | 1288 | ||
1237 | ret = mutex_lock_interruptible(&kmemleak_mutex); | 1289 | return seq_open(file, &kmemleak_seq_ops); |
1238 | if (ret < 0) | ||
1239 | goto out; | ||
1240 | if (file->f_mode & FMODE_READ) { | ||
1241 | ret = mutex_lock_interruptible(&scan_mutex); | ||
1242 | if (ret < 0) | ||
1243 | goto kmemleak_unlock; | ||
1244 | ret = seq_open(file, &kmemleak_seq_ops); | ||
1245 | if (ret < 0) | ||
1246 | goto scan_unlock; | ||
1247 | } | ||
1248 | return ret; | ||
1249 | |||
1250 | scan_unlock: | ||
1251 | mutex_unlock(&scan_mutex); | ||
1252 | kmemleak_unlock: | ||
1253 | mutex_unlock(&kmemleak_mutex); | ||
1254 | out: | ||
1255 | return ret; | ||
1256 | } | 1290 | } |
1257 | 1291 | ||
1258 | static int kmemleak_release(struct inode *inode, struct file *file) | 1292 | static int kmemleak_release(struct inode *inode, struct file *file) |
1259 | { | 1293 | { |
1260 | int ret = 0; | 1294 | return seq_release(inode, file); |
1261 | |||
1262 | if (file->f_mode & FMODE_READ) { | ||
1263 | seq_release(inode, file); | ||
1264 | mutex_unlock(&scan_mutex); | ||
1265 | } | ||
1266 | mutex_unlock(&kmemleak_mutex); | ||
1267 | |||
1268 | return ret; | ||
1269 | } | 1295 | } |
1270 | 1296 | ||
1271 | /* | 1297 | /* |
@@ -1278,21 +1304,24 @@ static int kmemleak_release(struct inode *inode, struct file *file) | |||
1278 | * scan=off - stop the automatic memory scanning thread | 1304 | * scan=off - stop the automatic memory scanning thread |
1279 | * scan=... - set the automatic memory scanning period in seconds (0 to | 1305 | * scan=... - set the automatic memory scanning period in seconds (0 to |
1280 | * disable it) | 1306 | * disable it) |
1307 | * scan - trigger a memory scan | ||
1281 | */ | 1308 | */ |
1282 | static ssize_t kmemleak_write(struct file *file, const char __user *user_buf, | 1309 | static ssize_t kmemleak_write(struct file *file, const char __user *user_buf, |
1283 | size_t size, loff_t *ppos) | 1310 | size_t size, loff_t *ppos) |
1284 | { | 1311 | { |
1285 | char buf[64]; | 1312 | char buf[64]; |
1286 | int buf_size; | 1313 | int buf_size; |
1287 | 1314 | int ret; | |
1288 | if (!atomic_read(&kmemleak_enabled)) | ||
1289 | return -EBUSY; | ||
1290 | 1315 | ||
1291 | buf_size = min(size, (sizeof(buf) - 1)); | 1316 | buf_size = min(size, (sizeof(buf) - 1)); |
1292 | if (strncpy_from_user(buf, user_buf, buf_size) < 0) | 1317 | if (strncpy_from_user(buf, user_buf, buf_size) < 0) |
1293 | return -EFAULT; | 1318 | return -EFAULT; |
1294 | buf[buf_size] = 0; | 1319 | buf[buf_size] = 0; |
1295 | 1320 | ||
1321 | ret = mutex_lock_interruptible(&scan_mutex); | ||
1322 | if (ret < 0) | ||
1323 | return ret; | ||
1324 | |||
1296 | if (strncmp(buf, "off", 3) == 0) | 1325 | if (strncmp(buf, "off", 3) == 0) |
1297 | kmemleak_disable(); | 1326 | kmemleak_disable(); |
1298 | else if (strncmp(buf, "stack=on", 8) == 0) | 1327 | else if (strncmp(buf, "stack=on", 8) == 0) |
@@ -1305,18 +1334,24 @@ static ssize_t kmemleak_write(struct file *file, const char __user *user_buf, | |||
1305 | stop_scan_thread(); | 1334 | stop_scan_thread(); |
1306 | else if (strncmp(buf, "scan=", 5) == 0) { | 1335 | else if (strncmp(buf, "scan=", 5) == 0) { |
1307 | unsigned long secs; | 1336 | unsigned long secs; |
1308 | int err; | ||
1309 | 1337 | ||
1310 | err = strict_strtoul(buf + 5, 0, &secs); | 1338 | ret = strict_strtoul(buf + 5, 0, &secs); |
1311 | if (err < 0) | 1339 | if (ret < 0) |
1312 | return err; | 1340 | goto out; |
1313 | stop_scan_thread(); | 1341 | stop_scan_thread(); |
1314 | if (secs) { | 1342 | if (secs) { |
1315 | jiffies_scan_wait = msecs_to_jiffies(secs * 1000); | 1343 | jiffies_scan_wait = msecs_to_jiffies(secs * 1000); |
1316 | start_scan_thread(); | 1344 | start_scan_thread(); |
1317 | } | 1345 | } |
1318 | } else | 1346 | } else if (strncmp(buf, "scan", 4) == 0) |
1319 | return -EINVAL; | 1347 | kmemleak_scan(); |
1348 | else | ||
1349 | ret = -EINVAL; | ||
1350 | |||
1351 | out: | ||
1352 | mutex_unlock(&scan_mutex); | ||
1353 | if (ret < 0) | ||
1354 | return ret; | ||
1320 | 1355 | ||
1321 | /* ignore the rest of the buffer, only one command at a time */ | 1356 | /* ignore the rest of the buffer, only one command at a time */ |
1322 | *ppos += size; | 1357 | *ppos += size; |
@@ -1340,14 +1375,12 @@ static int kmemleak_cleanup_thread(void *arg) | |||
1340 | { | 1375 | { |
1341 | struct kmemleak_object *object; | 1376 | struct kmemleak_object *object; |
1342 | 1377 | ||
1343 | mutex_lock(&kmemleak_mutex); | 1378 | mutex_lock(&scan_mutex); |
1344 | stop_scan_thread(); | 1379 | stop_scan_thread(); |
1345 | mutex_unlock(&kmemleak_mutex); | ||
1346 | 1380 | ||
1347 | mutex_lock(&scan_mutex); | ||
1348 | rcu_read_lock(); | 1381 | rcu_read_lock(); |
1349 | list_for_each_entry_rcu(object, &object_list, object_list) | 1382 | list_for_each_entry_rcu(object, &object_list, object_list) |
1350 | delete_object(object->pointer); | 1383 | delete_object_full(object->pointer); |
1351 | rcu_read_unlock(); | 1384 | rcu_read_unlock(); |
1352 | mutex_unlock(&scan_mutex); | 1385 | mutex_unlock(&scan_mutex); |
1353 | 1386 | ||
@@ -1411,7 +1444,6 @@ void __init kmemleak_init(void) | |||
1411 | int i; | 1444 | int i; |
1412 | unsigned long flags; | 1445 | unsigned long flags; |
1413 | 1446 | ||
1414 | jiffies_scan_yield = msecs_to_jiffies(MSECS_SCAN_YIELD); | ||
1415 | jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE); | 1447 | jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE); |
1416 | jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000); | 1448 | jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000); |
1417 | 1449 | ||
@@ -1443,6 +1475,9 @@ void __init kmemleak_init(void) | |||
1443 | case KMEMLEAK_FREE: | 1475 | case KMEMLEAK_FREE: |
1444 | kmemleak_free(log->ptr); | 1476 | kmemleak_free(log->ptr); |
1445 | break; | 1477 | break; |
1478 | case KMEMLEAK_FREE_PART: | ||
1479 | kmemleak_free_part(log->ptr, log->size); | ||
1480 | break; | ||
1446 | case KMEMLEAK_NOT_LEAK: | 1481 | case KMEMLEAK_NOT_LEAK: |
1447 | kmemleak_not_leak(log->ptr); | 1482 | kmemleak_not_leak(log->ptr); |
1448 | break; | 1483 | break; |
@@ -1486,9 +1521,9 @@ static int __init kmemleak_late_init(void) | |||
1486 | &kmemleak_fops); | 1521 | &kmemleak_fops); |
1487 | if (!dentry) | 1522 | if (!dentry) |
1488 | pr_warning("Failed to create the debugfs kmemleak file\n"); | 1523 | pr_warning("Failed to create the debugfs kmemleak file\n"); |
1489 | mutex_lock(&kmemleak_mutex); | 1524 | mutex_lock(&scan_mutex); |
1490 | start_scan_thread(); | 1525 | start_scan_thread(); |
1491 | mutex_unlock(&kmemleak_mutex); | 1526 | mutex_unlock(&scan_mutex); |
1492 | 1527 | ||
1493 | pr_info("Kernel memory leak detector initialized\n"); | 1528 | pr_info("Kernel memory leak detector initialized\n"); |
1494 | 1529 | ||
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e2fa20dadf40..fd4529d86de5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -1207,6 +1207,12 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, | |||
1207 | ret = 0; | 1207 | ret = 0; |
1208 | out: | 1208 | out: |
1209 | unlock_page_cgroup(pc); | 1209 | unlock_page_cgroup(pc); |
1210 | /* | ||
1211 | * We charge against "to" which may not have any tasks. Then, "to" | ||
1212 | * can be under rmdir(). But in current implementation, caller of | ||
1213 | * this function is just force_empty() and it's guaranteed that | ||
1214 | * "to" is never removed. So, we don't check rmdir status here. | ||
1215 | */ | ||
1210 | return ret; | 1216 | return ret; |
1211 | } | 1217 | } |
1212 | 1218 | ||
@@ -1428,6 +1434,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, | |||
1428 | return; | 1434 | return; |
1429 | if (!ptr) | 1435 | if (!ptr) |
1430 | return; | 1436 | return; |
1437 | cgroup_exclude_rmdir(&ptr->css); | ||
1431 | pc = lookup_page_cgroup(page); | 1438 | pc = lookup_page_cgroup(page); |
1432 | mem_cgroup_lru_del_before_commit_swapcache(page); | 1439 | mem_cgroup_lru_del_before_commit_swapcache(page); |
1433 | __mem_cgroup_commit_charge(ptr, pc, ctype); | 1440 | __mem_cgroup_commit_charge(ptr, pc, ctype); |
@@ -1457,8 +1464,12 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, | |||
1457 | } | 1464 | } |
1458 | rcu_read_unlock(); | 1465 | rcu_read_unlock(); |
1459 | } | 1466 | } |
1460 | /* add this page(page_cgroup) to the LRU we want. */ | 1467 | /* |
1461 | 1468 | * At swapin, we may charge account against cgroup which has no tasks. | |
1469 | * So, rmdir()->pre_destroy() can be called while we do this charge. | ||
1470 | * In that case, we need to call pre_destroy() again. check it here. | ||
1471 | */ | ||
1472 | cgroup_release_and_wakeup_rmdir(&ptr->css); | ||
1462 | } | 1473 | } |
1463 | 1474 | ||
1464 | void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr) | 1475 | void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr) |
@@ -1664,7 +1675,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem, | |||
1664 | 1675 | ||
1665 | if (!mem) | 1676 | if (!mem) |
1666 | return; | 1677 | return; |
1667 | 1678 | cgroup_exclude_rmdir(&mem->css); | |
1668 | /* at migration success, oldpage->mapping is NULL. */ | 1679 | /* at migration success, oldpage->mapping is NULL. */ |
1669 | if (oldpage->mapping) { | 1680 | if (oldpage->mapping) { |
1670 | target = oldpage; | 1681 | target = oldpage; |
@@ -1704,6 +1715,12 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem, | |||
1704 | */ | 1715 | */ |
1705 | if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED) | 1716 | if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED) |
1706 | mem_cgroup_uncharge_page(target); | 1717 | mem_cgroup_uncharge_page(target); |
1718 | /* | ||
1719 | * At migration, we may charge account against cgroup which has no tasks. | ||
1720 | * So, rmdir()->pre_destroy() can be called while we do this charge. | ||
1721 | * In that case, we need to call pre_destroy() again. check it here. | ||
1722 | */ | ||
1723 | cgroup_release_and_wakeup_rmdir(&mem->css); | ||
1707 | } | 1724 | } |
1708 | 1725 | ||
1709 | /* | 1726 | /* |
@@ -1973,7 +1990,7 @@ try_to_free: | |||
1973 | if (!progress) { | 1990 | if (!progress) { |
1974 | nr_retries--; | 1991 | nr_retries--; |
1975 | /* maybe some writeback is necessary */ | 1992 | /* maybe some writeback is necessary */ |
1976 | congestion_wait(WRITE, HZ/10); | 1993 | congestion_wait(BLK_RW_ASYNC, HZ/10); |
1977 | } | 1994 | } |
1978 | 1995 | ||
1979 | } | 1996 | } |
diff --git a/mm/memory.c b/mm/memory.c index f46ac18ba231..aede2ce3aba4 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -135,11 +135,12 @@ void pmd_clear_bad(pmd_t *pmd) | |||
135 | * Note: this doesn't free the actual pages themselves. That | 135 | * Note: this doesn't free the actual pages themselves. That |
136 | * has been handled earlier when unmapping all the memory regions. | 136 | * has been handled earlier when unmapping all the memory regions. |
137 | */ | 137 | */ |
138 | static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd) | 138 | static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, |
139 | unsigned long addr) | ||
139 | { | 140 | { |
140 | pgtable_t token = pmd_pgtable(*pmd); | 141 | pgtable_t token = pmd_pgtable(*pmd); |
141 | pmd_clear(pmd); | 142 | pmd_clear(pmd); |
142 | pte_free_tlb(tlb, token); | 143 | pte_free_tlb(tlb, token, addr); |
143 | tlb->mm->nr_ptes--; | 144 | tlb->mm->nr_ptes--; |
144 | } | 145 | } |
145 | 146 | ||
@@ -157,7 +158,7 @@ static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud, | |||
157 | next = pmd_addr_end(addr, end); | 158 | next = pmd_addr_end(addr, end); |
158 | if (pmd_none_or_clear_bad(pmd)) | 159 | if (pmd_none_or_clear_bad(pmd)) |
159 | continue; | 160 | continue; |
160 | free_pte_range(tlb, pmd); | 161 | free_pte_range(tlb, pmd, addr); |
161 | } while (pmd++, addr = next, addr != end); | 162 | } while (pmd++, addr = next, addr != end); |
162 | 163 | ||
163 | start &= PUD_MASK; | 164 | start &= PUD_MASK; |
@@ -173,7 +174,7 @@ static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud, | |||
173 | 174 | ||
174 | pmd = pmd_offset(pud, start); | 175 | pmd = pmd_offset(pud, start); |
175 | pud_clear(pud); | 176 | pud_clear(pud); |
176 | pmd_free_tlb(tlb, pmd); | 177 | pmd_free_tlb(tlb, pmd, start); |
177 | } | 178 | } |
178 | 179 | ||
179 | static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, | 180 | static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, |
@@ -206,7 +207,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, | |||
206 | 207 | ||
207 | pud = pud_offset(pgd, start); | 208 | pud = pud_offset(pgd, start); |
208 | pgd_clear(pgd); | 209 | pgd_clear(pgd); |
209 | pud_free_tlb(tlb, pud); | 210 | pud_free_tlb(tlb, pud, start); |
210 | } | 211 | } |
211 | 212 | ||
212 | /* | 213 | /* |
@@ -1207,8 +1208,8 @@ static inline int use_zero_page(struct vm_area_struct *vma) | |||
1207 | 1208 | ||
1208 | 1209 | ||
1209 | int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | 1210 | int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, |
1210 | unsigned long start, int len, int flags, | 1211 | unsigned long start, int nr_pages, int flags, |
1211 | struct page **pages, struct vm_area_struct **vmas) | 1212 | struct page **pages, struct vm_area_struct **vmas) |
1212 | { | 1213 | { |
1213 | int i; | 1214 | int i; |
1214 | unsigned int vm_flags = 0; | 1215 | unsigned int vm_flags = 0; |
@@ -1217,7 +1218,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1217 | int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS); | 1218 | int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS); |
1218 | int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL); | 1219 | int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL); |
1219 | 1220 | ||
1220 | if (len <= 0) | 1221 | if (nr_pages <= 0) |
1221 | return 0; | 1222 | return 0; |
1222 | /* | 1223 | /* |
1223 | * Require read or write permissions. | 1224 | * Require read or write permissions. |
@@ -1269,7 +1270,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1269 | vmas[i] = gate_vma; | 1270 | vmas[i] = gate_vma; |
1270 | i++; | 1271 | i++; |
1271 | start += PAGE_SIZE; | 1272 | start += PAGE_SIZE; |
1272 | len--; | 1273 | nr_pages--; |
1273 | continue; | 1274 | continue; |
1274 | } | 1275 | } |
1275 | 1276 | ||
@@ -1280,7 +1281,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1280 | 1281 | ||
1281 | if (is_vm_hugetlb_page(vma)) { | 1282 | if (is_vm_hugetlb_page(vma)) { |
1282 | i = follow_hugetlb_page(mm, vma, pages, vmas, | 1283 | i = follow_hugetlb_page(mm, vma, pages, vmas, |
1283 | &start, &len, i, write); | 1284 | &start, &nr_pages, i, write); |
1284 | continue; | 1285 | continue; |
1285 | } | 1286 | } |
1286 | 1287 | ||
@@ -1357,9 +1358,9 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1357 | vmas[i] = vma; | 1358 | vmas[i] = vma; |
1358 | i++; | 1359 | i++; |
1359 | start += PAGE_SIZE; | 1360 | start += PAGE_SIZE; |
1360 | len--; | 1361 | nr_pages--; |
1361 | } while (len && start < vma->vm_end); | 1362 | } while (nr_pages && start < vma->vm_end); |
1362 | } while (len); | 1363 | } while (nr_pages); |
1363 | return i; | 1364 | return i; |
1364 | } | 1365 | } |
1365 | 1366 | ||
@@ -1368,7 +1369,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1368 | * @tsk: task_struct of target task | 1369 | * @tsk: task_struct of target task |
1369 | * @mm: mm_struct of target mm | 1370 | * @mm: mm_struct of target mm |
1370 | * @start: starting user address | 1371 | * @start: starting user address |
1371 | * @len: number of pages from start to pin | 1372 | * @nr_pages: number of pages from start to pin |
1372 | * @write: whether pages will be written to by the caller | 1373 | * @write: whether pages will be written to by the caller |
1373 | * @force: whether to force write access even if user mapping is | 1374 | * @force: whether to force write access even if user mapping is |
1374 | * readonly. This will result in the page being COWed even | 1375 | * readonly. This will result in the page being COWed even |
@@ -1380,7 +1381,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1380 | * Or NULL if the caller does not require them. | 1381 | * Or NULL if the caller does not require them. |
1381 | * | 1382 | * |
1382 | * Returns number of pages pinned. This may be fewer than the number | 1383 | * Returns number of pages pinned. This may be fewer than the number |
1383 | * requested. If len is 0 or negative, returns 0. If no pages | 1384 | * requested. If nr_pages is 0 or negative, returns 0. If no pages |
1384 | * were pinned, returns -errno. Each page returned must be released | 1385 | * were pinned, returns -errno. Each page returned must be released |
1385 | * with a put_page() call when it is finished with. vmas will only | 1386 | * with a put_page() call when it is finished with. vmas will only |
1386 | * remain valid while mmap_sem is held. | 1387 | * remain valid while mmap_sem is held. |
@@ -1414,7 +1415,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1414 | * See also get_user_pages_fast, for performance critical applications. | 1415 | * See also get_user_pages_fast, for performance critical applications. |
1415 | */ | 1416 | */ |
1416 | int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | 1417 | int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, |
1417 | unsigned long start, int len, int write, int force, | 1418 | unsigned long start, int nr_pages, int write, int force, |
1418 | struct page **pages, struct vm_area_struct **vmas) | 1419 | struct page **pages, struct vm_area_struct **vmas) |
1419 | { | 1420 | { |
1420 | int flags = 0; | 1421 | int flags = 0; |
@@ -1424,9 +1425,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1424 | if (force) | 1425 | if (force) |
1425 | flags |= GUP_FLAGS_FORCE; | 1426 | flags |= GUP_FLAGS_FORCE; |
1426 | 1427 | ||
1427 | return __get_user_pages(tsk, mm, | 1428 | return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas); |
1428 | start, len, flags, | ||
1429 | pages, vmas); | ||
1430 | } | 1429 | } |
1431 | 1430 | ||
1432 | EXPORT_SYMBOL(get_user_pages); | 1431 | EXPORT_SYMBOL(get_user_pages); |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index e08e2c4da63a..7dd9d9f80694 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -191,25 +191,27 @@ static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes) | |||
191 | * Must be called holding task's alloc_lock to protect task's mems_allowed | 191 | * Must be called holding task's alloc_lock to protect task's mems_allowed |
192 | * and mempolicy. May also be called holding the mmap_semaphore for write. | 192 | * and mempolicy. May also be called holding the mmap_semaphore for write. |
193 | */ | 193 | */ |
194 | static int mpol_set_nodemask(struct mempolicy *pol, const nodemask_t *nodes) | 194 | static int mpol_set_nodemask(struct mempolicy *pol, |
195 | const nodemask_t *nodes, struct nodemask_scratch *nsc) | ||
195 | { | 196 | { |
196 | nodemask_t cpuset_context_nmask; | ||
197 | int ret; | 197 | int ret; |
198 | 198 | ||
199 | /* if mode is MPOL_DEFAULT, pol is NULL. This is right. */ | 199 | /* if mode is MPOL_DEFAULT, pol is NULL. This is right. */ |
200 | if (pol == NULL) | 200 | if (pol == NULL) |
201 | return 0; | 201 | return 0; |
202 | /* Check N_HIGH_MEMORY */ | ||
203 | nodes_and(nsc->mask1, | ||
204 | cpuset_current_mems_allowed, node_states[N_HIGH_MEMORY]); | ||
202 | 205 | ||
203 | VM_BUG_ON(!nodes); | 206 | VM_BUG_ON(!nodes); |
204 | if (pol->mode == MPOL_PREFERRED && nodes_empty(*nodes)) | 207 | if (pol->mode == MPOL_PREFERRED && nodes_empty(*nodes)) |
205 | nodes = NULL; /* explicit local allocation */ | 208 | nodes = NULL; /* explicit local allocation */ |
206 | else { | 209 | else { |
207 | if (pol->flags & MPOL_F_RELATIVE_NODES) | 210 | if (pol->flags & MPOL_F_RELATIVE_NODES) |
208 | mpol_relative_nodemask(&cpuset_context_nmask, nodes, | 211 | mpol_relative_nodemask(&nsc->mask2, nodes,&nsc->mask1); |
209 | &cpuset_current_mems_allowed); | ||
210 | else | 212 | else |
211 | nodes_and(cpuset_context_nmask, *nodes, | 213 | nodes_and(nsc->mask2, *nodes, nsc->mask1); |
212 | cpuset_current_mems_allowed); | 214 | |
213 | if (mpol_store_user_nodemask(pol)) | 215 | if (mpol_store_user_nodemask(pol)) |
214 | pol->w.user_nodemask = *nodes; | 216 | pol->w.user_nodemask = *nodes; |
215 | else | 217 | else |
@@ -217,8 +219,10 @@ static int mpol_set_nodemask(struct mempolicy *pol, const nodemask_t *nodes) | |||
217 | cpuset_current_mems_allowed; | 219 | cpuset_current_mems_allowed; |
218 | } | 220 | } |
219 | 221 | ||
220 | ret = mpol_ops[pol->mode].create(pol, | 222 | if (nodes) |
221 | nodes ? &cpuset_context_nmask : NULL); | 223 | ret = mpol_ops[pol->mode].create(pol, &nsc->mask2); |
224 | else | ||
225 | ret = mpol_ops[pol->mode].create(pol, NULL); | ||
222 | return ret; | 226 | return ret; |
223 | } | 227 | } |
224 | 228 | ||
@@ -620,12 +624,17 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags, | |||
620 | { | 624 | { |
621 | struct mempolicy *new, *old; | 625 | struct mempolicy *new, *old; |
622 | struct mm_struct *mm = current->mm; | 626 | struct mm_struct *mm = current->mm; |
627 | NODEMASK_SCRATCH(scratch); | ||
623 | int ret; | 628 | int ret; |
624 | 629 | ||
625 | new = mpol_new(mode, flags, nodes); | 630 | if (!scratch) |
626 | if (IS_ERR(new)) | 631 | return -ENOMEM; |
627 | return PTR_ERR(new); | ||
628 | 632 | ||
633 | new = mpol_new(mode, flags, nodes); | ||
634 | if (IS_ERR(new)) { | ||
635 | ret = PTR_ERR(new); | ||
636 | goto out; | ||
637 | } | ||
629 | /* | 638 | /* |
630 | * prevent changing our mempolicy while show_numa_maps() | 639 | * prevent changing our mempolicy while show_numa_maps() |
631 | * is using it. | 640 | * is using it. |
@@ -635,13 +644,13 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags, | |||
635 | if (mm) | 644 | if (mm) |
636 | down_write(&mm->mmap_sem); | 645 | down_write(&mm->mmap_sem); |
637 | task_lock(current); | 646 | task_lock(current); |
638 | ret = mpol_set_nodemask(new, nodes); | 647 | ret = mpol_set_nodemask(new, nodes, scratch); |
639 | if (ret) { | 648 | if (ret) { |
640 | task_unlock(current); | 649 | task_unlock(current); |
641 | if (mm) | 650 | if (mm) |
642 | up_write(&mm->mmap_sem); | 651 | up_write(&mm->mmap_sem); |
643 | mpol_put(new); | 652 | mpol_put(new); |
644 | return ret; | 653 | goto out; |
645 | } | 654 | } |
646 | old = current->mempolicy; | 655 | old = current->mempolicy; |
647 | current->mempolicy = new; | 656 | current->mempolicy = new; |
@@ -654,7 +663,10 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags, | |||
654 | up_write(&mm->mmap_sem); | 663 | up_write(&mm->mmap_sem); |
655 | 664 | ||
656 | mpol_put(old); | 665 | mpol_put(old); |
657 | return 0; | 666 | ret = 0; |
667 | out: | ||
668 | NODEMASK_SCRATCH_FREE(scratch); | ||
669 | return ret; | ||
658 | } | 670 | } |
659 | 671 | ||
660 | /* | 672 | /* |
@@ -1014,12 +1026,20 @@ static long do_mbind(unsigned long start, unsigned long len, | |||
1014 | if (err) | 1026 | if (err) |
1015 | return err; | 1027 | return err; |
1016 | } | 1028 | } |
1017 | down_write(&mm->mmap_sem); | 1029 | { |
1018 | task_lock(current); | 1030 | NODEMASK_SCRATCH(scratch); |
1019 | err = mpol_set_nodemask(new, nmask); | 1031 | if (scratch) { |
1020 | task_unlock(current); | 1032 | down_write(&mm->mmap_sem); |
1033 | task_lock(current); | ||
1034 | err = mpol_set_nodemask(new, nmask, scratch); | ||
1035 | task_unlock(current); | ||
1036 | if (err) | ||
1037 | up_write(&mm->mmap_sem); | ||
1038 | } else | ||
1039 | err = -ENOMEM; | ||
1040 | NODEMASK_SCRATCH_FREE(scratch); | ||
1041 | } | ||
1021 | if (err) { | 1042 | if (err) { |
1022 | up_write(&mm->mmap_sem); | ||
1023 | mpol_put(new); | 1043 | mpol_put(new); |
1024 | return err; | 1044 | return err; |
1025 | } | 1045 | } |
@@ -1891,6 +1911,7 @@ restart: | |||
1891 | * Install non-NULL @mpol in inode's shared policy rb-tree. | 1911 | * Install non-NULL @mpol in inode's shared policy rb-tree. |
1892 | * On entry, the current task has a reference on a non-NULL @mpol. | 1912 | * On entry, the current task has a reference on a non-NULL @mpol. |
1893 | * This must be released on exit. | 1913 | * This must be released on exit. |
1914 | * This is called at get_inode() calls and we can use GFP_KERNEL. | ||
1894 | */ | 1915 | */ |
1895 | void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) | 1916 | void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) |
1896 | { | 1917 | { |
@@ -1902,19 +1923,24 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) | |||
1902 | if (mpol) { | 1923 | if (mpol) { |
1903 | struct vm_area_struct pvma; | 1924 | struct vm_area_struct pvma; |
1904 | struct mempolicy *new; | 1925 | struct mempolicy *new; |
1926 | NODEMASK_SCRATCH(scratch); | ||
1905 | 1927 | ||
1928 | if (!scratch) | ||
1929 | return; | ||
1906 | /* contextualize the tmpfs mount point mempolicy */ | 1930 | /* contextualize the tmpfs mount point mempolicy */ |
1907 | new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask); | 1931 | new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask); |
1908 | if (IS_ERR(new)) { | 1932 | if (IS_ERR(new)) { |
1909 | mpol_put(mpol); /* drop our ref on sb mpol */ | 1933 | mpol_put(mpol); /* drop our ref on sb mpol */ |
1934 | NODEMASK_SCRATCH_FREE(scratch); | ||
1910 | return; /* no valid nodemask intersection */ | 1935 | return; /* no valid nodemask intersection */ |
1911 | } | 1936 | } |
1912 | 1937 | ||
1913 | task_lock(current); | 1938 | task_lock(current); |
1914 | ret = mpol_set_nodemask(new, &mpol->w.user_nodemask); | 1939 | ret = mpol_set_nodemask(new, &mpol->w.user_nodemask, scratch); |
1915 | task_unlock(current); | 1940 | task_unlock(current); |
1916 | mpol_put(mpol); /* drop our ref on sb mpol */ | 1941 | mpol_put(mpol); /* drop our ref on sb mpol */ |
1917 | if (ret) { | 1942 | if (ret) { |
1943 | NODEMASK_SCRATCH_FREE(scratch); | ||
1918 | mpol_put(new); | 1944 | mpol_put(new); |
1919 | return; | 1945 | return; |
1920 | } | 1946 | } |
@@ -1924,6 +1950,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) | |||
1924 | pvma.vm_end = TASK_SIZE; /* policy covers entire file */ | 1950 | pvma.vm_end = TASK_SIZE; /* policy covers entire file */ |
1925 | mpol_set_shared_policy(sp, &pvma, new); /* adds ref */ | 1951 | mpol_set_shared_policy(sp, &pvma, new); /* adds ref */ |
1926 | mpol_put(new); /* drop initial ref */ | 1952 | mpol_put(new); /* drop initial ref */ |
1953 | NODEMASK_SCRATCH_FREE(scratch); | ||
1927 | } | 1954 | } |
1928 | } | 1955 | } |
1929 | 1956 | ||
@@ -2140,13 +2167,18 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) | |||
2140 | err = 1; | 2167 | err = 1; |
2141 | else { | 2168 | else { |
2142 | int ret; | 2169 | int ret; |
2143 | 2170 | NODEMASK_SCRATCH(scratch); | |
2144 | task_lock(current); | 2171 | if (scratch) { |
2145 | ret = mpol_set_nodemask(new, &nodes); | 2172 | task_lock(current); |
2146 | task_unlock(current); | 2173 | ret = mpol_set_nodemask(new, &nodes, scratch); |
2147 | if (ret) | 2174 | task_unlock(current); |
2175 | } else | ||
2176 | ret = -ENOMEM; | ||
2177 | NODEMASK_SCRATCH_FREE(scratch); | ||
2178 | if (ret) { | ||
2148 | err = 1; | 2179 | err = 1; |
2149 | else if (no_context) { | 2180 | mpol_put(new); |
2181 | } else if (no_context) { | ||
2150 | /* save for contextualization */ | 2182 | /* save for contextualization */ |
2151 | new->w.user_nodemask = nodes; | 2183 | new->w.user_nodemask = nodes; |
2152 | } | 2184 | } |
diff --git a/mm/mempool.c b/mm/mempool.c index a46eb1b4bb66..32e75d400503 100644 --- a/mm/mempool.c +++ b/mm/mempool.c | |||
@@ -303,14 +303,14 @@ EXPORT_SYMBOL(mempool_free_slab); | |||
303 | */ | 303 | */ |
304 | void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data) | 304 | void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data) |
305 | { | 305 | { |
306 | size_t size = (size_t)(long)pool_data; | 306 | size_t size = (size_t)pool_data; |
307 | return kmalloc(size, gfp_mask); | 307 | return kmalloc(size, gfp_mask); |
308 | } | 308 | } |
309 | EXPORT_SYMBOL(mempool_kmalloc); | 309 | EXPORT_SYMBOL(mempool_kmalloc); |
310 | 310 | ||
311 | void *mempool_kzalloc(gfp_t gfp_mask, void *pool_data) | 311 | void *mempool_kzalloc(gfp_t gfp_mask, void *pool_data) |
312 | { | 312 | { |
313 | size_t size = (size_t) pool_data; | 313 | size_t size = (size_t)pool_data; |
314 | return kzalloc(size, gfp_mask); | 314 | return kzalloc(size, gfp_mask); |
315 | } | 315 | } |
316 | EXPORT_SYMBOL(mempool_kzalloc); | 316 | EXPORT_SYMBOL(mempool_kzalloc); |
diff --git a/mm/nommu.c b/mm/nommu.c index 2fd2ad5da98e..53cab10fece4 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
@@ -173,8 +173,8 @@ unsigned int kobjsize(const void *objp) | |||
173 | } | 173 | } |
174 | 174 | ||
175 | int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | 175 | int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, |
176 | unsigned long start, int len, int flags, | 176 | unsigned long start, int nr_pages, int flags, |
177 | struct page **pages, struct vm_area_struct **vmas) | 177 | struct page **pages, struct vm_area_struct **vmas) |
178 | { | 178 | { |
179 | struct vm_area_struct *vma; | 179 | struct vm_area_struct *vma; |
180 | unsigned long vm_flags; | 180 | unsigned long vm_flags; |
@@ -189,7 +189,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
189 | vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); | 189 | vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); |
190 | vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); | 190 | vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); |
191 | 191 | ||
192 | for (i = 0; i < len; i++) { | 192 | for (i = 0; i < nr_pages; i++) { |
193 | vma = find_vma(mm, start); | 193 | vma = find_vma(mm, start); |
194 | if (!vma) | 194 | if (!vma) |
195 | goto finish_or_fault; | 195 | goto finish_or_fault; |
@@ -224,7 +224,7 @@ finish_or_fault: | |||
224 | * - don't permit access to VMAs that don't support it, such as I/O mappings | 224 | * - don't permit access to VMAs that don't support it, such as I/O mappings |
225 | */ | 225 | */ |
226 | int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | 226 | int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, |
227 | unsigned long start, int len, int write, int force, | 227 | unsigned long start, int nr_pages, int write, int force, |
228 | struct page **pages, struct vm_area_struct **vmas) | 228 | struct page **pages, struct vm_area_struct **vmas) |
229 | { | 229 | { |
230 | int flags = 0; | 230 | int flags = 0; |
@@ -234,12 +234,31 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
234 | if (force) | 234 | if (force) |
235 | flags |= GUP_FLAGS_FORCE; | 235 | flags |= GUP_FLAGS_FORCE; |
236 | 236 | ||
237 | return __get_user_pages(tsk, mm, | 237 | return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas); |
238 | start, len, flags, | ||
239 | pages, vmas); | ||
240 | } | 238 | } |
241 | EXPORT_SYMBOL(get_user_pages); | 239 | EXPORT_SYMBOL(get_user_pages); |
242 | 240 | ||
241 | /** | ||
242 | * follow_pfn - look up PFN at a user virtual address | ||
243 | * @vma: memory mapping | ||
244 | * @address: user virtual address | ||
245 | * @pfn: location to store found PFN | ||
246 | * | ||
247 | * Only IO mappings and raw PFN mappings are allowed. | ||
248 | * | ||
249 | * Returns zero and the pfn at @pfn on success, -ve otherwise. | ||
250 | */ | ||
251 | int follow_pfn(struct vm_area_struct *vma, unsigned long address, | ||
252 | unsigned long *pfn) | ||
253 | { | ||
254 | if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) | ||
255 | return -EINVAL; | ||
256 | |||
257 | *pfn = address >> PAGE_SHIFT; | ||
258 | return 0; | ||
259 | } | ||
260 | EXPORT_SYMBOL(follow_pfn); | ||
261 | |||
243 | DEFINE_RWLOCK(vmlist_lock); | 262 | DEFINE_RWLOCK(vmlist_lock); |
244 | struct vm_struct *vmlist; | 263 | struct vm_struct *vmlist; |
245 | 264 | ||
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 7b0dcea4935b..81627ebcd313 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -541,8 +541,11 @@ static void balance_dirty_pages(struct address_space *mapping) | |||
541 | * filesystems (i.e. NFS) in which data may have been | 541 | * filesystems (i.e. NFS) in which data may have been |
542 | * written to the server's write cache, but has not yet | 542 | * written to the server's write cache, but has not yet |
543 | * been flushed to permanent storage. | 543 | * been flushed to permanent storage. |
544 | * Only move pages to writeback if this bdi is over its | ||
545 | * threshold otherwise wait until the disk writes catch | ||
546 | * up. | ||
544 | */ | 547 | */ |
545 | if (bdi_nr_reclaimable) { | 548 | if (bdi_nr_reclaimable > bdi_thresh) { |
546 | writeback_inodes(&wbc); | 549 | writeback_inodes(&wbc); |
547 | pages_written += write_chunk - wbc.nr_to_write; | 550 | pages_written += write_chunk - wbc.nr_to_write; |
548 | get_dirty_limits(&background_thresh, &dirty_thresh, | 551 | get_dirty_limits(&background_thresh, &dirty_thresh, |
@@ -572,7 +575,7 @@ static void balance_dirty_pages(struct address_space *mapping) | |||
572 | if (pages_written >= write_chunk) | 575 | if (pages_written >= write_chunk) |
573 | break; /* We've done our duty */ | 576 | break; /* We've done our duty */ |
574 | 577 | ||
575 | congestion_wait(WRITE, HZ/10); | 578 | congestion_wait(BLK_RW_ASYNC, HZ/10); |
576 | } | 579 | } |
577 | 580 | ||
578 | if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh && | 581 | if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh && |
@@ -666,7 +669,7 @@ void throttle_vm_writeout(gfp_t gfp_mask) | |||
666 | if (global_page_state(NR_UNSTABLE_NFS) + | 669 | if (global_page_state(NR_UNSTABLE_NFS) + |
667 | global_page_state(NR_WRITEBACK) <= dirty_thresh) | 670 | global_page_state(NR_WRITEBACK) <= dirty_thresh) |
668 | break; | 671 | break; |
669 | congestion_wait(WRITE, HZ/10); | 672 | congestion_wait(BLK_RW_ASYNC, HZ/10); |
670 | 673 | ||
671 | /* | 674 | /* |
672 | * The caller might hold locks which can prevent IO completion | 675 | * The caller might hold locks which can prevent IO completion |
@@ -712,7 +715,7 @@ static void background_writeout(unsigned long _min_pages) | |||
712 | if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { | 715 | if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { |
713 | /* Wrote less than expected */ | 716 | /* Wrote less than expected */ |
714 | if (wbc.encountered_congestion || wbc.more_io) | 717 | if (wbc.encountered_congestion || wbc.more_io) |
715 | congestion_wait(WRITE, HZ/10); | 718 | congestion_wait(BLK_RW_ASYNC, HZ/10); |
716 | else | 719 | else |
717 | break; | 720 | break; |
718 | } | 721 | } |
@@ -784,7 +787,7 @@ static void wb_kupdate(unsigned long arg) | |||
784 | writeback_inodes(&wbc); | 787 | writeback_inodes(&wbc); |
785 | if (wbc.nr_to_write > 0) { | 788 | if (wbc.nr_to_write > 0) { |
786 | if (wbc.encountered_congestion || wbc.more_io) | 789 | if (wbc.encountered_congestion || wbc.more_io) |
787 | congestion_wait(WRITE, HZ/10); | 790 | congestion_wait(BLK_RW_ASYNC, HZ/10); |
788 | else | 791 | else |
789 | break; /* All the old data is written */ | 792 | break; /* All the old data is written */ |
790 | } | 793 | } |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5d714f8fb303..d052abbe3063 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -882,7 +882,7 @@ retry_reserve: | |||
882 | */ | 882 | */ |
883 | static int rmqueue_bulk(struct zone *zone, unsigned int order, | 883 | static int rmqueue_bulk(struct zone *zone, unsigned int order, |
884 | unsigned long count, struct list_head *list, | 884 | unsigned long count, struct list_head *list, |
885 | int migratetype) | 885 | int migratetype, int cold) |
886 | { | 886 | { |
887 | int i; | 887 | int i; |
888 | 888 | ||
@@ -901,7 +901,10 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, | |||
901 | * merge IO requests if the physical pages are ordered | 901 | * merge IO requests if the physical pages are ordered |
902 | * properly. | 902 | * properly. |
903 | */ | 903 | */ |
904 | list_add(&page->lru, list); | 904 | if (likely(cold == 0)) |
905 | list_add(&page->lru, list); | ||
906 | else | ||
907 | list_add_tail(&page->lru, list); | ||
905 | set_page_private(page, migratetype); | 908 | set_page_private(page, migratetype); |
906 | list = &page->lru; | 909 | list = &page->lru; |
907 | } | 910 | } |
@@ -1119,7 +1122,8 @@ again: | |||
1119 | local_irq_save(flags); | 1122 | local_irq_save(flags); |
1120 | if (!pcp->count) { | 1123 | if (!pcp->count) { |
1121 | pcp->count = rmqueue_bulk(zone, 0, | 1124 | pcp->count = rmqueue_bulk(zone, 0, |
1122 | pcp->batch, &pcp->list, migratetype); | 1125 | pcp->batch, &pcp->list, |
1126 | migratetype, cold); | ||
1123 | if (unlikely(!pcp->count)) | 1127 | if (unlikely(!pcp->count)) |
1124 | goto failed; | 1128 | goto failed; |
1125 | } | 1129 | } |
@@ -1138,7 +1142,8 @@ again: | |||
1138 | /* Allocate more to the pcp list if necessary */ | 1142 | /* Allocate more to the pcp list if necessary */ |
1139 | if (unlikely(&page->lru == &pcp->list)) { | 1143 | if (unlikely(&page->lru == &pcp->list)) { |
1140 | pcp->count += rmqueue_bulk(zone, 0, | 1144 | pcp->count += rmqueue_bulk(zone, 0, |
1141 | pcp->batch, &pcp->list, migratetype); | 1145 | pcp->batch, &pcp->list, |
1146 | migratetype, cold); | ||
1142 | page = list_entry(pcp->list.next, struct page, lru); | 1147 | page = list_entry(pcp->list.next, struct page, lru); |
1143 | } | 1148 | } |
1144 | 1149 | ||
@@ -1666,7 +1671,7 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order, | |||
1666 | preferred_zone, migratetype); | 1671 | preferred_zone, migratetype); |
1667 | 1672 | ||
1668 | if (!page && gfp_mask & __GFP_NOFAIL) | 1673 | if (!page && gfp_mask & __GFP_NOFAIL) |
1669 | congestion_wait(WRITE, HZ/50); | 1674 | congestion_wait(BLK_RW_ASYNC, HZ/50); |
1670 | } while (!page && (gfp_mask & __GFP_NOFAIL)); | 1675 | } while (!page && (gfp_mask & __GFP_NOFAIL)); |
1671 | 1676 | ||
1672 | return page; | 1677 | return page; |
@@ -1740,8 +1745,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, | |||
1740 | * be using allocators in order of preference for an area that is | 1745 | * be using allocators in order of preference for an area that is |
1741 | * too large. | 1746 | * too large. |
1742 | */ | 1747 | */ |
1743 | if (WARN_ON_ONCE(order >= MAX_ORDER)) | 1748 | if (order >= MAX_ORDER) { |
1749 | WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN)); | ||
1744 | return NULL; | 1750 | return NULL; |
1751 | } | ||
1745 | 1752 | ||
1746 | /* | 1753 | /* |
1747 | * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and | 1754 | * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and |
@@ -1789,6 +1796,10 @@ rebalance: | |||
1789 | if (p->flags & PF_MEMALLOC) | 1796 | if (p->flags & PF_MEMALLOC) |
1790 | goto nopage; | 1797 | goto nopage; |
1791 | 1798 | ||
1799 | /* Avoid allocations with no watermarks from looping endlessly */ | ||
1800 | if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL)) | ||
1801 | goto nopage; | ||
1802 | |||
1792 | /* Try direct reclaim and then allocating */ | 1803 | /* Try direct reclaim and then allocating */ |
1793 | page = __alloc_pages_direct_reclaim(gfp_mask, order, | 1804 | page = __alloc_pages_direct_reclaim(gfp_mask, order, |
1794 | zonelist, high_zoneidx, | 1805 | zonelist, high_zoneidx, |
@@ -1831,7 +1842,7 @@ rebalance: | |||
1831 | pages_reclaimed += did_some_progress; | 1842 | pages_reclaimed += did_some_progress; |
1832 | if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) { | 1843 | if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) { |
1833 | /* Wait for some write requests to complete then retry */ | 1844 | /* Wait for some write requests to complete then retry */ |
1834 | congestion_wait(WRITE, HZ/50); | 1845 | congestion_wait(BLK_RW_ASYNC, HZ/50); |
1835 | goto rebalance; | 1846 | goto rebalance; |
1836 | } | 1847 | } |
1837 | 1848 | ||
@@ -1983,7 +1994,7 @@ void *alloc_pages_exact(size_t size, gfp_t gfp_mask) | |||
1983 | unsigned long alloc_end = addr + (PAGE_SIZE << order); | 1994 | unsigned long alloc_end = addr + (PAGE_SIZE << order); |
1984 | unsigned long used = addr + PAGE_ALIGN(size); | 1995 | unsigned long used = addr + PAGE_ALIGN(size); |
1985 | 1996 | ||
1986 | split_page(virt_to_page(addr), order); | 1997 | split_page(virt_to_page((void *)addr), order); |
1987 | while (used < alloc_end) { | 1998 | while (used < alloc_end) { |
1988 | free_page(used); | 1999 | free_page(used); |
1989 | used += PAGE_SIZE; | 2000 | used += PAGE_SIZE; |
@@ -4032,6 +4043,8 @@ static void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn) | |||
4032 | int i, nid; | 4043 | int i, nid; |
4033 | unsigned long usable_startpfn; | 4044 | unsigned long usable_startpfn; |
4034 | unsigned long kernelcore_node, kernelcore_remaining; | 4045 | unsigned long kernelcore_node, kernelcore_remaining; |
4046 | /* save the state before borrow the nodemask */ | ||
4047 | nodemask_t saved_node_state = node_states[N_HIGH_MEMORY]; | ||
4035 | unsigned long totalpages = early_calculate_totalpages(); | 4048 | unsigned long totalpages = early_calculate_totalpages(); |
4036 | int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]); | 4049 | int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]); |
4037 | 4050 | ||
@@ -4059,7 +4072,7 @@ static void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn) | |||
4059 | 4072 | ||
4060 | /* If kernelcore was not specified, there is no ZONE_MOVABLE */ | 4073 | /* If kernelcore was not specified, there is no ZONE_MOVABLE */ |
4061 | if (!required_kernelcore) | 4074 | if (!required_kernelcore) |
4062 | return; | 4075 | goto out; |
4063 | 4076 | ||
4064 | /* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */ | 4077 | /* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */ |
4065 | find_usable_zone_for_movable(); | 4078 | find_usable_zone_for_movable(); |
@@ -4158,6 +4171,10 @@ restart: | |||
4158 | for (nid = 0; nid < MAX_NUMNODES; nid++) | 4171 | for (nid = 0; nid < MAX_NUMNODES; nid++) |
4159 | zone_movable_pfn[nid] = | 4172 | zone_movable_pfn[nid] = |
4160 | roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES); | 4173 | roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES); |
4174 | |||
4175 | out: | ||
4176 | /* restore the node_state */ | ||
4177 | node_states[N_HIGH_MEMORY] = saved_node_state; | ||
4161 | } | 4178 | } |
4162 | 4179 | ||
4163 | /* Any regular memory on that node ? */ | 4180 | /* Any regular memory on that node ? */ |
@@ -4242,11 +4259,6 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) | |||
4242 | early_node_map[i].start_pfn, | 4259 | early_node_map[i].start_pfn, |
4243 | early_node_map[i].end_pfn); | 4260 | early_node_map[i].end_pfn); |
4244 | 4261 | ||
4245 | /* | ||
4246 | * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init | ||
4247 | * that node_mask, clear it at first | ||
4248 | */ | ||
4249 | nodes_clear(node_states[N_HIGH_MEMORY]); | ||
4250 | /* Initialise every node */ | 4262 | /* Initialise every node */ |
4251 | mminit_verify_pageflags_layout(); | 4263 | mminit_verify_pageflags_layout(); |
4252 | setup_nr_node_ids(); | 4264 | setup_nr_node_ids(); |
@@ -4744,8 +4756,10 @@ void *__init alloc_large_system_hash(const char *tablename, | |||
4744 | * some pages at the end of hash table which | 4756 | * some pages at the end of hash table which |
4745 | * alloc_pages_exact() automatically does | 4757 | * alloc_pages_exact() automatically does |
4746 | */ | 4758 | */ |
4747 | if (get_order(size) < MAX_ORDER) | 4759 | if (get_order(size) < MAX_ORDER) { |
4748 | table = alloc_pages_exact(size, GFP_ATOMIC); | 4760 | table = alloc_pages_exact(size, GFP_ATOMIC); |
4761 | kmemleak_alloc(table, size, 1, GFP_ATOMIC); | ||
4762 | } | ||
4749 | } | 4763 | } |
4750 | } while (!table && size > PAGE_SIZE && --log2qty); | 4764 | } while (!table && size > PAGE_SIZE && --log2qty); |
4751 | 4765 | ||
@@ -4763,16 +4777,6 @@ void *__init alloc_large_system_hash(const char *tablename, | |||
4763 | if (_hash_mask) | 4777 | if (_hash_mask) |
4764 | *_hash_mask = (1 << log2qty) - 1; | 4778 | *_hash_mask = (1 << log2qty) - 1; |
4765 | 4779 | ||
4766 | /* | ||
4767 | * If hashdist is set, the table allocation is done with __vmalloc() | ||
4768 | * which invokes the kmemleak_alloc() callback. This function may also | ||
4769 | * be called before the slab and kmemleak are initialised when | ||
4770 | * kmemleak simply buffers the request to be executed later | ||
4771 | * (GFP_ATOMIC flag ignored in this case). | ||
4772 | */ | ||
4773 | if (!hashdist) | ||
4774 | kmemleak_alloc(table, size, 1, GFP_ATOMIC); | ||
4775 | |||
4776 | return table; | 4780 | return table; |
4777 | } | 4781 | } |
4778 | 4782 | ||
diff --git a/mm/percpu.c b/mm/percpu.c index c0b2c1a76e81..b70f2acd8853 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
@@ -549,14 +549,14 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) | |||
549 | * @chunk: chunk of interest | 549 | * @chunk: chunk of interest |
550 | * @page_start: page index of the first page to unmap | 550 | * @page_start: page index of the first page to unmap |
551 | * @page_end: page index of the last page to unmap + 1 | 551 | * @page_end: page index of the last page to unmap + 1 |
552 | * @flush: whether to flush cache and tlb or not | 552 | * @flush_tlb: whether to flush tlb or not |
553 | * | 553 | * |
554 | * For each cpu, unmap pages [@page_start,@page_end) out of @chunk. | 554 | * For each cpu, unmap pages [@page_start,@page_end) out of @chunk. |
555 | * If @flush is true, vcache is flushed before unmapping and tlb | 555 | * If @flush is true, vcache is flushed before unmapping and tlb |
556 | * after. | 556 | * after. |
557 | */ | 557 | */ |
558 | static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end, | 558 | static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end, |
559 | bool flush) | 559 | bool flush_tlb) |
560 | { | 560 | { |
561 | unsigned int last = num_possible_cpus() - 1; | 561 | unsigned int last = num_possible_cpus() - 1; |
562 | unsigned int cpu; | 562 | unsigned int cpu; |
@@ -569,9 +569,8 @@ static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end, | |||
569 | * the whole region at once rather than doing it for each cpu. | 569 | * the whole region at once rather than doing it for each cpu. |
570 | * This could be an overkill but is more scalable. | 570 | * This could be an overkill but is more scalable. |
571 | */ | 571 | */ |
572 | if (flush) | 572 | flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start), |
573 | flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start), | 573 | pcpu_chunk_addr(chunk, last, page_end)); |
574 | pcpu_chunk_addr(chunk, last, page_end)); | ||
575 | 574 | ||
576 | for_each_possible_cpu(cpu) | 575 | for_each_possible_cpu(cpu) |
577 | unmap_kernel_range_noflush( | 576 | unmap_kernel_range_noflush( |
@@ -579,7 +578,7 @@ static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end, | |||
579 | (page_end - page_start) << PAGE_SHIFT); | 578 | (page_end - page_start) << PAGE_SHIFT); |
580 | 579 | ||
581 | /* ditto as flush_cache_vunmap() */ | 580 | /* ditto as flush_cache_vunmap() */ |
582 | if (flush) | 581 | if (flush_tlb) |
583 | flush_tlb_kernel_range(pcpu_chunk_addr(chunk, 0, page_start), | 582 | flush_tlb_kernel_range(pcpu_chunk_addr(chunk, 0, page_start), |
584 | pcpu_chunk_addr(chunk, last, page_end)); | 583 | pcpu_chunk_addr(chunk, last, page_end)); |
585 | } | 584 | } |
@@ -1234,6 +1233,7 @@ static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) | |||
1234 | ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, | 1233 | ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, |
1235 | ssize_t dyn_size, ssize_t unit_size) | 1234 | ssize_t dyn_size, ssize_t unit_size) |
1236 | { | 1235 | { |
1236 | size_t chunk_size; | ||
1237 | unsigned int cpu; | 1237 | unsigned int cpu; |
1238 | 1238 | ||
1239 | /* determine parameters and allocate */ | 1239 | /* determine parameters and allocate */ |
@@ -1248,11 +1248,15 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, | |||
1248 | } else | 1248 | } else |
1249 | pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); | 1249 | pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); |
1250 | 1250 | ||
1251 | pcpue_ptr = __alloc_bootmem_nopanic( | 1251 | chunk_size = pcpue_unit_size * num_possible_cpus(); |
1252 | num_possible_cpus() * pcpue_unit_size, | 1252 | |
1253 | PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); | 1253 | pcpue_ptr = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, |
1254 | if (!pcpue_ptr) | 1254 | __pa(MAX_DMA_ADDRESS)); |
1255 | if (!pcpue_ptr) { | ||
1256 | pr_warning("PERCPU: failed to allocate %zu bytes for " | ||
1257 | "embedding\n", chunk_size); | ||
1255 | return -ENOMEM; | 1258 | return -ENOMEM; |
1259 | } | ||
1256 | 1260 | ||
1257 | /* return the leftover and copy */ | 1261 | /* return the leftover and copy */ |
1258 | for_each_possible_cpu(cpu) { | 1262 | for_each_possible_cpu(cpu) { |
@@ -1544,9 +1544,6 @@ void __init kmem_cache_init(void) | |||
1544 | } | 1544 | } |
1545 | 1545 | ||
1546 | g_cpucache_up = EARLY; | 1546 | g_cpucache_up = EARLY; |
1547 | |||
1548 | /* Annotate slab for lockdep -- annotate the malloc caches */ | ||
1549 | init_lock_keys(); | ||
1550 | } | 1547 | } |
1551 | 1548 | ||
1552 | void __init kmem_cache_init_late(void) | 1549 | void __init kmem_cache_init_late(void) |
@@ -1563,6 +1560,9 @@ void __init kmem_cache_init_late(void) | |||
1563 | /* Done! */ | 1560 | /* Done! */ |
1564 | g_cpucache_up = FULL; | 1561 | g_cpucache_up = FULL; |
1565 | 1562 | ||
1563 | /* Annotate slab for lockdep -- annotate the malloc caches */ | ||
1564 | init_lock_keys(); | ||
1565 | |||
1566 | /* | 1566 | /* |
1567 | * Register a cpu startup notifier callback that initializes | 1567 | * Register a cpu startup notifier callback that initializes |
1568 | * cpu_cache_get for all new cpus | 1568 | * cpu_cache_get for all new cpus |
@@ -2547,7 +2547,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep) | |||
2547 | } | 2547 | } |
2548 | 2548 | ||
2549 | if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) | 2549 | if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) |
2550 | synchronize_rcu(); | 2550 | rcu_barrier(); |
2551 | 2551 | ||
2552 | __kmem_cache_destroy(cachep); | 2552 | __kmem_cache_destroy(cachep); |
2553 | mutex_unlock(&cache_chain_mutex); | 2553 | mutex_unlock(&cache_chain_mutex); |
@@ -595,6 +595,8 @@ EXPORT_SYMBOL(kmem_cache_create); | |||
595 | void kmem_cache_destroy(struct kmem_cache *c) | 595 | void kmem_cache_destroy(struct kmem_cache *c) |
596 | { | 596 | { |
597 | kmemleak_free(c); | 597 | kmemleak_free(c); |
598 | if (c->flags & SLAB_DESTROY_BY_RCU) | ||
599 | rcu_barrier(); | ||
598 | slob_free(c, sizeof(struct kmem_cache)); | 600 | slob_free(c, sizeof(struct kmem_cache)); |
599 | } | 601 | } |
600 | EXPORT_SYMBOL(kmem_cache_destroy); | 602 | EXPORT_SYMBOL(kmem_cache_destroy); |
@@ -21,7 +21,6 @@ | |||
21 | #include <linux/kmemcheck.h> | 21 | #include <linux/kmemcheck.h> |
22 | #include <linux/cpu.h> | 22 | #include <linux/cpu.h> |
23 | #include <linux/cpuset.h> | 23 | #include <linux/cpuset.h> |
24 | #include <linux/kmemleak.h> | ||
25 | #include <linux/mempolicy.h> | 24 | #include <linux/mempolicy.h> |
26 | #include <linux/ctype.h> | 25 | #include <linux/ctype.h> |
27 | #include <linux/debugobjects.h> | 26 | #include <linux/debugobjects.h> |
@@ -2595,6 +2594,8 @@ static inline int kmem_cache_close(struct kmem_cache *s) | |||
2595 | */ | 2594 | */ |
2596 | void kmem_cache_destroy(struct kmem_cache *s) | 2595 | void kmem_cache_destroy(struct kmem_cache *s) |
2597 | { | 2596 | { |
2597 | if (s->flags & SLAB_DESTROY_BY_RCU) | ||
2598 | rcu_barrier(); | ||
2598 | down_write(&slub_lock); | 2599 | down_write(&slub_lock); |
2599 | s->refcount--; | 2600 | s->refcount--; |
2600 | if (!s->refcount) { | 2601 | if (!s->refcount) { |
@@ -2833,13 +2834,15 @@ EXPORT_SYMBOL(__kmalloc); | |||
2833 | static void *kmalloc_large_node(size_t size, gfp_t flags, int node) | 2834 | static void *kmalloc_large_node(size_t size, gfp_t flags, int node) |
2834 | { | 2835 | { |
2835 | struct page *page; | 2836 | struct page *page; |
2837 | void *ptr = NULL; | ||
2836 | 2838 | ||
2837 | flags |= __GFP_COMP | __GFP_NOTRACK; | 2839 | flags |= __GFP_COMP | __GFP_NOTRACK; |
2838 | page = alloc_pages_node(node, flags, get_order(size)); | 2840 | page = alloc_pages_node(node, flags, get_order(size)); |
2839 | if (page) | 2841 | if (page) |
2840 | return page_address(page); | 2842 | ptr = page_address(page); |
2841 | else | 2843 | |
2842 | return NULL; | 2844 | kmemleak_alloc(ptr, size, 1, flags); |
2845 | return ptr; | ||
2843 | } | 2846 | } |
2844 | 2847 | ||
2845 | #ifdef CONFIG_NUMA | 2848 | #ifdef CONFIG_NUMA |
@@ -2924,6 +2927,7 @@ void kfree(const void *x) | |||
2924 | page = virt_to_head_page(x); | 2927 | page = virt_to_head_page(x); |
2925 | if (unlikely(!PageSlab(page))) { | 2928 | if (unlikely(!PageSlab(page))) { |
2926 | BUG_ON(!PageCompound(page)); | 2929 | BUG_ON(!PageCompound(page)); |
2930 | kmemleak_free(x); | ||
2927 | put_page(page); | 2931 | put_page(page); |
2928 | return; | 2932 | return; |
2929 | } | 2933 | } |
diff --git a/mm/swapfile.c b/mm/swapfile.c index d1ade1a48ee7..8ffdc0d23c53 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -753,7 +753,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p) | |||
753 | 753 | ||
754 | if (!bdev) { | 754 | if (!bdev) { |
755 | if (bdev_p) | 755 | if (bdev_p) |
756 | *bdev_p = bdget(sis->bdev->bd_dev); | 756 | *bdev_p = bdgrab(sis->bdev); |
757 | 757 | ||
758 | spin_unlock(&swap_lock); | 758 | spin_unlock(&swap_lock); |
759 | return i; | 759 | return i; |
@@ -765,7 +765,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p) | |||
765 | struct swap_extent, list); | 765 | struct swap_extent, list); |
766 | if (se->start_block == offset) { | 766 | if (se->start_block == offset) { |
767 | if (bdev_p) | 767 | if (bdev_p) |
768 | *bdev_p = bdget(sis->bdev->bd_dev); | 768 | *bdev_p = bdgrab(sis->bdev); |
769 | 769 | ||
770 | spin_unlock(&swap_lock); | 770 | spin_unlock(&swap_lock); |
771 | bdput(bdev); | 771 | bdput(bdev); |
diff --git a/mm/vmscan.c b/mm/vmscan.c index 54155268dfca..dea7abd31098 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -1104,7 +1104,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, | |||
1104 | */ | 1104 | */ |
1105 | if (nr_freed < nr_taken && !current_is_kswapd() && | 1105 | if (nr_freed < nr_taken && !current_is_kswapd() && |
1106 | lumpy_reclaim) { | 1106 | lumpy_reclaim) { |
1107 | congestion_wait(WRITE, HZ/10); | 1107 | congestion_wait(BLK_RW_ASYNC, HZ/10); |
1108 | 1108 | ||
1109 | /* | 1109 | /* |
1110 | * The attempt at page out may have made some | 1110 | * The attempt at page out may have made some |
@@ -1721,7 +1721,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
1721 | 1721 | ||
1722 | /* Take a nap, wait for some writeback to complete */ | 1722 | /* Take a nap, wait for some writeback to complete */ |
1723 | if (sc->nr_scanned && priority < DEF_PRIORITY - 2) | 1723 | if (sc->nr_scanned && priority < DEF_PRIORITY - 2) |
1724 | congestion_wait(WRITE, HZ/10); | 1724 | congestion_wait(BLK_RW_ASYNC, HZ/10); |
1725 | } | 1725 | } |
1726 | /* top priority shrink_zones still had more to do? don't OOM, then */ | 1726 | /* top priority shrink_zones still had more to do? don't OOM, then */ |
1727 | if (!sc->all_unreclaimable && scanning_global_lru(sc)) | 1727 | if (!sc->all_unreclaimable && scanning_global_lru(sc)) |
@@ -1960,7 +1960,7 @@ loop_again: | |||
1960 | * another pass across the zones. | 1960 | * another pass across the zones. |
1961 | */ | 1961 | */ |
1962 | if (total_scanned && priority < DEF_PRIORITY - 2) | 1962 | if (total_scanned && priority < DEF_PRIORITY - 2) |
1963 | congestion_wait(WRITE, HZ/10); | 1963 | congestion_wait(BLK_RW_ASYNC, HZ/10); |
1964 | 1964 | ||
1965 | /* | 1965 | /* |
1966 | * We do this so kswapd doesn't build up large priorities for | 1966 | * We do this so kswapd doesn't build up large priorities for |
@@ -2233,7 +2233,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages) | |||
2233 | goto out; | 2233 | goto out; |
2234 | 2234 | ||
2235 | if (sc.nr_scanned && prio < DEF_PRIORITY - 2) | 2235 | if (sc.nr_scanned && prio < DEF_PRIORITY - 2) |
2236 | congestion_wait(WRITE, HZ / 10); | 2236 | congestion_wait(BLK_RW_ASYNC, HZ / 10); |
2237 | } | 2237 | } |
2238 | } | 2238 | } |
2239 | 2239 | ||