aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/dmapool.c2
-rw-r--r--mm/hugetlb.c17
-rw-r--r--mm/kmemleak.c243
-rw-r--r--mm/memory.c34
-rw-r--r--mm/nommu.c33
-rw-r--r--mm/page-writeback.c5
-rw-r--r--mm/page_alloc.c23
-rw-r--r--mm/shmem.c6
-rw-r--r--mm/shmem_acl.c29
-rw-r--r--mm/slub.c10
-rw-r--r--mm/thrash.c32
-rw-r--r--mm/vmscan.c2
12 files changed, 193 insertions, 243 deletions
diff --git a/mm/dmapool.c b/mm/dmapool.c
index b1f0885dda22..3df063706f53 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -86,10 +86,12 @@ show_pools(struct device *dev, struct device_attribute *attr, char *buf)
86 unsigned pages = 0; 86 unsigned pages = 0;
87 unsigned blocks = 0; 87 unsigned blocks = 0;
88 88
89 spin_lock_irq(&pool->lock);
89 list_for_each_entry(page, &pool->page_list, page_list) { 90 list_for_each_entry(page, &pool->page_list, page_list) {
90 pages++; 91 pages++;
91 blocks += page->in_use; 92 blocks += page->in_use;
92 } 93 }
94 spin_unlock_irq(&pool->lock);
93 95
94 /* per-pool info, no real statistics yet */ 96 /* per-pool info, no real statistics yet */
95 temp = scnprintf(next, size, "%-16s %4u %4Zu %4Zu %2u\n", 97 temp = scnprintf(next, size, "%-16s %4u %4Zu %4Zu %2u\n",
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a56e6f3ce979..d0351e31f474 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1985,7 +1985,7 @@ static struct page *hugetlbfs_pagecache_page(struct hstate *h,
1985} 1985}
1986 1986
1987static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, 1987static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
1988 unsigned long address, pte_t *ptep, int write_access) 1988 unsigned long address, pte_t *ptep, unsigned int flags)
1989{ 1989{
1990 struct hstate *h = hstate_vma(vma); 1990 struct hstate *h = hstate_vma(vma);
1991 int ret = VM_FAULT_SIGBUS; 1991 int ret = VM_FAULT_SIGBUS;
@@ -2053,7 +2053,7 @@ retry:
2053 * any allocations necessary to record that reservation occur outside 2053 * any allocations necessary to record that reservation occur outside
2054 * the spinlock. 2054 * the spinlock.
2055 */ 2055 */
2056 if (write_access && !(vma->vm_flags & VM_SHARED)) 2056 if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED))
2057 if (vma_needs_reservation(h, vma, address) < 0) { 2057 if (vma_needs_reservation(h, vma, address) < 0) {
2058 ret = VM_FAULT_OOM; 2058 ret = VM_FAULT_OOM;
2059 goto backout_unlocked; 2059 goto backout_unlocked;
@@ -2072,7 +2072,7 @@ retry:
2072 && (vma->vm_flags & VM_SHARED))); 2072 && (vma->vm_flags & VM_SHARED)));
2073 set_huge_pte_at(mm, address, ptep, new_pte); 2073 set_huge_pte_at(mm, address, ptep, new_pte);
2074 2074
2075 if (write_access && !(vma->vm_flags & VM_SHARED)) { 2075 if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
2076 /* Optimization, do the COW without a second fault */ 2076 /* Optimization, do the COW without a second fault */
2077 ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page); 2077 ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page);
2078 } 2078 }
@@ -2091,7 +2091,7 @@ backout_unlocked:
2091} 2091}
2092 2092
2093int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, 2093int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2094 unsigned long address, int write_access) 2094 unsigned long address, unsigned int flags)
2095{ 2095{
2096 pte_t *ptep; 2096 pte_t *ptep;
2097 pte_t entry; 2097 pte_t entry;
@@ -2112,7 +2112,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2112 mutex_lock(&hugetlb_instantiation_mutex); 2112 mutex_lock(&hugetlb_instantiation_mutex);
2113 entry = huge_ptep_get(ptep); 2113 entry = huge_ptep_get(ptep);
2114 if (huge_pte_none(entry)) { 2114 if (huge_pte_none(entry)) {
2115 ret = hugetlb_no_page(mm, vma, address, ptep, write_access); 2115 ret = hugetlb_no_page(mm, vma, address, ptep, flags);
2116 goto out_mutex; 2116 goto out_mutex;
2117 } 2117 }
2118 2118
@@ -2126,7 +2126,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2126 * page now as it is used to determine if a reservation has been 2126 * page now as it is used to determine if a reservation has been
2127 * consumed. 2127 * consumed.
2128 */ 2128 */
2129 if (write_access && !pte_write(entry)) { 2129 if ((flags & FAULT_FLAG_WRITE) && !pte_write(entry)) {
2130 if (vma_needs_reservation(h, vma, address) < 0) { 2130 if (vma_needs_reservation(h, vma, address) < 0) {
2131 ret = VM_FAULT_OOM; 2131 ret = VM_FAULT_OOM;
2132 goto out_mutex; 2132 goto out_mutex;
@@ -2143,7 +2143,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2143 goto out_page_table_lock; 2143 goto out_page_table_lock;
2144 2144
2145 2145
2146 if (write_access) { 2146 if (flags & FAULT_FLAG_WRITE) {
2147 if (!pte_write(entry)) { 2147 if (!pte_write(entry)) {
2148 ret = hugetlb_cow(mm, vma, address, ptep, entry, 2148 ret = hugetlb_cow(mm, vma, address, ptep, entry,
2149 pagecache_page); 2149 pagecache_page);
@@ -2152,7 +2152,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2152 entry = pte_mkdirty(entry); 2152 entry = pte_mkdirty(entry);
2153 } 2153 }
2154 entry = pte_mkyoung(entry); 2154 entry = pte_mkyoung(entry);
2155 if (huge_ptep_set_access_flags(vma, address, ptep, entry, write_access)) 2155 if (huge_ptep_set_access_flags(vma, address, ptep, entry,
2156 flags & FAULT_FLAG_WRITE))
2156 update_mmu_cache(vma, address, entry); 2157 update_mmu_cache(vma, address, entry);
2157 2158
2158out_page_table_lock: 2159out_page_table_lock:
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index ec759b60077a..e766e1da09d2 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -48,10 +48,10 @@
48 * scanned. This list is only modified during a scanning episode when the 48 * scanned. This list is only modified during a scanning episode when the
49 * scan_mutex is held. At the end of a scan, the gray_list is always empty. 49 * scan_mutex is held. At the end of a scan, the gray_list is always empty.
50 * Note that the kmemleak_object.use_count is incremented when an object is 50 * Note that the kmemleak_object.use_count is incremented when an object is
51 * added to the gray_list and therefore cannot be freed 51 * added to the gray_list and therefore cannot be freed. This mutex also
52 * - kmemleak_mutex (mutex): prevents multiple users of the "kmemleak" debugfs 52 * prevents multiple users of the "kmemleak" debugfs file together with
53 * file together with modifications to the memory scanning parameters 53 * modifications to the memory scanning parameters including the scan_thread
54 * including the scan_thread pointer 54 * pointer
55 * 55 *
56 * The kmemleak_object structures have a use_count incremented or decremented 56 * The kmemleak_object structures have a use_count incremented or decremented
57 * using the get_object()/put_object() functions. When the use_count becomes 57 * using the get_object()/put_object() functions. When the use_count becomes
@@ -61,6 +61,8 @@
61 * structure. 61 * structure.
62 */ 62 */
63 63
64#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
65
64#include <linux/init.h> 66#include <linux/init.h>
65#include <linux/kernel.h> 67#include <linux/kernel.h>
66#include <linux/list.h> 68#include <linux/list.h>
@@ -103,7 +105,6 @@
103#define MAX_TRACE 16 /* stack trace length */ 105#define MAX_TRACE 16 /* stack trace length */
104#define REPORTS_NR 50 /* maximum number of reported leaks */ 106#define REPORTS_NR 50 /* maximum number of reported leaks */
105#define MSECS_MIN_AGE 5000 /* minimum object age for reporting */ 107#define MSECS_MIN_AGE 5000 /* minimum object age for reporting */
106#define MSECS_SCAN_YIELD 10 /* CPU yielding period */
107#define SECS_FIRST_SCAN 60 /* delay before the first scan */ 108#define SECS_FIRST_SCAN 60 /* delay before the first scan */
108#define SECS_SCAN_WAIT 600 /* subsequent auto scanning delay */ 109#define SECS_SCAN_WAIT 600 /* subsequent auto scanning delay */
109 110
@@ -184,19 +185,16 @@ static atomic_t kmemleak_error = ATOMIC_INIT(0);
184static unsigned long min_addr = ULONG_MAX; 185static unsigned long min_addr = ULONG_MAX;
185static unsigned long max_addr; 186static unsigned long max_addr;
186 187
187/* used for yielding the CPU to other tasks during scanning */
188static unsigned long next_scan_yield;
189static struct task_struct *scan_thread; 188static struct task_struct *scan_thread;
190static unsigned long jiffies_scan_yield; 189/* used to avoid reporting of recently allocated objects */
191static unsigned long jiffies_min_age; 190static unsigned long jiffies_min_age;
191static unsigned long jiffies_last_scan;
192/* delay between automatic memory scannings */ 192/* delay between automatic memory scannings */
193static signed long jiffies_scan_wait; 193static signed long jiffies_scan_wait;
194/* enables or disables the task stacks scanning */ 194/* enables or disables the task stacks scanning */
195static int kmemleak_stack_scan; 195static int kmemleak_stack_scan = 1;
196/* mutex protecting the memory scanning */ 196/* protects the memory scanning, parameters and debug/kmemleak file access */
197static DEFINE_MUTEX(scan_mutex); 197static DEFINE_MUTEX(scan_mutex);
198/* mutex protecting the access to the /sys/kernel/debug/kmemleak file */
199static DEFINE_MUTEX(kmemleak_mutex);
200 198
201/* number of leaks reported (for limitation purposes) */ 199/* number of leaks reported (for limitation purposes) */
202static int reported_leaks; 200static int reported_leaks;
@@ -233,7 +231,7 @@ struct early_log {
233}; 231};
234 232
235/* early logging buffer and current position */ 233/* early logging buffer and current position */
236static struct early_log early_log[200]; 234static struct early_log early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE];
237static int crt_early_log; 235static int crt_early_log;
238 236
239static void kmemleak_disable(void); 237static void kmemleak_disable(void);
@@ -277,15 +275,6 @@ static int color_gray(const struct kmemleak_object *object)
277} 275}
278 276
279/* 277/*
280 * Objects are considered referenced if their color is gray and they have not
281 * been deleted.
282 */
283static int referenced_object(struct kmemleak_object *object)
284{
285 return (object->flags & OBJECT_ALLOCATED) && color_gray(object);
286}
287
288/*
289 * Objects are considered unreferenced only if their color is white, they have 278 * Objects are considered unreferenced only if their color is white, they have
290 * not be deleted and have a minimum age to avoid false positives caused by 279 * not be deleted and have a minimum age to avoid false positives caused by
291 * pointers temporarily stored in CPU registers. 280 * pointers temporarily stored in CPU registers.
@@ -293,42 +282,28 @@ static int referenced_object(struct kmemleak_object *object)
293static int unreferenced_object(struct kmemleak_object *object) 282static int unreferenced_object(struct kmemleak_object *object)
294{ 283{
295 return (object->flags & OBJECT_ALLOCATED) && color_white(object) && 284 return (object->flags & OBJECT_ALLOCATED) && color_white(object) &&
296 time_is_before_eq_jiffies(object->jiffies + jiffies_min_age); 285 time_before_eq(object->jiffies + jiffies_min_age,
286 jiffies_last_scan);
297} 287}
298 288
299/* 289/*
300 * Printing of the (un)referenced objects information, either to the seq file 290 * Printing of the unreferenced objects information to the seq file. The
301 * or to the kernel log. The print_referenced/print_unreferenced functions 291 * print_unreferenced function must be called with the object->lock held.
302 * must be called with the object->lock held.
303 */ 292 */
304#define print_helper(seq, x...) do { \
305 struct seq_file *s = (seq); \
306 if (s) \
307 seq_printf(s, x); \
308 else \
309 pr_info(x); \
310} while (0)
311
312static void print_referenced(struct kmemleak_object *object)
313{
314 pr_info("kmemleak: referenced object 0x%08lx (size %zu)\n",
315 object->pointer, object->size);
316}
317
318static void print_unreferenced(struct seq_file *seq, 293static void print_unreferenced(struct seq_file *seq,
319 struct kmemleak_object *object) 294 struct kmemleak_object *object)
320{ 295{
321 int i; 296 int i;
322 297
323 print_helper(seq, "kmemleak: unreferenced object 0x%08lx (size %zu):\n", 298 seq_printf(seq, "unreferenced object 0x%08lx (size %zu):\n",
324 object->pointer, object->size); 299 object->pointer, object->size);
325 print_helper(seq, " comm \"%s\", pid %d, jiffies %lu\n", 300 seq_printf(seq, " comm \"%s\", pid %d, jiffies %lu\n",
326 object->comm, object->pid, object->jiffies); 301 object->comm, object->pid, object->jiffies);
327 print_helper(seq, " backtrace:\n"); 302 seq_printf(seq, " backtrace:\n");
328 303
329 for (i = 0; i < object->trace_len; i++) { 304 for (i = 0; i < object->trace_len; i++) {
330 void *ptr = (void *)object->trace[i]; 305 void *ptr = (void *)object->trace[i];
331 print_helper(seq, " [<%p>] %pS\n", ptr, ptr); 306 seq_printf(seq, " [<%p>] %pS\n", ptr, ptr);
332 } 307 }
333} 308}
334 309
@@ -344,7 +319,7 @@ static void dump_object_info(struct kmemleak_object *object)
344 trace.nr_entries = object->trace_len; 319 trace.nr_entries = object->trace_len;
345 trace.entries = object->trace; 320 trace.entries = object->trace;
346 321
347 pr_notice("kmemleak: Object 0x%08lx (size %zu):\n", 322 pr_notice("Object 0x%08lx (size %zu):\n",
348 object->tree_node.start, object->size); 323 object->tree_node.start, object->size);
349 pr_notice(" comm \"%s\", pid %d, jiffies %lu\n", 324 pr_notice(" comm \"%s\", pid %d, jiffies %lu\n",
350 object->comm, object->pid, object->jiffies); 325 object->comm, object->pid, object->jiffies);
@@ -372,7 +347,7 @@ static struct kmemleak_object *lookup_object(unsigned long ptr, int alias)
372 object = prio_tree_entry(node, struct kmemleak_object, 347 object = prio_tree_entry(node, struct kmemleak_object,
373 tree_node); 348 tree_node);
374 if (!alias && object->pointer != ptr) { 349 if (!alias && object->pointer != ptr) {
375 kmemleak_warn("kmemleak: Found object by alias"); 350 kmemleak_warn("Found object by alias");
376 object = NULL; 351 object = NULL;
377 } 352 }
378 } else 353 } else
@@ -467,8 +442,7 @@ static void create_object(unsigned long ptr, size_t size, int min_count,
467 442
468 object = kmem_cache_alloc(object_cache, gfp & GFP_KMEMLEAK_MASK); 443 object = kmem_cache_alloc(object_cache, gfp & GFP_KMEMLEAK_MASK);
469 if (!object) { 444 if (!object) {
470 kmemleak_stop("kmemleak: Cannot allocate a kmemleak_object " 445 kmemleak_stop("Cannot allocate a kmemleak_object structure\n");
471 "structure\n");
472 return; 446 return;
473 } 447 }
474 448
@@ -527,8 +501,8 @@ static void create_object(unsigned long ptr, size_t size, int min_count,
527 if (node != &object->tree_node) { 501 if (node != &object->tree_node) {
528 unsigned long flags; 502 unsigned long flags;
529 503
530 kmemleak_stop("kmemleak: Cannot insert 0x%lx into the object " 504 kmemleak_stop("Cannot insert 0x%lx into the object search tree "
531 "search tree (already existing)\n", ptr); 505 "(already existing)\n", ptr);
532 object = lookup_object(ptr, 1); 506 object = lookup_object(ptr, 1);
533 spin_lock_irqsave(&object->lock, flags); 507 spin_lock_irqsave(&object->lock, flags);
534 dump_object_info(object); 508 dump_object_info(object);
@@ -553,8 +527,10 @@ static void delete_object(unsigned long ptr)
553 write_lock_irqsave(&kmemleak_lock, flags); 527 write_lock_irqsave(&kmemleak_lock, flags);
554 object = lookup_object(ptr, 0); 528 object = lookup_object(ptr, 0);
555 if (!object) { 529 if (!object) {
556 kmemleak_warn("kmemleak: Freeing unknown object at 0x%08lx\n", 530#ifdef DEBUG
531 kmemleak_warn("Freeing unknown object at 0x%08lx\n",
557 ptr); 532 ptr);
533#endif
558 write_unlock_irqrestore(&kmemleak_lock, flags); 534 write_unlock_irqrestore(&kmemleak_lock, flags);
559 return; 535 return;
560 } 536 }
@@ -570,8 +546,6 @@ static void delete_object(unsigned long ptr)
570 * cannot be freed when it is being scanned. 546 * cannot be freed when it is being scanned.
571 */ 547 */
572 spin_lock_irqsave(&object->lock, flags); 548 spin_lock_irqsave(&object->lock, flags);
573 if (object->flags & OBJECT_REPORTED)
574 print_referenced(object);
575 object->flags &= ~OBJECT_ALLOCATED; 549 object->flags &= ~OBJECT_ALLOCATED;
576 spin_unlock_irqrestore(&object->lock, flags); 550 spin_unlock_irqrestore(&object->lock, flags);
577 put_object(object); 551 put_object(object);
@@ -588,8 +562,7 @@ static void make_gray_object(unsigned long ptr)
588 562
589 object = find_and_get_object(ptr, 0); 563 object = find_and_get_object(ptr, 0);
590 if (!object) { 564 if (!object) {
591 kmemleak_warn("kmemleak: Graying unknown object at 0x%08lx\n", 565 kmemleak_warn("Graying unknown object at 0x%08lx\n", ptr);
592 ptr);
593 return; 566 return;
594 } 567 }
595 568
@@ -610,8 +583,7 @@ static void make_black_object(unsigned long ptr)
610 583
611 object = find_and_get_object(ptr, 0); 584 object = find_and_get_object(ptr, 0);
612 if (!object) { 585 if (!object) {
613 kmemleak_warn("kmemleak: Blacking unknown object at 0x%08lx\n", 586 kmemleak_warn("Blacking unknown object at 0x%08lx\n", ptr);
614 ptr);
615 return; 587 return;
616 } 588 }
617 589
@@ -634,21 +606,20 @@ static void add_scan_area(unsigned long ptr, unsigned long offset,
634 606
635 object = find_and_get_object(ptr, 0); 607 object = find_and_get_object(ptr, 0);
636 if (!object) { 608 if (!object) {
637 kmemleak_warn("kmemleak: Adding scan area to unknown " 609 kmemleak_warn("Adding scan area to unknown object at 0x%08lx\n",
638 "object at 0x%08lx\n", ptr); 610 ptr);
639 return; 611 return;
640 } 612 }
641 613
642 area = kmem_cache_alloc(scan_area_cache, gfp & GFP_KMEMLEAK_MASK); 614 area = kmem_cache_alloc(scan_area_cache, gfp & GFP_KMEMLEAK_MASK);
643 if (!area) { 615 if (!area) {
644 kmemleak_warn("kmemleak: Cannot allocate a scan area\n"); 616 kmemleak_warn("Cannot allocate a scan area\n");
645 goto out; 617 goto out;
646 } 618 }
647 619
648 spin_lock_irqsave(&object->lock, flags); 620 spin_lock_irqsave(&object->lock, flags);
649 if (offset + length > object->size) { 621 if (offset + length > object->size) {
650 kmemleak_warn("kmemleak: Scan area larger than object " 622 kmemleak_warn("Scan area larger than object 0x%08lx\n", ptr);
651 "0x%08lx\n", ptr);
652 dump_object_info(object); 623 dump_object_info(object);
653 kmem_cache_free(scan_area_cache, area); 624 kmem_cache_free(scan_area_cache, area);
654 goto out_unlock; 625 goto out_unlock;
@@ -677,8 +648,7 @@ static void object_no_scan(unsigned long ptr)
677 648
678 object = find_and_get_object(ptr, 0); 649 object = find_and_get_object(ptr, 0);
679 if (!object) { 650 if (!object) {
680 kmemleak_warn("kmemleak: Not scanning unknown object at " 651 kmemleak_warn("Not scanning unknown object at 0x%08lx\n", ptr);
681 "0x%08lx\n", ptr);
682 return; 652 return;
683 } 653 }
684 654
@@ -699,7 +669,8 @@ static void log_early(int op_type, const void *ptr, size_t size,
699 struct early_log *log; 669 struct early_log *log;
700 670
701 if (crt_early_log >= ARRAY_SIZE(early_log)) { 671 if (crt_early_log >= ARRAY_SIZE(early_log)) {
702 kmemleak_stop("kmemleak: Early log buffer exceeded\n"); 672 pr_warning("Early log buffer exceeded\n");
673 kmemleak_disable();
703 return; 674 return;
704 } 675 }
705 676
@@ -811,21 +782,6 @@ void kmemleak_no_scan(const void *ptr)
811EXPORT_SYMBOL(kmemleak_no_scan); 782EXPORT_SYMBOL(kmemleak_no_scan);
812 783
813/* 784/*
814 * Yield the CPU so that other tasks get a chance to run. The yielding is
815 * rate-limited to avoid excessive number of calls to the schedule() function
816 * during memory scanning.
817 */
818static void scan_yield(void)
819{
820 might_sleep();
821
822 if (time_is_before_eq_jiffies(next_scan_yield)) {
823 schedule();
824 next_scan_yield = jiffies + jiffies_scan_yield;
825 }
826}
827
828/*
829 * Memory scanning is a long process and it needs to be interruptable. This 785 * Memory scanning is a long process and it needs to be interruptable. This
830 * function checks whether such interrupt condition occured. 786 * function checks whether such interrupt condition occured.
831 */ 787 */
@@ -865,15 +821,6 @@ static void scan_block(void *_start, void *_end,
865 if (scan_should_stop()) 821 if (scan_should_stop())
866 break; 822 break;
867 823
868 /*
869 * When scanning a memory block with a corresponding
870 * kmemleak_object, the CPU yielding is handled in the calling
871 * code since it holds the object->lock to avoid the block
872 * freeing.
873 */
874 if (!scanned)
875 scan_yield();
876
877 object = find_and_get_object(pointer, 1); 824 object = find_and_get_object(pointer, 1);
878 if (!object) 825 if (!object)
879 continue; 826 continue;
@@ -955,6 +902,9 @@ static void kmemleak_scan(void)
955 struct kmemleak_object *object, *tmp; 902 struct kmemleak_object *object, *tmp;
956 struct task_struct *task; 903 struct task_struct *task;
957 int i; 904 int i;
905 int new_leaks = 0;
906
907 jiffies_last_scan = jiffies;
958 908
959 /* prepare the kmemleak_object's */ 909 /* prepare the kmemleak_object's */
960 rcu_read_lock(); 910 rcu_read_lock();
@@ -966,7 +916,7 @@ static void kmemleak_scan(void)
966 * 1 reference to any object at this point. 916 * 1 reference to any object at this point.
967 */ 917 */
968 if (atomic_read(&object->use_count) > 1) { 918 if (atomic_read(&object->use_count) > 1) {
969 pr_debug("kmemleak: object->use_count = %d\n", 919 pr_debug("object->use_count = %d\n",
970 atomic_read(&object->use_count)); 920 atomic_read(&object->use_count));
971 dump_object_info(object); 921 dump_object_info(object);
972 } 922 }
@@ -1036,7 +986,7 @@ static void kmemleak_scan(void)
1036 */ 986 */
1037 object = list_entry(gray_list.next, typeof(*object), gray_list); 987 object = list_entry(gray_list.next, typeof(*object), gray_list);
1038 while (&object->gray_list != &gray_list) { 988 while (&object->gray_list != &gray_list) {
1039 scan_yield(); 989 cond_resched();
1040 990
1041 /* may add new objects to the list */ 991 /* may add new objects to the list */
1042 if (!scan_should_stop()) 992 if (!scan_should_stop())
@@ -1052,6 +1002,32 @@ static void kmemleak_scan(void)
1052 object = tmp; 1002 object = tmp;
1053 } 1003 }
1054 WARN_ON(!list_empty(&gray_list)); 1004 WARN_ON(!list_empty(&gray_list));
1005
1006 /*
1007 * If scanning was stopped do not report any new unreferenced objects.
1008 */
1009 if (scan_should_stop())
1010 return;
1011
1012 /*
1013 * Scanning result reporting.
1014 */
1015 rcu_read_lock();
1016 list_for_each_entry_rcu(object, &object_list, object_list) {
1017 spin_lock_irqsave(&object->lock, flags);
1018 if (unreferenced_object(object) &&
1019 !(object->flags & OBJECT_REPORTED)) {
1020 object->flags |= OBJECT_REPORTED;
1021 new_leaks++;
1022 }
1023 spin_unlock_irqrestore(&object->lock, flags);
1024 }
1025 rcu_read_unlock();
1026
1027 if (new_leaks)
1028 pr_info("%d new suspected memory leaks (see "
1029 "/sys/kernel/debug/kmemleak)\n", new_leaks);
1030
1055} 1031}
1056 1032
1057/* 1033/*
@@ -1062,7 +1038,7 @@ static int kmemleak_scan_thread(void *arg)
1062{ 1038{
1063 static int first_run = 1; 1039 static int first_run = 1;
1064 1040
1065 pr_info("kmemleak: Automatic memory scanning thread started\n"); 1041 pr_info("Automatic memory scanning thread started\n");
1066 1042
1067 /* 1043 /*
1068 * Wait before the first scan to allow the system to fully initialize. 1044 * Wait before the first scan to allow the system to fully initialize.
@@ -1073,49 +1049,25 @@ static int kmemleak_scan_thread(void *arg)
1073 } 1049 }
1074 1050
1075 while (!kthread_should_stop()) { 1051 while (!kthread_should_stop()) {
1076 struct kmemleak_object *object;
1077 signed long timeout = jiffies_scan_wait; 1052 signed long timeout = jiffies_scan_wait;
1078 1053
1079 mutex_lock(&scan_mutex); 1054 mutex_lock(&scan_mutex);
1080
1081 kmemleak_scan(); 1055 kmemleak_scan();
1082 reported_leaks = 0;
1083
1084 rcu_read_lock();
1085 list_for_each_entry_rcu(object, &object_list, object_list) {
1086 unsigned long flags;
1087
1088 if (reported_leaks >= REPORTS_NR)
1089 break;
1090 spin_lock_irqsave(&object->lock, flags);
1091 if (!(object->flags & OBJECT_REPORTED) &&
1092 unreferenced_object(object)) {
1093 print_unreferenced(NULL, object);
1094 object->flags |= OBJECT_REPORTED;
1095 reported_leaks++;
1096 } else if ((object->flags & OBJECT_REPORTED) &&
1097 referenced_object(object)) {
1098 print_referenced(object);
1099 object->flags &= ~OBJECT_REPORTED;
1100 }
1101 spin_unlock_irqrestore(&object->lock, flags);
1102 }
1103 rcu_read_unlock();
1104
1105 mutex_unlock(&scan_mutex); 1056 mutex_unlock(&scan_mutex);
1057
1106 /* wait before the next scan */ 1058 /* wait before the next scan */
1107 while (timeout && !kthread_should_stop()) 1059 while (timeout && !kthread_should_stop())
1108 timeout = schedule_timeout_interruptible(timeout); 1060 timeout = schedule_timeout_interruptible(timeout);
1109 } 1061 }
1110 1062
1111 pr_info("kmemleak: Automatic memory scanning thread ended\n"); 1063 pr_info("Automatic memory scanning thread ended\n");
1112 1064
1113 return 0; 1065 return 0;
1114} 1066}
1115 1067
1116/* 1068/*
1117 * Start the automatic memory scanning thread. This function must be called 1069 * Start the automatic memory scanning thread. This function must be called
1118 * with the kmemleak_mutex held. 1070 * with the scan_mutex held.
1119 */ 1071 */
1120void start_scan_thread(void) 1072void start_scan_thread(void)
1121{ 1073{
@@ -1123,14 +1075,14 @@ void start_scan_thread(void)
1123 return; 1075 return;
1124 scan_thread = kthread_run(kmemleak_scan_thread, NULL, "kmemleak"); 1076 scan_thread = kthread_run(kmemleak_scan_thread, NULL, "kmemleak");
1125 if (IS_ERR(scan_thread)) { 1077 if (IS_ERR(scan_thread)) {
1126 pr_warning("kmemleak: Failed to create the scan thread\n"); 1078 pr_warning("Failed to create the scan thread\n");
1127 scan_thread = NULL; 1079 scan_thread = NULL;
1128 } 1080 }
1129} 1081}
1130 1082
1131/* 1083/*
1132 * Stop the automatic memory scanning thread. This function must be called 1084 * Stop the automatic memory scanning thread. This function must be called
1133 * with the kmemleak_mutex held. 1085 * with the scan_mutex held.
1134 */ 1086 */
1135void stop_scan_thread(void) 1087void stop_scan_thread(void)
1136{ 1088{
@@ -1150,10 +1102,8 @@ static void *kmemleak_seq_start(struct seq_file *seq, loff_t *pos)
1150 struct kmemleak_object *object; 1102 struct kmemleak_object *object;
1151 loff_t n = *pos; 1103 loff_t n = *pos;
1152 1104
1153 if (!n) { 1105 if (!n)
1154 kmemleak_scan();
1155 reported_leaks = 0; 1106 reported_leaks = 0;
1156 }
1157 if (reported_leaks >= REPORTS_NR) 1107 if (reported_leaks >= REPORTS_NR)
1158 return NULL; 1108 return NULL;
1159 1109
@@ -1214,11 +1164,10 @@ static int kmemleak_seq_show(struct seq_file *seq, void *v)
1214 unsigned long flags; 1164 unsigned long flags;
1215 1165
1216 spin_lock_irqsave(&object->lock, flags); 1166 spin_lock_irqsave(&object->lock, flags);
1217 if (!unreferenced_object(object)) 1167 if ((object->flags & OBJECT_REPORTED) && unreferenced_object(object)) {
1218 goto out; 1168 print_unreferenced(seq, object);
1219 print_unreferenced(seq, object); 1169 reported_leaks++;
1220 reported_leaks++; 1170 }
1221out:
1222 spin_unlock_irqrestore(&object->lock, flags); 1171 spin_unlock_irqrestore(&object->lock, flags);
1223 return 0; 1172 return 0;
1224} 1173}
@@ -1237,13 +1186,10 @@ static int kmemleak_open(struct inode *inode, struct file *file)
1237 if (!atomic_read(&kmemleak_enabled)) 1186 if (!atomic_read(&kmemleak_enabled))
1238 return -EBUSY; 1187 return -EBUSY;
1239 1188
1240 ret = mutex_lock_interruptible(&kmemleak_mutex); 1189 ret = mutex_lock_interruptible(&scan_mutex);
1241 if (ret < 0) 1190 if (ret < 0)
1242 goto out; 1191 goto out;
1243 if (file->f_mode & FMODE_READ) { 1192 if (file->f_mode & FMODE_READ) {
1244 ret = mutex_lock_interruptible(&scan_mutex);
1245 if (ret < 0)
1246 goto kmemleak_unlock;
1247 ret = seq_open(file, &kmemleak_seq_ops); 1193 ret = seq_open(file, &kmemleak_seq_ops);
1248 if (ret < 0) 1194 if (ret < 0)
1249 goto scan_unlock; 1195 goto scan_unlock;
@@ -1252,8 +1198,6 @@ static int kmemleak_open(struct inode *inode, struct file *file)
1252 1198
1253scan_unlock: 1199scan_unlock:
1254 mutex_unlock(&scan_mutex); 1200 mutex_unlock(&scan_mutex);
1255kmemleak_unlock:
1256 mutex_unlock(&kmemleak_mutex);
1257out: 1201out:
1258 return ret; 1202 return ret;
1259} 1203}
@@ -1262,11 +1206,9 @@ static int kmemleak_release(struct inode *inode, struct file *file)
1262{ 1206{
1263 int ret = 0; 1207 int ret = 0;
1264 1208
1265 if (file->f_mode & FMODE_READ) { 1209 if (file->f_mode & FMODE_READ)
1266 seq_release(inode, file); 1210 seq_release(inode, file);
1267 mutex_unlock(&scan_mutex); 1211 mutex_unlock(&scan_mutex);
1268 }
1269 mutex_unlock(&kmemleak_mutex);
1270 1212
1271 return ret; 1213 return ret;
1272} 1214}
@@ -1281,6 +1223,7 @@ static int kmemleak_release(struct inode *inode, struct file *file)
1281 * scan=off - stop the automatic memory scanning thread 1223 * scan=off - stop the automatic memory scanning thread
1282 * scan=... - set the automatic memory scanning period in seconds (0 to 1224 * scan=... - set the automatic memory scanning period in seconds (0 to
1283 * disable it) 1225 * disable it)
1226 * scan - trigger a memory scan
1284 */ 1227 */
1285static ssize_t kmemleak_write(struct file *file, const char __user *user_buf, 1228static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
1286 size_t size, loff_t *ppos) 1229 size_t size, loff_t *ppos)
@@ -1318,7 +1261,9 @@ static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
1318 jiffies_scan_wait = msecs_to_jiffies(secs * 1000); 1261 jiffies_scan_wait = msecs_to_jiffies(secs * 1000);
1319 start_scan_thread(); 1262 start_scan_thread();
1320 } 1263 }
1321 } else 1264 } else if (strncmp(buf, "scan", 4) == 0)
1265 kmemleak_scan();
1266 else
1322 return -EINVAL; 1267 return -EINVAL;
1323 1268
1324 /* ignore the rest of the buffer, only one command at a time */ 1269 /* ignore the rest of the buffer, only one command at a time */
@@ -1343,11 +1288,9 @@ static int kmemleak_cleanup_thread(void *arg)
1343{ 1288{
1344 struct kmemleak_object *object; 1289 struct kmemleak_object *object;
1345 1290
1346 mutex_lock(&kmemleak_mutex); 1291 mutex_lock(&scan_mutex);
1347 stop_scan_thread(); 1292 stop_scan_thread();
1348 mutex_unlock(&kmemleak_mutex);
1349 1293
1350 mutex_lock(&scan_mutex);
1351 rcu_read_lock(); 1294 rcu_read_lock();
1352 list_for_each_entry_rcu(object, &object_list, object_list) 1295 list_for_each_entry_rcu(object, &object_list, object_list)
1353 delete_object(object->pointer); 1296 delete_object(object->pointer);
@@ -1367,7 +1310,7 @@ static void kmemleak_cleanup(void)
1367 cleanup_thread = kthread_run(kmemleak_cleanup_thread, NULL, 1310 cleanup_thread = kthread_run(kmemleak_cleanup_thread, NULL,
1368 "kmemleak-clean"); 1311 "kmemleak-clean");
1369 if (IS_ERR(cleanup_thread)) 1312 if (IS_ERR(cleanup_thread))
1370 pr_warning("kmemleak: Failed to create the clean-up thread\n"); 1313 pr_warning("Failed to create the clean-up thread\n");
1371} 1314}
1372 1315
1373/* 1316/*
@@ -1414,7 +1357,6 @@ void __init kmemleak_init(void)
1414 int i; 1357 int i;
1415 unsigned long flags; 1358 unsigned long flags;
1416 1359
1417 jiffies_scan_yield = msecs_to_jiffies(MSECS_SCAN_YIELD);
1418 jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE); 1360 jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE);
1419 jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000); 1361 jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000);
1420 1362
@@ -1488,11 +1430,10 @@ static int __init kmemleak_late_init(void)
1488 dentry = debugfs_create_file("kmemleak", S_IRUGO, NULL, NULL, 1430 dentry = debugfs_create_file("kmemleak", S_IRUGO, NULL, NULL,
1489 &kmemleak_fops); 1431 &kmemleak_fops);
1490 if (!dentry) 1432 if (!dentry)
1491 pr_warning("kmemleak: Failed to create the debugfs kmemleak " 1433 pr_warning("Failed to create the debugfs kmemleak file\n");
1492 "file\n"); 1434 mutex_lock(&scan_mutex);
1493 mutex_lock(&kmemleak_mutex);
1494 start_scan_thread(); 1435 start_scan_thread();
1495 mutex_unlock(&kmemleak_mutex); 1436 mutex_unlock(&scan_mutex);
1496 1437
1497 pr_info("Kernel memory leak detector initialized\n"); 1438 pr_info("Kernel memory leak detector initialized\n");
1498 1439
diff --git a/mm/memory.c b/mm/memory.c
index 98bcb90d5957..65216194eb8d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1207,8 +1207,8 @@ static inline int use_zero_page(struct vm_area_struct *vma)
1207 1207
1208 1208
1209int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 1209int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1210 unsigned long start, int len, int flags, 1210 unsigned long start, int nr_pages, int flags,
1211 struct page **pages, struct vm_area_struct **vmas) 1211 struct page **pages, struct vm_area_struct **vmas)
1212{ 1212{
1213 int i; 1213 int i;
1214 unsigned int vm_flags = 0; 1214 unsigned int vm_flags = 0;
@@ -1217,7 +1217,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1217 int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS); 1217 int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
1218 int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL); 1218 int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);
1219 1219
1220 if (len <= 0) 1220 if (nr_pages <= 0)
1221 return 0; 1221 return 0;
1222 /* 1222 /*
1223 * Require read or write permissions. 1223 * Require read or write permissions.
@@ -1269,7 +1269,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1269 vmas[i] = gate_vma; 1269 vmas[i] = gate_vma;
1270 i++; 1270 i++;
1271 start += PAGE_SIZE; 1271 start += PAGE_SIZE;
1272 len--; 1272 nr_pages--;
1273 continue; 1273 continue;
1274 } 1274 }
1275 1275
@@ -1280,7 +1280,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1280 1280
1281 if (is_vm_hugetlb_page(vma)) { 1281 if (is_vm_hugetlb_page(vma)) {
1282 i = follow_hugetlb_page(mm, vma, pages, vmas, 1282 i = follow_hugetlb_page(mm, vma, pages, vmas,
1283 &start, &len, i, write); 1283 &start, &nr_pages, i, write);
1284 continue; 1284 continue;
1285 } 1285 }
1286 1286
@@ -1311,8 +1311,10 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1311 while (!(page = follow_page(vma, start, foll_flags))) { 1311 while (!(page = follow_page(vma, start, foll_flags))) {
1312 int ret; 1312 int ret;
1313 1313
1314 /* FOLL_WRITE matches FAULT_FLAG_WRITE! */ 1314 ret = handle_mm_fault(mm, vma, start,
1315 ret = handle_mm_fault(mm, vma, start, foll_flags & FOLL_WRITE); 1315 (foll_flags & FOLL_WRITE) ?
1316 FAULT_FLAG_WRITE : 0);
1317
1316 if (ret & VM_FAULT_ERROR) { 1318 if (ret & VM_FAULT_ERROR) {
1317 if (ret & VM_FAULT_OOM) 1319 if (ret & VM_FAULT_OOM)
1318 return i ? i : -ENOMEM; 1320 return i ? i : -ENOMEM;
@@ -1355,9 +1357,9 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1355 vmas[i] = vma; 1357 vmas[i] = vma;
1356 i++; 1358 i++;
1357 start += PAGE_SIZE; 1359 start += PAGE_SIZE;
1358 len--; 1360 nr_pages--;
1359 } while (len && start < vma->vm_end); 1361 } while (nr_pages && start < vma->vm_end);
1360 } while (len); 1362 } while (nr_pages);
1361 return i; 1363 return i;
1362} 1364}
1363 1365
@@ -1366,7 +1368,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1366 * @tsk: task_struct of target task 1368 * @tsk: task_struct of target task
1367 * @mm: mm_struct of target mm 1369 * @mm: mm_struct of target mm
1368 * @start: starting user address 1370 * @start: starting user address
1369 * @len: number of pages from start to pin 1371 * @nr_pages: number of pages from start to pin
1370 * @write: whether pages will be written to by the caller 1372 * @write: whether pages will be written to by the caller
1371 * @force: whether to force write access even if user mapping is 1373 * @force: whether to force write access even if user mapping is
1372 * readonly. This will result in the page being COWed even 1374 * readonly. This will result in the page being COWed even
@@ -1378,7 +1380,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1378 * Or NULL if the caller does not require them. 1380 * Or NULL if the caller does not require them.
1379 * 1381 *
1380 * Returns number of pages pinned. This may be fewer than the number 1382 * Returns number of pages pinned. This may be fewer than the number
1381 * requested. If len is 0 or negative, returns 0. If no pages 1383 * requested. If nr_pages is 0 or negative, returns 0. If no pages
1382 * were pinned, returns -errno. Each page returned must be released 1384 * were pinned, returns -errno. Each page returned must be released
1383 * with a put_page() call when it is finished with. vmas will only 1385 * with a put_page() call when it is finished with. vmas will only
1384 * remain valid while mmap_sem is held. 1386 * remain valid while mmap_sem is held.
@@ -1412,7 +1414,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1412 * See also get_user_pages_fast, for performance critical applications. 1414 * See also get_user_pages_fast, for performance critical applications.
1413 */ 1415 */
1414int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 1416int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1415 unsigned long start, int len, int write, int force, 1417 unsigned long start, int nr_pages, int write, int force,
1416 struct page **pages, struct vm_area_struct **vmas) 1418 struct page **pages, struct vm_area_struct **vmas)
1417{ 1419{
1418 int flags = 0; 1420 int flags = 0;
@@ -1422,9 +1424,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1422 if (force) 1424 if (force)
1423 flags |= GUP_FLAGS_FORCE; 1425 flags |= GUP_FLAGS_FORCE;
1424 1426
1425 return __get_user_pages(tsk, mm, 1427 return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
1426 start, len, flags,
1427 pages, vmas);
1428} 1428}
1429 1429
1430EXPORT_SYMBOL(get_user_pages); 1430EXPORT_SYMBOL(get_user_pages);
@@ -2517,7 +2517,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2517 delayacct_set_flag(DELAYACCT_PF_SWAPIN); 2517 delayacct_set_flag(DELAYACCT_PF_SWAPIN);
2518 page = lookup_swap_cache(entry); 2518 page = lookup_swap_cache(entry);
2519 if (!page) { 2519 if (!page) {
2520 grab_swap_token(); /* Contend for token _before_ read-in */ 2520 grab_swap_token(mm); /* Contend for token _before_ read-in */
2521 page = swapin_readahead(entry, 2521 page = swapin_readahead(entry,
2522 GFP_HIGHUSER_MOVABLE, vma, address); 2522 GFP_HIGHUSER_MOVABLE, vma, address);
2523 if (!page) { 2523 if (!page) {
diff --git a/mm/nommu.c b/mm/nommu.c
index 2fd2ad5da98e..53cab10fece4 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -173,8 +173,8 @@ unsigned int kobjsize(const void *objp)
173} 173}
174 174
175int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 175int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
176 unsigned long start, int len, int flags, 176 unsigned long start, int nr_pages, int flags,
177 struct page **pages, struct vm_area_struct **vmas) 177 struct page **pages, struct vm_area_struct **vmas)
178{ 178{
179 struct vm_area_struct *vma; 179 struct vm_area_struct *vma;
180 unsigned long vm_flags; 180 unsigned long vm_flags;
@@ -189,7 +189,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
189 vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); 189 vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
190 vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); 190 vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
191 191
192 for (i = 0; i < len; i++) { 192 for (i = 0; i < nr_pages; i++) {
193 vma = find_vma(mm, start); 193 vma = find_vma(mm, start);
194 if (!vma) 194 if (!vma)
195 goto finish_or_fault; 195 goto finish_or_fault;
@@ -224,7 +224,7 @@ finish_or_fault:
224 * - don't permit access to VMAs that don't support it, such as I/O mappings 224 * - don't permit access to VMAs that don't support it, such as I/O mappings
225 */ 225 */
226int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 226int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
227 unsigned long start, int len, int write, int force, 227 unsigned long start, int nr_pages, int write, int force,
228 struct page **pages, struct vm_area_struct **vmas) 228 struct page **pages, struct vm_area_struct **vmas)
229{ 229{
230 int flags = 0; 230 int flags = 0;
@@ -234,12 +234,31 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
234 if (force) 234 if (force)
235 flags |= GUP_FLAGS_FORCE; 235 flags |= GUP_FLAGS_FORCE;
236 236
237 return __get_user_pages(tsk, mm, 237 return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
238 start, len, flags,
239 pages, vmas);
240} 238}
241EXPORT_SYMBOL(get_user_pages); 239EXPORT_SYMBOL(get_user_pages);
242 240
241/**
242 * follow_pfn - look up PFN at a user virtual address
243 * @vma: memory mapping
244 * @address: user virtual address
245 * @pfn: location to store found PFN
246 *
247 * Only IO mappings and raw PFN mappings are allowed.
248 *
249 * Returns zero and the pfn at @pfn on success, -ve otherwise.
250 */
251int follow_pfn(struct vm_area_struct *vma, unsigned long address,
252 unsigned long *pfn)
253{
254 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
255 return -EINVAL;
256
257 *pfn = address >> PAGE_SHIFT;
258 return 0;
259}
260EXPORT_SYMBOL(follow_pfn);
261
243DEFINE_RWLOCK(vmlist_lock); 262DEFINE_RWLOCK(vmlist_lock);
244struct vm_struct *vmlist; 263struct vm_struct *vmlist;
245 264
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 2c075dcf03d4..3c7f5e1afe5f 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -541,8 +541,11 @@ static void balance_dirty_pages(struct address_space *mapping)
541 * filesystems (i.e. NFS) in which data may have been 541 * filesystems (i.e. NFS) in which data may have been
542 * written to the server's write cache, but has not yet 542 * written to the server's write cache, but has not yet
543 * been flushed to permanent storage. 543 * been flushed to permanent storage.
544 * Only move pages to writeback if this bdi is over its
545 * threshold otherwise wait until the disk writes catch
546 * up.
544 */ 547 */
545 if (bdi_nr_reclaimable) { 548 if (bdi_nr_reclaimable > bdi_thresh) {
546 writeback_inodes(&wbc); 549 writeback_inodes(&wbc);
547 pages_written += write_chunk - wbc.nr_to_write; 550 pages_written += write_chunk - wbc.nr_to_write;
548 get_dirty_limits(&background_thresh, &dirty_thresh, 551 get_dirty_limits(&background_thresh, &dirty_thresh,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 30d5093a099d..e0f2cdf9d8b1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1153,10 +1153,10 @@ again:
1153 * properly detect and handle allocation failures. 1153 * properly detect and handle allocation failures.
1154 * 1154 *
1155 * We most definitely don't want callers attempting to 1155 * We most definitely don't want callers attempting to
1156 * allocate greater than single-page units with 1156 * allocate greater than order-1 page units with
1157 * __GFP_NOFAIL. 1157 * __GFP_NOFAIL.
1158 */ 1158 */
1159 WARN_ON_ONCE(order > 0); 1159 WARN_ON_ONCE(order > 1);
1160 } 1160 }
1161 spin_lock_irqsave(&zone->lock, flags); 1161 spin_lock_irqsave(&zone->lock, flags);
1162 page = __rmqueue(zone, order, migratetype); 1162 page = __rmqueue(zone, order, migratetype);
@@ -3026,7 +3026,7 @@ bad:
3026 if (dzone == zone) 3026 if (dzone == zone)
3027 break; 3027 break;
3028 kfree(zone_pcp(dzone, cpu)); 3028 kfree(zone_pcp(dzone, cpu));
3029 zone_pcp(dzone, cpu) = NULL; 3029 zone_pcp(dzone, cpu) = &boot_pageset[cpu];
3030 } 3030 }
3031 return -ENOMEM; 3031 return -ENOMEM;
3032} 3032}
@@ -3041,7 +3041,7 @@ static inline void free_zone_pagesets(int cpu)
3041 /* Free per_cpu_pageset if it is slab allocated */ 3041 /* Free per_cpu_pageset if it is slab allocated */
3042 if (pset != &boot_pageset[cpu]) 3042 if (pset != &boot_pageset[cpu])
3043 kfree(pset); 3043 kfree(pset);
3044 zone_pcp(zone, cpu) = NULL; 3044 zone_pcp(zone, cpu) = &boot_pageset[cpu];
3045 } 3045 }
3046} 3046}
3047 3047
@@ -4032,6 +4032,8 @@ static void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn)
4032 int i, nid; 4032 int i, nid;
4033 unsigned long usable_startpfn; 4033 unsigned long usable_startpfn;
4034 unsigned long kernelcore_node, kernelcore_remaining; 4034 unsigned long kernelcore_node, kernelcore_remaining;
4035 /* save the state before borrow the nodemask */
4036 nodemask_t saved_node_state = node_states[N_HIGH_MEMORY];
4035 unsigned long totalpages = early_calculate_totalpages(); 4037 unsigned long totalpages = early_calculate_totalpages();
4036 int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]); 4038 int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]);
4037 4039
@@ -4059,7 +4061,7 @@ static void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn)
4059 4061
4060 /* If kernelcore was not specified, there is no ZONE_MOVABLE */ 4062 /* If kernelcore was not specified, there is no ZONE_MOVABLE */
4061 if (!required_kernelcore) 4063 if (!required_kernelcore)
4062 return; 4064 goto out;
4063 4065
4064 /* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */ 4066 /* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
4065 find_usable_zone_for_movable(); 4067 find_usable_zone_for_movable();
@@ -4158,6 +4160,10 @@ restart:
4158 for (nid = 0; nid < MAX_NUMNODES; nid++) 4160 for (nid = 0; nid < MAX_NUMNODES; nid++)
4159 zone_movable_pfn[nid] = 4161 zone_movable_pfn[nid] =
4160 roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES); 4162 roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES);
4163
4164out:
4165 /* restore the node_state */
4166 node_states[N_HIGH_MEMORY] = saved_node_state;
4161} 4167}
4162 4168
4163/* Any regular memory on that node ? */ 4169/* Any regular memory on that node ? */
@@ -4242,11 +4248,6 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
4242 early_node_map[i].start_pfn, 4248 early_node_map[i].start_pfn,
4243 early_node_map[i].end_pfn); 4249 early_node_map[i].end_pfn);
4244 4250
4245 /*
4246 * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init
4247 * that node_mask, clear it at first
4248 */
4249 nodes_clear(node_states[N_HIGH_MEMORY]);
4250 /* Initialise every node */ 4251 /* Initialise every node */
4251 mminit_verify_pageflags_layout(); 4252 mminit_verify_pageflags_layout();
4252 setup_nr_node_ids(); 4253 setup_nr_node_ids();
@@ -4659,7 +4660,7 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
4659 ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos); 4660 ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
4660 if (!write || (ret == -EINVAL)) 4661 if (!write || (ret == -EINVAL))
4661 return ret; 4662 return ret;
4662 for_each_zone(zone) { 4663 for_each_populated_zone(zone) {
4663 for_each_online_cpu(cpu) { 4664 for_each_online_cpu(cpu) {
4664 unsigned long high; 4665 unsigned long high;
4665 high = zone->present_pages / percpu_pagelist_fraction; 4666 high = zone->present_pages / percpu_pagelist_fraction;
diff --git a/mm/shmem.c b/mm/shmem.c
index e89d7ec18eda..d713239ce2ce 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1558,6 +1558,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, int mode,
1558 spin_lock_init(&info->lock); 1558 spin_lock_init(&info->lock);
1559 info->flags = flags & VM_NORESERVE; 1559 info->flags = flags & VM_NORESERVE;
1560 INIT_LIST_HEAD(&info->swaplist); 1560 INIT_LIST_HEAD(&info->swaplist);
1561 cache_no_acl(inode);
1561 1562
1562 switch (mode & S_IFMT) { 1563 switch (mode & S_IFMT) {
1563 default: 1564 default:
@@ -2388,7 +2389,6 @@ static void shmem_destroy_inode(struct inode *inode)
2388 /* only struct inode is valid if it's an inline symlink */ 2389 /* only struct inode is valid if it's an inline symlink */
2389 mpol_free_shared_policy(&SHMEM_I(inode)->policy); 2390 mpol_free_shared_policy(&SHMEM_I(inode)->policy);
2390 } 2391 }
2391 shmem_acl_destroy_inode(inode);
2392 kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); 2392 kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
2393} 2393}
2394 2394
@@ -2397,10 +2397,6 @@ static void init_once(void *foo)
2397 struct shmem_inode_info *p = (struct shmem_inode_info *) foo; 2397 struct shmem_inode_info *p = (struct shmem_inode_info *) foo;
2398 2398
2399 inode_init_once(&p->vfs_inode); 2399 inode_init_once(&p->vfs_inode);
2400#ifdef CONFIG_TMPFS_POSIX_ACL
2401 p->i_acl = NULL;
2402 p->i_default_acl = NULL;
2403#endif
2404} 2400}
2405 2401
2406static int init_inodecache(void) 2402static int init_inodecache(void)
diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c
index 8e5aadd7dcd6..606a8e757a42 100644
--- a/mm/shmem_acl.c
+++ b/mm/shmem_acl.c
@@ -22,11 +22,11 @@ shmem_get_acl(struct inode *inode, int type)
22 spin_lock(&inode->i_lock); 22 spin_lock(&inode->i_lock);
23 switch(type) { 23 switch(type) {
24 case ACL_TYPE_ACCESS: 24 case ACL_TYPE_ACCESS:
25 acl = posix_acl_dup(SHMEM_I(inode)->i_acl); 25 acl = posix_acl_dup(inode->i_acl);
26 break; 26 break;
27 27
28 case ACL_TYPE_DEFAULT: 28 case ACL_TYPE_DEFAULT:
29 acl = posix_acl_dup(SHMEM_I(inode)->i_default_acl); 29 acl = posix_acl_dup(inode->i_default_acl);
30 break; 30 break;
31 } 31 }
32 spin_unlock(&inode->i_lock); 32 spin_unlock(&inode->i_lock);
@@ -45,13 +45,13 @@ shmem_set_acl(struct inode *inode, int type, struct posix_acl *acl)
45 spin_lock(&inode->i_lock); 45 spin_lock(&inode->i_lock);
46 switch(type) { 46 switch(type) {
47 case ACL_TYPE_ACCESS: 47 case ACL_TYPE_ACCESS:
48 free = SHMEM_I(inode)->i_acl; 48 free = inode->i_acl;
49 SHMEM_I(inode)->i_acl = posix_acl_dup(acl); 49 inode->i_acl = posix_acl_dup(acl);
50 break; 50 break;
51 51
52 case ACL_TYPE_DEFAULT: 52 case ACL_TYPE_DEFAULT:
53 free = SHMEM_I(inode)->i_default_acl; 53 free = inode->i_default_acl;
54 SHMEM_I(inode)->i_default_acl = posix_acl_dup(acl); 54 inode->i_default_acl = posix_acl_dup(acl);
55 break; 55 break;
56 } 56 }
57 spin_unlock(&inode->i_lock); 57 spin_unlock(&inode->i_lock);
@@ -155,23 +155,6 @@ shmem_acl_init(struct inode *inode, struct inode *dir)
155} 155}
156 156
157/** 157/**
158 * shmem_acl_destroy_inode - destroy acls hanging off the in-memory inode
159 *
160 * This is done before destroying the actual inode.
161 */
162
163void
164shmem_acl_destroy_inode(struct inode *inode)
165{
166 if (SHMEM_I(inode)->i_acl)
167 posix_acl_release(SHMEM_I(inode)->i_acl);
168 SHMEM_I(inode)->i_acl = NULL;
169 if (SHMEM_I(inode)->i_default_acl)
170 posix_acl_release(SHMEM_I(inode)->i_default_acl);
171 SHMEM_I(inode)->i_default_acl = NULL;
172}
173
174/**
175 * shmem_check_acl - check_acl() callback for generic_permission() 158 * shmem_check_acl - check_acl() callback for generic_permission()
176 */ 159 */
177static int 160static int
diff --git a/mm/slub.c b/mm/slub.c
index 23bb79acc4b9..ffc895cc3a68 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1085,11 +1085,17 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1085{ 1085{
1086 struct page *page; 1086 struct page *page;
1087 struct kmem_cache_order_objects oo = s->oo; 1087 struct kmem_cache_order_objects oo = s->oo;
1088 gfp_t alloc_gfp;
1088 1089
1089 flags |= s->allocflags; 1090 flags |= s->allocflags;
1090 1091
1091 page = alloc_slab_page(flags | __GFP_NOWARN | __GFP_NORETRY, node, 1092 /*
1092 oo); 1093 * Let the initial higher-order allocation fail under memory pressure
1094 * so we fall-back to the minimum order allocation.
1095 */
1096 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1097
1098 page = alloc_slab_page(alloc_gfp, node, oo);
1093 if (unlikely(!page)) { 1099 if (unlikely(!page)) {
1094 oo = s->min; 1100 oo = s->min;
1095 /* 1101 /*
diff --git a/mm/thrash.c b/mm/thrash.c
index c4c5205a9c35..2372d4ed5dd8 100644
--- a/mm/thrash.c
+++ b/mm/thrash.c
@@ -26,47 +26,45 @@ static DEFINE_SPINLOCK(swap_token_lock);
26struct mm_struct *swap_token_mm; 26struct mm_struct *swap_token_mm;
27static unsigned int global_faults; 27static unsigned int global_faults;
28 28
29void grab_swap_token(void) 29void grab_swap_token(struct mm_struct *mm)
30{ 30{
31 int current_interval; 31 int current_interval;
32 32
33 global_faults++; 33 global_faults++;
34 34
35 current_interval = global_faults - current->mm->faultstamp; 35 current_interval = global_faults - mm->faultstamp;
36 36
37 if (!spin_trylock(&swap_token_lock)) 37 if (!spin_trylock(&swap_token_lock))
38 return; 38 return;
39 39
40 /* First come first served */ 40 /* First come first served */
41 if (swap_token_mm == NULL) { 41 if (swap_token_mm == NULL) {
42 current->mm->token_priority = current->mm->token_priority + 2; 42 mm->token_priority = mm->token_priority + 2;
43 swap_token_mm = current->mm; 43 swap_token_mm = mm;
44 goto out; 44 goto out;
45 } 45 }
46 46
47 if (current->mm != swap_token_mm) { 47 if (mm != swap_token_mm) {
48 if (current_interval < current->mm->last_interval) 48 if (current_interval < mm->last_interval)
49 current->mm->token_priority++; 49 mm->token_priority++;
50 else { 50 else {
51 if (likely(current->mm->token_priority > 0)) 51 if (likely(mm->token_priority > 0))
52 current->mm->token_priority--; 52 mm->token_priority--;
53 } 53 }
54 /* Check if we deserve the token */ 54 /* Check if we deserve the token */
55 if (current->mm->token_priority > 55 if (mm->token_priority > swap_token_mm->token_priority) {
56 swap_token_mm->token_priority) { 56 mm->token_priority += 2;
57 current->mm->token_priority += 2; 57 swap_token_mm = mm;
58 swap_token_mm = current->mm;
59 } 58 }
60 } else { 59 } else {
61 /* Token holder came in again! */ 60 /* Token holder came in again! */
62 current->mm->token_priority += 2; 61 mm->token_priority += 2;
63 } 62 }
64 63
65out: 64out:
66 current->mm->faultstamp = global_faults; 65 mm->faultstamp = global_faults;
67 current->mm->last_interval = current_interval; 66 mm->last_interval = current_interval;
68 spin_unlock(&swap_token_lock); 67 spin_unlock(&swap_token_lock);
69return;
70} 68}
71 69
72/* Called on process exit. */ 70/* Called on process exit. */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e8fa2d9eb212..54155268dfca 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -932,7 +932,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
932 continue; 932 continue;
933 if (__isolate_lru_page(cursor_page, mode, file) == 0) { 933 if (__isolate_lru_page(cursor_page, mode, file) == 0) {
934 list_move(&cursor_page->lru, dst); 934 list_move(&cursor_page->lru, dst);
935 mem_cgroup_del_lru(page); 935 mem_cgroup_del_lru(cursor_page);
936 nr_taken++; 936 nr_taken++;
937 scan++; 937 scan++;
938 } 938 }