aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/dmapool.c2
-rw-r--r--mm/kmemleak.c197
-rw-r--r--mm/memory.c26
-rw-r--r--mm/nommu.c33
-rw-r--r--mm/page-writeback.c5
-rw-r--r--mm/page_alloc.c13
-rw-r--r--mm/percpu.c24
7 files changed, 136 insertions, 164 deletions
diff --git a/mm/dmapool.c b/mm/dmapool.c
index b1f0885dda22..3df063706f53 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -86,10 +86,12 @@ show_pools(struct device *dev, struct device_attribute *attr, char *buf)
86 unsigned pages = 0; 86 unsigned pages = 0;
87 unsigned blocks = 0; 87 unsigned blocks = 0;
88 88
89 spin_lock_irq(&pool->lock);
89 list_for_each_entry(page, &pool->page_list, page_list) { 90 list_for_each_entry(page, &pool->page_list, page_list) {
90 pages++; 91 pages++;
91 blocks += page->in_use; 92 blocks += page->in_use;
92 } 93 }
94 spin_unlock_irq(&pool->lock);
93 95
94 /* per-pool info, no real statistics yet */ 96 /* per-pool info, no real statistics yet */
95 temp = scnprintf(next, size, "%-16s %4u %4Zu %4Zu %2u\n", 97 temp = scnprintf(next, size, "%-16s %4u %4Zu %4Zu %2u\n",
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index c96f2c8700aa..e766e1da09d2 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -48,10 +48,10 @@
48 * scanned. This list is only modified during a scanning episode when the 48 * scanned. This list is only modified during a scanning episode when the
49 * scan_mutex is held. At the end of a scan, the gray_list is always empty. 49 * scan_mutex is held. At the end of a scan, the gray_list is always empty.
50 * Note that the kmemleak_object.use_count is incremented when an object is 50 * Note that the kmemleak_object.use_count is incremented when an object is
51 * added to the gray_list and therefore cannot be freed 51 * added to the gray_list and therefore cannot be freed. This mutex also
52 * - kmemleak_mutex (mutex): prevents multiple users of the "kmemleak" debugfs 52 * prevents multiple users of the "kmemleak" debugfs file together with
53 * file together with modifications to the memory scanning parameters 53 * modifications to the memory scanning parameters including the scan_thread
54 * including the scan_thread pointer 54 * pointer
55 * 55 *
56 * The kmemleak_object structures have a use_count incremented or decremented 56 * The kmemleak_object structures have a use_count incremented or decremented
57 * using the get_object()/put_object() functions. When the use_count becomes 57 * using the get_object()/put_object() functions. When the use_count becomes
@@ -105,7 +105,6 @@
105#define MAX_TRACE 16 /* stack trace length */ 105#define MAX_TRACE 16 /* stack trace length */
106#define REPORTS_NR 50 /* maximum number of reported leaks */ 106#define REPORTS_NR 50 /* maximum number of reported leaks */
107#define MSECS_MIN_AGE 5000 /* minimum object age for reporting */ 107#define MSECS_MIN_AGE 5000 /* minimum object age for reporting */
108#define MSECS_SCAN_YIELD 10 /* CPU yielding period */
109#define SECS_FIRST_SCAN 60 /* delay before the first scan */ 108#define SECS_FIRST_SCAN 60 /* delay before the first scan */
110#define SECS_SCAN_WAIT 600 /* subsequent auto scanning delay */ 109#define SECS_SCAN_WAIT 600 /* subsequent auto scanning delay */
111 110
@@ -186,19 +185,16 @@ static atomic_t kmemleak_error = ATOMIC_INIT(0);
186static unsigned long min_addr = ULONG_MAX; 185static unsigned long min_addr = ULONG_MAX;
187static unsigned long max_addr; 186static unsigned long max_addr;
188 187
189/* used for yielding the CPU to other tasks during scanning */
190static unsigned long next_scan_yield;
191static struct task_struct *scan_thread; 188static struct task_struct *scan_thread;
192static unsigned long jiffies_scan_yield; 189/* used to avoid reporting of recently allocated objects */
193static unsigned long jiffies_min_age; 190static unsigned long jiffies_min_age;
191static unsigned long jiffies_last_scan;
194/* delay between automatic memory scannings */ 192/* delay between automatic memory scannings */
195static signed long jiffies_scan_wait; 193static signed long jiffies_scan_wait;
196/* enables or disables the task stacks scanning */ 194/* enables or disables the task stacks scanning */
197static int kmemleak_stack_scan; 195static int kmemleak_stack_scan = 1;
198/* mutex protecting the memory scanning */ 196/* protects the memory scanning, parameters and debug/kmemleak file access */
199static DEFINE_MUTEX(scan_mutex); 197static DEFINE_MUTEX(scan_mutex);
200/* mutex protecting the access to the /sys/kernel/debug/kmemleak file */
201static DEFINE_MUTEX(kmemleak_mutex);
202 198
203/* number of leaks reported (for limitation purposes) */ 199/* number of leaks reported (for limitation purposes) */
204static int reported_leaks; 200static int reported_leaks;
@@ -235,7 +231,7 @@ struct early_log {
235}; 231};
236 232
237/* early logging buffer and current position */ 233/* early logging buffer and current position */
238static struct early_log early_log[200]; 234static struct early_log early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE];
239static int crt_early_log; 235static int crt_early_log;
240 236
241static void kmemleak_disable(void); 237static void kmemleak_disable(void);
@@ -279,15 +275,6 @@ static int color_gray(const struct kmemleak_object *object)
279} 275}
280 276
281/* 277/*
282 * Objects are considered referenced if their color is gray and they have not
283 * been deleted.
284 */
285static int referenced_object(struct kmemleak_object *object)
286{
287 return (object->flags & OBJECT_ALLOCATED) && color_gray(object);
288}
289
290/*
291 * Objects are considered unreferenced only if their color is white, they have 278 * Objects are considered unreferenced only if their color is white, they have
292 * not be deleted and have a minimum age to avoid false positives caused by 279 * not be deleted and have a minimum age to avoid false positives caused by
293 * pointers temporarily stored in CPU registers. 280 * pointers temporarily stored in CPU registers.
@@ -295,42 +282,28 @@ static int referenced_object(struct kmemleak_object *object)
295static int unreferenced_object(struct kmemleak_object *object) 282static int unreferenced_object(struct kmemleak_object *object)
296{ 283{
297 return (object->flags & OBJECT_ALLOCATED) && color_white(object) && 284 return (object->flags & OBJECT_ALLOCATED) && color_white(object) &&
298 time_is_before_eq_jiffies(object->jiffies + jiffies_min_age); 285 time_before_eq(object->jiffies + jiffies_min_age,
286 jiffies_last_scan);
299} 287}
300 288
301/* 289/*
302 * Printing of the (un)referenced objects information, either to the seq file 290 * Printing of the unreferenced objects information to the seq file. The
303 * or to the kernel log. The print_referenced/print_unreferenced functions 291 * print_unreferenced function must be called with the object->lock held.
304 * must be called with the object->lock held.
305 */ 292 */
306#define print_helper(seq, x...) do { \
307 struct seq_file *s = (seq); \
308 if (s) \
309 seq_printf(s, x); \
310 else \
311 pr_info(x); \
312} while (0)
313
314static void print_referenced(struct kmemleak_object *object)
315{
316 pr_info("referenced object 0x%08lx (size %zu)\n",
317 object->pointer, object->size);
318}
319
320static void print_unreferenced(struct seq_file *seq, 293static void print_unreferenced(struct seq_file *seq,
321 struct kmemleak_object *object) 294 struct kmemleak_object *object)
322{ 295{
323 int i; 296 int i;
324 297
325 print_helper(seq, "unreferenced object 0x%08lx (size %zu):\n", 298 seq_printf(seq, "unreferenced object 0x%08lx (size %zu):\n",
326 object->pointer, object->size); 299 object->pointer, object->size);
327 print_helper(seq, " comm \"%s\", pid %d, jiffies %lu\n", 300 seq_printf(seq, " comm \"%s\", pid %d, jiffies %lu\n",
328 object->comm, object->pid, object->jiffies); 301 object->comm, object->pid, object->jiffies);
329 print_helper(seq, " backtrace:\n"); 302 seq_printf(seq, " backtrace:\n");
330 303
331 for (i = 0; i < object->trace_len; i++) { 304 for (i = 0; i < object->trace_len; i++) {
332 void *ptr = (void *)object->trace[i]; 305 void *ptr = (void *)object->trace[i];
333 print_helper(seq, " [<%p>] %pS\n", ptr, ptr); 306 seq_printf(seq, " [<%p>] %pS\n", ptr, ptr);
334 } 307 }
335} 308}
336 309
@@ -554,8 +527,10 @@ static void delete_object(unsigned long ptr)
554 write_lock_irqsave(&kmemleak_lock, flags); 527 write_lock_irqsave(&kmemleak_lock, flags);
555 object = lookup_object(ptr, 0); 528 object = lookup_object(ptr, 0);
556 if (!object) { 529 if (!object) {
530#ifdef DEBUG
557 kmemleak_warn("Freeing unknown object at 0x%08lx\n", 531 kmemleak_warn("Freeing unknown object at 0x%08lx\n",
558 ptr); 532 ptr);
533#endif
559 write_unlock_irqrestore(&kmemleak_lock, flags); 534 write_unlock_irqrestore(&kmemleak_lock, flags);
560 return; 535 return;
561 } 536 }
@@ -571,8 +546,6 @@ static void delete_object(unsigned long ptr)
571 * cannot be freed when it is being scanned. 546 * cannot be freed when it is being scanned.
572 */ 547 */
573 spin_lock_irqsave(&object->lock, flags); 548 spin_lock_irqsave(&object->lock, flags);
574 if (object->flags & OBJECT_REPORTED)
575 print_referenced(object);
576 object->flags &= ~OBJECT_ALLOCATED; 549 object->flags &= ~OBJECT_ALLOCATED;
577 spin_unlock_irqrestore(&object->lock, flags); 550 spin_unlock_irqrestore(&object->lock, flags);
578 put_object(object); 551 put_object(object);
@@ -696,7 +669,8 @@ static void log_early(int op_type, const void *ptr, size_t size,
696 struct early_log *log; 669 struct early_log *log;
697 670
698 if (crt_early_log >= ARRAY_SIZE(early_log)) { 671 if (crt_early_log >= ARRAY_SIZE(early_log)) {
699 kmemleak_stop("Early log buffer exceeded\n"); 672 pr_warning("Early log buffer exceeded\n");
673 kmemleak_disable();
700 return; 674 return;
701 } 675 }
702 676
@@ -808,21 +782,6 @@ void kmemleak_no_scan(const void *ptr)
808EXPORT_SYMBOL(kmemleak_no_scan); 782EXPORT_SYMBOL(kmemleak_no_scan);
809 783
810/* 784/*
811 * Yield the CPU so that other tasks get a chance to run. The yielding is
812 * rate-limited to avoid excessive number of calls to the schedule() function
813 * during memory scanning.
814 */
815static void scan_yield(void)
816{
817 might_sleep();
818
819 if (time_is_before_eq_jiffies(next_scan_yield)) {
820 schedule();
821 next_scan_yield = jiffies + jiffies_scan_yield;
822 }
823}
824
825/*
826 * Memory scanning is a long process and it needs to be interruptable. This 785 * Memory scanning is a long process and it needs to be interruptable. This
827 * function checks whether such interrupt condition occured. 786 * function checks whether such interrupt condition occured.
828 */ 787 */
@@ -862,15 +821,6 @@ static void scan_block(void *_start, void *_end,
862 if (scan_should_stop()) 821 if (scan_should_stop())
863 break; 822 break;
864 823
865 /*
866 * When scanning a memory block with a corresponding
867 * kmemleak_object, the CPU yielding is handled in the calling
868 * code since it holds the object->lock to avoid the block
869 * freeing.
870 */
871 if (!scanned)
872 scan_yield();
873
874 object = find_and_get_object(pointer, 1); 824 object = find_and_get_object(pointer, 1);
875 if (!object) 825 if (!object)
876 continue; 826 continue;
@@ -952,6 +902,9 @@ static void kmemleak_scan(void)
952 struct kmemleak_object *object, *tmp; 902 struct kmemleak_object *object, *tmp;
953 struct task_struct *task; 903 struct task_struct *task;
954 int i; 904 int i;
905 int new_leaks = 0;
906
907 jiffies_last_scan = jiffies;
955 908
956 /* prepare the kmemleak_object's */ 909 /* prepare the kmemleak_object's */
957 rcu_read_lock(); 910 rcu_read_lock();
@@ -1033,7 +986,7 @@ static void kmemleak_scan(void)
1033 */ 986 */
1034 object = list_entry(gray_list.next, typeof(*object), gray_list); 987 object = list_entry(gray_list.next, typeof(*object), gray_list);
1035 while (&object->gray_list != &gray_list) { 988 while (&object->gray_list != &gray_list) {
1036 scan_yield(); 989 cond_resched();
1037 990
1038 /* may add new objects to the list */ 991 /* may add new objects to the list */
1039 if (!scan_should_stop()) 992 if (!scan_should_stop())
@@ -1049,6 +1002,32 @@ static void kmemleak_scan(void)
1049 object = tmp; 1002 object = tmp;
1050 } 1003 }
1051 WARN_ON(!list_empty(&gray_list)); 1004 WARN_ON(!list_empty(&gray_list));
1005
1006 /*
1007 * If scanning was stopped do not report any new unreferenced objects.
1008 */
1009 if (scan_should_stop())
1010 return;
1011
1012 /*
1013 * Scanning result reporting.
1014 */
1015 rcu_read_lock();
1016 list_for_each_entry_rcu(object, &object_list, object_list) {
1017 spin_lock_irqsave(&object->lock, flags);
1018 if (unreferenced_object(object) &&
1019 !(object->flags & OBJECT_REPORTED)) {
1020 object->flags |= OBJECT_REPORTED;
1021 new_leaks++;
1022 }
1023 spin_unlock_irqrestore(&object->lock, flags);
1024 }
1025 rcu_read_unlock();
1026
1027 if (new_leaks)
1028 pr_info("%d new suspected memory leaks (see "
1029 "/sys/kernel/debug/kmemleak)\n", new_leaks);
1030
1052} 1031}
1053 1032
1054/* 1033/*
@@ -1070,36 +1049,12 @@ static int kmemleak_scan_thread(void *arg)
1070 } 1049 }
1071 1050
1072 while (!kthread_should_stop()) { 1051 while (!kthread_should_stop()) {
1073 struct kmemleak_object *object;
1074 signed long timeout = jiffies_scan_wait; 1052 signed long timeout = jiffies_scan_wait;
1075 1053
1076 mutex_lock(&scan_mutex); 1054 mutex_lock(&scan_mutex);
1077
1078 kmemleak_scan(); 1055 kmemleak_scan();
1079 reported_leaks = 0;
1080
1081 rcu_read_lock();
1082 list_for_each_entry_rcu(object, &object_list, object_list) {
1083 unsigned long flags;
1084
1085 if (reported_leaks >= REPORTS_NR)
1086 break;
1087 spin_lock_irqsave(&object->lock, flags);
1088 if (!(object->flags & OBJECT_REPORTED) &&
1089 unreferenced_object(object)) {
1090 print_unreferenced(NULL, object);
1091 object->flags |= OBJECT_REPORTED;
1092 reported_leaks++;
1093 } else if ((object->flags & OBJECT_REPORTED) &&
1094 referenced_object(object)) {
1095 print_referenced(object);
1096 object->flags &= ~OBJECT_REPORTED;
1097 }
1098 spin_unlock_irqrestore(&object->lock, flags);
1099 }
1100 rcu_read_unlock();
1101
1102 mutex_unlock(&scan_mutex); 1056 mutex_unlock(&scan_mutex);
1057
1103 /* wait before the next scan */ 1058 /* wait before the next scan */
1104 while (timeout && !kthread_should_stop()) 1059 while (timeout && !kthread_should_stop())
1105 timeout = schedule_timeout_interruptible(timeout); 1060 timeout = schedule_timeout_interruptible(timeout);
@@ -1112,7 +1067,7 @@ static int kmemleak_scan_thread(void *arg)
1112 1067
1113/* 1068/*
1114 * Start the automatic memory scanning thread. This function must be called 1069 * Start the automatic memory scanning thread. This function must be called
1115 * with the kmemleak_mutex held. 1070 * with the scan_mutex held.
1116 */ 1071 */
1117void start_scan_thread(void) 1072void start_scan_thread(void)
1118{ 1073{
@@ -1127,7 +1082,7 @@ void start_scan_thread(void)
1127 1082
1128/* 1083/*
1129 * Stop the automatic memory scanning thread. This function must be called 1084 * Stop the automatic memory scanning thread. This function must be called
1130 * with the kmemleak_mutex held. 1085 * with the scan_mutex held.
1131 */ 1086 */
1132void stop_scan_thread(void) 1087void stop_scan_thread(void)
1133{ 1088{
@@ -1147,10 +1102,8 @@ static void *kmemleak_seq_start(struct seq_file *seq, loff_t *pos)
1147 struct kmemleak_object *object; 1102 struct kmemleak_object *object;
1148 loff_t n = *pos; 1103 loff_t n = *pos;
1149 1104
1150 if (!n) { 1105 if (!n)
1151 kmemleak_scan();
1152 reported_leaks = 0; 1106 reported_leaks = 0;
1153 }
1154 if (reported_leaks >= REPORTS_NR) 1107 if (reported_leaks >= REPORTS_NR)
1155 return NULL; 1108 return NULL;
1156 1109
@@ -1211,11 +1164,10 @@ static int kmemleak_seq_show(struct seq_file *seq, void *v)
1211 unsigned long flags; 1164 unsigned long flags;
1212 1165
1213 spin_lock_irqsave(&object->lock, flags); 1166 spin_lock_irqsave(&object->lock, flags);
1214 if (!unreferenced_object(object)) 1167 if ((object->flags & OBJECT_REPORTED) && unreferenced_object(object)) {
1215 goto out; 1168 print_unreferenced(seq, object);
1216 print_unreferenced(seq, object); 1169 reported_leaks++;
1217 reported_leaks++; 1170 }
1218out:
1219 spin_unlock_irqrestore(&object->lock, flags); 1171 spin_unlock_irqrestore(&object->lock, flags);
1220 return 0; 1172 return 0;
1221} 1173}
@@ -1234,13 +1186,10 @@ static int kmemleak_open(struct inode *inode, struct file *file)
1234 if (!atomic_read(&kmemleak_enabled)) 1186 if (!atomic_read(&kmemleak_enabled))
1235 return -EBUSY; 1187 return -EBUSY;
1236 1188
1237 ret = mutex_lock_interruptible(&kmemleak_mutex); 1189 ret = mutex_lock_interruptible(&scan_mutex);
1238 if (ret < 0) 1190 if (ret < 0)
1239 goto out; 1191 goto out;
1240 if (file->f_mode & FMODE_READ) { 1192 if (file->f_mode & FMODE_READ) {
1241 ret = mutex_lock_interruptible(&scan_mutex);
1242 if (ret < 0)
1243 goto kmemleak_unlock;
1244 ret = seq_open(file, &kmemleak_seq_ops); 1193 ret = seq_open(file, &kmemleak_seq_ops);
1245 if (ret < 0) 1194 if (ret < 0)
1246 goto scan_unlock; 1195 goto scan_unlock;
@@ -1249,8 +1198,6 @@ static int kmemleak_open(struct inode *inode, struct file *file)
1249 1198
1250scan_unlock: 1199scan_unlock:
1251 mutex_unlock(&scan_mutex); 1200 mutex_unlock(&scan_mutex);
1252kmemleak_unlock:
1253 mutex_unlock(&kmemleak_mutex);
1254out: 1201out:
1255 return ret; 1202 return ret;
1256} 1203}
@@ -1259,11 +1206,9 @@ static int kmemleak_release(struct inode *inode, struct file *file)
1259{ 1206{
1260 int ret = 0; 1207 int ret = 0;
1261 1208
1262 if (file->f_mode & FMODE_READ) { 1209 if (file->f_mode & FMODE_READ)
1263 seq_release(inode, file); 1210 seq_release(inode, file);
1264 mutex_unlock(&scan_mutex); 1211 mutex_unlock(&scan_mutex);
1265 }
1266 mutex_unlock(&kmemleak_mutex);
1267 1212
1268 return ret; 1213 return ret;
1269} 1214}
@@ -1278,6 +1223,7 @@ static int kmemleak_release(struct inode *inode, struct file *file)
1278 * scan=off - stop the automatic memory scanning thread 1223 * scan=off - stop the automatic memory scanning thread
1279 * scan=... - set the automatic memory scanning period in seconds (0 to 1224 * scan=... - set the automatic memory scanning period in seconds (0 to
1280 * disable it) 1225 * disable it)
1226 * scan - trigger a memory scan
1281 */ 1227 */
1282static ssize_t kmemleak_write(struct file *file, const char __user *user_buf, 1228static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
1283 size_t size, loff_t *ppos) 1229 size_t size, loff_t *ppos)
@@ -1315,7 +1261,9 @@ static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
1315 jiffies_scan_wait = msecs_to_jiffies(secs * 1000); 1261 jiffies_scan_wait = msecs_to_jiffies(secs * 1000);
1316 start_scan_thread(); 1262 start_scan_thread();
1317 } 1263 }
1318 } else 1264 } else if (strncmp(buf, "scan", 4) == 0)
1265 kmemleak_scan();
1266 else
1319 return -EINVAL; 1267 return -EINVAL;
1320 1268
1321 /* ignore the rest of the buffer, only one command at a time */ 1269 /* ignore the rest of the buffer, only one command at a time */
@@ -1340,11 +1288,9 @@ static int kmemleak_cleanup_thread(void *arg)
1340{ 1288{
1341 struct kmemleak_object *object; 1289 struct kmemleak_object *object;
1342 1290
1343 mutex_lock(&kmemleak_mutex); 1291 mutex_lock(&scan_mutex);
1344 stop_scan_thread(); 1292 stop_scan_thread();
1345 mutex_unlock(&kmemleak_mutex);
1346 1293
1347 mutex_lock(&scan_mutex);
1348 rcu_read_lock(); 1294 rcu_read_lock();
1349 list_for_each_entry_rcu(object, &object_list, object_list) 1295 list_for_each_entry_rcu(object, &object_list, object_list)
1350 delete_object(object->pointer); 1296 delete_object(object->pointer);
@@ -1411,7 +1357,6 @@ void __init kmemleak_init(void)
1411 int i; 1357 int i;
1412 unsigned long flags; 1358 unsigned long flags;
1413 1359
1414 jiffies_scan_yield = msecs_to_jiffies(MSECS_SCAN_YIELD);
1415 jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE); 1360 jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE);
1416 jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000); 1361 jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000);
1417 1362
@@ -1486,9 +1431,9 @@ static int __init kmemleak_late_init(void)
1486 &kmemleak_fops); 1431 &kmemleak_fops);
1487 if (!dentry) 1432 if (!dentry)
1488 pr_warning("Failed to create the debugfs kmemleak file\n"); 1433 pr_warning("Failed to create the debugfs kmemleak file\n");
1489 mutex_lock(&kmemleak_mutex); 1434 mutex_lock(&scan_mutex);
1490 start_scan_thread(); 1435 start_scan_thread();
1491 mutex_unlock(&kmemleak_mutex); 1436 mutex_unlock(&scan_mutex);
1492 1437
1493 pr_info("Kernel memory leak detector initialized\n"); 1438 pr_info("Kernel memory leak detector initialized\n");
1494 1439
diff --git a/mm/memory.c b/mm/memory.c
index f46ac18ba231..65216194eb8d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1207,8 +1207,8 @@ static inline int use_zero_page(struct vm_area_struct *vma)
1207 1207
1208 1208
1209int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 1209int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1210 unsigned long start, int len, int flags, 1210 unsigned long start, int nr_pages, int flags,
1211 struct page **pages, struct vm_area_struct **vmas) 1211 struct page **pages, struct vm_area_struct **vmas)
1212{ 1212{
1213 int i; 1213 int i;
1214 unsigned int vm_flags = 0; 1214 unsigned int vm_flags = 0;
@@ -1217,7 +1217,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1217 int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS); 1217 int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
1218 int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL); 1218 int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);
1219 1219
1220 if (len <= 0) 1220 if (nr_pages <= 0)
1221 return 0; 1221 return 0;
1222 /* 1222 /*
1223 * Require read or write permissions. 1223 * Require read or write permissions.
@@ -1269,7 +1269,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1269 vmas[i] = gate_vma; 1269 vmas[i] = gate_vma;
1270 i++; 1270 i++;
1271 start += PAGE_SIZE; 1271 start += PAGE_SIZE;
1272 len--; 1272 nr_pages--;
1273 continue; 1273 continue;
1274 } 1274 }
1275 1275
@@ -1280,7 +1280,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1280 1280
1281 if (is_vm_hugetlb_page(vma)) { 1281 if (is_vm_hugetlb_page(vma)) {
1282 i = follow_hugetlb_page(mm, vma, pages, vmas, 1282 i = follow_hugetlb_page(mm, vma, pages, vmas,
1283 &start, &len, i, write); 1283 &start, &nr_pages, i, write);
1284 continue; 1284 continue;
1285 } 1285 }
1286 1286
@@ -1357,9 +1357,9 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1357 vmas[i] = vma; 1357 vmas[i] = vma;
1358 i++; 1358 i++;
1359 start += PAGE_SIZE; 1359 start += PAGE_SIZE;
1360 len--; 1360 nr_pages--;
1361 } while (len && start < vma->vm_end); 1361 } while (nr_pages && start < vma->vm_end);
1362 } while (len); 1362 } while (nr_pages);
1363 return i; 1363 return i;
1364} 1364}
1365 1365
@@ -1368,7 +1368,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1368 * @tsk: task_struct of target task 1368 * @tsk: task_struct of target task
1369 * @mm: mm_struct of target mm 1369 * @mm: mm_struct of target mm
1370 * @start: starting user address 1370 * @start: starting user address
1371 * @len: number of pages from start to pin 1371 * @nr_pages: number of pages from start to pin
1372 * @write: whether pages will be written to by the caller 1372 * @write: whether pages will be written to by the caller
1373 * @force: whether to force write access even if user mapping is 1373 * @force: whether to force write access even if user mapping is
1374 * readonly. This will result in the page being COWed even 1374 * readonly. This will result in the page being COWed even
@@ -1380,7 +1380,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1380 * Or NULL if the caller does not require them. 1380 * Or NULL if the caller does not require them.
1381 * 1381 *
1382 * Returns number of pages pinned. This may be fewer than the number 1382 * Returns number of pages pinned. This may be fewer than the number
1383 * requested. If len is 0 or negative, returns 0. If no pages 1383 * requested. If nr_pages is 0 or negative, returns 0. If no pages
1384 * were pinned, returns -errno. Each page returned must be released 1384 * were pinned, returns -errno. Each page returned must be released
1385 * with a put_page() call when it is finished with. vmas will only 1385 * with a put_page() call when it is finished with. vmas will only
1386 * remain valid while mmap_sem is held. 1386 * remain valid while mmap_sem is held.
@@ -1414,7 +1414,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1414 * See also get_user_pages_fast, for performance critical applications. 1414 * See also get_user_pages_fast, for performance critical applications.
1415 */ 1415 */
1416int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 1416int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1417 unsigned long start, int len, int write, int force, 1417 unsigned long start, int nr_pages, int write, int force,
1418 struct page **pages, struct vm_area_struct **vmas) 1418 struct page **pages, struct vm_area_struct **vmas)
1419{ 1419{
1420 int flags = 0; 1420 int flags = 0;
@@ -1424,9 +1424,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1424 if (force) 1424 if (force)
1425 flags |= GUP_FLAGS_FORCE; 1425 flags |= GUP_FLAGS_FORCE;
1426 1426
1427 return __get_user_pages(tsk, mm, 1427 return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
1428 start, len, flags,
1429 pages, vmas);
1430} 1428}
1431 1429
1432EXPORT_SYMBOL(get_user_pages); 1430EXPORT_SYMBOL(get_user_pages);
diff --git a/mm/nommu.c b/mm/nommu.c
index 2fd2ad5da98e..53cab10fece4 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -173,8 +173,8 @@ unsigned int kobjsize(const void *objp)
173} 173}
174 174
175int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 175int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
176 unsigned long start, int len, int flags, 176 unsigned long start, int nr_pages, int flags,
177 struct page **pages, struct vm_area_struct **vmas) 177 struct page **pages, struct vm_area_struct **vmas)
178{ 178{
179 struct vm_area_struct *vma; 179 struct vm_area_struct *vma;
180 unsigned long vm_flags; 180 unsigned long vm_flags;
@@ -189,7 +189,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
189 vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); 189 vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
190 vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); 190 vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
191 191
192 for (i = 0; i < len; i++) { 192 for (i = 0; i < nr_pages; i++) {
193 vma = find_vma(mm, start); 193 vma = find_vma(mm, start);
194 if (!vma) 194 if (!vma)
195 goto finish_or_fault; 195 goto finish_or_fault;
@@ -224,7 +224,7 @@ finish_or_fault:
224 * - don't permit access to VMAs that don't support it, such as I/O mappings 224 * - don't permit access to VMAs that don't support it, such as I/O mappings
225 */ 225 */
226int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 226int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
227 unsigned long start, int len, int write, int force, 227 unsigned long start, int nr_pages, int write, int force,
228 struct page **pages, struct vm_area_struct **vmas) 228 struct page **pages, struct vm_area_struct **vmas)
229{ 229{
230 int flags = 0; 230 int flags = 0;
@@ -234,12 +234,31 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
234 if (force) 234 if (force)
235 flags |= GUP_FLAGS_FORCE; 235 flags |= GUP_FLAGS_FORCE;
236 236
237 return __get_user_pages(tsk, mm, 237 return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
238 start, len, flags,
239 pages, vmas);
240} 238}
241EXPORT_SYMBOL(get_user_pages); 239EXPORT_SYMBOL(get_user_pages);
242 240
241/**
242 * follow_pfn - look up PFN at a user virtual address
243 * @vma: memory mapping
244 * @address: user virtual address
245 * @pfn: location to store found PFN
246 *
247 * Only IO mappings and raw PFN mappings are allowed.
248 *
249 * Returns zero and the pfn at @pfn on success, -ve otherwise.
250 */
251int follow_pfn(struct vm_area_struct *vma, unsigned long address,
252 unsigned long *pfn)
253{
254 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
255 return -EINVAL;
256
257 *pfn = address >> PAGE_SHIFT;
258 return 0;
259}
260EXPORT_SYMBOL(follow_pfn);
261
243DEFINE_RWLOCK(vmlist_lock); 262DEFINE_RWLOCK(vmlist_lock);
244struct vm_struct *vmlist; 263struct vm_struct *vmlist;
245 264
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 7b0dcea4935b..7687879253b9 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -541,8 +541,11 @@ static void balance_dirty_pages(struct address_space *mapping)
541 * filesystems (i.e. NFS) in which data may have been 541 * filesystems (i.e. NFS) in which data may have been
542 * written to the server's write cache, but has not yet 542 * written to the server's write cache, but has not yet
543 * been flushed to permanent storage. 543 * been flushed to permanent storage.
544 * Only move pages to writeback if this bdi is over its
545 * threshold otherwise wait until the disk writes catch
546 * up.
544 */ 547 */
545 if (bdi_nr_reclaimable) { 548 if (bdi_nr_reclaimable > bdi_thresh) {
546 writeback_inodes(&wbc); 549 writeback_inodes(&wbc);
547 pages_written += write_chunk - wbc.nr_to_write; 550 pages_written += write_chunk - wbc.nr_to_write;
548 get_dirty_limits(&background_thresh, &dirty_thresh, 551 get_dirty_limits(&background_thresh, &dirty_thresh,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5d714f8fb303..e0f2cdf9d8b1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4032,6 +4032,8 @@ static void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn)
4032 int i, nid; 4032 int i, nid;
4033 unsigned long usable_startpfn; 4033 unsigned long usable_startpfn;
4034 unsigned long kernelcore_node, kernelcore_remaining; 4034 unsigned long kernelcore_node, kernelcore_remaining;
4035 /* save the state before borrow the nodemask */
4036 nodemask_t saved_node_state = node_states[N_HIGH_MEMORY];
4035 unsigned long totalpages = early_calculate_totalpages(); 4037 unsigned long totalpages = early_calculate_totalpages();
4036 int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]); 4038 int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]);
4037 4039
@@ -4059,7 +4061,7 @@ static void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn)
4059 4061
4060 /* If kernelcore was not specified, there is no ZONE_MOVABLE */ 4062 /* If kernelcore was not specified, there is no ZONE_MOVABLE */
4061 if (!required_kernelcore) 4063 if (!required_kernelcore)
4062 return; 4064 goto out;
4063 4065
4064 /* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */ 4066 /* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
4065 find_usable_zone_for_movable(); 4067 find_usable_zone_for_movable();
@@ -4158,6 +4160,10 @@ restart:
4158 for (nid = 0; nid < MAX_NUMNODES; nid++) 4160 for (nid = 0; nid < MAX_NUMNODES; nid++)
4159 zone_movable_pfn[nid] = 4161 zone_movable_pfn[nid] =
4160 roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES); 4162 roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES);
4163
4164out:
4165 /* restore the node_state */
4166 node_states[N_HIGH_MEMORY] = saved_node_state;
4161} 4167}
4162 4168
4163/* Any regular memory on that node ? */ 4169/* Any regular memory on that node ? */
@@ -4242,11 +4248,6 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
4242 early_node_map[i].start_pfn, 4248 early_node_map[i].start_pfn,
4243 early_node_map[i].end_pfn); 4249 early_node_map[i].end_pfn);
4244 4250
4245 /*
4246 * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init
4247 * that node_mask, clear it at first
4248 */
4249 nodes_clear(node_states[N_HIGH_MEMORY]);
4250 /* Initialise every node */ 4251 /* Initialise every node */
4251 mminit_verify_pageflags_layout(); 4252 mminit_verify_pageflags_layout();
4252 setup_nr_node_ids(); 4253 setup_nr_node_ids();
diff --git a/mm/percpu.c b/mm/percpu.c
index c0b2c1a76e81..b70f2acd8853 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -549,14 +549,14 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme)
549 * @chunk: chunk of interest 549 * @chunk: chunk of interest
550 * @page_start: page index of the first page to unmap 550 * @page_start: page index of the first page to unmap
551 * @page_end: page index of the last page to unmap + 1 551 * @page_end: page index of the last page to unmap + 1
552 * @flush: whether to flush cache and tlb or not 552 * @flush_tlb: whether to flush tlb or not
553 * 553 *
554 * For each cpu, unmap pages [@page_start,@page_end) out of @chunk. 554 * For each cpu, unmap pages [@page_start,@page_end) out of @chunk.
555 * If @flush is true, vcache is flushed before unmapping and tlb 555 * If @flush is true, vcache is flushed before unmapping and tlb
556 * after. 556 * after.
557 */ 557 */
558static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end, 558static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end,
559 bool flush) 559 bool flush_tlb)
560{ 560{
561 unsigned int last = num_possible_cpus() - 1; 561 unsigned int last = num_possible_cpus() - 1;
562 unsigned int cpu; 562 unsigned int cpu;
@@ -569,9 +569,8 @@ static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end,
569 * the whole region at once rather than doing it for each cpu. 569 * the whole region at once rather than doing it for each cpu.
570 * This could be an overkill but is more scalable. 570 * This could be an overkill but is more scalable.
571 */ 571 */
572 if (flush) 572 flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start),
573 flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start), 573 pcpu_chunk_addr(chunk, last, page_end));
574 pcpu_chunk_addr(chunk, last, page_end));
575 574
576 for_each_possible_cpu(cpu) 575 for_each_possible_cpu(cpu)
577 unmap_kernel_range_noflush( 576 unmap_kernel_range_noflush(
@@ -579,7 +578,7 @@ static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end,
579 (page_end - page_start) << PAGE_SHIFT); 578 (page_end - page_start) << PAGE_SHIFT);
580 579
581 /* ditto as flush_cache_vunmap() */ 580 /* ditto as flush_cache_vunmap() */
582 if (flush) 581 if (flush_tlb)
583 flush_tlb_kernel_range(pcpu_chunk_addr(chunk, 0, page_start), 582 flush_tlb_kernel_range(pcpu_chunk_addr(chunk, 0, page_start),
584 pcpu_chunk_addr(chunk, last, page_end)); 583 pcpu_chunk_addr(chunk, last, page_end));
585} 584}
@@ -1234,6 +1233,7 @@ static struct page * __init pcpue_get_page(unsigned int cpu, int pageno)
1234ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, 1233ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size,
1235 ssize_t dyn_size, ssize_t unit_size) 1234 ssize_t dyn_size, ssize_t unit_size)
1236{ 1235{
1236 size_t chunk_size;
1237 unsigned int cpu; 1237 unsigned int cpu;
1238 1238
1239 /* determine parameters and allocate */ 1239 /* determine parameters and allocate */
@@ -1248,11 +1248,15 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size,
1248 } else 1248 } else
1249 pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); 1249 pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE);
1250 1250
1251 pcpue_ptr = __alloc_bootmem_nopanic( 1251 chunk_size = pcpue_unit_size * num_possible_cpus();
1252 num_possible_cpus() * pcpue_unit_size, 1252
1253 PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); 1253 pcpue_ptr = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE,
1254 if (!pcpue_ptr) 1254 __pa(MAX_DMA_ADDRESS));
1255 if (!pcpue_ptr) {
1256 pr_warning("PERCPU: failed to allocate %zu bytes for "
1257 "embedding\n", chunk_size);
1255 return -ENOMEM; 1258 return -ENOMEM;
1259 }
1256 1260
1257 /* return the leftover and copy */ 1261 /* return the leftover and copy */
1258 for_each_possible_cpu(cpu) { 1262 for_each_possible_cpu(cpu) {