Diffstat (limited to 'mm')
 mm/Kconfig      |   4
 mm/ksm.c        |  10
 mm/memcontrol.c | 127
 mm/percpu.c     |  83
 mm/rmap.c       |   4
 mm/swapfile.c   |  12
 mm/vmalloc.c    |   1
 7 files changed, 140 insertions(+), 101 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index edd300aca173..57963c6063d1 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -224,7 +224,9 @@ config KSM
 	  the many instances by a single resident page with that content, so
 	  saving memory until one or another app needs to modify the content.
 	  Recommended for use with KVM, or with other duplicative applications.
-	  See Documentation/vm/ksm.txt for more information.
+	  See Documentation/vm/ksm.txt for more information: KSM is inactive
+	  until a program has madvised that an area is MADV_MERGEABLE, and
+	  root has set /sys/kernel/mm/ksm/run to 1 (if CONFIG_SYSFS is set).
 
 config DEFAULT_MMAP_MIN_ADDR
 	int "Low address space to protect from user allocation"
diff --git a/mm/ksm.c b/mm/ksm.c
index f7edac356f46..bef1af4f77e3 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -184,11 +184,6 @@ static DEFINE_SPINLOCK(ksm_mmlist_lock);
 		sizeof(struct __struct), __alignof__(struct __struct),\
 		(__flags), NULL)
 
-static void __init ksm_init_max_kernel_pages(void)
-{
-	ksm_max_kernel_pages = nr_free_buffer_pages() / 4;
-}
-
 static int __init ksm_slab_init(void)
 {
 	rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
@@ -1673,7 +1668,7 @@ static int __init ksm_init(void)
 	struct task_struct *ksm_thread;
 	int err;
 
-	ksm_init_max_kernel_pages();
+	ksm_max_kernel_pages = totalram_pages / 4;
 
 	err = ksm_slab_init();
 	if (err)
@@ -1697,6 +1692,9 @@ static int __init ksm_init(void)
 		kthread_stop(ksm_thread);
 		goto out_free2;
 	}
+#else
+	ksm_run = KSM_RUN_MERGE;	/* no way for user to start it */
+
 #endif /* CONFIG_SYSFS */
 
 	return 0;
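
Note on the KSM changes above: with CONFIG_SYSFS disabled there is no /sys/kernel/mm/ksm/run knob, so ksm_init() now defaults ksm_run to KSM_RUN_MERGE; merging still only covers areas an application has opted in with madvise(). A minimal user-space sketch of that opt-in (illustrative only, not part of the patch):

#define _GNU_SOURCE		/* MAP_ANONYMOUS / MADV_MERGEABLE on older glibc */
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 16 * 4096;
	/* anonymous mapping whose pages will end up with identical content */
	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED)
		return 1;
	memset(buf, 0x5a, len);

	/* opt the area in; KSM stays idle until root also writes 1 to
	 * /sys/kernel/mm/ksm/run (when CONFIG_SYSFS is enabled) */
	if (madvise(buf, len, MADV_MERGEABLE) < 0)
		return 1;
	return 0;
}

Pages in the region remain merge candidates until the area is marked MADV_UNMERGEABLE or unmapped.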
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e2b98a6875c0..f99f5991d6bb 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -313,7 +313,8 @@ soft_limit_tree_from_page(struct page *page)
 static void
 __mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
 				struct mem_cgroup_per_zone *mz,
-				struct mem_cgroup_tree_per_zone *mctz)
+				struct mem_cgroup_tree_per_zone *mctz,
+				unsigned long long new_usage_in_excess)
 {
 	struct rb_node **p = &mctz->rb_root.rb_node;
 	struct rb_node *parent = NULL;
@@ -322,7 +323,9 @@ __mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
 	if (mz->on_tree)
 		return;
 
-	mz->usage_in_excess = res_counter_soft_limit_excess(&mem->res);
+	mz->usage_in_excess = new_usage_in_excess;
+	if (!mz->usage_in_excess)
+		return;
 	while (*p) {
 		parent = *p;
 		mz_node = rb_entry(parent, struct mem_cgroup_per_zone,
@@ -353,16 +356,6 @@ __mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
 }
 
 static void
-mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
-				struct mem_cgroup_per_zone *mz,
-				struct mem_cgroup_tree_per_zone *mctz)
-{
-	spin_lock(&mctz->lock);
-	__mem_cgroup_insert_exceeded(mem, mz, mctz);
-	spin_unlock(&mctz->lock);
-}
-
-static void
 mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
 				struct mem_cgroup_per_zone *mz,
 				struct mem_cgroup_tree_per_zone *mctz)
@@ -392,34 +385,36 @@ static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem)
 
 static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
 {
-	unsigned long long prev_usage_in_excess, new_usage_in_excess;
-	bool updated_tree = false;
+	unsigned long long excess;
 	struct mem_cgroup_per_zone *mz;
 	struct mem_cgroup_tree_per_zone *mctz;
-
-	mz = mem_cgroup_zoneinfo(mem, page_to_nid(page), page_zonenum(page));
+	int nid = page_to_nid(page);
+	int zid = page_zonenum(page);
 	mctz = soft_limit_tree_from_page(page);
 
 	/*
-	 * We do updates in lazy mode, mem's are removed
-	 * lazily from the per-zone, per-node rb tree
+	 * Necessary to update all ancestors when hierarchy is used.
+	 * because their event counter is not touched.
 	 */
-	prev_usage_in_excess = mz->usage_in_excess;
-
-	new_usage_in_excess = res_counter_soft_limit_excess(&mem->res);
-	if (prev_usage_in_excess) {
-		mem_cgroup_remove_exceeded(mem, mz, mctz);
-		updated_tree = true;
-	}
-	if (!new_usage_in_excess)
-		goto done;
-	mem_cgroup_insert_exceeded(mem, mz, mctz);
-
-done:
-	if (updated_tree) {
-		spin_lock(&mctz->lock);
-		mz->usage_in_excess = new_usage_in_excess;
-		spin_unlock(&mctz->lock);
+	for (; mem; mem = parent_mem_cgroup(mem)) {
+		mz = mem_cgroup_zoneinfo(mem, nid, zid);
+		excess = res_counter_soft_limit_excess(&mem->res);
+		/*
+		 * We have to update the tree if mz is on RB-tree or
+		 * mem is over its softlimit.
+		 */
+		if (excess || mz->on_tree) {
+			spin_lock(&mctz->lock);
+			/* if on-tree, remove it */
+			if (mz->on_tree)
+				__mem_cgroup_remove_exceeded(mem, mz, mctz);
+			/*
+			 * Insert again. mz->usage_in_excess will be updated.
+			 * If excess is 0, no tree ops.
+			 */
+			__mem_cgroup_insert_exceeded(mem, mz, mctz, excess);
+			spin_unlock(&mctz->lock);
+		}
 	}
 }
 
@@ -447,9 +442,10 @@ static struct mem_cgroup_per_zone *
 __mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
 {
 	struct rb_node *rightmost = NULL;
-	struct mem_cgroup_per_zone *mz = NULL;
+	struct mem_cgroup_per_zone *mz;
 
 retry:
+	mz = NULL;
 	rightmost = rb_last(&mctz->rb_root);
 	if (!rightmost)
 		goto done;		/* Nothing to reclaim from */
@@ -1270,9 +1266,9 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 			gfp_t gfp_mask, struct mem_cgroup **memcg,
 			bool oom, struct page *page)
 {
-	struct mem_cgroup *mem, *mem_over_limit, *mem_over_soft_limit;
+	struct mem_cgroup *mem, *mem_over_limit;
 	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
-	struct res_counter *fail_res, *soft_fail_res = NULL;
+	struct res_counter *fail_res;
 
 	if (unlikely(test_thread_flag(TIF_MEMDIE))) {
 		/* Don't account this! */
@@ -1304,17 +1300,16 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 
 		if (mem_cgroup_is_root(mem))
 			goto done;
-		ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res,
-						&soft_fail_res);
+		ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res);
 		if (likely(!ret)) {
 			if (!do_swap_account)
 				break;
 			ret = res_counter_charge(&mem->memsw, PAGE_SIZE,
-							&fail_res, NULL);
+							&fail_res);
 			if (likely(!ret))
 				break;
 			/* mem+swap counter fails */
-			res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+			res_counter_uncharge(&mem->res, PAGE_SIZE);
 			flags |= MEM_CGROUP_RECLAIM_NOSWAP;
 			mem_over_limit = mem_cgroup_from_res_counter(fail_res,
 									memsw);
@@ -1353,16 +1348,11 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 		}
 	}
 	/*
-	 * Insert just the ancestor, we should trickle down to the correct
-	 * cgroup for reclaim, since the other nodes will be below their
-	 * soft limit
+	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
+	 * if they exceeds softlimit.
 	 */
-	if (soft_fail_res) {
-		mem_over_soft_limit =
-			mem_cgroup_from_res_counter(soft_fail_res, res);
-		if (mem_cgroup_soft_limit_check(mem_over_soft_limit))
-			mem_cgroup_update_tree(mem_over_soft_limit, page);
-	}
+	if (mem_cgroup_soft_limit_check(mem))
+		mem_cgroup_update_tree(mem, page);
 done:
 	return 0;
 nomem:
@@ -1437,10 +1427,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
 	if (unlikely(PageCgroupUsed(pc))) {
 		unlock_page_cgroup(pc);
 		if (!mem_cgroup_is_root(mem)) {
-			res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+			res_counter_uncharge(&mem->res, PAGE_SIZE);
 			if (do_swap_account)
-				res_counter_uncharge(&mem->memsw, PAGE_SIZE,
-							NULL);
+				res_counter_uncharge(&mem->memsw, PAGE_SIZE);
 		}
 		css_put(&mem->css);
 		return;
@@ -1519,7 +1508,7 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 		goto out;
 
 	if (!mem_cgroup_is_root(from))
-		res_counter_uncharge(&from->res, PAGE_SIZE, NULL);
+		res_counter_uncharge(&from->res, PAGE_SIZE);
 	mem_cgroup_charge_statistics(from, pc, false);
 
 	page = pc->page;
@@ -1539,7 +1528,7 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 	}
 
 	if (do_swap_account && !mem_cgroup_is_root(from))
-		res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL);
+		res_counter_uncharge(&from->memsw, PAGE_SIZE);
 	css_put(&from->css);
 
 	css_get(&to->css);
@@ -1610,9 +1599,9 @@ uncharge:
 	css_put(&parent->css);
 	/* uncharge if move fails */
 	if (!mem_cgroup_is_root(parent)) {
-		res_counter_uncharge(&parent->res, PAGE_SIZE, NULL);
+		res_counter_uncharge(&parent->res, PAGE_SIZE);
 		if (do_swap_account)
-			res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL);
+			res_counter_uncharge(&parent->memsw, PAGE_SIZE);
 	}
 	return ret;
 }
@@ -1803,8 +1792,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
 		 * calling css_tryget
 		 */
 		if (!mem_cgroup_is_root(memcg))
-			res_counter_uncharge(&memcg->memsw, PAGE_SIZE,
-						NULL);
+			res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
 		mem_cgroup_swap_statistics(memcg, false);
 		mem_cgroup_put(memcg);
 	}
@@ -1831,9 +1819,9 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
 	if (!mem)
 		return;
 	if (!mem_cgroup_is_root(mem)) {
-		res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+		res_counter_uncharge(&mem->res, PAGE_SIZE);
 		if (do_swap_account)
-			res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+			res_counter_uncharge(&mem->memsw, PAGE_SIZE);
 	}
 	css_put(&mem->css);
 }
@@ -1848,7 +1836,6 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem = NULL;
 	struct mem_cgroup_per_zone *mz;
-	bool soft_limit_excess = false;
 
 	if (mem_cgroup_disabled())
 		return NULL;
@@ -1888,10 +1875,10 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	}
 
 	if (!mem_cgroup_is_root(mem)) {
-		res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
+		res_counter_uncharge(&mem->res, PAGE_SIZE);
 		if (do_swap_account &&
 				(ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
-			res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+			res_counter_uncharge(&mem->memsw, PAGE_SIZE);
 	}
 	if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
 		mem_cgroup_swap_statistics(mem, true);
@@ -1908,7 +1895,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	mz = page_cgroup_zoneinfo(pc);
 	unlock_page_cgroup(pc);
 
-	if (soft_limit_excess && mem_cgroup_soft_limit_check(mem))
+	if (mem_cgroup_soft_limit_check(mem))
 		mem_cgroup_update_tree(mem, page);
 	/* at swapout, this memcg will be accessed to record to swap */
 	if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
@@ -1986,7 +1973,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
 	 * This memcg can be obsolete one. We avoid calling css_tryget
 	 */
 	if (!mem_cgroup_is_root(memcg))
-		res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL);
+		res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
 	mem_cgroup_swap_statistics(memcg, false);
 	mem_cgroup_put(memcg);
 }
@@ -2233,6 +2220,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 	unsigned long reclaimed;
 	int loop = 0;
 	struct mem_cgroup_tree_per_zone *mctz;
+	unsigned long long excess;
 
 	if (order > 0)
 		return 0;
@@ -2284,9 +2272,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 				break;
 			} while (1);
 		}
-		mz->usage_in_excess =
-			res_counter_soft_limit_excess(&mz->mem->res);
 		__mem_cgroup_remove_exceeded(mz->mem, mz, mctz);
+		excess = res_counter_soft_limit_excess(&mz->mem->res);
 		/*
 		 * One school of thought says that we should not add
 		 * back the node to the tree if reclaim returns 0.
@@ -2295,8 +2282,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 		 * memory to reclaim from. Consider this as a longer
 		 * term TODO.
 		 */
-		if (mz->usage_in_excess)
-			__mem_cgroup_insert_exceeded(mz->mem, mz, mctz);
+		/* If excess == 0, no tree ops */
+		__mem_cgroup_insert_exceeded(mz->mem, mz, mctz, excess);
 		spin_unlock(&mctz->lock);
 		css_put(&mz->mem->css);
 		loop++;
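
Note on the memcontrol.c changes above: each (memcg, zone) node sits in a per-zone tree ordered by how far the group's usage exceeds its soft limit, and mem_cgroup_update_tree() now re-keys that node and every ancestor's node by removing it and re-inserting it with the current excess; a zero excess simply leaves it off the tree. A self-contained sketch of that re-keying walk, with the kernel's rb-tree and spinlock replaced by a sorted list (names are illustrative, not kernel API):

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for one memcg's per-zone node; the kernel keys an rb-tree node by
 * usage_in_excess, a sorted singly linked list keeps the sketch short. */
struct mz {
	unsigned long long usage_in_excess;
	bool on_tree;
	struct mz *next;
	struct mz *parent;		/* hierarchy: NULL at the root */
	unsigned long long usage, soft_limit;
};

static struct mz *tree;			/* ascending by usage_in_excess */

static void remove_exceeded(struct mz *mz)
{
	struct mz **p = &tree;

	if (!mz->on_tree)
		return;
	while (*p && *p != mz)
		p = &(*p)->next;
	if (*p)
		*p = mz->next;
	mz->on_tree = false;
}

static void insert_exceeded(struct mz *mz, unsigned long long excess)
{
	struct mz **p = &tree;

	mz->usage_in_excess = excess;
	if (!excess)			/* same early return as the new kernel code */
		return;
	while (*p && (*p)->usage_in_excess < excess)
		p = &(*p)->next;
	mz->next = *p;
	*p = mz;
	mz->on_tree = true;
}

/* Mirrors the reworked mem_cgroup_update_tree(): walk every ancestor and
 * re-key it by its current excess; zero excess just drops it from the tree. */
static void update_tree(struct mz *mz)
{
	unsigned long long excess;

	for (; mz; mz = mz->parent) {
		excess = mz->usage > mz->soft_limit ?
			 mz->usage - mz->soft_limit : 0;
		if (excess || mz->on_tree) {
			remove_exceeded(mz);
			insert_exceeded(mz, excess);
		}
	}
}

int main(void)
{
	struct mz parent = { .soft_limit = 100, .usage = 120 };
	struct mz child  = { .soft_limit = 40,  .usage = 90, .parent = &parent };

	update_tree(&child);	/* child lands with excess 50, parent with 20 */
	for (struct mz *m = tree; m; m = m->next)
		printf("excess=%llu\n", m->usage_in_excess);
	return 0;
}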
diff --git a/mm/percpu.c b/mm/percpu.c
index 43d8cacfdaa5..4a048abad043 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1043,7 +1043,9 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void)
  */
 static void *pcpu_alloc(size_t size, size_t align, bool reserved)
 {
+	static int warn_limit = 10;
 	struct pcpu_chunk *chunk;
+	const char *err;
 	int slot, off;
 
 	if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
@@ -1059,11 +1061,14 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved)
 	if (reserved && pcpu_reserved_chunk) {
 		chunk = pcpu_reserved_chunk;
 		if (size > chunk->contig_hint ||
-		    pcpu_extend_area_map(chunk) < 0)
+		    pcpu_extend_area_map(chunk) < 0) {
+			err = "failed to extend area map of reserved chunk";
 			goto fail_unlock;
+		}
 		off = pcpu_alloc_area(chunk, size, align);
 		if (off >= 0)
 			goto area_found;
+		err = "alloc from reserved chunk failed";
 		goto fail_unlock;
 	}
 
@@ -1080,6 +1085,7 @@ restart:
 		case 1:
 			goto restart;		/* pcpu_lock dropped, restart */
 		default:
+			err = "failed to extend area map";
 			goto fail_unlock;
 		}
 
@@ -1093,8 +1099,10 @@ restart:
 	spin_unlock_irq(&pcpu_lock);
 
 	chunk = alloc_pcpu_chunk();
-	if (!chunk)
+	if (!chunk) {
+		err = "failed to allocate new chunk";
 		goto fail_unlock_mutex;
+	}
 
 	spin_lock_irq(&pcpu_lock);
 	pcpu_chunk_relocate(chunk, -1);
@@ -1107,6 +1115,7 @@ area_found:
 	if (pcpu_populate_chunk(chunk, off, size)) {
 		spin_lock_irq(&pcpu_lock);
 		pcpu_free_area(chunk, off);
+		err = "failed to populate";
 		goto fail_unlock;
 	}
 
@@ -1119,6 +1128,13 @@ fail_unlock:
 	spin_unlock_irq(&pcpu_lock);
 fail_unlock_mutex:
 	mutex_unlock(&pcpu_alloc_mutex);
+	if (warn_limit) {
+		pr_warning("PERCPU: allocation failed, size=%zu align=%zu, "
+			   "%s\n", size, align, err);
+		dump_stack();
+		if (!--warn_limit)
+			pr_info("PERCPU: limit reached, disable warning\n");
+	}
 	return NULL;
 }
 
1124 1140
@@ -1347,6 +1363,10 @@ struct pcpu_alloc_info * __init pcpu_build_alloc_info(
 	struct pcpu_alloc_info *ai;
 	unsigned int *cpu_map;
 
+	/* this function may be called multiple times */
+	memset(group_map, 0, sizeof(group_map));
+	memset(group_cnt, 0, sizeof(group_map));
+
 	/*
 	 * Determine min_unit_size, alloc_size and max_upa such that
 	 * alloc_size is multiple of atom_size and is the smallest
@@ -1574,6 +1594,7 @@ static void pcpu_dump_alloc_info(const char *lvl,
 int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 				  void *base_addr)
 {
+	static char cpus_buf[4096] __initdata;
 	static int smap[2], dmap[2];
 	size_t dyn_size = ai->dyn_size;
 	size_t size_sum = ai->static_size + ai->reserved_size + dyn_size;
@@ -1585,17 +1606,26 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	int *unit_map;
 	int group, unit, i;
 
+	cpumask_scnprintf(cpus_buf, sizeof(cpus_buf), cpu_possible_mask);
+
+#define PCPU_SETUP_BUG_ON(cond)	do {					\
+	if (unlikely(cond)) {						\
+		pr_emerg("PERCPU: failed to initialize, %s", #cond);	\
+		pr_emerg("PERCPU: cpu_possible_mask=%s\n", cpus_buf);	\
+		pcpu_dump_alloc_info(KERN_EMERG, ai);			\
+		BUG();							\
+	}								\
+} while (0)
+
 	/* sanity checks */
 	BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC ||
 		     ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC);
-	BUG_ON(ai->nr_groups <= 0);
-	BUG_ON(!ai->static_size);
-	BUG_ON(!base_addr);
-	BUG_ON(ai->unit_size < size_sum);
-	BUG_ON(ai->unit_size & ~PAGE_MASK);
-	BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);
-
-	pcpu_dump_alloc_info(KERN_DEBUG, ai);
+	PCPU_SETUP_BUG_ON(ai->nr_groups <= 0);
+	PCPU_SETUP_BUG_ON(!ai->static_size);
+	PCPU_SETUP_BUG_ON(!base_addr);
+	PCPU_SETUP_BUG_ON(ai->unit_size < size_sum);
+	PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK);
+	PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);
 
 	/* process group information and build config tables accordingly */
 	group_offsets = alloc_bootmem(ai->nr_groups * sizeof(group_offsets[0]));
@@ -1604,7 +1634,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	unit_off = alloc_bootmem(nr_cpu_ids * sizeof(unit_off[0]));
 
 	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
-		unit_map[cpu] = NR_CPUS;
+		unit_map[cpu] = UINT_MAX;
 	pcpu_first_unit_cpu = NR_CPUS;
 
 	for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
@@ -1618,8 +1648,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 			if (cpu == NR_CPUS)
 				continue;
 
-			BUG_ON(cpu > nr_cpu_ids || !cpu_possible(cpu));
-			BUG_ON(unit_map[cpu] != NR_CPUS);
+			PCPU_SETUP_BUG_ON(cpu > nr_cpu_ids);
+			PCPU_SETUP_BUG_ON(!cpu_possible(cpu));
+			PCPU_SETUP_BUG_ON(unit_map[cpu] != UINT_MAX);
 
 			unit_map[cpu] = unit + i;
 			unit_off[cpu] = gi->base_offset + i * ai->unit_size;
@@ -1632,7 +1663,11 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	pcpu_nr_units = unit;
 
 	for_each_possible_cpu(cpu)
-		BUG_ON(unit_map[cpu] == NR_CPUS);
+		PCPU_SETUP_BUG_ON(unit_map[cpu] == UINT_MAX);
+
+	/* we're done parsing the input, undefine BUG macro and dump config */
+#undef PCPU_SETUP_BUG_ON
+	pcpu_dump_alloc_info(KERN_INFO, ai);
 
 	pcpu_nr_groups = ai->nr_groups;
 	pcpu_group_offsets = group_offsets;
@@ -1782,7 +1817,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size,
 	void *base = (void *)ULONG_MAX;
 	void **areas = NULL;
 	struct pcpu_alloc_info *ai;
-	size_t size_sum, areas_size;
+	size_t size_sum, areas_size, max_distance;
 	int group, i, rc;
 
 	ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size,
@@ -1832,8 +1867,24 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size,
 	}
 
 	/* base address is now known, determine group base offsets */
-	for (group = 0; group < ai->nr_groups; group++)
+	max_distance = 0;
+	for (group = 0; group < ai->nr_groups; group++) {
 		ai->groups[group].base_offset = areas[group] - base;
+		max_distance = max(max_distance, ai->groups[group].base_offset);
+	}
+	max_distance += ai->unit_size;
+
+	/* warn if maximum distance is further than 75% of vmalloc space */
+	if (max_distance > (VMALLOC_END - VMALLOC_START) * 3 / 4) {
+		pr_warning("PERCPU: max_distance=0x%lx too large for vmalloc "
+			   "space 0x%lx\n",
+			   max_distance, VMALLOC_END - VMALLOC_START);
+#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+		/* and fail if we have fallback */
+		rc = -EINVAL;
+		goto out_free;
+#endif
+	}
 
 	pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n",
 		PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size,
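
Note on the percpu.c changes above: pcpu_alloc() now records a short reason string in err on every failure path and reports it with a warning that switches itself off after ten occurrences. The same self-limiting warning pattern in isolation (a user-space sketch, not kernel code):

#include <stdarg.h>
#include <stdio.h>

/* Print at most ten warnings over the lifetime of the process, then one
 * notice that further warnings are suppressed; mirrors the static
 * warn_limit counter added to pcpu_alloc(). */
static void limited_warn(const char *fmt, ...)
{
	static int warn_limit = 10;
	va_list ap;

	if (!warn_limit)
		return;
	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
	if (!--warn_limit)
		fprintf(stderr, "limit reached, disabling this warning\n");
}

int main(void)
{
	for (int i = 0; i < 15; i++)
		limited_warn("allocation failed, size=%zu align=%zu, %s\n",
			     (size_t)64, (size_t)8, "failed to populate");
	return 0;
}

As in the kernel version, the counter is a function-local static with no locking, so the cutoff is approximate under concurrency, which is fine for a diagnostic.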
diff --git a/mm/rmap.c b/mm/rmap.c
index 28aafe2b5306..dd43373a483f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -242,8 +242,8 @@ vma_address(struct page *page, struct vm_area_struct *vma)
 }
 
 /*
- * At what user virtual address is page expected in vma? checking that the
- * page matches the vma: currently only used on anon pages, by unuse_vma;
+ * At what user virtual address is page expected in vma?
+ * checking that the page matches the vma.
  */
 unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4de7f02f820b..a1bc6b9af9a2 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1974,12 +1974,14 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 		goto bad_swap;
 	}
 
-	if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
-		p->flags |= SWP_SOLIDSTATE;
-		p->cluster_next = 1 + (random32() % p->highest_bit);
+	if (p->bdev) {
+		if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
+			p->flags |= SWP_SOLIDSTATE;
+			p->cluster_next = 1 + (random32() % p->highest_bit);
+		}
+		if (discard_swap(p) == 0)
+			p->flags |= SWP_DISCARDABLE;
 	}
-	if (discard_swap(p) == 0)
-		p->flags |= SWP_DISCARDABLE;
 
 	mutex_lock(&swapon_mutex);
 	spin_lock(&swap_lock);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 4ecbbded98f2..5e7aed0802bf 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -25,7 +25,6 @@
 #include <linux/rcupdate.h>
 #include <linux/pfn.h>
 #include <linux/kmemleak.h>
-#include <linux/highmem.h>
 #include <asm/atomic.h>
 #include <asm/uaccess.h>
 #include <asm/tlbflush.h>