diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 4 | ||||
-rw-r--r-- | mm/ksm.c | 10 | ||||
-rw-r--r-- | mm/memcontrol.c | 127 | ||||
-rw-r--r-- | mm/percpu.c | 83 | ||||
-rw-r--r-- | mm/rmap.c | 4 | ||||
-rw-r--r-- | mm/swapfile.c | 12 | ||||
-rw-r--r-- | mm/vmalloc.c | 1 |
7 files changed, 140 insertions, 101 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index edd300aca173..57963c6063d1 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -224,7 +224,9 @@ config KSM | |||
224 | the many instances by a single resident page with that content, so | 224 | the many instances by a single resident page with that content, so |
225 | saving memory until one or another app needs to modify the content. | 225 | saving memory until one or another app needs to modify the content. |
226 | Recommended for use with KVM, or with other duplicative applications. | 226 | Recommended for use with KVM, or with other duplicative applications. |
227 | See Documentation/vm/ksm.txt for more information. | 227 | See Documentation/vm/ksm.txt for more information: KSM is inactive |
228 | until a program has madvised that an area is MADV_MERGEABLE, and | ||
229 | root has set /sys/kernel/mm/ksm/run to 1 (if CONFIG_SYSFS is set). | ||
228 | 230 | ||
229 | config DEFAULT_MMAP_MIN_ADDR | 231 | config DEFAULT_MMAP_MIN_ADDR |
230 | int "Low address space to protect from user allocation" | 232 | int "Low address space to protect from user allocation" |
@@ -184,11 +184,6 @@ static DEFINE_SPINLOCK(ksm_mmlist_lock); | |||
184 | sizeof(struct __struct), __alignof__(struct __struct),\ | 184 | sizeof(struct __struct), __alignof__(struct __struct),\ |
185 | (__flags), NULL) | 185 | (__flags), NULL) |
186 | 186 | ||
187 | static void __init ksm_init_max_kernel_pages(void) | ||
188 | { | ||
189 | ksm_max_kernel_pages = nr_free_buffer_pages() / 4; | ||
190 | } | ||
191 | |||
192 | static int __init ksm_slab_init(void) | 187 | static int __init ksm_slab_init(void) |
193 | { | 188 | { |
194 | rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0); | 189 | rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0); |
@@ -1673,7 +1668,7 @@ static int __init ksm_init(void) | |||
1673 | struct task_struct *ksm_thread; | 1668 | struct task_struct *ksm_thread; |
1674 | int err; | 1669 | int err; |
1675 | 1670 | ||
1676 | ksm_init_max_kernel_pages(); | 1671 | ksm_max_kernel_pages = totalram_pages / 4; |
1677 | 1672 | ||
1678 | err = ksm_slab_init(); | 1673 | err = ksm_slab_init(); |
1679 | if (err) | 1674 | if (err) |
@@ -1697,6 +1692,9 @@ static int __init ksm_init(void) | |||
1697 | kthread_stop(ksm_thread); | 1692 | kthread_stop(ksm_thread); |
1698 | goto out_free2; | 1693 | goto out_free2; |
1699 | } | 1694 | } |
1695 | #else | ||
1696 | ksm_run = KSM_RUN_MERGE; /* no way for user to start it */ | ||
1697 | |||
1700 | #endif /* CONFIG_SYSFS */ | 1698 | #endif /* CONFIG_SYSFS */ |
1701 | 1699 | ||
1702 | return 0; | 1700 | return 0; |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e2b98a6875c0..f99f5991d6bb 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -313,7 +313,8 @@ soft_limit_tree_from_page(struct page *page) | |||
313 | static void | 313 | static void |
314 | __mem_cgroup_insert_exceeded(struct mem_cgroup *mem, | 314 | __mem_cgroup_insert_exceeded(struct mem_cgroup *mem, |
315 | struct mem_cgroup_per_zone *mz, | 315 | struct mem_cgroup_per_zone *mz, |
316 | struct mem_cgroup_tree_per_zone *mctz) | 316 | struct mem_cgroup_tree_per_zone *mctz, |
317 | unsigned long long new_usage_in_excess) | ||
317 | { | 318 | { |
318 | struct rb_node **p = &mctz->rb_root.rb_node; | 319 | struct rb_node **p = &mctz->rb_root.rb_node; |
319 | struct rb_node *parent = NULL; | 320 | struct rb_node *parent = NULL; |
@@ -322,7 +323,9 @@ __mem_cgroup_insert_exceeded(struct mem_cgroup *mem, | |||
322 | if (mz->on_tree) | 323 | if (mz->on_tree) |
323 | return; | 324 | return; |
324 | 325 | ||
325 | mz->usage_in_excess = res_counter_soft_limit_excess(&mem->res); | 326 | mz->usage_in_excess = new_usage_in_excess; |
327 | if (!mz->usage_in_excess) | ||
328 | return; | ||
326 | while (*p) { | 329 | while (*p) { |
327 | parent = *p; | 330 | parent = *p; |
328 | mz_node = rb_entry(parent, struct mem_cgroup_per_zone, | 331 | mz_node = rb_entry(parent, struct mem_cgroup_per_zone, |
@@ -353,16 +356,6 @@ __mem_cgroup_remove_exceeded(struct mem_cgroup *mem, | |||
353 | } | 356 | } |
354 | 357 | ||
355 | static void | 358 | static void |
356 | mem_cgroup_insert_exceeded(struct mem_cgroup *mem, | ||
357 | struct mem_cgroup_per_zone *mz, | ||
358 | struct mem_cgroup_tree_per_zone *mctz) | ||
359 | { | ||
360 | spin_lock(&mctz->lock); | ||
361 | __mem_cgroup_insert_exceeded(mem, mz, mctz); | ||
362 | spin_unlock(&mctz->lock); | ||
363 | } | ||
364 | |||
365 | static void | ||
366 | mem_cgroup_remove_exceeded(struct mem_cgroup *mem, | 359 | mem_cgroup_remove_exceeded(struct mem_cgroup *mem, |
367 | struct mem_cgroup_per_zone *mz, | 360 | struct mem_cgroup_per_zone *mz, |
368 | struct mem_cgroup_tree_per_zone *mctz) | 361 | struct mem_cgroup_tree_per_zone *mctz) |
@@ -392,34 +385,36 @@ static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem) | |||
392 | 385 | ||
393 | static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page) | 386 | static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page) |
394 | { | 387 | { |
395 | unsigned long long prev_usage_in_excess, new_usage_in_excess; | 388 | unsigned long long excess; |
396 | bool updated_tree = false; | ||
397 | struct mem_cgroup_per_zone *mz; | 389 | struct mem_cgroup_per_zone *mz; |
398 | struct mem_cgroup_tree_per_zone *mctz; | 390 | struct mem_cgroup_tree_per_zone *mctz; |
399 | 391 | int nid = page_to_nid(page); | |
400 | mz = mem_cgroup_zoneinfo(mem, page_to_nid(page), page_zonenum(page)); | 392 | int zid = page_zonenum(page); |
401 | mctz = soft_limit_tree_from_page(page); | 393 | mctz = soft_limit_tree_from_page(page); |
402 | 394 | ||
403 | /* | 395 | /* |
404 | * We do updates in lazy mode, mem's are removed | 396 | * Necessary to update all ancestors when hierarchy is used, |
405 | * lazily from the per-zone, per-node rb tree | 397 | * because their event counters are not touched. |
406 | */ | 398 | */ |
407 | prev_usage_in_excess = mz->usage_in_excess; | 399 | for (; mem; mem = parent_mem_cgroup(mem)) { |
408 | 400 | mz = mem_cgroup_zoneinfo(mem, nid, zid); | |
409 | new_usage_in_excess = res_counter_soft_limit_excess(&mem->res); | 401 | excess = res_counter_soft_limit_excess(&mem->res); |
410 | if (prev_usage_in_excess) { | 402 | /* |
411 | mem_cgroup_remove_exceeded(mem, mz, mctz); | 403 | * We have to update the tree if mz is on RB-tree or |
412 | updated_tree = true; | 404 | * mem is over its softlimit. |
413 | } | 405 | */ |
414 | if (!new_usage_in_excess) | 406 | if (excess || mz->on_tree) { |
415 | goto done; | 407 | spin_lock(&mctz->lock); |
416 | mem_cgroup_insert_exceeded(mem, mz, mctz); | 408 | /* if on-tree, remove it */ |
417 | 409 | if (mz->on_tree) | |
418 | done: | 410 | __mem_cgroup_remove_exceeded(mem, mz, mctz); |
419 | if (updated_tree) { | 411 | /* |
420 | spin_lock(&mctz->lock); | 412 | * Insert again. mz->usage_in_excess will be updated. |
421 | mz->usage_in_excess = new_usage_in_excess; | 413 | * If excess is 0, no tree ops. |
422 | spin_unlock(&mctz->lock); | 414 | */ |
415 | __mem_cgroup_insert_exceeded(mem, mz, mctz, excess); | ||
416 | spin_unlock(&mctz->lock); | ||
417 | } | ||
423 | } | 418 | } |
424 | } | 419 | } |
425 | 420 | ||
@@ -447,9 +442,10 @@ static struct mem_cgroup_per_zone * | |||
447 | __mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) | 442 | __mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) |
448 | { | 443 | { |
449 | struct rb_node *rightmost = NULL; | 444 | struct rb_node *rightmost = NULL; |
450 | struct mem_cgroup_per_zone *mz = NULL; | 445 | struct mem_cgroup_per_zone *mz; |
451 | 446 | ||
452 | retry: | 447 | retry: |
448 | mz = NULL; | ||
453 | rightmost = rb_last(&mctz->rb_root); | 449 | rightmost = rb_last(&mctz->rb_root); |
454 | if (!rightmost) | 450 | if (!rightmost) |
455 | goto done; /* Nothing to reclaim from */ | 451 | goto done; /* Nothing to reclaim from */ |
@@ -1270,9 +1266,9 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
1270 | gfp_t gfp_mask, struct mem_cgroup **memcg, | 1266 | gfp_t gfp_mask, struct mem_cgroup **memcg, |
1271 | bool oom, struct page *page) | 1267 | bool oom, struct page *page) |
1272 | { | 1268 | { |
1273 | struct mem_cgroup *mem, *mem_over_limit, *mem_over_soft_limit; | 1269 | struct mem_cgroup *mem, *mem_over_limit; |
1274 | int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; | 1270 | int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; |
1275 | struct res_counter *fail_res, *soft_fail_res = NULL; | 1271 | struct res_counter *fail_res; |
1276 | 1272 | ||
1277 | if (unlikely(test_thread_flag(TIF_MEMDIE))) { | 1273 | if (unlikely(test_thread_flag(TIF_MEMDIE))) { |
1278 | /* Don't account this! */ | 1274 | /* Don't account this! */ |
@@ -1304,17 +1300,16 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
1304 | 1300 | ||
1305 | if (mem_cgroup_is_root(mem)) | 1301 | if (mem_cgroup_is_root(mem)) |
1306 | goto done; | 1302 | goto done; |
1307 | ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res, | 1303 | ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res); |
1308 | &soft_fail_res); | ||
1309 | if (likely(!ret)) { | 1304 | if (likely(!ret)) { |
1310 | if (!do_swap_account) | 1305 | if (!do_swap_account) |
1311 | break; | 1306 | break; |
1312 | ret = res_counter_charge(&mem->memsw, PAGE_SIZE, | 1307 | ret = res_counter_charge(&mem->memsw, PAGE_SIZE, |
1313 | &fail_res, NULL); | 1308 | &fail_res); |
1314 | if (likely(!ret)) | 1309 | if (likely(!ret)) |
1315 | break; | 1310 | break; |
1316 | /* mem+swap counter fails */ | 1311 | /* mem+swap counter fails */ |
1317 | res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); | 1312 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
1318 | flags |= MEM_CGROUP_RECLAIM_NOSWAP; | 1313 | flags |= MEM_CGROUP_RECLAIM_NOSWAP; |
1319 | mem_over_limit = mem_cgroup_from_res_counter(fail_res, | 1314 | mem_over_limit = mem_cgroup_from_res_counter(fail_res, |
1320 | memsw); | 1315 | memsw); |
@@ -1353,16 +1348,11 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
1353 | } | 1348 | } |
1354 | } | 1349 | } |
1355 | /* | 1350 | /* |
1356 | * Insert just the ancestor, we should trickle down to the correct | 1351 | * Insert ancestor (and ancestor's ancestors) into the softlimit RB-tree |
1357 | * cgroup for reclaim, since the other nodes will be below their | 1352 | * if they exceed their softlimit. |
1358 | * soft limit | ||
1359 | */ | 1353 | */ |
1360 | if (soft_fail_res) { | 1354 | if (mem_cgroup_soft_limit_check(mem)) |
1361 | mem_over_soft_limit = | 1355 | mem_cgroup_update_tree(mem, page); |
1362 | mem_cgroup_from_res_counter(soft_fail_res, res); | ||
1363 | if (mem_cgroup_soft_limit_check(mem_over_soft_limit)) | ||
1364 | mem_cgroup_update_tree(mem_over_soft_limit, page); | ||
1365 | } | ||
1366 | done: | 1356 | done: |
1367 | return 0; | 1357 | return 0; |
1368 | nomem: | 1358 | nomem: |
@@ -1437,10 +1427,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, | |||
1437 | if (unlikely(PageCgroupUsed(pc))) { | 1427 | if (unlikely(PageCgroupUsed(pc))) { |
1438 | unlock_page_cgroup(pc); | 1428 | unlock_page_cgroup(pc); |
1439 | if (!mem_cgroup_is_root(mem)) { | 1429 | if (!mem_cgroup_is_root(mem)) { |
1440 | res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); | 1430 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
1441 | if (do_swap_account) | 1431 | if (do_swap_account) |
1442 | res_counter_uncharge(&mem->memsw, PAGE_SIZE, | 1432 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); |
1443 | NULL); | ||
1444 | } | 1433 | } |
1445 | css_put(&mem->css); | 1434 | css_put(&mem->css); |
1446 | return; | 1435 | return; |
@@ -1519,7 +1508,7 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, | |||
1519 | goto out; | 1508 | goto out; |
1520 | 1509 | ||
1521 | if (!mem_cgroup_is_root(from)) | 1510 | if (!mem_cgroup_is_root(from)) |
1522 | res_counter_uncharge(&from->res, PAGE_SIZE, NULL); | 1511 | res_counter_uncharge(&from->res, PAGE_SIZE); |
1523 | mem_cgroup_charge_statistics(from, pc, false); | 1512 | mem_cgroup_charge_statistics(from, pc, false); |
1524 | 1513 | ||
1525 | page = pc->page; | 1514 | page = pc->page; |
@@ -1539,7 +1528,7 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, | |||
1539 | } | 1528 | } |
1540 | 1529 | ||
1541 | if (do_swap_account && !mem_cgroup_is_root(from)) | 1530 | if (do_swap_account && !mem_cgroup_is_root(from)) |
1542 | res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL); | 1531 | res_counter_uncharge(&from->memsw, PAGE_SIZE); |
1543 | css_put(&from->css); | 1532 | css_put(&from->css); |
1544 | 1533 | ||
1545 | css_get(&to->css); | 1534 | css_get(&to->css); |
@@ -1610,9 +1599,9 @@ uncharge: | |||
1610 | css_put(&parent->css); | 1599 | css_put(&parent->css); |
1611 | /* uncharge if move fails */ | 1600 | /* uncharge if move fails */ |
1612 | if (!mem_cgroup_is_root(parent)) { | 1601 | if (!mem_cgroup_is_root(parent)) { |
1613 | res_counter_uncharge(&parent->res, PAGE_SIZE, NULL); | 1602 | res_counter_uncharge(&parent->res, PAGE_SIZE); |
1614 | if (do_swap_account) | 1603 | if (do_swap_account) |
1615 | res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL); | 1604 | res_counter_uncharge(&parent->memsw, PAGE_SIZE); |
1616 | } | 1605 | } |
1617 | return ret; | 1606 | return ret; |
1618 | } | 1607 | } |
@@ -1803,8 +1792,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, | |||
1803 | * calling css_tryget | 1792 | * calling css_tryget |
1804 | */ | 1793 | */ |
1805 | if (!mem_cgroup_is_root(memcg)) | 1794 | if (!mem_cgroup_is_root(memcg)) |
1806 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE, | 1795 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE); |
1807 | NULL); | ||
1808 | mem_cgroup_swap_statistics(memcg, false); | 1796 | mem_cgroup_swap_statistics(memcg, false); |
1809 | mem_cgroup_put(memcg); | 1797 | mem_cgroup_put(memcg); |
1810 | } | 1798 | } |
@@ -1831,9 +1819,9 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem) | |||
1831 | if (!mem) | 1819 | if (!mem) |
1832 | return; | 1820 | return; |
1833 | if (!mem_cgroup_is_root(mem)) { | 1821 | if (!mem_cgroup_is_root(mem)) { |
1834 | res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); | 1822 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
1835 | if (do_swap_account) | 1823 | if (do_swap_account) |
1836 | res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL); | 1824 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); |
1837 | } | 1825 | } |
1838 | css_put(&mem->css); | 1826 | css_put(&mem->css); |
1839 | } | 1827 | } |
@@ -1848,7 +1836,6 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
1848 | struct page_cgroup *pc; | 1836 | struct page_cgroup *pc; |
1849 | struct mem_cgroup *mem = NULL; | 1837 | struct mem_cgroup *mem = NULL; |
1850 | struct mem_cgroup_per_zone *mz; | 1838 | struct mem_cgroup_per_zone *mz; |
1851 | bool soft_limit_excess = false; | ||
1852 | 1839 | ||
1853 | if (mem_cgroup_disabled()) | 1840 | if (mem_cgroup_disabled()) |
1854 | return NULL; | 1841 | return NULL; |
@@ -1888,10 +1875,10 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
1888 | } | 1875 | } |
1889 | 1876 | ||
1890 | if (!mem_cgroup_is_root(mem)) { | 1877 | if (!mem_cgroup_is_root(mem)) { |
1891 | res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess); | 1878 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
1892 | if (do_swap_account && | 1879 | if (do_swap_account && |
1893 | (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)) | 1880 | (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)) |
1894 | res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL); | 1881 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); |
1895 | } | 1882 | } |
1896 | if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) | 1883 | if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) |
1897 | mem_cgroup_swap_statistics(mem, true); | 1884 | mem_cgroup_swap_statistics(mem, true); |
@@ -1908,7 +1895,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
1908 | mz = page_cgroup_zoneinfo(pc); | 1895 | mz = page_cgroup_zoneinfo(pc); |
1909 | unlock_page_cgroup(pc); | 1896 | unlock_page_cgroup(pc); |
1910 | 1897 | ||
1911 | if (soft_limit_excess && mem_cgroup_soft_limit_check(mem)) | 1898 | if (mem_cgroup_soft_limit_check(mem)) |
1912 | mem_cgroup_update_tree(mem, page); | 1899 | mem_cgroup_update_tree(mem, page); |
1913 | /* at swapout, this memcg will be accessed to record to swap */ | 1900 | /* at swapout, this memcg will be accessed to record to swap */ |
1914 | if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT) | 1901 | if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT) |
@@ -1986,7 +1973,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent) | |||
1986 | * This memcg can be obsolete one. We avoid calling css_tryget | 1973 | * This memcg can be obsolete one. We avoid calling css_tryget |
1987 | */ | 1974 | */ |
1988 | if (!mem_cgroup_is_root(memcg)) | 1975 | if (!mem_cgroup_is_root(memcg)) |
1989 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL); | 1976 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE); |
1990 | mem_cgroup_swap_statistics(memcg, false); | 1977 | mem_cgroup_swap_statistics(memcg, false); |
1991 | mem_cgroup_put(memcg); | 1978 | mem_cgroup_put(memcg); |
1992 | } | 1979 | } |
@@ -2233,6 +2220,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | |||
2233 | unsigned long reclaimed; | 2220 | unsigned long reclaimed; |
2234 | int loop = 0; | 2221 | int loop = 0; |
2235 | struct mem_cgroup_tree_per_zone *mctz; | 2222 | struct mem_cgroup_tree_per_zone *mctz; |
2223 | unsigned long long excess; | ||
2236 | 2224 | ||
2237 | if (order > 0) | 2225 | if (order > 0) |
2238 | return 0; | 2226 | return 0; |
@@ -2284,9 +2272,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | |||
2284 | break; | 2272 | break; |
2285 | } while (1); | 2273 | } while (1); |
2286 | } | 2274 | } |
2287 | mz->usage_in_excess = | ||
2288 | res_counter_soft_limit_excess(&mz->mem->res); | ||
2289 | __mem_cgroup_remove_exceeded(mz->mem, mz, mctz); | 2275 | __mem_cgroup_remove_exceeded(mz->mem, mz, mctz); |
2276 | excess = res_counter_soft_limit_excess(&mz->mem->res); | ||
2290 | /* | 2277 | /* |
2291 | * One school of thought says that we should not add | 2278 | * One school of thought says that we should not add |
2292 | * back the node to the tree if reclaim returns 0. | 2279 | * back the node to the tree if reclaim returns 0. |
@@ -2295,8 +2282,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | |||
2295 | * memory to reclaim from. Consider this as a longer | 2282 | * memory to reclaim from. Consider this as a longer |
2296 | * term TODO. | 2283 | * term TODO. |
2297 | */ | 2284 | */ |
2298 | if (mz->usage_in_excess) | 2285 | /* If excess == 0, no tree ops */ |
2299 | __mem_cgroup_insert_exceeded(mz->mem, mz, mctz); | 2286 | __mem_cgroup_insert_exceeded(mz->mem, mz, mctz, excess); |
2300 | spin_unlock(&mctz->lock); | 2287 | spin_unlock(&mctz->lock); |
2301 | css_put(&mz->mem->css); | 2288 | css_put(&mz->mem->css); |
2302 | loop++; | 2289 | loop++; |
diff --git a/mm/percpu.c b/mm/percpu.c index 43d8cacfdaa5..4a048abad043 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
@@ -1043,7 +1043,9 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) | |||
1043 | */ | 1043 | */ |
1044 | static void *pcpu_alloc(size_t size, size_t align, bool reserved) | 1044 | static void *pcpu_alloc(size_t size, size_t align, bool reserved) |
1045 | { | 1045 | { |
1046 | static int warn_limit = 10; | ||
1046 | struct pcpu_chunk *chunk; | 1047 | struct pcpu_chunk *chunk; |
1048 | const char *err; | ||
1047 | int slot, off; | 1049 | int slot, off; |
1048 | 1050 | ||
1049 | if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { | 1051 | if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { |
@@ -1059,11 +1061,14 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved) | |||
1059 | if (reserved && pcpu_reserved_chunk) { | 1061 | if (reserved && pcpu_reserved_chunk) { |
1060 | chunk = pcpu_reserved_chunk; | 1062 | chunk = pcpu_reserved_chunk; |
1061 | if (size > chunk->contig_hint || | 1063 | if (size > chunk->contig_hint || |
1062 | pcpu_extend_area_map(chunk) < 0) | 1064 | pcpu_extend_area_map(chunk) < 0) { |
1065 | err = "failed to extend area map of reserved chunk"; | ||
1063 | goto fail_unlock; | 1066 | goto fail_unlock; |
1067 | } | ||
1064 | off = pcpu_alloc_area(chunk, size, align); | 1068 | off = pcpu_alloc_area(chunk, size, align); |
1065 | if (off >= 0) | 1069 | if (off >= 0) |
1066 | goto area_found; | 1070 | goto area_found; |
1071 | err = "alloc from reserved chunk failed"; | ||
1067 | goto fail_unlock; | 1072 | goto fail_unlock; |
1068 | } | 1073 | } |
1069 | 1074 | ||
@@ -1080,6 +1085,7 @@ restart: | |||
1080 | case 1: | 1085 | case 1: |
1081 | goto restart; /* pcpu_lock dropped, restart */ | 1086 | goto restart; /* pcpu_lock dropped, restart */ |
1082 | default: | 1087 | default: |
1088 | err = "failed to extend area map"; | ||
1083 | goto fail_unlock; | 1089 | goto fail_unlock; |
1084 | } | 1090 | } |
1085 | 1091 | ||
@@ -1093,8 +1099,10 @@ restart: | |||
1093 | spin_unlock_irq(&pcpu_lock); | 1099 | spin_unlock_irq(&pcpu_lock); |
1094 | 1100 | ||
1095 | chunk = alloc_pcpu_chunk(); | 1101 | chunk = alloc_pcpu_chunk(); |
1096 | if (!chunk) | 1102 | if (!chunk) { |
1103 | err = "failed to allocate new chunk"; | ||
1097 | goto fail_unlock_mutex; | 1104 | goto fail_unlock_mutex; |
1105 | } | ||
1098 | 1106 | ||
1099 | spin_lock_irq(&pcpu_lock); | 1107 | spin_lock_irq(&pcpu_lock); |
1100 | pcpu_chunk_relocate(chunk, -1); | 1108 | pcpu_chunk_relocate(chunk, -1); |
@@ -1107,6 +1115,7 @@ area_found: | |||
1107 | if (pcpu_populate_chunk(chunk, off, size)) { | 1115 | if (pcpu_populate_chunk(chunk, off, size)) { |
1108 | spin_lock_irq(&pcpu_lock); | 1116 | spin_lock_irq(&pcpu_lock); |
1109 | pcpu_free_area(chunk, off); | 1117 | pcpu_free_area(chunk, off); |
1118 | err = "failed to populate"; | ||
1110 | goto fail_unlock; | 1119 | goto fail_unlock; |
1111 | } | 1120 | } |
1112 | 1121 | ||
@@ -1119,6 +1128,13 @@ fail_unlock: | |||
1119 | spin_unlock_irq(&pcpu_lock); | 1128 | spin_unlock_irq(&pcpu_lock); |
1120 | fail_unlock_mutex: | 1129 | fail_unlock_mutex: |
1121 | mutex_unlock(&pcpu_alloc_mutex); | 1130 | mutex_unlock(&pcpu_alloc_mutex); |
1131 | if (warn_limit) { | ||
1132 | pr_warning("PERCPU: allocation failed, size=%zu align=%zu, " | ||
1133 | "%s\n", size, align, err); | ||
1134 | dump_stack(); | ||
1135 | if (!--warn_limit) | ||
1136 | pr_info("PERCPU: limit reached, disable warning\n"); | ||
1137 | } | ||
1122 | return NULL; | 1138 | return NULL; |
1123 | } | 1139 | } |
1124 | 1140 | ||
@@ -1347,6 +1363,10 @@ struct pcpu_alloc_info * __init pcpu_build_alloc_info( | |||
1347 | struct pcpu_alloc_info *ai; | 1363 | struct pcpu_alloc_info *ai; |
1348 | unsigned int *cpu_map; | 1364 | unsigned int *cpu_map; |
1349 | 1365 | ||
1366 | /* this function may be called multiple times */ | ||
1367 | memset(group_map, 0, sizeof(group_map)); | ||
1368 | memset(group_cnt, 0, sizeof(group_map)); | ||
1369 | |||
1350 | /* | 1370 | /* |
1351 | * Determine min_unit_size, alloc_size and max_upa such that | 1371 | * Determine min_unit_size, alloc_size and max_upa such that |
1352 | * alloc_size is multiple of atom_size and is the smallest | 1372 | * alloc_size is multiple of atom_size and is the smallest |
@@ -1574,6 +1594,7 @@ static void pcpu_dump_alloc_info(const char *lvl, | |||
1574 | int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | 1594 | int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, |
1575 | void *base_addr) | 1595 | void *base_addr) |
1576 | { | 1596 | { |
1597 | static char cpus_buf[4096] __initdata; | ||
1577 | static int smap[2], dmap[2]; | 1598 | static int smap[2], dmap[2]; |
1578 | size_t dyn_size = ai->dyn_size; | 1599 | size_t dyn_size = ai->dyn_size; |
1579 | size_t size_sum = ai->static_size + ai->reserved_size + dyn_size; | 1600 | size_t size_sum = ai->static_size + ai->reserved_size + dyn_size; |
@@ -1585,17 +1606,26 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
1585 | int *unit_map; | 1606 | int *unit_map; |
1586 | int group, unit, i; | 1607 | int group, unit, i; |
1587 | 1608 | ||
1609 | cpumask_scnprintf(cpus_buf, sizeof(cpus_buf), cpu_possible_mask); | ||
1610 | |||
1611 | #define PCPU_SETUP_BUG_ON(cond) do { \ | ||
1612 | if (unlikely(cond)) { \ | ||
1613 | pr_emerg("PERCPU: failed to initialize, %s", #cond); \ | ||
1614 | pr_emerg("PERCPU: cpu_possible_mask=%s\n", cpus_buf); \ | ||
1615 | pcpu_dump_alloc_info(KERN_EMERG, ai); \ | ||
1616 | BUG(); \ | ||
1617 | } \ | ||
1618 | } while (0) | ||
1619 | |||
1588 | /* sanity checks */ | 1620 | /* sanity checks */ |
1589 | BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || | 1621 | BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || |
1590 | ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); | 1622 | ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); |
1591 | BUG_ON(ai->nr_groups <= 0); | 1623 | PCPU_SETUP_BUG_ON(ai->nr_groups <= 0); |
1592 | BUG_ON(!ai->static_size); | 1624 | PCPU_SETUP_BUG_ON(!ai->static_size); |
1593 | BUG_ON(!base_addr); | 1625 | PCPU_SETUP_BUG_ON(!base_addr); |
1594 | BUG_ON(ai->unit_size < size_sum); | 1626 | PCPU_SETUP_BUG_ON(ai->unit_size < size_sum); |
1595 | BUG_ON(ai->unit_size & ~PAGE_MASK); | 1627 | PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK); |
1596 | BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE); | 1628 | PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE); |
1597 | |||
1598 | pcpu_dump_alloc_info(KERN_DEBUG, ai); | ||
1599 | 1629 | ||
1600 | /* process group information and build config tables accordingly */ | 1630 | /* process group information and build config tables accordingly */ |
1601 | group_offsets = alloc_bootmem(ai->nr_groups * sizeof(group_offsets[0])); | 1631 | group_offsets = alloc_bootmem(ai->nr_groups * sizeof(group_offsets[0])); |
@@ -1604,7 +1634,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
1604 | unit_off = alloc_bootmem(nr_cpu_ids * sizeof(unit_off[0])); | 1634 | unit_off = alloc_bootmem(nr_cpu_ids * sizeof(unit_off[0])); |
1605 | 1635 | ||
1606 | for (cpu = 0; cpu < nr_cpu_ids; cpu++) | 1636 | for (cpu = 0; cpu < nr_cpu_ids; cpu++) |
1607 | unit_map[cpu] = NR_CPUS; | 1637 | unit_map[cpu] = UINT_MAX; |
1608 | pcpu_first_unit_cpu = NR_CPUS; | 1638 | pcpu_first_unit_cpu = NR_CPUS; |
1609 | 1639 | ||
1610 | for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { | 1640 | for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { |
@@ -1618,8 +1648,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
1618 | if (cpu == NR_CPUS) | 1648 | if (cpu == NR_CPUS) |
1619 | continue; | 1649 | continue; |
1620 | 1650 | ||
1621 | BUG_ON(cpu > nr_cpu_ids || !cpu_possible(cpu)); | 1651 | PCPU_SETUP_BUG_ON(cpu > nr_cpu_ids); |
1622 | BUG_ON(unit_map[cpu] != NR_CPUS); | 1652 | PCPU_SETUP_BUG_ON(!cpu_possible(cpu)); |
1653 | PCPU_SETUP_BUG_ON(unit_map[cpu] != UINT_MAX); | ||
1623 | 1654 | ||
1624 | unit_map[cpu] = unit + i; | 1655 | unit_map[cpu] = unit + i; |
1625 | unit_off[cpu] = gi->base_offset + i * ai->unit_size; | 1656 | unit_off[cpu] = gi->base_offset + i * ai->unit_size; |
@@ -1632,7 +1663,11 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
1632 | pcpu_nr_units = unit; | 1663 | pcpu_nr_units = unit; |
1633 | 1664 | ||
1634 | for_each_possible_cpu(cpu) | 1665 | for_each_possible_cpu(cpu) |
1635 | BUG_ON(unit_map[cpu] == NR_CPUS); | 1666 | PCPU_SETUP_BUG_ON(unit_map[cpu] == UINT_MAX); |
1667 | |||
1668 | /* we're done parsing the input, undefine BUG macro and dump config */ | ||
1669 | #undef PCPU_SETUP_BUG_ON | ||
1670 | pcpu_dump_alloc_info(KERN_INFO, ai); | ||
1636 | 1671 | ||
1637 | pcpu_nr_groups = ai->nr_groups; | 1672 | pcpu_nr_groups = ai->nr_groups; |
1638 | pcpu_group_offsets = group_offsets; | 1673 | pcpu_group_offsets = group_offsets; |
@@ -1782,7 +1817,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size, | |||
1782 | void *base = (void *)ULONG_MAX; | 1817 | void *base = (void *)ULONG_MAX; |
1783 | void **areas = NULL; | 1818 | void **areas = NULL; |
1784 | struct pcpu_alloc_info *ai; | 1819 | struct pcpu_alloc_info *ai; |
1785 | size_t size_sum, areas_size; | 1820 | size_t size_sum, areas_size, max_distance; |
1786 | int group, i, rc; | 1821 | int group, i, rc; |
1787 | 1822 | ||
1788 | ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size, | 1823 | ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size, |
@@ -1832,8 +1867,24 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size, | |||
1832 | } | 1867 | } |
1833 | 1868 | ||
1834 | /* base address is now known, determine group base offsets */ | 1869 | /* base address is now known, determine group base offsets */ |
1835 | for (group = 0; group < ai->nr_groups; group++) | 1870 | max_distance = 0; |
1871 | for (group = 0; group < ai->nr_groups; group++) { | ||
1836 | ai->groups[group].base_offset = areas[group] - base; | 1872 | ai->groups[group].base_offset = areas[group] - base; |
1873 | max_distance = max(max_distance, ai->groups[group].base_offset); | ||
1874 | } | ||
1875 | max_distance += ai->unit_size; | ||
1876 | |||
1877 | /* warn if maximum distance is further than 75% of vmalloc space */ | ||
1878 | if (max_distance > (VMALLOC_END - VMALLOC_START) * 3 / 4) { | ||
1879 | pr_warning("PERCPU: max_distance=0x%lx too large for vmalloc " | ||
1880 | "space 0x%lx\n", | ||
1881 | max_distance, VMALLOC_END - VMALLOC_START); | ||
1882 | #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK | ||
1883 | /* and fail if we have fallback */ | ||
1884 | rc = -EINVAL; | ||
1885 | goto out_free; | ||
1886 | #endif | ||
1887 | } | ||
1837 | 1888 | ||
1838 | pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n", | 1889 | pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n", |
1839 | PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size, | 1890 | PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size, |
@@ -242,8 +242,8 @@ vma_address(struct page *page, struct vm_area_struct *vma) | |||
242 | } | 242 | } |
243 | 243 | ||
244 | /* | 244 | /* |
245 | * At what user virtual address is page expected in vma? checking that the | 245 | * At what user virtual address is page expected in vma? |
246 | * page matches the vma: currently only used on anon pages, by unuse_vma; | 246 | * checking that the page matches the vma. |
247 | */ | 247 | */ |
248 | unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) | 248 | unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) |
249 | { | 249 | { |
diff --git a/mm/swapfile.c b/mm/swapfile.c index 4de7f02f820b..a1bc6b9af9a2 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -1974,12 +1974,14 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | |||
1974 | goto bad_swap; | 1974 | goto bad_swap; |
1975 | } | 1975 | } |
1976 | 1976 | ||
1977 | if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { | 1977 | if (p->bdev) { |
1978 | p->flags |= SWP_SOLIDSTATE; | 1978 | if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { |
1979 | p->cluster_next = 1 + (random32() % p->highest_bit); | 1979 | p->flags |= SWP_SOLIDSTATE; |
1980 | p->cluster_next = 1 + (random32() % p->highest_bit); | ||
1981 | } | ||
1982 | if (discard_swap(p) == 0) | ||
1983 | p->flags |= SWP_DISCARDABLE; | ||
1980 | } | 1984 | } |
1981 | if (discard_swap(p) == 0) | ||
1982 | p->flags |= SWP_DISCARDABLE; | ||
1983 | 1985 | ||
1984 | mutex_lock(&swapon_mutex); | 1986 | mutex_lock(&swapon_mutex); |
1985 | spin_lock(&swap_lock); | 1987 | spin_lock(&swap_lock); |
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 69511e663234..2f7c9d75c552 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
@@ -25,7 +25,6 @@ | |||
25 | #include <linux/rcupdate.h> | 25 | #include <linux/rcupdate.h> |
26 | #include <linux/pfn.h> | 26 | #include <linux/pfn.h> |
27 | #include <linux/kmemleak.h> | 27 | #include <linux/kmemleak.h> |
28 | #include <linux/highmem.h> | ||
29 | #include <asm/atomic.h> | 28 | #include <asm/atomic.h> |
30 | #include <asm/uaccess.h> | 29 | #include <asm/uaccess.h> |
31 | #include <asm/tlbflush.h> | 30 | #include <asm/tlbflush.h> |