author     Tejun Heo <tj@kernel.org>   2011-11-28 12:46:22 -0500
committer  Tejun Heo <tj@kernel.org>   2011-11-28 12:46:22 -0500
commit     d4bbf7e7759afc172e2bfbc5c416324590049cdd
tree       7eab5ee5481cd3dcf1162329fec827177640018a /mm/page_alloc.c
parent     a150439c4a97db379f0ed6faa46fbbb6e7bf3cb2
parent     401d0069cb344f401bc9d264c31db55876ff78c0
Merge branch 'master' into x86/memblock
Conflicts & resolutions:
* arch/x86/xen/setup.c
dc91c728fd "xen: allow extra memory to be in multiple regions"
24aa07882b "memblock, x86: Replace memblock_x86_reserve/free..."
conflicted on xen_add_extra_mem() updates. The resolution is
trivial as the latter just wants to replace
memblock_x86_reserve_range() with memblock_reserve().
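  For illustration, the shape of that swap in xen_add_extra_mem()
  (a sketch only; identifiers follow the contemporaneous
  arch/x86/xen/setup.c and may not match the resolved tree exactly):

	/* old: x86-private API took a [start, end) range plus a label */
	memblock_x86_reserve_range(extra_start, extra_start + size,
				   "XEN EXTRA");
	/* new: generic API takes base and size */
	memblock_reserve(extra_start, size);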
* drivers/pci/intel-iommu.c
166e9278a3f "x86/ia64: intel-iommu: move to drivers/iommu/"
5dfe8660a3d "bootmem: Replace work_with_active_regions() with..."
conflicted as the former moved the file under drivers/iommu/.
Resolved by applying the changes from the latter on the moved
file.
* mm/Kconfig
6661672053a "memblock: add NO_BOOTMEM config symbol"
c378ddd53f9 "memblock, x86: Make ARCH_DISCARD_MEMBLOCK a config option"
conflicted trivially. Both added config options. Just
letting both add their own options resolves the conflict.
* mm/memblock.c
d1f0ece6cdc "mm/memblock.c: small function definition fixes"
ed7b56a799c "memblock: Remove memblock_memory_can_coalesce()"
conflicted. The former updates a function removed by the
latter. Resolution is trivial.
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'mm/page_alloc.c')
 mm/page_alloc.c | 135 +++++++++++++++++++++++-----------------
 1 file changed, 78 insertions(+), 57 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3c7ea45ffba9..6ce27331834c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -321,6 +321,7 @@ static void bad_page(struct page *page)
 		current->comm, page_to_pfn(page));
 	dump_page(page);
 
+	print_modules();
 	dump_stack();
 out:
 	/* Leave bad fields for debug, except PageBuddy could make trouble */
@@ -1373,21 +1374,12 @@ failed:
 
 #ifdef CONFIG_FAIL_PAGE_ALLOC
 
-static struct fail_page_alloc_attr {
+static struct {
 	struct fault_attr attr;
 
 	u32 ignore_gfp_highmem;
 	u32 ignore_gfp_wait;
 	u32 min_order;
-
-#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
-
-	struct dentry *ignore_gfp_highmem_file;
-	struct dentry *ignore_gfp_wait_file;
-	struct dentry *min_order_file;
-
-#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
-
 } fail_page_alloc = {
 	.attr = FAULT_ATTR_INITIALIZER,
 	.ignore_gfp_wait = 1,
@@ -1421,36 +1413,27 @@ static int __init fail_page_alloc_debugfs(void)
 {
 	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
 	struct dentry *dir;
-	int err;
-
-	err = init_fault_attr_dentries(&fail_page_alloc.attr,
-				       "fail_page_alloc");
-	if (err)
-		return err;
-	dir = fail_page_alloc.attr.dentries.dir;
-
-	fail_page_alloc.ignore_gfp_wait_file =
-		debugfs_create_bool("ignore-gfp-wait", mode, dir,
-				    &fail_page_alloc.ignore_gfp_wait);
-
-	fail_page_alloc.ignore_gfp_highmem_file =
-		debugfs_create_bool("ignore-gfp-highmem", mode, dir,
-				    &fail_page_alloc.ignore_gfp_highmem);
-	fail_page_alloc.min_order_file =
-		debugfs_create_u32("min-order", mode, dir,
-				   &fail_page_alloc.min_order);
-
-	if (!fail_page_alloc.ignore_gfp_wait_file ||
-	    !fail_page_alloc.ignore_gfp_highmem_file ||
-	    !fail_page_alloc.min_order_file) {
-		err = -ENOMEM;
-		debugfs_remove(fail_page_alloc.ignore_gfp_wait_file);
-		debugfs_remove(fail_page_alloc.ignore_gfp_highmem_file);
-		debugfs_remove(fail_page_alloc.min_order_file);
-		cleanup_fault_attr_dentries(&fail_page_alloc.attr);
-	}
 
-	return err;
+	dir = fault_create_debugfs_attr("fail_page_alloc", NULL,
+					&fail_page_alloc.attr);
+	if (IS_ERR(dir))
+		return PTR_ERR(dir);
+
+	if (!debugfs_create_bool("ignore-gfp-wait", mode, dir,
+				 &fail_page_alloc.ignore_gfp_wait))
+		goto fail;
+	if (!debugfs_create_bool("ignore-gfp-highmem", mode, dir,
+				 &fail_page_alloc.ignore_gfp_highmem))
+		goto fail;
+	if (!debugfs_create_u32("min-order", mode, dir,
+				&fail_page_alloc.min_order))
+		goto fail;
+
+	return 0;
+fail:
+	debugfs_remove_recursive(dir);
+
+	return -ENOMEM;
 }
 
 late_initcall(fail_page_alloc_debugfs);
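The rewrite above leans on fault_create_debugfs_attr(), which creates the
fault-injection directory and its common control files in one call, and on
debugfs_remove_recursive(), which is why the individual *_file dentries no
longer need to be tracked. A minimal sketch of the pattern for a
hypothetical client ("my_fail" and its "verbose" knob are invented here,
not part of this commit):

#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/fault-inject.h>
#include <linux/init.h>

static struct {
	struct fault_attr attr;
	u32 verbose;			/* hypothetical extra knob */
} my_fail = {
	.attr = FAULT_ATTR_INITIALIZER,
};

static int __init my_fail_debugfs(void)
{
	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
	struct dentry *dir;

	/* creates the directory and the generic fault-injection files */
	dir = fault_create_debugfs_attr("my_fail", NULL, &my_fail.attr);
	if (IS_ERR(dir))
		return PTR_ERR(dir);

	if (!debugfs_create_u32("verbose", mode, dir, &my_fail.verbose))
		goto fail;
	return 0;
fail:
	/* one recursive remove tears down everything under dir */
	debugfs_remove_recursive(dir);
	return -ENOMEM;
}
late_initcall(my_fail_debugfs);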
@@ -1619,6 +1602,21 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
 	set_bit(i, zlc->fullzones);
 }
 
+/*
+ * clear all zones full, called after direct reclaim makes progress so that
+ * a zone that was recently full is not skipped over for up to a second
+ */
+static void zlc_clear_zones_full(struct zonelist *zonelist)
+{
+	struct zonelist_cache *zlc;	/* cached zonelist speedup info */
+
+	zlc = zonelist->zlcache_ptr;
+	if (!zlc)
+		return;
+
+	bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
+}
+
 #else	/* CONFIG_NUMA */
 
 static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
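For reference, the cache this new helper operates on (reproduced from
contemporaneous mm/page_alloc.c as context, not part of this diff):

struct zonelist_cache {
	unsigned short z_to_n[MAX_ZONES_PER_ZONELIST];	/* zone->node */
	DECLARE_BITMAP(fullzones, MAX_ZONES_PER_ZONELIST); /* zone full? */
	unsigned long last_full_zap;	/* when last zap'd (jiffies) */
};

zlc_clear_zones_full() simply zeroes the fullzones bitmap, so zones marked
full before reclaim are reconsidered immediately instead of waiting for the
periodic zap.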
@@ -1635,6 +1633,10 @@ static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
 static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
 {
 }
+
+static void zlc_clear_zones_full(struct zonelist *zonelist)
+{
+}
 #endif	/* CONFIG_NUMA */
 
 /*
@@ -1667,7 +1669,7 @@ zonelist_scan:
 			continue;
 		if ((alloc_flags & ALLOC_CPUSET) &&
 			!cpuset_zone_allowed_softwall(zone, gfp_mask))
-				goto try_next_zone;
+				continue;
 
 		BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
 		if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
@@ -1679,17 +1681,36 @@ zonelist_scan:
 				    classzone_idx, alloc_flags))
 				goto try_this_zone;
 
+			if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) {
+				/*
+				 * we do zlc_setup if there are multiple nodes
+				 * and before considering the first zone allowed
+				 * by the cpuset.
+				 */
+				allowednodes = zlc_setup(zonelist, alloc_flags);
+				zlc_active = 1;
+				did_zlc_setup = 1;
+			}
+
 			if (zone_reclaim_mode == 0)
 				goto this_zone_full;
 
+			/*
+			 * As we may have just activated ZLC, check if the first
+			 * eligible zone has failed zone_reclaim recently.
+			 */
+			if (NUMA_BUILD && zlc_active &&
+				!zlc_zone_worth_trying(zonelist, z, allowednodes))
+				continue;
+
 			ret = zone_reclaim(zone, gfp_mask, order);
 			switch (ret) {
 			case ZONE_RECLAIM_NOSCAN:
 				/* did not scan */
-				goto try_next_zone;
+				continue;
 			case ZONE_RECLAIM_FULL:
 				/* scanned but unreclaimable */
-				goto this_zone_full;
+				continue;
 			default:
 				/* did we reclaim enough */
 				if (!zone_watermark_ok(zone, order, mark,
@@ -1706,16 +1727,6 @@ try_this_zone:
 this_zone_full:
 		if (NUMA_BUILD)
 			zlc_mark_zone_full(zonelist, z);
-try_next_zone:
-		if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) {
-			/*
-			 * we do zlc_setup after the first zone is tried but only
-			 * if there are multiple nodes make it worthwhile
-			 */
-			allowednodes = zlc_setup(zonelist, alloc_flags);
-			zlc_active = 1;
-			did_zlc_setup = 1;
-		}
 	}
 
 	if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) {
@@ -1746,7 +1757,6 @@ static DEFINE_RATELIMIT_STATE(nopage_rs,
 
 void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
 {
-	va_list args;
 	unsigned int filter = SHOW_MEM_FILTER_NODES;
 
 	if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
@@ -1765,14 +1775,21 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
 		filter &= ~SHOW_MEM_FILTER_NODES;
 
 	if (fmt) {
-		printk(KERN_WARNING);
+		struct va_format vaf;
+		va_list args;
+
 		va_start(args, fmt);
-		vprintk(fmt, args);
+
+		vaf.fmt = fmt;
+		vaf.va = &args;
+
+		pr_warn("%pV", &vaf);
+
 		va_end(args);
 	}
 
-	pr_warning("%s: page allocation failure: order:%d, mode:0x%x\n",
-		current->comm, order, gfp_mask);
+	pr_warn("%s: page allocation failure: order:%d, mode:0x%x\n",
+		current->comm, order, gfp_mask);
 
 	dump_stack();
 	if (!should_suppress_show_mem())
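The %pV change matters because the old code emitted KERN_WARNING and the
caller's message as separate printk calls, so the log level could detach
from the text and concurrent printks could interleave. struct va_format
(from <linux/kernel.h>) bundles a format string with its va_list so the
whole message prints atomically at one level:

struct va_format {
	const char *fmt;
	va_list *va;
};

	/* usage, as in the hunk above */
	vaf.fmt = fmt;
	vaf.va = &args;
	pr_warn("%pV", &vaf);	/* one atomic warning line */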
@@ -1957,6 +1974,10 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
 	if (unlikely(!(*did_some_progress)))
 		return NULL;
 
+	/* After successful reclaim, reconsider all zones for allocation */
+	if (NUMA_BUILD)
+		zlc_clear_zones_full(zonelist);
+
 retry:
 	page = get_page_from_freelist(gfp_mask, nodemask, order,
 					zonelist, high_zoneidx,