Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c | 423 ++++++++++++++++++++++++++++++++++++-------
1 file changed, 356 insertions(+), 67 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b1061b1962f8..206920796f5f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -105,11 +105,13 @@ static void bad_page(const char *function, struct page *page)
 	printk(KERN_EMERG "Backtrace:\n");
 	dump_stack();
 	printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n");
-	page->flags &= ~(1 << PG_private |
+	page->flags &= ~(1 << PG_lru |
+			1 << PG_private |
 			1 << PG_locked |
-			1 << PG_lru |
 			1 << PG_active |
 			1 << PG_dirty |
+			1 << PG_reclaim |
+			1 << PG_slab |
 			1 << PG_swapcache |
 			1 << PG_writeback);
 	set_page_count(page, 0);
@@ -440,14 +442,17 @@ void set_page_refs(struct page *page, int order)
  */
 static void prep_new_page(struct page *page, int order)
 {
-	if (page->mapping || page_mapcount(page) ||
-			(page->flags & (
+	if (	page_mapcount(page) ||
+		page->mapping != NULL ||
+		page_count(page) != 0 ||
+		(page->flags & (
+			1 << PG_lru |
 			1 << PG_private |
 			1 << PG_locked |
-			1 << PG_lru |
 			1 << PG_active |
 			1 << PG_dirty |
 			1 << PG_reclaim |
+			1 << PG_slab |
 			1 << PG_swapcache |
 			1 << PG_writeback )))
 		bad_page(__FUNCTION__, page);
@@ -511,6 +516,36 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 	return allocated;
 }
 
+#ifdef CONFIG_NUMA
+/* Called from the slab reaper to drain remote pagesets */
+void drain_remote_pages(void)
+{
+	struct zone *zone;
+	int i;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	for_each_zone(zone) {
+		struct per_cpu_pageset *pset;
+
+		/* Do not drain local pagesets */
+		if (zone->zone_pgdat->node_id == numa_node_id())
+			continue;
+
+		pset = zone->pageset[smp_processor_id()];
+		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
+			struct per_cpu_pages *pcp;
+
+			pcp = &pset->pcp[i];
+			if (pcp->count)
+				pcp->count -= free_pages_bulk(zone, pcp->count,
+						&pcp->list, 0);
+		}
+	}
+	local_irq_restore(flags);
+}
+#endif
+
 #if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
 static void __drain_pages(unsigned int cpu)
 {
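drain_remote_pages() runs with interrupts disabled and flushes only pagesets whose pages belong to other nodes; the local node's hot and cold caches are deliberately left intact. It is meant to be driven by the slab reaper's periodic timer. The actual hook lands in mm/slab.c in a companion patch, so the sketch below illustrates that call site rather than reproducing this diff:

static void cache_reap(void *unused)
{
#ifdef CONFIG_NUMA
	/* Return pages cached on this CPU but owned by remote NUMA
	 * nodes to their home node's buddy lists. */
	drain_remote_pages();
#endif
	/* ... existing slab reaping work ... */
}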
@@ -520,7 +555,7 @@ static void __drain_pages(unsigned int cpu)
 	for_each_zone(zone) {
 		struct per_cpu_pageset *pset;
 
-		pset = &zone->pageset[cpu];
+		pset = zone_pcp(zone, cpu);
 		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
 			struct per_cpu_pages *pcp;
 
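The zone_pcp() accessor that replaces the open-coded &zone->pageset[cpu] references comes from a companion include/linux/mmzone.h change. Under CONFIG_NUMA, struct zone holds an array of pointers to node-local, kmalloc'd pagesets; otherwise it keeps the old embedded array. Roughly (from the companion patch, not this file):

#ifdef CONFIG_NUMA
#define zone_pcp(__z, __cpu)	((__z)->pageset[(__cpu)])
#else
#define zone_pcp(__z, __cpu)	(&(__z)->pageset[(__cpu)])
#endif

Hiding the extra indirection behind one macro is what lets the rest of this patch convert call sites mechanically.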
@@ -583,12 +618,12 @@ static void zone_statistics(struct zonelist *zonelist, struct zone *z)
 
 	local_irq_save(flags);
 	cpu = smp_processor_id();
-	p = &z->pageset[cpu];
+	p = zone_pcp(z,cpu);
 	if (pg == orig) {
-		z->pageset[cpu].numa_hit++;
+		p->numa_hit++;
 	} else {
 		p->numa_miss++;
-		zonelist->zones[0]->pageset[cpu].numa_foreign++;
+		zone_pcp(zonelist->zones[0], cpu)->numa_foreign++;
 	}
 	if (pg == NODE_DATA(numa_node_id()))
 		p->local_node++;
@@ -615,12 +650,12 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
 	if (PageAnon(page))
 		page->mapping = NULL;
 	free_pages_check(__FUNCTION__, page);
-	pcp = &zone->pageset[get_cpu()].pcp[cold];
+	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
 	local_irq_save(flags);
-	if (pcp->count >= pcp->high)
-		pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
 	list_add(&page->lru, &pcp->list);
 	pcp->count++;
+	if (pcp->count >= pcp->high)
+		pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
 	local_irq_restore(flags);
 	put_cpu();
 }
@@ -659,7 +694,7 @@ buffered_rmqueue(struct zone *zone, int order, unsigned int __nocast gfp_flags)
 	if (order == 0) {
 		struct per_cpu_pages *pcp;
 
-		pcp = &zone->pageset[get_cpu()].pcp[cold];
+		pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
 		local_irq_save(flags);
 		if (pcp->count <= pcp->low)
 			pcp->count += rmqueue_bulk(zone, 0,
@@ -724,6 +759,16 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 	return 1;
 }
 
+static inline int
+should_reclaim_zone(struct zone *z, unsigned int gfp_mask)
+{
+	if (!z->reclaim_pages)
+		return 0;
+	if (gfp_mask & __GFP_NORECLAIM)
+		return 0;
+	return 1;
+}
+
 /*
  * This is the 'heart' of the zoned buddy allocator.
  */
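should_reclaim_zone() consults a new per-zone reclaim_pages switch and a new __GFP_NORECLAIM flag, both introduced by companion patches (mmzone.h and gfp.h respectively). A caller that would rather fall back to the next zone than stall on synchronous, zone-local reclaim opts out as in this hypothetical example (not taken from this diff):

static struct page *speculative_alloc(void)
{
	/* Speculative (e.g. readahead-style) allocation: losing node
	 * locality is cheaper than waiting for early zone reclaim. */
	return alloc_pages(GFP_HIGHUSER | __GFP_NORECLAIM, 0);
}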
@@ -760,17 +805,32 @@ __alloc_pages(unsigned int __nocast gfp_mask, unsigned int order,
 
 	classzone_idx = zone_idx(zones[0]);
 
 restart:
 	/* Go through the zonelist once, looking for a zone with enough free */
 	for (i = 0; (z = zones[i]) != NULL; i++) {
-
-		if (!zone_watermark_ok(z, order, z->pages_low,
-				       classzone_idx, 0, 0))
-			continue;
+		int do_reclaim = should_reclaim_zone(z, gfp_mask);
 
 		if (!cpuset_zone_allowed(z))
 			continue;
 
+		/*
+		 * If the zone is to attempt early page reclaim then this loop
+		 * will try to reclaim pages and check the watermark a second
+		 * time before giving up and falling back to the next zone.
+		 */
+zone_reclaim_retry:
+		if (!zone_watermark_ok(z, order, z->pages_low,
+				       classzone_idx, 0, 0)) {
+			if (!do_reclaim)
+				continue;
+			else {
+				zone_reclaim(z, gfp_mask, order);
+				/* Only try reclaim once */
+				do_reclaim = 0;
+				goto zone_reclaim_retry;
+			}
+		}
+
 		page = buffered_rmqueue(z, order, gfp_mask);
 		if (page)
 			goto got_pg;
@@ -829,7 +889,7 @@ rebalance:
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	did_some_progress = try_to_free_pages(zones, gfp_mask, order);
+	did_some_progress = try_to_free_pages(zones, gfp_mask);
 
 	p->reclaim_state = NULL;
 	p->flags &= ~PF_MEMALLOC;
@@ -905,6 +965,7 @@ nopage:
 			" order:%d, mode:0x%x\n",
 			p->comm, order, gfp_mask);
 		dump_stack();
+		show_mem();
 	}
 	return NULL;
 got_pg:
@@ -1114,7 +1175,7 @@ void get_full_page_state(struct page_state *ret)
 	__get_page_state(ret, sizeof(*ret) / sizeof(unsigned long));
 }
 
-unsigned long __read_page_state(unsigned offset)
+unsigned long __read_page_state(unsigned long offset)
 {
 	unsigned long ret = 0;
 	int cpu;
@@ -1128,7 +1189,7 @@ unsigned long __read_page_state(unsigned offset)
 	return ret;
 }
 
-void __mod_page_state(unsigned offset, unsigned long delta)
+void __mod_page_state(unsigned long offset, unsigned long delta)
 {
 	unsigned long flags;
 	void* ptr;
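The widening of the offset parameter in both accessors matches how callers generate it: the wrapper macros in include/linux/page-flags.h pass offsetof() results, and offsetof() has type size_t, i.e. unsigned long on 64-bit. Approximately, for context (these wrappers live outside this diff):

#define read_page_state(member) \
	__read_page_state(offsetof(struct page_state, member))
#define mod_page_state(member, delta) \
	__mod_page_state(offsetof(struct page_state, member), (delta))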
@@ -1237,22 +1298,23 @@ void show_free_areas(void)
 			if (!cpu_possible(cpu))
 				continue;
 
-			pageset = zone->pageset + cpu;
+			pageset = zone_pcp(zone, cpu);
 
 			for (temperature = 0; temperature < 2; temperature++)
-				printk("cpu %d %s: low %d, high %d, batch %d\n",
+				printk("cpu %d %s: low %d, high %d, batch %d used:%d\n",
 					cpu,
 					temperature ? "cold" : "hot",
 					pageset->pcp[temperature].low,
 					pageset->pcp[temperature].high,
-					pageset->pcp[temperature].batch);
+					pageset->pcp[temperature].batch,
+					pageset->pcp[temperature].count);
 		}
 	}
 
 	get_page_state(&ps);
 	get_zone_counts(&active, &inactive, &free);
 
-	printk("\nFree pages: %11ukB (%ukB HighMem)\n",
+	printk("Free pages: %11ukB (%ukB HighMem)\n",
 		K(nr_free_pages()),
 		K(nr_free_highpages()));
 
@@ -1620,6 +1682,155 @@ void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone,
 	memmap_init_zone((size), (nid), (zone), (start_pfn))
 #endif
 
+static int __devinit zone_batchsize(struct zone *zone)
+{
+	int batch;
+
+	/*
+	 * The per-cpu-pages pools are set to around 1000th of the
+	 * size of the zone. But no more than 1/4 of a meg - there's
+	 * no point in going beyond the size of L2 cache.
+	 *
+	 * OK, so we don't know how big the cache is. So guess.
+	 */
+	batch = zone->present_pages / 1024;
+	if (batch * PAGE_SIZE > 256 * 1024)
+		batch = (256 * 1024) / PAGE_SIZE;
+	batch /= 4;		/* We effectively *= 4 below */
+	if (batch < 1)
+		batch = 1;
+
+	/*
+	 * Clamp the batch to a 2^n - 1 value. Having a power
+	 * of 2 value was found to be more likely to have
+	 * suboptimal cache aliasing properties in some cases.
+	 *
+	 * For example if 2 tasks are alternately allocating
+	 * batches of pages, one task can end up with a lot
+	 * of pages of one half of the possible page colors
+	 * and the other with pages of the other colors.
+	 */
+	batch = (1 << fls(batch + batch/2)) - 1;
+	return batch;
+}
+
+inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
+{
+	struct per_cpu_pages *pcp;
+
+	pcp = &p->pcp[0];		/* hot */
+	pcp->count = 0;
+	pcp->low = 2 * batch;
+	pcp->high = 6 * batch;
+	pcp->batch = max(1UL, 1 * batch);
+	INIT_LIST_HEAD(&pcp->list);
+
+	pcp = &p->pcp[1];		/* cold*/
+	pcp->count = 0;
+	pcp->low = 0;
+	pcp->high = 2 * batch;
+	pcp->batch = max(1UL, 1 * batch);
+	INIT_LIST_HEAD(&pcp->list);
+}
+
+#ifdef CONFIG_NUMA
+/*
+ * Boot pageset table. One per cpu which is going to be used for all
+ * zones and all nodes. The parameters will be set in such a way
+ * that an item put on a list will immediately be handed over to
+ * the buddy list. This is safe since pageset manipulation is done
+ * with interrupts disabled.
+ *
+ * Some NUMA counter updates may also be caught by the boot pagesets.
+ * These will be discarded when bootup is complete.
+ */
+static struct per_cpu_pageset
+	boot_pageset[NR_CPUS] __initdata;
+
+/*
+ * Dynamically allocate memory for the
+ * per cpu pageset array in struct zone.
+ */
+static int __devinit process_zones(int cpu)
+{
+	struct zone *zone, *dzone;
+
+	for_each_zone(zone) {
+
+		zone->pageset[cpu] = kmalloc_node(sizeof(struct per_cpu_pageset),
+					 GFP_KERNEL, cpu_to_node(cpu));
+		if (!zone->pageset[cpu])
+			goto bad;
+
+		setup_pageset(zone->pageset[cpu], zone_batchsize(zone));
+	}
+
+	return 0;
+bad:
+	for_each_zone(dzone) {
+		if (dzone == zone)
+			break;
+		kfree(dzone->pageset[cpu]);
+		dzone->pageset[cpu] = NULL;
+	}
+	return -ENOMEM;
+}
+
+static inline void free_zone_pagesets(int cpu)
+{
+#ifdef CONFIG_NUMA
+	struct zone *zone;
+
+	for_each_zone(zone) {
+		struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
+
+		zone_pcp(zone, cpu) = NULL;
+		kfree(pset);
+	}
+#endif
+}
+
+static int __devinit pageset_cpuup_callback(struct notifier_block *nfb,
+		unsigned long action,
+		void *hcpu)
+{
+	int cpu = (long)hcpu;
+	int ret = NOTIFY_OK;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+		if (process_zones(cpu))
+			ret = NOTIFY_BAD;
+		break;
+#ifdef CONFIG_HOTPLUG_CPU
+	case CPU_DEAD:
+		free_zone_pagesets(cpu);
+		break;
+#endif
+	default:
+		break;
+	}
+	return ret;
+}
+
+static struct notifier_block pageset_notifier =
+	{ &pageset_cpuup_callback, NULL, 0 };
+
+void __init setup_per_cpu_pageset()
+{
+	int err;
+
+	/* Initialize per_cpu_pageset for cpu 0.
+	 * A cpuup callback will do this for every cpu
+	 * as it comes online
+	 */
+	err = process_zones(smp_processor_id());
+	BUG_ON(err);
+	register_cpu_notifier(&pageset_notifier);
+}
+
+#endif
+
 /*
  * Set up the zone data structures:
  *  - mark all pages reserved
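The arithmetic in zone_batchsize() is easiest to follow with concrete numbers. Below is a standalone userspace model for a 1 GiB zone of 4 KiB pages; fls() is reimplemented because it is a kernel helper, and all names and values are illustrative:

#include <stdio.h>

/* 1-based index of the highest set bit; mirrors the kernel's fls(). */
static int fls(unsigned int x)
{
	int r = 0;

	while (x) {
		r++;
		x >>= 1;
	}
	return r;
}

int main(void)
{
	unsigned long present_pages = 262144;	/* 1 GiB / 4 KiB */
	long page_size = 4096;
	int batch;

	batch = present_pages / 1024;			/* 256 */
	if (batch * page_size > 256 * 1024)
		batch = (256 * 1024) / page_size;	/* 64: the L2 cap */
	batch /= 4;					/* 16 */
	if (batch < 1)
		batch = 1;

	/* Clamp to 2^n - 1: fls(16 + 16/2) = fls(24) = 5, so 31. */
	batch = (1 << fls(batch + batch / 2)) - 1;

	printf("batch=%d hot: low=%d high=%d\n", batch, 2 * batch, 6 * batch);
	return 0;	/* prints: batch=31 hot: low=62 high=186 */
}

So any reasonably large zone ends up with a batch of 31 pages and a hot-list working range of 62 to 186 pages, which is exactly what setup_pageset() derives from the same batch value.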
@@ -1662,48 +1873,16 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
 
 		zone->temp_priority = zone->prev_priority = DEF_PRIORITY;
 
-		/*
-		 * The per-cpu-pages pools are set to around 1000th of the
-		 * size of the zone. But no more than 1/4 of a meg - there's
-		 * no point in going beyond the size of L2 cache.
-		 *
-		 * OK, so we don't know how big the cache is. So guess.
-		 */
-		batch = zone->present_pages / 1024;
-		if (batch * PAGE_SIZE > 256 * 1024)
-			batch = (256 * 1024) / PAGE_SIZE;
-		batch /= 4;		/* We effectively *= 4 below */
-		if (batch < 1)
-			batch = 1;
-
-		/*
-		 * Clamp the batch to a 2^n - 1 value. Having a power
-		 * of 2 value was found to be more likely to have
-		 * suboptimal cache aliasing properties in some cases.
-		 *
-		 * For example if 2 tasks are alternately allocating
-		 * batches of pages, one task can end up with a lot
-		 * of pages of one half of the possible page colors
-		 * and the other with pages of the other colors.
-		 */
-		batch = (1 << fls(batch + batch/2)) - 1;
+		batch = zone_batchsize(zone);
 
 		for (cpu = 0; cpu < NR_CPUS; cpu++) {
-			struct per_cpu_pages *pcp;
-
-			pcp = &zone->pageset[cpu].pcp[0];	/* hot */
-			pcp->count = 0;
-			pcp->low = 2 * batch;
-			pcp->high = 6 * batch;
-			pcp->batch = 1 * batch;
-			INIT_LIST_HEAD(&pcp->list);
-
-			pcp = &zone->pageset[cpu].pcp[1];	/* cold */
-			pcp->count = 0;
-			pcp->low = 0;
-			pcp->high = 2 * batch;
-			pcp->batch = 1 * batch;
-			INIT_LIST_HEAD(&pcp->list);
+#ifdef CONFIG_NUMA
+			/* Early boot. Slab allocator not functional yet */
+			zone->pageset[cpu] = &boot_pageset[cpu];
+			setup_pageset(&boot_pageset[cpu],0);
+#else
+			setup_pageset(zone_pcp(zone,cpu), batch);
+#endif
 		}
 		printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%lu\n",
 				zone_names[j], realsize, batch);
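In the NUMA branch above, every zone's pageset pointer is aimed at the static boot_pageset and initialized with a batch of 0, because kmalloc_node() is not available this early. Feeding 0 into setup_pageset() makes the pageset pass-through, exactly as the boot_pageset comment promises: high becomes 0, so the `pcp->count >= pcp->high` test in free_hot_cold_page() fires on the very first queued page, and max(1UL, ...) keeps the bulk size at one page. A small userspace model of that arithmetic (names are illustrative):

#include <stdio.h>

struct pcp_model {
	int count, low, high, batch;
};

/* Mirrors setup_pageset()'s hot-list watermark arithmetic. */
static void model_setup(struct pcp_model *hot, unsigned long batch)
{
	hot->count = 0;
	hot->low = 2 * batch;
	hot->high = 6 * batch;
	hot->batch = batch > 1 ? (int)batch : 1;	/* max(1UL, batch) */
}

int main(void)
{
	struct pcp_model hot;

	model_setup(&hot, 0);	/* the boot pageset case */
	/* First free: count becomes 1, and 1 >= high (0), so the page
	 * is immediately handed over to the buddy list. */
	printf("boot pageset: low=%d high=%d batch=%d\n",
	       hot.low, hot.high, hot.batch);
	return 0;
}

Once the slab allocator is up, setup_per_cpu_pageset() (added earlier in this patch) replaces these placeholders with node-local pagesets sized by zone_batchsize().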
@@ -1713,6 +1892,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
 		zone->nr_scan_inactive = 0;
 		zone->nr_active = 0;
 		zone->nr_inactive = 0;
+		atomic_set(&zone->reclaim_in_progress, -1);
 		if (!size)
 			continue;
 
@@ -1853,6 +2033,115 @@ struct seq_operations fragmentation_op = {
 	.show	= frag_show,
 };
 
+/*
+ * Output information about zones in @pgdat.
+ */
+static int zoneinfo_show(struct seq_file *m, void *arg)
+{
+	pg_data_t *pgdat = arg;
+	struct zone *zone;
+	struct zone *node_zones = pgdat->node_zones;
+	unsigned long flags;
+
+	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) {
+		int i;
+
+		if (!zone->present_pages)
+			continue;
+
+		spin_lock_irqsave(&zone->lock, flags);
+		seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
+		seq_printf(m,
+			   "\n  pages free     %lu"
+			   "\n        min      %lu"
+			   "\n        low      %lu"
+			   "\n        high     %lu"
+			   "\n        active   %lu"
+			   "\n        inactive %lu"
+			   "\n        scanned  %lu (a: %lu i: %lu)"
+			   "\n        spanned  %lu"
+			   "\n        present  %lu",
+			   zone->free_pages,
+			   zone->pages_min,
+			   zone->pages_low,
+			   zone->pages_high,
+			   zone->nr_active,
+			   zone->nr_inactive,
+			   zone->pages_scanned,
+			   zone->nr_scan_active, zone->nr_scan_inactive,
+			   zone->spanned_pages,
+			   zone->present_pages);
+		seq_printf(m,
+			   "\n        protection: (%lu",
+			   zone->lowmem_reserve[0]);
+		for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
+			seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
+		seq_printf(m,
+			   ")"
+			   "\n  pagesets");
+		for (i = 0; i < ARRAY_SIZE(zone->pageset); i++) {
+			struct per_cpu_pageset *pageset;
+			int j;
+
+			pageset = zone_pcp(zone, i);
+			for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
+				if (pageset->pcp[j].count)
+					break;
+			}
+			if (j == ARRAY_SIZE(pageset->pcp))
+				continue;
+			for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
+				seq_printf(m,
+					   "\n    cpu: %i pcp: %i"
+					   "\n              count: %i"
+					   "\n              low:   %i"
+					   "\n              high:  %i"
+					   "\n              batch: %i",
+					   i, j,
+					   pageset->pcp[j].count,
+					   pageset->pcp[j].low,
+					   pageset->pcp[j].high,
+					   pageset->pcp[j].batch);
+			}
+#ifdef CONFIG_NUMA
+			seq_printf(m,
+				   "\n            numa_hit:       %lu"
+				   "\n            numa_miss:      %lu"
+				   "\n            numa_foreign:   %lu"
+				   "\n            interleave_hit: %lu"
+				   "\n            local_node:     %lu"
+				   "\n            other_node:     %lu",
+				   pageset->numa_hit,
+				   pageset->numa_miss,
+				   pageset->numa_foreign,
+				   pageset->interleave_hit,
+				   pageset->local_node,
+				   pageset->other_node);
+#endif
+		}
+		seq_printf(m,
+			   "\n  all_unreclaimable: %u"
+			   "\n  prev_priority:     %i"
+			   "\n  temp_priority:     %i"
+			   "\n  start_pfn:         %lu",
+			   zone->all_unreclaimable,
+			   zone->prev_priority,
+			   zone->temp_priority,
+			   zone->zone_start_pfn);
+		spin_unlock_irqrestore(&zone->lock, flags);
+		seq_putc(m, '\n');
+	}
+	return 0;
+}
+
+struct seq_operations zoneinfo_op = {
+	.start	= frag_start, /* iterate over all zones. The same as in
+			       * fragmentation. */
+	.next	= frag_next,
+	.stop	= frag_stop,
+	.show	= zoneinfo_show,
+};
+
 static char *vmstat_text[] = {
 	"nr_dirty",
 	"nr_writeback",
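zoneinfo_show() reuses the fragmentation iterator (frag_start/frag_next/frag_stop) and only supplies a new ->show() callback; a companion fs/proc change registers zoneinfo_op, conventionally exposed as /proc/zoneinfo. A minimal userspace reader to eyeball the new per-zone and per-cpu pageset output (assuming the usual proc mount point):

#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/zoneinfo", "r");

	if (!f) {
		perror("fopen /proc/zoneinfo");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}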
@@ -2058,10 +2347,10 @@ static void setup_per_zone_pages_min(void)
 			min_pages = 128;
 		zone->pages_min = min_pages;
 	} else {
-		/* if it's a lowmem zone, reserve a number of pages 
+		/* if it's a lowmem zone, reserve a number of pages
 		 * proportionate to the zone's size.
 		 */
-		zone->pages_min = (pages_min * zone->present_pages) / 
+		zone->pages_min = (pages_min * zone->present_pages) /
 				lowmem_pages;
 	}
 