Diffstat (limited to 'mm')
-rw-r--r--	mm/memcontrol.c	101
1 file changed, 51 insertions(+), 50 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 23841af1d756..51d398f1363c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -542,39 +542,10 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
 	return mz;
 }
 
-/*
- * Return page count for single (non recursive) @memcg.
- *
- * Implementation Note: reading percpu statistics for memcg.
- *
- * Both of vmstat[] and percpu_counter has threshold and do periodic
- * synchronization to implement "quick" read. There are trade-off between
- * reading cost and precision of value. Then, we may have a chance to implement
- * a periodic synchronization of counter in memcg's counter.
- *
- * But this _read() function is used for user interface now. The user accounts
- * memory usage by memory cgroup and he _always_ requires exact value because
- * he accounts memory. Even if we provide quick-and-fuzzy read, we always
- * have to visit all online cpus and make sum. So, for now, unnecessary
- * synchronization is not implemented. (just implemented for cpu hotplug)
- *
- * If there are kernel internal actions which can make use of some not-exact
- * value, and reading all cpu value can be performance bottleneck in some
- * common workload, threshold and synchronization as vmstat[] should be
- * implemented.
- *
- * The parameter idx can be of type enum memcg_event_item or vm_event_item.
- */
-
 static unsigned long memcg_sum_events(struct mem_cgroup *memcg,
 				      int event)
 {
-	unsigned long val = 0;
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		val += per_cpu(memcg->stat->events[event], cpu);
-	return val;
+	return atomic_long_read(&memcg->events[event]);
 }
 
 static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
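Note: the hunk above replaces the per-CPU summation in memcg_sum_events() with a single read of an aggregated atomic counter. The write side that keeps memcg->events[] current is not part of this 'mm'-limited diff; presumably it accumulates per-CPU deltas in stat_cpu and folds them into the shared atomic once they cross MEMCG_CHARGE_BATCH. A minimal sketch of that idea, with a hypothetical helper name:

/*
 * Sketch only, not the actual include/linux/memcontrol.h code: per-CPU
 * deltas spill into the shared atomic once they exceed the batch size,
 * so the reader above needs just one atomic_long_read().
 */
static inline void memcg_count_event_sketch(struct mem_cgroup *memcg,
					    int idx, unsigned long count)
{
	unsigned long x;

	x = count + __this_cpu_read(memcg->stat_cpu->events[idx]);
	if (unlikely(x > MEMCG_CHARGE_BATCH)) {
		atomic_long_add(x, &memcg->events[idx]);
		x = 0;
	}
	__this_cpu_write(memcg->stat_cpu->events[idx], x);
}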
@@ -606,7 +577,7 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
 		nr_pages = -nr_pages; /* for event */
 	}
 
-	__this_cpu_add(memcg->stat->nr_page_events, nr_pages);
+	__this_cpu_add(memcg->stat_cpu->nr_page_events, nr_pages);
 }
 
 unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
@@ -642,8 +613,8 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
 {
 	unsigned long val, next;
 
-	val = __this_cpu_read(memcg->stat->nr_page_events);
-	next = __this_cpu_read(memcg->stat->targets[target]);
+	val = __this_cpu_read(memcg->stat_cpu->nr_page_events);
+	next = __this_cpu_read(memcg->stat_cpu->targets[target]);
 	/* from time_after() in jiffies.h */
 	if ((long)(next - val) < 0) {
 		switch (target) {
@@ -659,7 +630,7 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
 		default:
 			break;
 		}
-		__this_cpu_write(memcg->stat->targets[target], next);
+		__this_cpu_write(memcg->stat_cpu->targets[target], next);
 		return true;
 	}
 	return false;
@@ -1707,11 +1678,6 @@ void unlock_page_memcg(struct page *page)
 }
 EXPORT_SYMBOL(unlock_page_memcg);
 
-/*
- * size of first charge trial. "32" comes from vmscan.c's magic value.
- * TODO: maybe necessary to use big numbers in big irons.
- */
-#define CHARGE_BATCH 32U
 struct memcg_stock_pcp {
 	struct mem_cgroup *cached; /* this never be root cgroup */
 	unsigned int nr_pages;
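Note: the local CHARGE_BATCH definition removed above is replaced by MEMCG_CHARGE_BATCH in the hunks that follow. Its definition presumably moves to include/linux/memcontrol.h, which is outside this 'mm'-limited diffstat; based on the removed comment it would amount to roughly:

/*
 * Presumed new home of the batch size (sketch; the actual definition is
 * outside this diff):
 * size of first charge trial. "32" comes from vmscan.c's magic value.
 * TODO: maybe necessary to use big numbers in big irons.
 */
#define MEMCG_CHARGE_BATCH 32U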
@@ -1739,7 +1705,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 	unsigned long flags;
 	bool ret = false;
 
-	if (nr_pages > CHARGE_BATCH)
+	if (nr_pages > MEMCG_CHARGE_BATCH)
 		return ret;
 
 	local_irq_save(flags);
@@ -1808,7 +1774,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 	}
 	stock->nr_pages += nr_pages;
 
-	if (stock->nr_pages > CHARGE_BATCH)
+	if (stock->nr_pages > MEMCG_CHARGE_BATCH)
 		drain_stock(stock);
 
 	local_irq_restore(flags);
@@ -1858,9 +1824,44 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
 static int memcg_hotplug_cpu_dead(unsigned int cpu)
 {
 	struct memcg_stock_pcp *stock;
+	struct mem_cgroup *memcg;
 
 	stock = &per_cpu(memcg_stock, cpu);
 	drain_stock(stock);
+
+	for_each_mem_cgroup(memcg) {
+		int i;
+
+		for (i = 0; i < MEMCG_NR_STAT; i++) {
+			int nid;
+			long x;
+
+			x = this_cpu_xchg(memcg->stat_cpu->count[i], 0);
+			if (x)
+				atomic_long_add(x, &memcg->stat[i]);
+
+			if (i >= NR_VM_NODE_STAT_ITEMS)
+				continue;
+
+			for_each_node(nid) {
+				struct mem_cgroup_per_node *pn;
+
+				pn = mem_cgroup_nodeinfo(memcg, nid);
+				x = this_cpu_xchg(pn->lruvec_stat_cpu->count[i], 0);
+				if (x)
+					atomic_long_add(x, &pn->lruvec_stat[i]);
+			}
+		}
+
+		for (i = 0; i < MEMCG_NR_EVENTS; i++) {
+			long x;
+
+			x = this_cpu_xchg(memcg->stat_cpu->events[i], 0);
+			if (x)
+				atomic_long_add(x, &memcg->events[i]);
+		}
+	}
+
 	return 0;
 }
 
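Note: once the hotplug handler above has folded a dead CPU's residual per-CPU deltas into the shared atomics, readers never need to walk all possible CPUs. A minimal reader sketch under that assumption (hypothetical helper name; the real read-side accessors live in include/linux/memcontrol.h):

static inline unsigned long memcg_read_stat_sketch(struct mem_cgroup *memcg,
						   int idx)
{
	long x = atomic_long_read(&memcg->stat[idx]);

	/* in-flight per-CPU deltas can make the aggregate transiently negative */
	if (x < 0)
		x = 0;
	return x;
}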
@@ -1881,7 +1882,7 @@ static void high_work_func(struct work_struct *work)
 	struct mem_cgroup *memcg;
 
 	memcg = container_of(work, struct mem_cgroup, high_work);
-	reclaim_high(memcg, CHARGE_BATCH, GFP_KERNEL);
+	reclaim_high(memcg, MEMCG_CHARGE_BATCH, GFP_KERNEL);
 }
 
 /*
@@ -1905,7 +1906,7 @@ void mem_cgroup_handle_over_high(void)
 static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 		      unsigned int nr_pages)
 {
-	unsigned int batch = max(CHARGE_BATCH, nr_pages);
+	unsigned int batch = max(MEMCG_CHARGE_BATCH, nr_pages);
 	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
 	struct mem_cgroup *mem_over_limit;
 	struct page_counter *counter;
@@ -4161,8 +4162,8 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 	if (!pn)
 		return 1;
 
-	pn->lruvec_stat = alloc_percpu(struct lruvec_stat);
-	if (!pn->lruvec_stat) {
+	pn->lruvec_stat_cpu = alloc_percpu(struct lruvec_stat);
+	if (!pn->lruvec_stat_cpu) {
 		kfree(pn);
 		return 1;
 	}
@@ -4180,7 +4181,7 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 {
 	struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
 
-	free_percpu(pn->lruvec_stat);
+	free_percpu(pn->lruvec_stat_cpu);
 	kfree(pn);
 }
 
@@ -4190,7 +4191,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 
 	for_each_node(node)
 		free_mem_cgroup_per_node_info(memcg, node);
-	free_percpu(memcg->stat);
+	free_percpu(memcg->stat_cpu);
 	kfree(memcg);
 }
 
@@ -4219,8 +4220,8 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 	if (memcg->id.id < 0)
 		goto fail;
 
-	memcg->stat = alloc_percpu(struct mem_cgroup_stat_cpu);
-	if (!memcg->stat)
+	memcg->stat_cpu = alloc_percpu(struct mem_cgroup_stat_cpu);
+	if (!memcg->stat_cpu)
 		goto fail;
 
 	for_each_node(node)
@@ -5638,7 +5639,7 @@ static void uncharge_batch(const struct uncharge_gather *ug)
 	__mod_memcg_state(ug->memcg, MEMCG_RSS_HUGE, -ug->nr_huge);
 	__mod_memcg_state(ug->memcg, NR_SHMEM, -ug->nr_shmem);
 	__count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout);
-	__this_cpu_add(ug->memcg->stat->nr_page_events, nr_pages);
+	__this_cpu_add(ug->memcg->stat_cpu->nr_page_events, nr_pages);
 	memcg_check_events(ug->memcg, ug->dummy_page);
 	local_irq_restore(flags);
 