Diffstat (limited to 'include/linux/memcontrol.h')
-rw-r--r--  include/linux/memcontrol.h | 165
1 file changed, 101 insertions, 64 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 69966c461d1c..882046863581 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -108,7 +108,10 @@ struct lruvec_stat {
  */
 struct mem_cgroup_per_node {
 	struct lruvec		lruvec;
-	struct lruvec_stat __percpu *lruvec_stat;
+
+	struct lruvec_stat __percpu *lruvec_stat_cpu;
+	atomic_long_t		lruvec_stat[NR_VM_NODE_STAT_ITEMS];
+
 	unsigned long		lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
 
 	struct mem_cgroup_reclaim_iter	iter[DEF_PRIORITY + 1];
@@ -227,10 +230,10 @@ struct mem_cgroup {
 	spinlock_t		move_lock;
 	struct task_struct	*move_lock_task;
 	unsigned long		move_lock_flags;
-	/*
-	 * percpu counter.
-	 */
-	struct mem_cgroup_stat_cpu __percpu *stat;
+
+	struct mem_cgroup_stat_cpu __percpu *stat_cpu;
+	atomic_long_t		stat[MEMCG_NR_STAT];
+	atomic_long_t		events[MEMCG_NR_EVENTS];
 
 	unsigned long		socket_pressure;
 
@@ -265,6 +268,12 @@ struct mem_cgroup {
 	/* WARNING: nodeinfo must be the last member here */
 };
 
+/*
+ * size of first charge trial. "32" comes from vmscan.c's magic value.
+ * TODO: maybe necessary to use big numbers in big irons.
+ */
+#define MEMCG_CHARGE_BATCH 32U
+
 extern struct mem_cgroup *root_mem_cgroup;
 
 static inline bool mem_cgroup_disabled(void)
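MEMCG_CHARGE_BATCH added above caps how large a per-CPU delta may grow before it is folded into the shared atomic counters introduced by the rest of the patch. Below is a minimal, single-threaded userspace sketch of that batching pattern, for illustration only; the names (total, cpu_delta, mod_state, read_state) are made up and this is not kernel code.

/*
 * Sketch of the two-level counter: updates land in a cheap private
 * cache and are folded into the shared atomic only when the cached
 * delta exceeds the batch size.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define BATCH 32

static atomic_long total;	/* shared counter, like memcg->stat[idx] */
static long cpu_delta;		/* stand-in for the per-CPU cache        */

static void mod_state(long val)
{
	long x = val + cpu_delta;

	if (labs(x) > BATCH) {		/* flush the accumulated delta  */
		atomic_fetch_add(&total, x);
		x = 0;
	}
	cpu_delta = x;			/* keep the remainder cached    */
}

static long read_state(void)
{
	long x = atomic_load(&total);	/* readers see flushed state only */

	return x < 0 ? 0 : x;
}

int main(void)
{
	for (int i = 0; i < 100; i++)
		mod_state(1);
	printf("flushed=%ld cached=%ld\n", read_state(), cpu_delta);
	return 0;
}

With 100 increments and a batch of 32 this prints flushed=99 cached=1: a reader lags by at most one batch worth of updates per CPU.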
@@ -272,13 +281,6 @@ static inline bool mem_cgroup_disabled(void)
 	return !cgroup_subsys_enabled(memory_cgrp_subsys);
 }
 
-static inline void mem_cgroup_event(struct mem_cgroup *memcg,
-				    enum memcg_event_item event)
-{
-	this_cpu_inc(memcg->stat->events[event]);
-	cgroup_file_notify(&memcg->events_file);
-}
-
 bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg);
 
 int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
@@ -492,32 +494,38 @@ void unlock_page_memcg(struct page *page);
 static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
 					     int idx)
 {
-	long val = 0;
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		val += per_cpu(memcg->stat->count[idx], cpu);
-
-	if (val < 0)
-		val = 0;
-
-	return val;
+	long x = atomic_long_read(&memcg->stat[idx]);
+#ifdef CONFIG_SMP
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
 }
 
 /* idx can be of type enum memcg_stat_item or node_stat_item */
 static inline void __mod_memcg_state(struct mem_cgroup *memcg,
 				     int idx, int val)
 {
-	if (!mem_cgroup_disabled())
-		__this_cpu_add(memcg->stat->count[idx], val);
+	long x;
+
+	if (mem_cgroup_disabled())
+		return;
+
+	x = val + __this_cpu_read(memcg->stat_cpu->count[idx]);
+	if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
+		atomic_long_add(x, &memcg->stat[idx]);
+		x = 0;
+	}
+	__this_cpu_write(memcg->stat_cpu->count[idx], x);
 }
 
 /* idx can be of type enum memcg_stat_item or node_stat_item */
 static inline void mod_memcg_state(struct mem_cgroup *memcg,
 				   int idx, int val)
 {
-	if (!mem_cgroup_disabled())
-		this_cpu_add(memcg->stat->count[idx], val);
+	preempt_disable();
+	__mod_memcg_state(memcg, idx, val);
+	preempt_enable();
 }
 
 /**
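memcg_page_state() above now samples only the shared atomic and, under CONFIG_SMP, clamps negative readings to zero. The clamp exists because, with batched per-CPU caching, a flushed uncharge can reach the shared counter before the matching charge has left another CPU's cache. A standalone sketch of that transient follows; the variables (shared, cpu0_cache, cpu1_cache) are illustrative stand-ins, not kernel identifiers.

#include <stdio.h>
#include <stdlib.h>

#define BATCH 32

static long shared;	/* plays the role of the atomic_long_t counter */

static void flush_if_needed(long *cache)
{
	if (labs(*cache) > BATCH) {
		shared += *cache;
		*cache = 0;
	}
}

int main(void)
{
	long cpu0_cache = 0, cpu1_cache = 0;

	shared = 3;			/* 3 pages charged long ago          */

	cpu0_cache += 30;		/* 30 new pages charged on "CPU0"    */
	flush_if_needed(&cpu0_cache);	/* 30 <= BATCH: stays cached         */

	cpu1_cache -= 33;		/* all 33 pages uncharged on "CPU1"  */
	flush_if_needed(&cpu1_cache);	/* 33 > BATCH: shared drops to -30   */

	long x = shared;		/* a reader sees only the shared part */
	if (x < 0)			/* ...and clamps, as memcg_page_state */
		x = 0;			/* does under CONFIG_SMP              */

	printf("true=%ld shared=%ld reported=%ld\n",
	       shared + cpu0_cache + cpu1_cache, shared, x);
	return 0;
}

The true value is 0 (3 + 30 - 33), but the shared counter transiently reads -30; the clamp keeps the reported value sane.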
@@ -555,87 +563,108 @@ static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
 					      enum node_stat_item idx)
 {
 	struct mem_cgroup_per_node *pn;
-	long val = 0;
-	int cpu;
+	long x;
 
 	if (mem_cgroup_disabled())
 		return node_page_state(lruvec_pgdat(lruvec), idx);
 
 	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-	for_each_possible_cpu(cpu)
-		val += per_cpu(pn->lruvec_stat->count[idx], cpu);
-
-	if (val < 0)
-		val = 0;
-
-	return val;
+	x = atomic_long_read(&pn->lruvec_stat[idx]);
+#ifdef CONFIG_SMP
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
 }
 
 static inline void __mod_lruvec_state(struct lruvec *lruvec,
 				      enum node_stat_item idx, int val)
 {
 	struct mem_cgroup_per_node *pn;
+	long x;
 
+	/* Update node */
 	__mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
+
 	if (mem_cgroup_disabled())
 		return;
+
 	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+
+	/* Update memcg */
 	__mod_memcg_state(pn->memcg, idx, val);
-	__this_cpu_add(pn->lruvec_stat->count[idx], val);
+
+	/* Update lruvec */
+	x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]);
+	if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
+		atomic_long_add(x, &pn->lruvec_stat[idx]);
+		x = 0;
+	}
+	__this_cpu_write(pn->lruvec_stat_cpu->count[idx], x);
 }
 
 static inline void mod_lruvec_state(struct lruvec *lruvec,
 				    enum node_stat_item idx, int val)
 {
-	struct mem_cgroup_per_node *pn;
-
-	mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
-	if (mem_cgroup_disabled())
-		return;
-	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-	mod_memcg_state(pn->memcg, idx, val);
-	this_cpu_add(pn->lruvec_stat->count[idx], val);
+	preempt_disable();
+	__mod_lruvec_state(lruvec, idx, val);
+	preempt_enable();
 }
 
 static inline void __mod_lruvec_page_state(struct page *page,
 					   enum node_stat_item idx, int val)
 {
-	struct mem_cgroup_per_node *pn;
+	pg_data_t *pgdat = page_pgdat(page);
+	struct lruvec *lruvec;
 
-	__mod_node_page_state(page_pgdat(page), idx, val);
-	if (mem_cgroup_disabled() || !page->mem_cgroup)
+	/* Untracked pages have no memcg, no lruvec. Update only the node */
+	if (!page->mem_cgroup) {
+		__mod_node_page_state(pgdat, idx, val);
 		return;
-	__mod_memcg_state(page->mem_cgroup, idx, val);
-	pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
-	__this_cpu_add(pn->lruvec_stat->count[idx], val);
+	}
+
+	lruvec = mem_cgroup_lruvec(pgdat, page->mem_cgroup);
+	__mod_lruvec_state(lruvec, idx, val);
 }
 
 static inline void mod_lruvec_page_state(struct page *page,
 					 enum node_stat_item idx, int val)
 {
-	struct mem_cgroup_per_node *pn;
-
-	mod_node_page_state(page_pgdat(page), idx, val);
-	if (mem_cgroup_disabled() || !page->mem_cgroup)
-		return;
-	mod_memcg_state(page->mem_cgroup, idx, val);
-	pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
-	this_cpu_add(pn->lruvec_stat->count[idx], val);
+	preempt_disable();
+	__mod_lruvec_page_state(page, idx, val);
+	preempt_enable();
 }
 
 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 						gfp_t gfp_mask,
 						unsigned long *total_scanned);
 
+/* idx can be of type enum memcg_event_item or vm_event_item */
+static inline void __count_memcg_events(struct mem_cgroup *memcg,
+					int idx, unsigned long count)
+{
+	unsigned long x;
+
+	if (mem_cgroup_disabled())
+		return;
+
+	x = count + __this_cpu_read(memcg->stat_cpu->events[idx]);
+	if (unlikely(x > MEMCG_CHARGE_BATCH)) {
+		atomic_long_add(x, &memcg->events[idx]);
+		x = 0;
+	}
+	__this_cpu_write(memcg->stat_cpu->events[idx], x);
+}
+
 static inline void count_memcg_events(struct mem_cgroup *memcg,
-				      enum vm_event_item idx,
-				      unsigned long count)
+				      int idx, unsigned long count)
 {
-	if (!mem_cgroup_disabled())
-		this_cpu_add(memcg->stat->events[idx], count);
+	preempt_disable();
+	__count_memcg_events(memcg, idx, count);
+	preempt_enable();
 }
 
-/* idx can be of type enum memcg_stat_item or node_stat_item */
+/* idx can be of type enum memcg_event_item or vm_event_item */
 static inline void count_memcg_page_event(struct page *page,
 					  int idx)
 {
@@ -654,12 +683,20 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
 	rcu_read_lock();
 	memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
 	if (likely(memcg)) {
-		this_cpu_inc(memcg->stat->events[idx]);
+		count_memcg_events(memcg, idx, 1);
 		if (idx == OOM_KILL)
 			cgroup_file_notify(&memcg->events_file);
 	}
 	rcu_read_unlock();
 }
+
+static inline void mem_cgroup_event(struct mem_cgroup *memcg,
+				    enum memcg_event_item event)
+{
+	count_memcg_events(memcg, event, 1);
+	cgroup_file_notify(&memcg->events_file);
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void mem_cgroup_split_huge_fixup(struct page *head);
 #endif
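The trade the patch makes throughout is accuracy for cheap updates: between flushes each CPU can hold up to MEMCG_CHARGE_BATCH units of updates per counter in its cache, so a reader of the shared atomics can lag by roughly nr_cpus * MEMCG_CHARGE_BATCH pages per counter. A back-of-the-envelope check, using assumed figures (64 CPUs, 4 KiB pages) that are not taken from the patch:

#include <stdio.h>

int main(void)
{
	unsigned long batch = 32;	/* MEMCG_CHARGE_BATCH       */
	unsigned long nr_cpus = 64;	/* assumed machine size     */
	unsigned long page_kib = 4;	/* assumed page size in KiB */

	unsigned long slack_pages = nr_cpus * batch;

	printf("worst-case read lag: %lu pages (~%lu KiB) per counter\n",
	       slack_pages, slack_pages * page_kib);
	return 0;
}

On those assumptions the lag is 2048 pages, about 8 MiB per counter, which is the slack accepted in exchange for turning stat reads from a for_each_possible_cpu() walk into a single atomic_long_read().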