Diffstat (limited to 'include/linux/memcontrol.h')
 -rw-r--r--  include/linux/memcontrol.h | 165
 1 file changed, 101 insertions(+), 64 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 69966c461d1c..882046863581 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -108,7 +108,10 @@ struct lruvec_stat {
  */
 struct mem_cgroup_per_node {
         struct lruvec           lruvec;
-        struct lruvec_stat __percpu *lruvec_stat;
+
+        struct lruvec_stat __percpu *lruvec_stat_cpu;
+        atomic_long_t           lruvec_stat[NR_VM_NODE_STAT_ITEMS];
+
         unsigned long           lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
 
         struct mem_cgroup_reclaim_iter  iter[DEF_PRIORITY + 1];
@@ -227,10 +230,10 @@ struct mem_cgroup {
         spinlock_t              move_lock;
         struct task_struct      *move_lock_task;
         unsigned long           move_lock_flags;
-        /*
-         * percpu counter.
-         */
-        struct mem_cgroup_stat_cpu __percpu *stat;
+
+        struct mem_cgroup_stat_cpu __percpu *stat_cpu;
+        atomic_long_t           stat[MEMCG_NR_STAT];
+        atomic_long_t           events[MEMCG_NR_EVENTS];
 
         unsigned long           socket_pressure;
 
@@ -265,6 +268,12 @@ struct mem_cgroup {
         /* WARNING: nodeinfo must be the last member here */
 };
 
+/*
+ * size of first charge trial. "32" comes from vmscan.c's magic value.
+ * TODO: maybe necessary to use big numbers in big irons.
+ */
+#define MEMCG_CHARGE_BATCH 32U
+
 extern struct mem_cgroup *root_mem_cgroup;
 
 static inline bool mem_cgroup_disabled(void)
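The three hunks above set up the new counter layout: each per-CPU cache (stat_cpu, lruvec_stat_cpu) is paired with a shared atomic aggregate (stat[], events[], lruvec_stat[]), and MEMCG_CHARGE_BATCH caps how far a CPU's cache may drift before it must be folded into the aggregate. A minimal user-space sketch of that pairing, with placeholder names and sizes (counter_group, NR_STAT_ITEMS, NR_CPUS are illustrative, not kernel identifiers):

/*
 * Simplified picture of the new counter layout (illustrative only; the
 * real structs use __percpu allocations and the kernel's atomic_long_t).
 * Every statistic keeps a small per-CPU cache next to one shared
 * aggregate, and the batch constant bounds how far each cache may drift
 * before it is folded into the aggregate.
 */
#define NR_STAT_ITEMS   4               /* placeholder for MEMCG_NR_STAT etc. */
#define NR_CPUS         8               /* placeholder CPU count */
#define CHARGE_BATCH    32              /* mirrors MEMCG_CHARGE_BATCH */

struct stat_cpu {
        long count[NR_STAT_ITEMS];      /* like mem_cgroup_stat_cpu::count */
};

struct counter_group {
        struct stat_cpu cache[NR_CPUS]; /* like the *stat_cpu percpu pointer */
        long aggregate[NR_STAT_ITEMS];  /* like the atomic_long_t stat[] */
};

/* Worst-case reader error per item: roughly NR_CPUS * CHARGE_BATCH. */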
@@ -272,13 +281,6 @@ static inline bool mem_cgroup_disabled(void)
         return !cgroup_subsys_enabled(memory_cgrp_subsys);
 }
 
-static inline void mem_cgroup_event(struct mem_cgroup *memcg,
-                                    enum memcg_event_item event)
-{
-        this_cpu_inc(memcg->stat->events[event]);
-        cgroup_file_notify(&memcg->events_file);
-}
-
 bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg);
 
 int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
@@ -492,32 +494,38 @@ void unlock_page_memcg(struct page *page);
 static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
                                              int idx)
 {
-        long val = 0;
-        int cpu;
-
-        for_each_possible_cpu(cpu)
-                val += per_cpu(memcg->stat->count[idx], cpu);
-
-        if (val < 0)
-                val = 0;
-
-        return val;
+        long x = atomic_long_read(&memcg->stat[idx]);
+#ifdef CONFIG_SMP
+        if (x < 0)
+                x = 0;
+#endif
+        return x;
 }
 
 /* idx can be of type enum memcg_stat_item or node_stat_item */
 static inline void __mod_memcg_state(struct mem_cgroup *memcg,
                                      int idx, int val)
 {
-        if (!mem_cgroup_disabled())
-                __this_cpu_add(memcg->stat->count[idx], val);
+        long x;
+
+        if (mem_cgroup_disabled())
+                return;
+
+        x = val + __this_cpu_read(memcg->stat_cpu->count[idx]);
+        if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
+                atomic_long_add(x, &memcg->stat[idx]);
+                x = 0;
+        }
+        __this_cpu_write(memcg->stat_cpu->count[idx], x);
 }
 
 /* idx can be of type enum memcg_stat_item or node_stat_item */
 static inline void mod_memcg_state(struct mem_cgroup *memcg,
                                    int idx, int val)
 {
-        if (!mem_cgroup_disabled())
-                this_cpu_add(memcg->stat->count[idx], val);
+        preempt_disable();
+        __mod_memcg_state(memcg, idx, val);
+        preempt_enable();
 }
 
 /**
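__mod_memcg_state() now adds val to the CPU-local cache and only touches the shared atomic_long_t when the cached value exceeds MEMCG_CHARGE_BATCH; mod_memcg_state() wraps it in preempt_disable()/preempt_enable() so the read-modify-write of the per-CPU slot cannot migrate between CPUs. Below is a minimal user-space model of that batching, using C11 atomics and a thread-local variable in place of the kernel's percpu machinery (mod_state, shared_count and cached are hypothetical names, not kernel API):

/*
 * User-space model of the batching in __mod_memcg_state() above.
 * Each thread caches a signed delta and folds it into the shared atomic
 * only once |delta| exceeds the batch size, so the hot path is a plain
 * thread-local load, add and store.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define BATCH 32L                       /* stands in for MEMCG_CHARGE_BATCH */

static atomic_long shared_count;        /* stands in for memcg->stat[idx] */
static _Thread_local long cached;       /* stands in for stat_cpu->count[idx] */

static void mod_state(long val)
{
        long x = cached + val;

        if (labs(x) > BATCH) {          /* rare slow path: one atomic RMW */
                atomic_fetch_add(&shared_count, x);
                x = 0;
        }
        cached = x;                     /* common fast path: local store */
}

int main(void)
{
        /* 33 single increments: only the 33rd touches the shared atomic. */
        for (int i = 0; i < 33; i++)
                mod_state(1);

        printf("shared=%ld cached=%ld\n",
               atomic_load(&shared_count), cached);  /* shared=33 cached=0 */
        return 0;
}

With single-page charges the shared counter is therefore touched roughly once every MEMCG_CHARGE_BATCH + 1 updates; everything else stays in the per-CPU cache.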
@@ -555,87 +563,108 @@ static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
                                               enum node_stat_item idx)
 {
         struct mem_cgroup_per_node *pn;
-        long val = 0;
-        int cpu;
+        long x;
 
         if (mem_cgroup_disabled())
                 return node_page_state(lruvec_pgdat(lruvec), idx);
 
         pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-        for_each_possible_cpu(cpu)
-                val += per_cpu(pn->lruvec_stat->count[idx], cpu);
-
-        if (val < 0)
-                val = 0;
-
-        return val;
+        x = atomic_long_read(&pn->lruvec_stat[idx]);
+#ifdef CONFIG_SMP
+        if (x < 0)
+                x = 0;
+#endif
+        return x;
 }
 
 static inline void __mod_lruvec_state(struct lruvec *lruvec,
                                       enum node_stat_item idx, int val)
 {
         struct mem_cgroup_per_node *pn;
+        long x;
 
+        /* Update node */
         __mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
+
         if (mem_cgroup_disabled())
                 return;
+
         pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+
+        /* Update memcg */
         __mod_memcg_state(pn->memcg, idx, val);
-        __this_cpu_add(pn->lruvec_stat->count[idx], val);
+
+        /* Update lruvec */
+        x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]);
+        if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
+                atomic_long_add(x, &pn->lruvec_stat[idx]);
+                x = 0;
+        }
+        __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x);
 }
 
 static inline void mod_lruvec_state(struct lruvec *lruvec,
                                     enum node_stat_item idx, int val)
 {
-        struct mem_cgroup_per_node *pn;
-
-        mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
-        if (mem_cgroup_disabled())
-                return;
-        pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-        mod_memcg_state(pn->memcg, idx, val);
-        this_cpu_add(pn->lruvec_stat->count[idx], val);
+        preempt_disable();
+        __mod_lruvec_state(lruvec, idx, val);
+        preempt_enable();
 }
 
 static inline void __mod_lruvec_page_state(struct page *page,
                                            enum node_stat_item idx, int val)
 {
-        struct mem_cgroup_per_node *pn;
+        pg_data_t *pgdat = page_pgdat(page);
+        struct lruvec *lruvec;
 
-        __mod_node_page_state(page_pgdat(page), idx, val);
-        if (mem_cgroup_disabled() || !page->mem_cgroup)
+        /* Untracked pages have no memcg, no lruvec. Update only the node */
+        if (!page->mem_cgroup) {
+                __mod_node_page_state(pgdat, idx, val);
                 return;
-        __mod_memcg_state(page->mem_cgroup, idx, val);
-        pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
-        __this_cpu_add(pn->lruvec_stat->count[idx], val);
+        }
+
+        lruvec = mem_cgroup_lruvec(pgdat, page->mem_cgroup);
+        __mod_lruvec_state(lruvec, idx, val);
 }
 
 static inline void mod_lruvec_page_state(struct page *page,
                                          enum node_stat_item idx, int val)
 {
-        struct mem_cgroup_per_node *pn;
-
-        mod_node_page_state(page_pgdat(page), idx, val);
-        if (mem_cgroup_disabled() || !page->mem_cgroup)
-                return;
-        mod_memcg_state(page->mem_cgroup, idx, val);
-        pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
-        this_cpu_add(pn->lruvec_stat->count[idx], val);
+        preempt_disable();
+        __mod_lruvec_page_state(page, idx, val);
+        preempt_enable();
 }
 
 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
                                                 gfp_t gfp_mask,
                                                 unsigned long *total_scanned);
 
+/* idx can be of type enum memcg_event_item or vm_event_item */
+static inline void __count_memcg_events(struct mem_cgroup *memcg,
+                                        int idx, unsigned long count)
+{
+        unsigned long x;
+
+        if (mem_cgroup_disabled())
+                return;
+
+        x = count + __this_cpu_read(memcg->stat_cpu->events[idx]);
+        if (unlikely(x > MEMCG_CHARGE_BATCH)) {
+                atomic_long_add(x, &memcg->events[idx]);
+                x = 0;
+        }
+        __this_cpu_write(memcg->stat_cpu->events[idx], x);
+}
+
 static inline void count_memcg_events(struct mem_cgroup *memcg,
-                                      enum vm_event_item idx,
-                                      unsigned long count)
+                                      int idx, unsigned long count)
 {
-        if (!mem_cgroup_disabled())
-                this_cpu_add(memcg->stat->events[idx], count);
+        preempt_disable();
+        __count_memcg_events(memcg, idx, count);
+        preempt_enable();
 }
 
-/* idx can be of type enum memcg_stat_item or node_stat_item */
+/* idx can be of type enum memcg_event_item or vm_event_item */
 static inline void count_memcg_page_event(struct page *page,
                                           int idx)
 {
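memcg_page_state() and lruvec_page_state() clamp negative readings to zero under CONFIG_SMP because a decrement can be flushed from one CPU while the matching increment is still sitting in another CPU's cache, so the shared aggregate may transiently read below the true value. A sequential two-CPU simulation of that effect (user-space model with hypothetical names, not kernel code):

/*
 * Why readers clamp at zero: with per-CPU caches, the shared aggregate
 * can dip below the true count when flushes from different CPUs land
 * out of order relative to the charges they describe.
 */
#include <stdio.h>

#define BATCH 32                        /* mirrors MEMCG_CHARGE_BATCH */

int main(void)
{
        long shared = 0;                /* the atomic_long_t aggregate */
        long cpu0 = 0, cpu1 = 0;        /* per-CPU cached deltas */

        cpu0 += 40;                     /* CPU 0 charges 40 pages... */
        shared += cpu0, cpu0 = 0;       /* ...and flushes (|40| > BATCH) */

        cpu1 += 10;                     /* CPU 1 charges 10 more, still cached */

        cpu0 -= 45;                     /* CPU 0 uncharges 45... */
        shared += cpu0, cpu0 = 0;       /* ...and flushes again (|-45| > BATCH) */

        /* True count is 5, but the shared aggregate reads -5: clamp to 0. */
        printf("aggregate=%ld true=%ld\n", shared, shared + cpu0 + cpu1);
        return 0;
}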
@@ -654,12 +683,20 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
         rcu_read_lock();
         memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
         if (likely(memcg)) {
-                this_cpu_inc(memcg->stat->events[idx]);
+                count_memcg_events(memcg, idx, 1);
                 if (idx == OOM_KILL)
                         cgroup_file_notify(&memcg->events_file);
         }
         rcu_read_unlock();
 }
+
+static inline void mem_cgroup_event(struct mem_cgroup *memcg,
+                                    enum memcg_event_item event)
+{
+        count_memcg_events(memcg, event, 1);
+        cgroup_file_notify(&memcg->events_file);
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void mem_cgroup_split_huge_fixup(struct page *head);
 #endif