diff options
author | KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> | 2008-02-07 03:14:16 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-07 11:42:20 -0500 |
commit | cc8475822f8a4b17e9b76e7fadb6b9a341860422 (patch) | |
tree | b7e3ee9aba56ffaf93130786f3439cc43339e586 /mm/memcontrol.c | |
parent | 417eead30434b4bd09a54455e839cf9a62c05460 (diff) |
memory cgroup enhancements: force_empty interface for dropping all account in empty cgroup
This patch adds an interface "memory.force_empty". Any write to this file
will drop all charges in this cgroup if there are no tasks in it.
%echo 1 > /....../memory.force_empty
will drop all charges of the memory cgroup if the cgroup's task list is empty.
This is useful for invoking rmdir() successfully against a memory cgroup.
Tested and worked well on x86_64/fake-NUMA system.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 110 |
1 files changed, 103 insertions, 7 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9793873d5a90..c867612d9c04 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -471,6 +471,7 @@ void mem_cgroup_uncharge(struct page_cgroup *pc) | |||
471 | page = pc->page; | 471 | page = pc->page; |
472 | /* | 472 | /* |
473 | * get page->cgroup and clear it under lock. | 473 | * get page->cgroup and clear it under lock. |
474 | * force_empty can drop page->cgroup without checking refcnt. | ||
474 | */ | 475 | */ |
475 | if (clear_page_cgroup(page, pc) == pc) { | 476 | if (clear_page_cgroup(page, pc) == pc) { |
476 | mem = pc->mem_cgroup; | 477 | mem = pc->mem_cgroup; |
@@ -480,13 +481,6 @@ void mem_cgroup_uncharge(struct page_cgroup *pc) | |||
480 | list_del_init(&pc->lru); | 481 | list_del_init(&pc->lru); |
481 | spin_unlock_irqrestore(&mem->lru_lock, flags); | 482 | spin_unlock_irqrestore(&mem->lru_lock, flags); |
482 | kfree(pc); | 483 | kfree(pc); |
483 | } else { | ||
484 | /* | ||
485 | * Note:This will be removed when force-empty patch is | ||
486 | * applied. just show warning here. | ||
487 | */ | ||
488 | printk(KERN_ERR "Race in mem_cgroup_uncharge() ?"); | ||
489 | dump_stack(); | ||
490 | } | 484 | } |
491 | } | 485 | } |
492 | } | 486 | } |
@@ -534,6 +528,76 @@ retry: | |||
534 | return; | 528 | return; |
535 | } | 529 | } |
536 | 530 | ||
531 | /* | ||
532 | * This routine traverse page_cgroup in given list and drop them all. | ||
533 | * This routine ignores page_cgroup->ref_cnt. | ||
534 | * *And* this routine doesn't reclaim page itself, just removes page_cgroup. | ||
535 | */ | ||
536 | #define FORCE_UNCHARGE_BATCH (128) | ||
537 | static void | ||
538 | mem_cgroup_force_empty_list(struct mem_cgroup *mem, struct list_head *list) | ||
539 | { | ||
540 | struct page_cgroup *pc; | ||
541 | struct page *page; | ||
542 | int count; | ||
543 | unsigned long flags; | ||
544 | |||
545 | retry: | ||
546 | count = FORCE_UNCHARGE_BATCH; | ||
547 | spin_lock_irqsave(&mem->lru_lock, flags); | ||
548 | |||
549 | while (--count && !list_empty(list)) { | ||
550 | pc = list_entry(list->prev, struct page_cgroup, lru); | ||
551 | page = pc->page; | ||
552 | /* Avoid race with charge */ | ||
553 | atomic_set(&pc->ref_cnt, 0); | ||
554 | if (clear_page_cgroup(page, pc) == pc) { | ||
555 | css_put(&mem->css); | ||
556 | res_counter_uncharge(&mem->res, PAGE_SIZE); | ||
557 | list_del_init(&pc->lru); | ||
558 | kfree(pc); | ||
559 | } else /* being uncharged ? ...do relax */ | ||
560 | break; | ||
561 | } | ||
562 | spin_unlock_irqrestore(&mem->lru_lock, flags); | ||
563 | if (!list_empty(list)) { | ||
564 | cond_resched(); | ||
565 | goto retry; | ||
566 | } | ||
567 | return; | ||
568 | } | ||
569 | |||
570 | /* | ||
571 | * make mem_cgroup's charge to be 0 if there is no task. | ||
572 | * This enables deleting this mem_cgroup. | ||
573 | */ | ||
574 | |||
575 | int mem_cgroup_force_empty(struct mem_cgroup *mem) | ||
576 | { | ||
577 | int ret = -EBUSY; | ||
578 | css_get(&mem->css); | ||
579 | /* | ||
580 | * page reclaim code (kswapd etc..) will move pages between | ||
581 | * active_list <-> inactive_list while we don't take a lock. | ||
582 | * So, we have to do loop here until all lists are empty. | ||
583 | */ | ||
584 | while (!(list_empty(&mem->active_list) && | ||
585 | list_empty(&mem->inactive_list))) { | ||
586 | if (atomic_read(&mem->css.cgroup->count) > 0) | ||
587 | goto out; | ||
588 | /* drop all page_cgroup in active_list */ | ||
589 | mem_cgroup_force_empty_list(mem, &mem->active_list); | ||
590 | /* drop all page_cgroup in inactive_list */ | ||
591 | mem_cgroup_force_empty_list(mem, &mem->inactive_list); | ||
592 | } | ||
593 | ret = 0; | ||
594 | out: | ||
595 | css_put(&mem->css); | ||
596 | return ret; | ||
597 | } | ||
598 | |||
599 | |||
600 | |||
537 | int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp) | 601 | int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp) |
538 | { | 602 | { |
539 | *tmp = memparse(buf, &buf); | 603 | *tmp = memparse(buf, &buf); |
@@ -619,6 +683,33 @@ static ssize_t mem_control_type_read(struct cgroup *cont, | |||
619 | ppos, buf, s - buf); | 683 | ppos, buf, s - buf); |
620 | } | 684 | } |
621 | 685 | ||
686 | |||
687 | static ssize_t mem_force_empty_write(struct cgroup *cont, | ||
688 | struct cftype *cft, struct file *file, | ||
689 | const char __user *userbuf, | ||
690 | size_t nbytes, loff_t *ppos) | ||
691 | { | ||
692 | struct mem_cgroup *mem = mem_cgroup_from_cont(cont); | ||
693 | int ret; | ||
694 | ret = mem_cgroup_force_empty(mem); | ||
695 | if (!ret) | ||
696 | ret = nbytes; | ||
697 | return ret; | ||
698 | } | ||
699 | |||
700 | /* | ||
701 | * Note: This should be removed if cgroup supports write-only file. | ||
702 | */ | ||
703 | |||
704 | static ssize_t mem_force_empty_read(struct cgroup *cont, | ||
705 | struct cftype *cft, | ||
706 | struct file *file, char __user *userbuf, | ||
707 | size_t nbytes, loff_t *ppos) | ||
708 | { | ||
709 | return -EINVAL; | ||
710 | } | ||
711 | |||
712 | |||
622 | static struct cftype mem_cgroup_files[] = { | 713 | static struct cftype mem_cgroup_files[] = { |
623 | { | 714 | { |
624 | .name = "usage_in_bytes", | 715 | .name = "usage_in_bytes", |
@@ -641,6 +732,11 @@ static struct cftype mem_cgroup_files[] = { | |||
641 | .write = mem_control_type_write, | 732 | .write = mem_control_type_write, |
642 | .read = mem_control_type_read, | 733 | .read = mem_control_type_read, |
643 | }, | 734 | }, |
735 | { | ||
736 | .name = "force_empty", | ||
737 | .write = mem_force_empty_write, | ||
738 | .read = mem_force_empty_read, | ||
739 | }, | ||
644 | }; | 740 | }; |
645 | 741 | ||
646 | static struct mem_cgroup init_mem_cgroup; | 742 | static struct mem_cgroup init_mem_cgroup; |