aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> 2011-01-13 18:47:38 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2011-01-13 20:32:50 -0500
commit: dbd4ea78f002df283c95d9774837041735fa1bf9 (patch)
tree: e709c6c5c026b1c230bb87ddcfe8415aaf255820
parent: 2a7106f2cb0768d00fe8c1eb42a754a7d8518f08 (diff)
memcg: add lock to synchronize page accounting and migration
Introduce a new bit spin lock, PCG_MOVE_LOCK, to synchronize the page accounting and migration code. This reworks the locking scheme of _update_stat() and _move_account() by adding a new lock bit, PCG_MOVE_LOCK, which is always taken with IRQs disabled.

1. If pages are being migrated from a memcg, then updates to that memcg's page statistics are protected by grabbing PCG_MOVE_LOCK using move_lock_page_cgroup(). In an upcoming commit, memcg dirty page accounting will be updating memcg page accounting (specifically: the number of writeback pages) from IRQ context (softirq). Avoid a deadlocking nested spin lock attempt by disabling IRQs on the local processor when grabbing PCG_MOVE_LOCK.

2. The lock for update_page_stat is used only to avoid a race with move_account(). So, IRQ awareness of lock_page_cgroup() itself is not a problem. The problem is between mem_cgroup_update_page_stat() and mem_cgroup_move_account().

Trade-off:
  * Changing lock_page_cgroup() to always disable IRQs (or local_bh) has some impact on performance, and I think it's bad to disable IRQs when it's not necessary.
  * Adding a new lock makes move_account() slower. The measurements are below.

Performance impact: moving an 8G anon process.

Before:
  real 0m0.792s
  user 0m0.000s
  sys  0m0.780s

After:
  real 0m0.854s
  user 0m0.000s
  sys  0m0.842s

This score is bad, but planned optimization patches can reduce this impact.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Greg Thelen <gthelen@google.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Andrea Righi <arighi@develer.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/linux/page_cgroup.h 31
-rw-r--r-- mm/memcontrol.c 9
2 files changed, 35 insertions, 5 deletions
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index fdb5a92b5ac7..5b0c971d7cae 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -35,15 +35,18 @@ struct page_cgroup *lookup_page_cgroup(struct page *page);
35 35
36enum { 36enum {
37 /* flags for mem_cgroup */ 37 /* flags for mem_cgroup */
38 PCG_LOCK, /* page cgroup is locked */ 38 PCG_LOCK, /* Lock for pc->mem_cgroup and following bits. */
39 PCG_CACHE, /* charged as cache */ 39 PCG_CACHE, /* charged as cache */
40 PCG_USED, /* this object is in use. */ 40 PCG_USED, /* this object is in use. */
41 PCG_ACCT_LRU, /* page has been accounted for */ 41 PCG_MIGRATION, /* under page migration */
42 /* flags for mem_cgroup and file and I/O status */
43 PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
42 PCG_FILE_MAPPED, /* page is accounted as "mapped" */ 44 PCG_FILE_MAPPED, /* page is accounted as "mapped" */
43 PCG_FILE_DIRTY, /* page is dirty */ 45 PCG_FILE_DIRTY, /* page is dirty */
44 PCG_FILE_WRITEBACK, /* page is under writeback */ 46 PCG_FILE_WRITEBACK, /* page is under writeback */
45 PCG_FILE_UNSTABLE_NFS, /* page is NFS unstable */ 47 PCG_FILE_UNSTABLE_NFS, /* page is NFS unstable */
46 PCG_MIGRATION, /* under page migration */ 48 /* No lock in page_cgroup */
49 PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
47}; 50};
48 51
49#define TESTPCGFLAG(uname, lname) \ 52#define TESTPCGFLAG(uname, lname) \
@@ -117,6 +120,10 @@ static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc)
117 120
118static inline void lock_page_cgroup(struct page_cgroup *pc) 121static inline void lock_page_cgroup(struct page_cgroup *pc)
119{ 122{
123 /*
124 * Don't take this lock in IRQ context.
125 * This lock is for pc->mem_cgroup, USED, CACHE, MIGRATION
126 */
120 bit_spin_lock(PCG_LOCK, &pc->flags); 127 bit_spin_lock(PCG_LOCK, &pc->flags);
121} 128}
122 129
@@ -130,6 +137,24 @@ static inline int page_is_cgroup_locked(struct page_cgroup *pc)
130 return bit_spin_is_locked(PCG_LOCK, &pc->flags); 137 return bit_spin_is_locked(PCG_LOCK, &pc->flags);
131} 138}
132 139
140static inline void move_lock_page_cgroup(struct page_cgroup *pc,
141 unsigned long *flags)
142{
143 /*
144 * We know updates to pc->flags of page cache's stats are from both of
145 * usual context or IRQ context. Disable IRQ to avoid deadlock.
146 */
147 local_irq_save(*flags);
148 bit_spin_lock(PCG_MOVE_LOCK, &pc->flags);
149}
150
151static inline void move_unlock_page_cgroup(struct page_cgroup *pc,
152 unsigned long *flags)
153{
154 bit_spin_unlock(PCG_MOVE_LOCK, &pc->flags);
155 local_irq_restore(*flags);
156}
157
133#else /* CONFIG_CGROUP_MEM_RES_CTLR */ 158#else /* CONFIG_CGROUP_MEM_RES_CTLR */
134struct page_cgroup; 159struct page_cgroup;
135 160
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3d8a0c79dece..d888956a2cfc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1606,6 +1606,7 @@ void mem_cgroup_update_page_stat(struct page *page,
1606 struct mem_cgroup *mem; 1606 struct mem_cgroup *mem;
1607 struct page_cgroup *pc = lookup_page_cgroup(page); 1607 struct page_cgroup *pc = lookup_page_cgroup(page);
1608 bool need_unlock = false; 1608 bool need_unlock = false;
1609 unsigned long uninitialized_var(flags);
1609 1610
1610 if (unlikely(!pc)) 1611 if (unlikely(!pc))
1611 return; 1612 return;
@@ -1617,7 +1618,7 @@ void mem_cgroup_update_page_stat(struct page *page,
1617 /* pc->mem_cgroup is unstable ? */ 1618 /* pc->mem_cgroup is unstable ? */
1618 if (unlikely(mem_cgroup_stealed(mem))) { 1619 if (unlikely(mem_cgroup_stealed(mem))) {
1619 /* take a lock against to access pc->mem_cgroup */ 1620 /* take a lock against to access pc->mem_cgroup */
1620 lock_page_cgroup(pc); 1621 move_lock_page_cgroup(pc, &flags);
1621 need_unlock = true; 1622 need_unlock = true;
1622 mem = pc->mem_cgroup; 1623 mem = pc->mem_cgroup;
1623 if (!mem || !PageCgroupUsed(pc)) 1624 if (!mem || !PageCgroupUsed(pc))
@@ -1640,7 +1641,7 @@ void mem_cgroup_update_page_stat(struct page *page,
1640 1641
1641out: 1642out:
1642 if (unlikely(need_unlock)) 1643 if (unlikely(need_unlock))
1643 unlock_page_cgroup(pc); 1644 move_unlock_page_cgroup(pc, &flags);
1644 rcu_read_unlock(); 1645 rcu_read_unlock();
1645 return; 1646 return;
1646} 1647}
@@ -2211,9 +2212,13 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
2211 struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge) 2212 struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
2212{ 2213{
2213 int ret = -EINVAL; 2214 int ret = -EINVAL;
2215 unsigned long flags;
2216
2214 lock_page_cgroup(pc); 2217 lock_page_cgroup(pc);
2215 if (PageCgroupUsed(pc) && pc->mem_cgroup == from) { 2218 if (PageCgroupUsed(pc) && pc->mem_cgroup == from) {
2219 move_lock_page_cgroup(pc, &flags);
2216 __mem_cgroup_move_account(pc, from, to, uncharge); 2220 __mem_cgroup_move_account(pc, from, to, uncharge);
2221 move_unlock_page_cgroup(pc, &flags);
2217 ret = 0; 2222 ret = 0;
2218 } 2223 }
2219 unlock_page_cgroup(pc); 2224 unlock_page_cgroup(pc);