diff options
author | KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> | 2011-01-13 18:47:38 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-13 20:32:50 -0500 |
commit | dbd4ea78f002df283c95d9774837041735fa1bf9 (patch) | |
tree | e709c6c5c026b1c230bb87ddcfe8415aaf255820 /include/linux | |
parent | 2a7106f2cb0768d00fe8c1eb42a754a7d8518f08 (diff) |
memcg: add lock to synchronize page accounting and migration
Introduce a new bit spin lock, PCG_MOVE_LOCK, to synchronize the page
accounting and migration code. This reworks the locking scheme of
_update_stat() and _move_account() by adding a new lock bit, PCG_MOVE_LOCK,
which is always taken with IRQs disabled.
1. If pages are being migrated from a memcg, then updates to that
memcg's page statistics are protected by grabbing PCG_MOVE_LOCK using
move_lock_page_cgroup(). In an upcoming commit, memcg dirty page
accounting will be updating memcg page accounting (specifically: num
writeback pages) from IRQ context (softirq). Avoid a deadlocking
nested spin lock attempt by disabling IRQs on the local processor when
grabbing the PCG_MOVE_LOCK.
2. The lock for update_page_stat is used only to avoid a race with
move_account(). So, the IRQ awareness of lock_page_cgroup() itself is not
a problem; the problem is between mem_cgroup_update_page_stat() and
mem_cgroup_move_account_page().
Trade-off:
* Changing lock_page_cgroup() to always disable IRQs (or
local_bh) has some impact on performance, and I think
it's bad to disable IRQs when it's not necessary.
* Adding a new lock makes move_account() slower. The measured
score is shown below.
Performance Impact: moving an 8G anon process.
Before:
real 0m0.792s
user 0m0.000s
sys 0m0.780s
After:
real 0m0.854s
user 0m0.000s
sys 0m0.842s
This score is bad, but planned optimization patches can reduce
this impact.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Greg Thelen <gthelen@google.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Andrea Righi <arighi@develer.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/page_cgroup.h | 31 |
1 files changed, 28 insertions, 3 deletions
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index fdb5a92b5ac7..5b0c971d7cae 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h | |||
@@ -35,15 +35,18 @@ struct page_cgroup *lookup_page_cgroup(struct page *page); | |||
35 | 35 | ||
36 | enum { | 36 | enum { |
37 | /* flags for mem_cgroup */ | 37 | /* flags for mem_cgroup */ |
38 | PCG_LOCK, /* page cgroup is locked */ | 38 | PCG_LOCK, /* Lock for pc->mem_cgroup and following bits. */ |
39 | PCG_CACHE, /* charged as cache */ | 39 | PCG_CACHE, /* charged as cache */ |
40 | PCG_USED, /* this object is in use. */ | 40 | PCG_USED, /* this object is in use. */ |
41 | PCG_ACCT_LRU, /* page has been accounted for */ | 41 | PCG_MIGRATION, /* under page migration */ |
42 | /* flags for mem_cgroup and file and I/O status */ | ||
43 | PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */ | ||
42 | PCG_FILE_MAPPED, /* page is accounted as "mapped" */ | 44 | PCG_FILE_MAPPED, /* page is accounted as "mapped" */ |
43 | PCG_FILE_DIRTY, /* page is dirty */ | 45 | PCG_FILE_DIRTY, /* page is dirty */ |
44 | PCG_FILE_WRITEBACK, /* page is under writeback */ | 46 | PCG_FILE_WRITEBACK, /* page is under writeback */ |
45 | PCG_FILE_UNSTABLE_NFS, /* page is NFS unstable */ | 47 | PCG_FILE_UNSTABLE_NFS, /* page is NFS unstable */ |
46 | PCG_MIGRATION, /* under page migration */ | 48 | /* No lock in page_cgroup */ |
49 | PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */ | ||
47 | }; | 50 | }; |
48 | 51 | ||
49 | #define TESTPCGFLAG(uname, lname) \ | 52 | #define TESTPCGFLAG(uname, lname) \ |
@@ -117,6 +120,10 @@ static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc) | |||
117 | 120 | ||
118 | static inline void lock_page_cgroup(struct page_cgroup *pc) | 121 | static inline void lock_page_cgroup(struct page_cgroup *pc) |
119 | { | 122 | { |
123 | /* | ||
124 | * Don't take this lock in IRQ context. | ||
125 | * This lock is for pc->mem_cgroup, USED, CACHE, MIGRATION | ||
126 | */ | ||
120 | bit_spin_lock(PCG_LOCK, &pc->flags); | 127 | bit_spin_lock(PCG_LOCK, &pc->flags); |
121 | } | 128 | } |
122 | 129 | ||
@@ -130,6 +137,24 @@ static inline int page_is_cgroup_locked(struct page_cgroup *pc) | |||
130 | return bit_spin_is_locked(PCG_LOCK, &pc->flags); | 137 | return bit_spin_is_locked(PCG_LOCK, &pc->flags); |
131 | } | 138 | } |
132 | 139 | ||
140 | static inline void move_lock_page_cgroup(struct page_cgroup *pc, | ||
141 | unsigned long *flags) | ||
142 | { | ||
143 | /* | ||
144 | * We know updates to pc->flags of page cache's stats are from both of | ||
145 | * usual context or IRQ context. Disable IRQ to avoid deadlock. | ||
146 | */ | ||
147 | local_irq_save(*flags); | ||
148 | bit_spin_lock(PCG_MOVE_LOCK, &pc->flags); | ||
149 | } | ||
150 | |||
151 | static inline void move_unlock_page_cgroup(struct page_cgroup *pc, | ||
152 | unsigned long *flags) | ||
153 | { | ||
154 | bit_spin_unlock(PCG_MOVE_LOCK, &pc->flags); | ||
155 | local_irq_restore(*flags); | ||
156 | } | ||
157 | |||
133 | #else /* CONFIG_CGROUP_MEM_RES_CTLR */ | 158 | #else /* CONFIG_CGROUP_MEM_RES_CTLR */ |
134 | struct page_cgroup; | 159 | struct page_cgroup; |
135 | 160 | ||