author     Johannes Weiner <hannes@cmpxchg.org>             2011-03-23 19:42:30 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2011-03-23 22:46:28 -0400
commit     6b3ae58efca06623c197fd6d91ded4aa3a8fe039
tree       6460e4e1ce206d391b862a3d398a9e22e33ecb3c
parent     5564e88ba6fd2f6dcd83a592771810cd84b5ae80
memcg: remove direct page_cgroup-to-page pointer
In struct page_cgroup, we have a full word for flags but only a few are
reserved.  Use the remaining upper bits to encode, depending on
configuration, the node or the section, to enable page_cgroup-to-page
lookups without a direct pointer.

This saves a full word for every page in a system with memory cgroups
enabled.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
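The trick is bit-packing: the array ID (a node number, or a memory section
number under CONFIG_SPARSEMEM) lives in the topmost bits of pc->flags, above
the PCG_* flag bits.  Below is a minimal userspace sketch of that pack/unpack
scheme; the 10-bit width is a made-up stand-in for SECTIONS_SHIFT/NODES_SHIFT,
not a value taken from the patch.

#include <assert.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))
#define ARRAYID_WIDTH 10UL /* stand-in for SECTIONS_SHIFT / NODES_SHIFT */
#define ARRAYID_MASK  ((1UL << ARRAYID_WIDTH) - 1)
#define ARRAYID_SHIFT (BITS_PER_LONG - ARRAYID_WIDTH)

/* Pack an array ID into the upper bits, preserving the low flag bits. */
static void set_array_id(unsigned long *flags, unsigned long id)
{
        *flags &= ~(ARRAYID_MASK << ARRAYID_SHIFT);
        *flags |= (id & ARRAYID_MASK) << ARRAYID_SHIFT;
}

/* Recover the array ID from the upper bits. */
static unsigned long array_id(unsigned long flags)
{
        return (flags >> ARRAYID_SHIFT) & ARRAYID_MASK;
}

int main(void)
{
        unsigned long flags = 0x7; /* pretend three low flag bits are set */

        set_array_id(&flags, 42);
        assert(array_id(flags) == 42);
        assert((flags & 0x7) == 0x7); /* flag bits are untouched */
        return 0;
}

Masking the old value out before OR-ing the new one in keeps the helper safe
to call repeatedly; the kernel helpers introduced below,
set_page_cgroup_array_id() and page_cgroup_array_id(), do exactly this on
pc->flags.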
 include/linux/page_cgroup.h | 75
 kernel/bounds.c             |  2
 mm/memcontrol.c             |  4
 mm/page_cgroup.c            | 91
 4 files changed, 117 insertions(+), 55 deletions(-)
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 6b63679ce8a1..f5de21de31dd 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -1,8 +1,26 @@
 #ifndef __LINUX_PAGE_CGROUP_H
 #define __LINUX_PAGE_CGROUP_H
 
+enum {
+        /* flags for mem_cgroup */
+        PCG_LOCK,  /* Lock for pc->mem_cgroup and following bits. */
+        PCG_CACHE, /* charged as cache */
+        PCG_USED, /* this object is in use. */
+        PCG_MIGRATION, /* under page migration */
+        /* flags for mem_cgroup and file and I/O status */
+        PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
+        PCG_FILE_MAPPED, /* page is accounted as "mapped" */
+        /* No lock in page_cgroup */
+        PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
+        __NR_PCG_FLAGS,
+};
+
+#ifndef __GENERATING_BOUNDS_H
+#include <generated/bounds.h>
+
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 #include <linux/bit_spinlock.h>
+
 /*
  * Page Cgroup can be considered as an extended mem_map.
  * A page_cgroup page is associated with every page descriptor. The
@@ -13,7 +31,6 @@
 struct page_cgroup {
         unsigned long flags;
         struct mem_cgroup *mem_cgroup;
-        struct page *page;
         struct list_head lru; /* per cgroup LRU list */
 };
 
@@ -32,19 +49,7 @@ static inline void __init page_cgroup_init(void)
 #endif
 
 struct page_cgroup *lookup_page_cgroup(struct page *page);
-
-enum {
-        /* flags for mem_cgroup */
-        PCG_LOCK,  /* Lock for pc->mem_cgroup and following bits. */
-        PCG_CACHE, /* charged as cache */
-        PCG_USED, /* this object is in use. */
-        PCG_MIGRATION, /* under page migration */
-        /* flags for mem_cgroup and file and I/O status */
-        PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
-        PCG_FILE_MAPPED, /* page is accounted as "mapped" */
-        /* No lock in page_cgroup */
-        PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
-};
+struct page *lookup_cgroup_page(struct page_cgroup *pc);
 
 #define TESTPCGFLAG(uname, lname)			\
 static inline int PageCgroup##uname(struct page_cgroup *pc)	\
@@ -117,6 +122,39 @@ static inline void move_unlock_page_cgroup(struct page_cgroup *pc,
         local_irq_restore(*flags);
 }
 
+#ifdef CONFIG_SPARSEMEM
+#define PCG_ARRAYID_WIDTH	SECTIONS_SHIFT
+#else
+#define PCG_ARRAYID_WIDTH	NODES_SHIFT
+#endif
+
+#if (PCG_ARRAYID_WIDTH > BITS_PER_LONG - NR_PCG_FLAGS)
+#error Not enough space left in pc->flags to store page_cgroup array IDs
+#endif
+
+/* pc->flags: ARRAY-ID | FLAGS */
+
+#define PCG_ARRAYID_MASK	((1UL << PCG_ARRAYID_WIDTH) - 1)
+
+#define PCG_ARRAYID_OFFSET	(BITS_PER_LONG - PCG_ARRAYID_WIDTH)
+/*
+ * Zero the shift count for non-existent fields, to prevent compiler
+ * warnings and ensure references are optimized away.
+ */
+#define PCG_ARRAYID_SHIFT	(PCG_ARRAYID_OFFSET * (PCG_ARRAYID_WIDTH != 0))
+
+static inline void set_page_cgroup_array_id(struct page_cgroup *pc,
+                                            unsigned long id)
+{
+        pc->flags &= ~(PCG_ARRAYID_MASK << PCG_ARRAYID_SHIFT);
+        pc->flags |= (id & PCG_ARRAYID_MASK) << PCG_ARRAYID_SHIFT;
+}
+
+static inline unsigned long page_cgroup_array_id(struct page_cgroup *pc)
+{
+        return (pc->flags >> PCG_ARRAYID_SHIFT) & PCG_ARRAYID_MASK;
+}
+
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct page_cgroup;
 
@@ -137,7 +175,7 @@ static inline void __init page_cgroup_init_flatmem(void)
 {
 }
 
-#endif
+#endif /* CONFIG_CGROUP_MEM_RES_CTLR */
 
 #include <linux/swap.h>
 
@@ -173,5 +211,8 @@ static inline void swap_cgroup_swapoff(int type)
         return;
 }
 
-#endif
-#endif
+#endif /* CONFIG_CGROUP_MEM_RES_CTLR_SWAP */
+
+#endif /* !__GENERATING_BOUNDS_H */
+
+#endif /* __LINUX_PAGE_CGROUP_H */
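A quick sanity check of the #if guard above, as a standalone compile-time
sketch.  The flag count comes from the enum in this patch (seven PCG_* flags
precede __NR_PCG_FLAGS); the section-shift value is one plausible 64-bit
sparsemem configuration, not a constant from the patch.

#define BITS_PER_LONG  64
#define NR_PCG_FLAGS   7   /* seven PCG_* flags precede __NR_PCG_FLAGS */
#define SECTIONS_SHIFT 19  /* assumed: 46 physmem bits - 27 section-size bits */

#if (SECTIONS_SHIFT > BITS_PER_LONG - NR_PCG_FLAGS)
#error Not enough space left in pc->flags to store page_cgroup array IDs
#endif

/* 19 <= 57: the array ID lives in bits 63..45, the flags in bits 6..0. */
int main(void) { return 0; }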
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 98a51f26c136..0c9b862292b2 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -9,11 +9,13 @@
 #include <linux/page-flags.h>
 #include <linux/mmzone.h>
 #include <linux/kbuild.h>
+#include <linux/page_cgroup.h>
 
 void foo(void)
 {
         /* The enum constants to put into include/generated/bounds.h */
         DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
         DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
+        DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS);
         /* End of constants */
 }
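For context: kernel/bounds.c is compiled early in the build, and its DEFINE()
entries are turned into include/generated/bounds.h, which is what lets the
preprocessor arithmetic in page_cgroup.h see NR_PCG_FLAGS as a plain constant.
The generated header ends up containing a line roughly like the following
(exact formatting is build-dependent):

#define NR_PCG_FLAGS 7 /* __NR_PCG_FLAGS */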
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e286e1603e4f..660dfc27d971 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1080,7 +1080,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
                 if (unlikely(!PageCgroupUsed(pc)))
                         continue;
 
-                page = pc->page;
+                page = lookup_cgroup_page(pc);
 
                 if (unlikely(!PageLRU(page)))
                         continue;
@@ -3344,7 +3344,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
                 }
                 spin_unlock_irqrestore(&zone->lru_lock, flags);
 
-                page = pc->page;
+                page = lookup_cgroup_page(pc);
 
                 ret = mem_cgroup_move_parent(page, pc, mem, GFP_KERNEL);
                 if (ret == -ENOMEM)
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 59a3cd4c799d..6c3f7a6a481a 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -11,12 +11,11 @@
 #include <linux/swapops.h>
 #include <linux/kmemleak.h>
 
-static void __meminit
-__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
+static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id)
 {
         pc->flags = 0;
+        set_page_cgroup_array_id(pc, id);
         pc->mem_cgroup = NULL;
-        pc->page = pfn_to_page(pfn);
         INIT_LIST_HEAD(&pc->lru);
 }
 static unsigned long total_usage;
@@ -43,6 +42,19 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
         return base + offset;
 }
 
+struct page *lookup_cgroup_page(struct page_cgroup *pc)
+{
+        unsigned long pfn;
+        struct page *page;
+        pg_data_t *pgdat;
+
+        pgdat = NODE_DATA(page_cgroup_array_id(pc));
+        pfn = pc - pgdat->node_page_cgroup + pgdat->node_start_pfn;
+        page = pfn_to_page(pfn);
+        VM_BUG_ON(pc != lookup_page_cgroup(page));
+        return page;
+}
+
 static int __init alloc_node_page_cgroup(int nid)
 {
         struct page_cgroup *base, *pc;
@@ -63,7 +75,7 @@ static int __init alloc_node_page_cgroup(int nid)
                 return -ENOMEM;
         for (index = 0; index < nr_pages; index++) {
                 pc = base + index;
-                __init_page_cgroup(pc, start_pfn + index);
+                init_page_cgroup(pc, nid);
         }
         NODE_DATA(nid)->node_page_cgroup = base;
         total_usage += table_size;
@@ -105,46 +117,53 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
         return section->page_cgroup + pfn;
 }
 
+struct page *lookup_cgroup_page(struct page_cgroup *pc)
+{
+        struct mem_section *section;
+        struct page *page;
+        unsigned long nr;
+
+        nr = page_cgroup_array_id(pc);
+        section = __nr_to_section(nr);
+        page = pfn_to_page(pc - section->page_cgroup);
+        VM_BUG_ON(pc != lookup_page_cgroup(page));
+        return page;
+}
+
 /* __alloc_bootmem...() is protected by !slab_available() */
 static int __init_refok init_section_page_cgroup(unsigned long pfn)
 {
-        struct mem_section *section = __pfn_to_section(pfn);
         struct page_cgroup *base, *pc;
+        struct mem_section *section;
         unsigned long table_size;
+        unsigned long nr;
         int nid, index;
 
-        if (!section->page_cgroup) {
-                nid = page_to_nid(pfn_to_page(pfn));
-                table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
-                VM_BUG_ON(!slab_is_available());
-                if (node_state(nid, N_HIGH_MEMORY)) {
-                        base = kmalloc_node(table_size,
-                                            GFP_KERNEL | __GFP_NOWARN, nid);
-                        if (!base)
-                                base = vmalloc_node(table_size, nid);
-                } else {
-                        base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN);
-                        if (!base)
-                                base = vmalloc(table_size);
-                }
-                /*
-                 * The value stored in section->page_cgroup is (base - pfn)
-                 * and it does not point to the memory block allocated above,
-                 * causing kmemleak false positives.
-                 */
-                kmemleak_not_leak(base);
+        nr = pfn_to_section_nr(pfn);
+        section = __nr_to_section(nr);
+
+        if (section->page_cgroup)
+                return 0;
+
+        nid = page_to_nid(pfn_to_page(pfn));
+        table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
+        VM_BUG_ON(!slab_is_available());
+        if (node_state(nid, N_HIGH_MEMORY)) {
+                base = kmalloc_node(table_size,
+                                    GFP_KERNEL | __GFP_NOWARN, nid);
+                if (!base)
+                        base = vmalloc_node(table_size, nid);
         } else {
-                /*
-                 * We don't have to allocate page_cgroup again, but
-                 * address of memmap may be changed. So, we have to initialize
-                 * again.
-                 */
-                base = section->page_cgroup + pfn;
-                table_size = 0;
-                /* check address of memmap is changed or not. */
-                if (base->page == pfn_to_page(pfn))
-                        return 0;
+                base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN);
+                if (!base)
+                        base = vmalloc(table_size);
         }
+        /*
+         * The value stored in section->page_cgroup is (base - pfn)
+         * and it does not point to the memory block allocated above,
+         * causing kmemleak false positives.
+         */
+        kmemleak_not_leak(base);
 
         if (!base) {
                 printk(KERN_ERR "page cgroup allocation failure\n");
@@ -153,7 +172,7 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
 
         for (index = 0; index < PAGES_PER_SECTION; index++) {
                 pc = base + index;
-                __init_page_cgroup(pc, pfn + index);
+                init_page_cgroup(pc, nr);
         }
 
         section->page_cgroup = base - pfn;
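The reverse lookups above work because the page_cgroup array is parallel to
the page array of its node or section: the index of pc within its array, plus
the array's base pfn, recovers the pfn.  A runnable userspace model of the
flatmem variant, where node_start_pfn and node_page_cgroup are invented
stand-ins for the pgdat fields used by lookup_cgroup_page():

#include <assert.h>

#define NR_NODE_PAGES 1024

struct page_cgroup { unsigned long flags; };

/* Stand-ins for pgdat->node_page_cgroup and pgdat->node_start_pfn. */
static struct page_cgroup node_page_cgroup[NR_NODE_PAGES];
static const unsigned long node_start_pfn = 0x100;

/* Model of lookup_page_cgroup(): index the parallel array by pfn. */
static struct page_cgroup *lookup(unsigned long pfn)
{
        return node_page_cgroup + (pfn - node_start_pfn);
}

/*
 * Model of lookup_cgroup_page(): pointer arithmetic recovers the pfn,
 * mirroring pc - pgdat->node_page_cgroup + pgdat->node_start_pfn above.
 */
static unsigned long reverse_lookup(struct page_cgroup *pc)
{
        return (unsigned long)(pc - node_page_cgroup) + node_start_pfn;
}

int main(void)
{
        unsigned long pfn = node_start_pfn + 42;

        assert(reverse_lookup(lookup(pfn)) == pfn);
        return 0;
}

The sparsemem variant is the same idea with section->page_cgroup as the base;
the VM_BUG_ON() in both kernel versions asserts that the two lookups are
inverses of each other.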