diff options
author | KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> | 2008-10-18 23:28:16 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-20 11:52:39 -0400 |
commit | 52d4b9ac0b985168009c2a57098324e67bae171f (patch) | |
tree | b3e3b854166930af893be90ea30a7ab0d65c59e7 /include/linux | |
parent | c05555b572921c464d064d9267f7f7bc06d424fa (diff) |
memcg: allocate all page_cgroup at boot
Allocate all page_cgroup at boot and remove page_cgroup poitner from
struct page. This patch adds an interface as
struct page_cgroup *lookup_page_cgroup(struct page*)
All FLATMEM/DISCONTIGMEM/SPARSEMEM and MEMORY_HOTPLUG is supported.
Remove page_cgroup pointer reduces the amount of memory by
- 4 bytes per PAGE_SIZE.
- 8 bytes per PAGE_SIZE
if memory controller is disabled. (even if configured.)
On usual 8GB x86-32 server, this saves 8MB of NORMAL_ZONE memory.
On my x86-64 server with 48GB of memory, this saves 96MB of memory.
I think this reduction makes sense.
By pre-allocation, kmalloc/kfree in charge/uncharge are removed.
This means
- we're not necessary to be afraid of kmalloc faiulre.
(this can happen because of gfp_mask type.)
- we can avoid calling kmalloc/kfree.
- we can avoid allocating tons of small objects which can be fragmented.
- we can know what amount of memory will be used for this extra-lru handling.
I added printk message as
"allocated %ld bytes of page_cgroup"
"please try cgroup_disable=memory option if you don't want"
maybe enough informative for users.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/memcontrol.h | 13 | ||||
-rw-r--r-- | include/linux/mm_types.h | 3 | ||||
-rw-r--r-- | include/linux/mmzone.h | 14 | ||||
-rw-r--r-- | include/linux/page_cgroup.h | 103 |
4 files changed, 117 insertions, 16 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ee1b2fcb4410..1fbe14d39521 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -27,9 +27,6 @@ struct mm_struct; | |||
27 | 27 | ||
28 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR | 28 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR |
29 | 29 | ||
30 | #define page_reset_bad_cgroup(page) ((page)->page_cgroup = 0) | ||
31 | |||
32 | extern struct page_cgroup *page_get_page_cgroup(struct page *page); | ||
33 | extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, | 30 | extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, |
34 | gfp_t gfp_mask); | 31 | gfp_t gfp_mask); |
35 | extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, | 32 | extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, |
@@ -72,16 +69,8 @@ extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, | |||
72 | extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, | 69 | extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, |
73 | int priority, enum lru_list lru); | 70 | int priority, enum lru_list lru); |
74 | 71 | ||
75 | #else /* CONFIG_CGROUP_MEM_RES_CTLR */ | ||
76 | static inline void page_reset_bad_cgroup(struct page *page) | ||
77 | { | ||
78 | } | ||
79 | |||
80 | static inline struct page_cgroup *page_get_page_cgroup(struct page *page) | ||
81 | { | ||
82 | return NULL; | ||
83 | } | ||
84 | 72 | ||
73 | #else /* CONFIG_CGROUP_MEM_RES_CTLR */ | ||
85 | static inline int mem_cgroup_charge(struct page *page, | 74 | static inline int mem_cgroup_charge(struct page *page, |
86 | struct mm_struct *mm, gfp_t gfp_mask) | 75 | struct mm_struct *mm, gfp_t gfp_mask) |
87 | { | 76 | { |
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 9d49fa36bbef..fe825471d5aa 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h | |||
@@ -94,9 +94,6 @@ struct page { | |||
94 | void *virtual; /* Kernel virtual address (NULL if | 94 | void *virtual; /* Kernel virtual address (NULL if |
95 | not kmapped, ie. highmem) */ | 95 | not kmapped, ie. highmem) */ |
96 | #endif /* WANT_PAGE_VIRTUAL */ | 96 | #endif /* WANT_PAGE_VIRTUAL */ |
97 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR | ||
98 | unsigned long page_cgroup; | ||
99 | #endif | ||
100 | }; | 97 | }; |
101 | 98 | ||
102 | /* | 99 | /* |
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index da2d053a95f1..35a7b5e19465 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
@@ -601,8 +601,11 @@ typedef struct pglist_data { | |||
601 | struct zone node_zones[MAX_NR_ZONES]; | 601 | struct zone node_zones[MAX_NR_ZONES]; |
602 | struct zonelist node_zonelists[MAX_ZONELISTS]; | 602 | struct zonelist node_zonelists[MAX_ZONELISTS]; |
603 | int nr_zones; | 603 | int nr_zones; |
604 | #ifdef CONFIG_FLAT_NODE_MEM_MAP | 604 | #ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */ |
605 | struct page *node_mem_map; | 605 | struct page *node_mem_map; |
606 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR | ||
607 | struct page_cgroup *node_page_cgroup; | ||
608 | #endif | ||
606 | #endif | 609 | #endif |
607 | struct bootmem_data *bdata; | 610 | struct bootmem_data *bdata; |
608 | #ifdef CONFIG_MEMORY_HOTPLUG | 611 | #ifdef CONFIG_MEMORY_HOTPLUG |
@@ -931,6 +934,7 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn) | |||
931 | #endif | 934 | #endif |
932 | 935 | ||
933 | struct page; | 936 | struct page; |
937 | struct page_cgroup; | ||
934 | struct mem_section { | 938 | struct mem_section { |
935 | /* | 939 | /* |
936 | * This is, logically, a pointer to an array of struct | 940 | * This is, logically, a pointer to an array of struct |
@@ -948,6 +952,14 @@ struct mem_section { | |||
948 | 952 | ||
949 | /* See declaration of similar field in struct zone */ | 953 | /* See declaration of similar field in struct zone */ |
950 | unsigned long *pageblock_flags; | 954 | unsigned long *pageblock_flags; |
955 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR | ||
956 | /* | ||
957 | * If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use | ||
958 | * section. (see memcontrol.h/page_cgroup.h about this.) | ||
959 | */ | ||
960 | struct page_cgroup *page_cgroup; | ||
961 | unsigned long pad; | ||
962 | #endif | ||
951 | }; | 963 | }; |
952 | 964 | ||
953 | #ifdef CONFIG_SPARSEMEM_EXTREME | 965 | #ifdef CONFIG_SPARSEMEM_EXTREME |
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h new file mode 100644 index 000000000000..0fd39f2231ec --- /dev/null +++ b/include/linux/page_cgroup.h | |||
@@ -0,0 +1,103 @@ | |||
1 | #ifndef __LINUX_PAGE_CGROUP_H | ||
2 | #define __LINUX_PAGE_CGROUP_H | ||
3 | |||
4 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR | ||
5 | #include <linux/bit_spinlock.h> | ||
6 | /* | ||
7 | * Page Cgroup can be considered as an extended mem_map. | ||
8 | * A page_cgroup page is associated with every page descriptor. The | ||
9 | * page_cgroup helps us identify information about the cgroup | ||
10 | * All page cgroups are allocated at boot or memory hotplug event, | ||
11 | * then the page cgroup for pfn always exists. | ||
12 | */ | ||
13 | struct page_cgroup { | ||
14 | unsigned long flags; | ||
15 | struct mem_cgroup *mem_cgroup; | ||
16 | struct page *page; | ||
17 | struct list_head lru; /* per cgroup LRU list */ | ||
18 | }; | ||
19 | |||
20 | void __init pgdat_page_cgroup_init(struct pglist_data *pgdat); | ||
21 | void __init page_cgroup_init(void); | ||
22 | struct page_cgroup *lookup_page_cgroup(struct page *page); | ||
23 | |||
24 | enum { | ||
25 | /* flags for mem_cgroup */ | ||
26 | PCG_LOCK, /* page cgroup is locked */ | ||
27 | PCG_CACHE, /* charged as cache */ | ||
28 | PCG_USED, /* this object is in use. */ | ||
29 | /* flags for LRU placement */ | ||
30 | PCG_ACTIVE, /* page is active in this cgroup */ | ||
31 | PCG_FILE, /* page is file system backed */ | ||
32 | PCG_UNEVICTABLE, /* page is unevictableable */ | ||
33 | }; | ||
34 | |||
35 | #define TESTPCGFLAG(uname, lname) \ | ||
36 | static inline int PageCgroup##uname(struct page_cgroup *pc) \ | ||
37 | { return test_bit(PCG_##lname, &pc->flags); } | ||
38 | |||
39 | #define SETPCGFLAG(uname, lname) \ | ||
40 | static inline void SetPageCgroup##uname(struct page_cgroup *pc)\ | ||
41 | { set_bit(PCG_##lname, &pc->flags); } | ||
42 | |||
43 | #define CLEARPCGFLAG(uname, lname) \ | ||
44 | static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \ | ||
45 | { clear_bit(PCG_##lname, &pc->flags); } | ||
46 | |||
47 | /* Cache flag is set only once (at allocation) */ | ||
48 | TESTPCGFLAG(Cache, CACHE) | ||
49 | |||
50 | TESTPCGFLAG(Used, USED) | ||
51 | CLEARPCGFLAG(Used, USED) | ||
52 | |||
53 | /* LRU management flags (from global-lru definition) */ | ||
54 | TESTPCGFLAG(File, FILE) | ||
55 | SETPCGFLAG(File, FILE) | ||
56 | CLEARPCGFLAG(File, FILE) | ||
57 | |||
58 | TESTPCGFLAG(Active, ACTIVE) | ||
59 | SETPCGFLAG(Active, ACTIVE) | ||
60 | CLEARPCGFLAG(Active, ACTIVE) | ||
61 | |||
62 | TESTPCGFLAG(Unevictable, UNEVICTABLE) | ||
63 | SETPCGFLAG(Unevictable, UNEVICTABLE) | ||
64 | CLEARPCGFLAG(Unevictable, UNEVICTABLE) | ||
65 | |||
66 | static inline int page_cgroup_nid(struct page_cgroup *pc) | ||
67 | { | ||
68 | return page_to_nid(pc->page); | ||
69 | } | ||
70 | |||
71 | static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc) | ||
72 | { | ||
73 | return page_zonenum(pc->page); | ||
74 | } | ||
75 | |||
76 | static inline void lock_page_cgroup(struct page_cgroup *pc) | ||
77 | { | ||
78 | bit_spin_lock(PCG_LOCK, &pc->flags); | ||
79 | } | ||
80 | |||
81 | static inline int trylock_page_cgroup(struct page_cgroup *pc) | ||
82 | { | ||
83 | return bit_spin_trylock(PCG_LOCK, &pc->flags); | ||
84 | } | ||
85 | |||
86 | static inline void unlock_page_cgroup(struct page_cgroup *pc) | ||
87 | { | ||
88 | bit_spin_unlock(PCG_LOCK, &pc->flags); | ||
89 | } | ||
90 | |||
91 | #else /* CONFIG_CGROUP_MEM_RES_CTLR */ | ||
92 | struct page_cgroup; | ||
93 | |||
94 | static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat) | ||
95 | { | ||
96 | } | ||
97 | |||
98 | static inline struct page_cgroup *lookup_page_cgroup(struct page *page) | ||
99 | { | ||
100 | return NULL; | ||
101 | } | ||
102 | #endif | ||
103 | #endif | ||