diff options
-rw-r--r-- | include/linux/balloon_compaction.h | 272 | ||||
-rw-r--r-- | include/linux/migrate.h | 10 | ||||
-rw-r--r-- | include/linux/pagemap.h | 16 | ||||
-rw-r--r-- | mm/Kconfig | 15 | ||||
-rw-r--r-- | mm/Makefile | 3 | ||||
-rw-r--r-- | mm/balloon_compaction.c | 302 |
6 files changed, 617 insertions, 1 deletions
diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h new file mode 100644 index 000000000000..f7f1d7169b11 --- /dev/null +++ b/include/linux/balloon_compaction.h | |||
@@ -0,0 +1,272 @@ | |||
1 | /* | ||
2 | * include/linux/balloon_compaction.h | ||
3 | * | ||
4 | * Common interface definitions for making balloon pages movable by compaction. | ||
5 | * | ||
6 | * Despite being perfectly possible to perform ballooned pages migration, they | ||
7 | * make a special corner case to compaction scans because balloon pages are not | ||
8 | * enlisted at any LRU list like the other pages we do compact / migrate. | ||
9 | * | ||
10 | * As the page isolation scanning step a compaction thread does is a lockless | ||
11 | * procedure (from a page standpoint), it might bring some racy situations while | ||
12 | * performing balloon page compaction. In order to sort out these racy scenarios | ||
13 | * and safely perform balloon's page compaction and migration we must, always, | ||
14 | * ensure following these three simple rules: | ||
15 | * | ||
16 | * i. when updating a balloon's page ->mapping element, strictly do it under | ||
17 | * the following lock order, independently of the far superior | ||
18 | * locking scheme (lru_lock, balloon_lock): | ||
19 | * +-page_lock(page); | ||
20 | * +--spin_lock_irq(&b_dev_info->pages_lock); | ||
21 | * ... page->mapping updates here ... | ||
22 | * | ||
23 | * ii. before isolating or dequeueing a balloon page from the balloon device | ||
24 | * pages list, the page reference counter must be raised by one and the | ||
25 | * extra refcount must be dropped when the page is enqueued back into | ||
26 | * the balloon device page list, thus a balloon page keeps its reference | ||
27 | * counter raised only while it is under our special handling; | ||
28 | * | ||
29 | * iii. after the lockless scan step has selected a potential balloon page for | ||
30 | * isolation, re-test the page->mapping flags and the page ref counter | ||
31 | * under the proper page lock, to ensure isolating a valid balloon page | ||
32 | * (not yet isolated, nor under release procedure) | ||
33 | * | ||
34 | * The functions provided by this interface are placed to help on coping with | ||
35 | * the aforementioned balloon page corner case, as well as to ensure the simple | ||
36 | * set of exposed rules are satisfied while we are dealing with balloon pages | ||
37 | * compaction / migration. | ||
38 | * | ||
39 | * Copyright (C) 2012, Red Hat, Inc. Rafael Aquini <aquini@redhat.com> | ||
40 | */ | ||
41 | #ifndef _LINUX_BALLOON_COMPACTION_H | ||
42 | #define _LINUX_BALLOON_COMPACTION_H | ||
43 | #include <linux/pagemap.h> | ||
44 | #include <linux/page-flags.h> | ||
45 | #include <linux/migrate.h> | ||
46 | #include <linux/gfp.h> | ||
47 | #include <linux/err.h> | ||
48 | |||
49 | /* | ||
50 | * Balloon device information descriptor. | ||
51 | * This struct is used to allow the common balloon compaction interface | ||
52 | * procedures to find the proper balloon device holding memory pages they'll | ||
53 | * have to cope for page compaction / migration, as well as it serves the | ||
54 | * balloon driver as a page book-keeper for its registered balloon devices. | ||
55 | */ | ||
56 | struct balloon_dev_info { | ||
57 | void *balloon_device; /* balloon device descriptor */ | ||
58 | struct address_space *mapping; /* balloon special page->mapping */ | ||
59 | unsigned long isolated_pages; /* # of isolated pages for migration */ | ||
60 | spinlock_t pages_lock; /* Protection to pages list */ | ||
61 | struct list_head pages; /* Pages enqueued & handled to Host */ | ||
62 | }; | ||
63 | |||
64 | extern struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info); | ||
65 | extern struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info); | ||
66 | extern struct balloon_dev_info *balloon_devinfo_alloc( | ||
67 | void *balloon_dev_descriptor); | ||
68 | |||
69 | static inline void balloon_devinfo_free(struct balloon_dev_info *b_dev_info) | ||
70 | { | ||
71 | kfree(b_dev_info); | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * balloon_page_free - release a balloon page back to the page free lists | ||
76 | * @page: ballooned page to be set free | ||
77 | * | ||
78 | * This function must be used to properly set free an isolated/dequeued balloon | ||
79 | * page at the end of a successful page migration, or at the balloon driver's | ||
80 | * page release procedure. | ||
81 | */ | ||
82 | static inline void balloon_page_free(struct page *page) | ||
83 | { | ||
84 | /* | ||
85 | * Balloon pages always get an extra refcount before being isolated | ||
86 | * and before being dequeued to help on sorting out fortuitous collisions | ||
87 | * between a thread attempting to isolate and another thread attempting | ||
88 | * to release the very same balloon page. | ||
89 | * | ||
90 | * Before we hand the page back to Buddy, let's drop its extra refcnt. | ||
91 | */ | ||
92 | put_page(page); | ||
93 | __free_page(page); | ||
94 | } | ||
95 | |||
96 | #ifdef CONFIG_BALLOON_COMPACTION | ||
97 | extern bool balloon_page_isolate(struct page *page); | ||
98 | extern void balloon_page_putback(struct page *page); | ||
99 | extern int balloon_page_migrate(struct page *newpage, | ||
100 | struct page *page, enum migrate_mode mode); | ||
101 | extern struct address_space | ||
102 | *balloon_mapping_alloc(struct balloon_dev_info *b_dev_info, | ||
103 | const struct address_space_operations *a_ops); | ||
104 | |||
105 | static inline void balloon_mapping_free(struct address_space *balloon_mapping) | ||
106 | { | ||
107 | kfree(balloon_mapping); | ||
108 | } | ||
109 | |||
110 | /* | ||
111 | * page_flags_cleared - helper to perform balloon @page ->flags tests. | ||
112 | * | ||
113 | * As balloon pages are obtained from buddy and we do not play with page->flags | ||
114 | * at driver level (exception made when we get the page lock for compaction), | ||
115 | * we can safely identify a ballooned page by checking if the | ||
116 | * PAGE_FLAGS_CHECK_AT_PREP page->flags are all cleared. This approach also | ||
117 | * helps us skip ballooned pages that are locked for compaction or release, thus | ||
118 | * mitigating their racy check at balloon_page_movable() | ||
119 | */ | ||
120 | static inline bool page_flags_cleared(struct page *page) | ||
121 | { | ||
122 | return !(page->flags & PAGE_FLAGS_CHECK_AT_PREP); | ||
123 | } | ||
124 | |||
125 | /* | ||
126 | * __is_movable_balloon_page - helper to perform @page mapping->flags tests | ||
127 | */ | ||
128 | static inline bool __is_movable_balloon_page(struct page *page) | ||
129 | { | ||
130 | struct address_space *mapping = page->mapping; | ||
131 | return mapping_balloon(mapping); | ||
132 | } | ||
133 | |||
134 | /* | ||
135 | * balloon_page_movable - test page->mapping->flags to identify balloon pages | ||
136 | * that can be moved by compaction/migration. | ||
137 | * | ||
138 | * This function is used at core compaction's page isolation scheme, therefore | ||
139 | * most pages exposed to it are not enlisted as balloon pages and so, to avoid | ||
140 | * undesired side effects like racing against __free_pages(), we cannot afford | ||
141 | * holding the page locked while testing page->mapping->flags here. | ||
142 | * | ||
143 | * As we might return false positives in the case of a balloon page being just | ||
144 | * released under us, the page->mapping->flags need to be re-tested later, | ||
145 | * under the proper page lock, at the functions that will be coping with the | ||
146 | * balloon page case. | ||
147 | */ | ||
148 | static inline bool balloon_page_movable(struct page *page) | ||
149 | { | ||
150 | /* | ||
151 | * Before dereferencing and testing mapping->flags, let's make sure | ||
152 | * this is not a page that uses ->mapping in a different way | ||
153 | */ | ||
154 | if (page_flags_cleared(page) && !page_mapped(page) && | ||
155 | page_count(page) == 1) | ||
156 | return __is_movable_balloon_page(page); | ||
157 | |||
158 | return false; | ||
159 | } | ||
160 | |||
161 | /* | ||
162 | * balloon_page_insert - insert a page into the balloon's page list and make | ||
163 | * the page->mapping assignment accordingly. | ||
164 | * @page : page to be assigned as a 'balloon page' | ||
165 | * @mapping : allocated special 'balloon_mapping' | ||
166 | * @head : balloon's device page list head | ||
167 | * | ||
168 | * Caller must ensure the page is locked and the spin_lock protecting balloon | ||
169 | * pages list is held before inserting a page into the balloon device. | ||
170 | */ | ||
171 | static inline void balloon_page_insert(struct page *page, | ||
172 | struct address_space *mapping, | ||
173 | struct list_head *head) | ||
174 | { | ||
175 | page->mapping = mapping; | ||
176 | list_add(&page->lru, head); | ||
177 | } | ||
178 | |||
179 | /* | ||
180 | * balloon_page_delete - delete a page from balloon's page list and clear | ||
181 | * the page->mapping assignment accordingly. | ||
182 | * @page : page to be released from balloon's page list | ||
183 | * | ||
184 | * Caller must ensure the page is locked and the spin_lock protecting balloon | ||
185 | * pages list is held before deleting a page from the balloon device. | ||
186 | */ | ||
187 | static inline void balloon_page_delete(struct page *page) | ||
188 | { | ||
189 | page->mapping = NULL; | ||
190 | list_del(&page->lru); | ||
191 | } | ||
192 | |||
193 | /* | ||
194 | * balloon_page_device - get the b_dev_info descriptor for the balloon device | ||
195 | * that enqueues the given page. | ||
196 | */ | ||
197 | static inline struct balloon_dev_info *balloon_page_device(struct page *page) | ||
198 | { | ||
199 | struct address_space *mapping = page->mapping; | ||
200 | if (likely(mapping)) | ||
201 | return mapping->private_data; | ||
202 | |||
203 | return NULL; | ||
204 | } | ||
205 | |||
206 | static inline gfp_t balloon_mapping_gfp_mask(void) | ||
207 | { | ||
208 | return GFP_HIGHUSER_MOVABLE; | ||
209 | } | ||
210 | |||
211 | static inline bool balloon_compaction_check(void) | ||
212 | { | ||
213 | return true; | ||
214 | } | ||
215 | |||
216 | #else /* !CONFIG_BALLOON_COMPACTION */ | ||
217 | |||
218 | static inline void *balloon_mapping_alloc(void *balloon_device, | ||
219 | const struct address_space_operations *a_ops) | ||
220 | { | ||
221 | return ERR_PTR(-EOPNOTSUPP); | ||
222 | } | ||
223 | |||
224 | static inline void balloon_mapping_free(struct address_space *balloon_mapping) | ||
225 | { | ||
226 | return; | ||
227 | } | ||
228 | |||
229 | static inline void balloon_page_insert(struct page *page, | ||
230 | struct address_space *mapping, | ||
231 | struct list_head *head) | ||
232 | { | ||
233 | list_add(&page->lru, head); | ||
234 | } | ||
235 | |||
236 | static inline void balloon_page_delete(struct page *page) | ||
237 | { | ||
238 | list_del(&page->lru); | ||
239 | } | ||
240 | |||
241 | static inline bool balloon_page_movable(struct page *page) | ||
242 | { | ||
243 | return false; | ||
244 | } | ||
245 | |||
246 | static inline bool balloon_page_isolate(struct page *page) | ||
247 | { | ||
248 | return false; | ||
249 | } | ||
250 | |||
251 | static inline void balloon_page_putback(struct page *page) | ||
252 | { | ||
253 | return; | ||
254 | } | ||
255 | |||
256 | static inline int balloon_page_migrate(struct page *newpage, | ||
257 | struct page *page, enum migrate_mode mode) | ||
258 | { | ||
259 | return 0; | ||
260 | } | ||
261 | |||
262 | static inline gfp_t balloon_mapping_gfp_mask(void) | ||
263 | { | ||
264 | return GFP_HIGHUSER; | ||
265 | } | ||
266 | |||
267 | static inline bool balloon_compaction_check(void) | ||
268 | { | ||
269 | return false; | ||
270 | } | ||
271 | #endif /* CONFIG_BALLOON_COMPACTION */ | ||
272 | #endif /* _LINUX_BALLOON_COMPACTION_H */ | ||
diff --git a/include/linux/migrate.h b/include/linux/migrate.h index a4e886d17f87..ce42847ed35f 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h | |||
@@ -11,8 +11,18 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **); | |||
11 | * Return values from addresss_space_operations.migratepage(): | 11 | * Return values from addresss_space_operations.migratepage(): |
12 | * - negative errno on page migration failure; | 12 | * - negative errno on page migration failure; |
13 | * - zero on page migration success; | 13 | * - zero on page migration success; |
14 | * | ||
15 | * The balloon page migration introduces this special case where a 'distinct' | ||
16 | * return code is used to flag a successful page migration to unmap_and_move(). | ||
17 | * This approach is necessary because page migration can race against balloon | ||
18 | * deflation procedure, and for such case we could introduce a nasty page leak | ||
19 | * if a successfully migrated balloon page gets released concurrently with | ||
20 | * migration's unmap_and_move() wrap-up steps. | ||
14 | */ | 21 | */ |
15 | #define MIGRATEPAGE_SUCCESS 0 | 22 | #define MIGRATEPAGE_SUCCESS 0 |
23 | #define MIGRATEPAGE_BALLOON_SUCCESS 1 /* special ret code for balloon page | ||
24 | * successful migration case. | ||
25 | */ | ||
16 | 26 | ||
17 | #ifdef CONFIG_MIGRATION | 27 | #ifdef CONFIG_MIGRATION |
18 | 28 | ||
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e42c762f0dc7..6da609d14c15 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h | |||
@@ -24,6 +24,7 @@ enum mapping_flags { | |||
24 | AS_ENOSPC = __GFP_BITS_SHIFT + 1, /* ENOSPC on async write */ | 24 | AS_ENOSPC = __GFP_BITS_SHIFT + 1, /* ENOSPC on async write */ |
25 | AS_MM_ALL_LOCKS = __GFP_BITS_SHIFT + 2, /* under mm_take_all_locks() */ | 25 | AS_MM_ALL_LOCKS = __GFP_BITS_SHIFT + 2, /* under mm_take_all_locks() */ |
26 | AS_UNEVICTABLE = __GFP_BITS_SHIFT + 3, /* e.g., ramdisk, SHM_LOCK */ | 26 | AS_UNEVICTABLE = __GFP_BITS_SHIFT + 3, /* e.g., ramdisk, SHM_LOCK */ |
27 | AS_BALLOON_MAP = __GFP_BITS_SHIFT + 4, /* balloon page special map */ | ||
27 | }; | 28 | }; |
28 | 29 | ||
29 | static inline void mapping_set_error(struct address_space *mapping, int error) | 30 | static inline void mapping_set_error(struct address_space *mapping, int error) |
@@ -53,6 +54,21 @@ static inline int mapping_unevictable(struct address_space *mapping) | |||
53 | return !!mapping; | 54 | return !!mapping; |
54 | } | 55 | } |
55 | 56 | ||
57 | static inline void mapping_set_balloon(struct address_space *mapping) | ||
58 | { | ||
59 | set_bit(AS_BALLOON_MAP, &mapping->flags); | ||
60 | } | ||
61 | |||
62 | static inline void mapping_clear_balloon(struct address_space *mapping) | ||
63 | { | ||
64 | clear_bit(AS_BALLOON_MAP, &mapping->flags); | ||
65 | } | ||
66 | |||
67 | static inline int mapping_balloon(struct address_space *mapping) | ||
68 | { | ||
69 | return mapping && test_bit(AS_BALLOON_MAP, &mapping->flags); | ||
70 | } | ||
71 | |||
56 | static inline gfp_t mapping_gfp_mask(struct address_space * mapping) | 72 | static inline gfp_t mapping_gfp_mask(struct address_space * mapping) |
57 | { | 73 | { |
58 | return (__force gfp_t)mapping->flags & __GFP_BITS_MASK; | 74 | return (__force gfp_t)mapping->flags & __GFP_BITS_MASK; |
diff --git a/mm/Kconfig b/mm/Kconfig index a3f8dddaaab3..e6651c5de14f 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -188,6 +188,21 @@ config SPLIT_PTLOCK_CPUS | |||
188 | default "4" | 188 | default "4" |
189 | 189 | ||
190 | # | 190 | # |
191 | # support for memory balloon compaction | ||
192 | config BALLOON_COMPACTION | ||
193 | bool "Allow for balloon memory compaction/migration" | ||
194 | def_bool y | ||
195 | depends on COMPACTION && VIRTIO_BALLOON | ||
196 | help | ||
197 | Memory fragmentation introduced by ballooning might reduce | ||
198 | significantly the number of 2MB contiguous memory blocks that can be | ||
199 | used within a guest, thus imposing performance penalties associated | ||
200 | with the reduced number of transparent huge pages that could be used | ||
201 | by the guest workload. Allowing the compaction & migration for memory | ||
202 | pages enlisted as being part of memory balloon devices avoids the | ||
203 | scenario aforementioned and helps improving memory defragmentation. | ||
204 | |||
205 | # | ||
191 | # support for memory compaction | 206 | # support for memory compaction |
192 | config COMPACTION | 207 | config COMPACTION |
193 | bool "Allow for memory compaction" | 208 | bool "Allow for memory compaction" |
diff --git a/mm/Makefile b/mm/Makefile index 6b025f80af34..3a4628751f89 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -16,7 +16,8 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ | |||
16 | readahead.o swap.o truncate.o vmscan.o shmem.o \ | 16 | readahead.o swap.o truncate.o vmscan.o shmem.o \ |
17 | util.o mmzone.o vmstat.o backing-dev.o \ | 17 | util.o mmzone.o vmstat.o backing-dev.o \ |
18 | mm_init.o mmu_context.o percpu.o slab_common.o \ | 18 | mm_init.o mmu_context.o percpu.o slab_common.o \ |
19 | compaction.o interval_tree.o $(mmu-y) | 19 | compaction.o balloon_compaction.o \ |
20 | interval_tree.o $(mmu-y) | ||
20 | 21 | ||
21 | obj-y += init-mm.o | 22 | obj-y += init-mm.o |
22 | 23 | ||
diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c new file mode 100644 index 000000000000..07dbc8ec46cf --- /dev/null +++ b/mm/balloon_compaction.c | |||
@@ -0,0 +1,302 @@ | |||
1 | /* | ||
2 | * mm/balloon_compaction.c | ||
3 | * | ||
4 | * Common interface for making balloon pages movable by compaction. | ||
5 | * | ||
6 | * Copyright (C) 2012, Red Hat, Inc. Rafael Aquini <aquini@redhat.com> | ||
7 | */ | ||
8 | #include <linux/mm.h> | ||
9 | #include <linux/slab.h> | ||
10 | #include <linux/export.h> | ||
11 | #include <linux/balloon_compaction.h> | ||
12 | |||
13 | /* | ||
14 | * balloon_devinfo_alloc - allocates a balloon device information descriptor. | ||
15 | * @balloon_dev_descriptor: pointer to reference the balloon device which | ||
16 | * this struct balloon_dev_info will be servicing. | ||
17 | * | ||
18 | * Driver must call it to properly allocate and initialize an instance of | ||
19 | * struct balloon_dev_info which will be used to reference a balloon device | ||
20 | * as well as to keep track of the balloon device page list. | ||
21 | */ | ||
22 | struct balloon_dev_info *balloon_devinfo_alloc(void *balloon_dev_descriptor) | ||
23 | { | ||
24 | struct balloon_dev_info *b_dev_info; | ||
25 | b_dev_info = kmalloc(sizeof(*b_dev_info), GFP_KERNEL); | ||
26 | if (!b_dev_info) | ||
27 | return ERR_PTR(-ENOMEM); | ||
28 | |||
29 | b_dev_info->balloon_device = balloon_dev_descriptor; | ||
30 | b_dev_info->mapping = NULL; | ||
31 | b_dev_info->isolated_pages = 0; | ||
32 | spin_lock_init(&b_dev_info->pages_lock); | ||
33 | INIT_LIST_HEAD(&b_dev_info->pages); | ||
34 | |||
35 | return b_dev_info; | ||
36 | } | ||
37 | EXPORT_SYMBOL_GPL(balloon_devinfo_alloc); | ||
38 | |||
39 | /* | ||
40 | * balloon_page_enqueue - allocates a new page and inserts it into the balloon | ||
41 | * page list. | ||
42 | * @b_dev_info: balloon device descriptor where we will insert a new page to | ||
43 | * | ||
44 | * Driver must call it to properly allocate a new enlisted balloon page | ||
45 | * before definitively removing it from the guest system. | ||
46 | * This function returns the page address for the recently enqueued page or | ||
47 | * NULL in the case we fail to allocate a new page this turn. | ||
48 | */ | ||
49 | struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info) | ||
50 | { | ||
51 | unsigned long flags; | ||
52 | struct page *page = alloc_page(balloon_mapping_gfp_mask() | | ||
53 | __GFP_NOMEMALLOC | __GFP_NORETRY); | ||
54 | if (!page) | ||
55 | return NULL; | ||
56 | |||
57 | /* | ||
58 | * Block others from accessing the 'page' when we get around to | ||
59 | * establishing additional references. We should be the only one | ||
60 | * holding a reference to the 'page' at this point. | ||
61 | */ | ||
62 | BUG_ON(!trylock_page(page)); | ||
63 | spin_lock_irqsave(&b_dev_info->pages_lock, flags); | ||
64 | balloon_page_insert(page, b_dev_info->mapping, &b_dev_info->pages); | ||
65 | spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); | ||
66 | unlock_page(page); | ||
67 | return page; | ||
68 | } | ||
69 | EXPORT_SYMBOL_GPL(balloon_page_enqueue); | ||
70 | |||
71 | /* | ||
72 | * balloon_page_dequeue - removes a page from balloon's page list and returns | ||
73 | * its address to allow the driver to release the page. | ||
74 | * @b_dev_info: balloon device descriptor where we will grab a page from. | ||
75 | * | ||
76 | * Driver must call it to properly de-allocate a previously enlisted balloon page | ||
77 | * before definitively releasing it back to the guest system. | ||
78 | * This function returns the page address for the recently dequeued page or | ||
79 | * NULL in the case we find balloon's page list temporarily empty due to | ||
80 | * compaction isolated pages. | ||
81 | */ | ||
82 | struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info) | ||
83 | { | ||
84 | struct page *page, *tmp; | ||
85 | unsigned long flags; | ||
86 | bool dequeued_page; | ||
87 | |||
88 | dequeued_page = false; | ||
89 | list_for_each_entry_safe(page, tmp, &b_dev_info->pages, lru) { | ||
90 | /* | ||
91 | * Block others from accessing the 'page' while we get around | ||
92 | * establishing additional references and preparing the 'page' | ||
93 | * to be released by the balloon driver. | ||
94 | */ | ||
95 | if (trylock_page(page)) { | ||
96 | spin_lock_irqsave(&b_dev_info->pages_lock, flags); | ||
97 | /* | ||
98 | * Raise the page refcount here to prevent any wrong | ||
99 | * attempt to isolate this page, in case of colliding | ||
100 | * with balloon_page_isolate() just after we release | ||
101 | * the page lock. | ||
102 | * | ||
103 | * balloon_page_free() will take care of dropping | ||
104 | * this extra refcount later. | ||
105 | */ | ||
106 | get_page(page); | ||
107 | balloon_page_delete(page); | ||
108 | spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); | ||
109 | unlock_page(page); | ||
110 | dequeued_page = true; | ||
111 | break; | ||
112 | } | ||
113 | } | ||
114 | |||
115 | if (!dequeued_page) { | ||
116 | /* | ||
117 | * If we are unable to dequeue a balloon page because the page | ||
118 | * list is empty and there are no isolated pages, then something | ||
119 | * went out of track and some balloon pages are lost. | ||
120 | * BUG() here, otherwise the balloon driver may get stuck into | ||
121 | * an infinite loop while attempting to release all its pages. | ||
122 | */ | ||
123 | spin_lock_irqsave(&b_dev_info->pages_lock, flags); | ||
124 | if (unlikely(list_empty(&b_dev_info->pages) && | ||
125 | !b_dev_info->isolated_pages)) | ||
126 | BUG(); | ||
127 | spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); | ||
128 | page = NULL; | ||
129 | } | ||
130 | return page; | ||
131 | } | ||
132 | EXPORT_SYMBOL_GPL(balloon_page_dequeue); | ||
133 | |||
134 | #ifdef CONFIG_BALLOON_COMPACTION | ||
135 | /* | ||
136 | * balloon_mapping_alloc - allocates a special ->mapping for ballooned pages. | ||
137 | * @b_dev_info: holds the balloon device information descriptor. | ||
138 | * @a_ops: balloon_mapping address_space_operations descriptor. | ||
139 | * | ||
140 | * Driver must call it to properly allocate and initialize an instance of | ||
141 | * struct address_space which will be used as the special page->mapping for | ||
142 | * balloon device enlisted page instances. | ||
143 | */ | ||
144 | struct address_space *balloon_mapping_alloc(struct balloon_dev_info *b_dev_info, | ||
145 | const struct address_space_operations *a_ops) | ||
146 | { | ||
147 | struct address_space *mapping; | ||
148 | |||
149 | mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); | ||
150 | if (!mapping) | ||
151 | return ERR_PTR(-ENOMEM); | ||
152 | |||
153 | /* | ||
154 | * Give a clean 'zeroed' status to all elements of this special | ||
155 | * balloon page->mapping struct address_space instance. | ||
156 | */ | ||
157 | address_space_init_once(mapping); | ||
158 | |||
159 | /* | ||
160 | * Set mapping->flags appropriately, to allow balloon pages | ||
161 | * ->mapping identification. | ||
162 | */ | ||
163 | mapping_set_balloon(mapping); | ||
164 | mapping_set_gfp_mask(mapping, balloon_mapping_gfp_mask()); | ||
165 | |||
166 | /* balloon's page->mapping->a_ops callback descriptor */ | ||
167 | mapping->a_ops = a_ops; | ||
168 | |||
169 | /* | ||
170 | * Establish a pointer reference back to the balloon device descriptor | ||
171 | * this particular page->mapping will be servicing. | ||
172 | * This is used by compaction / migration procedures to identify and | ||
173 | * access the balloon device pageset while isolating / migrating pages. | ||
174 | * | ||
175 | * As some balloon drivers can register multiple balloon devices | ||
176 | * for a single guest, this also helps compaction / migration to | ||
177 | * properly deal with multiple balloon pagesets, when required. | ||
178 | */ | ||
179 | mapping->private_data = b_dev_info; | ||
180 | b_dev_info->mapping = mapping; | ||
181 | |||
182 | return mapping; | ||
183 | } | ||
184 | EXPORT_SYMBOL_GPL(balloon_mapping_alloc); | ||
185 | |||
186 | static inline void __isolate_balloon_page(struct page *page) | ||
187 | { | ||
188 | struct balloon_dev_info *b_dev_info = page->mapping->private_data; | ||
189 | unsigned long flags; | ||
190 | spin_lock_irqsave(&b_dev_info->pages_lock, flags); | ||
191 | list_del(&page->lru); | ||
192 | b_dev_info->isolated_pages++; | ||
193 | spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); | ||
194 | } | ||
195 | |||
196 | static inline void __putback_balloon_page(struct page *page) | ||
197 | { | ||
198 | struct balloon_dev_info *b_dev_info = page->mapping->private_data; | ||
199 | unsigned long flags; | ||
200 | spin_lock_irqsave(&b_dev_info->pages_lock, flags); | ||
201 | list_add(&page->lru, &b_dev_info->pages); | ||
202 | b_dev_info->isolated_pages--; | ||
203 | spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); | ||
204 | } | ||
205 | |||
206 | static inline int __migrate_balloon_page(struct address_space *mapping, | ||
207 | struct page *newpage, struct page *page, enum migrate_mode mode) | ||
208 | { | ||
209 | return page->mapping->a_ops->migratepage(mapping, newpage, page, mode); | ||
210 | } | ||
211 | |||
212 | /* __isolate_lru_page() counterpart for a ballooned page */ | ||
213 | bool balloon_page_isolate(struct page *page) | ||
214 | { | ||
215 | /* | ||
216 | * Avoid burning cycles with pages that are yet under __free_pages(), | ||
217 | * or just got freed under us. | ||
218 | * | ||
219 | * In case we 'win' a race for a balloon page being freed under us and | ||
220 | * raise its refcount preventing __free_pages() from doing its job | ||
221 | * the put_page() at the end of this block will take care of | ||
222 | * releasing this page, thus avoiding a nasty leakage. | ||
223 | */ | ||
224 | if (likely(get_page_unless_zero(page))) { | ||
225 | /* | ||
226 | * As balloon pages are not isolated from LRU lists, concurrent | ||
227 | * compaction threads can race against page migration functions | ||
228 | * as well as race against the balloon driver releasing a page. | ||
229 | * | ||
230 | * In order to avoid having an already isolated balloon page | ||
231 | * being (wrongly) re-isolated while it is under migration, | ||
232 | * or to avoid attempting to isolate pages being released by | ||
233 | * the balloon driver, lets be sure we have the page lock | ||
234 | * before proceeding with the balloon page isolation steps. | ||
235 | */ | ||
236 | if (likely(trylock_page(page))) { | ||
237 | /* | ||
238 | * A ballooned page, by default, has just one refcount. | ||
239 | * Prevent concurrent compaction threads from isolating | ||
240 | * an already isolated balloon page by refcount check. | ||
241 | */ | ||
242 | if (__is_movable_balloon_page(page) && | ||
243 | page_count(page) == 2) { | ||
244 | __isolate_balloon_page(page); | ||
245 | unlock_page(page); | ||
246 | return true; | ||
247 | } | ||
248 | unlock_page(page); | ||
249 | } | ||
250 | put_page(page); | ||
251 | } | ||
252 | return false; | ||
253 | } | ||
254 | |||
255 | /* putback_lru_page() counterpart for a ballooned page */ | ||
256 | void balloon_page_putback(struct page *page) | ||
257 | { | ||
258 | /* | ||
259 | * 'lock_page()' stabilizes the page and prevents races against | ||
260 | * concurrent isolation threads attempting to re-isolate it. | ||
261 | */ | ||
262 | lock_page(page); | ||
263 | |||
264 | if (__is_movable_balloon_page(page)) { | ||
265 | __putback_balloon_page(page); | ||
266 | /* drop the extra ref count taken for page isolation */ | ||
267 | put_page(page); | ||
268 | } else { | ||
269 | WARN_ON(1); | ||
270 | dump_page(page); | ||
271 | } | ||
272 | unlock_page(page); | ||
273 | } | ||
274 | |||
275 | /* move_to_new_page() counterpart for a ballooned page */ | ||
276 | int balloon_page_migrate(struct page *newpage, | ||
277 | struct page *page, enum migrate_mode mode) | ||
278 | { | ||
279 | struct address_space *mapping; | ||
280 | int rc = -EAGAIN; | ||
281 | |||
282 | /* | ||
283 | * Block others from accessing the 'newpage' when we get around to | ||
284 | * establishing additional references. We should be the only one | ||
285 | * holding a reference to the 'newpage' at this point. | ||
286 | */ | ||
287 | BUG_ON(!trylock_page(newpage)); | ||
288 | |||
289 | if (WARN_ON(!__is_movable_balloon_page(page))) { | ||
290 | dump_page(page); | ||
291 | unlock_page(newpage); | ||
292 | return rc; | ||
293 | } | ||
294 | |||
295 | mapping = page->mapping; | ||
296 | if (mapping) | ||
297 | rc = __migrate_balloon_page(mapping, newpage, page, mode); | ||
298 | |||
299 | unlock_page(newpage); | ||
300 | return rc; | ||
301 | } | ||
302 | #endif /* CONFIG_BALLOON_COMPACTION */ | ||