diff options
Diffstat (limited to 'mm/swap.c')
-rw-r--r-- | mm/swap.c | 278 |
1 files changed, 155 insertions, 123 deletions
@@ -31,7 +31,6 @@ | |||
31 | #include <linux/memcontrol.h> | 31 | #include <linux/memcontrol.h> |
32 | #include <linux/gfp.h> | 32 | #include <linux/gfp.h> |
33 | #include <linux/uio.h> | 33 | #include <linux/uio.h> |
34 | #include <linux/hugetlb.h> | ||
35 | 34 | ||
36 | #include "internal.h" | 35 | #include "internal.h" |
37 | 36 | ||
@@ -82,118 +81,150 @@ static void __put_compound_page(struct page *page) | |||
82 | 81 | ||
83 | static void put_compound_page(struct page *page) | 82 | static void put_compound_page(struct page *page) |
84 | { | 83 | { |
85 | if (unlikely(PageTail(page))) { | 84 | struct page *page_head; |
86 | /* __split_huge_page_refcount can run under us */ | ||
87 | struct page *page_head = compound_trans_head(page); | ||
88 | |||
89 | if (likely(page != page_head && | ||
90 | get_page_unless_zero(page_head))) { | ||
91 | unsigned long flags; | ||
92 | 85 | ||
86 | if (likely(!PageTail(page))) { | ||
87 | if (put_page_testzero(page)) { | ||
93 | /* | 88 | /* |
94 | * THP can not break up slab pages so avoid taking | 89 | * By the time all refcounts have been released |
95 | * compound_lock(). Slab performs non-atomic bit ops | 90 | * split_huge_page cannot run anymore from under us. |
96 | * on page->flags for better performance. In particular | ||
97 | * slab_unlock() in slub used to be a hot path. It is | ||
98 | * still hot on arches that do not support | ||
99 | * this_cpu_cmpxchg_double(). | ||
100 | */ | 91 | */ |
101 | if (PageSlab(page_head) || PageHeadHuge(page_head)) { | 92 | if (PageHead(page)) |
102 | if (likely(PageTail(page))) { | 93 | __put_compound_page(page); |
103 | /* | 94 | else |
104 | * __split_huge_page_refcount | 95 | __put_single_page(page); |
105 | * cannot race here. | 96 | } |
106 | */ | 97 | return; |
107 | VM_BUG_ON(!PageHead(page_head)); | 98 | } |
108 | atomic_dec(&page->_mapcount); | 99 | |
109 | if (put_page_testzero(page_head)) | 100 | /* __split_huge_page_refcount can run under us */ |
110 | VM_BUG_ON(1); | 101 | page_head = compound_trans_head(page); |
111 | if (put_page_testzero(page_head)) | 102 | |
112 | __put_compound_page(page_head); | 103 | /* |
113 | return; | 104 | * THP can not break up slab pages so avoid taking |
114 | } else | 105 | * compound_lock() and skip the tail page refcounting (in |
115 | /* | 106 | * _mapcount) too. Slab performs non-atomic bit ops on |
116 | * __split_huge_page_refcount | 107 | * page->flags for better performance. In particular |
117 | * run before us, "page" was a | 108 | * slab_unlock() in slub used to be a hot path. It is still |
118 | * THP tail. The split | 109 | * hot on arches that do not support |
119 | * page_head has been freed | 110 | * this_cpu_cmpxchg_double(). |
120 | * and reallocated as slab or | 111 | * |
121 | * hugetlbfs page of smaller | 112 | * If "page" is part of a slab or hugetlbfs page it cannot be |
122 | * order (only possible if | 113 | * split and the head page cannot change from under us. And |
123 | * reallocated as slab on | 114 | * if "page" is part of a THP page under splitting, if the |
124 | * x86). | 115 | * head page pointed to by the THP tail isn't a THP head anymore, |
125 | */ | 116 | * we'll find PageTail clear after smp_rmb() and we'll treat |
126 | goto skip_lock; | 117 | * it as a single page. |
127 | } | 118 | */ |
119 | if (!__compound_tail_refcounted(page_head)) { | ||
120 | /* | ||
121 | * If "page" is a THP tail, we must read the tail page | ||
122 | * flags after the head page flags. The | ||
123 | * split_huge_page side enforces write memory barriers | ||
124 | * between clearing PageTail and before the head page | ||
125 | * can be freed and reallocated. | ||
126 | */ | ||
127 | smp_rmb(); | ||
128 | if (likely(PageTail(page))) { | ||
128 | /* | 129 | /* |
129 | * page_head wasn't a dangling pointer but it | 130 | * __split_huge_page_refcount cannot race |
130 | * may not be a head page anymore by the time | 131 | * here. |
131 | * we obtain the lock. That is ok as long as it | ||
132 | * can't be freed from under us. | ||
133 | */ | 132 | */ |
134 | flags = compound_lock_irqsave(page_head); | 133 | VM_BUG_ON(!PageHead(page_head)); |
135 | if (unlikely(!PageTail(page))) { | 134 | VM_BUG_ON(page_mapcount(page) != 0); |
136 | /* __split_huge_page_refcount run before us */ | 135 | if (put_page_testzero(page_head)) { |
137 | compound_unlock_irqrestore(page_head, flags); | 136 | /* |
138 | skip_lock: | 137 | * If this is the tail of a slab |
139 | if (put_page_testzero(page_head)) { | 138 | * compound page, the tail pin must |
140 | /* | 139 | * not be the last reference held on |
141 | * The head page may have been | 140 | * the page, because the PG_slab |
142 | * freed and reallocated as a | 141 | * cannot be cleared before all tail |
143 | * compound page of smaller | 142 | * pins (which skips the _mapcount |
144 | * order and then freed again. | 143 | * tail refcounting) have been |
145 | * All we know is that it | 144 | * released. For hugetlbfs the tail |
146 | * cannot have become: a THP | 145 | * pin may be the last reference on |
147 | * page, a compound page of | 146 | * the page instead, because |
148 | * higher order, a tail page. | 147 | * PageHeadHuge will not go away until |
149 | * That is because we still | 148 | * the compound page enters the buddy |
150 | * hold the refcount of the | 149 | * allocator. |
151 | * split THP tail and | 150 | */ |
152 | * page_head was the THP head | 151 | VM_BUG_ON(PageSlab(page_head)); |
153 | * before the split. | 152 | __put_compound_page(page_head); |
154 | */ | ||
155 | if (PageHead(page_head)) | ||
156 | __put_compound_page(page_head); | ||
157 | else | ||
158 | __put_single_page(page_head); | ||
159 | } | ||
160 | out_put_single: | ||
161 | if (put_page_testzero(page)) | ||
162 | __put_single_page(page); | ||
163 | return; | ||
164 | } | 153 | } |
165 | VM_BUG_ON(page_head != page->first_page); | 154 | return; |
155 | } else | ||
166 | /* | 156 | /* |
167 | * We can release the refcount taken by | 157 | * __split_huge_page_refcount run before us, |
168 | * get_page_unless_zero() now that | 158 | * "page" was a THP tail. The split page_head |
169 | * __split_huge_page_refcount() is blocked on | 159 | * has been freed and reallocated as slab or |
170 | * the compound_lock. | 160 | * hugetlbfs page of smaller order (only |
161 | * possible if reallocated as slab on x86). | ||
171 | */ | 162 | */ |
172 | if (put_page_testzero(page_head)) | 163 | goto out_put_single; |
173 | VM_BUG_ON(1); | 164 | } |
174 | /* __split_huge_page_refcount will wait now */ | ||
175 | VM_BUG_ON(page_mapcount(page) <= 0); | ||
176 | atomic_dec(&page->_mapcount); | ||
177 | VM_BUG_ON(atomic_read(&page_head->_count) <= 0); | ||
178 | VM_BUG_ON(atomic_read(&page->_count) != 0); | ||
179 | compound_unlock_irqrestore(page_head, flags); | ||
180 | 165 | ||
166 | if (likely(page != page_head && get_page_unless_zero(page_head))) { | ||
167 | unsigned long flags; | ||
168 | |||
169 | /* | ||
170 | * page_head wasn't a dangling pointer but it may not | ||
171 | * be a head page anymore by the time we obtain the | ||
172 | * lock. That is ok as long as it can't be freed from | ||
173 | * under us. | ||
174 | */ | ||
175 | flags = compound_lock_irqsave(page_head); | ||
176 | if (unlikely(!PageTail(page))) { | ||
177 | /* __split_huge_page_refcount run before us */ | ||
178 | compound_unlock_irqrestore(page_head, flags); | ||
181 | if (put_page_testzero(page_head)) { | 179 | if (put_page_testzero(page_head)) { |
180 | /* | ||
181 | * The head page may have been freed | ||
182 | * and reallocated as a compound page | ||
183 | * of smaller order and then freed | ||
184 | * again. All we know is that it | ||
185 | * cannot have become: a THP page, a | ||
186 | * compound page of higher order, a | ||
187 | * tail page. That is because we | ||
188 | * still hold the refcount of the | ||
189 | * split THP tail and page_head was | ||
190 | * the THP head before the split. | ||
191 | */ | ||
182 | if (PageHead(page_head)) | 192 | if (PageHead(page_head)) |
183 | __put_compound_page(page_head); | 193 | __put_compound_page(page_head); |
184 | else | 194 | else |
185 | __put_single_page(page_head); | 195 | __put_single_page(page_head); |
186 | } | 196 | } |
187 | } else { | 197 | out_put_single: |
188 | /* page_head is a dangling pointer */ | 198 | if (put_page_testzero(page)) |
189 | VM_BUG_ON(PageTail(page)); | 199 | __put_single_page(page); |
190 | goto out_put_single; | 200 | return; |
191 | } | 201 | } |
192 | } else if (put_page_testzero(page)) { | 202 | VM_BUG_ON(page_head != page->first_page); |
193 | if (PageHead(page)) | 203 | /* |
194 | __put_compound_page(page); | 204 | * We can release the refcount taken by |
195 | else | 205 | * get_page_unless_zero() now that |
196 | __put_single_page(page); | 206 | * __split_huge_page_refcount() is blocked on the |
207 | * compound_lock. | ||
208 | */ | ||
209 | if (put_page_testzero(page_head)) | ||
210 | VM_BUG_ON(1); | ||
211 | /* __split_huge_page_refcount will wait now */ | ||
212 | VM_BUG_ON(page_mapcount(page) <= 0); | ||
213 | atomic_dec(&page->_mapcount); | ||
214 | VM_BUG_ON(atomic_read(&page_head->_count) <= 0); | ||
215 | VM_BUG_ON(atomic_read(&page->_count) != 0); | ||
216 | compound_unlock_irqrestore(page_head, flags); | ||
217 | |||
218 | if (put_page_testzero(page_head)) { | ||
219 | if (PageHead(page_head)) | ||
220 | __put_compound_page(page_head); | ||
221 | else | ||
222 | __put_single_page(page_head); | ||
223 | } | ||
224 | } else { | ||
225 | /* page_head is a dangling pointer */ | ||
226 | VM_BUG_ON(PageTail(page)); | ||
227 | goto out_put_single; | ||
197 | } | 228 | } |
198 | } | 229 | } |
199 | 230 | ||
@@ -221,36 +252,37 @@ bool __get_page_tail(struct page *page) | |||
221 | * split_huge_page(). | 252 | * split_huge_page(). |
222 | */ | 253 | */ |
223 | unsigned long flags; | 254 | unsigned long flags; |
224 | bool got = false; | 255 | bool got; |
225 | struct page *page_head = compound_trans_head(page); | 256 | struct page *page_head = compound_trans_head(page); |
226 | 257 | ||
227 | if (likely(page != page_head && get_page_unless_zero(page_head))) { | 258 | /* Ref to put_compound_page() comment. */ |
228 | /* Ref to put_compound_page() comment. */ | 259 | if (!__compound_tail_refcounted(page_head)) { |
229 | if (PageSlab(page_head) || PageHeadHuge(page_head)) { | 260 | smp_rmb(); |
230 | if (likely(PageTail(page))) { | 261 | if (likely(PageTail(page))) { |
231 | /* | 262 | /* |
232 | * This is a hugetlbfs page or a slab | 263 | * This is a hugetlbfs page or a slab |
233 | * page. __split_huge_page_refcount | 264 | * page. __split_huge_page_refcount |
234 | * cannot race here. | 265 | * cannot race here. |
235 | */ | 266 | */ |
236 | VM_BUG_ON(!PageHead(page_head)); | 267 | VM_BUG_ON(!PageHead(page_head)); |
237 | __get_page_tail_foll(page, false); | 268 | __get_page_tail_foll(page, true); |
238 | return true; | 269 | return true; |
239 | } else { | 270 | } else { |
240 | /* | 271 | /* |
241 | * __split_huge_page_refcount run | 272 | * __split_huge_page_refcount run |
242 | * before us, "page" was a THP | 273 | * before us, "page" was a THP |
243 | * tail. The split page_head has been | 274 | * tail. The split page_head has been |
244 | * freed and reallocated as slab or | 275 | * freed and reallocated as slab or |
245 | * hugetlbfs page of smaller order | 276 | * hugetlbfs page of smaller order |
246 | * (only possible if reallocated as | 277 | * (only possible if reallocated as |
247 | * slab on x86). | 278 | * slab on x86). |
248 | */ | 279 | */ |
249 | put_page(page_head); | 280 | return false; |
250 | return false; | ||
251 | } | ||
252 | } | 281 | } |
282 | } | ||
253 | 283 | ||
284 | got = false; | ||
285 | if (likely(page != page_head && get_page_unless_zero(page_head))) { | ||
254 | /* | 286 | /* |
255 | * page_head wasn't a dangling pointer but it | 287 | * page_head wasn't a dangling pointer but it |
256 | * may not be a head page anymore by the time | 288 | * may not be a head page anymore by the time |