Diffstat (limited to 'mm')
 mm/shmem.c | 192
 1 file changed, 146 insertions(+), 46 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index e101c211ed1f..4439b7d55819 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -238,6 +238,111 @@ static swp_entry_t shmem_get_swap(struct shmem_inode_info *info, pgoff_t index)
 		info->i_direct[index] : (swp_entry_t){0};
 }
 
+/*
+ * Replace item expected in radix tree by a new item, while holding tree lock.
+ */
+static int shmem_radix_tree_replace(struct address_space *mapping,
+			pgoff_t index, void *expected, void *replacement)
+{
+	void **pslot;
+	void *item = NULL;
+
+	VM_BUG_ON(!expected);
+	pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
+	if (pslot)
+		item = radix_tree_deref_slot_protected(pslot,
+						&mapping->tree_lock);
+	if (item != expected)
+		return -ENOENT;
+	if (replacement)
+		radix_tree_replace_slot(pslot, replacement);
+	else
+		radix_tree_delete(&mapping->page_tree, index);
+	return 0;
+}
+
+/*
+ * Like find_get_pages, but collecting swap entries as well as pages.
+ */
+static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping,
+					pgoff_t start, unsigned int nr_pages,
+					struct page **pages, pgoff_t *indices)
+{
+	unsigned int i;
+	unsigned int ret;
+	unsigned int nr_found;
+
+	rcu_read_lock();
+restart:
+	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
+				(void ***)pages, indices, start, nr_pages);
+	ret = 0;
+	for (i = 0; i < nr_found; i++) {
+		struct page *page;
+repeat:
+		page = radix_tree_deref_slot((void **)pages[i]);
+		if (unlikely(!page))
+			continue;
+		if (radix_tree_exception(page)) {
+			if (radix_tree_exceptional_entry(page))
+				goto export;
+			/* radix_tree_deref_retry(page) */
+			goto restart;
+		}
+		if (!page_cache_get_speculative(page))
+			goto repeat;
+
+		/* Has the page moved? */
+		if (unlikely(page != *((void **)pages[i]))) {
+			page_cache_release(page);
+			goto repeat;
+		}
+export:
+		indices[ret] = indices[i];
+		pages[ret] = page;
+		ret++;
+	}
+	if (unlikely(!ret && nr_found))
+		goto restart;
+	rcu_read_unlock();
+	return ret;
+}
+
+/*
+ * Remove swap entry from radix tree, free the swap and its page cache.
+ */
+static int shmem_free_swap(struct address_space *mapping,
+			   pgoff_t index, void *radswap)
+{
+	int error;
+
+	spin_lock_irq(&mapping->tree_lock);
+	error = shmem_radix_tree_replace(mapping, index, radswap, NULL);
+	spin_unlock_irq(&mapping->tree_lock);
+	if (!error)
+		free_swap_and_cache(radix_to_swp_entry(radswap));
+	return error;
+}
+
+/*
+ * Pagevec may contain swap entries, so shuffle up pages before releasing.
+ */
+static void shmem_pagevec_release(struct pagevec *pvec)
+{
+	int i, j;
+
+	for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
+		struct page *page = pvec->pages[i];
+		if (!radix_tree_exceptional_entry(page))
+			pvec->pages[j++] = page;
+	}
+	pvec->nr = j;
+	pagevec_release(pvec);
+}
+
+/*
+ * Remove range of pages and swap entries from radix tree, and free them.
+ */
 void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 {
 	struct address_space *mapping = inode->i_mapping;
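A note on the encoding that shmem_free_swap() above depends on: a swap entry is stored in the page cache radix tree as an "exceptional" entry, the swp_entry_t value shifted up with a low tag bit set, so it can never be mistaken for a page pointer (pointers are at least 4-byte aligned). The radix_to_swp_entry() helper and its counterpart come from include/linux/swapops.h elsewhere in this patch series; a sketch of what they roughly look like, assuming the 3.1-era RADIX_TREE_EXCEPTIONAL_* definitions:

	static inline void *swp_to_radix_entry(swp_entry_t entry)
	{
		unsigned long value;

		/* shift the value up and tag it as an exceptional entry */
		value = entry.val << RADIX_TREE_EXCEPTIONAL_SHIFT;
		return (void *)(value | RADIX_TREE_EXCEPTIONAL_ENTRY);
	}

	static inline swp_entry_t radix_to_swp_entry(void *arg)
	{
		swp_entry_t entry;

		/* strip the tag bit to recover the swp_entry_t value */
		entry.val = (unsigned long)arg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
		return entry;
	}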
@@ -246,36 +351,44 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 	unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
 	pgoff_t end = (lend >> PAGE_CACHE_SHIFT);
 	struct pagevec pvec;
+	pgoff_t indices[PAGEVEC_SIZE];
+	long nr_swaps_freed = 0;
 	pgoff_t index;
-	swp_entry_t swap;
 	int i;
 
 	BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
 
 	pagevec_init(&pvec, 0);
 	index = start;
-	while (index <= end && pagevec_lookup(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+	while (index <= end) {
+		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+							pvec.pages, indices);
+		if (!pvec.nr)
+			break;
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
-			/* We rely upon deletion not changing page->index */
-			index = page->index;
+			index = indices[i];
 			if (index > end)
 				break;
 
-			if (!trylock_page(page))
+			if (radix_tree_exceptional_entry(page)) {
+				nr_swaps_freed += !shmem_free_swap(mapping,
+								index, page);
 				continue;
-			WARN_ON(page->index != index);
-			if (PageWriteback(page)) {
-				unlock_page(page);
-				continue;
+			}
+
+			if (!trylock_page(page))
+				continue;
+			if (page->mapping == mapping) {
+				VM_BUG_ON(PageWriteback(page));
+				truncate_inode_page(mapping, page);
 			}
-			truncate_inode_page(mapping, page);
 			unlock_page(page);
 		}
-		pagevec_release(&pvec);
+		shmem_pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
 		index++;
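Note the counting idiom in the hunk above: shmem_free_swap() returns 0 on success and -ENOENT when the entry was already removed by someone else, so `nr_swaps_freed += !shmem_free_swap(...)` counts only the entries this truncation actually freed. The radix_tree_exceptional_entry() test it is guarded by is a plain bit check; roughly, per the 3.1-era include/linux/radix-tree.h (a sketch):

	/* Bit 1 marks an exceptional entry; bit 0 marks an indirect
	 * (retry) pointer; a real page pointer has neither bit set. */
	static inline int radix_tree_exceptional_entry(void *arg)
	{
		return (unsigned long)arg & RADIX_TREE_EXCEPTIONAL_ENTRY;
	}

	static inline int radix_tree_exception(void *arg)
	{
		return unlikely((unsigned long)arg &
			(RADIX_TREE_INDIRECT_PTR | RADIX_TREE_EXCEPTIONAL_ENTRY));
	}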
@@ -295,59 +408,47 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 	index = start;
 	for ( ; ; ) {
 		cond_resched();
-		if (!pagevec_lookup(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+							pvec.pages, indices);
+		if (!pvec.nr) {
 			if (index == start)
 				break;
 			index = start;
 			continue;
 		}
-		if (index == start && pvec.pages[0]->index > end) {
-			pagevec_release(&pvec);
+		if (index == start && indices[0] > end) {
+			shmem_pagevec_release(&pvec);
 			break;
 		}
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
-			/* We rely upon deletion not changing page->index */
-			index = page->index;
+			index = indices[i];
 			if (index > end)
 				break;
 
+			if (radix_tree_exceptional_entry(page)) {
+				nr_swaps_freed += !shmem_free_swap(mapping,
+								index, page);
+				continue;
+			}
+
 			lock_page(page);
-			WARN_ON(page->index != index);
-			wait_on_page_writeback(page);
-			truncate_inode_page(mapping, page);
+			if (page->mapping == mapping) {
+				VM_BUG_ON(PageWriteback(page));
+				truncate_inode_page(mapping, page);
+			}
 			unlock_page(page);
 		}
-		pagevec_release(&pvec);
+		shmem_pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		index++;
 	}
 
-	if (end > SHMEM_NR_DIRECT)
-		end = SHMEM_NR_DIRECT;
-
 	spin_lock(&info->lock);
-	for (index = start; index < end; index++) {
-		swap = shmem_get_swap(info, index);
-		if (swap.val) {
-			free_swap_and_cache(swap);
-			shmem_put_swap(info, index, (swp_entry_t){0});
-			info->swapped--;
-		}
-	}
-
-	if (mapping->nrpages) {
-		spin_unlock(&info->lock);
-		/*
-		 * A page may have meanwhile sneaked in from swap.
-		 */
-		truncate_inode_pages_range(mapping, lstart, lend);
-		spin_lock(&info->lock);
-	}
-
+	info->swapped -= nr_swaps_freed;
 	shmem_recalc_inode(inode);
 	spin_unlock(&info->lock);
 
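With swap entries now freed during the radix-tree walk itself, the final accounting collapses to a single subtraction under info->lock; shmem_recalc_inode() then reconciles the inode's block charge against what is still resident. Its core invariant looks roughly like this (a sketch; the in-tree version also adjusts the superblock's used_blocks counter when max_blocks is set):

	/* pages charged = pages swapped out + pages still in the cache;
	 * anything alloced beyond that was truncated and can be unaccounted */
	long freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
	if (freed > 0) {
		info->alloced -= freed;
		shmem_unacct_blocks(info->flags, freed);
	}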
@@ -552,11 +653,10 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	}
 
 	/*
-	 * Just for this patch, we have a toy implementation,
-	 * which can swap out only the first SHMEM_NR_DIRECT pages:
-	 * for simple demonstration of where we need to think about swap.
+	 * Disable even the toy swapping implementation, while we convert
+	 * functions one by one to having swap entries in the radix tree.
 	 */
-	if (index >= SHMEM_NR_DIRECT)
+	if (index < ULONG_MAX)
 		goto redirty;
 
 	swap = get_swap_page();
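Since index is a pgoff_t (an unsigned long), `index < ULONG_MAX` is true for any real page cache index, so this hunk makes shmem_writepage() unconditionally take the redirty path: swapout is a deliberate no-op until the radix-swap conversion is complete. For context, the redirty tail of shmem_writepage() in this era looks roughly like the following (a sketch, not part of this patch):

	redirty:
		set_page_dirty(page);
		if (wbc->for_reclaim)
			return AOP_WRITEPAGE_ACTIVATE;	/* back to the LRU, retry later */
		unlock_page(page);
		return 0;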