author		Hugh Dickins <hughd@google.com>	2011-08-03 19:21:22 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-08-03 20:25:23 -0400
commit		7a5d0fbb29936fad7f17b1cb001b0c33a5f13328 (patch)
tree		903c66ee195451f45c782155a8eb6dd6e7217522 /mm/shmem.c
parent		bda97eab0cc9c6385b9f26abdda6459f630f4513 (diff)
tmpfs: convert shmem_truncate_range to radix-swap
Disable the toy swapping implementation in shmem_writepage() - it's hard to
support two schemes at once - and convert shmem_truncate_range() to a lockless
gang lookup of swap entries along with pages, freeing both.

Since the second loop tightens its noose until all entries of either kind have
been squeezed out (and we shall make sure that there's not an instant when
neither is visible), there is no longer a need for yet another pass below.

shmem_radix_tree_replace() compensates for the lockless lookup by checking
that the expected entry is in place, under lock, before replacing it.  Here it
just deletes, but will be used in later patches to substitute swap entry for
page or page for swap entry.

Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
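The conversion relies on swap entries living in the page-cache radix tree as
"exceptional" entries, distinguishable from struct page pointers during the
gang lookup. Below is a rough user-space sketch of that encoding, mirroring
swp_to_radix_entry()/radix_to_swp_entry() and the RADIX_TREE_EXCEPTIONAL_*
tag bit; the names and constant values in the sketch are illustrative
assumptions, not the kernel definitions.

/*
 * Standalone user-space sketch (not kernel code) of the exceptional-entry
 * encoding: a swap value is shifted up and tagged in a low bit so it can
 * live in a radix-tree slot that normally holds a struct page pointer.
 * Page pointers are at least 4-byte aligned, so a set tag bit can never be
 * mistaken for a real page.
 */
#include <assert.h>
#include <stdio.h>

#define EXCEPTIONAL_ENTRY	2UL	/* stands in for RADIX_TREE_EXCEPTIONAL_ENTRY */
#define EXCEPTIONAL_SHIFT	2	/* stands in for RADIX_TREE_EXCEPTIONAL_SHIFT */

static void *swp_to_slot(unsigned long swp_val)
{
	/* pack: shift the swap value up and set the tag bit */
	return (void *)((swp_val << EXCEPTIONAL_SHIFT) | EXCEPTIONAL_ENTRY);
}

static int slot_is_swap(const void *slot)
{
	/* the kind of test radix_tree_exceptional_entry() performs */
	return ((unsigned long)slot & EXCEPTIONAL_ENTRY) != 0;
}

static unsigned long slot_to_swp(const void *slot)
{
	/* unpack: drop the tag and shift back down */
	return (unsigned long)slot >> EXCEPTIONAL_SHIFT;
}

int main(void)
{
	void *slot = swp_to_slot(0x1234);

	assert(slot_is_swap(slot));
	assert(slot_to_swp(slot) == 0x1234);
	printf("slot %p decodes back to swap value %#lx\n",
	       slot, slot_to_swp(slot));
	return 0;
}

Because a tagged slot can never look like an aligned page pointer, a single
lockless gang lookup can return pages and swap entries mixed together, which
is what shmem_find_get_pages_and_swap() and shmem_pagevec_release() in the
patch below depend on.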
Diffstat (limited to 'mm/shmem.c')
-rw-r--r--	mm/shmem.c	192
1 file changed, 146 insertions(+), 46 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index e101c211ed1..4439b7d5581 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -238,6 +238,111 @@ static swp_entry_t shmem_get_swap(struct shmem_inode_info *info, pgoff_t index)
 		info->i_direct[index] : (swp_entry_t){0};
 }
 
+/*
+ * Replace item expected in radix tree by a new item, while holding tree lock.
+ */
+static int shmem_radix_tree_replace(struct address_space *mapping,
+			pgoff_t index, void *expected, void *replacement)
+{
+	void **pslot;
+	void *item = NULL;
+
+	VM_BUG_ON(!expected);
+	pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
+	if (pslot)
+		item = radix_tree_deref_slot_protected(pslot,
+						&mapping->tree_lock);
+	if (item != expected)
+		return -ENOENT;
+	if (replacement)
+		radix_tree_replace_slot(pslot, replacement);
+	else
+		radix_tree_delete(&mapping->page_tree, index);
+	return 0;
+}
+
+/*
+ * Like find_get_pages, but collecting swap entries as well as pages.
+ */
+static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping,
+					pgoff_t start, unsigned int nr_pages,
+					struct page **pages, pgoff_t *indices)
+{
+	unsigned int i;
+	unsigned int ret;
+	unsigned int nr_found;
+
+	rcu_read_lock();
+restart:
+	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
+				(void ***)pages, indices, start, nr_pages);
+	ret = 0;
+	for (i = 0; i < nr_found; i++) {
+		struct page *page;
+repeat:
+		page = radix_tree_deref_slot((void **)pages[i]);
+		if (unlikely(!page))
+			continue;
+		if (radix_tree_exception(page)) {
+			if (radix_tree_exceptional_entry(page))
+				goto export;
+			/* radix_tree_deref_retry(page) */
+			goto restart;
+		}
+		if (!page_cache_get_speculative(page))
+			goto repeat;
+
+		/* Has the page moved? */
+		if (unlikely(page != *((void **)pages[i]))) {
+			page_cache_release(page);
+			goto repeat;
+		}
+export:
+		indices[ret] = indices[i];
+		pages[ret] = page;
+		ret++;
+	}
+	if (unlikely(!ret && nr_found))
+		goto restart;
+	rcu_read_unlock();
+	return ret;
+}
+
+/*
+ * Remove swap entry from radix tree, free the swap and its page cache.
+ */
+static int shmem_free_swap(struct address_space *mapping,
+			   pgoff_t index, void *radswap)
+{
+	int error;
+
+	spin_lock_irq(&mapping->tree_lock);
+	error = shmem_radix_tree_replace(mapping, index, radswap, NULL);
+	spin_unlock_irq(&mapping->tree_lock);
+	if (!error)
+		free_swap_and_cache(radix_to_swp_entry(radswap));
+	return error;
+}
+
+/*
+ * Pagevec may contain swap entries, so shuffle up pages before releasing.
+ */
+static void shmem_pagevec_release(struct pagevec *pvec)
+{
+	int i, j;
+
+	for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
+		struct page *page = pvec->pages[i];
+		if (!radix_tree_exceptional_entry(page))
+			pvec->pages[j++] = page;
+	}
+	pvec->nr = j;
+	pagevec_release(pvec);
+}
+
+/*
+ * Remove range of pages and swap entries from radix tree, and free them.
+ */
 void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 {
 	struct address_space *mapping = inode->i_mapping;
@@ -246,36 +351,44 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 	unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
 	pgoff_t end = (lend >> PAGE_CACHE_SHIFT);
 	struct pagevec pvec;
+	pgoff_t indices[PAGEVEC_SIZE];
+	long nr_swaps_freed = 0;
 	pgoff_t index;
-	swp_entry_t swap;
 	int i;
 
 	BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
 
 	pagevec_init(&pvec, 0);
 	index = start;
-	while (index <= end && pagevec_lookup(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+	while (index <= end) {
+		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+							pvec.pages, indices);
+		if (!pvec.nr)
+			break;
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
-			/* We rely upon deletion not changing page->index */
-			index = page->index;
+			index = indices[i];
 			if (index > end)
 				break;
 
-			if (!trylock_page(page))
+			if (radix_tree_exceptional_entry(page)) {
+				nr_swaps_freed += !shmem_free_swap(mapping,
+								index, page);
 				continue;
-			WARN_ON(page->index != index);
-			if (PageWriteback(page)) {
-				unlock_page(page);
-				continue;
+			}
+
+			if (!trylock_page(page))
+				continue;
+			if (page->mapping == mapping) {
+				VM_BUG_ON(PageWriteback(page));
+				truncate_inode_page(mapping, page);
 			}
-			truncate_inode_page(mapping, page);
 			unlock_page(page);
 		}
-		pagevec_release(&pvec);
+		shmem_pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
 		index++;
@@ -295,59 +408,47 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 	index = start;
 	for ( ; ; ) {
 		cond_resched();
-		if (!pagevec_lookup(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+							pvec.pages, indices);
+		if (!pvec.nr) {
 			if (index == start)
 				break;
 			index = start;
 			continue;
 		}
-		if (index == start && pvec.pages[0]->index > end) {
-			pagevec_release(&pvec);
+		if (index == start && indices[0] > end) {
+			shmem_pagevec_release(&pvec);
 			break;
 		}
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
-			/* We rely upon deletion not changing page->index */
-			index = page->index;
+			index = indices[i];
 			if (index > end)
 				break;
 
+			if (radix_tree_exceptional_entry(page)) {
+				nr_swaps_freed += !shmem_free_swap(mapping,
+								index, page);
+				continue;
+			}
+
 			lock_page(page);
-			WARN_ON(page->index != index);
-			wait_on_page_writeback(page);
-			truncate_inode_page(mapping, page);
+			if (page->mapping == mapping) {
+				VM_BUG_ON(PageWriteback(page));
+				truncate_inode_page(mapping, page);
+			}
 			unlock_page(page);
 		}
-		pagevec_release(&pvec);
+		shmem_pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		index++;
 	}
 
-	if (end > SHMEM_NR_DIRECT)
-		end = SHMEM_NR_DIRECT;
-
 	spin_lock(&info->lock);
-	for (index = start; index < end; index++) {
-		swap = shmem_get_swap(info, index);
-		if (swap.val) {
-			free_swap_and_cache(swap);
-			shmem_put_swap(info, index, (swp_entry_t){0});
-			info->swapped--;
-		}
-	}
-
-	if (mapping->nrpages) {
-		spin_unlock(&info->lock);
-		/*
-		 * A page may have meanwhile sneaked in from swap.
-		 */
-		truncate_inode_pages_range(mapping, lstart, lend);
-		spin_lock(&info->lock);
-	}
-
+	info->swapped -= nr_swaps_freed;
 	shmem_recalc_inode(inode);
 	spin_unlock(&info->lock);
 
@@ -552,11 +653,10 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	}
 
 	/*
-	 * Just for this patch, we have a toy implementation,
-	 * which can swap out only the first SHMEM_NR_DIRECT pages:
-	 * for simple demonstration of where we need to think about swap.
+	 * Disable even the toy swapping implementation, while we convert
+	 * functions one by one to having swap entries in the radix tree.
 	 */
-	if (index >= SHMEM_NR_DIRECT)
+	if (index < ULONG_MAX)
 		goto redirty;
 
 	swap = get_swap_page();