Diffstat (limited to 'mm')
-rw-r--r--  mm/shmem.c  192
1 file changed, 146 insertions, 46 deletions
diff --git a/mm/shmem.c b/mm/shmem.c
index e101c211ed1f..4439b7d55819 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -238,6 +238,111 @@ static swp_entry_t shmem_get_swap(struct shmem_inode_info *info, pgoff_t index)
 		info->i_direct[index] : (swp_entry_t){0};
 }
 
+/*
+ * Replace item expected in radix tree by a new item, while holding tree lock.
+ */
+static int shmem_radix_tree_replace(struct address_space *mapping,
+			pgoff_t index, void *expected, void *replacement)
+{
+	void **pslot;
+	void *item = NULL;
+
+	VM_BUG_ON(!expected);
+	pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
+	if (pslot)
+		item = radix_tree_deref_slot_protected(pslot,
+							&mapping->tree_lock);
+	if (item != expected)
+		return -ENOENT;
+	if (replacement)
+		radix_tree_replace_slot(pslot, replacement);
+	else
+		radix_tree_delete(&mapping->page_tree, index);
+	return 0;
+}
+
+/*
+ * Like find_get_pages, but collecting swap entries as well as pages.
+ */
+static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping,
+					pgoff_t start, unsigned int nr_pages,
+					struct page **pages, pgoff_t *indices)
+{
+	unsigned int i;
+	unsigned int ret;
+	unsigned int nr_found;
+
+	rcu_read_lock();
+restart:
+	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
+				(void ***)pages, indices, start, nr_pages);
+	ret = 0;
+	for (i = 0; i < nr_found; i++) {
+		struct page *page;
+repeat:
+		page = radix_tree_deref_slot((void **)pages[i]);
+		if (unlikely(!page))
+			continue;
+		if (radix_tree_exception(page)) {
+			if (radix_tree_exceptional_entry(page))
+				goto export;
+			/* radix_tree_deref_retry(page) */
+			goto restart;
+		}
+		if (!page_cache_get_speculative(page))
+			goto repeat;
+
+		/* Has the page moved? */
+		if (unlikely(page != *((void **)pages[i]))) {
+			page_cache_release(page);
+			goto repeat;
+		}
+export:
+		indices[ret] = indices[i];
+		pages[ret] = page;
+		ret++;
+	}
+	if (unlikely(!ret && nr_found))
+		goto restart;
+	rcu_read_unlock();
+	return ret;
+}
+
+/*
+ * Remove swap entry from radix tree, free the swap and its page cache.
+ */
+static int shmem_free_swap(struct address_space *mapping,
+			   pgoff_t index, void *radswap)
+{
+	int error;
+
+	spin_lock_irq(&mapping->tree_lock);
+	error = shmem_radix_tree_replace(mapping, index, radswap, NULL);
+	spin_unlock_irq(&mapping->tree_lock);
+	if (!error)
+		free_swap_and_cache(radix_to_swp_entry(radswap));
+	return error;
+}
+
+/*
+ * Pagevec may contain swap entries, so shuffle up pages before releasing.
+ */
+static void shmem_pagevec_release(struct pagevec *pvec)
+{
+	int i, j;
+
+	for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
+		struct page *page = pvec->pages[i];
+		if (!radix_tree_exceptional_entry(page))
+			pvec->pages[j++] = page;
+	}
+	pvec->nr = j;
+	pagevec_release(pvec);
+}
+
+/*
+ * Remove range of pages and swap entries from radix tree, and free them.
+ */
 void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 {
 	struct address_space *mapping = inode->i_mapping;
@@ -246,36 +351,44 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 	unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
 	pgoff_t end = (lend >> PAGE_CACHE_SHIFT);
 	struct pagevec pvec;
+	pgoff_t indices[PAGEVEC_SIZE];
+	long nr_swaps_freed = 0;
 	pgoff_t index;
-	swp_entry_t swap;
 	int i;
 
 	BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
 
 	pagevec_init(&pvec, 0);
 	index = start;
-	while (index <= end && pagevec_lookup(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+	while (index <= end) {
+		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+							pvec.pages, indices);
+		if (!pvec.nr)
+			break;
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
-			/* We rely upon deletion not changing page->index */
-			index = page->index;
+			index = indices[i];
 			if (index > end)
 				break;
 
-			if (!trylock_page(page))
+			if (radix_tree_exceptional_entry(page)) {
+				nr_swaps_freed += !shmem_free_swap(mapping,
+								index, page);
 				continue;
-			WARN_ON(page->index != index);
-			if (PageWriteback(page)) {
-				unlock_page(page);
+			}
+
+			if (!trylock_page(page))
 				continue;
+			if (page->mapping == mapping) {
+				VM_BUG_ON(PageWriteback(page));
+				truncate_inode_page(mapping, page);
 			}
-			truncate_inode_page(mapping, page);
 			unlock_page(page);
 		}
-		pagevec_release(&pvec);
+		shmem_pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
 		index++;
@@ -295,59 +408,47 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 	index = start;
 	for ( ; ; ) {
 		cond_resched();
-		if (!pagevec_lookup(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+							pvec.pages, indices);
+		if (!pvec.nr) {
 			if (index == start)
 				break;
 			index = start;
 			continue;
 		}
-		if (index == start && pvec.pages[0]->index > end) {
-			pagevec_release(&pvec);
+		if (index == start && indices[0] > end) {
+			shmem_pagevec_release(&pvec);
 			break;
 		}
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
-			/* We rely upon deletion not changing page->index */
-			index = page->index;
+			index = indices[i];
 			if (index > end)
 				break;
 
+			if (radix_tree_exceptional_entry(page)) {
+				nr_swaps_freed += !shmem_free_swap(mapping,
+								index, page);
+				continue;
+			}
+
 			lock_page(page);
-			WARN_ON(page->index != index);
-			wait_on_page_writeback(page);
-			truncate_inode_page(mapping, page);
+			if (page->mapping == mapping) {
+				VM_BUG_ON(PageWriteback(page));
+				truncate_inode_page(mapping, page);
+			}
 			unlock_page(page);
 		}
-		pagevec_release(&pvec);
+		shmem_pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		index++;
 	}
 
-	if (end > SHMEM_NR_DIRECT)
-		end = SHMEM_NR_DIRECT;
-
 	spin_lock(&info->lock);
-	for (index = start; index < end; index++) {
-		swap = shmem_get_swap(info, index);
-		if (swap.val) {
-			free_swap_and_cache(swap);
-			shmem_put_swap(info, index, (swp_entry_t){0});
-			info->swapped--;
-		}
-	}
-
-	if (mapping->nrpages) {
-		spin_unlock(&info->lock);
-		/*
-		 * A page may have meanwhile sneaked in from swap.
-		 */
-		truncate_inode_pages_range(mapping, lstart, lend);
-		spin_lock(&info->lock);
-	}
-
+	info->swapped -= nr_swaps_freed;
 	shmem_recalc_inode(inode);
 	spin_unlock(&info->lock);
 
@@ -552,11 +653,10 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	}
 
 	/*
-	 * Just for this patch, we have a toy implementation,
-	 * which can swap out only the first SHMEM_NR_DIRECT pages:
-	 * for simple demonstration of where we need to think about swap.
+	 * Disable even the toy swapping implementation, while we convert
+	 * functions one by one to having swap entries in the radix tree.
 	 */
-	if (index >= SHMEM_NR_DIRECT)
+	if (index < ULONG_MAX)
 		goto redirty;
 
 	swap = get_swap_page();
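
Note: shmem_free_swap() in the first hunk decodes a radix-tree slot back into a swp_entry_t with radix_to_swp_entry(), i.e. swap entries now live in the page-cache radix tree as "exceptional" entries that are told apart from struct page pointers by a low tag bit, which is what the radix_tree_exceptional_entry() checks in the truncate loops rely on. The standalone sketch below only illustrates that tagging idea under assumed names (EXCEPTIONAL_BIT, swp_to_entry, entry_to_swp are hypothetical stand-ins, not the kernel's own encode/decode helpers, and the kernel's exact bit layout may differ).

/*
 * Illustrative userspace sketch of the exceptional-entry idea assumed above:
 * a swap value is shifted up and a low tag bit is set, so the result can
 * never be confused with a naturally-aligned struct page pointer.
 * All names here are hypothetical; only the concept matches the patch.
 */
#include <assert.h>
#include <stdio.h>

#define EXCEPTIONAL_BIT	0x2UL		/* assumed tag bit */

static void *swp_to_entry(unsigned long swp_val)
{
	return (void *)((swp_val << 2) | EXCEPTIONAL_BIT);
}

static int entry_is_exceptional(const void *entry)
{
	return ((unsigned long)entry & EXCEPTIONAL_BIT) != 0;
}

static unsigned long entry_to_swp(const void *entry)
{
	return (unsigned long)entry >> 2;
}

int main(void)
{
	void *entry = swp_to_entry(0x1234);

	assert(entry_is_exceptional(entry));	/* tagged, not a page pointer */
	assert(entry_to_swp(entry) == 0x1234);	/* round-trips cleanly */
	printf("entry %p decodes to swap value %#lx\n",
	       entry, entry_to_swp(entry));
	return 0;
}

In the patch itself the real conversion back to a swp_entry_t is done by radix_to_swp_entry(), as seen in shmem_free_swap() above; the lookup side never dereferences such entries as pages because shmem_find_get_pages_and_swap() exports them untouched and shmem_pagevec_release() filters them out before pagevec_release().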