author		Hugh Dickins <hughd@google.com>		2011-07-25 20:12:25 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-07-25 23:57:10 -0400
commit		b85e0effd3dcbf9118b896232f59526ab1a39a74 (patch)
tree		32200b2e4052d50d4eb1771e555eaf66d7c4cfc8
parent		8a549bea51138be2126a2cc6aabe8f17ef66b79b (diff)
mm: consistent truncate and invalidate loops
Make the pagevec_lookup loops in truncate_inode_pages_range(),
invalidate_mapping_pages() and invalidate_inode_pages2_range() more
consistent with each other.

They were relying upon page->index of an unlocked page, but apologizing
for it: accept it, embrace it, add comments and WARN_ONs, and simplify
the index handling.

invalidate_inode_pages2_range() had special handling for a wrapped
page->index + 1 = 0 case; but MAX_LFS_FILESIZE doesn't let us anywhere
near there, and a corrupt page->index in the radix_tree could cause
more trouble than that would catch.  Remove that wrapped handling.

invalidate_inode_pages2_range() uses min() to limit the pagevec_lookup
when near the end of the range: copy that into the other two, although
it's less useful than you might think (it limits the use of the buffer,
rather than the indices looked up).

Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
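[Editor's illustration, not part of the commit: a minimal userspace sketch of
the loop shape the three functions now share.  slots[], batch_lookup(),
walk_range() and minp() are hypothetical stand-ins for the radix tree,
pagevec_lookup(), the truncate/invalidate loops and the kernel's min(); the
printf marks where the per-page work would go.]

#include <stdio.h>
#include <stddef.h>

#define PAGEVEC_SIZE 14			/* batch size, as in the kernel's pagevec */
typedef unsigned long pgoff_t;

/* Sorted page indices standing in for the pages present in a mapping. */
static const pgoff_t slots[] = { 3, 4, 7, 8, 9, 15, 21, 22, 40, 41, 42, 99 };
#define NSLOTS (sizeof(slots) / sizeof(slots[0]))

static pgoff_t minp(pgoff_t a, pgoff_t b) { return a < b ? a : b; }

/* Fill batch[] with up to nr indices >= start; models pagevec_lookup(). */
static unsigned batch_lookup(pgoff_t *batch, pgoff_t start, pgoff_t nr)
{
	unsigned n = 0;
	for (size_t i = 0; i < NSLOTS && n < nr; i++)
		if (slots[i] >= start)
			batch[n++] = slots[i];
	return n;
}

static void walk_range(pgoff_t start, pgoff_t end)
{
	pgoff_t batch[PAGEVEC_SIZE];
	pgoff_t index = start;
	unsigned n, i;

	/*
	 * The min() only caps how much of the batch buffer a lookup may
	 * fill near the end of the range; it does not bound the indices
	 * returned, so the body must still check index > end.
	 */
	while (index <= end && (n = batch_lookup(batch, index,
			minp(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1))) {
		for (i = 0; i < n; i++) {
			index = batch[i];	/* trust the looked-up index */
			if (index > end)
				break;
			printf("page %lu\n", index);	/* per-page work here */
		}
		index++;	/* resume the lookup after the last page seen */
	}
}

int main(void)
{
	walk_range(5, 41);	/* visits 7 8 9 15 21 22 40 41, then stops */
	return 0;
}

[Note how, as the message says, the min() limits only buffer usage: a lookup
may still return an index beyond end (42 here), which is why each loop
re-checks the page's own index against end.]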
-rw-r--r--	mm/filemap.c	2
-rw-r--r--	mm/truncate.c	110
2 files changed, 49 insertions, 63 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 2780be4bd493..10a171113273 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -128,6 +128,7 @@ void __delete_from_page_cache(struct page *page)
 
 	radix_tree_delete(&mapping->page_tree, page->index);
 	page->mapping = NULL;
+	/* Leave page->index set: truncation lookup relies upon it */
 	mapping->nrpages--;
 	__dec_zone_page_state(page, NR_FILE_PAGES);
 	if (PageSwapBacked(page))
@@ -483,6 +484,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 		spin_unlock_irq(&mapping->tree_lock);
 	} else {
 		page->mapping = NULL;
+		/* Leave page->index set: truncation relies upon it */
 		spin_unlock_irq(&mapping->tree_lock);
 		mem_cgroup_uncharge_cache_page(page);
 		page_cache_release(page);
diff --git a/mm/truncate.c b/mm/truncate.c
index c924764e2ce5..dc459014f777 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -199,9 +199,6 @@ int invalidate_inode_page(struct page *page)
  * The first pass will remove most pages, so the search cost of the second pass
  * is low.
  *
- * When looking at page->index outside the page lock we need to be careful to
- * copy it into a local to avoid races (it could change at any time).
- *
  * We pass down the cache-hot hint to the page freeing code.  Even if the
  * mapping is large, it is probably the case that the final pages are the most
  * recently touched, and freeing happens in ascending file offset order.
@@ -210,10 +207,10 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		loff_t lstart, loff_t lend)
 {
 	const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
-	pgoff_t end;
 	const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
 	struct pagevec pvec;
-	pgoff_t next;
+	pgoff_t index;
+	pgoff_t end;
 	int i;
 
 	cleancache_flush_inode(mapping);
@@ -224,24 +221,21 @@ void truncate_inode_pages_range(struct address_space *mapping,
 	end = (lend >> PAGE_CACHE_SHIFT);
 
 	pagevec_init(&pvec, 0);
-	next = start;
-	while (next <= end &&
-	       pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+	index = start;
+	while (index <= end && pagevec_lookup(&pvec, mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
-			pgoff_t page_index = page->index;
 
-			if (page_index > end) {
-				next = page_index;
+			/* We rely upon deletion not changing page->index */
+			index = page->index;
+			if (index > end)
 				break;
-			}
 
-			if (page_index > next)
-				next = page_index;
-			next++;
 			if (!trylock_page(page))
 				continue;
+			WARN_ON(page->index != index);
 			if (PageWriteback(page)) {
 				unlock_page(page);
 				continue;
@@ -252,6 +246,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
+		index++;
 	}
 
 	if (partial) {
@@ -264,13 +259,14 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		}
 	}
 
-	next = start;
+	index = start;
 	for ( ; ; ) {
 		cond_resched();
-		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
-			if (next == start)
+		if (!pagevec_lookup(&pvec, mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+			if (index == start)
 				break;
-			next = start;
+			index = start;
 			continue;
 		}
 		if (pvec.pages[0]->index > end) {
@@ -281,18 +277,20 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
-			if (page->index > end)
+			/* We rely upon deletion not changing page->index */
+			index = page->index;
+			if (index > end)
 				break;
+
 			lock_page(page);
+			WARN_ON(page->index != index);
 			wait_on_page_writeback(page);
 			truncate_inode_page(mapping, page);
-			if (page->index > next)
-				next = page->index;
-			next++;
 			unlock_page(page);
 		}
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
+		index++;
 	}
 	cleancache_flush_inode(mapping);
 }
@@ -333,35 +331,26 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 		pgoff_t start, pgoff_t end)
 {
 	struct pagevec pvec;
-	pgoff_t next = start;
+	pgoff_t index = start;
 	unsigned long ret;
 	unsigned long count = 0;
 	int i;
 
 	pagevec_init(&pvec, 0);
-	while (next <= end &&
-			pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+	while (index <= end && pagevec_lookup(&pvec, mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
-			pgoff_t index;
-			int lock_failed;
-
-			lock_failed = !trylock_page(page);
 
-			/*
-			 * We really shouldn't be looking at the ->index of an
-			 * unlocked page.  But we're not allowed to lock these
-			 * pages.  So we rely upon nobody altering the ->index
-			 * of this (pinned-by-us) page.
-			 */
+			/* We rely upon deletion not changing page->index */
 			index = page->index;
-			if (index > next)
-				next = index;
-			next++;
-			if (lock_failed)
-				continue;
+			if (index > end)
+				break;
 
+			if (!trylock_page(page))
+				continue;
+			WARN_ON(page->index != index);
 			ret = invalidate_inode_page(page);
 			unlock_page(page);
 			/*
@@ -371,12 +360,11 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 			if (!ret)
 				deactivate_page(page);
 			count += ret;
-			if (next > end)
-				break;
 		}
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
+		index++;
 	}
 	return count;
 }
@@ -442,37 +430,32 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 		pgoff_t start, pgoff_t end)
 {
 	struct pagevec pvec;
-	pgoff_t next;
+	pgoff_t index;
 	int i;
 	int ret = 0;
 	int ret2 = 0;
 	int did_range_unmap = 0;
-	int wrapped = 0;
 
 	cleancache_flush_inode(mapping);
 	pagevec_init(&pvec, 0);
-	next = start;
-	while (next <= end && !wrapped &&
-		pagevec_lookup(&pvec, mapping, next,
-			min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+	index = start;
+	while (index <= end && pagevec_lookup(&pvec, mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
-			pgoff_t page_index;
 
+			/* We rely upon deletion not changing page->index */
+			index = page->index;
+			if (index > end)
+				break;
+
 			lock_page(page);
+			WARN_ON(page->index != index);
 			if (page->mapping != mapping) {
 				unlock_page(page);
 				continue;
 			}
-			page_index = page->index;
-			next = page_index + 1;
-			if (next == 0)
-				wrapped = 1;
-			if (page_index > end) {
-				unlock_page(page);
-				break;
-			}
 			wait_on_page_writeback(page);
 			if (page_mapped(page)) {
 				if (!did_range_unmap) {
@@ -480,9 +463,9 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 					 * Zap the rest of the file in one hit.
 					 */
 					unmap_mapping_range(mapping,
-					   (loff_t)page_index<<PAGE_CACHE_SHIFT,
-					   (loff_t)(end - page_index + 1)
-							<< PAGE_CACHE_SHIFT,
+					   (loff_t)index << PAGE_CACHE_SHIFT,
+					   (loff_t)(1 + end - index)
+							 << PAGE_CACHE_SHIFT,
 					    0);
 					did_range_unmap = 1;
 				} else {
@@ -490,8 +473,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 					 * Just zap this page
 					 */
 					unmap_mapping_range(mapping,
-					  (loff_t)page_index<<PAGE_CACHE_SHIFT,
-					  PAGE_CACHE_SIZE, 0);
+					   (loff_t)index << PAGE_CACHE_SHIFT,
+					   PAGE_CACHE_SIZE, 0);
 				}
 			}
 			BUG_ON(page_mapped(page));
@@ -507,6 +490,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
+		index++;
 	}
 	cleancache_flush_inode(mapping);
 	return ret;