author     Hugh Dickins <hughd@google.com>                2011-07-25 20:12:25 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org> 2011-07-25 23:57:10 -0400
commit     b85e0effd3dcbf9118b896232f59526ab1a39a74
tree       32200b2e4052d50d4eb1771e555eaf66d7c4cfc8
parent     8a549bea51138be2126a2cc6aabe8f17ef66b79b
mm: consistent truncate and invalidate loops
Make the pagevec_lookup loops in truncate_inode_pages_range(),
invalidate_mapping_pages() and invalidate_inode_pages2_range() more
consistent with each other.
They were relying upon page->index of an unlocked page, but apologizing
for it: accept it, embrace it, add comments and WARN_ONs, and simplify the
index handling.
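
Condensed, the loop shape all three functions now share looks like this (a sketch pulled from the diff below, with the memcg bookkeeping and the per-function page work elided; the locking step is trylock_page() in the paths that may not block, lock_page() in those that may):

	index = start;
	while (index <= end && pagevec_lookup(&pvec, mapping, index,
			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			/* We rely upon deletion not changing page->index */
			index = page->index;
			if (index > end)
				break;

			if (!trylock_page(page))
				continue;
			/* Locked now: the index we sampled must still hold */
			WARN_ON(page->index != index);
			/* ... truncate or invalidate the page ... */
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
		index++;	/* resume after the last index examined */
	}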
invalidate_inode_pages2_range() had special handling for a wrapped
page->index + 1 = 0 case; but MAX_LFS_FILESIZE doesn't let us anywhere
near there, and a corrupt page->index in the radix_tree could cause more
trouble than that would catch. Remove that wrapped handling.
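
For scale: with MAX_LFS_FILESIZE as defined around this kernel version (a sketch, assuming 4K pages):

	#if BITS_PER_LONG == 32
	#define MAX_LFS_FILESIZE	(((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
	/* files top out near 2^43 bytes, so the highest page index is
	 * 2^31 - 1: far below the 2^32 wrap point of a 32-bit pgoff_t */
	#elif BITS_PER_LONG == 64
	#define MAX_LFS_FILESIZE	0x7fffffffffffffffUL
	/* highest page index is about 2^51: nowhere near the 2^64 wrap */
	#endif

An index that does reach the wrap can only come from corruption, and quietly ending the loop there would hide the corruption rather than contain it.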
invalidate_inode_pages2_range() uses min() to limit the pagevec_lookup
when near the end of the range: copy that into the other two, although
it's less useful than you might think (it limits the use of the buffer,
rather than the indices looked up).
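
Concretely, with the then-current PAGEVEC_SIZE of 14, the capped call is (sketch):

	/* ask for between 1 and PAGEVEC_SIZE pages, no more than the range needs */
	pagevec_lookup(&pvec, mapping, index,
			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1);

When end - index >= 13 that is a full batch of 14; when index == end it asks for just one page. But pagevec_lookup() returns the next pages at or after index, so in a sparse mapping the pages found can still lie beyond end: hence every loop also breaks on index > end.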
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
 mm/filemap.c  |   2 +
 mm/truncate.c | 110 ++++++++++++++++++----------------------
 2 files changed, 49 insertions(+), 63 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 2780be4bd493..10a171113273 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -128,6 +128,7 @@ void __delete_from_page_cache(struct page *page)
 
 	radix_tree_delete(&mapping->page_tree, page->index);
 	page->mapping = NULL;
+	/* Leave page->index set: truncation lookup relies upon it */
 	mapping->nrpages--;
 	__dec_zone_page_state(page, NR_FILE_PAGES);
 	if (PageSwapBacked(page))
@@ -483,6 +484,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 		spin_unlock_irq(&mapping->tree_lock);
 	} else {
 		page->mapping = NULL;
+		/* Leave page->index set: truncation relies upon it */
 		spin_unlock_irq(&mapping->tree_lock);
 		mem_cgroup_uncharge_cache_page(page);
 		page_cache_release(page);
diff --git a/mm/truncate.c b/mm/truncate.c
index c924764e2ce5..dc459014f777 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -199,9 +199,6 @@ int invalidate_inode_page(struct page *page)
  * The first pass will remove most pages, so the search cost of the second pass
  * is low.
  *
- * When looking at page->index outside the page lock we need to be careful to
- * copy it into a local to avoid races (it could change at any time).
- *
  * We pass down the cache-hot hint to the page freeing code. Even if the
  * mapping is large, it is probably the case that the final pages are the most
  * recently touched, and freeing happens in ascending file offset order.
@@ -210,10 +207,10 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		loff_t lstart, loff_t lend)
 {
 	const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
-	pgoff_t end;
 	const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
 	struct pagevec pvec;
-	pgoff_t next;
+	pgoff_t index;
+	pgoff_t end;
 	int i;
 
 	cleancache_flush_inode(mapping);
@@ -224,24 +221,21 @@ void truncate_inode_pages_range(struct address_space *mapping,
 	end = (lend >> PAGE_CACHE_SHIFT);
 
 	pagevec_init(&pvec, 0);
-	next = start;
-	while (next <= end &&
-		pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+	index = start;
+	while (index <= end && pagevec_lookup(&pvec, mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
-			pgoff_t page_index = page->index;
 
-			if (page_index > end) {
-				next = page_index;
+			/* We rely upon deletion not changing page->index */
+			index = page->index;
+			if (index > end)
 				break;
-			}
 
-			if (page_index > next)
-				next = page_index;
-			next++;
 			if (!trylock_page(page))
 				continue;
+			WARN_ON(page->index != index);
 			if (PageWriteback(page)) {
 				unlock_page(page);
 				continue;
@@ -252,6 +246,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
+		index++;
 	}
 
 	if (partial) {
@@ -264,13 +259,14 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		}
 	}
 
-	next = start;
+	index = start;
 	for ( ; ; ) {
 		cond_resched();
-		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
-			if (next == start)
+		if (!pagevec_lookup(&pvec, mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+			if (index == start)
 				break;
-			next = start;
+			index = start;
 			continue;
 		}
 		if (pvec.pages[0]->index > end) {
@@ -281,18 +277,20 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
-			if (page->index > end)
+			/* We rely upon deletion not changing page->index */
+			index = page->index;
+			if (index > end)
 				break;
+
 			lock_page(page);
+			WARN_ON(page->index != index);
 			wait_on_page_writeback(page);
 			truncate_inode_page(mapping, page);
-			if (page->index > next)
-				next = page->index;
-			next++;
 			unlock_page(page);
 		}
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
+		index++;
 	}
 	cleancache_flush_inode(mapping);
 }
@@ -333,35 +331,26 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 		pgoff_t start, pgoff_t end)
 {
 	struct pagevec pvec;
-	pgoff_t next = start;
+	pgoff_t index = start;
 	unsigned long ret;
 	unsigned long count = 0;
 	int i;
 
 	pagevec_init(&pvec, 0);
-	while (next <= end &&
-			pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+	while (index <= end && pagevec_lookup(&pvec, mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
-			pgoff_t index;
-			int lock_failed;
-
-			lock_failed = !trylock_page(page);
 
-			/*
-			 * We really shouldn't be looking at the ->index of an
-			 * unlocked page. But we're not allowed to lock these
-			 * pages. So we rely upon nobody altering the ->index
-			 * of this (pinned-by-us) page.
-			 */
+			/* We rely upon deletion not changing page->index */
 			index = page->index;
-			if (index > next)
-				next = index;
-			next++;
-			if (lock_failed)
-				continue;
+			if (index > end)
+				break;
 
+			if (!trylock_page(page))
+				continue;
+			WARN_ON(page->index != index);
 			ret = invalidate_inode_page(page);
 			unlock_page(page);
 			/*
@@ -371,12 +360,11 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 			if (!ret)
 				deactivate_page(page);
 			count += ret;
-			if (next > end)
-				break;
 		}
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
+		index++;
 	}
 	return count;
 }
@@ -442,37 +430,32 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 		pgoff_t start, pgoff_t end)
 {
 	struct pagevec pvec;
-	pgoff_t next;
+	pgoff_t index;
 	int i;
 	int ret = 0;
 	int ret2 = 0;
 	int did_range_unmap = 0;
-	int wrapped = 0;
 
 	cleancache_flush_inode(mapping);
 	pagevec_init(&pvec, 0);
-	next = start;
-	while (next <= end && !wrapped &&
-		pagevec_lookup(&pvec, mapping, next,
-			min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+	index = start;
+	while (index <= end && pagevec_lookup(&pvec, mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
-			pgoff_t page_index;
+
+			/* We rely upon deletion not changing page->index */
+			index = page->index;
+			if (index > end)
+				break;
 
 			lock_page(page);
+			WARN_ON(page->index != index);
 			if (page->mapping != mapping) {
 				unlock_page(page);
 				continue;
 			}
-			page_index = page->index;
-			next = page_index + 1;
-			if (next == 0)
-				wrapped = 1;
-			if (page_index > end) {
-				unlock_page(page);
-				break;
-			}
 			wait_on_page_writeback(page);
 			if (page_mapped(page)) {
 				if (!did_range_unmap) {
@@ -480,9 +463,9 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 					 * Zap the rest of the file in one hit.
 					 */
 					unmap_mapping_range(mapping,
-					   (loff_t)page_index<<PAGE_CACHE_SHIFT,
-					   (loff_t)(end - page_index + 1)
-						<< PAGE_CACHE_SHIFT,
+					   (loff_t)index << PAGE_CACHE_SHIFT,
+					   (loff_t)(1 + end - index)
+						 << PAGE_CACHE_SHIFT,
 					    0);
 					did_range_unmap = 1;
 				} else {
@@ -490,8 +473,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 					 * Just zap this page
 					 */
 					unmap_mapping_range(mapping,
-					  (loff_t)page_index<<PAGE_CACHE_SHIFT,
-					  PAGE_CACHE_SIZE, 0);
+					   (loff_t)index << PAGE_CACHE_SHIFT,
+					   PAGE_CACHE_SIZE, 0);
 				}
 			}
 			BUG_ON(page_mapped(page));
@@ -507,6 +490,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
+		index++;
 	}
 	cleancache_flush_inode(mapping);
 	return ret;