diff options
| author | Sage Weil <sage@newdream.net> | 2011-08-24 17:07:01 -0400 |
|---|---|---|
| committer | Sage Weil <sage@newdream.net> | 2011-10-25 19:10:16 -0400 |
| commit | 83eaea22bdfc9e1cec88f81be5b64f30f6c37e8b (patch) | |
| tree | f63f415ce4241c12639ef78f4920ed6b99915760 | |
| parent | 80db8bea6a0f4fd047eafd8329a44d5a110f462b (diff) | |
Revert "ceph: don't truncate dirty pages in invalidate work thread"
This reverts commit c9af9fb68e01eb2c2165e1bc45cfeeed510c64e6.
We need to block and truncate all pages in order to reliably invalidate
them. Otherwise, we could:
- have some uptodate pages in the cache
- queue an invalidate
- write(2) locks some pages
- invalidate_work skips them
- write(2) only overwrites part of the page
- page now dirty and uptodate
-> partial leakage of invalidated data
It's not entirely clear why we started skipping locked pages in the first
place. I just ran this through fsx and didn't see any problems.
Signed-off-by: Sage Weil <sage@newdream.net>
| -rw-r--r-- | fs/ceph/inode.c | 46 |
1 file changed, 1 insertion, 45 deletions
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 095799ba9dd1..5dde7d51dc11 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
| @@ -9,7 +9,6 @@ | |||
| 9 | #include <linux/namei.h> | 9 | #include <linux/namei.h> |
| 10 | #include <linux/writeback.h> | 10 | #include <linux/writeback.h> |
| 11 | #include <linux/vmalloc.h> | 11 | #include <linux/vmalloc.h> |
| 12 | #include <linux/pagevec.h> | ||
| 13 | 12 | ||
| 14 | #include "super.h" | 13 | #include "super.h" |
| 15 | #include "mds_client.h" | 14 | #include "mds_client.h" |
| @@ -1364,49 +1363,6 @@ void ceph_queue_invalidate(struct inode *inode) | |||
| 1364 | } | 1363 | } |
| 1365 | 1364 | ||
| 1366 | /* | 1365 | /* |
| 1367 | * invalidate any pages that are not dirty or under writeback. this | ||
| 1368 | * includes pages that are clean and mapped. | ||
| 1369 | */ | ||
| 1370 | static void ceph_invalidate_nondirty_pages(struct address_space *mapping) | ||
| 1371 | { | ||
| 1372 | struct pagevec pvec; | ||
| 1373 | pgoff_t next = 0; | ||
| 1374 | int i; | ||
| 1375 | |||
| 1376 | pagevec_init(&pvec, 0); | ||
| 1377 | while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { | ||
| 1378 | for (i = 0; i < pagevec_count(&pvec); i++) { | ||
| 1379 | struct page *page = pvec.pages[i]; | ||
| 1380 | pgoff_t index; | ||
| 1381 | int skip_page = | ||
| 1382 | (PageDirty(page) || PageWriteback(page)); | ||
| 1383 | |||
| 1384 | if (!skip_page) | ||
| 1385 | skip_page = !trylock_page(page); | ||
| 1386 | |||
| 1387 | /* | ||
| 1388 | * We really shouldn't be looking at the ->index of an | ||
| 1389 | * unlocked page. But we're not allowed to lock these | ||
| 1390 | * pages. So we rely upon nobody altering the ->index | ||
| 1391 | * of this (pinned-by-us) page. | ||
| 1392 | */ | ||
| 1393 | index = page->index; | ||
| 1394 | if (index > next) | ||
| 1395 | next = index; | ||
| 1396 | next++; | ||
| 1397 | |||
| 1398 | if (skip_page) | ||
| 1399 | continue; | ||
| 1400 | |||
| 1401 | generic_error_remove_page(mapping, page); | ||
| 1402 | unlock_page(page); | ||
| 1403 | } | ||
| 1404 | pagevec_release(&pvec); | ||
| 1405 | cond_resched(); | ||
| 1406 | } | ||
| 1407 | } | ||
| 1408 | |||
| 1409 | /* | ||
| 1410 | * Invalidate inode pages in a worker thread. (This can't be done | 1366 | * Invalidate inode pages in a worker thread. (This can't be done |
| 1411 | * in the message handler context.) | 1367 | * in the message handler context.) |
| 1412 | */ | 1368 | */ |
| @@ -1429,7 +1385,7 @@ static void ceph_invalidate_work(struct work_struct *work) | |||
| 1429 | orig_gen = ci->i_rdcache_gen; | 1385 | orig_gen = ci->i_rdcache_gen; |
| 1430 | spin_unlock(&inode->i_lock); | 1386 | spin_unlock(&inode->i_lock); |
| 1431 | 1387 | ||
| 1432 | ceph_invalidate_nondirty_pages(inode->i_mapping); | 1388 | truncate_inode_pages(&inode->i_data, 0); |
| 1433 | 1389 | ||
| 1434 | spin_lock(&inode->i_lock); | 1390 | spin_lock(&inode->i_lock); |
| 1435 | if (orig_gen == ci->i_rdcache_gen && | 1391 | if (orig_gen == ci->i_rdcache_gen && |
