diff options
author | Sage Weil <sage@newdream.net> | 2011-08-24 17:07:01 -0400 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2011-10-25 19:10:16 -0400 |
commit | 83eaea22bdfc9e1cec88f81be5b64f30f6c37e8b (patch) | |
tree | f63f415ce4241c12639ef78f4920ed6b99915760 /fs/ceph | |
parent | 80db8bea6a0f4fd047eafd8329a44d5a110f462b (diff) |
Revert "ceph: don't truncate dirty pages in invalidate work thread"
This reverts commit c9af9fb68e01eb2c2165e1bc45cfeeed510c64e6.
We need to block and truncate all pages in order to reliably invalidate
them. Otherwise, we could:
- have some uptodate pages in the cache
- queue an invalidate
- write(2) locks some pages
- invalidate_work skips them
- write(2) only overwrites part of the page
- page now dirty and uptodate
-> partial leakage of invalidated data
It's not entirely clear why we started skipping locked pages in the first
place. I just ran this through fsx and didn't see any problems.
Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/inode.c | 46 |
1 file changed, 1 insertion, 45 deletions
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 095799ba9dd1..5dde7d51dc11 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -9,7 +9,6 @@ | |||
9 | #include <linux/namei.h> | 9 | #include <linux/namei.h> |
10 | #include <linux/writeback.h> | 10 | #include <linux/writeback.h> |
11 | #include <linux/vmalloc.h> | 11 | #include <linux/vmalloc.h> |
12 | #include <linux/pagevec.h> | ||
13 | 12 | ||
14 | #include "super.h" | 13 | #include "super.h" |
15 | #include "mds_client.h" | 14 | #include "mds_client.h" |
@@ -1364,49 +1363,6 @@ void ceph_queue_invalidate(struct inode *inode) | |||
1364 | } | 1363 | } |
1365 | 1364 | ||
1366 | /* | 1365 | /* |
1367 | * invalidate any pages that are not dirty or under writeback. this | ||
1368 | * includes pages that are clean and mapped. | ||
1369 | */ | ||
1370 | static void ceph_invalidate_nondirty_pages(struct address_space *mapping) | ||
1371 | { | ||
1372 | struct pagevec pvec; | ||
1373 | pgoff_t next = 0; | ||
1374 | int i; | ||
1375 | |||
1376 | pagevec_init(&pvec, 0); | ||
1377 | while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { | ||
1378 | for (i = 0; i < pagevec_count(&pvec); i++) { | ||
1379 | struct page *page = pvec.pages[i]; | ||
1380 | pgoff_t index; | ||
1381 | int skip_page = | ||
1382 | (PageDirty(page) || PageWriteback(page)); | ||
1383 | |||
1384 | if (!skip_page) | ||
1385 | skip_page = !trylock_page(page); | ||
1386 | |||
1387 | /* | ||
1388 | * We really shouldn't be looking at the ->index of an | ||
1389 | * unlocked page. But we're not allowed to lock these | ||
1390 | * pages. So we rely upon nobody altering the ->index | ||
1391 | * of this (pinned-by-us) page. | ||
1392 | */ | ||
1393 | index = page->index; | ||
1394 | if (index > next) | ||
1395 | next = index; | ||
1396 | next++; | ||
1397 | |||
1398 | if (skip_page) | ||
1399 | continue; | ||
1400 | |||
1401 | generic_error_remove_page(mapping, page); | ||
1402 | unlock_page(page); | ||
1403 | } | ||
1404 | pagevec_release(&pvec); | ||
1405 | cond_resched(); | ||
1406 | } | ||
1407 | } | ||
1408 | |||
1409 | /* | ||
1410 | * Invalidate inode pages in a worker thread. (This can't be done | 1366 | * Invalidate inode pages in a worker thread. (This can't be done |
1411 | * in the message handler context.) | 1367 | * in the message handler context.) |
1412 | */ | 1368 | */ |
@@ -1429,7 +1385,7 @@ static void ceph_invalidate_work(struct work_struct *work) | |||
1429 | orig_gen = ci->i_rdcache_gen; | 1385 | orig_gen = ci->i_rdcache_gen; |
1430 | spin_unlock(&inode->i_lock); | 1386 | spin_unlock(&inode->i_lock); |
1431 | 1387 | ||
1432 | ceph_invalidate_nondirty_pages(inode->i_mapping); | 1388 | truncate_inode_pages(&inode->i_data, 0); |
1433 | 1389 | ||
1434 | spin_lock(&inode->i_lock); | 1390 | spin_lock(&inode->i_lock); |
1435 | if (orig_gen == ci->i_rdcache_gen && | 1391 | if (orig_gen == ci->i_rdcache_gen && |