aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph
diff options
context:
space:
mode:
authorSage Weil <sage@newdream.net>2011-08-24 17:07:01 -0400
committerSage Weil <sage@newdream.net>2011-10-25 19:10:16 -0400
commit83eaea22bdfc9e1cec88f81be5b64f30f6c37e8b (patch)
treef63f415ce4241c12639ef78f4920ed6b99915760 /fs/ceph
parent80db8bea6a0f4fd047eafd8329a44d5a110f462b (diff)
Revert "ceph: don't truncate dirty pages in invalidate work thread"
This reverts commit c9af9fb68e01eb2c2165e1bc45cfeeed510c64e6. We need to block and truncate all pages in order to reliably invalidate them. Otherwise, we could: - have some uptodate pages in the cache - queue an invalidate - write(2) locks some pages - invalidate_work skips them - write(2) only overwrites part of the page - page now dirty and uptodate -> partial leakage of invalidated data It's not entirely clear why we started skipping locked pages in the first place. I just ran this through fsx and didn't see any problems. Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/inode.c46
1 file changed, 1 insertion, 45 deletions
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 095799ba9dd1..5dde7d51dc11 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -9,7 +9,6 @@
9#include <linux/namei.h> 9#include <linux/namei.h>
10#include <linux/writeback.h> 10#include <linux/writeback.h>
11#include <linux/vmalloc.h> 11#include <linux/vmalloc.h>
12#include <linux/pagevec.h>
13 12
14#include "super.h" 13#include "super.h"
15#include "mds_client.h" 14#include "mds_client.h"
@@ -1364,49 +1363,6 @@ void ceph_queue_invalidate(struct inode *inode)
1364} 1363}
1365 1364
1366/* 1365/*
1367 * invalidate any pages that are not dirty or under writeback. this
1368 * includes pages that are clean and mapped.
1369 */
1370static void ceph_invalidate_nondirty_pages(struct address_space *mapping)
1371{
1372 struct pagevec pvec;
1373 pgoff_t next = 0;
1374 int i;
1375
1376 pagevec_init(&pvec, 0);
1377 while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
1378 for (i = 0; i < pagevec_count(&pvec); i++) {
1379 struct page *page = pvec.pages[i];
1380 pgoff_t index;
1381 int skip_page =
1382 (PageDirty(page) || PageWriteback(page));
1383
1384 if (!skip_page)
1385 skip_page = !trylock_page(page);
1386
1387 /*
1388 * We really shouldn't be looking at the ->index of an
1389 * unlocked page. But we're not allowed to lock these
1390 * pages. So we rely upon nobody altering the ->index
1391 * of this (pinned-by-us) page.
1392 */
1393 index = page->index;
1394 if (index > next)
1395 next = index;
1396 next++;
1397
1398 if (skip_page)
1399 continue;
1400
1401 generic_error_remove_page(mapping, page);
1402 unlock_page(page);
1403 }
1404 pagevec_release(&pvec);
1405 cond_resched();
1406 }
1407}
1408
1409/*
1410 * Invalidate inode pages in a worker thread. (This can't be done 1366 * Invalidate inode pages in a worker thread. (This can't be done
1411 * in the message handler context.) 1367 * in the message handler context.)
1412 */ 1368 */
@@ -1429,7 +1385,7 @@ static void ceph_invalidate_work(struct work_struct *work)
1429 orig_gen = ci->i_rdcache_gen; 1385 orig_gen = ci->i_rdcache_gen;
1430 spin_unlock(&inode->i_lock); 1386 spin_unlock(&inode->i_lock);
1431 1387
1432 ceph_invalidate_nondirty_pages(inode->i_mapping); 1388 truncate_inode_pages(&inode->i_data, 0);
1433 1389
1434 spin_lock(&inode->i_lock); 1390 spin_lock(&inode->i_lock);
1435 if (orig_gen == ci->i_rdcache_gen && 1391 if (orig_gen == ci->i_rdcache_gen &&