diff options
author | Steven Whitehouse <swhiteho@redhat.com> | 2011-07-14 03:59:44 -0400 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2011-07-14 03:59:44 -0400 |
commit | 380f7c65a7eb3288e4b6812acf3474a1de230707 (patch) | |
tree | 7abe4b4ce390afc6b4d2bc7cae0ec298fe2efa3c | |
parent | 3942ae5319640ced5844b75f44884e4bcb8a2f16 (diff) |
GFS2: Resolve inode eviction and ail list interaction bug
This patch contains a few misc fixes which resolve a recently
reported issue. This patch has been a real team effort and has
received a lot of testing.
The first issue is that the ail lock needs to be held over a few
more operations. The lock thats added into gfs2_releasepage() may
possibly be a candidate for replacing with RCU at some future
point, but at this stage we've gone for the obvious fix.
The second issue is that gfs2_write_inode() can end up calling
a glock recursively when called from gfs2_evict_inode() via the
syncing code, so it needs a guard added.
The third issue is that we either need to not truncate the metadata
pages of inodes which have zero link count, but which we cannot
deallocate due to them still being in use by other nodes, or we need
to ensure that those pages have all made it through the journal and
ail lists first. This patch takes the former approach, but the
latter has also been tested and there is nothing to choose between
them performance-wise. So again, we could revise that decision
in the future.
Also, the inode eviction process is now better documented.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Tested-by: Bob Peterson <rpeterso@redhat.com>
Tested-by: Abhijith Das <adas@redhat.com>
Reported-by: Barry J. Marson <bmarson@redhat.com>
Reported-by: David Teigland <teigland@redhat.com>
-rw-r--r-- | fs/gfs2/aops.c | 3 | ||||
-rw-r--r-- | fs/gfs2/glops.c | 4 | ||||
-rw-r--r-- | fs/gfs2/log.c | 1 | ||||
-rw-r--r-- | fs/gfs2/super.c | 36 |
4 files changed, 36 insertions, 8 deletions
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 802ac5eeba28..f9fbbe96c222 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
@@ -1069,6 +1069,7 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask) | |||
1069 | return 0; | 1069 | return 0; |
1070 | 1070 | ||
1071 | gfs2_log_lock(sdp); | 1071 | gfs2_log_lock(sdp); |
1072 | spin_lock(&sdp->sd_ail_lock); | ||
1072 | head = bh = page_buffers(page); | 1073 | head = bh = page_buffers(page); |
1073 | do { | 1074 | do { |
1074 | if (atomic_read(&bh->b_count)) | 1075 | if (atomic_read(&bh->b_count)) |
@@ -1080,6 +1081,7 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask) | |||
1080 | goto not_possible; | 1081 | goto not_possible; |
1081 | bh = bh->b_this_page; | 1082 | bh = bh->b_this_page; |
1082 | } while(bh != head); | 1083 | } while(bh != head); |
1084 | spin_unlock(&sdp->sd_ail_lock); | ||
1083 | gfs2_log_unlock(sdp); | 1085 | gfs2_log_unlock(sdp); |
1084 | 1086 | ||
1085 | head = bh = page_buffers(page); | 1087 | head = bh = page_buffers(page); |
@@ -1112,6 +1114,7 @@ not_possible: /* Should never happen */ | |||
1112 | WARN_ON(buffer_dirty(bh)); | 1114 | WARN_ON(buffer_dirty(bh)); |
1113 | WARN_ON(buffer_pinned(bh)); | 1115 | WARN_ON(buffer_pinned(bh)); |
1114 | cannot_release: | 1116 | cannot_release: |
1117 | spin_unlock(&sdp->sd_ail_lock); | ||
1115 | gfs2_log_unlock(sdp); | 1118 | gfs2_log_unlock(sdp); |
1116 | return 0; | 1119 | return 0; |
1117 | } | 1120 | } |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 712b722030a1..2cca29316bd6 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -47,10 +47,10 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl) | |||
47 | bd_ail_gl_list); | 47 | bd_ail_gl_list); |
48 | bh = bd->bd_bh; | 48 | bh = bd->bd_bh; |
49 | gfs2_remove_from_ail(bd); | 49 | gfs2_remove_from_ail(bd); |
50 | spin_unlock(&sdp->sd_ail_lock); | ||
51 | |||
52 | bd->bd_bh = NULL; | 50 | bd->bd_bh = NULL; |
53 | bh->b_private = NULL; | 51 | bh->b_private = NULL; |
52 | spin_unlock(&sdp->sd_ail_lock); | ||
53 | |||
54 | bd->bd_blkno = bh->b_blocknr; | 54 | bd->bd_blkno = bh->b_blocknr; |
55 | gfs2_log_lock(sdp); | 55 | gfs2_log_lock(sdp); |
56 | gfs2_assert_withdraw(sdp, !buffer_busy(bh)); | 56 | gfs2_assert_withdraw(sdp, !buffer_busy(bh)); |
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 903115f2bb34..85c62923ee29 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
@@ -903,6 +903,7 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp) | |||
903 | if (gfs2_ail1_empty(sdp)) | 903 | if (gfs2_ail1_empty(sdp)) |
904 | break; | 904 | break; |
905 | } | 905 | } |
906 | gfs2_log_flush(sdp, NULL); | ||
906 | } | 907 | } |
907 | 908 | ||
908 | static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp) | 909 | static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp) |
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index ed540e7018be..fb0edf735483 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -757,13 +757,17 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
757 | struct timespec atime; | 757 | struct timespec atime; |
758 | struct gfs2_dinode *di; | 758 | struct gfs2_dinode *di; |
759 | int ret = -EAGAIN; | 759 | int ret = -EAGAIN; |
760 | int unlock_required = 0; | ||
760 | 761 | ||
761 | /* Skip timestamp update, if this is from a memalloc */ | 762 | /* Skip timestamp update, if this is from a memalloc */ |
762 | if (current->flags & PF_MEMALLOC) | 763 | if (current->flags & PF_MEMALLOC) |
763 | goto do_flush; | 764 | goto do_flush; |
764 | ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | 765 | if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { |
765 | if (ret) | 766 | ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); |
766 | goto do_flush; | 767 | if (ret) |
768 | goto do_flush; | ||
769 | unlock_required = 1; | ||
770 | } | ||
767 | ret = gfs2_trans_begin(sdp, RES_DINODE, 0); | 771 | ret = gfs2_trans_begin(sdp, RES_DINODE, 0); |
768 | if (ret) | 772 | if (ret) |
769 | goto do_unlock; | 773 | goto do_unlock; |
@@ -780,7 +784,8 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
780 | } | 784 | } |
781 | gfs2_trans_end(sdp); | 785 | gfs2_trans_end(sdp); |
782 | do_unlock: | 786 | do_unlock: |
783 | gfs2_glock_dq_uninit(&gh); | 787 | if (unlock_required) |
788 | gfs2_glock_dq_uninit(&gh); | ||
784 | do_flush: | 789 | do_flush: |
785 | if (wbc->sync_mode == WB_SYNC_ALL) | 790 | if (wbc->sync_mode == WB_SYNC_ALL) |
786 | gfs2_log_flush(GFS2_SB(inode), ip->i_gl); | 791 | gfs2_log_flush(GFS2_SB(inode), ip->i_gl); |
@@ -1427,7 +1432,20 @@ out: | |||
1427 | return error; | 1432 | return error; |
1428 | } | 1433 | } |
1429 | 1434 | ||
1430 | /* | 1435 | /** |
1436 | * gfs2_evict_inode - Remove an inode from cache | ||
1437 | * @inode: The inode to evict | ||
1438 | * | ||
1439 | * There are three cases to consider: | ||
1440 | * 1. i_nlink == 0, we are final opener (and must deallocate) | ||
1441 | * 2. i_nlink == 0, we are not the final opener (and cannot deallocate) | ||
1442 | * 3. i_nlink > 0 | ||
1443 | * | ||
1444 | * If the fs is read only, then we have to treat all cases as per #3 | ||
1445 | * since we are unable to do any deallocation. The inode will be | ||
1446 | * deallocated by the next read/write node to attempt an allocation | ||
1447 | * in the same resource group | ||
1448 | * | ||
1431 | * We have to (at the moment) hold the inodes main lock to cover | 1449 | * We have to (at the moment) hold the inodes main lock to cover |
1432 | * the gap between unlocking the shared lock on the iopen lock and | 1450 | * the gap between unlocking the shared lock on the iopen lock and |
1433 | * taking the exclusive lock. I'd rather do a shared -> exclusive | 1451 | * taking the exclusive lock. I'd rather do a shared -> exclusive |
@@ -1470,6 +1488,8 @@ static void gfs2_evict_inode(struct inode *inode) | |||
1470 | if (error) | 1488 | if (error) |
1471 | goto out_truncate; | 1489 | goto out_truncate; |
1472 | 1490 | ||
1491 | /* Case 1 starts here */ | ||
1492 | |||
1473 | if (S_ISDIR(inode->i_mode) && | 1493 | if (S_ISDIR(inode->i_mode) && |
1474 | (ip->i_diskflags & GFS2_DIF_EXHASH)) { | 1494 | (ip->i_diskflags & GFS2_DIF_EXHASH)) { |
1475 | error = gfs2_dir_exhash_dealloc(ip); | 1495 | error = gfs2_dir_exhash_dealloc(ip); |
@@ -1493,13 +1513,16 @@ static void gfs2_evict_inode(struct inode *inode) | |||
1493 | goto out_unlock; | 1513 | goto out_unlock; |
1494 | 1514 | ||
1495 | out_truncate: | 1515 | out_truncate: |
1516 | /* Case 2 starts here */ | ||
1496 | error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); | 1517 | error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); |
1497 | if (error) | 1518 | if (error) |
1498 | goto out_unlock; | 1519 | goto out_unlock; |
1499 | gfs2_final_release_pages(ip); | 1520 | /* Needs to be done before glock release & also in a transaction */ |
1521 | truncate_inode_pages(&inode->i_data, 0); | ||
1500 | gfs2_trans_end(sdp); | 1522 | gfs2_trans_end(sdp); |
1501 | 1523 | ||
1502 | out_unlock: | 1524 | out_unlock: |
1525 | /* Error path for case 1 */ | ||
1503 | if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) | 1526 | if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) |
1504 | gfs2_glock_dq(&ip->i_iopen_gh); | 1527 | gfs2_glock_dq(&ip->i_iopen_gh); |
1505 | gfs2_holder_uninit(&ip->i_iopen_gh); | 1528 | gfs2_holder_uninit(&ip->i_iopen_gh); |
@@ -1507,6 +1530,7 @@ out_unlock: | |||
1507 | if (error && error != GLR_TRYFAILED && error != -EROFS) | 1530 | if (error && error != GLR_TRYFAILED && error != -EROFS) |
1508 | fs_warn(sdp, "gfs2_evict_inode: %d\n", error); | 1531 | fs_warn(sdp, "gfs2_evict_inode: %d\n", error); |
1509 | out: | 1532 | out: |
1533 | /* Case 3 starts here */ | ||
1510 | truncate_inode_pages(&inode->i_data, 0); | 1534 | truncate_inode_pages(&inode->i_data, 0); |
1511 | end_writeback(inode); | 1535 | end_writeback(inode); |
1512 | 1536 | ||