aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSteven Whitehouse <swhiteho@redhat.com>2011-07-14 03:59:44 -0400
committerSteven Whitehouse <swhiteho@redhat.com>2011-07-14 03:59:44 -0400
commit380f7c65a7eb3288e4b6812acf3474a1de230707 (patch)
tree7abe4b4ce390afc6b4d2bc7cae0ec298fe2efa3c
parent3942ae5319640ced5844b75f44884e4bcb8a2f16 (diff)
GFS2: Resolve inode eviction and ail list interaction bug
This patch contains a few misc fixes which resolve a recently reported issue. This patch has been a real team effort and has received a lot of testing. The first issue is that the ail lock needs to be held over a few more operations. The lock thats added into gfs2_releasepage() may possibly be a candidate for replacing with RCU at some future point, but at this stage we've gone for the obvious fix. The second issue is that gfs2_write_inode() can end up calling a glock recursively when called from gfs2_evict_inode() via the syncing code, so it needs a guard added. The third issue is that we either need to not truncate the metadata pages of inodes which have zero link count, but which we cannot deallocate due to them still being in use by other nodes, or we need to ensure that those pages have all made it through the journal and ail lists first. This patch takes the former approach, but the latter has also been tested and there is nothing to choose between them performance-wise. So again, we could revise that decision in the future. Also, the inode eviction process is now better documented. Signed-off-by: Steven Whitehouse <swhiteho@redhat.com> Tested-by: Bob Peterson <rpeterso@redhat.com> Tested-by: Abhijith Das <adas@redhat.com> Reported-by: Barry J. Marson <bmarson@redhat.com> Reported-by: David Teigland <teigland@redhat.com>
-rw-r--r--fs/gfs2/aops.c3
-rw-r--r--fs/gfs2/glops.c4
-rw-r--r--fs/gfs2/log.c1
-rw-r--r--fs/gfs2/super.c36
4 files changed, 36 insertions, 8 deletions
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 802ac5eeba28..f9fbbe96c222 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1069,6 +1069,7 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
1069 return 0; 1069 return 0;
1070 1070
1071 gfs2_log_lock(sdp); 1071 gfs2_log_lock(sdp);
1072 spin_lock(&sdp->sd_ail_lock);
1072 head = bh = page_buffers(page); 1073 head = bh = page_buffers(page);
1073 do { 1074 do {
1074 if (atomic_read(&bh->b_count)) 1075 if (atomic_read(&bh->b_count))
@@ -1080,6 +1081,7 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
1080 goto not_possible; 1081 goto not_possible;
1081 bh = bh->b_this_page; 1082 bh = bh->b_this_page;
1082 } while(bh != head); 1083 } while(bh != head);
1084 spin_unlock(&sdp->sd_ail_lock);
1083 gfs2_log_unlock(sdp); 1085 gfs2_log_unlock(sdp);
1084 1086
1085 head = bh = page_buffers(page); 1087 head = bh = page_buffers(page);
@@ -1112,6 +1114,7 @@ not_possible: /* Should never happen */
1112 WARN_ON(buffer_dirty(bh)); 1114 WARN_ON(buffer_dirty(bh));
1113 WARN_ON(buffer_pinned(bh)); 1115 WARN_ON(buffer_pinned(bh));
1114cannot_release: 1116cannot_release:
1117 spin_unlock(&sdp->sd_ail_lock);
1115 gfs2_log_unlock(sdp); 1118 gfs2_log_unlock(sdp);
1116 return 0; 1119 return 0;
1117} 1120}
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 712b722030a1..2cca29316bd6 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -47,10 +47,10 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl)
47 bd_ail_gl_list); 47 bd_ail_gl_list);
48 bh = bd->bd_bh; 48 bh = bd->bd_bh;
49 gfs2_remove_from_ail(bd); 49 gfs2_remove_from_ail(bd);
50 spin_unlock(&sdp->sd_ail_lock);
51
52 bd->bd_bh = NULL; 50 bd->bd_bh = NULL;
53 bh->b_private = NULL; 51 bh->b_private = NULL;
52 spin_unlock(&sdp->sd_ail_lock);
53
54 bd->bd_blkno = bh->b_blocknr; 54 bd->bd_blkno = bh->b_blocknr;
55 gfs2_log_lock(sdp); 55 gfs2_log_lock(sdp);
56 gfs2_assert_withdraw(sdp, !buffer_busy(bh)); 56 gfs2_assert_withdraw(sdp, !buffer_busy(bh));
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 903115f2bb34..85c62923ee29 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -903,6 +903,7 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
903 if (gfs2_ail1_empty(sdp)) 903 if (gfs2_ail1_empty(sdp))
904 break; 904 break;
905 } 905 }
906 gfs2_log_flush(sdp, NULL);
906} 907}
907 908
908static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp) 909static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index ed540e7018be..fb0edf735483 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -757,13 +757,17 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
757 struct timespec atime; 757 struct timespec atime;
758 struct gfs2_dinode *di; 758 struct gfs2_dinode *di;
759 int ret = -EAGAIN; 759 int ret = -EAGAIN;
760 int unlock_required = 0;
760 761
761 /* Skip timestamp update, if this is from a memalloc */ 762 /* Skip timestamp update, if this is from a memalloc */
762 if (current->flags & PF_MEMALLOC) 763 if (current->flags & PF_MEMALLOC)
763 goto do_flush; 764 goto do_flush;
764 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 765 if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
765 if (ret) 766 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
766 goto do_flush; 767 if (ret)
768 goto do_flush;
769 unlock_required = 1;
770 }
767 ret = gfs2_trans_begin(sdp, RES_DINODE, 0); 771 ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
768 if (ret) 772 if (ret)
769 goto do_unlock; 773 goto do_unlock;
@@ -780,7 +784,8 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
780 } 784 }
781 gfs2_trans_end(sdp); 785 gfs2_trans_end(sdp);
782do_unlock: 786do_unlock:
783 gfs2_glock_dq_uninit(&gh); 787 if (unlock_required)
788 gfs2_glock_dq_uninit(&gh);
784do_flush: 789do_flush:
785 if (wbc->sync_mode == WB_SYNC_ALL) 790 if (wbc->sync_mode == WB_SYNC_ALL)
786 gfs2_log_flush(GFS2_SB(inode), ip->i_gl); 791 gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
@@ -1427,7 +1432,20 @@ out:
1427 return error; 1432 return error;
1428} 1433}
1429 1434
1430/* 1435/**
1436 * gfs2_evict_inode - Remove an inode from cache
1437 * @inode: The inode to evict
1438 *
1439 * There are three cases to consider:
1440 * 1. i_nlink == 0, we are final opener (and must deallocate)
1441 * 2. i_nlink == 0, we are not the final opener (and cannot deallocate)
1442 * 3. i_nlink > 0
1443 *
1444 * If the fs is read only, then we have to treat all cases as per #3
1445 * since we are unable to do any deallocation. The inode will be
1446 * deallocated by the next read/write node to attempt an allocation
1447 * in the same resource group
1448 *
1431 * We have to (at the moment) hold the inodes main lock to cover 1449 * We have to (at the moment) hold the inodes main lock to cover
1432 * the gap between unlocking the shared lock on the iopen lock and 1450 * the gap between unlocking the shared lock on the iopen lock and
1433 * taking the exclusive lock. I'd rather do a shared -> exclusive 1451 * taking the exclusive lock. I'd rather do a shared -> exclusive
@@ -1470,6 +1488,8 @@ static void gfs2_evict_inode(struct inode *inode)
1470 if (error) 1488 if (error)
1471 goto out_truncate; 1489 goto out_truncate;
1472 1490
1491 /* Case 1 starts here */
1492
1473 if (S_ISDIR(inode->i_mode) && 1493 if (S_ISDIR(inode->i_mode) &&
1474 (ip->i_diskflags & GFS2_DIF_EXHASH)) { 1494 (ip->i_diskflags & GFS2_DIF_EXHASH)) {
1475 error = gfs2_dir_exhash_dealloc(ip); 1495 error = gfs2_dir_exhash_dealloc(ip);
@@ -1493,13 +1513,16 @@ static void gfs2_evict_inode(struct inode *inode)
1493 goto out_unlock; 1513 goto out_unlock;
1494 1514
1495out_truncate: 1515out_truncate:
1516 /* Case 2 starts here */
1496 error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); 1517 error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
1497 if (error) 1518 if (error)
1498 goto out_unlock; 1519 goto out_unlock;
1499 gfs2_final_release_pages(ip); 1520 /* Needs to be done before glock release & also in a transaction */
1521 truncate_inode_pages(&inode->i_data, 0);
1500 gfs2_trans_end(sdp); 1522 gfs2_trans_end(sdp);
1501 1523
1502out_unlock: 1524out_unlock:
1525 /* Error path for case 1 */
1503 if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) 1526 if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
1504 gfs2_glock_dq(&ip->i_iopen_gh); 1527 gfs2_glock_dq(&ip->i_iopen_gh);
1505 gfs2_holder_uninit(&ip->i_iopen_gh); 1528 gfs2_holder_uninit(&ip->i_iopen_gh);
@@ -1507,6 +1530,7 @@ out_unlock:
1507 if (error && error != GLR_TRYFAILED && error != -EROFS) 1530 if (error && error != GLR_TRYFAILED && error != -EROFS)
1508 fs_warn(sdp, "gfs2_evict_inode: %d\n", error); 1531 fs_warn(sdp, "gfs2_evict_inode: %d\n", error);
1509out: 1532out:
1533 /* Case 3 starts here */
1510 truncate_inode_pages(&inode->i_data, 0); 1534 truncate_inode_pages(&inode->i_data, 0);
1511 end_writeback(inode); 1535 end_writeback(inode);
1512 1536