aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: David Chinner <dgc@sgi.com> 2007-11-23 00:29:18 -0500
committer: Lachlan McIlroy <lachlan@redback.melbourne.sgi.com> 2008-02-07 02:16:07 -0500
commit: 5d51eff4538bdfeb9b7a2ec030ee3b0980b067d2 (patch)
tree: 27606a34eab830ccc9a066f14f598af0c0c9027c
parent: e4143a1cf5973e3443c0650fc4c35292d3b7baa8 (diff)
[XFS] Fix inode allocation latency
The log force added in xfs_iget_core() has been a performance issue since it was introduced for tight loops that allocate then unlink a single file. Under heavy writeback, this can introduce unnecessary latency due to the log I/O getting stuck behind bulk data writes. Fix this latency problem by avoiding the need for the log force by moving the place we mark the linux inode dirty to the transaction commit rather than on transaction completion. This also closes a potential hole in the sync code where a linux inode is not dirty between the time it is modified and the time the log buffer has been written to disk. SGI-PV: 972753 SGI-Modid: xfs-linux-melb:xfs-kern:30007a Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Christoph Hellwig <hch@infradead.org> Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
-rw-r--r-- fs/xfs/linux-2.6/xfs_iops.c 16
-rw-r--r-- fs/xfs/xfs_iget.c 18
-rw-r--r-- fs/xfs/xfs_inode.c 34
-rw-r--r-- fs/xfs/xfs_inode.h 1
-rw-r--r-- fs/xfs/xfs_inode_item.c 5
5 files changed, 23 insertions, 51 deletions
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index b5afcfcdc7d5..264b1e7dacf7 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -71,6 +71,22 @@ xfs_synchronize_atime(
71} 71}
72 72
73/* 73/*
74 * If the linux inode exists, mark it dirty.
75 * Used when commiting a dirty inode into a transaction so that
76 * the inode will get written back by the linux code
77 */
78void
79xfs_mark_inode_dirty_sync(
80 xfs_inode_t *ip)
81{
82 bhv_vnode_t *vp;
83
84 vp = XFS_ITOV_NULL(ip);
85 if (vp)
86 mark_inode_dirty_sync(vn_to_inode(vp));
87}
88
89/*
74 * Change the requested timestamp in the given inode. 90 * Change the requested timestamp in the given inode.
75 * We don't lock across timestamp updates, and we don't log them but 91 * We don't lock across timestamp updates, and we don't log them but
76 * we do record the fact that there is dirty information in core. 92 * we do record the fact that there is dirty information in core.
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index eecc33d3751f..f01b07687faf 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -140,27 +140,9 @@ again:
140 return ENOENT; 140 return ENOENT;
141 } 141 }
142 142
143 /*
144 * There may be transactions sitting in the
145 * incore log buffers or being flushed to disk
146 * at this time. We can't clear the
147 * XFS_IRECLAIMABLE flag until these
148 * transactions have hit the disk, otherwise we
149 * will void the guarantee the flag provides
150 * xfs_iunpin()
151 */
152 if (xfs_ipincount(ip)) {
153 read_unlock(&pag->pag_ici_lock);
154 xfs_log_force(mp, 0,
155 XFS_LOG_FORCE|XFS_LOG_SYNC);
156 XFS_STATS_INC(xs_ig_frecycle);
157 goto again;
158 }
159
160 xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); 143 xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
161 144
162 XFS_STATS_INC(xs_ig_found); 145 XFS_STATS_INC(xs_ig_found);
163
164 xfs_iflags_clear(ip, XFS_IRECLAIMABLE); 146 xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
165 read_unlock(&pag->pag_ici_lock); 147 read_unlock(&pag->pag_ici_lock);
166 148
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 597e0ed4d2b6..805cab7b2770 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2814,40 +2814,8 @@ xfs_iunpin(
2814{ 2814{
2815 ASSERT(atomic_read(&ip->i_pincount) > 0); 2815 ASSERT(atomic_read(&ip->i_pincount) > 0);
2816 2816
2817 if (atomic_dec_and_lock(&ip->i_pincount, &ip->i_flags_lock)) { 2817 if (atomic_dec_and_test(&ip->i_pincount))
2818
2819 /*
2820 * If the inode is currently being reclaimed, the link between
2821 * the bhv_vnode and the xfs_inode will be broken after the
2822 * XFS_IRECLAIM* flag is set. Hence, if these flags are not
2823 * set, then we can move forward and mark the linux inode dirty
2824 * knowing that it is still valid as it won't freed until after
2825 * the bhv_vnode<->xfs_inode link is broken in xfs_reclaim. The
2826 * i_flags_lock is used to synchronise the setting of the
2827 * XFS_IRECLAIM* flags and the breaking of the link, and so we
2828 * can execute atomically w.r.t to reclaim by holding this lock
2829 * here.
2830 *
2831 * However, we still need to issue the unpin wakeup call as the
2832 * inode reclaim may be blocked waiting for the inode to become
2833 * unpinned.
2834 */
2835
2836 if (!__xfs_iflags_test(ip, XFS_IRECLAIM|XFS_IRECLAIMABLE)) {
2837 bhv_vnode_t *vp = XFS_ITOV_NULL(ip);
2838 struct inode *inode = NULL;
2839
2840 BUG_ON(vp == NULL);
2841 inode = vn_to_inode(vp);
2842 BUG_ON(inode->i_state & I_CLEAR);
2843
2844 /* make sync come back and flush this inode */
2845 if (!(inode->i_state & (I_NEW|I_FREEING)))
2846 mark_inode_dirty_sync(inode);
2847 }
2848 spin_unlock(&ip->i_flags_lock);
2849 wake_up(&ip->i_ipin_wait); 2818 wake_up(&ip->i_ipin_wait);
2850 }
2851} 2819}
2852 2820
2853/* 2821/*
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index d8ed51e28cbb..bc869fd2f6ef 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -532,6 +532,7 @@ xfs_fsize_t xfs_file_last_byte(xfs_inode_t *);
532void xfs_lock_inodes(xfs_inode_t **, int, int, uint); 532void xfs_lock_inodes(xfs_inode_t **, int, int, uint);
533 533
534void xfs_synchronize_atime(xfs_inode_t *); 534void xfs_synchronize_atime(xfs_inode_t *);
535void xfs_mark_inode_dirty_sync(xfs_inode_t *);
535 536
536xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t); 537xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t);
537void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t, 538void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t,
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index e365b137ee4f..034ca7202295 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -274,6 +274,11 @@ xfs_inode_item_format(
274 */ 274 */
275 xfs_synchronize_atime(ip); 275 xfs_synchronize_atime(ip);
276 276
277 /*
278 * make sure the linux inode is dirty
279 */
280 xfs_mark_inode_dirty_sync(ip);
281
277 vecp->i_addr = (xfs_caddr_t)&ip->i_d; 282 vecp->i_addr = (xfs_caddr_t)&ip->i_d;
278 vecp->i_len = sizeof(xfs_dinode_core_t); 283 vecp->i_len = sizeof(xfs_dinode_core_t);
279 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE); 284 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);