author    Steven Whitehouse <swhiteho@redhat.com>  2006-11-23 10:51:34 -0500
committer Steven Whitehouse <swhiteho@redhat.com>  2006-11-30 10:36:42 -0500
commit    b004157ab5b374a498a5874cda68c389219d23e7 (patch)
tree      1e7d7d5c62f3e12cc453e763bbff139b47458be4
parent    ae619320b22f8e0b2bbe4a3a5ac2f9ccf08d7ec2 (diff)
[GFS2] Fix journal flush problem
This fixes a bug which resulted in poor performance due to flushing the
journal too often. The code path in question was via the inode_go_sync()
function in glops.c. The solution is not to flush the journal immediately
when inodes are ejected from memory, but to batch up the work for glockd to
deal with later on. This means that glocks may now live on beyond the end
of the lifetime of their inodes (but not very much longer in the normal
case).

Also fixed in this patch is a bug (which was hidden by the bug mentioned
above) in the calculation of the number of free journal blocks.

The gfs2_logd process has been altered to be more responsive to the journal
filling up. We now wake it up when the number of uncommitted journal blocks
has reached the threshold level, rather than trying to flush directly at
the end of each transaction. This again means doing fewer, but larger, log
flushes in general.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
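As a rough illustration of the policy the message describes, here is a
minimal self-contained userspace sketch (not GFS2 code): committers only
account for their journal blocks and wake a flusher thread once a threshold
of uncommitted blocks is crossed, while the flusher also runs on a periodic
timeout, so flushes become fewer but larger. The thread, lock, counter and
tunable names are invented stand-ins for gfs2_logd, the log lock,
sd_log_num_buf and gt_incore_log_blocks / gt_log_flush_secs.

/*
 * Sketch of threshold-driven, batched log flushing (assumptions as above).
 * Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define INCORE_LOG_BLOCKS 32		/* stand-in for gt_incore_log_blocks */
#define LOG_FLUSH_SECS    1		/* stand-in for gt_log_flush_secs */

static pthread_mutex_t log_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t logd_wake = PTHREAD_COND_INITIALIZER;
static unsigned int log_num_buf;	/* uncommitted journal blocks */
static bool stop;

static void log_flush(void)		/* caller holds log_lock */
{
	if (log_num_buf)
		printf("flushing %u blocks\n", log_num_buf);
	log_num_buf = 0;
}

/* Analogue of the new gfs2_log_commit(): no direct flush, just a wakeup. */
static void log_commit(unsigned int blks)
{
	pthread_mutex_lock(&log_lock);
	log_num_buf += blks;
	if (log_num_buf > INCORE_LOG_BLOCKS)
		pthread_cond_signal(&logd_wake);	/* "wake_up_process()" */
	pthread_mutex_unlock(&log_lock);
}

/* Analogue of gfs2_logd(): flush when woken or when the timeout expires. */
static void *logd(void *arg)
{
	struct timespec t;

	(void)arg;
	pthread_mutex_lock(&log_lock);
	while (!stop) {
		clock_gettime(CLOCK_REALTIME, &t);
		t.tv_sec += LOG_FLUSH_SECS;
		pthread_cond_timedwait(&logd_wake, &log_lock, &t);
		log_flush();		/* one larger flush per wakeup */
	}
	pthread_mutex_unlock(&log_lock);
	return NULL;
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, logd, NULL);

	for (int i = 0; i < 100; i++)	/* many small transactions */
		log_commit(5);

	pthread_mutex_lock(&log_lock);
	stop = true;
	pthread_cond_signal(&logd_wake);
	pthread_mutex_unlock(&log_lock);
	return pthread_join(tid, NULL);
}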
-rw-r--r--  fs/gfs2/daemon.c    |  7
-rw-r--r--  fs/gfs2/glock.c     | 17
-rw-r--r--  fs/gfs2/glock.h     |  1
-rw-r--r--  fs/gfs2/glops.c     | 93
-rw-r--r--  fs/gfs2/log.c       | 17
-rw-r--r--  fs/gfs2/meta_io.c   |  3
-rw-r--r--  fs/gfs2/ops_super.c |  7
7 files changed, 46 insertions(+), 99 deletions(-)
diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
index cab1f68d4685..683cb5bda870 100644
--- a/fs/gfs2/daemon.c
+++ b/fs/gfs2/daemon.c
@@ -112,6 +112,7 @@ int gfs2_logd(void *data)
 	struct gfs2_sbd *sdp = data;
 	struct gfs2_holder ji_gh;
 	unsigned long t;
+	int need_flush;
 
 	while (!kthread_should_stop()) {
 		/* Advance the log tail */
@@ -120,8 +121,10 @@ int gfs2_logd(void *data)
 		    gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;
 
 		gfs2_ail1_empty(sdp, DIO_ALL);
-
-		if (time_after_eq(jiffies, t)) {
+		gfs2_log_lock(sdp);
+		need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks);
+		gfs2_log_unlock(sdp);
+		if (need_flush || time_after_eq(jiffies, t)) {
 			gfs2_log_flush(sdp, NULL);
 			sdp->sd_log_flush_time = jiffies;
 		}
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index b8ba4d5c1d9e..3c2ff81c84e2 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -785,21 +785,6 @@ out:
 	gfs2_holder_put(new_gh);
 }
 
-void gfs2_glock_inode_squish(struct inode *inode)
-{
-	struct gfs2_holder gh;
-	struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
-	gfs2_holder_init(gl, LM_ST_UNLOCKED, 0, &gh);
-	set_bit(HIF_DEMOTE, &gh.gh_iflags);
-	spin_lock(&gl->gl_spin);
-	gfs2_assert(inode->i_sb->s_fs_info, list_empty(&gl->gl_holders));
-	list_add_tail(&gh.gh_list, &gl->gl_waiters2);
-	run_queue(gl);
-	spin_unlock(&gl->gl_spin);
-	wait_for_completion(&gh.gh_wait);
-	gfs2_holder_uninit(&gh);
-}
-
 /**
  * state_change - record that the glock is now in a different state
  * @gl: the glock
@@ -1920,7 +1905,7 @@ out:
 
 static void scan_glock(struct gfs2_glock *gl)
 {
-	if (gl->gl_ops == &gfs2_inode_glops)
+	if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object)
 		return;
 
 	if (gfs2_glmutex_trylock(gl)) {
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index a331bf8175ea..fb39108fc05c 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -106,7 +106,6 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
 void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
 			     const struct gfs2_glock_operations *glops,
 			     unsigned int state, int flags);
-void gfs2_glock_inode_squish(struct inode *inode);
 
 /**
  * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 60561ca070c2..b068d10bcb6e 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -107,70 +107,6 @@ static void gfs2_pte_inval(struct gfs2_glock *gl)
 }
 
 /**
- * gfs2_page_inval - Invalidate all pages associated with a glock
- * @gl: the glock
- *
- */
-
-static void gfs2_page_inval(struct gfs2_glock *gl)
-{
-	struct gfs2_inode *ip;
-	struct inode *inode;
-
-	ip = gl->gl_object;
-	inode = &ip->i_inode;
-	if (!ip || !S_ISREG(inode->i_mode))
-		return;
-
-	truncate_inode_pages(inode->i_mapping, 0);
-	gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), !inode->i_mapping->nrpages);
-	clear_bit(GIF_PAGED, &ip->i_flags);
-}
-
-/**
- * gfs2_page_wait - Wait for writeback of data
- * @gl: the glock
- *
- * Syncs data (not metadata) for a regular file.
- * No-op for all other types.
- */
-
-static void gfs2_page_wait(struct gfs2_glock *gl)
-{
-	struct gfs2_inode *ip = gl->gl_object;
-	struct inode *inode = &ip->i_inode;
-	struct address_space *mapping = inode->i_mapping;
-	int error;
-
-	if (!S_ISREG(inode->i_mode))
-		return;
-
-	error = filemap_fdatawait(mapping);
-
-	/* Put back any errors cleared by filemap_fdatawait()
-	   so they can be caught by someone who can pass them
-	   up to user space. */
-
-	if (error == -ENOSPC)
-		set_bit(AS_ENOSPC, &mapping->flags);
-	else if (error)
-		set_bit(AS_EIO, &mapping->flags);
-
-}
-
-static void gfs2_page_writeback(struct gfs2_glock *gl)
-{
-	struct gfs2_inode *ip = gl->gl_object;
-	struct inode *inode = &ip->i_inode;
-	struct address_space *mapping = inode->i_mapping;
-
-	if (!S_ISREG(inode->i_mode))
-		return;
-
-	filemap_fdatawrite(mapping);
-}
-
-/**
  * meta_go_sync - sync out the metadata for this glock
  * @gl: the glock
  *
@@ -264,11 +200,24 @@ static void inode_go_drop_th(struct gfs2_glock *gl)
 
 static void inode_go_sync(struct gfs2_glock *gl)
 {
+	struct gfs2_inode *ip = gl->gl_object;
+
+	if (ip && !S_ISREG(ip->i_inode.i_mode))
+		ip = NULL;
+
 	if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
-		gfs2_page_writeback(gl);
 		gfs2_log_flush(gl->gl_sbd, gl);
+		if (ip)
+			filemap_fdatawrite(ip->i_inode.i_mapping);
 		gfs2_meta_sync(gl);
-		gfs2_page_wait(gl);
+		if (ip) {
+			struct address_space *mapping = ip->i_inode.i_mapping;
+			int error = filemap_fdatawait(mapping);
+			if (error == -ENOSPC)
+				set_bit(AS_ENOSPC, &mapping->flags);
+			else if (error)
+				set_bit(AS_EIO, &mapping->flags);
+		}
 		clear_bit(GLF_DIRTY, &gl->gl_flags);
 		gfs2_ail_empty_gl(gl);
 	}
@@ -283,14 +232,20 @@ static void inode_go_sync(struct gfs2_glock *gl)
 
 static void inode_go_inval(struct gfs2_glock *gl, int flags)
 {
+	struct gfs2_inode *ip = gl->gl_object;
 	int meta = (flags & DIO_METADATA);
 
 	if (meta) {
-		struct gfs2_inode *ip = gl->gl_object;
 		gfs2_meta_inval(gl);
-		set_bit(GIF_INVALID, &ip->i_flags);
+		if (ip)
+			set_bit(GIF_INVALID, &ip->i_flags);
+	}
+
+	if (ip && S_ISREG(ip->i_inode.i_mode)) {
+		truncate_inode_pages(ip->i_inode.i_mapping, 0);
+		gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), !ip->i_inode.i_mapping->nrpages);
+		clear_bit(GIF_PAGED, &ip->i_flags);
 	}
-	gfs2_page_inval(gl);
 }
 
 /**
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 0cace3da9dbb..6456fc39aace 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -261,6 +261,12 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
  * @sdp: The GFS2 superblock
  * @blks: The number of blocks to reserve
  *
+ * Note that we never give out the last 6 blocks of the journal. Thats
+ * due to the fact that there is a small number of header blocks
+ * associated with each log flush. The exact number can't be known until
+ * flush time, so we ensure that we have just enough free blocks at all
+ * times to avoid running out during a log flush.
+ *
  * Returns: errno
  */
 
@@ -274,7 +280,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
 
 	mutex_lock(&sdp->sd_log_reserve_mutex);
 	gfs2_log_lock(sdp);
-	while(sdp->sd_log_blks_free <= blks) {
+	while(sdp->sd_log_blks_free <= (blks + 6)) {
 		gfs2_log_unlock(sdp);
 		gfs2_ail1_empty(sdp, 0);
 		gfs2_log_flush(sdp, NULL);
@@ -643,12 +649,9 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 	up_read(&sdp->sd_log_flush_lock);
 
 	gfs2_log_lock(sdp);
-	if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks)) {
-		gfs2_log_unlock(sdp);
-		gfs2_log_flush(sdp, NULL);
-	} else {
-		gfs2_log_unlock(sdp);
-	}
+	if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks))
+		wake_up_process(sdp->sd_logd_process);
+	gfs2_log_unlock(sdp);
 }
 
 /**
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 3912d6a4b1e6..939a09f6e885 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -472,6 +472,9 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
 	struct buffer_head *bh = NULL, **bh_slot = ip->i_cache + height;
 	int in_cache = 0;
 
+	BUG_ON(!gl);
+	BUG_ON(!sdp);
+
 	spin_lock(&ip->i_spin);
 	if (*bh_slot && (*bh_slot)->b_blocknr == num) {
 		bh = *bh_slot;
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 863517569223..7685b46f934b 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -157,7 +157,8 @@ static void gfs2_write_super(struct super_block *sb)
 static int gfs2_sync_fs(struct super_block *sb, int wait)
 {
 	sb->s_dirt = 0;
-	gfs2_log_flush(sb->s_fs_info, NULL);
+	if (wait)
+		gfs2_log_flush(sb->s_fs_info, NULL);
 	return 0;
 }
 
@@ -293,8 +294,6 @@ static void gfs2_clear_inode(struct inode *inode)
 	 */
 	if (inode->i_private) {
 		struct gfs2_inode *ip = GFS2_I(inode);
-		gfs2_glock_inode_squish(inode);
-		gfs2_assert(inode->i_sb->s_fs_info, ip->i_gl->gl_state == LM_ST_UNLOCKED);
 		ip->i_gl->gl_object = NULL;
 		gfs2_glock_schedule_for_reclaim(ip->i_gl);
 		gfs2_glock_put(ip->i_gl);
@@ -395,7 +394,7 @@ static void gfs2_delete_inode(struct inode *inode)
 	if (!inode->i_private)
 		goto out;
 
-	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &gh);
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &gh);
 	if (unlikely(error)) {
 		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
 		goto out;