aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@woody.linux-foundation.org>2007-09-19 14:40:13 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-09-19 14:40:13 -0400
commita78feb7c8a0687913f9869c05811054595a42a76 (patch)
treeb89ceba977943e58475e28cf6acd6377a8d84320
parent91fe7d7cdd7ebb0b6c01f201a23824ab5b466ada (diff)
parentb394e43e995d08821588a22561c6a71a63b4ff27 (diff)
Merge branch 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6
* 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6: [XFS] Avoid replaying inode buffer initialisation log items if on-disk version is newer. [XFS] Ensure file size updates have been completed before writing inode to disk. [XFS] On-demand reaping of the MRU cache
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c4
-rw-r--r--fs/xfs/xfs_buf_item.h5
-rw-r--r--fs/xfs/xfs_filestream.c3
-rw-r--r--fs/xfs/xfs_log_recover.c51
-rw-r--r--fs/xfs/xfs_mru_cache.c72
-rw-r--r--fs/xfs/xfs_mru_cache.h6
-rw-r--r--fs/xfs/xfs_trans_buf.c1
-rw-r--r--fs/xfs/xfs_vnodeops.c20
9 files changed, 101 insertions, 62 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index d9c40fe64195..5f152f60d74d 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -181,6 +181,7 @@ xfs_setfilesize(
181 ip->i_d.di_size = isize; 181 ip->i_d.di_size = isize;
182 ip->i_update_core = 1; 182 ip->i_update_core = 1;
183 ip->i_update_size = 1; 183 ip->i_update_size = 1;
184 mark_inode_dirty_sync(vn_to_inode(ioend->io_vnode));
184 } 185 }
185 186
186 xfs_iunlock(ip, XFS_ILOCK_EXCL); 187 xfs_iunlock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 4528f9a3f304..491d1f4f202d 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -415,8 +415,10 @@ xfs_fs_write_inode(
415 415
416 if (vp) { 416 if (vp) {
417 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 417 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
418 if (sync) 418 if (sync) {
419 filemap_fdatawait(inode->i_mapping);
419 flags |= FLUSH_SYNC; 420 flags |= FLUSH_SYNC;
421 }
420 error = bhv_vop_iflush(vp, flags); 422 error = bhv_vop_iflush(vp, flags);
421 if (error == EAGAIN) 423 if (error == EAGAIN)
422 error = sync? bhv_vop_iflush(vp, flags | FLUSH_LOG) : 0; 424 error = sync? bhv_vop_iflush(vp, flags | FLUSH_LOG) : 0;
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index d7e136143066..fa25b7dcc6c3 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -52,6 +52,11 @@ typedef struct xfs_buf_log_format_t {
52#define XFS_BLI_UDQUOT_BUF 0x4 52#define XFS_BLI_UDQUOT_BUF 0x4
53#define XFS_BLI_PDQUOT_BUF 0x8 53#define XFS_BLI_PDQUOT_BUF 0x8
54#define XFS_BLI_GDQUOT_BUF 0x10 54#define XFS_BLI_GDQUOT_BUF 0x10
55/*
56 * This flag indicates that the buffer contains newly allocated
57 * inodes.
58 */
59#define XFS_BLI_INODE_NEW_BUF 0x20
55 60
56#define XFS_BLI_CHUNK 128 61#define XFS_BLI_CHUNK 128
57#define XFS_BLI_SHIFT 7 62#define XFS_BLI_SHIFT 7
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index ce2278611bb7..16f8e175167d 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -467,8 +467,7 @@ void
467xfs_filestream_flush( 467xfs_filestream_flush(
468 xfs_mount_t *mp) 468 xfs_mount_t *mp)
469{ 469{
470 /* point in time flush, so keep the reaper running */ 470 xfs_mru_cache_flush(mp->m_filestream);
471 xfs_mru_cache_flush(mp->m_filestream, 1);
472} 471}
473 472
474/* 473/*
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8ae6e8e5f3db..dacb19739cc2 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1874,6 +1874,7 @@ xlog_recover_do_inode_buffer(
1874/*ARGSUSED*/ 1874/*ARGSUSED*/
1875STATIC void 1875STATIC void
1876xlog_recover_do_reg_buffer( 1876xlog_recover_do_reg_buffer(
1877 xfs_mount_t *mp,
1877 xlog_recover_item_t *item, 1878 xlog_recover_item_t *item,
1878 xfs_buf_t *bp, 1879 xfs_buf_t *bp,
1879 xfs_buf_log_format_t *buf_f) 1880 xfs_buf_log_format_t *buf_f)
@@ -1884,6 +1885,50 @@ xlog_recover_do_reg_buffer(
1884 unsigned int *data_map = NULL; 1885 unsigned int *data_map = NULL;
1885 unsigned int map_size = 0; 1886 unsigned int map_size = 0;
1886 int error; 1887 int error;
1888 int stale_buf = 1;
1889
1890 /*
1891 * Scan through the on-disk inode buffer and attempt to
1892 * determine if it has been written to since it was logged.
1893 *
1894 * - If any of the magic numbers are incorrect then the buffer is stale
1895 * - If any of the modes are non-zero then the buffer is not stale
1896 * - If all of the modes are zero and at least one of the generation
1897 * counts is non-zero then the buffer is stale
1898 *
1899 * If the end result is a stale buffer then the log buffer is replayed
1900 * otherwise it is skipped.
1901 *
1902 * This heuristic is not perfect. It can be improved by scanning the
1903 * entire inode chunk for evidence that any of the inode clusters have
1904 * been updated. To fix this problem completely we will need a major
1905 * architectural change to the logging system.
1906 */
1907 if (buf_f->blf_flags & XFS_BLI_INODE_NEW_BUF) {
1908 xfs_dinode_t *dip;
1909 int inodes_per_buf;
1910 int mode_count = 0;
1911 int gen_count = 0;
1912
1913 stale_buf = 0;
1914 inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog;
1915 for (i = 0; i < inodes_per_buf; i++) {
1916 dip = (xfs_dinode_t *)xfs_buf_offset(bp,
1917 i * mp->m_sb.sb_inodesize);
1918 if (be16_to_cpu(dip->di_core.di_magic) !=
1919 XFS_DINODE_MAGIC) {
1920 stale_buf = 1;
1921 break;
1922 }
1923 if (be16_to_cpu(dip->di_core.di_mode))
1924 mode_count++;
1925 if (be16_to_cpu(dip->di_core.di_gen))
1926 gen_count++;
1927 }
1928
1929 if (!mode_count && gen_count)
1930 stale_buf = 1;
1931 }
1887 1932
1888 switch (buf_f->blf_type) { 1933 switch (buf_f->blf_type) {
1889 case XFS_LI_BUF: 1934 case XFS_LI_BUF:
@@ -1917,7 +1962,7 @@ xlog_recover_do_reg_buffer(
1917 -1, 0, XFS_QMOPT_DOWARN, 1962 -1, 0, XFS_QMOPT_DOWARN,
1918 "dquot_buf_recover"); 1963 "dquot_buf_recover");
1919 } 1964 }
1920 if (!error) 1965 if (!error && stale_buf)
1921 memcpy(xfs_buf_offset(bp, 1966 memcpy(xfs_buf_offset(bp,
1922 (uint)bit << XFS_BLI_SHIFT), /* dest */ 1967 (uint)bit << XFS_BLI_SHIFT), /* dest */
1923 item->ri_buf[i].i_addr, /* source */ 1968 item->ri_buf[i].i_addr, /* source */
@@ -2089,7 +2134,7 @@ xlog_recover_do_dquot_buffer(
2089 if (log->l_quotaoffs_flag & type) 2134 if (log->l_quotaoffs_flag & type)
2090 return; 2135 return;
2091 2136
2092 xlog_recover_do_reg_buffer(item, bp, buf_f); 2137 xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
2093} 2138}
2094 2139
2095/* 2140/*
@@ -2190,7 +2235,7 @@ xlog_recover_do_buffer_trans(
2190 (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { 2235 (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
2191 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); 2236 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
2192 } else { 2237 } else {
2193 xlog_recover_do_reg_buffer(item, bp, buf_f); 2238 xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
2194 } 2239 }
2195 if (error) 2240 if (error)
2196 return XFS_ERROR(error); 2241 return XFS_ERROR(error);
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 7deb9e3cbbd3..e0b358c1c533 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -206,8 +206,11 @@ _xfs_mru_cache_list_insert(
206 */ 206 */
207 if (!_xfs_mru_cache_migrate(mru, now)) { 207 if (!_xfs_mru_cache_migrate(mru, now)) {
208 mru->time_zero = now; 208 mru->time_zero = now;
209 if (!mru->next_reap) 209 if (!mru->queued) {
210 mru->next_reap = mru->grp_count * mru->grp_time; 210 mru->queued = 1;
211 queue_delayed_work(xfs_mru_reap_wq, &mru->work,
212 mru->grp_count * mru->grp_time);
213 }
211 } else { 214 } else {
212 grp = (now - mru->time_zero) / mru->grp_time; 215 grp = (now - mru->time_zero) / mru->grp_time;
213 grp = (mru->lru_grp + grp) % mru->grp_count; 216 grp = (mru->lru_grp + grp) % mru->grp_count;
@@ -271,29 +274,26 @@ _xfs_mru_cache_reap(
271 struct work_struct *work) 274 struct work_struct *work)
272{ 275{
273 xfs_mru_cache_t *mru = container_of(work, xfs_mru_cache_t, work.work); 276 xfs_mru_cache_t *mru = container_of(work, xfs_mru_cache_t, work.work);
274 unsigned long now; 277 unsigned long now, next;
275 278
276 ASSERT(mru && mru->lists); 279 ASSERT(mru && mru->lists);
277 if (!mru || !mru->lists) 280 if (!mru || !mru->lists)
278 return; 281 return;
279 282
280 mutex_spinlock(&mru->lock); 283 mutex_spinlock(&mru->lock);
281 now = jiffies; 284 next = _xfs_mru_cache_migrate(mru, jiffies);
282 if (mru->reap_all || 285 _xfs_mru_cache_clear_reap_list(mru);
283 (mru->next_reap && time_after(now, mru->next_reap))) { 286
284 if (mru->reap_all) 287 mru->queued = next;
285 now += mru->grp_count * mru->grp_time * 2; 288 if ((mru->queued > 0)) {
286 mru->next_reap = _xfs_mru_cache_migrate(mru, now); 289 now = jiffies;
287 _xfs_mru_cache_clear_reap_list(mru); 290 if (next <= now)
291 next = 0;
292 else
293 next -= now;
294 queue_delayed_work(xfs_mru_reap_wq, &mru->work, next);
288 } 295 }
289 296
290 /*
291 * the process that triggered the reap_all is responsible
292 * for restating the periodic reap if it is required.
293 */
294 if (!mru->reap_all)
295 queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
296 mru->reap_all = 0;
297 mutex_spinunlock(&mru->lock, 0); 297 mutex_spinunlock(&mru->lock, 0);
298} 298}
299 299
@@ -352,7 +352,7 @@ xfs_mru_cache_create(
352 352
353 /* An extra list is needed to avoid reaping up to a grp_time early. */ 353 /* An extra list is needed to avoid reaping up to a grp_time early. */
354 mru->grp_count = grp_count + 1; 354 mru->grp_count = grp_count + 1;
355 mru->lists = kmem_alloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP); 355 mru->lists = kmem_zalloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
356 356
357 if (!mru->lists) { 357 if (!mru->lists) {
358 err = ENOMEM; 358 err = ENOMEM;
@@ -374,11 +374,6 @@ xfs_mru_cache_create(
374 mru->grp_time = grp_time; 374 mru->grp_time = grp_time;
375 mru->free_func = free_func; 375 mru->free_func = free_func;
376 376
377 /* start up the reaper event */
378 mru->next_reap = 0;
379 mru->reap_all = 0;
380 queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
381
382 *mrup = mru; 377 *mrup = mru;
383 378
384exit: 379exit:
@@ -394,35 +389,25 @@ exit:
394 * Call xfs_mru_cache_flush() to flush out all cached entries, calling their 389 * Call xfs_mru_cache_flush() to flush out all cached entries, calling their
395 * free functions as they're deleted. When this function returns, the caller is 390 * free functions as they're deleted. When this function returns, the caller is
396 * guaranteed that all the free functions for all the elements have finished 391 * guaranteed that all the free functions for all the elements have finished
397 * executing. 392 * executing and the reaper is not running.
398 *
399 * While we are flushing, we stop the periodic reaper event from triggering.
400 * Normally, we want to restart this periodic event, but if we are shutting
401 * down the cache we do not want it restarted. hence the restart parameter
402 * where 0 = do not restart reaper and 1 = restart reaper.
403 */ 393 */
404void 394void
405xfs_mru_cache_flush( 395xfs_mru_cache_flush(
406 xfs_mru_cache_t *mru, 396 xfs_mru_cache_t *mru)
407 int restart)
408{ 397{
409 if (!mru || !mru->lists) 398 if (!mru || !mru->lists)
410 return; 399 return;
411 400
412 cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
413
414 mutex_spinlock(&mru->lock); 401 mutex_spinlock(&mru->lock);
415 mru->reap_all = 1; 402 if (mru->queued) {
416 mutex_spinunlock(&mru->lock, 0); 403 mutex_spinunlock(&mru->lock, 0);
404 cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
405 mutex_spinlock(&mru->lock);
406 }
417 407
418 queue_work(xfs_mru_reap_wq, &mru->work.work); 408 _xfs_mru_cache_migrate(mru, jiffies + mru->grp_count * mru->grp_time);
419 flush_workqueue(xfs_mru_reap_wq); 409 _xfs_mru_cache_clear_reap_list(mru);
420 410
421 mutex_spinlock(&mru->lock);
422 WARN_ON_ONCE(mru->reap_all != 0);
423 mru->reap_all = 0;
424 if (restart)
425 queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
426 mutex_spinunlock(&mru->lock, 0); 411 mutex_spinunlock(&mru->lock, 0);
427} 412}
428 413
@@ -433,8 +418,7 @@ xfs_mru_cache_destroy(
433 if (!mru || !mru->lists) 418 if (!mru || !mru->lists)
434 return; 419 return;
435 420
436 /* we don't want the reaper to restart here */ 421 xfs_mru_cache_flush(mru);
437 xfs_mru_cache_flush(mru, 0);
438 422
439 kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists)); 423 kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
440 kmem_free(mru, sizeof(*mru)); 424 kmem_free(mru, sizeof(*mru));
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
index 624fd10ee8e5..dd58ea1bbebe 100644
--- a/fs/xfs/xfs_mru_cache.h
+++ b/fs/xfs/xfs_mru_cache.h
@@ -32,11 +32,9 @@ typedef struct xfs_mru_cache
32 unsigned int grp_time; /* Time period spanned by grps. */ 32 unsigned int grp_time; /* Time period spanned by grps. */
33 unsigned int lru_grp; /* Group containing time zero. */ 33 unsigned int lru_grp; /* Group containing time zero. */
34 unsigned long time_zero; /* Time first element was added. */ 34 unsigned long time_zero; /* Time first element was added. */
35 unsigned long next_reap; /* Time that the reaper should
36 next do something. */
37 unsigned int reap_all; /* if set, reap all lists */
38 xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */ 35 xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
39 struct delayed_work work; /* Workqueue data for reaping. */ 36 struct delayed_work work; /* Workqueue data for reaping. */
37 unsigned int queued; /* work has been queued */
40} xfs_mru_cache_t; 38} xfs_mru_cache_t;
41 39
42int xfs_mru_cache_init(void); 40int xfs_mru_cache_init(void);
@@ -44,7 +42,7 @@ void xfs_mru_cache_uninit(void);
44int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms, 42int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
45 unsigned int grp_count, 43 unsigned int grp_count,
46 xfs_mru_cache_free_func_t free_func); 44 xfs_mru_cache_free_func_t free_func);
47void xfs_mru_cache_flush(xfs_mru_cache_t *mru, int restart); 45void xfs_mru_cache_flush(xfs_mru_cache_t *mru);
48void xfs_mru_cache_destroy(struct xfs_mru_cache *mru); 46void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
49int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key, 47int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
50 void *value); 48 void *value);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 60b6b898022b..95fff6872a2f 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -966,6 +966,7 @@ xfs_trans_inode_alloc_buf(
966 ASSERT(atomic_read(&bip->bli_refcount) > 0); 966 ASSERT(atomic_read(&bip->bli_refcount) > 0);
967 967
968 bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF; 968 bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
969 bip->bli_format.blf_flags |= XFS_BLI_INODE_NEW_BUF;
969} 970}
970 971
971 972
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 1a5ad8cd97b0..603459229904 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1082,6 +1082,9 @@ xfs_fsync(
1082 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 1082 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
1083 return XFS_ERROR(EIO); 1083 return XFS_ERROR(EIO);
1084 1084
1085 if (flag & FSYNC_DATA)
1086 filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping);
1087
1085 /* 1088 /*
1086 * We always need to make sure that the required inode state 1089 * We always need to make sure that the required inode state
1087 * is safe on disk. The vnode might be clean but because 1090 * is safe on disk. The vnode might be clean but because
@@ -3769,12 +3772,16 @@ xfs_inode_flush(
3769 sync_lsn = log->l_last_sync_lsn; 3772 sync_lsn = log->l_last_sync_lsn;
3770 GRANT_UNLOCK(log, s); 3773 GRANT_UNLOCK(log, s);
3771 3774
3772 if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) <= 0)) 3775 if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) {
3773 return 0; 3776 if (flags & FLUSH_SYNC)
3777 log_flags |= XFS_LOG_SYNC;
3778 error = xfs_log_force(mp, iip->ili_last_lsn, log_flags);
3779 if (error)
3780 return error;
3781 }
3774 3782
3775 if (flags & FLUSH_SYNC) 3783 if (ip->i_update_core == 0)
3776 log_flags |= XFS_LOG_SYNC; 3784 return 0;
3777 return xfs_log_force(mp, iip->ili_last_lsn, log_flags);
3778 } 3785 }
3779 } 3786 }
3780 3787
@@ -3788,9 +3795,6 @@ xfs_inode_flush(
3788 if (flags & FLUSH_INODE) { 3795 if (flags & FLUSH_INODE) {
3789 int flush_flags; 3796 int flush_flags;
3790 3797
3791 if (xfs_ipincount(ip))
3792 return EAGAIN;
3793
3794 if (flags & FLUSH_SYNC) { 3798 if (flags & FLUSH_SYNC) {
3795 xfs_ilock(ip, XFS_ILOCK_SHARED); 3799 xfs_ilock(ip, XFS_ILOCK_SHARED);
3796 xfs_iflock(ip); 3800 xfs_iflock(ip);