aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@infradead.org>2012-02-29 04:53:52 -0500
committerBen Myers <bpm@sgi.com>2012-03-13 18:01:15 -0400
commit8a9c9980f24f6d86e0ec0150ed35fba45d0c9f88 (patch)
treedf976343a603bad7e6bdc20db31c64f752312434
parent281627df3eb55e1b729b9bb06fff5ff112929646 (diff)
xfs: log timestamp updates
Timestamps on regular files are the last metadata that XFS does not update transactionally. Now that we use the delaylog mode exclusively and made the log code scale extremely well there is no need to bypass that code for timestamp updates. Logging all updates allows us to drop a lot of code, and will allow for further performance improvements later on. Note that this patch drops optimized handling of fdatasync - it will be added back in a separate commit. Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Mark Tinguely <tinguely@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
-rw-r--r--fs/xfs/xfs_file.c83
-rw-r--r--fs/xfs/xfs_iget.c1
-rw-r--r--fs/xfs/xfs_inode.c25
-rw-r--r--fs/xfs/xfs_inode.h5
-rw-r--r--fs/xfs/xfs_inode_item.c36
-rw-r--r--fs/xfs/xfs_inode_item.h5
-rw-r--r--fs/xfs/xfs_iops.c58
-rw-r--r--fs/xfs/xfs_itable.c21
-rw-r--r--fs/xfs/xfs_super.c108
-rw-r--r--fs/xfs/xfs_sync.c36
-rw-r--r--fs/xfs/xfs_sync.h2
-rw-r--r--fs/xfs/xfs_trace.h2
-rw-r--r--fs/xfs/xfs_trans_inode.c4
13 files changed, 65 insertions, 321 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 7e5bc872f2b4..78d8b0299592 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -163,7 +163,6 @@ xfs_file_fsync(
163 struct inode *inode = file->f_mapping->host; 163 struct inode *inode = file->f_mapping->host;
164 struct xfs_inode *ip = XFS_I(inode); 164 struct xfs_inode *ip = XFS_I(inode);
165 struct xfs_mount *mp = ip->i_mount; 165 struct xfs_mount *mp = ip->i_mount;
166 struct xfs_trans *tp;
167 int error = 0; 166 int error = 0;
168 int log_flushed = 0; 167 int log_flushed = 0;
169 xfs_lsn_t lsn = 0; 168 xfs_lsn_t lsn = 0;
@@ -194,75 +193,15 @@ xfs_file_fsync(
194 } 193 }
195 194
196 /* 195 /*
197 * We always need to make sure that the required inode state is safe on 196 * All metadata updates are logged, which means that we just have
198 * disk. The inode might be clean but we still might need to force the 197 * to flush the log up to the latest LSN that touched the inode.
199 * log because of committed transactions that haven't hit the disk yet.
200 * Likewise, there could be unflushed non-transactional changes to the
201 * inode core that have to go to disk and this requires us to issue
202 * a synchronous transaction to capture these changes correctly.
203 *
204 * This code relies on the assumption that if the i_update_core field
205 * of the inode is clear and the inode is unpinned then it is clean
206 * and no action is required.
207 */ 198 */
208 xfs_ilock(ip, XFS_ILOCK_SHARED); 199 xfs_ilock(ip, XFS_ILOCK_SHARED);
209 200 if (xfs_ipincount(ip))
210 /*
211 * First check if the VFS inode is marked dirty. All the dirtying
212 * of non-transactional updates do not go through mark_inode_dirty*,
213 * which allows us to distinguish between pure timestamp updates
214 * and i_size updates which need to be caught for fdatasync.
215 * After that also check for the dirty state in the XFS inode, which
216 * might gets cleared when the inode gets written out via the AIL
217 * or xfs_iflush_cluster.
218 */
219 if (((inode->i_state & I_DIRTY_DATASYNC) ||
220 ((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
221 ip->i_update_core) {
222 /*
223 * Kick off a transaction to log the inode core to get the
224 * updates. The sync transaction will also force the log.
225 */
226 xfs_iunlock(ip, XFS_ILOCK_SHARED);
227 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
228 error = xfs_trans_reserve(tp, 0,
229 XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
230 if (error) {
231 xfs_trans_cancel(tp, 0);
232 return -error;
233 }
234 xfs_ilock(ip, XFS_ILOCK_EXCL);
235
236 /*
237 * Note - it's possible that we might have pushed ourselves out
238 * of the way during trans_reserve which would flush the inode.
239 * But there's no guarantee that the inode buffer has actually
240 * gone out yet (it's delwri). Plus the buffer could be pinned
241 * anyway if it's part of an inode in another recent
242 * transaction. So we play it safe and fire off the
243 * transaction anyway.
244 */
245 xfs_trans_ijoin(tp, ip, 0);
246 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
247 error = xfs_trans_commit(tp, 0);
248
249 lsn = ip->i_itemp->ili_last_lsn; 201 lsn = ip->i_itemp->ili_last_lsn;
250 xfs_iunlock(ip, XFS_ILOCK_EXCL); 202 xfs_iunlock(ip, XFS_ILOCK_SHARED);
251 } else {
252 /*
253 * Timestamps/size haven't changed since last inode flush or
254 * inode transaction commit. That means either nothing got
255 * written or a transaction committed which caught the updates.
256 * If the latter happened and the transaction hasn't hit the
257 * disk yet, the inode will be still be pinned. If it is,
258 * force the log.
259 */
260 if (xfs_ipincount(ip))
261 lsn = ip->i_itemp->ili_last_lsn;
262 xfs_iunlock(ip, XFS_ILOCK_SHARED);
263 }
264 203
265 if (!error && lsn) 204 if (lsn)
266 error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); 205 error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
267 206
268 /* 207 /*
@@ -659,9 +598,6 @@ restart:
659 return error; 598 return error;
660 } 599 }
661 600
662 if (likely(!(file->f_mode & FMODE_NOCMTIME)))
663 file_update_time(file);
664
665 /* 601 /*
666 * If the offset is beyond the size of the file, we need to zero any 602 * If the offset is beyond the size of the file, we need to zero any
667 * blocks that fall between the existing EOF and the start of this 603 * blocks that fall between the existing EOF and the start of this
@@ -685,6 +621,15 @@ restart:
685 return error; 621 return error;
686 622
687 /* 623 /*
624 * Updating the timestamps will grab the ilock again from
625 * xfs_fs_dirty_inode, so we have to call it after dropping the
626 * lock above. Eventually we should look into a way to avoid
627 * the pointless lock roundtrip.
628 */
629 if (likely(!(file->f_mode & FMODE_NOCMTIME)))
630 file_update_time(file);
631
632 /*
688 * If we're writing the file then make sure to clear the setuid and 633 * If we're writing the file then make sure to clear the setuid and
689 * setgid bits if the process is not being run by root. This keeps 634 * setgid bits if the process is not being run by root. This keeps
690 * people from modifying setuid and setgid binaries. 635 * people from modifying setuid and setgid binaries.
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 37f22dad5f59..af3f30a3d9c2 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -91,7 +91,6 @@ xfs_inode_alloc(
91 ip->i_afp = NULL; 91 ip->i_afp = NULL;
92 memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); 92 memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
93 ip->i_flags = 0; 93 ip->i_flags = 0;
94 ip->i_update_core = 0;
95 ip->i_delayed_blks = 0; 94 ip->i_delayed_blks = 0;
96 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); 95 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
97 96
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b21022499c2e..7ce9ccbf17c4 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1656,7 +1656,6 @@ retry:
1656 iip = ip->i_itemp; 1656 iip = ip->i_itemp;
1657 if (!iip || xfs_inode_clean(ip)) { 1657 if (!iip || xfs_inode_clean(ip)) {
1658 ASSERT(ip != free_ip); 1658 ASSERT(ip != free_ip);
1659 ip->i_update_core = 0;
1660 xfs_ifunlock(ip); 1659 xfs_ifunlock(ip);
1661 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1660 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1662 continue; 1661 continue;
@@ -2451,7 +2450,6 @@ xfs_iflush(
2451 * to disk, because the log record didn't make it to disk! 2450 * to disk, because the log record didn't make it to disk!
2452 */ 2451 */
2453 if (XFS_FORCED_SHUTDOWN(mp)) { 2452 if (XFS_FORCED_SHUTDOWN(mp)) {
2454 ip->i_update_core = 0;
2455 if (iip) 2453 if (iip)
2456 iip->ili_format.ilf_fields = 0; 2454 iip->ili_format.ilf_fields = 0;
2457 xfs_ifunlock(ip); 2455 xfs_ifunlock(ip);
@@ -2533,26 +2531,6 @@ xfs_iflush_int(
2533 /* set *dip = inode's place in the buffer */ 2531 /* set *dip = inode's place in the buffer */
2534 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); 2532 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
2535 2533
2536 /*
2537 * Clear i_update_core before copying out the data.
2538 * This is for coordination with our timestamp updates
2539 * that don't hold the inode lock. They will always
2540 * update the timestamps BEFORE setting i_update_core,
2541 * so if we clear i_update_core after they set it we
2542 * are guaranteed to see their updates to the timestamps.
2543 * I believe that this depends on strongly ordered memory
2544 * semantics, but we have that. We use the SYNCHRONIZE
2545 * macro to make sure that the compiler does not reorder
2546 * the i_update_core access below the data copy below.
2547 */
2548 ip->i_update_core = 0;
2549 SYNCHRONIZE();
2550
2551 /*
2552 * Make sure to get the latest timestamps from the Linux inode.
2553 */
2554 xfs_synchronize_times(ip);
2555
2556 if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), 2534 if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
2557 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { 2535 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
2558 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 2536 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
@@ -2711,8 +2689,7 @@ xfs_iflush_int(
2711 } else { 2689 } else {
2712 /* 2690 /*
2713 * We're flushing an inode which is not in the AIL and has 2691 * We're flushing an inode which is not in the AIL and has
2714 * not been logged but has i_update_core set. For this 2692 * not been logged. For this case we can immediately drop
2715 * case we can use a B_DELWRI flush and immediately drop
2716 * the inode flush lock because we can avoid the whole 2693 * the inode flush lock because we can avoid the whole
2717 * AIL state thing. It's OK to drop the flush lock now, 2694 * AIL state thing. It's OK to drop the flush lock now,
2718 * because we've already locked the buffer and to do anything 2695 * because we've already locked the buffer and to do anything
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 7f90469141d7..f123dbe6d42a 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -241,7 +241,6 @@ typedef struct xfs_inode {
241 spinlock_t i_flags_lock; /* inode i_flags lock */ 241 spinlock_t i_flags_lock; /* inode i_flags lock */
242 /* Miscellaneous state. */ 242 /* Miscellaneous state. */
243 unsigned long i_flags; /* see defined flags below */ 243 unsigned long i_flags; /* see defined flags below */
244 unsigned char i_update_core; /* timestamps/size is dirty */
245 unsigned int i_delayed_blks; /* count of delay alloc blks */ 244 unsigned int i_delayed_blks; /* count of delay alloc blks */
246 245
247 xfs_icdinode_t i_d; /* most of ondisk inode */ 246 xfs_icdinode_t i_d; /* most of ondisk inode */
@@ -534,10 +533,6 @@ void xfs_promote_inode(struct xfs_inode *);
534void xfs_lock_inodes(xfs_inode_t **, int, uint); 533void xfs_lock_inodes(xfs_inode_t **, int, uint);
535void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); 534void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
536 535
537void xfs_synchronize_times(xfs_inode_t *);
538void xfs_mark_inode_dirty(xfs_inode_t *);
539void xfs_mark_inode_dirty_sync(xfs_inode_t *);
540
541#define IHOLD(ip) \ 536#define IHOLD(ip) \
542do { \ 537do { \
543 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ 538 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index adc8a261b5d0..7a60da64f31d 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -254,42 +254,6 @@ xfs_inode_item_format(
254 vecp++; 254 vecp++;
255 nvecs = 1; 255 nvecs = 1;
256 256
257 /*
258 * Clear i_update_core if the timestamps (or any other
259 * non-transactional modification) need flushing/logging
260 * and we're about to log them with the rest of the core.
261 *
262 * This is the same logic as xfs_iflush() but this code can't
263 * run at the same time as xfs_iflush because we're in commit
264 * processing here and so we have the inode lock held in
265 * exclusive mode. Although it doesn't really matter
266 * for the timestamps if both routines were to grab the
267 * timestamps or not. That would be ok.
268 *
269 * We clear i_update_core before copying out the data.
270 * This is for coordination with our timestamp updates
271 * that don't hold the inode lock. They will always
272 * update the timestamps BEFORE setting i_update_core,
273 * so if we clear i_update_core after they set it we
274 * are guaranteed to see their updates to the timestamps
275 * either here. Likewise, if they set it after we clear it
276 * here, we'll see it either on the next commit of this
277 * inode or the next time the inode gets flushed via
278 * xfs_iflush(). This depends on strongly ordered memory
279 * semantics, but we have that. We use the SYNCHRONIZE
280 * macro to make sure that the compiler does not reorder
281 * the i_update_core access below the data copy below.
282 */
283 if (ip->i_update_core) {
284 ip->i_update_core = 0;
285 SYNCHRONIZE();
286 }
287
288 /*
289 * Make sure to get the latest timestamps from the Linux inode.
290 */
291 xfs_synchronize_times(ip);
292
293 vecp->i_addr = &ip->i_d; 257 vecp->i_addr = &ip->i_d;
294 vecp->i_len = sizeof(struct xfs_icdinode); 258 vecp->i_len = sizeof(struct xfs_icdinode);
295 vecp->i_type = XLOG_REG_TYPE_ICORE; 259 vecp->i_type = XLOG_REG_TYPE_ICORE;
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index d3dee61e6d91..25784b066568 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -148,9 +148,8 @@ typedef struct xfs_inode_log_item {
148 148
149static inline int xfs_inode_clean(xfs_inode_t *ip) 149static inline int xfs_inode_clean(xfs_inode_t *ip)
150{ 150{
151 return (!ip->i_itemp || 151 return !ip->i_itemp ||
152 !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) && 152 !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL);
153 !ip->i_update_core;
154} 153}
155 154
156extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); 155extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index ab302539e5b9..7c01cda16727 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -50,59 +50,6 @@
50#include <linux/fiemap.h> 50#include <linux/fiemap.h>
51#include <linux/slab.h> 51#include <linux/slab.h>
52 52
53/*
54 * Bring the timestamps in the XFS inode uptodate.
55 *
56 * Used before writing the inode to disk.
57 */
58void
59xfs_synchronize_times(
60 xfs_inode_t *ip)
61{
62 struct inode *inode = VFS_I(ip);
63
64 ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
65 ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
66 ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
67 ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
68 ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
69 ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
70}
71
72/*
73 * If the linux inode is valid, mark it dirty, else mark the dirty state
74 * in the XFS inode to make sure we pick it up when reclaiming the inode.
75 */
76void
77xfs_mark_inode_dirty_sync(
78 xfs_inode_t *ip)
79{
80 struct inode *inode = VFS_I(ip);
81
82 if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
83 mark_inode_dirty_sync(inode);
84 else {
85 barrier();
86 ip->i_update_core = 1;
87 }
88}
89
90void
91xfs_mark_inode_dirty(
92 xfs_inode_t *ip)
93{
94 struct inode *inode = VFS_I(ip);
95
96 if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
97 mark_inode_dirty(inode);
98 else {
99 barrier();
100 ip->i_update_core = 1;
101 }
102
103}
104
105
106int xfs_initxattrs(struct inode *inode, const struct xattr *xattr_array, 53int xfs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
107 void *fs_info) 54 void *fs_info)
108{ 55{
@@ -678,19 +625,16 @@ xfs_setattr_nonsize(
678 inode->i_atime = iattr->ia_atime; 625 inode->i_atime = iattr->ia_atime;
679 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; 626 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
680 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; 627 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
681 ip->i_update_core = 1;
682 } 628 }
683 if (mask & ATTR_CTIME) { 629 if (mask & ATTR_CTIME) {
684 inode->i_ctime = iattr->ia_ctime; 630 inode->i_ctime = iattr->ia_ctime;
685 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; 631 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
686 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; 632 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
687 ip->i_update_core = 1;
688 } 633 }
689 if (mask & ATTR_MTIME) { 634 if (mask & ATTR_MTIME) {
690 inode->i_mtime = iattr->ia_mtime; 635 inode->i_mtime = iattr->ia_mtime;
691 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; 636 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
692 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; 637 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
693 ip->i_update_core = 1;
694 } 638 }
695 639
696 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 640 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@ -918,13 +862,11 @@ xfs_setattr_size(
918 inode->i_ctime = iattr->ia_ctime; 862 inode->i_ctime = iattr->ia_ctime;
919 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; 863 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
920 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; 864 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
921 ip->i_update_core = 1;
922 } 865 }
923 if (mask & ATTR_MTIME) { 866 if (mask & ATTR_MTIME) {
924 inode->i_mtime = iattr->ia_mtime; 867 inode->i_mtime = iattr->ia_mtime;
925 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; 868 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
926 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; 869 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
927 ip->i_update_core = 1;
928 } 870 }
929 871
930 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 872 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 751e94fe1f77..9720c54bbed0 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -62,7 +62,6 @@ xfs_bulkstat_one_int(
62{ 62{
63 struct xfs_icdinode *dic; /* dinode core info pointer */ 63 struct xfs_icdinode *dic; /* dinode core info pointer */
64 struct xfs_inode *ip; /* incore inode pointer */ 64 struct xfs_inode *ip; /* incore inode pointer */
65 struct inode *inode;
66 struct xfs_bstat *buf; /* return buffer */ 65 struct xfs_bstat *buf; /* return buffer */
67 int error = 0; /* error value */ 66 int error = 0; /* error value */
68 67
@@ -86,7 +85,6 @@ xfs_bulkstat_one_int(
86 ASSERT(ip->i_imap.im_blkno != 0); 85 ASSERT(ip->i_imap.im_blkno != 0);
87 86
88 dic = &ip->i_d; 87 dic = &ip->i_d;
89 inode = VFS_I(ip);
90 88
91 /* xfs_iget returns the following without needing 89 /* xfs_iget returns the following without needing
92 * further change. 90 * further change.
@@ -99,19 +97,12 @@ xfs_bulkstat_one_int(
99 buf->bs_uid = dic->di_uid; 97 buf->bs_uid = dic->di_uid;
100 buf->bs_gid = dic->di_gid; 98 buf->bs_gid = dic->di_gid;
101 buf->bs_size = dic->di_size; 99 buf->bs_size = dic->di_size;
102 100 buf->bs_atime.tv_sec = dic->di_atime.t_sec;
103 /* 101 buf->bs_atime.tv_nsec = dic->di_atime.t_nsec;
104 * We need to read the timestamps from the Linux inode because 102 buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
105 * the VFS keeps writing directly into the inode structure instead 103 buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
106 * of telling us about the updates. 104 buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
107 */ 105 buf->bs_ctime.tv_nsec = dic->di_ctime.t_nsec;
108 buf->bs_atime.tv_sec = inode->i_atime.tv_sec;
109 buf->bs_atime.tv_nsec = inode->i_atime.tv_nsec;
110 buf->bs_mtime.tv_sec = inode->i_mtime.tv_sec;
111 buf->bs_mtime.tv_nsec = inode->i_mtime.tv_nsec;
112 buf->bs_ctime.tv_sec = inode->i_ctime.tv_sec;
113 buf->bs_ctime.tv_nsec = inode->i_ctime.tv_nsec;
114
115 buf->bs_xflags = xfs_ip2xflags(ip); 106 buf->bs_xflags = xfs_ip2xflags(ip);
116 buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog; 107 buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog;
117 buf->bs_extents = dic->di_nextents; 108 buf->bs_extents = dic->di_nextents;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index c7f7bc2855a4..e602c8c67c5c 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -863,91 +863,58 @@ xfs_fs_inode_init_once(
863} 863}
864 864
865/* 865/*
866 * Dirty the XFS inode when mark_inode_dirty_sync() is called so that 866 * This is called by the VFS when dirtying inode metadata. This can happen
867 * we catch unlogged VFS level updates to the inode. 867 * for a few reasons, but we only care about timestamp updates, given that
868 * we handled the rest ourselves. In theory no other calls should happen,
869 * but for example generic_write_end() keeps dirtying the inode after
870 * updating i_size. Thus we check that the flags are exactly I_DIRTY_SYNC,
871 * and skip this call otherwise.
868 * 872 *
869 * We need the barrier() to maintain correct ordering between unlogged 873 * We'll hopefully get a different method just for updating timestamps soon,
870 * updates and the transaction commit code that clears the i_update_core 874 * at which point this hack can go away, and maybe we'll also get real
871 * field. This requires all updates to be completed before marking the 875 * error handling here.
872 * inode dirty.
873 */ 876 */
874STATIC void 877STATIC void
875xfs_fs_dirty_inode( 878xfs_fs_dirty_inode(
876 struct inode *inode,
877 int flags)
878{
879 barrier();
880 XFS_I(inode)->i_update_core = 1;
881}
882
883STATIC int
884xfs_fs_write_inode(
885 struct inode *inode, 879 struct inode *inode,
886 struct writeback_control *wbc) 880 int flags)
887{ 881{
888 struct xfs_inode *ip = XFS_I(inode); 882 struct xfs_inode *ip = XFS_I(inode);
889 struct xfs_mount *mp = ip->i_mount; 883 struct xfs_mount *mp = ip->i_mount;
890 int error = EAGAIN; 884 struct xfs_trans *tp;
891 885 int error;
892 trace_xfs_write_inode(ip);
893
894 if (XFS_FORCED_SHUTDOWN(mp))
895 return -XFS_ERROR(EIO);
896
897 if (wbc->sync_mode == WB_SYNC_ALL || wbc->for_kupdate) {
898 /*
899 * Make sure the inode has made it it into the log. Instead
900 * of forcing it all the way to stable storage using a
901 * synchronous transaction we let the log force inside the
902 * ->sync_fs call do that for thus, which reduces the number
903 * of synchronous log forces dramatically.
904 */
905 error = xfs_log_dirty_inode(ip, NULL, 0);
906 if (error)
907 goto out;
908 return 0;
909 } else {
910 if (!ip->i_update_core)
911 return 0;
912 886
913 /* 887 if (flags != I_DIRTY_SYNC)
914 * We make this non-blocking if the inode is contended, return 888 return;
915 * EAGAIN to indicate to the caller that they did not succeed.
916 * This prevents the flush path from blocking on inodes inside
917 * another operation right now, they get caught later by
918 * xfs_sync.
919 */
920 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
921 goto out;
922 889
923 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) 890 trace_xfs_dirty_inode(ip);
924 goto out_unlock;
925 891
926 /* 892 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
927 * Now we have the flush lock and the inode is not pinned, we 893 error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
928 * can check if the inode is really clean as we know that 894 if (error) {
929 * there are no pending transaction completions, it is not 895 xfs_trans_cancel(tp, 0);
930 * waiting on the delayed write queue and there is no IO in 896 goto trouble;
931 * progress.
932 */
933 if (xfs_inode_clean(ip)) {
934 xfs_ifunlock(ip);
935 error = 0;
936 goto out_unlock;
937 }
938 error = xfs_iflush(ip, SYNC_TRYLOCK);
939 } 897 }
940 898 xfs_ilock(ip, XFS_ILOCK_EXCL);
941 out_unlock:
942 xfs_iunlock(ip, XFS_ILOCK_SHARED);
943 out:
944 /* 899 /*
945 * if we failed to write out the inode then mark 900 * Grab all the latest timestamps from the Linux inode.
946 * it dirty again so we'll try again later.
947 */ 901 */
902 ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
903 ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
904 ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
905 ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
906 ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
907 ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
908
909 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
910 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
911 error = xfs_trans_commit(tp, 0);
948 if (error) 912 if (error)
949 xfs_mark_inode_dirty_sync(ip); 913 goto trouble;
950 return -error; 914 return;
915
916trouble:
917 xfs_warn(mp, "failed to update timestamps for inode 0x%llx", ip->i_ino);
951} 918}
952 919
953STATIC void 920STATIC void
@@ -1466,7 +1433,6 @@ static const struct super_operations xfs_super_operations = {
1466 .alloc_inode = xfs_fs_alloc_inode, 1433 .alloc_inode = xfs_fs_alloc_inode,
1467 .destroy_inode = xfs_fs_destroy_inode, 1434 .destroy_inode = xfs_fs_destroy_inode,
1468 .dirty_inode = xfs_fs_dirty_inode, 1435 .dirty_inode = xfs_fs_dirty_inode,
1469 .write_inode = xfs_fs_write_inode,
1470 .evict_inode = xfs_fs_evict_inode, 1436 .evict_inode = xfs_fs_evict_inode,
1471 .put_super = xfs_fs_put_super, 1437 .put_super = xfs_fs_put_super,
1472 .sync_fs = xfs_fs_sync_fs, 1438 .sync_fs = xfs_fs_sync_fs,
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 71bf846b7280..205ebcb34d9e 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -336,32 +336,6 @@ xfs_sync_fsdata(
336 return error; 336 return error;
337} 337}
338 338
339int
340xfs_log_dirty_inode(
341 struct xfs_inode *ip,
342 struct xfs_perag *pag,
343 int flags)
344{
345 struct xfs_mount *mp = ip->i_mount;
346 struct xfs_trans *tp;
347 int error;
348
349 if (!ip->i_update_core)
350 return 0;
351
352 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
353 error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
354 if (error) {
355 xfs_trans_cancel(tp, 0);
356 return error;
357 }
358
359 xfs_ilock(ip, XFS_ILOCK_EXCL);
360 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
361 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
362 return xfs_trans_commit(tp, 0);
363}
364
365/* 339/*
366 * When remounting a filesystem read-only or freezing the filesystem, we have 340 * When remounting a filesystem read-only or freezing the filesystem, we have
367 * two phases to execute. This first phase is syncing the data before we 341 * two phases to execute. This first phase is syncing the data before we
@@ -385,16 +359,6 @@ xfs_quiesce_data(
385{ 359{
386 int error, error2 = 0; 360 int error, error2 = 0;
387 361
388 /*
389 * Log all pending size and timestamp updates. The vfs writeback
390 * code is supposed to do this, but due to its overagressive
391 * livelock detection it will skip inodes where appending writes
392 * were written out in the first non-blocking sync phase if their
393 * completion took long enough that it happened after taking the
394 * timestamp for the cut-off in the blocking phase.
395 */
396 xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0);
397
398 /* force out the log */ 362 /* force out the log */
399 xfs_log_force(mp, XFS_LOG_SYNC); 363 xfs_log_force(mp, XFS_LOG_SYNC);
400 364
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h
index fa965479d788..941202e7ac6e 100644
--- a/fs/xfs/xfs_sync.h
+++ b/fs/xfs/xfs_sync.h
@@ -34,8 +34,6 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
34 34
35void xfs_flush_inodes(struct xfs_inode *ip); 35void xfs_flush_inodes(struct xfs_inode *ip);
36 36
37int xfs_log_dirty_inode(struct xfs_inode *ip, struct xfs_perag *pag, int flags);
38
39int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); 37int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
40int xfs_reclaim_inodes_count(struct xfs_mount *mp); 38int xfs_reclaim_inodes_count(struct xfs_mount *mp);
41void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); 39void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 3b369c1277f0..ceaf6fe67e41 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -580,7 +580,7 @@ DEFINE_INODE_EVENT(xfs_ioctl_setattr);
580DEFINE_INODE_EVENT(xfs_dir_fsync); 580DEFINE_INODE_EVENT(xfs_dir_fsync);
581DEFINE_INODE_EVENT(xfs_file_fsync); 581DEFINE_INODE_EVENT(xfs_file_fsync);
582DEFINE_INODE_EVENT(xfs_destroy_inode); 582DEFINE_INODE_EVENT(xfs_destroy_inode);
583DEFINE_INODE_EVENT(xfs_write_inode); 583DEFINE_INODE_EVENT(xfs_dirty_inode);
584DEFINE_INODE_EVENT(xfs_evict_inode); 584DEFINE_INODE_EVENT(xfs_evict_inode);
585 585
586DEFINE_INODE_EVENT(xfs_dquot_dqalloc); 586DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 32f0288ae10f..892763effdf1 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -95,10 +95,14 @@ xfs_trans_ichgtime(
95 if ((flags & XFS_ICHGTIME_MOD) && 95 if ((flags & XFS_ICHGTIME_MOD) &&
96 !timespec_equal(&inode->i_mtime, &tv)) { 96 !timespec_equal(&inode->i_mtime, &tv)) {
97 inode->i_mtime = tv; 97 inode->i_mtime = tv;
98 ip->i_d.di_mtime.t_sec = tv.tv_sec;
99 ip->i_d.di_mtime.t_nsec = tv.tv_nsec;
98 } 100 }
99 if ((flags & XFS_ICHGTIME_CHG) && 101 if ((flags & XFS_ICHGTIME_CHG) &&
100 !timespec_equal(&inode->i_ctime, &tv)) { 102 !timespec_equal(&inode->i_ctime, &tv)) {
101 inode->i_ctime = tv; 103 inode->i_ctime = tv;
104 ip->i_d.di_ctime.t_sec = tv.tv_sec;
105 ip->i_d.di_ctime.t_nsec = tv.tv_nsec;
102 } 106 }
103} 107}
104 108