author     Jiri Kosina <jkosina@suse.cz>    2013-01-29 04:48:30 -0500
committer  Jiri Kosina <jkosina@suse.cz>    2013-01-29 04:48:30 -0500
commit     617677295b53a40d0e54aac4cbbc216ffbc755dd (patch)
tree       51b9e87213243ed5efff252c8e8d8fec4eebc588 /fs/xfs/xfs_inode.c
parent     5c8d1b68e01a144813e38795fe6dbe7ebb506131 (diff)
parent     6abb7c25775b7fb2225ad0508236d63ca710e65f (diff)
Merge branch 'master' into for-next
Conflicts:
        drivers/devfreq/exynos4_bus.c

Sync with Linus' tree to be able to apply patches that are against
newer code (mvneta).
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r--    fs/xfs/xfs_inode.c    440
1 file changed, 381 insertions(+), 59 deletions(-)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 2778258fcfa2..66282dcb821b 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -45,6 +45,7 @@
 #include "xfs_filestream.h"
 #include "xfs_vnodeops.h"
 #include "xfs_trace.h"
+#include "xfs_icache.h"
 
 kmem_zone_t *xfs_ifork_zone;
 kmem_zone_t *xfs_inode_zone;
@@ -74,6 +75,256 @@ xfs_get_extsz_hint(
 	return 0;
 }
 
+/*
+ * This is a wrapper routine around the xfs_ilock() routine used to centralize
+ * some grungy code.  It is used in places that wish to lock the inode solely
+ * for reading the extents.  The reason these places can't just call
+ * xfs_ilock(SHARED) is that the inode lock also guards the bringing in of the
+ * extents from disk for a file in b-tree format.  If the inode is in b-tree
+ * format, then we need to lock the inode exclusively until the extents are
+ * read in.  Locking it exclusively all the time would limit our parallelism
+ * unnecessarily, though.  What we do instead is check to see if the extents
+ * have been read in yet, and only lock the inode exclusively if they have not.
+ *
+ * The function returns a value which should be given to the corresponding
+ * xfs_iunlock_map_shared().  This value is the mode in which the lock was
+ * actually taken.
+ */
+uint
+xfs_ilock_map_shared(
+	xfs_inode_t	*ip)
+{
+	uint	lock_mode;
+
+	if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE) &&
+	    ((ip->i_df.if_flags & XFS_IFEXTENTS) == 0)) {
+		lock_mode = XFS_ILOCK_EXCL;
+	} else {
+		lock_mode = XFS_ILOCK_SHARED;
+	}
+
+	xfs_ilock(ip, lock_mode);
+
+	return lock_mode;
+}
+
+/*
+ * This is simply the unlock routine to go with xfs_ilock_map_shared().
+ * All it does is call xfs_iunlock() with the given lock_mode.
+ */
+void
+xfs_iunlock_map_shared(
+	xfs_inode_t	*ip,
+	unsigned int	lock_mode)
+{
+	xfs_iunlock(ip, lock_mode);
+}
+
+/*
+ * The xfs inode contains 2 locks: a multi-reader lock called the
+ * i_iolock and a multi-reader lock called the i_lock.  This routine
+ * allows either or both of the locks to be obtained.
+ *
+ * The 2 locks should always be ordered so that the IO lock is
+ * obtained first in order to prevent deadlock.
+ *
+ * ip -- the inode being locked
+ * lock_flags -- this parameter indicates the inode's locks
+ *       to be locked.  It can be:
+ *		XFS_IOLOCK_SHARED,
+ *		XFS_IOLOCK_EXCL,
+ *		XFS_ILOCK_SHARED,
+ *		XFS_ILOCK_EXCL,
+ *		XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED,
+ *		XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL,
+ *		XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED,
+ *		XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
+ */
+void
+xfs_ilock(
+	xfs_inode_t		*ip,
+	uint			lock_flags)
+{
+	trace_xfs_ilock(ip, lock_flags, _RET_IP_);
+
+	/*
+	 * You can't set both SHARED and EXCL for the same lock,
+	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
+	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
+	 */
+	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
+	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
+	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
+	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+
+	if (lock_flags & XFS_IOLOCK_EXCL)
+		mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
+	else if (lock_flags & XFS_IOLOCK_SHARED)
+		mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
+
+	if (lock_flags & XFS_ILOCK_EXCL)
+		mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
+	else if (lock_flags & XFS_ILOCK_SHARED)
+		mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
+}
+
+/*
+ * This is just like xfs_ilock(), except that the caller
+ * is guaranteed not to sleep.  It returns 1 if it gets
+ * the requested locks and 0 otherwise.  If the IO lock is
+ * obtained but the inode lock cannot be, then the IO lock
+ * is dropped before returning.
+ *
+ * ip -- the inode being locked
+ * lock_flags -- this parameter indicates the inode's locks to be
+ *       locked.  See the comment for xfs_ilock() for a list
+ *       of valid values.
+ */
+int
+xfs_ilock_nowait(
+	xfs_inode_t		*ip,
+	uint			lock_flags)
+{
+	trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_);
+
+	/*
+	 * You can't set both SHARED and EXCL for the same lock,
+	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
+	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
+	 */
+	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
+	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
+	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
+	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+
+	if (lock_flags & XFS_IOLOCK_EXCL) {
+		if (!mrtryupdate(&ip->i_iolock))
+			goto out;
+	} else if (lock_flags & XFS_IOLOCK_SHARED) {
+		if (!mrtryaccess(&ip->i_iolock))
+			goto out;
+	}
+	if (lock_flags & XFS_ILOCK_EXCL) {
+		if (!mrtryupdate(&ip->i_lock))
+			goto out_undo_iolock;
+	} else if (lock_flags & XFS_ILOCK_SHARED) {
+		if (!mrtryaccess(&ip->i_lock))
+			goto out_undo_iolock;
+	}
+	return 1;
+
+ out_undo_iolock:
+	if (lock_flags & XFS_IOLOCK_EXCL)
+		mrunlock_excl(&ip->i_iolock);
+	else if (lock_flags & XFS_IOLOCK_SHARED)
+		mrunlock_shared(&ip->i_iolock);
+ out:
+	return 0;
+}
+
+/*
+ * xfs_iunlock() is used to drop the inode locks acquired with
+ * xfs_ilock() and xfs_ilock_nowait().  The caller must pass
+ * in the flags given to xfs_ilock() or xfs_ilock_nowait() so
+ * that we know which locks to drop.
+ *
+ * ip -- the inode being unlocked
+ * lock_flags -- this parameter indicates the inode's locks to be
+ *       unlocked.  See the comment for xfs_ilock() for a list
+ *       of valid values for this parameter.
+ *
+ */
+void
+xfs_iunlock(
+	xfs_inode_t		*ip,
+	uint			lock_flags)
+{
+	/*
+	 * You can't set both SHARED and EXCL for the same lock,
+	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
+	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
+	 */
+	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
+	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
+	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
+	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+	ASSERT(lock_flags != 0);
+
+	if (lock_flags & XFS_IOLOCK_EXCL)
+		mrunlock_excl(&ip->i_iolock);
+	else if (lock_flags & XFS_IOLOCK_SHARED)
+		mrunlock_shared(&ip->i_iolock);
+
+	if (lock_flags & XFS_ILOCK_EXCL)
+		mrunlock_excl(&ip->i_lock);
+	else if (lock_flags & XFS_ILOCK_SHARED)
+		mrunlock_shared(&ip->i_lock);
+
+	trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
+}
+
+/*
+ * Give up write locks.  The i/o lock cannot be held nested
+ * if it is being demoted.
+ */
+void
+xfs_ilock_demote(
+	xfs_inode_t		*ip,
+	uint			lock_flags)
+{
+	ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
+	ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
+
+	if (lock_flags & XFS_ILOCK_EXCL)
+		mrdemote(&ip->i_lock);
+	if (lock_flags & XFS_IOLOCK_EXCL)
+		mrdemote(&ip->i_iolock);
+
+	trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_);
+}
+
+#ifdef DEBUG
+int
+xfs_isilocked(
+	xfs_inode_t		*ip,
+	uint			lock_flags)
+{
+	if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
+		if (!(lock_flags & XFS_ILOCK_SHARED))
+			return !!ip->i_lock.mr_writer;
+		return rwsem_is_locked(&ip->i_lock.mr_lock);
+	}
+
+	if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
+		if (!(lock_flags & XFS_IOLOCK_SHARED))
+			return !!ip->i_iolock.mr_writer;
+		return rwsem_is_locked(&ip->i_iolock.mr_lock);
+	}
+
+	ASSERT(0);
+	return 0;
+}
+#endif
+
+void
+__xfs_iflock(
+	struct xfs_inode	*ip)
+{
+	wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
+	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
+
+	do {
+		prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+		if (xfs_isiflocked(ip))
+			io_schedule();
+	} while (!xfs_iflock_nowait(ip));
+
+	finish_wait(wq, &wait.wait);
+}
+
 #ifdef DEBUG
 /*
  * Make sure that the extents in the given memory buffer
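
For context, a minimal caller sketch of the extent-locking helpers added
above (not part of this patch; example_read_extents is a hypothetical name):

	/*
	 * Take the weakest lock sufficient for reading the extent list.
	 * xfs_ilock_map_shared() returns the mode it actually took (EXCL
	 * if the b-tree extents still had to be read in, SHARED otherwise),
	 * and that mode must be handed back to xfs_iunlock_map_shared().
	 */
	static void
	example_read_extents(
		xfs_inode_t	*ip)
	{
		uint		lock_mode;

		lock_mode = xfs_ilock_map_shared(ip);
		/* ... walk ip->i_df under the returned lock mode ... */
		xfs_iunlock_map_shared(ip, lock_mode);
	}
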
@@ -131,6 +382,65 @@ xfs_inobp_check(
 }
 #endif
 
+static void
+xfs_inode_buf_verify(
+	struct xfs_buf	*bp)
+{
+	struct xfs_mount *mp = bp->b_target->bt_mount;
+	int		i;
+	int		ni;
+
+	/*
+	 * Validate the magic number and version of every inode in the buffer
+	 */
+	ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
+	for (i = 0; i < ni; i++) {
+		int		di_ok;
+		xfs_dinode_t	*dip;
+
+		dip = (struct xfs_dinode *)xfs_buf_offset(bp,
+					(i << mp->m_sb.sb_inodelog));
+		di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
+			    XFS_DINODE_GOOD_VERSION(dip->di_version);
+		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
+						XFS_ERRTAG_ITOBP_INOTOBP,
+						XFS_RANDOM_ITOBP_INOTOBP))) {
+			xfs_buf_ioerror(bp, EFSCORRUPTED);
+			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH,
+					     mp, dip);
+#ifdef DEBUG
+			xfs_emerg(mp,
+				"bad inode magic/vsn daddr %lld #%d (magic=%x)",
+				(unsigned long long)bp->b_bn, i,
+				be16_to_cpu(dip->di_magic));
+			ASSERT(0);
+#endif
+		}
+	}
+	xfs_inobp_check(mp, bp);
+}
+
+
+static void
+xfs_inode_buf_read_verify(
+	struct xfs_buf	*bp)
+{
+	xfs_inode_buf_verify(bp);
+}
+
+static void
+xfs_inode_buf_write_verify(
+	struct xfs_buf	*bp)
+{
+	xfs_inode_buf_verify(bp);
+}
+
+const struct xfs_buf_ops xfs_inode_buf_ops = {
+	.verify_read = xfs_inode_buf_read_verify,
+	.verify_write = xfs_inode_buf_write_verify,
+};
+
+
 /*
  * This routine is called to map an inode to the buffer containing the on-disk
  * version of the inode.  It returns a pointer to the buffer containing the
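
The ops table above is the hook point for inode buffer verification. As a
rough sketch (illustrative only, assuming b_ops was attached when the buffer
was read or created), a read completion dispatches through it like this:

	/*
	 * Illustrative only: the buffer layer calls through b_ops on IO
	 * completion, so a corrupt inode cluster is flagged by
	 * xfs_inode_buf_verify() (via xfs_buf_ioerror()) before its
	 * contents are used.
	 */
	static void
	example_verify_on_read_completion(
		struct xfs_buf	*bp)
	{
		if (bp->b_ops)
			bp->b_ops->verify_read(bp);
	}
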
@@ -145,71 +455,33 @@ xfs_imap_to_bp(
 	struct xfs_mount	*mp,
 	struct xfs_trans	*tp,
 	struct xfs_imap		*imap,
 	struct xfs_dinode	**dipp,
 	struct xfs_buf		**bpp,
 	uint			buf_flags,
 	uint			iget_flags)
 {
 	struct xfs_buf		*bp;
 	int			error;
-	int			i;
-	int			ni;
 
 	buf_flags |= XBF_UNMAPPED;
 	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
-				   (int)imap->im_len, buf_flags, &bp);
+				   (int)imap->im_len, buf_flags, &bp,
+				   &xfs_inode_buf_ops);
 	if (error) {
-		if (error != EAGAIN) {
-			xfs_warn(mp,
-				"%s: xfs_trans_read_buf() returned error %d.",
-				__func__, error);
-		} else {
+		if (error == EAGAIN) {
 			ASSERT(buf_flags & XBF_TRYLOCK);
+			return error;
 		}
-		return error;
-	}
-
-	/*
-	 * Validate the magic number and version of every inode in the buffer
-	 * (if DEBUG kernel) or the first inode in the buffer, otherwise.
-	 */
-#ifdef DEBUG
-	ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog;
-#else	/* usual case */
-	ni = 1;
-#endif
 
-	for (i = 0; i < ni; i++) {
-		int		di_ok;
-		xfs_dinode_t	*dip;
+		if (error == EFSCORRUPTED &&
+		    (iget_flags & XFS_IGET_UNTRUSTED))
+			return XFS_ERROR(EINVAL);
 
-		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
-					(i << mp->m_sb.sb_inodelog));
-		di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
-			    XFS_DINODE_GOOD_VERSION(dip->di_version);
-		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
-						XFS_ERRTAG_ITOBP_INOTOBP,
-						XFS_RANDOM_ITOBP_INOTOBP))) {
-			if (iget_flags & XFS_IGET_UNTRUSTED) {
-				xfs_trans_brelse(tp, bp);
-				return XFS_ERROR(EINVAL);
-			}
-			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH,
-					     mp, dip);
-#ifdef DEBUG
-			xfs_emerg(mp,
-				"bad inode magic/vsn daddr %lld #%d (magic=%x)",
-				(unsigned long long)imap->im_blkno, i,
-				be16_to_cpu(dip->di_magic));
-			ASSERT(0);
-#endif
-			xfs_trans_brelse(tp, bp);
-			return XFS_ERROR(EFSCORRUPTED);
-		}
+		xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
+			__func__, error);
+		return error;
 	}
 
-	xfs_inobp_check(mp, bp);
-
 	*bpp = bp;
 	*dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset);
 	return 0;
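
With verification moved into the read path, xfs_imap_to_bp() only has to
classify errors. A hedged caller sketch (the wrapper name is hypothetical;
the xfs_imap_to_bp() signature is as shown in the hunk above):

	static int
	example_map_inode(
		struct xfs_mount	*mp,
		struct xfs_trans	*tp,
		struct xfs_imap		*imap)
	{
		struct xfs_dinode	*dip;
		struct xfs_buf		*bp;
		int			error;

		/* XBF_TRYLOCK makes EAGAIN a legitimate, retryable result. */
		error = xfs_imap_to_bp(mp, tp, imap, &dip, &bp,
				       XBF_TRYLOCK, 0);
		if (error)
			return error;	/* EAGAIN, EINVAL or EFSCORRUPTED */

		/* ... read fields from *dip ... */
		xfs_trans_brelse(tp, bp);
		return 0;
	}
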
@@ -853,16 +1125,16 @@ xfs_iread_extents(
  * set according to the contents of the given cred structure.
  *
  * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc()
- * has a free inode available, call xfs_iget()
- * to obtain the in-core version of the allocated inode.  Finally,
- * fill in the inode and log its initial contents.  In this case,
- * ialloc_context would be set to NULL and call_again set to false.
+ * has a free inode available, call xfs_iget() to obtain the in-core
+ * version of the allocated inode.  Finally, fill in the inode and
+ * log its initial contents.  In this case, ialloc_context would be
+ * set to NULL.
  *
- * If xfs_dialloc() does not have an available inode,
- * it will replenish its supply by doing an allocation. Since we can
- * only do one allocation within a transaction without deadlocks, we
- * must commit the current transaction before returning the inode itself.
- * In this case, therefore, we will set call_again to true and return.
+ * If xfs_dialloc() does not have an available inode, it will replenish
+ * its supply by doing an allocation. Since we can only do one
+ * allocation within a transaction without deadlocks, we must commit
+ * the current transaction before returning the inode itself.
+ * In this case, therefore, we will set ialloc_context and return.
  * The caller should then commit the current transaction, start a new
  * transaction, and call xfs_ialloc() again to actually get the inode.
  *
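
The commit-and-retry protocol this comment describes looks roughly like the
loop below. This is a self-contained illustration only: the simplified
try_ialloc() and commit_and_renew() helpers are hypothetical stand-ins, and
the real xfs_ialloc() prototype takes many more parameters.

	/* Hypothetical stand-ins for xfs_ialloc() and the caller's
	 * transaction handling. */
	struct ctx;
	extern int try_ialloc(struct ctx **ialloc_context, struct ctx **ip);
	extern int commit_and_renew(void);

	static int
	example_alloc_inode(
		struct ctx	**ipp)
	{
		struct ctx	*ialloc_context;
		struct ctx	*ip = NULL;
		int		error;

		do {
			ialloc_context = NULL;
			error = try_ialloc(&ialloc_context, &ip);
			if (error)
				return error;
			if (ialloc_context) {
				/*
				 * No inode was immediately available: commit
				 * the current transaction so the supply can
				 * be replenished, then retry in a new one.
				 */
				error = commit_and_renew();
				if (error)
					return error;
			}
		} while (!ip);

		*ipp = ip;
		return 0;
	}
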
@@ -1509,10 +1781,23 @@ xfs_ifree_cluster(
 	 * to mark all the active inodes on the buffer stale.
 	 */
 	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
-					mp->m_bsize * blks_per_cluster, 0);
+					mp->m_bsize * blks_per_cluster,
+					XBF_UNMAPPED);
 
 	if (!bp)
 		return ENOMEM;
+
+	/*
+	 * This buffer may not have been correctly initialised as we
+	 * didn't read it from disk. That's not important because we are
+	 * only using it to mark the buffer as stale in the log, and to
+	 * attach stale cached inodes to it. That means it will never be
+	 * dispatched for IO. If it is, we want to know about it, and we
+	 * want it to fail. We can achieve this by adding a write
+	 * verifier to the buffer.
+	 */
+	bp->b_ops = &xfs_inode_buf_ops;
+
 	/*
 	 * Walk the inodes already attached to the buffer and mark them
 	 * stale. These will all have the flush locks held, so an
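
The tripwire works because write submission also dispatches through b_ops.
Schematically (illustrative only): if this stale, uninitialised buffer were
ever queued for IO, the attached write verifier would fail it rather than
let junk reach disk.

	/*
	 * Illustrative only: xfs_inode_buf_verify() would find no valid
	 * inode magic in the uninitialised buffer and mark it with
	 * EFSCORRUPTED via xfs_buf_ioerror().
	 */
	static void
	example_tripwire_on_write(
		struct xfs_buf	*bp)
	{
		if (bp->b_ops && bp->b_ops->verify_write)
			bp->b_ops->verify_write(bp);
	}
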
@@ -3660,3 +3945,40 @@ xfs_iext_irec_update_extoffs(
 		ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
 	}
 }
+
+/*
+ * Test whether it is appropriate to check an inode for and free post EOF
+ * blocks. The 'force' parameter determines whether we should also consider
+ * regular files that are marked preallocated or append-only.
+ */
+bool
+xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
+{
+	/* prealloc/delalloc exists only on regular files */
+	if (!S_ISREG(ip->i_d.di_mode))
+		return false;
+
+	/*
+	 * Zero sized files with no cached pages and no delalloc blocks
+	 * will not have speculative prealloc/delalloc blocks to remove.
+	 */
+	if (VFS_I(ip)->i_size == 0 &&
+	    VN_CACHED(VFS_I(ip)) == 0 &&
+	    ip->i_delayed_blks == 0)
+		return false;
+
+	/* If we haven't read in the extent list, then don't do it now. */
+	if (!(ip->i_df.if_flags & XFS_IFEXTENTS))
+		return false;
+
+	/*
+	 * Do not free real preallocated or append-only files unless the file
+	 * has delalloc blocks and we are forced to remove them.
+	 */
+	if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
+		if (!force || ip->i_delayed_blks == 0)
+			return false;
+
+	return true;
+}
+
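
A hedged sketch of a call site for the new predicate (the surrounding
function is hypothetical; real callers sit in the EOF-block trimming paths):

	static void
	example_trim_eofblocks(
		struct xfs_inode	*ip,
		bool			force)
	{
		/*
		 * Only regular files with something speculative to trim
		 * qualify; 'force' additionally covers PREALLOC/APPEND
		 * files that still carry delalloc blocks.
		 */
		if (!xfs_can_free_eofblocks(ip, force))
			return;

		/* ... lock the inode and free the blocks beyond EOF ... */
	}
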