aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_inode.c
diff options
context:
space:
mode:
author Dave Chinner <dchinner@redhat.com> 2015-02-23 05:43:37 -0500
committer Dave Chinner <david@fromorbit.com> 2015-02-23 05:43:37 -0500
commit 653c60b633a9019a54a80d64b5ed33ecb214823c (patch)
tree 833b977a047a36b07e49524ba3afb295cf140287 /fs/xfs/xfs_inode.c
parent c517d838eb7d07bbe9507871fab3931deccff539 (diff)
xfs: introduce mmap/truncate lock
Right now we cannot serialise mmap against truncate or hole punch sanely. ->page_mkwrite is not able to take locks that the read IO path normally takes (i.e. the inode iolock) because that could result in lock inversions (read - iolock - page fault - page_mkwrite - iolock) and so we cannot use an IO path lock to serialise page write faults against truncate operations. Instead, introduce a new lock that is used *only* in the ->page_mkwrite path that is the equivalent of the iolock. The lock ordering in a page fault is i_mmaplock -> page lock -> i_ilock, and so in truncate we can take i_iolock -> i_mmaplock and so lock out new write faults during the process of truncation. Because i_mmaplock is outside the page lock, we can hold it across all the same operations we hold the i_iolock for. The only difference is that we never hold the i_mmaplock in the normal IO path and so do not ever have the possibility that we can page fault inside it. Hence there are no recursion issues on the i_mmaplock and so we can use it to serialise page fault IO against inode modification operations that affect the IO path. This patch introduces the i_mmaplock infrastructure, lockdep annotations and initialisation/destruction code. Use of the new lock will be in subsequent patches. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r-- fs/xfs/xfs_inode.c 128
1 files changed, 97 insertions, 31 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index daafa1f6d260..ac24818f7b2d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -117,24 +117,34 @@ xfs_ilock_attr_map_shared(
117} 117}
118 118
119/* 119/*
120 * The xfs inode contains 2 locks: a multi-reader lock called the 120 * The xfs inode contains 3 multi-reader locks: the i_iolock, the i_mmap_lock, and
121 * i_iolock and a multi-reader lock called the i_lock. This routine 121 * the i_lock. This routine allows various combinations of the locks to be
122 * allows either or both of the locks to be obtained. 122 * obtained.
123 * 123 *
124 * The 2 locks should always be ordered so that the IO lock is 124 * The 3 locks should always be ordered so that the IO lock is obtained first,
125 * obtained first in order to prevent deadlock. 125 * the mmap lock second and the ilock last in order to prevent deadlock.
126 * 126 *
127 * ip -- the inode being locked 127 * Basic locking order:
128 * lock_flags -- this parameter indicates the inode's locks 128 *
129 * to be locked. It can be: 129 * i_iolock -> i_mmap_lock -> page_lock -> i_ilock
130 * XFS_IOLOCK_SHARED, 130 *
131 * XFS_IOLOCK_EXCL, 131 * mmap_sem locking order:
132 * XFS_ILOCK_SHARED, 132 *
133 * XFS_ILOCK_EXCL, 133 * i_iolock -> page lock -> mmap_sem
134 * XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED, 134 * mmap_sem -> i_mmap_lock -> page_lock
135 * XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL, 135 *
136 * XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED, 136 * The difference in mmap_sem locking order means that we cannot hold the
137 * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL 137 * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
138 * fault in pages during copy in/out (for buffered IO) or require the mmap_sem
139 * in get_user_pages() to map the user pages into the kernel address space for
140 * direct IO. Similarly the i_iolock cannot be taken inside a page fault because
141 * page faults already hold the mmap_sem.
142 *
143 * Hence to serialise fully against both syscall and mmap based IO, we need to
144 * take both the i_iolock and the i_mmap_lock. These locks should *only* be both
145 * taken in places where we need to invalidate the page cache in a race
146 * free manner (e.g. truncate, hole punch and other extent manipulation
147 * functions).
138 */ 148 */
139void 149void
140xfs_ilock( 150xfs_ilock(
@@ -150,6 +160,8 @@ xfs_ilock(
150 */ 160 */
151 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 161 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
152 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 162 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
163 ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
164 (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
153 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 165 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
154 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 166 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
155 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 167 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -159,6 +171,11 @@ xfs_ilock(
159 else if (lock_flags & XFS_IOLOCK_SHARED) 171 else if (lock_flags & XFS_IOLOCK_SHARED)
160 mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); 172 mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
161 173
174 if (lock_flags & XFS_MMAPLOCK_EXCL)
175 mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
176 else if (lock_flags & XFS_MMAPLOCK_SHARED)
177 mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
178
162 if (lock_flags & XFS_ILOCK_EXCL) 179 if (lock_flags & XFS_ILOCK_EXCL)
163 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); 180 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
164 else if (lock_flags & XFS_ILOCK_SHARED) 181 else if (lock_flags & XFS_ILOCK_SHARED)
@@ -191,6 +208,8 @@ xfs_ilock_nowait(
191 */ 208 */
192 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 209 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
193 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 210 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
211 ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
212 (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
194 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 213 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
195 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 214 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
196 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 215 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -202,21 +221,35 @@ xfs_ilock_nowait(
202 if (!mrtryaccess(&ip->i_iolock)) 221 if (!mrtryaccess(&ip->i_iolock))
203 goto out; 222 goto out;
204 } 223 }
224
225 if (lock_flags & XFS_MMAPLOCK_EXCL) {
226 if (!mrtryupdate(&ip->i_mmaplock))
227 goto out_undo_iolock;
228 } else if (lock_flags & XFS_MMAPLOCK_SHARED) {
229 if (!mrtryaccess(&ip->i_mmaplock))
230 goto out_undo_iolock;
231 }
232
205 if (lock_flags & XFS_ILOCK_EXCL) { 233 if (lock_flags & XFS_ILOCK_EXCL) {
206 if (!mrtryupdate(&ip->i_lock)) 234 if (!mrtryupdate(&ip->i_lock))
207 goto out_undo_iolock; 235 goto out_undo_mmaplock;
208 } else if (lock_flags & XFS_ILOCK_SHARED) { 236 } else if (lock_flags & XFS_ILOCK_SHARED) {
209 if (!mrtryaccess(&ip->i_lock)) 237 if (!mrtryaccess(&ip->i_lock))
210 goto out_undo_iolock; 238 goto out_undo_mmaplock;
211 } 239 }
212 return 1; 240 return 1;
213 241
214 out_undo_iolock: 242out_undo_mmaplock:
243 if (lock_flags & XFS_MMAPLOCK_EXCL)
244 mrunlock_excl(&ip->i_mmaplock);
245 else if (lock_flags & XFS_MMAPLOCK_SHARED)
246 mrunlock_shared(&ip->i_mmaplock);
247out_undo_iolock:
215 if (lock_flags & XFS_IOLOCK_EXCL) 248 if (lock_flags & XFS_IOLOCK_EXCL)
216 mrunlock_excl(&ip->i_iolock); 249 mrunlock_excl(&ip->i_iolock);
217 else if (lock_flags & XFS_IOLOCK_SHARED) 250 else if (lock_flags & XFS_IOLOCK_SHARED)
218 mrunlock_shared(&ip->i_iolock); 251 mrunlock_shared(&ip->i_iolock);
219 out: 252out:
220 return 0; 253 return 0;
221} 254}
222 255
@@ -244,6 +277,8 @@ xfs_iunlock(
244 */ 277 */
245 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 278 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
246 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 279 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
280 ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
281 (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
247 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 282 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
248 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 283 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
249 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 284 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -254,6 +289,11 @@ xfs_iunlock(
254 else if (lock_flags & XFS_IOLOCK_SHARED) 289 else if (lock_flags & XFS_IOLOCK_SHARED)
255 mrunlock_shared(&ip->i_iolock); 290 mrunlock_shared(&ip->i_iolock);
256 291
292 if (lock_flags & XFS_MMAPLOCK_EXCL)
293 mrunlock_excl(&ip->i_mmaplock);
294 else if (lock_flags & XFS_MMAPLOCK_SHARED)
295 mrunlock_shared(&ip->i_mmaplock);
296
257 if (lock_flags & XFS_ILOCK_EXCL) 297 if (lock_flags & XFS_ILOCK_EXCL)
258 mrunlock_excl(&ip->i_lock); 298 mrunlock_excl(&ip->i_lock);
259 else if (lock_flags & XFS_ILOCK_SHARED) 299 else if (lock_flags & XFS_ILOCK_SHARED)
@@ -271,11 +311,14 @@ xfs_ilock_demote(
271 xfs_inode_t *ip, 311 xfs_inode_t *ip,
272 uint lock_flags) 312 uint lock_flags)
273{ 313{
274 ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)); 314 ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL));
275 ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); 315 ASSERT((lock_flags &
316 ~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
276 317
277 if (lock_flags & XFS_ILOCK_EXCL) 318 if (lock_flags & XFS_ILOCK_EXCL)
278 mrdemote(&ip->i_lock); 319 mrdemote(&ip->i_lock);
320 if (lock_flags & XFS_MMAPLOCK_EXCL)
321 mrdemote(&ip->i_mmaplock);
279 if (lock_flags & XFS_IOLOCK_EXCL) 322 if (lock_flags & XFS_IOLOCK_EXCL)
280 mrdemote(&ip->i_iolock); 323 mrdemote(&ip->i_iolock);
281 324
@@ -294,6 +337,12 @@ xfs_isilocked(
294 return rwsem_is_locked(&ip->i_lock.mr_lock); 337 return rwsem_is_locked(&ip->i_lock.mr_lock);
295 } 338 }
296 339
340 if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
341 if (!(lock_flags & XFS_MMAPLOCK_SHARED))
342 return !!ip->i_mmaplock.mr_writer;
343 return rwsem_is_locked(&ip->i_mmaplock.mr_lock);
344 }
345
297 if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { 346 if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
298 if (!(lock_flags & XFS_IOLOCK_SHARED)) 347 if (!(lock_flags & XFS_IOLOCK_SHARED))
299 return !!ip->i_iolock.mr_writer; 348 return !!ip->i_iolock.mr_writer;
@@ -314,14 +363,27 @@ int xfs_lock_delays;
314#endif 363#endif
315 364
316/* 365/*
317 * Bump the subclass so xfs_lock_inodes() acquires each lock with 366 * Bump the subclass so xfs_lock_inodes() acquires each lock with a different
318 * a different value 367 * value. This shouldn't be called for page fault locking, but we also need to
368 * ensure we don't overrun the number of lockdep subclasses for the iolock or
369 * mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
319 */ 370 */
320static inline int 371static inline int
321xfs_lock_inumorder(int lock_mode, int subclass) 372xfs_lock_inumorder(int lock_mode, int subclass)
322{ 373{
323 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) 374 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
375 ASSERT(subclass + XFS_LOCK_INUMORDER <
376 (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
324 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; 377 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
378 }
379
380 if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
381 ASSERT(subclass + XFS_LOCK_INUMORDER <
382 (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
383 lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
384 XFS_MMAPLOCK_SHIFT;
385 }
386
325 if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) 387 if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
326 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; 388 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
327 389
@@ -440,10 +502,10 @@ again:
440} 502}
441 503
442/* 504/*
443 * xfs_lock_two_inodes() can only be used to lock one type of lock 505 * xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
444 * at a time - the iolock or the ilock, but not both at once. If 506 * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
445 * we lock both at once, lockdep will report false positives saying 507 * lock more than one at a time, lockdep will report false positives saying we
446 * we have violated locking orders. 508 * have violated locking orders.
447 */ 509 */
448void 510void
449xfs_lock_two_inodes( 511xfs_lock_two_inodes(
@@ -455,8 +517,12 @@ xfs_lock_two_inodes(
455 int attempts = 0; 517 int attempts = 0;
456 xfs_log_item_t *lp; 518 xfs_log_item_t *lp;
457 519
458 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) 520 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
459 ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0); 521 ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
522 ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
523 } else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))
524 ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
525
460 ASSERT(ip0->i_ino != ip1->i_ino); 526 ASSERT(ip0->i_ino != ip1->i_ino);
461 527
462 if (ip0->i_ino > ip1->i_ino) { 528 if (ip0->i_ino > ip1->i_ino) {