author     Christoph Hellwig <hch@infradead.org>    2012-03-27 10:34:49 -0400
committer  Ben Myers <bpm@sgi.com>                  2012-05-14 17:20:20 -0400
commit     193aec10504e4c24521449c46317282141fb36e8 (patch)
tree       ab68802be2bc151911732a78c3acd84604164b20 /fs/xfs/xfs_file.c
parent     f38996f5768713fb60e1d2de66c097367d54bb6a (diff)
xfs: push the ilock into xfs_zero_eof
Instead of calling xfs_zero_eof with the ilock held, only take it internally
for the minimal required critical section around xfs_bmapi_read. This
also requires changing the calling convention for xfs_zero_last_block
slightly. The actual zeroing operation is still serialized by the iolock,
which must be taken exclusively over the call to xfs_zero_eof.
We could in fact use a shared lock for the xfs_bmapi_read calls as long as
the extent list has been read in, but given that we already hold the iolock
exclusively, there is little reason to micro-optimize this further.
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
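
In outline, the caller side of the change drops the ilock/iunlock pair around
the call and relies on the iolock alone, while xfs_zero_eof takes the ilock
itself just for the mapping lookups. A condensed sketch of the resulting
write-checks path (simplified from the last hunk below, not the complete
function):

	/* iolock is already held exclusively by the write path here */
	if (*pos > i_size_read(inode)) {
		/*
		 * The ilock is no longer taken here; xfs_zero_eof acquires
		 * it internally around each xfs_bmapi_read call.
		 */
		error = -xfs_zero_eof(ip, *pos, i_size_read(inode));
		if (error)
			return error;
	}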
Diffstat (limited to 'fs/xfs/xfs_file.c')
-rw-r--r--	fs/xfs/xfs_file.c	163
1 files changed, 63 insertions, 100 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 3537c8d0af48..75b8fe9229b0 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -396,114 +396,96 @@ xfs_file_splice_write(
 }
 
 /*
- * This routine is called to handle zeroing any space in the last
- * block of the file that is beyond the EOF.  We do this since the
- * size is being increased without writing anything to that block
- * and we don't want anyone to read the garbage on the disk.
+ * This routine is called to handle zeroing any space in the last block of the
+ * file that is beyond the EOF.  We do this since the size is being increased
+ * without writing anything to that block and we don't want to read the
+ * garbage on the disk.
  */
 STATIC int				/* error (positive) */
 xfs_zero_last_block(
-	xfs_inode_t	*ip,
-	xfs_fsize_t	offset,
-	xfs_fsize_t	isize)
+	struct xfs_inode	*ip,
+	xfs_fsize_t		offset,
+	xfs_fsize_t		isize)
 {
-	xfs_fileoff_t	last_fsb;
-	xfs_mount_t	*mp = ip->i_mount;
-	int		nimaps;
-	int		zero_offset;
-	int		zero_len;
-	int		error = 0;
-	xfs_bmbt_irec_t	imap;
-
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
-	zero_offset = XFS_B_FSB_OFFSET(mp, isize);
-	if (zero_offset == 0) {
-		/*
-		 * There are no extra bytes in the last block on disk to
-		 * zero, so return.
-		 */
-		return 0;
-	}
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_fileoff_t		last_fsb = XFS_B_TO_FSBT(mp, isize);
+	int			zero_offset = XFS_B_FSB_OFFSET(mp, isize);
+	int			zero_len;
+	int			nimaps = 1;
+	int			error = 0;
+	struct xfs_bmbt_irec	imap;
 
-	last_fsb = XFS_B_TO_FSBT(mp, isize);
-	nimaps = 1;
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	if (error)
 		return error;
+
 	ASSERT(nimaps > 0);
+
 	/*
 	 * If the block underlying isize is just a hole, then there
 	 * is nothing to zero.
 	 */
-	if (imap.br_startblock == HOLESTARTBLOCK) {
+	if (imap.br_startblock == HOLESTARTBLOCK)
 		return 0;
-	}
-	/*
-	 * Zero the part of the last block beyond the EOF, and write it
-	 * out sync.  We need to drop the ilock while we do this so we
-	 * don't deadlock when the buffer cache calls back to us.
-	 */
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
 	zero_len = mp->m_sb.sb_blocksize - zero_offset;
 	if (isize + zero_len > offset)
 		zero_len = offset - isize;
-	error = xfs_iozero(ip, isize, zero_len);
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	ASSERT(error >= 0);
-	return error;
+	return xfs_iozero(ip, isize, zero_len);
 }
 
 /*
- * Zero any on disk space between the current EOF and the new,
- * larger EOF.  This handles the normal case of zeroing the remainder
- * of the last block in the file and the unusual case of zeroing blocks
- * out beyond the size of the file.  This second case only happens
- * with fixed size extents and when the system crashes before the inode
- * size was updated but after blocks were allocated.  If fill is set,
- * then any holes in the range are filled and zeroed.  If not, the holes
- * are left alone as holes.
+ * Zero any on disk space between the current EOF and the new, larger EOF.
+ *
+ * This handles the normal case of zeroing the remainder of the last block in
+ * the file and the unusual case of zeroing blocks out beyond the size of the
+ * file.  This second case only happens with fixed size extents and when the
+ * system crashes before the inode size was updated but after blocks were
+ * allocated.
+ *
+ * Expects the iolock to be held exclusive, and will take the ilock internally.
  */
-
 int					/* error (positive) */
 xfs_zero_eof(
-	xfs_inode_t	*ip,
-	xfs_off_t	offset,		/* starting I/O offset */
-	xfs_fsize_t	isize)		/* current inode size */
+	struct xfs_inode	*ip,
+	xfs_off_t		offset,		/* starting I/O offset */
+	xfs_fsize_t		isize)		/* current inode size */
 {
-	xfs_mount_t	*mp = ip->i_mount;
-	xfs_fileoff_t	start_zero_fsb;
-	xfs_fileoff_t	end_zero_fsb;
-	xfs_fileoff_t	zero_count_fsb;
-	xfs_fileoff_t	last_fsb;
-	xfs_fileoff_t	zero_off;
-	xfs_fsize_t	zero_len;
-	int		nimaps;
-	int		error = 0;
-	xfs_bmbt_irec_t	imap;
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_fileoff_t		start_zero_fsb;
+	xfs_fileoff_t		end_zero_fsb;
+	xfs_fileoff_t		zero_count_fsb;
+	xfs_fileoff_t		last_fsb;
+	xfs_fileoff_t		zero_off;
+	xfs_fsize_t		zero_len;
+	int			nimaps;
+	int			error = 0;
+	struct xfs_bmbt_irec	imap;
 
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
 	ASSERT(offset > isize);
 
 	/*
 	 * First handle zeroing the block on which isize resides.
+	 *
 	 * We only zero a part of that block so it is handled specially.
 	 */
-	error = xfs_zero_last_block(ip, offset, isize);
-	if (error) {
-		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-		return error;
+	if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
+		error = xfs_zero_last_block(ip, offset, isize);
+		if (error)
+			return error;
 	}
 
 	/*
-	 * Calculate the range between the new size and the old
-	 * where blocks needing to be zeroed may exist.  To get the
-	 * block where the last byte in the file currently resides,
-	 * we need to subtract one from the size and truncate back
-	 * to a block boundary.  We subtract 1 in case the size is
-	 * exactly on a block boundary.
+	 * Calculate the range between the new size and the old where blocks
+	 * needing to be zeroed may exist.
+	 *
+	 * To get the block where the last byte in the file currently resides,
+	 * we need to subtract one from the size and truncate back to a block
+	 * boundary.  We subtract 1 in case the size is exactly on a block
+	 * boundary.
 	 */
 	last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
 	start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
@@ -521,23 +503,18 @@ xfs_zero_eof(
 	while (start_zero_fsb <= end_zero_fsb) {
 		nimaps = 1;
 		zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
+
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
 		error = xfs_bmapi_read(ip, start_zero_fsb, zero_count_fsb,
 					  &imap, &nimaps, 0);
-		if (error) {
-			ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		if (error)
 			return error;
-		}
+
 		ASSERT(nimaps > 0);
 
 		if (imap.br_state == XFS_EXT_UNWRITTEN ||
 		    imap.br_startblock == HOLESTARTBLOCK) {
-			/*
-			 * This loop handles initializing pages that were
-			 * partially initialized by the code below this
-			 * loop. It basically zeroes the part of the page
-			 * that sits on a hole and sets the page as P_HOLE
-			 * and calls remapf if it is a mapped file.
-			 */
 			start_zero_fsb = imap.br_startoff + imap.br_blockcount;
 			ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
 			continue;
@@ -545,11 +522,7 @@ xfs_zero_eof(
 
 		/*
 		 * There are blocks we need to zero.
-		 * Drop the inode lock while we're doing the I/O.
-		 * We'll still have the iolock to protect us.
 		 */
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
 		zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
 		zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
 
@@ -557,22 +530,14 @@ xfs_zero_eof(
 			zero_len = offset - zero_off;
 
 		error = xfs_iozero(ip, zero_off, zero_len);
-		if (error) {
-			goto out_lock;
-		}
+		if (error)
+			return error;
 
 		start_zero_fsb = imap.br_startoff + imap.br_blockcount;
 		ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
 	}
 
 	return 0;
-
-out_lock:
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	ASSERT(error >= 0);
-	return error;
 }
 
 /*
@@ -612,9 +577,7 @@ restart:
 			xfs_rw_ilock(ip, *iolock);
 			goto restart;
 		}
-		xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
 		error = -xfs_zero_eof(ip, *pos, i_size_read(inode));
-		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
 		if (error)
 			return error;
 	}