aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
author: Christoph Hellwig <hch@infradead.org> 2011-12-18 15:00:12 -0500
committer: Ben Myers <bpm@sgi.com> 2012-01-17 16:10:19 -0500
commit: 2813d682e8e6a278f94817429afd46b30875bb6e (patch)
tree: d865b04ec89076b692a922b7f5fced9be0458f47 /fs/xfs
parent: ce7ae151ddada3dbf67301464343c154903166b3 (diff)
xfs: remove the i_new_size field in struct xfs_inode
Now that we use the VFS i_size field throughout XFS there is no need for the i_new_size field any more given that the VFS i_size field gets updated in ->write_end before unlocking the page, and thus is always uptodate when writeback could see a page. Removing i_new_size also has the advantage that we will never have to trim back di_size during a failed buffered write, given that it never gets updated past i_size.

Note that currently the generic direct I/O code only updates i_size after calling our end_io handler, which requires a small workaround to make sure di_size actually makes it to disk. I hope to fix this properly in the generic code.

A downside is that we lose the support for parallel non-overlapping O_DIRECT appending writes that recently was added. I don't think keeping the complex and fragile i_new_size infrastructure for this is a good tradeoff - if we really care about parallel appending writers we should investigate turning the iolock into a range lock, which would also allow for parallel non-overlapping buffered writers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r-- fs/xfs/xfs_aops.c | 29
-rw-r--r-- fs/xfs/xfs_file.c | 72
-rw-r--r-- fs/xfs/xfs_iget.c | 1
-rw-r--r-- fs/xfs/xfs_inode.h | 2
-rw-r--r-- fs/xfs/xfs_trace.h | 18
5 files changed, 30 insertions, 92 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4d27ea117e0e..74b9baf36ac3 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -111,8 +111,7 @@ xfs_ioend_new_eof(
111 xfs_fsize_t bsize; 111 xfs_fsize_t bsize;
112 112
113 bsize = ioend->io_offset + ioend->io_size; 113 bsize = ioend->io_offset + ioend->io_size;
114 isize = MAX(i_size_read(VFS_I(ip)), ip->i_new_size); 114 isize = MIN(i_size_read(VFS_I(ip)), bsize);
115 isize = MIN(isize, bsize);
116 return isize > ip->i_d.di_size ? isize : 0; 115 return isize > ip->i_d.di_size ? isize : 0;
117} 116}
118 117
@@ -126,11 +125,7 @@ static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
126} 125}
127 126
128/* 127/*
129 * Update on-disk file size now that data has been written to disk. The 128 * Update on-disk file size now that data has been written to disk.
130 * current in-memory file size is i_size. If a write is beyond eof i_new_size
131 * will be the intended file size until i_size is updated. If this write does
132 * not extend all the way to the valid file size then restrict this update to
133 * the end of the write.
134 * 129 *
135 * This function does not block as blocking on the inode lock in IO completion 130 * This function does not block as blocking on the inode lock in IO completion
136 * can lead to IO completion order dependency deadlocks.. If it can't get the 131 * can lead to IO completion order dependency deadlocks.. If it can't get the
@@ -1279,6 +1274,15 @@ xfs_end_io_direct_write(
1279 struct xfs_ioend *ioend = iocb->private; 1274 struct xfs_ioend *ioend = iocb->private;
1280 1275
1281 /* 1276 /*
1277 * While the generic direct I/O code updates the inode size, it does
1278 * so only after the end_io handler is called, which means our
1279 * end_io handler thinks the on-disk size is outside the in-core
1280 * size. To prevent this just update it a little bit earlier here.
1281 */
1282 if (offset + size > i_size_read(ioend->io_inode))
1283 i_size_write(ioend->io_inode, offset + size);
1284
1285 /*
1282 * blockdev_direct_IO can return an error even after the I/O 1286 * blockdev_direct_IO can return an error even after the I/O
1283 * completion handler was called. Thus we need to protect 1287 * completion handler was called. Thus we need to protect
1284 * against double-freeing. 1288 * against double-freeing.
@@ -1340,12 +1344,11 @@ xfs_vm_write_failed(
1340 1344
1341 if (to > inode->i_size) { 1345 if (to > inode->i_size) {
1342 /* 1346 /*
1343 * punch out the delalloc blocks we have already allocated. We 1347 * Punch out the delalloc blocks we have already allocated.
1344 * don't call xfs_setattr() to do this as we may be in the 1348 *
1345 * middle of a multi-iovec write and so the vfs inode->i_size 1349 * Don't bother with xfs_setattr given that nothing can have
1346 * will not match the xfs ip->i_size and so it will zero too 1350 * made it to disk yet as the page is still locked at this
1347 * much. Hence we jus truncate the page cache to zero what is 1351 * point.
1348 * necessary and punch the delalloc blocks directly.
1349 */ 1352 */
1350 struct xfs_inode *ip = XFS_I(inode); 1353 struct xfs_inode *ip = XFS_I(inode);
1351 xfs_fileoff_t start_fsb; 1354 xfs_fileoff_t start_fsb;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 86d5dc260464..632313926788 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -413,27 +413,6 @@ xfs_file_splice_read(
413} 413}
414 414
415/* 415/*
416 * If this was a direct or synchronous I/O that failed (such as ENOSPC) then
417 * part of the I/O may have been written to disk before the error occurred. In
418 * this case the on-disk file size may have been adjusted beyond the in-memory
419 * file size and now needs to be truncated back.
420 */
421STATIC void
422xfs_aio_write_newsize_update(
423 struct xfs_inode *ip,
424 xfs_fsize_t new_size)
425{
426 if (new_size == ip->i_new_size) {
427 xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
428 if (new_size == ip->i_new_size)
429 ip->i_new_size = 0;
430 if (ip->i_d.di_size > i_size_read(VFS_I(ip)))
431 ip->i_d.di_size = i_size_read(VFS_I(ip));
432 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
433 }
434}
435
436/*
437 * xfs_file_splice_write() does not use xfs_rw_ilock() because 416 * xfs_file_splice_write() does not use xfs_rw_ilock() because
438 * generic_file_splice_write() takes the i_mutex itself. This, in theory, 417 * generic_file_splice_write() takes the i_mutex itself. This, in theory,
439 * couuld cause lock inversions between the aio_write path and the splice path 418 * couuld cause lock inversions between the aio_write path and the splice path
@@ -451,7 +430,6 @@ xfs_file_splice_write(
451{ 430{
452 struct inode *inode = outfilp->f_mapping->host; 431 struct inode *inode = outfilp->f_mapping->host;
453 struct xfs_inode *ip = XFS_I(inode); 432 struct xfs_inode *ip = XFS_I(inode);
454 xfs_fsize_t new_size;
455 int ioflags = 0; 433 int ioflags = 0;
456 ssize_t ret; 434 ssize_t ret;
457 435
@@ -465,20 +443,12 @@ xfs_file_splice_write(
465 443
466 xfs_ilock(ip, XFS_IOLOCK_EXCL); 444 xfs_ilock(ip, XFS_IOLOCK_EXCL);
467 445
468 new_size = *ppos + count;
469
470 xfs_ilock(ip, XFS_ILOCK_EXCL);
471 if (new_size > i_size_read(inode))
472 ip->i_new_size = new_size;
473 xfs_iunlock(ip, XFS_ILOCK_EXCL);
474
475 trace_xfs_file_splice_write(ip, count, *ppos, ioflags); 446 trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
476 447
477 ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); 448 ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
478 if (ret > 0) 449 if (ret > 0)
479 XFS_STATS_ADD(xs_write_bytes, ret); 450 XFS_STATS_ADD(xs_write_bytes, ret);
480 451
481 xfs_aio_write_newsize_update(ip, new_size);
482 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 452 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
483 return ret; 453 return ret;
484} 454}
@@ -673,16 +643,13 @@ xfs_file_aio_write_checks(
673 struct file *file, 643 struct file *file,
674 loff_t *pos, 644 loff_t *pos,
675 size_t *count, 645 size_t *count,
676 xfs_fsize_t *new_sizep,
677 int *iolock) 646 int *iolock)
678{ 647{
679 struct inode *inode = file->f_mapping->host; 648 struct inode *inode = file->f_mapping->host;
680 struct xfs_inode *ip = XFS_I(inode); 649 struct xfs_inode *ip = XFS_I(inode);
681 xfs_fsize_t new_size;
682 int error = 0; 650 int error = 0;
683 651
684 xfs_rw_ilock(ip, XFS_ILOCK_EXCL); 652 xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
685 *new_sizep = 0;
686restart: 653restart:
687 error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); 654 error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
688 if (error) { 655 if (error) {
@@ -697,15 +664,13 @@ restart:
697 /* 664 /*
698 * If the offset is beyond the size of the file, we need to zero any 665 * If the offset is beyond the size of the file, we need to zero any
699 * blocks that fall between the existing EOF and the start of this 666 * blocks that fall between the existing EOF and the start of this
700 * write. There is no need to issue zeroing if another in-flght IO ends 667 * write. If zeroing is needed and we are currently holding the
701 * at or before this one If zeronig is needed and we are currently 668 * iolock shared, we need to update it to exclusive which involves
702 * holding the iolock shared, we need to update it to exclusive which 669 * dropping all locks and relocking to maintain correct locking order.
703 * involves dropping all locks and relocking to maintain correct locking 670 * If we do this, restart the function to ensure all checks and values
704 * order. If we do this, restart the function to ensure all checks and 671 * are still valid.
705 * values are still valid.
706 */ 672 */
707 if ((ip->i_new_size && *pos > ip->i_new_size) || 673 if (*pos > i_size_read(inode)) {
708 (!ip->i_new_size && *pos > i_size_read(inode))) {
709 if (*iolock == XFS_IOLOCK_SHARED) { 674 if (*iolock == XFS_IOLOCK_SHARED) {
710 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); 675 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
711 *iolock = XFS_IOLOCK_EXCL; 676 *iolock = XFS_IOLOCK_EXCL;
@@ -714,19 +679,6 @@ restart:
714 } 679 }
715 error = -xfs_zero_eof(ip, *pos, i_size_read(inode)); 680 error = -xfs_zero_eof(ip, *pos, i_size_read(inode));
716 } 681 }
717
718 /*
719 * If this IO extends beyond EOF, we may need to update ip->i_new_size.
720 * We have already zeroed space beyond EOF (if necessary). Only update
721 * ip->i_new_size if this IO ends beyond any other in-flight writes.
722 */
723 new_size = *pos + *count;
724 if (new_size > i_size_read(inode)) {
725 if (new_size > ip->i_new_size)
726 ip->i_new_size = new_size;
727 *new_sizep = new_size;
728 }
729
730 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); 682 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
731 if (error) 683 if (error)
732 return error; 684 return error;
@@ -772,7 +724,6 @@ xfs_file_dio_aio_write(
772 unsigned long nr_segs, 724 unsigned long nr_segs,
773 loff_t pos, 725 loff_t pos,
774 size_t ocount, 726 size_t ocount,
775 xfs_fsize_t *new_size,
776 int *iolock) 727 int *iolock)
777{ 728{
778 struct file *file = iocb->ki_filp; 729 struct file *file = iocb->ki_filp;
@@ -817,7 +768,7 @@ xfs_file_dio_aio_write(
817 xfs_rw_ilock(ip, *iolock); 768 xfs_rw_ilock(ip, *iolock);
818 } 769 }
819 770
820 ret = xfs_file_aio_write_checks(file, &pos, &count, new_size, iolock); 771 ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
821 if (ret) 772 if (ret)
822 return ret; 773 return ret;
823 774
@@ -855,7 +806,6 @@ xfs_file_buffered_aio_write(
855 unsigned long nr_segs, 806 unsigned long nr_segs,
856 loff_t pos, 807 loff_t pos,
857 size_t ocount, 808 size_t ocount,
858 xfs_fsize_t *new_size,
859 int *iolock) 809 int *iolock)
860{ 810{
861 struct file *file = iocb->ki_filp; 811 struct file *file = iocb->ki_filp;
@@ -869,7 +819,7 @@ xfs_file_buffered_aio_write(
869 *iolock = XFS_IOLOCK_EXCL; 819 *iolock = XFS_IOLOCK_EXCL;
870 xfs_rw_ilock(ip, *iolock); 820 xfs_rw_ilock(ip, *iolock);
871 821
872 ret = xfs_file_aio_write_checks(file, &pos, &count, new_size, iolock); 822 ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
873 if (ret) 823 if (ret)
874 return ret; 824 return ret;
875 825
@@ -909,7 +859,6 @@ xfs_file_aio_write(
909 ssize_t ret; 859 ssize_t ret;
910 int iolock; 860 int iolock;
911 size_t ocount = 0; 861 size_t ocount = 0;
912 xfs_fsize_t new_size = 0;
913 862
914 XFS_STATS_INC(xs_write_calls); 863 XFS_STATS_INC(xs_write_calls);
915 864
@@ -929,10 +878,10 @@ xfs_file_aio_write(
929 878
930 if (unlikely(file->f_flags & O_DIRECT)) 879 if (unlikely(file->f_flags & O_DIRECT))
931 ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, 880 ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
932 ocount, &new_size, &iolock); 881 ocount, &iolock);
933 else 882 else
934 ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, 883 ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
935 ocount, &new_size, &iolock); 884 ocount, &iolock);
936 885
937 if (ret <= 0) 886 if (ret <= 0)
938 goto out_unlock; 887 goto out_unlock;
@@ -953,7 +902,6 @@ xfs_file_aio_write(
953 } 902 }
954 903
955out_unlock: 904out_unlock:
956 xfs_aio_write_newsize_update(ip, new_size);
957 xfs_rw_iunlock(ip, iolock); 905 xfs_rw_iunlock(ip, iolock);
958 return ret; 906 return ret;
959} 907}
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 3b5b78aa3b87..8c3e46394d48 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -94,7 +94,6 @@ xfs_inode_alloc(
94 ip->i_update_core = 0; 94 ip->i_update_core = 0;
95 ip->i_delayed_blks = 0; 95 ip->i_delayed_blks = 0;
96 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); 96 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
97 ip->i_new_size = 0;
98 97
99 return ip; 98 return ip;
100} 99}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index cd99e43fa8f0..2f27b7454085 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -246,8 +246,6 @@ typedef struct xfs_inode {
246 246
247 xfs_icdinode_t i_d; /* most of ondisk inode */ 247 xfs_icdinode_t i_d; /* most of ondisk inode */
248 248
249 xfs_fsize_t i_new_size; /* size when write completes */
250
251 /* VFS inode */ 249 /* VFS inode */
252 struct inode i_vnode; /* embedded VFS inode */ 250 struct inode i_vnode; /* embedded VFS inode */
253} xfs_inode_t; 251} xfs_inode_t;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 2aabcc9c507e..6b6df5802e95 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -891,7 +891,6 @@ DECLARE_EVENT_CLASS(xfs_file_class,
891 __field(dev_t, dev) 891 __field(dev_t, dev)
892 __field(xfs_ino_t, ino) 892 __field(xfs_ino_t, ino)
893 __field(xfs_fsize_t, size) 893 __field(xfs_fsize_t, size)
894 __field(xfs_fsize_t, new_size)
895 __field(loff_t, offset) 894 __field(loff_t, offset)
896 __field(size_t, count) 895 __field(size_t, count)
897 __field(int, flags) 896 __field(int, flags)
@@ -900,17 +899,15 @@ DECLARE_EVENT_CLASS(xfs_file_class,
900 __entry->dev = VFS_I(ip)->i_sb->s_dev; 899 __entry->dev = VFS_I(ip)->i_sb->s_dev;
901 __entry->ino = ip->i_ino; 900 __entry->ino = ip->i_ino;
902 __entry->size = ip->i_d.di_size; 901 __entry->size = ip->i_d.di_size;
903 __entry->new_size = ip->i_new_size;
904 __entry->offset = offset; 902 __entry->offset = offset;
905 __entry->count = count; 903 __entry->count = count;
906 __entry->flags = flags; 904 __entry->flags = flags;
907 ), 905 ),
908 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " 906 TP_printk("dev %d:%d ino 0x%llx size 0x%llx "
909 "offset 0x%llx count 0x%zx ioflags %s", 907 "offset 0x%llx count 0x%zx ioflags %s",
910 MAJOR(__entry->dev), MINOR(__entry->dev), 908 MAJOR(__entry->dev), MINOR(__entry->dev),
911 __entry->ino, 909 __entry->ino,
912 __entry->size, 910 __entry->size,
913 __entry->new_size,
914 __entry->offset, 911 __entry->offset,
915 __entry->count, 912 __entry->count,
916 __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) 913 __print_flags(__entry->flags, "|", XFS_IO_FLAGS))
@@ -978,7 +975,6 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
978 __field(dev_t, dev) 975 __field(dev_t, dev)
979 __field(xfs_ino_t, ino) 976 __field(xfs_ino_t, ino)
980 __field(loff_t, size) 977 __field(loff_t, size)
981 __field(loff_t, new_size)
982 __field(loff_t, offset) 978 __field(loff_t, offset)
983 __field(size_t, count) 979 __field(size_t, count)
984 __field(int, type) 980 __field(int, type)
@@ -990,7 +986,6 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
990 __entry->dev = VFS_I(ip)->i_sb->s_dev; 986 __entry->dev = VFS_I(ip)->i_sb->s_dev;
991 __entry->ino = ip->i_ino; 987 __entry->ino = ip->i_ino;
992 __entry->size = ip->i_d.di_size; 988 __entry->size = ip->i_d.di_size;
993 __entry->new_size = ip->i_new_size;
994 __entry->offset = offset; 989 __entry->offset = offset;
995 __entry->count = count; 990 __entry->count = count;
996 __entry->type = type; 991 __entry->type = type;
@@ -998,13 +993,11 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
998 __entry->startblock = irec ? irec->br_startblock : 0; 993 __entry->startblock = irec ? irec->br_startblock : 0;
999 __entry->blockcount = irec ? irec->br_blockcount : 0; 994 __entry->blockcount = irec ? irec->br_blockcount : 0;
1000 ), 995 ),
1001 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " 996 TP_printk("dev %d:%d ino 0x%llx size 0x%llx offset 0x%llx count %zd "
1002 "offset 0x%llx count %zd type %s " 997 "type %s startoff 0x%llx startblock %lld blockcount 0x%llx",
1003 "startoff 0x%llx startblock %lld blockcount 0x%llx",
1004 MAJOR(__entry->dev), MINOR(__entry->dev), 998 MAJOR(__entry->dev), MINOR(__entry->dev),
1005 __entry->ino, 999 __entry->ino,
1006 __entry->size, 1000 __entry->size,
1007 __entry->new_size,
1008 __entry->offset, 1001 __entry->offset,
1009 __entry->count, 1002 __entry->count,
1010 __print_symbolic(__entry->type, XFS_IO_TYPES), 1003 __print_symbolic(__entry->type, XFS_IO_TYPES),
@@ -1031,7 +1024,6 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class,
1031 __field(xfs_ino_t, ino) 1024 __field(xfs_ino_t, ino)
1032 __field(loff_t, isize) 1025 __field(loff_t, isize)
1033 __field(loff_t, disize) 1026 __field(loff_t, disize)
1034 __field(loff_t, new_size)
1035 __field(loff_t, offset) 1027 __field(loff_t, offset)
1036 __field(size_t, count) 1028 __field(size_t, count)
1037 ), 1029 ),
@@ -1040,17 +1032,15 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class,
1040 __entry->ino = ip->i_ino; 1032 __entry->ino = ip->i_ino;
1041 __entry->isize = VFS_I(ip)->i_size; 1033 __entry->isize = VFS_I(ip)->i_size;
1042 __entry->disize = ip->i_d.di_size; 1034 __entry->disize = ip->i_d.di_size;
1043 __entry->new_size = ip->i_new_size;
1044 __entry->offset = offset; 1035 __entry->offset = offset;
1045 __entry->count = count; 1036 __entry->count = count;
1046 ), 1037 ),
1047 TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx " 1038 TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx "
1048 "offset 0x%llx count %zd", 1039 "offset 0x%llx count %zd",
1049 MAJOR(__entry->dev), MINOR(__entry->dev), 1040 MAJOR(__entry->dev), MINOR(__entry->dev),
1050 __entry->ino, 1041 __entry->ino,
1051 __entry->isize, 1042 __entry->isize,
1052 __entry->disize, 1043 __entry->disize,
1053 __entry->new_size,
1054 __entry->offset, 1044 __entry->offset,
1055 __entry->count) 1045 __entry->count)
1056); 1046);