aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLachlan McIlroy <lachlan@sgi.com>2007-05-07 23:49:39 -0400
committerTim Shimmin <tes@sgi.com>2007-05-07 23:49:39 -0400
commit2a32963130aec5e157b58ff7dfa3dfa1afdf7ca1 (patch)
tree97b38a1d7c69bad94fafeee75102a0701c44f27e
parente6d29426bc8a5d07d0eebd0842fe0cf6ecc862cd (diff)
[XFS] Fix race condition in xfs_write().
This change addresses a race in xfs_write() where, for direct I/O, the flags need_i_mutex and need_flush are setup before the iolock is acquired. The logic used to setup the flags may change between setting the flags and acquiring the iolock resulting in these flags having incorrect values. For example, if a file is not currently cached then need_i_mutex is set to zero and then if the file is cached before the iolock is acquired we will fail to do the flushinval before the direct write. The flush (and also the call to xfs_zero_eof()) need to be done with the iolock held exclusive so we need to acquire the iolock before checking for cached data (or if the write begins after eof) to prevent this state from changing. For direct I/O I've chosen to always acquire the iolock in shared mode initially and if there is a need to promote it then drop it and reacquire it. There's also some other tidy-ups including removing the O_APPEND offset adjustment since that work is done in generic_write_checks() (and we don't use offset as an input parameter anywhere). SGI-PV: 962170 SGI-Modid: xfs-linux-melb:xfs-kern:28319a Signed-off-by: Lachlan McIlroy <lachlan@sgi.com> Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Tim Shimmin <tes@sgi.com>
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c61
1 files changed, 32 insertions, 29 deletions
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 8e46c9798fbf..80fe31233471 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -649,7 +649,7 @@ xfs_write(
649 bhv_vrwlock_t locktype; 649 bhv_vrwlock_t locktype;
650 size_t ocount = 0, count; 650 size_t ocount = 0, count;
651 loff_t pos; 651 loff_t pos;
652 int need_i_mutex = 1, need_flush = 0; 652 int need_i_mutex;
653 653
654 XFS_STATS_INC(xs_write_calls); 654 XFS_STATS_INC(xs_write_calls);
655 655
@@ -689,39 +689,20 @@ xfs_write(
689 if (XFS_FORCED_SHUTDOWN(mp)) 689 if (XFS_FORCED_SHUTDOWN(mp))
690 return -EIO; 690 return -EIO;
691 691
692 if (ioflags & IO_ISDIRECT) {
693 xfs_buftarg_t *target =
694 (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
695 mp->m_rtdev_targp : mp->m_ddev_targp;
696
697 if ((pos & target->bt_smask) || (count & target->bt_smask))
698 return XFS_ERROR(-EINVAL);
699
700 if (!VN_CACHED(vp) && pos < i_size_read(inode))
701 need_i_mutex = 0;
702
703 if (VN_CACHED(vp))
704 need_flush = 1;
705 }
706
707relock: 692relock:
708 if (need_i_mutex) { 693 if (ioflags & IO_ISDIRECT) {
694 iolock = XFS_IOLOCK_SHARED;
695 locktype = VRWLOCK_WRITE_DIRECT;
696 need_i_mutex = 0;
697 } else {
709 iolock = XFS_IOLOCK_EXCL; 698 iolock = XFS_IOLOCK_EXCL;
710 locktype = VRWLOCK_WRITE; 699 locktype = VRWLOCK_WRITE;
711 700 need_i_mutex = 1;
712 mutex_lock(&inode->i_mutex); 701 mutex_lock(&inode->i_mutex);
713 } else {
714 iolock = XFS_IOLOCK_SHARED;
715 locktype = VRWLOCK_WRITE_DIRECT;
716 } 702 }
717 703
718 xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); 704 xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
719 705
720 isize = i_size_read(inode);
721
722 if (file->f_flags & O_APPEND)
723 *offset = isize;
724
725start: 706start:
726 error = -generic_write_checks(file, &pos, &count, 707 error = -generic_write_checks(file, &pos, &count,
727 S_ISBLK(inode->i_mode)); 708 S_ISBLK(inode->i_mode));
@@ -730,6 +711,29 @@ start:
730 goto out_unlock_mutex; 711 goto out_unlock_mutex;
731 } 712 }
732 713
714 isize = i_size_read(inode);
715
716 if (ioflags & IO_ISDIRECT) {
717 xfs_buftarg_t *target =
718 (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
719 mp->m_rtdev_targp : mp->m_ddev_targp;
720
721 if ((pos & target->bt_smask) || (count & target->bt_smask)) {
722 xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
723 return XFS_ERROR(-EINVAL);
724 }
725
726 if (!need_i_mutex && (VN_CACHED(vp) || pos > isize)) {
727 xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
728 iolock = XFS_IOLOCK_EXCL;
729 locktype = VRWLOCK_WRITE;
730 need_i_mutex = 1;
731 mutex_lock(&inode->i_mutex);
732 xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
733 goto start;
734 }
735 }
736
733 new_size = pos + count; 737 new_size = pos + count;
734 if (new_size > isize) 738 if (new_size > isize)
735 io->io_new_size = new_size; 739 io->io_new_size = new_size;
@@ -761,7 +765,6 @@ start:
761 * what allows the size to change in the first place. 765 * what allows the size to change in the first place.
762 */ 766 */
763 if ((file->f_flags & O_APPEND) && savedsize != isize) { 767 if ((file->f_flags & O_APPEND) && savedsize != isize) {
764 pos = isize = xip->i_d.di_size;
765 goto start; 768 goto start;
766 } 769 }
767 } 770 }
@@ -815,7 +818,8 @@ retry:
815 current->backing_dev_info = mapping->backing_dev_info; 818 current->backing_dev_info = mapping->backing_dev_info;
816 819
817 if ((ioflags & IO_ISDIRECT)) { 820 if ((ioflags & IO_ISDIRECT)) {
818 if (need_flush) { 821 if (VN_CACHED(vp)) {
822 WARN_ON(need_i_mutex == 0);
819 xfs_inval_cached_trace(io, pos, -1, 823 xfs_inval_cached_trace(io, pos, -1,
820 ctooff(offtoct(pos)), -1); 824 ctooff(offtoct(pos)), -1);
821 error = bhv_vop_flushinval_pages(vp, ctooff(offtoct(pos)), 825 error = bhv_vop_flushinval_pages(vp, ctooff(offtoct(pos)),
@@ -849,7 +853,6 @@ retry:
849 pos += ret; 853 pos += ret;
850 count -= ret; 854 count -= ret;
851 855
852 need_i_mutex = 1;
853 ioflags &= ~IO_ISDIRECT; 856 ioflags &= ~IO_ISDIRECT;
854 xfs_iunlock(xip, iolock); 857 xfs_iunlock(xip, iolock);
855 goto relock; 858 goto relock;