about summary refs log tree commit diff stats
path: root/fs/xfs
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2011-01-10 18:15:36 -0500
committerDave Chinner <david@fromorbit.com>2011-01-10 18:15:36 -0500
commitf0d26e860b6c496464c5c8165d7df08dabde01fa (patch)
treeaf6fe7630ff41ff6621e7d192d8d7333e1c65ac8 /fs/xfs
parent487f84f3f80bc6f00c59725e822653d3ec174b85 (diff)
xfs: split direct IO write path from xfs_file_aio_write
The current xfs_file_aio_write code is a mess of locking shenanigans to handle the different locking requirements of buffered and direct IO. Start to clean this up by disentangling the direct IO path from the mess. This also removes the failed direct IO fallback path to buffered IO. XFS handles all direct IO cases without needing to fall back to buffered IO, so we can safely remove this unused path. This greatly simplifies the logic and locking needed in the write path. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c179
1 file changed, 116 insertions, 63 deletions
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index b5e13fbb7386..00661fd21fc0 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -628,6 +628,116 @@ out_lock:
628 return error; 628 return error;
629} 629}
630 630
631/*
632 * xfs_file_dio_aio_write - handle direct IO writes
633 *
634 * Lock the inode appropriately to prepare for and issue a direct IO write.
635 * By separating it from the buffered write path we remove all the tricky to
636 * follow locking changes and looping.
637 *
638 * Returns with locks held indicated by @iolock and errors indicated by
639 * negative return values.
640 */
641STATIC ssize_t
642xfs_file_dio_aio_write(
643 struct kiocb *iocb,
644 const struct iovec *iovp,
645 unsigned long nr_segs,
646 loff_t pos,
647 size_t ocount,
648 int *iolock)
649{
650 struct file *file = iocb->ki_filp;
651 struct address_space *mapping = file->f_mapping;
652 struct inode *inode = mapping->host;
653 struct xfs_inode *ip = XFS_I(inode);
654 struct xfs_mount *mp = ip->i_mount;
655 ssize_t ret = 0;
656 xfs_fsize_t new_size;
657 size_t count = ocount;
658 struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
659 mp->m_rtdev_targp : mp->m_ddev_targp;
660
661 *iolock = 0;
662 if ((pos & target->bt_smask) || (count & target->bt_smask))
663 return -XFS_ERROR(EINVAL);
664
665 /*
666 * For direct I/O, if there are cached pages or we're extending
667 * the file, we need IOLOCK_EXCL until we're sure the bytes at
668 * the new EOF have been zeroed and/or the cached pages are
669 * flushed out.
670 */
671 if (mapping->nrpages || pos > ip->i_size)
672 *iolock = XFS_IOLOCK_EXCL;
673 else
674 *iolock = XFS_IOLOCK_SHARED;
675 xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
676
677 ret = generic_write_checks(file, &pos, &count,
678 S_ISBLK(inode->i_mode));
679 if (ret) {
680 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
681 *iolock = 0;
682 return ret;
683 }
684
685 new_size = pos + count;
686 if (new_size > ip->i_size)
687 ip->i_new_size = new_size;
688
689 if (likely(!(file->f_mode & FMODE_NOCMTIME)))
690 file_update_time(file);
691
692 /*
693 * If the offset is beyond the size of the file, we have a couple of
694 * things to do. First, if there is already space allocated we need to
695 * either create holes or zero the disk or ...
696 *
697 * If there is a page where the previous size lands, we need to zero it
698 * out up to the new size.
699 */
700 if (pos > ip->i_size) {
701 ret = -xfs_zero_eof(ip, pos, ip->i_size);
702 if (ret) {
703 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
704 return ret;
705 }
706 }
707 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
708
709 /*
710 * If we're writing the file then make sure to clear the setuid and
711 * setgid bits if the process is not being run by root. This keeps
712 * people from modifying setuid and setgid binaries.
713 */
714 ret = file_remove_suid(file);
715 if (unlikely(ret))
716 return ret;
717
718 if (mapping->nrpages) {
719 WARN_ON(*iolock != XFS_IOLOCK_EXCL);
720 ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
721 FI_REMAPF_LOCKED);
722 if (ret)
723 return ret;
724 }
725
726 if (*iolock == XFS_IOLOCK_EXCL) {
727 /* demote the lock now the cached pages are gone */
728 xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
729 *iolock = XFS_IOLOCK_SHARED;
730 }
731
732 trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
733 ret = generic_file_direct_write(iocb, iovp,
734 &nr_segs, pos, &iocb->ki_pos, count, ocount);
735
736 /* No fallback to buffered IO on errors for XFS. */
737 ASSERT(ret < 0 || ret == count);
738 return ret;
739}
740
631STATIC ssize_t 741STATIC ssize_t
632xfs_file_aio_write( 742xfs_file_aio_write(
633 struct kiocb *iocb, 743 struct kiocb *iocb,
@@ -670,12 +780,12 @@ xfs_file_aio_write(
670 780
671relock: 781relock:
672 if (ioflags & IO_ISDIRECT) { 782 if (ioflags & IO_ISDIRECT) {
673 iolock = XFS_IOLOCK_SHARED; 783 ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
674 } else { 784 ocount, &iolock);
675 iolock = XFS_IOLOCK_EXCL; 785 goto done_io;
676 } 786 }
787 iolock = XFS_IOLOCK_EXCL;
677 788
678start:
679 xfs_rw_ilock(ip, XFS_ILOCK_EXCL|iolock); 789 xfs_rw_ilock(ip, XFS_ILOCK_EXCL|iolock);
680 ret = generic_write_checks(file, &pos, &count, 790 ret = generic_write_checks(file, &pos, &count,
681 S_ISBLK(inode->i_mode)); 791 S_ISBLK(inode->i_mode));
@@ -684,30 +794,6 @@ start:
684 return ret; 794 return ret;
685 } 795 }
686 796
687 if (ioflags & IO_ISDIRECT) {
688 xfs_buftarg_t *target =
689 XFS_IS_REALTIME_INODE(ip) ?
690 mp->m_rtdev_targp : mp->m_ddev_targp;
691
692 if ((pos & target->bt_smask) || (count & target->bt_smask)) {
693 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock);
694 return XFS_ERROR(-EINVAL);
695 }
696
697 /*
698 * For direct I/O, if there are cached pages or we're extending
699 * the file, we need IOLOCK_EXCL until we're sure the bytes at
700 * the new EOF have been zeroed and/or the cached pages are
701 * flushed out. Upgrade the I/O lock and start again.
702 */
703 if (iolock != XFS_IOLOCK_EXCL &&
704 (mapping->nrpages || pos > ip->i_size)) {
705 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock);
706 iolock = XFS_IOLOCK_EXCL;
707 goto start;
708 }
709 }
710
711 new_size = pos + count; 797 new_size = pos + count;
712 if (new_size > ip->i_size) 798 if (new_size > ip->i_size)
713 ip->i_new_size = new_size; 799 ip->i_new_size = new_size;
@@ -746,41 +832,7 @@ start:
746 /* We can write back this queue in page reclaim */ 832 /* We can write back this queue in page reclaim */
747 current->backing_dev_info = mapping->backing_dev_info; 833 current->backing_dev_info = mapping->backing_dev_info;
748 834
749 if ((ioflags & IO_ISDIRECT)) { 835 if (!(ioflags & IO_ISDIRECT)) {
750 if (mapping->nrpages) {
751 WARN_ON(iolock != XFS_IOLOCK_EXCL);
752 ret = -xfs_flushinval_pages(ip,
753 (pos & PAGE_CACHE_MASK),
754 -1, FI_REMAPF_LOCKED);
755 if (ret)
756 goto out_unlock_internal;
757 }
758
759 if (iolock == XFS_IOLOCK_EXCL) {
760 /* demote the lock now the cached pages are gone */
761 xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
762 iolock = XFS_IOLOCK_SHARED;
763 }
764
765 trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags);
766 ret = generic_file_direct_write(iocb, iovp,
767 &nr_segs, pos, &iocb->ki_pos, count, ocount);
768
769 /*
770 * direct-io write to a hole: fall through to buffered I/O
771 * for completing the rest of the request.
772 */
773 if (ret >= 0 && ret != count) {
774 XFS_STATS_ADD(xs_write_bytes, ret);
775
776 pos += ret;
777 count -= ret;
778
779 ioflags &= ~IO_ISDIRECT;
780 xfs_rw_iunlock(ip, iolock);
781 goto relock;
782 }
783 } else {
784 int enospc = 0; 836 int enospc = 0;
785 837
786write_retry: 838write_retry:
@@ -802,6 +854,7 @@ write_retry:
802 854
803 current->backing_dev_info = NULL; 855 current->backing_dev_info = NULL;
804 856
857done_io:
805 xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret); 858 xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);
806 859
807 if (ret <= 0) 860 if (ret <= 0)