1 files changed, 723 insertions, 0 deletions
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 3805ada98747..51fc510828a4 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -16,6 +16,7 @@
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 #include "xfs.h"
+#include "xfs_fs.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
 #include "xfs_inum.h"
@@ -34,16 +35,738 @@
 #include "xfs_dir2_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
+#include "xfs_bmap.h"
 #include "xfs_error.h"
 #include "xfs_rw.h"
 #include "xfs_vnodeops.h"
 #include "xfs_da_btree.h"
 #include "xfs_ioctl.h"
+#include "xfs_trace.h"
 #include <linux/dcache.h>
 static const struct vm_operations_struct xfs_file_vm_ops;
+/*
+ *      xfs_iozero
+ *
+ *      Zero count bytes of the file starting at byte offset pos,
+ *      working through the page cache one page at a time: each piece
+ *      is prepared with pagecache_write_begin(), cleared with
+ *      zero_user() and committed with pagecache_write_end().  All
+ *      affected blocks end up valid and modified.  If an affected
+ *      block is not allocated, it will be allocated.  If an affected
+ *      block is only partly covered and is not valid beforehand, it
+ *      is read from disk before being partially zeroed.
+ *
+ *      Returns 0 on success, otherwise the negated status of the
+ *      failing pagecache_write_begin() call.
+ */
+STATIC int
+xfs_iozero(
+        struct xfs_inode        *ip,    /* inode                        */
+        loff_t                  pos,    /* offset in file               */
+        size_t                  count)  /* size of data to zero         */
+{
+        struct address_space    *mapping = VFS_I(ip)->i_mapping;
+        int                     status;
+        for (;;) {
+                struct page     *page;
+                void            *fsdata;
+                /* Offset of pos within its page, and the room left there. */
+                unsigned        page_off = pos & (PAGE_CACHE_SIZE - 1);
+                unsigned        chunk = PAGE_CACHE_SIZE - page_off;
+                if (chunk > count)
+                        chunk = count;
+                status = pagecache_write_begin(NULL, mapping, pos, chunk,
+                                        AOP_FLAG_UNINTERRUPTIBLE,
+                                        &page, &fsdata);
+                if (status)
+                        return -status;
+                zero_user(page, page_off, chunk);
+                status = pagecache_write_end(NULL, mapping, pos, chunk, chunk,
+                                        page, fsdata);
+                WARN_ON(status <= 0); /* can't return less than zero! */
+                pos += chunk;
+                count -= chunk;
+                /* One pass always runs, even when count started at zero. */
+                if (!count)
+                        return 0;
+        }
+}
+/*
+ * xfs_read
+ *
+ * Read from the file behind iocb into the segs-entry iovec array iovp,
+ * starting at *offset.  ioflags selects direct I/O (IO_ISDIRECT) and
+ * suppresses the DMAPI read event (IO_INVIS).
+ *
+ * The shared iolock is held across generic_file_aio_read().  For direct
+ * I/O, i_mutex is additionally held from before the iolock is taken
+ * until any cached pages from *offset onwards have been flushed and
+ * invalidated.
+ *
+ * Returns 0 when there is nothing to read (zero length, *offset at or
+ * past XFS_MAXIOFFSET, or a misaligned direct read sitting exactly at
+ * i_size), a negative error, or the generic_file_aio_read() result.
+ */
+ssize_t                 /* bytes read, or (-)  error */
+xfs_read(
+        xfs_inode_t             *ip,
+        struct kiocb            *iocb,
+        const struct iovec      *iovp,
+        unsigned int            segs,
+        loff_t                  *offset,
+        int                     ioflags)
+{
+        struct file             *file = iocb->ki_filp;
+        struct inode            *inode = file->f_mapping->host;
+        xfs_mount_t             *mp = ip->i_mount;
+        const int               direct = ioflags & IO_ISDIRECT;
+        const struct iovec      *iv;
+        size_t                  size = 0;
+        ssize_t                 ret = 0;
+        xfs_fsize_t             room;
+        XFS_STATS_INC(xs_read_calls);
+        /*
+         * Same validation as filemap.c: reject a negative segment
+         * length or a running total that wraps negative.
+         */
+        for (iv = iovp; iv != iovp + segs; iv++) {
+                size += iv->iov_len;
+                if (unlikely((ssize_t)(size|iv->iov_len) < 0))
+                        return XFS_ERROR(-EINVAL);
+        }
+        /* Direct I/O: offset and length must fit the target's bt_smask. */
+        if (unlikely(direct)) {
+                xfs_buftarg_t   *target;
+                if (XFS_IS_REALTIME_INODE(ip))
+                        target = mp->m_rtdev_targp;
+                else
+                        target = mp->m_ddev_targp;
+                if ((*offset & target->bt_smask) ||
+                    (size & target->bt_smask)) {
+                        /* A misaligned read right at EOF is just EOF. */
+                        if (*offset == ip->i_size)
+                                return 0;
+                        return -XFS_ERROR(EINVAL);
+                }
+        }
+        /* Clamp the request to what fits below the maximum file offset. */
+        room = XFS_MAXIOFFSET(mp) - *offset;
+        if ((room <= 0) || (size == 0))
+                return 0;
+        if (room < size)
+                size = room;
+        if (XFS_FORCED_SHUTDOWN(mp))
+                return -EIO;
+        if (unlikely(direct))
+                mutex_lock(&inode->i_mutex);
+        xfs_ilock(ip, XFS_IOLOCK_SHARED);
+        if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
+                int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
+                int iolock = XFS_IOLOCK_SHARED;
+                ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *offset, size,
+                                        dmflags, &iolock);
+                if (ret) {
+                        /* Release in reverse order of acquisition. */
+                        xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+                        if (unlikely(direct))
+                                mutex_unlock(&inode->i_mutex);
+                        return ret;
+                }
+        }
+        if (unlikely(direct)) {
+                /* Push out and drop cached pages before reading directly. */
+                if (inode->i_mapping->nrpages)
+                        ret = -xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK),
+                                                    -1, FI_REMAPF_LOCKED);
+                mutex_unlock(&inode->i_mutex);
+                if (ret)
+                        goto out_iounlock;
+        }
+        trace_xfs_file_read(ip, size, *offset, ioflags);
+        iocb->ki_pos = *offset;
+        ret = generic_file_aio_read(iocb, iovp, segs, *offset);
+        if (ret > 0)
+                XFS_STATS_ADD(xs_read_bytes, ret);
+out_iounlock:
+        xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+        return ret;
+}
+/*
+ * xfs_splice_read
+ *
+ * Splice up to count bytes from infilp at *ppos into pipe by way of
+ * generic_file_splice_read(), holding the shared iolock for the whole
+ * operation.  Unless IO_INVIS is set in ioflags, a DMAPI read event is
+ * sent first when one is enabled on the inode.
+ *
+ * Returns the generic_file_splice_read() result, -EIO after a forced
+ * shutdown, or the negated error from the DMAPI event.
+ */
+ssize_t
+xfs_splice_read(
+        xfs_inode_t             *ip,
+        struct file             *infilp,
+        loff_t                  *ppos,
+        struct pipe_inode_info  *pipe,
+        size_t                  count,
+        int                     flags,
+        int                     ioflags)
+{
+        xfs_mount_t             *mp = ip->i_mount;
+        ssize_t                 ret;
+        XFS_STATS_INC(xs_read_calls);
+        if (XFS_FORCED_SHUTDOWN(mp))
+                return -EIO;
+        xfs_ilock(ip, XFS_IOLOCK_SHARED);
+        if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
+                int iolock = XFS_IOLOCK_SHARED;
+                ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count,
+                                        FILP_DELAY_FLAG(infilp), &iolock);
+                if (ret)
+                        goto out_iounlock;
+        }
+        trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
+        ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
+        if (ret > 0)
+                XFS_STATS_ADD(xs_read_bytes, ret);
+out_iounlock:
+        xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+        return ret;
+}
+/*
+ * xfs_splice_write
+ *
+ * Splice up to count bytes from pipe into outfilp at *ppos by way of
+ * generic_file_splice_write(), holding the exclusive iolock for the
+ * whole operation.  Unless IO_INVIS is set in ioflags, a DMAPI write
+ * event is sent first when one is enabled on the inode.
+ *
+ * i_new_size is raised before the write if it will extend the file and
+ * cleared again afterwards; i_size is advanced to *ppos if the write
+ * went past it, and di_size is pulled back to i_size if it overshot.
+ *
+ * Returns the generic_file_splice_write() result, -EIO after a forced
+ * shutdown, or the negated error from the DMAPI event.
+ */
+ssize_t
+xfs_splice_write(
+        xfs_inode_t             *ip,
+        struct pipe_inode_info  *pipe,
+        struct file             *outfilp,
+        loff_t                  *ppos,
+        size_t                  count,
+        int                     flags,
+        int                     ioflags)
+{
+        struct inode            *inode = outfilp->f_mapping->host;
+        xfs_mount_t             *mp = ip->i_mount;
+        xfs_fsize_t             vfs_size, end_pos;
+        ssize_t                 ret;
+        XFS_STATS_INC(xs_write_calls);
+        if (XFS_FORCED_SHUTDOWN(mp))
+                return -EIO;
+        xfs_ilock(ip, XFS_IOLOCK_EXCL);
+        if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
+                int iolock = XFS_IOLOCK_EXCL;
+                ret = -XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count,
+                                        FILP_DELAY_FLAG(outfilp), &iolock);
+                if (ret)
+                        goto out_iounlock;
+        }
+        /* Record the prospective size while the write is in flight. */
+        end_pos = *ppos + count;
+        xfs_ilock(ip, XFS_ILOCK_EXCL);
+        if (end_pos > ip->i_size)
+                ip->i_new_size = end_pos;
+        xfs_iunlock(ip, XFS_ILOCK_EXCL);
+        trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
+        ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
+        if (ret > 0)
+                XFS_STATS_ADD(xs_write_bytes, ret);
+        /* On failure (other than -EFAULT) do not leave *ppos past EOF. */
+        vfs_size = i_size_read(inode);
+        if (unlikely(ret < 0 && ret != -EFAULT && *ppos > vfs_size))
+                *ppos = vfs_size;
+        /* Unlocked peek first; recheck under the ilock before updating. */
+        if (*ppos > ip->i_size) {
+                xfs_ilock(ip, XFS_ILOCK_EXCL);
+                if (*ppos > ip->i_size)
+                        ip->i_size = *ppos;
+                xfs_iunlock(ip, XFS_ILOCK_EXCL);
+        }
+        if (ip->i_new_size) {
+                xfs_ilock(ip, XFS_ILOCK_EXCL);
+                ip->i_new_size = 0;
+                if (ip->i_d.di_size > ip->i_size)
+                        ip->i_d.di_size = ip->i_size;
+                xfs_iunlock(ip, XFS_ILOCK_EXCL);
+        }
+out_iounlock:
+        xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+        return ret;
+}
+/*
+ * xfs_zero_last_block
+ *
+ * The file is about to grow from isize to offset without anything
+ * being written to the tail of the block that holds the old EOF.  Zero
+ * the bytes of that block that lie beyond isize (stopping at offset if
+ * it falls inside the same block) so that nobody can read the garbage
+ * on the disk.
+ *
+ * Nothing is done when isize is block aligned or when the block under
+ * isize is a hole.  The caller holds XFS_ILOCK_EXCL; it is dropped
+ * around the xfs_iozero() call and held again on return.
+ */
+STATIC int                              /* error (positive) */
+xfs_zero_last_block(
+        xfs_inode_t     *ip,
+        xfs_fsize_t     offset,
+        xfs_fsize_t     isize)
+{
+        xfs_mount_t     *mp = ip->i_mount;
+        xfs_fileoff_t   eof_fsb;
+        xfs_bmbt_irec_t irec;
+        int             nimaps = 1;
+        int             tail_off;
+        int             tail_len;
+        int             error;
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+        /* EOF on a block boundary: no extra bytes in the block to zero. */
+        tail_off = XFS_B_FSB_OFFSET(mp, isize);
+        if (!tail_off)
+                return 0;
+        /* Look up the mapping of the single block containing isize. */
+        eof_fsb = XFS_B_TO_FSBT(mp, isize);
+        error = xfs_bmapi(NULL, ip, eof_fsb, 1, 0, NULL, 0, &irec,
+                          &nimaps, NULL, NULL);
+        if (error)
+                return error;
+        ASSERT(nimaps > 0);
+        /* A hole under isize has nothing on disk that needs zeroing. */
+        if (irec.br_startblock == HOLESTARTBLOCK)
+                return 0;
+        /*
+         * Drop the ilock for the duration of the zeroing so we don't
+         * deadlock when the buffer cache calls back to us.
+         */
+        xfs_iunlock(ip, XFS_ILOCK_EXCL);
+        tail_len = mp->m_sb.sb_blocksize - tail_off;
+        if (isize + tail_len > offset)
+                tail_len = offset - isize;
+        error = xfs_iozero(ip, isize, tail_len);
+        xfs_ilock(ip, XFS_ILOCK_EXCL);
+        ASSERT(error >= 0);
+        return error;
+}
+/*
+ * xfs_zero_eof
+ *
+ * Zero any on disk space between the current EOF (isize) and the new,
+ * larger EOF (offset).  This handles the normal case of zeroing the
+ * remainder of the last block in the file, which is delegated to
+ * xfs_zero_last_block(), and the unusual case of zeroing blocks out
+ * beyond the size of the file.  This second case only happens with
+ * fixed size extents and when the system crashes before the inode
+ * size was updated but after blocks were allocated.
+ *
+ * Holes and unwritten extents in the range are skipped and left as
+ * they are; only mapped, written extents are zeroed via xfs_iozero().
+ *
+ * Locking: the entry assertion checks xfs_isilocked() with
+ * XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL.  The ilock is dropped around every
+ * xfs_iozero() call and retaken afterwards, so it is held again on
+ * every return path.  The iolock is never released here.
+ *
+ * Returns 0 on success or the error from xfs_zero_last_block(),
+ * xfs_bmapi() or xfs_iozero().
+ */
+int                                     /* error (positive) */
+xfs_zero_eof(
+        xfs_inode_t     *ip,
+        xfs_off_t       offset,         /* starting I/O offset */
+        xfs_fsize_t     isize)          /* current inode size */
+{
+        xfs_mount_t     *mp = ip->i_mount;
+        xfs_fileoff_t   start_zero_fsb;
+        xfs_fileoff_t   end_zero_fsb;
+        xfs_fileoff_t   zero_count_fsb;
+        xfs_fileoff_t   last_fsb;
+        xfs_fileoff_t   zero_off;
+        xfs_fsize_t     zero_len;
+        int             nimaps;
+        int             error = 0;
+        xfs_bmbt_irec_t imap;
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+        ASSERT(offset > isize);
+        /*
+         * First handle zeroing the block on which isize resides.
+         * We only zero a part of that block so it is handled specially.
+         */
+        error = xfs_zero_last_block(ip, offset, isize);
+        if (error) {
+                ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+                return error;
+        }
+        /*
+         * Calculate the range between the new size and the old
+         * where blocks needing to be zeroed may exist.  To get the
+         * block where the last byte in the file currently resides,
+         * we need to subtract one from the size and truncate back
+         * to a block boundary.  We subtract 1 in case the size is
+         * exactly on a block boundary.  An empty file has no last
+         * block, which is represented as (xfs_fileoff_t)-1.
+         */
+        last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
+        start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
+        end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
+        ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
+        if (last_fsb == end_zero_fsb) {
+                /*
+                 * The size was only incremented on its last block.
+                 * We took care of that above, so just return.
+                 */
+                return 0;
+        }
+        ASSERT(start_zero_fsb <= end_zero_fsb);
+        /*
+         * Walk the remaining range one extent mapping at a time.  Each
+         * pass asks for a single mapping covering as much of what is
+         * left as possible and then advances past whatever came back.
+         */
+        while (start_zero_fsb <= end_zero_fsb) {
+                nimaps = 1;
+                zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
+                error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
+                                  0, NULL, 0, &imap, &nimaps, NULL, NULL);
+                if (error) {
+                        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+                        return error;
+                }
+                ASSERT(nimaps > 0);
+                if (imap.br_state == XFS_EXT_UNWRITTEN ||
+                    imap.br_startblock == HOLESTARTBLOCK) {
+                        /*
+                         * Nothing to zero for this mapping: it is either
+                         * a hole or an unwritten extent.  Skip past it
+                         * with the ilock still held and map the next
+                         * part of the range.
+                         */
+                        start_zero_fsb = imap.br_startoff + imap.br_blockcount;
+                        ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
+                        continue;
+                }
+                /*
+                 * There are blocks we need to zero.
+                 * Drop the inode lock while we're doing the I/O.
+                 * We'll still have the iolock to protect us.
+                 */
+                xfs_iunlock(ip, XFS_ILOCK_EXCL);
+                zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
+                zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
+                /* Never zero past the new EOF. */
+                if ((zero_off + zero_len) > offset)
+                        zero_len = offset - zero_off;
+                error = xfs_iozero(ip, zero_off, zero_len);
+                if (error) {
+                        goto out_lock;
+                }
+                start_zero_fsb = imap.br_startoff + imap.br_blockcount;
+                ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
+                xfs_ilock(ip, XFS_ILOCK_EXCL);
+        }
+        return 0;
+out_lock:
+        /* xfs_iozero() failed with the ilock dropped: retake it for the caller. */
+        xfs_ilock(ip, XFS_ILOCK_EXCL);
+        ASSERT(error >= 0);
+        return error;
+}
+/*
+ * xfs_write
+ *
+ * Write the data described by iovp/nsegs to the file behind iocb at
+ * *offset, either as direct I/O (IO_ISDIRECT set in ioflags) or through
+ * the page cache.  IO_INVIS in ioflags suppresses DMAPI events and the
+ * timestamp update.
+ *
+ * Locking: buffered writes take i_mutex and the iolock exclusively.
+ * Direct writes start with the shared iolock and no i_mutex; they
+ * upgrade to i_mutex plus the exclusive iolock only when cached pages
+ * exist or the write starts beyond EOF, and demote again before the
+ * I/O is issued.  The ilock is held only while sizes are checked and
+ * updated.
+ *
+ * Error convention: inside this function "error" is a positive errno
+ * and the function returns -error.  After a successful write error is
+ * set to -ret, so the byte count is what gets returned.
+ */
+ssize_t                         /* bytes written, or (-) error */
+xfs_write(
+        struct xfs_inode        *xip,
+        struct kiocb            *iocb,
+        const struct iovec      *iovp,
+        unsigned int            nsegs,
+        loff_t                  *offset,
+        int                     ioflags)
+{
+        struct file             *file = iocb->ki_filp;
+        struct address_space    *mapping = file->f_mapping;
+        struct inode            *inode = mapping->host;
+        unsigned long           segs = nsegs;
+        xfs_mount_t             *mp;
+        ssize_t                 ret = 0, error = 0;
+        xfs_fsize_t             isize, new_size;
+        int                     iolock;
+        int                     eventsent = 0;
+        size_t                  ocount = 0, count;
+        loff_t                  pos;
+        int                     need_i_mutex;
+        XFS_STATS_INC(xs_write_calls);
+        error = generic_segment_checks(iovp, &segs, &ocount, VERIFY_READ);
+        if (error)
+                return error;
+        count = ocount;
+        pos = *offset;
+        if (count == 0)
+                return 0;
+        mp = xip->i_mount;
+        xfs_wait_for_freeze(mp, SB_FREEZE_WRITE);
+        if (XFS_FORCED_SHUTDOWN(mp))
+                return -EIO;
+relock:
+        if (ioflags & IO_ISDIRECT) {
+                iolock = XFS_IOLOCK_SHARED;
+                need_i_mutex = 0;
+        } else {
+                iolock = XFS_IOLOCK_EXCL;
+                need_i_mutex = 1;
+                mutex_lock(&inode->i_mutex);
+        }
+        xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
+        /* Everything from "start" on expects ILOCK_EXCL and iolock held. */
+start:
+        error = -generic_write_checks(file, &pos, &count,
+                                        S_ISBLK(inode->i_mode));
+        if (error) {
+                xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
+                goto out_unlock_mutex;
+        }
+        if ((DM_EVENT_ENABLED(xip, DM_EVENT_WRITE) &&
+            !(ioflags & IO_INVIS) && !eventsent)) {
+                int             dmflags = FILP_DELAY_FLAG(file);
+                if (need_i_mutex)
+                        dmflags |= DM_FLAGS_IMUX;
+                xfs_iunlock(xip, XFS_ILOCK_EXCL);
+                error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, xip,
+                                      pos, count, dmflags, &iolock);
+                if (error) {
+                        goto out_unlock_internal;
+                }
+                xfs_ilock(xip, XFS_ILOCK_EXCL);
+                eventsent = 1;
+                /*
+                 * The iolock was dropped and reacquired in XFS_SEND_DATA
+                 * so we have to recheck the size when appending.
+                 * We will only "goto start;" once, since having sent the
+                 * event prevents another call to XFS_SEND_DATA, which is
+                 * what allows the size to change in the first place.
+                 */
+                if ((file->f_flags & O_APPEND) && pos != xip->i_size)
+                        goto start;
+        }
+        if (ioflags & IO_ISDIRECT) {
+                xfs_buftarg_t   *target =
+                        XFS_IS_REALTIME_INODE(xip) ?
+                                mp->m_rtdev_targp : mp->m_ddev_targp;
+                if ((pos & target->bt_smask) || (count & target->bt_smask)) {
+                        /*
+                         * need_i_mutex can already be set here (after the
+                         * upgrade below and "goto start"), so leave through
+                         * the common exit that drops i_mutex rather than
+                         * returning with it held.
+                         */
+                        xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
+                        error = XFS_ERROR(EINVAL);
+                        goto out_unlock_mutex;
+                }
+                /*
+                 * Cached pages or a write beyond EOF need exclusive
+                 * access: retake the locks with i_mutex and start over.
+                 */
+                if (!need_i_mutex && (mapping->nrpages || pos > xip->i_size)) {
+                        xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
+                        iolock = XFS_IOLOCK_EXCL;
+                        need_i_mutex = 1;
+                        mutex_lock(&inode->i_mutex);
+                        xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
+                        goto start;
+                }
+        }
+        new_size = pos + count;
+        if (new_size > xip->i_size)
+                xip->i_new_size = new_size;
+        if (likely(!(ioflags & IO_INVIS)))
+                file_update_time(file);
+        /*
+         * If the offset is beyond the size of the file, we have a couple
+         * of things to do. First, if there is already space allocated
+         * we need to either create holes or zero the disk or ...
+         *
+         * If there is a page where the previous size lands, we need
+         * to zero it out up to the new size.
+         */
+        if (pos > xip->i_size) {
+                error = xfs_zero_eof(xip, pos, xip->i_size);
+                if (error) {
+                        xfs_iunlock(xip, XFS_ILOCK_EXCL);
+                        goto out_unlock_internal;
+                }
+        }
+        xfs_iunlock(xip, XFS_ILOCK_EXCL);
+        /*
+         * If we're writing the file then make sure to clear the
+         * setuid and setgid bits if the process is not being run
+         * by root.  This keeps people from modifying setuid and
+         * setgid binaries.
+         */
+        error = -file_remove_suid(file);
+        if (unlikely(error))
+                goto out_unlock_internal;
+        /* We can write back this queue in page reclaim */
+        current->backing_dev_info = mapping->backing_dev_info;
+        if ((ioflags & IO_ISDIRECT)) {
+                if (mapping->nrpages) {
+                        WARN_ON(need_i_mutex == 0);
+                        error = xfs_flushinval_pages(xip,
+                                        (pos & PAGE_CACHE_MASK),
+                                        -1, FI_REMAPF_LOCKED);
+                        if (error) {
+                                /* Do not leave the task's bdi hint set. */
+                                current->backing_dev_info = NULL;
+                                goto out_unlock_internal;
+                        }
+                }
+                if (need_i_mutex) {
+                        /* demote the lock now the cached pages are gone */
+                        xfs_ilock_demote(xip, XFS_IOLOCK_EXCL);
+                        mutex_unlock(&inode->i_mutex);
+                        iolock = XFS_IOLOCK_SHARED;
+                        need_i_mutex = 0;
+                }
+                trace_xfs_file_direct_write(xip, count, *offset, ioflags);
+                ret = generic_file_direct_write(iocb, iovp,
+                                &segs, pos, offset, count, ocount);
+                /*
+                 * direct-io write to a hole: fall through to buffered I/O
+                 * for completing the rest of the request.
+                 */
+                if (ret >= 0 && ret != count) {
+                        XFS_STATS_ADD(xs_write_bytes, ret);
+                        pos += ret;
+                        count -= ret;
+                        ioflags &= ~IO_ISDIRECT;
+                        xfs_iunlock(xip, iolock);
+                        goto relock;
+                }
+        } else {
+                int enospc = 0;
+                ssize_t ret2 = 0;
+write_retry:
+                trace_xfs_file_buffered_write(xip, count, *offset, ioflags);
+                ret2 = generic_file_buffered_write(iocb, iovp, segs,
+                                pos, offset, count, ret);
+                /*
+                 * if we just got an ENOSPC, flush the inode now we
+                 * aren't holding any page locks and retry *once*
+                 */
+                if (ret2 == -ENOSPC && !enospc) {
+                        error = xfs_flush_pages(xip, 0, -1, 0, FI_NONE);
+                        if (error) {
+                                /* Do not leave the task's bdi hint set. */
+                                current->backing_dev_info = NULL;
+                                goto out_unlock_internal;
+                        }
+                        enospc = 1;
+                        goto write_retry;
+                }
+                ret = ret2;
+        }
+        current->backing_dev_info = NULL;
+        isize = i_size_read(inode);
+        if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
+                *offset = isize;
+        if (*offset > xip->i_size) {
+                xfs_ilock(xip, XFS_ILOCK_EXCL);
+                if (*offset > xip->i_size)
+                        xip->i_size = *offset;
+                xfs_iunlock(xip, XFS_ILOCK_EXCL);
+        }
+        if (ret == -ENOSPC &&
+            DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
+                xfs_iunlock(xip, iolock);
+                if (need_i_mutex)
+                        mutex_unlock(&inode->i_mutex);
+                error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, xip,
+                                DM_RIGHT_NULL, xip, DM_RIGHT_NULL, NULL, NULL,
+                                0, 0, 0); /* Delay flag intentionally  unused */
+                if (need_i_mutex)
+                        mutex_lock(&inode->i_mutex);
+                xfs_ilock(xip, iolock);
+                if (error)
+                        goto out_unlock_internal;
+                /*
+                 * The code at "start" unlocks ILOCK_EXCL on every path,
+                 * so it has to be held again before jumping back there.
+                 * NOTE(review): ret is still -ENOSPC on this retry and is
+                 * passed on to generic_file_buffered_write() - confirm
+                 * that this is what the retry intends.
+                 */
+                xfs_ilock(xip, XFS_ILOCK_EXCL);
+                goto start;
+        }
+        error = -ret;
+        if (ret <= 0)
+                goto out_unlock_internal;
+        XFS_STATS_ADD(xs_write_bytes, ret);
+        /* Handle various SYNC-type writes */
+        if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
+                loff_t end = pos + ret - 1;
+                int sync_error;
+                int error2;
+                xfs_iunlock(xip, iolock);
+                if (need_i_mutex)
+                        mutex_unlock(&inode->i_mutex);
+                /* VFS helper returns a negative errno; keep it positive. */
+                sync_error = -filemap_write_and_wait_range(mapping, pos, end);
+                if (need_i_mutex)
+                        mutex_lock(&inode->i_mutex);
+                xfs_ilock(xip, iolock);
+                /*
+                 * Treated as a positive errno, like the other xfs_*
+                 * helpers called in this function.
+                 * NOTE(review): confirm xfs_fsync()'s sign convention.
+                 */
+                error2 = xfs_fsync(xip);
+                if (!sync_error)
+                        sync_error = error2;
+                /*
+                 * error currently holds -ret (the success byte count), so
+                 * a sync failure has to replace it explicitly; the first
+                 * failure wins.
+                 */
+                if (sync_error)
+                        error = sync_error;
+        }
+ out_unlock_internal:
+        if (xip->i_new_size) {
+                xfs_ilock(xip, XFS_ILOCK_EXCL);
+                xip->i_new_size = 0;
+                /*
+                 * If this was a direct or synchronous I/O that failed (such
+                 * as ENOSPC) then part of the I/O may have been written to
+                 * disk before the error occurred.  In this case the on-disk
+                 * file size may have been adjusted beyond the in-memory file
+                 * size and now needs to be truncated back.
+                 */
+                if (xip->i_d.di_size > xip->i_size)
+                        xip->i_d.di_size = xip->i_size;
+                xfs_iunlock(xip, XFS_ILOCK_EXCL);
+        }
+        xfs_iunlock(xip, iolock);
+ out_unlock_mutex:
+        if (need_i_mutex)
+                mutex_unlock(&inode->i_mutex);
+        return -error;
+}
 STATIC ssize_t
 xfs_file_aio_read(
        struct kiocb            *iocb,

diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 3805ada98747..51fc510828a4 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -16,6 +16,7 @@
16	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA	16	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17	*/	17	*/
18	#include "xfs.h"	18	#include "xfs.h"
		19	#include "xfs_fs.h"
19	#include "xfs_bit.h"	20	#include "xfs_bit.h"
20	#include "xfs_log.h"	21	#include "xfs_log.h"
21	#include "xfs_inum.h"	22	#include "xfs_inum.h"
@@ -34,16 +35,738 @@
34	#include "xfs_dir2_sf.h"	35	#include "xfs_dir2_sf.h"
35	#include "xfs_dinode.h"	36	#include "xfs_dinode.h"
36	#include "xfs_inode.h"	37	#include "xfs_inode.h"
		38	#include "xfs_bmap.h"
37	#include "xfs_error.h"	39	#include "xfs_error.h"
38	#include "xfs_rw.h"	40	#include "xfs_rw.h"
39	#include "xfs_vnodeops.h"	41	#include "xfs_vnodeops.h"
40	#include "xfs_da_btree.h"	42	#include "xfs_da_btree.h"
41	#include "xfs_ioctl.h"	43	#include "xfs_ioctl.h"
		44	#include "xfs_trace.h"
42		45
43	#include <linux/dcache.h>	46	#include <linux/dcache.h>
44		47
45	static const struct vm_operations_struct xfs_file_vm_ops;	48	static const struct vm_operations_struct xfs_file_vm_ops;
46		49
		50	/*
		51	* xfs_iozero
		52	*
		53	* xfs_iozero clears the specified range of buffer supplied,
		54	* and marks all the affected blocks as valid and modified. If
		55	* an affected block is not allocated, it will be allocated. If
		56	* an affected block is not completely overwritten, and is not
		57	* valid before the operation, it will be read from disk before
		58	* being partially zeroed.
		59	*/
		60	STATIC int
		61	xfs_iozero(
		62	struct xfs_inode ip, / inode */
		63	loff_t pos, /* offset in file */
		64	size_t count) /* size of data to zero */
		65	{
		66	struct page *page;
		67	struct address_space *mapping;
		68	int status;
		69
		70	mapping = VFS_I(ip)->i_mapping;
		71	do {
		72	unsigned offset, bytes;
		73	void *fsdata;
		74
		75	offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
		76	bytes = PAGE_CACHE_SIZE - offset;
		77	if (bytes > count)
		78	bytes = count;
		79
		80	status = pagecache_write_begin(NULL, mapping, pos, bytes,
		81	AOP_FLAG_UNINTERRUPTIBLE,
		82	&page, &fsdata);
		83	if (status)
		84	break;
		85
		86	zero_user(page, offset, bytes);
		87
		88	status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
		89	page, fsdata);
		90	WARN_ON(status <= 0); /* can't return less than zero! */
		91	pos += bytes;
		92	count -= bytes;
		93	status = 0;
		94	} while (count);
		95
		96	return (-status);
		97	}
		98
		99	ssize_t /* bytes read, or (-) error */
		100	xfs_read(
		101	xfs_inode_t *ip,
		102	struct kiocb *iocb,
		103	const struct iovec *iovp,
		104	unsigned int segs,
		105	loff_t *offset,
		106	int ioflags)
		107	{
		108	struct file *file = iocb->ki_filp;
		109	struct inode *inode = file->f_mapping->host;
		110	xfs_mount_t *mp = ip->i_mount;
		111	size_t size = 0;
		112	ssize_t ret = 0;
		113	xfs_fsize_t n;
		114	unsigned long seg;
		115
		116
		117	XFS_STATS_INC(xs_read_calls);
		118
		119	/* START copy & waste from filemap.c */
		120	for (seg = 0; seg < segs; seg++) {
		121	const struct iovec *iv = &iovp[seg];
		122
		123	/*
		124	* If any segment has a negative length, or the cumulative
		125	* length ever wraps negative then return -EINVAL.
		126	*/
		127	size += iv->iov_len;
		128	if (unlikely((ssize_t)(size\|iv->iov_len) < 0))
		129	return XFS_ERROR(-EINVAL);
		130	}
		131	/* END copy & waste from filemap.c */
		132
		133	if (unlikely(ioflags & IO_ISDIRECT)) {
		134	xfs_buftarg_t *target =
		135	XFS_IS_REALTIME_INODE(ip) ?
		136	mp->m_rtdev_targp : mp->m_ddev_targp;
		137	if ((*offset & target->bt_smask) \|\|
		138	(size & target->bt_smask)) {
		139	if (*offset == ip->i_size) {
		140	return (0);
		141	}
		142	return -XFS_ERROR(EINVAL);
		143	}
		144	}
		145
		146	n = XFS_MAXIOFFSET(mp) - *offset;
		147	if ((n <= 0) \|\| (size == 0))
		148	return 0;
		149
		150	if (n < size)
		151	size = n;
		152
		153	if (XFS_FORCED_SHUTDOWN(mp))
		154	return -EIO;
		155
		156	if (unlikely(ioflags & IO_ISDIRECT))
		157	mutex_lock(&inode->i_mutex);
		158	xfs_ilock(ip, XFS_IOLOCK_SHARED);
		159
		160	if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
		161	int dmflags = FILP_DELAY_FLAG(file) \| DM_SEM_FLAG_RD(ioflags);
		162	int iolock = XFS_IOLOCK_SHARED;
		163
		164	ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *offset, size,
		165	dmflags, &iolock);
		166	if (ret) {
		167	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
		168	if (unlikely(ioflags & IO_ISDIRECT))
		169	mutex_unlock(&inode->i_mutex);
		170	return ret;
		171	}
		172	}
		173
		174	if (unlikely(ioflags & IO_ISDIRECT)) {
		175	if (inode->i_mapping->nrpages)
		176	ret = -xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK),
		177	-1, FI_REMAPF_LOCKED);
		178	mutex_unlock(&inode->i_mutex);
		179	if (ret) {
		180	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
		181	return ret;
		182	}
		183	}
		184
		185	trace_xfs_file_read(ip, size, *offset, ioflags);
		186
		187	iocb->ki_pos = *offset;
		188	ret = generic_file_aio_read(iocb, iovp, segs, *offset);
		189	if (ret > 0)
		190	XFS_STATS_ADD(xs_read_bytes, ret);
		191
		192	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
		193	return ret;
		194	}
		195
		196	ssize_t
		197	xfs_splice_read(
		198	xfs_inode_t *ip,
		199	struct file *infilp,
		200	loff_t *ppos,
		201	struct pipe_inode_info *pipe,
		202	size_t count,
		203	int flags,
		204	int ioflags)
		205	{
		206	xfs_mount_t *mp = ip->i_mount;
		207	ssize_t ret;
		208
		209	XFS_STATS_INC(xs_read_calls);
		210	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		211	return -EIO;
		212
		213	xfs_ilock(ip, XFS_IOLOCK_SHARED);
		214
		215	if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
		216	int iolock = XFS_IOLOCK_SHARED;
		217	int error;
		218
		219	error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count,
		220	FILP_DELAY_FLAG(infilp), &iolock);
		221	if (error) {
		222	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
		223	return -error;
		224	}
		225	}
		226
		227	trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
		228
		229	ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
		230	if (ret > 0)
		231	XFS_STATS_ADD(xs_read_bytes, ret);
		232
		233	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
		234	return ret;
		235	}
		236
		237	ssize_t
		238	xfs_splice_write(
		239	xfs_inode_t *ip,
		240	struct pipe_inode_info *pipe,
		241	struct file *outfilp,
		242	loff_t *ppos,
		243	size_t count,
		244	int flags,
		245	int ioflags)
		246	{
		247	xfs_mount_t *mp = ip->i_mount;
		248	ssize_t ret;
		249	struct inode *inode = outfilp->f_mapping->host;
		250	xfs_fsize_t isize, new_size;
		251
		252	XFS_STATS_INC(xs_write_calls);
		253	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		254	return -EIO;
		255
		256	xfs_ilock(ip, XFS_IOLOCK_EXCL);
		257
		258	if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
		259	int iolock = XFS_IOLOCK_EXCL;
		260	int error;
		261
		262	error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count,
		263	FILP_DELAY_FLAG(outfilp), &iolock);
		264	if (error) {
		265	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
		266	return -error;
		267	}
		268	}
		269
		270	new_size = *ppos + count;
		271
		272	xfs_ilock(ip, XFS_ILOCK_EXCL);
		273	if (new_size > ip->i_size)
		274	ip->i_new_size = new_size;
		275	xfs_iunlock(ip, XFS_ILOCK_EXCL);
		276
		277	trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
		278
		279	ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
		280	if (ret > 0)
		281	XFS_STATS_ADD(xs_write_bytes, ret);
		282
		283	isize = i_size_read(inode);
		284	if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize))
		285	*ppos = isize;
		286
		287	if (*ppos > ip->i_size) {
		288	xfs_ilock(ip, XFS_ILOCK_EXCL);
		289	if (*ppos > ip->i_size)
		290	ip->i_size = *ppos;
		291	xfs_iunlock(ip, XFS_ILOCK_EXCL);
		292	}
		293
		294	if (ip->i_new_size) {
		295	xfs_ilock(ip, XFS_ILOCK_EXCL);
		296	ip->i_new_size = 0;
		297	if (ip->i_d.di_size > ip->i_size)
		298	ip->i_d.di_size = ip->i_size;
		299	xfs_iunlock(ip, XFS_ILOCK_EXCL);
		300	}
		301	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
		302	return ret;
		303	}
		304
		305	/*
		306	* This routine is called to handle zeroing any space in the last
		307	* block of the file that is beyond the EOF. We do this since the
		308	* size is being increased without writing anything to that block
		309	* and we don't want anyone to read the garbage on the disk.
		310	*/
		311	STATIC int /* error (positive) */
		312	xfs_zero_last_block(
		313	xfs_inode_t *ip,
		314	xfs_fsize_t offset,
		315	xfs_fsize_t isize)
		316	{
		317	xfs_fileoff_t last_fsb;
		318	xfs_mount_t *mp = ip->i_mount;
		319	int nimaps;
		320	int zero_offset;
		321	int zero_len;
		322	int error = 0;
		323	xfs_bmbt_irec_t imap;
		324
		325	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
		326
		327	zero_offset = XFS_B_FSB_OFFSET(mp, isize);
		328	if (zero_offset == 0) {
		329	/*
		330	* There are no extra bytes in the last block on disk to
		331	* zero, so return.
		332	*/
		333	return 0;
		334	}
		335
		336	last_fsb = XFS_B_TO_FSBT(mp, isize);
		337	nimaps = 1;
		338	error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
		339	&nimaps, NULL, NULL);
		340	if (error) {
		341	return error;
		342	}
		343	ASSERT(nimaps > 0);
		344	/*
		345	* If the block underlying isize is just a hole, then there
		346	* is nothing to zero.
		347	*/
		348	if (imap.br_startblock == HOLESTARTBLOCK) {
		349	return 0;
		350	}
		351	/*
		352	* Zero the part of the last block beyond the EOF, and write it
		353	* out sync. We need to drop the ilock while we do this so we
		354	* don't deadlock when the buffer cache calls back to us.
		355	*/
		356	xfs_iunlock(ip, XFS_ILOCK_EXCL);
		357
		358	zero_len = mp->m_sb.sb_blocksize - zero_offset;
		359	if (isize + zero_len > offset)
		360	zero_len = offset - isize;
		361	error = xfs_iozero(ip, isize, zero_len);
		362
		363	xfs_ilock(ip, XFS_ILOCK_EXCL);
		364	ASSERT(error >= 0);
		365	return error;
		366	}
		367
		368	/*
		369	* Zero any on disk space between the current EOF and the new,
		370	* larger EOF. This handles the normal case of zeroing the remainder
		371	* of the last block in the file and the unusual case of zeroing blocks
		372	* out beyond the size of the file. This second case only happens
		373	* with fixed size extents and when the system crashes before the inode
		374	* size was updated but after blocks were allocated. If fill is set,
		375	* then any holes in the range are filled and zeroed. If not, the holes
		376	* are left alone as holes.
		377	*/
		378
		379	int /* error (positive) */
		380	xfs_zero_eof(
		381	xfs_inode_t *ip,
		382	xfs_off_t offset, /* starting I/O offset */
		383	xfs_fsize_t isize) /* current inode size */
		384	{
		385	xfs_mount_t *mp = ip->i_mount;
		386	xfs_fileoff_t start_zero_fsb;
		387	xfs_fileoff_t end_zero_fsb;
		388	xfs_fileoff_t zero_count_fsb;
		389	xfs_fileoff_t last_fsb;
		390	xfs_fileoff_t zero_off;
		391	xfs_fsize_t zero_len;
		392	int nimaps;
		393	int error = 0;
		394	xfs_bmbt_irec_t imap;
		395
		396	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL\|XFS_IOLOCK_EXCL));
		397	ASSERT(offset > isize);
		398
		399	/*
		400	* First handle zeroing the block on which isize resides.
		401	* We only zero a part of that block so it is handled specially.
		402	*/
		403	error = xfs_zero_last_block(ip, offset, isize);
		404	if (error) {
		405	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL\|XFS_IOLOCK_EXCL));
		406	return error;
		407	}
		408
		409	/*
		410	* Calculate the range between the new size and the old
		411	* where blocks needing to be zeroed may exist. To get the
		412	* block where the last byte in the file currently resides,
		413	* we need to subtract one from the size and truncate back
		414	* to a block boundary. We subtract 1 in case the size is
		415	* exactly on a block boundary.
		416	*/
		417	last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
		418	start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
		419	end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
		420	ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
		421	if (last_fsb == end_zero_fsb) {
		422	/*
		423	* The size was only incremented on its last block.
		424	* We took care of that above, so just return.
		425	*/
		426	return 0;
		427	}
		428
		429	ASSERT(start_zero_fsb <= end_zero_fsb);
		430	while (start_zero_fsb <= end_zero_fsb) {
		431	nimaps = 1;
		432	zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
		433	error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
		434	0, NULL, 0, &imap, &nimaps, NULL, NULL);
		435	if (error) {
		436	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL\|XFS_IOLOCK_EXCL));
		437	return error;
		438	}
		439	ASSERT(nimaps > 0);
		440
		441	if (imap.br_state == XFS_EXT_UNWRITTEN \|\|
		442	imap.br_startblock == HOLESTARTBLOCK) {
		443	/*
		444	* This loop handles initializing pages that were
		445	* partially initialized by the code below this
		446	* loop. It basically zeroes the part of the page
		447	* that sits on a hole and sets the page as P_HOLE
		448	* and calls remapf if it is a mapped file.
		449	*/
		450	start_zero_fsb = imap.br_startoff + imap.br_blockcount;
		451	ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
		452	continue;
		453	}
		454
		455	/*
		456	* There are blocks we need to zero.
		457	* Drop the inode lock while we're doing the I/O.
		458	* We'll still have the iolock to protect us.
		459	*/
		460	xfs_iunlock(ip, XFS_ILOCK_EXCL);
		461
		462	zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
		463	zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
		464
		465	if ((zero_off + zero_len) > offset)
		466	zero_len = offset - zero_off;
		467
		468	error = xfs_iozero(ip, zero_off, zero_len);
		469	if (error) {
		470	goto out_lock;
		471	}
		472
		473	start_zero_fsb = imap.br_startoff + imap.br_blockcount;
		474	ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
		475
		476	xfs_ilock(ip, XFS_ILOCK_EXCL);
		477	}
		478
		479	return 0;
		480
		481	out_lock:
		482	xfs_ilock(ip, XFS_ILOCK_EXCL);
		483	ASSERT(error >= 0);
		484	return error;
		485	}
		486
		487	ssize_t /* bytes written, or (-) error */
		488	xfs_write(
		489	struct xfs_inode *xip,
		490	struct kiocb *iocb,
		491	const struct iovec *iovp,
		492	unsigned int nsegs,
		493	loff_t *offset,
		494	int ioflags)
		495	{
		496	struct file *file = iocb->ki_filp;
		497	struct address_space *mapping = file->f_mapping;
		498	struct inode *inode = mapping->host;
		499	unsigned long segs = nsegs;
		500	xfs_mount_t *mp;
		501	ssize_t ret = 0, error = 0;
		502	xfs_fsize_t isize, new_size;
		503	int iolock;
		504	int eventsent = 0;
		505	size_t ocount = 0, count;
		506	loff_t pos;
		507	int need_i_mutex;
		508
		509	XFS_STATS_INC(xs_write_calls);
		510
		511	error = generic_segment_checks(iovp, &segs, &ocount, VERIFY_READ);
		512	if (error)
		513	return error;
		514
		515	count = ocount;
		516	pos = *offset;
		517
		518	if (count == 0)
		519	return 0;
		520
		521	mp = xip->i_mount;
		522
		523	xfs_wait_for_freeze(mp, SB_FREEZE_WRITE);
		524
		525	if (XFS_FORCED_SHUTDOWN(mp))
		526	return -EIO;
		527
		528	relock:
		529	if (ioflags & IO_ISDIRECT) {
		530	iolock = XFS_IOLOCK_SHARED;
		531	need_i_mutex = 0;
		532	} else {
		533	iolock = XFS_IOLOCK_EXCL;
		534	need_i_mutex = 1;
		535	mutex_lock(&inode->i_mutex);
		536	}
		537
		538	xfs_ilock(xip, XFS_ILOCK_EXCL\|iolock);
		539
		540	start:
		541	error = -generic_write_checks(file, &pos, &count,
		542	S_ISBLK(inode->i_mode));
		543	if (error) {
		544	xfs_iunlock(xip, XFS_ILOCK_EXCL\|iolock);
		545	goto out_unlock_mutex;
		546	}
		547
		548	if ((DM_EVENT_ENABLED(xip, DM_EVENT_WRITE) &&
		549	!(ioflags & IO_INVIS) && !eventsent)) {
		550	int dmflags = FILP_DELAY_FLAG(file);
		551
		552	if (need_i_mutex)
		553	dmflags \|= DM_FLAGS_IMUX;
		554
		555	xfs_iunlock(xip, XFS_ILOCK_EXCL);
		556	error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, xip,
		557	pos, count, dmflags, &iolock);
		558	if (error) {
		559	goto out_unlock_internal;
		560	}
		561	xfs_ilock(xip, XFS_ILOCK_EXCL);
		562	eventsent = 1;
		563
		564	/*
		565	* The iolock was dropped and reacquired in XFS_SEND_DATA
		566	* so we have to recheck the size when appending.
		567	* We will only "goto start;" once, since having sent the
		568	* event prevents another call to XFS_SEND_DATA, which is
		569	* what allows the size to change in the first place.
		570	*/
		571	if ((file->f_flags & O_APPEND) && pos != xip->i_size)
		572	goto start;
		573	}
		574
		575	if (ioflags & IO_ISDIRECT) {
		576	xfs_buftarg_t *target =
		577	XFS_IS_REALTIME_INODE(xip) ?
		578	mp->m_rtdev_targp : mp->m_ddev_targp;
		579
		580	if ((pos & target->bt_smask) \|\| (count & target->bt_smask)) {
		581	xfs_iunlock(xip, XFS_ILOCK_EXCL\|iolock);
		582	return XFS_ERROR(-EINVAL);
		583	}
		584
		585	if (!need_i_mutex && (mapping->nrpages \|\| pos > xip->i_size)) {
		586	xfs_iunlock(xip, XFS_ILOCK_EXCL\|iolock);
		587	iolock = XFS_IOLOCK_EXCL;
		588	need_i_mutex = 1;
		589	mutex_lock(&inode->i_mutex);
		590	xfs_ilock(xip, XFS_ILOCK_EXCL\|iolock);
		591	goto start;
		592	}
		593	}
		594
		595	new_size = pos + count;
		596	if (new_size > xip->i_size)
		597	xip->i_new_size = new_size;
		598
		599	if (likely(!(ioflags & IO_INVIS)))
		600	file_update_time(file);
		601
		602	/*
		603	* If the offset is beyond the size of the file, we have a couple
		604	* of things to do. First, if there is already space allocated
		605	* we need to either create holes or zero the disk or ...
		606	*
		607	* If there is a page where the previous size lands, we need
		608	* to zero it out up to the new size.
		609	*/
		610
		611	if (pos > xip->i_size) {
		612	error = xfs_zero_eof(xip, pos, xip->i_size);
		613	if (error) {
		614	xfs_iunlock(xip, XFS_ILOCK_EXCL);
		615	goto out_unlock_internal;
		616	}
		617	}
		618	xfs_iunlock(xip, XFS_ILOCK_EXCL);
		619
		620	/*
		621	* If we're writing the file then make sure to clear the
		622	* setuid and setgid bits if the process is not being run
		623	* by root. This keeps people from modifying setuid and
		624	* setgid binaries.
		625	*/
		626	error = -file_remove_suid(file);
		627	if (unlikely(error))
		628	goto out_unlock_internal;
		629
		630	/* We can write back this queue in page reclaim */
		631	current->backing_dev_info = mapping->backing_dev_info;
		632
		633	if ((ioflags & IO_ISDIRECT)) {
		634	if (mapping->nrpages) {
		635	WARN_ON(need_i_mutex == 0);
		636	error = xfs_flushinval_pages(xip,
		637	(pos & PAGE_CACHE_MASK),
		638	-1, FI_REMAPF_LOCKED);
		639	if (error)
		640	goto out_unlock_internal;
		641	}
		642
		643	if (need_i_mutex) {
		644	/* demote the lock now the cached pages are gone */
		645	xfs_ilock_demote(xip, XFS_IOLOCK_EXCL);
		646	mutex_unlock(&inode->i_mutex);
		647
		648	iolock = XFS_IOLOCK_SHARED;
		649	need_i_mutex = 0;
		650	}
		651
		652	trace_xfs_file_direct_write(xip, count, *offset, ioflags);
		653	ret = generic_file_direct_write(iocb, iovp,
		654	&segs, pos, offset, count, ocount);
		655
		656	/*
		657	* direct-io write to a hole: fall through to buffered I/O
		658	* for completing the rest of the request.
		659	*/
		660	if (ret >= 0 && ret != count) {
		661	XFS_STATS_ADD(xs_write_bytes, ret);
		662
		663	pos += ret;
		664	count -= ret;
		665
		666	ioflags &= ~IO_ISDIRECT;
		667	xfs_iunlock(xip, iolock);
		668	goto relock;
		669	}
		670	} else {
		671	int enospc = 0;
		672	ssize_t ret2 = 0;
		673
		674	write_retry:
		675	trace_xfs_file_buffered_write(xip, count, *offset, ioflags);
		676	ret2 = generic_file_buffered_write(iocb, iovp, segs,
		677	pos, offset, count, ret);
		678	/*
		679	* if we just got an ENOSPC, flush the inode now we
		680	* aren't holding any page locks and retry once
		681	*/
		682	if (ret2 == -ENOSPC && !enospc) {
		683	error = xfs_flush_pages(xip, 0, -1, 0, FI_NONE);
		684	if (error)
		685	goto out_unlock_internal;
		686	enospc = 1;
		687	goto write_retry;
		688	}
		689	ret = ret2;
		690	}
		691
		692	current->backing_dev_info = NULL;
		693
		694	isize = i_size_read(inode);
		695	if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
		696	*offset = isize;
		697
		698	if (*offset > xip->i_size) {
		699	xfs_ilock(xip, XFS_ILOCK_EXCL);
		700	if (*offset > xip->i_size)
		701	xip->i_size = *offset;
		702	xfs_iunlock(xip, XFS_ILOCK_EXCL);
		703	}
		704
		705	if (ret == -ENOSPC &&
		706	DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
		707	xfs_iunlock(xip, iolock);
		708	if (need_i_mutex)
		709	mutex_unlock(&inode->i_mutex);
		710	error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, xip,
		711	DM_RIGHT_NULL, xip, DM_RIGHT_NULL, NULL, NULL,
		712	0, 0, 0); /* Delay flag intentionally unused */
		713	if (need_i_mutex)
		714	mutex_lock(&inode->i_mutex);
		715	xfs_ilock(xip, iolock);
		716	if (error)
		717	goto out_unlock_internal;
		718	goto start;
		719	}
		720
		721	error = -ret;
		722	if (ret <= 0)
		723	goto out_unlock_internal;
		724
		725	XFS_STATS_ADD(xs_write_bytes, ret);
		726
		727	/* Handle various SYNC-type writes */
		728	if ((file->f_flags & O_DSYNC) \|\| IS_SYNC(inode)) {
		729	loff_t end = pos + ret - 1;
		730	int error2;
		731
		732	xfs_iunlock(xip, iolock);
		733	if (need_i_mutex)
		734	mutex_unlock(&inode->i_mutex);
		735
		736	error2 = filemap_write_and_wait_range(mapping, pos, end);
		737	if (!error)
		738	error = error2;
		739	if (need_i_mutex)
		740	mutex_lock(&inode->i_mutex);
		741	xfs_ilock(xip, iolock);
		742
		743	error2 = xfs_fsync(xip);
		744	if (!error)
		745	error = error2;
		746	}
		747
		748	out_unlock_internal:
		749	if (xip->i_new_size) {
		750	xfs_ilock(xip, XFS_ILOCK_EXCL);
		751	xip->i_new_size = 0;
		752	/*
		753	* If this was a direct or synchronous I/O that failed (such
		754	* as ENOSPC) then part of the I/O may have been written to
		755	* disk before the error occured. In this case the on-disk
		756	* file size may have been adjusted beyond the in-memory file
		757	* size and now needs to be truncated back.
		758	*/
		759	if (xip->i_d.di_size > xip->i_size)
		760	xip->i_d.di_size = xip->i_size;
		761	xfs_iunlock(xip, XFS_ILOCK_EXCL);
		762	}
		763	xfs_iunlock(xip, iolock);
		764	out_unlock_mutex:
		765	if (need_i_mutex)
		766	mutex_unlock(&inode->i_mutex);
		767	return -error;
		768	}
		769
47	STATIC ssize_t	770	STATIC ssize_t
48	xfs_file_aio_read(	771	xfs_file_aio_read(
49	struct kiocb *iocb,	772	struct kiocb *iocb,