From 4b4fa25ced2d719a06a3a63009bea1cf1fbedd55 Mon Sep 17 00:00:00 2001 From: Mandy Kirkconnell Date: Fri, 31 Mar 2006 13:03:58 +1000 Subject: [XFS] Cleanup comment to remove reference to obsoleted function xfs_bmap_do_search_extents(). SGI-PV: 951415 SGI-Modid: xfs-linux-melb:xfs-kern:208491a Signed-off-by: Mandy Kirkconnell Signed-off-by: Nathan Scott --- fs/xfs/xfs_bmap.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index f83399c89c..8e0d73d9cc 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -353,10 +353,11 @@ xfs_check_nostate_extents( xfs_extnum_t num); /* - * Call xfs_bmap_do_search_extents() to search for the extent - * record containing block bno. If in multi-level in-core extent - * allocation mode, find and extract the target extent buffer, - * otherwise just use the direct extent list. + * Search the extent records for the entry containing block bno. + * If bno lies in a hole, point to the next entry. If bno lies + * past eof, *eofp will be set, and *prevp will contain the last + * entry (null if none). Else, *lastxp will be set to the index + * of the found entry; *gotp will contain the entry. */ xfs_bmbt_rec_t * xfs_bmap_search_multi_extents(struct xfs_ifork *, xfs_fileoff_t, int *, -- cgit v1.2.2 From 764d1f89a5f2b914bc13b1b8b8920a600a5fba10 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 31 Mar 2006 13:04:17 +1000 Subject: [XFS] Implement the silent parameter to fill_super, previously ignored. SGI-PV: 951299 SGI-Modid: xfs-linux-melb:xfs-kern:25632a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_super.c | 11 +++---- fs/xfs/xfs_clnt.h | 1 + fs/xfs/xfs_error.h | 3 ++ fs/xfs/xfs_mount.c | 71 +++++++++++++++++++------------------------- fs/xfs/xfs_mount.h | 5 +++- fs/xfs/xfs_vfsops.c | 5 +++- 6 files changed, 49 insertions(+), 47 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 1884300417..68f4793e8a 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -67,7 +67,8 @@ mempool_t *xfs_ioend_pool; STATIC struct xfs_mount_args * xfs_args_allocate( - struct super_block *sb) + struct super_block *sb, + int silent) { struct xfs_mount_args *args; @@ -80,8 +81,8 @@ xfs_args_allocate( args->flags |= XFSMNT_DIRSYNC; if (sb->s_flags & MS_SYNCHRONOUS) args->flags |= XFSMNT_WSYNC; - - /* Default to 32 bit inodes on Linux all the time */ + if (silent) + args->flags |= XFSMNT_QUIET; args->flags |= XFSMNT_32BITINODES; return args; @@ -719,7 +720,7 @@ xfs_fs_remount( char *options) { vfs_t *vfsp = vfs_from_sb(sb); - struct xfs_mount_args *args = xfs_args_allocate(sb); + struct xfs_mount_args *args = xfs_args_allocate(sb, 0); int error; VFS_PARSEARGS(vfsp, options, args, 1, error); @@ -825,7 +826,7 @@ xfs_fs_fill_super( { vnode_t *rootvp; struct vfs *vfsp = vfs_allocate(sb); - struct xfs_mount_args *args = xfs_args_allocate(sb); + struct xfs_mount_args *args = xfs_args_allocate(sb, silent); struct kstatfs statvfs; int error, error2; diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h index 022fff6208..5b7eb81453 100644 --- a/fs/xfs/xfs_clnt.h +++ b/fs/xfs/xfs_clnt.h @@ -68,6 +68,7 @@ struct xfs_mount_args { * enforcement */ #define XFSMNT_PQUOTAENF 0x00000040 /* IRIX project quota limit * enforcement */ +#define XFSMNT_QUIET 0x00000080 /* don't report mount errors */ #define XFSMNT_NOALIGN 0x00000200 /* don't allocate at * stripe boundaries*/ #define XFSMNT_RETERR 0x00000400 /* return error to user */ diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 26b8e709a5..bc43163456 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -186,4 +186,7 @@ extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...); #define xfs_fs_repair_cmn_err(level, mp, fmt, args...) \ xfs_fs_cmn_err(level, mp, fmt " Unmount and run xfs_repair.", ## args) +#define xfs_fs_mount_cmn_err(f, fmt, args...) \ + ((f & XFS_MFSI_QUIET)? cmn_err(CE_WARN, "XFS: " fmt, ## args) : (void)0) + #endif /* __XFS_ERROR_H__ */ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 72e7e78bff..049fabb7f7 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -213,7 +213,8 @@ xfs_mount_free( STATIC int xfs_mount_validate_sb( xfs_mount_t *mp, - xfs_sb_t *sbp) + xfs_sb_t *sbp, + int flags) { /* * If the log device and data device have the @@ -223,33 +224,29 @@ xfs_mount_validate_sb( * a volume filesystem in a non-volume manner. */ if (sbp->sb_magicnum != XFS_SB_MAGIC) { - cmn_err(CE_WARN, "XFS: bad magic number"); + xfs_fs_mount_cmn_err(flags, "bad magic number"); return XFS_ERROR(EWRONGFS); } if (!XFS_SB_GOOD_VERSION(sbp)) { - cmn_err(CE_WARN, "XFS: bad version"); + xfs_fs_mount_cmn_err(flags, "bad version"); return XFS_ERROR(EWRONGFS); } if (unlikely( sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { - cmn_err(CE_WARN, - "XFS: filesystem is marked as having an external log; " - "specify logdev on the\nmount command line."); - XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(1)", - XFS_ERRLEVEL_HIGH, mp, sbp); - return XFS_ERROR(EFSCORRUPTED); + xfs_fs_mount_cmn_err(flags, + "filesystem is marked as having an external log; " + "specify logdev on the\nmount command line."); + return XFS_ERROR(EINVAL); } if (unlikely( sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) { - cmn_err(CE_WARN, - "XFS: filesystem is marked as having an internal log; " - "don't specify logdev on\nthe mount command line."); - XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(2)", - XFS_ERRLEVEL_HIGH, mp, sbp); - return XFS_ERROR(EFSCORRUPTED); + xfs_fs_mount_cmn_err(flags, + "filesystem is marked as having an internal log; " + "do not specify logdev on\nthe mount command line."); + return XFS_ERROR(EINVAL); } /* @@ -274,9 +271,7 @@ xfs_mount_validate_sb( (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || (sbp->sb_imax_pct > 100 || sbp->sb_imax_pct < 1))) { - cmn_err(CE_WARN, "XFS: SB sanity check 1 failed"); - XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(3)", - XFS_ERRLEVEL_LOW, mp, sbp); + xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed"); return XFS_ERROR(EFSCORRUPTED); } @@ -289,9 +284,7 @@ xfs_mount_validate_sb( (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks || sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) * sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) { - cmn_err(CE_WARN, "XFS: SB sanity check 2 failed"); - XFS_ERROR_REPORT("xfs_mount_validate_sb(4)", - XFS_ERRLEVEL_LOW, mp); + xfs_fs_mount_cmn_err(flags, "SB sanity check 2 failed"); return XFS_ERROR(EFSCORRUPTED); } @@ -307,15 +300,13 @@ xfs_mount_validate_sb( (sbp->sb_dblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX || (sbp->sb_rblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX)) { #endif - cmn_err(CE_WARN, - "XFS: File system is too large to be mounted on this system."); + xfs_fs_mount_cmn_err(flags, + "file system too large to be mounted on this system."); return XFS_ERROR(E2BIG); } if (unlikely(sbp->sb_inprogress)) { - cmn_err(CE_WARN, "XFS: file system busy"); - XFS_ERROR_REPORT("xfs_mount_validate_sb(5)", - XFS_ERRLEVEL_LOW, mp); + xfs_fs_mount_cmn_err(flags, "file system busy"); return XFS_ERROR(EFSCORRUPTED); } @@ -323,8 +314,8 @@ xfs_mount_validate_sb( * Version 1 directory format has never worked on Linux. */ if (unlikely(!XFS_SB_VERSION_HASDIRV2(sbp))) { - cmn_err(CE_WARN, - "XFS: Attempted to mount file system using version 1 directory format"); + xfs_fs_mount_cmn_err(flags, + "file system using version 1 directory format"); return XFS_ERROR(ENOSYS); } @@ -332,11 +323,11 @@ xfs_mount_validate_sb( * Until this is fixed only page-sized or smaller data blocks work. */ if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) { - cmn_err(CE_WARN, - "XFS: Attempted to mount file system with blocksize %d bytes", + xfs_fs_mount_cmn_err(flags, + "file system with blocksize %d bytes", sbp->sb_blocksize); - cmn_err(CE_WARN, - "XFS: Only page-sized (%ld) or less blocksizes currently work.", + xfs_fs_mount_cmn_err(flags, + "only pagesize (%ld) or less will currently work.", PAGE_SIZE); return XFS_ERROR(ENOSYS); } @@ -484,7 +475,7 @@ xfs_xlatesb( * Does the initial read of the superblock. */ int -xfs_readsb(xfs_mount_t *mp) +xfs_readsb(xfs_mount_t *mp, int flags) { unsigned int sector_size; unsigned int extra_flags; @@ -506,7 +497,7 @@ xfs_readsb(xfs_mount_t *mp) bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size), extra_flags); if (!bp || XFS_BUF_ISERROR(bp)) { - cmn_err(CE_WARN, "XFS: SB read failed"); + xfs_fs_mount_cmn_err(flags, "SB read failed"); error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; goto fail; } @@ -520,9 +511,9 @@ xfs_readsb(xfs_mount_t *mp) sbp = XFS_BUF_TO_SBP(bp); xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), 1, XFS_SB_ALL_BITS); - error = xfs_mount_validate_sb(mp, &(mp->m_sb)); + error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); if (error) { - cmn_err(CE_WARN, "XFS: SB validate failed"); + xfs_fs_mount_cmn_err(flags, "SB validate failed"); goto fail; } @@ -530,8 +521,8 @@ xfs_readsb(xfs_mount_t *mp) * We must be able to do sector-sized and sector-aligned IO. */ if (sector_size > mp->m_sb.sb_sectsize) { - cmn_err(CE_WARN, - "XFS: device supports only %u byte sectors (not %u)", + xfs_fs_mount_cmn_err(flags, + "device supports only %u byte sectors (not %u)", sector_size, mp->m_sb.sb_sectsize); error = ENOSYS; goto fail; @@ -548,7 +539,7 @@ xfs_readsb(xfs_mount_t *mp) bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size), extra_flags); if (!bp || XFS_BUF_ISERROR(bp)) { - cmn_err(CE_WARN, "XFS: SB re-read failed"); + xfs_fs_mount_cmn_err(flags, "SB re-read failed"); error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; goto fail; } @@ -678,7 +669,7 @@ xfs_mountfs( int error = 0; if (mp->m_sb_bp == NULL) { - if ((error = xfs_readsb(mp))) { + if ((error = xfs_readsb(mp, mfsi_flags))) { return error; } } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 66cbee7986..668ad23fd3 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -510,9 +510,12 @@ xfs_preferred_iosize(xfs_mount_t *mp) */ #define XFS_MFSI_SECOND 0x01 /* Secondary mount -- skip stuff */ #define XFS_MFSI_CLIENT 0x02 /* Is a client -- skip lots of stuff */ +/* XFS_MFSI_RRINODES */ #define XFS_MFSI_NOUNLINK 0x08 /* Skip unlinked inode processing in */ /* log recovery */ #define XFS_MFSI_NO_QUOTACHECK 0x10 /* Skip quotacheck processing */ +/* XFS_MFSI_CONVERT_SUNIT */ +#define XFS_MFSI_QUIET 0x40 /* Be silent if mount errors found */ /* * Macros for getting from mount to vfs and back. @@ -581,7 +584,7 @@ extern int xfs_mod_incore_sb_unlocked(xfs_mount_t *, xfs_sb_field_t, extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, uint, int); extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); -extern int xfs_readsb(xfs_mount_t *mp); +extern int xfs_readsb(xfs_mount_t *, int); extern void xfs_freesb(xfs_mount_t *); extern void xfs_do_force_shutdown(bhv_desc_t *, int, char *, int); extern int xfs_syncsub(xfs_mount_t *, int, int, int *); diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 504d2a8074..89020c15d8 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -442,6 +442,9 @@ xfs_mount( p = vfs_bhv_lookup(vfsp, VFS_POSITION_IO); mp->m_io_ops = p ? *(xfs_ioops_t *) vfs_bhv_custom(p) : xfs_iocore_xfs; + if (args->flags & XFSMNT_QUIET) + flags |= XFS_MFSI_QUIET; + /* * Open real time and log devices - order is important. */ @@ -492,7 +495,7 @@ xfs_mount( error = xfs_start_flags(vfsp, args, mp); if (error) goto error1; - error = xfs_readsb(mp); + error = xfs_readsb(mp, flags); if (error) goto error1; error = xfs_finish_flags(vfsp, args, mp); -- cgit v1.2.2 From 9a2a7de268f67fea0c450ed3e99a2d31f43d7166 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 31 Mar 2006 13:04:49 +1000 Subject: [XFS] Make project quota enforcement return an error code consistent with its use. SGI-PV: 951300 SGI-Modid: xfs-linux-melb:xfs-kern:25633a Signed-off-by: Nathan Scott --- fs/xfs/quota/xfs_qm.c | 17 ++++++----- fs/xfs/quota/xfs_trans_dquot.c | 68 ++++++++++++++++++++++-------------------- fs/xfs/xfs_bmap.c | 11 ++++--- fs/xfs/xfs_quota.h | 5 ++-- 4 files changed, 54 insertions(+), 47 deletions(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 73c1e5e80c..7fb5eca9bd 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -2624,7 +2624,7 @@ xfs_qm_vop_chown_reserve( { int error; xfs_mount_t *mp; - uint delblks, blkflags; + uint delblks, blkflags, prjflags = 0; xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; ASSERT(XFS_ISLOCKED_INODE(ip)); @@ -2650,10 +2650,13 @@ xfs_qm_vop_chown_reserve( } } if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { - if ((XFS_IS_GQUOTA_ON(ip->i_mount) && - ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id)) || - (XFS_IS_PQUOTA_ON(ip->i_mount) && - ip->i_d.di_projid != be32_to_cpu(gdqp->q_core.d_id))) { + if (XFS_IS_PQUOTA_ON(ip->i_mount) && + ip->i_d.di_projid != be32_to_cpu(gdqp->q_core.d_id)) + prjflags = XFS_QMOPT_ENOSPC; + + if (prjflags || + (XFS_IS_GQUOTA_ON(ip->i_mount) && + ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) { delblksgdq = gdqp; if (delblks) { ASSERT(ip->i_gdquot); @@ -2664,7 +2667,7 @@ xfs_qm_vop_chown_reserve( if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, delblksudq, delblksgdq, ip->i_d.di_nblocks, 1, - flags | blkflags))) + flags | blkflags | prjflags))) return (error); /* @@ -2681,7 +2684,7 @@ xfs_qm_vop_chown_reserve( ASSERT(unresudq || unresgdq); if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0, - flags | blkflags))) + flags | blkflags | prjflags))) return (error); xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0, diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c index d8e131ec0a..9168918db2 100644 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ b/fs/xfs/quota/xfs_trans_dquot.c @@ -595,12 +595,19 @@ xfs_trans_unreserve_and_mod_dquots( } } +STATIC int +xfs_quota_error(uint flags) +{ + if (flags & XFS_QMOPT_ENOSPC) + return ENOSPC; + return EDQUOT; +} + /* * This reserves disk blocks and inodes against a dquot. * Flags indicate if the dquot is to be locked here and also * if the blk reservation is for RT or regular blocks. * Sending in XFS_QMOPT_FORCE_RES flag skips the quota check. - * Returns EDQUOT if quota is exceeded. */ STATIC int xfs_trans_dqresv( @@ -666,19 +673,15 @@ xfs_trans_dqresv( */ if (hardlimit > 0ULL && (hardlimit <= nblks + *resbcountp)) { - error = EDQUOT; + error = xfs_quota_error(flags); goto error_return; } if (softlimit > 0ULL && (softlimit <= nblks + *resbcountp)) { - /* - * If timer or warnings has expired, - * return EDQUOT - */ if ((timer != 0 && get_seconds() > timer) || (warns != 0 && warns >= warnlimit)) { - error = EDQUOT; + error = xfs_quota_error(flags); goto error_return; } } @@ -695,16 +698,12 @@ xfs_trans_dqresv( if (!softlimit) softlimit = q->qi_isoftlimit; if (hardlimit > 0ULL && count >= hardlimit) { - error = EDQUOT; + error = xfs_quota_error(flags); goto error_return; } else if (softlimit > 0ULL && count >= softlimit) { - /* - * If timer or warnings has expired, - * return EDQUOT - */ if ((timer != 0 && get_seconds() > timer) || (warns != 0 && warns >= warnlimit)) { - error = EDQUOT; + error = xfs_quota_error(flags); goto error_return; } } @@ -751,13 +750,14 @@ error_return: /* - * Given a dquot(s), make disk block and/or inode reservations against them. + * Given dquot(s), make disk block and/or inode reservations against them. * The fact that this does the reservation against both the usr and - * grp quotas is important, because this follows a both-or-nothing + * grp/prj quotas is important, because this follows a both-or-nothing * approach. * * flags = XFS_QMOPT_DQLOCK indicate if dquot(s) need to be locked. * XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown. + * XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT. Used by pquota. * XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks * XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks * dquots are unlocked on return, if they were not locked by caller. @@ -772,25 +772,27 @@ xfs_trans_reserve_quota_bydquots( long ninos, uint flags) { - int resvd; + int resvd = 0, error; - if (! XFS_IS_QUOTA_ON(mp)) - return (0); + if (!XFS_IS_QUOTA_ON(mp)) + return 0; if (tp && tp->t_dqinfo == NULL) xfs_trans_alloc_dqinfo(tp); ASSERT(flags & XFS_QMOPT_RESBLK_MASK); - resvd = 0; if (udqp) { - if (xfs_trans_dqresv(tp, mp, udqp, nblks, ninos, flags)) - return (EDQUOT); + error = xfs_trans_dqresv(tp, mp, udqp, nblks, ninos, + (flags & ~XFS_QMOPT_ENOSPC)); + if (error) + return error; resvd = 1; } if (gdqp) { - if (xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags)) { + error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags); + if (error) { /* * can't do it, so backout previous reservation */ @@ -799,14 +801,14 @@ xfs_trans_reserve_quota_bydquots( xfs_trans_dqresv(tp, mp, udqp, -nblks, -ninos, flags); } - return (EDQUOT); + return error; } } /* * Didn't change anything critical, so, no need to log */ - return (0); + return 0; } @@ -814,8 +816,6 @@ xfs_trans_reserve_quota_bydquots( * Lock the dquot and change the reservation if we can. * This doesn't change the actual usage, just the reservation. * The inode sent in is locked. - * - * Returns 0 on success, EDQUOT or other errors otherwise */ STATIC int xfs_trans_reserve_quota_nblks( @@ -824,20 +824,24 @@ xfs_trans_reserve_quota_nblks( xfs_inode_t *ip, long nblks, long ninos, - uint type) + uint flags) { int error; if (!XFS_IS_QUOTA_ON(mp)) - return (0); + return 0; + if (XFS_IS_PQUOTA_ON(mp)) + flags |= XFS_QMOPT_ENOSPC; ASSERT(ip->i_ino != mp->m_sb.sb_uquotino); ASSERT(ip->i_ino != mp->m_sb.sb_gquotino); ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); - ASSERT((type & ~XFS_QMOPT_FORCE_RES) == XFS_TRANS_DQ_RES_RTBLKS || - (type & ~XFS_QMOPT_FORCE_RES) == XFS_TRANS_DQ_RES_BLKS); + ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == + XFS_TRANS_DQ_RES_RTBLKS || + (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == + XFS_TRANS_DQ_RES_BLKS); /* * Reserve nblks against these dquots, with trans as the mediator. @@ -845,8 +849,8 @@ xfs_trans_reserve_quota_nblks( error = xfs_trans_reserve_quota_bydquots(tp, mp, ip->i_udquot, ip->i_gdquot, nblks, ninos, - type); - return (error); + flags); + return error; } /* diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index d384e48970..26939d364b 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -4719,18 +4719,17 @@ xfs_bmapi( /* * Make a transaction-less quota reservation for * delayed allocation blocks. This number gets - * adjusted later. - * We return EDQUOT if we haven't allocated - * blks already inside this loop; + * adjusted later. We return if we haven't + * allocated blocks already inside this loop. */ - if (XFS_TRANS_RESERVE_QUOTA_NBLKS( + if ((error = XFS_TRANS_RESERVE_QUOTA_NBLKS( mp, NULL, ip, (long)alen, 0, rt ? XFS_QMOPT_RES_RTBLKS : - XFS_QMOPT_RES_REGBLKS)) { + XFS_QMOPT_RES_REGBLKS))) { if (n == 0) { *nmap = 0; ASSERT(cur == NULL); - return XFS_ERROR(EDQUOT); + return error; } break; } diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 4f6a034de7..7fbef974bc 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -196,10 +196,11 @@ typedef struct xfs_qoff_logformat { #define XFS_QMOPT_QUOTAOFF 0x0000080 /* quotas are being turned off */ #define XFS_QMOPT_UMOUNTING 0x0000100 /* filesys is being unmounted */ #define XFS_QMOPT_DOLOG 0x0000200 /* log buf changes (in quotacheck) */ -#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if necessary */ +#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ #define XFS_QMOPT_ILOCKED 0x0000800 /* inode is already locked (excl) */ -#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot, if damaged. */ +#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ #define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ +#define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */ /* * flags to xfs_trans_mod_dquot to indicate which field needs to be -- cgit v1.2.2 From 3bbcc8e3976f8bba2fd607c8850d7dfe7e332fda Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 31 Mar 2006 13:04:56 +1000 Subject: [XFS] Reenable write barriers by default. SGI-PV: 912426 SGI-Modid: xfs-linux-melb:xfs-kern:25634a Signed-off-by: Nathan Scott --- fs/xfs/xfs_vfsops.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 89020c15d8..f0e09ca141 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -1700,8 +1700,9 @@ xfs_parseargs( int dsunit, dswidth, vol_dsunit, vol_dswidth; int iosize; - args->flags2 |= XFSMNT2_COMPAT_IOSIZE; args->flags |= XFSMNT_IDELETE; + args->flags |= XFSMNT_BARRIER; + args->flags2 |= XFSMNT2_COMPAT_IOSIZE; if (!options) goto done; @@ -1950,8 +1951,6 @@ xfs_showargs( seq_printf(m, "," MNTOPT_IKEEP); if (!(mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE)) seq_printf(m, "," MNTOPT_LARGEIO); - if (mp->m_flags & XFS_MOUNT_BARRIER) - seq_printf(m, "," MNTOPT_BARRIER); if (!(vfsp->vfs_flag & VFS_32BITINODES)) seq_printf(m, "," MNTOPT_64BITINODE); -- cgit v1.2.2 From 1b895840ce93fd2d150a86c800a3085eaab4eb9e Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 31 Mar 2006 13:08:59 +1000 Subject: [XFS] Provide XFS support for the splice syscall. Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_file.c | 113 ++++++++++++++++++++++++++++------------ fs/xfs/linux-2.6/xfs_linux.h | 1 + fs/xfs/linux-2.6/xfs_lrw.c | 120 +++++++++++++++++++++++++++++++++---------- fs/xfs/linux-2.6/xfs_lrw.h | 11 +++- fs/xfs/linux-2.6/xfs_vnode.h | 12 +++++ fs/xfs/xfs_vnodeops.c | 4 ++ 6 files changed, 199 insertions(+), 62 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 85997b1205..ae4c4754ed 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -69,7 +69,6 @@ __xfs_file_read( return rval; } - STATIC ssize_t xfs_file_aio_read( struct kiocb *iocb, @@ -90,7 +89,6 @@ xfs_file_aio_read_invis( return __xfs_file_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos); } - STATIC inline ssize_t __xfs_file_write( struct kiocb *iocb, @@ -113,7 +111,6 @@ __xfs_file_write( return rval; } - STATIC ssize_t xfs_file_aio_write( struct kiocb *iocb, @@ -134,7 +131,6 @@ xfs_file_aio_write_invis( return __xfs_file_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos); } - STATIC inline ssize_t __xfs_file_readv( struct file *file, @@ -179,7 +175,6 @@ xfs_file_readv_invis( return __xfs_file_readv(file, iov, IO_INVIS, nr_segs, ppos); } - STATIC inline ssize_t __xfs_file_writev( struct file *file, @@ -204,7 +199,6 @@ __xfs_file_writev( return rval; } - STATIC ssize_t xfs_file_writev( struct file *file, @@ -228,7 +222,7 @@ xfs_file_writev_invis( STATIC ssize_t xfs_file_sendfile( struct file *filp, - loff_t *ppos, + loff_t *pos, size_t count, read_actor_t actor, void *target) @@ -236,10 +230,80 @@ xfs_file_sendfile( vnode_t *vp = vn_from_inode(filp->f_dentry->d_inode); ssize_t rval; - VOP_SENDFILE(vp, filp, ppos, 0, count, actor, target, NULL, rval); + VOP_SENDFILE(vp, filp, pos, 0, count, actor, target, NULL, rval); return rval; } +STATIC ssize_t +xfs_file_sendfile_invis( + struct file *filp, + loff_t *pos, + size_t count, + read_actor_t actor, + void *target) +{ + vnode_t *vp = vn_from_inode(filp->f_dentry->d_inode); + ssize_t rval; + + VOP_SENDFILE(vp, filp, pos, IO_INVIS, count, actor, target, NULL, rval); + return rval; +} + +STATIC ssize_t +xfs_file_splice_read( + struct file *infilp, + struct inode *pipe, + size_t len, + unsigned int flags) +{ + vnode_t *vp = vn_from_inode(infilp->f_dentry->d_inode); + ssize_t rval; + + VOP_SPLICE_READ(vp, infilp, pipe, len, flags, 0, NULL, rval); + return rval; +} + +STATIC ssize_t +xfs_file_splice_read_invis( + struct file *infilp, + struct inode *pipe, + size_t len, + unsigned int flags) +{ + vnode_t *vp = vn_from_inode(infilp->f_dentry->d_inode); + ssize_t rval; + + VOP_SPLICE_READ(vp, infilp, pipe, len, flags, IO_INVIS, NULL, rval); + return rval; +} + +STATIC ssize_t +xfs_file_splice_write( + struct inode *pipe, + struct file *outfilp, + size_t len, + unsigned int flags) +{ + vnode_t *vp = vn_from_inode(outfilp->f_dentry->d_inode); + ssize_t rval; + + VOP_SPLICE_WRITE(vp, pipe, outfilp, len, flags, 0, NULL, rval); + return rval; +} + +STATIC ssize_t +xfs_file_splice_write_invis( + struct inode *pipe, + struct file *outfilp, + size_t len, + unsigned int flags) +{ + vnode_t *vp = vn_from_inode(outfilp->f_dentry->d_inode); + ssize_t rval; + + VOP_SPLICE_WRITE(vp, pipe, outfilp, len, flags, IO_INVIS, NULL, rval); + return rval; +} STATIC int xfs_file_open( @@ -251,13 +315,10 @@ xfs_file_open( if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) return -EFBIG; - - ASSERT(vp); VOP_OPEN(vp, NULL, error); return -error; } - STATIC int xfs_file_release( struct inode *inode, @@ -271,7 +332,6 @@ xfs_file_release( return -error; } - STATIC int xfs_file_fsync( struct file *filp, @@ -285,21 +345,11 @@ xfs_file_fsync( if (datasync) flags |= FSYNC_DATA; - - ASSERT(vp); VOP_FSYNC(vp, flags, NULL, (xfs_off_t)0, (xfs_off_t)-1, error); return -error; } -/* - * xfs_file_readdir maps to VOP_READDIR(). - * We need to build a uio, cred, ... - */ - -#define nextdp(dp) ((struct xfs_dirent *)((char *)(dp) + (dp)->d_reclen)) - #ifdef CONFIG_XFS_DMAPI - STATIC struct page * xfs_vm_nopage( struct vm_area_struct *area, @@ -319,10 +369,8 @@ xfs_vm_nopage( return filemap_nopage(area, address, type); } - #endif /* CONFIG_XFS_DMAPI */ - STATIC int xfs_file_readdir( struct file *filp, @@ -330,7 +378,7 @@ xfs_file_readdir( filldir_t filldir) { int error = 0; - vnode_t *vp; + vnode_t *vp = vn_from_inode(filp->f_dentry->d_inode); uio_t uio; iovec_t iov; int eof = 0; @@ -340,9 +388,6 @@ xfs_file_readdir( xfs_off_t start_offset, curr_offset; xfs_dirent_t *dbp = NULL; - vp = vn_from_inode(filp->f_dentry->d_inode); - ASSERT(vp); - /* Try fairly hard to get memory */ do { if ((read_buf = (caddr_t)kmalloc(rlen, GFP_KERNEL))) @@ -387,7 +432,7 @@ xfs_file_readdir( } size -= dbp->d_reclen; curr_offset = (loff_t)dbp->d_off /* & 0x7fffffff */; - dbp = nextdp(dbp); + dbp = (xfs_dirent_t *)((char *)dbp + dbp->d_reclen); } } done: @@ -402,7 +447,6 @@ done: return -error; } - STATIC int xfs_file_mmap( struct file *filp, @@ -457,11 +501,10 @@ xfs_file_ioctl_invis( unsigned int cmd, unsigned long arg) { - int error; struct inode *inode = filp->f_dentry->d_inode; vnode_t *vp = vn_from_inode(inode); + int error; - ASSERT(vp); VOP_IOCTL(vp, inode, filp, IO_INVIS, cmd, (void __user *)arg, error); VMODIFY(vp); @@ -537,6 +580,8 @@ const struct file_operations xfs_file_operations = { .aio_read = xfs_file_aio_read, .aio_write = xfs_file_aio_write, .sendfile = xfs_file_sendfile, + .splice_read = xfs_file_splice_read, + .splice_write = xfs_file_splice_write, .unlocked_ioctl = xfs_file_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = xfs_file_compat_ioctl, @@ -558,7 +603,9 @@ const struct file_operations xfs_invis_file_operations = { .writev = xfs_file_writev_invis, .aio_read = xfs_file_aio_read_invis, .aio_write = xfs_file_aio_write_invis, - .sendfile = xfs_file_sendfile, + .sendfile = xfs_file_sendfile_invis, + .splice_read = xfs_file_splice_read_invis, + .splice_write = xfs_file_splice_write_invis, .unlocked_ioctl = xfs_file_ioctl_invis, #ifdef CONFIG_COMPAT .compat_ioctl = xfs_file_compat_invis_ioctl, diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 1fe09f2d65..e9fe43d747 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -103,6 +103,7 @@ */ #undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */ #define HAVE_SENDFILE /* sendfile(2) exists in 2.6, but not in 2.4 */ +#define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */ #ifdef CONFIG_SMP #define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ #else diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 84ddf18938..90cd314acb 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -301,36 +301,23 @@ xfs_sendfile( void *target, cred_t *credp) { + xfs_inode_t *ip = XFS_BHVTOI(bdp); + xfs_mount_t *mp = ip->i_mount; ssize_t ret; - xfs_fsize_t n; - xfs_inode_t *ip; - xfs_mount_t *mp; - vnode_t *vp; - - ip = XFS_BHVTOI(bdp); - vp = BHV_TO_VNODE(bdp); - mp = ip->i_mount; XFS_STATS_INC(xs_read_calls); - - n = XFS_MAXIOFFSET(mp) - *offset; - if ((n <= 0) || (count == 0)) - return 0; - - if (n < count) - count = n; - - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; xfs_ilock(ip, XFS_IOLOCK_SHARED); - if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) && + if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) && (!(ioflags & IO_INVIS))) { vrwlock_t locktype = VRWLOCK_READ; int error; - error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), *offset, count, + error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), + *offset, count, FILP_DELAY_FLAG(filp), &locktype); if (error) { xfs_iunlock(ip, XFS_IOLOCK_SHARED); @@ -340,12 +327,96 @@ xfs_sendfile( xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore, (void *)(unsigned long)target, count, *offset, ioflags); ret = generic_file_sendfile(filp, offset, count, actor, target); + if (ret > 0) + XFS_STATS_ADD(xs_read_bytes, ret); xfs_iunlock(ip, XFS_IOLOCK_SHARED); + return ret; +} +ssize_t +xfs_splice_read( + bhv_desc_t *bdp, + struct file *infilp, + struct inode *pipe, + size_t count, + int flags, + int ioflags, + cred_t *credp) +{ + xfs_inode_t *ip = XFS_BHVTOI(bdp); + xfs_mount_t *mp = ip->i_mount; + ssize_t ret; + + XFS_STATS_INC(xs_read_calls); + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return -EIO; + + xfs_ilock(ip, XFS_IOLOCK_SHARED); + + if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) && + (!(ioflags & IO_INVIS))) { + vrwlock_t locktype = VRWLOCK_READ; + int error; + + error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), + infilp->f_pos, count, + FILP_DELAY_FLAG(infilp), &locktype); + if (error) { + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + return -error; + } + } + xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, &ip->i_iocore, + pipe, count, infilp->f_pos, ioflags); + ret = generic_file_splice_read(infilp, pipe, count, flags); if (ret > 0) XFS_STATS_ADD(xs_read_bytes, ret); + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + return ret; +} + +ssize_t +xfs_splice_write( + bhv_desc_t *bdp, + struct inode *pipe, + struct file *outfilp, + size_t count, + int flags, + int ioflags, + cred_t *credp) +{ + xfs_inode_t *ip = XFS_BHVTOI(bdp); + xfs_mount_t *mp = ip->i_mount; + ssize_t ret; + + XFS_STATS_INC(xs_write_calls); + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return -EIO; + + xfs_ilock(ip, XFS_IOLOCK_EXCL); + + if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_WRITE) && + (!(ioflags & IO_INVIS))) { + vrwlock_t locktype = VRWLOCK_WRITE; + int error; + + error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, BHV_TO_VNODE(bdp), + outfilp->f_pos, count, + FILP_DELAY_FLAG(outfilp), &locktype); + if (error) { + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + return -error; + } + } + xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, &ip->i_iocore, + pipe, count, outfilp->f_pos, ioflags); + ret = generic_file_splice_write(pipe, outfilp, count, flags); + if (ret > 0) + XFS_STATS_ADD(xs_write_bytes, ret); + + xfs_iunlock(ip, XFS_IOLOCK_EXCL); return ret; } @@ -363,7 +434,7 @@ xfs_zero_last_block( xfs_fsize_t end_size) { xfs_fileoff_t last_fsb; - xfs_mount_t *mp; + xfs_mount_t *mp = io->io_mount; int nimaps; int zero_offset; int zero_len; @@ -373,8 +444,6 @@ xfs_zero_last_block( ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0); - mp = io->io_mount; - zero_offset = XFS_B_FSB_OFFSET(mp, isize); if (zero_offset == 0) { /* @@ -405,10 +474,9 @@ xfs_zero_last_block( * don't deadlock when the buffer cache calls back to us. */ XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD); - loff = XFS_FSB_TO_B(mp, last_fsb); + loff = XFS_FSB_TO_B(mp, last_fsb); zero_len = mp->m_sb.sb_blocksize - zero_offset; - error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size); XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); @@ -441,7 +509,7 @@ xfs_zero_eof( xfs_fileoff_t zero_count_fsb; xfs_fileoff_t last_fsb; xfs_extlen_t buf_len_fsb; - xfs_mount_t *mp; + xfs_mount_t *mp = io->io_mount; int nimaps; int error = 0; xfs_bmbt_irec_t imap; @@ -450,8 +518,6 @@ xfs_zero_eof( ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); ASSERT(offset > isize); - mp = io->io_mount; - /* * First handle zeroing the block on which isize resides. * We only zero a part of that block so it is handled specially. diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h index 38864a88d4..eaa5659713 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.h +++ b/fs/xfs/linux-2.6/xfs_lrw.h @@ -60,6 +60,8 @@ struct xfs_iomap; #define XFS_IOMAP_ALLOC_ENTER 25 #define XFS_IOMAP_ALLOC_MAP 26 #define XFS_IOMAP_UNWRITTEN 27 +#define XFS_SPLICE_READ_ENTER 28 +#define XFS_SPLICE_WRITE_ENTER 29 extern void xfs_rw_enter_trace(int, struct xfs_iocore *, void *, size_t, loff_t, int); extern void xfs_inval_cached_trace(struct xfs_iocore *, @@ -78,6 +80,7 @@ extern int xfs_bmap(struct bhv_desc *, xfs_off_t, ssize_t, int, struct xfs_iomap *, int *); extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *); extern int xfs_bdstrat_cb(struct xfs_buf *); +extern int xfs_dev_is_read_only(struct xfs_mount *, char *); extern int xfs_zero_eof(struct vnode *, struct xfs_iocore *, xfs_off_t, xfs_fsize_t, xfs_fsize_t); @@ -90,7 +93,11 @@ extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *, extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *, loff_t *, int, size_t, read_actor_t, void *, struct cred *); - -extern int xfs_dev_is_read_only(struct xfs_mount *, char *); +extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *, + struct inode *, size_t, int, int, + struct cred *); +extern ssize_t xfs_splice_write(struct bhv_desc *, struct inode *, + struct file *, size_t, int, int, + struct cred *); #endif /* __XFS_LRW_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 06f5845e95..6f1c79a28f 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -173,6 +173,12 @@ typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *, typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *, loff_t *, int, size_t, read_actor_t, void *, struct cred *); +typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *, + struct inode *, size_t, int, int, + struct cred *); +typedef ssize_t (*vop_splice_write_t)(bhv_desc_t *, struct inode *, + struct file *, size_t, int, int, + struct cred *); typedef int (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *, int, unsigned int, void __user *); typedef int (*vop_getattr_t)(bhv_desc_t *, struct vattr *, int, @@ -231,6 +237,8 @@ typedef struct vnodeops { vop_read_t vop_read; vop_write_t vop_write; vop_sendfile_t vop_sendfile; + vop_splice_read_t vop_splice_read; + vop_splice_write_t vop_splice_write; vop_ioctl_t vop_ioctl; vop_getattr_t vop_getattr; vop_setattr_t vop_setattr; @@ -276,6 +284,10 @@ typedef struct vnodeops { rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr) #define VOP_SENDFILE(vp,f,off,ioflags,cnt,act,targ,cr,rv) \ rv = _VOP_(vop_sendfile, vp)((vp)->v_fbhv,f,off,ioflags,cnt,act,targ,cr) +#define VOP_SPLICE_READ(vp,f,pipe,cnt,fl,iofl,cr,rv) \ + rv = _VOP_(vop_splice_read, vp)((vp)->v_fbhv,f,pipe,cnt,fl,iofl,cr) +#define VOP_SPLICE_WRITE(vp,f,pipe,cnt,fl,iofl,cr,rv) \ + rv = _VOP_(vop_splice_write, vp)((vp)->v_fbhv,f,pipe,cnt,fl,iofl,cr) #define VOP_BMAP(vp,of,sz,rw,b,n,rv) \ rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n) #define VOP_OPEN(vp, cr, rv) \ diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index de49601919..fa71b305ba 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -4648,6 +4648,10 @@ vnodeops_t xfs_vnodeops = { .vop_read = xfs_read, #ifdef HAVE_SENDFILE .vop_sendfile = xfs_sendfile, +#endif +#ifdef HAVE_SPLICE + .vop_splice_read = xfs_splice_read, + .vop_splice_write = xfs_splice_write, #endif .vop_write = xfs_write, .vop_ioctl = xfs_ioctl, -- cgit v1.2.2 From d4569d2e6949a63851032b40c811913d4a6f85f5 Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Sat, 1 Apr 2006 01:10:13 +0200 Subject: BUG_ON() Conversion in fs/direct-io.c this changes if() BUG(); constructs to BUG_ON() which is cleaner and can better optimized away Signed-off-by: Eric Sesterhenn Signed-off-by: Adrian Bunk --- fs/direct-io.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/direct-io.c b/fs/direct-io.c index 910a8ed74b..b05d1b2187 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -929,8 +929,7 @@ do_holes: block_in_page += this_chunk_blocks; dio->blocks_available -= this_chunk_blocks; next_block: - if (dio->block_in_file > dio->final_block_in_request) - BUG(); + BUG_ON(dio->block_in_file > dio->final_block_in_request); if (dio->block_in_file == dio->final_block_in_request) break; } -- cgit v1.2.2 From 7dddb12c63553db850365cfd066a00416aa8c6cb Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Sat, 1 Apr 2006 01:13:38 +0200 Subject: BUG_ON() Conversion in fs/exec.c this changes if() BUG(); constructs to BUG_ON() which is cleaner and can better optimized away Signed-off-by: Eric Sesterhenn Signed-off-by: Adrian Bunk --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 950ebd43cd..0291a68a36 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -561,7 +561,7 @@ static int exec_mmap(struct mm_struct *mm) arch_pick_mmap_layout(mm); if (old_mm) { up_read(&old_mm->mmap_sem); - if (active_mm != old_mm) BUG(); + BUG_ON(active_mm != old_mm); mmput(old_mm); return 0; } -- cgit v1.2.2 From 0bf3ba538a150f8430104a50e88c1449e8fa1fe6 Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Sat, 1 Apr 2006 01:14:43 +0200 Subject: BUG_ON() Conversion in fs/hfsplus/ this changes if() BUG(); constructs to BUG_ON() which is cleaner, contains unlikely() and can better optimized away. Signed-off-by: Eric Sesterhenn Signed-off-by: Adrian Bunk --- fs/hfsplus/bnode.c | 6 ++---- fs/hfsplus/btree.c | 3 +-- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c index 8f07e8fbd0..746abc9ecf 100644 --- a/fs/hfsplus/bnode.c +++ b/fs/hfsplus/bnode.c @@ -466,8 +466,7 @@ void hfs_bnode_unhash(struct hfs_bnode *node) for (p = &node->tree->node_hash[hfs_bnode_hash(node->this)]; *p && *p != node; p = &(*p)->next_hash) ; - if (!*p) - BUG(); + BUG_ON(!*p); *p = node->next_hash; node->tree->node_hash_cnt--; } @@ -622,8 +621,7 @@ void hfs_bnode_put(struct hfs_bnode *node) dprint(DBG_BNODE_REFS, "put_node(%d:%d): %d\n", node->tree->cnid, node->this, atomic_read(&node->refcnt)); - if (!atomic_read(&node->refcnt)) - BUG(); + BUG_ON(!atomic_read(&node->refcnt)); if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock)) return; for (i = 0; i < tree->pages_per_bnode; i++) { diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index a67edfa34e..effa899199 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -269,8 +269,7 @@ void hfs_bmap_free(struct hfs_bnode *node) u8 *data, byte, m; dprint(DBG_BNODE_MOD, "btree_free_node: %u\n", node->this); - if (!node->this) - BUG(); + BUG_ON(!node->this); tree = node->tree; nidx = node->this; node = hfs_bnode_find(tree, 0); -- cgit v1.2.2 From 4b4d1cc7336b29f766d4e59d1ed2c627443a694a Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Sat, 1 Apr 2006 01:15:35 +0200 Subject: BUG_ON() Conversion in fs/jffs2/ this changes if() BUG(); constructs to BUG_ON() which is cleaner, contains unlikely() and can better optimized away. Signed-off-by: Eric Sesterhenn Signed-off-by: Adrian Bunk --- fs/jffs2/background.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index 7b77a95411..ff2a872e80 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c @@ -35,8 +35,7 @@ int jffs2_start_garbage_collect_thread(struct jffs2_sb_info *c) pid_t pid; int ret = 0; - if (c->gc_task) - BUG(); + BUG_ON(c->gc_task); init_completion(&c->gc_thread_start); init_completion(&c->gc_thread_exit); -- cgit v1.2.2 From 5df0d312413d920628f149421d7b0a3994684620 Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Sat, 1 Apr 2006 01:16:26 +0200 Subject: BUG_ON() Conversion in fs/smbfs/ this changes if() BUG(); constructs to BUG_ON() which is cleaner, contains unlikely() and can better optimized away. Signed-off-by: Eric Sesterhenn Signed-off-by: Adrian Bunk --- fs/smbfs/file.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index c56bd99a97..ed9a24d19d 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -178,11 +178,9 @@ smb_writepage(struct page *page, struct writeback_control *wbc) unsigned offset = PAGE_CACHE_SIZE; int err; - if (!mapping) - BUG(); + BUG_ON(!mapping); inode = mapping->host; - if (!inode) - BUG(); + BUG_ON(!inode); end_index = inode->i_size >> PAGE_CACHE_SHIFT; -- cgit v1.2.2 From 99cee0cd7560fc4e7f3646ee18d90e328bd1cb32 Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Sat, 1 Apr 2006 01:18:38 +0200 Subject: BUG_ON() Conversion in fs/sysfs/ this changes if() BUG(); constructs to BUG_ON() which is cleaner, contains unlikely() and can better optimized away. Signed-off-by: Eric Sesterhenn Signed-off-by: Adrian Bunk --- fs/sysfs/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 4c29ac41ac..f0b347bd12 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -175,8 +175,7 @@ const unsigned char * sysfs_get_name(struct sysfs_dirent *sd) struct bin_attribute * bin_attr; struct sysfs_symlink * sl; - if (!sd || !sd->s_element) - BUG(); + BUG_ON(!sd || !sd->s_element); switch (sd->s_type) { case SYSFS_DIR: -- cgit v1.2.2 From 8abf6a4707cfb95ca552b882959c6f8ff9924270 Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Sun, 2 Apr 2006 13:36:13 +0200 Subject: BUG_ON() Conversion in fs/dquot.c this changes if() BUG(); constructs to BUG_ON() which is cleaner and can better optimized away Signed-off-by: Eric Sesterhenn Signed-off-by: Adrian Bunk --- fs/dquot.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/dquot.c b/fs/dquot.c index 6b38869209..81d87a413c 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -590,8 +590,7 @@ we_slept: atomic_dec(&dquot->dq_count); #ifdef __DQUOT_PARANOIA /* sanity check */ - if (!list_empty(&dquot->dq_free)) - BUG(); + BUG_ON(!list_empty(&dquot->dq_free)); #endif put_dquot_last(dquot); spin_unlock(&dq_list_lock); @@ -666,8 +665,7 @@ we_slept: return NODQUOT; } #ifdef __DQUOT_PARANOIA - if (!dquot->dq_sb) /* Has somebody invalidated entry under us? */ - BUG(); + BUG_ON(!dquot->dq_sb); /* Has somebody invalidated entry under us? */ #endif return dquot; -- cgit v1.2.2 From f6298aab2ebaa61de39931595f125bc1968905cc Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Sun, 2 Apr 2006 13:37:19 +0200 Subject: BUG_ON() Conversion in fs/fcntl.c this changes if() BUG(); constructs to BUG_ON() which is cleaner and can better optimized away Signed-off-by: Eric Sesterhenn Signed-off-by: Adrian Bunk --- fs/fcntl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fcntl.c b/fs/fcntl.c index 2a2479196f..d35cbc6bc1 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -453,8 +453,7 @@ static void send_sigio_to_task(struct task_struct *p, /* Make sure we are called with one of the POLL_* reasons, otherwise we could leak kernel stack into userspace. */ - if ((reason & __SI_MASK) != __SI_POLL) - BUG(); + BUG_ON((reason & __SI_MASK) != __SI_POLL); if (reason - POLL_IN >= NSIGPOLL) si.si_band = ~0L; else -- cgit v1.2.2 From b7542f8c7eb40efb967a558c5be90fe5f939c3ef Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Sun, 2 Apr 2006 13:38:18 +0200 Subject: BUG_ON() Conversion in fs/inode.c this changes if() BUG(); constructs to BUG_ON() which is cleaner and can better optimized away Signed-off-by: Eric Sesterhenn Signed-off-by: Adrian Bunk --- fs/inode.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/inode.c b/fs/inode.c index 32b7c33750..3a2446a27d 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -172,8 +172,7 @@ static struct inode *alloc_inode(struct super_block *sb) void destroy_inode(struct inode *inode) { - if (inode_has_buffers(inode)) - BUG(); + BUG_ON(inode_has_buffers(inode)); security_inode_free(inode); if (inode->i_sb->s_op->destroy_inode) inode->i_sb->s_op->destroy_inode(inode); @@ -249,12 +248,9 @@ void clear_inode(struct inode *inode) might_sleep(); invalidate_inode_buffers(inode); - if (inode->i_data.nrpages) - BUG(); - if (!(inode->i_state & I_FREEING)) - BUG(); - if (inode->i_state & I_CLEAR) - BUG(); + BUG_ON(inode->i_data.nrpages); + BUG_ON(!(inode->i_state & I_FREEING)); + BUG_ON(inode->i_state & I_CLEAR); wait_on_inode(inode); DQUOT_DROP(inode); if (inode->i_sb && inode->i_sb->s_op->clear_inode) @@ -1054,8 +1050,7 @@ void generic_delete_inode(struct inode *inode) hlist_del_init(&inode->i_hash); spin_unlock(&inode_lock); wake_up_inode(inode); - if (inode->i_state != I_CLEAR) - BUG(); + BUG_ON(inode->i_state != I_CLEAR); destroy_inode(inode); } -- cgit v1.2.2 From d6735bfcc998863dab89dacca2aed20932b6bc21 Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Sun, 2 Apr 2006 13:39:21 +0200 Subject: BUG_ON() Conversion in fs/sysv/ this changes if() BUG(); constructs to BUG_ON() which is cleaner, contains unlikely() and can better optimized away. Signed-off-by: Eric Sesterhenn Signed-off-by: Adrian Bunk --- fs/sysv/dir.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index 8c66e9270d..d7074341ee 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -253,8 +253,7 @@ int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page) lock_page(page); err = mapping->a_ops->prepare_write(NULL, page, from, to); - if (err) - BUG(); + BUG_ON(err); de->inode = 0; err = dir_commit_chunk(page, from, to); dir_put_page(page); @@ -353,8 +352,7 @@ void sysv_set_link(struct sysv_dir_entry *de, struct page *page, lock_page(page); err = page->mapping->a_ops->prepare_write(NULL, page, from, to); - if (err) - BUG(); + BUG_ON(err); de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino); err = dir_commit_chunk(page, from, to); dir_put_page(page); -- cgit v1.2.2 From 2c2111c2bd821d3e7cf5a6a37a112a620fd947a3 Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Sun, 2 Apr 2006 13:40:13 +0200 Subject: BUG_ON() Conversion in fs/udf/ this changes if() BUG(); constructs to BUG_ON() which is cleaner, contains unlikely() and can better optimized away. Signed-off-by: Eric Sesterhenn Signed-off-by: Adrian Bunk --- fs/udf/inode.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 81e0e8459a..2983afd5e7 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -312,12 +312,10 @@ static int udf_get_block(struct inode *inode, sector_t block, struct buffer_head err = 0; bh = inode_getblk(inode, block, &err, &phys, &new); - if (bh) - BUG(); + BUG_ON(bh); if (err) goto abort; - if (!phys) - BUG(); + BUG_ON(!phys); if (new) set_buffer_new(bh_result); -- cgit v1.2.2 From 7ec70738097af9dfd25d5f83e9b27a532f462912 Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Sun, 2 Apr 2006 13:41:02 +0200 Subject: BUG_ON() Conversion in fs/freevxfs/ this changes if() BUG(); constructs to BUG_ON() which is cleaner, contains unlikely() and can better optimized away. Signed-off-by: Eric Sesterhenn Signed-off-by: Adrian Bunk --- fs/freevxfs/vxfs_olt.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/freevxfs/vxfs_olt.c b/fs/freevxfs/vxfs_olt.c index 76a0708ae9..0495008479 100644 --- a/fs/freevxfs/vxfs_olt.c +++ b/fs/freevxfs/vxfs_olt.c @@ -42,24 +42,21 @@ static inline void vxfs_get_fshead(struct vxfs_oltfshead *fshp, struct vxfs_sb_info *infp) { - if (infp->vsi_fshino) - BUG(); + BUG_ON(infp->vsi_fshino); infp->vsi_fshino = fshp->olt_fsino[0]; } static inline void vxfs_get_ilist(struct vxfs_oltilist *ilistp, struct vxfs_sb_info *infp) { - if (infp->vsi_iext) - BUG(); + BUG_ON(infp->vsi_iext); infp->vsi_iext = ilistp->olt_iext[0]; } static inline u_long vxfs_oblock(struct super_block *sbp, daddr_t block, u_long bsize) { - if (sbp->s_blocksize % bsize) - BUG(); + BUG_ON(sbp->s_blocksize % bsize); return (block * (sbp->s_blocksize / bsize)); } -- cgit v1.2.2 From a580290c3e64bb695158a090d02d1232d9609311 Mon Sep 17 00:00:00 2001 From: Martin Waitz Date: Sun, 2 Apr 2006 13:59:55 +0200 Subject: Documentation: fix minor kernel-doc warnings This patch updates the comments to match the actual code. Signed-off-by: Martin Waitz Signed-off-by: Adrian Bunk --- fs/sysfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index f26880a478..6cfdc9a877 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -50,7 +50,7 @@ static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent * parent_sd, return sd; } -/** +/* * * Return -EEXIST if there is already a sysfs element with the same name for * the same parent. -- cgit v1.2.2 From 29e350944fdc2dfca102500790d8ad6d6ff4f69d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 2 Apr 2006 12:46:35 -0700 Subject: splice: add SPLICE_F_NONBLOCK flag It doesn't make the splice itself necessarily nonblocking (because the actual file descriptors that are spliced from/to may block unless they have the O_NONBLOCK flag set), but it makes the splice pipe operations nonblocking. Signed-off-by: Linus Torvalds --- fs/splice.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/splice.c b/fs/splice.c index 7c2bbf18d7..6081cf7d2d 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -106,7 +106,7 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = { static ssize_t move_to_pipe(struct inode *inode, struct page **pages, int nr_pages, unsigned long offset, - unsigned long len) + unsigned long len, unsigned int flags) { struct pipe_inode_info *info; int ret, do_wakeup, i; @@ -159,6 +159,12 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages, break; } + if (flags & SPLICE_F_NONBLOCK) { + if (!ret) + ret = -EAGAIN; + break; + } + if (signal_pending(current)) { if (!ret) ret = -ERESTARTSYS; @@ -191,7 +197,7 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages, } static int __generic_file_splice_read(struct file *in, struct inode *pipe, - size_t len) + size_t len, unsigned int flags) { struct address_space *mapping = in->f_mapping; unsigned int offset, nr_pages; @@ -279,7 +285,7 @@ static int __generic_file_splice_read(struct file *in, struct inode *pipe, * Now we splice them into the pipe.. */ splice_them: - return move_to_pipe(pipe, pages, i, offset, len); + return move_to_pipe(pipe, pages, i, offset, len, flags); } ssize_t generic_file_splice_read(struct file *in, struct inode *pipe, @@ -291,7 +297,7 @@ ssize_t generic_file_splice_read(struct file *in, struct inode *pipe, ret = 0; spliced = 0; while (len) { - ret = __generic_file_splice_read(in, pipe, len); + ret = __generic_file_splice_read(in, pipe, len, flags); if (ret <= 0) break; @@ -299,6 +305,11 @@ ssize_t generic_file_splice_read(struct file *in, struct inode *pipe, in->f_pos += ret; len -= ret; spliced += ret; + + if (!(flags & SPLICE_F_NONBLOCK)) + continue; + ret = -EAGAIN; + break; } if (spliced) @@ -527,6 +538,12 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out, break; } + if (flags & SPLICE_F_NONBLOCK) { + if (!ret) + ret = -EAGAIN; + break; + } + if (signal_pending(current)) { if (!ret) ret = -ERESTARTSYS; -- cgit v1.2.2 From 6e0dd741a89be35defa05bd79f4211c5a2762825 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 31 Mar 2006 15:37:06 -0800 Subject: [PATCH] sysfs: zero terminate sysfs write buffers No one should be writing a PAGE_SIZE worth of data to a normal sysfs file, so properly terminate the buffer. Thanks to Al Viro for pointing out my supidity here. Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- fs/sysfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 830f76fa09..f1cb1ddde5 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -183,7 +183,7 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t return -ENOMEM; if (count >= PAGE_SIZE) - count = PAGE_SIZE; + count = PAGE_SIZE - 1; error = copy_from_user(buffer->page,buf,count); buffer->needs_read_fill = 1; return error ? -EFAULT : count; -- cgit v1.2.2 From 53cd9ae886273d6c2b8ba4aa63d6cd6b1217b57f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 2 Apr 2006 23:04:21 +0200 Subject: [PATCH] splice: fix shadow[] filling logic Clear the entire range, and don't increment pidx or we keep filling the same position again and again. Thanks to KAMEZAWA Hiroyuki. Signed-off-by: Jens Axboe --- fs/splice.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/splice.c b/fs/splice.c index 6081cf7d2d..a555d0a83f 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -237,9 +237,9 @@ static int __generic_file_splice_read(struct file *in, struct inode *pipe, * fill shadow[] with pages at the right locations, so we only * have to fill holes */ - memset(shadow, 0, i * sizeof(struct page *)); - for (j = 0, pidx = index; j < i; pidx++, j++) - shadow[pages[j]->index - pidx] = pages[j]; + memset(shadow, 0, nr_pages * sizeof(struct page *)); + for (j = 0; j < i; j++) + shadow[pages[j]->index - index] = pages[j]; /* * now fill in the holes -- cgit v1.2.2 From 4f6f0bd2ffa4e31c3524f5e65c84a29b6ab73307 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 2 Apr 2006 23:04:46 +0200 Subject: [PATCH] splice: improve writeback and clean up page stealing By cleaning up the writeback logic (killing write_one_page() and the manual set_page_dirty()), we can get rid of ->stolen inside the pipe_buffer and just keep it local in pipe_to_file(). This also adds dirty page balancing logic and O_SYNC handling. Signed-off-by: Jens Axboe --- fs/pipe.c | 1 - fs/splice.c | 64 +++++++++++++++++++++++++++++++++++++++++++++---------------- 2 files changed, 48 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/pipe.c b/fs/pipe.c index 109a102c15..5093408746 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -124,7 +124,6 @@ static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer static int anon_pipe_buf_steal(struct pipe_inode_info *info, struct pipe_buffer *buf) { - buf->stolen = 1; return 0; } diff --git a/fs/splice.c b/fs/splice.c index a555d0a83f..07f4d863c2 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -22,7 +22,10 @@ #include #include #include +#include +#include #include +#include /* * Passed to the actors @@ -38,11 +41,15 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, struct pipe_buffer *buf) { struct page *page = buf->page; + struct address_space *mapping = page_mapping(page); WARN_ON(!PageLocked(page)); WARN_ON(!PageUptodate(page)); - if (!remove_mapping(page_mapping(page), page)) + if (PagePrivate(page)) + try_to_release_page(page, mapping_gfp_mask(mapping)); + + if (!remove_mapping(mapping, page)) return 1; if (PageLRU(page)) { @@ -55,7 +62,6 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, spin_unlock_irq(&zone->lru_lock); } - buf->stolen = 1; return 0; } @@ -64,7 +70,6 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *info, { page_cache_release(buf->page); buf->page = NULL; - buf->stolen = 0; } static void *page_cache_pipe_buf_map(struct file *file, @@ -91,8 +96,7 @@ static void *page_cache_pipe_buf_map(struct file *file, static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf) { - if (!buf->stolen) - unlock_page(buf->page); + unlock_page(buf->page); kunmap(buf->page); } @@ -319,7 +323,8 @@ ssize_t generic_file_splice_read(struct file *in, struct inode *pipe, } /* - * Send 'len' bytes to socket from 'file' at position 'pos' using sendpage(). + * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' + * using sendpage(). */ static int pipe_to_sendpage(struct pipe_inode_info *info, struct pipe_buffer *buf, struct splice_desc *sd) @@ -379,7 +384,7 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, struct page *page; pgoff_t index; char *src; - int ret; + int ret, stolen; /* * after this, page will be locked and unmapped @@ -390,6 +395,7 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, index = sd->pos >> PAGE_CACHE_SHIFT; offset = sd->pos & ~PAGE_CACHE_MASK; + stolen = 0; /* * reuse buf page, if SPLICE_F_MOVE is set @@ -399,6 +405,7 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, goto find_page; page = buf->page; + stolen = 1; if (add_to_page_cache_lru(page, mapping, index, mapping_gfp_mask(mapping))) goto find_page; @@ -443,10 +450,13 @@ find_page: } ret = mapping->a_ops->prepare_write(file, page, 0, sd->len); - if (ret) + if (ret == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + goto find_page; + } else if (ret) goto out; - if (!buf->stolen) { + if (!stolen) { char *dst = kmap_atomic(page, KM_USER0); memcpy(dst + offset, src + buf->offset, sd->len); @@ -455,16 +465,18 @@ find_page: } ret = mapping->a_ops->commit_write(file, page, 0, sd->len); - if (ret < 0) + if (ret == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + goto find_page; + } else if (ret) goto out; - set_page_dirty(page); - ret = write_one_page(page, 0); + balance_dirty_pages_ratelimited(mapping); out: - if (ret < 0) - unlock_page(page); - if (!buf->stolen) + if (!stolen) { page_cache_release(page); + unlock_page(page); + } buf->ops->unmap(info, buf); return ret; } @@ -576,7 +588,27 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out, ssize_t generic_file_splice_write(struct inode *inode, struct file *out, size_t len, unsigned int flags) { - return move_from_pipe(inode, out, len, flags, pipe_to_file); + struct address_space *mapping = out->f_mapping; + ssize_t ret = move_from_pipe(inode, out, len, flags, pipe_to_file); + + /* + * if file or inode is SYNC and we actually wrote some data, sync it + */ + if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host)) + && ret > 0) { + struct inode *inode = mapping->host; + int err; + + mutex_lock(&inode->i_mutex); + err = generic_osync_inode(mapping->host, mapping, + OSYNC_METADATA|OSYNC_DATA); + mutex_unlock(&inode->i_mutex); + + if (err) + ret = err; + } + + return ret; } ssize_t generic_splice_sendpage(struct inode *inode, struct file *out, -- cgit v1.2.2 From 83f9135bddffded9f1716519b6c147bcf046c87e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 2 Apr 2006 23:05:09 +0200 Subject: [PATCH] splice: add comments documenting more of the code Hopefully this will make Andrew a little more happy. Signed-off-by: Jens Axboe --- fs/splice.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 66 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/splice.c b/fs/splice.c index 07f4d863c2..34591924c7 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -37,6 +37,12 @@ struct splice_desc { loff_t pos; /* file position */ }; +/* + * Attempt to steal a page from a pipe buffer. This should perhaps go into + * a vm helper function, it's already simplified quite a bit by the + * addition of remove_mapping(). If success is returned, the caller may + * attempt to reuse this page for another destination. + */ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, struct pipe_buffer *buf) { @@ -108,6 +114,10 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = { .steal = page_cache_pipe_buf_steal, }; +/* + * Pipe output worker. This sets up our pipe format with the page cache + * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). + */ static ssize_t move_to_pipe(struct inode *inode, struct page **pages, int nr_pages, unsigned long offset, unsigned long len, unsigned int flags) @@ -292,6 +302,16 @@ splice_them: return move_to_pipe(pipe, pages, i, offset, len, flags); } +/** + * generic_file_splice_read - splice data from file to a pipe + * @in: file to splice from + * @pipe: pipe to splice to + * @len: number of bytes to splice + * @flags: splice modifier flags + * + * Will read pages from given file and fill them into a pipe. + * + */ ssize_t generic_file_splice_read(struct file *in, struct inode *pipe, size_t len, unsigned int flags) { @@ -370,10 +390,12 @@ static int pipe_to_sendpage(struct pipe_inode_info *info, * - Destination page does not exist, we can add the pipe page to * the page cache and avoid the copy. * - * For now we just do the slower thing and always copy pages over, it's - * easier than migrating pages from the pipe to the target file. For the - * case of doing file | file splicing, the migrate approach had some LRU - * nastiness... + * If asked to move pages to the output file (SPLICE_F_MOVE is set in + * sd->flags), we attempt to migrate pages from the pipe to the output + * file address space page cache. This is possible if no one else has + * the pipe page referenced outside of the pipe and page cache. If + * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create + * a new page in the output file page cache and fill/dirty that. */ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, struct splice_desc *sd) @@ -401,6 +423,10 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, * reuse buf page, if SPLICE_F_MOVE is set */ if (sd->flags & SPLICE_F_MOVE) { + /* + * If steal succeeds, buf->page is now pruned from the vm + * side (LRU and page cache) and we can reuse it. + */ if (buf->ops->steal(info, buf)) goto find_page; @@ -484,6 +510,11 @@ out: typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, struct splice_desc *); +/* + * Pipe input worker. Most of this logic works like a regular pipe, the + * key here is the 'actor' worker passed in that actually moves the data + * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. + */ static ssize_t move_from_pipe(struct inode *inode, struct file *out, size_t len, unsigned int flags, splice_actor *actor) @@ -585,6 +616,17 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out, } +/** + * generic_file_splice_write - splice data from a pipe to a file + * @inode: pipe inode + * @out: file to write to + * @len: number of bytes to splice + * @flags: splice modifier flags + * + * Will either move or copy pages (determined by @flags options) from + * the given pipe inode to the given file. + * + */ ssize_t generic_file_splice_write(struct inode *inode, struct file *out, size_t len, unsigned int flags) { @@ -611,6 +653,17 @@ ssize_t generic_file_splice_write(struct inode *inode, struct file *out, return ret; } +/** + * generic_splice_sendpage - splice data from a pipe to a socket + * @inode: pipe inode + * @out: socket to write to + * @len: number of bytes to splice + * @flags: splice modifier flags + * + * Will send @len bytes from the pipe to a network socket. No data copying + * is involved. + * + */ ssize_t generic_splice_sendpage(struct inode *inode, struct file *out, size_t len, unsigned int flags) { @@ -620,6 +673,9 @@ ssize_t generic_splice_sendpage(struct inode *inode, struct file *out, EXPORT_SYMBOL(generic_file_splice_write); EXPORT_SYMBOL(generic_file_splice_read); +/* + * Attempt to initiate a splice from pipe to file. + */ static long do_splice_from(struct inode *pipe, struct file *out, size_t len, unsigned int flags) { @@ -640,6 +696,9 @@ static long do_splice_from(struct inode *pipe, struct file *out, size_t len, return out->f_op->splice_write(pipe, out, len, flags); } +/* + * Attempt to initiate a splice from a file to a pipe. + */ static long do_splice_to(struct file *in, struct inode *pipe, size_t len, unsigned int flags) { @@ -668,6 +727,9 @@ static long do_splice_to(struct file *in, struct inode *pipe, size_t len, return in->f_op->splice_read(in, pipe, len, flags); } +/* + * Determine where to splice to/from. + */ static long do_splice(struct file *in, struct file *out, size_t len, unsigned int flags) { -- cgit v1.2.2 From b2b39fa478db6db89b7ccafb0649973845b0eb75 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 2 Apr 2006 23:05:41 +0200 Subject: [PATCH] splice: add a SPLICE_F_MORE flag This lets userspace indicate whether more data will be coming in a subsequent splice call. Signed-off-by: Jens Axboe --- fs/splice.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/splice.c b/fs/splice.c index 34591924c7..e845445589 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -354,6 +354,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *info, unsigned int offset; ssize_t ret; void *ptr; + int more; /* * sub-optimal, but we are limited by the pipe ->map. we don't @@ -366,9 +367,9 @@ static int pipe_to_sendpage(struct pipe_inode_info *info, return PTR_ERR(ptr); offset = pos & ~PAGE_CACHE_MASK; + more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; - ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos, - sd->len < sd->total_len); + ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos,more); buf->ops->unmap(info, buf); if (ret == sd->len) -- cgit v1.2.2 From 059a8f3734a66cb2c41731083b0cbf836721485b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 2 Apr 2006 23:06:05 +0200 Subject: [PATCH] splice: export generic_splice_sendpage Forgot that one, thanks Jeff. Also move the other EXPORT_SYMBOL to right below the functions. Signed-off-by: Jens Axboe --- fs/splice.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/splice.c b/fs/splice.c index e845445589..7c2abd4504 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -342,6 +342,8 @@ ssize_t generic_file_splice_read(struct file *in, struct inode *pipe, return ret; } +EXPORT_SYMBOL(generic_file_splice_read); + /* * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' * using sendpage(). @@ -654,6 +656,8 @@ ssize_t generic_file_splice_write(struct inode *inode, struct file *out, return ret; } +EXPORT_SYMBOL(generic_file_splice_write); + /** * generic_splice_sendpage - splice data from a pipe to a socket * @inode: pipe inode @@ -671,8 +675,7 @@ ssize_t generic_splice_sendpage(struct inode *inode, struct file *out, return move_from_pipe(inode, out, len, flags, pipe_to_sendpage); } -EXPORT_SYMBOL(generic_file_splice_write); -EXPORT_SYMBOL(generic_file_splice_read); +EXPORT_SYMBOL(generic_splice_sendpage); /* * Attempt to initiate a splice from pipe to file. -- cgit v1.2.2 From ad8d6f0a783ffa2ff9b0cf09910b889715772201 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 2 Apr 2006 23:10:32 +0200 Subject: [PATCH] splice: page stealing needs to wait_on_page_writeback() Thanks to Andrew for the good explanation of why this is so. akpm writes: If a page is under writeback and we remove it from pagecache, it's still going to get written to disk. But the VFS no longer knows about that page, nor that this page is about to modify disk blocks. So there might be scenarios in which those blocks-which-are-about-to-be-written-to get reused for something else. When writeback completes, it'll scribble on those blocks. This won't happen in ext2/ext3-style filesystems in normal mode because the page has buffers and try_to_release_page() will fail. But ext2 in nobh mode doesn't attach buffers at all - it just sticks the page in a BIO, finds some new blocks, points the BIO at those blocks and lets it rip. While that write IO's in flight, someone could truncate the file. Truncate won't block on the writeout because the page isn't in pagecache any more. So truncate will the free the blocks from the file under the page's feet. Then something else can reallocate those blocks. Then write data to them. Now, the original write completes, corrupting the filesystem. Signed-off-by: Jens Axboe --- fs/splice.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs') diff --git a/fs/splice.c b/fs/splice.c index 7c2abd4504..b5fb2f3e3a 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -52,6 +52,15 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, WARN_ON(!PageLocked(page)); WARN_ON(!PageUptodate(page)); + /* + * At least for ext2 with nobh option, we need to wait on writeback + * completing on this page, since we'll remove it from the pagecache. + * Otherwise truncate wont wait on the page, allowing the disk + * blocks to be reused by someone else before we actually wrote our + * data to them. fs corruption ensues. + */ + wait_on_page_writeback(page); + if (PagePrivate(page)) try_to_release_page(page, mapping_gfp_mask(mapping)); -- cgit v1.2.2 From 3e7ee3e7b36fa4e2d88d8fb0a2577be95fc4636d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 2 Apr 2006 23:11:04 +0200 Subject: [PATCH] splice: fix page stealing LRU handling. Originally from Nick Piggin, just adapted to the newer branch. You can't check PageLRU without holding zone->lru_lock. The page release code can get away with it only because the page refcount is 0 at that point. Also, you can't reliably remove pages from the LRU unless the refcount is 0. Ever. Signed-off-by: Nick Piggin Signed-off-by: Jens Axboe --- fs/pipe.c | 3 +++ fs/splice.c | 30 +++++++++++------------------- 2 files changed, 14 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/pipe.c b/fs/pipe.c index 5093408746..795df987cd 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -95,6 +95,8 @@ static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buff { struct page *page = buf->page; + buf->flags &= ~PIPE_BUF_FLAG_STOLEN; + /* * If nobody else uses this page, and we don't already have a * temporary page, let's keep track of it as a one-deep @@ -124,6 +126,7 @@ static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer static int anon_pipe_buf_steal(struct pipe_inode_info *info, struct pipe_buffer *buf) { + buf->flags |= PIPE_BUF_FLAG_STOLEN; return 0; } diff --git a/fs/splice.c b/fs/splice.c index b5fb2f3e3a..bfa42a277b 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -67,16 +67,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, if (!remove_mapping(mapping, page)) return 1; - if (PageLRU(page)) { - struct zone *zone = page_zone(page); - - spin_lock_irq(&zone->lru_lock); - BUG_ON(!PageLRU(page)); - __ClearPageLRU(page); - del_page_from_lru(zone, page); - spin_unlock_irq(&zone->lru_lock); - } - + buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU; return 0; } @@ -85,6 +76,7 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *info, { page_cache_release(buf->page); buf->page = NULL; + buf->flags &= ~(PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU); } static void *page_cache_pipe_buf_map(struct file *file, @@ -414,11 +406,12 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, { struct file *file = sd->file; struct address_space *mapping = file->f_mapping; + gfp_t gfp_mask = mapping_gfp_mask(mapping); unsigned int offset; struct page *page; pgoff_t index; char *src; - int ret, stolen; + int ret; /* * after this, page will be locked and unmapped @@ -429,7 +422,6 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, index = sd->pos >> PAGE_CACHE_SHIFT; offset = sd->pos & ~PAGE_CACHE_MASK; - stolen = 0; /* * reuse buf page, if SPLICE_F_MOVE is set @@ -443,15 +435,15 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, goto find_page; page = buf->page; - stolen = 1; - if (add_to_page_cache_lru(page, mapping, index, - mapping_gfp_mask(mapping))) + if (add_to_page_cache(page, mapping, index, gfp_mask)) goto find_page; + + if (!(buf->flags & PIPE_BUF_FLAG_LRU)) + lru_cache_add(page); } else { find_page: ret = -ENOMEM; - page = find_or_create_page(mapping, index, - mapping_gfp_mask(mapping)); + page = find_or_create_page(mapping, index, gfp_mask); if (!page) goto out; @@ -494,7 +486,7 @@ find_page: } else if (ret) goto out; - if (!stolen) { + if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { char *dst = kmap_atomic(page, KM_USER0); memcpy(dst + offset, src + buf->offset, sd->len); @@ -511,7 +503,7 @@ find_page: balance_dirty_pages_ratelimited(mapping); out: - if (!stolen) { + if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { page_cache_release(page); unlock_page(page); } -- cgit v1.2.2