aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David Chinner <dgc@sgi.com> 2005-11-01 18:33:05 -0500
committer Nathan Scott <nathans@sgi.com> 2005-11-01 18:33:05 -0500
commit e8c8b3a79d85c22d3665b97dde843dc4d8d7ae37 (patch)
tree a82b58ebc5c40474cdc53c91bd135b97034de565
parent ee34807a65aa0c5911dc27682863afca780a003e (diff)
[XFS] Introduce two new mount options (nolargeio/largeio) to allow
filesystems to expose the filesystem stripe width in stat(2) rather than
the page cache size. This allows applications requiring high bandwidth to
easily determine the optimum I/O size for the underlying filesystem. The
default is to report the page cache size (i.e. "nolargeio").

SGI-PV: 942818
SGI-Modid: xfs-linux:xfs-kern:23830a
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
-rw-r--r-- fs/xfs/linux-2.6/xfs_super.c 2
-rw-r--r-- fs/xfs/linux-2.6/xfs_vnode.c 1
-rw-r--r-- fs/xfs/xfs_clnt.h 2
-rw-r--r-- fs/xfs/xfs_mount.h 27
-rw-r--r-- fs/xfs/xfs_vfsops.c 11
-rw-r--r-- fs/xfs/xfs_vnodeops.c 35
6 files changed, 43 insertions, 35 deletions
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index fa87279405d8..f6f6b6750d4f 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -189,7 +189,7 @@ xfs_revalidate_inode(
189 break; 189 break;
190 } 190 }
191 191
192 inode->i_blksize = PAGE_CACHE_SIZE; 192 inode->i_blksize = xfs_preferred_iosize(mp);
193 inode->i_generation = ip->i_d.di_gen; 193 inode->i_generation = ip->i_d.di_gen;
194 i_size_write(inode, ip->i_d.di_size); 194 i_size_write(inode, ip->i_d.di_size);
195 inode->i_blocks = 195 inode->i_blocks =
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index 268f45bf6a9a..61999649ecef 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -124,6 +124,7 @@ vn_revalidate_core(
124 inode->i_mtime = vap->va_mtime; 124 inode->i_mtime = vap->va_mtime;
125 inode->i_ctime = vap->va_ctime; 125 inode->i_ctime = vap->va_ctime;
126 inode->i_atime = vap->va_atime; 126 inode->i_atime = vap->va_atime;
127 inode->i_blksize = vap->va_blocksize;
127 if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) 128 if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
128 inode->i_flags |= S_IMMUTABLE; 129 inode->i_flags |= S_IMMUTABLE;
129 else 130 else
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index c93cb282f3d8..90d9d56c4dc8 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -106,5 +106,7 @@ struct xfs_mount_args {
106#define XFSMNT_IHASHSIZE 0x20000000 /* inode hash table size */ 106#define XFSMNT_IHASHSIZE 0x20000000 /* inode hash table size */
107#define XFSMNT_DIRSYNC 0x40000000 /* sync creat,link,unlink,rename 107#define XFSMNT_DIRSYNC 0x40000000 /* sync creat,link,unlink,rename
108 * symlink,mkdir,rmdir,mknod */ 108 * symlink,mkdir,rmdir,mknod */
109#define XFSMNT_COMPAT_IOSIZE 0x80000000 /* don't report large preferred
110 * I/O size in stat() */
109 111
110#endif /* __XFS_CLNT_H__ */ 112#endif /* __XFS_CLNT_H__ */
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 0653beecf93a..b71af184aea6 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -421,6 +421,9 @@ typedef struct xfs_mount {
421 * allocation */ 421 * allocation */
422#define XFS_MOUNT_IHASHSIZE 0x00100000 /* inode hash table size */ 422#define XFS_MOUNT_IHASHSIZE 0x00100000 /* inode hash table size */
423#define XFS_MOUNT_DIRSYNC 0x00200000 /* synchronous directory ops */ 423#define XFS_MOUNT_DIRSYNC 0x00200000 /* synchronous directory ops */
424#define XFS_MOUNT_COMPAT_IOSIZE 0x00400000 /* don't report large preferred
425 * I/O size in stat() */
426
424 427
425/* 428/*
426 * Default minimum read and write sizes. 429 * Default minimum read and write sizes.
@@ -442,6 +445,30 @@ typedef struct xfs_mount {
442#define XFS_WSYNC_READIO_LOG 15 /* 32K */ 445#define XFS_WSYNC_READIO_LOG 15 /* 32K */
443#define XFS_WSYNC_WRITEIO_LOG 14 /* 16K */ 446#define XFS_WSYNC_WRITEIO_LOG 14 /* 16K */
444 447
448/*
449 * Allow large block sizes to be reported to userspace programs if the
450 * "largeio" mount option is used.
451 *
452 * If compatibility mode is specified, simply return the basic unit of caching
453 * so that we don't get inefficient read/modify/write I/O from user apps.
454 * Otherwise....
455 *
456 * If the underlying volume is a stripe, then return the stripe width in bytes
457 * as the recommended I/O size. If it is not a stripe and we've set a default
458 * buffered I/O size, return that; otherwise return the compat default.
459 */
460static inline unsigned long
461xfs_preferred_iosize(xfs_mount_t *mp)
462{
463 if (mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE)
464 return PAGE_CACHE_SIZE;
465 return (mp->m_swidth ?
466 (mp->m_swidth << mp->m_sb.sb_blocklog) :
467 ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ?
468 (1 << (int)MAX(mp->m_readio_log, mp->m_writeio_log)) :
469 PAGE_CACHE_SIZE));
470}
471
445#define XFS_MAXIOFFSET(mp) ((mp)->m_maxioffset) 472#define XFS_MAXIOFFSET(mp) ((mp)->m_maxioffset)
446 473
447#define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN) 474#define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 9142351df515..7227baee8994 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -307,6 +307,9 @@ xfs_start_flags(
307 if (ap->flags & XFSMNT_DIRSYNC) 307 if (ap->flags & XFSMNT_DIRSYNC)
308 mp->m_flags |= XFS_MOUNT_DIRSYNC; 308 mp->m_flags |= XFS_MOUNT_DIRSYNC;
309 309
310 if (ap->flags & XFSMNT_COMPAT_IOSIZE)
311 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
312
310 /* 313 /*
311 * no recovery flag requires a read-only mount 314 * no recovery flag requires a read-only mount
312 */ 315 */
@@ -1645,6 +1648,9 @@ xfs_vget(
1645#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ 1648#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
1646#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ 1649#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
1647#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */ 1650#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */
1651#define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */
1652#define MNTOPT_NOLARGEIO "nolargeio" /* do not report large I/O sizes
1653 * in stat(). */
1648 1654
1649STATIC unsigned long 1655STATIC unsigned long
1650suffix_strtoul(const char *cp, char **endp, unsigned int base) 1656suffix_strtoul(const char *cp, char **endp, unsigned int base)
@@ -1681,6 +1687,7 @@ xfs_parseargs(
1681 int dsunit, dswidth, vol_dsunit, vol_dswidth; 1687 int dsunit, dswidth, vol_dsunit, vol_dswidth;
1682 int iosize; 1688 int iosize;
1683 1689
1690 args->flags |= XFSMNT_COMPAT_IOSIZE;
1684#if 0 /* XXX: off by default, until some remaining issues ironed out */ 1691#if 0 /* XXX: off by default, until some remaining issues ironed out */
1685 args->flags |= XFSMNT_IDELETE; /* default to on */ 1692 args->flags |= XFSMNT_IDELETE; /* default to on */
1686#endif 1693#endif
@@ -1809,6 +1816,10 @@ xfs_parseargs(
1809 args->flags &= ~XFSMNT_IDELETE; 1816 args->flags &= ~XFSMNT_IDELETE;
1810 } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { 1817 } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
1811 args->flags |= XFSMNT_IDELETE; 1818 args->flags |= XFSMNT_IDELETE;
1819 } else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
1820 args->flags &= ~XFSMNT_COMPAT_IOSIZE;
1821 } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
1822 args->flags |= XFSMNT_COMPAT_IOSIZE;
1812 } else if (!strcmp(this_char, "osyncisdsync")) { 1823 } else if (!strcmp(this_char, "osyncisdsync")) {
1813 /* no-op, this is now the default */ 1824 /* no-op, this is now the default */
1814printk("XFS: osyncisdsync is now the default, option is deprecated.\n"); 1825printk("XFS: osyncisdsync is now the default, option is deprecated.\n");
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index e2bf2ef58b66..8221b11a48c0 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -181,40 +181,7 @@ xfs_getattr(
181 vap->va_rdev = 0; 181 vap->va_rdev = 0;
182 182
183 if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { 183 if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
184 184 vap->va_blocksize = xfs_preferred_iosize(mp);
185#if 0
186 /* Large block sizes confuse various
187 * user space programs, so letting the
188 * stripe size through is not a good
189 * idea for now.
190 */
191 vap->va_blocksize = mp->m_swidth ?
192 /*
193 * If the underlying volume is a stripe, then
194 * return the stripe width in bytes as the
195 * recommended I/O size.
196 */
197 (mp->m_swidth << mp->m_sb.sb_blocklog) :
198 /*
199 * Return the largest of the preferred buffer
200 * sizes since doing small I/Os into larger
201 * buffers causes buffers to be decommissioned.
202 * The value returned is in bytes.
203 */
204 (1 << (int)MAX(mp->m_readio_log,
205 mp->m_writeio_log));
206
207#else
208 vap->va_blocksize =
209 /*
210 * Return the largest of the preferred buffer
211 * sizes since doing small I/Os into larger
212 * buffers causes buffers to be decommissioned.
213 * The value returned is in bytes.
214 */
215 1 << (int)MAX(mp->m_readio_log,
216 mp->m_writeio_log);
217#endif
218 } else { 185 } else {
219 186
220 /* 187 /*