diff options
author | David Chinner <dgc@sgi.com> | 2005-11-01 18:33:05 -0500 |
---|---|---|
committer | Nathan Scott <nathans@sgi.com> | 2005-11-01 18:33:05 -0500 |
commit | e8c8b3a79d85c22d3665b97dde843dc4d8d7ae37 (patch) | |
tree | a82b58ebc5c40474cdc53c91bd135b97034de565 | |
parent | ee34807a65aa0c5911dc27682863afca780a003e (diff) |
[XFS] Introduce two new mount options (nolargeio/largeio) to allow
filesystems to expose the filesystem stripe width in stat(2) rather than
the page cache size. This allows applications requiring high bandwidth to
easily determine the optimum I/O size for the underlying filesystem. The
default is to report the page cache size (i.e. "nolargeio").
SGI-PV: 942818
SGI-Modid: xfs-linux:xfs-kern:23830a
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
-rw-r--r-- | fs/xfs/linux-2.6/xfs_super.c | 2 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_vnode.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_clnt.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.h | 27 | ||||
-rw-r--r-- | fs/xfs/xfs_vfsops.c | 11 | ||||
-rw-r--r-- | fs/xfs/xfs_vnodeops.c | 35 |
6 files changed, 43 insertions, 35 deletions
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index fa87279405d8..f6f6b6750d4f 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -189,7 +189,7 @@ xfs_revalidate_inode( | |||
189 | break; | 189 | break; |
190 | } | 190 | } |
191 | 191 | ||
192 | inode->i_blksize = PAGE_CACHE_SIZE; | 192 | inode->i_blksize = xfs_preferred_iosize(mp); |
193 | inode->i_generation = ip->i_d.di_gen; | 193 | inode->i_generation = ip->i_d.di_gen; |
194 | i_size_write(inode, ip->i_d.di_size); | 194 | i_size_write(inode, ip->i_d.di_size); |
195 | inode->i_blocks = | 195 | inode->i_blocks = |
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index 268f45bf6a9a..61999649ecef 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c | |||
@@ -124,6 +124,7 @@ vn_revalidate_core( | |||
124 | inode->i_mtime = vap->va_mtime; | 124 | inode->i_mtime = vap->va_mtime; |
125 | inode->i_ctime = vap->va_ctime; | 125 | inode->i_ctime = vap->va_ctime; |
126 | inode->i_atime = vap->va_atime; | 126 | inode->i_atime = vap->va_atime; |
127 | inode->i_blksize = vap->va_blocksize; | ||
127 | if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) | 128 | if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) |
128 | inode->i_flags |= S_IMMUTABLE; | 129 | inode->i_flags |= S_IMMUTABLE; |
129 | else | 130 | else |
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h index c93cb282f3d8..90d9d56c4dc8 100644 --- a/fs/xfs/xfs_clnt.h +++ b/fs/xfs/xfs_clnt.h | |||
@@ -106,5 +106,7 @@ struct xfs_mount_args { | |||
106 | #define XFSMNT_IHASHSIZE 0x20000000 /* inode hash table size */ | 106 | #define XFSMNT_IHASHSIZE 0x20000000 /* inode hash table size */ |
107 | #define XFSMNT_DIRSYNC 0x40000000 /* sync creat,link,unlink,rename | 107 | #define XFSMNT_DIRSYNC 0x40000000 /* sync creat,link,unlink,rename |
108 | * symlink,mkdir,rmdir,mknod */ | 108 | * symlink,mkdir,rmdir,mknod */ |
109 | #define XFSMNT_COMPAT_IOSIZE 0x80000000 /* don't report large preferred | ||
110 | * I/O size in stat() */ | ||
109 | 111 | ||
110 | #endif /* __XFS_CLNT_H__ */ | 112 | #endif /* __XFS_CLNT_H__ */ |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 0653beecf93a..b71af184aea6 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -421,6 +421,9 @@ typedef struct xfs_mount { | |||
421 | * allocation */ | 421 | * allocation */ |
422 | #define XFS_MOUNT_IHASHSIZE 0x00100000 /* inode hash table size */ | 422 | #define XFS_MOUNT_IHASHSIZE 0x00100000 /* inode hash table size */ |
423 | #define XFS_MOUNT_DIRSYNC 0x00200000 /* synchronous directory ops */ | 423 | #define XFS_MOUNT_DIRSYNC 0x00200000 /* synchronous directory ops */ |
424 | #define XFS_MOUNT_COMPAT_IOSIZE 0x00400000 /* don't report large preferred | ||
425 | * I/O size in stat() */ | ||
426 | |||
424 | 427 | ||
425 | /* | 428 | /* |
426 | * Default minimum read and write sizes. | 429 | * Default minimum read and write sizes. |
@@ -442,6 +445,30 @@ typedef struct xfs_mount { | |||
442 | #define XFS_WSYNC_READIO_LOG 15 /* 32K */ | 445 | #define XFS_WSYNC_READIO_LOG 15 /* 32K */ |
443 | #define XFS_WSYNC_WRITEIO_LOG 14 /* 16K */ | 446 | #define XFS_WSYNC_WRITEIO_LOG 14 /* 16K */ |
444 | 447 | ||
448 | /* | ||
449 | * Allow large block sizes to be reported to userspace programs if the | ||
450 | * "largeio" mount option is used. | ||
451 | * | ||
452 | * If compatibility mode is specified, simply return the basic unit of caching | ||
453 | * so that we don't get inefficient read/modify/write I/O from user apps. | ||
454 | * Otherwise.... | ||
455 | * | ||
456 | * If the underlying volume is a stripe, then return the stripe width in bytes | ||
457 | * as the recommended I/O size. If it is not a stripe and we've set a default | ||
458 | * buffered I/O size, return that, otherwise return the compat default. | ||
459 | */ | ||
460 | static inline unsigned long | ||
461 | xfs_preferred_iosize(xfs_mount_t *mp) | ||
462 | { | ||
463 | if (mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE) | ||
464 | return PAGE_CACHE_SIZE; | ||
465 | return (mp->m_swidth ? | ||
466 | (mp->m_swidth << mp->m_sb.sb_blocklog) : | ||
467 | ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ? | ||
468 | (1 << (int)MAX(mp->m_readio_log, mp->m_writeio_log)) : | ||
469 | PAGE_CACHE_SIZE)); | ||
470 | } | ||
471 | |||
445 | #define XFS_MAXIOFFSET(mp) ((mp)->m_maxioffset) | 472 | #define XFS_MAXIOFFSET(mp) ((mp)->m_maxioffset) |
446 | 473 | ||
447 | #define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN) | 474 | #define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN) |
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 9142351df515..7227baee8994 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c | |||
@@ -307,6 +307,9 @@ xfs_start_flags( | |||
307 | if (ap->flags & XFSMNT_DIRSYNC) | 307 | if (ap->flags & XFSMNT_DIRSYNC) |
308 | mp->m_flags |= XFS_MOUNT_DIRSYNC; | 308 | mp->m_flags |= XFS_MOUNT_DIRSYNC; |
309 | 309 | ||
310 | if (ap->flags & XFSMNT_COMPAT_IOSIZE) | ||
311 | mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; | ||
312 | |||
310 | /* | 313 | /* |
311 | * no recovery flag requires a read-only mount | 314 | * no recovery flag requires a read-only mount |
312 | */ | 315 | */ |
@@ -1645,6 +1648,9 @@ xfs_vget( | |||
1645 | #define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ | 1648 | #define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ |
1646 | #define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ | 1649 | #define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ |
1647 | #define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */ | 1650 | #define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */ |
1651 | #define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */ | ||
1652 | #define MNTOPT_NOLARGEIO "nolargeio" /* do not report large I/O sizes | ||
1653 | * in stat(). */ | ||
1648 | 1654 | ||
1649 | STATIC unsigned long | 1655 | STATIC unsigned long |
1650 | suffix_strtoul(const char *cp, char **endp, unsigned int base) | 1656 | suffix_strtoul(const char *cp, char **endp, unsigned int base) |
@@ -1681,6 +1687,7 @@ xfs_parseargs( | |||
1681 | int dsunit, dswidth, vol_dsunit, vol_dswidth; | 1687 | int dsunit, dswidth, vol_dsunit, vol_dswidth; |
1682 | int iosize; | 1688 | int iosize; |
1683 | 1689 | ||
1690 | args->flags |= XFSMNT_COMPAT_IOSIZE; | ||
1684 | #if 0 /* XXX: off by default, until some remaining issues ironed out */ | 1691 | #if 0 /* XXX: off by default, until some remaining issues ironed out */ |
1685 | args->flags |= XFSMNT_IDELETE; /* default to on */ | 1692 | args->flags |= XFSMNT_IDELETE; /* default to on */ |
1686 | #endif | 1693 | #endif |
@@ -1809,6 +1816,10 @@ xfs_parseargs( | |||
1809 | args->flags &= ~XFSMNT_IDELETE; | 1816 | args->flags &= ~XFSMNT_IDELETE; |
1810 | } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { | 1817 | } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { |
1811 | args->flags |= XFSMNT_IDELETE; | 1818 | args->flags |= XFSMNT_IDELETE; |
1819 | } else if (!strcmp(this_char, MNTOPT_LARGEIO)) { | ||
1820 | args->flags &= ~XFSMNT_COMPAT_IOSIZE; | ||
1821 | } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) { | ||
1822 | args->flags |= XFSMNT_COMPAT_IOSIZE; | ||
1812 | } else if (!strcmp(this_char, "osyncisdsync")) { | 1823 | } else if (!strcmp(this_char, "osyncisdsync")) { |
1813 | /* no-op, this is now the default */ | 1824 | /* no-op, this is now the default */ |
1814 | printk("XFS: osyncisdsync is now the default, option is deprecated.\n"); | 1825 | printk("XFS: osyncisdsync is now the default, option is deprecated.\n"); |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index e2bf2ef58b66..8221b11a48c0 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -181,40 +181,7 @@ xfs_getattr( | |||
181 | vap->va_rdev = 0; | 181 | vap->va_rdev = 0; |
182 | 182 | ||
183 | if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { | 183 | if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { |
184 | 184 | vap->va_blocksize = xfs_preferred_iosize(mp); | |
185 | #if 0 | ||
186 | /* Large block sizes confuse various | ||
187 | * user space programs, so letting the | ||
188 | * stripe size through is not a good | ||
189 | * idea for now. | ||
190 | */ | ||
191 | vap->va_blocksize = mp->m_swidth ? | ||
192 | /* | ||
193 | * If the underlying volume is a stripe, then | ||
194 | * return the stripe width in bytes as the | ||
195 | * recommended I/O size. | ||
196 | */ | ||
197 | (mp->m_swidth << mp->m_sb.sb_blocklog) : | ||
198 | /* | ||
199 | * Return the largest of the preferred buffer | ||
200 | * sizes since doing small I/Os into larger | ||
201 | * buffers causes buffers to be decommissioned. | ||
202 | * The value returned is in bytes. | ||
203 | */ | ||
204 | (1 << (int)MAX(mp->m_readio_log, | ||
205 | mp->m_writeio_log)); | ||
206 | |||
207 | #else | ||
208 | vap->va_blocksize = | ||
209 | /* | ||
210 | * Return the largest of the preferred buffer | ||
211 | * sizes since doing small I/Os into larger | ||
212 | * buffers causes buffers to be decommissioned. | ||
213 | * The value returned is in bytes. | ||
214 | */ | ||
215 | 1 << (int)MAX(mp->m_readio_log, | ||
216 | mp->m_writeio_log); | ||
217 | #endif | ||
218 | } else { | 185 | } else { |
219 | 186 | ||
220 | /* | 187 | /* |