about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
author: Eric Sandeen <sandeen@sandeen.net> 2014-01-21 17:46:23 -0500
committer: Ben Myers <bpm@sgi.com> 2014-01-24 12:55:42 -0500
commit: 7c71ee78031c248dca13fc94dea9a4cc217db6cf (patch)
tree: 16951eb9c67010960c328b1626efa2a4ab668c39 /fs
parent: 6da54179b3f1bb6a302fd5f3b38fae32ee463ed1 (diff)
xfs: allow logical-sector sized O_DIRECT
Some time ago, mkfs.xfs started picking the storage physical sector size as the default filesystem "sector size" in order to avoid RMW (read-modify-write) costs incurred by doing IOs at logical sector size alignments.

However, this means that for a filesystem made with e.g. a 4k sector size on an "advanced format" 4k/512 disk, 512-byte direct IOs are no longer allowed. This means that XFS has essentially turned this AF drive into a hard 4K device, from the filesystem on up.

XFS's mkfs-specified "sector size" is really just controlling the minimum size & alignment of filesystem metadata.

There is no real need to tightly couple XFS's minimal metadata size to the minimum allowed direct IO size; XFS can continue doing metadata in optimal sizes, but still allow smaller DIOs for apps which issue them, for whatever reason.

This patch adds a new field to the xfs_buftarg, so that we now track 2 sizes:

1) The metadata sector size, which is the minimum unit and alignment of IO which will be performed by metadata operations.
2) The device logical sector size.

The first is used internally by the file system for metadata alignment and IOs. The second is used for the minimum allowed direct IO alignment.

This has passed xfstests on filesystems made with 4k sectors, including when run under the patch I sent to ignore XFS_IOC_DIOINFO, and issue 512 DIOs anyway. I also directly tested end of block behavior on preallocated, sparse, and existing files when we do a 512 IO into a 4k file on a 4k-sector filesystem, to be sure there were no unexpected behaviors.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs')
-rw-r--r--  fs/xfs/xfs_buf.c    5
-rw-r--r--  fs/xfs/xfs_buf.h   15
-rw-r--r--  fs/xfs/xfs_file.c   7
-rw-r--r--  fs/xfs/xfs_ioctl.c  2
4 files changed, 26 insertions, 3 deletions
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index a526f8d2dc6f..51757113a822 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1599,6 +1599,7 @@ xfs_setsize_buftarg(
1599 unsigned int blocksize, 1599 unsigned int blocksize,
1600 unsigned int sectorsize) 1600 unsigned int sectorsize)
1601{ 1601{
1602 /* Set up metadata sector size info */
1602 btp->bt_meta_sectorsize = sectorsize; 1603 btp->bt_meta_sectorsize = sectorsize;
1603 btp->bt_meta_sectormask = sectorsize - 1; 1604 btp->bt_meta_sectormask = sectorsize - 1;
1604 1605
@@ -1613,6 +1614,10 @@ xfs_setsize_buftarg(
1613 return EINVAL; 1614 return EINVAL;
1614 } 1615 }
1615 1616
1617 /* Set up device logical sector size mask */
1618 btp->bt_logical_sectorsize = bdev_logical_block_size(btp->bt_bdev);
1619 btp->bt_logical_sectormask = bdev_logical_block_size(btp->bt_bdev) - 1;
1620
1616 return 0; 1621 return 0;
1617} 1622}
1618 1623
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index d5d88dda4d31..995339534db6 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -88,6 +88,19 @@ typedef unsigned int xfs_buf_flags_t;
88 */ 88 */
89#define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */ 89#define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */
90 90
91/*
92 * The xfs_buftarg contains 2 notions of "sector size" -
93 *
94 * 1) The metadata sector size, which is the minimum unit and
95 * alignment of IO which will be performed by metadata operations.
96 * 2) The device logical sector size
97 *
98 * The first is specified at mkfs time, and is stored on-disk in the
99 * superblock's sb_sectsize.
100 *
101 * The latter is derived from the underlying device, and controls direct IO
102 * alignment constraints.
103 */
91typedef struct xfs_buftarg { 104typedef struct xfs_buftarg {
92 dev_t bt_dev; 105 dev_t bt_dev;
93 struct block_device *bt_bdev; 106 struct block_device *bt_bdev;
@@ -95,6 +108,8 @@ typedef struct xfs_buftarg {
95 struct xfs_mount *bt_mount; 108 struct xfs_mount *bt_mount;
96 unsigned int bt_meta_sectorsize; 109 unsigned int bt_meta_sectorsize;
97 size_t bt_meta_sectormask; 110 size_t bt_meta_sectormask;
111 size_t bt_logical_sectorsize;
112 size_t bt_logical_sectormask;
98 113
99 /* LRU control structures */ 114 /* LRU control structures */
100 struct shrinker bt_shrinker; 115 struct shrinker bt_shrinker;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index d01745f748ac..2e7989e3a2d6 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -261,7 +261,8 @@ xfs_file_aio_read(
261 xfs_buftarg_t *target = 261 xfs_buftarg_t *target =
262 XFS_IS_REALTIME_INODE(ip) ? 262 XFS_IS_REALTIME_INODE(ip) ?
263 mp->m_rtdev_targp : mp->m_ddev_targp; 263 mp->m_rtdev_targp : mp->m_ddev_targp;
264 if ((pos | size) & target->bt_meta_sectormask) { 264 /* DIO must be aligned to device logical sector size */
265 if ((pos | size) & target->bt_logical_sectormask) {
265 if (pos == i_size_read(inode)) 266 if (pos == i_size_read(inode))
266 return 0; 267 return 0;
267 return -XFS_ERROR(EINVAL); 268 return -XFS_ERROR(EINVAL);
@@ -641,9 +642,11 @@ xfs_file_dio_aio_write(
641 struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? 642 struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
642 mp->m_rtdev_targp : mp->m_ddev_targp; 643 mp->m_rtdev_targp : mp->m_ddev_targp;
643 644
644 if ((pos | count) & target->bt_meta_sectormask) 645 /* DIO must be aligned to device logical sector size */
646 if ((pos | count) & target->bt_logical_sectormask)
645 return -XFS_ERROR(EINVAL); 647 return -XFS_ERROR(EINVAL);
646 648
649 /* "unaligned" here means not aligned to a filesystem block */
647 if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) 650 if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
648 unaligned_io = 1; 651 unaligned_io = 1;
649 652
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 3dc60ed9572a..bcfe61202115 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1583,7 +1583,7 @@ xfs_file_ioctl(
1583 XFS_IS_REALTIME_INODE(ip) ? 1583 XFS_IS_REALTIME_INODE(ip) ?
1584 mp->m_rtdev_targp : mp->m_ddev_targp; 1584 mp->m_rtdev_targp : mp->m_ddev_targp;
1585 1585
1586 da.d_mem = da.d_miniosz = target->bt_meta_sectorsize; 1586 da.d_mem = da.d_miniosz = target->bt_logical_sectorsize;
1587 da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1); 1587 da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
1588 1588
1589 if (copy_to_user(arg, &da, sizeof(da))) 1589 if (copy_to_user(arg, &da, sizeof(da)))