diff options
author | Dave Chinner <dchinner@redhat.com> | 2013-08-12 06:49:48 -0400 |
---|---|---|
committer | Ben Myers <bpm@sgi.com> | 2013-08-12 17:56:06 -0400 |
commit | a133d952b44cef278d2da664d742d51ef95f4dd3 (patch) | |
tree | 7a6f2d8bfc9be9d29246f2ed94582186331cbd45 | |
parent | e546cb79ef7ebe53060369dae665fa449a544353 (diff) |
xfs: consolidate extent swap code
So we don't need xfs_dfrag.h in userspace anymore, move the extent
swap ioctl structure definition to xfs_fs.h where most of the other
ioctl structure definitions are.
Now that we don't need separate files for extent swapping, separate
the basic file descriptor checking code to xfs_ioctl.c, and the code
that does the extent swap operation to xfs_bmap_util.c. This
cleanly separates the user interface code from the physical
mechanism used to do the extent swap.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
-rw-r--r-- | fs/xfs/Makefile | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_util.c | 343 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_util.h | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_dfrag.c | 459 | ||||
-rw-r--r-- | fs/xfs/xfs_dfrag.h | 53 | ||||
-rw-r--r-- | fs/xfs/xfs_fs.h | 15 | ||||
-rw-r--r-- | fs/xfs/xfs_ioctl.c | 72 | ||||
-rw-r--r-- | fs/xfs/xfs_ioctl.h | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_ioctl32.c | 3 |
9 files changed, 436 insertions, 517 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 201c61df3c45..d6ccf5742d18 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile | |||
@@ -32,7 +32,6 @@ xfs-y += xfs_aops.o \ | |||
32 | xfs_bit.o \ | 32 | xfs_bit.o \ |
33 | xfs_bmap_util.o \ | 33 | xfs_bmap_util.o \ |
34 | xfs_buf.o \ | 34 | xfs_buf.o \ |
35 | xfs_dfrag.o \ | ||
36 | xfs_dir2_readdir.o \ | 35 | xfs_dir2_readdir.o \ |
37 | xfs_discard.o \ | 36 | xfs_discard.o \ |
38 | xfs_error.o \ | 37 | xfs_error.o \ |
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index f557022bd0e7..b5232d094418 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c | |||
@@ -1693,3 +1693,346 @@ xfs_change_file_space( | |||
1693 | xfs_trans_set_sync(tp); | 1693 | xfs_trans_set_sync(tp); |
1694 | return xfs_trans_commit(tp, 0); | 1694 | return xfs_trans_commit(tp, 0); |
1695 | } | 1695 | } |
1696 | |||
1697 | /* | ||
1698 | * We need to check that the format of the data fork in the temporary inode is | ||
1699 | * valid for the target inode before doing the swap. This is not a problem with | ||
1700 | * attr1 because of the fixed fork offset, but attr2 has a dynamically sized | ||
1701 | * data fork depending on the space the attribute fork is taking so we can get | ||
1702 | * invalid formats on the target inode. | ||
1703 | * | ||
1704 | * E.g. target has space for 7 extents in extent format, temp inode only has | ||
1705 | * space for 6. If we defragment down to 7 extents, then the tmp format is a | ||
1706 | * btree, but when swapped it needs to be in extent format. Hence we can't just | ||
1707 | * blindly swap data forks on attr2 filesystems. | ||
1708 | * | ||
1709 | * Note that we check the swap in both directions so that we don't end up with | ||
1710 | * a corrupt temporary inode, either. | ||
1711 | * | ||
1712 | * Note that fixing the way xfs_fsr sets up the attribute fork in the source | ||
1713 | * inode will prevent this situation from occurring, so all we do here is | ||
1714 | * reject and log the attempt. Basically we are putting the responsibility on | ||
1715 | * userspace to get this right. | ||
1716 | */ | ||
1717 | static int | ||
1718 | xfs_swap_extents_check_format( | ||
1719 | xfs_inode_t *ip, /* target inode */ | ||
1720 | xfs_inode_t *tip) /* tmp inode */ | ||
1721 | { | ||
1722 | |||
1723 | /* Should never get a local format */ | ||
1724 | if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || | ||
1725 | tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) | ||
1726 | return EINVAL; | ||
1727 | |||
1728 | /* | ||
1729 | * if the target inode has fewer extents than the temporary inode then | ||
1730 | * why did userspace call us? | ||
1731 | */ | ||
1732 | if (ip->i_d.di_nextents < tip->i_d.di_nextents) | ||
1733 | return EINVAL; | ||
1734 | |||
1735 | /* | ||
1736 | * if the target inode is in extent form and the temp inode is in btree | ||
1737 | * form then we will end up with the target inode in the wrong format | ||
1738 | * as we already know there are fewer extents in the temp inode. | ||
1739 | */ | ||
1740 | if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && | ||
1741 | tip->i_d.di_format == XFS_DINODE_FMT_BTREE) | ||
1742 | return EINVAL; | ||
1743 | |||
1744 | /* Check temp in extent form to max in target */ | ||
1745 | if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && | ||
1746 | XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > | ||
1747 | XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) | ||
1748 | return EINVAL; | ||
1749 | |||
1750 | /* Check target in extent form to max in temp */ | ||
1751 | if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && | ||
1752 | XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > | ||
1753 | XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) | ||
1754 | return EINVAL; | ||
1755 | |||
1756 | /* | ||
1757 | * If we are in a btree format, check that the temp root block will fit | ||
1758 | * in the target and that it has enough extents to be in btree format | ||
1759 | * in the target. | ||
1760 | * | ||
1761 | * Note that we have to be careful to allow btree->extent conversions | ||
1762 | * (a common defrag case) which will occur when the temp inode is in | ||
1763 | * extent format... | ||
1764 | */ | ||
1765 | if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { | ||
1766 | if (XFS_IFORK_BOFF(ip) && | ||
1767 | XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip)) | ||
1768 | return EINVAL; | ||
1769 | if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= | ||
1770 | XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) | ||
1771 | return EINVAL; | ||
1772 | } | ||
1773 | |||
1774 | /* Reciprocal target->temp btree format checks */ | ||
1775 | if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { | ||
1776 | if (XFS_IFORK_BOFF(tip) && | ||
1777 | XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip)) | ||
1778 | return EINVAL; | ||
1779 | if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= | ||
1780 | XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) | ||
1781 | return EINVAL; | ||
1782 | } | ||
1783 | |||
1784 | return 0; | ||
1785 | } | ||
1786 | |||
1787 | int | ||
1788 | xfs_swap_extents( | ||
1789 | xfs_inode_t *ip, /* target inode */ | ||
1790 | xfs_inode_t *tip, /* tmp inode */ | ||
1791 | xfs_swapext_t *sxp) | ||
1792 | { | ||
1793 | xfs_mount_t *mp = ip->i_mount; | ||
1794 | xfs_trans_t *tp; | ||
1795 | xfs_bstat_t *sbp = &sxp->sx_stat; | ||
1796 | xfs_ifork_t *tempifp, *ifp, *tifp; | ||
1797 | int src_log_flags, target_log_flags; | ||
1798 | int error = 0; | ||
1799 | int aforkblks = 0; | ||
1800 | int taforkblks = 0; | ||
1801 | __uint64_t tmp; | ||
1802 | |||
1803 | /* | ||
1804 | * We have no way of updating owner information in the BMBT blocks for | ||
1805 | * each inode on CRC enabled filesystems, so to avoid corrupting | ||
1806 | * this metadata we simply don't allow extent swaps to occur. | ||
1807 | */ | ||
1808 | if (xfs_sb_version_hascrc(&mp->m_sb)) | ||
1809 | return XFS_ERROR(EINVAL); | ||
1810 | |||
1811 | tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); | ||
1812 | if (!tempifp) { | ||
1813 | error = XFS_ERROR(ENOMEM); | ||
1814 | goto out; | ||
1815 | } | ||
1816 | |||
1817 | /* | ||
1818 | * we have to do two separate lock calls here to keep lockdep | ||
1819 | * happy. If we try to get all the locks in one call, lock will | ||
1820 | * report false positives when we drop the ILOCK and regain them | ||
1821 | * below. | ||
1822 | */ | ||
1823 | xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); | ||
1824 | xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); | ||
1825 | |||
1826 | /* Verify that both files have the same format */ | ||
1827 | if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { | ||
1828 | error = XFS_ERROR(EINVAL); | ||
1829 | goto out_unlock; | ||
1830 | } | ||
1831 | |||
1832 | /* Verify both files are either real-time or non-realtime */ | ||
1833 | if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) { | ||
1834 | error = XFS_ERROR(EINVAL); | ||
1835 | goto out_unlock; | ||
1836 | } | ||
1837 | |||
1838 | error = -filemap_write_and_wait(VFS_I(tip)->i_mapping); | ||
1839 | if (error) | ||
1840 | goto out_unlock; | ||
1841 | truncate_pagecache_range(VFS_I(tip), 0, -1); | ||
1842 | |||
1843 | /* Verify O_DIRECT for ftmp */ | ||
1844 | if (VN_CACHED(VFS_I(tip)) != 0) { | ||
1845 | error = XFS_ERROR(EINVAL); | ||
1846 | goto out_unlock; | ||
1847 | } | ||
1848 | |||
1849 | /* Verify all data are being swapped */ | ||
1850 | if (sxp->sx_offset != 0 || | ||
1851 | sxp->sx_length != ip->i_d.di_size || | ||
1852 | sxp->sx_length != tip->i_d.di_size) { | ||
1853 | error = XFS_ERROR(EFAULT); | ||
1854 | goto out_unlock; | ||
1855 | } | ||
1856 | |||
1857 | trace_xfs_swap_extent_before(ip, 0); | ||
1858 | trace_xfs_swap_extent_before(tip, 1); | ||
1859 | |||
1860 | /* check inode formats now that data is flushed */ | ||
1861 | error = xfs_swap_extents_check_format(ip, tip); | ||
1862 | if (error) { | ||
1863 | xfs_notice(mp, | ||
1864 | "%s: inode 0x%llx format is incompatible for exchanging.", | ||
1865 | __func__, ip->i_ino); | ||
1866 | goto out_unlock; | ||
1867 | } | ||
1868 | |||
1869 | /* | ||
1870 | * Compare the current change & modify times with that | ||
1871 | * passed in. If they differ, we abort this swap. | ||
1872 | * This is the mechanism used to ensure the calling | ||
1873 | * process that the file was not changed out from | ||
1874 | * under it. | ||
1875 | */ | ||
1876 | if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) || | ||
1877 | (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) || | ||
1878 | (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) || | ||
1879 | (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) { | ||
1880 | error = XFS_ERROR(EBUSY); | ||
1881 | goto out_unlock; | ||
1882 | } | ||
1883 | |||
1884 | /* We need to fail if the file is memory mapped. Once we have tossed | ||
1885 | * all existing pages, the page fault will have no option | ||
1886 | * but to go to the filesystem for pages. By making the page fault call | ||
1887 | * vop_read (or write in the case of autogrow) they block on the iolock | ||
1888 | * until we have switched the extents. | ||
1889 | */ | ||
1890 | if (VN_MAPPED(VFS_I(ip))) { | ||
1891 | error = XFS_ERROR(EBUSY); | ||
1892 | goto out_unlock; | ||
1893 | } | ||
1894 | |||
1895 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1896 | xfs_iunlock(tip, XFS_ILOCK_EXCL); | ||
1897 | |||
1898 | /* | ||
1899 | * There is a race condition here since we gave up the | ||
1900 | * ilock. However, the data fork will not change since | ||
1901 | * we have the iolock (locked for truncation too) so we | ||
1902 | * are safe. We don't really care if non-io related | ||
1903 | * fields change. | ||
1904 | */ | ||
1905 | truncate_pagecache_range(VFS_I(ip), 0, -1); | ||
1906 | |||
1907 | tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); | ||
1908 | if ((error = xfs_trans_reserve(tp, 0, | ||
1909 | XFS_ICHANGE_LOG_RES(mp), 0, | ||
1910 | 0, 0))) { | ||
1911 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
1912 | xfs_iunlock(tip, XFS_IOLOCK_EXCL); | ||
1913 | xfs_trans_cancel(tp, 0); | ||
1914 | goto out; | ||
1915 | } | ||
1916 | xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); | ||
1917 | |||
1918 | /* | ||
1919 | * Count the number of extended attribute blocks | ||
1920 | */ | ||
1921 | if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) && | ||
1922 | (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { | ||
1923 | error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks); | ||
1924 | if (error) | ||
1925 | goto out_trans_cancel; | ||
1926 | } | ||
1927 | if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) && | ||
1928 | (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { | ||
1929 | error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, | ||
1930 | &taforkblks); | ||
1931 | if (error) | ||
1932 | goto out_trans_cancel; | ||
1933 | } | ||
1934 | |||
1935 | /* | ||
1936 | * Swap the data forks of the inodes | ||
1937 | */ | ||
1938 | ifp = &ip->i_df; | ||
1939 | tifp = &tip->i_df; | ||
1940 | *tempifp = *ifp; /* struct copy */ | ||
1941 | *ifp = *tifp; /* struct copy */ | ||
1942 | *tifp = *tempifp; /* struct copy */ | ||
1943 | |||
1944 | /* | ||
1945 | * Fix the on-disk inode values | ||
1946 | */ | ||
1947 | tmp = (__uint64_t)ip->i_d.di_nblocks; | ||
1948 | ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks; | ||
1949 | tip->i_d.di_nblocks = tmp + taforkblks - aforkblks; | ||
1950 | |||
1951 | tmp = (__uint64_t) ip->i_d.di_nextents; | ||
1952 | ip->i_d.di_nextents = tip->i_d.di_nextents; | ||
1953 | tip->i_d.di_nextents = tmp; | ||
1954 | |||
1955 | tmp = (__uint64_t) ip->i_d.di_format; | ||
1956 | ip->i_d.di_format = tip->i_d.di_format; | ||
1957 | tip->i_d.di_format = tmp; | ||
1958 | |||
1959 | /* | ||
1960 | * The extents in the source inode could still contain speculative | ||
1961 | * preallocation beyond EOF (e.g. the file is open but not modified | ||
1962 | * while defrag is in progress). In that case, we need to copy over the | ||
1963 | * number of delalloc blocks the data fork in the source inode is | ||
1964 | * tracking beyond EOF so that when the fork is truncated away when the | ||
1965 | * temporary inode is unlinked we don't underrun the i_delayed_blks | ||
1966 | * counter on that inode. | ||
1967 | */ | ||
1968 | ASSERT(tip->i_delayed_blks == 0); | ||
1969 | tip->i_delayed_blks = ip->i_delayed_blks; | ||
1970 | ip->i_delayed_blks = 0; | ||
1971 | |||
1972 | src_log_flags = XFS_ILOG_CORE; | ||
1973 | switch (ip->i_d.di_format) { | ||
1974 | case XFS_DINODE_FMT_EXTENTS: | ||
1975 | /* If the extents fit in the inode, fix the | ||
1976 | * pointer. Otherwise it's already NULL or | ||
1977 | * pointing to the extent. | ||
1978 | */ | ||
1979 | if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) { | ||
1980 | ifp->if_u1.if_extents = | ||
1981 | ifp->if_u2.if_inline_ext; | ||
1982 | } | ||
1983 | src_log_flags |= XFS_ILOG_DEXT; | ||
1984 | break; | ||
1985 | case XFS_DINODE_FMT_BTREE: | ||
1986 | src_log_flags |= XFS_ILOG_DBROOT; | ||
1987 | break; | ||
1988 | } | ||
1989 | |||
1990 | target_log_flags = XFS_ILOG_CORE; | ||
1991 | switch (tip->i_d.di_format) { | ||
1992 | case XFS_DINODE_FMT_EXTENTS: | ||
1993 | /* If the extents fit in the inode, fix the | ||
1994 | * pointer. Otherwise it's already NULL or | ||
1995 | * pointing to the extent. | ||
1996 | */ | ||
1997 | if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) { | ||
1998 | tifp->if_u1.if_extents = | ||
1999 | tifp->if_u2.if_inline_ext; | ||
2000 | } | ||
2001 | target_log_flags |= XFS_ILOG_DEXT; | ||
2002 | break; | ||
2003 | case XFS_DINODE_FMT_BTREE: | ||
2004 | target_log_flags |= XFS_ILOG_DBROOT; | ||
2005 | break; | ||
2006 | } | ||
2007 | |||
2008 | |||
2009 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | ||
2010 | xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | ||
2011 | |||
2012 | xfs_trans_log_inode(tp, ip, src_log_flags); | ||
2013 | xfs_trans_log_inode(tp, tip, target_log_flags); | ||
2014 | |||
2015 | /* | ||
2016 | * If this is a synchronous mount, make sure that the | ||
2017 | * transaction goes to disk before returning to the user. | ||
2018 | */ | ||
2019 | if (mp->m_flags & XFS_MOUNT_WSYNC) | ||
2020 | xfs_trans_set_sync(tp); | ||
2021 | |||
2022 | error = xfs_trans_commit(tp, 0); | ||
2023 | |||
2024 | trace_xfs_swap_extent_after(ip, 0); | ||
2025 | trace_xfs_swap_extent_after(tip, 1); | ||
2026 | out: | ||
2027 | kmem_free(tempifp); | ||
2028 | return error; | ||
2029 | |||
2030 | out_unlock: | ||
2031 | xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | ||
2032 | xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | ||
2033 | goto out; | ||
2034 | |||
2035 | out_trans_cancel: | ||
2036 | xfs_trans_cancel(tp, 0); | ||
2037 | goto out_unlock; | ||
2038 | } | ||
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index a6d207769dee..061260946f7a 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h | |||
@@ -102,6 +102,9 @@ bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); | |||
102 | int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip, | 102 | int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip, |
103 | bool need_iolock); | 103 | bool need_iolock); |
104 | 104 | ||
105 | int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip, | ||
106 | struct xfs_swapext *sx); | ||
107 | |||
105 | xfs_daddr_t xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb); | 108 | xfs_daddr_t xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb); |
106 | 109 | ||
107 | #endif /* __XFS_BMAP_UTIL_H__ */ | 110 | #endif /* __XFS_BMAP_UTIL_H__ */ |
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c deleted file mode 100644 index b83d45f8cb12..000000000000 --- a/fs/xfs/xfs_dfrag.c +++ /dev/null | |||
@@ -1,459 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_types.h" | ||
21 | #include "xfs_log.h" | ||
22 | #include "xfs_trans.h" | ||
23 | #include "xfs_sb.h" | ||
24 | #include "xfs_ag.h" | ||
25 | #include "xfs_mount.h" | ||
26 | #include "xfs_bmap_btree.h" | ||
27 | #include "xfs_alloc_btree.h" | ||
28 | #include "xfs_ialloc_btree.h" | ||
29 | #include "xfs_btree.h" | ||
30 | #include "xfs_dinode.h" | ||
31 | #include "xfs_inode.h" | ||
32 | #include "xfs_inode_item.h" | ||
33 | #include "xfs_bmap.h" | ||
34 | #include "xfs_bmap_util.h" | ||
35 | #include "xfs_itable.h" | ||
36 | #include "xfs_dfrag.h" | ||
37 | #include "xfs_error.h" | ||
38 | #include "xfs_trace.h" | ||
39 | |||
40 | |||
41 | static int xfs_swap_extents( | ||
42 | xfs_inode_t *ip, /* target inode */ | ||
43 | xfs_inode_t *tip, /* tmp inode */ | ||
44 | xfs_swapext_t *sxp); | ||
45 | |||
46 | /* | ||
47 | * ioctl interface for swapext | ||
48 | */ | ||
49 | int | ||
50 | xfs_swapext( | ||
51 | xfs_swapext_t *sxp) | ||
52 | { | ||
53 | xfs_inode_t *ip, *tip; | ||
54 | struct fd f, tmp; | ||
55 | int error = 0; | ||
56 | |||
57 | /* Pull information for the target fd */ | ||
58 | f = fdget((int)sxp->sx_fdtarget); | ||
59 | if (!f.file) { | ||
60 | error = XFS_ERROR(EINVAL); | ||
61 | goto out; | ||
62 | } | ||
63 | |||
64 | if (!(f.file->f_mode & FMODE_WRITE) || | ||
65 | !(f.file->f_mode & FMODE_READ) || | ||
66 | (f.file->f_flags & O_APPEND)) { | ||
67 | error = XFS_ERROR(EBADF); | ||
68 | goto out_put_file; | ||
69 | } | ||
70 | |||
71 | tmp = fdget((int)sxp->sx_fdtmp); | ||
72 | if (!tmp.file) { | ||
73 | error = XFS_ERROR(EINVAL); | ||
74 | goto out_put_file; | ||
75 | } | ||
76 | |||
77 | if (!(tmp.file->f_mode & FMODE_WRITE) || | ||
78 | !(tmp.file->f_mode & FMODE_READ) || | ||
79 | (tmp.file->f_flags & O_APPEND)) { | ||
80 | error = XFS_ERROR(EBADF); | ||
81 | goto out_put_tmp_file; | ||
82 | } | ||
83 | |||
84 | if (IS_SWAPFILE(file_inode(f.file)) || | ||
85 | IS_SWAPFILE(file_inode(tmp.file))) { | ||
86 | error = XFS_ERROR(EINVAL); | ||
87 | goto out_put_tmp_file; | ||
88 | } | ||
89 | |||
90 | ip = XFS_I(file_inode(f.file)); | ||
91 | tip = XFS_I(file_inode(tmp.file)); | ||
92 | |||
93 | if (ip->i_mount != tip->i_mount) { | ||
94 | error = XFS_ERROR(EINVAL); | ||
95 | goto out_put_tmp_file; | ||
96 | } | ||
97 | |||
98 | if (ip->i_ino == tip->i_ino) { | ||
99 | error = XFS_ERROR(EINVAL); | ||
100 | goto out_put_tmp_file; | ||
101 | } | ||
102 | |||
103 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { | ||
104 | error = XFS_ERROR(EIO); | ||
105 | goto out_put_tmp_file; | ||
106 | } | ||
107 | |||
108 | error = xfs_swap_extents(ip, tip, sxp); | ||
109 | |||
110 | out_put_tmp_file: | ||
111 | fdput(tmp); | ||
112 | out_put_file: | ||
113 | fdput(f); | ||
114 | out: | ||
115 | return error; | ||
116 | } | ||
117 | |||
118 | /* | ||
119 | * We need to check that the format of the data fork in the temporary inode is | ||
120 | * valid for the target inode before doing the swap. This is not a problem with | ||
121 | * attr1 because of the fixed fork offset, but attr2 has a dynamically sized | ||
122 | * data fork depending on the space the attribute fork is taking so we can get | ||
123 | * invalid formats on the target inode. | ||
124 | * | ||
125 | * E.g. target has space for 7 extents in extent format, temp inode only has | ||
126 | * space for 6. If we defragment down to 7 extents, then the tmp format is a | ||
127 | * btree, but when swapped it needs to be in extent format. Hence we can't just | ||
128 | * blindly swap data forks on attr2 filesystems. | ||
129 | * | ||
130 | * Note that we check the swap in both directions so that we don't end up with | ||
131 | * a corrupt temporary inode, either. | ||
132 | * | ||
133 | * Note that fixing the way xfs_fsr sets up the attribute fork in the source | ||
134 | * inode will prevent this situation from occurring, so all we do here is | ||
135 | * reject and log the attempt. basically we are putting the responsibility on | ||
136 | * userspace to get this right. | ||
137 | */ | ||
138 | static int | ||
139 | xfs_swap_extents_check_format( | ||
140 | xfs_inode_t *ip, /* target inode */ | ||
141 | xfs_inode_t *tip) /* tmp inode */ | ||
142 | { | ||
143 | |||
144 | /* Should never get a local format */ | ||
145 | if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || | ||
146 | tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) | ||
147 | return EINVAL; | ||
148 | |||
149 | /* | ||
150 | * if the target inode has less extents that then temporary inode then | ||
151 | * why did userspace call us? | ||
152 | */ | ||
153 | if (ip->i_d.di_nextents < tip->i_d.di_nextents) | ||
154 | return EINVAL; | ||
155 | |||
156 | /* | ||
157 | * if the target inode is in extent form and the temp inode is in btree | ||
158 | * form then we will end up with the target inode in the wrong format | ||
159 | * as we already know there are less extents in the temp inode. | ||
160 | */ | ||
161 | if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && | ||
162 | tip->i_d.di_format == XFS_DINODE_FMT_BTREE) | ||
163 | return EINVAL; | ||
164 | |||
165 | /* Check temp in extent form to max in target */ | ||
166 | if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && | ||
167 | XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > | ||
168 | XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) | ||
169 | return EINVAL; | ||
170 | |||
171 | /* Check target in extent form to max in temp */ | ||
172 | if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && | ||
173 | XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > | ||
174 | XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) | ||
175 | return EINVAL; | ||
176 | |||
177 | /* | ||
178 | * If we are in a btree format, check that the temp root block will fit | ||
179 | * in the target and that it has enough extents to be in btree format | ||
180 | * in the target. | ||
181 | * | ||
182 | * Note that we have to be careful to allow btree->extent conversions | ||
183 | * (a common defrag case) which will occur when the temp inode is in | ||
184 | * extent format... | ||
185 | */ | ||
186 | if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { | ||
187 | if (XFS_IFORK_BOFF(ip) && | ||
188 | XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip)) | ||
189 | return EINVAL; | ||
190 | if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= | ||
191 | XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) | ||
192 | return EINVAL; | ||
193 | } | ||
194 | |||
195 | /* Reciprocal target->temp btree format checks */ | ||
196 | if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { | ||
197 | if (XFS_IFORK_BOFF(tip) && | ||
198 | XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip)) | ||
199 | return EINVAL; | ||
200 | if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= | ||
201 | XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) | ||
202 | return EINVAL; | ||
203 | } | ||
204 | |||
205 | return 0; | ||
206 | } | ||
207 | |||
208 | static int | ||
209 | xfs_swap_extents( | ||
210 | xfs_inode_t *ip, /* target inode */ | ||
211 | xfs_inode_t *tip, /* tmp inode */ | ||
212 | xfs_swapext_t *sxp) | ||
213 | { | ||
214 | xfs_mount_t *mp = ip->i_mount; | ||
215 | xfs_trans_t *tp; | ||
216 | xfs_bstat_t *sbp = &sxp->sx_stat; | ||
217 | xfs_ifork_t *tempifp, *ifp, *tifp; | ||
218 | int src_log_flags, target_log_flags; | ||
219 | int error = 0; | ||
220 | int aforkblks = 0; | ||
221 | int taforkblks = 0; | ||
222 | __uint64_t tmp; | ||
223 | |||
224 | /* | ||
225 | * We have no way of updating owner information in the BMBT blocks for | ||
226 | * each inode on CRC enabled filesystems, so to avoid corrupting the | ||
227 | * this metadata we simply don't allow extent swaps to occur. | ||
228 | */ | ||
229 | if (xfs_sb_version_hascrc(&mp->m_sb)) | ||
230 | return XFS_ERROR(EINVAL); | ||
231 | |||
232 | tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); | ||
233 | if (!tempifp) { | ||
234 | error = XFS_ERROR(ENOMEM); | ||
235 | goto out; | ||
236 | } | ||
237 | |||
238 | /* | ||
239 | * we have to do two separate lock calls here to keep lockdep | ||
240 | * happy. If we try to get all the locks in one call, lock will | ||
241 | * report false positives when we drop the ILOCK and regain them | ||
242 | * below. | ||
243 | */ | ||
244 | xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); | ||
245 | xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); | ||
246 | |||
247 | /* Verify that both files have the same format */ | ||
248 | if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { | ||
249 | error = XFS_ERROR(EINVAL); | ||
250 | goto out_unlock; | ||
251 | } | ||
252 | |||
253 | /* Verify both files are either real-time or non-realtime */ | ||
254 | if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) { | ||
255 | error = XFS_ERROR(EINVAL); | ||
256 | goto out_unlock; | ||
257 | } | ||
258 | |||
259 | error = -filemap_write_and_wait(VFS_I(tip)->i_mapping); | ||
260 | if (error) | ||
261 | goto out_unlock; | ||
262 | truncate_pagecache_range(VFS_I(tip), 0, -1); | ||
263 | |||
264 | /* Verify O_DIRECT for ftmp */ | ||
265 | if (VN_CACHED(VFS_I(tip)) != 0) { | ||
266 | error = XFS_ERROR(EINVAL); | ||
267 | goto out_unlock; | ||
268 | } | ||
269 | |||
270 | /* Verify all data are being swapped */ | ||
271 | if (sxp->sx_offset != 0 || | ||
272 | sxp->sx_length != ip->i_d.di_size || | ||
273 | sxp->sx_length != tip->i_d.di_size) { | ||
274 | error = XFS_ERROR(EFAULT); | ||
275 | goto out_unlock; | ||
276 | } | ||
277 | |||
278 | trace_xfs_swap_extent_before(ip, 0); | ||
279 | trace_xfs_swap_extent_before(tip, 1); | ||
280 | |||
281 | /* check inode formats now that data is flushed */ | ||
282 | error = xfs_swap_extents_check_format(ip, tip); | ||
283 | if (error) { | ||
284 | xfs_notice(mp, | ||
285 | "%s: inode 0x%llx format is incompatible for exchanging.", | ||
286 | __func__, ip->i_ino); | ||
287 | goto out_unlock; | ||
288 | } | ||
289 | |||
290 | /* | ||
291 | * Compare the current change & modify times with that | ||
292 | * passed in. If they differ, we abort this swap. | ||
293 | * This is the mechanism used to ensure the calling | ||
294 | * process that the file was not changed out from | ||
295 | * under it. | ||
296 | */ | ||
297 | if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) || | ||
298 | (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) || | ||
299 | (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) || | ||
300 | (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) { | ||
301 | error = XFS_ERROR(EBUSY); | ||
302 | goto out_unlock; | ||
303 | } | ||
304 | |||
305 | /* We need to fail if the file is memory mapped. Once we have tossed | ||
306 | * all existing pages, the page fault will have no option | ||
307 | * but to go to the filesystem for pages. By making the page fault call | ||
308 | * vop_read (or write in the case of autogrow) they block on the iolock | ||
309 | * until we have switched the extents. | ||
310 | */ | ||
311 | if (VN_MAPPED(VFS_I(ip))) { | ||
312 | error = XFS_ERROR(EBUSY); | ||
313 | goto out_unlock; | ||
314 | } | ||
315 | |||
316 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
317 | xfs_iunlock(tip, XFS_ILOCK_EXCL); | ||
318 | |||
319 | /* | ||
320 | * There is a race condition here since we gave up the | ||
321 | * ilock. However, the data fork will not change since | ||
322 | * we have the iolock (locked for truncation too) so we | ||
323 | * are safe. We don't really care if non-io related | ||
324 | * fields change. | ||
325 | */ | ||
326 | truncate_pagecache_range(VFS_I(ip), 0, -1); | ||
327 | |||
328 | tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); | ||
329 | if ((error = xfs_trans_reserve(tp, 0, | ||
330 | XFS_ICHANGE_LOG_RES(mp), 0, | ||
331 | 0, 0))) { | ||
332 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
333 | xfs_iunlock(tip, XFS_IOLOCK_EXCL); | ||
334 | xfs_trans_cancel(tp, 0); | ||
335 | goto out; | ||
336 | } | ||
337 | xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); | ||
338 | |||
339 | /* | ||
340 | * Count the number of extended attribute blocks | ||
341 | */ | ||
342 | if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) && | ||
343 | (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { | ||
344 | error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks); | ||
345 | if (error) | ||
346 | goto out_trans_cancel; | ||
347 | } | ||
348 | if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) && | ||
349 | (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { | ||
350 | error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, | ||
351 | &taforkblks); | ||
352 | if (error) | ||
353 | goto out_trans_cancel; | ||
354 | } | ||
355 | |||
356 | /* | ||
357 | * Swap the data forks of the inodes | ||
358 | */ | ||
359 | ifp = &ip->i_df; | ||
360 | tifp = &tip->i_df; | ||
361 | *tempifp = *ifp; /* struct copy */ | ||
362 | *ifp = *tifp; /* struct copy */ | ||
363 | *tifp = *tempifp; /* struct copy */ | ||
364 | |||
365 | /* | ||
366 | * Fix the on-disk inode values | ||
367 | */ | ||
368 | tmp = (__uint64_t)ip->i_d.di_nblocks; | ||
369 | ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks; | ||
370 | tip->i_d.di_nblocks = tmp + taforkblks - aforkblks; | ||
371 | |||
372 | tmp = (__uint64_t) ip->i_d.di_nextents; | ||
373 | ip->i_d.di_nextents = tip->i_d.di_nextents; | ||
374 | tip->i_d.di_nextents = tmp; | ||
375 | |||
376 | tmp = (__uint64_t) ip->i_d.di_format; | ||
377 | ip->i_d.di_format = tip->i_d.di_format; | ||
378 | tip->i_d.di_format = tmp; | ||
379 | |||
380 | /* | ||
381 | * The extents in the source inode could still contain speculative | ||
382 | * preallocation beyond EOF (e.g. the file is open but not modified | ||
383 | * while defrag is in progress). In that case, we need to copy over the | ||
384 | * number of delalloc blocks the data fork in the source inode is | ||
385 | * tracking beyond EOF so that when the fork is truncated away when the | ||
386 | * temporary inode is unlinked we don't underrun the i_delayed_blks | ||
387 | * counter on that inode. | ||
388 | */ | ||
389 | ASSERT(tip->i_delayed_blks == 0); | ||
390 | tip->i_delayed_blks = ip->i_delayed_blks; | ||
391 | ip->i_delayed_blks = 0; | ||
392 | |||
393 | src_log_flags = XFS_ILOG_CORE; | ||
394 | switch (ip->i_d.di_format) { | ||
395 | case XFS_DINODE_FMT_EXTENTS: | ||
396 | /* If the extents fit in the inode, fix the | ||
397 | * pointer. Otherwise it's already NULL or | ||
398 | * pointing to the extent. | ||
399 | */ | ||
400 | if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) { | ||
401 | ifp->if_u1.if_extents = | ||
402 | ifp->if_u2.if_inline_ext; | ||
403 | } | ||
404 | src_log_flags |= XFS_ILOG_DEXT; | ||
405 | break; | ||
406 | case XFS_DINODE_FMT_BTREE: | ||
407 | src_log_flags |= XFS_ILOG_DBROOT; | ||
408 | break; | ||
409 | } | ||
410 | |||
411 | target_log_flags = XFS_ILOG_CORE; | ||
412 | switch (tip->i_d.di_format) { | ||
413 | case XFS_DINODE_FMT_EXTENTS: | ||
414 | /* If the extents fit in the inode, fix the | ||
415 | * pointer. Otherwise it's already NULL or | ||
416 | * pointing to the extent. | ||
417 | */ | ||
418 | if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) { | ||
419 | tifp->if_u1.if_extents = | ||
420 | tifp->if_u2.if_inline_ext; | ||
421 | } | ||
422 | target_log_flags |= XFS_ILOG_DEXT; | ||
423 | break; | ||
424 | case XFS_DINODE_FMT_BTREE: | ||
425 | target_log_flags |= XFS_ILOG_DBROOT; | ||
426 | break; | ||
427 | } | ||
428 | |||
429 | |||
430 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | ||
431 | xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | ||
432 | |||
433 | xfs_trans_log_inode(tp, ip, src_log_flags); | ||
434 | xfs_trans_log_inode(tp, tip, target_log_flags); | ||
435 | |||
436 | /* | ||
437 | * If this is a synchronous mount, make sure that the | ||
438 | * transaction goes to disk before returning to the user. | ||
439 | */ | ||
440 | if (mp->m_flags & XFS_MOUNT_WSYNC) | ||
441 | xfs_trans_set_sync(tp); | ||
442 | |||
443 | error = xfs_trans_commit(tp, 0); | ||
444 | |||
445 | trace_xfs_swap_extent_after(ip, 0); | ||
446 | trace_xfs_swap_extent_after(tip, 1); | ||
447 | out: | ||
448 | kmem_free(tempifp); | ||
449 | return error; | ||
450 | |||
451 | out_unlock: | ||
452 | xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | ||
453 | xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | ||
454 | goto out; | ||
455 | |||
456 | out_trans_cancel: | ||
457 | xfs_trans_cancel(tp, 0); | ||
458 | goto out_unlock; | ||
459 | } | ||
diff --git a/fs/xfs/xfs_dfrag.h b/fs/xfs/xfs_dfrag.h deleted file mode 100644 index 20bdd935c121..000000000000 --- a/fs/xfs/xfs_dfrag.h +++ /dev/null | |||
@@ -1,53 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_DFRAG_H__ | ||
19 | #define __XFS_DFRAG_H__ | ||
20 | |||
21 | /* | ||
22 | * Structure passed to xfs_swapext | ||
23 | */ | ||
24 | |||
25 | typedef struct xfs_swapext | ||
26 | { | ||
27 | __int64_t sx_version; /* version */ | ||
28 | __int64_t sx_fdtarget; /* fd of target file */ | ||
29 | __int64_t sx_fdtmp; /* fd of tmp file */ | ||
30 | xfs_off_t sx_offset; /* offset into file */ | ||
31 | xfs_off_t sx_length; /* length from offset */ | ||
32 | char sx_pad[16]; /* pad space, unused */ | ||
33 | xfs_bstat_t sx_stat; /* stat of target before copy */ | ||
34 | } xfs_swapext_t; | ||
35 | |||
36 | /* | ||
37 | * Version flag | ||
38 | */ | ||
39 | #define XFS_SX_VERSION 0 | ||
40 | |||
41 | #ifdef __KERNEL__ | ||
42 | /* | ||
43 | * Prototypes for visible xfs_dfrag.c routines. | ||
44 | */ | ||
45 | |||
46 | /* | ||
47 | * Syscall interface for xfs_swapext | ||
48 | */ | ||
49 | int xfs_swapext(struct xfs_swapext *sx); | ||
50 | |||
51 | #endif /* __KERNEL__ */ | ||
52 | |||
53 | #endif /* __XFS_DFRAG_H__ */ | ||
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 68c2e18f7e07..74b24b2ecd07 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h | |||
@@ -461,6 +461,21 @@ typedef struct xfs_handle { | |||
461 | + (handle).ha_fid.fid_len) | 461 | + (handle).ha_fid.fid_len) |
462 | 462 | ||
463 | /* | 463 | /* |
464 | * Structure passed to XFS_IOC_SWAPEXT | ||
465 | */ | ||
466 | typedef struct xfs_swapext | ||
467 | { | ||
468 | __int64_t sx_version; /* version */ | ||
469 | #define XFS_SX_VERSION 0 | ||
470 | __int64_t sx_fdtarget; /* fd of target file */ | ||
471 | __int64_t sx_fdtmp; /* fd of tmp file */ | ||
472 | xfs_off_t sx_offset; /* offset into file */ | ||
473 | xfs_off_t sx_length; /* length from offset */ | ||
474 | char sx_pad[16]; /* pad space, unused */ | ||
475 | xfs_bstat_t sx_stat; /* stat of target before copy */ | ||
476 | } xfs_swapext_t; | ||
477 | |||
478 | /* | ||
464 | * Flags for going down operation | 479 | * Flags for going down operation |
465 | */ | 480 | */ |
466 | #define XFS_FSOP_GOING_FLAGS_DEFAULT 0x0 /* going down */ | 481 | #define XFS_FSOP_GOING_FLAGS_DEFAULT 0x0 /* going down */ |
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 557c7b8b2425..efb216de5f69 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c | |||
@@ -35,7 +35,6 @@ | |||
35 | #include "xfs_bmap.h" | 35 | #include "xfs_bmap.h" |
36 | #include "xfs_bmap_util.h" | 36 | #include "xfs_bmap_util.h" |
37 | #include "xfs_buf_item.h" | 37 | #include "xfs_buf_item.h" |
38 | #include "xfs_dfrag.h" | ||
39 | #include "xfs_fsops.h" | 38 | #include "xfs_fsops.h" |
40 | #include "xfs_discard.h" | 39 | #include "xfs_discard.h" |
41 | #include "xfs_quota.h" | 40 | #include "xfs_quota.h" |
@@ -1363,6 +1362,75 @@ xfs_ioc_getbmapx( | |||
1363 | return 0; | 1362 | return 0; |
1364 | } | 1363 | } |
1365 | 1364 | ||
/*
 * User-interface half of the extent swap ioctl: resolve and validate the two
 * file descriptors carried in @sxp (sx_fdtarget and sx_fdtmp), then hand the
 * corresponding inodes to xfs_swap_extents().
 *
 * Validation failures return a positive errno wrapped in XFS_ERROR();
 * otherwise the result of xfs_swap_extents() is returned unchanged. The
 * ioctl callers negate the value before returning it.
 */
1365 | int | ||
1366 | xfs_ioc_swapext( | ||
1367 | xfs_swapext_t *sxp) | ||
1368 | { | ||
1369 | xfs_inode_t *ip, *tip; | ||
1370 | struct fd f, tmp; | ||
1371 | int error = 0; | ||
1372 | |||
1373 | /* Pull information for the target fd */ | ||
1374 | f = fdget((int)sxp->sx_fdtarget); | ||
1375 | if (!f.file) { | ||
1376 | error = XFS_ERROR(EINVAL); | ||
1377 | goto out; | ||
1378 | } | ||
1379 | |||
/* The target must be open for both reading and writing, and not O_APPEND. */
1380 | if (!(f.file->f_mode & FMODE_WRITE) || | ||
1381 | !(f.file->f_mode & FMODE_READ) || | ||
1382 | (f.file->f_flags & O_APPEND)) { | ||
1383 | error = XFS_ERROR(EBADF); | ||
1384 | goto out_put_file; | ||
1385 | } | ||
1386 | |||
/* Same lookup and open-mode checks for the tmp fd. */
1387 | tmp = fdget((int)sxp->sx_fdtmp); | ||
1388 | if (!tmp.file) { | ||
1389 | error = XFS_ERROR(EINVAL); | ||
1390 | goto out_put_file; | ||
1391 | } | ||
1392 | |||
1393 | if (!(tmp.file->f_mode & FMODE_WRITE) || | ||
1394 | !(tmp.file->f_mode & FMODE_READ) || | ||
1395 | (tmp.file->f_flags & O_APPEND)) { | ||
1396 | error = XFS_ERROR(EBADF); | ||
1397 | goto out_put_tmp_file; | ||
1398 | } | ||
1399 | |||
/* Neither file may be a swapfile. */
1400 | if (IS_SWAPFILE(file_inode(f.file)) || | ||
1401 | IS_SWAPFILE(file_inode(tmp.file))) { | ||
1402 | error = XFS_ERROR(EINVAL); | ||
1403 | goto out_put_tmp_file; | ||
1404 | } | ||
1405 | |||
/*
 * NOTE(review): XFS_I() is applied to both inodes with no visible check in
 * this function that the user-supplied fds actually refer to XFS files; the
 * i_mount comparison below only runs after the conversion. Confirm that a
 * caller guarantees this, or add an explicit check before this point.
 */
1406 | ip = XFS_I(file_inode(f.file)); | ||
1407 | tip = XFS_I(file_inode(tmp.file)); | ||
1408 | |||
/* Both inodes must belong to the same mount... */
1409 | if (ip->i_mount != tip->i_mount) { | ||
1410 | error = XFS_ERROR(EINVAL); | ||
1411 | goto out_put_tmp_file; | ||
1412 | } | ||
1413 | |||
/* ...and must not be the same inode. */
1414 | if (ip->i_ino == tip->i_ino) { | ||
1415 | error = XFS_ERROR(EINVAL); | ||
1416 | goto out_put_tmp_file; | ||
1417 | } | ||
1418 | |||
/* Fail with EIO if XFS_FORCED_SHUTDOWN() reports true for the mount. */
1419 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { | ||
1420 | error = XFS_ERROR(EIO); | ||
1421 | goto out_put_tmp_file; | ||
1422 | } | ||
1423 | |||
1424 | error = xfs_swap_extents(ip, tip, sxp); | ||
1425 | |||
/* Unwind in reverse order of acquisition: drop the tmp fd, then the target fd. */
1426 | out_put_tmp_file: | ||
1427 | fdput(tmp); | ||
1428 | out_put_file: | ||
1429 | fdput(f); | ||
1430 | out: | ||
1431 | return error; | ||
1432 | } | ||
1433 | |||
1366 | /* | 1434 | /* |
1367 | * Note: some of the ioctl's return positive numbers as a | 1435 | * Note: some of the ioctl's return positive numbers as a |
1368 | * byte count indicating success, such as readlink_by_handle. | 1436 | * byte count indicating success, such as readlink_by_handle. |
@@ -1507,7 +1575,7 @@ xfs_file_ioctl( | |||
1507 | error = mnt_want_write_file(filp); | 1575 | error = mnt_want_write_file(filp); |
1508 | if (error) | 1576 | if (error) |
1509 | return error; | 1577 | return error; |
1510 | error = xfs_swapext(&sxp); | 1578 | error = xfs_ioc_swapext(&sxp); |
1511 | mnt_drop_write_file(filp); | 1579 | mnt_drop_write_file(filp); |
1512 | return -error; | 1580 | return -error; |
1513 | } | 1581 | } |
diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h index 1233dee4fef0..77c02c7900b6 100644 --- a/fs/xfs/xfs_ioctl.h +++ b/fs/xfs/xfs_ioctl.h | |||
@@ -27,6 +27,10 @@ xfs_ioc_space( | |||
27 | unsigned int cmd, | 27 | unsigned int cmd, |
28 | xfs_flock64_t *bf); | 28 | xfs_flock64_t *bf); |
29 | 29 | ||
30 | int | ||
31 | xfs_ioc_swapext( | ||
32 | xfs_swapext_t *sxp); | ||
33 | |||
30 | extern int | 34 | extern int |
31 | xfs_find_handle( | 35 | xfs_find_handle( |
32 | unsigned int cmd, | 36 | unsigned int cmd, |
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 55a3072e7f56..d3ab9534307f 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c | |||
@@ -33,7 +33,6 @@ | |||
33 | #include "xfs_inode.h" | 33 | #include "xfs_inode.h" |
34 | #include "xfs_itable.h" | 34 | #include "xfs_itable.h" |
35 | #include "xfs_error.h" | 35 | #include "xfs_error.h" |
36 | #include "xfs_dfrag.h" | ||
37 | #include "xfs_fsops.h" | 36 | #include "xfs_fsops.h" |
38 | #include "xfs_alloc.h" | 37 | #include "xfs_alloc.h" |
39 | #include "xfs_rtalloc.h" | 38 | #include "xfs_rtalloc.h" |
@@ -643,7 +642,7 @@ xfs_file_compat_ioctl( | |||
643 | error = mnt_want_write_file(filp); | 642 | error = mnt_want_write_file(filp); |
644 | if (error) | 643 | if (error) |
645 | return error; | 644 | return error; |
646 | error = xfs_swapext(&sxp); | 645 | error = xfs_ioc_swapext(&sxp); |
647 | mnt_drop_write_file(filp); | 646 | mnt_drop_write_file(filp); |
648 | return -error; | 647 | return -error; |
649 | } | 648 | } |