diff options
Diffstat (limited to 'fs/xfs/linux-2.6/xfs_aops.c')
| -rw-r--r-- | fs/xfs/linux-2.6/xfs_aops.c | 221 |
1 files changed, 172 insertions, 49 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 66abe36c1213..9083357f9e44 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
| @@ -39,6 +39,7 @@ | |||
| 39 | #include "xfs_iomap.h" | 39 | #include "xfs_iomap.h" |
| 40 | #include "xfs_vnodeops.h" | 40 | #include "xfs_vnodeops.h" |
| 41 | #include "xfs_trace.h" | 41 | #include "xfs_trace.h" |
| 42 | #include "xfs_bmap.h" | ||
| 42 | #include <linux/mpage.h> | 43 | #include <linux/mpage.h> |
| 43 | #include <linux/pagevec.h> | 44 | #include <linux/pagevec.h> |
| 44 | #include <linux/writeback.h> | 45 | #include <linux/writeback.h> |
| @@ -163,14 +164,17 @@ xfs_ioend_new_eof( | |||
| 163 | } | 164 | } |
| 164 | 165 | ||
| 165 | /* | 166 | /* |
| 166 | * Update on-disk file size now that data has been written to disk. | 167 | * Update on-disk file size now that data has been written to disk. The |
| 167 | * The current in-memory file size is i_size. If a write is beyond | 168 | * current in-memory file size is i_size. If a write is beyond eof i_new_size |
| 168 | * eof i_new_size will be the intended file size until i_size is | 169 | * will be the intended file size until i_size is updated. If this write does |
| 169 | * updated. If this write does not extend all the way to the valid | 170 | * not extend all the way to the valid file size then restrict this update to |
| 170 | * file size then restrict this update to the end of the write. | 171 | * the end of the write. |
| 172 | * | ||
| 173 | * This function does not block as blocking on the inode lock in IO completion | ||
| 174 | * can lead to IO completion order dependency deadlocks. If it can't get the | ||
| 175 | * inode ilock it will return EAGAIN. Callers must handle this. | ||
| 171 | */ | 176 | */ |
| 172 | 177 | STATIC int | |
| 173 | STATIC void | ||
| 174 | xfs_setfilesize( | 178 | xfs_setfilesize( |
| 175 | xfs_ioend_t *ioend) | 179 | xfs_ioend_t *ioend) |
| 176 | { | 180 | { |
| @@ -181,16 +185,40 @@ xfs_setfilesize( | |||
| 181 | ASSERT(ioend->io_type != IOMAP_READ); | 185 | ASSERT(ioend->io_type != IOMAP_READ); |
| 182 | 186 | ||
| 183 | if (unlikely(ioend->io_error)) | 187 | if (unlikely(ioend->io_error)) |
| 184 | return; | 188 | return 0; |
| 189 | |||
| 190 | if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) | ||
| 191 | return EAGAIN; | ||
| 185 | 192 | ||
| 186 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
| 187 | isize = xfs_ioend_new_eof(ioend); | 193 | isize = xfs_ioend_new_eof(ioend); |
| 188 | if (isize) { | 194 | if (isize) { |
| 189 | ip->i_d.di_size = isize; | 195 | ip->i_d.di_size = isize; |
| 190 | xfs_mark_inode_dirty_sync(ip); | 196 | xfs_mark_inode_dirty(ip); |
| 191 | } | 197 | } |
| 192 | 198 | ||
| 193 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 199 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
| 200 | return 0; | ||
| 201 | } | ||
| 202 | |||
| 203 | /* | ||
| 204 | * Schedule IO completion handling on a xfsdatad if this was | ||
| 205 | * the final hold on this ioend. If we are asked to wait, | ||
| 206 | * flush the workqueue. | ||
| 207 | */ | ||
| 208 | STATIC void | ||
| 209 | xfs_finish_ioend( | ||
| 210 | xfs_ioend_t *ioend, | ||
| 211 | int wait) | ||
| 212 | { | ||
| 213 | if (atomic_dec_and_test(&ioend->io_remaining)) { | ||
| 214 | struct workqueue_struct *wq; | ||
| 215 | |||
| 216 | wq = (ioend->io_type == IOMAP_UNWRITTEN) ? | ||
| 217 | xfsconvertd_workqueue : xfsdatad_workqueue; | ||
| 218 | queue_work(wq, &ioend->io_work); | ||
| 219 | if (wait) | ||
| 220 | flush_workqueue(wq); | ||
| 221 | } | ||
| 194 | } | 222 | } |
| 195 | 223 | ||
| 196 | /* | 224 | /* |
| @@ -198,11 +226,11 @@ xfs_setfilesize( | |||
| 198 | */ | 226 | */ |
| 199 | STATIC void | 227 | STATIC void |
| 200 | xfs_end_io( | 228 | xfs_end_io( |
| 201 | struct work_struct *work) | 229 | struct work_struct *work) |
| 202 | { | 230 | { |
| 203 | xfs_ioend_t *ioend = | 231 | xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work); |
| 204 | container_of(work, xfs_ioend_t, io_work); | 232 | struct xfs_inode *ip = XFS_I(ioend->io_inode); |
| 205 | struct xfs_inode *ip = XFS_I(ioend->io_inode); | 233 | int error = 0; |
| 206 | 234 | ||
| 207 | /* | 235 | /* |
| 208 | * For unwritten extents we need to issue transactions to convert a | 236 | * For unwritten extents we need to issue transactions to convert a |
| @@ -210,7 +238,6 @@ xfs_end_io( | |||
| 210 | */ | 238 | */ |
| 211 | if (ioend->io_type == IOMAP_UNWRITTEN && | 239 | if (ioend->io_type == IOMAP_UNWRITTEN && |
| 212 | likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) { | 240 | likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) { |
| 213 | int error; | ||
| 214 | 241 | ||
| 215 | error = xfs_iomap_write_unwritten(ip, ioend->io_offset, | 242 | error = xfs_iomap_write_unwritten(ip, ioend->io_offset, |
| 216 | ioend->io_size); | 243 | ioend->io_size); |
| @@ -222,30 +249,23 @@ xfs_end_io( | |||
| 222 | * We might have to update the on-disk file size after extending | 249 | * We might have to update the on-disk file size after extending |
| 223 | * writes. | 250 | * writes. |
| 224 | */ | 251 | */ |
| 225 | if (ioend->io_type != IOMAP_READ) | 252 | if (ioend->io_type != IOMAP_READ) { |
| 226 | xfs_setfilesize(ioend); | 253 | error = xfs_setfilesize(ioend); |
| 227 | xfs_destroy_ioend(ioend); | 254 | ASSERT(!error || error == EAGAIN); |
| 228 | } | ||
| 229 | |||
| 230 | /* | ||
| 231 | * Schedule IO completion handling on a xfsdatad if this was | ||
| 232 | * the final hold on this ioend. If we are asked to wait, | ||
| 233 | * flush the workqueue. | ||
| 234 | */ | ||
| 235 | STATIC void | ||
| 236 | xfs_finish_ioend( | ||
| 237 | xfs_ioend_t *ioend, | ||
| 238 | int wait) | ||
| 239 | { | ||
| 240 | if (atomic_dec_and_test(&ioend->io_remaining)) { | ||
| 241 | struct workqueue_struct *wq; | ||
| 242 | |||
| 243 | wq = (ioend->io_type == IOMAP_UNWRITTEN) ? | ||
| 244 | xfsconvertd_workqueue : xfsdatad_workqueue; | ||
| 245 | queue_work(wq, &ioend->io_work); | ||
| 246 | if (wait) | ||
| 247 | flush_workqueue(wq); | ||
| 248 | } | 255 | } |
| 256 | |||
| 257 | /* | ||
| 258 | * If we didn't complete processing of the ioend, requeue it to the | ||
| 259 | * tail of the workqueue for another attempt later. Otherwise destroy | ||
| 260 | * it. | ||
| 261 | */ | ||
| 262 | if (error == EAGAIN) { | ||
| 263 | atomic_inc(&ioend->io_remaining); | ||
| 264 | xfs_finish_ioend(ioend, 0); | ||
| 265 | /* ensure we don't spin on blocked ioends */ | ||
| 266 | delay(1); | ||
| 267 | } else | ||
| 268 | xfs_destroy_ioend(ioend); | ||
| 249 | } | 269 | } |
| 250 | 270 | ||
| 251 | /* | 271 | /* |
| @@ -341,7 +361,7 @@ xfs_submit_ioend_bio( | |||
| 341 | * but don't update the inode size until I/O completion. | 361 | * but don't update the inode size until I/O completion. |
| 342 | */ | 362 | */ |
| 343 | if (xfs_ioend_new_eof(ioend)) | 363 | if (xfs_ioend_new_eof(ioend)) |
| 344 | xfs_mark_inode_dirty_sync(XFS_I(ioend->io_inode)); | 364 | xfs_mark_inode_dirty(XFS_I(ioend->io_inode)); |
| 345 | 365 | ||
| 346 | submit_bio(wbc->sync_mode == WB_SYNC_ALL ? | 366 | submit_bio(wbc->sync_mode == WB_SYNC_ALL ? |
| 347 | WRITE_SYNC_PLUG : WRITE, bio); | 367 | WRITE_SYNC_PLUG : WRITE, bio); |
| @@ -874,6 +894,118 @@ xfs_cluster_write( | |||
| 874 | } | 894 | } |
| 875 | } | 895 | } |
| 876 | 896 | ||
| 897 | STATIC void | ||
| 898 | xfs_vm_invalidatepage( | ||
| 899 | struct page *page, | ||
| 900 | unsigned long offset) | ||
| 901 | { | ||
| 902 | trace_xfs_invalidatepage(page->mapping->host, page, offset); | ||
| 903 | block_invalidatepage(page, offset); | ||
| 904 | } | ||
| 905 | |||
| 906 | /* | ||
| 907 | * If the page has delalloc buffers on it, we need to punch them out before we | ||
| 908 | * invalidate the page. If we don't, we leave a stale delalloc mapping on the | ||
| 909 | * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read | ||
| 910 | * is done on that same region - the delalloc extent is returned when none is | ||
| 911 | * supposed to be there. | ||
| 912 | * | ||
| 913 | * We prevent this by truncating away the delalloc regions on the page before | ||
| 914 | * invalidating it. Because they are delalloc, we can do this without needing a | ||
| 915 | * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this | ||
| 916 | * truncation without a transaction as there is no space left for block | ||
| 917 | * reservation (typically why we see an ENOSPC in writeback). | ||
| 918 | * | ||
| 919 | * This is not a performance critical path, so for now just do the punching a | ||
| 920 | * buffer head at a time. | ||
| 921 | */ | ||
| 922 | STATIC void | ||
| 923 | xfs_aops_discard_page( | ||
| 924 | struct page *page) | ||
| 925 | { | ||
| 926 | struct inode *inode = page->mapping->host; | ||
| 927 | struct xfs_inode *ip = XFS_I(inode); | ||
| 928 | struct buffer_head *bh, *head; | ||
| 929 | loff_t offset = page_offset(page); | ||
| 930 | ssize_t len = 1 << inode->i_blkbits; | ||
| 931 | |||
| 932 | if (!xfs_is_delayed_page(page, IOMAP_DELAY)) | ||
| 933 | goto out_invalidate; | ||
| 934 | |||
| 935 | xfs_fs_cmn_err(CE_ALERT, ip->i_mount, | ||
| 936 | "page discard on page %p, inode 0x%llx, offset %llu.", | ||
| 937 | page, ip->i_ino, offset); | ||
| 938 | |||
| 939 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
| 940 | bh = head = page_buffers(page); | ||
| 941 | do { | ||
| 942 | int done; | ||
| 943 | xfs_fileoff_t offset_fsb; | ||
| 944 | xfs_bmbt_irec_t imap; | ||
| 945 | int nimaps = 1; | ||
| 946 | int error; | ||
| 947 | xfs_fsblock_t firstblock; | ||
| 948 | xfs_bmap_free_t flist; | ||
| 949 | |||
| 950 | if (!buffer_delay(bh)) | ||
| 951 | goto next_buffer; | ||
| 952 | |||
| 953 | offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); | ||
| 954 | |||
| 955 | /* | ||
| 956 | * Map the range first and check that it is a delalloc extent | ||
| 957 | * before trying to unmap the range. Otherwise we will be | ||
| 958 | * trying to remove a real extent (which requires a | ||
| 959 | * transaction) or a hole, which is probably a bad idea... | ||
| 960 | */ | ||
| 961 | error = xfs_bmapi(NULL, ip, offset_fsb, 1, | ||
| 962 | XFS_BMAPI_ENTIRE, NULL, 0, &imap, | ||
| 963 | &nimaps, NULL, NULL); | ||
| 964 | |||
| 965 | if (error) { | ||
| 966 | /* something screwed, just bail */ | ||
| 967 | xfs_fs_cmn_err(CE_ALERT, ip->i_mount, | ||
| 968 | "page discard failed delalloc mapping lookup."); | ||
| 969 | break; | ||
| 970 | } | ||
| 971 | if (!nimaps) { | ||
| 972 | /* nothing there */ | ||
| 973 | goto next_buffer; | ||
| 974 | } | ||
| 975 | if (imap.br_startblock != DELAYSTARTBLOCK) { | ||
| 976 | /* been converted, ignore */ | ||
| 977 | goto next_buffer; | ||
| 978 | } | ||
| 979 | WARN_ON(imap.br_blockcount == 0); | ||
| 980 | |||
| 981 | /* | ||
| 982 | * Note: while we initialise the firstblock/flist pair, they | ||
| 983 | * should never be used because blocks should never be | ||
| 984 | * allocated or freed for a delalloc extent and hence we don't | ||
| 985 | * need to cancel or finish them after the xfs_bunmapi() call. | ||
| 986 | */ | ||
| 987 | xfs_bmap_init(&flist, &firstblock); | ||
| 988 | error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock, | ||
| 989 | &flist, NULL, &done); | ||
| 990 | |||
| 991 | ASSERT(!flist.xbf_count && !flist.xbf_first); | ||
| 992 | if (error) { | ||
| 993 | /* something screwed, just bail */ | ||
| 994 | xfs_fs_cmn_err(CE_ALERT, ip->i_mount, | ||
| 995 | "page discard unable to remove delalloc mapping."); | ||
| 996 | break; | ||
| 997 | } | ||
| 998 | next_buffer: | ||
| 999 | offset += len; | ||
| 1000 | |||
| 1001 | } while ((bh = bh->b_this_page) != head); | ||
| 1002 | |||
| 1003 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
| 1004 | out_invalidate: | ||
| 1005 | xfs_vm_invalidatepage(page, 0); | ||
| 1006 | return; | ||
| 1007 | } | ||
| 1008 | |||
| 877 | /* | 1009 | /* |
| 878 | * Calling this without startio set means we are being asked to make a dirty | 1010 | * Calling this without startio set means we are being asked to make a dirty |
| 879 | * page ready for freeing it's buffers. When called with startio set then | 1011 | * page ready for freeing it's buffers. When called with startio set then |
| @@ -1125,7 +1257,7 @@ error: | |||
| 1125 | */ | 1257 | */ |
| 1126 | if (err != -EAGAIN) { | 1258 | if (err != -EAGAIN) { |
| 1127 | if (!unmapped) | 1259 | if (!unmapped) |
| 1128 | block_invalidatepage(page, 0); | 1260 | xfs_aops_discard_page(page); |
| 1129 | ClearPageUptodate(page); | 1261 | ClearPageUptodate(page); |
| 1130 | } | 1262 | } |
| 1131 | return err; | 1263 | return err; |
| @@ -1535,15 +1667,6 @@ xfs_vm_readpages( | |||
| 1535 | return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); | 1667 | return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); |
| 1536 | } | 1668 | } |
| 1537 | 1669 | ||
| 1538 | STATIC void | ||
| 1539 | xfs_vm_invalidatepage( | ||
| 1540 | struct page *page, | ||
| 1541 | unsigned long offset) | ||
| 1542 | { | ||
| 1543 | trace_xfs_invalidatepage(page->mapping->host, page, offset); | ||
| 1544 | block_invalidatepage(page, offset); | ||
| 1545 | } | ||
| 1546 | |||
| 1547 | const struct address_space_operations xfs_address_space_operations = { | 1670 | const struct address_space_operations xfs_address_space_operations = { |
| 1548 | .readpage = xfs_vm_readpage, | 1671 | .readpage = xfs_vm_readpage, |
| 1549 | .readpages = xfs_vm_readpages, | 1672 | .readpages = xfs_vm_readpages, |
