diff options
Diffstat (limited to 'fs/xfs/linux-2.6/xfs_aops.c')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_aops.c | 221 |
1 files changed, 172 insertions, 49 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 66abe36c1213..9083357f9e44 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include "xfs_iomap.h" | 39 | #include "xfs_iomap.h" |
40 | #include "xfs_vnodeops.h" | 40 | #include "xfs_vnodeops.h" |
41 | #include "xfs_trace.h" | 41 | #include "xfs_trace.h" |
42 | #include "xfs_bmap.h" | ||
42 | #include <linux/mpage.h> | 43 | #include <linux/mpage.h> |
43 | #include <linux/pagevec.h> | 44 | #include <linux/pagevec.h> |
44 | #include <linux/writeback.h> | 45 | #include <linux/writeback.h> |
@@ -163,14 +164,17 @@ xfs_ioend_new_eof( | |||
163 | } | 164 | } |
164 | 165 | ||
165 | /* | 166 | /* |
166 | * Update on-disk file size now that data has been written to disk. | 167 | * Update on-disk file size now that data has been written to disk. The |
167 | * The current in-memory file size is i_size. If a write is beyond | 168 | * current in-memory file size is i_size. If a write is beyond eof i_new_size |
168 | * eof i_new_size will be the intended file size until i_size is | 169 | * will be the intended file size until i_size is updated. If this write does |
169 | * updated. If this write does not extend all the way to the valid | 170 | * not extend all the way to the valid file size then restrict this update to |
170 | * file size then restrict this update to the end of the write. | 171 | * the end of the write. |
172 | * | ||
173 | * This function does not block as blocking on the inode lock in IO completion | ||
174 | * can lead to IO completion order dependency deadlocks. If it can't get the | ||
175 | * inode ilock it will return EAGAIN. Callers must handle this. | ||
171 | */ | 176 | */ |
172 | 177 | STATIC int | |
173 | STATIC void | ||
174 | xfs_setfilesize( | 178 | xfs_setfilesize( |
175 | xfs_ioend_t *ioend) | 179 | xfs_ioend_t *ioend) |
176 | { | 180 | { |
@@ -181,16 +185,40 @@ xfs_setfilesize( | |||
181 | ASSERT(ioend->io_type != IOMAP_READ); | 185 | ASSERT(ioend->io_type != IOMAP_READ); |
182 | 186 | ||
183 | if (unlikely(ioend->io_error)) | 187 | if (unlikely(ioend->io_error)) |
184 | return; | 188 | return 0; |
189 | |||
190 | if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) | ||
191 | return EAGAIN; | ||
185 | 192 | ||
186 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
187 | isize = xfs_ioend_new_eof(ioend); | 193 | isize = xfs_ioend_new_eof(ioend); |
188 | if (isize) { | 194 | if (isize) { |
189 | ip->i_d.di_size = isize; | 195 | ip->i_d.di_size = isize; |
190 | xfs_mark_inode_dirty_sync(ip); | 196 | xfs_mark_inode_dirty(ip); |
191 | } | 197 | } |
192 | 198 | ||
193 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 199 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
200 | return 0; | ||
201 | } | ||
202 | |||
203 | /* | ||
204 | * Schedule IO completion handling on a xfsdatad if this was | ||
205 | * the final hold on this ioend. If we are asked to wait, | ||
206 | * flush the workqueue. | ||
207 | */ | ||
208 | STATIC void | ||
209 | xfs_finish_ioend( | ||
210 | xfs_ioend_t *ioend, | ||
211 | int wait) | ||
212 | { | ||
213 | if (atomic_dec_and_test(&ioend->io_remaining)) { | ||
214 | struct workqueue_struct *wq; | ||
215 | |||
216 | wq = (ioend->io_type == IOMAP_UNWRITTEN) ? | ||
217 | xfsconvertd_workqueue : xfsdatad_workqueue; | ||
218 | queue_work(wq, &ioend->io_work); | ||
219 | if (wait) | ||
220 | flush_workqueue(wq); | ||
221 | } | ||
194 | } | 222 | } |
195 | 223 | ||
196 | /* | 224 | /* |
@@ -198,11 +226,11 @@ xfs_setfilesize( | |||
198 | */ | 226 | */ |
199 | STATIC void | 227 | STATIC void |
200 | xfs_end_io( | 228 | xfs_end_io( |
201 | struct work_struct *work) | 229 | struct work_struct *work) |
202 | { | 230 | { |
203 | xfs_ioend_t *ioend = | 231 | xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work); |
204 | container_of(work, xfs_ioend_t, io_work); | 232 | struct xfs_inode *ip = XFS_I(ioend->io_inode); |
205 | struct xfs_inode *ip = XFS_I(ioend->io_inode); | 233 | int error = 0; |
206 | 234 | ||
207 | /* | 235 | /* |
208 | * For unwritten extents we need to issue transactions to convert a | 236 | * For unwritten extents we need to issue transactions to convert a |
@@ -210,7 +238,6 @@ xfs_end_io( | |||
210 | */ | 238 | */ |
211 | if (ioend->io_type == IOMAP_UNWRITTEN && | 239 | if (ioend->io_type == IOMAP_UNWRITTEN && |
212 | likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) { | 240 | likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) { |
213 | int error; | ||
214 | 241 | ||
215 | error = xfs_iomap_write_unwritten(ip, ioend->io_offset, | 242 | error = xfs_iomap_write_unwritten(ip, ioend->io_offset, |
216 | ioend->io_size); | 243 | ioend->io_size); |
@@ -222,30 +249,23 @@ xfs_end_io( | |||
222 | * We might have to update the on-disk file size after extending | 249 | * We might have to update the on-disk file size after extending |
223 | * writes. | 250 | * writes. |
224 | */ | 251 | */ |
225 | if (ioend->io_type != IOMAP_READ) | 252 | if (ioend->io_type != IOMAP_READ) { |
226 | xfs_setfilesize(ioend); | 253 | error = xfs_setfilesize(ioend); |
227 | xfs_destroy_ioend(ioend); | 254 | ASSERT(!error || error == EAGAIN); |
228 | } | ||
229 | |||
230 | /* | ||
231 | * Schedule IO completion handling on a xfsdatad if this was | ||
232 | * the final hold on this ioend. If we are asked to wait, | ||
233 | * flush the workqueue. | ||
234 | */ | ||
235 | STATIC void | ||
236 | xfs_finish_ioend( | ||
237 | xfs_ioend_t *ioend, | ||
238 | int wait) | ||
239 | { | ||
240 | if (atomic_dec_and_test(&ioend->io_remaining)) { | ||
241 | struct workqueue_struct *wq; | ||
242 | |||
243 | wq = (ioend->io_type == IOMAP_UNWRITTEN) ? | ||
244 | xfsconvertd_workqueue : xfsdatad_workqueue; | ||
245 | queue_work(wq, &ioend->io_work); | ||
246 | if (wait) | ||
247 | flush_workqueue(wq); | ||
248 | } | 255 | } |
256 | |||
257 | /* | ||
258 | * If we didn't complete processing of the ioend, requeue it to the | ||
259 | * tail of the workqueue for another attempt later. Otherwise destroy | ||
260 | * it. | ||
261 | */ | ||
262 | if (error == EAGAIN) { | ||
263 | atomic_inc(&ioend->io_remaining); | ||
264 | xfs_finish_ioend(ioend, 0); | ||
265 | /* ensure we don't spin on blocked ioends */ | ||
266 | delay(1); | ||
267 | } else | ||
268 | xfs_destroy_ioend(ioend); | ||
249 | } | 269 | } |
250 | 270 | ||
251 | /* | 271 | /* |
@@ -341,7 +361,7 @@ xfs_submit_ioend_bio( | |||
341 | * but don't update the inode size until I/O completion. | 361 | * but don't update the inode size until I/O completion. |
342 | */ | 362 | */ |
343 | if (xfs_ioend_new_eof(ioend)) | 363 | if (xfs_ioend_new_eof(ioend)) |
344 | xfs_mark_inode_dirty_sync(XFS_I(ioend->io_inode)); | 364 | xfs_mark_inode_dirty(XFS_I(ioend->io_inode)); |
345 | 365 | ||
346 | submit_bio(wbc->sync_mode == WB_SYNC_ALL ? | 366 | submit_bio(wbc->sync_mode == WB_SYNC_ALL ? |
347 | WRITE_SYNC_PLUG : WRITE, bio); | 367 | WRITE_SYNC_PLUG : WRITE, bio); |
@@ -874,6 +894,118 @@ xfs_cluster_write( | |||
874 | } | 894 | } |
875 | } | 895 | } |
876 | 896 | ||
897 | STATIC void | ||
898 | xfs_vm_invalidatepage( | ||
899 | struct page *page, | ||
900 | unsigned long offset) | ||
901 | { | ||
902 | trace_xfs_invalidatepage(page->mapping->host, page, offset); | ||
903 | block_invalidatepage(page, offset); | ||
904 | } | ||
905 | |||
906 | /* | ||
907 | * If the page has delalloc buffers on it, we need to punch them out before we | ||
908 | * invalidate the page. If we don't, we leave a stale delalloc mapping on the | ||
909 | * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read | ||
910 | * is done on that same region - the delalloc extent is returned when none is | ||
911 | * supposed to be there. | ||
912 | * | ||
913 | * We prevent this by truncating away the delalloc regions on the page before | ||
914 | * invalidating it. Because they are delalloc, we can do this without needing a | ||
915 | * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this | ||
916 | * truncation without a transaction as there is no space left for block | ||
917 | * reservation (typically why we see an ENOSPC in writeback). | ||
918 | * | ||
919 | * This is not a performance critical path, so for now just do the punching a | ||
920 | * buffer head at a time. | ||
921 | */ | ||
922 | STATIC void | ||
923 | xfs_aops_discard_page( | ||
924 | struct page *page) | ||
925 | { | ||
926 | struct inode *inode = page->mapping->host; | ||
927 | struct xfs_inode *ip = XFS_I(inode); | ||
928 | struct buffer_head *bh, *head; | ||
929 | loff_t offset = page_offset(page); | ||
930 | ssize_t len = 1 << inode->i_blkbits; | ||
931 | |||
932 | if (!xfs_is_delayed_page(page, IOMAP_DELAY)) | ||
933 | goto out_invalidate; | ||
934 | |||
935 | xfs_fs_cmn_err(CE_ALERT, ip->i_mount, | ||
936 | "page discard on page %p, inode 0x%llx, offset %llu.", | ||
937 | page, ip->i_ino, offset); | ||
938 | |||
939 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
940 | bh = head = page_buffers(page); | ||
941 | do { | ||
942 | int done; | ||
943 | xfs_fileoff_t offset_fsb; | ||
944 | xfs_bmbt_irec_t imap; | ||
945 | int nimaps = 1; | ||
946 | int error; | ||
947 | xfs_fsblock_t firstblock; | ||
948 | xfs_bmap_free_t flist; | ||
949 | |||
950 | if (!buffer_delay(bh)) | ||
951 | goto next_buffer; | ||
952 | |||
953 | offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); | ||
954 | |||
955 | /* | ||
956 | * Map the range first and check that it is a delalloc extent | ||
957 | * before trying to unmap the range. Otherwise we will be | ||
958 | * trying to remove a real extent (which requires a | ||
959 | * transaction) or a hole, which is probably a bad idea... | ||
960 | */ | ||
961 | error = xfs_bmapi(NULL, ip, offset_fsb, 1, | ||
962 | XFS_BMAPI_ENTIRE, NULL, 0, &imap, | ||
963 | &nimaps, NULL, NULL); | ||
964 | |||
965 | if (error) { | ||
966 | /* something screwed, just bail */ | ||
967 | xfs_fs_cmn_err(CE_ALERT, ip->i_mount, | ||
968 | "page discard failed delalloc mapping lookup."); | ||
969 | break; | ||
970 | } | ||
971 | if (!nimaps) { | ||
972 | /* nothing there */ | ||
973 | goto next_buffer; | ||
974 | } | ||
975 | if (imap.br_startblock != DELAYSTARTBLOCK) { | ||
976 | /* been converted, ignore */ | ||
977 | goto next_buffer; | ||
978 | } | ||
979 | WARN_ON(imap.br_blockcount == 0); | ||
980 | |||
981 | /* | ||
982 | * Note: while we initialise the firstblock/flist pair, they | ||
983 | * should never be used because blocks should never be | ||
984 | * allocated or freed for a delalloc extent and hence we don't | ||
985 | * need to cancel or finish them after the xfs_bunmapi() call. | ||
986 | */ | ||
987 | xfs_bmap_init(&flist, &firstblock); | ||
988 | error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock, | ||
989 | &flist, NULL, &done); | ||
990 | |||
991 | ASSERT(!flist.xbf_count && !flist.xbf_first); | ||
992 | if (error) { | ||
993 | /* something screwed, just bail */ | ||
994 | xfs_fs_cmn_err(CE_ALERT, ip->i_mount, | ||
995 | "page discard unable to remove delalloc mapping."); | ||
996 | break; | ||
997 | } | ||
998 | next_buffer: | ||
999 | offset += len; | ||
1000 | |||
1001 | } while ((bh = bh->b_this_page) != head); | ||
1002 | |||
1003 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1004 | out_invalidate: | ||
1005 | xfs_vm_invalidatepage(page, 0); | ||
1006 | return; | ||
1007 | } | ||
1008 | |||
877 | /* | 1009 | /* |
878 | * Calling this without startio set means we are being asked to make a dirty | 1010 | * Calling this without startio set means we are being asked to make a dirty |
879 | * page ready for freeing its buffers. When called with startio set then | 1011 | * page ready for freeing its buffers. When called with startio set then |
@@ -1125,7 +1257,7 @@ error: | |||
1125 | */ | 1257 | */ |
1126 | if (err != -EAGAIN) { | 1258 | if (err != -EAGAIN) { |
1127 | if (!unmapped) | 1259 | if (!unmapped) |
1128 | block_invalidatepage(page, 0); | 1260 | xfs_aops_discard_page(page); |
1129 | ClearPageUptodate(page); | 1261 | ClearPageUptodate(page); |
1130 | } | 1262 | } |
1131 | return err; | 1263 | return err; |
@@ -1535,15 +1667,6 @@ xfs_vm_readpages( | |||
1535 | return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); | 1667 | return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); |
1536 | } | 1668 | } |
1537 | 1669 | ||
1538 | STATIC void | ||
1539 | xfs_vm_invalidatepage( | ||
1540 | struct page *page, | ||
1541 | unsigned long offset) | ||
1542 | { | ||
1543 | trace_xfs_invalidatepage(page->mapping->host, page, offset); | ||
1544 | block_invalidatepage(page, offset); | ||
1545 | } | ||
1546 | |||
1547 | const struct address_space_operations xfs_address_space_operations = { | 1670 | const struct address_space_operations xfs_address_space_operations = { |
1548 | .readpage = xfs_vm_readpage, | 1671 | .readpage = xfs_vm_readpage, |
1549 | .readpages = xfs_vm_readpages, | 1672 | .readpages = xfs_vm_readpages, |