aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/linux-2.6/xfs_aops.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/linux-2.6/xfs_aops.c')
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c221
1 files changed, 172 insertions, 49 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 66abe36c1213..9083357f9e44 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -39,6 +39,7 @@
39#include "xfs_iomap.h" 39#include "xfs_iomap.h"
40#include "xfs_vnodeops.h" 40#include "xfs_vnodeops.h"
41#include "xfs_trace.h" 41#include "xfs_trace.h"
42#include "xfs_bmap.h"
42#include <linux/mpage.h> 43#include <linux/mpage.h>
43#include <linux/pagevec.h> 44#include <linux/pagevec.h>
44#include <linux/writeback.h> 45#include <linux/writeback.h>
@@ -163,14 +164,17 @@ xfs_ioend_new_eof(
163} 164}
164 165
165/* 166/*
166 * Update on-disk file size now that data has been written to disk. 167 * Update on-disk file size now that data has been written to disk. The
167 * The current in-memory file size is i_size. If a write is beyond 168 * current in-memory file size is i_size. If a write is beyond eof i_new_size
168 * eof i_new_size will be the intended file size until i_size is 169 * will be the intended file size until i_size is updated. If this write does
169 * updated. If this write does not extend all the way to the valid 170 * not extend all the way to the valid file size then restrict this update to
170 * file size then restrict this update to the end of the write. 171 * the end of the write.
172 *
173 * This function does not block as blocking on the inode lock in IO completion
174 * can lead to IO completion order dependency deadlocks. If it can't get the
175 * inode ilock it will return EAGAIN. Callers must handle this.
171 */ 176 */
172 177STATIC int
173STATIC void
174xfs_setfilesize( 178xfs_setfilesize(
175 xfs_ioend_t *ioend) 179 xfs_ioend_t *ioend)
176{ 180{
@@ -181,16 +185,40 @@ xfs_setfilesize(
181 ASSERT(ioend->io_type != IOMAP_READ); 185 ASSERT(ioend->io_type != IOMAP_READ);
182 186
183 if (unlikely(ioend->io_error)) 187 if (unlikely(ioend->io_error))
184 return; 188 return 0;
189
190 if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
191 return EAGAIN;
185 192
186 xfs_ilock(ip, XFS_ILOCK_EXCL);
187 isize = xfs_ioend_new_eof(ioend); 193 isize = xfs_ioend_new_eof(ioend);
188 if (isize) { 194 if (isize) {
189 ip->i_d.di_size = isize; 195 ip->i_d.di_size = isize;
190 xfs_mark_inode_dirty_sync(ip); 196 xfs_mark_inode_dirty(ip);
191 } 197 }
192 198
193 xfs_iunlock(ip, XFS_ILOCK_EXCL); 199 xfs_iunlock(ip, XFS_ILOCK_EXCL);
200 return 0;
201}
202
203/*
204 * Schedule IO completion handling on a xfsdatad if this was
205 * the final hold on this ioend. If we are asked to wait,
206 * flush the workqueue.
207 */
208STATIC void
209xfs_finish_ioend(
210 xfs_ioend_t *ioend,
211 int wait)
212{
213 if (atomic_dec_and_test(&ioend->io_remaining)) {
214 struct workqueue_struct *wq;
215
216 wq = (ioend->io_type == IOMAP_UNWRITTEN) ?
217 xfsconvertd_workqueue : xfsdatad_workqueue;
218 queue_work(wq, &ioend->io_work);
219 if (wait)
220 flush_workqueue(wq);
221 }
194} 222}
195 223
196/* 224/*
@@ -198,11 +226,11 @@ xfs_setfilesize(
198 */ 226 */
199STATIC void 227STATIC void
200xfs_end_io( 228xfs_end_io(
201 struct work_struct *work) 229 struct work_struct *work)
202{ 230{
203 xfs_ioend_t *ioend = 231 xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work);
204 container_of(work, xfs_ioend_t, io_work); 232 struct xfs_inode *ip = XFS_I(ioend->io_inode);
205 struct xfs_inode *ip = XFS_I(ioend->io_inode); 233 int error = 0;
206 234
207 /* 235 /*
208 * For unwritten extents we need to issue transactions to convert a 236 * For unwritten extents we need to issue transactions to convert a
@@ -210,7 +238,6 @@ xfs_end_io(
210 */ 238 */
211 if (ioend->io_type == IOMAP_UNWRITTEN && 239 if (ioend->io_type == IOMAP_UNWRITTEN &&
212 likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) { 240 likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
213 int error;
214 241
215 error = xfs_iomap_write_unwritten(ip, ioend->io_offset, 242 error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
216 ioend->io_size); 243 ioend->io_size);
@@ -222,30 +249,23 @@ xfs_end_io(
222 * We might have to update the on-disk file size after extending 249 * We might have to update the on-disk file size after extending
223 * writes. 250 * writes.
224 */ 251 */
225 if (ioend->io_type != IOMAP_READ) 252 if (ioend->io_type != IOMAP_READ) {
226 xfs_setfilesize(ioend); 253 error = xfs_setfilesize(ioend);
227 xfs_destroy_ioend(ioend); 254 ASSERT(!error || error == EAGAIN);
228}
229
230/*
231 * Schedule IO completion handling on a xfsdatad if this was
232 * the final hold on this ioend. If we are asked to wait,
233 * flush the workqueue.
234 */
235STATIC void
236xfs_finish_ioend(
237 xfs_ioend_t *ioend,
238 int wait)
239{
240 if (atomic_dec_and_test(&ioend->io_remaining)) {
241 struct workqueue_struct *wq;
242
243 wq = (ioend->io_type == IOMAP_UNWRITTEN) ?
244 xfsconvertd_workqueue : xfsdatad_workqueue;
245 queue_work(wq, &ioend->io_work);
246 if (wait)
247 flush_workqueue(wq);
248 } 255 }
256
257 /*
258 * If we didn't complete processing of the ioend, requeue it to the
259 * tail of the workqueue for another attempt later. Otherwise destroy
260 * it.
261 */
262 if (error == EAGAIN) {
263 atomic_inc(&ioend->io_remaining);
264 xfs_finish_ioend(ioend, 0);
265 /* ensure we don't spin on blocked ioends */
266 delay(1);
267 } else
268 xfs_destroy_ioend(ioend);
249} 269}
250 270
251/* 271/*
@@ -341,7 +361,7 @@ xfs_submit_ioend_bio(
341 * but don't update the inode size until I/O completion. 361 * but don't update the inode size until I/O completion.
342 */ 362 */
343 if (xfs_ioend_new_eof(ioend)) 363 if (xfs_ioend_new_eof(ioend))
344 xfs_mark_inode_dirty_sync(XFS_I(ioend->io_inode)); 364 xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
345 365
346 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? 366 submit_bio(wbc->sync_mode == WB_SYNC_ALL ?
347 WRITE_SYNC_PLUG : WRITE, bio); 367 WRITE_SYNC_PLUG : WRITE, bio);
@@ -874,6 +894,118 @@ xfs_cluster_write(
874 } 894 }
875} 895}
876 896
897STATIC void
898xfs_vm_invalidatepage(
899 struct page *page,
900 unsigned long offset)
901{
902 trace_xfs_invalidatepage(page->mapping->host, page, offset);
903 block_invalidatepage(page, offset);
904}
905
906/*
907 * If the page has delalloc buffers on it, we need to punch them out before we
908 * invalidate the page. If we don't, we leave a stale delalloc mapping on the
909 * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
910 * is done on that same region - the delalloc extent is returned when none is
911 * supposed to be there.
912 *
913 * We prevent this by truncating away the delalloc regions on the page before
914 * invalidating it. Because they are delalloc, we can do this without needing a
915 * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
916 * truncation without a transaction as there is no space left for block
917 * reservation (typically why we see an ENOSPC in writeback).
918 *
919 * This is not a performance critical path, so for now just do the punching a
920 * buffer head at a time.
921 */
922STATIC void
923xfs_aops_discard_page(
924 struct page *page)
925{
926 struct inode *inode = page->mapping->host;
927 struct xfs_inode *ip = XFS_I(inode);
928 struct buffer_head *bh, *head;
929 loff_t offset = page_offset(page);
930 ssize_t len = 1 << inode->i_blkbits;
931
932 if (!xfs_is_delayed_page(page, IOMAP_DELAY))
933 goto out_invalidate;
934
935 xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
936 "page discard on page %p, inode 0x%llx, offset %llu.",
937 page, ip->i_ino, offset);
938
939 xfs_ilock(ip, XFS_ILOCK_EXCL);
940 bh = head = page_buffers(page);
941 do {
942 int done;
943 xfs_fileoff_t offset_fsb;
944 xfs_bmbt_irec_t imap;
945 int nimaps = 1;
946 int error;
947 xfs_fsblock_t firstblock;
948 xfs_bmap_free_t flist;
949
950 if (!buffer_delay(bh))
951 goto next_buffer;
952
953 offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
954
955 /*
956 * Map the range first and check that it is a delalloc extent
957 * before trying to unmap the range. Otherwise we will be
958 * trying to remove a real extent (which requires a
959 * transaction) or a hole, which is probably a bad idea...
960 */
961 error = xfs_bmapi(NULL, ip, offset_fsb, 1,
962 XFS_BMAPI_ENTIRE, NULL, 0, &imap,
963 &nimaps, NULL, NULL);
964
965 if (error) {
966 /* something screwed, just bail */
967 xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
968 "page discard failed delalloc mapping lookup.");
969 break;
970 }
971 if (!nimaps) {
972 /* nothing there */
973 goto next_buffer;
974 }
975 if (imap.br_startblock != DELAYSTARTBLOCK) {
976 /* been converted, ignore */
977 goto next_buffer;
978 }
979 WARN_ON(imap.br_blockcount == 0);
980
981 /*
982 * Note: while we initialise the firstblock/flist pair, they
983 * should never be used because blocks should never be
984 * allocated or freed for a delalloc extent and hence we don't need
985 * to cancel or finish them after the xfs_bunmapi() call.
986 */
987 xfs_bmap_init(&flist, &firstblock);
988 error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock,
989 &flist, NULL, &done);
990
991 ASSERT(!flist.xbf_count && !flist.xbf_first);
992 if (error) {
993 /* something screwed, just bail */
994 xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
995 "page discard unable to remove delalloc mapping.");
996 break;
997 }
998next_buffer:
999 offset += len;
1000
1001 } while ((bh = bh->b_this_page) != head);
1002
1003 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1004out_invalidate:
1005 xfs_vm_invalidatepage(page, 0);
1006 return;
1007}
1008
877/* 1009/*
878 * Calling this without startio set means we are being asked to make a dirty 1010 * Calling this without startio set means we are being asked to make a dirty
879 * page ready for freeing its buffers. When called with startio set then 1011 * page ready for freeing its buffers. When called with startio set then
@@ -1125,7 +1257,7 @@ error:
1125 */ 1257 */
1126 if (err != -EAGAIN) { 1258 if (err != -EAGAIN) {
1127 if (!unmapped) 1259 if (!unmapped)
1128 block_invalidatepage(page, 0); 1260 xfs_aops_discard_page(page);
1129 ClearPageUptodate(page); 1261 ClearPageUptodate(page);
1130 } 1262 }
1131 return err; 1263 return err;
@@ -1535,15 +1667,6 @@ xfs_vm_readpages(
1535 return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); 1667 return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
1536} 1668}
1537 1669
1538STATIC void
1539xfs_vm_invalidatepage(
1540 struct page *page,
1541 unsigned long offset)
1542{
1543 trace_xfs_invalidatepage(page->mapping->host, page, offset);
1544 block_invalidatepage(page, offset);
1545}
1546
1547const struct address_space_operations xfs_address_space_operations = { 1670const struct address_space_operations xfs_address_space_operations = {
1548 .readpage = xfs_vm_readpage, 1671 .readpage = xfs_vm_readpage,
1549 .readpages = xfs_vm_readpages, 1672 .readpages = xfs_vm_readpages,