path: root/fs/xfs/xfs_aops.c
author    Christoph Hellwig <hch@infradead.org>    2011-08-12 17:21:35 -0400
committer    Alex Elder <aelder@sgi.com>    2011-08-12 17:21:35 -0400
commit    c59d87c460767bc35dafd490139d3cfe78fb8da4 (patch)
tree    2aad8261f86488e501d9645bd35d1398906da46d    /fs/xfs/xfs_aops.c
parent    06f8e2d6754dc631732415b741b5aa58a0f7133f (diff)
xfs: remove subdirectories
Use the move from Linux 2.6 to Linux 3.x as an excuse to kill the annoying subdirectories in the XFS source code. Besides the large number of file renames, the only changes are to the Makefile, a few files including headers with the subdirectory prefix, and the binary sysctl compat code that includes a header under fs/xfs/ from kernel/.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
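The include-path changes the message refers to are mechanical one-line edits. As a rough sketch only (the header name below is a hypothetical stand-in; this particular hunk is not part of this diff), a file that previously named one of the removed subdirectories in an #include would change along these lines:

 /* illustrative only: the header formerly lived in a now-removed subdirectory */
-#include "quota/xfs_qm.h"
+#include "xfs_qm.h"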
Diffstat (limited to 'fs/xfs/xfs_aops.c')
-rw-r--r--    fs/xfs/xfs_aops.c    1499
1 file changed, 1499 insertions, 0 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
new file mode 100644
index 000000000000..63e971e2b837
--- /dev/null
+++ b/fs/xfs/xfs_aops.c
@@ -0,0 +1,1499 @@
1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_bit.h"
20#include "xfs_log.h"
21#include "xfs_inum.h"
22#include "xfs_sb.h"
23#include "xfs_ag.h"
24#include "xfs_trans.h"
25#include "xfs_mount.h"
26#include "xfs_bmap_btree.h"
27#include "xfs_dinode.h"
28#include "xfs_inode.h"
29#include "xfs_alloc.h"
30#include "xfs_error.h"
31#include "xfs_rw.h"
32#include "xfs_iomap.h"
33#include "xfs_vnodeops.h"
34#include "xfs_trace.h"
35#include "xfs_bmap.h"
36#include <linux/gfp.h>
37#include <linux/mpage.h>
38#include <linux/pagevec.h>
39#include <linux/writeback.h>
40
41
42/*
43 * Prime number of hash buckets since address is used as the key.
44 */
45#define NVSYNC 37
46#define to_ioend_wq(v) (&xfs_ioend_wq[((unsigned long)v) % NVSYNC])
47static wait_queue_head_t xfs_ioend_wq[NVSYNC];
48
49void __init
50xfs_ioend_init(void)
51{
52 int i;
53
54 for (i = 0; i < NVSYNC; i++)
55 init_waitqueue_head(&xfs_ioend_wq[i]);
56}
57
58void
59xfs_ioend_wait(
60 xfs_inode_t *ip)
61{
62 wait_queue_head_t *wq = to_ioend_wq(ip);
63
64 wait_event(*wq, (atomic_read(&ip->i_iocount) == 0));
65}
66
67STATIC void
68xfs_ioend_wake(
69 xfs_inode_t *ip)
70{
71 if (atomic_dec_and_test(&ip->i_iocount))
72 wake_up(to_ioend_wq(ip));
73}
74
75void
76xfs_count_page_state(
77 struct page *page,
78 int *delalloc,
79 int *unwritten)
80{
81 struct buffer_head *bh, *head;
82
83 *delalloc = *unwritten = 0;
84
85 bh = head = page_buffers(page);
86 do {
87 if (buffer_unwritten(bh))
88 (*unwritten) = 1;
89 else if (buffer_delay(bh))
90 (*delalloc) = 1;
91 } while ((bh = bh->b_this_page) != head);
92}
93
94STATIC struct block_device *
95xfs_find_bdev_for_inode(
96 struct inode *inode)
97{
98 struct xfs_inode *ip = XFS_I(inode);
99 struct xfs_mount *mp = ip->i_mount;
100
101 if (XFS_IS_REALTIME_INODE(ip))
102 return mp->m_rtdev_targp->bt_bdev;
103 else
104 return mp->m_ddev_targp->bt_bdev;
105}
106
107/*
108 * We're now finished for good with this ioend structure.
109 * Update the page state via the associated buffer_heads,
110 * release holds on the inode and bio, and finally free
111 * up memory. Do not use the ioend after this.
112 */
113STATIC void
114xfs_destroy_ioend(
115 xfs_ioend_t *ioend)
116{
117 struct buffer_head *bh, *next;
118 struct xfs_inode *ip = XFS_I(ioend->io_inode);
119
120 for (bh = ioend->io_buffer_head; bh; bh = next) {
121 next = bh->b_private;
122 bh->b_end_io(bh, !ioend->io_error);
123 }
124
125 /*
126 * Volume managers supporting multiple paths can send back ENODEV
127 * when the final path disappears. In this case continuing to fill
128 * the page cache with dirty data which cannot be written out is
129 * evil, so prevent that.
130 */
131 if (unlikely(ioend->io_error == -ENODEV)) {
132 xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ,
133 __FILE__, __LINE__);
134 }
135
136 xfs_ioend_wake(ip);
137 mempool_free(ioend, xfs_ioend_pool);
138}
139
140/*
141 * If the end of the current ioend is beyond the current EOF,
142 * return the new EOF value, otherwise zero.
143 */
144STATIC xfs_fsize_t
145xfs_ioend_new_eof(
146 xfs_ioend_t *ioend)
147{
148 xfs_inode_t *ip = XFS_I(ioend->io_inode);
149 xfs_fsize_t isize;
150 xfs_fsize_t bsize;
151
152 bsize = ioend->io_offset + ioend->io_size;
153 isize = MAX(ip->i_size, ip->i_new_size);
154 isize = MIN(isize, bsize);
155 return isize > ip->i_d.di_size ? isize : 0;
156}
157
158/*
159 * Update on-disk file size now that data has been written to disk. The
160 * current in-memory file size is i_size. If a write is beyond eof i_new_size
161 * will be the intended file size until i_size is updated. If this write does
162 * not extend all the way to the valid file size then restrict this update to
163 * the end of the write.
164 *
165 * This function does not block as blocking on the inode lock in IO completion
166 * can lead to IO completion order dependency deadlocks. If it can't get the
167 * inode ilock it will return EAGAIN. Callers must handle this.
168 */
169STATIC int
170xfs_setfilesize(
171 xfs_ioend_t *ioend)
172{
173 xfs_inode_t *ip = XFS_I(ioend->io_inode);
174 xfs_fsize_t isize;
175
176 if (unlikely(ioend->io_error))
177 return 0;
178
179 if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
180 return EAGAIN;
181
182 isize = xfs_ioend_new_eof(ioend);
183 if (isize) {
184 trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
185 ip->i_d.di_size = isize;
186 xfs_mark_inode_dirty(ip);
187 }
188
189 xfs_iunlock(ip, XFS_ILOCK_EXCL);
190 return 0;
191}
192
193/*
194 * Schedule IO completion handling on the final put of an ioend.
195 */
196STATIC void
197xfs_finish_ioend(
198 struct xfs_ioend *ioend)
199{
200 if (atomic_dec_and_test(&ioend->io_remaining)) {
201 if (ioend->io_type == IO_UNWRITTEN)
202 queue_work(xfsconvertd_workqueue, &ioend->io_work);
203 else
204 queue_work(xfsdatad_workqueue, &ioend->io_work);
205 }
206}
207
208/*
209 * IO write completion.
210 */
211STATIC void
212xfs_end_io(
213 struct work_struct *work)
214{
215 xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work);
216 struct xfs_inode *ip = XFS_I(ioend->io_inode);
217 int error = 0;
218
219 /*
220 * For unwritten extents we need to issue transactions to convert a
221 * range to normal written extents after the data I/O has finished.
222 */
223 if (ioend->io_type == IO_UNWRITTEN &&
224 likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
225
226 error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
227 ioend->io_size);
228 if (error)
229 ioend->io_error = error;
230 }
231
232 /*
233 * We might have to update the on-disk file size after extending
234 * writes.
235 */
236 error = xfs_setfilesize(ioend);
237 ASSERT(!error || error == EAGAIN);
238
239 /*
240 * If we didn't complete processing of the ioend, requeue it to the
241 * tail of the workqueue for another attempt later. Otherwise destroy
242 * it.
243 */
244 if (error == EAGAIN) {
245 atomic_inc(&ioend->io_remaining);
246 xfs_finish_ioend(ioend);
247 /* ensure we don't spin on blocked ioends */
248 delay(1);
249 } else {
250 if (ioend->io_iocb)
251 aio_complete(ioend->io_iocb, ioend->io_result, 0);
252 xfs_destroy_ioend(ioend);
253 }
254}
255
256/*
257 * Call IO completion handling in caller context on the final put of an ioend.
258 */
259STATIC void
260xfs_finish_ioend_sync(
261 struct xfs_ioend *ioend)
262{
263 if (atomic_dec_and_test(&ioend->io_remaining))
264 xfs_end_io(&ioend->io_work);
265}
266
267/*
268 * Allocate and initialise an IO completion structure.
269 * We need to track unwritten extent write completion here initially.
270 * We'll need to extend this for updating the ondisk inode size later
271 * (vs. incore size).
272 */
273STATIC xfs_ioend_t *
274xfs_alloc_ioend(
275 struct inode *inode,
276 unsigned int type)
277{
278 xfs_ioend_t *ioend;
279
280 ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
281
282 /*
283 * Set the count to 1 initially, which will prevent an I/O
284 * completion callback that happens before we have started
285 * all the I/O from calling the completion routine too early.
286 */
287 atomic_set(&ioend->io_remaining, 1);
288 ioend->io_error = 0;
289 ioend->io_list = NULL;
290 ioend->io_type = type;
291 ioend->io_inode = inode;
292 ioend->io_buffer_head = NULL;
293 ioend->io_buffer_tail = NULL;
294 atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
295 ioend->io_offset = 0;
296 ioend->io_size = 0;
297 ioend->io_iocb = NULL;
298 ioend->io_result = 0;
299
300 INIT_WORK(&ioend->io_work, xfs_end_io);
301 return ioend;
302}
303
304STATIC int
305xfs_map_blocks(
306 struct inode *inode,
307 loff_t offset,
308 struct xfs_bmbt_irec *imap,
309 int type,
310 int nonblocking)
311{
312 struct xfs_inode *ip = XFS_I(inode);
313 struct xfs_mount *mp = ip->i_mount;
314 ssize_t count = 1 << inode->i_blkbits;
315 xfs_fileoff_t offset_fsb, end_fsb;
316 int error = 0;
317 int bmapi_flags = XFS_BMAPI_ENTIRE;
318 int nimaps = 1;
319
320 if (XFS_FORCED_SHUTDOWN(mp))
321 return -XFS_ERROR(EIO);
322
323 if (type == IO_UNWRITTEN)
324 bmapi_flags |= XFS_BMAPI_IGSTATE;
325
326 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
327 if (nonblocking)
328 return -XFS_ERROR(EAGAIN);
329 xfs_ilock(ip, XFS_ILOCK_SHARED);
330 }
331
332 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
333 (ip->i_df.if_flags & XFS_IFEXTENTS));
334 ASSERT(offset <= mp->m_maxioffset);
335
336 if (offset + count > mp->m_maxioffset)
337 count = mp->m_maxioffset - offset;
338 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
339 offset_fsb = XFS_B_TO_FSBT(mp, offset);
340 error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
341 bmapi_flags, NULL, 0, imap, &nimaps, NULL);
342 xfs_iunlock(ip, XFS_ILOCK_SHARED);
343
344 if (error)
345 return -XFS_ERROR(error);
346
347 if (type == IO_DELALLOC &&
348 (!nimaps || isnullstartblock(imap->br_startblock))) {
349 error = xfs_iomap_write_allocate(ip, offset, count, imap);
350 if (!error)
351 trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
352 return -XFS_ERROR(error);
353 }
354
355#ifdef DEBUG
356 if (type == IO_UNWRITTEN) {
357 ASSERT(nimaps);
358 ASSERT(imap->br_startblock != HOLESTARTBLOCK);
359 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
360 }
361#endif
362 if (nimaps)
363 trace_xfs_map_blocks_found(ip, offset, count, type, imap);
364 return 0;
365}
366
367STATIC int
368xfs_imap_valid(
369 struct inode *inode,
370 struct xfs_bmbt_irec *imap,
371 xfs_off_t offset)
372{
373 offset >>= inode->i_blkbits;
374
375 return offset >= imap->br_startoff &&
376 offset < imap->br_startoff + imap->br_blockcount;
377}
378
379/*
380 * BIO completion handler for buffered IO.
381 */
382STATIC void
383xfs_end_bio(
384 struct bio *bio,
385 int error)
386{
387 xfs_ioend_t *ioend = bio->bi_private;
388
389 ASSERT(atomic_read(&bio->bi_cnt) >= 1);
390 ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;
391
392 /* Toss bio and pass work off to an xfsdatad thread */
393 bio->bi_private = NULL;
394 bio->bi_end_io = NULL;
395 bio_put(bio);
396
397 xfs_finish_ioend(ioend);
398}
399
400STATIC void
401xfs_submit_ioend_bio(
402 struct writeback_control *wbc,
403 xfs_ioend_t *ioend,
404 struct bio *bio)
405{
406 atomic_inc(&ioend->io_remaining);
407 bio->bi_private = ioend;
408 bio->bi_end_io = xfs_end_bio;
409
410 /*
411 * If the I/O is beyond EOF we mark the inode dirty immediately
412 * but don't update the inode size until I/O completion.
413 */
414 if (xfs_ioend_new_eof(ioend))
415 xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
416
417 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
418}
419
420STATIC struct bio *
421xfs_alloc_ioend_bio(
422 struct buffer_head *bh)
423{
424 int nvecs = bio_get_nr_vecs(bh->b_bdev);
425 struct bio *bio = bio_alloc(GFP_NOIO, nvecs);
426
427 ASSERT(bio->bi_private == NULL);
428 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
429 bio->bi_bdev = bh->b_bdev;
430 return bio;
431}
432
433STATIC void
434xfs_start_buffer_writeback(
435 struct buffer_head *bh)
436{
437 ASSERT(buffer_mapped(bh));
438 ASSERT(buffer_locked(bh));
439 ASSERT(!buffer_delay(bh));
440 ASSERT(!buffer_unwritten(bh));
441
442 mark_buffer_async_write(bh);
443 set_buffer_uptodate(bh);
444 clear_buffer_dirty(bh);
445}
446
447STATIC void
448xfs_start_page_writeback(
449 struct page *page,
450 int clear_dirty,
451 int buffers)
452{
453 ASSERT(PageLocked(page));
454 ASSERT(!PageWriteback(page));
455 if (clear_dirty)
456 clear_page_dirty_for_io(page);
457 set_page_writeback(page);
458 unlock_page(page);
459 /* If no buffers on the page are to be written, finish it here */
460 if (!buffers)
461 end_page_writeback(page);
462}
463
464static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
465{
466 return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
467}
468
469/*
470 * Submit all of the bios for all of the ioends we have saved up, covering the
471 * initial writepage page and also any probed pages.
472 *
473 * Because we may have multiple ioends spanning a page, we need to start
474 * writeback on all the buffers before we submit them for I/O. If we mark the
475 * buffers as we got, then we can end up with a page that only has buffers
476 * marked async write and I/O complete on can occur before we mark the other
477 * buffers async write.
478 *
479 * The end result of this is that we trip a bug in end_page_writeback() because
480 * we call it twice for the one page as the code in end_buffer_async_write()
481 * assumes that all buffers on the page are started at the same time.
482 *
483 * The fix is two passes across the ioend list - one to start writeback on the
484 * buffer_heads, and then submit them for I/O on the second pass.
485 */
486STATIC void
487xfs_submit_ioend(
488 struct writeback_control *wbc,
489 xfs_ioend_t *ioend)
490{
491 xfs_ioend_t *head = ioend;
492 xfs_ioend_t *next;
493 struct buffer_head *bh;
494 struct bio *bio;
495 sector_t lastblock = 0;
496
497 /* Pass 1 - start writeback */
498 do {
499 next = ioend->io_list;
500 for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
501 xfs_start_buffer_writeback(bh);
502 } while ((ioend = next) != NULL);
503
504 /* Pass 2 - submit I/O */
505 ioend = head;
506 do {
507 next = ioend->io_list;
508 bio = NULL;
509
510 for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
511
512 if (!bio) {
513 retry:
514 bio = xfs_alloc_ioend_bio(bh);
515 } else if (bh->b_blocknr != lastblock + 1) {
516 xfs_submit_ioend_bio(wbc, ioend, bio);
517 goto retry;
518 }
519
520 if (bio_add_buffer(bio, bh) != bh->b_size) {
521 xfs_submit_ioend_bio(wbc, ioend, bio);
522 goto retry;
523 }
524
525 lastblock = bh->b_blocknr;
526 }
527 if (bio)
528 xfs_submit_ioend_bio(wbc, ioend, bio);
529 xfs_finish_ioend(ioend);
530 } while ((ioend = next) != NULL);
531}
532
533/*
534 * Cancel submission of all buffer_heads so far in this endio.
535 * Toss the endio too. Only ever called for the initial page
536 * in a writepage request, so only ever one page.
537 */
538STATIC void
539xfs_cancel_ioend(
540 xfs_ioend_t *ioend)
541{
542 xfs_ioend_t *next;
543 struct buffer_head *bh, *next_bh;
544
545 do {
546 next = ioend->io_list;
547 bh = ioend->io_buffer_head;
548 do {
549 next_bh = bh->b_private;
550 clear_buffer_async_write(bh);
551 unlock_buffer(bh);
552 } while ((bh = next_bh) != NULL);
553
554 xfs_ioend_wake(XFS_I(ioend->io_inode));
555 mempool_free(ioend, xfs_ioend_pool);
556 } while ((ioend = next) != NULL);
557}
558
559/*
560 * Test to see if we've been building up a completion structure for
561 * earlier buffers -- if so, we try to append to this ioend if we
562 * can, otherwise we finish off any current ioend and start another.
563 * Return true if we've finished the given ioend.
564 */
565STATIC void
566xfs_add_to_ioend(
567 struct inode *inode,
568 struct buffer_head *bh,
569 xfs_off_t offset,
570 unsigned int type,
571 xfs_ioend_t **result,
572 int need_ioend)
573{
574 xfs_ioend_t *ioend = *result;
575
576 if (!ioend || need_ioend || type != ioend->io_type) {
577 xfs_ioend_t *previous = *result;
578
579 ioend = xfs_alloc_ioend(inode, type);
580 ioend->io_offset = offset;
581 ioend->io_buffer_head = bh;
582 ioend->io_buffer_tail = bh;
583 if (previous)
584 previous->io_list = ioend;
585 *result = ioend;
586 } else {
587 ioend->io_buffer_tail->b_private = bh;
588 ioend->io_buffer_tail = bh;
589 }
590
591 bh->b_private = NULL;
592 ioend->io_size += bh->b_size;
593}
594
595STATIC void
596xfs_map_buffer(
597 struct inode *inode,
598 struct buffer_head *bh,
599 struct xfs_bmbt_irec *imap,
600 xfs_off_t offset)
601{
602 sector_t bn;
603 struct xfs_mount *m = XFS_I(inode)->i_mount;
604 xfs_off_t iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
605 xfs_daddr_t iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);
606
607 ASSERT(imap->br_startblock != HOLESTARTBLOCK);
608 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
609
610 bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
611 ((offset - iomap_offset) >> inode->i_blkbits);
612
613 ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
614
615 bh->b_blocknr = bn;
616 set_buffer_mapped(bh);
617}
618
619STATIC void
620xfs_map_at_offset(
621 struct inode *inode,
622 struct buffer_head *bh,
623 struct xfs_bmbt_irec *imap,
624 xfs_off_t offset)
625{
626 ASSERT(imap->br_startblock != HOLESTARTBLOCK);
627 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
628
629 xfs_map_buffer(inode, bh, imap, offset);
630 set_buffer_mapped(bh);
631 clear_buffer_delay(bh);
632 clear_buffer_unwritten(bh);
633}
634
635/*
636 * Test if a given page is suitable for writing as part of an unwritten
637 * or delayed allocate extent.
638 */
639STATIC int
640xfs_is_delayed_page(
641 struct page *page,
642 unsigned int type)
643{
644 if (PageWriteback(page))
645 return 0;
646
647 if (page->mapping && page_has_buffers(page)) {
648 struct buffer_head *bh, *head;
649 int acceptable = 0;
650
651 bh = head = page_buffers(page);
652 do {
653 if (buffer_unwritten(bh))
654 acceptable = (type == IO_UNWRITTEN);
655 else if (buffer_delay(bh))
656 acceptable = (type == IO_DELALLOC);
657 else if (buffer_dirty(bh) && buffer_mapped(bh))
658 acceptable = (type == IO_OVERWRITE);
659 else
660 break;
661 } while ((bh = bh->b_this_page) != head);
662
663 if (acceptable)
664 return 1;
665 }
666
667 return 0;
668}
669
670/*
671 * Allocate & map buffers for page given the extent map. Write it out.
672 * Except for the original page of a writepage, this is called on
673 * delalloc/unwritten pages only, for the original page it is possible
674 * that the page has no mapping at all.
675 */
676STATIC int
677xfs_convert_page(
678 struct inode *inode,
679 struct page *page,
680 loff_t tindex,
681 struct xfs_bmbt_irec *imap,
682 xfs_ioend_t **ioendp,
683 struct writeback_control *wbc)
684{
685 struct buffer_head *bh, *head;
686 xfs_off_t end_offset;
687 unsigned long p_offset;
688 unsigned int type;
689 int len, page_dirty;
690 int count = 0, done = 0, uptodate = 1;
691 xfs_off_t offset = page_offset(page);
692
693 if (page->index != tindex)
694 goto fail;
695 if (!trylock_page(page))
696 goto fail;
697 if (PageWriteback(page))
698 goto fail_unlock_page;
699 if (page->mapping != inode->i_mapping)
700 goto fail_unlock_page;
701 if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
702 goto fail_unlock_page;
703
704 /*
705 * page_dirty is initially a count of buffers on the page before
706 * EOF and is decremented as we move each into a cleanable state.
707 *
708 * Derivation:
709 *
710 * End offset is the highest offset that this page should represent.
711 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
712 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
713 * hence give us the correct page_dirty count. On any other page,
714 * it will be zero and in that case we need page_dirty to be the
715 * count of buffers on the page.
716 */
717 end_offset = min_t(unsigned long long,
718 (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
719 i_size_read(inode));
720
721 len = 1 << inode->i_blkbits;
722 p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
723 PAGE_CACHE_SIZE);
724 p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
725 page_dirty = p_offset / len;
726
727 bh = head = page_buffers(page);
728 do {
729 if (offset >= end_offset)
730 break;
731 if (!buffer_uptodate(bh))
732 uptodate = 0;
733 if (!(PageUptodate(page) || buffer_uptodate(bh))) {
734 done = 1;
735 continue;
736 }
737
738 if (buffer_unwritten(bh) || buffer_delay(bh) ||
739 buffer_mapped(bh)) {
740 if (buffer_unwritten(bh))
741 type = IO_UNWRITTEN;
742 else if (buffer_delay(bh))
743 type = IO_DELALLOC;
744 else
745 type = IO_OVERWRITE;
746
747 if (!xfs_imap_valid(inode, imap, offset)) {
748 done = 1;
749 continue;
750 }
751
752 lock_buffer(bh);
753 if (type != IO_OVERWRITE)
754 xfs_map_at_offset(inode, bh, imap, offset);
755 xfs_add_to_ioend(inode, bh, offset, type,
756 ioendp, done);
757
758 page_dirty--;
759 count++;
760 } else {
761 done = 1;
762 }
763 } while (offset += len, (bh = bh->b_this_page) != head);
764
765 if (uptodate && bh == head)
766 SetPageUptodate(page);
767
768 if (count) {
769 if (--wbc->nr_to_write <= 0 &&
770 wbc->sync_mode == WB_SYNC_NONE)
771 done = 1;
772 }
773 xfs_start_page_writeback(page, !page_dirty, count);
774
775 return done;
776 fail_unlock_page:
777 unlock_page(page);
778 fail:
779 return 1;
780}
781
782/*
783 * Convert & write out a cluster of pages in the same extent as defined
784 * by mp and following the start page.
785 */
786STATIC void
787xfs_cluster_write(
788 struct inode *inode,
789 pgoff_t tindex,
790 struct xfs_bmbt_irec *imap,
791 xfs_ioend_t **ioendp,
792 struct writeback_control *wbc,
793 pgoff_t tlast)
794{
795 struct pagevec pvec;
796 int done = 0, i;
797
798 pagevec_init(&pvec, 0);
799 while (!done && tindex <= tlast) {
800 unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
801
802 if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
803 break;
804
805 for (i = 0; i < pagevec_count(&pvec); i++) {
806 done = xfs_convert_page(inode, pvec.pages[i], tindex++,
807 imap, ioendp, wbc);
808 if (done)
809 break;
810 }
811
812 pagevec_release(&pvec);
813 cond_resched();
814 }
815}
816
817STATIC void
818xfs_vm_invalidatepage(
819 struct page *page,
820 unsigned long offset)
821{
822 trace_xfs_invalidatepage(page->mapping->host, page, offset);
823 block_invalidatepage(page, offset);
824}
825
826/*
827 * If the page has delalloc buffers on it, we need to punch them out before we
828 * invalidate the page. If we don't, we leave a stale delalloc mapping on the
829 * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
830 * is done on that same region - the delalloc extent is returned when none is
831 * supposed to be there.
832 *
833 * We prevent this by truncating away the delalloc regions on the page before
834 * invalidating it. Because they are delalloc, we can do this without needing a
835 * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
836 * truncation without a transaction as there is no space left for block
837 * reservation (typically why we see an ENOSPC in writeback).
838 *
839 * This is not a performance critical path, so for now just do the punching a
840 * buffer head at a time.
841 */
842STATIC void
843xfs_aops_discard_page(
844 struct page *page)
845{
846 struct inode *inode = page->mapping->host;
847 struct xfs_inode *ip = XFS_I(inode);
848 struct buffer_head *bh, *head;
849 loff_t offset = page_offset(page);
850
851 if (!xfs_is_delayed_page(page, IO_DELALLOC))
852 goto out_invalidate;
853
854 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
855 goto out_invalidate;
856
857 xfs_alert(ip->i_mount,
858 "page discard on page %p, inode 0x%llx, offset %llu.",
859 page, ip->i_ino, offset);
860
861 xfs_ilock(ip, XFS_ILOCK_EXCL);
862 bh = head = page_buffers(page);
863 do {
864 int error;
865 xfs_fileoff_t start_fsb;
866
867 if (!buffer_delay(bh))
868 goto next_buffer;
869
870 start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
871 error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
872 if (error) {
873 /* something screwed, just bail */
874 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
875 xfs_alert(ip->i_mount,
876 "page discard unable to remove delalloc mapping.");
877 }
878 break;
879 }
880next_buffer:
881 offset += 1 << inode->i_blkbits;
882
883 } while ((bh = bh->b_this_page) != head);
884
885 xfs_iunlock(ip, XFS_ILOCK_EXCL);
886out_invalidate:
887 xfs_vm_invalidatepage(page, 0);
888 return;
889}
890
891/*
892 * Write out a dirty page.
893 *
894 * For delalloc space on the page we need to allocate space and flush it.
895 * For unwritten space on the page we need to start the conversion to
896 * regular allocated space.
897 * For any other dirty buffer heads on the page we should flush them.
898 */
899STATIC int
900xfs_vm_writepage(
901 struct page *page,
902 struct writeback_control *wbc)
903{
904 struct inode *inode = page->mapping->host;
905 struct buffer_head *bh, *head;
906 struct xfs_bmbt_irec imap;
907 xfs_ioend_t *ioend = NULL, *iohead = NULL;
908 loff_t offset;
909 unsigned int type;
910 __uint64_t end_offset;
911 pgoff_t end_index, last_index;
912 ssize_t len;
913 int err, imap_valid = 0, uptodate = 1;
914 int count = 0;
915 int nonblocking = 0;
916
917 trace_xfs_writepage(inode, page, 0);
918
919 ASSERT(page_has_buffers(page));
920
921 /*
922 * Refuse to write the page out if we are called from reclaim context.
923 *
924 * This avoids stack overflows when called from deeply used stacks in
925 * random callers for direct reclaim or memcg reclaim. We explicitly
926 * allow reclaim from kswapd as the stack usage there is relatively low.
927 *
928 * This should really be done by the core VM, but until that happens
929 * filesystems like XFS, btrfs and ext4 have to take care of this
930 * by themselves.
931 */
932 if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
933 goto redirty;
934
935 /*
936 * Given that we do not allow direct reclaim to call us, we should
937 * never be called while in a filesystem transaction.
938 */
939 if (WARN_ON(current->flags & PF_FSTRANS))
940 goto redirty;
941
942 /* Is this page beyond the end of the file? */
943 offset = i_size_read(inode);
944 end_index = offset >> PAGE_CACHE_SHIFT;
945 last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
946 if (page->index >= end_index) {
947 if ((page->index >= end_index + 1) ||
948 !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
949 unlock_page(page);
950 return 0;
951 }
952 }
953
954 end_offset = min_t(unsigned long long,
955 (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
956 offset);
957 len = 1 << inode->i_blkbits;
958
959 bh = head = page_buffers(page);
960 offset = page_offset(page);
961 type = IO_OVERWRITE;
962
963 if (wbc->sync_mode == WB_SYNC_NONE)
964 nonblocking = 1;
965
966 do {
967 int new_ioend = 0;
968
969 if (offset >= end_offset)
970 break;
971 if (!buffer_uptodate(bh))
972 uptodate = 0;
973
974 /*
975 * set_page_dirty dirties all buffers in a page, independent
976 * of their state. The dirty state however is entirely
977 * meaningless for holes (!mapped && uptodate), so skip
978 * buffers covering holes here.
979 */
980 if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
981 imap_valid = 0;
982 continue;
983 }
984
985 if (buffer_unwritten(bh)) {
986 if (type != IO_UNWRITTEN) {
987 type = IO_UNWRITTEN;
988 imap_valid = 0;
989 }
990 } else if (buffer_delay(bh)) {
991 if (type != IO_DELALLOC) {
992 type = IO_DELALLOC;
993 imap_valid = 0;
994 }
995 } else if (buffer_uptodate(bh)) {
996 if (type != IO_OVERWRITE) {
997 type = IO_OVERWRITE;
998 imap_valid = 0;
999 }
1000 } else {
1001 if (PageUptodate(page)) {
1002 ASSERT(buffer_mapped(bh));
1003 imap_valid = 0;
1004 }
1005 continue;
1006 }
1007
1008 if (imap_valid)
1009 imap_valid = xfs_imap_valid(inode, &imap, offset);
1010 if (!imap_valid) {
1011 /*
1012 * If we didn't have a valid mapping then we need to
1013 * put the new mapping into a separate ioend structure.
1014 * This ensures non-contiguous extents always have
1015 * separate ioends, which is particularly important
1016 * for unwritten extent conversion at I/O completion
1017 * time.
1018 */
1019 new_ioend = 1;
1020 err = xfs_map_blocks(inode, offset, &imap, type,
1021 nonblocking);
1022 if (err)
1023 goto error;
1024 imap_valid = xfs_imap_valid(inode, &imap, offset);
1025 }
1026 if (imap_valid) {
1027 lock_buffer(bh);
1028 if (type != IO_OVERWRITE)
1029 xfs_map_at_offset(inode, bh, &imap, offset);
1030 xfs_add_to_ioend(inode, bh, offset, type, &ioend,
1031 new_ioend);
1032 count++;
1033 }
1034
1035 if (!iohead)
1036 iohead = ioend;
1037
1038 } while (offset += len, ((bh = bh->b_this_page) != head));
1039
1040 if (uptodate && bh == head)
1041 SetPageUptodate(page);
1042
1043 xfs_start_page_writeback(page, 1, count);
1044
1045 if (ioend && imap_valid) {
1046 xfs_off_t end_index;
1047
1048 end_index = imap.br_startoff + imap.br_blockcount;
1049
1050 /* to bytes */
1051 end_index <<= inode->i_blkbits;
1052
1053 /* to pages */
1054 end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
1055
1056 /* check against file size */
1057 if (end_index > last_index)
1058 end_index = last_index;
1059
1060 xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
1061 wbc, end_index);
1062 }
1063
1064 if (iohead)
1065 xfs_submit_ioend(wbc, iohead);
1066
1067 return 0;
1068
1069error:
1070 if (iohead)
1071 xfs_cancel_ioend(iohead);
1072
1073 if (err == -EAGAIN)
1074 goto redirty;
1075
1076 xfs_aops_discard_page(page);
1077 ClearPageUptodate(page);
1078 unlock_page(page);
1079 return err;
1080
1081redirty:
1082 redirty_page_for_writepage(wbc, page);
1083 unlock_page(page);
1084 return 0;
1085}
1086
1087STATIC int
1088xfs_vm_writepages(
1089 struct address_space *mapping,
1090 struct writeback_control *wbc)
1091{
1092 xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
1093 return generic_writepages(mapping, wbc);
1094}
1095
1096/*
1097 * Called to move a page into cleanable state - and from there
1098 * to be released. The page should already be clean. We always
1099 * have buffer heads in this call.
1100 *
1101 * Returns 1 if the page is ok to release, 0 otherwise.
1102 */
1103STATIC int
1104xfs_vm_releasepage(
1105 struct page *page,
1106 gfp_t gfp_mask)
1107{
1108 int delalloc, unwritten;
1109
1110 trace_xfs_releasepage(page->mapping->host, page, 0);
1111
1112 xfs_count_page_state(page, &delalloc, &unwritten);
1113
1114 if (WARN_ON(delalloc))
1115 return 0;
1116 if (WARN_ON(unwritten))
1117 return 0;
1118
1119 return try_to_free_buffers(page);
1120}
1121
1122STATIC int
1123__xfs_get_blocks(
1124 struct inode *inode,
1125 sector_t iblock,
1126 struct buffer_head *bh_result,
1127 int create,
1128 int direct)
1129{
1130 struct xfs_inode *ip = XFS_I(inode);
1131 struct xfs_mount *mp = ip->i_mount;
1132 xfs_fileoff_t offset_fsb, end_fsb;
1133 int error = 0;
1134 int lockmode = 0;
1135 struct xfs_bmbt_irec imap;
1136 int nimaps = 1;
1137 xfs_off_t offset;
1138 ssize_t size;
1139 int new = 0;
1140
1141 if (XFS_FORCED_SHUTDOWN(mp))
1142 return -XFS_ERROR(EIO);
1143
1144 offset = (xfs_off_t)iblock << inode->i_blkbits;
1145 ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
1146 size = bh_result->b_size;
1147
1148 if (!create && direct && offset >= i_size_read(inode))
1149 return 0;
1150
1151 if (create) {
1152 lockmode = XFS_ILOCK_EXCL;
1153 xfs_ilock(ip, lockmode);
1154 } else {
1155 lockmode = xfs_ilock_map_shared(ip);
1156 }
1157
1158 ASSERT(offset <= mp->m_maxioffset);
1159 if (offset + size > mp->m_maxioffset)
1160 size = mp->m_maxioffset - offset;
1161 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
1162 offset_fsb = XFS_B_TO_FSBT(mp, offset);
1163
1164 error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
1165 XFS_BMAPI_ENTIRE, NULL, 0, &imap, &nimaps, NULL);
1166 if (error)
1167 goto out_unlock;
1168
1169 if (create &&
1170 (!nimaps ||
1171 (imap.br_startblock == HOLESTARTBLOCK ||
1172 imap.br_startblock == DELAYSTARTBLOCK))) {
1173 if (direct) {
1174 error = xfs_iomap_write_direct(ip, offset, size,
1175 &imap, nimaps);
1176 } else {
1177 error = xfs_iomap_write_delay(ip, offset, size, &imap);
1178 }
1179 if (error)
1180 goto out_unlock;
1181
1182 trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
1183 } else if (nimaps) {
1184 trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
1185 } else {
1186 trace_xfs_get_blocks_notfound(ip, offset, size);
1187 goto out_unlock;
1188 }
1189 xfs_iunlock(ip, lockmode);
1190
1191 if (imap.br_startblock != HOLESTARTBLOCK &&
1192 imap.br_startblock != DELAYSTARTBLOCK) {
1193 /*
1194 * For unwritten extents do not report a disk address on
1195 * the read case (treat as if we're reading into a hole).
1196 */
1197 if (create || !ISUNWRITTEN(&imap))
1198 xfs_map_buffer(inode, bh_result, &imap, offset);
1199 if (create && ISUNWRITTEN(&imap)) {
1200 if (direct)
1201 bh_result->b_private = inode;
1202 set_buffer_unwritten(bh_result);
1203 }
1204 }
1205
1206 /*
1207 * If this is a realtime file, data may be on a different device
1208 * to that pointed to from the buffer_head b_bdev currently.
1209 */
1210 bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
1211
1212 /*
1213 * If we previously allocated a block out beyond eof and we are now
1214 * coming back to use it then we will need to flag it as new even if it
1215 * has a disk address.
1216 *
1217 * With sub-block writes into unwritten extents we also need to mark
1218 * the buffer as new so that the unwritten parts of the buffer get
1219 * correctly zeroed.
1220 */
1221 if (create &&
1222 ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
1223 (offset >= i_size_read(inode)) ||
1224 (new || ISUNWRITTEN(&imap))))
1225 set_buffer_new(bh_result);
1226
1227 if (imap.br_startblock == DELAYSTARTBLOCK) {
1228 BUG_ON(direct);
1229 if (create) {
1230 set_buffer_uptodate(bh_result);
1231 set_buffer_mapped(bh_result);
1232 set_buffer_delay(bh_result);
1233 }
1234 }
1235
1236 /*
1237 * If this is O_DIRECT or the mpage code calling, tell them how large
1238 * the mapping is, so that we can avoid repeated get_blocks calls.
1239 */
1240 if (direct || size > (1 << inode->i_blkbits)) {
1241 xfs_off_t mapping_size;
1242
1243 mapping_size = imap.br_startoff + imap.br_blockcount - iblock;
1244 mapping_size <<= inode->i_blkbits;
1245
1246 ASSERT(mapping_size > 0);
1247 if (mapping_size > size)
1248 mapping_size = size;
1249 if (mapping_size > LONG_MAX)
1250 mapping_size = LONG_MAX;
1251
1252 bh_result->b_size = mapping_size;
1253 }
1254
1255 return 0;
1256
1257out_unlock:
1258 xfs_iunlock(ip, lockmode);
1259 return -error;
1260}
1261
1262int
1263xfs_get_blocks(
1264 struct inode *inode,
1265 sector_t iblock,
1266 struct buffer_head *bh_result,
1267 int create)
1268{
1269 return __xfs_get_blocks(inode, iblock, bh_result, create, 0);
1270}
1271
1272STATIC int
1273xfs_get_blocks_direct(
1274 struct inode *inode,
1275 sector_t iblock,
1276 struct buffer_head *bh_result,
1277 int create)
1278{
1279 return __xfs_get_blocks(inode, iblock, bh_result, create, 1);
1280}
1281
1282/*
1283 * Complete a direct I/O write request.
1284 *
1285 * If the private argument is non-NULL __xfs_get_blocks signals us that we
1286 * need to issue a transaction to convert the range from unwritten to written
1287 * extents. In case this is regular synchronous I/O we just call xfs_end_io
1288 * to do this and we are done. But in case this was a successful AIO
1289 * request this handler is called from interrupt context, from which we
1290 * can't start transactions. In that case offload the I/O completion to
1291 * the workqueues we also use for buffered I/O completion.
1292 */
1293STATIC void
1294xfs_end_io_direct_write(
1295 struct kiocb *iocb,
1296 loff_t offset,
1297 ssize_t size,
1298 void *private,
1299 int ret,
1300 bool is_async)
1301{
1302 struct xfs_ioend *ioend = iocb->private;
1303
1304 /*
1305 * blockdev_direct_IO can return an error even after the I/O
1306 * completion handler was called. Thus we need to protect
1307 * against double-freeing.
1308 */
1309 iocb->private = NULL;
1310
1311 ioend->io_offset = offset;
1312 ioend->io_size = size;
1313 if (private && size > 0)
1314 ioend->io_type = IO_UNWRITTEN;
1315
1316 if (is_async) {
1317 /*
1318 * If we are converting an unwritten extent we need to delay
1319 * the AIO completion until after the unwritten extent
1320 * conversion has completed, otherwise do it ASAP.
1321 */
1322 if (ioend->io_type == IO_UNWRITTEN) {
1323 ioend->io_iocb = iocb;
1324 ioend->io_result = ret;
1325 } else {
1326 aio_complete(iocb, ret, 0);
1327 }
1328 xfs_finish_ioend(ioend);
1329 } else {
1330 xfs_finish_ioend_sync(ioend);
1331 }
1332
1333 /* XXX: probably should move into the real I/O completion handler */
1334 inode_dio_done(ioend->io_inode);
1335}
1336
1337STATIC ssize_t
1338xfs_vm_direct_IO(
1339 int rw,
1340 struct kiocb *iocb,
1341 const struct iovec *iov,
1342 loff_t offset,
1343 unsigned long nr_segs)
1344{
1345 struct inode *inode = iocb->ki_filp->f_mapping->host;
1346 struct block_device *bdev = xfs_find_bdev_for_inode(inode);
1347 ssize_t ret;
1348
1349 if (rw & WRITE) {
1350 iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
1351
1352 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1353 offset, nr_segs,
1354 xfs_get_blocks_direct,
1355 xfs_end_io_direct_write, NULL, 0);
1356 if (ret != -EIOCBQUEUED && iocb->private)
1357 xfs_destroy_ioend(iocb->private);
1358 } else {
1359 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1360 offset, nr_segs,
1361 xfs_get_blocks_direct,
1362 NULL, NULL, 0);
1363 }
1364
1365 return ret;
1366}
1367
1368STATIC void
1369xfs_vm_write_failed(
1370 struct address_space *mapping,
1371 loff_t to)
1372{
1373 struct inode *inode = mapping->host;
1374
1375 if (to > inode->i_size) {
1376 /*
1377 * punch out the delalloc blocks we have already allocated. We
1378 * don't call xfs_setattr() to do this as we may be in the
1379 * middle of a multi-iovec write and so the vfs inode->i_size
1380 * will not match the xfs ip->i_size and so it will zero too
1381 * much. Hence we jus truncate the page cache to zero what is
1382 * necessary and punch the delalloc blocks directly.
1383 */
1384 struct xfs_inode *ip = XFS_I(inode);
1385 xfs_fileoff_t start_fsb;
1386 xfs_fileoff_t end_fsb;
1387 int error;
1388
1389 truncate_pagecache(inode, to, inode->i_size);
1390
1391 /*
1392 * Check if there are any blocks that are outside of i_size
1393 * that need to be trimmed back.
1394 */
1395 start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
1396 end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
1397 if (end_fsb <= start_fsb)
1398 return;
1399
1400 xfs_ilock(ip, XFS_ILOCK_EXCL);
1401 error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
1402 end_fsb - start_fsb);
1403 if (error) {
1404 /* something screwed, just bail */
1405 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
1406 xfs_alert(ip->i_mount,
1407 "xfs_vm_write_failed: unable to clean up ino %lld",
1408 ip->i_ino);
1409 }
1410 }
1411 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1412 }
1413}
1414
1415STATIC int
1416xfs_vm_write_begin(
1417 struct file *file,
1418 struct address_space *mapping,
1419 loff_t pos,
1420 unsigned len,
1421 unsigned flags,
1422 struct page **pagep,
1423 void **fsdata)
1424{
1425 int ret;
1426
1427 ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS,
1428 pagep, xfs_get_blocks);
1429 if (unlikely(ret))
1430 xfs_vm_write_failed(mapping, pos + len);
1431 return ret;
1432}
1433
1434STATIC int
1435xfs_vm_write_end(
1436 struct file *file,
1437 struct address_space *mapping,
1438 loff_t pos,
1439 unsigned len,
1440 unsigned copied,
1441 struct page *page,
1442 void *fsdata)
1443{
1444 int ret;
1445
1446 ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
1447 if (unlikely(ret < len))
1448 xfs_vm_write_failed(mapping, pos + len);
1449 return ret;
1450}
1451
1452STATIC sector_t
1453xfs_vm_bmap(
1454 struct address_space *mapping,
1455 sector_t block)
1456{
1457 struct inode *inode = (struct inode *)mapping->host;
1458 struct xfs_inode *ip = XFS_I(inode);
1459
1460 trace_xfs_vm_bmap(XFS_I(inode));
1461 xfs_ilock(ip, XFS_IOLOCK_SHARED);
1462 xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
1463 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
1464 return generic_block_bmap(mapping, block, xfs_get_blocks);
1465}
1466
1467STATIC int
1468xfs_vm_readpage(
1469 struct file *unused,
1470 struct page *page)
1471{
1472 return mpage_readpage(page, xfs_get_blocks);
1473}
1474
1475STATIC int
1476xfs_vm_readpages(
1477 struct file *unused,
1478 struct address_space *mapping,
1479 struct list_head *pages,
1480 unsigned nr_pages)
1481{
1482 return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
1483}
1484
1485const struct address_space_operations xfs_address_space_operations = {
1486 .readpage = xfs_vm_readpage,
1487 .readpages = xfs_vm_readpages,
1488 .writepage = xfs_vm_writepage,
1489 .writepages = xfs_vm_writepages,
1490 .releasepage = xfs_vm_releasepage,
1491 .invalidatepage = xfs_vm_invalidatepage,
1492 .write_begin = xfs_vm_write_begin,
1493 .write_end = xfs_vm_write_end,
1494 .bmap = xfs_vm_bmap,
1495 .direct_IO = xfs_vm_direct_IO,
1496 .migratepage = buffer_migrate_page,
1497 .is_partially_uptodate = block_is_partially_uptodate,
1498 .error_remove_page = generic_error_remove_page,
1499};