Diffstat (limited to 'fs/xfs/linux-2.6/xfs_aops.c')
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.c | 1111
1 file changed, 613 insertions(+), 498 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 94d3cdfbf9b8..120626789406 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -40,11 +40,10 @@
40#include "xfs_rw.h" 40#include "xfs_rw.h"
41#include "xfs_iomap.h" 41#include "xfs_iomap.h"
42#include <linux/mpage.h> 42#include <linux/mpage.h>
43#include <linux/pagevec.h>
43#include <linux/writeback.h> 44#include <linux/writeback.h>
44 45
45STATIC void xfs_count_page_state(struct page *, int *, int *, int *); 46STATIC void xfs_count_page_state(struct page *, int *, int *, int *);
46STATIC void xfs_convert_page(struct inode *, struct page *, xfs_iomap_t *,
47 struct writeback_control *wbc, void *, int, int);
48 47
49#if defined(XFS_RW_TRACE) 48#if defined(XFS_RW_TRACE)
50void 49void
@@ -55,17 +54,15 @@ xfs_page_trace(
55 int mask) 54 int mask)
56{ 55{
57 xfs_inode_t *ip; 56 xfs_inode_t *ip;
58 bhv_desc_t *bdp;
59 vnode_t *vp = LINVFS_GET_VP(inode); 57 vnode_t *vp = LINVFS_GET_VP(inode);
60 loff_t isize = i_size_read(inode); 58 loff_t isize = i_size_read(inode);
61 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT; 59 loff_t offset = page_offset(page);
62 int delalloc = -1, unmapped = -1, unwritten = -1; 60 int delalloc = -1, unmapped = -1, unwritten = -1;
63 61
64 if (page_has_buffers(page)) 62 if (page_has_buffers(page))
65 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); 63 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
66 64
67 bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops); 65 ip = xfs_vtoi(vp);
68 ip = XFS_BHVTOI(bdp);
69 if (!ip->i_rwtrace) 66 if (!ip->i_rwtrace)
70 return; 67 return;
71 68
@@ -103,15 +100,56 @@ xfs_finish_ioend(
103 queue_work(xfsdatad_workqueue, &ioend->io_work); 100 queue_work(xfsdatad_workqueue, &ioend->io_work);
104} 101}
105 102
103/*
104 * We're now finished for good with this ioend structure.
105 * Update the page state via the associated buffer_heads,
106 * release holds on the inode and bio, and finally free
107 * up memory. Do not use the ioend after this.
108 */
106STATIC void 109STATIC void
107xfs_destroy_ioend( 110xfs_destroy_ioend(
108 xfs_ioend_t *ioend) 111 xfs_ioend_t *ioend)
109{ 112{
113 struct buffer_head *bh, *next;
114
115 for (bh = ioend->io_buffer_head; bh; bh = next) {
116 next = bh->b_private;
117 bh->b_end_io(bh, ioend->io_uptodate);
118 }
119
110 vn_iowake(ioend->io_vnode); 120 vn_iowake(ioend->io_vnode);
111 mempool_free(ioend, xfs_ioend_pool); 121 mempool_free(ioend, xfs_ioend_pool);
112} 122}
113 123
114/* 124/*
125 * Buffered IO write completion for delayed allocate extents.
126 * TODO: Update ondisk isize now that we know the file data
127 * has been flushed (i.e. the notorious "NULL file" problem).
128 */
129STATIC void
130xfs_end_bio_delalloc(
131 void *data)
132{
133 xfs_ioend_t *ioend = data;
134
135 xfs_destroy_ioend(ioend);
136}
137
138/*
139 * Buffered IO write completion for regular, written extents.
140 */
141STATIC void
142xfs_end_bio_written(
143 void *data)
144{
145 xfs_ioend_t *ioend = data;
146
147 xfs_destroy_ioend(ioend);
148}
149
150/*
151 * IO write completion for unwritten extents.
152 *
115 * Issue transactions to convert a buffer range from unwritten 153 * Issue transactions to convert a buffer range from unwritten
116 * to written extents. 154 * to written extents.
117 */ 155 */
@@ -123,21 +161,10 @@ xfs_end_bio_unwritten(
123 vnode_t *vp = ioend->io_vnode; 161 vnode_t *vp = ioend->io_vnode;
124 xfs_off_t offset = ioend->io_offset; 162 xfs_off_t offset = ioend->io_offset;
125 size_t size = ioend->io_size; 163 size_t size = ioend->io_size;
126 struct buffer_head *bh, *next;
127 int error; 164 int error;
128 165
129 if (ioend->io_uptodate) 166 if (ioend->io_uptodate)
130 VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); 167 VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
131
132 /* ioend->io_buffer_head is only non-NULL for buffered I/O */
133 for (bh = ioend->io_buffer_head; bh; bh = next) {
134 next = bh->b_private;
135
136 bh->b_end_io = NULL;
137 clear_buffer_unwritten(bh);
138 end_buffer_async_write(bh, ioend->io_uptodate);
139 }
140
141 xfs_destroy_ioend(ioend); 168 xfs_destroy_ioend(ioend);
142} 169}
143 170
@@ -149,7 +176,8 @@ xfs_end_bio_unwritten(
149 */ 176 */
150STATIC xfs_ioend_t * 177STATIC xfs_ioend_t *
151xfs_alloc_ioend( 178xfs_alloc_ioend(
152 struct inode *inode) 179 struct inode *inode,
180 unsigned int type)
153{ 181{
154 xfs_ioend_t *ioend; 182 xfs_ioend_t *ioend;
155 183
@@ -162,45 +190,25 @@ xfs_alloc_ioend(
162 */ 190 */
163 atomic_set(&ioend->io_remaining, 1); 191 atomic_set(&ioend->io_remaining, 1);
164 ioend->io_uptodate = 1; /* cleared if any I/O fails */ 192 ioend->io_uptodate = 1; /* cleared if any I/O fails */
193 ioend->io_list = NULL;
194 ioend->io_type = type;
165 ioend->io_vnode = LINVFS_GET_VP(inode); 195 ioend->io_vnode = LINVFS_GET_VP(inode);
166 ioend->io_buffer_head = NULL; 196 ioend->io_buffer_head = NULL;
197 ioend->io_buffer_tail = NULL;
167 atomic_inc(&ioend->io_vnode->v_iocount); 198 atomic_inc(&ioend->io_vnode->v_iocount);
168 ioend->io_offset = 0; 199 ioend->io_offset = 0;
169 ioend->io_size = 0; 200 ioend->io_size = 0;
170 201
171 INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend); 202 if (type == IOMAP_UNWRITTEN)
203 INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
204 else if (type == IOMAP_DELAY)
205 INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc, ioend);
206 else
207 INIT_WORK(&ioend->io_work, xfs_end_bio_written, ioend);
172 208
173 return ioend; 209 return ioend;
174} 210}
175 211
176void
177linvfs_unwritten_done(
178 struct buffer_head *bh,
179 int uptodate)
180{
181 xfs_ioend_t *ioend = bh->b_private;
182 static spinlock_t unwritten_done_lock = SPIN_LOCK_UNLOCKED;
183 unsigned long flags;
184
185 ASSERT(buffer_unwritten(bh));
186 bh->b_end_io = NULL;
187
188 if (!uptodate)
189 ioend->io_uptodate = 0;
190
191 /*
192 * Deep magic here. We reuse b_private in the buffer_heads to build
193 * a chain for completing the I/O from user context after we've issued
194 * a transaction to convert the unwritten extent.
195 */
196 spin_lock_irqsave(&unwritten_done_lock, flags);
197 bh->b_private = ioend->io_buffer_head;
198 ioend->io_buffer_head = bh;
199 spin_unlock_irqrestore(&unwritten_done_lock, flags);
200
201 xfs_finish_ioend(ioend);
202}
203
204STATIC int 212STATIC int
205xfs_map_blocks( 213xfs_map_blocks(
206 struct inode *inode, 214 struct inode *inode,
@@ -218,138 +226,283 @@ xfs_map_blocks(
218 return -error; 226 return -error;
219} 227}
220 228
229STATIC inline int
230xfs_iomap_valid(
231 xfs_iomap_t *iomapp,
232 loff_t offset)
233{
234 return offset >= iomapp->iomap_offset &&
235 offset < iomapp->iomap_offset + iomapp->iomap_bsize;
236}
237
221/* 238/*
222 * Finds the corresponding mapping in block @map array of the 239 * BIO completion handler for buffered IO.
223 * given @offset within a @page.
224 */ 240 */
225STATIC xfs_iomap_t * 241STATIC int
226xfs_offset_to_map( 242xfs_end_bio(
243 struct bio *bio,
244 unsigned int bytes_done,
245 int error)
246{
247 xfs_ioend_t *ioend = bio->bi_private;
248
249 if (bio->bi_size)
250 return 1;
251
252 ASSERT(ioend);
253 ASSERT(atomic_read(&bio->bi_cnt) >= 1);
254
255 /* Toss bio and pass work off to an xfsdatad thread */
256 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
257 ioend->io_uptodate = 0;
258 bio->bi_private = NULL;
259 bio->bi_end_io = NULL;
260
261 bio_put(bio);
262 xfs_finish_ioend(ioend);
263 return 0;
264}
265
266STATIC void
267xfs_submit_ioend_bio(
268 xfs_ioend_t *ioend,
269 struct bio *bio)
270{
271 atomic_inc(&ioend->io_remaining);
272
273 bio->bi_private = ioend;
274 bio->bi_end_io = xfs_end_bio;
275
276 submit_bio(WRITE, bio);
277 ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
278 bio_put(bio);
279}
280
281STATIC struct bio *
282xfs_alloc_ioend_bio(
283 struct buffer_head *bh)
284{
285 struct bio *bio;
286 int nvecs = bio_get_nr_vecs(bh->b_bdev);
287
288 do {
289 bio = bio_alloc(GFP_NOIO, nvecs);
290 nvecs >>= 1;
291 } while (!bio);
292
293 ASSERT(bio->bi_private == NULL);
294 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
295 bio->bi_bdev = bh->b_bdev;
296 bio_get(bio);
297 return bio;
298}
299
300STATIC void
301xfs_start_buffer_writeback(
302 struct buffer_head *bh)
303{
304 ASSERT(buffer_mapped(bh));
305 ASSERT(buffer_locked(bh));
306 ASSERT(!buffer_delay(bh));
307 ASSERT(!buffer_unwritten(bh));
308
309 mark_buffer_async_write(bh);
310 set_buffer_uptodate(bh);
311 clear_buffer_dirty(bh);
312}
313
314STATIC void
315xfs_start_page_writeback(
227 struct page *page, 316 struct page *page,
228 xfs_iomap_t *iomapp, 317 struct writeback_control *wbc,
229 unsigned long offset) 318 int clear_dirty,
319 int buffers)
320{
321 ASSERT(PageLocked(page));
322 ASSERT(!PageWriteback(page));
323 set_page_writeback(page);
324 if (clear_dirty)
325 clear_page_dirty(page);
326 unlock_page(page);
327 if (!buffers) {
328 end_page_writeback(page);
329 wbc->pages_skipped++; /* We didn't write this page */
330 }
331}
332
333static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
334{
335 return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
336}
337
338/*
339 * Submit all of the bios for all of the ioends we have saved up, covering the
340 * initial writepage page and also any probed pages.
341 *
342 * Because we may have multiple ioends spanning a page, we need to start
343 * writeback on all the buffers before we submit them for I/O. If we mark the
344 * buffers as we got, then we can end up with a page that only has buffers
345 * marked async write and I/O complete on can occur before we mark the other
346 * buffers async write.
347 *
348 * The end result of this is that we trip a bug in end_page_writeback() because
349 * we call it twice for the one page as the code in end_buffer_async_write()
350 * assumes that all buffers on the page are started at the same time.
351 *
352 * The fix is two passes across the ioend list - one to start writeback on the
353 * bufferheads, and then the second one submit them for I/O.
354 */
355STATIC void
356xfs_submit_ioend(
357 xfs_ioend_t *ioend)
358{
359 xfs_ioend_t *head = ioend;
360 xfs_ioend_t *next;
361 struct buffer_head *bh;
362 struct bio *bio;
363 sector_t lastblock = 0;
364
365 /* Pass 1 - start writeback */
366 do {
367 next = ioend->io_list;
368 for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
369 xfs_start_buffer_writeback(bh);
370 }
371 } while ((ioend = next) != NULL);
372
373 /* Pass 2 - submit I/O */
374 ioend = head;
375 do {
376 next = ioend->io_list;
377 bio = NULL;
378
379 for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
380
381 if (!bio) {
382 retry:
383 bio = xfs_alloc_ioend_bio(bh);
384 } else if (bh->b_blocknr != lastblock + 1) {
385 xfs_submit_ioend_bio(ioend, bio);
386 goto retry;
387 }
388
389 if (bio_add_buffer(bio, bh) != bh->b_size) {
390 xfs_submit_ioend_bio(ioend, bio);
391 goto retry;
392 }
393
394 lastblock = bh->b_blocknr;
395 }
396 if (bio)
397 xfs_submit_ioend_bio(ioend, bio);
398 xfs_finish_ioend(ioend);
399 } while ((ioend = next) != NULL);
400}
401
402/*
403 * Cancel submission of all buffer_heads so far in this endio.
404 * Toss the endio too. Only ever called for the initial page
405 * in a writepage request, so only ever one page.
406 */
407STATIC void
408xfs_cancel_ioend(
409 xfs_ioend_t *ioend)
410{
411 xfs_ioend_t *next;
412 struct buffer_head *bh, *next_bh;
413
414 do {
415 next = ioend->io_list;
416 bh = ioend->io_buffer_head;
417 do {
418 next_bh = bh->b_private;
419 clear_buffer_async_write(bh);
420 unlock_buffer(bh);
421 } while ((bh = next_bh) != NULL);
422
423 vn_iowake(ioend->io_vnode);
424 mempool_free(ioend, xfs_ioend_pool);
425 } while ((ioend = next) != NULL);
426}
427
428/*
429 * Test to see if we've been building up a completion structure for
430 * earlier buffers -- if so, we try to append to this ioend if we
431 * can, otherwise we finish off any current ioend and start another.
432 * Return true if we've finished the given ioend.
433 */
434STATIC void
435xfs_add_to_ioend(
436 struct inode *inode,
437 struct buffer_head *bh,
438 xfs_off_t offset,
439 unsigned int type,
440 xfs_ioend_t **result,
441 int need_ioend)
230{ 442{
231 loff_t full_offset; /* offset from start of file */ 443 xfs_ioend_t *ioend = *result;
232 444
233 ASSERT(offset < PAGE_CACHE_SIZE); 445 if (!ioend || need_ioend || type != ioend->io_type) {
446 xfs_ioend_t *previous = *result;
234 447
235 full_offset = page->index; /* NB: using 64bit number */ 448 ioend = xfs_alloc_ioend(inode, type);
236 full_offset <<= PAGE_CACHE_SHIFT; /* offset from file start */ 449 ioend->io_offset = offset;
237 full_offset += offset; /* offset from page start */ 450 ioend->io_buffer_head = bh;
451 ioend->io_buffer_tail = bh;
452 if (previous)
453 previous->io_list = ioend;
454 *result = ioend;
455 } else {
456 ioend->io_buffer_tail->b_private = bh;
457 ioend->io_buffer_tail = bh;
458 }
238 459
239 if (full_offset < iomapp->iomap_offset) 460 bh->b_private = NULL;
240 return NULL; 461 ioend->io_size += bh->b_size;
241 if (iomapp->iomap_offset + (iomapp->iomap_bsize -1) >= full_offset)
242 return iomapp;
243 return NULL;
244} 462}
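
For reference, a stand-alone userspace model (hypothetical types and field names, not kernel code) of the chaining scheme built by xfs_add_to_ioend() above and walked by xfs_submit_ioend(): buffers hang off each ioend through a b_private-style pointer, ioends are strung together through io_list, and submission makes two passes so that no buffer completes before every buffer on its page has been marked for writeback.

#include <stdio.h>

struct buf { struct buf *b_private; int async_write; };
struct ioend { struct ioend *io_list; struct buf *io_buffer_head; };

static void submit_all(struct ioend *head)
{
	struct ioend *io;
	struct buf *b;

	/* Pass 1 - mark every buffer in every ioend for writeback. */
	for (io = head; io; io = io->io_list)
		for (b = io->io_buffer_head; b; b = b->b_private)
			b->async_write = 1;

	/* Pass 2 - only now hand the buffers to the block layer. */
	for (io = head; io; io = io->io_list)
		for (b = io->io_buffer_head; b; b = b->b_private)
			printf("submit %p (async_write=%d)\n", (void *)b, b->async_write);
}

int main(void)
{
	struct buf b2 = { NULL, 0 }, b1 = { &b2, 0 };
	struct ioend io = { NULL, &b1 };

	submit_all(&io);
	return 0;
}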
245 463
246STATIC void 464STATIC void
247xfs_map_at_offset( 465xfs_map_at_offset(
248 struct page *page,
249 struct buffer_head *bh, 466 struct buffer_head *bh,
250 unsigned long offset, 467 loff_t offset,
251 int block_bits, 468 int block_bits,
252 xfs_iomap_t *iomapp) 469 xfs_iomap_t *iomapp)
253{ 470{
254 xfs_daddr_t bn; 471 xfs_daddr_t bn;
255 loff_t delta;
256 int sector_shift; 472 int sector_shift;
257 473
258 ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE)); 474 ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
259 ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY)); 475 ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));
260 ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL); 476 ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL);
261 477
262 delta = page->index;
263 delta <<= PAGE_CACHE_SHIFT;
264 delta += offset;
265 delta -= iomapp->iomap_offset;
266 delta >>= block_bits;
267
268 sector_shift = block_bits - BBSHIFT; 478 sector_shift = block_bits - BBSHIFT;
269 bn = iomapp->iomap_bn >> sector_shift; 479 bn = (iomapp->iomap_bn >> sector_shift) +
270 bn += delta; 480 ((offset - iomapp->iomap_offset) >> block_bits);
271 BUG_ON(!bn && !(iomapp->iomap_flags & IOMAP_REALTIME)); 481
482 ASSERT(bn || (iomapp->iomap_flags & IOMAP_REALTIME));
272 ASSERT((bn << sector_shift) >= iomapp->iomap_bn); 483 ASSERT((bn << sector_shift) >= iomapp->iomap_bn);
273 484
274 lock_buffer(bh); 485 lock_buffer(bh);
275 bh->b_blocknr = bn; 486 bh->b_blocknr = bn;
276 bh->b_bdev = iomapp->iomap_target->pbr_bdev; 487 bh->b_bdev = iomapp->iomap_target->bt_bdev;
277 set_buffer_mapped(bh); 488 set_buffer_mapped(bh);
278 clear_buffer_delay(bh); 489 clear_buffer_delay(bh);
490 clear_buffer_unwritten(bh);
279} 491}
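
A worked example of the new block-number calculation in xfs_map_at_offset() above, using hypothetical sizes: 4096-byte filesystem blocks (block_bits = 12) and 512-byte basic blocks (BBSHIFT = 9), so sector_shift = 3.

#include <assert.h>

int main(void)
{
	long long iomap_bn = 8000;		 /* extent starts at basic block 8000 */
	long long iomap_offset = 1 << 20;	 /* extent maps file offset 1 MiB */
	long long offset = (1 << 20) + 3 * 4096; /* buffer sits 3 fs blocks into it */
	int block_bits = 12;
	int sector_shift = block_bits - 9;	 /* BBSHIFT == 9 */

	long long bn = (iomap_bn >> sector_shift) +
		       ((offset - iomap_offset) >> block_bits);

	/* 8000 basic blocks == 1000 fs blocks; plus the 3-block delta. */
	assert(bn == 1003);
	return 0;
}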
280 492
281/* 493/*
282 * Look for a page at index which is unlocked and contains our 494 * Look for a page at index that is suitable for clustering.
283 * unwritten extent flagged buffers at its head. Returns page
284 * locked and with an extra reference count, and length of the
285 * unwritten extent component on this page that we can write,
286 * in units of filesystem blocks.
287 */
288STATIC struct page *
289xfs_probe_unwritten_page(
290 struct address_space *mapping,
291 pgoff_t index,
292 xfs_iomap_t *iomapp,
293 xfs_ioend_t *ioend,
294 unsigned long max_offset,
295 unsigned long *fsbs,
296 unsigned int bbits)
297{
298 struct page *page;
299
300 page = find_trylock_page(mapping, index);
301 if (!page)
302 return NULL;
303 if (PageWriteback(page))
304 goto out;
305
306 if (page->mapping && page_has_buffers(page)) {
307 struct buffer_head *bh, *head;
308 unsigned long p_offset = 0;
309
310 *fsbs = 0;
311 bh = head = page_buffers(page);
312 do {
313 if (!buffer_unwritten(bh) || !buffer_uptodate(bh))
314 break;
315 if (!xfs_offset_to_map(page, iomapp, p_offset))
316 break;
317 if (p_offset >= max_offset)
318 break;
319 xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);
320 set_buffer_unwritten_io(bh);
321 bh->b_private = ioend;
322 p_offset += bh->b_size;
323 (*fsbs)++;
324 } while ((bh = bh->b_this_page) != head);
325
326 if (p_offset)
327 return page;
328 }
329
330out:
331 unlock_page(page);
332 return NULL;
333}
334
335/*
336 * Look for a page at index which is unlocked and not mapped
337 * yet - clustering for mmap write case.
338 */ 495 */
339STATIC unsigned int 496STATIC unsigned int
340xfs_probe_unmapped_page( 497xfs_probe_page(
341 struct address_space *mapping, 498 struct page *page,
342 pgoff_t index, 499 unsigned int pg_offset,
343 unsigned int pg_offset) 500 int mapped)
344{ 501{
345 struct page *page;
346 int ret = 0; 502 int ret = 0;
347 503
348 page = find_trylock_page(mapping, index);
349 if (!page)
350 return 0;
351 if (PageWriteback(page)) 504 if (PageWriteback(page))
352 goto out; 505 return 0;
353 506
354 if (page->mapping && PageDirty(page)) { 507 if (page->mapping && PageDirty(page)) {
355 if (page_has_buffers(page)) { 508 if (page_has_buffers(page)) {
@@ -357,79 +510,101 @@ xfs_probe_unmapped_page(
357 510
358 bh = head = page_buffers(page); 511 bh = head = page_buffers(page);
359 do { 512 do {
360 if (buffer_mapped(bh) || !buffer_uptodate(bh)) 513 if (!buffer_uptodate(bh))
514 break;
515 if (mapped != buffer_mapped(bh))
361 break; 516 break;
362 ret += bh->b_size; 517 ret += bh->b_size;
363 if (ret >= pg_offset) 518 if (ret >= pg_offset)
364 break; 519 break;
365 } while ((bh = bh->b_this_page) != head); 520 } while ((bh = bh->b_this_page) != head);
366 } else 521 } else
367 ret = PAGE_CACHE_SIZE; 522 ret = mapped ? 0 : PAGE_CACHE_SIZE;
368 } 523 }
369 524
370out:
371 unlock_page(page);
372 return ret; 525 return ret;
373} 526}
374 527
375STATIC unsigned int 528STATIC size_t
376xfs_probe_unmapped_cluster( 529xfs_probe_cluster(
377 struct inode *inode, 530 struct inode *inode,
378 struct page *startpage, 531 struct page *startpage,
379 struct buffer_head *bh, 532 struct buffer_head *bh,
380 struct buffer_head *head) 533 struct buffer_head *head,
534 int mapped)
381{ 535{
536 struct pagevec pvec;
382 pgoff_t tindex, tlast, tloff; 537 pgoff_t tindex, tlast, tloff;
383 unsigned int pg_offset, len, total = 0; 538 size_t total = 0;
384 struct address_space *mapping = inode->i_mapping; 539 int done = 0, i;
385 540
386 /* First sum forwards in this page */ 541 /* First sum forwards in this page */
387 do { 542 do {
388 if (buffer_mapped(bh)) 543 if (mapped != buffer_mapped(bh))
389 break; 544 return total;
390 total += bh->b_size; 545 total += bh->b_size;
391 } while ((bh = bh->b_this_page) != head); 546 } while ((bh = bh->b_this_page) != head);
392 547
393 /* If we reached the end of the page, sum forwards in 548 /* if we reached the end of the page, sum forwards in following pages */
394 * following pages. 549 tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
395 */ 550 tindex = startpage->index + 1;
396 if (bh == head) { 551
397 tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT; 552 /* Prune this back to avoid pathological behavior */
398 /* Prune this back to avoid pathological behavior */ 553 tloff = min(tlast, startpage->index + 64);
399 tloff = min(tlast, startpage->index + 64); 554
400 for (tindex = startpage->index + 1; tindex < tloff; tindex++) { 555 pagevec_init(&pvec, 0);
401 len = xfs_probe_unmapped_page(mapping, tindex, 556 while (!done && tindex <= tloff) {
402 PAGE_CACHE_SIZE); 557 unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
403 if (!len) 558
404 return total; 559 if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
560 break;
561
562 for (i = 0; i < pagevec_count(&pvec); i++) {
563 struct page *page = pvec.pages[i];
564 size_t pg_offset, len = 0;
565
566 if (tindex == tlast) {
567 pg_offset =
568 i_size_read(inode) & (PAGE_CACHE_SIZE - 1);
569 if (!pg_offset) {
570 done = 1;
571 break;
572 }
573 } else
574 pg_offset = PAGE_CACHE_SIZE;
575
576 if (page->index == tindex && !TestSetPageLocked(page)) {
577 len = xfs_probe_page(page, pg_offset, mapped);
578 unlock_page(page);
579 }
580
581 if (!len) {
582 done = 1;
583 break;
584 }
585
405 total += len; 586 total += len;
587 tindex++;
406 } 588 }
407 if (tindex == tlast && 589
408 (pg_offset = i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { 590 pagevec_release(&pvec);
409 total += xfs_probe_unmapped_page(mapping, 591 cond_resched();
410 tindex, pg_offset);
411 }
412 } 592 }
593
413 return total; 594 return total;
414} 595}
415 596
416/* 597/*
417 * Probe for a given page (index) in the inode and test if it is delayed 598 * Test if a given page is suitable for writing as part of an unwritten
418 * and without unwritten buffers. Returns page locked and with an extra 599 * or delayed allocate extent.
419 * reference count.
420 */ 600 */
421STATIC struct page * 601STATIC int
422xfs_probe_delalloc_page( 602xfs_is_delayed_page(
423 struct inode *inode, 603 struct page *page,
424 pgoff_t index) 604 unsigned int type)
425{ 605{
426 struct page *page;
427
428 page = find_trylock_page(inode->i_mapping, index);
429 if (!page)
430 return NULL;
431 if (PageWriteback(page)) 606 if (PageWriteback(page))
432 goto out; 607 return 0;
433 608
434 if (page->mapping && page_has_buffers(page)) { 609 if (page->mapping && page_has_buffers(page)) {
435 struct buffer_head *bh, *head; 610 struct buffer_head *bh, *head;
@@ -437,243 +612,156 @@ xfs_probe_delalloc_page(
437 612
438 bh = head = page_buffers(page); 613 bh = head = page_buffers(page);
439 do { 614 do {
440 if (buffer_unwritten(bh)) { 615 if (buffer_unwritten(bh))
441 acceptable = 0; 616 acceptable = (type == IOMAP_UNWRITTEN);
617 else if (buffer_delay(bh))
618 acceptable = (type == IOMAP_DELAY);
619 else if (buffer_mapped(bh))
620 acceptable = (type == 0);
621 else
442 break; 622 break;
443 } else if (buffer_delay(bh)) {
444 acceptable = 1;
445 }
446 } while ((bh = bh->b_this_page) != head); 623 } while ((bh = bh->b_this_page) != head);
447 624
448 if (acceptable) 625 if (acceptable)
449 return page; 626 return 1;
450 }
451
452out:
453 unlock_page(page);
454 return NULL;
455}
456
457STATIC int
458xfs_map_unwritten(
459 struct inode *inode,
460 struct page *start_page,
461 struct buffer_head *head,
462 struct buffer_head *curr,
463 unsigned long p_offset,
464 int block_bits,
465 xfs_iomap_t *iomapp,
466 struct writeback_control *wbc,
467 int startio,
468 int all_bh)
469{
470 struct buffer_head *bh = curr;
471 xfs_iomap_t *tmp;
472 xfs_ioend_t *ioend;
473 loff_t offset;
474 unsigned long nblocks = 0;
475
476 offset = start_page->index;
477 offset <<= PAGE_CACHE_SHIFT;
478 offset += p_offset;
479
480 ioend = xfs_alloc_ioend(inode);
481
482 /* First map forwards in the page consecutive buffers
483 * covering this unwritten extent
484 */
485 do {
486 if (!buffer_unwritten(bh))
487 break;
488 tmp = xfs_offset_to_map(start_page, iomapp, p_offset);
489 if (!tmp)
490 break;
491 xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp);
492 set_buffer_unwritten_io(bh);
493 bh->b_private = ioend;
494 p_offset += bh->b_size;
495 nblocks++;
496 } while ((bh = bh->b_this_page) != head);
497
498 atomic_add(nblocks, &ioend->io_remaining);
499
500 /* If we reached the end of the page, map forwards in any
501 * following pages which are also covered by this extent.
502 */
503 if (bh == head) {
504 struct address_space *mapping = inode->i_mapping;
505 pgoff_t tindex, tloff, tlast;
506 unsigned long bs;
507 unsigned int pg_offset, bbits = inode->i_blkbits;
508 struct page *page;
509
510 tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
511 tloff = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT;
512 tloff = min(tlast, tloff);
513 for (tindex = start_page->index + 1; tindex < tloff; tindex++) {
514 page = xfs_probe_unwritten_page(mapping,
515 tindex, iomapp, ioend,
516 PAGE_CACHE_SIZE, &bs, bbits);
517 if (!page)
518 break;
519 nblocks += bs;
520 atomic_add(bs, &ioend->io_remaining);
521 xfs_convert_page(inode, page, iomapp, wbc, ioend,
522 startio, all_bh);
523 /* stop if converting the next page might add
524 * enough blocks that the corresponding byte
525 * count won't fit in our ulong page buf length */
526 if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
527 goto enough;
528 }
529
530 if (tindex == tlast &&
531 (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) {
532 page = xfs_probe_unwritten_page(mapping,
533 tindex, iomapp, ioend,
534 pg_offset, &bs, bbits);
535 if (page) {
536 nblocks += bs;
537 atomic_add(bs, &ioend->io_remaining);
538 xfs_convert_page(inode, page, iomapp, wbc, ioend,
539 startio, all_bh);
540 if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
541 goto enough;
542 }
543 }
544 } 627 }
545 628
546enough:
547 ioend->io_size = (xfs_off_t)nblocks << block_bits;
548 ioend->io_offset = offset;
549 xfs_finish_ioend(ioend);
550 return 0; 629 return 0;
551} 630}
552 631
553STATIC void
554xfs_submit_page(
555 struct page *page,
556 struct writeback_control *wbc,
557 struct buffer_head *bh_arr[],
558 int bh_count,
559 int probed_page,
560 int clear_dirty)
561{
562 struct buffer_head *bh;
563 int i;
564
565 BUG_ON(PageWriteback(page));
566 if (bh_count)
567 set_page_writeback(page);
568 if (clear_dirty)
569 clear_page_dirty(page);
570 unlock_page(page);
571
572 if (bh_count) {
573 for (i = 0; i < bh_count; i++) {
574 bh = bh_arr[i];
575 mark_buffer_async_write(bh);
576 if (buffer_unwritten(bh))
577 set_buffer_unwritten_io(bh);
578 set_buffer_uptodate(bh);
579 clear_buffer_dirty(bh);
580 }
581
582 for (i = 0; i < bh_count; i++)
583 submit_bh(WRITE, bh_arr[i]);
584
585 if (probed_page && clear_dirty)
586 wbc->nr_to_write--; /* Wrote an "extra" page */
587 }
588}
589
590/* 632/*
591 * Allocate & map buffers for page given the extent map. Write it out. 633 * Allocate & map buffers for page given the extent map. Write it out.
592 * except for the original page of a writepage, this is called on 634 * except for the original page of a writepage, this is called on
593 * delalloc/unwritten pages only, for the original page it is possible 635 * delalloc/unwritten pages only, for the original page it is possible
594 * that the page has no mapping at all. 636 * that the page has no mapping at all.
595 */ 637 */
596STATIC void 638STATIC int
597xfs_convert_page( 639xfs_convert_page(
598 struct inode *inode, 640 struct inode *inode,
599 struct page *page, 641 struct page *page,
600 xfs_iomap_t *iomapp, 642 loff_t tindex,
643 xfs_iomap_t *mp,
644 xfs_ioend_t **ioendp,
601 struct writeback_control *wbc, 645 struct writeback_control *wbc,
602 void *private,
603 int startio, 646 int startio,
604 int all_bh) 647 int all_bh)
605{ 648{
606 struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head; 649 struct buffer_head *bh, *head;
607 xfs_iomap_t *mp = iomapp, *tmp; 650 xfs_off_t end_offset;
608 unsigned long offset, end_offset; 651 unsigned long p_offset;
609 int index = 0; 652 unsigned int type;
610 int bbits = inode->i_blkbits; 653 int bbits = inode->i_blkbits;
611 int len, page_dirty; 654 int len, page_dirty;
655 int count = 0, done = 0, uptodate = 1;
656 xfs_off_t offset = page_offset(page);
612 657
613 end_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)); 658 if (page->index != tindex)
659 goto fail;
660 if (TestSetPageLocked(page))
661 goto fail;
662 if (PageWriteback(page))
663 goto fail_unlock_page;
664 if (page->mapping != inode->i_mapping)
665 goto fail_unlock_page;
666 if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
667 goto fail_unlock_page;
614 668
615 /* 669 /*
616 * page_dirty is initially a count of buffers on the page before 670 * page_dirty is initially a count of buffers on the page before
617 * EOF and is decrememted as we move each into a cleanable state. 671 * EOF and is decrememted as we move each into a cleanable state.
672 *
673 * Derivation:
674 *
675 * End offset is the highest offset that this page should represent.
676 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
677 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
678 * hence give us the correct page_dirty count. On any other page,
679 * it will be zero and in that case we need page_dirty to be the
680 * count of buffers on the page.
618 */ 681 */
682 end_offset = min_t(unsigned long long,
683 (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
684 i_size_read(inode));
685
619 len = 1 << inode->i_blkbits; 686 len = 1 << inode->i_blkbits;
620 end_offset = max(end_offset, PAGE_CACHE_SIZE); 687 p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
621 end_offset = roundup(end_offset, len); 688 PAGE_CACHE_SIZE);
622 page_dirty = end_offset / len; 689 p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
690 page_dirty = p_offset / len;
623 691
624 offset = 0;
625 bh = head = page_buffers(page); 692 bh = head = page_buffers(page);
626 do { 693 do {
627 if (offset >= end_offset) 694 if (offset >= end_offset)
628 break; 695 break;
629 if (!(PageUptodate(page) || buffer_uptodate(bh))) 696 if (!buffer_uptodate(bh))
697 uptodate = 0;
698 if (!(PageUptodate(page) || buffer_uptodate(bh))) {
699 done = 1;
630 continue; 700 continue;
631 if (buffer_mapped(bh) && all_bh && 701 }
632 !(buffer_unwritten(bh) || buffer_delay(bh))) { 702
703 if (buffer_unwritten(bh) || buffer_delay(bh)) {
704 if (buffer_unwritten(bh))
705 type = IOMAP_UNWRITTEN;
706 else
707 type = IOMAP_DELAY;
708
709 if (!xfs_iomap_valid(mp, offset)) {
710 done = 1;
711 continue;
712 }
713
714 ASSERT(!(mp->iomap_flags & IOMAP_HOLE));
715 ASSERT(!(mp->iomap_flags & IOMAP_DELAY));
716
717 xfs_map_at_offset(bh, offset, bbits, mp);
633 if (startio) { 718 if (startio) {
719 xfs_add_to_ioend(inode, bh, offset,
720 type, ioendp, done);
721 } else {
722 set_buffer_dirty(bh);
723 unlock_buffer(bh);
724 mark_buffer_dirty(bh);
725 }
726 page_dirty--;
727 count++;
728 } else {
729 type = 0;
730 if (buffer_mapped(bh) && all_bh && startio) {
634 lock_buffer(bh); 731 lock_buffer(bh);
635 bh_arr[index++] = bh; 732 xfs_add_to_ioend(inode, bh, offset,
733 type, ioendp, done);
734 count++;
636 page_dirty--; 735 page_dirty--;
736 } else {
737 done = 1;
637 } 738 }
638 continue;
639 } 739 }
640 tmp = xfs_offset_to_map(page, mp, offset); 740 } while (offset += len, (bh = bh->b_this_page) != head);
641 if (!tmp)
642 continue;
643 ASSERT(!(tmp->iomap_flags & IOMAP_HOLE));
644 ASSERT(!(tmp->iomap_flags & IOMAP_DELAY));
645 741
646 /* If this is a new unwritten extent buffer (i.e. one 742 if (uptodate && bh == head)
647 * that we haven't passed in private data for, we must 743 SetPageUptodate(page);
648 * now map this buffer too. 744
649 */ 745 if (startio) {
650 if (buffer_unwritten(bh) && !bh->b_end_io) { 746 if (count) {
651 ASSERT(tmp->iomap_flags & IOMAP_UNWRITTEN); 747 struct backing_dev_info *bdi;
652 xfs_map_unwritten(inode, page, head, bh, offset, 748
653 bbits, tmp, wbc, startio, all_bh); 749 bdi = inode->i_mapping->backing_dev_info;
654 } else if (! (buffer_unwritten(bh) && buffer_locked(bh))) { 750 if (bdi_write_congested(bdi)) {
655 xfs_map_at_offset(page, bh, offset, bbits, tmp); 751 wbc->encountered_congestion = 1;
656 if (buffer_unwritten(bh)) { 752 done = 1;
657 set_buffer_unwritten_io(bh); 753 } else if (--wbc->nr_to_write <= 0) {
658 bh->b_private = private; 754 done = 1;
659 ASSERT(private);
660 } 755 }
661 } 756 }
662 if (startio) { 757 xfs_start_page_writeback(page, wbc, !page_dirty, count);
663 bh_arr[index++] = bh;
664 } else {
665 set_buffer_dirty(bh);
666 unlock_buffer(bh);
667 mark_buffer_dirty(bh);
668 }
669 page_dirty--;
670 } while (offset += len, (bh = bh->b_this_page) != head);
671
672 if (startio && index) {
673 xfs_submit_page(page, wbc, bh_arr, index, 1, !page_dirty);
674 } else {
675 unlock_page(page);
676 } 758 }
759
760 return done;
761 fail_unlock_page:
762 unlock_page(page);
763 fail:
764 return 1;
677} 765}
678 766
679/* 767/*
@@ -685,19 +773,31 @@ xfs_cluster_write(
685 struct inode *inode, 773 struct inode *inode,
686 pgoff_t tindex, 774 pgoff_t tindex,
687 xfs_iomap_t *iomapp, 775 xfs_iomap_t *iomapp,
776 xfs_ioend_t **ioendp,
688 struct writeback_control *wbc, 777 struct writeback_control *wbc,
689 int startio, 778 int startio,
690 int all_bh, 779 int all_bh,
691 pgoff_t tlast) 780 pgoff_t tlast)
692{ 781{
693 struct page *page; 782 struct pagevec pvec;
783 int done = 0, i;
784
785 pagevec_init(&pvec, 0);
786 while (!done && tindex <= tlast) {
787 unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
694 788
695 for (; tindex <= tlast; tindex++) { 789 if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
696 page = xfs_probe_delalloc_page(inode, tindex);
697 if (!page)
698 break; 790 break;
699 xfs_convert_page(inode, page, iomapp, wbc, NULL, 791
700 startio, all_bh); 792 for (i = 0; i < pagevec_count(&pvec); i++) {
793 done = xfs_convert_page(inode, pvec.pages[i], tindex++,
794 iomapp, ioendp, wbc, startio, all_bh);
795 if (done)
796 break;
797 }
798
799 pagevec_release(&pvec);
800 cond_resched();
701 } 801 }
702} 802}
703 803
@@ -728,18 +828,22 @@ xfs_page_state_convert(
728 int startio, 828 int startio,
729 int unmapped) /* also implies page uptodate */ 829 int unmapped) /* also implies page uptodate */
730{ 830{
731 struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head; 831 struct buffer_head *bh, *head;
732 xfs_iomap_t *iomp, iomap; 832 xfs_iomap_t iomap;
833 xfs_ioend_t *ioend = NULL, *iohead = NULL;
733 loff_t offset; 834 loff_t offset;
734 unsigned long p_offset = 0; 835 unsigned long p_offset = 0;
836 unsigned int type;
735 __uint64_t end_offset; 837 __uint64_t end_offset;
736 pgoff_t end_index, last_index, tlast; 838 pgoff_t end_index, last_index, tlast;
737 int len, err, i, cnt = 0, uptodate = 1; 839 ssize_t size, len;
738 int flags; 840 int flags, err, iomap_valid = 0, uptodate = 1;
739 int page_dirty; 841 int page_dirty, count = 0, trylock_flag = 0;
842 int all_bh = unmapped;
740 843
741 /* wait for other IO threads? */ 844 /* wait for other IO threads? */
742 flags = (startio && wbc->sync_mode != WB_SYNC_NONE) ? 0 : BMAPI_TRYLOCK; 845 if (startio && (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking))
846 trylock_flag |= BMAPI_TRYLOCK;
743 847
744 /* Is this page beyond the end of the file? */ 848 /* Is this page beyond the end of the file? */
745 offset = i_size_read(inode); 849 offset = i_size_read(inode);
@@ -754,161 +858,173 @@ xfs_page_state_convert(
754 } 858 }
755 } 859 }
756 860
757 end_offset = min_t(unsigned long long,
758 (loff_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
759 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
760
761 /* 861 /*
762 * page_dirty is initially a count of buffers on the page before 862 * page_dirty is initially a count of buffers on the page before
763 * EOF and is decrememted as we move each into a cleanable state. 863 * EOF and is decrememted as we move each into a cleanable state.
764 */ 864 *
865 * Derivation:
866 *
867 * End offset is the highest offset that this page should represent.
868 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
869 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
870 * hence give us the correct page_dirty count. On any other page,
871 * it will be zero and in that case we need page_dirty to be the
872 * count of buffers on the page.
873 */
874 end_offset = min_t(unsigned long long,
875 (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
765 len = 1 << inode->i_blkbits; 876 len = 1 << inode->i_blkbits;
766 p_offset = max(p_offset, PAGE_CACHE_SIZE); 877 p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
767 p_offset = roundup(p_offset, len); 878 PAGE_CACHE_SIZE);
879 p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
768 page_dirty = p_offset / len; 880 page_dirty = p_offset / len;
769 881
770 iomp = NULL;
771 p_offset = 0;
772 bh = head = page_buffers(page); 882 bh = head = page_buffers(page);
883 offset = page_offset(page);
884 flags = -1;
885 type = 0;
886
887 /* TODO: cleanup count and page_dirty */
773 888
774 do { 889 do {
775 if (offset >= end_offset) 890 if (offset >= end_offset)
776 break; 891 break;
777 if (!buffer_uptodate(bh)) 892 if (!buffer_uptodate(bh))
778 uptodate = 0; 893 uptodate = 0;
779 if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) 894 if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) {
895 /*
896 * the iomap is actually still valid, but the ioend
897 * isn't. shouldn't happen too often.
898 */
899 iomap_valid = 0;
780 continue; 900 continue;
781
782 if (iomp) {
783 iomp = xfs_offset_to_map(page, &iomap, p_offset);
784 } 901 }
785 902
903 if (iomap_valid)
904 iomap_valid = xfs_iomap_valid(&iomap, offset);
905
786 /* 906 /*
787 * First case, map an unwritten extent and prepare for 907 * First case, map an unwritten extent and prepare for
788 * extent state conversion transaction on completion. 908 * extent state conversion transaction on completion.
789 */ 909 *
790 if (buffer_unwritten(bh)) { 910 * Second case, allocate space for a delalloc buffer.
791 if (!startio) 911 * We can return EAGAIN here in the release page case.
792 continue; 912 *
793 if (!iomp) { 913 * Third case, an unmapped buffer was found, and we are
794 err = xfs_map_blocks(inode, offset, len, &iomap, 914 * in a path where we need to write the whole page out.
795 BMAPI_WRITE|BMAPI_IGNSTATE); 915 */
796 if (err) { 916 if (buffer_unwritten(bh) || buffer_delay(bh) ||
797 goto error; 917 ((buffer_uptodate(bh) || PageUptodate(page)) &&
798 } 918 !buffer_mapped(bh) && (unmapped || startio))) {
799 iomp = xfs_offset_to_map(page, &iomap, 919 /*
800 p_offset); 920 * Make sure we don't use a read-only iomap
921 */
922 if (flags == BMAPI_READ)
923 iomap_valid = 0;
924
925 if (buffer_unwritten(bh)) {
926 type = IOMAP_UNWRITTEN;
927 flags = BMAPI_WRITE|BMAPI_IGNSTATE;
928 } else if (buffer_delay(bh)) {
929 type = IOMAP_DELAY;
930 flags = BMAPI_ALLOCATE;
931 if (!startio)
932 flags |= trylock_flag;
933 } else {
934 type = IOMAP_NEW;
935 flags = BMAPI_WRITE|BMAPI_MMAP;
801 } 936 }
802 if (iomp) { 937
803 if (!bh->b_end_io) { 938 if (!iomap_valid) {
804 err = xfs_map_unwritten(inode, page, 939 if (type == IOMAP_NEW) {
805 head, bh, p_offset, 940 size = xfs_probe_cluster(inode,
806 inode->i_blkbits, iomp, 941 page, bh, head, 0);
807 wbc, startio, unmapped);
808 if (err) {
809 goto error;
810 }
811 } else { 942 } else {
812 set_bit(BH_Lock, &bh->b_state); 943 size = len;
813 } 944 }
814 BUG_ON(!buffer_locked(bh)); 945
815 bh_arr[cnt++] = bh; 946 err = xfs_map_blocks(inode, offset, size,
816 page_dirty--; 947 &iomap, flags);
817 } 948 if (err)
818 /*
819 * Second case, allocate space for a delalloc buffer.
820 * We can return EAGAIN here in the release page case.
821 */
822 } else if (buffer_delay(bh)) {
823 if (!iomp) {
824 err = xfs_map_blocks(inode, offset, len, &iomap,
825 BMAPI_ALLOCATE | flags);
826 if (err) {
827 goto error; 949 goto error;
828 } 950 iomap_valid = xfs_iomap_valid(&iomap, offset);
829 iomp = xfs_offset_to_map(page, &iomap,
830 p_offset);
831 } 951 }
832 if (iomp) { 952 if (iomap_valid) {
833 xfs_map_at_offset(page, bh, p_offset, 953 xfs_map_at_offset(bh, offset,
834 inode->i_blkbits, iomp); 954 inode->i_blkbits, &iomap);
835 if (startio) { 955 if (startio) {
836 bh_arr[cnt++] = bh; 956 xfs_add_to_ioend(inode, bh, offset,
957 type, &ioend,
958 !iomap_valid);
837 } else { 959 } else {
838 set_buffer_dirty(bh); 960 set_buffer_dirty(bh);
839 unlock_buffer(bh); 961 unlock_buffer(bh);
840 mark_buffer_dirty(bh); 962 mark_buffer_dirty(bh);
841 } 963 }
842 page_dirty--; 964 page_dirty--;
965 count++;
966 }
967 } else if (buffer_uptodate(bh) && startio) {
968 /*
969 * we got here because the buffer is already mapped.
970 * That means it must already have extents allocated
971 * underneath it. Map the extent by reading it.
972 */
973 if (!iomap_valid || type != 0) {
974 flags = BMAPI_READ;
975 size = xfs_probe_cluster(inode, page, bh,
976 head, 1);
977 err = xfs_map_blocks(inode, offset, size,
978 &iomap, flags);
979 if (err)
980 goto error;
981 iomap_valid = xfs_iomap_valid(&iomap, offset);
843 } 982 }
844 } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
845 (unmapped || startio)) {
846 983
847 if (!buffer_mapped(bh)) { 984 type = 0;
848 int size; 985 if (!test_and_set_bit(BH_Lock, &bh->b_state)) {
849 986 ASSERT(buffer_mapped(bh));
850 /* 987 if (iomap_valid)
851 * Getting here implies an unmapped buffer 988 all_bh = 1;
852 * was found, and we are in a path where we 989 xfs_add_to_ioend(inode, bh, offset, type,
853 * need to write the whole page out. 990 &ioend, !iomap_valid);
854 */ 991 page_dirty--;
855 if (!iomp) { 992 count++;
856 size = xfs_probe_unmapped_cluster( 993 } else {
857 inode, page, bh, head); 994 iomap_valid = 0;
858 err = xfs_map_blocks(inode, offset,
859 size, &iomap,
860 BMAPI_WRITE|BMAPI_MMAP);
861 if (err) {
862 goto error;
863 }
864 iomp = xfs_offset_to_map(page, &iomap,
865 p_offset);
866 }
867 if (iomp) {
868 xfs_map_at_offset(page,
869 bh, p_offset,
870 inode->i_blkbits, iomp);
871 if (startio) {
872 bh_arr[cnt++] = bh;
873 } else {
874 set_buffer_dirty(bh);
875 unlock_buffer(bh);
876 mark_buffer_dirty(bh);
877 }
878 page_dirty--;
879 }
880 } else if (startio) {
881 if (buffer_uptodate(bh) &&
882 !test_and_set_bit(BH_Lock, &bh->b_state)) {
883 bh_arr[cnt++] = bh;
884 page_dirty--;
885 }
886 } 995 }
996 } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
997 (unmapped || startio)) {
998 iomap_valid = 0;
887 } 999 }
888 } while (offset += len, p_offset += len, 1000
889 ((bh = bh->b_this_page) != head)); 1001 if (!iohead)
1002 iohead = ioend;
1003
1004 } while (offset += len, ((bh = bh->b_this_page) != head));
890 1005
891 if (uptodate && bh == head) 1006 if (uptodate && bh == head)
892 SetPageUptodate(page); 1007 SetPageUptodate(page);
893 1008
894 if (startio) { 1009 if (startio)
895 xfs_submit_page(page, wbc, bh_arr, cnt, 0, !page_dirty); 1010 xfs_start_page_writeback(page, wbc, 1, count);
896 }
897 1011
898 if (iomp) { 1012 if (ioend && iomap_valid) {
899 offset = (iomp->iomap_offset + iomp->iomap_bsize - 1) >> 1013 offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
900 PAGE_CACHE_SHIFT; 1014 PAGE_CACHE_SHIFT;
901 tlast = min_t(pgoff_t, offset, last_index); 1015 tlast = min_t(pgoff_t, offset, last_index);
902 xfs_cluster_write(inode, page->index + 1, iomp, wbc, 1016 xfs_cluster_write(inode, page->index + 1, &iomap, &ioend,
903 startio, unmapped, tlast); 1017 wbc, startio, all_bh, tlast);
904 } 1018 }
905 1019
1020 if (iohead)
1021 xfs_submit_ioend(iohead);
1022
906 return page_dirty; 1023 return page_dirty;
907 1024
908error: 1025error:
909 for (i = 0; i < cnt; i++) { 1026 if (iohead)
910 unlock_buffer(bh_arr[i]); 1027 xfs_cancel_ioend(iohead);
911 }
912 1028
913 /* 1029 /*
914 * If it's delalloc and we have nowhere to put it, 1030 * If it's delalloc and we have nowhere to put it,
@@ -916,9 +1032,8 @@ error:
916 * us to try again. 1032 * us to try again.
917 */ 1033 */
918 if (err != -EAGAIN) { 1034 if (err != -EAGAIN) {
919 if (!unmapped) { 1035 if (!unmapped)
920 block_invalidatepage(page, 0); 1036 block_invalidatepage(page, 0);
921 }
922 ClearPageUptodate(page); 1037 ClearPageUptodate(page);
923 } 1038 }
924 return err; 1039 return err;
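
A worked example (hypothetical sizes) of the page_dirty derivation described in the comments added to xfs_convert_page() and xfs_page_state_convert() above: 4096-byte pages and 512-byte blocks.

#include <assert.h>

#define PAGE_SIZE_EX	4096UL			/* stand-in for PAGE_CACHE_SIZE */
#define ROUNDUP(x, y)	((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
	unsigned long len = 512;		/* 1 << inode->i_blkbits */
	unsigned long p_offset;

	/* Last page of a file whose size ends 1300 bytes into the page. */
	p_offset = 1300 & (PAGE_SIZE_EX - 1);
	p_offset = p_offset ? ROUNDUP(p_offset, len) : PAGE_SIZE_EX;
	assert(p_offset / len == 3);		/* 3 buffers lie before EOF */

	/* A page wholly below EOF: the masked end offset is zero. */
	p_offset = 0;
	p_offset = p_offset ? ROUNDUP(p_offset, len) : PAGE_SIZE_EX;
	assert(p_offset / len == 8);		/* all 8 buffers count */
	return 0;
}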
@@ -982,7 +1097,7 @@ __linvfs_get_block(
982 } 1097 }
983 1098
984 /* If this is a realtime file, data might be on a new device */ 1099 /* If this is a realtime file, data might be on a new device */
985 bh_result->b_bdev = iomap.iomap_target->pbr_bdev; 1100 bh_result->b_bdev = iomap.iomap_target->bt_bdev;
986 1101
987 /* If we previously allocated a block out beyond eof and 1102 /* If we previously allocated a block out beyond eof and
988 * we are now coming back to use it then we will need to 1103 * we are now coming back to use it then we will need to
@@ -1094,10 +1209,10 @@ linvfs_direct_IO(
1094 if (error) 1209 if (error)
1095 return -error; 1210 return -error;
1096 1211
1097 iocb->private = xfs_alloc_ioend(inode); 1212 iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
1098 1213
1099 ret = blockdev_direct_IO_own_locking(rw, iocb, inode, 1214 ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
1100 iomap.iomap_target->pbr_bdev, 1215 iomap.iomap_target->bt_bdev,
1101 iov, offset, nr_segs, 1216 iov, offset, nr_segs,
1102 linvfs_get_blocks_direct, 1217 linvfs_get_blocks_direct,
1103 linvfs_end_io_direct); 1218 linvfs_end_io_direct);
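
To summarize the completion-side split introduced above, a small illustrative sketch (userspace C with simplified names and values; the real code stores a work item in the ioend via INIT_WORK): the type chosen in xfs_alloc_ioend() decides which handler runs once all bios complete, and the direct I/O path now requests the unwritten-extent handler.

#include <stdio.h>

enum ioend_type { IOMAP_NEW = 0, IOMAP_DELAY, IOMAP_UNWRITTEN };

static const char *completion_handler(enum ioend_type type)
{
	switch (type) {
	case IOMAP_UNWRITTEN:
		return "xfs_end_bio_unwritten";	/* convert extents, then destroy ioend */
	case IOMAP_DELAY:
		return "xfs_end_bio_delalloc";	/* delalloc writeback completion */
	default:
		return "xfs_end_bio_written";	/* plain written extents */
	}
}

int main(void)
{
	printf("direct I/O -> %s\n", completion_handler(IOMAP_UNWRITTEN));
	printf("delalloc   -> %s\n", completion_handler(IOMAP_DELAY));
	printf("overwrite  -> %s\n", completion_handler(IOMAP_NEW));
	return 0;
}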