diff options
author | Christoph Hellwig <hch@sgi.com> | 2006-01-10 23:40:13 -0500 |
---|---|---|
committer | Nathan Scott <nathans@sgi.com> | 2006-01-10 23:40:13 -0500 |
commit | f6d6d4fcd180f8e47bf6b13fc6cce1e6c156d0ea (patch) | |
tree | 2d4e981bb61f564904f7b7ca1ab69d163c0f69dd | |
parent | ce8e922c0e79c8093452ba9a124981332b75706b (diff) |
[XFS] Initial pass at going directly-to-bio on the buffered IO path. This
allows us to submit much larger I/Os instead of sending down lots of small
buffer_heads. To do this we need to have a rather complicated I/O
submission and completion tracking infrastructure. Part of the latter has
been merged already a long time ago for direct I/O support. Part of the
problem is that we need to track sub-pagesize regions and for that we
still need buffer_heads for the time being. Long-term I hope we can move
to better data structures and/or maybe move this to fs/mpage.c instead of
having it in XFS. Original patch from Nathan Scott with various updates
from David Chinner and Christoph Hellwig.
SGI-PV: 947118
SGI-Modid: xfs-linux-melb:xfs-kern:203822a
Signed-off-by: Christoph Hellwig <hch@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
-rw-r--r-- | fs/xfs/linux-2.6/xfs_aops.c | 776 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_aops.h | 10 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_iops.h | 5 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_linux.h | 4 |
4 files changed, 431 insertions, 364 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 3f6b9e29850c..e99d04d3fe82 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -43,8 +43,6 @@ | |||
43 | #include <linux/writeback.h> | 43 | #include <linux/writeback.h> |
44 | 44 | ||
45 | STATIC void xfs_count_page_state(struct page *, int *, int *, int *); | 45 | STATIC void xfs_count_page_state(struct page *, int *, int *, int *); |
46 | STATIC void xfs_convert_page(struct inode *, struct page *, xfs_iomap_t *, | ||
47 | struct writeback_control *wbc, void *, int, int); | ||
48 | 46 | ||
49 | #if defined(XFS_RW_TRACE) | 47 | #if defined(XFS_RW_TRACE) |
50 | void | 48 | void |
@@ -58,7 +56,7 @@ xfs_page_trace( | |||
58 | bhv_desc_t *bdp; | 56 | bhv_desc_t *bdp; |
59 | vnode_t *vp = LINVFS_GET_VP(inode); | 57 | vnode_t *vp = LINVFS_GET_VP(inode); |
60 | loff_t isize = i_size_read(inode); | 58 | loff_t isize = i_size_read(inode); |
61 | loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT; | 59 | loff_t offset = page_offset(page); |
62 | int delalloc = -1, unmapped = -1, unwritten = -1; | 60 | int delalloc = -1, unmapped = -1, unwritten = -1; |
63 | 61 | ||
64 | if (page_has_buffers(page)) | 62 | if (page_has_buffers(page)) |
@@ -103,15 +101,56 @@ xfs_finish_ioend( | |||
103 | queue_work(xfsdatad_workqueue, &ioend->io_work); | 101 | queue_work(xfsdatad_workqueue, &ioend->io_work); |
104 | } | 102 | } |
105 | 103 | ||
104 | /* | ||
105 | * We're now finished for good with this ioend structure. | ||
106 | * Update the page state via the associated buffer_heads, | ||
107 | * release holds on the inode and bio, and finally free | ||
108 | * up memory. Do not use the ioend after this. | ||
109 | */ | ||
106 | STATIC void | 110 | STATIC void |
107 | xfs_destroy_ioend( | 111 | xfs_destroy_ioend( |
108 | xfs_ioend_t *ioend) | 112 | xfs_ioend_t *ioend) |
109 | { | 113 | { |
114 | struct buffer_head *bh, *next; | ||
115 | |||
116 | for (bh = ioend->io_buffer_head; bh; bh = next) { | ||
117 | next = bh->b_private; | ||
118 | bh->b_end_io(bh, ioend->io_uptodate); | ||
119 | } | ||
120 | |||
110 | vn_iowake(ioend->io_vnode); | 121 | vn_iowake(ioend->io_vnode); |
111 | mempool_free(ioend, xfs_ioend_pool); | 122 | mempool_free(ioend, xfs_ioend_pool); |
112 | } | 123 | } |
113 | 124 | ||
114 | /* | 125 | /* |
126 | * Buffered IO write completion for delayed allocate extents. | ||
127 | * TODO: Update ondisk isize now that we know the file data | ||
128 | * has been flushed (i.e. the notorious "NULL file" problem). | ||
129 | */ | ||
130 | STATIC void | ||
131 | xfs_end_bio_delalloc( | ||
132 | void *data) | ||
133 | { | ||
134 | xfs_ioend_t *ioend = data; | ||
135 | |||
136 | xfs_destroy_ioend(ioend); | ||
137 | } | ||
138 | |||
139 | /* | ||
140 | * Buffered IO write completion for regular, written extents. | ||
141 | */ | ||
142 | STATIC void | ||
143 | xfs_end_bio_written( | ||
144 | void *data) | ||
145 | { | ||
146 | xfs_ioend_t *ioend = data; | ||
147 | |||
148 | xfs_destroy_ioend(ioend); | ||
149 | } | ||
150 | |||
151 | /* | ||
152 | * IO write completion for unwritten extents. | ||
153 | * | ||
115 | * Issue transactions to convert a buffer range from unwritten | 154 | * Issue transactions to convert a buffer range from unwritten |
116 | * to written extents. | 155 | * to written extents. |
117 | */ | 156 | */ |
@@ -123,21 +162,10 @@ xfs_end_bio_unwritten( | |||
123 | vnode_t *vp = ioend->io_vnode; | 162 | vnode_t *vp = ioend->io_vnode; |
124 | xfs_off_t offset = ioend->io_offset; | 163 | xfs_off_t offset = ioend->io_offset; |
125 | size_t size = ioend->io_size; | 164 | size_t size = ioend->io_size; |
126 | struct buffer_head *bh, *next; | ||
127 | int error; | 165 | int error; |
128 | 166 | ||
129 | if (ioend->io_uptodate) | 167 | if (ioend->io_uptodate) |
130 | VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); | 168 | VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); |
131 | |||
132 | /* ioend->io_buffer_head is only non-NULL for buffered I/O */ | ||
133 | for (bh = ioend->io_buffer_head; bh; bh = next) { | ||
134 | next = bh->b_private; | ||
135 | |||
136 | bh->b_end_io = NULL; | ||
137 | clear_buffer_unwritten(bh); | ||
138 | end_buffer_async_write(bh, ioend->io_uptodate); | ||
139 | } | ||
140 | |||
141 | xfs_destroy_ioend(ioend); | 169 | xfs_destroy_ioend(ioend); |
142 | } | 170 | } |
143 | 171 | ||
@@ -149,7 +177,8 @@ xfs_end_bio_unwritten( | |||
149 | */ | 177 | */ |
150 | STATIC xfs_ioend_t * | 178 | STATIC xfs_ioend_t * |
151 | xfs_alloc_ioend( | 179 | xfs_alloc_ioend( |
152 | struct inode *inode) | 180 | struct inode *inode, |
181 | unsigned int type) | ||
153 | { | 182 | { |
154 | xfs_ioend_t *ioend; | 183 | xfs_ioend_t *ioend; |
155 | 184 | ||
@@ -162,45 +191,25 @@ xfs_alloc_ioend( | |||
162 | */ | 191 | */ |
163 | atomic_set(&ioend->io_remaining, 1); | 192 | atomic_set(&ioend->io_remaining, 1); |
164 | ioend->io_uptodate = 1; /* cleared if any I/O fails */ | 193 | ioend->io_uptodate = 1; /* cleared if any I/O fails */ |
194 | ioend->io_list = NULL; | ||
195 | ioend->io_type = type; | ||
165 | ioend->io_vnode = LINVFS_GET_VP(inode); | 196 | ioend->io_vnode = LINVFS_GET_VP(inode); |
166 | ioend->io_buffer_head = NULL; | 197 | ioend->io_buffer_head = NULL; |
198 | ioend->io_buffer_tail = NULL; | ||
167 | atomic_inc(&ioend->io_vnode->v_iocount); | 199 | atomic_inc(&ioend->io_vnode->v_iocount); |
168 | ioend->io_offset = 0; | 200 | ioend->io_offset = 0; |
169 | ioend->io_size = 0; | 201 | ioend->io_size = 0; |
170 | 202 | ||
171 | INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend); | 203 | if (type == IOMAP_UNWRITTEN) |
204 | INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend); | ||
205 | else if (type == IOMAP_DELAY) | ||
206 | INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc, ioend); | ||
207 | else | ||
208 | INIT_WORK(&ioend->io_work, xfs_end_bio_written, ioend); | ||
172 | 209 | ||
173 | return ioend; | 210 | return ioend; |
174 | } | 211 | } |
175 | 212 | ||
176 | void | ||
177 | linvfs_unwritten_done( | ||
178 | struct buffer_head *bh, | ||
179 | int uptodate) | ||
180 | { | ||
181 | xfs_ioend_t *ioend = bh->b_private; | ||
182 | static spinlock_t unwritten_done_lock = SPIN_LOCK_UNLOCKED; | ||
183 | unsigned long flags; | ||
184 | |||
185 | ASSERT(buffer_unwritten(bh)); | ||
186 | bh->b_end_io = NULL; | ||
187 | |||
188 | if (!uptodate) | ||
189 | ioend->io_uptodate = 0; | ||
190 | |||
191 | /* | ||
192 | * Deep magic here. We reuse b_private in the buffer_heads to build | ||
193 | * a chain for completing the I/O from user context after we've issued | ||
194 | * a transaction to convert the unwritten extent. | ||
195 | */ | ||
196 | spin_lock_irqsave(&unwritten_done_lock, flags); | ||
197 | bh->b_private = ioend->io_buffer_head; | ||
198 | ioend->io_buffer_head = bh; | ||
199 | spin_unlock_irqrestore(&unwritten_done_lock, flags); | ||
200 | |||
201 | xfs_finish_ioend(ioend); | ||
202 | } | ||
203 | |||
204 | STATIC int | 213 | STATIC int |
205 | xfs_map_blocks( | 214 | xfs_map_blocks( |
206 | struct inode *inode, | 215 | struct inode *inode, |
@@ -228,7 +237,7 @@ xfs_offset_to_map( | |||
228 | xfs_iomap_t *iomapp, | 237 | xfs_iomap_t *iomapp, |
229 | unsigned long offset) | 238 | unsigned long offset) |
230 | { | 239 | { |
231 | loff_t full_offset; /* offset from start of file */ | 240 | xfs_off_t full_offset; /* offset from start of file */ |
232 | 241 | ||
233 | ASSERT(offset < PAGE_CACHE_SIZE); | 242 | ASSERT(offset < PAGE_CACHE_SIZE); |
234 | 243 | ||
@@ -243,16 +252,223 @@ xfs_offset_to_map( | |||
243 | return NULL; | 252 | return NULL; |
244 | } | 253 | } |
245 | 254 | ||
255 | /* | ||
256 | * BIO completion handler for buffered IO. | ||
257 | */ | ||
258 | STATIC int | ||
259 | xfs_end_bio( | ||
260 | struct bio *bio, | ||
261 | unsigned int bytes_done, | ||
262 | int error) | ||
263 | { | ||
264 | xfs_ioend_t *ioend = bio->bi_private; | ||
265 | |||
266 | if (bio->bi_size) | ||
267 | return 1; | ||
268 | |||
269 | ASSERT(ioend); | ||
270 | ASSERT(atomic_read(&bio->bi_cnt) >= 1); | ||
271 | |||
272 | /* Toss bio and pass work off to an xfsdatad thread */ | ||
273 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | ||
274 | ioend->io_uptodate = 0; | ||
275 | bio->bi_private = NULL; | ||
276 | bio->bi_end_io = NULL; | ||
277 | |||
278 | bio_put(bio); | ||
279 | xfs_finish_ioend(ioend); | ||
280 | return 0; | ||
281 | } | ||
282 | |||
283 | STATIC void | ||
284 | xfs_submit_ioend_bio( | ||
285 | xfs_ioend_t *ioend, | ||
286 | struct bio *bio) | ||
287 | { | ||
288 | atomic_inc(&ioend->io_remaining); | ||
289 | |||
290 | bio->bi_private = ioend; | ||
291 | bio->bi_end_io = xfs_end_bio; | ||
292 | |||
293 | submit_bio(WRITE, bio); | ||
294 | ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP)); | ||
295 | bio_put(bio); | ||
296 | } | ||
297 | |||
298 | STATIC struct bio * | ||
299 | xfs_alloc_ioend_bio( | ||
300 | struct buffer_head *bh) | ||
301 | { | ||
302 | struct bio *bio; | ||
303 | int nvecs = bio_get_nr_vecs(bh->b_bdev); | ||
304 | |||
305 | do { | ||
306 | bio = bio_alloc(GFP_NOIO, nvecs); | ||
307 | nvecs >>= 1; | ||
308 | } while (!bio); | ||
309 | |||
310 | ASSERT(bio->bi_private == NULL); | ||
311 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); | ||
312 | bio->bi_bdev = bh->b_bdev; | ||
313 | bio_get(bio); | ||
314 | return bio; | ||
315 | } | ||
316 | |||
317 | STATIC void | ||
318 | xfs_start_buffer_writeback( | ||
319 | struct buffer_head *bh) | ||
320 | { | ||
321 | ASSERT(buffer_mapped(bh)); | ||
322 | ASSERT(buffer_locked(bh)); | ||
323 | ASSERT(!buffer_delay(bh)); | ||
324 | ASSERT(!buffer_unwritten(bh)); | ||
325 | |||
326 | mark_buffer_async_write(bh); | ||
327 | set_buffer_uptodate(bh); | ||
328 | clear_buffer_dirty(bh); | ||
329 | } | ||
330 | |||
331 | STATIC void | ||
332 | xfs_start_page_writeback( | ||
333 | struct page *page, | ||
334 | struct writeback_control *wbc, | ||
335 | int clear_dirty, | ||
336 | int buffers) | ||
337 | { | ||
338 | ASSERT(PageLocked(page)); | ||
339 | ASSERT(!PageWriteback(page)); | ||
340 | set_page_writeback(page); | ||
341 | if (clear_dirty) | ||
342 | clear_page_dirty(page); | ||
343 | unlock_page(page); | ||
344 | if (!buffers) { | ||
345 | end_page_writeback(page); | ||
346 | wbc->pages_skipped++; /* We didn't write this page */ | ||
347 | } | ||
348 | } | ||
349 | |||
350 | static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh) | ||
351 | { | ||
352 | return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); | ||
353 | } | ||
354 | |||
355 | /* | ||
356 | * Submit all of the bios for all of the ioends we have saved up, | ||
357 | * covering the initial writepage page and also any probed pages. | ||
358 | */ | ||
359 | STATIC void | ||
360 | xfs_submit_ioend( | ||
361 | xfs_ioend_t *ioend) | ||
362 | { | ||
363 | xfs_ioend_t *next; | ||
364 | struct buffer_head *bh; | ||
365 | struct bio *bio; | ||
366 | sector_t lastblock = 0; | ||
367 | |||
368 | do { | ||
369 | next = ioend->io_list; | ||
370 | bio = NULL; | ||
371 | |||
372 | for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { | ||
373 | xfs_start_buffer_writeback(bh); | ||
374 | |||
375 | if (!bio) { | ||
376 | retry: | ||
377 | bio = xfs_alloc_ioend_bio(bh); | ||
378 | } else if (bh->b_blocknr != lastblock + 1) { | ||
379 | xfs_submit_ioend_bio(ioend, bio); | ||
380 | goto retry; | ||
381 | } | ||
382 | |||
383 | if (bio_add_buffer(bio, bh) != bh->b_size) { | ||
384 | xfs_submit_ioend_bio(ioend, bio); | ||
385 | goto retry; | ||
386 | } | ||
387 | |||
388 | lastblock = bh->b_blocknr; | ||
389 | } | ||
390 | if (bio) | ||
391 | xfs_submit_ioend_bio(ioend, bio); | ||
392 | xfs_finish_ioend(ioend); | ||
393 | } while ((ioend = next) != NULL); | ||
394 | } | ||
395 | |||
396 | /* | ||
397 | * Cancel submission of all buffer_heads so far in this endio. | ||
398 | * Toss the endio too. Only ever called for the initial page | ||
399 | * in a writepage request, so only ever one page. | ||
400 | */ | ||
401 | STATIC void | ||
402 | xfs_cancel_ioend( | ||
403 | xfs_ioend_t *ioend) | ||
404 | { | ||
405 | xfs_ioend_t *next; | ||
406 | struct buffer_head *bh, *next_bh; | ||
407 | |||
408 | do { | ||
409 | next = ioend->io_list; | ||
410 | bh = ioend->io_buffer_head; | ||
411 | do { | ||
412 | next_bh = bh->b_private; | ||
413 | clear_buffer_async_write(bh); | ||
414 | unlock_buffer(bh); | ||
415 | } while ((bh = next_bh) != NULL); | ||
416 | |||
417 | vn_iowake(ioend->io_vnode); | ||
418 | mempool_free(ioend, xfs_ioend_pool); | ||
419 | } while ((ioend = next) != NULL); | ||
420 | } | ||
421 | |||
422 | /* | ||
423 | * Test to see if we've been building up a completion structure for | ||
424 | * earlier buffers -- if so, we try to append to this ioend if we | ||
425 | * can, otherwise we finish off any current ioend and start another. | ||
426 | * Return true if we've finished the given ioend. | ||
427 | */ | ||
428 | STATIC void | ||
429 | xfs_add_to_ioend( | ||
430 | struct inode *inode, | ||
431 | struct buffer_head *bh, | ||
432 | unsigned int p_offset, | ||
433 | unsigned int type, | ||
434 | xfs_ioend_t **result, | ||
435 | int need_ioend) | ||
436 | { | ||
437 | xfs_ioend_t *ioend = *result; | ||
438 | |||
439 | if (!ioend || need_ioend || type != ioend->io_type) { | ||
440 | xfs_ioend_t *previous = *result; | ||
441 | xfs_off_t offset; | ||
442 | |||
443 | offset = (xfs_off_t)bh->b_page->index << PAGE_CACHE_SHIFT; | ||
444 | offset += p_offset; | ||
445 | ioend = xfs_alloc_ioend(inode, type); | ||
446 | ioend->io_offset = offset; | ||
447 | ioend->io_buffer_head = bh; | ||
448 | ioend->io_buffer_tail = bh; | ||
449 | if (previous) | ||
450 | previous->io_list = ioend; | ||
451 | *result = ioend; | ||
452 | } else { | ||
453 | ioend->io_buffer_tail->b_private = bh; | ||
454 | ioend->io_buffer_tail = bh; | ||
455 | } | ||
456 | |||
457 | bh->b_private = NULL; | ||
458 | ioend->io_size += bh->b_size; | ||
459 | } | ||
460 | |||
246 | STATIC void | 461 | STATIC void |
247 | xfs_map_at_offset( | 462 | xfs_map_at_offset( |
248 | struct page *page, | 463 | struct page *page, |
249 | struct buffer_head *bh, | 464 | struct buffer_head *bh, |
250 | unsigned long offset, | 465 | unsigned long offset, |
251 | int block_bits, | 466 | int block_bits, |
252 | xfs_iomap_t *iomapp) | 467 | xfs_iomap_t *iomapp, |
468 | xfs_ioend_t *ioend) | ||
253 | { | 469 | { |
254 | xfs_daddr_t bn; | 470 | xfs_daddr_t bn; |
255 | loff_t delta; | 471 | xfs_off_t delta; |
256 | int sector_shift; | 472 | int sector_shift; |
257 | 473 | ||
258 | ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE)); | 474 | ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE)); |
@@ -276,60 +492,7 @@ xfs_map_at_offset( | |||
276 | bh->b_bdev = iomapp->iomap_target->bt_bdev; | 492 | bh->b_bdev = iomapp->iomap_target->bt_bdev; |
277 | set_buffer_mapped(bh); | 493 | set_buffer_mapped(bh); |
278 | clear_buffer_delay(bh); | 494 | clear_buffer_delay(bh); |
279 | } | 495 | clear_buffer_unwritten(bh); |
280 | |||
281 | /* | ||
282 | * Look for a page at index which is unlocked and contains our | ||
283 | * unwritten extent flagged buffers at its head. Returns page | ||
284 | * locked and with an extra reference count, and length of the | ||
285 | * unwritten extent component on this page that we can write, | ||
286 | * in units of filesystem blocks. | ||
287 | */ | ||
288 | STATIC struct page * | ||
289 | xfs_probe_unwritten_page( | ||
290 | struct address_space *mapping, | ||
291 | pgoff_t index, | ||
292 | xfs_iomap_t *iomapp, | ||
293 | xfs_ioend_t *ioend, | ||
294 | unsigned long max_offset, | ||
295 | unsigned long *fsbs, | ||
296 | unsigned int bbits) | ||
297 | { | ||
298 | struct page *page; | ||
299 | |||
300 | page = find_trylock_page(mapping, index); | ||
301 | if (!page) | ||
302 | return NULL; | ||
303 | if (PageWriteback(page)) | ||
304 | goto out; | ||
305 | |||
306 | if (page->mapping && page_has_buffers(page)) { | ||
307 | struct buffer_head *bh, *head; | ||
308 | unsigned long p_offset = 0; | ||
309 | |||
310 | *fsbs = 0; | ||
311 | bh = head = page_buffers(page); | ||
312 | do { | ||
313 | if (!buffer_unwritten(bh) || !buffer_uptodate(bh)) | ||
314 | break; | ||
315 | if (!xfs_offset_to_map(page, iomapp, p_offset)) | ||
316 | break; | ||
317 | if (p_offset >= max_offset) | ||
318 | break; | ||
319 | xfs_map_at_offset(page, bh, p_offset, bbits, iomapp); | ||
320 | set_buffer_unwritten_io(bh); | ||
321 | bh->b_private = ioend; | ||
322 | p_offset += bh->b_size; | ||
323 | (*fsbs)++; | ||
324 | } while ((bh = bh->b_this_page) != head); | ||
325 | |||
326 | if (p_offset) | ||
327 | return page; | ||
328 | } | ||
329 | |||
330 | out: | ||
331 | unlock_page(page); | ||
332 | return NULL; | ||
333 | } | 496 | } |
334 | 497 | ||
335 | /* | 498 | /* |
@@ -372,15 +535,16 @@ out: | |||
372 | return ret; | 535 | return ret; |
373 | } | 536 | } |
374 | 537 | ||
375 | STATIC unsigned int | 538 | STATIC size_t |
376 | xfs_probe_unmapped_cluster( | 539 | xfs_probe_unmapped_cluster( |
377 | struct inode *inode, | 540 | struct inode *inode, |
378 | struct page *startpage, | 541 | struct page *startpage, |
379 | struct buffer_head *bh, | 542 | struct buffer_head *bh, |
380 | struct buffer_head *head) | 543 | struct buffer_head *head) |
381 | { | 544 | { |
545 | size_t len, total = 0; | ||
382 | pgoff_t tindex, tlast, tloff; | 546 | pgoff_t tindex, tlast, tloff; |
383 | unsigned int pg_offset, len, total = 0; | 547 | unsigned int pg_offset; |
384 | struct address_space *mapping = inode->i_mapping; | 548 | struct address_space *mapping = inode->i_mapping; |
385 | 549 | ||
386 | /* First sum forwards in this page */ | 550 | /* First sum forwards in this page */ |
@@ -414,14 +578,15 @@ xfs_probe_unmapped_cluster( | |||
414 | } | 578 | } |
415 | 579 | ||
416 | /* | 580 | /* |
417 | * Probe for a given page (index) in the inode and test if it is delayed | 581 | * Probe for a given page (index) in the inode and test if it is suitable |
418 | * and without unwritten buffers. Returns page locked and with an extra | 582 | * for writing as part of an unwritten or delayed allocate extent. |
419 | * reference count. | 583 | * Returns page locked and with an extra reference count if so, else NULL. |
420 | */ | 584 | */ |
421 | STATIC struct page * | 585 | STATIC struct page * |
422 | xfs_probe_delalloc_page( | 586 | xfs_probe_delayed_page( |
423 | struct inode *inode, | 587 | struct inode *inode, |
424 | pgoff_t index) | 588 | pgoff_t index, |
589 | unsigned int type) | ||
425 | { | 590 | { |
426 | struct page *page; | 591 | struct page *page; |
427 | 592 | ||
@@ -437,12 +602,12 @@ xfs_probe_delalloc_page( | |||
437 | 602 | ||
438 | bh = head = page_buffers(page); | 603 | bh = head = page_buffers(page); |
439 | do { | 604 | do { |
440 | if (buffer_unwritten(bh)) { | 605 | if (buffer_unwritten(bh)) |
441 | acceptable = 0; | 606 | acceptable = (type == IOMAP_UNWRITTEN); |
607 | else if (buffer_delay(bh)) | ||
608 | acceptable = (type == IOMAP_DELAY); | ||
609 | else | ||
442 | break; | 610 | break; |
443 | } else if (buffer_delay(bh)) { | ||
444 | acceptable = 1; | ||
445 | } | ||
446 | } while ((bh = bh->b_this_page) != head); | 611 | } while ((bh = bh->b_this_page) != head); |
447 | 612 | ||
448 | if (acceptable) | 613 | if (acceptable) |
@@ -454,161 +619,30 @@ out: | |||
454 | return NULL; | 619 | return NULL; |
455 | } | 620 | } |
456 | 621 | ||
457 | STATIC int | ||
458 | xfs_map_unwritten( | ||
459 | struct inode *inode, | ||
460 | struct page *start_page, | ||
461 | struct buffer_head *head, | ||
462 | struct buffer_head *curr, | ||
463 | unsigned long p_offset, | ||
464 | int block_bits, | ||
465 | xfs_iomap_t *iomapp, | ||
466 | struct writeback_control *wbc, | ||
467 | int startio, | ||
468 | int all_bh) | ||
469 | { | ||
470 | struct buffer_head *bh = curr; | ||
471 | xfs_iomap_t *tmp; | ||
472 | xfs_ioend_t *ioend; | ||
473 | loff_t offset; | ||
474 | unsigned long nblocks = 0; | ||
475 | |||
476 | offset = start_page->index; | ||
477 | offset <<= PAGE_CACHE_SHIFT; | ||
478 | offset += p_offset; | ||
479 | |||
480 | ioend = xfs_alloc_ioend(inode); | ||
481 | |||
482 | /* First map forwards in the page consecutive buffers | ||
483 | * covering this unwritten extent | ||
484 | */ | ||
485 | do { | ||
486 | if (!buffer_unwritten(bh)) | ||
487 | break; | ||
488 | tmp = xfs_offset_to_map(start_page, iomapp, p_offset); | ||
489 | if (!tmp) | ||
490 | break; | ||
491 | xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp); | ||
492 | set_buffer_unwritten_io(bh); | ||
493 | bh->b_private = ioend; | ||
494 | p_offset += bh->b_size; | ||
495 | nblocks++; | ||
496 | } while ((bh = bh->b_this_page) != head); | ||
497 | |||
498 | atomic_add(nblocks, &ioend->io_remaining); | ||
499 | |||
500 | /* If we reached the end of the page, map forwards in any | ||
501 | * following pages which are also covered by this extent. | ||
502 | */ | ||
503 | if (bh == head) { | ||
504 | struct address_space *mapping = inode->i_mapping; | ||
505 | pgoff_t tindex, tloff, tlast; | ||
506 | unsigned long bs; | ||
507 | unsigned int pg_offset, bbits = inode->i_blkbits; | ||
508 | struct page *page; | ||
509 | |||
510 | tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT; | ||
511 | tloff = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT; | ||
512 | tloff = min(tlast, tloff); | ||
513 | for (tindex = start_page->index + 1; tindex < tloff; tindex++) { | ||
514 | page = xfs_probe_unwritten_page(mapping, | ||
515 | tindex, iomapp, ioend, | ||
516 | PAGE_CACHE_SIZE, &bs, bbits); | ||
517 | if (!page) | ||
518 | break; | ||
519 | nblocks += bs; | ||
520 | atomic_add(bs, &ioend->io_remaining); | ||
521 | xfs_convert_page(inode, page, iomapp, wbc, ioend, | ||
522 | startio, all_bh); | ||
523 | /* stop if converting the next page might add | ||
524 | * enough blocks that the corresponding byte | ||
525 | * count won't fit in our ulong page buf length */ | ||
526 | if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits)) | ||
527 | goto enough; | ||
528 | } | ||
529 | |||
530 | if (tindex == tlast && | ||
531 | (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) { | ||
532 | page = xfs_probe_unwritten_page(mapping, | ||
533 | tindex, iomapp, ioend, | ||
534 | pg_offset, &bs, bbits); | ||
535 | if (page) { | ||
536 | nblocks += bs; | ||
537 | atomic_add(bs, &ioend->io_remaining); | ||
538 | xfs_convert_page(inode, page, iomapp, wbc, ioend, | ||
539 | startio, all_bh); | ||
540 | if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits)) | ||
541 | goto enough; | ||
542 | } | ||
543 | } | ||
544 | } | ||
545 | |||
546 | enough: | ||
547 | ioend->io_size = (xfs_off_t)nblocks << block_bits; | ||
548 | ioend->io_offset = offset; | ||
549 | xfs_finish_ioend(ioend); | ||
550 | return 0; | ||
551 | } | ||
552 | |||
553 | STATIC void | ||
554 | xfs_submit_page( | ||
555 | struct page *page, | ||
556 | struct writeback_control *wbc, | ||
557 | struct buffer_head *bh_arr[], | ||
558 | int bh_count, | ||
559 | int probed_page, | ||
560 | int clear_dirty) | ||
561 | { | ||
562 | struct buffer_head *bh; | ||
563 | int i; | ||
564 | |||
565 | BUG_ON(PageWriteback(page)); | ||
566 | if (bh_count) | ||
567 | set_page_writeback(page); | ||
568 | if (clear_dirty) | ||
569 | clear_page_dirty(page); | ||
570 | unlock_page(page); | ||
571 | |||
572 | if (bh_count) { | ||
573 | for (i = 0; i < bh_count; i++) { | ||
574 | bh = bh_arr[i]; | ||
575 | mark_buffer_async_write(bh); | ||
576 | if (buffer_unwritten(bh)) | ||
577 | set_buffer_unwritten_io(bh); | ||
578 | set_buffer_uptodate(bh); | ||
579 | clear_buffer_dirty(bh); | ||
580 | } | ||
581 | |||
582 | for (i = 0; i < bh_count; i++) | ||
583 | submit_bh(WRITE, bh_arr[i]); | ||
584 | |||
585 | if (probed_page && clear_dirty) | ||
586 | wbc->nr_to_write--; /* Wrote an "extra" page */ | ||
587 | } | ||
588 | } | ||
589 | |||
590 | /* | 622 | /* |
591 | * Allocate & map buffers for page given the extent map. Write it out. | 623 | * Allocate & map buffers for page given the extent map. Write it out. |
592 | * except for the original page of a writepage, this is called on | 624 | * except for the original page of a writepage, this is called on |
593 | * delalloc/unwritten pages only, for the original page it is possible | 625 | * delalloc/unwritten pages only, for the original page it is possible |
594 | * that the page has no mapping at all. | 626 | * that the page has no mapping at all. |
595 | */ | 627 | */ |
596 | STATIC void | 628 | STATIC int |
597 | xfs_convert_page( | 629 | xfs_convert_page( |
598 | struct inode *inode, | 630 | struct inode *inode, |
599 | struct page *page, | 631 | struct page *page, |
600 | xfs_iomap_t *iomapp, | 632 | xfs_iomap_t *iomapp, |
633 | xfs_ioend_t **ioendp, | ||
601 | struct writeback_control *wbc, | 634 | struct writeback_control *wbc, |
602 | void *private, | 635 | void *private, |
603 | int startio, | 636 | int startio, |
604 | int all_bh) | 637 | int all_bh) |
605 | { | 638 | { |
606 | struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head; | 639 | struct buffer_head *bh, *head; |
607 | xfs_iomap_t *mp = iomapp, *tmp; | 640 | xfs_iomap_t *mp = iomapp, *tmp; |
608 | unsigned long offset, end_offset; | 641 | unsigned long p_offset, end_offset; |
609 | int index = 0; | 642 | unsigned int type; |
610 | int bbits = inode->i_blkbits; | 643 | int bbits = inode->i_blkbits; |
611 | int len, page_dirty; | 644 | int len, page_dirty; |
645 | int count = 0, done = 0, uptodate = 1; | ||
612 | 646 | ||
613 | end_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)); | 647 | end_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)); |
614 | 648 | ||
@@ -621,59 +655,66 @@ xfs_convert_page( | |||
621 | end_offset = roundup(end_offset, len); | 655 | end_offset = roundup(end_offset, len); |
622 | page_dirty = end_offset / len; | 656 | page_dirty = end_offset / len; |
623 | 657 | ||
624 | offset = 0; | 658 | p_offset = 0; |
625 | bh = head = page_buffers(page); | 659 | bh = head = page_buffers(page); |
626 | do { | 660 | do { |
627 | if (offset >= end_offset) | 661 | if (p_offset >= end_offset) |
628 | break; | 662 | break; |
629 | if (!(PageUptodate(page) || buffer_uptodate(bh))) | 663 | if (!buffer_uptodate(bh)) |
664 | uptodate = 0; | ||
665 | if (!(PageUptodate(page) || buffer_uptodate(bh))) { | ||
666 | done = 1; | ||
630 | continue; | 667 | continue; |
631 | if (buffer_mapped(bh) && all_bh && | 668 | } |
632 | !(buffer_unwritten(bh) || buffer_delay(bh))) { | 669 | |
633 | if (startio) { | 670 | if (buffer_unwritten(bh)) |
671 | type = IOMAP_UNWRITTEN; | ||
672 | else if (buffer_delay(bh)) | ||
673 | type = IOMAP_DELAY; | ||
674 | else { | ||
675 | type = 0; | ||
676 | if (!(buffer_mapped(bh) && all_bh && startio)) { | ||
677 | done = 1; | ||
678 | } else if (startio) { | ||
634 | lock_buffer(bh); | 679 | lock_buffer(bh); |
635 | bh_arr[index++] = bh; | 680 | xfs_add_to_ioend(inode, bh, p_offset, |
681 | type, ioendp, done); | ||
682 | count++; | ||
636 | page_dirty--; | 683 | page_dirty--; |
637 | } | 684 | } |
638 | continue; | 685 | continue; |
639 | } | 686 | } |
640 | tmp = xfs_offset_to_map(page, mp, offset); | 687 | tmp = xfs_offset_to_map(page, mp, p_offset); |
641 | if (!tmp) | 688 | if (!tmp) { |
689 | done = 1; | ||
642 | continue; | 690 | continue; |
691 | } | ||
643 | ASSERT(!(tmp->iomap_flags & IOMAP_HOLE)); | 692 | ASSERT(!(tmp->iomap_flags & IOMAP_HOLE)); |
644 | ASSERT(!(tmp->iomap_flags & IOMAP_DELAY)); | 693 | ASSERT(!(tmp->iomap_flags & IOMAP_DELAY)); |
645 | 694 | ||
646 | /* If this is a new unwritten extent buffer (i.e. one | 695 | xfs_map_at_offset(page, bh, p_offset, bbits, tmp, *ioendp); |
647 | * that we haven't passed in private data for, we must | ||
648 | * now map this buffer too. | ||
649 | */ | ||
650 | if (buffer_unwritten(bh) && !bh->b_end_io) { | ||
651 | ASSERT(tmp->iomap_flags & IOMAP_UNWRITTEN); | ||
652 | xfs_map_unwritten(inode, page, head, bh, offset, | ||
653 | bbits, tmp, wbc, startio, all_bh); | ||
654 | } else if (! (buffer_unwritten(bh) && buffer_locked(bh))) { | ||
655 | xfs_map_at_offset(page, bh, offset, bbits, tmp); | ||
656 | if (buffer_unwritten(bh)) { | ||
657 | set_buffer_unwritten_io(bh); | ||
658 | bh->b_private = private; | ||
659 | ASSERT(private); | ||
660 | } | ||
661 | } | ||
662 | if (startio) { | 696 | if (startio) { |
663 | bh_arr[index++] = bh; | 697 | xfs_add_to_ioend(inode, bh, p_offset, |
698 | type, ioendp, done); | ||
699 | count++; | ||
664 | } else { | 700 | } else { |
665 | set_buffer_dirty(bh); | 701 | set_buffer_dirty(bh); |
666 | unlock_buffer(bh); | 702 | unlock_buffer(bh); |
667 | mark_buffer_dirty(bh); | 703 | mark_buffer_dirty(bh); |
668 | } | 704 | } |
669 | page_dirty--; | 705 | page_dirty--; |
670 | } while (offset += len, (bh = bh->b_this_page) != head); | 706 | } while (p_offset += len, (bh = bh->b_this_page) != head); |
671 | 707 | ||
672 | if (startio && index) { | 708 | if (uptodate && bh == head) |
673 | xfs_submit_page(page, wbc, bh_arr, index, 1, !page_dirty); | 709 | SetPageUptodate(page); |
674 | } else { | 710 | |
675 | unlock_page(page); | 711 | if (startio) { |
712 | if (count) | ||
713 | wbc->nr_to_write--; | ||
714 | xfs_start_page_writeback(page, wbc, !page_dirty, count); | ||
676 | } | 715 | } |
716 | |||
717 | return done; | ||
677 | } | 718 | } |
678 | 719 | ||
679 | /* | 720 | /* |
@@ -685,19 +726,22 @@ xfs_cluster_write( | |||
685 | struct inode *inode, | 726 | struct inode *inode, |
686 | pgoff_t tindex, | 727 | pgoff_t tindex, |
687 | xfs_iomap_t *iomapp, | 728 | xfs_iomap_t *iomapp, |
729 | xfs_ioend_t **ioendp, | ||
688 | struct writeback_control *wbc, | 730 | struct writeback_control *wbc, |
689 | int startio, | 731 | int startio, |
690 | int all_bh, | 732 | int all_bh, |
691 | pgoff_t tlast) | 733 | pgoff_t tlast) |
692 | { | 734 | { |
693 | struct page *page; | 735 | struct page *page; |
736 | unsigned int type = (*ioendp)->io_type; | ||
737 | int done; | ||
694 | 738 | ||
695 | for (; tindex <= tlast; tindex++) { | 739 | for (done = 0; tindex <= tlast && !done; tindex++) { |
696 | page = xfs_probe_delalloc_page(inode, tindex); | 740 | page = xfs_probe_delayed_page(inode, tindex, type); |
697 | if (!page) | 741 | if (!page) |
698 | break; | 742 | break; |
699 | xfs_convert_page(inode, page, iomapp, wbc, NULL, | 743 | done = xfs_convert_page(inode, page, iomapp, ioendp, |
700 | startio, all_bh); | 744 | wbc, NULL, startio, all_bh); |
701 | } | 745 | } |
702 | } | 746 | } |
703 | 747 | ||
@@ -728,18 +772,21 @@ xfs_page_state_convert( | |||
728 | int startio, | 772 | int startio, |
729 | int unmapped) /* also implies page uptodate */ | 773 | int unmapped) /* also implies page uptodate */ |
730 | { | 774 | { |
731 | struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head; | 775 | struct buffer_head *bh, *head; |
732 | xfs_iomap_t *iomp, iomap; | 776 | xfs_iomap_t *iomp, iomap; |
777 | xfs_ioend_t *ioend = NULL, *iohead = NULL; | ||
733 | loff_t offset; | 778 | loff_t offset; |
734 | unsigned long p_offset = 0; | 779 | unsigned long p_offset = 0; |
780 | unsigned int type; | ||
735 | __uint64_t end_offset; | 781 | __uint64_t end_offset; |
736 | pgoff_t end_index, last_index, tlast; | 782 | pgoff_t end_index, last_index, tlast; |
737 | int len, err, i, cnt = 0, uptodate = 1; | 783 | int flags, len, err, done = 1; |
738 | int flags; | 784 | int uptodate = 1; |
739 | int page_dirty; | 785 | int page_dirty, count = 0, trylock_flag = 0; |
740 | 786 | ||
741 | /* wait for other IO threads? */ | 787 | /* wait for other IO threads? */ |
742 | flags = (startio && wbc->sync_mode != WB_SYNC_NONE) ? 0 : BMAPI_TRYLOCK; | 788 | if (startio && wbc->sync_mode != WB_SYNC_NONE) |
789 | trylock_flag |= BMAPI_TRYLOCK; | ||
743 | 790 | ||
744 | /* Is this page beyond the end of the file? */ | 791 | /* Is this page beyond the end of the file? */ |
745 | offset = i_size_read(inode); | 792 | offset = i_size_read(inode); |
@@ -754,98 +801,98 @@ xfs_page_state_convert( | |||
754 | } | 801 | } |
755 | } | 802 | } |
756 | 803 | ||
757 | end_offset = min_t(unsigned long long, | ||
758 | (loff_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset); | ||
759 | offset = (loff_t)page->index << PAGE_CACHE_SHIFT; | ||
760 | |||
761 | /* | 804 | /* |
762 | * page_dirty is initially a count of buffers on the page before | 805 | * page_dirty is initially a count of buffers on the page before |
763 | * EOF and is decremented as we move each into a cleanable state. | 806 | * EOF and is decremented as we move each into a cleanable state. |
764 | */ | 807 | * |
808 | * Derivation: | ||
809 | * | ||
810 | * End offset is the highest offset that this page should represent. | ||
811 | * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1)) | ||
812 | * will evaluate non-zero and be less than PAGE_CACHE_SIZE and | ||
813 | * hence give us the correct page_dirty count. On any other page, | ||
814 | * it will be zero and in that case we need page_dirty to be the | ||
815 | * count of buffers on the page. | ||
816 | */ | ||
817 | end_offset = min_t(unsigned long long, | ||
818 | (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset); | ||
765 | len = 1 << inode->i_blkbits; | 819 | len = 1 << inode->i_blkbits; |
766 | p_offset = max(p_offset, PAGE_CACHE_SIZE); | 820 | p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1), |
767 | p_offset = roundup(p_offset, len); | 821 | PAGE_CACHE_SIZE); |
822 | p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; | ||
768 | page_dirty = p_offset / len; | 823 | page_dirty = p_offset / len; |
769 | 824 | ||
770 | iomp = NULL; | 825 | iomp = NULL; |
771 | p_offset = 0; | ||
772 | bh = head = page_buffers(page); | 826 | bh = head = page_buffers(page); |
827 | offset = page_offset(page); | ||
828 | |||
829 | /* TODO: fix up "done" variable and iomap pointer (boolean) */ | ||
830 | /* TODO: cleanup count and page_dirty */ | ||
773 | 831 | ||
774 | do { | 832 | do { |
775 | if (offset >= end_offset) | 833 | if (offset >= end_offset) |
776 | break; | 834 | break; |
777 | if (!buffer_uptodate(bh)) | 835 | if (!buffer_uptodate(bh)) |
778 | uptodate = 0; | 836 | uptodate = 0; |
779 | if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) | 837 | if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) { |
838 | done = 1; | ||
780 | continue; | 839 | continue; |
840 | } | ||
781 | 841 | ||
782 | if (iomp) { | 842 | if (iomp) { |
783 | iomp = xfs_offset_to_map(page, &iomap, p_offset); | 843 | iomp = xfs_offset_to_map(page, &iomap, p_offset); |
844 | done = (iomp == NULL); | ||
784 | } | 845 | } |
785 | 846 | ||
786 | /* | 847 | /* |
787 | * First case, map an unwritten extent and prepare for | 848 | * First case, map an unwritten extent and prepare for |
788 | * extent state conversion transaction on completion. | 849 | * extent state conversion transaction on completion. |
789 | */ | 850 | * |
790 | if (buffer_unwritten(bh)) { | ||
791 | if (!startio) | ||
792 | continue; | ||
793 | if (!iomp) { | ||
794 | err = xfs_map_blocks(inode, offset, len, &iomap, | ||
795 | BMAPI_WRITE|BMAPI_IGNSTATE); | ||
796 | if (err) { | ||
797 | goto error; | ||
798 | } | ||
799 | iomp = xfs_offset_to_map(page, &iomap, | ||
800 | p_offset); | ||
801 | } | ||
802 | if (iomp) { | ||
803 | if (!bh->b_end_io) { | ||
804 | err = xfs_map_unwritten(inode, page, | ||
805 | head, bh, p_offset, | ||
806 | inode->i_blkbits, iomp, | ||
807 | wbc, startio, unmapped); | ||
808 | if (err) { | ||
809 | goto error; | ||
810 | } | ||
811 | } else { | ||
812 | set_bit(BH_Lock, &bh->b_state); | ||
813 | } | ||
814 | BUG_ON(!buffer_locked(bh)); | ||
815 | bh_arr[cnt++] = bh; | ||
816 | page_dirty--; | ||
817 | } | ||
818 | /* | ||
819 | * Second case, allocate space for a delalloc buffer. | 851 | * Second case, allocate space for a delalloc buffer. |
820 | * We can return EAGAIN here in the release page case. | 852 | * We can return EAGAIN here in the release page case. |
821 | */ | 853 | */ |
822 | } else if (buffer_delay(bh)) { | 854 | if (buffer_unwritten(bh) || buffer_delay(bh)) { |
855 | if (buffer_unwritten(bh)) { | ||
856 | type = IOMAP_UNWRITTEN; | ||
857 | flags = BMAPI_WRITE|BMAPI_IGNSTATE; | ||
858 | } else { | ||
859 | type = IOMAP_DELAY; | ||
860 | flags = BMAPI_ALLOCATE; | ||
861 | if (!startio) | ||
862 | flags |= trylock_flag; | ||
863 | } | ||
864 | |||
823 | if (!iomp) { | 865 | if (!iomp) { |
866 | done = 1; | ||
824 | err = xfs_map_blocks(inode, offset, len, &iomap, | 867 | err = xfs_map_blocks(inode, offset, len, &iomap, |
825 | BMAPI_ALLOCATE | flags); | 868 | flags); |
826 | if (err) { | 869 | if (err) |
827 | goto error; | 870 | goto error; |
828 | } | ||
829 | iomp = xfs_offset_to_map(page, &iomap, | 871 | iomp = xfs_offset_to_map(page, &iomap, |
830 | p_offset); | 872 | p_offset); |
873 | done = (iomp == NULL); | ||
831 | } | 874 | } |
832 | if (iomp) { | 875 | if (iomp) { |
833 | xfs_map_at_offset(page, bh, p_offset, | 876 | xfs_map_at_offset(page, bh, p_offset, |
834 | inode->i_blkbits, iomp); | 877 | inode->i_blkbits, iomp, ioend); |
835 | if (startio) { | 878 | if (startio) { |
836 | bh_arr[cnt++] = bh; | 879 | xfs_add_to_ioend(inode, bh, p_offset, |
880 | type, &ioend, done); | ||
837 | } else { | 881 | } else { |
838 | set_buffer_dirty(bh); | 882 | set_buffer_dirty(bh); |
839 | unlock_buffer(bh); | 883 | unlock_buffer(bh); |
840 | mark_buffer_dirty(bh); | 884 | mark_buffer_dirty(bh); |
841 | } | 885 | } |
842 | page_dirty--; | 886 | page_dirty--; |
887 | count++; | ||
888 | } else { | ||
889 | done = 1; | ||
843 | } | 890 | } |
844 | } else if ((buffer_uptodate(bh) || PageUptodate(page)) && | 891 | } else if ((buffer_uptodate(bh) || PageUptodate(page)) && |
845 | (unmapped || startio)) { | 892 | (unmapped || startio)) { |
846 | 893 | ||
894 | type = 0; | ||
847 | if (!buffer_mapped(bh)) { | 895 | if (!buffer_mapped(bh)) { |
848 | int size; | ||
849 | 896 | ||
850 | /* | 897 | /* |
851 | * Getting here implies an unmapped buffer | 898 | * Getting here implies an unmapped buffer |
@@ -853,6 +900,8 @@ xfs_page_state_convert( | |||
853 | * need to write the whole page out. | 900 | * need to write the whole page out. |
854 | */ | 901 | */ |
855 | if (!iomp) { | 902 | if (!iomp) { |
903 | int size; | ||
904 | |||
856 | size = xfs_probe_unmapped_cluster( | 905 | size = xfs_probe_unmapped_cluster( |
857 | inode, page, bh, head); | 906 | inode, page, bh, head); |
858 | err = xfs_map_blocks(inode, offset, | 907 | err = xfs_map_blocks(inode, offset, |
@@ -863,52 +912,70 @@ xfs_page_state_convert( | |||
863 | } | 912 | } |
864 | iomp = xfs_offset_to_map(page, &iomap, | 913 | iomp = xfs_offset_to_map(page, &iomap, |
865 | p_offset); | 914 | p_offset); |
915 | done = (iomp == NULL); | ||
866 | } | 916 | } |
867 | if (iomp) { | 917 | if (iomp) { |
868 | xfs_map_at_offset(page, | 918 | xfs_map_at_offset(page, bh, p_offset, |
869 | bh, p_offset, | 919 | inode->i_blkbits, iomp, |
870 | inode->i_blkbits, iomp); | 920 | ioend); |
871 | if (startio) { | 921 | if (startio) { |
872 | bh_arr[cnt++] = bh; | 922 | xfs_add_to_ioend(inode, |
923 | bh, p_offset, type, | ||
924 | &ioend, done); | ||
873 | } else { | 925 | } else { |
874 | set_buffer_dirty(bh); | 926 | set_buffer_dirty(bh); |
875 | unlock_buffer(bh); | 927 | unlock_buffer(bh); |
876 | mark_buffer_dirty(bh); | 928 | mark_buffer_dirty(bh); |
877 | } | 929 | } |
878 | page_dirty--; | 930 | page_dirty--; |
931 | count++; | ||
932 | } else { | ||
933 | done = 1; | ||
879 | } | 934 | } |
880 | } else if (startio) { | 935 | } else if (startio) { |
881 | if (buffer_uptodate(bh) && | 936 | if (buffer_uptodate(bh) && |
882 | !test_and_set_bit(BH_Lock, &bh->b_state)) { | 937 | !test_and_set_bit(BH_Lock, &bh->b_state)) { |
883 | bh_arr[cnt++] = bh; | 938 | ASSERT(buffer_mapped(bh)); |
939 | xfs_add_to_ioend(inode, | ||
940 | bh, p_offset, type, | ||
941 | &ioend, done); | ||
884 | page_dirty--; | 942 | page_dirty--; |
943 | count++; | ||
944 | } else { | ||
945 | done = 1; | ||
885 | } | 946 | } |
947 | } else { | ||
948 | done = 1; | ||
886 | } | 949 | } |
887 | } | 950 | } |
888 | } while (offset += len, p_offset += len, | 951 | |
889 | ((bh = bh->b_this_page) != head)); | 952 | if (!iohead) |
953 | iohead = ioend; | ||
954 | |||
955 | } while (offset += len, ((bh = bh->b_this_page) != head)); | ||
890 | 956 | ||
891 | if (uptodate && bh == head) | 957 | if (uptodate && bh == head) |
892 | SetPageUptodate(page); | 958 | SetPageUptodate(page); |
893 | 959 | ||
894 | if (startio) { | 960 | if (startio) |
895 | xfs_submit_page(page, wbc, bh_arr, cnt, 0, !page_dirty); | 961 | xfs_start_page_writeback(page, wbc, 1, count); |
896 | } | ||
897 | 962 | ||
898 | if (iomp) { | 963 | if (ioend && iomp && !done) { |
899 | offset = (iomp->iomap_offset + iomp->iomap_bsize - 1) >> | 964 | offset = (iomp->iomap_offset + iomp->iomap_bsize - 1) >> |
900 | PAGE_CACHE_SHIFT; | 965 | PAGE_CACHE_SHIFT; |
901 | tlast = min_t(pgoff_t, offset, last_index); | 966 | tlast = min_t(pgoff_t, offset, last_index); |
902 | xfs_cluster_write(inode, page->index + 1, iomp, wbc, | 967 | xfs_cluster_write(inode, page->index + 1, iomp, &ioend, |
903 | startio, unmapped, tlast); | 968 | wbc, startio, unmapped, tlast); |
904 | } | 969 | } |
905 | 970 | ||
971 | if (iohead) | ||
972 | xfs_submit_ioend(iohead); | ||
973 | |||
906 | return page_dirty; | 974 | return page_dirty; |
907 | 975 | ||
908 | error: | 976 | error: |
909 | for (i = 0; i < cnt; i++) { | 977 | if (iohead) |
910 | unlock_buffer(bh_arr[i]); | 978 | xfs_cancel_ioend(iohead); |
911 | } | ||
912 | 979 | ||
913 | /* | 980 | /* |
914 | * If it's delalloc and we have nowhere to put it, | 981 | * If it's delalloc and we have nowhere to put it, |
@@ -916,9 +983,8 @@ error: | |||
916 | * us to try again. | 983 | * us to try again. |
917 | */ | 984 | */ |
918 | if (err != -EAGAIN) { | 985 | if (err != -EAGAIN) { |
919 | if (!unmapped) { | 986 | if (!unmapped) |
920 | block_invalidatepage(page, 0); | 987 | block_invalidatepage(page, 0); |
921 | } | ||
922 | ClearPageUptodate(page); | 988 | ClearPageUptodate(page); |
923 | } | 989 | } |
924 | return err; | 990 | return err; |
@@ -1094,7 +1160,7 @@ linvfs_direct_IO( | |||
1094 | if (error) | 1160 | if (error) |
1095 | return -error; | 1161 | return -error; |
1096 | 1162 | ||
1097 | iocb->private = xfs_alloc_ioend(inode); | 1163 | iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN); |
1098 | 1164 | ||
1099 | ret = blockdev_direct_IO_own_locking(rw, iocb, inode, | 1165 | ret = blockdev_direct_IO_own_locking(rw, iocb, inode, |
1100 | iomap.iomap_target->bt_bdev, | 1166 | iomap.iomap_target->bt_bdev, |
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index 4720758a9ade..55339dd5a30d 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h | |||
@@ -23,14 +23,24 @@ extern mempool_t *xfs_ioend_pool; | |||
23 | 23 | ||
24 | typedef void (*xfs_ioend_func_t)(void *); | 24 | typedef void (*xfs_ioend_func_t)(void *); |
25 | 25 | ||
26 | /* | ||
27 | * xfs_ioend struct manages large extent writes for XFS. | ||
28 | * It can manage several multi-page bio's at once. | ||
29 | */ | ||
26 | typedef struct xfs_ioend { | 30 | typedef struct xfs_ioend { |
31 | struct xfs_ioend *io_list; /* next ioend in chain */ | ||
32 | unsigned int io_type; /* delalloc / unwritten */ | ||
27 | unsigned int io_uptodate; /* I/O status register */ | 33 | unsigned int io_uptodate; /* I/O status register */ |
28 | atomic_t io_remaining; /* hold count */ | 34 | atomic_t io_remaining; /* hold count */ |
29 | struct vnode *io_vnode; /* file being written to */ | 35 | struct vnode *io_vnode; /* file being written to */ |
30 | struct buffer_head *io_buffer_head;/* buffer linked list head */ | 36 | struct buffer_head *io_buffer_head;/* buffer linked list head */ |
37 | struct buffer_head *io_buffer_tail;/* buffer linked list tail */ | ||
31 | size_t io_size; /* size of the extent */ | 38 | size_t io_size; /* size of the extent */ |
32 | xfs_off_t io_offset; /* offset in the file */ | 39 | xfs_off_t io_offset; /* offset in the file */ |
33 | struct work_struct io_work; /* xfsdatad work queue */ | 40 | struct work_struct io_work; /* xfsdatad work queue */ |
34 | } xfs_ioend_t; | 41 | } xfs_ioend_t; |
35 | 42 | ||
43 | extern struct address_space_operations linvfs_aops; | ||
44 | extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int); | ||
45 | |||
36 | #endif /* __XFS_IOPS_H__ */ | 46 | #endif /* __XFS_IOPS_H__ */ |
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h index ee784b63acbf..6899a6b4a50a 100644 --- a/fs/xfs/linux-2.6/xfs_iops.h +++ b/fs/xfs/linux-2.6/xfs_iops.h | |||
@@ -26,11 +26,6 @@ extern struct file_operations linvfs_file_operations; | |||
26 | extern struct file_operations linvfs_invis_file_operations; | 26 | extern struct file_operations linvfs_invis_file_operations; |
27 | extern struct file_operations linvfs_dir_operations; | 27 | extern struct file_operations linvfs_dir_operations; |
28 | 28 | ||
29 | extern struct address_space_operations linvfs_aops; | ||
30 | |||
31 | extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int); | ||
32 | extern void linvfs_unwritten_done(struct buffer_head *, int); | ||
33 | |||
34 | extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *, | 29 | extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *, |
35 | int, unsigned int, void __user *); | 30 | int, unsigned int, void __user *); |
36 | 31 | ||
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 95efe948a095..67389b745526 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h | |||
@@ -110,10 +110,6 @@ | |||
110 | * delalloc and these ondisk-uninitialised buffers. | 110 | * delalloc and these ondisk-uninitialised buffers. |
111 | */ | 111 | */ |
112 | BUFFER_FNS(PrivateStart, unwritten); | 112 | BUFFER_FNS(PrivateStart, unwritten); |
113 | static inline void set_buffer_unwritten_io(struct buffer_head *bh) | ||
114 | { | ||
115 | bh->b_end_io = linvfs_unwritten_done; | ||
116 | } | ||
117 | 113 | ||
118 | #define restricted_chown xfs_params.restrict_chown.val | 114 | #define restricted_chown xfs_params.restrict_chown.val |
119 | #define irix_sgid_inherit xfs_params.sgid_inherit.val | 115 | #define irix_sgid_inherit xfs_params.sgid_inherit.val |