Diffstat (limited to 'fs/xfs/linux-2.6/xfs_aops.c')
 fs/xfs/linux-2.6/xfs_aops.c | 1111 ++++++++++++++++++++++++++-----------------
 1 file changed, 613 insertions(+), 498 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 94d3cdfbf9b8..120626789406 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -40,11 +40,10 @@
 #include "xfs_rw.h"
 #include "xfs_iomap.h"
 #include <linux/mpage.h>
+#include <linux/pagevec.h>
 #include <linux/writeback.h>
 
 STATIC void xfs_count_page_state(struct page *, int *, int *, int *);
-STATIC void xfs_convert_page(struct inode *, struct page *, xfs_iomap_t *,
-		struct writeback_control *wbc, void *, int, int);
 
 #if defined(XFS_RW_TRACE)
 void
@@ -55,17 +54,15 @@ xfs_page_trace(
 	int		mask)
 {
 	xfs_inode_t	*ip;
-	bhv_desc_t	*bdp;
 	vnode_t		*vp = LINVFS_GET_VP(inode);
 	loff_t		isize = i_size_read(inode);
-	loff_t		offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
+	loff_t		offset = page_offset(page);
 	int		delalloc = -1, unmapped = -1, unwritten = -1;
 
 	if (page_has_buffers(page))
 		xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
 
-	bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
-	ip = XFS_BHVTOI(bdp);
+	ip = xfs_vtoi(vp);
 	if (!ip->i_rwtrace)
 		return;
 
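The tracing hunk swaps an open-coded index shift for the page_offset() helper. As a minimal userspace sketch (hypothetical names, assuming 4 KiB pages, i.e. PAGE_CACHE_SHIFT == 12), both forms reduce to the same arithmetic:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Userspace model; assumes 4 KiB pages (PAGE_CACHE_SHIFT == 12). */
#define MODEL_PAGE_SHIFT 12

/* Models page_offset(page): byte offset of a page from its index. */
static int64_t model_page_offset(uint64_t page_index)
{
	return (int64_t)page_index << MODEL_PAGE_SHIFT;
}

int main(void)
{
	uint64_t idx;

	/* The shift is just a multiply by the page size. */
	for (idx = 0; idx < 1024; idx++)
		assert(model_page_offset(idx) == (int64_t)(idx * 4096));
	printf("page 3 starts at byte %lld\n",
	       (long long)model_page_offset(3));
	return 0;
}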
@@ -103,15 +100,56 @@ xfs_finish_ioend(
 		queue_work(xfsdatad_workqueue, &ioend->io_work);
 }
 
+/*
+ * We're now finished for good with this ioend structure.
+ * Update the page state via the associated buffer_heads,
+ * release holds on the inode and bio, and finally free
+ * up memory.  Do not use the ioend after this.
+ */
 STATIC void
 xfs_destroy_ioend(
 	xfs_ioend_t		*ioend)
 {
+	struct buffer_head	*bh, *next;
+
+	for (bh = ioend->io_buffer_head; bh; bh = next) {
+		next = bh->b_private;
+		bh->b_end_io(bh, ioend->io_uptodate);
+	}
+
 	vn_iowake(ioend->io_vnode);
 	mempool_free(ioend, xfs_ioend_pool);
 }
 
 /*
+ * Buffered IO write completion for delayed allocate extents.
+ * TODO: Update ondisk isize now that we know the file data
+ * has been flushed (i.e. the notorious "NULL file" problem).
+ */
+STATIC void
+xfs_end_bio_delalloc(
+	void			*data)
+{
+	xfs_ioend_t		*ioend = data;
+
+	xfs_destroy_ioend(ioend);
+}
+
+/*
+ * Buffered IO write completion for regular, written extents.
+ */
+STATIC void
+xfs_end_bio_written(
+	void			*data)
+{
+	xfs_ioend_t		*ioend = data;
+
+	xfs_destroy_ioend(ioend);
+}
+
+/*
+ * IO write completion for unwritten extents.
+ *
  * Issue transactions to convert a buffer range from unwritten
  * to written extents.
  */
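xfs_destroy_ioend() now walks a chain of buffer_heads threaded through b_private and fires each b_end_io callback exactly once. A minimal userspace sketch of that chain-and-drain pattern, with hypothetical stand-in types rather than the kernel's buffer_head:

#include <stdio.h>

/* Hypothetical stand-ins for buffer_head/b_private and the ioend. */
struct buf {
	int		blocknr;
	void		*private;	/* models bh->b_private: next in chain */
	void		(*end_io)(struct buf *, int uptodate);
};

struct ioend_model {
	struct buf	*head;		/* models ioend->io_buffer_head */
	struct buf	*tail;		/* models ioend->io_buffer_tail */
	int		uptodate;
};

static void buf_done(struct buf *b, int uptodate)
{
	printf("block %d finished, uptodate=%d\n", b->blocknr, uptodate);
}

/* Append a buffer, reusing its private pointer as the chain link. */
static void ioend_add(struct ioend_model *io, struct buf *b)
{
	b->private = NULL;
	b->end_io = buf_done;
	if (!io->head)
		io->head = b;
	else
		io->tail->private = b;
	io->tail = b;
}

/* Models xfs_destroy_ioend(): walk the chain once, signal each buffer. */
static void ioend_destroy(struct ioend_model *io)
{
	struct buf *b, *next;

	for (b = io->head; b; b = next) {
		next = b->private;
		b->end_io(b, io->uptodate);
	}
}

int main(void)
{
	struct ioend_model io = { .uptodate = 1 };
	struct buf b1 = { .blocknr = 10 }, b2 = { .blocknr = 11 };

	ioend_add(&io, &b1);
	ioend_add(&io, &b2);
	ioend_destroy(&io);
	return 0;
}

The design win, visible in the next hunks, is that this chain is built while the ioend is still private to the submitting thread, so the old irq-safe unwritten_done_lock spinlock can be deleted outright.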
@@ -123,21 +161,10 @@ xfs_end_bio_unwritten(
 	vnode_t			*vp = ioend->io_vnode;
 	xfs_off_t		offset = ioend->io_offset;
 	size_t			size = ioend->io_size;
-	struct buffer_head	*bh, *next;
 	int			error;
 
 	if (ioend->io_uptodate)
 		VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
-
-	/* ioend->io_buffer_head is only non-NULL for buffered I/O */
-	for (bh = ioend->io_buffer_head; bh; bh = next) {
-		next = bh->b_private;
-
-		bh->b_end_io = NULL;
-		clear_buffer_unwritten(bh);
-		end_buffer_async_write(bh, ioend->io_uptodate);
-	}
-
 	xfs_destroy_ioend(ioend);
 }
 
@@ -149,7 +176,8 @@ xfs_end_bio_unwritten(
  */
 STATIC xfs_ioend_t *
 xfs_alloc_ioend(
-	struct inode		*inode)
+	struct inode		*inode,
+	unsigned int		type)
 {
 	xfs_ioend_t		*ioend;
 
@@ -162,45 +190,25 @@ xfs_alloc_ioend(
 	 */
 	atomic_set(&ioend->io_remaining, 1);
 	ioend->io_uptodate = 1; /* cleared if any I/O fails */
+	ioend->io_list = NULL;
+	ioend->io_type = type;
 	ioend->io_vnode = LINVFS_GET_VP(inode);
 	ioend->io_buffer_head = NULL;
+	ioend->io_buffer_tail = NULL;
 	atomic_inc(&ioend->io_vnode->v_iocount);
 	ioend->io_offset = 0;
 	ioend->io_size = 0;
 
-	INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
+	if (type == IOMAP_UNWRITTEN)
+		INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
+	else if (type == IOMAP_DELAY)
+		INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc, ioend);
+	else
+		INIT_WORK(&ioend->io_work, xfs_end_bio_written, ioend);
 
 	return ioend;
 }
 
-void
-linvfs_unwritten_done(
-	struct buffer_head	*bh,
-	int			uptodate)
-{
-	xfs_ioend_t		*ioend = bh->b_private;
-	static spinlock_t	unwritten_done_lock = SPIN_LOCK_UNLOCKED;
-	unsigned long		flags;
-
-	ASSERT(buffer_unwritten(bh));
-	bh->b_end_io = NULL;
-
-	if (!uptodate)
-		ioend->io_uptodate = 0;
-
-	/*
-	 * Deep magic here.  We reuse b_private in the buffer_heads to build
-	 * a chain for completing the I/O from user context after we've issued
-	 * a transaction to convert the unwritten extent.
-	 */
-	spin_lock_irqsave(&unwritten_done_lock, flags);
-	bh->b_private = ioend->io_buffer_head;
-	ioend->io_buffer_head = bh;
-	spin_unlock_irqrestore(&unwritten_done_lock, flags);
-
-	xfs_finish_ioend(ioend);
-}
-
 STATIC int
 xfs_map_blocks(
 	struct inode		*inode,
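xfs_alloc_ioend() now selects the completion handler up front from the extent type. A small userspace model of that dispatch, using plain function pointers where the kernel uses INIT_WORK and a workqueue (names here are hypothetical):

#include <stdio.h>

/* Hypothetical stand-ins for the iomap extent types used by the patch. */
enum io_type { IO_WRITTEN, IO_UNWRITTEN, IO_DELAY };

struct ioend_model {
	enum io_type	type;
	void		(*work)(void *data);	/* models ioend->io_work */
};

static void end_unwritten(void *data)
{
	struct ioend_model *io = data;
	printf("type %d: convert unwritten extent, then destroy\n", io->type);
}

static void end_delalloc(void *data)
{
	struct ioend_model *io = data;
	printf("type %d: delalloc write complete, destroy\n", io->type);
}

static void end_written(void *data)
{
	struct ioend_model *io = data;
	printf("type %d: overwrite complete, destroy\n", io->type);
}

/* Models the INIT_WORK() selection in xfs_alloc_ioend(). */
static void ioend_init(struct ioend_model *io, enum io_type type)
{
	io->type = type;
	if (type == IO_UNWRITTEN)
		io->work = end_unwritten;
	else if (type == IO_DELAY)
		io->work = end_delalloc;
	else
		io->work = end_written;
}

int main(void)
{
	struct ioend_model io;

	ioend_init(&io, IO_DELAY);
	io.work(&io);	/* the kernel would run this from an xfsdatad thread */
	return 0;
}

Binding the handler at allocation time is what lets one submission path serve all three extent states; only the unwritten case still needs a conversion transaction at completion.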
@@ -218,138 +226,283 @@ xfs_map_blocks(
 	return -error;
 }
 
+STATIC inline int
+xfs_iomap_valid(
+	xfs_iomap_t		*iomapp,
+	loff_t			offset)
+{
+	return offset >= iomapp->iomap_offset &&
+		offset < iomapp->iomap_offset + iomapp->iomap_bsize;
+}
+
 /*
- * Finds the corresponding mapping in block @map array of the
- * given @offset within a @page.
+ * BIO completion handler for buffered IO.
  */
-STATIC xfs_iomap_t *
-xfs_offset_to_map(
+STATIC int
+xfs_end_bio(
+	struct bio		*bio,
+	unsigned int		bytes_done,
+	int			error)
+{
+	xfs_ioend_t		*ioend = bio->bi_private;
+
+	if (bio->bi_size)
+		return 1;
+
+	ASSERT(ioend);
+	ASSERT(atomic_read(&bio->bi_cnt) >= 1);
+
+	/* Toss bio and pass work off to an xfsdatad thread */
+	if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+		ioend->io_uptodate = 0;
+	bio->bi_private = NULL;
+	bio->bi_end_io = NULL;
+
+	bio_put(bio);
+	xfs_finish_ioend(ioend);
+	return 0;
+}
+
+STATIC void
+xfs_submit_ioend_bio(
+	xfs_ioend_t		*ioend,
+	struct bio		*bio)
+{
+	atomic_inc(&ioend->io_remaining);
+
+	bio->bi_private = ioend;
+	bio->bi_end_io = xfs_end_bio;
+
+	submit_bio(WRITE, bio);
+	ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
+	bio_put(bio);
+}
+
+STATIC struct bio *
+xfs_alloc_ioend_bio(
+	struct buffer_head	*bh)
+{
+	struct bio		*bio;
+	int			nvecs = bio_get_nr_vecs(bh->b_bdev);
+
+	do {
+		bio = bio_alloc(GFP_NOIO, nvecs);
+		nvecs >>= 1;
+	} while (!bio);
+
+	ASSERT(bio->bi_private == NULL);
+	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+	bio->bi_bdev = bh->b_bdev;
+	bio_get(bio);
+	return bio;
+}
+
+STATIC void
+xfs_start_buffer_writeback(
+	struct buffer_head	*bh)
+{
+	ASSERT(buffer_mapped(bh));
+	ASSERT(buffer_locked(bh));
+	ASSERT(!buffer_delay(bh));
+	ASSERT(!buffer_unwritten(bh));
+
+	mark_buffer_async_write(bh);
+	set_buffer_uptodate(bh);
+	clear_buffer_dirty(bh);
+}
+
+STATIC void
+xfs_start_page_writeback(
 	struct page		*page,
-	xfs_iomap_t		*iomapp,
-	unsigned long		offset)
+	struct writeback_control *wbc,
+	int			clear_dirty,
+	int			buffers)
+{
+	ASSERT(PageLocked(page));
+	ASSERT(!PageWriteback(page));
+	set_page_writeback(page);
+	if (clear_dirty)
+		clear_page_dirty(page);
+	unlock_page(page);
+	if (!buffers) {
+		end_page_writeback(page);
+		wbc->pages_skipped++;	/* We didn't write this page */
+	}
+}
+
+static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
+{
+	return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
+}
+
+/*
+ * Submit all of the bios for all of the ioends we have saved up, covering the
+ * initial writepage page and also any probed pages.
+ *
+ * Because we may have multiple ioends spanning a page, we need to start
+ * writeback on all the buffers before we submit them for I/O. If we mark the
+ * buffers as we got, then we can end up with a page that only has buffers
+ * marked async write and I/O complete on can occur before we mark the other
+ * buffers async write.
+ *
+ * The end result of this is that we trip a bug in end_page_writeback() because
+ * we call it twice for the one page as the code in end_buffer_async_write()
+ * assumes that all buffers on the page are started at the same time.
+ *
+ * The fix is two passes across the ioend list - one to start writeback on the
+ * bufferheads, and then the second one submit them for I/O.
+ */
+STATIC void
+xfs_submit_ioend(
+	xfs_ioend_t		*ioend)
+{
+	xfs_ioend_t		*head = ioend;
+	xfs_ioend_t		*next;
+	struct buffer_head	*bh;
+	struct bio		*bio;
+	sector_t		lastblock = 0;
+
+	/* Pass 1 - start writeback */
+	do {
+		next = ioend->io_list;
+		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
+			xfs_start_buffer_writeback(bh);
+		}
+	} while ((ioend = next) != NULL);
+
+	/* Pass 2 - submit I/O */
+	ioend = head;
+	do {
+		next = ioend->io_list;
+		bio = NULL;
+
+		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
+
+			if (!bio) {
+ retry:
+				bio = xfs_alloc_ioend_bio(bh);
+			} else if (bh->b_blocknr != lastblock + 1) {
+				xfs_submit_ioend_bio(ioend, bio);
+				goto retry;
+			}
+
+			if (bio_add_buffer(bio, bh) != bh->b_size) {
+				xfs_submit_ioend_bio(ioend, bio);
+				goto retry;
+			}
+
+			lastblock = bh->b_blocknr;
+		}
+		if (bio)
+			xfs_submit_ioend_bio(ioend, bio);
+		xfs_finish_ioend(ioend);
+	} while ((ioend = next) != NULL);
+}
+
+/*
+ * Cancel submission of all buffer_heads so far in this endio.
+ * Toss the endio too.  Only ever called for the initial page
+ * in a writepage request, so only ever one page.
+ */
+STATIC void
+xfs_cancel_ioend(
+	xfs_ioend_t		*ioend)
+{
+	xfs_ioend_t		*next;
+	struct buffer_head	*bh, *next_bh;
+
+	do {
+		next = ioend->io_list;
+		bh = ioend->io_buffer_head;
+		do {
+			next_bh = bh->b_private;
+			clear_buffer_async_write(bh);
+			unlock_buffer(bh);
+		} while ((bh = next_bh) != NULL);
+
+		vn_iowake(ioend->io_vnode);
+		mempool_free(ioend, xfs_ioend_pool);
+	} while ((ioend = next) != NULL);
+}
+
+/*
+ * Test to see if we've been building up a completion structure for
+ * earlier buffers -- if so, we try to append to this ioend if we
+ * can, otherwise we finish off any current ioend and start another.
+ * Return true if we've finished the given ioend.
+ */
+STATIC void
+xfs_add_to_ioend(
+	struct inode		*inode,
+	struct buffer_head	*bh,
+	xfs_off_t		offset,
+	unsigned int		type,
+	xfs_ioend_t		**result,
+	int			need_ioend)
 {
-	loff_t			full_offset;	/* offset from start of file */
+	xfs_ioend_t		*ioend = *result;
 
-	ASSERT(offset < PAGE_CACHE_SIZE);
+	if (!ioend || need_ioend || type != ioend->io_type) {
+		xfs_ioend_t	*previous = *result;
 
-	full_offset = page->index;		/* NB: using 64bit number */
-	full_offset <<= PAGE_CACHE_SHIFT;	/* offset from file start */
-	full_offset += offset;			/* offset from page start */
+		ioend = xfs_alloc_ioend(inode, type);
+		ioend->io_offset = offset;
+		ioend->io_buffer_head = bh;
+		ioend->io_buffer_tail = bh;
+		if (previous)
+			previous->io_list = ioend;
+		*result = ioend;
+	} else {
+		ioend->io_buffer_tail->b_private = bh;
+		ioend->io_buffer_tail = bh;
+	}
 
-	if (full_offset < iomapp->iomap_offset)
-		return NULL;
-	if (iomapp->iomap_offset + (iomapp->iomap_bsize -1) >= full_offset)
-		return iomapp;
-	return NULL;
+	bh->b_private = NULL;
+	ioend->io_size += bh->b_size;
 }
 
 STATIC void
 xfs_map_at_offset(
-	struct page		*page,
 	struct buffer_head	*bh,
-	unsigned long		offset,
+	loff_t			offset,
 	int			block_bits,
 	xfs_iomap_t		*iomapp)
 {
 	xfs_daddr_t		bn;
-	loff_t			delta;
 	int			sector_shift;
 
 	ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
 	ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));
 	ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL);
 
-	delta = page->index;
-	delta <<= PAGE_CACHE_SHIFT;
-	delta += offset;
-	delta -= iomapp->iomap_offset;
-	delta >>= block_bits;
-
 	sector_shift = block_bits - BBSHIFT;
-	bn = iomapp->iomap_bn >> sector_shift;
-	bn += delta;
-	BUG_ON(!bn && !(iomapp->iomap_flags & IOMAP_REALTIME));
+	bn = (iomapp->iomap_bn >> sector_shift) +
+	      ((offset - iomapp->iomap_offset) >> block_bits);
+
+	ASSERT(bn || (iomapp->iomap_flags & IOMAP_REALTIME));
 	ASSERT((bn << sector_shift) >= iomapp->iomap_bn);
 
 	lock_buffer(bh);
 	bh->b_blocknr = bn;
-	bh->b_bdev = iomapp->iomap_target->pbr_bdev;
+	bh->b_bdev = iomapp->iomap_target->bt_bdev;
 	set_buffer_mapped(bh);
 	clear_buffer_delay(bh);
+	clear_buffer_unwritten(bh);
 }
 
 /*
- * Look for a page at index which is unlocked and contains our
- * unwritten extent flagged buffers at its head.  Returns page
- * locked and with an extra reference count, and length of the
- * unwritten extent component on this page that we can write,
- * in units of filesystem blocks.
- */
-STATIC struct page *
-xfs_probe_unwritten_page(
-	struct address_space	*mapping,
-	pgoff_t			index,
-	xfs_iomap_t		*iomapp,
-	xfs_ioend_t		*ioend,
-	unsigned long		max_offset,
-	unsigned long		*fsbs,
-	unsigned int		bbits)
-{
-	struct page		*page;
-
-	page = find_trylock_page(mapping, index);
-	if (!page)
-		return NULL;
-	if (PageWriteback(page))
-		goto out;
-
-	if (page->mapping && page_has_buffers(page)) {
-		struct buffer_head	*bh, *head;
-		unsigned long		p_offset = 0;
-
-		*fsbs = 0;
-		bh = head = page_buffers(page);
-		do {
-			if (!buffer_unwritten(bh) || !buffer_uptodate(bh))
-				break;
-			if (!xfs_offset_to_map(page, iomapp, p_offset))
-				break;
-			if (p_offset >= max_offset)
-				break;
-			xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);
-			set_buffer_unwritten_io(bh);
-			bh->b_private = ioend;
-			p_offset += bh->b_size;
-			(*fsbs)++;
-		} while ((bh = bh->b_this_page) != head);
-
-		if (p_offset)
-			return page;
-	}
-
-out:
-	unlock_page(page);
-	return NULL;
-}
-
-/*
- * Look for a page at index which is unlocked and not mapped
- * yet - clustering for mmap write case.
+ * Look for a page at index that is suitable for clustering.
  */
 STATIC unsigned int
-xfs_probe_unmapped_page(
-	struct address_space	*mapping,
-	pgoff_t			index,
-	unsigned int		pg_offset)
+xfs_probe_page(
+	struct page		*page,
+	unsigned int		pg_offset,
+	int			mapped)
 {
-	struct page		*page;
 	int			ret = 0;
 
-	page = find_trylock_page(mapping, index);
-	if (!page)
-		return 0;
 	if (PageWriteback(page))
-		goto out;
+		return 0;
 
 	if (page->mapping && PageDirty(page)) {
 		if (page_has_buffers(page)) {
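The contiguity check in xfs_submit_ioend() above (bh->b_blocknr != lastblock + 1) is what splits one ioend's buffer chain into the fewest possible bios. A minimal userspace sketch of that batching rule, grouping a run of block numbers into maximal contiguous spans (hypothetical types; submit() just prints where submit_bio would go):

#include <stdio.h>

/* Models one bio: a contiguous span of blocks. */
static void submit(long start, int count)
{
	printf("submit bio: blocks %ld..%ld\n", start, start + count - 1);
}

/*
 * Models pass 2 of xfs_submit_ioend(): start a new "bio" whenever the next
 * block is not exactly lastblock + 1.
 */
static void submit_chain(const long *blocks, int n)
{
	long start = 0, lastblock = 0;
	int count = 0, i;

	for (i = 0; i < n; i++) {
		if (!count) {
			start = blocks[i];
		} else if (blocks[i] != lastblock + 1) {
			submit(start, count);
			start = blocks[i];
			count = 0;
		}
		lastblock = blocks[i];
		count++;
	}
	if (count)
		submit(start, count);
}

int main(void)
{
	/* One gap at 103 -> two bios: 100..102 and 200..201. */
	long blocks[] = { 100, 101, 102, 200, 201 };

	submit_chain(blocks, 5);
	return 0;
}

The real code also restarts a bio when bio_add_buffer() cannot take the whole buffer (device vector limits), which this sketch omits.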
@@ -357,79 +510,101 @@ xfs_probe_unmapped_page(
 
 			bh = head = page_buffers(page);
 			do {
-				if (buffer_mapped(bh) || !buffer_uptodate(bh))
+				if (!buffer_uptodate(bh))
+					break;
+				if (mapped != buffer_mapped(bh))
 					break;
 				ret += bh->b_size;
 				if (ret >= pg_offset)
 					break;
 			} while ((bh = bh->b_this_page) != head);
 		} else
-			ret = PAGE_CACHE_SIZE;
+			ret = mapped ? 0 : PAGE_CACHE_SIZE;
 	}
 
-out:
-	unlock_page(page);
 	return ret;
 }
 
-STATIC unsigned int
-xfs_probe_unmapped_cluster(
+STATIC size_t
+xfs_probe_cluster(
 	struct inode		*inode,
 	struct page		*startpage,
 	struct buffer_head	*bh,
-	struct buffer_head	*head)
+	struct buffer_head	*head,
+	int			mapped)
 {
+	struct pagevec		pvec;
 	pgoff_t			tindex, tlast, tloff;
-	unsigned int		pg_offset, len, total = 0;
-	struct address_space	*mapping = inode->i_mapping;
+	size_t			total = 0;
+	int			done = 0, i;
 
 	/* First sum forwards in this page */
 	do {
-		if (buffer_mapped(bh))
-			break;
+		if (mapped != buffer_mapped(bh))
+			return total;
 		total += bh->b_size;
 	} while ((bh = bh->b_this_page) != head);
 
-	/* If we reached the end of the page, sum forwards in
-	 * following pages.
-	 */
-	if (bh == head) {
-		tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
-		/* Prune this back to avoid pathological behavior */
-		tloff = min(tlast, startpage->index + 64);
-		for (tindex = startpage->index + 1; tindex < tloff; tindex++) {
-			len = xfs_probe_unmapped_page(mapping, tindex,
-							PAGE_CACHE_SIZE);
-			if (!len)
-				return total;
+	/* if we reached the end of the page, sum forwards in following pages */
+	tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
+	tindex = startpage->index + 1;
+
+	/* Prune this back to avoid pathological behavior */
+	tloff = min(tlast, startpage->index + 64);
+
+	pagevec_init(&pvec, 0);
+	while (!done && tindex <= tloff) {
+		unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
+
+		if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
+			break;
+
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			struct page *page = pvec.pages[i];
+			size_t pg_offset, len = 0;
+
+			if (tindex == tlast) {
+				pg_offset =
+					i_size_read(inode) & (PAGE_CACHE_SIZE - 1);
+				if (!pg_offset) {
+					done = 1;
+					break;
+				}
+			} else
+				pg_offset = PAGE_CACHE_SIZE;
+
+			if (page->index == tindex && !TestSetPageLocked(page)) {
+				len = xfs_probe_page(page, pg_offset, mapped);
+				unlock_page(page);
+			}
+
+			if (!len) {
+				done = 1;
+				break;
+			}
+
 			total += len;
+			tindex++;
 		}
-		if (tindex == tlast &&
-		    (pg_offset = i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
-			total += xfs_probe_unmapped_page(mapping,
-							tindex, pg_offset);
-		}
+
+		pagevec_release(&pvec);
+		cond_resched();
 	}
+
 	return total;
 }
 
 /*
- * Probe for a given page (index) in the inode and test if it is delayed
- * and without unwritten buffers.  Returns page locked and with an extra
- * reference count.
+ * Test if a given page is suitable for writing as part of an unwritten
+ * or delayed allocate extent.
  */
-STATIC struct page *
-xfs_probe_delalloc_page(
-	struct inode		*inode,
-	pgoff_t			index)
+STATIC int
+xfs_is_delayed_page(
+	struct page		*page,
+	unsigned int		type)
 {
-	struct page		*page;
-
-	page = find_trylock_page(inode->i_mapping, index);
-	if (!page)
-		return NULL;
 	if (PageWriteback(page))
-		goto out;
+		return 0;
 
 	if (page->mapping && page_has_buffers(page)) {
 		struct buffer_head	*bh, *head;
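xfs_probe_page() only credits a page toward the cluster while each buffer is uptodate and agrees with the mapped/unmapped state being probed for; the first mismatch ends the run. A compact userspace sketch of that uniformity scan (hypothetical buffer-state struct, not the kernel's buffer flags):

#include <stdio.h>

/* Hypothetical per-buffer state modelling buffer_uptodate()/buffer_mapped(). */
struct buf_state {
	int uptodate;
	int mapped;
	int size;
};

/*
 * Models the forward scan in xfs_probe_page(): sum buffer sizes while each
 * buffer is uptodate and matches the 'mapped' state we are probing for.
 */
static int probe_len(const struct buf_state *bufs, int nbufs, int pg_limit,
		     int mapped)
{
	int ret = 0, i;

	for (i = 0; i < nbufs; i++) {
		if (!bufs[i].uptodate)
			break;
		if (mapped != bufs[i].mapped)
			break;
		ret += bufs[i].size;
		if (ret >= pg_limit)
			break;
	}
	return ret;
}

int main(void)
{
	struct buf_state page[4] = {
		{ 1, 0, 1024 }, { 1, 0, 1024 }, { 1, 1, 1024 }, { 1, 0, 1024 },
	};

	/* Probing for unmapped buffers stops at the third (mapped) one. */
	printf("usable bytes: %d\n", probe_len(page, 4, 4096, 0)); /* 2048 */
	return 0;
}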
@@ -437,243 +612,156 @@ xfs_probe_delalloc_page(
 
 		bh = head = page_buffers(page);
 		do {
-			if (buffer_unwritten(bh)) {
-				acceptable = 0;
+			if (buffer_unwritten(bh))
+				acceptable = (type == IOMAP_UNWRITTEN);
+			else if (buffer_delay(bh))
+				acceptable = (type == IOMAP_DELAY);
+			else if (buffer_mapped(bh))
+				acceptable = (type == 0);
+			else
 				break;
-			} else if (buffer_delay(bh)) {
-				acceptable = 1;
-			}
 		} while ((bh = bh->b_this_page) != head);
 
 		if (acceptable)
-			return page;
-	}
-
-out:
-	unlock_page(page);
-	return NULL;
-}
-
-STATIC int
-xfs_map_unwritten(
-	struct inode		*inode,
-	struct page		*start_page,
-	struct buffer_head	*head,
-	struct buffer_head	*curr,
-	unsigned long		p_offset,
-	int			block_bits,
-	xfs_iomap_t		*iomapp,
-	struct writeback_control *wbc,
-	int			startio,
-	int			all_bh)
-{
-	struct buffer_head	*bh = curr;
-	xfs_iomap_t		*tmp;
-	xfs_ioend_t		*ioend;
-	loff_t			offset;
-	unsigned long		nblocks = 0;
-
-	offset = start_page->index;
-	offset <<= PAGE_CACHE_SHIFT;
-	offset += p_offset;
-
-	ioend = xfs_alloc_ioend(inode);
-
-	/* First map forwards in the page consecutive buffers
-	 * covering this unwritten extent
-	 */
-	do {
-		if (!buffer_unwritten(bh))
-			break;
-		tmp = xfs_offset_to_map(start_page, iomapp, p_offset);
-		if (!tmp)
-			break;
-		xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp);
-		set_buffer_unwritten_io(bh);
-		bh->b_private = ioend;
-		p_offset += bh->b_size;
-		nblocks++;
-	} while ((bh = bh->b_this_page) != head);
-
-	atomic_add(nblocks, &ioend->io_remaining);
-
-	/* If we reached the end of the page, map forwards in any
-	 * following pages which are also covered by this extent.
-	 */
-	if (bh == head) {
-		struct address_space	*mapping = inode->i_mapping;
-		pgoff_t			tindex, tloff, tlast;
-		unsigned long		bs;
-		unsigned int		pg_offset, bbits = inode->i_blkbits;
-		struct page		*page;
-
-		tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
-		tloff = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT;
-		tloff = min(tlast, tloff);
-		for (tindex = start_page->index + 1; tindex < tloff; tindex++) {
-			page = xfs_probe_unwritten_page(mapping,
-						tindex, iomapp, ioend,
-						PAGE_CACHE_SIZE, &bs, bbits);
-			if (!page)
-				break;
-			nblocks += bs;
-			atomic_add(bs, &ioend->io_remaining);
-			xfs_convert_page(inode, page, iomapp, wbc, ioend,
-							startio, all_bh);
-			/* stop if converting the next page might add
-			 * enough blocks that the corresponding byte
-			 * count won't fit in our ulong page buf length */
-			if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
-				goto enough;
-		}
-
-		if (tindex == tlast &&
-		    (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) {
-			page = xfs_probe_unwritten_page(mapping,
-						tindex, iomapp, ioend,
-						pg_offset, &bs, bbits);
-			if (page) {
-				nblocks += bs;
-				atomic_add(bs, &ioend->io_remaining);
-				xfs_convert_page(inode, page, iomapp, wbc, ioend,
-							startio, all_bh);
-				if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
-					goto enough;
-			}
-		}
-	}
+			return 1;
 	}
 
-enough:
-	ioend->io_size = (xfs_off_t)nblocks << block_bits;
-	ioend->io_offset = offset;
-	xfs_finish_ioend(ioend);
 	return 0;
 }
 
-STATIC void
-xfs_submit_page(
-	struct page		*page,
-	struct writeback_control *wbc,
-	struct buffer_head	*bh_arr[],
-	int			bh_count,
-	int			probed_page,
-	int			clear_dirty)
-{
-	struct buffer_head	*bh;
-	int			i;
-
-	BUG_ON(PageWriteback(page));
-	if (bh_count)
-		set_page_writeback(page);
-	if (clear_dirty)
-		clear_page_dirty(page);
-	unlock_page(page);
-
-	if (bh_count) {
-		for (i = 0; i < bh_count; i++) {
-			bh = bh_arr[i];
-			mark_buffer_async_write(bh);
-			if (buffer_unwritten(bh))
-				set_buffer_unwritten_io(bh);
-			set_buffer_uptodate(bh);
-			clear_buffer_dirty(bh);
-		}
-
-		for (i = 0; i < bh_count; i++)
-			submit_bh(WRITE, bh_arr[i]);
-
-		if (probed_page && clear_dirty)
-			wbc->nr_to_write--;	/* Wrote an "extra" page */
-	}
-}
-
 /*
  * Allocate & map buffers for page given the extent map. Write it out.
  * except for the original page of a writepage, this is called on
  * delalloc/unwritten pages only, for the original page it is possible
  * that the page has no mapping at all.
  */
-STATIC void
+STATIC int
 xfs_convert_page(
 	struct inode		*inode,
 	struct page		*page,
-	xfs_iomap_t		*iomapp,
+	loff_t			tindex,
+	xfs_iomap_t		*mp,
+	xfs_ioend_t		**ioendp,
 	struct writeback_control *wbc,
-	void			*private,
 	int			startio,
 	int			all_bh)
 {
-	struct buffer_head	*bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
-	xfs_iomap_t		*mp = iomapp, *tmp;
-	unsigned long		offset, end_offset;
-	int			index = 0;
+	struct buffer_head	*bh, *head;
+	xfs_off_t		end_offset;
+	unsigned long		p_offset;
+	unsigned int		type;
 	int			bbits = inode->i_blkbits;
 	int			len, page_dirty;
+	int			count = 0, done = 0, uptodate = 1;
+	xfs_off_t		offset = page_offset(page);
 
-	end_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1));
+	if (page->index != tindex)
+		goto fail;
+	if (TestSetPageLocked(page))
+		goto fail;
+	if (PageWriteback(page))
+		goto fail_unlock_page;
+	if (page->mapping != inode->i_mapping)
+		goto fail_unlock_page;
+	if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
+		goto fail_unlock_page;
 
 	/*
 	 * page_dirty is initially a count of buffers on the page before
 	 * EOF and is decrememted as we move each into a cleanable state.
+	 *
+	 * Derivation:
+	 *
+	 * End offset is the highest offset that this page should represent.
+	 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
+	 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
+	 * hence give us the correct page_dirty count. On any other page,
+	 * it will be zero and in that case we need page_dirty to be the
+	 * count of buffers on the page.
 	 */
+	end_offset = min_t(unsigned long long,
+			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
+			i_size_read(inode));
+
 	len = 1 << inode->i_blkbits;
-	end_offset = max(end_offset, PAGE_CACHE_SIZE);
-	end_offset = roundup(end_offset, len);
-	page_dirty = end_offset / len;
+	p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
+					PAGE_CACHE_SIZE);
+	p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
+	page_dirty = p_offset / len;
 
-	offset = 0;
 	bh = head = page_buffers(page);
 	do {
 		if (offset >= end_offset)
 			break;
-		if (!(PageUptodate(page) || buffer_uptodate(bh)))
+		if (!buffer_uptodate(bh))
+			uptodate = 0;
+		if (!(PageUptodate(page) || buffer_uptodate(bh))) {
+			done = 1;
 			continue;
-		if (buffer_mapped(bh) && all_bh &&
-		    !(buffer_unwritten(bh) || buffer_delay(bh))) {
+		}
+
+		if (buffer_unwritten(bh) || buffer_delay(bh)) {
+			if (buffer_unwritten(bh))
+				type = IOMAP_UNWRITTEN;
+			else
+				type = IOMAP_DELAY;
+
+			if (!xfs_iomap_valid(mp, offset)) {
+				done = 1;
+				continue;
+			}
+
+			ASSERT(!(mp->iomap_flags & IOMAP_HOLE));
+			ASSERT(!(mp->iomap_flags & IOMAP_DELAY));
+
+			xfs_map_at_offset(bh, offset, bbits, mp);
 			if (startio) {
+				xfs_add_to_ioend(inode, bh, offset,
+						type, ioendp, done);
+			} else {
+				set_buffer_dirty(bh);
+				unlock_buffer(bh);
+				mark_buffer_dirty(bh);
+			}
+			page_dirty--;
+			count++;
+		} else {
+			type = 0;
+			if (buffer_mapped(bh) && all_bh && startio) {
 				lock_buffer(bh);
-				bh_arr[index++] = bh;
+				xfs_add_to_ioend(inode, bh, offset,
+						type, ioendp, done);
+				count++;
 				page_dirty--;
+			} else {
+				done = 1;
 			}
-			continue;
 		}
-		tmp = xfs_offset_to_map(page, mp, offset);
-		if (!tmp)
-			continue;
-		ASSERT(!(tmp->iomap_flags & IOMAP_HOLE));
-		ASSERT(!(tmp->iomap_flags & IOMAP_DELAY));
+	} while (offset += len, (bh = bh->b_this_page) != head);
 
-		/* If this is a new unwritten extent buffer (i.e. one
-		 * that we haven't passed in private data for, we must
-		 * now map this buffer too.
-		 */
-		if (buffer_unwritten(bh) && !bh->b_end_io) {
-			ASSERT(tmp->iomap_flags & IOMAP_UNWRITTEN);
-			xfs_map_unwritten(inode, page, head, bh, offset,
-					bbits, tmp, wbc, startio, all_bh);
-		} else if (! (buffer_unwritten(bh) && buffer_locked(bh))) {
-			xfs_map_at_offset(page, bh, offset, bbits, tmp);
-			if (buffer_unwritten(bh)) {
-				set_buffer_unwritten_io(bh);
-				bh->b_private = private;
-				ASSERT(private);
+	if (uptodate && bh == head)
+		SetPageUptodate(page);
+
+	if (startio) {
+		if (count) {
+			struct backing_dev_info *bdi;
+
+			bdi = inode->i_mapping->backing_dev_info;
+			if (bdi_write_congested(bdi)) {
+				wbc->encountered_congestion = 1;
+				done = 1;
+			} else if (--wbc->nr_to_write <= 0) {
+				done = 1;
 			}
 		}
-		if (startio) {
-			bh_arr[index++] = bh;
-		} else {
-			set_buffer_dirty(bh);
-			unlock_buffer(bh);
-			mark_buffer_dirty(bh);
-		}
-		page_dirty--;
-	} while (offset += len, (bh = bh->b_this_page) != head);
+		xfs_start_page_writeback(page, wbc, !page_dirty, count);
+	}
 
-	if (startio && index) {
-		xfs_submit_page(page, wbc, bh_arr, index, 1, !page_dirty);
-	} else {
-		unlock_page(page);
-	}
+	return done;
+ fail_unlock_page:
+	unlock_page(page);
+ fail:
+	return 1;
 }
 
 /*
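The Derivation comment above pins down how page_dirty is computed in xfs_convert_page(). A self-contained userspace check of that arithmetic (assuming 4 KiB pages; roundup_ull() is a local helper, not a kernel API):

#include <assert.h>
#include <stdio.h>

#define PAGE_CACHE_SIZE 4096ULL

/* Round v up to the next multiple of len. */
static unsigned long long roundup_ull(unsigned long long v,
				      unsigned long long len)
{
	return ((v + len - 1) / len) * len;
}

/*
 * Models the page_dirty derivation: given the EOF-limited end offset of a
 * page and the block size, count the buffers that sit before EOF.
 */
static unsigned long page_dirty_count(unsigned long long end_offset,
				      unsigned long long blocksize)
{
	unsigned long long p_offset = end_offset & (PAGE_CACHE_SIZE - 1);

	/* The clamp mirrors the min_t() in the patch; the mask already
	 * guarantees p_offset < PAGE_CACHE_SIZE. */
	if (p_offset > PAGE_CACHE_SIZE)
		p_offset = PAGE_CACHE_SIZE;
	p_offset = p_offset ? roundup_ull(p_offset, blocksize)
			    : PAGE_CACHE_SIZE;
	return p_offset / blocksize;
}

int main(void)
{
	/* Last page of a 10000-byte file, 1 KiB blocks: 10000 % 4096 = 1808,
	 * rounded up to 2048, so 2 of the 4 buffers precede EOF. */
	assert(page_dirty_count(10000, 1024) == 2);
	/* A fully covered page masks to 0, so all 4 buffers count. */
	assert(page_dirty_count(8192, 1024) == 4);
	puts("page_dirty derivation checks out");
	return 0;
}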
@@ -685,19 +773,31 @@ xfs_cluster_write(
 	struct inode		*inode,
 	pgoff_t			tindex,
 	xfs_iomap_t		*iomapp,
+	xfs_ioend_t		**ioendp,
 	struct writeback_control *wbc,
 	int			startio,
 	int			all_bh,
 	pgoff_t			tlast)
 {
-	struct page		*page;
+	struct pagevec		pvec;
+	int			done = 0, i;
+
+	pagevec_init(&pvec, 0);
+	while (!done && tindex <= tlast) {
+		unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
 
-	for (; tindex <= tlast; tindex++) {
-		page = xfs_probe_delalloc_page(inode, tindex);
-		if (!page)
+		if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
 			break;
-		xfs_convert_page(inode, page, iomapp, wbc, NULL,
-				startio, all_bh);
+
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			done = xfs_convert_page(inode, pvec.pages[i], tindex++,
+					iomapp, ioendp, wbc, startio, all_bh);
+			if (done)
+				break;
+		}
+
+		pagevec_release(&pvec);
+		cond_resched();
 	}
 }
 
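xfs_cluster_write() now pulls following pages in PAGEVEC_SIZE batches and stops at the first page it cannot convert. A loose userspace sketch of that batched, early-exit walk; lookup_batch() is a made-up stand-in for pagevec_lookup():

#include <stdio.h>

#define BATCH 16	/* models PAGEVEC_SIZE */

/* Hypothetical lookup: fills batch[] with up to 'want' existing indices
 * starting at 'start' (here, every index except multiples of 7 exists). */
static int lookup_batch(unsigned long start, unsigned want,
			unsigned long *batch)
{
	unsigned n = 0;

	while (n < want && start <= 1000) {
		if (start % 7 != 0)
			batch[n++] = start;
		start++;
	}
	return (int)n;
}

int main(void)
{
	unsigned long tindex = 1, tlast = 40, batch[BATCH];
	int done = 0, i, n, converted = 0;

	while (!done && tindex <= tlast) {
		unsigned want = (tlast - tindex + 1 < BATCH)
				? (unsigned)(tlast - tindex + 1) : BATCH;

		n = lookup_batch(tindex, want, batch);
		if (!n)
			break;

		for (i = 0; i < n; i++) {
			/* Contiguity check mirrors page->index == tindex. */
			if (batch[i] != tindex++) {
				done = 1;
				break;
			}
			converted++;
		}
	}
	printf("converted %d contiguous pages\n", converted);
	return 0;
}

Batched lookups replace one radix-tree probe per page with one per sixteen pages, and the cond_resched() between batches keeps long clusters preemptible.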
@@ -728,18 +828,22 @@ xfs_page_state_convert(
 	int		startio,
 	int		unmapped) /* also implies page uptodate */
 {
-	struct buffer_head	*bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
-	xfs_iomap_t		*iomp, iomap;
+	struct buffer_head	*bh, *head;
+	xfs_iomap_t		iomap;
+	xfs_ioend_t		*ioend = NULL, *iohead = NULL;
 	loff_t			offset;
 	unsigned long		p_offset = 0;
+	unsigned int		type;
 	__uint64_t		end_offset;
 	pgoff_t			end_index, last_index, tlast;
-	int			len, err, i, cnt = 0, uptodate = 1;
-	int			flags;
-	int			page_dirty;
+	ssize_t			size, len;
+	int			flags, err, iomap_valid = 0, uptodate = 1;
+	int			page_dirty, count = 0, trylock_flag = 0;
+	int			all_bh = unmapped;
 
 	/* wait for other IO threads? */
-	flags = (startio && wbc->sync_mode != WB_SYNC_NONE) ? 0 : BMAPI_TRYLOCK;
+	if (startio && (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking))
+		trylock_flag |= BMAPI_TRYLOCK;
 
 	/* Is this page beyond the end of the file? */
 	offset = i_size_read(inode);
@@ -754,161 +858,173 @@ xfs_page_state_convert(
 		}
 	}
 
-	end_offset = min_t(unsigned long long,
-			(loff_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
-	offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
-
 	/*
 	 * page_dirty is initially a count of buffers on the page before
 	 * EOF and is decrememted as we move each into a cleanable state.
-	 */
+	 *
+	 * Derivation:
+	 *
+	 * End offset is the highest offset that this page should represent.
+	 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
+	 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
+	 * hence give us the correct page_dirty count. On any other page,
+	 * it will be zero and in that case we need page_dirty to be the
+	 * count of buffers on the page.
+	 */
+	end_offset = min_t(unsigned long long,
+			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
 	len = 1 << inode->i_blkbits;
-	p_offset = max(p_offset, PAGE_CACHE_SIZE);
-	p_offset = roundup(p_offset, len);
+	p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
+					PAGE_CACHE_SIZE);
+	p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
 	page_dirty = p_offset / len;
 
-	iomp = NULL;
-	p_offset = 0;
 	bh = head = page_buffers(page);
+	offset = page_offset(page);
+	flags = -1;
+	type = 0;
+
+	/* TODO: cleanup count and page_dirty */
 
 	do {
 		if (offset >= end_offset)
 			break;
 		if (!buffer_uptodate(bh))
 			uptodate = 0;
-		if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio)
+		if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) {
+			/*
+			 * the iomap is actually still valid, but the ioend
+			 * isn't.  shouldn't happen too often.
+			 */
+			iomap_valid = 0;
 			continue;
-
-		if (iomp) {
-			iomp = xfs_offset_to_map(page, &iomap, p_offset);
 		}
 
+		if (iomap_valid)
+			iomap_valid = xfs_iomap_valid(&iomap, offset);
+
 		/*
 		 * First case, map an unwritten extent and prepare for
 		 * extent state conversion transaction on completion.
-		 */
-		if (buffer_unwritten(bh)) {
-			if (!startio)
-				continue;
-			if (!iomp) {
-				err = xfs_map_blocks(inode, offset, len, &iomap,
-						BMAPI_WRITE|BMAPI_IGNSTATE);
-				if (err) {
-					goto error;
-				}
-				iomp = xfs_offset_to_map(page, &iomap,
-								p_offset);
+		 *
+		 * Second case, allocate space for a delalloc buffer.
+		 * We can return EAGAIN here in the release page case.
+		 *
+		 * Third case, an unmapped buffer was found, and we are
+		 * in a path where we need to write the whole page out.
+		 */
+		if (buffer_unwritten(bh) || buffer_delay(bh) ||
+		    ((buffer_uptodate(bh) || PageUptodate(page)) &&
+		     !buffer_mapped(bh) && (unmapped || startio))) {
+			/*
+			 * Make sure we don't use a read-only iomap
+			 */
+			if (flags == BMAPI_READ)
+				iomap_valid = 0;
+
+			if (buffer_unwritten(bh)) {
+				type = IOMAP_UNWRITTEN;
+				flags = BMAPI_WRITE|BMAPI_IGNSTATE;
+			} else if (buffer_delay(bh)) {
+				type = IOMAP_DELAY;
+				flags = BMAPI_ALLOCATE;
+				if (!startio)
+					flags |= trylock_flag;
+			} else {
+				type = IOMAP_NEW;
+				flags = BMAPI_WRITE|BMAPI_MMAP;
 			}
-			if (iomp) {
-				if (!bh->b_end_io) {
-					err = xfs_map_unwritten(inode, page,
-							head, bh, p_offset,
-							inode->i_blkbits, iomp,
-							wbc, startio, unmapped);
-					if (err) {
-						goto error;
-					}
+
+			if (!iomap_valid) {
+				if (type == IOMAP_NEW) {
+					size = xfs_probe_cluster(inode,
+							page, bh, head, 0);
 				} else {
-					set_bit(BH_Lock, &bh->b_state);
+					size = len;
 				}
-				BUG_ON(!buffer_locked(bh));
-				bh_arr[cnt++] = bh;
-				page_dirty--;
-			}
-		/*
-		 * Second case, allocate space for a delalloc buffer.
-		 * We can return EAGAIN here in the release page case.
-		 */
-		} else if (buffer_delay(bh)) {
-			if (!iomp) {
-				err = xfs_map_blocks(inode, offset, len, &iomap,
-						BMAPI_ALLOCATE | flags);
-				if (err) {
+
+				err = xfs_map_blocks(inode, offset, size,
+						&iomap, flags);
+				if (err)
 					goto error;
-				}
-				iomp = xfs_offset_to_map(page, &iomap,
-								p_offset);
+				iomap_valid = xfs_iomap_valid(&iomap, offset);
 			}
-			if (iomp) {
-				xfs_map_at_offset(page, bh, p_offset,
-						inode->i_blkbits, iomp);
+			if (iomap_valid) {
+				xfs_map_at_offset(bh, offset,
+						inode->i_blkbits, &iomap);
 				if (startio) {
-					bh_arr[cnt++] = bh;
+					xfs_add_to_ioend(inode, bh, offset,
+							type, &ioend,
+							!iomap_valid);
 				} else {
 					set_buffer_dirty(bh);
 					unlock_buffer(bh);
 					mark_buffer_dirty(bh);
 				}
 				page_dirty--;
+				count++;
+			}
+		} else if (buffer_uptodate(bh) && startio) {
+			/*
+			 * we got here because the buffer is already mapped.
+			 * That means it must already have extents allocated
+			 * underneath it. Map the extent by reading it.
+			 */
+			if (!iomap_valid || type != 0) {
+				flags = BMAPI_READ;
+				size = xfs_probe_cluster(inode, page, bh,
+								head, 1);
+				err = xfs_map_blocks(inode, offset, size,
+						&iomap, flags);
+				if (err)
+					goto error;
+				iomap_valid = xfs_iomap_valid(&iomap, offset);
 			}
-		} else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
-			   (unmapped || startio)) {
 
-			if (!buffer_mapped(bh)) {
-				int	size;
-
-				/*
-				 * Getting here implies an unmapped buffer
-				 * was found, and we are in a path where we
-				 * need to write the whole page out.
-				 */
-				if (!iomp) {
-					size = xfs_probe_unmapped_cluster(
-							inode, page, bh, head);
-					err = xfs_map_blocks(inode, offset,
-							size, &iomap,
-							BMAPI_WRITE|BMAPI_MMAP);
-					if (err) {
-						goto error;
-					}
-					iomp = xfs_offset_to_map(page, &iomap,
-								     p_offset);
-				}
-				if (iomp) {
-					xfs_map_at_offset(page,
-							bh, p_offset,
-							inode->i_blkbits, iomp);
-					if (startio) {
-						bh_arr[cnt++] = bh;
-					} else {
-						set_buffer_dirty(bh);
-						unlock_buffer(bh);
-						mark_buffer_dirty(bh);
-					}
-					page_dirty--;
-				}
-			} else if (startio) {
-				if (buffer_uptodate(bh) &&
-				    !test_and_set_bit(BH_Lock, &bh->b_state)) {
-					bh_arr[cnt++] = bh;
-					page_dirty--;
-				}
-			}
+			type = 0;
+			if (!test_and_set_bit(BH_Lock, &bh->b_state)) {
+				ASSERT(buffer_mapped(bh));
+				if (iomap_valid)
+					all_bh = 1;
+				xfs_add_to_ioend(inode, bh, offset, type,
+						&ioend, !iomap_valid);
+				page_dirty--;
+				count++;
+			} else {
+				iomap_valid = 0;
 			}
+		} else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
+			   (unmapped || startio)) {
+			iomap_valid = 0;
 		}
-	} while (offset += len, p_offset += len,
-		 ((bh = bh->b_this_page) != head));
+
+		if (!iohead)
+			iohead = ioend;
+
+	} while (offset += len, ((bh = bh->b_this_page) != head));
 
 	if (uptodate && bh == head)
 		SetPageUptodate(page);
 
-	if (startio) {
-		xfs_submit_page(page, wbc, bh_arr, cnt, 0, !page_dirty);
-	}
+	if (startio)
+		xfs_start_page_writeback(page, wbc, 1, count);
 
-	if (iomp) {
-		offset = (iomp->iomap_offset + iomp->iomap_bsize - 1) >>
+	if (ioend && iomap_valid) {
+		offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
 							PAGE_CACHE_SHIFT;
 		tlast = min_t(pgoff_t, offset, last_index);
-		xfs_cluster_write(inode, page->index + 1, iomp, wbc,
-					startio, unmapped, tlast);
+		xfs_cluster_write(inode, page->index + 1, &iomap, &ioend,
+					wbc, startio, all_bh, tlast);
 	}
 
+	if (iohead)
+		xfs_submit_ioend(iohead);
+
 	return page_dirty;
 
 error:
-	for (i = 0; i < cnt; i++) {
-		unlock_buffer(bh_arr[i]);
-	}
+	if (iohead)
+		xfs_cancel_ioend(iohead);
 
 	/*
 	 * If it's delalloc and we have nowhere to put it,
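The rewritten buffer loop above first classifies each buffer into one of three cases before mapping it. A minimal userspace model of that classification (hypothetical enum values standing in for the IOMAP_* types):

#include <stdio.h>

/* Hypothetical stand-ins for the iomap types chosen in the loop above. */
enum map_type { MAP_NEW, MAP_DELAY, MAP_UNWRITTEN };

struct bh_model {
	int unwritten;	/* models buffer_unwritten() */
	int delay;	/* models buffer_delay() */
};

/*
 * Models the classification in xfs_page_state_convert(): unwritten extents
 * need a conversion transaction on completion, delalloc buffers need space
 * allocated, and anything else unmapped is a fresh (mmap-style) write.
 */
static enum map_type classify(const struct bh_model *bh)
{
	if (bh->unwritten)
		return MAP_UNWRITTEN;
	if (bh->delay)
		return MAP_DELAY;
	return MAP_NEW;
}

int main(void)
{
	struct bh_model cases[] = { {1, 0}, {0, 1}, {0, 0} };
	const char *names[] = { "IOMAP_NEW", "IOMAP_DELAY", "IOMAP_UNWRITTEN" };
	unsigned i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		printf("case %u -> %s\n", i, names[classify(&cases[i])]);
	return 0;
}

Because xfs_add_to_ioend() starts a new ioend whenever the type changes, this classification is also what decides where one ioend ends and the next begins.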
@@ -916,9 +1032,8 @@ error:
 	 * us to try again.
 	 */
 	if (err != -EAGAIN) {
-		if (!unmapped) {
+		if (!unmapped)
 			block_invalidatepage(page, 0);
-		}
 		ClearPageUptodate(page);
 	}
 	return err;
@@ -982,7 +1097,7 @@ __linvfs_get_block(
 	}
 
 	/* If this is a realtime file, data might be on a new device */
-	bh_result->b_bdev = iomap.iomap_target->pbr_bdev;
+	bh_result->b_bdev = iomap.iomap_target->bt_bdev;
 
 	/* If we previously allocated a block out beyond eof and
 	 * we are now coming back to use it then we will need to
@@ -1094,10 +1209,10 @@ linvfs_direct_IO(
 	if (error)
 		return -error;
 
-	iocb->private = xfs_alloc_ioend(inode);
+	iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
 
 	ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
-		iomap.iomap_target->pbr_bdev,
+		iomap.iomap_target->bt_bdev,
 		iov, offset, nr_segs,
 		linvfs_get_blocks_direct,
 		linvfs_end_io_direct);