diff options
Diffstat (limited to 'fs/xfs/linux-2.6/xfs_aops.c')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_aops.c | 1088 |
1 files changed, 590 insertions, 498 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 94d3cdfbf9b8..d1db8c17a74e 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -40,11 +40,10 @@ | |||
40 | #include "xfs_rw.h" | 40 | #include "xfs_rw.h" |
41 | #include "xfs_iomap.h" | 41 | #include "xfs_iomap.h" |
42 | #include <linux/mpage.h> | 42 | #include <linux/mpage.h> |
43 | #include <linux/pagevec.h> | ||
43 | #include <linux/writeback.h> | 44 | #include <linux/writeback.h> |
44 | 45 | ||
45 | STATIC void xfs_count_page_state(struct page *, int *, int *, int *); | 46 | STATIC void xfs_count_page_state(struct page *, int *, int *, int *); |
46 | STATIC void xfs_convert_page(struct inode *, struct page *, xfs_iomap_t *, | ||
47 | struct writeback_control *wbc, void *, int, int); | ||
48 | 47 | ||
49 | #if defined(XFS_RW_TRACE) | 48 | #if defined(XFS_RW_TRACE) |
50 | void | 49 | void |
@@ -55,17 +54,15 @@ xfs_page_trace( | |||
55 | int mask) | 54 | int mask) |
56 | { | 55 | { |
57 | xfs_inode_t *ip; | 56 | xfs_inode_t *ip; |
58 | bhv_desc_t *bdp; | ||
59 | vnode_t *vp = LINVFS_GET_VP(inode); | 57 | vnode_t *vp = LINVFS_GET_VP(inode); |
60 | loff_t isize = i_size_read(inode); | 58 | loff_t isize = i_size_read(inode); |
61 | loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT; | 59 | loff_t offset = page_offset(page); |
62 | int delalloc = -1, unmapped = -1, unwritten = -1; | 60 | int delalloc = -1, unmapped = -1, unwritten = -1; |
63 | 61 | ||
64 | if (page_has_buffers(page)) | 62 | if (page_has_buffers(page)) |
65 | xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); | 63 | xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); |
66 | 64 | ||
67 | bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops); | 65 | ip = xfs_vtoi(vp); |
68 | ip = XFS_BHVTOI(bdp); | ||
69 | if (!ip->i_rwtrace) | 66 | if (!ip->i_rwtrace) |
70 | return; | 67 | return; |
71 | 68 | ||
@@ -103,15 +100,56 @@ xfs_finish_ioend( | |||
103 | queue_work(xfsdatad_workqueue, &ioend->io_work); | 100 | queue_work(xfsdatad_workqueue, &ioend->io_work); |
104 | } | 101 | } |
105 | 102 | ||
103 | /* | ||
104 | * We're now finished for good with this ioend structure. | ||
105 | * Update the page state via the associated buffer_heads, | ||
106 | * release holds on the inode and bio, and finally free | ||
107 | * up memory. Do not use the ioend after this. | ||
108 | */ | ||
106 | STATIC void | 109 | STATIC void |
107 | xfs_destroy_ioend( | 110 | xfs_destroy_ioend( |
108 | xfs_ioend_t *ioend) | 111 | xfs_ioend_t *ioend) |
109 | { | 112 | { |
113 | struct buffer_head *bh, *next; | ||
114 | |||
115 | for (bh = ioend->io_buffer_head; bh; bh = next) { | ||
116 | next = bh->b_private; | ||
117 | bh->b_end_io(bh, ioend->io_uptodate); | ||
118 | } | ||
119 | |||
110 | vn_iowake(ioend->io_vnode); | 120 | vn_iowake(ioend->io_vnode); |
111 | mempool_free(ioend, xfs_ioend_pool); | 121 | mempool_free(ioend, xfs_ioend_pool); |
112 | } | 122 | } |
113 | 123 | ||
114 | /* | 124 | /* |
125 | * Buffered IO write completion for delayed allocate extents. | ||
126 | * TODO: Update ondisk isize now that we know the file data | ||
127 | * has been flushed (i.e. the notorious "NULL file" problem). | ||
128 | */ | ||
129 | STATIC void | ||
130 | xfs_end_bio_delalloc( | ||
131 | void *data) | ||
132 | { | ||
133 | xfs_ioend_t *ioend = data; | ||
134 | |||
135 | xfs_destroy_ioend(ioend); | ||
136 | } | ||
137 | |||
138 | /* | ||
139 | * Buffered IO write completion for regular, written extents. | ||
140 | */ | ||
141 | STATIC void | ||
142 | xfs_end_bio_written( | ||
143 | void *data) | ||
144 | { | ||
145 | xfs_ioend_t *ioend = data; | ||
146 | |||
147 | xfs_destroy_ioend(ioend); | ||
148 | } | ||
149 | |||
150 | /* | ||
151 | * IO write completion for unwritten extents. | ||
152 | * | ||
115 | * Issue transactions to convert a buffer range from unwritten | 153 | * Issue transactions to convert a buffer range from unwritten |
116 | * to written extents. | 154 | * to written extents. |
117 | */ | 155 | */ |
@@ -123,21 +161,10 @@ xfs_end_bio_unwritten( | |||
123 | vnode_t *vp = ioend->io_vnode; | 161 | vnode_t *vp = ioend->io_vnode; |
124 | xfs_off_t offset = ioend->io_offset; | 162 | xfs_off_t offset = ioend->io_offset; |
125 | size_t size = ioend->io_size; | 163 | size_t size = ioend->io_size; |
126 | struct buffer_head *bh, *next; | ||
127 | int error; | 164 | int error; |
128 | 165 | ||
129 | if (ioend->io_uptodate) | 166 | if (ioend->io_uptodate) |
130 | VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); | 167 | VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); |
131 | |||
132 | /* ioend->io_buffer_head is only non-NULL for buffered I/O */ | ||
133 | for (bh = ioend->io_buffer_head; bh; bh = next) { | ||
134 | next = bh->b_private; | ||
135 | |||
136 | bh->b_end_io = NULL; | ||
137 | clear_buffer_unwritten(bh); | ||
138 | end_buffer_async_write(bh, ioend->io_uptodate); | ||
139 | } | ||
140 | |||
141 | xfs_destroy_ioend(ioend); | 168 | xfs_destroy_ioend(ioend); |
142 | } | 169 | } |
143 | 170 | ||
@@ -149,7 +176,8 @@ xfs_end_bio_unwritten( | |||
149 | */ | 176 | */ |
150 | STATIC xfs_ioend_t * | 177 | STATIC xfs_ioend_t * |
151 | xfs_alloc_ioend( | 178 | xfs_alloc_ioend( |
152 | struct inode *inode) | 179 | struct inode *inode, |
180 | unsigned int type) | ||
153 | { | 181 | { |
154 | xfs_ioend_t *ioend; | 182 | xfs_ioend_t *ioend; |
155 | 183 | ||
@@ -162,45 +190,25 @@ xfs_alloc_ioend( | |||
162 | */ | 190 | */ |
163 | atomic_set(&ioend->io_remaining, 1); | 191 | atomic_set(&ioend->io_remaining, 1); |
164 | ioend->io_uptodate = 1; /* cleared if any I/O fails */ | 192 | ioend->io_uptodate = 1; /* cleared if any I/O fails */ |
193 | ioend->io_list = NULL; | ||
194 | ioend->io_type = type; | ||
165 | ioend->io_vnode = LINVFS_GET_VP(inode); | 195 | ioend->io_vnode = LINVFS_GET_VP(inode); |
166 | ioend->io_buffer_head = NULL; | 196 | ioend->io_buffer_head = NULL; |
197 | ioend->io_buffer_tail = NULL; | ||
167 | atomic_inc(&ioend->io_vnode->v_iocount); | 198 | atomic_inc(&ioend->io_vnode->v_iocount); |
168 | ioend->io_offset = 0; | 199 | ioend->io_offset = 0; |
169 | ioend->io_size = 0; | 200 | ioend->io_size = 0; |
170 | 201 | ||
171 | INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend); | 202 | if (type == IOMAP_UNWRITTEN) |
203 | INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend); | ||
204 | else if (type == IOMAP_DELAY) | ||
205 | INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc, ioend); | ||
206 | else | ||
207 | INIT_WORK(&ioend->io_work, xfs_end_bio_written, ioend); | ||
172 | 208 | ||
173 | return ioend; | 209 | return ioend; |
174 | } | 210 | } |
175 | 211 | ||
176 | void | ||
177 | linvfs_unwritten_done( | ||
178 | struct buffer_head *bh, | ||
179 | int uptodate) | ||
180 | { | ||
181 | xfs_ioend_t *ioend = bh->b_private; | ||
182 | static spinlock_t unwritten_done_lock = SPIN_LOCK_UNLOCKED; | ||
183 | unsigned long flags; | ||
184 | |||
185 | ASSERT(buffer_unwritten(bh)); | ||
186 | bh->b_end_io = NULL; | ||
187 | |||
188 | if (!uptodate) | ||
189 | ioend->io_uptodate = 0; | ||
190 | |||
191 | /* | ||
192 | * Deep magic here. We reuse b_private in the buffer_heads to build | ||
193 | * a chain for completing the I/O from user context after we've issued | ||
194 | * a transaction to convert the unwritten extent. | ||
195 | */ | ||
196 | spin_lock_irqsave(&unwritten_done_lock, flags); | ||
197 | bh->b_private = ioend->io_buffer_head; | ||
198 | ioend->io_buffer_head = bh; | ||
199 | spin_unlock_irqrestore(&unwritten_done_lock, flags); | ||
200 | |||
201 | xfs_finish_ioend(ioend); | ||
202 | } | ||
203 | |||
204 | STATIC int | 212 | STATIC int |
205 | xfs_map_blocks( | 213 | xfs_map_blocks( |
206 | struct inode *inode, | 214 | struct inode *inode, |
@@ -218,138 +226,260 @@ xfs_map_blocks( | |||
218 | return -error; | 226 | return -error; |
219 | } | 227 | } |
220 | 228 | ||
229 | STATIC inline int | ||
230 | xfs_iomap_valid( | ||
231 | xfs_iomap_t *iomapp, | ||
232 | loff_t offset) | ||
233 | { | ||
234 | return offset >= iomapp->iomap_offset && | ||
235 | offset < iomapp->iomap_offset + iomapp->iomap_bsize; | ||
236 | } | ||
237 | |||
221 | /* | 238 | /* |
222 | * Finds the corresponding mapping in block @map array of the | 239 | * BIO completion handler for buffered IO. |
223 | * given @offset within a @page. | ||
224 | */ | 240 | */ |
225 | STATIC xfs_iomap_t * | 241 | STATIC int |
226 | xfs_offset_to_map( | 242 | xfs_end_bio( |
243 | struct bio *bio, | ||
244 | unsigned int bytes_done, | ||
245 | int error) | ||
246 | { | ||
247 | xfs_ioend_t *ioend = bio->bi_private; | ||
248 | |||
249 | if (bio->bi_size) | ||
250 | return 1; | ||
251 | |||
252 | ASSERT(ioend); | ||
253 | ASSERT(atomic_read(&bio->bi_cnt) >= 1); | ||
254 | |||
255 | /* Toss bio and pass work off to an xfsdatad thread */ | ||
256 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | ||
257 | ioend->io_uptodate = 0; | ||
258 | bio->bi_private = NULL; | ||
259 | bio->bi_end_io = NULL; | ||
260 | |||
261 | bio_put(bio); | ||
262 | xfs_finish_ioend(ioend); | ||
263 | return 0; | ||
264 | } | ||
265 | |||
266 | STATIC void | ||
267 | xfs_submit_ioend_bio( | ||
268 | xfs_ioend_t *ioend, | ||
269 | struct bio *bio) | ||
270 | { | ||
271 | atomic_inc(&ioend->io_remaining); | ||
272 | |||
273 | bio->bi_private = ioend; | ||
274 | bio->bi_end_io = xfs_end_bio; | ||
275 | |||
276 | submit_bio(WRITE, bio); | ||
277 | ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP)); | ||
278 | bio_put(bio); | ||
279 | } | ||
280 | |||
281 | STATIC struct bio * | ||
282 | xfs_alloc_ioend_bio( | ||
283 | struct buffer_head *bh) | ||
284 | { | ||
285 | struct bio *bio; | ||
286 | int nvecs = bio_get_nr_vecs(bh->b_bdev); | ||
287 | |||
288 | do { | ||
289 | bio = bio_alloc(GFP_NOIO, nvecs); | ||
290 | nvecs >>= 1; | ||
291 | } while (!bio); | ||
292 | |||
293 | ASSERT(bio->bi_private == NULL); | ||
294 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); | ||
295 | bio->bi_bdev = bh->b_bdev; | ||
296 | bio_get(bio); | ||
297 | return bio; | ||
298 | } | ||
299 | |||
300 | STATIC void | ||
301 | xfs_start_buffer_writeback( | ||
302 | struct buffer_head *bh) | ||
303 | { | ||
304 | ASSERT(buffer_mapped(bh)); | ||
305 | ASSERT(buffer_locked(bh)); | ||
306 | ASSERT(!buffer_delay(bh)); | ||
307 | ASSERT(!buffer_unwritten(bh)); | ||
308 | |||
309 | mark_buffer_async_write(bh); | ||
310 | set_buffer_uptodate(bh); | ||
311 | clear_buffer_dirty(bh); | ||
312 | } | ||
313 | |||
314 | STATIC void | ||
315 | xfs_start_page_writeback( | ||
227 | struct page *page, | 316 | struct page *page, |
228 | xfs_iomap_t *iomapp, | 317 | struct writeback_control *wbc, |
229 | unsigned long offset) | 318 | int clear_dirty, |
319 | int buffers) | ||
320 | { | ||
321 | ASSERT(PageLocked(page)); | ||
322 | ASSERT(!PageWriteback(page)); | ||
323 | set_page_writeback(page); | ||
324 | if (clear_dirty) | ||
325 | clear_page_dirty(page); | ||
326 | unlock_page(page); | ||
327 | if (!buffers) { | ||
328 | end_page_writeback(page); | ||
329 | wbc->pages_skipped++; /* We didn't write this page */ | ||
330 | } | ||
331 | } | ||
332 | |||
333 | static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh) | ||
334 | { | ||
335 | return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); | ||
336 | } | ||
337 | |||
338 | /* | ||
339 | * Submit all of the bios for all of the ioends we have saved up, | ||
340 | * covering the initial writepage page and also any probed pages. | ||
341 | */ | ||
342 | STATIC void | ||
343 | xfs_submit_ioend( | ||
344 | xfs_ioend_t *ioend) | ||
345 | { | ||
346 | xfs_ioend_t *next; | ||
347 | struct buffer_head *bh; | ||
348 | struct bio *bio; | ||
349 | sector_t lastblock = 0; | ||
350 | |||
351 | do { | ||
352 | next = ioend->io_list; | ||
353 | bio = NULL; | ||
354 | |||
355 | for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { | ||
356 | xfs_start_buffer_writeback(bh); | ||
357 | |||
358 | if (!bio) { | ||
359 | retry: | ||
360 | bio = xfs_alloc_ioend_bio(bh); | ||
361 | } else if (bh->b_blocknr != lastblock + 1) { | ||
362 | xfs_submit_ioend_bio(ioend, bio); | ||
363 | goto retry; | ||
364 | } | ||
365 | |||
366 | if (bio_add_buffer(bio, bh) != bh->b_size) { | ||
367 | xfs_submit_ioend_bio(ioend, bio); | ||
368 | goto retry; | ||
369 | } | ||
370 | |||
371 | lastblock = bh->b_blocknr; | ||
372 | } | ||
373 | if (bio) | ||
374 | xfs_submit_ioend_bio(ioend, bio); | ||
375 | xfs_finish_ioend(ioend); | ||
376 | } while ((ioend = next) != NULL); | ||
377 | } | ||
378 | |||
379 | /* | ||
380 | * Cancel submission of all buffer_heads so far in this ioend. | ||
381 | * Toss the ioend too. Only ever called for the initial page | ||
382 | * in a writepage request, so only ever one page. | ||
383 | */ | ||
384 | STATIC void | ||
385 | xfs_cancel_ioend( | ||
386 | xfs_ioend_t *ioend) | ||
387 | { | ||
388 | xfs_ioend_t *next; | ||
389 | struct buffer_head *bh, *next_bh; | ||
390 | |||
391 | do { | ||
392 | next = ioend->io_list; | ||
393 | bh = ioend->io_buffer_head; | ||
394 | do { | ||
395 | next_bh = bh->b_private; | ||
396 | clear_buffer_async_write(bh); | ||
397 | unlock_buffer(bh); | ||
398 | } while ((bh = next_bh) != NULL); | ||
399 | |||
400 | vn_iowake(ioend->io_vnode); | ||
401 | mempool_free(ioend, xfs_ioend_pool); | ||
402 | } while ((ioend = next) != NULL); | ||
403 | } | ||
404 | |||
405 | /* | ||
406 | * Test to see if we've been building up a completion structure for | ||
407 | * earlier buffers -- if so, we try to append to this ioend if we | ||
408 | * can, otherwise we finish off any current ioend and start another. | ||
409 | * The ioend now in use (possibly newly allocated) is passed back via *result. | ||
410 | */ | ||
411 | STATIC void | ||
412 | xfs_add_to_ioend( | ||
413 | struct inode *inode, | ||
414 | struct buffer_head *bh, | ||
415 | xfs_off_t offset, | ||
416 | unsigned int type, | ||
417 | xfs_ioend_t **result, | ||
418 | int need_ioend) | ||
230 | { | 419 | { |
231 | loff_t full_offset; /* offset from start of file */ | 420 | xfs_ioend_t *ioend = *result; |
232 | 421 | ||
233 | ASSERT(offset < PAGE_CACHE_SIZE); | 422 | if (!ioend || need_ioend || type != ioend->io_type) { |
423 | xfs_ioend_t *previous = *result; | ||
234 | 424 | ||
235 | full_offset = page->index; /* NB: using 64bit number */ | 425 | ioend = xfs_alloc_ioend(inode, type); |
236 | full_offset <<= PAGE_CACHE_SHIFT; /* offset from file start */ | 426 | ioend->io_offset = offset; |
237 | full_offset += offset; /* offset from page start */ | 427 | ioend->io_buffer_head = bh; |
428 | ioend->io_buffer_tail = bh; | ||
429 | if (previous) | ||
430 | previous->io_list = ioend; | ||
431 | *result = ioend; | ||
432 | } else { | ||
433 | ioend->io_buffer_tail->b_private = bh; | ||
434 | ioend->io_buffer_tail = bh; | ||
435 | } | ||
238 | 436 | ||
239 | if (full_offset < iomapp->iomap_offset) | 437 | bh->b_private = NULL; |
240 | return NULL; | 438 | ioend->io_size += bh->b_size; |
241 | if (iomapp->iomap_offset + (iomapp->iomap_bsize -1) >= full_offset) | ||
242 | return iomapp; | ||
243 | return NULL; | ||
244 | } | 439 | } |
245 | 440 | ||
246 | STATIC void | 441 | STATIC void |
247 | xfs_map_at_offset( | 442 | xfs_map_at_offset( |
248 | struct page *page, | ||
249 | struct buffer_head *bh, | 443 | struct buffer_head *bh, |
250 | unsigned long offset, | 444 | loff_t offset, |
251 | int block_bits, | 445 | int block_bits, |
252 | xfs_iomap_t *iomapp) | 446 | xfs_iomap_t *iomapp) |
253 | { | 447 | { |
254 | xfs_daddr_t bn; | 448 | xfs_daddr_t bn; |
255 | loff_t delta; | ||
256 | int sector_shift; | 449 | int sector_shift; |
257 | 450 | ||
258 | ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE)); | 451 | ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE)); |
259 | ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY)); | 452 | ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY)); |
260 | ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL); | 453 | ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL); |
261 | 454 | ||
262 | delta = page->index; | ||
263 | delta <<= PAGE_CACHE_SHIFT; | ||
264 | delta += offset; | ||
265 | delta -= iomapp->iomap_offset; | ||
266 | delta >>= block_bits; | ||
267 | |||
268 | sector_shift = block_bits - BBSHIFT; | 455 | sector_shift = block_bits - BBSHIFT; |
269 | bn = iomapp->iomap_bn >> sector_shift; | 456 | bn = (iomapp->iomap_bn >> sector_shift) + |
270 | bn += delta; | 457 | ((offset - iomapp->iomap_offset) >> block_bits); |
271 | BUG_ON(!bn && !(iomapp->iomap_flags & IOMAP_REALTIME)); | 458 | |
459 | ASSERT(bn || (iomapp->iomap_flags & IOMAP_REALTIME)); | ||
272 | ASSERT((bn << sector_shift) >= iomapp->iomap_bn); | 460 | ASSERT((bn << sector_shift) >= iomapp->iomap_bn); |
273 | 461 | ||
274 | lock_buffer(bh); | 462 | lock_buffer(bh); |
275 | bh->b_blocknr = bn; | 463 | bh->b_blocknr = bn; |
276 | bh->b_bdev = iomapp->iomap_target->pbr_bdev; | 464 | bh->b_bdev = iomapp->iomap_target->bt_bdev; |
277 | set_buffer_mapped(bh); | 465 | set_buffer_mapped(bh); |
278 | clear_buffer_delay(bh); | 466 | clear_buffer_delay(bh); |
467 | clear_buffer_unwritten(bh); | ||
279 | } | 468 | } |
280 | 469 | ||
281 | /* | 470 | /* |
282 | * Look for a page at index which is unlocked and contains our | 471 | * Look for a page at index that is suitable for clustering. |
283 | * unwritten extent flagged buffers at its head. Returns page | ||
284 | * locked and with an extra reference count, and length of the | ||
285 | * unwritten extent component on this page that we can write, | ||
286 | * in units of filesystem blocks. | ||
287 | */ | ||
288 | STATIC struct page * | ||
289 | xfs_probe_unwritten_page( | ||
290 | struct address_space *mapping, | ||
291 | pgoff_t index, | ||
292 | xfs_iomap_t *iomapp, | ||
293 | xfs_ioend_t *ioend, | ||
294 | unsigned long max_offset, | ||
295 | unsigned long *fsbs, | ||
296 | unsigned int bbits) | ||
297 | { | ||
298 | struct page *page; | ||
299 | |||
300 | page = find_trylock_page(mapping, index); | ||
301 | if (!page) | ||
302 | return NULL; | ||
303 | if (PageWriteback(page)) | ||
304 | goto out; | ||
305 | |||
306 | if (page->mapping && page_has_buffers(page)) { | ||
307 | struct buffer_head *bh, *head; | ||
308 | unsigned long p_offset = 0; | ||
309 | |||
310 | *fsbs = 0; | ||
311 | bh = head = page_buffers(page); | ||
312 | do { | ||
313 | if (!buffer_unwritten(bh) || !buffer_uptodate(bh)) | ||
314 | break; | ||
315 | if (!xfs_offset_to_map(page, iomapp, p_offset)) | ||
316 | break; | ||
317 | if (p_offset >= max_offset) | ||
318 | break; | ||
319 | xfs_map_at_offset(page, bh, p_offset, bbits, iomapp); | ||
320 | set_buffer_unwritten_io(bh); | ||
321 | bh->b_private = ioend; | ||
322 | p_offset += bh->b_size; | ||
323 | (*fsbs)++; | ||
324 | } while ((bh = bh->b_this_page) != head); | ||
325 | |||
326 | if (p_offset) | ||
327 | return page; | ||
328 | } | ||
329 | |||
330 | out: | ||
331 | unlock_page(page); | ||
332 | return NULL; | ||
333 | } | ||
334 | |||
335 | /* | ||
336 | * Look for a page at index which is unlocked and not mapped | ||
337 | * yet - clustering for mmap write case. | ||
338 | */ | 472 | */ |
339 | STATIC unsigned int | 473 | STATIC unsigned int |
340 | xfs_probe_unmapped_page( | 474 | xfs_probe_page( |
341 | struct address_space *mapping, | 475 | struct page *page, |
342 | pgoff_t index, | 476 | unsigned int pg_offset, |
343 | unsigned int pg_offset) | 477 | int mapped) |
344 | { | 478 | { |
345 | struct page *page; | ||
346 | int ret = 0; | 479 | int ret = 0; |
347 | 480 | ||
348 | page = find_trylock_page(mapping, index); | ||
349 | if (!page) | ||
350 | return 0; | ||
351 | if (PageWriteback(page)) | 481 | if (PageWriteback(page)) |
352 | goto out; | 482 | return 0; |
353 | 483 | ||
354 | if (page->mapping && PageDirty(page)) { | 484 | if (page->mapping && PageDirty(page)) { |
355 | if (page_has_buffers(page)) { | 485 | if (page_has_buffers(page)) { |
@@ -357,79 +487,101 @@ xfs_probe_unmapped_page( | |||
357 | 487 | ||
358 | bh = head = page_buffers(page); | 488 | bh = head = page_buffers(page); |
359 | do { | 489 | do { |
360 | if (buffer_mapped(bh) || !buffer_uptodate(bh)) | 490 | if (!buffer_uptodate(bh)) |
491 | break; | ||
492 | if (mapped != buffer_mapped(bh)) | ||
361 | break; | 493 | break; |
362 | ret += bh->b_size; | 494 | ret += bh->b_size; |
363 | if (ret >= pg_offset) | 495 | if (ret >= pg_offset) |
364 | break; | 496 | break; |
365 | } while ((bh = bh->b_this_page) != head); | 497 | } while ((bh = bh->b_this_page) != head); |
366 | } else | 498 | } else |
367 | ret = PAGE_CACHE_SIZE; | 499 | ret = mapped ? 0 : PAGE_CACHE_SIZE; |
368 | } | 500 | } |
369 | 501 | ||
370 | out: | ||
371 | unlock_page(page); | ||
372 | return ret; | 502 | return ret; |
373 | } | 503 | } |
374 | 504 | ||
375 | STATIC unsigned int | 505 | STATIC size_t |
376 | xfs_probe_unmapped_cluster( | 506 | xfs_probe_cluster( |
377 | struct inode *inode, | 507 | struct inode *inode, |
378 | struct page *startpage, | 508 | struct page *startpage, |
379 | struct buffer_head *bh, | 509 | struct buffer_head *bh, |
380 | struct buffer_head *head) | 510 | struct buffer_head *head, |
511 | int mapped) | ||
381 | { | 512 | { |
513 | struct pagevec pvec; | ||
382 | pgoff_t tindex, tlast, tloff; | 514 | pgoff_t tindex, tlast, tloff; |
383 | unsigned int pg_offset, len, total = 0; | 515 | size_t total = 0; |
384 | struct address_space *mapping = inode->i_mapping; | 516 | int done = 0, i; |
385 | 517 | ||
386 | /* First sum forwards in this page */ | 518 | /* First sum forwards in this page */ |
387 | do { | 519 | do { |
388 | if (buffer_mapped(bh)) | 520 | if (mapped != buffer_mapped(bh)) |
389 | break; | 521 | return total; |
390 | total += bh->b_size; | 522 | total += bh->b_size; |
391 | } while ((bh = bh->b_this_page) != head); | 523 | } while ((bh = bh->b_this_page) != head); |
392 | 524 | ||
393 | /* If we reached the end of the page, sum forwards in | 525 | /* if we reached the end of the page, sum forwards in following pages */ |
394 | * following pages. | 526 | tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT; |
395 | */ | 527 | tindex = startpage->index + 1; |
396 | if (bh == head) { | 528 | |
397 | tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT; | 529 | /* Prune this back to avoid pathological behavior */ |
398 | /* Prune this back to avoid pathological behavior */ | 530 | tloff = min(tlast, startpage->index + 64); |
399 | tloff = min(tlast, startpage->index + 64); | 531 | |
400 | for (tindex = startpage->index + 1; tindex < tloff; tindex++) { | 532 | pagevec_init(&pvec, 0); |
401 | len = xfs_probe_unmapped_page(mapping, tindex, | 533 | while (!done && tindex <= tloff) { |
402 | PAGE_CACHE_SIZE); | 534 | unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1); |
403 | if (!len) | 535 | |
404 | return total; | 536 | if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len)) |
537 | break; | ||
538 | |||
539 | for (i = 0; i < pagevec_count(&pvec); i++) { | ||
540 | struct page *page = pvec.pages[i]; | ||
541 | size_t pg_offset, len = 0; | ||
542 | |||
543 | if (tindex == tlast) { | ||
544 | pg_offset = | ||
545 | i_size_read(inode) & (PAGE_CACHE_SIZE - 1); | ||
546 | if (!pg_offset) { | ||
547 | done = 1; | ||
548 | break; | ||
549 | } | ||
550 | } else | ||
551 | pg_offset = PAGE_CACHE_SIZE; | ||
552 | |||
553 | if (page->index == tindex && !TestSetPageLocked(page)) { | ||
554 | len = xfs_probe_page(page, pg_offset, mapped); | ||
555 | unlock_page(page); | ||
556 | } | ||
557 | |||
558 | if (!len) { | ||
559 | done = 1; | ||
560 | break; | ||
561 | } | ||
562 | |||
405 | total += len; | 563 | total += len; |
564 | tindex++; | ||
406 | } | 565 | } |
407 | if (tindex == tlast && | 566 | |
408 | (pg_offset = i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { | 567 | pagevec_release(&pvec); |
409 | total += xfs_probe_unmapped_page(mapping, | 568 | cond_resched(); |
410 | tindex, pg_offset); | ||
411 | } | ||
412 | } | 569 | } |
570 | |||
413 | return total; | 571 | return total; |
414 | } | 572 | } |
415 | 573 | ||
416 | /* | 574 | /* |
417 | * Probe for a given page (index) in the inode and test if it is delayed | 575 | * Test if a given page is suitable for writing as part of an unwritten |
418 | * and without unwritten buffers. Returns page locked and with an extra | 576 | * or delayed allocate extent. |
419 | * reference count. | ||
420 | */ | 577 | */ |
421 | STATIC struct page * | 578 | STATIC int |
422 | xfs_probe_delalloc_page( | 579 | xfs_is_delayed_page( |
423 | struct inode *inode, | 580 | struct page *page, |
424 | pgoff_t index) | 581 | unsigned int type) |
425 | { | 582 | { |
426 | struct page *page; | ||
427 | |||
428 | page = find_trylock_page(inode->i_mapping, index); | ||
429 | if (!page) | ||
430 | return NULL; | ||
431 | if (PageWriteback(page)) | 583 | if (PageWriteback(page)) |
432 | goto out; | 584 | return 0; |
433 | 585 | ||
434 | if (page->mapping && page_has_buffers(page)) { | 586 | if (page->mapping && page_has_buffers(page)) { |
435 | struct buffer_head *bh, *head; | 587 | struct buffer_head *bh, *head; |
@@ -437,243 +589,156 @@ xfs_probe_delalloc_page( | |||
437 | 589 | ||
438 | bh = head = page_buffers(page); | 590 | bh = head = page_buffers(page); |
439 | do { | 591 | do { |
440 | if (buffer_unwritten(bh)) { | 592 | if (buffer_unwritten(bh)) |
441 | acceptable = 0; | 593 | acceptable = (type == IOMAP_UNWRITTEN); |
594 | else if (buffer_delay(bh)) | ||
595 | acceptable = (type == IOMAP_DELAY); | ||
596 | else if (buffer_mapped(bh)) | ||
597 | acceptable = (type == 0); | ||
598 | else | ||
442 | break; | 599 | break; |
443 | } else if (buffer_delay(bh)) { | ||
444 | acceptable = 1; | ||
445 | } | ||
446 | } while ((bh = bh->b_this_page) != head); | 600 | } while ((bh = bh->b_this_page) != head); |
447 | 601 | ||
448 | if (acceptable) | 602 | if (acceptable) |
449 | return page; | 603 | return 1; |
450 | } | ||
451 | |||
452 | out: | ||
453 | unlock_page(page); | ||
454 | return NULL; | ||
455 | } | ||
456 | |||
457 | STATIC int | ||
458 | xfs_map_unwritten( | ||
459 | struct inode *inode, | ||
460 | struct page *start_page, | ||
461 | struct buffer_head *head, | ||
462 | struct buffer_head *curr, | ||
463 | unsigned long p_offset, | ||
464 | int block_bits, | ||
465 | xfs_iomap_t *iomapp, | ||
466 | struct writeback_control *wbc, | ||
467 | int startio, | ||
468 | int all_bh) | ||
469 | { | ||
470 | struct buffer_head *bh = curr; | ||
471 | xfs_iomap_t *tmp; | ||
472 | xfs_ioend_t *ioend; | ||
473 | loff_t offset; | ||
474 | unsigned long nblocks = 0; | ||
475 | |||
476 | offset = start_page->index; | ||
477 | offset <<= PAGE_CACHE_SHIFT; | ||
478 | offset += p_offset; | ||
479 | |||
480 | ioend = xfs_alloc_ioend(inode); | ||
481 | |||
482 | /* First map forwards in the page consecutive buffers | ||
483 | * covering this unwritten extent | ||
484 | */ | ||
485 | do { | ||
486 | if (!buffer_unwritten(bh)) | ||
487 | break; | ||
488 | tmp = xfs_offset_to_map(start_page, iomapp, p_offset); | ||
489 | if (!tmp) | ||
490 | break; | ||
491 | xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp); | ||
492 | set_buffer_unwritten_io(bh); | ||
493 | bh->b_private = ioend; | ||
494 | p_offset += bh->b_size; | ||
495 | nblocks++; | ||
496 | } while ((bh = bh->b_this_page) != head); | ||
497 | |||
498 | atomic_add(nblocks, &ioend->io_remaining); | ||
499 | |||
500 | /* If we reached the end of the page, map forwards in any | ||
501 | * following pages which are also covered by this extent. | ||
502 | */ | ||
503 | if (bh == head) { | ||
504 | struct address_space *mapping = inode->i_mapping; | ||
505 | pgoff_t tindex, tloff, tlast; | ||
506 | unsigned long bs; | ||
507 | unsigned int pg_offset, bbits = inode->i_blkbits; | ||
508 | struct page *page; | ||
509 | |||
510 | tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT; | ||
511 | tloff = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT; | ||
512 | tloff = min(tlast, tloff); | ||
513 | for (tindex = start_page->index + 1; tindex < tloff; tindex++) { | ||
514 | page = xfs_probe_unwritten_page(mapping, | ||
515 | tindex, iomapp, ioend, | ||
516 | PAGE_CACHE_SIZE, &bs, bbits); | ||
517 | if (!page) | ||
518 | break; | ||
519 | nblocks += bs; | ||
520 | atomic_add(bs, &ioend->io_remaining); | ||
521 | xfs_convert_page(inode, page, iomapp, wbc, ioend, | ||
522 | startio, all_bh); | ||
523 | /* stop if converting the next page might add | ||
524 | * enough blocks that the corresponding byte | ||
525 | * count won't fit in our ulong page buf length */ | ||
526 | if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits)) | ||
527 | goto enough; | ||
528 | } | ||
529 | |||
530 | if (tindex == tlast && | ||
531 | (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) { | ||
532 | page = xfs_probe_unwritten_page(mapping, | ||
533 | tindex, iomapp, ioend, | ||
534 | pg_offset, &bs, bbits); | ||
535 | if (page) { | ||
536 | nblocks += bs; | ||
537 | atomic_add(bs, &ioend->io_remaining); | ||
538 | xfs_convert_page(inode, page, iomapp, wbc, ioend, | ||
539 | startio, all_bh); | ||
540 | if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits)) | ||
541 | goto enough; | ||
542 | } | ||
543 | } | ||
544 | } | 604 | } |
545 | 605 | ||
546 | enough: | ||
547 | ioend->io_size = (xfs_off_t)nblocks << block_bits; | ||
548 | ioend->io_offset = offset; | ||
549 | xfs_finish_ioend(ioend); | ||
550 | return 0; | 606 | return 0; |
551 | } | 607 | } |
552 | 608 | ||
553 | STATIC void | ||
554 | xfs_submit_page( | ||
555 | struct page *page, | ||
556 | struct writeback_control *wbc, | ||
557 | struct buffer_head *bh_arr[], | ||
558 | int bh_count, | ||
559 | int probed_page, | ||
560 | int clear_dirty) | ||
561 | { | ||
562 | struct buffer_head *bh; | ||
563 | int i; | ||
564 | |||
565 | BUG_ON(PageWriteback(page)); | ||
566 | if (bh_count) | ||
567 | set_page_writeback(page); | ||
568 | if (clear_dirty) | ||
569 | clear_page_dirty(page); | ||
570 | unlock_page(page); | ||
571 | |||
572 | if (bh_count) { | ||
573 | for (i = 0; i < bh_count; i++) { | ||
574 | bh = bh_arr[i]; | ||
575 | mark_buffer_async_write(bh); | ||
576 | if (buffer_unwritten(bh)) | ||
577 | set_buffer_unwritten_io(bh); | ||
578 | set_buffer_uptodate(bh); | ||
579 | clear_buffer_dirty(bh); | ||
580 | } | ||
581 | |||
582 | for (i = 0; i < bh_count; i++) | ||
583 | submit_bh(WRITE, bh_arr[i]); | ||
584 | |||
585 | if (probed_page && clear_dirty) | ||
586 | wbc->nr_to_write--; /* Wrote an "extra" page */ | ||
587 | } | ||
588 | } | ||
589 | |||
590 | /* | 609 | /* |
591 | * Allocate & map buffers for page given the extent map. Write it out. | 610 | * Allocate & map buffers for page given the extent map. Write it out. |
592 | * except for the original page of a writepage, this is called on | 611 | * except for the original page of a writepage, this is called on |
593 | * delalloc/unwritten pages only, for the original page it is possible | 612 | * delalloc/unwritten pages only, for the original page it is possible |
594 | * that the page has no mapping at all. | 613 | * that the page has no mapping at all. |
595 | */ | 614 | */ |
596 | STATIC void | 615 | STATIC int |
597 | xfs_convert_page( | 616 | xfs_convert_page( |
598 | struct inode *inode, | 617 | struct inode *inode, |
599 | struct page *page, | 618 | struct page *page, |
600 | xfs_iomap_t *iomapp, | 619 | loff_t tindex, |
620 | xfs_iomap_t *mp, | ||
621 | xfs_ioend_t **ioendp, | ||
601 | struct writeback_control *wbc, | 622 | struct writeback_control *wbc, |
602 | void *private, | ||
603 | int startio, | 623 | int startio, |
604 | int all_bh) | 624 | int all_bh) |
605 | { | 625 | { |
606 | struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head; | 626 | struct buffer_head *bh, *head; |
607 | xfs_iomap_t *mp = iomapp, *tmp; | 627 | xfs_off_t end_offset; |
608 | unsigned long offset, end_offset; | 628 | unsigned long p_offset; |
609 | int index = 0; | 629 | unsigned int type; |
610 | int bbits = inode->i_blkbits; | 630 | int bbits = inode->i_blkbits; |
611 | int len, page_dirty; | 631 | int len, page_dirty; |
632 | int count = 0, done = 0, uptodate = 1; | ||
633 | xfs_off_t offset = page_offset(page); | ||
612 | 634 | ||
613 | end_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)); | 635 | if (page->index != tindex) |
636 | goto fail; | ||
637 | if (TestSetPageLocked(page)) | ||
638 | goto fail; | ||
639 | if (PageWriteback(page)) | ||
640 | goto fail_unlock_page; | ||
641 | if (page->mapping != inode->i_mapping) | ||
642 | goto fail_unlock_page; | ||
643 | if (!xfs_is_delayed_page(page, (*ioendp)->io_type)) | ||
644 | goto fail_unlock_page; | ||
614 | 645 | ||
615 | /* | 646 | /* |
616 | * page_dirty is initially a count of buffers on the page before | 647 | * page_dirty is initially a count of buffers on the page before |
616 | * EOF and is decremented as we move each into a cleanable state. | 648 | * EOF and is decremented as we move each into a cleanable state. |
649 | * | ||
650 | * Derivation: | ||
651 | * | ||
652 | * End offset is the highest offset that this page should represent. | ||
653 | * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1)) | ||
654 | * will evaluate non-zero and be less than PAGE_CACHE_SIZE and | ||
655 | * hence give us the correct page_dirty count. On any other page, | ||
656 | * it will be zero and in that case we need page_dirty to be the | ||
657 | * count of buffers on the page. | ||
618 | */ | 658 | */ |
659 | end_offset = min_t(unsigned long long, | ||
660 | (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, | ||
661 | i_size_read(inode)); | ||
662 | |||
619 | len = 1 << inode->i_blkbits; | 663 | len = 1 << inode->i_blkbits; |
620 | end_offset = max(end_offset, PAGE_CACHE_SIZE); | 664 | p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1), |
621 | end_offset = roundup(end_offset, len); | 665 | PAGE_CACHE_SIZE); |
622 | page_dirty = end_offset / len; | 666 | p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; |
667 | page_dirty = p_offset / len; | ||
623 | 668 | ||
624 | offset = 0; | ||
625 | bh = head = page_buffers(page); | 669 | bh = head = page_buffers(page); |
626 | do { | 670 | do { |
627 | if (offset >= end_offset) | 671 | if (offset >= end_offset) |
628 | break; | 672 | break; |
629 | if (!(PageUptodate(page) || buffer_uptodate(bh))) | 673 | if (!buffer_uptodate(bh)) |
674 | uptodate = 0; | ||
675 | if (!(PageUptodate(page) || buffer_uptodate(bh))) { | ||
676 | done = 1; | ||
630 | continue; | 677 | continue; |
631 | if (buffer_mapped(bh) && all_bh && | 678 | } |
632 | !(buffer_unwritten(bh) || buffer_delay(bh))) { | 679 | |
680 | if (buffer_unwritten(bh) || buffer_delay(bh)) { | ||
681 | if (buffer_unwritten(bh)) | ||
682 | type = IOMAP_UNWRITTEN; | ||
683 | else | ||
684 | type = IOMAP_DELAY; | ||
685 | |||
686 | if (!xfs_iomap_valid(mp, offset)) { | ||
687 | done = 1; | ||
688 | continue; | ||
689 | } | ||
690 | |||
691 | ASSERT(!(mp->iomap_flags & IOMAP_HOLE)); | ||
692 | ASSERT(!(mp->iomap_flags & IOMAP_DELAY)); | ||
693 | |||
694 | xfs_map_at_offset(bh, offset, bbits, mp); | ||
633 | if (startio) { | 695 | if (startio) { |
696 | xfs_add_to_ioend(inode, bh, offset, | ||
697 | type, ioendp, done); | ||
698 | } else { | ||
699 | set_buffer_dirty(bh); | ||
700 | unlock_buffer(bh); | ||
701 | mark_buffer_dirty(bh); | ||
702 | } | ||
703 | page_dirty--; | ||
704 | count++; | ||
705 | } else { | ||
706 | type = 0; | ||
707 | if (buffer_mapped(bh) && all_bh && startio) { | ||
634 | lock_buffer(bh); | 708 | lock_buffer(bh); |
635 | bh_arr[index++] = bh; | 709 | xfs_add_to_ioend(inode, bh, offset, |
710 | type, ioendp, done); | ||
711 | count++; | ||
636 | page_dirty--; | 712 | page_dirty--; |
713 | } else { | ||
714 | done = 1; | ||
637 | } | 715 | } |
638 | continue; | ||
639 | } | 716 | } |
640 | tmp = xfs_offset_to_map(page, mp, offset); | 717 | } while (offset += len, (bh = bh->b_this_page) != head); |
641 | if (!tmp) | ||
642 | continue; | ||
643 | ASSERT(!(tmp->iomap_flags & IOMAP_HOLE)); | ||
644 | ASSERT(!(tmp->iomap_flags & IOMAP_DELAY)); | ||
645 | 718 | ||
646 | /* If this is a new unwritten extent buffer (i.e. one | 719 | if (uptodate && bh == head) |
647 | * that we haven't passed in private data for, we must | 720 | SetPageUptodate(page); |
648 | * now map this buffer too. | 721 | |
649 | */ | 722 | if (startio) { |
650 | if (buffer_unwritten(bh) && !bh->b_end_io) { | 723 | if (count) { |
651 | ASSERT(tmp->iomap_flags & IOMAP_UNWRITTEN); | 724 | struct backing_dev_info *bdi; |
652 | xfs_map_unwritten(inode, page, head, bh, offset, | 725 | |
653 | bbits, tmp, wbc, startio, all_bh); | 726 | bdi = inode->i_mapping->backing_dev_info; |
654 | } else if (! (buffer_unwritten(bh) && buffer_locked(bh))) { | 727 | if (bdi_write_congested(bdi)) { |
655 | xfs_map_at_offset(page, bh, offset, bbits, tmp); | 728 | wbc->encountered_congestion = 1; |
656 | if (buffer_unwritten(bh)) { | 729 | done = 1; |
657 | set_buffer_unwritten_io(bh); | 730 | } else if (--wbc->nr_to_write <= 0) { |
658 | bh->b_private = private; | 731 | done = 1; |
659 | ASSERT(private); | ||
660 | } | 732 | } |
661 | } | 733 | } |
662 | if (startio) { | 734 | xfs_start_page_writeback(page, wbc, !page_dirty, count); |
663 | bh_arr[index++] = bh; | ||
664 | } else { | ||
665 | set_buffer_dirty(bh); | ||
666 | unlock_buffer(bh); | ||
667 | mark_buffer_dirty(bh); | ||
668 | } | ||
669 | page_dirty--; | ||
670 | } while (offset += len, (bh = bh->b_this_page) != head); | ||
671 | |||
672 | if (startio && index) { | ||
673 | xfs_submit_page(page, wbc, bh_arr, index, 1, !page_dirty); | ||
674 | } else { | ||
675 | unlock_page(page); | ||
676 | } | 735 | } |
736 | |||
737 | return done; | ||
738 | fail_unlock_page: | ||
739 | unlock_page(page); | ||
740 | fail: | ||
741 | return 1; | ||
677 | } | 742 | } |
678 | 743 | ||
679 | /* | 744 | /* |
@@ -685,19 +750,31 @@ xfs_cluster_write( | |||
685 | struct inode *inode, | 750 | struct inode *inode, |
686 | pgoff_t tindex, | 751 | pgoff_t tindex, |
687 | xfs_iomap_t *iomapp, | 752 | xfs_iomap_t *iomapp, |
753 | xfs_ioend_t **ioendp, | ||
688 | struct writeback_control *wbc, | 754 | struct writeback_control *wbc, |
689 | int startio, | 755 | int startio, |
690 | int all_bh, | 756 | int all_bh, |
691 | pgoff_t tlast) | 757 | pgoff_t tlast) |
692 | { | 758 | { |
693 | struct page *page; | 759 | struct pagevec pvec; |
760 | int done = 0, i; | ||
694 | 761 | ||
695 | for (; tindex <= tlast; tindex++) { | 762 | pagevec_init(&pvec, 0); |
696 | page = xfs_probe_delalloc_page(inode, tindex); | 763 | while (!done && tindex <= tlast) { |
697 | if (!page) | 764 | unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1); |
765 | |||
766 | if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len)) | ||
698 | break; | 767 | break; |
699 | xfs_convert_page(inode, page, iomapp, wbc, NULL, | 768 | |
700 | startio, all_bh); | 769 | for (i = 0; i < pagevec_count(&pvec); i++) { |
770 | done = xfs_convert_page(inode, pvec.pages[i], tindex++, | ||
771 | iomapp, ioendp, wbc, startio, all_bh); | ||
772 | if (done) | ||
773 | break; | ||
774 | } | ||
775 | |||
776 | pagevec_release(&pvec); | ||
777 | cond_resched(); | ||
701 | } | 778 | } |
702 | } | 779 | } |
703 | 780 | ||
@@ -728,18 +805,22 @@ xfs_page_state_convert( | |||
728 | int startio, | 805 | int startio, |
729 | int unmapped) /* also implies page uptodate */ | 806 | int unmapped) /* also implies page uptodate */ |
730 | { | 807 | { |
731 | struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head; | 808 | struct buffer_head *bh, *head; |
732 | xfs_iomap_t *iomp, iomap; | 809 | xfs_iomap_t iomap; |
810 | xfs_ioend_t *ioend = NULL, *iohead = NULL; | ||
733 | loff_t offset; | 811 | loff_t offset; |
734 | unsigned long p_offset = 0; | 812 | unsigned long p_offset = 0; |
813 | unsigned int type; | ||
735 | __uint64_t end_offset; | 814 | __uint64_t end_offset; |
736 | pgoff_t end_index, last_index, tlast; | 815 | pgoff_t end_index, last_index, tlast; |
737 | int len, err, i, cnt = 0, uptodate = 1; | 816 | ssize_t size, len; |
738 | int flags; | 817 | int flags, err, iomap_valid = 0, uptodate = 1; |
739 | int page_dirty; | 818 | int page_dirty, count = 0, trylock_flag = 0; |
819 | int all_bh = unmapped; | ||
740 | 820 | ||
741 | /* wait for other IO threads? */ | 821 | /* wait for other IO threads? */ |
742 | flags = (startio && wbc->sync_mode != WB_SYNC_NONE) ? 0 : BMAPI_TRYLOCK; | 822 | if (startio && (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)) |
823 | trylock_flag |= BMAPI_TRYLOCK; | ||
743 | 824 | ||
744 | /* Is this page beyond the end of the file? */ | 825 | /* Is this page beyond the end of the file? */ |
745 | offset = i_size_read(inode); | 826 | offset = i_size_read(inode); |
@@ -754,161 +835,173 @@ xfs_page_state_convert( | |||
754 | } | 835 | } |
755 | } | 836 | } |
756 | 837 | ||
757 | end_offset = min_t(unsigned long long, | ||
758 | (loff_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset); | ||
759 | offset = (loff_t)page->index << PAGE_CACHE_SHIFT; | ||
760 | |||
761 | /* | 838 | /* |
762 | * page_dirty is initially a count of buffers on the page before | 839 | * page_dirty is initially a count of buffers on the page before |
763 | * EOF and is decremented as we move each into a cleanable state. | 840 | * EOF and is decremented as we move each into a cleanable state. |
764 | */ | 841 | * |
842 | * Derivation: | ||
843 | * | ||
844 | * End offset is the highest offset that this page should represent. | ||
845 | * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1)) | ||
846 | * will evaluate non-zero and be less than PAGE_CACHE_SIZE and | ||
847 | * hence give us the correct page_dirty count. On any other page, | ||
848 | * it will be zero and in that case we need page_dirty to be the | ||
849 | * count of buffers on the page. | ||
850 | */ | ||
851 | end_offset = min_t(unsigned long long, | ||
852 | (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset); | ||
765 | len = 1 << inode->i_blkbits; | 853 | len = 1 << inode->i_blkbits; |
766 | p_offset = max(p_offset, PAGE_CACHE_SIZE); | 854 | p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1), |
767 | p_offset = roundup(p_offset, len); | 855 | PAGE_CACHE_SIZE); |
856 | p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; | ||
768 | page_dirty = p_offset / len; | 857 | page_dirty = p_offset / len; |
769 | 858 | ||
770 | iomp = NULL; | ||
771 | p_offset = 0; | ||
772 | bh = head = page_buffers(page); | 859 | bh = head = page_buffers(page); |
860 | offset = page_offset(page); | ||
861 | flags = -1; | ||
862 | type = 0; | ||
863 | |||
864 | /* TODO: cleanup count and page_dirty */ | ||
773 | 865 | ||
774 | do { | 866 | do { |
775 | if (offset >= end_offset) | 867 | if (offset >= end_offset) |
776 | break; | 868 | break; |
777 | if (!buffer_uptodate(bh)) | 869 | if (!buffer_uptodate(bh)) |
778 | uptodate = 0; | 870 | uptodate = 0; |
779 | if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) | 871 | if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) { |
872 | /* | ||
873 | * the iomap is actually still valid, but the ioend | ||
874 | * isn't. shouldn't happen too often. | ||
875 | */ | ||
876 | iomap_valid = 0; | ||
780 | continue; | 877 | continue; |
781 | |||
782 | if (iomp) { | ||
783 | iomp = xfs_offset_to_map(page, &iomap, p_offset); | ||
784 | } | 878 | } |
785 | 879 | ||
880 | if (iomap_valid) | ||
881 | iomap_valid = xfs_iomap_valid(&iomap, offset); | ||
882 | |||
786 | /* | 883 | /* |
787 | * First case, map an unwritten extent and prepare for | 884 | * First case, map an unwritten extent and prepare for |
788 | * extent state conversion transaction on completion. | 885 | * extent state conversion transaction on completion. |
789 | */ | 886 | * |
790 | if (buffer_unwritten(bh)) { | 887 | * Second case, allocate space for a delalloc buffer. |
791 | if (!startio) | 888 | * We can return EAGAIN here in the release page case. |
792 | continue; | 889 | * |
793 | if (!iomp) { | 890 | * Third case, an unmapped buffer was found, and we are |
794 | err = xfs_map_blocks(inode, offset, len, &iomap, | 891 | * in a path where we need to write the whole page out. |
795 | BMAPI_WRITE|BMAPI_IGNSTATE); | 892 | */ |
796 | if (err) { | 893 | if (buffer_unwritten(bh) || buffer_delay(bh) || |
797 | goto error; | 894 | ((buffer_uptodate(bh) || PageUptodate(page)) && |
798 | } | 895 | !buffer_mapped(bh) && (unmapped || startio))) { |
799 | iomp = xfs_offset_to_map(page, &iomap, | 896 | /* |
800 | p_offset); | 897 | * Make sure we don't use a read-only iomap |
898 | */ | ||
899 | if (flags == BMAPI_READ) | ||
900 | iomap_valid = 0; | ||
901 | |||
902 | if (buffer_unwritten(bh)) { | ||
903 | type = IOMAP_UNWRITTEN; | ||
904 | flags = BMAPI_WRITE|BMAPI_IGNSTATE; | ||
905 | } else if (buffer_delay(bh)) { | ||
906 | type = IOMAP_DELAY; | ||
907 | flags = BMAPI_ALLOCATE; | ||
908 | if (!startio) | ||
909 | flags |= trylock_flag; | ||
910 | } else { | ||
911 | type = IOMAP_NEW; | ||
912 | flags = BMAPI_WRITE|BMAPI_MMAP; | ||
801 | } | 913 | } |
802 | if (iomp) { | 914 | |
803 | if (!bh->b_end_io) { | 915 | if (!iomap_valid) { |
804 | err = xfs_map_unwritten(inode, page, | 916 | if (type == IOMAP_NEW) { |
805 | head, bh, p_offset, | 917 | size = xfs_probe_cluster(inode, |
806 | inode->i_blkbits, iomp, | 918 | page, bh, head, 0); |
807 | wbc, startio, unmapped); | ||
808 | if (err) { | ||
809 | goto error; | ||
810 | } | ||
811 | } else { | 919 | } else { |
812 | set_bit(BH_Lock, &bh->b_state); | 920 | size = len; |
813 | } | 921 | } |
814 | BUG_ON(!buffer_locked(bh)); | 922 | |
815 | bh_arr[cnt++] = bh; | 923 | err = xfs_map_blocks(inode, offset, size, |
816 | page_dirty--; | 924 | &iomap, flags); |
817 | } | 925 | if (err) |
818 | /* | ||
819 | * Second case, allocate space for a delalloc buffer. | ||
820 | * We can return EAGAIN here in the release page case. | ||
821 | */ | ||
822 | } else if (buffer_delay(bh)) { | ||
823 | if (!iomp) { | ||
824 | err = xfs_map_blocks(inode, offset, len, &iomap, | ||
825 | BMAPI_ALLOCATE | flags); | ||
826 | if (err) { | ||
827 | goto error; | 926 | goto error; |
828 | } | 927 | iomap_valid = xfs_iomap_valid(&iomap, offset); |
829 | iomp = xfs_offset_to_map(page, &iomap, | ||
830 | p_offset); | ||
831 | } | 928 | } |
832 | if (iomp) { | 929 | if (iomap_valid) { |
833 | xfs_map_at_offset(page, bh, p_offset, | 930 | xfs_map_at_offset(bh, offset, |
834 | inode->i_blkbits, iomp); | 931 | inode->i_blkbits, &iomap); |
835 | if (startio) { | 932 | if (startio) { |
836 | bh_arr[cnt++] = bh; | 933 | xfs_add_to_ioend(inode, bh, offset, |
934 | type, &ioend, | ||
935 | !iomap_valid); | ||
837 | } else { | 936 | } else { |
838 | set_buffer_dirty(bh); | 937 | set_buffer_dirty(bh); |
839 | unlock_buffer(bh); | 938 | unlock_buffer(bh); |
840 | mark_buffer_dirty(bh); | 939 | mark_buffer_dirty(bh); |
841 | } | 940 | } |
842 | page_dirty--; | 941 | page_dirty--; |
942 | count++; | ||
943 | } | ||
944 | } else if (buffer_uptodate(bh) && startio) { | ||
945 | /* | ||
946 | * we got here because the buffer is already mapped. | ||
947 | * That means it must already have extents allocated | ||
948 | * underneath it. Map the extent by reading it. | ||
949 | */ | ||
950 | if (!iomap_valid || type != 0) { | ||
951 | flags = BMAPI_READ; | ||
952 | size = xfs_probe_cluster(inode, page, bh, | ||
953 | head, 1); | ||
954 | err = xfs_map_blocks(inode, offset, size, | ||
955 | &iomap, flags); | ||
956 | if (err) | ||
957 | goto error; | ||
958 | iomap_valid = xfs_iomap_valid(&iomap, offset); | ||
843 | } | 959 | } |
844 | } else if ((buffer_uptodate(bh) || PageUptodate(page)) && | ||
845 | (unmapped || startio)) { | ||
846 | 960 | ||
847 | if (!buffer_mapped(bh)) { | 961 | type = 0; |
848 | int size; | 962 | if (!test_and_set_bit(BH_Lock, &bh->b_state)) { |
849 | 963 | ASSERT(buffer_mapped(bh)); | |
850 | /* | 964 | if (iomap_valid) |
851 | * Getting here implies an unmapped buffer | 965 | all_bh = 1; |
852 | * was found, and we are in a path where we | 966 | xfs_add_to_ioend(inode, bh, offset, type, |
853 | * need to write the whole page out. | 967 | &ioend, !iomap_valid); |
854 | */ | 968 | page_dirty--; |
855 | if (!iomp) { | 969 | count++; |
856 | size = xfs_probe_unmapped_cluster( | 970 | } else { |
857 | inode, page, bh, head); | 971 | iomap_valid = 0; |
858 | err = xfs_map_blocks(inode, offset, | ||
859 | size, &iomap, | ||
860 | BMAPI_WRITE|BMAPI_MMAP); | ||
861 | if (err) { | ||
862 | goto error; | ||
863 | } | ||
864 | iomp = xfs_offset_to_map(page, &iomap, | ||
865 | p_offset); | ||
866 | } | ||
867 | if (iomp) { | ||
868 | xfs_map_at_offset(page, | ||
869 | bh, p_offset, | ||
870 | inode->i_blkbits, iomp); | ||
871 | if (startio) { | ||
872 | bh_arr[cnt++] = bh; | ||
873 | } else { | ||
874 | set_buffer_dirty(bh); | ||
875 | unlock_buffer(bh); | ||
876 | mark_buffer_dirty(bh); | ||
877 | } | ||
878 | page_dirty--; | ||
879 | } | ||
880 | } else if (startio) { | ||
881 | if (buffer_uptodate(bh) && | ||
882 | !test_and_set_bit(BH_Lock, &bh->b_state)) { | ||
883 | bh_arr[cnt++] = bh; | ||
884 | page_dirty--; | ||
885 | } | ||
886 | } | 972 | } |
973 | } else if ((buffer_uptodate(bh) || PageUptodate(page)) && | ||
974 | (unmapped || startio)) { | ||
975 | iomap_valid = 0; | ||
887 | } | 976 | } |
888 | } while (offset += len, p_offset += len, | 977 | |
889 | ((bh = bh->b_this_page) != head)); | 978 | if (!iohead) |
979 | iohead = ioend; | ||
980 | |||
981 | } while (offset += len, ((bh = bh->b_this_page) != head)); | ||
890 | 982 | ||
891 | if (uptodate && bh == head) | 983 | if (uptodate && bh == head) |
892 | SetPageUptodate(page); | 984 | SetPageUptodate(page); |
893 | 985 | ||
894 | if (startio) { | 986 | if (startio) |
895 | xfs_submit_page(page, wbc, bh_arr, cnt, 0, !page_dirty); | 987 | xfs_start_page_writeback(page, wbc, 1, count); |
896 | } | ||
897 | 988 | ||
898 | if (iomp) { | 989 | if (ioend && iomap_valid) { |
899 | offset = (iomp->iomap_offset + iomp->iomap_bsize - 1) >> | 990 | offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >> |
900 | PAGE_CACHE_SHIFT; | 991 | PAGE_CACHE_SHIFT; |
901 | tlast = min_t(pgoff_t, offset, last_index); | 992 | tlast = min_t(pgoff_t, offset, last_index); |
902 | xfs_cluster_write(inode, page->index + 1, iomp, wbc, | 993 | xfs_cluster_write(inode, page->index + 1, &iomap, &ioend, |
903 | startio, unmapped, tlast); | 994 | wbc, startio, all_bh, tlast); |
904 | } | 995 | } |
905 | 996 | ||
997 | if (iohead) | ||
998 | xfs_submit_ioend(iohead); | ||
999 | |||
906 | return page_dirty; | 1000 | return page_dirty; |
907 | 1001 | ||
908 | error: | 1002 | error: |
909 | for (i = 0; i < cnt; i++) { | 1003 | if (iohead) |
910 | unlock_buffer(bh_arr[i]); | 1004 | xfs_cancel_ioend(iohead); |
911 | } | ||
912 | 1005 | ||
913 | /* | 1006 | /* |
914 | * If it's delalloc and we have nowhere to put it, | 1007 | * If it's delalloc and we have nowhere to put it, |
@@ -916,9 +1009,8 @@ error: | |||
916 | * us to try again. | 1009 | * us to try again. |
917 | */ | 1010 | */ |
918 | if (err != -EAGAIN) { | 1011 | if (err != -EAGAIN) { |
919 | if (!unmapped) { | 1012 | if (!unmapped) |
920 | block_invalidatepage(page, 0); | 1013 | block_invalidatepage(page, 0); |
921 | } | ||
922 | ClearPageUptodate(page); | 1014 | ClearPageUptodate(page); |
923 | } | 1015 | } |
924 | return err; | 1016 | return err; |
@@ -982,7 +1074,7 @@ __linvfs_get_block( | |||
982 | } | 1074 | } |
983 | 1075 | ||
984 | /* If this is a realtime file, data might be on a new device */ | 1076 | /* If this is a realtime file, data might be on a new device */ |
985 | bh_result->b_bdev = iomap.iomap_target->pbr_bdev; | 1077 | bh_result->b_bdev = iomap.iomap_target->bt_bdev; |
986 | 1078 | ||
987 | /* If we previously allocated a block out beyond eof and | 1079 | /* If we previously allocated a block out beyond eof and |
988 | * we are now coming back to use it then we will need to | 1080 | * we are now coming back to use it then we will need to |
@@ -1094,10 +1186,10 @@ linvfs_direct_IO( | |||
1094 | if (error) | 1186 | if (error) |
1095 | return -error; | 1187 | return -error; |
1096 | 1188 | ||
1097 | iocb->private = xfs_alloc_ioend(inode); | 1189 | iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN); |
1098 | 1190 | ||
1099 | ret = blockdev_direct_IO_own_locking(rw, iocb, inode, | 1191 | ret = blockdev_direct_IO_own_locking(rw, iocb, inode, |
1100 | iomap.iomap_target->pbr_bdev, | 1192 | iomap.iomap_target->bt_bdev, |
1101 | iov, offset, nr_segs, | 1193 | iov, offset, nr_segs, |
1102 | linvfs_get_blocks_direct, | 1194 | linvfs_get_blocks_direct, |
1103 | linvfs_end_io_direct); | 1195 | linvfs_end_io_direct); |