diff options
author | Steven Whitehouse <swhiteho@redhat.com> | 2007-10-17 04:04:24 -0400 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2008-01-25 03:07:28 -0500 |
commit | b8e7cbb65bcc99630e123422c6829ce3c0fcdf14 (patch) | |
tree | a9f68259b90e9e65ea7f0369f448d580a8944f06 /fs/gfs2 | |
parent | 9ff8ec32e58875022447af619bec6e5aee7c77e4 (diff) |
[GFS2] Add writepages for GFS2 jdata
This patch resolves a lock ordering issue where we had been getting
a transaction lock in the wrong order with respect to the page lock.
By using writepages rather than just writepage, it is then possible
to start a transaction before locking the page, thus matching the
locking order used elsewhere in the code.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs/gfs2')
-rw-r--r-- | fs/gfs2/log.c | 2 | ||||
-rw-r--r-- | fs/gfs2/ops_address.c | 213 |
2 files changed, 206 insertions, 9 deletions
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 70b404d2774b..1e1fe8def375 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
@@ -650,7 +650,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp) | |||
650 | get_bh(bh); | 650 | get_bh(bh); |
651 | gfs2_log_unlock(sdp); | 651 | gfs2_log_unlock(sdp); |
652 | lock_buffer(bh); | 652 | lock_buffer(bh); |
653 | if (test_clear_buffer_dirty(bh)) { | 653 | if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) { |
654 | bh->b_end_io = end_buffer_write_sync; | 654 | bh->b_end_io = end_buffer_write_sync; |
655 | submit_bh(WRITE, bh); | 655 | submit_bh(WRITE, bh); |
656 | } else { | 656 | } else { |
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 4bf73ed945ae..48913e569907 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/gfs2_ondisk.h> | 21 | #include <linux/gfs2_ondisk.h> |
22 | #include <linux/lm_interface.h> | 22 | #include <linux/lm_interface.h> |
23 | #include <linux/swap.h> | 23 | #include <linux/swap.h> |
24 | #include <linux/pagevec.h> | ||
24 | 25 | ||
25 | #include "gfs2.h" | 26 | #include "gfs2.h" |
26 | #include "incore.h" | 27 | #include "incore.h" |
@@ -189,6 +190,34 @@ static int gfs2_ordered_writepage(struct page *page, | |||
189 | } | 190 | } |
190 | 191 | ||
191 | /** | 192 | /** |
193 | * __gfs2_jdata_writepage - The core of jdata writepage | ||
194 | * @page: The page to write | ||
195 | * @wbc: The writeback control | ||
196 | * | ||
197 | * This is shared between writepage and writepages and implements the | ||
198 | * core of the writepage operation. If a transaction is required then | ||
199 | * PageChecked will have been set and the transaction will have | ||
200 | * already been started before this is called. | ||
201 | */ | ||
202 | |||
203 | static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) | ||
204 | { | ||
205 | struct inode *inode = page->mapping->host; | ||
206 | struct gfs2_inode *ip = GFS2_I(inode); | ||
207 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
208 | |||
209 | if (PageChecked(page)) { | ||
210 | ClearPageChecked(page); | ||
211 | if (!page_has_buffers(page)) { | ||
212 | create_empty_buffers(page, inode->i_sb->s_blocksize, | ||
213 | (1 << BH_Dirty)|(1 << BH_Uptodate)); | ||
214 | } | ||
215 | gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1); | ||
216 | } | ||
217 | return block_write_full_page(page, gfs2_get_block_noalloc, wbc); | ||
218 | } | ||
219 | |||
220 | /** | ||
192 | * gfs2_jdata_writepage - Write complete page | 221 | * gfs2_jdata_writepage - Write complete page |
193 | * @page: Page to write | 222 | * @page: Page to write |
194 | * | 223 | * |
@@ -199,7 +228,6 @@ static int gfs2_ordered_writepage(struct page *page, | |||
199 | static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) | 228 | static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) |
200 | { | 229 | { |
201 | struct inode *inode = page->mapping->host; | 230 | struct inode *inode = page->mapping->host; |
202 | struct gfs2_inode *ip = GFS2_I(inode); | ||
203 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 231 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
204 | int error; | 232 | int error; |
205 | int done_trans = 0; | 233 | int done_trans = 0; |
@@ -209,18 +237,14 @@ static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc | |||
209 | return error; | 237 | return error; |
210 | 238 | ||
211 | if (PageChecked(page)) { | 239 | if (PageChecked(page)) { |
240 | if (wbc->sync_mode != WB_SYNC_ALL) | ||
241 | goto out_ignore; | ||
212 | error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0); | 242 | error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0); |
213 | if (error) | 243 | if (error) |
214 | goto out_ignore; | 244 | goto out_ignore; |
215 | ClearPageChecked(page); | ||
216 | if (!page_has_buffers(page)) { | ||
217 | create_empty_buffers(page, inode->i_sb->s_blocksize, | ||
218 | (1 << BH_Dirty)|(1 << BH_Uptodate)); | ||
219 | } | ||
220 | gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1); | ||
221 | done_trans = 1; | 245 | done_trans = 1; |
222 | } | 246 | } |
223 | error = block_write_full_page(page, gfs2_get_block_noalloc, wbc); | 247 | error = __gfs2_jdata_writepage(page, wbc); |
224 | if (done_trans) | 248 | if (done_trans) |
225 | gfs2_trans_end(sdp); | 249 | gfs2_trans_end(sdp); |
226 | return error; | 250 | return error; |
@@ -247,6 +271,178 @@ static int gfs2_writeback_writepages(struct address_space *mapping, | |||
247 | } | 271 | } |
248 | 272 | ||
249 | /** | 273 | /** |
274 | * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages | ||
275 | * @mapping: The mapping | ||
276 | * @wbc: The writeback control | ||
277 | * @pvec: The vector of pages | ||
278 | * @nr_pages: The number of pages to write | ||
279 | * @end: The last page index to write (not enforced when wbc->range_cyclic is set) | ||
280 | * | ||
281 | * Returns: non-zero if loop should terminate, zero otherwise | ||
282 | */ | ||
283 | |||
284 | static int gfs2_write_jdata_pagevec(struct address_space *mapping, | ||
285 | struct writeback_control *wbc, | ||
286 | struct pagevec *pvec, | ||
287 | int nr_pages, pgoff_t end) | ||
288 | { | ||
289 | struct inode *inode = mapping->host; | ||
290 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
291 | loff_t i_size = i_size_read(inode); | ||
292 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | ||
293 | unsigned offset = i_size & (PAGE_CACHE_SIZE-1); | ||
294 | unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize); | ||
295 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
296 | int i; | ||
297 | int ret; | ||
298 | |||
299 | ret = gfs2_trans_begin(sdp, nrblocks, 0); | ||
300 | if (ret < 0) | ||
301 | return ret; | ||
302 | |||
303 | for(i = 0; i < nr_pages; i++) { | ||
304 | struct page *page = pvec->pages[i]; | ||
305 | |||
306 | lock_page(page); | ||
307 | |||
308 | if (unlikely(page->mapping != mapping)) { | ||
309 | unlock_page(page); | ||
310 | continue; | ||
311 | } | ||
312 | |||
313 | if (!wbc->range_cyclic && page->index > end) { | ||
314 | ret = 1; | ||
315 | unlock_page(page); | ||
316 | continue; | ||
317 | } | ||
318 | |||
319 | if (wbc->sync_mode != WB_SYNC_NONE) | ||
320 | wait_on_page_writeback(page); | ||
321 | |||
322 | if (PageWriteback(page) || | ||
323 | !clear_page_dirty_for_io(page)) { | ||
324 | unlock_page(page); | ||
325 | continue; | ||
326 | } | ||
327 | |||
328 | /* Is the page fully outside i_size? (truncate in progress) */ | ||
329 | if (page->index > end_index || (page->index == end_index && !offset)) { | ||
330 | page->mapping->a_ops->invalidatepage(page, 0); | ||
331 | unlock_page(page); | ||
332 | continue; | ||
333 | } | ||
334 | |||
335 | ret = __gfs2_jdata_writepage(page, wbc); | ||
336 | |||
337 | if (ret || (--(wbc->nr_to_write) <= 0)) | ||
338 | ret = 1; | ||
339 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | ||
340 | wbc->encountered_congestion = 1; | ||
341 | ret = 1; | ||
342 | } | ||
343 | |||
344 | } | ||
345 | gfs2_trans_end(sdp); | ||
346 | return ret; | ||
347 | } | ||
348 | |||
349 | /** | ||
350 | * gfs2_write_cache_jdata - Like write_cache_pages but different | ||
351 | * @mapping: The mapping to write | ||
352 | * @wbc: The writeback control | ||
353 | * | ||
354 | * The reason that we use our own function here is that we need to | ||
355 | * start transactions before we grab page locks. This allows us | ||
356 | * to get the ordering right. | ||
357 | * | ||
358 | * Returns: 0 on success, or a negative error code | ||
359 | */ | ||
360 | |||
361 | static int gfs2_write_cache_jdata(struct address_space *mapping, | ||
362 | struct writeback_control *wbc) | ||
363 | { | ||
364 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
365 | int ret = 0; | ||
366 | int done = 0; | ||
367 | struct pagevec pvec; | ||
368 | int nr_pages; | ||
369 | pgoff_t index; | ||
370 | pgoff_t end; | ||
371 | int scanned = 0; | ||
372 | int range_whole = 0; | ||
373 | |||
374 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | ||
375 | wbc->encountered_congestion = 1; | ||
376 | return 0; | ||
377 | } | ||
378 | |||
379 | pagevec_init(&pvec, 0); | ||
380 | if (wbc->range_cyclic) { | ||
381 | index = mapping->writeback_index; /* Start from prev offset */ | ||
382 | end = -1; | ||
383 | } else { | ||
384 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
385 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
386 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | ||
387 | range_whole = 1; | ||
388 | scanned = 1; | ||
389 | } | ||
390 | |||
391 | retry: | ||
392 | while (!done && (index <= end) && | ||
393 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
394 | PAGECACHE_TAG_DIRTY, | ||
395 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { | ||
396 | scanned = 1; | ||
397 | ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end); | ||
398 | if (ret) | ||
399 | done = 1; | ||
400 | if (ret > 0) | ||
401 | ret = 0; | ||
402 | |||
403 | pagevec_release(&pvec); | ||
404 | cond_resched(); | ||
405 | } | ||
406 | |||
407 | if (!scanned && !done) { | ||
408 | /* | ||
409 | * We hit the last page and there is more work to be done: wrap | ||
410 | * back to the start of the file | ||
411 | */ | ||
412 | scanned = 1; | ||
413 | index = 0; | ||
414 | goto retry; | ||
415 | } | ||
416 | |||
417 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | ||
418 | mapping->writeback_index = index; | ||
419 | return ret; | ||
420 | } | ||
421 | |||
422 | |||
423 | /** | ||
424 | * gfs2_jdata_writepages - Write a bunch of dirty pages back to disk | ||
425 | * @mapping: The mapping to write | ||
426 | * @wbc: The writeback control | ||
427 | * | ||
428 | */ | ||
429 | |||
430 | static int gfs2_jdata_writepages(struct address_space *mapping, | ||
431 | struct writeback_control *wbc) | ||
432 | { | ||
433 | struct gfs2_inode *ip = GFS2_I(mapping->host); | ||
434 | struct gfs2_sbd *sdp = GFS2_SB(mapping->host); | ||
435 | int ret; | ||
436 | |||
437 | ret = gfs2_write_cache_jdata(mapping, wbc); | ||
438 | if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) { | ||
439 | gfs2_log_flush(sdp, ip->i_gl); | ||
440 | ret = gfs2_write_cache_jdata(mapping, wbc); | ||
441 | } | ||
442 | return ret; | ||
443 | } | ||
444 | |||
445 | /** | ||
250 | * stuffed_readpage - Fill in a Linux page with stuffed file data | 446 | * stuffed_readpage - Fill in a Linux page with stuffed file data |
251 | * @ip: the inode | 447 | * @ip: the inode |
252 | * @page: the page | 448 | * @page: the page |
@@ -937,6 +1133,7 @@ static const struct address_space_operations gfs2_ordered_aops = { | |||
937 | 1133 | ||
938 | static const struct address_space_operations gfs2_jdata_aops = { | 1134 | static const struct address_space_operations gfs2_jdata_aops = { |
939 | .writepage = gfs2_jdata_writepage, | 1135 | .writepage = gfs2_jdata_writepage, |
1136 | .writepages = gfs2_jdata_writepages, | ||
940 | .readpage = gfs2_readpage, | 1137 | .readpage = gfs2_readpage, |
941 | .readpages = gfs2_readpages, | 1138 | .readpages = gfs2_readpages, |
942 | .sync_page = block_sync_page, | 1139 | .sync_page = block_sync_page, |