aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorSteven Whitehouse <swhiteho@redhat.com>2007-10-17 04:04:24 -0400
committerSteven Whitehouse <swhiteho@redhat.com>2008-01-25 03:07:28 -0500
commitb8e7cbb65bcc99630e123422c6829ce3c0fcdf14 (patch)
treea9f68259b90e9e65ea7f0369f448d580a8944f06 /fs
parent9ff8ec32e58875022447af619bec6e5aee7c77e4 (diff)
[GFS2] Add writepages for GFS2 jdata
This patch resolves a lock ordering issue where we had been getting a transaction lock in the wrong order with respect to the page lock. By using writepages rather than just writepage, it is then possible to start a transaction before locking the page, and thus matching the locking order elsewhere in the code. Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/gfs2/log.c2
-rw-r--r--fs/gfs2/ops_address.c213
2 files changed, 206 insertions, 9 deletions
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 70b404d2774b..1e1fe8def375 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -650,7 +650,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
650 get_bh(bh); 650 get_bh(bh);
651 gfs2_log_unlock(sdp); 651 gfs2_log_unlock(sdp);
652 lock_buffer(bh); 652 lock_buffer(bh);
653 if (test_clear_buffer_dirty(bh)) { 653 if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) {
654 bh->b_end_io = end_buffer_write_sync; 654 bh->b_end_io = end_buffer_write_sync;
655 submit_bh(WRITE, bh); 655 submit_bh(WRITE, bh);
656 } else { 656 } else {
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 4bf73ed945ae..48913e569907 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -21,6 +21,7 @@
21#include <linux/gfs2_ondisk.h> 21#include <linux/gfs2_ondisk.h>
22#include <linux/lm_interface.h> 22#include <linux/lm_interface.h>
23#include <linux/swap.h> 23#include <linux/swap.h>
24#include <linux/pagevec.h>
24 25
25#include "gfs2.h" 26#include "gfs2.h"
26#include "incore.h" 27#include "incore.h"
@@ -189,6 +190,34 @@ static int gfs2_ordered_writepage(struct page *page,
189} 190}
190 191
191/** 192/**
193 * __gfs2_jdata_writepage - The core of jdata writepage
194 * @page: The page to write
195 * @wbc: The writeback control
196 *
197 * This is shared between writepage and writepages and implements the
198 * core of the writepage operation. If a transaction is required then
199 * PageChecked will have been set and the transaction will have
200 * already been started before this is called.
201 */
202
203static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
204{
205 struct inode *inode = page->mapping->host;
206 struct gfs2_inode *ip = GFS2_I(inode);
207 struct gfs2_sbd *sdp = GFS2_SB(inode);
208
209 if (PageChecked(page)) { /* PageChecked means a transaction is required; the caller has already started it */
210 ClearPageChecked(page);
211 if (!page_has_buffers(page)) { /* no buffer heads attached yet: create them flagged dirty and uptodate */
212 create_empty_buffers(page, inode->i_sb->s_blocksize,
213 (1 << BH_Dirty)|(1 << BH_Uptodate));
214 }
215 gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1); /* NOTE(review): range passed is 0..blocksize-1, not the whole page - presumably adds those buffers to the running transaction; confirm against gfs2_page_add_databufs */
216 }
217 return block_write_full_page(page, gfs2_get_block_noalloc, wbc); /* result of the generic full-page write is returned unchanged */
218}
219
220/**
192 * gfs2_jdata_writepage - Write complete page 221 * gfs2_jdata_writepage - Write complete page
193 * @page: Page to write 222 * @page: Page to write
194 * 223 *
@@ -199,7 +228,6 @@ static int gfs2_ordered_writepage(struct page *page,
199static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) 228static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
200{ 229{
201 struct inode *inode = page->mapping->host; 230 struct inode *inode = page->mapping->host;
202 struct gfs2_inode *ip = GFS2_I(inode);
203 struct gfs2_sbd *sdp = GFS2_SB(inode); 231 struct gfs2_sbd *sdp = GFS2_SB(inode);
204 int error; 232 int error;
205 int done_trans = 0; 233 int done_trans = 0;
@@ -209,18 +237,14 @@ static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc
209 return error; 237 return error;
210 238
211 if (PageChecked(page)) { 239 if (PageChecked(page)) {
240 if (wbc->sync_mode != WB_SYNC_ALL)
241 goto out_ignore;
212 error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0); 242 error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
213 if (error) 243 if (error)
214 goto out_ignore; 244 goto out_ignore;
215 ClearPageChecked(page);
216 if (!page_has_buffers(page)) {
217 create_empty_buffers(page, inode->i_sb->s_blocksize,
218 (1 << BH_Dirty)|(1 << BH_Uptodate));
219 }
220 gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
221 done_trans = 1; 245 done_trans = 1;
222 } 246 }
223 error = block_write_full_page(page, gfs2_get_block_noalloc, wbc); 247 error = __gfs2_jdata_writepage(page, wbc);
224 if (done_trans) 248 if (done_trans)
225 gfs2_trans_end(sdp); 249 gfs2_trans_end(sdp);
226 return error; 250 return error;
@@ -247,6 +271,178 @@ static int gfs2_writeback_writepages(struct address_space *mapping,
247} 271}
248 272
249/** 273/**
274 * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages
275 * @mapping: The mapping
276 * @wbc: The writeback control
277 * @pvec: The vector of pages
278 * @nr_pages: The number of pages to write
279 * @end: The last page index to write (ignored if wbc->range_cyclic is set)
280 *
281 * Returns: non-zero if loop should terminate, zero otherwise
282 */
283
284static int gfs2_write_jdata_pagevec(struct address_space *mapping,
285 struct writeback_control *wbc,
286 struct pagevec *pvec,
287 int nr_pages, pgoff_t end)
288{
289 struct inode *inode = mapping->host;
290 struct gfs2_sbd *sdp = GFS2_SB(inode);
291 loff_t i_size = i_size_read(inode);
292 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; /* index of the page containing i_size */
293 unsigned offset = i_size & (PAGE_CACHE_SIZE-1); /* byte offset of i_size within that page */
294 unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize); /* blocks per page times pages in the vector */
295 struct backing_dev_info *bdi = mapping->backing_dev_info;
296 int i;
297 int ret;
298
299 ret = gfs2_trans_begin(sdp, nrblocks, 0); /* one transaction for the whole vector, started before any page is locked */
300 if (ret < 0)
301 return ret;
302
303 for(i = 0; i < nr_pages; i++) {
304 struct page *page = pvec->pages[i];
305
306 lock_page(page);
307
308 if (unlikely(page->mapping != mapping)) { /* page no longer belongs to this mapping: skip it */
309 unlock_page(page);
310 continue;
311 }
312
313 if (!wbc->range_cyclic && page->index > end) { /* beyond the requested range: flag termination, skip the page */
314 ret = 1;
315 unlock_page(page);
316 continue;
317 }
318
319 if (wbc->sync_mode != WB_SYNC_NONE) /* any mode other than WB_SYNC_NONE waits for writeback already in flight */
320 wait_on_page_writeback(page);
321
322 if (PageWriteback(page) || /* skip pages still under writeback, or for which clear_page_dirty_for_io() returns zero */
323 !clear_page_dirty_for_io(page)) {
324 unlock_page(page);
325 continue;
326 }
327
328 /* Is the page fully outside i_size? (truncate in progress) */
329 if (page->index > end_index || (page->index == end_index && !offset)) {
330 page->mapping->a_ops->invalidatepage(page, 0);
331 unlock_page(page);
332 continue;
333 }
334
335 ret = __gfs2_jdata_writepage(page, wbc); /* no unlock_page() follows here - presumably block_write_full_page unlocks the page; confirm */
336 /* NOTE(review): any non-zero result, including a negative error, is replaced by 1 below, and the loop does not break, so ret is reassigned on the next iteration - confirm the error code is meant to be dropped */
337 if (ret || (--(wbc->nr_to_write) <= 0))
338 ret = 1;
339 if (wbc->nonblocking && bdi_write_congested(bdi)) { /* nonblocking writeback on a congested device: record it and flag termination */
340 wbc->encountered_congestion = 1;
341 ret = 1;
342 }
343
344 }
345 gfs2_trans_end(sdp); /* the transaction is ended only after every page in the vector has been visited */
346 return ret;
347}
348
349/**
350 * gfs2_write_cache_jdata - Like write_cache_pages but different
351 * @mapping: The mapping to write
352 * @wbc: The writeback control
353 *
354 * The reason that we use our own function here is that we need to
355 * start transactions before we grab page locks. This allows us
356 * to get the ordering right.
357 *
358 * Returns: 0, or a negative error if a transaction could not be started
359 */
360
361static int gfs2_write_cache_jdata(struct address_space *mapping,
362 struct writeback_control *wbc)
363{
364 struct backing_dev_info *bdi = mapping->backing_dev_info;
365 int ret = 0;
366 int done = 0;
367 struct pagevec pvec;
368 int nr_pages;
369 pgoff_t index;
370 pgoff_t end;
371 int scanned = 0; /* set once a pass has looked at pages; cleared state allows one wrap-around retry */
372 int range_whole = 0;
373
374 if (wbc->nonblocking && bdi_write_congested(bdi)) { /* nonblocking writeback on a congested device: record it and write nothing */
375 wbc->encountered_congestion = 1;
376 return 0;
377 }
378
379 pagevec_init(&pvec, 0);
380 if (wbc->range_cyclic) {
381 index = mapping->writeback_index; /* Start from prev offset */
382 end = -1; /* presumably the largest pgoff_t value, i.e. no upper bound - confirm pgoff_t is unsigned */
383 } else {
384 index = wbc->range_start >> PAGE_CACHE_SHIFT; /* convert the byte range to page indices */
385 end = wbc->range_end >> PAGE_CACHE_SHIFT;
386 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
387 range_whole = 1;
388 scanned = 1; /* explicit range: never take the wrap-around retry below */
389 }
390
391retry:
392 while (!done && (index <= end) &&
393 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, /* look up dirty-tagged pages; count is capped at PAGEVEC_SIZE and at the pages left up to end */
394 PAGECACHE_TAG_DIRTY,
395 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
396 scanned = 1;
397 ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end);
398 if (ret) /* any non-zero result ends the scan */
399 done = 1;
400 if (ret > 0) /* a positive result only means "stop", so it is not returned to the caller */
401 ret = 0;
402
403 pagevec_release(&pvec);
404 cond_resched();
405 }
406
407 if (!scanned && !done) {
408 /*
409 * We hit the last page and there is more work to be done: wrap
410 * back to the start of the file
411 */
412 scanned = 1;
413 index = 0;
414 goto retry;
415 }
416
417 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) /* save the index reached so a later cyclic pass resumes from it */
418 mapping->writeback_index = index;
419 return ret;
420}
421
422
423/**
424 * gfs2_jdata_writepages - Write a bunch of dirty pages back to disk
425 * @mapping: The mapping to write
426 * @wbc: The writeback control
427 *
428 */
429
430static int gfs2_jdata_writepages(struct address_space *mapping,
431 struct writeback_control *wbc)
432{
433 struct gfs2_inode *ip = GFS2_I(mapping->host);
434 struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
435 int ret;
436
437 ret = gfs2_write_cache_jdata(mapping, wbc); /* first pass over the mapping's dirty pages */
438 if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) { /* WB_SYNC_ALL only: flush the log for this inode's glock, then run a second pass */
439 gfs2_log_flush(sdp, ip->i_gl);
440 ret = gfs2_write_cache_jdata(mapping, wbc); /* NOTE(review): presumably picks up pages that became writable once the log flush completed - confirm */
441 }
442 return ret;
443}
444
445/**
250 * stuffed_readpage - Fill in a Linux page with stuffed file data 446 * stuffed_readpage - Fill in a Linux page with stuffed file data
251 * @ip: the inode 447 * @ip: the inode
252 * @page: the page 448 * @page: the page
@@ -937,6 +1133,7 @@ static const struct address_space_operations gfs2_ordered_aops = {
937 1133
938static const struct address_space_operations gfs2_jdata_aops = { 1134static const struct address_space_operations gfs2_jdata_aops = {
939 .writepage = gfs2_jdata_writepage, 1135 .writepage = gfs2_jdata_writepage,
1136 .writepages = gfs2_jdata_writepages,
940 .readpage = gfs2_readpage, 1137 .readpage = gfs2_readpage,
941 .readpages = gfs2_readpages, 1138 .readpages = gfs2_readpages,
942 .sync_page = block_sync_page, 1139 .sync_page = block_sync_page,