aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Steven Whitehouse <swhiteho@redhat.com> 2013-01-28 04:30:07 -0500
committer: Steven Whitehouse <swhiteho@redhat.com> 2013-01-29 05:29:17 -0500
commit: 4513899092b3254b3539f92a65d2839afa1d50f6 (patch)
tree: 1080b7adfac648dacd2d4aa70643a6a456284492
parent: d564053f074634e7a966359dc97d26900fa5f52d (diff)
GFS2: Use ->writepages for ordered writes
Instead of using a list of buffers to write ahead of the journal flush, this now uses a list of inodes and calls ->writepages via filemap_fdatawrite() in order to achieve the same thing. For most use cases this results in a shorter ordered write list, as well as much larger I/Os being issued. The ordered write list is sorted by inode number before writing in order to retain the disk block ordering between inodes, as per the previous code. The previous ordered write code made assumptions about how to write out the disk blocks that conflicted with mpage_writepages(). With this updated version we can also use mpage_writepages() as the ->writepages implementation for GFS2's ordered write mode, so we will send larger I/Os from writeback too.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
-rw-r--r-- fs/gfs2/aops.c 13
-rw-r--r-- fs/gfs2/bmap.c 2
-rw-r--r-- fs/gfs2/incore.h 3
-rw-r--r-- fs/gfs2/log.c 76
-rw-r--r-- fs/gfs2/log.h 12
-rw-r--r-- fs/gfs2/ops_fstype.c 1
-rw-r--r-- fs/gfs2/super.c 1
-rw-r--r-- fs/gfs2/trans.c 41
8 files changed, 79 insertions, 70 deletions
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 92340dd23bba..24f414f0ce61 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -230,16 +230,14 @@ out_ignore:
230} 230}
231 231
232/** 232/**
233 * gfs2_writeback_writepages - Write a bunch of dirty pages back to disk 233 * gfs2_writepages - Write a bunch of dirty pages back to disk
234 * @mapping: The mapping to write 234 * @mapping: The mapping to write
235 * @wbc: Write-back control 235 * @wbc: Write-back control
236 * 236 *
237 * For the data=writeback case we can already ignore buffer heads 237 * Used for both ordered and writeback modes.
238 * and write whole extents at once. This is a big reduction in the
239 * number of I/O requests we send and the bmap calls we make in this case.
240 */ 238 */
241static int gfs2_writeback_writepages(struct address_space *mapping, 239static int gfs2_writepages(struct address_space *mapping,
242 struct writeback_control *wbc) 240 struct writeback_control *wbc)
243{ 241{
244 return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); 242 return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
245} 243}
@@ -1102,7 +1100,7 @@ cannot_release:
1102 1100
1103static const struct address_space_operations gfs2_writeback_aops = { 1101static const struct address_space_operations gfs2_writeback_aops = {
1104 .writepage = gfs2_writeback_writepage, 1102 .writepage = gfs2_writeback_writepage,
1105 .writepages = gfs2_writeback_writepages, 1103 .writepages = gfs2_writepages,
1106 .readpage = gfs2_readpage, 1104 .readpage = gfs2_readpage,
1107 .readpages = gfs2_readpages, 1105 .readpages = gfs2_readpages,
1108 .write_begin = gfs2_write_begin, 1106 .write_begin = gfs2_write_begin,
@@ -1118,6 +1116,7 @@ static const struct address_space_operations gfs2_writeback_aops = {
1118 1116
1119static const struct address_space_operations gfs2_ordered_aops = { 1117static const struct address_space_operations gfs2_ordered_aops = {
1120 .writepage = gfs2_ordered_writepage, 1118 .writepage = gfs2_ordered_writepage,
1119 .writepages = gfs2_writepages,
1121 .readpage = gfs2_readpage, 1120 .readpage = gfs2_readpage,
1122 .readpages = gfs2_readpages, 1121 .readpages = gfs2_readpages,
1123 .write_begin = gfs2_write_begin, 1122 .write_begin = gfs2_write_begin,
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 7a8627569a25..d29d7793b211 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -22,6 +22,7 @@
22#include "meta_io.h" 22#include "meta_io.h"
23#include "quota.h" 23#include "quota.h"
24#include "rgrp.h" 24#include "rgrp.h"
25#include "log.h"
25#include "super.h" 26#include "super.h"
26#include "trans.h" 27#include "trans.h"
27#include "dir.h" 28#include "dir.h"
@@ -1137,6 +1138,7 @@ static int trunc_end(struct gfs2_inode *ip)
1137 ip->i_height = 0; 1138 ip->i_height = 0;
1138 ip->i_goal = ip->i_no_addr; 1139 ip->i_goal = ip->i_no_addr;
1139 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 1140 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1141 gfs2_ordered_del_inode(ip);
1140 } 1142 }
1141 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1143 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1142 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG; 1144 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 19750bcb1ce7..1533cf8b4269 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -340,6 +340,7 @@ enum {
340 GIF_QD_LOCKED = 1, 340 GIF_QD_LOCKED = 1,
341 GIF_ALLOC_FAILED = 2, 341 GIF_ALLOC_FAILED = 2,
342 GIF_SW_PAGED = 3, 342 GIF_SW_PAGED = 3,
343 GIF_ORDERED = 4,
343}; 344};
344 345
345struct gfs2_inode { 346struct gfs2_inode {
@@ -356,6 +357,7 @@ struct gfs2_inode {
356 struct gfs2_rgrpd *i_rgd; 357 struct gfs2_rgrpd *i_rgd;
357 u64 i_goal; /* goal block for allocations */ 358 u64 i_goal; /* goal block for allocations */
358 struct rw_semaphore i_rw_mutex; 359 struct rw_semaphore i_rw_mutex;
360 struct list_head i_ordered;
359 struct list_head i_trunc_list; 361 struct list_head i_trunc_list;
360 __be64 *i_hash_cache; 362 __be64 *i_hash_cache;
361 u32 i_entries; 363 u32 i_entries;
@@ -722,6 +724,7 @@ struct gfs2_sbd {
722 struct list_head sd_log_le_revoke; 724 struct list_head sd_log_le_revoke;
723 struct list_head sd_log_le_databuf; 725 struct list_head sd_log_le_databuf;
724 struct list_head sd_log_le_ordered; 726 struct list_head sd_log_le_ordered;
727 spinlock_t sd_ordered_lock;
725 728
726 atomic_t sd_log_thresh1; 729 atomic_t sd_log_thresh1;
727 atomic_t sd_log_thresh2; 730 atomic_t sd_log_thresh2;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f4beeb9c81c1..9a2ca8be7647 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -482,70 +482,66 @@ static void log_flush_wait(struct gfs2_sbd *sdp)
482 } 482 }
483} 483}
484 484
485static int bd_cmp(void *priv, struct list_head *a, struct list_head *b) 485static int ip_cmp(void *priv, struct list_head *a, struct list_head *b)
486{ 486{
487 struct gfs2_bufdata *bda, *bdb; 487 struct gfs2_inode *ipa, *ipb;
488 488
489 bda = list_entry(a, struct gfs2_bufdata, bd_list); 489 ipa = list_entry(a, struct gfs2_inode, i_ordered);
490 bdb = list_entry(b, struct gfs2_bufdata, bd_list); 490 ipb = list_entry(b, struct gfs2_inode, i_ordered);
491 491
492 if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr) 492 if (ipa->i_no_addr < ipb->i_no_addr)
493 return -1; 493 return -1;
494 if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr) 494 if (ipa->i_no_addr > ipb->i_no_addr)
495 return 1; 495 return 1;
496 return 0; 496 return 0;
497} 497}
498 498
499static void gfs2_ordered_write(struct gfs2_sbd *sdp) 499static void gfs2_ordered_write(struct gfs2_sbd *sdp)
500{ 500{
501 struct gfs2_bufdata *bd; 501 struct gfs2_inode *ip;
502 struct buffer_head *bh;
503 LIST_HEAD(written); 502 LIST_HEAD(written);
504 503
505 gfs2_log_lock(sdp); 504 spin_lock(&sdp->sd_ordered_lock);
506 list_sort(NULL, &sdp->sd_log_le_ordered, &bd_cmp); 505 list_sort(NULL, &sdp->sd_log_le_ordered, &ip_cmp);
507 while (!list_empty(&sdp->sd_log_le_ordered)) { 506 while (!list_empty(&sdp->sd_log_le_ordered)) {
508 bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_list); 507 ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
509 list_move(&bd->bd_list, &written); 508 list_move(&ip->i_ordered, &written);
510 bh = bd->bd_bh; 509 if (ip->i_inode.i_mapping->nrpages == 0)
511 if (!buffer_dirty(bh))
512 continue; 510 continue;
513 get_bh(bh); 511 spin_unlock(&sdp->sd_ordered_lock);
514 gfs2_log_unlock(sdp); 512 filemap_fdatawrite(ip->i_inode.i_mapping);
515 lock_buffer(bh); 513 spin_lock(&sdp->sd_ordered_lock);
516 if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) {
517 bh->b_end_io = end_buffer_write_sync;
518 submit_bh(WRITE_SYNC, bh);
519 } else {
520 unlock_buffer(bh);
521 brelse(bh);
522 }
523 gfs2_log_lock(sdp);
524 } 514 }
525 list_splice(&written, &sdp->sd_log_le_ordered); 515 list_splice(&written, &sdp->sd_log_le_ordered);
526 gfs2_log_unlock(sdp); 516 spin_unlock(&sdp->sd_ordered_lock);
527} 517}
528 518
529static void gfs2_ordered_wait(struct gfs2_sbd *sdp) 519static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
530{ 520{
531 struct gfs2_bufdata *bd; 521 struct gfs2_inode *ip;
532 struct buffer_head *bh;
533 522
534 gfs2_log_lock(sdp); 523 spin_lock(&sdp->sd_ordered_lock);
535 while (!list_empty(&sdp->sd_log_le_ordered)) { 524 while (!list_empty(&sdp->sd_log_le_ordered)) {
536 bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_list); 525 ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
537 bh = bd->bd_bh; 526 list_del(&ip->i_ordered);
538 if (buffer_locked(bh)) { 527 WARN_ON(!test_and_clear_bit(GIF_ORDERED, &ip->i_flags));
539 get_bh(bh); 528 if (ip->i_inode.i_mapping->nrpages == 0)
540 gfs2_log_unlock(sdp);
541 wait_on_buffer(bh);
542 brelse(bh);
543 gfs2_log_lock(sdp);
544 continue; 529 continue;
545 } 530 spin_unlock(&sdp->sd_ordered_lock);
546 list_del_init(&bd->bd_list); 531 filemap_fdatawait(ip->i_inode.i_mapping);
532 spin_lock(&sdp->sd_ordered_lock);
547 } 533 }
548 gfs2_log_unlock(sdp); 534 spin_unlock(&sdp->sd_ordered_lock);
535}
536
537void gfs2_ordered_del_inode(struct gfs2_inode *ip)
538{
539 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
540
541 spin_lock(&sdp->sd_ordered_lock);
542 if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags))
543 list_del(&ip->i_ordered);
544 spin_unlock(&sdp->sd_ordered_lock);
549} 545}
550 546
551/** 547/**
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 3fd5215ea25f..3566f35915e0 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -48,6 +48,18 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
48 sdp->sd_log_head = sdp->sd_log_tail = value; 48 sdp->sd_log_head = sdp->sd_log_tail = value;
49} 49}
50 50
51static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
52{
53 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
54
55 if (!test_bit(GIF_ORDERED, &ip->i_flags)) {
56 spin_lock(&sdp->sd_ordered_lock);
57 if (!test_and_set_bit(GIF_ORDERED, &ip->i_flags))
58 list_add(&ip->i_ordered, &sdp->sd_log_le_ordered);
59 spin_unlock(&sdp->sd_ordered_lock);
60 }
61}
62extern void gfs2_ordered_del_inode(struct gfs2_inode *ip);
51extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, 63extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
52 unsigned int ssize); 64 unsigned int ssize);
53 65
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 5f5aba529fb1..e063f22d9e4c 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -102,6 +102,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
102 INIT_LIST_HEAD(&sdp->sd_log_le_revoke); 102 INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
103 INIT_LIST_HEAD(&sdp->sd_log_le_databuf); 103 INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
104 INIT_LIST_HEAD(&sdp->sd_log_le_ordered); 104 INIT_LIST_HEAD(&sdp->sd_log_le_ordered);
105 spin_lock_init(&sdp->sd_ordered_lock);
105 106
106 init_waitqueue_head(&sdp->sd_log_waitq); 107 init_waitqueue_head(&sdp->sd_log_waitq);
107 init_waitqueue_head(&sdp->sd_logd_waitq); 108 init_waitqueue_head(&sdp->sd_logd_waitq);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index c075b62aef59..a3b40eeaa6e2 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1524,6 +1524,7 @@ out:
1524 /* Case 3 starts here */ 1524 /* Case 3 starts here */
1525 truncate_inode_pages(&inode->i_data, 0); 1525 truncate_inode_pages(&inode->i_data, 0);
1526 gfs2_rs_delete(ip); 1526 gfs2_rs_delete(ip);
1527 gfs2_ordered_del_inode(ip);
1527 clear_inode(inode); 1528 clear_inode(inode);
1528 gfs2_dir_hash_inval(ip); 1529 gfs2_dir_hash_inval(ip);
1529 ip->i_gl->gl_object = NULL; 1530 ip->i_gl->gl_object = NULL;
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 14dbf6d3cdc0..88162fae27a5 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -159,7 +159,9 @@ static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl,
159} 159}
160 160
161/** 161/**
162 * databuf_lo_add - Add a databuf to the transaction. 162 * gfs2_trans_add_data - Add a databuf to the transaction.
163 * @gl: The inode glock associated with the buffer
164 * @bh: The buffer to add
163 * 165 *
164 * This is used in two distinct cases: 166 * This is used in two distinct cases:
165 * i) In ordered write mode 167 * i) In ordered write mode
@@ -174,33 +176,18 @@ static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl,
174 * blocks, which isn't an enormous overhead but twice as much as 176 * blocks, which isn't an enormous overhead but twice as much as
175 * for normal metadata blocks. 177 * for normal metadata blocks.
176 */ 178 */
177static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) 179void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh)
178{ 180{
179 struct gfs2_trans *tr = current->journal_info; 181 struct gfs2_trans *tr = current->journal_info;
180 struct address_space *mapping = bd->bd_bh->b_page->mapping; 182 struct gfs2_sbd *sdp = gl->gl_sbd;
183 struct address_space *mapping = bh->b_page->mapping;
181 struct gfs2_inode *ip = GFS2_I(mapping->host); 184 struct gfs2_inode *ip = GFS2_I(mapping->host);
185 struct gfs2_bufdata *bd;
182 186
183 if (tr) 187 if (!gfs2_is_jdata(ip)) {
184 tr->tr_touched = 1; 188 gfs2_ordered_add_inode(ip);
185 if (!list_empty(&bd->bd_list))
186 return; 189 return;
187 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
188 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
189 if (gfs2_is_jdata(ip)) {
190 gfs2_pin(sdp, bd->bd_bh);
191 tr->tr_num_databuf_new++;
192 sdp->sd_log_num_databuf++;
193 list_add_tail(&bd->bd_list, &sdp->sd_log_le_databuf);
194 } else {
195 list_add_tail(&bd->bd_list, &sdp->sd_log_le_ordered);
196 } 190 }
197}
198
199void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh)
200{
201
202 struct gfs2_sbd *sdp = gl->gl_sbd;
203 struct gfs2_bufdata *bd;
204 191
205 lock_buffer(bh); 192 lock_buffer(bh);
206 gfs2_log_lock(sdp); 193 gfs2_log_lock(sdp);
@@ -214,7 +201,15 @@ void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh)
214 gfs2_log_lock(sdp); 201 gfs2_log_lock(sdp);
215 } 202 }
216 gfs2_assert(sdp, bd->bd_gl == gl); 203 gfs2_assert(sdp, bd->bd_gl == gl);
217 databuf_lo_add(sdp, bd); 204 tr->tr_touched = 1;
205 if (list_empty(&bd->bd_list)) {
206 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
207 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
208 gfs2_pin(sdp, bd->bd_bh);
209 tr->tr_num_databuf_new++;
210 sdp->sd_log_num_databuf++;
211 list_add_tail(&bd->bd_list, &sdp->sd_log_le_databuf);
212 }
218 gfs2_log_unlock(sdp); 213 gfs2_log_unlock(sdp);
219 unlock_buffer(bh); 214 unlock_buffer(bh);
220} 215}