about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Corrado Zoccolo <czoccolo@gmail.com> 2010-09-20 09:24:50 -0400
committer: Jens Axboe <jaxboe@fusionio.com> 2010-09-20 09:24:50 -0400
commit: 749ef9f8423054e326f3a246327ed2db4b6d395f (patch)
tree: 388df763e4e731f5d9b4d8dcaedca641521571e3
parent: 6d0aed7a38d06284db2a0e46c0a072b0c1c3299b (diff)
cfq: improve fsync performance for small files
Fsync performance for small files achieved by cfq on high-end disks is
lower than what deadline can achieve, due to idling introduced between
the sync write happening in process context and the journal commit.

Moreover, when competing with a sequential reader, a process writing
small files and fsync-ing them is starved.

This patch fixes the two problems by:
- marking journal commits as WRITE_SYNC, so that they get the REQ_NOIDLE
  flag set,
- forcing all queues that have REQ_NOIDLE requests to be put in the noidle
  tree.

Having the queue associated with the fsync-ing process and the one
associated with journal commits in the noidle tree allows:
- switching between them without idling,
- fairness vs. competing idling queues, since they will be serviced only
  after the noidle tree expires its slice.

Acked-by: Vivek Goyal <vgoyal@redhat.com>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Tested-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Corrado Zoccolo <czoccolo@gmail.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
-rw-r--r-- block/cfq-iosched.c | 18
-rw-r--r-- fs/jbd/commit.c     |  2
-rw-r--r-- fs/jbd2/commit.c    |  2
3 files changed, 6 insertions, 16 deletions
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index b9f86190763b..684592621736 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -216,7 +216,6 @@ struct cfq_data {
216 enum wl_type_t serving_type; 216 enum wl_type_t serving_type;
217 unsigned long workload_expires; 217 unsigned long workload_expires;
218 struct cfq_group *serving_group; 218 struct cfq_group *serving_group;
219 bool noidle_tree_requires_idle;
220 219
221 /* 220 /*
222 * Each priority tree is sorted by next_request position. These 221 * Each priority tree is sorted by next_request position. These
@@ -2126,7 +2125,6 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
2126 slice = max_t(unsigned, slice, CFQ_MIN_TT); 2125 slice = max_t(unsigned, slice, CFQ_MIN_TT);
2127 cfq_log(cfqd, "workload slice:%d", slice); 2126 cfq_log(cfqd, "workload slice:%d", slice);
2128 cfqd->workload_expires = jiffies + slice; 2127 cfqd->workload_expires = jiffies + slice;
2129 cfqd->noidle_tree_requires_idle = false;
2130} 2128}
2131 2129
2132static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) 2130static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
@@ -3108,7 +3106,9 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3108 if (cfqq->queued[0] + cfqq->queued[1] >= 4) 3106 if (cfqq->queued[0] + cfqq->queued[1] >= 4)
3109 cfq_mark_cfqq_deep(cfqq); 3107 cfq_mark_cfqq_deep(cfqq);
3110 3108
3111 if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || 3109 if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE))
3110 enable_idle = 0;
3111 else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
3112 (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq))) 3112 (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
3113 enable_idle = 0; 3113 enable_idle = 0;
3114 else if (sample_valid(cic->ttime_samples)) { 3114 else if (sample_valid(cic->ttime_samples)) {
@@ -3421,17 +3421,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
3421 cfq_slice_expired(cfqd, 1); 3421 cfq_slice_expired(cfqd, 1);
3422 else if (sync && cfqq_empty && 3422 else if (sync && cfqq_empty &&
3423 !cfq_close_cooperator(cfqd, cfqq)) { 3423 !cfq_close_cooperator(cfqd, cfqq)) {
3424 cfqd->noidle_tree_requires_idle |= 3424 cfq_arm_slice_timer(cfqd);
3425 !(rq->cmd_flags & REQ_NOIDLE);
3426 /*
3427 * Idling is enabled for SYNC_WORKLOAD.
3428 * SYNC_NOIDLE_WORKLOAD idles at the end of the tree
3429 * only if we processed at least one !REQ_NOIDLE request
3430 */
3431 if (cfqd->serving_type == SYNC_WORKLOAD
3432 || cfqd->noidle_tree_requires_idle
3433 || cfqq->cfqg->nr_cfqq == 1)
3434 cfq_arm_slice_timer(cfqd);
3435 } 3425 }
3436 } 3426 }
3437 3427
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 95d8c11c929e..3f030e9efea6 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -318,7 +318,7 @@ void journal_commit_transaction(journal_t *journal)
318 int first_tag = 0; 318 int first_tag = 0;
319 int tag_flag; 319 int tag_flag;
320 int i; 320 int i;
321 int write_op = WRITE; 321 int write_op = WRITE_SYNC;
322 322
323 /* 323 /*
324 * First job: lock down the current transaction and wait for 324 * First job: lock down the current transaction and wait for
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 7c068c189d80..80910f51d4b4 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -360,7 +360,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
360 int tag_bytes = journal_tag_bytes(journal); 360 int tag_bytes = journal_tag_bytes(journal);
361 struct buffer_head *cbh = NULL; /* For transactional checksums */ 361 struct buffer_head *cbh = NULL; /* For transactional checksums */
362 __u32 crc32_sum = ~0; 362 __u32 crc32_sum = ~0;
363 int write_op = WRITE; 363 int write_op = WRITE_SYNC;
364 364
365 /* 365 /*
366 * First job: lock down the current transaction and wait for 366 * First job: lock down the current transaction and wait for