diff options
author | Corrado Zoccolo <czoccolo@gmail.com> | 2010-09-20 09:24:50 -0400 |
---|---|---|
committer | Jens Axboe <jaxboe@fusionio.com> | 2010-09-20 09:24:50 -0400 |
commit | 749ef9f8423054e326f3a246327ed2db4b6d395f (patch) | |
tree | 388df763e4e731f5d9b4d8dcaedca641521571e3 | |
parent | 6d0aed7a38d06284db2a0e46c0a072b0c1c3299b (diff) |
cfq: improve fsync performance for small files
Fsync performance for small files achieved by cfq on high-end disks is
lower than what deadline can achieve, due to idling introduced between
the sync write happening in process context and the journal commit.
Moreover, when competing with a sequential reader, a process writing
small files and fsync-ing them is starved.
This patch fixes the two problems by:
- marking journal commits as WRITE_SYNC, so that they get the REQ_NOIDLE
flag set,
- forcing all queues that have REQ_NOIDLE requests to be put in the noidle
tree.
Having the queue associated with the fsync-ing process and the one associated
with journal commits in the noidle tree allows:
- switching between them without idling,
- fairness vs. competing idling queues, since they will be serviced only
after the noidle tree expires its slice.
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Tested-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Corrado Zoccolo <czoccolo@gmail.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
-rw-r--r-- | block/cfq-iosched.c | 18 | ||||
-rw-r--r-- | fs/jbd/commit.c | 2 | ||||
-rw-r--r-- | fs/jbd2/commit.c | 2 |
3 files changed, 6 insertions, 16 deletions
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index b9f86190763b..684592621736 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -216,7 +216,6 @@ struct cfq_data { | |||
216 | enum wl_type_t serving_type; | 216 | enum wl_type_t serving_type; |
217 | unsigned long workload_expires; | 217 | unsigned long workload_expires; |
218 | struct cfq_group *serving_group; | 218 | struct cfq_group *serving_group; |
219 | bool noidle_tree_requires_idle; | ||
220 | 219 | ||
221 | /* | 220 | /* |
222 | * Each priority tree is sorted by next_request position. These | 221 | * Each priority tree is sorted by next_request position. These |
@@ -2126,7 +2125,6 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
2126 | slice = max_t(unsigned, slice, CFQ_MIN_TT); | 2125 | slice = max_t(unsigned, slice, CFQ_MIN_TT); |
2127 | cfq_log(cfqd, "workload slice:%d", slice); | 2126 | cfq_log(cfqd, "workload slice:%d", slice); |
2128 | cfqd->workload_expires = jiffies + slice; | 2127 | cfqd->workload_expires = jiffies + slice; |
2129 | cfqd->noidle_tree_requires_idle = false; | ||
2130 | } | 2128 | } |
2131 | 2129 | ||
2132 | static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) | 2130 | static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) |
@@ -3108,7 +3106,9 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
3108 | if (cfqq->queued[0] + cfqq->queued[1] >= 4) | 3106 | if (cfqq->queued[0] + cfqq->queued[1] >= 4) |
3109 | cfq_mark_cfqq_deep(cfqq); | 3107 | cfq_mark_cfqq_deep(cfqq); |
3110 | 3108 | ||
3111 | if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || | 3109 | if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE)) |
3110 | enable_idle = 0; | ||
3111 | else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || | ||
3112 | (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq))) | 3112 | (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq))) |
3113 | enable_idle = 0; | 3113 | enable_idle = 0; |
3114 | else if (sample_valid(cic->ttime_samples)) { | 3114 | else if (sample_valid(cic->ttime_samples)) { |
@@ -3421,17 +3421,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) | |||
3421 | cfq_slice_expired(cfqd, 1); | 3421 | cfq_slice_expired(cfqd, 1); |
3422 | else if (sync && cfqq_empty && | 3422 | else if (sync && cfqq_empty && |
3423 | !cfq_close_cooperator(cfqd, cfqq)) { | 3423 | !cfq_close_cooperator(cfqd, cfqq)) { |
3424 | cfqd->noidle_tree_requires_idle |= | 3424 | cfq_arm_slice_timer(cfqd); |
3425 | !(rq->cmd_flags & REQ_NOIDLE); | ||
3426 | /* | ||
3427 | * Idling is enabled for SYNC_WORKLOAD. | ||
3428 | * SYNC_NOIDLE_WORKLOAD idles at the end of the tree | ||
3429 | * only if we processed at least one !REQ_NOIDLE request | ||
3430 | */ | ||
3431 | if (cfqd->serving_type == SYNC_WORKLOAD | ||
3432 | || cfqd->noidle_tree_requires_idle | ||
3433 | || cfqq->cfqg->nr_cfqq == 1) | ||
3434 | cfq_arm_slice_timer(cfqd); | ||
3435 | } | 3425 | } |
3436 | } | 3426 | } |
3437 | 3427 | ||
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 95d8c11c929e..3f030e9efea6 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -318,7 +318,7 @@ void journal_commit_transaction(journal_t *journal) | |||
318 | int first_tag = 0; | 318 | int first_tag = 0; |
319 | int tag_flag; | 319 | int tag_flag; |
320 | int i; | 320 | int i; |
321 | int write_op = WRITE; | 321 | int write_op = WRITE_SYNC; |
322 | 322 | ||
323 | /* | 323 | /* |
324 | * First job: lock down the current transaction and wait for | 324 | * First job: lock down the current transaction and wait for |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 7c068c189d80..80910f51d4b4 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -360,7 +360,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
360 | int tag_bytes = journal_tag_bytes(journal); | 360 | int tag_bytes = journal_tag_bytes(journal); |
361 | struct buffer_head *cbh = NULL; /* For transactional checksums */ | 361 | struct buffer_head *cbh = NULL; /* For transactional checksums */ |
362 | __u32 crc32_sum = ~0; | 362 | __u32 crc32_sum = ~0; |
363 | int write_op = WRITE; | 363 | int write_op = WRITE_SYNC; |
364 | 364 | ||
365 | /* | 365 | /* |
366 | * First job: lock down the current transaction and wait for | 366 | * First job: lock down the current transaction and wait for |