aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrew Morton <akpm@osdl.org>2006-02-05 02:27:54 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2006-02-05 14:06:53 -0500
commitfe1dcbc4f311c2e6c23b33c0fa8572461618ab3e (patch)
tree189e935ff275bba20629e46e2832bd523acf6cff
parentbc5e483da61eb5ab8d24b4a919fb512e5886d02c (diff)
[PATCH] jbd: fix transaction batching
Ben points out that: When writing files out using O_SYNC, jbd's 1 jiffy delay results in a significant drop in throughput as the disk sits idle. The patch below results in a 4-5x performance improvement (from 6.5MB/s to ~24-30MB/s on my IDE test box) when writing out files using O_SYNC. So optimise the batching code by omitting it entirely if the process which is doing a sync write is the same as the one which did the most recent sync write. If that's true, we're unlikely to get any other processes joining the transaction. (Has been in -mm for ages - it took me a long time to get on to performance testing it) Numbers, on write-cache-disabled IDE: /usr/bin/time -p synctest -n 10 -uf -t 1 -p 1 dir-name Unpatched: 40 seconds Patched: 35 seconds Batching disabled: 35 seconds This is the problematic single-process-doing-fsync case. With multiple fsyncing processes the numbers are AFACIT unaltered by the patch. Aside: performance testing and instrumentation shows that the transaction batching almost doesn't help (testing with synctest -n 1 -uf -t 100 -p 10 dir-name on non-writeback-caching IDE). This is because by the time one process is running a synchronous commit, a bunch of other processes already have a transaction handle open, so they're all going to batch into the same transaction anyway. The batching seems to offer maybe 5-10% speedup with this workload, but I'm pretty sure it was more important than that when it was first developed 4-odd years ago... Cc: "Stephen C. Tweedie" <sct@redhat.com> Cc: Benjamin LaHaise <bcrl@kvack.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--fs/jbd/transaction.c10
-rw-r--r--include/linux/jbd.h4
2 files changed, 13 insertions, 1 deletions
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 429f4b263cf1..ca917973c2c0 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1308,6 +1308,7 @@ int journal_stop(handle_t *handle)
1308 transaction_t *transaction = handle->h_transaction; 1308 transaction_t *transaction = handle->h_transaction;
1309 journal_t *journal = transaction->t_journal; 1309 journal_t *journal = transaction->t_journal;
1310 int old_handle_count, err; 1310 int old_handle_count, err;
1311 pid_t pid;
1311 1312
1312 J_ASSERT(transaction->t_updates > 0); 1313 J_ASSERT(transaction->t_updates > 0);
1313 J_ASSERT(journal_current_handle() == handle); 1314 J_ASSERT(journal_current_handle() == handle);
@@ -1333,8 +1334,15 @@ int journal_stop(handle_t *handle)
1333 * It doesn't cost much - we're about to run a commit and sleep 1334 * It doesn't cost much - we're about to run a commit and sleep
1334 * on IO anyway. Speeds up many-threaded, many-dir operations 1335 * on IO anyway. Speeds up many-threaded, many-dir operations
1335 * by 30x or more... 1336 * by 30x or more...
1337 *
1338 * But don't do this if this process was the most recent one to
1339 * perform a synchronous write. We do this to detect the case where a
1340 * single process is doing a stream of sync writes. No point in waiting
1341 * for joiners in that case.
1336 */ 1342 */
1337 if (handle->h_sync) { 1343 pid = current->pid;
1344 if (handle->h_sync && journal->j_last_sync_writer != pid) {
1345 journal->j_last_sync_writer = pid;
1338 do { 1346 do {
1339 old_handle_count = transaction->t_handle_count; 1347 old_handle_count = transaction->t_handle_count;
1340 schedule_timeout_uninterruptible(1); 1348 schedule_timeout_uninterruptible(1);
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 558cb4c26ec9..751bb3849467 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -23,6 +23,7 @@
23#define jfs_debug jbd_debug 23#define jfs_debug jbd_debug
24#else 24#else
25 25
26#include <linux/types.h>
26#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
27#include <linux/journal-head.h> 28#include <linux/journal-head.h>
28#include <linux/stddef.h> 29#include <linux/stddef.h>
@@ -618,6 +619,7 @@ struct transaction_s
618 * @j_wbuf: array of buffer_heads for journal_commit_transaction 619 * @j_wbuf: array of buffer_heads for journal_commit_transaction
619 * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the 620 * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the
620 * number that will fit in j_blocksize 621 * number that will fit in j_blocksize
622 * @j_last_sync_writer: most recent pid which did a synchronous write
621 * @j_private: An opaque pointer to fs-private information. 623 * @j_private: An opaque pointer to fs-private information.
622 */ 624 */
623 625
@@ -807,6 +809,8 @@ struct journal_s
807 struct buffer_head **j_wbuf; 809 struct buffer_head **j_wbuf;
808 int j_wbufsize; 810 int j_wbufsize;
809 811
812 pid_t j_last_sync_writer;
813
810 /* 814 /*
811 * An opaque pointer to fs-private information. ext3 puts its 815 * An opaque pointer to fs-private information. ext3 puts its
812 * superblock pointer here 816 * superblock pointer here