about | summary | refs | log | tree | commit | diff | stats
path: root/fs/jbd2
diff options
context:
space:
mode:
author: Jan Kara <jack@suse.cz> 2015-06-08 12:46:37 -0400
committer: Theodore Ts'o <tytso@mit.edu> 2015-06-08 12:46:37 -0400
commit: de92c8caf16ca84926fa31b7a5590c0fb9c0d5ca (patch)
tree: 3fda30d72edf93965320abb2d3a689b04fbb9f46 /fs/jbd2
parent: 8b00f400eedf91d074f831077003c0d4d9147377 (diff)
jbd2: speed up jbd2_journal_get_[write|undo]_access()
jbd2_journal_get_write_access() and jbd2_journal_get_undo_access() are frequently called for buffers that are already part of the running transaction - most frequently this is the case for bitmaps, inode table blocks, and the superblock. Since in such cases we have nothing to do, it is unfortunate that we still grab a reference to the journal head, lock the bh, and lock bh_state only to find out there's nothing to do.

Improving this is a bit subtle though, since until we find out that the journal head is attached to the running transaction, it can disappear from under us because checkpointing / commit decided it's no longer needed. We deal with this by protecting the journal_head slab with RCU. We still have to be careful about the journal head being freed & reallocated within the slab and about exposing the journal head in a consistent state (in particular b_modified and b_frozen_data must be in the correct state before we allow the user to touch the buffer).

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Diffstat (limited to 'fs/jbd2')
-rw-r--r-- fs/jbd2/journal.c 2
-rw-r--r-- fs/jbd2/transaction.c 76
2 files changed, 73 insertions, 5 deletions
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 0bc333b4a594..303ccd953e95 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2321,7 +2321,7 @@ static int jbd2_journal_init_journal_head_cache(void)
2321 jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head", 2321 jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head",
2322 sizeof(struct journal_head), 2322 sizeof(struct journal_head),
2323 0, /* offset */ 2323 0, /* offset */
2324 SLAB_TEMPORARY, /* flags */ 2324 SLAB_TEMPORARY | SLAB_DESTROY_BY_RCU,
2325 NULL); /* ctor */ 2325 NULL); /* ctor */
2326 retval = 0; 2326 retval = 0;
2327 if (!jbd2_journal_head_cache) { 2327 if (!jbd2_journal_head_cache) {
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 1bbcf86499c9..f3d06174b051 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -892,6 +892,12 @@ repeat:
892 JBUFFER_TRACE(jh, "no transaction"); 892 JBUFFER_TRACE(jh, "no transaction");
893 J_ASSERT_JH(jh, !jh->b_next_transaction); 893 J_ASSERT_JH(jh, !jh->b_next_transaction);
894 JBUFFER_TRACE(jh, "file as BJ_Reserved"); 894 JBUFFER_TRACE(jh, "file as BJ_Reserved");
895 /*
896 * Make sure all stores to jh (b_modified, b_frozen_data) are
897 * visible before attaching it to the running transaction.
898 * Paired with barrier in jbd2_write_access_granted()
899 */
900 smp_wmb();
895 spin_lock(&journal->j_list_lock); 901 spin_lock(&journal->j_list_lock);
896 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); 902 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
897 spin_unlock(&journal->j_list_lock); 903 spin_unlock(&journal->j_list_lock);
@@ -904,8 +910,7 @@ repeat:
904 if (jh->b_frozen_data) { 910 if (jh->b_frozen_data) {
905 JBUFFER_TRACE(jh, "has frozen data"); 911 JBUFFER_TRACE(jh, "has frozen data");
906 J_ASSERT_JH(jh, jh->b_next_transaction == NULL); 912 J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
907 jh->b_next_transaction = transaction; 913 goto attach_next;
908 goto done;
909 } 914 }
910 915
911 JBUFFER_TRACE(jh, "owned by older transaction"); 916 JBUFFER_TRACE(jh, "owned by older transaction");
@@ -959,6 +964,13 @@ repeat:
959 frozen_buffer = NULL; 964 frozen_buffer = NULL;
960 jbd2_freeze_jh_data(jh); 965 jbd2_freeze_jh_data(jh);
961 } 966 }
967attach_next:
968 /*
969 * Make sure all stores to jh (b_modified, b_frozen_data) are visible
970 * before attaching it to the running transaction. Paired with barrier
971 * in jbd2_write_access_granted()
972 */
973 smp_wmb();
962 jh->b_next_transaction = transaction; 974 jh->b_next_transaction = transaction;
963 975
964done: 976done:
@@ -978,6 +990,55 @@ out:
978 return error; 990 return error;
979} 991}
980 992
993/* Fast check whether buffer is already attached to the required transaction */
994static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh)
995{
996 struct journal_head *jh;
997 bool ret = false;
998
999 /* Dirty buffers require special handling... */
1000 if (buffer_dirty(bh))
1001 return false;
1002
1003 /*
1004 * RCU protects us from dereferencing freed pages. So the checks we do
1005 * are guaranteed not to oops. However the jh slab object can get freed
1006 * & reallocated while we work with it. So we have to be careful. When
1007 * we see jh attached to the running transaction, we know it must stay
1008 * so until the transaction is committed. Thus jh won't be freed and
1009 * will be attached to the same bh while we run. However it can
1010 * happen jh gets freed, reallocated, and attached to the transaction
1011 * just after we get pointer to it from bh. So we have to be careful
1012 * and recheck jh still belongs to our bh before we return success.
1013 */
1014 rcu_read_lock();
1015 if (!buffer_jbd(bh))
1016 goto out;
1017 /* This should be bh2jh() but that doesn't work with inline functions */
1018 jh = READ_ONCE(bh->b_private);
1019 if (!jh)
1020 goto out;
1021 if (jh->b_transaction != handle->h_transaction &&
1022 jh->b_next_transaction != handle->h_transaction)
1023 goto out;
1024 /*
1025 * There are two reasons for the barrier here:
1026 * 1) Make sure to fetch b_bh after we did previous checks so that we
1027 * detect when jh went through free, realloc, attach to transaction
1028 * while we were checking. Paired with implicit barrier in that path.
1029 * 2) So that access to bh done after jbd2_write_access_granted()
1030 * doesn't get reordered and see inconsistent state of concurrent
1031 * do_get_write_access().
1032 */
1033 smp_mb();
1034 if (unlikely(jh->b_bh != bh))
1035 goto out;
1036 ret = true;
1037out:
1038 rcu_read_unlock();
1039 return ret;
1040}
1041
981/** 1042/**
982 * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. 1043 * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
983 * @handle: transaction to add buffer modifications to 1044 * @handle: transaction to add buffer modifications to
@@ -991,9 +1052,13 @@ out:
991 1052
992int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh) 1053int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
993{ 1054{
994 struct journal_head *jh = jbd2_journal_add_journal_head(bh); 1055 struct journal_head *jh;
995 int rc; 1056 int rc;
996 1057
1058 if (jbd2_write_access_granted(handle, bh))
1059 return 0;
1060
1061 jh = jbd2_journal_add_journal_head(bh);
997 /* We do not want to get caught playing with fields which the 1062 /* We do not want to get caught playing with fields which the
998 * log thread also manipulates. Make sure that the buffer 1063 * log thread also manipulates. Make sure that the buffer
999 * completes any outstanding IO before proceeding. */ 1064 * completes any outstanding IO before proceeding. */
@@ -1123,11 +1188,14 @@ out:
1123int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh) 1188int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
1124{ 1189{
1125 int err; 1190 int err;
1126 struct journal_head *jh = jbd2_journal_add_journal_head(bh); 1191 struct journal_head *jh;
1127 char *committed_data = NULL; 1192 char *committed_data = NULL;
1128 1193
1129 JBUFFER_TRACE(jh, "entry"); 1194 JBUFFER_TRACE(jh, "entry");
1195 if (jbd2_write_access_granted(handle, bh))
1196 return 0;
1130 1197
1198 jh = jbd2_journal_add_journal_head(bh);
1131 /* 1199 /*
1132 * Do this first --- it can drop the journal lock, so we want to 1200 * Do this first --- it can drop the journal lock, so we want to
1133 * make sure that obtaining the committed_data is done 1201 * make sure that obtaining the committed_data is done