diff options
Diffstat (limited to 'fs/jbd2/transaction.c')
-rw-r--r-- | fs/jbd2/transaction.c | 217 |
1 files changed, 6 insertions, 211 deletions
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 98b596d23705..4f7cadbb19fa 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -943,183 +943,6 @@ out: | |||
943 | } | 943 | } |
944 | 944 | ||
945 | /** | 945 | /** |
946 | * int jbd2_journal_dirty_data() - mark a buffer as containing dirty data which | ||
947 | * needs to be flushed before we can commit the | ||
948 | * current transaction. | ||
949 | * @handle: transaction | ||
950 | * @bh: bufferhead to mark | ||
951 | * | ||
952 | * The buffer is placed on the transaction's data list and is marked as | ||
953 | * belonging to the transaction. | ||
954 | * | ||
955 | * Returns error number or 0 on success. | ||
956 | * | ||
957 | * jbd2_journal_dirty_data() can be called via page_launder->ext3_writepage | ||
958 | * by kswapd. | ||
959 | */ | ||
960 | int jbd2_journal_dirty_data(handle_t *handle, struct buffer_head *bh) | ||
961 | { | ||
962 | journal_t *journal = handle->h_transaction->t_journal; | ||
963 | int need_brelse = 0; | ||
964 | struct journal_head *jh; | ||
965 | |||
966 | if (is_handle_aborted(handle)) | ||
967 | return 0; | ||
968 | |||
969 | jh = jbd2_journal_add_journal_head(bh); | ||
970 | JBUFFER_TRACE(jh, "entry"); | ||
971 | |||
972 | /* | ||
973 | * The buffer could *already* be dirty. Writeout can start | ||
974 | * at any time. | ||
975 | */ | ||
976 | jbd_debug(4, "jh: %p, tid:%d\n", jh, handle->h_transaction->t_tid); | ||
977 | |||
978 | /* | ||
979 | * What if the buffer is already part of a running transaction? | ||
980 | * | ||
981 | * There are two cases: | ||
982 | * 1) It is part of the current running transaction. Refile it, | ||
983 | * just in case we have allocated it as metadata, deallocated | ||
984 | * it, then reallocated it as data. | ||
985 | * 2) It is part of the previous, still-committing transaction. | ||
986 | * If all we want to do is to guarantee that the buffer will be | ||
987 | * written to disk before this new transaction commits, then | ||
988 | * being sure that the *previous* transaction has this same | ||
989 | * property is sufficient for us! Just leave it on its old | ||
990 | * transaction. | ||
991 | * | ||
992 | * In case (2), the buffer must not already exist as metadata | ||
993 | * --- that would violate write ordering (a transaction is free | ||
994 | * to write its data at any point, even before the previous | ||
995 | * committing transaction has committed). The caller must | ||
996 | * never, ever allow this to happen: there's nothing we can do | ||
997 | * about it in this layer. | ||
998 | */ | ||
999 | jbd_lock_bh_state(bh); | ||
1000 | spin_lock(&journal->j_list_lock); | ||
1001 | |||
1002 | /* Now that we have bh_state locked, are we really still mapped? */ | ||
1003 | if (!buffer_mapped(bh)) { | ||
1004 | JBUFFER_TRACE(jh, "unmapped buffer, bailing out"); | ||
1005 | goto no_journal; | ||
1006 | } | ||
1007 | |||
1008 | if (jh->b_transaction) { | ||
1009 | JBUFFER_TRACE(jh, "has transaction"); | ||
1010 | if (jh->b_transaction != handle->h_transaction) { | ||
1011 | JBUFFER_TRACE(jh, "belongs to older transaction"); | ||
1012 | J_ASSERT_JH(jh, jh->b_transaction == | ||
1013 | journal->j_committing_transaction); | ||
1014 | |||
1015 | /* @@@ IS THIS TRUE ? */ | ||
1016 | /* | ||
1017 | * Not any more. Scenario: someone does a write() | ||
1018 | * in data=journal mode. The buffer's transaction has | ||
1019 | * moved into commit. Then someone does another | ||
1020 | * write() to the file. We do the frozen data copyout | ||
1021 | * and set b_next_transaction to point to j_running_t. | ||
1022 | * And while we're in that state, someone does a | ||
1023 | * writepage() in an attempt to pageout the same area | ||
1024 | * of the file via a shared mapping. At present that | ||
1025 | * calls jbd2_journal_dirty_data(), and we get right here. | ||
1026 | * It may be too late to journal the data. Simply | ||
1027 | * falling through to the next test will suffice: the | ||
1028 | * data will be dirty and wil be checkpointed. The | ||
1029 | * ordering comments in the next comment block still | ||
1030 | * apply. | ||
1031 | */ | ||
1032 | //J_ASSERT_JH(jh, jh->b_next_transaction == NULL); | ||
1033 | |||
1034 | /* | ||
1035 | * If we're journalling data, and this buffer was | ||
1036 | * subject to a write(), it could be metadata, forget | ||
1037 | * or shadow against the committing transaction. Now, | ||
1038 | * someone has dirtied the same darn page via a mapping | ||
1039 | * and it is being writepage()'d. | ||
1040 | * We *could* just steal the page from commit, with some | ||
1041 | * fancy locking there. Instead, we just skip it - | ||
1042 | * don't tie the page's buffers to the new transaction | ||
1043 | * at all. | ||
1044 | * Implication: if we crash before the writepage() data | ||
1045 | * is written into the filesystem, recovery will replay | ||
1046 | * the write() data. | ||
1047 | */ | ||
1048 | if (jh->b_jlist != BJ_None && | ||
1049 | jh->b_jlist != BJ_SyncData && | ||
1050 | jh->b_jlist != BJ_Locked) { | ||
1051 | JBUFFER_TRACE(jh, "Not stealing"); | ||
1052 | goto no_journal; | ||
1053 | } | ||
1054 | |||
1055 | /* | ||
1056 | * This buffer may be undergoing writeout in commit. We | ||
1057 | * can't return from here and let the caller dirty it | ||
1058 | * again because that can cause the write-out loop in | ||
1059 | * commit to never terminate. | ||
1060 | */ | ||
1061 | if (buffer_dirty(bh)) { | ||
1062 | get_bh(bh); | ||
1063 | spin_unlock(&journal->j_list_lock); | ||
1064 | jbd_unlock_bh_state(bh); | ||
1065 | need_brelse = 1; | ||
1066 | sync_dirty_buffer(bh); | ||
1067 | jbd_lock_bh_state(bh); | ||
1068 | spin_lock(&journal->j_list_lock); | ||
1069 | /* Since we dropped the lock... */ | ||
1070 | if (!buffer_mapped(bh)) { | ||
1071 | JBUFFER_TRACE(jh, "buffer got unmapped"); | ||
1072 | goto no_journal; | ||
1073 | } | ||
1074 | /* The buffer may become locked again at any | ||
1075 | time if it is redirtied */ | ||
1076 | } | ||
1077 | |||
1078 | /* journal_clean_data_list() may have got there first */ | ||
1079 | if (jh->b_transaction != NULL) { | ||
1080 | JBUFFER_TRACE(jh, "unfile from commit"); | ||
1081 | __jbd2_journal_temp_unlink_buffer(jh); | ||
1082 | /* It still points to the committing | ||
1083 | * transaction; move it to this one so | ||
1084 | * that the refile assert checks are | ||
1085 | * happy. */ | ||
1086 | jh->b_transaction = handle->h_transaction; | ||
1087 | } | ||
1088 | /* The buffer will be refiled below */ | ||
1089 | |||
1090 | } | ||
1091 | /* | ||
1092 | * Special case --- the buffer might actually have been | ||
1093 | * allocated and then immediately deallocated in the previous, | ||
1094 | * committing transaction, so might still be left on that | ||
1095 | * transaction's metadata lists. | ||
1096 | */ | ||
1097 | if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { | ||
1098 | JBUFFER_TRACE(jh, "not on correct data list: unfile"); | ||
1099 | J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); | ||
1100 | __jbd2_journal_temp_unlink_buffer(jh); | ||
1101 | jh->b_transaction = handle->h_transaction; | ||
1102 | JBUFFER_TRACE(jh, "file as data"); | ||
1103 | __jbd2_journal_file_buffer(jh, handle->h_transaction, | ||
1104 | BJ_SyncData); | ||
1105 | } | ||
1106 | } else { | ||
1107 | JBUFFER_TRACE(jh, "not on a transaction"); | ||
1108 | __jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_SyncData); | ||
1109 | } | ||
1110 | no_journal: | ||
1111 | spin_unlock(&journal->j_list_lock); | ||
1112 | jbd_unlock_bh_state(bh); | ||
1113 | if (need_brelse) { | ||
1114 | BUFFER_TRACE(bh, "brelse"); | ||
1115 | __brelse(bh); | ||
1116 | } | ||
1117 | JBUFFER_TRACE(jh, "exit"); | ||
1118 | jbd2_journal_put_journal_head(jh); | ||
1119 | return 0; | ||
1120 | } | ||
1121 | |||
1122 | /** | ||
1123 | * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata | 946 | * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata |
1124 | * @handle: transaction to add buffer to. | 947 | * @handle: transaction to add buffer to. |
1125 | * @bh: buffer to mark | 948 | * @bh: buffer to mark |
@@ -1541,10 +1364,10 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh) | |||
1541 | * Remove a buffer from the appropriate transaction list. | 1364 | * Remove a buffer from the appropriate transaction list. |
1542 | * | 1365 | * |
1543 | * Note that this function can *change* the value of | 1366 | * Note that this function can *change* the value of |
1544 | * bh->b_transaction->t_sync_datalist, t_buffers, t_forget, | 1367 | * bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list, |
1545 | * t_iobuf_list, t_shadow_list, t_log_list or t_reserved_list. If the caller | 1368 | * t_log_list or t_reserved_list. If the caller is holding onto a copy of one |
1546 | * is holding onto a copy of one of thee pointers, it could go bad. | 1369 | * of these pointers, it could go bad. Generally the caller needs to re-read |
1547 | * Generally the caller needs to re-read the pointer from the transaction_t. | 1370 | * the pointer from the transaction_t. |
1548 | * | 1371 | * |
1549 | * Called under j_list_lock. The journal may not be locked. | 1372 | * Called under j_list_lock. The journal may not be locked. |
1550 | */ | 1373 | */ |
@@ -1566,9 +1389,6 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) | |||
1566 | switch (jh->b_jlist) { | 1389 | switch (jh->b_jlist) { |
1567 | case BJ_None: | 1390 | case BJ_None: |
1568 | return; | 1391 | return; |
1569 | case BJ_SyncData: | ||
1570 | list = &transaction->t_sync_datalist; | ||
1571 | break; | ||
1572 | case BJ_Metadata: | 1392 | case BJ_Metadata: |
1573 | transaction->t_nr_buffers--; | 1393 | transaction->t_nr_buffers--; |
1574 | J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0); | 1394 | J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0); |
@@ -1589,9 +1409,6 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) | |||
1589 | case BJ_Reserved: | 1409 | case BJ_Reserved: |
1590 | list = &transaction->t_reserved_list; | 1410 | list = &transaction->t_reserved_list; |
1591 | break; | 1411 | break; |
1592 | case BJ_Locked: | ||
1593 | list = &transaction->t_locked_list; | ||
1594 | break; | ||
1595 | } | 1412 | } |
1596 | 1413 | ||
1597 | __blist_del_buffer(list, jh); | 1414 | __blist_del_buffer(list, jh); |
@@ -1634,15 +1451,7 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) | |||
1634 | goto out; | 1451 | goto out; |
1635 | 1452 | ||
1636 | spin_lock(&journal->j_list_lock); | 1453 | spin_lock(&journal->j_list_lock); |
1637 | if (jh->b_transaction != NULL && jh->b_cp_transaction == NULL) { | 1454 | if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { |
1638 | if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) { | ||
1639 | /* A written-back ordered data buffer */ | ||
1640 | JBUFFER_TRACE(jh, "release data"); | ||
1641 | __jbd2_journal_unfile_buffer(jh); | ||
1642 | jbd2_journal_remove_journal_head(bh); | ||
1643 | __brelse(bh); | ||
1644 | } | ||
1645 | } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { | ||
1646 | /* written-back checkpointed metadata buffer */ | 1455 | /* written-back checkpointed metadata buffer */ |
1647 | if (jh->b_jlist == BJ_None) { | 1456 | if (jh->b_jlist == BJ_None) { |
1648 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 1457 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
@@ -1878,6 +1687,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1878 | if (!buffer_jbd(bh)) | 1687 | if (!buffer_jbd(bh)) |
1879 | goto zap_buffer_unlocked; | 1688 | goto zap_buffer_unlocked; |
1880 | 1689 | ||
1690 | /* OK, we have data buffer in journaled mode */ | ||
1881 | spin_lock(&journal->j_state_lock); | 1691 | spin_lock(&journal->j_state_lock); |
1882 | jbd_lock_bh_state(bh); | 1692 | jbd_lock_bh_state(bh); |
1883 | spin_lock(&journal->j_list_lock); | 1693 | spin_lock(&journal->j_list_lock); |
@@ -1941,15 +1751,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1941 | } | 1751 | } |
1942 | } else if (transaction == journal->j_committing_transaction) { | 1752 | } else if (transaction == journal->j_committing_transaction) { |
1943 | JBUFFER_TRACE(jh, "on committing transaction"); | 1753 | JBUFFER_TRACE(jh, "on committing transaction"); |
1944 | if (jh->b_jlist == BJ_Locked) { | ||
1945 | /* | ||
1946 | * The buffer is on the committing transaction's locked | ||
1947 | * list. We have the buffer locked, so I/O has | ||
1948 | * completed. So we can nail the buffer now. | ||
1949 | */ | ||
1950 | may_free = __dispose_buffer(jh, transaction); | ||
1951 | goto zap_buffer; | ||
1952 | } | ||
1953 | /* | 1754 | /* |
1954 | * If it is committing, we simply cannot touch it. We | 1755 | * If it is committing, we simply cannot touch it. We |
1955 | * can remove it's next_transaction pointer from the | 1756 | * can remove it's next_transaction pointer from the |
@@ -2082,9 +1883,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, | |||
2082 | J_ASSERT_JH(jh, !jh->b_committed_data); | 1883 | J_ASSERT_JH(jh, !jh->b_committed_data); |
2083 | J_ASSERT_JH(jh, !jh->b_frozen_data); | 1884 | J_ASSERT_JH(jh, !jh->b_frozen_data); |
2084 | return; | 1885 | return; |
2085 | case BJ_SyncData: | ||
2086 | list = &transaction->t_sync_datalist; | ||
2087 | break; | ||
2088 | case BJ_Metadata: | 1886 | case BJ_Metadata: |
2089 | transaction->t_nr_buffers++; | 1887 | transaction->t_nr_buffers++; |
2090 | list = &transaction->t_buffers; | 1888 | list = &transaction->t_buffers; |
@@ -2104,9 +1902,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, | |||
2104 | case BJ_Reserved: | 1902 | case BJ_Reserved: |
2105 | list = &transaction->t_reserved_list; | 1903 | list = &transaction->t_reserved_list; |
2106 | break; | 1904 | break; |
2107 | case BJ_Locked: | ||
2108 | list = &transaction->t_locked_list; | ||
2109 | break; | ||
2110 | } | 1905 | } |
2111 | 1906 | ||
2112 | __blist_add_buffer(list, jh); | 1907 | __blist_add_buffer(list, jh); |