diff options
-rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.c | 9 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_quotaops.c | 1 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_trace.h | 83 | ||||
-rw-r--r-- | fs/xfs/xfs_ag.h | 24 | ||||
-rw-r--r-- | fs/xfs/xfs_alloc.c | 357 | ||||
-rw-r--r-- | fs/xfs/xfs_alloc.h | 7 | ||||
-rw-r--r-- | fs/xfs/xfs_alloc_btree.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_trans.c | 41 | ||||
-rw-r--r-- | fs/xfs/xfs_trans.h | 35 | ||||
-rw-r--r-- | fs/xfs/xfs_trans_item.c | 109 | ||||
-rw-r--r-- | fs/xfs/xfs_trans_priv.h | 4 |
11 files changed, 350 insertions, 322 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index f01de3c55c43..649ade8ef598 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -37,6 +37,7 @@ | |||
37 | 37 | ||
38 | #include "xfs_sb.h" | 38 | #include "xfs_sb.h" |
39 | #include "xfs_inum.h" | 39 | #include "xfs_inum.h" |
40 | #include "xfs_log.h" | ||
40 | #include "xfs_ag.h" | 41 | #include "xfs_ag.h" |
41 | #include "xfs_dmapi.h" | 42 | #include "xfs_dmapi.h" |
42 | #include "xfs_mount.h" | 43 | #include "xfs_mount.h" |
@@ -850,6 +851,12 @@ xfs_buf_lock_value( | |||
850 | * Note that this in no way locks the underlying pages, so it is only | 851 | * Note that this in no way locks the underlying pages, so it is only |
851 | * useful for synchronizing concurrent use of buffer objects, not for | 852 | * useful for synchronizing concurrent use of buffer objects, not for |
852 | * synchronizing independent access to the underlying pages. | 853 | * synchronizing independent access to the underlying pages. |
854 | * | ||
855 | * If we come across a stale, pinned, locked buffer, we know that we | ||
856 | * are being asked to lock a buffer that has been reallocated. Because | ||
857 | * it is pinned, we know that the log has not been pushed to disk and | ||
858 | * hence it will still be locked. Rather than sleeping until someone | ||
859 | * else pushes the log, push it ourselves before trying to get the lock. | ||
853 | */ | 860 | */ |
854 | void | 861 | void |
855 | xfs_buf_lock( | 862 | xfs_buf_lock( |
@@ -857,6 +864,8 @@ xfs_buf_lock( | |||
857 | { | 864 | { |
858 | trace_xfs_buf_lock(bp, _RET_IP_); | 865 | trace_xfs_buf_lock(bp, _RET_IP_); |
859 | 866 | ||
867 | if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) | ||
868 | xfs_log_force(bp->b_mount, 0); | ||
860 | if (atomic_read(&bp->b_io_remaining)) | 869 | if (atomic_read(&bp->b_io_remaining)) |
861 | blk_run_address_space(bp->b_target->bt_mapping); | 870 | blk_run_address_space(bp->b_target->bt_mapping); |
862 | down(&bp->b_sema); | 871 | down(&bp->b_sema); |
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c index 1947514ce1ad..2e73688dae9c 100644 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/fs/xfs/linux-2.6/xfs_quotaops.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include "xfs_dmapi.h" | 19 | #include "xfs_dmapi.h" |
20 | #include "xfs_sb.h" | 20 | #include "xfs_sb.h" |
21 | #include "xfs_inum.h" | 21 | #include "xfs_inum.h" |
22 | #include "xfs_log.h" | ||
22 | #include "xfs_ag.h" | 23 | #include "xfs_ag.h" |
23 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
24 | #include "xfs_quota.h" | 25 | #include "xfs_quota.h" |
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 8a319cfd2901..ff6bc797baf2 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h | |||
@@ -1059,83 +1059,112 @@ TRACE_EVENT(xfs_bunmap, | |||
1059 | 1059 | ||
1060 | ); | 1060 | ); |
1061 | 1061 | ||
1062 | #define XFS_BUSY_SYNC \ | ||
1063 | { 0, "async" }, \ | ||
1064 | { 1, "sync" } | ||
1065 | |||
1062 | TRACE_EVENT(xfs_alloc_busy, | 1066 | TRACE_EVENT(xfs_alloc_busy, |
1063 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, | 1067 | TP_PROTO(struct xfs_trans *trans, xfs_agnumber_t agno, |
1064 | xfs_extlen_t len, int slot), | 1068 | xfs_agblock_t agbno, xfs_extlen_t len, int sync), |
1065 | TP_ARGS(mp, agno, agbno, len, slot), | 1069 | TP_ARGS(trans, agno, agbno, len, sync), |
1066 | TP_STRUCT__entry( | 1070 | TP_STRUCT__entry( |
1067 | __field(dev_t, dev) | 1071 | __field(dev_t, dev) |
1072 | __field(struct xfs_trans *, tp) | ||
1073 | __field(int, tid) | ||
1068 | __field(xfs_agnumber_t, agno) | 1074 | __field(xfs_agnumber_t, agno) |
1069 | __field(xfs_agblock_t, agbno) | 1075 | __field(xfs_agblock_t, agbno) |
1070 | __field(xfs_extlen_t, len) | 1076 | __field(xfs_extlen_t, len) |
1071 | __field(int, slot) | 1077 | __field(int, sync) |
1072 | ), | 1078 | ), |
1073 | TP_fast_assign( | 1079 | TP_fast_assign( |
1074 | __entry->dev = mp->m_super->s_dev; | 1080 | __entry->dev = trans->t_mountp->m_super->s_dev; |
1081 | __entry->tp = trans; | ||
1082 | __entry->tid = trans->t_ticket->t_tid; | ||
1075 | __entry->agno = agno; | 1083 | __entry->agno = agno; |
1076 | __entry->agbno = agbno; | 1084 | __entry->agbno = agbno; |
1077 | __entry->len = len; | 1085 | __entry->len = len; |
1078 | __entry->slot = slot; | 1086 | __entry->sync = sync; |
1079 | ), | 1087 | ), |
1080 | TP_printk("dev %d:%d agno %u agbno %u len %u slot %d", | 1088 | TP_printk("dev %d:%d trans 0x%p tid 0x%x agno %u agbno %u len %u %s", |
1081 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1089 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1090 | __entry->tp, | ||
1091 | __entry->tid, | ||
1082 | __entry->agno, | 1092 | __entry->agno, |
1083 | __entry->agbno, | 1093 | __entry->agbno, |
1084 | __entry->len, | 1094 | __entry->len, |
1085 | __entry->slot) | 1095 | __print_symbolic(__entry->sync, XFS_BUSY_SYNC)) |
1086 | 1096 | ||
1087 | ); | 1097 | ); |
1088 | 1098 | ||
1089 | #define XFS_BUSY_STATES \ | ||
1090 | { 0, "found" }, \ | ||
1091 | { 1, "missing" } | ||
1092 | |||
1093 | TRACE_EVENT(xfs_alloc_unbusy, | 1099 | TRACE_EVENT(xfs_alloc_unbusy, |
1094 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | 1100 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, |
1095 | int slot, int found), | 1101 | xfs_agblock_t agbno, xfs_extlen_t len), |
1096 | TP_ARGS(mp, agno, slot, found), | 1102 | TP_ARGS(mp, agno, agbno, len), |
1097 | TP_STRUCT__entry( | 1103 | TP_STRUCT__entry( |
1098 | __field(dev_t, dev) | 1104 | __field(dev_t, dev) |
1099 | __field(xfs_agnumber_t, agno) | 1105 | __field(xfs_agnumber_t, agno) |
1100 | __field(int, slot) | 1106 | __field(xfs_agblock_t, agbno) |
1101 | __field(int, found) | 1107 | __field(xfs_extlen_t, len) |
1102 | ), | 1108 | ), |
1103 | TP_fast_assign( | 1109 | TP_fast_assign( |
1104 | __entry->dev = mp->m_super->s_dev; | 1110 | __entry->dev = mp->m_super->s_dev; |
1105 | __entry->agno = agno; | 1111 | __entry->agno = agno; |
1106 | __entry->slot = slot; | 1112 | __entry->agbno = agbno; |
1107 | __entry->found = found; | 1113 | __entry->len = len; |
1108 | ), | 1114 | ), |
1109 | TP_printk("dev %d:%d agno %u slot %d %s", | 1115 | TP_printk("dev %d:%d agno %u agbno %u len %u", |
1110 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1116 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1111 | __entry->agno, | 1117 | __entry->agno, |
1112 | __entry->slot, | 1118 | __entry->agbno, |
1113 | __print_symbolic(__entry->found, XFS_BUSY_STATES)) | 1119 | __entry->len) |
1114 | ); | 1120 | ); |
1115 | 1121 | ||
1122 | #define XFS_BUSY_STATES \ | ||
1123 | { 0, "missing" }, \ | ||
1124 | { 1, "found" } | ||
1125 | |||
1116 | TRACE_EVENT(xfs_alloc_busysearch, | 1126 | TRACE_EVENT(xfs_alloc_busysearch, |
1117 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, | 1127 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, |
1118 | xfs_extlen_t len, xfs_lsn_t lsn), | 1128 | xfs_agblock_t agbno, xfs_extlen_t len, int found), |
1119 | TP_ARGS(mp, agno, agbno, len, lsn), | 1129 | TP_ARGS(mp, agno, agbno, len, found), |
1120 | TP_STRUCT__entry( | 1130 | TP_STRUCT__entry( |
1121 | __field(dev_t, dev) | 1131 | __field(dev_t, dev) |
1122 | __field(xfs_agnumber_t, agno) | 1132 | __field(xfs_agnumber_t, agno) |
1123 | __field(xfs_agblock_t, agbno) | 1133 | __field(xfs_agblock_t, agbno) |
1124 | __field(xfs_extlen_t, len) | 1134 | __field(xfs_extlen_t, len) |
1125 | __field(xfs_lsn_t, lsn) | 1135 | __field(int, found) |
1126 | ), | 1136 | ), |
1127 | TP_fast_assign( | 1137 | TP_fast_assign( |
1128 | __entry->dev = mp->m_super->s_dev; | 1138 | __entry->dev = mp->m_super->s_dev; |
1129 | __entry->agno = agno; | 1139 | __entry->agno = agno; |
1130 | __entry->agbno = agbno; | 1140 | __entry->agbno = agbno; |
1131 | __entry->len = len; | 1141 | __entry->len = len; |
1132 | __entry->lsn = lsn; | 1142 | __entry->found = found; |
1133 | ), | 1143 | ), |
1134 | TP_printk("dev %d:%d agno %u agbno %u len %u force lsn 0x%llx", | 1144 | TP_printk("dev %d:%d agno %u agbno %u len %u %s", |
1135 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1145 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1136 | __entry->agno, | 1146 | __entry->agno, |
1137 | __entry->agbno, | 1147 | __entry->agbno, |
1138 | __entry->len, | 1148 | __entry->len, |
1149 | __print_symbolic(__entry->found, XFS_BUSY_STATES)) | ||
1150 | ); | ||
1151 | |||
1152 | TRACE_EVENT(xfs_trans_commit_lsn, | ||
1153 | TP_PROTO(struct xfs_trans *trans), | ||
1154 | TP_ARGS(trans), | ||
1155 | TP_STRUCT__entry( | ||
1156 | __field(dev_t, dev) | ||
1157 | __field(struct xfs_trans *, tp) | ||
1158 | __field(xfs_lsn_t, lsn) | ||
1159 | ), | ||
1160 | TP_fast_assign( | ||
1161 | __entry->dev = trans->t_mountp->m_super->s_dev; | ||
1162 | __entry->tp = trans; | ||
1163 | __entry->lsn = trans->t_commit_lsn; | ||
1164 | ), | ||
1165 | TP_printk("dev %d:%d trans 0x%p commit_lsn 0x%llx", | ||
1166 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
1167 | __entry->tp, | ||
1139 | __entry->lsn) | 1168 | __entry->lsn) |
1140 | ); | 1169 | ); |
1141 | 1170 | ||
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index abb8222b88c9..401f364ad36c 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
@@ -175,14 +175,20 @@ typedef struct xfs_agfl { | |||
175 | } xfs_agfl_t; | 175 | } xfs_agfl_t; |
176 | 176 | ||
177 | /* | 177 | /* |
178 | * Busy block/extent entry. Used in perag to mark blocks that have been freed | 178 | * Busy block/extent entry. Indexed by a rbtree in perag to mark blocks that |
179 | * but whose transactions aren't committed to disk yet. | 179 | * have been freed but whose transactions aren't committed to disk yet. |
180 | * | ||
181 | * Note that we use the transaction ID to record the transaction, not the | ||
182 | * transaction structure itself. See xfs_alloc_busy_insert() for details. | ||
180 | */ | 183 | */ |
181 | typedef struct xfs_perag_busy { | 184 | struct xfs_busy_extent { |
182 | xfs_agblock_t busy_start; | 185 | struct rb_node rb_node; /* ag by-bno indexed search tree */ |
183 | xfs_extlen_t busy_length; | 186 | struct list_head list; /* transaction busy extent list */ |
184 | struct xfs_trans *busy_tp; /* transaction that did the free */ | 187 | xfs_agnumber_t agno; |
185 | } xfs_perag_busy_t; | 188 | xfs_agblock_t bno; |
189 | xfs_extlen_t length; | ||
190 | xlog_tid_t tid; /* transaction that created this */ | ||
191 | }; | ||
186 | 192 | ||
187 | /* | 193 | /* |
188 | * Per-ag incore structure, copies of information in agf and agi, | 194 | * Per-ag incore structure, copies of information in agf and agi, |
@@ -216,7 +222,8 @@ typedef struct xfs_perag { | |||
216 | xfs_agino_t pagl_leftrec; | 222 | xfs_agino_t pagl_leftrec; |
217 | xfs_agino_t pagl_rightrec; | 223 | xfs_agino_t pagl_rightrec; |
218 | #ifdef __KERNEL__ | 224 | #ifdef __KERNEL__ |
219 | spinlock_t pagb_lock; /* lock for pagb_list */ | 225 | spinlock_t pagb_lock; /* lock for pagb_tree */ |
226 | struct rb_root pagb_tree; /* ordered tree of busy extents */ | ||
220 | 227 | ||
221 | atomic_t pagf_fstrms; /* # of filestreams active in this AG */ | 228 | atomic_t pagf_fstrms; /* # of filestreams active in this AG */ |
222 | 229 | ||
@@ -226,7 +233,6 @@ typedef struct xfs_perag { | |||
226 | int pag_ici_reclaimable; /* reclaimable inodes */ | 233 | int pag_ici_reclaimable; /* reclaimable inodes */ |
227 | #endif | 234 | #endif |
228 | int pagb_count; /* pagb slots in use */ | 235 | int pagb_count; /* pagb slots in use */ |
229 | xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */ | ||
230 | } xfs_perag_t; | 236 | } xfs_perag_t; |
231 | 237 | ||
232 | /* | 238 | /* |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 94cddbfb2560..a7fbe8a99b12 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -46,11 +46,9 @@ | |||
46 | #define XFSA_FIXUP_BNO_OK 1 | 46 | #define XFSA_FIXUP_BNO_OK 1 |
47 | #define XFSA_FIXUP_CNT_OK 2 | 47 | #define XFSA_FIXUP_CNT_OK 2 |
48 | 48 | ||
49 | STATIC void | 49 | static int |
50 | xfs_alloc_search_busy(xfs_trans_t *tp, | 50 | xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, |
51 | xfs_agnumber_t agno, | 51 | xfs_agblock_t bno, xfs_extlen_t len); |
52 | xfs_agblock_t bno, | ||
53 | xfs_extlen_t len); | ||
54 | 52 | ||
55 | /* | 53 | /* |
56 | * Prototypes for per-ag allocation routines | 54 | * Prototypes for per-ag allocation routines |
@@ -540,9 +538,16 @@ xfs_alloc_ag_vextent( | |||
540 | be32_to_cpu(agf->agf_length)); | 538 | be32_to_cpu(agf->agf_length)); |
541 | xfs_alloc_log_agf(args->tp, args->agbp, | 539 | xfs_alloc_log_agf(args->tp, args->agbp, |
542 | XFS_AGF_FREEBLKS); | 540 | XFS_AGF_FREEBLKS); |
543 | /* search the busylist for these blocks */ | 541 | /* |
544 | xfs_alloc_search_busy(args->tp, args->agno, | 542 | * Search the busylist for these blocks and mark the |
545 | args->agbno, args->len); | 543 | * transaction as synchronous if blocks are found. This |
544 | * avoids the need to block due to a synchronous log | ||
545 | * force to ensure correct ordering as the synchronous | ||
546 | * transaction will guarantee that for us. | ||
547 | */ | ||
548 | if (xfs_alloc_busy_search(args->mp, args->agno, | ||
549 | args->agbno, args->len)) | ||
550 | xfs_trans_set_sync(args->tp); | ||
546 | } | 551 | } |
547 | if (!args->isfl) | 552 | if (!args->isfl) |
548 | xfs_trans_mod_sb(args->tp, | 553 | xfs_trans_mod_sb(args->tp, |
@@ -1693,7 +1698,7 @@ xfs_free_ag_extent( | |||
1693 | * when the iclog commits to disk. If a busy block is allocated, | 1698 | * when the iclog commits to disk. If a busy block is allocated, |
1694 | * the iclog is pushed up to the LSN that freed the block. | 1699 | * the iclog is pushed up to the LSN that freed the block. |
1695 | */ | 1700 | */ |
1696 | xfs_alloc_mark_busy(tp, agno, bno, len); | 1701 | xfs_alloc_busy_insert(tp, agno, bno, len); |
1697 | return 0; | 1702 | return 0; |
1698 | 1703 | ||
1699 | error0: | 1704 | error0: |
@@ -1989,14 +1994,20 @@ xfs_alloc_get_freelist( | |||
1989 | *bnop = bno; | 1994 | *bnop = bno; |
1990 | 1995 | ||
1991 | /* | 1996 | /* |
1992 | * As blocks are freed, they are added to the per-ag busy list | 1997 | * As blocks are freed, they are added to the per-ag busy list and |
1993 | * and remain there until the freeing transaction is committed to | 1998 | * remain there until the freeing transaction is committed to disk. |
1994 | * disk. Now that we have allocated blocks, this list must be | 1999 | * Now that we have allocated blocks, this list must be searched to see |
1995 | * searched to see if a block is being reused. If one is, then | 2000 | * if a block is being reused. If one is, then the freeing transaction |
1996 | * the freeing transaction must be pushed to disk NOW by forcing | 2001 | * must be pushed to disk before this transaction. |
1997 | * to disk all iclogs up that transaction's LSN. | 2002 | * |
2003 | * We do this by setting the current transaction to a sync transaction | ||
2004 | * which guarantees that the freeing transaction is on disk before this | ||
2005 | * transaction. This is done instead of a synchronous log force here so | ||
2006 | * that we don't sit and wait with the AGF locked in the transaction | ||
2007 | * during the log force. | ||
1998 | */ | 2008 | */ |
1999 | xfs_alloc_search_busy(tp, be32_to_cpu(agf->agf_seqno), bno, 1); | 2009 | if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1)) |
2010 | xfs_trans_set_sync(tp); | ||
2000 | return 0; | 2011 | return 0; |
2001 | } | 2012 | } |
2002 | 2013 | ||
@@ -2201,7 +2212,7 @@ xfs_alloc_read_agf( | |||
2201 | be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); | 2212 | be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); |
2202 | spin_lock_init(&pag->pagb_lock); | 2213 | spin_lock_init(&pag->pagb_lock); |
2203 | pag->pagb_count = 0; | 2214 | pag->pagb_count = 0; |
2204 | memset(pag->pagb_list, 0, sizeof(pag->pagb_list)); | 2215 | pag->pagb_tree = RB_ROOT; |
2205 | pag->pagf_init = 1; | 2216 | pag->pagf_init = 1; |
2206 | } | 2217 | } |
2207 | #ifdef DEBUG | 2218 | #ifdef DEBUG |
@@ -2479,127 +2490,263 @@ error0: | |||
2479 | * list is reused, the transaction that freed it must be forced to disk | 2490 | * list is reused, the transaction that freed it must be forced to disk |
2480 | * before continuing to use the block. | 2491 | * before continuing to use the block. |
2481 | * | 2492 | * |
2482 | * xfs_alloc_mark_busy - add to the per-ag busy list | 2493 | * xfs_alloc_busy_insert - add to the per-ag busy list |
2483 | * xfs_alloc_clear_busy - remove an item from the per-ag busy list | 2494 | * xfs_alloc_busy_clear - remove an item from the per-ag busy list |
2495 | * xfs_alloc_busy_search - search for a busy extent | ||
2496 | */ | ||
2497 | |||
2498 | /* | ||
2499 | * Insert a new extent into the busy tree. | ||
2500 | * | ||
2501 | * The busy extent tree is indexed by the start block of the busy extent. | ||
2502 | * there can be multiple overlapping ranges in the busy extent tree but only | ||
2503 | * ever one entry at a given start block. The reason for this is that | ||
2504 | * multi-block extents can be freed, then smaller chunks of that extent | ||
2505 | * allocated and freed again before the first transaction commit is on disk. | ||
2506 | * If the exact same start block is freed a second time, we have to wait for | ||
2507 | * that busy extent to pass out of the tree before the new extent is inserted. | ||
2508 | * There are two main cases we have to handle here. | ||
2509 | * | ||
2510 | * The first case is a transaction that triggers a "free - allocate - free" | ||
2511 | * cycle. This can occur during btree manipulations as a btree block is freed | ||
2512 | * to the freelist, then allocated from the free list, then freed again. In | ||
2513 | * this case, the second extxpnet free is what triggers the duplicate and as | ||
2514 | * such the transaction IDs should match. Because the extent was allocated in | ||
2515 | * this transaction, the transaction must be marked as synchronous. This is | ||
2516 | * true for all cases where the free/alloc/free occurs in the one transaction, | ||
2517 | * hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case. | ||
2518 | * This serves to catch violations of the second case quite effectively. | ||
2519 | * | ||
2520 | * The second case is where the free/alloc/free occur in different | ||
2521 | * transactions. In this case, the thread freeing the extent the second time | ||
2522 | * can't mark the extent busy immediately because it is already tracked in a | ||
2523 | * transaction that may be committing. When the log commit for the existing | ||
2524 | * busy extent completes, the busy extent will be removed from the tree. If we | ||
2525 | * allow the second busy insert to continue using that busy extent structure, | ||
2526 | * it can be freed before this transaction is safely in the log. Hence our | ||
2527 | * only option in this case is to force the log to remove the existing busy | ||
2528 | * extent from the list before we insert the new one with the current | ||
2529 | * transaction ID. | ||
2530 | * | ||
2531 | * The problem we are trying to avoid in the free-alloc-free in separate | ||
2532 | * transactions is most easily described with a timeline: | ||
2533 | * | ||
2534 | * Thread 1 Thread 2 Thread 3 xfslogd | ||
2535 | * xact alloc | ||
2536 | * free X | ||
2537 | * mark busy | ||
2538 | * commit xact | ||
2539 | * free xact | ||
2540 | * xact alloc | ||
2541 | * alloc X | ||
2542 | * busy search | ||
2543 | * mark xact sync | ||
2544 | * commit xact | ||
2545 | * free xact | ||
2546 | * force log | ||
2547 | * checkpoint starts | ||
2548 | * .... | ||
2549 | * xact alloc | ||
2550 | * free X | ||
2551 | * mark busy | ||
2552 | * finds match | ||
2553 | * *** KABOOM! *** | ||
2554 | * .... | ||
2555 | * log IO completes | ||
2556 | * unbusy X | ||
2557 | * checkpoint completes | ||
2558 | * | ||
2559 | * By issuing a log force in thread 3 @ "KABOOM", the thread will block until | ||
2560 | * the checkpoint completes, and the busy extent it matched will have been | ||
2561 | * removed from the tree when it is woken. Hence it can then continue safely. | ||
2562 | * | ||
2563 | * However, to ensure this matching process is robust, we need to use the | ||
2564 | * transaction ID for identifying transaction, as delayed logging results in | ||
2565 | * the busy extent and transaction lifecycles being different. i.e. the busy | ||
2566 | * extent is active for a lot longer than the transaction. Hence the | ||
2567 | * transaction structure can be freed and reallocated, then mark the same | ||
2568 | * extent busy again in the new transaction. In this case the new transaction | ||
2569 | * will have a different tid but can have the same address, and hence we need | ||
2570 | * to check against the tid. | ||
2571 | * | ||
2572 | * Future: for delayed logging, we could avoid the log force if the extent was | ||
2573 | * first freed in the current checkpoint sequence. This, however, requires the | ||
2574 | * ability to pin the current checkpoint in memory until this transaction | ||
2575 | * commits to ensure that both the original free and the current one combine | ||
2576 | * logically into the one checkpoint. If the checkpoint sequences are | ||
2577 | * different, however, we still need to wait on a log force. | ||
2484 | */ | 2578 | */ |
2485 | void | 2579 | void |
2486 | xfs_alloc_mark_busy(xfs_trans_t *tp, | 2580 | xfs_alloc_busy_insert( |
2487 | xfs_agnumber_t agno, | 2581 | struct xfs_trans *tp, |
2488 | xfs_agblock_t bno, | 2582 | xfs_agnumber_t agno, |
2489 | xfs_extlen_t len) | 2583 | xfs_agblock_t bno, |
2584 | xfs_extlen_t len) | ||
2490 | { | 2585 | { |
2491 | xfs_perag_busy_t *bsy; | 2586 | struct xfs_busy_extent *new; |
2587 | struct xfs_busy_extent *busyp; | ||
2492 | struct xfs_perag *pag; | 2588 | struct xfs_perag *pag; |
2493 | int n; | 2589 | struct rb_node **rbp; |
2590 | struct rb_node *parent; | ||
2591 | int match; | ||
2494 | 2592 | ||
2495 | pag = xfs_perag_get(tp->t_mountp, agno); | ||
2496 | spin_lock(&pag->pagb_lock); | ||
2497 | 2593 | ||
2498 | /* search pagb_list for an open slot */ | 2594 | new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL); |
2499 | for (bsy = pag->pagb_list, n = 0; | 2595 | if (!new) { |
2500 | n < XFS_PAGB_NUM_SLOTS; | 2596 | /* |
2501 | bsy++, n++) { | 2597 | * No Memory! Since it is now not possible to track the free |
2502 | if (bsy->busy_tp == NULL) { | 2598 | * block, make this a synchronous transaction to insure that |
2503 | break; | 2599 | * the block is not reused before this transaction commits. |
2504 | } | 2600 | */ |
2601 | trace_xfs_alloc_busy(tp, agno, bno, len, 1); | ||
2602 | xfs_trans_set_sync(tp); | ||
2603 | return; | ||
2505 | } | 2604 | } |
2506 | 2605 | ||
2507 | trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len, n); | 2606 | new->agno = agno; |
2607 | new->bno = bno; | ||
2608 | new->length = len; | ||
2609 | new->tid = xfs_log_get_trans_ident(tp); | ||
2508 | 2610 | ||
2509 | if (n < XFS_PAGB_NUM_SLOTS) { | 2611 | INIT_LIST_HEAD(&new->list); |
2510 | bsy = &pag->pagb_list[n]; | 2612 | |
2511 | pag->pagb_count++; | 2613 | /* trace before insert to be able to see failed inserts */ |
2512 | bsy->busy_start = bno; | 2614 | trace_xfs_alloc_busy(tp, agno, bno, len, 0); |
2513 | bsy->busy_length = len; | 2615 | |
2514 | bsy->busy_tp = tp; | 2616 | pag = xfs_perag_get(tp->t_mountp, new->agno); |
2515 | xfs_trans_add_busy(tp, agno, n); | 2617 | restart: |
2516 | } else { | 2618 | spin_lock(&pag->pagb_lock); |
2619 | rbp = &pag->pagb_tree.rb_node; | ||
2620 | parent = NULL; | ||
2621 | busyp = NULL; | ||
2622 | match = 0; | ||
2623 | while (*rbp && match >= 0) { | ||
2624 | parent = *rbp; | ||
2625 | busyp = rb_entry(parent, struct xfs_busy_extent, rb_node); | ||
2626 | |||
2627 | if (new->bno < busyp->bno) { | ||
2628 | /* may overlap, but exact start block is lower */ | ||
2629 | rbp = &(*rbp)->rb_left; | ||
2630 | if (new->bno + new->length > busyp->bno) | ||
2631 | match = busyp->tid == new->tid ? 1 : -1; | ||
2632 | } else if (new->bno > busyp->bno) { | ||
2633 | /* may overlap, but exact start block is higher */ | ||
2634 | rbp = &(*rbp)->rb_right; | ||
2635 | if (bno < busyp->bno + busyp->length) | ||
2636 | match = busyp->tid == new->tid ? 1 : -1; | ||
2637 | } else { | ||
2638 | match = busyp->tid == new->tid ? 1 : -1; | ||
2639 | break; | ||
2640 | } | ||
2641 | } | ||
2642 | if (match < 0) { | ||
2643 | /* overlap marked busy in different transaction */ | ||
2644 | spin_unlock(&pag->pagb_lock); | ||
2645 | xfs_log_force(tp->t_mountp, XFS_LOG_SYNC); | ||
2646 | goto restart; | ||
2647 | } | ||
2648 | if (match > 0) { | ||
2517 | /* | 2649 | /* |
2518 | * The busy list is full! Since it is now not possible to | 2650 | * overlap marked busy in same transaction. Update if exact |
2519 | * track the free block, make this a synchronous transaction | 2651 | * start block match, otherwise combine the busy extents into |
2520 | * to insure that the block is not reused before this | 2652 | * a single range. |
2521 | * transaction commits. | ||
2522 | */ | 2653 | */ |
2523 | xfs_trans_set_sync(tp); | 2654 | if (busyp->bno == new->bno) { |
2524 | } | 2655 | busyp->length = max(busyp->length, new->length); |
2656 | spin_unlock(&pag->pagb_lock); | ||
2657 | ASSERT(tp->t_flags & XFS_TRANS_SYNC); | ||
2658 | xfs_perag_put(pag); | ||
2659 | kmem_free(new); | ||
2660 | return; | ||
2661 | } | ||
2662 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | ||
2663 | new->length = max(busyp->bno + busyp->length, | ||
2664 | new->bno + new->length) - | ||
2665 | min(busyp->bno, new->bno); | ||
2666 | new->bno = min(busyp->bno, new->bno); | ||
2667 | } else | ||
2668 | busyp = NULL; | ||
2525 | 2669 | ||
2670 | rb_link_node(&new->rb_node, parent, rbp); | ||
2671 | rb_insert_color(&new->rb_node, &pag->pagb_tree); | ||
2672 | |||
2673 | list_add(&new->list, &tp->t_busy); | ||
2526 | spin_unlock(&pag->pagb_lock); | 2674 | spin_unlock(&pag->pagb_lock); |
2527 | xfs_perag_put(pag); | 2675 | xfs_perag_put(pag); |
2676 | kmem_free(busyp); | ||
2528 | } | 2677 | } |
2529 | 2678 | ||
2530 | void | 2679 | /* |
2531 | xfs_alloc_clear_busy(xfs_trans_t *tp, | 2680 | * Search for a busy extent within the range of the extent we are about to |
2532 | xfs_agnumber_t agno, | 2681 | * allocate. You need to be holding the busy extent tree lock when calling |
2533 | int idx) | 2682 | * xfs_alloc_busy_search(). This function returns 0 for no overlapping busy |
2683 | * extent, -1 for an overlapping but not exact busy extent, and 1 for an exact | ||
2684 | * match. This is done so that a non-zero return indicates an overlap that | ||
2685 | * will require a synchronous transaction, but it can still be | ||
2686 | * used to distinguish between a partial or exact match. | ||
2687 | */ | ||
2688 | static int | ||
2689 | xfs_alloc_busy_search( | ||
2690 | struct xfs_mount *mp, | ||
2691 | xfs_agnumber_t agno, | ||
2692 | xfs_agblock_t bno, | ||
2693 | xfs_extlen_t len) | ||
2534 | { | 2694 | { |
2535 | struct xfs_perag *pag; | 2695 | struct xfs_perag *pag; |
2536 | xfs_perag_busy_t *list; | 2696 | struct rb_node *rbp; |
2697 | struct xfs_busy_extent *busyp; | ||
2698 | int match = 0; | ||
2537 | 2699 | ||
2538 | ASSERT(idx < XFS_PAGB_NUM_SLOTS); | 2700 | pag = xfs_perag_get(mp, agno); |
2539 | pag = xfs_perag_get(tp->t_mountp, agno); | ||
2540 | spin_lock(&pag->pagb_lock); | 2701 | spin_lock(&pag->pagb_lock); |
2541 | list = pag->pagb_list; | ||
2542 | 2702 | ||
2543 | trace_xfs_alloc_unbusy(tp->t_mountp, agno, idx, list[idx].busy_tp == tp); | 2703 | rbp = pag->pagb_tree.rb_node; |
2544 | 2704 | ||
2545 | if (list[idx].busy_tp == tp) { | 2705 | /* find closest start bno overlap */ |
2546 | list[idx].busy_tp = NULL; | 2706 | while (rbp) { |
2547 | pag->pagb_count--; | 2707 | busyp = rb_entry(rbp, struct xfs_busy_extent, rb_node); |
2708 | if (bno < busyp->bno) { | ||
2709 | /* may overlap, but exact start block is lower */ | ||
2710 | if (bno + len > busyp->bno) | ||
2711 | match = -1; | ||
2712 | rbp = rbp->rb_left; | ||
2713 | } else if (bno > busyp->bno) { | ||
2714 | /* may overlap, but exact start block is higher */ | ||
2715 | if (bno < busyp->bno + busyp->length) | ||
2716 | match = -1; | ||
2717 | rbp = rbp->rb_right; | ||
2718 | } else { | ||
2719 | /* bno matches busyp, length determines exact match */ | ||
2720 | match = (busyp->length == len) ? 1 : -1; | ||
2721 | break; | ||
2722 | } | ||
2548 | } | 2723 | } |
2549 | |||
2550 | spin_unlock(&pag->pagb_lock); | 2724 | spin_unlock(&pag->pagb_lock); |
2725 | trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match); | ||
2551 | xfs_perag_put(pag); | 2726 | xfs_perag_put(pag); |
2727 | return match; | ||
2552 | } | 2728 | } |
2553 | 2729 | ||
2554 | 2730 | void | |
2555 | /* | 2731 | xfs_alloc_busy_clear( |
2556 | * If we find the extent in the busy list, force the log out to get the | 2732 | struct xfs_mount *mp, |
2557 | * extent out of the busy list so the caller can use it straight away. | 2733 | struct xfs_busy_extent *busyp) |
2558 | */ | ||
2559 | STATIC void | ||
2560 | xfs_alloc_search_busy(xfs_trans_t *tp, | ||
2561 | xfs_agnumber_t agno, | ||
2562 | xfs_agblock_t bno, | ||
2563 | xfs_extlen_t len) | ||
2564 | { | 2734 | { |
2565 | struct xfs_perag *pag; | 2735 | struct xfs_perag *pag; |
2566 | xfs_perag_busy_t *bsy; | ||
2567 | xfs_agblock_t uend, bend; | ||
2568 | xfs_lsn_t lsn = 0; | ||
2569 | int cnt; | ||
2570 | 2736 | ||
2571 | pag = xfs_perag_get(tp->t_mountp, agno); | 2737 | trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno, |
2572 | spin_lock(&pag->pagb_lock); | 2738 | busyp->length); |
2573 | cnt = pag->pagb_count; | ||
2574 | 2739 | ||
2575 | /* | 2740 | ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno, |
2576 | * search pagb_list for this slot, skipping open slots. We have to | 2741 | busyp->length) == 1); |
2577 | * search the entire array as there may be multiple overlaps and | ||
2578 | * we have to get the most recent LSN for the log force to push out | ||
2579 | * all the transactions that span the range. | ||
2580 | */ | ||
2581 | uend = bno + len - 1; | ||
2582 | for (cnt = 0; cnt < pag->pagb_count; cnt++) { | ||
2583 | bsy = &pag->pagb_list[cnt]; | ||
2584 | if (!bsy->busy_tp) | ||
2585 | continue; | ||
2586 | 2742 | ||
2587 | bend = bsy->busy_start + bsy->busy_length - 1; | 2743 | list_del_init(&busyp->list); |
2588 | if (bno > bend || uend < bsy->busy_start) | ||
2589 | continue; | ||
2590 | 2744 | ||
2591 | /* (start1,length1) within (start2, length2) */ | 2745 | pag = xfs_perag_get(mp, busyp->agno); |
2592 | if (XFS_LSN_CMP(bsy->busy_tp->t_commit_lsn, lsn) > 0) | 2746 | spin_lock(&pag->pagb_lock); |
2593 | lsn = bsy->busy_tp->t_commit_lsn; | 2747 | rb_erase(&busyp->rb_node, &pag->pagb_tree); |
2594 | } | ||
2595 | spin_unlock(&pag->pagb_lock); | 2748 | spin_unlock(&pag->pagb_lock); |
2596 | xfs_perag_put(pag); | 2749 | xfs_perag_put(pag); |
2597 | trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, lsn); | ||
2598 | 2750 | ||
2599 | /* | 2751 | kmem_free(busyp); |
2600 | * If a block was found, force the log through the LSN of the | ||
2601 | * transaction that freed the block | ||
2602 | */ | ||
2603 | if (lsn) | ||
2604 | xfs_log_force_lsn(tp->t_mountp, lsn, XFS_LOG_SYNC); | ||
2605 | } | 2752 | } |
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 599bffa39784..6d05199b667c 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h | |||
@@ -22,6 +22,7 @@ struct xfs_buf; | |||
22 | struct xfs_mount; | 22 | struct xfs_mount; |
23 | struct xfs_perag; | 23 | struct xfs_perag; |
24 | struct xfs_trans; | 24 | struct xfs_trans; |
25 | struct xfs_busy_extent; | ||
25 | 26 | ||
26 | /* | 27 | /* |
27 | * Freespace allocation types. Argument to xfs_alloc_[v]extent. | 28 | * Freespace allocation types. Argument to xfs_alloc_[v]extent. |
@@ -119,15 +120,13 @@ xfs_alloc_longest_free_extent(struct xfs_mount *mp, | |||
119 | #ifdef __KERNEL__ | 120 | #ifdef __KERNEL__ |
120 | 121 | ||
121 | void | 122 | void |
122 | xfs_alloc_mark_busy(xfs_trans_t *tp, | 123 | xfs_alloc_busy_insert(xfs_trans_t *tp, |
123 | xfs_agnumber_t agno, | 124 | xfs_agnumber_t agno, |
124 | xfs_agblock_t bno, | 125 | xfs_agblock_t bno, |
125 | xfs_extlen_t len); | 126 | xfs_extlen_t len); |
126 | 127 | ||
127 | void | 128 | void |
128 | xfs_alloc_clear_busy(xfs_trans_t *tp, | 129 | xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp); |
129 | xfs_agnumber_t ag, | ||
130 | int idx); | ||
131 | 130 | ||
132 | #endif /* __KERNEL__ */ | 131 | #endif /* __KERNEL__ */ |
133 | 132 | ||
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index b726e10d2c1c..83f494218759 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c | |||
@@ -134,7 +134,7 @@ xfs_allocbt_free_block( | |||
134 | * disk. If a busy block is allocated, the iclog is pushed up to the | 134 | * disk. If a busy block is allocated, the iclog is pushed up to the |
135 | * LSN that freed the block. | 135 | * LSN that freed the block. |
136 | */ | 136 | */ |
137 | xfs_alloc_mark_busy(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); | 137 | xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); |
138 | xfs_trans_agbtree_delta(cur->bc_tp, -1); | 138 | xfs_trans_agbtree_delta(cur->bc_tp, -1); |
139 | return 0; | 139 | return 0; |
140 | } | 140 | } |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index be578ecb4af2..40d9595a8de2 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -44,6 +44,7 @@ | |||
44 | #include "xfs_trans_priv.h" | 44 | #include "xfs_trans_priv.h" |
45 | #include "xfs_trans_space.h" | 45 | #include "xfs_trans_space.h" |
46 | #include "xfs_inode_item.h" | 46 | #include "xfs_inode_item.h" |
47 | #include "xfs_trace.h" | ||
47 | 48 | ||
48 | kmem_zone_t *xfs_trans_zone; | 49 | kmem_zone_t *xfs_trans_zone; |
49 | 50 | ||
@@ -243,9 +244,8 @@ _xfs_trans_alloc( | |||
243 | tp->t_type = type; | 244 | tp->t_type = type; |
244 | tp->t_mountp = mp; | 245 | tp->t_mountp = mp; |
245 | tp->t_items_free = XFS_LIC_NUM_SLOTS; | 246 | tp->t_items_free = XFS_LIC_NUM_SLOTS; |
246 | tp->t_busy_free = XFS_LBC_NUM_SLOTS; | ||
247 | xfs_lic_init(&(tp->t_items)); | 247 | xfs_lic_init(&(tp->t_items)); |
248 | XFS_LBC_INIT(&(tp->t_busy)); | 248 | INIT_LIST_HEAD(&tp->t_busy); |
249 | return tp; | 249 | return tp; |
250 | } | 250 | } |
251 | 251 | ||
@@ -255,8 +255,13 @@ _xfs_trans_alloc( | |||
255 | */ | 255 | */ |
256 | STATIC void | 256 | STATIC void |
257 | xfs_trans_free( | 257 | xfs_trans_free( |
258 | xfs_trans_t *tp) | 258 | struct xfs_trans *tp) |
259 | { | 259 | { |
260 | struct xfs_busy_extent *busyp, *n; | ||
261 | |||
262 | list_for_each_entry_safe(busyp, n, &tp->t_busy, list) | ||
263 | xfs_alloc_busy_clear(tp->t_mountp, busyp); | ||
264 | |||
260 | atomic_dec(&tp->t_mountp->m_active_trans); | 265 | atomic_dec(&tp->t_mountp->m_active_trans); |
261 | xfs_trans_free_dqinfo(tp); | 266 | xfs_trans_free_dqinfo(tp); |
262 | kmem_zone_free(xfs_trans_zone, tp); | 267 | kmem_zone_free(xfs_trans_zone, tp); |
@@ -285,9 +290,8 @@ xfs_trans_dup( | |||
285 | ntp->t_type = tp->t_type; | 290 | ntp->t_type = tp->t_type; |
286 | ntp->t_mountp = tp->t_mountp; | 291 | ntp->t_mountp = tp->t_mountp; |
287 | ntp->t_items_free = XFS_LIC_NUM_SLOTS; | 292 | ntp->t_items_free = XFS_LIC_NUM_SLOTS; |
288 | ntp->t_busy_free = XFS_LBC_NUM_SLOTS; | ||
289 | xfs_lic_init(&(ntp->t_items)); | 293 | xfs_lic_init(&(ntp->t_items)); |
290 | XFS_LBC_INIT(&(ntp->t_busy)); | 294 | INIT_LIST_HEAD(&ntp->t_busy); |
291 | 295 | ||
292 | ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); | 296 | ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); |
293 | ASSERT(tp->t_ticket != NULL); | 297 | ASSERT(tp->t_ticket != NULL); |
@@ -423,7 +427,6 @@ undo_blocks: | |||
423 | return error; | 427 | return error; |
424 | } | 428 | } |
425 | 429 | ||
426 | |||
427 | /* | 430 | /* |
428 | * Record the indicated change to the given field for application | 431 | * Record the indicated change to the given field for application |
429 | * to the file system's superblock when the transaction commits. | 432 | * to the file system's superblock when the transaction commits. |
@@ -930,26 +933,6 @@ xfs_trans_item_committed( | |||
930 | IOP_UNPIN(lip); | 933 | IOP_UNPIN(lip); |
931 | } | 934 | } |
932 | 935 | ||
933 | /* Clear all the per-AG busy list items listed in this transaction */ | ||
934 | static void | ||
935 | xfs_trans_clear_busy_extents( | ||
936 | struct xfs_trans *tp) | ||
937 | { | ||
938 | xfs_log_busy_chunk_t *lbcp; | ||
939 | xfs_log_busy_slot_t *lbsp; | ||
940 | int i; | ||
941 | |||
942 | for (lbcp = &tp->t_busy; lbcp != NULL; lbcp = lbcp->lbc_next) { | ||
943 | i = 0; | ||
944 | for (lbsp = lbcp->lbc_busy; i < lbcp->lbc_unused; i++, lbsp++) { | ||
945 | if (XFS_LBC_ISFREE(lbcp, i)) | ||
946 | continue; | ||
947 | xfs_alloc_clear_busy(tp, lbsp->lbc_ag, lbsp->lbc_idx); | ||
948 | } | ||
949 | } | ||
950 | xfs_trans_free_busy(tp); | ||
951 | } | ||
952 | |||
953 | /* | 936 | /* |
954 | * This is typically called by the LM when a transaction has been fully | 937 | * This is typically called by the LM when a transaction has been fully |
955 | * committed to disk. It needs to unpin the items which have | 938 | * committed to disk. It needs to unpin the items which have |
@@ -984,7 +967,6 @@ xfs_trans_committed( | |||
984 | kmem_free(licp); | 967 | kmem_free(licp); |
985 | } | 968 | } |
986 | 969 | ||
987 | xfs_trans_clear_busy_extents(tp); | ||
988 | xfs_trans_free(tp); | 970 | xfs_trans_free(tp); |
989 | } | 971 | } |
990 | 972 | ||
@@ -1013,7 +995,6 @@ xfs_trans_uncommit( | |||
1013 | xfs_trans_unreserve_and_mod_dquots(tp); | 995 | xfs_trans_unreserve_and_mod_dquots(tp); |
1014 | 996 | ||
1015 | xfs_trans_free_items(tp, flags); | 997 | xfs_trans_free_items(tp, flags); |
1016 | xfs_trans_free_busy(tp); | ||
1017 | xfs_trans_free(tp); | 998 | xfs_trans_free(tp); |
1018 | } | 999 | } |
1019 | 1000 | ||
@@ -1075,6 +1056,8 @@ xfs_trans_commit_iclog( | |||
1075 | *commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags); | 1056 | *commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags); |
1076 | 1057 | ||
1077 | tp->t_commit_lsn = *commit_lsn; | 1058 | tp->t_commit_lsn = *commit_lsn; |
1059 | trace_xfs_trans_commit_lsn(tp); | ||
1060 | |||
1078 | if (nvec > XFS_TRANS_LOGVEC_COUNT) | 1061 | if (nvec > XFS_TRANS_LOGVEC_COUNT) |
1079 | kmem_free(log_vector); | 1062 | kmem_free(log_vector); |
1080 | 1063 | ||
@@ -1260,7 +1243,6 @@ out_unreserve: | |||
1260 | } | 1243 | } |
1261 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); | 1244 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
1262 | xfs_trans_free_items(tp, error ? XFS_TRANS_ABORT : 0); | 1245 | xfs_trans_free_items(tp, error ? XFS_TRANS_ABORT : 0); |
1263 | xfs_trans_free_busy(tp); | ||
1264 | xfs_trans_free(tp); | 1246 | xfs_trans_free(tp); |
1265 | 1247 | ||
1266 | XFS_STATS_INC(xs_trans_empty); | 1248 | XFS_STATS_INC(xs_trans_empty); |
@@ -1339,7 +1321,6 @@ xfs_trans_cancel( | |||
1339 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); | 1321 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
1340 | 1322 | ||
1341 | xfs_trans_free_items(tp, flags); | 1323 | xfs_trans_free_items(tp, flags); |
1342 | xfs_trans_free_busy(tp); | ||
1343 | xfs_trans_free(tp); | 1324 | xfs_trans_free(tp); |
1344 | } | 1325 | } |
1345 | 1326 | ||
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index c62beee0921e..ff7e9e6eee84 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -813,6 +813,7 @@ struct xfs_log_item_desc; | |||
813 | struct xfs_mount; | 813 | struct xfs_mount; |
814 | struct xfs_trans; | 814 | struct xfs_trans; |
815 | struct xfs_dquot_acct; | 815 | struct xfs_dquot_acct; |
816 | struct xfs_busy_extent; | ||
816 | 817 | ||
817 | typedef struct xfs_log_item { | 818 | typedef struct xfs_log_item { |
818 | struct list_head li_ail; /* AIL pointers */ | 819 | struct list_head li_ail; /* AIL pointers */ |
@@ -872,34 +873,6 @@ typedef struct xfs_item_ops { | |||
872 | #define XFS_ITEM_PUSHBUF 3 | 873 | #define XFS_ITEM_PUSHBUF 3 |
873 | 874 | ||
874 | /* | 875 | /* |
875 | * This structure is used to maintain a list of block ranges that have been | ||
876 | * freed in the transaction. The ranges are listed in the perag[] busy list | ||
877 | * between when they're freed and the transaction is committed to disk. | ||
878 | */ | ||
879 | |||
880 | typedef struct xfs_log_busy_slot { | ||
881 | xfs_agnumber_t lbc_ag; | ||
882 | ushort lbc_idx; /* index in perag.busy[] */ | ||
883 | } xfs_log_busy_slot_t; | ||
884 | |||
885 | #define XFS_LBC_NUM_SLOTS 31 | ||
886 | typedef struct xfs_log_busy_chunk { | ||
887 | struct xfs_log_busy_chunk *lbc_next; | ||
888 | uint lbc_free; /* free slots bitmask */ | ||
889 | ushort lbc_unused; /* first unused */ | ||
890 | xfs_log_busy_slot_t lbc_busy[XFS_LBC_NUM_SLOTS]; | ||
891 | } xfs_log_busy_chunk_t; | ||
892 | |||
893 | #define XFS_LBC_MAX_SLOT (XFS_LBC_NUM_SLOTS - 1) | ||
894 | #define XFS_LBC_FREEMASK ((1U << XFS_LBC_NUM_SLOTS) - 1) | ||
895 | |||
896 | #define XFS_LBC_INIT(cp) ((cp)->lbc_free = XFS_LBC_FREEMASK) | ||
897 | #define XFS_LBC_CLAIM(cp, slot) ((cp)->lbc_free &= ~(1 << (slot))) | ||
898 | #define XFS_LBC_SLOT(cp, slot) (&((cp)->lbc_busy[(slot)])) | ||
899 | #define XFS_LBC_VACANCY(cp) (((cp)->lbc_free) & XFS_LBC_FREEMASK) | ||
900 | #define XFS_LBC_ISFREE(cp, slot) ((cp)->lbc_free & (1 << (slot))) | ||
901 | |||
902 | /* | ||
903 | * This is the type of function which can be given to xfs_trans_callback() | 876 | * This is the type of function which can be given to xfs_trans_callback() |
904 | * to be called upon the transaction's commit to disk. | 877 | * to be called upon the transaction's commit to disk. |
905 | */ | 878 | */ |
@@ -950,8 +923,7 @@ typedef struct xfs_trans { | |||
950 | unsigned int t_items_free; /* log item descs free */ | 923 | unsigned int t_items_free; /* log item descs free */ |
951 | xfs_log_item_chunk_t t_items; /* first log item desc chunk */ | 924 | xfs_log_item_chunk_t t_items; /* first log item desc chunk */ |
952 | xfs_trans_header_t t_header; /* header for in-log trans */ | 925 | xfs_trans_header_t t_header; /* header for in-log trans */ |
953 | unsigned int t_busy_free; /* busy descs free */ | 926 | struct list_head t_busy; /* list of busy extents */ |
954 | xfs_log_busy_chunk_t t_busy; /* busy/async free blocks */ | ||
955 | unsigned long t_pflags; /* saved process flags state */ | 927 | unsigned long t_pflags; /* saved process flags state */ |
956 | } xfs_trans_t; | 928 | } xfs_trans_t; |
957 | 929 | ||
@@ -1025,9 +997,6 @@ int _xfs_trans_commit(xfs_trans_t *, | |||
1025 | void xfs_trans_cancel(xfs_trans_t *, int); | 997 | void xfs_trans_cancel(xfs_trans_t *, int); |
1026 | int xfs_trans_ail_init(struct xfs_mount *); | 998 | int xfs_trans_ail_init(struct xfs_mount *); |
1027 | void xfs_trans_ail_destroy(struct xfs_mount *); | 999 | void xfs_trans_ail_destroy(struct xfs_mount *); |
1028 | xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp, | ||
1029 | xfs_agnumber_t ag, | ||
1030 | xfs_extlen_t idx); | ||
1031 | 1000 | ||
1032 | extern kmem_zone_t *xfs_trans_zone; | 1001 | extern kmem_zone_t *xfs_trans_zone; |
1033 | 1002 | ||
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c index eb3fc57f9eef..2937a1e53318 100644 --- a/fs/xfs/xfs_trans_item.c +++ b/fs/xfs/xfs_trans_item.c | |||
@@ -438,112 +438,3 @@ xfs_trans_unlock_chunk( | |||
438 | 438 | ||
439 | return freed; | 439 | return freed; |
440 | } | 440 | } |
441 | |||
442 | |||
443 | /* | ||
444 | * This is called to add the given busy item to the transaction's | ||
445 | * list of busy items. It must find a free busy item descriptor | ||
446 | * or allocate a new one and add the item to that descriptor. | ||
447 | * The function returns a pointer to busy descriptor used to point | ||
448 | * to the new busy entry. The log busy entry will now point to its new | ||
449 | * descriptor with its ???? field. | ||
450 | */ | ||
451 | xfs_log_busy_slot_t * | ||
452 | xfs_trans_add_busy(xfs_trans_t *tp, xfs_agnumber_t ag, xfs_extlen_t idx) | ||
453 | { | ||
454 | xfs_log_busy_chunk_t *lbcp; | ||
455 | xfs_log_busy_slot_t *lbsp; | ||
456 | int i=0; | ||
457 | |||
458 | /* | ||
459 | * If there are no free descriptors, allocate a new chunk | ||
460 | * of them and put it at the front of the chunk list. | ||
461 | */ | ||
462 | if (tp->t_busy_free == 0) { | ||
463 | lbcp = (xfs_log_busy_chunk_t*) | ||
464 | kmem_alloc(sizeof(xfs_log_busy_chunk_t), KM_SLEEP); | ||
465 | ASSERT(lbcp != NULL); | ||
466 | /* | ||
467 | * Initialize the chunk, and then | ||
468 | * claim the first slot in the newly allocated chunk. | ||
469 | */ | ||
470 | XFS_LBC_INIT(lbcp); | ||
471 | XFS_LBC_CLAIM(lbcp, 0); | ||
472 | lbcp->lbc_unused = 1; | ||
473 | lbsp = XFS_LBC_SLOT(lbcp, 0); | ||
474 | |||
475 | /* | ||
476 | * Link in the new chunk and update the free count. | ||
477 | */ | ||
478 | lbcp->lbc_next = tp->t_busy.lbc_next; | ||
479 | tp->t_busy.lbc_next = lbcp; | ||
480 | tp->t_busy_free = XFS_LIC_NUM_SLOTS - 1; | ||
481 | |||
482 | /* | ||
483 | * Initialize the descriptor and the generic portion | ||
484 | * of the log item. | ||
485 | * | ||
486 | * Point the new slot at this item and return it. | ||
487 | * Also point the log item at its currently active | ||
488 | * descriptor and set the item's mount pointer. | ||
489 | */ | ||
490 | lbsp->lbc_ag = ag; | ||
491 | lbsp->lbc_idx = idx; | ||
492 | return lbsp; | ||
493 | } | ||
494 | |||
495 | /* | ||
496 | * Find the free descriptor. It is somewhere in the chunklist | ||
497 | * of descriptors. | ||
498 | */ | ||
499 | lbcp = &tp->t_busy; | ||
500 | while (lbcp != NULL) { | ||
501 | if (XFS_LBC_VACANCY(lbcp)) { | ||
502 | if (lbcp->lbc_unused <= XFS_LBC_MAX_SLOT) { | ||
503 | i = lbcp->lbc_unused; | ||
504 | break; | ||
505 | } else { | ||
506 | /* out-of-order vacancy */ | ||
507 | cmn_err(CE_DEBUG, "OOO vacancy lbcp 0x%p\n", lbcp); | ||
508 | ASSERT(0); | ||
509 | } | ||
510 | } | ||
511 | lbcp = lbcp->lbc_next; | ||
512 | } | ||
513 | ASSERT(lbcp != NULL); | ||
514 | /* | ||
515 | * If we find a free descriptor, claim it, | ||
516 | * initialize it, and return it. | ||
517 | */ | ||
518 | XFS_LBC_CLAIM(lbcp, i); | ||
519 | if (lbcp->lbc_unused <= i) { | ||
520 | lbcp->lbc_unused = i + 1; | ||
521 | } | ||
522 | lbsp = XFS_LBC_SLOT(lbcp, i); | ||
523 | tp->t_busy_free--; | ||
524 | lbsp->lbc_ag = ag; | ||
525 | lbsp->lbc_idx = idx; | ||
526 | return lbsp; | ||
527 | } | ||
528 | |||
529 | |||
530 | /* | ||
531 | * xfs_trans_free_busy | ||
532 | * Free all of the busy lists from a transaction | ||
533 | */ | ||
534 | void | ||
535 | xfs_trans_free_busy(xfs_trans_t *tp) | ||
536 | { | ||
537 | xfs_log_busy_chunk_t *lbcp; | ||
538 | xfs_log_busy_chunk_t *lbcq; | ||
539 | |||
540 | lbcp = tp->t_busy.lbc_next; | ||
541 | while (lbcp != NULL) { | ||
542 | lbcq = lbcp->lbc_next; | ||
543 | kmem_free(lbcp); | ||
544 | lbcp = lbcq; | ||
545 | } | ||
546 | |||
547 | XFS_LBC_INIT(&tp->t_busy); | ||
548 | tp->t_busy.lbc_unused = 0; | ||
549 | } | ||
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 73e2ad397432..901dc0f032da 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h | |||
@@ -38,10 +38,6 @@ struct xfs_log_item_desc *xfs_trans_next_item(struct xfs_trans *, | |||
38 | void xfs_trans_free_items(struct xfs_trans *, int); | 38 | void xfs_trans_free_items(struct xfs_trans *, int); |
39 | void xfs_trans_unlock_items(struct xfs_trans *, | 39 | void xfs_trans_unlock_items(struct xfs_trans *, |
40 | xfs_lsn_t); | 40 | xfs_lsn_t); |
41 | void xfs_trans_free_busy(xfs_trans_t *tp); | ||
42 | xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp, | ||
43 | xfs_agnumber_t ag, | ||
44 | xfs_extlen_t idx); | ||
45 | 41 | ||
46 | /* | 42 | /* |
47 | * AIL traversal cursor. | 43 | * AIL traversal cursor. |