author     Chris Mason <chris.mason@oracle.com>    2008-09-08 11:18:08 -0400
committer  Chris Mason <chris.mason@oracle.com>    2008-09-25 11:04:07 -0400
commit     4bef084857ab8fe71cf49eae349c25e440a49150 (patch)
tree       7a9a850515538421c2976f8ee4c1bea5ceced59c /fs/btrfs/extent_io.c
parent     325cd4bafeb6cfb44addd6e807a9b74170d1be31 (diff)
Btrfs: Tree logging fixes
* Pin down data blocks to prevent them from being reallocated like so:
trans 1: allocate file extent
trans 2: free file extent
trans 3: free file extent during old snapshot deletion
trans 3: allocate file extent to new file
trans 3: fsync new file
Before the tree logging code, this was legal because the fsync
would commit the transaction that did the final data extent free
and the transaction that allocated the extent to the new file
at the same time.
With the tree logging code, the tree log subtransaction can commit
before the transaction that freed the extent. If we crash,
we're left with two different files using the extent (the pinning
idea is sketched after the sign-off below).
* Don't wait in start_transaction if log replay is going on. This
avoids deadlocks from iput while we're cleaning up link counts in the
replay code (see the sketch below).
* Don't deadlock in replay_one_name by trying to read an inode off
the disk while holding paths for the directory (see the sketch below).
* Hold the buffer lock while we mark a buffer as written. This
closes a race where someone is changing a buffer while we write it.
They are supposed to mark it dirty again after they change it, but
this violates the COW rules (a lock-hook sketch follows below).
Signed-off-by: Chris Mason <chris.mason@oracle.com>
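
The first fix is easiest to see as a change in what happens when a data
extent is freed. The sketch below is illustrative only; the helpers
pin_down_bytes() and return_free_space() are hypothetical stand-ins for
the extent-tree.c machinery, not code from this patch.

/*
 * Illustrative sketch of the pinning idea (not the actual patch).  The
 * helpers pin_down_bytes() and return_free_space() are hypothetical.
 */
static int free_data_extent(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root,
                            u64 bytenr, u64 num_bytes, int log_may_ref)
{
        /*
         * A tree-log commit can reach disk before the transaction that
         * performs this free.  If an fsync log might still reference the
         * range, keep it pinned until the full transaction commits rather
         * than handing it straight back to the allocator, so a new file
         * can never be handed the same bytes.
         */
        if (log_may_ref)
                return pin_down_bytes(trans, root, bytenr, num_bytes);

        return return_free_space(root, bytenr, num_bytes);
}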
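
The second fix is structurally simple: the wait on the currently running
transaction has to be skipped while log replay is in progress, because
replay itself is driving the transaction and an iput() issued during
link-count cleanup would otherwise wait on it forever. A rough sketch,
assuming a log_root_recovering flag on fs_info (that field name is an
assumption, not taken from this diff):

/*
 * Rough sketch of the start_transaction() change.  The flag name
 * log_root_recovering is assumed, not taken from this diff.
 */
static void maybe_wait_current_trans(struct btrfs_root *root)
{
        /* during log replay, blocking here would deadlock against the
         * replay code's own iput()-driven cleanup */
        if (!root->fs_info->log_root_recovering)
                wait_current_trans(root);
}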
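
The third fix is an ordering change inside replay_one_name(): release the
directory path before reading the inode, so the inode lookup cannot block
on tree blocks the path still holds. A sketch of that ordering, with
read_one_inode() standing in for the tree-log helper that reads an inode
off disk:

/*
 * Sketch of the ordering that avoids the replay_one_name() deadlock;
 * read_one_inode() is a stand-in for the tree-log inode read helper.
 */
static struct inode *read_inode_for_replay(struct btrfs_root *root,
                                           struct btrfs_path *path,
                                           u64 objectid)
{
        /* drop the directory path first: the inode read may need to
         * search the same tree blocks we are still holding */
        btrfs_release_path(root, path);
        return read_one_inode(root, objectid);
}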
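
The last fix is what the write_cache_pages_lock_hook call in the diff
below exists for: btree writeback gets a chance to lock the extent
buffer, flag it as written, and only then lock the page, closing the race
with writers who would otherwise touch the buffer mid-write. A simplified
sketch of such a hook; find_eb_for_page(), lock_extent_buffer() and
mark_eb_written() are assumed names, not the real implementation:

/*
 * Simplified sketch of a write_cache_pages_lock_hook implementation for
 * btree pages.  The lookup and flag helpers are hypothetical names.
 */
static int btree_lock_page_hook(struct page *page)
{
        struct extent_buffer *eb = find_eb_for_page(page);

        if (eb) {
                /* hold the buffer lock while flagging it written so a
                 * concurrent modification cannot slip in un-COWed */
                lock_extent_buffer(eb);
                mark_eb_written(eb);
                unlock_extent_buffer(eb);
                free_extent_buffer(eb);
        }
        lock_page(page);
        return 0;
}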
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--  fs/btrfs/extent_io.c | 47
1 file changed, 36 insertions(+), 11 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 239e7c908abf..319a0c7a4a58 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -29,7 +29,10 @@ static struct kmem_cache *extent_buffer_cache;
 
 static LIST_HEAD(buffers);
 static LIST_HEAD(states);
+
+#ifdef LEAK_DEBUG
 static spinlock_t leak_lock = SPIN_LOCK_UNLOCKED;
+#endif
 
 #define BUFFER_LRU_MAX 64
 
@@ -106,7 +109,9 @@ EXPORT_SYMBOL(extent_io_tree_init);
 struct extent_state *alloc_extent_state(gfp_t mask)
 {
         struct extent_state *state;
+#ifdef LEAK_DEBUG
         unsigned long flags;
+#endif
 
         state = kmem_cache_alloc(extent_state_cache, mask);
         if (!state)
@@ -114,10 +119,11 @@ struct extent_state *alloc_extent_state(gfp_t mask)
         state->state = 0;
         state->private = 0;
         state->tree = NULL;
+#ifdef LEAK_DEBUG
         spin_lock_irqsave(&leak_lock, flags);
         list_add(&state->leak_list, &states);
         spin_unlock_irqrestore(&leak_lock, flags);
-
+#endif
         atomic_set(&state->refs, 1);
         init_waitqueue_head(&state->wq);
         return state;
@@ -129,11 +135,15 @@ void free_extent_state(struct extent_state *state)
         if (!state)
                 return;
         if (atomic_dec_and_test(&state->refs)) {
+#ifdef LEAK_DEBUG
                 unsigned long flags;
+#endif
                 WARN_ON(state->tree);
+#ifdef LEAK_DEBUG
                 spin_lock_irqsave(&leak_lock, flags);
                 list_del(&state->leak_list);
                 spin_unlock_irqrestore(&leak_lock, flags);
+#endif
                 kmem_cache_free(extent_state_cache, state);
         }
 }
@@ -2070,13 +2080,13 @@ done:
 }
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
-/* Taken directly from 2.6.23 for 2.6.18 back port */
+/* Taken directly from 2.6.23 with a mod for a lockpage hook */
 typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
                                 void *data);
+#endif
 
 /**
- * write_cache_pages - walk the list of dirty pages of the given address space
- * and write all of them.
+ * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
  * @mapping: address space structure to write
  * @wbc: subtract the number of written pages from *@wbc->nr_to_write
  * @writepage: function called for each page
@@ -2090,9 +2100,10 @@ typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
  * WB_SYNC_ALL then we were called for data integrity and we must wait for
  * existing IO to complete.
  */
-static int write_cache_pages(struct address_space *mapping,
-                      struct writeback_control *wbc, writepage_t writepage,
-                      void *data)
+int extent_write_cache_pages(struct extent_io_tree *tree,
+                             struct address_space *mapping,
+                             struct writeback_control *wbc,
+                             writepage_t writepage, void *data)
 {
         struct backing_dev_info *bdi = mapping->backing_dev_info;
         int ret = 0;
@@ -2138,7 +2149,10 @@ retry:
                          * swizzled back from swapper_space to tmpfs file
                          * mapping
                          */
-                        lock_page(page);
+                        if (tree->ops && tree->ops->write_cache_pages_lock_hook)
+                                tree->ops->write_cache_pages_lock_hook(page);
+                        else
+                                lock_page(page);
 
                         if (unlikely(page->mapping != mapping)) {
                                 unlock_page(page);
@@ -2187,9 +2201,12 @@ retry:
         }
         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
                 mapping->writeback_index = index;
+
+        if (wbc->range_cont)
+                wbc->range_start = index << PAGE_CACHE_SHIFT;
         return ret;
 }
-#endif
+EXPORT_SYMBOL(extent_write_cache_pages);
 
 int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
                           get_extent_t *get_extent,
@@ -2214,7 +2231,8 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 
         ret = __extent_writepage(page, wbc, &epd);
 
-        write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd);
+        extent_write_cache_pages(tree, mapping, &wbc_writepages,
+                                 __extent_writepage, &epd);
         if (epd.bio) {
                 submit_one_bio(WRITE, epd.bio, 0);
         }
@@ -2235,7 +2253,8 @@ int extent_writepages(struct extent_io_tree *tree,
                 .get_extent = get_extent,
         };
 
-        ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd);
+        ret = extent_write_cache_pages(tree, mapping, wbc,
+                                       __extent_writepage, &epd);
         if (epd.bio) {
                 submit_one_bio(WRITE, epd.bio, 0);
         }
@@ -2567,15 +2586,19 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
                                           gfp_t mask)
 {
         struct extent_buffer *eb = NULL;
+#ifdef LEAK_DEBUG
         unsigned long flags;
+#endif
 
         eb = kmem_cache_zalloc(extent_buffer_cache, mask);
         eb->start = start;
         eb->len = len;
         mutex_init(&eb->mutex);
+#ifdef LEAK_DEBUG
         spin_lock_irqsave(&leak_lock, flags);
         list_add(&eb->leak_list, &buffers);
         spin_unlock_irqrestore(&leak_lock, flags);
+#endif
         atomic_set(&eb->refs, 1);
 
         return eb;
@@ -2583,10 +2606,12 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
 
 static void __free_extent_buffer(struct extent_buffer *eb)
 {
+#ifdef LEAK_DEBUG
         unsigned long flags;
         spin_lock_irqsave(&leak_lock, flags);
         list_del(&eb->leak_list);
         spin_unlock_irqrestore(&leak_lock, flags);
+#endif
         kmem_cache_free(extent_buffer_cache, eb);
 }
 
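
Most of the hunks above only wrap the leak_lock and the leak-list
manipulation in #ifdef LEAK_DEBUG, so that overhead disappears from
non-debug builds. For context, such leak lists are normally drained when
the module unloads; the sketch below paraphrases that usual
extent_io_exit() pattern and is not part of this patch.

/*
 * Sketch of how the leak lists guarded by leak_lock are typically used:
 * anything still linked at module exit was leaked.  Paraphrased, not
 * taken from this patch.
 */
#ifdef LEAK_DEBUG
static void report_extent_state_leaks(void)
{
        struct extent_state *state;

        while (!list_empty(&states)) {
                state = list_entry(states.next, struct extent_state,
                                   leak_list);
                printk(KERN_ERR "extent state leak: start %llu end %llu "
                       "refs %d\n", (unsigned long long)state->start,
                       (unsigned long long)state->end,
                       atomic_read(&state->refs));
                list_del(&state->leak_list);
                kmem_cache_free(extent_state_cache, state);
        }
}
#endif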