author    Chris Mason <chris.mason@oracle.com>    2008-09-08 11:18:08 -0400
committer Chris Mason <chris.mason@oracle.com>    2008-09-25 11:04:07 -0400
commit    4bef084857ab8fe71cf49eae349c25e440a49150 (patch)
tree      7a9a850515538421c2976f8ee4c1bea5ceced59c /fs/btrfs/extent_io.c
parent    325cd4bafeb6cfb44addd6e807a9b74170d1be31 (diff)
Btrfs: Tree logging fixes
* Pin down data blocks to prevent them from being reallocated like so:

      trans 1: allocate file extent
      trans 2: free file extent
      trans 3: free file extent during old snapshot deletion
      trans 3: allocate file extent to new file
      trans 3: fsync new file

  Before the tree logging code, this was legal because the fsync would commit
  the transaction that did the final data extent free and the transaction that
  allocated the extent to the new file at the same time. With the tree logging
  code, the tree log subtransaction can commit before the transaction that
  freed the extent. If we crash, we're left with two different files using
  the extent.

* Don't wait in start_transaction if log replay is going on. This avoids
  deadlocks from iput while we're cleaning up link counts in the replay code.

* Don't deadlock in replay_one_name by trying to read an inode off the disk
  while holding paths for the directory.

* Hold the buffer lock while we mark a buffer as written. This closes a race
  where someone is changing a buffer while we write it. They are supposed to
  mark it dirty again after they change it, but this violates the cow rules.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
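The last bullet is the piece of this change that lands in extent_io.c: the page-locking step of the writeback walk gains an optional per-tree hook (write_cache_pages_lock_hook) so the caller can take its own buffer lock before the page lock, falling back to a plain lock_page() when no hook is set. Below is a minimal standalone sketch of that dispatch pattern as ordinary userspace C; the io_ops, io_tree, and buffer_then_page_lock names are illustrative stand-ins, not the kernel's types.

    /*
     * Standalone sketch (not kernel code) of the dispatch the diff below adds
     * to extent_write_cache_pages(): if the tree's operations table provides a
     * write_cache_pages_lock_hook, call it instead of the default page lock.
     */
    #include <stdio.h>

    struct page { int id; };

    struct io_ops {
    	/* optional: may be NULL, in which case the default lock is used */
    	int (*write_cache_pages_lock_hook)(struct page *page);
    };

    struct io_tree {
    	const struct io_ops *ops;
    };

    static void lock_page(struct page *page)
    {
    	printf("default lock_page(%d)\n", page->id);
    }

    static int buffer_then_page_lock(struct page *page)
    {
    	/* a hook would lock the buffer backing the page before the page itself */
    	printf("hook: lock buffer, then page %d\n", page->id);
    	return 0;
    }

    static void lock_for_writeback(struct io_tree *tree, struct page *page)
    {
    	/* same shape as the hunk in extent_write_cache_pages() */
    	if (tree->ops && tree->ops->write_cache_pages_lock_hook)
    		tree->ops->write_cache_pages_lock_hook(page);
    	else
    		lock_page(page);
    }

    int main(void)
    {
    	struct page p = { .id = 1 };
    	const struct io_ops ops = { .write_cache_pages_lock_hook = buffer_then_page_lock };
    	struct io_tree with_hook = { .ops = &ops };
    	struct io_tree without_hook = { .ops = NULL };

    	lock_for_writeback(&with_hook, &p);    /* uses the hook */
    	lock_for_writeback(&without_hook, &p); /* falls back to lock_page() */
    	return 0;
    }

The design point is that the generic walker stays unchanged for callers that don't care, while a filesystem that must order its own lock ahead of the page lock can inject that ordering through the ops table.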
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--  fs/btrfs/extent_io.c  |  47
1 files changed, 36 insertions, 11 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 239e7c908abf..319a0c7a4a58 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -29,7 +29,10 @@ static struct kmem_cache *extent_buffer_cache;

 static LIST_HEAD(buffers);
 static LIST_HEAD(states);
+
+#ifdef LEAK_DEBUG
 static spinlock_t leak_lock = SPIN_LOCK_UNLOCKED;
+#endif

 #define BUFFER_LRU_MAX 64

@@ -106,7 +109,9 @@ EXPORT_SYMBOL(extent_io_tree_init);
 struct extent_state *alloc_extent_state(gfp_t mask)
 {
 	struct extent_state *state;
+#ifdef LEAK_DEBUG
 	unsigned long flags;
+#endif

 	state = kmem_cache_alloc(extent_state_cache, mask);
 	if (!state)
@@ -114,10 +119,11 @@ struct extent_state *alloc_extent_state(gfp_t mask)
 	state->state = 0;
 	state->private = 0;
 	state->tree = NULL;
+#ifdef LEAK_DEBUG
 	spin_lock_irqsave(&leak_lock, flags);
 	list_add(&state->leak_list, &states);
 	spin_unlock_irqrestore(&leak_lock, flags);
-
+#endif
 	atomic_set(&state->refs, 1);
 	init_waitqueue_head(&state->wq);
 	return state;
@@ -129,11 +135,15 @@ void free_extent_state(struct extent_state *state)
 	if (!state)
 		return;
 	if (atomic_dec_and_test(&state->refs)) {
+#ifdef LEAK_DEBUG
 		unsigned long flags;
+#endif
 		WARN_ON(state->tree);
+#ifdef LEAK_DEBUG
 		spin_lock_irqsave(&leak_lock, flags);
 		list_del(&state->leak_list);
 		spin_unlock_irqrestore(&leak_lock, flags);
+#endif
 		kmem_cache_free(extent_state_cache, state);
 	}
 }
@@ -2070,13 +2080,13 @@ done:
 }

 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
-/* Taken directly from 2.6.23 for 2.6.18 back port */
+/* Taken directly from 2.6.23 with a mod for a lockpage hook */
 typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
 				void *data);
+#endif

 /**
- * write_cache_pages - walk the list of dirty pages of the given address space
- * and write all of them.
+ * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
  * @mapping: address space structure to write
  * @wbc: subtract the number of written pages from *@wbc->nr_to_write
  * @writepage: function called for each page
@@ -2090,9 +2100,10 @@ typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
  * WB_SYNC_ALL then we were called for data integrity and we must wait for
  * existing IO to complete.
  */
-static int write_cache_pages(struct address_space *mapping,
-		      struct writeback_control *wbc, writepage_t writepage,
-		      void *data)
+int extent_write_cache_pages(struct extent_io_tree *tree,
+			     struct address_space *mapping,
+			     struct writeback_control *wbc,
+			     writepage_t writepage, void *data)
 {
 	struct backing_dev_info *bdi = mapping->backing_dev_info;
 	int ret = 0;
@@ -2138,7 +2149,10 @@ retry:
 			 * swizzled back from swapper_space to tmpfs file
 			 * mapping
 			 */
-			lock_page(page);
+			if (tree->ops && tree->ops->write_cache_pages_lock_hook)
+				tree->ops->write_cache_pages_lock_hook(page);
+			else
+				lock_page(page);

 			if (unlikely(page->mapping != mapping)) {
 				unlock_page(page);
@@ -2187,9 +2201,12 @@ retry:
 	}
 	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
 		mapping->writeback_index = index;
+
+	if (wbc->range_cont)
+		wbc->range_start = index << PAGE_CACHE_SHIFT;
 	return ret;
 }
-#endif
+EXPORT_SYMBOL(extent_write_cache_pages);

 int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 			  get_extent_t *get_extent,
@@ -2214,7 +2231,8 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,

 	ret = __extent_writepage(page, wbc, &epd);

-	write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd);
+	extent_write_cache_pages(tree, mapping, &wbc_writepages,
+				 __extent_writepage, &epd);
 	if (epd.bio) {
 		submit_one_bio(WRITE, epd.bio, 0);
 	}
@@ -2235,7 +2253,8 @@ int extent_writepages(struct extent_io_tree *tree,
 		.get_extent = get_extent,
 	};

-	ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd);
+	ret = extent_write_cache_pages(tree, mapping, wbc,
+				       __extent_writepage, &epd);
 	if (epd.bio) {
 		submit_one_bio(WRITE, epd.bio, 0);
 	}
@@ -2567,15 +2586,19 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
 						   gfp_t mask)
 {
 	struct extent_buffer *eb = NULL;
+#ifdef LEAK_DEBUG
 	unsigned long flags;
+#endif

 	eb = kmem_cache_zalloc(extent_buffer_cache, mask);
 	eb->start = start;
 	eb->len = len;
 	mutex_init(&eb->mutex);
+#ifdef LEAK_DEBUG
 	spin_lock_irqsave(&leak_lock, flags);
 	list_add(&eb->leak_list, &buffers);
 	spin_unlock_irqrestore(&leak_lock, flags);
+#endif
 	atomic_set(&eb->refs, 1);

 	return eb;
@@ -2583,10 +2606,12 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,

 static void __free_extent_buffer(struct extent_buffer *eb)
 {
+#ifdef LEAK_DEBUG
 	unsigned long flags;
 	spin_lock_irqsave(&leak_lock, flags);
 	list_del(&eb->leak_list);
 	spin_unlock_irqrestore(&leak_lock, flags);
+#endif
 	kmem_cache_free(extent_buffer_cache, eb);
 }

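The remaining hunks all follow one pattern: the leak-tracking list manipulation and its spinlock are wrapped in #ifdef LEAK_DEBUG so the bookkeeping compiles away entirely when the option is off. A small userspace sketch of the same compile-time-gated tracking, assuming hypothetical tracked_alloc()/tracked_free() helpers and a pthread mutex in place of the kernel spinlock:

    /*
     * Userspace illustration of the #ifdef LEAK_DEBUG pattern used above:
     * with the flag defined, every allocation is linked into a global list
     * under a lock so leaks can be dumped; without it, the tracking code
     * and its lock disappear at compile time.  Not kernel API.
     */
    #include <stdio.h>
    #include <stdlib.h>
    #include <pthread.h>

    struct object {
    	int value;
    #ifdef LEAK_DEBUG
    	struct object *leak_next;     /* stands in for the leak_list member */
    #endif
    };

    #ifdef LEAK_DEBUG
    static pthread_mutex_t leak_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct object *leak_head;
    #endif

    static struct object *tracked_alloc(int value)
    {
    	struct object *obj = calloc(1, sizeof(*obj));

    	if (!obj)
    		return NULL;
    	obj->value = value;
    #ifdef LEAK_DEBUG
    	pthread_mutex_lock(&leak_lock);
    	obj->leak_next = leak_head;   /* list_add() equivalent */
    	leak_head = obj;
    	pthread_mutex_unlock(&leak_lock);
    #endif
    	return obj;
    }

    static void tracked_free(struct object *obj)
    {
    	if (!obj)
    		return;
    #ifdef LEAK_DEBUG
    	pthread_mutex_lock(&leak_lock);
    	/* list_del() equivalent: unlink from the singly linked leak list */
    	for (struct object **p = &leak_head; *p; p = &(*p)->leak_next) {
    		if (*p == obj) {
    			*p = obj->leak_next;
    			break;
    		}
    	}
    	pthread_mutex_unlock(&leak_lock);
    #endif
    	free(obj);
    }

    int main(void)
    {
    	struct object *obj = tracked_alloc(42);

    	printf("allocated %d\n", obj ? obj->value : -1);
    	tracked_free(obj);
    	return 0;
    }

Built with -DLEAK_DEBUG, allocations are linked into leak_head under leak_lock; built without it, the list member, the lock, and both critical sections vanish, which is what the #ifdef additions above buy for extent_state and extent_buffer.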