about | summary | refs | log | tree | commit | diff | stats
path: root/fs/btrfs
diff options
context:
space:
mode:
author: Chris Mason <chris.mason@oracle.com> 2008-07-17 13:53:27 -0400
committer: Chris Mason <chris.mason@oracle.com> 2008-09-25 11:04:05 -0400
commit: eb84ae039e10f1f80443d846ba1350122bbdc753 (patch)
tree: d87bcbead436eaf96f5160ac46f78699d65c5251 /fs/btrfs
parent: 54641bd17db9fbfc13c7b1d4ee0dd2713bf3e076 (diff)
Btrfs: Cleanup and comment ordered-data.c
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- fs/btrfs/inode.c 12
-rw-r--r-- fs/btrfs/ordered-data.c 130
-rw-r--r-- fs/btrfs/ordered-data.h 49
3 files changed, 121 insertions, 70 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0d1e2ba47a9c..f37e09e724f7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1343,7 +1343,7 @@ again:
1343 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 1343 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
1344 unlock_page(page); 1344 unlock_page(page);
1345 page_cache_release(page); 1345 page_cache_release(page);
1346 btrfs_wait_ordered_extent(inode, ordered); 1346 btrfs_start_ordered_extent(inode, ordered, 1);
1347 btrfs_put_ordered_extent(ordered); 1347 btrfs_put_ordered_extent(ordered);
1348 goto again; 1348 goto again;
1349 } 1349 }
@@ -2660,6 +2660,10 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
2660 ordered = btrfs_lookup_ordered_extent(page->mapping->host, 2660 ordered = btrfs_lookup_ordered_extent(page->mapping->host,
2661 page_offset(page)); 2661 page_offset(page));
2662 if (ordered) { 2662 if (ordered) {
2663 /*
2664 * IO on this page will never be started, so we need
2665 * to account for any ordered extents now
2666 */
2663 clear_extent_bit(tree, page_start, page_end, 2667 clear_extent_bit(tree, page_start, page_end,
2664 EXTENT_DIRTY | EXTENT_DELALLOC | 2668 EXTENT_DIRTY | EXTENT_DELALLOC |
2665 EXTENT_LOCKED, 1, 0, GFP_NOFS); 2669 EXTENT_LOCKED, 1, 0, GFP_NOFS);
@@ -2732,11 +2736,15 @@ again:
2732 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 2736 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2733 set_page_extent_mapped(page); 2737 set_page_extent_mapped(page);
2734 2738
2739 /*
2740 * we can't set the delalloc bits if there are pending ordered
2741 * extents. Drop our locks and wait for them to finish
2742 */
2735 ordered = btrfs_lookup_ordered_extent(inode, page_start); 2743 ordered = btrfs_lookup_ordered_extent(inode, page_start);
2736 if (ordered) { 2744 if (ordered) {
2737 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 2745 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2738 unlock_page(page); 2746 unlock_page(page);
2739 btrfs_wait_ordered_extent(inode, ordered); 2747 btrfs_start_ordered_extent(inode, ordered, 1);
2740 btrfs_put_ordered_extent(ordered); 2748 btrfs_put_ordered_extent(ordered);
2741 goto again; 2749 goto again;
2742 } 2750 }
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index b739e3abebb9..230fd3ca6b2c 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -136,6 +136,19 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
136 return ret; 136 return ret;
137} 137}
138 138
139/* allocate and add a new ordered_extent into the per-inode tree.
140 * file_offset is the logical offset in the file
141 *
142 * start is the disk block number of an extent already reserved in the
143 * extent allocation tree
144 *
145 * len is the length of the extent
146 *
147 * This also sets the EXTENT_ORDERED bit on the range in the inode.
148 *
149 * The tree is given a single reference on the ordered extent that was
150 * inserted.
151 */
139int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, 152int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
140 u64 start, u64 len) 153 u64 start, u64 len)
141{ 154{
@@ -152,7 +165,6 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
152 entry->file_offset = file_offset; 165 entry->file_offset = file_offset;
153 entry->start = start; 166 entry->start = start;
154 entry->len = len; 167 entry->len = len;
155 entry->inode = inode;
156 /* one ref for the tree */ 168 /* one ref for the tree */
157 atomic_set(&entry->refs, 1); 169 atomic_set(&entry->refs, 1);
158 init_waitqueue_head(&entry->wait); 170 init_waitqueue_head(&entry->wait);
@@ -167,12 +179,15 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
167 set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, 179 set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset,
168 entry_end(entry) - 1, GFP_NOFS); 180 entry_end(entry) - 1, GFP_NOFS);
169 181
170 set_bit(BTRFS_ORDERED_START, &entry->flags);
171 mutex_unlock(&tree->mutex); 182 mutex_unlock(&tree->mutex);
172 BUG_ON(node); 183 BUG_ON(node);
173 return 0; 184 return 0;
174} 185}
175 186
187/*
188 * Add a struct btrfs_ordered_sum into the list of checksums to be inserted
189 * when an ordered extent is finished.
190 */
176int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum) 191int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum)
177{ 192{
178 struct btrfs_ordered_inode_tree *tree; 193 struct btrfs_ordered_inode_tree *tree;
@@ -182,29 +197,25 @@ int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum)
182 tree = &BTRFS_I(inode)->ordered_tree; 197 tree = &BTRFS_I(inode)->ordered_tree;
183 mutex_lock(&tree->mutex); 198 mutex_lock(&tree->mutex);
184 node = tree_search(tree, sum->file_offset); 199 node = tree_search(tree, sum->file_offset);
185 if (!node) {
186search_fail:
187printk("add ordered sum failed to find a node for inode %lu offset %Lu\n", inode->i_ino, sum->file_offset);
188 node = rb_first(&tree->tree);
189 while(node) {
190 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
191 printk("entry %Lu %Lu %Lu\n", entry->file_offset, entry->file_offset + entry->len, entry->start);
192 node = rb_next(node);
193 }
194 BUG();
195 }
196 BUG_ON(!node); 200 BUG_ON(!node);
197 201
198 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); 202 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
199 if (!offset_in_entry(entry, sum->file_offset)) { 203 BUG_ON(!offset_in_entry(entry, sum->file_offset));
200 goto search_fail;
201 }
202 204
203 list_add_tail(&sum->list, &entry->list); 205 list_add_tail(&sum->list, &entry->list);
204 mutex_unlock(&tree->mutex); 206 mutex_unlock(&tree->mutex);
205 return 0; 207 return 0;
206} 208}
207 209
210/*
211 * this is used to account for finished IO across a given range
212 * of the file. The IO should not span ordered extents. If
213 * a given ordered_extent is completely done, 1 is returned, otherwise
214 * 0.
215 *
216 * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
217 * to make sure this function only returns 1 once for a given ordered extent.
218 */
208int btrfs_dec_test_ordered_pending(struct inode *inode, 219int btrfs_dec_test_ordered_pending(struct inode *inode,
209 u64 file_offset, u64 io_size) 220 u64 file_offset, u64 io_size)
210{ 221{
@@ -233,9 +244,6 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
233 ret = test_range_bit(io_tree, entry->file_offset, 244 ret = test_range_bit(io_tree, entry->file_offset,
234 entry->file_offset + entry->len - 1, 245 entry->file_offset + entry->len - 1,
235 EXTENT_ORDERED, 0); 246 EXTENT_ORDERED, 0);
236 if (!test_bit(BTRFS_ORDERED_START, &entry->flags)) {
237printk("inode %lu not ready yet for extent %Lu %Lu\n", inode->i_ino, entry->file_offset, entry_end(entry));
238 }
239 if (ret == 0) 247 if (ret == 0)
240 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); 248 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
241out: 249out:
@@ -243,6 +251,10 @@ out:
243 return ret == 0; 251 return ret == 0;
244} 252}
245 253
254/*
255 * used to drop a reference on an ordered extent. This will free
256 * the extent if the last reference is dropped
257 */
246int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) 258int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
247{ 259{
248 struct list_head *cur; 260 struct list_head *cur;
@@ -260,6 +272,10 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
260 return 0; 272 return 0;
261} 273}
262 274
275/*
276 * remove an ordered extent from the tree. No references are dropped
277 * but, anyone waiting on this extent is woken up.
278 */
263int btrfs_remove_ordered_extent(struct inode *inode, 279int btrfs_remove_ordered_extent(struct inode *inode,
264 struct btrfs_ordered_extent *entry) 280 struct btrfs_ordered_extent *entry)
265{ 281{
@@ -277,27 +293,25 @@ int btrfs_remove_ordered_extent(struct inode *inode,
277 return 0; 293 return 0;
278} 294}
279 295
280void btrfs_wait_ordered_extent(struct inode *inode, 296/*
281 struct btrfs_ordered_extent *entry) 297 * Used to start IO or wait for a given ordered extent to finish.
282{ 298 *
283 u64 start = entry->file_offset; 299 * If wait is one, this effectively waits on page writeback for all the pages
284 u64 end = start + entry->len - 1; 300 * in the extent, and it waits on the io completion code to insert
285#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) 301 * metadata into the btree corresponding to the extent
286 do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE); 302 */
287#else 303void btrfs_start_ordered_extent(struct inode *inode,
288 do_sync_mapping_range(inode->i_mapping, start, end, 304 struct btrfs_ordered_extent *entry,
289 SYNC_FILE_RANGE_WRITE); 305 int wait)
290#endif
291 wait_event(entry->wait,
292 test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags));
293}
294
295static void btrfs_start_ordered_extent(struct inode *inode,
296 struct btrfs_ordered_extent *entry, int wait)
297{ 306{
298 u64 start = entry->file_offset; 307 u64 start = entry->file_offset;
299 u64 end = start + entry->len - 1; 308 u64 end = start + entry->len - 1;
300 309
310 /*
311 * pages in the range can be dirty, clean or writeback. We
312 * start IO on any dirty ones so the wait doesn't stall waiting
313 * for pdflush to find them
314 */
301#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) 315#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
302 do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE); 316 do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE);
303#else 317#else
@@ -309,6 +323,9 @@ static void btrfs_start_ordered_extent(struct inode *inode,
309 &entry->flags)); 323 &entry->flags));
310} 324}
311 325
326/*
327 * Used to wait on ordered extents across a large range of bytes.
328 */
312void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) 329void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
313{ 330{
314 u64 end; 331 u64 end;
@@ -349,31 +366,11 @@ again:
349 } 366 }
350} 367}
351 368
352int btrfs_add_ordered_pending(struct inode *inode,
353 struct btrfs_ordered_extent *ordered,
354 u64 start, u64 len)
355{
356 WARN_ON(1);
357 return 0;
358#if 0
359 int ret;
360 struct btrfs_ordered_inode_tree *tree;
361 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
362
363 tree = &BTRFS_I(inode)->ordered_tree;
364 mutex_lock(&tree->mutex);
365 if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) {
366 ret = -EAGAIN;
367 goto out;
368 }
369 set_extent_ordered(io_tree, start, start + len - 1, GFP_NOFS);
370 ret = 0;
371out:
372 mutex_unlock(&tree->mutex);
373 return ret;
374#endif
375}
376 369
370/*
371 * find an ordered extent corresponding to file_offset. return NULL if
372 * nothing is found, otherwise take a reference on the extent and return it
373 */
377struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, 374struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
378 u64 file_offset) 375 u64 file_offset)
379{ 376{
@@ -397,6 +394,10 @@ out:
397 return entry; 394 return entry;
398} 395}
399 396
397/*
398 * lookup and return any extent before 'file_offset'. NULL is returned
399 * if none is found
400 */
400struct btrfs_ordered_extent * 401struct btrfs_ordered_extent *
401btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset) 402btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset)
402{ 403{
@@ -417,6 +418,10 @@ out:
417 return entry; 418 return entry;
418} 419}
419 420
421/*
422 * After an extent is done, call this to conditionally update the on disk
423 * i_size. i_size is updated to cover any fully written part of the file.
424 */
420int btrfs_ordered_update_i_size(struct inode *inode, 425int btrfs_ordered_update_i_size(struct inode *inode,
421 struct btrfs_ordered_extent *ordered) 426 struct btrfs_ordered_extent *ordered)
422{ 427{
@@ -507,6 +512,11 @@ out:
507 return 0; 512 return 0;
508} 513}
509 514
515/*
516 * search the ordered extents for one corresponding to 'offset' and
517 * try to find a checksum. This is used because we allow pages to
518 * be reclaimed before their checksum is actually put into the btree
519 */
510int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum) 520int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum)
511{ 521{
512 struct btrfs_ordered_sum *ordered_sum; 522 struct btrfs_ordered_sum *ordered_sum;
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 33f0d9e91b11..98f491d1022b 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -19,12 +19,19 @@
19#ifndef __BTRFS_ORDERED_DATA__ 19#ifndef __BTRFS_ORDERED_DATA__
20#define __BTRFS_ORDERED_DATA__ 20#define __BTRFS_ORDERED_DATA__
21 21
22/* one of these per inode */
22struct btrfs_ordered_inode_tree { 23struct btrfs_ordered_inode_tree {
23 struct mutex mutex; 24 struct mutex mutex;
24 struct rb_root tree; 25 struct rb_root tree;
25 struct rb_node *last; 26 struct rb_node *last;
26}; 27};
27 28
29/*
30 * these are used to collect checksums done just before bios submission.
31 * They are attached via a list into the ordered extent, and
32 * checksum items are inserted into the tree after all the blocks in
33 * the ordered extent are on disk
34 */
28struct btrfs_sector_sum { 35struct btrfs_sector_sum {
29 u64 offset; 36 u64 offset;
30 u32 sum; 37 u32 sum;
@@ -34,27 +41,56 @@ struct btrfs_ordered_sum {
34 u64 file_offset; 41 u64 file_offset;
35 u64 len; 42 u64 len;
36 struct list_head list; 43 struct list_head list;
44 /* last field is a variable length array of btrfs_sector_sums */
37 struct btrfs_sector_sum sums; 45 struct btrfs_sector_sum sums;
38}; 46};
39 47
40/* bits for the flags field */ 48/*
49 * bits for the flags field:
50 *
51 * BTRFS_ORDERED_IO_DONE is set when all of the blocks are written.
52 * It is used to make sure metadata is inserted into the tree only once
53 * per extent.
54 *
55 * BTRFS_ORDERED_COMPLETE is set when the extent is removed from the
56 * rbtree, just before waking any waiters. It is used to indicate the
57 * IO is done and any metadata is inserted into the tree.
58 */
41#define BTRFS_ORDERED_IO_DONE 0 /* set when all the pages are written */ 59#define BTRFS_ORDERED_IO_DONE 0 /* set when all the pages are written */
60
42#define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */ 61#define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */
43#define BTRFS_ORDERED_START 2 /* set when tree setup */
44 62
45struct btrfs_ordered_extent { 63struct btrfs_ordered_extent {
64 /* logical offset in the file */
46 u64 file_offset; 65 u64 file_offset;
66
67 /* disk byte number */
47 u64 start; 68 u64 start;
69
70 /* length of the extent in bytes */
48 u64 len; 71 u64 len;
72
73 /* flags (described above) */
49 unsigned long flags; 74 unsigned long flags;
75
76 /* reference count */
50 atomic_t refs; 77 atomic_t refs;
78
79 /* list of checksums for insertion when the extent io is done */
51 struct list_head list; 80 struct list_head list;
52 struct inode *inode; 81
82 /* used to wait for the BTRFS_ORDERED_COMPLETE bit */
53 wait_queue_head_t wait; 83 wait_queue_head_t wait;
84
85 /* our friendly rbtree entry */
54 struct rb_node rb_node; 86 struct rb_node rb_node;
55}; 87};
56 88
57 89
90/*
91 * calculates the total size you need to allocate for an ordered sum
92 * structure spanning 'bytes' in the file
93 */
58static inline int btrfs_ordered_sum_size(struct btrfs_root *root, u64 bytes) 94static inline int btrfs_ordered_sum_size(struct btrfs_root *root, u64 bytes)
59{ 95{
60 unsigned long num_sectors = (bytes + root->sectorsize - 1) / 96 unsigned long num_sectors = (bytes + root->sectorsize - 1) /
@@ -81,14 +117,11 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
81int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum); 117int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum);
82struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, 118struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
83 u64 file_offset); 119 u64 file_offset);
84void btrfs_wait_ordered_extent(struct inode *inode, 120void btrfs_start_ordered_extent(struct inode *inode,
85 struct btrfs_ordered_extent *entry); 121 struct btrfs_ordered_extent *entry, int wait);
86void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); 122void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
87struct btrfs_ordered_extent * 123struct btrfs_ordered_extent *
88btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); 124btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
89int btrfs_add_ordered_pending(struct inode *inode,
90 struct btrfs_ordered_extent *ordered,
91 u64 start, u64 len);
92int btrfs_ordered_update_i_size(struct inode *inode, 125int btrfs_ordered_update_i_size(struct inode *inode,
93 struct btrfs_ordered_extent *ordered); 126 struct btrfs_ordered_extent *ordered);
94int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum); 127int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum);