aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r--fs/btrfs/file.c302
1 files changed, 280 insertions, 22 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 75899a01dded..c6a22d783c35 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -40,6 +40,263 @@
40#include "locking.h" 40#include "locking.h"
41#include "compat.h" 41#include "compat.h"
42 42
43/*
44 * when auto defrag is enabled we
45 * queue up these defrag structs to remember which
46 * inodes need defragging passes
47 */
48struct inode_defrag {
49 struct rb_node rb_node;
50 /* objectid */
51 u64 ino;
52 /*
53 * transid where the defrag was added, we search for
54 * extents newer than this
55 */
56 u64 transid;
57
58 /* root objectid */
59 u64 root;
60
61 /* last offset we were able to defrag */
62 u64 last_offset;
63
64 /* if we've wrapped around back to zero once already */
65 int cycled;
66};
67
68/* pop a record for an inode into the defrag tree. The lock
69 * must be held already
70 *
71 * If you're inserting a record for an older transid than an
72 * existing record, the transid already in the tree is lowered
73 *
74 * If an existing record is found the defrag item you
75 * pass in is freed
76 */
77static int __btrfs_add_inode_defrag(struct inode *inode,
78 struct inode_defrag *defrag)
79{
80 struct btrfs_root *root = BTRFS_I(inode)->root;
81 struct inode_defrag *entry;
82 struct rb_node **p;
83 struct rb_node *parent = NULL;
84
85 p = &root->fs_info->defrag_inodes.rb_node;
86 while (*p) {
87 parent = *p;
88 entry = rb_entry(parent, struct inode_defrag, rb_node);
89
90 if (defrag->ino < entry->ino)
91 p = &parent->rb_left;
92 else if (defrag->ino > entry->ino)
93 p = &parent->rb_right;
94 else {
95 /* if we're reinserting an entry for
96 * an old defrag run, make sure to
97 * lower the transid of our existing record
98 */
99 if (defrag->transid < entry->transid)
100 entry->transid = defrag->transid;
101 if (defrag->last_offset > entry->last_offset)
102 entry->last_offset = defrag->last_offset;
103 goto exists;
104 }
105 }
106 BTRFS_I(inode)->in_defrag = 1;
107 rb_link_node(&defrag->rb_node, parent, p);
108 rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
109 return 0;
110
111exists:
112 kfree(defrag);
113 return 0;
114
115}
116
117/*
118 * insert a defrag record for this inode if auto defrag is
119 * enabled
120 */
121int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
122 struct inode *inode)
123{
124 struct btrfs_root *root = BTRFS_I(inode)->root;
125 struct inode_defrag *defrag;
126 int ret = 0;
127 u64 transid;
128
129 if (!btrfs_test_opt(root, AUTO_DEFRAG))
130 return 0;
131
132 if (root->fs_info->closing)
133 return 0;
134
135 if (BTRFS_I(inode)->in_defrag)
136 return 0;
137
138 if (trans)
139 transid = trans->transid;
140 else
141 transid = BTRFS_I(inode)->root->last_trans;
142
143 defrag = kzalloc(sizeof(*defrag), GFP_NOFS);
144 if (!defrag)
145 return -ENOMEM;
146
147 defrag->ino = inode->i_ino;
148 defrag->transid = transid;
149 defrag->root = root->root_key.objectid;
150
151 spin_lock(&root->fs_info->defrag_inodes_lock);
152 if (!BTRFS_I(inode)->in_defrag)
153 ret = __btrfs_add_inode_defrag(inode, defrag);
154 spin_unlock(&root->fs_info->defrag_inodes_lock);
155 return ret;
156}
157
158/*
159 * must be called with the defrag_inodes lock held
160 */
161struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, u64 ino,
162 struct rb_node **next)
163{
164 struct inode_defrag *entry = NULL;
165 struct rb_node *p;
166 struct rb_node *parent = NULL;
167
168 p = info->defrag_inodes.rb_node;
169 while (p) {
170 parent = p;
171 entry = rb_entry(parent, struct inode_defrag, rb_node);
172
173 if (ino < entry->ino)
174 p = parent->rb_left;
175 else if (ino > entry->ino)
176 p = parent->rb_right;
177 else
178 return entry;
179 }
180
181 if (next) {
182 while (parent && ino > entry->ino) {
183 parent = rb_next(parent);
184 entry = rb_entry(parent, struct inode_defrag, rb_node);
185 }
186 *next = parent;
187 }
188 return NULL;
189}
190
191/*
192 * run through the list of inodes in the FS that need
193 * defragging
194 */
195int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
196{
197 struct inode_defrag *defrag;
198 struct btrfs_root *inode_root;
199 struct inode *inode;
200 struct rb_node *n;
201 struct btrfs_key key;
202 struct btrfs_ioctl_defrag_range_args range;
203 u64 first_ino = 0;
204 int num_defrag;
205 int defrag_batch = 1024;
206
207 memset(&range, 0, sizeof(range));
208 range.len = (u64)-1;
209
210 atomic_inc(&fs_info->defrag_running);
211 spin_lock(&fs_info->defrag_inodes_lock);
212 while(1) {
213 n = NULL;
214
215 /* find an inode to defrag */
216 defrag = btrfs_find_defrag_inode(fs_info, first_ino, &n);
217 if (!defrag) {
218 if (n)
219 defrag = rb_entry(n, struct inode_defrag, rb_node);
220 else if (first_ino) {
221 first_ino = 0;
222 continue;
223 } else {
224 break;
225 }
226 }
227
228 /* remove it from the rbtree */
229 first_ino = defrag->ino + 1;
230 rb_erase(&defrag->rb_node, &fs_info->defrag_inodes);
231
232 if (fs_info->closing)
233 goto next_free;
234
235 spin_unlock(&fs_info->defrag_inodes_lock);
236
237 /* get the inode */
238 key.objectid = defrag->root;
239 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
240 key.offset = (u64)-1;
241 inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
242 if (IS_ERR(inode_root))
243 goto next;
244
245 key.objectid = defrag->ino;
246 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
247 key.offset = 0;
248
249 inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
250 if (IS_ERR(inode))
251 goto next;
252
253 /* do a chunk of defrag */
254 BTRFS_I(inode)->in_defrag = 0;
255 range.start = defrag->last_offset;
256 num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
257 defrag_batch);
258 /*
259 * if we filled the whole defrag batch, there
260 * must be more work to do. Queue this defrag
261 * again
262 */
263 if (num_defrag == defrag_batch) {
264 defrag->last_offset = range.start;
265 __btrfs_add_inode_defrag(inode, defrag);
266 /*
267 * we don't want to kfree defrag, we added it back to
268 * the rbtree
269 */
270 defrag = NULL;
271 } else if (defrag->last_offset && !defrag->cycled) {
272 /*
273 * we didn't fill our defrag batch, but
274 * we didn't start at zero. Make sure we loop
275 * around to the start of the file.
276 */
277 defrag->last_offset = 0;
278 defrag->cycled = 1;
279 __btrfs_add_inode_defrag(inode, defrag);
280 defrag = NULL;
281 }
282
283 iput(inode);
284next:
285 spin_lock(&fs_info->defrag_inodes_lock);
286next_free:
287 kfree(defrag);
288 }
289 spin_unlock(&fs_info->defrag_inodes_lock);
290
291 atomic_dec(&fs_info->defrag_running);
292
293 /*
294 * during unmount, we use the transaction_wait queue to
295 * wait for the defragger to stop
296 */
297 wake_up(&fs_info->transaction_wait);
298 return 0;
299}
43 300
44/* simple helper to fault in pages and copy. This should go away 301/* simple helper to fault in pages and copy. This should go away
45 * and be replaced with calls into generic code. 302 * and be replaced with calls into generic code.
@@ -191,9 +448,9 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
191 } 448 }
192 while (1) { 449 while (1) {
193 if (!split) 450 if (!split)
194 split = alloc_extent_map(GFP_NOFS); 451 split = alloc_extent_map();
195 if (!split2) 452 if (!split2)
196 split2 = alloc_extent_map(GFP_NOFS); 453 split2 = alloc_extent_map();
197 BUG_ON(!split || !split2); 454 BUG_ON(!split || !split2);
198 455
199 write_lock(&em_tree->lock); 456 write_lock(&em_tree->lock);
@@ -298,6 +555,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
298 struct btrfs_path *path; 555 struct btrfs_path *path;
299 struct btrfs_key key; 556 struct btrfs_key key;
300 struct btrfs_key new_key; 557 struct btrfs_key new_key;
558 u64 ino = btrfs_ino(inode);
301 u64 search_start = start; 559 u64 search_start = start;
302 u64 disk_bytenr = 0; 560 u64 disk_bytenr = 0;
303 u64 num_bytes = 0; 561 u64 num_bytes = 0;
@@ -318,14 +576,14 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
318 576
319 while (1) { 577 while (1) {
320 recow = 0; 578 recow = 0;
321 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 579 ret = btrfs_lookup_file_extent(trans, root, path, ino,
322 search_start, -1); 580 search_start, -1);
323 if (ret < 0) 581 if (ret < 0)
324 break; 582 break;
325 if (ret > 0 && path->slots[0] > 0 && search_start == start) { 583 if (ret > 0 && path->slots[0] > 0 && search_start == start) {
326 leaf = path->nodes[0]; 584 leaf = path->nodes[0];
327 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1); 585 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
328 if (key.objectid == inode->i_ino && 586 if (key.objectid == ino &&
329 key.type == BTRFS_EXTENT_DATA_KEY) 587 key.type == BTRFS_EXTENT_DATA_KEY)
330 path->slots[0]--; 588 path->slots[0]--;
331 } 589 }
@@ -346,7 +604,7 @@ next_slot:
346 } 604 }
347 605
348 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 606 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
349 if (key.objectid > inode->i_ino || 607 if (key.objectid > ino ||
350 key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) 608 key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
351 break; 609 break;
352 610
@@ -376,7 +634,7 @@ next_slot:
376 634
377 search_start = max(key.offset, start); 635 search_start = max(key.offset, start);
378 if (recow) { 636 if (recow) {
379 btrfs_release_path(root, path); 637 btrfs_release_path(path);
380 continue; 638 continue;
381 } 639 }
382 640
@@ -393,7 +651,7 @@ next_slot:
393 ret = btrfs_duplicate_item(trans, root, path, 651 ret = btrfs_duplicate_item(trans, root, path,
394 &new_key); 652 &new_key);
395 if (ret == -EAGAIN) { 653 if (ret == -EAGAIN) {
396 btrfs_release_path(root, path); 654 btrfs_release_path(path);
397 continue; 655 continue;
398 } 656 }
399 if (ret < 0) 657 if (ret < 0)
@@ -516,7 +774,7 @@ next_slot:
516 del_nr = 0; 774 del_nr = 0;
517 del_slot = 0; 775 del_slot = 0;
518 776
519 btrfs_release_path(root, path); 777 btrfs_release_path(path);
520 continue; 778 continue;
521 } 779 }
522 780
@@ -592,6 +850,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
592 int del_slot = 0; 850 int del_slot = 0;
593 int recow; 851 int recow;
594 int ret; 852 int ret;
853 u64 ino = btrfs_ino(inode);
595 854
596 btrfs_drop_extent_cache(inode, start, end - 1, 0); 855 btrfs_drop_extent_cache(inode, start, end - 1, 0);
597 856
@@ -600,7 +859,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
600again: 859again:
601 recow = 0; 860 recow = 0;
602 split = start; 861 split = start;
603 key.objectid = inode->i_ino; 862 key.objectid = ino;
604 key.type = BTRFS_EXTENT_DATA_KEY; 863 key.type = BTRFS_EXTENT_DATA_KEY;
605 key.offset = split; 864 key.offset = split;
606 865
@@ -612,8 +871,7 @@ again:
612 871
613 leaf = path->nodes[0]; 872 leaf = path->nodes[0];
614 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 873 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
615 BUG_ON(key.objectid != inode->i_ino || 874 BUG_ON(key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY);
616 key.type != BTRFS_EXTENT_DATA_KEY);
617 fi = btrfs_item_ptr(leaf, path->slots[0], 875 fi = btrfs_item_ptr(leaf, path->slots[0],
618 struct btrfs_file_extent_item); 876 struct btrfs_file_extent_item);
619 BUG_ON(btrfs_file_extent_type(leaf, fi) != 877 BUG_ON(btrfs_file_extent_type(leaf, fi) !=
@@ -630,7 +888,7 @@ again:
630 other_start = 0; 888 other_start = 0;
631 other_end = start; 889 other_end = start;
632 if (extent_mergeable(leaf, path->slots[0] - 1, 890 if (extent_mergeable(leaf, path->slots[0] - 1,
633 inode->i_ino, bytenr, orig_offset, 891 ino, bytenr, orig_offset,
634 &other_start, &other_end)) { 892 &other_start, &other_end)) {
635 new_key.offset = end; 893 new_key.offset = end;
636 btrfs_set_item_key_safe(trans, root, path, &new_key); 894 btrfs_set_item_key_safe(trans, root, path, &new_key);
@@ -653,7 +911,7 @@ again:
653 other_start = end; 911 other_start = end;
654 other_end = 0; 912 other_end = 0;
655 if (extent_mergeable(leaf, path->slots[0] + 1, 913 if (extent_mergeable(leaf, path->slots[0] + 1,
656 inode->i_ino, bytenr, orig_offset, 914 ino, bytenr, orig_offset,
657 &other_start, &other_end)) { 915 &other_start, &other_end)) {
658 fi = btrfs_item_ptr(leaf, path->slots[0], 916 fi = btrfs_item_ptr(leaf, path->slots[0],
659 struct btrfs_file_extent_item); 917 struct btrfs_file_extent_item);
@@ -681,7 +939,7 @@ again:
681 new_key.offset = split; 939 new_key.offset = split;
682 ret = btrfs_duplicate_item(trans, root, path, &new_key); 940 ret = btrfs_duplicate_item(trans, root, path, &new_key);
683 if (ret == -EAGAIN) { 941 if (ret == -EAGAIN) {
684 btrfs_release_path(root, path); 942 btrfs_release_path(path);
685 goto again; 943 goto again;
686 } 944 }
687 BUG_ON(ret < 0); 945 BUG_ON(ret < 0);
@@ -702,7 +960,7 @@ again:
702 960
703 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, 961 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
704 root->root_key.objectid, 962 root->root_key.objectid,
705 inode->i_ino, orig_offset); 963 ino, orig_offset);
706 BUG_ON(ret); 964 BUG_ON(ret);
707 965
708 if (split == start) { 966 if (split == start) {
@@ -718,10 +976,10 @@ again:
718 other_start = end; 976 other_start = end;
719 other_end = 0; 977 other_end = 0;
720 if (extent_mergeable(leaf, path->slots[0] + 1, 978 if (extent_mergeable(leaf, path->slots[0] + 1,
721 inode->i_ino, bytenr, orig_offset, 979 ino, bytenr, orig_offset,
722 &other_start, &other_end)) { 980 &other_start, &other_end)) {
723 if (recow) { 981 if (recow) {
724 btrfs_release_path(root, path); 982 btrfs_release_path(path);
725 goto again; 983 goto again;
726 } 984 }
727 extent_end = other_end; 985 extent_end = other_end;
@@ -729,16 +987,16 @@ again:
729 del_nr++; 987 del_nr++;
730 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 988 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
731 0, root->root_key.objectid, 989 0, root->root_key.objectid,
732 inode->i_ino, orig_offset); 990 ino, orig_offset);
733 BUG_ON(ret); 991 BUG_ON(ret);
734 } 992 }
735 other_start = 0; 993 other_start = 0;
736 other_end = start; 994 other_end = start;
737 if (extent_mergeable(leaf, path->slots[0] - 1, 995 if (extent_mergeable(leaf, path->slots[0] - 1,
738 inode->i_ino, bytenr, orig_offset, 996 ino, bytenr, orig_offset,
739 &other_start, &other_end)) { 997 &other_start, &other_end)) {
740 if (recow) { 998 if (recow) {
741 btrfs_release_path(root, path); 999 btrfs_release_path(path);
742 goto again; 1000 goto again;
743 } 1001 }
744 key.offset = other_start; 1002 key.offset = other_start;
@@ -746,7 +1004,7 @@ again:
746 del_nr++; 1004 del_nr++;
747 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 1005 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
748 0, root->root_key.objectid, 1006 0, root->root_key.objectid,
749 inode->i_ino, orig_offset); 1007 ino, orig_offset);
750 BUG_ON(ret); 1008 BUG_ON(ret);
751 } 1009 }
752 if (del_nr == 0) { 1010 if (del_nr == 0) {
@@ -1375,7 +1633,7 @@ static long btrfs_fallocate(struct file *file, int mode,
1375 while (1) { 1633 while (1) {
1376 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 1634 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
1377 alloc_end - cur_offset, 0); 1635 alloc_end - cur_offset, 0);
1378 BUG_ON(IS_ERR(em) || !em); 1636 BUG_ON(IS_ERR_OR_NULL(em));
1379 last_byte = min(extent_map_end(em), alloc_end); 1637 last_byte = min(extent_map_end(em), alloc_end);
1380 last_byte = (last_byte + mask) & ~mask; 1638 last_byte = (last_byte + mask) & ~mask;
1381 if (em->block_start == EXTENT_MAP_HOLE || 1639 if (em->block_start == EXTENT_MAP_HOLE ||