aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/Makefile4
-rw-r--r--fs/btrfs/TODO1
-rw-r--r--fs/btrfs/ctree.h25
-rw-r--r--fs/btrfs/disk-io.c46
-rw-r--r--fs/btrfs/file.c783
-rw-r--r--fs/btrfs/inode.c2210
-rw-r--r--fs/btrfs/super.c2879
7 files changed, 3070 insertions, 2878 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 5346f706b2c5..5af2426116d3 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -4,7 +4,7 @@ ifneq ($(KERNELRELEASE),)
4obj-m := btrfs.o 4obj-m := btrfs.o
5btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ 5btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
6 hash.o file-item.o inode-item.o inode-map.o disk-io.o \ 6 hash.o file-item.o inode-item.o inode-map.o disk-io.o \
7 transaction.o bit-radix.o 7 transaction.o bit-radix.o inode.o file.o
8 8
9#btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ 9#btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \
10# root-tree.o dir-item.o hash.o file-item.o inode-item.o \ 10# root-tree.o dir-item.o hash.o file-item.o inode-item.o \
@@ -16,7 +16,7 @@ else
16 16
17KERNELDIR := /lib/modules/`uname -r`/build 17KERNELDIR := /lib/modules/`uname -r`/build
18all:: 18all::
19 $(MAKE) C=1 -C $(KERNELDIR) M=`pwd` modules 19 $(MAKE) -C $(KERNELDIR) M=`pwd` modules
20clean:: 20clean::
21 rm *.o btrfs.ko 21 rm *.o btrfs.ko
22endif 22endif
diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO
index d9b6d38c603a..4a48a2365d57 100644
--- a/fs/btrfs/TODO
+++ b/fs/btrfs/TODO
@@ -1,3 +1,4 @@
1* fix printk warnings
1* cleanup, add more error checking, get rid of BUG_ONs 2* cleanup, add more error checking, get rid of BUG_ONs
2* Fix ENOSPC handling 3* Fix ENOSPC handling
3* Make allocator smarter 4* Make allocator smarter
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4e136b7b03ae..24410d42e3ce 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1123,5 +1123,28 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
1123int btrfs_csum_truncate(struct btrfs_trans_handle *trans, 1123int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
1124 struct btrfs_root *root, struct btrfs_path *path, 1124 struct btrfs_root *root, struct btrfs_path *path,
1125 u64 isize); 1125 u64 isize);
1126/* super.c */ 1126/* inode.c */
1127void btrfs_delete_inode(struct inode *inode);
1128void btrfs_read_locked_inode(struct inode *inode);
1129int btrfs_write_inode(struct inode *inode, int wait);
1130void btrfs_dirty_inode(struct inode *inode);
1131struct inode *btrfs_alloc_inode(struct super_block *sb);
1132void btrfs_destroy_inode(struct inode *inode);
1133int btrfs_init_cachep(void);
1134void btrfs_destroy_cachep(void);
1135int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
1136 unsigned long arg);
1137long btrfs_compat_ioctl(struct file *file, unsigned int cmd,
1138 unsigned long arg);
1139struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
1140 struct btrfs_root *root);
1141int btrfs_commit_write(struct file *file, struct page *page,
1142 unsigned from, unsigned to);
1143int btrfs_get_block(struct inode *inode, sector_t iblock,
1144 struct buffer_head *result, int create);
1145/* file.c */
1146extern struct file_operations btrfs_file_operations;
1147int btrfs_drop_extents(struct btrfs_trans_handle *trans,
1148 struct btrfs_root *root, struct inode *inode,
1149 u64 start, u64 end, u64 *hint_block);
1127#endif 1150#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8a88404525e8..96bf3ef3a798 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -22,7 +22,7 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf)
22 if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) { 22 if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) {
23 printk(KERN_CRIT "bh_blocknr(buf) is %Lu, header is %Lu\n", 23 printk(KERN_CRIT "bh_blocknr(buf) is %Lu, header is %Lu\n",
24 bh_blocknr(buf), btrfs_header_blocknr(&node->header)); 24 bh_blocknr(buf), btrfs_header_blocknr(&node->header));
25 BUG(); 25 return 1;
26 } 26 }
27 return 0; 27 return 0;
28} 28}
@@ -253,7 +253,7 @@ uptodate:
253 set_buffer_checked(bh); 253 set_buffer_checked(bh);
254 } 254 }
255 if (check_tree_block(root, bh)) 255 if (check_tree_block(root, bh))
256 BUG(); 256 goto fail;
257 return bh; 257 return bh;
258fail: 258fail:
259 brelse(bh); 259 brelse(bh);
@@ -398,8 +398,13 @@ struct btrfs_root *open_ctree(struct super_block *sb)
398 struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), 398 struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info),
399 GFP_NOFS); 399 GFP_NOFS);
400 int ret; 400 int ret;
401 int err = -EIO;
401 struct btrfs_super_block *disk_super; 402 struct btrfs_super_block *disk_super;
402 403
404 if (!extent_root || !tree_root || !fs_info) {
405 err = -ENOMEM;
406 goto fail;
407 }
403 init_bit_radix(&fs_info->pinned_radix); 408 init_bit_radix(&fs_info->pinned_radix);
404 init_bit_radix(&fs_info->pending_del_radix); 409 init_bit_radix(&fs_info->pending_del_radix);
405 init_bit_radix(&fs_info->extent_map_radix); 410 init_bit_radix(&fs_info->extent_map_radix);
@@ -431,9 +436,11 @@ struct btrfs_root *open_ctree(struct super_block *sb)
431 mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); 436 mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
432 fs_info->hash_tfm = crypto_alloc_hash("crc32c", 0, CRYPTO_ALG_ASYNC); 437 fs_info->hash_tfm = crypto_alloc_hash("crc32c", 0, CRYPTO_ALG_ASYNC);
433 spin_lock_init(&fs_info->hash_lock); 438 spin_lock_init(&fs_info->hash_lock);
439
434 if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) { 440 if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) {
435 printk("failed to allocate digest hash\n"); 441 printk("btrfs: failed hash setup, modprobe cryptomgr?\n");
436 return NULL; 442 err = -ENOMEM;
443 goto fail_iput;
437 } 444 }
438 mutex_init(&fs_info->trans_mutex); 445 mutex_init(&fs_info->trans_mutex);
439 mutex_init(&fs_info->fs_mutex); 446 mutex_init(&fs_info->fs_mutex);
@@ -446,30 +453,53 @@ struct btrfs_root *open_ctree(struct super_block *sb)
446 sb->s_blocksize); 453 sb->s_blocksize);
447 454
448 if (!fs_info->sb_buffer) 455 if (!fs_info->sb_buffer)
449 return NULL; 456 goto fail_iput;
450 disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; 457 disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data;
458
451 if (!btrfs_super_root(disk_super)) 459 if (!btrfs_super_root(disk_super))
452 return NULL; 460 goto fail_sb_buffer;
453 461
454 i_size_write(fs_info->btree_inode, 462 i_size_write(fs_info->btree_inode,
455 btrfs_super_total_blocks(disk_super) << 463 btrfs_super_total_blocks(disk_super) <<
456 fs_info->btree_inode->i_blkbits); 464 fs_info->btree_inode->i_blkbits);
457 465
458 fs_info->disk_super = disk_super; 466 fs_info->disk_super = disk_super;
467
468 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
469 sizeof(disk_super->magic))) {
470 printk("btrfs: valid FS not found on %s\n", sb->s_id);
471 goto fail_sb_buffer;
472 }
459 tree_root->node = read_tree_block(tree_root, 473 tree_root->node = read_tree_block(tree_root,
460 btrfs_super_root(disk_super)); 474 btrfs_super_root(disk_super));
461 BUG_ON(!tree_root->node); 475 if (!tree_root->node)
476 goto fail_sb_buffer;
462 477
463 mutex_lock(&fs_info->fs_mutex); 478 mutex_lock(&fs_info->fs_mutex);
464 ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, 479 ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info,
465 BTRFS_EXTENT_TREE_OBJECTID, extent_root); 480 BTRFS_EXTENT_TREE_OBJECTID, extent_root);
466 BUG_ON(ret); 481 if (ret) {
482 mutex_unlock(&fs_info->fs_mutex);
483 goto fail_tree_root;
484 }
467 485
468 btrfs_read_block_groups(extent_root); 486 btrfs_read_block_groups(extent_root);
469 487
470 fs_info->generation = btrfs_super_generation(disk_super) + 1; 488 fs_info->generation = btrfs_super_generation(disk_super) + 1;
471 mutex_unlock(&fs_info->fs_mutex); 489 mutex_unlock(&fs_info->fs_mutex);
472 return tree_root; 490 return tree_root;
491
492fail_tree_root:
493 btrfs_block_release(tree_root, tree_root->node);
494fail_sb_buffer:
495 btrfs_block_release(tree_root, fs_info->sb_buffer);
496fail_iput:
497 iput(fs_info->btree_inode);
498fail:
499 kfree(extent_root);
500 kfree(tree_root);
501 kfree(fs_info);
502 return ERR_PTR(err);
473} 503}
474 504
475int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root 505int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
new file mode 100644
index 000000000000..7f8e3035d99e
--- /dev/null
+++ b/fs/btrfs/file.c
@@ -0,0 +1,783 @@
1#include <linux/module.h>
2#include <linux/buffer_head.h>
3#include <linux/fs.h>
4#include <linux/pagemap.h>
5#include <linux/highmem.h>
6#include <linux/time.h>
7#include <linux/init.h>
8#include <linux/string.h>
9#include <linux/smp_lock.h>
10#include <linux/backing-dev.h>
11#include <linux/mpage.h>
12#include <linux/swap.h>
13#include <linux/writeback.h>
14#include <linux/statfs.h>
15#include <linux/compat.h>
16#include "ctree.h"
17#include "disk-io.h"
18#include "transaction.h"
19#include "btrfs_inode.h"
20#include "ioctl.h"
21#include "print-tree.h"
22
23
24static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
25 struct page **prepared_pages,
26 const char __user * buf)
27{
28 long page_fault = 0;
29 int i;
30 int offset = pos & (PAGE_CACHE_SIZE - 1);
31
32 for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
33 size_t count = min_t(size_t,
34 PAGE_CACHE_SIZE - offset, write_bytes);
35 struct page *page = prepared_pages[i];
36 fault_in_pages_readable(buf, count);
37
38 /* Copy data from userspace to the current page */
39 kmap(page);
40 page_fault = __copy_from_user(page_address(page) + offset,
41 buf, count);
42 /* Flush processor's dcache for this page */
43 flush_dcache_page(page);
44 kunmap(page);
45 buf += count;
46 write_bytes -= count;
47
48 if (page_fault)
49 break;
50 }
51 return page_fault ? -EFAULT : 0;
52}
53
54static void btrfs_drop_pages(struct page **pages, size_t num_pages)
55{
56 size_t i;
57 for (i = 0; i < num_pages; i++) {
58 if (!pages[i])
59 break;
60 unlock_page(pages[i]);
61 mark_page_accessed(pages[i]);
62 page_cache_release(pages[i]);
63 }
64}
65
/*
 * For each of the num_pages pages: open a transaction under fs_mutex,
 * either insert an inline extent item holding the data or checksum the
 * page, mark the page checked, end the transaction and finally hand the
 * written byte range to btrfs_commit_write().
 *
 * The incoming 'trans' argument is not used as passed in: it is
 * overwritten by the btrfs_start_transaction() call made for every page.
 *
 * This function does not itself unlock or release the pages; its caller
 * in this file calls btrfs_drop_pages() afterwards.
 *
 * Returns 0, or the first non-zero value returned by btrfs_commit_write()
 * (remaining pages are then left unprocessed).
 */
66static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
67 struct btrfs_root *root,
68 struct file *file,
69 struct page **pages,
70 size_t num_pages,
71 loff_t pos,
72 size_t write_bytes)
73{
74 int i;
75 int offset;
76 int err = 0;
77 int ret;
78 int this_write;
79 struct inode *inode = file->f_path.dentry->d_inode;
80 struct buffer_head *bh;
81 struct btrfs_file_extent_item *ei;
82
83 for (i = 0; i < num_pages; i++) {
/* byte range of this page that the write covers */
84 offset = pos & (PAGE_CACHE_SIZE -1);
85 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
86 /* FIXME, one block at a time */
87
88 mutex_lock(&root->fs_info->fs_mutex);
/* NOTE(review): the return value of btrfs_start_transaction is not checked */
89 trans = btrfs_start_transaction(root, 1);
90 btrfs_set_trans_block_group(trans, inode);
91
/* only the first buffer head of the page is examined */
92 bh = page_buffers(pages[i]);
93
/* a mapped buffer with block number 0 selects the inline-extent path */
94 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
95 struct btrfs_key key;
96 struct btrfs_path *path;
97 char *ptr;
98 u32 datasize;
99
100 /* create an inline extent, and copy the data in */
101 path = btrfs_alloc_path();
102 BUG_ON(!path);
103 key.objectid = inode->i_ino;
104 key.offset = pages[i]->index << PAGE_CACHE_SHIFT;
105 key.flags = 0;
106 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
107 BUG_ON(write_bytes >= PAGE_CACHE_SIZE);
/* item size covers the leading 'offset' bytes plus the written bytes */
108 datasize = offset +
109 btrfs_file_extent_calc_inline_size(write_bytes);
110
111 ret = btrfs_insert_empty_item(trans, root, path, &key,
112 datasize);
113 BUG_ON(ret);
114 ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
115 path->slots[0], struct btrfs_file_extent_item);
116 btrfs_set_file_extent_generation(ei, trans->transid);
117 btrfs_set_file_extent_type(ei,
118 BTRFS_FILE_EXTENT_INLINE);
119 ptr = btrfs_file_extent_inline_start(ei);
/* copy offset + write_bytes bytes from the buffer head data into the item */
120 btrfs_memcpy(root, path->nodes[0]->b_data,
121 ptr, bh->b_data, offset + write_bytes);
122 mark_buffer_dirty(path->nodes[0]);
123 btrfs_free_path(path);
124 } else if (buffer_mapped(bh)) {
125 /* csum the file data */
126 btrfs_csum_file_block(trans, root, inode->i_ino,
127 pages[i]->index << PAGE_CACHE_SHIFT,
128 kmap(pages[i]), PAGE_CACHE_SIZE);
129 kunmap(pages[i]);
130 }
131 SetPageChecked(pages[i]);
132 ret = btrfs_end_transaction(trans, root);
133 BUG_ON(ret);
134 mutex_unlock(&root->fs_info->fs_mutex);
135
/* called after fs_mutex has been dropped */
136 ret = btrfs_commit_write(file, pages[i], offset,
137 offset + this_write);
/* pos is advanced before the error check below */
138 pos += this_write;
139 if (ret) {
140 err = ret;
141 goto failed;
142 }
143 WARN_ON(this_write > write_bytes);
144 write_bytes -= this_write;
145 }
146failed:
147 return err;
148}
149
150/*
151 * this is very complex, but the basic idea is to drop all extents
152 * in the range start - end. hint_block is filled in with a block number
153 * that would be a good hint to the block allocator for this file.
154 *
155 * If an extent intersects the range but is not entirely inside the range
156 * it is either truncated or split. Anything entirely inside the range
157 * is deleted from the tree.
158 */
/*
 * Parameters:
 *   trans, root - transaction handle and tree passed to every tree operation
 *   inode       - file whose extents are dropped; its i_blocks is adjusted
 *   start, end  - byte range to drop (items with key.offset >= end stop
 *                 the walk)
 *   hint_block  - out: set to the disk block number of each regular extent
 *                 that is truncated or deleted; not written otherwise
 *
 * Returns -ENOMEM when no path can be allocated, a negative value passed
 * through from btrfs_lookup_file_extent(), and 0 on every other exit.
 * Failures of the item/extent helpers trigger BUG_ON() rather than being
 * returned.
 */
159int btrfs_drop_extents(struct btrfs_trans_handle *trans,
160 struct btrfs_root *root, struct inode *inode,
161 u64 start, u64 end, u64 *hint_block)
162{
163 int ret;
164 struct btrfs_key key;
165 struct btrfs_leaf *leaf;
166 int slot;
167 struct btrfs_file_extent_item *extent;
168 u64 extent_end = 0;
169 int keep;
170 struct btrfs_file_extent_item old;
171 struct btrfs_path *path;
172 u64 search_start = start;
173 int bookend;
174 int found_type;
175 int found_extent;
176 int found_inline;
177
178 path = btrfs_alloc_path();
179 if (!path)
180 return -ENOMEM;
/* each pass handles one extent item, then restarts the search at search_start */
181 while(1) {
182 btrfs_release_path(root, path);
183 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
184 search_start, -1);
185 if (ret < 0)
186 goto out;
/*
 * lookup returned > 0: step back one slot, or stop if already at slot 0.
 * NOTE(review): presumably > 0 means "no exact key match" and the previous
 * item is the one that may cover search_start -- confirm against
 * btrfs_lookup_file_extent.
 */
187 if (ret > 0) {
188 if (path->slots[0] == 0) {
189 ret = 0;
190 goto out;
191 }
192 path->slots[0]--;
193 }
194 keep = 0;
195 bookend = 0;
196 found_extent = 0;
197 found_inline = 0;
198 extent = NULL;
199 leaf = btrfs_buffer_leaf(path->nodes[0]);
200 slot = path->slots[0];
201 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
/* stop once the item is past the range or belongs to another inode */
202 if (key.offset >= end || key.objectid != inode->i_ino) {
203 ret = 0;
204 goto out;
205 }
206 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) {
207 ret = 0;
208 goto out;
209 }
210 extent = btrfs_item_ptr(leaf, slot,
211 struct btrfs_file_extent_item);
212 found_type = btrfs_file_extent_type(extent);
/* compute the byte offset one past the end of this extent */
213 if (found_type == BTRFS_FILE_EXTENT_REG) {
214 extent_end = key.offset +
215 (btrfs_file_extent_num_blocks(extent) <<
216 inode->i_blkbits);
217 found_extent = 1;
218 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
219 found_inline = 1;
220 extent_end = key.offset +
221 btrfs_file_extent_inline_len(leaf->items + slot);
222 }
223
224 /* we found nothing we can drop */
225 if (!found_extent && !found_inline) {
226 ret = 0;
227 goto out;
228 }
229
230 /* we found nothing inside the range */
231 if (search_start >= extent_end) {
232 ret = 0;
233 goto out;
234 }
235
236 /* FIXME, there's only one inline extent allowed right now */
/* next search begins after this extent (rounded up to a block for inline data) */
237 if (found_inline) {
238 u64 mask = root->blocksize - 1;
239 search_start = (extent_end + mask) & ~mask;
240 } else
241 search_start = extent_end;
242
/*
 * The drop range ends inside this extent, so its tail has to survive as a
 * separate "bookend" item.  Keep a copy of the item in 'old' and take an
 * extra reference on the disk extent (skipped when the disk block number
 * is 0).
 */
243 if (end < extent_end && end >= key.offset) {
244 if (found_extent) {
245 u64 disk_blocknr =
246 btrfs_file_extent_disk_blocknr(extent);
247 u64 disk_num_blocks =
248 btrfs_file_extent_disk_num_blocks(extent);
249 memcpy(&old, extent, sizeof(old));
250 if (disk_blocknr != 0) {
251 ret = btrfs_inc_extent_ref(trans, root,
252 disk_blocknr, disk_num_blocks);
253 BUG_ON(ret);
254 }
255 }
256 WARN_ON(found_inline);
257 bookend = 1;
258 }
259
260 /* truncate existing extent */
261 if (start > key.offset) {
262 u64 new_num;
263 u64 old_num;
264 keep = 1;
265 WARN_ON(start & (root->blocksize - 1));
266 if (found_extent) {
267 new_num = (start - key.offset) >>
268 inode->i_blkbits;
269 old_num = btrfs_file_extent_num_blocks(extent);
270 *hint_block =
271 btrfs_file_extent_disk_blocknr(extent);
/*
 * i_blocks shrinks by 8 units per dropped block.
 * NOTE(review): presumably 512-byte sectors per 4096-byte block -- confirm.
 */
272 if (btrfs_file_extent_disk_blocknr(extent)) {
273 inode->i_blocks -=
274 (old_num - new_num) << 3;
275 }
276 btrfs_set_file_extent_num_blocks(extent,
277 new_num);
278 mark_buffer_dirty(path->nodes[0]);
279 } else {
280 WARN_ON(1);
281 }
282 }
283 /* delete the entire extent */
284 if (!keep) {
285 u64 disk_blocknr = 0;
286 u64 disk_num_blocks = 0;
287 u64 extent_num_blocks = 0;
/* read everything needed from the item before it is deleted */
288 if (found_extent) {
289 disk_blocknr =
290 btrfs_file_extent_disk_blocknr(extent);
291 disk_num_blocks =
292 btrfs_file_extent_disk_num_blocks(extent);
293 extent_num_blocks =
294 btrfs_file_extent_num_blocks(extent);
295 *hint_block =
296 btrfs_file_extent_disk_blocknr(extent);
297 }
298 ret = btrfs_del_item(trans, root, path);
299 BUG_ON(ret);
300 btrfs_release_path(root, path);
301 extent = NULL;
302 if (found_extent && disk_blocknr != 0) {
303 inode->i_blocks -= extent_num_blocks << 3;
304 ret = btrfs_free_extent(trans, root,
305 disk_blocknr,
306 disk_num_blocks, 0);
307 }
308
309 BUG_ON(ret);
/* done when no tail has to be re-inserted and the range is exhausted */
310 if (!bookend && search_start >= end) {
311 ret = 0;
312 goto out;
313 }
314 if (!bookend)
315 continue;
316 }
317 /* create bookend, splitting the extent in two */
318 if (bookend && found_extent) {
319 struct btrfs_key ins;
320 ins.objectid = inode->i_ino;
321 ins.offset = end;
322 ins.flags = 0;
323 btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
324
325 btrfs_release_path(root, path);
326 ret = btrfs_insert_empty_item(trans, root, path, &ins,
327 sizeof(*extent));
328 BUG_ON(ret);
329 extent = btrfs_item_ptr(
330 btrfs_buffer_leaf(path->nodes[0]),
331 path->slots[0],
332 struct btrfs_file_extent_item);
/* the new item reuses the disk extent recorded in the saved copy */
333 btrfs_set_file_extent_disk_blocknr(extent,
334 btrfs_file_extent_disk_blocknr(&old));
335 btrfs_set_file_extent_disk_num_blocks(extent,
336 btrfs_file_extent_disk_num_blocks(&old));
337
/* its offset is moved forward by the number of blocks cut from the front */
338 btrfs_set_file_extent_offset(extent,
339 btrfs_file_extent_offset(&old) +
340 ((end - key.offset) >> inode->i_blkbits));
341 WARN_ON(btrfs_file_extent_num_blocks(&old) <
342 (extent_end - end) >> inode->i_blkbits);
343 btrfs_set_file_extent_num_blocks(extent,
344 (extent_end - end) >> inode->i_blkbits);
345
346 btrfs_set_file_extent_type(extent,
347 BTRFS_FILE_EXTENT_REG);
348 btrfs_set_file_extent_generation(extent,
349 btrfs_file_extent_generation(&old));
350 btrfs_mark_buffer_dirty(path->nodes[0]);
351 if (btrfs_file_extent_disk_blocknr(&old) != 0) {
352 inode->i_blocks +=
353 btrfs_file_extent_num_blocks(extent) << 3;
354 }
355 ret = 0;
356 goto out;
357 }
358 }
359out:
360 btrfs_free_path(path);
361 return ret;
362}
363
364/*
365 * this gets pages into the page cache and locks them down
366 */
367static int prepare_pages(struct btrfs_root *root,
368 struct file *file,
369 struct page **pages,
370 size_t num_pages,
371 loff_t pos,
372 unsigned long first_index,
373 unsigned long last_index,
374 size_t write_bytes,
375 u64 alloc_extent_start)
376{
377 int i;
378 unsigned long index = pos >> PAGE_CACHE_SHIFT;
379 struct inode *inode = file->f_path.dentry->d_inode;
380 int offset;
381 int err = 0;
382 int this_write;
383 struct buffer_head *bh;
384 struct buffer_head *head;
385 loff_t isize = i_size_read(inode);
386
387 memset(pages, 0, num_pages * sizeof(struct page *));
388
389 for (i = 0; i < num_pages; i++) {
390 pages[i] = grab_cache_page(inode->i_mapping, index + i);
391 if (!pages[i]) {
392 err = -ENOMEM;
393 goto failed_release;
394 }
395 cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
396 wait_on_page_writeback(pages[i]);
397 offset = pos & (PAGE_CACHE_SIZE -1);
398 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
399 if (!page_has_buffers(pages[i])) {
400 create_empty_buffers(pages[i],
401 root->fs_info->sb->s_blocksize,
402 (1 << BH_Uptodate));
403 }
404 head = page_buffers(pages[i]);
405 bh = head;
406 do {
407 err = btrfs_map_bh_to_logical(root, bh,
408 alloc_extent_start);
409 BUG_ON(err);
410 if (err)
411 goto failed_truncate;
412 bh = bh->b_this_page;
413 if (alloc_extent_start)
414 alloc_extent_start++;
415 } while (bh != head);
416 pos += this_write;
417 WARN_ON(this_write > write_bytes);
418 write_bytes -= this_write;
419 }
420 return 0;
421
422failed_release:
423 btrfs_drop_pages(pages, num_pages);
424 return err;
425
426failed_truncate:
427 btrfs_drop_pages(pages, num_pages);
428 if (pos > isize)
429 vmtruncate(inode, isize);
430 return err;
431}
432
433static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
434 size_t count, loff_t *ppos)
435{
436 loff_t pos;
437 size_t num_written = 0;
438 int err = 0;
439 int ret = 0;
440 struct inode *inode = file->f_path.dentry->d_inode;
441 struct btrfs_root *root = BTRFS_I(inode)->root;
442 struct page *pages[8];
443 struct page *pinned[2];
444 unsigned long first_index;
445 unsigned long last_index;
446 u64 start_pos;
447 u64 num_blocks;
448 u64 alloc_extent_start;
449 u64 hint_block;
450 struct btrfs_trans_handle *trans;
451 struct btrfs_key ins;
452 pinned[0] = NULL;
453 pinned[1] = NULL;
454 if (file->f_flags & O_DIRECT)
455 return -EINVAL;
456 pos = *ppos;
457 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
458 current->backing_dev_info = inode->i_mapping->backing_dev_info;
459 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
460 if (err)
461 goto out;
462 if (count == 0)
463 goto out;
464 err = remove_suid(file->f_path.dentry);
465 if (err)
466 goto out;
467 file_update_time(file);
468
469 start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
470 num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
471 inode->i_blkbits;
472
473 mutex_lock(&inode->i_mutex);
474 first_index = pos >> PAGE_CACHE_SHIFT;
475 last_index = (pos + count) >> PAGE_CACHE_SHIFT;
476
477 /*
478 * there are lots of better ways to do this, but this code
479 * makes sure the first and last page in the file range are
480 * up to date and ready for cow
481 */
482 if ((pos & (PAGE_CACHE_SIZE - 1))) {
483 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
484 if (!PageUptodate(pinned[0])) {
485 ret = mpage_readpage(pinned[0], btrfs_get_block);
486 BUG_ON(ret);
487 wait_on_page_locked(pinned[0]);
488 } else {
489 unlock_page(pinned[0]);
490 }
491 }
492 if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
493 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
494 if (!PageUptodate(pinned[1])) {
495 ret = mpage_readpage(pinned[1], btrfs_get_block);
496 BUG_ON(ret);
497 wait_on_page_locked(pinned[1]);
498 } else {
499 unlock_page(pinned[1]);
500 }
501 }
502
503 mutex_lock(&root->fs_info->fs_mutex);
504 trans = btrfs_start_transaction(root, 1);
505 if (!trans) {
506 err = -ENOMEM;
507 mutex_unlock(&root->fs_info->fs_mutex);
508 goto out_unlock;
509 }
510 btrfs_set_trans_block_group(trans, inode);
511 /* FIXME blocksize != 4096 */
512 inode->i_blocks += num_blocks << 3;
513 hint_block = 0;
514
515 /* FIXME...EIEIO, ENOSPC and more */
516
517 /* step one, delete the existing extents in this range */
518 if (start_pos < inode->i_size) {
519 /* FIXME blocksize != pagesize */
520 ret = btrfs_drop_extents(trans, root, inode,
521 start_pos,
522 (pos + count + root->blocksize -1) &
523 ~((u64)root->blocksize - 1),
524 &hint_block);
525 BUG_ON(ret);
526 }
527
528 /* insert any holes we need to create */
529 if (inode->i_size < start_pos) {
530 u64 last_pos_in_file;
531 u64 hole_size;
532 u64 mask = root->blocksize - 1;
533 last_pos_in_file = (inode->i_size + mask) & ~mask;
534 hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
535 hole_size >>= inode->i_blkbits;
536 if (last_pos_in_file < start_pos) {
537 ret = btrfs_insert_file_extent(trans, root,
538 inode->i_ino,
539 last_pos_in_file,
540 0, 0, hole_size);
541 }
542 BUG_ON(ret);
543 }
544
545 /*
546 * either allocate an extent for the new bytes or setup the key
547 * to show we are doing inline data in the extent
548 */
549 if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size ||
550 pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
551 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
552 num_blocks, hint_block, (u64)-1,
553 &ins, 1);
554 BUG_ON(ret);
555 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
556 start_pos, ins.objectid, ins.offset,
557 ins.offset);
558 BUG_ON(ret);
559 } else {
560 ins.offset = 0;
561 ins.objectid = 0;
562 }
563 BUG_ON(ret);
564 alloc_extent_start = ins.objectid;
565 ret = btrfs_end_transaction(trans, root);
566 mutex_unlock(&root->fs_info->fs_mutex);
567
568 while(count > 0) {
569 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
570 size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
571 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
572 PAGE_CACHE_SHIFT;
573
574 memset(pages, 0, sizeof(pages));
575 ret = prepare_pages(root, file, pages, num_pages,
576 pos, first_index, last_index,
577 write_bytes, alloc_extent_start);
578 BUG_ON(ret);
579
580 /* FIXME blocks != pagesize */
581 if (alloc_extent_start)
582 alloc_extent_start += num_pages;
583 ret = btrfs_copy_from_user(pos, num_pages,
584 write_bytes, pages, buf);
585 BUG_ON(ret);
586
587 ret = dirty_and_release_pages(NULL, root, file, pages,
588 num_pages, pos, write_bytes);
589 BUG_ON(ret);
590 btrfs_drop_pages(pages, num_pages);
591
592 buf += write_bytes;
593 count -= write_bytes;
594 pos += write_bytes;
595 num_written += write_bytes;
596
597 balance_dirty_pages_ratelimited(inode->i_mapping);
598 btrfs_btree_balance_dirty(root);
599 cond_resched();
600 }
601out_unlock:
602 mutex_unlock(&inode->i_mutex);
603out:
604 if (pinned[0])
605 page_cache_release(pinned[0]);
606 if (pinned[1])
607 page_cache_release(pinned[1]);
608 *ppos = pos;
609 current->backing_dev_info = NULL;
610 mark_inode_dirty(inode);
611 return num_written ? num_written : err;
612}
613
614/*
615 * FIXME, do this by stuffing the csum we want in the info hanging off
616 * page->private. For now, verify file csums on read
617 */
/*
 * Per-page read actor handed to do_generic_file_read() by
 * btrfs_file_aio_read().
 *
 * The first time a page is seen (PageChecked not set) its contents are
 * verified with btrfs_csum_verify_file_block(), unless the page has buffers
 * and the first one is unmapped.  A verification failure other than -ENOENT
 * is logged and the whole page is overwritten with the byte value 1.
 * Either way the page is then marked checked.
 *
 * After that, up to 'size' bytes (clamped to desc->count) starting at
 * 'offset' in the page are copied to desc->arg.buf.
 *
 * Returns the number of bytes copied and advances desc->count,
 * desc->written and desc->arg.buf by that amount; desc->error is set to
 * -EFAULT when the copy comes up short.
 */
618static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
619 unsigned long offset, unsigned long size)
620{
621 char *kaddr;
622 unsigned long left, count = desc->count;
623 struct inode *inode = page->mapping->host;
624
625 if (size > count)
626 size = count;
627
628 if (!PageChecked(page)) {
629 /* FIXME, do it per block */
630 struct btrfs_root *root = BTRFS_I(inode)->root;
631 int ret;
632 struct buffer_head *bh;
633
/* an unmapped first buffer skips verification; no kmap has been taken yet */
634 if (page_has_buffers(page)) {
635 bh = page_buffers(page);
636 if (!buffer_mapped(bh)) {
637 SetPageChecked(page);
638 goto checked;
639 }
640 }
641
/* the kmap() taken in the argument list is released by the kunmap() below */
642 ret = btrfs_csum_verify_file_block(root,
643 page->mapping->host->i_ino,
644 page->index << PAGE_CACHE_SHIFT,
645 kmap(page), PAGE_CACHE_SIZE);
646 if (ret) {
/* -ENOENT is tolerated silently; NOTE(review): presumably "no csum stored" -- confirm */
647 if (ret != -ENOENT) {
648 printk("failed to verify ino %lu page %lu ret %d\n",
649 page->mapping->host->i_ino,
650 page->index, ret);
651 memset(page_address(page), 1, PAGE_CACHE_SIZE);
652 flush_dcache_page(page);
653 }
654 }
655 SetPageChecked(page);
656 kunmap(page);
657 }
658checked:
659 /*
660 * Faults on the destination of a read are common, so do it before
661 * taking the kmap.
662 */
/* fast path: atomic mapping; on any leftover bytes fall through and redo the copy */
663 if (!fault_in_pages_writeable(desc->arg.buf, size)) {
664 kaddr = kmap_atomic(page, KM_USER0);
665 left = __copy_to_user_inatomic(desc->arg.buf,
666 kaddr + offset, size);
667 kunmap_atomic(kaddr, KM_USER0);
668 if (left == 0)
669 goto success;
670 }
671
672 /* Do it the slow way */
673 kaddr = kmap(page);
674 left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
675 kunmap(page);
676
/* report only the bytes that actually reached user space */
677 if (left) {
678 size -= left;
679 desc->error = -EFAULT;
680 }
681success:
682 desc->count = count - size;
683 desc->written += size;
684 desc->arg.buf += size;
685 return size;
686}
687
688/**
689 * btrfs_file_aio_read - filesystem read routine, with a mod to csum verify
690 * @iocb: kernel I/O control block
691 * @iov: io vector request
692 * @nr_segs: number of segments in the iovec
693 * @pos: current file position
694 */
695static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
696 unsigned long nr_segs, loff_t pos)
697{
698 struct file *filp = iocb->ki_filp;
699 ssize_t retval;
700 unsigned long seg;
701 size_t count;
702 loff_t *ppos = &iocb->ki_pos;
703
704 count = 0;
705 for (seg = 0; seg < nr_segs; seg++) {
706 const struct iovec *iv = &iov[seg];
707
708 /*
709 * If any segment has a negative length, or the cumulative
710 * length ever wraps negative then return -EINVAL.
711 */
712 count += iv->iov_len;
713 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
714 return -EINVAL;
715 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
716 continue;
717 if (seg == 0)
718 return -EFAULT;
719 nr_segs = seg;
720 count -= iv->iov_len; /* This segment is no good */
721 break;
722 }
723 retval = 0;
724 if (count) {
725 for (seg = 0; seg < nr_segs; seg++) {
726 read_descriptor_t desc;
727
728 desc.written = 0;
729 desc.arg.buf = iov[seg].iov_base;
730 desc.count = iov[seg].iov_len;
731 if (desc.count == 0)
732 continue;
733 desc.error = 0;
734 do_generic_file_read(filp, ppos, &desc,
735 btrfs_read_actor);
736 retval += desc.written;
737 if (desc.error) {
738 retval = retval ?: desc.error;
739 break;
740 }
741 }
742 }
743 return retval;
744}
745
746static int btrfs_sync_file(struct file *file,
747 struct dentry *dentry, int datasync)
748{
749 struct inode *inode = dentry->d_inode;
750 struct btrfs_root *root = BTRFS_I(inode)->root;
751 int ret;
752 struct btrfs_trans_handle *trans;
753
754 /*
755 * FIXME, use inode generation number to check if we can skip the
756 * commit
757 */
758 mutex_lock(&root->fs_info->fs_mutex);
759 trans = btrfs_start_transaction(root, 1);
760 if (!trans) {
761 ret = -ENOMEM;
762 goto out;
763 }
764 ret = btrfs_commit_transaction(trans, root);
765 mutex_unlock(&root->fs_info->fs_mutex);
766out:
767 return ret > 0 ? EIO : ret;
768}
769
770struct file_operations btrfs_file_operations = {
771 .llseek = generic_file_llseek,
772 .read = do_sync_read,
773 .aio_read = btrfs_file_aio_read,
774 .write = btrfs_file_write,
775 .mmap = generic_file_mmap,
776 .open = generic_file_open,
777 .ioctl = btrfs_ioctl,
778 .fsync = btrfs_sync_file,
779#ifdef CONFIG_COMPAT
780 .compat_ioctl = btrfs_compat_ioctl,
781#endif
782};
783
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
new file mode 100644
index 000000000000..62a3a778d37d
--- /dev/null
+++ b/fs/btrfs/inode.c
@@ -0,0 +1,2210 @@
1#include <linux/module.h>
2#include <linux/buffer_head.h>
3#include <linux/fs.h>
4#include <linux/pagemap.h>
5#include <linux/highmem.h>
6#include <linux/time.h>
7#include <linux/init.h>
8#include <linux/string.h>
9#include <linux/smp_lock.h>
10#include <linux/backing-dev.h>
11#include <linux/mpage.h>
12#include <linux/swap.h>
13#include <linux/writeback.h>
14#include <linux/statfs.h>
15#include <linux/compat.h>
16#include "ctree.h"
17#include "disk-io.h"
18#include "transaction.h"
19#include "btrfs_inode.h"
20#include "ioctl.h"
21#include "print-tree.h"
22
23struct btrfs_iget_args {
24 u64 ino;
25 struct btrfs_root *root;
26};
27
28static struct inode_operations btrfs_dir_inode_operations;
29static struct inode_operations btrfs_symlink_inode_operations;
30static struct inode_operations btrfs_dir_ro_inode_operations;
31static struct inode_operations btrfs_file_inode_operations;
32static struct address_space_operations btrfs_aops;
33static struct address_space_operations btrfs_symlink_aops;
34static struct file_operations btrfs_dir_file_operations;
35
36static struct kmem_cache *btrfs_inode_cachep;
37struct kmem_cache *btrfs_trans_handle_cachep;
38struct kmem_cache *btrfs_transaction_cachep;
39struct kmem_cache *btrfs_bit_radix_cachep;
40struct kmem_cache *btrfs_path_cachep;
41
42#define S_SHIFT 12
43static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
44 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
45 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
46 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
47 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
48 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
49 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
50 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
51};
52
53void btrfs_read_locked_inode(struct inode *inode)
54{
55 struct btrfs_path *path;
56 struct btrfs_inode_item *inode_item;
57 struct btrfs_root *root = BTRFS_I(inode)->root;
58 struct btrfs_key location;
59 u64 alloc_group_block;
60 int ret;
61
62 path = btrfs_alloc_path();
63 BUG_ON(!path);
64 btrfs_init_path(path);
65 mutex_lock(&root->fs_info->fs_mutex);
66
67 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
68 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
69 if (ret) {
70 btrfs_free_path(path);
71 goto make_bad;
72 }
73 inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
74 path->slots[0],
75 struct btrfs_inode_item);
76
77 inode->i_mode = btrfs_inode_mode(inode_item);
78 inode->i_nlink = btrfs_inode_nlink(inode_item);
79 inode->i_uid = btrfs_inode_uid(inode_item);
80 inode->i_gid = btrfs_inode_gid(inode_item);
81 inode->i_size = btrfs_inode_size(inode_item);
82 inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
83 inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
84 inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
85 inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
86 inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
87 inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
88 inode->i_blocks = btrfs_inode_nblocks(inode_item);
89 inode->i_generation = btrfs_inode_generation(inode_item);
90 alloc_group_block = btrfs_inode_block_group(inode_item);
91 BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
92 alloc_group_block);
93
94 btrfs_free_path(path);
95 inode_item = NULL;
96
97 mutex_unlock(&root->fs_info->fs_mutex);
98
99 switch (inode->i_mode & S_IFMT) {
100#if 0
101 default:
102 init_special_inode(inode, inode->i_mode,
103 btrfs_inode_rdev(inode_item));
104 break;
105#endif
106 case S_IFREG:
107 inode->i_mapping->a_ops = &btrfs_aops;
108 inode->i_fop = &btrfs_file_operations;
109 inode->i_op = &btrfs_file_inode_operations;
110 break;
111 case S_IFDIR:
112 inode->i_fop = &btrfs_dir_file_operations;
113 if (root == root->fs_info->tree_root)
114 inode->i_op = &btrfs_dir_ro_inode_operations;
115 else
116 inode->i_op = &btrfs_dir_inode_operations;
117 break;
118 case S_IFLNK:
119 inode->i_op = &btrfs_symlink_inode_operations;
120 inode->i_mapping->a_ops = &btrfs_symlink_aops;
121 break;
122 }
123 return;
124
125make_bad:
126 btrfs_release_path(root, path);
127 btrfs_free_path(path);
128 mutex_unlock(&root->fs_info->fs_mutex);
129 make_bad_inode(inode);
130}
131
132static void fill_inode_item(struct btrfs_inode_item *item,
133 struct inode *inode)
134{
135 btrfs_set_inode_uid(item, inode->i_uid);
136 btrfs_set_inode_gid(item, inode->i_gid);
137 btrfs_set_inode_size(item, inode->i_size);
138 btrfs_set_inode_mode(item, inode->i_mode);
139 btrfs_set_inode_nlink(item, inode->i_nlink);
140 btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
141 btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
142 btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
143 btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
144 btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
145 btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
146 btrfs_set_inode_nblocks(item, inode->i_blocks);
147 btrfs_set_inode_generation(item, inode->i_generation);
148 btrfs_set_inode_block_group(item,
149 BTRFS_I(inode)->block_group->key.objectid);
150}
151
152static int btrfs_update_inode(struct btrfs_trans_handle *trans,
153 struct btrfs_root *root,
154 struct inode *inode)
155{
156 struct btrfs_inode_item *inode_item;
157 struct btrfs_path *path;
158 int ret;
159
160 path = btrfs_alloc_path();
161 BUG_ON(!path);
162 btrfs_init_path(path);
163 ret = btrfs_lookup_inode(trans, root, path,
164 &BTRFS_I(inode)->location, 1);
165 if (ret) {
166 if (ret > 0)
167 ret = -ENOENT;
168 goto failed;
169 }
170
171 inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
172 path->slots[0],
173 struct btrfs_inode_item);
174
175 fill_inode_item(inode_item, inode);
176 btrfs_mark_buffer_dirty(path->nodes[0]);
177 ret = 0;
178failed:
179 btrfs_release_path(root, path);
180 btrfs_free_path(path);
181 return ret;
182}
183
184
185static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
186 struct btrfs_root *root,
187 struct inode *dir,
188 struct dentry *dentry)
189{
190 struct btrfs_path *path;
191 const char *name = dentry->d_name.name;
192 int name_len = dentry->d_name.len;
193 int ret = 0;
194 u64 objectid;
195 struct btrfs_dir_item *di;
196
197 path = btrfs_alloc_path();
198 BUG_ON(!path);
199 btrfs_init_path(path);
200 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
201 name, name_len, -1);
202 if (IS_ERR(di)) {
203 ret = PTR_ERR(di);
204 goto err;
205 }
206 if (!di) {
207 ret = -ENOENT;
208 goto err;
209 }
210 objectid = btrfs_disk_key_objectid(&di->location);
211 ret = btrfs_delete_one_dir_name(trans, root, path, di);
212 BUG_ON(ret);
213 btrfs_release_path(root, path);
214
215 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
216 objectid, name, name_len, -1);
217 if (IS_ERR(di)) {
218 ret = PTR_ERR(di);
219 goto err;
220 }
221 if (!di) {
222 ret = -ENOENT;
223 goto err;
224 }
225 ret = btrfs_delete_one_dir_name(trans, root, path, di);
226 BUG_ON(ret);
227
228 dentry->d_inode->i_ctime = dir->i_ctime;
229err:
230 btrfs_free_path(path);
231 if (!ret) {
232 dir->i_size -= name_len * 2;
233 btrfs_update_inode(trans, root, dir);
234 drop_nlink(dentry->d_inode);
235 btrfs_update_inode(trans, root, dentry->d_inode);
236 dir->i_sb->s_dirt = 1;
237 }
238 return ret;
239}
240
241static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
242{
243 struct btrfs_root *root;
244 struct btrfs_trans_handle *trans;
245 int ret;
246
247 root = BTRFS_I(dir)->root;
248 mutex_lock(&root->fs_info->fs_mutex);
249 trans = btrfs_start_transaction(root, 1);
250 btrfs_set_trans_block_group(trans, dir);
251 ret = btrfs_unlink_trans(trans, root, dir, dentry);
252 btrfs_end_transaction(trans, root);
253 mutex_unlock(&root->fs_info->fs_mutex);
254 btrfs_btree_balance_dirty(root);
255 return ret;
256}
257
258static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
259{
260 struct inode *inode = dentry->d_inode;
261 int err;
262 int ret;
263 struct btrfs_root *root = BTRFS_I(dir)->root;
264 struct btrfs_path *path;
265 struct btrfs_key key;
266 struct btrfs_trans_handle *trans;
267 struct btrfs_key found_key;
268 int found_type;
269 struct btrfs_leaf *leaf;
270 char *goodnames = "..";
271
272 path = btrfs_alloc_path();
273 BUG_ON(!path);
274 btrfs_init_path(path);
275 mutex_lock(&root->fs_info->fs_mutex);
276 trans = btrfs_start_transaction(root, 1);
277 btrfs_set_trans_block_group(trans, dir);
278 key.objectid = inode->i_ino;
279 key.offset = (u64)-1;
280 key.flags = (u32)-1;
281 while(1) {
282 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
283 if (ret < 0) {
284 err = ret;
285 goto out;
286 }
287 BUG_ON(ret == 0);
288 if (path->slots[0] == 0) {
289 err = -ENOENT;
290 goto out;
291 }
292 path->slots[0]--;
293 leaf = btrfs_buffer_leaf(path->nodes[0]);
294 btrfs_disk_key_to_cpu(&found_key,
295 &leaf->items[path->slots[0]].key);
296 found_type = btrfs_key_type(&found_key);
297 if (found_key.objectid != inode->i_ino) {
298 err = -ENOENT;
299 goto out;
300 }
301 if ((found_type != BTRFS_DIR_ITEM_KEY &&
302 found_type != BTRFS_DIR_INDEX_KEY) ||
303 (!btrfs_match_dir_item_name(root, path, goodnames, 2) &&
304 !btrfs_match_dir_item_name(root, path, goodnames, 1))) {
305 err = -ENOTEMPTY;
306 goto out;
307 }
308 ret = btrfs_del_item(trans, root, path);
309 BUG_ON(ret);
310
311 if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1)
312 break;
313 btrfs_release_path(root, path);
314 }
315 ret = 0;
316 btrfs_release_path(root, path);
317
318 /* now the directory is empty */
319 err = btrfs_unlink_trans(trans, root, dir, dentry);
320 if (!err) {
321 inode->i_size = 0;
322 }
323out:
324 btrfs_release_path(root, path);
325 btrfs_free_path(path);
326 mutex_unlock(&root->fs_info->fs_mutex);
327 ret = btrfs_end_transaction(trans, root);
328 btrfs_btree_balance_dirty(root);
329 if (ret && !err)
330 err = ret;
331 return err;
332}
333
334static int btrfs_free_inode(struct btrfs_trans_handle *trans,
335 struct btrfs_root *root,
336 struct inode *inode)
337{
338 struct btrfs_path *path;
339 int ret;
340
341 clear_inode(inode);
342
343 path = btrfs_alloc_path();
344 BUG_ON(!path);
345 btrfs_init_path(path);
346 ret = btrfs_lookup_inode(trans, root, path,
347 &BTRFS_I(inode)->location, -1);
348 BUG_ON(ret);
349 ret = btrfs_del_item(trans, root, path);
350 BUG_ON(ret);
351 btrfs_free_path(path);
352 return ret;
353}
354
355/*
356 * truncates go from a high offset to a low offset. So, walk
357 * from hi to lo in the node and issue readas. Stop when you find
358 * keys from a different objectid
359 */
360static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path,
361 u64 objectid)
362{
363 struct btrfs_node *node;
364 int i;
365 int nritems;
366 u64 item_objectid;
367 u64 blocknr;
368 int slot;
369 int ret;
370
371 if (!path->nodes[1])
372 return;
373 node = btrfs_buffer_node(path->nodes[1]);
374 slot = path->slots[1];
375 if (slot == 0)
376 return;
377 nritems = btrfs_header_nritems(&node->header);
378 for (i = slot - 1; i >= 0; i--) {
379 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
380 if (item_objectid != objectid)
381 break;
382 blocknr = btrfs_node_blockptr(node, i);
383 ret = readahead_tree_block(root, blocknr);
384 if (ret)
385 break;
386 }
387}
388
389/*
390 * this can truncate away extent items, csum items and directory items.
391 * It starts at a high offset and removes keys until it can't find
392 * any higher than i_size.
393 *
394 * csum items that cross the new i_size are truncated to the new size
395 * as well.
396 */
397static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
398 struct btrfs_root *root,
399 struct inode *inode)
400{
401 int ret;
402 struct btrfs_path *path;
403 struct btrfs_key key;
404 struct btrfs_disk_key *found_key;
405 u32 found_type;
406 struct btrfs_leaf *leaf;
407 struct btrfs_file_extent_item *fi;
408 u64 extent_start = 0;
409 u64 extent_num_blocks = 0;
410 u64 item_end = 0;
411 int found_extent;
412 int del_item;
413
414 path = btrfs_alloc_path();
415 BUG_ON(!path);
416 /* FIXME, add redo link to tree so we don't leak on crash */
417 key.objectid = inode->i_ino;
418 key.offset = (u64)-1;
419 key.flags = (u32)-1;
420 while(1) {
421 btrfs_init_path(path);
422 fi = NULL;
423 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
424 if (ret < 0) {
425 goto error;
426 }
427 if (ret > 0) {
428 BUG_ON(path->slots[0] == 0);
429 path->slots[0]--;
430 }
431 reada_truncate(root, path, inode->i_ino);
432 leaf = btrfs_buffer_leaf(path->nodes[0]);
433 found_key = &leaf->items[path->slots[0]].key;
434 found_type = btrfs_disk_key_type(found_key);
435
436 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
437 break;
438 if (found_type != BTRFS_CSUM_ITEM_KEY &&
439 found_type != BTRFS_DIR_ITEM_KEY &&
440 found_type != BTRFS_DIR_INDEX_KEY &&
441 found_type != BTRFS_EXTENT_DATA_KEY)
442 break;
443
444 item_end = btrfs_disk_key_offset(found_key);
445 if (found_type == BTRFS_EXTENT_DATA_KEY) {
446 fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
447 path->slots[0],
448 struct btrfs_file_extent_item);
449 if (btrfs_file_extent_type(fi) !=
450 BTRFS_FILE_EXTENT_INLINE) {
451 item_end += btrfs_file_extent_num_blocks(fi) <<
452 inode->i_blkbits;
453 }
454 }
455 if (found_type == BTRFS_CSUM_ITEM_KEY) {
456 ret = btrfs_csum_truncate(trans, root, path,
457 inode->i_size);
458 BUG_ON(ret);
459 }
460 if (item_end < inode->i_size) {
461 if (found_type) {
462 btrfs_set_key_type(&key, found_type - 1);
463 continue;
464 }
465 break;
466 }
467 if (btrfs_disk_key_offset(found_key) >= inode->i_size)
468 del_item = 1;
469 else
470 del_item = 0;
471 found_extent = 0;
472
473 /* FIXME, shrink the extent if the ref count is only 1 */
474 if (found_type == BTRFS_EXTENT_DATA_KEY &&
475 btrfs_file_extent_type(fi) !=
476 BTRFS_FILE_EXTENT_INLINE) {
477 u64 num_dec;
478 if (!del_item) {
479 u64 orig_num_blocks =
480 btrfs_file_extent_num_blocks(fi);
481 extent_num_blocks = inode->i_size -
482 btrfs_disk_key_offset(found_key) +
483 root->blocksize - 1;
484 extent_num_blocks >>= inode->i_blkbits;
485 btrfs_set_file_extent_num_blocks(fi,
486 extent_num_blocks);
487 inode->i_blocks -= (orig_num_blocks -
488 extent_num_blocks) << 3;
489 mark_buffer_dirty(path->nodes[0]);
490 } else {
491 extent_start =
492 btrfs_file_extent_disk_blocknr(fi);
493 extent_num_blocks =
494 btrfs_file_extent_disk_num_blocks(fi);
495 /* FIXME blocksize != 4096 */
496 num_dec = btrfs_file_extent_num_blocks(fi) << 3;
497 if (extent_start != 0) {
498 found_extent = 1;
499 inode->i_blocks -= num_dec;
500 }
501 }
502 }
503 if (del_item) {
504 ret = btrfs_del_item(trans, root, path);
505 BUG_ON(ret);
506 } else {
507 break;
508 }
509 btrfs_release_path(root, path);
510 if (found_extent) {
511 ret = btrfs_free_extent(trans, root, extent_start,
512 extent_num_blocks, 0);
513 BUG_ON(ret);
514 }
515 }
516 ret = 0;
517error:
518 btrfs_release_path(root, path);
519 btrfs_free_path(path);
520 inode->i_sb->s_dirt = 1;
521 return ret;
522}
523
524/*
525 * taken from block_truncate_page, but does cow as it zeros out
526 * any bytes left in the last page in the file.
527 */
528static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
529{
530 struct inode *inode = mapping->host;
531 unsigned blocksize = 1 << inode->i_blkbits;
532 pgoff_t index = from >> PAGE_CACHE_SHIFT;
533 unsigned offset = from & (PAGE_CACHE_SIZE-1);
534 struct page *page;
535 char *kaddr;
536 int ret = 0;
537 struct btrfs_root *root = BTRFS_I(inode)->root;
538 u64 alloc_hint = 0;
539 struct btrfs_key ins;
540 struct btrfs_trans_handle *trans;
541
542 if ((offset & (blocksize - 1)) == 0)
543 goto out;
544
545 ret = -ENOMEM;
546 page = grab_cache_page(mapping, index);
547 if (!page)
548 goto out;
549
550 if (!PageUptodate(page)) {
551 ret = mpage_readpage(page, btrfs_get_block);
552 lock_page(page);
553 if (!PageUptodate(page)) {
554 ret = -EIO;
555 goto out;
556 }
557 }
558 mutex_lock(&root->fs_info->fs_mutex);
559 trans = btrfs_start_transaction(root, 1);
560 btrfs_set_trans_block_group(trans, inode);
561
562 ret = btrfs_drop_extents(trans, root, inode,
563 page->index << PAGE_CACHE_SHIFT,
564 (page->index + 1) << PAGE_CACHE_SHIFT,
565 &alloc_hint);
566 BUG_ON(ret);
567 ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1,
568 alloc_hint, (u64)-1, &ins, 1);
569 BUG_ON(ret);
570 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
571 page->index << PAGE_CACHE_SHIFT,
572 ins.objectid, 1, 1);
573 BUG_ON(ret);
574 SetPageChecked(page);
575 kaddr = kmap(page);
576 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
577 flush_dcache_page(page);
578 btrfs_csum_file_block(trans, root, inode->i_ino,
579 page->index << PAGE_CACHE_SHIFT,
580 kaddr, PAGE_CACHE_SIZE);
581 kunmap(page);
582 btrfs_end_transaction(trans, root);
583 mutex_unlock(&root->fs_info->fs_mutex);
584
585 set_page_dirty(page);
586 unlock_page(page);
587 page_cache_release(page);
588out:
589 return ret;
590}
591
592static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
593{
594 struct inode *inode = dentry->d_inode;
595 int err;
596
597 err = inode_change_ok(inode, attr);
598 if (err)
599 return err;
600
601 if (S_ISREG(inode->i_mode) &&
602 attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
603 struct btrfs_trans_handle *trans;
604 struct btrfs_root *root = BTRFS_I(inode)->root;
605 u64 mask = root->blocksize - 1;
606 u64 pos = (inode->i_size + mask) & ~mask;
607 u64 hole_size;
608
609 if (attr->ia_size <= pos)
610 goto out;
611
612 btrfs_truncate_page(inode->i_mapping, inode->i_size);
613
614 hole_size = (attr->ia_size - pos + mask) & ~mask;
615 hole_size >>= inode->i_blkbits;
616
617 mutex_lock(&root->fs_info->fs_mutex);
618 trans = btrfs_start_transaction(root, 1);
619 btrfs_set_trans_block_group(trans, inode);
620 err = btrfs_insert_file_extent(trans, root, inode->i_ino,
621 pos, 0, 0, hole_size);
622 BUG_ON(err);
623 btrfs_end_transaction(trans, root);
624 mutex_unlock(&root->fs_info->fs_mutex);
625 }
626out:
627 err = inode_setattr(inode, attr);
628
629 return err;
630}
631void btrfs_delete_inode(struct inode *inode)
632{
633 struct btrfs_trans_handle *trans;
634 struct btrfs_root *root = BTRFS_I(inode)->root;
635 int ret;
636
637 truncate_inode_pages(&inode->i_data, 0);
638 if (is_bad_inode(inode)) {
639 goto no_delete;
640 }
641 inode->i_size = 0;
642 mutex_lock(&root->fs_info->fs_mutex);
643 trans = btrfs_start_transaction(root, 1);
644 btrfs_set_trans_block_group(trans, inode);
645 ret = btrfs_truncate_in_trans(trans, root, inode);
646 BUG_ON(ret);
647 btrfs_free_inode(trans, root, inode);
648 btrfs_end_transaction(trans, root);
649 mutex_unlock(&root->fs_info->fs_mutex);
650 btrfs_btree_balance_dirty(root);
651 return;
652no_delete:
653 clear_inode(inode);
654}
655
656/*
657 * this returns the key found in the dir entry in the location pointer.
658 * If no dir entries were found, location->objectid is 0.
659 */
660static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
661 struct btrfs_key *location)
662{
663 const char *name = dentry->d_name.name;
664 int namelen = dentry->d_name.len;
665 struct btrfs_dir_item *di;
666 struct btrfs_path *path;
667 struct btrfs_root *root = BTRFS_I(dir)->root;
668 int ret;
669
670 path = btrfs_alloc_path();
671 BUG_ON(!path);
672 btrfs_init_path(path);
673 di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
674 namelen, 0);
675 if (!di || IS_ERR(di)) {
676 location->objectid = 0;
677 ret = 0;
678 goto out;
679 }
680 btrfs_disk_key_to_cpu(location, &di->location);
681out:
682 btrfs_release_path(root, path);
683 btrfs_free_path(path);
684 return ret;
685}
686
687/*
688 * when we hit a tree root in a directory, the btrfs part of the inode
689 * needs to be changed to reflect the root directory of the tree root. This
690 * is kind of like crossing a mount point.
691 */
692static int fixup_tree_root_location(struct btrfs_root *root,
693 struct btrfs_key *location,
694 struct btrfs_root **sub_root)
695{
696 struct btrfs_path *path;
697 struct btrfs_root_item *ri;
698
699 if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
700 return 0;
701 if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
702 return 0;
703
704 path = btrfs_alloc_path();
705 BUG_ON(!path);
706 mutex_lock(&root->fs_info->fs_mutex);
707
708 *sub_root = btrfs_read_fs_root(root->fs_info, location);
709 if (IS_ERR(*sub_root))
710 return PTR_ERR(*sub_root);
711
712 ri = &(*sub_root)->root_item;
713 location->objectid = btrfs_root_dirid(ri);
714 location->flags = 0;
715 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
716 location->offset = 0;
717
718 btrfs_free_path(path);
719 mutex_unlock(&root->fs_info->fs_mutex);
720 return 0;
721}
722
723static int btrfs_init_locked_inode(struct inode *inode, void *p)
724{
725 struct btrfs_iget_args *args = p;
726 inode->i_ino = args->ino;
727 BTRFS_I(inode)->root = args->root;
728 return 0;
729}
730
731static int btrfs_find_actor(struct inode *inode, void *opaque)
732{
733 struct btrfs_iget_args *args = opaque;
734 return (args->ino == inode->i_ino &&
735 args->root == BTRFS_I(inode)->root);
736}
737
738struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
739 struct btrfs_root *root)
740{
741 struct inode *inode;
742 struct btrfs_iget_args args;
743 args.ino = objectid;
744 args.root = root;
745
746 inode = iget5_locked(s, objectid, btrfs_find_actor,
747 btrfs_init_locked_inode,
748 (void *)&args);
749 return inode;
750}
751
752static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
753 struct nameidata *nd)
754{
755 struct inode * inode;
756 struct btrfs_inode *bi = BTRFS_I(dir);
757 struct btrfs_root *root = bi->root;
758 struct btrfs_root *sub_root = root;
759 struct btrfs_key location;
760 int ret;
761
762 if (dentry->d_name.len > BTRFS_NAME_LEN)
763 return ERR_PTR(-ENAMETOOLONG);
764 mutex_lock(&root->fs_info->fs_mutex);
765 ret = btrfs_inode_by_name(dir, dentry, &location);
766 mutex_unlock(&root->fs_info->fs_mutex);
767 if (ret < 0)
768 return ERR_PTR(ret);
769 inode = NULL;
770 if (location.objectid) {
771 ret = fixup_tree_root_location(root, &location, &sub_root);
772 if (ret < 0)
773 return ERR_PTR(ret);
774 if (ret > 0)
775 return ERR_PTR(-ENOENT);
776 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
777 sub_root);
778 if (!inode)
779 return ERR_PTR(-EACCES);
780 if (inode->i_state & I_NEW) {
781 /* the inode and parent dir are two different roots */
782 if (sub_root != root) {
783 igrab(inode);
784 sub_root->inode = inode;
785 }
786 BTRFS_I(inode)->root = sub_root;
787 memcpy(&BTRFS_I(inode)->location, &location,
788 sizeof(location));
789 btrfs_read_locked_inode(inode);
790 unlock_new_inode(inode);
791 }
792 }
793 return d_splice_alias(inode, dentry);
794}
795
796/*
797 * readahead one full node of leaves as long as their keys include
798 * the objectid supplied
799 */
800static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path,
801 u64 objectid)
802{
803 struct btrfs_node *node;
804 int i;
805 u32 nritems;
806 u64 item_objectid;
807 u64 blocknr;
808 int slot;
809 int ret;
810
811 if (!path->nodes[1])
812 return;
813 node = btrfs_buffer_node(path->nodes[1]);
814 slot = path->slots[1];
815 nritems = btrfs_header_nritems(&node->header);
816 for (i = slot + 1; i < nritems; i++) {
817 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
818 if (item_objectid != objectid)
819 break;
820 blocknr = btrfs_node_blockptr(node, i);
821 ret = readahead_tree_block(root, blocknr);
822 if (ret)
823 break;
824 }
825}
826static unsigned char btrfs_filetype_table[] = {
827 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
828};
829
830static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
831{
832 struct inode *inode = filp->f_path.dentry->d_inode;
833 struct btrfs_root *root = BTRFS_I(inode)->root;
834 struct btrfs_item *item;
835 struct btrfs_dir_item *di;
836 struct btrfs_key key;
837 struct btrfs_path *path;
838 int ret;
839 u32 nritems;
840 struct btrfs_leaf *leaf;
841 int slot;
842 int advance;
843 unsigned char d_type;
844 int over = 0;
845 u32 di_cur;
846 u32 di_total;
847 u32 di_len;
848 int key_type = BTRFS_DIR_INDEX_KEY;
849
850 /* FIXME, use a real flag for deciding about the key type */
851 if (root->fs_info->tree_root == root)
852 key_type = BTRFS_DIR_ITEM_KEY;
853 mutex_lock(&root->fs_info->fs_mutex);
854 key.objectid = inode->i_ino;
855 key.flags = 0;
856 btrfs_set_key_type(&key, key_type);
857 key.offset = filp->f_pos;
858 path = btrfs_alloc_path();
859 btrfs_init_path(path);
860 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
861 if (ret < 0)
862 goto err;
863 advance = 0;
864 reada_leaves(root, path, inode->i_ino);
865 while(1) {
866 leaf = btrfs_buffer_leaf(path->nodes[0]);
867 nritems = btrfs_header_nritems(&leaf->header);
868 slot = path->slots[0];
869 if (advance || slot >= nritems) {
870 if (slot >= nritems -1) {
871 reada_leaves(root, path, inode->i_ino);
872 ret = btrfs_next_leaf(root, path);
873 if (ret)
874 break;
875 leaf = btrfs_buffer_leaf(path->nodes[0]);
876 nritems = btrfs_header_nritems(&leaf->header);
877 slot = path->slots[0];
878 } else {
879 slot++;
880 path->slots[0]++;
881 }
882 }
883 advance = 1;
884 item = leaf->items + slot;
885 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
886 break;
887 if (btrfs_disk_key_type(&item->key) != key_type)
888 break;
889 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
890 continue;
891 filp->f_pos = btrfs_disk_key_offset(&item->key);
892 advance = 1;
893 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
894 di_cur = 0;
895 di_total = btrfs_item_size(leaf->items + slot);
896 while(di_cur < di_total) {
897 d_type = btrfs_filetype_table[btrfs_dir_type(di)];
898 over = filldir(dirent, (const char *)(di + 1),
899 btrfs_dir_name_len(di),
900 btrfs_disk_key_offset(&item->key),
901 btrfs_disk_key_objectid(&di->location),
902 d_type);
903 if (over)
904 goto nopos;
905 di_len = btrfs_dir_name_len(di) + sizeof(*di);
906 di_cur += di_len;
907 di = (struct btrfs_dir_item *)((char *)di + di_len);
908 }
909 }
910 filp->f_pos++;
911nopos:
912 ret = 0;
913err:
914 btrfs_release_path(root, path);
915 btrfs_free_path(path);
916 mutex_unlock(&root->fs_info->fs_mutex);
917 return ret;
918}
919
920int btrfs_write_inode(struct inode *inode, int wait)
921{
922 struct btrfs_root *root = BTRFS_I(inode)->root;
923 struct btrfs_trans_handle *trans;
924 int ret = 0;
925
926 if (wait) {
927 mutex_lock(&root->fs_info->fs_mutex);
928 trans = btrfs_start_transaction(root, 1);
929 btrfs_set_trans_block_group(trans, inode);
930 ret = btrfs_commit_transaction(trans, root);
931 mutex_unlock(&root->fs_info->fs_mutex);
932 }
933 return ret;
934}
935
936/*
937 * This is somewhat expense, updating the tree every time the
938 * inode changes. But, it is most likely to find the inode in cache.
939 * FIXME, needs more benchmarking...there are no reasons other than performance
940 * to keep or drop this code.
941 */
942void btrfs_dirty_inode(struct inode *inode)
943{
944 struct btrfs_root *root = BTRFS_I(inode)->root;
945 struct btrfs_trans_handle *trans;
946
947 mutex_lock(&root->fs_info->fs_mutex);
948 trans = btrfs_start_transaction(root, 1);
949 btrfs_set_trans_block_group(trans, inode);
950 btrfs_update_inode(trans, root, inode);
951 btrfs_end_transaction(trans, root);
952 mutex_unlock(&root->fs_info->fs_mutex);
953 btrfs_btree_balance_dirty(root);
954}
955
956static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
957 struct btrfs_root *root,
958 u64 objectid,
959 struct btrfs_block_group_cache *group,
960 int mode)
961{
962 struct inode *inode;
963 struct btrfs_inode_item inode_item;
964 struct btrfs_key *location;
965 int ret;
966 int owner;
967
968 inode = new_inode(root->fs_info->sb);
969 if (!inode)
970 return ERR_PTR(-ENOMEM);
971
972 BTRFS_I(inode)->root = root;
973 if (mode & S_IFDIR)
974 owner = 0;
975 else
976 owner = 1;
977 group = btrfs_find_block_group(root, group, 0, 0, owner);
978 BTRFS_I(inode)->block_group = group;
979
980 inode->i_uid = current->fsuid;
981 inode->i_gid = current->fsgid;
982 inode->i_mode = mode;
983 inode->i_ino = objectid;
984 inode->i_blocks = 0;
985 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
986 fill_inode_item(&inode_item, inode);
987 location = &BTRFS_I(inode)->location;
988 location->objectid = objectid;
989 location->flags = 0;
990 location->offset = 0;
991 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
992
993 ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
994 BUG_ON(ret);
995
996 insert_inode_hash(inode);
997 return inode;
998}
999
1000static inline u8 btrfs_inode_type(struct inode *inode)
1001{
1002 return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
1003}
1004
1005static int btrfs_add_link(struct btrfs_trans_handle *trans,
1006 struct dentry *dentry, struct inode *inode)
1007{
1008 int ret;
1009 struct btrfs_key key;
1010 struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1011 key.objectid = inode->i_ino;
1012 key.flags = 0;
1013 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1014 key.offset = 0;
1015
1016 ret = btrfs_insert_dir_item(trans, root,
1017 dentry->d_name.name, dentry->d_name.len,
1018 dentry->d_parent->d_inode->i_ino,
1019 &key, btrfs_inode_type(inode));
1020 if (ret == 0) {
1021 dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2;
1022 ret = btrfs_update_inode(trans, root,
1023 dentry->d_parent->d_inode);
1024 }
1025 return ret;
1026}
1027
1028static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1029 struct dentry *dentry, struct inode *inode)
1030{
1031 int err = btrfs_add_link(trans, dentry, inode);
1032 if (!err) {
1033 d_instantiate(dentry, inode);
1034 return 0;
1035 }
1036 if (err > 0)
1037 err = -EEXIST;
1038 return err;
1039}
1040
1041static int btrfs_create(struct inode *dir, struct dentry *dentry,
1042 int mode, struct nameidata *nd)
1043{
1044 struct btrfs_trans_handle *trans;
1045 struct btrfs_root *root = BTRFS_I(dir)->root;
1046 struct inode *inode;
1047 int err;
1048 int drop_inode = 0;
1049 u64 objectid;
1050
1051 mutex_lock(&root->fs_info->fs_mutex);
1052 trans = btrfs_start_transaction(root, 1);
1053 btrfs_set_trans_block_group(trans, dir);
1054
1055 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1056 if (err) {
1057 err = -ENOSPC;
1058 goto out_unlock;
1059 }
1060
1061 inode = btrfs_new_inode(trans, root, objectid,
1062 BTRFS_I(dir)->block_group, mode);
1063 err = PTR_ERR(inode);
1064 if (IS_ERR(inode))
1065 goto out_unlock;
1066
1067 btrfs_set_trans_block_group(trans, inode);
1068 err = btrfs_add_nondir(trans, dentry, inode);
1069 if (err)
1070 drop_inode = 1;
1071 else {
1072 inode->i_mapping->a_ops = &btrfs_aops;
1073 inode->i_fop = &btrfs_file_operations;
1074 inode->i_op = &btrfs_file_inode_operations;
1075 }
1076 dir->i_sb->s_dirt = 1;
1077 btrfs_update_inode_block_group(trans, inode);
1078 btrfs_update_inode_block_group(trans, dir);
1079out_unlock:
1080 btrfs_end_transaction(trans, root);
1081 mutex_unlock(&root->fs_info->fs_mutex);
1082
1083 if (drop_inode) {
1084 inode_dec_link_count(inode);
1085 iput(inode);
1086 }
1087 btrfs_btree_balance_dirty(root);
1088 return err;
1089}
1090
1091static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
1092 struct dentry *dentry)
1093{
1094 struct btrfs_trans_handle *trans;
1095 struct btrfs_root *root = BTRFS_I(dir)->root;
1096 struct inode *inode = old_dentry->d_inode;
1097 int err;
1098 int drop_inode = 0;
1099
1100 if (inode->i_nlink == 0)
1101 return -ENOENT;
1102
1103 inc_nlink(inode);
1104 mutex_lock(&root->fs_info->fs_mutex);
1105 trans = btrfs_start_transaction(root, 1);
1106 btrfs_set_trans_block_group(trans, dir);
1107 atomic_inc(&inode->i_count);
1108 err = btrfs_add_nondir(trans, dentry, inode);
1109 if (err)
1110 drop_inode = 1;
1111 dir->i_sb->s_dirt = 1;
1112 btrfs_update_inode_block_group(trans, dir);
1113 btrfs_update_inode(trans, root, inode);
1114
1115 btrfs_end_transaction(trans, root);
1116 mutex_unlock(&root->fs_info->fs_mutex);
1117
1118 if (drop_inode) {
1119 inode_dec_link_count(inode);
1120 iput(inode);
1121 }
1122 btrfs_btree_balance_dirty(root);
1123 return err;
1124}
1125
1126static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
1127 struct btrfs_root *root,
1128 u64 objectid, u64 dirid)
1129{
1130 int ret;
1131 char buf[2];
1132 struct btrfs_key key;
1133
1134 buf[0] = '.';
1135 buf[1] = '.';
1136
1137 key.objectid = objectid;
1138 key.offset = 0;
1139 key.flags = 0;
1140 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1141
1142 ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid,
1143 &key, BTRFS_FT_DIR);
1144 if (ret)
1145 goto error;
1146 key.objectid = dirid;
1147 ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid,
1148 &key, BTRFS_FT_DIR);
1149 if (ret)
1150 goto error;
1151error:
1152 return ret;
1153}
1154
1155static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1156{
1157 struct inode *inode;
1158 struct btrfs_trans_handle *trans;
1159 struct btrfs_root *root = BTRFS_I(dir)->root;
1160 int err = 0;
1161 int drop_on_err = 0;
1162 u64 objectid;
1163
1164 mutex_lock(&root->fs_info->fs_mutex);
1165 trans = btrfs_start_transaction(root, 1);
1166 btrfs_set_trans_block_group(trans, dir);
1167 if (IS_ERR(trans)) {
1168 err = PTR_ERR(trans);
1169 goto out_unlock;
1170 }
1171
1172 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1173 if (err) {
1174 err = -ENOSPC;
1175 goto out_unlock;
1176 }
1177
1178 inode = btrfs_new_inode(trans, root, objectid,
1179 BTRFS_I(dir)->block_group, S_IFDIR | mode);
1180 if (IS_ERR(inode)) {
1181 err = PTR_ERR(inode);
1182 goto out_fail;
1183 }
1184 drop_on_err = 1;
1185 inode->i_op = &btrfs_dir_inode_operations;
1186 inode->i_fop = &btrfs_dir_file_operations;
1187 btrfs_set_trans_block_group(trans, inode);
1188
1189 err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino);
1190 if (err)
1191 goto out_fail;
1192
1193 inode->i_size = 6;
1194 err = btrfs_update_inode(trans, root, inode);
1195 if (err)
1196 goto out_fail;
1197 err = btrfs_add_link(trans, dentry, inode);
1198 if (err)
1199 goto out_fail;
1200 d_instantiate(dentry, inode);
1201 drop_on_err = 0;
1202 dir->i_sb->s_dirt = 1;
1203 btrfs_update_inode_block_group(trans, inode);
1204 btrfs_update_inode_block_group(trans, dir);
1205
1206out_fail:
1207 btrfs_end_transaction(trans, root);
1208out_unlock:
1209 mutex_unlock(&root->fs_info->fs_mutex);
1210 if (drop_on_err)
1211 iput(inode);
1212 btrfs_btree_balance_dirty(root);
1213 return err;
1214}
1215
1216/*
1217 * FIBMAP and others want to pass in a fake buffer head. They need to
1218 * use BTRFS_GET_BLOCK_NO_DIRECT to make sure we don't try to memcpy
1219 * any packed file data into the fake bh
1220 */
/*
 * Values for the 'create' argument of btrfs_get_block_lock(), which tests
 * BTRFS_GET_BLOCK_CREATE and BTRFS_GET_BLOCK_NO_DIRECT with a bitwise AND.
 */
1221#define BTRFS_GET_BLOCK_NO_CREATE 0
1222#define BTRFS_GET_BLOCK_CREATE 1
1223#define BTRFS_GET_BLOCK_NO_DIRECT 2
1224
1225/*
1226 * FIXME create==1 doe not work.
1227 */
1228static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
1229 struct buffer_head *result, int create)
1230{
1231 int ret;
1232 int err = 0;
1233 u64 blocknr;
1234 u64 extent_start = 0;
1235 u64 extent_end = 0;
1236 u64 objectid = inode->i_ino;
1237 u32 found_type;
1238 u64 alloc_hint = 0;
1239 struct btrfs_path *path;
1240 struct btrfs_root *root = BTRFS_I(inode)->root;
1241 struct btrfs_file_extent_item *item;
1242 struct btrfs_leaf *leaf;
1243 struct btrfs_disk_key *found_key;
1244 struct btrfs_trans_handle *trans = NULL;
1245
1246 path = btrfs_alloc_path();
1247 BUG_ON(!path);
1248 btrfs_init_path(path);
1249 if (create & BTRFS_GET_BLOCK_CREATE) {
1250 WARN_ON(1);
1251 /* this almost but not quite works */
1252 trans = btrfs_start_transaction(root, 1);
1253 if (!trans) {
1254 err = -ENOMEM;
1255 goto out;
1256 }
1257 ret = btrfs_drop_extents(trans, root, inode,
1258 iblock << inode->i_blkbits,
1259 (iblock + 1) << inode->i_blkbits,
1260 &alloc_hint);
1261 BUG_ON(ret);
1262 }
1263
1264 ret = btrfs_lookup_file_extent(NULL, root, path,
1265 inode->i_ino,
1266 iblock << inode->i_blkbits, 0);
1267 if (ret < 0) {
1268 err = ret;
1269 goto out;
1270 }
1271
1272 if (ret != 0) {
1273 if (path->slots[0] == 0) {
1274 btrfs_release_path(root, path);
1275 goto not_found;
1276 }
1277 path->slots[0]--;
1278 }
1279
1280 item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
1281 struct btrfs_file_extent_item);
1282 leaf = btrfs_buffer_leaf(path->nodes[0]);
1283 blocknr = btrfs_file_extent_disk_blocknr(item);
1284 blocknr += btrfs_file_extent_offset(item);
1285
1286 /* are we inside the extent that was found? */
1287 found_key = &leaf->items[path->slots[0]].key;
1288 found_type = btrfs_disk_key_type(found_key);
1289 if (btrfs_disk_key_objectid(found_key) != objectid ||
1290 found_type != BTRFS_EXTENT_DATA_KEY) {
1291 extent_end = 0;
1292 extent_start = 0;
1293 goto not_found;
1294 }
1295 found_type = btrfs_file_extent_type(item);
1296 extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1297 if (found_type == BTRFS_FILE_EXTENT_REG) {
1298 extent_start = extent_start >> inode->i_blkbits;
1299 extent_end = extent_start + btrfs_file_extent_num_blocks(item);
1300 err = 0;
1301 if (btrfs_file_extent_disk_blocknr(item) == 0)
1302 goto out;
1303 if (iblock >= extent_start && iblock < extent_end) {
1304 btrfs_map_bh_to_logical(root, result, blocknr +
1305 iblock - extent_start);
1306 goto out;
1307 }
1308 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1309 char *ptr;
1310 char *map;
1311 u32 size;
1312
1313 if (create & BTRFS_GET_BLOCK_NO_DIRECT) {
1314 err = -EINVAL;
1315 goto out;
1316 }
1317 size = btrfs_file_extent_inline_len(leaf->items +
1318 path->slots[0]);
1319 extent_end = (extent_start + size) >> inode->i_blkbits;
1320 extent_start >>= inode->i_blkbits;
1321 if (iblock < extent_start || iblock > extent_end) {
1322 goto not_found;
1323 }
1324 ptr = btrfs_file_extent_inline_start(item);
1325 map = kmap(result->b_page);
1326 memcpy(map, ptr, size);
1327 memset(map + size, 0, PAGE_CACHE_SIZE - size);
1328 flush_dcache_page(result->b_page);
1329 kunmap(result->b_page);
1330 set_buffer_uptodate(result);
1331 SetPageChecked(result->b_page);
1332 btrfs_map_bh_to_logical(root, result, 0);
1333 }
1334not_found:
1335 if (create & BTRFS_GET_BLOCK_CREATE) {
1336 struct btrfs_key ins;
1337 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
1338 1, alloc_hint, (u64)-1,
1339 &ins, 1);
1340 BUG_ON(ret);
1341 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
1342 iblock << inode->i_blkbits,
1343 ins.objectid, ins.offset,
1344 ins.offset);
1345 BUG_ON(ret);
1346 SetPageChecked(result->b_page);
1347 btrfs_map_bh_to_logical(root, result, ins.objectid);
1348 }
1349out:
1350 if (trans)
1351 err = btrfs_end_transaction(trans, root);
1352 btrfs_free_path(path);
1353 return err;
1354}
1355
1356int btrfs_get_block(struct inode *inode, sector_t iblock,
1357 struct buffer_head *result, int create)
1358{
1359 int err;
1360 struct btrfs_root *root = BTRFS_I(inode)->root;
1361 mutex_lock(&root->fs_info->fs_mutex);
1362 err = btrfs_get_block_lock(inode, iblock, result, create);
1363 mutex_unlock(&root->fs_info->fs_mutex);
1364 return err;
1365}
1366
1367static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock,
1368 struct buffer_head *result, int create)
1369{
1370 struct btrfs_root *root = BTRFS_I(inode)->root;
1371 mutex_lock(&root->fs_info->fs_mutex);
1372 btrfs_get_block_lock(inode, iblock, result, BTRFS_GET_BLOCK_NO_DIRECT);
1373 mutex_unlock(&root->fs_info->fs_mutex);
1374 return 0;
1375}
1376
1377static sector_t btrfs_bmap(struct address_space *as, sector_t block)
1378{
1379 return generic_block_bmap(as, block, btrfs_get_block_bmap);
1380}
1381
1382static int btrfs_prepare_write(struct file *file, struct page *page,
1383 unsigned from, unsigned to)
1384{
1385 return block_prepare_write(page, from, to, btrfs_get_block);
1386}
1387
1388static int btrfs_readpage(struct file *file, struct page *page)
1389{
1390 return mpage_readpage(page, btrfs_get_block);
1391}
1392
1393/*
1394 * Aside from a tiny bit of packed file data handling, this is the
1395 * same as the generic code.
1396 *
1397 * While block_write_full_page is writing back the dirty buffers under
1398 * the page lock, whoever dirtied the buffers may decide to clean them
1399 * again at any time. We handle that by only looking at the buffer
1400 * state inside lock_buffer().
1401 *
1402 * If block_write_full_page() is called for regular writeback
1403 * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
1404 * locked buffer. This only can happen if someone has written the buffer
1405 * directly, with submit_bh(). At the address_space level PageWriteback
1406 * prevents this contention from occurring.
1407 */
/*
 * Write out the dirty buffers of the locked @page of @inode.
 *
 * Dirty, unmapped buffers that lie inside i_size are mapped through
 * btrfs_get_block() with create == 0.  Dirty buffers whose b_blocknr is 0
 * have their dirty bit cleared but are never submitted.
 *
 * Returns 0, or the error from btrfs_get_block() after the recovery path
 * has submitted the buffers that were already mapped and dirty.  The page
 * is unlocked on both paths before returning.
 */
1408static int __btrfs_write_full_page(struct inode *inode, struct page *page,
1409 struct writeback_control *wbc)
1410{
1411 int err;
1412 sector_t block;
1413 sector_t last_block;
1414 struct buffer_head *bh, *head;
1415 const unsigned blocksize = 1 << inode->i_blkbits;
1416 int nr_underway = 0;
1417
1418 BUG_ON(!PageLocked(page));
1419
1420 last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1421
1422 if (!page_has_buffers(page)) {
1423 create_empty_buffers(page, blocksize,
1424 (1 << BH_Dirty)|(1 << BH_Uptodate));
1425 }
1426
1427 /*
1428 * Be very careful. We have no exclusion from __set_page_dirty_buffers
1429 * here, and the (potentially unmapped) buffers may become dirty at
1430 * any time. If a buffer becomes dirty here after we've inspected it
1431 * then we just miss that fact, and the page stays dirty.
1432 *
1433 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
1434 * handle that here by just cleaning them.
1435 */
1436
1437 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1438 head = page_buffers(page);
1439 bh = head;
1440
1441 /*
1442 * Get all the dirty buffers mapped to disk addresses and
1443 * handle any aliases from the underlying blockdev's mapping.
1444 */
1445 do {
1446 if (block > last_block) {
1447 /*
1448 * mapped buffers outside i_size will occur, because
1449 * this page can be outside i_size when there is a
1450 * truncate in progress.
1451 */
1452 /*
1453 * The buffer was zeroed by block_write_full_page()
1454 */
1455 clear_buffer_dirty(bh);
1456 set_buffer_uptodate(bh);
1457 } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
1458 WARN_ON(bh->b_size != blocksize);
		/* create == 0: only look up an existing mapping */
1459 err = btrfs_get_block(inode, block, bh, 0);
1460 if (err) {
1461 goto recover;
1462 }
1463 if (buffer_new(bh)) {
1464 /* blockdev mappings never come here */
1465 clear_buffer_new(bh);
1466 }
1467 }
1468 bh = bh->b_this_page;
1469 block++;
1470 } while (bh != head);
1471
	/* bh == head again here; second pass locks and tags buffers for I/O */
1472 do {
1473 if (!buffer_mapped(bh))
1474 continue;
1475 /*
1476 * If it's a fully non-blocking write attempt and we cannot
1477 * lock the buffer then redirty the page. Note that this can
1478 * potentially cause a busy-wait loop from pdflush and kswapd
1479 * activity, but those code paths have their own higher-level
1480 * throttling.
1481 */
1482 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1483 lock_buffer(bh);
1484 } else if (test_set_buffer_locked(bh)) {
1485 redirty_page_for_writepage(wbc, page);
1486 continue;
1487 }
		/*
		 * Buffers with b_blocknr == 0 are cleaned and unlocked
		 * without I/O.  NOTE(review): btrfs_get_block_lock() maps
		 * inline extents with logical block 0, which is presumably
		 * what produces b_blocknr == 0 here -- confirm against
		 * btrfs_map_bh_to_logical().
		 */
1488 if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
1489 mark_buffer_async_write(bh);
1490 } else {
1491 unlock_buffer(bh);
1492 }
1493 } while ((bh = bh->b_this_page) != head);
1494
1495 /*
1496 * The page and its buffers are protected by PageWriteback(), so we can
1497 * drop the bh refcounts early.
1498 */
1499 BUG_ON(PageWriteback(page));
1500 set_page_writeback(page);
1501
1502 do {
1503 struct buffer_head *next = bh->b_this_page;
1504 if (buffer_async_write(bh)) {
1505 submit_bh(WRITE, bh);
1506 nr_underway++;
1507 }
1508 bh = next;
1509 } while (bh != head);
1510 unlock_page(page);
1511
1512 err = 0;
1513done:
1514 if (nr_underway == 0) {
1515 /*
1516 * The page was marked dirty, but the buffers were
1517 * clean. Someone wrote them back by hand with
1518 * ll_rw_block/submit_bh. A rare case.
1519 */
1520 int uptodate = 1;
1521 do {
1522 if (!buffer_uptodate(bh)) {
1523 uptodate = 0;
1524 break;
1525 }
1526 bh = bh->b_this_page;
1527 } while (bh != head);
1528 if (uptodate)
1529 SetPageUptodate(page);
1530 end_page_writeback(page);
1531 }
1532 return err;
1533
1534recover:
1535 /*
1536 * ENOSPC, or some other error. We may already have added some
1537 * blocks to the file, so we need to write these out to avoid
1538 * exposing stale data.
1539 * The page is currently locked and not marked for writeback
1540 */
1541 bh = head;
1542 /* Recovery: lock and submit the mapped buffers */
1543 do {
1544 if (buffer_mapped(bh) && buffer_dirty(bh)) {
1545 lock_buffer(bh);
1546 mark_buffer_async_write(bh);
1547 } else {
1548 /*
1549 * The buffer may have been set dirty during
1550 * attachment to a dirty page.
1551 */
1552 clear_buffer_dirty(bh);
1553 }
1554 } while ((bh = bh->b_this_page) != head);
1555 SetPageError(page);
1556 BUG_ON(PageWriteback(page));
1557 set_page_writeback(page);
1558 do {
1559 struct buffer_head *next = bh->b_this_page;
1560 if (buffer_async_write(bh)) {
1561 clear_buffer_dirty(bh);
1562 submit_bh(WRITE, bh);
1563 nr_underway++;
1564 }
1565 bh = next;
1566 } while (bh != head);
1567 unlock_page(page);
	/* err still holds the btrfs_get_block() failure for the caller */
1568 goto done;
1569}
1570
1571static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1572{
1573 struct inode * const inode = page->mapping->host;
1574 loff_t i_size = i_size_read(inode);
1575 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
1576 unsigned offset;
1577 void *kaddr;
1578
1579 /* Is the page fully inside i_size? */
1580 if (page->index < end_index)
1581 return __btrfs_write_full_page(inode, page, wbc);
1582
1583 /* Is the page fully outside i_size? (truncate in progress) */
1584 offset = i_size & (PAGE_CACHE_SIZE-1);
1585 if (page->index >= end_index+1 || !offset) {
1586 /*
1587 * The page may have dirty, unmapped buffers. For example,
1588 * they may have been added in ext3_writepage(). Make them
1589 * freeable here, so the page does not leak.
1590 */
1591 block_invalidatepage(page, 0);
1592 unlock_page(page);
1593 return 0; /* don't care */
1594 }
1595
1596 /*
1597 * The page straddles i_size. It must be zeroed out on each and every
1598 * writepage invokation because it may be mmapped. "A file is mapped
1599 * in multiples of the page size. For a file that is not a multiple of
1600 * the page size, the remaining memory is zeroed when mapped, and
1601 * writes to that region are not written out to the file."
1602 */
1603 kaddr = kmap_atomic(page, KM_USER0);
1604 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1605 flush_dcache_page(page);
1606 kunmap_atomic(kaddr, KM_USER0);
1607 return __btrfs_write_full_page(inode, page, wbc);
1608}
1609
1610static void btrfs_truncate(struct inode *inode)
1611{
1612 struct btrfs_root *root = BTRFS_I(inode)->root;
1613 int ret;
1614 struct btrfs_trans_handle *trans;
1615
1616 if (!S_ISREG(inode->i_mode))
1617 return;
1618 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1619 return;
1620
1621 btrfs_truncate_page(inode->i_mapping, inode->i_size);
1622
1623 mutex_lock(&root->fs_info->fs_mutex);
1624 trans = btrfs_start_transaction(root, 1);
1625 btrfs_set_trans_block_group(trans, inode);
1626
1627 /* FIXME, add redo link to tree so we don't leak on crash */
1628 ret = btrfs_truncate_in_trans(trans, root, inode);
1629 BUG_ON(ret);
1630 btrfs_update_inode(trans, root, inode);
1631 ret = btrfs_end_transaction(trans, root);
1632 BUG_ON(ret);
1633 mutex_unlock(&root->fs_info->fs_mutex);
1634 btrfs_btree_balance_dirty(root);
1635}
1636
1637int btrfs_commit_write(struct file *file, struct page *page,
1638 unsigned from, unsigned to)
1639{
1640 struct inode *inode = page->mapping->host;
1641 struct buffer_head *bh;
1642 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1643
1644 SetPageUptodate(page);
1645 bh = page_buffers(page);
1646 set_buffer_uptodate(bh);
1647 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1648 set_page_dirty(page);
1649 }
1650 if (pos > inode->i_size) {
1651 i_size_write(inode, pos);
1652 mark_inode_dirty(inode);
1653 }
1654 return 0;
1655}
1656
1657static int create_subvol(struct btrfs_root *root, char *name, int namelen)
1658{
1659 struct btrfs_trans_handle *trans;
1660 struct btrfs_key key;
1661 struct btrfs_root_item root_item;
1662 struct btrfs_inode_item *inode_item;
1663 struct buffer_head *subvol;
1664 struct btrfs_leaf *leaf;
1665 struct btrfs_root *new_root;
1666 struct inode *inode;
1667 struct inode *dir;
1668 int ret;
1669 u64 objectid;
1670 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
1671
1672 mutex_lock(&root->fs_info->fs_mutex);
1673 trans = btrfs_start_transaction(root, 1);
1674 BUG_ON(!trans);
1675
1676 subvol = btrfs_alloc_free_block(trans, root, 0);
1677 if (subvol == NULL)
1678 return -ENOSPC;
1679 leaf = btrfs_buffer_leaf(subvol);
1680 btrfs_set_header_nritems(&leaf->header, 0);
1681 btrfs_set_header_level(&leaf->header, 0);
1682 btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
1683 btrfs_set_header_generation(&leaf->header, trans->transid);
1684 btrfs_set_header_owner(&leaf->header, root->root_key.objectid);
1685 memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
1686 sizeof(leaf->header.fsid));
1687 mark_buffer_dirty(subvol);
1688
1689 inode_item = &root_item.inode;
1690 memset(inode_item, 0, sizeof(*inode_item));
1691 btrfs_set_inode_generation(inode_item, 1);
1692 btrfs_set_inode_size(inode_item, 3);
1693 btrfs_set_inode_nlink(inode_item, 1);
1694 btrfs_set_inode_nblocks(inode_item, 1);
1695 btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
1696
1697 btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
1698 btrfs_set_root_refs(&root_item, 1);
1699 brelse(subvol);
1700 subvol = NULL;
1701
1702 ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
1703 0, &objectid);
1704 BUG_ON(ret);
1705
1706 btrfs_set_root_dirid(&root_item, new_dirid);
1707
1708 key.objectid = objectid;
1709 key.offset = 1;
1710 key.flags = 0;
1711 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
1712 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
1713 &root_item);
1714 BUG_ON(ret);
1715
1716 /*
1717 * insert the directory item
1718 */
1719 key.offset = (u64)-1;
1720 dir = root->fs_info->sb->s_root->d_inode;
1721 ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
1722 name, namelen, dir->i_ino, &key,
1723 BTRFS_FT_DIR);
1724 BUG_ON(ret);
1725
1726 ret = btrfs_commit_transaction(trans, root);
1727 BUG_ON(ret);
1728
1729 new_root = btrfs_read_fs_root(root->fs_info, &key);
1730 BUG_ON(!new_root);
1731
1732 trans = btrfs_start_transaction(new_root, 1);
1733 BUG_ON(!trans);
1734
1735 inode = btrfs_new_inode(trans, new_root, new_dirid,
1736 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
1737 inode->i_op = &btrfs_dir_inode_operations;
1738 inode->i_fop = &btrfs_dir_file_operations;
1739
1740 ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid);
1741 BUG_ON(ret);
1742
1743 inode->i_nlink = 1;
1744 inode->i_size = 6;
1745 ret = btrfs_update_inode(trans, new_root, inode);
1746 BUG_ON(ret);
1747
1748 ret = btrfs_commit_transaction(trans, new_root);
1749 BUG_ON(ret);
1750
1751 iput(inode);
1752
1753 mutex_unlock(&root->fs_info->fs_mutex);
1754 btrfs_btree_balance_dirty(root);
1755 return 0;
1756}
1757
1758static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
1759{
1760 struct btrfs_trans_handle *trans;
1761 struct btrfs_key key;
1762 struct btrfs_root_item new_root_item;
1763 int ret;
1764 u64 objectid;
1765
1766 if (!root->ref_cows)
1767 return -EINVAL;
1768
1769 mutex_lock(&root->fs_info->fs_mutex);
1770 trans = btrfs_start_transaction(root, 1);
1771 BUG_ON(!trans);
1772
1773 ret = btrfs_update_inode(trans, root, root->inode);
1774 BUG_ON(ret);
1775
1776 ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
1777 0, &objectid);
1778 BUG_ON(ret);
1779
1780 memcpy(&new_root_item, &root->root_item,
1781 sizeof(new_root_item));
1782
1783 key.objectid = objectid;
1784 key.offset = 1;
1785 key.flags = 0;
1786 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
1787 btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
1788
1789 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
1790 &new_root_item);
1791 BUG_ON(ret);
1792
1793 /*
1794 * insert the directory item
1795 */
1796 key.offset = (u64)-1;
1797 ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
1798 name, namelen,
1799 root->fs_info->sb->s_root->d_inode->i_ino,
1800 &key, BTRFS_FT_DIR);
1801
1802 BUG_ON(ret);
1803
1804 ret = btrfs_inc_root_ref(trans, root);
1805 BUG_ON(ret);
1806
1807 ret = btrfs_commit_transaction(trans, root);
1808 BUG_ON(ret);
1809 mutex_unlock(&root->fs_info->fs_mutex);
1810 btrfs_btree_balance_dirty(root);
1811 return 0;
1812}
1813
1814int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
1815 cmd, unsigned long arg)
1816{
1817 struct btrfs_root *root = BTRFS_I(inode)->root;
1818 struct btrfs_ioctl_vol_args vol_args;
1819 int ret = 0;
1820 struct btrfs_dir_item *di;
1821 int namelen;
1822 struct btrfs_path *path;
1823 u64 root_dirid;
1824
1825 switch (cmd) {
1826 case BTRFS_IOC_SNAP_CREATE:
1827 if (copy_from_user(&vol_args,
1828 (struct btrfs_ioctl_vol_args __user *)arg,
1829 sizeof(vol_args)))
1830 return -EFAULT;
1831 namelen = strlen(vol_args.name);
1832 if (namelen > BTRFS_VOL_NAME_MAX)
1833 return -EINVAL;
1834 path = btrfs_alloc_path();
1835 if (!path)
1836 return -ENOMEM;
1837 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
1838 mutex_lock(&root->fs_info->fs_mutex);
1839 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
1840 path, root_dirid,
1841 vol_args.name, namelen, 0);
1842 mutex_unlock(&root->fs_info->fs_mutex);
1843 btrfs_free_path(path);
1844 if (di && !IS_ERR(di))
1845 return -EEXIST;
1846
1847 if (root == root->fs_info->tree_root)
1848 ret = create_subvol(root, vol_args.name, namelen);
1849 else
1850 ret = create_snapshot(root, vol_args.name, namelen);
1851 WARN_ON(ret);
1852 break;
1853 default:
1854 return -ENOTTY;
1855 }
1856 return ret;
1857}
1858
#ifdef CONFIG_COMPAT
/*
 * compat ioctl entry point: converts @arg with compat_ptr() and forwards
 * to btrfs_ioctl() while holding the big kernel lock.
 */
long btrfs_compat_ioctl(struct file *file, unsigned int cmd,
			unsigned long arg)
{
	struct inode *host = file->f_path.dentry->d_inode;
	unsigned long native_arg;
	int ret;

	lock_kernel();
	native_arg = (unsigned long) compat_ptr(arg);
	ret = btrfs_ioctl(host, file, cmd, native_arg);
	unlock_kernel();

	return ret;
}
#endif
1872
1873/*
1874 * Called inside transaction, so use GFP_NOFS
1875 */
1876struct inode *btrfs_alloc_inode(struct super_block *sb)
1877{
1878 struct btrfs_inode *ei;
1879
1880 ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
1881 if (!ei)
1882 return NULL;
1883 return &ei->vfs_inode;
1884}
1885
1886void btrfs_destroy_inode(struct inode *inode)
1887{
1888 WARN_ON(!list_empty(&inode->i_dentry));
1889 WARN_ON(inode->i_data.nrpages);
1890
1891 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
1892}
1893
1894static void init_once(void * foo, struct kmem_cache * cachep,
1895 unsigned long flags)
1896{
1897 struct btrfs_inode *ei = (struct btrfs_inode *) foo;
1898
1899 inode_init_once(&ei->vfs_inode);
1900}
1901
1902void btrfs_destroy_cachep(void)
1903{
1904 if (btrfs_inode_cachep)
1905 kmem_cache_destroy(btrfs_inode_cachep);
1906 if (btrfs_trans_handle_cachep)
1907 kmem_cache_destroy(btrfs_trans_handle_cachep);
1908 if (btrfs_transaction_cachep)
1909 kmem_cache_destroy(btrfs_transaction_cachep);
1910 if (btrfs_bit_radix_cachep)
1911 kmem_cache_destroy(btrfs_bit_radix_cachep);
1912 if (btrfs_path_cachep)
1913 kmem_cache_destroy(btrfs_path_cachep);
1914}
1915
1916int btrfs_init_cachep(void)
1917{
1918 btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
1919 sizeof(struct btrfs_inode),
1920 0, (SLAB_RECLAIM_ACCOUNT|
1921 SLAB_MEM_SPREAD),
1922 init_once, NULL);
1923 if (!btrfs_inode_cachep)
1924 goto fail;
1925 btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
1926 sizeof(struct btrfs_trans_handle),
1927 0, (SLAB_RECLAIM_ACCOUNT|
1928 SLAB_MEM_SPREAD),
1929 NULL, NULL);
1930 if (!btrfs_trans_handle_cachep)
1931 goto fail;
1932 btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
1933 sizeof(struct btrfs_transaction),
1934 0, (SLAB_RECLAIM_ACCOUNT|
1935 SLAB_MEM_SPREAD),
1936 NULL, NULL);
1937 if (!btrfs_transaction_cachep)
1938 goto fail;
1939 btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
1940 sizeof(struct btrfs_transaction),
1941 0, (SLAB_RECLAIM_ACCOUNT|
1942 SLAB_MEM_SPREAD),
1943 NULL, NULL);
1944 if (!btrfs_path_cachep)
1945 goto fail;
1946 btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix",
1947 256,
1948 0, (SLAB_RECLAIM_ACCOUNT|
1949 SLAB_MEM_SPREAD |
1950 SLAB_DESTROY_BY_RCU),
1951 NULL, NULL);
1952 if (!btrfs_bit_radix_cachep)
1953 goto fail;
1954 return 0;
1955fail:
1956 btrfs_destroy_cachep();
1957 return -ENOMEM;
1958}
1959
1960static int btrfs_getattr(struct vfsmount *mnt,
1961 struct dentry *dentry, struct kstat *stat)
1962{
1963 struct inode *inode = dentry->d_inode;
1964 generic_fillattr(inode, stat);
1965 stat->blksize = 256 * 1024;
1966 return 0;
1967}
1968
1969static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
1970 struct inode * new_dir,struct dentry *new_dentry)
1971{
1972 struct btrfs_trans_handle *trans;
1973 struct btrfs_root *root = BTRFS_I(old_dir)->root;
1974 struct inode *new_inode = new_dentry->d_inode;
1975 struct inode *old_inode = old_dentry->d_inode;
1976 struct timespec ctime = CURRENT_TIME;
1977 struct btrfs_path *path;
1978 struct btrfs_dir_item *di;
1979 int ret;
1980
1981 if (S_ISDIR(old_inode->i_mode) && new_inode &&
1982 new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
1983 return -ENOTEMPTY;
1984 }
1985 mutex_lock(&root->fs_info->fs_mutex);
1986 trans = btrfs_start_transaction(root, 1);
1987 btrfs_set_trans_block_group(trans, new_dir);
1988 path = btrfs_alloc_path();
1989 if (!path) {
1990 ret = -ENOMEM;
1991 goto out_fail;
1992 }
1993
1994 old_dentry->d_inode->i_nlink++;
1995 old_dir->i_ctime = old_dir->i_mtime = ctime;
1996 new_dir->i_ctime = new_dir->i_mtime = ctime;
1997 old_inode->i_ctime = ctime;
1998 if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) {
1999 struct btrfs_key *location = &BTRFS_I(new_dir)->location;
2000 u64 old_parent_oid;
2001 di = btrfs_lookup_dir_item(trans, root, path, old_inode->i_ino,
2002 "..", 2, -1);
2003 if (IS_ERR(di)) {
2004 ret = PTR_ERR(di);
2005 goto out_fail;
2006 }
2007 if (!di) {
2008 ret = -ENOENT;
2009 goto out_fail;
2010 }
2011 old_parent_oid = btrfs_disk_key_objectid(&di->location);
2012 ret = btrfs_del_item(trans, root, path);
2013 if (ret) {
2014 ret = -EIO;
2015 goto out_fail;
2016 }
2017 btrfs_release_path(root, path);
2018
2019 di = btrfs_lookup_dir_index_item(trans, root, path,
2020 old_inode->i_ino,
2021 old_parent_oid,
2022 "..", 2, -1);
2023 if (IS_ERR(di)) {
2024 ret = PTR_ERR(di);
2025 goto out_fail;
2026 }
2027 if (!di) {
2028 ret = -ENOENT;
2029 goto out_fail;
2030 }
2031 ret = btrfs_del_item(trans, root, path);
2032 if (ret) {
2033 ret = -EIO;
2034 goto out_fail;
2035 }
2036 btrfs_release_path(root, path);
2037
2038 ret = btrfs_insert_dir_item(trans, root, "..", 2,
2039 old_inode->i_ino, location,
2040 BTRFS_FT_DIR);
2041 if (ret)
2042 goto out_fail;
2043 }
2044
2045
2046 ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
2047 if (ret)
2048 goto out_fail;
2049
2050 if (new_inode) {
2051 new_inode->i_ctime = CURRENT_TIME;
2052 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
2053 if (ret)
2054 goto out_fail;
2055 if (S_ISDIR(new_inode->i_mode))
2056 clear_nlink(new_inode);
2057 else
2058 drop_nlink(new_inode);
2059 btrfs_update_inode(trans, root, new_inode);
2060 }
2061 ret = btrfs_add_link(trans, new_dentry, old_inode);
2062 if (ret)
2063 goto out_fail;
2064
2065out_fail:
2066 btrfs_free_path(path);
2067 btrfs_end_transaction(trans, root);
2068 mutex_unlock(&root->fs_info->fs_mutex);
2069 return ret;
2070}
2071
2072static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
2073 const char *symname)
2074{
2075 struct btrfs_trans_handle *trans;
2076 struct btrfs_root *root = BTRFS_I(dir)->root;
2077 struct btrfs_path *path;
2078 struct btrfs_key key;
2079 struct inode *inode;
2080 int err;
2081 int drop_inode = 0;
2082 u64 objectid;
2083 int name_len;
2084 int datasize;
2085 char *ptr;
2086 struct btrfs_file_extent_item *ei;
2087
2088 name_len = strlen(symname) + 1;
2089 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
2090 return -ENAMETOOLONG;
2091 mutex_lock(&root->fs_info->fs_mutex);
2092 trans = btrfs_start_transaction(root, 1);
2093 btrfs_set_trans_block_group(trans, dir);
2094
2095 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2096 if (err) {
2097 err = -ENOSPC;
2098 goto out_unlock;
2099 }
2100
2101 inode = btrfs_new_inode(trans, root, objectid,
2102 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
2103 err = PTR_ERR(inode);
2104 if (IS_ERR(inode))
2105 goto out_unlock;
2106
2107 btrfs_set_trans_block_group(trans, inode);
2108 err = btrfs_add_nondir(trans, dentry, inode);
2109 if (err)
2110 drop_inode = 1;
2111 else {
2112 inode->i_mapping->a_ops = &btrfs_aops;
2113 inode->i_fop = &btrfs_file_operations;
2114 inode->i_op = &btrfs_file_inode_operations;
2115 }
2116 dir->i_sb->s_dirt = 1;
2117 btrfs_update_inode_block_group(trans, inode);
2118 btrfs_update_inode_block_group(trans, dir);
2119 if (drop_inode)
2120 goto out_unlock;
2121
2122 path = btrfs_alloc_path();
2123 BUG_ON(!path);
2124 key.objectid = inode->i_ino;
2125 key.offset = 0;
2126 key.flags = 0;
2127 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2128 datasize = btrfs_file_extent_calc_inline_size(name_len);
2129 err = btrfs_insert_empty_item(trans, root, path, &key,
2130 datasize);
2131 BUG_ON(err);
2132 ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2133 path->slots[0], struct btrfs_file_extent_item);
2134 btrfs_set_file_extent_generation(ei, trans->transid);
2135 btrfs_set_file_extent_type(ei,
2136 BTRFS_FILE_EXTENT_INLINE);
2137 ptr = btrfs_file_extent_inline_start(ei);
2138 btrfs_memcpy(root, path->nodes[0]->b_data,
2139 ptr, symname, name_len);
2140 mark_buffer_dirty(path->nodes[0]);
2141 btrfs_free_path(path);
2142 inode->i_op = &btrfs_symlink_inode_operations;
2143 inode->i_mapping->a_ops = &btrfs_symlink_aops;
2144 inode->i_size = name_len - 1;
2145 btrfs_update_inode(trans, root, inode);
2146 err = 0;
2147
2148out_unlock:
2149 btrfs_end_transaction(trans, root);
2150 mutex_unlock(&root->fs_info->fs_mutex);
2151
2152 if (drop_inode) {
2153 inode_dec_link_count(inode);
2154 iput(inode);
2155 }
2156 btrfs_btree_balance_dirty(root);
2157 return err;
2158}
2159
/* Inode operations installed on directories by btrfs_mkdir() and create_subvol(). */
2160static struct inode_operations btrfs_dir_inode_operations = {
2161 .lookup = btrfs_lookup,
2162 .create = btrfs_create,
2163 .unlink = btrfs_unlink,
2164 .link = btrfs_link,
2165 .mkdir = btrfs_mkdir,
2166 .rmdir = btrfs_rmdir,
2167 .rename = btrfs_rename,
2168 .symlink = btrfs_symlink,
2169 .setattr = btrfs_setattr,
2170};
2171
/* Directory inode operations that provide lookup only. */
2172static struct inode_operations btrfs_dir_ro_inode_operations = {
2173 .lookup = btrfs_lookup,
2174};
2175
/* File operations installed on directories by btrfs_mkdir() and create_subvol(). */
2176static struct file_operations btrfs_dir_file_operations = {
2177 .llseek = generic_file_llseek,
2178 .read = generic_read_dir,
2179 .readdir = btrfs_readdir,
2180 .ioctl = btrfs_ioctl,
2181#ifdef CONFIG_COMPAT
2182 .compat_ioctl = btrfs_compat_ioctl,
2183#endif
2184};
2185
/* Address-space operations installed by btrfs_create(). */
2186static struct address_space_operations btrfs_aops = {
2187 .readpage = btrfs_readpage,
2188 .writepage = btrfs_writepage,
2189 .sync_page = block_sync_page,
2190 .prepare_write = btrfs_prepare_write,
2191 .commit_write = btrfs_commit_write,
2192 .bmap = btrfs_bmap,
2193};
2194
/* Address-space operations installed on symlinks by btrfs_symlink(): read and write back only. */
2195static struct address_space_operations btrfs_symlink_aops = {
2196 .readpage = btrfs_readpage,
2197 .writepage = btrfs_writepage,
2198};
2199
/* Inode operations installed on regular files by btrfs_create(). */
2200static struct inode_operations btrfs_file_inode_operations = {
2201 .truncate = btrfs_truncate,
2202 .getattr = btrfs_getattr,
2203 .setattr = btrfs_setattr,
2204};
2205
/* Inode operations installed on symlinks by btrfs_symlink(). */
2206static struct inode_operations btrfs_symlink_inode_operations = {
2207 .readlink = generic_readlink,
2208 .follow_link = page_follow_link_light,
2209 .put_link = page_put_link,
2210};
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9749090cfbb8..94b3535a2a11 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -20,873 +20,9 @@
20#include "ioctl.h" 20#include "ioctl.h"
21#include "print-tree.h" 21#include "print-tree.h"
22 22
23struct btrfs_iget_args {
24 u64 ino;
25 struct btrfs_root *root;
26};
27
28#define BTRFS_SUPER_MAGIC 0x9123682E 23#define BTRFS_SUPER_MAGIC 0x9123682E
29 24
30static struct inode_operations btrfs_dir_inode_operations;
31static struct inode_operations btrfs_symlink_inode_operations;
32static struct inode_operations btrfs_dir_ro_inode_operations;
33static struct super_operations btrfs_super_ops; 25static struct super_operations btrfs_super_ops;
34static struct file_operations btrfs_dir_file_operations;
35static struct inode_operations btrfs_file_inode_operations;
36static struct address_space_operations btrfs_aops;
37static struct address_space_operations btrfs_symlink_aops;
38static struct file_operations btrfs_file_operations;
39
40static int drop_extents(struct btrfs_trans_handle *trans,
41 struct btrfs_root *root,
42 struct inode *inode,
43 u64 start, u64 end, u64 *hint_block);
44static int btrfs_get_block(struct inode *inode, sector_t iblock,
45 struct buffer_head *result, int create);
46
47
48#define S_SHIFT 12
49static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
50 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
51 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
52 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
53 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
54 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
55 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
56 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
57};
58
59static void btrfs_read_locked_inode(struct inode *inode)
60{
61 struct btrfs_path *path;
62 struct btrfs_inode_item *inode_item;
63 struct btrfs_root *root = BTRFS_I(inode)->root;
64 struct btrfs_key location;
65 u64 alloc_group_block;
66 int ret;
67
68 path = btrfs_alloc_path();
69 BUG_ON(!path);
70 btrfs_init_path(path);
71 mutex_lock(&root->fs_info->fs_mutex);
72
73 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
74 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
75 if (ret) {
76 btrfs_free_path(path);
77 goto make_bad;
78 }
79 inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
80 path->slots[0],
81 struct btrfs_inode_item);
82
83 inode->i_mode = btrfs_inode_mode(inode_item);
84 inode->i_nlink = btrfs_inode_nlink(inode_item);
85 inode->i_uid = btrfs_inode_uid(inode_item);
86 inode->i_gid = btrfs_inode_gid(inode_item);
87 inode->i_size = btrfs_inode_size(inode_item);
88 inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
89 inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
90 inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
91 inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
92 inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
93 inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
94 inode->i_blocks = btrfs_inode_nblocks(inode_item);
95 inode->i_generation = btrfs_inode_generation(inode_item);
96 alloc_group_block = btrfs_inode_block_group(inode_item);
97 BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
98 alloc_group_block);
99
100 btrfs_free_path(path);
101 inode_item = NULL;
102
103 mutex_unlock(&root->fs_info->fs_mutex);
104
105 switch (inode->i_mode & S_IFMT) {
106#if 0
107 default:
108 init_special_inode(inode, inode->i_mode,
109 btrfs_inode_rdev(inode_item));
110 break;
111#endif
112 case S_IFREG:
113 inode->i_mapping->a_ops = &btrfs_aops;
114 inode->i_fop = &btrfs_file_operations;
115 inode->i_op = &btrfs_file_inode_operations;
116 break;
117 case S_IFDIR:
118 inode->i_fop = &btrfs_dir_file_operations;
119 if (root == root->fs_info->tree_root)
120 inode->i_op = &btrfs_dir_ro_inode_operations;
121 else
122 inode->i_op = &btrfs_dir_inode_operations;
123 break;
124 case S_IFLNK:
125 inode->i_op = &btrfs_symlink_inode_operations;
126 inode->i_mapping->a_ops = &btrfs_symlink_aops;
127 break;
128 }
129 return;
130
131make_bad:
132 btrfs_release_path(root, path);
133 btrfs_free_path(path);
134 mutex_unlock(&root->fs_info->fs_mutex);
135 make_bad_inode(inode);
136}
137
138static void fill_inode_item(struct btrfs_inode_item *item,
139 struct inode *inode)
140{
141 btrfs_set_inode_uid(item, inode->i_uid);
142 btrfs_set_inode_gid(item, inode->i_gid);
143 btrfs_set_inode_size(item, inode->i_size);
144 btrfs_set_inode_mode(item, inode->i_mode);
145 btrfs_set_inode_nlink(item, inode->i_nlink);
146 btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
147 btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
148 btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
149 btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
150 btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
151 btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
152 btrfs_set_inode_nblocks(item, inode->i_blocks);
153 btrfs_set_inode_generation(item, inode->i_generation);
154 btrfs_set_inode_block_group(item,
155 BTRFS_I(inode)->block_group->key.objectid);
156}
157
158static int btrfs_update_inode(struct btrfs_trans_handle *trans,
159 struct btrfs_root *root,
160 struct inode *inode)
161{
162 struct btrfs_inode_item *inode_item;
163 struct btrfs_path *path;
164 int ret;
165
166 path = btrfs_alloc_path();
167 BUG_ON(!path);
168 btrfs_init_path(path);
169 ret = btrfs_lookup_inode(trans, root, path,
170 &BTRFS_I(inode)->location, 1);
171 if (ret) {
172 if (ret > 0)
173 ret = -ENOENT;
174 goto failed;
175 }
176
177 inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
178 path->slots[0],
179 struct btrfs_inode_item);
180
181 fill_inode_item(inode_item, inode);
182 btrfs_mark_buffer_dirty(path->nodes[0]);
183 ret = 0;
184failed:
185 btrfs_release_path(root, path);
186 btrfs_free_path(path);
187 return ret;
188}
189
190
191static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
192 struct btrfs_root *root,
193 struct inode *dir,
194 struct dentry *dentry)
195{
196 struct btrfs_path *path;
197 const char *name = dentry->d_name.name;
198 int name_len = dentry->d_name.len;
199 int ret = 0;
200 u64 objectid;
201 struct btrfs_dir_item *di;
202
203 path = btrfs_alloc_path();
204 BUG_ON(!path);
205 btrfs_init_path(path);
206 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
207 name, name_len, -1);
208 if (IS_ERR(di)) {
209 ret = PTR_ERR(di);
210 goto err;
211 }
212 if (!di) {
213 ret = -ENOENT;
214 goto err;
215 }
216 objectid = btrfs_disk_key_objectid(&di->location);
217 ret = btrfs_delete_one_dir_name(trans, root, path, di);
218 BUG_ON(ret);
219 btrfs_release_path(root, path);
220
221 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
222 objectid, name, name_len, -1);
223 if (IS_ERR(di)) {
224 ret = PTR_ERR(di);
225 goto err;
226 }
227 if (!di) {
228 ret = -ENOENT;
229 goto err;
230 }
231 ret = btrfs_delete_one_dir_name(trans, root, path, di);
232 BUG_ON(ret);
233
234 dentry->d_inode->i_ctime = dir->i_ctime;
235err:
236 btrfs_free_path(path);
237 if (!ret) {
238 dir->i_size -= name_len * 2;
239 btrfs_update_inode(trans, root, dir);
240 drop_nlink(dentry->d_inode);
241 btrfs_update_inode(trans, root, dentry->d_inode);
242 dir->i_sb->s_dirt = 1;
243 }
244 return ret;
245}
246
247static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
248{
249 struct btrfs_root *root;
250 struct btrfs_trans_handle *trans;
251 int ret;
252
253 root = BTRFS_I(dir)->root;
254 mutex_lock(&root->fs_info->fs_mutex);
255 trans = btrfs_start_transaction(root, 1);
256 btrfs_set_trans_block_group(trans, dir);
257 ret = btrfs_unlink_trans(trans, root, dir, dentry);
258 btrfs_end_transaction(trans, root);
259 mutex_unlock(&root->fs_info->fs_mutex);
260 btrfs_btree_balance_dirty(root);
261 return ret;
262}
263
264static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
265{
266 struct inode *inode = dentry->d_inode;
267 int err;
268 int ret;
269 struct btrfs_root *root = BTRFS_I(dir)->root;
270 struct btrfs_path *path;
271 struct btrfs_key key;
272 struct btrfs_trans_handle *trans;
273 struct btrfs_key found_key;
274 int found_type;
275 struct btrfs_leaf *leaf;
276 char *goodnames = "..";
277
278 path = btrfs_alloc_path();
279 BUG_ON(!path);
280 btrfs_init_path(path);
281 mutex_lock(&root->fs_info->fs_mutex);
282 trans = btrfs_start_transaction(root, 1);
283 btrfs_set_trans_block_group(trans, dir);
284 key.objectid = inode->i_ino;
285 key.offset = (u64)-1;
286 key.flags = (u32)-1;
287 while(1) {
288 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
289 if (ret < 0) {
290 err = ret;
291 goto out;
292 }
293 BUG_ON(ret == 0);
294 if (path->slots[0] == 0) {
295 err = -ENOENT;
296 goto out;
297 }
298 path->slots[0]--;
299 leaf = btrfs_buffer_leaf(path->nodes[0]);
300 btrfs_disk_key_to_cpu(&found_key,
301 &leaf->items[path->slots[0]].key);
302 found_type = btrfs_key_type(&found_key);
303 if (found_key.objectid != inode->i_ino) {
304 err = -ENOENT;
305 goto out;
306 }
307 if ((found_type != BTRFS_DIR_ITEM_KEY &&
308 found_type != BTRFS_DIR_INDEX_KEY) ||
309 (!btrfs_match_dir_item_name(root, path, goodnames, 2) &&
310 !btrfs_match_dir_item_name(root, path, goodnames, 1))) {
311 err = -ENOTEMPTY;
312 goto out;
313 }
314 ret = btrfs_del_item(trans, root, path);
315 BUG_ON(ret);
316
317 if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1)
318 break;
319 btrfs_release_path(root, path);
320 }
321 ret = 0;
322 btrfs_release_path(root, path);
323
324 /* now the directory is empty */
325 err = btrfs_unlink_trans(trans, root, dir, dentry);
326 if (!err) {
327 inode->i_size = 0;
328 }
329out:
330 btrfs_release_path(root, path);
331 btrfs_free_path(path);
332 mutex_unlock(&root->fs_info->fs_mutex);
333 ret = btrfs_end_transaction(trans, root);
334 btrfs_btree_balance_dirty(root);
335 if (ret && !err)
336 err = ret;
337 return err;
338}
339
340static int btrfs_free_inode(struct btrfs_trans_handle *trans,
341 struct btrfs_root *root,
342 struct inode *inode)
343{
344 struct btrfs_path *path;
345 int ret;
346
347 clear_inode(inode);
348
349 path = btrfs_alloc_path();
350 BUG_ON(!path);
351 btrfs_init_path(path);
352 ret = btrfs_lookup_inode(trans, root, path,
353 &BTRFS_I(inode)->location, -1);
354 BUG_ON(ret);
355 ret = btrfs_del_item(trans, root, path);
356 BUG_ON(ret);
357 btrfs_free_path(path);
358 return ret;
359}
360
361static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path,
362 u64 objectid)
363{
364 struct btrfs_node *node;
365 int i;
366 int nritems;
367 u64 item_objectid;
368 u64 blocknr;
369 int slot;
370 int ret;
371
372 if (!path->nodes[1])
373 return;
374 node = btrfs_buffer_node(path->nodes[1]);
375 slot = path->slots[1];
376 if (slot == 0)
377 return;
378 nritems = btrfs_header_nritems(&node->header);
379 for (i = slot - 1; i >= 0; i--) {
380 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
381 if (item_objectid != objectid)
382 break;
383 blocknr = btrfs_node_blockptr(node, i);
384 ret = readahead_tree_block(root, blocknr);
385 if (ret)
386 break;
387 }
388}
389
390static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
391 struct btrfs_root *root,
392 struct inode *inode)
393{
394 int ret;
395 struct btrfs_path *path;
396 struct btrfs_key key;
397 struct btrfs_disk_key *found_key;
398 u32 found_type;
399 struct btrfs_leaf *leaf;
400 struct btrfs_file_extent_item *fi;
401 u64 extent_start = 0;
402 u64 extent_num_blocks = 0;
403 u64 item_end = 0;
404 int found_extent;
405 int del_item;
406
407 path = btrfs_alloc_path();
408 BUG_ON(!path);
409 /* FIXME, add redo link to tree so we don't leak on crash */
410 key.objectid = inode->i_ino;
411 key.offset = (u64)-1;
412 key.flags = (u32)-1;
413 while(1) {
414 btrfs_init_path(path);
415 fi = NULL;
416 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
417 if (ret < 0) {
418 goto error;
419 }
420 if (ret > 0) {
421 BUG_ON(path->slots[0] == 0);
422 path->slots[0]--;
423 }
424 reada_truncate(root, path, inode->i_ino);
425 leaf = btrfs_buffer_leaf(path->nodes[0]);
426 found_key = &leaf->items[path->slots[0]].key;
427 found_type = btrfs_disk_key_type(found_key);
428 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
429 break;
430 if (found_type != BTRFS_CSUM_ITEM_KEY &&
431 found_type != BTRFS_DIR_ITEM_KEY &&
432 found_type != BTRFS_DIR_INDEX_KEY &&
433 found_type != BTRFS_EXTENT_DATA_KEY)
434 break;
435 item_end = btrfs_disk_key_offset(found_key);
436 if (found_type == BTRFS_EXTENT_DATA_KEY) {
437 fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
438 path->slots[0],
439 struct btrfs_file_extent_item);
440 if (btrfs_file_extent_type(fi) !=
441 BTRFS_FILE_EXTENT_INLINE) {
442 item_end += btrfs_file_extent_num_blocks(fi) <<
443 inode->i_blkbits;
444 }
445 }
446 if (found_type == BTRFS_CSUM_ITEM_KEY) {
447 ret = btrfs_csum_truncate(trans, root, path,
448 inode->i_size);
449 BUG_ON(ret);
450 }
451 if (item_end < inode->i_size) {
452 if (found_type) {
453 btrfs_set_key_type(&key, found_type - 1);
454 continue;
455 }
456 break;
457 }
458 if (btrfs_disk_key_offset(found_key) >= inode->i_size)
459 del_item = 1;
460 else
461 del_item = 0;
462 found_extent = 0;
463
464 if (found_type == BTRFS_EXTENT_DATA_KEY &&
465 btrfs_file_extent_type(fi) !=
466 BTRFS_FILE_EXTENT_INLINE) {
467 u64 num_dec;
468 if (!del_item) {
469 u64 orig_num_blocks =
470 btrfs_file_extent_num_blocks(fi);
471 extent_num_blocks = inode->i_size -
472 btrfs_disk_key_offset(found_key) +
473 root->blocksize - 1;
474 extent_num_blocks >>= inode->i_blkbits;
475 btrfs_set_file_extent_num_blocks(fi,
476 extent_num_blocks);
477 inode->i_blocks -= (orig_num_blocks -
478 extent_num_blocks) << 3;
479 mark_buffer_dirty(path->nodes[0]);
480 } else {
481 extent_start =
482 btrfs_file_extent_disk_blocknr(fi);
483 extent_num_blocks =
484 btrfs_file_extent_disk_num_blocks(fi);
485 /* FIXME blocksize != 4096 */
486 num_dec = btrfs_file_extent_num_blocks(fi) << 3;
487 if (extent_start != 0) {
488 found_extent = 1;
489 inode->i_blocks -= num_dec;
490 }
491 }
492 }
493 if (del_item) {
494 ret = btrfs_del_item(trans, root, path);
495 BUG_ON(ret);
496 } else {
497 break;
498 }
499 btrfs_release_path(root, path);
500 if (found_extent) {
501 ret = btrfs_free_extent(trans, root, extent_start,
502 extent_num_blocks, 0);
503 BUG_ON(ret);
504 }
505 }
506 ret = 0;
507error:
508 btrfs_release_path(root, path);
509 btrfs_free_path(path);
510 inode->i_sb->s_dirt = 1;
511 return ret;
512}
513
514static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
515{
516 struct inode *inode = mapping->host;
517 unsigned blocksize = 1 << inode->i_blkbits;
518 pgoff_t index = from >> PAGE_CACHE_SHIFT;
519 unsigned offset = from & (PAGE_CACHE_SIZE-1);
520 struct page *page;
521 char *kaddr;
522 int ret = 0;
523 struct btrfs_root *root = BTRFS_I(inode)->root;
524 u64 alloc_hint;
525 struct btrfs_key ins;
526 struct btrfs_trans_handle *trans;
527
528 if ((offset & (blocksize - 1)) == 0)
529 goto out;
530
531 ret = -ENOMEM;
532 page = grab_cache_page(mapping, index);
533 if (!page)
534 goto out;
535
536 if (!PageUptodate(page)) {
537 ret = mpage_readpage(page, btrfs_get_block);
538 lock_page(page);
539 if (!PageUptodate(page)) {
540 ret = -EIO;
541 goto out;
542 }
543 }
544 mutex_lock(&root->fs_info->fs_mutex);
545 trans = btrfs_start_transaction(root, 1);
546 btrfs_set_trans_block_group(trans, inode);
547
548 ret = drop_extents(trans, root, inode, page->index << PAGE_CACHE_SHIFT,
549 (page->index + 1) << PAGE_CACHE_SHIFT, &alloc_hint);
550 BUG_ON(ret);
551 ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1,
552 alloc_hint, (u64)-1, &ins, 1);
553 BUG_ON(ret);
554 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
555 page->index << PAGE_CACHE_SHIFT,
556 ins.objectid, 1, 1);
557 BUG_ON(ret);
558 SetPageChecked(page);
559 kaddr = kmap(page);
560 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
561 flush_dcache_page(page);
562 btrfs_csum_file_block(trans, root, inode->i_ino,
563 page->index << PAGE_CACHE_SHIFT,
564 kaddr, PAGE_CACHE_SIZE);
565 kunmap(page);
566 btrfs_end_transaction(trans, root);
567 mutex_unlock(&root->fs_info->fs_mutex);
568
569 set_page_dirty(page);
570 unlock_page(page);
571 page_cache_release(page);
572out:
573 return ret;
574}
575
576static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
577{
578 struct inode *inode = dentry->d_inode;
579 int err;
580
581 err = inode_change_ok(inode, attr);
582 if (err)
583 return err;
584
585 if (S_ISREG(inode->i_mode) &&
586 attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
587 struct btrfs_trans_handle *trans;
588 struct btrfs_root *root = BTRFS_I(inode)->root;
589 u64 mask = root->blocksize - 1;
590 u64 pos = (inode->i_size + mask) & ~mask;
591 u64 hole_size;
592
593 if (attr->ia_size <= pos)
594 goto out;
595
596 btrfs_truncate_page(inode->i_mapping, inode->i_size);
597
598 hole_size = (attr->ia_size - pos + mask) & ~mask;
599 hole_size >>= inode->i_blkbits;
600
601 mutex_lock(&root->fs_info->fs_mutex);
602 trans = btrfs_start_transaction(root, 1);
603 btrfs_set_trans_block_group(trans, inode);
604 err = btrfs_insert_file_extent(trans, root, inode->i_ino,
605 pos, 0, 0, hole_size);
606 BUG_ON(err);
607 btrfs_end_transaction(trans, root);
608 mutex_unlock(&root->fs_info->fs_mutex);
609 }
610out:
611 err = inode_setattr(inode, attr);
612
613 return err;
614}
615static void btrfs_delete_inode(struct inode *inode)
616{
617 struct btrfs_trans_handle *trans;
618 struct btrfs_root *root = BTRFS_I(inode)->root;
619 int ret;
620
621 truncate_inode_pages(&inode->i_data, 0);
622 if (is_bad_inode(inode)) {
623 goto no_delete;
624 }
625 inode->i_size = 0;
626 mutex_lock(&root->fs_info->fs_mutex);
627 trans = btrfs_start_transaction(root, 1);
628 btrfs_set_trans_block_group(trans, inode);
629 ret = btrfs_truncate_in_trans(trans, root, inode);
630 BUG_ON(ret);
631 btrfs_free_inode(trans, root, inode);
632 btrfs_end_transaction(trans, root);
633 mutex_unlock(&root->fs_info->fs_mutex);
634 btrfs_btree_balance_dirty(root);
635 return;
636no_delete:
637 clear_inode(inode);
638}
639
640static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
641 struct btrfs_key *location)
642{
643 const char *name = dentry->d_name.name;
644 int namelen = dentry->d_name.len;
645 struct btrfs_dir_item *di;
646 struct btrfs_path *path;
647 struct btrfs_root *root = BTRFS_I(dir)->root;
648 int ret;
649
650 path = btrfs_alloc_path();
651 BUG_ON(!path);
652 btrfs_init_path(path);
653 di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
654 namelen, 0);
655 if (!di || IS_ERR(di)) {
656 location->objectid = 0;
657 ret = 0;
658 goto out;
659 }
660 btrfs_disk_key_to_cpu(location, &di->location);
661out:
662 btrfs_release_path(root, path);
663 btrfs_free_path(path);
664 return ret;
665}
666
667static int fixup_tree_root_location(struct btrfs_root *root,
668 struct btrfs_key *location,
669 struct btrfs_root **sub_root)
670{
671 struct btrfs_path *path;
672 struct btrfs_root_item *ri;
673
674 if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
675 return 0;
676 if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
677 return 0;
678
679 path = btrfs_alloc_path();
680 BUG_ON(!path);
681 mutex_lock(&root->fs_info->fs_mutex);
682
683 *sub_root = btrfs_read_fs_root(root->fs_info, location);
684 if (IS_ERR(*sub_root))
685 return PTR_ERR(*sub_root);
686
687 ri = &(*sub_root)->root_item;
688 location->objectid = btrfs_root_dirid(ri);
689 location->flags = 0;
690 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
691 location->offset = 0;
692
693 btrfs_free_path(path);
694 mutex_unlock(&root->fs_info->fs_mutex);
695 return 0;
696}
697
698static int btrfs_init_locked_inode(struct inode *inode, void *p)
699{
700 struct btrfs_iget_args *args = p;
701 inode->i_ino = args->ino;
702 BTRFS_I(inode)->root = args->root;
703 return 0;
704}
705
706static int btrfs_find_actor(struct inode *inode, void *opaque)
707{
708 struct btrfs_iget_args *args = opaque;
709 return (args->ino == inode->i_ino &&
710 args->root == BTRFS_I(inode)->root);
711}
712
713static struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
714 struct btrfs_root *root)
715{
716 struct inode *inode;
717 struct btrfs_iget_args args;
718 args.ino = objectid;
719 args.root = root;
720
721 inode = iget5_locked(s, objectid, btrfs_find_actor,
722 btrfs_init_locked_inode,
723 (void *)&args);
724 return inode;
725}
726
727static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
728 struct nameidata *nd)
729{
730 struct inode * inode;
731 struct btrfs_inode *bi = BTRFS_I(dir);
732 struct btrfs_root *root = bi->root;
733 struct btrfs_root *sub_root = root;
734 struct btrfs_key location;
735 int ret;
736
737 if (dentry->d_name.len > BTRFS_NAME_LEN)
738 return ERR_PTR(-ENAMETOOLONG);
739 mutex_lock(&root->fs_info->fs_mutex);
740 ret = btrfs_inode_by_name(dir, dentry, &location);
741 mutex_unlock(&root->fs_info->fs_mutex);
742 if (ret < 0)
743 return ERR_PTR(ret);
744 inode = NULL;
745 if (location.objectid) {
746 ret = fixup_tree_root_location(root, &location, &sub_root);
747 if (ret < 0)
748 return ERR_PTR(ret);
749 if (ret > 0)
750 return ERR_PTR(-ENOENT);
751 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
752 sub_root);
753 if (!inode)
754 return ERR_PTR(-EACCES);
755 if (inode->i_state & I_NEW) {
756 if (sub_root != root) {
757printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_root, BTRFS_I(inode)->root);
758 igrab(inode);
759 sub_root->inode = inode;
760 }
761 BTRFS_I(inode)->root = sub_root;
762 memcpy(&BTRFS_I(inode)->location, &location,
763 sizeof(location));
764 btrfs_read_locked_inode(inode);
765 unlock_new_inode(inode);
766 }
767 }
768 return d_splice_alias(inode, dentry);
769}
770
771static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path,
772 u64 objectid)
773{
774 struct btrfs_node *node;
775 int i;
776 u32 nritems;
777 u64 item_objectid;
778 u64 blocknr;
779 int slot;
780 int ret;
781
782 if (!path->nodes[1])
783 return;
784 node = btrfs_buffer_node(path->nodes[1]);
785 slot = path->slots[1];
786 nritems = btrfs_header_nritems(&node->header);
787 for (i = slot + 1; i < nritems; i++) {
788 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
789 if (item_objectid != objectid)
790 break;
791 blocknr = btrfs_node_blockptr(node, i);
792 ret = readahead_tree_block(root, blocknr);
793 if (ret)
794 break;
795 }
796}
797static unsigned char btrfs_filetype_table[] = {
798 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
799};
800
801static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
802{
803 struct inode *inode = filp->f_path.dentry->d_inode;
804 struct btrfs_root *root = BTRFS_I(inode)->root;
805 struct btrfs_item *item;
806 struct btrfs_dir_item *di;
807 struct btrfs_key key;
808 struct btrfs_path *path;
809 int ret;
810 u32 nritems;
811 struct btrfs_leaf *leaf;
812 int slot;
813 int advance;
814 unsigned char d_type;
815 int over = 0;
816 u32 di_cur;
817 u32 di_total;
818 u32 di_len;
819 int key_type = BTRFS_DIR_INDEX_KEY;
820
821 /* FIXME, use a real flag for deciding about the key type */
822 if (root->fs_info->tree_root == root)
823 key_type = BTRFS_DIR_ITEM_KEY;
824 mutex_lock(&root->fs_info->fs_mutex);
825 key.objectid = inode->i_ino;
826 key.flags = 0;
827 btrfs_set_key_type(&key, key_type);
828 key.offset = filp->f_pos;
829 path = btrfs_alloc_path();
830 btrfs_init_path(path);
831 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
832 if (ret < 0)
833 goto err;
834 advance = 0;
835 reada_leaves(root, path, inode->i_ino);
836 while(1) {
837 leaf = btrfs_buffer_leaf(path->nodes[0]);
838 nritems = btrfs_header_nritems(&leaf->header);
839 slot = path->slots[0];
840 if (advance || slot >= nritems) {
841 if (slot >= nritems -1) {
842 reada_leaves(root, path, inode->i_ino);
843 ret = btrfs_next_leaf(root, path);
844 if (ret)
845 break;
846 leaf = btrfs_buffer_leaf(path->nodes[0]);
847 nritems = btrfs_header_nritems(&leaf->header);
848 slot = path->slots[0];
849 } else {
850 slot++;
851 path->slots[0]++;
852 }
853 }
854 advance = 1;
855 item = leaf->items + slot;
856 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
857 break;
858 if (btrfs_disk_key_type(&item->key) != key_type)
859 break;
860 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
861 continue;
862 filp->f_pos = btrfs_disk_key_offset(&item->key);
863 advance = 1;
864 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
865 di_cur = 0;
866 di_total = btrfs_item_size(leaf->items + slot);
867 while(di_cur < di_total) {
868 d_type = btrfs_filetype_table[btrfs_dir_type(di)];
869 over = filldir(dirent, (const char *)(di + 1),
870 btrfs_dir_name_len(di),
871 btrfs_disk_key_offset(&item->key),
872 btrfs_disk_key_objectid(&di->location),
873 d_type);
874 if (over)
875 goto nopos;
876 di_len = btrfs_dir_name_len(di) + sizeof(*di);
877 di_cur += di_len;
878 di = (struct btrfs_dir_item *)((char *)di + di_len);
879 }
880 }
881 filp->f_pos++;
882nopos:
883 ret = 0;
884err:
885 btrfs_release_path(root, path);
886 btrfs_free_path(path);
887 mutex_unlock(&root->fs_info->fs_mutex);
888 return ret;
889}
890 26
891static void btrfs_put_super (struct super_block * sb) 27static void btrfs_put_super (struct super_block * sb)
892{ 28{
@@ -907,6 +43,7 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent)
907 struct btrfs_super_block *disk_super; 43 struct btrfs_super_block *disk_super;
908 struct btrfs_root *tree_root; 44 struct btrfs_root *tree_root;
909 struct btrfs_inode *bi; 45 struct btrfs_inode *bi;
46 int err;
910 47
911 sb->s_maxbytes = MAX_LFS_FILESIZE; 48 sb->s_maxbytes = MAX_LFS_FILESIZE;
912 sb->s_magic = BTRFS_SUPER_MAGIC; 49 sb->s_magic = BTRFS_SUPER_MAGIC;
@@ -915,16 +52,12 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent)
915 52
916 tree_root = open_ctree(sb); 53 tree_root = open_ctree(sb);
917 54
918 if (!tree_root) { 55 if (!tree_root || IS_ERR(tree_root)) {
919 printk("btrfs: open_ctree failed\n"); 56 printk("btrfs: open_ctree failed\n");
920 return -EIO; 57 return -EIO;
921 } 58 }
922 sb->s_fs_info = tree_root; 59 sb->s_fs_info = tree_root;
923 disk_super = tree_root->fs_info->disk_super; 60 disk_super = tree_root->fs_info->disk_super;
924 printk("read in super total blocks %Lu root %Lu\n",
925 btrfs_super_total_blocks(disk_super),
926 btrfs_super_root_dir(disk_super));
927
928 inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super), 61 inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super),
929 tree_root); 62 tree_root);
930 bi = BTRFS_I(inode); 63 bi = BTRFS_I(inode);
@@ -934,8 +67,10 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent)
934 bi->root = tree_root; 67 bi->root = tree_root;
935 btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); 68 btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
936 69
937 if (!inode) 70 if (!inode) {
938 return -ENOMEM; 71 err = -ENOMEM;
72 goto fail_close;
73 }
939 if (inode->i_state & I_NEW) { 74 if (inode->i_state & I_NEW) {
940 btrfs_read_locked_inode(inode); 75 btrfs_read_locked_inode(inode);
941 unlock_new_inode(inode); 76 unlock_new_inode(inode);
@@ -944,323 +79,18 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent)
944 root_dentry = d_alloc_root(inode); 79 root_dentry = d_alloc_root(inode);
945 if (!root_dentry) { 80 if (!root_dentry) {
946 iput(inode); 81 iput(inode);
947 return -ENOMEM; 82 err = -ENOMEM;
83 goto fail_close;
948 } 84 }
949 sb->s_root = root_dentry; 85 sb->s_root = root_dentry;
950 btrfs_transaction_queue_work(tree_root, HZ * 30); 86 btrfs_transaction_queue_work(tree_root, HZ * 30);
951 return 0; 87 return 0;
952}
953 88
954static int btrfs_write_inode(struct inode *inode, int wait) 89fail_close:
955{ 90 close_ctree(tree_root);
956 struct btrfs_root *root = BTRFS_I(inode)->root;
957 struct btrfs_trans_handle *trans;
958 int ret = 0;
959
960 if (wait) {
961 mutex_lock(&root->fs_info->fs_mutex);
962 trans = btrfs_start_transaction(root, 1);
963 btrfs_set_trans_block_group(trans, inode);
964 ret = btrfs_commit_transaction(trans, root);
965 mutex_unlock(&root->fs_info->fs_mutex);
966 }
967 return ret;
968}
969
970static void btrfs_dirty_inode(struct inode *inode)
971{
972 struct btrfs_root *root = BTRFS_I(inode)->root;
973 struct btrfs_trans_handle *trans;
974
975 mutex_lock(&root->fs_info->fs_mutex);
976 trans = btrfs_start_transaction(root, 1);
977 btrfs_set_trans_block_group(trans, inode);
978 btrfs_update_inode(trans, root, inode);
979 btrfs_end_transaction(trans, root);
980 mutex_unlock(&root->fs_info->fs_mutex);
981 btrfs_btree_balance_dirty(root);
982}
983
984static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
985 struct btrfs_root *root,
986 u64 objectid,
987 struct btrfs_block_group_cache *group,
988 int mode)
989{
990 struct inode *inode;
991 struct btrfs_inode_item inode_item;
992 struct btrfs_key *location;
993 int ret;
994 int owner;
995
996 inode = new_inode(root->fs_info->sb);
997 if (!inode)
998 return ERR_PTR(-ENOMEM);
999
1000 BTRFS_I(inode)->root = root;
1001 if (mode & S_IFDIR)
1002 owner = 0;
1003 else
1004 owner = 1;
1005 group = btrfs_find_block_group(root, group, 0, 0, owner);
1006 BTRFS_I(inode)->block_group = group;
1007
1008 inode->i_uid = current->fsuid;
1009 inode->i_gid = current->fsgid;
1010 inode->i_mode = mode;
1011 inode->i_ino = objectid;
1012 inode->i_blocks = 0;
1013 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1014 fill_inode_item(&inode_item, inode);
1015 location = &BTRFS_I(inode)->location;
1016 location->objectid = objectid;
1017 location->flags = 0;
1018 location->offset = 0;
1019 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1020
1021 ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
1022 BUG_ON(ret);
1023
1024 insert_inode_hash(inode);
1025 return inode;
1026}
1027
1028static inline u8 btrfs_inode_type(struct inode *inode)
1029{
1030 return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
1031}
1032
1033static int btrfs_add_link(struct btrfs_trans_handle *trans,
1034 struct dentry *dentry, struct inode *inode)
1035{
1036 int ret;
1037 struct btrfs_key key;
1038 struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1039 key.objectid = inode->i_ino;
1040 key.flags = 0;
1041 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1042 key.offset = 0;
1043
1044 ret = btrfs_insert_dir_item(trans, root,
1045 dentry->d_name.name, dentry->d_name.len,
1046 dentry->d_parent->d_inode->i_ino,
1047 &key, btrfs_inode_type(inode));
1048 if (ret == 0) {
1049 dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2;
1050 ret = btrfs_update_inode(trans, root,
1051 dentry->d_parent->d_inode);
1052 }
1053 return ret;
1054}
1055
1056static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1057 struct dentry *dentry, struct inode *inode)
1058{
1059 int err = btrfs_add_link(trans, dentry, inode);
1060 if (!err) {
1061 d_instantiate(dentry, inode);
1062 return 0;
1063 }
1064 if (err > 0)
1065 err = -EEXIST;
1066 return err; 91 return err;
1067} 92}
1068 93
1069static int btrfs_create(struct inode *dir, struct dentry *dentry,
1070 int mode, struct nameidata *nd)
1071{
1072 struct btrfs_trans_handle *trans;
1073 struct btrfs_root *root = BTRFS_I(dir)->root;
1074 struct inode *inode;
1075 int err;
1076 int drop_inode = 0;
1077 u64 objectid;
1078
1079 mutex_lock(&root->fs_info->fs_mutex);
1080 trans = btrfs_start_transaction(root, 1);
1081 btrfs_set_trans_block_group(trans, dir);
1082
1083 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1084 if (err) {
1085 err = -ENOSPC;
1086 goto out_unlock;
1087 }
1088
1089 inode = btrfs_new_inode(trans, root, objectid,
1090 BTRFS_I(dir)->block_group, mode);
1091 err = PTR_ERR(inode);
1092 if (IS_ERR(inode))
1093 goto out_unlock;
1094
1095 btrfs_set_trans_block_group(trans, inode);
1096 err = btrfs_add_nondir(trans, dentry, inode);
1097 if (err)
1098 drop_inode = 1;
1099 else {
1100 inode->i_mapping->a_ops = &btrfs_aops;
1101 inode->i_fop = &btrfs_file_operations;
1102 inode->i_op = &btrfs_file_inode_operations;
1103 }
1104 dir->i_sb->s_dirt = 1;
1105 btrfs_update_inode_block_group(trans, inode);
1106 btrfs_update_inode_block_group(trans, dir);
1107out_unlock:
1108 btrfs_end_transaction(trans, root);
1109 mutex_unlock(&root->fs_info->fs_mutex);
1110
1111 if (drop_inode) {
1112 inode_dec_link_count(inode);
1113 iput(inode);
1114 }
1115 btrfs_btree_balance_dirty(root);
1116 return err;
1117}
1118
1119static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
1120 struct dentry *dentry)
1121{
1122 struct btrfs_trans_handle *trans;
1123 struct btrfs_root *root = BTRFS_I(dir)->root;
1124 struct inode *inode = old_dentry->d_inode;
1125 int err;
1126 int drop_inode = 0;
1127
1128 if (inode->i_nlink == 0)
1129 return -ENOENT;
1130
1131 inc_nlink(inode);
1132 mutex_lock(&root->fs_info->fs_mutex);
1133 trans = btrfs_start_transaction(root, 1);
1134 btrfs_set_trans_block_group(trans, dir);
1135 atomic_inc(&inode->i_count);
1136 err = btrfs_add_nondir(trans, dentry, inode);
1137 if (err)
1138 drop_inode = 1;
1139 dir->i_sb->s_dirt = 1;
1140 btrfs_update_inode_block_group(trans, dir);
1141 btrfs_update_inode(trans, root, inode);
1142
1143 btrfs_end_transaction(trans, root);
1144 mutex_unlock(&root->fs_info->fs_mutex);
1145
1146 if (drop_inode) {
1147 inode_dec_link_count(inode);
1148 iput(inode);
1149 }
1150 btrfs_btree_balance_dirty(root);
1151 return err;
1152}
1153
1154static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
1155 struct btrfs_root *root,
1156 u64 objectid, u64 dirid)
1157{
1158 int ret;
1159 char buf[2];
1160 struct btrfs_key key;
1161
1162 buf[0] = '.';
1163 buf[1] = '.';
1164
1165 key.objectid = objectid;
1166 key.offset = 0;
1167 key.flags = 0;
1168 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1169
1170 ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid,
1171 &key, BTRFS_FT_DIR);
1172 if (ret)
1173 goto error;
1174 key.objectid = dirid;
1175 ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid,
1176 &key, BTRFS_FT_DIR);
1177 if (ret)
1178 goto error;
1179error:
1180 return ret;
1181}
1182
1183static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1184{
1185 struct inode *inode;
1186 struct btrfs_trans_handle *trans;
1187 struct btrfs_root *root = BTRFS_I(dir)->root;
1188 int err = 0;
1189 int drop_on_err = 0;
1190 u64 objectid;
1191
1192 mutex_lock(&root->fs_info->fs_mutex);
1193 trans = btrfs_start_transaction(root, 1);
1194 btrfs_set_trans_block_group(trans, dir);
1195 if (IS_ERR(trans)) {
1196 err = PTR_ERR(trans);
1197 goto out_unlock;
1198 }
1199
1200 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1201 if (err) {
1202 err = -ENOSPC;
1203 goto out_unlock;
1204 }
1205
1206 inode = btrfs_new_inode(trans, root, objectid,
1207 BTRFS_I(dir)->block_group, S_IFDIR | mode);
1208 if (IS_ERR(inode)) {
1209 err = PTR_ERR(inode);
1210 goto out_fail;
1211 }
1212 drop_on_err = 1;
1213 inode->i_op = &btrfs_dir_inode_operations;
1214 inode->i_fop = &btrfs_dir_file_operations;
1215 btrfs_set_trans_block_group(trans, inode);
1216
1217 err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino);
1218 if (err)
1219 goto out_fail;
1220
1221 inode->i_size = 6;
1222 err = btrfs_update_inode(trans, root, inode);
1223 if (err)
1224 goto out_fail;
1225 err = btrfs_add_link(trans, dentry, inode);
1226 if (err)
1227 goto out_fail;
1228 d_instantiate(dentry, inode);
1229 drop_on_err = 0;
1230 dir->i_sb->s_dirt = 1;
1231 btrfs_update_inode_block_group(trans, inode);
1232 btrfs_update_inode_block_group(trans, dir);
1233
1234out_fail:
1235 btrfs_end_transaction(trans, root);
1236out_unlock:
1237 mutex_unlock(&root->fs_info->fs_mutex);
1238 if (drop_on_err)
1239 iput(inode);
1240 btrfs_btree_balance_dirty(root);
1241 return err;
1242}
1243
1244static int btrfs_sync_file(struct file *file,
1245 struct dentry *dentry, int datasync)
1246{
1247 struct inode *inode = dentry->d_inode;
1248 struct btrfs_root *root = BTRFS_I(inode)->root;
1249 int ret;
1250 struct btrfs_trans_handle *trans;
1251
1252 mutex_lock(&root->fs_info->fs_mutex);
1253 trans = btrfs_start_transaction(root, 1);
1254 if (!trans) {
1255 ret = -ENOMEM;
1256 goto out;
1257 }
1258 ret = btrfs_commit_transaction(trans, root);
1259 mutex_unlock(&root->fs_info->fs_mutex);
1260out:
1261 return ret > 0 ? EIO : ret;
1262}
1263
1264static int btrfs_sync_fs(struct super_block *sb, int wait) 94static int btrfs_sync_fs(struct super_block *sb, int wait)
1265{ 95{
1266 struct btrfs_trans_handle *trans; 96 struct btrfs_trans_handle *trans;
@@ -1278,1430 +108,15 @@ static int btrfs_sync_fs(struct super_block *sb, int wait)
1278 ret = btrfs_commit_transaction(trans, root); 108 ret = btrfs_commit_transaction(trans, root);
1279 sb->s_dirt = 0; 109 sb->s_dirt = 0;
1280 BUG_ON(ret); 110 BUG_ON(ret);
1281printk("btrfs sync_fs\n");
1282 mutex_unlock(&root->fs_info->fs_mutex); 111 mutex_unlock(&root->fs_info->fs_mutex);
1283 return 0; 112 return 0;
1284} 113}
1285 114
1286#define BTRFS_GET_BLOCK_NO_CREATE 0
1287#define BTRFS_GET_BLOCK_CREATE 1
1288#define BTRFS_GET_BLOCK_NO_DIRECT 2
1289
1290static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
1291 struct buffer_head *result, int create)
1292{
1293 int ret;
1294 int err = 0;
1295 u64 blocknr;
1296 u64 extent_start = 0;
1297 u64 extent_end = 0;
1298 u64 objectid = inode->i_ino;
1299 u32 found_type;
1300 u64 alloc_hint = 0;
1301 struct btrfs_path *path;
1302 struct btrfs_root *root = BTRFS_I(inode)->root;
1303 struct btrfs_file_extent_item *item;
1304 struct btrfs_leaf *leaf;
1305 struct btrfs_disk_key *found_key;
1306 struct btrfs_trans_handle *trans = NULL;
1307
1308 path = btrfs_alloc_path();
1309 BUG_ON(!path);
1310 btrfs_init_path(path);
1311 if (create & BTRFS_GET_BLOCK_CREATE) {
1312 WARN_ON(1);
1313 /* this almost but not quite works */
1314 trans = btrfs_start_transaction(root, 1);
1315 if (!trans) {
1316 err = -ENOMEM;
1317 goto out;
1318 }
1319 ret = drop_extents(trans, root, inode,
1320 iblock << inode->i_blkbits,
1321 (iblock + 1) << inode->i_blkbits,
1322 &alloc_hint);
1323 BUG_ON(ret);
1324 }
1325
1326 ret = btrfs_lookup_file_extent(NULL, root, path,
1327 inode->i_ino,
1328 iblock << inode->i_blkbits, 0);
1329 if (ret < 0) {
1330 err = ret;
1331 goto out;
1332 }
1333
1334 if (ret != 0) {
1335 if (path->slots[0] == 0) {
1336 btrfs_release_path(root, path);
1337 goto not_found;
1338 }
1339 path->slots[0]--;
1340 }
1341
1342 item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
1343 struct btrfs_file_extent_item);
1344 leaf = btrfs_buffer_leaf(path->nodes[0]);
1345 blocknr = btrfs_file_extent_disk_blocknr(item);
1346 blocknr += btrfs_file_extent_offset(item);
1347
1348 /* are we inside the extent that was found? */
1349 found_key = &leaf->items[path->slots[0]].key;
1350 found_type = btrfs_disk_key_type(found_key);
1351 if (btrfs_disk_key_objectid(found_key) != objectid ||
1352 found_type != BTRFS_EXTENT_DATA_KEY) {
1353 extent_end = 0;
1354 extent_start = 0;
1355 goto not_found;
1356 }
1357 found_type = btrfs_file_extent_type(item);
1358 extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1359 if (found_type == BTRFS_FILE_EXTENT_REG) {
1360 extent_start = extent_start >> inode->i_blkbits;
1361 extent_end = extent_start + btrfs_file_extent_num_blocks(item);
1362 err = 0;
1363 if (btrfs_file_extent_disk_blocknr(item) == 0)
1364 goto out;
1365 if (iblock >= extent_start && iblock < extent_end) {
1366 btrfs_map_bh_to_logical(root, result, blocknr +
1367 iblock - extent_start);
1368 goto out;
1369 }
1370 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1371 char *ptr;
1372 char *map;
1373 u32 size;
1374
1375 if (create & BTRFS_GET_BLOCK_NO_DIRECT) {
1376 err = -EINVAL;
1377 goto out;
1378 }
1379 size = btrfs_file_extent_inline_len(leaf->items +
1380 path->slots[0]);
1381 extent_end = (extent_start + size) >> inode->i_blkbits;
1382 extent_start >>= inode->i_blkbits;
1383 if (iblock < extent_start || iblock > extent_end) {
1384 goto not_found;
1385 }
1386 ptr = btrfs_file_extent_inline_start(item);
1387 map = kmap(result->b_page);
1388 memcpy(map, ptr, size);
1389 memset(map + size, 0, PAGE_CACHE_SIZE - size);
1390 flush_dcache_page(result->b_page);
1391 kunmap(result->b_page);
1392 set_buffer_uptodate(result);
1393 SetPageChecked(result->b_page);
1394 btrfs_map_bh_to_logical(root, result, 0);
1395 }
1396not_found:
1397 if (create & BTRFS_GET_BLOCK_CREATE) {
1398 struct btrfs_key ins;
1399 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
1400 1, alloc_hint, (u64)-1,
1401 &ins, 1);
1402 BUG_ON(ret);
1403 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
1404 iblock << inode->i_blkbits,
1405 ins.objectid, ins.offset,
1406 ins.offset);
1407 BUG_ON(ret);
1408 SetPageChecked(result->b_page);
1409 btrfs_map_bh_to_logical(root, result, ins.objectid);
1410 }
1411out:
1412 if (trans)
1413 err = btrfs_end_transaction(trans, root);
1414 btrfs_free_path(path);
1415 return err;
1416}
1417
1418static int btrfs_get_block(struct inode *inode, sector_t iblock,
1419 struct buffer_head *result, int create)
1420{
1421 int err;
1422 struct btrfs_root *root = BTRFS_I(inode)->root;
1423 mutex_lock(&root->fs_info->fs_mutex);
1424 err = btrfs_get_block_lock(inode, iblock, result, create);
1425 mutex_unlock(&root->fs_info->fs_mutex);
1426 return err;
1427}
1428
1429static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock,
1430 struct buffer_head *result, int create)
1431{
1432 struct btrfs_root *root = BTRFS_I(inode)->root;
1433 mutex_lock(&root->fs_info->fs_mutex);
1434 btrfs_get_block_lock(inode, iblock, result, BTRFS_GET_BLOCK_NO_DIRECT);
1435 mutex_unlock(&root->fs_info->fs_mutex);
1436 return 0;
1437}
1438
1439static sector_t btrfs_bmap(struct address_space *as, sector_t block)
1440{
1441 return generic_block_bmap(as, block, btrfs_get_block_bmap);
1442}
1443
1444static int btrfs_prepare_write(struct file *file, struct page *page,
1445 unsigned from, unsigned to)
1446{
1447 return block_prepare_write(page, from, to, btrfs_get_block);
1448}
1449
1450static void btrfs_write_super(struct super_block *sb) 115static void btrfs_write_super(struct super_block *sb)
1451{ 116{
1452 sb->s_dirt = 0; 117 sb->s_dirt = 0;
1453} 118}
1454 119
1455static int btrfs_readpage(struct file *file, struct page *page)
1456{
1457 return mpage_readpage(page, btrfs_get_block);
1458}
1459
1460/*
1461 * While block_write_full_page is writing back the dirty buffers under
1462 * the page lock, whoever dirtied the buffers may decide to clean them
1463 * again at any time. We handle that by only looking at the buffer
1464 * state inside lock_buffer().
1465 *
1466 * If block_write_full_page() is called for regular writeback
1467 * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
1468 * locked buffer. This only can happen if someone has written the buffer
1469 * directly, with submit_bh(). At the address_space level PageWriteback
1470 * prevents this contention from occurring.
1471 */
1472static int __btrfs_write_full_page(struct inode *inode, struct page *page,
1473 struct writeback_control *wbc)
1474{
1475 int err;
1476 sector_t block;
1477 sector_t last_block;
1478 struct buffer_head *bh, *head;
1479 const unsigned blocksize = 1 << inode->i_blkbits;
1480 int nr_underway = 0;
1481
1482 BUG_ON(!PageLocked(page));
1483
1484 last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1485
1486 if (!page_has_buffers(page)) {
1487 create_empty_buffers(page, blocksize,
1488 (1 << BH_Dirty)|(1 << BH_Uptodate));
1489 }
1490
1491 /*
1492 * Be very careful. We have no exclusion from __set_page_dirty_buffers
1493 * here, and the (potentially unmapped) buffers may become dirty at
1494 * any time. If a buffer becomes dirty here after we've inspected it
1495 * then we just miss that fact, and the page stays dirty.
1496 *
1497 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
1498 * handle that here by just cleaning them.
1499 */
1500
1501 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1502 head = page_buffers(page);
1503 bh = head;
1504
1505 /*
1506 * Get all the dirty buffers mapped to disk addresses and
1507 * handle any aliases from the underlying blockdev's mapping.
1508 */
1509 do {
1510 if (block > last_block) {
1511 /*
1512 * mapped buffers outside i_size will occur, because
1513 * this page can be outside i_size when there is a
1514 * truncate in progress.
1515 */
1516 /*
1517 * The buffer was zeroed by block_write_full_page()
1518 */
1519 clear_buffer_dirty(bh);
1520 set_buffer_uptodate(bh);
1521 } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
1522 WARN_ON(bh->b_size != blocksize);
1523 err = btrfs_get_block(inode, block, bh, 0);
1524 if (err) {
1525printk("writepage going to recovery err %d\n", err);
1526 goto recover;
1527 }
1528 if (buffer_new(bh)) {
1529 /* blockdev mappings never come here */
1530 clear_buffer_new(bh);
1531 }
1532 }
1533 bh = bh->b_this_page;
1534 block++;
1535 } while (bh != head);
1536
1537 do {
1538 if (!buffer_mapped(bh))
1539 continue;
1540 /*
1541 * If it's a fully non-blocking write attempt and we cannot
1542 * lock the buffer then redirty the page. Note that this can
1543 * potentially cause a busy-wait loop from pdflush and kswapd
1544 * activity, but those code paths have their own higher-level
1545 * throttling.
1546 */
1547 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1548 lock_buffer(bh);
1549 } else if (test_set_buffer_locked(bh)) {
1550 redirty_page_for_writepage(wbc, page);
1551 continue;
1552 }
1553 if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
1554 mark_buffer_async_write(bh);
1555 } else {
1556 unlock_buffer(bh);
1557 }
1558 } while ((bh = bh->b_this_page) != head);
1559
1560 /*
1561 * The page and its buffers are protected by PageWriteback(), so we can
1562 * drop the bh refcounts early.
1563 */
1564 BUG_ON(PageWriteback(page));
1565 set_page_writeback(page);
1566
1567 do {
1568 struct buffer_head *next = bh->b_this_page;
1569 if (buffer_async_write(bh)) {
1570 submit_bh(WRITE, bh);
1571 nr_underway++;
1572 }
1573 bh = next;
1574 } while (bh != head);
1575 unlock_page(page);
1576
1577 err = 0;
1578done:
1579 if (nr_underway == 0) {
1580 /*
1581 * The page was marked dirty, but the buffers were
1582 * clean. Someone wrote them back by hand with
1583 * ll_rw_block/submit_bh. A rare case.
1584 */
1585 int uptodate = 1;
1586 do {
1587 if (!buffer_uptodate(bh)) {
1588 uptodate = 0;
1589 break;
1590 }
1591 bh = bh->b_this_page;
1592 } while (bh != head);
1593 if (uptodate)
1594 SetPageUptodate(page);
1595 end_page_writeback(page);
1596 }
1597 return err;
1598
1599recover:
1600 /*
1601 * ENOSPC, or some other error. We may already have added some
1602 * blocks to the file, so we need to write these out to avoid
1603 * exposing stale data.
1604 * The page is currently locked and not marked for writeback
1605 */
1606 bh = head;
1607 /* Recovery: lock and submit the mapped buffers */
1608 do {
1609 if (buffer_mapped(bh) && buffer_dirty(bh)) {
1610 lock_buffer(bh);
1611 mark_buffer_async_write(bh);
1612 } else {
1613 /*
1614 * The buffer may have been set dirty during
1615 * attachment to a dirty page.
1616 */
1617 clear_buffer_dirty(bh);
1618 }
1619 } while ((bh = bh->b_this_page) != head);
1620 SetPageError(page);
1621 BUG_ON(PageWriteback(page));
1622 set_page_writeback(page);
1623 do {
1624 struct buffer_head *next = bh->b_this_page;
1625 if (buffer_async_write(bh)) {
1626 clear_buffer_dirty(bh);
1627 submit_bh(WRITE, bh);
1628 nr_underway++;
1629 }
1630 bh = next;
1631 } while (bh != head);
1632 unlock_page(page);
1633 goto done;
1634}
1635
1636/*
1637 * The generic ->writepage function for buffer-backed address_spaces
1638 */
1639static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1640{
1641 struct inode * const inode = page->mapping->host;
1642 loff_t i_size = i_size_read(inode);
1643 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
1644 unsigned offset;
1645 void *kaddr;
1646
1647 /* Is the page fully inside i_size? */
1648 if (page->index < end_index)
1649 return __btrfs_write_full_page(inode, page, wbc);
1650
1651 /* Is the page fully outside i_size? (truncate in progress) */
1652 offset = i_size & (PAGE_CACHE_SIZE-1);
1653 if (page->index >= end_index+1 || !offset) {
1654 /*
1655 * The page may have dirty, unmapped buffers. For example,
1656 * they may have been added in ext3_writepage(). Make them
1657 * freeable here, so the page does not leak.
1658 */
1659 block_invalidatepage(page, 0);
1660 unlock_page(page);
1661 return 0; /* don't care */
1662 }
1663
1664 /*
1665 * The page straddles i_size. It must be zeroed out on each and every
1666 * writepage invokation because it may be mmapped. "A file is mapped
1667 * in multiples of the page size. For a file that is not a multiple of
1668 * the page size, the remaining memory is zeroed when mapped, and
1669 * writes to that region are not written out to the file."
1670 */
1671 kaddr = kmap_atomic(page, KM_USER0);
1672 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1673 flush_dcache_page(page);
1674 kunmap_atomic(kaddr, KM_USER0);
1675 return __btrfs_write_full_page(inode, page, wbc);
1676}
1677
1678static void btrfs_truncate(struct inode *inode)
1679{
1680 struct btrfs_root *root = BTRFS_I(inode)->root;
1681 int ret;
1682 struct btrfs_trans_handle *trans;
1683
1684 if (!S_ISREG(inode->i_mode))
1685 return;
1686 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1687 return;
1688
1689 btrfs_truncate_page(inode->i_mapping, inode->i_size);
1690
1691 mutex_lock(&root->fs_info->fs_mutex);
1692 trans = btrfs_start_transaction(root, 1);
1693 btrfs_set_trans_block_group(trans, inode);
1694
1695 /* FIXME, add redo link to tree so we don't leak on crash */
1696 ret = btrfs_truncate_in_trans(trans, root, inode);
1697 BUG_ON(ret);
1698 btrfs_update_inode(trans, root, inode);
1699 ret = btrfs_end_transaction(trans, root);
1700 BUG_ON(ret);
1701 mutex_unlock(&root->fs_info->fs_mutex);
1702 btrfs_btree_balance_dirty(root);
1703}
1704
1705static int btrfs_commit_write(struct file *file, struct page *page,
1706 unsigned from, unsigned to)
1707{
1708 struct inode *inode = page->mapping->host;
1709 struct buffer_head *bh;
1710 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1711
1712 SetPageUptodate(page);
1713 bh = page_buffers(page);
1714 set_buffer_uptodate(bh);
1715 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1716 set_page_dirty(page);
1717 }
1718 if (pos > inode->i_size) {
1719 i_size_write(inode, pos);
1720 mark_inode_dirty(inode);
1721 }
1722 return 0;
1723}
1724
1725static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
1726 struct page **prepared_pages,
1727 const char __user * buf)
1728{
1729 long page_fault = 0;
1730 int i;
1731 int offset = pos & (PAGE_CACHE_SIZE - 1);
1732
1733 for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
1734 size_t count = min_t(size_t,
1735 PAGE_CACHE_SIZE - offset, write_bytes);
1736 struct page *page = prepared_pages[i];
1737 fault_in_pages_readable(buf, count);
1738
1739 /* Copy data from userspace to the current page */
1740 kmap(page);
1741 page_fault = __copy_from_user(page_address(page) + offset,
1742 buf, count);
1743 /* Flush processor's dcache for this page */
1744 flush_dcache_page(page);
1745 kunmap(page);
1746 buf += count;
1747 write_bytes -= count;
1748
1749 if (page_fault)
1750 break;
1751 }
1752 return page_fault ? -EFAULT : 0;
1753}
1754
1755static void btrfs_drop_pages(struct page **pages, size_t num_pages)
1756{
1757 size_t i;
1758 for (i = 0; i < num_pages; i++) {
1759 if (!pages[i])
1760 break;
1761 unlock_page(pages[i]);
1762 mark_page_accessed(pages[i]);
1763 page_cache_release(pages[i]);
1764 }
1765}
1766static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
1767 struct btrfs_root *root,
1768 struct file *file,
1769 struct page **pages,
1770 size_t num_pages,
1771 loff_t pos,
1772 size_t write_bytes)
1773{
1774 int i;
1775 int offset;
1776 int err = 0;
1777 int ret;
1778 int this_write;
1779 struct inode *inode = file->f_path.dentry->d_inode;
1780 struct buffer_head *bh;
1781 struct btrfs_file_extent_item *ei;
1782
1783 for (i = 0; i < num_pages; i++) {
1784 offset = pos & (PAGE_CACHE_SIZE -1);
1785 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1786 /* FIXME, one block at a time */
1787
1788 mutex_lock(&root->fs_info->fs_mutex);
1789 trans = btrfs_start_transaction(root, 1);
1790 btrfs_set_trans_block_group(trans, inode);
1791
1792 bh = page_buffers(pages[i]);
1793 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
1794 struct btrfs_key key;
1795 struct btrfs_path *path;
1796 char *ptr;
1797 u32 datasize;
1798
1799 path = btrfs_alloc_path();
1800 BUG_ON(!path);
1801 key.objectid = inode->i_ino;
1802 key.offset = pages[i]->index << PAGE_CACHE_SHIFT;
1803 key.flags = 0;
1804 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
1805 BUG_ON(write_bytes >= PAGE_CACHE_SIZE);
1806 datasize = offset +
1807 btrfs_file_extent_calc_inline_size(write_bytes);
1808 ret = btrfs_insert_empty_item(trans, root, path, &key,
1809 datasize);
1810 BUG_ON(ret);
1811 ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
1812 path->slots[0], struct btrfs_file_extent_item);
1813 btrfs_set_file_extent_generation(ei, trans->transid);
1814 btrfs_set_file_extent_type(ei,
1815 BTRFS_FILE_EXTENT_INLINE);
1816 ptr = btrfs_file_extent_inline_start(ei);
1817 btrfs_memcpy(root, path->nodes[0]->b_data,
1818 ptr, bh->b_data, offset + write_bytes);
1819 mark_buffer_dirty(path->nodes[0]);
1820 btrfs_free_path(path);
1821 } else if (buffer_mapped(bh)) {
1822 btrfs_csum_file_block(trans, root, inode->i_ino,
1823 pages[i]->index << PAGE_CACHE_SHIFT,
1824 kmap(pages[i]), PAGE_CACHE_SIZE);
1825 kunmap(pages[i]);
1826 }
1827 SetPageChecked(pages[i]);
1828 // btrfs_update_inode_block_group(trans, inode);
1829 ret = btrfs_end_transaction(trans, root);
1830 BUG_ON(ret);
1831 mutex_unlock(&root->fs_info->fs_mutex);
1832
1833 ret = btrfs_commit_write(file, pages[i], offset,
1834 offset + this_write);
1835 pos += this_write;
1836 if (ret) {
1837 err = ret;
1838 goto failed;
1839 }
1840 WARN_ON(this_write > write_bytes);
1841 write_bytes -= this_write;
1842 }
1843failed:
1844 return err;
1845}
1846
1847static int drop_extents(struct btrfs_trans_handle *trans,
1848 struct btrfs_root *root,
1849 struct inode *inode,
1850 u64 start, u64 end, u64 *hint_block)
1851{
1852 int ret;
1853 struct btrfs_key key;
1854 struct btrfs_leaf *leaf;
1855 int slot;
1856 struct btrfs_file_extent_item *extent;
1857 u64 extent_end = 0;
1858 int keep;
1859 struct btrfs_file_extent_item old;
1860 struct btrfs_path *path;
1861 u64 search_start = start;
1862 int bookend;
1863 int found_type;
1864 int found_extent;
1865 int found_inline;
1866
1867 path = btrfs_alloc_path();
1868 if (!path)
1869 return -ENOMEM;
1870 while(1) {
1871 btrfs_release_path(root, path);
1872 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
1873 search_start, -1);
1874 if (ret < 0)
1875 goto out;
1876 if (ret > 0) {
1877 if (path->slots[0] == 0) {
1878 ret = 0;
1879 goto out;
1880 }
1881 path->slots[0]--;
1882 }
1883 keep = 0;
1884 bookend = 0;
1885 found_extent = 0;
1886 found_inline = 0;
1887 extent = NULL;
1888 leaf = btrfs_buffer_leaf(path->nodes[0]);
1889 slot = path->slots[0];
1890 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
1891 if (key.offset >= end || key.objectid != inode->i_ino) {
1892 ret = 0;
1893 goto out;
1894 }
1895 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) {
1896 ret = 0;
1897 goto out;
1898 }
1899 extent = btrfs_item_ptr(leaf, slot,
1900 struct btrfs_file_extent_item);
1901 found_type = btrfs_file_extent_type(extent);
1902 if (found_type == BTRFS_FILE_EXTENT_REG) {
1903 extent_end = key.offset +
1904 (btrfs_file_extent_num_blocks(extent) <<
1905 inode->i_blkbits);
1906 found_extent = 1;
1907 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1908 found_inline = 1;
1909 extent_end = key.offset +
1910 btrfs_file_extent_inline_len(leaf->items + slot);
1911 }
1912
1913 if (!found_extent && !found_inline) {
1914 ret = 0;
1915 goto out;
1916 }
1917
1918 if (search_start >= extent_end) {
1919 ret = 0;
1920 goto out;
1921 }
1922
1923 if (found_inline) {
1924 u64 mask = root->blocksize - 1;
1925 search_start = (extent_end + mask) & ~mask;
1926 } else
1927 search_start = extent_end;
1928
1929 if (end < extent_end && end >= key.offset) {
1930 if (found_extent) {
1931 u64 disk_blocknr =
1932 btrfs_file_extent_disk_blocknr(extent);
1933 u64 disk_num_blocks =
1934 btrfs_file_extent_disk_num_blocks(extent);
1935 memcpy(&old, extent, sizeof(old));
1936 if (disk_blocknr != 0) {
1937 ret = btrfs_inc_extent_ref(trans, root,
1938 disk_blocknr, disk_num_blocks);
1939 BUG_ON(ret);
1940 }
1941 }
1942 WARN_ON(found_inline);
1943 bookend = 1;
1944 }
1945
1946 if (start > key.offset) {
1947 u64 new_num;
1948 u64 old_num;
1949 /* truncate existing extent */
1950 keep = 1;
1951 WARN_ON(start & (root->blocksize - 1));
1952 if (found_extent) {
1953 new_num = (start - key.offset) >>
1954 inode->i_blkbits;
1955 old_num = btrfs_file_extent_num_blocks(extent);
1956 *hint_block =
1957 btrfs_file_extent_disk_blocknr(extent);
1958 if (btrfs_file_extent_disk_blocknr(extent)) {
1959 inode->i_blocks -=
1960 (old_num - new_num) << 3;
1961 }
1962 btrfs_set_file_extent_num_blocks(extent,
1963 new_num);
1964 mark_buffer_dirty(path->nodes[0]);
1965 } else {
1966 WARN_ON(1);
1967 }
1968 }
1969 if (!keep) {
1970 u64 disk_blocknr = 0;
1971 u64 disk_num_blocks = 0;
1972 u64 extent_num_blocks = 0;
1973 if (found_extent) {
1974 disk_blocknr =
1975 btrfs_file_extent_disk_blocknr(extent);
1976 disk_num_blocks =
1977 btrfs_file_extent_disk_num_blocks(extent);
1978 extent_num_blocks =
1979 btrfs_file_extent_num_blocks(extent);
1980 *hint_block =
1981 btrfs_file_extent_disk_blocknr(extent);
1982 }
1983 ret = btrfs_del_item(trans, root, path);
1984 BUG_ON(ret);
1985 btrfs_release_path(root, path);
1986 extent = NULL;
1987 if (found_extent && disk_blocknr != 0) {
1988 inode->i_blocks -= extent_num_blocks << 3;
1989 ret = btrfs_free_extent(trans, root,
1990 disk_blocknr,
1991 disk_num_blocks, 0);
1992 }
1993
1994 BUG_ON(ret);
1995 if (!bookend && search_start >= end) {
1996 ret = 0;
1997 goto out;
1998 }
1999 if (!bookend)
2000 continue;
2001 }
2002 if (bookend && found_extent) {
2003 /* create bookend */
2004 struct btrfs_key ins;
2005 ins.objectid = inode->i_ino;
2006 ins.offset = end;
2007 ins.flags = 0;
2008 btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
2009
2010 btrfs_release_path(root, path);
2011 ret = btrfs_insert_empty_item(trans, root, path, &ins,
2012 sizeof(*extent));
2013 BUG_ON(ret);
2014 extent = btrfs_item_ptr(
2015 btrfs_buffer_leaf(path->nodes[0]),
2016 path->slots[0],
2017 struct btrfs_file_extent_item);
2018 btrfs_set_file_extent_disk_blocknr(extent,
2019 btrfs_file_extent_disk_blocknr(&old));
2020 btrfs_set_file_extent_disk_num_blocks(extent,
2021 btrfs_file_extent_disk_num_blocks(&old));
2022
2023 btrfs_set_file_extent_offset(extent,
2024 btrfs_file_extent_offset(&old) +
2025 ((end - key.offset) >> inode->i_blkbits));
2026 WARN_ON(btrfs_file_extent_num_blocks(&old) <
2027 (extent_end - end) >> inode->i_blkbits);
2028 btrfs_set_file_extent_num_blocks(extent,
2029 (extent_end - end) >> inode->i_blkbits);
2030
2031 btrfs_set_file_extent_type(extent,
2032 BTRFS_FILE_EXTENT_REG);
2033 btrfs_set_file_extent_generation(extent,
2034 btrfs_file_extent_generation(&old));
2035 btrfs_mark_buffer_dirty(path->nodes[0]);
2036 if (btrfs_file_extent_disk_blocknr(&old) != 0) {
2037 inode->i_blocks +=
2038 btrfs_file_extent_num_blocks(extent) << 3;
2039 }
2040 ret = 0;
2041 goto out;
2042 }
2043 }
2044out:
2045 btrfs_free_path(path);
2046 return ret;
2047}
2048
2049static int prepare_pages(struct btrfs_root *root,
2050 struct file *file,
2051 struct page **pages,
2052 size_t num_pages,
2053 loff_t pos,
2054 unsigned long first_index,
2055 unsigned long last_index,
2056 size_t write_bytes,
2057 u64 alloc_extent_start)
2058{
2059 int i;
2060 unsigned long index = pos >> PAGE_CACHE_SHIFT;
2061 struct inode *inode = file->f_path.dentry->d_inode;
2062 int offset;
2063 int err = 0;
2064 int this_write;
2065 struct buffer_head *bh;
2066 struct buffer_head *head;
2067 loff_t isize = i_size_read(inode);
2068
2069 memset(pages, 0, num_pages * sizeof(struct page *));
2070
2071 for (i = 0; i < num_pages; i++) {
2072 pages[i] = grab_cache_page(inode->i_mapping, index + i);
2073 if (!pages[i]) {
2074 err = -ENOMEM;
2075 goto failed_release;
2076 }
2077 cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
2078 wait_on_page_writeback(pages[i]);
2079 offset = pos & (PAGE_CACHE_SIZE -1);
2080 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
2081 if (!page_has_buffers(pages[i])) {
2082 create_empty_buffers(pages[i],
2083 root->fs_info->sb->s_blocksize,
2084 (1 << BH_Uptodate));
2085 }
2086 head = page_buffers(pages[i]);
2087 bh = head;
2088 do {
2089 err = btrfs_map_bh_to_logical(root, bh,
2090 alloc_extent_start);
2091 BUG_ON(err);
2092 if (err)
2093 goto failed_truncate;
2094 bh = bh->b_this_page;
2095 if (alloc_extent_start)
2096 alloc_extent_start++;
2097 } while (bh != head);
2098 pos += this_write;
2099 WARN_ON(this_write > write_bytes);
2100 write_bytes -= this_write;
2101 }
2102 return 0;
2103
2104failed_release:
2105 btrfs_drop_pages(pages, num_pages);
2106 return err;
2107
2108failed_truncate:
2109 btrfs_drop_pages(pages, num_pages);
2110 if (pos > isize)
2111 vmtruncate(inode, isize);
2112 return err;
2113}
2114
2115static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
2116 size_t count, loff_t *ppos)
2117{
2118 loff_t pos;
2119 size_t num_written = 0;
2120 int err = 0;
2121 int ret = 0;
2122 struct inode *inode = file->f_path.dentry->d_inode;
2123 struct btrfs_root *root = BTRFS_I(inode)->root;
2124 struct page *pages[8];
2125 struct page *pinned[2];
2126 unsigned long first_index;
2127 unsigned long last_index;
2128 u64 start_pos;
2129 u64 num_blocks;
2130 u64 alloc_extent_start;
2131 u64 hint_block;
2132 struct btrfs_trans_handle *trans;
2133 struct btrfs_key ins;
2134 pinned[0] = NULL;
2135 pinned[1] = NULL;
2136 if (file->f_flags & O_DIRECT)
2137 return -EINVAL;
2138 pos = *ppos;
2139 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
2140 current->backing_dev_info = inode->i_mapping->backing_dev_info;
2141 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
2142 if (err)
2143 goto out;
2144 if (count == 0)
2145 goto out;
2146 err = remove_suid(file->f_path.dentry);
2147 if (err)
2148 goto out;
2149 file_update_time(file);
2150
2151 start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
2152 num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
2153 inode->i_blkbits;
2154
2155 mutex_lock(&inode->i_mutex);
2156 first_index = pos >> PAGE_CACHE_SHIFT;
2157 last_index = (pos + count) >> PAGE_CACHE_SHIFT;
2158
2159 if ((pos & (PAGE_CACHE_SIZE - 1))) {
2160 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
2161 if (!PageUptodate(pinned[0])) {
2162 ret = mpage_readpage(pinned[0], btrfs_get_block);
2163 BUG_ON(ret);
2164 wait_on_page_locked(pinned[0]);
2165 } else {
2166 unlock_page(pinned[0]);
2167 }
2168 }
2169 if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
2170 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
2171 if (!PageUptodate(pinned[1])) {
2172 ret = mpage_readpage(pinned[1], btrfs_get_block);
2173 BUG_ON(ret);
2174 wait_on_page_locked(pinned[1]);
2175 } else {
2176 unlock_page(pinned[1]);
2177 }
2178 }
2179
2180 mutex_lock(&root->fs_info->fs_mutex);
2181 trans = btrfs_start_transaction(root, 1);
2182 if (!trans) {
2183 err = -ENOMEM;
2184 mutex_unlock(&root->fs_info->fs_mutex);
2185 goto out_unlock;
2186 }
2187 btrfs_set_trans_block_group(trans, inode);
2188 /* FIXME blocksize != 4096 */
2189 inode->i_blocks += num_blocks << 3;
2190 hint_block = 0;
2191 if (start_pos < inode->i_size) {
2192 /* FIXME blocksize != pagesize */
2193 ret = drop_extents(trans, root, inode,
2194 start_pos,
2195 (pos + count + root->blocksize -1) &
2196 ~((u64)root->blocksize - 1), &hint_block);
2197 BUG_ON(ret);
2198 }
2199 if (inode->i_size < start_pos) {
2200 u64 last_pos_in_file;
2201 u64 hole_size;
2202 u64 mask = root->blocksize - 1;
2203 last_pos_in_file = (inode->i_size + mask) & ~mask;
2204 hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
2205 hole_size >>= inode->i_blkbits;
2206 if (last_pos_in_file < start_pos) {
2207 ret = btrfs_insert_file_extent(trans, root,
2208 inode->i_ino,
2209 last_pos_in_file,
2210 0, 0, hole_size);
2211 }
2212 BUG_ON(ret);
2213 }
2214 if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size ||
2215 pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
2216 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
2217 num_blocks, hint_block, (u64)-1,
2218 &ins, 1);
2219 BUG_ON(ret);
2220 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
2221 start_pos, ins.objectid, ins.offset,
2222 ins.offset);
2223 BUG_ON(ret);
2224 } else {
2225 ins.offset = 0;
2226 ins.objectid = 0;
2227 }
2228 BUG_ON(ret);
2229 alloc_extent_start = ins.objectid;
2230 // btrfs_update_inode_block_group(trans, inode);
2231 ret = btrfs_end_transaction(trans, root);
2232 mutex_unlock(&root->fs_info->fs_mutex);
2233
2234 while(count > 0) {
2235 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
2236 size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
2237 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
2238 PAGE_CACHE_SHIFT;
2239
2240 memset(pages, 0, sizeof(pages));
2241 ret = prepare_pages(root, file, pages, num_pages,
2242 pos, first_index, last_index,
2243 write_bytes, alloc_extent_start);
2244 BUG_ON(ret);
2245
2246 /* FIXME blocks != pagesize */
2247 if (alloc_extent_start)
2248 alloc_extent_start += num_pages;
2249 ret = btrfs_copy_from_user(pos, num_pages,
2250 write_bytes, pages, buf);
2251 BUG_ON(ret);
2252
2253 ret = dirty_and_release_pages(NULL, root, file, pages,
2254 num_pages, pos, write_bytes);
2255 BUG_ON(ret);
2256 btrfs_drop_pages(pages, num_pages);
2257
2258 buf += write_bytes;
2259 count -= write_bytes;
2260 pos += write_bytes;
2261 num_written += write_bytes;
2262
2263 balance_dirty_pages_ratelimited(inode->i_mapping);
2264 btrfs_btree_balance_dirty(root);
2265 cond_resched();
2266 }
2267out_unlock:
2268 mutex_unlock(&inode->i_mutex);
2269out:
2270 if (pinned[0])
2271 page_cache_release(pinned[0]);
2272 if (pinned[1])
2273 page_cache_release(pinned[1]);
2274 *ppos = pos;
2275 current->backing_dev_info = NULL;
2276 mark_inode_dirty(inode);
2277 return num_written ? num_written : err;
2278}
2279
2280static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
2281 unsigned long offset, unsigned long size)
2282{
2283 char *kaddr;
2284 unsigned long left, count = desc->count;
2285 struct inode *inode = page->mapping->host;
2286
2287 if (size > count)
2288 size = count;
2289
2290 if (!PageChecked(page)) {
2291 /* FIXME, do it per block */
2292 struct btrfs_root *root = BTRFS_I(inode)->root;
2293 int ret;
2294 struct buffer_head *bh;
2295
2296 if (page_has_buffers(page)) {
2297 bh = page_buffers(page);
2298 if (!buffer_mapped(bh)) {
2299 SetPageChecked(page);
2300 goto checked;
2301 }
2302 }
2303
2304 ret = btrfs_csum_verify_file_block(root,
2305 page->mapping->host->i_ino,
2306 page->index << PAGE_CACHE_SHIFT,
2307 kmap(page), PAGE_CACHE_SIZE);
2308 if (ret) {
2309 if (ret != -ENOENT) {
2310 printk("failed to verify ino %lu page %lu ret %d\n",
2311 page->mapping->host->i_ino,
2312 page->index, ret);
2313 memset(page_address(page), 1, PAGE_CACHE_SIZE);
2314 flush_dcache_page(page);
2315 }
2316 }
2317 SetPageChecked(page);
2318 kunmap(page);
2319 }
2320checked:
2321 /*
2322 * Faults on the destination of a read are common, so do it before
2323 * taking the kmap.
2324 */
2325 if (!fault_in_pages_writeable(desc->arg.buf, size)) {
2326 kaddr = kmap_atomic(page, KM_USER0);
2327 left = __copy_to_user_inatomic(desc->arg.buf,
2328 kaddr + offset, size);
2329 kunmap_atomic(kaddr, KM_USER0);
2330 if (left == 0)
2331 goto success;
2332 }
2333
2334 /* Do it the slow way */
2335 kaddr = kmap(page);
2336 left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
2337 kunmap(page);
2338
2339 if (left) {
2340 size -= left;
2341 desc->error = -EFAULT;
2342 }
2343success:
2344 desc->count = count - size;
2345 desc->written += size;
2346 desc->arg.buf += size;
2347 return size;
2348}
2349
2350/**
2351 * btrfs_file_aio_read - filesystem read routine
2352 * @iocb: kernel I/O control block
2353 * @iov: io vector request
2354 * @nr_segs: number of segments in the iovec
2355 * @pos: current file position
2356 */
2357static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
2358 unsigned long nr_segs, loff_t pos)
2359{
2360 struct file *filp = iocb->ki_filp;
2361 ssize_t retval;
2362 unsigned long seg;
2363 size_t count;
2364 loff_t *ppos = &iocb->ki_pos;
2365
2366 count = 0;
2367 for (seg = 0; seg < nr_segs; seg++) {
2368 const struct iovec *iv = &iov[seg];
2369
2370 /*
2371 * If any segment has a negative length, or the cumulative
2372 * length ever wraps negative then return -EINVAL.
2373 */
2374 count += iv->iov_len;
2375 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
2376 return -EINVAL;
2377 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
2378 continue;
2379 if (seg == 0)
2380 return -EFAULT;
2381 nr_segs = seg;
2382 count -= iv->iov_len; /* This segment is no good */
2383 break;
2384 }
2385 retval = 0;
2386 if (count) {
2387 for (seg = 0; seg < nr_segs; seg++) {
2388 read_descriptor_t desc;
2389
2390 desc.written = 0;
2391 desc.arg.buf = iov[seg].iov_base;
2392 desc.count = iov[seg].iov_len;
2393 if (desc.count == 0)
2394 continue;
2395 desc.error = 0;
2396 do_generic_file_read(filp, ppos, &desc,
2397 btrfs_read_actor);
2398 retval += desc.written;
2399 if (desc.error) {
2400 retval = retval ?: desc.error;
2401 break;
2402 }
2403 }
2404 }
2405 return retval;
2406}
2407
2408static int create_subvol(struct btrfs_root *root, char *name, int namelen)
2409{
2410 struct btrfs_trans_handle *trans;
2411 struct btrfs_key key;
2412 struct btrfs_root_item root_item;
2413 struct btrfs_inode_item *inode_item;
2414 struct buffer_head *subvol;
2415 struct btrfs_leaf *leaf;
2416 struct btrfs_root *new_root;
2417 struct inode *inode;
2418 struct inode *dir;
2419 int ret;
2420 u64 objectid;
2421 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2422
2423 mutex_lock(&root->fs_info->fs_mutex);
2424 trans = btrfs_start_transaction(root, 1);
2425 BUG_ON(!trans);
2426
2427 subvol = btrfs_alloc_free_block(trans, root, 0);
2428 if (subvol == NULL)
2429 return -ENOSPC;
2430 leaf = btrfs_buffer_leaf(subvol);
2431 btrfs_set_header_nritems(&leaf->header, 0);
2432 btrfs_set_header_level(&leaf->header, 0);
2433 btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
2434 btrfs_set_header_generation(&leaf->header, trans->transid);
2435 btrfs_set_header_owner(&leaf->header, root->root_key.objectid);
2436 memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
2437 sizeof(leaf->header.fsid));
2438 mark_buffer_dirty(subvol);
2439
2440 inode_item = &root_item.inode;
2441 memset(inode_item, 0, sizeof(*inode_item));
2442 btrfs_set_inode_generation(inode_item, 1);
2443 btrfs_set_inode_size(inode_item, 3);
2444 btrfs_set_inode_nlink(inode_item, 1);
2445 btrfs_set_inode_nblocks(inode_item, 1);
2446 btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
2447
2448 btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
2449 btrfs_set_root_refs(&root_item, 1);
2450 brelse(subvol);
2451 subvol = NULL;
2452
2453 ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2454 0, &objectid);
2455 BUG_ON(ret);
2456
2457 btrfs_set_root_dirid(&root_item, new_dirid);
2458
2459 key.objectid = objectid;
2460 key.offset = 1;
2461 key.flags = 0;
2462 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2463 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2464 &root_item);
2465 BUG_ON(ret);
2466
2467 /*
2468 * insert the directory item
2469 */
2470 key.offset = (u64)-1;
2471 dir = root->fs_info->sb->s_root->d_inode;
2472 ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2473 name, namelen, dir->i_ino, &key,
2474 BTRFS_FT_DIR);
2475 BUG_ON(ret);
2476
2477 ret = btrfs_commit_transaction(trans, root);
2478 BUG_ON(ret);
2479
2480 new_root = btrfs_read_fs_root(root->fs_info, &key);
2481 BUG_ON(!new_root);
2482
2483 trans = btrfs_start_transaction(new_root, 1);
2484 BUG_ON(!trans);
2485
2486 inode = btrfs_new_inode(trans, new_root, new_dirid,
2487 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2488 inode->i_op = &btrfs_dir_inode_operations;
2489 inode->i_fop = &btrfs_dir_file_operations;
2490
2491 ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid);
2492 BUG_ON(ret);
2493
2494 inode->i_nlink = 1;
2495 inode->i_size = 6;
2496 ret = btrfs_update_inode(trans, new_root, inode);
2497 BUG_ON(ret);
2498
2499 ret = btrfs_commit_transaction(trans, new_root);
2500 BUG_ON(ret);
2501
2502 iput(inode);
2503
2504 mutex_unlock(&root->fs_info->fs_mutex);
2505 btrfs_btree_balance_dirty(root);
2506 return 0;
2507}
2508
2509static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2510{
2511 struct btrfs_trans_handle *trans;
2512 struct btrfs_key key;
2513 struct btrfs_root_item new_root_item;
2514 int ret;
2515 u64 objectid;
2516
2517 if (!root->ref_cows)
2518 return -EINVAL;
2519
2520 mutex_lock(&root->fs_info->fs_mutex);
2521 trans = btrfs_start_transaction(root, 1);
2522 BUG_ON(!trans);
2523
2524 ret = btrfs_update_inode(trans, root, root->inode);
2525 BUG_ON(ret);
2526
2527 ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2528 0, &objectid);
2529 BUG_ON(ret);
2530
2531 memcpy(&new_root_item, &root->root_item,
2532 sizeof(new_root_item));
2533
2534 key.objectid = objectid;
2535 key.offset = 1;
2536 key.flags = 0;
2537 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2538 btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
2539
2540 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2541 &new_root_item);
2542 BUG_ON(ret);
2543
2544 /*
2545 * insert the directory item
2546 */
2547 key.offset = (u64)-1;
2548 ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2549 name, namelen,
2550 root->fs_info->sb->s_root->d_inode->i_ino,
2551 &key, BTRFS_FT_DIR);
2552
2553 BUG_ON(ret);
2554
2555 ret = btrfs_inc_root_ref(trans, root);
2556 BUG_ON(ret);
2557
2558 ret = btrfs_commit_transaction(trans, root);
2559 BUG_ON(ret);
2560 mutex_unlock(&root->fs_info->fs_mutex);
2561 btrfs_btree_balance_dirty(root);
2562 return 0;
2563}
2564
2565static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
2566 cmd, unsigned long arg)
2567{
2568 struct btrfs_root *root = BTRFS_I(inode)->root;
2569 struct btrfs_ioctl_vol_args vol_args;
2570 int ret = 0;
2571 struct btrfs_dir_item *di;
2572 int namelen;
2573 struct btrfs_path *path;
2574 u64 root_dirid;
2575
2576 switch (cmd) {
2577 case BTRFS_IOC_SNAP_CREATE:
2578 if (copy_from_user(&vol_args,
2579 (struct btrfs_ioctl_vol_args __user *)arg,
2580 sizeof(vol_args)))
2581 return -EFAULT;
2582 namelen = strlen(vol_args.name);
2583 if (namelen > BTRFS_VOL_NAME_MAX)
2584 return -EINVAL;
2585 path = btrfs_alloc_path();
2586 if (!path)
2587 return -ENOMEM;
2588 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2589 mutex_lock(&root->fs_info->fs_mutex);
2590 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2591 path, root_dirid,
2592 vol_args.name, namelen, 0);
2593 mutex_unlock(&root->fs_info->fs_mutex);
2594 btrfs_free_path(path);
2595 if (di && !IS_ERR(di))
2596 return -EEXIST;
2597
2598 if (root == root->fs_info->tree_root)
2599 ret = create_subvol(root, vol_args.name, namelen);
2600 else
2601 ret = create_snapshot(root, vol_args.name, namelen);
2602 WARN_ON(ret);
2603 break;
2604 default:
2605 return -ENOTTY;
2606 }
2607 return ret;
2608}
2609
2610#ifdef CONFIG_COMPAT
2611static long btrfs_compat_ioctl(struct file *file, unsigned int cmd,
2612 unsigned long arg)
2613{
2614 struct inode *inode = file->f_path.dentry->d_inode;
2615 int ret;
2616 lock_kernel();
2617 ret = btrfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
2618 unlock_kernel();
2619 return ret;
2620
2621}
2622#endif
2623
2624static struct kmem_cache *btrfs_inode_cachep;
2625struct kmem_cache *btrfs_trans_handle_cachep;
2626struct kmem_cache *btrfs_transaction_cachep;
2627struct kmem_cache *btrfs_bit_radix_cachep;
2628struct kmem_cache *btrfs_path_cachep;
2629
2630/*
2631 * Called inside transaction, so use GFP_NOFS
2632 */
2633static struct inode *btrfs_alloc_inode(struct super_block *sb)
2634{
2635 struct btrfs_inode *ei;
2636
2637 ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2638 if (!ei)
2639 return NULL;
2640 return &ei->vfs_inode;
2641}
2642
2643static void btrfs_destroy_inode(struct inode *inode)
2644{
2645 WARN_ON(!list_empty(&inode->i_dentry));
2646 WARN_ON(inode->i_data.nrpages);
2647
2648 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2649}
2650
2651static void init_once(void * foo, struct kmem_cache * cachep,
2652 unsigned long flags)
2653{
2654 struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2655
2656 if ((flags & (SLAB_CTOR_CONSTRUCTOR)) ==
2657 SLAB_CTOR_CONSTRUCTOR) {
2658 inode_init_once(&ei->vfs_inode);
2659 }
2660}
2661
2662static int init_inodecache(void)
2663{
2664 btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
2665 sizeof(struct btrfs_inode),
2666 0, (SLAB_RECLAIM_ACCOUNT|
2667 SLAB_MEM_SPREAD),
2668 init_once, NULL);
2669 btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
2670 sizeof(struct btrfs_trans_handle),
2671 0, (SLAB_RECLAIM_ACCOUNT|
2672 SLAB_MEM_SPREAD),
2673 NULL, NULL);
2674 btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
2675 sizeof(struct btrfs_transaction),
2676 0, (SLAB_RECLAIM_ACCOUNT|
2677 SLAB_MEM_SPREAD),
2678 NULL, NULL);
2679 btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
2680 sizeof(struct btrfs_transaction),
2681 0, (SLAB_RECLAIM_ACCOUNT|
2682 SLAB_MEM_SPREAD),
2683 NULL, NULL);
2684 btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix",
2685 256,
2686 0, (SLAB_RECLAIM_ACCOUNT|
2687 SLAB_MEM_SPREAD |
2688 SLAB_DESTROY_BY_RCU),
2689 NULL, NULL);
2690 if (btrfs_inode_cachep == NULL || btrfs_trans_handle_cachep == NULL ||
2691 btrfs_transaction_cachep == NULL || btrfs_bit_radix_cachep == NULL)
2692 return -ENOMEM;
2693 return 0;
2694}
2695
2696static void destroy_inodecache(void)
2697{
2698 kmem_cache_destroy(btrfs_inode_cachep);
2699 kmem_cache_destroy(btrfs_trans_handle_cachep);
2700 kmem_cache_destroy(btrfs_transaction_cachep);
2701 kmem_cache_destroy(btrfs_bit_radix_cachep);
2702 kmem_cache_destroy(btrfs_path_cachep);
2703}
2704
2705static int btrfs_get_sb(struct file_system_type *fs_type, 120static int btrfs_get_sb(struct file_system_type *fs_type,
2706 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 121 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
2707{ 122{
@@ -2709,15 +124,6 @@ static int btrfs_get_sb(struct file_system_type *fs_type,
2709 btrfs_fill_super, mnt); 124 btrfs_fill_super, mnt);
2710} 125}
2711 126
2712static int btrfs_getattr(struct vfsmount *mnt,
2713 struct dentry *dentry, struct kstat *stat)
2714{
2715 struct inode *inode = dentry->d_inode;
2716 generic_fillattr(inode, stat);
2717 stat->blksize = 256 * 1024;
2718 return 0;
2719}
2720
2721static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) 127static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
2722{ 128{
2723 struct btrfs_root *root = btrfs_sb(dentry->d_sb); 129 struct btrfs_root *root = btrfs_sb(dentry->d_sb);
@@ -2732,197 +138,6 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
2732 return 0; 138 return 0;
2733} 139}
2734 140
2735static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
2736 struct inode * new_dir,struct dentry *new_dentry)
2737{
2738 struct btrfs_trans_handle *trans;
2739 struct btrfs_root *root = BTRFS_I(old_dir)->root;
2740 struct inode *new_inode = new_dentry->d_inode;
2741 struct inode *old_inode = old_dentry->d_inode;
2742 struct timespec ctime = CURRENT_TIME;
2743 struct btrfs_path *path;
2744 struct btrfs_dir_item *di;
2745 int ret;
2746
2747 if (S_ISDIR(old_inode->i_mode) && new_inode &&
2748 new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
2749 return -ENOTEMPTY;
2750 }
2751 mutex_lock(&root->fs_info->fs_mutex);
2752 trans = btrfs_start_transaction(root, 1);
2753 btrfs_set_trans_block_group(trans, new_dir);
2754 path = btrfs_alloc_path();
2755 if (!path) {
2756 ret = -ENOMEM;
2757 goto out_fail;
2758 }
2759
2760 old_dentry->d_inode->i_nlink++;
2761 old_dir->i_ctime = old_dir->i_mtime = ctime;
2762 new_dir->i_ctime = new_dir->i_mtime = ctime;
2763 old_inode->i_ctime = ctime;
2764 if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) {
2765 struct btrfs_key *location = &BTRFS_I(new_dir)->location;
2766 u64 old_parent_oid;
2767 di = btrfs_lookup_dir_item(trans, root, path, old_inode->i_ino,
2768 "..", 2, -1);
2769 if (IS_ERR(di)) {
2770 ret = PTR_ERR(di);
2771 goto out_fail;
2772 }
2773 if (!di) {
2774 ret = -ENOENT;
2775 goto out_fail;
2776 }
2777 old_parent_oid = btrfs_disk_key_objectid(&di->location);
2778 ret = btrfs_del_item(trans, root, path);
2779 if (ret) {
2780 ret = -EIO;
2781 goto out_fail;
2782 }
2783 btrfs_release_path(root, path);
2784
2785 di = btrfs_lookup_dir_index_item(trans, root, path,
2786 old_inode->i_ino,
2787 old_parent_oid,
2788 "..", 2, -1);
2789 if (IS_ERR(di)) {
2790 ret = PTR_ERR(di);
2791 goto out_fail;
2792 }
2793 if (!di) {
2794 ret = -ENOENT;
2795 goto out_fail;
2796 }
2797 ret = btrfs_del_item(trans, root, path);
2798 if (ret) {
2799 ret = -EIO;
2800 goto out_fail;
2801 }
2802 btrfs_release_path(root, path);
2803
2804 ret = btrfs_insert_dir_item(trans, root, "..", 2,
2805 old_inode->i_ino, location,
2806 BTRFS_FT_DIR);
2807 if (ret)
2808 goto out_fail;
2809 }
2810
2811
2812 ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
2813 if (ret)
2814 goto out_fail;
2815
2816 if (new_inode) {
2817 new_inode->i_ctime = CURRENT_TIME;
2818 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
2819 if (ret)
2820 goto out_fail;
2821 if (S_ISDIR(new_inode->i_mode))
2822 clear_nlink(new_inode);
2823 else
2824 drop_nlink(new_inode);
2825 btrfs_update_inode(trans, root, new_inode);
2826 }
2827 ret = btrfs_add_link(trans, new_dentry, old_inode);
2828 if (ret)
2829 goto out_fail;
2830
2831out_fail:
2832 btrfs_free_path(path);
2833 btrfs_end_transaction(trans, root);
2834 mutex_unlock(&root->fs_info->fs_mutex);
2835 return ret;
2836}
2837
2838static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
2839 const char *symname)
2840{
2841 struct btrfs_trans_handle *trans;
2842 struct btrfs_root *root = BTRFS_I(dir)->root;
2843 struct btrfs_path *path;
2844 struct btrfs_key key;
2845 struct inode *inode;
2846 int err;
2847 int drop_inode = 0;
2848 u64 objectid;
2849 int name_len;
2850 int datasize;
2851 char *ptr;
2852 struct btrfs_file_extent_item *ei;
2853
2854 name_len = strlen(symname) + 1;
2855 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
2856 return -ENAMETOOLONG;
2857 mutex_lock(&root->fs_info->fs_mutex);
2858 trans = btrfs_start_transaction(root, 1);
2859 btrfs_set_trans_block_group(trans, dir);
2860
2861 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2862 if (err) {
2863 err = -ENOSPC;
2864 goto out_unlock;
2865 }
2866
2867 inode = btrfs_new_inode(trans, root, objectid,
2868 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
2869 err = PTR_ERR(inode);
2870 if (IS_ERR(inode))
2871 goto out_unlock;
2872
2873 btrfs_set_trans_block_group(trans, inode);
2874 err = btrfs_add_nondir(trans, dentry, inode);
2875 if (err)
2876 drop_inode = 1;
2877 else {
2878 inode->i_mapping->a_ops = &btrfs_aops;
2879 inode->i_fop = &btrfs_file_operations;
2880 inode->i_op = &btrfs_file_inode_operations;
2881 }
2882 dir->i_sb->s_dirt = 1;
2883 btrfs_update_inode_block_group(trans, inode);
2884 btrfs_update_inode_block_group(trans, dir);
2885 if (drop_inode)
2886 goto out_unlock;
2887
2888 path = btrfs_alloc_path();
2889 BUG_ON(!path);
2890 key.objectid = inode->i_ino;
2891 key.offset = 0;
2892 key.flags = 0;
2893 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2894 datasize = btrfs_file_extent_calc_inline_size(name_len);
2895 err = btrfs_insert_empty_item(trans, root, path, &key,
2896 datasize);
2897 BUG_ON(err);
2898 ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2899 path->slots[0], struct btrfs_file_extent_item);
2900 btrfs_set_file_extent_generation(ei, trans->transid);
2901 btrfs_set_file_extent_type(ei,
2902 BTRFS_FILE_EXTENT_INLINE);
2903 ptr = btrfs_file_extent_inline_start(ei);
2904 btrfs_memcpy(root, path->nodes[0]->b_data,
2905 ptr, symname, name_len);
2906 mark_buffer_dirty(path->nodes[0]);
2907 btrfs_free_path(path);
2908 inode->i_op = &btrfs_symlink_inode_operations;
2909 inode->i_mapping->a_ops = &btrfs_symlink_aops;
2910 inode->i_size = name_len - 1;
2911 btrfs_update_inode(trans, root, inode);
2912 err = 0;
2913
2914out_unlock:
2915 btrfs_end_transaction(trans, root);
2916 mutex_unlock(&root->fs_info->fs_mutex);
2917
2918 if (drop_inode) {
2919 inode_dec_link_count(inode);
2920 iput(inode);
2921 }
2922 btrfs_btree_balance_dirty(root);
2923 return err;
2924}
2925
2926static struct file_system_type btrfs_fs_type = { 141static struct file_system_type btrfs_fs_type = {
2927 .owner = THIS_MODULE, 142 .owner = THIS_MODULE,
2928 .name = "btrfs", 143 .name = "btrfs",
@@ -2944,91 +159,21 @@ static struct super_operations btrfs_super_ops = {
2944 .statfs = btrfs_statfs, 159 .statfs = btrfs_statfs,
2945}; 160};
2946 161
2947static struct inode_operations btrfs_dir_inode_operations = {
2948 .lookup = btrfs_lookup,
2949 .create = btrfs_create,
2950 .unlink = btrfs_unlink,
2951 .link = btrfs_link,
2952 .mkdir = btrfs_mkdir,
2953 .rmdir = btrfs_rmdir,
2954 .rename = btrfs_rename,
2955 .symlink = btrfs_symlink,
2956 .setattr = btrfs_setattr,
2957};
2958
2959static struct inode_operations btrfs_dir_ro_inode_operations = {
2960 .lookup = btrfs_lookup,
2961};
2962
2963static struct file_operations btrfs_dir_file_operations = {
2964 .llseek = generic_file_llseek,
2965 .read = generic_read_dir,
2966 .readdir = btrfs_readdir,
2967 .ioctl = btrfs_ioctl,
2968#ifdef CONFIG_COMPAT
2969 .compat_ioctl = btrfs_compat_ioctl,
2970#endif
2971};
2972
2973static struct address_space_operations btrfs_aops = {
2974 .readpage = btrfs_readpage,
2975 .writepage = btrfs_writepage,
2976 .sync_page = block_sync_page,
2977 .prepare_write = btrfs_prepare_write,
2978 .commit_write = btrfs_commit_write,
2979 .bmap = btrfs_bmap,
2980};
2981
2982static struct address_space_operations btrfs_symlink_aops = {
2983 .readpage = btrfs_readpage,
2984 .writepage = btrfs_writepage,
2985};
2986
2987static struct inode_operations btrfs_file_inode_operations = {
2988 .truncate = btrfs_truncate,
2989 .getattr = btrfs_getattr,
2990 .setattr = btrfs_setattr,
2991};
2992
2993static struct file_operations btrfs_file_operations = {
2994 .llseek = generic_file_llseek,
2995 .read = do_sync_read,
2996 .aio_read = btrfs_file_aio_read,
2997 .write = btrfs_file_write,
2998 .mmap = generic_file_mmap,
2999 .open = generic_file_open,
3000 .ioctl = btrfs_ioctl,
3001 .fsync = btrfs_sync_file,
3002#ifdef CONFIG_COMPAT
3003 .compat_ioctl = btrfs_compat_ioctl,
3004#endif
3005};
3006
3007static struct inode_operations btrfs_symlink_inode_operations = {
3008 .readlink = generic_readlink,
3009 .follow_link = page_follow_link_light,
3010 .put_link = page_put_link,
3011};
3012
3013static int __init init_btrfs_fs(void) 162static int __init init_btrfs_fs(void)
3014{ 163{
3015 int err; 164 int err;
3016 printk("btrfs loaded!\n");
3017 btrfs_init_transaction_sys(); 165 btrfs_init_transaction_sys();
3018 err = init_inodecache(); 166 err = btrfs_init_cachep();
3019 if (err) 167 if (err)
3020 return err; 168 return err;
3021 return register_filesystem(&btrfs_fs_type); 169 return register_filesystem(&btrfs_fs_type);
3022 destroy_inodecache();
3023 return err;
3024} 170}
3025 171
3026static void __exit exit_btrfs_fs(void) 172static void __exit exit_btrfs_fs(void)
3027{ 173{
3028 btrfs_exit_transaction_sys(); 174 btrfs_exit_transaction_sys();
3029 destroy_inodecache(); 175 btrfs_destroy_cachep();
3030 unregister_filesystem(&btrfs_fs_type); 176 unregister_filesystem(&btrfs_fs_type);
3031 printk("btrfs unloaded\n");
3032} 177}
3033 178
3034module_init(init_btrfs_fs) 179module_init(init_btrfs_fs)