diff options
Diffstat (limited to 'fs')
85 files changed, 3419 insertions, 2522 deletions
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 798cb071d132..3f57ce4bee5d 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c | |||
@@ -19,9 +19,6 @@ static int | |||
19 | adfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh, | 19 | adfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh, |
20 | int create) | 20 | int create) |
21 | { | 21 | { |
22 | if (block < 0) | ||
23 | goto abort_negative; | ||
24 | |||
25 | if (!create) { | 22 | if (!create) { |
26 | if (block >= inode->i_blocks) | 23 | if (block >= inode->i_blocks) |
27 | goto abort_toobig; | 24 | goto abort_toobig; |
@@ -34,10 +31,6 @@ adfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh, | |||
34 | /* don't support allocation of blocks yet */ | 31 | /* don't support allocation of blocks yet */ |
35 | return -EIO; | 32 | return -EIO; |
36 | 33 | ||
37 | abort_negative: | ||
38 | adfs_error(inode->i_sb, "block %d < 0", block); | ||
39 | return -EIO; | ||
40 | |||
41 | abort_toobig: | 34 | abort_toobig: |
42 | return 0; | 35 | return 0; |
43 | } | 36 | } |
@@ -18,7 +18,7 @@ | |||
18 | /* Taken over from the old code... */ | 18 | /* Taken over from the old code... */ |
19 | 19 | ||
20 | /* POSIX UID/GID verification for setting inode attributes. */ | 20 | /* POSIX UID/GID verification for setting inode attributes. */ |
21 | int inode_change_ok(struct inode *inode, struct iattr *attr) | 21 | int inode_change_ok(const struct inode *inode, struct iattr *attr) |
22 | { | 22 | { |
23 | int retval = -EPERM; | 23 | int retval = -EPERM; |
24 | unsigned int ia_valid = attr->ia_valid; | 24 | unsigned int ia_valid = attr->ia_valid; |
@@ -60,9 +60,51 @@ fine: | |||
60 | error: | 60 | error: |
61 | return retval; | 61 | return retval; |
62 | } | 62 | } |
63 | |||
64 | EXPORT_SYMBOL(inode_change_ok); | 63 | EXPORT_SYMBOL(inode_change_ok); |
65 | 64 | ||
65 | /** | ||
66 | * inode_newsize_ok - may this inode be truncated to a given size | ||
67 | * @inode: the inode to be truncated | ||
68 | * @offset: the new size to assign to the inode | ||
69 | * @Returns: 0 on success, -ve errno on failure | ||
70 | * | ||
71 | * inode_newsize_ok will check filesystem limits and ulimits to check that the | ||
72 | * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ | ||
73 | * when necessary. Caller must not proceed with inode size change if failure is | ||
74 | * returned. @inode must be a file (not directory), with appropriate | ||
75 | * permissions to allow truncate (inode_newsize_ok does NOT check these | ||
76 | * conditions). | ||
77 | * | ||
78 | * inode_newsize_ok must be called with i_mutex held. | ||
79 | */ | ||
80 | int inode_newsize_ok(const struct inode *inode, loff_t offset) | ||
81 | { | ||
82 | if (inode->i_size < offset) { | ||
83 | unsigned long limit; | ||
84 | |||
85 | limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; | ||
86 | if (limit != RLIM_INFINITY && offset > limit) | ||
87 | goto out_sig; | ||
88 | if (offset > inode->i_sb->s_maxbytes) | ||
89 | goto out_big; | ||
90 | } else { | ||
91 | /* | ||
92 | * truncation of in-use swapfiles is disallowed - it would | ||
93 | * cause subsequent swapout to scribble on the now-freed | ||
94 | * blocks. | ||
95 | */ | ||
96 | if (IS_SWAPFILE(inode)) | ||
97 | return -ETXTBSY; | ||
98 | } | ||
99 | |||
100 | return 0; | ||
101 | out_sig: | ||
102 | send_sig(SIGXFSZ, current, 0); | ||
103 | out_big: | ||
104 | return -EFBIG; | ||
105 | } | ||
106 | EXPORT_SYMBOL(inode_newsize_ok); | ||
107 | |||
66 | int inode_setattr(struct inode * inode, struct iattr * attr) | 108 | int inode_setattr(struct inode * inode, struct iattr * attr) |
67 | { | 109 | { |
68 | unsigned int ia_valid = attr->ia_valid; | 110 | unsigned int ia_valid = attr->ia_valid; |
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index dd376c124e71..33baf27fac78 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c | |||
@@ -737,12 +737,7 @@ befs_put_super(struct super_block *sb) | |||
737 | { | 737 | { |
738 | kfree(BEFS_SB(sb)->mount_opts.iocharset); | 738 | kfree(BEFS_SB(sb)->mount_opts.iocharset); |
739 | BEFS_SB(sb)->mount_opts.iocharset = NULL; | 739 | BEFS_SB(sb)->mount_opts.iocharset = NULL; |
740 | 740 | unload_nls(BEFS_SB(sb)->nls); | |
741 | if (BEFS_SB(sb)->nls) { | ||
742 | unload_nls(BEFS_SB(sb)->nls); | ||
743 | BEFS_SB(sb)->nls = NULL; | ||
744 | } | ||
745 | |||
746 | kfree(sb->s_fs_info); | 741 | kfree(sb->s_fs_info); |
747 | sb->s_fs_info = NULL; | 742 | sb->s_fs_info = NULL; |
748 | } | 743 | } |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 442d94fe255c..b9b3bb51b1e4 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -1711,42 +1711,52 @@ struct elf_note_info { | |||
1711 | int numnote; | 1711 | int numnote; |
1712 | }; | 1712 | }; |
1713 | 1713 | ||
1714 | static int fill_note_info(struct elfhdr *elf, int phdrs, | 1714 | static int elf_note_info_init(struct elf_note_info *info) |
1715 | struct elf_note_info *info, | ||
1716 | long signr, struct pt_regs *regs) | ||
1717 | { | 1715 | { |
1718 | #define NUM_NOTES 6 | 1716 | memset(info, 0, sizeof(*info)); |
1719 | struct list_head *t; | ||
1720 | |||
1721 | info->notes = NULL; | ||
1722 | info->prstatus = NULL; | ||
1723 | info->psinfo = NULL; | ||
1724 | info->fpu = NULL; | ||
1725 | #ifdef ELF_CORE_COPY_XFPREGS | ||
1726 | info->xfpu = NULL; | ||
1727 | #endif | ||
1728 | INIT_LIST_HEAD(&info->thread_list); | 1717 | INIT_LIST_HEAD(&info->thread_list); |
1729 | 1718 | ||
1730 | info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), | 1719 | /* Allocate space for six ELF notes */ |
1731 | GFP_KERNEL); | 1720 | info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL); |
1732 | if (!info->notes) | 1721 | if (!info->notes) |
1733 | return 0; | 1722 | return 0; |
1734 | info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); | 1723 | info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); |
1735 | if (!info->psinfo) | 1724 | if (!info->psinfo) |
1736 | return 0; | 1725 | goto notes_free; |
1737 | info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); | 1726 | info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); |
1738 | if (!info->prstatus) | 1727 | if (!info->prstatus) |
1739 | return 0; | 1728 | goto psinfo_free; |
1740 | info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); | 1729 | info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); |
1741 | if (!info->fpu) | 1730 | if (!info->fpu) |
1742 | return 0; | 1731 | goto prstatus_free; |
1743 | #ifdef ELF_CORE_COPY_XFPREGS | 1732 | #ifdef ELF_CORE_COPY_XFPREGS |
1744 | info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); | 1733 | info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); |
1745 | if (!info->xfpu) | 1734 | if (!info->xfpu) |
1746 | return 0; | 1735 | goto fpu_free; |
1736 | #endif | ||
1737 | return 1; | ||
1738 | #ifdef ELF_CORE_COPY_XFPREGS | ||
1739 | fpu_free: | ||
1740 | kfree(info->fpu); | ||
1747 | #endif | 1741 | #endif |
1742 | prstatus_free: | ||
1743 | kfree(info->prstatus); | ||
1744 | psinfo_free: | ||
1745 | kfree(info->psinfo); | ||
1746 | notes_free: | ||
1747 | kfree(info->notes); | ||
1748 | return 0; | ||
1749 | } | ||
1750 | |||
1751 | static int fill_note_info(struct elfhdr *elf, int phdrs, | ||
1752 | struct elf_note_info *info, | ||
1753 | long signr, struct pt_regs *regs) | ||
1754 | { | ||
1755 | struct list_head *t; | ||
1756 | |||
1757 | if (!elf_note_info_init(info)) | ||
1758 | return 0; | ||
1748 | 1759 | ||
1749 | info->thread_status_size = 0; | ||
1750 | if (signr) { | 1760 | if (signr) { |
1751 | struct core_thread *ct; | 1761 | struct core_thread *ct; |
1752 | struct elf_thread_status *ets; | 1762 | struct elf_thread_status *ets; |
@@ -1806,8 +1816,6 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, | |||
1806 | #endif | 1816 | #endif |
1807 | 1817 | ||
1808 | return 1; | 1818 | return 1; |
1809 | |||
1810 | #undef NUM_NOTES | ||
1811 | } | 1819 | } |
1812 | 1820 | ||
1813 | static size_t get_note_info_size(struct elf_note_info *info) | 1821 | static size_t get_note_info_size(struct elf_note_info *info) |
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 76285471073e..38502c67987c 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c | |||
@@ -283,20 +283,23 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, | |||
283 | } | 283 | } |
284 | 284 | ||
285 | stack_size = exec_params.stack_size; | 285 | stack_size = exec_params.stack_size; |
286 | if (stack_size < interp_params.stack_size) | ||
287 | stack_size = interp_params.stack_size; | ||
288 | |||
289 | if (exec_params.flags & ELF_FDPIC_FLAG_EXEC_STACK) | 286 | if (exec_params.flags & ELF_FDPIC_FLAG_EXEC_STACK) |
290 | executable_stack = EXSTACK_ENABLE_X; | 287 | executable_stack = EXSTACK_ENABLE_X; |
291 | else if (exec_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK) | 288 | else if (exec_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK) |
292 | executable_stack = EXSTACK_DISABLE_X; | 289 | executable_stack = EXSTACK_DISABLE_X; |
293 | else if (interp_params.flags & ELF_FDPIC_FLAG_EXEC_STACK) | ||
294 | executable_stack = EXSTACK_ENABLE_X; | ||
295 | else if (interp_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK) | ||
296 | executable_stack = EXSTACK_DISABLE_X; | ||
297 | else | 290 | else |
298 | executable_stack = EXSTACK_DEFAULT; | 291 | executable_stack = EXSTACK_DEFAULT; |
299 | 292 | ||
293 | if (stack_size == 0) { | ||
294 | stack_size = interp_params.stack_size; | ||
295 | if (interp_params.flags & ELF_FDPIC_FLAG_EXEC_STACK) | ||
296 | executable_stack = EXSTACK_ENABLE_X; | ||
297 | else if (interp_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK) | ||
298 | executable_stack = EXSTACK_DISABLE_X; | ||
299 | else | ||
300 | executable_stack = EXSTACK_DEFAULT; | ||
301 | } | ||
302 | |||
300 | retval = -ENOEXEC; | 303 | retval = -ENOEXEC; |
301 | if (stack_size == 0) | 304 | if (stack_size == 0) |
302 | goto error; | 305 | goto error; |
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index e92f229e3c6e..a2796651e756 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c | |||
@@ -278,8 +278,6 @@ static int decompress_exec( | |||
278 | ret = bprm->file->f_op->read(bprm->file, buf, LBUFSIZE, &fpos); | 278 | ret = bprm->file->f_op->read(bprm->file, buf, LBUFSIZE, &fpos); |
279 | if (ret <= 0) | 279 | if (ret <= 0) |
280 | break; | 280 | break; |
281 | if (ret >= (unsigned long) -4096) | ||
282 | break; | ||
283 | len -= ret; | 281 | len -= ret; |
284 | 282 | ||
285 | strm.next_in = buf; | 283 | strm.next_in = buf; |
@@ -335,7 +333,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp) | |||
335 | "(%d != %d)", (unsigned) r, curid, id); | 333 | "(%d != %d)", (unsigned) r, curid, id); |
336 | goto failed; | 334 | goto failed; |
337 | } else if ( ! p->lib_list[id].loaded && | 335 | } else if ( ! p->lib_list[id].loaded && |
338 | load_flat_shared_library(id, p) > (unsigned long) -4096) { | 336 | IS_ERR_VALUE(load_flat_shared_library(id, p))) { |
339 | printk("BINFMT_FLAT: failed to load library %d", id); | 337 | printk("BINFMT_FLAT: failed to load library %d", id); |
340 | goto failed; | 338 | goto failed; |
341 | } | 339 | } |
@@ -545,7 +543,7 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
545 | textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, | 543 | textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, |
546 | MAP_PRIVATE|MAP_EXECUTABLE, 0); | 544 | MAP_PRIVATE|MAP_EXECUTABLE, 0); |
547 | up_write(&current->mm->mmap_sem); | 545 | up_write(&current->mm->mmap_sem); |
548 | if (!textpos || textpos >= (unsigned long) -4096) { | 546 | if (!textpos || IS_ERR_VALUE(textpos)) { |
549 | if (!textpos) | 547 | if (!textpos) |
550 | textpos = (unsigned long) -ENOMEM; | 548 | textpos = (unsigned long) -ENOMEM; |
551 | printk("Unable to mmap process text, errno %d\n", (int)-textpos); | 549 | printk("Unable to mmap process text, errno %d\n", (int)-textpos); |
@@ -560,7 +558,7 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
560 | PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0); | 558 | PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0); |
561 | up_write(&current->mm->mmap_sem); | 559 | up_write(&current->mm->mmap_sem); |
562 | 560 | ||
563 | if (realdatastart == 0 || realdatastart >= (unsigned long)-4096) { | 561 | if (realdatastart == 0 || IS_ERR_VALUE(realdatastart)) { |
564 | if (!realdatastart) | 562 | if (!realdatastart) |
565 | realdatastart = (unsigned long) -ENOMEM; | 563 | realdatastart = (unsigned long) -ENOMEM; |
566 | printk("Unable to allocate RAM for process data, errno %d\n", | 564 | printk("Unable to allocate RAM for process data, errno %d\n", |
@@ -587,7 +585,7 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
587 | result = bprm->file->f_op->read(bprm->file, (char *) datapos, | 585 | result = bprm->file->f_op->read(bprm->file, (char *) datapos, |
588 | data_len + (relocs * sizeof(unsigned long)), &fpos); | 586 | data_len + (relocs * sizeof(unsigned long)), &fpos); |
589 | } | 587 | } |
590 | if (result >= (unsigned long)-4096) { | 588 | if (IS_ERR_VALUE(result)) { |
591 | printk("Unable to read data+bss, errno %d\n", (int)-result); | 589 | printk("Unable to read data+bss, errno %d\n", (int)-result); |
592 | do_munmap(current->mm, textpos, text_len); | 590 | do_munmap(current->mm, textpos, text_len); |
593 | do_munmap(current->mm, realdatastart, data_len + extra); | 591 | do_munmap(current->mm, realdatastart, data_len + extra); |
@@ -607,7 +605,7 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
607 | PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0); | 605 | PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0); |
608 | up_write(&current->mm->mmap_sem); | 606 | up_write(&current->mm->mmap_sem); |
609 | 607 | ||
610 | if (!textpos || textpos >= (unsigned long) -4096) { | 608 | if (!textpos || IS_ERR_VALUE(textpos)) { |
611 | if (!textpos) | 609 | if (!textpos) |
612 | textpos = (unsigned long) -ENOMEM; | 610 | textpos = (unsigned long) -ENOMEM; |
613 | printk("Unable to allocate RAM for process text/data, errno %d\n", | 611 | printk("Unable to allocate RAM for process text/data, errno %d\n", |
@@ -641,7 +639,7 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
641 | fpos = 0; | 639 | fpos = 0; |
642 | result = bprm->file->f_op->read(bprm->file, | 640 | result = bprm->file->f_op->read(bprm->file, |
643 | (char *) textpos, text_len, &fpos); | 641 | (char *) textpos, text_len, &fpos); |
644 | if (result < (unsigned long) -4096) | 642 | if (!IS_ERR_VALUE(result)) |
645 | result = decompress_exec(bprm, text_len, (char *) datapos, | 643 | result = decompress_exec(bprm, text_len, (char *) datapos, |
646 | data_len + (relocs * sizeof(unsigned long)), 0); | 644 | data_len + (relocs * sizeof(unsigned long)), 0); |
647 | } | 645 | } |
@@ -651,13 +649,13 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
651 | fpos = 0; | 649 | fpos = 0; |
652 | result = bprm->file->f_op->read(bprm->file, | 650 | result = bprm->file->f_op->read(bprm->file, |
653 | (char *) textpos, text_len, &fpos); | 651 | (char *) textpos, text_len, &fpos); |
654 | if (result < (unsigned long) -4096) { | 652 | if (!IS_ERR_VALUE(result)) { |
655 | fpos = ntohl(hdr->data_start); | 653 | fpos = ntohl(hdr->data_start); |
656 | result = bprm->file->f_op->read(bprm->file, (char *) datapos, | 654 | result = bprm->file->f_op->read(bprm->file, (char *) datapos, |
657 | data_len + (relocs * sizeof(unsigned long)), &fpos); | 655 | data_len + (relocs * sizeof(unsigned long)), &fpos); |
658 | } | 656 | } |
659 | } | 657 | } |
660 | if (result >= (unsigned long)-4096) { | 658 | if (IS_ERR_VALUE(result)) { |
661 | printk("Unable to read code+data+bss, errno %d\n",(int)-result); | 659 | printk("Unable to read code+data+bss, errno %d\n",(int)-result); |
662 | do_munmap(current->mm, textpos, text_len + data_len + extra + | 660 | do_munmap(current->mm, textpos, text_len + data_len + extra + |
663 | MAX_SHARED_LIBS * sizeof(unsigned long)); | 661 | MAX_SHARED_LIBS * sizeof(unsigned long)); |
@@ -835,7 +833,7 @@ static int load_flat_shared_library(int id, struct lib_info *libs) | |||
835 | 833 | ||
836 | res = prepare_binprm(&bprm); | 834 | res = prepare_binprm(&bprm); |
837 | 835 | ||
838 | if (res <= (unsigned long)-4096) | 836 | if (!IS_ERR_VALUE(res)) |
839 | res = load_flat_file(&bprm, libs, id, NULL); | 837 | res = load_flat_file(&bprm, libs, id, NULL); |
840 | 838 | ||
841 | abort_creds(bprm.cred); | 839 | abort_creds(bprm.cred); |
@@ -880,7 +878,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) | |||
880 | stack_len += FLAT_DATA_ALIGN - 1; /* reserve for upcoming alignment */ | 878 | stack_len += FLAT_DATA_ALIGN - 1; /* reserve for upcoming alignment */ |
881 | 879 | ||
882 | res = load_flat_file(bprm, &libinfo, 0, &stack_len); | 880 | res = load_flat_file(bprm, &libinfo, 0, &stack_len); |
883 | if (res > (unsigned long)-4096) | 881 | if (IS_ERR_VALUE(res)) |
884 | return res; | 882 | return res; |
885 | 883 | ||
886 | /* Update data segment pointers for all libraries */ | 884 | /* Update data segment pointers for all libraries */ |
diff --git a/fs/block_dev.c b/fs/block_dev.c index 5d1ed50bd46c..9cf4b926f8e4 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -216,8 +216,6 @@ EXPORT_SYMBOL(fsync_bdev); | |||
216 | * freeze_bdev -- lock a filesystem and force it into a consistent state | 216 | * freeze_bdev -- lock a filesystem and force it into a consistent state |
217 | * @bdev: blockdevice to lock | 217 | * @bdev: blockdevice to lock |
218 | * | 218 | * |
219 | * This takes the block device bd_mount_sem to make sure no new mounts | ||
220 | * happen on bdev until thaw_bdev() is called. | ||
221 | * If a superblock is found on this device, we take the s_umount semaphore | 219 | * If a superblock is found on this device, we take the s_umount semaphore |
222 | * on it to make sure nobody unmounts until the snapshot creation is done. | 220 | * on it to make sure nobody unmounts until the snapshot creation is done. |
223 | * The reference counter (bd_fsfreeze_count) guarantees that only the last | 221 | * The reference counter (bd_fsfreeze_count) guarantees that only the last |
@@ -232,46 +230,55 @@ struct super_block *freeze_bdev(struct block_device *bdev) | |||
232 | int error = 0; | 230 | int error = 0; |
233 | 231 | ||
234 | mutex_lock(&bdev->bd_fsfreeze_mutex); | 232 | mutex_lock(&bdev->bd_fsfreeze_mutex); |
235 | if (bdev->bd_fsfreeze_count > 0) { | 233 | if (++bdev->bd_fsfreeze_count > 1) { |
236 | bdev->bd_fsfreeze_count++; | 234 | /* |
235 | * We don't even need to grab a reference - the first call | ||
236 | * to freeze_bdev grab an active reference and only the last | ||
237 | * thaw_bdev drops it. | ||
238 | */ | ||
237 | sb = get_super(bdev); | 239 | sb = get_super(bdev); |
240 | drop_super(sb); | ||
238 | mutex_unlock(&bdev->bd_fsfreeze_mutex); | 241 | mutex_unlock(&bdev->bd_fsfreeze_mutex); |
239 | return sb; | 242 | return sb; |
240 | } | 243 | } |
241 | bdev->bd_fsfreeze_count++; | 244 | |
242 | 245 | sb = get_active_super(bdev); | |
243 | down(&bdev->bd_mount_sem); | 246 | if (!sb) |
244 | sb = get_super(bdev); | 247 | goto out; |
245 | if (sb && !(sb->s_flags & MS_RDONLY)) { | 248 | if (sb->s_flags & MS_RDONLY) { |
246 | sb->s_frozen = SB_FREEZE_WRITE; | 249 | deactivate_locked_super(sb); |
247 | smp_wmb(); | 250 | mutex_unlock(&bdev->bd_fsfreeze_mutex); |
248 | 251 | return sb; | |
249 | sync_filesystem(sb); | 252 | } |
250 | 253 | ||
251 | sb->s_frozen = SB_FREEZE_TRANS; | 254 | sb->s_frozen = SB_FREEZE_WRITE; |
252 | smp_wmb(); | 255 | smp_wmb(); |
253 | 256 | ||
254 | sync_blockdev(sb->s_bdev); | 257 | sync_filesystem(sb); |
255 | 258 | ||
256 | if (sb->s_op->freeze_fs) { | 259 | sb->s_frozen = SB_FREEZE_TRANS; |
257 | error = sb->s_op->freeze_fs(sb); | 260 | smp_wmb(); |
258 | if (error) { | 261 | |
259 | printk(KERN_ERR | 262 | sync_blockdev(sb->s_bdev); |
260 | "VFS:Filesystem freeze failed\n"); | 263 | |
261 | sb->s_frozen = SB_UNFROZEN; | 264 | if (sb->s_op->freeze_fs) { |
262 | drop_super(sb); | 265 | error = sb->s_op->freeze_fs(sb); |
263 | up(&bdev->bd_mount_sem); | 266 | if (error) { |
264 | bdev->bd_fsfreeze_count--; | 267 | printk(KERN_ERR |
265 | mutex_unlock(&bdev->bd_fsfreeze_mutex); | 268 | "VFS:Filesystem freeze failed\n"); |
266 | return ERR_PTR(error); | 269 | sb->s_frozen = SB_UNFROZEN; |
267 | } | 270 | deactivate_locked_super(sb); |
271 | bdev->bd_fsfreeze_count--; | ||
272 | mutex_unlock(&bdev->bd_fsfreeze_mutex); | ||
273 | return ERR_PTR(error); | ||
268 | } | 274 | } |
269 | } | 275 | } |
276 | up_write(&sb->s_umount); | ||
270 | 277 | ||
278 | out: | ||
271 | sync_blockdev(bdev); | 279 | sync_blockdev(bdev); |
272 | mutex_unlock(&bdev->bd_fsfreeze_mutex); | 280 | mutex_unlock(&bdev->bd_fsfreeze_mutex); |
273 | 281 | return sb; /* thaw_bdev releases s->s_umount */ | |
274 | return sb; /* thaw_bdev releases s->s_umount and bd_mount_sem */ | ||
275 | } | 282 | } |
276 | EXPORT_SYMBOL(freeze_bdev); | 283 | EXPORT_SYMBOL(freeze_bdev); |
277 | 284 | ||
@@ -284,44 +291,44 @@ EXPORT_SYMBOL(freeze_bdev); | |||
284 | */ | 291 | */ |
285 | int thaw_bdev(struct block_device *bdev, struct super_block *sb) | 292 | int thaw_bdev(struct block_device *bdev, struct super_block *sb) |
286 | { | 293 | { |
287 | int error = 0; | 294 | int error = -EINVAL; |
288 | 295 | ||
289 | mutex_lock(&bdev->bd_fsfreeze_mutex); | 296 | mutex_lock(&bdev->bd_fsfreeze_mutex); |
290 | if (!bdev->bd_fsfreeze_count) { | 297 | if (!bdev->bd_fsfreeze_count) |
291 | mutex_unlock(&bdev->bd_fsfreeze_mutex); | 298 | goto out_unlock; |
292 | return -EINVAL; | 299 | |
293 | } | 300 | error = 0; |
294 | 301 | if (--bdev->bd_fsfreeze_count > 0) | |
295 | bdev->bd_fsfreeze_count--; | 302 | goto out_unlock; |
296 | if (bdev->bd_fsfreeze_count > 0) { | 303 | |
297 | if (sb) | 304 | if (!sb) |
298 | drop_super(sb); | 305 | goto out_unlock; |
299 | mutex_unlock(&bdev->bd_fsfreeze_mutex); | 306 | |
300 | return 0; | 307 | BUG_ON(sb->s_bdev != bdev); |
301 | } | 308 | down_write(&sb->s_umount); |
302 | 309 | if (sb->s_flags & MS_RDONLY) | |
303 | if (sb) { | 310 | goto out_deactivate; |
304 | BUG_ON(sb->s_bdev != bdev); | 311 | |
305 | if (!(sb->s_flags & MS_RDONLY)) { | 312 | if (sb->s_op->unfreeze_fs) { |
306 | if (sb->s_op->unfreeze_fs) { | 313 | error = sb->s_op->unfreeze_fs(sb); |
307 | error = sb->s_op->unfreeze_fs(sb); | 314 | if (error) { |
308 | if (error) { | 315 | printk(KERN_ERR |
309 | printk(KERN_ERR | 316 | "VFS:Filesystem thaw failed\n"); |
310 | "VFS:Filesystem thaw failed\n"); | 317 | sb->s_frozen = SB_FREEZE_TRANS; |
311 | sb->s_frozen = SB_FREEZE_TRANS; | 318 | bdev->bd_fsfreeze_count++; |
312 | bdev->bd_fsfreeze_count++; | 319 | mutex_unlock(&bdev->bd_fsfreeze_mutex); |
313 | mutex_unlock(&bdev->bd_fsfreeze_mutex); | 320 | return error; |
314 | return error; | ||
315 | } | ||
316 | } | ||
317 | sb->s_frozen = SB_UNFROZEN; | ||
318 | smp_wmb(); | ||
319 | wake_up(&sb->s_wait_unfrozen); | ||
320 | } | 321 | } |
321 | drop_super(sb); | ||
322 | } | 322 | } |
323 | 323 | ||
324 | up(&bdev->bd_mount_sem); | 324 | sb->s_frozen = SB_UNFROZEN; |
325 | smp_wmb(); | ||
326 | wake_up(&sb->s_wait_unfrozen); | ||
327 | |||
328 | out_deactivate: | ||
329 | if (sb) | ||
330 | deactivate_locked_super(sb); | ||
331 | out_unlock: | ||
325 | mutex_unlock(&bdev->bd_fsfreeze_mutex); | 332 | mutex_unlock(&bdev->bd_fsfreeze_mutex); |
326 | return 0; | 333 | return 0; |
327 | } | 334 | } |
@@ -430,7 +437,6 @@ static void init_once(void *foo) | |||
430 | 437 | ||
431 | memset(bdev, 0, sizeof(*bdev)); | 438 | memset(bdev, 0, sizeof(*bdev)); |
432 | mutex_init(&bdev->bd_mutex); | 439 | mutex_init(&bdev->bd_mutex); |
433 | sema_init(&bdev->bd_mount_sem, 1); | ||
434 | INIT_LIST_HEAD(&bdev->bd_inodes); | 440 | INIT_LIST_HEAD(&bdev->bd_inodes); |
435 | INIT_LIST_HEAD(&bdev->bd_list); | 441 | INIT_LIST_HEAD(&bdev->bd_list); |
436 | #ifdef CONFIG_SYSFS | 442 | #ifdef CONFIG_SYSFS |
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 019e8af449ab..282ca085c2fb 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c | |||
@@ -48,6 +48,9 @@ struct btrfs_worker_thread { | |||
48 | /* number of things on the pending list */ | 48 | /* number of things on the pending list */ |
49 | atomic_t num_pending; | 49 | atomic_t num_pending; |
50 | 50 | ||
51 | /* reference counter for this struct */ | ||
52 | atomic_t refs; | ||
53 | |||
51 | unsigned long sequence; | 54 | unsigned long sequence; |
52 | 55 | ||
53 | /* protects the pending list. */ | 56 | /* protects the pending list. */ |
@@ -71,7 +74,12 @@ static void check_idle_worker(struct btrfs_worker_thread *worker) | |||
71 | unsigned long flags; | 74 | unsigned long flags; |
72 | spin_lock_irqsave(&worker->workers->lock, flags); | 75 | spin_lock_irqsave(&worker->workers->lock, flags); |
73 | worker->idle = 1; | 76 | worker->idle = 1; |
74 | list_move(&worker->worker_list, &worker->workers->idle_list); | 77 | |
78 | /* the list may be empty if the worker is just starting */ | ||
79 | if (!list_empty(&worker->worker_list)) { | ||
80 | list_move(&worker->worker_list, | ||
81 | &worker->workers->idle_list); | ||
82 | } | ||
75 | spin_unlock_irqrestore(&worker->workers->lock, flags); | 83 | spin_unlock_irqrestore(&worker->workers->lock, flags); |
76 | } | 84 | } |
77 | } | 85 | } |
@@ -87,23 +95,49 @@ static void check_busy_worker(struct btrfs_worker_thread *worker) | |||
87 | unsigned long flags; | 95 | unsigned long flags; |
88 | spin_lock_irqsave(&worker->workers->lock, flags); | 96 | spin_lock_irqsave(&worker->workers->lock, flags); |
89 | worker->idle = 0; | 97 | worker->idle = 0; |
90 | list_move_tail(&worker->worker_list, | 98 | |
91 | &worker->workers->worker_list); | 99 | if (!list_empty(&worker->worker_list)) { |
100 | list_move_tail(&worker->worker_list, | ||
101 | &worker->workers->worker_list); | ||
102 | } | ||
92 | spin_unlock_irqrestore(&worker->workers->lock, flags); | 103 | spin_unlock_irqrestore(&worker->workers->lock, flags); |
93 | } | 104 | } |
94 | } | 105 | } |
95 | 106 | ||
96 | static noinline int run_ordered_completions(struct btrfs_workers *workers, | 107 | static void check_pending_worker_creates(struct btrfs_worker_thread *worker) |
97 | struct btrfs_work *work) | ||
98 | { | 108 | { |
109 | struct btrfs_workers *workers = worker->workers; | ||
99 | unsigned long flags; | 110 | unsigned long flags; |
100 | 111 | ||
112 | rmb(); | ||
113 | if (!workers->atomic_start_pending) | ||
114 | return; | ||
115 | |||
116 | spin_lock_irqsave(&workers->lock, flags); | ||
117 | if (!workers->atomic_start_pending) | ||
118 | goto out; | ||
119 | |||
120 | workers->atomic_start_pending = 0; | ||
121 | if (workers->num_workers >= workers->max_workers) | ||
122 | goto out; | ||
123 | |||
124 | spin_unlock_irqrestore(&workers->lock, flags); | ||
125 | btrfs_start_workers(workers, 1); | ||
126 | return; | ||
127 | |||
128 | out: | ||
129 | spin_unlock_irqrestore(&workers->lock, flags); | ||
130 | } | ||
131 | |||
132 | static noinline int run_ordered_completions(struct btrfs_workers *workers, | ||
133 | struct btrfs_work *work) | ||
134 | { | ||
101 | if (!workers->ordered) | 135 | if (!workers->ordered) |
102 | return 0; | 136 | return 0; |
103 | 137 | ||
104 | set_bit(WORK_DONE_BIT, &work->flags); | 138 | set_bit(WORK_DONE_BIT, &work->flags); |
105 | 139 | ||
106 | spin_lock_irqsave(&workers->lock, flags); | 140 | spin_lock(&workers->order_lock); |
107 | 141 | ||
108 | while (1) { | 142 | while (1) { |
109 | if (!list_empty(&workers->prio_order_list)) { | 143 | if (!list_empty(&workers->prio_order_list)) { |
@@ -126,45 +160,118 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers, | |||
126 | if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags)) | 160 | if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags)) |
127 | break; | 161 | break; |
128 | 162 | ||
129 | spin_unlock_irqrestore(&workers->lock, flags); | 163 | spin_unlock(&workers->order_lock); |
130 | 164 | ||
131 | work->ordered_func(work); | 165 | work->ordered_func(work); |
132 | 166 | ||
133 | /* now take the lock again and call the freeing code */ | 167 | /* now take the lock again and call the freeing code */ |
134 | spin_lock_irqsave(&workers->lock, flags); | 168 | spin_lock(&workers->order_lock); |
135 | list_del(&work->order_list); | 169 | list_del(&work->order_list); |
136 | work->ordered_free(work); | 170 | work->ordered_free(work); |
137 | } | 171 | } |
138 | 172 | ||
139 | spin_unlock_irqrestore(&workers->lock, flags); | 173 | spin_unlock(&workers->order_lock); |
140 | return 0; | 174 | return 0; |
141 | } | 175 | } |
142 | 176 | ||
177 | static void put_worker(struct btrfs_worker_thread *worker) | ||
178 | { | ||
179 | if (atomic_dec_and_test(&worker->refs)) | ||
180 | kfree(worker); | ||
181 | } | ||
182 | |||
183 | static int try_worker_shutdown(struct btrfs_worker_thread *worker) | ||
184 | { | ||
185 | int freeit = 0; | ||
186 | |||
187 | spin_lock_irq(&worker->lock); | ||
188 | spin_lock(&worker->workers->lock); | ||
189 | if (worker->workers->num_workers > 1 && | ||
190 | worker->idle && | ||
191 | !worker->working && | ||
192 | !list_empty(&worker->worker_list) && | ||
193 | list_empty(&worker->prio_pending) && | ||
194 | list_empty(&worker->pending) && | ||
195 | atomic_read(&worker->num_pending) == 0) { | ||
196 | freeit = 1; | ||
197 | list_del_init(&worker->worker_list); | ||
198 | worker->workers->num_workers--; | ||
199 | } | ||
200 | spin_unlock(&worker->workers->lock); | ||
201 | spin_unlock_irq(&worker->lock); | ||
202 | |||
203 | if (freeit) | ||
204 | put_worker(worker); | ||
205 | return freeit; | ||
206 | } | ||
207 | |||
208 | static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker, | ||
209 | struct list_head *prio_head, | ||
210 | struct list_head *head) | ||
211 | { | ||
212 | struct btrfs_work *work = NULL; | ||
213 | struct list_head *cur = NULL; | ||
214 | |||
215 | if(!list_empty(prio_head)) | ||
216 | cur = prio_head->next; | ||
217 | |||
218 | smp_mb(); | ||
219 | if (!list_empty(&worker->prio_pending)) | ||
220 | goto refill; | ||
221 | |||
222 | if (!list_empty(head)) | ||
223 | cur = head->next; | ||
224 | |||
225 | if (cur) | ||
226 | goto out; | ||
227 | |||
228 | refill: | ||
229 | spin_lock_irq(&worker->lock); | ||
230 | list_splice_tail_init(&worker->prio_pending, prio_head); | ||
231 | list_splice_tail_init(&worker->pending, head); | ||
232 | |||
233 | if (!list_empty(prio_head)) | ||
234 | cur = prio_head->next; | ||
235 | else if (!list_empty(head)) | ||
236 | cur = head->next; | ||
237 | spin_unlock_irq(&worker->lock); | ||
238 | |||
239 | if (!cur) | ||
240 | goto out_fail; | ||
241 | |||
242 | out: | ||
243 | work = list_entry(cur, struct btrfs_work, list); | ||
244 | |||
245 | out_fail: | ||
246 | return work; | ||
247 | } | ||
248 | |||
143 | /* | 249 | /* |
144 | * main loop for servicing work items | 250 | * main loop for servicing work items |
145 | */ | 251 | */ |
146 | static int worker_loop(void *arg) | 252 | static int worker_loop(void *arg) |
147 | { | 253 | { |
148 | struct btrfs_worker_thread *worker = arg; | 254 | struct btrfs_worker_thread *worker = arg; |
149 | struct list_head *cur; | 255 | struct list_head head; |
256 | struct list_head prio_head; | ||
150 | struct btrfs_work *work; | 257 | struct btrfs_work *work; |
258 | |||
259 | INIT_LIST_HEAD(&head); | ||
260 | INIT_LIST_HEAD(&prio_head); | ||
261 | |||
151 | do { | 262 | do { |
152 | spin_lock_irq(&worker->lock); | 263 | again: |
153 | again_locked: | ||
154 | while (1) { | 264 | while (1) { |
155 | if (!list_empty(&worker->prio_pending)) | 265 | |
156 | cur = worker->prio_pending.next; | 266 | |
157 | else if (!list_empty(&worker->pending)) | 267 | work = get_next_work(worker, &prio_head, &head); |
158 | cur = worker->pending.next; | 268 | if (!work) |
159 | else | ||
160 | break; | 269 | break; |
161 | 270 | ||
162 | work = list_entry(cur, struct btrfs_work, list); | ||
163 | list_del(&work->list); | 271 | list_del(&work->list); |
164 | clear_bit(WORK_QUEUED_BIT, &work->flags); | 272 | clear_bit(WORK_QUEUED_BIT, &work->flags); |
165 | 273 | ||
166 | work->worker = worker; | 274 | work->worker = worker; |
167 | spin_unlock_irq(&worker->lock); | ||
168 | 275 | ||
169 | work->func(work); | 276 | work->func(work); |
170 | 277 | ||
@@ -175,9 +282,13 @@ again_locked: | |||
175 | */ | 282 | */ |
176 | run_ordered_completions(worker->workers, work); | 283 | run_ordered_completions(worker->workers, work); |
177 | 284 | ||
178 | spin_lock_irq(&worker->lock); | 285 | check_pending_worker_creates(worker); |
179 | check_idle_worker(worker); | 286 | |
180 | } | 287 | } |
288 | |||
289 | spin_lock_irq(&worker->lock); | ||
290 | check_idle_worker(worker); | ||
291 | |||
181 | if (freezing(current)) { | 292 | if (freezing(current)) { |
182 | worker->working = 0; | 293 | worker->working = 0; |
183 | spin_unlock_irq(&worker->lock); | 294 | spin_unlock_irq(&worker->lock); |
@@ -216,8 +327,10 @@ again_locked: | |||
216 | spin_lock_irq(&worker->lock); | 327 | spin_lock_irq(&worker->lock); |
217 | set_current_state(TASK_INTERRUPTIBLE); | 328 | set_current_state(TASK_INTERRUPTIBLE); |
218 | if (!list_empty(&worker->pending) || | 329 | if (!list_empty(&worker->pending) || |
219 | !list_empty(&worker->prio_pending)) | 330 | !list_empty(&worker->prio_pending)) { |
220 | goto again_locked; | 331 | spin_unlock_irq(&worker->lock); |
332 | goto again; | ||
333 | } | ||
221 | 334 | ||
222 | /* | 335 | /* |
223 | * this makes sure we get a wakeup when someone | 336 | * this makes sure we get a wakeup when someone |
@@ -226,8 +339,13 @@ again_locked: | |||
226 | worker->working = 0; | 339 | worker->working = 0; |
227 | spin_unlock_irq(&worker->lock); | 340 | spin_unlock_irq(&worker->lock); |
228 | 341 | ||
229 | if (!kthread_should_stop()) | 342 | if (!kthread_should_stop()) { |
230 | schedule(); | 343 | schedule_timeout(HZ * 120); |
344 | if (!worker->working && | ||
345 | try_worker_shutdown(worker)) { | ||
346 | return 0; | ||
347 | } | ||
348 | } | ||
231 | } | 349 | } |
232 | __set_current_state(TASK_RUNNING); | 350 | __set_current_state(TASK_RUNNING); |
233 | } | 351 | } |
@@ -242,16 +360,30 @@ int btrfs_stop_workers(struct btrfs_workers *workers) | |||
242 | { | 360 | { |
243 | struct list_head *cur; | 361 | struct list_head *cur; |
244 | struct btrfs_worker_thread *worker; | 362 | struct btrfs_worker_thread *worker; |
363 | int can_stop; | ||
245 | 364 | ||
365 | spin_lock_irq(&workers->lock); | ||
246 | list_splice_init(&workers->idle_list, &workers->worker_list); | 366 | list_splice_init(&workers->idle_list, &workers->worker_list); |
247 | while (!list_empty(&workers->worker_list)) { | 367 | while (!list_empty(&workers->worker_list)) { |
248 | cur = workers->worker_list.next; | 368 | cur = workers->worker_list.next; |
249 | worker = list_entry(cur, struct btrfs_worker_thread, | 369 | worker = list_entry(cur, struct btrfs_worker_thread, |
250 | worker_list); | 370 | worker_list); |
251 | kthread_stop(worker->task); | 371 | |
252 | list_del(&worker->worker_list); | 372 | atomic_inc(&worker->refs); |
253 | kfree(worker); | 373 | workers->num_workers -= 1; |
374 | if (!list_empty(&worker->worker_list)) { | ||
375 | list_del_init(&worker->worker_list); | ||
376 | put_worker(worker); | ||
377 | can_stop = 1; | ||
378 | } else | ||
379 | can_stop = 0; | ||
380 | spin_unlock_irq(&workers->lock); | ||
381 | if (can_stop) | ||
382 | kthread_stop(worker->task); | ||
383 | spin_lock_irq(&workers->lock); | ||
384 | put_worker(worker); | ||
254 | } | 385 | } |
386 | spin_unlock_irq(&workers->lock); | ||
255 | return 0; | 387 | return 0; |
256 | } | 388 | } |
257 | 389 | ||
@@ -266,10 +398,13 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max) | |||
266 | INIT_LIST_HEAD(&workers->order_list); | 398 | INIT_LIST_HEAD(&workers->order_list); |
267 | INIT_LIST_HEAD(&workers->prio_order_list); | 399 | INIT_LIST_HEAD(&workers->prio_order_list); |
268 | spin_lock_init(&workers->lock); | 400 | spin_lock_init(&workers->lock); |
401 | spin_lock_init(&workers->order_lock); | ||
269 | workers->max_workers = max; | 402 | workers->max_workers = max; |
270 | workers->idle_thresh = 32; | 403 | workers->idle_thresh = 32; |
271 | workers->name = name; | 404 | workers->name = name; |
272 | workers->ordered = 0; | 405 | workers->ordered = 0; |
406 | workers->atomic_start_pending = 0; | ||
407 | workers->atomic_worker_start = 0; | ||
273 | } | 408 | } |
274 | 409 | ||
275 | /* | 410 | /* |
@@ -293,7 +428,9 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) | |||
293 | INIT_LIST_HEAD(&worker->prio_pending); | 428 | INIT_LIST_HEAD(&worker->prio_pending); |
294 | INIT_LIST_HEAD(&worker->worker_list); | 429 | INIT_LIST_HEAD(&worker->worker_list); |
295 | spin_lock_init(&worker->lock); | 430 | spin_lock_init(&worker->lock); |
431 | |||
296 | atomic_set(&worker->num_pending, 0); | 432 | atomic_set(&worker->num_pending, 0); |
433 | atomic_set(&worker->refs, 1); | ||
297 | worker->workers = workers; | 434 | worker->workers = workers; |
298 | worker->task = kthread_run(worker_loop, worker, | 435 | worker->task = kthread_run(worker_loop, worker, |
299 | "btrfs-%s-%d", workers->name, | 436 | "btrfs-%s-%d", workers->name, |
@@ -303,7 +440,6 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) | |||
303 | kfree(worker); | 440 | kfree(worker); |
304 | goto fail; | 441 | goto fail; |
305 | } | 442 | } |
306 | |||
307 | spin_lock_irq(&workers->lock); | 443 | spin_lock_irq(&workers->lock); |
308 | list_add_tail(&worker->worker_list, &workers->idle_list); | 444 | list_add_tail(&worker->worker_list, &workers->idle_list); |
309 | worker->idle = 1; | 445 | worker->idle = 1; |
@@ -350,7 +486,6 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers) | |||
350 | */ | 486 | */ |
351 | next = workers->worker_list.next; | 487 | next = workers->worker_list.next; |
352 | worker = list_entry(next, struct btrfs_worker_thread, worker_list); | 488 | worker = list_entry(next, struct btrfs_worker_thread, worker_list); |
353 | atomic_inc(&worker->num_pending); | ||
354 | worker->sequence++; | 489 | worker->sequence++; |
355 | 490 | ||
356 | if (worker->sequence % workers->idle_thresh == 0) | 491 | if (worker->sequence % workers->idle_thresh == 0) |
@@ -367,28 +502,18 @@ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers) | |||
367 | { | 502 | { |
368 | struct btrfs_worker_thread *worker; | 503 | struct btrfs_worker_thread *worker; |
369 | unsigned long flags; | 504 | unsigned long flags; |
505 | struct list_head *fallback; | ||
370 | 506 | ||
371 | again: | 507 | again: |
372 | spin_lock_irqsave(&workers->lock, flags); | 508 | spin_lock_irqsave(&workers->lock, flags); |
373 | worker = next_worker(workers); | 509 | worker = next_worker(workers); |
374 | spin_unlock_irqrestore(&workers->lock, flags); | ||
375 | 510 | ||
376 | if (!worker) { | 511 | if (!worker) { |
377 | spin_lock_irqsave(&workers->lock, flags); | ||
378 | if (workers->num_workers >= workers->max_workers) { | 512 | if (workers->num_workers >= workers->max_workers) { |
379 | struct list_head *fallback = NULL; | 513 | goto fallback; |
380 | /* | 514 | } else if (workers->atomic_worker_start) { |
381 | * we have failed to find any workers, just | 515 | workers->atomic_start_pending = 1; |
382 | * return the force one | 516 | goto fallback; |
383 | */ | ||
384 | if (!list_empty(&workers->worker_list)) | ||
385 | fallback = workers->worker_list.next; | ||
386 | if (!list_empty(&workers->idle_list)) | ||
387 | fallback = workers->idle_list.next; | ||
388 | BUG_ON(!fallback); | ||
389 | worker = list_entry(fallback, | ||
390 | struct btrfs_worker_thread, worker_list); | ||
391 | spin_unlock_irqrestore(&workers->lock, flags); | ||
392 | } else { | 517 | } else { |
393 | spin_unlock_irqrestore(&workers->lock, flags); | 518 | spin_unlock_irqrestore(&workers->lock, flags); |
394 | /* we're below the limit, start another worker */ | 519 | /* we're below the limit, start another worker */ |
@@ -396,6 +521,28 @@ again: | |||
396 | goto again; | 521 | goto again; |
397 | } | 522 | } |
398 | } | 523 | } |
524 | goto found; | ||
525 | |||
526 | fallback: | ||
527 | fallback = NULL; | ||
528 | /* | ||
529 | * we have failed to find any workers, just | ||
530 | * return the first one we can find. | ||
531 | */ | ||
532 | if (!list_empty(&workers->worker_list)) | ||
533 | fallback = workers->worker_list.next; | ||
534 | if (!list_empty(&workers->idle_list)) | ||
535 | fallback = workers->idle_list.next; | ||
536 | BUG_ON(!fallback); | ||
537 | worker = list_entry(fallback, | ||
538 | struct btrfs_worker_thread, worker_list); | ||
539 | found: | ||
540 | /* | ||
541 | * this makes sure the worker doesn't exit before it is placed | ||
542 | * onto a busy/idle list | ||
543 | */ | ||
544 | atomic_inc(&worker->num_pending); | ||
545 | spin_unlock_irqrestore(&workers->lock, flags); | ||
399 | return worker; | 546 | return worker; |
400 | } | 547 | } |
401 | 548 | ||
@@ -427,7 +574,7 @@ int btrfs_requeue_work(struct btrfs_work *work) | |||
427 | spin_lock(&worker->workers->lock); | 574 | spin_lock(&worker->workers->lock); |
428 | worker->idle = 0; | 575 | worker->idle = 0; |
429 | list_move_tail(&worker->worker_list, | 576 | list_move_tail(&worker->worker_list, |
430 | &worker->workers->worker_list); | 577 | &worker->workers->worker_list); |
431 | spin_unlock(&worker->workers->lock); | 578 | spin_unlock(&worker->workers->lock); |
432 | } | 579 | } |
433 | if (!worker->working) { | 580 | if (!worker->working) { |
@@ -435,9 +582,9 @@ int btrfs_requeue_work(struct btrfs_work *work) | |||
435 | worker->working = 1; | 582 | worker->working = 1; |
436 | } | 583 | } |
437 | 584 | ||
438 | spin_unlock_irqrestore(&worker->lock, flags); | ||
439 | if (wake) | 585 | if (wake) |
440 | wake_up_process(worker->task); | 586 | wake_up_process(worker->task); |
587 | spin_unlock_irqrestore(&worker->lock, flags); | ||
441 | out: | 588 | out: |
442 | 589 | ||
443 | return 0; | 590 | return 0; |
@@ -463,14 +610,18 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) | |||
463 | 610 | ||
464 | worker = find_worker(workers); | 611 | worker = find_worker(workers); |
465 | if (workers->ordered) { | 612 | if (workers->ordered) { |
466 | spin_lock_irqsave(&workers->lock, flags); | 613 | /* |
614 | * you're not allowed to do ordered queues from an | ||
615 | * interrupt handler | ||
616 | */ | ||
617 | spin_lock(&workers->order_lock); | ||
467 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) { | 618 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) { |
468 | list_add_tail(&work->order_list, | 619 | list_add_tail(&work->order_list, |
469 | &workers->prio_order_list); | 620 | &workers->prio_order_list); |
470 | } else { | 621 | } else { |
471 | list_add_tail(&work->order_list, &workers->order_list); | 622 | list_add_tail(&work->order_list, &workers->order_list); |
472 | } | 623 | } |
473 | spin_unlock_irqrestore(&workers->lock, flags); | 624 | spin_unlock(&workers->order_lock); |
474 | } else { | 625 | } else { |
475 | INIT_LIST_HEAD(&work->order_list); | 626 | INIT_LIST_HEAD(&work->order_list); |
476 | } | 627 | } |
@@ -481,7 +632,6 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) | |||
481 | list_add_tail(&work->list, &worker->prio_pending); | 632 | list_add_tail(&work->list, &worker->prio_pending); |
482 | else | 633 | else |
483 | list_add_tail(&work->list, &worker->pending); | 634 | list_add_tail(&work->list, &worker->pending); |
484 | atomic_inc(&worker->num_pending); | ||
485 | check_busy_worker(worker); | 635 | check_busy_worker(worker); |
486 | 636 | ||
487 | /* | 637 | /* |
@@ -492,10 +642,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) | |||
492 | wake = 1; | 642 | wake = 1; |
493 | worker->working = 1; | 643 | worker->working = 1; |
494 | 644 | ||
495 | spin_unlock_irqrestore(&worker->lock, flags); | ||
496 | |||
497 | if (wake) | 645 | if (wake) |
498 | wake_up_process(worker->task); | 646 | wake_up_process(worker->task); |
647 | spin_unlock_irqrestore(&worker->lock, flags); | ||
648 | |||
499 | out: | 649 | out: |
500 | return 0; | 650 | return 0; |
501 | } | 651 | } |
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 1b511c109db6..fc089b95ec14 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h | |||
@@ -73,6 +73,15 @@ struct btrfs_workers { | |||
73 | /* force completions in the order they were queued */ | 73 | /* force completions in the order they were queued */ |
74 | int ordered; | 74 | int ordered; |
75 | 75 | ||
76 | /* more workers required, but in an interrupt handler */ | ||
77 | int atomic_start_pending; | ||
78 | |||
79 | /* | ||
80 | * are we allowed to sleep while starting workers or are we required | ||
81 | * to start them at a later time? | ||
82 | */ | ||
83 | int atomic_worker_start; | ||
84 | |||
76 | /* list with all the work threads. The workers on the idle thread | 85 | /* list with all the work threads. The workers on the idle thread |
77 | * may be actively servicing jobs, but they haven't yet hit the | 86 | * may be actively servicing jobs, but they haven't yet hit the |
78 | * idle thresh limit above. | 87 | * idle thresh limit above. |
@@ -90,6 +99,9 @@ struct btrfs_workers { | |||
90 | /* lock for finding the next worker thread to queue on */ | 99 | /* lock for finding the next worker thread to queue on */ |
91 | spinlock_t lock; | 100 | spinlock_t lock; |
92 | 101 | ||
102 | /* lock for the ordered lists */ | ||
103 | spinlock_t order_lock; | ||
104 | |||
93 | /* extra name for this worker, used for current->name */ | 105 | /* extra name for this worker, used for current->name */ |
94 | char *name; | 106 | char *name; |
95 | }; | 107 | }; |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index ea1ea0af8c0e..82ee56bba299 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -138,6 +138,7 @@ struct btrfs_inode { | |||
138 | * of these. | 138 | * of these. |
139 | */ | 139 | */ |
140 | unsigned ordered_data_close:1; | 140 | unsigned ordered_data_close:1; |
141 | unsigned dummy_inode:1; | ||
141 | 142 | ||
142 | struct inode vfs_inode; | 143 | struct inode vfs_inode; |
143 | }; | 144 | }; |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 9d8ba4d54a37..a11a32058b50 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
@@ -506,10 +506,10 @@ static noinline int add_ra_bio_pages(struct inode *inode, | |||
506 | */ | 506 | */ |
507 | set_page_extent_mapped(page); | 507 | set_page_extent_mapped(page); |
508 | lock_extent(tree, last_offset, end, GFP_NOFS); | 508 | lock_extent(tree, last_offset, end, GFP_NOFS); |
509 | spin_lock(&em_tree->lock); | 509 | read_lock(&em_tree->lock); |
510 | em = lookup_extent_mapping(em_tree, last_offset, | 510 | em = lookup_extent_mapping(em_tree, last_offset, |
511 | PAGE_CACHE_SIZE); | 511 | PAGE_CACHE_SIZE); |
512 | spin_unlock(&em_tree->lock); | 512 | read_unlock(&em_tree->lock); |
513 | 513 | ||
514 | if (!em || last_offset < em->start || | 514 | if (!em || last_offset < em->start || |
515 | (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) || | 515 | (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) || |
@@ -593,11 +593,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
593 | em_tree = &BTRFS_I(inode)->extent_tree; | 593 | em_tree = &BTRFS_I(inode)->extent_tree; |
594 | 594 | ||
595 | /* we need the actual starting offset of this extent in the file */ | 595 | /* we need the actual starting offset of this extent in the file */ |
596 | spin_lock(&em_tree->lock); | 596 | read_lock(&em_tree->lock); |
597 | em = lookup_extent_mapping(em_tree, | 597 | em = lookup_extent_mapping(em_tree, |
598 | page_offset(bio->bi_io_vec->bv_page), | 598 | page_offset(bio->bi_io_vec->bv_page), |
599 | PAGE_CACHE_SIZE); | 599 | PAGE_CACHE_SIZE); |
600 | spin_unlock(&em_tree->lock); | 600 | read_unlock(&em_tree->lock); |
601 | 601 | ||
602 | compressed_len = em->block_len; | 602 | compressed_len = em->block_len; |
603 | cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); | 603 | cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 3fdcc0512d3a..ec96f3a6d536 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -2853,6 +2853,12 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, | |||
2853 | int split; | 2853 | int split; |
2854 | int num_doubles = 0; | 2854 | int num_doubles = 0; |
2855 | 2855 | ||
2856 | l = path->nodes[0]; | ||
2857 | slot = path->slots[0]; | ||
2858 | if (extend && data_size + btrfs_item_size_nr(l, slot) + | ||
2859 | sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root)) | ||
2860 | return -EOVERFLOW; | ||
2861 | |||
2856 | /* first try to make some room by pushing left and right */ | 2862 | /* first try to make some room by pushing left and right */ |
2857 | if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { | 2863 | if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { |
2858 | wret = push_leaf_right(trans, root, path, data_size, 0); | 2864 | wret = push_leaf_right(trans, root, path, data_size, 0); |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 837435ce84ca..80599b4e42bd 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -114,6 +114,10 @@ struct btrfs_ordered_sum; | |||
114 | */ | 114 | */ |
115 | #define BTRFS_DEV_ITEMS_OBJECTID 1ULL | 115 | #define BTRFS_DEV_ITEMS_OBJECTID 1ULL |
116 | 116 | ||
117 | #define BTRFS_BTREE_INODE_OBJECTID 1 | ||
118 | |||
119 | #define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2 | ||
120 | |||
117 | /* | 121 | /* |
118 | * we can actually store much bigger names, but lets not confuse the rest | 122 | * we can actually store much bigger names, but lets not confuse the rest |
119 | * of linux | 123 | * of linux |
@@ -670,6 +674,7 @@ struct btrfs_space_info { | |||
670 | u64 bytes_reserved; /* total bytes the allocator has reserved for | 674 | u64 bytes_reserved; /* total bytes the allocator has reserved for |
671 | current allocations */ | 675 | current allocations */ |
672 | u64 bytes_readonly; /* total bytes that are read only */ | 676 | u64 bytes_readonly; /* total bytes that are read only */ |
677 | u64 bytes_super; /* total bytes reserved for the super blocks */ | ||
673 | 678 | ||
674 | /* delalloc accounting */ | 679 | /* delalloc accounting */ |
675 | u64 bytes_delalloc; /* number of bytes reserved for allocation, | 680 | u64 bytes_delalloc; /* number of bytes reserved for allocation, |
@@ -726,6 +731,15 @@ enum btrfs_caching_type { | |||
726 | BTRFS_CACHE_FINISHED = 2, | 731 | BTRFS_CACHE_FINISHED = 2, |
727 | }; | 732 | }; |
728 | 733 | ||
734 | struct btrfs_caching_control { | ||
735 | struct list_head list; | ||
736 | struct mutex mutex; | ||
737 | wait_queue_head_t wait; | ||
738 | struct btrfs_block_group_cache *block_group; | ||
739 | u64 progress; | ||
740 | atomic_t count; | ||
741 | }; | ||
742 | |||
729 | struct btrfs_block_group_cache { | 743 | struct btrfs_block_group_cache { |
730 | struct btrfs_key key; | 744 | struct btrfs_key key; |
731 | struct btrfs_block_group_item item; | 745 | struct btrfs_block_group_item item; |
@@ -733,6 +747,7 @@ struct btrfs_block_group_cache { | |||
733 | spinlock_t lock; | 747 | spinlock_t lock; |
734 | u64 pinned; | 748 | u64 pinned; |
735 | u64 reserved; | 749 | u64 reserved; |
750 | u64 bytes_super; | ||
736 | u64 flags; | 751 | u64 flags; |
737 | u64 sectorsize; | 752 | u64 sectorsize; |
738 | int extents_thresh; | 753 | int extents_thresh; |
@@ -742,8 +757,9 @@ struct btrfs_block_group_cache { | |||
742 | int dirty; | 757 | int dirty; |
743 | 758 | ||
744 | /* cache tracking stuff */ | 759 | /* cache tracking stuff */ |
745 | wait_queue_head_t caching_q; | ||
746 | int cached; | 760 | int cached; |
761 | struct btrfs_caching_control *caching_ctl; | ||
762 | u64 last_byte_to_unpin; | ||
747 | 763 | ||
748 | struct btrfs_space_info *space_info; | 764 | struct btrfs_space_info *space_info; |
749 | 765 | ||
@@ -782,13 +798,16 @@ struct btrfs_fs_info { | |||
782 | 798 | ||
783 | /* the log root tree is a directory of all the other log roots */ | 799 | /* the log root tree is a directory of all the other log roots */ |
784 | struct btrfs_root *log_root_tree; | 800 | struct btrfs_root *log_root_tree; |
801 | |||
802 | spinlock_t fs_roots_radix_lock; | ||
785 | struct radix_tree_root fs_roots_radix; | 803 | struct radix_tree_root fs_roots_radix; |
786 | 804 | ||
787 | /* block group cache stuff */ | 805 | /* block group cache stuff */ |
788 | spinlock_t block_group_cache_lock; | 806 | spinlock_t block_group_cache_lock; |
789 | struct rb_root block_group_cache_tree; | 807 | struct rb_root block_group_cache_tree; |
790 | 808 | ||
791 | struct extent_io_tree pinned_extents; | 809 | struct extent_io_tree freed_extents[2]; |
810 | struct extent_io_tree *pinned_extents; | ||
792 | 811 | ||
793 | /* logical->physical extent mapping */ | 812 | /* logical->physical extent mapping */ |
794 | struct btrfs_mapping_tree mapping_tree; | 813 | struct btrfs_mapping_tree mapping_tree; |
@@ -822,11 +841,7 @@ struct btrfs_fs_info { | |||
822 | struct mutex transaction_kthread_mutex; | 841 | struct mutex transaction_kthread_mutex; |
823 | struct mutex cleaner_mutex; | 842 | struct mutex cleaner_mutex; |
824 | struct mutex chunk_mutex; | 843 | struct mutex chunk_mutex; |
825 | struct mutex drop_mutex; | ||
826 | struct mutex volume_mutex; | 844 | struct mutex volume_mutex; |
827 | struct mutex tree_reloc_mutex; | ||
828 | struct rw_semaphore extent_commit_sem; | ||
829 | |||
830 | /* | 845 | /* |
831 | * this protects the ordered operations list only while we are | 846 | * this protects the ordered operations list only while we are |
832 | * processing all of the entries on it. This way we make | 847 | * processing all of the entries on it. This way we make |
@@ -835,10 +850,16 @@ struct btrfs_fs_info { | |||
835 | * before jumping into the main commit. | 850 | * before jumping into the main commit. |
836 | */ | 851 | */ |
837 | struct mutex ordered_operations_mutex; | 852 | struct mutex ordered_operations_mutex; |
853 | struct rw_semaphore extent_commit_sem; | ||
854 | |||
855 | struct rw_semaphore subvol_sem; | ||
856 | |||
857 | struct srcu_struct subvol_srcu; | ||
838 | 858 | ||
839 | struct list_head trans_list; | 859 | struct list_head trans_list; |
840 | struct list_head hashers; | 860 | struct list_head hashers; |
841 | struct list_head dead_roots; | 861 | struct list_head dead_roots; |
862 | struct list_head caching_block_groups; | ||
842 | 863 | ||
843 | atomic_t nr_async_submits; | 864 | atomic_t nr_async_submits; |
844 | atomic_t async_submit_draining; | 865 | atomic_t async_submit_draining; |
@@ -996,10 +1017,12 @@ struct btrfs_root { | |||
996 | u32 stripesize; | 1017 | u32 stripesize; |
997 | 1018 | ||
998 | u32 type; | 1019 | u32 type; |
999 | u64 highest_inode; | 1020 | |
1000 | u64 last_inode_alloc; | 1021 | u64 highest_objectid; |
1001 | int ref_cows; | 1022 | int ref_cows; |
1002 | int track_dirty; | 1023 | int track_dirty; |
1024 | int in_radix; | ||
1025 | |||
1003 | u64 defrag_trans_start; | 1026 | u64 defrag_trans_start; |
1004 | struct btrfs_key defrag_progress; | 1027 | struct btrfs_key defrag_progress; |
1005 | struct btrfs_key defrag_max; | 1028 | struct btrfs_key defrag_max; |
@@ -1920,8 +1943,8 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache); | |||
1920 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | 1943 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, |
1921 | struct btrfs_root *root, unsigned long count); | 1944 | struct btrfs_root *root, unsigned long count); |
1922 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); | 1945 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); |
1923 | int btrfs_update_pinned_extents(struct btrfs_root *root, | 1946 | int btrfs_pin_extent(struct btrfs_root *root, |
1924 | u64 bytenr, u64 num, int pin); | 1947 | u64 bytenr, u64 num, int reserved); |
1925 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 1948 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, |
1926 | struct btrfs_root *root, struct extent_buffer *leaf); | 1949 | struct btrfs_root *root, struct extent_buffer *leaf); |
1927 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | 1950 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, |
@@ -1971,9 +1994,10 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
1971 | u64 root_objectid, u64 owner, u64 offset); | 1994 | u64 root_objectid, u64 owner, u64 offset); |
1972 | 1995 | ||
1973 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); | 1996 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); |
1997 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | ||
1998 | struct btrfs_root *root); | ||
1974 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | 1999 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, |
1975 | struct btrfs_root *root, | 2000 | struct btrfs_root *root); |
1976 | struct extent_io_tree *unpin); | ||
1977 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | 2001 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, |
1978 | struct btrfs_root *root, | 2002 | struct btrfs_root *root, |
1979 | u64 bytenr, u64 num_bytes, u64 parent, | 2003 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -1984,6 +2008,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | |||
1984 | int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); | 2008 | int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); |
1985 | int btrfs_free_block_groups(struct btrfs_fs_info *info); | 2009 | int btrfs_free_block_groups(struct btrfs_fs_info *info); |
1986 | int btrfs_read_block_groups(struct btrfs_root *root); | 2010 | int btrfs_read_block_groups(struct btrfs_root *root); |
2011 | int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr); | ||
1987 | int btrfs_make_block_group(struct btrfs_trans_handle *trans, | 2012 | int btrfs_make_block_group(struct btrfs_trans_handle *trans, |
1988 | struct btrfs_root *root, u64 bytes_used, | 2013 | struct btrfs_root *root, u64 bytes_used, |
1989 | u64 type, u64 chunk_objectid, u64 chunk_offset, | 2014 | u64 type, u64 chunk_objectid, u64 chunk_offset, |
@@ -2006,7 +2031,6 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, | |||
2006 | u64 bytes); | 2031 | u64 bytes); |
2007 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, | 2032 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, |
2008 | u64 bytes); | 2033 | u64 bytes); |
2009 | void btrfs_free_pinned_extents(struct btrfs_fs_info *info); | ||
2010 | /* ctree.c */ | 2034 | /* ctree.c */ |
2011 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 2035 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
2012 | int level, int *slot); | 2036 | int level, int *slot); |
@@ -2100,12 +2124,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
2100 | struct extent_buffer *parent); | 2124 | struct extent_buffer *parent); |
2101 | /* root-item.c */ | 2125 | /* root-item.c */ |
2102 | int btrfs_find_root_ref(struct btrfs_root *tree_root, | 2126 | int btrfs_find_root_ref(struct btrfs_root *tree_root, |
2103 | struct btrfs_path *path, | 2127 | struct btrfs_path *path, |
2104 | u64 root_id, u64 ref_id); | 2128 | u64 root_id, u64 ref_id); |
2105 | int btrfs_add_root_ref(struct btrfs_trans_handle *trans, | 2129 | int btrfs_add_root_ref(struct btrfs_trans_handle *trans, |
2106 | struct btrfs_root *tree_root, | 2130 | struct btrfs_root *tree_root, |
2107 | u64 root_id, u8 type, u64 ref_id, | 2131 | u64 root_id, u64 ref_id, u64 dirid, u64 sequence, |
2108 | u64 dirid, u64 sequence, | 2132 | const char *name, int name_len); |
2133 | int btrfs_del_root_ref(struct btrfs_trans_handle *trans, | ||
2134 | struct btrfs_root *tree_root, | ||
2135 | u64 root_id, u64 ref_id, u64 dirid, u64 *sequence, | ||
2109 | const char *name, int name_len); | 2136 | const char *name, int name_len); |
2110 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 2137 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
2111 | struct btrfs_key *key); | 2138 | struct btrfs_key *key); |
@@ -2120,6 +2147,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct | |||
2120 | int btrfs_search_root(struct btrfs_root *root, u64 search_start, | 2147 | int btrfs_search_root(struct btrfs_root *root, u64 search_start, |
2121 | u64 *found_objectid); | 2148 | u64 *found_objectid); |
2122 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); | 2149 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); |
2150 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root); | ||
2123 | int btrfs_set_root_node(struct btrfs_root_item *item, | 2151 | int btrfs_set_root_node(struct btrfs_root_item *item, |
2124 | struct extent_buffer *node); | 2152 | struct extent_buffer *node); |
2125 | /* dir-item.c */ | 2153 | /* dir-item.c */ |
@@ -2138,6 +2166,10 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, | |||
2138 | struct btrfs_path *path, u64 dir, | 2166 | struct btrfs_path *path, u64 dir, |
2139 | u64 objectid, const char *name, int name_len, | 2167 | u64 objectid, const char *name, int name_len, |
2140 | int mod); | 2168 | int mod); |
2169 | struct btrfs_dir_item * | ||
2170 | btrfs_search_dir_index_item(struct btrfs_root *root, | ||
2171 | struct btrfs_path *path, u64 dirid, | ||
2172 | const char *name, int name_len); | ||
2141 | struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, | 2173 | struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, |
2142 | struct btrfs_path *path, | 2174 | struct btrfs_path *path, |
2143 | const char *name, int name_len); | 2175 | const char *name, int name_len); |
@@ -2160,6 +2192,7 @@ int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, | |||
2160 | struct btrfs_root *root, u64 offset); | 2192 | struct btrfs_root *root, u64 offset); |
2161 | int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, | 2193 | int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, |
2162 | struct btrfs_root *root, u64 offset); | 2194 | struct btrfs_root *root, u64 offset); |
2195 | int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset); | ||
2163 | 2196 | ||
2164 | /* inode-map.c */ | 2197 | /* inode-map.c */ |
2165 | int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, | 2198 | int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, |
@@ -2232,6 +2265,10 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | |||
2232 | int btrfs_add_link(struct btrfs_trans_handle *trans, | 2265 | int btrfs_add_link(struct btrfs_trans_handle *trans, |
2233 | struct inode *parent_inode, struct inode *inode, | 2266 | struct inode *parent_inode, struct inode *inode, |
2234 | const char *name, int name_len, int add_backref, u64 index); | 2267 | const char *name, int name_len, int add_backref, u64 index); |
2268 | int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, | ||
2269 | struct btrfs_root *root, | ||
2270 | struct inode *dir, u64 objectid, | ||
2271 | const char *name, int name_len); | ||
2235 | int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | 2272 | int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, |
2236 | struct btrfs_root *root, | 2273 | struct btrfs_root *root, |
2237 | struct inode *inode, u64 new_size, | 2274 | struct inode *inode, u64 new_size, |
@@ -2242,7 +2279,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); | |||
2242 | int btrfs_writepages(struct address_space *mapping, | 2279 | int btrfs_writepages(struct address_space *mapping, |
2243 | struct writeback_control *wbc); | 2280 | struct writeback_control *wbc); |
2244 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | 2281 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, |
2245 | struct btrfs_root *new_root, struct dentry *dentry, | 2282 | struct btrfs_root *new_root, |
2246 | u64 new_dirid, u64 alloc_hint); | 2283 | u64 new_dirid, u64 alloc_hint); |
2247 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | 2284 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, |
2248 | size_t size, struct bio *bio, unsigned long bio_flags); | 2285 | size_t size, struct bio *bio, unsigned long bio_flags); |
@@ -2258,6 +2295,7 @@ int btrfs_write_inode(struct inode *inode, int wait); | |||
2258 | void btrfs_dirty_inode(struct inode *inode); | 2295 | void btrfs_dirty_inode(struct inode *inode); |
2259 | struct inode *btrfs_alloc_inode(struct super_block *sb); | 2296 | struct inode *btrfs_alloc_inode(struct super_block *sb); |
2260 | void btrfs_destroy_inode(struct inode *inode); | 2297 | void btrfs_destroy_inode(struct inode *inode); |
2298 | void btrfs_drop_inode(struct inode *inode); | ||
2261 | int btrfs_init_cachep(void); | 2299 | int btrfs_init_cachep(void); |
2262 | void btrfs_destroy_cachep(void); | 2300 | void btrfs_destroy_cachep(void); |
2263 | long btrfs_ioctl_trans_end(struct file *file); | 2301 | long btrfs_ioctl_trans_end(struct file *file); |
@@ -2275,6 +2313,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); | |||
2275 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); | 2313 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); |
2276 | void btrfs_orphan_cleanup(struct btrfs_root *root); | 2314 | void btrfs_orphan_cleanup(struct btrfs_root *root); |
2277 | int btrfs_cont_expand(struct inode *inode, loff_t size); | 2315 | int btrfs_cont_expand(struct inode *inode, loff_t size); |
2316 | int btrfs_invalidate_inodes(struct btrfs_root *root); | ||
2317 | extern struct dentry_operations btrfs_dentry_operations; | ||
2278 | 2318 | ||
2279 | /* ioctl.c */ | 2319 | /* ioctl.c */ |
2280 | long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | 2320 | long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); |
@@ -2290,7 +2330,7 @@ extern struct file_operations btrfs_file_operations; | |||
2290 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, | 2330 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, |
2291 | struct btrfs_root *root, struct inode *inode, | 2331 | struct btrfs_root *root, struct inode *inode, |
2292 | u64 start, u64 end, u64 locked_end, | 2332 | u64 start, u64 end, u64 locked_end, |
2293 | u64 inline_limit, u64 *hint_block); | 2333 | u64 inline_limit, u64 *hint_block, int drop_cache); |
2294 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | 2334 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, |
2295 | struct btrfs_root *root, | 2335 | struct btrfs_root *root, |
2296 | struct inode *inode, u64 start, u64 end); | 2336 | struct inode *inode, u64 start, u64 end); |
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 1d70236ba00c..f3a6075519cc 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c | |||
@@ -281,6 +281,53 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, | |||
281 | return btrfs_match_dir_item_name(root, path, name, name_len); | 281 | return btrfs_match_dir_item_name(root, path, name, name_len); |
282 | } | 282 | } |
283 | 283 | ||
284 | struct btrfs_dir_item * | ||
285 | btrfs_search_dir_index_item(struct btrfs_root *root, | ||
286 | struct btrfs_path *path, u64 dirid, | ||
287 | const char *name, int name_len) | ||
288 | { | ||
289 | struct extent_buffer *leaf; | ||
290 | struct btrfs_dir_item *di; | ||
291 | struct btrfs_key key; | ||
292 | u32 nritems; | ||
293 | int ret; | ||
294 | |||
295 | key.objectid = dirid; | ||
296 | key.type = BTRFS_DIR_INDEX_KEY; | ||
297 | key.offset = 0; | ||
298 | |||
299 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
300 | if (ret < 0) | ||
301 | return ERR_PTR(ret); | ||
302 | |||
303 | leaf = path->nodes[0]; | ||
304 | nritems = btrfs_header_nritems(leaf); | ||
305 | |||
306 | while (1) { | ||
307 | if (path->slots[0] >= nritems) { | ||
308 | ret = btrfs_next_leaf(root, path); | ||
309 | if (ret < 0) | ||
310 | return ERR_PTR(ret); | ||
311 | if (ret > 0) | ||
312 | break; | ||
313 | leaf = path->nodes[0]; | ||
314 | nritems = btrfs_header_nritems(leaf); | ||
315 | continue; | ||
316 | } | ||
317 | |||
318 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
319 | if (key.objectid != dirid || key.type != BTRFS_DIR_INDEX_KEY) | ||
320 | break; | ||
321 | |||
322 | di = btrfs_match_dir_item_name(root, path, name, name_len); | ||
323 | if (di) | ||
324 | return di; | ||
325 | |||
326 | path->slots[0]++; | ||
327 | } | ||
328 | return NULL; | ||
329 | } | ||
330 | |||
284 | struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, | 331 | struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, |
285 | struct btrfs_root *root, | 332 | struct btrfs_root *root, |
286 | struct btrfs_path *path, u64 dir, | 333 | struct btrfs_path *path, u64 dir, |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6c4173146bb7..644e796fd643 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -41,6 +41,7 @@ | |||
41 | 41 | ||
42 | static struct extent_io_ops btree_extent_io_ops; | 42 | static struct extent_io_ops btree_extent_io_ops; |
43 | static void end_workqueue_fn(struct btrfs_work *work); | 43 | static void end_workqueue_fn(struct btrfs_work *work); |
44 | static void free_fs_root(struct btrfs_root *root); | ||
44 | 45 | ||
45 | static atomic_t btrfs_bdi_num = ATOMIC_INIT(0); | 46 | static atomic_t btrfs_bdi_num = ATOMIC_INIT(0); |
46 | 47 | ||
@@ -123,15 +124,15 @@ static struct extent_map *btree_get_extent(struct inode *inode, | |||
123 | struct extent_map *em; | 124 | struct extent_map *em; |
124 | int ret; | 125 | int ret; |
125 | 126 | ||
126 | spin_lock(&em_tree->lock); | 127 | read_lock(&em_tree->lock); |
127 | em = lookup_extent_mapping(em_tree, start, len); | 128 | em = lookup_extent_mapping(em_tree, start, len); |
128 | if (em) { | 129 | if (em) { |
129 | em->bdev = | 130 | em->bdev = |
130 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | 131 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; |
131 | spin_unlock(&em_tree->lock); | 132 | read_unlock(&em_tree->lock); |
132 | goto out; | 133 | goto out; |
133 | } | 134 | } |
134 | spin_unlock(&em_tree->lock); | 135 | read_unlock(&em_tree->lock); |
135 | 136 | ||
136 | em = alloc_extent_map(GFP_NOFS); | 137 | em = alloc_extent_map(GFP_NOFS); |
137 | if (!em) { | 138 | if (!em) { |
@@ -144,7 +145,7 @@ static struct extent_map *btree_get_extent(struct inode *inode, | |||
144 | em->block_start = 0; | 145 | em->block_start = 0; |
145 | em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | 146 | em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; |
146 | 147 | ||
147 | spin_lock(&em_tree->lock); | 148 | write_lock(&em_tree->lock); |
148 | ret = add_extent_mapping(em_tree, em); | 149 | ret = add_extent_mapping(em_tree, em); |
149 | if (ret == -EEXIST) { | 150 | if (ret == -EEXIST) { |
150 | u64 failed_start = em->start; | 151 | u64 failed_start = em->start; |
@@ -163,7 +164,7 @@ static struct extent_map *btree_get_extent(struct inode *inode, | |||
163 | free_extent_map(em); | 164 | free_extent_map(em); |
164 | em = NULL; | 165 | em = NULL; |
165 | } | 166 | } |
166 | spin_unlock(&em_tree->lock); | 167 | write_unlock(&em_tree->lock); |
167 | 168 | ||
168 | if (ret) | 169 | if (ret) |
169 | em = ERR_PTR(ret); | 170 | em = ERR_PTR(ret); |
@@ -895,8 +896,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
895 | root->fs_info = fs_info; | 896 | root->fs_info = fs_info; |
896 | root->objectid = objectid; | 897 | root->objectid = objectid; |
897 | root->last_trans = 0; | 898 | root->last_trans = 0; |
898 | root->highest_inode = 0; | 899 | root->highest_objectid = 0; |
899 | root->last_inode_alloc = 0; | ||
900 | root->name = NULL; | 900 | root->name = NULL; |
901 | root->in_sysfs = 0; | 901 | root->in_sysfs = 0; |
902 | root->inode_tree.rb_node = NULL; | 902 | root->inode_tree.rb_node = NULL; |
@@ -952,14 +952,16 @@ static int find_and_setup_root(struct btrfs_root *tree_root, | |||
952 | root, fs_info, objectid); | 952 | root, fs_info, objectid); |
953 | ret = btrfs_find_last_root(tree_root, objectid, | 953 | ret = btrfs_find_last_root(tree_root, objectid, |
954 | &root->root_item, &root->root_key); | 954 | &root->root_item, &root->root_key); |
955 | if (ret > 0) | ||
956 | return -ENOENT; | ||
955 | BUG_ON(ret); | 957 | BUG_ON(ret); |
956 | 958 | ||
957 | generation = btrfs_root_generation(&root->root_item); | 959 | generation = btrfs_root_generation(&root->root_item); |
958 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | 960 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); |
959 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 961 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
960 | blocksize, generation); | 962 | blocksize, generation); |
961 | root->commit_root = btrfs_root_node(root); | ||
962 | BUG_ON(!root->node); | 963 | BUG_ON(!root->node); |
964 | root->commit_root = btrfs_root_node(root); | ||
963 | return 0; | 965 | return 0; |
964 | } | 966 | } |
965 | 967 | ||
@@ -1095,7 +1097,6 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1095 | struct btrfs_fs_info *fs_info = tree_root->fs_info; | 1097 | struct btrfs_fs_info *fs_info = tree_root->fs_info; |
1096 | struct btrfs_path *path; | 1098 | struct btrfs_path *path; |
1097 | struct extent_buffer *l; | 1099 | struct extent_buffer *l; |
1098 | u64 highest_inode; | ||
1099 | u64 generation; | 1100 | u64 generation; |
1100 | u32 blocksize; | 1101 | u32 blocksize; |
1101 | int ret = 0; | 1102 | int ret = 0; |
@@ -1110,7 +1111,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1110 | kfree(root); | 1111 | kfree(root); |
1111 | return ERR_PTR(ret); | 1112 | return ERR_PTR(ret); |
1112 | } | 1113 | } |
1113 | goto insert; | 1114 | goto out; |
1114 | } | 1115 | } |
1115 | 1116 | ||
1116 | __setup_root(tree_root->nodesize, tree_root->leafsize, | 1117 | __setup_root(tree_root->nodesize, tree_root->leafsize, |
@@ -1120,39 +1121,30 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1120 | path = btrfs_alloc_path(); | 1121 | path = btrfs_alloc_path(); |
1121 | BUG_ON(!path); | 1122 | BUG_ON(!path); |
1122 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); | 1123 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); |
1123 | if (ret != 0) { | 1124 | if (ret == 0) { |
1124 | if (ret > 0) | 1125 | l = path->nodes[0]; |
1125 | ret = -ENOENT; | 1126 | read_extent_buffer(l, &root->root_item, |
1126 | goto out; | 1127 | btrfs_item_ptr_offset(l, path->slots[0]), |
1128 | sizeof(root->root_item)); | ||
1129 | memcpy(&root->root_key, location, sizeof(*location)); | ||
1127 | } | 1130 | } |
1128 | l = path->nodes[0]; | ||
1129 | read_extent_buffer(l, &root->root_item, | ||
1130 | btrfs_item_ptr_offset(l, path->slots[0]), | ||
1131 | sizeof(root->root_item)); | ||
1132 | memcpy(&root->root_key, location, sizeof(*location)); | ||
1133 | ret = 0; | ||
1134 | out: | ||
1135 | btrfs_release_path(root, path); | ||
1136 | btrfs_free_path(path); | 1131 | btrfs_free_path(path); |
1137 | if (ret) { | 1132 | if (ret) { |
1138 | kfree(root); | 1133 | if (ret > 0) |
1134 | ret = -ENOENT; | ||
1139 | return ERR_PTR(ret); | 1135 | return ERR_PTR(ret); |
1140 | } | 1136 | } |
1137 | |||
1141 | generation = btrfs_root_generation(&root->root_item); | 1138 | generation = btrfs_root_generation(&root->root_item); |
1142 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | 1139 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); |
1143 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 1140 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
1144 | blocksize, generation); | 1141 | blocksize, generation); |
1145 | root->commit_root = btrfs_root_node(root); | 1142 | root->commit_root = btrfs_root_node(root); |
1146 | BUG_ON(!root->node); | 1143 | BUG_ON(!root->node); |
1147 | insert: | 1144 | out: |
1148 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { | 1145 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) |
1149 | root->ref_cows = 1; | 1146 | root->ref_cows = 1; |
1150 | ret = btrfs_find_highest_inode(root, &highest_inode); | 1147 | |
1151 | if (ret == 0) { | ||
1152 | root->highest_inode = highest_inode; | ||
1153 | root->last_inode_alloc = highest_inode; | ||
1154 | } | ||
1155 | } | ||
1156 | return root; | 1148 | return root; |
1157 | } | 1149 | } |
1158 | 1150 | ||
@@ -1187,39 +1179,66 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, | |||
1187 | return fs_info->dev_root; | 1179 | return fs_info->dev_root; |
1188 | if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) | 1180 | if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) |
1189 | return fs_info->csum_root; | 1181 | return fs_info->csum_root; |
1190 | 1182 | again: | |
1183 | spin_lock(&fs_info->fs_roots_radix_lock); | ||
1191 | root = radix_tree_lookup(&fs_info->fs_roots_radix, | 1184 | root = radix_tree_lookup(&fs_info->fs_roots_radix, |
1192 | (unsigned long)location->objectid); | 1185 | (unsigned long)location->objectid); |
1186 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
1193 | if (root) | 1187 | if (root) |
1194 | return root; | 1188 | return root; |
1195 | 1189 | ||
1190 | ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); | ||
1191 | if (ret == 0) | ||
1192 | ret = -ENOENT; | ||
1193 | if (ret < 0) | ||
1194 | return ERR_PTR(ret); | ||
1195 | |||
1196 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); | 1196 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); |
1197 | if (IS_ERR(root)) | 1197 | if (IS_ERR(root)) |
1198 | return root; | 1198 | return root; |
1199 | 1199 | ||
1200 | WARN_ON(btrfs_root_refs(&root->root_item) == 0); | ||
1200 | set_anon_super(&root->anon_super, NULL); | 1201 | set_anon_super(&root->anon_super, NULL); |
1201 | 1202 | ||
1203 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | ||
1204 | if (ret) | ||
1205 | goto fail; | ||
1206 | |||
1207 | spin_lock(&fs_info->fs_roots_radix_lock); | ||
1202 | ret = radix_tree_insert(&fs_info->fs_roots_radix, | 1208 | ret = radix_tree_insert(&fs_info->fs_roots_radix, |
1203 | (unsigned long)root->root_key.objectid, | 1209 | (unsigned long)root->root_key.objectid, |
1204 | root); | 1210 | root); |
1211 | if (ret == 0) | ||
1212 | root->in_radix = 1; | ||
1213 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
1214 | radix_tree_preload_end(); | ||
1205 | if (ret) { | 1215 | if (ret) { |
1206 | free_extent_buffer(root->node); | 1216 | if (ret == -EEXIST) { |
1207 | kfree(root); | 1217 | free_fs_root(root); |
1208 | return ERR_PTR(ret); | 1218 | goto again; |
1219 | } | ||
1220 | goto fail; | ||
1209 | } | 1221 | } |
1210 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 1222 | |
1211 | ret = btrfs_find_dead_roots(fs_info->tree_root, | 1223 | ret = btrfs_find_dead_roots(fs_info->tree_root, |
1212 | root->root_key.objectid); | 1224 | root->root_key.objectid); |
1213 | BUG_ON(ret); | 1225 | WARN_ON(ret); |
1226 | |||
1227 | if (!(fs_info->sb->s_flags & MS_RDONLY)) | ||
1214 | btrfs_orphan_cleanup(root); | 1228 | btrfs_orphan_cleanup(root); |
1215 | } | 1229 | |
1216 | return root; | 1230 | return root; |
1231 | fail: | ||
1232 | free_fs_root(root); | ||
1233 | return ERR_PTR(ret); | ||
1217 | } | 1234 | } |
1218 | 1235 | ||
1219 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, | 1236 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, |
1220 | struct btrfs_key *location, | 1237 | struct btrfs_key *location, |
1221 | const char *name, int namelen) | 1238 | const char *name, int namelen) |
1222 | { | 1239 | { |
1240 | return btrfs_read_fs_root_no_name(fs_info, location); | ||
1241 | #if 0 | ||
1223 | struct btrfs_root *root; | 1242 | struct btrfs_root *root; |
1224 | int ret; | 1243 | int ret; |
1225 | 1244 | ||
@@ -1236,7 +1255,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, | |||
1236 | kfree(root); | 1255 | kfree(root); |
1237 | return ERR_PTR(ret); | 1256 | return ERR_PTR(ret); |
1238 | } | 1257 | } |
1239 | #if 0 | 1258 | |
1240 | ret = btrfs_sysfs_add_root(root); | 1259 | ret = btrfs_sysfs_add_root(root); |
1241 | if (ret) { | 1260 | if (ret) { |
1242 | free_extent_buffer(root->node); | 1261 | free_extent_buffer(root->node); |
@@ -1244,9 +1263,9 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, | |||
1244 | kfree(root); | 1263 | kfree(root); |
1245 | return ERR_PTR(ret); | 1264 | return ERR_PTR(ret); |
1246 | } | 1265 | } |
1247 | #endif | ||
1248 | root->in_sysfs = 1; | 1266 | root->in_sysfs = 1; |
1249 | return root; | 1267 | return root; |
1268 | #endif | ||
1250 | } | 1269 | } |
1251 | 1270 | ||
1252 | static int btrfs_congested_fn(void *congested_data, int bdi_bits) | 1271 | static int btrfs_congested_fn(void *congested_data, int bdi_bits) |
@@ -1325,9 +1344,9 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) | |||
1325 | offset = page_offset(page); | 1344 | offset = page_offset(page); |
1326 | 1345 | ||
1327 | em_tree = &BTRFS_I(inode)->extent_tree; | 1346 | em_tree = &BTRFS_I(inode)->extent_tree; |
1328 | spin_lock(&em_tree->lock); | 1347 | read_lock(&em_tree->lock); |
1329 | em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); | 1348 | em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); |
1330 | spin_unlock(&em_tree->lock); | 1349 | read_unlock(&em_tree->lock); |
1331 | if (!em) { | 1350 | if (!em) { |
1332 | __unplug_io_fn(bdi, page); | 1351 | __unplug_io_fn(bdi, page); |
1333 | return; | 1352 | return; |
@@ -1360,8 +1379,10 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) | |||
1360 | 1379 | ||
1361 | err = bdi_register(bdi, NULL, "btrfs-%d", | 1380 | err = bdi_register(bdi, NULL, "btrfs-%d", |
1362 | atomic_inc_return(&btrfs_bdi_num)); | 1381 | atomic_inc_return(&btrfs_bdi_num)); |
1363 | if (err) | 1382 | if (err) { |
1383 | bdi_destroy(bdi); | ||
1364 | return err; | 1384 | return err; |
1385 | } | ||
1365 | 1386 | ||
1366 | bdi->ra_pages = default_backing_dev_info.ra_pages; | 1387 | bdi->ra_pages = default_backing_dev_info.ra_pages; |
1367 | bdi->unplug_io_fn = btrfs_unplug_io_fn; | 1388 | bdi->unplug_io_fn = btrfs_unplug_io_fn; |
@@ -1451,9 +1472,12 @@ static int cleaner_kthread(void *arg) | |||
1451 | break; | 1472 | break; |
1452 | 1473 | ||
1453 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1474 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
1454 | mutex_lock(&root->fs_info->cleaner_mutex); | 1475 | |
1455 | btrfs_clean_old_snapshots(root); | 1476 | if (!(root->fs_info->sb->s_flags & MS_RDONLY) && |
1456 | mutex_unlock(&root->fs_info->cleaner_mutex); | 1477 | mutex_trylock(&root->fs_info->cleaner_mutex)) { |
1478 | btrfs_clean_old_snapshots(root); | ||
1479 | mutex_unlock(&root->fs_info->cleaner_mutex); | ||
1480 | } | ||
1457 | 1481 | ||
1458 | if (freezing(current)) { | 1482 | if (freezing(current)) { |
1459 | refrigerator(); | 1483 | refrigerator(); |
@@ -1558,15 +1582,36 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1558 | err = -ENOMEM; | 1582 | err = -ENOMEM; |
1559 | goto fail; | 1583 | goto fail; |
1560 | } | 1584 | } |
1561 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); | 1585 | |
1586 | ret = init_srcu_struct(&fs_info->subvol_srcu); | ||
1587 | if (ret) { | ||
1588 | err = ret; | ||
1589 | goto fail; | ||
1590 | } | ||
1591 | |||
1592 | ret = setup_bdi(fs_info, &fs_info->bdi); | ||
1593 | if (ret) { | ||
1594 | err = ret; | ||
1595 | goto fail_srcu; | ||
1596 | } | ||
1597 | |||
1598 | fs_info->btree_inode = new_inode(sb); | ||
1599 | if (!fs_info->btree_inode) { | ||
1600 | err = -ENOMEM; | ||
1601 | goto fail_bdi; | ||
1602 | } | ||
1603 | |||
1604 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); | ||
1562 | INIT_LIST_HEAD(&fs_info->trans_list); | 1605 | INIT_LIST_HEAD(&fs_info->trans_list); |
1563 | INIT_LIST_HEAD(&fs_info->dead_roots); | 1606 | INIT_LIST_HEAD(&fs_info->dead_roots); |
1564 | INIT_LIST_HEAD(&fs_info->hashers); | 1607 | INIT_LIST_HEAD(&fs_info->hashers); |
1565 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); | 1608 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); |
1566 | INIT_LIST_HEAD(&fs_info->ordered_operations); | 1609 | INIT_LIST_HEAD(&fs_info->ordered_operations); |
1610 | INIT_LIST_HEAD(&fs_info->caching_block_groups); | ||
1567 | spin_lock_init(&fs_info->delalloc_lock); | 1611 | spin_lock_init(&fs_info->delalloc_lock); |
1568 | spin_lock_init(&fs_info->new_trans_lock); | 1612 | spin_lock_init(&fs_info->new_trans_lock); |
1569 | spin_lock_init(&fs_info->ref_cache_lock); | 1613 | spin_lock_init(&fs_info->ref_cache_lock); |
1614 | spin_lock_init(&fs_info->fs_roots_radix_lock); | ||
1570 | 1615 | ||
1571 | init_completion(&fs_info->kobj_unregister); | 1616 | init_completion(&fs_info->kobj_unregister); |
1572 | fs_info->tree_root = tree_root; | 1617 | fs_info->tree_root = tree_root; |
@@ -1585,11 +1630,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1585 | fs_info->sb = sb; | 1630 | fs_info->sb = sb; |
1586 | fs_info->max_extent = (u64)-1; | 1631 | fs_info->max_extent = (u64)-1; |
1587 | fs_info->max_inline = 8192 * 1024; | 1632 | fs_info->max_inline = 8192 * 1024; |
1588 | if (setup_bdi(fs_info, &fs_info->bdi)) | ||
1589 | goto fail_bdi; | ||
1590 | fs_info->btree_inode = new_inode(sb); | ||
1591 | fs_info->btree_inode->i_ino = 1; | ||
1592 | fs_info->btree_inode->i_nlink = 1; | ||
1593 | fs_info->metadata_ratio = 8; | 1633 | fs_info->metadata_ratio = 8; |
1594 | 1634 | ||
1595 | fs_info->thread_pool_size = min_t(unsigned long, | 1635 | fs_info->thread_pool_size = min_t(unsigned long, |
@@ -1602,6 +1642,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1602 | sb->s_blocksize_bits = blksize_bits(4096); | 1642 | sb->s_blocksize_bits = blksize_bits(4096); |
1603 | sb->s_bdi = &fs_info->bdi; | 1643 | sb->s_bdi = &fs_info->bdi; |
1604 | 1644 | ||
1645 | fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; | ||
1646 | fs_info->btree_inode->i_nlink = 1; | ||
1605 | /* | 1647 | /* |
1606 | * we set the i_size on the btree inode to the max possible int. | 1648 | * we set the i_size on the btree inode to the max possible int. |
1607 | * the real end of the address space is determined by all of | 1649 | * the real end of the address space is determined by all of |
@@ -1620,28 +1662,32 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1620 | 1662 | ||
1621 | BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; | 1663 | BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; |
1622 | 1664 | ||
1665 | BTRFS_I(fs_info->btree_inode)->root = tree_root; | ||
1666 | memset(&BTRFS_I(fs_info->btree_inode)->location, 0, | ||
1667 | sizeof(struct btrfs_key)); | ||
1668 | BTRFS_I(fs_info->btree_inode)->dummy_inode = 1; | ||
1669 | insert_inode_hash(fs_info->btree_inode); | ||
1670 | |||
1623 | spin_lock_init(&fs_info->block_group_cache_lock); | 1671 | spin_lock_init(&fs_info->block_group_cache_lock); |
1624 | fs_info->block_group_cache_tree.rb_node = NULL; | 1672 | fs_info->block_group_cache_tree.rb_node = NULL; |
1625 | 1673 | ||
1626 | extent_io_tree_init(&fs_info->pinned_extents, | 1674 | extent_io_tree_init(&fs_info->freed_extents[0], |
1627 | fs_info->btree_inode->i_mapping, GFP_NOFS); | 1675 | fs_info->btree_inode->i_mapping, GFP_NOFS); |
1676 | extent_io_tree_init(&fs_info->freed_extents[1], | ||
1677 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
1678 | fs_info->pinned_extents = &fs_info->freed_extents[0]; | ||
1628 | fs_info->do_barriers = 1; | 1679 | fs_info->do_barriers = 1; |
1629 | 1680 | ||
1630 | BTRFS_I(fs_info->btree_inode)->root = tree_root; | ||
1631 | memset(&BTRFS_I(fs_info->btree_inode)->location, 0, | ||
1632 | sizeof(struct btrfs_key)); | ||
1633 | insert_inode_hash(fs_info->btree_inode); | ||
1634 | 1681 | ||
1635 | mutex_init(&fs_info->trans_mutex); | 1682 | mutex_init(&fs_info->trans_mutex); |
1636 | mutex_init(&fs_info->ordered_operations_mutex); | 1683 | mutex_init(&fs_info->ordered_operations_mutex); |
1637 | mutex_init(&fs_info->tree_log_mutex); | 1684 | mutex_init(&fs_info->tree_log_mutex); |
1638 | mutex_init(&fs_info->drop_mutex); | ||
1639 | mutex_init(&fs_info->chunk_mutex); | 1685 | mutex_init(&fs_info->chunk_mutex); |
1640 | mutex_init(&fs_info->transaction_kthread_mutex); | 1686 | mutex_init(&fs_info->transaction_kthread_mutex); |
1641 | mutex_init(&fs_info->cleaner_mutex); | 1687 | mutex_init(&fs_info->cleaner_mutex); |
1642 | mutex_init(&fs_info->volume_mutex); | 1688 | mutex_init(&fs_info->volume_mutex); |
1643 | mutex_init(&fs_info->tree_reloc_mutex); | ||
1644 | init_rwsem(&fs_info->extent_commit_sem); | 1689 | init_rwsem(&fs_info->extent_commit_sem); |
1690 | init_rwsem(&fs_info->subvol_sem); | ||
1645 | 1691 | ||
1646 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); | 1692 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); |
1647 | btrfs_init_free_cluster(&fs_info->data_alloc_cluster); | 1693 | btrfs_init_free_cluster(&fs_info->data_alloc_cluster); |
@@ -1700,7 +1746,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1700 | err = -EINVAL; | 1746 | err = -EINVAL; |
1701 | goto fail_iput; | 1747 | goto fail_iput; |
1702 | } | 1748 | } |
1703 | 1749 | printk("thread pool is %d\n", fs_info->thread_pool_size); | |
1704 | /* | 1750 | /* |
1705 | * we need to start all the end_io workers up front because the | 1751 | * we need to start all the end_io workers up front because the |
1706 | * queue work function gets called at interrupt time, and so it | 1752 | * queue work function gets called at interrupt time, and so it |
@@ -1745,20 +1791,22 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1745 | fs_info->endio_workers.idle_thresh = 4; | 1791 | fs_info->endio_workers.idle_thresh = 4; |
1746 | fs_info->endio_meta_workers.idle_thresh = 4; | 1792 | fs_info->endio_meta_workers.idle_thresh = 4; |
1747 | 1793 | ||
1748 | fs_info->endio_write_workers.idle_thresh = 64; | 1794 | fs_info->endio_write_workers.idle_thresh = 2; |
1749 | fs_info->endio_meta_write_workers.idle_thresh = 64; | 1795 | fs_info->endio_meta_write_workers.idle_thresh = 2; |
1796 | |||
1797 | fs_info->endio_workers.atomic_worker_start = 1; | ||
1798 | fs_info->endio_meta_workers.atomic_worker_start = 1; | ||
1799 | fs_info->endio_write_workers.atomic_worker_start = 1; | ||
1800 | fs_info->endio_meta_write_workers.atomic_worker_start = 1; | ||
1750 | 1801 | ||
1751 | btrfs_start_workers(&fs_info->workers, 1); | 1802 | btrfs_start_workers(&fs_info->workers, 1); |
1752 | btrfs_start_workers(&fs_info->submit_workers, 1); | 1803 | btrfs_start_workers(&fs_info->submit_workers, 1); |
1753 | btrfs_start_workers(&fs_info->delalloc_workers, 1); | 1804 | btrfs_start_workers(&fs_info->delalloc_workers, 1); |
1754 | btrfs_start_workers(&fs_info->fixup_workers, 1); | 1805 | btrfs_start_workers(&fs_info->fixup_workers, 1); |
1755 | btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); | 1806 | btrfs_start_workers(&fs_info->endio_workers, 1); |
1756 | btrfs_start_workers(&fs_info->endio_meta_workers, | 1807 | btrfs_start_workers(&fs_info->endio_meta_workers, 1); |
1757 | fs_info->thread_pool_size); | 1808 | btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); |
1758 | btrfs_start_workers(&fs_info->endio_meta_write_workers, | 1809 | btrfs_start_workers(&fs_info->endio_write_workers, 1); |
1759 | fs_info->thread_pool_size); | ||
1760 | btrfs_start_workers(&fs_info->endio_write_workers, | ||
1761 | fs_info->thread_pool_size); | ||
1762 | 1810 | ||
1763 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); | 1811 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); |
1764 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, | 1812 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, |
@@ -1918,6 +1966,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1918 | } | 1966 | } |
1919 | } | 1967 | } |
1920 | 1968 | ||
1969 | ret = btrfs_find_orphan_roots(tree_root); | ||
1970 | BUG_ON(ret); | ||
1971 | |||
1921 | if (!(sb->s_flags & MS_RDONLY)) { | 1972 | if (!(sb->s_flags & MS_RDONLY)) { |
1922 | ret = btrfs_recover_relocation(tree_root); | 1973 | ret = btrfs_recover_relocation(tree_root); |
1923 | BUG_ON(ret); | 1974 | BUG_ON(ret); |
@@ -1977,6 +2028,8 @@ fail_iput: | |||
1977 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2028 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
1978 | fail_bdi: | 2029 | fail_bdi: |
1979 | bdi_destroy(&fs_info->bdi); | 2030 | bdi_destroy(&fs_info->bdi); |
2031 | fail_srcu: | ||
2032 | cleanup_srcu_struct(&fs_info->subvol_srcu); | ||
1980 | fail: | 2033 | fail: |
1981 | kfree(extent_root); | 2034 | kfree(extent_root); |
1982 | kfree(tree_root); | 2035 | kfree(tree_root); |
@@ -2236,20 +2289,29 @@ int write_ctree_super(struct btrfs_trans_handle *trans, | |||
2236 | 2289 | ||
2237 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) | 2290 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) |
2238 | { | 2291 | { |
2239 | WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); | 2292 | spin_lock(&fs_info->fs_roots_radix_lock); |
2240 | radix_tree_delete(&fs_info->fs_roots_radix, | 2293 | radix_tree_delete(&fs_info->fs_roots_radix, |
2241 | (unsigned long)root->root_key.objectid); | 2294 | (unsigned long)root->root_key.objectid); |
2295 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
2296 | |||
2297 | if (btrfs_root_refs(&root->root_item) == 0) | ||
2298 | synchronize_srcu(&fs_info->subvol_srcu); | ||
2299 | |||
2300 | free_fs_root(root); | ||
2301 | return 0; | ||
2302 | } | ||
2303 | |||
2304 | static void free_fs_root(struct btrfs_root *root) | ||
2305 | { | ||
2306 | WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); | ||
2242 | if (root->anon_super.s_dev) { | 2307 | if (root->anon_super.s_dev) { |
2243 | down_write(&root->anon_super.s_umount); | 2308 | down_write(&root->anon_super.s_umount); |
2244 | kill_anon_super(&root->anon_super); | 2309 | kill_anon_super(&root->anon_super); |
2245 | } | 2310 | } |
2246 | if (root->node) | 2311 | free_extent_buffer(root->node); |
2247 | free_extent_buffer(root->node); | 2312 | free_extent_buffer(root->commit_root); |
2248 | if (root->commit_root) | ||
2249 | free_extent_buffer(root->commit_root); | ||
2250 | kfree(root->name); | 2313 | kfree(root->name); |
2251 | kfree(root); | 2314 | kfree(root); |
2252 | return 0; | ||
2253 | } | 2315 | } |
2254 | 2316 | ||
2255 | static int del_fs_roots(struct btrfs_fs_info *fs_info) | 2317 | static int del_fs_roots(struct btrfs_fs_info *fs_info) |
@@ -2258,6 +2320,20 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info) | |||
2258 | struct btrfs_root *gang[8]; | 2320 | struct btrfs_root *gang[8]; |
2259 | int i; | 2321 | int i; |
2260 | 2322 | ||
2323 | while (!list_empty(&fs_info->dead_roots)) { | ||
2324 | gang[0] = list_entry(fs_info->dead_roots.next, | ||
2325 | struct btrfs_root, root_list); | ||
2326 | list_del(&gang[0]->root_list); | ||
2327 | |||
2328 | if (gang[0]->in_radix) { | ||
2329 | btrfs_free_fs_root(fs_info, gang[0]); | ||
2330 | } else { | ||
2331 | free_extent_buffer(gang[0]->node); | ||
2332 | free_extent_buffer(gang[0]->commit_root); | ||
2333 | kfree(gang[0]); | ||
2334 | } | ||
2335 | } | ||
2336 | |||
2261 | while (1) { | 2337 | while (1) { |
2262 | ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, | 2338 | ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, |
2263 | (void **)gang, 0, | 2339 | (void **)gang, 0, |
@@ -2287,9 +2363,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) | |||
2287 | root_objectid = gang[ret - 1]->root_key.objectid + 1; | 2363 | root_objectid = gang[ret - 1]->root_key.objectid + 1; |
2288 | for (i = 0; i < ret; i++) { | 2364 | for (i = 0; i < ret; i++) { |
2289 | root_objectid = gang[i]->root_key.objectid; | 2365 | root_objectid = gang[i]->root_key.objectid; |
2290 | ret = btrfs_find_dead_roots(fs_info->tree_root, | ||
2291 | root_objectid); | ||
2292 | BUG_ON(ret); | ||
2293 | btrfs_orphan_cleanup(gang[i]); | 2366 | btrfs_orphan_cleanup(gang[i]); |
2294 | } | 2367 | } |
2295 | root_objectid++; | 2368 | root_objectid++; |
@@ -2359,7 +2432,6 @@ int close_ctree(struct btrfs_root *root) | |||
2359 | free_extent_buffer(root->fs_info->csum_root->commit_root); | 2432 | free_extent_buffer(root->fs_info->csum_root->commit_root); |
2360 | 2433 | ||
2361 | btrfs_free_block_groups(root->fs_info); | 2434 | btrfs_free_block_groups(root->fs_info); |
2362 | btrfs_free_pinned_extents(root->fs_info); | ||
2363 | 2435 | ||
2364 | del_fs_roots(fs_info); | 2436 | del_fs_roots(fs_info); |
2365 | 2437 | ||
@@ -2378,6 +2450,7 @@ int close_ctree(struct btrfs_root *root) | |||
2378 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2450 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
2379 | 2451 | ||
2380 | bdi_destroy(&fs_info->bdi); | 2452 | bdi_destroy(&fs_info->bdi); |
2453 | cleanup_srcu_struct(&fs_info->subvol_srcu); | ||
2381 | 2454 | ||
2382 | kfree(fs_info->extent_root); | 2455 | kfree(fs_info->extent_root); |
2383 | kfree(fs_info->tree_root); | 2456 | kfree(fs_info->tree_root); |
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 9596b40caa4e..ba5c3fd5ab8c 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c | |||
@@ -28,7 +28,7 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, | |||
28 | len = BTRFS_FID_SIZE_NON_CONNECTABLE; | 28 | len = BTRFS_FID_SIZE_NON_CONNECTABLE; |
29 | type = FILEID_BTRFS_WITHOUT_PARENT; | 29 | type = FILEID_BTRFS_WITHOUT_PARENT; |
30 | 30 | ||
31 | fid->objectid = BTRFS_I(inode)->location.objectid; | 31 | fid->objectid = inode->i_ino; |
32 | fid->root_objectid = BTRFS_I(inode)->root->objectid; | 32 | fid->root_objectid = BTRFS_I(inode)->root->objectid; |
33 | fid->gen = inode->i_generation; | 33 | fid->gen = inode->i_generation; |
34 | 34 | ||
@@ -60,34 +60,61 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, | |||
60 | } | 60 | } |
61 | 61 | ||
62 | static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, | 62 | static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, |
63 | u64 root_objectid, u32 generation) | 63 | u64 root_objectid, u32 generation, |
64 | int check_generation) | ||
64 | { | 65 | { |
66 | struct btrfs_fs_info *fs_info = btrfs_sb(sb)->fs_info; | ||
65 | struct btrfs_root *root; | 67 | struct btrfs_root *root; |
68 | struct dentry *dentry; | ||
66 | struct inode *inode; | 69 | struct inode *inode; |
67 | struct btrfs_key key; | 70 | struct btrfs_key key; |
71 | int index; | ||
72 | int err = 0; | ||
73 | |||
74 | if (objectid < BTRFS_FIRST_FREE_OBJECTID) | ||
75 | return ERR_PTR(-ESTALE); | ||
68 | 76 | ||
69 | key.objectid = root_objectid; | 77 | key.objectid = root_objectid; |
70 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 78 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); |
71 | key.offset = (u64)-1; | 79 | key.offset = (u64)-1; |
72 | 80 | ||
73 | root = btrfs_read_fs_root_no_name(btrfs_sb(sb)->fs_info, &key); | 81 | index = srcu_read_lock(&fs_info->subvol_srcu); |
74 | if (IS_ERR(root)) | 82 | |
75 | return ERR_CAST(root); | 83 | root = btrfs_read_fs_root_no_name(fs_info, &key); |
84 | if (IS_ERR(root)) { | ||
85 | err = PTR_ERR(root); | ||
86 | goto fail; | ||
87 | } | ||
88 | |||
89 | if (btrfs_root_refs(&root->root_item) == 0) { | ||
90 | err = -ENOENT; | ||
91 | goto fail; | ||
92 | } | ||
76 | 93 | ||
77 | key.objectid = objectid; | 94 | key.objectid = objectid; |
78 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 95 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); |
79 | key.offset = 0; | 96 | key.offset = 0; |
80 | 97 | ||
81 | inode = btrfs_iget(sb, &key, root); | 98 | inode = btrfs_iget(sb, &key, root); |
82 | if (IS_ERR(inode)) | 99 | if (IS_ERR(inode)) { |
83 | return (void *)inode; | 100 | err = PTR_ERR(inode); |
101 | goto fail; | ||
102 | } | ||
103 | |||
104 | srcu_read_unlock(&fs_info->subvol_srcu, index); | ||
84 | 105 | ||
85 | if (generation != inode->i_generation) { | 106 | if (check_generation && generation != inode->i_generation) { |
86 | iput(inode); | 107 | iput(inode); |
87 | return ERR_PTR(-ESTALE); | 108 | return ERR_PTR(-ESTALE); |
88 | } | 109 | } |
89 | 110 | ||
90 | return d_obtain_alias(inode); | 111 | dentry = d_obtain_alias(inode); |
112 | if (!IS_ERR(dentry)) | ||
113 | dentry->d_op = &btrfs_dentry_operations; | ||
114 | return dentry; | ||
115 | fail: | ||
116 | srcu_read_unlock(&fs_info->subvol_srcu, index); | ||
117 | return ERR_PTR(err); | ||
91 | } | 118 | } |
92 | 119 | ||
93 | static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, | 120 | static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, |
@@ -111,7 +138,7 @@ static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, | |||
111 | objectid = fid->parent_objectid; | 138 | objectid = fid->parent_objectid; |
112 | generation = fid->parent_gen; | 139 | generation = fid->parent_gen; |
113 | 140 | ||
114 | return btrfs_get_dentry(sb, objectid, root_objectid, generation); | 141 | return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1); |
115 | } | 142 | } |
116 | 143 | ||
117 | static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, | 144 | static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, |
@@ -133,66 +160,76 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, | |||
133 | root_objectid = fid->root_objectid; | 160 | root_objectid = fid->root_objectid; |
134 | generation = fid->gen; | 161 | generation = fid->gen; |
135 | 162 | ||
136 | return btrfs_get_dentry(sb, objectid, root_objectid, generation); | 163 | return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1); |
137 | } | 164 | } |
138 | 165 | ||
139 | static struct dentry *btrfs_get_parent(struct dentry *child) | 166 | static struct dentry *btrfs_get_parent(struct dentry *child) |
140 | { | 167 | { |
141 | struct inode *dir = child->d_inode; | 168 | struct inode *dir = child->d_inode; |
169 | static struct dentry *dentry; | ||
142 | struct btrfs_root *root = BTRFS_I(dir)->root; | 170 | struct btrfs_root *root = BTRFS_I(dir)->root; |
143 | struct btrfs_key key; | ||
144 | struct btrfs_path *path; | 171 | struct btrfs_path *path; |
145 | struct extent_buffer *leaf; | 172 | struct extent_buffer *leaf; |
146 | int slot; | 173 | struct btrfs_root_ref *ref; |
147 | u64 objectid; | 174 | struct btrfs_key key; |
175 | struct btrfs_key found_key; | ||
148 | int ret; | 176 | int ret; |
149 | 177 | ||
150 | path = btrfs_alloc_path(); | 178 | path = btrfs_alloc_path(); |
151 | 179 | ||
152 | key.objectid = dir->i_ino; | 180 | if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) { |
153 | btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); | 181 | key.objectid = root->root_key.objectid; |
154 | key.offset = (u64)-1; | 182 | key.type = BTRFS_ROOT_BACKREF_KEY; |
183 | key.offset = (u64)-1; | ||
184 | root = root->fs_info->tree_root; | ||
185 | } else { | ||
186 | key.objectid = dir->i_ino; | ||
187 | key.type = BTRFS_INODE_REF_KEY; | ||
188 | key.offset = (u64)-1; | ||
189 | } | ||
155 | 190 | ||
156 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 191 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
157 | if (ret < 0) { | 192 | if (ret < 0) |
158 | /* Error */ | 193 | goto fail; |
159 | btrfs_free_path(path); | 194 | |
160 | return ERR_PTR(ret); | 195 | BUG_ON(ret == 0); |
196 | if (path->slots[0] == 0) { | ||
197 | ret = -ENOENT; | ||
198 | goto fail; | ||
161 | } | 199 | } |
200 | |||
201 | path->slots[0]--; | ||
162 | leaf = path->nodes[0]; | 202 | leaf = path->nodes[0]; |
163 | slot = path->slots[0]; | 203 | |
164 | if (ret) { | 204 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
165 | /* btrfs_search_slot() returns the slot where we'd want to | 205 | if (found_key.objectid != key.objectid || found_key.type != key.type) { |
166 | insert a backref for parent inode #0xFFFFFFFFFFFFFFFF. | 206 | ret = -ENOENT; |
167 | The _real_ backref, telling us what the parent inode | 207 | goto fail; |
168 | _actually_ is, will be in the slot _before_ the one | ||
169 | that btrfs_search_slot() returns. */ | ||
170 | if (!slot) { | ||
171 | /* Unless there is _no_ key in the tree before... */ | ||
172 | btrfs_free_path(path); | ||
173 | return ERR_PTR(-EIO); | ||
174 | } | ||
175 | slot--; | ||
176 | } | 208 | } |
177 | 209 | ||
178 | btrfs_item_key_to_cpu(leaf, &key, slot); | 210 | if (found_key.type == BTRFS_ROOT_BACKREF_KEY) { |
211 | ref = btrfs_item_ptr(leaf, path->slots[0], | ||
212 | struct btrfs_root_ref); | ||
213 | key.objectid = btrfs_root_ref_dirid(leaf, ref); | ||
214 | } else { | ||
215 | key.objectid = found_key.offset; | ||
216 | } | ||
179 | btrfs_free_path(path); | 217 | btrfs_free_path(path); |
180 | 218 | ||
181 | if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY) | 219 | if (found_key.type == BTRFS_ROOT_BACKREF_KEY) { |
182 | return ERR_PTR(-EINVAL); | 220 | return btrfs_get_dentry(root->fs_info->sb, key.objectid, |
183 | 221 | found_key.offset, 0, 0); | |
184 | objectid = key.offset; | 222 | } |
185 | |||
186 | /* If we are already at the root of a subvol, return the real root */ | ||
187 | if (objectid == dir->i_ino) | ||
188 | return dget(dir->i_sb->s_root); | ||
189 | 223 | ||
190 | /* Build a new key for the inode item */ | 224 | key.type = BTRFS_INODE_ITEM_KEY; |
191 | key.objectid = objectid; | ||
192 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | ||
193 | key.offset = 0; | 225 | key.offset = 0; |
194 | 226 | dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root)); | |
195 | return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root)); | 227 | if (!IS_ERR(dentry)) |
228 | dentry->d_op = &btrfs_dentry_operations; | ||
229 | return dentry; | ||
230 | fail: | ||
231 | btrfs_free_path(path); | ||
232 | return ERR_PTR(ret); | ||
196 | } | 233 | } |
197 | 234 | ||
198 | const struct export_operations btrfs_export_ops = { | 235 | const struct export_operations btrfs_export_ops = { |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 535f85ba104f..993f93ff7ba6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -32,12 +32,12 @@ | |||
32 | #include "locking.h" | 32 | #include "locking.h" |
33 | #include "free-space-cache.h" | 33 | #include "free-space-cache.h" |
34 | 34 | ||
35 | static int update_reserved_extents(struct btrfs_root *root, | ||
36 | u64 bytenr, u64 num, int reserve); | ||
37 | static int update_block_group(struct btrfs_trans_handle *trans, | 35 | static int update_block_group(struct btrfs_trans_handle *trans, |
38 | struct btrfs_root *root, | 36 | struct btrfs_root *root, |
39 | u64 bytenr, u64 num_bytes, int alloc, | 37 | u64 bytenr, u64 num_bytes, int alloc, |
40 | int mark_free); | 38 | int mark_free); |
39 | static int update_reserved_extents(struct btrfs_block_group_cache *cache, | ||
40 | u64 num_bytes, int reserve); | ||
41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
42 | struct btrfs_root *root, | 42 | struct btrfs_root *root, |
43 | u64 bytenr, u64 num_bytes, u64 parent, | 43 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -57,10 +57,17 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
57 | u64 parent, u64 root_objectid, | 57 | u64 parent, u64 root_objectid, |
58 | u64 flags, struct btrfs_disk_key *key, | 58 | u64 flags, struct btrfs_disk_key *key, |
59 | int level, struct btrfs_key *ins); | 59 | int level, struct btrfs_key *ins); |
60 | |||
61 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 60 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
62 | struct btrfs_root *extent_root, u64 alloc_bytes, | 61 | struct btrfs_root *extent_root, u64 alloc_bytes, |
63 | u64 flags, int force); | 62 | u64 flags, int force); |
63 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | ||
64 | struct btrfs_root *root, | ||
65 | struct btrfs_path *path, | ||
66 | u64 bytenr, u64 num_bytes, | ||
67 | int is_data, int reserved, | ||
68 | struct extent_buffer **must_clean); | ||
69 | static int find_next_key(struct btrfs_path *path, int level, | ||
70 | struct btrfs_key *key); | ||
64 | 71 | ||
65 | static noinline int | 72 | static noinline int |
66 | block_group_cache_done(struct btrfs_block_group_cache *cache) | 73 | block_group_cache_done(struct btrfs_block_group_cache *cache) |
@@ -153,34 +160,34 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, | |||
153 | return ret; | 160 | return ret; |
154 | } | 161 | } |
155 | 162 | ||
156 | /* | 163 | static int add_excluded_extent(struct btrfs_root *root, |
157 | * We always set EXTENT_LOCKED for the super mirror extents so we don't | 164 | u64 start, u64 num_bytes) |
158 | * overwrite them, so those bits need to be unset. Also, if we are unmounting | ||
159 | * with pinned extents still sitting there because we had a block group caching, | ||
160 | * we need to clear those now, since we are done. | ||
161 | */ | ||
162 | void btrfs_free_pinned_extents(struct btrfs_fs_info *info) | ||
163 | { | 165 | { |
164 | u64 start, end, last = 0; | 166 | u64 end = start + num_bytes - 1; |
165 | int ret; | 167 | set_extent_bits(&root->fs_info->freed_extents[0], |
168 | start, end, EXTENT_UPTODATE, GFP_NOFS); | ||
169 | set_extent_bits(&root->fs_info->freed_extents[1], | ||
170 | start, end, EXTENT_UPTODATE, GFP_NOFS); | ||
171 | return 0; | ||
172 | } | ||
166 | 173 | ||
167 | while (1) { | 174 | static void free_excluded_extents(struct btrfs_root *root, |
168 | ret = find_first_extent_bit(&info->pinned_extents, last, | 175 | struct btrfs_block_group_cache *cache) |
169 | &start, &end, | 176 | { |
170 | EXTENT_LOCKED|EXTENT_DIRTY); | 177 | u64 start, end; |
171 | if (ret) | ||
172 | break; | ||
173 | 178 | ||
174 | clear_extent_bits(&info->pinned_extents, start, end, | 179 | start = cache->key.objectid; |
175 | EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS); | 180 | end = start + cache->key.offset - 1; |
176 | last = end+1; | 181 | |
177 | } | 182 | clear_extent_bits(&root->fs_info->freed_extents[0], |
183 | start, end, EXTENT_UPTODATE, GFP_NOFS); | ||
184 | clear_extent_bits(&root->fs_info->freed_extents[1], | ||
185 | start, end, EXTENT_UPTODATE, GFP_NOFS); | ||
178 | } | 186 | } |
179 | 187 | ||
180 | static int remove_sb_from_cache(struct btrfs_root *root, | 188 | static int exclude_super_stripes(struct btrfs_root *root, |
181 | struct btrfs_block_group_cache *cache) | 189 | struct btrfs_block_group_cache *cache) |
182 | { | 190 | { |
183 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
184 | u64 bytenr; | 191 | u64 bytenr; |
185 | u64 *logical; | 192 | u64 *logical; |
186 | int stripe_len; | 193 | int stripe_len; |
@@ -192,17 +199,42 @@ static int remove_sb_from_cache(struct btrfs_root *root, | |||
192 | cache->key.objectid, bytenr, | 199 | cache->key.objectid, bytenr, |
193 | 0, &logical, &nr, &stripe_len); | 200 | 0, &logical, &nr, &stripe_len); |
194 | BUG_ON(ret); | 201 | BUG_ON(ret); |
202 | |||
195 | while (nr--) { | 203 | while (nr--) { |
196 | try_lock_extent(&fs_info->pinned_extents, | 204 | cache->bytes_super += stripe_len; |
197 | logical[nr], | 205 | ret = add_excluded_extent(root, logical[nr], |
198 | logical[nr] + stripe_len - 1, GFP_NOFS); | 206 | stripe_len); |
207 | BUG_ON(ret); | ||
199 | } | 208 | } |
209 | |||
200 | kfree(logical); | 210 | kfree(logical); |
201 | } | 211 | } |
202 | |||
203 | return 0; | 212 | return 0; |
204 | } | 213 | } |
205 | 214 | ||
215 | static struct btrfs_caching_control * | ||
216 | get_caching_control(struct btrfs_block_group_cache *cache) | ||
217 | { | ||
218 | struct btrfs_caching_control *ctl; | ||
219 | |||
220 | spin_lock(&cache->lock); | ||
221 | if (cache->cached != BTRFS_CACHE_STARTED) { | ||
222 | spin_unlock(&cache->lock); | ||
223 | return NULL; | ||
224 | } | ||
225 | |||
226 | ctl = cache->caching_ctl; | ||
227 | atomic_inc(&ctl->count); | ||
228 | spin_unlock(&cache->lock); | ||
229 | return ctl; | ||
230 | } | ||
231 | |||
232 | static void put_caching_control(struct btrfs_caching_control *ctl) | ||
233 | { | ||
234 | if (atomic_dec_and_test(&ctl->count)) | ||
235 | kfree(ctl); | ||
236 | } | ||
237 | |||
206 | /* | 238 | /* |
207 | * this is only called by cache_block_group, since we could have freed extents | 239 | * this is only called by cache_block_group, since we could have freed extents |
208 | * we need to check the pinned_extents for any extents that can't be used yet | 240 | * we need to check the pinned_extents for any extents that can't be used yet |
@@ -215,9 +247,9 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
215 | int ret; | 247 | int ret; |
216 | 248 | ||
217 | while (start < end) { | 249 | while (start < end) { |
218 | ret = find_first_extent_bit(&info->pinned_extents, start, | 250 | ret = find_first_extent_bit(info->pinned_extents, start, |
219 | &extent_start, &extent_end, | 251 | &extent_start, &extent_end, |
220 | EXTENT_DIRTY|EXTENT_LOCKED); | 252 | EXTENT_DIRTY | EXTENT_UPTODATE); |
221 | if (ret) | 253 | if (ret) |
222 | break; | 254 | break; |
223 | 255 | ||
@@ -249,22 +281,27 @@ static int caching_kthread(void *data) | |||
249 | { | 281 | { |
250 | struct btrfs_block_group_cache *block_group = data; | 282 | struct btrfs_block_group_cache *block_group = data; |
251 | struct btrfs_fs_info *fs_info = block_group->fs_info; | 283 | struct btrfs_fs_info *fs_info = block_group->fs_info; |
252 | u64 last = 0; | 284 | struct btrfs_caching_control *caching_ctl = block_group->caching_ctl; |
285 | struct btrfs_root *extent_root = fs_info->extent_root; | ||
253 | struct btrfs_path *path; | 286 | struct btrfs_path *path; |
254 | int ret = 0; | ||
255 | struct btrfs_key key; | ||
256 | struct extent_buffer *leaf; | 287 | struct extent_buffer *leaf; |
257 | int slot; | 288 | struct btrfs_key key; |
258 | u64 total_found = 0; | 289 | u64 total_found = 0; |
259 | 290 | u64 last = 0; | |
260 | BUG_ON(!fs_info); | 291 | u32 nritems; |
292 | int ret = 0; | ||
261 | 293 | ||
262 | path = btrfs_alloc_path(); | 294 | path = btrfs_alloc_path(); |
263 | if (!path) | 295 | if (!path) |
264 | return -ENOMEM; | 296 | return -ENOMEM; |
265 | 297 | ||
266 | atomic_inc(&block_group->space_info->caching_threads); | 298 | exclude_super_stripes(extent_root, block_group); |
299 | spin_lock(&block_group->space_info->lock); | ||
300 | block_group->space_info->bytes_super += block_group->bytes_super; | ||
301 | spin_unlock(&block_group->space_info->lock); | ||
302 | |||
267 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | 303 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); |
304 | |||
268 | /* | 305 | /* |
269 | * We don't want to deadlock with somebody trying to allocate a new | 306 | * We don't want to deadlock with somebody trying to allocate a new |
270 | * extent for the extent root while also trying to search the extent | 307 | * extent for the extent root while also trying to search the extent |
@@ -277,74 +314,64 @@ static int caching_kthread(void *data) | |||
277 | 314 | ||
278 | key.objectid = last; | 315 | key.objectid = last; |
279 | key.offset = 0; | 316 | key.offset = 0; |
280 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); | 317 | key.type = BTRFS_EXTENT_ITEM_KEY; |
281 | again: | 318 | again: |
319 | mutex_lock(&caching_ctl->mutex); | ||
282 | /* need to make sure the commit_root doesn't disappear */ | 320 | /* need to make sure the commit_root doesn't disappear */ |
283 | down_read(&fs_info->extent_commit_sem); | 321 | down_read(&fs_info->extent_commit_sem); |
284 | 322 | ||
285 | ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); | 323 | ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); |
286 | if (ret < 0) | 324 | if (ret < 0) |
287 | goto err; | 325 | goto err; |
288 | 326 | ||
327 | leaf = path->nodes[0]; | ||
328 | nritems = btrfs_header_nritems(leaf); | ||
329 | |||
289 | while (1) { | 330 | while (1) { |
290 | smp_mb(); | 331 | smp_mb(); |
291 | if (block_group->fs_info->closing > 1) { | 332 | if (fs_info->closing > 1) { |
292 | last = (u64)-1; | 333 | last = (u64)-1; |
293 | break; | 334 | break; |
294 | } | 335 | } |
295 | 336 | ||
296 | leaf = path->nodes[0]; | 337 | if (path->slots[0] < nritems) { |
297 | slot = path->slots[0]; | 338 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); |
298 | if (slot >= btrfs_header_nritems(leaf)) { | 339 | } else { |
299 | ret = btrfs_next_leaf(fs_info->extent_root, path); | 340 | ret = find_next_key(path, 0, &key); |
300 | if (ret < 0) | 341 | if (ret) |
301 | goto err; | ||
302 | else if (ret) | ||
303 | break; | 342 | break; |
304 | 343 | ||
305 | if (need_resched() || | 344 | caching_ctl->progress = last; |
306 | btrfs_transaction_in_commit(fs_info)) { | 345 | btrfs_release_path(extent_root, path); |
307 | leaf = path->nodes[0]; | 346 | up_read(&fs_info->extent_commit_sem); |
308 | 347 | mutex_unlock(&caching_ctl->mutex); | |
309 | /* this shouldn't happen, but if the | 348 | if (btrfs_transaction_in_commit(fs_info)) |
310 | * leaf is empty just move on. | ||
311 | */ | ||
312 | if (btrfs_header_nritems(leaf) == 0) | ||
313 | break; | ||
314 | /* | ||
315 | * we need to copy the key out so that | ||
316 | * we are sure the next search advances | ||
317 | * us forward in the btree. | ||
318 | */ | ||
319 | btrfs_item_key_to_cpu(leaf, &key, 0); | ||
320 | btrfs_release_path(fs_info->extent_root, path); | ||
321 | up_read(&fs_info->extent_commit_sem); | ||
322 | schedule_timeout(1); | 349 | schedule_timeout(1); |
323 | goto again; | 350 | else |
324 | } | 351 | cond_resched(); |
352 | goto again; | ||
353 | } | ||
325 | 354 | ||
355 | if (key.objectid < block_group->key.objectid) { | ||
356 | path->slots[0]++; | ||
326 | continue; | 357 | continue; |
327 | } | 358 | } |
328 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
329 | if (key.objectid < block_group->key.objectid) | ||
330 | goto next; | ||
331 | 359 | ||
332 | if (key.objectid >= block_group->key.objectid + | 360 | if (key.objectid >= block_group->key.objectid + |
333 | block_group->key.offset) | 361 | block_group->key.offset) |
334 | break; | 362 | break; |
335 | 363 | ||
336 | if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { | 364 | if (key.type == BTRFS_EXTENT_ITEM_KEY) { |
337 | total_found += add_new_free_space(block_group, | 365 | total_found += add_new_free_space(block_group, |
338 | fs_info, last, | 366 | fs_info, last, |
339 | key.objectid); | 367 | key.objectid); |
340 | last = key.objectid + key.offset; | 368 | last = key.objectid + key.offset; |
341 | } | ||
342 | 369 | ||
343 | if (total_found > (1024 * 1024 * 2)) { | 370 | if (total_found > (1024 * 1024 * 2)) { |
344 | total_found = 0; | 371 | total_found = 0; |
345 | wake_up(&block_group->caching_q); | 372 | wake_up(&caching_ctl->wait); |
373 | } | ||
346 | } | 374 | } |
347 | next: | ||
348 | path->slots[0]++; | 375 | path->slots[0]++; |
349 | } | 376 | } |
350 | ret = 0; | 377 | ret = 0; |
@@ -352,33 +379,65 @@ next: | |||
352 | total_found += add_new_free_space(block_group, fs_info, last, | 379 | total_found += add_new_free_space(block_group, fs_info, last, |
353 | block_group->key.objectid + | 380 | block_group->key.objectid + |
354 | block_group->key.offset); | 381 | block_group->key.offset); |
382 | caching_ctl->progress = (u64)-1; | ||
355 | 383 | ||
356 | spin_lock(&block_group->lock); | 384 | spin_lock(&block_group->lock); |
385 | block_group->caching_ctl = NULL; | ||
357 | block_group->cached = BTRFS_CACHE_FINISHED; | 386 | block_group->cached = BTRFS_CACHE_FINISHED; |
358 | spin_unlock(&block_group->lock); | 387 | spin_unlock(&block_group->lock); |
359 | 388 | ||
360 | err: | 389 | err: |
361 | btrfs_free_path(path); | 390 | btrfs_free_path(path); |
362 | up_read(&fs_info->extent_commit_sem); | 391 | up_read(&fs_info->extent_commit_sem); |
363 | atomic_dec(&block_group->space_info->caching_threads); | ||
364 | wake_up(&block_group->caching_q); | ||
365 | 392 | ||
393 | free_excluded_extents(extent_root, block_group); | ||
394 | |||
395 | mutex_unlock(&caching_ctl->mutex); | ||
396 | wake_up(&caching_ctl->wait); | ||
397 | |||
398 | put_caching_control(caching_ctl); | ||
399 | atomic_dec(&block_group->space_info->caching_threads); | ||
366 | return 0; | 400 | return 0; |
367 | } | 401 | } |
368 | 402 | ||
369 | static int cache_block_group(struct btrfs_block_group_cache *cache) | 403 | static int cache_block_group(struct btrfs_block_group_cache *cache) |
370 | { | 404 | { |
405 | struct btrfs_fs_info *fs_info = cache->fs_info; | ||
406 | struct btrfs_caching_control *caching_ctl; | ||
371 | struct task_struct *tsk; | 407 | struct task_struct *tsk; |
372 | int ret = 0; | 408 | int ret = 0; |
373 | 409 | ||
410 | smp_mb(); | ||
411 | if (cache->cached != BTRFS_CACHE_NO) | ||
412 | return 0; | ||
413 | |||
414 | caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); | ||
415 | BUG_ON(!caching_ctl); | ||
416 | |||
417 | INIT_LIST_HEAD(&caching_ctl->list); | ||
418 | mutex_init(&caching_ctl->mutex); | ||
419 | init_waitqueue_head(&caching_ctl->wait); | ||
420 | caching_ctl->block_group = cache; | ||
421 | caching_ctl->progress = cache->key.objectid; | ||
422 | /* one for caching kthread, one for caching block group list */ | ||
423 | atomic_set(&caching_ctl->count, 2); | ||
424 | |||
374 | spin_lock(&cache->lock); | 425 | spin_lock(&cache->lock); |
375 | if (cache->cached != BTRFS_CACHE_NO) { | 426 | if (cache->cached != BTRFS_CACHE_NO) { |
376 | spin_unlock(&cache->lock); | 427 | spin_unlock(&cache->lock); |
377 | return ret; | 428 | kfree(caching_ctl); |
429 | return 0; | ||
378 | } | 430 | } |
431 | cache->caching_ctl = caching_ctl; | ||
379 | cache->cached = BTRFS_CACHE_STARTED; | 432 | cache->cached = BTRFS_CACHE_STARTED; |
380 | spin_unlock(&cache->lock); | 433 | spin_unlock(&cache->lock); |
381 | 434 | ||
435 | down_write(&fs_info->extent_commit_sem); | ||
436 | list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); | ||
437 | up_write(&fs_info->extent_commit_sem); | ||
438 | |||
439 | atomic_inc(&cache->space_info->caching_threads); | ||
440 | |||
382 | tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", | 441 | tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", |
383 | cache->key.objectid); | 442 | cache->key.objectid); |
384 | if (IS_ERR(tsk)) { | 443 | if (IS_ERR(tsk)) { |
@@ -1657,7 +1716,6 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, | |||
1657 | parent, ref_root, flags, | 1716 | parent, ref_root, flags, |
1658 | ref->objectid, ref->offset, | 1717 | ref->objectid, ref->offset, |
1659 | &ins, node->ref_mod); | 1718 | &ins, node->ref_mod); |
1660 | update_reserved_extents(root, ins.objectid, ins.offset, 0); | ||
1661 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { | 1719 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { |
1662 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, | 1720 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, |
1663 | node->num_bytes, parent, | 1721 | node->num_bytes, parent, |
@@ -1783,7 +1841,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, | |||
1783 | extent_op->flags_to_set, | 1841 | extent_op->flags_to_set, |
1784 | &extent_op->key, | 1842 | &extent_op->key, |
1785 | ref->level, &ins); | 1843 | ref->level, &ins); |
1786 | update_reserved_extents(root, ins.objectid, ins.offset, 0); | ||
1787 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { | 1844 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { |
1788 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, | 1845 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, |
1789 | node->num_bytes, parent, ref_root, | 1846 | node->num_bytes, parent, ref_root, |
@@ -1818,16 +1875,32 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, | |||
1818 | BUG_ON(extent_op); | 1875 | BUG_ON(extent_op); |
1819 | head = btrfs_delayed_node_to_head(node); | 1876 | head = btrfs_delayed_node_to_head(node); |
1820 | if (insert_reserved) { | 1877 | if (insert_reserved) { |
1878 | int mark_free = 0; | ||
1879 | struct extent_buffer *must_clean = NULL; | ||
1880 | |||
1881 | ret = pin_down_bytes(trans, root, NULL, | ||
1882 | node->bytenr, node->num_bytes, | ||
1883 | head->is_data, 1, &must_clean); | ||
1884 | if (ret > 0) | ||
1885 | mark_free = 1; | ||
1886 | |||
1887 | if (must_clean) { | ||
1888 | clean_tree_block(NULL, root, must_clean); | ||
1889 | btrfs_tree_unlock(must_clean); | ||
1890 | free_extent_buffer(must_clean); | ||
1891 | } | ||
1821 | if (head->is_data) { | 1892 | if (head->is_data) { |
1822 | ret = btrfs_del_csums(trans, root, | 1893 | ret = btrfs_del_csums(trans, root, |
1823 | node->bytenr, | 1894 | node->bytenr, |
1824 | node->num_bytes); | 1895 | node->num_bytes); |
1825 | BUG_ON(ret); | 1896 | BUG_ON(ret); |
1826 | } | 1897 | } |
1827 | btrfs_update_pinned_extents(root, node->bytenr, | 1898 | if (mark_free) { |
1828 | node->num_bytes, 1); | 1899 | ret = btrfs_free_reserved_extent(root, |
1829 | update_reserved_extents(root, node->bytenr, | 1900 | node->bytenr, |
1830 | node->num_bytes, 0); | 1901 | node->num_bytes); |
1902 | BUG_ON(ret); | ||
1903 | } | ||
1831 | } | 1904 | } |
1832 | mutex_unlock(&head->mutex); | 1905 | mutex_unlock(&head->mutex); |
1833 | return 0; | 1906 | return 0; |
@@ -2706,6 +2779,8 @@ int btrfs_check_metadata_free_space(struct btrfs_root *root) | |||
2706 | /* get the space info for where the metadata will live */ | 2779 | /* get the space info for where the metadata will live */ |
2707 | alloc_target = btrfs_get_alloc_profile(root, 0); | 2780 | alloc_target = btrfs_get_alloc_profile(root, 0); |
2708 | meta_sinfo = __find_space_info(info, alloc_target); | 2781 | meta_sinfo = __find_space_info(info, alloc_target); |
2782 | if (!meta_sinfo) | ||
2783 | goto alloc; | ||
2709 | 2784 | ||
2710 | again: | 2785 | again: |
2711 | spin_lock(&meta_sinfo->lock); | 2786 | spin_lock(&meta_sinfo->lock); |
@@ -2717,12 +2792,13 @@ again: | |||
2717 | do_div(thresh, 100); | 2792 | do_div(thresh, 100); |
2718 | 2793 | ||
2719 | if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | 2794 | if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + |
2720 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) { | 2795 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + |
2796 | meta_sinfo->bytes_super > thresh) { | ||
2721 | struct btrfs_trans_handle *trans; | 2797 | struct btrfs_trans_handle *trans; |
2722 | if (!meta_sinfo->full) { | 2798 | if (!meta_sinfo->full) { |
2723 | meta_sinfo->force_alloc = 1; | 2799 | meta_sinfo->force_alloc = 1; |
2724 | spin_unlock(&meta_sinfo->lock); | 2800 | spin_unlock(&meta_sinfo->lock); |
2725 | 2801 | alloc: | |
2726 | trans = btrfs_start_transaction(root, 1); | 2802 | trans = btrfs_start_transaction(root, 1); |
2727 | if (!trans) | 2803 | if (!trans) |
2728 | return -ENOMEM; | 2804 | return -ENOMEM; |
@@ -2730,6 +2806,10 @@ again: | |||
2730 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 2806 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, |
2731 | 2 * 1024 * 1024, alloc_target, 0); | 2807 | 2 * 1024 * 1024, alloc_target, 0); |
2732 | btrfs_end_transaction(trans, root); | 2808 | btrfs_end_transaction(trans, root); |
2809 | if (!meta_sinfo) { | ||
2810 | meta_sinfo = __find_space_info(info, | ||
2811 | alloc_target); | ||
2812 | } | ||
2733 | goto again; | 2813 | goto again; |
2734 | } | 2814 | } |
2735 | spin_unlock(&meta_sinfo->lock); | 2815 | spin_unlock(&meta_sinfo->lock); |
@@ -2765,13 +2845,16 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | |||
2765 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 2845 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
2766 | 2846 | ||
2767 | data_sinfo = BTRFS_I(inode)->space_info; | 2847 | data_sinfo = BTRFS_I(inode)->space_info; |
2848 | if (!data_sinfo) | ||
2849 | goto alloc; | ||
2850 | |||
2768 | again: | 2851 | again: |
2769 | /* make sure we have enough space to handle the data first */ | 2852 | /* make sure we have enough space to handle the data first */ |
2770 | spin_lock(&data_sinfo->lock); | 2853 | spin_lock(&data_sinfo->lock); |
2771 | if (data_sinfo->total_bytes - data_sinfo->bytes_used - | 2854 | if (data_sinfo->total_bytes - data_sinfo->bytes_used - |
2772 | data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - | 2855 | data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - |
2773 | data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - | 2856 | data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - |
2774 | data_sinfo->bytes_may_use < bytes) { | 2857 | data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) { |
2775 | struct btrfs_trans_handle *trans; | 2858 | struct btrfs_trans_handle *trans; |
2776 | 2859 | ||
2777 | /* | 2860 | /* |
@@ -2783,7 +2866,7 @@ again: | |||
2783 | 2866 | ||
2784 | data_sinfo->force_alloc = 1; | 2867 | data_sinfo->force_alloc = 1; |
2785 | spin_unlock(&data_sinfo->lock); | 2868 | spin_unlock(&data_sinfo->lock); |
2786 | 2869 | alloc: | |
2787 | alloc_target = btrfs_get_alloc_profile(root, 1); | 2870 | alloc_target = btrfs_get_alloc_profile(root, 1); |
2788 | trans = btrfs_start_transaction(root, 1); | 2871 | trans = btrfs_start_transaction(root, 1); |
2789 | if (!trans) | 2872 | if (!trans) |
@@ -2795,6 +2878,11 @@ again: | |||
2795 | btrfs_end_transaction(trans, root); | 2878 | btrfs_end_transaction(trans, root); |
2796 | if (ret) | 2879 | if (ret) |
2797 | return ret; | 2880 | return ret; |
2881 | |||
2882 | if (!data_sinfo) { | ||
2883 | btrfs_set_inode_space_info(root, inode); | ||
2884 | data_sinfo = BTRFS_I(inode)->space_info; | ||
2885 | } | ||
2798 | goto again; | 2886 | goto again; |
2799 | } | 2887 | } |
2800 | spin_unlock(&data_sinfo->lock); | 2888 | spin_unlock(&data_sinfo->lock); |
@@ -3009,10 +3097,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
3009 | num_bytes = min(total, cache->key.offset - byte_in_group); | 3097 | num_bytes = min(total, cache->key.offset - byte_in_group); |
3010 | if (alloc) { | 3098 | if (alloc) { |
3011 | old_val += num_bytes; | 3099 | old_val += num_bytes; |
3100 | btrfs_set_block_group_used(&cache->item, old_val); | ||
3101 | cache->reserved -= num_bytes; | ||
3012 | cache->space_info->bytes_used += num_bytes; | 3102 | cache->space_info->bytes_used += num_bytes; |
3103 | cache->space_info->bytes_reserved -= num_bytes; | ||
3013 | if (cache->ro) | 3104 | if (cache->ro) |
3014 | cache->space_info->bytes_readonly -= num_bytes; | 3105 | cache->space_info->bytes_readonly -= num_bytes; |
3015 | btrfs_set_block_group_used(&cache->item, old_val); | ||
3016 | spin_unlock(&cache->lock); | 3106 | spin_unlock(&cache->lock); |
3017 | spin_unlock(&cache->space_info->lock); | 3107 | spin_unlock(&cache->space_info->lock); |
3018 | } else { | 3108 | } else { |
@@ -3057,127 +3147,136 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) | |||
3057 | return bytenr; | 3147 | return bytenr; |
3058 | } | 3148 | } |
3059 | 3149 | ||
3060 | int btrfs_update_pinned_extents(struct btrfs_root *root, | 3150 | /* |
3061 | u64 bytenr, u64 num, int pin) | 3151 | * this function must be called within transaction |
3152 | */ | ||
3153 | int btrfs_pin_extent(struct btrfs_root *root, | ||
3154 | u64 bytenr, u64 num_bytes, int reserved) | ||
3062 | { | 3155 | { |
3063 | u64 len; | ||
3064 | struct btrfs_block_group_cache *cache; | ||
3065 | struct btrfs_fs_info *fs_info = root->fs_info; | 3156 | struct btrfs_fs_info *fs_info = root->fs_info; |
3157 | struct btrfs_block_group_cache *cache; | ||
3066 | 3158 | ||
3067 | if (pin) | 3159 | cache = btrfs_lookup_block_group(fs_info, bytenr); |
3068 | set_extent_dirty(&fs_info->pinned_extents, | 3160 | BUG_ON(!cache); |
3069 | bytenr, bytenr + num - 1, GFP_NOFS); | ||
3070 | |||
3071 | while (num > 0) { | ||
3072 | cache = btrfs_lookup_block_group(fs_info, bytenr); | ||
3073 | BUG_ON(!cache); | ||
3074 | len = min(num, cache->key.offset - | ||
3075 | (bytenr - cache->key.objectid)); | ||
3076 | if (pin) { | ||
3077 | spin_lock(&cache->space_info->lock); | ||
3078 | spin_lock(&cache->lock); | ||
3079 | cache->pinned += len; | ||
3080 | cache->space_info->bytes_pinned += len; | ||
3081 | spin_unlock(&cache->lock); | ||
3082 | spin_unlock(&cache->space_info->lock); | ||
3083 | fs_info->total_pinned += len; | ||
3084 | } else { | ||
3085 | int unpin = 0; | ||
3086 | 3161 | ||
3087 | /* | 3162 | spin_lock(&cache->space_info->lock); |
3088 | * in order to not race with the block group caching, we | 3163 | spin_lock(&cache->lock); |
3089 | * only want to unpin the extent if we are cached. If | 3164 | cache->pinned += num_bytes; |
3090 | * we aren't cached, we want to start async caching this | 3165 | cache->space_info->bytes_pinned += num_bytes; |
3091 | * block group so we can free the extent the next time | 3166 | if (reserved) { |
3092 | * around. | 3167 | cache->reserved -= num_bytes; |
3093 | */ | 3168 | cache->space_info->bytes_reserved -= num_bytes; |
3094 | spin_lock(&cache->space_info->lock); | 3169 | } |
3095 | spin_lock(&cache->lock); | 3170 | spin_unlock(&cache->lock); |
3096 | unpin = (cache->cached == BTRFS_CACHE_FINISHED); | 3171 | spin_unlock(&cache->space_info->lock); |
3097 | if (likely(unpin)) { | ||
3098 | cache->pinned -= len; | ||
3099 | cache->space_info->bytes_pinned -= len; | ||
3100 | fs_info->total_pinned -= len; | ||
3101 | } | ||
3102 | spin_unlock(&cache->lock); | ||
3103 | spin_unlock(&cache->space_info->lock); | ||
3104 | 3172 | ||
3105 | if (likely(unpin)) | 3173 | btrfs_put_block_group(cache); |
3106 | clear_extent_dirty(&fs_info->pinned_extents, | ||
3107 | bytenr, bytenr + len -1, | ||
3108 | GFP_NOFS); | ||
3109 | else | ||
3110 | cache_block_group(cache); | ||
3111 | 3174 | ||
3112 | if (unpin) | 3175 | set_extent_dirty(fs_info->pinned_extents, |
3113 | btrfs_add_free_space(cache, bytenr, len); | 3176 | bytenr, bytenr + num_bytes - 1, GFP_NOFS); |
3114 | } | 3177 | return 0; |
3115 | btrfs_put_block_group(cache); | 3178 | } |
3116 | bytenr += len; | 3179 | |
3117 | num -= len; | 3180 | static int update_reserved_extents(struct btrfs_block_group_cache *cache, |
3181 | u64 num_bytes, int reserve) | ||
3182 | { | ||
3183 | spin_lock(&cache->space_info->lock); | ||
3184 | spin_lock(&cache->lock); | ||
3185 | if (reserve) { | ||
3186 | cache->reserved += num_bytes; | ||
3187 | cache->space_info->bytes_reserved += num_bytes; | ||
3188 | } else { | ||
3189 | cache->reserved -= num_bytes; | ||
3190 | cache->space_info->bytes_reserved -= num_bytes; | ||
3118 | } | 3191 | } |
3192 | spin_unlock(&cache->lock); | ||
3193 | spin_unlock(&cache->space_info->lock); | ||
3119 | return 0; | 3194 | return 0; |
3120 | } | 3195 | } |
3121 | 3196 | ||
3122 | static int update_reserved_extents(struct btrfs_root *root, | 3197 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, |
3123 | u64 bytenr, u64 num, int reserve) | 3198 | struct btrfs_root *root) |
3124 | { | 3199 | { |
3125 | u64 len; | ||
3126 | struct btrfs_block_group_cache *cache; | ||
3127 | struct btrfs_fs_info *fs_info = root->fs_info; | 3200 | struct btrfs_fs_info *fs_info = root->fs_info; |
3201 | struct btrfs_caching_control *next; | ||
3202 | struct btrfs_caching_control *caching_ctl; | ||
3203 | struct btrfs_block_group_cache *cache; | ||
3128 | 3204 | ||
3129 | while (num > 0) { | 3205 | down_write(&fs_info->extent_commit_sem); |
3130 | cache = btrfs_lookup_block_group(fs_info, bytenr); | ||
3131 | BUG_ON(!cache); | ||
3132 | len = min(num, cache->key.offset - | ||
3133 | (bytenr - cache->key.objectid)); | ||
3134 | 3206 | ||
3135 | spin_lock(&cache->space_info->lock); | 3207 | list_for_each_entry_safe(caching_ctl, next, |
3136 | spin_lock(&cache->lock); | 3208 | &fs_info->caching_block_groups, list) { |
3137 | if (reserve) { | 3209 | cache = caching_ctl->block_group; |
3138 | cache->reserved += len; | 3210 | if (block_group_cache_done(cache)) { |
3139 | cache->space_info->bytes_reserved += len; | 3211 | cache->last_byte_to_unpin = (u64)-1; |
3212 | list_del_init(&caching_ctl->list); | ||
3213 | put_caching_control(caching_ctl); | ||
3140 | } else { | 3214 | } else { |
3141 | cache->reserved -= len; | 3215 | cache->last_byte_to_unpin = caching_ctl->progress; |
3142 | cache->space_info->bytes_reserved -= len; | ||
3143 | } | 3216 | } |
3144 | spin_unlock(&cache->lock); | ||
3145 | spin_unlock(&cache->space_info->lock); | ||
3146 | btrfs_put_block_group(cache); | ||
3147 | bytenr += len; | ||
3148 | num -= len; | ||
3149 | } | 3217 | } |
3218 | |||
3219 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) | ||
3220 | fs_info->pinned_extents = &fs_info->freed_extents[1]; | ||
3221 | else | ||
3222 | fs_info->pinned_extents = &fs_info->freed_extents[0]; | ||
3223 | |||
3224 | up_write(&fs_info->extent_commit_sem); | ||
3150 | return 0; | 3225 | return 0; |
3151 | } | 3226 | } |
3152 | 3227 | ||
3153 | int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) | 3228 | static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) |
3154 | { | 3229 | { |
3155 | u64 last = 0; | 3230 | struct btrfs_fs_info *fs_info = root->fs_info; |
3156 | u64 start; | 3231 | struct btrfs_block_group_cache *cache = NULL; |
3157 | u64 end; | 3232 | u64 len; |
3158 | struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; | ||
3159 | int ret; | ||
3160 | 3233 | ||
3161 | while (1) { | 3234 | while (start <= end) { |
3162 | ret = find_first_extent_bit(pinned_extents, last, | 3235 | if (!cache || |
3163 | &start, &end, EXTENT_DIRTY); | 3236 | start >= cache->key.objectid + cache->key.offset) { |
3164 | if (ret) | 3237 | if (cache) |
3165 | break; | 3238 | btrfs_put_block_group(cache); |
3239 | cache = btrfs_lookup_block_group(fs_info, start); | ||
3240 | BUG_ON(!cache); | ||
3241 | } | ||
3242 | |||
3243 | len = cache->key.objectid + cache->key.offset - start; | ||
3244 | len = min(len, end + 1 - start); | ||
3166 | 3245 | ||
3167 | set_extent_dirty(copy, start, end, GFP_NOFS); | 3246 | if (start < cache->last_byte_to_unpin) { |
3168 | last = end + 1; | 3247 | len = min(len, cache->last_byte_to_unpin - start); |
3248 | btrfs_add_free_space(cache, start, len); | ||
3249 | } | ||
3250 | |||
3251 | spin_lock(&cache->space_info->lock); | ||
3252 | spin_lock(&cache->lock); | ||
3253 | cache->pinned -= len; | ||
3254 | cache->space_info->bytes_pinned -= len; | ||
3255 | spin_unlock(&cache->lock); | ||
3256 | spin_unlock(&cache->space_info->lock); | ||
3257 | |||
3258 | start += len; | ||
3169 | } | 3259 | } |
3260 | |||
3261 | if (cache) | ||
3262 | btrfs_put_block_group(cache); | ||
3170 | return 0; | 3263 | return 0; |
3171 | } | 3264 | } |
3172 | 3265 | ||
3173 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | 3266 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, |
3174 | struct btrfs_root *root, | 3267 | struct btrfs_root *root) |
3175 | struct extent_io_tree *unpin) | ||
3176 | { | 3268 | { |
3269 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
3270 | struct extent_io_tree *unpin; | ||
3177 | u64 start; | 3271 | u64 start; |
3178 | u64 end; | 3272 | u64 end; |
3179 | int ret; | 3273 | int ret; |
3180 | 3274 | ||
3275 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) | ||
3276 | unpin = &fs_info->freed_extents[1]; | ||
3277 | else | ||
3278 | unpin = &fs_info->freed_extents[0]; | ||
3279 | |||
3181 | while (1) { | 3280 | while (1) { |
3182 | ret = find_first_extent_bit(unpin, 0, &start, &end, | 3281 | ret = find_first_extent_bit(unpin, 0, &start, &end, |
3183 | EXTENT_DIRTY); | 3282 | EXTENT_DIRTY); |
@@ -3186,10 +3285,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
3186 | 3285 | ||
3187 | ret = btrfs_discard_extent(root, start, end + 1 - start); | 3286 | ret = btrfs_discard_extent(root, start, end + 1 - start); |
3188 | 3287 | ||
3189 | /* unlocks the pinned mutex */ | ||
3190 | btrfs_update_pinned_extents(root, start, end + 1 - start, 0); | ||
3191 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | 3288 | clear_extent_dirty(unpin, start, end, GFP_NOFS); |
3192 | 3289 | unpin_extent_range(root, start, end); | |
3193 | cond_resched(); | 3290 | cond_resched(); |
3194 | } | 3291 | } |
3195 | 3292 | ||
@@ -3199,7 +3296,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
3199 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | 3296 | static int pin_down_bytes(struct btrfs_trans_handle *trans, |
3200 | struct btrfs_root *root, | 3297 | struct btrfs_root *root, |
3201 | struct btrfs_path *path, | 3298 | struct btrfs_path *path, |
3202 | u64 bytenr, u64 num_bytes, int is_data, | 3299 | u64 bytenr, u64 num_bytes, |
3300 | int is_data, int reserved, | ||
3203 | struct extent_buffer **must_clean) | 3301 | struct extent_buffer **must_clean) |
3204 | { | 3302 | { |
3205 | int err = 0; | 3303 | int err = 0; |
@@ -3231,15 +3329,15 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, | |||
3231 | } | 3329 | } |
3232 | free_extent_buffer(buf); | 3330 | free_extent_buffer(buf); |
3233 | pinit: | 3331 | pinit: |
3234 | btrfs_set_path_blocking(path); | 3332 | if (path) |
3333 | btrfs_set_path_blocking(path); | ||
3235 | /* unlocks the pinned mutex */ | 3334 | /* unlocks the pinned mutex */ |
3236 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | 3335 | btrfs_pin_extent(root, bytenr, num_bytes, reserved); |
3237 | 3336 | ||
3238 | BUG_ON(err < 0); | 3337 | BUG_ON(err < 0); |
3239 | return 0; | 3338 | return 0; |
3240 | } | 3339 | } |
3241 | 3340 | ||
3242 | |||
3243 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 3341 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
3244 | struct btrfs_root *root, | 3342 | struct btrfs_root *root, |
3245 | u64 bytenr, u64 num_bytes, u64 parent, | 3343 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -3413,7 +3511,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3413 | } | 3511 | } |
3414 | 3512 | ||
3415 | ret = pin_down_bytes(trans, root, path, bytenr, | 3513 | ret = pin_down_bytes(trans, root, path, bytenr, |
3416 | num_bytes, is_data, &must_clean); | 3514 | num_bytes, is_data, 0, &must_clean); |
3417 | if (ret > 0) | 3515 | if (ret > 0) |
3418 | mark_free = 1; | 3516 | mark_free = 1; |
3419 | BUG_ON(ret < 0); | 3517 | BUG_ON(ret < 0); |
@@ -3544,8 +3642,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3544 | if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { | 3642 | if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { |
3545 | WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); | 3643 | WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); |
3546 | /* unlocks the pinned mutex */ | 3644 | /* unlocks the pinned mutex */ |
3547 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | 3645 | btrfs_pin_extent(root, bytenr, num_bytes, 1); |
3548 | update_reserved_extents(root, bytenr, num_bytes, 0); | ||
3549 | ret = 0; | 3646 | ret = 0; |
3550 | } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { | 3647 | } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { |
3551 | ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, | 3648 | ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, |
@@ -3585,19 +3682,33 @@ static noinline int | |||
3585 | wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, | 3682 | wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, |
3586 | u64 num_bytes) | 3683 | u64 num_bytes) |
3587 | { | 3684 | { |
3685 | struct btrfs_caching_control *caching_ctl; | ||
3588 | DEFINE_WAIT(wait); | 3686 | DEFINE_WAIT(wait); |
3589 | 3687 | ||
3590 | prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE); | 3688 | caching_ctl = get_caching_control(cache); |
3591 | 3689 | if (!caching_ctl) | |
3592 | if (block_group_cache_done(cache)) { | ||
3593 | finish_wait(&cache->caching_q, &wait); | ||
3594 | return 0; | 3690 | return 0; |
3595 | } | ||
3596 | schedule(); | ||
3597 | finish_wait(&cache->caching_q, &wait); | ||
3598 | 3691 | ||
3599 | wait_event(cache->caching_q, block_group_cache_done(cache) || | 3692 | wait_event(caching_ctl->wait, block_group_cache_done(cache) || |
3600 | (cache->free_space >= num_bytes)); | 3693 | (cache->free_space >= num_bytes)); |
3694 | |||
3695 | put_caching_control(caching_ctl); | ||
3696 | return 0; | ||
3697 | } | ||
3698 | |||
3699 | static noinline int | ||
3700 | wait_block_group_cache_done(struct btrfs_block_group_cache *cache) | ||
3701 | { | ||
3702 | struct btrfs_caching_control *caching_ctl; | ||
3703 | DEFINE_WAIT(wait); | ||
3704 | |||
3705 | caching_ctl = get_caching_control(cache); | ||
3706 | if (!caching_ctl) | ||
3707 | return 0; | ||
3708 | |||
3709 | wait_event(caching_ctl->wait, block_group_cache_done(cache)); | ||
3710 | |||
3711 | put_caching_control(caching_ctl); | ||
3601 | return 0; | 3712 | return 0; |
3602 | } | 3713 | } |
3603 | 3714 | ||
@@ -3635,6 +3746,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
3635 | int last_ptr_loop = 0; | 3746 | int last_ptr_loop = 0; |
3636 | int loop = 0; | 3747 | int loop = 0; |
3637 | bool found_uncached_bg = false; | 3748 | bool found_uncached_bg = false; |
3749 | bool failed_cluster_refill = false; | ||
3638 | 3750 | ||
3639 | WARN_ON(num_bytes < root->sectorsize); | 3751 | WARN_ON(num_bytes < root->sectorsize); |
3640 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); | 3752 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); |
@@ -3732,7 +3844,16 @@ have_block_group: | |||
3732 | if (unlikely(block_group->ro)) | 3844 | if (unlikely(block_group->ro)) |
3733 | goto loop; | 3845 | goto loop; |
3734 | 3846 | ||
3735 | if (last_ptr) { | 3847 | /* |
3848 | * Ok we want to try and use the cluster allocator, so lets look | ||
3849 | * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will | ||
3850 | * have tried the cluster allocator plenty of times at this | ||
3851 | * point and not have found anything, so we are likely way too | ||
3852 | * fragmented for the clustering stuff to find anything, so lets | ||
3853 | * just skip it and let the allocator find whatever block it can | ||
3854 | * find | ||
3855 | */ | ||
3856 | if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) { | ||
3736 | /* | 3857 | /* |
3737 | * the refill lock keeps out other | 3858 | * the refill lock keeps out other |
3738 | * people trying to start a new cluster | 3859 | * people trying to start a new cluster |
@@ -3807,9 +3928,11 @@ refill_cluster: | |||
3807 | spin_unlock(&last_ptr->refill_lock); | 3928 | spin_unlock(&last_ptr->refill_lock); |
3808 | goto checks; | 3929 | goto checks; |
3809 | } | 3930 | } |
3810 | } else if (!cached && loop > LOOP_CACHING_NOWAIT) { | 3931 | } else if (!cached && loop > LOOP_CACHING_NOWAIT |
3932 | && !failed_cluster_refill) { | ||
3811 | spin_unlock(&last_ptr->refill_lock); | 3933 | spin_unlock(&last_ptr->refill_lock); |
3812 | 3934 | ||
3935 | failed_cluster_refill = true; | ||
3813 | wait_block_group_cache_progress(block_group, | 3936 | wait_block_group_cache_progress(block_group, |
3814 | num_bytes + empty_cluster + empty_size); | 3937 | num_bytes + empty_cluster + empty_size); |
3815 | goto have_block_group; | 3938 | goto have_block_group; |
@@ -3821,13 +3944,9 @@ refill_cluster: | |||
3821 | * cluster. Free the cluster we've been trying | 3944 | * cluster. Free the cluster we've been trying |
3822 | * to use, and go to the next block group | 3945 | * to use, and go to the next block group |
3823 | */ | 3946 | */ |
3824 | if (loop < LOOP_NO_EMPTY_SIZE) { | 3947 | btrfs_return_cluster_to_free_space(NULL, last_ptr); |
3825 | btrfs_return_cluster_to_free_space(NULL, | ||
3826 | last_ptr); | ||
3827 | spin_unlock(&last_ptr->refill_lock); | ||
3828 | goto loop; | ||
3829 | } | ||
3830 | spin_unlock(&last_ptr->refill_lock); | 3948 | spin_unlock(&last_ptr->refill_lock); |
3949 | goto loop; | ||
3831 | } | 3950 | } |
3832 | 3951 | ||
3833 | offset = btrfs_find_space_for_alloc(block_group, search_start, | 3952 | offset = btrfs_find_space_for_alloc(block_group, search_start, |
@@ -3881,9 +4000,12 @@ checks: | |||
3881 | search_start - offset); | 4000 | search_start - offset); |
3882 | BUG_ON(offset > search_start); | 4001 | BUG_ON(offset > search_start); |
3883 | 4002 | ||
4003 | update_reserved_extents(block_group, num_bytes, 1); | ||
4004 | |||
3884 | /* we are all good, lets return */ | 4005 | /* we are all good, lets return */ |
3885 | break; | 4006 | break; |
3886 | loop: | 4007 | loop: |
4008 | failed_cluster_refill = false; | ||
3887 | btrfs_put_block_group(block_group); | 4009 | btrfs_put_block_group(block_group); |
3888 | } | 4010 | } |
3889 | up_read(&space_info->groups_sem); | 4011 | up_read(&space_info->groups_sem); |
@@ -3973,12 +4095,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) | |||
3973 | up_read(&info->groups_sem); | 4095 | up_read(&info->groups_sem); |
3974 | } | 4096 | } |
3975 | 4097 | ||
3976 | static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, | 4098 | int btrfs_reserve_extent(struct btrfs_trans_handle *trans, |
3977 | struct btrfs_root *root, | 4099 | struct btrfs_root *root, |
3978 | u64 num_bytes, u64 min_alloc_size, | 4100 | u64 num_bytes, u64 min_alloc_size, |
3979 | u64 empty_size, u64 hint_byte, | 4101 | u64 empty_size, u64 hint_byte, |
3980 | u64 search_end, struct btrfs_key *ins, | 4102 | u64 search_end, struct btrfs_key *ins, |
3981 | u64 data) | 4103 | u64 data) |
3982 | { | 4104 | { |
3983 | int ret; | 4105 | int ret; |
3984 | u64 search_start = 0; | 4106 | u64 search_start = 0; |
@@ -4044,25 +4166,8 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
4044 | ret = btrfs_discard_extent(root, start, len); | 4166 | ret = btrfs_discard_extent(root, start, len); |
4045 | 4167 | ||
4046 | btrfs_add_free_space(cache, start, len); | 4168 | btrfs_add_free_space(cache, start, len); |
4169 | update_reserved_extents(cache, len, 0); | ||
4047 | btrfs_put_block_group(cache); | 4170 | btrfs_put_block_group(cache); |
4048 | update_reserved_extents(root, start, len, 0); | ||
4049 | |||
4050 | return ret; | ||
4051 | } | ||
4052 | |||
4053 | int btrfs_reserve_extent(struct btrfs_trans_handle *trans, | ||
4054 | struct btrfs_root *root, | ||
4055 | u64 num_bytes, u64 min_alloc_size, | ||
4056 | u64 empty_size, u64 hint_byte, | ||
4057 | u64 search_end, struct btrfs_key *ins, | ||
4058 | u64 data) | ||
4059 | { | ||
4060 | int ret; | ||
4061 | ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, | ||
4062 | empty_size, hint_byte, search_end, ins, | ||
4063 | data); | ||
4064 | if (!ret) | ||
4065 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
4066 | 4171 | ||
4067 | return ret; | 4172 | return ret; |
4068 | } | 4173 | } |
@@ -4223,15 +4328,46 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
4223 | { | 4328 | { |
4224 | int ret; | 4329 | int ret; |
4225 | struct btrfs_block_group_cache *block_group; | 4330 | struct btrfs_block_group_cache *block_group; |
4331 | struct btrfs_caching_control *caching_ctl; | ||
4332 | u64 start = ins->objectid; | ||
4333 | u64 num_bytes = ins->offset; | ||
4226 | 4334 | ||
4227 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | 4335 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); |
4228 | cache_block_group(block_group); | 4336 | cache_block_group(block_group); |
4229 | wait_event(block_group->caching_q, | 4337 | caching_ctl = get_caching_control(block_group); |
4230 | block_group_cache_done(block_group)); | ||
4231 | 4338 | ||
4232 | ret = btrfs_remove_free_space(block_group, ins->objectid, | 4339 | if (!caching_ctl) { |
4233 | ins->offset); | 4340 | BUG_ON(!block_group_cache_done(block_group)); |
4234 | BUG_ON(ret); | 4341 | ret = btrfs_remove_free_space(block_group, start, num_bytes); |
4342 | BUG_ON(ret); | ||
4343 | } else { | ||
4344 | mutex_lock(&caching_ctl->mutex); | ||
4345 | |||
4346 | if (start >= caching_ctl->progress) { | ||
4347 | ret = add_excluded_extent(root, start, num_bytes); | ||
4348 | BUG_ON(ret); | ||
4349 | } else if (start + num_bytes <= caching_ctl->progress) { | ||
4350 | ret = btrfs_remove_free_space(block_group, | ||
4351 | start, num_bytes); | ||
4352 | BUG_ON(ret); | ||
4353 | } else { | ||
4354 | num_bytes = caching_ctl->progress - start; | ||
4355 | ret = btrfs_remove_free_space(block_group, | ||
4356 | start, num_bytes); | ||
4357 | BUG_ON(ret); | ||
4358 | |||
4359 | start = caching_ctl->progress; | ||
4360 | num_bytes = ins->objectid + ins->offset - | ||
4361 | caching_ctl->progress; | ||
4362 | ret = add_excluded_extent(root, start, num_bytes); | ||
4363 | BUG_ON(ret); | ||
4364 | } | ||
4365 | |||
4366 | mutex_unlock(&caching_ctl->mutex); | ||
4367 | put_caching_control(caching_ctl); | ||
4368 | } | ||
4369 | |||
4370 | update_reserved_extents(block_group, ins->offset, 1); | ||
4235 | btrfs_put_block_group(block_group); | 4371 | btrfs_put_block_group(block_group); |
4236 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 4372 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
4237 | 0, owner, offset, ins, 1); | 4373 | 0, owner, offset, ins, 1); |
@@ -4255,9 +4391,9 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans, | |||
4255 | int ret; | 4391 | int ret; |
4256 | u64 flags = 0; | 4392 | u64 flags = 0; |
4257 | 4393 | ||
4258 | ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, | 4394 | ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes, |
4259 | empty_size, hint_byte, search_end, | 4395 | empty_size, hint_byte, search_end, |
4260 | ins, 0); | 4396 | ins, 0); |
4261 | if (ret) | 4397 | if (ret) |
4262 | return ret; | 4398 | return ret; |
4263 | 4399 | ||
@@ -4268,7 +4404,6 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans, | |||
4268 | } else | 4404 | } else |
4269 | BUG_ON(parent > 0); | 4405 | BUG_ON(parent > 0); |
4270 | 4406 | ||
4271 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
4272 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | 4407 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { |
4273 | struct btrfs_delayed_extent_op *extent_op; | 4408 | struct btrfs_delayed_extent_op *extent_op; |
4274 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | 4409 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); |
@@ -4347,452 +4482,99 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
4347 | return buf; | 4482 | return buf; |
4348 | } | 4483 | } |
4349 | 4484 | ||
4350 | #if 0 | 4485 | struct walk_control { |
4351 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 4486 | u64 refs[BTRFS_MAX_LEVEL]; |
4352 | struct btrfs_root *root, struct extent_buffer *leaf) | 4487 | u64 flags[BTRFS_MAX_LEVEL]; |
4353 | { | 4488 | struct btrfs_key update_progress; |
4354 | u64 disk_bytenr; | 4489 | int stage; |
4355 | u64 num_bytes; | 4490 | int level; |
4356 | struct btrfs_key key; | 4491 | int shared_level; |
4357 | struct btrfs_file_extent_item *fi; | 4492 | int update_ref; |
4358 | u32 nritems; | 4493 | int keep_locks; |
4359 | int i; | 4494 | int reada_slot; |
4360 | int ret; | 4495 | int reada_count; |
4361 | 4496 | }; | |
4362 | BUG_ON(!btrfs_is_leaf(leaf)); | ||
4363 | nritems = btrfs_header_nritems(leaf); | ||
4364 | |||
4365 | for (i = 0; i < nritems; i++) { | ||
4366 | cond_resched(); | ||
4367 | btrfs_item_key_to_cpu(leaf, &key, i); | ||
4368 | |||
4369 | /* only extents have references, skip everything else */ | ||
4370 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | ||
4371 | continue; | ||
4372 | |||
4373 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); | ||
4374 | |||
4375 | /* inline extents live in the btree, they don't have refs */ | ||
4376 | if (btrfs_file_extent_type(leaf, fi) == | ||
4377 | BTRFS_FILE_EXTENT_INLINE) | ||
4378 | continue; | ||
4379 | |||
4380 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
4381 | |||
4382 | /* holes don't have refs */ | ||
4383 | if (disk_bytenr == 0) | ||
4384 | continue; | ||
4385 | |||
4386 | num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); | ||
4387 | ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes, | ||
4388 | leaf->start, 0, key.objectid, 0); | ||
4389 | BUG_ON(ret); | ||
4390 | } | ||
4391 | return 0; | ||
4392 | } | ||
4393 | |||
4394 | static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, | ||
4395 | struct btrfs_root *root, | ||
4396 | struct btrfs_leaf_ref *ref) | ||
4397 | { | ||
4398 | int i; | ||
4399 | int ret; | ||
4400 | struct btrfs_extent_info *info; | ||
4401 | struct refsort *sorted; | ||
4402 | |||
4403 | if (ref->nritems == 0) | ||
4404 | return 0; | ||
4405 | |||
4406 | sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS); | ||
4407 | for (i = 0; i < ref->nritems; i++) { | ||
4408 | sorted[i].bytenr = ref->extents[i].bytenr; | ||
4409 | sorted[i].slot = i; | ||
4410 | } | ||
4411 | sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL); | ||
4412 | |||
4413 | /* | ||
4414 | * the items in the ref were sorted when the ref was inserted | ||
4415 | * into the ref cache, so this is already in order | ||
4416 | */ | ||
4417 | for (i = 0; i < ref->nritems; i++) { | ||
4418 | info = ref->extents + sorted[i].slot; | ||
4419 | ret = btrfs_free_extent(trans, root, info->bytenr, | ||
4420 | info->num_bytes, ref->bytenr, | ||
4421 | ref->owner, ref->generation, | ||
4422 | info->objectid, 0); | ||
4423 | |||
4424 | atomic_inc(&root->fs_info->throttle_gen); | ||
4425 | wake_up(&root->fs_info->transaction_throttle); | ||
4426 | cond_resched(); | ||
4427 | |||
4428 | BUG_ON(ret); | ||
4429 | info++; | ||
4430 | } | ||
4431 | |||
4432 | kfree(sorted); | ||
4433 | return 0; | ||
4434 | } | ||
4435 | |||
4436 | |||
4437 | static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans, | ||
4438 | struct btrfs_root *root, u64 start, | ||
4439 | u64 len, u32 *refs) | ||
4440 | { | ||
4441 | int ret; | ||
4442 | |||
4443 | ret = btrfs_lookup_extent_refs(trans, root, start, len, refs); | ||
4444 | BUG_ON(ret); | ||
4445 | |||
4446 | #if 0 /* some debugging code in case we see problems here */ | ||
4447 | /* if the refs count is one, it won't get increased again. But | ||
4448 | * if the ref count is > 1, someone may be decreasing it at | ||
4449 | * the same time we are. | ||
4450 | */ | ||
4451 | if (*refs != 1) { | ||
4452 | struct extent_buffer *eb = NULL; | ||
4453 | eb = btrfs_find_create_tree_block(root, start, len); | ||
4454 | if (eb) | ||
4455 | btrfs_tree_lock(eb); | ||
4456 | |||
4457 | mutex_lock(&root->fs_info->alloc_mutex); | ||
4458 | ret = lookup_extent_ref(NULL, root, start, len, refs); | ||
4459 | BUG_ON(ret); | ||
4460 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
4461 | |||
4462 | if (eb) { | ||
4463 | btrfs_tree_unlock(eb); | ||
4464 | free_extent_buffer(eb); | ||
4465 | } | ||
4466 | if (*refs == 1) { | ||
4467 | printk(KERN_ERR "btrfs block %llu went down to one " | ||
4468 | "during drop_snap\n", (unsigned long long)start); | ||
4469 | } | ||
4470 | |||
4471 | } | ||
4472 | #endif | ||
4473 | |||
4474 | cond_resched(); | ||
4475 | return ret; | ||
4476 | } | ||
4477 | 4497 | ||
4498 | #define DROP_REFERENCE 1 | ||
4499 | #define UPDATE_BACKREF 2 | ||
4478 | 4500 | ||
4479 | /* | 4501 | static noinline void reada_walk_down(struct btrfs_trans_handle *trans, |
4480 | * this is used while deleting old snapshots, and it drops the refs | 4502 | struct btrfs_root *root, |
4481 | * on a whole subtree starting from a level 1 node. | 4503 | struct walk_control *wc, |
4482 | * | 4504 | struct btrfs_path *path) |
4483 | * The idea is to sort all the leaf pointers, and then drop the | ||
4484 | * ref on all the leaves in order. Most of the time the leaves | ||
4485 | * will have ref cache entries, so no leaf IOs will be required to | ||
4486 | * find the extents they have references on. | ||
4487 | * | ||
4488 | * For each leaf, any references it has are also dropped in order | ||
4489 | * | ||
4490 | * This ends up dropping the references in something close to optimal | ||
4491 | * order for reading and modifying the extent allocation tree. | ||
4492 | */ | ||
4493 | static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, | ||
4494 | struct btrfs_root *root, | ||
4495 | struct btrfs_path *path) | ||
4496 | { | 4505 | { |
4497 | u64 bytenr; | 4506 | u64 bytenr; |
4498 | u64 root_owner; | 4507 | u64 generation; |
4499 | u64 root_gen; | 4508 | u64 refs; |
4500 | struct extent_buffer *eb = path->nodes[1]; | 4509 | u64 last = 0; |
4501 | struct extent_buffer *leaf; | 4510 | u32 nritems; |
4502 | struct btrfs_leaf_ref *ref; | 4511 | u32 blocksize; |
4503 | struct refsort *sorted = NULL; | 4512 | struct btrfs_key key; |
4504 | int nritems = btrfs_header_nritems(eb); | 4513 | struct extent_buffer *eb; |
4505 | int ret; | 4514 | int ret; |
4506 | int i; | 4515 | int slot; |
4507 | int refi = 0; | 4516 | int nread = 0; |
4508 | int slot = path->slots[1]; | ||
4509 | u32 blocksize = btrfs_level_size(root, 0); | ||
4510 | u32 refs; | ||
4511 | |||
4512 | if (nritems == 0) | ||
4513 | goto out; | ||
4514 | |||
4515 | root_owner = btrfs_header_owner(eb); | ||
4516 | root_gen = btrfs_header_generation(eb); | ||
4517 | sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); | ||
4518 | 4517 | ||
4519 | /* | 4518 | if (path->slots[wc->level] < wc->reada_slot) { |
4520 | * step one, sort all the leaf pointers so we don't scribble | 4519 | wc->reada_count = wc->reada_count * 2 / 3; |
4521 | * randomly into the extent allocation tree | 4520 | wc->reada_count = max(wc->reada_count, 2); |
4522 | */ | 4521 | } else { |
4523 | for (i = slot; i < nritems; i++) { | 4522 | wc->reada_count = wc->reada_count * 3 / 2; |
4524 | sorted[refi].bytenr = btrfs_node_blockptr(eb, i); | 4523 | wc->reada_count = min_t(int, wc->reada_count, |
4525 | sorted[refi].slot = i; | 4524 | BTRFS_NODEPTRS_PER_BLOCK(root)); |
4526 | refi++; | ||
4527 | } | 4525 | } |
4528 | 4526 | ||
4529 | /* | 4527 | eb = path->nodes[wc->level]; |
4530 | * nritems won't be zero, but if we're picking up drop_snapshot | 4528 | nritems = btrfs_header_nritems(eb); |
4531 | * after a crash, slot might be > 0, so double check things | 4529 | blocksize = btrfs_level_size(root, wc->level - 1); |
4532 | * just in case. | ||
4533 | */ | ||
4534 | if (refi == 0) | ||
4535 | goto out; | ||
4536 | 4530 | ||
4537 | sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); | 4531 | for (slot = path->slots[wc->level]; slot < nritems; slot++) { |
4532 | if (nread >= wc->reada_count) | ||
4533 | break; | ||
4538 | 4534 | ||
4539 | /* | 4535 | cond_resched(); |
4540 | * the first loop frees everything the leaves point to | 4536 | bytenr = btrfs_node_blockptr(eb, slot); |
4541 | */ | 4537 | generation = btrfs_node_ptr_generation(eb, slot); |
4542 | for (i = 0; i < refi; i++) { | ||
4543 | u64 ptr_gen; | ||
4544 | 4538 | ||
4545 | bytenr = sorted[i].bytenr; | 4539 | if (slot == path->slots[wc->level]) |
4540 | goto reada; | ||
4546 | 4541 | ||
4547 | /* | 4542 | if (wc->stage == UPDATE_BACKREF && |
4548 | * check the reference count on this leaf. If it is > 1 | 4543 | generation <= root->root_key.offset) |
4549 | * we just decrement it below and don't update any | ||
4550 | * of the refs the leaf points to. | ||
4551 | */ | ||
4552 | ret = drop_snap_lookup_refcount(trans, root, bytenr, | ||
4553 | blocksize, &refs); | ||
4554 | BUG_ON(ret); | ||
4555 | if (refs != 1) | ||
4556 | continue; | 4544 | continue; |
4557 | 4545 | ||
4558 | ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot); | 4546 | if (wc->stage == DROP_REFERENCE) { |
4559 | 4547 | ret = btrfs_lookup_extent_info(trans, root, | |
4560 | /* | 4548 | bytenr, blocksize, |
4561 | * the leaf only had one reference, which means the | 4549 | &refs, NULL); |
4562 | * only thing pointing to this leaf is the snapshot | ||
4563 | * we're deleting. It isn't possible for the reference | ||
4564 | * count to increase again later | ||
4565 | * | ||
4566 | * The reference cache is checked for the leaf, | ||
4567 | * and if found we'll be able to drop any refs held by | ||
4568 | * the leaf without needing to read it in. | ||
4569 | */ | ||
4570 | ref = btrfs_lookup_leaf_ref(root, bytenr); | ||
4571 | if (ref && ref->generation != ptr_gen) { | ||
4572 | btrfs_free_leaf_ref(root, ref); | ||
4573 | ref = NULL; | ||
4574 | } | ||
4575 | if (ref) { | ||
4576 | ret = cache_drop_leaf_ref(trans, root, ref); | ||
4577 | BUG_ON(ret); | ||
4578 | btrfs_remove_leaf_ref(root, ref); | ||
4579 | btrfs_free_leaf_ref(root, ref); | ||
4580 | } else { | ||
4581 | /* | ||
4582 | * the leaf wasn't in the reference cache, so | ||
4583 | * we have to read it. | ||
4584 | */ | ||
4585 | leaf = read_tree_block(root, bytenr, blocksize, | ||
4586 | ptr_gen); | ||
4587 | ret = btrfs_drop_leaf_ref(trans, root, leaf); | ||
4588 | BUG_ON(ret); | 4550 | BUG_ON(ret); |
4589 | free_extent_buffer(leaf); | 4551 | BUG_ON(refs == 0); |
4590 | } | 4552 | if (refs == 1) |
4591 | atomic_inc(&root->fs_info->throttle_gen); | 4553 | goto reada; |
4592 | wake_up(&root->fs_info->transaction_throttle); | ||
4593 | cond_resched(); | ||
4594 | } | ||
4595 | |||
4596 | /* | ||
4597 | * run through the loop again to free the refs on the leaves. | ||
4598 | * This is faster than doing it in the loop above because | ||
4599 | * the leaves are likely to be clustered together. We end up | ||
4600 | * working in nice chunks on the extent allocation tree. | ||
4601 | */ | ||
4602 | for (i = 0; i < refi; i++) { | ||
4603 | bytenr = sorted[i].bytenr; | ||
4604 | ret = btrfs_free_extent(trans, root, bytenr, | ||
4605 | blocksize, eb->start, | ||
4606 | root_owner, root_gen, 0, 1); | ||
4607 | BUG_ON(ret); | ||
4608 | |||
4609 | atomic_inc(&root->fs_info->throttle_gen); | ||
4610 | wake_up(&root->fs_info->transaction_throttle); | ||
4611 | cond_resched(); | ||
4612 | } | ||
4613 | out: | ||
4614 | kfree(sorted); | ||
4615 | |||
4616 | /* | ||
4617 | * update the path to show we've processed the entire level 1 | ||
4618 | * node. This will get saved into the root's drop_snapshot_progress | ||
4619 | * field so these drops are not repeated again if this transaction | ||
4620 | * commits. | ||
4621 | */ | ||
4622 | path->slots[1] = nritems; | ||
4623 | return 0; | ||
4624 | } | ||
4625 | |||
4626 | /* | ||
4627 | * helper function for drop_snapshot, this walks down the tree dropping ref | ||
4628 | * counts as it goes. | ||
4629 | */ | ||
4630 | static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | ||
4631 | struct btrfs_root *root, | ||
4632 | struct btrfs_path *path, int *level) | ||
4633 | { | ||
4634 | u64 root_owner; | ||
4635 | u64 root_gen; | ||
4636 | u64 bytenr; | ||
4637 | u64 ptr_gen; | ||
4638 | struct extent_buffer *next; | ||
4639 | struct extent_buffer *cur; | ||
4640 | struct extent_buffer *parent; | ||
4641 | u32 blocksize; | ||
4642 | int ret; | ||
4643 | u32 refs; | ||
4644 | |||
4645 | WARN_ON(*level < 0); | ||
4646 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
4647 | ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start, | ||
4648 | path->nodes[*level]->len, &refs); | ||
4649 | BUG_ON(ret); | ||
4650 | if (refs > 1) | ||
4651 | goto out; | ||
4652 | |||
4653 | /* | ||
4654 | * walk down to the last node level and free all the leaves | ||
4655 | */ | ||
4656 | while (*level >= 0) { | ||
4657 | WARN_ON(*level < 0); | ||
4658 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
4659 | cur = path->nodes[*level]; | ||
4660 | |||
4661 | if (btrfs_header_level(cur) != *level) | ||
4662 | WARN_ON(1); | ||
4663 | 4554 | ||
4664 | if (path->slots[*level] >= | 4555 | if (!wc->update_ref || |
4665 | btrfs_header_nritems(cur)) | 4556 | generation <= root->root_key.offset) |
4666 | break; | 4557 | continue; |
4667 | 4558 | btrfs_node_key_to_cpu(eb, &key, slot); | |
4668 | /* the new code goes down to level 1 and does all the | 4559 | ret = btrfs_comp_cpu_keys(&key, |
4669 | * leaves pointed to that node in bulk. So, this check | 4560 | &wc->update_progress); |
4670 | * for level 0 will always be false. | 4561 | if (ret < 0) |
4671 | * | 4562 | continue; |
4672 | * But, the disk format allows the drop_snapshot_progress | ||
4673 | * field in the root to leave things in a state where | ||
4674 | * a leaf will need cleaning up here. If someone crashes | ||
4675 | * with the old code and then boots with the new code, | ||
4676 | * we might find a leaf here. | ||
4677 | */ | ||
4678 | if (*level == 0) { | ||
4679 | ret = btrfs_drop_leaf_ref(trans, root, cur); | ||
4680 | BUG_ON(ret); | ||
4681 | break; | ||
4682 | } | 4563 | } |
4683 | 4564 | reada: | |
4684 | /* | 4565 | ret = readahead_tree_block(root, bytenr, blocksize, |
4685 | * once we get to level one, process the whole node | 4566 | generation); |
4686 | * at once, including everything below it. | 4567 | if (ret) |
4687 | */ | ||
4688 | if (*level == 1) { | ||
4689 | ret = drop_level_one_refs(trans, root, path); | ||
4690 | BUG_ON(ret); | ||
4691 | break; | 4568 | break; |
4692 | } | 4569 | last = bytenr + blocksize; |
4693 | 4570 | nread++; | |
4694 | bytenr = btrfs_node_blockptr(cur, path->slots[*level]); | ||
4695 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); | ||
4696 | blocksize = btrfs_level_size(root, *level - 1); | ||
4697 | |||
4698 | ret = drop_snap_lookup_refcount(trans, root, bytenr, | ||
4699 | blocksize, &refs); | ||
4700 | BUG_ON(ret); | ||
4701 | |||
4702 | /* | ||
4703 | * if there is more than one reference, we don't need | ||
4704 | * to read that node to drop any references it has. We | ||
4705 | * just drop the ref we hold on that node and move on to the | ||
4706 | * next slot in this level. | ||
4707 | */ | ||
4708 | if (refs != 1) { | ||
4709 | parent = path->nodes[*level]; | ||
4710 | root_owner = btrfs_header_owner(parent); | ||
4711 | root_gen = btrfs_header_generation(parent); | ||
4712 | path->slots[*level]++; | ||
4713 | |||
4714 | ret = btrfs_free_extent(trans, root, bytenr, | ||
4715 | blocksize, parent->start, | ||
4716 | root_owner, root_gen, | ||
4717 | *level - 1, 1); | ||
4718 | BUG_ON(ret); | ||
4719 | |||
4720 | atomic_inc(&root->fs_info->throttle_gen); | ||
4721 | wake_up(&root->fs_info->transaction_throttle); | ||
4722 | cond_resched(); | ||
4723 | |||
4724 | continue; | ||
4725 | } | ||
4726 | |||
4727 | /* | ||
4728 | * we need to keep freeing things in the next level down. | ||
4729 | * read the block and loop around to process it | ||
4730 | */ | ||
4731 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); | ||
4732 | WARN_ON(*level <= 0); | ||
4733 | if (path->nodes[*level-1]) | ||
4734 | free_extent_buffer(path->nodes[*level-1]); | ||
4735 | path->nodes[*level-1] = next; | ||
4736 | *level = btrfs_header_level(next); | ||
4737 | path->slots[*level] = 0; | ||
4738 | cond_resched(); | ||
4739 | } | 4571 | } |
4740 | out: | 4572 | wc->reada_slot = slot; |
4741 | WARN_ON(*level < 0); | ||
4742 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
4743 | |||
4744 | if (path->nodes[*level] == root->node) { | ||
4745 | parent = path->nodes[*level]; | ||
4746 | bytenr = path->nodes[*level]->start; | ||
4747 | } else { | ||
4748 | parent = path->nodes[*level + 1]; | ||
4749 | bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]); | ||
4750 | } | ||
4751 | |||
4752 | blocksize = btrfs_level_size(root, *level); | ||
4753 | root_owner = btrfs_header_owner(parent); | ||
4754 | root_gen = btrfs_header_generation(parent); | ||
4755 | |||
4756 | /* | ||
4757 | * cleanup and free the reference on the last node | ||
4758 | * we processed | ||
4759 | */ | ||
4760 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, | ||
4761 | parent->start, root_owner, root_gen, | ||
4762 | *level, 1); | ||
4763 | free_extent_buffer(path->nodes[*level]); | ||
4764 | path->nodes[*level] = NULL; | ||
4765 | |||
4766 | *level += 1; | ||
4767 | BUG_ON(ret); | ||
4768 | |||
4769 | cond_resched(); | ||
4770 | return 0; | ||
4771 | } | 4573 | } |
4772 | #endif | ||
4773 | |||
4774 | struct walk_control { | ||
4775 | u64 refs[BTRFS_MAX_LEVEL]; | ||
4776 | u64 flags[BTRFS_MAX_LEVEL]; | ||
4777 | struct btrfs_key update_progress; | ||
4778 | int stage; | ||
4779 | int level; | ||
4780 | int shared_level; | ||
4781 | int update_ref; | ||
4782 | int keep_locks; | ||
4783 | }; | ||
4784 | |||
4785 | #define DROP_REFERENCE 1 | ||
4786 | #define UPDATE_BACKREF 2 | ||
4787 | 4574 | ||
4788 | /* | 4575 | /* |
4789 | * hepler to process tree block while walking down the tree. | 4576 | * hepler to process tree block while walking down the tree. |
4790 | * | 4577 | * |
4791 | * when wc->stage == DROP_REFERENCE, this function checks | ||
4792 | * reference count of the block. if the block is shared and | ||
4793 | * we need update back refs for the subtree rooted at the | ||
4794 | * block, this function changes wc->stage to UPDATE_BACKREF | ||
4795 | * | ||
4796 | * when wc->stage == UPDATE_BACKREF, this function updates | 4578 | * when wc->stage == UPDATE_BACKREF, this function updates |
4797 | * back refs for pointers in the block. | 4579 | * back refs for pointers in the block. |
4798 | * | 4580 | * |
@@ -4805,7 +4587,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
4805 | { | 4587 | { |
4806 | int level = wc->level; | 4588 | int level = wc->level; |
4807 | struct extent_buffer *eb = path->nodes[level]; | 4589 | struct extent_buffer *eb = path->nodes[level]; |
4808 | struct btrfs_key key; | ||
4809 | u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; | 4590 | u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; |
4810 | int ret; | 4591 | int ret; |
4811 | 4592 | ||
@@ -4828,21 +4609,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
4828 | BUG_ON(wc->refs[level] == 0); | 4609 | BUG_ON(wc->refs[level] == 0); |
4829 | } | 4610 | } |
4830 | 4611 | ||
4831 | if (wc->stage == DROP_REFERENCE && | ||
4832 | wc->update_ref && wc->refs[level] > 1) { | ||
4833 | BUG_ON(eb == root->node); | ||
4834 | BUG_ON(path->slots[level] > 0); | ||
4835 | if (level == 0) | ||
4836 | btrfs_item_key_to_cpu(eb, &key, path->slots[level]); | ||
4837 | else | ||
4838 | btrfs_node_key_to_cpu(eb, &key, path->slots[level]); | ||
4839 | if (btrfs_header_owner(eb) == root->root_key.objectid && | ||
4840 | btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) { | ||
4841 | wc->stage = UPDATE_BACKREF; | ||
4842 | wc->shared_level = level; | ||
4843 | } | ||
4844 | } | ||
4845 | |||
4846 | if (wc->stage == DROP_REFERENCE) { | 4612 | if (wc->stage == DROP_REFERENCE) { |
4847 | if (wc->refs[level] > 1) | 4613 | if (wc->refs[level] > 1) |
4848 | return 1; | 4614 | return 1; |
@@ -4879,6 +4645,123 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
4879 | } | 4645 | } |
4880 | 4646 | ||
4881 | /* | 4647 | /* |
4648 | * hepler to process tree block pointer. | ||
4649 | * | ||
4650 | * when wc->stage == DROP_REFERENCE, this function checks | ||
4651 | * reference count of the block pointed to. if the block | ||
4652 | * is shared and we need update back refs for the subtree | ||
4653 | * rooted at the block, this function changes wc->stage to | ||
4654 | * UPDATE_BACKREF. if the block is shared and there is no | ||
4655 | * need to update back, this function drops the reference | ||
4656 | * to the block. | ||
4657 | * | ||
4658 | * NOTE: return value 1 means we should stop walking down. | ||
4659 | */ | ||
4660 | static noinline int do_walk_down(struct btrfs_trans_handle *trans, | ||
4661 | struct btrfs_root *root, | ||
4662 | struct btrfs_path *path, | ||
4663 | struct walk_control *wc) | ||
4664 | { | ||
4665 | u64 bytenr; | ||
4666 | u64 generation; | ||
4667 | u64 parent; | ||
4668 | u32 blocksize; | ||
4669 | struct btrfs_key key; | ||
4670 | struct extent_buffer *next; | ||
4671 | int level = wc->level; | ||
4672 | int reada = 0; | ||
4673 | int ret = 0; | ||
4674 | |||
4675 | generation = btrfs_node_ptr_generation(path->nodes[level], | ||
4676 | path->slots[level]); | ||
4677 | /* | ||
4678 | * if the lower level block was created before the snapshot | ||
4679 | * was created, we know there is no need to update back refs | ||
4680 | * for the subtree | ||
4681 | */ | ||
4682 | if (wc->stage == UPDATE_BACKREF && | ||
4683 | generation <= root->root_key.offset) | ||
4684 | return 1; | ||
4685 | |||
4686 | bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); | ||
4687 | blocksize = btrfs_level_size(root, level - 1); | ||
4688 | |||
4689 | next = btrfs_find_tree_block(root, bytenr, blocksize); | ||
4690 | if (!next) { | ||
4691 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | ||
4692 | reada = 1; | ||
4693 | } | ||
4694 | btrfs_tree_lock(next); | ||
4695 | btrfs_set_lock_blocking(next); | ||
4696 | |||
4697 | if (wc->stage == DROP_REFERENCE) { | ||
4698 | ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, | ||
4699 | &wc->refs[level - 1], | ||
4700 | &wc->flags[level - 1]); | ||
4701 | BUG_ON(ret); | ||
4702 | BUG_ON(wc->refs[level - 1] == 0); | ||
4703 | |||
4704 | if (wc->refs[level - 1] > 1) { | ||
4705 | if (!wc->update_ref || | ||
4706 | generation <= root->root_key.offset) | ||
4707 | goto skip; | ||
4708 | |||
4709 | btrfs_node_key_to_cpu(path->nodes[level], &key, | ||
4710 | path->slots[level]); | ||
4711 | ret = btrfs_comp_cpu_keys(&key, &wc->update_progress); | ||
4712 | if (ret < 0) | ||
4713 | goto skip; | ||
4714 | |||
4715 | wc->stage = UPDATE_BACKREF; | ||
4716 | wc->shared_level = level - 1; | ||
4717 | } | ||
4718 | } | ||
4719 | |||
4720 | if (!btrfs_buffer_uptodate(next, generation)) { | ||
4721 | btrfs_tree_unlock(next); | ||
4722 | free_extent_buffer(next); | ||
4723 | next = NULL; | ||
4724 | } | ||
4725 | |||
4726 | if (!next) { | ||
4727 | if (reada && level == 1) | ||
4728 | reada_walk_down(trans, root, wc, path); | ||
4729 | next = read_tree_block(root, bytenr, blocksize, generation); | ||
4730 | btrfs_tree_lock(next); | ||
4731 | btrfs_set_lock_blocking(next); | ||
4732 | } | ||
4733 | |||
4734 | level--; | ||
4735 | BUG_ON(level != btrfs_header_level(next)); | ||
4736 | path->nodes[level] = next; | ||
4737 | path->slots[level] = 0; | ||
4738 | path->locks[level] = 1; | ||
4739 | wc->level = level; | ||
4740 | if (wc->level == 1) | ||
4741 | wc->reada_slot = 0; | ||
4742 | return 0; | ||
4743 | skip: | ||
4744 | wc->refs[level - 1] = 0; | ||
4745 | wc->flags[level - 1] = 0; | ||
4746 | |||
4747 | if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { | ||
4748 | parent = path->nodes[level]->start; | ||
4749 | } else { | ||
4750 | BUG_ON(root->root_key.objectid != | ||
4751 | btrfs_header_owner(path->nodes[level])); | ||
4752 | parent = 0; | ||
4753 | } | ||
4754 | |||
4755 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, | ||
4756 | root->root_key.objectid, level - 1, 0); | ||
4757 | BUG_ON(ret); | ||
4758 | |||
4759 | btrfs_tree_unlock(next); | ||
4760 | free_extent_buffer(next); | ||
4761 | return 1; | ||
4762 | } | ||
4763 | |||
4764 | /* | ||
4882 | * hepler to process tree block while walking up the tree. | 4765 | * hepler to process tree block while walking up the tree. |
4883 | * | 4766 | * |
4884 | * when wc->stage == DROP_REFERENCE, this function drops | 4767 | * when wc->stage == DROP_REFERENCE, this function drops |
@@ -4905,7 +4788,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
4905 | if (level < wc->shared_level) | 4788 | if (level < wc->shared_level) |
4906 | goto out; | 4789 | goto out; |
4907 | 4790 | ||
4908 | BUG_ON(wc->refs[level] <= 1); | ||
4909 | ret = find_next_key(path, level + 1, &wc->update_progress); | 4791 | ret = find_next_key(path, level + 1, &wc->update_progress); |
4910 | if (ret > 0) | 4792 | if (ret > 0) |
4911 | wc->update_ref = 0; | 4793 | wc->update_ref = 0; |
@@ -4936,8 +4818,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
4936 | path->locks[level] = 0; | 4818 | path->locks[level] = 0; |
4937 | return 1; | 4819 | return 1; |
4938 | } | 4820 | } |
4939 | } else { | ||
4940 | BUG_ON(level != 0); | ||
4941 | } | 4821 | } |
4942 | } | 4822 | } |
4943 | 4823 | ||
@@ -4990,17 +4870,13 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
4990 | struct btrfs_path *path, | 4870 | struct btrfs_path *path, |
4991 | struct walk_control *wc) | 4871 | struct walk_control *wc) |
4992 | { | 4872 | { |
4993 | struct extent_buffer *next; | ||
4994 | struct extent_buffer *cur; | ||
4995 | u64 bytenr; | ||
4996 | u64 ptr_gen; | ||
4997 | u32 blocksize; | ||
4998 | int level = wc->level; | 4873 | int level = wc->level; |
4999 | int ret; | 4874 | int ret; |
5000 | 4875 | ||
5001 | while (level >= 0) { | 4876 | while (level >= 0) { |
5002 | cur = path->nodes[level]; | 4877 | if (path->slots[level] >= |
5003 | BUG_ON(path->slots[level] >= btrfs_header_nritems(cur)); | 4878 | btrfs_header_nritems(path->nodes[level])) |
4879 | break; | ||
5004 | 4880 | ||
5005 | ret = walk_down_proc(trans, root, path, wc); | 4881 | ret = walk_down_proc(trans, root, path, wc); |
5006 | if (ret > 0) | 4882 | if (ret > 0) |
@@ -5009,20 +4885,12 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
5009 | if (level == 0) | 4885 | if (level == 0) |
5010 | break; | 4886 | break; |
5011 | 4887 | ||
5012 | bytenr = btrfs_node_blockptr(cur, path->slots[level]); | 4888 | ret = do_walk_down(trans, root, path, wc); |
5013 | blocksize = btrfs_level_size(root, level - 1); | 4889 | if (ret > 0) { |
5014 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]); | 4890 | path->slots[level]++; |
5015 | 4891 | continue; | |
5016 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); | 4892 | } |
5017 | btrfs_tree_lock(next); | 4893 | level = wc->level; |
5018 | btrfs_set_lock_blocking(next); | ||
5019 | |||
5020 | level--; | ||
5021 | BUG_ON(level != btrfs_header_level(next)); | ||
5022 | path->nodes[level] = next; | ||
5023 | path->slots[level] = 0; | ||
5024 | path->locks[level] = 1; | ||
5025 | wc->level = level; | ||
5026 | } | 4894 | } |
5027 | return 0; | 4895 | return 0; |
5028 | } | 4896 | } |
@@ -5112,9 +4980,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5112 | err = ret; | 4980 | err = ret; |
5113 | goto out; | 4981 | goto out; |
5114 | } | 4982 | } |
5115 | btrfs_node_key_to_cpu(path->nodes[level], &key, | 4983 | WARN_ON(ret > 0); |
5116 | path->slots[level]); | ||
5117 | WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key))); | ||
5118 | 4984 | ||
5119 | /* | 4985 | /* |
5120 | * unlock our path, this is safe because only this | 4986 | * unlock our path, this is safe because only this |
@@ -5149,6 +5015,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5149 | wc->stage = DROP_REFERENCE; | 5015 | wc->stage = DROP_REFERENCE; |
5150 | wc->update_ref = update_ref; | 5016 | wc->update_ref = update_ref; |
5151 | wc->keep_locks = 0; | 5017 | wc->keep_locks = 0; |
5018 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); | ||
5152 | 5019 | ||
5153 | while (1) { | 5020 | while (1) { |
5154 | ret = walk_down_tree(trans, root, path, wc); | 5021 | ret = walk_down_tree(trans, root, path, wc); |
@@ -5201,9 +5068,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5201 | ret = btrfs_del_root(trans, tree_root, &root->root_key); | 5068 | ret = btrfs_del_root(trans, tree_root, &root->root_key); |
5202 | BUG_ON(ret); | 5069 | BUG_ON(ret); |
5203 | 5070 | ||
5204 | free_extent_buffer(root->node); | 5071 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { |
5205 | free_extent_buffer(root->commit_root); | 5072 | ret = btrfs_find_last_root(tree_root, root->root_key.objectid, |
5206 | kfree(root); | 5073 | NULL, NULL); |
5074 | BUG_ON(ret < 0); | ||
5075 | if (ret > 0) { | ||
5076 | ret = btrfs_del_orphan_item(trans, tree_root, | ||
5077 | root->root_key.objectid); | ||
5078 | BUG_ON(ret); | ||
5079 | } | ||
5080 | } | ||
5081 | |||
5082 | if (root->in_radix) { | ||
5083 | btrfs_free_fs_root(tree_root->fs_info, root); | ||
5084 | } else { | ||
5085 | free_extent_buffer(root->node); | ||
5086 | free_extent_buffer(root->commit_root); | ||
5087 | kfree(root); | ||
5088 | } | ||
5207 | out: | 5089 | out: |
5208 | btrfs_end_transaction(trans, tree_root); | 5090 | btrfs_end_transaction(trans, tree_root); |
5209 | kfree(wc); | 5091 | kfree(wc); |
@@ -5255,6 +5137,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
5255 | wc->stage = DROP_REFERENCE; | 5137 | wc->stage = DROP_REFERENCE; |
5256 | wc->update_ref = 0; | 5138 | wc->update_ref = 0; |
5257 | wc->keep_locks = 1; | 5139 | wc->keep_locks = 1; |
5140 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); | ||
5258 | 5141 | ||
5259 | while (1) { | 5142 | while (1) { |
5260 | wret = walk_down_tree(trans, root, path, wc); | 5143 | wret = walk_down_tree(trans, root, path, wc); |
@@ -5397,9 +5280,9 @@ static noinline int relocate_data_extent(struct inode *reloc_inode, | |||
5397 | lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); | 5280 | lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); |
5398 | while (1) { | 5281 | while (1) { |
5399 | int ret; | 5282 | int ret; |
5400 | spin_lock(&em_tree->lock); | 5283 | write_lock(&em_tree->lock); |
5401 | ret = add_extent_mapping(em_tree, em); | 5284 | ret = add_extent_mapping(em_tree, em); |
5402 | spin_unlock(&em_tree->lock); | 5285 | write_unlock(&em_tree->lock); |
5403 | if (ret != -EEXIST) { | 5286 | if (ret != -EEXIST) { |
5404 | free_extent_map(em); | 5287 | free_extent_map(em); |
5405 | break; | 5288 | break; |
@@ -6842,287 +6725,86 @@ int btrfs_prepare_block_group_relocation(struct btrfs_root *root, | |||
6842 | return 0; | 6725 | return 0; |
6843 | } | 6726 | } |
6844 | 6727 | ||
6845 | #if 0 | 6728 | /* |
6846 | static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | 6729 | * checks to see if its even possible to relocate this block group. |
6847 | struct btrfs_root *root, | 6730 | * |
6848 | u64 objectid, u64 size) | 6731 | * @return - -1 if it's not a good idea to relocate this block group, 0 if its |
6849 | { | 6732 | * ok to go ahead and try. |
6850 | struct btrfs_path *path; | 6733 | */ |
6851 | struct btrfs_inode_item *item; | 6734 | int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) |
6852 | struct extent_buffer *leaf; | ||
6853 | int ret; | ||
6854 | |||
6855 | path = btrfs_alloc_path(); | ||
6856 | if (!path) | ||
6857 | return -ENOMEM; | ||
6858 | |||
6859 | path->leave_spinning = 1; | ||
6860 | ret = btrfs_insert_empty_inode(trans, root, path, objectid); | ||
6861 | if (ret) | ||
6862 | goto out; | ||
6863 | |||
6864 | leaf = path->nodes[0]; | ||
6865 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); | ||
6866 | memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); | ||
6867 | btrfs_set_inode_generation(leaf, item, 1); | ||
6868 | btrfs_set_inode_size(leaf, item, size); | ||
6869 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); | ||
6870 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); | ||
6871 | btrfs_mark_buffer_dirty(leaf); | ||
6872 | btrfs_release_path(root, path); | ||
6873 | out: | ||
6874 | btrfs_free_path(path); | ||
6875 | return ret; | ||
6876 | } | ||
6877 | |||
6878 | static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | ||
6879 | struct btrfs_block_group_cache *group) | ||
6880 | { | 6735 | { |
6881 | struct inode *inode = NULL; | 6736 | struct btrfs_block_group_cache *block_group; |
6882 | struct btrfs_trans_handle *trans; | 6737 | struct btrfs_space_info *space_info; |
6883 | struct btrfs_root *root; | 6738 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; |
6884 | struct btrfs_key root_key; | 6739 | struct btrfs_device *device; |
6885 | u64 objectid = BTRFS_FIRST_FREE_OBJECTID; | 6740 | int full = 0; |
6886 | int err = 0; | 6741 | int ret = 0; |
6887 | 6742 | ||
6888 | root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; | 6743 | block_group = btrfs_lookup_block_group(root->fs_info, bytenr); |
6889 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
6890 | root_key.offset = (u64)-1; | ||
6891 | root = btrfs_read_fs_root_no_name(fs_info, &root_key); | ||
6892 | if (IS_ERR(root)) | ||
6893 | return ERR_CAST(root); | ||
6894 | 6744 | ||
6895 | trans = btrfs_start_transaction(root, 1); | 6745 | /* odd, couldn't find the block group, leave it alone */ |
6896 | BUG_ON(!trans); | 6746 | if (!block_group) |
6747 | return -1; | ||
6897 | 6748 | ||
6898 | err = btrfs_find_free_objectid(trans, root, objectid, &objectid); | 6749 | /* no bytes used, we're good */ |
6899 | if (err) | 6750 | if (!btrfs_block_group_used(&block_group->item)) |
6900 | goto out; | 6751 | goto out; |
6901 | 6752 | ||
6902 | err = __insert_orphan_inode(trans, root, objectid, group->key.offset); | 6753 | space_info = block_group->space_info; |
6903 | BUG_ON(err); | 6754 | spin_lock(&space_info->lock); |
6904 | |||
6905 | err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, | ||
6906 | group->key.offset, 0, group->key.offset, | ||
6907 | 0, 0, 0); | ||
6908 | BUG_ON(err); | ||
6909 | |||
6910 | inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); | ||
6911 | if (inode->i_state & I_NEW) { | ||
6912 | BTRFS_I(inode)->root = root; | ||
6913 | BTRFS_I(inode)->location.objectid = objectid; | ||
6914 | BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; | ||
6915 | BTRFS_I(inode)->location.offset = 0; | ||
6916 | btrfs_read_locked_inode(inode); | ||
6917 | unlock_new_inode(inode); | ||
6918 | BUG_ON(is_bad_inode(inode)); | ||
6919 | } else { | ||
6920 | BUG_ON(1); | ||
6921 | } | ||
6922 | BTRFS_I(inode)->index_cnt = group->key.objectid; | ||
6923 | |||
6924 | err = btrfs_orphan_add(trans, inode); | ||
6925 | out: | ||
6926 | btrfs_end_transaction(trans, root); | ||
6927 | if (err) { | ||
6928 | if (inode) | ||
6929 | iput(inode); | ||
6930 | inode = ERR_PTR(err); | ||
6931 | } | ||
6932 | return inode; | ||
6933 | } | ||
6934 | |||
6935 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) | ||
6936 | { | ||
6937 | |||
6938 | struct btrfs_ordered_sum *sums; | ||
6939 | struct btrfs_sector_sum *sector_sum; | ||
6940 | struct btrfs_ordered_extent *ordered; | ||
6941 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
6942 | struct list_head list; | ||
6943 | size_t offset; | ||
6944 | int ret; | ||
6945 | u64 disk_bytenr; | ||
6946 | |||
6947 | INIT_LIST_HEAD(&list); | ||
6948 | |||
6949 | ordered = btrfs_lookup_ordered_extent(inode, file_pos); | ||
6950 | BUG_ON(ordered->file_offset != file_pos || ordered->len != len); | ||
6951 | |||
6952 | disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; | ||
6953 | ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, | ||
6954 | disk_bytenr + len - 1, &list); | ||
6955 | |||
6956 | while (!list_empty(&list)) { | ||
6957 | sums = list_entry(list.next, struct btrfs_ordered_sum, list); | ||
6958 | list_del_init(&sums->list); | ||
6959 | |||
6960 | sector_sum = sums->sums; | ||
6961 | sums->bytenr = ordered->start; | ||
6962 | 6755 | ||
6963 | offset = 0; | 6756 | full = space_info->full; |
6964 | while (offset < sums->len) { | ||
6965 | sector_sum->bytenr += ordered->start - disk_bytenr; | ||
6966 | sector_sum++; | ||
6967 | offset += root->sectorsize; | ||
6968 | } | ||
6969 | 6757 | ||
6970 | btrfs_add_ordered_sum(inode, ordered, sums); | 6758 | /* |
6759 | * if this is the last block group we have in this space, we can't | ||
6760 | * relocate it unless we're able to allocate a new chunk below. | ||
6761 | * | ||
6762 | * Otherwise, we need to make sure we have room in the space to handle | ||
6763 | * all of the extents from this block group. If we can, we're good | ||
6764 | */ | ||
6765 | if ((space_info->total_bytes != block_group->key.offset) && | ||
6766 | (space_info->bytes_used + space_info->bytes_reserved + | ||
6767 | space_info->bytes_pinned + space_info->bytes_readonly + | ||
6768 | btrfs_block_group_used(&block_group->item) < | ||
6769 | space_info->total_bytes)) { | ||
6770 | spin_unlock(&space_info->lock); | ||
6771 | goto out; | ||
6971 | } | 6772 | } |
6972 | btrfs_put_ordered_extent(ordered); | 6773 | spin_unlock(&space_info->lock); |
6973 | return 0; | ||
6974 | } | ||
6975 | |||
6976 | int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) | ||
6977 | { | ||
6978 | struct btrfs_trans_handle *trans; | ||
6979 | struct btrfs_path *path; | ||
6980 | struct btrfs_fs_info *info = root->fs_info; | ||
6981 | struct extent_buffer *leaf; | ||
6982 | struct inode *reloc_inode; | ||
6983 | struct btrfs_block_group_cache *block_group; | ||
6984 | struct btrfs_key key; | ||
6985 | u64 skipped; | ||
6986 | u64 cur_byte; | ||
6987 | u64 total_found; | ||
6988 | u32 nritems; | ||
6989 | int ret; | ||
6990 | int progress; | ||
6991 | int pass = 0; | ||
6992 | |||
6993 | root = root->fs_info->extent_root; | ||
6994 | |||
6995 | block_group = btrfs_lookup_block_group(info, group_start); | ||
6996 | BUG_ON(!block_group); | ||
6997 | |||
6998 | printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n", | ||
6999 | (unsigned long long)block_group->key.objectid, | ||
7000 | (unsigned long long)block_group->flags); | ||
7001 | |||
7002 | path = btrfs_alloc_path(); | ||
7003 | BUG_ON(!path); | ||
7004 | |||
7005 | reloc_inode = create_reloc_inode(info, block_group); | ||
7006 | BUG_ON(IS_ERR(reloc_inode)); | ||
7007 | |||
7008 | __alloc_chunk_for_shrink(root, block_group, 1); | ||
7009 | set_block_group_readonly(block_group); | ||
7010 | |||
7011 | btrfs_start_delalloc_inodes(info->tree_root); | ||
7012 | btrfs_wait_ordered_extents(info->tree_root, 0); | ||
7013 | again: | ||
7014 | skipped = 0; | ||
7015 | total_found = 0; | ||
7016 | progress = 0; | ||
7017 | key.objectid = block_group->key.objectid; | ||
7018 | key.offset = 0; | ||
7019 | key.type = 0; | ||
7020 | cur_byte = key.objectid; | ||
7021 | |||
7022 | trans = btrfs_start_transaction(info->tree_root, 1); | ||
7023 | btrfs_commit_transaction(trans, info->tree_root); | ||
7024 | 6774 | ||
7025 | mutex_lock(&root->fs_info->cleaner_mutex); | 6775 | /* |
7026 | btrfs_clean_old_snapshots(info->tree_root); | 6776 | * ok we don't have enough space, but maybe we have free space on our |
7027 | btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); | 6777 | * devices to allocate new chunks for relocation, so loop through our |
7028 | mutex_unlock(&root->fs_info->cleaner_mutex); | 6778 | * alloc devices and guess if we have enough space. However, if we |
6779 | * were marked as full, then we know there aren't enough chunks, and we | ||
6780 | * can just return. | ||
6781 | */ | ||
6782 | ret = -1; | ||
6783 | if (full) | ||
6784 | goto out; | ||
7029 | 6785 | ||
7030 | trans = btrfs_start_transaction(info->tree_root, 1); | 6786 | mutex_lock(&root->fs_info->chunk_mutex); |
7031 | btrfs_commit_transaction(trans, info->tree_root); | 6787 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { |
6788 | u64 min_free = btrfs_block_group_used(&block_group->item); | ||
6789 | u64 dev_offset, max_avail; | ||
7032 | 6790 | ||
7033 | while (1) { | 6791 | /* |
7034 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 6792 | * check to make sure we can actually find a chunk with enough |
7035 | if (ret < 0) | 6793 | * space to fit our block group in. |
7036 | goto out; | 6794 | */ |
7037 | next: | 6795 | if (device->total_bytes > device->bytes_used + min_free) { |
7038 | leaf = path->nodes[0]; | 6796 | ret = find_free_dev_extent(NULL, device, min_free, |
7039 | nritems = btrfs_header_nritems(leaf); | 6797 | &dev_offset, &max_avail); |
7040 | if (path->slots[0] >= nritems) { | 6798 | if (!ret) |
7041 | ret = btrfs_next_leaf(root, path); | ||
7042 | if (ret < 0) | ||
7043 | goto out; | ||
7044 | if (ret == 1) { | ||
7045 | ret = 0; | ||
7046 | break; | 6799 | break; |
7047 | } | 6800 | ret = -1; |
7048 | leaf = path->nodes[0]; | ||
7049 | nritems = btrfs_header_nritems(leaf); | ||
7050 | } | 6801 | } |
7051 | |||
7052 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
7053 | |||
7054 | if (key.objectid >= block_group->key.objectid + | ||
7055 | block_group->key.offset) | ||
7056 | break; | ||
7057 | |||
7058 | if (progress && need_resched()) { | ||
7059 | btrfs_release_path(root, path); | ||
7060 | cond_resched(); | ||
7061 | progress = 0; | ||
7062 | continue; | ||
7063 | } | ||
7064 | progress = 1; | ||
7065 | |||
7066 | if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY || | ||
7067 | key.objectid + key.offset <= cur_byte) { | ||
7068 | path->slots[0]++; | ||
7069 | goto next; | ||
7070 | } | ||
7071 | |||
7072 | total_found++; | ||
7073 | cur_byte = key.objectid + key.offset; | ||
7074 | btrfs_release_path(root, path); | ||
7075 | |||
7076 | __alloc_chunk_for_shrink(root, block_group, 0); | ||
7077 | ret = relocate_one_extent(root, path, &key, block_group, | ||
7078 | reloc_inode, pass); | ||
7079 | BUG_ON(ret < 0); | ||
7080 | if (ret > 0) | ||
7081 | skipped++; | ||
7082 | |||
7083 | key.objectid = cur_byte; | ||
7084 | key.type = 0; | ||
7085 | key.offset = 0; | ||
7086 | } | ||
7087 | |||
7088 | btrfs_release_path(root, path); | ||
7089 | |||
7090 | if (pass == 0) { | ||
7091 | btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1); | ||
7092 | invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1); | ||
7093 | } | ||
7094 | |||
7095 | if (total_found > 0) { | ||
7096 | printk(KERN_INFO "btrfs found %llu extents in pass %d\n", | ||
7097 | (unsigned long long)total_found, pass); | ||
7098 | pass++; | ||
7099 | if (total_found == skipped && pass > 2) { | ||
7100 | iput(reloc_inode); | ||
7101 | reloc_inode = create_reloc_inode(info, block_group); | ||
7102 | pass = 0; | ||
7103 | } | ||
7104 | goto again; | ||
7105 | } | 6802 | } |
7106 | 6803 | mutex_unlock(&root->fs_info->chunk_mutex); | |
7107 | /* delete reloc_inode */ | ||
7108 | iput(reloc_inode); | ||
7109 | |||
7110 | /* unpin extents in this range */ | ||
7111 | trans = btrfs_start_transaction(info->tree_root, 1); | ||
7112 | btrfs_commit_transaction(trans, info->tree_root); | ||
7113 | |||
7114 | spin_lock(&block_group->lock); | ||
7115 | WARN_ON(block_group->pinned > 0); | ||
7116 | WARN_ON(block_group->reserved > 0); | ||
7117 | WARN_ON(btrfs_block_group_used(&block_group->item) > 0); | ||
7118 | spin_unlock(&block_group->lock); | ||
7119 | btrfs_put_block_group(block_group); | ||
7120 | ret = 0; | ||
7121 | out: | 6804 | out: |
7122 | btrfs_free_path(path); | 6805 | btrfs_put_block_group(block_group); |
7123 | return ret; | 6806 | return ret; |
7124 | } | 6807 | } |
7125 | #endif | ||
7126 | 6808 | ||
7127 | static int find_first_block_group(struct btrfs_root *root, | 6809 | static int find_first_block_group(struct btrfs_root *root, |
7128 | struct btrfs_path *path, struct btrfs_key *key) | 6810 | struct btrfs_path *path, struct btrfs_key *key) |
@@ -7165,8 +6847,18 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
7165 | { | 6847 | { |
7166 | struct btrfs_block_group_cache *block_group; | 6848 | struct btrfs_block_group_cache *block_group; |
7167 | struct btrfs_space_info *space_info; | 6849 | struct btrfs_space_info *space_info; |
6850 | struct btrfs_caching_control *caching_ctl; | ||
7168 | struct rb_node *n; | 6851 | struct rb_node *n; |
7169 | 6852 | ||
6853 | down_write(&info->extent_commit_sem); | ||
6854 | while (!list_empty(&info->caching_block_groups)) { | ||
6855 | caching_ctl = list_entry(info->caching_block_groups.next, | ||
6856 | struct btrfs_caching_control, list); | ||
6857 | list_del(&caching_ctl->list); | ||
6858 | put_caching_control(caching_ctl); | ||
6859 | } | ||
6860 | up_write(&info->extent_commit_sem); | ||
6861 | |||
7170 | spin_lock(&info->block_group_cache_lock); | 6862 | spin_lock(&info->block_group_cache_lock); |
7171 | while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { | 6863 | while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { |
7172 | block_group = rb_entry(n, struct btrfs_block_group_cache, | 6864 | block_group = rb_entry(n, struct btrfs_block_group_cache, |
@@ -7180,8 +6872,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
7180 | up_write(&block_group->space_info->groups_sem); | 6872 | up_write(&block_group->space_info->groups_sem); |
7181 | 6873 | ||
7182 | if (block_group->cached == BTRFS_CACHE_STARTED) | 6874 | if (block_group->cached == BTRFS_CACHE_STARTED) |
7183 | wait_event(block_group->caching_q, | 6875 | wait_block_group_cache_done(block_group); |
7184 | block_group_cache_done(block_group)); | ||
7185 | 6876 | ||
7186 | btrfs_remove_free_space_cache(block_group); | 6877 | btrfs_remove_free_space_cache(block_group); |
7187 | 6878 | ||
@@ -7251,7 +6942,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7251 | spin_lock_init(&cache->lock); | 6942 | spin_lock_init(&cache->lock); |
7252 | spin_lock_init(&cache->tree_lock); | 6943 | spin_lock_init(&cache->tree_lock); |
7253 | cache->fs_info = info; | 6944 | cache->fs_info = info; |
7254 | init_waitqueue_head(&cache->caching_q); | ||
7255 | INIT_LIST_HEAD(&cache->list); | 6945 | INIT_LIST_HEAD(&cache->list); |
7256 | INIT_LIST_HEAD(&cache->cluster_list); | 6946 | INIT_LIST_HEAD(&cache->cluster_list); |
7257 | 6947 | ||
@@ -7273,8 +6963,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7273 | cache->flags = btrfs_block_group_flags(&cache->item); | 6963 | cache->flags = btrfs_block_group_flags(&cache->item); |
7274 | cache->sectorsize = root->sectorsize; | 6964 | cache->sectorsize = root->sectorsize; |
7275 | 6965 | ||
7276 | remove_sb_from_cache(root, cache); | ||
7277 | |||
7278 | /* | 6966 | /* |
7279 | * check for two cases, either we are full, and therefore | 6967 | * check for two cases, either we are full, and therefore |
7280 | * don't need to bother with the caching work since we won't | 6968 | * don't need to bother with the caching work since we won't |
@@ -7283,13 +6971,19 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7283 | * time, particularly in the full case. | 6971 | * time, particularly in the full case. |
7284 | */ | 6972 | */ |
7285 | if (found_key.offset == btrfs_block_group_used(&cache->item)) { | 6973 | if (found_key.offset == btrfs_block_group_used(&cache->item)) { |
6974 | exclude_super_stripes(root, cache); | ||
6975 | cache->last_byte_to_unpin = (u64)-1; | ||
7286 | cache->cached = BTRFS_CACHE_FINISHED; | 6976 | cache->cached = BTRFS_CACHE_FINISHED; |
6977 | free_excluded_extents(root, cache); | ||
7287 | } else if (btrfs_block_group_used(&cache->item) == 0) { | 6978 | } else if (btrfs_block_group_used(&cache->item) == 0) { |
6979 | exclude_super_stripes(root, cache); | ||
6980 | cache->last_byte_to_unpin = (u64)-1; | ||
7288 | cache->cached = BTRFS_CACHE_FINISHED; | 6981 | cache->cached = BTRFS_CACHE_FINISHED; |
7289 | add_new_free_space(cache, root->fs_info, | 6982 | add_new_free_space(cache, root->fs_info, |
7290 | found_key.objectid, | 6983 | found_key.objectid, |
7291 | found_key.objectid + | 6984 | found_key.objectid + |
7292 | found_key.offset); | 6985 | found_key.offset); |
6986 | free_excluded_extents(root, cache); | ||
7293 | } | 6987 | } |
7294 | 6988 | ||
7295 | ret = update_space_info(info, cache->flags, found_key.offset, | 6989 | ret = update_space_info(info, cache->flags, found_key.offset, |
@@ -7297,6 +6991,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7297 | &space_info); | 6991 | &space_info); |
7298 | BUG_ON(ret); | 6992 | BUG_ON(ret); |
7299 | cache->space_info = space_info; | 6993 | cache->space_info = space_info; |
6994 | spin_lock(&cache->space_info->lock); | ||
6995 | cache->space_info->bytes_super += cache->bytes_super; | ||
6996 | spin_unlock(&cache->space_info->lock); | ||
6997 | |||
7300 | down_write(&space_info->groups_sem); | 6998 | down_write(&space_info->groups_sem); |
7301 | list_add_tail(&cache->list, &space_info->block_groups); | 6999 | list_add_tail(&cache->list, &space_info->block_groups); |
7302 | up_write(&space_info->groups_sem); | 7000 | up_write(&space_info->groups_sem); |
@@ -7346,7 +7044,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
7346 | atomic_set(&cache->count, 1); | 7044 | atomic_set(&cache->count, 1); |
7347 | spin_lock_init(&cache->lock); | 7045 | spin_lock_init(&cache->lock); |
7348 | spin_lock_init(&cache->tree_lock); | 7046 | spin_lock_init(&cache->tree_lock); |
7349 | init_waitqueue_head(&cache->caching_q); | ||
7350 | INIT_LIST_HEAD(&cache->list); | 7047 | INIT_LIST_HEAD(&cache->list); |
7351 | INIT_LIST_HEAD(&cache->cluster_list); | 7048 | INIT_LIST_HEAD(&cache->cluster_list); |
7352 | 7049 | ||
@@ -7355,15 +7052,23 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
7355 | cache->flags = type; | 7052 | cache->flags = type; |
7356 | btrfs_set_block_group_flags(&cache->item, type); | 7053 | btrfs_set_block_group_flags(&cache->item, type); |
7357 | 7054 | ||
7055 | cache->last_byte_to_unpin = (u64)-1; | ||
7358 | cache->cached = BTRFS_CACHE_FINISHED; | 7056 | cache->cached = BTRFS_CACHE_FINISHED; |
7359 | remove_sb_from_cache(root, cache); | 7057 | exclude_super_stripes(root, cache); |
7360 | 7058 | ||
7361 | add_new_free_space(cache, root->fs_info, chunk_offset, | 7059 | add_new_free_space(cache, root->fs_info, chunk_offset, |
7362 | chunk_offset + size); | 7060 | chunk_offset + size); |
7363 | 7061 | ||
7062 | free_excluded_extents(root, cache); | ||
7063 | |||
7364 | ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, | 7064 | ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, |
7365 | &cache->space_info); | 7065 | &cache->space_info); |
7366 | BUG_ON(ret); | 7066 | BUG_ON(ret); |
7067 | |||
7068 | spin_lock(&cache->space_info->lock); | ||
7069 | cache->space_info->bytes_super += cache->bytes_super; | ||
7070 | spin_unlock(&cache->space_info->lock); | ||
7071 | |||
7367 | down_write(&cache->space_info->groups_sem); | 7072 | down_write(&cache->space_info->groups_sem); |
7368 | list_add_tail(&cache->list, &cache->space_info->block_groups); | 7073 | list_add_tail(&cache->list, &cache->space_info->block_groups); |
7369 | up_write(&cache->space_info->groups_sem); | 7074 | up_write(&cache->space_info->groups_sem); |
@@ -7429,8 +7134,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
7429 | up_write(&block_group->space_info->groups_sem); | 7134 | up_write(&block_group->space_info->groups_sem); |
7430 | 7135 | ||
7431 | if (block_group->cached == BTRFS_CACHE_STARTED) | 7136 | if (block_group->cached == BTRFS_CACHE_STARTED) |
7432 | wait_event(block_group->caching_q, | 7137 | wait_block_group_cache_done(block_group); |
7433 | block_group_cache_done(block_group)); | ||
7434 | 7138 | ||
7435 | btrfs_remove_free_space_cache(block_group); | 7139 | btrfs_remove_free_space_cache(block_group); |
7436 | 7140 | ||
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 68260180f587..0cb88f8146ea 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -367,10 +367,10 @@ static int insert_state(struct extent_io_tree *tree, | |||
367 | } | 367 | } |
368 | if (bits & EXTENT_DIRTY) | 368 | if (bits & EXTENT_DIRTY) |
369 | tree->dirty_bytes += end - start + 1; | 369 | tree->dirty_bytes += end - start + 1; |
370 | set_state_cb(tree, state, bits); | ||
371 | state->state |= bits; | ||
372 | state->start = start; | 370 | state->start = start; |
373 | state->end = end; | 371 | state->end = end; |
372 | set_state_cb(tree, state, bits); | ||
373 | state->state |= bits; | ||
374 | node = tree_insert(&tree->state, end, &state->rb_node); | 374 | node = tree_insert(&tree->state, end, &state->rb_node); |
375 | if (node) { | 375 | if (node) { |
376 | struct extent_state *found; | 376 | struct extent_state *found; |
@@ -471,10 +471,14 @@ static int clear_state_bit(struct extent_io_tree *tree, | |||
471 | * bits were already set, or zero if none of the bits were already set. | 471 | * bits were already set, or zero if none of the bits were already set. |
472 | */ | 472 | */ |
473 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 473 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
474 | int bits, int wake, int delete, gfp_t mask) | 474 | int bits, int wake, int delete, |
475 | struct extent_state **cached_state, | ||
476 | gfp_t mask) | ||
475 | { | 477 | { |
476 | struct extent_state *state; | 478 | struct extent_state *state; |
479 | struct extent_state *cached; | ||
477 | struct extent_state *prealloc = NULL; | 480 | struct extent_state *prealloc = NULL; |
481 | struct rb_node *next_node; | ||
478 | struct rb_node *node; | 482 | struct rb_node *node; |
479 | u64 last_end; | 483 | u64 last_end; |
480 | int err; | 484 | int err; |
@@ -488,6 +492,17 @@ again: | |||
488 | } | 492 | } |
489 | 493 | ||
490 | spin_lock(&tree->lock); | 494 | spin_lock(&tree->lock); |
495 | if (cached_state) { | ||
496 | cached = *cached_state; | ||
497 | *cached_state = NULL; | ||
498 | cached_state = NULL; | ||
499 | if (cached && cached->tree && cached->start == start) { | ||
500 | atomic_dec(&cached->refs); | ||
501 | state = cached; | ||
502 | goto hit_next; | ||
503 | } | ||
504 | free_extent_state(cached); | ||
505 | } | ||
491 | /* | 506 | /* |
492 | * this search will find the extents that end after | 507 | * this search will find the extents that end after |
493 | * our range starts | 508 | * our range starts |
@@ -496,6 +511,7 @@ again: | |||
496 | if (!node) | 511 | if (!node) |
497 | goto out; | 512 | goto out; |
498 | state = rb_entry(node, struct extent_state, rb_node); | 513 | state = rb_entry(node, struct extent_state, rb_node); |
514 | hit_next: | ||
499 | if (state->start > end) | 515 | if (state->start > end) |
500 | goto out; | 516 | goto out; |
501 | WARN_ON(state->end < start); | 517 | WARN_ON(state->end < start); |
@@ -531,8 +547,6 @@ again: | |||
531 | if (last_end == (u64)-1) | 547 | if (last_end == (u64)-1) |
532 | goto out; | 548 | goto out; |
533 | start = last_end + 1; | 549 | start = last_end + 1; |
534 | } else { | ||
535 | start = state->start; | ||
536 | } | 550 | } |
537 | goto search_again; | 551 | goto search_again; |
538 | } | 552 | } |
@@ -550,16 +564,28 @@ again: | |||
550 | 564 | ||
551 | if (wake) | 565 | if (wake) |
552 | wake_up(&state->wq); | 566 | wake_up(&state->wq); |
567 | |||
553 | set |= clear_state_bit(tree, prealloc, bits, | 568 | set |= clear_state_bit(tree, prealloc, bits, |
554 | wake, delete); | 569 | wake, delete); |
555 | prealloc = NULL; | 570 | prealloc = NULL; |
556 | goto out; | 571 | goto out; |
557 | } | 572 | } |
558 | 573 | ||
574 | if (state->end < end && prealloc && !need_resched()) | ||
575 | next_node = rb_next(&state->rb_node); | ||
576 | else | ||
577 | next_node = NULL; | ||
578 | |||
559 | set |= clear_state_bit(tree, state, bits, wake, delete); | 579 | set |= clear_state_bit(tree, state, bits, wake, delete); |
560 | if (last_end == (u64)-1) | 580 | if (last_end == (u64)-1) |
561 | goto out; | 581 | goto out; |
562 | start = last_end + 1; | 582 | start = last_end + 1; |
583 | if (start <= end && next_node) { | ||
584 | state = rb_entry(next_node, struct extent_state, | ||
585 | rb_node); | ||
586 | if (state->start == start) | ||
587 | goto hit_next; | ||
588 | } | ||
563 | goto search_again; | 589 | goto search_again; |
564 | 590 | ||
565 | out: | 591 | out: |
@@ -653,28 +679,40 @@ static void set_state_bits(struct extent_io_tree *tree, | |||
653 | state->state |= bits; | 679 | state->state |= bits; |
654 | } | 680 | } |
655 | 681 | ||
682 | static void cache_state(struct extent_state *state, | ||
683 | struct extent_state **cached_ptr) | ||
684 | { | ||
685 | if (cached_ptr && !(*cached_ptr)) { | ||
686 | if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) { | ||
687 | *cached_ptr = state; | ||
688 | atomic_inc(&state->refs); | ||
689 | } | ||
690 | } | ||
691 | } | ||
692 | |||
656 | /* | 693 | /* |
657 | * set some bits on a range in the tree. This may require allocations | 694 | * set some bits on a range in the tree. This may require allocations or |
658 | * or sleeping, so the gfp mask is used to indicate what is allowed. | 695 | * sleeping, so the gfp mask is used to indicate what is allowed. |
659 | * | 696 | * |
660 | * If 'exclusive' == 1, this will fail with -EEXIST if some part of the | 697 | * If any of the exclusive bits are set, this will fail with -EEXIST if some |
661 | * range already has the desired bits set. The start of the existing | 698 | * part of the range already has the desired bits set. The start of the |
662 | * range is returned in failed_start in this case. | 699 | * existing range is returned in failed_start in this case. |
663 | * | 700 | * |
664 | * [start, end] is inclusive | 701 | * [start, end] is inclusive This takes the tree lock. |
665 | * This takes the tree lock. | ||
666 | */ | 702 | */ |
703 | |||
667 | static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 704 | static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
668 | int bits, int exclusive, u64 *failed_start, | 705 | int bits, int exclusive_bits, u64 *failed_start, |
706 | struct extent_state **cached_state, | ||
669 | gfp_t mask) | 707 | gfp_t mask) |
670 | { | 708 | { |
671 | struct extent_state *state; | 709 | struct extent_state *state; |
672 | struct extent_state *prealloc = NULL; | 710 | struct extent_state *prealloc = NULL; |
673 | struct rb_node *node; | 711 | struct rb_node *node; |
674 | int err = 0; | 712 | int err = 0; |
675 | int set; | ||
676 | u64 last_start; | 713 | u64 last_start; |
677 | u64 last_end; | 714 | u64 last_end; |
715 | |||
678 | again: | 716 | again: |
679 | if (!prealloc && (mask & __GFP_WAIT)) { | 717 | if (!prealloc && (mask & __GFP_WAIT)) { |
680 | prealloc = alloc_extent_state(mask); | 718 | prealloc = alloc_extent_state(mask); |
@@ -683,6 +721,13 @@ again: | |||
683 | } | 721 | } |
684 | 722 | ||
685 | spin_lock(&tree->lock); | 723 | spin_lock(&tree->lock); |
724 | if (cached_state && *cached_state) { | ||
725 | state = *cached_state; | ||
726 | if (state->start == start && state->tree) { | ||
727 | node = &state->rb_node; | ||
728 | goto hit_next; | ||
729 | } | ||
730 | } | ||
686 | /* | 731 | /* |
687 | * this search will find all the extents that end after | 732 | * this search will find all the extents that end after |
688 | * our range starts. | 733 | * our range starts. |
@@ -694,8 +739,8 @@ again: | |||
694 | BUG_ON(err == -EEXIST); | 739 | BUG_ON(err == -EEXIST); |
695 | goto out; | 740 | goto out; |
696 | } | 741 | } |
697 | |||
698 | state = rb_entry(node, struct extent_state, rb_node); | 742 | state = rb_entry(node, struct extent_state, rb_node); |
743 | hit_next: | ||
699 | last_start = state->start; | 744 | last_start = state->start; |
700 | last_end = state->end; | 745 | last_end = state->end; |
701 | 746 | ||
@@ -706,17 +751,29 @@ again: | |||
706 | * Just lock what we found and keep going | 751 | * Just lock what we found and keep going |
707 | */ | 752 | */ |
708 | if (state->start == start && state->end <= end) { | 753 | if (state->start == start && state->end <= end) { |
709 | set = state->state & bits; | 754 | struct rb_node *next_node; |
710 | if (set && exclusive) { | 755 | if (state->state & exclusive_bits) { |
711 | *failed_start = state->start; | 756 | *failed_start = state->start; |
712 | err = -EEXIST; | 757 | err = -EEXIST; |
713 | goto out; | 758 | goto out; |
714 | } | 759 | } |
760 | |||
715 | set_state_bits(tree, state, bits); | 761 | set_state_bits(tree, state, bits); |
762 | cache_state(state, cached_state); | ||
716 | merge_state(tree, state); | 763 | merge_state(tree, state); |
717 | if (last_end == (u64)-1) | 764 | if (last_end == (u64)-1) |
718 | goto out; | 765 | goto out; |
766 | |||
719 | start = last_end + 1; | 767 | start = last_end + 1; |
768 | if (start < end && prealloc && !need_resched()) { | ||
769 | next_node = rb_next(node); | ||
770 | if (next_node) { | ||
771 | state = rb_entry(next_node, struct extent_state, | ||
772 | rb_node); | ||
773 | if (state->start == start) | ||
774 | goto hit_next; | ||
775 | } | ||
776 | } | ||
720 | goto search_again; | 777 | goto search_again; |
721 | } | 778 | } |
722 | 779 | ||
@@ -737,8 +794,7 @@ again: | |||
737 | * desired bit on it. | 794 | * desired bit on it. |
738 | */ | 795 | */ |
739 | if (state->start < start) { | 796 | if (state->start < start) { |
740 | set = state->state & bits; | 797 | if (state->state & exclusive_bits) { |
741 | if (exclusive && set) { | ||
742 | *failed_start = start; | 798 | *failed_start = start; |
743 | err = -EEXIST; | 799 | err = -EEXIST; |
744 | goto out; | 800 | goto out; |
@@ -750,12 +806,11 @@ again: | |||
750 | goto out; | 806 | goto out; |
751 | if (state->end <= end) { | 807 | if (state->end <= end) { |
752 | set_state_bits(tree, state, bits); | 808 | set_state_bits(tree, state, bits); |
809 | cache_state(state, cached_state); | ||
753 | merge_state(tree, state); | 810 | merge_state(tree, state); |
754 | if (last_end == (u64)-1) | 811 | if (last_end == (u64)-1) |
755 | goto out; | 812 | goto out; |
756 | start = last_end + 1; | 813 | start = last_end + 1; |
757 | } else { | ||
758 | start = state->start; | ||
759 | } | 814 | } |
760 | goto search_again; | 815 | goto search_again; |
761 | } | 816 | } |
@@ -774,6 +829,7 @@ again: | |||
774 | this_end = last_start - 1; | 829 | this_end = last_start - 1; |
775 | err = insert_state(tree, prealloc, start, this_end, | 830 | err = insert_state(tree, prealloc, start, this_end, |
776 | bits); | 831 | bits); |
832 | cache_state(prealloc, cached_state); | ||
777 | prealloc = NULL; | 833 | prealloc = NULL; |
778 | BUG_ON(err == -EEXIST); | 834 | BUG_ON(err == -EEXIST); |
779 | if (err) | 835 | if (err) |
@@ -788,8 +844,7 @@ again: | |||
788 | * on the first half | 844 | * on the first half |
789 | */ | 845 | */ |
790 | if (state->start <= end && state->end > end) { | 846 | if (state->start <= end && state->end > end) { |
791 | set = state->state & bits; | 847 | if (state->state & exclusive_bits) { |
792 | if (exclusive && set) { | ||
793 | *failed_start = start; | 848 | *failed_start = start; |
794 | err = -EEXIST; | 849 | err = -EEXIST; |
795 | goto out; | 850 | goto out; |
@@ -798,6 +853,7 @@ again: | |||
798 | BUG_ON(err == -EEXIST); | 853 | BUG_ON(err == -EEXIST); |
799 | 854 | ||
800 | set_state_bits(tree, prealloc, bits); | 855 | set_state_bits(tree, prealloc, bits); |
856 | cache_state(prealloc, cached_state); | ||
801 | merge_state(tree, prealloc); | 857 | merge_state(tree, prealloc); |
802 | prealloc = NULL; | 858 | prealloc = NULL; |
803 | goto out; | 859 | goto out; |
@@ -826,86 +882,64 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | |||
826 | gfp_t mask) | 882 | gfp_t mask) |
827 | { | 883 | { |
828 | return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, | 884 | return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, |
829 | mask); | 885 | NULL, mask); |
830 | } | ||
831 | |||
832 | int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, | ||
833 | gfp_t mask) | ||
834 | { | ||
835 | return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask); | ||
836 | } | 886 | } |
837 | 887 | ||
838 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 888 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
839 | int bits, gfp_t mask) | 889 | int bits, gfp_t mask) |
840 | { | 890 | { |
841 | return set_extent_bit(tree, start, end, bits, 0, NULL, | 891 | return set_extent_bit(tree, start, end, bits, 0, NULL, |
842 | mask); | 892 | NULL, mask); |
843 | } | 893 | } |
844 | 894 | ||
845 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 895 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
846 | int bits, gfp_t mask) | 896 | int bits, gfp_t mask) |
847 | { | 897 | { |
848 | return clear_extent_bit(tree, start, end, bits, 0, 0, mask); | 898 | return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask); |
849 | } | 899 | } |
850 | 900 | ||
851 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, | 901 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, |
852 | gfp_t mask) | 902 | gfp_t mask) |
853 | { | 903 | { |
854 | return set_extent_bit(tree, start, end, | 904 | return set_extent_bit(tree, start, end, |
855 | EXTENT_DELALLOC | EXTENT_DIRTY, | 905 | EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE, |
856 | 0, NULL, mask); | 906 | 0, NULL, NULL, mask); |
857 | } | 907 | } |
858 | 908 | ||
859 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 909 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
860 | gfp_t mask) | 910 | gfp_t mask) |
861 | { | 911 | { |
862 | return clear_extent_bit(tree, start, end, | 912 | return clear_extent_bit(tree, start, end, |
863 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); | 913 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, |
864 | } | 914 | NULL, mask); |
865 | |||
866 | int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, | ||
867 | gfp_t mask) | ||
868 | { | ||
869 | return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask); | ||
870 | } | 915 | } |
871 | 916 | ||
872 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 917 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
873 | gfp_t mask) | 918 | gfp_t mask) |
874 | { | 919 | { |
875 | return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, | 920 | return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, |
876 | mask); | 921 | NULL, mask); |
877 | } | 922 | } |
878 | 923 | ||
879 | static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 924 | static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
880 | gfp_t mask) | 925 | gfp_t mask) |
881 | { | 926 | { |
882 | return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); | 927 | return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, |
928 | NULL, mask); | ||
883 | } | 929 | } |
884 | 930 | ||
885 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 931 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
886 | gfp_t mask) | 932 | gfp_t mask) |
887 | { | 933 | { |
888 | return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, | 934 | return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, |
889 | mask); | 935 | NULL, mask); |
890 | } | 936 | } |
891 | 937 | ||
892 | static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, | 938 | static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, |
893 | u64 end, gfp_t mask) | 939 | u64 end, gfp_t mask) |
894 | { | 940 | { |
895 | return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); | 941 | return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, |
896 | } | 942 | NULL, mask); |
897 | |||
898 | static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, | ||
899 | gfp_t mask) | ||
900 | { | ||
901 | return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, | ||
902 | 0, NULL, mask); | ||
903 | } | ||
904 | |||
905 | static int clear_extent_writeback(struct extent_io_tree *tree, u64 start, | ||
906 | u64 end, gfp_t mask) | ||
907 | { | ||
908 | return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask); | ||
909 | } | 943 | } |
910 | 944 | ||
911 | int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) | 945 | int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) |
@@ -917,13 +951,15 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) | |||
917 | * either insert or lock state struct between start and end use mask to tell | 951 | * either insert or lock state struct between start and end use mask to tell |
918 | * us if waiting is desired. | 952 | * us if waiting is desired. |
919 | */ | 953 | */ |
920 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) | 954 | int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
955 | int bits, struct extent_state **cached_state, gfp_t mask) | ||
921 | { | 956 | { |
922 | int err; | 957 | int err; |
923 | u64 failed_start; | 958 | u64 failed_start; |
924 | while (1) { | 959 | while (1) { |
925 | err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, | 960 | err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits, |
926 | &failed_start, mask); | 961 | EXTENT_LOCKED, &failed_start, |
962 | cached_state, mask); | ||
927 | if (err == -EEXIST && (mask & __GFP_WAIT)) { | 963 | if (err == -EEXIST && (mask & __GFP_WAIT)) { |
928 | wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); | 964 | wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); |
929 | start = failed_start; | 965 | start = failed_start; |
@@ -935,27 +971,40 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) | |||
935 | return err; | 971 | return err; |
936 | } | 972 | } |
937 | 973 | ||
974 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) | ||
975 | { | ||
976 | return lock_extent_bits(tree, start, end, 0, NULL, mask); | ||
977 | } | ||
978 | |||
938 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, | 979 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, |
939 | gfp_t mask) | 980 | gfp_t mask) |
940 | { | 981 | { |
941 | int err; | 982 | int err; |
942 | u64 failed_start; | 983 | u64 failed_start; |
943 | 984 | ||
944 | err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, | 985 | err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED, |
945 | &failed_start, mask); | 986 | &failed_start, NULL, mask); |
946 | if (err == -EEXIST) { | 987 | if (err == -EEXIST) { |
947 | if (failed_start > start) | 988 | if (failed_start > start) |
948 | clear_extent_bit(tree, start, failed_start - 1, | 989 | clear_extent_bit(tree, start, failed_start - 1, |
949 | EXTENT_LOCKED, 1, 0, mask); | 990 | EXTENT_LOCKED, 1, 0, NULL, mask); |
950 | return 0; | 991 | return 0; |
951 | } | 992 | } |
952 | return 1; | 993 | return 1; |
953 | } | 994 | } |
954 | 995 | ||
996 | int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, | ||
997 | struct extent_state **cached, gfp_t mask) | ||
998 | { | ||
999 | return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached, | ||
1000 | mask); | ||
1001 | } | ||
1002 | |||
955 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, | 1003 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, |
956 | gfp_t mask) | 1004 | gfp_t mask) |
957 | { | 1005 | { |
958 | return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); | 1006 | return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, |
1007 | mask); | ||
959 | } | 1008 | } |
960 | 1009 | ||
961 | /* | 1010 | /* |
@@ -974,7 +1023,6 @@ int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end) | |||
974 | page_cache_release(page); | 1023 | page_cache_release(page); |
975 | index++; | 1024 | index++; |
976 | } | 1025 | } |
977 | set_extent_dirty(tree, start, end, GFP_NOFS); | ||
978 | return 0; | 1026 | return 0; |
979 | } | 1027 | } |
980 | 1028 | ||
@@ -994,7 +1042,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) | |||
994 | page_cache_release(page); | 1042 | page_cache_release(page); |
995 | index++; | 1043 | index++; |
996 | } | 1044 | } |
997 | set_extent_writeback(tree, start, end, GFP_NOFS); | ||
998 | return 0; | 1045 | return 0; |
999 | } | 1046 | } |
1000 | 1047 | ||
@@ -1232,6 +1279,7 @@ static noinline u64 find_lock_delalloc_range(struct inode *inode, | |||
1232 | u64 delalloc_start; | 1279 | u64 delalloc_start; |
1233 | u64 delalloc_end; | 1280 | u64 delalloc_end; |
1234 | u64 found; | 1281 | u64 found; |
1282 | struct extent_state *cached_state = NULL; | ||
1235 | int ret; | 1283 | int ret; |
1236 | int loops = 0; | 1284 | int loops = 0; |
1237 | 1285 | ||
@@ -1269,6 +1317,7 @@ again: | |||
1269 | /* some of the pages are gone, lets avoid looping by | 1317 | /* some of the pages are gone, lets avoid looping by |
1270 | * shortening the size of the delalloc range we're searching | 1318 | * shortening the size of the delalloc range we're searching |
1271 | */ | 1319 | */ |
1320 | free_extent_state(cached_state); | ||
1272 | if (!loops) { | 1321 | if (!loops) { |
1273 | unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); | 1322 | unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); |
1274 | max_bytes = PAGE_CACHE_SIZE - offset; | 1323 | max_bytes = PAGE_CACHE_SIZE - offset; |
@@ -1282,18 +1331,21 @@ again: | |||
1282 | BUG_ON(ret); | 1331 | BUG_ON(ret); |
1283 | 1332 | ||
1284 | /* step three, lock the state bits for the whole range */ | 1333 | /* step three, lock the state bits for the whole range */ |
1285 | lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); | 1334 | lock_extent_bits(tree, delalloc_start, delalloc_end, |
1335 | 0, &cached_state, GFP_NOFS); | ||
1286 | 1336 | ||
1287 | /* then test to make sure it is all still delalloc */ | 1337 | /* then test to make sure it is all still delalloc */ |
1288 | ret = test_range_bit(tree, delalloc_start, delalloc_end, | 1338 | ret = test_range_bit(tree, delalloc_start, delalloc_end, |
1289 | EXTENT_DELALLOC, 1); | 1339 | EXTENT_DELALLOC, 1, cached_state); |
1290 | if (!ret) { | 1340 | if (!ret) { |
1291 | unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); | 1341 | unlock_extent_cached(tree, delalloc_start, delalloc_end, |
1342 | &cached_state, GFP_NOFS); | ||
1292 | __unlock_for_delalloc(inode, locked_page, | 1343 | __unlock_for_delalloc(inode, locked_page, |
1293 | delalloc_start, delalloc_end); | 1344 | delalloc_start, delalloc_end); |
1294 | cond_resched(); | 1345 | cond_resched(); |
1295 | goto again; | 1346 | goto again; |
1296 | } | 1347 | } |
1348 | free_extent_state(cached_state); | ||
1297 | *start = delalloc_start; | 1349 | *start = delalloc_start; |
1298 | *end = delalloc_end; | 1350 | *end = delalloc_end; |
1299 | out_failed: | 1351 | out_failed: |
@@ -1307,7 +1359,8 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
1307 | int clear_unlock, | 1359 | int clear_unlock, |
1308 | int clear_delalloc, int clear_dirty, | 1360 | int clear_delalloc, int clear_dirty, |
1309 | int set_writeback, | 1361 | int set_writeback, |
1310 | int end_writeback) | 1362 | int end_writeback, |
1363 | int set_private2) | ||
1311 | { | 1364 | { |
1312 | int ret; | 1365 | int ret; |
1313 | struct page *pages[16]; | 1366 | struct page *pages[16]; |
@@ -1325,8 +1378,9 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
1325 | if (clear_delalloc) | 1378 | if (clear_delalloc) |
1326 | clear_bits |= EXTENT_DELALLOC; | 1379 | clear_bits |= EXTENT_DELALLOC; |
1327 | 1380 | ||
1328 | clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS); | 1381 | clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); |
1329 | if (!(unlock_pages || clear_dirty || set_writeback || end_writeback)) | 1382 | if (!(unlock_pages || clear_dirty || set_writeback || end_writeback || |
1383 | set_private2)) | ||
1330 | return 0; | 1384 | return 0; |
1331 | 1385 | ||
1332 | while (nr_pages > 0) { | 1386 | while (nr_pages > 0) { |
@@ -1334,6 +1388,10 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
1334 | min_t(unsigned long, | 1388 | min_t(unsigned long, |
1335 | nr_pages, ARRAY_SIZE(pages)), pages); | 1389 | nr_pages, ARRAY_SIZE(pages)), pages); |
1336 | for (i = 0; i < ret; i++) { | 1390 | for (i = 0; i < ret; i++) { |
1391 | |||
1392 | if (set_private2) | ||
1393 | SetPagePrivate2(pages[i]); | ||
1394 | |||
1337 | if (pages[i] == locked_page) { | 1395 | if (pages[i] == locked_page) { |
1338 | page_cache_release(pages[i]); | 1396 | page_cache_release(pages[i]); |
1339 | continue; | 1397 | continue; |
@@ -1476,14 +1534,17 @@ out: | |||
1476 | * range is found set. | 1534 | * range is found set. |
1477 | */ | 1535 | */ |
1478 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | 1536 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, |
1479 | int bits, int filled) | 1537 | int bits, int filled, struct extent_state *cached) |
1480 | { | 1538 | { |
1481 | struct extent_state *state = NULL; | 1539 | struct extent_state *state = NULL; |
1482 | struct rb_node *node; | 1540 | struct rb_node *node; |
1483 | int bitset = 0; | 1541 | int bitset = 0; |
1484 | 1542 | ||
1485 | spin_lock(&tree->lock); | 1543 | spin_lock(&tree->lock); |
1486 | node = tree_search(tree, start); | 1544 | if (cached && cached->tree && cached->start == start) |
1545 | node = &cached->rb_node; | ||
1546 | else | ||
1547 | node = tree_search(tree, start); | ||
1487 | while (node && start <= end) { | 1548 | while (node && start <= end) { |
1488 | state = rb_entry(node, struct extent_state, rb_node); | 1549 | state = rb_entry(node, struct extent_state, rb_node); |
1489 | 1550 | ||
@@ -1503,6 +1564,10 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
1503 | bitset = 0; | 1564 | bitset = 0; |
1504 | break; | 1565 | break; |
1505 | } | 1566 | } |
1567 | |||
1568 | if (state->end == (u64)-1) | ||
1569 | break; | ||
1570 | |||
1506 | start = state->end + 1; | 1571 | start = state->end + 1; |
1507 | if (start > end) | 1572 | if (start > end) |
1508 | break; | 1573 | break; |
@@ -1526,7 +1591,7 @@ static int check_page_uptodate(struct extent_io_tree *tree, | |||
1526 | { | 1591 | { |
1527 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 1592 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; |
1528 | u64 end = start + PAGE_CACHE_SIZE - 1; | 1593 | u64 end = start + PAGE_CACHE_SIZE - 1; |
1529 | if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) | 1594 | if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL)) |
1530 | SetPageUptodate(page); | 1595 | SetPageUptodate(page); |
1531 | return 0; | 1596 | return 0; |
1532 | } | 1597 | } |
@@ -1540,7 +1605,7 @@ static int check_page_locked(struct extent_io_tree *tree, | |||
1540 | { | 1605 | { |
1541 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 1606 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; |
1542 | u64 end = start + PAGE_CACHE_SIZE - 1; | 1607 | u64 end = start + PAGE_CACHE_SIZE - 1; |
1543 | if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) | 1608 | if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) |
1544 | unlock_page(page); | 1609 | unlock_page(page); |
1545 | return 0; | 1610 | return 0; |
1546 | } | 1611 | } |
@@ -1552,10 +1617,7 @@ static int check_page_locked(struct extent_io_tree *tree, | |||
1552 | static int check_page_writeback(struct extent_io_tree *tree, | 1617 | static int check_page_writeback(struct extent_io_tree *tree, |
1553 | struct page *page) | 1618 | struct page *page) |
1554 | { | 1619 | { |
1555 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 1620 | end_page_writeback(page); |
1556 | u64 end = start + PAGE_CACHE_SIZE - 1; | ||
1557 | if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0)) | ||
1558 | end_page_writeback(page); | ||
1559 | return 0; | 1621 | return 0; |
1560 | } | 1622 | } |
1561 | 1623 | ||
@@ -1613,13 +1675,11 @@ static void end_bio_extent_writepage(struct bio *bio, int err) | |||
1613 | } | 1675 | } |
1614 | 1676 | ||
1615 | if (!uptodate) { | 1677 | if (!uptodate) { |
1616 | clear_extent_uptodate(tree, start, end, GFP_ATOMIC); | 1678 | clear_extent_uptodate(tree, start, end, GFP_NOFS); |
1617 | ClearPageUptodate(page); | 1679 | ClearPageUptodate(page); |
1618 | SetPageError(page); | 1680 | SetPageError(page); |
1619 | } | 1681 | } |
1620 | 1682 | ||
1621 | clear_extent_writeback(tree, start, end, GFP_ATOMIC); | ||
1622 | |||
1623 | if (whole_page) | 1683 | if (whole_page) |
1624 | end_page_writeback(page); | 1684 | end_page_writeback(page); |
1625 | else | 1685 | else |
@@ -1983,7 +2043,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
1983 | continue; | 2043 | continue; |
1984 | } | 2044 | } |
1985 | /* the get_extent function already copied into the page */ | 2045 | /* the get_extent function already copied into the page */ |
1986 | if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { | 2046 | if (test_range_bit(tree, cur, cur_end, |
2047 | EXTENT_UPTODATE, 1, NULL)) { | ||
1987 | check_page_uptodate(tree, page); | 2048 | check_page_uptodate(tree, page); |
1988 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); | 2049 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); |
1989 | cur = cur + iosize; | 2050 | cur = cur + iosize; |
@@ -2078,6 +2139,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2078 | u64 iosize; | 2139 | u64 iosize; |
2079 | u64 unlock_start; | 2140 | u64 unlock_start; |
2080 | sector_t sector; | 2141 | sector_t sector; |
2142 | struct extent_state *cached_state = NULL; | ||
2081 | struct extent_map *em; | 2143 | struct extent_map *em; |
2082 | struct block_device *bdev; | 2144 | struct block_device *bdev; |
2083 | int ret; | 2145 | int ret; |
@@ -2124,6 +2186,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2124 | delalloc_end = 0; | 2186 | delalloc_end = 0; |
2125 | page_started = 0; | 2187 | page_started = 0; |
2126 | if (!epd->extent_locked) { | 2188 | if (!epd->extent_locked) { |
2189 | u64 delalloc_to_write = 0; | ||
2127 | /* | 2190 | /* |
2128 | * make sure the wbc mapping index is at least updated | 2191 | * make sure the wbc mapping index is at least updated |
2129 | * to this page. | 2192 | * to this page. |
@@ -2143,8 +2206,24 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2143 | tree->ops->fill_delalloc(inode, page, delalloc_start, | 2206 | tree->ops->fill_delalloc(inode, page, delalloc_start, |
2144 | delalloc_end, &page_started, | 2207 | delalloc_end, &page_started, |
2145 | &nr_written); | 2208 | &nr_written); |
2209 | /* | ||
2210 | * delalloc_end is already one less than the total | ||
2211 | * length, so we don't subtract one from | ||
2212 | * PAGE_CACHE_SIZE | ||
2213 | */ | ||
2214 | delalloc_to_write += (delalloc_end - delalloc_start + | ||
2215 | PAGE_CACHE_SIZE) >> | ||
2216 | PAGE_CACHE_SHIFT; | ||
2146 | delalloc_start = delalloc_end + 1; | 2217 | delalloc_start = delalloc_end + 1; |
2147 | } | 2218 | } |
2219 | if (wbc->nr_to_write < delalloc_to_write) { | ||
2220 | int thresh = 8192; | ||
2221 | |||
2222 | if (delalloc_to_write < thresh * 2) | ||
2223 | thresh = delalloc_to_write; | ||
2224 | wbc->nr_to_write = min_t(u64, delalloc_to_write, | ||
2225 | thresh); | ||
2226 | } | ||
2148 | 2227 | ||
2149 | /* did the fill delalloc function already unlock and start | 2228 | /* did the fill delalloc function already unlock and start |
2150 | * the IO? | 2229 | * the IO? |
@@ -2160,15 +2239,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2160 | goto done_unlocked; | 2239 | goto done_unlocked; |
2161 | } | 2240 | } |
2162 | } | 2241 | } |
2163 | lock_extent(tree, start, page_end, GFP_NOFS); | ||
2164 | |||
2165 | unlock_start = start; | ||
2166 | |||
2167 | if (tree->ops && tree->ops->writepage_start_hook) { | 2242 | if (tree->ops && tree->ops->writepage_start_hook) { |
2168 | ret = tree->ops->writepage_start_hook(page, start, | 2243 | ret = tree->ops->writepage_start_hook(page, start, |
2169 | page_end); | 2244 | page_end); |
2170 | if (ret == -EAGAIN) { | 2245 | if (ret == -EAGAIN) { |
2171 | unlock_extent(tree, start, page_end, GFP_NOFS); | ||
2172 | redirty_page_for_writepage(wbc, page); | 2246 | redirty_page_for_writepage(wbc, page); |
2173 | update_nr_written(page, wbc, nr_written); | 2247 | update_nr_written(page, wbc, nr_written); |
2174 | unlock_page(page); | 2248 | unlock_page(page); |
@@ -2184,12 +2258,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2184 | update_nr_written(page, wbc, nr_written + 1); | 2258 | update_nr_written(page, wbc, nr_written + 1); |
2185 | 2259 | ||
2186 | end = page_end; | 2260 | end = page_end; |
2187 | if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) | ||
2188 | printk(KERN_ERR "btrfs delalloc bits after lock_extent\n"); | ||
2189 | |||
2190 | if (last_byte <= start) { | 2261 | if (last_byte <= start) { |
2191 | clear_extent_dirty(tree, start, page_end, GFP_NOFS); | ||
2192 | unlock_extent(tree, start, page_end, GFP_NOFS); | ||
2193 | if (tree->ops && tree->ops->writepage_end_io_hook) | 2262 | if (tree->ops && tree->ops->writepage_end_io_hook) |
2194 | tree->ops->writepage_end_io_hook(page, start, | 2263 | tree->ops->writepage_end_io_hook(page, start, |
2195 | page_end, NULL, 1); | 2264 | page_end, NULL, 1); |
@@ -2197,13 +2266,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2197 | goto done; | 2266 | goto done; |
2198 | } | 2267 | } |
2199 | 2268 | ||
2200 | set_extent_uptodate(tree, start, page_end, GFP_NOFS); | ||
2201 | blocksize = inode->i_sb->s_blocksize; | 2269 | blocksize = inode->i_sb->s_blocksize; |
2202 | 2270 | ||
2203 | while (cur <= end) { | 2271 | while (cur <= end) { |
2204 | if (cur >= last_byte) { | 2272 | if (cur >= last_byte) { |
2205 | clear_extent_dirty(tree, cur, page_end, GFP_NOFS); | ||
2206 | unlock_extent(tree, unlock_start, page_end, GFP_NOFS); | ||
2207 | if (tree->ops && tree->ops->writepage_end_io_hook) | 2273 | if (tree->ops && tree->ops->writepage_end_io_hook) |
2208 | tree->ops->writepage_end_io_hook(page, cur, | 2274 | tree->ops->writepage_end_io_hook(page, cur, |
2209 | page_end, NULL, 1); | 2275 | page_end, NULL, 1); |
@@ -2235,12 +2301,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2235 | */ | 2301 | */ |
2236 | if (compressed || block_start == EXTENT_MAP_HOLE || | 2302 | if (compressed || block_start == EXTENT_MAP_HOLE || |
2237 | block_start == EXTENT_MAP_INLINE) { | 2303 | block_start == EXTENT_MAP_INLINE) { |
2238 | clear_extent_dirty(tree, cur, | ||
2239 | cur + iosize - 1, GFP_NOFS); | ||
2240 | |||
2241 | unlock_extent(tree, unlock_start, cur + iosize - 1, | ||
2242 | GFP_NOFS); | ||
2243 | |||
2244 | /* | 2304 | /* |
2245 | * end_io notification does not happen here for | 2305 | * end_io notification does not happen here for |
2246 | * compressed extents | 2306 | * compressed extents |
@@ -2265,13 +2325,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2265 | } | 2325 | } |
2266 | /* leave this out until we have a page_mkwrite call */ | 2326 | /* leave this out until we have a page_mkwrite call */ |
2267 | if (0 && !test_range_bit(tree, cur, cur + iosize - 1, | 2327 | if (0 && !test_range_bit(tree, cur, cur + iosize - 1, |
2268 | EXTENT_DIRTY, 0)) { | 2328 | EXTENT_DIRTY, 0, NULL)) { |
2269 | cur = cur + iosize; | 2329 | cur = cur + iosize; |
2270 | pg_offset += iosize; | 2330 | pg_offset += iosize; |
2271 | continue; | 2331 | continue; |
2272 | } | 2332 | } |
2273 | 2333 | ||
2274 | clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); | ||
2275 | if (tree->ops && tree->ops->writepage_io_hook) { | 2334 | if (tree->ops && tree->ops->writepage_io_hook) { |
2276 | ret = tree->ops->writepage_io_hook(page, cur, | 2335 | ret = tree->ops->writepage_io_hook(page, cur, |
2277 | cur + iosize - 1); | 2336 | cur + iosize - 1); |
@@ -2309,12 +2368,12 @@ done: | |||
2309 | set_page_writeback(page); | 2368 | set_page_writeback(page); |
2310 | end_page_writeback(page); | 2369 | end_page_writeback(page); |
2311 | } | 2370 | } |
2312 | if (unlock_start <= page_end) | ||
2313 | unlock_extent(tree, unlock_start, page_end, GFP_NOFS); | ||
2314 | unlock_page(page); | 2371 | unlock_page(page); |
2315 | 2372 | ||
2316 | done_unlocked: | 2373 | done_unlocked: |
2317 | 2374 | ||
2375 | /* drop our reference on any cached states */ | ||
2376 | free_extent_state(cached_state); | ||
2318 | return 0; | 2377 | return 0; |
2319 | } | 2378 | } |
2320 | 2379 | ||
@@ -2339,9 +2398,9 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
2339 | writepage_t writepage, void *data, | 2398 | writepage_t writepage, void *data, |
2340 | void (*flush_fn)(void *)) | 2399 | void (*flush_fn)(void *)) |
2341 | { | 2400 | { |
2342 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
2343 | int ret = 0; | 2401 | int ret = 0; |
2344 | int done = 0; | 2402 | int done = 0; |
2403 | int nr_to_write_done = 0; | ||
2345 | struct pagevec pvec; | 2404 | struct pagevec pvec; |
2346 | int nr_pages; | 2405 | int nr_pages; |
2347 | pgoff_t index; | 2406 | pgoff_t index; |
@@ -2361,7 +2420,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
2361 | scanned = 1; | 2420 | scanned = 1; |
2362 | } | 2421 | } |
2363 | retry: | 2422 | retry: |
2364 | while (!done && (index <= end) && | 2423 | while (!done && !nr_to_write_done && (index <= end) && |
2365 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 2424 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, |
2366 | PAGECACHE_TAG_DIRTY, min(end - index, | 2425 | PAGECACHE_TAG_DIRTY, min(end - index, |
2367 | (pgoff_t)PAGEVEC_SIZE-1) + 1))) { | 2426 | (pgoff_t)PAGEVEC_SIZE-1) + 1))) { |
@@ -2412,12 +2471,15 @@ retry: | |||
2412 | unlock_page(page); | 2471 | unlock_page(page); |
2413 | ret = 0; | 2472 | ret = 0; |
2414 | } | 2473 | } |
2415 | if (ret || wbc->nr_to_write <= 0) | 2474 | if (ret) |
2416 | done = 1; | ||
2417 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | ||
2418 | wbc->encountered_congestion = 1; | ||
2419 | done = 1; | 2475 | done = 1; |
2420 | } | 2476 | |
2477 | /* | ||
2478 | * the filesystem may choose to bump up nr_to_write. | ||
2479 | * We have to make sure to honor the new nr_to_write | ||
2480 | * at any time | ||
2481 | */ | ||
2482 | nr_to_write_done = wbc->nr_to_write <= 0; | ||
2421 | } | 2483 | } |
2422 | pagevec_release(&pvec); | 2484 | pagevec_release(&pvec); |
2423 | cond_resched(); | 2485 | cond_resched(); |
@@ -2604,10 +2666,10 @@ int extent_invalidatepage(struct extent_io_tree *tree, | |||
2604 | return 0; | 2666 | return 0; |
2605 | 2667 | ||
2606 | lock_extent(tree, start, end, GFP_NOFS); | 2668 | lock_extent(tree, start, end, GFP_NOFS); |
2607 | wait_on_extent_writeback(tree, start, end); | 2669 | wait_on_page_writeback(page); |
2608 | clear_extent_bit(tree, start, end, | 2670 | clear_extent_bit(tree, start, end, |
2609 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, | 2671 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, |
2610 | 1, 1, GFP_NOFS); | 2672 | 1, 1, NULL, GFP_NOFS); |
2611 | return 0; | 2673 | return 0; |
2612 | } | 2674 | } |
2613 | 2675 | ||
@@ -2687,7 +2749,7 @@ int extent_prepare_write(struct extent_io_tree *tree, | |||
2687 | !isnew && !PageUptodate(page) && | 2749 | !isnew && !PageUptodate(page) && |
2688 | (block_off_end > to || block_off_start < from) && | 2750 | (block_off_end > to || block_off_start < from) && |
2689 | !test_range_bit(tree, block_start, cur_end, | 2751 | !test_range_bit(tree, block_start, cur_end, |
2690 | EXTENT_UPTODATE, 1)) { | 2752 | EXTENT_UPTODATE, 1, NULL)) { |
2691 | u64 sector; | 2753 | u64 sector; |
2692 | u64 extent_offset = block_start - em->start; | 2754 | u64 extent_offset = block_start - em->start; |
2693 | size_t iosize; | 2755 | size_t iosize; |
@@ -2701,7 +2763,7 @@ int extent_prepare_write(struct extent_io_tree *tree, | |||
2701 | */ | 2763 | */ |
2702 | set_extent_bit(tree, block_start, | 2764 | set_extent_bit(tree, block_start, |
2703 | block_start + iosize - 1, | 2765 | block_start + iosize - 1, |
2704 | EXTENT_LOCKED, 0, NULL, GFP_NOFS); | 2766 | EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS); |
2705 | ret = submit_extent_page(READ, tree, page, | 2767 | ret = submit_extent_page(READ, tree, page, |
2706 | sector, iosize, page_offset, em->bdev, | 2768 | sector, iosize, page_offset, em->bdev, |
2707 | NULL, 1, | 2769 | NULL, 1, |
@@ -2742,13 +2804,18 @@ int try_release_extent_state(struct extent_map_tree *map, | |||
2742 | int ret = 1; | 2804 | int ret = 1; |
2743 | 2805 | ||
2744 | if (test_range_bit(tree, start, end, | 2806 | if (test_range_bit(tree, start, end, |
2745 | EXTENT_IOBITS | EXTENT_ORDERED, 0)) | 2807 | EXTENT_IOBITS, 0, NULL)) |
2746 | ret = 0; | 2808 | ret = 0; |
2747 | else { | 2809 | else { |
2748 | if ((mask & GFP_NOFS) == GFP_NOFS) | 2810 | if ((mask & GFP_NOFS) == GFP_NOFS) |
2749 | mask = GFP_NOFS; | 2811 | mask = GFP_NOFS; |
2750 | clear_extent_bit(tree, start, end, EXTENT_UPTODATE, | 2812 | /* |
2751 | 1, 1, mask); | 2813 | * at this point we can safely clear everything except the |
2814 | * locked bit and the nodatasum bit | ||
2815 | */ | ||
2816 | clear_extent_bit(tree, start, end, | ||
2817 | ~(EXTENT_LOCKED | EXTENT_NODATASUM), | ||
2818 | 0, 0, NULL, mask); | ||
2752 | } | 2819 | } |
2753 | return ret; | 2820 | return ret; |
2754 | } | 2821 | } |
@@ -2771,29 +2838,28 @@ int try_release_extent_mapping(struct extent_map_tree *map, | |||
2771 | u64 len; | 2838 | u64 len; |
2772 | while (start <= end) { | 2839 | while (start <= end) { |
2773 | len = end - start + 1; | 2840 | len = end - start + 1; |
2774 | spin_lock(&map->lock); | 2841 | write_lock(&map->lock); |
2775 | em = lookup_extent_mapping(map, start, len); | 2842 | em = lookup_extent_mapping(map, start, len); |
2776 | if (!em || IS_ERR(em)) { | 2843 | if (!em || IS_ERR(em)) { |
2777 | spin_unlock(&map->lock); | 2844 | write_unlock(&map->lock); |
2778 | break; | 2845 | break; |
2779 | } | 2846 | } |
2780 | if (test_bit(EXTENT_FLAG_PINNED, &em->flags) || | 2847 | if (test_bit(EXTENT_FLAG_PINNED, &em->flags) || |
2781 | em->start != start) { | 2848 | em->start != start) { |
2782 | spin_unlock(&map->lock); | 2849 | write_unlock(&map->lock); |
2783 | free_extent_map(em); | 2850 | free_extent_map(em); |
2784 | break; | 2851 | break; |
2785 | } | 2852 | } |
2786 | if (!test_range_bit(tree, em->start, | 2853 | if (!test_range_bit(tree, em->start, |
2787 | extent_map_end(em) - 1, | 2854 | extent_map_end(em) - 1, |
2788 | EXTENT_LOCKED | EXTENT_WRITEBACK | | 2855 | EXTENT_LOCKED | EXTENT_WRITEBACK, |
2789 | EXTENT_ORDERED, | 2856 | 0, NULL)) { |
2790 | 0)) { | ||
2791 | remove_extent_mapping(map, em); | 2857 | remove_extent_mapping(map, em); |
2792 | /* once for the rb tree */ | 2858 | /* once for the rb tree */ |
2793 | free_extent_map(em); | 2859 | free_extent_map(em); |
2794 | } | 2860 | } |
2795 | start = extent_map_end(em); | 2861 | start = extent_map_end(em); |
2796 | spin_unlock(&map->lock); | 2862 | write_unlock(&map->lock); |
2797 | 2863 | ||
2798 | /* once for us */ | 2864 | /* once for us */ |
2799 | free_extent_map(em); | 2865 | free_extent_map(em); |
@@ -3203,7 +3269,7 @@ int extent_range_uptodate(struct extent_io_tree *tree, | |||
3203 | int uptodate; | 3269 | int uptodate; |
3204 | unsigned long index; | 3270 | unsigned long index; |
3205 | 3271 | ||
3206 | ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1); | 3272 | ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL); |
3207 | if (ret) | 3273 | if (ret) |
3208 | return 1; | 3274 | return 1; |
3209 | while (start <= end) { | 3275 | while (start <= end) { |
@@ -3233,7 +3299,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, | |||
3233 | return 1; | 3299 | return 1; |
3234 | 3300 | ||
3235 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, | 3301 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, |
3236 | EXTENT_UPTODATE, 1); | 3302 | EXTENT_UPTODATE, 1, NULL); |
3237 | if (ret) | 3303 | if (ret) |
3238 | return ret; | 3304 | return ret; |
3239 | 3305 | ||
@@ -3269,7 +3335,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
3269 | return 0; | 3335 | return 0; |
3270 | 3336 | ||
3271 | if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, | 3337 | if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, |
3272 | EXTENT_UPTODATE, 1)) { | 3338 | EXTENT_UPTODATE, 1, NULL)) { |
3273 | return 0; | 3339 | return 0; |
3274 | } | 3340 | } |
3275 | 3341 | ||
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 5bc20abf3f3d..14ed16fd862d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -13,10 +13,8 @@ | |||
13 | #define EXTENT_DEFRAG (1 << 6) | 13 | #define EXTENT_DEFRAG (1 << 6) |
14 | #define EXTENT_DEFRAG_DONE (1 << 7) | 14 | #define EXTENT_DEFRAG_DONE (1 << 7) |
15 | #define EXTENT_BUFFER_FILLED (1 << 8) | 15 | #define EXTENT_BUFFER_FILLED (1 << 8) |
16 | #define EXTENT_ORDERED (1 << 9) | 16 | #define EXTENT_BOUNDARY (1 << 9) |
17 | #define EXTENT_ORDERED_METADATA (1 << 10) | 17 | #define EXTENT_NODATASUM (1 << 10) |
18 | #define EXTENT_BOUNDARY (1 << 11) | ||
19 | #define EXTENT_NODATASUM (1 << 12) | ||
20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 18 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
21 | 19 | ||
22 | /* flags for bio submission */ | 20 | /* flags for bio submission */ |
@@ -142,6 +140,8 @@ int try_release_extent_state(struct extent_map_tree *map, | |||
142 | struct extent_io_tree *tree, struct page *page, | 140 | struct extent_io_tree *tree, struct page *page, |
143 | gfp_t mask); | 141 | gfp_t mask); |
144 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); | 142 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); |
143 | int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | ||
144 | int bits, struct extent_state **cached, gfp_t mask); | ||
145 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); | 145 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); |
146 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, | 146 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, |
147 | gfp_t mask); | 147 | gfp_t mask); |
@@ -155,11 +155,12 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
155 | u64 max_bytes, unsigned long bits); | 155 | u64 max_bytes, unsigned long bits); |
156 | 156 | ||
157 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | 157 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, |
158 | int bits, int filled); | 158 | int bits, int filled, struct extent_state *cached_state); |
159 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 159 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
160 | int bits, gfp_t mask); | 160 | int bits, gfp_t mask); |
161 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 161 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
162 | int bits, int wake, int delete, gfp_t mask); | 162 | int bits, int wake, int delete, struct extent_state **cached, |
163 | gfp_t mask); | ||
163 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 164 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
164 | int bits, gfp_t mask); | 165 | int bits, gfp_t mask); |
165 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 166 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
@@ -282,5 +283,6 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
282 | int clear_unlock, | 283 | int clear_unlock, |
283 | int clear_delalloc, int clear_dirty, | 284 | int clear_delalloc, int clear_dirty, |
284 | int set_writeback, | 285 | int set_writeback, |
285 | int end_writeback); | 286 | int end_writeback, |
287 | int set_private2); | ||
286 | #endif | 288 | #endif |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 30c9365861e6..2c726b7b9faa 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -36,7 +36,7 @@ void extent_map_exit(void) | |||
36 | void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) | 36 | void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) |
37 | { | 37 | { |
38 | tree->map.rb_node = NULL; | 38 | tree->map.rb_node = NULL; |
39 | spin_lock_init(&tree->lock); | 39 | rwlock_init(&tree->lock); |
40 | } | 40 | } |
41 | 41 | ||
42 | /** | 42 | /** |
@@ -198,6 +198,56 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next) | |||
198 | return 0; | 198 | return 0; |
199 | } | 199 | } |
200 | 200 | ||
201 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) | ||
202 | { | ||
203 | int ret = 0; | ||
204 | struct extent_map *merge = NULL; | ||
205 | struct rb_node *rb; | ||
206 | struct extent_map *em; | ||
207 | |||
208 | write_lock(&tree->lock); | ||
209 | em = lookup_extent_mapping(tree, start, len); | ||
210 | |||
211 | WARN_ON(em->start != start || !em); | ||
212 | |||
213 | if (!em) | ||
214 | goto out; | ||
215 | |||
216 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
217 | |||
218 | if (em->start != 0) { | ||
219 | rb = rb_prev(&em->rb_node); | ||
220 | if (rb) | ||
221 | merge = rb_entry(rb, struct extent_map, rb_node); | ||
222 | if (rb && mergable_maps(merge, em)) { | ||
223 | em->start = merge->start; | ||
224 | em->len += merge->len; | ||
225 | em->block_len += merge->block_len; | ||
226 | em->block_start = merge->block_start; | ||
227 | merge->in_tree = 0; | ||
228 | rb_erase(&merge->rb_node, &tree->map); | ||
229 | free_extent_map(merge); | ||
230 | } | ||
231 | } | ||
232 | |||
233 | rb = rb_next(&em->rb_node); | ||
234 | if (rb) | ||
235 | merge = rb_entry(rb, struct extent_map, rb_node); | ||
236 | if (rb && mergable_maps(em, merge)) { | ||
237 | em->len += merge->len; | ||
238 | em->block_len += merge->len; | ||
239 | rb_erase(&merge->rb_node, &tree->map); | ||
240 | merge->in_tree = 0; | ||
241 | free_extent_map(merge); | ||
242 | } | ||
243 | |||
244 | free_extent_map(em); | ||
245 | out: | ||
246 | write_unlock(&tree->lock); | ||
247 | return ret; | ||
248 | |||
249 | } | ||
250 | |||
201 | /** | 251 | /** |
202 | * add_extent_mapping - add new extent map to the extent tree | 252 | * add_extent_mapping - add new extent map to the extent tree |
203 | * @tree: tree to insert new map in | 253 | * @tree: tree to insert new map in |
@@ -222,7 +272,6 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
222 | ret = -EEXIST; | 272 | ret = -EEXIST; |
223 | goto out; | 273 | goto out; |
224 | } | 274 | } |
225 | assert_spin_locked(&tree->lock); | ||
226 | rb = tree_insert(&tree->map, em->start, &em->rb_node); | 275 | rb = tree_insert(&tree->map, em->start, &em->rb_node); |
227 | if (rb) { | 276 | if (rb) { |
228 | ret = -EEXIST; | 277 | ret = -EEXIST; |
@@ -285,7 +334,6 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | |||
285 | struct rb_node *next = NULL; | 334 | struct rb_node *next = NULL; |
286 | u64 end = range_end(start, len); | 335 | u64 end = range_end(start, len); |
287 | 336 | ||
288 | assert_spin_locked(&tree->lock); | ||
289 | rb_node = __tree_search(&tree->map, start, &prev, &next); | 337 | rb_node = __tree_search(&tree->map, start, &prev, &next); |
290 | if (!rb_node && prev) { | 338 | if (!rb_node && prev) { |
291 | em = rb_entry(prev, struct extent_map, rb_node); | 339 | em = rb_entry(prev, struct extent_map, rb_node); |
@@ -319,6 +367,54 @@ out: | |||
319 | } | 367 | } |
320 | 368 | ||
321 | /** | 369 | /** |
370 | * search_extent_mapping - find a nearby extent map | ||
371 | * @tree: tree to lookup in | ||
372 | * @start: byte offset to start the search | ||
373 | * @len: length of the lookup range | ||
374 | * | ||
375 | * Find and return the first extent_map struct in @tree that intersects the | ||
376 | * [start, len] range. | ||
377 | * | ||
378 | * If one can't be found, any nearby extent may be returned | ||
379 | */ | ||
380 | struct extent_map *search_extent_mapping(struct extent_map_tree *tree, | ||
381 | u64 start, u64 len) | ||
382 | { | ||
383 | struct extent_map *em; | ||
384 | struct rb_node *rb_node; | ||
385 | struct rb_node *prev = NULL; | ||
386 | struct rb_node *next = NULL; | ||
387 | |||
388 | rb_node = __tree_search(&tree->map, start, &prev, &next); | ||
389 | if (!rb_node && prev) { | ||
390 | em = rb_entry(prev, struct extent_map, rb_node); | ||
391 | goto found; | ||
392 | } | ||
393 | if (!rb_node && next) { | ||
394 | em = rb_entry(next, struct extent_map, rb_node); | ||
395 | goto found; | ||
396 | } | ||
397 | if (!rb_node) { | ||
398 | em = NULL; | ||
399 | goto out; | ||
400 | } | ||
401 | if (IS_ERR(rb_node)) { | ||
402 | em = ERR_PTR(PTR_ERR(rb_node)); | ||
403 | goto out; | ||
404 | } | ||
405 | em = rb_entry(rb_node, struct extent_map, rb_node); | ||
406 | goto found; | ||
407 | |||
408 | em = NULL; | ||
409 | goto out; | ||
410 | |||
411 | found: | ||
412 | atomic_inc(&em->refs); | ||
413 | out: | ||
414 | return em; | ||
415 | } | ||
416 | |||
417 | /** | ||
322 | * remove_extent_mapping - removes an extent_map from the extent tree | 418 | * remove_extent_mapping - removes an extent_map from the extent tree |
323 | * @tree: extent tree to remove from | 419 | * @tree: extent tree to remove from |
324 | * @em: extent map being removed | 420 | * @em: extent map being removed |
@@ -331,7 +427,6 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) | |||
331 | int ret = 0; | 427 | int ret = 0; |
332 | 428 | ||
333 | WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); | 429 | WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); |
334 | assert_spin_locked(&tree->lock); | ||
335 | rb_erase(&em->rb_node, &tree->map); | 430 | rb_erase(&em->rb_node, &tree->map); |
336 | em->in_tree = 0; | 431 | em->in_tree = 0; |
337 | return ret; | 432 | return ret; |
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index fb6eeef06bb0..ab6d74b6e647 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
@@ -31,7 +31,7 @@ struct extent_map { | |||
31 | 31 | ||
32 | struct extent_map_tree { | 32 | struct extent_map_tree { |
33 | struct rb_root map; | 33 | struct rb_root map; |
34 | spinlock_t lock; | 34 | rwlock_t lock; |
35 | }; | 35 | }; |
36 | 36 | ||
37 | static inline u64 extent_map_end(struct extent_map *em) | 37 | static inline u64 extent_map_end(struct extent_map *em) |
@@ -59,4 +59,7 @@ struct extent_map *alloc_extent_map(gfp_t mask); | |||
59 | void free_extent_map(struct extent_map *em); | 59 | void free_extent_map(struct extent_map *em); |
60 | int __init extent_map_init(void); | 60 | int __init extent_map_init(void); |
61 | void extent_map_exit(void); | 61 | void extent_map_exit(void); |
62 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len); | ||
63 | struct extent_map *search_extent_mapping(struct extent_map_tree *tree, | ||
64 | u64 start, u64 len); | ||
62 | #endif | 65 | #endif |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4b833972273a..571ad3c13b47 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -112,8 +112,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
112 | int err = 0; | 112 | int err = 0; |
113 | int i; | 113 | int i; |
114 | struct inode *inode = fdentry(file)->d_inode; | 114 | struct inode *inode = fdentry(file)->d_inode; |
115 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
116 | u64 hint_byte; | ||
117 | u64 num_bytes; | 115 | u64 num_bytes; |
118 | u64 start_pos; | 116 | u64 start_pos; |
119 | u64 end_of_last_block; | 117 | u64 end_of_last_block; |
@@ -125,22 +123,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
125 | root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 123 | root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
126 | 124 | ||
127 | end_of_last_block = start_pos + num_bytes - 1; | 125 | end_of_last_block = start_pos + num_bytes - 1; |
128 | |||
129 | lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); | ||
130 | trans = btrfs_join_transaction(root, 1); | ||
131 | if (!trans) { | ||
132 | err = -ENOMEM; | ||
133 | goto out_unlock; | ||
134 | } | ||
135 | btrfs_set_trans_block_group(trans, inode); | ||
136 | hint_byte = 0; | ||
137 | |||
138 | set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS); | ||
139 | |||
140 | /* check for reserved extents on each page, we don't want | ||
141 | * to reset the delalloc bit on things that already have | ||
142 | * extents reserved. | ||
143 | */ | ||
144 | btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); | 126 | btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); |
145 | for (i = 0; i < num_pages; i++) { | 127 | for (i = 0; i < num_pages; i++) { |
146 | struct page *p = pages[i]; | 128 | struct page *p = pages[i]; |
@@ -155,9 +137,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
155 | * at this time. | 137 | * at this time. |
156 | */ | 138 | */ |
157 | } | 139 | } |
158 | err = btrfs_end_transaction(trans, root); | ||
159 | out_unlock: | ||
160 | unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); | ||
161 | return err; | 140 | return err; |
162 | } | 141 | } |
163 | 142 | ||
@@ -189,18 +168,18 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
189 | if (!split2) | 168 | if (!split2) |
190 | split2 = alloc_extent_map(GFP_NOFS); | 169 | split2 = alloc_extent_map(GFP_NOFS); |
191 | 170 | ||
192 | spin_lock(&em_tree->lock); | 171 | write_lock(&em_tree->lock); |
193 | em = lookup_extent_mapping(em_tree, start, len); | 172 | em = lookup_extent_mapping(em_tree, start, len); |
194 | if (!em) { | 173 | if (!em) { |
195 | spin_unlock(&em_tree->lock); | 174 | write_unlock(&em_tree->lock); |
196 | break; | 175 | break; |
197 | } | 176 | } |
198 | flags = em->flags; | 177 | flags = em->flags; |
199 | if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { | 178 | if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { |
200 | spin_unlock(&em_tree->lock); | ||
201 | if (em->start <= start && | 179 | if (em->start <= start && |
202 | (!testend || em->start + em->len >= start + len)) { | 180 | (!testend || em->start + em->len >= start + len)) { |
203 | free_extent_map(em); | 181 | free_extent_map(em); |
182 | write_unlock(&em_tree->lock); | ||
204 | break; | 183 | break; |
205 | } | 184 | } |
206 | if (start < em->start) { | 185 | if (start < em->start) { |
@@ -210,6 +189,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
210 | start = em->start + em->len; | 189 | start = em->start + em->len; |
211 | } | 190 | } |
212 | free_extent_map(em); | 191 | free_extent_map(em); |
192 | write_unlock(&em_tree->lock); | ||
213 | continue; | 193 | continue; |
214 | } | 194 | } |
215 | compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 195 | compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
@@ -260,7 +240,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
260 | free_extent_map(split); | 240 | free_extent_map(split); |
261 | split = NULL; | 241 | split = NULL; |
262 | } | 242 | } |
263 | spin_unlock(&em_tree->lock); | 243 | write_unlock(&em_tree->lock); |
264 | 244 | ||
265 | /* once for us */ | 245 | /* once for us */ |
266 | free_extent_map(em); | 246 | free_extent_map(em); |
@@ -289,7 +269,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
289 | noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, | 269 | noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, |
290 | struct btrfs_root *root, struct inode *inode, | 270 | struct btrfs_root *root, struct inode *inode, |
291 | u64 start, u64 end, u64 locked_end, | 271 | u64 start, u64 end, u64 locked_end, |
292 | u64 inline_limit, u64 *hint_byte) | 272 | u64 inline_limit, u64 *hint_byte, int drop_cache) |
293 | { | 273 | { |
294 | u64 extent_end = 0; | 274 | u64 extent_end = 0; |
295 | u64 search_start = start; | 275 | u64 search_start = start; |
@@ -314,7 +294,8 @@ noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
314 | int ret; | 294 | int ret; |
315 | 295 | ||
316 | inline_limit = 0; | 296 | inline_limit = 0; |
317 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | 297 | if (drop_cache) |
298 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | ||
318 | 299 | ||
319 | path = btrfs_alloc_path(); | 300 | path = btrfs_alloc_path(); |
320 | if (!path) | 301 | if (!path) |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 5edcee3a617f..5c2caad76212 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -259,7 +259,9 @@ static int link_free_space(struct btrfs_block_group_cache *block_group, | |||
259 | 259 | ||
260 | static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) | 260 | static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) |
261 | { | 261 | { |
262 | u64 max_bytes, possible_bytes; | 262 | u64 max_bytes; |
263 | u64 bitmap_bytes; | ||
264 | u64 extent_bytes; | ||
263 | 265 | ||
264 | /* | 266 | /* |
265 | * The goal is to keep the total amount of memory used per 1gb of space | 267 | * The goal is to keep the total amount of memory used per 1gb of space |
@@ -269,22 +271,27 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) | |||
269 | max_bytes = MAX_CACHE_BYTES_PER_GIG * | 271 | max_bytes = MAX_CACHE_BYTES_PER_GIG * |
270 | (div64_u64(block_group->key.offset, 1024 * 1024 * 1024)); | 272 | (div64_u64(block_group->key.offset, 1024 * 1024 * 1024)); |
271 | 273 | ||
272 | possible_bytes = (block_group->total_bitmaps * PAGE_CACHE_SIZE) + | 274 | /* |
273 | (sizeof(struct btrfs_free_space) * | 275 | * we want to account for 1 more bitmap than what we have so we can make |
274 | block_group->extents_thresh); | 276 | * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as |
277 | * we add more bitmaps. | ||
278 | */ | ||
279 | bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE; | ||
275 | 280 | ||
276 | if (possible_bytes > max_bytes) { | 281 | if (bitmap_bytes >= max_bytes) { |
277 | int extent_bytes = max_bytes - | 282 | block_group->extents_thresh = 0; |
278 | (block_group->total_bitmaps * PAGE_CACHE_SIZE); | 283 | return; |
284 | } | ||
279 | 285 | ||
280 | if (extent_bytes <= 0) { | 286 | /* |
281 | block_group->extents_thresh = 0; | 287 | * we want the extent entry threshold to always be at most 1/2 the max |
282 | return; | 288 | * bytes we can have, or whatever is less than that. |
283 | } | 289 | */ |
290 | extent_bytes = max_bytes - bitmap_bytes; | ||
291 | extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2)); | ||
284 | 292 | ||
285 | block_group->extents_thresh = extent_bytes / | 293 | block_group->extents_thresh = |
286 | (sizeof(struct btrfs_free_space)); | 294 | div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); |
287 | } | ||
288 | } | 295 | } |
289 | 296 | ||
290 | static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group, | 297 | static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group, |
@@ -403,6 +410,7 @@ static void add_new_bitmap(struct btrfs_block_group_cache *block_group, | |||
403 | BUG_ON(block_group->total_bitmaps >= max_bitmaps); | 410 | BUG_ON(block_group->total_bitmaps >= max_bitmaps); |
404 | 411 | ||
405 | info->offset = offset_to_bitmap(block_group, offset); | 412 | info->offset = offset_to_bitmap(block_group, offset); |
413 | info->bytes = 0; | ||
406 | link_free_space(block_group, info); | 414 | link_free_space(block_group, info); |
407 | block_group->total_bitmaps++; | 415 | block_group->total_bitmaps++; |
408 | 416 | ||
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 6b627c611808..72ce3c173d6a 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c | |||
@@ -149,6 +149,8 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, | |||
149 | ptr = (unsigned long)(ref + 1); | 149 | ptr = (unsigned long)(ref + 1); |
150 | ret = 0; | 150 | ret = 0; |
151 | } else if (ret < 0) { | 151 | } else if (ret < 0) { |
152 | if (ret == -EOVERFLOW) | ||
153 | ret = -EMLINK; | ||
152 | goto out; | 154 | goto out; |
153 | } else { | 155 | } else { |
154 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0], | 156 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0], |
@@ -177,8 +179,6 @@ int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, | |||
177 | 179 | ||
178 | ret = btrfs_insert_empty_item(trans, root, path, &key, | 180 | ret = btrfs_insert_empty_item(trans, root, path, &key, |
179 | sizeof(struct btrfs_inode_item)); | 181 | sizeof(struct btrfs_inode_item)); |
180 | if (ret == 0 && objectid > root->highest_inode) | ||
181 | root->highest_inode = objectid; | ||
182 | return ret; | 182 | return ret; |
183 | } | 183 | } |
184 | 184 | ||
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 9abbced1123d..c56eb5909172 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -43,9 +43,10 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) | |||
43 | slot = path->slots[0] - 1; | 43 | slot = path->slots[0] - 1; |
44 | l = path->nodes[0]; | 44 | l = path->nodes[0]; |
45 | btrfs_item_key_to_cpu(l, &found_key, slot); | 45 | btrfs_item_key_to_cpu(l, &found_key, slot); |
46 | *objectid = found_key.objectid; | 46 | *objectid = max_t(u64, found_key.objectid, |
47 | BTRFS_FIRST_FREE_OBJECTID - 1); | ||
47 | } else { | 48 | } else { |
48 | *objectid = BTRFS_FIRST_FREE_OBJECTID; | 49 | *objectid = BTRFS_FIRST_FREE_OBJECTID - 1; |
49 | } | 50 | } |
50 | ret = 0; | 51 | ret = 0; |
51 | error: | 52 | error: |
@@ -53,91 +54,27 @@ error: | |||
53 | return ret; | 54 | return ret; |
54 | } | 55 | } |
55 | 56 | ||
56 | /* | ||
57 | * walks the btree of allocated inodes and find a hole. | ||
58 | */ | ||
59 | int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, | 57 | int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, |
60 | struct btrfs_root *root, | 58 | struct btrfs_root *root, |
61 | u64 dirid, u64 *objectid) | 59 | u64 dirid, u64 *objectid) |
62 | { | 60 | { |
63 | struct btrfs_path *path; | ||
64 | struct btrfs_key key; | ||
65 | int ret; | 61 | int ret; |
66 | int slot = 0; | ||
67 | u64 last_ino = 0; | ||
68 | int start_found; | ||
69 | struct extent_buffer *l; | ||
70 | struct btrfs_key search_key; | ||
71 | u64 search_start = dirid; | ||
72 | |||
73 | mutex_lock(&root->objectid_mutex); | 62 | mutex_lock(&root->objectid_mutex); |
74 | if (root->last_inode_alloc >= BTRFS_FIRST_FREE_OBJECTID && | ||
75 | root->last_inode_alloc < BTRFS_LAST_FREE_OBJECTID) { | ||
76 | *objectid = ++root->last_inode_alloc; | ||
77 | mutex_unlock(&root->objectid_mutex); | ||
78 | return 0; | ||
79 | } | ||
80 | path = btrfs_alloc_path(); | ||
81 | BUG_ON(!path); | ||
82 | search_start = max(search_start, (u64)BTRFS_FIRST_FREE_OBJECTID); | ||
83 | search_key.objectid = search_start; | ||
84 | search_key.type = 0; | ||
85 | search_key.offset = 0; | ||
86 | |||
87 | start_found = 0; | ||
88 | ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0); | ||
89 | if (ret < 0) | ||
90 | goto error; | ||
91 | 63 | ||
92 | while (1) { | 64 | if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) { |
93 | l = path->nodes[0]; | 65 | ret = btrfs_find_highest_inode(root, &root->highest_objectid); |
94 | slot = path->slots[0]; | 66 | if (ret) |
95 | if (slot >= btrfs_header_nritems(l)) { | 67 | goto out; |
96 | ret = btrfs_next_leaf(root, path); | 68 | } |
97 | if (ret == 0) | ||
98 | continue; | ||
99 | if (ret < 0) | ||
100 | goto error; | ||
101 | if (!start_found) { | ||
102 | *objectid = search_start; | ||
103 | start_found = 1; | ||
104 | goto found; | ||
105 | } | ||
106 | *objectid = last_ino > search_start ? | ||
107 | last_ino : search_start; | ||
108 | goto found; | ||
109 | } | ||
110 | btrfs_item_key_to_cpu(l, &key, slot); | ||
111 | if (key.objectid >= search_start) { | ||
112 | if (start_found) { | ||
113 | if (last_ino < search_start) | ||
114 | last_ino = search_start; | ||
115 | if (key.objectid > last_ino) { | ||
116 | *objectid = last_ino; | ||
117 | goto found; | ||
118 | } | ||
119 | } else if (key.objectid > search_start) { | ||
120 | *objectid = search_start; | ||
121 | goto found; | ||
122 | } | ||
123 | } | ||
124 | if (key.objectid >= BTRFS_LAST_FREE_OBJECTID) | ||
125 | break; | ||
126 | 69 | ||
127 | start_found = 1; | 70 | if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) { |
128 | last_ino = key.objectid + 1; | 71 | ret = -ENOSPC; |
129 | path->slots[0]++; | 72 | goto out; |
130 | } | 73 | } |
131 | BUG_ON(1); | 74 | |
132 | found: | 75 | *objectid = ++root->highest_objectid; |
133 | btrfs_release_path(root, path); | 76 | ret = 0; |
134 | btrfs_free_path(path); | 77 | out: |
135 | BUG_ON(*objectid < search_start); | ||
136 | mutex_unlock(&root->objectid_mutex); | ||
137 | return 0; | ||
138 | error: | ||
139 | btrfs_release_path(root, path); | ||
140 | btrfs_free_path(path); | ||
141 | mutex_unlock(&root->objectid_mutex); | 78 | mutex_unlock(&root->objectid_mutex); |
142 | return ret; | 79 | return ret; |
143 | } | 80 | } |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9096fd0ca3ca..e9b76bcd1c12 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -231,7 +231,8 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
231 | } | 231 | } |
232 | 232 | ||
233 | ret = btrfs_drop_extents(trans, root, inode, start, | 233 | ret = btrfs_drop_extents(trans, root, inode, start, |
234 | aligned_end, aligned_end, start, &hint_byte); | 234 | aligned_end, aligned_end, start, |
235 | &hint_byte, 1); | ||
235 | BUG_ON(ret); | 236 | BUG_ON(ret); |
236 | 237 | ||
237 | if (isize > actual_end) | 238 | if (isize > actual_end) |
@@ -240,7 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
240 | inline_len, compressed_size, | 241 | inline_len, compressed_size, |
241 | compressed_pages); | 242 | compressed_pages); |
242 | BUG_ON(ret); | 243 | BUG_ON(ret); |
243 | btrfs_drop_extent_cache(inode, start, aligned_end, 0); | 244 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); |
244 | return 0; | 245 | return 0; |
245 | } | 246 | } |
246 | 247 | ||
@@ -425,7 +426,7 @@ again: | |||
425 | extent_clear_unlock_delalloc(inode, | 426 | extent_clear_unlock_delalloc(inode, |
426 | &BTRFS_I(inode)->io_tree, | 427 | &BTRFS_I(inode)->io_tree, |
427 | start, end, NULL, 1, 0, | 428 | start, end, NULL, 1, 0, |
428 | 0, 1, 1, 1); | 429 | 0, 1, 1, 1, 0); |
429 | ret = 0; | 430 | ret = 0; |
430 | goto free_pages_out; | 431 | goto free_pages_out; |
431 | } | 432 | } |
@@ -611,9 +612,9 @@ static noinline int submit_compressed_extents(struct inode *inode, | |||
611 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 612 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
612 | 613 | ||
613 | while (1) { | 614 | while (1) { |
614 | spin_lock(&em_tree->lock); | 615 | write_lock(&em_tree->lock); |
615 | ret = add_extent_mapping(em_tree, em); | 616 | ret = add_extent_mapping(em_tree, em); |
616 | spin_unlock(&em_tree->lock); | 617 | write_unlock(&em_tree->lock); |
617 | if (ret != -EEXIST) { | 618 | if (ret != -EEXIST) { |
618 | free_extent_map(em); | 619 | free_extent_map(em); |
619 | break; | 620 | break; |
@@ -640,7 +641,7 @@ static noinline int submit_compressed_extents(struct inode *inode, | |||
640 | async_extent->start, | 641 | async_extent->start, |
641 | async_extent->start + | 642 | async_extent->start + |
642 | async_extent->ram_size - 1, | 643 | async_extent->ram_size - 1, |
643 | NULL, 1, 1, 0, 1, 1, 0); | 644 | NULL, 1, 1, 0, 1, 1, 0, 0); |
644 | 645 | ||
645 | ret = btrfs_submit_compressed_write(inode, | 646 | ret = btrfs_submit_compressed_write(inode, |
646 | async_extent->start, | 647 | async_extent->start, |
@@ -713,7 +714,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
713 | extent_clear_unlock_delalloc(inode, | 714 | extent_clear_unlock_delalloc(inode, |
714 | &BTRFS_I(inode)->io_tree, | 715 | &BTRFS_I(inode)->io_tree, |
715 | start, end, NULL, 1, 1, | 716 | start, end, NULL, 1, 1, |
716 | 1, 1, 1, 1); | 717 | 1, 1, 1, 1, 0); |
717 | *nr_written = *nr_written + | 718 | *nr_written = *nr_written + |
718 | (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; | 719 | (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; |
719 | *page_started = 1; | 720 | *page_started = 1; |
@@ -725,6 +726,15 @@ static noinline int cow_file_range(struct inode *inode, | |||
725 | BUG_ON(disk_num_bytes > | 726 | BUG_ON(disk_num_bytes > |
726 | btrfs_super_total_bytes(&root->fs_info->super_copy)); | 727 | btrfs_super_total_bytes(&root->fs_info->super_copy)); |
727 | 728 | ||
729 | |||
730 | read_lock(&BTRFS_I(inode)->extent_tree.lock); | ||
731 | em = search_extent_mapping(&BTRFS_I(inode)->extent_tree, | ||
732 | start, num_bytes); | ||
733 | if (em) { | ||
734 | alloc_hint = em->block_start; | ||
735 | free_extent_map(em); | ||
736 | } | ||
737 | read_unlock(&BTRFS_I(inode)->extent_tree.lock); | ||
728 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); | 738 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); |
729 | 739 | ||
730 | while (disk_num_bytes > 0) { | 740 | while (disk_num_bytes > 0) { |
@@ -737,7 +747,6 @@ static noinline int cow_file_range(struct inode *inode, | |||
737 | em = alloc_extent_map(GFP_NOFS); | 747 | em = alloc_extent_map(GFP_NOFS); |
738 | em->start = start; | 748 | em->start = start; |
739 | em->orig_start = em->start; | 749 | em->orig_start = em->start; |
740 | |||
741 | ram_size = ins.offset; | 750 | ram_size = ins.offset; |
742 | em->len = ins.offset; | 751 | em->len = ins.offset; |
743 | 752 | ||
@@ -747,9 +756,9 @@ static noinline int cow_file_range(struct inode *inode, | |||
747 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 756 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
748 | 757 | ||
749 | while (1) { | 758 | while (1) { |
750 | spin_lock(&em_tree->lock); | 759 | write_lock(&em_tree->lock); |
751 | ret = add_extent_mapping(em_tree, em); | 760 | ret = add_extent_mapping(em_tree, em); |
752 | spin_unlock(&em_tree->lock); | 761 | write_unlock(&em_tree->lock); |
753 | if (ret != -EEXIST) { | 762 | if (ret != -EEXIST) { |
754 | free_extent_map(em); | 763 | free_extent_map(em); |
755 | break; | 764 | break; |
@@ -776,11 +785,14 @@ static noinline int cow_file_range(struct inode *inode, | |||
776 | /* we're not doing compressed IO, don't unlock the first | 785 | /* we're not doing compressed IO, don't unlock the first |
777 | * page (which the caller expects to stay locked), don't | 786 | * page (which the caller expects to stay locked), don't |
778 | * clear any dirty bits and don't set any writeback bits | 787 | * clear any dirty bits and don't set any writeback bits |
788 | * | ||
789 | * Do set the Private2 bit so we know this page was properly | ||
790 | * setup for writepage | ||
779 | */ | 791 | */ |
780 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | 792 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, |
781 | start, start + ram_size - 1, | 793 | start, start + ram_size - 1, |
782 | locked_page, unlock, 1, | 794 | locked_page, unlock, 1, |
783 | 1, 0, 0, 0); | 795 | 1, 0, 0, 0, 1); |
784 | disk_num_bytes -= cur_alloc_size; | 796 | disk_num_bytes -= cur_alloc_size; |
785 | num_bytes -= cur_alloc_size; | 797 | num_bytes -= cur_alloc_size; |
786 | alloc_hint = ins.objectid + ins.offset; | 798 | alloc_hint = ins.objectid + ins.offset; |
@@ -853,7 +865,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, | |||
853 | int limit = 10 * 1024 * 1042; | 865 | int limit = 10 * 1024 * 1042; |
854 | 866 | ||
855 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | | 867 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | |
856 | EXTENT_DELALLOC, 1, 0, GFP_NOFS); | 868 | EXTENT_DELALLOC, 1, 0, NULL, GFP_NOFS); |
857 | while (start < end) { | 869 | while (start < end) { |
858 | async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); | 870 | async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); |
859 | async_cow->inode = inode; | 871 | async_cow->inode = inode; |
@@ -1080,9 +1092,9 @@ out_check: | |||
1080 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 1092 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
1081 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 1093 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
1082 | while (1) { | 1094 | while (1) { |
1083 | spin_lock(&em_tree->lock); | 1095 | write_lock(&em_tree->lock); |
1084 | ret = add_extent_mapping(em_tree, em); | 1096 | ret = add_extent_mapping(em_tree, em); |
1085 | spin_unlock(&em_tree->lock); | 1097 | write_unlock(&em_tree->lock); |
1086 | if (ret != -EEXIST) { | 1098 | if (ret != -EEXIST) { |
1087 | free_extent_map(em); | 1099 | free_extent_map(em); |
1088 | break; | 1100 | break; |
@@ -1101,7 +1113,7 @@ out_check: | |||
1101 | 1113 | ||
1102 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | 1114 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, |
1103 | cur_offset, cur_offset + num_bytes - 1, | 1115 | cur_offset, cur_offset + num_bytes - 1, |
1104 | locked_page, 1, 1, 1, 0, 0, 0); | 1116 | locked_page, 1, 1, 1, 0, 0, 0, 1); |
1105 | cur_offset = extent_end; | 1117 | cur_offset = extent_end; |
1106 | if (cur_offset > end) | 1118 | if (cur_offset > end) |
1107 | break; | 1119 | break; |
@@ -1374,10 +1386,8 @@ again: | |||
1374 | lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); | 1386 | lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); |
1375 | 1387 | ||
1376 | /* already ordered? We're done */ | 1388 | /* already ordered? We're done */ |
1377 | if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, | 1389 | if (PagePrivate2(page)) |
1378 | EXTENT_ORDERED, 0)) { | ||
1379 | goto out; | 1390 | goto out; |
1380 | } | ||
1381 | 1391 | ||
1382 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | 1392 | ordered = btrfs_lookup_ordered_extent(inode, page_start); |
1383 | if (ordered) { | 1393 | if (ordered) { |
@@ -1413,11 +1423,9 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) | |||
1413 | struct inode *inode = page->mapping->host; | 1423 | struct inode *inode = page->mapping->host; |
1414 | struct btrfs_writepage_fixup *fixup; | 1424 | struct btrfs_writepage_fixup *fixup; |
1415 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1425 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1416 | int ret; | ||
1417 | 1426 | ||
1418 | ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end, | 1427 | /* this page is properly in the ordered list */ |
1419 | EXTENT_ORDERED, 0); | 1428 | if (TestClearPagePrivate2(page)) |
1420 | if (ret) | ||
1421 | return 0; | 1429 | return 0; |
1422 | 1430 | ||
1423 | if (PageChecked(page)) | 1431 | if (PageChecked(page)) |
@@ -1455,9 +1463,19 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1455 | BUG_ON(!path); | 1463 | BUG_ON(!path); |
1456 | 1464 | ||
1457 | path->leave_spinning = 1; | 1465 | path->leave_spinning = 1; |
1466 | |||
1467 | /* | ||
1468 | * we may be replacing one extent in the tree with another. | ||
1469 | * The new extent is pinned in the extent map, and we don't want | ||
1470 | * to drop it from the cache until it is completely in the btree. | ||
1471 | * | ||
1472 | * So, tell btrfs_drop_extents to leave this extent in the cache. | ||
1473 | * the caller is expected to unpin it and allow it to be merged | ||
1474 | * with the others. | ||
1475 | */ | ||
1458 | ret = btrfs_drop_extents(trans, root, inode, file_pos, | 1476 | ret = btrfs_drop_extents(trans, root, inode, file_pos, |
1459 | file_pos + num_bytes, locked_end, | 1477 | file_pos + num_bytes, locked_end, |
1460 | file_pos, &hint); | 1478 | file_pos, &hint, 0); |
1461 | BUG_ON(ret); | 1479 | BUG_ON(ret); |
1462 | 1480 | ||
1463 | ins.objectid = inode->i_ino; | 1481 | ins.objectid = inode->i_ino; |
@@ -1485,7 +1503,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1485 | btrfs_mark_buffer_dirty(leaf); | 1503 | btrfs_mark_buffer_dirty(leaf); |
1486 | 1504 | ||
1487 | inode_add_bytes(inode, num_bytes); | 1505 | inode_add_bytes(inode, num_bytes); |
1488 | btrfs_drop_extent_cache(inode, file_pos, file_pos + num_bytes - 1, 0); | ||
1489 | 1506 | ||
1490 | ins.objectid = disk_bytenr; | 1507 | ins.objectid = disk_bytenr; |
1491 | ins.offset = disk_num_bytes; | 1508 | ins.offset = disk_num_bytes; |
@@ -1596,6 +1613,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1596 | ordered_extent->len, | 1613 | ordered_extent->len, |
1597 | compressed, 0, 0, | 1614 | compressed, 0, 0, |
1598 | BTRFS_FILE_EXTENT_REG); | 1615 | BTRFS_FILE_EXTENT_REG); |
1616 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | ||
1617 | ordered_extent->file_offset, | ||
1618 | ordered_extent->len); | ||
1599 | BUG_ON(ret); | 1619 | BUG_ON(ret); |
1600 | } | 1620 | } |
1601 | unlock_extent(io_tree, ordered_extent->file_offset, | 1621 | unlock_extent(io_tree, ordered_extent->file_offset, |
@@ -1623,6 +1643,7 @@ nocow: | |||
1623 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | 1643 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, |
1624 | struct extent_state *state, int uptodate) | 1644 | struct extent_state *state, int uptodate) |
1625 | { | 1645 | { |
1646 | ClearPagePrivate2(page); | ||
1626 | return btrfs_finish_ordered_io(page->mapping->host, start, end); | 1647 | return btrfs_finish_ordered_io(page->mapping->host, start, end); |
1627 | } | 1648 | } |
1628 | 1649 | ||
@@ -1669,13 +1690,13 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
1669 | failrec->last_mirror = 0; | 1690 | failrec->last_mirror = 0; |
1670 | failrec->bio_flags = 0; | 1691 | failrec->bio_flags = 0; |
1671 | 1692 | ||
1672 | spin_lock(&em_tree->lock); | 1693 | read_lock(&em_tree->lock); |
1673 | em = lookup_extent_mapping(em_tree, start, failrec->len); | 1694 | em = lookup_extent_mapping(em_tree, start, failrec->len); |
1674 | if (em->start > start || em->start + em->len < start) { | 1695 | if (em->start > start || em->start + em->len < start) { |
1675 | free_extent_map(em); | 1696 | free_extent_map(em); |
1676 | em = NULL; | 1697 | em = NULL; |
1677 | } | 1698 | } |
1678 | spin_unlock(&em_tree->lock); | 1699 | read_unlock(&em_tree->lock); |
1679 | 1700 | ||
1680 | if (!em || IS_ERR(em)) { | 1701 | if (!em || IS_ERR(em)) { |
1681 | kfree(failrec); | 1702 | kfree(failrec); |
@@ -1794,7 +1815,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
1794 | return 0; | 1815 | return 0; |
1795 | 1816 | ||
1796 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && | 1817 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && |
1797 | test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1)) { | 1818 | test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { |
1798 | clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM, | 1819 | clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM, |
1799 | GFP_NOFS); | 1820 | GFP_NOFS); |
1800 | return 0; | 1821 | return 0; |
@@ -2352,6 +2373,69 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
2352 | return ret; | 2373 | return ret; |
2353 | } | 2374 | } |
2354 | 2375 | ||
2376 | int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, | ||
2377 | struct btrfs_root *root, | ||
2378 | struct inode *dir, u64 objectid, | ||
2379 | const char *name, int name_len) | ||
2380 | { | ||
2381 | struct btrfs_path *path; | ||
2382 | struct extent_buffer *leaf; | ||
2383 | struct btrfs_dir_item *di; | ||
2384 | struct btrfs_key key; | ||
2385 | u64 index; | ||
2386 | int ret; | ||
2387 | |||
2388 | path = btrfs_alloc_path(); | ||
2389 | if (!path) | ||
2390 | return -ENOMEM; | ||
2391 | |||
2392 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, | ||
2393 | name, name_len, -1); | ||
2394 | BUG_ON(!di || IS_ERR(di)); | ||
2395 | |||
2396 | leaf = path->nodes[0]; | ||
2397 | btrfs_dir_item_key_to_cpu(leaf, di, &key); | ||
2398 | WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); | ||
2399 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | ||
2400 | BUG_ON(ret); | ||
2401 | btrfs_release_path(root, path); | ||
2402 | |||
2403 | ret = btrfs_del_root_ref(trans, root->fs_info->tree_root, | ||
2404 | objectid, root->root_key.objectid, | ||
2405 | dir->i_ino, &index, name, name_len); | ||
2406 | if (ret < 0) { | ||
2407 | BUG_ON(ret != -ENOENT); | ||
2408 | di = btrfs_search_dir_index_item(root, path, dir->i_ino, | ||
2409 | name, name_len); | ||
2410 | BUG_ON(!di || IS_ERR(di)); | ||
2411 | |||
2412 | leaf = path->nodes[0]; | ||
2413 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
2414 | btrfs_release_path(root, path); | ||
2415 | index = key.offset; | ||
2416 | } | ||
2417 | |||
2418 | di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, | ||
2419 | index, name, name_len, -1); | ||
2420 | BUG_ON(!di || IS_ERR(di)); | ||
2421 | |||
2422 | leaf = path->nodes[0]; | ||
2423 | btrfs_dir_item_key_to_cpu(leaf, di, &key); | ||
2424 | WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); | ||
2425 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | ||
2426 | BUG_ON(ret); | ||
2427 | btrfs_release_path(root, path); | ||
2428 | |||
2429 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); | ||
2430 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | ||
2431 | ret = btrfs_update_inode(trans, root, dir); | ||
2432 | BUG_ON(ret); | ||
2433 | dir->i_sb->s_dirt = 1; | ||
2434 | |||
2435 | btrfs_free_path(path); | ||
2436 | return 0; | ||
2437 | } | ||
2438 | |||
2355 | static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | 2439 | static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) |
2356 | { | 2440 | { |
2357 | struct inode *inode = dentry->d_inode; | 2441 | struct inode *inode = dentry->d_inode; |
@@ -2361,29 +2445,31 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2361 | struct btrfs_trans_handle *trans; | 2445 | struct btrfs_trans_handle *trans; |
2362 | unsigned long nr = 0; | 2446 | unsigned long nr = 0; |
2363 | 2447 | ||
2364 | /* | ||
2365 | * the FIRST_FREE_OBJECTID check makes sure we don't try to rmdir | ||
2366 | * the root of a subvolume or snapshot | ||
2367 | */ | ||
2368 | if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || | 2448 | if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || |
2369 | inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { | 2449 | inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
2370 | return -ENOTEMPTY; | 2450 | return -ENOTEMPTY; |
2371 | } | ||
2372 | 2451 | ||
2373 | trans = btrfs_start_transaction(root, 1); | 2452 | trans = btrfs_start_transaction(root, 1); |
2374 | btrfs_set_trans_block_group(trans, dir); | 2453 | btrfs_set_trans_block_group(trans, dir); |
2375 | 2454 | ||
2455 | if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { | ||
2456 | err = btrfs_unlink_subvol(trans, root, dir, | ||
2457 | BTRFS_I(inode)->location.objectid, | ||
2458 | dentry->d_name.name, | ||
2459 | dentry->d_name.len); | ||
2460 | goto out; | ||
2461 | } | ||
2462 | |||
2376 | err = btrfs_orphan_add(trans, inode); | 2463 | err = btrfs_orphan_add(trans, inode); |
2377 | if (err) | 2464 | if (err) |
2378 | goto fail_trans; | 2465 | goto out; |
2379 | 2466 | ||
2380 | /* now the directory is empty */ | 2467 | /* now the directory is empty */ |
2381 | err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, | 2468 | err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, |
2382 | dentry->d_name.name, dentry->d_name.len); | 2469 | dentry->d_name.name, dentry->d_name.len); |
2383 | if (!err) | 2470 | if (!err) |
2384 | btrfs_i_size_write(inode, 0); | 2471 | btrfs_i_size_write(inode, 0); |
2385 | 2472 | out: | |
2386 | fail_trans: | ||
2387 | nr = trans->blocks_used; | 2473 | nr = trans->blocks_used; |
2388 | ret = btrfs_end_transaction_throttle(trans, root); | 2474 | ret = btrfs_end_transaction_throttle(trans, root); |
2389 | btrfs_btree_balance_dirty(root, nr); | 2475 | btrfs_btree_balance_dirty(root, nr); |
@@ -2935,7 +3021,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
2935 | cur_offset, | 3021 | cur_offset, |
2936 | cur_offset + hole_size, | 3022 | cur_offset + hole_size, |
2937 | block_end, | 3023 | block_end, |
2938 | cur_offset, &hint_byte); | 3024 | cur_offset, &hint_byte, 1); |
2939 | if (err) | 3025 | if (err) |
2940 | break; | 3026 | break; |
2941 | err = btrfs_insert_file_extent(trans, root, | 3027 | err = btrfs_insert_file_extent(trans, root, |
@@ -3003,6 +3089,11 @@ void btrfs_delete_inode(struct inode *inode) | |||
3003 | } | 3089 | } |
3004 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | 3090 | btrfs_wait_ordered_range(inode, 0, (u64)-1); |
3005 | 3091 | ||
3092 | if (inode->i_nlink > 0) { | ||
3093 | BUG_ON(btrfs_root_refs(&root->root_item) != 0); | ||
3094 | goto no_delete; | ||
3095 | } | ||
3096 | |||
3006 | btrfs_i_size_write(inode, 0); | 3097 | btrfs_i_size_write(inode, 0); |
3007 | trans = btrfs_join_transaction(root, 1); | 3098 | trans = btrfs_join_transaction(root, 1); |
3008 | 3099 | ||
@@ -3070,29 +3161,67 @@ out_err: | |||
3070 | * is kind of like crossing a mount point. | 3161 | * is kind of like crossing a mount point. |
3071 | */ | 3162 | */ |
3072 | static int fixup_tree_root_location(struct btrfs_root *root, | 3163 | static int fixup_tree_root_location(struct btrfs_root *root, |
3073 | struct btrfs_key *location, | 3164 | struct inode *dir, |
3074 | struct btrfs_root **sub_root, | 3165 | struct dentry *dentry, |
3075 | struct dentry *dentry) | 3166 | struct btrfs_key *location, |
3167 | struct btrfs_root **sub_root) | ||
3076 | { | 3168 | { |
3077 | struct btrfs_root_item *ri; | 3169 | struct btrfs_path *path; |
3170 | struct btrfs_root *new_root; | ||
3171 | struct btrfs_root_ref *ref; | ||
3172 | struct extent_buffer *leaf; | ||
3173 | int ret; | ||
3174 | int err = 0; | ||
3078 | 3175 | ||
3079 | if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY) | 3176 | path = btrfs_alloc_path(); |
3080 | return 0; | 3177 | if (!path) { |
3081 | if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) | 3178 | err = -ENOMEM; |
3082 | return 0; | 3179 | goto out; |
3180 | } | ||
3083 | 3181 | ||
3084 | *sub_root = btrfs_read_fs_root(root->fs_info, location, | 3182 | err = -ENOENT; |
3085 | dentry->d_name.name, | 3183 | ret = btrfs_find_root_ref(root->fs_info->tree_root, path, |
3086 | dentry->d_name.len); | 3184 | BTRFS_I(dir)->root->root_key.objectid, |
3087 | if (IS_ERR(*sub_root)) | 3185 | location->objectid); |
3088 | return PTR_ERR(*sub_root); | 3186 | if (ret) { |
3187 | if (ret < 0) | ||
3188 | err = ret; | ||
3189 | goto out; | ||
3190 | } | ||
3089 | 3191 | ||
3090 | ri = &(*sub_root)->root_item; | 3192 | leaf = path->nodes[0]; |
3091 | location->objectid = btrfs_root_dirid(ri); | 3193 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); |
3092 | btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); | 3194 | if (btrfs_root_ref_dirid(leaf, ref) != dir->i_ino || |
3093 | location->offset = 0; | 3195 | btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len) |
3196 | goto out; | ||
3094 | 3197 | ||
3095 | return 0; | 3198 | ret = memcmp_extent_buffer(leaf, dentry->d_name.name, |
3199 | (unsigned long)(ref + 1), | ||
3200 | dentry->d_name.len); | ||
3201 | if (ret) | ||
3202 | goto out; | ||
3203 | |||
3204 | btrfs_release_path(root->fs_info->tree_root, path); | ||
3205 | |||
3206 | new_root = btrfs_read_fs_root_no_name(root->fs_info, location); | ||
3207 | if (IS_ERR(new_root)) { | ||
3208 | err = PTR_ERR(new_root); | ||
3209 | goto out; | ||
3210 | } | ||
3211 | |||
3212 | if (btrfs_root_refs(&new_root->root_item) == 0) { | ||
3213 | err = -ENOENT; | ||
3214 | goto out; | ||
3215 | } | ||
3216 | |||
3217 | *sub_root = new_root; | ||
3218 | location->objectid = btrfs_root_dirid(&new_root->root_item); | ||
3219 | location->type = BTRFS_INODE_ITEM_KEY; | ||
3220 | location->offset = 0; | ||
3221 | err = 0; | ||
3222 | out: | ||
3223 | btrfs_free_path(path); | ||
3224 | return err; | ||
3096 | } | 3225 | } |
3097 | 3226 | ||
3098 | static void inode_tree_add(struct inode *inode) | 3227 | static void inode_tree_add(struct inode *inode) |
@@ -3101,11 +3230,13 @@ static void inode_tree_add(struct inode *inode) | |||
3101 | struct btrfs_inode *entry; | 3230 | struct btrfs_inode *entry; |
3102 | struct rb_node **p; | 3231 | struct rb_node **p; |
3103 | struct rb_node *parent; | 3232 | struct rb_node *parent; |
3104 | |||
3105 | again: | 3233 | again: |
3106 | p = &root->inode_tree.rb_node; | 3234 | p = &root->inode_tree.rb_node; |
3107 | parent = NULL; | 3235 | parent = NULL; |
3108 | 3236 | ||
3237 | if (hlist_unhashed(&inode->i_hash)) | ||
3238 | return; | ||
3239 | |||
3109 | spin_lock(&root->inode_lock); | 3240 | spin_lock(&root->inode_lock); |
3110 | while (*p) { | 3241 | while (*p) { |
3111 | parent = *p; | 3242 | parent = *p; |
@@ -3132,13 +3263,87 @@ again: | |||
3132 | static void inode_tree_del(struct inode *inode) | 3263 | static void inode_tree_del(struct inode *inode) |
3133 | { | 3264 | { |
3134 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3265 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3266 | int empty = 0; | ||
3135 | 3267 | ||
3136 | spin_lock(&root->inode_lock); | 3268 | spin_lock(&root->inode_lock); |
3137 | if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { | 3269 | if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { |
3138 | rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); | 3270 | rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); |
3139 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); | 3271 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); |
3272 | empty = RB_EMPTY_ROOT(&root->inode_tree); | ||
3140 | } | 3273 | } |
3141 | spin_unlock(&root->inode_lock); | 3274 | spin_unlock(&root->inode_lock); |
3275 | |||
3276 | if (empty && btrfs_root_refs(&root->root_item) == 0) { | ||
3277 | synchronize_srcu(&root->fs_info->subvol_srcu); | ||
3278 | spin_lock(&root->inode_lock); | ||
3279 | empty = RB_EMPTY_ROOT(&root->inode_tree); | ||
3280 | spin_unlock(&root->inode_lock); | ||
3281 | if (empty) | ||
3282 | btrfs_add_dead_root(root); | ||
3283 | } | ||
3284 | } | ||
3285 | |||
3286 | int btrfs_invalidate_inodes(struct btrfs_root *root) | ||
3287 | { | ||
3288 | struct rb_node *node; | ||
3289 | struct rb_node *prev; | ||
3290 | struct btrfs_inode *entry; | ||
3291 | struct inode *inode; | ||
3292 | u64 objectid = 0; | ||
3293 | |||
3294 | WARN_ON(btrfs_root_refs(&root->root_item) != 0); | ||
3295 | |||
3296 | spin_lock(&root->inode_lock); | ||
3297 | again: | ||
3298 | node = root->inode_tree.rb_node; | ||
3299 | prev = NULL; | ||
3300 | while (node) { | ||
3301 | prev = node; | ||
3302 | entry = rb_entry(node, struct btrfs_inode, rb_node); | ||
3303 | |||
3304 | if (objectid < entry->vfs_inode.i_ino) | ||
3305 | node = node->rb_left; | ||
3306 | else if (objectid > entry->vfs_inode.i_ino) | ||
3307 | node = node->rb_right; | ||
3308 | else | ||
3309 | break; | ||
3310 | } | ||
3311 | if (!node) { | ||
3312 | while (prev) { | ||
3313 | entry = rb_entry(prev, struct btrfs_inode, rb_node); | ||
3314 | if (objectid <= entry->vfs_inode.i_ino) { | ||
3315 | node = prev; | ||
3316 | break; | ||
3317 | } | ||
3318 | prev = rb_next(prev); | ||
3319 | } | ||
3320 | } | ||
3321 | while (node) { | ||
3322 | entry = rb_entry(node, struct btrfs_inode, rb_node); | ||
3323 | objectid = entry->vfs_inode.i_ino + 1; | ||
3324 | inode = igrab(&entry->vfs_inode); | ||
3325 | if (inode) { | ||
3326 | spin_unlock(&root->inode_lock); | ||
3327 | if (atomic_read(&inode->i_count) > 1) | ||
3328 | d_prune_aliases(inode); | ||
3329 | /* | ||
3330 | * btrfs_drop_inode will remove it from | ||
3331 | * the inode cache when its usage count | ||
3332 | * hits zero. | ||
3333 | */ | ||
3334 | iput(inode); | ||
3335 | cond_resched(); | ||
3336 | spin_lock(&root->inode_lock); | ||
3337 | goto again; | ||
3338 | } | ||
3339 | |||
3340 | if (cond_resched_lock(&root->inode_lock)) | ||
3341 | goto again; | ||
3342 | |||
3343 | node = rb_next(node); | ||
3344 | } | ||
3345 | spin_unlock(&root->inode_lock); | ||
3346 | return 0; | ||
3142 | } | 3347 | } |
3143 | 3348 | ||
3144 | static noinline void init_btrfs_i(struct inode *inode) | 3349 | static noinline void init_btrfs_i(struct inode *inode) |
@@ -3225,15 +3430,41 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | |||
3225 | return inode; | 3430 | return inode; |
3226 | } | 3431 | } |
3227 | 3432 | ||
3433 | static struct inode *new_simple_dir(struct super_block *s, | ||
3434 | struct btrfs_key *key, | ||
3435 | struct btrfs_root *root) | ||
3436 | { | ||
3437 | struct inode *inode = new_inode(s); | ||
3438 | |||
3439 | if (!inode) | ||
3440 | return ERR_PTR(-ENOMEM); | ||
3441 | |||
3442 | init_btrfs_i(inode); | ||
3443 | |||
3444 | BTRFS_I(inode)->root = root; | ||
3445 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); | ||
3446 | BTRFS_I(inode)->dummy_inode = 1; | ||
3447 | |||
3448 | inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; | ||
3449 | inode->i_op = &simple_dir_inode_operations; | ||
3450 | inode->i_fop = &simple_dir_operations; | ||
3451 | inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; | ||
3452 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | ||
3453 | |||
3454 | return inode; | ||
3455 | } | ||
3456 | |||
3228 | struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | 3457 | struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) |
3229 | { | 3458 | { |
3230 | struct inode *inode; | 3459 | struct inode *inode; |
3231 | struct btrfs_inode *bi = BTRFS_I(dir); | 3460 | struct btrfs_root *root = BTRFS_I(dir)->root; |
3232 | struct btrfs_root *root = bi->root; | ||
3233 | struct btrfs_root *sub_root = root; | 3461 | struct btrfs_root *sub_root = root; |
3234 | struct btrfs_key location; | 3462 | struct btrfs_key location; |
3463 | int index; | ||
3235 | int ret; | 3464 | int ret; |
3236 | 3465 | ||
3466 | dentry->d_op = &btrfs_dentry_operations; | ||
3467 | |||
3237 | if (dentry->d_name.len > BTRFS_NAME_LEN) | 3468 | if (dentry->d_name.len > BTRFS_NAME_LEN) |
3238 | return ERR_PTR(-ENAMETOOLONG); | 3469 | return ERR_PTR(-ENAMETOOLONG); |
3239 | 3470 | ||
@@ -3242,29 +3473,50 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
3242 | if (ret < 0) | 3473 | if (ret < 0) |
3243 | return ERR_PTR(ret); | 3474 | return ERR_PTR(ret); |
3244 | 3475 | ||
3245 | inode = NULL; | 3476 | if (location.objectid == 0) |
3246 | if (location.objectid) { | 3477 | return NULL; |
3247 | ret = fixup_tree_root_location(root, &location, &sub_root, | 3478 | |
3248 | dentry); | 3479 | if (location.type == BTRFS_INODE_ITEM_KEY) { |
3249 | if (ret < 0) | 3480 | inode = btrfs_iget(dir->i_sb, &location, root); |
3250 | return ERR_PTR(ret); | 3481 | return inode; |
3251 | if (ret > 0) | 3482 | } |
3252 | return ERR_PTR(-ENOENT); | 3483 | |
3484 | BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY); | ||
3485 | |||
3486 | index = srcu_read_lock(&root->fs_info->subvol_srcu); | ||
3487 | ret = fixup_tree_root_location(root, dir, dentry, | ||
3488 | &location, &sub_root); | ||
3489 | if (ret < 0) { | ||
3490 | if (ret != -ENOENT) | ||
3491 | inode = ERR_PTR(ret); | ||
3492 | else | ||
3493 | inode = new_simple_dir(dir->i_sb, &location, sub_root); | ||
3494 | } else { | ||
3253 | inode = btrfs_iget(dir->i_sb, &location, sub_root); | 3495 | inode = btrfs_iget(dir->i_sb, &location, sub_root); |
3254 | if (IS_ERR(inode)) | ||
3255 | return ERR_CAST(inode); | ||
3256 | } | 3496 | } |
3497 | srcu_read_unlock(&root->fs_info->subvol_srcu, index); | ||
3498 | |||
3257 | return inode; | 3499 | return inode; |
3258 | } | 3500 | } |
3259 | 3501 | ||
3502 | static int btrfs_dentry_delete(struct dentry *dentry) | ||
3503 | { | ||
3504 | struct btrfs_root *root; | ||
3505 | |||
3506 | if (!dentry->d_inode) | ||
3507 | return 0; | ||
3508 | |||
3509 | root = BTRFS_I(dentry->d_inode)->root; | ||
3510 | if (btrfs_root_refs(&root->root_item) == 0) | ||
3511 | return 1; | ||
3512 | return 0; | ||
3513 | } | ||
3514 | |||
3260 | static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, | 3515 | static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, |
3261 | struct nameidata *nd) | 3516 | struct nameidata *nd) |
3262 | { | 3517 | { |
3263 | struct inode *inode; | 3518 | struct inode *inode; |
3264 | 3519 | ||
3265 | if (dentry->d_name.len > BTRFS_NAME_LEN) | ||
3266 | return ERR_PTR(-ENAMETOOLONG); | ||
3267 | |||
3268 | inode = btrfs_lookup_dentry(dir, dentry); | 3520 | inode = btrfs_lookup_dentry(dir, dentry); |
3269 | if (IS_ERR(inode)) | 3521 | if (IS_ERR(inode)) |
3270 | return ERR_CAST(inode); | 3522 | return ERR_CAST(inode); |
@@ -3603,9 +3855,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
3603 | if (ret != 0) | 3855 | if (ret != 0) |
3604 | goto fail; | 3856 | goto fail; |
3605 | 3857 | ||
3606 | if (objectid > root->highest_inode) | ||
3607 | root->highest_inode = objectid; | ||
3608 | |||
3609 | inode->i_uid = current_fsuid(); | 3858 | inode->i_uid = current_fsuid(); |
3610 | 3859 | ||
3611 | if (dir && (dir->i_mode & S_ISGID)) { | 3860 | if (dir && (dir->i_mode & S_ISGID)) { |
@@ -3673,26 +3922,35 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, | |||
3673 | struct inode *parent_inode, struct inode *inode, | 3922 | struct inode *parent_inode, struct inode *inode, |
3674 | const char *name, int name_len, int add_backref, u64 index) | 3923 | const char *name, int name_len, int add_backref, u64 index) |
3675 | { | 3924 | { |
3676 | int ret; | 3925 | int ret = 0; |
3677 | struct btrfs_key key; | 3926 | struct btrfs_key key; |
3678 | struct btrfs_root *root = BTRFS_I(parent_inode)->root; | 3927 | struct btrfs_root *root = BTRFS_I(parent_inode)->root; |
3679 | 3928 | ||
3680 | key.objectid = inode->i_ino; | 3929 | if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { |
3681 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 3930 | memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); |
3682 | key.offset = 0; | 3931 | } else { |
3932 | key.objectid = inode->i_ino; | ||
3933 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | ||
3934 | key.offset = 0; | ||
3935 | } | ||
3936 | |||
3937 | if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { | ||
3938 | ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, | ||
3939 | key.objectid, root->root_key.objectid, | ||
3940 | parent_inode->i_ino, | ||
3941 | index, name, name_len); | ||
3942 | } else if (add_backref) { | ||
3943 | ret = btrfs_insert_inode_ref(trans, root, | ||
3944 | name, name_len, inode->i_ino, | ||
3945 | parent_inode->i_ino, index); | ||
3946 | } | ||
3683 | 3947 | ||
3684 | ret = btrfs_insert_dir_item(trans, root, name, name_len, | ||
3685 | parent_inode->i_ino, | ||
3686 | &key, btrfs_inode_type(inode), | ||
3687 | index); | ||
3688 | if (ret == 0) { | 3948 | if (ret == 0) { |
3689 | if (add_backref) { | 3949 | ret = btrfs_insert_dir_item(trans, root, name, name_len, |
3690 | ret = btrfs_insert_inode_ref(trans, root, | 3950 | parent_inode->i_ino, &key, |
3691 | name, name_len, | 3951 | btrfs_inode_type(inode), index); |
3692 | inode->i_ino, | 3952 | BUG_ON(ret); |
3693 | parent_inode->i_ino, | 3953 | |
3694 | index); | ||
3695 | } | ||
3696 | btrfs_i_size_write(parent_inode, parent_inode->i_size + | 3954 | btrfs_i_size_write(parent_inode, parent_inode->i_size + |
3697 | name_len * 2); | 3955 | name_len * 2); |
3698 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; | 3956 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; |
@@ -3875,18 +4133,16 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
3875 | 4133 | ||
3876 | err = btrfs_add_nondir(trans, dentry, inode, 1, index); | 4134 | err = btrfs_add_nondir(trans, dentry, inode, 1, index); |
3877 | 4135 | ||
3878 | if (err) | 4136 | if (err) { |
3879 | drop_inode = 1; | ||
3880 | |||
3881 | btrfs_update_inode_block_group(trans, dir); | ||
3882 | err = btrfs_update_inode(trans, root, inode); | ||
3883 | |||
3884 | if (err) | ||
3885 | drop_inode = 1; | 4137 | drop_inode = 1; |
4138 | } else { | ||
4139 | btrfs_update_inode_block_group(trans, dir); | ||
4140 | err = btrfs_update_inode(trans, root, inode); | ||
4141 | BUG_ON(err); | ||
4142 | btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); | ||
4143 | } | ||
3886 | 4144 | ||
3887 | nr = trans->blocks_used; | 4145 | nr = trans->blocks_used; |
3888 | |||
3889 | btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); | ||
3890 | btrfs_end_transaction_throttle(trans, root); | 4146 | btrfs_end_transaction_throttle(trans, root); |
3891 | fail: | 4147 | fail: |
3892 | if (drop_inode) { | 4148 | if (drop_inode) { |
@@ -4064,11 +4320,11 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | |||
4064 | int compressed; | 4320 | int compressed; |
4065 | 4321 | ||
4066 | again: | 4322 | again: |
4067 | spin_lock(&em_tree->lock); | 4323 | read_lock(&em_tree->lock); |
4068 | em = lookup_extent_mapping(em_tree, start, len); | 4324 | em = lookup_extent_mapping(em_tree, start, len); |
4069 | if (em) | 4325 | if (em) |
4070 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 4326 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
4071 | spin_unlock(&em_tree->lock); | 4327 | read_unlock(&em_tree->lock); |
4072 | 4328 | ||
4073 | if (em) { | 4329 | if (em) { |
4074 | if (em->start > start || em->start + em->len <= start) | 4330 | if (em->start > start || em->start + em->len <= start) |
@@ -4215,6 +4471,11 @@ again: | |||
4215 | map = kmap(page); | 4471 | map = kmap(page); |
4216 | read_extent_buffer(leaf, map + pg_offset, ptr, | 4472 | read_extent_buffer(leaf, map + pg_offset, ptr, |
4217 | copy_size); | 4473 | copy_size); |
4474 | if (pg_offset + copy_size < PAGE_CACHE_SIZE) { | ||
4475 | memset(map + pg_offset + copy_size, 0, | ||
4476 | PAGE_CACHE_SIZE - pg_offset - | ||
4477 | copy_size); | ||
4478 | } | ||
4218 | kunmap(page); | 4479 | kunmap(page); |
4219 | } | 4480 | } |
4220 | flush_dcache_page(page); | 4481 | flush_dcache_page(page); |
@@ -4259,7 +4520,7 @@ insert: | |||
4259 | } | 4520 | } |
4260 | 4521 | ||
4261 | err = 0; | 4522 | err = 0; |
4262 | spin_lock(&em_tree->lock); | 4523 | write_lock(&em_tree->lock); |
4263 | ret = add_extent_mapping(em_tree, em); | 4524 | ret = add_extent_mapping(em_tree, em); |
4264 | /* it is possible that someone inserted the extent into the tree | 4525 | /* it is possible that someone inserted the extent into the tree |
4265 | * while we had the lock dropped. It is also possible that | 4526 | * while we had the lock dropped. It is also possible that |
@@ -4299,7 +4560,7 @@ insert: | |||
4299 | err = 0; | 4560 | err = 0; |
4300 | } | 4561 | } |
4301 | } | 4562 | } |
4302 | spin_unlock(&em_tree->lock); | 4563 | write_unlock(&em_tree->lock); |
4303 | out: | 4564 | out: |
4304 | if (path) | 4565 | if (path) |
4305 | btrfs_free_path(path); | 4566 | btrfs_free_path(path); |
@@ -4398,13 +4659,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
4398 | u64 page_start = page_offset(page); | 4659 | u64 page_start = page_offset(page); |
4399 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; | 4660 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; |
4400 | 4661 | ||
4662 | |||
4663 | /* | ||
4664 | * we have the page locked, so new writeback can't start, | ||
4665 | * and the dirty bit won't be cleared while we are here. | ||
4666 | * | ||
4667 | * Wait for IO on this page so that we can safely clear | ||
4668 | * the PagePrivate2 bit and do ordered accounting | ||
4669 | */ | ||
4401 | wait_on_page_writeback(page); | 4670 | wait_on_page_writeback(page); |
4671 | |||
4402 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 4672 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
4403 | if (offset) { | 4673 | if (offset) { |
4404 | btrfs_releasepage(page, GFP_NOFS); | 4674 | btrfs_releasepage(page, GFP_NOFS); |
4405 | return; | 4675 | return; |
4406 | } | 4676 | } |
4407 | |||
4408 | lock_extent(tree, page_start, page_end, GFP_NOFS); | 4677 | lock_extent(tree, page_start, page_end, GFP_NOFS); |
4409 | ordered = btrfs_lookup_ordered_extent(page->mapping->host, | 4678 | ordered = btrfs_lookup_ordered_extent(page->mapping->host, |
4410 | page_offset(page)); | 4679 | page_offset(page)); |
@@ -4415,16 +4684,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
4415 | */ | 4684 | */ |
4416 | clear_extent_bit(tree, page_start, page_end, | 4685 | clear_extent_bit(tree, page_start, page_end, |
4417 | EXTENT_DIRTY | EXTENT_DELALLOC | | 4686 | EXTENT_DIRTY | EXTENT_DELALLOC | |
4418 | EXTENT_LOCKED, 1, 0, GFP_NOFS); | 4687 | EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS); |
4419 | btrfs_finish_ordered_io(page->mapping->host, | 4688 | /* |
4420 | page_start, page_end); | 4689 | * whoever cleared the private bit is responsible |
4690 | * for the finish_ordered_io | ||
4691 | */ | ||
4692 | if (TestClearPagePrivate2(page)) { | ||
4693 | btrfs_finish_ordered_io(page->mapping->host, | ||
4694 | page_start, page_end); | ||
4695 | } | ||
4421 | btrfs_put_ordered_extent(ordered); | 4696 | btrfs_put_ordered_extent(ordered); |
4422 | lock_extent(tree, page_start, page_end, GFP_NOFS); | 4697 | lock_extent(tree, page_start, page_end, GFP_NOFS); |
4423 | } | 4698 | } |
4424 | clear_extent_bit(tree, page_start, page_end, | 4699 | clear_extent_bit(tree, page_start, page_end, |
4425 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | | 4700 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, |
4426 | EXTENT_ORDERED, | 4701 | 1, 1, NULL, GFP_NOFS); |
4427 | 1, 1, GFP_NOFS); | ||
4428 | __btrfs_releasepage(page, GFP_NOFS); | 4702 | __btrfs_releasepage(page, GFP_NOFS); |
4429 | 4703 | ||
4430 | ClearPageChecked(page); | 4704 | ClearPageChecked(page); |
@@ -4521,11 +4795,14 @@ again: | |||
4521 | } | 4795 | } |
4522 | ClearPageChecked(page); | 4796 | ClearPageChecked(page); |
4523 | set_page_dirty(page); | 4797 | set_page_dirty(page); |
4798 | SetPageUptodate(page); | ||
4524 | 4799 | ||
4525 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; | 4800 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; |
4526 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 4801 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); |
4527 | 4802 | ||
4528 | out_unlock: | 4803 | out_unlock: |
4804 | if (!ret) | ||
4805 | return VM_FAULT_LOCKED; | ||
4529 | unlock_page(page); | 4806 | unlock_page(page); |
4530 | out: | 4807 | out: |
4531 | return ret; | 4808 | return ret; |
@@ -4594,11 +4871,11 @@ out: | |||
4594 | * create a new subvolume directory/inode (helper for the ioctl). | 4871 | * create a new subvolume directory/inode (helper for the ioctl). |
4595 | */ | 4872 | */ |
4596 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | 4873 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, |
4597 | struct btrfs_root *new_root, struct dentry *dentry, | 4874 | struct btrfs_root *new_root, |
4598 | u64 new_dirid, u64 alloc_hint) | 4875 | u64 new_dirid, u64 alloc_hint) |
4599 | { | 4876 | { |
4600 | struct inode *inode; | 4877 | struct inode *inode; |
4601 | int error; | 4878 | int err; |
4602 | u64 index = 0; | 4879 | u64 index = 0; |
4603 | 4880 | ||
4604 | inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, | 4881 | inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, |
@@ -4611,11 +4888,10 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | |||
4611 | inode->i_nlink = 1; | 4888 | inode->i_nlink = 1; |
4612 | btrfs_i_size_write(inode, 0); | 4889 | btrfs_i_size_write(inode, 0); |
4613 | 4890 | ||
4614 | error = btrfs_update_inode(trans, new_root, inode); | 4891 | err = btrfs_update_inode(trans, new_root, inode); |
4615 | if (error) | 4892 | BUG_ON(err); |
4616 | return error; | ||
4617 | 4893 | ||
4618 | d_instantiate(dentry, inode); | 4894 | iput(inode); |
4619 | return 0; | 4895 | return 0; |
4620 | } | 4896 | } |
4621 | 4897 | ||
@@ -4693,6 +4969,16 @@ void btrfs_destroy_inode(struct inode *inode) | |||
4693 | kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); | 4969 | kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); |
4694 | } | 4970 | } |
4695 | 4971 | ||
4972 | void btrfs_drop_inode(struct inode *inode) | ||
4973 | { | ||
4974 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
4975 | |||
4976 | if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) | ||
4977 | generic_delete_inode(inode); | ||
4978 | else | ||
4979 | generic_drop_inode(inode); | ||
4980 | } | ||
4981 | |||
4696 | static void init_once(void *foo) | 4982 | static void init_once(void *foo) |
4697 | { | 4983 | { |
4698 | struct btrfs_inode *ei = (struct btrfs_inode *) foo; | 4984 | struct btrfs_inode *ei = (struct btrfs_inode *) foo; |
@@ -4761,31 +5047,32 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
4761 | { | 5047 | { |
4762 | struct btrfs_trans_handle *trans; | 5048 | struct btrfs_trans_handle *trans; |
4763 | struct btrfs_root *root = BTRFS_I(old_dir)->root; | 5049 | struct btrfs_root *root = BTRFS_I(old_dir)->root; |
5050 | struct btrfs_root *dest = BTRFS_I(new_dir)->root; | ||
4764 | struct inode *new_inode = new_dentry->d_inode; | 5051 | struct inode *new_inode = new_dentry->d_inode; |
4765 | struct inode *old_inode = old_dentry->d_inode; | 5052 | struct inode *old_inode = old_dentry->d_inode; |
4766 | struct timespec ctime = CURRENT_TIME; | 5053 | struct timespec ctime = CURRENT_TIME; |
4767 | u64 index = 0; | 5054 | u64 index = 0; |
5055 | u64 root_objectid; | ||
4768 | int ret; | 5056 | int ret; |
4769 | 5057 | ||
4770 | /* we're not allowed to rename between subvolumes */ | 5058 | if (new_dir->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) |
4771 | if (BTRFS_I(old_inode)->root->root_key.objectid != | 5059 | return -EPERM; |
4772 | BTRFS_I(new_dir)->root->root_key.objectid) | 5060 | |
5061 | /* we only allow rename subvolume link between subvolumes */ | ||
5062 | if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) | ||
4773 | return -EXDEV; | 5063 | return -EXDEV; |
4774 | 5064 | ||
4775 | if (S_ISDIR(old_inode->i_mode) && new_inode && | 5065 | if (old_inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID || |
4776 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) { | 5066 | (new_inode && new_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) |
4777 | return -ENOTEMPTY; | 5067 | return -ENOTEMPTY; |
4778 | } | ||
4779 | 5068 | ||
4780 | /* to rename a snapshot or subvolume, we need to juggle the | 5069 | if (S_ISDIR(old_inode->i_mode) && new_inode && |
4781 | * backrefs. This isn't coded yet | 5070 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) |
4782 | */ | 5071 | return -ENOTEMPTY; |
4783 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | ||
4784 | return -EXDEV; | ||
4785 | 5072 | ||
4786 | ret = btrfs_check_metadata_free_space(root); | 5073 | ret = btrfs_check_metadata_free_space(root); |
4787 | if (ret) | 5074 | if (ret) |
4788 | goto out_unlock; | 5075 | return ret; |
4789 | 5076 | ||
4790 | /* | 5077 | /* |
4791 | * we're using rename to replace one file with another. | 5078 | * we're using rename to replace one file with another. |
@@ -4796,8 +5083,40 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
4796 | old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) | 5083 | old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) |
4797 | filemap_flush(old_inode->i_mapping); | 5084 | filemap_flush(old_inode->i_mapping); |
4798 | 5085 | ||
5086 | /* close the racy window with snapshot create/destroy ioctl */ | ||
5087 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | ||
5088 | down_read(&root->fs_info->subvol_sem); | ||
5089 | |||
4799 | trans = btrfs_start_transaction(root, 1); | 5090 | trans = btrfs_start_transaction(root, 1); |
5091 | btrfs_set_trans_block_group(trans, new_dir); | ||
5092 | |||
5093 | if (dest != root) | ||
5094 | btrfs_record_root_in_trans(trans, dest); | ||
4800 | 5095 | ||
5096 | ret = btrfs_set_inode_index(new_dir, &index); | ||
5097 | if (ret) | ||
5098 | goto out_fail; | ||
5099 | |||
5100 | if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { | ||
5101 | /* force full log commit if subvolume involved. */ | ||
5102 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
5103 | } else { | ||
5104 | ret = btrfs_insert_inode_ref(trans, dest, | ||
5105 | new_dentry->d_name.name, | ||
5106 | new_dentry->d_name.len, | ||
5107 | old_inode->i_ino, | ||
5108 | new_dir->i_ino, index); | ||
5109 | if (ret) | ||
5110 | goto out_fail; | ||
5111 | /* | ||
5112 | * this is an ugly little race, but the rename is required | ||
5113 | * to make sure that if we crash, the inode is either at the | ||
5114 | * old name or the new one. pinning the log transaction lets | ||
5115 | * us make sure we don't allow a log commit to come in after | ||
5116 | * we unlink the name but before we add the new name back in. | ||
5117 | */ | ||
5118 | btrfs_pin_log_trans(root); | ||
5119 | } | ||
4801 | /* | 5120 | /* |
4802 | * make sure the inode gets flushed if it is replacing | 5121 | * make sure the inode gets flushed if it is replacing |
4803 | * something. | 5122 | * something. |
@@ -4807,18 +5126,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
4807 | btrfs_add_ordered_operation(trans, root, old_inode); | 5126 | btrfs_add_ordered_operation(trans, root, old_inode); |
4808 | } | 5127 | } |
4809 | 5128 | ||
4810 | /* | ||
4811 | * this is an ugly little race, but the rename is required to make | ||
4812 | * sure that if we crash, the inode is either at the old name | ||
4813 | * or the new one. pinning the log transaction lets us make sure | ||
4814 | * we don't allow a log commit to come in after we unlink the | ||
4815 | * name but before we add the new name back in. | ||
4816 | */ | ||
4817 | btrfs_pin_log_trans(root); | ||
4818 | |||
4819 | btrfs_set_trans_block_group(trans, new_dir); | ||
4820 | |||
4821 | btrfs_inc_nlink(old_dentry->d_inode); | ||
4822 | old_dir->i_ctime = old_dir->i_mtime = ctime; | 5129 | old_dir->i_ctime = old_dir->i_mtime = ctime; |
4823 | new_dir->i_ctime = new_dir->i_mtime = ctime; | 5130 | new_dir->i_ctime = new_dir->i_mtime = ctime; |
4824 | old_inode->i_ctime = ctime; | 5131 | old_inode->i_ctime = ctime; |
@@ -4826,47 +5133,58 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
4826 | if (old_dentry->d_parent != new_dentry->d_parent) | 5133 | if (old_dentry->d_parent != new_dentry->d_parent) |
4827 | btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); | 5134 | btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); |
4828 | 5135 | ||
4829 | ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode, | 5136 | if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { |
4830 | old_dentry->d_name.name, | 5137 | root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; |
4831 | old_dentry->d_name.len); | 5138 | ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid, |
4832 | if (ret) | 5139 | old_dentry->d_name.name, |
4833 | goto out_fail; | 5140 | old_dentry->d_name.len); |
5141 | } else { | ||
5142 | btrfs_inc_nlink(old_dentry->d_inode); | ||
5143 | ret = btrfs_unlink_inode(trans, root, old_dir, | ||
5144 | old_dentry->d_inode, | ||
5145 | old_dentry->d_name.name, | ||
5146 | old_dentry->d_name.len); | ||
5147 | } | ||
5148 | BUG_ON(ret); | ||
4834 | 5149 | ||
4835 | if (new_inode) { | 5150 | if (new_inode) { |
4836 | new_inode->i_ctime = CURRENT_TIME; | 5151 | new_inode->i_ctime = CURRENT_TIME; |
4837 | ret = btrfs_unlink_inode(trans, root, new_dir, | 5152 | if (unlikely(new_inode->i_ino == |
4838 | new_dentry->d_inode, | 5153 | BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { |
4839 | new_dentry->d_name.name, | 5154 | root_objectid = BTRFS_I(new_inode)->location.objectid; |
4840 | new_dentry->d_name.len); | 5155 | ret = btrfs_unlink_subvol(trans, dest, new_dir, |
4841 | if (ret) | 5156 | root_objectid, |
4842 | goto out_fail; | 5157 | new_dentry->d_name.name, |
5158 | new_dentry->d_name.len); | ||
5159 | BUG_ON(new_inode->i_nlink == 0); | ||
5160 | } else { | ||
5161 | ret = btrfs_unlink_inode(trans, dest, new_dir, | ||
5162 | new_dentry->d_inode, | ||
5163 | new_dentry->d_name.name, | ||
5164 | new_dentry->d_name.len); | ||
5165 | } | ||
5166 | BUG_ON(ret); | ||
4843 | if (new_inode->i_nlink == 0) { | 5167 | if (new_inode->i_nlink == 0) { |
4844 | ret = btrfs_orphan_add(trans, new_dentry->d_inode); | 5168 | ret = btrfs_orphan_add(trans, new_dentry->d_inode); |
4845 | if (ret) | 5169 | BUG_ON(ret); |
4846 | goto out_fail; | ||
4847 | } | 5170 | } |
4848 | |||
4849 | } | 5171 | } |
4850 | ret = btrfs_set_inode_index(new_dir, &index); | ||
4851 | if (ret) | ||
4852 | goto out_fail; | ||
4853 | 5172 | ||
4854 | ret = btrfs_add_link(trans, new_dentry->d_parent->d_inode, | 5173 | ret = btrfs_add_link(trans, new_dir, old_inode, |
4855 | old_inode, new_dentry->d_name.name, | 5174 | new_dentry->d_name.name, |
4856 | new_dentry->d_name.len, 1, index); | 5175 | new_dentry->d_name.len, 0, index); |
4857 | if (ret) | 5176 | BUG_ON(ret); |
4858 | goto out_fail; | ||
4859 | 5177 | ||
4860 | btrfs_log_new_name(trans, old_inode, old_dir, | 5178 | if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { |
4861 | new_dentry->d_parent); | 5179 | btrfs_log_new_name(trans, old_inode, old_dir, |
5180 | new_dentry->d_parent); | ||
5181 | btrfs_end_log_trans(root); | ||
5182 | } | ||
4862 | out_fail: | 5183 | out_fail: |
4863 | |||
4864 | /* this btrfs_end_log_trans just allows the current | ||
4865 | * log-sub transaction to complete | ||
4866 | */ | ||
4867 | btrfs_end_log_trans(root); | ||
4868 | btrfs_end_transaction_throttle(trans, root); | 5184 | btrfs_end_transaction_throttle(trans, root); |
4869 | out_unlock: | 5185 | |
5186 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | ||
5187 | up_read(&root->fs_info->subvol_sem); | ||
4870 | return ret; | 5188 | return ret; |
4871 | } | 5189 | } |
4872 | 5190 | ||
@@ -5058,6 +5376,8 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, | |||
5058 | 0, 0, 0, | 5376 | 0, 0, 0, |
5059 | BTRFS_FILE_EXTENT_PREALLOC); | 5377 | BTRFS_FILE_EXTENT_PREALLOC); |
5060 | BUG_ON(ret); | 5378 | BUG_ON(ret); |
5379 | btrfs_drop_extent_cache(inode, cur_offset, | ||
5380 | cur_offset + ins.offset -1, 0); | ||
5061 | num_bytes -= ins.offset; | 5381 | num_bytes -= ins.offset; |
5062 | cur_offset += ins.offset; | 5382 | cur_offset += ins.offset; |
5063 | alloc_hint = ins.objectid + ins.offset; | 5383 | alloc_hint = ins.objectid + ins.offset; |
@@ -5223,6 +5543,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = { | |||
5223 | .lookup = btrfs_lookup, | 5543 | .lookup = btrfs_lookup, |
5224 | .permission = btrfs_permission, | 5544 | .permission = btrfs_permission, |
5225 | }; | 5545 | }; |
5546 | |||
5226 | static struct file_operations btrfs_dir_file_operations = { | 5547 | static struct file_operations btrfs_dir_file_operations = { |
5227 | .llseek = generic_file_llseek, | 5548 | .llseek = generic_file_llseek, |
5228 | .read = generic_read_dir, | 5549 | .read = generic_read_dir, |
@@ -5269,6 +5590,7 @@ static const struct address_space_operations btrfs_aops = { | |||
5269 | .invalidatepage = btrfs_invalidatepage, | 5590 | .invalidatepage = btrfs_invalidatepage, |
5270 | .releasepage = btrfs_releasepage, | 5591 | .releasepage = btrfs_releasepage, |
5271 | .set_page_dirty = btrfs_set_page_dirty, | 5592 | .set_page_dirty = btrfs_set_page_dirty, |
5593 | .error_remove_page = generic_error_remove_page, | ||
5272 | }; | 5594 | }; |
5273 | 5595 | ||
5274 | static const struct address_space_operations btrfs_symlink_aops = { | 5596 | static const struct address_space_operations btrfs_symlink_aops = { |
@@ -5309,3 +5631,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = { | |||
5309 | .listxattr = btrfs_listxattr, | 5631 | .listxattr = btrfs_listxattr, |
5310 | .removexattr = btrfs_removexattr, | 5632 | .removexattr = btrfs_removexattr, |
5311 | }; | 5633 | }; |
5634 | |||
5635 | struct dentry_operations btrfs_dentry_operations = { | ||
5636 | .d_delete = btrfs_dentry_delete, | ||
5637 | }; | ||
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index bd88f25889f7..a8577a7f26ab 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -230,8 +230,8 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
230 | struct btrfs_root_item root_item; | 230 | struct btrfs_root_item root_item; |
231 | struct btrfs_inode_item *inode_item; | 231 | struct btrfs_inode_item *inode_item; |
232 | struct extent_buffer *leaf; | 232 | struct extent_buffer *leaf; |
233 | struct btrfs_root *new_root = root; | 233 | struct btrfs_root *new_root; |
234 | struct inode *dir; | 234 | struct inode *dir = dentry->d_parent->d_inode; |
235 | int ret; | 235 | int ret; |
236 | int err; | 236 | int err; |
237 | u64 objectid; | 237 | u64 objectid; |
@@ -241,7 +241,7 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
241 | 241 | ||
242 | ret = btrfs_check_metadata_free_space(root); | 242 | ret = btrfs_check_metadata_free_space(root); |
243 | if (ret) | 243 | if (ret) |
244 | goto fail_commit; | 244 | return ret; |
245 | 245 | ||
246 | trans = btrfs_start_transaction(root, 1); | 246 | trans = btrfs_start_transaction(root, 1); |
247 | BUG_ON(!trans); | 247 | BUG_ON(!trans); |
@@ -304,11 +304,17 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
304 | if (ret) | 304 | if (ret) |
305 | goto fail; | 305 | goto fail; |
306 | 306 | ||
307 | key.offset = (u64)-1; | ||
308 | new_root = btrfs_read_fs_root_no_name(root->fs_info, &key); | ||
309 | BUG_ON(IS_ERR(new_root)); | ||
310 | |||
311 | btrfs_record_root_in_trans(trans, new_root); | ||
312 | |||
313 | ret = btrfs_create_subvol_root(trans, new_root, new_dirid, | ||
314 | BTRFS_I(dir)->block_group); | ||
307 | /* | 315 | /* |
308 | * insert the directory item | 316 | * insert the directory item |
309 | */ | 317 | */ |
310 | key.offset = (u64)-1; | ||
311 | dir = dentry->d_parent->d_inode; | ||
312 | ret = btrfs_set_inode_index(dir, &index); | 318 | ret = btrfs_set_inode_index(dir, &index); |
313 | BUG_ON(ret); | 319 | BUG_ON(ret); |
314 | 320 | ||
@@ -322,44 +328,18 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
322 | ret = btrfs_update_inode(trans, root, dir); | 328 | ret = btrfs_update_inode(trans, root, dir); |
323 | BUG_ON(ret); | 329 | BUG_ON(ret); |
324 | 330 | ||
325 | /* add the backref first */ | ||
326 | ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, | 331 | ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, |
327 | objectid, BTRFS_ROOT_BACKREF_KEY, | 332 | objectid, root->root_key.objectid, |
328 | root->root_key.objectid, | ||
329 | dir->i_ino, index, name, namelen); | 333 | dir->i_ino, index, name, namelen); |
330 | 334 | ||
331 | BUG_ON(ret); | 335 | BUG_ON(ret); |
332 | 336 | ||
333 | /* now add the forward ref */ | 337 | d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); |
334 | ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, | ||
335 | root->root_key.objectid, BTRFS_ROOT_REF_KEY, | ||
336 | objectid, | ||
337 | dir->i_ino, index, name, namelen); | ||
338 | |||
339 | BUG_ON(ret); | ||
340 | |||
341 | ret = btrfs_commit_transaction(trans, root); | ||
342 | if (ret) | ||
343 | goto fail_commit; | ||
344 | |||
345 | new_root = btrfs_read_fs_root_no_name(root->fs_info, &key); | ||
346 | BUG_ON(!new_root); | ||
347 | |||
348 | trans = btrfs_start_transaction(new_root, 1); | ||
349 | BUG_ON(!trans); | ||
350 | |||
351 | ret = btrfs_create_subvol_root(trans, new_root, dentry, new_dirid, | ||
352 | BTRFS_I(dir)->block_group); | ||
353 | if (ret) | ||
354 | goto fail; | ||
355 | |||
356 | fail: | 338 | fail: |
357 | nr = trans->blocks_used; | 339 | nr = trans->blocks_used; |
358 | err = btrfs_commit_transaction(trans, new_root); | 340 | err = btrfs_commit_transaction(trans, root); |
359 | if (err && !ret) | 341 | if (err && !ret) |
360 | ret = err; | 342 | ret = err; |
361 | fail_commit: | ||
362 | btrfs_btree_balance_dirty(root, nr); | ||
363 | return ret; | 343 | return ret; |
364 | } | 344 | } |
365 | 345 | ||
@@ -420,14 +400,15 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) | |||
420 | * sys_mkdirat and vfs_mkdir, but we only do a single component lookup | 400 | * sys_mkdirat and vfs_mkdir, but we only do a single component lookup |
421 | * inside this filesystem so it's quite a bit simpler. | 401 | * inside this filesystem so it's quite a bit simpler. |
422 | */ | 402 | */ |
423 | static noinline int btrfs_mksubvol(struct path *parent, char *name, | 403 | static noinline int btrfs_mksubvol(struct path *parent, |
424 | int mode, int namelen, | 404 | char *name, int namelen, |
425 | struct btrfs_root *snap_src) | 405 | struct btrfs_root *snap_src) |
426 | { | 406 | { |
407 | struct inode *dir = parent->dentry->d_inode; | ||
427 | struct dentry *dentry; | 408 | struct dentry *dentry; |
428 | int error; | 409 | int error; |
429 | 410 | ||
430 | mutex_lock_nested(&parent->dentry->d_inode->i_mutex, I_MUTEX_PARENT); | 411 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); |
431 | 412 | ||
432 | dentry = lookup_one_len(name, parent->dentry, namelen); | 413 | dentry = lookup_one_len(name, parent->dentry, namelen); |
433 | error = PTR_ERR(dentry); | 414 | error = PTR_ERR(dentry); |
@@ -438,99 +419,39 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, | |||
438 | if (dentry->d_inode) | 419 | if (dentry->d_inode) |
439 | goto out_dput; | 420 | goto out_dput; |
440 | 421 | ||
441 | if (!IS_POSIXACL(parent->dentry->d_inode)) | ||
442 | mode &= ~current_umask(); | ||
443 | |||
444 | error = mnt_want_write(parent->mnt); | 422 | error = mnt_want_write(parent->mnt); |
445 | if (error) | 423 | if (error) |
446 | goto out_dput; | 424 | goto out_dput; |
447 | 425 | ||
448 | error = btrfs_may_create(parent->dentry->d_inode, dentry); | 426 | error = btrfs_may_create(dir, dentry); |
449 | if (error) | 427 | if (error) |
450 | goto out_drop_write; | 428 | goto out_drop_write; |
451 | 429 | ||
452 | /* | 430 | down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); |
453 | * Actually perform the low-level subvolume creation after all | 431 | |
454 | * this VFS fuzz. | 432 | if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0) |
455 | * | 433 | goto out_up_read; |
456 | * Eventually we want to pass in an inode under which we create this | 434 | |
457 | * subvolume, but for now all are under the filesystem root. | ||
458 | * | ||
459 | * Also we should pass on the mode eventually to allow creating new | ||
460 | * subvolume with specific mode bits. | ||
461 | */ | ||
462 | if (snap_src) { | 435 | if (snap_src) { |
463 | struct dentry *dir = dentry->d_parent; | 436 | error = create_snapshot(snap_src, dentry, |
464 | struct dentry *test = dir->d_parent; | 437 | name, namelen); |
465 | struct btrfs_path *path = btrfs_alloc_path(); | ||
466 | int ret; | ||
467 | u64 test_oid; | ||
468 | u64 parent_oid = BTRFS_I(dir->d_inode)->root->root_key.objectid; | ||
469 | |||
470 | test_oid = snap_src->root_key.objectid; | ||
471 | |||
472 | ret = btrfs_find_root_ref(snap_src->fs_info->tree_root, | ||
473 | path, parent_oid, test_oid); | ||
474 | if (ret == 0) | ||
475 | goto create; | ||
476 | btrfs_release_path(snap_src->fs_info->tree_root, path); | ||
477 | |||
478 | /* we need to make sure we aren't creating a directory loop | ||
479 | * by taking a snapshot of something that has our current | ||
480 | * subvol in its directory tree. So, this loops through | ||
481 | * the dentries and checks the forward refs for each subvolume | ||
482 | * to see if is references the subvolume where we are | ||
483 | * placing this new snapshot. | ||
484 | */ | ||
485 | while (1) { | ||
486 | if (!test || | ||
487 | dir == snap_src->fs_info->sb->s_root || | ||
488 | test == snap_src->fs_info->sb->s_root || | ||
489 | test->d_inode->i_sb != snap_src->fs_info->sb) { | ||
490 | break; | ||
491 | } | ||
492 | if (S_ISLNK(test->d_inode->i_mode)) { | ||
493 | printk(KERN_INFO "Btrfs symlink in snapshot " | ||
494 | "path, failed\n"); | ||
495 | error = -EMLINK; | ||
496 | btrfs_free_path(path); | ||
497 | goto out_drop_write; | ||
498 | } | ||
499 | test_oid = | ||
500 | BTRFS_I(test->d_inode)->root->root_key.objectid; | ||
501 | ret = btrfs_find_root_ref(snap_src->fs_info->tree_root, | ||
502 | path, test_oid, parent_oid); | ||
503 | if (ret == 0) { | ||
504 | printk(KERN_INFO "Btrfs snapshot creation " | ||
505 | "failed, looping\n"); | ||
506 | error = -EMLINK; | ||
507 | btrfs_free_path(path); | ||
508 | goto out_drop_write; | ||
509 | } | ||
510 | btrfs_release_path(snap_src->fs_info->tree_root, path); | ||
511 | test = test->d_parent; | ||
512 | } | ||
513 | create: | ||
514 | btrfs_free_path(path); | ||
515 | error = create_snapshot(snap_src, dentry, name, namelen); | ||
516 | } else { | 438 | } else { |
517 | error = create_subvol(BTRFS_I(parent->dentry->d_inode)->root, | 439 | error = create_subvol(BTRFS_I(dir)->root, dentry, |
518 | dentry, name, namelen); | 440 | name, namelen); |
519 | } | 441 | } |
520 | if (error) | 442 | if (!error) |
521 | goto out_drop_write; | 443 | fsnotify_mkdir(dir, dentry); |
522 | 444 | out_up_read: | |
523 | fsnotify_mkdir(parent->dentry->d_inode, dentry); | 445 | up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); |
524 | out_drop_write: | 446 | out_drop_write: |
525 | mnt_drop_write(parent->mnt); | 447 | mnt_drop_write(parent->mnt); |
526 | out_dput: | 448 | out_dput: |
527 | dput(dentry); | 449 | dput(dentry); |
528 | out_unlock: | 450 | out_unlock: |
529 | mutex_unlock(&parent->dentry->d_inode->i_mutex); | 451 | mutex_unlock(&dir->i_mutex); |
530 | return error; | 452 | return error; |
531 | } | 453 | } |
532 | 454 | ||
533 | |||
534 | static int btrfs_defrag_file(struct file *file) | 455 | static int btrfs_defrag_file(struct file *file) |
535 | { | 456 | { |
536 | struct inode *inode = fdentry(file)->d_inode; | 457 | struct inode *inode = fdentry(file)->d_inode; |
@@ -596,9 +517,8 @@ again: | |||
596 | clear_page_dirty_for_io(page); | 517 | clear_page_dirty_for_io(page); |
597 | 518 | ||
598 | btrfs_set_extent_delalloc(inode, page_start, page_end); | 519 | btrfs_set_extent_delalloc(inode, page_start, page_end); |
599 | |||
600 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
601 | set_page_dirty(page); | 520 | set_page_dirty(page); |
521 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
602 | unlock_page(page); | 522 | unlock_page(page); |
603 | page_cache_release(page); | 523 | page_cache_release(page); |
604 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); | 524 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); |
@@ -609,7 +529,8 @@ out_unlock: | |||
609 | return 0; | 529 | return 0; |
610 | } | 530 | } |
611 | 531 | ||
612 | static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) | 532 | static noinline int btrfs_ioctl_resize(struct btrfs_root *root, |
533 | void __user *arg) | ||
613 | { | 534 | { |
614 | u64 new_size; | 535 | u64 new_size; |
615 | u64 old_size; | 536 | u64 old_size; |
@@ -718,10 +639,7 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
718 | { | 639 | { |
719 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | 640 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; |
720 | struct btrfs_ioctl_vol_args *vol_args; | 641 | struct btrfs_ioctl_vol_args *vol_args; |
721 | struct btrfs_dir_item *di; | ||
722 | struct btrfs_path *path; | ||
723 | struct file *src_file; | 642 | struct file *src_file; |
724 | u64 root_dirid; | ||
725 | int namelen; | 643 | int namelen; |
726 | int ret = 0; | 644 | int ret = 0; |
727 | 645 | ||
@@ -739,32 +657,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
739 | goto out; | 657 | goto out; |
740 | } | 658 | } |
741 | 659 | ||
742 | path = btrfs_alloc_path(); | ||
743 | if (!path) { | ||
744 | ret = -ENOMEM; | ||
745 | goto out; | ||
746 | } | ||
747 | |||
748 | root_dirid = root->fs_info->sb->s_root->d_inode->i_ino, | ||
749 | di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, | ||
750 | path, root_dirid, | ||
751 | vol_args->name, namelen, 0); | ||
752 | btrfs_free_path(path); | ||
753 | |||
754 | if (di && !IS_ERR(di)) { | ||
755 | ret = -EEXIST; | ||
756 | goto out; | ||
757 | } | ||
758 | |||
759 | if (IS_ERR(di)) { | ||
760 | ret = PTR_ERR(di); | ||
761 | goto out; | ||
762 | } | ||
763 | |||
764 | if (subvol) { | 660 | if (subvol) { |
765 | ret = btrfs_mksubvol(&file->f_path, vol_args->name, | 661 | ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, |
766 | file->f_path.dentry->d_inode->i_mode, | 662 | NULL); |
767 | namelen, NULL); | ||
768 | } else { | 663 | } else { |
769 | struct inode *src_inode; | 664 | struct inode *src_inode; |
770 | src_file = fget(vol_args->fd); | 665 | src_file = fget(vol_args->fd); |
@@ -781,17 +676,156 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
781 | fput(src_file); | 676 | fput(src_file); |
782 | goto out; | 677 | goto out; |
783 | } | 678 | } |
784 | ret = btrfs_mksubvol(&file->f_path, vol_args->name, | 679 | ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, |
785 | file->f_path.dentry->d_inode->i_mode, | 680 | BTRFS_I(src_inode)->root); |
786 | namelen, BTRFS_I(src_inode)->root); | ||
787 | fput(src_file); | 681 | fput(src_file); |
788 | } | 682 | } |
789 | |||
790 | out: | 683 | out: |
791 | kfree(vol_args); | 684 | kfree(vol_args); |
792 | return ret; | 685 | return ret; |
793 | } | 686 | } |
794 | 687 | ||
688 | /* | ||
689 | * helper to check if the subvolume references other subvolumes | ||
690 | */ | ||
691 | static noinline int may_destroy_subvol(struct btrfs_root *root) | ||
692 | { | ||
693 | struct btrfs_path *path; | ||
694 | struct btrfs_key key; | ||
695 | int ret; | ||
696 | |||
697 | path = btrfs_alloc_path(); | ||
698 | if (!path) | ||
699 | return -ENOMEM; | ||
700 | |||
701 | key.objectid = root->root_key.objectid; | ||
702 | key.type = BTRFS_ROOT_REF_KEY; | ||
703 | key.offset = (u64)-1; | ||
704 | |||
705 | ret = btrfs_search_slot(NULL, root->fs_info->tree_root, | ||
706 | &key, path, 0, 0); | ||
707 | if (ret < 0) | ||
708 | goto out; | ||
709 | BUG_ON(ret == 0); | ||
710 | |||
711 | ret = 0; | ||
712 | if (path->slots[0] > 0) { | ||
713 | path->slots[0]--; | ||
714 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | ||
715 | if (key.objectid == root->root_key.objectid && | ||
716 | key.type == BTRFS_ROOT_REF_KEY) | ||
717 | ret = -ENOTEMPTY; | ||
718 | } | ||
719 | out: | ||
720 | btrfs_free_path(path); | ||
721 | return ret; | ||
722 | } | ||
723 | |||
724 | static noinline int btrfs_ioctl_snap_destroy(struct file *file, | ||
725 | void __user *arg) | ||
726 | { | ||
727 | struct dentry *parent = fdentry(file); | ||
728 | struct dentry *dentry; | ||
729 | struct inode *dir = parent->d_inode; | ||
730 | struct inode *inode; | ||
731 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
732 | struct btrfs_root *dest = NULL; | ||
733 | struct btrfs_ioctl_vol_args *vol_args; | ||
734 | struct btrfs_trans_handle *trans; | ||
735 | int namelen; | ||
736 | int ret; | ||
737 | int err = 0; | ||
738 | |||
739 | if (!capable(CAP_SYS_ADMIN)) | ||
740 | return -EPERM; | ||
741 | |||
742 | vol_args = memdup_user(arg, sizeof(*vol_args)); | ||
743 | if (IS_ERR(vol_args)) | ||
744 | return PTR_ERR(vol_args); | ||
745 | |||
746 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | ||
747 | namelen = strlen(vol_args->name); | ||
748 | if (strchr(vol_args->name, '/') || | ||
749 | strncmp(vol_args->name, "..", namelen) == 0) { | ||
750 | err = -EINVAL; | ||
751 | goto out; | ||
752 | } | ||
753 | |||
754 | err = mnt_want_write(file->f_path.mnt); | ||
755 | if (err) | ||
756 | goto out; | ||
757 | |||
758 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); | ||
759 | dentry = lookup_one_len(vol_args->name, parent, namelen); | ||
760 | if (IS_ERR(dentry)) { | ||
761 | err = PTR_ERR(dentry); | ||
762 | goto out_unlock_dir; | ||
763 | } | ||
764 | |||
765 | if (!dentry->d_inode) { | ||
766 | err = -ENOENT; | ||
767 | goto out_dput; | ||
768 | } | ||
769 | |||
770 | inode = dentry->d_inode; | ||
771 | if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { | ||
772 | err = -EINVAL; | ||
773 | goto out_dput; | ||
774 | } | ||
775 | |||
776 | dest = BTRFS_I(inode)->root; | ||
777 | |||
778 | mutex_lock(&inode->i_mutex); | ||
779 | err = d_invalidate(dentry); | ||
780 | if (err) | ||
781 | goto out_unlock; | ||
782 | |||
783 | down_write(&root->fs_info->subvol_sem); | ||
784 | |||
785 | err = may_destroy_subvol(dest); | ||
786 | if (err) | ||
787 | goto out_up_write; | ||
788 | |||
789 | trans = btrfs_start_transaction(root, 1); | ||
790 | ret = btrfs_unlink_subvol(trans, root, dir, | ||
791 | dest->root_key.objectid, | ||
792 | dentry->d_name.name, | ||
793 | dentry->d_name.len); | ||
794 | BUG_ON(ret); | ||
795 | |||
796 | btrfs_record_root_in_trans(trans, dest); | ||
797 | |||
798 | memset(&dest->root_item.drop_progress, 0, | ||
799 | sizeof(dest->root_item.drop_progress)); | ||
800 | dest->root_item.drop_level = 0; | ||
801 | btrfs_set_root_refs(&dest->root_item, 0); | ||
802 | |||
803 | ret = btrfs_insert_orphan_item(trans, | ||
804 | root->fs_info->tree_root, | ||
805 | dest->root_key.objectid); | ||
806 | BUG_ON(ret); | ||
807 | |||
808 | ret = btrfs_commit_transaction(trans, root); | ||
809 | BUG_ON(ret); | ||
810 | inode->i_flags |= S_DEAD; | ||
811 | out_up_write: | ||
812 | up_write(&root->fs_info->subvol_sem); | ||
813 | out_unlock: | ||
814 | mutex_unlock(&inode->i_mutex); | ||
815 | if (!err) { | ||
816 | btrfs_invalidate_inodes(dest); | ||
817 | d_delete(dentry); | ||
818 | } | ||
819 | out_dput: | ||
820 | dput(dentry); | ||
821 | out_unlock_dir: | ||
822 | mutex_unlock(&dir->i_mutex); | ||
823 | mnt_drop_write(file->f_path.mnt); | ||
824 | out: | ||
825 | kfree(vol_args); | ||
826 | return err; | ||
827 | } | ||
828 | |||
795 | static int btrfs_ioctl_defrag(struct file *file) | 829 | static int btrfs_ioctl_defrag(struct file *file) |
796 | { | 830 | { |
797 | struct inode *inode = fdentry(file)->d_inode; | 831 | struct inode *inode = fdentry(file)->d_inode; |
@@ -865,8 +899,8 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) | |||
865 | return ret; | 899 | return ret; |
866 | } | 900 | } |
867 | 901 | ||
868 | static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | 902 | static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, |
869 | u64 off, u64 olen, u64 destoff) | 903 | u64 off, u64 olen, u64 destoff) |
870 | { | 904 | { |
871 | struct inode *inode = fdentry(file)->d_inode; | 905 | struct inode *inode = fdentry(file)->d_inode; |
872 | struct btrfs_root *root = BTRFS_I(inode)->root; | 906 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -976,7 +1010,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
976 | 1010 | ||
977 | /* punch hole in destination first */ | 1011 | /* punch hole in destination first */ |
978 | btrfs_drop_extents(trans, root, inode, off, off + len, | 1012 | btrfs_drop_extents(trans, root, inode, off, off + len, |
979 | off + len, 0, &hint_byte); | 1013 | off + len, 0, &hint_byte, 1); |
980 | 1014 | ||
981 | /* clone data */ | 1015 | /* clone data */ |
982 | key.objectid = src->i_ino; | 1016 | key.objectid = src->i_ino; |
@@ -1071,8 +1105,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1071 | datao += off - key.offset; | 1105 | datao += off - key.offset; |
1072 | datal -= off - key.offset; | 1106 | datal -= off - key.offset; |
1073 | } | 1107 | } |
1074 | if (key.offset + datao + datal + key.offset > | 1108 | if (key.offset + datao + datal > off + len) |
1075 | off + len) | ||
1076 | datal = off + len - key.offset - datao; | 1109 | datal = off + len - key.offset - datao; |
1077 | /* disko == 0 means it's a hole */ | 1110 | /* disko == 0 means it's a hole */ |
1078 | if (!disko) | 1111 | if (!disko) |
@@ -1258,6 +1291,8 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
1258 | return btrfs_ioctl_snap_create(file, argp, 0); | 1291 | return btrfs_ioctl_snap_create(file, argp, 0); |
1259 | case BTRFS_IOC_SUBVOL_CREATE: | 1292 | case BTRFS_IOC_SUBVOL_CREATE: |
1260 | return btrfs_ioctl_snap_create(file, argp, 1); | 1293 | return btrfs_ioctl_snap_create(file, argp, 1); |
1294 | case BTRFS_IOC_SNAP_DESTROY: | ||
1295 | return btrfs_ioctl_snap_destroy(file, argp); | ||
1261 | case BTRFS_IOC_DEFRAG: | 1296 | case BTRFS_IOC_DEFRAG: |
1262 | return btrfs_ioctl_defrag(file); | 1297 | return btrfs_ioctl_defrag(file); |
1263 | case BTRFS_IOC_RESIZE: | 1298 | case BTRFS_IOC_RESIZE: |
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index b320b103fa13..bc49914475eb 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h | |||
@@ -65,5 +65,6 @@ struct btrfs_ioctl_clone_range_args { | |||
65 | 65 | ||
66 | #define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ | 66 | #define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ |
67 | struct btrfs_ioctl_vol_args) | 67 | struct btrfs_ioctl_vol_args) |
68 | 68 | #define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ | |
69 | struct btrfs_ioctl_vol_args) | ||
69 | #endif | 70 | #endif |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 7b2f401e604e..b5d6d24726b0 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -159,8 +159,6 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
159 | * | 159 | * |
160 | * len is the length of the extent | 160 | * len is the length of the extent |
161 | * | 161 | * |
162 | * This also sets the EXTENT_ORDERED bit on the range in the inode. | ||
163 | * | ||
164 | * The tree is given a single reference on the ordered extent that was | 162 | * The tree is given a single reference on the ordered extent that was |
165 | * inserted. | 163 | * inserted. |
166 | */ | 164 | */ |
@@ -181,6 +179,7 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
181 | entry->start = start; | 179 | entry->start = start; |
182 | entry->len = len; | 180 | entry->len = len; |
183 | entry->disk_len = disk_len; | 181 | entry->disk_len = disk_len; |
182 | entry->bytes_left = len; | ||
184 | entry->inode = inode; | 183 | entry->inode = inode; |
185 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) | 184 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) |
186 | set_bit(type, &entry->flags); | 185 | set_bit(type, &entry->flags); |
@@ -195,9 +194,6 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
195 | &entry->rb_node); | 194 | &entry->rb_node); |
196 | BUG_ON(node); | 195 | BUG_ON(node); |
197 | 196 | ||
198 | set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, | ||
199 | entry_end(entry) - 1, GFP_NOFS); | ||
200 | |||
201 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 197 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); |
202 | list_add_tail(&entry->root_extent_list, | 198 | list_add_tail(&entry->root_extent_list, |
203 | &BTRFS_I(inode)->root->fs_info->ordered_extents); | 199 | &BTRFS_I(inode)->root->fs_info->ordered_extents); |
@@ -241,13 +237,10 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
241 | struct btrfs_ordered_inode_tree *tree; | 237 | struct btrfs_ordered_inode_tree *tree; |
242 | struct rb_node *node; | 238 | struct rb_node *node; |
243 | struct btrfs_ordered_extent *entry; | 239 | struct btrfs_ordered_extent *entry; |
244 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
245 | int ret; | 240 | int ret; |
246 | 241 | ||
247 | tree = &BTRFS_I(inode)->ordered_tree; | 242 | tree = &BTRFS_I(inode)->ordered_tree; |
248 | mutex_lock(&tree->mutex); | 243 | mutex_lock(&tree->mutex); |
249 | clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1, | ||
250 | GFP_NOFS); | ||
251 | node = tree_search(tree, file_offset); | 244 | node = tree_search(tree, file_offset); |
252 | if (!node) { | 245 | if (!node) { |
253 | ret = 1; | 246 | ret = 1; |
@@ -260,11 +253,16 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
260 | goto out; | 253 | goto out; |
261 | } | 254 | } |
262 | 255 | ||
263 | ret = test_range_bit(io_tree, entry->file_offset, | 256 | if (io_size > entry->bytes_left) { |
264 | entry->file_offset + entry->len - 1, | 257 | printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", |
265 | EXTENT_ORDERED, 0); | 258 | (unsigned long long)entry->bytes_left, |
266 | if (ret == 0) | 259 | (unsigned long long)io_size); |
260 | } | ||
261 | entry->bytes_left -= io_size; | ||
262 | if (entry->bytes_left == 0) | ||
267 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | 263 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); |
264 | else | ||
265 | ret = 1; | ||
268 | out: | 266 | out: |
269 | mutex_unlock(&tree->mutex); | 267 | mutex_unlock(&tree->mutex); |
270 | return ret == 0; | 268 | return ret == 0; |
@@ -476,6 +474,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
476 | u64 orig_end; | 474 | u64 orig_end; |
477 | u64 wait_end; | 475 | u64 wait_end; |
478 | struct btrfs_ordered_extent *ordered; | 476 | struct btrfs_ordered_extent *ordered; |
477 | int found; | ||
479 | 478 | ||
480 | if (start + len < start) { | 479 | if (start + len < start) { |
481 | orig_end = INT_LIMIT(loff_t); | 480 | orig_end = INT_LIMIT(loff_t); |
@@ -502,6 +501,7 @@ again: | |||
502 | orig_end >> PAGE_CACHE_SHIFT); | 501 | orig_end >> PAGE_CACHE_SHIFT); |
503 | 502 | ||
504 | end = orig_end; | 503 | end = orig_end; |
504 | found = 0; | ||
505 | while (1) { | 505 | while (1) { |
506 | ordered = btrfs_lookup_first_ordered_extent(inode, end); | 506 | ordered = btrfs_lookup_first_ordered_extent(inode, end); |
507 | if (!ordered) | 507 | if (!ordered) |
@@ -514,6 +514,7 @@ again: | |||
514 | btrfs_put_ordered_extent(ordered); | 514 | btrfs_put_ordered_extent(ordered); |
515 | break; | 515 | break; |
516 | } | 516 | } |
517 | found++; | ||
517 | btrfs_start_ordered_extent(inode, ordered, 1); | 518 | btrfs_start_ordered_extent(inode, ordered, 1); |
518 | end = ordered->file_offset; | 519 | end = ordered->file_offset; |
519 | btrfs_put_ordered_extent(ordered); | 520 | btrfs_put_ordered_extent(ordered); |
@@ -521,8 +522,8 @@ again: | |||
521 | break; | 522 | break; |
522 | end--; | 523 | end--; |
523 | } | 524 | } |
524 | if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, | 525 | if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, |
525 | EXTENT_ORDERED | EXTENT_DELALLOC, 0)) { | 526 | EXTENT_DELALLOC, 0, NULL)) { |
526 | schedule_timeout(1); | 527 | schedule_timeout(1); |
527 | goto again; | 528 | goto again; |
528 | } | 529 | } |
@@ -613,7 +614,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, | |||
613 | */ | 614 | */ |
614 | if (test_range_bit(io_tree, disk_i_size, | 615 | if (test_range_bit(io_tree, disk_i_size, |
615 | ordered->file_offset + ordered->len - 1, | 616 | ordered->file_offset + ordered->len - 1, |
616 | EXTENT_DELALLOC, 0)) { | 617 | EXTENT_DELALLOC, 0, NULL)) { |
617 | goto out; | 618 | goto out; |
618 | } | 619 | } |
619 | /* | 620 | /* |
@@ -664,7 +665,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, | |||
664 | */ | 665 | */ |
665 | if (i_size_test > entry_end(ordered) && | 666 | if (i_size_test > entry_end(ordered) && |
666 | !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, | 667 | !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, |
667 | EXTENT_DELALLOC, 0)) { | 668 | EXTENT_DELALLOC, 0, NULL)) { |
668 | new_i_size = min_t(u64, i_size_test, i_size_read(inode)); | 669 | new_i_size = min_t(u64, i_size_test, i_size_read(inode)); |
669 | } | 670 | } |
670 | BTRFS_I(inode)->disk_i_size = new_i_size; | 671 | BTRFS_I(inode)->disk_i_size = new_i_size; |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 3d31c8827b01..993a7ea45c70 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -85,6 +85,9 @@ struct btrfs_ordered_extent { | |||
85 | /* extent length on disk */ | 85 | /* extent length on disk */ |
86 | u64 disk_len; | 86 | u64 disk_len; |
87 | 87 | ||
88 | /* number of bytes that still need writing */ | ||
89 | u64 bytes_left; | ||
90 | |||
88 | /* flags (described above) */ | 91 | /* flags (described above) */ |
89 | unsigned long flags; | 92 | unsigned long flags; |
90 | 93 | ||
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c index 3c0d52af4f80..79cba5fbc28e 100644 --- a/fs/btrfs/orphan.c +++ b/fs/btrfs/orphan.c | |||
@@ -65,3 +65,23 @@ out: | |||
65 | btrfs_free_path(path); | 65 | btrfs_free_path(path); |
66 | return ret; | 66 | return ret; |
67 | } | 67 | } |
68 | |||
69 | int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset) | ||
70 | { | ||
71 | struct btrfs_path *path; | ||
72 | struct btrfs_key key; | ||
73 | int ret; | ||
74 | |||
75 | key.objectid = BTRFS_ORPHAN_OBJECTID; | ||
76 | key.type = BTRFS_ORPHAN_ITEM_KEY; | ||
77 | key.offset = offset; | ||
78 | |||
79 | path = btrfs_alloc_path(); | ||
80 | if (!path) | ||
81 | return -ENOMEM; | ||
82 | |||
83 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
84 | |||
85 | btrfs_free_path(path); | ||
86 | return ret; | ||
87 | } | ||
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index c04f7f212602..361ad323faac 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -121,6 +121,15 @@ struct inodevec { | |||
121 | int nr; | 121 | int nr; |
122 | }; | 122 | }; |
123 | 123 | ||
124 | #define MAX_EXTENTS 128 | ||
125 | |||
126 | struct file_extent_cluster { | ||
127 | u64 start; | ||
128 | u64 end; | ||
129 | u64 boundary[MAX_EXTENTS]; | ||
130 | unsigned int nr; | ||
131 | }; | ||
132 | |||
124 | struct reloc_control { | 133 | struct reloc_control { |
125 | /* block group to relocate */ | 134 | /* block group to relocate */ |
126 | struct btrfs_block_group_cache *block_group; | 135 | struct btrfs_block_group_cache *block_group; |
@@ -2180,7 +2189,7 @@ static int tree_block_processed(u64 bytenr, u32 blocksize, | |||
2180 | struct reloc_control *rc) | 2189 | struct reloc_control *rc) |
2181 | { | 2190 | { |
2182 | if (test_range_bit(&rc->processed_blocks, bytenr, | 2191 | if (test_range_bit(&rc->processed_blocks, bytenr, |
2183 | bytenr + blocksize - 1, EXTENT_DIRTY, 1)) | 2192 | bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL)) |
2184 | return 1; | 2193 | return 1; |
2185 | return 0; | 2194 | return 0; |
2186 | } | 2195 | } |
@@ -2529,56 +2538,94 @@ out: | |||
2529 | } | 2538 | } |
2530 | 2539 | ||
2531 | static noinline_for_stack | 2540 | static noinline_for_stack |
2532 | int relocate_inode_pages(struct inode *inode, u64 start, u64 len) | 2541 | int setup_extent_mapping(struct inode *inode, u64 start, u64 end, |
2542 | u64 block_start) | ||
2543 | { | ||
2544 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
2545 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
2546 | struct extent_map *em; | ||
2547 | int ret = 0; | ||
2548 | |||
2549 | em = alloc_extent_map(GFP_NOFS); | ||
2550 | if (!em) | ||
2551 | return -ENOMEM; | ||
2552 | |||
2553 | em->start = start; | ||
2554 | em->len = end + 1 - start; | ||
2555 | em->block_len = em->len; | ||
2556 | em->block_start = block_start; | ||
2557 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
2558 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
2559 | |||
2560 | lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
2561 | while (1) { | ||
2562 | write_lock(&em_tree->lock); | ||
2563 | ret = add_extent_mapping(em_tree, em); | ||
2564 | write_unlock(&em_tree->lock); | ||
2565 | if (ret != -EEXIST) { | ||
2566 | free_extent_map(em); | ||
2567 | break; | ||
2568 | } | ||
2569 | btrfs_drop_extent_cache(inode, start, end, 0); | ||
2570 | } | ||
2571 | unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
2572 | return ret; | ||
2573 | } | ||
2574 | |||
2575 | static int relocate_file_extent_cluster(struct inode *inode, | ||
2576 | struct file_extent_cluster *cluster) | ||
2533 | { | 2577 | { |
2534 | u64 page_start; | 2578 | u64 page_start; |
2535 | u64 page_end; | 2579 | u64 page_end; |
2536 | unsigned long i; | 2580 | u64 offset = BTRFS_I(inode)->index_cnt; |
2537 | unsigned long first_index; | 2581 | unsigned long index; |
2538 | unsigned long last_index; | 2582 | unsigned long last_index; |
2539 | unsigned int total_read = 0; | 2583 | unsigned int dirty_page = 0; |
2540 | unsigned int total_dirty = 0; | ||
2541 | struct page *page; | 2584 | struct page *page; |
2542 | struct file_ra_state *ra; | 2585 | struct file_ra_state *ra; |
2543 | struct btrfs_ordered_extent *ordered; | 2586 | int nr = 0; |
2544 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
2545 | int ret = 0; | 2587 | int ret = 0; |
2546 | 2588 | ||
2589 | if (!cluster->nr) | ||
2590 | return 0; | ||
2591 | |||
2547 | ra = kzalloc(sizeof(*ra), GFP_NOFS); | 2592 | ra = kzalloc(sizeof(*ra), GFP_NOFS); |
2548 | if (!ra) | 2593 | if (!ra) |
2549 | return -ENOMEM; | 2594 | return -ENOMEM; |
2550 | 2595 | ||
2596 | index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; | ||
2597 | last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; | ||
2598 | |||
2551 | mutex_lock(&inode->i_mutex); | 2599 | mutex_lock(&inode->i_mutex); |
2552 | first_index = start >> PAGE_CACHE_SHIFT; | ||
2553 | last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; | ||
2554 | 2600 | ||
2555 | /* make sure the dirty trick played by the caller work */ | 2601 | i_size_write(inode, cluster->end + 1 - offset); |
2556 | while (1) { | 2602 | ret = setup_extent_mapping(inode, cluster->start - offset, |
2557 | ret = invalidate_inode_pages2_range(inode->i_mapping, | 2603 | cluster->end - offset, cluster->start); |
2558 | first_index, last_index); | ||
2559 | if (ret != -EBUSY) | ||
2560 | break; | ||
2561 | schedule_timeout(HZ/10); | ||
2562 | } | ||
2563 | if (ret) | 2604 | if (ret) |
2564 | goto out_unlock; | 2605 | goto out_unlock; |
2565 | 2606 | ||
2566 | file_ra_state_init(ra, inode->i_mapping); | 2607 | file_ra_state_init(ra, inode->i_mapping); |
2567 | 2608 | ||
2568 | for (i = first_index ; i <= last_index; i++) { | 2609 | WARN_ON(cluster->start != cluster->boundary[0]); |
2569 | if (total_read % ra->ra_pages == 0) { | 2610 | while (index <= last_index) { |
2570 | btrfs_force_ra(inode->i_mapping, ra, NULL, i, | 2611 | page = find_lock_page(inode->i_mapping, index); |
2571 | min(last_index, ra->ra_pages + i - 1)); | ||
2572 | } | ||
2573 | total_read++; | ||
2574 | again: | ||
2575 | if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) | ||
2576 | BUG_ON(1); | ||
2577 | page = grab_cache_page(inode->i_mapping, i); | ||
2578 | if (!page) { | 2612 | if (!page) { |
2579 | ret = -ENOMEM; | 2613 | page_cache_sync_readahead(inode->i_mapping, |
2580 | goto out_unlock; | 2614 | ra, NULL, index, |
2615 | last_index + 1 - index); | ||
2616 | page = grab_cache_page(inode->i_mapping, index); | ||
2617 | if (!page) { | ||
2618 | ret = -ENOMEM; | ||
2619 | goto out_unlock; | ||
2620 | } | ||
2621 | } | ||
2622 | |||
2623 | if (PageReadahead(page)) { | ||
2624 | page_cache_async_readahead(inode->i_mapping, | ||
2625 | ra, NULL, page, index, | ||
2626 | last_index + 1 - index); | ||
2581 | } | 2627 | } |
2628 | |||
2582 | if (!PageUptodate(page)) { | 2629 | if (!PageUptodate(page)) { |
2583 | btrfs_readpage(NULL, page); | 2630 | btrfs_readpage(NULL, page); |
2584 | lock_page(page); | 2631 | lock_page(page); |
@@ -2589,75 +2636,79 @@ again: | |||
2589 | goto out_unlock; | 2636 | goto out_unlock; |
2590 | } | 2637 | } |
2591 | } | 2638 | } |
2592 | wait_on_page_writeback(page); | ||
2593 | 2639 | ||
2594 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; | 2640 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; |
2595 | page_end = page_start + PAGE_CACHE_SIZE - 1; | 2641 | page_end = page_start + PAGE_CACHE_SIZE - 1; |
2596 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); | 2642 | |
2597 | 2643 | lock_extent(&BTRFS_I(inode)->io_tree, | |
2598 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | 2644 | page_start, page_end, GFP_NOFS); |
2599 | if (ordered) { | 2645 | |
2600 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
2601 | unlock_page(page); | ||
2602 | page_cache_release(page); | ||
2603 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
2604 | btrfs_put_ordered_extent(ordered); | ||
2605 | goto again; | ||
2606 | } | ||
2607 | set_page_extent_mapped(page); | 2646 | set_page_extent_mapped(page); |
2608 | 2647 | ||
2609 | if (i == first_index) | 2648 | if (nr < cluster->nr && |
2610 | set_extent_bits(io_tree, page_start, page_end, | 2649 | page_start + offset == cluster->boundary[nr]) { |
2650 | set_extent_bits(&BTRFS_I(inode)->io_tree, | ||
2651 | page_start, page_end, | ||
2611 | EXTENT_BOUNDARY, GFP_NOFS); | 2652 | EXTENT_BOUNDARY, GFP_NOFS); |
2653 | nr++; | ||
2654 | } | ||
2612 | btrfs_set_extent_delalloc(inode, page_start, page_end); | 2655 | btrfs_set_extent_delalloc(inode, page_start, page_end); |
2613 | 2656 | ||
2614 | set_page_dirty(page); | 2657 | set_page_dirty(page); |
2615 | total_dirty++; | 2658 | dirty_page++; |
2616 | 2659 | ||
2617 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 2660 | unlock_extent(&BTRFS_I(inode)->io_tree, |
2661 | page_start, page_end, GFP_NOFS); | ||
2618 | unlock_page(page); | 2662 | unlock_page(page); |
2619 | page_cache_release(page); | 2663 | page_cache_release(page); |
2664 | |||
2665 | index++; | ||
2666 | if (nr < cluster->nr && | ||
2667 | page_end + 1 + offset == cluster->boundary[nr]) { | ||
2668 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
2669 | dirty_page); | ||
2670 | dirty_page = 0; | ||
2671 | } | ||
2672 | } | ||
2673 | if (dirty_page) { | ||
2674 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
2675 | dirty_page); | ||
2620 | } | 2676 | } |
2677 | WARN_ON(nr != cluster->nr); | ||
2621 | out_unlock: | 2678 | out_unlock: |
2622 | mutex_unlock(&inode->i_mutex); | 2679 | mutex_unlock(&inode->i_mutex); |
2623 | kfree(ra); | 2680 | kfree(ra); |
2624 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); | ||
2625 | return ret; | 2681 | return ret; |
2626 | } | 2682 | } |
2627 | 2683 | ||
2628 | static noinline_for_stack | 2684 | static noinline_for_stack |
2629 | int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key) | 2685 | int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key, |
2686 | struct file_extent_cluster *cluster) | ||
2630 | { | 2687 | { |
2631 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2688 | int ret; |
2632 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
2633 | struct extent_map *em; | ||
2634 | u64 start = extent_key->objectid - BTRFS_I(inode)->index_cnt; | ||
2635 | u64 end = start + extent_key->offset - 1; | ||
2636 | |||
2637 | em = alloc_extent_map(GFP_NOFS); | ||
2638 | em->start = start; | ||
2639 | em->len = extent_key->offset; | ||
2640 | em->block_len = extent_key->offset; | ||
2641 | em->block_start = extent_key->objectid; | ||
2642 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
2643 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
2644 | 2689 | ||
2645 | /* setup extent map to cheat btrfs_readpage */ | 2690 | if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) { |
2646 | lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | 2691 | ret = relocate_file_extent_cluster(inode, cluster); |
2647 | while (1) { | 2692 | if (ret) |
2648 | int ret; | 2693 | return ret; |
2649 | spin_lock(&em_tree->lock); | 2694 | cluster->nr = 0; |
2650 | ret = add_extent_mapping(em_tree, em); | ||
2651 | spin_unlock(&em_tree->lock); | ||
2652 | if (ret != -EEXIST) { | ||
2653 | free_extent_map(em); | ||
2654 | break; | ||
2655 | } | ||
2656 | btrfs_drop_extent_cache(inode, start, end, 0); | ||
2657 | } | 2695 | } |
2658 | unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
2659 | 2696 | ||
2660 | return relocate_inode_pages(inode, start, extent_key->offset); | 2697 | if (!cluster->nr) |
2698 | cluster->start = extent_key->objectid; | ||
2699 | else | ||
2700 | BUG_ON(cluster->nr >= MAX_EXTENTS); | ||
2701 | cluster->end = extent_key->objectid + extent_key->offset - 1; | ||
2702 | cluster->boundary[cluster->nr] = extent_key->objectid; | ||
2703 | cluster->nr++; | ||
2704 | |||
2705 | if (cluster->nr >= MAX_EXTENTS) { | ||
2706 | ret = relocate_file_extent_cluster(inode, cluster); | ||
2707 | if (ret) | ||
2708 | return ret; | ||
2709 | cluster->nr = 0; | ||
2710 | } | ||
2711 | return 0; | ||
2661 | } | 2712 | } |
2662 | 2713 | ||
2663 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | 2714 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 |
@@ -3203,10 +3254,12 @@ static int check_extent_flags(u64 flags) | |||
3203 | return 0; | 3254 | return 0; |
3204 | } | 3255 | } |
3205 | 3256 | ||
3257 | |||
3206 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | 3258 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) |
3207 | { | 3259 | { |
3208 | struct rb_root blocks = RB_ROOT; | 3260 | struct rb_root blocks = RB_ROOT; |
3209 | struct btrfs_key key; | 3261 | struct btrfs_key key; |
3262 | struct file_extent_cluster *cluster; | ||
3210 | struct btrfs_trans_handle *trans = NULL; | 3263 | struct btrfs_trans_handle *trans = NULL; |
3211 | struct btrfs_path *path; | 3264 | struct btrfs_path *path; |
3212 | struct btrfs_extent_item *ei; | 3265 | struct btrfs_extent_item *ei; |
@@ -3216,10 +3269,17 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3216 | int ret; | 3269 | int ret; |
3217 | int err = 0; | 3270 | int err = 0; |
3218 | 3271 | ||
3272 | cluster = kzalloc(sizeof(*cluster), GFP_NOFS); | ||
3273 | if (!cluster) | ||
3274 | return -ENOMEM; | ||
3275 | |||
3219 | path = btrfs_alloc_path(); | 3276 | path = btrfs_alloc_path(); |
3220 | if (!path) | 3277 | if (!path) |
3221 | return -ENOMEM; | 3278 | return -ENOMEM; |
3222 | 3279 | ||
3280 | rc->extents_found = 0; | ||
3281 | rc->extents_skipped = 0; | ||
3282 | |||
3223 | rc->search_start = rc->block_group->key.objectid; | 3283 | rc->search_start = rc->block_group->key.objectid; |
3224 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, | 3284 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, |
3225 | GFP_NOFS); | 3285 | GFP_NOFS); |
@@ -3306,14 +3366,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3306 | } | 3366 | } |
3307 | 3367 | ||
3308 | nr = trans->blocks_used; | 3368 | nr = trans->blocks_used; |
3309 | btrfs_end_transaction_throttle(trans, rc->extent_root); | 3369 | btrfs_end_transaction(trans, rc->extent_root); |
3310 | trans = NULL; | 3370 | trans = NULL; |
3311 | btrfs_btree_balance_dirty(rc->extent_root, nr); | 3371 | btrfs_btree_balance_dirty(rc->extent_root, nr); |
3312 | 3372 | ||
3313 | if (rc->stage == MOVE_DATA_EXTENTS && | 3373 | if (rc->stage == MOVE_DATA_EXTENTS && |
3314 | (flags & BTRFS_EXTENT_FLAG_DATA)) { | 3374 | (flags & BTRFS_EXTENT_FLAG_DATA)) { |
3315 | rc->found_file_extent = 1; | 3375 | rc->found_file_extent = 1; |
3316 | ret = relocate_data_extent(rc->data_inode, &key); | 3376 | ret = relocate_data_extent(rc->data_inode, |
3377 | &key, cluster); | ||
3317 | if (ret < 0) { | 3378 | if (ret < 0) { |
3318 | err = ret; | 3379 | err = ret; |
3319 | break; | 3380 | break; |
@@ -3328,6 +3389,14 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3328 | btrfs_btree_balance_dirty(rc->extent_root, nr); | 3389 | btrfs_btree_balance_dirty(rc->extent_root, nr); |
3329 | } | 3390 | } |
3330 | 3391 | ||
3392 | if (!err) { | ||
3393 | ret = relocate_file_extent_cluster(rc->data_inode, cluster); | ||
3394 | if (ret < 0) | ||
3395 | err = ret; | ||
3396 | } | ||
3397 | |||
3398 | kfree(cluster); | ||
3399 | |||
3331 | rc->create_reloc_root = 0; | 3400 | rc->create_reloc_root = 0; |
3332 | smp_mb(); | 3401 | smp_mb(); |
3333 | 3402 | ||
@@ -3348,8 +3417,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3348 | } | 3417 | } |
3349 | 3418 | ||
3350 | static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | 3419 | static int __insert_orphan_inode(struct btrfs_trans_handle *trans, |
3351 | struct btrfs_root *root, | 3420 | struct btrfs_root *root, u64 objectid) |
3352 | u64 objectid, u64 size) | ||
3353 | { | 3421 | { |
3354 | struct btrfs_path *path; | 3422 | struct btrfs_path *path; |
3355 | struct btrfs_inode_item *item; | 3423 | struct btrfs_inode_item *item; |
@@ -3368,7 +3436,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | |||
3368 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); | 3436 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); |
3369 | memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); | 3437 | memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); |
3370 | btrfs_set_inode_generation(leaf, item, 1); | 3438 | btrfs_set_inode_generation(leaf, item, 1); |
3371 | btrfs_set_inode_size(leaf, item, size); | 3439 | btrfs_set_inode_size(leaf, item, 0); |
3372 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); | 3440 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); |
3373 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); | 3441 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); |
3374 | btrfs_mark_buffer_dirty(leaf); | 3442 | btrfs_mark_buffer_dirty(leaf); |
@@ -3404,12 +3472,7 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
3404 | if (err) | 3472 | if (err) |
3405 | goto out; | 3473 | goto out; |
3406 | 3474 | ||
3407 | err = __insert_orphan_inode(trans, root, objectid, group->key.offset); | 3475 | err = __insert_orphan_inode(trans, root, objectid); |
3408 | BUG_ON(err); | ||
3409 | |||
3410 | err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, | ||
3411 | group->key.offset, 0, group->key.offset, | ||
3412 | 0, 0, 0); | ||
3413 | BUG_ON(err); | 3476 | BUG_ON(err); |
3414 | 3477 | ||
3415 | key.objectid = objectid; | 3478 | key.objectid = objectid; |
@@ -3475,14 +3538,15 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3475 | btrfs_wait_ordered_extents(fs_info->tree_root, 0); | 3538 | btrfs_wait_ordered_extents(fs_info->tree_root, 0); |
3476 | 3539 | ||
3477 | while (1) { | 3540 | while (1) { |
3478 | mutex_lock(&fs_info->cleaner_mutex); | ||
3479 | btrfs_clean_old_snapshots(fs_info->tree_root); | ||
3480 | mutex_unlock(&fs_info->cleaner_mutex); | ||
3481 | |||
3482 | rc->extents_found = 0; | 3541 | rc->extents_found = 0; |
3483 | rc->extents_skipped = 0; | 3542 | rc->extents_skipped = 0; |
3484 | 3543 | ||
3544 | mutex_lock(&fs_info->cleaner_mutex); | ||
3545 | |||
3546 | btrfs_clean_old_snapshots(fs_info->tree_root); | ||
3485 | ret = relocate_block_group(rc); | 3547 | ret = relocate_block_group(rc); |
3548 | |||
3549 | mutex_unlock(&fs_info->cleaner_mutex); | ||
3486 | if (ret < 0) { | 3550 | if (ret < 0) { |
3487 | err = ret; | 3551 | err = ret; |
3488 | break; | 3552 | break; |
@@ -3514,10 +3578,10 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3514 | } | 3578 | } |
3515 | } | 3579 | } |
3516 | 3580 | ||
3517 | filemap_fdatawrite_range(fs_info->btree_inode->i_mapping, | 3581 | filemap_write_and_wait_range(fs_info->btree_inode->i_mapping, |
3518 | rc->block_group->key.objectid, | 3582 | rc->block_group->key.objectid, |
3519 | rc->block_group->key.objectid + | 3583 | rc->block_group->key.objectid + |
3520 | rc->block_group->key.offset - 1); | 3584 | rc->block_group->key.offset - 1); |
3521 | 3585 | ||
3522 | WARN_ON(rc->block_group->pinned > 0); | 3586 | WARN_ON(rc->block_group->pinned > 0); |
3523 | WARN_ON(rc->block_group->reserved > 0); | 3587 | WARN_ON(rc->block_group->reserved > 0); |
@@ -3530,6 +3594,26 @@ out: | |||
3530 | return err; | 3594 | return err; |
3531 | } | 3595 | } |
3532 | 3596 | ||
3597 | static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) | ||
3598 | { | ||
3599 | struct btrfs_trans_handle *trans; | ||
3600 | int ret; | ||
3601 | |||
3602 | trans = btrfs_start_transaction(root->fs_info->tree_root, 1); | ||
3603 | |||
3604 | memset(&root->root_item.drop_progress, 0, | ||
3605 | sizeof(root->root_item.drop_progress)); | ||
3606 | root->root_item.drop_level = 0; | ||
3607 | btrfs_set_root_refs(&root->root_item, 0); | ||
3608 | ret = btrfs_update_root(trans, root->fs_info->tree_root, | ||
3609 | &root->root_key, &root->root_item); | ||
3610 | BUG_ON(ret); | ||
3611 | |||
3612 | ret = btrfs_end_transaction(trans, root->fs_info->tree_root); | ||
3613 | BUG_ON(ret); | ||
3614 | return 0; | ||
3615 | } | ||
3616 | |||
3533 | /* | 3617 | /* |
3534 | * recover relocation interrupted by system crash. | 3618 | * recover relocation interrupted by system crash. |
3535 | * | 3619 | * |
@@ -3589,8 +3673,12 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
3589 | fs_root = read_fs_root(root->fs_info, | 3673 | fs_root = read_fs_root(root->fs_info, |
3590 | reloc_root->root_key.offset); | 3674 | reloc_root->root_key.offset); |
3591 | if (IS_ERR(fs_root)) { | 3675 | if (IS_ERR(fs_root)) { |
3592 | err = PTR_ERR(fs_root); | 3676 | ret = PTR_ERR(fs_root); |
3593 | goto out; | 3677 | if (ret != -ENOENT) { |
3678 | err = ret; | ||
3679 | goto out; | ||
3680 | } | ||
3681 | mark_garbage_root(reloc_root); | ||
3594 | } | 3682 | } |
3595 | } | 3683 | } |
3596 | 3684 | ||
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 0ddc6d61c55a..9351428f30e2 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -94,17 +94,23 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, | |||
94 | goto out; | 94 | goto out; |
95 | 95 | ||
96 | BUG_ON(ret == 0); | 96 | BUG_ON(ret == 0); |
97 | if (path->slots[0] == 0) { | ||
98 | ret = 1; | ||
99 | goto out; | ||
100 | } | ||
97 | l = path->nodes[0]; | 101 | l = path->nodes[0]; |
98 | BUG_ON(path->slots[0] == 0); | ||
99 | slot = path->slots[0] - 1; | 102 | slot = path->slots[0] - 1; |
100 | btrfs_item_key_to_cpu(l, &found_key, slot); | 103 | btrfs_item_key_to_cpu(l, &found_key, slot); |
101 | if (found_key.objectid != objectid) { | 104 | if (found_key.objectid != objectid || |
105 | found_key.type != BTRFS_ROOT_ITEM_KEY) { | ||
102 | ret = 1; | 106 | ret = 1; |
103 | goto out; | 107 | goto out; |
104 | } | 108 | } |
105 | read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), | 109 | if (item) |
106 | sizeof(*item)); | 110 | read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), |
107 | memcpy(key, &found_key, sizeof(found_key)); | 111 | sizeof(*item)); |
112 | if (key) | ||
113 | memcpy(key, &found_key, sizeof(found_key)); | ||
108 | ret = 0; | 114 | ret = 0; |
109 | out: | 115 | out: |
110 | btrfs_free_path(path); | 116 | btrfs_free_path(path); |
@@ -249,6 +255,59 @@ err: | |||
249 | return ret; | 255 | return ret; |
250 | } | 256 | } |
251 | 257 | ||
258 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | ||
259 | { | ||
260 | struct extent_buffer *leaf; | ||
261 | struct btrfs_path *path; | ||
262 | struct btrfs_key key; | ||
263 | int err = 0; | ||
264 | int ret; | ||
265 | |||
266 | path = btrfs_alloc_path(); | ||
267 | if (!path) | ||
268 | return -ENOMEM; | ||
269 | |||
270 | key.objectid = BTRFS_ORPHAN_OBJECTID; | ||
271 | key.type = BTRFS_ORPHAN_ITEM_KEY; | ||
272 | key.offset = 0; | ||
273 | |||
274 | while (1) { | ||
275 | ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); | ||
276 | if (ret < 0) { | ||
277 | err = ret; | ||
278 | break; | ||
279 | } | ||
280 | |||
281 | leaf = path->nodes[0]; | ||
282 | if (path->slots[0] >= btrfs_header_nritems(leaf)) { | ||
283 | ret = btrfs_next_leaf(tree_root, path); | ||
284 | if (ret < 0) | ||
285 | err = ret; | ||
286 | if (ret != 0) | ||
287 | break; | ||
288 | leaf = path->nodes[0]; | ||
289 | } | ||
290 | |||
291 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
292 | btrfs_release_path(tree_root, path); | ||
293 | |||
294 | if (key.objectid != BTRFS_ORPHAN_OBJECTID || | ||
295 | key.type != BTRFS_ORPHAN_ITEM_KEY) | ||
296 | break; | ||
297 | |||
298 | ret = btrfs_find_dead_roots(tree_root, key.offset); | ||
299 | if (ret) { | ||
300 | err = ret; | ||
301 | break; | ||
302 | } | ||
303 | |||
304 | key.offset++; | ||
305 | } | ||
306 | |||
307 | btrfs_free_path(path); | ||
308 | return err; | ||
309 | } | ||
310 | |||
252 | /* drop the root item for 'key' from 'root' */ | 311 | /* drop the root item for 'key' from 'root' */ |
253 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 312 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
254 | struct btrfs_key *key) | 313 | struct btrfs_key *key) |
@@ -278,31 +337,57 @@ out: | |||
278 | return ret; | 337 | return ret; |
279 | } | 338 | } |
280 | 339 | ||
281 | #if 0 /* this will get used when snapshot deletion is implemented */ | ||
282 | int btrfs_del_root_ref(struct btrfs_trans_handle *trans, | 340 | int btrfs_del_root_ref(struct btrfs_trans_handle *trans, |
283 | struct btrfs_root *tree_root, | 341 | struct btrfs_root *tree_root, |
284 | u64 root_id, u8 type, u64 ref_id) | 342 | u64 root_id, u64 ref_id, u64 dirid, u64 *sequence, |
343 | const char *name, int name_len) | ||
344 | |||
285 | { | 345 | { |
346 | struct btrfs_path *path; | ||
347 | struct btrfs_root_ref *ref; | ||
348 | struct extent_buffer *leaf; | ||
286 | struct btrfs_key key; | 349 | struct btrfs_key key; |
350 | unsigned long ptr; | ||
351 | int err = 0; | ||
287 | int ret; | 352 | int ret; |
288 | struct btrfs_path *path; | ||
289 | 353 | ||
290 | path = btrfs_alloc_path(); | 354 | path = btrfs_alloc_path(); |
355 | if (!path) | ||
356 | return -ENOMEM; | ||
291 | 357 | ||
292 | key.objectid = root_id; | 358 | key.objectid = root_id; |
293 | key.type = type; | 359 | key.type = BTRFS_ROOT_BACKREF_KEY; |
294 | key.offset = ref_id; | 360 | key.offset = ref_id; |
295 | 361 | again: | |
296 | ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); | 362 | ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); |
297 | BUG_ON(ret); | 363 | BUG_ON(ret < 0); |
298 | 364 | if (ret == 0) { | |
299 | ret = btrfs_del_item(trans, tree_root, path); | 365 | leaf = path->nodes[0]; |
300 | BUG_ON(ret); | 366 | ref = btrfs_item_ptr(leaf, path->slots[0], |
367 | struct btrfs_root_ref); | ||
368 | |||
369 | WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid); | ||
370 | WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len); | ||
371 | ptr = (unsigned long)(ref + 1); | ||
372 | WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len)); | ||
373 | *sequence = btrfs_root_ref_sequence(leaf, ref); | ||
374 | |||
375 | ret = btrfs_del_item(trans, tree_root, path); | ||
376 | BUG_ON(ret); | ||
377 | } else | ||
378 | err = -ENOENT; | ||
379 | |||
380 | if (key.type == BTRFS_ROOT_BACKREF_KEY) { | ||
381 | btrfs_release_path(tree_root, path); | ||
382 | key.objectid = ref_id; | ||
383 | key.type = BTRFS_ROOT_REF_KEY; | ||
384 | key.offset = root_id; | ||
385 | goto again; | ||
386 | } | ||
301 | 387 | ||
302 | btrfs_free_path(path); | 388 | btrfs_free_path(path); |
303 | return ret; | 389 | return err; |
304 | } | 390 | } |
305 | #endif | ||
306 | 391 | ||
307 | int btrfs_find_root_ref(struct btrfs_root *tree_root, | 392 | int btrfs_find_root_ref(struct btrfs_root *tree_root, |
308 | struct btrfs_path *path, | 393 | struct btrfs_path *path, |
@@ -319,7 +404,6 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root, | |||
319 | return ret; | 404 | return ret; |
320 | } | 405 | } |
321 | 406 | ||
322 | |||
323 | /* | 407 | /* |
324 | * add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY | 408 | * add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY |
325 | * or BTRFS_ROOT_BACKREF_KEY. | 409 | * or BTRFS_ROOT_BACKREF_KEY. |
@@ -335,8 +419,7 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root, | |||
335 | */ | 419 | */ |
336 | int btrfs_add_root_ref(struct btrfs_trans_handle *trans, | 420 | int btrfs_add_root_ref(struct btrfs_trans_handle *trans, |
337 | struct btrfs_root *tree_root, | 421 | struct btrfs_root *tree_root, |
338 | u64 root_id, u8 type, u64 ref_id, | 422 | u64 root_id, u64 ref_id, u64 dirid, u64 sequence, |
339 | u64 dirid, u64 sequence, | ||
340 | const char *name, int name_len) | 423 | const char *name, int name_len) |
341 | { | 424 | { |
342 | struct btrfs_key key; | 425 | struct btrfs_key key; |
@@ -346,13 +429,14 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, | |||
346 | struct extent_buffer *leaf; | 429 | struct extent_buffer *leaf; |
347 | unsigned long ptr; | 430 | unsigned long ptr; |
348 | 431 | ||
349 | |||
350 | path = btrfs_alloc_path(); | 432 | path = btrfs_alloc_path(); |
433 | if (!path) | ||
434 | return -ENOMEM; | ||
351 | 435 | ||
352 | key.objectid = root_id; | 436 | key.objectid = root_id; |
353 | key.type = type; | 437 | key.type = BTRFS_ROOT_BACKREF_KEY; |
354 | key.offset = ref_id; | 438 | key.offset = ref_id; |
355 | 439 | again: | |
356 | ret = btrfs_insert_empty_item(trans, tree_root, path, &key, | 440 | ret = btrfs_insert_empty_item(trans, tree_root, path, &key, |
357 | sizeof(*ref) + name_len); | 441 | sizeof(*ref) + name_len); |
358 | BUG_ON(ret); | 442 | BUG_ON(ret); |
@@ -366,6 +450,14 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, | |||
366 | write_extent_buffer(leaf, name, ptr, name_len); | 450 | write_extent_buffer(leaf, name, ptr, name_len); |
367 | btrfs_mark_buffer_dirty(leaf); | 451 | btrfs_mark_buffer_dirty(leaf); |
368 | 452 | ||
453 | if (key.type == BTRFS_ROOT_BACKREF_KEY) { | ||
454 | btrfs_release_path(tree_root, path); | ||
455 | key.objectid = ref_id; | ||
456 | key.type = BTRFS_ROOT_REF_KEY; | ||
457 | key.offset = root_id; | ||
458 | goto again; | ||
459 | } | ||
460 | |||
369 | btrfs_free_path(path); | 461 | btrfs_free_path(path); |
370 | return ret; | 462 | return 0; |
371 | } | 463 | } |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2db17cd66fc5..67035385444c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -676,6 +676,7 @@ static int btrfs_unfreeze(struct super_block *sb) | |||
676 | } | 676 | } |
677 | 677 | ||
678 | static const struct super_operations btrfs_super_ops = { | 678 | static const struct super_operations btrfs_super_ops = { |
679 | .drop_inode = btrfs_drop_inode, | ||
679 | .delete_inode = btrfs_delete_inode, | 680 | .delete_inode = btrfs_delete_inode, |
680 | .put_super = btrfs_put_super, | 681 | .put_super = btrfs_put_super, |
681 | .sync_fs = btrfs_sync_fs, | 682 | .sync_fs = btrfs_sync_fs, |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index cdbb5022da52..88f866f85e7a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -104,7 +104,6 @@ static noinline int record_root_in_trans(struct btrfs_trans_handle *trans, | |||
104 | { | 104 | { |
105 | if (root->ref_cows && root->last_trans < trans->transid) { | 105 | if (root->ref_cows && root->last_trans < trans->transid) { |
106 | WARN_ON(root == root->fs_info->extent_root); | 106 | WARN_ON(root == root->fs_info->extent_root); |
107 | WARN_ON(root->root_item.refs == 0); | ||
108 | WARN_ON(root->commit_root != root->node); | 107 | WARN_ON(root->commit_root != root->node); |
109 | 108 | ||
110 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, | 109 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, |
@@ -720,7 +719,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
720 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | 719 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); |
721 | 720 | ||
722 | key.objectid = objectid; | 721 | key.objectid = objectid; |
723 | key.offset = 0; | 722 | /* record when the snapshot was created in key.offset */ |
723 | key.offset = trans->transid; | ||
724 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 724 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); |
725 | 725 | ||
726 | old = btrfs_lock_root_node(root); | 726 | old = btrfs_lock_root_node(root); |
@@ -778,24 +778,14 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info, | |||
778 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | 778 | ret = btrfs_update_inode(trans, parent_root, parent_inode); |
779 | BUG_ON(ret); | 779 | BUG_ON(ret); |
780 | 780 | ||
781 | /* add the backref first */ | ||
782 | ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, | 781 | ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, |
783 | pending->root_key.objectid, | 782 | pending->root_key.objectid, |
784 | BTRFS_ROOT_BACKREF_KEY, | ||
785 | parent_root->root_key.objectid, | 783 | parent_root->root_key.objectid, |
786 | parent_inode->i_ino, index, pending->name, | 784 | parent_inode->i_ino, index, pending->name, |
787 | namelen); | 785 | namelen); |
788 | 786 | ||
789 | BUG_ON(ret); | 787 | BUG_ON(ret); |
790 | 788 | ||
791 | /* now add the forward ref */ | ||
792 | ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, | ||
793 | parent_root->root_key.objectid, | ||
794 | BTRFS_ROOT_REF_KEY, | ||
795 | pending->root_key.objectid, | ||
796 | parent_inode->i_ino, index, pending->name, | ||
797 | namelen); | ||
798 | |||
799 | inode = btrfs_lookup_dentry(parent_inode, pending->dentry); | 789 | inode = btrfs_lookup_dentry(parent_inode, pending->dentry); |
800 | d_instantiate(pending->dentry, inode); | 790 | d_instantiate(pending->dentry, inode); |
801 | fail: | 791 | fail: |
@@ -874,7 +864,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
874 | unsigned long timeout = 1; | 864 | unsigned long timeout = 1; |
875 | struct btrfs_transaction *cur_trans; | 865 | struct btrfs_transaction *cur_trans; |
876 | struct btrfs_transaction *prev_trans = NULL; | 866 | struct btrfs_transaction *prev_trans = NULL; |
877 | struct extent_io_tree *pinned_copy; | ||
878 | DEFINE_WAIT(wait); | 867 | DEFINE_WAIT(wait); |
879 | int ret; | 868 | int ret; |
880 | int should_grow = 0; | 869 | int should_grow = 0; |
@@ -915,13 +904,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
915 | return 0; | 904 | return 0; |
916 | } | 905 | } |
917 | 906 | ||
918 | pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS); | ||
919 | if (!pinned_copy) | ||
920 | return -ENOMEM; | ||
921 | |||
922 | extent_io_tree_init(pinned_copy, | ||
923 | root->fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
924 | |||
925 | trans->transaction->in_commit = 1; | 907 | trans->transaction->in_commit = 1; |
926 | trans->transaction->blocked = 1; | 908 | trans->transaction->blocked = 1; |
927 | if (cur_trans->list.prev != &root->fs_info->trans_list) { | 909 | if (cur_trans->list.prev != &root->fs_info->trans_list) { |
@@ -1019,6 +1001,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1019 | ret = commit_cowonly_roots(trans, root); | 1001 | ret = commit_cowonly_roots(trans, root); |
1020 | BUG_ON(ret); | 1002 | BUG_ON(ret); |
1021 | 1003 | ||
1004 | btrfs_prepare_extent_commit(trans, root); | ||
1005 | |||
1022 | cur_trans = root->fs_info->running_transaction; | 1006 | cur_trans = root->fs_info->running_transaction; |
1023 | spin_lock(&root->fs_info->new_trans_lock); | 1007 | spin_lock(&root->fs_info->new_trans_lock); |
1024 | root->fs_info->running_transaction = NULL; | 1008 | root->fs_info->running_transaction = NULL; |
@@ -1042,8 +1026,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1042 | memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, | 1026 | memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, |
1043 | sizeof(root->fs_info->super_copy)); | 1027 | sizeof(root->fs_info->super_copy)); |
1044 | 1028 | ||
1045 | btrfs_copy_pinned(root, pinned_copy); | ||
1046 | |||
1047 | trans->transaction->blocked = 0; | 1029 | trans->transaction->blocked = 0; |
1048 | 1030 | ||
1049 | wake_up(&root->fs_info->transaction_wait); | 1031 | wake_up(&root->fs_info->transaction_wait); |
@@ -1059,8 +1041,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1059 | */ | 1041 | */ |
1060 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1042 | mutex_unlock(&root->fs_info->tree_log_mutex); |
1061 | 1043 | ||
1062 | btrfs_finish_extent_commit(trans, root, pinned_copy); | 1044 | btrfs_finish_extent_commit(trans, root); |
1063 | kfree(pinned_copy); | ||
1064 | 1045 | ||
1065 | /* do the directory inserts of any pending snapshot creations */ | 1046 | /* do the directory inserts of any pending snapshot creations */ |
1066 | finish_pending_snapshots(trans, root->fs_info); | 1047 | finish_pending_snapshots(trans, root->fs_info); |
@@ -1096,8 +1077,13 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) | |||
1096 | 1077 | ||
1097 | while (!list_empty(&list)) { | 1078 | while (!list_empty(&list)) { |
1098 | root = list_entry(list.next, struct btrfs_root, root_list); | 1079 | root = list_entry(list.next, struct btrfs_root, root_list); |
1099 | list_del_init(&root->root_list); | 1080 | list_del(&root->root_list); |
1100 | btrfs_drop_snapshot(root, 0); | 1081 | |
1082 | if (btrfs_header_backref_rev(root->node) < | ||
1083 | BTRFS_MIXED_BACKREF_REV) | ||
1084 | btrfs_drop_snapshot(root, 0); | ||
1085 | else | ||
1086 | btrfs_drop_snapshot(root, 1); | ||
1101 | } | 1087 | } |
1102 | return 0; | 1088 | return 0; |
1103 | } | 1089 | } |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 30c0d45c1b5e..7827841b55cb 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -263,8 +263,8 @@ static int process_one_buffer(struct btrfs_root *log, | |||
263 | struct walk_control *wc, u64 gen) | 263 | struct walk_control *wc, u64 gen) |
264 | { | 264 | { |
265 | if (wc->pin) | 265 | if (wc->pin) |
266 | btrfs_update_pinned_extents(log->fs_info->extent_root, | 266 | btrfs_pin_extent(log->fs_info->extent_root, |
267 | eb->start, eb->len, 1); | 267 | eb->start, eb->len, 0); |
268 | 268 | ||
269 | if (btrfs_buffer_uptodate(eb, gen)) { | 269 | if (btrfs_buffer_uptodate(eb, gen)) { |
270 | if (wc->write) | 270 | if (wc->write) |
@@ -534,7 +534,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
534 | saved_nbytes = inode_get_bytes(inode); | 534 | saved_nbytes = inode_get_bytes(inode); |
535 | /* drop any overlapping extents */ | 535 | /* drop any overlapping extents */ |
536 | ret = btrfs_drop_extents(trans, root, inode, | 536 | ret = btrfs_drop_extents(trans, root, inode, |
537 | start, extent_end, extent_end, start, &alloc_hint); | 537 | start, extent_end, extent_end, start, &alloc_hint, 1); |
538 | BUG_ON(ret); | 538 | BUG_ON(ret); |
539 | 539 | ||
540 | if (found_type == BTRFS_FILE_EXTENT_REG || | 540 | if (found_type == BTRFS_FILE_EXTENT_REG || |
@@ -2841,7 +2841,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, | |||
2841 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) | 2841 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) |
2842 | break; | 2842 | break; |
2843 | 2843 | ||
2844 | if (parent == sb->s_root) | 2844 | if (IS_ROOT(parent)) |
2845 | break; | 2845 | break; |
2846 | 2846 | ||
2847 | parent = parent->d_parent; | 2847 | parent = parent->d_parent; |
@@ -2880,6 +2880,12 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
2880 | goto end_no_trans; | 2880 | goto end_no_trans; |
2881 | } | 2881 | } |
2882 | 2882 | ||
2883 | if (root != BTRFS_I(inode)->root || | ||
2884 | btrfs_root_refs(&root->root_item) == 0) { | ||
2885 | ret = 1; | ||
2886 | goto end_no_trans; | ||
2887 | } | ||
2888 | |||
2883 | ret = check_parent_dirs_for_sync(trans, inode, parent, | 2889 | ret = check_parent_dirs_for_sync(trans, inode, parent, |
2884 | sb, last_committed); | 2890 | sb, last_committed); |
2885 | if (ret) | 2891 | if (ret) |
@@ -2907,12 +2913,15 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
2907 | break; | 2913 | break; |
2908 | 2914 | ||
2909 | inode = parent->d_inode; | 2915 | inode = parent->d_inode; |
2916 | if (root != BTRFS_I(inode)->root) | ||
2917 | break; | ||
2918 | |||
2910 | if (BTRFS_I(inode)->generation > | 2919 | if (BTRFS_I(inode)->generation > |
2911 | root->fs_info->last_trans_committed) { | 2920 | root->fs_info->last_trans_committed) { |
2912 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 2921 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
2913 | BUG_ON(ret); | 2922 | BUG_ON(ret); |
2914 | } | 2923 | } |
2915 | if (parent == sb->s_root) | 2924 | if (IS_ROOT(parent)) |
2916 | break; | 2925 | break; |
2917 | 2926 | ||
2918 | parent = parent->d_parent; | 2927 | parent = parent->d_parent; |
@@ -2951,7 +2960,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) | |||
2951 | struct btrfs_key tmp_key; | 2960 | struct btrfs_key tmp_key; |
2952 | struct btrfs_root *log; | 2961 | struct btrfs_root *log; |
2953 | struct btrfs_fs_info *fs_info = log_root_tree->fs_info; | 2962 | struct btrfs_fs_info *fs_info = log_root_tree->fs_info; |
2954 | u64 highest_inode; | ||
2955 | struct walk_control wc = { | 2963 | struct walk_control wc = { |
2956 | .process_func = process_one_buffer, | 2964 | .process_func = process_one_buffer, |
2957 | .stage = 0, | 2965 | .stage = 0, |
@@ -3010,11 +3018,6 @@ again: | |||
3010 | path); | 3018 | path); |
3011 | BUG_ON(ret); | 3019 | BUG_ON(ret); |
3012 | } | 3020 | } |
3013 | ret = btrfs_find_highest_inode(wc.replay_dest, &highest_inode); | ||
3014 | if (ret == 0) { | ||
3015 | wc.replay_dest->highest_inode = highest_inode; | ||
3016 | wc.replay_dest->last_inode_alloc = highest_inode; | ||
3017 | } | ||
3018 | 3021 | ||
3019 | key.offset = found_key.offset - 1; | 3022 | key.offset = found_key.offset - 1; |
3020 | wc.replay_dest->log_root = NULL; | 3023 | wc.replay_dest->log_root = NULL; |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5cf405b0828d..23e7d36ff325 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -276,7 +276,7 @@ loop_lock: | |||
276 | * is now congested. Back off and let other work structs | 276 | * is now congested. Back off and let other work structs |
277 | * run instead | 277 | * run instead |
278 | */ | 278 | */ |
279 | if (pending && bdi_write_congested(bdi) && batch_run > 32 && | 279 | if (pending && bdi_write_congested(bdi) && batch_run > 8 && |
280 | fs_info->fs_devices->open_devices > 1) { | 280 | fs_info->fs_devices->open_devices > 1) { |
281 | struct io_context *ioc; | 281 | struct io_context *ioc; |
282 | 282 | ||
@@ -719,10 +719,9 @@ error: | |||
719 | * called very infrequently and that a given device has a small number | 719 | * called very infrequently and that a given device has a small number |
720 | * of extents | 720 | * of extents |
721 | */ | 721 | */ |
722 | static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans, | 722 | int find_free_dev_extent(struct btrfs_trans_handle *trans, |
723 | struct btrfs_device *device, | 723 | struct btrfs_device *device, u64 num_bytes, |
724 | u64 num_bytes, u64 *start, | 724 | u64 *start, u64 *max_avail) |
725 | u64 *max_avail) | ||
726 | { | 725 | { |
727 | struct btrfs_key key; | 726 | struct btrfs_key key; |
728 | struct btrfs_root *root = device->dev_root; | 727 | struct btrfs_root *root = device->dev_root; |
@@ -1736,6 +1735,10 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
1736 | extent_root = root->fs_info->extent_root; | 1735 | extent_root = root->fs_info->extent_root; |
1737 | em_tree = &root->fs_info->mapping_tree.map_tree; | 1736 | em_tree = &root->fs_info->mapping_tree.map_tree; |
1738 | 1737 | ||
1738 | ret = btrfs_can_relocate(extent_root, chunk_offset); | ||
1739 | if (ret) | ||
1740 | return -ENOSPC; | ||
1741 | |||
1739 | /* step one, relocate all the extents inside this chunk */ | 1742 | /* step one, relocate all the extents inside this chunk */ |
1740 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); | 1743 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); |
1741 | BUG_ON(ret); | 1744 | BUG_ON(ret); |
@@ -1749,9 +1752,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
1749 | * step two, delete the device extents and the | 1752 | * step two, delete the device extents and the |
1750 | * chunk tree entries | 1753 | * chunk tree entries |
1751 | */ | 1754 | */ |
1752 | spin_lock(&em_tree->lock); | 1755 | read_lock(&em_tree->lock); |
1753 | em = lookup_extent_mapping(em_tree, chunk_offset, 1); | 1756 | em = lookup_extent_mapping(em_tree, chunk_offset, 1); |
1754 | spin_unlock(&em_tree->lock); | 1757 | read_unlock(&em_tree->lock); |
1755 | 1758 | ||
1756 | BUG_ON(em->start > chunk_offset || | 1759 | BUG_ON(em->start > chunk_offset || |
1757 | em->start + em->len < chunk_offset); | 1760 | em->start + em->len < chunk_offset); |
@@ -1780,9 +1783,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
1780 | ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); | 1783 | ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); |
1781 | BUG_ON(ret); | 1784 | BUG_ON(ret); |
1782 | 1785 | ||
1783 | spin_lock(&em_tree->lock); | 1786 | write_lock(&em_tree->lock); |
1784 | remove_extent_mapping(em_tree, em); | 1787 | remove_extent_mapping(em_tree, em); |
1785 | spin_unlock(&em_tree->lock); | 1788 | write_unlock(&em_tree->lock); |
1786 | 1789 | ||
1787 | kfree(map); | 1790 | kfree(map); |
1788 | em->bdev = NULL; | 1791 | em->bdev = NULL; |
@@ -1807,12 +1810,15 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root) | |||
1807 | struct btrfs_key found_key; | 1810 | struct btrfs_key found_key; |
1808 | u64 chunk_tree = chunk_root->root_key.objectid; | 1811 | u64 chunk_tree = chunk_root->root_key.objectid; |
1809 | u64 chunk_type; | 1812 | u64 chunk_type; |
1813 | bool retried = false; | ||
1814 | int failed = 0; | ||
1810 | int ret; | 1815 | int ret; |
1811 | 1816 | ||
1812 | path = btrfs_alloc_path(); | 1817 | path = btrfs_alloc_path(); |
1813 | if (!path) | 1818 | if (!path) |
1814 | return -ENOMEM; | 1819 | return -ENOMEM; |
1815 | 1820 | ||
1821 | again: | ||
1816 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; | 1822 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; |
1817 | key.offset = (u64)-1; | 1823 | key.offset = (u64)-1; |
1818 | key.type = BTRFS_CHUNK_ITEM_KEY; | 1824 | key.type = BTRFS_CHUNK_ITEM_KEY; |
@@ -1842,7 +1848,10 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root) | |||
1842 | ret = btrfs_relocate_chunk(chunk_root, chunk_tree, | 1848 | ret = btrfs_relocate_chunk(chunk_root, chunk_tree, |
1843 | found_key.objectid, | 1849 | found_key.objectid, |
1844 | found_key.offset); | 1850 | found_key.offset); |
1845 | BUG_ON(ret); | 1851 | if (ret == -ENOSPC) |
1852 | failed++; | ||
1853 | else if (ret) | ||
1854 | BUG(); | ||
1846 | } | 1855 | } |
1847 | 1856 | ||
1848 | if (found_key.offset == 0) | 1857 | if (found_key.offset == 0) |
@@ -1850,6 +1859,14 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root) | |||
1850 | key.offset = found_key.offset - 1; | 1859 | key.offset = found_key.offset - 1; |
1851 | } | 1860 | } |
1852 | ret = 0; | 1861 | ret = 0; |
1862 | if (failed && !retried) { | ||
1863 | failed = 0; | ||
1864 | retried = true; | ||
1865 | goto again; | ||
1866 | } else if (failed && retried) { | ||
1867 | WARN_ON(1); | ||
1868 | ret = -ENOSPC; | ||
1869 | } | ||
1853 | error: | 1870 | error: |
1854 | btrfs_free_path(path); | 1871 | btrfs_free_path(path); |
1855 | return ret; | 1872 | return ret; |
@@ -1894,6 +1911,8 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1894 | continue; | 1911 | continue; |
1895 | 1912 | ||
1896 | ret = btrfs_shrink_device(device, old_size - size_to_free); | 1913 | ret = btrfs_shrink_device(device, old_size - size_to_free); |
1914 | if (ret == -ENOSPC) | ||
1915 | break; | ||
1897 | BUG_ON(ret); | 1916 | BUG_ON(ret); |
1898 | 1917 | ||
1899 | trans = btrfs_start_transaction(dev_root, 1); | 1918 | trans = btrfs_start_transaction(dev_root, 1); |
@@ -1938,9 +1957,8 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1938 | chunk = btrfs_item_ptr(path->nodes[0], | 1957 | chunk = btrfs_item_ptr(path->nodes[0], |
1939 | path->slots[0], | 1958 | path->slots[0], |
1940 | struct btrfs_chunk); | 1959 | struct btrfs_chunk); |
1941 | key.offset = found_key.offset; | ||
1942 | /* chunk zero is special */ | 1960 | /* chunk zero is special */ |
1943 | if (key.offset == 0) | 1961 | if (found_key.offset == 0) |
1944 | break; | 1962 | break; |
1945 | 1963 | ||
1946 | btrfs_release_path(chunk_root, path); | 1964 | btrfs_release_path(chunk_root, path); |
@@ -1948,7 +1966,8 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1948 | chunk_root->root_key.objectid, | 1966 | chunk_root->root_key.objectid, |
1949 | found_key.objectid, | 1967 | found_key.objectid, |
1950 | found_key.offset); | 1968 | found_key.offset); |
1951 | BUG_ON(ret); | 1969 | BUG_ON(ret && ret != -ENOSPC); |
1970 | key.offset = found_key.offset - 1; | ||
1952 | } | 1971 | } |
1953 | ret = 0; | 1972 | ret = 0; |
1954 | error: | 1973 | error: |
@@ -1974,10 +1993,13 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
1974 | u64 chunk_offset; | 1993 | u64 chunk_offset; |
1975 | int ret; | 1994 | int ret; |
1976 | int slot; | 1995 | int slot; |
1996 | int failed = 0; | ||
1997 | bool retried = false; | ||
1977 | struct extent_buffer *l; | 1998 | struct extent_buffer *l; |
1978 | struct btrfs_key key; | 1999 | struct btrfs_key key; |
1979 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; | 2000 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; |
1980 | u64 old_total = btrfs_super_total_bytes(super_copy); | 2001 | u64 old_total = btrfs_super_total_bytes(super_copy); |
2002 | u64 old_size = device->total_bytes; | ||
1981 | u64 diff = device->total_bytes - new_size; | 2003 | u64 diff = device->total_bytes - new_size; |
1982 | 2004 | ||
1983 | if (new_size >= device->total_bytes) | 2005 | if (new_size >= device->total_bytes) |
@@ -1987,12 +2009,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
1987 | if (!path) | 2009 | if (!path) |
1988 | return -ENOMEM; | 2010 | return -ENOMEM; |
1989 | 2011 | ||
1990 | trans = btrfs_start_transaction(root, 1); | ||
1991 | if (!trans) { | ||
1992 | ret = -ENOMEM; | ||
1993 | goto done; | ||
1994 | } | ||
1995 | |||
1996 | path->reada = 2; | 2012 | path->reada = 2; |
1997 | 2013 | ||
1998 | lock_chunks(root); | 2014 | lock_chunks(root); |
@@ -2001,8 +2017,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
2001 | if (device->writeable) | 2017 | if (device->writeable) |
2002 | device->fs_devices->total_rw_bytes -= diff; | 2018 | device->fs_devices->total_rw_bytes -= diff; |
2003 | unlock_chunks(root); | 2019 | unlock_chunks(root); |
2004 | btrfs_end_transaction(trans, root); | ||
2005 | 2020 | ||
2021 | again: | ||
2006 | key.objectid = device->devid; | 2022 | key.objectid = device->devid; |
2007 | key.offset = (u64)-1; | 2023 | key.offset = (u64)-1; |
2008 | key.type = BTRFS_DEV_EXTENT_KEY; | 2024 | key.type = BTRFS_DEV_EXTENT_KEY; |
@@ -2017,6 +2033,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
2017 | goto done; | 2033 | goto done; |
2018 | if (ret) { | 2034 | if (ret) { |
2019 | ret = 0; | 2035 | ret = 0; |
2036 | btrfs_release_path(root, path); | ||
2020 | break; | 2037 | break; |
2021 | } | 2038 | } |
2022 | 2039 | ||
@@ -2024,14 +2041,18 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
2024 | slot = path->slots[0]; | 2041 | slot = path->slots[0]; |
2025 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | 2042 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); |
2026 | 2043 | ||
2027 | if (key.objectid != device->devid) | 2044 | if (key.objectid != device->devid) { |
2045 | btrfs_release_path(root, path); | ||
2028 | break; | 2046 | break; |
2047 | } | ||
2029 | 2048 | ||
2030 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | 2049 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); |
2031 | length = btrfs_dev_extent_length(l, dev_extent); | 2050 | length = btrfs_dev_extent_length(l, dev_extent); |
2032 | 2051 | ||
2033 | if (key.offset + length <= new_size) | 2052 | if (key.offset + length <= new_size) { |
2053 | btrfs_release_path(root, path); | ||
2034 | break; | 2054 | break; |
2055 | } | ||
2035 | 2056 | ||
2036 | chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); | 2057 | chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); |
2037 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); | 2058 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); |
@@ -2040,8 +2061,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
2040 | 2061 | ||
2041 | ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, | 2062 | ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, |
2042 | chunk_offset); | 2063 | chunk_offset); |
2043 | if (ret) | 2064 | if (ret && ret != -ENOSPC) |
2044 | goto done; | 2065 | goto done; |
2066 | if (ret == -ENOSPC) | ||
2067 | failed++; | ||
2068 | key.offset -= 1; | ||
2069 | } | ||
2070 | |||
2071 | if (failed && !retried) { | ||
2072 | failed = 0; | ||
2073 | retried = true; | ||
2074 | goto again; | ||
2075 | } else if (failed && retried) { | ||
2076 | ret = -ENOSPC; | ||
2077 | lock_chunks(root); | ||
2078 | |||
2079 | device->total_bytes = old_size; | ||
2080 | if (device->writeable) | ||
2081 | device->fs_devices->total_rw_bytes += diff; | ||
2082 | unlock_chunks(root); | ||
2083 | goto done; | ||
2045 | } | 2084 | } |
2046 | 2085 | ||
2047 | /* Shrinking succeeded, else we would be at "done". */ | 2086 | /* Shrinking succeeded, else we would be at "done". */ |
@@ -2294,9 +2333,9 @@ again: | |||
2294 | em->block_len = em->len; | 2333 | em->block_len = em->len; |
2295 | 2334 | ||
2296 | em_tree = &extent_root->fs_info->mapping_tree.map_tree; | 2335 | em_tree = &extent_root->fs_info->mapping_tree.map_tree; |
2297 | spin_lock(&em_tree->lock); | 2336 | write_lock(&em_tree->lock); |
2298 | ret = add_extent_mapping(em_tree, em); | 2337 | ret = add_extent_mapping(em_tree, em); |
2299 | spin_unlock(&em_tree->lock); | 2338 | write_unlock(&em_tree->lock); |
2300 | BUG_ON(ret); | 2339 | BUG_ON(ret); |
2301 | free_extent_map(em); | 2340 | free_extent_map(em); |
2302 | 2341 | ||
@@ -2491,9 +2530,9 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) | |||
2491 | int readonly = 0; | 2530 | int readonly = 0; |
2492 | int i; | 2531 | int i; |
2493 | 2532 | ||
2494 | spin_lock(&map_tree->map_tree.lock); | 2533 | read_lock(&map_tree->map_tree.lock); |
2495 | em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); | 2534 | em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); |
2496 | spin_unlock(&map_tree->map_tree.lock); | 2535 | read_unlock(&map_tree->map_tree.lock); |
2497 | if (!em) | 2536 | if (!em) |
2498 | return 1; | 2537 | return 1; |
2499 | 2538 | ||
@@ -2518,11 +2557,11 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) | |||
2518 | struct extent_map *em; | 2557 | struct extent_map *em; |
2519 | 2558 | ||
2520 | while (1) { | 2559 | while (1) { |
2521 | spin_lock(&tree->map_tree.lock); | 2560 | write_lock(&tree->map_tree.lock); |
2522 | em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); | 2561 | em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); |
2523 | if (em) | 2562 | if (em) |
2524 | remove_extent_mapping(&tree->map_tree, em); | 2563 | remove_extent_mapping(&tree->map_tree, em); |
2525 | spin_unlock(&tree->map_tree.lock); | 2564 | write_unlock(&tree->map_tree.lock); |
2526 | if (!em) | 2565 | if (!em) |
2527 | break; | 2566 | break; |
2528 | kfree(em->bdev); | 2567 | kfree(em->bdev); |
@@ -2540,9 +2579,9 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) | |||
2540 | struct extent_map_tree *em_tree = &map_tree->map_tree; | 2579 | struct extent_map_tree *em_tree = &map_tree->map_tree; |
2541 | int ret; | 2580 | int ret; |
2542 | 2581 | ||
2543 | spin_lock(&em_tree->lock); | 2582 | read_lock(&em_tree->lock); |
2544 | em = lookup_extent_mapping(em_tree, logical, len); | 2583 | em = lookup_extent_mapping(em_tree, logical, len); |
2545 | spin_unlock(&em_tree->lock); | 2584 | read_unlock(&em_tree->lock); |
2546 | BUG_ON(!em); | 2585 | BUG_ON(!em); |
2547 | 2586 | ||
2548 | BUG_ON(em->start > logical || em->start + em->len < logical); | 2587 | BUG_ON(em->start > logical || em->start + em->len < logical); |
@@ -2604,9 +2643,9 @@ again: | |||
2604 | atomic_set(&multi->error, 0); | 2643 | atomic_set(&multi->error, 0); |
2605 | } | 2644 | } |
2606 | 2645 | ||
2607 | spin_lock(&em_tree->lock); | 2646 | read_lock(&em_tree->lock); |
2608 | em = lookup_extent_mapping(em_tree, logical, *length); | 2647 | em = lookup_extent_mapping(em_tree, logical, *length); |
2609 | spin_unlock(&em_tree->lock); | 2648 | read_unlock(&em_tree->lock); |
2610 | 2649 | ||
2611 | if (!em && unplug_page) | 2650 | if (!em && unplug_page) |
2612 | return 0; | 2651 | return 0; |
@@ -2763,9 +2802,9 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
2763 | u64 stripe_nr; | 2802 | u64 stripe_nr; |
2764 | int i, j, nr = 0; | 2803 | int i, j, nr = 0; |
2765 | 2804 | ||
2766 | spin_lock(&em_tree->lock); | 2805 | read_lock(&em_tree->lock); |
2767 | em = lookup_extent_mapping(em_tree, chunk_start, 1); | 2806 | em = lookup_extent_mapping(em_tree, chunk_start, 1); |
2768 | spin_unlock(&em_tree->lock); | 2807 | read_unlock(&em_tree->lock); |
2769 | 2808 | ||
2770 | BUG_ON(!em || em->start != chunk_start); | 2809 | BUG_ON(!em || em->start != chunk_start); |
2771 | map = (struct map_lookup *)em->bdev; | 2810 | map = (struct map_lookup *)em->bdev; |
@@ -3053,9 +3092,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
3053 | logical = key->offset; | 3092 | logical = key->offset; |
3054 | length = btrfs_chunk_length(leaf, chunk); | 3093 | length = btrfs_chunk_length(leaf, chunk); |
3055 | 3094 | ||
3056 | spin_lock(&map_tree->map_tree.lock); | 3095 | read_lock(&map_tree->map_tree.lock); |
3057 | em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); | 3096 | em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); |
3058 | spin_unlock(&map_tree->map_tree.lock); | 3097 | read_unlock(&map_tree->map_tree.lock); |
3059 | 3098 | ||
3060 | /* already mapped? */ | 3099 | /* already mapped? */ |
3061 | if (em && em->start <= logical && em->start + em->len > logical) { | 3100 | if (em && em->start <= logical && em->start + em->len > logical) { |
@@ -3114,9 +3153,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
3114 | map->stripes[i].dev->in_fs_metadata = 1; | 3153 | map->stripes[i].dev->in_fs_metadata = 1; |
3115 | } | 3154 | } |
3116 | 3155 | ||
3117 | spin_lock(&map_tree->map_tree.lock); | 3156 | write_lock(&map_tree->map_tree.lock); |
3118 | ret = add_extent_mapping(&map_tree->map_tree, em); | 3157 | ret = add_extent_mapping(&map_tree->map_tree, em); |
3119 | spin_unlock(&map_tree->map_tree.lock); | 3158 | write_unlock(&map_tree->map_tree.lock); |
3120 | BUG_ON(ret); | 3159 | BUG_ON(ret); |
3121 | free_extent_map(em); | 3160 | free_extent_map(em); |
3122 | 3161 | ||
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 5139a833f721..31b0fabdd2ea 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -181,4 +181,7 @@ int btrfs_balance(struct btrfs_root *dev_root); | |||
181 | void btrfs_unlock_volumes(void); | 181 | void btrfs_unlock_volumes(void); |
182 | void btrfs_lock_volumes(void); | 182 | void btrfs_lock_volumes(void); |
183 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); | 183 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); |
184 | int find_free_dev_extent(struct btrfs_trans_handle *trans, | ||
185 | struct btrfs_device *device, u64 num_bytes, | ||
186 | u64 *start, u64 *max_avail); | ||
184 | #endif | 187 | #endif |
diff --git a/fs/buffer.c b/fs/buffer.c index 209f7f15f5f8..24afd7422ae8 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -2239,16 +2239,10 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size) | |||
2239 | struct address_space *mapping = inode->i_mapping; | 2239 | struct address_space *mapping = inode->i_mapping; |
2240 | struct page *page; | 2240 | struct page *page; |
2241 | void *fsdata; | 2241 | void *fsdata; |
2242 | unsigned long limit; | ||
2243 | int err; | 2242 | int err; |
2244 | 2243 | ||
2245 | err = -EFBIG; | 2244 | err = inode_newsize_ok(inode, size); |
2246 | limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; | 2245 | if (err) |
2247 | if (limit != RLIM_INFINITY && size > (loff_t)limit) { | ||
2248 | send_sig(SIGXFSZ, current, 0); | ||
2249 | goto out; | ||
2250 | } | ||
2251 | if (size > inode->i_sb->s_maxbytes) | ||
2252 | goto out; | 2246 | goto out; |
2253 | 2247 | ||
2254 | err = pagecache_write_begin(NULL, mapping, size, 0, | 2248 | err = pagecache_write_begin(NULL, mapping, size, 0, |
diff --git a/fs/char_dev.c b/fs/char_dev.c index 3cbc57f932d2..d6db933df2b2 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c | |||
@@ -264,7 +264,6 @@ int __register_chrdev(unsigned int major, unsigned int baseminor, | |||
264 | { | 264 | { |
265 | struct char_device_struct *cd; | 265 | struct char_device_struct *cd; |
266 | struct cdev *cdev; | 266 | struct cdev *cdev; |
267 | char *s; | ||
268 | int err = -ENOMEM; | 267 | int err = -ENOMEM; |
269 | 268 | ||
270 | cd = __register_chrdev_region(major, baseminor, count, name); | 269 | cd = __register_chrdev_region(major, baseminor, count, name); |
@@ -278,8 +277,6 @@ int __register_chrdev(unsigned int major, unsigned int baseminor, | |||
278 | cdev->owner = fops->owner; | 277 | cdev->owner = fops->owner; |
279 | cdev->ops = fops; | 278 | cdev->ops = fops; |
280 | kobject_set_name(&cdev->kobj, "%s", name); | 279 | kobject_set_name(&cdev->kobj, "%s", name); |
281 | for (s = strchr(kobject_name(&cdev->kobj),'/'); s; s = strchr(s, '/')) | ||
282 | *s = '!'; | ||
283 | 280 | ||
284 | err = cdev_add(cdev, MKDEV(cd->major, baseminor), count); | 281 | err = cdev_add(cdev, MKDEV(cd->major, baseminor), count); |
285 | if (err) | 282 | if (err) |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index d79ce2e95c23..90c5b39f0313 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -185,8 +185,7 @@ out_mount_failed: | |||
185 | cifs_sb->mountdata = NULL; | 185 | cifs_sb->mountdata = NULL; |
186 | } | 186 | } |
187 | #endif | 187 | #endif |
188 | if (cifs_sb->local_nls) | 188 | unload_nls(cifs_sb->local_nls); |
189 | unload_nls(cifs_sb->local_nls); | ||
190 | kfree(cifs_sb); | 189 | kfree(cifs_sb); |
191 | } | 190 | } |
192 | return rc; | 191 | return rc; |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 1f09c7619319..5e2492535daa 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -1557,57 +1557,24 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from) | |||
1557 | 1557 | ||
1558 | static int cifs_vmtruncate(struct inode *inode, loff_t offset) | 1558 | static int cifs_vmtruncate(struct inode *inode, loff_t offset) |
1559 | { | 1559 | { |
1560 | struct address_space *mapping = inode->i_mapping; | 1560 | loff_t oldsize; |
1561 | unsigned long limit; | 1561 | int err; |
1562 | 1562 | ||
1563 | spin_lock(&inode->i_lock); | 1563 | spin_lock(&inode->i_lock); |
1564 | if (inode->i_size < offset) | 1564 | err = inode_newsize_ok(inode, offset); |
1565 | goto do_expand; | 1565 | if (err) { |
1566 | /* | ||
1567 | * truncation of in-use swapfiles is disallowed - it would cause | ||
1568 | * subsequent swapout to scribble on the now-freed blocks. | ||
1569 | */ | ||
1570 | if (IS_SWAPFILE(inode)) { | ||
1571 | spin_unlock(&inode->i_lock); | ||
1572 | goto out_busy; | ||
1573 | } | ||
1574 | i_size_write(inode, offset); | ||
1575 | spin_unlock(&inode->i_lock); | ||
1576 | /* | ||
1577 | * unmap_mapping_range is called twice, first simply for efficiency | ||
1578 | * so that truncate_inode_pages does fewer single-page unmaps. However | ||
1579 | * after this first call, and before truncate_inode_pages finishes, | ||
1580 | * it is possible for private pages to be COWed, which remain after | ||
1581 | * truncate_inode_pages finishes, hence the second unmap_mapping_range | ||
1582 | * call must be made for correctness. | ||
1583 | */ | ||
1584 | unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); | ||
1585 | truncate_inode_pages(mapping, offset); | ||
1586 | unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); | ||
1587 | goto out_truncate; | ||
1588 | |||
1589 | do_expand: | ||
1590 | limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; | ||
1591 | if (limit != RLIM_INFINITY && offset > limit) { | ||
1592 | spin_unlock(&inode->i_lock); | 1566 | spin_unlock(&inode->i_lock); |
1593 | goto out_sig; | 1567 | goto out; |
1594 | } | ||
1595 | if (offset > inode->i_sb->s_maxbytes) { | ||
1596 | spin_unlock(&inode->i_lock); | ||
1597 | goto out_big; | ||
1598 | } | 1568 | } |
1569 | |||
1570 | oldsize = inode->i_size; | ||
1599 | i_size_write(inode, offset); | 1571 | i_size_write(inode, offset); |
1600 | spin_unlock(&inode->i_lock); | 1572 | spin_unlock(&inode->i_lock); |
1601 | out_truncate: | 1573 | truncate_pagecache(inode, oldsize, offset); |
1602 | if (inode->i_op->truncate) | 1574 | if (inode->i_op->truncate) |
1603 | inode->i_op->truncate(inode); | 1575 | inode->i_op->truncate(inode); |
1604 | return 0; | 1576 | out: |
1605 | out_sig: | 1577 | return err; |
1606 | send_sig(SIGXFSZ, current, 0); | ||
1607 | out_big: | ||
1608 | return -EFBIG; | ||
1609 | out_busy: | ||
1610 | return -ETXTBSY; | ||
1611 | } | 1578 | } |
1612 | 1579 | ||
1613 | static int | 1580 | static int |
diff --git a/fs/coda/coda_int.h b/fs/coda/coda_int.h index 8ccd5ed81d9c..d99860a33890 100644 --- a/fs/coda/coda_int.h +++ b/fs/coda/coda_int.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define _CODA_INT_ | 2 | #define _CODA_INT_ |
3 | 3 | ||
4 | struct dentry; | 4 | struct dentry; |
5 | struct file; | ||
5 | 6 | ||
6 | extern struct file_system_type coda_fs_type; | 7 | extern struct file_system_type coda_fs_type; |
7 | extern unsigned long coda_timeout; | 8 | extern unsigned long coda_timeout; |
diff --git a/fs/compat.c b/fs/compat.c index 3aa48834a222..d576b552e8e2 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -768,13 +768,13 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name, | |||
768 | char __user * type, unsigned long flags, | 768 | char __user * type, unsigned long flags, |
769 | void __user * data) | 769 | void __user * data) |
770 | { | 770 | { |
771 | unsigned long type_page; | 771 | char *kernel_type; |
772 | unsigned long data_page; | 772 | unsigned long data_page; |
773 | unsigned long dev_page; | 773 | char *kernel_dev; |
774 | char *dir_page; | 774 | char *dir_page; |
775 | int retval; | 775 | int retval; |
776 | 776 | ||
777 | retval = copy_mount_options (type, &type_page); | 777 | retval = copy_mount_string(type, &kernel_type); |
778 | if (retval < 0) | 778 | if (retval < 0) |
779 | goto out; | 779 | goto out; |
780 | 780 | ||
@@ -783,38 +783,38 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name, | |||
783 | if (IS_ERR(dir_page)) | 783 | if (IS_ERR(dir_page)) |
784 | goto out1; | 784 | goto out1; |
785 | 785 | ||
786 | retval = copy_mount_options (dev_name, &dev_page); | 786 | retval = copy_mount_string(dev_name, &kernel_dev); |
787 | if (retval < 0) | 787 | if (retval < 0) |
788 | goto out2; | 788 | goto out2; |
789 | 789 | ||
790 | retval = copy_mount_options (data, &data_page); | 790 | retval = copy_mount_options(data, &data_page); |
791 | if (retval < 0) | 791 | if (retval < 0) |
792 | goto out3; | 792 | goto out3; |
793 | 793 | ||
794 | retval = -EINVAL; | 794 | retval = -EINVAL; |
795 | 795 | ||
796 | if (type_page && data_page) { | 796 | if (kernel_type && data_page) { |
797 | if (!strcmp((char *)type_page, SMBFS_NAME)) { | 797 | if (!strcmp(kernel_type, SMBFS_NAME)) { |
798 | do_smb_super_data_conv((void *)data_page); | 798 | do_smb_super_data_conv((void *)data_page); |
799 | } else if (!strcmp((char *)type_page, NCPFS_NAME)) { | 799 | } else if (!strcmp(kernel_type, NCPFS_NAME)) { |
800 | do_ncp_super_data_conv((void *)data_page); | 800 | do_ncp_super_data_conv((void *)data_page); |
801 | } else if (!strcmp((char *)type_page, NFS4_NAME)) { | 801 | } else if (!strcmp(kernel_type, NFS4_NAME)) { |
802 | if (do_nfs4_super_data_conv((void *) data_page)) | 802 | if (do_nfs4_super_data_conv((void *) data_page)) |
803 | goto out4; | 803 | goto out4; |
804 | } | 804 | } |
805 | } | 805 | } |
806 | 806 | ||
807 | retval = do_mount((char*)dev_page, dir_page, (char*)type_page, | 807 | retval = do_mount(kernel_dev, dir_page, kernel_type, |
808 | flags, (void*)data_page); | 808 | flags, (void*)data_page); |
809 | 809 | ||
810 | out4: | 810 | out4: |
811 | free_page(data_page); | 811 | free_page(data_page); |
812 | out3: | 812 | out3: |
813 | free_page(dev_page); | 813 | kfree(kernel_dev); |
814 | out2: | 814 | out2: |
815 | putname(dir_page); | 815 | putname(dir_page); |
816 | out1: | 816 | out1: |
817 | free_page(type_page); | 817 | kfree(kernel_type); |
818 | out: | 818 | out: |
819 | return retval; | 819 | return retval; |
820 | } | 820 | } |
diff --git a/fs/drop_caches.c b/fs/drop_caches.c index a2edb7913447..31f4b0e6d72c 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c | |||
@@ -63,9 +63,9 @@ static void drop_slab(void) | |||
63 | } | 63 | } |
64 | 64 | ||
65 | int drop_caches_sysctl_handler(ctl_table *table, int write, | 65 | int drop_caches_sysctl_handler(ctl_table *table, int write, |
66 | struct file *file, void __user *buffer, size_t *length, loff_t *ppos) | 66 | void __user *buffer, size_t *length, loff_t *ppos) |
67 | { | 67 | { |
68 | proc_dointvec_minmax(table, write, file, buffer, length, ppos); | 68 | proc_dointvec_minmax(table, write, buffer, length, ppos); |
69 | if (write) { | 69 | if (write) { |
70 | if (sysctl_drop_caches & 1) | 70 | if (sysctl_drop_caches & 1) |
71 | drop_pagecache(); | 71 | drop_pagecache(); |
@@ -55,6 +55,7 @@ | |||
55 | #include <linux/kmod.h> | 55 | #include <linux/kmod.h> |
56 | #include <linux/fsnotify.h> | 56 | #include <linux/fsnotify.h> |
57 | #include <linux/fs_struct.h> | 57 | #include <linux/fs_struct.h> |
58 | #include <linux/pipe_fs_i.h> | ||
58 | 59 | ||
59 | #include <asm/uaccess.h> | 60 | #include <asm/uaccess.h> |
60 | #include <asm/mmu_context.h> | 61 | #include <asm/mmu_context.h> |
@@ -63,6 +64,7 @@ | |||
63 | 64 | ||
64 | int core_uses_pid; | 65 | int core_uses_pid; |
65 | char core_pattern[CORENAME_MAX_SIZE] = "core"; | 66 | char core_pattern[CORENAME_MAX_SIZE] = "core"; |
67 | unsigned int core_pipe_limit; | ||
66 | int suid_dumpable = 0; | 68 | int suid_dumpable = 0; |
67 | 69 | ||
68 | /* The maximal length of core_pattern is also specified in sysctl.c */ | 70 | /* The maximal length of core_pattern is also specified in sysctl.c */ |
@@ -1393,18 +1395,16 @@ out_ret: | |||
1393 | return retval; | 1395 | return retval; |
1394 | } | 1396 | } |
1395 | 1397 | ||
1396 | int set_binfmt(struct linux_binfmt *new) | 1398 | void set_binfmt(struct linux_binfmt *new) |
1397 | { | 1399 | { |
1398 | struct linux_binfmt *old = current->binfmt; | 1400 | struct mm_struct *mm = current->mm; |
1399 | 1401 | ||
1400 | if (new) { | 1402 | if (mm->binfmt) |
1401 | if (!try_module_get(new->module)) | 1403 | module_put(mm->binfmt->module); |
1402 | return -1; | 1404 | |
1403 | } | 1405 | mm->binfmt = new; |
1404 | current->binfmt = new; | 1406 | if (new) |
1405 | if (old) | 1407 | __module_get(new->module); |
1406 | module_put(old->module); | ||
1407 | return 0; | ||
1408 | } | 1408 | } |
1409 | 1409 | ||
1410 | EXPORT_SYMBOL(set_binfmt); | 1410 | EXPORT_SYMBOL(set_binfmt); |
@@ -1728,6 +1728,29 @@ int get_dumpable(struct mm_struct *mm) | |||
1728 | return (ret >= 2) ? 2 : ret; | 1728 | return (ret >= 2) ? 2 : ret; |
1729 | } | 1729 | } |
1730 | 1730 | ||
1731 | static void wait_for_dump_helpers(struct file *file) | ||
1732 | { | ||
1733 | struct pipe_inode_info *pipe; | ||
1734 | |||
1735 | pipe = file->f_path.dentry->d_inode->i_pipe; | ||
1736 | |||
1737 | pipe_lock(pipe); | ||
1738 | pipe->readers++; | ||
1739 | pipe->writers--; | ||
1740 | |||
1741 | while ((pipe->readers > 1) && (!signal_pending(current))) { | ||
1742 | wake_up_interruptible_sync(&pipe->wait); | ||
1743 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | ||
1744 | pipe_wait(pipe); | ||
1745 | } | ||
1746 | |||
1747 | pipe->readers--; | ||
1748 | pipe->writers++; | ||
1749 | pipe_unlock(pipe); | ||
1750 | |||
1751 | } | ||
1752 | |||
1753 | |||
1731 | void do_coredump(long signr, int exit_code, struct pt_regs *regs) | 1754 | void do_coredump(long signr, int exit_code, struct pt_regs *regs) |
1732 | { | 1755 | { |
1733 | struct core_state core_state; | 1756 | struct core_state core_state; |
@@ -1744,11 +1767,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1744 | unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; | 1767 | unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; |
1745 | char **helper_argv = NULL; | 1768 | char **helper_argv = NULL; |
1746 | int helper_argc = 0; | 1769 | int helper_argc = 0; |
1747 | char *delimit; | 1770 | int dump_count = 0; |
1771 | static atomic_t core_dump_count = ATOMIC_INIT(0); | ||
1748 | 1772 | ||
1749 | audit_core_dumps(signr); | 1773 | audit_core_dumps(signr); |
1750 | 1774 | ||
1751 | binfmt = current->binfmt; | 1775 | binfmt = mm->binfmt; |
1752 | if (!binfmt || !binfmt->core_dump) | 1776 | if (!binfmt || !binfmt->core_dump) |
1753 | goto fail; | 1777 | goto fail; |
1754 | 1778 | ||
@@ -1799,54 +1823,63 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1799 | lock_kernel(); | 1823 | lock_kernel(); |
1800 | ispipe = format_corename(corename, signr); | 1824 | ispipe = format_corename(corename, signr); |
1801 | unlock_kernel(); | 1825 | unlock_kernel(); |
1802 | /* | 1826 | |
1803 | * Don't bother to check the RLIMIT_CORE value if core_pattern points | ||
1804 | * to a pipe. Since we're not writing directly to the filesystem | ||
1805 | * RLIMIT_CORE doesn't really apply, as no actual core file will be | ||
1806 | * created unless the pipe reader choses to write out the core file | ||
1807 | * at which point file size limits and permissions will be imposed | ||
1808 | * as it does with any other process | ||
1809 | */ | ||
1810 | if ((!ispipe) && (core_limit < binfmt->min_coredump)) | 1827 | if ((!ispipe) && (core_limit < binfmt->min_coredump)) |
1811 | goto fail_unlock; | 1828 | goto fail_unlock; |
1812 | 1829 | ||
1813 | if (ispipe) { | 1830 | if (ispipe) { |
1831 | if (core_limit == 0) { | ||
1832 | /* | ||
1833 | * Normally core limits are irrelevant to pipes, since | ||
1834 | * we're not writing to the file system, but we use | ||
1835 | * core_limit of 0 here as a speacial value. Any | ||
1836 | * non-zero limit gets set to RLIM_INFINITY below, but | ||
1837 | * a limit of 0 skips the dump. This is a consistent | ||
1838 | * way to catch recursive crashes. We can still crash | ||
1839 | * if the core_pattern binary sets RLIM_CORE = !0 | ||
1840 | * but it runs as root, and can do lots of stupid things | ||
1841 | * Note that we use task_tgid_vnr here to grab the pid | ||
1842 | * of the process group leader. That way we get the | ||
1843 | * right pid if a thread in a multi-threaded | ||
1844 | * core_pattern process dies. | ||
1845 | */ | ||
1846 | printk(KERN_WARNING | ||
1847 | "Process %d(%s) has RLIMIT_CORE set to 0\n", | ||
1848 | task_tgid_vnr(current), current->comm); | ||
1849 | printk(KERN_WARNING "Aborting core\n"); | ||
1850 | goto fail_unlock; | ||
1851 | } | ||
1852 | |||
1853 | dump_count = atomic_inc_return(&core_dump_count); | ||
1854 | if (core_pipe_limit && (core_pipe_limit < dump_count)) { | ||
1855 | printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", | ||
1856 | task_tgid_vnr(current), current->comm); | ||
1857 | printk(KERN_WARNING "Skipping core dump\n"); | ||
1858 | goto fail_dropcount; | ||
1859 | } | ||
1860 | |||
1814 | helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc); | 1861 | helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc); |
1815 | if (!helper_argv) { | 1862 | if (!helper_argv) { |
1816 | printk(KERN_WARNING "%s failed to allocate memory\n", | 1863 | printk(KERN_WARNING "%s failed to allocate memory\n", |
1817 | __func__); | 1864 | __func__); |
1818 | goto fail_unlock; | 1865 | goto fail_dropcount; |
1819 | } | ||
1820 | /* Terminate the string before the first option */ | ||
1821 | delimit = strchr(corename, ' '); | ||
1822 | if (delimit) | ||
1823 | *delimit = '\0'; | ||
1824 | delimit = strrchr(helper_argv[0], '/'); | ||
1825 | if (delimit) | ||
1826 | delimit++; | ||
1827 | else | ||
1828 | delimit = helper_argv[0]; | ||
1829 | if (!strcmp(delimit, current->comm)) { | ||
1830 | printk(KERN_NOTICE "Recursive core dump detected, " | ||
1831 | "aborting\n"); | ||
1832 | goto fail_unlock; | ||
1833 | } | 1866 | } |
1834 | 1867 | ||
1835 | core_limit = RLIM_INFINITY; | 1868 | core_limit = RLIM_INFINITY; |
1836 | 1869 | ||
1837 | /* SIGPIPE can happen, but it's just never processed */ | 1870 | /* SIGPIPE can happen, but it's just never processed */ |
1838 | if (call_usermodehelper_pipe(corename+1, helper_argv, NULL, | 1871 | if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL, |
1839 | &file)) { | 1872 | &file)) { |
1840 | printk(KERN_INFO "Core dump to %s pipe failed\n", | 1873 | printk(KERN_INFO "Core dump to %s pipe failed\n", |
1841 | corename); | 1874 | corename); |
1842 | goto fail_unlock; | 1875 | goto fail_dropcount; |
1843 | } | 1876 | } |
1844 | } else | 1877 | } else |
1845 | file = filp_open(corename, | 1878 | file = filp_open(corename, |
1846 | O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, | 1879 | O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, |
1847 | 0600); | 1880 | 0600); |
1848 | if (IS_ERR(file)) | 1881 | if (IS_ERR(file)) |
1849 | goto fail_unlock; | 1882 | goto fail_dropcount; |
1850 | inode = file->f_path.dentry->d_inode; | 1883 | inode = file->f_path.dentry->d_inode; |
1851 | if (inode->i_nlink > 1) | 1884 | if (inode->i_nlink > 1) |
1852 | goto close_fail; /* multiple links - don't dump */ | 1885 | goto close_fail; /* multiple links - don't dump */ |
@@ -1875,7 +1908,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1875 | if (retval) | 1908 | if (retval) |
1876 | current->signal->group_exit_code |= 0x80; | 1909 | current->signal->group_exit_code |= 0x80; |
1877 | close_fail: | 1910 | close_fail: |
1911 | if (ispipe && core_pipe_limit) | ||
1912 | wait_for_dump_helpers(file); | ||
1878 | filp_close(file, NULL); | 1913 | filp_close(file, NULL); |
1914 | fail_dropcount: | ||
1915 | if (dump_count) | ||
1916 | atomic_dec(&core_dump_count); | ||
1879 | fail_unlock: | 1917 | fail_unlock: |
1880 | if (helper_argv) | 1918 | if (helper_argv) |
1881 | argv_free(helper_argv); | 1919 | argv_free(helper_argv); |
diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 5ab10c3bbebe..9f500dec3b59 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c | |||
@@ -214,7 +214,6 @@ int exofs_sync_fs(struct super_block *sb, int wait) | |||
214 | } | 214 | } |
215 | 215 | ||
216 | lock_super(sb); | 216 | lock_super(sb); |
217 | lock_kernel(); | ||
218 | sbi = sb->s_fs_info; | 217 | sbi = sb->s_fs_info; |
219 | fscb->s_nextid = cpu_to_le64(sbi->s_nextid); | 218 | fscb->s_nextid = cpu_to_le64(sbi->s_nextid); |
220 | fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles); | 219 | fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles); |
@@ -245,7 +244,6 @@ int exofs_sync_fs(struct super_block *sb, int wait) | |||
245 | out: | 244 | out: |
246 | if (or) | 245 | if (or) |
247 | osd_end_request(or); | 246 | osd_end_request(or); |
248 | unlock_kernel(); | ||
249 | unlock_super(sb); | 247 | unlock_super(sb); |
250 | kfree(fscb); | 248 | kfree(fscb); |
251 | return ret; | 249 | return ret; |
@@ -268,8 +266,6 @@ static void exofs_put_super(struct super_block *sb) | |||
268 | int num_pend; | 266 | int num_pend; |
269 | struct exofs_sb_info *sbi = sb->s_fs_info; | 267 | struct exofs_sb_info *sbi = sb->s_fs_info; |
270 | 268 | ||
271 | lock_kernel(); | ||
272 | |||
273 | if (sb->s_dirt) | 269 | if (sb->s_dirt) |
274 | exofs_write_super(sb); | 270 | exofs_write_super(sb); |
275 | 271 | ||
@@ -286,8 +282,6 @@ static void exofs_put_super(struct super_block *sb) | |||
286 | osduld_put_device(sbi->s_dev); | 282 | osduld_put_device(sbi->s_dev); |
287 | kfree(sb->s_fs_info); | 283 | kfree(sb->s_fs_info); |
288 | sb->s_fs_info = NULL; | 284 | sb->s_fs_info = NULL; |
289 | |||
290 | unlock_kernel(); | ||
291 | } | 285 | } |
292 | 286 | ||
293 | /* | 287 | /* |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 1c1638f873a4..ade634076d0a 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -819,6 +819,7 @@ const struct address_space_operations ext2_aops = { | |||
819 | .writepages = ext2_writepages, | 819 | .writepages = ext2_writepages, |
820 | .migratepage = buffer_migrate_page, | 820 | .migratepage = buffer_migrate_page, |
821 | .is_partially_uptodate = block_is_partially_uptodate, | 821 | .is_partially_uptodate = block_is_partially_uptodate, |
822 | .error_remove_page = generic_error_remove_page, | ||
822 | }; | 823 | }; |
823 | 824 | ||
824 | const struct address_space_operations ext2_aops_xip = { | 825 | const struct address_space_operations ext2_aops_xip = { |
@@ -837,6 +838,7 @@ const struct address_space_operations ext2_nobh_aops = { | |||
837 | .direct_IO = ext2_direct_IO, | 838 | .direct_IO = ext2_direct_IO, |
838 | .writepages = ext2_writepages, | 839 | .writepages = ext2_writepages, |
839 | .migratepage = buffer_migrate_page, | 840 | .migratepage = buffer_migrate_page, |
841 | .error_remove_page = generic_error_remove_page, | ||
840 | }; | 842 | }; |
841 | 843 | ||
842 | /* | 844 | /* |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index cd098a7b77fc..acf1b1423327 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -1830,6 +1830,7 @@ static const struct address_space_operations ext3_ordered_aops = { | |||
1830 | .direct_IO = ext3_direct_IO, | 1830 | .direct_IO = ext3_direct_IO, |
1831 | .migratepage = buffer_migrate_page, | 1831 | .migratepage = buffer_migrate_page, |
1832 | .is_partially_uptodate = block_is_partially_uptodate, | 1832 | .is_partially_uptodate = block_is_partially_uptodate, |
1833 | .error_remove_page = generic_error_remove_page, | ||
1833 | }; | 1834 | }; |
1834 | 1835 | ||
1835 | static const struct address_space_operations ext3_writeback_aops = { | 1836 | static const struct address_space_operations ext3_writeback_aops = { |
@@ -1845,6 +1846,7 @@ static const struct address_space_operations ext3_writeback_aops = { | |||
1845 | .direct_IO = ext3_direct_IO, | 1846 | .direct_IO = ext3_direct_IO, |
1846 | .migratepage = buffer_migrate_page, | 1847 | .migratepage = buffer_migrate_page, |
1847 | .is_partially_uptodate = block_is_partially_uptodate, | 1848 | .is_partially_uptodate = block_is_partially_uptodate, |
1849 | .error_remove_page = generic_error_remove_page, | ||
1848 | }; | 1850 | }; |
1849 | 1851 | ||
1850 | static const struct address_space_operations ext3_journalled_aops = { | 1852 | static const struct address_space_operations ext3_journalled_aops = { |
@@ -1859,6 +1861,7 @@ static const struct address_space_operations ext3_journalled_aops = { | |||
1859 | .invalidatepage = ext3_invalidatepage, | 1861 | .invalidatepage = ext3_invalidatepage, |
1860 | .releasepage = ext3_releasepage, | 1862 | .releasepage = ext3_releasepage, |
1861 | .is_partially_uptodate = block_is_partially_uptodate, | 1863 | .is_partially_uptodate = block_is_partially_uptodate, |
1864 | .error_remove_page = generic_error_remove_page, | ||
1862 | }; | 1865 | }; |
1863 | 1866 | ||
1864 | void ext3_set_aops(struct inode *inode) | 1867 | void ext3_set_aops(struct inode *inode) |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3a798737e305..064746fad581 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -3386,6 +3386,7 @@ static const struct address_space_operations ext4_ordered_aops = { | |||
3386 | .direct_IO = ext4_direct_IO, | 3386 | .direct_IO = ext4_direct_IO, |
3387 | .migratepage = buffer_migrate_page, | 3387 | .migratepage = buffer_migrate_page, |
3388 | .is_partially_uptodate = block_is_partially_uptodate, | 3388 | .is_partially_uptodate = block_is_partially_uptodate, |
3389 | .error_remove_page = generic_error_remove_page, | ||
3389 | }; | 3390 | }; |
3390 | 3391 | ||
3391 | static const struct address_space_operations ext4_writeback_aops = { | 3392 | static const struct address_space_operations ext4_writeback_aops = { |
@@ -3401,6 +3402,7 @@ static const struct address_space_operations ext4_writeback_aops = { | |||
3401 | .direct_IO = ext4_direct_IO, | 3402 | .direct_IO = ext4_direct_IO, |
3402 | .migratepage = buffer_migrate_page, | 3403 | .migratepage = buffer_migrate_page, |
3403 | .is_partially_uptodate = block_is_partially_uptodate, | 3404 | .is_partially_uptodate = block_is_partially_uptodate, |
3405 | .error_remove_page = generic_error_remove_page, | ||
3404 | }; | 3406 | }; |
3405 | 3407 | ||
3406 | static const struct address_space_operations ext4_journalled_aops = { | 3408 | static const struct address_space_operations ext4_journalled_aops = { |
@@ -3415,6 +3417,7 @@ static const struct address_space_operations ext4_journalled_aops = { | |||
3415 | .invalidatepage = ext4_invalidatepage, | 3417 | .invalidatepage = ext4_invalidatepage, |
3416 | .releasepage = ext4_releasepage, | 3418 | .releasepage = ext4_releasepage, |
3417 | .is_partially_uptodate = block_is_partially_uptodate, | 3419 | .is_partially_uptodate = block_is_partially_uptodate, |
3420 | .error_remove_page = generic_error_remove_page, | ||
3418 | }; | 3421 | }; |
3419 | 3422 | ||
3420 | static const struct address_space_operations ext4_da_aops = { | 3423 | static const struct address_space_operations ext4_da_aops = { |
@@ -3431,6 +3434,7 @@ static const struct address_space_operations ext4_da_aops = { | |||
3431 | .direct_IO = ext4_direct_IO, | 3434 | .direct_IO = ext4_direct_IO, |
3432 | .migratepage = buffer_migrate_page, | 3435 | .migratepage = buffer_migrate_page, |
3433 | .is_partially_uptodate = block_is_partially_uptodate, | 3436 | .is_partially_uptodate = block_is_partially_uptodate, |
3437 | .error_remove_page = generic_error_remove_page, | ||
3434 | }; | 3438 | }; |
3435 | 3439 | ||
3436 | void ext4_set_aops(struct inode *inode) | 3440 | void ext4_set_aops(struct inode *inode) |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 8970d8c49bb0..04629d1302fc 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -470,19 +470,11 @@ static void fat_put_super(struct super_block *sb) | |||
470 | 470 | ||
471 | iput(sbi->fat_inode); | 471 | iput(sbi->fat_inode); |
472 | 472 | ||
473 | if (sbi->nls_disk) { | 473 | unload_nls(sbi->nls_disk); |
474 | unload_nls(sbi->nls_disk); | 474 | unload_nls(sbi->nls_io); |
475 | sbi->nls_disk = NULL; | 475 | |
476 | sbi->options.codepage = fat_default_codepage; | 476 | if (sbi->options.iocharset != fat_default_iocharset) |
477 | } | ||
478 | if (sbi->nls_io) { | ||
479 | unload_nls(sbi->nls_io); | ||
480 | sbi->nls_io = NULL; | ||
481 | } | ||
482 | if (sbi->options.iocharset != fat_default_iocharset) { | ||
483 | kfree(sbi->options.iocharset); | 477 | kfree(sbi->options.iocharset); |
484 | sbi->options.iocharset = fat_default_iocharset; | ||
485 | } | ||
486 | 478 | ||
487 | sb->s_fs_info = NULL; | 479 | sb->s_fs_info = NULL; |
488 | kfree(sbi); | 480 | kfree(sbi); |
diff --git a/fs/fcntl.c b/fs/fcntl.c index ae413086db97..fc089f2f7f56 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -263,6 +263,79 @@ pid_t f_getown(struct file *filp) | |||
263 | return pid; | 263 | return pid; |
264 | } | 264 | } |
265 | 265 | ||
266 | static int f_setown_ex(struct file *filp, unsigned long arg) | ||
267 | { | ||
268 | struct f_owner_ex * __user owner_p = (void * __user)arg; | ||
269 | struct f_owner_ex owner; | ||
270 | struct pid *pid; | ||
271 | int type; | ||
272 | int ret; | ||
273 | |||
274 | ret = copy_from_user(&owner, owner_p, sizeof(owner)); | ||
275 | if (ret) | ||
276 | return ret; | ||
277 | |||
278 | switch (owner.type) { | ||
279 | case F_OWNER_TID: | ||
280 | type = PIDTYPE_MAX; | ||
281 | break; | ||
282 | |||
283 | case F_OWNER_PID: | ||
284 | type = PIDTYPE_PID; | ||
285 | break; | ||
286 | |||
287 | case F_OWNER_GID: | ||
288 | type = PIDTYPE_PGID; | ||
289 | break; | ||
290 | |||
291 | default: | ||
292 | return -EINVAL; | ||
293 | } | ||
294 | |||
295 | rcu_read_lock(); | ||
296 | pid = find_vpid(owner.pid); | ||
297 | if (owner.pid && !pid) | ||
298 | ret = -ESRCH; | ||
299 | else | ||
300 | ret = __f_setown(filp, pid, type, 1); | ||
301 | rcu_read_unlock(); | ||
302 | |||
303 | return ret; | ||
304 | } | ||
305 | |||
306 | static int f_getown_ex(struct file *filp, unsigned long arg) | ||
307 | { | ||
308 | struct f_owner_ex * __user owner_p = (void * __user)arg; | ||
309 | struct f_owner_ex owner; | ||
310 | int ret = 0; | ||
311 | |||
312 | read_lock(&filp->f_owner.lock); | ||
313 | owner.pid = pid_vnr(filp->f_owner.pid); | ||
314 | switch (filp->f_owner.pid_type) { | ||
315 | case PIDTYPE_MAX: | ||
316 | owner.type = F_OWNER_TID; | ||
317 | break; | ||
318 | |||
319 | case PIDTYPE_PID: | ||
320 | owner.type = F_OWNER_PID; | ||
321 | break; | ||
322 | |||
323 | case PIDTYPE_PGID: | ||
324 | owner.type = F_OWNER_GID; | ||
325 | break; | ||
326 | |||
327 | default: | ||
328 | WARN_ON(1); | ||
329 | ret = -EINVAL; | ||
330 | break; | ||
331 | } | ||
332 | read_unlock(&filp->f_owner.lock); | ||
333 | |||
334 | if (!ret) | ||
335 | ret = copy_to_user(owner_p, &owner, sizeof(owner)); | ||
336 | return ret; | ||
337 | } | ||
338 | |||
266 | static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, | 339 | static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, |
267 | struct file *filp) | 340 | struct file *filp) |
268 | { | 341 | { |
@@ -313,6 +386,12 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, | |||
313 | case F_SETOWN: | 386 | case F_SETOWN: |
314 | err = f_setown(filp, arg, 1); | 387 | err = f_setown(filp, arg, 1); |
315 | break; | 388 | break; |
389 | case F_GETOWN_EX: | ||
390 | err = f_getown_ex(filp, arg); | ||
391 | break; | ||
392 | case F_SETOWN_EX: | ||
393 | err = f_setown_ex(filp, arg); | ||
394 | break; | ||
316 | case F_GETSIG: | 395 | case F_GETSIG: |
317 | err = filp->f_owner.signum; | 396 | err = filp->f_owner.signum; |
318 | break; | 397 | break; |
@@ -428,8 +507,7 @@ static inline int sigio_perm(struct task_struct *p, | |||
428 | 507 | ||
429 | static void send_sigio_to_task(struct task_struct *p, | 508 | static void send_sigio_to_task(struct task_struct *p, |
430 | struct fown_struct *fown, | 509 | struct fown_struct *fown, |
431 | int fd, | 510 | int fd, int reason, int group) |
432 | int reason) | ||
433 | { | 511 | { |
434 | /* | 512 | /* |
435 | * F_SETSIG can change ->signum lockless in parallel, make | 513 | * F_SETSIG can change ->signum lockless in parallel, make |
@@ -461,11 +539,11 @@ static void send_sigio_to_task(struct task_struct *p, | |||
461 | else | 539 | else |
462 | si.si_band = band_table[reason - POLL_IN]; | 540 | si.si_band = band_table[reason - POLL_IN]; |
463 | si.si_fd = fd; | 541 | si.si_fd = fd; |
464 | if (!group_send_sig_info(signum, &si, p)) | 542 | if (!do_send_sig_info(signum, &si, p, group)) |
465 | break; | 543 | break; |
466 | /* fall-through: fall back on the old plain SIGIO signal */ | 544 | /* fall-through: fall back on the old plain SIGIO signal */ |
467 | case 0: | 545 | case 0: |
468 | group_send_sig_info(SIGIO, SEND_SIG_PRIV, p); | 546 | do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group); |
469 | } | 547 | } |
470 | } | 548 | } |
471 | 549 | ||
@@ -474,16 +552,23 @@ void send_sigio(struct fown_struct *fown, int fd, int band) | |||
474 | struct task_struct *p; | 552 | struct task_struct *p; |
475 | enum pid_type type; | 553 | enum pid_type type; |
476 | struct pid *pid; | 554 | struct pid *pid; |
555 | int group = 1; | ||
477 | 556 | ||
478 | read_lock(&fown->lock); | 557 | read_lock(&fown->lock); |
558 | |||
479 | type = fown->pid_type; | 559 | type = fown->pid_type; |
560 | if (type == PIDTYPE_MAX) { | ||
561 | group = 0; | ||
562 | type = PIDTYPE_PID; | ||
563 | } | ||
564 | |||
480 | pid = fown->pid; | 565 | pid = fown->pid; |
481 | if (!pid) | 566 | if (!pid) |
482 | goto out_unlock_fown; | 567 | goto out_unlock_fown; |
483 | 568 | ||
484 | read_lock(&tasklist_lock); | 569 | read_lock(&tasklist_lock); |
485 | do_each_pid_task(pid, type, p) { | 570 | do_each_pid_task(pid, type, p) { |
486 | send_sigio_to_task(p, fown, fd, band); | 571 | send_sigio_to_task(p, fown, fd, band, group); |
487 | } while_each_pid_task(pid, type, p); | 572 | } while_each_pid_task(pid, type, p); |
488 | read_unlock(&tasklist_lock); | 573 | read_unlock(&tasklist_lock); |
489 | out_unlock_fown: | 574 | out_unlock_fown: |
@@ -491,10 +576,10 @@ void send_sigio(struct fown_struct *fown, int fd, int band) | |||
491 | } | 576 | } |
492 | 577 | ||
493 | static void send_sigurg_to_task(struct task_struct *p, | 578 | static void send_sigurg_to_task(struct task_struct *p, |
494 | struct fown_struct *fown) | 579 | struct fown_struct *fown, int group) |
495 | { | 580 | { |
496 | if (sigio_perm(p, fown, SIGURG)) | 581 | if (sigio_perm(p, fown, SIGURG)) |
497 | group_send_sig_info(SIGURG, SEND_SIG_PRIV, p); | 582 | do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group); |
498 | } | 583 | } |
499 | 584 | ||
500 | int send_sigurg(struct fown_struct *fown) | 585 | int send_sigurg(struct fown_struct *fown) |
@@ -502,10 +587,17 @@ int send_sigurg(struct fown_struct *fown) | |||
502 | struct task_struct *p; | 587 | struct task_struct *p; |
503 | enum pid_type type; | 588 | enum pid_type type; |
504 | struct pid *pid; | 589 | struct pid *pid; |
590 | int group = 1; | ||
505 | int ret = 0; | 591 | int ret = 0; |
506 | 592 | ||
507 | read_lock(&fown->lock); | 593 | read_lock(&fown->lock); |
594 | |||
508 | type = fown->pid_type; | 595 | type = fown->pid_type; |
596 | if (type == PIDTYPE_MAX) { | ||
597 | group = 0; | ||
598 | type = PIDTYPE_PID; | ||
599 | } | ||
600 | |||
509 | pid = fown->pid; | 601 | pid = fown->pid; |
510 | if (!pid) | 602 | if (!pid) |
511 | goto out_unlock_fown; | 603 | goto out_unlock_fown; |
@@ -514,7 +606,7 @@ int send_sigurg(struct fown_struct *fown) | |||
514 | 606 | ||
515 | read_lock(&tasklist_lock); | 607 | read_lock(&tasklist_lock); |
516 | do_each_pid_task(pid, type, p) { | 608 | do_each_pid_task(pid, type, p) { |
517 | send_sigurg_to_task(p, fown); | 609 | send_sigurg_to_task(p, fown, group); |
518 | } while_each_pid_task(pid, type, p); | 610 | } while_each_pid_task(pid, type, p); |
519 | read_unlock(&tasklist_lock); | 611 | read_unlock(&tasklist_lock); |
520 | out_unlock_fown: | 612 | out_unlock_fown: |
diff --git a/fs/file_table.c b/fs/file_table.c index 334ce39881f8..8eb44042e009 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -74,14 +74,14 @@ EXPORT_SYMBOL_GPL(get_max_files); | |||
74 | * Handle nr_files sysctl | 74 | * Handle nr_files sysctl |
75 | */ | 75 | */ |
76 | #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) | 76 | #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) |
77 | int proc_nr_files(ctl_table *table, int write, struct file *filp, | 77 | int proc_nr_files(ctl_table *table, int write, |
78 | void __user *buffer, size_t *lenp, loff_t *ppos) | 78 | void __user *buffer, size_t *lenp, loff_t *ppos) |
79 | { | 79 | { |
80 | files_stat.nr_files = get_nr_files(); | 80 | files_stat.nr_files = get_nr_files(); |
81 | return proc_dointvec(table, write, filp, buffer, lenp, ppos); | 81 | return proc_dointvec(table, write, buffer, lenp, ppos); |
82 | } | 82 | } |
83 | #else | 83 | #else |
84 | int proc_nr_files(ctl_table *table, int write, struct file *filp, | 84 | int proc_nr_files(ctl_table *table, int write, |
85 | void __user *buffer, size_t *lenp, loff_t *ppos) | 85 | void __user *buffer, size_t *lenp, loff_t *ppos) |
86 | { | 86 | { |
87 | return -ENOSYS; | 87 | return -ENOSYS; |
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index e703654e7f40..992f6c9410bb 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c | |||
@@ -1276,14 +1276,9 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, | |||
1276 | return 0; | 1276 | return 0; |
1277 | 1277 | ||
1278 | if (attr->ia_valid & ATTR_SIZE) { | 1278 | if (attr->ia_valid & ATTR_SIZE) { |
1279 | unsigned long limit; | 1279 | err = inode_newsize_ok(inode, attr->ia_size); |
1280 | if (IS_SWAPFILE(inode)) | 1280 | if (err) |
1281 | return -ETXTBSY; | 1281 | return err; |
1282 | limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; | ||
1283 | if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) { | ||
1284 | send_sig(SIGXFSZ, current, 0); | ||
1285 | return -EFBIG; | ||
1286 | } | ||
1287 | is_truncate = true; | 1282 | is_truncate = true; |
1288 | } | 1283 | } |
1289 | 1284 | ||
@@ -1350,8 +1345,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, | |||
1350 | * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. | 1345 | * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. |
1351 | */ | 1346 | */ |
1352 | if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { | 1347 | if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { |
1353 | if (outarg.attr.size < oldsize) | 1348 | truncate_pagecache(inode, oldsize, outarg.attr.size); |
1354 | fuse_truncate(inode->i_mapping, outarg.attr.size); | ||
1355 | invalidate_inode_pages2(inode->i_mapping); | 1349 | invalidate_inode_pages2(inode->i_mapping); |
1356 | } | 1350 | } |
1357 | 1351 | ||
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index fc9c79feb5f7..01cc462ff45d 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h | |||
@@ -606,8 +606,6 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, | |||
606 | void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, | 606 | void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, |
607 | u64 attr_valid); | 607 | u64 attr_valid); |
608 | 608 | ||
609 | void fuse_truncate(struct address_space *mapping, loff_t offset); | ||
610 | |||
611 | /** | 609 | /** |
612 | * Initialize the client device | 610 | * Initialize the client device |
613 | */ | 611 | */ |
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 6da947daabda..1a822ce2b24b 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -140,14 +140,6 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) | |||
140 | return 0; | 140 | return 0; |
141 | } | 141 | } |
142 | 142 | ||
143 | void fuse_truncate(struct address_space *mapping, loff_t offset) | ||
144 | { | ||
145 | /* See vmtruncate() */ | ||
146 | unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); | ||
147 | truncate_inode_pages(mapping, offset); | ||
148 | unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); | ||
149 | } | ||
150 | |||
151 | void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, | 143 | void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, |
152 | u64 attr_valid) | 144 | u64 attr_valid) |
153 | { | 145 | { |
@@ -205,8 +197,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, | |||
205 | spin_unlock(&fc->lock); | 197 | spin_unlock(&fc->lock); |
206 | 198 | ||
207 | if (S_ISREG(inode->i_mode) && oldsize != attr->size) { | 199 | if (S_ISREG(inode->i_mode) && oldsize != attr->size) { |
208 | if (attr->size < oldsize) | 200 | truncate_pagecache(inode, oldsize, attr->size); |
209 | fuse_truncate(inode->i_mapping, attr->size); | ||
210 | invalidate_inode_pages2(inode->i_mapping); | 201 | invalidate_inode_pages2(inode->i_mapping); |
211 | } | 202 | } |
212 | } | 203 | } |
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 7ebae9a4ecc0..694b5d48f036 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
@@ -1135,6 +1135,7 @@ static const struct address_space_operations gfs2_writeback_aops = { | |||
1135 | .direct_IO = gfs2_direct_IO, | 1135 | .direct_IO = gfs2_direct_IO, |
1136 | .migratepage = buffer_migrate_page, | 1136 | .migratepage = buffer_migrate_page, |
1137 | .is_partially_uptodate = block_is_partially_uptodate, | 1137 | .is_partially_uptodate = block_is_partially_uptodate, |
1138 | .error_remove_page = generic_error_remove_page, | ||
1138 | }; | 1139 | }; |
1139 | 1140 | ||
1140 | static const struct address_space_operations gfs2_ordered_aops = { | 1141 | static const struct address_space_operations gfs2_ordered_aops = { |
@@ -1151,6 +1152,7 @@ static const struct address_space_operations gfs2_ordered_aops = { | |||
1151 | .direct_IO = gfs2_direct_IO, | 1152 | .direct_IO = gfs2_direct_IO, |
1152 | .migratepage = buffer_migrate_page, | 1153 | .migratepage = buffer_migrate_page, |
1153 | .is_partially_uptodate = block_is_partially_uptodate, | 1154 | .is_partially_uptodate = block_is_partially_uptodate, |
1155 | .error_remove_page = generic_error_remove_page, | ||
1154 | }; | 1156 | }; |
1155 | 1157 | ||
1156 | static const struct address_space_operations gfs2_jdata_aops = { | 1158 | static const struct address_space_operations gfs2_jdata_aops = { |
@@ -1166,6 +1168,7 @@ static const struct address_space_operations gfs2_jdata_aops = { | |||
1166 | .invalidatepage = gfs2_invalidatepage, | 1168 | .invalidatepage = gfs2_invalidatepage, |
1167 | .releasepage = gfs2_releasepage, | 1169 | .releasepage = gfs2_releasepage, |
1168 | .is_partially_uptodate = block_is_partially_uptodate, | 1170 | .is_partially_uptodate = block_is_partially_uptodate, |
1171 | .error_remove_page = generic_error_remove_page, | ||
1169 | }; | 1172 | }; |
1170 | 1173 | ||
1171 | void gfs2_set_aops(struct inode *inode) | 1174 | void gfs2_set_aops(struct inode *inode) |
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 7b6165f25fbe..8bbe03c3f6d5 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c | |||
@@ -344,10 +344,8 @@ void hfs_mdb_put(struct super_block *sb) | |||
344 | brelse(HFS_SB(sb)->mdb_bh); | 344 | brelse(HFS_SB(sb)->mdb_bh); |
345 | brelse(HFS_SB(sb)->alt_mdb_bh); | 345 | brelse(HFS_SB(sb)->alt_mdb_bh); |
346 | 346 | ||
347 | if (HFS_SB(sb)->nls_io) | 347 | unload_nls(HFS_SB(sb)->nls_io); |
348 | unload_nls(HFS_SB(sb)->nls_io); | 348 | unload_nls(HFS_SB(sb)->nls_disk); |
349 | if (HFS_SB(sb)->nls_disk) | ||
350 | unload_nls(HFS_SB(sb)->nls_disk); | ||
351 | 349 | ||
352 | free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0); | 350 | free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0); |
353 | kfree(HFS_SB(sb)); | 351 | kfree(HFS_SB(sb)); |
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index c0759fe0855b..43022f3d5148 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c | |||
@@ -229,8 +229,7 @@ static void hfsplus_put_super(struct super_block *sb) | |||
229 | iput(HFSPLUS_SB(sb).alloc_file); | 229 | iput(HFSPLUS_SB(sb).alloc_file); |
230 | iput(HFSPLUS_SB(sb).hidden_dir); | 230 | iput(HFSPLUS_SB(sb).hidden_dir); |
231 | brelse(HFSPLUS_SB(sb).s_vhbh); | 231 | brelse(HFSPLUS_SB(sb).s_vhbh); |
232 | if (HFSPLUS_SB(sb).nls) | 232 | unload_nls(HFSPLUS_SB(sb).nls); |
233 | unload_nls(HFSPLUS_SB(sb).nls); | ||
234 | kfree(sb->s_fs_info); | 233 | kfree(sb->s_fs_info); |
235 | sb->s_fs_info = NULL; | 234 | sb->s_fs_info = NULL; |
236 | 235 | ||
@@ -464,8 +463,7 @@ out: | |||
464 | 463 | ||
465 | cleanup: | 464 | cleanup: |
466 | hfsplus_put_super(sb); | 465 | hfsplus_put_super(sb); |
467 | if (nls) | 466 | unload_nls(nls); |
468 | unload_nls(nls); | ||
469 | return err; | 467 | return err; |
470 | } | 468 | } |
471 | 469 | ||
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index eba6d552d9c9..87a1258953b8 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -380,36 +380,11 @@ static void hugetlbfs_delete_inode(struct inode *inode) | |||
380 | 380 | ||
381 | static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock) | 381 | static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock) |
382 | { | 382 | { |
383 | struct super_block *sb = inode->i_sb; | 383 | if (generic_detach_inode(inode)) { |
384 | 384 | truncate_hugepages(inode, 0); | |
385 | if (!hlist_unhashed(&inode->i_hash)) { | 385 | clear_inode(inode); |
386 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) | 386 | destroy_inode(inode); |
387 | list_move(&inode->i_list, &inode_unused); | ||
388 | inodes_stat.nr_unused++; | ||
389 | if (!sb || (sb->s_flags & MS_ACTIVE)) { | ||
390 | spin_unlock(&inode_lock); | ||
391 | return; | ||
392 | } | ||
393 | inode->i_state |= I_WILL_FREE; | ||
394 | spin_unlock(&inode_lock); | ||
395 | /* | ||
396 | * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK | ||
397 | * in our backing_dev_info. | ||
398 | */ | ||
399 | write_inode_now(inode, 1); | ||
400 | spin_lock(&inode_lock); | ||
401 | inode->i_state &= ~I_WILL_FREE; | ||
402 | inodes_stat.nr_unused--; | ||
403 | hlist_del_init(&inode->i_hash); | ||
404 | } | 387 | } |
405 | list_del_init(&inode->i_list); | ||
406 | list_del_init(&inode->i_sb_list); | ||
407 | inode->i_state |= I_FREEING; | ||
408 | inodes_stat.nr_inodes--; | ||
409 | spin_unlock(&inode_lock); | ||
410 | truncate_hugepages(inode, 0); | ||
411 | clear_inode(inode); | ||
412 | destroy_inode(inode); | ||
413 | } | 388 | } |
414 | 389 | ||
415 | static void hugetlbfs_drop_inode(struct inode *inode) | 390 | static void hugetlbfs_drop_inode(struct inode *inode) |
@@ -936,15 +911,9 @@ static struct file_system_type hugetlbfs_fs_type = { | |||
936 | 911 | ||
937 | static struct vfsmount *hugetlbfs_vfsmount; | 912 | static struct vfsmount *hugetlbfs_vfsmount; |
938 | 913 | ||
939 | static int can_do_hugetlb_shm(int creat_flags) | 914 | static int can_do_hugetlb_shm(void) |
940 | { | 915 | { |
941 | if (creat_flags != HUGETLB_SHMFS_INODE) | 916 | return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); |
942 | return 0; | ||
943 | if (capable(CAP_IPC_LOCK)) | ||
944 | return 1; | ||
945 | if (in_group_p(sysctl_hugetlb_shm_group)) | ||
946 | return 1; | ||
947 | return 0; | ||
948 | } | 917 | } |
949 | 918 | ||
950 | struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, | 919 | struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, |
@@ -960,7 +929,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, | |||
960 | if (!hugetlbfs_vfsmount) | 929 | if (!hugetlbfs_vfsmount) |
961 | return ERR_PTR(-ENOENT); | 930 | return ERR_PTR(-ENOENT); |
962 | 931 | ||
963 | if (!can_do_hugetlb_shm(creat_flags)) { | 932 | if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { |
964 | *user = current_user(); | 933 | *user = current_user(); |
965 | if (user_shm_lock(size, *user)) { | 934 | if (user_shm_lock(size, *user)) { |
966 | WARN_ONCE(1, | 935 | WARN_ONCE(1, |
diff --git a/fs/inode.c b/fs/inode.c index 76582b06ab97..4d8e3be55976 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -1241,7 +1241,16 @@ void generic_delete_inode(struct inode *inode) | |||
1241 | } | 1241 | } |
1242 | EXPORT_SYMBOL(generic_delete_inode); | 1242 | EXPORT_SYMBOL(generic_delete_inode); |
1243 | 1243 | ||
1244 | static void generic_forget_inode(struct inode *inode) | 1244 | /** |
1245 | * generic_detach_inode - remove inode from inode lists | ||
1246 | * @inode: inode to remove | ||
1247 | * | ||
1248 | * Remove inode from inode lists, write it if it's dirty. This is just an | ||
1249 | * internal VFS helper exported for hugetlbfs. Do not use! | ||
1250 | * | ||
1251 | * Returns 1 if inode should be completely destroyed. | ||
1252 | */ | ||
1253 | int generic_detach_inode(struct inode *inode) | ||
1245 | { | 1254 | { |
1246 | struct super_block *sb = inode->i_sb; | 1255 | struct super_block *sb = inode->i_sb; |
1247 | 1256 | ||
@@ -1251,7 +1260,7 @@ static void generic_forget_inode(struct inode *inode) | |||
1251 | inodes_stat.nr_unused++; | 1260 | inodes_stat.nr_unused++; |
1252 | if (sb->s_flags & MS_ACTIVE) { | 1261 | if (sb->s_flags & MS_ACTIVE) { |
1253 | spin_unlock(&inode_lock); | 1262 | spin_unlock(&inode_lock); |
1254 | return; | 1263 | return 0; |
1255 | } | 1264 | } |
1256 | WARN_ON(inode->i_state & I_NEW); | 1265 | WARN_ON(inode->i_state & I_NEW); |
1257 | inode->i_state |= I_WILL_FREE; | 1266 | inode->i_state |= I_WILL_FREE; |
@@ -1269,6 +1278,14 @@ static void generic_forget_inode(struct inode *inode) | |||
1269 | inode->i_state |= I_FREEING; | 1278 | inode->i_state |= I_FREEING; |
1270 | inodes_stat.nr_inodes--; | 1279 | inodes_stat.nr_inodes--; |
1271 | spin_unlock(&inode_lock); | 1280 | spin_unlock(&inode_lock); |
1281 | return 1; | ||
1282 | } | ||
1283 | EXPORT_SYMBOL_GPL(generic_detach_inode); | ||
1284 | |||
1285 | static void generic_forget_inode(struct inode *inode) | ||
1286 | { | ||
1287 | if (!generic_detach_inode(inode)) | ||
1288 | return; | ||
1272 | if (inode->i_data.nrpages) | 1289 | if (inode->i_data.nrpages) |
1273 | truncate_inode_pages(&inode->i_data, 0); | 1290 | truncate_inode_pages(&inode->i_data, 0); |
1274 | clear_inode(inode); | 1291 | clear_inode(inode); |
@@ -1399,31 +1416,31 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry) | |||
1399 | struct inode *inode = dentry->d_inode; | 1416 | struct inode *inode = dentry->d_inode; |
1400 | struct timespec now; | 1417 | struct timespec now; |
1401 | 1418 | ||
1402 | if (mnt_want_write(mnt)) | ||
1403 | return; | ||
1404 | if (inode->i_flags & S_NOATIME) | 1419 | if (inode->i_flags & S_NOATIME) |
1405 | goto out; | 1420 | return; |
1406 | if (IS_NOATIME(inode)) | 1421 | if (IS_NOATIME(inode)) |
1407 | goto out; | 1422 | return; |
1408 | if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) | 1423 | if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) |
1409 | goto out; | 1424 | return; |
1410 | 1425 | ||
1411 | if (mnt->mnt_flags & MNT_NOATIME) | 1426 | if (mnt->mnt_flags & MNT_NOATIME) |
1412 | goto out; | 1427 | return; |
1413 | if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) | 1428 | if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) |
1414 | goto out; | 1429 | return; |
1415 | 1430 | ||
1416 | now = current_fs_time(inode->i_sb); | 1431 | now = current_fs_time(inode->i_sb); |
1417 | 1432 | ||
1418 | if (!relatime_need_update(mnt, inode, now)) | 1433 | if (!relatime_need_update(mnt, inode, now)) |
1419 | goto out; | 1434 | return; |
1420 | 1435 | ||
1421 | if (timespec_equal(&inode->i_atime, &now)) | 1436 | if (timespec_equal(&inode->i_atime, &now)) |
1422 | goto out; | 1437 | return; |
1438 | |||
1439 | if (mnt_want_write(mnt)) | ||
1440 | return; | ||
1423 | 1441 | ||
1424 | inode->i_atime = now; | 1442 | inode->i_atime = now; |
1425 | mark_inode_dirty_sync(inode); | 1443 | mark_inode_dirty_sync(inode); |
1426 | out: | ||
1427 | mnt_drop_write(mnt); | 1444 | mnt_drop_write(mnt); |
1428 | } | 1445 | } |
1429 | EXPORT_SYMBOL(touch_atime); | 1446 | EXPORT_SYMBOL(touch_atime); |
@@ -1444,34 +1461,37 @@ void file_update_time(struct file *file) | |||
1444 | { | 1461 | { |
1445 | struct inode *inode = file->f_path.dentry->d_inode; | 1462 | struct inode *inode = file->f_path.dentry->d_inode; |
1446 | struct timespec now; | 1463 | struct timespec now; |
1447 | int sync_it = 0; | 1464 | enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; |
1448 | int err; | ||
1449 | 1465 | ||
1466 | /* First try to exhaust all avenues to not sync */ | ||
1450 | if (IS_NOCMTIME(inode)) | 1467 | if (IS_NOCMTIME(inode)) |
1451 | return; | 1468 | return; |
1452 | 1469 | ||
1453 | err = mnt_want_write_file(file); | ||
1454 | if (err) | ||
1455 | return; | ||
1456 | |||
1457 | now = current_fs_time(inode->i_sb); | 1470 | now = current_fs_time(inode->i_sb); |
1458 | if (!timespec_equal(&inode->i_mtime, &now)) { | 1471 | if (!timespec_equal(&inode->i_mtime, &now)) |
1459 | inode->i_mtime = now; | 1472 | sync_it = S_MTIME; |
1460 | sync_it = 1; | ||
1461 | } | ||
1462 | 1473 | ||
1463 | if (!timespec_equal(&inode->i_ctime, &now)) { | 1474 | if (!timespec_equal(&inode->i_ctime, &now)) |
1464 | inode->i_ctime = now; | 1475 | sync_it |= S_CTIME; |
1465 | sync_it = 1; | ||
1466 | } | ||
1467 | 1476 | ||
1468 | if (IS_I_VERSION(inode)) { | 1477 | if (IS_I_VERSION(inode)) |
1469 | inode_inc_iversion(inode); | 1478 | sync_it |= S_VERSION; |
1470 | sync_it = 1; | 1479 | |
1471 | } | 1480 | if (!sync_it) |
1481 | return; | ||
1472 | 1482 | ||
1473 | if (sync_it) | 1483 | /* Finally allowed to write? Takes lock. */ |
1474 | mark_inode_dirty_sync(inode); | 1484 | if (mnt_want_write_file(file)) |
1485 | return; | ||
1486 | |||
1487 | /* Only change inode inside the lock region */ | ||
1488 | if (sync_it & S_VERSION) | ||
1489 | inode_inc_iversion(inode); | ||
1490 | if (sync_it & S_CTIME) | ||
1491 | inode->i_ctime = now; | ||
1492 | if (sync_it & S_MTIME) | ||
1493 | inode->i_mtime = now; | ||
1494 | mark_inode_dirty_sync(inode); | ||
1475 | mnt_drop_write(file->f_path.mnt); | 1495 | mnt_drop_write(file->f_path.mnt); |
1476 | } | 1496 | } |
1477 | EXPORT_SYMBOL(file_update_time); | 1497 | EXPORT_SYMBOL(file_update_time); |
@@ -1599,7 +1619,8 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) | |||
1599 | else if (S_ISSOCK(mode)) | 1619 | else if (S_ISSOCK(mode)) |
1600 | inode->i_fop = &bad_sock_fops; | 1620 | inode->i_fop = &bad_sock_fops; |
1601 | else | 1621 | else |
1602 | printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n", | 1622 | printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for" |
1603 | mode); | 1623 | " inode %s:%lu\n", mode, inode->i_sb->s_id, |
1624 | inode->i_ino); | ||
1604 | } | 1625 | } |
1605 | EXPORT_SYMBOL(init_special_inode); | 1626 | EXPORT_SYMBOL(init_special_inode); |
diff --git a/fs/internal.h b/fs/internal.h index d55ef562f0bb..515175b8b72e 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -57,6 +57,7 @@ extern int check_unsafe_exec(struct linux_binprm *); | |||
57 | * namespace.c | 57 | * namespace.c |
58 | */ | 58 | */ |
59 | extern int copy_mount_options(const void __user *, unsigned long *); | 59 | extern int copy_mount_options(const void __user *, unsigned long *); |
60 | extern int copy_mount_string(const void __user *, char **); | ||
60 | 61 | ||
61 | extern void free_vfsmnt(struct vfsmount *); | 62 | extern void free_vfsmnt(struct vfsmount *); |
62 | extern struct vfsmount *alloc_vfsmnt(const char *); | 63 | extern struct vfsmount *alloc_vfsmnt(const char *); |
diff --git a/fs/ioctl.c b/fs/ioctl.c index 5612880fcbe7..7b17a14396ff 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c | |||
@@ -162,20 +162,21 @@ EXPORT_SYMBOL(fiemap_check_flags); | |||
162 | static int fiemap_check_ranges(struct super_block *sb, | 162 | static int fiemap_check_ranges(struct super_block *sb, |
163 | u64 start, u64 len, u64 *new_len) | 163 | u64 start, u64 len, u64 *new_len) |
164 | { | 164 | { |
165 | u64 maxbytes = (u64) sb->s_maxbytes; | ||
166 | |||
165 | *new_len = len; | 167 | *new_len = len; |
166 | 168 | ||
167 | if (len == 0) | 169 | if (len == 0) |
168 | return -EINVAL; | 170 | return -EINVAL; |
169 | 171 | ||
170 | if (start > sb->s_maxbytes) | 172 | if (start > maxbytes) |
171 | return -EFBIG; | 173 | return -EFBIG; |
172 | 174 | ||
173 | /* | 175 | /* |
174 | * Shrink request scope to what the fs can actually handle. | 176 | * Shrink request scope to what the fs can actually handle. |
175 | */ | 177 | */ |
176 | if ((len > sb->s_maxbytes) || | 178 | if (len > maxbytes || (maxbytes - len) < start) |
177 | (sb->s_maxbytes - len) < start) | 179 | *new_len = maxbytes - start; |
178 | *new_len = sb->s_maxbytes - start; | ||
179 | 180 | ||
180 | return 0; | 181 | return 0; |
181 | } | 182 | } |
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 85f96bc651c7..6b4dcd4f2943 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
@@ -46,10 +46,7 @@ static void isofs_put_super(struct super_block *sb) | |||
46 | #ifdef CONFIG_JOLIET | 46 | #ifdef CONFIG_JOLIET |
47 | lock_kernel(); | 47 | lock_kernel(); |
48 | 48 | ||
49 | if (sbi->s_nls_iocharset) { | 49 | unload_nls(sbi->s_nls_iocharset); |
50 | unload_nls(sbi->s_nls_iocharset); | ||
51 | sbi->s_nls_iocharset = NULL; | ||
52 | } | ||
53 | 50 | ||
54 | unlock_kernel(); | 51 | unlock_kernel(); |
55 | #endif | 52 | #endif |
@@ -912,8 +909,7 @@ out_no_root: | |||
912 | printk(KERN_WARNING "%s: get root inode failed\n", __func__); | 909 | printk(KERN_WARNING "%s: get root inode failed\n", __func__); |
913 | out_no_inode: | 910 | out_no_inode: |
914 | #ifdef CONFIG_JOLIET | 911 | #ifdef CONFIG_JOLIET |
915 | if (sbi->s_nls_iocharset) | 912 | unload_nls(sbi->s_nls_iocharset); |
916 | unload_nls(sbi->s_nls_iocharset); | ||
917 | #endif | 913 | #endif |
918 | goto out_freesbi; | 914 | goto out_freesbi; |
919 | out_no_read: | 915 | out_no_read: |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 37e6dcda8fc8..2234c73fc577 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
@@ -178,13 +178,11 @@ static void jfs_put_super(struct super_block *sb) | |||
178 | rc = jfs_umount(sb); | 178 | rc = jfs_umount(sb); |
179 | if (rc) | 179 | if (rc) |
180 | jfs_err("jfs_umount failed with return code %d", rc); | 180 | jfs_err("jfs_umount failed with return code %d", rc); |
181 | if (sbi->nls_tab) | 181 | |
182 | unload_nls(sbi->nls_tab); | 182 | unload_nls(sbi->nls_tab); |
183 | sbi->nls_tab = NULL; | ||
184 | 183 | ||
185 | truncate_inode_pages(sbi->direct_inode->i_mapping, 0); | 184 | truncate_inode_pages(sbi->direct_inode->i_mapping, 0); |
186 | iput(sbi->direct_inode); | 185 | iput(sbi->direct_inode); |
187 | sbi->direct_inode = NULL; | ||
188 | 186 | ||
189 | kfree(sbi); | 187 | kfree(sbi); |
190 | 188 | ||
@@ -347,8 +345,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, | |||
347 | 345 | ||
348 | if (nls_map != (void *) -1) { | 346 | if (nls_map != (void *) -1) { |
349 | /* Discard old (if remount) */ | 347 | /* Discard old (if remount) */ |
350 | if (sbi->nls_tab) | 348 | unload_nls(sbi->nls_tab); |
351 | unload_nls(sbi->nls_tab); | ||
352 | sbi->nls_tab = nls_map; | 349 | sbi->nls_tab = nls_map; |
353 | } | 350 | } |
354 | return 1; | 351 | return 1; |
diff --git a/fs/libfs.c b/fs/libfs.c index dcec3d3ea64f..219576c52d80 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
@@ -527,14 +527,18 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, | |||
527 | const void *from, size_t available) | 527 | const void *from, size_t available) |
528 | { | 528 | { |
529 | loff_t pos = *ppos; | 529 | loff_t pos = *ppos; |
530 | size_t ret; | ||
531 | |||
530 | if (pos < 0) | 532 | if (pos < 0) |
531 | return -EINVAL; | 533 | return -EINVAL; |
532 | if (pos >= available) | 534 | if (pos >= available || !count) |
533 | return 0; | 535 | return 0; |
534 | if (count > available - pos) | 536 | if (count > available - pos) |
535 | count = available - pos; | 537 | count = available - pos; |
536 | if (copy_to_user(to, from + pos, count)) | 538 | ret = copy_to_user(to, from + pos, count); |
539 | if (ret == count) | ||
537 | return -EFAULT; | 540 | return -EFAULT; |
541 | count -= ret; | ||
538 | *ppos = pos + count; | 542 | *ppos = pos + count; |
539 | return count; | 543 | return count; |
540 | } | 544 | } |
@@ -735,10 +739,11 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, | |||
735 | if (copy_from_user(attr->set_buf, buf, size)) | 739 | if (copy_from_user(attr->set_buf, buf, size)) |
736 | goto out; | 740 | goto out; |
737 | 741 | ||
738 | ret = len; /* claim we got the whole input */ | ||
739 | attr->set_buf[size] = '\0'; | 742 | attr->set_buf[size] = '\0'; |
740 | val = simple_strtol(attr->set_buf, NULL, 0); | 743 | val = simple_strtol(attr->set_buf, NULL, 0); |
741 | attr->set(attr->data, val); | 744 | ret = attr->set(attr->data, val); |
745 | if (ret == 0) | ||
746 | ret = len; /* on success, claim we got the whole input */ | ||
742 | out: | 747 | out: |
743 | mutex_unlock(&attr->mutex); | 748 | mutex_unlock(&attr->mutex); |
744 | return ret; | 749 | return ret; |
diff --git a/fs/namespace.c b/fs/namespace.c index 7230787d18b0..bdc3cb4fd222 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -1640,7 +1640,7 @@ static int do_new_mount(struct path *path, char *type, int flags, | |||
1640 | { | 1640 | { |
1641 | struct vfsmount *mnt; | 1641 | struct vfsmount *mnt; |
1642 | 1642 | ||
1643 | if (!type || !memchr(type, 0, PAGE_SIZE)) | 1643 | if (!type) |
1644 | return -EINVAL; | 1644 | return -EINVAL; |
1645 | 1645 | ||
1646 | /* we need capabilities... */ | 1646 | /* we need capabilities... */ |
@@ -1871,6 +1871,23 @@ int copy_mount_options(const void __user * data, unsigned long *where) | |||
1871 | return 0; | 1871 | return 0; |
1872 | } | 1872 | } |
1873 | 1873 | ||
1874 | int copy_mount_string(const void __user *data, char **where) | ||
1875 | { | ||
1876 | char *tmp; | ||
1877 | |||
1878 | if (!data) { | ||
1879 | *where = NULL; | ||
1880 | return 0; | ||
1881 | } | ||
1882 | |||
1883 | tmp = strndup_user(data, PAGE_SIZE); | ||
1884 | if (IS_ERR(tmp)) | ||
1885 | return PTR_ERR(tmp); | ||
1886 | |||
1887 | *where = tmp; | ||
1888 | return 0; | ||
1889 | } | ||
1890 | |||
1874 | /* | 1891 | /* |
1875 | * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to | 1892 | * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to |
1876 | * be given to the mount() call (ie: read-only, no-dev, no-suid etc). | 1893 | * be given to the mount() call (ie: read-only, no-dev, no-suid etc). |
@@ -1900,8 +1917,6 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, | |||
1900 | 1917 | ||
1901 | if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE)) | 1918 | if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE)) |
1902 | return -EINVAL; | 1919 | return -EINVAL; |
1903 | if (dev_name && !memchr(dev_name, 0, PAGE_SIZE)) | ||
1904 | return -EINVAL; | ||
1905 | 1920 | ||
1906 | if (data_page) | 1921 | if (data_page) |
1907 | ((char *)data_page)[PAGE_SIZE - 1] = 0; | 1922 | ((char *)data_page)[PAGE_SIZE - 1] = 0; |
@@ -2070,40 +2085,42 @@ EXPORT_SYMBOL(create_mnt_ns); | |||
2070 | SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, | 2085 | SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, |
2071 | char __user *, type, unsigned long, flags, void __user *, data) | 2086 | char __user *, type, unsigned long, flags, void __user *, data) |
2072 | { | 2087 | { |
2073 | int retval; | 2088 | int ret; |
2089 | char *kernel_type; | ||
2090 | char *kernel_dir; | ||
2091 | char *kernel_dev; | ||
2074 | unsigned long data_page; | 2092 | unsigned long data_page; |
2075 | unsigned long type_page; | ||
2076 | unsigned long dev_page; | ||
2077 | char *dir_page; | ||
2078 | 2093 | ||
2079 | retval = copy_mount_options(type, &type_page); | 2094 | ret = copy_mount_string(type, &kernel_type); |
2080 | if (retval < 0) | 2095 | if (ret < 0) |
2081 | return retval; | 2096 | goto out_type; |
2082 | 2097 | ||
2083 | dir_page = getname(dir_name); | 2098 | kernel_dir = getname(dir_name); |
2084 | retval = PTR_ERR(dir_page); | 2099 | if (IS_ERR(kernel_dir)) { |
2085 | if (IS_ERR(dir_page)) | 2100 | ret = PTR_ERR(kernel_dir); |
2086 | goto out1; | 2101 | goto out_dir; |
2102 | } | ||
2087 | 2103 | ||
2088 | retval = copy_mount_options(dev_name, &dev_page); | 2104 | ret = copy_mount_string(dev_name, &kernel_dev); |
2089 | if (retval < 0) | 2105 | if (ret < 0) |
2090 | goto out2; | 2106 | goto out_dev; |
2091 | 2107 | ||
2092 | retval = copy_mount_options(data, &data_page); | 2108 | ret = copy_mount_options(data, &data_page); |
2093 | if (retval < 0) | 2109 | if (ret < 0) |
2094 | goto out3; | 2110 | goto out_data; |
2095 | 2111 | ||
2096 | retval = do_mount((char *)dev_page, dir_page, (char *)type_page, | 2112 | ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags, |
2097 | flags, (void *)data_page); | 2113 | (void *) data_page); |
2098 | free_page(data_page); | ||
2099 | 2114 | ||
2100 | out3: | 2115 | free_page(data_page); |
2101 | free_page(dev_page); | 2116 | out_data: |
2102 | out2: | 2117 | kfree(kernel_dev); |
2103 | putname(dir_page); | 2118 | out_dev: |
2104 | out1: | 2119 | putname(kernel_dir); |
2105 | free_page(type_page); | 2120 | out_dir: |
2106 | return retval; | 2121 | kfree(kernel_type); |
2122 | out_type: | ||
2123 | return ret; | ||
2107 | } | 2124 | } |
2108 | 2125 | ||
2109 | /* | 2126 | /* |
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index b99ce205b1bd..cf98da1be23e 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c | |||
@@ -746,16 +746,8 @@ static void ncp_put_super(struct super_block *sb) | |||
746 | 746 | ||
747 | #ifdef CONFIG_NCPFS_NLS | 747 | #ifdef CONFIG_NCPFS_NLS |
748 | /* unload the NLS charsets */ | 748 | /* unload the NLS charsets */ |
749 | if (server->nls_vol) | 749 | unload_nls(server->nls_vol); |
750 | { | 750 | unload_nls(server->nls_io); |
751 | unload_nls(server->nls_vol); | ||
752 | server->nls_vol = NULL; | ||
753 | } | ||
754 | if (server->nls_io) | ||
755 | { | ||
756 | unload_nls(server->nls_io); | ||
757 | server->nls_io = NULL; | ||
758 | } | ||
759 | #endif /* CONFIG_NCPFS_NLS */ | 751 | #endif /* CONFIG_NCPFS_NLS */ |
760 | 752 | ||
761 | if (server->info_filp) | 753 | if (server->info_filp) |
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 53a7ed7eb9c6..0d58caf4a6e1 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c | |||
@@ -223,10 +223,8 @@ ncp_set_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg) | |||
223 | oldset_io = server->nls_io; | 223 | oldset_io = server->nls_io; |
224 | server->nls_io = iocharset; | 224 | server->nls_io = iocharset; |
225 | 225 | ||
226 | if (oldset_cp) | 226 | unload_nls(oldset_cp); |
227 | unload_nls(oldset_cp); | 227 | unload_nls(oldset_io); |
228 | if (oldset_io) | ||
229 | unload_nls(oldset_io); | ||
230 | 228 | ||
231 | return 0; | 229 | return 0; |
232 | } | 230 | } |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 5021b75d2d1e..86d6b4db1096 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -525,6 +525,7 @@ const struct address_space_operations nfs_file_aops = { | |||
525 | .direct_IO = nfs_direct_IO, | 525 | .direct_IO = nfs_direct_IO, |
526 | .migratepage = nfs_migrate_page, | 526 | .migratepage = nfs_migrate_page, |
527 | .launder_page = nfs_launder_page, | 527 | .launder_page = nfs_launder_page, |
528 | .error_remove_page = generic_error_remove_page, | ||
528 | }; | 529 | }; |
529 | 530 | ||
530 | /* | 531 | /* |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 060022b4651c..faa091865ad0 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -458,49 +458,21 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
458 | */ | 458 | */ |
459 | static int nfs_vmtruncate(struct inode * inode, loff_t offset) | 459 | static int nfs_vmtruncate(struct inode * inode, loff_t offset) |
460 | { | 460 | { |
461 | if (i_size_read(inode) < offset) { | 461 | loff_t oldsize; |
462 | unsigned long limit; | 462 | int err; |
463 | |||
464 | limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; | ||
465 | if (limit != RLIM_INFINITY && offset > limit) | ||
466 | goto out_sig; | ||
467 | if (offset > inode->i_sb->s_maxbytes) | ||
468 | goto out_big; | ||
469 | spin_lock(&inode->i_lock); | ||
470 | i_size_write(inode, offset); | ||
471 | spin_unlock(&inode->i_lock); | ||
472 | } else { | ||
473 | struct address_space *mapping = inode->i_mapping; | ||
474 | 463 | ||
475 | /* | 464 | err = inode_newsize_ok(inode, offset); |
476 | * truncation of in-use swapfiles is disallowed - it would | 465 | if (err) |
477 | * cause subsequent swapout to scribble on the now-freed | 466 | goto out; |
478 | * blocks. | ||
479 | */ | ||
480 | if (IS_SWAPFILE(inode)) | ||
481 | return -ETXTBSY; | ||
482 | spin_lock(&inode->i_lock); | ||
483 | i_size_write(inode, offset); | ||
484 | spin_unlock(&inode->i_lock); | ||
485 | 467 | ||
486 | /* | 468 | spin_lock(&inode->i_lock); |
487 | * unmap_mapping_range is called twice, first simply for | 469 | oldsize = inode->i_size; |
488 | * efficiency so that truncate_inode_pages does fewer | 470 | i_size_write(inode, offset); |
489 | * single-page unmaps. However after this first call, and | 471 | spin_unlock(&inode->i_lock); |
490 | * before truncate_inode_pages finishes, it is possible for | 472 | |
491 | * private pages to be COWed, which remain after | 473 | truncate_pagecache(inode, oldsize, offset); |
492 | * truncate_inode_pages finishes, hence the second | 474 | out: |
493 | * unmap_mapping_range call must be made for correctness. | 475 | return err; |
494 | */ | ||
495 | unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); | ||
496 | truncate_inode_pages(mapping, offset); | ||
497 | unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); | ||
498 | } | ||
499 | return 0; | ||
500 | out_sig: | ||
501 | send_sig(SIGXFSZ, current, 0); | ||
502 | out_big: | ||
503 | return -EFBIG; | ||
504 | } | 476 | } |
505 | 477 | ||
506 | /** | 478 | /** |
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 477d37d83b31..2224b4d07bf0 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c | |||
@@ -270,7 +270,8 @@ struct nls_table *load_nls(char *charset) | |||
270 | 270 | ||
271 | void unload_nls(struct nls_table *nls) | 271 | void unload_nls(struct nls_table *nls) |
272 | { | 272 | { |
273 | module_put(nls->owner); | 273 | if (nls) |
274 | module_put(nls->owner); | ||
274 | } | 275 | } |
275 | 276 | ||
276 | static const wchar_t charset2uni[256] = { | 277 | static const wchar_t charset2uni[256] = { |
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index b38f944f0667..cfce53cb65d7 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c | |||
@@ -1550,6 +1550,7 @@ const struct address_space_operations ntfs_aops = { | |||
1550 | .migratepage = buffer_migrate_page, /* Move a page cache page from | 1550 | .migratepage = buffer_migrate_page, /* Move a page cache page from |
1551 | one physical page to an | 1551 | one physical page to an |
1552 | other. */ | 1552 | other. */ |
1553 | .error_remove_page = generic_error_remove_page, | ||
1553 | }; | 1554 | }; |
1554 | 1555 | ||
1555 | /** | 1556 | /** |
@@ -1569,6 +1570,7 @@ const struct address_space_operations ntfs_mst_aops = { | |||
1569 | .migratepage = buffer_migrate_page, /* Move a page cache page from | 1570 | .migratepage = buffer_migrate_page, /* Move a page cache page from |
1570 | one physical page to an | 1571 | one physical page to an |
1571 | other. */ | 1572 | other. */ |
1573 | .error_remove_page = generic_error_remove_page, | ||
1572 | }; | 1574 | }; |
1573 | 1575 | ||
1574 | #ifdef NTFS_RW | 1576 | #ifdef NTFS_RW |
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index abaaa1cbf8de..80b04770e8e9 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c | |||
@@ -201,8 +201,7 @@ use_utf8: | |||
201 | v, old_nls->charset); | 201 | v, old_nls->charset); |
202 | nls_map = old_nls; | 202 | nls_map = old_nls; |
203 | } else /* nls_map */ { | 203 | } else /* nls_map */ { |
204 | if (old_nls) | 204 | unload_nls(old_nls); |
205 | unload_nls(old_nls); | ||
206 | } | 205 | } |
207 | } else if (!strcmp(p, "utf8")) { | 206 | } else if (!strcmp(p, "utf8")) { |
208 | bool val = false; | 207 | bool val = false; |
@@ -2427,10 +2426,9 @@ static void ntfs_put_super(struct super_block *sb) | |||
2427 | ntfs_free(vol->upcase); | 2426 | ntfs_free(vol->upcase); |
2428 | vol->upcase = NULL; | 2427 | vol->upcase = NULL; |
2429 | } | 2428 | } |
2430 | if (vol->nls_map) { | 2429 | |
2431 | unload_nls(vol->nls_map); | 2430 | unload_nls(vol->nls_map); |
2432 | vol->nls_map = NULL; | 2431 | |
2433 | } | ||
2434 | sb->s_fs_info = NULL; | 2432 | sb->s_fs_info = NULL; |
2435 | kfree(vol); | 2433 | kfree(vol); |
2436 | 2434 | ||
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 72e76062a900..deb2b132ae5e 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -2022,4 +2022,5 @@ const struct address_space_operations ocfs2_aops = { | |||
2022 | .releasepage = ocfs2_releasepage, | 2022 | .releasepage = ocfs2_releasepage, |
2023 | .migratepage = buffer_migrate_page, | 2023 | .migratepage = buffer_migrate_page, |
2024 | .is_partially_uptodate = block_is_partially_uptodate, | 2024 | .is_partially_uptodate = block_is_partially_uptodate, |
2025 | .error_remove_page = generic_error_remove_page, | ||
2025 | }; | 2026 | }; |
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 171e052c07b3..c7bff4f603ff 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c | |||
@@ -97,7 +97,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v) | |||
97 | "Committed_AS: %8lu kB\n" | 97 | "Committed_AS: %8lu kB\n" |
98 | "VmallocTotal: %8lu kB\n" | 98 | "VmallocTotal: %8lu kB\n" |
99 | "VmallocUsed: %8lu kB\n" | 99 | "VmallocUsed: %8lu kB\n" |
100 | "VmallocChunk: %8lu kB\n", | 100 | "VmallocChunk: %8lu kB\n" |
101 | #ifdef CONFIG_MEMORY_FAILURE | ||
102 | "HardwareCorrupted: %8lu kB\n" | ||
103 | #endif | ||
104 | , | ||
101 | K(i.totalram), | 105 | K(i.totalram), |
102 | K(i.freeram), | 106 | K(i.freeram), |
103 | K(i.bufferram), | 107 | K(i.bufferram), |
@@ -144,6 +148,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) | |||
144 | (unsigned long)VMALLOC_TOTAL >> 10, | 148 | (unsigned long)VMALLOC_TOTAL >> 10, |
145 | vmi.used >> 10, | 149 | vmi.used >> 10, |
146 | vmi.largest_chunk >> 10 | 150 | vmi.largest_chunk >> 10 |
151 | #ifdef CONFIG_MEMORY_FAILURE | ||
152 | ,atomic_long_read(&mce_bad_pages) << (PAGE_SHIFT - 10) | ||
153 | #endif | ||
147 | ); | 154 | ); |
148 | 155 | ||
149 | hugetlb_report_meminfo(m); | 156 | hugetlb_report_meminfo(m); |
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 9b1e4e9a16bf..f667e8aeabdf 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
@@ -153,7 +153,7 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, | |||
153 | 153 | ||
154 | /* careful: calling conventions are nasty here */ | 154 | /* careful: calling conventions are nasty here */ |
155 | res = count; | 155 | res = count; |
156 | error = table->proc_handler(table, write, filp, buf, &res, ppos); | 156 | error = table->proc_handler(table, write, buf, &res, ppos); |
157 | if (!error) | 157 | if (!error) |
158 | error = res; | 158 | error = res; |
159 | out: | 159 | out: |
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 11f0c06316de..32fae4040ebf 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c | |||
@@ -69,14 +69,11 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) | |||
69 | /* make various checks */ | 69 | /* make various checks */ |
70 | order = get_order(newsize); | 70 | order = get_order(newsize); |
71 | if (unlikely(order >= MAX_ORDER)) | 71 | if (unlikely(order >= MAX_ORDER)) |
72 | goto too_big; | 72 | return -EFBIG; |
73 | 73 | ||
74 | limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; | 74 | ret = inode_newsize_ok(inode, newsize); |
75 | if (limit != RLIM_INFINITY && newsize > limit) | 75 | if (ret) |
76 | goto fsize_exceeded; | 76 | return ret; |
77 | |||
78 | if (newsize > inode->i_sb->s_maxbytes) | ||
79 | goto too_big; | ||
80 | 77 | ||
81 | i_size_write(inode, newsize); | 78 | i_size_write(inode, newsize); |
82 | 79 | ||
@@ -118,12 +115,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) | |||
118 | 115 | ||
119 | return 0; | 116 | return 0; |
120 | 117 | ||
121 | fsize_exceeded: | 118 | add_error: |
122 | send_sig(SIGXFSZ, current, 0); | ||
123 | too_big: | ||
124 | return -EFBIG; | ||
125 | |||
126 | add_error: | ||
127 | while (loop < npages) | 119 | while (loop < npages) |
128 | __free_page(pages + loop++); | 120 | __free_page(pages + loop++); |
129 | return ret; | 121 | return ret; |
diff --git a/fs/read_write.c b/fs/read_write.c index 6c8c55dec2bc..3ac28987f22a 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -839,9 +839,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | |||
839 | max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); | 839 | max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); |
840 | 840 | ||
841 | pos = *ppos; | 841 | pos = *ppos; |
842 | retval = -EINVAL; | ||
843 | if (unlikely(pos < 0)) | ||
844 | goto fput_out; | ||
845 | if (unlikely(pos + count > max)) { | 842 | if (unlikely(pos + count > max)) { |
846 | retval = -EOVERFLOW; | 843 | retval = -EOVERFLOW; |
847 | if (pos >= max) | 844 | if (pos >= max) |
diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 47f132df0c3f..c117fa80d1e9 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c | |||
@@ -528,7 +528,7 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent) | |||
528 | pos = (ROMFH_SIZE + len + 1 + ROMFH_PAD) & ROMFH_MASK; | 528 | pos = (ROMFH_SIZE + len + 1 + ROMFH_PAD) & ROMFH_MASK; |
529 | 529 | ||
530 | root = romfs_iget(sb, pos); | 530 | root = romfs_iget(sb, pos); |
531 | if (!root) | 531 | if (IS_ERR(root)) |
532 | goto error; | 532 | goto error; |
533 | 533 | ||
534 | sb->s_root = d_alloc_root(root); | 534 | sb->s_root = d_alloc_root(root); |
diff --git a/fs/seq_file.c b/fs/seq_file.c index 6c959275f2d0..eae7d9dbf3ff 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c | |||
@@ -429,20 +429,21 @@ EXPORT_SYMBOL(mangle_path); | |||
429 | */ | 429 | */ |
430 | int seq_path(struct seq_file *m, struct path *path, char *esc) | 430 | int seq_path(struct seq_file *m, struct path *path, char *esc) |
431 | { | 431 | { |
432 | if (m->count < m->size) { | 432 | char *buf; |
433 | char *s = m->buf + m->count; | 433 | size_t size = seq_get_buf(m, &buf); |
434 | char *p = d_path(path, s, m->size - m->count); | 434 | int res = -1; |
435 | |||
436 | if (size) { | ||
437 | char *p = d_path(path, buf, size); | ||
435 | if (!IS_ERR(p)) { | 438 | if (!IS_ERR(p)) { |
436 | s = mangle_path(s, p, esc); | 439 | char *end = mangle_path(buf, p, esc); |
437 | if (s) { | 440 | if (end) |
438 | p = m->buf + m->count; | 441 | res = end - buf; |
439 | m->count = s - m->buf; | ||
440 | return s - p; | ||
441 | } | ||
442 | } | 442 | } |
443 | } | 443 | } |
444 | m->count = m->size; | 444 | seq_commit(m, res); |
445 | return -1; | 445 | |
446 | return res; | ||
446 | } | 447 | } |
447 | EXPORT_SYMBOL(seq_path); | 448 | EXPORT_SYMBOL(seq_path); |
448 | 449 | ||
@@ -454,26 +455,28 @@ EXPORT_SYMBOL(seq_path); | |||
454 | int seq_path_root(struct seq_file *m, struct path *path, struct path *root, | 455 | int seq_path_root(struct seq_file *m, struct path *path, struct path *root, |
455 | char *esc) | 456 | char *esc) |
456 | { | 457 | { |
457 | int err = -ENAMETOOLONG; | 458 | char *buf; |
458 | if (m->count < m->size) { | 459 | size_t size = seq_get_buf(m, &buf); |
459 | char *s = m->buf + m->count; | 460 | int res = -ENAMETOOLONG; |
461 | |||
462 | if (size) { | ||
460 | char *p; | 463 | char *p; |
461 | 464 | ||
462 | spin_lock(&dcache_lock); | 465 | spin_lock(&dcache_lock); |
463 | p = __d_path(path, root, s, m->size - m->count); | 466 | p = __d_path(path, root, buf, size); |
464 | spin_unlock(&dcache_lock); | 467 | spin_unlock(&dcache_lock); |
465 | err = PTR_ERR(p); | 468 | res = PTR_ERR(p); |
466 | if (!IS_ERR(p)) { | 469 | if (!IS_ERR(p)) { |
467 | s = mangle_path(s, p, esc); | 470 | char *end = mangle_path(buf, p, esc); |
468 | if (s) { | 471 | if (end) |
469 | p = m->buf + m->count; | 472 | res = end - buf; |
470 | m->count = s - m->buf; | 473 | else |
471 | return 0; | 474 | res = -ENAMETOOLONG; |
472 | } | ||
473 | } | 475 | } |
474 | } | 476 | } |
475 | m->count = m->size; | 477 | seq_commit(m, res); |
476 | return err; | 478 | |
479 | return res < 0 ? res : 0; | ||
477 | } | 480 | } |
478 | 481 | ||
479 | /* | 482 | /* |
@@ -481,20 +484,21 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root, | |||
481 | */ | 484 | */ |
482 | int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc) | 485 | int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc) |
483 | { | 486 | { |
484 | if (m->count < m->size) { | 487 | char *buf; |
485 | char *s = m->buf + m->count; | 488 | size_t size = seq_get_buf(m, &buf); |
486 | char *p = dentry_path(dentry, s, m->size - m->count); | 489 | int res = -1; |
490 | |||
491 | if (size) { | ||
492 | char *p = dentry_path(dentry, buf, size); | ||
487 | if (!IS_ERR(p)) { | 493 | if (!IS_ERR(p)) { |
488 | s = mangle_path(s, p, esc); | 494 | char *end = mangle_path(buf, p, esc); |
489 | if (s) { | 495 | if (end) |
490 | p = m->buf + m->count; | 496 | res = end - buf; |
491 | m->count = s - m->buf; | ||
492 | return s - p; | ||
493 | } | ||
494 | } | 497 | } |
495 | } | 498 | } |
496 | m->count = m->size; | 499 | seq_commit(m, res); |
497 | return -1; | 500 | |
501 | return res; | ||
498 | } | 502 | } |
499 | 503 | ||
500 | int seq_bitmap(struct seq_file *m, const unsigned long *bits, | 504 | int seq_bitmap(struct seq_file *m, const unsigned long *bits, |
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 1402d2d54f52..1c4c8f089970 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c | |||
@@ -459,14 +459,8 @@ smb_show_options(struct seq_file *s, struct vfsmount *m) | |||
459 | static void | 459 | static void |
460 | smb_unload_nls(struct smb_sb_info *server) | 460 | smb_unload_nls(struct smb_sb_info *server) |
461 | { | 461 | { |
462 | if (server->remote_nls) { | 462 | unload_nls(server->remote_nls); |
463 | unload_nls(server->remote_nls); | 463 | unload_nls(server->local_nls); |
464 | server->remote_nls = NULL; | ||
465 | } | ||
466 | if (server->local_nls) { | ||
467 | unload_nls(server->local_nls); | ||
468 | server->local_nls = NULL; | ||
469 | } | ||
470 | } | 464 | } |
471 | 465 | ||
472 | static void | 466 | static void |
diff --git a/fs/super.c b/fs/super.c index 0e7207b9815c..19eb70b374bc 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -465,6 +465,48 @@ rescan: | |||
465 | } | 465 | } |
466 | 466 | ||
467 | EXPORT_SYMBOL(get_super); | 467 | EXPORT_SYMBOL(get_super); |
468 | |||
469 | /** | ||
470 | * get_active_super - get an active reference to the superblock of a device | ||
471 | * @bdev: device to get the superblock for | ||
472 | * | ||
473 | * Scans the superblock list and finds the superblock of the file system | ||
474 | * mounted on the device given. Returns the superblock with an active | ||
475 | * reference and s_umount held exclusively or %NULL if none was found. | ||
476 | */ | ||
477 | struct super_block *get_active_super(struct block_device *bdev) | ||
478 | { | ||
479 | struct super_block *sb; | ||
480 | |||
481 | if (!bdev) | ||
482 | return NULL; | ||
483 | |||
484 | spin_lock(&sb_lock); | ||
485 | list_for_each_entry(sb, &super_blocks, s_list) { | ||
486 | if (sb->s_bdev != bdev) | ||
487 | continue; | ||
488 | |||
489 | sb->s_count++; | ||
490 | spin_unlock(&sb_lock); | ||
491 | down_write(&sb->s_umount); | ||
492 | if (sb->s_root) { | ||
493 | spin_lock(&sb_lock); | ||
494 | if (sb->s_count > S_BIAS) { | ||
495 | atomic_inc(&sb->s_active); | ||
496 | sb->s_count--; | ||
497 | spin_unlock(&sb_lock); | ||
498 | return sb; | ||
499 | } | ||
500 | spin_unlock(&sb_lock); | ||
501 | } | ||
502 | up_write(&sb->s_umount); | ||
503 | put_super(sb); | ||
504 | yield(); | ||
505 | spin_lock(&sb_lock); | ||
506 | } | ||
507 | spin_unlock(&sb_lock); | ||
508 | return NULL; | ||
509 | } | ||
468 | 510 | ||
469 | struct super_block * user_get_super(dev_t dev) | 511 | struct super_block * user_get_super(dev_t dev) |
470 | { | 512 | { |
@@ -527,11 +569,15 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) | |||
527 | { | 569 | { |
528 | int retval; | 570 | int retval; |
529 | int remount_rw; | 571 | int remount_rw; |
530 | 572 | ||
573 | if (sb->s_frozen != SB_UNFROZEN) | ||
574 | return -EBUSY; | ||
575 | |||
531 | #ifdef CONFIG_BLOCK | 576 | #ifdef CONFIG_BLOCK |
532 | if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev)) | 577 | if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev)) |
533 | return -EACCES; | 578 | return -EACCES; |
534 | #endif | 579 | #endif |
580 | |||
535 | if (flags & MS_RDONLY) | 581 | if (flags & MS_RDONLY) |
536 | acct_auto_close(sb); | 582 | acct_auto_close(sb); |
537 | shrink_dcache_sb(sb); | 583 | shrink_dcache_sb(sb); |
@@ -743,9 +789,14 @@ int get_sb_bdev(struct file_system_type *fs_type, | |||
743 | * will protect the lockfs code from trying to start a snapshot | 789 | * will protect the lockfs code from trying to start a snapshot |
744 | * while we are mounting | 790 | * while we are mounting |
745 | */ | 791 | */ |
746 | down(&bdev->bd_mount_sem); | 792 | mutex_lock(&bdev->bd_fsfreeze_mutex); |
793 | if (bdev->bd_fsfreeze_count > 0) { | ||
794 | mutex_unlock(&bdev->bd_fsfreeze_mutex); | ||
795 | error = -EBUSY; | ||
796 | goto error_bdev; | ||
797 | } | ||
747 | s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); | 798 | s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); |
748 | up(&bdev->bd_mount_sem); | 799 | mutex_unlock(&bdev->bd_fsfreeze_mutex); |
749 | if (IS_ERR(s)) | 800 | if (IS_ERR(s)) |
750 | goto error_s; | 801 | goto error_s; |
751 | 802 | ||
@@ -892,6 +943,16 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void | |||
892 | if (error) | 943 | if (error) |
893 | goto out_sb; | 944 | goto out_sb; |
894 | 945 | ||
946 | /* | ||
947 | * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE | ||
948 | * but s_maxbytes was an unsigned long long for many releases. Throw | ||
949 | * this warning for a little while to try and catch filesystems that | ||
950 | * violate this rule. This warning should be either removed or | ||
951 | * converted to a BUG() in 2.6.34. | ||
952 | */ | ||
953 | WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " | ||
954 | "negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes); | ||
955 | |||
895 | mnt->mnt_mountpoint = mnt->mnt_root; | 956 | mnt->mnt_mountpoint = mnt->mnt_root; |
896 | mnt->mnt_parent = mnt; | 957 | mnt->mnt_parent = mnt; |
897 | up_write(&mnt->mnt_sb->s_umount); | 958 | up_write(&mnt->mnt_sb->s_umount); |
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index d5e5559e31db..381854461b28 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -1635,4 +1635,5 @@ const struct address_space_operations xfs_address_space_operations = { | |||
1635 | .direct_IO = xfs_vm_direct_IO, | 1635 | .direct_IO = xfs_vm_direct_IO, |
1636 | .migratepage = buffer_migrate_page, | 1636 | .migratepage = buffer_migrate_page, |
1637 | .is_partially_uptodate = block_is_partially_uptodate, | 1637 | .is_partially_uptodate = block_is_partially_uptodate, |
1638 | .error_remove_page = generic_error_remove_page, | ||
1638 | }; | 1639 | }; |
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index 916c0ffb6083..c5bc67c4e3bb 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c | |||
@@ -26,7 +26,6 @@ STATIC int | |||
26 | xfs_stats_clear_proc_handler( | 26 | xfs_stats_clear_proc_handler( |
27 | ctl_table *ctl, | 27 | ctl_table *ctl, |
28 | int write, | 28 | int write, |
29 | struct file *filp, | ||
30 | void __user *buffer, | 29 | void __user *buffer, |
31 | size_t *lenp, | 30 | size_t *lenp, |
32 | loff_t *ppos) | 31 | loff_t *ppos) |
@@ -34,7 +33,7 @@ xfs_stats_clear_proc_handler( | |||
34 | int c, ret, *valp = ctl->data; | 33 | int c, ret, *valp = ctl->data; |
35 | __uint32_t vn_active; | 34 | __uint32_t vn_active; |
36 | 35 | ||
37 | ret = proc_dointvec_minmax(ctl, write, filp, buffer, lenp, ppos); | 36 | ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); |
38 | 37 | ||
39 | if (!ret && write && *valp) { | 38 | if (!ret && write && *valp) { |
40 | printk("XFS Clearing xfsstats\n"); | 39 | printk("XFS Clearing xfsstats\n"); |