diff options
| author | Linus Torvalds <torvalds@g5.osdl.org> | 2006-05-17 19:11:41 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-05-17 19:11:41 -0400 |
| commit | 15eb7105a74a0a5d72d006fec2192b0ec692b5cc (patch) | |
| tree | b774ed6e13751f590038c1d0454436326339cd62 | |
| parent | a5d1792847172077d173c959c37d4562b9ec69e6 (diff) | |
| parent | cef0893dcf1fdf22943aa49e75ee1eb3bfffe5f5 (diff) | |
Merge branch 'upstream-linus' of git://oss.oracle.com/home/sourcebo/git/ocfs2
* 'upstream-linus' of git://oss.oracle.com/home/sourcebo/git/ocfs2:
configfs: Make sure configfs_init() is called before consumers.
configfs: configfs_mkdir() failed to cleanup linkage.
configfs: Fix a reference leak in configfs_mkdir().
ocfs2: fix gfp mask in some file system paths
ocfs2: Don't populate uptodate cache in ocfs2_force_read_journal()
ocfs2: take meta data lock in ocfs2_file_aio_read()
ocfs2: take data locks around extend
| -rw-r--r-- | fs/Makefile | 2 | ||||
| -rw-r--r-- | fs/configfs/dir.c | 137 | ||||
| -rw-r--r-- | fs/ocfs2/aops.c | 46 | ||||
| -rw-r--r-- | fs/ocfs2/aops.h | 4 | ||||
| -rw-r--r-- | fs/ocfs2/extent_map.c | 6 | ||||
| -rw-r--r-- | fs/ocfs2/file.c | 86 | ||||
| -rw-r--r-- | fs/ocfs2/journal.c | 8 | ||||
| -rw-r--r-- | fs/ocfs2/uptodate.c | 4 | ||||
| -rw-r--r-- | fs/ocfs2/vote.c | 6 |
9 files changed, 213 insertions, 86 deletions
diff --git a/fs/Makefile b/fs/Makefile index 83bf478e786b..078d3d1191a5 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
| @@ -45,6 +45,7 @@ obj-$(CONFIG_DNOTIFY) += dnotify.o | |||
| 45 | obj-$(CONFIG_PROC_FS) += proc/ | 45 | obj-$(CONFIG_PROC_FS) += proc/ |
| 46 | obj-y += partitions/ | 46 | obj-y += partitions/ |
| 47 | obj-$(CONFIG_SYSFS) += sysfs/ | 47 | obj-$(CONFIG_SYSFS) += sysfs/ |
| 48 | obj-$(CONFIG_CONFIGFS_FS) += configfs/ | ||
| 48 | obj-y += devpts/ | 49 | obj-y += devpts/ |
| 49 | 50 | ||
| 50 | obj-$(CONFIG_PROFILING) += dcookies.o | 51 | obj-$(CONFIG_PROFILING) += dcookies.o |
| @@ -100,5 +101,4 @@ obj-$(CONFIG_BEFS_FS) += befs/ | |||
| 100 | obj-$(CONFIG_HOSTFS) += hostfs/ | 101 | obj-$(CONFIG_HOSTFS) += hostfs/ |
| 101 | obj-$(CONFIG_HPPFS) += hppfs/ | 102 | obj-$(CONFIG_HPPFS) += hppfs/ |
| 102 | obj-$(CONFIG_DEBUG_FS) += debugfs/ | 103 | obj-$(CONFIG_DEBUG_FS) += debugfs/ |
| 103 | obj-$(CONFIG_CONFIGFS_FS) += configfs/ | ||
| 104 | obj-$(CONFIG_OCFS2_FS) += ocfs2/ | 104 | obj-$(CONFIG_OCFS2_FS) += ocfs2/ |
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 5638c8f9362f..5f952187fc53 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c | |||
| @@ -505,13 +505,15 @@ static int populate_groups(struct config_group *group) | |||
| 505 | int i; | 505 | int i; |
| 506 | 506 | ||
| 507 | if (group->default_groups) { | 507 | if (group->default_groups) { |
| 508 | /* FYI, we're faking mkdir here | 508 | /* |
| 509 | * FYI, we're faking mkdir here | ||
| 509 | * I'm not sure we need this semaphore, as we're called | 510 | * I'm not sure we need this semaphore, as we're called |
| 510 | * from our parent's mkdir. That holds our parent's | 511 | * from our parent's mkdir. That holds our parent's |
| 511 | * i_mutex, so afaik lookup cannot continue through our | 512 | * i_mutex, so afaik lookup cannot continue through our |
| 512 | * parent to find us, let alone mess with our tree. | 513 | * parent to find us, let alone mess with our tree. |
| 513 | * That said, taking our i_mutex is closer to mkdir | 514 | * That said, taking our i_mutex is closer to mkdir |
| 514 | * emulation, and shouldn't hurt. */ | 515 | * emulation, and shouldn't hurt. |
| 516 | */ | ||
| 515 | mutex_lock(&dentry->d_inode->i_mutex); | 517 | mutex_lock(&dentry->d_inode->i_mutex); |
| 516 | 518 | ||
| 517 | for (i = 0; group->default_groups[i]; i++) { | 519 | for (i = 0; group->default_groups[i]; i++) { |
| @@ -546,20 +548,34 @@ static void unlink_obj(struct config_item *item) | |||
| 546 | 548 | ||
| 547 | item->ci_group = NULL; | 549 | item->ci_group = NULL; |
| 548 | item->ci_parent = NULL; | 550 | item->ci_parent = NULL; |
| 551 | |||
| 552 | /* Drop the reference for ci_entry */ | ||
| 549 | config_item_put(item); | 553 | config_item_put(item); |
| 550 | 554 | ||
| 555 | /* Drop the reference for ci_parent */ | ||
| 551 | config_group_put(group); | 556 | config_group_put(group); |
| 552 | } | 557 | } |
| 553 | } | 558 | } |
| 554 | 559 | ||
| 555 | static void link_obj(struct config_item *parent_item, struct config_item *item) | 560 | static void link_obj(struct config_item *parent_item, struct config_item *item) |
| 556 | { | 561 | { |
| 557 | /* Parent seems redundant with group, but it makes certain | 562 | /* |
| 558 | * traversals much nicer. */ | 563 | * Parent seems redundant with group, but it makes certain |
| 564 | * traversals much nicer. | ||
| 565 | */ | ||
| 559 | item->ci_parent = parent_item; | 566 | item->ci_parent = parent_item; |
| 567 | |||
| 568 | /* | ||
| 569 | * We hold a reference on the parent for the child's ci_parent | ||
| 570 | * link. | ||
| 571 | */ | ||
| 560 | item->ci_group = config_group_get(to_config_group(parent_item)); | 572 | item->ci_group = config_group_get(to_config_group(parent_item)); |
| 561 | list_add_tail(&item->ci_entry, &item->ci_group->cg_children); | 573 | list_add_tail(&item->ci_entry, &item->ci_group->cg_children); |
| 562 | 574 | ||
| 575 | /* | ||
| 576 | * We hold a reference on the child for ci_entry on the parent's | ||
| 577 | * cg_children | ||
| 578 | */ | ||
| 563 | config_item_get(item); | 579 | config_item_get(item); |
| 564 | } | 580 | } |
| 565 | 581 | ||
| @@ -684,6 +700,10 @@ static void client_drop_item(struct config_item *parent_item, | |||
| 684 | type = parent_item->ci_type; | 700 | type = parent_item->ci_type; |
| 685 | BUG_ON(!type); | 701 | BUG_ON(!type); |
| 686 | 702 | ||
| 703 | /* | ||
| 704 | * If ->drop_item() exists, it is responsible for the | ||
| 705 | * config_item_put(). | ||
| 706 | */ | ||
| 687 | if (type->ct_group_ops && type->ct_group_ops->drop_item) | 707 | if (type->ct_group_ops && type->ct_group_ops->drop_item) |
| 688 | type->ct_group_ops->drop_item(to_config_group(parent_item), | 708 | type->ct_group_ops->drop_item(to_config_group(parent_item), |
| 689 | item); | 709 | item); |
| @@ -694,23 +714,28 @@ static void client_drop_item(struct config_item *parent_item, | |||
| 694 | 714 | ||
| 695 | static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | 715 | static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) |
| 696 | { | 716 | { |
| 697 | int ret; | 717 | int ret, module_got = 0; |
| 698 | struct config_group *group; | 718 | struct config_group *group; |
| 699 | struct config_item *item; | 719 | struct config_item *item; |
| 700 | struct config_item *parent_item; | 720 | struct config_item *parent_item; |
| 701 | struct configfs_subsystem *subsys; | 721 | struct configfs_subsystem *subsys; |
| 702 | struct configfs_dirent *sd; | 722 | struct configfs_dirent *sd; |
| 703 | struct config_item_type *type; | 723 | struct config_item_type *type; |
| 704 | struct module *owner; | 724 | struct module *owner = NULL; |
| 705 | char *name; | 725 | char *name; |
| 706 | 726 | ||
| 707 | if (dentry->d_parent == configfs_sb->s_root) | 727 | if (dentry->d_parent == configfs_sb->s_root) { |
| 708 | return -EPERM; | 728 | ret = -EPERM; |
| 729 | goto out; | ||
| 730 | } | ||
| 709 | 731 | ||
| 710 | sd = dentry->d_parent->d_fsdata; | 732 | sd = dentry->d_parent->d_fsdata; |
| 711 | if (!(sd->s_type & CONFIGFS_USET_DIR)) | 733 | if (!(sd->s_type & CONFIGFS_USET_DIR)) { |
| 712 | return -EPERM; | 734 | ret = -EPERM; |
| 735 | goto out; | ||
| 736 | } | ||
| 713 | 737 | ||
| 738 | /* Get a working ref for the duration of this function */ | ||
| 714 | parent_item = configfs_get_config_item(dentry->d_parent); | 739 | parent_item = configfs_get_config_item(dentry->d_parent); |
| 715 | type = parent_item->ci_type; | 740 | type = parent_item->ci_type; |
| 716 | subsys = to_config_group(parent_item)->cg_subsys; | 741 | subsys = to_config_group(parent_item)->cg_subsys; |
| @@ -719,15 +744,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 719 | if (!type || !type->ct_group_ops || | 744 | if (!type || !type->ct_group_ops || |
| 720 | (!type->ct_group_ops->make_group && | 745 | (!type->ct_group_ops->make_group && |
| 721 | !type->ct_group_ops->make_item)) { | 746 | !type->ct_group_ops->make_item)) { |
| 722 | config_item_put(parent_item); | 747 | ret = -EPERM; /* Lack-of-mkdir returns -EPERM */ |
| 723 | return -EPERM; /* What lack-of-mkdir returns */ | 748 | goto out_put; |
| 724 | } | 749 | } |
| 725 | 750 | ||
| 726 | name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL); | 751 | name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL); |
| 727 | if (!name) { | 752 | if (!name) { |
| 728 | config_item_put(parent_item); | 753 | ret = -ENOMEM; |
| 729 | return -ENOMEM; | 754 | goto out_put; |
| 730 | } | 755 | } |
| 756 | |||
| 731 | snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); | 757 | snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); |
| 732 | 758 | ||
| 733 | down(&subsys->su_sem); | 759 | down(&subsys->su_sem); |
| @@ -748,40 +774,67 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 748 | 774 | ||
| 749 | kfree(name); | 775 | kfree(name); |
| 750 | if (!item) { | 776 | if (!item) { |
| 751 | config_item_put(parent_item); | 777 | /* |
| 752 | return -ENOMEM; | 778 | * If item == NULL, then link_obj() was never called. |
| 779 | * There are no extra references to clean up. | ||
| 780 | */ | ||
| 781 | ret = -ENOMEM; | ||
| 782 | goto out_put; | ||
| 753 | } | 783 | } |
| 754 | 784 | ||
| 755 | ret = -EINVAL; | 785 | /* |
| 786 | * link_obj() has been called (via link_group() for groups). | ||
| 787 | * From here on out, errors must clean that up. | ||
| 788 | */ | ||
| 789 | |||
| 756 | type = item->ci_type; | 790 | type = item->ci_type; |
| 757 | if (type) { | 791 | if (!type) { |
| 758 | owner = type->ct_owner; | 792 | ret = -EINVAL; |
| 759 | if (try_module_get(owner)) { | 793 | goto out_unlink; |
| 760 | if (group) { | 794 | } |
| 761 | ret = configfs_attach_group(parent_item, | ||
| 762 | item, | ||
| 763 | dentry); | ||
| 764 | } else { | ||
| 765 | ret = configfs_attach_item(parent_item, | ||
| 766 | item, | ||
| 767 | dentry); | ||
| 768 | } | ||
| 769 | 795 | ||
| 770 | if (ret) { | 796 | owner = type->ct_owner; |
| 771 | down(&subsys->su_sem); | 797 | if (!try_module_get(owner)) { |
| 772 | if (group) | 798 | ret = -EINVAL; |
| 773 | unlink_group(group); | 799 | goto out_unlink; |
| 774 | else | 800 | } |
| 775 | unlink_obj(item); | ||
| 776 | client_drop_item(parent_item, item); | ||
| 777 | up(&subsys->su_sem); | ||
| 778 | 801 | ||
| 779 | config_item_put(parent_item); | 802 | /* |
| 780 | module_put(owner); | 803 | * I hate doing it this way, but if there is |
| 781 | } | 804 | * an error, module_put() probably should |
| 782 | } | 805 | * happen after any cleanup. |
| 806 | */ | ||
| 807 | module_got = 1; | ||
| 808 | |||
| 809 | if (group) | ||
| 810 | ret = configfs_attach_group(parent_item, item, dentry); | ||
| 811 | else | ||
| 812 | ret = configfs_attach_item(parent_item, item, dentry); | ||
| 813 | |||
| 814 | out_unlink: | ||
| 815 | if (ret) { | ||
| 816 | /* Tear down everything we built up */ | ||
| 817 | down(&subsys->su_sem); | ||
| 818 | if (group) | ||
| 819 | unlink_group(group); | ||
| 820 | else | ||
| 821 | unlink_obj(item); | ||
| 822 | client_drop_item(parent_item, item); | ||
| 823 | up(&subsys->su_sem); | ||
| 824 | |||
| 825 | if (module_got) | ||
| 826 | module_put(owner); | ||
| 783 | } | 827 | } |
| 784 | 828 | ||
| 829 | out_put: | ||
| 830 | /* | ||
| 831 | * link_obj()/link_group() took a reference from child->parent, | ||
| 832 | * so the parent is safely pinned. We can drop our working | ||
| 833 | * reference. | ||
| 834 | */ | ||
| 835 | config_item_put(parent_item); | ||
| 836 | |||
| 837 | out: | ||
| 785 | return ret; | 838 | return ret; |
| 786 | } | 839 | } |
| 787 | 840 | ||
| @@ -801,6 +854,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 801 | if (sd->s_type & CONFIGFS_USET_DEFAULT) | 854 | if (sd->s_type & CONFIGFS_USET_DEFAULT) |
| 802 | return -EPERM; | 855 | return -EPERM; |
| 803 | 856 | ||
| 857 | /* Get a working ref until we have the child */ | ||
| 804 | parent_item = configfs_get_config_item(dentry->d_parent); | 858 | parent_item = configfs_get_config_item(dentry->d_parent); |
| 805 | subsys = to_config_group(parent_item)->cg_subsys; | 859 | subsys = to_config_group(parent_item)->cg_subsys; |
| 806 | BUG_ON(!subsys); | 860 | BUG_ON(!subsys); |
| @@ -817,6 +871,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 817 | return ret; | 871 | return ret; |
| 818 | } | 872 | } |
| 819 | 873 | ||
| 874 | /* Get a working ref for the duration of this function */ | ||
| 820 | item = configfs_get_config_item(dentry); | 875 | item = configfs_get_config_item(dentry); |
| 821 | 876 | ||
| 822 | /* Drop reference from above, item already holds one. */ | 877 | /* Drop reference from above, item already holds one. */ |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 0d858d0b25be..47152bf9a7f2 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
| @@ -276,13 +276,29 @@ static int ocfs2_writepage(struct page *page, struct writeback_control *wbc) | |||
| 276 | return ret; | 276 | return ret; |
| 277 | } | 277 | } |
| 278 | 278 | ||
| 279 | /* This can also be called from ocfs2_write_zero_page() which has done | ||
| 280 | * its own cluster locking. */ | |||
| 281 | int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page, | ||
| 282 | unsigned from, unsigned to) | ||
| 283 | { | ||
| 284 | int ret; | ||
| 285 | |||
| 286 | down_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 287 | |||
| 288 | ret = block_prepare_write(page, from, to, ocfs2_get_block); | ||
| 289 | |||
| 290 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 291 | |||
| 292 | return ret; | ||
| 293 | } | ||
| 294 | |||
| 279 | /* | 295 | /* |
| 280 | * ocfs2_prepare_write() can be an outer-most ocfs2 call when it is called | 296 | * ocfs2_prepare_write() can be an outer-most ocfs2 call when it is called |
| 281 | * from loopback. It must be able to perform its own locking around | 297 | * from loopback. It must be able to perform its own locking around |
| 282 | * ocfs2_get_block(). | 298 | * ocfs2_get_block(). |
| 283 | */ | 299 | */ |
| 284 | int ocfs2_prepare_write(struct file *file, struct page *page, | 300 | static int ocfs2_prepare_write(struct file *file, struct page *page, |
| 285 | unsigned from, unsigned to) | 301 | unsigned from, unsigned to) |
| 286 | { | 302 | { |
| 287 | struct inode *inode = page->mapping->host; | 303 | struct inode *inode = page->mapping->host; |
| 288 | int ret; | 304 | int ret; |
| @@ -295,11 +311,7 @@ int ocfs2_prepare_write(struct file *file, struct page *page, | |||
| 295 | goto out; | 311 | goto out; |
| 296 | } | 312 | } |
| 297 | 313 | ||
| 298 | down_read(&OCFS2_I(inode)->ip_alloc_sem); | 314 | ret = ocfs2_prepare_write_nolock(inode, page, from, to); |
| 299 | |||
| 300 | ret = block_prepare_write(page, from, to, ocfs2_get_block); | ||
| 301 | |||
| 302 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 303 | 315 | ||
| 304 | ocfs2_meta_unlock(inode, 0); | 316 | ocfs2_meta_unlock(inode, 0); |
| 305 | out: | 317 | out: |
| @@ -625,11 +637,31 @@ static ssize_t ocfs2_direct_IO(int rw, | |||
| 625 | int ret; | 637 | int ret; |
| 626 | 638 | ||
| 627 | mlog_entry_void(); | 639 | mlog_entry_void(); |
| 640 | |||
| 641 | /* | ||
| 642 | * We get PR data locks even for O_DIRECT. This allows | ||
| 643 | * concurrent O_DIRECT I/O but doesn't let O_DIRECT with | ||
| 644 | * extending and buffered zeroing writes race. If they did | ||
| 645 | * race then the buffered zeroing could be written back after | ||
| 646 | * the O_DIRECT I/O. It's one thing to tell people not to mix | ||
| 647 | * buffered and O_DIRECT writes, but expecting them to | ||
| 648 | * understand that file extension is also an implicit buffered | ||
| 649 | * write is too much. By getting the PR we force writeback of | ||
| 650 | * the buffered zeroing before proceeding. | ||
| 651 | */ | ||
| 652 | ret = ocfs2_data_lock(inode, 0); | ||
| 653 | if (ret < 0) { | ||
| 654 | mlog_errno(ret); | ||
| 655 | goto out; | ||
| 656 | } | ||
| 657 | ocfs2_data_unlock(inode, 0); | ||
| 658 | |||
| 628 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, | 659 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, |
| 629 | inode->i_sb->s_bdev, iov, offset, | 660 | inode->i_sb->s_bdev, iov, offset, |
| 630 | nr_segs, | 661 | nr_segs, |
| 631 | ocfs2_direct_IO_get_blocks, | 662 | ocfs2_direct_IO_get_blocks, |
| 632 | ocfs2_dio_end_io); | 663 | ocfs2_dio_end_io); |
| 664 | out: | ||
| 633 | mlog_exit(ret); | 665 | mlog_exit(ret); |
| 634 | return ret; | 666 | return ret; |
| 635 | } | 667 | } |
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index d40456d509a0..e88c3f0b8fa9 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h | |||
| @@ -22,8 +22,8 @@ | |||
| 22 | #ifndef OCFS2_AOPS_H | 22 | #ifndef OCFS2_AOPS_H |
| 23 | #define OCFS2_AOPS_H | 23 | #define OCFS2_AOPS_H |
| 24 | 24 | ||
| 25 | int ocfs2_prepare_write(struct file *file, struct page *page, | 25 | int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page, |
| 26 | unsigned from, unsigned to); | 26 | unsigned from, unsigned to); |
| 27 | 27 | ||
| 28 | struct ocfs2_journal_handle *ocfs2_start_walk_page_trans(struct inode *inode, | 28 | struct ocfs2_journal_handle *ocfs2_start_walk_page_trans(struct inode *inode, |
| 29 | struct page *page, | 29 | struct page *page, |
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 4601fc256f11..1a5c69071df6 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c | |||
| @@ -569,7 +569,7 @@ static int ocfs2_extent_map_insert(struct inode *inode, | |||
| 569 | 569 | ||
| 570 | ret = -ENOMEM; | 570 | ret = -ENOMEM; |
| 571 | ctxt.new_ent = kmem_cache_alloc(ocfs2_em_ent_cachep, | 571 | ctxt.new_ent = kmem_cache_alloc(ocfs2_em_ent_cachep, |
| 572 | GFP_KERNEL); | 572 | GFP_NOFS); |
| 573 | if (!ctxt.new_ent) { | 573 | if (!ctxt.new_ent) { |
| 574 | mlog_errno(ret); | 574 | mlog_errno(ret); |
| 575 | return ret; | 575 | return ret; |
| @@ -583,14 +583,14 @@ static int ocfs2_extent_map_insert(struct inode *inode, | |||
| 583 | if (ctxt.need_left && !ctxt.left_ent) { | 583 | if (ctxt.need_left && !ctxt.left_ent) { |
| 584 | ctxt.left_ent = | 584 | ctxt.left_ent = |
| 585 | kmem_cache_alloc(ocfs2_em_ent_cachep, | 585 | kmem_cache_alloc(ocfs2_em_ent_cachep, |
| 586 | GFP_KERNEL); | 586 | GFP_NOFS); |
| 587 | if (!ctxt.left_ent) | 587 | if (!ctxt.left_ent) |
| 588 | break; | 588 | break; |
| 589 | } | 589 | } |
| 590 | if (ctxt.need_right && !ctxt.right_ent) { | 590 | if (ctxt.need_right && !ctxt.right_ent) { |
| 591 | ctxt.right_ent = | 591 | ctxt.right_ent = |
| 592 | kmem_cache_alloc(ocfs2_em_ent_cachep, | 592 | kmem_cache_alloc(ocfs2_em_ent_cachep, |
| 593 | GFP_KERNEL); | 593 | GFP_NOFS); |
| 594 | if (!ctxt.right_ent) | 594 | if (!ctxt.right_ent) |
| 595 | break; | 595 | break; |
| 596 | } | 596 | } |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 581eb451a41a..a9559c874530 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
| @@ -613,7 +613,8 @@ leave: | |||
| 613 | 613 | ||
| 614 | /* Some parts of this taken from generic_cont_expand, which turned out | 614 | /* Some parts of this taken from generic_cont_expand, which turned out |
| 615 | * to be too fragile to do exactly what we need without us having to | 615 | * to be too fragile to do exactly what we need without us having to |
| 616 | * worry about recursive locking in ->commit_write(). */ | 616 | * worry about recursive locking in ->prepare_write() and |
| 617 | * ->commit_write(). */ | ||
| 617 | static int ocfs2_write_zero_page(struct inode *inode, | 618 | static int ocfs2_write_zero_page(struct inode *inode, |
| 618 | u64 size) | 619 | u64 size) |
| 619 | { | 620 | { |
| @@ -641,7 +642,7 @@ static int ocfs2_write_zero_page(struct inode *inode, | |||
| 641 | goto out; | 642 | goto out; |
| 642 | } | 643 | } |
| 643 | 644 | ||
| 644 | ret = ocfs2_prepare_write(NULL, page, offset, offset); | 645 | ret = ocfs2_prepare_write_nolock(inode, page, offset, offset); |
| 645 | if (ret < 0) { | 646 | if (ret < 0) { |
| 646 | mlog_errno(ret); | 647 | mlog_errno(ret); |
| 647 | goto out_unlock; | 648 | goto out_unlock; |
| @@ -695,13 +696,26 @@ out: | |||
| 695 | return ret; | 696 | return ret; |
| 696 | } | 697 | } |
| 697 | 698 | ||
| 699 | /* | ||
| 700 | * A tail_to_skip value > 0 indicates that we're being called from | ||
| 701 | * ocfs2_file_aio_write(). This has the following implications: | ||
| 702 | * | ||
| 703 | * - we don't want to update i_size | ||
| 704 | * - di_bh will be NULL, which is fine because it's only used in the | ||
| 705 | * case where we want to update i_size. | ||
| 706 | * - ocfs2_zero_extend() will then only be filling the hole created | ||
| 707 | * between i_size and the start of the write. | ||
| 708 | */ | ||
| 698 | static int ocfs2_extend_file(struct inode *inode, | 709 | static int ocfs2_extend_file(struct inode *inode, |
| 699 | struct buffer_head *di_bh, | 710 | struct buffer_head *di_bh, |
| 700 | u64 new_i_size) | 711 | u64 new_i_size, |
| 712 | size_t tail_to_skip) | ||
| 701 | { | 713 | { |
| 702 | int ret = 0; | 714 | int ret = 0; |
| 703 | u32 clusters_to_add; | 715 | u32 clusters_to_add; |
| 704 | 716 | ||
| 717 | BUG_ON(!tail_to_skip && !di_bh); | ||
| 718 | |||
| 705 | /* setattr sometimes calls us like this. */ | 719 | /* setattr sometimes calls us like this. */ |
| 706 | if (new_i_size == 0) | 720 | if (new_i_size == 0) |
| 707 | goto out; | 721 | goto out; |
| @@ -714,27 +728,44 @@ static int ocfs2_extend_file(struct inode *inode, | |||
| 714 | OCFS2_I(inode)->ip_clusters; | 728 | OCFS2_I(inode)->ip_clusters; |
| 715 | 729 | ||
| 716 | if (clusters_to_add) { | 730 | if (clusters_to_add) { |
| 717 | ret = ocfs2_extend_allocation(inode, clusters_to_add); | 731 | /* |
| 732 | * protect the pages that ocfs2_zero_extend is going to | ||
| 733 | * be pulling into the page cache. We do this before the | ||
| 734 | * metadata extend so that we don't get into the situation | ||
| 735 | * where we've extended the metadata but can't get the data | ||
| 736 | * lock to zero. | ||
| 737 | */ | ||
| 738 | ret = ocfs2_data_lock(inode, 1); | ||
| 718 | if (ret < 0) { | 739 | if (ret < 0) { |
| 719 | mlog_errno(ret); | 740 | mlog_errno(ret); |
| 720 | goto out; | 741 | goto out; |
| 721 | } | 742 | } |
| 722 | 743 | ||
| 723 | ret = ocfs2_zero_extend(inode, new_i_size); | 744 | ret = ocfs2_extend_allocation(inode, clusters_to_add); |
| 724 | if (ret < 0) { | 745 | if (ret < 0) { |
| 725 | mlog_errno(ret); | 746 | mlog_errno(ret); |
| 726 | goto out; | 747 | goto out_unlock; |
| 727 | } | 748 | } |
| 728 | } | ||
| 729 | 749 | ||
| 730 | /* No allocation required, we just use this helper to | 750 | ret = ocfs2_zero_extend(inode, (u64)new_i_size - tail_to_skip); |
| 731 | * do a trivial update of i_size. */ | 751 | if (ret < 0) { |
| 732 | ret = ocfs2_simple_size_update(inode, di_bh, new_i_size); | 752 | mlog_errno(ret); |
| 733 | if (ret < 0) { | 753 | goto out_unlock; |
| 734 | mlog_errno(ret); | 754 | } |
| 735 | goto out; | 755 | } |
| 756 | |||
| 757 | if (!tail_to_skip) { | ||
| 758 | /* We're being called from ocfs2_setattr() which wants | ||
| 759 | * us to update i_size */ | ||
| 760 | ret = ocfs2_simple_size_update(inode, di_bh, new_i_size); | ||
| 761 | if (ret < 0) | ||
| 762 | mlog_errno(ret); | ||
| 736 | } | 763 | } |
| 737 | 764 | ||
| 765 | out_unlock: | ||
| 766 | if (clusters_to_add) /* this is the only case in which we lock */ | ||
| 767 | ocfs2_data_unlock(inode, 1); | ||
| 768 | |||
| 738 | out: | 769 | out: |
| 739 | return ret; | 770 | return ret; |
| 740 | } | 771 | } |
| @@ -793,7 +824,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 793 | if (i_size_read(inode) > attr->ia_size) | 824 | if (i_size_read(inode) > attr->ia_size) |
| 794 | status = ocfs2_truncate_file(inode, bh, attr->ia_size); | 825 | status = ocfs2_truncate_file(inode, bh, attr->ia_size); |
| 795 | else | 826 | else |
| 796 | status = ocfs2_extend_file(inode, bh, attr->ia_size); | 827 | status = ocfs2_extend_file(inode, bh, attr->ia_size, 0); |
| 797 | if (status < 0) { | 828 | if (status < 0) { |
| 798 | if (status != -ENOSPC) | 829 | if (status != -ENOSPC) |
| 799 | mlog_errno(status); | 830 | mlog_errno(status); |
| @@ -1049,21 +1080,12 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | |||
| 1049 | if (!clusters) | 1080 | if (!clusters) |
| 1050 | break; | 1081 | break; |
| 1051 | 1082 | ||
| 1052 | ret = ocfs2_extend_allocation(inode, clusters); | 1083 | ret = ocfs2_extend_file(inode, NULL, newsize, count); |
| 1053 | if (ret < 0) { | 1084 | if (ret < 0) { |
| 1054 | if (ret != -ENOSPC) | 1085 | if (ret != -ENOSPC) |
| 1055 | mlog_errno(ret); | 1086 | mlog_errno(ret); |
| 1056 | goto out; | 1087 | goto out; |
| 1057 | } | 1088 | } |
| 1058 | |||
| 1059 | /* Fill any holes which would've been created by this | ||
| 1060 | * write. If we're O_APPEND, this will wind up | ||
| 1061 | * (correctly) being a noop. */ | ||
| 1062 | ret = ocfs2_zero_extend(inode, (u64) newsize - count); | ||
| 1063 | if (ret < 0) { | ||
| 1064 | mlog_errno(ret); | ||
| 1065 | goto out; | ||
| 1066 | } | ||
| 1067 | break; | 1089 | break; |
| 1068 | } | 1090 | } |
| 1069 | 1091 | ||
| @@ -1146,6 +1168,22 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, | |||
| 1146 | ocfs2_iocb_set_rw_locked(iocb); | 1168 | ocfs2_iocb_set_rw_locked(iocb); |
| 1147 | } | 1169 | } |
| 1148 | 1170 | ||
| 1171 | /* | ||
| 1172 | * We're fine letting folks race truncates and extending | ||
| 1173 | * writes with read across the cluster, just like they can | ||
| 1174 | * locally. Hence no rw_lock during read. | ||
| 1175 | * | ||
| 1176 | * Take and drop the meta data lock to update inode fields | ||
| 1177 | * like i_size. This allows the checks down below | ||
| 1178 | * generic_file_aio_read() a chance of actually working. | ||
| 1179 | */ | ||
| 1180 | ret = ocfs2_meta_lock(inode, NULL, NULL, 0); | ||
| 1181 | if (ret < 0) { | ||
| 1182 | mlog_errno(ret); | ||
| 1183 | goto bail; | ||
| 1184 | } | ||
| 1185 | ocfs2_meta_unlock(inode, 0); | ||
| 1186 | |||
| 1149 | ret = generic_file_aio_read(iocb, buf, count, iocb->ki_pos); | 1187 | ret = generic_file_aio_read(iocb, buf, count, iocb->ki_pos); |
| 1150 | if (ret == -EINVAL) | 1188 | if (ret == -EINVAL) |
| 1151 | mlog(ML_ERROR, "generic_file_aio_read returned -EINVAL\n"); | 1189 | mlog(ML_ERROR, "generic_file_aio_read returned -EINVAL\n"); |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 6a610ae53583..eebc3cfa6be8 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
| @@ -117,7 +117,7 @@ struct ocfs2_journal_handle *ocfs2_alloc_handle(struct ocfs2_super *osb) | |||
| 117 | { | 117 | { |
| 118 | struct ocfs2_journal_handle *retval = NULL; | 118 | struct ocfs2_journal_handle *retval = NULL; |
| 119 | 119 | ||
| 120 | retval = kcalloc(1, sizeof(*retval), GFP_KERNEL); | 120 | retval = kcalloc(1, sizeof(*retval), GFP_NOFS); |
| 121 | if (!retval) { | 121 | if (!retval) { |
| 122 | mlog(ML_ERROR, "Failed to allocate memory for journal " | 122 | mlog(ML_ERROR, "Failed to allocate memory for journal " |
| 123 | "handle!\n"); | 123 | "handle!\n"); |
| @@ -870,9 +870,11 @@ static int ocfs2_force_read_journal(struct inode *inode) | |||
| 870 | if (p_blocks > CONCURRENT_JOURNAL_FILL) | 870 | if (p_blocks > CONCURRENT_JOURNAL_FILL) |
| 871 | p_blocks = CONCURRENT_JOURNAL_FILL; | 871 | p_blocks = CONCURRENT_JOURNAL_FILL; |
| 872 | 872 | ||
| 873 | /* We are reading journal data which should not | ||
| 874 | * be put in the uptodate cache */ | ||
| 873 | status = ocfs2_read_blocks(OCFS2_SB(inode->i_sb), | 875 | status = ocfs2_read_blocks(OCFS2_SB(inode->i_sb), |
| 874 | p_blkno, p_blocks, bhs, 0, | 876 | p_blkno, p_blocks, bhs, 0, |
| 875 | inode); | 877 | NULL); |
| 876 | if (status < 0) { | 878 | if (status < 0) { |
| 877 | mlog_errno(status); | 879 | mlog_errno(status); |
| 878 | goto bail; | 880 | goto bail; |
| @@ -982,7 +984,7 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, | |||
| 982 | { | 984 | { |
| 983 | struct ocfs2_la_recovery_item *item; | 985 | struct ocfs2_la_recovery_item *item; |
| 984 | 986 | ||
| 985 | item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_KERNEL); | 987 | item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS); |
| 986 | if (!item) { | 988 | if (!item) { |
| 987 | /* Though we wish to avoid it, we are in fact safe in | 989 | /* Though we wish to avoid it, we are in fact safe in |
| 988 | * skipping local alloc cleanup as fsck.ocfs2 is more | 990 | * skipping local alloc cleanup as fsck.ocfs2 is more |
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c index 04a684dfdd96..b8a00a793326 100644 --- a/fs/ocfs2/uptodate.c +++ b/fs/ocfs2/uptodate.c | |||
| @@ -337,7 +337,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi, | |||
| 337 | (unsigned long long)oi->ip_blkno, | 337 | (unsigned long long)oi->ip_blkno, |
| 338 | (unsigned long long)block, expand_tree); | 338 | (unsigned long long)block, expand_tree); |
| 339 | 339 | ||
| 340 | new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_KERNEL); | 340 | new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_NOFS); |
| 341 | if (!new) { | 341 | if (!new) { |
| 342 | mlog_errno(-ENOMEM); | 342 | mlog_errno(-ENOMEM); |
| 343 | return; | 343 | return; |
| @@ -349,7 +349,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi, | |||
| 349 | * has no way of tracking that. */ | 349 | * has no way of tracking that. */ |
| 350 | for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) { | 350 | for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) { |
| 351 | tree[i] = kmem_cache_alloc(ocfs2_uptodate_cachep, | 351 | tree[i] = kmem_cache_alloc(ocfs2_uptodate_cachep, |
| 352 | GFP_KERNEL); | 352 | GFP_NOFS); |
| 353 | if (!tree[i]) { | 353 | if (!tree[i]) { |
| 354 | mlog_errno(-ENOMEM); | 354 | mlog_errno(-ENOMEM); |
| 355 | goto out_free; | 355 | goto out_free; |
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c index 53049a204197..ee42765a8553 100644 --- a/fs/ocfs2/vote.c +++ b/fs/ocfs2/vote.c | |||
| @@ -586,7 +586,7 @@ static struct ocfs2_net_wait_ctxt *ocfs2_new_net_wait_ctxt(unsigned int response | |||
| 586 | { | 586 | { |
| 587 | struct ocfs2_net_wait_ctxt *w; | 587 | struct ocfs2_net_wait_ctxt *w; |
| 588 | 588 | ||
| 589 | w = kcalloc(1, sizeof(*w), GFP_KERNEL); | 589 | w = kcalloc(1, sizeof(*w), GFP_NOFS); |
| 590 | if (!w) { | 590 | if (!w) { |
| 591 | mlog_errno(-ENOMEM); | 591 | mlog_errno(-ENOMEM); |
| 592 | goto bail; | 592 | goto bail; |
| @@ -749,7 +749,7 @@ static struct ocfs2_vote_msg * ocfs2_new_vote_request(struct ocfs2_super *osb, | |||
| 749 | 749 | ||
| 750 | BUG_ON(!ocfs2_is_valid_vote_request(type)); | 750 | BUG_ON(!ocfs2_is_valid_vote_request(type)); |
| 751 | 751 | ||
| 752 | request = kcalloc(1, sizeof(*request), GFP_KERNEL); | 752 | request = kcalloc(1, sizeof(*request), GFP_NOFS); |
| 753 | if (!request) { | 753 | if (!request) { |
| 754 | mlog_errno(-ENOMEM); | 754 | mlog_errno(-ENOMEM); |
| 755 | } else { | 755 | } else { |
| @@ -1129,7 +1129,7 @@ static int ocfs2_handle_vote_message(struct o2net_msg *msg, | |||
| 1129 | struct ocfs2_super *osb = data; | 1129 | struct ocfs2_super *osb = data; |
| 1130 | struct ocfs2_vote_work *work; | 1130 | struct ocfs2_vote_work *work; |
| 1131 | 1131 | ||
| 1132 | work = kmalloc(sizeof(struct ocfs2_vote_work), GFP_KERNEL); | 1132 | work = kmalloc(sizeof(struct ocfs2_vote_work), GFP_NOFS); |
| 1133 | if (!work) { | 1133 | if (!work) { |
| 1134 | status = -ENOMEM; | 1134 | status = -ENOMEM; |
| 1135 | mlog_errno(status); | 1135 | mlog_errno(status); |
