aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJames Bottomley <jejb@mulgrave.il.steeleye.com>2006-06-10 14:47:26 -0400
committerJames Bottomley <jejb@mulgrave.il.steeleye.com>2006-06-10 14:47:26 -0400
commitf0cd91a68acdc9b49d7f6738b514a426da627649 (patch)
tree8ad73564015794197583b094217ae0a71e71e753 /fs
parent60eef25701d25e99c991dd0f4a9f3832a0c3ad3e (diff)
parent128e6ced247cda88f96fa9f2e4ba8b2c4a681560 (diff)
Merge ../linux-2.6
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/fcall.c21
-rw-r--r--fs/9p/mux.c222
-rw-r--r--fs/9p/mux.h4
-rw-r--r--fs/9p/vfs_file.c13
-rw-r--r--fs/9p/vfs_inode.c19
-rw-r--r--fs/Kconfig6
-rw-r--r--fs/Makefile2
-rw-r--r--fs/affs/namei.c3
-rw-r--r--fs/autofs4/autofs_i.h5
-rw-r--r--fs/autofs4/root.c10
-rw-r--r--fs/autofs4/waitq.c77
-rw-r--r--fs/binfmt_flat.c30
-rw-r--r--fs/bio.c3
-rw-r--r--fs/block_dev.c2
-rw-r--r--fs/cifs/CHANGES13
-rw-r--r--fs/cifs/README8
-rw-r--r--fs/cifs/cifsfs.c99
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsproto.h2
-rw-r--r--fs/cifs/cifssmb.c42
-rw-r--r--fs/cifs/connect.c103
-rw-r--r--fs/cifs/dir.c18
-rw-r--r--fs/cifs/fcntl.c2
-rw-r--r--fs/cifs/file.c44
-rw-r--r--fs/cifs/inode.c6
-rw-r--r--fs/cifs/link.c6
-rw-r--r--fs/cifs/ntlmssp.c14
-rw-r--r--fs/cifs/readdir.c45
-rw-r--r--fs/cifs/xattr.c8
-rw-r--r--fs/compat.c195
-rw-r--r--fs/configfs/dir.c137
-rw-r--r--fs/debugfs/inode.c3
-rw-r--r--fs/exec.c29
-rw-r--r--fs/exportfs/expfs.c2
-rw-r--r--fs/ext3/inode.c13
-rw-r--r--fs/ext3/ioctl.c18
-rw-r--r--fs/ext3/resize.c2
-rw-r--r--fs/fuse/dev.c39
-rw-r--r--fs/fuse/file.c10
-rw-r--r--fs/fuse/fuse_i.h3
-rw-r--r--fs/fuse/inode.c13
-rw-r--r--fs/inotify.c9
-rw-r--r--fs/jffs2/nodelist.c6
-rw-r--r--fs/jfs/jfs_metapage.c20
-rw-r--r--fs/lockd/svclock.c2
-rw-r--r--fs/locks.c30
-rw-r--r--fs/namei.c19
-rw-r--r--fs/namespace.c7
-rw-r--r--fs/nfs/dir.c5
-rw-r--r--fs/nfs/direct.c8
-rw-r--r--fs/nfs/file.c5
-rw-r--r--fs/nfs/inode.c5
-rw-r--r--fs/nfs/nfs4proc.c10
-rw-r--r--fs/nfsd/export.c4
-rw-r--r--fs/nfsd/vfs.c7
-rw-r--r--fs/ocfs2/aops.c46
-rw-r--r--fs/ocfs2/aops.h4
-rw-r--r--fs/ocfs2/extent_map.c6
-rw-r--r--fs/ocfs2/file.c86
-rw-r--r--fs/ocfs2/journal.c8
-rw-r--r--fs/ocfs2/uptodate.c4
-rw-r--r--fs/ocfs2/vote.c6
-rw-r--r--fs/open.c25
-rw-r--r--fs/partitions/check.c46
-rw-r--r--fs/pipe.c189
-rw-r--r--fs/proc/base.c21
-rw-r--r--fs/reiserfs/xattr_acl.c5
-rw-r--r--fs/smbfs/dir.c5
-rw-r--r--fs/smbfs/request.c4
-rw-r--r--fs/splice.c963
-rw-r--r--fs/stat.c2
-rw-r--r--fs/sysfs/dir.c1
-rw-r--r--fs/sysfs/file.c76
-rw-r--r--fs/sysfs/sysfs.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c12
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c14
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h12
-rw-r--r--fs/xfs/xfs_alloc.c5
-rw-r--r--fs/xfs/xfs_rename.c12
-rw-r--r--fs/xfs/xfs_vfsops.c27
-rw-r--r--fs/xfs/xfs_vnodeops.c2
82 files changed, 2101 insertions, 915 deletions
diff --git a/fs/9p/fcall.c b/fs/9p/fcall.c
index 71742ba150c4..6f2617820a4e 100644
--- a/fs/9p/fcall.c
+++ b/fs/9p/fcall.c
@@ -98,23 +98,20 @@ v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
98static void v9fs_t_clunk_cb(void *a, struct v9fs_fcall *tc, 98static void v9fs_t_clunk_cb(void *a, struct v9fs_fcall *tc,
99 struct v9fs_fcall *rc, int err) 99 struct v9fs_fcall *rc, int err)
100{ 100{
101 int fid; 101 int fid, id;
102 struct v9fs_session_info *v9ses; 102 struct v9fs_session_info *v9ses;
103 103
104 if (err) 104 id = 0;
105 return;
106
107 fid = tc->params.tclunk.fid; 105 fid = tc->params.tclunk.fid;
108 kfree(tc); 106 if (rc)
109 107 id = rc->id;
110 if (!rc)
111 return;
112
113 v9ses = a;
114 if (rc->id == RCLUNK)
115 v9fs_put_idpool(fid, &v9ses->fidpool);
116 108
109 kfree(tc);
117 kfree(rc); 110 kfree(rc);
111 if (id == RCLUNK) {
112 v9ses = a;
113 v9fs_put_idpool(fid, &v9ses->fidpool);
114 }
118} 115}
119 116
120/** 117/**
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
index 3e5b124a7212..f4407eb276c7 100644
--- a/fs/9p/mux.c
+++ b/fs/9p/mux.c
@@ -50,15 +50,23 @@ enum {
50 Wpending = 8, /* can write */ 50 Wpending = 8, /* can write */
51}; 51};
52 52
53enum {
54 None,
55 Flushing,
56 Flushed,
57};
58
53struct v9fs_mux_poll_task; 59struct v9fs_mux_poll_task;
54 60
55struct v9fs_req { 61struct v9fs_req {
62 spinlock_t lock;
56 int tag; 63 int tag;
57 struct v9fs_fcall *tcall; 64 struct v9fs_fcall *tcall;
58 struct v9fs_fcall *rcall; 65 struct v9fs_fcall *rcall;
59 int err; 66 int err;
60 v9fs_mux_req_callback cb; 67 v9fs_mux_req_callback cb;
61 void *cba; 68 void *cba;
69 int flush;
62 struct list_head req_list; 70 struct list_head req_list;
63}; 71};
64 72
@@ -96,8 +104,8 @@ struct v9fs_mux_poll_task {
96 104
97struct v9fs_mux_rpc { 105struct v9fs_mux_rpc {
98 struct v9fs_mux_data *m; 106 struct v9fs_mux_data *m;
99 struct v9fs_req *req;
100 int err; 107 int err;
108 struct v9fs_fcall *tcall;
101 struct v9fs_fcall *rcall; 109 struct v9fs_fcall *rcall;
102 wait_queue_head_t wqueue; 110 wait_queue_head_t wqueue;
103}; 111};
@@ -524,10 +532,9 @@ again:
524 532
525static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req) 533static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req)
526{ 534{
527 int ecode, tag; 535 int ecode;
528 struct v9fs_str *ename; 536 struct v9fs_str *ename;
529 537
530 tag = req->tag;
531 if (!req->err && req->rcall->id == RERROR) { 538 if (!req->err && req->rcall->id == RERROR) {
532 ecode = req->rcall->params.rerror.errno; 539 ecode = req->rcall->params.rerror.errno;
533 ename = &req->rcall->params.rerror.error; 540 ename = &req->rcall->params.rerror.error;
@@ -553,23 +560,6 @@ static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req)
553 if (!req->err) 560 if (!req->err)
554 req->err = -EIO; 561 req->err = -EIO;
555 } 562 }
556
557 if (req->err == ERREQFLUSH)
558 return;
559
560 if (req->cb) {
561 dprintk(DEBUG_MUX, "calling callback tcall %p rcall %p\n",
562 req->tcall, req->rcall);
563
564 (*req->cb) (req->cba, req->tcall, req->rcall, req->err);
565 req->cb = NULL;
566 } else
567 kfree(req->rcall);
568
569 v9fs_mux_put_tag(m, tag);
570
571 wake_up(&m->equeue);
572 kfree(req);
573} 563}
574 564
575/** 565/**
@@ -669,17 +659,26 @@ static void v9fs_read_work(void *a)
669 list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { 659 list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
670 if (rreq->tag == rcall->tag) { 660 if (rreq->tag == rcall->tag) {
671 req = rreq; 661 req = rreq;
672 req->rcall = rcall; 662 if (req->flush != Flushing)
673 list_del(&req->req_list); 663 list_del(&req->req_list);
674 spin_unlock(&m->lock);
675 process_request(m, req);
676 break; 664 break;
677 } 665 }
678
679 } 666 }
667 spin_unlock(&m->lock);
680 668
681 if (!req) { 669 if (req) {
682 spin_unlock(&m->lock); 670 req->rcall = rcall;
671 process_request(m, req);
672
673 if (req->flush != Flushing) {
674 if (req->cb)
675 (*req->cb) (req, req->cba);
676 else
677 kfree(req->rcall);
678
679 wake_up(&m->equeue);
680 }
681 } else {
683 if (err >= 0 && rcall->id != RFLUSH) 682 if (err >= 0 && rcall->id != RFLUSH)
684 dprintk(DEBUG_ERROR, 683 dprintk(DEBUG_ERROR,
685 "unexpected response mux %p id %d tag %d\n", 684 "unexpected response mux %p id %d tag %d\n",
@@ -746,7 +745,6 @@ static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m,
746 return ERR_PTR(-ENOMEM); 745 return ERR_PTR(-ENOMEM);
747 746
748 v9fs_set_tag(tc, n); 747 v9fs_set_tag(tc, n);
749
750 if ((v9fs_debug_level&DEBUG_FCALL) == DEBUG_FCALL) { 748 if ((v9fs_debug_level&DEBUG_FCALL) == DEBUG_FCALL) {
751 char buf[150]; 749 char buf[150];
752 750
@@ -754,12 +752,14 @@ static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m,
754 printk(KERN_NOTICE "<<< %p %s\n", m, buf); 752 printk(KERN_NOTICE "<<< %p %s\n", m, buf);
755 } 753 }
756 754
755 spin_lock_init(&req->lock);
757 req->tag = n; 756 req->tag = n;
758 req->tcall = tc; 757 req->tcall = tc;
759 req->rcall = NULL; 758 req->rcall = NULL;
760 req->err = 0; 759 req->err = 0;
761 req->cb = cb; 760 req->cb = cb;
762 req->cba = cba; 761 req->cba = cba;
762 req->flush = None;
763 763
764 spin_lock(&m->lock); 764 spin_lock(&m->lock);
765 list_add_tail(&req->req_list, &m->unsent_req_list); 765 list_add_tail(&req->req_list, &m->unsent_req_list);
@@ -776,72 +776,108 @@ static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m,
776 return req; 776 return req;
777} 777}
778 778
779static void v9fs_mux_flush_cb(void *a, struct v9fs_fcall *tc, 779static void v9fs_mux_free_request(struct v9fs_mux_data *m, struct v9fs_req *req)
780 struct v9fs_fcall *rc, int err) 780{
781 v9fs_mux_put_tag(m, req->tag);
782 kfree(req);
783}
784
785static void v9fs_mux_flush_cb(struct v9fs_req *freq, void *a)
781{ 786{
782 v9fs_mux_req_callback cb; 787 v9fs_mux_req_callback cb;
783 int tag; 788 int tag;
784 struct v9fs_mux_data *m; 789 struct v9fs_mux_data *m;
785 struct v9fs_req *req, *rptr; 790 struct v9fs_req *req, *rreq, *rptr;
786 791
787 m = a; 792 m = a;
788 dprintk(DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m, tc, 793 dprintk(DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m,
789 rc, err, tc->params.tflush.oldtag); 794 freq->tcall, freq->rcall, freq->err,
795 freq->tcall->params.tflush.oldtag);
790 796
791 spin_lock(&m->lock); 797 spin_lock(&m->lock);
792 cb = NULL; 798 cb = NULL;
793 tag = tc->params.tflush.oldtag; 799 tag = freq->tcall->params.tflush.oldtag;
794 list_for_each_entry_safe(req, rptr, &m->req_list, req_list) { 800 req = NULL;
795 if (req->tag == tag) { 801 list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
802 if (rreq->tag == tag) {
803 req = rreq;
796 list_del(&req->req_list); 804 list_del(&req->req_list);
797 if (req->cb) {
798 cb = req->cb;
799 req->cb = NULL;
800 spin_unlock(&m->lock);
801 (*cb) (req->cba, req->tcall, req->rcall,
802 req->err);
803 }
804 kfree(req);
805 wake_up(&m->equeue);
806 break; 805 break;
807 } 806 }
808 } 807 }
808 spin_unlock(&m->lock);
809 809
810 if (!cb) 810 if (req) {
811 spin_unlock(&m->lock); 811 spin_lock(&req->lock);
812 req->flush = Flushed;
813 spin_unlock(&req->lock);
814
815 if (req->cb)
816 (*req->cb) (req, req->cba);
817 else
818 kfree(req->rcall);
819
820 wake_up(&m->equeue);
821 }
812 822
813 v9fs_mux_put_tag(m, tag); 823 kfree(freq->tcall);
814 kfree(tc); 824 kfree(freq->rcall);
815 kfree(rc); 825 v9fs_mux_free_request(m, freq);
816} 826}
817 827
818static void 828static int
819v9fs_mux_flush_request(struct v9fs_mux_data *m, struct v9fs_req *req) 829v9fs_mux_flush_request(struct v9fs_mux_data *m, struct v9fs_req *req)
820{ 830{
821 struct v9fs_fcall *fc; 831 struct v9fs_fcall *fc;
832 struct v9fs_req *rreq, *rptr;
822 833
823 dprintk(DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag); 834 dprintk(DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag);
824 835
836 /* if a response was received for a request, do nothing */
837 spin_lock(&req->lock);
838 if (req->rcall || req->err) {
839 spin_unlock(&req->lock);
840 dprintk(DEBUG_MUX, "mux %p req %p response already received\n", m, req);
841 return 0;
842 }
843
844 req->flush = Flushing;
845 spin_unlock(&req->lock);
846
847 spin_lock(&m->lock);
848 /* if the request is not sent yet, just remove it from the list */
849 list_for_each_entry_safe(rreq, rptr, &m->unsent_req_list, req_list) {
850 if (rreq->tag == req->tag) {
851 dprintk(DEBUG_MUX, "mux %p req %p request is not sent yet\n", m, req);
852 list_del(&rreq->req_list);
853 req->flush = Flushed;
854 spin_unlock(&m->lock);
855 if (req->cb)
856 (*req->cb) (req, req->cba);
857 return 0;
858 }
859 }
860 spin_unlock(&m->lock);
861
862 clear_thread_flag(TIF_SIGPENDING);
825 fc = v9fs_create_tflush(req->tag); 863 fc = v9fs_create_tflush(req->tag);
826 v9fs_send_request(m, fc, v9fs_mux_flush_cb, m); 864 v9fs_send_request(m, fc, v9fs_mux_flush_cb, m);
865 return 1;
827} 866}
828 867
829static void 868static void
830v9fs_mux_rpc_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc, int err) 869v9fs_mux_rpc_cb(struct v9fs_req *req, void *a)
831{ 870{
832 struct v9fs_mux_rpc *r; 871 struct v9fs_mux_rpc *r;
833 872
834 if (err == ERREQFLUSH) { 873 dprintk(DEBUG_MUX, "req %p r %p\n", req, a);
835 kfree(rc);
836 dprintk(DEBUG_MUX, "err req flush\n");
837 return;
838 }
839
840 r = a; 874 r = a;
841 dprintk(DEBUG_MUX, "mux %p req %p tc %p rc %p err %d\n", r->m, r->req, 875 r->rcall = req->rcall;
842 tc, rc, err); 876 r->err = req->err;
843 r->rcall = rc; 877
844 r->err = err; 878 if (req->flush!=None && !req->err)
879 r->err = -ERESTARTSYS;
880
845 wake_up(&r->wqueue); 881 wake_up(&r->wqueue);
846} 882}
847 883
@@ -856,12 +892,13 @@ int
856v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, 892v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
857 struct v9fs_fcall **rc) 893 struct v9fs_fcall **rc)
858{ 894{
859 int err; 895 int err, sigpending;
860 unsigned long flags; 896 unsigned long flags;
861 struct v9fs_req *req; 897 struct v9fs_req *req;
862 struct v9fs_mux_rpc r; 898 struct v9fs_mux_rpc r;
863 899
864 r.err = 0; 900 r.err = 0;
901 r.tcall = tc;
865 r.rcall = NULL; 902 r.rcall = NULL;
866 r.m = m; 903 r.m = m;
867 init_waitqueue_head(&r.wqueue); 904 init_waitqueue_head(&r.wqueue);
@@ -869,48 +906,50 @@ v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
869 if (rc) 906 if (rc)
870 *rc = NULL; 907 *rc = NULL;
871 908
909 sigpending = 0;
910 if (signal_pending(current)) {
911 sigpending = 1;
912 clear_thread_flag(TIF_SIGPENDING);
913 }
914
872 req = v9fs_send_request(m, tc, v9fs_mux_rpc_cb, &r); 915 req = v9fs_send_request(m, tc, v9fs_mux_rpc_cb, &r);
873 if (IS_ERR(req)) { 916 if (IS_ERR(req)) {
874 err = PTR_ERR(req); 917 err = PTR_ERR(req);
875 dprintk(DEBUG_MUX, "error %d\n", err); 918 dprintk(DEBUG_MUX, "error %d\n", err);
876 return PTR_ERR(req); 919 return err;
877 } 920 }
878 921
879 r.req = req;
880 dprintk(DEBUG_MUX, "mux %p tc %p tag %d rpc %p req %p\n", m, tc,
881 req->tag, &r, req);
882 err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0); 922 err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0);
883 if (r.err < 0) 923 if (r.err < 0)
884 err = r.err; 924 err = r.err;
885 925
886 if (err == -ERESTARTSYS && m->trans->status == Connected && m->err == 0) { 926 if (err == -ERESTARTSYS && m->trans->status == Connected && m->err == 0) {
887 spin_lock(&m->lock); 927 if (v9fs_mux_flush_request(m, req)) {
888 req->tcall = NULL; 928 /* wait until we get response of the flush message */
889 req->err = ERREQFLUSH; 929 do {
890 spin_unlock(&m->lock); 930 clear_thread_flag(TIF_SIGPENDING);
931 err = wait_event_interruptible(r.wqueue,
932 r.rcall || r.err);
933 } while (!r.rcall && !r.err && err==-ERESTARTSYS &&
934 m->trans->status==Connected && !m->err);
935 }
936 sigpending = 1;
937 }
891 938
892 clear_thread_flag(TIF_SIGPENDING); 939 if (sigpending) {
893 v9fs_mux_flush_request(m, req);
894 spin_lock_irqsave(&current->sighand->siglock, flags); 940 spin_lock_irqsave(&current->sighand->siglock, flags);
895 recalc_sigpending(); 941 recalc_sigpending();
896 spin_unlock_irqrestore(&current->sighand->siglock, flags); 942 spin_unlock_irqrestore(&current->sighand->siglock, flags);
897 } 943 }
898 944
899 if (!err) { 945 if (rc)
900 if (r.rcall) 946 *rc = r.rcall;
901 dprintk(DEBUG_MUX, "got response id %d tag %d\n", 947 else
902 r.rcall->id, r.rcall->tag);
903
904 if (rc)
905 *rc = r.rcall;
906 else
907 kfree(r.rcall);
908 } else {
909 kfree(r.rcall); 948 kfree(r.rcall);
910 dprintk(DEBUG_MUX, "got error %d\n", err); 949
911 if (err > 0) 950 v9fs_mux_free_request(m, req);
912 err = -EIO; 951 if (err > 0)
913 } 952 err = -EIO;
914 953
915 return err; 954 return err;
916} 955}
@@ -951,12 +990,15 @@ void v9fs_mux_cancel(struct v9fs_mux_data *m, int err)
951 struct v9fs_req *req, *rtmp; 990 struct v9fs_req *req, *rtmp;
952 LIST_HEAD(cancel_list); 991 LIST_HEAD(cancel_list);
953 992
954 dprintk(DEBUG_MUX, "mux %p err %d\n", m, err); 993 dprintk(DEBUG_ERROR, "mux %p err %d\n", m, err);
955 m->err = err; 994 m->err = err;
956 spin_lock(&m->lock); 995 spin_lock(&m->lock);
957 list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { 996 list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
958 list_move(&req->req_list, &cancel_list); 997 list_move(&req->req_list, &cancel_list);
959 } 998 }
999 list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
1000 list_move(&req->req_list, &cancel_list);
1001 }
960 spin_unlock(&m->lock); 1002 spin_unlock(&m->lock);
961 1003
962 list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { 1004 list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
@@ -965,11 +1007,9 @@ void v9fs_mux_cancel(struct v9fs_mux_data *m, int err)
965 req->err = err; 1007 req->err = err;
966 1008
967 if (req->cb) 1009 if (req->cb)
968 (*req->cb) (req->cba, req->tcall, req->rcall, req->err); 1010 (*req->cb) (req, req->cba);
969 else 1011 else
970 kfree(req->rcall); 1012 kfree(req->rcall);
971
972 kfree(req);
973 } 1013 }
974 1014
975 wake_up(&m->equeue); 1015 wake_up(&m->equeue);
diff --git a/fs/9p/mux.h b/fs/9p/mux.h
index e90bfd32ea42..fb10c50186a1 100644
--- a/fs/9p/mux.h
+++ b/fs/9p/mux.h
@@ -24,6 +24,7 @@
24 */ 24 */
25 25
26struct v9fs_mux_data; 26struct v9fs_mux_data;
27struct v9fs_req;
27 28
28/** 29/**
29 * v9fs_mux_req_callback - callback function that is called when the 30 * v9fs_mux_req_callback - callback function that is called when the
@@ -36,8 +37,7 @@ struct v9fs_mux_data;
36 * @rc - response call 37 * @rc - response call
37 * @err - error code (non-zero if error occured) 38 * @err - error code (non-zero if error occured)
38 */ 39 */
39typedef void (*v9fs_mux_req_callback)(void *a, struct v9fs_fcall *tc, 40typedef void (*v9fs_mux_req_callback)(struct v9fs_req *req, void *a);
40 struct v9fs_fcall *rc, int err);
41 41
42int v9fs_mux_global_init(void); 42int v9fs_mux_global_init(void);
43void v9fs_mux_global_exit(void); 43void v9fs_mux_global_exit(void);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 083dcfcd158e..1a8e46084f0e 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -72,11 +72,17 @@ int v9fs_file_open(struct inode *inode, struct file *file)
72 return -ENOSPC; 72 return -ENOSPC;
73 } 73 }
74 74
75 err = v9fs_t_walk(v9ses, vfid->fid, fid, NULL, NULL); 75 err = v9fs_t_walk(v9ses, vfid->fid, fid, NULL, &fcall);
76 if (err < 0) { 76 if (err < 0) {
77 dprintk(DEBUG_ERROR, "rewalk didn't work\n"); 77 dprintk(DEBUG_ERROR, "rewalk didn't work\n");
78 goto put_fid; 78 if (fcall && fcall->id == RWALK)
79 goto clunk_fid;
80 else {
81 v9fs_put_idpool(fid, &v9ses->fidpool);
82 goto free_fcall;
83 }
79 } 84 }
85 kfree(fcall);
80 86
81 /* TODO: do special things for O_EXCL, O_NOFOLLOW, O_SYNC */ 87 /* TODO: do special things for O_EXCL, O_NOFOLLOW, O_SYNC */
82 /* translate open mode appropriately */ 88 /* translate open mode appropriately */
@@ -109,8 +115,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
109clunk_fid: 115clunk_fid:
110 v9fs_t_clunk(v9ses, fid); 116 v9fs_t_clunk(v9ses, fid);
111 117
112put_fid: 118free_fcall:
113 v9fs_put_idpool(fid, &v9ses->fidpool);
114 kfree(fcall); 119 kfree(fcall);
115 120
116 return err; 121 return err;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 133db366d306..2cb87ba4b1c1 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -270,7 +270,10 @@ v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name, u32 perm,
270 err = v9fs_t_walk(v9ses, pfid, fid, NULL, &fcall); 270 err = v9fs_t_walk(v9ses, pfid, fid, NULL, &fcall);
271 if (err < 0) { 271 if (err < 0) {
272 PRINT_FCALL_ERROR("clone error", fcall); 272 PRINT_FCALL_ERROR("clone error", fcall);
273 goto put_fid; 273 if (fcall && fcall->id == RWALK)
274 goto clunk_fid;
275 else
276 goto put_fid;
274 } 277 }
275 kfree(fcall); 278 kfree(fcall);
276 279
@@ -322,6 +325,9 @@ v9fs_clone_walk(struct v9fs_session_info *v9ses, u32 fid, struct dentry *dentry)
322 &fcall); 325 &fcall);
323 326
324 if (err < 0) { 327 if (err < 0) {
328 if (fcall && fcall->id == RWALK)
329 goto clunk_fid;
330
325 PRINT_FCALL_ERROR("walk error", fcall); 331 PRINT_FCALL_ERROR("walk error", fcall);
326 v9fs_put_idpool(nfid, &v9ses->fidpool); 332 v9fs_put_idpool(nfid, &v9ses->fidpool);
327 goto error; 333 goto error;
@@ -640,19 +646,26 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
640 } 646 }
641 647
642 result = v9fs_t_walk(v9ses, dirfidnum, newfid, 648 result = v9fs_t_walk(v9ses, dirfidnum, newfid,
643 (char *)dentry->d_name.name, NULL); 649 (char *)dentry->d_name.name, &fcall);
650
644 if (result < 0) { 651 if (result < 0) {
645 v9fs_put_idpool(newfid, &v9ses->fidpool); 652 if (fcall && fcall->id == RWALK)
653 v9fs_t_clunk(v9ses, newfid);
654 else
655 v9fs_put_idpool(newfid, &v9ses->fidpool);
656
646 if (result == -ENOENT) { 657 if (result == -ENOENT) {
647 d_add(dentry, NULL); 658 d_add(dentry, NULL);
648 dprintk(DEBUG_VFS, 659 dprintk(DEBUG_VFS,
649 "Return negative dentry %p count %d\n", 660 "Return negative dentry %p count %d\n",
650 dentry, atomic_read(&dentry->d_count)); 661 dentry, atomic_read(&dentry->d_count));
662 kfree(fcall);
651 return NULL; 663 return NULL;
652 } 664 }
653 dprintk(DEBUG_ERROR, "walk error:%d\n", result); 665 dprintk(DEBUG_ERROR, "walk error:%d\n", result);
654 goto FreeFcall; 666 goto FreeFcall;
655 } 667 }
668 kfree(fcall);
656 669
657 result = v9fs_t_stat(v9ses, newfid, &fcall); 670 result = v9fs_t_stat(v9ses, newfid, &fcall);
658 if (result < 0) { 671 if (result < 0) {
diff --git a/fs/Kconfig b/fs/Kconfig
index 2524629dc835..f9b5842c8d2d 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -842,6 +842,12 @@ config TMPFS
842config HUGETLBFS 842config HUGETLBFS
843 bool "HugeTLB file system support" 843 bool "HugeTLB file system support"
844 depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN 844 depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN
845 help
846 hugetlbfs is a filesystem backing for HugeTLB pages, based on
847 ramfs. For architectures that support it, say Y here and read
848 <file:Documentation/vm/hugetlbpage.txt> for details.
849
850 If unsure, say N.
845 851
846config HUGETLB_PAGE 852config HUGETLB_PAGE
847 def_bool HUGETLBFS 853 def_bool HUGETLBFS
diff --git a/fs/Makefile b/fs/Makefile
index 83bf478e786b..078d3d1191a5 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -45,6 +45,7 @@ obj-$(CONFIG_DNOTIFY) += dnotify.o
45obj-$(CONFIG_PROC_FS) += proc/ 45obj-$(CONFIG_PROC_FS) += proc/
46obj-y += partitions/ 46obj-y += partitions/
47obj-$(CONFIG_SYSFS) += sysfs/ 47obj-$(CONFIG_SYSFS) += sysfs/
48obj-$(CONFIG_CONFIGFS_FS) += configfs/
48obj-y += devpts/ 49obj-y += devpts/
49 50
50obj-$(CONFIG_PROFILING) += dcookies.o 51obj-$(CONFIG_PROFILING) += dcookies.o
@@ -100,5 +101,4 @@ obj-$(CONFIG_BEFS_FS) += befs/
100obj-$(CONFIG_HOSTFS) += hostfs/ 101obj-$(CONFIG_HOSTFS) += hostfs/
101obj-$(CONFIG_HPPFS) += hppfs/ 102obj-$(CONFIG_HPPFS) += hppfs/
102obj-$(CONFIG_DEBUG_FS) += debugfs/ 103obj-$(CONFIG_DEBUG_FS) += debugfs/
103obj-$(CONFIG_CONFIGFS_FS) += configfs/
104obj-$(CONFIG_OCFS2_FS) += ocfs2/ 104obj-$(CONFIG_OCFS2_FS) += ocfs2/
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index d4c2d636c479..a42143ca0169 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -416,10 +416,9 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry,
416 return retval; 416 return retval;
417 } 417 }
418 418
419 retval = -EIO;
420 bh = affs_bread(sb, old_dentry->d_inode->i_ino); 419 bh = affs_bread(sb, old_dentry->d_inode->i_ino);
421 if (!bh) 420 if (!bh)
422 goto done; 421 return -EIO;
423 422
424 /* Remove header from its parent directory. */ 423 /* Remove header from its parent directory. */
425 affs_lock_dir(old_dir); 424 affs_lock_dir(old_dir);
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 57c4903614e5..d6603d02304c 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -74,8 +74,8 @@ struct autofs_wait_queue {
74 struct autofs_wait_queue *next; 74 struct autofs_wait_queue *next;
75 autofs_wqt_t wait_queue_token; 75 autofs_wqt_t wait_queue_token;
76 /* We use the following to see what we are waiting for */ 76 /* We use the following to see what we are waiting for */
77 int hash; 77 unsigned int hash;
78 int len; 78 unsigned int len;
79 char *name; 79 char *name;
80 u32 dev; 80 u32 dev;
81 u64 ino; 81 u64 ino;
@@ -85,7 +85,6 @@ struct autofs_wait_queue {
85 pid_t tgid; 85 pid_t tgid;
86 /* This is for status reporting upon return */ 86 /* This is for status reporting upon return */
87 int status; 87 int status;
88 atomic_t notify;
89 atomic_t wait_ctr; 88 atomic_t wait_ctr;
90}; 89};
91 90
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 84e030c8ddd0..5100f984783f 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -327,6 +327,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
327static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) 327static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
328{ 328{
329 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 329 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
330 struct autofs_info *ino = autofs4_dentry_ino(dentry);
330 int oz_mode = autofs4_oz_mode(sbi); 331 int oz_mode = autofs4_oz_mode(sbi);
331 unsigned int lookup_type; 332 unsigned int lookup_type;
332 int status; 333 int status;
@@ -340,13 +341,8 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
340 if (oz_mode || !lookup_type) 341 if (oz_mode || !lookup_type)
341 goto done; 342 goto done;
342 343
343 /* 344 /* If an expire request is pending wait for it. */
344 * If a request is pending wait for it. 345 if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) {
345 * If it's a mount then it won't be expired till at least
346 * a liitle later and if it's an expire then we might need
347 * to mount it again.
348 */
349 if (autofs4_ispending(dentry)) {
350 DPRINTK("waiting for active request %p name=%.*s", 346 DPRINTK("waiting for active request %p name=%.*s",
351 dentry, dentry->d_name.len, dentry->d_name.name); 347 dentry, dentry->d_name.len, dentry->d_name.name);
352 348
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 142ab6aa2aa1..ce103e7b0bc3 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -189,14 +189,30 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
189 return len; 189 return len;
190} 190}
191 191
192static struct autofs_wait_queue *
193autofs4_find_wait(struct autofs_sb_info *sbi,
194 char *name, unsigned int hash, unsigned int len)
195{
196 struct autofs_wait_queue *wq;
197
198 for (wq = sbi->queues; wq; wq = wq->next) {
199 if (wq->hash == hash &&
200 wq->len == len &&
201 wq->name && !memcmp(wq->name, name, len))
202 break;
203 }
204 return wq;
205}
206
192int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, 207int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
193 enum autofs_notify notify) 208 enum autofs_notify notify)
194{ 209{
210 struct autofs_info *ino;
195 struct autofs_wait_queue *wq; 211 struct autofs_wait_queue *wq;
196 char *name; 212 char *name;
197 unsigned int len = 0; 213 unsigned int len = 0;
198 unsigned int hash = 0; 214 unsigned int hash = 0;
199 int status; 215 int status, type;
200 216
201 /* In catatonic mode, we don't wait for nobody */ 217 /* In catatonic mode, we don't wait for nobody */
202 if (sbi->catatonic) 218 if (sbi->catatonic)
@@ -223,21 +239,41 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
223 return -EINTR; 239 return -EINTR;
224 } 240 }
225 241
226 for (wq = sbi->queues ; wq ; wq = wq->next) { 242 wq = autofs4_find_wait(sbi, name, hash, len);
227 if (wq->hash == dentry->d_name.hash && 243 ino = autofs4_dentry_ino(dentry);
228 wq->len == len && 244 if (!wq && ino && notify == NFY_NONE) {
229 wq->name && !memcmp(wq->name, name, len)) 245 /*
230 break; 246 * Either we've betean the pending expire to post it's
231 } 247 * wait or it finished while we waited on the mutex.
248 * So we need to wait till either, the wait appears
249 * or the expire finishes.
250 */
251
252 while (ino->flags & AUTOFS_INF_EXPIRING) {
253 mutex_unlock(&sbi->wq_mutex);
254 schedule_timeout_interruptible(HZ/10);
255 if (mutex_lock_interruptible(&sbi->wq_mutex)) {
256 kfree(name);
257 return -EINTR;
258 }
259 wq = autofs4_find_wait(sbi, name, hash, len);
260 if (wq)
261 break;
262 }
232 263
233 if (!wq) { 264 /*
234 /* Can't wait for an expire if there's no mount */ 265 * Not ideal but the status has already gone. Of the two
235 if (notify == NFY_NONE && !d_mountpoint(dentry)) { 266 * cases where we wait on NFY_NONE neither depend on the
267 * return status of the wait.
268 */
269 if (!wq) {
236 kfree(name); 270 kfree(name);
237 mutex_unlock(&sbi->wq_mutex); 271 mutex_unlock(&sbi->wq_mutex);
238 return -ENOENT; 272 return 0;
239 } 273 }
274 }
240 275
276 if (!wq) {
241 /* Create a new wait queue */ 277 /* Create a new wait queue */
242 wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL); 278 wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL);
243 if (!wq) { 279 if (!wq) {
@@ -263,20 +299,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
263 wq->tgid = current->tgid; 299 wq->tgid = current->tgid;
264 wq->status = -EINTR; /* Status return if interrupted */ 300 wq->status = -EINTR; /* Status return if interrupted */
265 atomic_set(&wq->wait_ctr, 2); 301 atomic_set(&wq->wait_ctr, 2);
266 atomic_set(&wq->notify, 1);
267 mutex_unlock(&sbi->wq_mutex);
268 } else {
269 atomic_inc(&wq->wait_ctr);
270 mutex_unlock(&sbi->wq_mutex); 302 mutex_unlock(&sbi->wq_mutex);
271 kfree(name);
272 DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d",
273 (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify);
274 }
275
276 if (notify != NFY_NONE && atomic_read(&wq->notify)) {
277 int type;
278
279 atomic_dec(&wq->notify);
280 303
281 if (sbi->version < 5) { 304 if (sbi->version < 5) {
282 if (notify == NFY_MOUNT) 305 if (notify == NFY_MOUNT)
@@ -299,6 +322,12 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
299 322
300 /* autofs4_notify_daemon() may block */ 323 /* autofs4_notify_daemon() may block */
301 autofs4_notify_daemon(sbi, wq, type); 324 autofs4_notify_daemon(sbi, wq, type);
325 } else {
326 atomic_inc(&wq->wait_ctr);
327 mutex_unlock(&sbi->wq_mutex);
328 kfree(name);
329 DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d",
330 (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify);
302 } 331 }
303 332
304 /* wq->name is NULL if and only if the lock is already released */ 333 /* wq->name is NULL if and only if the lock is already released */
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 69f44dcdb0b4..b1c902e319c1 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -428,7 +428,6 @@ static int load_flat_file(struct linux_binprm * bprm,
428 loff_t fpos; 428 loff_t fpos;
429 unsigned long start_code, end_code; 429 unsigned long start_code, end_code;
430 int ret; 430 int ret;
431 int exec_fileno;
432 431
433 hdr = ((struct flat_hdr *) bprm->buf); /* exec-header */ 432 hdr = ((struct flat_hdr *) bprm->buf); /* exec-header */
434 inode = bprm->file->f_dentry->d_inode; 433 inode = bprm->file->f_dentry->d_inode;
@@ -502,21 +501,12 @@ static int load_flat_file(struct linux_binprm * bprm,
502 goto err; 501 goto err;
503 } 502 }
504 503
505 /* check file descriptor */
506 exec_fileno = get_unused_fd();
507 if (exec_fileno < 0) {
508 ret = -EMFILE;
509 goto err;
510 }
511 get_file(bprm->file);
512 fd_install(exec_fileno, bprm->file);
513
514 /* Flush all traces of the currently running executable */ 504 /* Flush all traces of the currently running executable */
515 if (id == 0) { 505 if (id == 0) {
516 result = flush_old_exec(bprm); 506 result = flush_old_exec(bprm);
517 if (result) { 507 if (result) {
518 ret = result; 508 ret = result;
519 goto err_close; 509 goto err;
520 } 510 }
521 511
522 /* OK, This is the point of no return */ 512 /* OK, This is the point of no return */
@@ -548,7 +538,7 @@ static int load_flat_file(struct linux_binprm * bprm,
548 textpos = (unsigned long) -ENOMEM; 538 textpos = (unsigned long) -ENOMEM;
549 printk("Unable to mmap process text, errno %d\n", (int)-textpos); 539 printk("Unable to mmap process text, errno %d\n", (int)-textpos);
550 ret = textpos; 540 ret = textpos;
551 goto err_close; 541 goto err;
552 } 542 }
553 543
554 down_write(&current->mm->mmap_sem); 544 down_write(&current->mm->mmap_sem);
@@ -564,7 +554,7 @@ static int load_flat_file(struct linux_binprm * bprm,
564 (int)-datapos); 554 (int)-datapos);
565 do_munmap(current->mm, textpos, text_len); 555 do_munmap(current->mm, textpos, text_len);
566 ret = realdatastart; 556 ret = realdatastart;
567 goto err_close; 557 goto err;
568 } 558 }
569 datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long); 559 datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long);
570 560
@@ -587,7 +577,7 @@ static int load_flat_file(struct linux_binprm * bprm,
587 do_munmap(current->mm, textpos, text_len); 577 do_munmap(current->mm, textpos, text_len);
588 do_munmap(current->mm, realdatastart, data_len + extra); 578 do_munmap(current->mm, realdatastart, data_len + extra);
589 ret = result; 579 ret = result;
590 goto err_close; 580 goto err;
591 } 581 }
592 582
593 reloc = (unsigned long *) (datapos+(ntohl(hdr->reloc_start)-text_len)); 583 reloc = (unsigned long *) (datapos+(ntohl(hdr->reloc_start)-text_len));
@@ -606,7 +596,7 @@ static int load_flat_file(struct linux_binprm * bprm,
606 printk("Unable to allocate RAM for process text/data, errno %d\n", 596 printk("Unable to allocate RAM for process text/data, errno %d\n",
607 (int)-textpos); 597 (int)-textpos);
608 ret = textpos; 598 ret = textpos;
609 goto err_close; 599 goto err;
610 } 600 }
611 601
612 realdatastart = textpos + ntohl(hdr->data_start); 602 realdatastart = textpos + ntohl(hdr->data_start);
@@ -652,7 +642,7 @@ static int load_flat_file(struct linux_binprm * bprm,
652 do_munmap(current->mm, textpos, text_len + data_len + extra + 642 do_munmap(current->mm, textpos, text_len + data_len + extra +
653 MAX_SHARED_LIBS * sizeof(unsigned long)); 643 MAX_SHARED_LIBS * sizeof(unsigned long));
654 ret = result; 644 ret = result;
655 goto err_close; 645 goto err;
656 } 646 }
657 } 647 }
658 648
@@ -717,7 +707,7 @@ static int load_flat_file(struct linux_binprm * bprm,
717 addr = calc_reloc(*rp, libinfo, id, 0); 707 addr = calc_reloc(*rp, libinfo, id, 0);
718 if (addr == RELOC_FAILED) { 708 if (addr == RELOC_FAILED) {
719 ret = -ENOEXEC; 709 ret = -ENOEXEC;
720 goto err_close; 710 goto err;
721 } 711 }
722 *rp = addr; 712 *rp = addr;
723 } 713 }
@@ -747,7 +737,7 @@ static int load_flat_file(struct linux_binprm * bprm,
747 rp = (unsigned long *) calc_reloc(addr, libinfo, id, 1); 737 rp = (unsigned long *) calc_reloc(addr, libinfo, id, 1);
748 if (rp == (unsigned long *)RELOC_FAILED) { 738 if (rp == (unsigned long *)RELOC_FAILED) {
749 ret = -ENOEXEC; 739 ret = -ENOEXEC;
750 goto err_close; 740 goto err;
751 } 741 }
752 742
753 /* Get the pointer's value. */ 743 /* Get the pointer's value. */
@@ -762,7 +752,7 @@ static int load_flat_file(struct linux_binprm * bprm,
762 addr = calc_reloc(addr, libinfo, id, 0); 752 addr = calc_reloc(addr, libinfo, id, 0);
763 if (addr == RELOC_FAILED) { 753 if (addr == RELOC_FAILED) {
764 ret = -ENOEXEC; 754 ret = -ENOEXEC;
765 goto err_close; 755 goto err;
766 } 756 }
767 757
768 /* Write back the relocated pointer. */ 758 /* Write back the relocated pointer. */
@@ -783,8 +773,6 @@ static int load_flat_file(struct linux_binprm * bprm,
783 stack_len); 773 stack_len);
784 774
785 return 0; 775 return 0;
786err_close:
787 sys_close(exec_fileno);
788err: 776err:
789 return ret; 777 return ret;
790} 778}
diff --git a/fs/bio.c b/fs/bio.c
index eb8fbc53f2cd..098c12b2d60a 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1116,6 +1116,9 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
1116 bp->bio1.bi_io_vec = &bp->bv1; 1116 bp->bio1.bi_io_vec = &bp->bv1;
1117 bp->bio2.bi_io_vec = &bp->bv2; 1117 bp->bio2.bi_io_vec = &bp->bv2;
1118 1118
1119 bp->bio1.bi_max_vecs = 1;
1120 bp->bio2.bi_max_vecs = 1;
1121
1119 bp->bio1.bi_end_io = bio_pair_end_1; 1122 bp->bio1.bi_end_io = bio_pair_end_1;
1120 bp->bio2.bi_end_io = bio_pair_end_2; 1123 bp->bio2.bi_end_io = bio_pair_end_2;
1121 1124
diff --git a/fs/block_dev.c b/fs/block_dev.c
index af88c43043d5..f5958f413bd1 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1104,6 +1104,8 @@ const struct file_operations def_blk_fops = {
1104 .readv = generic_file_readv, 1104 .readv = generic_file_readv,
1105 .writev = generic_file_write_nolock, 1105 .writev = generic_file_write_nolock,
1106 .sendfile = generic_file_sendfile, 1106 .sendfile = generic_file_sendfile,
1107 .splice_read = generic_file_splice_read,
1108 .splice_write = generic_file_splice_write,
1107}; 1109};
1108 1110
1109int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) 1111int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 8a2de038882e..7271bb0257f6 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,7 +1,18 @@
1Version 1.43
2------------
3POSIX locking to servers which support CIFS POSIX Extensions
4(disabled by default controlled by proc/fs/cifs/Experimental).
5Handle conversion of long share names (especially Asian languages)
6to Unicode during mount.
7
1Version 1.42 8Version 1.42
2------------ 9------------
3Fix slow oplock break when mounted to different servers at the same time and 10Fix slow oplock break when mounted to different servers at the same time and
4the tids match and we try to find matching fid on wrong server. 11the tids match and we try to find matching fid on wrong server. Fix read
12looping when signing required by server (2.6.16 kernel only). Fix readdir
13vs. rename race which could cause each to hang. Return . and .. even
14if server does not. Allow searches to skip first three entries and
15begin at any location. Fix oops in find_writeable_file.
5 16
6Version 1.41 17Version 1.41
7------------ 18------------
diff --git a/fs/cifs/README b/fs/cifs/README
index b2b4d0803761..0355003f4f0a 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -511,6 +511,14 @@ LinuxExtensionsEnabled If set to one then the client will attempt to
511 support and want to map the uid and gid fields 511 support and want to map the uid and gid fields
512 to values supplied at mount (rather than the 512 to values supplied at mount (rather than the
513 actual values, then set this to zero. (default 1) 513 actual values, then set this to zero. (default 1)
514Experimental When set to 1 used to enable certain experimental
515 features (currently enables multipage writes
516 when signing is enabled, the multipage write
517 performance enhancement was disabled when
518 signing turned on in case buffer was modified
519 just before it was sent, also this flag will
520 be used to use the new experimental sessionsetup
521 code).
514 522
515These experimental features and tracing can be enabled by changing flags in 523These experimental features and tracing can be enabled by changing flags in
516/proc/fs/cifs (after the cifs module has been installed or built into the 524/proc/fs/cifs (after the cifs module has been installed or built into the
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index d4b713e5affb..c262d8874ce9 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -33,6 +33,7 @@
33#include <linux/vfs.h> 33#include <linux/vfs.h>
34#include <linux/mempool.h> 34#include <linux/mempool.h>
35#include <linux/delay.h> 35#include <linux/delay.h>
36#include <linux/kthread.h>
36#include "cifsfs.h" 37#include "cifsfs.h"
37#include "cifspdu.h" 38#include "cifspdu.h"
38#define DECLARE_GLOBALS_HERE 39#define DECLARE_GLOBALS_HERE
@@ -75,9 +76,6 @@ unsigned int cifs_max_pending = CIFS_MAX_REQ;
75module_param(cifs_max_pending, int, 0); 76module_param(cifs_max_pending, int, 0);
76MODULE_PARM_DESC(cifs_max_pending,"Simultaneous requests to server. Default: 50 Range: 2 to 256"); 77MODULE_PARM_DESC(cifs_max_pending,"Simultaneous requests to server. Default: 50 Range: 2 to 256");
77 78
78static DECLARE_COMPLETION(cifs_oplock_exited);
79static DECLARE_COMPLETION(cifs_dnotify_exited);
80
81extern mempool_t *cifs_sm_req_poolp; 79extern mempool_t *cifs_sm_req_poolp;
82extern mempool_t *cifs_req_poolp; 80extern mempool_t *cifs_req_poolp;
83extern mempool_t *cifs_mid_poolp; 81extern mempool_t *cifs_mid_poolp;
@@ -841,10 +839,6 @@ static int cifs_oplock_thread(void * dummyarg)
841 __u16 netfid; 839 __u16 netfid;
842 int rc; 840 int rc;
843 841
844 daemonize("cifsoplockd");
845 allow_signal(SIGTERM);
846
847 oplockThread = current;
848 do { 842 do {
849 if (try_to_freeze()) 843 if (try_to_freeze())
850 continue; 844 continue;
@@ -900,9 +894,9 @@ static int cifs_oplock_thread(void * dummyarg)
900 set_current_state(TASK_INTERRUPTIBLE); 894 set_current_state(TASK_INTERRUPTIBLE);
901 schedule_timeout(1); /* yield in case q were corrupt */ 895 schedule_timeout(1); /* yield in case q were corrupt */
902 } 896 }
903 } while(!signal_pending(current)); 897 } while (!kthread_should_stop());
904 oplockThread = NULL; 898
905 complete_and_exit (&cifs_oplock_exited, 0); 899 return 0;
906} 900}
907 901
908static int cifs_dnotify_thread(void * dummyarg) 902static int cifs_dnotify_thread(void * dummyarg)
@@ -910,10 +904,6 @@ static int cifs_dnotify_thread(void * dummyarg)
910 struct list_head *tmp; 904 struct list_head *tmp;
911 struct cifsSesInfo *ses; 905 struct cifsSesInfo *ses;
912 906
913 daemonize("cifsdnotifyd");
914 allow_signal(SIGTERM);
915
916 dnotifyThread = current;
917 do { 907 do {
918 if(try_to_freeze()) 908 if(try_to_freeze())
919 continue; 909 continue;
@@ -931,8 +921,9 @@ static int cifs_dnotify_thread(void * dummyarg)
931 wake_up_all(&ses->server->response_q); 921 wake_up_all(&ses->server->response_q);
932 } 922 }
933 read_unlock(&GlobalSMBSeslock); 923 read_unlock(&GlobalSMBSeslock);
934 } while(!signal_pending(current)); 924 } while (!kthread_should_stop());
935 complete_and_exit (&cifs_dnotify_exited, 0); 925
926 return 0;
936} 927}
937 928
938static int __init 929static int __init
@@ -982,32 +973,48 @@ init_cifs(void)
982 } 973 }
983 974
984 rc = cifs_init_inodecache(); 975 rc = cifs_init_inodecache();
985 if (!rc) { 976 if (rc)
986 rc = cifs_init_mids(); 977 goto out_clean_proc;
987 if (!rc) { 978
988 rc = cifs_init_request_bufs(); 979 rc = cifs_init_mids();
989 if (!rc) { 980 if (rc)
990 rc = register_filesystem(&cifs_fs_type); 981 goto out_destroy_inodecache;
991 if (!rc) { 982
992 rc = (int)kernel_thread(cifs_oplock_thread, NULL, 983 rc = cifs_init_request_bufs();
993 CLONE_FS | CLONE_FILES | CLONE_VM); 984 if (rc)
994 if(rc > 0) { 985 goto out_destroy_mids;
995 rc = (int)kernel_thread(cifs_dnotify_thread, NULL, 986
996 CLONE_FS | CLONE_FILES | CLONE_VM); 987 rc = register_filesystem(&cifs_fs_type);
997 if(rc > 0) 988 if (rc)
998 return 0; 989 goto out_destroy_request_bufs;
999 else 990
1000 cERROR(1,("error %d create dnotify thread", rc)); 991 oplockThread = kthread_run(cifs_oplock_thread, NULL, "cifsoplockd");
1001 } else { 992 if (IS_ERR(oplockThread)) {
1002 cERROR(1,("error %d create oplock thread",rc)); 993 rc = PTR_ERR(oplockThread);
1003 } 994 cERROR(1,("error %d create oplock thread", rc));
1004 } 995 goto out_unregister_filesystem;
1005 cifs_destroy_request_bufs();
1006 }
1007 cifs_destroy_mids();
1008 }
1009 cifs_destroy_inodecache();
1010 } 996 }
997
998 dnotifyThread = kthread_run(cifs_dnotify_thread, NULL, "cifsdnotifyd");
999 if (IS_ERR(dnotifyThread)) {
1000 rc = PTR_ERR(dnotifyThread);
1001 cERROR(1,("error %d create dnotify thread", rc));
1002 goto out_stop_oplock_thread;
1003 }
1004
1005 return 0;
1006
1007 out_stop_oplock_thread:
1008 kthread_stop(oplockThread);
1009 out_unregister_filesystem:
1010 unregister_filesystem(&cifs_fs_type);
1011 out_destroy_request_bufs:
1012 cifs_destroy_request_bufs();
1013 out_destroy_mids:
1014 cifs_destroy_mids();
1015 out_destroy_inodecache:
1016 cifs_destroy_inodecache();
1017 out_clean_proc:
1011#ifdef CONFIG_PROC_FS 1018#ifdef CONFIG_PROC_FS
1012 cifs_proc_clean(); 1019 cifs_proc_clean();
1013#endif 1020#endif
@@ -1025,14 +1032,8 @@ exit_cifs(void)
1025 cifs_destroy_inodecache(); 1032 cifs_destroy_inodecache();
1026 cifs_destroy_mids(); 1033 cifs_destroy_mids();
1027 cifs_destroy_request_bufs(); 1034 cifs_destroy_request_bufs();
1028 if(oplockThread) { 1035 kthread_stop(oplockThread);
1029 send_sig(SIGTERM, oplockThread, 1); 1036 kthread_stop(dnotifyThread);
1030 wait_for_completion(&cifs_oplock_exited);
1031 }
1032 if(dnotifyThread) {
1033 send_sig(SIGTERM, dnotifyThread, 1);
1034 wait_for_completion(&cifs_dnotify_exited);
1035 }
1036} 1037}
1037 1038
1038MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>"); 1039MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>");
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 4e829dc672a6..c98755dca868 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -99,5 +99,5 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
99extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); 99extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
100extern int cifs_ioctl (struct inode * inode, struct file * filep, 100extern int cifs_ioctl (struct inode * inode, struct file * filep,
101 unsigned int command, unsigned long arg); 101 unsigned int command, unsigned long arg);
102#define CIFS_VERSION "1.42" 102#define CIFS_VERSION "1.43"
103#endif /* _CIFSFS_H */ 103#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 2879ba343ca7..310ea2f0e0bf 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -267,7 +267,7 @@ extern int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
267 const int waitFlag); 267 const int waitFlag);
268extern int CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, 268extern int CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
269 const __u16 smb_file_id, const int get_flag, 269 const __u16 smb_file_id, const int get_flag,
270 const __u64 len, const __u64 offset, 270 const __u64 len, struct file_lock *,
271 const __u16 lock_type, const int waitFlag); 271 const __u16 lock_type, const int waitFlag);
272extern int CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon); 272extern int CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon);
273extern int CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses); 273extern int CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index d705500aa283..925881e00ff2 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1355,7 +1355,8 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
1355int 1355int
1356CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, 1356CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1357 const __u16 smb_file_id, const int get_flag, const __u64 len, 1357 const __u16 smb_file_id, const int get_flag, const __u64 len,
1358 const __u64 lkoffset, const __u16 lock_type, const int waitFlag) 1358 struct file_lock *pLockData, const __u16 lock_type,
1359 const int waitFlag)
1359{ 1360{
1360 struct smb_com_transaction2_sfi_req *pSMB = NULL; 1361 struct smb_com_transaction2_sfi_req *pSMB = NULL;
1361 struct smb_com_transaction2_sfi_rsp *pSMBr = NULL; 1362 struct smb_com_transaction2_sfi_rsp *pSMBr = NULL;
@@ -1366,6 +1367,10 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1366 __u16 params, param_offset, offset, byte_count, count; 1367 __u16 params, param_offset, offset, byte_count, count;
1367 1368
1368 cFYI(1, ("Posix Lock")); 1369 cFYI(1, ("Posix Lock"));
1370
1371 if(pLockData == NULL)
1372 return EINVAL;
1373
1369 rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); 1374 rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB);
1370 1375
1371 if (rc) 1376 if (rc)
@@ -1404,10 +1409,10 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1404 1409
1405 parm_data->lock_type = cpu_to_le16(lock_type); 1410 parm_data->lock_type = cpu_to_le16(lock_type);
1406 if(waitFlag) 1411 if(waitFlag)
1407 parm_data->lock_flags = 1; 1412 parm_data->lock_flags = cpu_to_le16(1);
1408 parm_data->pid = cpu_to_le32(current->tgid); 1413 parm_data->pid = cpu_to_le32(current->tgid);
1409 parm_data->start = lkoffset; 1414 parm_data->start = cpu_to_le64(pLockData->fl_start);
1410 parm_data->length = len; /* normalize negative numbers */ 1415 parm_data->length = cpu_to_le64(len); /* normalize negative numbers */
1411 1416
1412 pSMB->DataOffset = cpu_to_le16(offset); 1417 pSMB->DataOffset = cpu_to_le16(offset);
1413 pSMB->Fid = smb_file_id; 1418 pSMB->Fid = smb_file_id;
@@ -1419,8 +1424,33 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1419 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 1424 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
1420 if (rc) { 1425 if (rc) {
1421 cFYI(1, ("Send error in Posix Lock = %d", rc)); 1426 cFYI(1, ("Send error in Posix Lock = %d", rc));
1422 } 1427 } else if (get_flag) {
1428 /* lock structure can be returned on get */
1429 __u16 data_offset;
1430 __u16 data_count;
1431 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
1423 1432
1433 if (rc || (pSMBr->ByteCount < sizeof(struct cifs_posix_lock))) {
1434 rc = -EIO; /* bad smb */
1435 goto plk_err_exit;
1436 }
1437 if(pLockData == NULL) {
1438 rc = -EINVAL;
1439 goto plk_err_exit;
1440 }
1441 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
1442 data_count = le16_to_cpu(pSMBr->t2.DataCount);
1443 if(data_count < sizeof(struct cifs_posix_lock)) {
1444 rc = -EIO;
1445 goto plk_err_exit;
1446 }
1447 parm_data = (struct cifs_posix_lock *)
1448 ((char *)&pSMBr->hdr.Protocol + data_offset);
1449 if(parm_data->lock_type == cpu_to_le16(CIFS_UNLCK))
1450 pLockData->fl_type = F_UNLCK;
1451 }
1452
1453plk_err_exit:
1424 if (pSMB) 1454 if (pSMB)
1425 cifs_small_buf_release(pSMB); 1455 cifs_small_buf_release(pSMB);
1426 1456
@@ -3119,7 +3149,7 @@ findFirstRetry:
3119 psrch_inf->endOfSearch = FALSE; 3149 psrch_inf->endOfSearch = FALSE;
3120 3150
3121 psrch_inf->entries_in_buffer = le16_to_cpu(parms->SearchCount); 3151 psrch_inf->entries_in_buffer = le16_to_cpu(parms->SearchCount);
3122 psrch_inf->index_of_last_entry = 3152 psrch_inf->index_of_last_entry = 2 /* skip . and .. */ +
3123 psrch_inf->entries_in_buffer; 3153 psrch_inf->entries_in_buffer;
3124 *pnetfid = parms->SearchHandle; 3154 *pnetfid = parms->SearchHandle;
3125 } else { 3155 } else {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0b86d5ca9014..bae1479318d1 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2148,6 +2148,8 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2148/* We look for obvious messed up bcc or strings in response so we do not go off 2148/* We look for obvious messed up bcc or strings in response so we do not go off
2149 the end since (at least) WIN2K and Windows XP have a major bug in not null 2149 the end since (at least) WIN2K and Windows XP have a major bug in not null
2150 terminating last Unicode string in response */ 2150 terminating last Unicode string in response */
2151 if(ses->serverOS)
2152 kfree(ses->serverOS);
2151 ses->serverOS = kzalloc(2 * (len + 1), GFP_KERNEL); 2153 ses->serverOS = kzalloc(2 * (len + 1), GFP_KERNEL);
2152 if(ses->serverOS == NULL) 2154 if(ses->serverOS == NULL)
2153 goto sesssetup_nomem; 2155 goto sesssetup_nomem;
@@ -2160,6 +2162,8 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2160 if (remaining_words > 0) { 2162 if (remaining_words > 0) {
2161 len = UniStrnlen((wchar_t *)bcc_ptr, 2163 len = UniStrnlen((wchar_t *)bcc_ptr,
2162 remaining_words-1); 2164 remaining_words-1);
2165 if(ses->serverNOS)
2166 kfree(ses->serverNOS);
2163 ses->serverNOS = kzalloc(2 * (len + 1),GFP_KERNEL); 2167 ses->serverNOS = kzalloc(2 * (len + 1),GFP_KERNEL);
2164 if(ses->serverNOS == NULL) 2168 if(ses->serverNOS == NULL)
2165 goto sesssetup_nomem; 2169 goto sesssetup_nomem;
@@ -2177,6 +2181,8 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2177 if (remaining_words > 0) { 2181 if (remaining_words > 0) {
2178 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); 2182 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
2179 /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ 2183 /* last string is not always null terminated (for e.g. for Windows XP & 2000) */
2184 if(ses->serverDomain)
2185 kfree(ses->serverDomain);
2180 ses->serverDomain = 2186 ses->serverDomain =
2181 kzalloc(2*(len+1),GFP_KERNEL); 2187 kzalloc(2*(len+1),GFP_KERNEL);
2182 if(ses->serverDomain == NULL) 2188 if(ses->serverDomain == NULL)
@@ -2187,15 +2193,22 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2187 ses->serverDomain[2*len] = 0; 2193 ses->serverDomain[2*len] = 0;
2188 ses->serverDomain[1+(2*len)] = 0; 2194 ses->serverDomain[1+(2*len)] = 0;
2189 } /* else no more room so create dummy domain string */ 2195 } /* else no more room so create dummy domain string */
2190 else 2196 else {
2197 if(ses->serverDomain)
2198 kfree(ses->serverDomain);
2191 ses->serverDomain = 2199 ses->serverDomain =
2192 kzalloc(2, GFP_KERNEL); 2200 kzalloc(2, GFP_KERNEL);
2201 }
2193 } else { /* no room so create dummy domain and NOS string */ 2202 } else { /* no room so create dummy domain and NOS string */
2194 /* if these kcallocs fail not much we 2203 /* if these kcallocs fail not much we
2195 can do, but better to not fail the 2204 can do, but better to not fail the
2196 sesssetup itself */ 2205 sesssetup itself */
2206 if(ses->serverDomain)
2207 kfree(ses->serverDomain);
2197 ses->serverDomain = 2208 ses->serverDomain =
2198 kzalloc(2, GFP_KERNEL); 2209 kzalloc(2, GFP_KERNEL);
2210 if(ses->serverNOS)
2211 kfree(ses->serverNOS);
2199 ses->serverNOS = 2212 ses->serverNOS =
2200 kzalloc(2, GFP_KERNEL); 2213 kzalloc(2, GFP_KERNEL);
2201 } 2214 }
@@ -2204,6 +2217,8 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2204 if (((long) bcc_ptr + len) - (long) 2217 if (((long) bcc_ptr + len) - (long)
2205 pByteArea(smb_buffer_response) 2218 pByteArea(smb_buffer_response)
2206 <= BCC(smb_buffer_response)) { 2219 <= BCC(smb_buffer_response)) {
2220 if(ses->serverOS)
2221 kfree(ses->serverOS);
2207 ses->serverOS = kzalloc(len + 1,GFP_KERNEL); 2222 ses->serverOS = kzalloc(len + 1,GFP_KERNEL);
2208 if(ses->serverOS == NULL) 2223 if(ses->serverOS == NULL)
2209 goto sesssetup_nomem; 2224 goto sesssetup_nomem;
@@ -2214,6 +2229,8 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2214 bcc_ptr++; 2229 bcc_ptr++;
2215 2230
2216 len = strnlen(bcc_ptr, 1024); 2231 len = strnlen(bcc_ptr, 1024);
2232 if(ses->serverNOS)
2233 kfree(ses->serverNOS);
2217 ses->serverNOS = kzalloc(len + 1,GFP_KERNEL); 2234 ses->serverNOS = kzalloc(len + 1,GFP_KERNEL);
2218 if(ses->serverNOS == NULL) 2235 if(ses->serverNOS == NULL)
2219 goto sesssetup_nomem; 2236 goto sesssetup_nomem;
@@ -2223,6 +2240,8 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2223 bcc_ptr++; 2240 bcc_ptr++;
2224 2241
2225 len = strnlen(bcc_ptr, 1024); 2242 len = strnlen(bcc_ptr, 1024);
2243 if(ses->serverDomain)
2244 kfree(ses->serverDomain);
2226 ses->serverDomain = kzalloc(len + 1,GFP_KERNEL); 2245 ses->serverDomain = kzalloc(len + 1,GFP_KERNEL);
2227 if(ses->serverDomain == NULL) 2246 if(ses->serverDomain == NULL)
2228 goto sesssetup_nomem; 2247 goto sesssetup_nomem;
@@ -2427,6 +2446,8 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2427/* We look for obvious messed up bcc or strings in response so we do not go off 2446/* We look for obvious messed up bcc or strings in response so we do not go off
2428 the end since (at least) WIN2K and Windows XP have a major bug in not null 2447 the end since (at least) WIN2K and Windows XP have a major bug in not null
2429 terminating last Unicode string in response */ 2448 terminating last Unicode string in response */
2449 if(ses->serverOS)
2450 kfree(ses->serverOS);
2430 ses->serverOS = 2451 ses->serverOS =
2431 kzalloc(2 * (len + 1), GFP_KERNEL); 2452 kzalloc(2 * (len + 1), GFP_KERNEL);
2432 cifs_strfromUCS_le(ses->serverOS, 2453 cifs_strfromUCS_le(ses->serverOS,
@@ -2441,6 +2462,8 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2441 len = UniStrnlen((wchar_t *)bcc_ptr, 2462 len = UniStrnlen((wchar_t *)bcc_ptr,
2442 remaining_words 2463 remaining_words
2443 - 1); 2464 - 1);
2465 if(ses->serverNOS)
2466 kfree(ses->serverNOS);
2444 ses->serverNOS = 2467 ses->serverNOS =
2445 kzalloc(2 * (len + 1), 2468 kzalloc(2 * (len + 1),
2446 GFP_KERNEL); 2469 GFP_KERNEL);
@@ -2454,7 +2477,9 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2454 remaining_words -= len + 1; 2477 remaining_words -= len + 1;
2455 if (remaining_words > 0) { 2478 if (remaining_words > 0) {
2456 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); 2479 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
2457 /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ 2480 /* last string not null terminated (e.g.Windows XP/2000) */
2481 if(ses->serverDomain)
2482 kfree(ses->serverDomain);
2458 ses->serverDomain = kzalloc(2*(len+1),GFP_KERNEL); 2483 ses->serverDomain = kzalloc(2*(len+1),GFP_KERNEL);
2459 cifs_strfromUCS_le(ses->serverDomain, 2484 cifs_strfromUCS_le(ses->serverDomain,
2460 (__le16 *)bcc_ptr, 2485 (__le16 *)bcc_ptr,
@@ -2463,11 +2488,18 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2463 ses->serverDomain[2*len] = 0; 2488 ses->serverDomain[2*len] = 0;
2464 ses->serverDomain[1+(2*len)] = 0; 2489 ses->serverDomain[1+(2*len)] = 0;
2465 } /* else no more room so create dummy domain string */ 2490 } /* else no more room so create dummy domain string */
2466 else 2491 else {
2492 if(ses->serverDomain)
2493 kfree(ses->serverDomain);
2467 ses->serverDomain = 2494 ses->serverDomain =
2468 kzalloc(2,GFP_KERNEL); 2495 kzalloc(2,GFP_KERNEL);
2469 } else { /* no room so create dummy domain and NOS string */ 2496 }
2497 } else {/* no room use dummy domain&NOS */
2498 if(ses->serverDomain)
2499 kfree(ses->serverDomain);
2470 ses->serverDomain = kzalloc(2, GFP_KERNEL); 2500 ses->serverDomain = kzalloc(2, GFP_KERNEL);
2501 if(ses->serverNOS)
2502 kfree(ses->serverNOS);
2471 ses->serverNOS = kzalloc(2, GFP_KERNEL); 2503 ses->serverNOS = kzalloc(2, GFP_KERNEL);
2472 } 2504 }
2473 } else { /* ASCII */ 2505 } else { /* ASCII */
@@ -2476,6 +2508,8 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2476 if (((long) bcc_ptr + len) - (long) 2508 if (((long) bcc_ptr + len) - (long)
2477 pByteArea(smb_buffer_response) 2509 pByteArea(smb_buffer_response)
2478 <= BCC(smb_buffer_response)) { 2510 <= BCC(smb_buffer_response)) {
2511 if(ses->serverOS)
2512 kfree(ses->serverOS);
2479 ses->serverOS = kzalloc(len + 1, GFP_KERNEL); 2513 ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
2480 strncpy(ses->serverOS, bcc_ptr, len); 2514 strncpy(ses->serverOS, bcc_ptr, len);
2481 2515
@@ -2484,6 +2518,8 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2484 bcc_ptr++; 2518 bcc_ptr++;
2485 2519
2486 len = strnlen(bcc_ptr, 1024); 2520 len = strnlen(bcc_ptr, 1024);
2521 if(ses->serverNOS)
2522 kfree(ses->serverNOS);
2487 ses->serverNOS = kzalloc(len + 1,GFP_KERNEL); 2523 ses->serverNOS = kzalloc(len + 1,GFP_KERNEL);
2488 strncpy(ses->serverNOS, bcc_ptr, len); 2524 strncpy(ses->serverNOS, bcc_ptr, len);
2489 bcc_ptr += len; 2525 bcc_ptr += len;
@@ -2491,6 +2527,8 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2491 bcc_ptr++; 2527 bcc_ptr++;
2492 2528
2493 len = strnlen(bcc_ptr, 1024); 2529 len = strnlen(bcc_ptr, 1024);
2530 if(ses->serverDomain)
2531 kfree(ses->serverDomain);
2494 ses->serverDomain = kzalloc(len + 1, GFP_KERNEL); 2532 ses->serverDomain = kzalloc(len + 1, GFP_KERNEL);
2495 strncpy(ses->serverDomain, bcc_ptr, len); 2533 strncpy(ses->serverDomain, bcc_ptr, len);
2496 bcc_ptr += len; 2534 bcc_ptr += len;
@@ -2728,6 +2766,8 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2728/* We look for obvious messed up bcc or strings in response so we do not go off 2766/* We look for obvious messed up bcc or strings in response so we do not go off
2729 the end since (at least) WIN2K and Windows XP have a major bug in not null 2767 the end since (at least) WIN2K and Windows XP have a major bug in not null
2730 terminating last Unicode string in response */ 2768 terminating last Unicode string in response */
2769 if(ses->serverOS)
2770 kfree(ses->serverOS);
2731 ses->serverOS = 2771 ses->serverOS =
2732 kzalloc(2 * (len + 1), GFP_KERNEL); 2772 kzalloc(2 * (len + 1), GFP_KERNEL);
2733 cifs_strfromUCS_le(ses->serverOS, 2773 cifs_strfromUCS_le(ses->serverOS,
@@ -2743,6 +2783,8 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2743 bcc_ptr, 2783 bcc_ptr,
2744 remaining_words 2784 remaining_words
2745 - 1); 2785 - 1);
2786 if(ses->serverNOS)
2787 kfree(ses->serverNOS);
2746 ses->serverNOS = 2788 ses->serverNOS =
2747 kzalloc(2 * (len + 1), 2789 kzalloc(2 * (len + 1),
2748 GFP_KERNEL); 2790 GFP_KERNEL);
@@ -2760,6 +2802,8 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2760 if (remaining_words > 0) { 2802 if (remaining_words > 0) {
2761 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); 2803 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
2762 /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ 2804 /* last string is not always null terminated (for e.g. for Windows XP & 2000) */
2805 if(ses->serverDomain)
2806 kfree(ses->serverDomain);
2763 ses->serverDomain = 2807 ses->serverDomain =
2764 kzalloc(2 * 2808 kzalloc(2 *
2765 (len + 2809 (len +
@@ -2777,13 +2821,20 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2777 [1 + (2 * len)] 2821 [1 + (2 * len)]
2778 = 0; 2822 = 0;
2779 } /* else no more room so create dummy domain string */ 2823 } /* else no more room so create dummy domain string */
2780 else 2824 else {
2825 if(ses->serverDomain)
2826 kfree(ses->serverDomain);
2781 ses->serverDomain = 2827 ses->serverDomain =
2782 kzalloc(2, 2828 kzalloc(2,
2783 GFP_KERNEL); 2829 GFP_KERNEL);
2830 }
2784 } else { /* no room so create dummy domain and NOS string */ 2831 } else { /* no room so create dummy domain and NOS string */
2832 if(ses->serverDomain);
2833 kfree(ses->serverDomain);
2785 ses->serverDomain = 2834 ses->serverDomain =
2786 kzalloc(2, GFP_KERNEL); 2835 kzalloc(2, GFP_KERNEL);
2836 if(ses->serverNOS)
2837 kfree(ses->serverNOS);
2787 ses->serverNOS = 2838 ses->serverNOS =
2788 kzalloc(2, GFP_KERNEL); 2839 kzalloc(2, GFP_KERNEL);
2789 } 2840 }
@@ -2792,6 +2843,8 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2792 if (((long) bcc_ptr + len) - (long) 2843 if (((long) bcc_ptr + len) - (long)
2793 pByteArea(smb_buffer_response) 2844 pByteArea(smb_buffer_response)
2794 <= BCC(smb_buffer_response)) { 2845 <= BCC(smb_buffer_response)) {
2846 if(ses->serverOS)
2847 kfree(ses->serverOS);
2795 ses->serverOS = 2848 ses->serverOS =
2796 kzalloc(len + 1, 2849 kzalloc(len + 1,
2797 GFP_KERNEL); 2850 GFP_KERNEL);
@@ -2803,6 +2856,8 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2803 bcc_ptr++; 2856 bcc_ptr++;
2804 2857
2805 len = strnlen(bcc_ptr, 1024); 2858 len = strnlen(bcc_ptr, 1024);
2859 if(ses->serverNOS)
2860 kfree(ses->serverNOS);
2806 ses->serverNOS = 2861 ses->serverNOS =
2807 kzalloc(len + 1, 2862 kzalloc(len + 1,
2808 GFP_KERNEL); 2863 GFP_KERNEL);
@@ -2812,6 +2867,8 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2812 bcc_ptr++; 2867 bcc_ptr++;
2813 2868
2814 len = strnlen(bcc_ptr, 1024); 2869 len = strnlen(bcc_ptr, 1024);
2870 if(ses->serverDomain)
2871 kfree(ses->serverDomain);
2815 ses->serverDomain = 2872 ses->serverDomain =
2816 kzalloc(len + 1, 2873 kzalloc(len + 1,
2817 GFP_KERNEL); 2874 GFP_KERNEL);
@@ -3116,6 +3173,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3116/* We look for obvious messed up bcc or strings in response so we do not go off 3173/* We look for obvious messed up bcc or strings in response so we do not go off
3117 the end since (at least) WIN2K and Windows XP have a major bug in not null 3174 the end since (at least) WIN2K and Windows XP have a major bug in not null
3118 terminating last Unicode string in response */ 3175 terminating last Unicode string in response */
3176 if(ses->serverOS)
3177 kfree(ses->serverOS);
3119 ses->serverOS = 3178 ses->serverOS =
3120 kzalloc(2 * (len + 1), GFP_KERNEL); 3179 kzalloc(2 * (len + 1), GFP_KERNEL);
3121 cifs_strfromUCS_le(ses->serverOS, 3180 cifs_strfromUCS_le(ses->serverOS,
@@ -3131,6 +3190,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3131 bcc_ptr, 3190 bcc_ptr,
3132 remaining_words 3191 remaining_words
3133 - 1); 3192 - 1);
3193 if(ses->serverNOS)
3194 kfree(ses->serverNOS);
3134 ses->serverNOS = 3195 ses->serverNOS =
3135 kzalloc(2 * (len + 1), 3196 kzalloc(2 * (len + 1),
3136 GFP_KERNEL); 3197 GFP_KERNEL);
@@ -3147,6 +3208,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3147 if (remaining_words > 0) { 3208 if (remaining_words > 0) {
3148 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); 3209 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
3149 /* last string not always null terminated (e.g. for Windows XP & 2000) */ 3210 /* last string not always null terminated (e.g. for Windows XP & 2000) */
3211 if(ses->serverDomain)
3212 kfree(ses->serverDomain);
3150 ses->serverDomain = 3213 ses->serverDomain =
3151 kzalloc(2 * 3214 kzalloc(2 *
3152 (len + 3215 (len +
@@ -3172,10 +3235,17 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3172 len)] 3235 len)]
3173 = 0; 3236 = 0;
3174 } /* else no more room so create dummy domain string */ 3237 } /* else no more room so create dummy domain string */
3175 else 3238 else {
3239 if(ses->serverDomain)
3240 kfree(ses->serverDomain);
3176 ses->serverDomain = kzalloc(2,GFP_KERNEL); 3241 ses->serverDomain = kzalloc(2,GFP_KERNEL);
3242 }
3177 } else { /* no room so create dummy domain and NOS string */ 3243 } else { /* no room so create dummy domain and NOS string */
3244 if(ses->serverDomain)
3245 kfree(ses->serverDomain);
3178 ses->serverDomain = kzalloc(2, GFP_KERNEL); 3246 ses->serverDomain = kzalloc(2, GFP_KERNEL);
3247 if(ses->serverNOS)
3248 kfree(ses->serverNOS);
3179 ses->serverNOS = kzalloc(2, GFP_KERNEL); 3249 ses->serverNOS = kzalloc(2, GFP_KERNEL);
3180 } 3250 }
3181 } else { /* ASCII */ 3251 } else { /* ASCII */
@@ -3183,6 +3253,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3183 if (((long) bcc_ptr + len) - 3253 if (((long) bcc_ptr + len) -
3184 (long) pByteArea(smb_buffer_response) 3254 (long) pByteArea(smb_buffer_response)
3185 <= BCC(smb_buffer_response)) { 3255 <= BCC(smb_buffer_response)) {
3256 if(ses->serverOS)
3257 kfree(ses->serverOS);
3186 ses->serverOS = kzalloc(len + 1,GFP_KERNEL); 3258 ses->serverOS = kzalloc(len + 1,GFP_KERNEL);
3187 strncpy(ses->serverOS,bcc_ptr, len); 3259 strncpy(ses->serverOS,bcc_ptr, len);
3188 3260
@@ -3191,6 +3263,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3191 bcc_ptr++; 3263 bcc_ptr++;
3192 3264
3193 len = strnlen(bcc_ptr, 1024); 3265 len = strnlen(bcc_ptr, 1024);
3266 if(ses->serverNOS)
3267 kfree(ses->serverNOS);
3194 ses->serverNOS = kzalloc(len+1,GFP_KERNEL); 3268 ses->serverNOS = kzalloc(len+1,GFP_KERNEL);
3195 strncpy(ses->serverNOS, bcc_ptr, len); 3269 strncpy(ses->serverNOS, bcc_ptr, len);
3196 bcc_ptr += len; 3270 bcc_ptr += len;
@@ -3198,6 +3272,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3198 bcc_ptr++; 3272 bcc_ptr++;
3199 3273
3200 len = strnlen(bcc_ptr, 1024); 3274 len = strnlen(bcc_ptr, 1024);
3275 if(ses->serverDomain)
3276 kfree(ses->serverDomain);
3201 ses->serverDomain = kzalloc(len+1,GFP_KERNEL); 3277 ses->serverDomain = kzalloc(len+1,GFP_KERNEL);
3202 strncpy(ses->serverDomain, bcc_ptr, len); 3278 strncpy(ses->serverDomain, bcc_ptr, len);
3203 bcc_ptr += len; 3279 bcc_ptr += len;
@@ -3282,7 +3358,8 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3282 bcc_ptr++; /* align */ 3358 bcc_ptr++; /* align */
3283 } 3359 }
3284 3360
3285 if(ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 3361 if(ses->server->secMode &
3362 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
3286 smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; 3363 smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
3287 3364
3288 if (ses->capabilities & CAP_STATUS32) { 3365 if (ses->capabilities & CAP_STATUS32) {
@@ -3294,8 +3371,10 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3294 if (ses->capabilities & CAP_UNICODE) { 3371 if (ses->capabilities & CAP_UNICODE) {
3295 smb_buffer->Flags2 |= SMBFLG2_UNICODE; 3372 smb_buffer->Flags2 |= SMBFLG2_UNICODE;
3296 length = 3373 length =
3297 cifs_strtoUCS((__le16 *) bcc_ptr, tree, 100, nls_codepage); 3374 cifs_strtoUCS((__le16 *) bcc_ptr, tree,
3298 bcc_ptr += 2 * length; /* convert num of 16 bit words to bytes */ 3375 6 /* max utf8 char length in bytes */ *
3376 (/* server len*/ + 256 /* share len */), nls_codepage);
3377 bcc_ptr += 2 * length; /* convert num 16 bit words to bytes */
3299 bcc_ptr += 2; /* skip trailing null */ 3378 bcc_ptr += 2; /* skip trailing null */
3300 } else { /* ASCII */ 3379 } else { /* ASCII */
3301 strcpy(bcc_ptr, tree); 3380 strcpy(bcc_ptr, tree);
@@ -3447,6 +3526,12 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
3447 pSesInfo->server->secMode, 3526 pSesInfo->server->secMode,
3448 pSesInfo->server->capabilities, 3527 pSesInfo->server->capabilities,
3449 pSesInfo->server->timeZone)); 3528 pSesInfo->server->timeZone));
3529#ifdef CONFIG_CIFS_EXPERIMENTAL
3530 if(experimEnabled > 1)
3531 rc = CIFS_SessSetup(xid, pSesInfo, CIFS_NTLM /* type */,
3532 &ntlmv2_flag, nls_info);
3533 else
3534#endif
3450 if (extended_security 3535 if (extended_security
3451 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) 3536 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
3452 && (pSesInfo->server->secType == NTLMSSP)) { 3537 && (pSesInfo->server->secType == NTLMSSP)) {
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 1d0ca3eaaca5..82315edc77d7 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -139,9 +139,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
139 cifs_sb = CIFS_SB(inode->i_sb); 139 cifs_sb = CIFS_SB(inode->i_sb);
140 pTcon = cifs_sb->tcon; 140 pTcon = cifs_sb->tcon;
141 141
142 mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);
143 full_path = build_path_from_dentry(direntry); 142 full_path = build_path_from_dentry(direntry);
144 mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);
145 if(full_path == NULL) { 143 if(full_path == NULL) {
146 FreeXid(xid); 144 FreeXid(xid);
147 return -ENOMEM; 145 return -ENOMEM;
@@ -316,9 +314,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
316 cifs_sb = CIFS_SB(inode->i_sb); 314 cifs_sb = CIFS_SB(inode->i_sb);
317 pTcon = cifs_sb->tcon; 315 pTcon = cifs_sb->tcon;
318 316
319 mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);
320 full_path = build_path_from_dentry(direntry); 317 full_path = build_path_from_dentry(direntry);
321 mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);
322 if(full_path == NULL) 318 if(full_path == NULL)
323 rc = -ENOMEM; 319 rc = -ENOMEM;
324 else if (pTcon->ses->capabilities & CAP_UNIX) { 320 else if (pTcon->ses->capabilities & CAP_UNIX) {
@@ -440,6 +436,20 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name
440 cifs_sb = CIFS_SB(parent_dir_inode->i_sb); 436 cifs_sb = CIFS_SB(parent_dir_inode->i_sb);
441 pTcon = cifs_sb->tcon; 437 pTcon = cifs_sb->tcon;
442 438
439 /*
440 * Don't allow the separator character in a path component.
441 * The VFS will not allow "/", but "\" is allowed by posix.
442 */
443 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) {
444 int i;
445 for (i = 0; i < direntry->d_name.len; i++)
446 if (direntry->d_name.name[i] == '\\') {
447 cFYI(1, ("Invalid file name"));
448 FreeXid(xid);
449 return ERR_PTR(-EINVAL);
450 }
451 }
452
443 /* can not grab the rename sem here since it would 453 /* can not grab the rename sem here since it would
444 deadlock in the cases (beginning of sys_rename itself) 454 deadlock in the cases (beginning of sys_rename itself)
445 in which we already have the sb rename sem */ 455 in which we already have the sb rename sem */
diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c
index ec4dfe9bf5ef..633a93811328 100644
--- a/fs/cifs/fcntl.c
+++ b/fs/cifs/fcntl.c
@@ -86,9 +86,7 @@ int cifs_dir_notify(struct file * file, unsigned long arg)
86 cifs_sb = CIFS_SB(file->f_dentry->d_sb); 86 cifs_sb = CIFS_SB(file->f_dentry->d_sb);
87 pTcon = cifs_sb->tcon; 87 pTcon = cifs_sb->tcon;
88 88
89 mutex_lock(&file->f_dentry->d_sb->s_vfs_rename_mutex);
90 full_path = build_path_from_dentry(file->f_dentry); 89 full_path = build_path_from_dentry(file->f_dentry);
91 mutex_unlock(&file->f_dentry->d_sb->s_vfs_rename_mutex);
92 90
93 if(full_path == NULL) { 91 if(full_path == NULL) {
94 rc = -ENOMEM; 92 rc = -ENOMEM;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 5c497c529772..e2b4ce1dad66 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -84,6 +84,8 @@ static inline int cifs_get_disposition(unsigned int flags)
84 return FILE_OVERWRITE_IF; 84 return FILE_OVERWRITE_IF;
85 else if ((flags & O_CREAT) == O_CREAT) 85 else if ((flags & O_CREAT) == O_CREAT)
86 return FILE_OPEN_IF; 86 return FILE_OPEN_IF;
87 else if ((flags & O_TRUNC) == O_TRUNC)
88 return FILE_OVERWRITE;
87 else 89 else
88 return FILE_OPEN; 90 return FILE_OPEN;
89} 91}
@@ -203,9 +205,7 @@ int cifs_open(struct inode *inode, struct file *file)
203 } 205 }
204 } 206 }
205 207
206 mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
207 full_path = build_path_from_dentry(file->f_dentry); 208 full_path = build_path_from_dentry(file->f_dentry);
208 mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
209 if (full_path == NULL) { 209 if (full_path == NULL) {
210 FreeXid(xid); 210 FreeXid(xid);
211 return -ENOMEM; 211 return -ENOMEM;
@@ -658,7 +658,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
658 else 658 else
659 posix_lock_type = CIFS_WRLCK; 659 posix_lock_type = CIFS_WRLCK;
660 rc = CIFSSMBPosixLock(xid, pTcon, netfid, 1 /* get */, 660 rc = CIFSSMBPosixLock(xid, pTcon, netfid, 1 /* get */,
661 length, pfLock->fl_start, 661 length, pfLock,
662 posix_lock_type, wait_flag); 662 posix_lock_type, wait_flag);
663 FreeXid(xid); 663 FreeXid(xid);
664 return rc; 664 return rc;
@@ -706,7 +706,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
706 return -EOPNOTSUPP; 706 return -EOPNOTSUPP;
707 } 707 }
708 rc = CIFSSMBPosixLock(xid, pTcon, netfid, 0 /* set */, 708 rc = CIFSSMBPosixLock(xid, pTcon, netfid, 0 /* set */,
709 length, pfLock->fl_start, 709 length, pfLock,
710 posix_lock_type, wait_flag); 710 posix_lock_type, wait_flag);
711 } else 711 } else
712 rc = CIFSSMBLock(xid, pTcon, netfid, length, pfLock->fl_start, 712 rc = CIFSSMBLock(xid, pTcon, netfid, length, pfLock->fl_start,
@@ -906,9 +906,10 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
906 if (rc != 0) 906 if (rc != 0)
907 break; 907 break;
908 } 908 }
909 /* BB FIXME We can not sign across two buffers yet */ 909 if(experimEnabled || (pTcon->ses->server &&
910 if((pTcon->ses->server->secMode & 910 ((pTcon->ses->server->secMode &
911 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) == 0) { 911 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
912 == 0))) {
912 struct kvec iov[2]; 913 struct kvec iov[2];
913 unsigned int len; 914 unsigned int len;
914 915
@@ -923,13 +924,13 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
923 *poffset, &bytes_written, 924 *poffset, &bytes_written,
924 iov, 1, long_op); 925 iov, 1, long_op);
925 } else 926 } else
926 /* BB FIXME fixup indentation of line below */ 927 rc = CIFSSMBWrite(xid, pTcon,
927 rc = CIFSSMBWrite(xid, pTcon, 928 open_file->netfid,
928 open_file->netfid, 929 min_t(const int, cifs_sb->wsize,
929 min_t(const int, cifs_sb->wsize, 930 write_size - total_written),
930 write_size - total_written), 931 *poffset, &bytes_written,
931 *poffset, &bytes_written, 932 write_data + total_written,
932 write_data + total_written, NULL, long_op); 933 NULL, long_op);
933 } 934 }
934 if (rc || (bytes_written == 0)) { 935 if (rc || (bytes_written == 0)) {
935 if (total_written) 936 if (total_written)
@@ -968,6 +969,16 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
968 struct cifsFileInfo *open_file; 969 struct cifsFileInfo *open_file;
969 int rc; 970 int rc;
970 971
972 /* Having a null inode here (because mapping->host was set to zero by
973 the VFS or MM) should not happen but we had reports of on oops (due to
974 it being zero) during stress testcases so we need to check for it */
975
976 if(cifs_inode == NULL) {
977 cERROR(1,("Null inode passed to cifs_writeable_file"));
978 dump_stack();
979 return NULL;
980 }
981
971 read_lock(&GlobalSMBSeslock); 982 read_lock(&GlobalSMBSeslock);
972 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 983 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
973 if (open_file->closePend) 984 if (open_file->closePend)
@@ -1093,12 +1104,11 @@ static int cifs_writepages(struct address_space *mapping,
1093 if (cifs_sb->wsize < PAGE_CACHE_SIZE) 1104 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1094 return generic_writepages(mapping, wbc); 1105 return generic_writepages(mapping, wbc);
1095 1106
1096 /* BB FIXME we do not have code to sign across multiple buffers yet,
1097 so go to older writepage style write which we can sign if needed */
1098 if((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server)) 1107 if((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server))
1099 if(cifs_sb->tcon->ses->server->secMode & 1108 if(cifs_sb->tcon->ses->server->secMode &
1100 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 1109 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1101 return generic_writepages(mapping, wbc); 1110 if(!experimEnabled)
1111 return generic_writepages(mapping, wbc);
1102 1112
1103 /* 1113 /*
1104 * BB: Is this meaningful for a non-block-device file system? 1114 * BB: Is this meaningful for a non-block-device file system?
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 957ddd1571c6..4093764ef461 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -722,9 +722,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
722 cifs_sb = CIFS_SB(inode->i_sb); 722 cifs_sb = CIFS_SB(inode->i_sb);
723 pTcon = cifs_sb->tcon; 723 pTcon = cifs_sb->tcon;
724 724
725 mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
726 full_path = build_path_from_dentry(direntry); 725 full_path = build_path_from_dentry(direntry);
727 mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
728 if (full_path == NULL) { 726 if (full_path == NULL) {
729 FreeXid(xid); 727 FreeXid(xid);
730 return -ENOMEM; 728 return -ENOMEM;
@@ -807,9 +805,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
807 cifs_sb = CIFS_SB(inode->i_sb); 805 cifs_sb = CIFS_SB(inode->i_sb);
808 pTcon = cifs_sb->tcon; 806 pTcon = cifs_sb->tcon;
809 807
810 mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
811 full_path = build_path_from_dentry(direntry); 808 full_path = build_path_from_dentry(direntry);
812 mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
813 if (full_path == NULL) { 809 if (full_path == NULL) {
814 FreeXid(xid); 810 FreeXid(xid);
815 return -ENOMEM; 811 return -ENOMEM;
@@ -1141,9 +1137,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1141 rc = 0; 1137 rc = 0;
1142 } 1138 }
1143 1139
1144 mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);
1145 full_path = build_path_from_dentry(direntry); 1140 full_path = build_path_from_dentry(direntry);
1146 mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);
1147 if (full_path == NULL) { 1141 if (full_path == NULL) {
1148 FreeXid(xid); 1142 FreeXid(xid);
1149 return -ENOMEM; 1143 return -ENOMEM;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 9562f5bba65c..2ec99f833142 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -48,10 +48,8 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode,
48/* No need to check for cross device links since server will do that 48/* No need to check for cross device links since server will do that
49 BB note DFS case in future though (when we may have to check) */ 49 BB note DFS case in future though (when we may have to check) */
50 50
51 mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
52 fromName = build_path_from_dentry(old_file); 51 fromName = build_path_from_dentry(old_file);
53 toName = build_path_from_dentry(direntry); 52 toName = build_path_from_dentry(direntry);
54 mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
55 if((fromName == NULL) || (toName == NULL)) { 53 if((fromName == NULL) || (toName == NULL)) {
56 rc = -ENOMEM; 54 rc = -ENOMEM;
57 goto cifs_hl_exit; 55 goto cifs_hl_exit;
@@ -103,9 +101,7 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
103 101
104 xid = GetXid(); 102 xid = GetXid();
105 103
106 mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);
107 full_path = build_path_from_dentry(direntry); 104 full_path = build_path_from_dentry(direntry);
108 mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);
109 105
110 if (!full_path) 106 if (!full_path)
111 goto out_no_free; 107 goto out_no_free;
@@ -164,9 +160,7 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
164 cifs_sb = CIFS_SB(inode->i_sb); 160 cifs_sb = CIFS_SB(inode->i_sb);
165 pTcon = cifs_sb->tcon; 161 pTcon = cifs_sb->tcon;
166 162
167 mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
168 full_path = build_path_from_dentry(direntry); 163 full_path = build_path_from_dentry(direntry);
169 mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
170 164
171 if(full_path == NULL) { 165 if(full_path == NULL) {
172 FreeXid(xid); 166 FreeXid(xid);
diff --git a/fs/cifs/ntlmssp.c b/fs/cifs/ntlmssp.c
index 78866f925747..115359cc7a32 100644
--- a/fs/cifs/ntlmssp.c
+++ b/fs/cifs/ntlmssp.c
@@ -121,6 +121,20 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, const int type,
121 } 121 }
122 122
123 123
124 /* copy session key */
125
126 /* if Unicode, align strings to two byte boundary */
127
128 /* copy user name */ /* BB Do we need to special case null user name? */
129
130 /* copy domain name */
131
132 /* copy Linux version */
133
134 /* copy network operating system name */
135
136 /* update bcc and smb buffer length */
137
124/* rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buf_type, 0); */ 138/* rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buf_type, 0); */
125 /* SMB request buf freed in SendReceive2 */ 139 /* SMB request buf freed in SendReceive2 */
126 140
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 2f6e2825571e..b689c5035124 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -404,9 +404,7 @@ static int initiate_cifs_search(const int xid, struct file *file)
404 if(pTcon == NULL) 404 if(pTcon == NULL)
405 return -EINVAL; 405 return -EINVAL;
406 406
407 mutex_lock(&file->f_dentry->d_sb->s_vfs_rename_mutex);
408 full_path = build_path_from_dentry(file->f_dentry); 407 full_path = build_path_from_dentry(file->f_dentry);
409 mutex_unlock(&file->f_dentry->d_sb->s_vfs_rename_mutex);
410 408
411 if(full_path == NULL) { 409 if(full_path == NULL) {
412 return -ENOMEM; 410 return -ENOMEM;
@@ -592,6 +590,13 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
592 first_entry_in_buffer = 590 first_entry_in_buffer =
593 cifsFile->srch_inf.index_of_last_entry - 591 cifsFile->srch_inf.index_of_last_entry -
594 cifsFile->srch_inf.entries_in_buffer; 592 cifsFile->srch_inf.entries_in_buffer;
593
594 /* if first entry in buf is zero then is first buffer
595 in search response data which means it is likely . and ..
596 will be in this buffer, although some servers do not return
597 . and .. for the root of a drive and for those we need
598 to start two entries earlier */
599
595/* dump_cifs_file_struct(file, "In fce ");*/ 600/* dump_cifs_file_struct(file, "In fce ");*/
596 if(((index_to_find < cifsFile->srch_inf.index_of_last_entry) && 601 if(((index_to_find < cifsFile->srch_inf.index_of_last_entry) &&
597 is_dir_changed(file)) || 602 is_dir_changed(file)) ||
@@ -634,23 +639,14 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
634 char * end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + 639 char * end_of_smb = cifsFile->srch_inf.ntwrk_buf_start +
635 smbCalcSize((struct smb_hdr *) 640 smbCalcSize((struct smb_hdr *)
636 cifsFile->srch_inf.ntwrk_buf_start); 641 cifsFile->srch_inf.ntwrk_buf_start);
642
643 current_entry = cifsFile->srch_inf.srch_entries_start;
637 first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry 644 first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry
638 - cifsFile->srch_inf.entries_in_buffer; 645 - cifsFile->srch_inf.entries_in_buffer;
639 pos_in_buf = index_to_find - first_entry_in_buffer; 646 pos_in_buf = index_to_find - first_entry_in_buffer;
640 cFYI(1,("found entry - pos_in_buf %d",pos_in_buf)); 647 cFYI(1,("found entry - pos_in_buf %d",pos_in_buf));
641 current_entry = cifsFile->srch_inf.srch_entries_start;
642 for(i=0;(i<(pos_in_buf)) && (current_entry != NULL);i++) { 648 for(i=0;(i<(pos_in_buf)) && (current_entry != NULL);i++) {
643 /* go entry by entry figuring out which is first */ 649 /* go entry by entry figuring out which is first */
644 /* if( . or ..)
645 skip */
646 rc = cifs_entry_is_dot(current_entry,cifsFile);
647 if(rc == 1) /* is . or .. so skip */ {
648 cFYI(1,("Entry is .")); /* BB removeme BB */
649 /* continue; */
650 } else if (rc == 2 ) {
651 cFYI(1,("Entry is ..")); /* BB removeme BB */
652 /* continue; */
653 }
654 current_entry = nxt_dir_entry(current_entry,end_of_smb); 650 current_entry = nxt_dir_entry(current_entry,end_of_smb);
655 } 651 }
656 if((current_entry == NULL) && (i < pos_in_buf)) { 652 if((current_entry == NULL) && (i < pos_in_buf)) {
@@ -770,6 +766,11 @@ static int cifs_filldir(char *pfindEntry, struct file *file,
770 if(file->f_dentry == NULL) 766 if(file->f_dentry == NULL)
771 return -ENOENT; 767 return -ENOENT;
772 768
769 rc = cifs_entry_is_dot(pfindEntry,pCifsF);
770 /* skip . and .. since we added them first */
771 if(rc != 0)
772 return 0;
773
773 cifs_sb = CIFS_SB(file->f_dentry->d_sb); 774 cifs_sb = CIFS_SB(file->f_dentry->d_sb);
774 775
775 qstring.name = scratch_buf; 776 qstring.name = scratch_buf;
@@ -898,22 +899,22 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
898 899
899 switch ((int) file->f_pos) { 900 switch ((int) file->f_pos) {
900 case 0: 901 case 0:
901 /*if (filldir(direntry, ".", 1, file->f_pos, 902 if (filldir(direntry, ".", 1, file->f_pos,
902 file->f_dentry->d_inode->i_ino, DT_DIR) < 0) { 903 file->f_dentry->d_inode->i_ino, DT_DIR) < 0) {
903 cERROR(1, ("Filldir for current dir failed ")); 904 cERROR(1, ("Filldir for current dir failed"));
904 rc = -ENOMEM; 905 rc = -ENOMEM;
905 break; 906 break;
906 } 907 }
907 file->f_pos++; */ 908 file->f_pos++;
908 case 1: 909 case 1:
909 /* if (filldir(direntry, "..", 2, file->f_pos, 910 if (filldir(direntry, "..", 2, file->f_pos,
910 file->f_dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) { 911 file->f_dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) {
911 cERROR(1, ("Filldir for parent dir failed ")); 912 cERROR(1, ("Filldir for parent dir failed "));
912 rc = -ENOMEM; 913 rc = -ENOMEM;
913 break; 914 break;
914 } 915 }
915 file->f_pos++; */ 916 file->f_pos++;
916 case 2: 917 default:
917 /* 1) If search is active, 918 /* 1) If search is active,
918 is in current search buffer? 919 is in current search buffer?
919 if it before then restart search 920 if it before then restart search
@@ -927,7 +928,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
927 return rc; 928 return rc;
928 } 929 }
929 } 930 }
930 default:
931 if(file->private_data == NULL) { 931 if(file->private_data == NULL) {
932 rc = -EINVAL; 932 rc = -EINVAL;
933 FreeXid(xid); 933 FreeXid(xid);
@@ -947,8 +947,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
947 kfree(cifsFile->search_resume_name); 947 kfree(cifsFile->search_resume_name);
948 cifsFile->search_resume_name = NULL; */ 948 cifsFile->search_resume_name = NULL; */
949 949
950 /* BB account for . and .. in f_pos as special case */
951
952 rc = find_cifs_entry(xid,pTcon, file, 950 rc = find_cifs_entry(xid,pTcon, file,
953 &current_entry,&num_to_fill); 951 &current_entry,&num_to_fill);
954 if(rc) { 952 if(rc) {
@@ -977,7 +975,8 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
977 num_to_fill, i)); 975 num_to_fill, i));
978 break; 976 break;
979 } 977 }
980 978 /* if buggy server returns . and .. late do
979 we want to check for that here? */
981 rc = cifs_filldir(current_entry, file, 980 rc = cifs_filldir(current_entry, file,
982 filldir, direntry,tmp_buf); 981 filldir, direntry,tmp_buf);
983 file->f_pos++; 982 file->f_pos++;
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 3938444d87b2..7754d641775e 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -62,9 +62,7 @@ int cifs_removexattr(struct dentry * direntry, const char * ea_name)
62 cifs_sb = CIFS_SB(sb); 62 cifs_sb = CIFS_SB(sb);
63 pTcon = cifs_sb->tcon; 63 pTcon = cifs_sb->tcon;
64 64
65 mutex_lock(&sb->s_vfs_rename_mutex);
66 full_path = build_path_from_dentry(direntry); 65 full_path = build_path_from_dentry(direntry);
67 mutex_unlock(&sb->s_vfs_rename_mutex);
68 if(full_path == NULL) { 66 if(full_path == NULL) {
69 FreeXid(xid); 67 FreeXid(xid);
70 return -ENOMEM; 68 return -ENOMEM;
@@ -116,9 +114,7 @@ int cifs_setxattr(struct dentry * direntry, const char * ea_name,
116 cifs_sb = CIFS_SB(sb); 114 cifs_sb = CIFS_SB(sb);
117 pTcon = cifs_sb->tcon; 115 pTcon = cifs_sb->tcon;
118 116
119 mutex_lock(&sb->s_vfs_rename_mutex);
120 full_path = build_path_from_dentry(direntry); 117 full_path = build_path_from_dentry(direntry);
121 mutex_unlock(&sb->s_vfs_rename_mutex);
122 if(full_path == NULL) { 118 if(full_path == NULL) {
123 FreeXid(xid); 119 FreeXid(xid);
124 return -ENOMEM; 120 return -ENOMEM;
@@ -223,9 +219,7 @@ ssize_t cifs_getxattr(struct dentry * direntry, const char * ea_name,
223 cifs_sb = CIFS_SB(sb); 219 cifs_sb = CIFS_SB(sb);
224 pTcon = cifs_sb->tcon; 220 pTcon = cifs_sb->tcon;
225 221
226 mutex_lock(&sb->s_vfs_rename_mutex);
227 full_path = build_path_from_dentry(direntry); 222 full_path = build_path_from_dentry(direntry);
228 mutex_unlock(&sb->s_vfs_rename_mutex);
229 if(full_path == NULL) { 223 if(full_path == NULL) {
230 FreeXid(xid); 224 FreeXid(xid);
231 return -ENOMEM; 225 return -ENOMEM;
@@ -341,9 +335,7 @@ ssize_t cifs_listxattr(struct dentry * direntry, char * data, size_t buf_size)
341 cifs_sb = CIFS_SB(sb); 335 cifs_sb = CIFS_SB(sb);
342 pTcon = cifs_sb->tcon; 336 pTcon = cifs_sb->tcon;
343 337
344 mutex_lock(&sb->s_vfs_rename_mutex);
345 full_path = build_path_from_dentry(direntry); 338 full_path = build_path_from_dentry(direntry);
346 mutex_unlock(&sb->s_vfs_rename_mutex);
347 if(full_path == NULL) { 339 if(full_path == NULL) {
348 FreeXid(xid); 340 FreeXid(xid);
349 return -ENOMEM; 341 return -ENOMEM;
diff --git a/fs/compat.c b/fs/compat.c
index 7f8e26ea427c..b1f64786a613 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1217,6 +1217,10 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
1217 if (ret < 0) 1217 if (ret < 0)
1218 goto out; 1218 goto out;
1219 1219
1220 ret = security_file_permission(file, type == READ ? MAY_READ:MAY_WRITE);
1221 if (ret)
1222 goto out;
1223
1220 fnv = NULL; 1224 fnv = NULL;
1221 if (type == READ) { 1225 if (type == READ) {
1222 fn = file->f_op->read; 1226 fn = file->f_op->read;
@@ -1313,6 +1317,26 @@ out:
1313 return ret; 1317 return ret;
1314} 1318}
1315 1319
1320asmlinkage long
1321compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
1322 unsigned int nr_segs, unsigned int flags)
1323{
1324 unsigned i;
1325 struct iovec *iov;
1326 if (nr_segs > UIO_MAXIOV)
1327 return -EINVAL;
1328 iov = compat_alloc_user_space(nr_segs * sizeof(struct iovec));
1329 for (i = 0; i < nr_segs; i++) {
1330 struct compat_iovec v;
1331 if (get_user(v.iov_base, &iov32[i].iov_base) ||
1332 get_user(v.iov_len, &iov32[i].iov_len) ||
1333 put_user(compat_ptr(v.iov_base), &iov[i].iov_base) ||
1334 put_user(v.iov_len, &iov[i].iov_len))
1335 return -EFAULT;
1336 }
1337 return sys_vmsplice(fd, iov, nr_segs, flags);
1338}
1339
1316/* 1340/*
1317 * Exactly like fs/open.c:sys_open(), except that it doesn't set the 1341 * Exactly like fs/open.c:sys_open(), except that it doesn't set the
1318 * O_LARGEFILE flag. 1342 * O_LARGEFILE flag.
@@ -1889,7 +1913,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1889 } 1913 }
1890 1914
1891 if (sigmask) { 1915 if (sigmask) {
1892 if (sigsetsize |= sizeof(compat_sigset_t)) 1916 if (sigsetsize != sizeof(compat_sigset_t))
1893 return -EINVAL; 1917 return -EINVAL;
1894 if (copy_from_user(&ss32, sigmask, sizeof(ss32))) 1918 if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
1895 return -EFAULT; 1919 return -EFAULT;
@@ -2006,109 +2030,115 @@ union compat_nfsctl_res {
2006 struct knfsd_fh cr32_getfs; 2030 struct knfsd_fh cr32_getfs;
2007}; 2031};
2008 2032
2009static int compat_nfs_svc_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg __user *arg) 2033static int compat_nfs_svc_trans(struct nfsctl_arg *karg,
2034 struct compat_nfsctl_arg __user *arg)
2010{ 2035{
2011 int err; 2036 if (!access_ok(VERIFY_READ, &arg->ca32_svc, sizeof(arg->ca32_svc)) ||
2012 2037 get_user(karg->ca_version, &arg->ca32_version) ||
2013 err = access_ok(VERIFY_READ, &arg->ca32_svc, sizeof(arg->ca32_svc)); 2038 __get_user(karg->ca_svc.svc_port, &arg->ca32_svc.svc32_port) ||
2014 err |= get_user(karg->ca_version, &arg->ca32_version); 2039 __get_user(karg->ca_svc.svc_nthreads,
2015 err |= __get_user(karg->ca_svc.svc_port, &arg->ca32_svc.svc32_port); 2040 &arg->ca32_svc.svc32_nthreads))
2016 err |= __get_user(karg->ca_svc.svc_nthreads, &arg->ca32_svc.svc32_nthreads); 2041 return -EFAULT;
2017 return (err) ? -EFAULT : 0; 2042 return 0;
2018} 2043}
2019 2044
2020static int compat_nfs_clnt_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg __user *arg) 2045static int compat_nfs_clnt_trans(struct nfsctl_arg *karg,
2046 struct compat_nfsctl_arg __user *arg)
2021{ 2047{
2022 int err; 2048 if (!access_ok(VERIFY_READ, &arg->ca32_client,
2023 2049 sizeof(arg->ca32_client)) ||
2024 err = access_ok(VERIFY_READ, &arg->ca32_client, sizeof(arg->ca32_client)); 2050 get_user(karg->ca_version, &arg->ca32_version) ||
2025 err |= get_user(karg->ca_version, &arg->ca32_version); 2051 __copy_from_user(&karg->ca_client.cl_ident[0],
2026 err |= __copy_from_user(&karg->ca_client.cl_ident[0], 2052 &arg->ca32_client.cl32_ident[0],
2027 &arg->ca32_client.cl32_ident[0], 2053 NFSCLNT_IDMAX) ||
2028 NFSCLNT_IDMAX); 2054 __get_user(karg->ca_client.cl_naddr,
2029 err |= __get_user(karg->ca_client.cl_naddr, &arg->ca32_client.cl32_naddr); 2055 &arg->ca32_client.cl32_naddr) ||
2030 err |= __copy_from_user(&karg->ca_client.cl_addrlist[0], 2056 __copy_from_user(&karg->ca_client.cl_addrlist[0],
2031 &arg->ca32_client.cl32_addrlist[0], 2057 &arg->ca32_client.cl32_addrlist[0],
2032 (sizeof(struct in_addr) * NFSCLNT_ADDRMAX)); 2058 (sizeof(struct in_addr) * NFSCLNT_ADDRMAX)) ||
2033 err |= __get_user(karg->ca_client.cl_fhkeytype, 2059 __get_user(karg->ca_client.cl_fhkeytype,
2034 &arg->ca32_client.cl32_fhkeytype); 2060 &arg->ca32_client.cl32_fhkeytype) ||
2035 err |= __get_user(karg->ca_client.cl_fhkeylen, 2061 __get_user(karg->ca_client.cl_fhkeylen,
2036 &arg->ca32_client.cl32_fhkeylen); 2062 &arg->ca32_client.cl32_fhkeylen) ||
2037 err |= __copy_from_user(&karg->ca_client.cl_fhkey[0], 2063 __copy_from_user(&karg->ca_client.cl_fhkey[0],
2038 &arg->ca32_client.cl32_fhkey[0], 2064 &arg->ca32_client.cl32_fhkey[0],
2039 NFSCLNT_KEYMAX); 2065 NFSCLNT_KEYMAX))
2066 return -EFAULT;
2040 2067
2041 return (err) ? -EFAULT : 0; 2068 return 0;
2042} 2069}
2043 2070
2044static int compat_nfs_exp_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg __user *arg) 2071static int compat_nfs_exp_trans(struct nfsctl_arg *karg,
2072 struct compat_nfsctl_arg __user *arg)
2045{ 2073{
2046 int err; 2074 if (!access_ok(VERIFY_READ, &arg->ca32_export,
2047 2075 sizeof(arg->ca32_export)) ||
2048 err = access_ok(VERIFY_READ, &arg->ca32_export, sizeof(arg->ca32_export)); 2076 get_user(karg->ca_version, &arg->ca32_version) ||
2049 err |= get_user(karg->ca_version, &arg->ca32_version); 2077 __copy_from_user(&karg->ca_export.ex_client[0],
2050 err |= __copy_from_user(&karg->ca_export.ex_client[0], 2078 &arg->ca32_export.ex32_client[0],
2051 &arg->ca32_export.ex32_client[0], 2079 NFSCLNT_IDMAX) ||
2052 NFSCLNT_IDMAX); 2080 __copy_from_user(&karg->ca_export.ex_path[0],
2053 err |= __copy_from_user(&karg->ca_export.ex_path[0], 2081 &arg->ca32_export.ex32_path[0],
2054 &arg->ca32_export.ex32_path[0], 2082 NFS_MAXPATHLEN) ||
2055 NFS_MAXPATHLEN); 2083 __get_user(karg->ca_export.ex_dev,
2056 err |= __get_user(karg->ca_export.ex_dev, 2084 &arg->ca32_export.ex32_dev) ||
2057 &arg->ca32_export.ex32_dev); 2085 __get_user(karg->ca_export.ex_ino,
2058 err |= __get_user(karg->ca_export.ex_ino, 2086 &arg->ca32_export.ex32_ino) ||
2059 &arg->ca32_export.ex32_ino); 2087 __get_user(karg->ca_export.ex_flags,
2060 err |= __get_user(karg->ca_export.ex_flags, 2088 &arg->ca32_export.ex32_flags) ||
2061 &arg->ca32_export.ex32_flags); 2089 __get_user(karg->ca_export.ex_anon_uid,
2062 err |= __get_user(karg->ca_export.ex_anon_uid, 2090 &arg->ca32_export.ex32_anon_uid) ||
2063 &arg->ca32_export.ex32_anon_uid); 2091 __get_user(karg->ca_export.ex_anon_gid,
2064 err |= __get_user(karg->ca_export.ex_anon_gid, 2092 &arg->ca32_export.ex32_anon_gid))
2065 &arg->ca32_export.ex32_anon_gid); 2093 return -EFAULT;
2066 SET_UID(karg->ca_export.ex_anon_uid, karg->ca_export.ex_anon_uid); 2094 SET_UID(karg->ca_export.ex_anon_uid, karg->ca_export.ex_anon_uid);
2067 SET_GID(karg->ca_export.ex_anon_gid, karg->ca_export.ex_anon_gid); 2095 SET_GID(karg->ca_export.ex_anon_gid, karg->ca_export.ex_anon_gid);
2068 2096
2069 return (err) ? -EFAULT : 0; 2097 return 0;
2070} 2098}
2071 2099
2072static int compat_nfs_getfd_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg __user *arg) 2100static int compat_nfs_getfd_trans(struct nfsctl_arg *karg,
2101 struct compat_nfsctl_arg __user *arg)
2073{ 2102{
2074 int err; 2103 if (!access_ok(VERIFY_READ, &arg->ca32_getfd,
2075 2104 sizeof(arg->ca32_getfd)) ||
2076 err = access_ok(VERIFY_READ, &arg->ca32_getfd, sizeof(arg->ca32_getfd)); 2105 get_user(karg->ca_version, &arg->ca32_version) ||
2077 err |= get_user(karg->ca_version, &arg->ca32_version); 2106 __copy_from_user(&karg->ca_getfd.gd_addr,
2078 err |= __copy_from_user(&karg->ca_getfd.gd_addr, 2107 &arg->ca32_getfd.gd32_addr,
2079 &arg->ca32_getfd.gd32_addr, 2108 (sizeof(struct sockaddr))) ||
2080 (sizeof(struct sockaddr))); 2109 __copy_from_user(&karg->ca_getfd.gd_path,
2081 err |= __copy_from_user(&karg->ca_getfd.gd_path, 2110 &arg->ca32_getfd.gd32_path,
2082 &arg->ca32_getfd.gd32_path, 2111 (NFS_MAXPATHLEN+1)) ||
2083 (NFS_MAXPATHLEN+1)); 2112 __get_user(karg->ca_getfd.gd_version,
2084 err |= __get_user(karg->ca_getfd.gd_version, 2113 &arg->ca32_getfd.gd32_version))
2085 &arg->ca32_getfd.gd32_version); 2114 return -EFAULT;
2086 2115
2087 return (err) ? -EFAULT : 0; 2116 return 0;
2088} 2117}
2089 2118
2090static int compat_nfs_getfs_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg __user *arg) 2119static int compat_nfs_getfs_trans(struct nfsctl_arg *karg,
2120 struct compat_nfsctl_arg __user *arg)
2091{ 2121{
2092 int err; 2122 if (!access_ok(VERIFY_READ,&arg->ca32_getfs,sizeof(arg->ca32_getfs)) ||
2093 2123 get_user(karg->ca_version, &arg->ca32_version) ||
2094 err = access_ok(VERIFY_READ, &arg->ca32_getfs, sizeof(arg->ca32_getfs)); 2124 __copy_from_user(&karg->ca_getfs.gd_addr,
2095 err |= get_user(karg->ca_version, &arg->ca32_version); 2125 &arg->ca32_getfs.gd32_addr,
2096 err |= __copy_from_user(&karg->ca_getfs.gd_addr, 2126 (sizeof(struct sockaddr))) ||
2097 &arg->ca32_getfs.gd32_addr, 2127 __copy_from_user(&karg->ca_getfs.gd_path,
2098 (sizeof(struct sockaddr))); 2128 &arg->ca32_getfs.gd32_path,
2099 err |= __copy_from_user(&karg->ca_getfs.gd_path, 2129 (NFS_MAXPATHLEN+1)) ||
2100 &arg->ca32_getfs.gd32_path, 2130 __get_user(karg->ca_getfs.gd_maxlen,
2101 (NFS_MAXPATHLEN+1)); 2131 &arg->ca32_getfs.gd32_maxlen))
2102 err |= __get_user(karg->ca_getfs.gd_maxlen, 2132 return -EFAULT;
2103 &arg->ca32_getfs.gd32_maxlen);
2104 2133
2105 return (err) ? -EFAULT : 0; 2134 return 0;
2106} 2135}
2107 2136
2108/* This really doesn't need translations, we are only passing 2137/* This really doesn't need translations, we are only passing
2109 * back a union which contains opaque nfs file handle data. 2138 * back a union which contains opaque nfs file handle data.
2110 */ 2139 */
2111static int compat_nfs_getfh_res_trans(union nfsctl_res *kres, union compat_nfsctl_res __user *res) 2140static int compat_nfs_getfh_res_trans(union nfsctl_res *kres,
2141 union compat_nfsctl_res __user *res)
2112{ 2142{
2113 int err; 2143 int err;
2114 2144
@@ -2117,8 +2147,9 @@ static int compat_nfs_getfh_res_trans(union nfsctl_res *kres, union compat_nfsct
2117 return (err) ? -EFAULT : 0; 2147 return (err) ? -EFAULT : 0;
2118} 2148}
2119 2149
2120asmlinkage long compat_sys_nfsservctl(int cmd, struct compat_nfsctl_arg __user *arg, 2150asmlinkage long compat_sys_nfsservctl(int cmd,
2121 union compat_nfsctl_res __user *res) 2151 struct compat_nfsctl_arg __user *arg,
2152 union compat_nfsctl_res __user *res)
2122{ 2153{
2123 struct nfsctl_arg *karg; 2154 struct nfsctl_arg *karg;
2124 union nfsctl_res *kres; 2155 union nfsctl_res *kres;
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 5638c8f9362f..5f952187fc53 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -505,13 +505,15 @@ static int populate_groups(struct config_group *group)
505 int i; 505 int i;
506 506
507 if (group->default_groups) { 507 if (group->default_groups) {
508 /* FYI, we're faking mkdir here 508 /*
509 * FYI, we're faking mkdir here
509 * I'm not sure we need this semaphore, as we're called 510 * I'm not sure we need this semaphore, as we're called
510 * from our parent's mkdir. That holds our parent's 511 * from our parent's mkdir. That holds our parent's
511 * i_mutex, so afaik lookup cannot continue through our 512 * i_mutex, so afaik lookup cannot continue through our
512 * parent to find us, let alone mess with our tree. 513 * parent to find us, let alone mess with our tree.
513 * That said, taking our i_mutex is closer to mkdir 514 * That said, taking our i_mutex is closer to mkdir
514 * emulation, and shouldn't hurt. */ 515 * emulation, and shouldn't hurt.
516 */
515 mutex_lock(&dentry->d_inode->i_mutex); 517 mutex_lock(&dentry->d_inode->i_mutex);
516 518
517 for (i = 0; group->default_groups[i]; i++) { 519 for (i = 0; group->default_groups[i]; i++) {
@@ -546,20 +548,34 @@ static void unlink_obj(struct config_item *item)
546 548
547 item->ci_group = NULL; 549 item->ci_group = NULL;
548 item->ci_parent = NULL; 550 item->ci_parent = NULL;
551
552 /* Drop the reference for ci_entry */
549 config_item_put(item); 553 config_item_put(item);
550 554
555 /* Drop the reference for ci_parent */
551 config_group_put(group); 556 config_group_put(group);
552 } 557 }
553} 558}
554 559
555static void link_obj(struct config_item *parent_item, struct config_item *item) 560static void link_obj(struct config_item *parent_item, struct config_item *item)
556{ 561{
557 /* Parent seems redundant with group, but it makes certain 562 /*
558 * traversals much nicer. */ 563 * Parent seems redundant with group, but it makes certain
564 * traversals much nicer.
565 */
559 item->ci_parent = parent_item; 566 item->ci_parent = parent_item;
567
568 /*
569 * We hold a reference on the parent for the child's ci_parent
570 * link.
571 */
560 item->ci_group = config_group_get(to_config_group(parent_item)); 572 item->ci_group = config_group_get(to_config_group(parent_item));
561 list_add_tail(&item->ci_entry, &item->ci_group->cg_children); 573 list_add_tail(&item->ci_entry, &item->ci_group->cg_children);
562 574
575 /*
576 * We hold a reference on the child for ci_entry on the parent's
577 * cg_children
578 */
563 config_item_get(item); 579 config_item_get(item);
564} 580}
565 581
@@ -684,6 +700,10 @@ static void client_drop_item(struct config_item *parent_item,
684 type = parent_item->ci_type; 700 type = parent_item->ci_type;
685 BUG_ON(!type); 701 BUG_ON(!type);
686 702
703 /*
704 * If ->drop_item() exists, it is responsible for the
705 * config_item_put().
706 */
687 if (type->ct_group_ops && type->ct_group_ops->drop_item) 707 if (type->ct_group_ops && type->ct_group_ops->drop_item)
688 type->ct_group_ops->drop_item(to_config_group(parent_item), 708 type->ct_group_ops->drop_item(to_config_group(parent_item),
689 item); 709 item);
@@ -694,23 +714,28 @@ static void client_drop_item(struct config_item *parent_item,
694 714
695static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 715static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
696{ 716{
697 int ret; 717 int ret, module_got = 0;
698 struct config_group *group; 718 struct config_group *group;
699 struct config_item *item; 719 struct config_item *item;
700 struct config_item *parent_item; 720 struct config_item *parent_item;
701 struct configfs_subsystem *subsys; 721 struct configfs_subsystem *subsys;
702 struct configfs_dirent *sd; 722 struct configfs_dirent *sd;
703 struct config_item_type *type; 723 struct config_item_type *type;
704 struct module *owner; 724 struct module *owner = NULL;
705 char *name; 725 char *name;
706 726
707 if (dentry->d_parent == configfs_sb->s_root) 727 if (dentry->d_parent == configfs_sb->s_root) {
708 return -EPERM; 728 ret = -EPERM;
729 goto out;
730 }
709 731
710 sd = dentry->d_parent->d_fsdata; 732 sd = dentry->d_parent->d_fsdata;
711 if (!(sd->s_type & CONFIGFS_USET_DIR)) 733 if (!(sd->s_type & CONFIGFS_USET_DIR)) {
712 return -EPERM; 734 ret = -EPERM;
735 goto out;
736 }
713 737
738 /* Get a working ref for the duration of this function */
714 parent_item = configfs_get_config_item(dentry->d_parent); 739 parent_item = configfs_get_config_item(dentry->d_parent);
715 type = parent_item->ci_type; 740 type = parent_item->ci_type;
716 subsys = to_config_group(parent_item)->cg_subsys; 741 subsys = to_config_group(parent_item)->cg_subsys;
@@ -719,15 +744,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
719 if (!type || !type->ct_group_ops || 744 if (!type || !type->ct_group_ops ||
720 (!type->ct_group_ops->make_group && 745 (!type->ct_group_ops->make_group &&
721 !type->ct_group_ops->make_item)) { 746 !type->ct_group_ops->make_item)) {
722 config_item_put(parent_item); 747 ret = -EPERM; /* Lack-of-mkdir returns -EPERM */
723 return -EPERM; /* What lack-of-mkdir returns */ 748 goto out_put;
724 } 749 }
725 750
726 name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL); 751 name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL);
727 if (!name) { 752 if (!name) {
728 config_item_put(parent_item); 753 ret = -ENOMEM;
729 return -ENOMEM; 754 goto out_put;
730 } 755 }
756
731 snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); 757 snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name);
732 758
733 down(&subsys->su_sem); 759 down(&subsys->su_sem);
@@ -748,40 +774,67 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
748 774
749 kfree(name); 775 kfree(name);
750 if (!item) { 776 if (!item) {
751 config_item_put(parent_item); 777 /*
752 return -ENOMEM; 778 * If item == NULL, then link_obj() was never called.
779 * There are no extra references to clean up.
780 */
781 ret = -ENOMEM;
782 goto out_put;
753 } 783 }
754 784
755 ret = -EINVAL; 785 /*
786 * link_obj() has been called (via link_group() for groups).
787 * From here on out, errors must clean that up.
788 */
789
756 type = item->ci_type; 790 type = item->ci_type;
757 if (type) { 791 if (!type) {
758 owner = type->ct_owner; 792 ret = -EINVAL;
759 if (try_module_get(owner)) { 793 goto out_unlink;
760 if (group) { 794 }
761 ret = configfs_attach_group(parent_item,
762 item,
763 dentry);
764 } else {
765 ret = configfs_attach_item(parent_item,
766 item,
767 dentry);
768 }
769 795
770 if (ret) { 796 owner = type->ct_owner;
771 down(&subsys->su_sem); 797 if (!try_module_get(owner)) {
772 if (group) 798 ret = -EINVAL;
773 unlink_group(group); 799 goto out_unlink;
774 else 800 }
775 unlink_obj(item);
776 client_drop_item(parent_item, item);
777 up(&subsys->su_sem);
778 801
779 config_item_put(parent_item); 802 /*
780 module_put(owner); 803 * I hate doing it this way, but if there is
781 } 804 * an error, module_put() probably should
782 } 805 * happen after any cleanup.
806 */
807 module_got = 1;
808
809 if (group)
810 ret = configfs_attach_group(parent_item, item, dentry);
811 else
812 ret = configfs_attach_item(parent_item, item, dentry);
813
814out_unlink:
815 if (ret) {
816 /* Tear down everything we built up */
817 down(&subsys->su_sem);
818 if (group)
819 unlink_group(group);
820 else
821 unlink_obj(item);
822 client_drop_item(parent_item, item);
823 up(&subsys->su_sem);
824
825 if (module_got)
826 module_put(owner);
783 } 827 }
784 828
829out_put:
830 /*
831 * link_obj()/link_group() took a reference from child->parent,
832 * so the parent is safely pinned. We can drop our working
833 * reference.
834 */
835 config_item_put(parent_item);
836
837out:
785 return ret; 838 return ret;
786} 839}
787 840
@@ -801,6 +854,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
801 if (sd->s_type & CONFIGFS_USET_DEFAULT) 854 if (sd->s_type & CONFIGFS_USET_DEFAULT)
802 return -EPERM; 855 return -EPERM;
803 856
857 /* Get a working ref until we have the child */
804 parent_item = configfs_get_config_item(dentry->d_parent); 858 parent_item = configfs_get_config_item(dentry->d_parent);
805 subsys = to_config_group(parent_item)->cg_subsys; 859 subsys = to_config_group(parent_item)->cg_subsys;
806 BUG_ON(!subsys); 860 BUG_ON(!subsys);
@@ -817,6 +871,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
817 return ret; 871 return ret;
818 } 872 }
819 873
874 /* Get a working ref for the duration of this function */
820 item = configfs_get_config_item(dentry); 875 item = configfs_get_config_item(dentry);
821 876
822 /* Drop reference from above, item already holds one. */ 877 /* Drop reference from above, item already holds one. */
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 85d166cdcae4..b55b4ea9a676 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -67,12 +67,13 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d
67static int debugfs_mknod(struct inode *dir, struct dentry *dentry, 67static int debugfs_mknod(struct inode *dir, struct dentry *dentry,
68 int mode, dev_t dev) 68 int mode, dev_t dev)
69{ 69{
70 struct inode *inode = debugfs_get_inode(dir->i_sb, mode, dev); 70 struct inode *inode;
71 int error = -EPERM; 71 int error = -EPERM;
72 72
73 if (dentry->d_inode) 73 if (dentry->d_inode)
74 return -EEXIST; 74 return -EEXIST;
75 75
76 inode = debugfs_get_inode(dir->i_sb, mode, dev);
76 if (inode) { 77 if (inode) {
77 d_instantiate(dentry, inode); 78 d_instantiate(dentry, inode);
78 dget(dentry); 79 dget(dentry);
diff --git a/fs/exec.c b/fs/exec.c
index 3234a0c32d54..3a79d97ac234 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -665,9 +665,7 @@ static int de_thread(struct task_struct *tsk)
665 * and to assume its PID: 665 * and to assume its PID:
666 */ 666 */
667 if (!thread_group_leader(current)) { 667 if (!thread_group_leader(current)) {
668 struct task_struct *parent;
669 struct dentry *proc_dentry1, *proc_dentry2; 668 struct dentry *proc_dentry1, *proc_dentry2;
670 unsigned long ptrace;
671 669
672 /* 670 /*
673 * Wait for the thread group leader to be a zombie. 671 * Wait for the thread group leader to be a zombie.
@@ -704,22 +702,6 @@ static int de_thread(struct task_struct *tsk)
704 * two threads with a switched PID, and release 702 * two threads with a switched PID, and release
705 * the former thread group leader: 703 * the former thread group leader:
706 */ 704 */
707 ptrace = leader->ptrace;
708 parent = leader->parent;
709 if (unlikely(ptrace) && unlikely(parent == current)) {
710 /*
711 * Joker was ptracing his own group leader,
712 * and now he wants to be his own parent!
713 * We can't have that.
714 */
715 ptrace = 0;
716 }
717
718 ptrace_unlink(current);
719 ptrace_unlink(leader);
720 remove_parent(current);
721 remove_parent(leader);
722
723 705
724 /* Become a process group leader with the old leader's pid. 706 /* Become a process group leader with the old leader's pid.
725 * Note: The old leader also uses thispid until release_task 707 * Note: The old leader also uses thispid until release_task
@@ -730,10 +712,8 @@ static int de_thread(struct task_struct *tsk)
730 attach_pid(current, PIDTYPE_PID, current->pid); 712 attach_pid(current, PIDTYPE_PID, current->pid);
731 attach_pid(current, PIDTYPE_PGID, current->signal->pgrp); 713 attach_pid(current, PIDTYPE_PGID, current->signal->pgrp);
732 attach_pid(current, PIDTYPE_SID, current->signal->session); 714 attach_pid(current, PIDTYPE_SID, current->signal->session);
733 list_add_tail(&current->tasks, &init_task.tasks); 715 list_add_tail_rcu(&current->tasks, &init_task.tasks);
734 716
735 current->parent = current->real_parent = leader->real_parent;
736 leader->parent = leader->real_parent = child_reaper;
737 current->group_leader = current; 717 current->group_leader = current;
738 leader->group_leader = current; 718 leader->group_leader = current;
739 719
@@ -742,13 +722,6 @@ static int de_thread(struct task_struct *tsk)
742 detach_pid(leader, PIDTYPE_SID); 722 detach_pid(leader, PIDTYPE_SID);
743 list_del_init(&leader->tasks); 723 list_del_init(&leader->tasks);
744 724
745 add_parent(current);
746 add_parent(leader);
747 if (ptrace) {
748 current->ptrace = ptrace;
749 __ptrace_link(current, parent);
750 }
751
752 current->exit_signal = SIGCHLD; 725 current->exit_signal = SIGCHLD;
753 726
754 BUG_ON(leader->exit_state != EXIT_ZOMBIE); 727 BUG_ON(leader->exit_state != EXIT_ZOMBIE);
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index b06b54f1bbbb..4c39009350f3 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -102,7 +102,7 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent,
102 if (acceptable(context, result)) 102 if (acceptable(context, result))
103 return result; 103 return result;
104 if (S_ISDIR(result->d_inode->i_mode)) { 104 if (S_ISDIR(result->d_inode->i_mode)) {
105 /* there is no other dentry, so fail */ 105 err = -EACCES;
106 goto err_result; 106 goto err_result;
107 } 107 }
108 108
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 48ae0339af17..2edd7eec88fd 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -711,7 +711,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
711 * direct blocks blocks 711 * direct blocks blocks
712 */ 712 */
713 if (num == 0 && blks > 1) { 713 if (num == 0 && blks > 1) {
714 current_block = le32_to_cpu(where->key + 1); 714 current_block = le32_to_cpu(where->key) + 1;
715 for (i = 1; i < blks; i++) 715 for (i = 1; i < blks; i++)
716 *(where->p + i ) = cpu_to_le32(current_block++); 716 *(where->p + i ) = cpu_to_le32(current_block++);
717 } 717 }
@@ -724,7 +724,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
724 if (block_i) { 724 if (block_i) {
725 block_i->last_alloc_logical_block = block + blks - 1; 725 block_i->last_alloc_logical_block = block + blks - 1;
726 block_i->last_alloc_physical_block = 726 block_i->last_alloc_physical_block =
727 le32_to_cpu(where[num].key + blks - 1); 727 le32_to_cpu(where[num].key) + blks - 1;
728 } 728 }
729 729
730 /* We are done with atomic stuff, now do the rest of housekeeping */ 730 /* We are done with atomic stuff, now do the rest of housekeeping */
@@ -814,11 +814,13 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
814 814
815 /* Simplest case - block found, no allocation needed */ 815 /* Simplest case - block found, no allocation needed */
816 if (!partial) { 816 if (!partial) {
817 first_block = chain[depth - 1].key; 817 first_block = le32_to_cpu(chain[depth - 1].key);
818 clear_buffer_new(bh_result); 818 clear_buffer_new(bh_result);
819 count++; 819 count++;
820 /*map more blocks*/ 820 /*map more blocks*/
821 while (count < maxblocks && count <= blocks_to_boundary) { 821 while (count < maxblocks && count <= blocks_to_boundary) {
822 unsigned long blk;
823
822 if (!verify_chain(chain, partial)) { 824 if (!verify_chain(chain, partial)) {
823 /* 825 /*
824 * Indirect block might be removed by 826 * Indirect block might be removed by
@@ -831,8 +833,9 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
831 count = 0; 833 count = 0;
832 break; 834 break;
833 } 835 }
834 if (le32_to_cpu(*(chain[depth-1].p+count) == 836 blk = le32_to_cpu(*(chain[depth-1].p + count));
835 (first_block + count))) 837
838 if (blk == first_block + count)
836 count++; 839 count++;
837 else 840 else
838 break; 841 break;
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index aaf1da17b6d4..8c22aa9a7fbb 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -48,6 +48,7 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
48 if (!S_ISDIR(inode->i_mode)) 48 if (!S_ISDIR(inode->i_mode))
49 flags &= ~EXT3_DIRSYNC_FL; 49 flags &= ~EXT3_DIRSYNC_FL;
50 50
51 mutex_lock(&inode->i_mutex);
51 oldflags = ei->i_flags; 52 oldflags = ei->i_flags;
52 53
53 /* The JOURNAL_DATA flag is modifiable only by root */ 54 /* The JOURNAL_DATA flag is modifiable only by root */
@@ -60,8 +61,10 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
60 * This test looks nicer. Thanks to Pauline Middelink 61 * This test looks nicer. Thanks to Pauline Middelink
61 */ 62 */
62 if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { 63 if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) {
63 if (!capable(CAP_LINUX_IMMUTABLE)) 64 if (!capable(CAP_LINUX_IMMUTABLE)) {
65 mutex_unlock(&inode->i_mutex);
64 return -EPERM; 66 return -EPERM;
67 }
65 } 68 }
66 69
67 /* 70 /*
@@ -69,14 +72,18 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
69 * the relevant capability. 72 * the relevant capability.
70 */ 73 */
71 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) { 74 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) {
72 if (!capable(CAP_SYS_RESOURCE)) 75 if (!capable(CAP_SYS_RESOURCE)) {
76 mutex_unlock(&inode->i_mutex);
73 return -EPERM; 77 return -EPERM;
78 }
74 } 79 }
75 80
76 81
77 handle = ext3_journal_start(inode, 1); 82 handle = ext3_journal_start(inode, 1);
78 if (IS_ERR(handle)) 83 if (IS_ERR(handle)) {
84 mutex_unlock(&inode->i_mutex);
79 return PTR_ERR(handle); 85 return PTR_ERR(handle);
86 }
80 if (IS_SYNC(inode)) 87 if (IS_SYNC(inode))
81 handle->h_sync = 1; 88 handle->h_sync = 1;
82 err = ext3_reserve_inode_write(handle, inode, &iloc); 89 err = ext3_reserve_inode_write(handle, inode, &iloc);
@@ -93,11 +100,14 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
93 err = ext3_mark_iloc_dirty(handle, inode, &iloc); 100 err = ext3_mark_iloc_dirty(handle, inode, &iloc);
94flags_err: 101flags_err:
95 ext3_journal_stop(handle); 102 ext3_journal_stop(handle);
96 if (err) 103 if (err) {
104 mutex_unlock(&inode->i_mutex);
97 return err; 105 return err;
106 }
98 107
99 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) 108 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL))
100 err = ext3_change_inode_journal_flag(inode, jflag); 109 err = ext3_change_inode_journal_flag(inode, jflag);
110 mutex_unlock(&inode->i_mutex);
101 return err; 111 return err;
102 } 112 }
103 case EXT3_IOC_GETVERSION: 113 case EXT3_IOC_GETVERSION:
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 14f5f6ea3e72..34b39e9a1e5a 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -213,7 +213,7 @@ static int setup_new_group_blocks(struct super_block *sb,
213 goto exit_bh; 213 goto exit_bh;
214 } 214 }
215 lock_buffer(bh); 215 lock_buffer(bh);
216 memcpy(gdb->b_data, sbi->s_group_desc[i], bh->b_size); 216 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, bh->b_size);
217 set_buffer_uptodate(gdb); 217 set_buffer_uptodate(gdb);
218 unlock_buffer(bh); 218 unlock_buffer(bh);
219 ext3_journal_dirty_metadata(handle, gdb); 219 ext3_journal_dirty_metadata(handle, gdb);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 6c740f860665..104a62dadb94 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -92,40 +92,52 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc)
92{ 92{
93 struct fuse_req *req; 93 struct fuse_req *req;
94 sigset_t oldset; 94 sigset_t oldset;
95 int intr;
95 int err; 96 int err;
96 97
98 atomic_inc(&fc->num_waiting);
97 block_sigs(&oldset); 99 block_sigs(&oldset);
98 err = wait_event_interruptible(fc->blocked_waitq, !fc->blocked); 100 intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
99 restore_sigs(&oldset); 101 restore_sigs(&oldset);
100 if (err) 102 err = -EINTR;
101 return ERR_PTR(-EINTR); 103 if (intr)
104 goto out;
102 105
103 req = fuse_request_alloc(); 106 req = fuse_request_alloc();
107 err = -ENOMEM;
104 if (!req) 108 if (!req)
105 return ERR_PTR(-ENOMEM); 109 goto out;
106 110
107 atomic_inc(&fc->num_waiting);
108 fuse_request_init(req);
109 req->in.h.uid = current->fsuid; 111 req->in.h.uid = current->fsuid;
110 req->in.h.gid = current->fsgid; 112 req->in.h.gid = current->fsgid;
111 req->in.h.pid = current->pid; 113 req->in.h.pid = current->pid;
114 req->waiting = 1;
112 return req; 115 return req;
116
117 out:
118 atomic_dec(&fc->num_waiting);
119 return ERR_PTR(err);
113} 120}
114 121
115void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) 122void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
116{ 123{
117 if (atomic_dec_and_test(&req->count)) { 124 if (atomic_dec_and_test(&req->count)) {
118 atomic_dec(&fc->num_waiting); 125 if (req->waiting)
126 atomic_dec(&fc->num_waiting);
119 fuse_request_free(req); 127 fuse_request_free(req);
120 } 128 }
121} 129}
122 130
131/*
132 * Called with sbput_sem held for read (request_end) or write
133 * (fuse_put_super). By the time fuse_put_super() is finished, all
134 * inodes belonging to background requests must be released, so the
135 * iputs have to be done within the locked region.
136 */
123void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req) 137void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req)
124{ 138{
125 iput(req->inode); 139 iput(req->inode);
126 iput(req->inode2); 140 iput(req->inode2);
127 if (req->file)
128 fput(req->file);
129 spin_lock(&fc->lock); 141 spin_lock(&fc->lock);
130 list_del(&req->bg_entry); 142 list_del(&req->bg_entry);
131 if (fc->num_background == FUSE_MAX_BACKGROUND) { 143 if (fc->num_background == FUSE_MAX_BACKGROUND) {
@@ -170,6 +182,11 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
170 if (fc->mounted) 182 if (fc->mounted)
171 fuse_release_background(fc, req); 183 fuse_release_background(fc, req);
172 up_read(&fc->sbput_sem); 184 up_read(&fc->sbput_sem);
185
186 /* fput must go outside sbput_sem, otherwise it can deadlock */
187 if (req->file)
188 fput(req->file);
189
173 if (end) 190 if (end)
174 end(fc, req); 191 end(fc, req);
175 else 192 else
@@ -277,6 +294,10 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
277 len_args(req->in.numargs, (struct fuse_arg *) req->in.args); 294 len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
278 list_add_tail(&req->list, &fc->pending); 295 list_add_tail(&req->list, &fc->pending);
279 req->state = FUSE_REQ_PENDING; 296 req->state = FUSE_REQ_PENDING;
297 if (!req->waiting) {
298 req->waiting = 1;
299 atomic_inc(&fc->num_waiting);
300 }
280 wake_up(&fc->waitq); 301 wake_up(&fc->waitq);
281 kill_fasync(&fc->fasync, SIGIO, POLL_IN); 302 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
282} 303}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index e4f041a11bb5..fc342cf7c2cc 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1,6 +1,6 @@
1/* 1/*
2 FUSE: Filesystem in Userspace 2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> 3 Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu>
4 4
5 This program can be distributed under the terms of the GNU GPL. 5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING. 6 See the file COPYING.
@@ -565,8 +565,12 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
565 buf += nres; 565 buf += nres;
566 if (nres != nbytes) 566 if (nres != nbytes)
567 break; 567 break;
568 if (count) 568 if (count) {
569 fuse_reset_request(req); 569 fuse_put_request(fc, req);
570 req = fuse_get_req(fc);
571 if (IS_ERR(req))
572 break;
573 }
570 } 574 }
571 fuse_put_request(fc, req); 575 fuse_put_request(fc, req);
572 if (res > 0) { 576 if (res > 0) {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 19c7185a7546..0474202cb5dc 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -159,6 +159,9 @@ struct fuse_req {
159 /** Data is being copied to/from the request */ 159 /** Data is being copied to/from the request */
160 unsigned locked:1; 160 unsigned locked:1;
161 161
162 /** Request is counted as "waiting" */
163 unsigned waiting:1;
164
162 /** State of the request */ 165 /** State of the request */
163 enum fuse_req_state state; 166 enum fuse_req_state state;
164 167
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fd34037b0588..7627022446b2 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -500,11 +500,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
500 if (file->f_op != &fuse_dev_operations) 500 if (file->f_op != &fuse_dev_operations)
501 return -EINVAL; 501 return -EINVAL;
502 502
503 /* Setting file->private_data can't race with other mount()
504 instances, since BKL is held for ->get_sb() */
505 if (file->private_data)
506 return -EINVAL;
507
508 fc = new_conn(); 503 fc = new_conn();
509 if (!fc) 504 if (!fc)
510 return -ENOMEM; 505 return -ENOMEM;
@@ -540,6 +535,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
540 if (err) 535 if (err)
541 goto err_free_req; 536 goto err_free_req;
542 537
538 /* Setting file->private_data can't race with other mount()
539 instances, since BKL is held for ->get_sb() */
540 err = -EINVAL;
541 if (file->private_data)
542 goto err_kobject_del;
543
543 sb->s_root = root_dentry; 544 sb->s_root = root_dentry;
544 fc->mounted = 1; 545 fc->mounted = 1;
545 fc->connected = 1; 546 fc->connected = 1;
@@ -556,6 +557,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
556 557
557 return 0; 558 return 0;
558 559
560 err_kobject_del:
561 kobject_del(&fc->kobj);
559 err_free_req: 562 err_free_req:
560 fuse_request_free(init_req); 563 fuse_request_free(init_req);
561 err_put_root: 564 err_put_root:
diff --git a/fs/inotify.c b/fs/inotify.c
index 1f50302849c5..732ec4bd5774 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -848,7 +848,11 @@ static int inotify_release(struct inode *ignored, struct file *file)
848 inode = watch->inode; 848 inode = watch->inode;
849 mutex_lock(&inode->inotify_mutex); 849 mutex_lock(&inode->inotify_mutex);
850 mutex_lock(&dev->mutex); 850 mutex_lock(&dev->mutex);
851 remove_watch_no_event(watch, dev); 851
852 /* make sure we didn't race with another list removal */
853 if (likely(idr_find(&dev->idr, watch->wd)))
854 remove_watch_no_event(watch, dev);
855
852 mutex_unlock(&dev->mutex); 856 mutex_unlock(&dev->mutex);
853 mutex_unlock(&inode->inotify_mutex); 857 mutex_unlock(&inode->inotify_mutex);
854 put_inotify_watch(watch); 858 put_inotify_watch(watch);
@@ -890,8 +894,7 @@ static int inotify_ignore(struct inotify_device *dev, s32 wd)
890 mutex_lock(&dev->mutex); 894 mutex_lock(&dev->mutex);
891 895
892 /* make sure that we did not race */ 896 /* make sure that we did not race */
893 watch = idr_find(&dev->idr, wd); 897 if (likely(idr_find(&dev->idr, wd) == watch))
894 if (likely(watch))
895 remove_watch(watch, dev); 898 remove_watch(watch, dev);
896 899
897 mutex_unlock(&dev->mutex); 900 mutex_unlock(&dev->mutex);
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index d4d0c41490cd..1d46677afd17 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -438,7 +438,8 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
438 if (c->mtd->point) { 438 if (c->mtd->point) {
439 err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer); 439 err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer);
440 if (!err && retlen < tn->csize) { 440 if (!err && retlen < tn->csize) {
441 JFFS2_WARNING("MTD point returned len too short: %u instead of %u.\n", retlen, tn->csize); 441 JFFS2_WARNING("MTD point returned len too short: %zu "
442 "instead of %u.\n", retlen, tn->csize);
442 c->mtd->unpoint(c->mtd, buffer, ofs, len); 443 c->mtd->unpoint(c->mtd, buffer, ofs, len);
443 } else if (err) 444 } else if (err)
444 JFFS2_WARNING("MTD point failed: error code %d.\n", err); 445 JFFS2_WARNING("MTD point failed: error code %d.\n", err);
@@ -461,7 +462,8 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
461 } 462 }
462 463
463 if (retlen != len) { 464 if (retlen != len) {
464 JFFS2_ERROR("short read at %#08x: %d instead of %d.\n", ofs, retlen, len); 465 JFFS2_ERROR("short read at %#08x: %zd instead of %d.\n",
466 ofs, retlen, len);
465 err = -EIO; 467 err = -EIO;
466 goto free_out; 468 goto free_out;
467 } 469 }
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index f28696f235c4..2b220dd6b4e7 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -542,7 +542,7 @@ add_failed:
542static int metapage_releasepage(struct page *page, gfp_t gfp_mask) 542static int metapage_releasepage(struct page *page, gfp_t gfp_mask)
543{ 543{
544 struct metapage *mp; 544 struct metapage *mp;
545 int busy = 0; 545 int ret = 1;
546 unsigned int offset; 546 unsigned int offset;
547 547
548 for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { 548 for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
@@ -552,30 +552,20 @@ static int metapage_releasepage(struct page *page, gfp_t gfp_mask)
552 continue; 552 continue;
553 553
554 jfs_info("metapage_releasepage: mp = 0x%p", mp); 554 jfs_info("metapage_releasepage: mp = 0x%p", mp);
555 if (mp->count || mp->nohomeok) { 555 if (mp->count || mp->nohomeok ||
556 test_bit(META_dirty, &mp->flag)) {
556 jfs_info("count = %ld, nohomeok = %d", mp->count, 557 jfs_info("count = %ld, nohomeok = %d", mp->count,
557 mp->nohomeok); 558 mp->nohomeok);
558 busy = 1; 559 ret = 0;
559 continue; 560 continue;
560 } 561 }
561 wait_on_page_writeback(page);
562 //WARN_ON(test_bit(META_dirty, &mp->flag));
563 if (test_bit(META_dirty, &mp->flag)) {
564 dump_mem("dirty mp in metapage_releasepage", mp,
565 sizeof(struct metapage));
566 dump_mem("page", page, sizeof(struct page));
567 dump_stack();
568 }
569 if (mp->lsn) 562 if (mp->lsn)
570 remove_from_logsync(mp); 563 remove_from_logsync(mp);
571 remove_metapage(page, mp); 564 remove_metapage(page, mp);
572 INCREMENT(mpStat.pagefree); 565 INCREMENT(mpStat.pagefree);
573 free_metapage(mp); 566 free_metapage(mp);
574 } 567 }
575 if (busy) 568 return ret;
576 return -1;
577
578 return 0;
579} 569}
580 570
581static void metapage_invalidatepage(struct page *page, unsigned long offset) 571static void metapage_invalidatepage(struct page *page, unsigned long offset)
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index d2b66bad7d50..3ef739120dff 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -650,7 +650,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
650 svc_wake_up(block->b_daemon); 650 svc_wake_up(block->b_daemon);
651} 651}
652 652
653void nlmsvc_grant_release(void *data) 653static void nlmsvc_grant_release(void *data)
654{ 654{
655 struct nlm_rqst *call = data; 655 struct nlm_rqst *call = data;
656 656
diff --git a/fs/locks.c b/fs/locks.c
index dda83d6cd48b..6f99c0a6f836 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -446,15 +446,14 @@ static struct lock_manager_operations lease_manager_ops = {
446 */ 446 */
447static int lease_init(struct file *filp, int type, struct file_lock *fl) 447static int lease_init(struct file *filp, int type, struct file_lock *fl)
448 { 448 {
449 if (assign_type(fl, type) != 0)
450 return -EINVAL;
451
449 fl->fl_owner = current->files; 452 fl->fl_owner = current->files;
450 fl->fl_pid = current->tgid; 453 fl->fl_pid = current->tgid;
451 454
452 fl->fl_file = filp; 455 fl->fl_file = filp;
453 fl->fl_flags = FL_LEASE; 456 fl->fl_flags = FL_LEASE;
454 if (assign_type(fl, type) != 0) {
455 locks_free_lock(fl);
456 return -EINVAL;
457 }
458 fl->fl_start = 0; 457 fl->fl_start = 0;
459 fl->fl_end = OFFSET_MAX; 458 fl->fl_end = OFFSET_MAX;
460 fl->fl_ops = NULL; 459 fl->fl_ops = NULL;
@@ -466,16 +465,19 @@ static int lease_init(struct file *filp, int type, struct file_lock *fl)
466static int lease_alloc(struct file *filp, int type, struct file_lock **flp) 465static int lease_alloc(struct file *filp, int type, struct file_lock **flp)
467{ 466{
468 struct file_lock *fl = locks_alloc_lock(); 467 struct file_lock *fl = locks_alloc_lock();
469 int error; 468 int error = -ENOMEM;
470 469
471 if (fl == NULL) 470 if (fl == NULL)
472 return -ENOMEM; 471 goto out;
473 472
474 error = lease_init(filp, type, fl); 473 error = lease_init(filp, type, fl);
475 if (error) 474 if (error) {
476 return error; 475 locks_free_lock(fl);
476 fl = NULL;
477 }
478out:
477 *flp = fl; 479 *flp = fl;
478 return 0; 480 return error;
479} 481}
480 482
481/* Check if two locks overlap each other. 483/* Check if two locks overlap each other.
@@ -1372,6 +1374,7 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp)
1372 goto out; 1374 goto out;
1373 1375
1374 if (my_before != NULL) { 1376 if (my_before != NULL) {
1377 *flp = *my_before;
1375 error = lease->fl_lmops->fl_change(my_before, arg); 1378 error = lease->fl_lmops->fl_change(my_before, arg);
1376 goto out; 1379 goto out;
1377 } 1380 }
@@ -2230,7 +2233,12 @@ void steal_locks(fl_owner_t from)
2230 2233
2231 lock_kernel(); 2234 lock_kernel();
2232 j = 0; 2235 j = 0;
2233 rcu_read_lock(); 2236
2237 /*
2238 * We are not taking a ref to the file structures, so
2239 * we need to acquire ->file_lock.
2240 */
2241 spin_lock(&files->file_lock);
2234 fdt = files_fdtable(files); 2242 fdt = files_fdtable(files);
2235 for (;;) { 2243 for (;;) {
2236 unsigned long set; 2244 unsigned long set;
@@ -2248,7 +2256,7 @@ void steal_locks(fl_owner_t from)
2248 set >>= 1; 2256 set >>= 1;
2249 } 2257 }
2250 } 2258 }
2251 rcu_read_unlock(); 2259 spin_unlock(&files->file_lock);
2252 unlock_kernel(); 2260 unlock_kernel();
2253} 2261}
2254EXPORT_SYMBOL(steal_locks); 2262EXPORT_SYMBOL(steal_locks);
diff --git a/fs/namei.c b/fs/namei.c
index 96723ae83c89..d6e2ee251736 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1080,8 +1080,8 @@ static int fastcall do_path_lookup(int dfd, const char *name,
1080 nd->flags = flags; 1080 nd->flags = flags;
1081 nd->depth = 0; 1081 nd->depth = 0;
1082 1082
1083 read_lock(&current->fs->lock);
1084 if (*name=='/') { 1083 if (*name=='/') {
1084 read_lock(&current->fs->lock);
1085 if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) { 1085 if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
1086 nd->mnt = mntget(current->fs->altrootmnt); 1086 nd->mnt = mntget(current->fs->altrootmnt);
1087 nd->dentry = dget(current->fs->altroot); 1087 nd->dentry = dget(current->fs->altroot);
@@ -1092,33 +1092,35 @@ static int fastcall do_path_lookup(int dfd, const char *name,
1092 } 1092 }
1093 nd->mnt = mntget(current->fs->rootmnt); 1093 nd->mnt = mntget(current->fs->rootmnt);
1094 nd->dentry = dget(current->fs->root); 1094 nd->dentry = dget(current->fs->root);
1095 read_unlock(&current->fs->lock);
1095 } else if (dfd == AT_FDCWD) { 1096 } else if (dfd == AT_FDCWD) {
1097 read_lock(&current->fs->lock);
1096 nd->mnt = mntget(current->fs->pwdmnt); 1098 nd->mnt = mntget(current->fs->pwdmnt);
1097 nd->dentry = dget(current->fs->pwd); 1099 nd->dentry = dget(current->fs->pwd);
1100 read_unlock(&current->fs->lock);
1098 } else { 1101 } else {
1099 struct dentry *dentry; 1102 struct dentry *dentry;
1100 1103
1101 file = fget_light(dfd, &fput_needed); 1104 file = fget_light(dfd, &fput_needed);
1102 retval = -EBADF; 1105 retval = -EBADF;
1103 if (!file) 1106 if (!file)
1104 goto unlock_fail; 1107 goto out_fail;
1105 1108
1106 dentry = file->f_dentry; 1109 dentry = file->f_dentry;
1107 1110
1108 retval = -ENOTDIR; 1111 retval = -ENOTDIR;
1109 if (!S_ISDIR(dentry->d_inode->i_mode)) 1112 if (!S_ISDIR(dentry->d_inode->i_mode))
1110 goto fput_unlock_fail; 1113 goto fput_fail;
1111 1114
1112 retval = file_permission(file, MAY_EXEC); 1115 retval = file_permission(file, MAY_EXEC);
1113 if (retval) 1116 if (retval)
1114 goto fput_unlock_fail; 1117 goto fput_fail;
1115 1118
1116 nd->mnt = mntget(file->f_vfsmnt); 1119 nd->mnt = mntget(file->f_vfsmnt);
1117 nd->dentry = dget(dentry); 1120 nd->dentry = dget(dentry);
1118 1121
1119 fput_light(file, fput_needed); 1122 fput_light(file, fput_needed);
1120 } 1123 }
1121 read_unlock(&current->fs->lock);
1122 current->total_link_count = 0; 1124 current->total_link_count = 0;
1123 retval = link_path_walk(name, nd); 1125 retval = link_path_walk(name, nd);
1124out: 1126out:
@@ -1127,13 +1129,12 @@ out:
1127 nd->dentry->d_inode)) 1129 nd->dentry->d_inode))
1128 audit_inode(name, nd->dentry->d_inode, flags); 1130 audit_inode(name, nd->dentry->d_inode, flags);
1129 } 1131 }
1132out_fail:
1130 return retval; 1133 return retval;
1131 1134
1132fput_unlock_fail: 1135fput_fail:
1133 fput_light(file, fput_needed); 1136 fput_light(file, fput_needed);
1134unlock_fail: 1137 goto out_fail;
1135 read_unlock(&current->fs->lock);
1136 return retval;
1137} 1138}
1138 1139
1139int fastcall path_lookup(const char *name, unsigned int flags, 1140int fastcall path_lookup(const char *name, unsigned int flags,
diff --git a/fs/namespace.c b/fs/namespace.c
index 2c5f1f80bdc2..bf478addb852 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -899,13 +899,11 @@ static int do_change_type(struct nameidata *nd, int flag)
899/* 899/*
900 * do loopback mount. 900 * do loopback mount.
901 */ 901 */
902static int do_loopback(struct nameidata *nd, char *old_name, unsigned long flags, int mnt_flags) 902static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
903{ 903{
904 struct nameidata old_nd; 904 struct nameidata old_nd;
905 struct vfsmount *mnt = NULL; 905 struct vfsmount *mnt = NULL;
906 int recurse = flags & MS_REC;
907 int err = mount_is_safe(nd); 906 int err = mount_is_safe(nd);
908
909 if (err) 907 if (err)
910 return err; 908 return err;
911 if (!old_name || !*old_name) 909 if (!old_name || !*old_name)
@@ -939,7 +937,6 @@ static int do_loopback(struct nameidata *nd, char *old_name, unsigned long flags
939 spin_unlock(&vfsmount_lock); 937 spin_unlock(&vfsmount_lock);
940 release_mounts(&umount_list); 938 release_mounts(&umount_list);
941 } 939 }
942 mnt->mnt_flags = mnt_flags;
943 940
944out: 941out:
945 up_write(&namespace_sem); 942 up_write(&namespace_sem);
@@ -1353,7 +1350,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1353 retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, 1350 retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
1354 data_page); 1351 data_page);
1355 else if (flags & MS_BIND) 1352 else if (flags & MS_BIND)
1356 retval = do_loopback(&nd, dev_name, flags, mnt_flags); 1353 retval = do_loopback(&nd, dev_name, flags & MS_REC);
1357 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) 1354 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
1358 retval = do_change_type(&nd, flags); 1355 retval = do_change_type(&nd, flags);
1359 else if (flags & MS_MOVE) 1356 else if (flags & MS_MOVE)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a23f34894167..cae74dd4c7f5 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -128,15 +128,14 @@ struct inode_operations nfs4_dir_inode_operations = {
128static int 128static int
129nfs_opendir(struct inode *inode, struct file *filp) 129nfs_opendir(struct inode *inode, struct file *filp)
130{ 130{
131 int res = 0; 131 int res;
132 132
133 dfprintk(VFS, "NFS: opendir(%s/%ld)\n", 133 dfprintk(VFS, "NFS: opendir(%s/%ld)\n",
134 inode->i_sb->s_id, inode->i_ino); 134 inode->i_sb->s_id, inode->i_ino);
135 135
136 lock_kernel(); 136 lock_kernel();
137 /* Call generic open code in order to cache credentials */ 137 /* Call generic open code in order to cache credentials */
138 if (!res) 138 res = nfs_open(inode, filp);
139 res = nfs_open(inode, filp);
140 unlock_kernel(); 139 unlock_kernel();
141 return res; 140 return res;
142} 141}
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 0f583cb16ddb..3c72b0c07283 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -112,10 +112,9 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
112 */ 112 */
113ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) 113ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
114{ 114{
115 struct dentry *dentry = iocb->ki_filp->f_dentry;
116
117 dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n", 115 dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
118 dentry->d_name.name, (long long) pos, nr_segs); 116 iocb->ki_filp->f_dentry->d_name.name,
117 (long long) pos, nr_segs);
119 118
120 return -EINVAL; 119 return -EINVAL;
121} 120}
@@ -468,7 +467,6 @@ static const struct rpc_call_ops nfs_commit_direct_ops = {
468static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) 467static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
469{ 468{
470 struct nfs_write_data *data = dreq->commit_data; 469 struct nfs_write_data *data = dreq->commit_data;
471 struct rpc_task *task = &data->task;
472 470
473 data->inode = dreq->inode; 471 data->inode = dreq->inode;
474 data->cred = dreq->ctx->cred; 472 data->cred = dreq->ctx->cred;
@@ -489,7 +487,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
489 /* Note: task.tk_ops->rpc_release will free dreq->commit_data */ 487 /* Note: task.tk_ops->rpc_release will free dreq->commit_data */
490 dreq->commit_data = NULL; 488 dreq->commit_data = NULL;
491 489
492 dprintk("NFS: %5u initiated commit call\n", task->tk_pid); 490 dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
493 491
494 lock_kernel(); 492 lock_kernel();
495 rpc_execute(&data->task); 493 rpc_execute(&data->task);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index f1df2c8d9259..fade02c15e6e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -534,10 +534,9 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
534 */ 534 */
535static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) 535static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
536{ 536{
537 struct inode * inode = filp->f_mapping->host;
538
539 dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n", 537 dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n",
540 inode->i_sb->s_id, inode->i_ino, 538 filp->f_dentry->d_inode->i_sb->s_id,
539 filp->f_dentry->d_inode->i_ino,
541 fl->fl_type, fl->fl_flags); 540 fl->fl_type, fl->fl_flags);
542 541
543 /* 542 /*
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 2f7656b911b6..d0b991a92327 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -700,12 +700,9 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
700 /* 700 /*
701 * Display superblock I/O counters 701 * Display superblock I/O counters
702 */ 702 */
703 for (cpu = 0; cpu < NR_CPUS; cpu++) { 703 for_each_possible_cpu(cpu) {
704 struct nfs_iostats *stats; 704 struct nfs_iostats *stats;
705 705
706 if (!cpu_possible(cpu))
707 continue;
708
709 preempt_disable(); 706 preempt_disable();
710 stats = per_cpu_ptr(nfss->io_stats, cpu); 707 stats = per_cpu_ptr(nfss->io_stats, cpu);
711 708
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 47ece1dd3c67..d86c0db7b1e8 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1218,7 +1218,7 @@ out:
1218 return status; 1218 return status;
1219} 1219}
1220 1220
1221static void nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state) 1221static int nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state)
1222{ 1222{
1223 struct file *filp; 1223 struct file *filp;
1224 1224
@@ -1227,8 +1227,10 @@ static void nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, st
1227 struct nfs_open_context *ctx; 1227 struct nfs_open_context *ctx;
1228 ctx = (struct nfs_open_context *)filp->private_data; 1228 ctx = (struct nfs_open_context *)filp->private_data;
1229 ctx->state = state; 1229 ctx->state = state;
1230 } else 1230 return 0;
1231 nfs4_close_state(state, nd->intent.open.flags); 1231 }
1232 nfs4_close_state(state, nd->intent.open.flags);
1233 return PTR_ERR(filp);
1232} 1234}
1233 1235
1234struct dentry * 1236struct dentry *
@@ -1835,7 +1837,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1835 nfs_setattr_update_inode(state->inode, sattr); 1837 nfs_setattr_update_inode(state->inode, sattr);
1836 } 1838 }
1837 if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN)) 1839 if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN))
1838 nfs4_intent_set_file(nd, dentry, state); 1840 status = nfs4_intent_set_file(nd, dentry, state);
1839 else 1841 else
1840 nfs4_close_state(state, flags); 1842 nfs4_close_state(state, flags);
1841out: 1843out:
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 4e0578121d9a..3eec30000f3f 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1066,9 +1066,11 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp,
1066 rv = nfserr_perm; 1066 rv = nfserr_perm;
1067 else if (IS_ERR(exp)) 1067 else if (IS_ERR(exp))
1068 rv = nfserrno(PTR_ERR(exp)); 1068 rv = nfserrno(PTR_ERR(exp));
1069 else 1069 else {
1070 rv = fh_compose(fhp, exp, 1070 rv = fh_compose(fhp, exp,
1071 fsid_key->ek_dentry, NULL); 1071 fsid_key->ek_dentry, NULL);
1072 exp_put(exp);
1073 }
1072 cache_put(&fsid_key->h, &svc_expkey_cache); 1074 cache_put(&fsid_key->h, &svc_expkey_cache);
1073 return rv; 1075 return rv;
1074} 1076}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 6aa92d0e6876..1d65f13f458c 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1922,11 +1922,10 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
1922 value = kmalloc(size, GFP_KERNEL); 1922 value = kmalloc(size, GFP_KERNEL);
1923 if (!value) 1923 if (!value)
1924 return -ENOMEM; 1924 return -ENOMEM;
1925 size = posix_acl_to_xattr(acl, value, size); 1925 error = posix_acl_to_xattr(acl, value, size);
1926 if (size < 0) { 1926 if (error < 0)
1927 error = size;
1928 goto getout; 1927 goto getout;
1929 } 1928 size = error;
1930 } else 1929 } else
1931 size = 0; 1930 size = 0;
1932 1931
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 0d858d0b25be..47152bf9a7f2 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -276,13 +276,29 @@ static int ocfs2_writepage(struct page *page, struct writeback_control *wbc)
276 return ret; 276 return ret;
277} 277}
278 278
279/* This can also be called from ocfs2_write_zero_page() which has done
280 * it's own cluster locking. */
281int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
282 unsigned from, unsigned to)
283{
284 int ret;
285
286 down_read(&OCFS2_I(inode)->ip_alloc_sem);
287
288 ret = block_prepare_write(page, from, to, ocfs2_get_block);
289
290 up_read(&OCFS2_I(inode)->ip_alloc_sem);
291
292 return ret;
293}
294
279/* 295/*
280 * ocfs2_prepare_write() can be an outer-most ocfs2 call when it is called 296 * ocfs2_prepare_write() can be an outer-most ocfs2 call when it is called
281 * from loopback. It must be able to perform its own locking around 297 * from loopback. It must be able to perform its own locking around
282 * ocfs2_get_block(). 298 * ocfs2_get_block().
283 */ 299 */
284int ocfs2_prepare_write(struct file *file, struct page *page, 300static int ocfs2_prepare_write(struct file *file, struct page *page,
285 unsigned from, unsigned to) 301 unsigned from, unsigned to)
286{ 302{
287 struct inode *inode = page->mapping->host; 303 struct inode *inode = page->mapping->host;
288 int ret; 304 int ret;
@@ -295,11 +311,7 @@ int ocfs2_prepare_write(struct file *file, struct page *page,
295 goto out; 311 goto out;
296 } 312 }
297 313
298 down_read(&OCFS2_I(inode)->ip_alloc_sem); 314 ret = ocfs2_prepare_write_nolock(inode, page, from, to);
299
300 ret = block_prepare_write(page, from, to, ocfs2_get_block);
301
302 up_read(&OCFS2_I(inode)->ip_alloc_sem);
303 315
304 ocfs2_meta_unlock(inode, 0); 316 ocfs2_meta_unlock(inode, 0);
305out: 317out:
@@ -625,11 +637,31 @@ static ssize_t ocfs2_direct_IO(int rw,
625 int ret; 637 int ret;
626 638
627 mlog_entry_void(); 639 mlog_entry_void();
640
641 /*
642 * We get PR data locks even for O_DIRECT. This allows
643 * concurrent O_DIRECT I/O but doesn't let O_DIRECT with
644 * extending and buffered zeroing writes race. If they did
645 * race then the buffered zeroing could be written back after
646 * the O_DIRECT I/O. It's one thing to tell people not to mix
647 * buffered and O_DIRECT writes, but expecting them to
648 * understand that file extension is also an implicit buffered
649 * write is too much. By getting the PR we force writeback of
650 * the buffered zeroing before proceeding.
651 */
652 ret = ocfs2_data_lock(inode, 0);
653 if (ret < 0) {
654 mlog_errno(ret);
655 goto out;
656 }
657 ocfs2_data_unlock(inode, 0);
658
628 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, 659 ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
629 inode->i_sb->s_bdev, iov, offset, 660 inode->i_sb->s_bdev, iov, offset,
630 nr_segs, 661 nr_segs,
631 ocfs2_direct_IO_get_blocks, 662 ocfs2_direct_IO_get_blocks,
632 ocfs2_dio_end_io); 663 ocfs2_dio_end_io);
664out:
633 mlog_exit(ret); 665 mlog_exit(ret);
634 return ret; 666 return ret;
635} 667}
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index d40456d509a0..e88c3f0b8fa9 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -22,8 +22,8 @@
22#ifndef OCFS2_AOPS_H 22#ifndef OCFS2_AOPS_H
23#define OCFS2_AOPS_H 23#define OCFS2_AOPS_H
24 24
25int ocfs2_prepare_write(struct file *file, struct page *page, 25int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
26 unsigned from, unsigned to); 26 unsigned from, unsigned to);
27 27
28struct ocfs2_journal_handle *ocfs2_start_walk_page_trans(struct inode *inode, 28struct ocfs2_journal_handle *ocfs2_start_walk_page_trans(struct inode *inode,
29 struct page *page, 29 struct page *page,
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 4601fc256f11..1a5c69071df6 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -569,7 +569,7 @@ static int ocfs2_extent_map_insert(struct inode *inode,
569 569
570 ret = -ENOMEM; 570 ret = -ENOMEM;
571 ctxt.new_ent = kmem_cache_alloc(ocfs2_em_ent_cachep, 571 ctxt.new_ent = kmem_cache_alloc(ocfs2_em_ent_cachep,
572 GFP_KERNEL); 572 GFP_NOFS);
573 if (!ctxt.new_ent) { 573 if (!ctxt.new_ent) {
574 mlog_errno(ret); 574 mlog_errno(ret);
575 return ret; 575 return ret;
@@ -583,14 +583,14 @@ static int ocfs2_extent_map_insert(struct inode *inode,
583 if (ctxt.need_left && !ctxt.left_ent) { 583 if (ctxt.need_left && !ctxt.left_ent) {
584 ctxt.left_ent = 584 ctxt.left_ent =
585 kmem_cache_alloc(ocfs2_em_ent_cachep, 585 kmem_cache_alloc(ocfs2_em_ent_cachep,
586 GFP_KERNEL); 586 GFP_NOFS);
587 if (!ctxt.left_ent) 587 if (!ctxt.left_ent)
588 break; 588 break;
589 } 589 }
590 if (ctxt.need_right && !ctxt.right_ent) { 590 if (ctxt.need_right && !ctxt.right_ent) {
591 ctxt.right_ent = 591 ctxt.right_ent =
592 kmem_cache_alloc(ocfs2_em_ent_cachep, 592 kmem_cache_alloc(ocfs2_em_ent_cachep,
593 GFP_KERNEL); 593 GFP_NOFS);
594 if (!ctxt.right_ent) 594 if (!ctxt.right_ent)
595 break; 595 break;
596 } 596 }
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 581eb451a41a..a9559c874530 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -613,7 +613,8 @@ leave:
613 613
614/* Some parts of this taken from generic_cont_expand, which turned out 614/* Some parts of this taken from generic_cont_expand, which turned out
615 * to be too fragile to do exactly what we need without us having to 615 * to be too fragile to do exactly what we need without us having to
616 * worry about recursive locking in ->commit_write(). */ 616 * worry about recursive locking in ->prepare_write() and
617 * ->commit_write(). */
617static int ocfs2_write_zero_page(struct inode *inode, 618static int ocfs2_write_zero_page(struct inode *inode,
618 u64 size) 619 u64 size)
619{ 620{
@@ -641,7 +642,7 @@ static int ocfs2_write_zero_page(struct inode *inode,
641 goto out; 642 goto out;
642 } 643 }
643 644
644 ret = ocfs2_prepare_write(NULL, page, offset, offset); 645 ret = ocfs2_prepare_write_nolock(inode, page, offset, offset);
645 if (ret < 0) { 646 if (ret < 0) {
646 mlog_errno(ret); 647 mlog_errno(ret);
647 goto out_unlock; 648 goto out_unlock;
@@ -695,13 +696,26 @@ out:
695 return ret; 696 return ret;
696} 697}
697 698
699/*
700 * A tail_to_skip value > 0 indicates that we're being called from
701 * ocfs2_file_aio_write(). This has the following implications:
702 *
703 * - we don't want to update i_size
704 * - di_bh will be NULL, which is fine because it's only used in the
705 * case where we want to update i_size.
706 * - ocfs2_zero_extend() will then only be filling the hole created
707 * between i_size and the start of the write.
708 */
698static int ocfs2_extend_file(struct inode *inode, 709static int ocfs2_extend_file(struct inode *inode,
699 struct buffer_head *di_bh, 710 struct buffer_head *di_bh,
700 u64 new_i_size) 711 u64 new_i_size,
712 size_t tail_to_skip)
701{ 713{
702 int ret = 0; 714 int ret = 0;
703 u32 clusters_to_add; 715 u32 clusters_to_add;
704 716
717 BUG_ON(!tail_to_skip && !di_bh);
718
705 /* setattr sometimes calls us like this. */ 719 /* setattr sometimes calls us like this. */
706 if (new_i_size == 0) 720 if (new_i_size == 0)
707 goto out; 721 goto out;
@@ -714,27 +728,44 @@ static int ocfs2_extend_file(struct inode *inode,
714 OCFS2_I(inode)->ip_clusters; 728 OCFS2_I(inode)->ip_clusters;
715 729
716 if (clusters_to_add) { 730 if (clusters_to_add) {
717 ret = ocfs2_extend_allocation(inode, clusters_to_add); 731 /*
732 * protect the pages that ocfs2_zero_extend is going to
733 * be pulling into the page cache.. we do this before the
734 * metadata extend so that we don't get into the situation
735 * where we've extended the metadata but can't get the data
736 * lock to zero.
737 */
738 ret = ocfs2_data_lock(inode, 1);
718 if (ret < 0) { 739 if (ret < 0) {
719 mlog_errno(ret); 740 mlog_errno(ret);
720 goto out; 741 goto out;
721 } 742 }
722 743
723 ret = ocfs2_zero_extend(inode, new_i_size); 744 ret = ocfs2_extend_allocation(inode, clusters_to_add);
724 if (ret < 0) { 745 if (ret < 0) {
725 mlog_errno(ret); 746 mlog_errno(ret);
726 goto out; 747 goto out_unlock;
727 } 748 }
728 }
729 749
730 /* No allocation required, we just use this helper to 750 ret = ocfs2_zero_extend(inode, (u64)new_i_size - tail_to_skip);
731 * do a trivial update of i_size. */ 751 if (ret < 0) {
732 ret = ocfs2_simple_size_update(inode, di_bh, new_i_size); 752 mlog_errno(ret);
733 if (ret < 0) { 753 goto out_unlock;
734 mlog_errno(ret); 754 }
735 goto out; 755 }
756
757 if (!tail_to_skip) {
758 /* We're being called from ocfs2_setattr() which wants
759 * us to update i_size */
760 ret = ocfs2_simple_size_update(inode, di_bh, new_i_size);
761 if (ret < 0)
762 mlog_errno(ret);
736 } 763 }
737 764
765out_unlock:
766 if (clusters_to_add) /* this is the only case in which we lock */
767 ocfs2_data_unlock(inode, 1);
768
738out: 769out:
739 return ret; 770 return ret;
740} 771}
@@ -793,7 +824,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
793 if (i_size_read(inode) > attr->ia_size) 824 if (i_size_read(inode) > attr->ia_size)
794 status = ocfs2_truncate_file(inode, bh, attr->ia_size); 825 status = ocfs2_truncate_file(inode, bh, attr->ia_size);
795 else 826 else
796 status = ocfs2_extend_file(inode, bh, attr->ia_size); 827 status = ocfs2_extend_file(inode, bh, attr->ia_size, 0);
797 if (status < 0) { 828 if (status < 0) {
798 if (status != -ENOSPC) 829 if (status != -ENOSPC)
799 mlog_errno(status); 830 mlog_errno(status);
@@ -1049,21 +1080,12 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
1049 if (!clusters) 1080 if (!clusters)
1050 break; 1081 break;
1051 1082
1052 ret = ocfs2_extend_allocation(inode, clusters); 1083 ret = ocfs2_extend_file(inode, NULL, newsize, count);
1053 if (ret < 0) { 1084 if (ret < 0) {
1054 if (ret != -ENOSPC) 1085 if (ret != -ENOSPC)
1055 mlog_errno(ret); 1086 mlog_errno(ret);
1056 goto out; 1087 goto out;
1057 } 1088 }
1058
1059 /* Fill any holes which would've been created by this
1060 * write. If we're O_APPEND, this will wind up
1061 * (correctly) being a noop. */
1062 ret = ocfs2_zero_extend(inode, (u64) newsize - count);
1063 if (ret < 0) {
1064 mlog_errno(ret);
1065 goto out;
1066 }
1067 break; 1089 break;
1068 } 1090 }
1069 1091
@@ -1146,6 +1168,22 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
1146 ocfs2_iocb_set_rw_locked(iocb); 1168 ocfs2_iocb_set_rw_locked(iocb);
1147 } 1169 }
1148 1170
1171 /*
1172 * We're fine letting folks race truncates and extending
1173 * writes with read across the cluster, just like they can
1174 * locally. Hence no rw_lock during read.
1175 *
1176 * Take and drop the meta data lock to update inode fields
1177 * like i_size. This allows the checks down below
1178 * generic_file_aio_read() a chance of actually working.
1179 */
1180 ret = ocfs2_meta_lock(inode, NULL, NULL, 0);
1181 if (ret < 0) {
1182 mlog_errno(ret);
1183 goto bail;
1184 }
1185 ocfs2_meta_unlock(inode, 0);
1186
1149 ret = generic_file_aio_read(iocb, buf, count, iocb->ki_pos); 1187 ret = generic_file_aio_read(iocb, buf, count, iocb->ki_pos);
1150 if (ret == -EINVAL) 1188 if (ret == -EINVAL)
1151 mlog(ML_ERROR, "generic_file_aio_read returned -EINVAL\n"); 1189 mlog(ML_ERROR, "generic_file_aio_read returned -EINVAL\n");
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 6a610ae53583..eebc3cfa6be8 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -117,7 +117,7 @@ struct ocfs2_journal_handle *ocfs2_alloc_handle(struct ocfs2_super *osb)
117{ 117{
118 struct ocfs2_journal_handle *retval = NULL; 118 struct ocfs2_journal_handle *retval = NULL;
119 119
120 retval = kcalloc(1, sizeof(*retval), GFP_KERNEL); 120 retval = kcalloc(1, sizeof(*retval), GFP_NOFS);
121 if (!retval) { 121 if (!retval) {
122 mlog(ML_ERROR, "Failed to allocate memory for journal " 122 mlog(ML_ERROR, "Failed to allocate memory for journal "
123 "handle!\n"); 123 "handle!\n");
@@ -870,9 +870,11 @@ static int ocfs2_force_read_journal(struct inode *inode)
870 if (p_blocks > CONCURRENT_JOURNAL_FILL) 870 if (p_blocks > CONCURRENT_JOURNAL_FILL)
871 p_blocks = CONCURRENT_JOURNAL_FILL; 871 p_blocks = CONCURRENT_JOURNAL_FILL;
872 872
873 /* We are reading journal data which should not
874 * be put in the uptodate cache */
873 status = ocfs2_read_blocks(OCFS2_SB(inode->i_sb), 875 status = ocfs2_read_blocks(OCFS2_SB(inode->i_sb),
874 p_blkno, p_blocks, bhs, 0, 876 p_blkno, p_blocks, bhs, 0,
875 inode); 877 NULL);
876 if (status < 0) { 878 if (status < 0) {
877 mlog_errno(status); 879 mlog_errno(status);
878 goto bail; 880 goto bail;
@@ -982,7 +984,7 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
982{ 984{
983 struct ocfs2_la_recovery_item *item; 985 struct ocfs2_la_recovery_item *item;
984 986
985 item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_KERNEL); 987 item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS);
986 if (!item) { 988 if (!item) {
987 /* Though we wish to avoid it, we are in fact safe in 989 /* Though we wish to avoid it, we are in fact safe in
988 * skipping local alloc cleanup as fsck.ocfs2 is more 990 * skipping local alloc cleanup as fsck.ocfs2 is more
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 04a684dfdd96..b8a00a793326 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -337,7 +337,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
337 (unsigned long long)oi->ip_blkno, 337 (unsigned long long)oi->ip_blkno,
338 (unsigned long long)block, expand_tree); 338 (unsigned long long)block, expand_tree);
339 339
340 new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_KERNEL); 340 new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_NOFS);
341 if (!new) { 341 if (!new) {
342 mlog_errno(-ENOMEM); 342 mlog_errno(-ENOMEM);
343 return; 343 return;
@@ -349,7 +349,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
349 * has no way of tracking that. */ 349 * has no way of tracking that. */
350 for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) { 350 for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) {
351 tree[i] = kmem_cache_alloc(ocfs2_uptodate_cachep, 351 tree[i] = kmem_cache_alloc(ocfs2_uptodate_cachep,
352 GFP_KERNEL); 352 GFP_NOFS);
353 if (!tree[i]) { 353 if (!tree[i]) {
354 mlog_errno(-ENOMEM); 354 mlog_errno(-ENOMEM);
355 goto out_free; 355 goto out_free;
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index 53049a204197..ee42765a8553 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -586,7 +586,7 @@ static struct ocfs2_net_wait_ctxt *ocfs2_new_net_wait_ctxt(unsigned int response
586{ 586{
587 struct ocfs2_net_wait_ctxt *w; 587 struct ocfs2_net_wait_ctxt *w;
588 588
589 w = kcalloc(1, sizeof(*w), GFP_KERNEL); 589 w = kcalloc(1, sizeof(*w), GFP_NOFS);
590 if (!w) { 590 if (!w) {
591 mlog_errno(-ENOMEM); 591 mlog_errno(-ENOMEM);
592 goto bail; 592 goto bail;
@@ -749,7 +749,7 @@ static struct ocfs2_vote_msg * ocfs2_new_vote_request(struct ocfs2_super *osb,
749 749
750 BUG_ON(!ocfs2_is_valid_vote_request(type)); 750 BUG_ON(!ocfs2_is_valid_vote_request(type));
751 751
752 request = kcalloc(1, sizeof(*request), GFP_KERNEL); 752 request = kcalloc(1, sizeof(*request), GFP_NOFS);
753 if (!request) { 753 if (!request) {
754 mlog_errno(-ENOMEM); 754 mlog_errno(-ENOMEM);
755 } else { 755 } else {
@@ -1129,7 +1129,7 @@ static int ocfs2_handle_vote_message(struct o2net_msg *msg,
1129 struct ocfs2_super *osb = data; 1129 struct ocfs2_super *osb = data;
1130 struct ocfs2_vote_work *work; 1130 struct ocfs2_vote_work *work;
1131 1131
1132 work = kmalloc(sizeof(struct ocfs2_vote_work), GFP_KERNEL); 1132 work = kmalloc(sizeof(struct ocfs2_vote_work), GFP_NOFS);
1133 if (!work) { 1133 if (!work) {
1134 status = -ENOMEM; 1134 status = -ENOMEM;
1135 mlog_errno(status); 1135 mlog_errno(status);
diff --git a/fs/open.c b/fs/open.c
index c32c89d6d8db..317b7c7f38a7 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -331,7 +331,10 @@ out:
331 331
332asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length) 332asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length)
333{ 333{
334 return do_sys_ftruncate(fd, length, 1); 334 long ret = do_sys_ftruncate(fd, length, 1);
335 /* avoid REGPARM breakage on x86: */
336 prevent_tail_call(ret);
337 return ret;
335} 338}
336 339
337/* LFS versions of truncate are only needed on 32 bit machines */ 340/* LFS versions of truncate are only needed on 32 bit machines */
@@ -343,7 +346,10 @@ asmlinkage long sys_truncate64(const char __user * path, loff_t length)
343 346
344asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length) 347asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length)
345{ 348{
346 return do_sys_ftruncate(fd, length, 0); 349 long ret = do_sys_ftruncate(fd, length, 0);
350 /* avoid REGPARM breakage on x86: */
351 prevent_tail_call(ret);
352 return ret;
347} 353}
348#endif 354#endif
349 355
@@ -1093,22 +1099,31 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
1093 1099
1094asmlinkage long sys_open(const char __user *filename, int flags, int mode) 1100asmlinkage long sys_open(const char __user *filename, int flags, int mode)
1095{ 1101{
1102 long ret;
1103
1096 if (force_o_largefile()) 1104 if (force_o_largefile())
1097 flags |= O_LARGEFILE; 1105 flags |= O_LARGEFILE;
1098 1106
1099 return do_sys_open(AT_FDCWD, filename, flags, mode); 1107 ret = do_sys_open(AT_FDCWD, filename, flags, mode);
1108 /* avoid REGPARM breakage on x86: */
1109 prevent_tail_call(ret);
1110 return ret;
1100} 1111}
1101EXPORT_SYMBOL_GPL(sys_open); 1112EXPORT_SYMBOL_GPL(sys_open);
1102 1113
1103asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, 1114asmlinkage long sys_openat(int dfd, const char __user *filename, int flags,
1104 int mode) 1115 int mode)
1105{ 1116{
1117 long ret;
1118
1106 if (force_o_largefile()) 1119 if (force_o_largefile())
1107 flags |= O_LARGEFILE; 1120 flags |= O_LARGEFILE;
1108 1121
1109 return do_sys_open(dfd, filename, flags, mode); 1122 ret = do_sys_open(dfd, filename, flags, mode);
1123 /* avoid REGPARM breakage on x86: */
1124 prevent_tail_call(ret);
1125 return ret;
1110} 1126}
1111EXPORT_SYMBOL_GPL(sys_openat);
1112 1127
1113#ifndef __alpha__ 1128#ifndef __alpha__
1114 1129
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index af0cb4b9e784..7ef1f094de91 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -331,7 +331,9 @@ void delete_partition(struct gendisk *disk, int part)
331 devfs_remove("%s/part%d", disk->devfs_name, part); 331 devfs_remove("%s/part%d", disk->devfs_name, part);
332 if (p->holder_dir) 332 if (p->holder_dir)
333 kobject_unregister(p->holder_dir); 333 kobject_unregister(p->holder_dir);
334 kobject_unregister(&p->kobj); 334 kobject_uevent(&p->kobj, KOBJ_REMOVE);
335 kobject_del(&p->kobj);
336 kobject_put(&p->kobj);
335} 337}
336 338
337void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len) 339void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len)
@@ -357,7 +359,10 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len)
357 snprintf(p->kobj.name,KOBJ_NAME_LEN,"%s%d",disk->kobj.name,part); 359 snprintf(p->kobj.name,KOBJ_NAME_LEN,"%s%d",disk->kobj.name,part);
358 p->kobj.parent = &disk->kobj; 360 p->kobj.parent = &disk->kobj;
359 p->kobj.ktype = &ktype_part; 361 p->kobj.ktype = &ktype_part;
360 kobject_register(&p->kobj); 362 kobject_init(&p->kobj);
363 kobject_add(&p->kobj);
364 if (!disk->part_uevent_suppress)
365 kobject_uevent(&p->kobj, KOBJ_ADD);
361 partition_sysfs_add_subdir(p); 366 partition_sysfs_add_subdir(p);
362 disk->part[part-1] = p; 367 disk->part[part-1] = p;
363} 368}
@@ -367,6 +372,7 @@ static char *make_block_name(struct gendisk *disk)
367 char *name; 372 char *name;
368 static char *block_str = "block:"; 373 static char *block_str = "block:";
369 int size; 374 int size;
375 char *s;
370 376
371 size = strlen(block_str) + strlen(disk->disk_name) + 1; 377 size = strlen(block_str) + strlen(disk->disk_name) + 1;
372 name = kmalloc(size, GFP_KERNEL); 378 name = kmalloc(size, GFP_KERNEL);
@@ -374,6 +380,10 @@ static char *make_block_name(struct gendisk *disk)
374 return NULL; 380 return NULL;
375 strcpy(name, block_str); 381 strcpy(name, block_str);
376 strcat(name, disk->disk_name); 382 strcat(name, disk->disk_name);
383 /* ewww... some of these buggers have / in name... */
384 s = strchr(name, '/');
385 if (s)
386 *s = '!';
377 return name; 387 return name;
378} 388}
379 389
@@ -395,6 +405,8 @@ void register_disk(struct gendisk *disk)
395{ 405{
396 struct block_device *bdev; 406 struct block_device *bdev;
397 char *s; 407 char *s;
408 int i;
409 struct hd_struct *p;
398 int err; 410 int err;
399 411
400 strlcpy(disk->kobj.name,disk->disk_name,KOBJ_NAME_LEN); 412 strlcpy(disk->kobj.name,disk->disk_name,KOBJ_NAME_LEN);
@@ -406,13 +418,12 @@ void register_disk(struct gendisk *disk)
406 return; 418 return;
407 disk_sysfs_symlinks(disk); 419 disk_sysfs_symlinks(disk);
408 disk_sysfs_add_subdirs(disk); 420 disk_sysfs_add_subdirs(disk);
409 kobject_uevent(&disk->kobj, KOBJ_ADD);
410 421
411 /* No minors to use for partitions */ 422 /* No minors to use for partitions */
412 if (disk->minors == 1) { 423 if (disk->minors == 1) {
413 if (disk->devfs_name[0] != '\0') 424 if (disk->devfs_name[0] != '\0')
414 devfs_add_disk(disk); 425 devfs_add_disk(disk);
415 return; 426 goto exit;
416 } 427 }
417 428
418 /* always add handle for the whole disk */ 429 /* always add handle for the whole disk */
@@ -420,16 +431,32 @@ void register_disk(struct gendisk *disk)
420 431
421 /* No such device (e.g., media were just removed) */ 432 /* No such device (e.g., media were just removed) */
422 if (!get_capacity(disk)) 433 if (!get_capacity(disk))
423 return; 434 goto exit;
424 435
425 bdev = bdget_disk(disk, 0); 436 bdev = bdget_disk(disk, 0);
426 if (!bdev) 437 if (!bdev)
427 return; 438 goto exit;
428 439
440 /* scan partition table, but suppress uevents */
429 bdev->bd_invalidated = 1; 441 bdev->bd_invalidated = 1;
430 if (blkdev_get(bdev, FMODE_READ, 0) < 0) 442 disk->part_uevent_suppress = 1;
431 return; 443 err = blkdev_get(bdev, FMODE_READ, 0);
444 disk->part_uevent_suppress = 0;
445 if (err < 0)
446 goto exit;
432 blkdev_put(bdev); 447 blkdev_put(bdev);
448
449exit:
450 /* announce disk after possible partitions are already created */
451 kobject_uevent(&disk->kobj, KOBJ_ADD);
452
453 /* announce possible partitions */
454 for (i = 1; i < disk->minors; i++) {
455 p = disk->part[i-1];
456 if (!p || !p->nr_sects)
457 continue;
458 kobject_uevent(&p->kobj, KOBJ_ADD);
459 }
433} 460}
434 461
435int rescan_partitions(struct gendisk *disk, struct block_device *bdev) 462int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
@@ -506,6 +533,7 @@ void del_gendisk(struct gendisk *disk)
506 533
507 devfs_remove_disk(disk); 534 devfs_remove_disk(disk);
508 535
536 kobject_uevent(&disk->kobj, KOBJ_REMOVE);
509 if (disk->holder_dir) 537 if (disk->holder_dir)
510 kobject_unregister(disk->holder_dir); 538 kobject_unregister(disk->holder_dir);
511 if (disk->slave_dir) 539 if (disk->slave_dir)
@@ -518,7 +546,7 @@ void del_gendisk(struct gendisk *disk)
518 kfree(disk_name); 546 kfree(disk_name);
519 } 547 }
520 put_device(disk->driverfs_dev); 548 put_device(disk->driverfs_dev);
549 disk->driverfs_dev = NULL;
521 } 550 }
522 kobject_uevent(&disk->kobj, KOBJ_REMOVE);
523 kobject_del(&disk->kobj); 551 kobject_del(&disk->kobj);
524} 552}
diff --git a/fs/pipe.c b/fs/pipe.c
index e984beb93a0e..5acd8954aaa0 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -55,7 +55,8 @@ void pipe_wait(struct pipe_inode_info *pipe)
55} 55}
56 56
57static int 57static int
58pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len) 58pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
59 int atomic)
59{ 60{
60 unsigned long copy; 61 unsigned long copy;
61 62
@@ -64,8 +65,13 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
64 iov++; 65 iov++;
65 copy = min_t(unsigned long, len, iov->iov_len); 66 copy = min_t(unsigned long, len, iov->iov_len);
66 67
67 if (copy_from_user(to, iov->iov_base, copy)) 68 if (atomic) {
68 return -EFAULT; 69 if (__copy_from_user_inatomic(to, iov->iov_base, copy))
70 return -EFAULT;
71 } else {
72 if (copy_from_user(to, iov->iov_base, copy))
73 return -EFAULT;
74 }
69 to += copy; 75 to += copy;
70 len -= copy; 76 len -= copy;
71 iov->iov_base += copy; 77 iov->iov_base += copy;
@@ -75,7 +81,8 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
75} 81}
76 82
77static int 83static int
78pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len) 84pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len,
85 int atomic)
79{ 86{
80 unsigned long copy; 87 unsigned long copy;
81 88
@@ -84,8 +91,13 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
84 iov++; 91 iov++;
85 copy = min_t(unsigned long, len, iov->iov_len); 92 copy = min_t(unsigned long, len, iov->iov_len);
86 93
87 if (copy_to_user(iov->iov_base, from, copy)) 94 if (atomic) {
88 return -EFAULT; 95 if (__copy_to_user_inatomic(iov->iov_base, from, copy))
96 return -EFAULT;
97 } else {
98 if (copy_to_user(iov->iov_base, from, copy))
99 return -EFAULT;
100 }
89 from += copy; 101 from += copy;
90 len -= copy; 102 len -= copy;
91 iov->iov_base += copy; 103 iov->iov_base += copy;
@@ -94,13 +106,52 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
94 return 0; 106 return 0;
95} 107}
96 108
109/*
110 * Attempt to pre-fault in the user memory, so we can use atomic copies.
111 * Returns the number of bytes not faulted in.
112 */
113static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len)
114{
115 while (!iov->iov_len)
116 iov++;
117
118 while (len > 0) {
119 unsigned long this_len;
120
121 this_len = min_t(unsigned long, len, iov->iov_len);
122 if (fault_in_pages_writeable(iov->iov_base, this_len))
123 break;
124
125 len -= this_len;
126 iov++;
127 }
128
129 return len;
130}
131
132/*
133 * Pre-fault in the user memory, so we can use atomic copies.
134 */
135static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len)
136{
137 while (!iov->iov_len)
138 iov++;
139
140 while (len > 0) {
141 unsigned long this_len;
142
143 this_len = min_t(unsigned long, len, iov->iov_len);
144 fault_in_pages_readable(iov->iov_base, this_len);
145 len -= this_len;
146 iov++;
147 }
148}
149
97static void anon_pipe_buf_release(struct pipe_inode_info *pipe, 150static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
98 struct pipe_buffer *buf) 151 struct pipe_buffer *buf)
99{ 152{
100 struct page *page = buf->page; 153 struct page *page = buf->page;
101 154
102 buf->flags &= ~PIPE_BUF_FLAG_STOLEN;
103
104 /* 155 /*
105 * If nobody else uses this page, and we don't already have a 156 * If nobody else uses this page, and we don't already have a
106 * temporary page, let's keep track of it as a one-deep 157 * temporary page, let's keep track of it as a one-deep
@@ -112,31 +163,58 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
112 page_cache_release(page); 163 page_cache_release(page);
113} 164}
114 165
115static void * anon_pipe_buf_map(struct file *file, struct pipe_inode_info *pipe, 166void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
116 struct pipe_buffer *buf) 167 struct pipe_buffer *buf, int atomic)
117{ 168{
169 if (atomic) {
170 buf->flags |= PIPE_BUF_FLAG_ATOMIC;
171 return kmap_atomic(buf->page, KM_USER0);
172 }
173
118 return kmap(buf->page); 174 return kmap(buf->page);
119} 175}
120 176
121static void anon_pipe_buf_unmap(struct pipe_inode_info *pipe, 177void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
122 struct pipe_buffer *buf) 178 struct pipe_buffer *buf, void *map_data)
179{
180 if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
181 buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
182 kunmap_atomic(map_data, KM_USER0);
183 } else
184 kunmap(buf->page);
185}
186
187int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
188 struct pipe_buffer *buf)
189{
190 struct page *page = buf->page;
191
192 if (page_count(page) == 1) {
193 lock_page(page);
194 return 0;
195 }
196
197 return 1;
198}
199
200void generic_pipe_buf_get(struct pipe_inode_info *info, struct pipe_buffer *buf)
123{ 201{
124 kunmap(buf->page); 202 page_cache_get(buf->page);
125} 203}
126 204
127static int anon_pipe_buf_steal(struct pipe_inode_info *pipe, 205int generic_pipe_buf_pin(struct pipe_inode_info *info, struct pipe_buffer *buf)
128 struct pipe_buffer *buf)
129{ 206{
130 buf->flags |= PIPE_BUF_FLAG_STOLEN;
131 return 0; 207 return 0;
132} 208}
133 209
134static struct pipe_buf_operations anon_pipe_buf_ops = { 210static struct pipe_buf_operations anon_pipe_buf_ops = {
135 .can_merge = 1, 211 .can_merge = 1,
136 .map = anon_pipe_buf_map, 212 .map = generic_pipe_buf_map,
137 .unmap = anon_pipe_buf_unmap, 213 .unmap = generic_pipe_buf_unmap,
214 .pin = generic_pipe_buf_pin,
138 .release = anon_pipe_buf_release, 215 .release = anon_pipe_buf_release,
139 .steal = anon_pipe_buf_steal, 216 .steal = generic_pipe_buf_steal,
217 .get = generic_pipe_buf_get,
140}; 218};
141 219
142static ssize_t 220static ssize_t
@@ -167,22 +245,33 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
167 struct pipe_buf_operations *ops = buf->ops; 245 struct pipe_buf_operations *ops = buf->ops;
168 void *addr; 246 void *addr;
169 size_t chars = buf->len; 247 size_t chars = buf->len;
170 int error; 248 int error, atomic;
171 249
172 if (chars > total_len) 250 if (chars > total_len)
173 chars = total_len; 251 chars = total_len;
174 252
175 addr = ops->map(filp, pipe, buf); 253 error = ops->pin(pipe, buf);
176 if (IS_ERR(addr)) { 254 if (error) {
177 if (!ret) 255 if (!ret)
178 ret = PTR_ERR(addr); 256 error = ret;
179 break; 257 break;
180 } 258 }
181 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars); 259
182 ops->unmap(pipe, buf); 260 atomic = !iov_fault_in_pages_write(iov, chars);
261redo:
262 addr = ops->map(pipe, buf, atomic);
263 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
264 ops->unmap(pipe, buf, addr);
183 if (unlikely(error)) { 265 if (unlikely(error)) {
266 /*
267 * Just retry with the slow path if we failed.
268 */
269 if (atomic) {
270 atomic = 0;
271 goto redo;
272 }
184 if (!ret) 273 if (!ret)
185 ret = -EFAULT; 274 ret = error;
186 break; 275 break;
187 } 276 }
188 ret += chars; 277 ret += chars;
@@ -286,21 +375,28 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
286 int offset = buf->offset + buf->len; 375 int offset = buf->offset + buf->len;
287 376
288 if (ops->can_merge && offset + chars <= PAGE_SIZE) { 377 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
378 int error, atomic = 1;
289 void *addr; 379 void *addr;
290 int error;
291 380
292 addr = ops->map(filp, pipe, buf); 381 error = ops->pin(pipe, buf);
293 if (IS_ERR(addr)) { 382 if (error)
294 error = PTR_ERR(addr);
295 goto out; 383 goto out;
296 } 384
385 iov_fault_in_pages_read(iov, chars);
386redo1:
387 addr = ops->map(pipe, buf, atomic);
297 error = pipe_iov_copy_from_user(offset + addr, iov, 388 error = pipe_iov_copy_from_user(offset + addr, iov,
298 chars); 389 chars, atomic);
299 ops->unmap(pipe, buf); 390 ops->unmap(pipe, buf, addr);
300 ret = error; 391 ret = error;
301 do_wakeup = 1; 392 do_wakeup = 1;
302 if (error) 393 if (error) {
394 if (atomic) {
395 atomic = 0;
396 goto redo1;
397 }
303 goto out; 398 goto out;
399 }
304 buf->len += chars; 400 buf->len += chars;
305 total_len -= chars; 401 total_len -= chars;
306 ret = chars; 402 ret = chars;
@@ -323,7 +419,8 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
323 int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1); 419 int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
324 struct pipe_buffer *buf = pipe->bufs + newbuf; 420 struct pipe_buffer *buf = pipe->bufs + newbuf;
325 struct page *page = pipe->tmp_page; 421 struct page *page = pipe->tmp_page;
326 int error; 422 char *src;
423 int error, atomic = 1;
327 424
328 if (!page) { 425 if (!page) {
329 page = alloc_page(GFP_HIGHUSER); 426 page = alloc_page(GFP_HIGHUSER);
@@ -343,11 +440,27 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
343 if (chars > total_len) 440 if (chars > total_len)
344 chars = total_len; 441 chars = total_len;
345 442
346 error = pipe_iov_copy_from_user(kmap(page), iov, chars); 443 iov_fault_in_pages_read(iov, chars);
347 kunmap(page); 444redo2:
445 if (atomic)
446 src = kmap_atomic(page, KM_USER0);
447 else
448 src = kmap(page);
449
450 error = pipe_iov_copy_from_user(src, iov, chars,
451 atomic);
452 if (atomic)
453 kunmap_atomic(src, KM_USER0);
454 else
455 kunmap(page);
456
348 if (unlikely(error)) { 457 if (unlikely(error)) {
458 if (atomic) {
459 atomic = 0;
460 goto redo2;
461 }
349 if (!ret) 462 if (!ret)
350 ret = -EFAULT; 463 ret = error;
351 break; 464 break;
352 } 465 }
353 ret += chars; 466 ret += chars;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a3a3eecef689..6cc77dc3f3ff 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -297,16 +297,20 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm
297 297
298 files = get_files_struct(task); 298 files = get_files_struct(task);
299 if (files) { 299 if (files) {
300 rcu_read_lock(); 300 /*
301 * We are not taking a ref to the file structure, so we must
302 * hold ->file_lock.
303 */
304 spin_lock(&files->file_lock);
301 file = fcheck_files(files, fd); 305 file = fcheck_files(files, fd);
302 if (file) { 306 if (file) {
303 *mnt = mntget(file->f_vfsmnt); 307 *mnt = mntget(file->f_vfsmnt);
304 *dentry = dget(file->f_dentry); 308 *dentry = dget(file->f_dentry);
305 rcu_read_unlock(); 309 spin_unlock(&files->file_lock);
306 put_files_struct(files); 310 put_files_struct(files);
307 return 0; 311 return 0;
308 } 312 }
309 rcu_read_unlock(); 313 spin_unlock(&files->file_lock);
310 put_files_struct(files); 314 put_files_struct(files);
311 } 315 }
312 return -ENOENT; 316 return -ENOENT;
@@ -1523,7 +1527,12 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1523 if (!files) 1527 if (!files)
1524 goto out_unlock; 1528 goto out_unlock;
1525 inode->i_mode = S_IFLNK; 1529 inode->i_mode = S_IFLNK;
1526 rcu_read_lock(); 1530
1531 /*
1532 * We are not taking a ref to the file structure, so we must
1533 * hold ->file_lock.
1534 */
1535 spin_lock(&files->file_lock);
1527 file = fcheck_files(files, fd); 1536 file = fcheck_files(files, fd);
1528 if (!file) 1537 if (!file)
1529 goto out_unlock2; 1538 goto out_unlock2;
@@ -1531,7 +1540,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1531 inode->i_mode |= S_IRUSR | S_IXUSR; 1540 inode->i_mode |= S_IRUSR | S_IXUSR;
1532 if (file->f_mode & 2) 1541 if (file->f_mode & 2)
1533 inode->i_mode |= S_IWUSR | S_IXUSR; 1542 inode->i_mode |= S_IWUSR | S_IXUSR;
1534 rcu_read_unlock(); 1543 spin_unlock(&files->file_lock);
1535 put_files_struct(files); 1544 put_files_struct(files);
1536 inode->i_op = &proc_pid_link_inode_operations; 1545 inode->i_op = &proc_pid_link_inode_operations;
1537 inode->i_size = 64; 1546 inode->i_size = 64;
@@ -1541,7 +1550,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1541 return NULL; 1550 return NULL;
1542 1551
1543out_unlock2: 1552out_unlock2:
1544 rcu_read_unlock(); 1553 spin_unlock(&files->file_lock);
1545 put_files_struct(files); 1554 put_files_struct(files);
1546out_unlock: 1555out_unlock:
1547 iput(inode); 1556 iput(inode);
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 58c418fbca2c..97ae1b92bc47 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -408,8 +408,9 @@ int reiserfs_cache_default_acl(struct inode *inode)
408 acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT); 408 acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT);
409 reiserfs_read_unlock_xattrs(inode->i_sb); 409 reiserfs_read_unlock_xattrs(inode->i_sb);
410 reiserfs_read_unlock_xattr_i(inode); 410 reiserfs_read_unlock_xattr_i(inode);
411 ret = acl ? 1 : 0; 411 ret = (acl && !IS_ERR(acl));
412 posix_acl_release(acl); 412 if (ret)
413 posix_acl_release(acl);
413 } 414 }
414 415
415 return ret; 416 return ret;
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c
index 34c7a11d91f0..70d9c5a37f5a 100644
--- a/fs/smbfs/dir.c
+++ b/fs/smbfs/dir.c
@@ -434,6 +434,11 @@ smb_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
434 if (dentry->d_name.len > SMB_MAXNAMELEN) 434 if (dentry->d_name.len > SMB_MAXNAMELEN)
435 goto out; 435 goto out;
436 436
437 /* Do not allow lookup of names with backslashes in */
438 error = -EINVAL;
439 if (memchr(dentry->d_name.name, '\\', dentry->d_name.len))
440 goto out;
441
437 lock_kernel(); 442 lock_kernel();
438 error = smb_proc_getattr(dentry, &finfo); 443 error = smb_proc_getattr(dentry, &finfo);
439#ifdef SMBFS_PARANOIA 444#ifdef SMBFS_PARANOIA
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
index c71c375863cc..c71dd2760d32 100644
--- a/fs/smbfs/request.c
+++ b/fs/smbfs/request.c
@@ -339,9 +339,11 @@ int smb_add_request(struct smb_request *req)
339 /* 339 /*
340 * On timeout or on interrupt we want to try and remove the 340 * On timeout or on interrupt we want to try and remove the
341 * request from the recvq/xmitq. 341 * request from the recvq/xmitq.
342 * First check if the request is still part of a queue. (May
343 * have been removed by some error condition)
342 */ 344 */
343 smb_lock_server(server); 345 smb_lock_server(server);
344 if (!(req->rq_flags & SMB_REQ_RECEIVED)) { 346 if (!list_empty(&req->rq_queue)) {
345 list_del_init(&req->rq_queue); 347 list_del_init(&req->rq_queue);
346 smb_rput(req); 348 smb_rput(req);
347 } 349 }
diff --git a/fs/splice.c b/fs/splice.c
index e50a460239dd..a285fd746dc0 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -27,15 +27,22 @@
27#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
28#include <linux/module.h> 28#include <linux/module.h>
29#include <linux/syscalls.h> 29#include <linux/syscalls.h>
30#include <linux/uio.h>
31
32struct partial_page {
33 unsigned int offset;
34 unsigned int len;
35};
30 36
31/* 37/*
32 * Passed to the actors 38 * Passed to splice_to_pipe
33 */ 39 */
34struct splice_desc { 40struct splice_pipe_desc {
35 unsigned int len, total_len; /* current and remaining length */ 41 struct page **pages; /* page map */
42 struct partial_page *partial; /* pages[] may not be contig */
43 int nr_pages; /* number of pages in map */
36 unsigned int flags; /* splice flags */ 44 unsigned int flags; /* splice flags */
37 struct file *file; /* file to read/write */ 45 struct pipe_buf_operations *ops;/* ops associated with output pipe */
38 loff_t pos; /* file position */
39}; 46};
40 47
41/* 48/*
@@ -44,13 +51,14 @@ struct splice_desc {
44 * addition of remove_mapping(). If success is returned, the caller may 51 * addition of remove_mapping(). If success is returned, the caller may
45 * attempt to reuse this page for another destination. 52 * attempt to reuse this page for another destination.
46 */ 53 */
47static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, 54static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
48 struct pipe_buffer *buf) 55 struct pipe_buffer *buf)
49{ 56{
50 struct page *page = buf->page; 57 struct page *page = buf->page;
51 struct address_space *mapping = page_mapping(page); 58 struct address_space *mapping = page_mapping(page);
52 59
53 WARN_ON(!PageLocked(page)); 60 lock_page(page);
61
54 WARN_ON(!PageUptodate(page)); 62 WARN_ON(!PageUptodate(page));
55 63
56 /* 64 /*
@@ -65,24 +73,24 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
65 if (PagePrivate(page)) 73 if (PagePrivate(page))
66 try_to_release_page(page, mapping_gfp_mask(mapping)); 74 try_to_release_page(page, mapping_gfp_mask(mapping));
67 75
68 if (!remove_mapping(mapping, page)) 76 if (!remove_mapping(mapping, page)) {
77 unlock_page(page);
69 return 1; 78 return 1;
79 }
70 80
71 buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU; 81 buf->flags |= PIPE_BUF_FLAG_LRU;
72 return 0; 82 return 0;
73} 83}
74 84
75static void page_cache_pipe_buf_release(struct pipe_inode_info *info, 85static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
76 struct pipe_buffer *buf) 86 struct pipe_buffer *buf)
77{ 87{
78 page_cache_release(buf->page); 88 page_cache_release(buf->page);
79 buf->page = NULL; 89 buf->flags &= ~PIPE_BUF_FLAG_LRU;
80 buf->flags &= ~(PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU);
81} 90}
82 91
83static void *page_cache_pipe_buf_map(struct file *file, 92static int page_cache_pipe_buf_pin(struct pipe_inode_info *pipe,
84 struct pipe_inode_info *info, 93 struct pipe_buffer *buf)
85 struct pipe_buffer *buf)
86{ 94{
87 struct page *page = buf->page; 95 struct page *page = buf->page;
88 int err; 96 int err;
@@ -108,44 +116,59 @@ static void *page_cache_pipe_buf_map(struct file *file,
108 } 116 }
109 117
110 /* 118 /*
111 * Page is ok afterall, fall through to mapping. 119 * Page is ok afterall, we are done.
112 */ 120 */
113 unlock_page(page); 121 unlock_page(page);
114 } 122 }
115 123
116 return kmap(page); 124 return 0;
117error: 125error:
118 unlock_page(page); 126 unlock_page(page);
119 return ERR_PTR(err); 127 return err;
120} 128}
121 129
122static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, 130static struct pipe_buf_operations page_cache_pipe_buf_ops = {
123 struct pipe_buffer *buf) 131 .can_merge = 0,
132 .map = generic_pipe_buf_map,
133 .unmap = generic_pipe_buf_unmap,
134 .pin = page_cache_pipe_buf_pin,
135 .release = page_cache_pipe_buf_release,
136 .steal = page_cache_pipe_buf_steal,
137 .get = generic_pipe_buf_get,
138};
139
140static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
141 struct pipe_buffer *buf)
124{ 142{
125 kunmap(buf->page); 143 if (!(buf->flags & PIPE_BUF_FLAG_GIFT))
144 return 1;
145
146 buf->flags |= PIPE_BUF_FLAG_LRU;
147 return generic_pipe_buf_steal(pipe, buf);
126} 148}
127 149
128static struct pipe_buf_operations page_cache_pipe_buf_ops = { 150static struct pipe_buf_operations user_page_pipe_buf_ops = {
129 .can_merge = 0, 151 .can_merge = 0,
130 .map = page_cache_pipe_buf_map, 152 .map = generic_pipe_buf_map,
131 .unmap = page_cache_pipe_buf_unmap, 153 .unmap = generic_pipe_buf_unmap,
154 .pin = generic_pipe_buf_pin,
132 .release = page_cache_pipe_buf_release, 155 .release = page_cache_pipe_buf_release,
133 .steal = page_cache_pipe_buf_steal, 156 .steal = user_page_pipe_buf_steal,
157 .get = generic_pipe_buf_get,
134}; 158};
135 159
136/* 160/*
137 * Pipe output worker. This sets up our pipe format with the page cache 161 * Pipe output worker. This sets up our pipe format with the page cache
138 * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). 162 * pipe buffer operations. Otherwise very similar to the regular pipe_writev().
139 */ 163 */
140static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages, 164static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
141 int nr_pages, unsigned long offset, 165 struct splice_pipe_desc *spd)
142 unsigned long len, unsigned int flags)
143{ 166{
144 int ret, do_wakeup, i; 167 int ret, do_wakeup, page_nr;
145 168
146 ret = 0; 169 ret = 0;
147 do_wakeup = 0; 170 do_wakeup = 0;
148 i = 0; 171 page_nr = 0;
149 172
150 if (pipe->inode) 173 if (pipe->inode)
151 mutex_lock(&pipe->inode->i_mutex); 174 mutex_lock(&pipe->inode->i_mutex);
@@ -161,27 +184,22 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
161 if (pipe->nrbufs < PIPE_BUFFERS) { 184 if (pipe->nrbufs < PIPE_BUFFERS) {
162 int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1); 185 int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1);
163 struct pipe_buffer *buf = pipe->bufs + newbuf; 186 struct pipe_buffer *buf = pipe->bufs + newbuf;
164 struct page *page = pages[i++];
165 unsigned long this_len;
166 187
167 this_len = PAGE_CACHE_SIZE - offset; 188 buf->page = spd->pages[page_nr];
168 if (this_len > len) 189 buf->offset = spd->partial[page_nr].offset;
169 this_len = len; 190 buf->len = spd->partial[page_nr].len;
191 buf->ops = spd->ops;
192 if (spd->flags & SPLICE_F_GIFT)
193 buf->flags |= PIPE_BUF_FLAG_GIFT;
170 194
171 buf->page = page;
172 buf->offset = offset;
173 buf->len = this_len;
174 buf->ops = &page_cache_pipe_buf_ops;
175 pipe->nrbufs++; 195 pipe->nrbufs++;
196 page_nr++;
197 ret += buf->len;
198
176 if (pipe->inode) 199 if (pipe->inode)
177 do_wakeup = 1; 200 do_wakeup = 1;
178 201
179 ret += this_len; 202 if (!--spd->nr_pages)
180 len -= this_len;
181 offset = 0;
182 if (!--nr_pages)
183 break;
184 if (!len)
185 break; 203 break;
186 if (pipe->nrbufs < PIPE_BUFFERS) 204 if (pipe->nrbufs < PIPE_BUFFERS)
187 continue; 205 continue;
@@ -189,7 +207,7 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
189 break; 207 break;
190 } 208 }
191 209
192 if (flags & SPLICE_F_NONBLOCK) { 210 if (spd->flags & SPLICE_F_NONBLOCK) {
193 if (!ret) 211 if (!ret)
194 ret = -EAGAIN; 212 ret = -EAGAIN;
195 break; 213 break;
@@ -224,26 +242,36 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
224 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 242 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
225 } 243 }
226 244
227 while (i < nr_pages) 245 while (page_nr < spd->nr_pages)
228 page_cache_release(pages[i++]); 246 page_cache_release(spd->pages[page_nr++]);
229 247
230 return ret; 248 return ret;
231} 249}
232 250
233static int 251static int
234__generic_file_splice_read(struct file *in, struct pipe_inode_info *pipe, 252__generic_file_splice_read(struct file *in, loff_t *ppos,
235 size_t len, unsigned int flags) 253 struct pipe_inode_info *pipe, size_t len,
254 unsigned int flags)
236{ 255{
237 struct address_space *mapping = in->f_mapping; 256 struct address_space *mapping = in->f_mapping;
238 unsigned int offset, nr_pages; 257 unsigned int loff, nr_pages;
239 struct page *pages[PIPE_BUFFERS]; 258 struct page *pages[PIPE_BUFFERS];
259 struct partial_page partial[PIPE_BUFFERS];
240 struct page *page; 260 struct page *page;
241 pgoff_t index; 261 pgoff_t index, end_index;
242 int i, error; 262 loff_t isize;
243 263 size_t total_len;
244 index = in->f_pos >> PAGE_CACHE_SHIFT; 264 int error, page_nr;
245 offset = in->f_pos & ~PAGE_CACHE_MASK; 265 struct splice_pipe_desc spd = {
246 nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 266 .pages = pages,
267 .partial = partial,
268 .flags = flags,
269 .ops = &page_cache_pipe_buf_ops,
270 };
271
272 index = *ppos >> PAGE_CACHE_SHIFT;
273 loff = *ppos & ~PAGE_CACHE_MASK;
274 nr_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
247 275
248 if (nr_pages > PIPE_BUFFERS) 276 if (nr_pages > PIPE_BUFFERS)
249 nr_pages = PIPE_BUFFERS; 277 nr_pages = PIPE_BUFFERS;
@@ -253,49 +281,94 @@ __generic_file_splice_read(struct file *in, struct pipe_inode_info *pipe,
253 * read-ahead if this is a non-zero offset (we are likely doing small 281 * read-ahead if this is a non-zero offset (we are likely doing small
254 * chunk splice and the page is already there) for a single page. 282 * chunk splice and the page is already there) for a single page.
255 */ 283 */
256 if (!offset || nr_pages > 1) 284 if (!loff || nr_pages > 1)
257 do_page_cache_readahead(mapping, in, index, nr_pages); 285 page_cache_readahead(mapping, &in->f_ra, in, index, nr_pages);
258 286
259 /* 287 /*
260 * Now fill in the holes: 288 * Now fill in the holes:
261 */ 289 */
262 error = 0; 290 error = 0;
263 for (i = 0; i < nr_pages; i++, index++) { 291 total_len = 0;
264find_page: 292
293 /*
294 * Lookup the (hopefully) full range of pages we need.
295 */
296 spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages);
297
298 /*
299 * If find_get_pages_contig() returned fewer pages than we needed,
300 * allocate the rest.
301 */
302 index += spd.nr_pages;
303 while (spd.nr_pages < nr_pages) {
265 /* 304 /*
266 * lookup the page for this index 305 * Page could be there, find_get_pages_contig() breaks on
306 * the first hole.
267 */ 307 */
268 page = find_get_page(mapping, index); 308 page = find_get_page(mapping, index);
269 if (!page) { 309 if (!page) {
270 /* 310 /*
271 * If in nonblock mode then dont block on 311 * Make sure the read-ahead engine is notified
272 * readpage (we've kicked readahead so there 312 * about this failure.
273 * will be asynchronous progress):
274 */ 313 */
275 if (flags & SPLICE_F_NONBLOCK) 314 handle_ra_miss(mapping, &in->f_ra, index);
276 break;
277 315
278 /* 316 /*
279 * page didn't exist, allocate one 317 * page didn't exist, allocate one.
280 */ 318 */
281 page = page_cache_alloc_cold(mapping); 319 page = page_cache_alloc_cold(mapping);
282 if (!page) 320 if (!page)
283 break; 321 break;
284 322
285 error = add_to_page_cache_lru(page, mapping, index, 323 error = add_to_page_cache_lru(page, mapping, index,
286 mapping_gfp_mask(mapping)); 324 mapping_gfp_mask(mapping));
287 if (unlikely(error)) { 325 if (unlikely(error)) {
288 page_cache_release(page); 326 page_cache_release(page);
327 if (error == -EEXIST)
328 continue;
289 break; 329 break;
290 } 330 }
291 331 /*
292 goto readpage; 332 * add_to_page_cache() locks the page, unlock it
333 * to avoid convoluting the logic below even more.
334 */
335 unlock_page(page);
293 } 336 }
294 337
338 pages[spd.nr_pages++] = page;
339 index++;
340 }
341
342 /*
343 * Now loop over the map and see if we need to start IO on any
344 * pages, fill in the partial map, etc.
345 */
346 index = *ppos >> PAGE_CACHE_SHIFT;
347 nr_pages = spd.nr_pages;
348 spd.nr_pages = 0;
349 for (page_nr = 0; page_nr < nr_pages; page_nr++) {
350 unsigned int this_len;
351
352 if (!len)
353 break;
354
355 /*
356 * this_len is the max we'll use from this page
357 */
358 this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
359 page = pages[page_nr];
360
295 /* 361 /*
296 * If the page isn't uptodate, we may need to start io on it 362 * If the page isn't uptodate, we may need to start io on it
297 */ 363 */
298 if (!PageUptodate(page)) { 364 if (!PageUptodate(page)) {
365 /*
366 * If in nonblock mode then dont block on waiting
367 * for an in-flight io page
368 */
369 if (flags & SPLICE_F_NONBLOCK)
370 break;
371
299 lock_page(page); 372 lock_page(page);
300 373
301 /* 374 /*
@@ -305,7 +378,6 @@ find_page:
305 */ 378 */
306 if (!page->mapping) { 379 if (!page->mapping) {
307 unlock_page(page); 380 unlock_page(page);
308 page_cache_release(page);
309 break; 381 break;
310 } 382 }
311 /* 383 /*
@@ -316,25 +388,66 @@ find_page:
316 goto fill_it; 388 goto fill_it;
317 } 389 }
318 390
319readpage:
320 /* 391 /*
321 * need to read in the page 392 * need to read in the page
322 */ 393 */
323 error = mapping->a_ops->readpage(in, page); 394 error = mapping->a_ops->readpage(in, page);
324
325 if (unlikely(error)) { 395 if (unlikely(error)) {
326 page_cache_release(page); 396 /*
397 * We really should re-lookup the page here,
398 * but it complicates things a lot. Instead
399 * lets just do what we already stored, and
400 * we'll get it the next time we are called.
401 */
327 if (error == AOP_TRUNCATED_PAGE) 402 if (error == AOP_TRUNCATED_PAGE)
328 goto find_page; 403 error = 0;
404
329 break; 405 break;
330 } 406 }
407
408 /*
409 * i_size must be checked after ->readpage().
410 */
411 isize = i_size_read(mapping->host);
412 end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
413 if (unlikely(!isize || index > end_index))
414 break;
415
416 /*
417 * if this is the last page, see if we need to shrink
418 * the length and stop
419 */
420 if (end_index == index) {
421 loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK);
422 if (total_len + loff > isize)
423 break;
424 /*
425 * force quit after adding this page
426 */
427 len = this_len;
428 this_len = min(this_len, loff);
429 loff = 0;
430 }
331 } 431 }
332fill_it: 432fill_it:
333 pages[i] = page; 433 partial[page_nr].offset = loff;
434 partial[page_nr].len = this_len;
435 len -= this_len;
436 total_len += this_len;
437 loff = 0;
438 spd.nr_pages++;
439 index++;
334 } 440 }
335 441
336 if (i) 442 /*
337 return move_to_pipe(pipe, pages, i, offset, len, flags); 443 * Release any pages at the end, if we quit early. 'i' is how far
444 * we got, 'nr_pages' is how many pages are in the map.
445 */
446 while (page_nr < nr_pages)
447 page_cache_release(pages[page_nr++]);
448
449 if (spd.nr_pages)
450 return splice_to_pipe(pipe, &spd);
338 451
339 return error; 452 return error;
340} 453}
@@ -348,8 +461,9 @@ fill_it:
348 * 461 *
349 * Will read pages from given file and fill them into a pipe. 462 * Will read pages from given file and fill them into a pipe.
350 */ 463 */
351ssize_t generic_file_splice_read(struct file *in, struct pipe_inode_info *pipe, 464ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
352 size_t len, unsigned int flags) 465 struct pipe_inode_info *pipe, size_t len,
466 unsigned int flags)
353{ 467{
354 ssize_t spliced; 468 ssize_t spliced;
355 int ret; 469 int ret;
@@ -358,19 +472,22 @@ ssize_t generic_file_splice_read(struct file *in, struct pipe_inode_info *pipe,
358 spliced = 0; 472 spliced = 0;
359 473
360 while (len) { 474 while (len) {
361 ret = __generic_file_splice_read(in, pipe, len, flags); 475 ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
362 476
363 if (ret <= 0) 477 if (ret < 0)
364 break; 478 break;
479 else if (!ret) {
480 if (spliced)
481 break;
482 if (flags & SPLICE_F_NONBLOCK) {
483 ret = -EAGAIN;
484 break;
485 }
486 }
365 487
366 in->f_pos += ret; 488 *ppos += ret;
367 len -= ret; 489 len -= ret;
368 spliced += ret; 490 spliced += ret;
369
370 if (!(flags & SPLICE_F_NONBLOCK))
371 continue;
372 ret = -EAGAIN;
373 break;
374 } 491 }
375 492
376 if (spliced) 493 if (spliced)
@@ -383,38 +500,24 @@ EXPORT_SYMBOL(generic_file_splice_read);
383 500
384/* 501/*
385 * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' 502 * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
386 * using sendpage(). 503 * using sendpage(). Return the number of bytes sent.
387 */ 504 */
388static int pipe_to_sendpage(struct pipe_inode_info *info, 505static int pipe_to_sendpage(struct pipe_inode_info *pipe,
389 struct pipe_buffer *buf, struct splice_desc *sd) 506 struct pipe_buffer *buf, struct splice_desc *sd)
390{ 507{
391 struct file *file = sd->file; 508 struct file *file = sd->file;
392 loff_t pos = sd->pos; 509 loff_t pos = sd->pos;
393 unsigned int offset; 510 int ret, more;
394 ssize_t ret;
395 void *ptr;
396 int more;
397
398 /*
399 * Sub-optimal, but we are limited by the pipe ->map. We don't
400 * need a kmap'ed buffer here, we just want to make sure we
401 * have the page pinned if the pipe page originates from the
402 * page cache.
403 */
404 ptr = buf->ops->map(file, info, buf);
405 if (IS_ERR(ptr))
406 return PTR_ERR(ptr);
407 511
408 offset = pos & ~PAGE_CACHE_MASK; 512 ret = buf->ops->pin(pipe, buf);
409 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; 513 if (!ret) {
514 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
410 515
411 ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos,more); 516 ret = file->f_op->sendpage(file, buf->page, buf->offset,
412 517 sd->len, &pos, more);
413 buf->ops->unmap(info, buf); 518 }
414 if (ret == sd->len)
415 return 0;
416 519
417 return -EIO; 520 return ret;
418} 521}
419 522
420/* 523/*
@@ -437,62 +540,80 @@ static int pipe_to_sendpage(struct pipe_inode_info *info,
437 * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create 540 * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
438 * a new page in the output file page cache and fill/dirty that. 541 * a new page in the output file page cache and fill/dirty that.
439 */ 542 */
440static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, 543static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
441 struct splice_desc *sd) 544 struct splice_desc *sd)
442{ 545{
443 struct file *file = sd->file; 546 struct file *file = sd->file;
444 struct address_space *mapping = file->f_mapping; 547 struct address_space *mapping = file->f_mapping;
445 gfp_t gfp_mask = mapping_gfp_mask(mapping); 548 gfp_t gfp_mask = mapping_gfp_mask(mapping);
446 unsigned int offset; 549 unsigned int offset, this_len;
447 struct page *page; 550 struct page *page;
448 pgoff_t index; 551 pgoff_t index;
449 char *src;
450 int ret; 552 int ret;
451 553
452 /* 554 /*
453 * make sure the data in this buffer is uptodate 555 * make sure the data in this buffer is uptodate
454 */ 556 */
455 src = buf->ops->map(file, info, buf); 557 ret = buf->ops->pin(pipe, buf);
456 if (IS_ERR(src)) 558 if (unlikely(ret))
457 return PTR_ERR(src); 559 return ret;
458 560
459 index = sd->pos >> PAGE_CACHE_SHIFT; 561 index = sd->pos >> PAGE_CACHE_SHIFT;
460 offset = sd->pos & ~PAGE_CACHE_MASK; 562 offset = sd->pos & ~PAGE_CACHE_MASK;
461 563
564 this_len = sd->len;
565 if (this_len + offset > PAGE_CACHE_SIZE)
566 this_len = PAGE_CACHE_SIZE - offset;
567
462 /* 568 /*
463 * Reuse buf page, if SPLICE_F_MOVE is set. 569 * Reuse buf page, if SPLICE_F_MOVE is set and we are doing a full
570 * page.
464 */ 571 */
465 if (sd->flags & SPLICE_F_MOVE) { 572 if ((sd->flags & SPLICE_F_MOVE) && this_len == PAGE_CACHE_SIZE) {
466 /* 573 /*
467 * If steal succeeds, buf->page is now pruned from the vm 574 * If steal succeeds, buf->page is now pruned from the
468 * side (LRU and page cache) and we can reuse it. 575 * pagecache and we can reuse it. The page will also be
576 * locked on successful return.
469 */ 577 */
470 if (buf->ops->steal(info, buf)) 578 if (buf->ops->steal(pipe, buf))
471 goto find_page; 579 goto find_page;
472 580
473 /*
474 * this will also set the page locked
475 */
476 page = buf->page; 581 page = buf->page;
477 if (add_to_page_cache(page, mapping, index, gfp_mask)) 582 if (add_to_page_cache(page, mapping, index, gfp_mask)) {
583 unlock_page(page);
478 goto find_page; 584 goto find_page;
585 }
586
587 page_cache_get(page);
479 588
480 if (!(buf->flags & PIPE_BUF_FLAG_LRU)) 589 if (!(buf->flags & PIPE_BUF_FLAG_LRU))
481 lru_cache_add(page); 590 lru_cache_add(page);
482 } else { 591 } else {
483find_page: 592find_page:
484 ret = -ENOMEM; 593 page = find_lock_page(mapping, index);
485 page = find_or_create_page(mapping, index, gfp_mask); 594 if (!page) {
486 if (!page) 595 ret = -ENOMEM;
487 goto out_nomem; 596 page = page_cache_alloc_cold(mapping);
597 if (unlikely(!page))
598 goto out_nomem;
599
600 /*
601 * This will also lock the page
602 */
603 ret = add_to_page_cache_lru(page, mapping, index,
604 gfp_mask);
605 if (unlikely(ret))
606 goto out;
607 }
488 608
489 /* 609 /*
490 * If the page is uptodate, it is also locked. If it isn't 610 * We get here with the page locked. If the page is also
491 * uptodate, we can mark it uptodate if we are filling the 611 * uptodate, we don't need to do more. If it isn't, we
492 * full page. Otherwise we need to read it in first... 612 * may need to bring it in if we are not going to overwrite
613 * the full page.
493 */ 614 */
494 if (!PageUptodate(page)) { 615 if (!PageUptodate(page)) {
495 if (sd->len < PAGE_CACHE_SIZE) { 616 if (this_len < PAGE_CACHE_SIZE) {
496 ret = mapping->a_ops->readpage(file, page); 617 ret = mapping->a_ops->readpage(file, page);
497 if (unlikely(ret)) 618 if (unlikely(ret))
498 goto out; 619 goto out;
@@ -511,58 +632,72 @@ find_page:
511 ret = -EIO; 632 ret = -EIO;
512 goto out; 633 goto out;
513 } 634 }
514 } else { 635 } else
515 WARN_ON(!PageLocked(page));
516 SetPageUptodate(page); 636 SetPageUptodate(page);
517 }
518 } 637 }
519 } 638 }
520 639
521 ret = mapping->a_ops->prepare_write(file, page, 0, sd->len); 640 ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len);
522 if (ret == AOP_TRUNCATED_PAGE) { 641 if (unlikely(ret)) {
642 loff_t isize = i_size_read(mapping->host);
643
644 if (ret != AOP_TRUNCATED_PAGE)
645 unlock_page(page);
523 page_cache_release(page); 646 page_cache_release(page);
524 goto find_page; 647 if (ret == AOP_TRUNCATED_PAGE)
525 } else if (ret) 648 goto find_page;
649
650 /*
651 * prepare_write() may have instantiated a few blocks
652 * outside i_size. Trim these off again.
653 */
654 if (sd->pos + this_len > isize)
655 vmtruncate(mapping->host, isize);
656
526 goto out; 657 goto out;
658 }
527 659
528 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 660 if (buf->page != page) {
529 char *dst = kmap_atomic(page, KM_USER0); 661 /*
662 * Careful, ->map() uses KM_USER0!
663 */
664 char *src = buf->ops->map(pipe, buf, 1);
665 char *dst = kmap_atomic(page, KM_USER1);
530 666
531 memcpy(dst + offset, src + buf->offset, sd->len); 667 memcpy(dst + offset, src + buf->offset, this_len);
532 flush_dcache_page(page); 668 flush_dcache_page(page);
533 kunmap_atomic(dst, KM_USER0); 669 kunmap_atomic(dst, KM_USER1);
670 buf->ops->unmap(pipe, buf, src);
534 } 671 }
535 672
536 ret = mapping->a_ops->commit_write(file, page, 0, sd->len); 673 ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
537 if (ret == AOP_TRUNCATED_PAGE) { 674 if (!ret) {
675 /*
676 * Return the number of bytes written and mark page as
677 * accessed, we are now done!
678 */
679 ret = this_len;
680 mark_page_accessed(page);
681 balance_dirty_pages_ratelimited(mapping);
682 } else if (ret == AOP_TRUNCATED_PAGE) {
538 page_cache_release(page); 683 page_cache_release(page);
539 goto find_page; 684 goto find_page;
540 } else if (ret)
541 goto out;
542
543 mark_page_accessed(page);
544 balance_dirty_pages_ratelimited(mapping);
545out:
546 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) {
547 page_cache_release(page);
548 unlock_page(page);
549 } 685 }
686out:
687 page_cache_release(page);
688 unlock_page(page);
550out_nomem: 689out_nomem:
551 buf->ops->unmap(info, buf);
552 return ret; 690 return ret;
553} 691}
554 692
555typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
556 struct splice_desc *);
557
558/* 693/*
559 * Pipe input worker. Most of this logic works like a regular pipe, the 694 * Pipe input worker. Most of this logic works like a regular pipe, the
560 * key here is the 'actor' worker passed in that actually moves the data 695 * key here is the 'actor' worker passed in that actually moves the data
561 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. 696 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
562 */ 697 */
563static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out, 698ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
564 size_t len, unsigned int flags, 699 loff_t *ppos, size_t len, unsigned int flags,
565 splice_actor *actor) 700 splice_actor *actor)
566{ 701{
567 int ret, do_wakeup, err; 702 int ret, do_wakeup, err;
568 struct splice_desc sd; 703 struct splice_desc sd;
@@ -573,7 +708,7 @@ static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out,
573 sd.total_len = len; 708 sd.total_len = len;
574 sd.flags = flags; 709 sd.flags = flags;
575 sd.file = out; 710 sd.file = out;
576 sd.pos = out->f_pos; 711 sd.pos = *ppos;
577 712
578 if (pipe->inode) 713 if (pipe->inode)
579 mutex_lock(&pipe->inode->i_mutex); 714 mutex_lock(&pipe->inode->i_mutex);
@@ -588,16 +723,22 @@ static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out,
588 sd.len = sd.total_len; 723 sd.len = sd.total_len;
589 724
590 err = actor(pipe, buf, &sd); 725 err = actor(pipe, buf, &sd);
591 if (err) { 726 if (err <= 0) {
592 if (!ret && err != -ENODATA) 727 if (!ret && err != -ENODATA)
593 ret = err; 728 ret = err;
594 729
595 break; 730 break;
596 } 731 }
597 732
598 ret += sd.len; 733 ret += err;
599 buf->offset += sd.len; 734 buf->offset += err;
600 buf->len -= sd.len; 735 buf->len -= err;
736
737 sd.len -= err;
738 sd.pos += err;
739 sd.total_len -= err;
740 if (sd.len)
741 continue;
601 742
602 if (!buf->len) { 743 if (!buf->len) {
603 buf->ops = NULL; 744 buf->ops = NULL;
@@ -608,8 +749,6 @@ static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out,
608 do_wakeup = 1; 749 do_wakeup = 1;
609 } 750 }
610 751
611 sd.pos += sd.len;
612 sd.total_len -= sd.len;
613 if (!sd.total_len) 752 if (!sd.total_len)
614 break; 753 break;
615 } 754 }
@@ -656,9 +795,7 @@ static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out,
656 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 795 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
657 } 796 }
658 797
659 out->f_pos = sd.pos;
660 return ret; 798 return ret;
661
662} 799}
663 800
664/** 801/**
@@ -674,28 +811,32 @@ static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out,
674 */ 811 */
675ssize_t 812ssize_t
676generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, 813generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
677 size_t len, unsigned int flags) 814 loff_t *ppos, size_t len, unsigned int flags)
678{ 815{
679 struct address_space *mapping = out->f_mapping; 816 struct address_space *mapping = out->f_mapping;
680 ssize_t ret; 817 ssize_t ret;
681 818
682 ret = move_from_pipe(pipe, out, len, flags, pipe_to_file); 819 ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
683 820 if (ret > 0) {
684 /*
685 * If file or inode is SYNC and we actually wrote some data, sync it.
686 */
687 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host))
688 && ret > 0) {
689 struct inode *inode = mapping->host; 821 struct inode *inode = mapping->host;
690 int err;
691 822
692 mutex_lock(&inode->i_mutex); 823 *ppos += ret;
693 err = generic_osync_inode(mapping->host, mapping,
694 OSYNC_METADATA|OSYNC_DATA);
695 mutex_unlock(&inode->i_mutex);
696 824
697 if (err) 825 /*
698 ret = err; 826 * If file or inode is SYNC and we actually wrote some data,
827 * sync it.
828 */
829 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
830 int err;
831
832 mutex_lock(&inode->i_mutex);
833 err = generic_osync_inode(inode, mapping,
834 OSYNC_METADATA|OSYNC_DATA);
835 mutex_unlock(&inode->i_mutex);
836
837 if (err)
838 ret = err;
839 }
699 } 840 }
700 841
701 return ret; 842 return ret;
@@ -715,9 +856,9 @@ EXPORT_SYMBOL(generic_file_splice_write);
715 * 856 *
716 */ 857 */
717ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, 858ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
718 size_t len, unsigned int flags) 859 loff_t *ppos, size_t len, unsigned int flags)
719{ 860{
720 return move_from_pipe(pipe, out, len, flags, pipe_to_sendpage); 861 return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage);
721} 862}
722 863
723EXPORT_SYMBOL(generic_splice_sendpage); 864EXPORT_SYMBOL(generic_splice_sendpage);
@@ -726,9 +867,8 @@ EXPORT_SYMBOL(generic_splice_sendpage);
726 * Attempt to initiate a splice from pipe to file. 867 * Attempt to initiate a splice from pipe to file.
727 */ 868 */
728static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, 869static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
729 size_t len, unsigned int flags) 870 loff_t *ppos, size_t len, unsigned int flags)
730{ 871{
731 loff_t pos;
732 int ret; 872 int ret;
733 873
734 if (unlikely(!out->f_op || !out->f_op->splice_write)) 874 if (unlikely(!out->f_op || !out->f_op->splice_write))
@@ -737,22 +877,21 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
737 if (unlikely(!(out->f_mode & FMODE_WRITE))) 877 if (unlikely(!(out->f_mode & FMODE_WRITE)))
738 return -EBADF; 878 return -EBADF;
739 879
740 pos = out->f_pos; 880 ret = rw_verify_area(WRITE, out, ppos, len);
741
742 ret = rw_verify_area(WRITE, out, &pos, len);
743 if (unlikely(ret < 0)) 881 if (unlikely(ret < 0))
744 return ret; 882 return ret;
745 883
746 return out->f_op->splice_write(pipe, out, len, flags); 884 return out->f_op->splice_write(pipe, out, ppos, len, flags);
747} 885}
748 886
749/* 887/*
750 * Attempt to initiate a splice from a file to a pipe. 888 * Attempt to initiate a splice from a file to a pipe.
751 */ 889 */
752static long do_splice_to(struct file *in, struct pipe_inode_info *pipe, 890static long do_splice_to(struct file *in, loff_t *ppos,
753 size_t len, unsigned int flags) 891 struct pipe_inode_info *pipe, size_t len,
892 unsigned int flags)
754{ 893{
755 loff_t pos, isize, left; 894 loff_t isize, left;
756 int ret; 895 int ret;
757 896
758 if (unlikely(!in->f_op || !in->f_op->splice_read)) 897 if (unlikely(!in->f_op || !in->f_op->splice_read))
@@ -761,28 +900,27 @@ static long do_splice_to(struct file *in, struct pipe_inode_info *pipe,
761 if (unlikely(!(in->f_mode & FMODE_READ))) 900 if (unlikely(!(in->f_mode & FMODE_READ)))
762 return -EBADF; 901 return -EBADF;
763 902
764 pos = in->f_pos; 903 ret = rw_verify_area(READ, in, ppos, len);
765
766 ret = rw_verify_area(READ, in, &pos, len);
767 if (unlikely(ret < 0)) 904 if (unlikely(ret < 0))
768 return ret; 905 return ret;
769 906
770 isize = i_size_read(in->f_mapping->host); 907 isize = i_size_read(in->f_mapping->host);
771 if (unlikely(in->f_pos >= isize)) 908 if (unlikely(*ppos >= isize))
772 return 0; 909 return 0;
773 910
774 left = isize - in->f_pos; 911 left = isize - *ppos;
775 if (unlikely(left < len)) 912 if (unlikely(left < len))
776 len = left; 913 len = left;
777 914
778 return in->f_op->splice_read(in, pipe, len, flags); 915 return in->f_op->splice_read(in, ppos, pipe, len, flags);
779} 916}
780 917
781long do_splice_direct(struct file *in, struct file *out, size_t len, 918long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
782 unsigned int flags) 919 size_t len, unsigned int flags)
783{ 920{
784 struct pipe_inode_info *pipe; 921 struct pipe_inode_info *pipe;
785 long ret, bytes; 922 long ret, bytes;
923 loff_t out_off;
786 umode_t i_mode; 924 umode_t i_mode;
787 int i; 925 int i;
788 926
@@ -807,7 +945,7 @@ long do_splice_direct(struct file *in, struct file *out, size_t len,
807 945
808 /* 946 /*
809 * We don't have an immediate reader, but we'll read the stuff 947 * We don't have an immediate reader, but we'll read the stuff
810 * out of the pipe right after the move_to_pipe(). So set 948 * out of the pipe right after the splice_to_pipe(). So set
811 * PIPE_READERS appropriately. 949 * PIPE_READERS appropriately.
812 */ 950 */
813 pipe->readers = 1; 951 pipe->readers = 1;
@@ -820,6 +958,7 @@ long do_splice_direct(struct file *in, struct file *out, size_t len,
820 */ 958 */
821 ret = 0; 959 ret = 0;
822 bytes = 0; 960 bytes = 0;
961 out_off = 0;
823 962
824 while (len) { 963 while (len) {
825 size_t read_len, max_read_len; 964 size_t read_len, max_read_len;
@@ -829,7 +968,7 @@ long do_splice_direct(struct file *in, struct file *out, size_t len,
829 */ 968 */
830 max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE)); 969 max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE));
831 970
832 ret = do_splice_to(in, pipe, max_read_len, flags); 971 ret = do_splice_to(in, ppos, pipe, max_read_len, flags);
833 if (unlikely(ret < 0)) 972 if (unlikely(ret < 0))
834 goto out_release; 973 goto out_release;
835 974
@@ -840,7 +979,7 @@ long do_splice_direct(struct file *in, struct file *out, size_t len,
840 * must not do the output in nonblocking mode as then we 979 * must not do the output in nonblocking mode as then we
841 * could get stuck data in the internal pipe: 980 * could get stuck data in the internal pipe:
842 */ 981 */
843 ret = do_splice_from(pipe, out, read_len, 982 ret = do_splice_from(pipe, out, &out_off, read_len,
844 flags & ~SPLICE_F_NONBLOCK); 983 flags & ~SPLICE_F_NONBLOCK);
845 if (unlikely(ret < 0)) 984 if (unlikely(ret < 0))
846 goto out_release; 985 goto out_release;
@@ -898,6 +1037,8 @@ static long do_splice(struct file *in, loff_t __user *off_in,
898 size_t len, unsigned int flags) 1037 size_t len, unsigned int flags)
899{ 1038{
900 struct pipe_inode_info *pipe; 1039 struct pipe_inode_info *pipe;
1040 loff_t offset, *off;
1041 long ret;
901 1042
902 pipe = in->f_dentry->d_inode->i_pipe; 1043 pipe = in->f_dentry->d_inode->i_pipe;
903 if (pipe) { 1044 if (pipe) {
@@ -906,12 +1047,18 @@ static long do_splice(struct file *in, loff_t __user *off_in,
906 if (off_out) { 1047 if (off_out) {
907 if (out->f_op->llseek == no_llseek) 1048 if (out->f_op->llseek == no_llseek)
908 return -EINVAL; 1049 return -EINVAL;
909 if (copy_from_user(&out->f_pos, off_out, 1050 if (copy_from_user(&offset, off_out, sizeof(loff_t)))
910 sizeof(loff_t)))
911 return -EFAULT; 1051 return -EFAULT;
912 } 1052 off = &offset;
1053 } else
1054 off = &out->f_pos;
913 1055
914 return do_splice_from(pipe, out, len, flags); 1056 ret = do_splice_from(pipe, out, off, len, flags);
1057
1058 if (off_out && copy_to_user(off_out, off, sizeof(loff_t)))
1059 ret = -EFAULT;
1060
1061 return ret;
915 } 1062 }
916 1063
917 pipe = out->f_dentry->d_inode->i_pipe; 1064 pipe = out->f_dentry->d_inode->i_pipe;
@@ -921,16 +1068,201 @@ static long do_splice(struct file *in, loff_t __user *off_in,
921 if (off_in) { 1068 if (off_in) {
922 if (in->f_op->llseek == no_llseek) 1069 if (in->f_op->llseek == no_llseek)
923 return -EINVAL; 1070 return -EINVAL;
924 if (copy_from_user(&in->f_pos, off_in, sizeof(loff_t))) 1071 if (copy_from_user(&offset, off_in, sizeof(loff_t)))
925 return -EFAULT; 1072 return -EFAULT;
926 } 1073 off = &offset;
1074 } else
1075 off = &in->f_pos;
927 1076
928 return do_splice_to(in, pipe, len, flags); 1077 ret = do_splice_to(in, off, pipe, len, flags);
1078
1079 if (off_in && copy_to_user(off_in, off, sizeof(loff_t)))
1080 ret = -EFAULT;
1081
1082 return ret;
929 } 1083 }
930 1084
931 return -EINVAL; 1085 return -EINVAL;
932} 1086}
933 1087
1088/*
1089 * Map an iov into an array of pages and offset/length tupples. With the
1090 * partial_page structure, we can map several non-contiguous ranges into
1091 * our ones pages[] map instead of splitting that operation into pieces.
1092 * Could easily be exported as a generic helper for other users, in which
1093 * case one would probably want to add a 'max_nr_pages' parameter as well.
1094 */
1095static int get_iovec_page_array(const struct iovec __user *iov,
1096 unsigned int nr_vecs, struct page **pages,
1097 struct partial_page *partial, int aligned)
1098{
1099 int buffers = 0, error = 0;
1100
1101 /*
1102 * It's ok to take the mmap_sem for reading, even
1103 * across a "get_user()".
1104 */
1105 down_read(&current->mm->mmap_sem);
1106
1107 while (nr_vecs) {
1108 unsigned long off, npages;
1109 void __user *base;
1110 size_t len;
1111 int i;
1112
1113 /*
1114 * Get user address base and length for this iovec.
1115 */
1116 error = get_user(base, &iov->iov_base);
1117 if (unlikely(error))
1118 break;
1119 error = get_user(len, &iov->iov_len);
1120 if (unlikely(error))
1121 break;
1122
1123 /*
1124 * Sanity check this iovec. 0 read succeeds.
1125 */
1126 if (unlikely(!len))
1127 break;
1128 error = -EFAULT;
1129 if (unlikely(!base))
1130 break;
1131
1132 /*
1133 * Get this base offset and number of pages, then map
1134 * in the user pages.
1135 */
1136 off = (unsigned long) base & ~PAGE_MASK;
1137
1138 /*
1139 * If asked for alignment, the offset must be zero and the
1140 * length a multiple of the PAGE_SIZE.
1141 */
1142 error = -EINVAL;
1143 if (aligned && (off || len & ~PAGE_MASK))
1144 break;
1145
1146 npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
1147 if (npages > PIPE_BUFFERS - buffers)
1148 npages = PIPE_BUFFERS - buffers;
1149
1150 error = get_user_pages(current, current->mm,
1151 (unsigned long) base, npages, 0, 0,
1152 &pages[buffers], NULL);
1153
1154 if (unlikely(error <= 0))
1155 break;
1156
1157 /*
1158 * Fill this contiguous range into the partial page map.
1159 */
1160 for (i = 0; i < error; i++) {
1161 const int plen = min_t(size_t, len, PAGE_SIZE - off);
1162
1163 partial[buffers].offset = off;
1164 partial[buffers].len = plen;
1165
1166 off = 0;
1167 len -= plen;
1168 buffers++;
1169 }
1170
1171 /*
1172 * We didn't complete this iov, stop here since it probably
1173 * means we have to move some of this into a pipe to
1174 * be able to continue.
1175 */
1176 if (len)
1177 break;
1178
1179 /*
1180 * Don't continue if we mapped fewer pages than we asked for,
1181 * or if we mapped the max number of pages that we have
1182 * room for.
1183 */
1184 if (error < npages || buffers == PIPE_BUFFERS)
1185 break;
1186
1187 nr_vecs--;
1188 iov++;
1189 }
1190
1191 up_read(&current->mm->mmap_sem);
1192
1193 if (buffers)
1194 return buffers;
1195
1196 return error;
1197}
1198
1199/*
1200 * vmsplice splices a user address range into a pipe. It can be thought of
1201 * as splice-from-memory, where the regular splice is splice-from-file (or
1202 * to file). In both cases the output is a pipe, naturally.
1203 *
1204 * Note that vmsplice only supports splicing _from_ user memory to a pipe,
1205 * not the other way around. Splicing from user memory is a simple operation
1206 * that can be supported without any funky alignment restrictions or nasty
1207 * vm tricks. We simply map in the user memory and fill them into a pipe.
1208 * The reverse isn't quite as easy, though. There are two possible solutions
1209 * for that:
1210 *
1211 * - memcpy() the data internally, at which point we might as well just
1212 * do a regular read() on the buffer anyway.
1213 * - Lots of nasty vm tricks, that are neither fast nor flexible (it
1214 * has restriction limitations on both ends of the pipe).
1215 *
1216 * Alas, it isn't here.
1217 *
1218 */
1219static long do_vmsplice(struct file *file, const struct iovec __user *iov,
1220 unsigned long nr_segs, unsigned int flags)
1221{
1222 struct pipe_inode_info *pipe = file->f_dentry->d_inode->i_pipe;
1223 struct page *pages[PIPE_BUFFERS];
1224 struct partial_page partial[PIPE_BUFFERS];
1225 struct splice_pipe_desc spd = {
1226 .pages = pages,
1227 .partial = partial,
1228 .flags = flags,
1229 .ops = &user_page_pipe_buf_ops,
1230 };
1231
1232 if (unlikely(!pipe))
1233 return -EBADF;
1234 if (unlikely(nr_segs > UIO_MAXIOV))
1235 return -EINVAL;
1236 else if (unlikely(!nr_segs))
1237 return 0;
1238
1239 spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
1240 flags & SPLICE_F_GIFT);
1241 if (spd.nr_pages <= 0)
1242 return spd.nr_pages;
1243
1244 return splice_to_pipe(pipe, &spd);
1245}
1246
1247asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
1248 unsigned long nr_segs, unsigned int flags)
1249{
1250 struct file *file;
1251 long error;
1252 int fput;
1253
1254 error = -EBADF;
1255 file = fget_light(fd, &fput);
1256 if (file) {
1257 if (file->f_mode & FMODE_WRITE)
1258 error = do_vmsplice(file, iov, nr_segs, flags);
1259
1260 fput_light(file, fput);
1261 }
1262
1263 return error;
1264}
1265
934asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, 1266asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
935 int fd_out, loff_t __user *off_out, 1267 int fd_out, loff_t __user *off_out,
936 size_t len, unsigned int flags) 1268 size_t len, unsigned int flags)
@@ -961,3 +1293,198 @@ asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
961 1293
962 return error; 1294 return error;
963} 1295}
1296
1297/*
1298 * Link contents of ipipe to opipe.
1299 */
1300static int link_pipe(struct pipe_inode_info *ipipe,
1301 struct pipe_inode_info *opipe,
1302 size_t len, unsigned int flags)
1303{
1304 struct pipe_buffer *ibuf, *obuf;
1305 int ret, do_wakeup, i, ipipe_first;
1306
1307 ret = do_wakeup = ipipe_first = 0;
1308
1309 /*
1310 * Potential ABBA deadlock, work around it by ordering lock
1311 * grabbing by inode address. Otherwise two different processes
1312 * could deadlock (one doing tee from A -> B, the other from B -> A).
1313 */
1314 if (ipipe->inode < opipe->inode) {
1315 ipipe_first = 1;
1316 mutex_lock(&ipipe->inode->i_mutex);
1317 mutex_lock(&opipe->inode->i_mutex);
1318 } else {
1319 mutex_lock(&opipe->inode->i_mutex);
1320 mutex_lock(&ipipe->inode->i_mutex);
1321 }
1322
1323 for (i = 0;; i++) {
1324 if (!opipe->readers) {
1325 send_sig(SIGPIPE, current, 0);
1326 if (!ret)
1327 ret = -EPIPE;
1328 break;
1329 }
1330 if (ipipe->nrbufs - i) {
1331 ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
1332
1333 /*
1334 * If we have room, fill this buffer
1335 */
1336 if (opipe->nrbufs < PIPE_BUFFERS) {
1337 int nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
1338
1339 /*
1340 * Get a reference to this pipe buffer,
1341 * so we can copy the contents over.
1342 */
1343 ibuf->ops->get(ipipe, ibuf);
1344
1345 obuf = opipe->bufs + nbuf;
1346 *obuf = *ibuf;
1347
1348 /*
1349 * Don't inherit the gift flag, we need to
1350 * prevent multiple steals of this page.
1351 */
1352 obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1353
1354 if (obuf->len > len)
1355 obuf->len = len;
1356
1357 opipe->nrbufs++;
1358 do_wakeup = 1;
1359 ret += obuf->len;
1360 len -= obuf->len;
1361
1362 if (!len)
1363 break;
1364 if (opipe->nrbufs < PIPE_BUFFERS)
1365 continue;
1366 }
1367
1368 /*
1369 * We have input available, but no output room.
1370 * If we already copied data, return that. If we
1371 * need to drop the opipe lock, it must be ordered
1372 * last to avoid deadlocks.
1373 */
1374 if ((flags & SPLICE_F_NONBLOCK) || !ipipe_first) {
1375 if (!ret)
1376 ret = -EAGAIN;
1377 break;
1378 }
1379 if (signal_pending(current)) {
1380 if (!ret)
1381 ret = -ERESTARTSYS;
1382 break;
1383 }
1384 if (do_wakeup) {
1385 smp_mb();
1386 if (waitqueue_active(&opipe->wait))
1387 wake_up_interruptible(&opipe->wait);
1388 kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
1389 do_wakeup = 0;
1390 }
1391
1392 opipe->waiting_writers++;
1393 pipe_wait(opipe);
1394 opipe->waiting_writers--;
1395 continue;
1396 }
1397
1398 /*
1399 * No input buffers, do the usual checks for available
1400 * writers and blocking and wait if necessary
1401 */
1402 if (!ipipe->writers)
1403 break;
1404 if (!ipipe->waiting_writers) {
1405 if (ret)
1406 break;
1407 }
1408 /*
1409 * pipe_wait() drops the ipipe mutex. To avoid deadlocks
1410 * with another process, we can only safely do that if
1411 * the ipipe lock is ordered last.
1412 */
1413 if ((flags & SPLICE_F_NONBLOCK) || ipipe_first) {
1414 if (!ret)
1415 ret = -EAGAIN;
1416 break;
1417 }
1418 if (signal_pending(current)) {
1419 if (!ret)
1420 ret = -ERESTARTSYS;
1421 break;
1422 }
1423
1424 if (waitqueue_active(&ipipe->wait))
1425 wake_up_interruptible_sync(&ipipe->wait);
1426 kill_fasync(&ipipe->fasync_writers, SIGIO, POLL_OUT);
1427
1428 pipe_wait(ipipe);
1429 }
1430
1431 mutex_unlock(&ipipe->inode->i_mutex);
1432 mutex_unlock(&opipe->inode->i_mutex);
1433
1434 if (do_wakeup) {
1435 smp_mb();
1436 if (waitqueue_active(&opipe->wait))
1437 wake_up_interruptible(&opipe->wait);
1438 kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
1439 }
1440
1441 return ret;
1442}
1443
1444/*
1445 * This is a tee(1) implementation that works on pipes. It doesn't copy
1446 * any data, it simply references the 'in' pages on the 'out' pipe.
1447 * The 'flags' used are the SPLICE_F_* variants, currently the only
1448 * applicable one is SPLICE_F_NONBLOCK.
1449 */
1450static long do_tee(struct file *in, struct file *out, size_t len,
1451 unsigned int flags)
1452{
1453 struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe;
1454 struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe;
1455
1456 /*
1457 * Link ipipe to the two output pipes, consuming as we go along.
1458 */
1459 if (ipipe && opipe)
1460 return link_pipe(ipipe, opipe, len, flags);
1461
1462 return -EINVAL;
1463}
1464
1465asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags)
1466{
1467 struct file *in;
1468 int error, fput_in;
1469
1470 if (unlikely(!len))
1471 return 0;
1472
1473 error = -EBADF;
1474 in = fget_light(fdin, &fput_in);
1475 if (in) {
1476 if (in->f_mode & FMODE_READ) {
1477 int fput_out;
1478 struct file *out = fget_light(fdout, &fput_out);
1479
1480 if (out) {
1481 if (out->f_mode & FMODE_WRITE)
1482 error = do_tee(in, out, len, flags);
1483 fput_light(out, fput_out);
1484 }
1485 }
1486 fput_light(in, fput_in);
1487 }
1488
1489 return error;
1490}
diff --git a/fs/stat.c b/fs/stat.c
index 9948cc1685a4..0f282face322 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -261,7 +261,7 @@ asmlinkage long sys_newlstat(char __user *filename, struct stat __user *statbuf)
261 return error; 261 return error;
262} 262}
263 263
264#ifndef __ARCH_WANT_STAT64 264#if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT)
265asmlinkage long sys_newfstatat(int dfd, char __user *filename, 265asmlinkage long sys_newfstatat(int dfd, char __user *filename,
266 struct stat __user *statbuf, int flag) 266 struct stat __user *statbuf, int flag)
267{ 267{
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 6cfdc9a87772..610b5bdbe75b 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -43,6 +43,7 @@ static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent * parent_sd,
43 43
44 memset(sd, 0, sizeof(*sd)); 44 memset(sd, 0, sizeof(*sd));
45 atomic_set(&sd->s_count, 1); 45 atomic_set(&sd->s_count, 1);
46 atomic_set(&sd->s_event, 0);
46 INIT_LIST_HEAD(&sd->s_children); 47 INIT_LIST_HEAD(&sd->s_children);
47 list_add(&sd->s_sibling, &parent_sd->s_children); 48 list_add(&sd->s_sibling, &parent_sd->s_children);
48 sd->s_element = element; 49 sd->s_element = element;
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index f1cb1ddde511..cf3786625bfa 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -6,6 +6,7 @@
6#include <linux/fsnotify.h> 6#include <linux/fsnotify.h>
7#include <linux/kobject.h> 7#include <linux/kobject.h>
8#include <linux/namei.h> 8#include <linux/namei.h>
9#include <linux/poll.h>
9#include <asm/uaccess.h> 10#include <asm/uaccess.h>
10#include <asm/semaphore.h> 11#include <asm/semaphore.h>
11 12
@@ -57,6 +58,7 @@ struct sysfs_buffer {
57 struct sysfs_ops * ops; 58 struct sysfs_ops * ops;
58 struct semaphore sem; 59 struct semaphore sem;
59 int needs_read_fill; 60 int needs_read_fill;
61 int event;
60}; 62};
61 63
62 64
@@ -72,6 +74,7 @@ struct sysfs_buffer {
72 */ 74 */
73static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer) 75static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer)
74{ 76{
77 struct sysfs_dirent * sd = dentry->d_fsdata;
75 struct attribute * attr = to_attr(dentry); 78 struct attribute * attr = to_attr(dentry);
76 struct kobject * kobj = to_kobj(dentry->d_parent); 79 struct kobject * kobj = to_kobj(dentry->d_parent);
77 struct sysfs_ops * ops = buffer->ops; 80 struct sysfs_ops * ops = buffer->ops;
@@ -83,6 +86,7 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer
83 if (!buffer->page) 86 if (!buffer->page)
84 return -ENOMEM; 87 return -ENOMEM;
85 88
89 buffer->event = atomic_read(&sd->s_event);
86 count = ops->show(kobj,attr,buffer->page); 90 count = ops->show(kobj,attr,buffer->page);
87 buffer->needs_read_fill = 0; 91 buffer->needs_read_fill = 0;
88 BUG_ON(count > (ssize_t)PAGE_SIZE); 92 BUG_ON(count > (ssize_t)PAGE_SIZE);
@@ -348,12 +352,84 @@ static int sysfs_release(struct inode * inode, struct file * filp)
348 return 0; 352 return 0;
349} 353}
350 354
355/* Sysfs attribute files are pollable. The idea is that you read
356 * the content and then you use 'poll' or 'select' to wait for
357 * the content to change. When the content changes (assuming the
358 * manager for the kobject supports notification), poll will
359 * return POLLERR|POLLPRI, and select will return the fd whether
360 * it is waiting for read, write, or exceptions.
361 * Once poll/select indicates that the value has changed, you
362 * need to close and re-open the file, as simply seeking and reading
363 * again will not get new data, or reset the state of 'poll'.
364 * Reminder: this only works for attributes which actively support
365 * it, and it is not possible to test an attribute from userspace
366 * to see if it supports poll (Nether 'poll' or 'select' return
367 * an appropriate error code). When in doubt, set a suitable timeout value.
368 */
369static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
370{
371 struct sysfs_buffer * buffer = filp->private_data;
372 struct kobject * kobj = to_kobj(filp->f_dentry->d_parent);
373 struct sysfs_dirent * sd = filp->f_dentry->d_fsdata;
374 int res = 0;
375
376 poll_wait(filp, &kobj->poll, wait);
377
378 if (buffer->event != atomic_read(&sd->s_event)) {
379 res = POLLERR|POLLPRI;
380 buffer->needs_read_fill = 1;
381 }
382
383 return res;
384}
385
386
387static struct dentry *step_down(struct dentry *dir, const char * name)
388{
389 struct dentry * de;
390
391 if (dir == NULL || dir->d_inode == NULL)
392 return NULL;
393
394 mutex_lock(&dir->d_inode->i_mutex);
395 de = lookup_one_len(name, dir, strlen(name));
396 mutex_unlock(&dir->d_inode->i_mutex);
397 dput(dir);
398 if (IS_ERR(de))
399 return NULL;
400 if (de->d_inode == NULL) {
401 dput(de);
402 return NULL;
403 }
404 return de;
405}
406
407void sysfs_notify(struct kobject * k, char *dir, char *attr)
408{
409 struct dentry *de = k->dentry;
410 if (de)
411 dget(de);
412 if (de && dir)
413 de = step_down(de, dir);
414 if (de && attr)
415 de = step_down(de, attr);
416 if (de) {
417 struct sysfs_dirent * sd = de->d_fsdata;
418 if (sd)
419 atomic_inc(&sd->s_event);
420 wake_up_interruptible(&k->poll);
421 dput(de);
422 }
423}
424EXPORT_SYMBOL_GPL(sysfs_notify);
425
351const struct file_operations sysfs_file_operations = { 426const struct file_operations sysfs_file_operations = {
352 .read = sysfs_read_file, 427 .read = sysfs_read_file,
353 .write = sysfs_write_file, 428 .write = sysfs_write_file,
354 .llseek = generic_file_llseek, 429 .llseek = generic_file_llseek,
355 .open = sysfs_open_file, 430 .open = sysfs_open_file,
356 .release = sysfs_release, 431 .release = sysfs_release,
432 .poll = sysfs_poll,
357}; 433};
358 434
359 435
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 32958a7c50e9..3651ffb5ec09 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -11,6 +11,7 @@ extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *,
11 11
12extern int sysfs_add_file(struct dentry *, const struct attribute *, int); 12extern int sysfs_add_file(struct dentry *, const struct attribute *, int);
13extern void sysfs_hash_and_remove(struct dentry * dir, const char * name); 13extern void sysfs_hash_and_remove(struct dentry * dir, const char * name);
14extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name);
14 15
15extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **); 16extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **);
16extern void sysfs_remove_subdir(struct dentry *); 17extern void sysfs_remove_subdir(struct dentry *);
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 269721af02f3..c847416f6d10 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -252,6 +252,7 @@ xfs_file_sendfile_invis(
252STATIC ssize_t 252STATIC ssize_t
253xfs_file_splice_read( 253xfs_file_splice_read(
254 struct file *infilp, 254 struct file *infilp,
255 loff_t *ppos,
255 struct pipe_inode_info *pipe, 256 struct pipe_inode_info *pipe,
256 size_t len, 257 size_t len,
257 unsigned int flags) 258 unsigned int flags)
@@ -259,13 +260,14 @@ xfs_file_splice_read(
259 vnode_t *vp = vn_from_inode(infilp->f_dentry->d_inode); 260 vnode_t *vp = vn_from_inode(infilp->f_dentry->d_inode);
260 ssize_t rval; 261 ssize_t rval;
261 262
262 VOP_SPLICE_READ(vp, infilp, pipe, len, flags, 0, NULL, rval); 263 VOP_SPLICE_READ(vp, infilp, ppos, pipe, len, flags, 0, NULL, rval);
263 return rval; 264 return rval;
264} 265}
265 266
266STATIC ssize_t 267STATIC ssize_t
267xfs_file_splice_read_invis( 268xfs_file_splice_read_invis(
268 struct file *infilp, 269 struct file *infilp,
270 loff_t *ppos,
269 struct pipe_inode_info *pipe, 271 struct pipe_inode_info *pipe,
270 size_t len, 272 size_t len,
271 unsigned int flags) 273 unsigned int flags)
@@ -273,7 +275,7 @@ xfs_file_splice_read_invis(
273 vnode_t *vp = vn_from_inode(infilp->f_dentry->d_inode); 275 vnode_t *vp = vn_from_inode(infilp->f_dentry->d_inode);
274 ssize_t rval; 276 ssize_t rval;
275 277
276 VOP_SPLICE_READ(vp, infilp, pipe, len, flags, IO_INVIS, NULL, rval); 278 VOP_SPLICE_READ(vp, infilp, ppos, pipe, len, flags, IO_INVIS, NULL, rval);
277 return rval; 279 return rval;
278} 280}
279 281
@@ -281,13 +283,14 @@ STATIC ssize_t
281xfs_file_splice_write( 283xfs_file_splice_write(
282 struct pipe_inode_info *pipe, 284 struct pipe_inode_info *pipe,
283 struct file *outfilp, 285 struct file *outfilp,
286 loff_t *ppos,
284 size_t len, 287 size_t len,
285 unsigned int flags) 288 unsigned int flags)
286{ 289{
287 vnode_t *vp = vn_from_inode(outfilp->f_dentry->d_inode); 290 vnode_t *vp = vn_from_inode(outfilp->f_dentry->d_inode);
288 ssize_t rval; 291 ssize_t rval;
289 292
290 VOP_SPLICE_WRITE(vp, pipe, outfilp, len, flags, 0, NULL, rval); 293 VOP_SPLICE_WRITE(vp, pipe, outfilp, ppos, len, flags, 0, NULL, rval);
291 return rval; 294 return rval;
292} 295}
293 296
@@ -295,13 +298,14 @@ STATIC ssize_t
295xfs_file_splice_write_invis( 298xfs_file_splice_write_invis(
296 struct pipe_inode_info *pipe, 299 struct pipe_inode_info *pipe,
297 struct file *outfilp, 300 struct file *outfilp,
301 loff_t *ppos,
298 size_t len, 302 size_t len,
299 unsigned int flags) 303 unsigned int flags)
300{ 304{
301 vnode_t *vp = vn_from_inode(outfilp->f_dentry->d_inode); 305 vnode_t *vp = vn_from_inode(outfilp->f_dentry->d_inode);
302 ssize_t rval; 306 ssize_t rval;
303 307
304 VOP_SPLICE_WRITE(vp, pipe, outfilp, len, flags, IO_INVIS, NULL, rval); 308 VOP_SPLICE_WRITE(vp, pipe, outfilp, ppos, len, flags, IO_INVIS, NULL, rval);
305 return rval; 309 return rval;
306} 310}
307 311
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 74a52937f208..67efe3308980 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -338,6 +338,7 @@ ssize_t
338xfs_splice_read( 338xfs_splice_read(
339 bhv_desc_t *bdp, 339 bhv_desc_t *bdp,
340 struct file *infilp, 340 struct file *infilp,
341 loff_t *ppos,
341 struct pipe_inode_info *pipe, 342 struct pipe_inode_info *pipe,
342 size_t count, 343 size_t count,
343 int flags, 344 int flags,
@@ -360,7 +361,7 @@ xfs_splice_read(
360 int error; 361 int error;
361 362
362 error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), 363 error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
363 infilp->f_pos, count, 364 *ppos, count,
364 FILP_DELAY_FLAG(infilp), &locktype); 365 FILP_DELAY_FLAG(infilp), &locktype);
365 if (error) { 366 if (error) {
366 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 367 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
@@ -368,8 +369,8 @@ xfs_splice_read(
368 } 369 }
369 } 370 }
370 xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, &ip->i_iocore, 371 xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, &ip->i_iocore,
371 pipe, count, infilp->f_pos, ioflags); 372 pipe, count, *ppos, ioflags);
372 ret = generic_file_splice_read(infilp, pipe, count, flags); 373 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
373 if (ret > 0) 374 if (ret > 0)
374 XFS_STATS_ADD(xs_read_bytes, ret); 375 XFS_STATS_ADD(xs_read_bytes, ret);
375 376
@@ -382,6 +383,7 @@ xfs_splice_write(
382 bhv_desc_t *bdp, 383 bhv_desc_t *bdp,
383 struct pipe_inode_info *pipe, 384 struct pipe_inode_info *pipe,
384 struct file *outfilp, 385 struct file *outfilp,
386 loff_t *ppos,
385 size_t count, 387 size_t count,
386 int flags, 388 int flags,
387 int ioflags, 389 int ioflags,
@@ -403,7 +405,7 @@ xfs_splice_write(
403 int error; 405 int error;
404 406
405 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, BHV_TO_VNODE(bdp), 407 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, BHV_TO_VNODE(bdp),
406 outfilp->f_pos, count, 408 *ppos, count,
407 FILP_DELAY_FLAG(outfilp), &locktype); 409 FILP_DELAY_FLAG(outfilp), &locktype);
408 if (error) { 410 if (error) {
409 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 411 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
@@ -411,8 +413,8 @@ xfs_splice_write(
411 } 413 }
412 } 414 }
413 xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, &ip->i_iocore, 415 xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, &ip->i_iocore,
414 pipe, count, outfilp->f_pos, ioflags); 416 pipe, count, *ppos, ioflags);
415 ret = generic_file_splice_write(pipe, outfilp, count, flags); 417 ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
416 if (ret > 0) 418 if (ret > 0)
417 XFS_STATS_ADD(xs_write_bytes, ret); 419 XFS_STATS_ADD(xs_write_bytes, ret);
418 420
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index 55c689a86ad2..8f4539952350 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -93,11 +93,11 @@ extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *,
93extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *, 93extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
94 loff_t *, int, size_t, read_actor_t, 94 loff_t *, int, size_t, read_actor_t,
95 void *, struct cred *); 95 void *, struct cred *);
96extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *, 96extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *, loff_t *,
97 struct pipe_inode_info *, size_t, int, int, 97 struct pipe_inode_info *, size_t, int, int,
98 struct cred *); 98 struct cred *);
99extern ssize_t xfs_splice_write(struct bhv_desc *, struct pipe_inode_info *, 99extern ssize_t xfs_splice_write(struct bhv_desc *, struct pipe_inode_info *,
100 struct file *, size_t, int, int, 100 struct file *, loff_t *, size_t, int, int,
101 struct cred *); 101 struct cred *);
102 102
103#endif /* __XFS_LRW_H__ */ 103#endif /* __XFS_LRW_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 88b09f186289..2a8e16c22353 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -173,11 +173,11 @@ typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *,
173typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *, 173typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *,
174 loff_t *, int, size_t, read_actor_t, 174 loff_t *, int, size_t, read_actor_t,
175 void *, struct cred *); 175 void *, struct cred *);
176typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *, 176typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *, loff_t *,
177 struct pipe_inode_info *, size_t, int, int, 177 struct pipe_inode_info *, size_t, int, int,
178 struct cred *); 178 struct cred *);
179typedef ssize_t (*vop_splice_write_t)(bhv_desc_t *, struct pipe_inode_info *, 179typedef ssize_t (*vop_splice_write_t)(bhv_desc_t *, struct pipe_inode_info *,
180 struct file *, size_t, int, int, 180 struct file *, loff_t *, size_t, int, int,
181 struct cred *); 181 struct cred *);
182typedef int (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *, 182typedef int (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *,
183 int, unsigned int, void __user *); 183 int, unsigned int, void __user *);
@@ -284,10 +284,10 @@ typedef struct vnodeops {
284 rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr) 284 rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr)
285#define VOP_SENDFILE(vp,f,off,ioflags,cnt,act,targ,cr,rv) \ 285#define VOP_SENDFILE(vp,f,off,ioflags,cnt,act,targ,cr,rv) \
286 rv = _VOP_(vop_sendfile, vp)((vp)->v_fbhv,f,off,ioflags,cnt,act,targ,cr) 286 rv = _VOP_(vop_sendfile, vp)((vp)->v_fbhv,f,off,ioflags,cnt,act,targ,cr)
287#define VOP_SPLICE_READ(vp,f,pipe,cnt,fl,iofl,cr,rv) \ 287#define VOP_SPLICE_READ(vp,f,o,pipe,cnt,fl,iofl,cr,rv) \
288 rv = _VOP_(vop_splice_read, vp)((vp)->v_fbhv,f,pipe,cnt,fl,iofl,cr) 288 rv = _VOP_(vop_splice_read, vp)((vp)->v_fbhv,f,o,pipe,cnt,fl,iofl,cr)
289#define VOP_SPLICE_WRITE(vp,f,pipe,cnt,fl,iofl,cr,rv) \ 289#define VOP_SPLICE_WRITE(vp,f,o,pipe,cnt,fl,iofl,cr,rv) \
290 rv = _VOP_(vop_splice_write, vp)((vp)->v_fbhv,f,pipe,cnt,fl,iofl,cr) 290 rv = _VOP_(vop_splice_write, vp)((vp)->v_fbhv,f,o,pipe,cnt,fl,iofl,cr)
291#define VOP_BMAP(vp,of,sz,rw,b,n,rv) \ 291#define VOP_BMAP(vp,of,sz,rw,b,n,rv) \
292 rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n) 292 rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n)
293#define VOP_OPEN(vp, cr, rv) \ 293#define VOP_OPEN(vp, cr, rv) \
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 64ee07db0d5e..8558226281c4 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1942,8 +1942,10 @@ xfs_alloc_fix_freelist(
1942 /* 1942 /*
1943 * Allocate as many blocks as possible at once. 1943 * Allocate as many blocks as possible at once.
1944 */ 1944 */
1945 if ((error = xfs_alloc_ag_vextent(&targs))) 1945 if ((error = xfs_alloc_ag_vextent(&targs))) {
1946 xfs_trans_brelse(tp, agflbp);
1946 return error; 1947 return error;
1948 }
1947 /* 1949 /*
1948 * Stop if we run out. Won't happen if callers are obeying 1950 * Stop if we run out. Won't happen if callers are obeying
1949 * the restrictions correctly. Can happen for free calls 1951 * the restrictions correctly. Can happen for free calls
@@ -1960,6 +1962,7 @@ xfs_alloc_fix_freelist(
1960 return error; 1962 return error;
1961 } 1963 }
1962 } 1964 }
1965 xfs_trans_brelse(tp, agflbp);
1963 args->agbp = agbp; 1966 args->agbp = agbp;
1964 return 0; 1967 return 0;
1965} 1968}
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 81a05cfd77d2..1f148762eb28 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -316,6 +316,18 @@ xfs_rename(
316 } 316 }
317 } 317 }
318 318
319 /*
320 * If we are using project inheritance, we only allow renames
321 * into our tree when the project IDs are the same; else the
322 * tree quota mechanism would be circumvented.
323 */
324 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
325 (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) {
326 error = XFS_ERROR(EXDEV);
327 xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
328 goto rele_return;
329 }
330
319 new_parent = (src_dp != target_dp); 331 new_parent = (src_dp != target_dp);
320 src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR); 332 src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);
321 333
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index f0e09ca14139..36ea1b2094f2 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -669,31 +669,22 @@ xfs_mntupdate(
669 xfs_mount_t *mp = XFS_BHVTOM(bdp); 669 xfs_mount_t *mp = XFS_BHVTOM(bdp);
670 int error; 670 int error;
671 671
672 if (args->flags & XFSMNT_BARRIER) 672 if (!(*flags & MS_RDONLY)) { /* rw/ro -> rw */
673 mp->m_flags |= XFS_MOUNT_BARRIER; 673 if (vfsp->vfs_flag & VFS_RDONLY)
674 else 674 vfsp->vfs_flag &= ~VFS_RDONLY;
675 mp->m_flags &= ~XFS_MOUNT_BARRIER; 675 if (args->flags & XFSMNT_BARRIER) {
676 676 mp->m_flags |= XFS_MOUNT_BARRIER;
677 if ((vfsp->vfs_flag & VFS_RDONLY) &&
678 !(*flags & MS_RDONLY)) {
679 vfsp->vfs_flag &= ~VFS_RDONLY;
680
681 if (args->flags & XFSMNT_BARRIER)
682 xfs_mountfs_check_barriers(mp); 677 xfs_mountfs_check_barriers(mp);
683 } 678 } else {
684 679 mp->m_flags &= ~XFS_MOUNT_BARRIER;
685 if (!(vfsp->vfs_flag & VFS_RDONLY) && 680 }
686 (*flags & MS_RDONLY)) { 681 } else if (!(vfsp->vfs_flag & VFS_RDONLY)) { /* rw -> ro */
687 VFS_SYNC(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL, error); 682 VFS_SYNC(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL, error);
688
689 xfs_quiesce_fs(mp); 683 xfs_quiesce_fs(mp);
690
691 /* Ok now write out an unmount record */
692 xfs_log_unmount_write(mp); 684 xfs_log_unmount_write(mp);
693 xfs_unmountfs_writesb(mp); 685 xfs_unmountfs_writesb(mp);
694 vfsp->vfs_flag |= VFS_RDONLY; 686 vfsp->vfs_flag |= VFS_RDONLY;
695 } 687 }
696
697 return 0; 688 return 0;
698} 689}
699 690
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index fa71b305ba5c..7027ae68ee38 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -2663,7 +2663,7 @@ xfs_link(
2663 */ 2663 */
2664 if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 2664 if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
2665 (tdp->i_d.di_projid != sip->i_d.di_projid))) { 2665 (tdp->i_d.di_projid != sip->i_d.di_projid))) {
2666 error = XFS_ERROR(EPERM); 2666 error = XFS_ERROR(EXDEV);
2667 goto error_return; 2667 goto error_return;
2668 } 2668 }
2669 2669