-rw-r--r-- Documentation/block/biodoc.txt | 113
-rw-r--r-- Makefile | 2
-rw-r--r-- arch/alpha/kernel/pci-noop.c | 2
-rw-r--r-- arch/alpha/kernel/pci_iommu.c | 2
-rw-r--r-- arch/arm/mm/consistent.c | 8
-rw-r--r-- arch/frv/mb93090-mb00/pci-dma-nommu.c | 2
-rw-r--r-- arch/frv/mb93090-mb00/pci-dma.c | 2
-rw-r--r-- arch/frv/mm/dma-alloc.c | 2
-rw-r--r-- arch/ia64/hp/common/hwsw_iommu.c | 2
-rw-r--r-- arch/ia64/hp/common/sba_iommu.c | 2
-rw-r--r-- arch/ia64/lib/swiotlb.c | 2
-rw-r--r-- arch/ia64/sn/kernel/xpc.h | 2
-rw-r--r-- arch/ia64/sn/pci/pci_dma.c | 2
-rw-r--r-- arch/mips/mm/dma-coherent.c | 4
-rw-r--r-- arch/mips/mm/dma-ip27.c | 4
-rw-r--r-- arch/mips/mm/dma-ip32.c | 4
-rw-r--r-- arch/mips/mm/dma-noncoherent.c | 4
-rw-r--r-- arch/parisc/kernel/pci-dma.c | 6
-rw-r--r-- arch/ppc/8xx_io/cs4218.h | 2
-rw-r--r-- arch/ppc/8xx_io/cs4218_tdm.c | 4
-rw-r--r-- arch/ppc/kernel/dma-mapping.c | 4
-rw-r--r-- arch/ppc/mm/pgtable.c | 4
-rw-r--r-- arch/sh/boards/renesas/rts7751r2d/mach.c | 2
-rw-r--r-- arch/sh/cchips/voyagergx/consistent.c | 2
-rw-r--r-- arch/sh/drivers/pci/dma-dreamcast.c | 2
-rw-r--r-- arch/sh/mm/consistent.c | 2
-rw-r--r-- arch/sparc64/solaris/socksys.c | 2
-rw-r--r-- arch/sparc64/solaris/timod.c | 2
-rw-r--r-- arch/um/kernel/mem.c | 2
-rw-r--r-- arch/um/kernel/process_kern.c | 2
-rw-r--r-- arch/x86_64/kernel/pci-gart.c | 4
-rw-r--r-- arch/x86_64/kernel/pci-nommu.c | 2
-rw-r--r-- arch/xtensa/kernel/pci-dma.c | 2
-rw-r--r-- drivers/block/as-iosched.c | 327
-rw-r--r-- drivers/block/cfq-iosched.c | 372
-rw-r--r-- drivers/block/deadline-iosched.c | 125
-rw-r--r-- drivers/block/elevator.c | 345
-rw-r--r-- drivers/block/ll_rw_blk.c | 193
-rw-r--r-- drivers/block/loop.c | 2
-rw-r--r-- drivers/block/noop-iosched.c | 48
-rw-r--r-- drivers/block/rd.c | 2
-rw-r--r-- drivers/char/n_tty.c | 2
-rw-r--r-- drivers/ieee1394/eth1394.c | 2
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_cmd.c | 2
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_cmd.h | 2
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_memfree.c | 2
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_memfree.h | 2
-rw-r--r-- drivers/md/bitmap.c | 2
-rw-r--r-- drivers/md/dm-crypt.c | 2
-rw-r--r-- drivers/net/cassini.c | 4
-rw-r--r-- drivers/net/lance.c | 4
-rw-r--r-- drivers/net/myri_sbus.c | 2
-rw-r--r-- drivers/net/myri_sbus.h | 2
-rw-r--r-- drivers/net/sunbmac.c | 3
-rw-r--r-- drivers/net/sunbmac.h | 2
-rw-r--r-- drivers/parisc/ccio-dma.c | 2
-rw-r--r-- drivers/parisc/sba_iommu.c | 2
-rw-r--r-- drivers/s390/net/fsm.c | 2
-rw-r--r-- drivers/s390/net/fsm.h | 2
-rw-r--r-- drivers/scsi/eata.c | 2
-rw-r--r-- drivers/scsi/hosts.c | 3
-rw-r--r-- drivers/scsi/lpfc/lpfc_mem.c | 2
-rw-r--r-- drivers/scsi/osst.c | 6
-rw-r--r-- drivers/scsi/qla2xxx/qla_gbl.h | 4
-rw-r--r-- drivers/scsi/qla2xxx/qla_init.c | 2
-rw-r--r-- drivers/scsi/qla2xxx/qla_rscn.c | 2
-rw-r--r-- drivers/scsi/scsi.c | 8
-rw-r--r-- drivers/scsi/scsi_ioctl.c | 3
-rw-r--r-- drivers/scsi/scsi_lib.c | 2
-rw-r--r-- drivers/scsi/sg.c | 2
-rw-r--r-- drivers/scsi/st.c | 6
-rw-r--r-- drivers/usb/core/buffer.c | 2
-rw-r--r-- drivers/usb/core/hcd.c | 2
-rw-r--r-- drivers/usb/core/hcd.h | 8
-rw-r--r-- drivers/usb/core/message.c | 2
-rw-r--r-- drivers/usb/core/urb.c | 4
-rw-r--r-- drivers/usb/core/usb.c | 2
-rw-r--r-- drivers/usb/gadget/dummy_hcd.c | 8
-rw-r--r-- drivers/usb/gadget/ether.c | 22
-rw-r--r-- drivers/usb/gadget/goku_udc.c | 6
-rw-r--r-- drivers/usb/gadget/lh7a40x_udc.c | 12
-rw-r--r-- drivers/usb/gadget/net2280.c | 6
-rw-r--r-- drivers/usb/gadget/omap_udc.c | 6
-rw-r--r-- drivers/usb/gadget/pxa2xx_udc.c | 6
-rw-r--r-- drivers/usb/gadget/serial.c | 16
-rw-r--r-- drivers/usb/gadget/zero.c | 8
-rw-r--r-- drivers/usb/host/ehci-hcd.c | 2
-rw-r--r-- drivers/usb/host/ehci-mem.c | 6
-rw-r--r-- drivers/usb/host/ehci-q.c | 6
-rw-r--r-- drivers/usb/host/ehci-sched.c | 14
-rw-r--r-- drivers/usb/host/isp116x-hcd.c | 2
-rw-r--r-- drivers/usb/host/ohci-hcd.c | 2
-rw-r--r-- drivers/usb/host/ohci-mem.c | 4
-rw-r--r-- drivers/usb/host/sl811-hcd.c | 2
-rw-r--r-- drivers/usb/host/uhci-q.c | 2
-rw-r--r-- drivers/usb/misc/uss720.c | 6
-rw-r--r-- drivers/usb/net/asix.c | 2
-rw-r--r-- drivers/usb/net/gl620a.c | 2
-rw-r--r-- drivers/usb/net/kaweth.c | 6
-rw-r--r-- drivers/usb/net/net1080.c | 2
-rw-r--r-- drivers/usb/net/rndis_host.c | 2
-rw-r--r-- drivers/usb/net/usbnet.c | 2
-rw-r--r-- drivers/usb/net/usbnet.h | 2
-rw-r--r-- drivers/usb/net/zaurus.c | 2
-rw-r--r-- drivers/usb/net/zd1201.c | 2
-rw-r--r-- fs/afs/file.c | 4
-rw-r--r-- fs/bio.c | 4
-rw-r--r-- fs/buffer.c | 4
-rw-r--r-- fs/dcache.c | 2
-rw-r--r-- fs/dquot.c | 2
-rw-r--r-- fs/exec.c | 12
-rw-r--r-- fs/ext3/inode.c | 2
-rw-r--r-- fs/hfs/inode.c | 2
-rw-r--r-- fs/hfsplus/inode.c | 2
-rw-r--r-- fs/inode.c | 2
-rw-r--r-- fs/jbd/journal.c | 2
-rw-r--r-- fs/jbd/transaction.c | 2
-rw-r--r-- fs/jfs/jfs_metapage.c | 4
-rw-r--r-- fs/lockd/host.c | 4
-rw-r--r-- fs/locks.c | 48
-rw-r--r-- fs/mbcache.c | 6
-rw-r--r-- fs/namei.c | 95
-rw-r--r-- fs/nfs/delegation.c | 2
-rw-r--r-- fs/nfs/delegation.h | 16
-rw-r--r-- fs/nfs/dir.c | 67
-rw-r--r-- fs/nfs/file.c | 31
-rw-r--r-- fs/nfs/inode.c | 200
-rw-r--r-- fs/nfs/nfs2xdr.c | 1
-rw-r--r-- fs/nfs/nfs3proc.c | 92
-rw-r--r-- fs/nfs/nfs3xdr.c | 1
-rw-r--r-- fs/nfs/nfs4_fs.h | 53
-rw-r--r-- fs/nfs/nfs4proc.c | 735
-rw-r--r-- fs/nfs/nfs4state.c | 181
-rw-r--r-- fs/nfs/nfs4xdr.c | 305
-rw-r--r-- fs/nfs/proc.c | 44
-rw-r--r-- fs/nfs/read.c | 1
-rw-r--r-- fs/nfs/write.c | 2
-rw-r--r-- fs/open.c | 79
-rw-r--r-- fs/partitions/check.c | 2
-rw-r--r-- fs/reiserfs/fix_node.c | 2
-rw-r--r-- fs/reiserfs/inode.c | 2
-rw-r--r-- fs/reiserfs/xattr.c | 2
-rw-r--r-- fs/xfs/linux-2.6/kmem.c | 22
-rw-r--r-- fs/xfs/linux-2.6/kmem.h | 18
-rw-r--r-- fs/xfs/linux-2.6/xfs_aops.c | 2
-rw-r--r-- fs/xfs/linux-2.6/xfs_buf.c | 8
-rw-r--r-- include/asm-alpha/dma-mapping.h | 2
-rw-r--r-- include/asm-arm/dma-mapping.h | 4
-rw-r--r-- include/asm-cris/dma-mapping.h | 4
-rw-r--r-- include/asm-frv/dma-mapping.h | 2
-rw-r--r-- include/asm-frv/pci.h | 2
-rw-r--r-- include/asm-generic/dma-mapping-broken.h | 2
-rw-r--r-- include/asm-ia64/machvec.h | 2
-rw-r--r-- include/asm-m32r/dma-mapping.h | 2
-rw-r--r-- include/asm-mips/dma-mapping.h | 4
-rw-r--r-- include/asm-parisc/dma-mapping.h | 8
-rw-r--r-- include/asm-ppc/dma-mapping.h | 2
-rw-r--r-- include/asm-sh/dma-mapping.h | 4
-rw-r--r-- include/asm-sh/machvec.h | 2
-rw-r--r-- include/asm-sh64/dma-mapping.h | 2
-rw-r--r-- include/asm-sparc/dma-mapping.h | 2
-rw-r--r-- include/asm-sparc64/dma-mapping.h | 2
-rw-r--r-- include/asm-um/dma-mapping.h | 2
-rw-r--r-- include/asm-um/page.h | 2
-rw-r--r-- include/asm-x86_64/dma-mapping.h | 2
-rw-r--r-- include/asm-x86_64/swiotlb.h | 2
-rw-r--r-- include/asm-xtensa/dma-mapping.h | 2
-rw-r--r-- include/linux/audit.h | 4
-rw-r--r-- include/linux/bio.h | 2
-rw-r--r-- include/linux/blkdev.h | 50
-rw-r--r-- include/linux/buffer_head.h | 2
-rw-r--r-- include/linux/elevator.h | 22
-rw-r--r-- include/linux/fs.h | 2
-rw-r--r-- include/linux/genhd.h | 2
-rw-r--r-- include/linux/gfp.h | 39
-rw-r--r-- include/linux/i2o.h | 4
-rw-r--r-- include/linux/idr.h | 2
-rw-r--r-- include/linux/jbd.h | 4
-rw-r--r-- include/linux/kobject.h | 2
-rw-r--r-- include/linux/loop.h | 2
-rw-r--r-- include/linux/mbcache.h | 2
-rw-r--r-- include/linux/mm.h | 2
-rw-r--r-- include/linux/mmzone.h | 2
-rw-r--r-- include/linux/namei.h | 8
-rw-r--r-- include/linux/nfs_fs.h | 28
-rw-r--r-- include/linux/nfs_xdr.h | 65
-rw-r--r-- include/linux/pagemap.h | 13
-rw-r--r-- include/linux/radix-tree.h | 2
-rw-r--r-- include/linux/reiserfs_fs.h | 2
-rw-r--r-- include/linux/security.h | 2
-rw-r--r-- include/linux/skbuff.h | 3
-rw-r--r-- include/linux/slab.h | 2
-rw-r--r-- include/linux/sunrpc/auth.h | 7
-rw-r--r-- include/linux/sunrpc/debug.h | 3
-rw-r--r-- include/linux/sunrpc/gss_api.h | 27
-rw-r--r-- include/linux/sunrpc/gss_err.h | 10
-rw-r--r-- include/linux/sunrpc/gss_krb5.h | 27
-rw-r--r-- include/linux/sunrpc/gss_spkm3.h | 4
-rw-r--r-- include/linux/sunrpc/msg_prot.h | 25
-rw-r--r-- include/linux/sunrpc/xdr.h | 6
-rw-r--r-- include/linux/sunrpc/xprt.h | 227
-rw-r--r-- include/linux/suspend.h | 2
-rw-r--r-- include/linux/swap.h | 4
-rw-r--r-- include/linux/textsearch.h | 4
-rw-r--r-- include/linux/types.h | 9
-rw-r--r-- include/linux/usb.h | 8
-rw-r--r-- include/linux/usb_gadget.h | 12
-rw-r--r-- include/net/dst.h | 1
-rw-r--r-- include/net/sock.h | 2
-rw-r--r-- include/scsi/scsi_cmnd.h | 2
-rw-r--r-- include/scsi/scsi_request.h | 2
-rw-r--r-- include/sound/memalloc.h | 2
-rw-r--r-- kernel/audit.c | 6
-rw-r--r-- kernel/auditsc.c | 2
-rw-r--r-- kernel/kexec.c | 7
-rw-r--r-- kernel/power/swsusp.c | 2
-rw-r--r-- lib/idr.c | 2
-rw-r--r-- lib/kobject.c | 2
-rw-r--r-- lib/kobject_uevent.c | 4
-rw-r--r-- lib/textsearch.c | 2
-rw-r--r-- mm/filemap.c | 8
-rw-r--r-- mm/highmem.c | 14
-rw-r--r-- mm/mempolicy.c | 6
-rw-r--r-- mm/mempool.c | 2
-rw-r--r-- mm/page_alloc.c | 33
-rw-r--r-- mm/shmem.c | 4
-rw-r--r-- mm/slab.c | 8
-rw-r--r-- mm/vmscan.c | 8
-rw-r--r-- net/core/neighbour.c | 5
-rw-r--r-- net/core/pktgen.c | 506
-rw-r--r-- net/core/skbuff.c | 2
-rw-r--r-- net/core/sock.c | 2
-rw-r--r-- net/dccp/output.c | 2
-rw-r--r-- net/decnet/af_decnet.c | 13
-rw-r--r-- net/ipv4/devinet.c | 3
-rw-r--r-- net/ipv4/fib_trie.c | 2
-rw-r--r-- net/ipv4/icmp.c | 5
-rw-r--r-- net/ipv4/ip_output.c | 5
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_core.c | 132
-rw-r--r-- net/ipv4/proc.c | 4
-rw-r--r-- net/ipv6/icmp.c | 9
-rw-r--r-- net/ipv6/proc.c | 4
-rw-r--r-- net/netlink/af_netlink.c | 5
-rw-r--r-- net/rose/rose_route.c | 2
-rw-r--r-- net/sctp/proc.c | 4
-rw-r--r-- net/sunrpc/Makefile | 2
-rw-r--r-- net/sunrpc/auth.c | 1
-rw-r--r-- net/sunrpc/auth_gss/Makefile | 2
-rw-r--r-- net/sunrpc/auth_gss/auth_gss.c | 187
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_crypto.c | 260
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_mech.c | 41
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_seal.c | 44
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_unseal.c | 39
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_wrap.c | 363
-rw-r--r-- net/sunrpc/auth_gss/gss_mech_switch.c | 29
-rw-r--r-- net/sunrpc/auth_gss/gss_spkm3_mech.c | 21
-rw-r--r-- net/sunrpc/auth_gss/gss_spkm3_seal.c | 4
-rw-r--r-- net/sunrpc/auth_gss/gss_spkm3_unseal.c | 2
-rw-r--r-- net/sunrpc/auth_gss/svcauth_gss.c | 9
-rw-r--r-- net/sunrpc/auth_null.c | 2
-rw-r--r-- net/sunrpc/auth_unix.c | 2
-rw-r--r-- net/sunrpc/clnt.c | 147
-rw-r--r-- net/sunrpc/pmap_clnt.c | 12
-rw-r--r-- net/sunrpc/rpc_pipe.c | 29
-rw-r--r-- net/sunrpc/socklib.c | 175
-rw-r--r-- net/sunrpc/sunrpc_syms.c | 1
-rw-r--r-- net/sunrpc/svcsock.c | 3
-rw-r--r-- net/sunrpc/sysctl.c | 32
-rw-r--r-- net/sunrpc/xdr.c | 177
-rw-r--r-- net/sunrpc/xprt.c | 1613
-rw-r--r-- net/sunrpc/xprtsock.c | 1252
-rw-r--r-- net/xfrm/xfrm_policy.c | 43
-rw-r--r-- net/xfrm/xfrm_state.c | 6
-rw-r--r-- security/dummy.c | 2
-rw-r--r-- security/selinux/hooks.c | 4
-rw-r--r-- sound/core/memalloc.c | 4
-rw-r--r-- sound/core/seq/instr/ainstr_gf1.c | 5
-rw-r--r-- sound/core/seq/instr/ainstr_iw.c | 4
-rw-r--r-- sound/core/seq/instr/ainstr_simple.c | 3
-rw-r--r-- sound/oss/dmasound/dmasound.h | 2
-rw-r--r-- sound/oss/dmasound/dmasound_atari.c | 4
-rw-r--r-- sound/oss/dmasound/dmasound_awacs.c | 4
-rw-r--r-- sound/oss/dmasound/dmasound_paula.c | 4
-rw-r--r-- sound/oss/dmasound/dmasound_q40.c | 4
-rw-r--r-- sound/usb/usbmidi.c | 2
285 files changed, 5785 insertions, 4221 deletions
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 6dd274d7e1cf..2d65c2182161 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -906,9 +906,20 @@ Aside:
 
 
 4. The I/O scheduler
-I/O schedulers are now per queue. They should be runtime switchable and modular
-but aren't yet. Jens has most bits to do this, but the sysfs implementation is
-missing.
+I/O scheduler, a.k.a. elevator, is implemented in two layers. Generic dispatch
+queue and specific I/O schedulers. Unless stated otherwise, elevator is used
+to refer to both parts and I/O scheduler to specific I/O schedulers.
+
+Block layer implements generic dispatch queue in ll_rw_blk.c and elevator.c.
+The generic dispatch queue is responsible for properly ordering barrier
+requests, requeueing, handling non-fs requests and all other subtleties.
+
+Specific I/O schedulers are responsible for ordering normal filesystem
+requests. They can also choose to delay certain requests to improve
+throughput or whatever purpose. As the plural form indicates, there are
+multiple I/O schedulers. They can be built as modules but at least one should
+be built inside the kernel. Each queue can choose different one and can also
+change to another one dynamically.
 
 A block layer call to the i/o scheduler follows the convention elv_xxx(). This
 calls elevator_xxx_fn in the elevator switch (drivers/block/elevator.c). Oh,
@@ -921,44 +932,36 @@ keeping work.
 The functions an elevator may implement are: (* are mandatory)
 elevator_merge_fn               called to query requests for merge with a bio
 
-elevator_merge_req_fn           " " " with another request
+elevator_merge_req_fn           called when two requests get merged. the one
+                                which gets merged into the other one will be
+                                never seen by I/O scheduler again. IOW, after
+                                being merged, the request is gone.
 
 elevator_merged_fn              called when a request in the scheduler has been
                                 involved in a merge. It is used in the deadline
                                 scheduler for example, to reposition the request
                                 if its sorting order has changed.
 
-*elevator_next_req_fn           returns the next scheduled request, or NULL
-                                if there are none (or none are ready).
+elevator_dispatch_fn            fills the dispatch queue with ready requests.
+                                I/O schedulers are free to postpone requests by
+                                not filling the dispatch queue unless @force
+                                is non-zero. Once dispatched, I/O schedulers
+                                are not allowed to manipulate the requests -
+                                they belong to generic dispatch queue.
 
-*elevator_add_req_fn            called to add a new request into the scheduler
+elevator_add_req_fn             called to add a new request into the scheduler
 
 elevator_queue_empty_fn         returns true if the merge queue is empty.
                                 Drivers shouldn't use this, but rather check
                                 if elv_next_request is NULL (without losing the
                                 request if one exists!)
 
-elevator_remove_req_fn          This is called when a driver claims ownership of
-                                the target request - it now belongs to the
-                                driver. It must not be modified or merged.
-                                Drivers must not lose the request! A subsequent
-                                call of elevator_next_req_fn must return the
-                                _next_ request.
-
-elevator_requeue_req_fn         called to add a request to the scheduler. This
-                                is used when the request has alrnadebeen
-                                returned by elv_next_request, but hasn't
-                                completed. If this is not implemented then
-                                elevator_add_req_fn is called instead.
-
 elevator_former_req_fn
 elevator_latter_req_fn          These return the request before or after the
                                 one specified in disk sort order. Used by the
                                 block layer to find merge possibilities.
 
-elevator_completed_req_fn       called when a request is completed. This might
-                                come about due to being merged with another or
-                                when the device completes the request.
+elevator_completed_req_fn       called when a request is completed.
 
 elevator_may_queue_fn           returns true if the scheduler wants to allow the
                                 current context to queue a new request even if
@@ -967,13 +970,33 @@ elevator_may_queue_fn returns true if the scheduler wants to allow the
 
 elevator_set_req_fn
 elevator_put_req_fn             Must be used to allocate and free any elevator
-                                specific storate for a request.
+                                specific storage for a request.
+
+elevator_activate_req_fn        Called when device driver first sees a request.
+                                I/O schedulers can use this callback to
+                                determine when actual execution of a request
+                                starts.
+elevator_deactivate_req_fn      Called when device driver decides to delay
+                                a request by requeueing it.
 
 elevator_init_fn
 elevator_exit_fn                Allocate and free any elevator specific storage
                                 for a queue.
 
-4.2 I/O scheduler implementation
+4.2 Request flows seen by I/O schedulers
+All requests seens by I/O schedulers strictly follow one of the following three
+flows.
+
+ set_req_fn ->
+
+ i. add_req_fn -> (merged_fn ->)* -> dispatch_fn -> activate_req_fn ->
+    (deactivate_req_fn -> activate_req_fn ->)* -> completed_req_fn
+ ii. add_req_fn -> (merged_fn ->)* -> merge_req_fn
+ iii. [none]
+
+ -> put_req_fn
+
+4.3 I/O scheduler implementation
 The generic i/o scheduler algorithm attempts to sort/merge/batch requests for
 optimal disk scan and request servicing performance (based on generic
 principles and device capabilities), optimized for:
@@ -993,18 +1016,7 @@ request in sort order to prevent binary tree lookups.
 This arrangement is not a generic block layer characteristic however, so
 elevators may implement queues as they please.
 
-ii. Last merge hint
-The last merge hint is part of the generic queue layer. I/O schedulers must do
-some management on it. For the most part, the most important thing is to make
-sure q->last_merge is cleared (set to NULL) when the request on it is no longer
-a candidate for merging (for example if it has been sent to the driver).
-
-The last merge performed is cached as a hint for the subsequent request. If
-sequential data is being submitted, the hint is used to perform merges without
-any scanning. This is not sufficient when there are multiple processes doing
-I/O though, so a "merge hash" is used by some schedulers.
-
-iii. Merge hash
+ii. Merge hash
 AS and deadline use a hash table indexed by the last sector of a request. This
 enables merging code to quickly look up "back merge" candidates, even when
 multiple I/O streams are being performed at once on one disk.
@@ -1013,29 +1025,8 @@ multiple I/O streams are being performed at once on one disk.
 are far less common than "back merges" due to the nature of most I/O patterns.
 Front merges are handled by the binary trees in AS and deadline schedulers.
 
-iv. Handling barrier cases
-A request with flags REQ_HARDBARRIER or REQ_SOFTBARRIER must not be ordered
-around. That is, they must be processed after all older requests, and before
-any newer ones. This includes merges!
-
-In AS and deadline schedulers, barriers have the effect of flushing the reorder
-queue. The performance cost of this will vary from nothing to a lot depending
-on i/o patterns and device characteristics. Obviously they won't improve
-performance, so their use should be kept to a minimum.
-
-v. Handling insertion position directives
-A request may be inserted with a position directive. The directives are one of
-ELEVATOR_INSERT_BACK, ELEVATOR_INSERT_FRONT, ELEVATOR_INSERT_SORT.
-
-ELEVATOR_INSERT_SORT is a general directive for non-barrier requests.
-ELEVATOR_INSERT_BACK is used to insert a barrier to the back of the queue.
-ELEVATOR_INSERT_FRONT is used to insert a barrier to the front of the queue, and
-overrides the ordering requested by any previous barriers. In practice this is
-harmless and required, because it is used for SCSI requeueing. This does not
-require flushing the reorder queue, so does not impose a performance penalty.
-
-vi. Plugging the queue to batch requests in anticipation of opportunities for
- merge/sort optimizations
+iii. Plugging the queue to batch requests in anticipation of opportunities for
+ merge/sort optimizations
 
 This is just the same as in 2.4 so far, though per-device unplugging
 support is anticipated for 2.5. Also with a priority-based i/o scheduler,
@@ -1069,7 +1060,7 @@ Aside:
   blk_kick_queue() to unplug a specific queue (right away ?)
   or optionally, all queues, is in the plan.
 
-4.3 I/O contexts
+4.4 I/O contexts
 I/O contexts provide a dynamically allocated per process data area. They may
 be used in I/O schedulers, and in the block layer (could be used for IO statis,
 priorities for example). See *io_context in drivers/block/ll_rw_blk.c, and
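Editorial note: the callback table and the request flows in the updated section
4.2 above map directly onto struct elevator_type registration. The following is
an illustrative sketch only, not part of this patch; it shows what a minimal
I/O scheduler might look like under the reworked interface, loosely modeled on
the in-tree noop scheduler. The callback fields and the elv_dispatch_add_tail()
helper are the ones this series documents and introduces.

/*
 * Sketch of a minimal (FIFO, no-op) I/O scheduler under the new
 * two-layer interface: add_req_fn hands requests straight to the
 * generic dispatch queue, so dispatch_fn has nothing left to move.
 */
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/module.h>
#include <linux/init.h>

static void sketch_add_request(request_queue_t *q, struct request *rq)
{
        /* no internal queueing: append to the generic dispatch queue */
        elv_dispatch_add_tail(q, rq);
}

static int sketch_dispatch(request_queue_t *q, int force)
{
        /* everything was dispatched in add_request already */
        return 0;
}

static struct elevator_type sketch_iosched = {
        .ops = {
                .elevator_dispatch_fn   = sketch_dispatch,
                .elevator_add_req_fn    = sketch_add_request,
        },
        .elevator_name  = "sketch",
        .elevator_owner = THIS_MODULE,
};

static int __init sketch_init(void)
{
        return elv_register(&sketch_iosched);
}

static void __exit sketch_exit(void)
{
        elv_unregister(&sketch_iosched);
}

module_init(sketch_init);
module_exit(sketch_exit);
MODULE_LICENSE("GPL");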
diff --git a/Makefile b/Makefile
index 1fa7e5343464..f1d121f23025 100644
--- a/Makefile
+++ b/Makefile
@@ -334,7 +334,7 @@ KALLSYMS = scripts/kallsyms
 PERL           = perl
 CHECK          = sparse
 
-CHECKFLAGS     := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ $(CF)
+CHECKFLAGS     := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise $(CF)
 MODFLAGS       = -DMODULE
 CFLAGS_MODULE  = $(MODFLAGS)
 AFLAGS_MODULE  = $(MODFLAGS)
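Editorial note: -Wbitwise makes sparse treat "bitwise"-annotated integer types
as incompatible with plain integers, which is what gives the gfp_t conversions
threaded through the rest of this series their teeth. A rough, self-contained
sketch of the mechanism follows; the annotations here are assumptions for
illustration, and the tree's actual typedef (see the include/linux/types.h and
include/linux/gfp.h entries in the diffstat) may use __nocast instead.

/* Sketch only -- mirrors the kernel's annotation scheme, not its
 * exact headers. __CHECKER__ is defined only when sparse runs. */
#ifdef __CHECKER__
#define __bitwise   __attribute__((bitwise))
#define __force     __attribute__((force))
#else
#define __bitwise
#define __force
#endif

typedef unsigned int __bitwise gfp_t;

#define GFP_ATOMIC  ((__force gfp_t)0x20u)  /* illustrative value */

/* gcc accepts any integer for the second argument; running
 * "sparse -Wbitwise" warns when a caller passes a bare int
 * instead of a gfp_t such as GFP_ATOMIC. */
extern void *toy_alloc(unsigned long size, gfp_t gfp);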
diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c
index 582a3519fb28..9903e3a79102 100644
--- a/arch/alpha/kernel/pci-noop.c
+++ b/arch/alpha/kernel/pci-noop.c
@@ -154,7 +154,7 @@ pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask)
 
 void *
 dma_alloc_coherent(struct device *dev, size_t size,
-                   dma_addr_t *dma_handle, int gfp)
+                   dma_addr_t *dma_handle, gfp_t gfp)
 {
         void *ret;
 
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index 7cb23f12ecbd..c468e312e5f8 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -397,7 +397,7 @@ pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp)
 {
         void *cpu_addr;
         long order = get_order(size);
-        int gfp = GFP_ATOMIC;
+        gfp_t gfp = GFP_ATOMIC;
 
 try_again:
         cpu_addr = (void *)__get_free_pages(gfp, order);
diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/consistent.c
index 26356ce4da54..82f4d5e27c54 100644
--- a/arch/arm/mm/consistent.c
+++ b/arch/arm/mm/consistent.c
@@ -75,7 +75,7 @@ static struct vm_region consistent_head = {
 };
 
 static struct vm_region *
-vm_region_alloc(struct vm_region *head, size_t size, int gfp)
+vm_region_alloc(struct vm_region *head, size_t size, gfp_t gfp)
 {
         unsigned long addr = head->vm_start, end = head->vm_end - size;
         unsigned long flags;
@@ -133,7 +133,7 @@ static struct vm_region *vm_region_find(struct vm_region *head, unsigned long ad
 #endif
 
 static void *
-__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, int gfp,
+__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
             pgprot_t prot)
 {
         struct page *page;
@@ -251,7 +251,7 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, int gfp,
  * virtual and bus address for that space.
  */
 void *
-dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, int gfp)
+dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
 {
         return __dma_alloc(dev, size, handle, gfp,
                            pgprot_noncached(pgprot_kernel));
@@ -263,7 +263,7 @@ EXPORT_SYMBOL(dma_alloc_coherent);
  * dma_alloc_coherent above.
  */
 void *
-dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, int gfp)
+dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
 {
         return __dma_alloc(dev, size, handle, gfp,
                            pgprot_writecombine(pgprot_kernel));
diff --git a/arch/frv/mb93090-mb00/pci-dma-nommu.c b/arch/frv/mb93090-mb00/pci-dma-nommu.c
index 819895cf0b9e..2082a9647f4f 100644
--- a/arch/frv/mb93090-mb00/pci-dma-nommu.c
+++ b/arch/frv/mb93090-mb00/pci-dma-nommu.c
@@ -33,7 +33,7 @@ struct dma_alloc_record {
 static DEFINE_SPINLOCK(dma_alloc_lock);
 static LIST_HEAD(dma_alloc_list);
 
-void *dma_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, int gfp)
+void *dma_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t gfp)
 {
         struct dma_alloc_record *new;
         struct list_head *this = &dma_alloc_list;
diff --git a/arch/frv/mb93090-mb00/pci-dma.c b/arch/frv/mb93090-mb00/pci-dma.c
index 27eb12066507..86fbdadc51b6 100644
--- a/arch/frv/mb93090-mb00/pci-dma.c
+++ b/arch/frv/mb93090-mb00/pci-dma.c
@@ -17,7 +17,7 @@
 #include <linux/highmem.h>
 #include <asm/io.h>
 
-void *dma_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, int gfp)
+void *dma_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t gfp)
 {
         void *ret;
 
diff --git a/arch/frv/mm/dma-alloc.c b/arch/frv/mm/dma-alloc.c
index 4b38d45435f6..cfc4f97490c6 100644
--- a/arch/frv/mm/dma-alloc.c
+++ b/arch/frv/mm/dma-alloc.c
@@ -81,7 +81,7 @@ static int map_page(unsigned long va, unsigned long pa, pgprot_t prot)
  * portions of the kernel with single large page TLB entries, and
  * still get unique uncached pages for consistent DMA.
  */
-void *consistent_alloc(int gfp, size_t size, dma_addr_t *dma_handle)
+void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *dma_handle)
 {
         struct vm_struct *area;
         unsigned long page, va, pa;
diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c
index 80f8ef013939..1ba02baf2f94 100644
--- a/arch/ia64/hp/common/hwsw_iommu.c
+++ b/arch/ia64/hp/common/hwsw_iommu.c
@@ -71,7 +71,7 @@ hwsw_init (void)
 }
 
 void *
-hwsw_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, int flags)
+hwsw_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags)
 {
         if (use_swiotlb(dev))
                 return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 11957598a8b9..21bffba78b6d 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -1076,7 +1076,7 @@ void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
  * See Documentation/DMA-mapping.txt
  */
 void *
-sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, int flags)
+sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags)
 {
         struct ioc *ioc;
         void *addr;
diff --git a/arch/ia64/lib/swiotlb.c b/arch/ia64/lib/swiotlb.c
index a604efc7f6c9..3ebbb3c8ba36 100644
--- a/arch/ia64/lib/swiotlb.c
+++ b/arch/ia64/lib/swiotlb.c
@@ -314,7 +314,7 @@ sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
 
 void *
 swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-                       dma_addr_t *dma_handle, int flags)
+                       dma_addr_t *dma_handle, gfp_t flags)
 {
         unsigned long dev_addr;
         void *ret;
diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h
index d0ee635daf2e..e5f5a4e51f70 100644
--- a/arch/ia64/sn/kernel/xpc.h
+++ b/arch/ia64/sn/kernel/xpc.h
@@ -939,7 +939,7 @@ xpc_map_bte_errors(bte_result_t error)
 
 
 static inline void *
-xpc_kmalloc_cacheline_aligned(size_t size, int flags, void **base)
+xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
 {
         /* see if kmalloc will give us cachline aligned memory by default */
         *base = kmalloc(size, flags);
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
index 0e4b9ad9ef02..75e6e874bebf 100644
--- a/arch/ia64/sn/pci/pci_dma.c
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -75,7 +75,7 @@ EXPORT_SYMBOL(sn_dma_set_mask);
  * more information.
  */
 void *sn_dma_alloc_coherent(struct device *dev, size_t size,
-                            dma_addr_t * dma_handle, int flags)
+                            dma_addr_t * dma_handle, gfp_t flags)
 {
         void *cpuaddr;
         unsigned long phys_addr;
diff --git a/arch/mips/mm/dma-coherent.c b/arch/mips/mm/dma-coherent.c
index 97a50d38c98f..a617f8c327e8 100644
--- a/arch/mips/mm/dma-coherent.c
+++ b/arch/mips/mm/dma-coherent.c
@@ -18,7 +18,7 @@
 #include <asm/io.h>
 
 void *dma_alloc_noncoherent(struct device *dev, size_t size,
-        dma_addr_t * dma_handle, int gfp)
+        dma_addr_t * dma_handle, gfp_t gfp)
 {
         void *ret;
         /* ignore region specifiers */
@@ -39,7 +39,7 @@ void *dma_alloc_noncoherent(struct device *dev, size_t size,
 EXPORT_SYMBOL(dma_alloc_noncoherent);
 
 void *dma_alloc_coherent(struct device *dev, size_t size,
-        dma_addr_t * dma_handle, int gfp)
+        dma_addr_t * dma_handle, gfp_t gfp)
         __attribute__((alias("dma_alloc_noncoherent")));
 
 EXPORT_SYMBOL(dma_alloc_coherent);
diff --git a/arch/mips/mm/dma-ip27.c b/arch/mips/mm/dma-ip27.c
index aa7c94b5d781..8da19fd22ac6 100644
--- a/arch/mips/mm/dma-ip27.c
+++ b/arch/mips/mm/dma-ip27.c
@@ -22,7 +22,7 @@
         pdev_to_baddr(to_pci_dev(dev), (addr))
 
 void *dma_alloc_noncoherent(struct device *dev, size_t size,
-        dma_addr_t * dma_handle, int gfp)
+        dma_addr_t * dma_handle, gfp_t gfp)
 {
         void *ret;
 
@@ -44,7 +44,7 @@ void *dma_alloc_noncoherent(struct device *dev, size_t size,
 EXPORT_SYMBOL(dma_alloc_noncoherent);
 
 void *dma_alloc_coherent(struct device *dev, size_t size,
-        dma_addr_t * dma_handle, int gfp)
+        dma_addr_t * dma_handle, gfp_t gfp)
         __attribute__((alias("dma_alloc_noncoherent")));
 
 EXPORT_SYMBOL(dma_alloc_coherent);
diff --git a/arch/mips/mm/dma-ip32.c b/arch/mips/mm/dma-ip32.c
index 2cbe196c35fb..a7e3072ff78d 100644
--- a/arch/mips/mm/dma-ip32.c
+++ b/arch/mips/mm/dma-ip32.c
@@ -37,7 +37,7 @@
 #define RAM_OFFSET_MASK 0x3fffffff
 
 void *dma_alloc_noncoherent(struct device *dev, size_t size,
-        dma_addr_t * dma_handle, int gfp)
+        dma_addr_t * dma_handle, gfp_t gfp)
 {
         void *ret;
         /* ignore region specifiers */
@@ -61,7 +61,7 @@ void *dma_alloc_noncoherent(struct device *dev, size_t size,
 EXPORT_SYMBOL(dma_alloc_noncoherent);
 
 void *dma_alloc_coherent(struct device *dev, size_t size,
-        dma_addr_t * dma_handle, int gfp)
+        dma_addr_t * dma_handle, gfp_t gfp)
 {
         void *ret;
 
diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c
index 59e54f12212e..4ce02028a292 100644
--- a/arch/mips/mm/dma-noncoherent.c
+++ b/arch/mips/mm/dma-noncoherent.c
@@ -24,7 +24,7 @@
  */
 
 void *dma_alloc_noncoherent(struct device *dev, size_t size,
-        dma_addr_t * dma_handle, int gfp)
+        dma_addr_t * dma_handle, gfp_t gfp)
 {
         void *ret;
         /* ignore region specifiers */
@@ -45,7 +45,7 @@ void *dma_alloc_noncoherent(struct device *dev, size_t size,
 EXPORT_SYMBOL(dma_alloc_noncoherent);
 
 void *dma_alloc_coherent(struct device *dev, size_t size,
-        dma_addr_t * dma_handle, int gfp)
+        dma_addr_t * dma_handle, gfp_t gfp)
 {
         void *ret;
 
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index 368cc095c99f..844c2877a2e3 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -349,7 +349,7 @@ pcxl_dma_init(void)
 
 __initcall(pcxl_dma_init);
 
-static void * pa11_dma_alloc_consistent (struct device *dev, size_t size, dma_addr_t *dma_handle, int flag)
+static void * pa11_dma_alloc_consistent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag)
 {
         unsigned long vaddr;
         unsigned long paddr;
@@ -502,13 +502,13 @@ struct hppa_dma_ops pcxl_dma_ops = {
 };
 
 static void *fail_alloc_consistent(struct device *dev, size_t size,
-                                   dma_addr_t *dma_handle, int flag)
+                                   dma_addr_t *dma_handle, gfp_t flag)
 {
         return NULL;
 }
 
 static void *pa11_dma_alloc_noncoherent(struct device *dev, size_t size,
-                                        dma_addr_t *dma_handle, int flag)
+                                        dma_addr_t *dma_handle, gfp_t flag)
 {
         void *addr = NULL;
 
diff --git a/arch/ppc/8xx_io/cs4218.h b/arch/ppc/8xx_io/cs4218.h
index a3c38c5a5db2..f1c7392255f8 100644
--- a/arch/ppc/8xx_io/cs4218.h
+++ b/arch/ppc/8xx_io/cs4218.h
@@ -78,7 +78,7 @@ typedef struct {
         const char *name2;
         void (*open)(void);
         void (*release)(void);
-        void *(*dma_alloc)(unsigned int, int);
+        void *(*dma_alloc)(unsigned int, gfp_t);
         void (*dma_free)(void *, unsigned int);
         int (*irqinit)(void);
 #ifdef MODULE
diff --git a/arch/ppc/8xx_io/cs4218_tdm.c b/arch/ppc/8xx_io/cs4218_tdm.c
index 2ca9ec7ec3a7..532caa388dc2 100644
--- a/arch/ppc/8xx_io/cs4218_tdm.c
+++ b/arch/ppc/8xx_io/cs4218_tdm.c
@@ -318,7 +318,7 @@ struct cs_sound_settings {
 
 static struct cs_sound_settings sound;
 
-static void *CS_Alloc(unsigned int size, int flags);
+static void *CS_Alloc(unsigned int size, gfp_t flags);
 static void CS_Free(void *ptr, unsigned int size);
 static int CS_IrqInit(void);
 #ifdef MODULE
@@ -959,7 +959,7 @@ static TRANS transCSNormalRead = {
 
 /*** Low level stuff *********************************************************/
 
-static void *CS_Alloc(unsigned int size, int flags)
+static void *CS_Alloc(unsigned int size, gfp_t flags)
 {
         int order;
 
diff --git a/arch/ppc/kernel/dma-mapping.c b/arch/ppc/kernel/dma-mapping.c
index 8edee806dae7..0f710d2baec6 100644
--- a/arch/ppc/kernel/dma-mapping.c
+++ b/arch/ppc/kernel/dma-mapping.c
@@ -115,7 +115,7 @@ static struct vm_region consistent_head = {
 };
 
 static struct vm_region *
-vm_region_alloc(struct vm_region *head, size_t size, int gfp)
+vm_region_alloc(struct vm_region *head, size_t size, gfp_t gfp)
 {
         unsigned long addr = head->vm_start, end = head->vm_end - size;
         unsigned long flags;
@@ -173,7 +173,7 @@ static struct vm_region *vm_region_find(struct vm_region *head, unsigned long ad
  * virtual and bus address for that space.
  */
 void *
-__dma_alloc_coherent(size_t size, dma_addr_t *handle, int gfp)
+__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
 {
         struct page *page;
         struct vm_region *c;
diff --git a/arch/ppc/mm/pgtable.c b/arch/ppc/mm/pgtable.c
index 81a3d7446d37..43505b1fc5d8 100644
--- a/arch/ppc/mm/pgtable.c
+++ b/arch/ppc/mm/pgtable.c
@@ -114,9 +114,9 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
         struct page *ptepage;
 
 #ifdef CONFIG_HIGHPTE
-        int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT;
+        gfp_t flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT;
 #else
-        int flags = GFP_KERNEL | __GFP_REPEAT;
+        gfp_t flags = GFP_KERNEL | __GFP_REPEAT;
 #endif
 
         ptepage = alloc_pages(flags, 0);
diff --git a/arch/sh/boards/renesas/rts7751r2d/mach.c b/arch/sh/boards/renesas/rts7751r2d/mach.c
index 1efc18e786d5..610740512d56 100644
--- a/arch/sh/boards/renesas/rts7751r2d/mach.c
+++ b/arch/sh/boards/renesas/rts7751r2d/mach.c
@@ -23,7 +23,7 @@ extern void init_rts7751r2d_IRQ(void);
 extern void *rts7751r2d_ioremap(unsigned long, unsigned long);
 extern int rts7751r2d_irq_demux(int irq);
 
-extern void *voyagergx_consistent_alloc(struct device *, size_t, dma_addr_t *, int);
+extern void *voyagergx_consistent_alloc(struct device *, size_t, dma_addr_t *, gfp_t);
 extern int voyagergx_consistent_free(struct device *, size_t, void *, dma_addr_t);
 
 /*
diff --git a/arch/sh/cchips/voyagergx/consistent.c b/arch/sh/cchips/voyagergx/consistent.c
index 5b92585a38d2..3d9a02c093a3 100644
--- a/arch/sh/cchips/voyagergx/consistent.c
+++ b/arch/sh/cchips/voyagergx/consistent.c
@@ -31,7 +31,7 @@ static LIST_HEAD(voya_alloc_list);
 #define OHCI_SRAM_SIZE  0x10000
 
 void *voyagergx_consistent_alloc(struct device *dev, size_t size,
-                                 dma_addr_t *handle, int flag)
+                                 dma_addr_t *handle, gfp_t flag)
 {
         struct list_head *list = &voya_alloc_list;
         struct voya_alloc_entry *entry;
diff --git a/arch/sh/drivers/pci/dma-dreamcast.c b/arch/sh/drivers/pci/dma-dreamcast.c
index 83de7ef4e7df..e12418bb1fa5 100644
--- a/arch/sh/drivers/pci/dma-dreamcast.c
+++ b/arch/sh/drivers/pci/dma-dreamcast.c
@@ -33,7 +33,7 @@
 static int gapspci_dma_used = 0;
 
 void *dreamcast_consistent_alloc(struct device *dev, size_t size,
-                                 dma_addr_t *dma_handle, int flag)
+                                 dma_addr_t *dma_handle, gfp_t flag)
 {
         unsigned long buf;
 
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index 1f7af0c73cf4..df3a9e452cc5 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -11,7 +11,7 @@
 #include <linux/dma-mapping.h>
 #include <asm/io.h>
 
-void *consistent_alloc(int gfp, size_t size, dma_addr_t *handle)
+void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *handle)
 {
         struct page *page, *end, *free;
         void *ret;
diff --git a/arch/sparc64/solaris/socksys.c b/arch/sparc64/solaris/socksys.c
index d7c1c76582cc..fc6669e8dde1 100644
--- a/arch/sparc64/solaris/socksys.c
+++ b/arch/sparc64/solaris/socksys.c
@@ -49,7 +49,7 @@ IPPROTO_EGP, IPPROTO_PUP, IPPROTO_UDP, IPPROTO_IDP, IPPROTO_RAW,
 
 #else
 
-extern void * mykmalloc(size_t s, int gfp);
+extern void * mykmalloc(size_t s, gfp_t gfp);
 extern void mykfree(void *);
 
 #endif
diff --git a/arch/sparc64/solaris/timod.c b/arch/sparc64/solaris/timod.c
index aaad29c35c83..b84e5456b025 100644
--- a/arch/sparc64/solaris/timod.c
+++ b/arch/sparc64/solaris/timod.c
@@ -39,7 +39,7 @@ static char * page = NULL ;
 
 #else
 
-void * mykmalloc(size_t s, int gfp)
+void * mykmalloc(size_t s, gfp_t gfp)
 {
         static char * page;
         static size_t free;
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index ea008b031a8f..462cc9d65386 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -252,7 +252,7 @@ void paging_init(void)
 #endif
 }
 
-struct page *arch_validate(struct page *page, int mask, int order)
+struct page *arch_validate(struct page *page, gfp_t mask, int order)
 {
         unsigned long addr, zero = 0;
         int i;
diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c
index ea65db679e9c..0d73ceeece72 100644
--- a/arch/um/kernel/process_kern.c
+++ b/arch/um/kernel/process_kern.c
@@ -80,7 +80,7 @@ void free_stack(unsigned long stack, int order)
 unsigned long alloc_stack(int order, int atomic)
 {
         unsigned long page;
-        int flags = GFP_KERNEL;
+        gfp_t flags = GFP_KERNEL;
 
         if (atomic)
                 flags = GFP_ATOMIC;
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index cf0a0315d586..88be97c96987 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -187,7 +187,7 @@ static void flush_gart(struct device *dev)
 
 /* Allocate DMA memory on node near device */
 noinline
-static void *dma_alloc_pages(struct device *dev, unsigned gfp, unsigned order)
+static void *dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
 {
         struct page *page;
         int node;
@@ -204,7 +204,7 @@ static void *dma_alloc_pages(struct device *dev, unsigned gfp, unsigned order)
  */
 void *
 dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
-                   unsigned gfp)
+                   gfp_t gfp)
 {
         void *memory;
         unsigned long dma_mask = 0;
diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c
index 67d90b89af0b..5a981dca87ff 100644
--- a/arch/x86_64/kernel/pci-nommu.c
+++ b/arch/x86_64/kernel/pci-nommu.c
@@ -24,7 +24,7 @@ EXPORT_SYMBOL(iommu_sac_force);
  */
 
 void *dma_alloc_coherent(struct device *hwdev, size_t size,
-                         dma_addr_t *dma_handle, unsigned gfp)
+                         dma_addr_t *dma_handle, gfp_t gfp)
 {
         void *ret;
         u64 mask;
diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index 84fde258cf85..1ff82268e8ea 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -29,7 +29,7 @@
  */
 
 void *
-dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, int gfp)
+dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
 {
         void *ret;
 
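Editorial note: all of the architecture hunks above change only the type of the
allocation-flags parameter, so existing callers that already pass GFP_* macros
compile unchanged. For illustration only, a hypothetical caller follows;
setup_ring and ring_dma are invented names, not from this patch.

#include <linux/dma-mapping.h>

/* Hypothetical helper: allocate one page of coherent DMA memory.
 * After the conversions above, the final argument is type-checked
 * as gfp_t rather than accepted as a bare int. */
static void *setup_ring(struct device *dev, dma_addr_t *ring_dma)
{
        return dma_alloc_coherent(dev, PAGE_SIZE, ring_dma, GFP_KERNEL);
}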
diff --git a/drivers/block/as-iosched.c b/drivers/block/as-iosched.c
index 95c0a3690b0f..4081c36c8c19 100644
--- a/drivers/block/as-iosched.c
+++ b/drivers/block/as-iosched.c
@@ -98,7 +98,6 @@ struct as_data {
 
         struct as_rq *next_arq[2];      /* next in sort order */
         sector_t last_sector[2];        /* last REQ_SYNC & REQ_ASYNC sectors */
-        struct list_head *dispatch;     /* driver dispatch queue */
         struct list_head *hash;         /* request hash */
 
         unsigned long exit_prob;        /* probability a task will exit while
@@ -239,6 +238,25 @@ static struct io_context *as_get_io_context(void)
         return ioc;
 }
 
+static void as_put_io_context(struct as_rq *arq)
+{
+        struct as_io_context *aic;
+
+        if (unlikely(!arq->io_context))
+                return;
+
+        aic = arq->io_context->aic;
+
+        if (arq->is_sync == REQ_SYNC && aic) {
+                spin_lock(&aic->lock);
+                set_bit(AS_TASK_IORUNNING, &aic->state);
+                aic->last_end_request = jiffies;
+                spin_unlock(&aic->lock);
+        }
+
+        put_io_context(arq->io_context);
+}
+
 /*
  * the back merge hash support functions
  */
@@ -261,14 +279,6 @@ static inline void as_del_arq_hash(struct as_rq *arq)
                 __as_del_arq_hash(arq);
 }
 
-static void as_remove_merge_hints(request_queue_t *q, struct as_rq *arq)
-{
-        as_del_arq_hash(arq);
-
-        if (q->last_merge == arq->request)
-                q->last_merge = NULL;
-}
-
 static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq)
 {
         struct request *rq = arq->request;
@@ -312,7 +322,7 @@ static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset)
                 BUG_ON(!arq->on_hash);
 
                 if (!rq_mergeable(__rq)) {
-                        as_remove_merge_hints(ad->q, arq);
+                        as_del_arq_hash(arq);
                         continue;
                 }
 
@@ -950,23 +960,12 @@ static void as_completed_request(request_queue_t *q, struct request *rq)
 
         WARN_ON(!list_empty(&rq->queuelist));
 
-        if (arq->state == AS_RQ_PRESCHED) {
-                WARN_ON(arq->io_context);
-                goto out;
-        }
-
-        if (arq->state == AS_RQ_MERGED)
-                goto out_ioc;
-
         if (arq->state != AS_RQ_REMOVED) {
                 printk("arq->state %d\n", arq->state);
                 WARN_ON(1);
                 goto out;
         }
 
-        if (!blk_fs_request(rq))
-                goto out;
-
         if (ad->changed_batch && ad->nr_dispatched == 1) {
                 kblockd_schedule_work(&ad->antic_work);
                 ad->changed_batch = 0;
@@ -1001,21 +1000,7 @@ static void as_completed_request(request_queue_t *q, struct request *rq)
                 }
         }
 
-out_ioc:
-        if (!arq->io_context)
-                goto out;
-
-        if (arq->is_sync == REQ_SYNC) {
-                struct as_io_context *aic = arq->io_context->aic;
-                if (aic) {
-                        spin_lock(&aic->lock);
-                        set_bit(AS_TASK_IORUNNING, &aic->state);
-                        aic->last_end_request = jiffies;
-                        spin_unlock(&aic->lock);
-                }
-        }
-
-        put_io_context(arq->io_context);
+        as_put_io_context(arq);
 out:
         arq->state = AS_RQ_POSTSCHED;
 }
@@ -1047,73 +1032,11 @@ static void as_remove_queued_request(request_queue_t *q, struct request *rq)
         ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
 
         list_del_init(&arq->fifo);
-        as_remove_merge_hints(q, arq);
+        as_del_arq_hash(arq);
         as_del_arq_rb(ad, arq);
 }
 
 /*
- * as_remove_dispatched_request is called to remove a request which has gone
- * to the dispatch list.
- */
-static void as_remove_dispatched_request(request_queue_t *q, struct request *rq)
-{
-        struct as_rq *arq = RQ_DATA(rq);
-        struct as_io_context *aic;
-
-        if (!arq) {
-                WARN_ON(1);
-                return;
-        }
-
-        WARN_ON(arq->state != AS_RQ_DISPATCHED);
-        WARN_ON(ON_RB(&arq->rb_node));
-        if (arq->io_context && arq->io_context->aic) {
-                aic = arq->io_context->aic;
-                if (aic) {
-                        WARN_ON(!atomic_read(&aic->nr_dispatched));
-                        atomic_dec(&aic->nr_dispatched);
-                }
-        }
-}
-
-/*
- * as_remove_request is called when a driver has finished with a request.
- * This should be only called for dispatched requests, but for some reason
- * a POWER4 box running hwscan it does not.
- */
-static void as_remove_request(request_queue_t *q, struct request *rq)
-{
-        struct as_rq *arq = RQ_DATA(rq);
-
-        if (unlikely(arq->state == AS_RQ_NEW))
-                goto out;
-
-        if (ON_RB(&arq->rb_node)) {
-                if (arq->state != AS_RQ_QUEUED) {
-                        printk("arq->state %d\n", arq->state);
-                        WARN_ON(1);
-                        goto out;
-                }
-                /*
-                 * We'll lose the aliased request(s) here. I don't think this
-                 * will ever happen, but if it does, hopefully someone will
-                 * report it.
-                 */
-                WARN_ON(!list_empty(&rq->queuelist));
-                as_remove_queued_request(q, rq);
-        } else {
-                if (arq->state != AS_RQ_DISPATCHED) {
-                        printk("arq->state %d\n", arq->state);
-                        WARN_ON(1);
-                        goto out;
-                }
-                as_remove_dispatched_request(q, rq);
-        }
-out:
-        arq->state = AS_RQ_REMOVED;
-}
-
-/*
  * as_fifo_expired returns 0 if there are no expired reads on the fifo,
  * 1 otherwise. It is ratelimited so that we only perform the check once per
  * `fifo_expire' interval. Otherwise a large number of expired requests
@@ -1165,7 +1088,6 @@ static inline int as_batch_expired(struct as_data *ad)
 static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
 {
         struct request *rq = arq->request;
-        struct list_head *insert;
         const int data_dir = arq->is_sync;
 
         BUG_ON(!ON_RB(&arq->rb_node));
@@ -1198,13 +1120,13 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
         /*
          * take it off the sort and fifo list, add to dispatch queue
          */
-        insert = ad->dispatch->prev;
-
         while (!list_empty(&rq->queuelist)) {
                 struct request *__rq = list_entry_rq(rq->queuelist.next);
                 struct as_rq *__arq = RQ_DATA(__rq);
 
-                list_move_tail(&__rq->queuelist, ad->dispatch);
+                list_del(&__rq->queuelist);
+
+                elv_dispatch_add_tail(ad->q, __rq);
 
                 if (__arq->io_context && __arq->io_context->aic)
                         atomic_inc(&__arq->io_context->aic->nr_dispatched);
@@ -1218,7 +1140,8 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
         as_remove_queued_request(ad->q, rq);
         WARN_ON(arq->state != AS_RQ_QUEUED);
 
-        list_add(&rq->queuelist, insert);
+        elv_dispatch_sort(ad->q, rq);
+
         arq->state = AS_RQ_DISPATCHED;
         if (arq->io_context && arq->io_context->aic)
                 atomic_inc(&arq->io_context->aic->nr_dispatched);
@@ -1230,12 +1153,42 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
  * read/write expire, batch expire, etc, and moves it to the dispatch
  * queue. Returns 1 if a request was found, 0 otherwise.
  */
-static int as_dispatch_request(struct as_data *ad)
+static int as_dispatch_request(request_queue_t *q, int force)
 {
+        struct as_data *ad = q->elevator->elevator_data;
         struct as_rq *arq;
         const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]);
         const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]);
 
+        if (unlikely(force)) {
+                /*
+                 * Forced dispatch, accounting is useless.  Reset
+                 * accounting states and dump fifo_lists.  Note that
+                 * batch_data_dir is reset to REQ_SYNC to avoid
+                 * screwing write batch accounting as write batch
+                 * accounting occurs on W->R transition.
+                 */
+                int dispatched = 0;
+
+                ad->batch_data_dir = REQ_SYNC;
+                ad->changed_batch = 0;
+                ad->new_batch = 0;
+
+                while (ad->next_arq[REQ_SYNC]) {
+                        as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]);
+                        dispatched++;
+                }
+                ad->last_check_fifo[REQ_SYNC] = jiffies;
+
+                while (ad->next_arq[REQ_ASYNC]) {
+                        as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]);
+                        dispatched++;
+                }
+                ad->last_check_fifo[REQ_ASYNC] = jiffies;
+
+                return dispatched;
+        }
+
         /* Signal that the write batch was uncontended, so we can't time it */
         if (ad->batch_data_dir == REQ_ASYNC && !reads) {
                 if (ad->current_write_count == 0 || !writes)
@@ -1359,20 +1312,6 @@ fifo_expired:
 	return 1;
 }
 
-static struct request *as_next_request(request_queue_t *q)
-{
-	struct as_data *ad = q->elevator->elevator_data;
-	struct request *rq = NULL;
-
-	/*
-	 * if there are still requests on the dispatch queue, grab the first
-	 */
-	if (!list_empty(ad->dispatch) || as_dispatch_request(ad))
-		rq = list_entry_rq(ad->dispatch->next);
-
-	return rq;
-}
-
 /*
  * Add arq to a list behind alias
  */
@@ -1404,17 +1343,25 @@ as_add_aliased_request(struct as_data *ad, struct as_rq *arq, struct as_rq *alias)
 	/*
 	 * Don't want to have to handle merges.
 	 */
-	as_remove_merge_hints(ad->q, arq);
+	as_del_arq_hash(arq);
 }
 
 /*
  * add arq to rbtree and fifo
  */
-static void as_add_request(struct as_data *ad, struct as_rq *arq)
+static void as_add_request(request_queue_t *q, struct request *rq)
 {
+	struct as_data *ad = q->elevator->elevator_data;
+	struct as_rq *arq = RQ_DATA(rq);
 	struct as_rq *alias;
 	int data_dir;
 
+	if (arq->state != AS_RQ_PRESCHED) {
+		printk("arq->state: %d\n", arq->state);
+		WARN_ON(1);
+	}
+	arq->state = AS_RQ_NEW;
+
 	if (rq_data_dir(arq->request) == READ
 			|| current->flags&PF_SYNCWRITE)
 		arq->is_sync = 1;
@@ -1437,12 +1384,8 @@ static void as_add_request(struct as_data *ad, struct as_rq *arq)
 	arq->expires = jiffies + ad->fifo_expire[data_dir];
 	list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
 
-	if (rq_mergeable(arq->request)) {
+	if (rq_mergeable(arq->request))
 		as_add_arq_hash(ad, arq);
-
-		if (!ad->q->last_merge)
-			ad->q->last_merge = arq->request;
-	}
 	as_update_arq(ad, arq);	/* keep state machine up to date */
 
 	} else {
@@ -1463,96 +1406,24 @@ static void as_add_request(struct as_data *ad, struct as_rq *arq)
 	arq->state = AS_RQ_QUEUED;
 }
 
-static void as_deactivate_request(request_queue_t *q, struct request *rq)
+static void as_activate_request(request_queue_t *q, struct request *rq)
 {
-	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq = RQ_DATA(rq);
 
-	if (arq) {
-		if (arq->state == AS_RQ_REMOVED) {
-			arq->state = AS_RQ_DISPATCHED;
-			if (arq->io_context && arq->io_context->aic)
-				atomic_inc(&arq->io_context->aic->nr_dispatched);
-		}
-	} else
-		WARN_ON(blk_fs_request(rq)
-			&& (!(rq->flags & (REQ_HARDBARRIER|REQ_SOFTBARRIER))) );
-
-	/* Stop anticipating - let this request get through */
-	as_antic_stop(ad);
-}
-
-/*
- * requeue the request. The request has not been completed, nor is it a
- * new request, so don't touch accounting.
- */
-static void as_requeue_request(request_queue_t *q, struct request *rq)
-{
-	as_deactivate_request(q, rq);
-	list_add(&rq->queuelist, &q->queue_head);
-}
-
-/*
- * Account a request that is inserted directly onto the dispatch queue.
- * arq->io_context->aic->nr_dispatched should not need to be incremented
- * because only new requests should come through here: requeues go through
- * our explicit requeue handler.
- */
-static void as_account_queued_request(struct as_data *ad, struct request *rq)
-{
-	if (blk_fs_request(rq)) {
-		struct as_rq *arq = RQ_DATA(rq);
-		arq->state = AS_RQ_DISPATCHED;
-		ad->nr_dispatched++;
-	}
+	WARN_ON(arq->state != AS_RQ_DISPATCHED);
+	arq->state = AS_RQ_REMOVED;
+	if (arq->io_context && arq->io_context->aic)
+		atomic_dec(&arq->io_context->aic->nr_dispatched);
 }
 
-static void
-as_insert_request(request_queue_t *q, struct request *rq, int where)
+static void as_deactivate_request(request_queue_t *q, struct request *rq)
 {
-	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq = RQ_DATA(rq);
 
-	if (arq) {
-		if (arq->state != AS_RQ_PRESCHED) {
-			printk("arq->state: %d\n", arq->state);
-			WARN_ON(1);
-		}
-		arq->state = AS_RQ_NEW;
-	}
-
-	/* barriers must flush the reorder queue */
-	if (unlikely(rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)
-			&& where == ELEVATOR_INSERT_SORT)) {
-		WARN_ON(1);
-		where = ELEVATOR_INSERT_BACK;
-	}
-
-	switch (where) {
-		case ELEVATOR_INSERT_BACK:
-			while (ad->next_arq[REQ_SYNC])
-				as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]);
-
-			while (ad->next_arq[REQ_ASYNC])
-				as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]);
-
-			list_add_tail(&rq->queuelist, ad->dispatch);
-			as_account_queued_request(ad, rq);
-			as_antic_stop(ad);
-			break;
-		case ELEVATOR_INSERT_FRONT:
-			list_add(&rq->queuelist, ad->dispatch);
-			as_account_queued_request(ad, rq);
-			as_antic_stop(ad);
-			break;
-		case ELEVATOR_INSERT_SORT:
-			BUG_ON(!blk_fs_request(rq));
-			as_add_request(ad, arq);
-			break;
-		default:
-			BUG();
-			return;
-	}
+	WARN_ON(arq->state != AS_RQ_REMOVED);
+	arq->state = AS_RQ_DISPATCHED;
+	if (arq->io_context && arq->io_context->aic)
+		atomic_inc(&arq->io_context->aic->nr_dispatched);
 }
 
 /*
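The activate/deactivate pair above is easiest to read as a state machine. The sketch below is inferred from the WARN_ON()s in this diff — the state names are real as-iosched identifiers, the transition table is a reconstruction, so treat it as a sketch:

#include <assert.h>

enum as_rq_state { PRESCHED, NEW, QUEUED, DISPATCHED, REMOVED, POSTSCHED };

static enum as_rq_state state = PRESCHED;

static void transition(enum as_rq_state from, enum as_rq_state to)
{
	assert(state == from);		/* the kernel warns instead of asserting */
	state = to;
}

int main(void)
{
	transition(PRESCHED, NEW);	/* as_add_request() entry check */
	transition(NEW, QUEUED);	/* queued on fifo + rbtree */
	transition(QUEUED, DISPATCHED);	/* as_move_to_dispatch() */
	transition(DISPATCHED, REMOVED);	/* as_activate_request(): driver took it */
	transition(REMOVED, DISPATCHED);	/* as_deactivate_request(): requeued */
	transition(DISPATCHED, REMOVED);	/* driver takes it again */
	transition(REMOVED, POSTSCHED);	/* completion path (assumed) */
	return 0;
}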
@@ -1565,12 +1436,8 @@ static int as_queue_empty(request_queue_t *q)
 {
 	struct as_data *ad = q->elevator->elevator_data;
 
-	if (!list_empty(&ad->fifo_list[REQ_ASYNC])
-		|| !list_empty(&ad->fifo_list[REQ_SYNC])
-		|| !list_empty(ad->dispatch))
-		return 0;
-
-	return 1;
+	return list_empty(&ad->fifo_list[REQ_ASYNC])
+		&& list_empty(&ad->fifo_list[REQ_SYNC]);
 }
 
 static struct request *
@@ -1608,15 +1475,6 @@ as_merge(request_queue_t *q, struct request **req, struct bio *bio)
 	int ret;
 
 	/*
-	 * try last_merge to avoid going to hash
-	 */
-	ret = elv_try_last_merge(q, bio);
-	if (ret != ELEVATOR_NO_MERGE) {
-		__rq = q->last_merge;
-		goto out_insert;
-	}
-
-	/*
 	 * see if the merge hash can satisfy a back merge
 	 */
 	__rq = as_find_arq_hash(ad, bio->bi_sector);
@@ -1644,9 +1502,6 @@ as_merge(request_queue_t *q, struct request **req, struct bio *bio)
 
 	return ELEVATOR_NO_MERGE;
 out:
-	if (rq_mergeable(__rq))
-		q->last_merge = __rq;
-out_insert:
 	if (ret) {
 		if (rq_mergeable(__rq))
 			as_hot_arq_hash(ad, RQ_DATA(__rq));
@@ -1693,9 +1548,6 @@ static void as_merged_request(request_queue_t *q, struct request *req)
 	 * behind the disk head. We currently don't bother adjusting.
 	 */
 	}
-
-	if (arq->on_hash)
-		q->last_merge = req;
 }
 
 static void
@@ -1763,6 +1615,7 @@ as_merged_requests(request_queue_t *q, struct request *req,
 	 * kill knowledge of next, this one is a goner
 	 */
 	as_remove_queued_request(q, next);
+	as_put_io_context(anext);
 
 	anext->state = AS_RQ_MERGED;
 }
@@ -1782,7 +1635,7 @@ static void as_work_handler(void *data)
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	if (as_next_request(q))
+	if (!as_queue_empty(q))
 		q->request_fn(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
@@ -1797,7 +1650,9 @@ static void as_put_request(request_queue_t *q, struct request *rq)
 		return;
 	}
 
-	if (arq->state != AS_RQ_POSTSCHED && arq->state != AS_RQ_PRESCHED) {
+	if (unlikely(arq->state != AS_RQ_POSTSCHED &&
+		     arq->state != AS_RQ_PRESCHED &&
+		     arq->state != AS_RQ_MERGED)) {
 		printk("arq->state %d\n", arq->state);
 		WARN_ON(1);
 	}
@@ -1807,7 +1662,7 @@ static void as_put_request(request_queue_t *q, struct request *rq)
 }
 
 static int as_set_request(request_queue_t *q, struct request *rq,
-			  struct bio *bio, int gfp_mask)
+			  struct bio *bio, gfp_t gfp_mask)
 {
 	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask);
@@ -1907,7 +1762,6 @@ static int as_init_queue(request_queue_t *q, elevator_t *e)
 	INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
 	ad->sort_list[REQ_SYNC] = RB_ROOT;
 	ad->sort_list[REQ_ASYNC] = RB_ROOT;
-	ad->dispatch = &q->queue_head;
 	ad->fifo_expire[REQ_SYNC] = default_read_expire;
 	ad->fifo_expire[REQ_ASYNC] = default_write_expire;
 	ad->antic_expire = default_antic_expire;
@@ -2072,10 +1926,9 @@ static struct elevator_type iosched_as = {
 		.elevator_merge_fn =		as_merge,
 		.elevator_merged_fn =		as_merged_request,
 		.elevator_merge_req_fn =	as_merged_requests,
-		.elevator_next_req_fn =		as_next_request,
-		.elevator_add_req_fn =		as_insert_request,
-		.elevator_remove_req_fn =	as_remove_request,
-		.elevator_requeue_req_fn =	as_requeue_request,
+		.elevator_dispatch_fn =		as_dispatch_request,
+		.elevator_add_req_fn =		as_add_request,
+		.elevator_activate_req_fn =	as_activate_request,
 		.elevator_deactivate_req_fn =	as_deactivate_request,
 		.elevator_queue_empty_fn =	as_queue_empty,
 		.elevator_completed_req_fn =	as_completed_request,
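For reference, the shape of an io scheduler's hook table after this change, abridged from the hunk above (field names are real; the surrounding .ops nesting of struct elevator_type is elided):

/* what goes away: next_req, remove_req, requeue_req
 * what arrives:   dispatch (pull model), activate/deactivate (driver handoff) */
static struct elevator_ops as_ops_sketch = {
	.elevator_merge_fn		= as_merge,
	.elevator_dispatch_fn		= as_dispatch_request,	/* (q, force) -> nr moved */
	.elevator_add_req_fn		= as_add_request,	/* now takes (q, rq) */
	.elevator_activate_req_fn	= as_activate_request,	/* driver started rq */
	.elevator_deactivate_req_fn	= as_deactivate_request,/* rq was requeued */
	.elevator_queue_empty_fn	= as_queue_empty,
};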
diff --git a/drivers/block/cfq-iosched.c b/drivers/block/cfq-iosched.c
index cd056e7e64ec..94690e4d41e0 100644
--- a/drivers/block/cfq-iosched.c
+++ b/drivers/block/cfq-iosched.c
@@ -84,7 +84,6 @@ static int cfq_max_depth = 2;
 	(node)->rb_left = NULL;	\
 } while (0)
 #define RB_CLEAR_ROOT(root)	((root)->rb_node = NULL)
-#define ON_RB(node)		((node)->rb_color != RB_NONE)
 #define rb_entry_crq(node)	rb_entry((node), struct cfq_rq, rb_node)
 #define rq_rb_key(rq)		(rq)->sector
 
@@ -271,10 +270,7 @@ CFQ_CFQQ_FNS(expired);
 #undef CFQ_CFQQ_FNS
 
 enum cfq_rq_state_flags {
-	CFQ_CRQ_FLAG_in_flight = 0,
-	CFQ_CRQ_FLAG_in_driver,
-	CFQ_CRQ_FLAG_is_sync,
-	CFQ_CRQ_FLAG_requeued,
+	CFQ_CRQ_FLAG_is_sync = 0,
 };
 
 #define CFQ_CRQ_FNS(name)						\
@@ -291,14 +287,11 @@ static inline int cfq_crq_##name(const struct cfq_rq *crq)		\
 	return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0;	\
 }
 
-CFQ_CRQ_FNS(in_flight);
-CFQ_CRQ_FNS(in_driver);
 CFQ_CRQ_FNS(is_sync);
-CFQ_CRQ_FNS(requeued);
 #undef CFQ_CRQ_FNS
 
 static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short);
-static void cfq_dispatch_sort(request_queue_t *, struct cfq_rq *);
+static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *);
 static void cfq_put_cfqd(struct cfq_data *cfqd);
 
 #define process_sync(tsk)	((tsk)->flags & PF_SYNCWRITE)
@@ -311,14 +304,6 @@ static inline void cfq_del_crq_hash(struct cfq_rq *crq)
 		hlist_del_init(&crq->hash);
 }
 
-static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq)
-{
-	cfq_del_crq_hash(crq);
-
-	if (q->last_merge == crq->request)
-		q->last_merge = NULL;
-}
-
 static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
 {
 	const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request));
@@ -347,18 +332,13 @@ static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
 	return NULL;
 }
 
-static inline int cfq_pending_requests(struct cfq_data *cfqd)
-{
-	return !list_empty(&cfqd->queue->queue_head) || cfqd->busy_queues;
-}
-
 /*
  * scheduler run of queue, if there are requests pending and no one in the
  * driver that will restart queueing
  */
 static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
 {
-	if (!cfqd->rq_in_driver && cfq_pending_requests(cfqd))
+	if (!cfqd->rq_in_driver && cfqd->busy_queues)
 		kblockd_schedule_work(&cfqd->unplug_work);
 }
 
@@ -366,7 +346,7 @@ static int cfq_queue_empty(request_queue_t *q)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 
-	return !cfq_pending_requests(cfqd);
+	return !cfqd->busy_queues;
 }
 
 /*
@@ -386,11 +366,6 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
 	if (crq2 == NULL)
 		return crq1;
 
-	if (cfq_crq_requeued(crq1) && !cfq_crq_requeued(crq2))
-		return crq1;
-	else if (cfq_crq_requeued(crq2) && !cfq_crq_requeued(crq1))
-		return crq2;
-
 	if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2))
 		return crq1;
 	else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1))
@@ -461,10 +436,7 @@ cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	struct cfq_rq *crq_next = NULL, *crq_prev = NULL;
 	struct rb_node *rbnext, *rbprev;
 
-	rbnext = NULL;
-	if (ON_RB(&last->rb_node))
-		rbnext = rb_next(&last->rb_node);
-	if (!rbnext) {
+	if (!(rbnext = rb_next(&last->rb_node))) {
 		rbnext = rb_first(&cfqq->sort_list);
 		if (rbnext == &last->rb_node)
 			rbnext = NULL;
@@ -545,13 +517,13 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
  * the pending list according to last request service
  */
 static inline void
-cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq, int requeue)
+cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
 	BUG_ON(cfq_cfqq_on_rr(cfqq));
 	cfq_mark_cfqq_on_rr(cfqq);
 	cfqd->busy_queues++;
 
-	cfq_resort_rr_list(cfqq, requeue);
+	cfq_resort_rr_list(cfqq, 0);
 }
 
 static inline void
@@ -571,22 +543,19 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 static inline void cfq_del_crq_rb(struct cfq_rq *crq)
 {
 	struct cfq_queue *cfqq = crq->cfq_queue;
+	struct cfq_data *cfqd = cfqq->cfqd;
+	const int sync = cfq_crq_is_sync(crq);
 
-	if (ON_RB(&crq->rb_node)) {
-		struct cfq_data *cfqd = cfqq->cfqd;
-		const int sync = cfq_crq_is_sync(crq);
+	BUG_ON(!cfqq->queued[sync]);
+	cfqq->queued[sync]--;
 
-		BUG_ON(!cfqq->queued[sync]);
-		cfqq->queued[sync]--;
+	cfq_update_next_crq(crq);
 
-		cfq_update_next_crq(crq);
+	rb_erase(&crq->rb_node, &cfqq->sort_list);
+	RB_CLEAR_COLOR(&crq->rb_node);
 
-		rb_erase(&crq->rb_node, &cfqq->sort_list);
-		RB_CLEAR_COLOR(&crq->rb_node);
-
-		if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list))
-			cfq_del_cfqq_rr(cfqd, cfqq);
-	}
+	if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list))
+		cfq_del_cfqq_rr(cfqd, cfqq);
 }
 
 static struct cfq_rq *
@@ -627,12 +596,12 @@ static void cfq_add_crq_rb(struct cfq_rq *crq)
 	 * if that happens, put the alias on the dispatch list
 	 */
 	while ((__alias = __cfq_add_crq_rb(crq)) != NULL)
-		cfq_dispatch_sort(cfqd->queue, __alias);
+		cfq_dispatch_insert(cfqd->queue, __alias);
 
 	rb_insert_color(&crq->rb_node, &cfqq->sort_list);
 
 	if (!cfq_cfqq_on_rr(cfqq))
-		cfq_add_cfqq_rr(cfqd, cfqq, cfq_crq_requeued(crq));
+		cfq_add_cfqq_rr(cfqd, cfqq);
 
 	/*
 	 * check if this request is a better next-serve candidate
@@ -643,10 +612,8 @@ static void cfq_add_crq_rb(struct cfq_rq *crq)
 static inline void
 cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
 {
-	if (ON_RB(&crq->rb_node)) {
-		rb_erase(&crq->rb_node, &cfqq->sort_list);
-		cfqq->queued[cfq_crq_is_sync(crq)]--;
-	}
+	rb_erase(&crq->rb_node, &cfqq->sort_list);
+	cfqq->queued[cfq_crq_is_sync(crq)]--;
 
 	cfq_add_crq_rb(crq);
 }
@@ -676,49 +643,28 @@ out:
 	return NULL;
 }
 
-static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
+static void cfq_activate_request(request_queue_t *q, struct request *rq)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct cfq_rq *crq = RQ_DATA(rq);
-
-	if (crq) {
-		struct cfq_queue *cfqq = crq->cfq_queue;
-
-		if (cfq_crq_in_driver(crq)) {
-			cfq_clear_crq_in_driver(crq);
-			WARN_ON(!cfqd->rq_in_driver);
-			cfqd->rq_in_driver--;
-		}
-		if (cfq_crq_in_flight(crq)) {
-			const int sync = cfq_crq_is_sync(crq);
 
-			cfq_clear_crq_in_flight(crq);
-			WARN_ON(!cfqq->on_dispatch[sync]);
-			cfqq->on_dispatch[sync]--;
-		}
-		cfq_mark_crq_requeued(crq);
-	}
+	cfqd->rq_in_driver++;
 }
 
-/*
- * make sure the service time gets corrected on reissue of this request
- */
-static void cfq_requeue_request(request_queue_t *q, struct request *rq)
+static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
 {
-	cfq_deactivate_request(q, rq);
-	list_add(&rq->queuelist, &q->queue_head);
+	struct cfq_data *cfqd = q->elevator->elevator_data;
+
+	WARN_ON(!cfqd->rq_in_driver);
+	cfqd->rq_in_driver--;
 }
 
-static void cfq_remove_request(request_queue_t *q, struct request *rq)
+static void cfq_remove_request(struct request *rq)
 {
 	struct cfq_rq *crq = RQ_DATA(rq);
 
-	if (crq) {
-		list_del_init(&rq->queuelist);
-		cfq_del_crq_rb(crq);
-		cfq_remove_merge_hints(q, crq);
-
-	}
+	list_del_init(&rq->queuelist);
+	cfq_del_crq_rb(crq);
+	cfq_del_crq_hash(crq);
 }
 
 static int
@@ -728,12 +674,6 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
 	struct request *__rq;
 	int ret;
 
-	ret = elv_try_last_merge(q, bio);
-	if (ret != ELEVATOR_NO_MERGE) {
-		__rq = q->last_merge;
-		goto out_insert;
-	}
-
 	__rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
 	if (__rq && elv_rq_merge_ok(__rq, bio)) {
 		ret = ELEVATOR_BACK_MERGE;
@@ -748,8 +688,6 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
 
 	return ELEVATOR_NO_MERGE;
 out:
-	q->last_merge = __rq;
-out_insert:
 	*req = __rq;
 	return ret;
 }
@@ -762,14 +700,12 @@ static void cfq_merged_request(request_queue_t *q, struct request *req)
 	cfq_del_crq_hash(crq);
 	cfq_add_crq_hash(cfqd, crq);
 
-	if (ON_RB(&crq->rb_node) && (rq_rb_key(req) != crq->rb_key)) {
+	if (rq_rb_key(req) != crq->rb_key) {
 		struct cfq_queue *cfqq = crq->cfq_queue;
 
 		cfq_update_next_crq(crq);
 		cfq_reposition_crq_rb(cfqq, crq);
 	}
-
-	q->last_merge = req;
 }
 
 static void
@@ -785,7 +721,7 @@ cfq_merged_requests(request_queue_t *q, struct request *rq,
 	    time_before(next->start_time, rq->start_time))
 		list_move(&rq->queuelist, &next->queuelist);
 
-	cfq_remove_request(q, next);
+	cfq_remove_request(next);
 }
 
 static inline void
@@ -992,53 +928,15 @@ static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	return 1;
 }
 
-/*
- * we dispatch cfqd->cfq_quantum requests in total from the rr_list queues,
- * this function sector sorts the selected request to minimize seeks. we start
- * at cfqd->last_sector, not 0.
- */
-static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq)
+static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_queue *cfqq = crq->cfq_queue;
-	struct list_head *head = &q->queue_head, *entry = head;
-	struct request *__rq;
-	sector_t last;
-
-	list_del(&crq->request->queuelist);
-
-	last = cfqd->last_sector;
-	list_for_each_entry_reverse(__rq, head, queuelist) {
-		struct cfq_rq *__crq = RQ_DATA(__rq);
-
-		if (blk_barrier_rq(__rq))
-			break;
-		if (!blk_fs_request(__rq))
-			break;
-		if (cfq_crq_requeued(__crq))
-			break;
-
-		if (__rq->sector <= crq->request->sector)
-			break;
-		if (__rq->sector > last && crq->request->sector < last) {
-			last = crq->request->sector + crq->request->nr_sectors;
-			break;
-		}
-		entry = &__rq->queuelist;
-	}
-
-	cfqd->last_sector = last;
 
 	cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq);
-
-	cfq_del_crq_rb(crq);
-	cfq_remove_merge_hints(q, crq);
-
-	cfq_mark_crq_in_flight(crq);
-	cfq_clear_crq_requeued(crq);
-
+	cfq_remove_request(crq->request);
 	cfqq->on_dispatch[cfq_crq_is_sync(crq)]++;
-	list_add_tail(&crq->request->queuelist, entry);
+	elv_dispatch_sort(q, crq->request);
 }
 
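cfq_dispatch_insert() above is the whole of what a scheduler still owes the dispatch path once elv_dispatch_sort() exists; condensed with comments (dispatch_one is a hypothetical name for this restatement, not a kernel symbol):

static void dispatch_one(request_queue_t *q, struct cfq_rq *crq)
{
	struct cfq_queue *cfqq = crq->cfq_queue;

	/* pick the successor before tearing down lookup state */
	cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq);
	/* drop fifo, rbtree and merge-hash presence in one call */
	cfq_remove_request(crq->request);
	/* accounting now happens at dispatch, not in next_request */
	cfqq->on_dispatch[cfq_crq_is_sync(crq)]++;
	/* sector ordering is the generic layer's problem from here on */
	elv_dispatch_sort(q, crq->request);
}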
1044/* 942/*
@@ -1159,7 +1057,7 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1159 /* 1057 /*
1160 * finally, insert request into driver dispatch list 1058 * finally, insert request into driver dispatch list
1161 */ 1059 */
1162 cfq_dispatch_sort(cfqd->queue, crq); 1060 cfq_dispatch_insert(cfqd->queue, crq);
1163 1061
1164 cfqd->dispatch_slice++; 1062 cfqd->dispatch_slice++;
1165 dispatched++; 1063 dispatched++;
@@ -1194,7 +1092,7 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1194} 1092}
1195 1093
1196static int 1094static int
1197cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force) 1095cfq_dispatch_requests(request_queue_t *q, int force)
1198{ 1096{
1199 struct cfq_data *cfqd = q->elevator->elevator_data; 1097 struct cfq_data *cfqd = q->elevator->elevator_data;
1200 struct cfq_queue *cfqq; 1098 struct cfq_queue *cfqq;
@@ -1204,12 +1102,25 @@ cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force)
1204 1102
1205 cfqq = cfq_select_queue(cfqd, force); 1103 cfqq = cfq_select_queue(cfqd, force);
1206 if (cfqq) { 1104 if (cfqq) {
1105 int max_dispatch;
1106
1107 /*
1108 * if idle window is disabled, allow queue buildup
1109 */
1110 if (!cfq_cfqq_idle_window(cfqq) &&
1111 cfqd->rq_in_driver >= cfqd->cfq_max_depth)
1112 return 0;
1113
1207 cfq_clear_cfqq_must_dispatch(cfqq); 1114 cfq_clear_cfqq_must_dispatch(cfqq);
1208 cfq_clear_cfqq_wait_request(cfqq); 1115 cfq_clear_cfqq_wait_request(cfqq);
1209 del_timer(&cfqd->idle_slice_timer); 1116 del_timer(&cfqd->idle_slice_timer);
1210 1117
1211 if (cfq_class_idle(cfqq)) 1118 if (!force) {
1212 max_dispatch = 1; 1119 max_dispatch = cfqd->cfq_quantum;
1120 if (cfq_class_idle(cfqq))
1121 max_dispatch = 1;
1122 } else
1123 max_dispatch = INT_MAX;
1213 1124
1214 return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); 1125 return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch);
1215 } 1126 }
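The budget selection above compresses to a three-way rule; a tiny sketch (pick_max_dispatch is a hypothetical helper, the constants are from the hunk):

#include <limits.h>

static int pick_max_dispatch(int force, int quantum, int class_idle)
{
	if (force)
		return INT_MAX;		/* drain: no fairness bound */
	if (class_idle)
		return 1;		/* idle class barely trickles */
	return quantum;			/* normal bound: cfqd->cfq_quantum */
}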
@@ -1217,93 +1128,6 @@ cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force)
 	return 0;
 }
 
-static inline void cfq_account_dispatch(struct cfq_rq *crq)
-{
-	struct cfq_queue *cfqq = crq->cfq_queue;
-	struct cfq_data *cfqd = cfqq->cfqd;
-
-	if (unlikely(!blk_fs_request(crq->request)))
-		return;
-
-	/*
-	 * accounted bit is necessary since some drivers will call
-	 * elv_next_request() many times for the same request (eg ide)
-	 */
-	if (cfq_crq_in_driver(crq))
-		return;
-
-	cfq_mark_crq_in_driver(crq);
-	cfqd->rq_in_driver++;
-}
-
-static inline void
-cfq_account_completion(struct cfq_queue *cfqq, struct cfq_rq *crq)
-{
-	struct cfq_data *cfqd = cfqq->cfqd;
-	unsigned long now;
-
-	if (!cfq_crq_in_driver(crq))
-		return;
-
-	now = jiffies;
-
-	WARN_ON(!cfqd->rq_in_driver);
-	cfqd->rq_in_driver--;
-
-	if (!cfq_class_idle(cfqq))
-		cfqd->last_end_request = now;
-
-	if (!cfq_cfqq_dispatched(cfqq)) {
-		if (cfq_cfqq_on_rr(cfqq)) {
-			cfqq->service_last = now;
-			cfq_resort_rr_list(cfqq, 0);
-		}
-		if (cfq_cfqq_expired(cfqq)) {
-			__cfq_slice_expired(cfqd, cfqq, 0);
-			cfq_schedule_dispatch(cfqd);
-		}
-	}
-
-	if (cfq_crq_is_sync(crq))
-		crq->io_context->last_end_request = now;
-}
-
-static struct request *cfq_next_request(request_queue_t *q)
-{
-	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct request *rq;
-
-	if (!list_empty(&q->queue_head)) {
-		struct cfq_rq *crq;
-dispatch:
-		rq = list_entry_rq(q->queue_head.next);
-
-		crq = RQ_DATA(rq);
-		if (crq) {
-			struct cfq_queue *cfqq = crq->cfq_queue;
-
-			/*
-			 * if idle window is disabled, allow queue buildup
-			 */
-			if (!cfq_crq_in_driver(crq) &&
-			    !cfq_cfqq_idle_window(cfqq) &&
-			    !blk_barrier_rq(rq) &&
-			    cfqd->rq_in_driver >= cfqd->cfq_max_depth)
-				return NULL;
-
-			cfq_remove_merge_hints(q, crq);
-			cfq_account_dispatch(crq);
-		}
-
-		return rq;
-	}
-
-	if (cfq_dispatch_requests(q, cfqd->cfq_quantum, 0))
-		goto dispatch;
-
-	return NULL;
-}
-
 /*
  * task holds one reference to the queue, dropped when task exits. each crq
  * in-flight on this queue also holds a reference, dropped when crq is freed.
@@ -1422,7 +1246,7 @@ static void cfq_exit_io_context(struct cfq_io_context *cic)
 }
 
 static struct cfq_io_context *
-cfq_alloc_io_context(struct cfq_data *cfqd, int gfp_mask)
+cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 {
 	struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask);
 
@@ -1517,7 +1341,7 @@ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
 
 static struct cfq_queue *
 cfq_get_queue(struct cfq_data *cfqd, unsigned int key, unsigned short ioprio,
-	      int gfp_mask)
+	      gfp_t gfp_mask)
 {
 	const int hashval = hash_long(key, CFQ_QHASH_SHIFT);
 	struct cfq_queue *cfqq, *new_cfqq = NULL;
@@ -1578,7 +1402,7 @@ out:
  * cfqq, so we don't need to worry about it disappearing
  */
 static struct cfq_io_context *
-cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, int gfp_mask)
+cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask)
 {
 	struct io_context *ioc = NULL;
 	struct cfq_io_context *cic;
@@ -1816,8 +1640,9 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	}
 }
 
-static void cfq_enqueue(struct cfq_data *cfqd, struct request *rq)
+static void cfq_insert_request(request_queue_t *q, struct request *rq)
 {
+	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_rq *crq = RQ_DATA(rq);
 	struct cfq_queue *cfqq = crq->cfq_queue;
 
@@ -1827,66 +1652,43 @@ static void cfq_enqueue(struct cfq_data *cfqd, struct request *rq)
 
 	list_add_tail(&rq->queuelist, &cfqq->fifo);
 
-	if (rq_mergeable(rq)) {
+	if (rq_mergeable(rq))
 		cfq_add_crq_hash(cfqd, crq);
 
-		if (!cfqd->queue->last_merge)
-			cfqd->queue->last_merge = rq;
-	}
-
 	cfq_crq_enqueued(cfqd, cfqq, crq);
 }
 
-static void
-cfq_insert_request(request_queue_t *q, struct request *rq, int where)
-{
-	struct cfq_data *cfqd = q->elevator->elevator_data;
-
-	switch (where) {
-		case ELEVATOR_INSERT_BACK:
-			while (cfq_dispatch_requests(q, INT_MAX, 1))
-				;
-			list_add_tail(&rq->queuelist, &q->queue_head);
-			/*
-			 * If we were idling with pending requests on
-			 * inactive cfqqs, force dispatching will
-			 * remove the idle timer and the queue won't
-			 * be kicked by __make_request() afterward.
-			 * Kick it here.
-			 */
-			cfq_schedule_dispatch(cfqd);
-			break;
-		case ELEVATOR_INSERT_FRONT:
-			list_add(&rq->queuelist, &q->queue_head);
-			break;
-		case ELEVATOR_INSERT_SORT:
-			BUG_ON(!blk_fs_request(rq));
-			cfq_enqueue(cfqd, rq);
-			break;
-		default:
-			printk("%s: bad insert point %d\n", __FUNCTION__,where);
-			return;
-	}
-}
-
 static void cfq_completed_request(request_queue_t *q, struct request *rq)
 {
 	struct cfq_rq *crq = RQ_DATA(rq);
-	struct cfq_queue *cfqq;
+	struct cfq_queue *cfqq = crq->cfq_queue;
+	struct cfq_data *cfqd = cfqq->cfqd;
+	const int sync = cfq_crq_is_sync(crq);
+	unsigned long now;
 
-	if (unlikely(!blk_fs_request(rq)))
-		return;
+	now = jiffies;
 
-	cfqq = crq->cfq_queue;
+	WARN_ON(!cfqd->rq_in_driver);
+	WARN_ON(!cfqq->on_dispatch[sync]);
+	cfqd->rq_in_driver--;
+	cfqq->on_dispatch[sync]--;
 
-	if (cfq_crq_in_flight(crq)) {
-		const int sync = cfq_crq_is_sync(crq);
+	if (!cfq_class_idle(cfqq))
+		cfqd->last_end_request = now;
 
-		WARN_ON(!cfqq->on_dispatch[sync]);
-		cfqq->on_dispatch[sync]--;
+	if (!cfq_cfqq_dispatched(cfqq)) {
+		if (cfq_cfqq_on_rr(cfqq)) {
+			cfqq->service_last = now;
+			cfq_resort_rr_list(cfqq, 0);
+		}
+		if (cfq_cfqq_expired(cfqq)) {
+			__cfq_slice_expired(cfqd, cfqq, 0);
+			cfq_schedule_dispatch(cfqd);
+		}
 	}
 
-	cfq_account_completion(cfqq, crq);
+	if (cfq_crq_is_sync(crq))
+		crq->io_context->last_end_request = now;
 }
 
 static struct request *
@@ -2075,7 +1877,7 @@ static void cfq_put_request(request_queue_t *q, struct request *rq)
  */
 static int
 cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
-		int gfp_mask)
+		gfp_t gfp_mask)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct task_struct *tsk = current;
@@ -2118,9 +1920,6 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
 	INIT_HLIST_NODE(&crq->hash);
 	crq->cfq_queue = cfqq;
 	crq->io_context = cic;
-	cfq_clear_crq_in_flight(crq);
-	cfq_clear_crq_in_driver(crq);
-	cfq_clear_crq_requeued(crq);
 
 	if (rw == READ || process_sync(tsk))
 		cfq_mark_crq_is_sync(crq);
@@ -2201,7 +2000,7 @@ static void cfq_idle_slice_timer(unsigned long data)
 	 * only expire and reinvoke request handler, if there are
 	 * other queues with pending requests
 	 */
-	if (!cfq_pending_requests(cfqd)) {
+	if (!cfqd->busy_queues) {
 		cfqd->idle_slice_timer.expires = min(now + cfqd->cfq_slice_idle, cfqq->slice_end);
 		add_timer(&cfqd->idle_slice_timer);
 		goto out_cont;
@@ -2576,10 +2375,9 @@ static struct elevator_type iosched_cfq = {
 		.elevator_merge_fn =		cfq_merge,
 		.elevator_merged_fn =		cfq_merged_request,
 		.elevator_merge_req_fn =	cfq_merged_requests,
-		.elevator_next_req_fn =		cfq_next_request,
+		.elevator_dispatch_fn =		cfq_dispatch_requests,
 		.elevator_add_req_fn =		cfq_insert_request,
-		.elevator_remove_req_fn =	cfq_remove_request,
-		.elevator_requeue_req_fn =	cfq_requeue_request,
+		.elevator_activate_req_fn =	cfq_activate_request,
 		.elevator_deactivate_req_fn =	cfq_deactivate_request,
 		.elevator_queue_empty_fn =	cfq_queue_empty,
 		.elevator_completed_req_fn =	cfq_completed_request,
diff --git a/drivers/block/deadline-iosched.c b/drivers/block/deadline-iosched.c
index 52a3ae5289a0..7929471d7df7 100644
--- a/drivers/block/deadline-iosched.c
+++ b/drivers/block/deadline-iosched.c
@@ -50,7 +50,6 @@ struct deadline_data {
 	 * next in sort order. read, write or both are NULL
 	 */
 	struct deadline_rq *next_drq[2];
-	struct list_head *dispatch;	/* driver dispatch queue */
 	struct list_head *hash;		/* request hash */
 	unsigned int batching;		/* number of sequential requests made */
 	sector_t last_sector;		/* head position */
@@ -113,15 +112,6 @@ static inline void deadline_del_drq_hash(struct deadline_rq *drq)
 		__deadline_del_drq_hash(drq);
 }
 
-static void
-deadline_remove_merge_hints(request_queue_t *q, struct deadline_rq *drq)
-{
-	deadline_del_drq_hash(drq);
-
-	if (q->last_merge == drq->request)
-		q->last_merge = NULL;
-}
-
 static inline void
 deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
 {
@@ -239,10 +229,9 @@ deadline_del_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
 			dd->next_drq[data_dir] = rb_entry_drq(rbnext);
 	}
 
-	if (ON_RB(&drq->rb_node)) {
-		rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
-		RB_CLEAR(&drq->rb_node);
-	}
+	BUG_ON(!ON_RB(&drq->rb_node));
+	rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
+	RB_CLEAR(&drq->rb_node);
 }
 
 static struct request *
@@ -286,7 +275,7 @@ deadline_find_first_drq(struct deadline_data *dd, int data_dir)
 /*
  * add drq to rbtree and fifo
  */
-static inline void
+static void
 deadline_add_request(struct request_queue *q, struct request *rq)
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
@@ -301,12 +290,8 @@ deadline_add_request(struct request_queue *q, struct request *rq)
 	drq->expires = jiffies + dd->fifo_expire[data_dir];
 	list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]);
 
-	if (rq_mergeable(rq)) {
+	if (rq_mergeable(rq))
 		deadline_add_drq_hash(dd, drq);
-
-		if (!q->last_merge)
-			q->last_merge = rq;
-	}
 }
 
 /*
@@ -315,14 +300,11 @@ deadline_add_request(struct request_queue *q, struct request *rq)
 static void deadline_remove_request(request_queue_t *q, struct request *rq)
 {
 	struct deadline_rq *drq = RQ_DATA(rq);
+	struct deadline_data *dd = q->elevator->elevator_data;
 
-	if (drq) {
-		struct deadline_data *dd = q->elevator->elevator_data;
-
-		list_del_init(&drq->fifo);
-		deadline_remove_merge_hints(q, drq);
-		deadline_del_drq_rb(dd, drq);
-	}
+	list_del_init(&drq->fifo);
+	deadline_del_drq_rb(dd, drq);
+	deadline_del_drq_hash(drq);
 }
 
 static int
@@ -333,15 +315,6 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
 	int ret;
 
 	/*
-	 * try last_merge to avoid going to hash
-	 */
-	ret = elv_try_last_merge(q, bio);
-	if (ret != ELEVATOR_NO_MERGE) {
-		__rq = q->last_merge;
-		goto out_insert;
-	}
-
-	/*
 	 * see if the merge hash can satisfy a back merge
 	 */
 	__rq = deadline_find_drq_hash(dd, bio->bi_sector);
@@ -373,8 +346,6 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
 
 	return ELEVATOR_NO_MERGE;
 out:
-	q->last_merge = __rq;
-out_insert:
 	if (ret)
 		deadline_hot_drq_hash(dd, RQ_DATA(__rq));
 	*req = __rq;
@@ -399,8 +370,6 @@ static void deadline_merged_request(request_queue_t *q, struct request *req)
 		deadline_del_drq_rb(dd, drq);
 		deadline_add_drq_rb(dd, drq);
 	}
-
-	q->last_merge = req;
 }
 
 static void
@@ -452,7 +421,7 @@ deadline_move_to_dispatch(struct deadline_data *dd, struct deadline_rq *drq)
 	request_queue_t *q = drq->request->q;
 
 	deadline_remove_request(q, drq->request);
-	list_add_tail(&drq->request->queuelist, dd->dispatch);
+	elv_dispatch_add_tail(q, drq->request);
 }
 
 /*
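deadline keeps its FIFO semantics by calling elv_dispatch_add_tail() rather than the sorting insert. That helper is not shown in this diff (it lands in elevator.h in the same series); the body below is reconstructed from how it is used here, so treat it as a sketch:

static inline void elv_dispatch_add_tail(request_queue_t *q, struct request *rq)
{
	if (q->last_merge == rq)
		q->last_merge = NULL;		/* rq is leaving the scheduler */

	q->end_sector = rq_end_sector(rq);	/* future sorted inserts start here */
	q->boundary_rq = rq;
	list_add_tail(&rq->queuelist, &q->queue_head);
}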
@@ -502,8 +471,9 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
 * deadline_dispatch_requests selects the best request according to
 * read/write expire, fifo_batch, etc
 */
-static int deadline_dispatch_requests(struct deadline_data *dd)
+static int deadline_dispatch_requests(request_queue_t *q, int force)
 {
+	struct deadline_data *dd = q->elevator->elevator_data;
 	const int reads = !list_empty(&dd->fifo_list[READ]);
 	const int writes = !list_empty(&dd->fifo_list[WRITE]);
 	struct deadline_rq *drq;
@@ -597,65 +567,12 @@ dispatch_request:
 	return 1;
 }
 
-static struct request *deadline_next_request(request_queue_t *q)
-{
-	struct deadline_data *dd = q->elevator->elevator_data;
-	struct request *rq;
-
-	/*
-	 * if there are still requests on the dispatch queue, grab the first one
-	 */
-	if (!list_empty(dd->dispatch)) {
-dispatch:
-		rq = list_entry_rq(dd->dispatch->next);
-		return rq;
-	}
-
-	if (deadline_dispatch_requests(dd))
-		goto dispatch;
-
-	return NULL;
-}
-
-static void
-deadline_insert_request(request_queue_t *q, struct request *rq, int where)
-{
-	struct deadline_data *dd = q->elevator->elevator_data;
-
-	/* barriers must flush the reorder queue */
-	if (unlikely(rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)
-			&& where == ELEVATOR_INSERT_SORT))
-		where = ELEVATOR_INSERT_BACK;
-
-	switch (where) {
-		case ELEVATOR_INSERT_BACK:
-			while (deadline_dispatch_requests(dd))
-				;
-			list_add_tail(&rq->queuelist, dd->dispatch);
-			break;
-		case ELEVATOR_INSERT_FRONT:
-			list_add(&rq->queuelist, dd->dispatch);
-			break;
-		case ELEVATOR_INSERT_SORT:
-			BUG_ON(!blk_fs_request(rq));
-			deadline_add_request(q, rq);
-			break;
-		default:
-			printk("%s: bad insert point %d\n", __FUNCTION__,where);
-			return;
-	}
-}
-
 static int deadline_queue_empty(request_queue_t *q)
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
 
-	if (!list_empty(&dd->fifo_list[WRITE])
-		|| !list_empty(&dd->fifo_list[READ])
-		|| !list_empty(dd->dispatch))
-		return 0;
-
-	return 1;
+	return list_empty(&dd->fifo_list[WRITE])
+		&& list_empty(&dd->fifo_list[READ]);
 }
 
 static struct request *
@@ -733,7 +650,6 @@ static int deadline_init_queue(request_queue_t *q, elevator_t *e)
 	INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
 	dd->sort_list[READ] = RB_ROOT;
 	dd->sort_list[WRITE] = RB_ROOT;
-	dd->dispatch = &q->queue_head;
 	dd->fifo_expire[READ] = read_expire;
 	dd->fifo_expire[WRITE] = write_expire;
 	dd->writes_starved = writes_starved;
@@ -748,15 +664,13 @@ static void deadline_put_request(request_queue_t *q, struct request *rq)
 	struct deadline_data *dd = q->elevator->elevator_data;
 	struct deadline_rq *drq = RQ_DATA(rq);
 
-	if (drq) {
-		mempool_free(drq, dd->drq_pool);
-		rq->elevator_private = NULL;
-	}
+	mempool_free(drq, dd->drq_pool);
+	rq->elevator_private = NULL;
 }
 
 static int
 deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
-		     int gfp_mask)
+		     gfp_t gfp_mask)
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
 	struct deadline_rq *drq;
@@ -917,9 +831,8 @@ static struct elevator_type iosched_deadline = {
 		.elevator_merge_fn =		deadline_merge,
 		.elevator_merged_fn =		deadline_merged_request,
 		.elevator_merge_req_fn =	deadline_merged_requests,
-		.elevator_next_req_fn =		deadline_next_request,
-		.elevator_add_req_fn =		deadline_insert_request,
-		.elevator_remove_req_fn =	deadline_remove_request,
+		.elevator_dispatch_fn =		deadline_dispatch_requests,
+		.elevator_add_req_fn =		deadline_add_request,
 		.elevator_queue_empty_fn =	deadline_queue_empty,
 		.elevator_former_req_fn =	deadline_former_request,
 		.elevator_latter_req_fn =	deadline_latter_request,
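Worth noting across all three schedulers: queue_empty now reports only the scheduler's private backlog, because the dispatch list belongs to the block layer. The division of labor, sketched (scheduler_backlog is a hypothetical name):

/* scheduler side: "do I have anything left to feed the dispatch list?" */
static int scheduler_backlog(struct deadline_data *dd)
{
	return !list_empty(&dd->fifo_list[READ]) ||
	       !list_empty(&dd->fifo_list[WRITE]);
}

/* core side (see __elv_next_request further down): check q->queue_head
 * first, and only then ask the elevator to dispatch more. */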
diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c
index 98f0126a2deb..55621d5c5774 100644
--- a/drivers/block/elevator.c
+++ b/drivers/block/elevator.c
@@ -34,6 +34,7 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/compiler.h>
+#include <linux/delay.h>
 
 #include <asm/uaccess.h>
 
@@ -83,21 +84,11 @@ inline int elv_try_merge(struct request *__rq, struct bio *bio)
 }
 EXPORT_SYMBOL(elv_try_merge);
 
-inline int elv_try_last_merge(request_queue_t *q, struct bio *bio)
-{
-	if (q->last_merge)
-		return elv_try_merge(q->last_merge, bio);
-
-	return ELEVATOR_NO_MERGE;
-}
-EXPORT_SYMBOL(elv_try_last_merge);
-
 static struct elevator_type *elevator_find(const char *name)
 {
 	struct elevator_type *e = NULL;
 	struct list_head *entry;
 
-	spin_lock_irq(&elv_list_lock);
 	list_for_each(entry, &elv_list) {
 		struct elevator_type *__e;
 
@@ -108,7 +99,6 @@ static struct elevator_type *elevator_find(const char *name)
 			break;
 		}
 	}
-	spin_unlock_irq(&elv_list_lock);
 
 	return e;
 }
@@ -120,12 +110,15 @@ static void elevator_put(struct elevator_type *e)
 
 static struct elevator_type *elevator_get(const char *name)
 {
-	struct elevator_type *e = elevator_find(name);
+	struct elevator_type *e;
 
-	if (!e)
-		return NULL;
-	if (!try_module_get(e->elevator_owner))
-		return NULL;
+	spin_lock_irq(&elv_list_lock);
+
+	e = elevator_find(name);
+	if (e && !try_module_get(e->elevator_owner))
+		e = NULL;
+
+	spin_unlock_irq(&elv_list_lock);
 
 	return e;
 }
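The elevator_get() rewrite closes a small race: the lookup and the module refcount grab previously sat under separate lock acquisitions, so the module could be unloaded in between. The pattern in miniature (user-space sketch; find_in_list and try_get_ref are hypothetical stand-ins for elevator_find and try_module_get):

#include <pthread.h>

struct elevator_type;
extern struct elevator_type *find_in_list(const char *name);
extern int try_get_ref(struct elevator_type *e);

static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;

struct elevator_type *get_elevator(const char *name)
{
	struct elevator_type *e;

	pthread_mutex_lock(&registry_lock);	/* plays the role of elv_list_lock */
	e = find_in_list(name);
	if (e && !try_get_ref(e))		/* both steps in one critical section */
		e = NULL;
	pthread_mutex_unlock(&registry_lock);
	return e;
}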
@@ -139,8 +132,6 @@ static int elevator_attach(request_queue_t *q, struct elevator_type *e,
 	eq->ops = &e->ops;
 	eq->elevator_type = e;
 
-	INIT_LIST_HEAD(&q->queue_head);
-	q->last_merge = NULL;
 	q->elevator = eq;
 
 	if (eq->ops->elevator_init_fn)
@@ -153,11 +144,15 @@ static char chosen_elevator[16];
 
 static void elevator_setup_default(void)
 {
+	struct elevator_type *e;
+
 	/*
 	 * check if default is set and exists
 	 */
-	if (chosen_elevator[0] && elevator_find(chosen_elevator))
+	if (chosen_elevator[0] && (e = elevator_get(chosen_elevator))) {
+		elevator_put(e);
 		return;
+	}
 
 #if defined(CONFIG_IOSCHED_AS)
 	strcpy(chosen_elevator, "anticipatory");
@@ -186,6 +181,11 @@ int elevator_init(request_queue_t *q, char *name)
 	struct elevator_queue *eq;
 	int ret = 0;
 
+	INIT_LIST_HEAD(&q->queue_head);
+	q->last_merge = NULL;
+	q->end_sector = 0;
+	q->boundary_rq = NULL;
+
 	elevator_setup_default();
 
 	if (!name)
@@ -220,9 +220,52 @@ void elevator_exit(elevator_t *e)
 	kfree(e);
 }
 
+/*
+ * Insert rq into dispatch queue of q.  Queue lock must be held on
+ * entry.  If sort != 0, rq is sort-inserted; otherwise, rq will be
+ * appended to the dispatch queue.  To be used by specific elevators.
+ */
+void elv_dispatch_sort(request_queue_t *q, struct request *rq)
+{
+	sector_t boundary;
+	struct list_head *entry;
+
+	if (q->last_merge == rq)
+		q->last_merge = NULL;
+
+	boundary = q->end_sector;
+
+	list_for_each_prev(entry, &q->queue_head) {
+		struct request *pos = list_entry_rq(entry);
+
+		if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
+			break;
+		if (rq->sector >= boundary) {
+			if (pos->sector < boundary)
+				continue;
+		} else {
+			if (pos->sector >= boundary)
+				break;
+		}
+		if (rq->sector >= pos->sector)
+			break;
+	}
+
+	list_add(&rq->queuelist, entry);
+}
+
 int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
 {
 	elevator_t *e = q->elevator;
+	int ret;
+
+	if (q->last_merge) {
+		ret = elv_try_merge(q->last_merge, bio);
+		if (ret != ELEVATOR_NO_MERGE) {
+			*req = q->last_merge;
+			return ret;
+		}
+	}
 
 	if (e->ops->elevator_merge_fn)
 		return e->ops->elevator_merge_fn(q, req, bio);
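elv_dispatch_sort() above implements a one-way elevator: requests at or past q->end_sector sort among themselves in ascending order, while requests below the boundary (which would mean seeking backwards) are parked after all of them, also ascending. A runnable user-space model of just that ordering rule (sector numbers stand in for requests; the boundary value is arbitrary):

#include <stdio.h>

int main(void)
{
	unsigned long long boundary = 1000;	/* models q->end_sector */
	unsigned long long incoming[] = { 1200, 900, 1100, 40, 1300 };
	unsigned long long queue[8];
	int n = 0;

	for (int i = 0; i < 5; i++) {
		unsigned long long rq = incoming[i];
		int pos = n;			/* default: append at tail */

		while (pos > 0) {		/* walk backwards, as list_for_each_prev() does */
			unsigned long long prev = queue[pos - 1];

			if (rq >= boundary && prev < boundary) {
				pos--;		/* below-boundary requests stay last */
				continue;
			}
			if (rq < boundary && prev >= boundary)
				break;
			if (rq >= prev)
				break;
			pos--;
		}
		for (int j = n; j > pos; j--)	/* shift and insert */
			queue[j] = queue[j - 1];
		queue[pos] = rq;
		n++;
	}

	for (int i = 0; i < n; i++)
		printf("%llu ", queue[i]);	/* prints: 1100 1200 1300 40 900 */
	printf("\n");
	return 0;
}

The REQ_STARTED/barrier check in the real function additionally stops the scan at requests a driver may already have seen, so nothing is ever inserted in front of them.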
@@ -236,6 +279,8 @@ void elv_merged_request(request_queue_t *q, struct request *rq)
 
 	if (e->ops->elevator_merged_fn)
 		e->ops->elevator_merged_fn(q, rq);
+
+	q->last_merge = rq;
 }
 
 void elv_merge_requests(request_queue_t *q, struct request *rq,
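With elv_try_last_merge() gone, last_merge becomes a pure one-entry cache owned by the core: elv_merge() probes it before the scheduler's hash, and the merged/merge-requests notifiers refresh it. The probe, isolated for readability (a restatement of the elv_merge hunk above, not a new API):

static int try_cached_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
	if (q->last_merge) {
		int ret = elv_try_merge(q->last_merge, bio);

		if (ret != ELEVATOR_NO_MERGE) {
			*req = q->last_merge;
			return ret;	/* scheduler hash lookup skipped entirely */
		}
	}
	return ELEVATOR_NO_MERGE;	/* caller falls through to elevator_merge_fn */
}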
@@ -243,20 +288,13 @@ void elv_merge_requests(request_queue_t *q, struct request *rq,
243{ 288{
244 elevator_t *e = q->elevator; 289 elevator_t *e = q->elevator;
245 290
246 if (q->last_merge == next)
247 q->last_merge = NULL;
248
249 if (e->ops->elevator_merge_req_fn) 291 if (e->ops->elevator_merge_req_fn)
250 e->ops->elevator_merge_req_fn(q, rq, next); 292 e->ops->elevator_merge_req_fn(q, rq, next);
293
294 q->last_merge = rq;
251} 295}
252 296
253/* 297void elv_requeue_request(request_queue_t *q, struct request *rq)
254 * For careful internal use by the block layer. Essentially the same as
255 * a requeue in that it tells the io scheduler that this request is not
256 * active in the driver or hardware anymore, but we don't want the request
257 * added back to the scheduler. Function is not exported.
258 */
259void elv_deactivate_request(request_queue_t *q, struct request *rq)
260{ 298{
261 elevator_t *e = q->elevator; 299 elevator_t *e = q->elevator;
262 300
@@ -264,19 +302,14 @@ void elv_deactivate_request(request_queue_t *q, struct request *rq)
264 * it already went through dequeue, we need to decrement the 302 * it already went through dequeue, we need to decrement the
265 * in_flight count again 303 * in_flight count again
266 */ 304 */
267 if (blk_account_rq(rq)) 305 if (blk_account_rq(rq)) {
268 q->in_flight--; 306 q->in_flight--;
307 if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn)
308 e->ops->elevator_deactivate_req_fn(q, rq);
309 }
269 310
270 rq->flags &= ~REQ_STARTED; 311 rq->flags &= ~REQ_STARTED;
271 312
272 if (e->ops->elevator_deactivate_req_fn)
273 e->ops->elevator_deactivate_req_fn(q, rq);
274}
275
276void elv_requeue_request(request_queue_t *q, struct request *rq)
277{
278 elv_deactivate_request(q, rq);
279
280 /* 313 /*
281 * if this is the flush, requeue the original instead and drop the flush 314 * if this is the flush, requeue the original instead and drop the flush
282 */ 315 */
@@ -285,31 +318,27 @@ void elv_requeue_request(request_queue_t *q, struct request *rq)
285 rq = rq->end_io_data; 318 rq = rq->end_io_data;
286 } 319 }
287 320
288 /* 321 __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
289 * the request is prepped and may have some resources allocated.
290 * allowing unprepped requests to pass this one may cause resource
291 * deadlock. turn on softbarrier.
292 */
293 rq->flags |= REQ_SOFTBARRIER;
294
295 /*
296 * if iosched has an explicit requeue hook, then use that. otherwise
297 * just put the request at the front of the queue
298 */
299 if (q->elevator->ops->elevator_requeue_req_fn)
300 q->elevator->ops->elevator_requeue_req_fn(q, rq);
301 else
302 __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
303} 322}
304 323
305void __elv_add_request(request_queue_t *q, struct request *rq, int where, 324void __elv_add_request(request_queue_t *q, struct request *rq, int where,
306 int plug) 325 int plug)
307{ 326{
308 /* 327 if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
309 * barriers implicitly indicate back insertion 328 /*
310 */ 329 * barriers implicitly indicate back insertion
311 if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER) && 330 */
312 where == ELEVATOR_INSERT_SORT) 331 if (where == ELEVATOR_INSERT_SORT)
332 where = ELEVATOR_INSERT_BACK;
333
334 /*
335 * this request is scheduling boundary, update end_sector
336 */
337 if (blk_fs_request(rq)) {
338 q->end_sector = rq_end_sector(rq);
339 q->boundary_rq = rq;
340 }
341 } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
313 where = ELEVATOR_INSERT_BACK; 342 where = ELEVATOR_INSERT_BACK;
314 343
315 if (plug) 344 if (plug)
@@ -317,23 +346,54 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
317 346
318 rq->q = q; 347 rq->q = q;
319 348
320 if (!test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) { 349 switch (where) {
321 q->elevator->ops->elevator_add_req_fn(q, rq, where); 350 case ELEVATOR_INSERT_FRONT:
351 rq->flags |= REQ_SOFTBARRIER;
322 352
323 if (blk_queue_plugged(q)) { 353 list_add(&rq->queuelist, &q->queue_head);
324 int nrq = q->rq.count[READ] + q->rq.count[WRITE] 354 break;
325 - q->in_flight;
326 355
327 if (nrq >= q->unplug_thresh) 356 case ELEVATOR_INSERT_BACK:
328 __generic_unplug_device(q); 357 rq->flags |= REQ_SOFTBARRIER;
329 } 358
330 } else 359 while (q->elevator->ops->elevator_dispatch_fn(q, 1))
360 ;
361 list_add_tail(&rq->queuelist, &q->queue_head);
331 /* 362 /*
332 * if drain is set, store the request "locally". when the drain 363 * We kick the queue here for the following reasons.
333 * is finished, the requests will be handed ordered to the io 364 * - The elevator might have returned NULL previously
334 * scheduler 365 * to delay requests and returned them now. As the
366 * queue wasn't empty before this request, ll_rw_blk
 367 * won't run the queue on return, resulting in a hang.
368 * - Usually, back inserted requests won't be merged
369 * with anything. There's no point in delaying queue
370 * processing.
335 */ 371 */
336 list_add_tail(&rq->queuelist, &q->drain_list); 372 blk_remove_plug(q);
373 q->request_fn(q);
374 break;
375
376 case ELEVATOR_INSERT_SORT:
377 BUG_ON(!blk_fs_request(rq));
378 rq->flags |= REQ_SORTED;
379 q->elevator->ops->elevator_add_req_fn(q, rq);
380 if (q->last_merge == NULL && rq_mergeable(rq))
381 q->last_merge = rq;
382 break;
383
384 default:
385 printk(KERN_ERR "%s: bad insertion point %d\n",
386 __FUNCTION__, where);
387 BUG();
388 }
389
390 if (blk_queue_plugged(q)) {
391 int nrq = q->rq.count[READ] + q->rq.count[WRITE]
392 - q->in_flight;
393
394 if (nrq >= q->unplug_thresh)
395 __generic_unplug_device(q);
396 }
337} 397}
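
__elv_add_request now routes on an explicit insertion point instead of the old DRAIN flag: FRONT and BACK bypass the io scheduler entirely and splice onto q->queue_head (BACK first force-drains the scheduler so existing sorted requests keep their place), while SORT is the only path into the scheduler and is reserved for mergeable fs requests, which also get REQ_SORTED and may refresh the last_merge hint. A compact userspace model of the three policies (illustrative names; the toy ignores the real scheduler's sorting):

    /* Userspace model of the three insertion policies. */
    enum where { INSERT_FRONT, INSERT_BACK, INSERT_SORT };

    #define NQ 16
    static int dispatch_q[NQ], dq_len;  /* models q->queue_head   */
    static int sched_q[NQ], sq_len;     /* models scheduler lists */

    static void add_request(int rq, enum where w)
    {
            int i;

            switch (w) {
            case INSERT_FRONT:              /* e.g. a requeue */
                    for (i = dq_len++; i > 0; i--)
                            dispatch_q[i] = dispatch_q[i - 1];
                    dispatch_q[0] = rq;
                    break;
            case INSERT_BACK:               /* e.g. a barrier */
                    while (sq_len)          /* force-drain the scheduler;
                                             * order ignored in this toy */
                            dispatch_q[dq_len++] = sched_q[--sq_len];
                    dispatch_q[dq_len++] = rq;
                    break;
            case INSERT_SORT:               /* normal fs request */
                    sched_q[sq_len++] = rq; /* real code sorts and merges */
                    break;
            }
    }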
338 398
339void elv_add_request(request_queue_t *q, struct request *rq, int where, 399void elv_add_request(request_queue_t *q, struct request *rq, int where,
@@ -348,13 +408,19 @@ void elv_add_request(request_queue_t *q, struct request *rq, int where,
348 408
349static inline struct request *__elv_next_request(request_queue_t *q) 409static inline struct request *__elv_next_request(request_queue_t *q)
350{ 410{
351 struct request *rq = q->elevator->ops->elevator_next_req_fn(q); 411 struct request *rq;
412
413 if (unlikely(list_empty(&q->queue_head) &&
414 !q->elevator->ops->elevator_dispatch_fn(q, 0)))
415 return NULL;
416
417 rq = list_entry_rq(q->queue_head.next);
352 418
353 /* 419 /*
354 * if this is a barrier write and the device has to issue a 420 * if this is a barrier write and the device has to issue a
355 * flush sequence to support it, check how far we are 421 * flush sequence to support it, check how far we are
356 */ 422 */
357 if (rq && blk_fs_request(rq) && blk_barrier_rq(rq)) { 423 if (blk_fs_request(rq) && blk_barrier_rq(rq)) {
358 BUG_ON(q->ordered == QUEUE_ORDERED_NONE); 424 BUG_ON(q->ordered == QUEUE_ORDERED_NONE);
359 425
360 if (q->ordered == QUEUE_ORDERED_FLUSH && 426 if (q->ordered == QUEUE_ORDERED_FLUSH &&
@@ -371,15 +437,30 @@ struct request *elv_next_request(request_queue_t *q)
371 int ret; 437 int ret;
372 438
373 while ((rq = __elv_next_request(q)) != NULL) { 439 while ((rq = __elv_next_request(q)) != NULL) {
374 /* 440 if (!(rq->flags & REQ_STARTED)) {
375 * just mark as started even if we don't start it, a request 441 elevator_t *e = q->elevator;
376 * that has been delayed should not be passed by new incoming
377 * requests
378 */
379 rq->flags |= REQ_STARTED;
380 442
381 if (rq == q->last_merge) 443 /*
382 q->last_merge = NULL; 444 * This is the first time the device driver
445 * sees this request (possibly after
446 * requeueing). Notify IO scheduler.
447 */
448 if (blk_sorted_rq(rq) &&
449 e->ops->elevator_activate_req_fn)
450 e->ops->elevator_activate_req_fn(q, rq);
451
452 /*
453 * just mark as started even if we don't start
454 * it, a request that has been delayed should
455 * not be passed by new incoming requests
456 */
457 rq->flags |= REQ_STARTED;
458 }
459
460 if (!q->boundary_rq || q->boundary_rq == rq) {
461 q->end_sector = rq_end_sector(rq);
462 q->boundary_rq = NULL;
463 }
383 464
384 if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn) 465 if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn)
385 break; 466 break;
@@ -391,9 +472,9 @@ struct request *elv_next_request(request_queue_t *q)
391 /* 472 /*
392 * the request may have been (partially) prepped. 473 * the request may have been (partially) prepped.
393 * we need to keep this request in the front to 474 * we need to keep this request in the front to
394 * avoid resource deadlock. turn on softbarrier. 475 * avoid resource deadlock. REQ_STARTED will
476 * prevent other fs requests from passing this one.
395 */ 477 */
396 rq->flags |= REQ_SOFTBARRIER;
397 rq = NULL; 478 rq = NULL;
398 break; 479 break;
399 } else if (ret == BLKPREP_KILL) { 480 } else if (ret == BLKPREP_KILL) {
@@ -416,42 +497,32 @@ struct request *elv_next_request(request_queue_t *q)
416 return rq; 497 return rq;
417} 498}
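
The old elevator_next_req_fn pull model is replaced by a dispatch queue: q->queue_head now holds only requests the scheduler has already released, and __elv_next_request refills it on demand via elevator_dispatch_fn with force=0, treating a zero return as "nothing available". elv_next_request additionally notifies the scheduler the first time a driver sees a sorted request (the activate hook) before setting REQ_STARTED, and maintains the end_sector/boundary_rq pair that elv_dispatch_sort keys on; this split is also why elv_queue_empty below must check the dispatch list before asking the scheduler. Under this interface a scheduler that merely queues in arrival order reduces to two hooks; a hedged sketch (the fifo_* names are invented for illustration):

    /* Sketch of a minimal FIFO scheduler against the new interface. */
    struct fifo_data {
            struct list_head fifo;
    };

    static int fifo_dispatch(request_queue_t *q, int force)
    {
            struct fifo_data *fd = q->elevator->elevator_data;
            struct request *rq;

            if (list_empty(&fd->fifo))
                    return 0;                  /* nothing to release */

            rq = list_entry_rq(fd->fifo.next);
            list_del_init(&rq->queuelist);     /* leave private list */
            elv_dispatch_add_tail(q, rq);      /* onto q->queue_head */
            return 1;
    }

    static void fifo_add_request(request_queue_t *q, struct request *rq)
    {
            struct fifo_data *fd = q->elevator->elevator_data;

            list_add_tail(&rq->queuelist, &fd->fifo);
    }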
418 499
419void elv_remove_request(request_queue_t *q, struct request *rq) 500void elv_dequeue_request(request_queue_t *q, struct request *rq)
420{ 501{
421 elevator_t *e = q->elevator; 502 BUG_ON(list_empty(&rq->queuelist));
503
504 list_del_init(&rq->queuelist);
422 505
423 /* 506 /*
424 * the time frame between a request being removed from the lists 507 * the time frame between a request being removed from the lists
 425 * until it is freed is accounted as io that is in progress at 508 * until it is freed is accounted as io that is in progress at
426 * the driver side. note that we only account requests that the 509 * the driver side.
427 * driver has seen (REQ_STARTED set), to avoid false accounting
428 * for request-request merges
429 */ 510 */
430 if (blk_account_rq(rq)) 511 if (blk_account_rq(rq))
431 q->in_flight++; 512 q->in_flight++;
432
433 /*
434 * the main clearing point for q->last_merge is on retrieval of
435 * request by driver (it calls elv_next_request()), but it _can_
436 * also happen here if a request is added to the queue but later
437 * deleted without ever being given to driver (merged with another
438 * request).
439 */
440 if (rq == q->last_merge)
441 q->last_merge = NULL;
442
443 if (e->ops->elevator_remove_req_fn)
444 e->ops->elevator_remove_req_fn(q, rq);
445} 513}
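
elv_remove_request becomes elv_dequeue_request: sorted requests already left the scheduler's lists at dispatch time, so there is no remove hook left to call, and the function shrinks to the list_del plus the in_flight accounting; the last_merge bookkeeping moves into the generic dispatch helpers. The canonical driver loop keeps its shape; a sketch (mydev_request_fn is a hypothetical driver, and blkdev_dequeue_request is assumed to wrap elv_dequeue_request as it does in this tree):

    /* Typical request_fn shape under the new scheme. */
    static void mydev_request_fn(request_queue_t *q)
    {
            struct request *rq;

            while ((rq = elv_next_request(q)) != NULL) {
                    blkdev_dequeue_request(rq); /* -> elv_dequeue_request() */
                    /* q->in_flight was bumped; start the hardware on rq
                     * and complete it later from the device interrupt */
            }
    }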
446 514
447int elv_queue_empty(request_queue_t *q) 515int elv_queue_empty(request_queue_t *q)
448{ 516{
449 elevator_t *e = q->elevator; 517 elevator_t *e = q->elevator;
450 518
519 if (!list_empty(&q->queue_head))
520 return 0;
521
451 if (e->ops->elevator_queue_empty_fn) 522 if (e->ops->elevator_queue_empty_fn)
452 return e->ops->elevator_queue_empty_fn(q); 523 return e->ops->elevator_queue_empty_fn(q);
453 524
454 return list_empty(&q->queue_head); 525 return 1;
455} 526}
456 527
457struct request *elv_latter_request(request_queue_t *q, struct request *rq) 528struct request *elv_latter_request(request_queue_t *q, struct request *rq)
@@ -487,7 +558,7 @@ struct request *elv_former_request(request_queue_t *q, struct request *rq)
487} 558}
488 559
489int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio, 560int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
490 int gfp_mask) 561 gfp_t gfp_mask)
491{ 562{
492 elevator_t *e = q->elevator; 563 elevator_t *e = q->elevator;
493 564
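
Interleaved with the elevator work is a tree-wide annotation sweep: every allocation-flag parameter that was a bare int or unsigned int becomes gfp_t, starting with elv_set_request here and continuing through blk_alloc_queue, blk_get_request, and the long run of driver hunks below. The payoff is static checking; assuming the typedef of this era (the exact annotation, __nocast versus the later __bitwise, varied by release), sparse can flag flag/integer mixups:

    /* Paraphrased from include/linux/gfp.h of this era (assumption): */
    typedef unsigned int __nocast gfp_t;

    void *ok  = kmalloc(len, GFP_KERNEL); /* flags where flags belong   */
    void *bad = kmalloc(GFP_KERNEL, len); /* swapped args: sparse warns */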
@@ -523,11 +594,11 @@ void elv_completed_request(request_queue_t *q, struct request *rq)
523 /* 594 /*
524 * request is released from the driver, io must be done 595 * request is released from the driver, io must be done
525 */ 596 */
526 if (blk_account_rq(rq)) 597 if (blk_account_rq(rq)) {
527 q->in_flight--; 598 q->in_flight--;
528 599 if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
529 if (e->ops->elevator_completed_req_fn) 600 e->ops->elevator_completed_req_fn(q, rq);
530 e->ops->elevator_completed_req_fn(q, rq); 601 }
531} 602}
532 603
533int elv_register_queue(struct request_queue *q) 604int elv_register_queue(struct request_queue *q)
@@ -555,10 +626,9 @@ void elv_unregister_queue(struct request_queue *q)
555 626
556int elv_register(struct elevator_type *e) 627int elv_register(struct elevator_type *e)
557{ 628{
629 spin_lock_irq(&elv_list_lock);
558 if (elevator_find(e->elevator_name)) 630 if (elevator_find(e->elevator_name))
559 BUG(); 631 BUG();
560
561 spin_lock_irq(&elv_list_lock);
562 list_add_tail(&e->list, &elv_list); 632 list_add_tail(&e->list, &elv_list);
563 spin_unlock_irq(&elv_list_lock); 633 spin_unlock_irq(&elv_list_lock);
564 634
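
A small but real fix rides along in elv_register: the duplicate-name check moves inside the elv_list_lock critical section. Previously the find and the list_add were separated by the lock acquisition, leaving a classic check-then-act window:

    /* The window the patch closes (two concurrent elv_register calls):
     *
     *   CPU0                              CPU1
     *   elevator_find("foo") -> NULL
     *                                     elevator_find("foo") -> NULL
     *   spin_lock; list_add; spin_unlock
     *                                     spin_lock; list_add; spin_unlock
     *
     * Both registrations pass the check and "foo" lands on elv_list
     * twice. With the lookup under the lock, the loser hits the BUG(). */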
@@ -582,25 +652,36 @@ EXPORT_SYMBOL_GPL(elv_unregister);
582 * switch to new_e io scheduler. be careful not to introduce deadlocks - 652 * switch to new_e io scheduler. be careful not to introduce deadlocks -
 583 * we don't free the old io scheduler before we have allocated what we 653 * we don't free the old io scheduler before we have allocated what we
584 * need for the new one. this way we have a chance of going back to the old 654 * need for the new one. this way we have a chance of going back to the old
585 * one, if the new one fails init for some reason. we also do an intermediate 655 * one, if the new one fails init for some reason.
586 * switch to noop to ensure safety with stack-allocated requests, since they
587 * don't originate from the block layer allocator. noop is safe here, because
588 * it never needs to touch the elevator itself for completion events. DRAIN
589 * flags will make sure we don't touch it for additions either.
590 */ 656 */
591static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) 657static void elevator_switch(request_queue_t *q, struct elevator_type *new_e)
592{ 658{
593 elevator_t *e = kmalloc(sizeof(elevator_t), GFP_KERNEL); 659 elevator_t *old_elevator, *e;
594 struct elevator_type *noop_elevator = NULL;
595 elevator_t *old_elevator;
596 660
661 /*
662 * Allocate new elevator
663 */
664 e = kmalloc(sizeof(elevator_t), GFP_KERNEL);
597 if (!e) 665 if (!e)
598 goto error; 666 goto error;
599 667
600 /* 668 /*
601 * first step, drain requests from the block freelist 669 * Turn on BYPASS and drain all requests w/ elevator private data
602 */ 670 */
603 blk_wait_queue_drained(q, 0); 671 spin_lock_irq(q->queue_lock);
672
673 set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
674
675 while (q->elevator->ops->elevator_dispatch_fn(q, 1))
676 ;
677
678 while (q->rq.elvpriv) {
679 spin_unlock_irq(q->queue_lock);
680 msleep(10);
681 spin_lock_irq(q->queue_lock);
682 }
683
684 spin_unlock_irq(q->queue_lock);
604 685
605 /* 686 /*
606 * unregister old elevator data 687 * unregister old elevator data
@@ -609,18 +690,6 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e)
609 old_elevator = q->elevator; 690 old_elevator = q->elevator;
610 691
611 /* 692 /*
612 * next step, switch to noop since it uses no private rq structures
613 * and doesn't allocate any memory for anything. then wait for any
614 * non-fs requests in-flight
615 */
616 noop_elevator = elevator_get("noop");
617 spin_lock_irq(q->queue_lock);
618 elevator_attach(q, noop_elevator, e);
619 spin_unlock_irq(q->queue_lock);
620
621 blk_wait_queue_drained(q, 1);
622
623 /*
624 * attach and start new elevator 693 * attach and start new elevator
625 */ 694 */
626 if (elevator_attach(q, new_e, e)) 695 if (elevator_attach(q, new_e, e))
@@ -630,11 +699,10 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e)
630 goto fail_register; 699 goto fail_register;
631 700
632 /* 701 /*
633 * finally exit old elevator and start queue again 702 * finally exit old elevator and turn off BYPASS.
634 */ 703 */
635 elevator_exit(old_elevator); 704 elevator_exit(old_elevator);
636 blk_finish_queue_drain(q); 705 clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
637 elevator_put(noop_elevator);
638 return; 706 return;
639 707
640fail_register: 708fail_register:
@@ -643,13 +711,13 @@ fail_register:
643 * one again (along with re-adding the sysfs dir) 711 * one again (along with re-adding the sysfs dir)
644 */ 712 */
645 elevator_exit(e); 713 elevator_exit(e);
714 e = NULL;
646fail: 715fail:
647 q->elevator = old_elevator; 716 q->elevator = old_elevator;
648 elv_register_queue(q); 717 elv_register_queue(q);
649 blk_finish_queue_drain(q); 718 clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
719 kfree(e);
650error: 720error:
651 if (noop_elevator)
652 elevator_put(noop_elevator);
653 elevator_put(new_e); 721 elevator_put(new_e);
 654 printk(KERN_ERR "elevator: switch to %s failed\n", new_e->elevator_name); 722 printk(KERN_ERR "elevator: switch to %s failed\n", new_e->elevator_name);
655} 723}
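
The intermediate switch to noop is replaced by a bypass flag. Once QUEUE_FLAG_ELVSWITCH is set, get_request (in the ll_rw_blk.c hunks below) stops attaching elevator-private data to new requests, so q->rq.elvpriv, the count of requests the old scheduler still owns, can only decrease; force-dispatching empties the scheduler's own lists and the msleep loop waits out requests already in flight. After that the old elevator_t can be freed with nothing referencing it. The quiesce idiom in outline (a sketch named quiesce for illustration; the real code inlines this in elevator_switch):

    static void quiesce(request_queue_t *q)
    {
            spin_lock_irq(q->queue_lock);
            set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); /* no new refs */

            while (q->elevator->ops->elevator_dispatch_fn(q, 1))
                    ;                       /* flush scheduler lists */

            while (q->rq.elvpriv) {         /* strictly decreasing now */
                    spin_unlock_irq(q->queue_lock);
                    msleep(10);
                    spin_lock_irq(q->queue_lock);
            }
            spin_unlock_irq(q->queue_lock);
    }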
@@ -701,11 +769,12 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name)
701 return len; 769 return len;
702} 770}
703 771
772EXPORT_SYMBOL(elv_dispatch_sort);
704EXPORT_SYMBOL(elv_add_request); 773EXPORT_SYMBOL(elv_add_request);
705EXPORT_SYMBOL(__elv_add_request); 774EXPORT_SYMBOL(__elv_add_request);
706EXPORT_SYMBOL(elv_requeue_request); 775EXPORT_SYMBOL(elv_requeue_request);
707EXPORT_SYMBOL(elv_next_request); 776EXPORT_SYMBOL(elv_next_request);
708EXPORT_SYMBOL(elv_remove_request); 777EXPORT_SYMBOL(elv_dequeue_request);
709EXPORT_SYMBOL(elv_queue_empty); 778EXPORT_SYMBOL(elv_queue_empty);
710EXPORT_SYMBOL(elv_completed_request); 779EXPORT_SYMBOL(elv_completed_request);
711EXPORT_SYMBOL(elevator_exit); 780EXPORT_SYMBOL(elevator_exit);
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index baedac522945..0af73512b9a8 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -263,8 +263,6 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
263 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); 263 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
264 264
265 blk_queue_activity_fn(q, NULL, NULL); 265 blk_queue_activity_fn(q, NULL, NULL);
266
267 INIT_LIST_HEAD(&q->drain_list);
268} 266}
269 267
270EXPORT_SYMBOL(blk_queue_make_request); 268EXPORT_SYMBOL(blk_queue_make_request);
@@ -353,6 +351,8 @@ static void blk_pre_flush_end_io(struct request *flush_rq)
353 struct request *rq = flush_rq->end_io_data; 351 struct request *rq = flush_rq->end_io_data;
354 request_queue_t *q = rq->q; 352 request_queue_t *q = rq->q;
355 353
354 elv_completed_request(q, flush_rq);
355
356 rq->flags |= REQ_BAR_PREFLUSH; 356 rq->flags |= REQ_BAR_PREFLUSH;
357 357
358 if (!flush_rq->errors) 358 if (!flush_rq->errors)
@@ -369,6 +369,8 @@ static void blk_post_flush_end_io(struct request *flush_rq)
369 struct request *rq = flush_rq->end_io_data; 369 struct request *rq = flush_rq->end_io_data;
370 request_queue_t *q = rq->q; 370 request_queue_t *q = rq->q;
371 371
372 elv_completed_request(q, flush_rq);
373
372 rq->flags |= REQ_BAR_POSTFLUSH; 374 rq->flags |= REQ_BAR_POSTFLUSH;
373 375
374 q->end_flush_fn(q, flush_rq); 376 q->end_flush_fn(q, flush_rq);
@@ -408,8 +410,6 @@ struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq)
408 if (!list_empty(&rq->queuelist)) 410 if (!list_empty(&rq->queuelist))
409 blkdev_dequeue_request(rq); 411 blkdev_dequeue_request(rq);
410 412
411 elv_deactivate_request(q, rq);
412
413 flush_rq->end_io_data = rq; 413 flush_rq->end_io_data = rq;
414 flush_rq->end_io = blk_pre_flush_end_io; 414 flush_rq->end_io = blk_pre_flush_end_io;
415 415
@@ -1040,6 +1040,7 @@ EXPORT_SYMBOL(blk_queue_invalidate_tags);
1040static char *rq_flags[] = { 1040static char *rq_flags[] = {
1041 "REQ_RW", 1041 "REQ_RW",
1042 "REQ_FAILFAST", 1042 "REQ_FAILFAST",
1043 "REQ_SORTED",
1043 "REQ_SOFTBARRIER", 1044 "REQ_SOFTBARRIER",
1044 "REQ_HARDBARRIER", 1045 "REQ_HARDBARRIER",
1045 "REQ_CMD", 1046 "REQ_CMD",
@@ -1047,6 +1048,7 @@ static char *rq_flags[] = {
1047 "REQ_STARTED", 1048 "REQ_STARTED",
1048 "REQ_DONTPREP", 1049 "REQ_DONTPREP",
1049 "REQ_QUEUED", 1050 "REQ_QUEUED",
1051 "REQ_ELVPRIV",
1050 "REQ_PC", 1052 "REQ_PC",
1051 "REQ_BLOCK_PC", 1053 "REQ_BLOCK_PC",
1052 "REQ_SENSE", 1054 "REQ_SENSE",
@@ -1637,9 +1639,9 @@ static int blk_init_free_list(request_queue_t *q)
1637 1639
1638 rl->count[READ] = rl->count[WRITE] = 0; 1640 rl->count[READ] = rl->count[WRITE] = 0;
1639 rl->starved[READ] = rl->starved[WRITE] = 0; 1641 rl->starved[READ] = rl->starved[WRITE] = 0;
1642 rl->elvpriv = 0;
1640 init_waitqueue_head(&rl->wait[READ]); 1643 init_waitqueue_head(&rl->wait[READ]);
1641 init_waitqueue_head(&rl->wait[WRITE]); 1644 init_waitqueue_head(&rl->wait[WRITE]);
1642 init_waitqueue_head(&rl->drain);
1643 1645
1644 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, 1646 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
1645 mempool_free_slab, request_cachep, q->node); 1647 mempool_free_slab, request_cachep, q->node);
@@ -1652,13 +1654,13 @@ static int blk_init_free_list(request_queue_t *q)
1652 1654
1653static int __make_request(request_queue_t *, struct bio *); 1655static int __make_request(request_queue_t *, struct bio *);
1654 1656
1655request_queue_t *blk_alloc_queue(int gfp_mask) 1657request_queue_t *blk_alloc_queue(gfp_t gfp_mask)
1656{ 1658{
1657 return blk_alloc_queue_node(gfp_mask, -1); 1659 return blk_alloc_queue_node(gfp_mask, -1);
1658} 1660}
1659EXPORT_SYMBOL(blk_alloc_queue); 1661EXPORT_SYMBOL(blk_alloc_queue);
1660 1662
1661request_queue_t *blk_alloc_queue_node(int gfp_mask, int node_id) 1663request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
1662{ 1664{
1663 request_queue_t *q; 1665 request_queue_t *q;
1664 1666
@@ -1782,12 +1784,14 @@ EXPORT_SYMBOL(blk_get_queue);
1782 1784
1783static inline void blk_free_request(request_queue_t *q, struct request *rq) 1785static inline void blk_free_request(request_queue_t *q, struct request *rq)
1784{ 1786{
1785 elv_put_request(q, rq); 1787 if (rq->flags & REQ_ELVPRIV)
1788 elv_put_request(q, rq);
1786 mempool_free(rq, q->rq.rq_pool); 1789 mempool_free(rq, q->rq.rq_pool);
1787} 1790}
1788 1791
1789static inline struct request * 1792static inline struct request *
1790blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask) 1793blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
1794 int priv, gfp_t gfp_mask)
1791{ 1795{
1792 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); 1796 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
1793 1797
@@ -1800,11 +1804,15 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask)
1800 */ 1804 */
1801 rq->flags = rw; 1805 rq->flags = rw;
1802 1806
1803 if (!elv_set_request(q, rq, bio, gfp_mask)) 1807 if (priv) {
1804 return rq; 1808 if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
1809 mempool_free(rq, q->rq.rq_pool);
1810 return NULL;
1811 }
1812 rq->flags |= REQ_ELVPRIV;
1813 }
1805 1814
1806 mempool_free(rq, q->rq.rq_pool); 1815 return rq;
1807 return NULL;
1808} 1816}
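
blk_alloc_request gains a priv argument: only when it is set does elv_set_request run, and success is recorded in the new REQ_ELVPRIV flag; blk_free_request keys the matching elv_put_request on that same flag. The private-data lifetime becomes self-describing on the request instead of an implicit property of how it was allocated. A sketch of the pairing (toy_alloc/toy_free are invented names mirroring blk_alloc_request/blk_free_request):

    static struct request *toy_alloc(request_queue_t *q, int priv, gfp_t gfp)
    {
            struct request *rq = mempool_alloc(q->rq.rq_pool, gfp);

            if (!rq)
                    return NULL;
            rq->flags = 0;                  /* real code stores rw here */
            if (priv) {
                    if (elv_set_request(q, rq, NULL, gfp)) {
                            mempool_free(rq, q->rq.rq_pool);
                            return NULL;
                    }
                    rq->flags |= REQ_ELVPRIV; /* remember for free path */
            }
            return rq;
    }

    static void toy_free(request_queue_t *q, struct request *rq)
    {
            if (rq->flags & REQ_ELVPRIV)    /* only if set_request ran */
                    elv_put_request(q, rq);
            mempool_free(rq, q->rq.rq_pool);
    }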
1809 1817
1810/* 1818/*
@@ -1860,22 +1868,18 @@ static void __freed_request(request_queue_t *q, int rw)
1860 * A request has just been released. Account for it, update the full and 1868 * A request has just been released. Account for it, update the full and
1861 * congestion status, wake up any waiters. Called under q->queue_lock. 1869 * congestion status, wake up any waiters. Called under q->queue_lock.
1862 */ 1870 */
1863static void freed_request(request_queue_t *q, int rw) 1871static void freed_request(request_queue_t *q, int rw, int priv)
1864{ 1872{
1865 struct request_list *rl = &q->rq; 1873 struct request_list *rl = &q->rq;
1866 1874
1867 rl->count[rw]--; 1875 rl->count[rw]--;
1876 if (priv)
1877 rl->elvpriv--;
1868 1878
1869 __freed_request(q, rw); 1879 __freed_request(q, rw);
1870 1880
1871 if (unlikely(rl->starved[rw ^ 1])) 1881 if (unlikely(rl->starved[rw ^ 1]))
1872 __freed_request(q, rw ^ 1); 1882 __freed_request(q, rw ^ 1);
1873
1874 if (!rl->count[READ] && !rl->count[WRITE]) {
1875 smp_mb();
1876 if (unlikely(waitqueue_active(&rl->drain)))
1877 wake_up(&rl->drain);
1878 }
1879} 1883}
1880 1884
1881#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) 1885#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
@@ -1885,14 +1889,12 @@ static void freed_request(request_queue_t *q, int rw)
1885 * Returns !NULL on success, with queue_lock *not held*. 1889 * Returns !NULL on success, with queue_lock *not held*.
1886 */ 1890 */
1887static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, 1891static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
1888 int gfp_mask) 1892 gfp_t gfp_mask)
1889{ 1893{
1890 struct request *rq = NULL; 1894 struct request *rq = NULL;
1891 struct request_list *rl = &q->rq; 1895 struct request_list *rl = &q->rq;
1892 struct io_context *ioc = current_io_context(GFP_ATOMIC); 1896 struct io_context *ioc = current_io_context(GFP_ATOMIC);
1893 1897 int priv;
1894 if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)))
1895 goto out;
1896 1898
1897 if (rl->count[rw]+1 >= q->nr_requests) { 1899 if (rl->count[rw]+1 >= q->nr_requests) {
1898 /* 1900 /*
@@ -1937,9 +1939,14 @@ get_rq:
1937 rl->starved[rw] = 0; 1939 rl->starved[rw] = 0;
1938 if (rl->count[rw] >= queue_congestion_on_threshold(q)) 1940 if (rl->count[rw] >= queue_congestion_on_threshold(q))
1939 set_queue_congested(q, rw); 1941 set_queue_congested(q, rw);
1942
1943 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
1944 if (priv)
1945 rl->elvpriv++;
1946
1940 spin_unlock_irq(q->queue_lock); 1947 spin_unlock_irq(q->queue_lock);
1941 1948
1942 rq = blk_alloc_request(q, rw, bio, gfp_mask); 1949 rq = blk_alloc_request(q, rw, bio, priv, gfp_mask);
1943 if (!rq) { 1950 if (!rq) {
1944 /* 1951 /*
1945 * Allocation failed presumably due to memory. Undo anything 1952 * Allocation failed presumably due to memory. Undo anything
@@ -1949,7 +1956,7 @@ get_rq:
1949 * wait queue, but this is pretty rare. 1956 * wait queue, but this is pretty rare.
1950 */ 1957 */
1951 spin_lock_irq(q->queue_lock); 1958 spin_lock_irq(q->queue_lock);
1952 freed_request(q, rw); 1959 freed_request(q, rw, priv);
1953 1960
1954 /* 1961 /*
1955 * in the very unlikely event that allocation failed and no 1962 * in the very unlikely event that allocation failed and no
@@ -2019,7 +2026,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw,
2019 return rq; 2026 return rq;
2020} 2027}
2021 2028
2022struct request *blk_get_request(request_queue_t *q, int rw, int gfp_mask) 2029struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask)
2023{ 2030{
2024 struct request *rq; 2031 struct request *rq;
2025 2032
@@ -2251,7 +2258,7 @@ EXPORT_SYMBOL(blk_rq_unmap_user);
2251 * @gfp_mask: memory allocation flags 2258 * @gfp_mask: memory allocation flags
2252 */ 2259 */
2253int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf, 2260int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf,
2254 unsigned int len, unsigned int gfp_mask) 2261 unsigned int len, gfp_t gfp_mask)
2255{ 2262{
2256 struct bio *bio; 2263 struct bio *bio;
2257 2264
@@ -2433,13 +2440,15 @@ void disk_round_stats(struct gendisk *disk)
2433{ 2440{
2434 unsigned long now = jiffies; 2441 unsigned long now = jiffies;
2435 2442
2436 __disk_stat_add(disk, time_in_queue, 2443 if (now == disk->stamp)
2437 disk->in_flight * (now - disk->stamp)); 2444 return;
2438 disk->stamp = now;
2439 2445
2440 if (disk->in_flight) 2446 if (disk->in_flight) {
2441 __disk_stat_add(disk, io_ticks, (now - disk->stamp_idle)); 2447 __disk_stat_add(disk, time_in_queue,
2442 disk->stamp_idle = now; 2448 disk->in_flight * (now - disk->stamp));
2449 __disk_stat_add(disk, io_ticks, (now - disk->stamp));
2450 }
2451 disk->stamp = now;
2443} 2452}
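
The disk_round_stats rewrite fixes two accounting quirks: repeated calls within one jiffy now return early instead of re-adding zero-length intervals against a freshly reset stamp, and the separate stamp_idle is dropped, since io_ticks should only advance while requests are actually in flight. A worked example under the new code, all values in jiffies:

    /* disk->stamp = 100, now = 105, disk->in_flight = 3
     *   time_in_queue += 3 * (105 - 100) = 15  (request-jiffies queued)
     *   io_ticks      +=     (105 - 100) =  5  (disk was busy 5 ticks)
     *   stamp = 105
     * A second call still at now == 105 returns immediately.
     * With in_flight == 0 neither counter moves, but stamp is still
     * refreshed, so idle time never inflates io_ticks. */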
2444 2453
2445/* 2454/*
@@ -2454,6 +2463,8 @@ static void __blk_put_request(request_queue_t *q, struct request *req)
2454 if (unlikely(--req->ref_count)) 2463 if (unlikely(--req->ref_count))
2455 return; 2464 return;
2456 2465
2466 elv_completed_request(q, req);
2467
2457 req->rq_status = RQ_INACTIVE; 2468 req->rq_status = RQ_INACTIVE;
2458 req->rl = NULL; 2469 req->rl = NULL;
2459 2470
@@ -2463,26 +2474,25 @@ static void __blk_put_request(request_queue_t *q, struct request *req)
2463 */ 2474 */
2464 if (rl) { 2475 if (rl) {
2465 int rw = rq_data_dir(req); 2476 int rw = rq_data_dir(req);
2466 2477 int priv = req->flags & REQ_ELVPRIV;
2467 elv_completed_request(q, req);
2468 2478
2469 BUG_ON(!list_empty(&req->queuelist)); 2479 BUG_ON(!list_empty(&req->queuelist));
2470 2480
2471 blk_free_request(q, req); 2481 blk_free_request(q, req);
2472 freed_request(q, rw); 2482 freed_request(q, rw, priv);
2473 } 2483 }
2474} 2484}
2475 2485
2476void blk_put_request(struct request *req) 2486void blk_put_request(struct request *req)
2477{ 2487{
2488 unsigned long flags;
2489 request_queue_t *q = req->q;
2490
2478 /* 2491 /*
 2479 * if req->rl isn't set, this request didn't originate from the 2492 * Gee, IDE calls in w/ NULL q. Fix IDE and remove the
2480 * block layer, so it's safe to just disregard it 2493 * following if (q) test.
2481 */ 2494 */
2482 if (req->rl) { 2495 if (q) {
2483 unsigned long flags;
2484 request_queue_t *q = req->q;
2485
2486 spin_lock_irqsave(q->queue_lock, flags); 2496 spin_lock_irqsave(q->queue_lock, flags);
2487 __blk_put_request(q, req); 2497 __blk_put_request(q, req);
2488 spin_unlock_irqrestore(q->queue_lock, flags); 2498 spin_unlock_irqrestore(q->queue_lock, flags);
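
elv_completed_request moves out of the request_list-only branch into __blk_put_request proper, and the two flush end_io handlers earlier in this file's hunks now call it for the flush request explicitly; together with the requeue path this keeps q->in_flight balanced for every accounted request, whatever its origin. The invariant in outline (the blk_account_rq gating detail is read from this tree's headers, not shown in this patch):

    /* q->in_flight bookkeeping after this patch; blk_account_rq()
     * gates all three paths on a started fs request:
     *
     *   elv_dequeue_request()    q->in_flight++  driver accepted the rq
     *   elv_requeue_request()    q->in_flight--  driver handed it back
     *   elv_completed_request()  q->in_flight--  driver finished it
     *
     * Every increment is matched by exactly one decrement, so the
     * unplug-threshold arithmetic in __elv_add_request() stays honest. */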
@@ -2797,97 +2807,6 @@ static inline void blk_partition_remap(struct bio *bio)
2797 } 2807 }
2798} 2808}
2799 2809
2800void blk_finish_queue_drain(request_queue_t *q)
2801{
2802 struct request_list *rl = &q->rq;
2803 struct request *rq;
2804 int requeued = 0;
2805
2806 spin_lock_irq(q->queue_lock);
2807 clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
2808
2809 while (!list_empty(&q->drain_list)) {
2810 rq = list_entry_rq(q->drain_list.next);
2811
2812 list_del_init(&rq->queuelist);
2813 elv_requeue_request(q, rq);
2814 requeued++;
2815 }
2816
2817 if (requeued)
2818 q->request_fn(q);
2819
2820 spin_unlock_irq(q->queue_lock);
2821
2822 wake_up(&rl->wait[0]);
2823 wake_up(&rl->wait[1]);
2824 wake_up(&rl->drain);
2825}
2826
2827static int wait_drain(request_queue_t *q, struct request_list *rl, int dispatch)
2828{
2829 int wait = rl->count[READ] + rl->count[WRITE];
2830
2831 if (dispatch)
2832 wait += !list_empty(&q->queue_head);
2833
2834 return wait;
2835}
2836
2837/*
2838 * We rely on the fact that only requests allocated through blk_alloc_request()
2839 * have io scheduler private data structures associated with them. Any other
2840 * type of request (allocated on stack or through kmalloc()) should not go
2841 * to the io scheduler core, but be attached to the queue head instead.
2842 */
2843void blk_wait_queue_drained(request_queue_t *q, int wait_dispatch)
2844{
2845 struct request_list *rl = &q->rq;
2846 DEFINE_WAIT(wait);
2847
2848 spin_lock_irq(q->queue_lock);
2849 set_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
2850
2851 while (wait_drain(q, rl, wait_dispatch)) {
2852 prepare_to_wait(&rl->drain, &wait, TASK_UNINTERRUPTIBLE);
2853
2854 if (wait_drain(q, rl, wait_dispatch)) {
2855 __generic_unplug_device(q);
2856 spin_unlock_irq(q->queue_lock);
2857 io_schedule();
2858 spin_lock_irq(q->queue_lock);
2859 }
2860
2861 finish_wait(&rl->drain, &wait);
2862 }
2863
2864 spin_unlock_irq(q->queue_lock);
2865}
2866
2867/*
2868 * block waiting for the io scheduler being started again.
2869 */
2870static inline void block_wait_queue_running(request_queue_t *q)
2871{
2872 DEFINE_WAIT(wait);
2873
2874 while (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) {
2875 struct request_list *rl = &q->rq;
2876
2877 prepare_to_wait_exclusive(&rl->drain, &wait,
2878 TASK_UNINTERRUPTIBLE);
2879
2880 /*
2881 * re-check the condition. avoids using prepare_to_wait()
2882 * in the fast path (queue is running)
2883 */
2884 if (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))
2885 io_schedule();
2886
2887 finish_wait(&rl->drain, &wait);
2888 }
2889}
2890
2891static void handle_bad_sector(struct bio *bio) 2810static void handle_bad_sector(struct bio *bio)
2892{ 2811{
2893 char b[BDEVNAME_SIZE]; 2812 char b[BDEVNAME_SIZE];
@@ -2983,8 +2902,6 @@ end_io:
2983 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) 2902 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
2984 goto end_io; 2903 goto end_io;
2985 2904
2986 block_wait_queue_running(q);
2987
2988 /* 2905 /*
2989 * If this device has partitions, remap block n 2906 * If this device has partitions, remap block n
2990 * of partition p to block n+start(p) of the disk. 2907 * of partition p to block n+start(p) of the disk.
@@ -3393,7 +3310,7 @@ void exit_io_context(void)
3393 * but since the current task itself holds a reference, the context can be 3310 * but since the current task itself holds a reference, the context can be
3394 * used in general code, so long as it stays within `current` context. 3311 * used in general code, so long as it stays within `current` context.
3395 */ 3312 */
3396struct io_context *current_io_context(int gfp_flags) 3313struct io_context *current_io_context(gfp_t gfp_flags)
3397{ 3314{
3398 struct task_struct *tsk = current; 3315 struct task_struct *tsk = current;
3399 struct io_context *ret; 3316 struct io_context *ret;
@@ -3424,7 +3341,7 @@ EXPORT_SYMBOL(current_io_context);
3424 * 3341 *
3425 * This is always called in the context of the task which submitted the I/O. 3342 * This is always called in the context of the task which submitted the I/O.
3426 */ 3343 */
3427struct io_context *get_io_context(int gfp_flags) 3344struct io_context *get_io_context(gfp_t gfp_flags)
3428{ 3345{
3429 struct io_context *ret; 3346 struct io_context *ret;
3430 ret = current_io_context(gfp_flags); 3347 ret = current_io_context(gfp_flags);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index b35e08876dd4..96c664af8d06 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -881,7 +881,7 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
881static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) 881static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
882{ 882{
883 struct file *filp = lo->lo_backing_file; 883 struct file *filp = lo->lo_backing_file;
884 int gfp = lo->old_gfp_mask; 884 gfp_t gfp = lo->old_gfp_mask;
885 885
886 if (lo->lo_state != Lo_bound) 886 if (lo->lo_state != Lo_bound)
887 return -ENXIO; 887 return -ENXIO;
diff --git a/drivers/block/noop-iosched.c b/drivers/block/noop-iosched.c
index b1730b62c37e..f56b8edb06e4 100644
--- a/drivers/block/noop-iosched.c
+++ b/drivers/block/noop-iosched.c
@@ -7,57 +7,19 @@
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/init.h> 8#include <linux/init.h>
9 9
10/* 10static void elevator_noop_add_request(request_queue_t *q, struct request *rq)
11 * See if we can find a request that this buffer can be coalesced with.
12 */
13static int elevator_noop_merge(request_queue_t *q, struct request **req,
14 struct bio *bio)
15{
16 int ret;
17
18 ret = elv_try_last_merge(q, bio);
19 if (ret != ELEVATOR_NO_MERGE)
20 *req = q->last_merge;
21
22 return ret;
23}
24
25static void elevator_noop_merge_requests(request_queue_t *q, struct request *req,
26 struct request *next)
27{
28 list_del_init(&next->queuelist);
29}
30
31static void elevator_noop_add_request(request_queue_t *q, struct request *rq,
32 int where)
33{ 11{
34 if (where == ELEVATOR_INSERT_FRONT) 12 elv_dispatch_add_tail(q, rq);
35 list_add(&rq->queuelist, &q->queue_head);
36 else
37 list_add_tail(&rq->queuelist, &q->queue_head);
38
39 /*
40 * new merges must not precede this barrier
41 */
42 if (rq->flags & REQ_HARDBARRIER)
43 q->last_merge = NULL;
44 else if (!q->last_merge)
45 q->last_merge = rq;
46} 13}
47 14
48static struct request *elevator_noop_next_request(request_queue_t *q) 15static int elevator_noop_dispatch(request_queue_t *q, int force)
49{ 16{
50 if (!list_empty(&q->queue_head)) 17 return 0;
51 return list_entry_rq(q->queue_head.next);
52
53 return NULL;
54} 18}
55 19
56static struct elevator_type elevator_noop = { 20static struct elevator_type elevator_noop = {
57 .ops = { 21 .ops = {
58 .elevator_merge_fn = elevator_noop_merge, 22 .elevator_dispatch_fn = elevator_noop_dispatch,
59 .elevator_merge_req_fn = elevator_noop_merge_requests,
60 .elevator_next_req_fn = elevator_noop_next_request,
61 .elevator_add_req_fn = elevator_noop_add_request, 23 .elevator_add_req_fn = elevator_noop_add_request,
62 }, 24 },
63 .elevator_name = "noop", 25 .elevator_name = "noop",
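
The noop scheduler shows how much the new interface shrinks a pass-through policy: add_request pushes the request straight onto the dispatch queue with elv_dispatch_add_tail, so dispatch never has anything buffered and returns 0, and the merge, merge_requests, and next_request hooks disappear because the generic layer now handles all of that. The module boilerplate around this hunk is untouched; for context it presumably still reads roughly:

    /* Registration boilerplate (not in this hunk; assumed context): */
    static int __init noop_init(void)
    {
            return elv_register(&elevator_noop);
    }

    static void __exit noop_exit(void)
    {
            elv_unregister(&elevator_noop);
    }

    module_init(noop_init);
    module_exit(noop_exit);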
diff --git a/drivers/block/rd.c b/drivers/block/rd.c
index 145c1fbffe01..68c60a5bcdab 100644
--- a/drivers/block/rd.c
+++ b/drivers/block/rd.c
@@ -348,7 +348,7 @@ static int rd_open(struct inode *inode, struct file *filp)
348 struct block_device *bdev = inode->i_bdev; 348 struct block_device *bdev = inode->i_bdev;
349 struct address_space *mapping; 349 struct address_space *mapping;
350 unsigned bsize; 350 unsigned bsize;
351 int gfp_mask; 351 gfp_t gfp_mask;
352 352
353 inode = igrab(bdev->bd_inode); 353 inode = igrab(bdev->bd_inode);
354 rd_bdev[unit] = bdev; 354 rd_bdev[unit] = bdev;
diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c
index c9bdf544ed2c..c556f4d3ccd7 100644
--- a/drivers/char/n_tty.c
+++ b/drivers/char/n_tty.c
@@ -62,7 +62,7 @@
62 62
63static inline unsigned char *alloc_buf(void) 63static inline unsigned char *alloc_buf(void)
64{ 64{
65 unsigned int prio = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL; 65 gfp_t prio = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
66 66
67 if (PAGE_SIZE != N_TTY_BUF_SIZE) 67 if (PAGE_SIZE != N_TTY_BUF_SIZE)
68 return kmalloc(N_TTY_BUF_SIZE, prio); 68 return kmalloc(N_TTY_BUF_SIZE, prio);
diff --git a/drivers/ieee1394/eth1394.c b/drivers/ieee1394/eth1394.c
index 4802bbbb6dc9..c9e92d85c893 100644
--- a/drivers/ieee1394/eth1394.c
+++ b/drivers/ieee1394/eth1394.c
@@ -1630,7 +1630,7 @@ static void ether1394_complete_cb(void *__ptask)
1630/* Transmit a packet (called by kernel) */ 1630/* Transmit a packet (called by kernel) */
1631static int ether1394_tx (struct sk_buff *skb, struct net_device *dev) 1631static int ether1394_tx (struct sk_buff *skb, struct net_device *dev)
1632{ 1632{
1633 int kmflags = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL; 1633 gfp_t kmflags = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
1634 struct eth1394hdr *eth; 1634 struct eth1394hdr *eth;
1635 struct eth1394_priv *priv = netdev_priv(dev); 1635 struct eth1394_priv *priv = netdev_priv(dev);
1636 int proto; 1636 int proto;
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index f6a8ac026557..378646b5a1b8 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -524,7 +524,7 @@ void mthca_cmd_use_polling(struct mthca_dev *dev)
524} 524}
525 525
526struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev, 526struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
527 unsigned int gfp_mask) 527 gfp_t gfp_mask)
528{ 528{
529 struct mthca_mailbox *mailbox; 529 struct mthca_mailbox *mailbox;
530 530
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h
index 65f976a13e02..18175bec84c2 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.h
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.h
@@ -248,7 +248,7 @@ void mthca_cmd_event(struct mthca_dev *dev, u16 token,
248 u8 status, u64 out_param); 248 u8 status, u64 out_param);
249 249
250struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev, 250struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
251 unsigned int gfp_mask); 251 gfp_t gfp_mask);
252void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox); 252void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox);
253 253
254int mthca_SYS_EN(struct mthca_dev *dev, u8 *status); 254int mthca_SYS_EN(struct mthca_dev *dev, u8 *status);
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 7bd7a4bec7b4..9ad8b3b6cfef 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -82,7 +82,7 @@ void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm)
82} 82}
83 83
84struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages, 84struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
85 unsigned int gfp_mask) 85 gfp_t gfp_mask)
86{ 86{
87 struct mthca_icm *icm; 87 struct mthca_icm *icm;
88 struct mthca_icm_chunk *chunk = NULL; 88 struct mthca_icm_chunk *chunk = NULL;
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h
index bafa51544aa3..29433f295253 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.h
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -77,7 +77,7 @@ struct mthca_icm_iter {
77struct mthca_dev; 77struct mthca_dev;
78 78
79struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages, 79struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
80 unsigned int gfp_mask); 80 gfp_t gfp_mask);
81void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm); 81void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm);
82 82
83struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev, 83struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 2fba2bbe72d8..01654fcabc52 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -91,7 +91,7 @@ int bitmap_active(struct bitmap *bitmap)
91 91
92#define WRITE_POOL_SIZE 256 92#define WRITE_POOL_SIZE 256
93/* mempool for queueing pending writes on the bitmap file */ 93/* mempool for queueing pending writes on the bitmap file */
94static void *write_pool_alloc(unsigned int gfp_flags, void *data) 94static void *write_pool_alloc(gfp_t gfp_flags, void *data)
95{ 95{
96 return kmalloc(sizeof(struct page_list), gfp_flags); 96 return kmalloc(sizeof(struct page_list), gfp_flags);
97} 97}
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index b6148f6f7836..28c1a628621f 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -331,7 +331,7 @@ crypt_alloc_buffer(struct crypt_config *cc, unsigned int size,
331{ 331{
332 struct bio *bio; 332 struct bio *bio;
333 unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; 333 unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
334 int gfp_mask = GFP_NOIO | __GFP_HIGHMEM; 334 gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
335 unsigned int i; 335 unsigned int i;
336 336
337 /* 337 /*
diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c
index 2e617424d3fb..50f43dbf31ae 100644
--- a/drivers/net/cassini.c
+++ b/drivers/net/cassini.c
@@ -489,7 +489,7 @@ static int cas_page_free(struct cas *cp, cas_page_t *page)
489/* local page allocation routines for the receive buffers. jumbo pages 489/* local page allocation routines for the receive buffers. jumbo pages
490 * require at least 8K contiguous and 8K aligned buffers. 490 * require at least 8K contiguous and 8K aligned buffers.
491 */ 491 */
492static cas_page_t *cas_page_alloc(struct cas *cp, const int flags) 492static cas_page_t *cas_page_alloc(struct cas *cp, const gfp_t flags)
493{ 493{
494 cas_page_t *page; 494 cas_page_t *page;
495 495
@@ -561,7 +561,7 @@ static void cas_spare_free(struct cas *cp)
561} 561}
562 562
563/* replenish spares if needed */ 563/* replenish spares if needed */
564static void cas_spare_recover(struct cas *cp, const int flags) 564static void cas_spare_recover(struct cas *cp, const gfp_t flags)
565{ 565{
566 struct list_head list, *elem, *tmp; 566 struct list_head list, *elem, *tmp;
567 int needed, i; 567 int needed, i;
diff --git a/drivers/net/lance.c b/drivers/net/lance.c
index b4929beb33b2..1d75ca0bb939 100644
--- a/drivers/net/lance.c
+++ b/drivers/net/lance.c
@@ -298,7 +298,7 @@ enum {OLD_LANCE = 0, PCNET_ISA=1, PCNET_ISAP=2, PCNET_PCI=3, PCNET_VLB=4, PCNET_
298static unsigned char lance_need_isa_bounce_buffers = 1; 298static unsigned char lance_need_isa_bounce_buffers = 1;
299 299
300static int lance_open(struct net_device *dev); 300static int lance_open(struct net_device *dev);
301static void lance_init_ring(struct net_device *dev, int mode); 301static void lance_init_ring(struct net_device *dev, gfp_t mode);
302static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev); 302static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev);
303static int lance_rx(struct net_device *dev); 303static int lance_rx(struct net_device *dev);
304static irqreturn_t lance_interrupt(int irq, void *dev_id, struct pt_regs *regs); 304static irqreturn_t lance_interrupt(int irq, void *dev_id, struct pt_regs *regs);
@@ -846,7 +846,7 @@ lance_purge_ring(struct net_device *dev)
846 846
847/* Initialize the LANCE Rx and Tx rings. */ 847/* Initialize the LANCE Rx and Tx rings. */
848static void 848static void
849lance_init_ring(struct net_device *dev, int gfp) 849lance_init_ring(struct net_device *dev, gfp_t gfp)
850{ 850{
851 struct lance_private *lp = dev->priv; 851 struct lance_private *lp = dev->priv;
852 int i; 852 int i;
diff --git a/drivers/net/myri_sbus.c b/drivers/net/myri_sbus.c
index f0996ce5c268..6c86dca62e2a 100644
--- a/drivers/net/myri_sbus.c
+++ b/drivers/net/myri_sbus.c
@@ -277,7 +277,7 @@ static void myri_init_rings(struct myri_eth *mp, int from_irq)
277 struct recvq __iomem *rq = mp->rq; 277 struct recvq __iomem *rq = mp->rq;
278 struct myri_rxd __iomem *rxd = &rq->myri_rxd[0]; 278 struct myri_rxd __iomem *rxd = &rq->myri_rxd[0];
279 struct net_device *dev = mp->dev; 279 struct net_device *dev = mp->dev;
280 int gfp_flags = GFP_KERNEL; 280 gfp_t gfp_flags = GFP_KERNEL;
281 int i; 281 int i;
282 282
283 if (from_irq || in_interrupt()) 283 if (from_irq || in_interrupt())
diff --git a/drivers/net/myri_sbus.h b/drivers/net/myri_sbus.h
index 9391e55a5e92..47722f708a41 100644
--- a/drivers/net/myri_sbus.h
+++ b/drivers/net/myri_sbus.h
@@ -296,7 +296,7 @@ struct myri_eth {
296/* We use this to acquire receive skb's that we can DMA directly into. */ 296/* We use this to acquire receive skb's that we can DMA directly into. */
297#define ALIGNED_RX_SKB_ADDR(addr) \ 297#define ALIGNED_RX_SKB_ADDR(addr) \
298 ((((unsigned long)(addr) + (64 - 1)) & ~(64 - 1)) - (unsigned long)(addr)) 298 ((((unsigned long)(addr) + (64 - 1)) & ~(64 - 1)) - (unsigned long)(addr))
299static inline struct sk_buff *myri_alloc_skb(unsigned int length, int gfp_flags) 299static inline struct sk_buff *myri_alloc_skb(unsigned int length, gfp_t gfp_flags)
300{ 300{
301 struct sk_buff *skb; 301 struct sk_buff *skb;
302 302
diff --git a/drivers/net/sunbmac.c b/drivers/net/sunbmac.c
index f88f5e32b714..cfaf47c63c58 100644
--- a/drivers/net/sunbmac.c
+++ b/drivers/net/sunbmac.c
@@ -214,7 +214,8 @@ static void bigmac_init_rings(struct bigmac *bp, int from_irq)
214{ 214{
215 struct bmac_init_block *bb = bp->bmac_block; 215 struct bmac_init_block *bb = bp->bmac_block;
216 struct net_device *dev = bp->dev; 216 struct net_device *dev = bp->dev;
217 int i, gfp_flags = GFP_KERNEL; 217 int i;
218 gfp_t gfp_flags = GFP_KERNEL;
218 219
219 if (from_irq || in_interrupt()) 220 if (from_irq || in_interrupt())
220 gfp_flags = GFP_ATOMIC; 221 gfp_flags = GFP_ATOMIC;
diff --git a/drivers/net/sunbmac.h b/drivers/net/sunbmac.h
index 5674003fc38a..b0dbc5187143 100644
--- a/drivers/net/sunbmac.h
+++ b/drivers/net/sunbmac.h
@@ -339,7 +339,7 @@ struct bigmac {
339#define ALIGNED_RX_SKB_ADDR(addr) \ 339#define ALIGNED_RX_SKB_ADDR(addr) \
340 ((((unsigned long)(addr) + (64 - 1)) & ~(64 - 1)) - (unsigned long)(addr)) 340 ((((unsigned long)(addr) + (64 - 1)) & ~(64 - 1)) - (unsigned long)(addr))
341 341
342static inline struct sk_buff *big_mac_alloc_skb(unsigned int length, int gfp_flags) 342static inline struct sk_buff *big_mac_alloc_skb(unsigned int length, gfp_t gfp_flags)
343{ 343{
344 struct sk_buff *skb; 344 struct sk_buff *skb;
345 345
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index 0e98a9d9834c..a3bd91a61827 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -836,7 +836,7 @@ ccio_unmap_single(struct device *dev, dma_addr_t iova, size_t size,
836 * This function implements the pci_alloc_consistent function. 836 * This function implements the pci_alloc_consistent function.
837 */ 837 */
838static void * 838static void *
839ccio_alloc_consistent(struct device *dev, size_t size, dma_addr_t *dma_handle, int flag) 839ccio_alloc_consistent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag)
840{ 840{
841 void *ret; 841 void *ret;
842#if 0 842#if 0
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index 82ea68b55df4..bd8b3e5a5cd7 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -986,7 +986,7 @@ sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size,
986 * See Documentation/DMA-mapping.txt 986 * See Documentation/DMA-mapping.txt
987 */ 987 */
988static void *sba_alloc_consistent(struct device *hwdev, size_t size, 988static void *sba_alloc_consistent(struct device *hwdev, size_t size,
989 dma_addr_t *dma_handle, int gfp) 989 dma_addr_t *dma_handle, gfp_t gfp)
990{ 990{
991 void *ret; 991 void *ret;
992 992
diff --git a/drivers/s390/net/fsm.c b/drivers/s390/net/fsm.c
index fa09440d82e5..38f50b7129a2 100644
--- a/drivers/s390/net/fsm.c
+++ b/drivers/s390/net/fsm.c
@@ -16,7 +16,7 @@ MODULE_LICENSE("GPL");
16 16
17fsm_instance * 17fsm_instance *
18init_fsm(char *name, const char **state_names, const char **event_names, int nr_states, 18init_fsm(char *name, const char **state_names, const char **event_names, int nr_states,
19 int nr_events, const fsm_node *tmpl, int tmpl_len, int order) 19 int nr_events, const fsm_node *tmpl, int tmpl_len, gfp_t order)
20{ 20{
21 int i; 21 int i;
22 fsm_instance *this; 22 fsm_instance *this;
diff --git a/drivers/s390/net/fsm.h b/drivers/s390/net/fsm.h
index f9a011001eb6..1b8a7e7c34f3 100644
--- a/drivers/s390/net/fsm.h
+++ b/drivers/s390/net/fsm.h
@@ -110,7 +110,7 @@ extern fsm_instance *
110init_fsm(char *name, const char **state_names, 110init_fsm(char *name, const char **state_names,
111 const char **event_names, 111 const char **event_names,
112 int nr_states, int nr_events, const fsm_node *tmpl, 112 int nr_states, int nr_events, const fsm_node *tmpl,
113 int tmpl_len, int order); 113 int tmpl_len, gfp_t order);
114 114
115/** 115/**
116 * Releases an FSM 116 * Releases an FSM
diff --git a/drivers/scsi/eata.c b/drivers/scsi/eata.c
index c10e45b94b62..3d13fdee4fc2 100644
--- a/drivers/scsi/eata.c
+++ b/drivers/scsi/eata.c
@@ -1357,7 +1357,7 @@ static int port_detect(unsigned long port_base, unsigned int j,
1357 1357
1358 for (i = 0; i < shost->can_queue; i++) { 1358 for (i = 0; i < shost->can_queue; i++) {
1359 size_t sz = shost->sg_tablesize *sizeof(struct sg_list); 1359 size_t sz = shost->sg_tablesize *sizeof(struct sg_list);
1360 unsigned int gfp_mask = (shost->unchecked_isa_dma ? GFP_DMA : 0) | GFP_ATOMIC; 1360 gfp_t gfp_mask = (shost->unchecked_isa_dma ? GFP_DMA : 0) | GFP_ATOMIC;
1361 ha->cp[i].sglist = kmalloc(sz, gfp_mask); 1361 ha->cp[i].sglist = kmalloc(sz, gfp_mask);
1362 if (!ha->cp[i].sglist) { 1362 if (!ha->cp[i].sglist) {
1363 printk 1363 printk
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 02fe371b0ab8..f24d84538fd5 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -287,7 +287,8 @@ static void scsi_host_dev_release(struct device *dev)
287struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) 287struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
288{ 288{
289 struct Scsi_Host *shost; 289 struct Scsi_Host *shost;
290 int gfp_mask = GFP_KERNEL, rval; 290 gfp_t gfp_mask = GFP_KERNEL;
291 int rval;
291 292
292 if (sht->unchecked_isa_dma && privsize) 293 if (sht->unchecked_isa_dma && privsize)
293 gfp_mask |= __GFP_DMA; 294 gfp_mask |= __GFP_DMA;
diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c
index 0aba13ceaacf..352df47bcaca 100644
--- a/drivers/scsi/lpfc/lpfc_mem.c
+++ b/drivers/scsi/lpfc/lpfc_mem.c
@@ -39,7 +39,7 @@
39#define LPFC_MEM_POOL_SIZE 64 /* max elem in non-DMA safety pool */ 39#define LPFC_MEM_POOL_SIZE 64 /* max elem in non-DMA safety pool */
40 40
41static void * 41static void *
42lpfc_pool_kmalloc(unsigned int gfp_flags, void *data) 42lpfc_pool_kmalloc(gfp_t gfp_flags, void *data)
43{ 43{
44 return kmalloc((unsigned long)data, gfp_flags); 44 return kmalloc((unsigned long)data, gfp_flags);
45} 45}
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index 3f2f2464fa63..af1133104b3f 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -5146,7 +5146,8 @@ static long osst_compat_ioctl(struct file * file, unsigned int cmd_in, unsigned
5146/* Try to allocate a new tape buffer skeleton. Caller must not hold os_scsi_tapes_lock */ 5146/* Try to allocate a new tape buffer skeleton. Caller must not hold os_scsi_tapes_lock */
5147static struct osst_buffer * new_tape_buffer( int from_initialization, int need_dma, int max_sg ) 5147static struct osst_buffer * new_tape_buffer( int from_initialization, int need_dma, int max_sg )
5148{ 5148{
5149 int i, priority; 5149 int i;
5150 gfp_t priority;
5150 struct osst_buffer *tb; 5151 struct osst_buffer *tb;
5151 5152
5152 if (from_initialization) 5153 if (from_initialization)
@@ -5178,7 +5179,8 @@ static struct osst_buffer * new_tape_buffer( int from_initialization, int need_d
5178/* Try to allocate a temporary (while a user has the device open) enlarged tape buffer */ 5179/* Try to allocate a temporary (while a user has the device open) enlarged tape buffer */
5179static int enlarge_buffer(struct osst_buffer *STbuffer, int need_dma) 5180static int enlarge_buffer(struct osst_buffer *STbuffer, int need_dma)
5180{ 5181{
5181 int segs, nbr, max_segs, b_size, priority, order, got; 5182 int segs, nbr, max_segs, b_size, order, got;
5183 gfp_t priority;
5182 5184
5183 if (STbuffer->buffer_size >= OS_FRAME_SIZE) 5185 if (STbuffer->buffer_size >= OS_FRAME_SIZE)
5184 return 1; 5186 return 1;
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index 1ed32e7b5472..e451941ad81d 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -52,7 +52,7 @@ extern int qla2x00_load_risc(struct scsi_qla_host *, uint32_t *);
52extern int qla24xx_load_risc_flash(scsi_qla_host_t *, uint32_t *); 52extern int qla24xx_load_risc_flash(scsi_qla_host_t *, uint32_t *);
53extern int qla24xx_load_risc_hotplug(scsi_qla_host_t *, uint32_t *); 53extern int qla24xx_load_risc_hotplug(scsi_qla_host_t *, uint32_t *);
54 54
55extern fc_port_t *qla2x00_alloc_fcport(scsi_qla_host_t *, int); 55extern fc_port_t *qla2x00_alloc_fcport(scsi_qla_host_t *, gfp_t);
56 56
57extern int qla2x00_loop_resync(scsi_qla_host_t *); 57extern int qla2x00_loop_resync(scsi_qla_host_t *);
58 58
@@ -277,7 +277,7 @@ extern int qla2x00_fdmi_register(scsi_qla_host_t *);
277/* 277/*
278 * Global Function Prototypes in qla_rscn.c source file. 278 * Global Function Prototypes in qla_rscn.c source file.
279 */ 279 */
280extern fc_port_t *qla2x00_alloc_rscn_fcport(scsi_qla_host_t *, int); 280extern fc_port_t *qla2x00_alloc_rscn_fcport(scsi_qla_host_t *, gfp_t);
281extern int qla2x00_handle_port_rscn(scsi_qla_host_t *, uint32_t, fc_port_t *, 281extern int qla2x00_handle_port_rscn(scsi_qla_host_t *, uint32_t, fc_port_t *,
282 int); 282 int);
283extern void qla2x00_process_iodesc(scsi_qla_host_t *, struct mbx_entry *); 283extern void qla2x00_process_iodesc(scsi_qla_host_t *, struct mbx_entry *);
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 23d095d3817b..fbb6feee40cf 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -1685,7 +1685,7 @@ qla2x00_nvram_config(scsi_qla_host_t *ha)
1685 * Returns a pointer to the allocated fcport, or NULL, if none available. 1685 * Returns a pointer to the allocated fcport, or NULL, if none available.
1686 */ 1686 */
1687fc_port_t * 1687fc_port_t *
1688qla2x00_alloc_fcport(scsi_qla_host_t *ha, int flags) 1688qla2x00_alloc_fcport(scsi_qla_host_t *ha, gfp_t flags)
1689{ 1689{
1690 fc_port_t *fcport; 1690 fc_port_t *fcport;
1691 1691
diff --git a/drivers/scsi/qla2xxx/qla_rscn.c b/drivers/scsi/qla2xxx/qla_rscn.c
index 1eba98828636..7534efcc8918 100644
--- a/drivers/scsi/qla2xxx/qla_rscn.c
+++ b/drivers/scsi/qla2xxx/qla_rscn.c
@@ -1066,7 +1066,7 @@ qla2x00_send_login_iocb_cb(scsi_qla_host_t *ha, struct io_descriptor *iodesc,
1066 * Returns a pointer to the allocated RSCN fcport, or NULL, if none available. 1066 * Returns a pointer to the allocated RSCN fcport, or NULL, if none available.
1067 */ 1067 */
1068fc_port_t * 1068fc_port_t *
1069qla2x00_alloc_rscn_fcport(scsi_qla_host_t *ha, int flags) 1069qla2x00_alloc_rscn_fcport(scsi_qla_host_t *ha, gfp_t flags)
1070{ 1070{
1071 fc_port_t *fcport; 1071 fc_port_t *fcport;
1072 1072
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 1f0ebabf6d47..a5711d545d71 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -130,7 +130,7 @@ EXPORT_SYMBOL(scsi_device_types);
130 * Returns: Pointer to request block. 130 * Returns: Pointer to request block.
131 */ 131 */
132struct scsi_request *scsi_allocate_request(struct scsi_device *sdev, 132struct scsi_request *scsi_allocate_request(struct scsi_device *sdev,
133 int gfp_mask) 133 gfp_t gfp_mask)
134{ 134{
135 const int offset = ALIGN(sizeof(struct scsi_request), 4); 135 const int offset = ALIGN(sizeof(struct scsi_request), 4);
136 const int size = offset + sizeof(struct request); 136 const int size = offset + sizeof(struct request);
@@ -196,7 +196,7 @@ struct scsi_host_cmd_pool {
196 unsigned int users; 196 unsigned int users;
197 char *name; 197 char *name;
198 unsigned int slab_flags; 198 unsigned int slab_flags;
199 unsigned int gfp_mask; 199 gfp_t gfp_mask;
200}; 200};
201 201
202static struct scsi_host_cmd_pool scsi_cmd_pool = { 202static struct scsi_host_cmd_pool scsi_cmd_pool = {
@@ -213,7 +213,7 @@ static struct scsi_host_cmd_pool scsi_cmd_dma_pool = {
213static DECLARE_MUTEX(host_cmd_pool_mutex); 213static DECLARE_MUTEX(host_cmd_pool_mutex);
214 214
215static struct scsi_cmnd *__scsi_get_command(struct Scsi_Host *shost, 215static struct scsi_cmnd *__scsi_get_command(struct Scsi_Host *shost,
216 int gfp_mask) 216 gfp_t gfp_mask)
217{ 217{
218 struct scsi_cmnd *cmd; 218 struct scsi_cmnd *cmd;
219 219
@@ -245,7 +245,7 @@ static struct scsi_cmnd *__scsi_get_command(struct Scsi_Host *shost,
245 * 245 *
246 * Returns: The allocated scsi command structure. 246 * Returns: The allocated scsi command structure.
247 */ 247 */
248struct scsi_cmnd *scsi_get_command(struct scsi_device *dev, int gfp_mask) 248struct scsi_cmnd *scsi_get_command(struct scsi_device *dev, gfp_t gfp_mask)
249{ 249{
250 struct scsi_cmnd *cmd; 250 struct scsi_cmnd *cmd;
251 251
diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c
index de7f98cc38fe..6a3f6aae8a97 100644
--- a/drivers/scsi/scsi_ioctl.c
+++ b/drivers/scsi/scsi_ioctl.c
@@ -205,7 +205,8 @@ int scsi_ioctl_send_command(struct scsi_device *sdev,
205 unsigned int inlen, outlen, cmdlen; 205 unsigned int inlen, outlen, cmdlen;
206 unsigned int needed, buf_needed; 206 unsigned int needed, buf_needed;
207 int timeout, retries, result; 207 int timeout, retries, result;
208 int data_direction, gfp_mask = GFP_KERNEL; 208 int data_direction;
209 gfp_t gfp_mask = GFP_KERNEL;
209 210
210 if (!sic) 211 if (!sic)
211 return -EINVAL; 212 return -EINVAL;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 0074f28c37b2..3ff538809786 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -677,7 +677,7 @@ static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate,
677 return NULL; 677 return NULL;
678} 678}
679 679
680static struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, int gfp_mask) 680static struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
681{ 681{
682 struct scsi_host_sg_pool *sgp; 682 struct scsi_host_sg_pool *sgp;
683 struct scatterlist *sgl; 683 struct scatterlist *sgl;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index ad94367df430..fd56b7ec88b6 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -2644,7 +2644,7 @@ static char *
 sg_page_malloc(int rqSz, int lowDma, int *retSzp)
 {
 	char *resp = NULL;
-	int page_mask;
+	gfp_t page_mask;
 	int order, a_size;
 	int resSz = rqSz;
 
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index d001c046551b..927d700f0073 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -3577,7 +3577,8 @@ static long st_compat_ioctl(struct file *file, unsigned int cmd, unsigned long a
 static struct st_buffer *
  new_tape_buffer(int from_initialization, int need_dma, int max_sg)
 {
-	int i, priority, got = 0, segs = 0;
+	int i, got = 0, segs = 0;
+	gfp_t priority;
 	struct st_buffer *tb;
 
 	if (from_initialization)
@@ -3610,7 +3611,8 @@ static struct st_buffer *
 /* Try to allocate enough space in the tape buffer */
 static int enlarge_buffer(struct st_buffer * STbuffer, int new_size, int need_dma)
 {
-	int segs, nbr, max_segs, b_size, priority, order, got;
+	int segs, nbr, max_segs, b_size, order, got;
+	gfp_t priority;
 
 	if (new_size <= STbuffer->buffer_size)
 		return 1;
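The st.c hunks above show the other recurring shape of this conversion: a local variable that accumulates flags at runtime must itself become gfp_t, not just the function parameters. A hedged sketch of the pattern (names are illustrative, not the driver's own):

static void *tape_segment_alloc(size_t size, int need_dma)
{
	gfp_t priority = GFP_KERNEL;	/* must be gfp_t, not int */

	if (need_dma)
		priority |= GFP_DMA;	/* allocate from the DMA zone */

	return kmalloc(size, priority);
}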
diff --git a/drivers/usb/core/buffer.c b/drivers/usb/core/buffer.c
index fc15b4acc8af..57e800ac3cee 100644
--- a/drivers/usb/core/buffer.c
+++ b/drivers/usb/core/buffer.c
@@ -106,7 +106,7 @@ void hcd_buffer_destroy (struct usb_hcd *hcd)
 void *hcd_buffer_alloc (
 	struct usb_bus *bus,
 	size_t size,
-	unsigned mem_flags,
+	gfp_t mem_flags,
 	dma_addr_t *dma
 )
 {
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 1017a97a418b..ff19d64041b5 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1112,7 +1112,7 @@ static void urb_unlink (struct urb *urb)
  * expects usb_submit_urb() to have sanity checked and conditioned all
  * inputs in the urb
  */
-static int hcd_submit_urb (struct urb *urb, unsigned mem_flags)
+static int hcd_submit_urb (struct urb *urb, gfp_t mem_flags)
 {
 	int status;
 	struct usb_hcd *hcd = urb->dev->bus->hcpriv;
diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h
index ac451fa7e4d2..1f1ed6211af8 100644
--- a/drivers/usb/core/hcd.h
+++ b/drivers/usb/core/hcd.h
@@ -142,12 +142,12 @@ struct hcd_timeout { /* timeouts we allocate */
 
 struct usb_operations {
 	int (*get_frame_number) (struct usb_device *usb_dev);
-	int (*submit_urb) (struct urb *urb, unsigned mem_flags);
+	int (*submit_urb) (struct urb *urb, gfp_t mem_flags);
 	int (*unlink_urb) (struct urb *urb, int status);
 
 	/* allocate dma-consistent buffer for URB_DMA_NOMAPPING */
 	void *(*buffer_alloc)(struct usb_bus *bus, size_t size,
-			unsigned mem_flags,
+			gfp_t mem_flags,
 			dma_addr_t *dma);
 	void (*buffer_free)(struct usb_bus *bus, size_t size,
 			void *addr, dma_addr_t dma);
@@ -200,7 +200,7 @@ struct hc_driver {
 	int (*urb_enqueue) (struct usb_hcd *hcd,
 					struct usb_host_endpoint *ep,
 					struct urb *urb,
-					unsigned mem_flags);
+					gfp_t mem_flags);
 	int (*urb_dequeue) (struct usb_hcd *hcd, struct urb *urb);
 
 	/* hw synch, freeing endpoint resources that urb_dequeue can't */
@@ -247,7 +247,7 @@ int hcd_buffer_create (struct usb_hcd *hcd);
 void hcd_buffer_destroy (struct usb_hcd *hcd);
 
 void *hcd_buffer_alloc (struct usb_bus *bus, size_t size,
-	unsigned mem_flags, dma_addr_t *dma);
+	gfp_t mem_flags, dma_addr_t *dma);
 void hcd_buffer_free (struct usb_bus *bus, size_t size,
 	void *addr, dma_addr_t dma);
 
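Because usb_operations and hc_driver carry these signatures as function pointers, every host controller driver has to convert in the same patch; a stale prototype would no longer match. A rough sketch of why, assuming a sparse-annotated gfp_t (illustrative types, not the kernel's own headers):

struct ops_sketch {
	int (*submit)(struct urb *urb, gfp_t mem_flags);
};

static int my_submit(struct urb *urb, gfp_t mem_flags) { return 0; }

static struct ops_sketch ops = {
	/* an implementation still taking 'unsigned mem_flags' would be
	 * reported by sparse here as a mismatched pointer type */
	.submit = my_submit,
};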
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index f1fb67fe22a8..f9a81e84dbdf 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -321,7 +321,7 @@ int usb_sg_init (
 	struct scatterlist *sg,
 	int nents,
 	size_t length,
-	unsigned mem_flags
+	gfp_t mem_flags
 )
 {
 	int i;
diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c
index c846fefb7386..b32898e0a27d 100644
--- a/drivers/usb/core/urb.c
+++ b/drivers/usb/core/urb.c
@@ -60,7 +60,7 @@ void usb_init_urb(struct urb *urb)
  *
  * The driver must call usb_free_urb() when it is finished with the urb.
  */
-struct urb *usb_alloc_urb(int iso_packets, unsigned mem_flags)
+struct urb *usb_alloc_urb(int iso_packets, gfp_t mem_flags)
 {
 	struct urb *urb;
 
@@ -224,7 +224,7 @@ struct urb * usb_get_urb(struct urb *urb)
  *	GFP_NOIO, unless b) or c) apply
  *
  */
-int usb_submit_urb(struct urb *urb, unsigned mem_flags)
+int usb_submit_urb(struct urb *urb, gfp_t mem_flags)
 {
 	int pipe, temp, max;
 	struct usb_device *dev;
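The kernel-doc fragment above ("GFP_NOIO, unless b) or c) apply") is the contract the new gfp_t parameter encodes: the caller states which allocation class its context permits. A hedged usage sketch:

static int submit_from_task(struct urb *urb)
{
	/* process context: a sleeping allocation is allowed */
	return usb_submit_urb(urb, GFP_KERNEL);
}

static void my_complete(struct urb *urb, struct pt_regs *regs)
{
	/* completion handlers run in atomic context: GFP_ATOMIC only */
	usb_submit_urb(urb, GFP_ATOMIC);
}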
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 7d131509e419..4c57f3f649ed 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -1147,7 +1147,7 @@ int __usb_get_extra_descriptor(char *buffer, unsigned size,
 void *usb_buffer_alloc (
 	struct usb_device *dev,
 	size_t size,
-	unsigned mem_flags,
+	gfp_t mem_flags,
 	dma_addr_t *dma
 )
 {
diff --git a/drivers/usb/gadget/dummy_hcd.c b/drivers/usb/gadget/dummy_hcd.c
index 583db7c38cf1..8d9d8ee89554 100644
--- a/drivers/usb/gadget/dummy_hcd.c
+++ b/drivers/usb/gadget/dummy_hcd.c
@@ -470,7 +470,7 @@ static int dummy_disable (struct usb_ep *_ep)
 }
 
 static struct usb_request *
-dummy_alloc_request (struct usb_ep *_ep, unsigned mem_flags)
+dummy_alloc_request (struct usb_ep *_ep, gfp_t mem_flags)
 {
 	struct dummy_ep *ep;
 	struct dummy_request *req;
@@ -507,7 +507,7 @@ dummy_alloc_buffer (
 	struct usb_ep *_ep,
 	unsigned bytes,
 	dma_addr_t *dma,
-	unsigned mem_flags
+	gfp_t mem_flags
 ) {
 	char *retval;
 	struct dummy_ep *ep;
@@ -541,7 +541,7 @@ fifo_complete (struct usb_ep *ep, struct usb_request *req)
 
 static int
 dummy_queue (struct usb_ep *_ep, struct usb_request *_req,
-		unsigned mem_flags)
+		gfp_t mem_flags)
 {
 	struct dummy_ep *ep;
 	struct dummy_request *req;
@@ -999,7 +999,7 @@ static int dummy_urb_enqueue (
 	struct usb_hcd *hcd,
 	struct usb_host_endpoint *ep,
 	struct urb *urb,
-	unsigned mem_flags
+	gfp_t mem_flags
 ) {
 	struct dummy *dum;
 	struct urbp *urbp;
diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c
index 49459e33e952..f1024e804d5c 100644
--- a/drivers/usb/gadget/ether.c
+++ b/drivers/usb/gadget/ether.c
@@ -945,11 +945,11 @@ config_buf (enum usb_device_speed speed,
 
 /*-------------------------------------------------------------------------*/
 
-static void eth_start (struct eth_dev *dev, unsigned gfp_flags);
-static int alloc_requests (struct eth_dev *dev, unsigned n, unsigned gfp_flags);
+static void eth_start (struct eth_dev *dev, gfp_t gfp_flags);
+static int alloc_requests (struct eth_dev *dev, unsigned n, gfp_t gfp_flags);
 
 static int
-set_ether_config (struct eth_dev *dev, unsigned gfp_flags)
+set_ether_config (struct eth_dev *dev, gfp_t gfp_flags)
 {
 	int result = 0;
 	struct usb_gadget *gadget = dev->gadget;
@@ -1081,7 +1081,7 @@ static void eth_reset_config (struct eth_dev *dev)
  * that returns config descriptors, and altsetting code.
  */
 static int
-eth_set_config (struct eth_dev *dev, unsigned number, unsigned gfp_flags)
+eth_set_config (struct eth_dev *dev, unsigned number, gfp_t gfp_flags)
 {
 	int result = 0;
 	struct usb_gadget *gadget = dev->gadget;
@@ -1598,7 +1598,7 @@ static void defer_kevent (struct eth_dev *dev, int flag)
 static void rx_complete (struct usb_ep *ep, struct usb_request *req);
 
 static int
-rx_submit (struct eth_dev *dev, struct usb_request *req, unsigned gfp_flags)
+rx_submit (struct eth_dev *dev, struct usb_request *req, gfp_t gfp_flags)
 {
 	struct sk_buff *skb;
 	int retval = -ENOMEM;
@@ -1724,7 +1724,7 @@ clean:
 }
 
 static int prealloc (struct list_head *list, struct usb_ep *ep,
-			unsigned n, unsigned gfp_flags)
+			unsigned n, gfp_t gfp_flags)
 {
 	unsigned i;
 	struct usb_request *req;
@@ -1763,7 +1763,7 @@ extra:
 	return 0;
 }
 
-static int alloc_requests (struct eth_dev *dev, unsigned n, unsigned gfp_flags)
+static int alloc_requests (struct eth_dev *dev, unsigned n, gfp_t gfp_flags)
 {
 	int status;
 
@@ -1779,7 +1779,7 @@ fail:
 	return status;
 }
 
-static void rx_fill (struct eth_dev *dev, unsigned gfp_flags)
+static void rx_fill (struct eth_dev *dev, gfp_t gfp_flags)
 {
 	struct usb_request *req;
 	unsigned long flags;
@@ -1962,7 +1962,7 @@ drop:
  * normally just one notification will be queued.
  */
 
-static struct usb_request *eth_req_alloc (struct usb_ep *, unsigned, unsigned);
+static struct usb_request *eth_req_alloc (struct usb_ep *, unsigned, gfp_t);
 static void eth_req_free (struct usb_ep *ep, struct usb_request *req);
 
 static void
@@ -2024,7 +2024,7 @@ static int rndis_control_ack (struct net_device *net)
 
 #endif /* RNDIS */
 
-static void eth_start (struct eth_dev *dev, unsigned gfp_flags)
+static void eth_start (struct eth_dev *dev, gfp_t gfp_flags)
 {
 	DEBUG (dev, "%s\n", __FUNCTION__);
 
@@ -2092,7 +2092,7 @@ static int eth_stop (struct net_device *net)
 /*-------------------------------------------------------------------------*/
 
 static struct usb_request *
-eth_req_alloc (struct usb_ep *ep, unsigned size, unsigned gfp_flags)
+eth_req_alloc (struct usb_ep *ep, unsigned size, gfp_t gfp_flags)
 {
 	struct usb_request *req;
 
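ether.c threads gfp_flags through its whole refill chain (eth_start, rx_fill, rx_submit) because the same code runs both from process context and from completion callbacks; only the outermost caller knows which allocation class is legal. A hedged sketch of that shape (illustrative names and buffer size, not the driver's own):

static int refill_one(struct usb_ep *ep, struct usb_request *req,
		gfp_t gfp_flags)
{
	req->buf = kmalloc(1514, gfp_flags);	/* illustrative size */
	if (!req->buf)
		return -ENOMEM;
	return usb_ep_queue(ep, req, gfp_flags);
}

/* from process context:       refill_one(ep, req, GFP_KERNEL);
 * from a completion callback: refill_one(ep, req, GFP_ATOMIC); */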
diff --git a/drivers/usb/gadget/goku_udc.c b/drivers/usb/gadget/goku_udc.c
index eaab26f4ed37..b0f3cd63e3b9 100644
--- a/drivers/usb/gadget/goku_udc.c
+++ b/drivers/usb/gadget/goku_udc.c
@@ -269,7 +269,7 @@ static int goku_ep_disable(struct usb_ep *_ep)
 /*-------------------------------------------------------------------------*/
 
 static struct usb_request *
-goku_alloc_request(struct usb_ep *_ep, unsigned gfp_flags)
+goku_alloc_request(struct usb_ep *_ep, gfp_t gfp_flags)
 {
 	struct goku_request *req;
 
@@ -327,7 +327,7 @@ goku_free_request(struct usb_ep *_ep, struct usb_request *_req)
  */
 static void *
 goku_alloc_buffer(struct usb_ep *_ep, unsigned bytes,
-			dma_addr_t *dma, unsigned gfp_flags)
+			dma_addr_t *dma, gfp_t gfp_flags)
 {
 	void *retval;
 	struct goku_ep *ep;
@@ -789,7 +789,7 @@ finished:
 /*-------------------------------------------------------------------------*/
 
 static int
-goku_queue(struct usb_ep *_ep, struct usb_request *_req, unsigned gfp_flags)
+goku_queue(struct usb_ep *_ep, struct usb_request *_req, gfp_t gfp_flags)
 {
 	struct goku_request *req;
 	struct goku_ep *ep;
diff --git a/drivers/usb/gadget/lh7a40x_udc.c b/drivers/usb/gadget/lh7a40x_udc.c
index 4842577789c9..012d1e5f1524 100644
--- a/drivers/usb/gadget/lh7a40x_udc.c
+++ b/drivers/usb/gadget/lh7a40x_udc.c
@@ -71,13 +71,13 @@ static char *state_names[] = {
 static int lh7a40x_ep_enable(struct usb_ep *ep,
 			     const struct usb_endpoint_descriptor *);
 static int lh7a40x_ep_disable(struct usb_ep *ep);
-static struct usb_request *lh7a40x_alloc_request(struct usb_ep *ep, int);
+static struct usb_request *lh7a40x_alloc_request(struct usb_ep *ep, gfp_t);
 static void lh7a40x_free_request(struct usb_ep *ep, struct usb_request *);
 static void *lh7a40x_alloc_buffer(struct usb_ep *ep, unsigned, dma_addr_t *,
-				  int);
+				  gfp_t);
 static void lh7a40x_free_buffer(struct usb_ep *ep, void *, dma_addr_t,
 				unsigned);
-static int lh7a40x_queue(struct usb_ep *ep, struct usb_request *, int);
+static int lh7a40x_queue(struct usb_ep *ep, struct usb_request *, gfp_t);
 static int lh7a40x_dequeue(struct usb_ep *ep, struct usb_request *);
 static int lh7a40x_set_halt(struct usb_ep *ep, int);
 static int lh7a40x_fifo_status(struct usb_ep *ep);
@@ -1106,7 +1106,7 @@ static int lh7a40x_ep_disable(struct usb_ep *_ep)
 }
 
 static struct usb_request *lh7a40x_alloc_request(struct usb_ep *ep,
-						 unsigned gfp_flags)
+						 gfp_t gfp_flags)
 {
 	struct lh7a40x_request *req;
 
@@ -1134,7 +1134,7 @@ static void lh7a40x_free_request(struct usb_ep *ep, struct usb_request *_req)
 }
 
 static void *lh7a40x_alloc_buffer(struct usb_ep *ep, unsigned bytes,
-				  dma_addr_t * dma, unsigned gfp_flags)
+				  dma_addr_t * dma, gfp_t gfp_flags)
 {
 	char *retval;
 
@@ -1158,7 +1158,7 @@ static void lh7a40x_free_buffer(struct usb_ep *ep, void *buf, dma_addr_t dma,
  * NOTE: Sets INDEX register
  */
 static int lh7a40x_queue(struct usb_ep *_ep, struct usb_request *_req,
-			 unsigned gfp_flags)
+			 gfp_t gfp_flags)
 {
 	struct lh7a40x_request *req;
 	struct lh7a40x_ep *ep;
diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c
index 477fab2e74d1..c32e1f7476da 100644
--- a/drivers/usb/gadget/net2280.c
+++ b/drivers/usb/gadget/net2280.c
@@ -376,7 +376,7 @@ static int net2280_disable (struct usb_ep *_ep)
 /*-------------------------------------------------------------------------*/
 
 static struct usb_request *
-net2280_alloc_request (struct usb_ep *_ep, unsigned gfp_flags)
+net2280_alloc_request (struct usb_ep *_ep, gfp_t gfp_flags)
 {
 	struct net2280_ep *ep;
 	struct net2280_request *req;
@@ -463,7 +463,7 @@ net2280_alloc_buffer (
 	struct usb_ep *_ep,
 	unsigned bytes,
 	dma_addr_t *dma,
-	unsigned gfp_flags
+	gfp_t gfp_flags
 )
 {
 	void *retval;
@@ -897,7 +897,7 @@ done (struct net2280_ep *ep, struct net2280_request *req, int status)
 /*-------------------------------------------------------------------------*/
 
 static int
-net2280_queue (struct usb_ep *_ep, struct usb_request *_req, unsigned gfp_flags)
+net2280_queue (struct usb_ep *_ep, struct usb_request *_req, gfp_t gfp_flags)
 {
 	struct net2280_request *req;
 	struct net2280_ep *ep;
diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c
index ff5533e69560..287c5900fb13 100644
--- a/drivers/usb/gadget/omap_udc.c
+++ b/drivers/usb/gadget/omap_udc.c
@@ -269,7 +269,7 @@ static int omap_ep_disable(struct usb_ep *_ep)
 /*-------------------------------------------------------------------------*/
 
 static struct usb_request *
-omap_alloc_request(struct usb_ep *ep, unsigned gfp_flags)
+omap_alloc_request(struct usb_ep *ep, gfp_t gfp_flags)
 {
 	struct omap_req *req;
 
@@ -298,7 +298,7 @@ omap_alloc_buffer(
 	struct usb_ep *_ep,
 	unsigned bytes,
 	dma_addr_t *dma,
-	unsigned gfp_flags
+	gfp_t gfp_flags
 )
 {
 	void *retval;
@@ -937,7 +937,7 @@ static void dma_channel_release(struct omap_ep *ep)
 /*-------------------------------------------------------------------------*/
 
 static int
-omap_ep_queue(struct usb_ep *_ep, struct usb_request *_req, unsigned gfp_flags)
+omap_ep_queue(struct usb_ep *_ep, struct usb_request *_req, gfp_t gfp_flags)
 {
 	struct omap_ep *ep = container_of(_ep, struct omap_ep, ep);
 	struct omap_req *req = container_of(_req, struct omap_req, req);
diff --git a/drivers/usb/gadget/pxa2xx_udc.c b/drivers/usb/gadget/pxa2xx_udc.c
index 73f8c9404156..6e545393cfff 100644
--- a/drivers/usb/gadget/pxa2xx_udc.c
+++ b/drivers/usb/gadget/pxa2xx_udc.c
@@ -332,7 +332,7 @@ static int pxa2xx_ep_disable (struct usb_ep *_ep)
  * pxa2xx_ep_alloc_request - allocate a request data structure
  */
 static struct usb_request *
-pxa2xx_ep_alloc_request (struct usb_ep *_ep, unsigned gfp_flags)
+pxa2xx_ep_alloc_request (struct usb_ep *_ep, gfp_t gfp_flags)
 {
 	struct pxa2xx_request *req;
 
@@ -367,7 +367,7 @@ pxa2xx_ep_free_request (struct usb_ep *_ep, struct usb_request *_req)
  */
 static void *
 pxa2xx_ep_alloc_buffer(struct usb_ep *_ep, unsigned bytes,
-	dma_addr_t *dma, unsigned gfp_flags)
+	dma_addr_t *dma, gfp_t gfp_flags)
 {
 	char *retval;
 
@@ -874,7 +874,7 @@ done:
 /*-------------------------------------------------------------------------*/
 
 static int
-pxa2xx_ep_queue(struct usb_ep *_ep, struct usb_request *_req, unsigned gfp_flags)
+pxa2xx_ep_queue(struct usb_ep *_ep, struct usb_request *_req, gfp_t gfp_flags)
 {
 	struct pxa2xx_request *req;
 	struct pxa2xx_ep *ep;
diff --git a/drivers/usb/gadget/serial.c b/drivers/usb/gadget/serial.c
index c925d9222f53..b35ac6d334f8 100644
--- a/drivers/usb/gadget/serial.c
+++ b/drivers/usb/gadget/serial.c
@@ -300,18 +300,18 @@ static int gs_build_config_buf(u8 *buf, enum usb_device_speed speed,
 		u8 type, unsigned int index, int is_otg);
 
 static struct usb_request *gs_alloc_req(struct usb_ep *ep, unsigned int len,
-	unsigned kmalloc_flags);
+	gfp_t kmalloc_flags);
 static void gs_free_req(struct usb_ep *ep, struct usb_request *req);
 
 static struct gs_req_entry *gs_alloc_req_entry(struct usb_ep *ep, unsigned len,
-	unsigned kmalloc_flags);
+	gfp_t kmalloc_flags);
 static void gs_free_req_entry(struct usb_ep *ep, struct gs_req_entry *req);
 
-static int gs_alloc_ports(struct gs_dev *dev, unsigned kmalloc_flags);
+static int gs_alloc_ports(struct gs_dev *dev, gfp_t kmalloc_flags);
 static void gs_free_ports(struct gs_dev *dev);
 
 /* circular buffer */
-static struct gs_buf *gs_buf_alloc(unsigned int size, unsigned kmalloc_flags);
+static struct gs_buf *gs_buf_alloc(unsigned int size, gfp_t kmalloc_flags);
 static void gs_buf_free(struct gs_buf *gb);
 static void gs_buf_clear(struct gs_buf *gb);
 static unsigned int gs_buf_data_avail(struct gs_buf *gb);
@@ -2091,7 +2091,7 @@ static int gs_build_config_buf(u8 *buf, enum usb_device_speed speed,
  * usb_request or NULL if there is an error.
  */
 static struct usb_request *
-gs_alloc_req(struct usb_ep *ep, unsigned int len, unsigned kmalloc_flags)
+gs_alloc_req(struct usb_ep *ep, unsigned int len, gfp_t kmalloc_flags)
 {
 	struct usb_request *req;
 
@@ -2132,7 +2132,7 @@ static void gs_free_req(struct usb_ep *ep, struct usb_request *req)
  * endpoint, buffer len, and kmalloc flags.
  */
 static struct gs_req_entry *
-gs_alloc_req_entry(struct usb_ep *ep, unsigned len, unsigned kmalloc_flags)
+gs_alloc_req_entry(struct usb_ep *ep, unsigned len, gfp_t kmalloc_flags)
 {
 	struct gs_req_entry *req;
 
@@ -2173,7 +2173,7 @@ static void gs_free_req_entry(struct usb_ep *ep, struct gs_req_entry *req)
 *
 * The device lock is normally held when calling this function.
 */
-static int gs_alloc_ports(struct gs_dev *dev, unsigned kmalloc_flags)
+static int gs_alloc_ports(struct gs_dev *dev, gfp_t kmalloc_flags)
 {
 	int i;
 	struct gs_port *port;
@@ -2255,7 +2255,7 @@ static void gs_free_ports(struct gs_dev *dev)
 *
 * Allocate a circular buffer and all associated memory.
 */
-static struct gs_buf *gs_buf_alloc(unsigned int size, unsigned kmalloc_flags)
+static struct gs_buf *gs_buf_alloc(unsigned int size, gfp_t kmalloc_flags)
 {
 	struct gs_buf *gb;
 
diff --git a/drivers/usb/gadget/zero.c b/drivers/usb/gadget/zero.c
index 6890e773b2a2..ec9c424f1d97 100644
--- a/drivers/usb/gadget/zero.c
+++ b/drivers/usb/gadget/zero.c
@@ -612,7 +612,7 @@ static void source_sink_complete (struct usb_ep *ep, struct usb_request *req)
 }
 
 static struct usb_request *
-source_sink_start_ep (struct usb_ep *ep, unsigned gfp_flags)
+source_sink_start_ep (struct usb_ep *ep, gfp_t gfp_flags)
 {
 	struct usb_request *req;
 	int status;
@@ -640,7 +640,7 @@ source_sink_start_ep (struct usb_ep *ep, unsigned gfp_flags)
 }
 
 static int
-set_source_sink_config (struct zero_dev *dev, unsigned gfp_flags)
+set_source_sink_config (struct zero_dev *dev, gfp_t gfp_flags)
 {
 	int result = 0;
 	struct usb_ep *ep;
@@ -744,7 +744,7 @@ static void loopback_complete (struct usb_ep *ep, struct usb_request *req)
 }
 
 static int
-set_loopback_config (struct zero_dev *dev, unsigned gfp_flags)
+set_loopback_config (struct zero_dev *dev, gfp_t gfp_flags)
 {
 	int result = 0;
 	struct usb_ep *ep;
@@ -845,7 +845,7 @@ static void zero_reset_config (struct zero_dev *dev)
  * by limiting configuration choices (like the pxa2xx).
  */
 static int
-zero_set_config (struct zero_dev *dev, unsigned number, unsigned gfp_flags)
+zero_set_config (struct zero_dev *dev, unsigned number, gfp_t gfp_flags)
 {
 	int result = 0;
 	struct usb_gadget *gadget = dev->gadget;
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index b948ffd94f45..f5eb9e7b5b18 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -983,7 +983,7 @@ static int ehci_urb_enqueue (
 	struct usb_hcd *hcd,
 	struct usb_host_endpoint *ep,
 	struct urb *urb,
-	unsigned mem_flags
+	gfp_t mem_flags
 ) {
 	struct ehci_hcd *ehci = hcd_to_ehci (hcd);
 	struct list_head qtd_list;
diff --git a/drivers/usb/host/ehci-mem.c b/drivers/usb/host/ehci-mem.c
index 5c38ad869485..91c2ab43cbcc 100644
--- a/drivers/usb/host/ehci-mem.c
+++ b/drivers/usb/host/ehci-mem.c
@@ -45,7 +45,7 @@ static inline void ehci_qtd_init (struct ehci_qtd *qtd, dma_addr_t dma)
 	INIT_LIST_HEAD (&qtd->qtd_list);
 }
 
-static struct ehci_qtd *ehci_qtd_alloc (struct ehci_hcd *ehci, int flags)
+static struct ehci_qtd *ehci_qtd_alloc (struct ehci_hcd *ehci, gfp_t flags)
 {
 	struct ehci_qtd *qtd;
 	dma_addr_t dma;
@@ -79,7 +79,7 @@ static void qh_destroy (struct kref *kref)
 	dma_pool_free (ehci->qh_pool, qh, qh->qh_dma);
 }
 
-static struct ehci_qh *ehci_qh_alloc (struct ehci_hcd *ehci, int flags)
+static struct ehci_qh *ehci_qh_alloc (struct ehci_hcd *ehci, gfp_t flags)
 {
 	struct ehci_qh *qh;
 	dma_addr_t dma;
@@ -161,7 +161,7 @@ static void ehci_mem_cleanup (struct ehci_hcd *ehci)
 }
 
 /* remember to add cleanup code (above) if you add anything here */
-static int ehci_mem_init (struct ehci_hcd *ehci, int flags)
+static int ehci_mem_init (struct ehci_hcd *ehci, gfp_t flags)
 {
 	int i;
 
diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c
index 940d38ca7d91..5bb872c3496d 100644
--- a/drivers/usb/host/ehci-q.c
+++ b/drivers/usb/host/ehci-q.c
@@ -477,7 +477,7 @@ qh_urb_transaction (
 	struct ehci_hcd *ehci,
 	struct urb *urb,
 	struct list_head *head,
-	int flags
+	gfp_t flags
 ) {
 	struct ehci_qtd *qtd, *qtd_prev;
 	dma_addr_t buf;
@@ -629,7 +629,7 @@ static struct ehci_qh *
 qh_make (
 	struct ehci_hcd *ehci,
 	struct urb *urb,
-	int flags
+	gfp_t flags
 ) {
 	struct ehci_qh *qh = ehci_qh_alloc (ehci, flags);
 	u32 info1 = 0, info2 = 0;
@@ -906,7 +906,7 @@ submit_async (
 	struct usb_host_endpoint *ep,
 	struct urb *urb,
 	struct list_head *qtd_list,
-	unsigned mem_flags
+	gfp_t mem_flags
 ) {
 	struct ehci_qtd *qtd;
 	int epnum;
diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c
index ccc7300baa6d..f0c8aa1ccd5d 100644
--- a/drivers/usb/host/ehci-sched.c
+++ b/drivers/usb/host/ehci-sched.c
@@ -589,7 +589,7 @@ static int intr_submit (
 	struct usb_host_endpoint *ep,
 	struct urb *urb,
 	struct list_head *qtd_list,
-	unsigned mem_flags
+	gfp_t mem_flags
 ) {
 	unsigned epnum;
 	unsigned long flags;
@@ -634,7 +634,7 @@ done:
 /* ehci_iso_stream ops work with both ITD and SITD */
 
 static struct ehci_iso_stream *
-iso_stream_alloc (unsigned mem_flags)
+iso_stream_alloc (gfp_t mem_flags)
 {
 	struct ehci_iso_stream *stream;
 
@@ -851,7 +851,7 @@ iso_stream_find (struct ehci_hcd *ehci, struct urb *urb)
 /* ehci_iso_sched ops can be ITD-only or SITD-only */
 
 static struct ehci_iso_sched *
-iso_sched_alloc (unsigned packets, unsigned mem_flags)
+iso_sched_alloc (unsigned packets, gfp_t mem_flags)
 {
 	struct ehci_iso_sched *iso_sched;
 	int size = sizeof *iso_sched;
@@ -924,7 +924,7 @@ itd_urb_transaction (
 	struct ehci_iso_stream *stream,
 	struct ehci_hcd *ehci,
 	struct urb *urb,
-	unsigned mem_flags
+	gfp_t mem_flags
 )
 {
 	struct ehci_itd *itd;
@@ -1418,7 +1418,7 @@ itd_complete (
 /*-------------------------------------------------------------------------*/
 
 static int itd_submit (struct ehci_hcd *ehci, struct urb *urb,
-	unsigned mem_flags)
+	gfp_t mem_flags)
 {
 	int status = -EINVAL;
 	unsigned long flags;
@@ -1529,7 +1529,7 @@ sitd_urb_transaction (
 	struct ehci_iso_stream *stream,
 	struct ehci_hcd *ehci,
 	struct urb *urb,
-	unsigned mem_flags
+	gfp_t mem_flags
 )
 {
 	struct ehci_sitd *sitd;
@@ -1779,7 +1779,7 @@ sitd_complete (
 
 
 static int sitd_submit (struct ehci_hcd *ehci, struct urb *urb,
-	unsigned mem_flags)
+	gfp_t mem_flags)
 {
 	int status = -EINVAL;
 	unsigned long flags;
diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c
index e142056b0d2c..2548d94fcd72 100644
--- a/drivers/usb/host/isp116x-hcd.c
+++ b/drivers/usb/host/isp116x-hcd.c
@@ -694,7 +694,7 @@ static int balance(struct isp116x *isp116x, u16 period, u16 load)
 
 static int isp116x_urb_enqueue(struct usb_hcd *hcd,
 			       struct usb_host_endpoint *hep, struct urb *urb,
-			       unsigned mem_flags)
+			       gfp_t mem_flags)
 {
 	struct isp116x *isp116x = hcd_to_isp116x(hcd);
 	struct usb_device *udev = urb->dev;
diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c
index 67c1aa5eb1c1..f8da8c7af7c6 100644
--- a/drivers/usb/host/ohci-hcd.c
+++ b/drivers/usb/host/ohci-hcd.c
@@ -180,7 +180,7 @@ static int ohci_urb_enqueue (
 	struct usb_hcd *hcd,
 	struct usb_host_endpoint *ep,
 	struct urb *urb,
-	unsigned mem_flags
+	gfp_t mem_flags
 ) {
 	struct ohci_hcd *ohci = hcd_to_ohci (hcd);
 	struct ed *ed;
diff --git a/drivers/usb/host/ohci-mem.c b/drivers/usb/host/ohci-mem.c
index fd3c4d3714bd..9fb83dfb1eb4 100644
--- a/drivers/usb/host/ohci-mem.c
+++ b/drivers/usb/host/ohci-mem.c
@@ -84,7 +84,7 @@ dma_to_td (struct ohci_hcd *hc, dma_addr_t td_dma)
 
 /* TDs ... */
 static struct td *
-td_alloc (struct ohci_hcd *hc, unsigned mem_flags)
+td_alloc (struct ohci_hcd *hc, gfp_t mem_flags)
 {
 	dma_addr_t dma;
 	struct td *td;
@@ -118,7 +118,7 @@ td_free (struct ohci_hcd *hc, struct td *td)
 
 /* EDs ... */
 static struct ed *
-ed_alloc (struct ohci_hcd *hc, unsigned mem_flags)
+ed_alloc (struct ohci_hcd *hc, gfp_t mem_flags)
 {
 	dma_addr_t dma;
 	struct ed *ed;
diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c
index d42a15d10a46..cad858575cea 100644
--- a/drivers/usb/host/sl811-hcd.c
+++ b/drivers/usb/host/sl811-hcd.c
@@ -818,7 +818,7 @@ static int sl811h_urb_enqueue(
 	struct usb_hcd *hcd,
 	struct usb_host_endpoint *hep,
 	struct urb *urb,
-	unsigned mem_flags
+	gfp_t mem_flags
 ) {
 	struct sl811 *sl811 = hcd_to_sl811(hcd);
 	struct usb_device *udev = urb->dev;
diff --git a/drivers/usb/host/uhci-q.c b/drivers/usb/host/uhci-q.c
index ea0d168a8c67..4e0fbe2c1a9a 100644
--- a/drivers/usb/host/uhci-q.c
+++ b/drivers/usb/host/uhci-q.c
@@ -1164,7 +1164,7 @@ static struct urb *uhci_find_urb_ep(struct uhci_hcd *uhci, struct urb *urb)
 
 static int uhci_urb_enqueue(struct usb_hcd *hcd,
 		struct usb_host_endpoint *ep,
-		struct urb *urb, unsigned mem_flags)
+		struct urb *urb, gfp_t mem_flags)
 {
 	int ret;
 	struct uhci_hcd *uhci = hcd_to_uhci(hcd);
diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c
index 03fb70ef2eb3..0592cb5e6c4d 100644
--- a/drivers/usb/misc/uss720.c
+++ b/drivers/usb/misc/uss720.c
@@ -137,7 +137,7 @@ static void async_complete(struct urb *urb, struct pt_regs *ptregs)
 
 static struct uss720_async_request *submit_async_request(struct parport_uss720_private *priv,
 							  __u8 request, __u8 requesttype, __u16 value, __u16 index,
-							  unsigned int mem_flags)
+							  gfp_t mem_flags)
 {
 	struct usb_device *usbdev;
 	struct uss720_async_request *rq;
@@ -204,7 +204,7 @@ static unsigned int kill_all_async_requests_priv(struct parport_uss720_private *
 
 /* --------------------------------------------------------------------- */
 
-static int get_1284_register(struct parport *pp, unsigned char reg, unsigned char *val, unsigned int mem_flags)
+static int get_1284_register(struct parport *pp, unsigned char reg, unsigned char *val, gfp_t mem_flags)
 {
 	struct parport_uss720_private *priv;
 	struct uss720_async_request *rq;
@@ -238,7 +238,7 @@ static int get_1284_register(struct parport *pp, unsigned char reg, unsigned cha
 	return -EIO;
 }
 
-static int set_1284_register(struct parport *pp, unsigned char reg, unsigned char val, unsigned int mem_flags)
+static int set_1284_register(struct parport *pp, unsigned char reg, unsigned char val, gfp_t mem_flags)
 {
 	struct parport_uss720_private *priv;
 	struct uss720_async_request *rq;
diff --git a/drivers/usb/net/asix.c b/drivers/usb/net/asix.c
index 861f00a43750..252a34fbb42c 100644
--- a/drivers/usb/net/asix.c
+++ b/drivers/usb/net/asix.c
@@ -753,7 +753,7 @@ static int ax88772_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 }
 
 static struct sk_buff *ax88772_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
-					unsigned flags)
+					gfp_t flags)
 {
 	int padlen;
 	int headroom = skb_headroom(skb);
diff --git a/drivers/usb/net/gl620a.c b/drivers/usb/net/gl620a.c
index c8763ae33c73..c0f263b202a6 100644
--- a/drivers/usb/net/gl620a.c
+++ b/drivers/usb/net/gl620a.c
@@ -301,7 +301,7 @@ static int genelink_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 }
 
 static struct sk_buff *
-genelink_tx_fixup(struct usbnet *dev, struct sk_buff *skb, unsigned flags)
+genelink_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
 {
 	int padlen;
 	int length = skb->len;
diff --git a/drivers/usb/net/kaweth.c b/drivers/usb/net/kaweth.c
index e04b0ce3611a..c82655d3d448 100644
--- a/drivers/usb/net/kaweth.c
+++ b/drivers/usb/net/kaweth.c
@@ -477,13 +477,13 @@ static int kaweth_reset(struct kaweth_device *kaweth)
 }
 
 static void kaweth_usb_receive(struct urb *, struct pt_regs *regs);
-static int kaweth_resubmit_rx_urb(struct kaweth_device *, unsigned);
+static int kaweth_resubmit_rx_urb(struct kaweth_device *, gfp_t);
 
 /****************************************************************
 	int_callback
 *****************************************************************/
 
-static void kaweth_resubmit_int_urb(struct kaweth_device *kaweth, int mf)
+static void kaweth_resubmit_int_urb(struct kaweth_device *kaweth, gfp_t mf)
 {
 	int status;
 
@@ -550,7 +550,7 @@ static void kaweth_resubmit_tl(void *d)
  * kaweth_resubmit_rx_urb
  ****************************************************************/
 static int kaweth_resubmit_rx_urb(struct kaweth_device *kaweth,
-						unsigned mem_flags)
+						gfp_t mem_flags)
 {
 	int result;
 
diff --git a/drivers/usb/net/net1080.c b/drivers/usb/net/net1080.c
index a4309c4a491b..cee55f8cf64f 100644
--- a/drivers/usb/net/net1080.c
+++ b/drivers/usb/net/net1080.c
@@ -500,7 +500,7 @@ static int net1080_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 }
 
 static struct sk_buff *
-net1080_tx_fixup(struct usbnet *dev, struct sk_buff *skb, unsigned flags)
+net1080_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
 {
 	int padlen;
 	struct sk_buff *skb2;
diff --git a/drivers/usb/net/rndis_host.c b/drivers/usb/net/rndis_host.c
index 2ed2e5fb7778..b5a925dc1beb 100644
--- a/drivers/usb/net/rndis_host.c
+++ b/drivers/usb/net/rndis_host.c
@@ -517,7 +517,7 @@ static int rndis_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 }
 
 static struct sk_buff *
-rndis_tx_fixup(struct usbnet *dev, struct sk_buff *skb, unsigned flags)
+rndis_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
 {
 	struct rndis_data_hdr *hdr;
 	struct sk_buff *skb2;
diff --git a/drivers/usb/net/usbnet.c b/drivers/usb/net/usbnet.c
index 6c460918d54f..fce81d738933 100644
--- a/drivers/usb/net/usbnet.c
+++ b/drivers/usb/net/usbnet.c
@@ -288,7 +288,7 @@ EXPORT_SYMBOL_GPL(usbnet_defer_kevent);
 
 static void rx_complete (struct urb *urb, struct pt_regs *regs);
 
-static void rx_submit (struct usbnet *dev, struct urb *urb, unsigned flags)
+static void rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags)
 {
 	struct sk_buff *skb;
 	struct skb_data *entry;
diff --git a/drivers/usb/net/usbnet.h b/drivers/usb/net/usbnet.h
index 7aa0abd1a9bd..89fc4958eecf 100644
--- a/drivers/usb/net/usbnet.h
+++ b/drivers/usb/net/usbnet.h
@@ -107,7 +107,7 @@ struct driver_info {
 
 	/* fixup tx packet (add framing) */
 	struct sk_buff *(*tx_fixup)(struct usbnet *dev,
-				struct sk_buff *skb, unsigned flags);
+				struct sk_buff *skb, gfp_t flags);
 
 	/* for new devices, use the descriptor-reading code instead */
 	int in;	/* rx endpoint */
diff --git a/drivers/usb/net/zaurus.c b/drivers/usb/net/zaurus.c
index ee3b892aeabc..5d4b7d55b097 100644
--- a/drivers/usb/net/zaurus.c
+++ b/drivers/usb/net/zaurus.c
@@ -62,7 +62,7 @@
  */
 
 static struct sk_buff *
-zaurus_tx_fixup(struct usbnet *dev, struct sk_buff *skb, unsigned flags)
+zaurus_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
 {
 	int padlen;
 	struct sk_buff *skb2;
diff --git a/drivers/usb/net/zd1201.c b/drivers/usb/net/zd1201.c
index c4e479ee926a..2f52261c7cc1 100644
--- a/drivers/usb/net/zd1201.c
+++ b/drivers/usb/net/zd1201.c
@@ -521,7 +521,7 @@ static int zd1201_setconfig(struct zd1201 *zd, int rid, void *buf, int len, int
 	int reqlen;
 	char seq=0;
 	struct urb *urb;
-	unsigned int gfp_mask = wait ? GFP_NOIO : GFP_ATOMIC;
+	gfp_t gfp_mask = wait ? GFP_NOIO : GFP_ATOMIC;
 
 	len += 4;			/* first 4 are for header */
 
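zd1201's "wait ? GFP_NOIO : GFP_ATOMIC" is a reminder that gfp_t carries policy, not just a type: GFP_NOIO may sleep but will not recurse into I/O submission, while GFP_ATOMIC never sleeps. A hedged sketch of the same decision in isolation (illustrative helper, not the driver's code):

static void *config_buf_alloc(size_t len, int can_sleep)
{
	gfp_t gfp_mask = can_sleep ? GFP_NOIO : GFP_ATOMIC;

	return kmalloc(len, gfp_mask);
}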
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 23c125128024..0d576987ec67 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -29,7 +29,7 @@ static int afs_file_release(struct inode *inode, struct file *file);
 
 static int afs_file_readpage(struct file *file, struct page *page);
 static int afs_file_invalidatepage(struct page *page, unsigned long offset);
-static int afs_file_releasepage(struct page *page, int gfp_flags);
+static int afs_file_releasepage(struct page *page, gfp_t gfp_flags);
 
 static ssize_t afs_file_write(struct file *file, const char __user *buf,
 			      size_t size, loff_t *off);
@@ -279,7 +279,7 @@ static int afs_file_invalidatepage(struct page *page, unsigned long offset)
 /*
  * release a page and cleanup its private data
  */
-static int afs_file_releasepage(struct page *page, int gfp_flags)
+static int afs_file_releasepage(struct page *page, gfp_t gfp_flags)
 {
 	struct cachefs_page *pageio;
 
diff --git a/fs/bio.c b/fs/bio.c
index 7d81a93afd48..460554b07ff9 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -778,7 +778,7 @@ static int bio_map_kern_endio(struct bio *bio, unsigned int bytes_done, int err)
 
 
 static struct bio *__bio_map_kern(request_queue_t *q, void *data,
-				  unsigned int len, unsigned int gfp_mask)
+				  unsigned int len, gfp_t gfp_mask)
 {
 	unsigned long kaddr = (unsigned long)data;
 	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -825,7 +825,7 @@ static struct bio *__bio_map_kern(request_queue_t *q, void *data,
  *	device. Returns an error pointer in case of error.
  */
 struct bio *bio_map_kern(request_queue_t *q, void *data, unsigned int len,
-			 unsigned int gfp_mask)
+			 gfp_t gfp_mask)
 {
 	struct bio *bio;
 
diff --git a/fs/buffer.c b/fs/buffer.c
index 1216c0d3c8ce..b1667986442f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -502,7 +502,7 @@ static void free_more_memory(void)
 	yield();
 
 	for_each_pgdat(pgdat) {
-		zones = pgdat->node_zonelists[GFP_NOFS&GFP_ZONEMASK].zones;
+		zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones;
 		if (*zones)
 			try_to_free_pages(zones, GFP_NOFS);
 	}
@@ -1571,7 +1571,7 @@ static inline void discard_buffer(struct buffer_head * bh)
 *
 * NOTE: @gfp_mask may go away, and this function may become non-blocking.
 */
-int try_to_release_page(struct page *page, int gfp_mask)
+int try_to_release_page(struct page *page, gfp_t gfp_mask)
 {
 	struct address_space * const mapping = page->mapping;
 
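The free_more_memory() hunk also replaces the open-coded "GFP_NOFS & GFP_ZONEMASK" with gfp_zone(): once gfp_t is an annotated type, callers are not supposed to poke at its bits directly. Roughly what that helper hides, as a simplified sketch (the real gfp.h version of this era force-casts and bounds-checks, so this is an assumption-laden approximation, not the kernel's code):

static inline int gfp_zone_sketch(gfp_t flags)
{
	/* the low gfp bits select which memory zone the allocation
	 * may draw from; the helper keeps the cast in one place */
	return GFP_ZONEMASK & (__force int) flags;
}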
diff --git a/fs/dcache.c b/fs/dcache.c
index fb10386c59be..e90512ed35a4 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -689,7 +689,7 @@ void shrink_dcache_anon(struct hlist_head *head)
 *
 * In this case we return -1 to tell the caller that we baled.
 */
-static int shrink_dcache_memory(int nr, unsigned int gfp_mask)
+static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
 {
 	if (nr) {
 		if (!(gfp_mask & __GFP_FS))
diff --git a/fs/dquot.c b/fs/dquot.c
index b9732335bcdc..05f3327d64a3 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -500,7 +500,7 @@ static void prune_dqcache(int count)
 * more memory
 */
 
-static int shrink_dqcache_memory(int nr, unsigned int gfp_mask)
+static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
 {
 	if (nr) {
 		spin_lock(&dq_list_lock);
diff --git a/fs/exec.c b/fs/exec.c
index a04a575ad433..d2208f7c87db 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -126,8 +126,7 @@ asmlinkage long sys_uselib(const char __user * library)
 	struct nameidata nd;
 	int error;
 
-	nd.intent.open.flags = FMODE_READ;
-	error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd);
+	error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ);
 	if (error)
 		goto out;
 
@@ -139,7 +138,7 @@ asmlinkage long sys_uselib(const char __user * library)
 	if (error)
 		goto exit;
 
-	file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
+	file = nameidata_to_filp(&nd, O_RDONLY);
 	error = PTR_ERR(file);
 	if (IS_ERR(file))
 		goto out;
@@ -167,6 +166,7 @@ asmlinkage long sys_uselib(const char __user * library)
 out:
 	return error;
 exit:
+	release_open_intent(&nd);
 	path_release(&nd);
 	goto out;
 }
@@ -490,8 +490,7 @@ struct file *open_exec(const char *name)
 	int err;
 	struct file *file;
 
-	nd.intent.open.flags = FMODE_READ;
-	err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd);
+	err = path_lookup_open(name, LOOKUP_FOLLOW, &nd, FMODE_READ);
 	file = ERR_PTR(err);
 
 	if (!err) {
@@ -504,7 +503,7 @@ struct file *open_exec(const char *name)
 				err = -EACCES;
 			file = ERR_PTR(err);
 			if (!err) {
-				file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
+				file = nameidata_to_filp(&nd, O_RDONLY);
 				if (!IS_ERR(file)) {
 					err = deny_write_access(file);
 					if (err) {
@@ -516,6 +515,7 @@ out:
 					return file;
 				}
 			}
+			release_open_intent(&nd);
 			path_release(&nd);
 		}
 		goto out;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index b5177c90d6f1..8b38f2232796 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1434,7 +1434,7 @@ static int ext3_invalidatepage(struct page *page, unsigned long offset)
 	return journal_invalidatepage(journal, page, offset);
 }
 
-static int ext3_releasepage(struct page *page, int wait)
+static int ext3_releasepage(struct page *page, gfp_t wait)
 {
 	journal_t *journal = EXT3_JOURNAL(page->mapping->host);
 
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index f1570b9f9de3..3f680c5675bf 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -46,7 +46,7 @@ static sector_t hfs_bmap(struct address_space *mapping, sector_t block)
 	return generic_block_bmap(mapping, block, hfs_get_block);
 }
 
-static int hfs_releasepage(struct page *page, int mask)
+static int hfs_releasepage(struct page *page, gfp_t mask)
 {
 	struct inode *inode = page->mapping->host;
 	struct super_block *sb = inode->i_sb;
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index d5642705f633..f205773ddfbe 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -40,7 +40,7 @@ static sector_t hfsplus_bmap(struct address_space *mapping, sector_t block)
 	return generic_block_bmap(mapping, block, hfsplus_get_block);
 }
 
-static int hfsplus_releasepage(struct page *page, int mask)
+static int hfsplus_releasepage(struct page *page, gfp_t mask)
 {
 	struct inode *inode = page->mapping->host;
 	struct super_block *sb = inode->i_sb;
diff --git a/fs/inode.c b/fs/inode.c
index f80a79ff156b..7d3316527767 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -475,7 +475,7 @@ static void prune_icache(int nr_to_scan)
 * This function is passed the number of inodes to scan, and it returns the
 * total number of remaining possibly-reclaimable inodes.
 */
-static int shrink_icache_memory(int nr, unsigned int gfp_mask)
+static int shrink_icache_memory(int nr, gfp_t gfp_mask)
 {
 	if (nr) {
 		/*
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 7ae2c4fe506b..e4b516ac4989 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1606,7 +1606,7 @@ int journal_blocks_per_page(struct inode *inode)
 * Simple support for retrying memory allocations. Introduced to help to
 * debug different VM deadlock avoidance strategies.
 */
-void * __jbd_kmalloc (const char *where, size_t size, int flags, int retry)
+void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
 {
 	return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0));
 }
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 49bbc2be3d72..13cb05bf6048 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1621,7 +1621,7 @@ out:
 * while the data is part of a transaction. Yes?
 */
 int journal_try_to_free_buffers(journal_t *journal,
-				struct page *page, int unused_gfp_mask)
+				struct page *page, gfp_t unused_gfp_mask)
 {
 	struct buffer_head *head;
 	struct buffer_head *bh;
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 13d7e3f1feb4..eeb37d70e650 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -198,7 +198,7 @@ static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
 	}
 }
 
-static inline struct metapage *alloc_metapage(unsigned int gfp_mask)
+static inline struct metapage *alloc_metapage(gfp_t gfp_mask)
 {
 	return mempool_alloc(metapage_mempool, gfp_mask);
 }
@@ -534,7 +534,7 @@ add_failed:
 	return -EIO;
 }
 
-static int metapage_releasepage(struct page *page, int gfp_mask)
+static int metapage_releasepage(struct page *page, gfp_t gfp_mask)
 {
 	struct metapage *mp;
 	int busy = 0;
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 82c77df81c5f..c4c8601096e0 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -173,11 +173,10 @@ nlm_bind_host(struct nlm_host *host)
 
 	/* If we've already created an RPC client, check whether
 	 * RPC rebind is required
-	 * Note: why keep rebinding if we're on a tcp connection?
 	 */
 	if ((clnt = host->h_rpcclnt) != NULL) {
 		xprt = clnt->cl_xprt;
-		if (!xprt->stream && time_after_eq(jiffies, host->h_nextrebind)) {
+		if (time_after_eq(jiffies, host->h_nextrebind)) {
 			clnt->cl_port = 0;
 			host->h_nextrebind = jiffies + NLM_HOST_REBIND;
 			dprintk("lockd: next rebind in %ld jiffies\n",
@@ -189,7 +188,6 @@ nlm_bind_host(struct nlm_host *host)
 		goto forgetit;
 
 	xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout);
-	xprt->nocong = 1;	/* No congestion control for NLM */
 	xprt->resvport = 1;	/* NLM requires a reserved port */
 
 	/* Existing NLM servers accept AUTH_UNIX only */
diff --git a/fs/locks.c b/fs/locks.c
index f7daa5f48949..a1e8b2248014 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -316,21 +316,22 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,
 	/* POSIX-1996 leaves the case l->l_len < 0 undefined;
 	   POSIX-2001 defines it. */
 	start += l->l_start;
-	end = start + l->l_len - 1;
-	if (l->l_len < 0) {
+	if (start < 0)
+		return -EINVAL;
+	fl->fl_end = OFFSET_MAX;
+	if (l->l_len > 0) {
+		end = start + l->l_len - 1;
+		fl->fl_end = end;
+	} else if (l->l_len < 0) {
 		end = start - 1;
+		fl->fl_end = end;
 		start += l->l_len;
+		if (start < 0)
+			return -EINVAL;
 	}
-
-	if (start < 0)
-		return -EINVAL;
-	if (l->l_len > 0 && end < 0)
-		return -EOVERFLOW;
-
 	fl->fl_start = start;	/* we record the absolute position */
-	fl->fl_end = end;
-	if (l->l_len == 0)
-		fl->fl_end = OFFSET_MAX;
+	if (fl->fl_end < fl->fl_start)
+		return -EOVERFLOW;
 
 	fl->fl_owner = current->files;
 	fl->fl_pid = current->tgid;
@@ -362,14 +363,21 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
 		return -EINVAL;
 	}
 
-	if (((start += l->l_start) < 0) || (l->l_len < 0))
+	start += l->l_start;
+	if (start < 0)
 		return -EINVAL;
-	fl->fl_end = start + l->l_len - 1;
-	if (l->l_len > 0 && fl->fl_end < 0)
-		return -EOVERFLOW;
+	fl->fl_end = OFFSET_MAX;
+	if (l->l_len > 0) {
+		fl->fl_end = start + l->l_len - 1;
+	} else if (l->l_len < 0) {
+		fl->fl_end = start - 1;
+		start += l->l_len;
+		if (start < 0)
+			return -EINVAL;
+	}
 	fl->fl_start = start;	/* we record the absolute position */
-	if (l->l_len == 0)
-		fl->fl_end = OFFSET_MAX;
+	if (fl->fl_end < fl->fl_start)
+		return -EOVERFLOW;
 
 	fl->fl_owner = current->files;
 	fl->fl_pid = current->tgid;
@@ -829,12 +837,16 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request)
 	/* Detect adjacent or overlapping regions (if same lock type)
 	 */
 	if (request->fl_type == fl->fl_type) {
+		/* In all comparisons of start vs end, use
+		 * "start - 1" rather than "end + 1". If end
+		 * is OFFSET_MAX, end + 1 will become negative.
+		 */
 		if (fl->fl_end < request->fl_start - 1)
 			goto next_lock;
 		/* If the next lock in the list has entirely bigger
 		 * addresses than the new one, insert the lock here.
 		 */
-		if (fl->fl_start > request->fl_end + 1)
+		if (fl->fl_start - 1 > request->fl_end)
 			break;
 
 		/* If we come here, the new and old lock are of the
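The comment added in the hunk above is the heart of this locks.c change: whole-file locks now keep `fl_end = OFFSET_MAX`, so computing `end + 1` would overflow, and every adjacency test is rewritten to subtract on the other side. A compilable demonstration, with LLONG_MAX standing in for OFFSET_MAX:

#include <assert.h>
#include <limits.h>

int main(void)
{
	long long end = LLONG_MAX;	/* a whole-file lock's fl_end */
	long long start = 100;	/* the other lock's fl_start */

	/* "start > end + 1" is unusable: end + 1 is signed overflow,
	 * i.e. undefined behaviour, and in practice wraps negative. */

	/* "start - 1 > end" asks the same question safely: */
	assert(!(start - 1 > end));	/* locks are adjacent or overlap */
	return 0;
}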
diff --git a/fs/mbcache.c b/fs/mbcache.c
index b002a088857d..298997f17475 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -116,7 +116,7 @@ mb_cache_indexes(struct mb_cache *cache)
  * What the mbcache registers as to get shrunk dynamically.
  */
 
-static int mb_cache_shrink_fn(int nr_to_scan, unsigned int gfp_mask);
+static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask);
 
 
 static inline int
@@ -140,7 +140,7 @@ __mb_cache_entry_unhash(struct mb_cache_entry *ce)
 
 
 static inline void
-__mb_cache_entry_forget(struct mb_cache_entry *ce, int gfp_mask)
+__mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
 {
 	struct mb_cache *cache = ce->e_cache;
 
@@ -193,7 +193,7 @@ forget:
  * Returns the number of objects which are present in the cache.
  */
 static int
-mb_cache_shrink_fn(int nr_to_scan, unsigned int gfp_mask)
+mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask)
 {
 	LIST_HEAD(free_list);
 	struct list_head *l, *ltmp;
diff --git a/fs/namei.c b/fs/namei.c
index aa62dbda93ac..aaaa81036234 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -28,6 +28,7 @@
 #include <linux/syscalls.h>
 #include <linux/mount.h>
 #include <linux/audit.h>
+#include <linux/file.h>
 #include <asm/namei.h>
 #include <asm/uaccess.h>
 
@@ -317,6 +318,18 @@ void path_release_on_umount(struct nameidata *nd)
 	mntput_no_expire(nd->mnt);
 }
 
+/**
+ * release_open_intent - free up open intent resources
+ * @nd: pointer to nameidata
+ */
+void release_open_intent(struct nameidata *nd)
+{
+	if (nd->intent.open.file->f_dentry == NULL)
+		put_filp(nd->intent.open.file);
+	else
+		fput(nd->intent.open.file);
+}
+
 /*
  * Internal lookup() using the new generic dcache.
  * SMP-safe
@@ -750,6 +763,7 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd)
 		struct qstr this;
 		unsigned int c;
 
+		nd->flags |= LOOKUP_CONTINUE;
 		err = exec_permission_lite(inode, nd);
 		if (err == -EAGAIN) {
 			err = permission(inode, MAY_EXEC, nd);
@@ -802,7 +816,6 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd)
 			if (err < 0)
 				break;
 		}
-		nd->flags |= LOOKUP_CONTINUE;
 		/* This does the actual lookups.. */
 		err = do_lookup(nd, &this, &next);
 		if (err)
@@ -1052,6 +1065,70 @@ out:
 	return retval;
 }
 
+static int __path_lookup_intent_open(const char *name, unsigned int lookup_flags,
+		struct nameidata *nd, int open_flags, int create_mode)
+{
+	struct file *filp = get_empty_filp();
+	int err;
+
+	if (filp == NULL)
+		return -ENFILE;
+	nd->intent.open.file = filp;
+	nd->intent.open.flags = open_flags;
+	nd->intent.open.create_mode = create_mode;
+	err = path_lookup(name, lookup_flags|LOOKUP_OPEN, nd);
+	if (IS_ERR(nd->intent.open.file)) {
+		if (err == 0) {
+			err = PTR_ERR(nd->intent.open.file);
+			path_release(nd);
+		}
+	} else if (err != 0)
+		release_open_intent(nd);
+	return err;
+}
+
+/**
+ * path_lookup_open - lookup a file path with open intent
+ * @name: pointer to file name
+ * @lookup_flags: lookup intent flags
+ * @nd: pointer to nameidata
+ * @open_flags: open intent flags
+ */
+int path_lookup_open(const char *name, unsigned int lookup_flags,
+		struct nameidata *nd, int open_flags)
+{
+	return __path_lookup_intent_open(name, lookup_flags, nd,
+			open_flags, 0);
+}
+
+/**
+ * path_lookup_create - lookup a file path with open + create intent
+ * @name: pointer to file name
+ * @lookup_flags: lookup intent flags
+ * @nd: pointer to nameidata
+ * @open_flags: open intent flags
+ * @create_mode: create intent flags
+ */
+int path_lookup_create(const char *name, unsigned int lookup_flags,
+		struct nameidata *nd, int open_flags, int create_mode)
+{
+	return __path_lookup_intent_open(name, lookup_flags|LOOKUP_CREATE, nd,
+			open_flags, create_mode);
+}
+
+int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags,
+		struct nameidata *nd, int open_flags)
+{
+	char *tmp = getname(name);
+	int err = PTR_ERR(tmp);
+
+	if (!IS_ERR(tmp)) {
+		err = __path_lookup_intent_open(tmp, lookup_flags, nd, open_flags, 0);
+		putname(tmp);
+	}
+	return err;
+}
+
 /*
  * Restricted form of lookup. Doesn't follow links, single-component only,
  * needs parent already locked. Doesn't follow mounts.
@@ -1416,27 +1493,27 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
  */
 int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
 {
-	int acc_mode, error = 0;
+	int acc_mode, error;
 	struct path path;
 	struct dentry *dir;
 	int count = 0;
 
 	acc_mode = ACC_MODE(flag);
 
+	/* O_TRUNC implies we need access checks for write permissions */
+	if (flag & O_TRUNC)
+		acc_mode |= MAY_WRITE;
+
 	/* Allow the LSM permission hook to distinguish append
 	   access from general write access. */
 	if (flag & O_APPEND)
 		acc_mode |= MAY_APPEND;
 
-	/* Fill in the open() intent data */
-	nd->intent.open.flags = flag;
-	nd->intent.open.create_mode = mode;
-
 	/*
 	 * The simplest case - just a plain lookup.
 	 */
 	if (!(flag & O_CREAT)) {
-		error = path_lookup(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd);
+		error = path_lookup_open(pathname, lookup_flags(flag), nd, flag);
 		if (error)
 			return error;
 		goto ok;
@@ -1445,7 +1522,7 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
 	/*
 	 * Create - we need to know the parent.
 	 */
-	error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd);
+	error = path_lookup_create(pathname, LOOKUP_PARENT, nd, flag, mode);
 	if (error)
 		return error;
 
@@ -1520,6 +1597,8 @@ ok:
 exit_dput:
 	dput_path(&path, nd);
 exit:
+	if (!IS_ERR(nd->intent.open.file))
+		release_open_intent(nd);
 	path_release(nd);
 	return error;
 
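Taken together, the namei.c hunks move open-intent setup out of open_namei() and into the lookup helpers: the intent file is allocated before the walk begins, and release_open_intent() disposes of it on every exit path. A rough user-space model of that ownership pattern, with types and names simplified for illustration:

#include <errno.h>
#include <stdlib.h>

struct file_sketch { int used; };
struct nd_sketch { struct file_sketch *intent_file; };

static int walk(const char *name, struct nd_sketch *nd)
{
	(void)name;
	nd->intent_file->used = 1;	/* pretend the lookup consumed it */
	return 0;
}

static void release_intent(struct nd_sketch *nd)
{
	/* Stands in for the put_filp()/fput() split. */
	free(nd->intent_file);
	nd->intent_file = NULL;
}

/* Shape of __path_lookup_intent_open(): allocate the intent state
 * first, walk, and release it on failure so callers never leak it. */
static int lookup_intent_open(const char *name, struct nd_sketch *nd)
{
	int err;

	nd->intent_file = calloc(1, sizeof(*nd->intent_file));
	if (nd->intent_file == NULL)
		return -ENFILE;
	err = walk(name, nd);
	if (err != 0)
		release_intent(nd);
	return err;
}

int main(void)
{
	struct nd_sketch nd = { 0 };
	int err = lookup_intent_open("/tmp/x", &nd);
	if (err == 0)
		release_intent(&nd);	/* the success path releases too */
	return err ? 1 : 0;
}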
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 4a36839f0bbd..44135af9894c 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -142,7 +142,7 @@ static void nfs_msync_inode(struct inode *inode)
 /*
  * Basic procedure for returning a delegation to the server
  */
-int nfs_inode_return_delegation(struct inode *inode)
+int __nfs_inode_return_delegation(struct inode *inode)
 {
 	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
 	struct nfs_inode *nfsi = NFS_I(inode);
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 3f6c45a29d6a..8017846b561f 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -25,7 +25,7 @@ struct nfs_delegation {
 
 int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
 void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
-int nfs_inode_return_delegation(struct inode *inode);
+int __nfs_inode_return_delegation(struct inode *inode);
 int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
 
 struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle);
@@ -47,11 +47,25 @@ static inline int nfs_have_delegation(struct inode *inode, int flags)
 		return 1;
 	return 0;
 }
+
+static inline int nfs_inode_return_delegation(struct inode *inode)
+{
+	int err = 0;
+
+	if (NFS_I(inode)->delegation != NULL)
+		err = __nfs_inode_return_delegation(inode);
+	return err;
+}
 #else
 static inline int nfs_have_delegation(struct inode *inode, int flags)
 {
 	return 0;
 }
+
+static inline int nfs_inode_return_delegation(struct inode *inode)
+{
+	return 0;
+}
 #endif
 
 #endif
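The header change is a classic guard-wrapper: callers may now invoke nfs_inode_return_delegation() unconditionally, and the inline only takes the expensive `__nfs_inode_return_delegation()` path when a delegation is actually held (and compiles to a constant 0 when NFSv4 is disabled). A generic sketch of the pattern, with invented names:

#include <stddef.h>

struct inode_sketch { void *delegation; };

/* Slow path: imagine RPC traffic and locking in here. */
static int expensive_return(struct inode_sketch *inode)
{
	inode->delegation = NULL;
	return 0;
}

/* Cheap inline guard, mirroring the new header wrapper: the common
 * "no delegation" case costs one pointer test and no function call. */
static inline int return_delegation(struct inode_sketch *inode)
{
	int err = 0;

	if (inode->delegation != NULL)
		err = expensive_return(inode);
	return err;
}

int main(void)
{
	struct inode_sketch inode = { NULL };
	return return_delegation(&inode);
}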
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2df639f143e8..8272ed3fc707 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -532,6 +532,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	my_entry.eof = 0;
 	my_entry.fh = &fh;
 	my_entry.fattr = &fattr;
+	nfs_fattr_init(&fattr);
 	desc->entry = &my_entry;
 
 	while(!desc->entry->eof) {
@@ -565,8 +566,6 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		}
 	}
 	unlock_kernel();
-	if (desc->error < 0)
-		return desc->error;
 	if (res < 0)
 		return res;
 	return 0;
@@ -803,6 +802,7 @@ static int nfs_dentry_delete(struct dentry *dentry)
  */
 static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
 {
+	nfs_inode_return_delegation(inode);
 	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
 		lock_kernel();
 		inode->i_nlink--;
@@ -853,12 +853,6 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
 
 	lock_kernel();
-	/* Revalidate parent directory attribute cache */
-	error = nfs_revalidate_inode(NFS_SERVER(dir), dir);
-	if (error < 0) {
-		res = ERR_PTR(error);
-		goto out_unlock;
-	}
 
 	/* If we're doing an exclusive create, optimize away the lookup */
 	if (nfs_is_exclusive_create(dir, nd))
@@ -916,7 +910,6 @@ static int is_atomic_open(struct inode *dir, struct nameidata *nd)
 static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
 	struct dentry *res = NULL;
-	struct inode *inode = NULL;
 	int error;
 
 	/* Check that we are indeed trying to open this file */
@@ -930,8 +923,10 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
 
 	/* Let vfs_create() deal with O_EXCL */
-	if (nd->intent.open.flags & O_EXCL)
-		goto no_entry;
+	if (nd->intent.open.flags & O_EXCL) {
+		d_add(dentry, NULL);
+		goto out;
+	}
 
 	/* Open the file on the server */
 	lock_kernel();
@@ -945,32 +940,30 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 
 	if (nd->intent.open.flags & O_CREAT) {
 		nfs_begin_data_update(dir);
-		inode = nfs4_atomic_open(dir, dentry, nd);
+		res = nfs4_atomic_open(dir, dentry, nd);
 		nfs_end_data_update(dir);
 	} else
-		inode = nfs4_atomic_open(dir, dentry, nd);
+		res = nfs4_atomic_open(dir, dentry, nd);
 	unlock_kernel();
-	if (IS_ERR(inode)) {
-		error = PTR_ERR(inode);
+	if (IS_ERR(res)) {
+		error = PTR_ERR(res);
 		switch (error) {
 			/* Make a negative dentry */
 			case -ENOENT:
-				inode = NULL;
-				break;
+				res = NULL;
+				goto out;
 			/* This turned out not to be a regular file */
+			case -EISDIR:
+			case -ENOTDIR:
+				goto no_open;
 			case -ELOOP:
 				if (!(nd->intent.open.flags & O_NOFOLLOW))
 					goto no_open;
-			/* case -EISDIR: */
 			/* case -EINVAL: */
 			default:
-				res = ERR_PTR(error);
 				goto out;
 		}
-	}
-no_entry:
-	res = d_add_unique(dentry, inode);
-	if (res != NULL)
+	} else if (res != NULL)
 		dentry = res;
 	nfs_renew_times(dentry);
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
@@ -1014,7 +1007,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
 	 */
 	lock_kernel();
 	verifier = nfs_save_change_attribute(dir);
-	ret = nfs4_open_revalidate(dir, dentry, openflags);
+	ret = nfs4_open_revalidate(dir, dentry, openflags, nd);
 	if (!ret)
 		nfs_set_verifier(dentry, verifier);
 	unlock_kernel();
@@ -1137,7 +1130,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
 
 	lock_kernel();
 	nfs_begin_data_update(dir);
-	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
+	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd);
 	nfs_end_data_update(dir);
 	if (error != 0)
 		goto out_err;
@@ -1332,6 +1325,7 @@ static int nfs_safe_remove(struct dentry *dentry)
 
 	nfs_begin_data_update(dir);
 	if (inode != NULL) {
+		nfs_inode_return_delegation(inode);
 		nfs_begin_data_update(inode);
 		error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
 		/* The VFS may want to delete this inode */
@@ -1438,17 +1432,14 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 		old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
 		dentry->d_parent->d_name.name, dentry->d_name.name);
 
-	/*
-	 * Drop the dentry in advance to force a new lookup.
-	 * Since nfs_proc_link doesn't return a file handle,
-	 * we can't use the existing dentry.
-	 */
 	lock_kernel();
-	d_drop(dentry);
-
 	nfs_begin_data_update(dir);
 	nfs_begin_data_update(inode);
 	error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
+	if (error == 0) {
+		atomic_inc(&inode->i_count);
+		d_instantiate(dentry, inode);
+	}
 	nfs_end_data_update(inode);
 	nfs_end_data_update(dir);
 	unlock_kernel();
@@ -1512,9 +1503,11 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	 */
 	if (!new_inode)
 		goto go_ahead;
-	if (S_ISDIR(new_inode->i_mode))
-		goto out;
-	else if (atomic_read(&new_dentry->d_count) > 2) {
+	if (S_ISDIR(new_inode->i_mode)) {
+		error = -EISDIR;
+		if (!S_ISDIR(old_inode->i_mode))
+			goto out;
+	} else if (atomic_read(&new_dentry->d_count) > 2) {
 		int err;
 		/* copy the target dentry's name */
 		dentry = d_alloc(new_dentry->d_parent,
@@ -1539,7 +1532,8 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 #endif
 			goto out;
 		}
-	}
+	} else
+		new_inode->i_nlink--;
 
 go_ahead:
 	/*
@@ -1549,6 +1543,7 @@ go_ahead:
 		nfs_wb_all(old_inode);
 		shrink_dcache_parent(old_dentry);
 	}
+	nfs_inode_return_delegation(old_inode);
 
 	if (new_inode)
 		d_delete(new_dentry);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 6bdcfa95de94..57d3e77d97ee 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -205,8 +205,8 @@ nfs_file_flush(struct file *file)
 	if (!status) {
 		status = ctx->error;
 		ctx->error = 0;
-		if (!status && !nfs_have_delegation(inode, FMODE_READ))
-			__nfs_revalidate_inode(NFS_SERVER(inode), inode);
+		if (!status)
+			nfs_revalidate_inode(NFS_SERVER(inode), inode);
 	}
 	unlock_kernel();
 	return status;
@@ -376,22 +376,31 @@ out_swapfile:
 
 static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
 {
+	struct file_lock *cfl;
 	struct inode *inode = filp->f_mapping->host;
 	int status = 0;
 
 	lock_kernel();
-	/* Use local locking if mounted with "-onolock" */
-	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
-		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
-	else {
-		struct file_lock *cfl = posix_test_lock(filp, fl);
-
-		fl->fl_type = F_UNLCK;
-		if (cfl != NULL)
-			memcpy(fl, cfl, sizeof(*fl));
+	/* Try local locking first */
+	cfl = posix_test_lock(filp, fl);
+	if (cfl != NULL) {
+		locks_copy_lock(fl, cfl);
+		goto out;
 	}
+
+	if (nfs_have_delegation(inode, FMODE_READ))
+		goto out_noconflict;
+
+	if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)
+		goto out_noconflict;
+
+	status = NFS_PROTO(inode)->lock(filp, cmd, fl);
+out:
 	unlock_kernel();
 	return status;
+out_noconflict:
+	fl->fl_type = F_UNLCK;
+	goto out;
 }
 
 static int do_vfs_lock(struct file *file, struct file_lock *fl)
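The rewritten do_getlk() establishes an ordering: consult the local POSIX lock table first, short-circuit to "no conflict" when a read delegation or the nolock mount flag makes a server round trip pointless, and only then ask the server. A condensed control-flow sketch; the stub predicates are invented for illustration:

#include <stdio.h>

enum { F_UNLCK_SKETCH = 2 };
struct lock_req { int type; };

static int local_conflict(struct lock_req *fl) { (void)fl; return 0; }
static int have_read_delegation(void)          { return 1; }
static int mounted_nolock(void)                { return 0; }
static int ask_server(struct lock_req *fl)     { (void)fl; return 0; }

/* Mirrors the new do_getlk() ordering: local table, then the two
 * "no conflict possible" short-circuits, then the server call. */
static int do_getlk_sketch(struct lock_req *fl)
{
	if (local_conflict(fl))
		return 0;	/* fl already describes the conflict */
	if (have_read_delegation() || mounted_nolock()) {
		fl->type = F_UNLCK_SKETCH;	/* report no conflict */
		return 0;
	}
	return ask_server(fl);
}

int main(void)
{
	struct lock_req fl = { 0 };
	printf("status=%d type=%d\n", do_getlk_sketch(&fl), fl.type);
	return 0;
}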
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d4eadeea128e..f2781ca42761 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -358,6 +358,35 @@ out_no_root:
 	return no_root_error;
 }
 
+static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans)
+{
+	to->to_initval = timeo * HZ / 10;
+	to->to_retries = retrans;
+	if (!to->to_retries)
+		to->to_retries = 2;
+
+	switch (proto) {
+	case IPPROTO_TCP:
+		if (!to->to_initval)
+			to->to_initval = 60 * HZ;
+		if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
+			to->to_initval = NFS_MAX_TCP_TIMEOUT;
+		to->to_increment = to->to_initval;
+		to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
+		to->to_exponential = 0;
+		break;
+	case IPPROTO_UDP:
+	default:
+		if (!to->to_initval)
+			to->to_initval = 11 * HZ / 10;
+		if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
+			to->to_initval = NFS_MAX_UDP_TIMEOUT;
+		to->to_maxval = NFS_MAX_UDP_TIMEOUT;
+		to->to_exponential = 1;
+		break;
+	}
+}
+
 /*
  * Create an RPC client handle.
  */
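nfs_init_timeout_values() encodes the transport split: TCP gets linear back-off (to_exponential = 0) capped at NFS_MAX_TCP_TIMEOUT, while UDP keeps exponential back-off capped at NFS_MAX_UDP_TIMEOUT, and `timeo` arrives in tenths of a second. A user-space rendering of the same arithmetic; HZ and the caps below are chosen only for illustration:

#include <stdio.h>

#define HZ 1000	/* illustrative tick rate */
#define MAX_TCP_TIMEOUT (600 * HZ)	/* illustrative cap */
#define MAX_UDP_TIMEOUT (60 * HZ)	/* illustrative cap */

struct rto {
	unsigned long initval, maxval, increment;
	unsigned int retries;
	int exponential;
};

/* Same arithmetic as nfs_init_timeout_values(). */
static void init_timeouts(struct rto *to, int tcp, unsigned int timeo,
			  unsigned int retrans)
{
	to->initval = timeo * HZ / 10;
	to->retries = retrans ? retrans : 2;

	if (tcp) {
		if (!to->initval)
			to->initval = 60 * HZ;
		if (to->initval > MAX_TCP_TIMEOUT)
			to->initval = MAX_TCP_TIMEOUT;
		to->increment = to->initval;	/* linear back-off */
		to->maxval = to->initval + to->increment * to->retries;
		to->exponential = 0;
	} else {
		if (!to->initval)
			to->initval = 11 * HZ / 10;
		if (to->initval > MAX_UDP_TIMEOUT)
			to->initval = MAX_UDP_TIMEOUT;
		to->maxval = MAX_UDP_TIMEOUT;	/* exponential, hard cap */
		to->exponential = 1;
	}
}

int main(void)
{
	struct rto to;
	init_timeouts(&to, 1, 0, 0);	/* TCP defaults */
	printf("initval=%lu maxval=%lu retries=%u\n",
	       to.initval, to.maxval, to.retries);
	return 0;
}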
@@ -367,22 +396,12 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 	struct rpc_timeout	timeparms;
 	struct rpc_xprt	*xprt = NULL;
 	struct rpc_clnt	*clnt = NULL;
-	int			tcp = (data->flags & NFS_MOUNT_TCP);
+	int			proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
 
-	/* Initialize timeout values */
-	timeparms.to_initval = data->timeo * HZ / 10;
-	timeparms.to_retries = data->retrans;
-	timeparms.to_maxval  = tcp ? RPC_MAX_TCP_TIMEOUT : RPC_MAX_UDP_TIMEOUT;
-	timeparms.to_exponential = 1;
+	nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans);
 
-	if (!timeparms.to_initval)
-		timeparms.to_initval = (tcp ? 600 : 11) * HZ / 10;
-	if (!timeparms.to_retries)
-		timeparms.to_retries = 5;
-
 	/* create transport and client */
-	xprt = xprt_create_proto(tcp ? IPPROTO_TCP : IPPROTO_UDP,
-				 &server->addr, &timeparms);
+	xprt = xprt_create_proto(proto, &server->addr, &timeparms);
 	if (IS_ERR(xprt)) {
 		dprintk("%s: cannot create RPC transport. Error = %ld\n",
 				__FUNCTION__, PTR_ERR(xprt));
@@ -576,7 +595,6 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 		{ NFS_MOUNT_SOFT, ",soft", ",hard" },
 		{ NFS_MOUNT_INTR, ",intr", "" },
 		{ NFS_MOUNT_POSIX, ",posix", "" },
-		{ NFS_MOUNT_TCP, ",tcp", ",udp" },
 		{ NFS_MOUNT_NOCTO, ",nocto", "" },
 		{ NFS_MOUNT_NOAC, ",noac", "" },
 		{ NFS_MOUNT_NONLM, ",nolock", ",lock" },
@@ -585,6 +603,8 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 	};
 	struct proc_nfs_info *nfs_infop;
 	struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+	char buf[12];
+	char *proto;
 
 	seq_printf(m, ",v%d", nfss->rpc_ops->version);
 	seq_printf(m, ",rsize=%d", nfss->rsize);
@@ -603,6 +623,18 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 		else
 			seq_puts(m, nfs_infop->nostr);
 	}
+	switch (nfss->client->cl_xprt->prot) {
+		case IPPROTO_TCP:
+			proto = "tcp";
+			break;
+		case IPPROTO_UDP:
+			proto = "udp";
+			break;
+		default:
+			snprintf(buf, sizeof(buf), "%u", nfss->client->cl_xprt->prot);
+			proto = buf;
+	}
+	seq_printf(m, ",proto=%s", proto);
 	seq_puts(m, ",addr=");
 	seq_escape(m, nfss->hostname, " \t\n\\");
 	return 0;
@@ -753,7 +785,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 	else
 		init_special_inode(inode, inode->i_mode, fattr->rdev);
 
-	nfsi->read_cache_jiffies = fattr->timestamp;
+	nfsi->read_cache_jiffies = fattr->time_start;
+	nfsi->last_updated = jiffies;
 	inode->i_atime = fattr->atime;
 	inode->i_mtime = fattr->mtime;
 	inode->i_ctime = fattr->ctime;
@@ -821,6 +854,11 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
 		filemap_fdatawait(inode->i_mapping);
 		nfs_wb_all(inode);
 	}
+	/*
+	 * Return any delegations if we're going to change ACLs
+	 */
+	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
+		nfs_inode_return_delegation(inode);
 	error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr);
 	if (error == 0)
 		nfs_refresh_inode(inode, &fattr);
@@ -1019,15 +1057,11 @@ int nfs_open(struct inode *inode, struct file *filp)
 	ctx->mode = filp->f_mode;
 	nfs_file_set_open_context(filp, ctx);
 	put_nfs_open_context(ctx);
-	if ((filp->f_mode & FMODE_WRITE) != 0)
-		nfs_begin_data_update(inode);
 	return 0;
 }
 
 int nfs_release(struct inode *inode, struct file *filp)
 {
-	if ((filp->f_mode & FMODE_WRITE) != 0)
-		nfs_end_data_update(inode);
 	nfs_file_clear_open_context(filp);
 	return 0;
 }
@@ -1083,14 +1117,15 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 		goto out;
 	}
 
+	spin_lock(&inode->i_lock);
 	status = nfs_update_inode(inode, &fattr, verifier);
 	if (status) {
+		spin_unlock(&inode->i_lock);
 		dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n",
 			 inode->i_sb->s_id,
 			 (long long)NFS_FILEID(inode), status);
 		goto out;
 	}
-	spin_lock(&inode->i_lock);
 	cache_validity = nfsi->cache_validity;
 	nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE;
 
@@ -1098,7 +1133,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 	 * We may need to keep the attributes marked as invalid if
 	 * we raced with nfs_end_attr_update().
 	 */
-	if (verifier == nfsi->cache_change_attribute)
+	if (time_after_eq(verifier, nfsi->cache_change_attribute))
 		nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME);
 	spin_unlock(&inode->i_lock);
 
@@ -1165,7 +1200,7 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
 	if (S_ISDIR(inode->i_mode)) {
 		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
 		/* This ensures we revalidate child dentries */
-		nfsi->cache_change_attribute++;
+		nfsi->cache_change_attribute = jiffies;
 	}
 	spin_unlock(&inode->i_lock);
 
@@ -1197,20 +1232,19 @@ void nfs_end_data_update(struct inode *inode)
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	if (!nfs_have_delegation(inode, FMODE_READ)) {
-		/* Mark the attribute cache for revalidation */
-		spin_lock(&inode->i_lock);
-		nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
-		/* Directories and symlinks: invalidate page cache too */
-		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+		/* Directories and symlinks: invalidate page cache */
+		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) {
+			spin_lock(&inode->i_lock);
 			nfsi->cache_validity |= NFS_INO_INVALID_DATA;
-		spin_unlock(&inode->i_lock);
+			spin_unlock(&inode->i_lock);
+		}
 	}
-	nfsi->cache_change_attribute ++;
+	nfsi->cache_change_attribute = jiffies;
 	atomic_dec(&nfsi->data_updates);
 }
 
 /**
- * nfs_refresh_inode - verify consistency of the inode attribute cache
+ * nfs_check_inode_attributes - verify consistency of the inode attribute cache
  * @inode - pointer to inode
  * @fattr - updated attributes
  *
@@ -1218,13 +1252,12 @@ void nfs_end_data_update(struct inode *inode)
  * so that fattr carries weak cache consistency data, then it may
  * also update the ctime/mtime/change_attribute.
  */
-int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
+static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fattr)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	loff_t cur_size, new_isize;
 	int data_unstable;
 
-	spin_lock(&inode->i_lock);
 
 	/* Are we in the process of updating data on the server? */
 	data_unstable = nfs_caches_unstable(inode);
@@ -1288,11 +1321,67 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
 	if (!timespec_equal(&inode->i_atime, &fattr->atime))
 		nfsi->cache_validity |= NFS_INO_INVALID_ATIME;
 
-	nfsi->read_cache_jiffies = fattr->timestamp;
-	spin_unlock(&inode->i_lock);
+	nfsi->read_cache_jiffies = fattr->time_start;
 	return 0;
 }
 
+/**
+ * nfs_refresh_inode - try to update the inode attribute cache
+ * @inode - pointer to inode
+ * @fattr - updated attributes
+ *
+ * Check that an RPC call that returned attributes has not overlapped with
+ * other recent updates of the inode metadata, then decide whether it is
+ * safe to do a full update of the inode attributes, or whether just to
+ * call nfs_check_inode_attributes.
+ */
+int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	int status;
+
+	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
+		return 0;
+	spin_lock(&inode->i_lock);
+	nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE;
+	if (nfs_verify_change_attribute(inode, fattr->time_start))
+		nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME);
+	if (time_after(fattr->time_start, nfsi->last_updated))
+		status = nfs_update_inode(inode, fattr, fattr->time_start);
+	else
+		status = nfs_check_inode_attributes(inode, fattr);
+
+	spin_unlock(&inode->i_lock);
+	return status;
+}
+
+/**
+ * nfs_post_op_update_inode - try to update the inode attribute cache
+ * @inode - pointer to inode
+ * @fattr - updated attributes
+ *
+ * After an operation that has changed the inode metadata, mark the
+ * attribute cache as being invalid, then try to update it.
+ */
+int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	int status = 0;
+
+	spin_lock(&inode->i_lock);
+	if (unlikely((fattr->valid & NFS_ATTR_FATTR) == 0)) {
+		nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
+		goto out;
+	}
+	status = nfs_update_inode(inode, fattr, fattr->time_start);
+	if (time_after_eq(fattr->time_start, nfsi->cache_change_attribute))
+		nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE);
+	nfsi->cache_change_attribute = jiffies;
+out:
+	spin_unlock(&inode->i_lock);
+	return status;
+}
+
 /*
  * Many nfs protocol calls return the new file attributes after
  * an operation. Here we update the inode to reflect the state
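The split above is the core of the attribute-caching rework: nfs_refresh_inode() performs a full update only when the reply's time_start postdates the last full update, otherwise it merely cross-checks, while nfs_post_op_update_inode() always applies the reply. A sketch of just the decision, omitting the locking; the plain `>` here ignores the jiffies wraparound that the kernel's time_after() handles:

#include <stdio.h>

struct nfsi_sketch { unsigned long last_updated; };
struct fattr_sketch { unsigned long time_start; int valid; };

static int full_update(void) { puts("full nfs_update_inode()"); return 0; }
static int check_only(void)  { puts("nfs_check_inode_attributes()"); return 0; }

/* The nfs_refresh_inode() decision: a reply whose request started
 * before our last full update may carry stale attributes, so it is
 * only cross-checked, never applied wholesale. */
static int refresh(struct nfsi_sketch *nfsi, struct fattr_sketch *fattr)
{
	if (!fattr->valid)
		return 0;
	if (fattr->time_start > nfsi->last_updated)	/* time_after() in-kernel */
		return full_update();
	return check_only();
}

int main(void)
{
	struct nfsi_sketch nfsi = { 100 };
	struct fattr_sketch newer = { 150, 1 };
	struct fattr_sketch older = { 50, 1 };
	refresh(&nfsi, &newer);	/* full update */
	refresh(&nfsi, &older);	/* cross-check only */
	return 0;
}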
@@ -1328,20 +1417,17 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
 		goto out_err;
 	}
 
-	spin_lock(&inode->i_lock);
-
 	/*
 	 * Make sure the inode's type hasn't changed.
 	 */
-	if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
-		spin_unlock(&inode->i_lock);
+	if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
 		goto out_changed;
-	}
 
 	/*
 	 * Update the read time so we don't revalidate too often.
 	 */
-	nfsi->read_cache_jiffies = fattr->timestamp;
+	nfsi->read_cache_jiffies = fattr->time_start;
+	nfsi->last_updated = jiffies;
 
 	/* Are we racing with known updates of the metadata on the server? */
 	data_unstable = ! (nfs_verify_change_attribute(inode, verifier) ||
@@ -1354,7 +1440,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
 	/* Do we perhaps have any outstanding writes? */
 	if (nfsi->npages == 0) {
 		/* No, but did we race with nfs_end_data_update()? */
-		if (verifier == nfsi->cache_change_attribute) {
+		if (time_after_eq(verifier, nfsi->cache_change_attribute)) {
 			inode->i_size = new_isize;
 			invalid |= NFS_INO_INVALID_DATA;
 		}
@@ -1430,7 +1516,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
 	if (!nfs_have_delegation(inode, FMODE_READ))
 		nfsi->cache_validity |= invalid;
 
-	spin_unlock(&inode->i_lock);
 	return 0;
  out_changed:
 	/*
@@ -1639,8 +1724,7 @@ static void nfs4_clear_inode(struct inode *inode)
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	/* If we are holding a delegation, return it! */
-	if (nfsi->delegation != NULL)
-		nfs_inode_return_delegation(inode);
+	nfs_inode_return_delegation(inode);
 	/* First call standard NFS clear_inode() code */
 	nfs_clear_inode(inode);
 	/* Now clear out any remaining state */
@@ -1669,7 +1753,7 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 	struct rpc_clnt *clnt = NULL;
 	struct rpc_timeout timeparms;
 	rpc_authflavor_t authflavour;
-	int proto, err = -EIO;
+	int err = -EIO;
 
 	sb->s_blocksize_bits = 0;
 	sb->s_blocksize = 0;
@@ -1687,30 +1771,8 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 	server->acdirmax = data->acdirmax*HZ;
 
 	server->rpc_ops = &nfs_v4_clientops;
-	/* Initialize timeout values */
 
-	timeparms.to_initval = data->timeo * HZ / 10;
-	timeparms.to_retries = data->retrans;
-	timeparms.to_exponential = 1;
-	if (!timeparms.to_retries)
-		timeparms.to_retries = 5;
-
-	proto = data->proto;
-	/* Which IP protocol do we use? */
-	switch (proto) {
-		case IPPROTO_TCP:
-			timeparms.to_maxval = RPC_MAX_TCP_TIMEOUT;
-			if (!timeparms.to_initval)
-				timeparms.to_initval = 600 * HZ / 10;
-			break;
-		case IPPROTO_UDP:
-			timeparms.to_maxval = RPC_MAX_UDP_TIMEOUT;
-			if (!timeparms.to_initval)
-				timeparms.to_initval = 11 * HZ / 10;
-			break;
-		default:
-			return -EINVAL;
-	}
+	nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans);
 
 	clp = nfs4_get_client(&server->addr.sin_addr);
 	if (!clp) {
@@ -1735,7 +1797,7 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 
 	down_write(&clp->cl_sem);
 	if (IS_ERR(clp->cl_rpcclient)) {
-		xprt = xprt_create_proto(proto, &server->addr, &timeparms);
+		xprt = xprt_create_proto(data->proto, &server->addr, &timeparms);
 		if (IS_ERR(xprt)) {
 			up_write(&clp->cl_sem);
 			err = PTR_ERR(xprt);
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index d91b69044a4d..59049e864ca7 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -143,7 +143,6 @@ xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr)
 		fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO;
 		fattr->rdev = 0;
 	}
-	fattr->timestamp = jiffies;
 	return p;
 }
 
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index edc95514046d..92c870d19ccd 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -78,7 +78,7 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
 	int status;
 
 	dprintk("%s: call fsinfo\n", __FUNCTION__);
-	info->fattr->valid = 0;
+	nfs_fattr_init(info->fattr);
 	status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
 	dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status);
 	if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
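Every `fattr->valid = 0` in nfs3proc.c becomes nfs_fattr_init() from here on. Given that nfs2xdr.c stops stamping `fattr->timestamp` and the inode code now compares `fattr->time_start`, the initializer plausibly has to record the request start time as well as clear the valid bits; the real helper lives in the NFS client and the fields below are illustrative:

#include <stdio.h>
#include <time.h>

struct fattr_sketch {
	int valid;	/* which attributes the reply filled in */
	unsigned long time_start;	/* when the request was launched */
};

static unsigned long jiffies_sketch(void)
{
	return (unsigned long)clock();	/* stand-in for jiffies */
}

/* Sketch of what nfs_fattr_init() must do: clearing ->valid alone is
 * no longer enough once replies are ordered by ->time_start. */
static void fattr_init(struct fattr_sketch *fattr)
{
	fattr->valid = 0;
	fattr->time_start = jiffies_sketch();
}

int main(void)
{
	struct fattr_sketch fattr;
	fattr_init(&fattr);
	printf("valid=%d time_start=%lu\n", fattr.valid, fattr.time_start);
	return 0;
}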
@@ -98,7 +98,7 @@ nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 	int status;
 
 	dprintk("NFS call getattr\n");
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call(server->client, NFS3PROC_GETATTR,
 			 fhandle, fattr, 0);
 	dprintk("NFS reply getattr: %d\n", status);
@@ -117,7 +117,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	int status;
 
 	dprintk("NFS call setattr\n");
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
 	if (status == 0)
 		nfs_setattr_update_inode(inode, sattr);
@@ -143,8 +143,8 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name,
 	int status;
 
 	dprintk("NFS call lookup %s\n", name->name);
-	dir_attr.valid = 0;
-	fattr->valid = 0;
+	nfs_fattr_init(&dir_attr);
+	nfs_fattr_init(fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &arg, &res, 0);
 	if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR))
 		status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR,
@@ -174,7 +174,6 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
 	int status;
 
 	dprintk("NFS call access\n");
-	fattr.valid = 0;
 
 	if (mode & MAY_READ)
 		arg.access |= NFS3_ACCESS_READ;
@@ -189,6 +188,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
 		if (mode & MAY_EXEC)
 			arg.access |= NFS3_ACCESS_EXECUTE;
 	}
+	nfs_fattr_init(&fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	nfs_refresh_inode(inode, &fattr);
 	if (status == 0) {
@@ -217,7 +217,7 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page,
 	int status;
 
 	dprintk("NFS call readlink\n");
-	fattr.valid = 0;
+	nfs_fattr_init(&fattr);
 	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK,
 			 &args, &fattr, 0);
 	nfs_refresh_inode(inode, &fattr);
@@ -240,7 +240,7 @@ static int nfs3_proc_read(struct nfs_read_data *rdata)
 
 	dprintk("NFS call read %d @ %Ld\n", rdata->args.count,
 			(long long) rdata->args.offset);
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
 	if (status >= 0)
 		nfs_refresh_inode(inode, fattr);
@@ -263,10 +263,10 @@ static int nfs3_proc_write(struct nfs_write_data *wdata)
 
 	dprintk("NFS call write %d @ %Ld\n", wdata->args.count,
 			(long long) wdata->args.offset);
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags);
 	if (status >= 0)
-		nfs_refresh_inode(inode, fattr);
+		nfs_post_op_update_inode(inode, fattr);
 	dprintk("NFS reply write: %d\n", status);
 	return status < 0? status : wdata->res.count;
 }
@@ -285,10 +285,10 @@ static int nfs3_proc_commit(struct nfs_write_data *cdata)
 
 	dprintk("NFS call commit %d @ %Ld\n", cdata->args.count,
 			(long long) cdata->args.offset);
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	if (status >= 0)
-		nfs_refresh_inode(inode, fattr);
+		nfs_post_op_update_inode(inode, fattr);
 	dprintk("NFS reply commit: %d\n", status);
 	return status;
 }
@@ -299,7 +299,7 @@ static int nfs3_proc_commit(struct nfs_write_data *cdata)
  */
 static int
 nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
-		 int flags)
+		 int flags, struct nameidata *nd)
 {
 	struct nfs_fh fhandle;
 	struct nfs_fattr fattr;
@@ -329,10 +329,10 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	sattr->ia_mode &= ~current->fs->umask;
 
 again:
-	dir_attr.valid = 0;
-	fattr.valid = 0;
+	nfs_fattr_init(&dir_attr);
+	nfs_fattr_init(&fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0);
-	nfs_refresh_inode(dir, &dir_attr);
+	nfs_post_op_update_inode(dir, &dir_attr);
 
 	/* If the server doesn't support the exclusive creation semantics,
 	 * try again with simple 'guarded' mode. */
@@ -401,9 +401,9 @@ nfs3_proc_remove(struct inode *dir, struct qstr *name)
 	int status;
 
 	dprintk("NFS call remove %s\n", name->name);
-	dir_attr.valid = 0;
+	nfs_fattr_init(&dir_attr);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
-	nfs_refresh_inode(dir, &dir_attr);
+	nfs_post_op_update_inode(dir, &dir_attr);
 	dprintk("NFS reply remove: %d\n", status);
 	return status;
 }
@@ -422,7 +422,7 @@ nfs3_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr
 	ptr->arg.fh = NFS_FH(dir->d_inode);
 	ptr->arg.name = name->name;
 	ptr->arg.len = name->len;
-	ptr->res.valid = 0;
+	nfs_fattr_init(&ptr->res);
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE];
 	msg->rpc_argp = &ptr->arg;
 	msg->rpc_resp = &ptr->res;
@@ -439,7 +439,7 @@ nfs3_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
 		return 1;
 	if (msg->rpc_argp) {
 		dir_attr = (struct nfs_fattr*)msg->rpc_resp;
-		nfs_refresh_inode(dir->d_inode, dir_attr);
+		nfs_post_op_update_inode(dir->d_inode, dir_attr);
 		kfree(msg->rpc_argp);
 	}
 	return 0;
@@ -465,11 +465,11 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
 	int status;
 
 	dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
-	old_dir_attr.valid = 0;
-	new_dir_attr.valid = 0;
+	nfs_fattr_init(&old_dir_attr);
+	nfs_fattr_init(&new_dir_attr);
 	status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0);
-	nfs_refresh_inode(old_dir, &old_dir_attr);
-	nfs_refresh_inode(new_dir, &new_dir_attr);
+	nfs_post_op_update_inode(old_dir, &old_dir_attr);
+	nfs_post_op_update_inode(new_dir, &new_dir_attr);
 	dprintk("NFS reply rename: %d\n", status);
 	return status;
 }
@@ -491,11 +491,11 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
 	int status;
 
 	dprintk("NFS call link %s\n", name->name);
-	dir_attr.valid = 0;
-	fattr.valid = 0;
+	nfs_fattr_init(&dir_attr);
+	nfs_fattr_init(&fattr);
 	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0);
-	nfs_refresh_inode(dir, &dir_attr);
-	nfs_refresh_inode(inode, &fattr);
+	nfs_post_op_update_inode(dir, &dir_attr);
+	nfs_post_op_update_inode(inode, &fattr);
 	dprintk("NFS reply link: %d\n", status);
 	return status;
 }
@@ -524,10 +524,10 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
524 if (path->len > NFS3_MAXPATHLEN) 524 if (path->len > NFS3_MAXPATHLEN)
525 return -ENAMETOOLONG; 525 return -ENAMETOOLONG;
526 dprintk("NFS call symlink %s -> %s\n", name->name, path->name); 526 dprintk("NFS call symlink %s -> %s\n", name->name, path->name);
527 dir_attr.valid = 0; 527 nfs_fattr_init(&dir_attr);
528 fattr->valid = 0; 528 nfs_fattr_init(fattr);
529 status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0); 529 status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0);
530 nfs_refresh_inode(dir, &dir_attr); 530 nfs_post_op_update_inode(dir, &dir_attr);
531 dprintk("NFS reply symlink: %d\n", status); 531 dprintk("NFS reply symlink: %d\n", status);
532 return status; 532 return status;
533} 533}
@@ -552,13 +552,13 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
552 int status; 552 int status;
553 553
554 dprintk("NFS call mkdir %s\n", dentry->d_name.name); 554 dprintk("NFS call mkdir %s\n", dentry->d_name.name);
555 dir_attr.valid = 0;
556 fattr.valid = 0;
557 555
558 sattr->ia_mode &= ~current->fs->umask; 556 sattr->ia_mode &= ~current->fs->umask;
559 557
558 nfs_fattr_init(&dir_attr);
559 nfs_fattr_init(&fattr);
560 status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0); 560 status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0);
561 nfs_refresh_inode(dir, &dir_attr); 561 nfs_post_op_update_inode(dir, &dir_attr);
562 if (status != 0) 562 if (status != 0)
563 goto out; 563 goto out;
564 status = nfs_instantiate(dentry, &fhandle, &fattr); 564 status = nfs_instantiate(dentry, &fhandle, &fattr);
@@ -582,9 +582,9 @@ nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
582 int status; 582 int status;
583 583
584 dprintk("NFS call rmdir %s\n", name->name); 584 dprintk("NFS call rmdir %s\n", name->name);
585 dir_attr.valid = 0; 585 nfs_fattr_init(&dir_attr);
586 status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0); 586 status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0);
587 nfs_refresh_inode(dir, &dir_attr); 587 nfs_post_op_update_inode(dir, &dir_attr);
588 dprintk("NFS reply rmdir: %d\n", status); 588 dprintk("NFS reply rmdir: %d\n", status);
589 return status; 589 return status;
590} 590}
@@ -634,7 +634,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
634 dprintk("NFS call readdir%s %d\n", 634 dprintk("NFS call readdir%s %d\n",
635 plus? "plus" : "", (unsigned int) cookie); 635 plus? "plus" : "", (unsigned int) cookie);
636 636
637 dir_attr.valid = 0; 637 nfs_fattr_init(&dir_attr);
638 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 638 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
639 nfs_refresh_inode(dir, &dir_attr); 639 nfs_refresh_inode(dir, &dir_attr);
640 dprintk("NFS reply readdir: %d\n", status); 640 dprintk("NFS reply readdir: %d\n", status);
@@ -676,10 +676,10 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
676 676
677 sattr->ia_mode &= ~current->fs->umask; 677 sattr->ia_mode &= ~current->fs->umask;
678 678
679 dir_attr.valid = 0; 679 nfs_fattr_init(&dir_attr);
680 fattr.valid = 0; 680 nfs_fattr_init(&fattr);
681 status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0); 681 status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0);
682 nfs_refresh_inode(dir, &dir_attr); 682 nfs_post_op_update_inode(dir, &dir_attr);
683 if (status != 0) 683 if (status != 0)
684 goto out; 684 goto out;
685 status = nfs_instantiate(dentry, &fh, &fattr); 685 status = nfs_instantiate(dentry, &fh, &fattr);
@@ -698,7 +698,7 @@ nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
698 int status; 698 int status;
699 699
700 dprintk("NFS call fsstat\n"); 700 dprintk("NFS call fsstat\n");
701 stat->fattr->valid = 0; 701 nfs_fattr_init(stat->fattr);
702 status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0); 702 status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0);
703 dprintk("NFS reply statfs: %d\n", status); 703 dprintk("NFS reply statfs: %d\n", status);
704 return status; 704 return status;
@@ -711,7 +711,7 @@ nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
711 int status; 711 int status;
712 712
713 dprintk("NFS call fsinfo\n"); 713 dprintk("NFS call fsinfo\n");
714 info->fattr->valid = 0; 714 nfs_fattr_init(info->fattr);
715 status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0); 715 status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
716 dprintk("NFS reply fsinfo: %d\n", status); 716 dprintk("NFS reply fsinfo: %d\n", status);
717 return status; 717 return status;
@@ -724,7 +724,7 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
724 int status; 724 int status;
725 725
726 dprintk("NFS call pathconf\n"); 726 dprintk("NFS call pathconf\n");
727 info->fattr->valid = 0; 727 nfs_fattr_init(info->fattr);
728 status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0); 728 status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0);
729 dprintk("NFS reply pathconf: %d\n", status); 729 dprintk("NFS reply pathconf: %d\n", status);
730 return status; 730 return status;
@@ -735,7 +735,7 @@ extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
735static void 735static void
736nfs3_read_done(struct rpc_task *task) 736nfs3_read_done(struct rpc_task *task)
737{ 737{
738 struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; 738 struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata;
739 739
740 if (nfs3_async_handle_jukebox(task)) 740 if (nfs3_async_handle_jukebox(task))
741 return; 741 return;
@@ -775,7 +775,7 @@ nfs3_write_done(struct rpc_task *task)
775 return; 775 return;
776 data = (struct nfs_write_data *)task->tk_calldata; 776 data = (struct nfs_write_data *)task->tk_calldata;
777 if (task->tk_status >= 0) 777 if (task->tk_status >= 0)
778 nfs_refresh_inode(data->inode, data->res.fattr); 778 nfs_post_op_update_inode(data->inode, data->res.fattr);
779 nfs_writeback_done(task); 779 nfs_writeback_done(task);
780} 780}
781 781
@@ -819,7 +819,7 @@ nfs3_commit_done(struct rpc_task *task)
819 return; 819 return;
820 data = (struct nfs_write_data *)task->tk_calldata; 820 data = (struct nfs_write_data *)task->tk_calldata;
821 if (task->tk_status >= 0) 821 if (task->tk_status >= 0)
822 nfs_refresh_inode(data->inode, data->res.fattr); 822 nfs_post_op_update_inode(data->inode, data->res.fattr);
823 nfs_commit_done(task); 823 nfs_commit_done(task);
824} 824}
825 825
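[Note on the nfs3proc.c changes above: the file-wide pattern replaces open-coded "fattr.valid = 0" with nfs_fattr_init(), and switches post-operation refreshes from nfs_refresh_inode() to nfs_post_op_update_inode() wherever the call mutated the inode. Below is a minimal userspace model of what moving initialization out of the XDR decoder buys — consistent with the nfs3xdr.c hunk that follows, which drops the post-decode jiffies stamp. The struct layout, field names and fake_jiffies() are illustrative assumptions, not the kernel's definitions.]

/* Userspace model of the nfs_fattr_init() pattern introduced here.
 * Before: callers wrote "fattr.valid = 0;" and the XDR decoder stamped
 * the time after the reply arrived. After: one helper resets the flags
 * and records the pre-RPC timestamp, so cache revalidation compares
 * against the moment the attributes could last have been current. */
#include <stdio.h>
#include <time.h>

struct fattr_model {
	unsigned int valid;        /* which attributes the reply filled in */
	unsigned long time_start;  /* when the request was launched */
};

static unsigned long fake_jiffies(void)
{
	return (unsigned long)clock(); /* stand-in for the kernel's jiffies */
}

static void fattr_init_model(struct fattr_model *f)
{
	f->valid = 0;
	f->time_start = fake_jiffies();
}

int main(void)
{
	struct fattr_model dir_attr;

	fattr_init_model(&dir_attr);   /* ~ nfs_fattr_init(&dir_attr) */
	/* ... rpc_call_sync() would run here and set dir_attr.valid ... */
	printf("valid=%u start=%lu\n", dir_attr.valid, dir_attr.time_start);
	return 0;
}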
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index db4a904810a4..0498bd36602c 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -174,7 +174,6 @@ xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr)
174 174
175 /* Update the mode bits */ 175 /* Update the mode bits */
176 fattr->valid |= (NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3); 176 fattr->valid |= (NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3);
177 fattr->timestamp = jiffies;
178 return p; 177 return p;
179} 178}
180 179
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ec1a22d7b876..78a53f5a9f18 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -93,25 +93,50 @@ struct nfs4_client {
93}; 93};
94 94
95/* 95/*
96 * struct rpc_sequence ensures that RPC calls are sent in the exact
97 * order that they appear on the list.
98 */
99struct rpc_sequence {
100 struct rpc_wait_queue wait; /* RPC call delay queue */
101 spinlock_t lock; /* Protects the list */
102 struct list_head list; /* Defines sequence of RPC calls */
103};
104
105#define NFS_SEQID_CONFIRMED 1
106struct nfs_seqid_counter {
107 struct rpc_sequence *sequence;
108 int flags;
109 u32 counter;
110};
111
112struct nfs_seqid {
113 struct nfs_seqid_counter *sequence;
114 struct list_head list;
115};
116
117static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status)
118{
119 if (seqid_mutating_err(-status))
120 seqid->flags |= NFS_SEQID_CONFIRMED;
121}
122
123/*
96 * NFS4 state_owners and lock_owners are simply labels for ordered 124 * NFS4 state_owners and lock_owners are simply labels for ordered
97 * sequences of RPC calls. Their sole purpose is to provide once-only 125 * sequences of RPC calls. Their sole purpose is to provide once-only
98 * semantics by allowing the server to identify replayed requests. 126 * semantics by allowing the server to identify replayed requests.
99 *
100 * The ->so_sema is held during all state_owner seqid-mutating operations:
101 * OPEN, OPEN_DOWNGRADE, and CLOSE. Its purpose is to properly serialize
102 * so_seqid.
103 */ 127 */
104struct nfs4_state_owner { 128struct nfs4_state_owner {
129 spinlock_t so_lock;
105 struct list_head so_list; /* per-clientid list of state_owners */ 130 struct list_head so_list; /* per-clientid list of state_owners */
106 struct nfs4_client *so_client; 131 struct nfs4_client *so_client;
107 u32 so_id; /* 32-bit identifier, unique */ 132 u32 so_id; /* 32-bit identifier, unique */
108 struct semaphore so_sema;
109 u32 so_seqid; /* protected by so_sema */
110 atomic_t so_count; 133 atomic_t so_count;
111 134
112 struct rpc_cred *so_cred; /* Associated cred */ 135 struct rpc_cred *so_cred; /* Associated cred */
113 struct list_head so_states; 136 struct list_head so_states;
114 struct list_head so_delegations; 137 struct list_head so_delegations;
138 struct nfs_seqid_counter so_seqid;
139 struct rpc_sequence so_sequence;
115}; 140};
116 141
117/* 142/*
@@ -132,7 +157,7 @@ struct nfs4_lock_state {
132 fl_owner_t ls_owner; /* POSIX lock owner */ 157 fl_owner_t ls_owner; /* POSIX lock owner */
133#define NFS_LOCK_INITIALIZED 1 158#define NFS_LOCK_INITIALIZED 1
134 int ls_flags; 159 int ls_flags;
135 u32 ls_seqid; 160 struct nfs_seqid_counter ls_seqid;
136 u32 ls_id; 161 u32 ls_id;
137 nfs4_stateid ls_stateid; 162 nfs4_stateid ls_stateid;
138 atomic_t ls_count; 163 atomic_t ls_count;
@@ -153,7 +178,6 @@ struct nfs4_state {
153 struct inode *inode; /* Pointer to the inode */ 178 struct inode *inode; /* Pointer to the inode */
154 179
155 unsigned long flags; /* Do we hold any locks? */ 180 unsigned long flags; /* Do we hold any locks? */
156 struct semaphore lock_sema; /* Serializes file locking operations */
157 spinlock_t state_lock; /* Protects the lock_states list */ 181 spinlock_t state_lock; /* Protects the lock_states list */
158 182
159 nfs4_stateid stateid; 183 nfs4_stateid stateid;
@@ -191,8 +215,8 @@ extern int nfs4_proc_setclientid_confirm(struct nfs4_client *);
191extern int nfs4_proc_async_renew(struct nfs4_client *); 215extern int nfs4_proc_async_renew(struct nfs4_client *);
192extern int nfs4_proc_renew(struct nfs4_client *); 216extern int nfs4_proc_renew(struct nfs4_client *);
193extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode); 217extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode);
194extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); 218extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
195extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); 219extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
196 220
197extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; 221extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops;
198extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; 222extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops;
@@ -224,12 +248,17 @@ extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state
224extern void nfs4_put_open_state(struct nfs4_state *); 248extern void nfs4_put_open_state(struct nfs4_state *);
225extern void nfs4_close_state(struct nfs4_state *, mode_t); 249extern void nfs4_close_state(struct nfs4_state *, mode_t);
226extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); 250extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode);
227extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp);
228extern void nfs4_schedule_state_recovery(struct nfs4_client *); 251extern void nfs4_schedule_state_recovery(struct nfs4_client *);
252extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
229extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); 253extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
230extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls);
231extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); 254extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
232 255
256extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter);
257extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
258extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
259extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
260extern void nfs_free_seqid(struct nfs_seqid *seqid);
261
233extern const nfs4_stateid zero_stateid; 262extern const nfs4_stateid zero_stateid;
234 263
235/* nfs4xdr.c */ 264/* nfs4xdr.c */
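[Note on the nfs4_fs.h changes above: the per-owner so_seqid counter plus so_sema semaphore give way to an nfs_seqid_counter shared by per-call nfs_seqid tokens, managed through the nfs_alloc_seqid() / nfs_wait_on_sequence() / nfs_increment_open_seqid() / nfs_free_seqid() API declared above and exercised throughout nfs4proc.c below. A hedged userspace sketch of that alloc/increment/free life cycle; the _model types are stand-ins, and the rule for which failed replies still advance the counter (the kernel's seqid_mutating_err()) is deliberately left out.]

/* Sketch of the seqid token life cycle the new API implies: allocate
 * a token against the owner's counter, issue the call, bump the shared
 * counter, free the token. */
#include <stdio.h>
#include <stdlib.h>

struct nfs_seqid_counter_model { unsigned int counter; };
struct nfs_seqid_model { struct nfs_seqid_counter_model *sequence; };

static struct nfs_seqid_model *
seqid_alloc(struct nfs_seqid_counter_model *c)   /* ~ nfs_alloc_seqid() */
{
	struct nfs_seqid_model *s = malloc(sizeof(*s));
	if (s != NULL)
		s->sequence = c;
	return s;
}

/* ~ nfs_increment_open_seqid(): bump on success. (Assumption kept
 * simple here: the kernel also bumps for replies that
 * seqid_mutating_err() says the server sequenced despite failing.) */
static void seqid_increment(int status, struct nfs_seqid_model *s)
{
	if (status == 0)
		s->sequence->counter++;
}

static void seqid_free(struct nfs_seqid_model *s)  /* ~ nfs_free_seqid() */
{
	free(s);
}

int main(void)
{
	struct nfs_seqid_counter_model so_seqid = { .counter = 1 };
	struct nfs_seqid_model *tok = seqid_alloc(&so_seqid);

	if (tok == NULL)
		return 1;
	/* ... rpc_call_sync() of the OPEN would run here ... */
	seqid_increment(0, tok);
	seqid_free(tok);
	printf("next OPEN seqid: %u\n", so_seqid.counter);
	return 0;
}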
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9701ca8c9428..933e13b383f8 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -47,6 +47,7 @@
47#include <linux/nfs_page.h> 47#include <linux/nfs_page.h>
48#include <linux/smp_lock.h> 48#include <linux/smp_lock.h>
49#include <linux/namei.h> 49#include <linux/namei.h>
50#include <linux/mount.h>
50 51
51#include "nfs4_fs.h" 52#include "nfs4_fs.h"
52#include "delegation.h" 53#include "delegation.h"
@@ -56,10 +57,11 @@
56#define NFS4_POLL_RETRY_MIN (1*HZ) 57#define NFS4_POLL_RETRY_MIN (1*HZ)
57#define NFS4_POLL_RETRY_MAX (15*HZ) 58#define NFS4_POLL_RETRY_MAX (15*HZ)
58 59
60static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid);
59static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); 61static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
60static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *); 62static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *);
61static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); 63static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
62static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception); 64static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
63extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); 65extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
64extern struct rpc_procinfo nfs4_procedures[]; 66extern struct rpc_procinfo nfs4_procedures[];
65 67
@@ -185,8 +187,26 @@ static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinf
185{ 187{
186 struct nfs_inode *nfsi = NFS_I(inode); 188 struct nfs_inode *nfsi = NFS_I(inode);
187 189
190 spin_lock(&inode->i_lock);
191 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
188 if (cinfo->before == nfsi->change_attr && cinfo->atomic) 192 if (cinfo->before == nfsi->change_attr && cinfo->atomic)
189 nfsi->change_attr = cinfo->after; 193 nfsi->change_attr = cinfo->after;
194 spin_unlock(&inode->i_lock);
195}
196
197/* Helper for asynchronous RPC calls */
198static int nfs4_call_async(struct rpc_clnt *clnt, rpc_action tk_begin,
199 rpc_action tk_exit, void *calldata)
200{
201 struct rpc_task *task;
202
203 if (!(task = rpc_new_task(clnt, tk_exit, RPC_TASK_ASYNC)))
204 return -ENOMEM;
205
206 task->tk_calldata = calldata;
207 task->tk_action = tk_begin;
208 rpc_execute(task);
209 return 0;
190} 210}
191 211
192static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) 212static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
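[Note: update_changeattr() above now runs under inode->i_lock and unconditionally flags the attribute cache invalid, adopting cinfo->after only when the server reported an atomic change and the cached value still equals cinfo->before. A runnable toy of that compare-and-adopt rule, with the locking modeled away; the _model names are assumptions.]

/* Toy model of the rule update_changeattr() applies under i_lock:
 * always force a later revalidation, but only jump the cached change
 * attribute forward when the before/after pair was atomic and our
 * cache still matches "before". */
#include <stdio.h>
#include <stdint.h>

struct change_info_model { uint64_t before, after; int atomic; };

static void update_changeattr_model(uint64_t *cached,
				    const struct change_info_model *ci,
				    int *cache_valid)
{
	*cache_valid = 0;  /* ~ NFS_INO_INVALID_ATTR: revalidate later */
	if (ci->atomic && ci->before == *cached)
		*cached = ci->after;
}

int main(void)
{
	uint64_t cached = 7;
	int valid = 1;
	struct change_info_model ci = { .before = 7, .after = 8, .atomic = 1 };

	update_changeattr_model(&cached, &ci, &valid);
	printf("change_attr=%llu cache_valid=%d\n",
	       (unsigned long long)cached, valid);
	return 0;
}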
@@ -195,6 +215,7 @@ static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid,
195 215
196 open_flags &= (FMODE_READ|FMODE_WRITE); 216 open_flags &= (FMODE_READ|FMODE_WRITE);
197 /* Protect against nfs4_find_state() */ 217 /* Protect against nfs4_find_state() */
218 spin_lock(&state->owner->so_lock);
198 spin_lock(&inode->i_lock); 219 spin_lock(&inode->i_lock);
199 state->state |= open_flags; 220 state->state |= open_flags;
200 /* NB! List reordering - see the reclaim code for why. */ 221 /* NB! List reordering - see the reclaim code for why. */
@@ -204,12 +225,12 @@ static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid,
204 state->nreaders++; 225 state->nreaders++;
205 memcpy(&state->stateid, stateid, sizeof(state->stateid)); 226 memcpy(&state->stateid, stateid, sizeof(state->stateid));
206 spin_unlock(&inode->i_lock); 227 spin_unlock(&inode->i_lock);
228 spin_unlock(&state->owner->so_lock);
207} 229}
208 230
209/* 231/*
210 * OPEN_RECLAIM: 232 * OPEN_RECLAIM:
211 * reclaim state on the server after a reboot. 233 * reclaim state on the server after a reboot.
212 * Assumes caller is holding the sp->so_sem
213 */ 234 */
214static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) 235static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state)
215{ 236{
@@ -218,7 +239,6 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st
218 struct nfs_delegation *delegation = NFS_I(inode)->delegation; 239 struct nfs_delegation *delegation = NFS_I(inode)->delegation;
219 struct nfs_openargs o_arg = { 240 struct nfs_openargs o_arg = {
220 .fh = NFS_FH(inode), 241 .fh = NFS_FH(inode),
221 .seqid = sp->so_seqid,
222 .id = sp->so_id, 242 .id = sp->so_id,
223 .open_flags = state->state, 243 .open_flags = state->state,
224 .clientid = server->nfs4_state->cl_clientid, 244 .clientid = server->nfs4_state->cl_clientid,
@@ -245,8 +265,13 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st
245 } 265 }
246 o_arg.u.delegation_type = delegation->type; 266 o_arg.u.delegation_type = delegation->type;
247 } 267 }
268 o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
269 if (o_arg.seqid == NULL)
270 return -ENOMEM;
248 status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); 271 status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
249 nfs4_increment_seqid(status, sp); 272 /* Confirm the sequence as being established */
273 nfs_confirm_seqid(&sp->so_seqid, status);
274 nfs_increment_open_seqid(status, o_arg.seqid);
250 if (status == 0) { 275 if (status == 0) {
251 memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); 276 memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid));
252 if (o_res.delegation_type != 0) { 277 if (o_res.delegation_type != 0) {
@@ -256,6 +281,7 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st
256 nfs_async_inode_return_delegation(inode, &o_res.stateid); 281 nfs_async_inode_return_delegation(inode, &o_res.stateid);
257 } 282 }
258 } 283 }
284 nfs_free_seqid(o_arg.seqid);
259 clear_bit(NFS_DELEGATED_STATE, &state->flags); 285 clear_bit(NFS_DELEGATED_STATE, &state->flags);
260 /* Ensure we update the inode attributes */ 286 /* Ensure we update the inode attributes */
261 NFS_CACHEINV(inode); 287 NFS_CACHEINV(inode);
@@ -302,23 +328,35 @@ static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state
302 }; 328 };
303 int status = 0; 329 int status = 0;
304 330
305 down(&sp->so_sema);
306 if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) 331 if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
307 goto out; 332 goto out;
308 if (state->state == 0) 333 if (state->state == 0)
309 goto out; 334 goto out;
310 arg.seqid = sp->so_seqid; 335 arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
336 status = -ENOMEM;
337 if (arg.seqid == NULL)
338 goto out;
311 arg.open_flags = state->state; 339 arg.open_flags = state->state;
312 memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data)); 340 memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data));
313 status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); 341 status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
314 nfs4_increment_seqid(status, sp); 342 nfs_increment_open_seqid(status, arg.seqid);
343 if (status != 0)
344 goto out_free;
345 if(res.rflags & NFS4_OPEN_RESULT_CONFIRM) {
346 status = _nfs4_proc_open_confirm(server->client, NFS_FH(inode),
347 sp, &res.stateid, arg.seqid);
348 if (status != 0)
349 goto out_free;
350 }
351 nfs_confirm_seqid(&sp->so_seqid, 0);
315 if (status >= 0) { 352 if (status >= 0) {
316 memcpy(state->stateid.data, res.stateid.data, 353 memcpy(state->stateid.data, res.stateid.data,
317 sizeof(state->stateid.data)); 354 sizeof(state->stateid.data));
318 clear_bit(NFS_DELEGATED_STATE, &state->flags); 355 clear_bit(NFS_DELEGATED_STATE, &state->flags);
319 } 356 }
357out_free:
358 nfs_free_seqid(arg.seqid);
320out: 359out:
321 up(&sp->so_sema);
322 dput(parent); 360 dput(parent);
323 return status; 361 return status;
324} 362}
@@ -345,11 +383,11 @@ int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
345 return err; 383 return err;
346} 384}
347 385
348static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid) 386static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid)
349{ 387{
350 struct nfs_open_confirmargs arg = { 388 struct nfs_open_confirmargs arg = {
351 .fh = fh, 389 .fh = fh,
352 .seqid = sp->so_seqid, 390 .seqid = seqid,
353 .stateid = *stateid, 391 .stateid = *stateid,
354 }; 392 };
355 struct nfs_open_confirmres res; 393 struct nfs_open_confirmres res;
@@ -362,7 +400,9 @@ static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nf
362 int status; 400 int status;
363 401
364 status = rpc_call_sync(clnt, &msg, RPC_TASK_NOINTR); 402 status = rpc_call_sync(clnt, &msg, RPC_TASK_NOINTR);
365 nfs4_increment_seqid(status, sp); 403 /* Confirm the sequence as being established */
404 nfs_confirm_seqid(&sp->so_seqid, status);
405 nfs_increment_open_seqid(status, seqid);
366 if (status >= 0) 406 if (status >= 0)
367 memcpy(stateid, &res.stateid, sizeof(*stateid)); 407 memcpy(stateid, &res.stateid, sizeof(*stateid));
368 return status; 408 return status;
@@ -380,21 +420,41 @@ static int _nfs4_proc_open(struct inode *dir, struct nfs4_state_owner *sp, stru
380 int status; 420 int status;
381 421
382 /* Update sequence id. The caller must serialize! */ 422 /* Update sequence id. The caller must serialize! */
383 o_arg->seqid = sp->so_seqid;
384 o_arg->id = sp->so_id; 423 o_arg->id = sp->so_id;
385 o_arg->clientid = sp->so_client->cl_clientid; 424 o_arg->clientid = sp->so_client->cl_clientid;
386 425
387 status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); 426 status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
388 nfs4_increment_seqid(status, sp); 427 if (status == 0) {
428 /* OPEN on anything except a regular file is disallowed in NFSv4 */
429 switch (o_res->f_attr->mode & S_IFMT) {
430 case S_IFREG:
431 break;
432 case S_IFLNK:
433 status = -ELOOP;
434 break;
435 case S_IFDIR:
436 status = -EISDIR;
437 break;
438 default:
439 status = -ENOTDIR;
440 }
441 }
442
443 nfs_increment_open_seqid(status, o_arg->seqid);
389 if (status != 0) 444 if (status != 0)
390 goto out; 445 goto out;
391 update_changeattr(dir, &o_res->cinfo); 446 if (o_arg->open_flags & O_CREAT) {
447 update_changeattr(dir, &o_res->cinfo);
448 nfs_post_op_update_inode(dir, o_res->dir_attr);
449 } else
450 nfs_refresh_inode(dir, o_res->dir_attr);
392 if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { 451 if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
393 status = _nfs4_proc_open_confirm(server->client, &o_res->fh, 452 status = _nfs4_proc_open_confirm(server->client, &o_res->fh,
394 sp, &o_res->stateid); 453 sp, &o_res->stateid, o_arg->seqid);
395 if (status != 0) 454 if (status != 0)
396 goto out; 455 goto out;
397 } 456 }
457 nfs_confirm_seqid(&sp->so_seqid, 0);
398 if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) 458 if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
399 status = server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); 459 status = server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
400out: 460out:
@@ -441,9 +501,7 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
441 struct inode *inode = state->inode; 501 struct inode *inode = state->inode;
442 struct nfs_server *server = NFS_SERVER(dir); 502 struct nfs_server *server = NFS_SERVER(dir);
443 struct nfs_delegation *delegation = NFS_I(inode)->delegation; 503 struct nfs_delegation *delegation = NFS_I(inode)->delegation;
444 struct nfs_fattr f_attr = { 504 struct nfs_fattr f_attr, dir_attr;
445 .valid = 0,
446 };
447 struct nfs_openargs o_arg = { 505 struct nfs_openargs o_arg = {
448 .fh = NFS_FH(dir), 506 .fh = NFS_FH(dir),
449 .open_flags = state->state, 507 .open_flags = state->state,
@@ -453,6 +511,7 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
453 }; 511 };
454 struct nfs_openres o_res = { 512 struct nfs_openres o_res = {
455 .f_attr = &f_attr, 513 .f_attr = &f_attr,
514 .dir_attr = &dir_attr,
456 .server = server, 515 .server = server,
457 }; 516 };
458 int status = 0; 517 int status = 0;
@@ -465,6 +524,12 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
465 set_bit(NFS_DELEGATED_STATE, &state->flags); 524 set_bit(NFS_DELEGATED_STATE, &state->flags);
466 goto out; 525 goto out;
467 } 526 }
527 o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
528 status = -ENOMEM;
529 if (o_arg.seqid == NULL)
530 goto out;
531 nfs_fattr_init(&f_attr);
532 nfs_fattr_init(&dir_attr);
468 status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); 533 status = _nfs4_proc_open(dir, sp, &o_arg, &o_res);
469 if (status != 0) 534 if (status != 0)
470 goto out_nodeleg; 535 goto out_nodeleg;
@@ -490,6 +555,7 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
490 nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res); 555 nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res);
491 } 556 }
492out_nodeleg: 557out_nodeleg:
558 nfs_free_seqid(o_arg.seqid);
493 clear_bit(NFS_DELEGATED_STATE, &state->flags); 559 clear_bit(NFS_DELEGATED_STATE, &state->flags);
494out: 560out:
495 dput(parent); 561 dput(parent);
@@ -564,7 +630,6 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred
564 dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__); 630 dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__);
565 goto out_err; 631 goto out_err;
566 } 632 }
567 down(&sp->so_sema);
568 state = nfs4_get_open_state(inode, sp); 633 state = nfs4_get_open_state(inode, sp);
569 if (state == NULL) 634 if (state == NULL)
570 goto out_err; 635 goto out_err;
@@ -589,7 +654,6 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred
589 set_bit(NFS_DELEGATED_STATE, &state->flags); 654 set_bit(NFS_DELEGATED_STATE, &state->flags);
590 update_open_stateid(state, &delegation->stateid, open_flags); 655 update_open_stateid(state, &delegation->stateid, open_flags);
591out_ok: 656out_ok:
592 up(&sp->so_sema);
593 nfs4_put_state_owner(sp); 657 nfs4_put_state_owner(sp);
594 up_read(&nfsi->rwsem); 658 up_read(&nfsi->rwsem);
595 up_read(&clp->cl_sem); 659 up_read(&clp->cl_sem);
@@ -600,11 +664,12 @@ out_err:
600 if (sp != NULL) { 664 if (sp != NULL) {
601 if (state != NULL) 665 if (state != NULL)
602 nfs4_put_open_state(state); 666 nfs4_put_open_state(state);
603 up(&sp->so_sema);
604 nfs4_put_state_owner(sp); 667 nfs4_put_state_owner(sp);
605 } 668 }
606 up_read(&nfsi->rwsem); 669 up_read(&nfsi->rwsem);
607 up_read(&clp->cl_sem); 670 up_read(&clp->cl_sem);
671 if (err != -EACCES)
672 nfs_inode_return_delegation(inode);
608 return err; 673 return err;
609} 674}
610 675
@@ -635,9 +700,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
635 struct nfs4_client *clp = server->nfs4_state; 700 struct nfs4_client *clp = server->nfs4_state;
636 struct inode *inode = NULL; 701 struct inode *inode = NULL;
637 int status; 702 int status;
638 struct nfs_fattr f_attr = { 703 struct nfs_fattr f_attr, dir_attr;
639 .valid = 0,
640 };
641 struct nfs_openargs o_arg = { 704 struct nfs_openargs o_arg = {
642 .fh = NFS_FH(dir), 705 .fh = NFS_FH(dir),
643 .open_flags = flags, 706 .open_flags = flags,
@@ -648,6 +711,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
648 }; 711 };
649 struct nfs_openres o_res = { 712 struct nfs_openres o_res = {
650 .f_attr = &f_attr, 713 .f_attr = &f_attr,
714 .dir_attr = &dir_attr,
651 .server = server, 715 .server = server,
652 }; 716 };
653 717
@@ -665,8 +729,12 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
665 } else 729 } else
666 o_arg.u.attrs = sattr; 730 o_arg.u.attrs = sattr;
667 /* Serialization for the sequence id */ 731 /* Serialization for the sequence id */
668 down(&sp->so_sema);
669 732
733 o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
734 if (o_arg.seqid == NULL)
735 return -ENOMEM;
736 nfs_fattr_init(&f_attr);
737 nfs_fattr_init(&dir_attr);
670 status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); 738 status = _nfs4_proc_open(dir, sp, &o_arg, &o_res);
671 if (status != 0) 739 if (status != 0)
672 goto out_err; 740 goto out_err;
@@ -681,7 +749,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
681 update_open_stateid(state, &o_res.stateid, flags); 749 update_open_stateid(state, &o_res.stateid, flags);
682 if (o_res.delegation_type != 0) 750 if (o_res.delegation_type != 0)
683 nfs_inode_set_delegation(inode, cred, &o_res); 751 nfs_inode_set_delegation(inode, cred, &o_res);
684 up(&sp->so_sema); 752 nfs_free_seqid(o_arg.seqid);
685 nfs4_put_state_owner(sp); 753 nfs4_put_state_owner(sp);
686 up_read(&clp->cl_sem); 754 up_read(&clp->cl_sem);
687 *res = state; 755 *res = state;
@@ -690,7 +758,7 @@ out_err:
690 if (sp != NULL) { 758 if (sp != NULL) {
691 if (state != NULL) 759 if (state != NULL)
692 nfs4_put_open_state(state); 760 nfs4_put_open_state(state);
693 up(&sp->so_sema); 761 nfs_free_seqid(o_arg.seqid);
694 nfs4_put_state_owner(sp); 762 nfs4_put_state_owner(sp);
695 } 763 }
696 /* Note: clp->cl_sem must be released before nfs4_put_open_state()! */ 764 /* Note: clp->cl_sem must be released before nfs4_put_open_state()! */
@@ -718,7 +786,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
718 * It is actually a sign of a bug on the client or on the server. 786 * It is actually a sign of a bug on the client or on the server.
719 * 787 *
720 * If we receive a BAD_SEQID error in the particular case of 788 * If we receive a BAD_SEQID error in the particular case of
721 * doing an OPEN, we assume that nfs4_increment_seqid() will 789 * doing an OPEN, we assume that nfs_increment_open_seqid() will
722 * have unhashed the old state_owner for us, and that we can 790 * have unhashed the old state_owner for us, and that we can
723 * therefore safely retry using a new one. We should still warn 791 * therefore safely retry using a new one. We should still warn
724 * the user though... 792 * the user though...
@@ -728,6 +796,16 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
728 exception.retry = 1; 796 exception.retry = 1;
729 continue; 797 continue;
730 } 798 }
799 /*
800 * BAD_STATEID on OPEN means that the server cancelled our
801 * state before it received the OPEN_CONFIRM.
802 * Recover by retrying the request as per the discussion
803 * on Page 181 of RFC3530.
804 */
805 if (status == -NFS4ERR_BAD_STATEID) {
806 exception.retry = 1;
807 continue;
808 }
731 res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), 809 res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir),
732 status, &exception)); 810 status, &exception));
733 } while (exception.retry); 811 } while (exception.retry);
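[Note: with the hunk above, nfs4_do_open()'s retry loop restarts the OPEN for NFS4ERR_BAD_STATEID (server cancelled the state before OPEN_CONFIRM, per the RFC 3530 p.181 discussion cited in the comment) in addition to the existing NFS4ERR_BAD_SEQID case. A compacted, runnable reconstruction of the loop shape; the error numbers follow RFC 3530, and do_open_once() is scaffolding that fakes one BAD_STATEID reply before succeeding.]

/* Shape of the retry loop as the hunk leaves it; a reconstruction for
 * reading, not the verbatim kernel function. */
#include <stdio.h>

enum { NFS4ERR_BAD_STATEID = 10025, NFS4ERR_BAD_SEQID = 10026 };

struct nfs4_exception_model { int retry; };

static int attempts;

static int do_open_once(void)
{
	/* Pretend the first attempt races the server's state handling
	 * and comes back BAD_STATEID, and the second succeeds. */
	return ++attempts == 1 ? -NFS4ERR_BAD_STATEID : 0;
}

int main(void)
{
	struct nfs4_exception_model exception = { 0 };
	int status;

	do {
		exception.retry = 0;
		status = do_open_once();
		if (status == -NFS4ERR_BAD_SEQID ||
		    status == -NFS4ERR_BAD_STATEID) {
			/* BAD_SEQID: nfs_increment_open_seqid() unhashed
			 * the old state owner, so a retry gets a fresh one.
			 * BAD_STATEID: the server dropped our state before
			 * the OPEN_CONFIRM arrived; just retry. */
			exception.retry = 1;
			continue;
		}
		/* nfs4_handle_exception() would map other errors here. */
	} while (exception.retry);
	printf("open finished after %d attempt(s), status %d\n",
	       attempts, status);
	return 0;
}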
@@ -755,7 +833,7 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
755 }; 833 };
756 int status; 834 int status;
757 835
758 fattr->valid = 0; 836 nfs_fattr_init(fattr);
759 837
760 if (state != NULL) { 838 if (state != NULL) {
761 msg.rpc_cred = state->owner->so_cred; 839 msg.rpc_cred = state->owner->so_cred;
@@ -787,19 +865,30 @@ struct nfs4_closedata {
787 struct nfs4_state *state; 865 struct nfs4_state *state;
788 struct nfs_closeargs arg; 866 struct nfs_closeargs arg;
789 struct nfs_closeres res; 867 struct nfs_closeres res;
868 struct nfs_fattr fattr;
790}; 869};
791 870
871static void nfs4_free_closedata(struct nfs4_closedata *calldata)
872{
873 struct nfs4_state *state = calldata->state;
874 struct nfs4_state_owner *sp = state->owner;
875
876 nfs4_put_open_state(calldata->state);
877 nfs_free_seqid(calldata->arg.seqid);
878 nfs4_put_state_owner(sp);
879 kfree(calldata);
880}
881
792static void nfs4_close_done(struct rpc_task *task) 882static void nfs4_close_done(struct rpc_task *task)
793{ 883{
794 struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata; 884 struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata;
795 struct nfs4_state *state = calldata->state; 885 struct nfs4_state *state = calldata->state;
796 struct nfs4_state_owner *sp = state->owner;
797 struct nfs_server *server = NFS_SERVER(calldata->inode); 886 struct nfs_server *server = NFS_SERVER(calldata->inode);
798 887
799 /* hmm. we are done with the inode, and in the process of freeing 888 /* hmm. we are done with the inode, and in the process of freeing
800 * the state_owner. we keep this around to process errors 889 * the state_owner. we keep this around to process errors
801 */ 890 */
802 nfs4_increment_seqid(task->tk_status, sp); 891 nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid);
803 switch (task->tk_status) { 892 switch (task->tk_status) {
804 case 0: 893 case 0:
805 memcpy(&state->stateid, &calldata->res.stateid, 894 memcpy(&state->stateid, &calldata->res.stateid,
@@ -816,25 +905,49 @@ static void nfs4_close_done(struct rpc_task *task)
816 return; 905 return;
817 } 906 }
818 } 907 }
908 nfs_refresh_inode(calldata->inode, calldata->res.fattr);
819 state->state = calldata->arg.open_flags; 909 state->state = calldata->arg.open_flags;
820 nfs4_put_open_state(state); 910 nfs4_free_closedata(calldata);
821 up(&sp->so_sema);
822 nfs4_put_state_owner(sp);
823 up_read(&server->nfs4_state->cl_sem);
824 kfree(calldata);
825} 911}
826 912
827static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *calldata) 913static void nfs4_close_begin(struct rpc_task *task)
828{ 914{
915 struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata;
916 struct nfs4_state *state = calldata->state;
829 struct rpc_message msg = { 917 struct rpc_message msg = {
830 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE], 918 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
831 .rpc_argp = &calldata->arg, 919 .rpc_argp = &calldata->arg,
832 .rpc_resp = &calldata->res, 920 .rpc_resp = &calldata->res,
833 .rpc_cred = calldata->state->owner->so_cred, 921 .rpc_cred = state->owner->so_cred,
834 }; 922 };
835 if (calldata->arg.open_flags != 0) 923 int mode = 0;
924 int status;
925
926 status = nfs_wait_on_sequence(calldata->arg.seqid, task);
927 if (status != 0)
928 return;
929 /* Don't reorder reads */
930 smp_rmb();
931 /* Recalculate the new open mode in case someone reopened the file
932 * while we were waiting in line to be scheduled.
933 */
934 if (state->nreaders != 0)
935 mode |= FMODE_READ;
936 if (state->nwriters != 0)
937 mode |= FMODE_WRITE;
938 if (test_bit(NFS_DELEGATED_STATE, &state->flags))
939 state->state = mode;
940 if (mode == state->state) {
941 nfs4_free_closedata(calldata);
942 task->tk_exit = NULL;
943 rpc_exit(task, 0);
944 return;
945 }
946 nfs_fattr_init(calldata->res.fattr);
947 if (mode != 0)
836 msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; 948 msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
837 return rpc_call_async(clnt, &msg, 0, nfs4_close_done, calldata); 949 calldata->arg.open_flags = mode;
950 rpc_call_setup(task, &msg, 0);
838} 951}
839 952
840/* 953/*
@@ -850,40 +963,57 @@ static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *
850 */ 963 */
851int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) 964int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode)
852{ 965{
966 struct nfs_server *server = NFS_SERVER(inode);
853 struct nfs4_closedata *calldata; 967 struct nfs4_closedata *calldata;
854 int status; 968 int status = -ENOMEM;
855 969
856 /* Tell caller we're done */ 970 calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
857 if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
858 state->state = mode;
859 return 0;
860 }
861 calldata = (struct nfs4_closedata *)kmalloc(sizeof(*calldata), GFP_KERNEL);
862 if (calldata == NULL) 971 if (calldata == NULL)
863 return -ENOMEM; 972 goto out;
864 calldata->inode = inode; 973 calldata->inode = inode;
865 calldata->state = state; 974 calldata->state = state;
866 calldata->arg.fh = NFS_FH(inode); 975 calldata->arg.fh = NFS_FH(inode);
976 calldata->arg.stateid = &state->stateid;
867 /* Serialization for the sequence id */ 977 /* Serialization for the sequence id */
868 calldata->arg.seqid = state->owner->so_seqid; 978 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
869 calldata->arg.open_flags = mode; 979 if (calldata->arg.seqid == NULL)
870 memcpy(&calldata->arg.stateid, &state->stateid, 980 goto out_free_calldata;
871 sizeof(calldata->arg.stateid)); 981 calldata->arg.bitmask = server->attr_bitmask;
872 status = nfs4_close_call(NFS_SERVER(inode)->client, calldata); 982 calldata->res.fattr = &calldata->fattr;
873 /* 983 calldata->res.server = server;
874 * Return -EINPROGRESS on success in order to indicate to the 984
875 * caller that an asynchronous RPC call has been launched, and 985 status = nfs4_call_async(server->client, nfs4_close_begin,
876 * that it will release the semaphores on completion. 986 nfs4_close_done, calldata);
877 */ 987 if (status == 0)
878 return (status == 0) ? -EINPROGRESS : status; 988 goto out;
989
990 nfs_free_seqid(calldata->arg.seqid);
991out_free_calldata:
992 kfree(calldata);
993out:
994 return status;
879} 995}
880 996
881struct inode * 997static void nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state)
998{
999 struct file *filp;
1000
1001 filp = lookup_instantiate_filp(nd, dentry, NULL);
1002 if (!IS_ERR(filp)) {
1003 struct nfs_open_context *ctx;
1004 ctx = (struct nfs_open_context *)filp->private_data;
1005 ctx->state = state;
1006 } else
1007 nfs4_close_state(state, nd->intent.open.flags);
1008}
1009
1010struct dentry *
882nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 1011nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
883{ 1012{
884 struct iattr attr; 1013 struct iattr attr;
885 struct rpc_cred *cred; 1014 struct rpc_cred *cred;
886 struct nfs4_state *state; 1015 struct nfs4_state *state;
1016 struct dentry *res;
887 1017
888 if (nd->flags & LOOKUP_CREATE) { 1018 if (nd->flags & LOOKUP_CREATE) {
889 attr.ia_mode = nd->intent.open.create_mode; 1019 attr.ia_mode = nd->intent.open.create_mode;
@@ -897,16 +1027,23 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
897 1027
898 cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); 1028 cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
899 if (IS_ERR(cred)) 1029 if (IS_ERR(cred))
900 return (struct inode *)cred; 1030 return (struct dentry *)cred;
901 state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); 1031 state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
902 put_rpccred(cred); 1032 put_rpccred(cred);
903 if (IS_ERR(state)) 1033 if (IS_ERR(state)) {
904 return (struct inode *)state; 1034 if (PTR_ERR(state) == -ENOENT)
905 return state->inode; 1035 d_add(dentry, NULL);
1036 return (struct dentry *)state;
1037 }
1038 res = d_add_unique(dentry, state->inode);
1039 if (res != NULL)
1040 dentry = res;
1041 nfs4_intent_set_file(nd, dentry, state);
1042 return res;
906} 1043}
907 1044
908int 1045int
909nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags) 1046nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd)
910{ 1047{
911 struct rpc_cred *cred; 1048 struct rpc_cred *cred;
912 struct nfs4_state *state; 1049 struct nfs4_state *state;
@@ -919,18 +1056,30 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags)
919 if (IS_ERR(state)) 1056 if (IS_ERR(state))
920 state = nfs4_do_open(dir, dentry, openflags, NULL, cred); 1057 state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
921 put_rpccred(cred); 1058 put_rpccred(cred);
922 if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0) 1059 if (IS_ERR(state)) {
923 return 1; 1060 switch (PTR_ERR(state)) {
924 if (IS_ERR(state)) 1061 case -EPERM:
925 return 0; 1062 case -EACCES:
1063 case -EDQUOT:
1064 case -ENOSPC:
1065 case -EROFS:
1066 lookup_instantiate_filp(nd, (struct dentry *)state, NULL);
1067 return 1;
1068 case -ENOENT:
1069 if (dentry->d_inode == NULL)
1070 return 1;
1071 }
1072 goto out_drop;
1073 }
926 inode = state->inode; 1074 inode = state->inode;
1075 iput(inode);
927 if (inode == dentry->d_inode) { 1076 if (inode == dentry->d_inode) {
928 iput(inode); 1077 nfs4_intent_set_file(nd, dentry, state);
929 return 1; 1078 return 1;
930 } 1079 }
931 d_drop(dentry);
932 nfs4_close_state(state, openflags); 1080 nfs4_close_state(state, openflags);
933 iput(inode); 1081out_drop:
1082 d_drop(dentry);
934 return 0; 1083 return 0;
935} 1084}
936 1085
@@ -974,13 +1123,12 @@ static int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fh
974static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, 1123static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
975 struct nfs_fsinfo *info) 1124 struct nfs_fsinfo *info)
976{ 1125{
977 struct nfs_fattr * fattr = info->fattr;
978 struct nfs4_lookup_root_arg args = { 1126 struct nfs4_lookup_root_arg args = {
979 .bitmask = nfs4_fattr_bitmap, 1127 .bitmask = nfs4_fattr_bitmap,
980 }; 1128 };
981 struct nfs4_lookup_res res = { 1129 struct nfs4_lookup_res res = {
982 .server = server, 1130 .server = server,
983 .fattr = fattr, 1131 .fattr = info->fattr,
984 .fh = fhandle, 1132 .fh = fhandle,
985 }; 1133 };
986 struct rpc_message msg = { 1134 struct rpc_message msg = {
@@ -988,7 +1136,7 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
988 .rpc_argp = &args, 1136 .rpc_argp = &args,
989 .rpc_resp = &res, 1137 .rpc_resp = &res,
990 }; 1138 };
991 fattr->valid = 0; 1139 nfs_fattr_init(info->fattr);
992 return rpc_call_sync(server->client, &msg, 0); 1140 return rpc_call_sync(server->client, &msg, 0);
993} 1141}
994 1142
@@ -1051,7 +1199,7 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
1051 q.len = p - q.name; 1199 q.len = p - q.name;
1052 1200
1053 do { 1201 do {
1054 fattr->valid = 0; 1202 nfs_fattr_init(fattr);
1055 status = nfs4_handle_exception(server, 1203 status = nfs4_handle_exception(server,
1056 rpc_call_sync(server->client, &msg, 0), 1204 rpc_call_sync(server->client, &msg, 0),
1057 &exception); 1205 &exception);
@@ -1088,7 +1236,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
1088 .rpc_resp = &res, 1236 .rpc_resp = &res,
1089 }; 1237 };
1090 1238
1091 fattr->valid = 0; 1239 nfs_fattr_init(fattr);
1092 return rpc_call_sync(server->client, &msg, 0); 1240 return rpc_call_sync(server->client, &msg, 0);
1093} 1241}
1094 1242
@@ -1130,7 +1278,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
1130 struct nfs4_state *state; 1278 struct nfs4_state *state;
1131 int status; 1279 int status;
1132 1280
1133 fattr->valid = 0; 1281 nfs_fattr_init(fattr);
1134 1282
1135 cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); 1283 cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
1136 if (IS_ERR(cred)) 1284 if (IS_ERR(cred))
@@ -1176,7 +1324,7 @@ static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name,
1176 .rpc_resp = &res, 1324 .rpc_resp = &res,
1177 }; 1325 };
1178 1326
1179 fattr->valid = 0; 1327 nfs_fattr_init(fattr);
1180 1328
1181 dprintk("NFS call lookup %s\n", name->name); 1329 dprintk("NFS call lookup %s\n", name->name);
1182 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 1330 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
@@ -1325,7 +1473,7 @@ static int _nfs4_proc_read(struct nfs_read_data *rdata)
1325 dprintk("NFS call read %d @ %Ld\n", rdata->args.count, 1473 dprintk("NFS call read %d @ %Ld\n", rdata->args.count,
1326 (long long) rdata->args.offset); 1474 (long long) rdata->args.offset);
1327 1475
1328 fattr->valid = 0; 1476 nfs_fattr_init(fattr);
1329 status = rpc_call_sync(server->client, &msg, flags); 1477 status = rpc_call_sync(server->client, &msg, flags);
1330 if (!status) 1478 if (!status)
1331 renew_lease(server, timestamp); 1479 renew_lease(server, timestamp);
@@ -1362,7 +1510,7 @@ static int _nfs4_proc_write(struct nfs_write_data *wdata)
1362 dprintk("NFS call write %d @ %Ld\n", wdata->args.count, 1510 dprintk("NFS call write %d @ %Ld\n", wdata->args.count,
1363 (long long) wdata->args.offset); 1511 (long long) wdata->args.offset);
1364 1512
1365 fattr->valid = 0; 1513 nfs_fattr_init(fattr);
1366 status = rpc_call_sync(server->client, &msg, rpcflags); 1514 status = rpc_call_sync(server->client, &msg, rpcflags);
1367 dprintk("NFS reply write: %d\n", status); 1515 dprintk("NFS reply write: %d\n", status);
1368 return status; 1516 return status;
@@ -1396,7 +1544,7 @@ static int _nfs4_proc_commit(struct nfs_write_data *cdata)
1396 dprintk("NFS call commit %d @ %Ld\n", cdata->args.count, 1544 dprintk("NFS call commit %d @ %Ld\n", cdata->args.count,
1397 (long long) cdata->args.offset); 1545 (long long) cdata->args.offset);
1398 1546
1399 fattr->valid = 0; 1547 nfs_fattr_init(fattr);
1400 status = rpc_call_sync(server->client, &msg, 0); 1548 status = rpc_call_sync(server->client, &msg, 0);
1401 dprintk("NFS reply commit: %d\n", status); 1549 dprintk("NFS reply commit: %d\n", status);
1402 return status; 1550 return status;
@@ -1431,7 +1579,7 @@ static int nfs4_proc_commit(struct nfs_write_data *cdata)
1431 1579
1432static int 1580static int
1433nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 1581nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1434 int flags) 1582 int flags, struct nameidata *nd)
1435{ 1583{
1436 struct nfs4_state *state; 1584 struct nfs4_state *state;
1437 struct rpc_cred *cred; 1585 struct rpc_cred *cred;
@@ -1453,24 +1601,30 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1453 struct nfs_fattr fattr; 1601 struct nfs_fattr fattr;
1454 status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, 1602 status = nfs4_do_setattr(NFS_SERVER(dir), &fattr,
1455 NFS_FH(state->inode), sattr, state); 1603 NFS_FH(state->inode), sattr, state);
1456 if (status == 0) { 1604 if (status == 0)
1457 nfs_setattr_update_inode(state->inode, sattr); 1605 nfs_setattr_update_inode(state->inode, sattr);
1458 goto out; 1606 }
1459 } 1607 if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN))
1460 } else if (flags != 0) 1608 nfs4_intent_set_file(nd, dentry, state);
1461 goto out; 1609 else
1462 nfs4_close_state(state, flags); 1610 nfs4_close_state(state, flags);
1463out: 1611out:
1464 return status; 1612 return status;
1465} 1613}
1466 1614
1467static int _nfs4_proc_remove(struct inode *dir, struct qstr *name) 1615static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
1468{ 1616{
1617 struct nfs_server *server = NFS_SERVER(dir);
1469 struct nfs4_remove_arg args = { 1618 struct nfs4_remove_arg args = {
1470 .fh = NFS_FH(dir), 1619 .fh = NFS_FH(dir),
1471 .name = name, 1620 .name = name,
1621 .bitmask = server->attr_bitmask,
1622 };
1623 struct nfs_fattr dir_attr;
1624 struct nfs4_remove_res res = {
1625 .server = server,
1626 .dir_attr = &dir_attr,
1472 }; 1627 };
1473 struct nfs4_change_info res;
1474 struct rpc_message msg = { 1628 struct rpc_message msg = {
1475 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE], 1629 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE],
1476 .rpc_argp = &args, 1630 .rpc_argp = &args,
@@ -1478,9 +1632,12 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
1478 }; 1632 };
1479 int status; 1633 int status;
1480 1634
1481 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 1635 nfs_fattr_init(res.dir_attr);
1482 if (status == 0) 1636 status = rpc_call_sync(server->client, &msg, 0);
1483 update_changeattr(dir, &res); 1637 if (status == 0) {
1638 update_changeattr(dir, &res.cinfo);
1639 nfs_post_op_update_inode(dir, res.dir_attr);
1640 }
1484 return status; 1641 return status;
1485} 1642}
1486 1643
@@ -1498,12 +1655,14 @@ static int nfs4_proc_remove(struct inode *dir, struct qstr *name)
1498 1655
1499struct unlink_desc { 1656struct unlink_desc {
1500 struct nfs4_remove_arg args; 1657 struct nfs4_remove_arg args;
1501 struct nfs4_change_info res; 1658 struct nfs4_remove_res res;
1659 struct nfs_fattr dir_attr;
1502}; 1660};
1503 1661
1504static int nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, 1662static int nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir,
1505 struct qstr *name) 1663 struct qstr *name)
1506{ 1664{
1665 struct nfs_server *server = NFS_SERVER(dir->d_inode);
1507 struct unlink_desc *up; 1666 struct unlink_desc *up;
1508 1667
1509 up = (struct unlink_desc *) kmalloc(sizeof(*up), GFP_KERNEL); 1668 up = (struct unlink_desc *) kmalloc(sizeof(*up), GFP_KERNEL);
@@ -1512,6 +1671,9 @@ static int nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir,
1512 1671
1513 up->args.fh = NFS_FH(dir->d_inode); 1672 up->args.fh = NFS_FH(dir->d_inode);
1514 up->args.name = name; 1673 up->args.name = name;
1674 up->args.bitmask = server->attr_bitmask;
1675 up->res.server = server;
1676 up->res.dir_attr = &up->dir_attr;
1515 1677
1516 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; 1678 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
1517 msg->rpc_argp = &up->args; 1679 msg->rpc_argp = &up->args;
@@ -1526,7 +1688,8 @@ static int nfs4_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
1526 1688
1527 if (msg->rpc_resp != NULL) { 1689 if (msg->rpc_resp != NULL) {
1528 up = container_of(msg->rpc_resp, struct unlink_desc, res); 1690 up = container_of(msg->rpc_resp, struct unlink_desc, res);
1529 update_changeattr(dir->d_inode, &up->res); 1691 update_changeattr(dir->d_inode, &up->res.cinfo);
1692 nfs_post_op_update_inode(dir->d_inode, up->res.dir_attr);
1530 kfree(up); 1693 kfree(up);
1531 msg->rpc_resp = NULL; 1694 msg->rpc_resp = NULL;
1532 msg->rpc_argp = NULL; 1695 msg->rpc_argp = NULL;
@@ -1537,13 +1700,20 @@ static int nfs4_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
1537static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, 1700static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
1538 struct inode *new_dir, struct qstr *new_name) 1701 struct inode *new_dir, struct qstr *new_name)
1539{ 1702{
1703 struct nfs_server *server = NFS_SERVER(old_dir);
1540 struct nfs4_rename_arg arg = { 1704 struct nfs4_rename_arg arg = {
1541 .old_dir = NFS_FH(old_dir), 1705 .old_dir = NFS_FH(old_dir),
1542 .new_dir = NFS_FH(new_dir), 1706 .new_dir = NFS_FH(new_dir),
1543 .old_name = old_name, 1707 .old_name = old_name,
1544 .new_name = new_name, 1708 .new_name = new_name,
1709 .bitmask = server->attr_bitmask,
1710 };
1711 struct nfs_fattr old_fattr, new_fattr;
1712 struct nfs4_rename_res res = {
1713 .server = server,
1714 .old_fattr = &old_fattr,
1715 .new_fattr = &new_fattr,
1545 }; 1716 };
1546 struct nfs4_rename_res res = { };
1547 struct rpc_message msg = { 1717 struct rpc_message msg = {
1548 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME], 1718 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME],
1549 .rpc_argp = &arg, 1719 .rpc_argp = &arg,
@@ -1551,11 +1721,15 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
1551 }; 1721 };
1552 int status; 1722 int status;
1553 1723
1554 status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); 1724 nfs_fattr_init(res.old_fattr);
1725 nfs_fattr_init(res.new_fattr);
1726 status = rpc_call_sync(server->client, &msg, 0);
1555 1727
1556 if (!status) { 1728 if (!status) {
1557 update_changeattr(old_dir, &res.old_cinfo); 1729 update_changeattr(old_dir, &res.old_cinfo);
1730 nfs_post_op_update_inode(old_dir, res.old_fattr);
1558 update_changeattr(new_dir, &res.new_cinfo); 1731 update_changeattr(new_dir, &res.new_cinfo);
1732 nfs_post_op_update_inode(new_dir, res.new_fattr);
1559 } 1733 }
1560 return status; 1734 return status;
1561} 1735}
@@ -1576,22 +1750,34 @@ static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
1576 1750
1577static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) 1751static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
1578{ 1752{
1753 struct nfs_server *server = NFS_SERVER(inode);
1579 struct nfs4_link_arg arg = { 1754 struct nfs4_link_arg arg = {
1580 .fh = NFS_FH(inode), 1755 .fh = NFS_FH(inode),
1581 .dir_fh = NFS_FH(dir), 1756 .dir_fh = NFS_FH(dir),
1582 .name = name, 1757 .name = name,
1758 .bitmask = server->attr_bitmask,
1759 };
1760 struct nfs_fattr fattr, dir_attr;
1761 struct nfs4_link_res res = {
1762 .server = server,
1763 .fattr = &fattr,
1764 .dir_attr = &dir_attr,
1583 }; 1765 };
1584 struct nfs4_change_info cinfo = { };
1585 struct rpc_message msg = { 1766 struct rpc_message msg = {
1586 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK], 1767 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK],
1587 .rpc_argp = &arg, 1768 .rpc_argp = &arg,
1588 .rpc_resp = &cinfo, 1769 .rpc_resp = &res,
1589 }; 1770 };
1590 int status; 1771 int status;
1591 1772
1592 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); 1773 nfs_fattr_init(res.fattr);
1593 if (!status) 1774 nfs_fattr_init(res.dir_attr);
1594 update_changeattr(dir, &cinfo); 1775 status = rpc_call_sync(server->client, &msg, 0);
1776 if (!status) {
1777 update_changeattr(dir, &res.cinfo);
1778 nfs_post_op_update_inode(dir, res.dir_attr);
1779 nfs_refresh_inode(inode, res.fattr);
1780 }
1595 1781
1596 return status; 1782 return status;
1597} 1783}
@@ -1613,6 +1799,7 @@ static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
1613 struct nfs_fattr *fattr) 1799 struct nfs_fattr *fattr)
1614{ 1800{
1615 struct nfs_server *server = NFS_SERVER(dir); 1801 struct nfs_server *server = NFS_SERVER(dir);
1802 struct nfs_fattr dir_fattr;
1616 struct nfs4_create_arg arg = { 1803 struct nfs4_create_arg arg = {
1617 .dir_fh = NFS_FH(dir), 1804 .dir_fh = NFS_FH(dir),
1618 .server = server, 1805 .server = server,
@@ -1625,6 +1812,7 @@ static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
1625 .server = server, 1812 .server = server,
1626 .fh = fhandle, 1813 .fh = fhandle,
1627 .fattr = fattr, 1814 .fattr = fattr,
1815 .dir_fattr = &dir_fattr,
1628 }; 1816 };
1629 struct rpc_message msg = { 1817 struct rpc_message msg = {
1630 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK], 1818 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK],
@@ -1636,11 +1824,13 @@ static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
1636 if (path->len > NFS4_MAXPATHLEN) 1824 if (path->len > NFS4_MAXPATHLEN)
1637 return -ENAMETOOLONG; 1825 return -ENAMETOOLONG;
1638 arg.u.symlink = path; 1826 arg.u.symlink = path;
1639 fattr->valid = 0; 1827 nfs_fattr_init(fattr);
1828 nfs_fattr_init(&dir_fattr);
1640 1829
1641 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 1830 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
1642 if (!status) 1831 if (!status)
1643 update_changeattr(dir, &res.dir_cinfo); 1832 update_changeattr(dir, &res.dir_cinfo);
1833 nfs_post_op_update_inode(dir, res.dir_fattr);
1644 return status; 1834 return status;
1645} 1835}
1646 1836
@@ -1664,7 +1854,7 @@ static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
1664{ 1854{
1665 struct nfs_server *server = NFS_SERVER(dir); 1855 struct nfs_server *server = NFS_SERVER(dir);
1666 struct nfs_fh fhandle; 1856 struct nfs_fh fhandle;
1667 struct nfs_fattr fattr; 1857 struct nfs_fattr fattr, dir_fattr;
1668 struct nfs4_create_arg arg = { 1858 struct nfs4_create_arg arg = {
1669 .dir_fh = NFS_FH(dir), 1859 .dir_fh = NFS_FH(dir),
1670 .server = server, 1860 .server = server,
@@ -1677,6 +1867,7 @@ static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
1677 .server = server, 1867 .server = server,
1678 .fh = &fhandle, 1868 .fh = &fhandle,
1679 .fattr = &fattr, 1869 .fattr = &fattr,
1870 .dir_fattr = &dir_fattr,
1680 }; 1871 };
1681 struct rpc_message msg = { 1872 struct rpc_message msg = {
1682 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], 1873 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE],
@@ -1685,11 +1876,13 @@ static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
1685 }; 1876 };
1686 int status; 1877 int status;
1687 1878
1688 fattr.valid = 0; 1879 nfs_fattr_init(&fattr);
1880 nfs_fattr_init(&dir_fattr);
1689 1881
1690 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 1882 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
1691 if (!status) { 1883 if (!status) {
1692 update_changeattr(dir, &res.dir_cinfo); 1884 update_changeattr(dir, &res.dir_cinfo);
1885 nfs_post_op_update_inode(dir, res.dir_fattr);
1693 status = nfs_instantiate(dentry, &fhandle, &fattr); 1886 status = nfs_instantiate(dentry, &fhandle, &fattr);
1694 } 1887 }
1695 return status; 1888 return status;
@@ -1762,7 +1955,7 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
1762{ 1955{
1763 struct nfs_server *server = NFS_SERVER(dir); 1956 struct nfs_server *server = NFS_SERVER(dir);
1764 struct nfs_fh fh; 1957 struct nfs_fh fh;
1765 struct nfs_fattr fattr; 1958 struct nfs_fattr fattr, dir_fattr;
1766 struct nfs4_create_arg arg = { 1959 struct nfs4_create_arg arg = {
1767 .dir_fh = NFS_FH(dir), 1960 .dir_fh = NFS_FH(dir),
1768 .server = server, 1961 .server = server,
@@ -1774,6 +1967,7 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
1774 .server = server, 1967 .server = server,
1775 .fh = &fh, 1968 .fh = &fh,
1776 .fattr = &fattr, 1969 .fattr = &fattr,
1970 .dir_fattr = &dir_fattr,
1777 }; 1971 };
1778 struct rpc_message msg = { 1972 struct rpc_message msg = {
1779 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], 1973 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE],
@@ -1783,7 +1977,8 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
1783 int status; 1977 int status;
1784 int mode = sattr->ia_mode; 1978 int mode = sattr->ia_mode;
1785 1979
1786 fattr.valid = 0; 1980 nfs_fattr_init(&fattr);
1981 nfs_fattr_init(&dir_fattr);
1787 1982
1788 BUG_ON(!(sattr->ia_valid & ATTR_MODE)); 1983 BUG_ON(!(sattr->ia_valid & ATTR_MODE));
1789 BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode)); 1984 BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode));
@@ -1805,6 +2000,7 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
1805 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 2000 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
1806 if (status == 0) { 2001 if (status == 0) {
1807 update_changeattr(dir, &res.dir_cinfo); 2002 update_changeattr(dir, &res.dir_cinfo);
2003 nfs_post_op_update_inode(dir, res.dir_fattr);
1808 status = nfs_instantiate(dentry, &fh, &fattr); 2004 status = nfs_instantiate(dentry, &fh, &fattr);
1809 } 2005 }
1810 return status; 2006 return status;
@@ -1836,7 +2032,7 @@ static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
1836 .rpc_resp = fsstat, 2032 .rpc_resp = fsstat,
1837 }; 2033 };
1838 2034
1839 fsstat->fattr->valid = 0; 2035 nfs_fattr_init(fsstat->fattr);
1840 return rpc_call_sync(server->client, &msg, 0); 2036 return rpc_call_sync(server->client, &msg, 0);
1841} 2037}
1842 2038
@@ -1883,7 +2079,7 @@ static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, str
1883 2079
1884static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) 2080static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
1885{ 2081{
1886 fsinfo->fattr->valid = 0; 2082 nfs_fattr_init(fsinfo->fattr);
1887 return nfs4_do_fsinfo(server, fhandle, fsinfo); 2083 return nfs4_do_fsinfo(server, fhandle, fsinfo);
1888} 2084}
1889 2085
@@ -1906,7 +2102,7 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle
1906 return 0; 2102 return 0;
1907 } 2103 }
1908 2104
1909 pathconf->fattr->valid = 0; 2105 nfs_fattr_init(pathconf->fattr);
1910 return rpc_call_sync(server->client, &msg, 0); 2106 return rpc_call_sync(server->client, &msg, 0);
1911} 2107}
1912 2108
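
The repeated conversion of open-coded `fattr->valid = 0;` into nfs_fattr_init() across statfs, fsinfo and pathconf suggests the helper does slightly more than clear the valid bitmap. A plausible minimal reconstruction (an assumption; the helper itself is introduced elsewhere in this series, and the timestamp field name is not confirmed by this diff):

/* Hypothetical reconstruction of nfs_fattr_init(); not part of this
 * diff. At minimum it must subsume the old `fattr->valid = 0`, and
 * plausibly stamps the struct so attribute-cache age checks work. */
static inline void nfs_fattr_init(struct nfs_fattr *fattr)
{
	fattr->valid = 0;		/* nothing cached yet */
	fattr->timestamp = jiffies;	/* assumed: age stamp for cache revalidation */
}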
@@ -1973,8 +2169,10 @@ nfs4_write_done(struct rpc_task *task)
1973 rpc_restart_call(task); 2169 rpc_restart_call(task);
1974 return; 2170 return;
1975 } 2171 }
1976 if (task->tk_status >= 0) 2172 if (task->tk_status >= 0) {
1977 renew_lease(NFS_SERVER(inode), data->timestamp); 2173 renew_lease(NFS_SERVER(inode), data->timestamp);
2174 nfs_post_op_update_inode(inode, data->res.fattr);
2175 }
1978 /* Call back common NFS writeback processing */ 2176 /* Call back common NFS writeback processing */
1979 nfs_writeback_done(task); 2177 nfs_writeback_done(task);
1980} 2178}
@@ -1990,6 +2188,7 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
1990 .rpc_cred = data->cred, 2188 .rpc_cred = data->cred,
1991 }; 2189 };
1992 struct inode *inode = data->inode; 2190 struct inode *inode = data->inode;
2191 struct nfs_server *server = NFS_SERVER(inode);
1993 int stable; 2192 int stable;
1994 int flags; 2193 int flags;
1995 2194
@@ -2001,6 +2200,8 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
2001 } else 2200 } else
2002 stable = NFS_UNSTABLE; 2201 stable = NFS_UNSTABLE;
2003 data->args.stable = stable; 2202 data->args.stable = stable;
2203 data->args.bitmask = server->attr_bitmask;
2204 data->res.server = server;
2004 2205
2005 data->timestamp = jiffies; 2206 data->timestamp = jiffies;
2006 2207
@@ -2022,6 +2223,8 @@ nfs4_commit_done(struct rpc_task *task)
2022 rpc_restart_call(task); 2223 rpc_restart_call(task);
2023 return; 2224 return;
2024 } 2225 }
2226 if (task->tk_status >= 0)
2227 nfs_post_op_update_inode(inode, data->res.fattr);
2025 /* Call back common NFS writeback processing */ 2228 /* Call back common NFS writeback processing */
2026 nfs_commit_done(task); 2229 nfs_commit_done(task);
2027} 2230}
@@ -2037,8 +2240,12 @@ nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
2037 .rpc_cred = data->cred, 2240 .rpc_cred = data->cred,
2038 }; 2241 };
2039 struct inode *inode = data->inode; 2242 struct inode *inode = data->inode;
2243 struct nfs_server *server = NFS_SERVER(inode);
2040 int flags; 2244 int flags;
2041 2245
2246 data->args.bitmask = server->attr_bitmask;
2247 data->res.server = server;
2248
2042 /* Set the initial flags for the task. */ 2249 /* Set the initial flags for the task. */
2043 flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; 2250 flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
2044 2251
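
Both async setup paths above stash the same two pieces of state before the RPC is queued, so that the piggybacked GETATTR in the WRITE and COMMIT compounds can be encoded and its reply decoded. A condensed sketch of the shared shape (field names as they appear in the diff; the wrapper is hypothetical):

/* Common shape of nfs4_proc_write_setup()/nfs4_proc_commit_setup()
 * after this patch: the bitmask tells the XDR layer which attributes
 * to request, and res.server lets the decoder interpret the reply. */
static void nfs4_setup_postop_attrs(struct nfs_write_data *data)
{
	struct nfs_server *server = NFS_SERVER(data->inode);

	data->args.bitmask = server->attr_bitmask;	/* attrs to piggyback */
	data->res.server = server;			/* needed at decode time */
}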
@@ -2106,65 +2313,6 @@ nfs4_proc_renew(struct nfs4_client *clp)
2106 return 0; 2313 return 0;
2107} 2314}
2108 2315
2109/*
2110 * We will need to arrange for the VFS layer to provide an atomic open.
2111 * Until then, this open method is prone to inefficiency and race conditions
2112 * due to the lookup, potential create, and open VFS calls from sys_open()
2113 * placed on the wire.
2114 */
2115static int
2116nfs4_proc_file_open(struct inode *inode, struct file *filp)
2117{
2118 struct dentry *dentry = filp->f_dentry;
2119 struct nfs_open_context *ctx;
2120 struct nfs4_state *state = NULL;
2121 struct rpc_cred *cred;
2122 int status = -ENOMEM;
2123
2124 dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n",
2125 (int)dentry->d_parent->d_name.len,
2126 dentry->d_parent->d_name.name,
2127 (int)dentry->d_name.len, dentry->d_name.name);
2128
2129
2130 /* Find our open stateid */
2131 cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
2132 if (IS_ERR(cred))
2133 return PTR_ERR(cred);
2134 ctx = alloc_nfs_open_context(dentry, cred);
2135 put_rpccred(cred);
2136 if (unlikely(ctx == NULL))
2137 return -ENOMEM;
2138 status = -EIO; /* ERACE actually */
2139 state = nfs4_find_state(inode, cred, filp->f_mode);
2140 if (unlikely(state == NULL))
2141 goto no_state;
2142 ctx->state = state;
2143 nfs4_close_state(state, filp->f_mode);
2144 ctx->mode = filp->f_mode;
2145 nfs_file_set_open_context(filp, ctx);
2146 put_nfs_open_context(ctx);
2147 if (filp->f_mode & FMODE_WRITE)
2148 nfs_begin_data_update(inode);
2149 return 0;
2150no_state:
2151 printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__);
2152 put_nfs_open_context(ctx);
2153 return status;
2154}
2155
2156/*
2157 * Release our state
2158 */
2159static int
2160nfs4_proc_file_release(struct inode *inode, struct file *filp)
2161{
2162 if (filp->f_mode & FMODE_WRITE)
2163 nfs_end_data_update(inode);
2164 nfs_file_clear_open_context(filp);
2165 return 0;
2166}
2167
2168static inline int nfs4_server_supports_acls(struct nfs_server *server) 2316static inline int nfs4_server_supports_acls(struct nfs_server *server)
2169{ 2317{
2170 return (server->caps & NFS_CAP_ACLS) 2318 return (server->caps & NFS_CAP_ACLS)
@@ -2285,7 +2433,7 @@ static inline ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size
2285 return -ENOMEM; 2433 return -ENOMEM;
2286 args.acl_pages[0] = localpage; 2434 args.acl_pages[0] = localpage;
2287 args.acl_pgbase = 0; 2435 args.acl_pgbase = 0;
2288 args.acl_len = PAGE_SIZE; 2436 resp_len = args.acl_len = PAGE_SIZE;
2289 } else { 2437 } else {
2290 resp_buf = buf; 2438 resp_buf = buf;
2291 buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); 2439 buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
@@ -2345,6 +2493,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
2345 2493
2346 if (!nfs4_server_supports_acls(server)) 2494 if (!nfs4_server_supports_acls(server))
2347 return -EOPNOTSUPP; 2495 return -EOPNOTSUPP;
2496 nfs_inode_return_delegation(inode);
2348 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); 2497 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
2349 ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); 2498 ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0);
2350 if (ret == 0) 2499 if (ret == 0)
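
Both ACL paths hand the RPC layer a page vector rather than a flat buffer; buf_to_pages() does the conversion but is defined outside the hunks shown. A plausible reconstruction, assuming the buffer is a contiguous lowmem kernel mapping so virt_to_page() is valid for it:

/* Assumed reconstruction of buf_to_pages(); not part of this diff.
 * Splits a contiguous lowmem buffer into the page-vector-plus-offset
 * form that the xdr/RPC layer consumes. */
static void buf_to_pages(const void *buf, size_t buflen,
			 struct page **pages, unsigned int *pgbase)
{
	const void *p = buf;

	*pgbase = offset_in_page(buf);	/* offset of buf within its first page */
	p -= *pgbase;			/* back up to the page boundary */
	while (p < buf + buflen) {
		*(pages++) = virt_to_page(p);
		p += PAGE_SIZE;
	}
}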
@@ -2353,7 +2502,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
2353} 2502}
2354 2503
2355static int 2504static int
2356nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server) 2505nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
2357{ 2506{
2358 struct nfs4_client *clp = server->nfs4_state; 2507 struct nfs4_client *clp = server->nfs4_state;
2359 2508
@@ -2431,7 +2580,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
2431/* This is the error handling routine for processes that are allowed 2580/* This is the error handling routine for processes that are allowed
2432 * to sleep. 2581 * to sleep.
2433 */ 2582 */
2434int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) 2583int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
2435{ 2584{
2436 struct nfs4_client *clp = server->nfs4_state; 2585 struct nfs4_client *clp = server->nfs4_state;
2437 int ret = errorcode; 2586 int ret = errorcode;
@@ -2632,7 +2781,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
2632 2781
2633 down_read(&clp->cl_sem); 2782 down_read(&clp->cl_sem);
2634 nlo.clientid = clp->cl_clientid; 2783 nlo.clientid = clp->cl_clientid;
2635 down(&state->lock_sema);
2636 status = nfs4_set_lock_state(state, request); 2784 status = nfs4_set_lock_state(state, request);
2637 if (status != 0) 2785 if (status != 0)
2638 goto out; 2786 goto out;
@@ -2659,7 +2807,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
2659 status = 0; 2807 status = 0;
2660 } 2808 }
2661out: 2809out:
2662 up(&state->lock_sema);
2663 up_read(&clp->cl_sem); 2810 up_read(&clp->cl_sem);
2664 return status; 2811 return status;
2665} 2812}
@@ -2696,79 +2843,149 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
2696 return res; 2843 return res;
2697} 2844}
2698 2845
2699static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) 2846struct nfs4_unlockdata {
2847 struct nfs_lockargs arg;
2848 struct nfs_locku_opargs luargs;
2849 struct nfs_lockres res;
2850 struct nfs4_lock_state *lsp;
2851 struct nfs_open_context *ctx;
2852 atomic_t refcount;
2853 struct completion completion;
2854};
2855
2856static void nfs4_locku_release_calldata(struct nfs4_unlockdata *calldata)
2700{ 2857{
2701 struct inode *inode = state->inode; 2858 if (atomic_dec_and_test(&calldata->refcount)) {
2702 struct nfs_server *server = NFS_SERVER(inode); 2859 nfs_free_seqid(calldata->luargs.seqid);
2703 struct nfs4_client *clp = server->nfs4_state; 2860 nfs4_put_lock_state(calldata->lsp);
2704 struct nfs_lockargs arg = { 2861 put_nfs_open_context(calldata->ctx);
2705 .fh = NFS_FH(inode), 2862 kfree(calldata);
2706 .type = nfs4_lck_type(cmd, request), 2863 }
2707 .offset = request->fl_start, 2864}
2708 .length = nfs4_lck_length(request), 2865
2709 }; 2866static void nfs4_locku_complete(struct nfs4_unlockdata *calldata)
2710 struct nfs_lockres res = { 2867{
2711 .server = server, 2868 complete(&calldata->completion);
2712 }; 2869 nfs4_locku_release_calldata(calldata);
2870}
2871
2872static void nfs4_locku_done(struct rpc_task *task)
2873{
2874 struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata;
2875
2876 nfs_increment_lock_seqid(task->tk_status, calldata->luargs.seqid);
2877 switch (task->tk_status) {
2878 case 0:
2879 memcpy(calldata->lsp->ls_stateid.data,
2880 calldata->res.u.stateid.data,
2881 sizeof(calldata->lsp->ls_stateid.data));
2882 break;
2883 case -NFS4ERR_STALE_STATEID:
2884 case -NFS4ERR_EXPIRED:
2885 nfs4_schedule_state_recovery(calldata->res.server->nfs4_state);
2886 break;
2887 default:
2888 if (nfs4_async_handle_error(task, calldata->res.server) == -EAGAIN) {
2889 rpc_restart_call(task);
2890 return;
2891 }
2892 }
2893 nfs4_locku_complete(calldata);
2894}
2895
2896static void nfs4_locku_begin(struct rpc_task *task)
2897{
2898 struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata;
2713 struct rpc_message msg = { 2899 struct rpc_message msg = {
2714 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU], 2900 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU],
2715 .rpc_argp = &arg, 2901 .rpc_argp = &calldata->arg,
2716 .rpc_resp = &res, 2902 .rpc_resp = &calldata->res,
2717 .rpc_cred = state->owner->so_cred, 2903 .rpc_cred = calldata->lsp->ls_state->owner->so_cred,
2718 }; 2904 };
2905 int status;
2906
2907 status = nfs_wait_on_sequence(calldata->luargs.seqid, task);
2908 if (status != 0)
2909 return;
2910 if ((calldata->lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) {
2911 nfs4_locku_complete(calldata);
2912 task->tk_exit = NULL;
2913 rpc_exit(task, 0);
2914 return;
2915 }
2916 rpc_call_setup(task, &msg, 0);
2917}
2918
2919static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
2920{
2921 struct nfs4_unlockdata *calldata;
2922 struct inode *inode = state->inode;
2923 struct nfs_server *server = NFS_SERVER(inode);
2719 struct nfs4_lock_state *lsp; 2924 struct nfs4_lock_state *lsp;
2720 struct nfs_locku_opargs luargs;
2721 int status; 2925 int status;
2722 2926
2723 down_read(&clp->cl_sem);
2724 down(&state->lock_sema);
2725 status = nfs4_set_lock_state(state, request); 2927 status = nfs4_set_lock_state(state, request);
2726 if (status != 0) 2928 if (status != 0)
2727 goto out; 2929 return status;
2728 lsp = request->fl_u.nfs4_fl.owner; 2930 lsp = request->fl_u.nfs4_fl.owner;
2729 /* We might have lost the locks! */ 2931 /* We might have lost the locks! */
2730 if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) 2932 if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0)
2731 goto out; 2933 return 0;
2732 luargs.seqid = lsp->ls_seqid; 2934 calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
2733 memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid)); 2935 if (calldata == NULL)
2734 arg.u.locku = &luargs; 2936 return -ENOMEM;
2735 status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); 2937 calldata->luargs.seqid = nfs_alloc_seqid(&lsp->ls_seqid);
2736 nfs4_increment_lock_seqid(status, lsp); 2938 if (calldata->luargs.seqid == NULL) {
2737 2939 kfree(calldata);
2738 if (status == 0) 2940 return -ENOMEM;
2739 memcpy(&lsp->ls_stateid, &res.u.stateid, 2941 }
2740 sizeof(lsp->ls_stateid)); 2942 calldata->luargs.stateid = &lsp->ls_stateid;
2741out: 2943 calldata->arg.fh = NFS_FH(inode);
2742 up(&state->lock_sema); 2944 calldata->arg.type = nfs4_lck_type(cmd, request);
2945 calldata->arg.offset = request->fl_start;
2946 calldata->arg.length = nfs4_lck_length(request);
2947 calldata->arg.u.locku = &calldata->luargs;
2948 calldata->res.server = server;
2949 calldata->lsp = lsp;
2950 atomic_inc(&lsp->ls_count);
2951
2952 /* Ensure we don't close file until we're done freeing locks! */
2953 calldata->ctx = get_nfs_open_context((struct nfs_open_context*)request->fl_file->private_data);
2954
2955 atomic_set(&calldata->refcount, 2);
2956 init_completion(&calldata->completion);
2957
2958 status = nfs4_call_async(NFS_SERVER(inode)->client, nfs4_locku_begin,
2959 nfs4_locku_done, calldata);
2743 if (status == 0) 2960 if (status == 0)
2744 do_vfs_lock(request->fl_file, request); 2961 wait_for_completion_interruptible(&calldata->completion);
2745 up_read(&clp->cl_sem); 2962 do_vfs_lock(request->fl_file, request);
2963 nfs4_locku_release_calldata(calldata);
2746 return status; 2964 return status;
2747} 2965}
2748 2966
2749static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
2750{
2751 struct nfs4_exception exception = { };
2752 int err;
2753
2754 do {
2755 err = nfs4_handle_exception(NFS_SERVER(state->inode),
2756 _nfs4_proc_unlck(state, cmd, request),
2757 &exception);
2758 } while (exception.retry);
2759 return err;
2760}
2761
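
The unlock path above is the first conversion in this patch from a synchronous rpc_call_sync() to an async call with a private calldata block, and its lifetime rule is worth spelling out: two references are taken, one for the RPC callback and one for the submitting thread, and whichever side drops the last reference frees everything. A stripped-down sketch of that idiom, independent of the NFS specifics (my_calldata and friends are hypothetical):

#include <linux/completion.h>
#include <linux/slab.h>
#include <asm/atomic.h>

/* Minimal sketch of the refcount + completion idiom used by
 * nfs4_unlockdata above. */
struct my_calldata {
	atomic_t refcount;
	struct completion done;
	/* ... per-call state ... */
};

static void my_calldata_put(struct my_calldata *c)
{
	if (atomic_dec_and_test(&c->refcount))
		kfree(c);			/* last reference frees the block */
}

static void my_async_done(struct my_calldata *c)	/* runs in RPC context */
{
	complete(&c->done);	/* wake the submitter... */
	my_calldata_put(c);	/* ...then drop the callback's reference */
}

static void my_submit(struct my_calldata *c)
{
	atomic_set(&c->refcount, 2);	/* one ref for us, one for the callback */
	init_completion(&c->done);
	/* ... queue the async call; completion runs my_async_done() ... */
	wait_for_completion_interruptible(&c->done);
	my_calldata_put(c);		/* drop the submitter's reference */
}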
2762static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *request, int reclaim) 2967static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *request, int reclaim)
2763{ 2968{
2764 struct inode *inode = state->inode; 2969 struct inode *inode = state->inode;
2765 struct nfs_server *server = NFS_SERVER(inode); 2970 struct nfs_server *server = NFS_SERVER(inode);
2766 struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner; 2971 struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner;
2972 struct nfs_lock_opargs largs = {
2973 .lock_stateid = &lsp->ls_stateid,
2974 .open_stateid = &state->stateid,
2975 .lock_owner = {
2976 .clientid = server->nfs4_state->cl_clientid,
2977 .id = lsp->ls_id,
2978 },
2979 .reclaim = reclaim,
2980 };
2767 struct nfs_lockargs arg = { 2981 struct nfs_lockargs arg = {
2768 .fh = NFS_FH(inode), 2982 .fh = NFS_FH(inode),
2769 .type = nfs4_lck_type(cmd, request), 2983 .type = nfs4_lck_type(cmd, request),
2770 .offset = request->fl_start, 2984 .offset = request->fl_start,
2771 .length = nfs4_lck_length(request), 2985 .length = nfs4_lck_length(request),
2986 .u = {
2987 .lock = &largs,
2988 },
2772 }; 2989 };
2773 struct nfs_lockres res = { 2990 struct nfs_lockres res = {
2774 .server = server, 2991 .server = server,
@@ -2779,53 +2996,39 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r
2779 .rpc_resp = &res, 2996 .rpc_resp = &res,
2780 .rpc_cred = state->owner->so_cred, 2997 .rpc_cred = state->owner->so_cred,
2781 }; 2998 };
2782 struct nfs_lock_opargs largs = { 2999 int status = -ENOMEM;
2783 .reclaim = reclaim,
2784 .new_lock_owner = 0,
2785 };
2786 int status;
2787 3000
2788 if (!(lsp->ls_flags & NFS_LOCK_INITIALIZED)) { 3001 largs.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid);
3002 if (largs.lock_seqid == NULL)
3003 return -ENOMEM;
3004 if (!(lsp->ls_seqid.flags & NFS_SEQID_CONFIRMED)) {
2789 struct nfs4_state_owner *owner = state->owner; 3005 struct nfs4_state_owner *owner = state->owner;
2790 struct nfs_open_to_lock otl = { 3006
2791 .lock_owner = { 3007 largs.open_seqid = nfs_alloc_seqid(&owner->so_seqid);
2792 .clientid = server->nfs4_state->cl_clientid, 3008 if (largs.open_seqid == NULL)
2793 }, 3009 goto out;
2794 };
2795
2796 otl.lock_seqid = lsp->ls_seqid;
2797 otl.lock_owner.id = lsp->ls_id;
2798 memcpy(&otl.open_stateid, &state->stateid, sizeof(otl.open_stateid));
2799 largs.u.open_lock = &otl;
2800 largs.new_lock_owner = 1; 3010 largs.new_lock_owner = 1;
2801 arg.u.lock = &largs;
2802 down(&owner->so_sema);
2803 otl.open_seqid = owner->so_seqid;
2804 status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); 3011 status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
2805 /* increment open_owner seqid on success, and 3012 /* increment open seqid on success, and seqid mutating errors */
2806 * seqid mutating errors */ 3013 if (largs.new_lock_owner != 0) {
2807 nfs4_increment_seqid(status, owner); 3014 nfs_increment_open_seqid(status, largs.open_seqid);
2808 up(&owner->so_sema); 3015 if (status == 0)
2809 if (status == 0) { 3016 nfs_confirm_seqid(&lsp->ls_seqid, 0);
2810 lsp->ls_flags |= NFS_LOCK_INITIALIZED;
2811 lsp->ls_seqid++;
2812 } 3017 }
2813 } else { 3018 nfs_free_seqid(largs.open_seqid);
2814 struct nfs_exist_lock el = { 3019 } else
2815 .seqid = lsp->ls_seqid,
2816 };
2817 memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid));
2818 largs.u.exist_lock = &el;
2819 arg.u.lock = &largs;
2820 status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); 3020 status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
2821 /* increment seqid on success, and * seqid mutating errors*/ 3021 /* increment lock seqid on success, and seqid mutating errors*/
2822 nfs4_increment_lock_seqid(status, lsp); 3022 nfs_increment_lock_seqid(status, largs.lock_seqid);
2823 }
2824 /* save the returned stateid. */ 3023 /* save the returned stateid. */
2825 if (status == 0) 3024 if (status == 0) {
2826 memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid)); 3025 memcpy(lsp->ls_stateid.data, res.u.stateid.data,
2827 else if (status == -NFS4ERR_DENIED) 3026 sizeof(lsp->ls_stateid.data));
3027 lsp->ls_flags |= NFS_LOCK_INITIALIZED;
3028 } else if (status == -NFS4ERR_DENIED)
2828 status = -EAGAIN; 3029 status = -EAGAIN;
3030out:
3031 nfs_free_seqid(largs.lock_seqid);
2829 return status; 3032 return status;
2830} 3033}
2831 3034
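
The rewritten _nfs4_do_setlk() keys the choice of LOCK variant off the seqid state alone: an owner whose seqid has never been confirmed must send the open_to_lock_owner4 form (open seqid, open stateid and lock seqid), while a confirmed owner sends just the existing lock stateid and lock seqid. A control-flow sketch of the function above (condensed, not verbatim):

/* Control-flow sketch of _nfs4_do_setlk(). Both seqids must be freed
 * on every path: nfs_free_seqid() is also what wakes the next waiter
 * on the owner's sequence queue. */
largs.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid);
if (largs.lock_seqid == NULL)
	return -ENOMEM;
if (!(lsp->ls_seqid.flags & NFS_SEQID_CONFIRMED)) {
	largs.open_seqid = nfs_alloc_seqid(&state->owner->so_seqid);
	if (largs.open_seqid == NULL)
		goto out;			/* status stays -ENOMEM */
	largs.new_lock_owner = 1;		/* open_to_lock_owner4 on the wire */
	/* ... rpc_call_sync(); nfs_increment_open_seqid(); ... */
	nfs_free_seqid(largs.open_seqid);
} else {
	/* ... rpc_call_sync(); exist_lock_owner4 for a confirmed owner ... */
}
out:
	nfs_free_seqid(largs.lock_seqid);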
@@ -2865,11 +3068,9 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
2865 int status; 3068 int status;
2866 3069
2867 down_read(&clp->cl_sem); 3070 down_read(&clp->cl_sem);
2868 down(&state->lock_sema);
2869 status = nfs4_set_lock_state(state, request); 3071 status = nfs4_set_lock_state(state, request);
2870 if (status == 0) 3072 if (status == 0)
2871 status = _nfs4_do_setlk(state, cmd, request, 0); 3073 status = _nfs4_do_setlk(state, cmd, request, 0);
2872 up(&state->lock_sema);
2873 if (status == 0) { 3074 if (status == 0) {
2874 /* Note: we always want to sleep here! */ 3075 /* Note: we always want to sleep here! */
2875 request->fl_flags |= FL_SLEEP; 3076 request->fl_flags |= FL_SLEEP;
@@ -3024,8 +3225,8 @@ struct nfs_rpc_ops nfs_v4_clientops = {
3024 .read_setup = nfs4_proc_read_setup, 3225 .read_setup = nfs4_proc_read_setup,
3025 .write_setup = nfs4_proc_write_setup, 3226 .write_setup = nfs4_proc_write_setup,
3026 .commit_setup = nfs4_proc_commit_setup, 3227 .commit_setup = nfs4_proc_commit_setup,
3027 .file_open = nfs4_proc_file_open, 3228 .file_open = nfs_open,
3028 .file_release = nfs4_proc_file_release, 3229 .file_release = nfs_release,
3029 .lock = nfs4_proc_lock, 3230 .lock = nfs4_proc_lock,
3030 .clear_acl_cache = nfs4_zap_acl_attr, 3231 .clear_acl_cache = nfs4_zap_acl_attr,
3031}; 3232};
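
With the open/release state juggling deleted above, the v4 ops table can point straight at the generic client helpers. For context, this is the dispatch-table pattern the change relies on (a sketch; only the two rows are from the hunk, and the struct layout is abridged):

/* Abridged sketch of the nfs_rpc_ops dispatch pattern. VFS-facing
 * code calls server->rpc_ops->file_open() and never needs to know
 * whether it is talking to v2, v3 or v4. */
struct nfs_rpc_ops {
	int (*file_open)(struct inode *inode, struct file *filp);
	int (*file_release)(struct inode *inode, struct file *filp);
	/* ... */
};

struct nfs_rpc_ops nfs_v4_clientops = {
	.file_open	= nfs_open,	/* generic helper, was nfs4_proc_file_open */
	.file_release	= nfs_release,	/* generic helper, was nfs4_proc_file_release */
	/* ... */
};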
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index afe587d82f1e..2d5a6a2b9dec 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -264,13 +264,16 @@ nfs4_alloc_state_owner(void)
264{ 264{
265 struct nfs4_state_owner *sp; 265 struct nfs4_state_owner *sp;
266 266
267 sp = kmalloc(sizeof(*sp),GFP_KERNEL); 267 sp = kzalloc(sizeof(*sp),GFP_KERNEL);
268 if (!sp) 268 if (!sp)
269 return NULL; 269 return NULL;
270 init_MUTEX(&sp->so_sema); 270 spin_lock_init(&sp->so_lock);
271 sp->so_seqid = 0; /* arbitrary */
272 INIT_LIST_HEAD(&sp->so_states); 271 INIT_LIST_HEAD(&sp->so_states);
273 INIT_LIST_HEAD(&sp->so_delegations); 272 INIT_LIST_HEAD(&sp->so_delegations);
273 rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue");
274 sp->so_seqid.sequence = &sp->so_sequence;
275 spin_lock_init(&sp->so_sequence.lock);
276 INIT_LIST_HEAD(&sp->so_sequence.list);
274 atomic_set(&sp->so_count, 1); 277 atomic_set(&sp->so_count, 1);
275 return sp; 278 return sp;
276} 279}
@@ -359,7 +362,6 @@ nfs4_alloc_open_state(void)
359 memset(state->stateid.data, 0, sizeof(state->stateid.data)); 362 memset(state->stateid.data, 0, sizeof(state->stateid.data));
360 atomic_set(&state->count, 1); 363 atomic_set(&state->count, 1);
361 INIT_LIST_HEAD(&state->lock_states); 364 INIT_LIST_HEAD(&state->lock_states);
362 init_MUTEX(&state->lock_sema);
363 spin_lock_init(&state->state_lock); 365 spin_lock_init(&state->state_lock);
364 return state; 366 return state;
365} 367}
@@ -437,21 +439,23 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
437 if (state) 439 if (state)
438 goto out; 440 goto out;
439 new = nfs4_alloc_open_state(); 441 new = nfs4_alloc_open_state();
442 spin_lock(&owner->so_lock);
440 spin_lock(&inode->i_lock); 443 spin_lock(&inode->i_lock);
441 state = __nfs4_find_state_byowner(inode, owner); 444 state = __nfs4_find_state_byowner(inode, owner);
442 if (state == NULL && new != NULL) { 445 if (state == NULL && new != NULL) {
443 state = new; 446 state = new;
444 /* Caller *must* be holding owner->so_sem */
445 /* Note: The reclaim code dictates that we add stateless
446 * and read-only stateids to the end of the list */
447 list_add_tail(&state->open_states, &owner->so_states);
448 state->owner = owner; 447 state->owner = owner;
449 atomic_inc(&owner->so_count); 448 atomic_inc(&owner->so_count);
450 list_add(&state->inode_states, &nfsi->open_states); 449 list_add(&state->inode_states, &nfsi->open_states);
451 state->inode = igrab(inode); 450 state->inode = igrab(inode);
452 spin_unlock(&inode->i_lock); 451 spin_unlock(&inode->i_lock);
452 /* Note: The reclaim code dictates that we add stateless
453 * and read-only stateids to the end of the list */
454 list_add_tail(&state->open_states, &owner->so_states);
455 spin_unlock(&owner->so_lock);
453 } else { 456 } else {
454 spin_unlock(&inode->i_lock); 457 spin_unlock(&inode->i_lock);
458 spin_unlock(&owner->so_lock);
455 if (new) 459 if (new)
456 nfs4_free_open_state(new); 460 nfs4_free_open_state(new);
457 } 461 }
@@ -461,19 +465,21 @@ out:
461 465
462/* 466/*
463 * Beware! Caller must be holding exactly one 467 * Beware! Caller must be holding exactly one
464 * reference to clp->cl_sem and owner->so_sema! 468 * reference to clp->cl_sem!
465 */ 469 */
466void nfs4_put_open_state(struct nfs4_state *state) 470void nfs4_put_open_state(struct nfs4_state *state)
467{ 471{
468 struct inode *inode = state->inode; 472 struct inode *inode = state->inode;
469 struct nfs4_state_owner *owner = state->owner; 473 struct nfs4_state_owner *owner = state->owner;
470 474
471 if (!atomic_dec_and_lock(&state->count, &inode->i_lock)) 475 if (!atomic_dec_and_lock(&state->count, &owner->so_lock))
472 return; 476 return;
477 spin_lock(&inode->i_lock);
473 if (!list_empty(&state->inode_states)) 478 if (!list_empty(&state->inode_states))
474 list_del(&state->inode_states); 479 list_del(&state->inode_states);
475 spin_unlock(&inode->i_lock);
476 list_del(&state->open_states); 480 list_del(&state->open_states);
481 spin_unlock(&inode->i_lock);
482 spin_unlock(&owner->so_lock);
477 iput(inode); 483 iput(inode);
478 BUG_ON (state->state != 0); 484 BUG_ON (state->state != 0);
479 nfs4_free_open_state(state); 485 nfs4_free_open_state(state);
@@ -481,20 +487,17 @@ void nfs4_put_open_state(struct nfs4_state *state)
481} 487}
482 488
483/* 489/*
484 * Beware! Caller must be holding no references to clp->cl_sem! 490 * Close the current file.
485 * of owner->so_sema!
486 */ 491 */
487void nfs4_close_state(struct nfs4_state *state, mode_t mode) 492void nfs4_close_state(struct nfs4_state *state, mode_t mode)
488{ 493{
489 struct inode *inode = state->inode; 494 struct inode *inode = state->inode;
490 struct nfs4_state_owner *owner = state->owner; 495 struct nfs4_state_owner *owner = state->owner;
491 struct nfs4_client *clp = owner->so_client;
492 int newstate; 496 int newstate;
493 497
494 atomic_inc(&owner->so_count); 498 atomic_inc(&owner->so_count);
495 down_read(&clp->cl_sem);
496 down(&owner->so_sema);
497 /* Protect against nfs4_find_state() */ 499 /* Protect against nfs4_find_state() */
500 spin_lock(&owner->so_lock);
498 spin_lock(&inode->i_lock); 501 spin_lock(&inode->i_lock);
499 if (mode & FMODE_READ) 502 if (mode & FMODE_READ)
500 state->nreaders--; 503 state->nreaders--;
@@ -507,6 +510,7 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
507 list_move_tail(&state->open_states, &owner->so_states); 510 list_move_tail(&state->open_states, &owner->so_states);
508 } 511 }
509 spin_unlock(&inode->i_lock); 512 spin_unlock(&inode->i_lock);
513 spin_unlock(&owner->so_lock);
510 newstate = 0; 514 newstate = 0;
511 if (state->state != 0) { 515 if (state->state != 0) {
512 if (state->nreaders) 516 if (state->nreaders)
@@ -515,14 +519,16 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
515 newstate |= FMODE_WRITE; 519 newstate |= FMODE_WRITE;
516 if (state->state == newstate) 520 if (state->state == newstate)
517 goto out; 521 goto out;
518 if (nfs4_do_close(inode, state, newstate) == -EINPROGRESS) 522 if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
523 state->state = newstate;
524 goto out;
525 }
526 if (nfs4_do_close(inode, state, newstate) == 0)
519 return; 527 return;
520 } 528 }
521out: 529out:
522 nfs4_put_open_state(state); 530 nfs4_put_open_state(state);
523 up(&owner->so_sema);
524 nfs4_put_state_owner(owner); 531 nfs4_put_state_owner(owner);
525 up_read(&clp->cl_sem);
526} 532}
527 533
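
nfs4_put_open_state() now keys the final put on owner->so_lock rather than inode->i_lock, via atomic_dec_and_lock(), which decrements and takes the spinlock only when the count is about to hit zero. A minimal illustration of the idiom (obj and parent are hypothetical types):

/* atomic_dec_and_lock(): decrement, and return with the lock held
 * only if the count reached zero. Sketch of the put-side idiom used
 * by nfs4_put_open_state() above. */
static void obj_put(struct obj *o, struct parent *p)
{
	if (!atomic_dec_and_lock(&o->count, &p->lock))
		return;		/* fast path: not the last reference */
	list_del(&o->list);	/* unlink under p->lock */
	spin_unlock(&p->lock);
	kfree(o);		/* safe: nobody can find it any more */
}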
528/* 534/*
@@ -546,19 +552,16 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
546 * Return a compatible lock_state. If no initialized lock_state structure 552 * Return a compatible lock_state. If no initialized lock_state structure
547 * exists, return an uninitialized one. 553 * exists, return an uninitialized one.
548 * 554 *
549 * The caller must be holding state->lock_sema
550 */ 555 */
551static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) 556static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
552{ 557{
553 struct nfs4_lock_state *lsp; 558 struct nfs4_lock_state *lsp;
554 struct nfs4_client *clp = state->owner->so_client; 559 struct nfs4_client *clp = state->owner->so_client;
555 560
556 lsp = kmalloc(sizeof(*lsp), GFP_KERNEL); 561 lsp = kzalloc(sizeof(*lsp), GFP_KERNEL);
557 if (lsp == NULL) 562 if (lsp == NULL)
558 return NULL; 563 return NULL;
559 lsp->ls_flags = 0; 564 lsp->ls_seqid.sequence = &state->owner->so_sequence;
560 lsp->ls_seqid = 0; /* arbitrary */
561 memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data));
562 atomic_set(&lsp->ls_count, 1); 565 atomic_set(&lsp->ls_count, 1);
563 lsp->ls_owner = fl_owner; 566 lsp->ls_owner = fl_owner;
564 spin_lock(&clp->cl_lock); 567 spin_lock(&clp->cl_lock);
@@ -572,7 +575,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
572 * Return a compatible lock_state. If no initialized lock_state structure 575 * Return a compatible lock_state. If no initialized lock_state structure
573 * exists, return an uninitialized one. 576 * exists, return an uninitialized one.
574 * 577 *
575 * The caller must be holding state->lock_sema and clp->cl_sem 578 * The caller must be holding clp->cl_sem
576 */ 579 */
577static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) 580static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
578{ 581{
@@ -605,7 +608,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
605 * Release reference to lock_state, and free it if we see that 608 * Release reference to lock_state, and free it if we see that
606 * it is no longer in use 609 * it is no longer in use
607 */ 610 */
608static void nfs4_put_lock_state(struct nfs4_lock_state *lsp) 611void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
609{ 612{
610 struct nfs4_state *state; 613 struct nfs4_state *state;
611 614
@@ -673,29 +676,94 @@ void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t f
673 nfs4_put_lock_state(lsp); 676 nfs4_put_lock_state(lsp);
674} 677}
675 678
676/* 679struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter)
677* Called with state->lock_sema and clp->cl_sem held.
678*/
679void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp)
680{ 680{
681 if (status == NFS_OK || seqid_mutating_err(-status)) 681 struct nfs_seqid *new;
682 lsp->ls_seqid++; 682
683 new = kmalloc(sizeof(*new), GFP_KERNEL);
684 if (new != NULL) {
685 new->sequence = counter;
686 INIT_LIST_HEAD(&new->list);
687 }
688 return new;
689}
690
691void nfs_free_seqid(struct nfs_seqid *seqid)
692{
693 struct rpc_sequence *sequence = seqid->sequence->sequence;
694
695 if (!list_empty(&seqid->list)) {
696 spin_lock(&sequence->lock);
697 list_del(&seqid->list);
698 spin_unlock(&sequence->lock);
699 }
700 rpc_wake_up_next(&sequence->wait);
701 kfree(seqid);
683} 702}
684 703
685/* 704/*
686* Called with sp->so_sema and clp->cl_sem held. 705 * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
687* 706 * failed with a seqid incrementing error -
688* Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or 707 * see comments nfs_fs.h:seqid_mutating_error()
689* failed with a seqid incrementing error - 708 */
690* see comments nfs_fs.h:seqid_mutating_error() 709static inline void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
691*/ 710{
692void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp) 711 switch (status) {
693{ 712 case 0:
694 if (status == NFS_OK || seqid_mutating_err(-status)) 713 break;
695 sp->so_seqid++; 714 case -NFS4ERR_BAD_SEQID:
696 /* If the server returns BAD_SEQID, unhash state_owner here */ 715 case -NFS4ERR_STALE_CLIENTID:
697 if (status == -NFS4ERR_BAD_SEQID) 716 case -NFS4ERR_STALE_STATEID:
717 case -NFS4ERR_BAD_STATEID:
718 case -NFS4ERR_BADXDR:
719 case -NFS4ERR_RESOURCE:
720 case -NFS4ERR_NOFILEHANDLE:
721 /* Non-seqid mutating errors */
722 return;
723 };
724 /*
725 * Note: no locking needed as we are guaranteed to be first
726 * on the sequence list
727 */
728 seqid->sequence->counter++;
729}
730
731void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
732{
733 if (status == -NFS4ERR_BAD_SEQID) {
734 struct nfs4_state_owner *sp = container_of(seqid->sequence,
735 struct nfs4_state_owner, so_seqid);
698 nfs4_drop_state_owner(sp); 736 nfs4_drop_state_owner(sp);
737 }
738 return nfs_increment_seqid(status, seqid);
739}
740
741/*
742 * Increment the seqid if the LOCK/LOCKU succeeded, or
743 * failed with a seqid incrementing error -
744 * see comments nfs_fs.h:seqid_mutating_error()
745 */
746void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
747{
748 return nfs_increment_seqid(status, seqid);
749}
750
751int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
752{
753 struct rpc_sequence *sequence = seqid->sequence->sequence;
754 int status = 0;
755
756 if (sequence->list.next == &seqid->list)
757 goto out;
758 spin_lock(&sequence->lock);
759 if (!list_empty(&sequence->list)) {
760 rpc_sleep_on(&sequence->wait, task, NULL, NULL);
761 status = -EAGAIN;
762 } else
763 list_add(&seqid->list, &sequence->list);
764 spin_unlock(&sequence->lock);
765out:
766 return status;
699} 767}
700 768
701static int reclaimer(void *); 769static int reclaimer(void *);
@@ -791,8 +859,6 @@ static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct n
791 if (state->state == 0) 859 if (state->state == 0)
792 continue; 860 continue;
793 status = ops->recover_open(sp, state); 861 status = ops->recover_open(sp, state);
794 list_for_each_entry(lock, &state->lock_states, ls_locks)
795 lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
796 if (status >= 0) { 862 if (status >= 0) {
797 status = nfs4_reclaim_locks(ops, state); 863 status = nfs4_reclaim_locks(ops, state);
798 if (status < 0) 864 if (status < 0)
@@ -831,6 +897,28 @@ out_err:
831 return status; 897 return status;
832} 898}
833 899
900static void nfs4_state_mark_reclaim(struct nfs4_client *clp)
901{
902 struct nfs4_state_owner *sp;
903 struct nfs4_state *state;
904 struct nfs4_lock_state *lock;
905
906 /* Reset all sequence ids to zero */
907 list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
908 sp->so_seqid.counter = 0;
909 sp->so_seqid.flags = 0;
910 spin_lock(&sp->so_lock);
911 list_for_each_entry(state, &sp->so_states, open_states) {
912 list_for_each_entry(lock, &state->lock_states, ls_locks) {
913 lock->ls_seqid.counter = 0;
914 lock->ls_seqid.flags = 0;
915 lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
916 }
917 }
918 spin_unlock(&sp->so_lock);
919 }
920}
921
834static int reclaimer(void *ptr) 922static int reclaimer(void *ptr)
835{ 923{
836 struct reclaimer_args *args = (struct reclaimer_args *)ptr; 924 struct reclaimer_args *args = (struct reclaimer_args *)ptr;
@@ -864,6 +952,7 @@ restart_loop:
864 default: 952 default:
865 ops = &nfs4_network_partition_recovery_ops; 953 ops = &nfs4_network_partition_recovery_ops;
866 }; 954 };
955 nfs4_state_mark_reclaim(clp);
867 status = __nfs4_init_client(clp); 956 status = __nfs4_init_client(clp);
868 if (status) 957 if (status)
869 goto out_error; 958 goto out_error;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 6c564ef9489e..fbbace8a30c4 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -95,6 +95,8 @@ static int nfs_stat_to_errno(int);
95#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) 95#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
96#define encode_savefh_maxsz (op_encode_hdr_maxsz) 96#define encode_savefh_maxsz (op_encode_hdr_maxsz)
97#define decode_savefh_maxsz (op_decode_hdr_maxsz) 97#define decode_savefh_maxsz (op_decode_hdr_maxsz)
98#define encode_restorefh_maxsz (op_encode_hdr_maxsz)
99#define decode_restorefh_maxsz (op_decode_hdr_maxsz)
98#define encode_fsinfo_maxsz (op_encode_hdr_maxsz + 2) 100#define encode_fsinfo_maxsz (op_encode_hdr_maxsz + 2)
99#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 11) 101#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 11)
100#define encode_renew_maxsz (op_encode_hdr_maxsz + 3) 102#define encode_renew_maxsz (op_encode_hdr_maxsz + 3)
@@ -157,16 +159,20 @@ static int nfs_stat_to_errno(int);
157 op_decode_hdr_maxsz + 2) 159 op_decode_hdr_maxsz + 2)
158#define NFS4_enc_write_sz (compound_encode_hdr_maxsz + \ 160#define NFS4_enc_write_sz (compound_encode_hdr_maxsz + \
159 encode_putfh_maxsz + \ 161 encode_putfh_maxsz + \
160 op_encode_hdr_maxsz + 8) 162 op_encode_hdr_maxsz + 8 + \
163 encode_getattr_maxsz)
161#define NFS4_dec_write_sz (compound_decode_hdr_maxsz + \ 164#define NFS4_dec_write_sz (compound_decode_hdr_maxsz + \
162 decode_putfh_maxsz + \ 165 decode_putfh_maxsz + \
163 op_decode_hdr_maxsz + 4) 166 op_decode_hdr_maxsz + 4 + \
167 decode_getattr_maxsz)
164#define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \ 168#define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \
165 encode_putfh_maxsz + \ 169 encode_putfh_maxsz + \
166 op_encode_hdr_maxsz + 3) 170 op_encode_hdr_maxsz + 3 + \
171 encode_getattr_maxsz)
167#define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \ 172#define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \
168 decode_putfh_maxsz + \ 173 decode_putfh_maxsz + \
169 op_decode_hdr_maxsz + 2) 174 op_decode_hdr_maxsz + 2 + \
175 decode_getattr_maxsz)
170#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \ 176#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \
171 encode_putfh_maxsz + \ 177 encode_putfh_maxsz + \
172 op_encode_hdr_maxsz + \ 178 op_encode_hdr_maxsz + \
@@ -196,17 +202,21 @@ static int nfs_stat_to_errno(int);
196#define NFS4_enc_open_downgrade_sz \ 202#define NFS4_enc_open_downgrade_sz \
197 (compound_encode_hdr_maxsz + \ 203 (compound_encode_hdr_maxsz + \
198 encode_putfh_maxsz + \ 204 encode_putfh_maxsz + \
199 op_encode_hdr_maxsz + 7) 205 op_encode_hdr_maxsz + 7 + \
206 encode_getattr_maxsz)
200#define NFS4_dec_open_downgrade_sz \ 207#define NFS4_dec_open_downgrade_sz \
201 (compound_decode_hdr_maxsz + \ 208 (compound_decode_hdr_maxsz + \
202 decode_putfh_maxsz + \ 209 decode_putfh_maxsz + \
203 op_decode_hdr_maxsz + 4) 210 op_decode_hdr_maxsz + 4 + \
211 decode_getattr_maxsz)
204#define NFS4_enc_close_sz (compound_encode_hdr_maxsz + \ 212#define NFS4_enc_close_sz (compound_encode_hdr_maxsz + \
205 encode_putfh_maxsz + \ 213 encode_putfh_maxsz + \
206 op_encode_hdr_maxsz + 5) 214 op_encode_hdr_maxsz + 5 + \
215 encode_getattr_maxsz)
207#define NFS4_dec_close_sz (compound_decode_hdr_maxsz + \ 216#define NFS4_dec_close_sz (compound_decode_hdr_maxsz + \
208 decode_putfh_maxsz + \ 217 decode_putfh_maxsz + \
209 op_decode_hdr_maxsz + 4) 218 op_decode_hdr_maxsz + 4 + \
219 decode_getattr_maxsz)
210#define NFS4_enc_setattr_sz (compound_encode_hdr_maxsz + \ 220#define NFS4_enc_setattr_sz (compound_encode_hdr_maxsz + \
211 encode_putfh_maxsz + \ 221 encode_putfh_maxsz + \
212 op_encode_hdr_maxsz + 4 + \ 222 op_encode_hdr_maxsz + 4 + \
@@ -300,30 +310,44 @@ static int nfs_stat_to_errno(int);
300 decode_getfh_maxsz) 310 decode_getfh_maxsz)
301#define NFS4_enc_remove_sz (compound_encode_hdr_maxsz + \ 311#define NFS4_enc_remove_sz (compound_encode_hdr_maxsz + \
302 encode_putfh_maxsz + \ 312 encode_putfh_maxsz + \
303 encode_remove_maxsz) 313 encode_remove_maxsz + \
314 encode_getattr_maxsz)
304#define NFS4_dec_remove_sz (compound_decode_hdr_maxsz + \ 315#define NFS4_dec_remove_sz (compound_decode_hdr_maxsz + \
305 decode_putfh_maxsz + \ 316 decode_putfh_maxsz + \
306 op_decode_hdr_maxsz + 5) 317 op_decode_hdr_maxsz + 5 + \
318 decode_getattr_maxsz)
307#define NFS4_enc_rename_sz (compound_encode_hdr_maxsz + \ 319#define NFS4_enc_rename_sz (compound_encode_hdr_maxsz + \
308 encode_putfh_maxsz + \ 320 encode_putfh_maxsz + \
309 encode_savefh_maxsz + \ 321 encode_savefh_maxsz + \
310 encode_putfh_maxsz + \ 322 encode_putfh_maxsz + \
311 encode_rename_maxsz) 323 encode_rename_maxsz + \
324 encode_getattr_maxsz + \
325 encode_restorefh_maxsz + \
326 encode_getattr_maxsz)
312#define NFS4_dec_rename_sz (compound_decode_hdr_maxsz + \ 327#define NFS4_dec_rename_sz (compound_decode_hdr_maxsz + \
313 decode_putfh_maxsz + \ 328 decode_putfh_maxsz + \
314 decode_savefh_maxsz + \ 329 decode_savefh_maxsz + \
315 decode_putfh_maxsz + \ 330 decode_putfh_maxsz + \
316 decode_rename_maxsz) 331 decode_rename_maxsz + \
332 decode_getattr_maxsz + \
333 decode_restorefh_maxsz + \
334 decode_getattr_maxsz)
317#define NFS4_enc_link_sz (compound_encode_hdr_maxsz + \ 335#define NFS4_enc_link_sz (compound_encode_hdr_maxsz + \
318 encode_putfh_maxsz + \ 336 encode_putfh_maxsz + \
319 encode_savefh_maxsz + \ 337 encode_savefh_maxsz + \
320 encode_putfh_maxsz + \ 338 encode_putfh_maxsz + \
321 encode_link_maxsz) 339 encode_link_maxsz + \
340 decode_getattr_maxsz + \
341 encode_restorefh_maxsz + \
342 decode_getattr_maxsz)
322#define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \ 343#define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \
323 decode_putfh_maxsz + \ 344 decode_putfh_maxsz + \
324 decode_savefh_maxsz + \ 345 decode_savefh_maxsz + \
325 decode_putfh_maxsz + \ 346 decode_putfh_maxsz + \
326 decode_link_maxsz) 347 decode_link_maxsz + \
348 decode_getattr_maxsz + \
349 decode_restorefh_maxsz + \
350 decode_getattr_maxsz)
327#define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \ 351#define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \
328 encode_putfh_maxsz + \ 352 encode_putfh_maxsz + \
329 encode_symlink_maxsz + \ 353 encode_symlink_maxsz + \
@@ -336,14 +360,20 @@ static int nfs_stat_to_errno(int);
336 decode_getfh_maxsz) 360 decode_getfh_maxsz)
337#define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \ 361#define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \
338 encode_putfh_maxsz + \ 362 encode_putfh_maxsz + \
363 encode_savefh_maxsz + \
339 encode_create_maxsz + \ 364 encode_create_maxsz + \
365 encode_getfh_maxsz + \
340 encode_getattr_maxsz + \ 366 encode_getattr_maxsz + \
341 encode_getfh_maxsz) 367 encode_restorefh_maxsz + \
368 encode_getattr_maxsz)
342#define NFS4_dec_create_sz (compound_decode_hdr_maxsz + \ 369#define NFS4_dec_create_sz (compound_decode_hdr_maxsz + \
343 decode_putfh_maxsz + \ 370 decode_putfh_maxsz + \
371 decode_savefh_maxsz + \
344 decode_create_maxsz + \ 372 decode_create_maxsz + \
373 decode_getfh_maxsz + \
345 decode_getattr_maxsz + \ 374 decode_getattr_maxsz + \
346 decode_getfh_maxsz) 375 decode_restorefh_maxsz + \
376 decode_getattr_maxsz)
347#define NFS4_enc_pathconf_sz (compound_encode_hdr_maxsz + \ 377#define NFS4_enc_pathconf_sz (compound_encode_hdr_maxsz + \
348 encode_putfh_maxsz + \ 378 encode_putfh_maxsz + \
349 encode_getattr_maxsz) 379 encode_getattr_maxsz)
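
Every compound that grew a trailing GETATTR (and, for the two-filehandle operations, a RESTOREFH plus a second GETATTR) needs its worst-case request and reply sizes bumped here, since these *_maxsz sums are what the RPC layer uses to size the send and receive buffers. Schematically, for a made-up FROB operation:

/* Schematic only: FROB is a hypothetical op. Each compound's maxsz is
 * the sum of the per-op maxima, in the order the ops are encoded;
 * adding ops to a compound without adding their sizes here risks
 * overflowing the preallocated xdr buffer. */
#define NFS4_enc_frob_sz	(compound_encode_hdr_maxsz + \
				encode_putfh_maxsz + \
				encode_frob_maxsz + \
				encode_getattr_maxsz)
#define NFS4_dec_frob_sz	(compound_decode_hdr_maxsz + \
				decode_putfh_maxsz + \
				decode_frob_maxsz + \
				decode_getattr_maxsz)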
@@ -602,10 +632,10 @@ static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg)
602{ 632{
603 uint32_t *p; 633 uint32_t *p;
604 634
605 RESERVE_SPACE(8+sizeof(arg->stateid.data)); 635 RESERVE_SPACE(8+sizeof(arg->stateid->data));
606 WRITE32(OP_CLOSE); 636 WRITE32(OP_CLOSE);
607 WRITE32(arg->seqid); 637 WRITE32(arg->seqid->sequence->counter);
608 WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); 638 WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data));
609 639
610 return 0; 640 return 0;
611} 641}
@@ -729,22 +759,18 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
729 WRITE64(arg->length); 759 WRITE64(arg->length);
730 WRITE32(opargs->new_lock_owner); 760 WRITE32(opargs->new_lock_owner);
731 if (opargs->new_lock_owner){ 761 if (opargs->new_lock_owner){
732 struct nfs_open_to_lock *ol = opargs->u.open_lock;
733
734 RESERVE_SPACE(40); 762 RESERVE_SPACE(40);
735 WRITE32(ol->open_seqid); 763 WRITE32(opargs->open_seqid->sequence->counter);
736 WRITEMEM(&ol->open_stateid, sizeof(ol->open_stateid)); 764 WRITEMEM(opargs->open_stateid->data, sizeof(opargs->open_stateid->data));
737 WRITE32(ol->lock_seqid); 765 WRITE32(opargs->lock_seqid->sequence->counter);
738 WRITE64(ol->lock_owner.clientid); 766 WRITE64(opargs->lock_owner.clientid);
739 WRITE32(4); 767 WRITE32(4);
740 WRITE32(ol->lock_owner.id); 768 WRITE32(opargs->lock_owner.id);
741 } 769 }
742 else { 770 else {
743 struct nfs_exist_lock *el = opargs->u.exist_lock;
744
745 RESERVE_SPACE(20); 771 RESERVE_SPACE(20);
746 WRITEMEM(&el->stateid, sizeof(el->stateid)); 772 WRITEMEM(opargs->lock_stateid->data, sizeof(opargs->lock_stateid->data));
747 WRITE32(el->seqid); 773 WRITE32(opargs->lock_seqid->sequence->counter);
748 } 774 }
749 775
750 return 0; 776 return 0;
@@ -775,8 +801,8 @@ static int encode_locku(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
775 RESERVE_SPACE(44); 801 RESERVE_SPACE(44);
776 WRITE32(OP_LOCKU); 802 WRITE32(OP_LOCKU);
777 WRITE32(arg->type); 803 WRITE32(arg->type);
778 WRITE32(opargs->seqid); 804 WRITE32(opargs->seqid->sequence->counter);
779 WRITEMEM(&opargs->stateid, sizeof(opargs->stateid)); 805 WRITEMEM(opargs->stateid->data, sizeof(opargs->stateid->data));
780 WRITE64(arg->offset); 806 WRITE64(arg->offset);
781 WRITE64(arg->length); 807 WRITE64(arg->length);
782 808
@@ -826,7 +852,7 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
826 */ 852 */
827 RESERVE_SPACE(8); 853 RESERVE_SPACE(8);
828 WRITE32(OP_OPEN); 854 WRITE32(OP_OPEN);
829 WRITE32(arg->seqid); 855 WRITE32(arg->seqid->sequence->counter);
830 encode_share_access(xdr, arg->open_flags); 856 encode_share_access(xdr, arg->open_flags);
831 RESERVE_SPACE(16); 857 RESERVE_SPACE(16);
832 WRITE64(arg->clientid); 858 WRITE64(arg->clientid);
@@ -941,7 +967,7 @@ static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_con
941 RESERVE_SPACE(8+sizeof(arg->stateid.data)); 967 RESERVE_SPACE(8+sizeof(arg->stateid.data));
942 WRITE32(OP_OPEN_CONFIRM); 968 WRITE32(OP_OPEN_CONFIRM);
943 WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); 969 WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
944 WRITE32(arg->seqid); 970 WRITE32(arg->seqid->sequence->counter);
945 971
946 return 0; 972 return 0;
947} 973}
@@ -950,10 +976,10 @@ static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closea
950{ 976{
951 uint32_t *p; 977 uint32_t *p;
952 978
953 RESERVE_SPACE(8+sizeof(arg->stateid.data)); 979 RESERVE_SPACE(8+sizeof(arg->stateid->data));
954 WRITE32(OP_OPEN_DOWNGRADE); 980 WRITE32(OP_OPEN_DOWNGRADE);
955 WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); 981 WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data));
956 WRITE32(arg->seqid); 982 WRITE32(arg->seqid->sequence->counter);
957 encode_share_access(xdr, arg->open_flags); 983 encode_share_access(xdr, arg->open_flags);
958 return 0; 984 return 0;
959} 985}
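
All of these encoders follow the same two-step discipline: RESERVE_SPACE() claims a fixed number of bytes in the xdr stream, then WRITE32/WRITE64/WRITEMEM fill exactly that many. The seqid changes simply swap a cached integer for `seqid->sequence->counter`, read at encode time so the value on the wire reflects any increments since the request was queued. A minimal encoder in the same style (hypothetical op and args struct; the macros are the real ones used above):

/* Hypothetical OP_FROB encoder in the style of encode_close() above.
 * RESERVE_SPACE must match the bytes actually written:
 * 4 (opcode) + 4 (seqid) + stateid. */
static int encode_frob(struct xdr_stream *xdr, const struct nfs_frobargs *arg)
{
	uint32_t *p;

	RESERVE_SPACE(8 + sizeof(arg->stateid->data));
	WRITE32(OP_FROB);				/* hypothetical opcode */
	WRITE32(arg->seqid->sequence->counter);		/* live counter, not a snapshot */
	WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data));
	return 0;
}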
@@ -1117,6 +1143,17 @@ static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client
1117} 1143}
1118 1144
1119static int 1145static int
1146encode_restorefh(struct xdr_stream *xdr)
1147{
1148 uint32_t *p;
1149
1150 RESERVE_SPACE(4);
1151 WRITE32(OP_RESTOREFH);
1152
1153 return 0;
1154}
1155
1156static int
1120encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg) 1157encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg)
1121{ 1158{
1122 uint32_t *p; 1159 uint32_t *p;
@@ -1296,14 +1333,18 @@ static int nfs4_xdr_enc_remove(struct rpc_rqst *req, uint32_t *p, const struct n
1296{ 1333{
1297 struct xdr_stream xdr; 1334 struct xdr_stream xdr;
1298 struct compound_hdr hdr = { 1335 struct compound_hdr hdr = {
1299 .nops = 2, 1336 .nops = 3,
1300 }; 1337 };
1301 int status; 1338 int status;
1302 1339
1303 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1340 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1304 encode_compound_hdr(&xdr, &hdr); 1341 encode_compound_hdr(&xdr, &hdr);
1305 if ((status = encode_putfh(&xdr, args->fh)) == 0) 1342 if ((status = encode_putfh(&xdr, args->fh)) != 0)
1306 status = encode_remove(&xdr, args->name); 1343 goto out;
1344 if ((status = encode_remove(&xdr, args->name)) != 0)
1345 goto out;
1346 status = encode_getfattr(&xdr, args->bitmask);
1347out:
1307 return status; 1348 return status;
1308} 1349}
1309 1350
@@ -1314,7 +1355,7 @@ static int nfs4_xdr_enc_rename(struct rpc_rqst *req, uint32_t *p, const struct n
1314{ 1355{
1315 struct xdr_stream xdr; 1356 struct xdr_stream xdr;
1316 struct compound_hdr hdr = { 1357 struct compound_hdr hdr = {
1317 .nops = 4, 1358 .nops = 7,
1318 }; 1359 };
1319 int status; 1360 int status;
1320 1361
@@ -1326,7 +1367,13 @@ static int nfs4_xdr_enc_rename(struct rpc_rqst *req, uint32_t *p, const struct n
1326 goto out; 1367 goto out;
1327 if ((status = encode_putfh(&xdr, args->new_dir)) != 0) 1368 if ((status = encode_putfh(&xdr, args->new_dir)) != 0)
1328 goto out; 1369 goto out;
1329 status = encode_rename(&xdr, args->old_name, args->new_name); 1370 if ((status = encode_rename(&xdr, args->old_name, args->new_name)) != 0)
1371 goto out;
1372 if ((status = encode_getfattr(&xdr, args->bitmask)) != 0)
1373 goto out;
1374 if ((status = encode_restorefh(&xdr)) != 0)
1375 goto out;
1376 status = encode_getfattr(&xdr, args->bitmask);
1330out: 1377out:
1331 return status; 1378 return status;
1332} 1379}
@@ -1338,7 +1385,7 @@ static int nfs4_xdr_enc_link(struct rpc_rqst *req, uint32_t *p, const struct nfs
1338{ 1385{
1339 struct xdr_stream xdr; 1386 struct xdr_stream xdr;
1340 struct compound_hdr hdr = { 1387 struct compound_hdr hdr = {
1341 .nops = 4, 1388 .nops = 7,
1342 }; 1389 };
1343 int status; 1390 int status;
1344 1391
@@ -1350,7 +1397,13 @@ static int nfs4_xdr_enc_link(struct rpc_rqst *req, uint32_t *p, const struct nfs
1350 goto out; 1397 goto out;
1351 if ((status = encode_putfh(&xdr, args->dir_fh)) != 0) 1398 if ((status = encode_putfh(&xdr, args->dir_fh)) != 0)
1352 goto out; 1399 goto out;
1353 status = encode_link(&xdr, args->name); 1400 if ((status = encode_link(&xdr, args->name)) != 0)
1401 goto out;
1402 if ((status = encode_getfattr(&xdr, args->bitmask)) != 0)
1403 goto out;
1404 if ((status = encode_restorefh(&xdr)) != 0)
1405 goto out;
1406 status = encode_getfattr(&xdr, args->bitmask);
1354out: 1407out:
1355 return status; 1408 return status;
1356} 1409}
@@ -1362,7 +1415,7 @@ static int nfs4_xdr_enc_create(struct rpc_rqst *req, uint32_t *p, const struct n
1362{ 1415{
1363 struct xdr_stream xdr; 1416 struct xdr_stream xdr;
1364 struct compound_hdr hdr = { 1417 struct compound_hdr hdr = {
1365 .nops = 4, 1418 .nops = 7,
1366 }; 1419 };
1367 int status; 1420 int status;
1368 1421
@@ -1370,10 +1423,16 @@ static int nfs4_xdr_enc_create(struct rpc_rqst *req, uint32_t *p, const struct n
1370 encode_compound_hdr(&xdr, &hdr); 1423 encode_compound_hdr(&xdr, &hdr);
1371 if ((status = encode_putfh(&xdr, args->dir_fh)) != 0) 1424 if ((status = encode_putfh(&xdr, args->dir_fh)) != 0)
1372 goto out; 1425 goto out;
1426 if ((status = encode_savefh(&xdr)) != 0)
1427 goto out;
1373 if ((status = encode_create(&xdr, args)) != 0) 1428 if ((status = encode_create(&xdr, args)) != 0)
1374 goto out; 1429 goto out;
1375 if ((status = encode_getfh(&xdr)) != 0) 1430 if ((status = encode_getfh(&xdr)) != 0)
1376 goto out; 1431 goto out;
1432 if ((status = encode_getfattr(&xdr, args->bitmask)) != 0)
1433 goto out;
1434 if ((status = encode_restorefh(&xdr)) != 0)
1435 goto out;
1377 status = encode_getfattr(&xdr, args->bitmask); 1436 status = encode_getfattr(&xdr, args->bitmask);
1378out: 1437out:
1379 return status; 1438 return status;
@@ -1412,7 +1471,7 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_clos
1412{ 1471{
1413 struct xdr_stream xdr; 1472 struct xdr_stream xdr;
1414 struct compound_hdr hdr = { 1473 struct compound_hdr hdr = {
1415 .nops = 2, 1474 .nops = 3,
1416 }; 1475 };
1417 int status; 1476 int status;
1418 1477
@@ -1422,6 +1481,9 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_clos
1422 if(status) 1481 if(status)
1423 goto out; 1482 goto out;
1424 status = encode_close(&xdr, args); 1483 status = encode_close(&xdr, args);
1484 if (status != 0)
1485 goto out;
1486 status = encode_getfattr(&xdr, args->bitmask);
1425out: 1487out:
1426 return status; 1488 return status;
1427} 1489}
@@ -1433,15 +1495,21 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_opena
1433{ 1495{
1434 struct xdr_stream xdr; 1496 struct xdr_stream xdr;
1435 struct compound_hdr hdr = { 1497 struct compound_hdr hdr = {
1436 .nops = 4, 1498 .nops = 7,
1437 }; 1499 };
1438 int status; 1500 int status;
1439 1501
1502 status = nfs_wait_on_sequence(args->seqid, req->rq_task);
1503 if (status != 0)
1504 goto out;
1440 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1505 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1441 encode_compound_hdr(&xdr, &hdr); 1506 encode_compound_hdr(&xdr, &hdr);
1442 status = encode_putfh(&xdr, args->fh); 1507 status = encode_putfh(&xdr, args->fh);
1443 if (status) 1508 if (status)
1444 goto out; 1509 goto out;
1510 status = encode_savefh(&xdr);
1511 if (status)
1512 goto out;
1445 status = encode_open(&xdr, args); 1513 status = encode_open(&xdr, args);
1446 if (status) 1514 if (status)
1447 goto out; 1515 goto out;
@@ -1449,6 +1517,12 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_opena
1449 if (status) 1517 if (status)
1450 goto out; 1518 goto out;
1451 status = encode_getfattr(&xdr, args->bitmask); 1519 status = encode_getfattr(&xdr, args->bitmask);
1520 if (status)
1521 goto out;
1522 status = encode_restorefh(&xdr);
1523 if (status)
1524 goto out;
1525 status = encode_getfattr(&xdr, args->bitmask);
1452out: 1526out:
1453 return status; 1527 return status;
1454} 1528}
@@ -1464,6 +1538,9 @@ static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct n
1464 }; 1538 };
1465 int status; 1539 int status;
1466 1540
1541 status = nfs_wait_on_sequence(args->seqid, req->rq_task);
1542 if (status != 0)
1543 goto out;
1467 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1544 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1468 encode_compound_hdr(&xdr, &hdr); 1545 encode_compound_hdr(&xdr, &hdr);
1469 status = encode_putfh(&xdr, args->fh); 1546 status = encode_putfh(&xdr, args->fh);
@@ -1485,6 +1562,9 @@ static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, uint32_t *p, struct nf
1485 }; 1562 };
1486 int status; 1563 int status;
1487 1564
1565 status = nfs_wait_on_sequence(args->seqid, req->rq_task);
1566 if (status != 0)
1567 goto out;
1488 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1568 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1489 encode_compound_hdr(&xdr, &hdr); 1569 encode_compound_hdr(&xdr, &hdr);
1490 status = encode_putfh(&xdr, args->fh); 1570 status = encode_putfh(&xdr, args->fh);
@@ -1502,7 +1582,7 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct
1502{ 1582{
1503 struct xdr_stream xdr; 1583 struct xdr_stream xdr;
1504 struct compound_hdr hdr = { 1584 struct compound_hdr hdr = {
1505 .nops = 2, 1585 .nops = 3,
1506 }; 1586 };
1507 int status; 1587 int status;
1508 1588
@@ -1512,6 +1592,9 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct
1512 if (status) 1592 if (status)
1513 goto out; 1593 goto out;
1514 status = encode_open_downgrade(&xdr, args); 1594 status = encode_open_downgrade(&xdr, args);
1595 if (status != 0)
1596 goto out;
1597 status = encode_getfattr(&xdr, args->bitmask);
1515out: 1598out:
1516 return status; 1599 return status;
1517} 1600}
@@ -1525,8 +1608,15 @@ static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_locka
1525 struct compound_hdr hdr = { 1608 struct compound_hdr hdr = {
1526 .nops = 2, 1609 .nops = 2,
1527 }; 1610 };
1611 struct nfs_lock_opargs *opargs = args->u.lock;
1528 int status; 1612 int status;
1529 1613
1614 status = nfs_wait_on_sequence(opargs->lock_seqid, req->rq_task);
1615 if (status != 0)
1616 goto out;
1617 /* Do we need to do an open_to_lock_owner? */
1618 if (opargs->lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)
1619 opargs->new_lock_owner = 0;
1530 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1620 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1531 encode_compound_hdr(&xdr, &hdr); 1621 encode_compound_hdr(&xdr, &hdr);
1532 status = encode_putfh(&xdr, args->fh); 1622 status = encode_putfh(&xdr, args->fh);
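The lock encoder now routes through the same seqid machinery as OPEN: nfs_wait_on_sequence() serialises RPCs that carry a sequence id, and once the server has confirmed the lock owner the client must stop sending open_to_lock_owner, hence new_lock_owner is cleared. A hedged standalone model of that decision only; the flag value and structure shapes are illustrative, not the kernel's:

    #include <stdbool.h>

    #define NFS_SEQID_CONFIRMED 0x01u   /* assumed value, for illustration */

    struct nfs_seqid_counter_model { unsigned int flags; };
    struct nfs_seqid_model { struct nfs_seqid_counter_model *sequence; };

    struct nfs_lock_opargs_model {
            struct nfs_seqid_model *lock_seqid;
            bool new_lock_owner;        /* true => encode open_to_lock_owner */
    };

    static void choose_lock_owner(struct nfs_lock_opargs_model *opargs)
    {
            /* a confirmed lock owner already exists: reuse it */
            if (opargs->lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)
                    opargs->new_lock_owner = false;
    }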
@@ -1713,7 +1803,7 @@ static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writ
1713{ 1803{
1714 struct xdr_stream xdr; 1804 struct xdr_stream xdr;
1715 struct compound_hdr hdr = { 1805 struct compound_hdr hdr = {
1716 .nops = 2, 1806 .nops = 3,
1717 }; 1807 };
1718 int status; 1808 int status;
1719 1809
@@ -1723,6 +1813,9 @@ static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writ
1723 if (status) 1813 if (status)
1724 goto out; 1814 goto out;
1725 status = encode_write(&xdr, args); 1815 status = encode_write(&xdr, args);
1816 if (status)
1817 goto out;
1818 status = encode_getfattr(&xdr, args->bitmask);
1726out: 1819out:
1727 return status; 1820 return status;
1728} 1821}
@@ -1734,7 +1827,7 @@ static int nfs4_xdr_enc_commit(struct rpc_rqst *req, uint32_t *p, struct nfs_wri
1734{ 1827{
1735 struct xdr_stream xdr; 1828 struct xdr_stream xdr;
1736 struct compound_hdr hdr = { 1829 struct compound_hdr hdr = {
1737 .nops = 2, 1830 .nops = 3,
1738 }; 1831 };
1739 int status; 1832 int status;
1740 1833
@@ -1744,6 +1837,9 @@ static int nfs4_xdr_enc_commit(struct rpc_rqst *req, uint32_t *p, struct nfs_wri
1744 if (status) 1837 if (status)
1745 goto out; 1838 goto out;
1746 status = encode_commit(&xdr, args); 1839 status = encode_commit(&xdr, args);
1840 if (status)
1841 goto out;
1842 status = encode_getfattr(&xdr, args->bitmask);
1747out: 1843out:
1748 return status; 1844 return status;
1749} 1845}
@@ -2670,8 +2766,7 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re
2670 goto xdr_error; 2766 goto xdr_error;
2671 status = verify_attr_len(xdr, savep, attrlen); 2767 status = verify_attr_len(xdr, savep, attrlen);
2672xdr_error: 2768xdr_error:
2673 if (status != 0) 2769 dprintk("%s: xdr returned %d!\n", __FUNCTION__, -status);
2674 printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
2675 return status; 2770 return status;
2676} 2771}
2677 2772
@@ -2704,8 +2799,7 @@ static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat)
2704 2799
2705 status = verify_attr_len(xdr, savep, attrlen); 2800 status = verify_attr_len(xdr, savep, attrlen);
2706xdr_error: 2801xdr_error:
2707 if (status != 0) 2802 dprintk("%s: xdr returned %d!\n", __FUNCTION__, -status);
2708 printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
2709 return status; 2803 return status;
2710} 2804}
2711 2805
@@ -2730,8 +2824,7 @@ static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf
2730 2824
2731 status = verify_attr_len(xdr, savep, attrlen); 2825 status = verify_attr_len(xdr, savep, attrlen);
2732xdr_error: 2826xdr_error:
2733 if (status != 0) 2827 dprintk("%s: xdr returned %d!\n", __FUNCTION__, -status);
2734 printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
2735 return status; 2828 return status;
2736} 2829}
2737 2830
@@ -2787,13 +2880,10 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons
2787 goto xdr_error; 2880 goto xdr_error;
2788 if ((status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime)) != 0) 2881 if ((status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime)) != 0)
2789 goto xdr_error; 2882 goto xdr_error;
2790 if ((status = verify_attr_len(xdr, savep, attrlen)) == 0) { 2883 if ((status = verify_attr_len(xdr, savep, attrlen)) == 0)
2791 fattr->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4; 2884 fattr->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4;
2792 fattr->timestamp = jiffies;
2793 }
2794xdr_error: 2885xdr_error:
2795 if (status != 0) 2886 dprintk("%s: xdr returned %d\n", __FUNCTION__, -status);
2796 printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
2797 return status; 2887 return status;
2798} 2888}
2799 2889
@@ -2826,8 +2916,7 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
2826 2916
2827 status = verify_attr_len(xdr, savep, attrlen); 2917 status = verify_attr_len(xdr, savep, attrlen);
2828xdr_error: 2918xdr_error:
2829 if (status != 0) 2919 dprintk("%s: xdr returned %d!\n", __FUNCTION__, -status);
2830 printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
2831 return status; 2920 return status;
2832} 2921}
2833 2922
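These decode helpers demote their failure reports from printk(KERN_NOTICE) to dprintk(): the caller already propagates the error, so the message is a debugging aid, not an event the administrator needs logged. A hedged userspace model of what dprintk() buys — the real macro is gated by the sunrpc debug sysctl, not a plain int:

    #include <stdio.h>

    static int rpc_debug;       /* stand-in for the RPC debug flag */

    #define dprintk(...) \
            do { if (rpc_debug) fprintf(stderr, __VA_ARGS__); } while (0)

    static int decode_example(int status)
    {
            dprintk("%s: xdr returned %d!\n", __func__, -status);
            return status;      /* the error still reaches the caller */
    }

    int main(void)
    {
            rpc_debug = 1;      /* silent unless debugging is switched on */
            decode_example(-5);
            return 0;
    }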
@@ -2890,8 +2979,8 @@ static int decode_lock(struct xdr_stream *xdr, struct nfs_lockres *res)
2890 2979
2891 status = decode_op_hdr(xdr, OP_LOCK); 2980 status = decode_op_hdr(xdr, OP_LOCK);
2892 if (status == 0) { 2981 if (status == 0) {
2893 READ_BUF(sizeof(nfs4_stateid)); 2982 READ_BUF(sizeof(res->u.stateid.data));
2894 COPYMEM(&res->u.stateid, sizeof(res->u.stateid)); 2983 COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data));
2895 } else if (status == -NFS4ERR_DENIED) 2984 } else if (status == -NFS4ERR_DENIED)
2896 return decode_lock_denied(xdr, &res->u.denied); 2985 return decode_lock_denied(xdr, &res->u.denied);
2897 return status; 2986 return status;
@@ -2913,8 +3002,8 @@ static int decode_locku(struct xdr_stream *xdr, struct nfs_lockres *res)
2913 3002
2914 status = decode_op_hdr(xdr, OP_LOCKU); 3003 status = decode_op_hdr(xdr, OP_LOCKU);
2915 if (status == 0) { 3004 if (status == 0) {
2916 READ_BUF(sizeof(nfs4_stateid)); 3005 READ_BUF(sizeof(res->u.stateid.data));
2917 COPYMEM(&res->u.stateid, sizeof(res->u.stateid)); 3006 COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data));
2918 } 3007 }
2919 return status; 3008 return status;
2920} 3009}
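The LOCK/LOCKU decoders now size the stateid copy by the union member they actually fill (res->u.stateid.data) rather than by the nfs4_stateid type. Sizing by the destination member is the safer idiom when the target lives inside a union; a hedged standalone model, with field sizes chosen for illustration:

    #include <string.h>

    struct stateid_model { unsigned char data[16]; };

    struct nfs_lockres_model {
            union {
                    struct stateid_model stateid;
                    struct { long long offset, length; } denied;
            } u;
    };

    static void copy_stateid(struct nfs_lockres_model *res,
                             const unsigned char *wire)
    {
            /* copy exactly as many bytes as the member being written */
            memcpy(res->u.stateid.data, wire, sizeof(res->u.stateid.data));
    }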
@@ -2994,7 +3083,7 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
2994 p += bmlen; 3083 p += bmlen;
2995 return decode_delegation(xdr, res); 3084 return decode_delegation(xdr, res);
2996xdr_error: 3085xdr_error:
2997 printk(KERN_NOTICE "%s: xdr error!\n", __FUNCTION__); 3086 dprintk("%s: Bitmap too large! Length = %u\n", __FUNCTION__, bmlen);
2998 return -EIO; 3087 return -EIO;
2999} 3088}
3000 3089
@@ -3208,6 +3297,12 @@ static int decode_renew(struct xdr_stream *xdr)
3208 return decode_op_hdr(xdr, OP_RENEW); 3297 return decode_op_hdr(xdr, OP_RENEW);
3209} 3298}
3210 3299
3300static int
3301decode_restorefh(struct xdr_stream *xdr)
3302{
3303 return decode_op_hdr(xdr, OP_RESTOREFH);
3304}
3305
3211static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, 3306static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
3212 size_t *acl_len) 3307 size_t *acl_len)
3213{ 3308{
@@ -3243,7 +3338,8 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
3243 if (attrlen <= *acl_len) 3338 if (attrlen <= *acl_len)
3244 xdr_read_pages(xdr, attrlen); 3339 xdr_read_pages(xdr, attrlen);
3245 *acl_len = attrlen; 3340 *acl_len = attrlen;
3246 } 3341 } else
3342 status = -EOPNOTSUPP;
3247 3343
3248out: 3344out:
3249 return status; 3345 return status;
@@ -3352,6 +3448,9 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, uint32_t *p, stru
3352 if (status) 3448 if (status)
3353 goto out; 3449 goto out;
3354 status = decode_open_downgrade(&xdr, res); 3450 status = decode_open_downgrade(&xdr, res);
3451 if (status != 0)
3452 goto out;
3453 decode_getfattr(&xdr, res->fattr, res->server);
3355out: 3454out:
3356 return status; 3455 return status;
3357} 3456}
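From here on the decoders treat the trailing GETFATTR as best effort: its return value is deliberately not assigned to status, so a failed attribute leg cannot turn an otherwise successful operation into an error (see the delete-on-close comment in nfs4_xdr_dec_close below). The attributes are simply dropped and fattr->valid stays clear. A hedged sketch of that contract with a toy fattr:

    struct fattr_model { unsigned int valid; };

    /* sets ->valid only if the attribute leg decoded cleanly */
    static int decode_getfattr_model(struct fattr_model *f, int wire_status)
    {
            if (wire_status != 0)
                    return wire_status;     /* e.g. a stale-handle error */
            f->valid = 1;
            return 0;
    }

    /* caller pattern used by the decoders above and below */
    static int dec_op_model(struct fattr_model *f, int op_status, int attr_status)
    {
            int status = op_status;
            if (status != 0)
                    return status;
            (void)decode_getfattr_model(f, attr_status);    /* best effort */
            return status;
    }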
@@ -3424,7 +3523,7 @@ out:
3424/* 3523/*
3425 * Decode REMOVE response 3524 * Decode REMOVE response
3426 */ 3525 */
3427static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_change_info *cinfo) 3526static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_remove_res *res)
3428{ 3527{
3429 struct xdr_stream xdr; 3528 struct xdr_stream xdr;
3430 struct compound_hdr hdr; 3529 struct compound_hdr hdr;
@@ -3433,8 +3532,11 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_
3433 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3532 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3434 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 3533 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3435 goto out; 3534 goto out;
3436 if ((status = decode_putfh(&xdr)) == 0) 3535 if ((status = decode_putfh(&xdr)) != 0)
3437 status = decode_remove(&xdr, cinfo); 3536 goto out;
3537 if ((status = decode_remove(&xdr, &res->cinfo)) != 0)
3538 goto out;
3539 decode_getfattr(&xdr, res->dir_attr, res->server);
3438out: 3540out:
3439 return status; 3541 return status;
3440} 3542}
@@ -3457,7 +3559,14 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_
3457 goto out; 3559 goto out;
3458 if ((status = decode_putfh(&xdr)) != 0) 3560 if ((status = decode_putfh(&xdr)) != 0)
3459 goto out; 3561 goto out;
3460 status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo); 3562 if ((status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo)) != 0)
3563 goto out;
3564 /* Current FH is target directory */
3565 if (decode_getfattr(&xdr, res->new_fattr, res->server) != 0)
3566 goto out;
3567 if ((status = decode_restorefh(&xdr)) != 0)
3568 goto out;
3569 decode_getfattr(&xdr, res->old_fattr, res->server);
3461out: 3570out:
3462 return status; 3571 return status;
3463} 3572}
@@ -3465,7 +3574,7 @@ out:
3465/* 3574/*
3466 * Decode LINK response 3575 * Decode LINK response
3467 */ 3576 */
3468static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_change_info *cinfo) 3577static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_link_res *res)
3469{ 3578{
3470 struct xdr_stream xdr; 3579 struct xdr_stream xdr;
3471 struct compound_hdr hdr; 3580 struct compound_hdr hdr;
@@ -3480,7 +3589,17 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_ch
3480 goto out; 3589 goto out;
3481 if ((status = decode_putfh(&xdr)) != 0) 3590 if ((status = decode_putfh(&xdr)) != 0)
3482 goto out; 3591 goto out;
3483 status = decode_link(&xdr, cinfo); 3592 if ((status = decode_link(&xdr, &res->cinfo)) != 0)
3593 goto out;
3594 /*
3595 * Note order: OP_LINK leaves the directory as the current
3596 * filehandle.
3597 */
3598 if (decode_getfattr(&xdr, res->dir_attr, res->server) != 0)
3599 goto out;
3600 if ((status = decode_restorefh(&xdr)) != 0)
3601 goto out;
3602 decode_getfattr(&xdr, res->fattr, res->server);
3484out: 3603out:
3485 return status; 3604 return status;
3486} 3605}
@@ -3499,13 +3618,17 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_
3499 goto out; 3618 goto out;
3500 if ((status = decode_putfh(&xdr)) != 0) 3619 if ((status = decode_putfh(&xdr)) != 0)
3501 goto out; 3620 goto out;
3621 if ((status = decode_savefh(&xdr)) != 0)
3622 goto out;
3502 if ((status = decode_create(&xdr,&res->dir_cinfo)) != 0) 3623 if ((status = decode_create(&xdr,&res->dir_cinfo)) != 0)
3503 goto out; 3624 goto out;
3504 if ((status = decode_getfh(&xdr, res->fh)) != 0) 3625 if ((status = decode_getfh(&xdr, res->fh)) != 0)
3505 goto out; 3626 goto out;
3506 status = decode_getfattr(&xdr, res->fattr, res->server); 3627 if (decode_getfattr(&xdr, res->fattr, res->server) != 0)
3507 if (status == NFS4ERR_DELAY) 3628 goto out;
3508 status = 0; 3629 if ((status = decode_restorefh(&xdr)) != 0)
3630 goto out;
3631 decode_getfattr(&xdr, res->dir_fattr, res->server);
3509out: 3632out:
3510 return status; 3633 return status;
3511} 3634}
@@ -3623,6 +3746,15 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_cl
3623 if (status) 3746 if (status)
3624 goto out; 3747 goto out;
3625 status = decode_close(&xdr, res); 3748 status = decode_close(&xdr, res);
3749 if (status != 0)
3750 goto out;
3751 /*
3752 * Note: Server may do delete on close for this file
3753 * in which case the getattr call will fail with
3754 * an ESTALE error. Shouldn't be a problem,
3755 * though, since fattr->valid will remain unset.
3756 */
3757 decode_getfattr(&xdr, res->fattr, res->server);
3626out: 3758out:
3627 return status; 3759 return status;
3628} 3760}
@@ -3643,15 +3775,20 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_ope
3643 status = decode_putfh(&xdr); 3775 status = decode_putfh(&xdr);
3644 if (status) 3776 if (status)
3645 goto out; 3777 goto out;
3778 status = decode_savefh(&xdr);
3779 if (status)
3780 goto out;
3646 status = decode_open(&xdr, res); 3781 status = decode_open(&xdr, res);
3647 if (status) 3782 if (status)
3648 goto out; 3783 goto out;
3649 status = decode_getfh(&xdr, &res->fh); 3784 status = decode_getfh(&xdr, &res->fh);
3650 if (status) 3785 if (status)
3651 goto out; 3786 goto out;
3652 status = decode_getfattr(&xdr, res->f_attr, res->server); 3787 if (decode_getfattr(&xdr, res->f_attr, res->server) != 0)
3653 if (status == NFS4ERR_DELAY) 3788 goto out;
3654 status = 0; 3789 if ((status = decode_restorefh(&xdr)) != 0)
3790 goto out;
3791 decode_getfattr(&xdr, res->dir_attr, res->server);
3655out: 3792out:
3656 return status; 3793 return status;
3657} 3794}
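The OPEN and CREATE decoders mirror their encoders' SAVEFH/RESTOREFH bracketing, which pins down which GETFATTR describes what: the first one (while the new object is the current filehandle) fills res->f_attr / res->fattr, and the one after RESTOREFH fills the directory attributes. SAVEFH and RESTOREFH act as a one-slot save register for the current filehandle; a hedged model:

    struct fh_model { unsigned long id; };

    struct compound_state_model {
            struct fh_model current_fh;
            struct fh_model saved_fh;
    };

    static void op_savefh(struct compound_state_model *s)
    {
            s->saved_fh = s->current_fh;            /* SAVEFH */
    }

    static void op_restorefh(struct compound_state_model *s)
    {
            s->current_fh = s->saved_fh;            /* RESTOREFH */
    }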
@@ -3869,6 +4006,9 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_wr
3869 if (status) 4006 if (status)
3870 goto out; 4007 goto out;
3871 status = decode_write(&xdr, res); 4008 status = decode_write(&xdr, res);
4009 if (status)
4010 goto out;
4011 decode_getfattr(&xdr, res->fattr, res->server);
3872 if (!status) 4012 if (!status)
3873 status = res->count; 4013 status = res->count;
3874out: 4014out:
@@ -3892,6 +4032,9 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_w
3892 if (status) 4032 if (status)
3893 goto out; 4033 goto out;
3894 status = decode_commit(&xdr, res); 4034 status = decode_commit(&xdr, res);
4035 if (status)
4036 goto out;
4037 decode_getfattr(&xdr, res->fattr, res->server);
3895out: 4038out:
3896 return status; 4039 return status;
3897} 4040}
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index be23c3fb9260..a48a003242c0 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -61,7 +61,7 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
61 int status; 61 int status;
62 62
63 dprintk("%s: call getattr\n", __FUNCTION__); 63 dprintk("%s: call getattr\n", __FUNCTION__);
64 fattr->valid = 0; 64 nfs_fattr_init(fattr);
65 status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0); 65 status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0);
66 dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); 66 dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
67 if (status) 67 if (status)
@@ -93,7 +93,7 @@ nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
93 int status; 93 int status;
94 94
95 dprintk("NFS call getattr\n"); 95 dprintk("NFS call getattr\n");
96 fattr->valid = 0; 96 nfs_fattr_init(fattr);
97 status = rpc_call(server->client, NFSPROC_GETATTR, 97 status = rpc_call(server->client, NFSPROC_GETATTR,
98 fhandle, fattr, 0); 98 fhandle, fattr, 0);
99 dprintk("NFS reply getattr: %d\n", status); 99 dprintk("NFS reply getattr: %d\n", status);
@@ -112,7 +112,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
112 int status; 112 int status;
113 113
114 dprintk("NFS call setattr\n"); 114 dprintk("NFS call setattr\n");
115 fattr->valid = 0; 115 nfs_fattr_init(fattr);
116 status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0); 116 status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
117 if (status == 0) 117 if (status == 0)
118 nfs_setattr_update_inode(inode, sattr); 118 nfs_setattr_update_inode(inode, sattr);
@@ -136,7 +136,7 @@ nfs_proc_lookup(struct inode *dir, struct qstr *name,
136 int status; 136 int status;
137 137
138 dprintk("NFS call lookup %s\n", name->name); 138 dprintk("NFS call lookup %s\n", name->name);
139 fattr->valid = 0; 139 nfs_fattr_init(fattr);
140 status = rpc_call(NFS_CLIENT(dir), NFSPROC_LOOKUP, &arg, &res, 0); 140 status = rpc_call(NFS_CLIENT(dir), NFSPROC_LOOKUP, &arg, &res, 0);
141 dprintk("NFS reply lookup: %d\n", status); 141 dprintk("NFS reply lookup: %d\n", status);
142 return status; 142 return status;
@@ -174,7 +174,7 @@ static int nfs_proc_read(struct nfs_read_data *rdata)
174 174
175 dprintk("NFS call read %d @ %Ld\n", rdata->args.count, 175 dprintk("NFS call read %d @ %Ld\n", rdata->args.count,
176 (long long) rdata->args.offset); 176 (long long) rdata->args.offset);
177 fattr->valid = 0; 177 nfs_fattr_init(fattr);
178 status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); 178 status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
179 if (status >= 0) { 179 if (status >= 0) {
180 nfs_refresh_inode(inode, fattr); 180 nfs_refresh_inode(inode, fattr);
@@ -203,10 +203,10 @@ static int nfs_proc_write(struct nfs_write_data *wdata)
203 203
204 dprintk("NFS call write %d @ %Ld\n", wdata->args.count, 204 dprintk("NFS call write %d @ %Ld\n", wdata->args.count,
205 (long long) wdata->args.offset); 205 (long long) wdata->args.offset);
206 fattr->valid = 0; 206 nfs_fattr_init(fattr);
207 status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); 207 status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
208 if (status >= 0) { 208 if (status >= 0) {
209 nfs_refresh_inode(inode, fattr); 209 nfs_post_op_update_inode(inode, fattr);
210 wdata->res.count = wdata->args.count; 210 wdata->res.count = wdata->args.count;
211 wdata->verf.committed = NFS_FILE_SYNC; 211 wdata->verf.committed = NFS_FILE_SYNC;
212 } 212 }
@@ -216,7 +216,7 @@ static int nfs_proc_write(struct nfs_write_data *wdata)
216 216
217static int 217static int
218nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 218nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
219 int flags) 219 int flags, struct nameidata *nd)
220{ 220{
221 struct nfs_fh fhandle; 221 struct nfs_fh fhandle;
222 struct nfs_fattr fattr; 222 struct nfs_fattr fattr;
@@ -232,7 +232,7 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
232 }; 232 };
233 int status; 233 int status;
234 234
235 fattr.valid = 0; 235 nfs_fattr_init(&fattr);
236 dprintk("NFS call create %s\n", dentry->d_name.name); 236 dprintk("NFS call create %s\n", dentry->d_name.name);
237 status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); 237 status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
238 if (status == 0) 238 if (status == 0)
@@ -273,12 +273,13 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
273 sattr->ia_size = new_encode_dev(rdev);/* get out your barf bag */ 273 sattr->ia_size = new_encode_dev(rdev);/* get out your barf bag */
274 } 274 }
275 275
276 fattr.valid = 0; 276 nfs_fattr_init(&fattr);
277 status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); 277 status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
278 nfs_mark_for_revalidate(dir);
278 279
279 if (status == -EINVAL && S_ISFIFO(mode)) { 280 if (status == -EINVAL && S_ISFIFO(mode)) {
280 sattr->ia_mode = mode; 281 sattr->ia_mode = mode;
281 fattr.valid = 0; 282 nfs_fattr_init(&fattr);
282 status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); 283 status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
283 } 284 }
284 if (status == 0) 285 if (status == 0)
@@ -305,6 +306,7 @@ nfs_proc_remove(struct inode *dir, struct qstr *name)
305 306
306 dprintk("NFS call remove %s\n", name->name); 307 dprintk("NFS call remove %s\n", name->name);
307 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 308 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
309 nfs_mark_for_revalidate(dir);
308 310
309 dprintk("NFS reply remove: %d\n", status); 311 dprintk("NFS reply remove: %d\n", status);
310 return status; 312 return status;
@@ -331,8 +333,10 @@ nfs_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
331{ 333{
332 struct rpc_message *msg = &task->tk_msg; 334 struct rpc_message *msg = &task->tk_msg;
333 335
334 if (msg->rpc_argp) 336 if (msg->rpc_argp) {
337 nfs_mark_for_revalidate(dir->d_inode);
335 kfree(msg->rpc_argp); 338 kfree(msg->rpc_argp);
339 }
336 return 0; 340 return 0;
337} 341}
338 342
@@ -352,6 +356,8 @@ nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
352 356
353 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); 357 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
354 status = rpc_call(NFS_CLIENT(old_dir), NFSPROC_RENAME, &arg, NULL, 0); 358 status = rpc_call(NFS_CLIENT(old_dir), NFSPROC_RENAME, &arg, NULL, 0);
359 nfs_mark_for_revalidate(old_dir);
360 nfs_mark_for_revalidate(new_dir);
355 dprintk("NFS reply rename: %d\n", status); 361 dprintk("NFS reply rename: %d\n", status);
356 return status; 362 return status;
357} 363}
@@ -369,6 +375,7 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
369 375
370 dprintk("NFS call link %s\n", name->name); 376 dprintk("NFS call link %s\n", name->name);
371 status = rpc_call(NFS_CLIENT(inode), NFSPROC_LINK, &arg, NULL, 0); 377 status = rpc_call(NFS_CLIENT(inode), NFSPROC_LINK, &arg, NULL, 0);
378 nfs_mark_for_revalidate(dir);
372 dprintk("NFS reply link: %d\n", status); 379 dprintk("NFS reply link: %d\n", status);
373 return status; 380 return status;
374} 381}
@@ -391,9 +398,10 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
391 if (path->len > NFS2_MAXPATHLEN) 398 if (path->len > NFS2_MAXPATHLEN)
392 return -ENAMETOOLONG; 399 return -ENAMETOOLONG;
393 dprintk("NFS call symlink %s -> %s\n", name->name, path->name); 400 dprintk("NFS call symlink %s -> %s\n", name->name, path->name);
394 fattr->valid = 0; 401 nfs_fattr_init(fattr);
395 fhandle->size = 0; 402 fhandle->size = 0;
396 status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0); 403 status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0);
404 nfs_mark_for_revalidate(dir);
397 dprintk("NFS reply symlink: %d\n", status); 405 dprintk("NFS reply symlink: %d\n", status);
398 return status; 406 return status;
399} 407}
@@ -416,8 +424,9 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
416 int status; 424 int status;
417 425
418 dprintk("NFS call mkdir %s\n", dentry->d_name.name); 426 dprintk("NFS call mkdir %s\n", dentry->d_name.name);
419 fattr.valid = 0; 427 nfs_fattr_init(&fattr);
420 status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0); 428 status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0);
429 nfs_mark_for_revalidate(dir);
421 if (status == 0) 430 if (status == 0)
422 status = nfs_instantiate(dentry, &fhandle, &fattr); 431 status = nfs_instantiate(dentry, &fhandle, &fattr);
423 dprintk("NFS reply mkdir: %d\n", status); 432 dprintk("NFS reply mkdir: %d\n", status);
@@ -436,6 +445,7 @@ nfs_proc_rmdir(struct inode *dir, struct qstr *name)
436 445
437 dprintk("NFS call rmdir %s\n", name->name); 446 dprintk("NFS call rmdir %s\n", name->name);
438 status = rpc_call(NFS_CLIENT(dir), NFSPROC_RMDIR, &arg, NULL, 0); 447 status = rpc_call(NFS_CLIENT(dir), NFSPROC_RMDIR, &arg, NULL, 0);
448 nfs_mark_for_revalidate(dir);
439 dprintk("NFS reply rmdir: %d\n", status); 449 dprintk("NFS reply rmdir: %d\n", status);
440 return status; 450 return status;
441} 451}
@@ -484,7 +494,7 @@ nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
484 int status; 494 int status;
485 495
486 dprintk("NFS call statfs\n"); 496 dprintk("NFS call statfs\n");
487 stat->fattr->valid = 0; 497 nfs_fattr_init(stat->fattr);
488 status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0); 498 status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
489 dprintk("NFS reply statfs: %d\n", status); 499 dprintk("NFS reply statfs: %d\n", status);
490 if (status) 500 if (status)
@@ -507,7 +517,7 @@ nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
507 int status; 517 int status;
508 518
509 dprintk("NFS call fsinfo\n"); 519 dprintk("NFS call fsinfo\n");
510 info->fattr->valid = 0; 520 nfs_fattr_init(info->fattr);
511 status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0); 521 status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
512 dprintk("NFS reply fsinfo: %d\n", status); 522 dprintk("NFS reply fsinfo: %d\n", status);
513 if (status) 523 if (status)
@@ -579,7 +589,7 @@ nfs_write_done(struct rpc_task *task)
579 struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; 589 struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
580 590
581 if (task->tk_status >= 0) 591 if (task->tk_status >= 0)
582 nfs_refresh_inode(data->inode, data->res.fattr); 592 nfs_post_op_update_inode(data->inode, data->res.fattr);
583 nfs_writeback_done(task); 593 nfs_writeback_done(task);
584} 594}
585 595
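The NFSv2 proc-layer changes follow one theme: attribute state is reset through nfs_fattr_init() before each call, write replies feed nfs_post_op_update_inode() instead of a plain refresh, and every operation that changes a directory marks it with nfs_mark_for_revalidate() so the next lookup goes back to the server. nfs_fattr_init() itself is not defined in this diff, so the shape below is an assumption — a hedged userspace approximation that resets the valid mask and records when the request was launched (jiffies in-kernel):

    #include <time.h>

    struct nfs_fattr_model {
            unsigned int valid;         /* which fields are usable */
            unsigned long time_start;   /* when the request was launched */
            /* ... sizes, timestamps, ownership ... */
    };

    static void nfs_fattr_init_model(struct nfs_fattr_model *fattr)
    {
            fattr->valid = 0;           /* nothing usable until the reply lands */
            fattr->time_start = (unsigned long)time(NULL);
    }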
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 9758ebd49905..43b03b19731b 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -215,6 +215,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
215 data->res.fattr = &data->fattr; 215 data->res.fattr = &data->fattr;
216 data->res.count = count; 216 data->res.count = count;
217 data->res.eof = 0; 217 data->res.eof = 0;
218 nfs_fattr_init(&data->fattr);
218 219
219 NFS_PROTO(inode)->read_setup(data); 220 NFS_PROTO(inode)->read_setup(data);
220 221
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5130eda231d7..819a65f5071f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -870,6 +870,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
870 data->res.fattr = &data->fattr; 870 data->res.fattr = &data->fattr;
871 data->res.count = count; 871 data->res.count = count;
872 data->res.verf = &data->verf; 872 data->res.verf = &data->verf;
873 nfs_fattr_init(&data->fattr);
873 874
874 NFS_PROTO(inode)->write_setup(data, how); 875 NFS_PROTO(inode)->write_setup(data, how);
875 876
@@ -1237,6 +1238,7 @@ static void nfs_commit_rpcsetup(struct list_head *head,
1237 data->res.count = 0; 1238 data->res.count = 0;
1238 data->res.fattr = &data->fattr; 1239 data->res.fattr = &data->fattr;
1239 data->res.verf = &data->verf; 1240 data->res.verf = &data->verf;
1241 nfs_fattr_init(&data->fattr);
1240 1242
1241 NFS_PROTO(inode)->commit_setup(data, how); 1243 NFS_PROTO(inode)->commit_setup(data, how);
1242 1244
diff --git a/fs/open.c b/fs/open.c
index f0d90cf0495c..8d06ec911fd9 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -739,7 +739,8 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group)
739} 739}
740 740
741static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, 741static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
742 int flags, struct file *f) 742 int flags, struct file *f,
743 int (*open)(struct inode *, struct file *))
743{ 744{
744 struct inode *inode; 745 struct inode *inode;
745 int error; 746 int error;
@@ -761,11 +762,14 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
761 f->f_op = fops_get(inode->i_fop); 762 f->f_op = fops_get(inode->i_fop);
762 file_move(f, &inode->i_sb->s_files); 763 file_move(f, &inode->i_sb->s_files);
763 764
764 if (f->f_op && f->f_op->open) { 765 if (!open && f->f_op)
765 error = f->f_op->open(inode,f); 766 open = f->f_op->open;
767 if (open) {
768 error = open(inode, f);
766 if (error) 769 if (error)
767 goto cleanup_all; 770 goto cleanup_all;
768 } 771 }
772
769 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 773 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
770 774
771 file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); 775 file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
@@ -814,28 +818,75 @@ struct file *filp_open(const char * filename, int flags, int mode)
814{ 818{
815 int namei_flags, error; 819 int namei_flags, error;
816 struct nameidata nd; 820 struct nameidata nd;
817 struct file *f;
818 821
819 namei_flags = flags; 822 namei_flags = flags;
820 if ((namei_flags+1) & O_ACCMODE) 823 if ((namei_flags+1) & O_ACCMODE)
821 namei_flags++; 824 namei_flags++;
822 if (namei_flags & O_TRUNC)
823 namei_flags |= 2;
824
825 error = -ENFILE;
826 f = get_empty_filp();
827 if (f == NULL)
828 return ERR_PTR(error);
829 825
830 error = open_namei(filename, namei_flags, mode, &nd); 826 error = open_namei(filename, namei_flags, mode, &nd);
831 if (!error) 827 if (!error)
832 return __dentry_open(nd.dentry, nd.mnt, flags, f); 828 return nameidata_to_filp(&nd, flags);
833 829
834 put_filp(f);
835 return ERR_PTR(error); 830 return ERR_PTR(error);
836} 831}
837EXPORT_SYMBOL(filp_open); 832EXPORT_SYMBOL(filp_open);
838 833
834/**
835 * lookup_instantiate_filp - instantiates the open intent filp
836 * @nd: pointer to nameidata
837 * @dentry: pointer to dentry
838 * @open: open callback
839 *
840 * Helper for filesystems that want to use lookup open intents and pass back
841 * a fully instantiated struct file to the caller.
842 * This function is meant to be called from within a filesystem's
843 * lookup method.
844 * Note that in case of error, nd->intent.open.file is destroyed, but the
845 * path information remains valid.
846 * If the open callback is set to NULL, then the standard f_op->open()
847 * filesystem callback is substituted.
848 */
849struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
850 int (*open)(struct inode *, struct file *))
851{
852 if (IS_ERR(nd->intent.open.file))
853 goto out;
854 if (IS_ERR(dentry))
855 goto out_err;
856 nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->mnt),
857 nd->intent.open.flags - 1,
858 nd->intent.open.file,
859 open);
860out:
861 return nd->intent.open.file;
862out_err:
863 release_open_intent(nd);
864 nd->intent.open.file = (struct file *)dentry;
865 goto out;
866}
867EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
868
869/**
870 * nameidata_to_filp - convert a nameidata to an open filp.
871 * @nd: pointer to nameidata
872 * @flags: open flags
873 *
874 * Note that this function destroys the original nameidata
875 */
876struct file *nameidata_to_filp(struct nameidata *nd, int flags)
877{
878 struct file *filp;
879
880 /* Pick up the filp from the open intent */
881 filp = nd->intent.open.file;
882 /* Has the filesystem initialised the file for us? */
883 if (filp->f_dentry == NULL)
884 filp = __dentry_open(nd->dentry, nd->mnt, flags, filp, NULL);
885 else
886 path_release(nd);
887 return filp;
888}
889
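A hedged usage sketch for the two new helpers — everything foofs_* below is a hypothetical stand-in, not from this patch: a filesystem that supports open intents can instantiate the intent file from its own ->lookup(), and callers that used filp_open() now pick the result up via nameidata_to_filp(). Kernel context (struct inode, struct nameidata, LOOKUP_OPEN) is assumed from the usual headers:

    /* hypothetical per-filesystem open callback */
    static int foofs_intent_open(struct inode *inode, struct file *filp)
    {
            /* protocol-specific open work goes here */
            return 0;
    }

    static struct dentry *foofs_lookup(struct inode *dir, struct dentry *dentry,
                                       struct nameidata *nd)
    {
            /* ... ordinary lookup work ... */
            if (nd != NULL && (nd->flags & LOOKUP_OPEN))
                    (void)lookup_instantiate_filp(nd, dentry, foofs_intent_open);
            return NULL;
    }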
839struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) 890struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
840{ 891{
841 int error; 892 int error;
@@ -846,7 +897,7 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
846 if (f == NULL) 897 if (f == NULL)
847 return ERR_PTR(error); 898 return ERR_PTR(error);
848 899
849 return __dentry_open(dentry, mnt, flags, f); 900 return __dentry_open(dentry, mnt, flags, f, NULL);
850} 901}
851EXPORT_SYMBOL(dentry_open); 902EXPORT_SYMBOL(dentry_open);
852 903
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 77e178f13162..1e848648a322 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -430,7 +430,7 @@ void del_gendisk(struct gendisk *disk)
430 disk->flags &= ~GENHD_FL_UP; 430 disk->flags &= ~GENHD_FL_UP;
431 unlink_gendisk(disk); 431 unlink_gendisk(disk);
432 disk_stat_set_all(disk, 0); 432 disk_stat_set_all(disk, 0);
433 disk->stamp = disk->stamp_idle = 0; 433 disk->stamp = 0;
434 434
435 devfs_remove_disk(disk); 435 devfs_remove_disk(disk);
436 436
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index 2706e2adffab..45829889dcdc 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -2022,7 +2022,7 @@ static int get_neighbors(struct tree_balance *p_s_tb, int n_h)
2022} 2022}
2023 2023
2024#ifdef CONFIG_REISERFS_CHECK 2024#ifdef CONFIG_REISERFS_CHECK
2025void *reiserfs_kmalloc(size_t size, int flags, struct super_block *s) 2025void *reiserfs_kmalloc(size_t size, gfp_t flags, struct super_block *s)
2026{ 2026{
2027 void *vp; 2027 void *vp;
2028 static size_t malloced; 2028 static size_t malloced;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index d76ee6c4f9b8..5f82352b97e1 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2842,7 +2842,7 @@ static int reiserfs_set_page_dirty(struct page *page)
2842 * even in -o notail mode, we can't be sure an old mount without -o notail 2842 * even in -o notail mode, we can't be sure an old mount without -o notail
2843 * didn't create files with tails. 2843 * didn't create files with tails.
2844 */ 2844 */
2845static int reiserfs_releasepage(struct page *page, int unused_gfp_flags) 2845static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
2846{ 2846{
2847 struct inode *inode = page->mapping->host; 2847 struct inode *inode = page->mapping->host;
2848 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); 2848 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 87ac9dc8b381..72e120798677 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -453,7 +453,7 @@ static struct page *reiserfs_get_page(struct inode *dir, unsigned long n)
453 struct page *page; 453 struct page *page;
454 /* We can deadlock if we try to free dentries, 454 /* We can deadlock if we try to free dentries,
455 and an unlink/rmdir has just occured - GFP_NOFS avoids this */ 455 and an unlink/rmdir has just occured - GFP_NOFS avoids this */
456 mapping->flags = (mapping->flags & ~__GFP_BITS_MASK) | GFP_NOFS; 456 mapping_set_gfp_mask(mapping, GFP_NOFS);
457 page = read_cache_page(mapping, n, 457 page = read_cache_page(mapping, n,
458 (filler_t *) mapping->a_ops->readpage, NULL); 458 (filler_t *) mapping->a_ops->readpage, NULL);
459 if (!IS_ERR(page)) { 459 if (!IS_ERR(page)) {
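mapping_set_gfp_mask() replaces the open-coded masking of mapping->flags; the accessor keeps the position of the GFP bits inside the flags word a private detail of the pagecache. A hedged model of the accessor pair, with the bit layout assumed for illustration:

    #define GFP_BITS_SHIFT_M 20
    #define GFP_BITS_MASK_M  ((1ul << GFP_BITS_SHIFT_M) - 1)

    struct address_space_model { unsigned long flags; };

    static void mapping_set_gfp_mask_model(struct address_space_model *m,
                                           unsigned int gfp_mask)
    {
            m->flags = (m->flags & ~GFP_BITS_MASK_M) | gfp_mask;
    }

    static unsigned int mapping_gfp_mask_model(const struct address_space_model *m)
    {
            return (unsigned int)(m->flags & GFP_BITS_MASK_M);
    }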
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index d2653b589b1c..3c92162dc728 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -45,11 +45,11 @@
45 45
46 46
47void * 47void *
48kmem_alloc(size_t size, gfp_t flags) 48kmem_alloc(size_t size, unsigned int __nocast flags)
49{ 49{
50 int retries = 0; 50 int retries = 0;
51 unsigned int lflags = kmem_flags_convert(flags); 51 gfp_t lflags = kmem_flags_convert(flags);
52 void *ptr; 52 void *ptr;
53 53
54 do { 54 do {
55 if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS) 55 if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS)
@@ -67,7 +67,7 @@ kmem_alloc(size_t size, gfp_t flags)
67} 67}
68 68
69void * 69void *
70kmem_zalloc(size_t size, gfp_t flags) 70kmem_zalloc(size_t size, unsigned int __nocast flags)
71{ 71{
72 void *ptr; 72 void *ptr;
73 73
@@ -90,7 +90,7 @@ kmem_free(void *ptr, size_t size)
90 90
91void * 91void *
92kmem_realloc(void *ptr, size_t newsize, size_t oldsize, 92kmem_realloc(void *ptr, size_t newsize, size_t oldsize,
93 gfp_t flags) 93 unsigned int __nocast flags)
94{ 94{
95 void *new; 95 void *new;
96 96
@@ -105,11 +105,11 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize,
105} 105}
106 106
107void * 107void *
108kmem_zone_alloc(kmem_zone_t *zone, gfp_t flags) 108kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
109{ 109{
110 int retries = 0; 110 int retries = 0;
111 unsigned int lflags = kmem_flags_convert(flags); 111 gfp_t lflags = kmem_flags_convert(flags);
112 void *ptr; 112 void *ptr;
113 113
114 do { 114 do {
115 ptr = kmem_cache_alloc(zone, lflags); 115 ptr = kmem_cache_alloc(zone, lflags);
@@ -124,7 +124,7 @@ kmem_zone_alloc(kmem_zone_t *zone, gfp_t flags)
124} 124}
125 125
126void * 126void *
127kmem_zone_zalloc(kmem_zone_t *zone, gfp_t flags) 127kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags)
128{ 128{
129 void *ptr; 129 void *ptr;
130 130
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index ee7010f085bc..f4bb78c268c0 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -81,9 +81,9 @@ typedef unsigned long xfs_pflags_t;
81 *(NSTATEP) = *(OSTATEP); \ 81 *(NSTATEP) = *(OSTATEP); \
82} while (0) 82} while (0)
83 83
84static __inline unsigned int kmem_flags_convert(gfp_t flags) 84static __inline gfp_t kmem_flags_convert(unsigned int __nocast flags)
85{ 85{
86 unsigned int lflags = __GFP_NOWARN; /* we'll report problems, if need be */ 86 gfp_t lflags = __GFP_NOWARN; /* we'll report problems, if need be */
87 87
88#ifdef DEBUG 88#ifdef DEBUG
89 if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) { 89 if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) {
@@ -125,16 +125,16 @@ kmem_zone_destroy(kmem_zone_t *zone)
125 BUG(); 125 BUG();
126} 126}
127 127
128extern void *kmem_zone_zalloc(kmem_zone_t *, gfp_t); 128extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
129extern void *kmem_zone_alloc(kmem_zone_t *, gfp_t); 129extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
130 130
131extern void *kmem_alloc(size_t, gfp_t); 131extern void *kmem_alloc(size_t, unsigned int __nocast);
132extern void *kmem_realloc(void *, size_t, size_t, gfp_t); 132extern void *kmem_realloc(void *, size_t, size_t, unsigned int __nocast);
133extern void *kmem_zalloc(size_t, gfp_t); 133extern void *kmem_zalloc(size_t, unsigned int __nocast);
134extern void kmem_free(void *, size_t); 134extern void kmem_free(void *, size_t);
135 135
136typedef struct shrinker *kmem_shaker_t; 136typedef struct shrinker *kmem_shaker_t;
137typedef int (*kmem_shake_func_t)(int, unsigned int); 137typedef int (*kmem_shake_func_t)(int, gfp_t);
138 138
139static __inline kmem_shaker_t 139static __inline kmem_shaker_t
140kmem_shake_register(kmem_shake_func_t sfunc) 140kmem_shake_register(kmem_shake_func_t sfunc)
@@ -149,7 +149,7 @@ kmem_shake_deregister(kmem_shaker_t shrinker)
149} 149}
150 150
151static __inline int 151static __inline int
152kmem_shake_allow(unsigned int gfp_mask) 152kmem_shake_allow(gfp_t gfp_mask)
153{ 153{
154 return (gfp_mask & __GFP_WAIT); 154 return (gfp_mask & __GFP_WAIT);
155} 155}
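The XFS hunks draw a deliberate boundary: XFS's own KM_* allocation flags stay plain (__nocast) unsigned ints, and only kmem_flags_convert() hands a real gfp_t to the page allocator — which is why its return type flips to gfp_t while the kmem_* entry points flip away from it. A hedged standalone model; the flag values are illustrative, not XFS's:

    typedef unsigned int gfp_t_model;

    #define KM_SLEEP_M      0x0001u
    #define KM_NOSLEEP_M    0x0002u

    #define GFP_KERNEL_M    0x10u   /* may sleep */
    #define GFP_ATOMIC_M    0x20u   /* may not sleep */

    static gfp_t_model kmem_flags_convert_model(unsigned int km_flags)
    {
            /* the one place XFS flags become allocator flags */
            return (km_flags & KM_NOSLEEP_M) ? GFP_ATOMIC_M : GFP_KERNEL_M;
    }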
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c6c077978fe3..7aa398724706 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1296,7 +1296,7 @@ linvfs_invalidate_page(
1296STATIC int 1296STATIC int
1297linvfs_release_page( 1297linvfs_release_page(
1298 struct page *page, 1298 struct page *page,
1299 int gfp_mask) 1299 gfp_t gfp_mask)
1300{ 1300{
1301 struct inode *inode = page->mapping->host; 1301 struct inode *inode = page->mapping->host;
1302 int dirty, delalloc, unmapped, unwritten; 1302 int dirty, delalloc, unmapped, unwritten;
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index e82cf72ac599..ba4767c04adf 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -64,7 +64,7 @@
64 64
65STATIC kmem_cache_t *pagebuf_zone; 65STATIC kmem_cache_t *pagebuf_zone;
66STATIC kmem_shaker_t pagebuf_shake; 66STATIC kmem_shaker_t pagebuf_shake;
67STATIC int xfsbufd_wakeup(int, unsigned int); 67STATIC int xfsbufd_wakeup(int, gfp_t);
68STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); 68STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
69 69
70STATIC struct workqueue_struct *xfslogd_workqueue; 70STATIC struct workqueue_struct *xfslogd_workqueue;
@@ -383,7 +383,7 @@ _pagebuf_lookup_pages(
383 size_t blocksize = bp->pb_target->pbr_bsize; 383 size_t blocksize = bp->pb_target->pbr_bsize;
384 size_t size = bp->pb_count_desired; 384 size_t size = bp->pb_count_desired;
385 size_t nbytes, offset; 385 size_t nbytes, offset;
386 int gfp_mask = pb_to_gfp(flags); 386 gfp_t gfp_mask = pb_to_gfp(flags);
387 unsigned short page_count, i; 387 unsigned short page_count, i;
388 pgoff_t first; 388 pgoff_t first;
389 loff_t end; 389 loff_t end;
@@ -1749,8 +1749,8 @@ STATIC int xfsbufd_force_sleep;
1749 1749
1750STATIC int 1750STATIC int
1751xfsbufd_wakeup( 1751xfsbufd_wakeup(
1752 int priority, 1752 int priority,
1753 unsigned int mask) 1753 gfp_t mask)
1754{ 1754{
1755 if (xfsbufd_force_sleep) 1755 if (xfsbufd_force_sleep)
1756 return 0; 1756 return 0;
diff --git a/include/asm-alpha/dma-mapping.h b/include/asm-alpha/dma-mapping.h
index c675f282d6ad..680f7ecbb28f 100644
--- a/include/asm-alpha/dma-mapping.h
+++ b/include/asm-alpha/dma-mapping.h
@@ -31,7 +31,7 @@
31#else /* no PCI - no IOMMU. */ 31#else /* no PCI - no IOMMU. */
32 32
33void *dma_alloc_coherent(struct device *dev, size_t size, 33void *dma_alloc_coherent(struct device *dev, size_t size,
34 dma_addr_t *dma_handle, int gfp); 34 dma_addr_t *dma_handle, gfp_t gfp);
35int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, 35int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
36 enum dma_data_direction direction); 36 enum dma_data_direction direction);
37 37
diff --git a/include/asm-arm/dma-mapping.h b/include/asm-arm/dma-mapping.h
index d62ade4e4cbb..e3e8541ee63b 100644
--- a/include/asm-arm/dma-mapping.h
+++ b/include/asm-arm/dma-mapping.h
@@ -70,7 +70,7 @@ static inline int dma_mapping_error(dma_addr_t dma_addr)
70 * device-viewed address. 70 * device-viewed address.
71 */ 71 */
72extern void * 72extern void *
73dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, int gfp); 73dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp);
74 74
75/** 75/**
76 * dma_free_coherent - free memory allocated by dma_alloc_coherent 76 * dma_free_coherent - free memory allocated by dma_alloc_coherent
@@ -117,7 +117,7 @@ int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
117 * device-viewed address. 117 * device-viewed address.
118 */ 118 */
119extern void * 119extern void *
120dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, int gfp); 120dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp);
121 121
122#define dma_free_writecombine(dev,size,cpu_addr,handle) \ 122#define dma_free_writecombine(dev,size,cpu_addr,handle) \
123 dma_free_coherent(dev,size,cpu_addr,handle) 123 dma_free_coherent(dev,size,cpu_addr,handle)
diff --git a/include/asm-cris/dma-mapping.h b/include/asm-cris/dma-mapping.h
index 0b5c3fdaefe1..8eff51349ae7 100644
--- a/include/asm-cris/dma-mapping.h
+++ b/include/asm-cris/dma-mapping.h
@@ -15,14 +15,14 @@
15 15
16#ifdef CONFIG_PCI 16#ifdef CONFIG_PCI
17void *dma_alloc_coherent(struct device *dev, size_t size, 17void *dma_alloc_coherent(struct device *dev, size_t size,
18 dma_addr_t *dma_handle, int flag); 18 dma_addr_t *dma_handle, gfp_t flag);
19 19
20void dma_free_coherent(struct device *dev, size_t size, 20void dma_free_coherent(struct device *dev, size_t size,
21 void *vaddr, dma_addr_t dma_handle); 21 void *vaddr, dma_addr_t dma_handle);
22#else 22#else
23static inline void * 23static inline void *
24dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, 24dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
25 int flag) 25 gfp_t flag)
26{ 26{
27 BUG(); 27 BUG();
28 return NULL; 28 return NULL;
diff --git a/include/asm-frv/dma-mapping.h b/include/asm-frv/dma-mapping.h
index 0206ab35eae0..5003e017fd1e 100644
--- a/include/asm-frv/dma-mapping.h
+++ b/include/asm-frv/dma-mapping.h
@@ -13,7 +13,7 @@
13extern unsigned long __nongprelbss dma_coherent_mem_start; 13extern unsigned long __nongprelbss dma_coherent_mem_start;
14extern unsigned long __nongprelbss dma_coherent_mem_end; 14extern unsigned long __nongprelbss dma_coherent_mem_end;
15 15
16void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, int gfp); 16void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp);
17void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle); 17void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle);
18 18
19/* 19/*
diff --git a/include/asm-frv/pci.h b/include/asm-frv/pci.h
index b4efe5e3591a..1168451c275f 100644
--- a/include/asm-frv/pci.h
+++ b/include/asm-frv/pci.h
@@ -32,7 +32,7 @@ extern void pcibios_set_master(struct pci_dev *dev);
32extern void pcibios_penalize_isa_irq(int irq); 32extern void pcibios_penalize_isa_irq(int irq);
33 33
34#ifdef CONFIG_MMU 34#ifdef CONFIG_MMU
35extern void *consistent_alloc(int gfp, size_t size, dma_addr_t *dma_handle); 35extern void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *dma_handle);
36extern void consistent_free(void *vaddr); 36extern void consistent_free(void *vaddr);
37extern void consistent_sync(void *vaddr, size_t size, int direction); 37extern void consistent_sync(void *vaddr, size_t size, int direction);
38extern void consistent_sync_page(struct page *page, unsigned long offset, 38extern void consistent_sync_page(struct page *page, unsigned long offset,
diff --git a/include/asm-generic/dma-mapping-broken.h b/include/asm-generic/dma-mapping-broken.h
index fd9de9502dff..a7f1a55ce6b0 100644
--- a/include/asm-generic/dma-mapping-broken.h
+++ b/include/asm-generic/dma-mapping-broken.h
@@ -6,7 +6,7 @@
6 6
7static inline void * 7static inline void *
8dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, 8dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
9 int flag) 9 gfp_t flag)
10{ 10{
11 BUG(); 11 BUG();
12 return NULL; 12 return NULL;
diff --git a/include/asm-ia64/machvec.h b/include/asm-ia64/machvec.h
index 79e89a7db566..a2f6ac5aef7d 100644
--- a/include/asm-ia64/machvec.h
+++ b/include/asm-ia64/machvec.h
@@ -37,7 +37,7 @@ typedef int ia64_mv_pci_legacy_write_t (struct pci_bus *, u16 port, u32 val,
37 37
38/* DMA-mapping interface: */ 38/* DMA-mapping interface: */
39typedef void ia64_mv_dma_init (void); 39typedef void ia64_mv_dma_init (void);
40typedef void *ia64_mv_dma_alloc_coherent (struct device *, size_t, dma_addr_t *, int); 40typedef void *ia64_mv_dma_alloc_coherent (struct device *, size_t, dma_addr_t *, gfp_t);
41typedef void ia64_mv_dma_free_coherent (struct device *, size_t, void *, dma_addr_t); 41typedef void ia64_mv_dma_free_coherent (struct device *, size_t, void *, dma_addr_t);
42typedef dma_addr_t ia64_mv_dma_map_single (struct device *, void *, size_t, int); 42typedef dma_addr_t ia64_mv_dma_map_single (struct device *, void *, size_t, int);
43typedef void ia64_mv_dma_unmap_single (struct device *, dma_addr_t, size_t, int); 43typedef void ia64_mv_dma_unmap_single (struct device *, dma_addr_t, size_t, int);
diff --git a/include/asm-m32r/dma-mapping.h b/include/asm-m32r/dma-mapping.h
index 3a2db28834b6..a7fa0302bda7 100644
--- a/include/asm-m32r/dma-mapping.h
+++ b/include/asm-m32r/dma-mapping.h
@@ -8,7 +8,7 @@
8 8
9static inline void * 9static inline void *
10dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, 10dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
11 int flag) 11 gfp_t flag)
12{ 12{
13 return (void *)NULL; 13 return (void *)NULL;
14} 14}
diff --git a/include/asm-mips/dma-mapping.h b/include/asm-mips/dma-mapping.h
index af28dc88930b..43288634c38a 100644
--- a/include/asm-mips/dma-mapping.h
+++ b/include/asm-mips/dma-mapping.h
@@ -5,13 +5,13 @@
5#include <asm/cache.h> 5#include <asm/cache.h>
6 6
7void *dma_alloc_noncoherent(struct device *dev, size_t size, 7void *dma_alloc_noncoherent(struct device *dev, size_t size,
8 dma_addr_t *dma_handle, int flag); 8 dma_addr_t *dma_handle, gfp_t flag);
9 9
10void dma_free_noncoherent(struct device *dev, size_t size, 10void dma_free_noncoherent(struct device *dev, size_t size,
11 void *vaddr, dma_addr_t dma_handle); 11 void *vaddr, dma_addr_t dma_handle);
12 12
13void *dma_alloc_coherent(struct device *dev, size_t size, 13void *dma_alloc_coherent(struct device *dev, size_t size,
14 dma_addr_t *dma_handle, int flag); 14 dma_addr_t *dma_handle, gfp_t flag);
15 15
16void dma_free_coherent(struct device *dev, size_t size, 16void dma_free_coherent(struct device *dev, size_t size,
17 void *vaddr, dma_addr_t dma_handle); 17 void *vaddr, dma_addr_t dma_handle);
diff --git a/include/asm-parisc/dma-mapping.h b/include/asm-parisc/dma-mapping.h
index 4db84f969e9e..74d4ac6f2151 100644
--- a/include/asm-parisc/dma-mapping.h
+++ b/include/asm-parisc/dma-mapping.h
@@ -9,8 +9,8 @@
9/* See Documentation/DMA-mapping.txt */ 9/* See Documentation/DMA-mapping.txt */
10struct hppa_dma_ops { 10struct hppa_dma_ops {
11 int (*dma_supported)(struct device *dev, u64 mask); 11 int (*dma_supported)(struct device *dev, u64 mask);
12 void *(*alloc_consistent)(struct device *dev, size_t size, dma_addr_t *iova, int flag); 12 void *(*alloc_consistent)(struct device *dev, size_t size, dma_addr_t *iova, gfp_t flag);
13 void *(*alloc_noncoherent)(struct device *dev, size_t size, dma_addr_t *iova, int flag); 13 void *(*alloc_noncoherent)(struct device *dev, size_t size, dma_addr_t *iova, gfp_t flag);
14 void (*free_consistent)(struct device *dev, size_t size, void *vaddr, dma_addr_t iova); 14 void (*free_consistent)(struct device *dev, size_t size, void *vaddr, dma_addr_t iova);
15 dma_addr_t (*map_single)(struct device *dev, void *addr, size_t size, enum dma_data_direction direction); 15 dma_addr_t (*map_single)(struct device *dev, void *addr, size_t size, enum dma_data_direction direction);
16 void (*unmap_single)(struct device *dev, dma_addr_t iova, size_t size, enum dma_data_direction direction); 16 void (*unmap_single)(struct device *dev, dma_addr_t iova, size_t size, enum dma_data_direction direction);
@@ -49,14 +49,14 @@ extern struct hppa_dma_ops *hppa_dma_ops;
49 49
50static inline void * 50static inline void *
51dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, 51dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
52 int flag) 52 gfp_t flag)
53{ 53{
54 return hppa_dma_ops->alloc_consistent(dev, size, dma_handle, flag); 54 return hppa_dma_ops->alloc_consistent(dev, size, dma_handle, flag);
55} 55}
56 56
57static inline void * 57static inline void *
58dma_alloc_noncoherent(struct device *dev, size_t size, dma_addr_t *dma_handle, 58dma_alloc_noncoherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
59 int flag) 59 gfp_t flag)
60{ 60{
61 return hppa_dma_ops->alloc_noncoherent(dev, size, dma_handle, flag); 61 return hppa_dma_ops->alloc_noncoherent(dev, size, dma_handle, flag);
62} 62}
diff --git a/include/asm-ppc/dma-mapping.h b/include/asm-ppc/dma-mapping.h
index 061bfcac1bf1..6e9635114433 100644
--- a/include/asm-ppc/dma-mapping.h
+++ b/include/asm-ppc/dma-mapping.h
@@ -19,7 +19,7 @@
19 * allocate the space "normally" and use the cache management functions 19 * allocate the space "normally" and use the cache management functions
20 * to ensure it is consistent. 20 * to ensure it is consistent.
21 */ 21 */
22extern void *__dma_alloc_coherent(size_t size, dma_addr_t *handle, int gfp); 22extern void *__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp);
23extern void __dma_free_coherent(size_t size, void *vaddr); 23extern void __dma_free_coherent(size_t size, void *vaddr);
24extern void __dma_sync(void *vaddr, size_t size, int direction); 24extern void __dma_sync(void *vaddr, size_t size, int direction);
25extern void __dma_sync_page(struct page *page, unsigned long offset, 25extern void __dma_sync_page(struct page *page, unsigned long offset,
diff --git a/include/asm-sh/dma-mapping.h b/include/asm-sh/dma-mapping.h
index 80d164c1529e..d3fa5c2b889d 100644
--- a/include/asm-sh/dma-mapping.h
+++ b/include/asm-sh/dma-mapping.h
@@ -9,7 +9,7 @@
9extern struct bus_type pci_bus_type; 9extern struct bus_type pci_bus_type;
10 10
11/* arch/sh/mm/consistent.c */ 11/* arch/sh/mm/consistent.c */
12extern void *consistent_alloc(int gfp, size_t size, dma_addr_t *handle); 12extern void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *handle);
13extern void consistent_free(void *vaddr, size_t size); 13extern void consistent_free(void *vaddr, size_t size);
14extern void consistent_sync(void *vaddr, size_t size, int direction); 14extern void consistent_sync(void *vaddr, size_t size, int direction);
15 15
@@ -26,7 +26,7 @@ static inline int dma_set_mask(struct device *dev, u64 mask)
26} 26}
27 27
28static inline void *dma_alloc_coherent(struct device *dev, size_t size, 28static inline void *dma_alloc_coherent(struct device *dev, size_t size,
29 dma_addr_t *dma_handle, int flag) 29 dma_addr_t *dma_handle, gfp_t flag)
30{ 30{
31 if (sh_mv.mv_consistent_alloc) { 31 if (sh_mv.mv_consistent_alloc) {
32 void *ret; 32 void *ret;
diff --git a/include/asm-sh/machvec.h b/include/asm-sh/machvec.h
index 5771f4baa478..3f18aa180516 100644
--- a/include/asm-sh/machvec.h
+++ b/include/asm-sh/machvec.h
@@ -64,7 +64,7 @@ struct sh_machine_vector
64 64
65 void (*mv_heartbeat)(void); 65 void (*mv_heartbeat)(void);
66 66
67 void *(*mv_consistent_alloc)(struct device *, size_t, dma_addr_t *, int); 67 void *(*mv_consistent_alloc)(struct device *, size_t, dma_addr_t *, gfp_t);
68 int (*mv_consistent_free)(struct device *, size_t, void *, dma_addr_t); 68 int (*mv_consistent_free)(struct device *, size_t, void *, dma_addr_t);
69}; 69};
70 70
diff --git a/include/asm-sh64/dma-mapping.h b/include/asm-sh64/dma-mapping.h
index b8d26fe677f4..cc9a2e86f5b4 100644
--- a/include/asm-sh64/dma-mapping.h
+++ b/include/asm-sh64/dma-mapping.h
@@ -25,7 +25,7 @@ static inline int dma_set_mask(struct device *dev, u64 mask)
25} 25}
26 26
27static inline void *dma_alloc_coherent(struct device *dev, size_t size, 27static inline void *dma_alloc_coherent(struct device *dev, size_t size,
28 dma_addr_t *dma_handle, int flag) 28 dma_addr_t *dma_handle, gfp_t flag)
29{ 29{
30 return consistent_alloc(NULL, size, dma_handle); 30 return consistent_alloc(NULL, size, dma_handle);
31} 31}
diff --git a/include/asm-sparc/dma-mapping.h b/include/asm-sparc/dma-mapping.h
index 2dc5bb8effa6..d7c3b0f0a901 100644
--- a/include/asm-sparc/dma-mapping.h
+++ b/include/asm-sparc/dma-mapping.h
@@ -8,7 +8,7 @@
8#else 8#else
9 9
10static inline void *dma_alloc_coherent(struct device *dev, size_t size, 10static inline void *dma_alloc_coherent(struct device *dev, size_t size,
11 dma_addr_t *dma_handle, int flag) 11 dma_addr_t *dma_handle, gfp_t flag)
12{ 12{
13 BUG(); 13 BUG();
14 return NULL; 14 return NULL;
diff --git a/include/asm-sparc64/dma-mapping.h b/include/asm-sparc64/dma-mapping.h
index 1c5da41653a4..c7d5804ba76d 100644
--- a/include/asm-sparc64/dma-mapping.h
+++ b/include/asm-sparc64/dma-mapping.h
@@ -10,7 +10,7 @@
 struct device;
 
 static inline void *dma_alloc_coherent(struct device *dev, size_t size,
-			 dma_addr_t *dma_handle, int flag)
+			 dma_addr_t *dma_handle, gfp_t flag)
 {
 	BUG();
 	return NULL;
diff --git a/include/asm-um/dma-mapping.h b/include/asm-um/dma-mapping.h
index 13e6291f7151..babd29895114 100644
--- a/include/asm-um/dma-mapping.h
+++ b/include/asm-um/dma-mapping.h
@@ -19,7 +19,7 @@ dma_set_mask(struct device *dev, u64 dma_mask)
 
 static inline void *
 dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
-		   int flag)
+		   gfp_t flag)
 {
 	BUG();
 	return((void *) 0);
diff --git a/include/asm-um/page.h b/include/asm-um/page.h
index 2c192abe9aeb..0229814af31e 100644
--- a/include/asm-um/page.h
+++ b/include/asm-um/page.h
@@ -115,7 +115,7 @@ extern unsigned long uml_physmem;
 #define pfn_valid(pfn) ((pfn) < max_mapnr)
 #define virt_addr_valid(v) pfn_valid(phys_to_pfn(__pa(v)))
 
-extern struct page *arch_validate(struct page *page, int mask, int order);
+extern struct page *arch_validate(struct page *page, gfp_t mask, int order);
 #define HAVE_ARCH_VALIDATE
 
 extern void arch_free_page(struct page *page, int order);
diff --git a/include/asm-x86_64/dma-mapping.h b/include/asm-x86_64/dma-mapping.h
index e784fdc524f1..54a380efed41 100644
--- a/include/asm-x86_64/dma-mapping.h
+++ b/include/asm-x86_64/dma-mapping.h
@@ -17,7 +17,7 @@ extern dma_addr_t bad_dma_address;
 	(swiotlb ? swiotlb_dma_mapping_error(x) : ((x) == bad_dma_address))
 
 void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
-			 unsigned gfp);
+			 gfp_t gfp);
 void dma_free_coherent(struct device *dev, size_t size, void *vaddr,
 			 dma_addr_t dma_handle);
 
diff --git a/include/asm-x86_64/swiotlb.h b/include/asm-x86_64/swiotlb.h
index 36293061f4ed..7cbfd10ecc3c 100644
--- a/include/asm-x86_64/swiotlb.h
+++ b/include/asm-x86_64/swiotlb.h
@@ -27,7 +27,7 @@ extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg,
 			      int nents, int direction);
 extern int swiotlb_dma_mapping_error(dma_addr_t dma_addr);
 extern void *swiotlb_alloc_coherent (struct device *hwdev, size_t size,
-				     dma_addr_t *dma_handle, int flags);
+				     dma_addr_t *dma_handle, gfp_t flags);
 extern void swiotlb_free_coherent (struct device *hwdev, size_t size,
 				   void *vaddr, dma_addr_t dma_handle);
 
diff --git a/include/asm-xtensa/dma-mapping.h b/include/asm-xtensa/dma-mapping.h
index e86a206f1209..c425f10d086a 100644
--- a/include/asm-xtensa/dma-mapping.h
+++ b/include/asm-xtensa/dma-mapping.h
@@ -28,7 +28,7 @@ extern void consistent_sync(void*, size_t, int);
 #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
 
 void *dma_alloc_coherent(struct device *dev, size_t size,
-			 dma_addr_t *dma_handle, int flag);
+			 dma_addr_t *dma_handle, gfp_t flag);
 
 void dma_free_coherent(struct device *dev, size_t size,
 		       void *vaddr, dma_addr_t dma_handle);
diff --git a/include/linux/audit.h b/include/linux/audit.h
index b2a2509bd7ea..da3c01955f3d 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -260,11 +260,11 @@ extern int audit_filter_user(struct netlink_skb_parms *cb, int type);
 #ifdef CONFIG_AUDIT
 /* These are defined in audit.c */
 				/* Public API */
-extern void		    audit_log(struct audit_context *ctx, int gfp_mask,
+extern void		    audit_log(struct audit_context *ctx, gfp_t gfp_mask,
 				      int type, const char *fmt, ...)
 				      __attribute__((format(printf,4,5)));
 
-extern struct audit_buffer *audit_log_start(struct audit_context *ctx, int gfp_mask, int type);
+extern struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int type);
 extern void		    audit_log_format(struct audit_buffer *ab,
 					     const char *fmt, ...)
 					     __attribute__((format(printf,2,3)));
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 3344b4e8e43a..685fd3720df5 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -301,7 +301,7 @@ extern struct bio *bio_map_user_iov(struct request_queue *,
 				     struct sg_iovec *, int, int);
 extern void bio_unmap_user(struct bio *);
 extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int,
-				unsigned int);
+				gfp_t);
 extern void bio_set_pages_dirty(struct bio *bio);
 extern void bio_check_pages_dirty(struct bio *bio);
 extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index efdc9b5bc05c..025a7f084dbd 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -96,8 +96,8 @@ struct io_context {
 
 void put_io_context(struct io_context *ioc);
 void exit_io_context(void);
-struct io_context *current_io_context(int gfp_flags);
-struct io_context *get_io_context(int gfp_flags);
+struct io_context *current_io_context(gfp_t gfp_flags);
+struct io_context *get_io_context(gfp_t gfp_flags);
 void copy_io_context(struct io_context **pdst, struct io_context **psrc);
 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2);
 
@@ -107,9 +107,9 @@ typedef void (rq_end_io_fn)(struct request *);
 struct request_list {
 	int count[2];
 	int starved[2];
+	int elvpriv;
 	mempool_t *rq_pool;
 	wait_queue_head_t wait[2];
-	wait_queue_head_t drain;
 };
 
 #define BLK_MAX_CDB	16
@@ -203,6 +203,7 @@ struct request {
 enum rq_flag_bits {
 	__REQ_RW,		/* not set, read. set, write */
 	__REQ_FAILFAST,		/* no low level driver retries */
+	__REQ_SORTED,		/* elevator knows about this request */
 	__REQ_SOFTBARRIER,	/* may not be passed by ioscheduler */
 	__REQ_HARDBARRIER,	/* may not be passed by drive either */
 	__REQ_CMD,		/* is a regular fs rw request */
@@ -210,6 +211,7 @@ enum rq_flag_bits {
 	__REQ_STARTED,		/* drive already may have started this one */
 	__REQ_DONTPREP,		/* don't call prep for this one */
 	__REQ_QUEUED,		/* uses queueing */
+	__REQ_ELVPRIV,		/* elevator private data attached */
 	/*
 	 * for ATA/ATAPI devices
 	 */
@@ -235,6 +237,7 @@ enum rq_flag_bits {
 
 #define REQ_RW		(1 << __REQ_RW)
 #define REQ_FAILFAST	(1 << __REQ_FAILFAST)
+#define REQ_SORTED	(1 << __REQ_SORTED)
 #define REQ_SOFTBARRIER	(1 << __REQ_SOFTBARRIER)
 #define REQ_HARDBARRIER	(1 << __REQ_HARDBARRIER)
 #define REQ_CMD		(1 << __REQ_CMD)
@@ -242,6 +245,7 @@ enum rq_flag_bits {
 #define REQ_STARTED	(1 << __REQ_STARTED)
 #define REQ_DONTPREP	(1 << __REQ_DONTPREP)
 #define REQ_QUEUED	(1 << __REQ_QUEUED)
+#define REQ_ELVPRIV	(1 << __REQ_ELVPRIV)
 #define REQ_PC		(1 << __REQ_PC)
 #define REQ_BLOCK_PC	(1 << __REQ_BLOCK_PC)
 #define REQ_SENSE	(1 << __REQ_SENSE)
@@ -333,6 +337,12 @@ struct request_queue
 	end_flush_fn		*end_flush_fn;
 
 	/*
+	 * Dispatch queue sorting
+	 */
+	sector_t		end_sector;
+	struct request		*boundary_rq;
+
+	/*
 	 * Auto-unplugging state
 	 */
 	struct timer_list	unplug_timer;
@@ -354,7 +364,7 @@ struct request_queue
 	 * queue needs bounce pages for pages above this limit
 	 */
 	unsigned long		bounce_pfn;
-	unsigned int		bounce_gfp;
+	gfp_t			bounce_gfp;
 
 	/*
 	 * various queue flags, see QUEUE_* below
@@ -405,8 +415,6 @@ struct request_queue
 	unsigned int		sg_reserved_size;
 	int			node;
 
-	struct list_head	drain_list;
-
 	/*
 	 * reserved for flush operations
 	 */
@@ -434,7 +442,7 @@ enum {
 #define QUEUE_FLAG_DEAD		5	/* queue being torn down */
 #define QUEUE_FLAG_REENTER	6	/* Re-entrancy avoidance */
 #define QUEUE_FLAG_PLUGGED	7	/* queue is plugged */
-#define QUEUE_FLAG_DRAIN	8	/* draining queue for sched switch */
+#define QUEUE_FLAG_ELVSWITCH	8	/* don't use elevator, just do FIFO */
 #define QUEUE_FLAG_FLUSH	9	/* doing barrier flush sequence */
 
 #define blk_queue_plugged(q)	test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
@@ -454,6 +462,7 @@ enum {
 #define blk_pm_request(rq)	\
 	((rq)->flags & (REQ_PM_SUSPEND | REQ_PM_RESUME))
 
+#define blk_sorted_rq(rq)	((rq)->flags & REQ_SORTED)
 #define blk_barrier_rq(rq)	((rq)->flags & REQ_HARDBARRIER)
 #define blk_barrier_preflush(rq)	((rq)->flags & REQ_BAR_PREFLUSH)
 #define blk_barrier_postflush(rq)	((rq)->flags & REQ_BAR_POSTFLUSH)
@@ -550,7 +559,7 @@ extern void generic_make_request(struct bio *bio);
 extern void blk_put_request(struct request *);
 extern void blk_end_sync_rq(struct request *rq);
 extern void blk_attempt_remerge(request_queue_t *, struct request *);
-extern struct request *blk_get_request(request_queue_t *, int, int);
+extern struct request *blk_get_request(request_queue_t *, int, gfp_t);
 extern void blk_insert_request(request_queue_t *, struct request *, int, void *);
 extern void blk_requeue_request(request_queue_t *, struct request *);
 extern void blk_plug_device(request_queue_t *);
@@ -565,7 +574,7 @@ extern void blk_run_queue(request_queue_t *);
 extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *);
 extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int);
 extern int blk_rq_unmap_user(struct bio *, unsigned int);
-extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned int, unsigned int);
+extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned int, gfp_t);
 extern int blk_rq_map_user_iov(request_queue_t *, struct request *, struct sg_iovec *, int);
 extern int blk_execute_rq(request_queue_t *, struct gendisk *,
 			  struct request *, int);
@@ -611,12 +620,21 @@ extern void end_request(struct request *req, int uptodate);
 
 static inline void blkdev_dequeue_request(struct request *req)
 {
-	BUG_ON(list_empty(&req->queuelist));
+	elv_dequeue_request(req->q, req);
+}
 
-	list_del_init(&req->queuelist);
+/*
+ * This should be in elevator.h, but that requires pulling in rq and q
+ */
+static inline void elv_dispatch_add_tail(struct request_queue *q,
+					 struct request *rq)
+{
+	if (q->last_merge == rq)
+		q->last_merge = NULL;
 
-	if (req->rl)
-		elv_remove_request(req->q, req);
+	q->end_sector = rq_end_sector(rq);
+	q->boundary_rq = rq;
+	list_add_tail(&rq->queuelist, &q->queue_head);
 }
 
 /*
@@ -650,12 +668,10 @@ extern void blk_dump_rq_flags(struct request *, char *);
 extern void generic_unplug_device(request_queue_t *);
 extern void __generic_unplug_device(request_queue_t *);
 extern long nr_blockdev_pages(void);
-extern void blk_wait_queue_drained(request_queue_t *, int);
-extern void blk_finish_queue_drain(request_queue_t *);
 
 int blk_get_queue(request_queue_t *);
-request_queue_t *blk_alloc_queue(int gfp_mask);
-request_queue_t *blk_alloc_queue_node(int,int);
+request_queue_t *blk_alloc_queue(gfp_t);
+request_queue_t *blk_alloc_queue_node(gfp_t, int);
 #define blk_put_queue(q) blk_cleanup_queue((q))
 
 /*
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 6a1d154c0825..88af42f5e04a 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -188,7 +188,7 @@ extern int buffer_heads_over_limit;
  * Generic address_space_operations implementations for buffer_head-backed
  * address_spaces.
  */
-int try_to_release_page(struct page * page, int gfp_mask);
+int try_to_release_page(struct page * page, gfp_t gfp_mask);
 int block_invalidatepage(struct page *page, unsigned long offset);
 int block_write_full_page(struct page *page, get_block_t *get_block,
 				struct writeback_control *wbc);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index ea6bbc2d7407..a74c27e460ba 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -8,18 +8,17 @@ typedef void (elevator_merge_req_fn) (request_queue_t *, struct request *, struc
 
 typedef void (elevator_merged_fn) (request_queue_t *, struct request *);
 
-typedef struct request *(elevator_next_req_fn) (request_queue_t *);
+typedef int (elevator_dispatch_fn) (request_queue_t *, int);
 
-typedef void (elevator_add_req_fn) (request_queue_t *, struct request *, int);
+typedef void (elevator_add_req_fn) (request_queue_t *, struct request *);
 typedef int (elevator_queue_empty_fn) (request_queue_t *);
-typedef void (elevator_remove_req_fn) (request_queue_t *, struct request *);
-typedef void (elevator_requeue_req_fn) (request_queue_t *, struct request *);
 typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *);
 typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *);
 typedef int (elevator_may_queue_fn) (request_queue_t *, int, struct bio *);
 
-typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, struct bio *, int);
+typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, struct bio *, gfp_t);
 typedef void (elevator_put_req_fn) (request_queue_t *, struct request *);
+typedef void (elevator_activate_req_fn) (request_queue_t *, struct request *);
 typedef void (elevator_deactivate_req_fn) (request_queue_t *, struct request *);
 
 typedef int (elevator_init_fn) (request_queue_t *, elevator_t *);
@@ -31,10 +30,9 @@ struct elevator_ops
 	elevator_merged_fn *elevator_merged_fn;
 	elevator_merge_req_fn *elevator_merge_req_fn;
 
-	elevator_next_req_fn *elevator_next_req_fn;
+	elevator_dispatch_fn *elevator_dispatch_fn;
 	elevator_add_req_fn *elevator_add_req_fn;
-	elevator_remove_req_fn *elevator_remove_req_fn;
-	elevator_requeue_req_fn *elevator_requeue_req_fn;
+	elevator_activate_req_fn *elevator_activate_req_fn;
 	elevator_deactivate_req_fn *elevator_deactivate_req_fn;
 
 	elevator_queue_empty_fn *elevator_queue_empty_fn;
@@ -81,15 +79,15 @@ struct elevator_queue
 /*
  * block elevator interface
  */
+extern void elv_dispatch_sort(request_queue_t *, struct request *);
 extern void elv_add_request(request_queue_t *, struct request *, int, int);
 extern void __elv_add_request(request_queue_t *, struct request *, int, int);
 extern int elv_merge(request_queue_t *, struct request **, struct bio *);
 extern void elv_merge_requests(request_queue_t *, struct request *,
 			       struct request *);
 extern void elv_merged_request(request_queue_t *, struct request *);
-extern void elv_remove_request(request_queue_t *, struct request *);
+extern void elv_dequeue_request(request_queue_t *, struct request *);
 extern void elv_requeue_request(request_queue_t *, struct request *);
-extern void elv_deactivate_request(request_queue_t *, struct request *);
 extern int elv_queue_empty(request_queue_t *);
 extern struct request *elv_next_request(struct request_queue *q);
 extern struct request *elv_former_request(request_queue_t *, struct request *);
@@ -98,7 +96,7 @@ extern int elv_register_queue(request_queue_t *q);
 extern void elv_unregister_queue(request_queue_t *q);
 extern int elv_may_queue(request_queue_t *, int, struct bio *);
 extern void elv_completed_request(request_queue_t *, struct request *);
-extern int elv_set_request(request_queue_t *, struct request *, struct bio *, int);
+extern int elv_set_request(request_queue_t *, struct request *, struct bio *, gfp_t);
 extern void elv_put_request(request_queue_t *, struct request *);
 
 /*
@@ -142,4 +140,6 @@ enum {
 	ELV_MQUEUE_MUST,
 };
 
+#define rq_end_sector(rq)	((rq)->sector + (rq)->nr_sectors)
+
 #endif
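The interface change above replaces the pull-style elevator_next_req_fn with a push-style elevator_dispatch_fn: when called, the scheduler moves requests from its private structures onto q->queue_head (via elv_dispatch_sort(), or the elv_dispatch_add_tail() helper added to blkdev.h) and returns nonzero if it dispatched anything. A minimal FIFO sketch under that contract; struct fifo_data and the function names are illustrative assumptions, not code from this patch:

	struct fifo_data {
		struct list_head queue;		/* requests in arrival order */
	};

	static void fifo_add_request(request_queue_t *q, struct request *rq)
	{
		struct fifo_data *fd = q->elevator->elevator_data;

		list_add_tail(&rq->queuelist, &fd->queue);
	}

	static int fifo_dispatch(request_queue_t *q, int force)
	{
		struct fifo_data *fd = q->elevator->elevator_data;
		struct request *rq;

		if (list_empty(&fd->queue))
			return 0;

		/* hand the oldest request to the dispatch queue;
		 * elv_dispatch_sort() inserts it on q->queue_head
		 * at a sector-sorted position */
		rq = list_entry(fd->queue.next, struct request, queuelist);
		list_del_init(&rq->queuelist);
		elv_dispatch_sort(q, rq);
		return 1;
	}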
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e0b77c5af9a0..f83d997c5582 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -320,7 +320,7 @@ struct address_space_operations {
 	/* Unfortunately this kludge is needed for FIBMAP. Don't use it */
 	sector_t (*bmap)(struct address_space *, sector_t);
 	int (*invalidatepage) (struct page *, unsigned long);
-	int (*releasepage) (struct page *, int);
+	int (*releasepage) (struct page *, gfp_t);
 	ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
 			loff_t offset, unsigned long nr_segs);
 	struct page* (*get_xip_page)(struct address_space *, sector_t,
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 01796c41c951..142e1c1e0689 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -119,7 +119,7 @@ struct gendisk {
 	int policy;
 
 	atomic_t sync_io;		/* RAID */
-	unsigned long stamp, stamp_idle;
+	unsigned long stamp;
 	int in_flight;
 #ifdef	CONFIG_SMP
 	struct disk_stats *dkstats;
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 3010e172394d..c3779432a723 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -12,8 +12,8 @@ struct vm_area_struct;
  * GFP bitmasks..
  */
 /* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low two bits) */
-#define __GFP_DMA	0x01u
-#define __GFP_HIGHMEM	0x02u
+#define __GFP_DMA	((__force gfp_t)0x01u)
+#define __GFP_HIGHMEM	((__force gfp_t)0x02u)
 
 /*
  * Action modifiers - doesn't change the zoning
@@ -26,24 +26,24 @@ struct vm_area_struct;
  *
  * __GFP_NORETRY: The VM implementation must not retry indefinitely.
  */
-#define __GFP_WAIT	0x10u	/* Can wait and reschedule? */
-#define __GFP_HIGH	0x20u	/* Should access emergency pools? */
-#define __GFP_IO	0x40u	/* Can start physical IO? */
-#define __GFP_FS	0x80u	/* Can call down to low-level FS? */
-#define __GFP_COLD	0x100u	/* Cache-cold page required */
-#define __GFP_NOWARN	0x200u	/* Suppress page allocation failure warning */
-#define __GFP_REPEAT	0x400u	/* Retry the allocation.  Might fail */
-#define __GFP_NOFAIL	0x800u	/* Retry for ever.  Cannot fail */
-#define __GFP_NORETRY	0x1000u	/* Do not retry.  Might fail */
-#define __GFP_NO_GROW	0x2000u	/* Slab internal usage */
-#define __GFP_COMP	0x4000u	/* Add compound page metadata */
-#define __GFP_ZERO	0x8000u	/* Return zeroed page on success */
-#define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */
-#define __GFP_NORECLAIM  0x20000u /* No realy zone reclaim during allocation */
-#define __GFP_HARDWALL   0x40000u /* Enforce hardwall cpuset memory allocs */
+#define __GFP_WAIT	((__force gfp_t)0x10u)	/* Can wait and reschedule? */
+#define __GFP_HIGH	((__force gfp_t)0x20u)	/* Should access emergency pools? */
+#define __GFP_IO	((__force gfp_t)0x40u)	/* Can start physical IO? */
+#define __GFP_FS	((__force gfp_t)0x80u)	/* Can call down to low-level FS? */
+#define __GFP_COLD	((__force gfp_t)0x100u)	/* Cache-cold page required */
+#define __GFP_NOWARN	((__force gfp_t)0x200u)	/* Suppress page allocation failure warning */
+#define __GFP_REPEAT	((__force gfp_t)0x400u)	/* Retry the allocation.  Might fail */
+#define __GFP_NOFAIL	((__force gfp_t)0x800u)	/* Retry for ever.  Cannot fail */
+#define __GFP_NORETRY	((__force gfp_t)0x1000u)/* Do not retry.  Might fail */
+#define __GFP_NO_GROW	((__force gfp_t)0x2000u)/* Slab internal usage */
+#define __GFP_COMP	((__force gfp_t)0x4000u)/* Add compound page metadata */
+#define __GFP_ZERO	((__force gfp_t)0x8000u)/* Return zeroed page on success */
+#define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
+#define __GFP_NORECLAIM  ((__force gfp_t)0x20000u) /* No realy zone reclaim during allocation */
+#define __GFP_HARDWALL   ((__force gfp_t)0x40000u) /* Enforce hardwall cpuset memory allocs */
 
 #define __GFP_BITS_SHIFT 20	/* Room for 20 __GFP_FOO bits */
-#define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
+#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /* if you forget to add the bitmask here kernel will crash, period */
 #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
@@ -64,6 +64,7 @@ struct vm_area_struct;
 
 #define GFP_DMA		__GFP_DMA
 
+#define gfp_zone(mask) ((__force int)((mask) & (__force gfp_t)GFP_ZONEMASK))
 
 /*
  * There is only one page-allocator function, and two main namespaces to
@@ -94,7 +95,7 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
 		return NULL;
 
 	return __alloc_pages(gfp_mask, order,
-		NODE_DATA(nid)->node_zonelists + (gfp_mask & GFP_ZONEMASK));
+		NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_mask));
 }
 
 #ifdef CONFIG_NUMA
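The ((__force gfp_t)...) casts above are what make the rest of this patch mechanical: gfp_t is declared __bitwise, so under sparse (make C=1) it behaves as a distinct type that cannot be mixed with plain integers except through an explicit __force cast. A standalone sketch of the mechanism; the my_gfp_t/MY_GFP_WAIT/my_alloc names are illustrative, only the __CHECKER__/__bitwise/__force idiom is the kernel's:

	#ifdef __CHECKER__			/* defined while sparse is checking */
	#define __bitwise	__attribute__((bitwise))
	#define __force		__attribute__((force))
	#else
	#define __bitwise
	#define __force
	#endif

	typedef unsigned int __bitwise my_gfp_t;

	#define MY_GFP_WAIT	((__force my_gfp_t)0x10u)

	void *my_alloc(unsigned long size, my_gfp_t flags);

	/* my_alloc(32, 0x10u) draws a sparse warning (plain int where a
	 * bitwise type is expected); my_alloc(32, MY_GFP_WAIT) is clean. */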
diff --git a/include/linux/i2o.h b/include/linux/i2o.h
index bdc286ec947c..b4af45aad25d 100644
--- a/include/linux/i2o.h
+++ b/include/linux/i2o.h
@@ -492,7 +492,7 @@ static inline int i2o_dma_map_sg(struct i2o_controller *c,
  *	Returns 0 on success or -ENOMEM on failure.
  */
 static inline int i2o_dma_alloc(struct device *dev, struct i2o_dma *addr,
-				size_t len, unsigned int gfp_mask)
+				size_t len, gfp_t gfp_mask)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	int dma_64 = 0;
@@ -551,7 +551,7 @@ static inline void i2o_dma_free(struct device *dev, struct i2o_dma *addr)
  *	Returns the 0 on success or negative error code on failure.
  */
 static inline int i2o_dma_realloc(struct device *dev, struct i2o_dma *addr,
-				  size_t len, unsigned int gfp_mask)
+				  size_t len, gfp_t gfp_mask)
 {
 	i2o_dma_free(dev, addr);
 
diff --git a/include/linux/idr.h b/include/linux/idr.h
index 3d5de45f961b..7fb3ff9c7b0e 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -71,7 +71,7 @@ struct idr {
  */
 
 void *idr_find(struct idr *idp, int id);
-int idr_pre_get(struct idr *idp, unsigned gfp_mask);
+int idr_pre_get(struct idr *idp, gfp_t gfp_mask);
 int idr_get_new(struct idr *idp, void *ptr, int *id);
 int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id);
 void idr_remove(struct idr *idp, int id);
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index ff853b3173c6..be197eb90077 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -69,7 +69,7 @@ extern int journal_enable_debug;
 #define jbd_debug(f, a...)	/**/
 #endif
 
-extern void * __jbd_kmalloc (const char *where, size_t size, int flags, int retry);
+extern void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry);
 #define jbd_kmalloc(size, flags) \
 	__jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry)
 #define jbd_rep_kmalloc(size, flags) \
@@ -890,7 +890,7 @@ extern int journal_forget (handle_t *, struct buffer_head *);
 extern void	 journal_sync_buffer (struct buffer_head *);
 extern int	 journal_invalidatepage(journal_t *,
 				struct page *, unsigned long);
-extern int	 journal_try_to_free_buffers(journal_t *, struct page *, int);
+extern int	 journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
 extern int	 journal_stop(handle_t *);
 extern int	 journal_flush (journal_t *);
 extern void	 journal_lock_updates (journal_t *);
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 3b22304f12fd..7f7403aa4a41 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -65,7 +65,7 @@ extern void kobject_unregister(struct kobject *);
 extern struct kobject * kobject_get(struct kobject *);
 extern void kobject_put(struct kobject *);
 
-extern char * kobject_get_path(struct kobject *, int);
+extern char * kobject_get_path(struct kobject *, gfp_t);
 
 struct kobj_type {
 	void (*release)(struct kobject *);
diff --git a/include/linux/loop.h b/include/linux/loop.h
index 53fa51595443..40f63c9879d2 100644
--- a/include/linux/loop.h
+++ b/include/linux/loop.h
@@ -52,7 +52,7 @@ struct loop_device {
 	unsigned	lo_blocksize;
 	void		*key_data;
 
-	int		old_gfp_mask;
+	gfp_t		old_gfp_mask;
 
 	spinlock_t		lo_lock;
 	struct bio		*lo_bio;
diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
index 9263d2db2d67..99e044b4efc6 100644
--- a/include/linux/mbcache.h
+++ b/include/linux/mbcache.h
@@ -22,7 +22,7 @@ struct mb_cache_entry {
 };
 
 struct mb_cache_op {
-	int (*free)(struct mb_cache_entry *, int);
+	int (*free)(struct mb_cache_entry *, gfp_t);
 };
 
 /* Functions on caches */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 097b3a3c693d..e1649578fb0c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -747,7 +747,7 @@ extern unsigned long do_mremap(unsigned long addr,
  * The callback will be passed nr_to_scan == 0 when the VM is querying the
  * cache size, so a fastpath for that case is appropriate.
  */
-typedef int (*shrinker_t)(int nr_to_scan, unsigned int gfp_mask);
+typedef int (*shrinker_t)(int nr_to_scan, gfp_t gfp_mask);
 
 /*
  * Add an aging callback.  The int is the number of 'seeks' it takes
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 5ed471b58f4f..7519eb4191e7 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -302,7 +302,7 @@ void get_zone_counts(unsigned long *active, unsigned long *inactive,
 void build_all_zonelists(void);
 void wakeup_kswapd(struct zone *zone, int order);
 int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
-		int alloc_type, int can_try_harder, int gfp_high);
+		int alloc_type, int can_try_harder, gfp_t gfp_high);
 
 #ifdef CONFIG_HAVE_MEMORY_PRESENT
 void memory_present(int nid, unsigned long start, unsigned long end);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 7db67b008cac..1c975d0d9e94 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -8,6 +8,7 @@ struct vfsmount;
 struct open_intent {
 	int	flags;
 	int	create_mode;
+	struct file *file;
 };
 
 enum { MAX_NESTED_LINKS = 5 };
@@ -65,6 +66,13 @@ extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
 extern void path_release(struct nameidata *);
 extern void path_release_on_umount(struct nameidata *);
 
+extern int __user_path_lookup_open(const char __user *, unsigned lookup_flags, struct nameidata *nd, int open_flags);
+extern int path_lookup_open(const char *, unsigned lookup_flags, struct nameidata *, int open_flags);
+extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
+		int (*open)(struct inode *, struct file *));
+extern struct file *nameidata_to_filp(struct nameidata *nd, int flags);
+extern void release_open_intent(struct nameidata *);
+
 extern struct dentry * lookup_one_len(const char *, struct dentry *, int);
 extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 9a6047ff1b25..325fe7ae49bb 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -41,6 +41,10 @@
 #define NFS_MAX_FILE_IO_BUFFER_SIZE	32768
 #define NFS_DEF_FILE_IO_BUFFER_SIZE	4096
 
+/* Default timeout values */
+#define NFS_MAX_UDP_TIMEOUT	(60*HZ)
+#define NFS_MAX_TCP_TIMEOUT	(600*HZ)
+
 /*
  * superblock magic number for NFS
  */
@@ -137,6 +141,7 @@ struct nfs_inode {
 	unsigned long		attrtimeo_timestamp;
 	__u64			change_attr;		/* v4 only */
 
+	unsigned long		last_updated;
 	/* "Generation counter" for the attribute cache. This is
 	 * bumped whenever we update the metadata on the
 	 * server.
@@ -236,13 +241,17 @@ static inline int nfs_caches_unstable(struct inode *inode)
 	return atomic_read(&NFS_I(inode)->data_updates) != 0;
 }
 
+static inline void nfs_mark_for_revalidate(struct inode *inode)
+{
+	spin_lock(&inode->i_lock);
+	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
+	spin_unlock(&inode->i_lock);
+}
+
 static inline void NFS_CACHEINV(struct inode *inode)
 {
-	if (!nfs_caches_unstable(inode)) {
-		spin_lock(&inode->i_lock);
-		NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
-		spin_unlock(&inode->i_lock);
-	}
+	if (!nfs_caches_unstable(inode))
+		nfs_mark_for_revalidate(inode);
 }
 
 static inline int nfs_server_capable(struct inode *inode, int cap)
@@ -276,7 +285,7 @@ static inline long nfs_save_change_attribute(struct inode *inode)
 static inline int nfs_verify_change_attribute(struct inode *inode, unsigned long chattr)
 {
 	return !nfs_caches_unstable(inode)
-		&& chattr == NFS_I(inode)->cache_change_attribute;
+		&& time_after_eq(chattr, NFS_I(inode)->cache_change_attribute);
 }
 
 /*
@@ -286,6 +295,7 @@ extern void nfs_zap_caches(struct inode *);
 extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *,
 				struct nfs_fattr *);
 extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
+extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
 extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 extern int nfs_permission(struct inode *, int, struct nameidata *);
 extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *);
@@ -312,6 +322,12 @@ extern void nfs_file_clear_open_context(struct file *filp);
 /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
 extern u32 root_nfs_parse_addr(char *name); /*__init*/
 
+static inline void nfs_fattr_init(struct nfs_fattr *fattr)
+{
+	fattr->valid = 0;
+	fattr->time_start = jiffies;
+}
+
 /*
  * linux/fs/nfs/file.c
  */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index a2bf6914ff1b..40718669b9c8 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -41,7 +41,7 @@ struct nfs_fattr {
 	__u32			bitmap[2];	/* NFSv4 returned attribute bitmap */
 	__u64			change_attr;	/* NFSv4 change attribute */
 	__u64			pre_change_attr;/* pre-op NFSv4 change attribute */
-	unsigned long		timestamp;
+	unsigned long		time_start;
 };
 
 #define NFS_ATTR_WCC		0x0001	/* pre-op WCC data */
@@ -96,12 +96,13 @@ struct nfs4_change_info {
 	u64			after;
 };
 
+struct nfs_seqid;
 /*
  * Arguments to the open call.
  */
 struct nfs_openargs {
 	const struct nfs_fh *	fh;
-	__u32			seqid;
+	struct nfs_seqid *	seqid;
 	int			open_flags;
 	__u64                   clientid;
 	__u32                   id;
@@ -123,6 +124,7 @@ struct nfs_openres {
 	struct nfs4_change_info	cinfo;
 	__u32                   rflags;
 	struct nfs_fattr *      f_attr;
+	struct nfs_fattr *      dir_attr;
 	const struct nfs_server *server;
 	int			delegation_type;
 	nfs4_stateid		delegation;
@@ -136,7 +138,7 @@ struct nfs_openres {
 struct nfs_open_confirmargs {
 	const struct nfs_fh *	fh;
 	nfs4_stateid		stateid;
-	__u32			seqid;
+	struct nfs_seqid *	seqid;
 };
 
 struct nfs_open_confirmres {
@@ -148,13 +150,16 @@ struct nfs_open_confirmres {
  */
 struct nfs_closeargs {
 	struct nfs_fh *         fh;
-	nfs4_stateid		stateid;
-	__u32			seqid;
+	nfs4_stateid *		stateid;
+	struct nfs_seqid *	seqid;
 	int			open_flags;
+	const u32 *		bitmask;
 };
 
 struct nfs_closeres {
 	nfs4_stateid            stateid;
+	struct nfs_fattr *	fattr;
+	const struct nfs_server *server;
 };
 /*
  * * Arguments to the lock,lockt, and locku call.
@@ -164,30 +169,19 @@ struct nfs_lowner {
 	u32			id;
 };
 
-struct nfs_open_to_lock {
-	__u32			open_seqid;
-	nfs4_stateid		open_stateid;
-	__u32			lock_seqid;
-	struct nfs_lowner	lock_owner;
-};
-
-struct nfs_exist_lock {
-	nfs4_stateid		stateid;
-	__u32			seqid;
-};
-
 struct nfs_lock_opargs {
+	struct nfs_seqid *	lock_seqid;
+	nfs4_stateid *		lock_stateid;
+	struct nfs_seqid *	open_seqid;
+	nfs4_stateid *		open_stateid;
+	struct nfs_lowner	lock_owner;
 	__u32			reclaim;
 	__u32			new_lock_owner;
-	union {
-		struct nfs_open_to_lock *open_lock;
-		struct nfs_exist_lock   *exist_lock;
-	} u;
 };
 
 struct nfs_locku_opargs {
-	__u32			seqid;
-	nfs4_stateid		stateid;
+	struct nfs_seqid *	seqid;
+	nfs4_stateid *		stateid;
 };
 
 struct nfs_lockargs {
@@ -262,6 +256,7 @@ struct nfs_writeargs {
 	enum nfs3_stable_how	stable;
 	unsigned int		pgbase;
 	struct page **		pages;
+	const u32 *		bitmask;
 };
 
 struct nfs_writeverf {
@@ -273,6 +268,7 @@ struct nfs_writeres {
 	struct nfs_fattr *	fattr;
 	struct nfs_writeverf *	verf;
 	__u32			count;
+	const struct nfs_server *server;
 };
 
 /*
@@ -550,6 +546,7 @@ struct nfs4_create_res {
 	struct nfs_fh *			fh;
 	struct nfs_fattr *		fattr;
 	struct nfs4_change_info		dir_cinfo;
+	struct nfs_fattr *		dir_fattr;
 };
 
 struct nfs4_fsinfo_arg {
@@ -571,8 +568,17 @@ struct nfs4_link_arg {
 	const struct nfs_fh *		fh;
 	const struct nfs_fh *		dir_fh;
 	const struct qstr *		name;
+	const u32 *			bitmask;
+};
+
+struct nfs4_link_res {
+	const struct nfs_server *	server;
+	struct nfs_fattr *		fattr;
+	struct nfs4_change_info		cinfo;
+	struct nfs_fattr *		dir_attr;
 };
 
+
 struct nfs4_lookup_arg {
 	const struct nfs_fh *		dir_fh;
 	const struct qstr *		name;
@@ -619,6 +625,13 @@ struct nfs4_readlink {
 struct nfs4_remove_arg {
 	const struct nfs_fh *		fh;
 	const struct qstr *		name;
+	const u32 *			bitmask;
+};
+
+struct nfs4_remove_res {
+	const struct nfs_server *	server;
+	struct nfs4_change_info		cinfo;
+	struct nfs_fattr *		dir_attr;
 };
 
 struct nfs4_rename_arg {
@@ -626,11 +639,15 @@ struct nfs4_rename_arg {
 	const struct nfs_fh *		new_dir;
 	const struct qstr *		old_name;
 	const struct qstr *		new_name;
+	const u32 *			bitmask;
 };
 
 struct nfs4_rename_res {
+	const struct nfs_server *	server;
 	struct nfs4_change_info		old_cinfo;
+	struct nfs_fattr *		old_fattr;
 	struct nfs4_change_info		new_cinfo;
+	struct nfs_fattr *		new_fattr;
 };
 
 struct nfs4_setclientid {
@@ -722,7 +739,7 @@ struct nfs_rpc_ops {
 	int	(*write)   (struct nfs_write_data *);
 	int	(*commit)  (struct nfs_write_data *);
 	int	(*create)  (struct inode *, struct dentry *,
-			    struct iattr *, int);
+			    struct iattr *, int, struct nameidata *);
 	int	(*remove)  (struct inode *, struct qstr *);
 	int	(*unlink_setup)  (struct rpc_message *,
 			    struct dentry *, struct qstr *);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index acbf31c154f8..ba6c310a055f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -21,16 +21,17 @@
 
 static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
 {
-	return mapping->flags & __GFP_BITS_MASK;
+	return (__force gfp_t)mapping->flags & __GFP_BITS_MASK;
 }
 
 /*
  * This is non-atomic.  Only to be used before the mapping is activated.
  * Probably needs a barrier...
  */
-static inline void mapping_set_gfp_mask(struct address_space *m, int mask)
+static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
 {
-	m->flags = (m->flags & ~__GFP_BITS_MASK) | mask;
+	m->flags = (m->flags & ~(__force unsigned long)__GFP_BITS_MASK) |
+				(__force unsigned long)mask;
 }
 
 /*
@@ -69,7 +70,7 @@ extern struct page * find_lock_page(struct address_space *mapping,
 extern struct page * find_trylock_page(struct address_space *mapping,
 				unsigned long index);
 extern struct page * find_or_create_page(struct address_space *mapping,
-				unsigned long index, unsigned int gfp_mask);
+				unsigned long index, gfp_t gfp_mask);
 unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 			unsigned int nr_pages, struct page **pages);
 unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
@@ -92,9 +93,9 @@ extern int read_cache_pages(struct address_space *mapping,
 			struct list_head *pages, filler_t *filler, void *data);
 
 int add_to_page_cache(struct page *page, struct address_space *mapping,
-				unsigned long index, int gfp_mask);
+				unsigned long index, gfp_t gfp_mask);
 int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
-				unsigned long index, int gfp_mask);
+				unsigned long index, gfp_t gfp_mask);
 extern void remove_from_page_cache(struct page *page);
 extern void __remove_from_page_cache(struct page *page);
 
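mapping_gfp_mask()/mapping_set_gfp_mask() store the allocation mask in the low bits of mapping->flags, and the __force casts above let that value round-trip through unsigned long without sparse warnings. A typical (illustrative, not from this patch) use is a filesystem restricting page-cache allocations for its inodes:

	/* in a filesystem's inode setup: page-cache allocations for this
	 * mapping must not recurse back into the FS */
	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);

	/* page-cache helpers then pick the mask back up, e.g.: */
	struct page *page = page_cache_alloc(inode->i_mapping);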
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 045d4761febc..9f0f9281f42a 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -24,7 +24,7 @@
 
 struct radix_tree_root {
 	unsigned int		height;
-	unsigned int		gfp_mask;
+	gfp_t			gfp_mask;
 	struct radix_tree_node	*rnode;
 };
 
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index af00b10294cd..001ab82df051 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -1972,7 +1972,7 @@ extern struct address_space_operations reiserfs_address_space_operations;
 
 /* fix_nodes.c */
 #ifdef CONFIG_REISERFS_CHECK
-void *reiserfs_kmalloc(size_t size, int flags, struct super_block *s);
+void *reiserfs_kmalloc(size_t size, gfp_t flags, struct super_block *s);
 void reiserfs_kfree(const void *vp, size_t size, struct super_block *s);
 #else
 static inline void *reiserfs_kmalloc(size_t size, int flags,
diff --git a/include/linux/security.h b/include/linux/security.h
index 627382e74057..dac956ed98f0 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1210,7 +1210,7 @@ struct security_operations {
 	int (*socket_shutdown) (struct socket * sock, int how);
 	int (*socket_sock_rcv_skb) (struct sock * sk, struct sk_buff * skb);
 	int (*socket_getpeersec) (struct socket *sock, char __user *optval, int __user *optlen, unsigned len);
-	int (*sk_alloc_security) (struct sock *sk, int family, int priority);
+	int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority);
 	void (*sk_free_security) (struct sock *sk);
 #endif	/* CONFIG_SECURITY_NETWORK */
 };
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 8f5d9e7f8734..b756935da9c8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -171,7 +171,6 @@ enum {
  *	struct sk_buff - socket buffer
  *	@next: Next buffer in list
  *	@prev: Previous buffer in list
- *	@list: List we are on
  *	@sk: Socket we are owned by
  *	@tstamp: Time we arrived
  *	@dev: Device we arrived on/are leaving by
@@ -190,6 +189,7 @@ enum {
  *	@cloned: Head may be cloned (check refcnt to be sure)
  *	@nohdr: Payload reference only, must not modify header
  *	@pkt_type: Packet class
+ *	@fclone: skbuff clone status
  *	@ip_summed: Driver fed us an IP checksum
  *	@priority: Packet queueing priority
  *	@users: User count - see {datagram,tcp}.c
@@ -202,6 +202,7 @@ enum {
  *	@destructor: Destruct function
  *	@nfmark: Can be used for communication between hooks
  *	@nfct: Associated connection, if any
+ *	@ipvs_property: skbuff is owned by ipvs
  *	@nfctinfo: Relationship of this skb to the connection
  *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
  *	@tc_index: Traffic control index
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 5fc04a16ecb0..09b9aa60063d 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -121,7 +121,7 @@ extern unsigned int ksize(const void *);
 extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t flags, int node);
 extern void *kmalloc_node(size_t size, gfp_t flags, int node);
 #else
-static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, int flags, int node)
+static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int node)
 {
 	return kmem_cache_alloc(cachep, flags);
 }
diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 04ebc24db348..b68c11a2d6dd 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -66,7 +66,12 @@ struct rpc_cred_cache {
66 66
67struct rpc_auth { 67struct rpc_auth {
68 unsigned int au_cslack; /* call cred size estimate */ 68 unsigned int au_cslack; /* call cred size estimate */
69 unsigned int au_rslack; /* reply verf size guess */ 69 /* guess at number of u32's auth adds before
70 * reply data; normally the verifier size: */
71 unsigned int au_rslack;
72 /* for gss, used to calculate au_rslack: */
73 unsigned int au_verfsize;
74
70 unsigned int au_flags; /* various flags */ 75 unsigned int au_flags; /* various flags */
71 struct rpc_authops * au_ops; /* operations */ 76 struct rpc_authops * au_ops; /* operations */
72 rpc_authflavor_t au_flavor; /* pseudoflavor (note may 77 rpc_authflavor_t au_flavor; /* pseudoflavor (note may
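The split between au_rslack and au_verfsize matters for buffer sizing: the client has to leave room in its receive buffer for whatever the flavor inserts ahead of the reply data. A rough sketch of that arithmetic in 32-bit words, assuming RPC_REPHDRSIZE from xprt.h and a hypothetical p_replen for the procedure's own reply estimate (this helper is illustrative, not the kernel's):

/*
 * Hypothetical helper: reply buffer estimate in u32 words.
 * RPC_REPHDRSIZE covers the fixed reply header; au_rslack covers
 * what the auth flavor adds before the data (normally the verifier).
 */
static unsigned int example_reply_words(struct rpc_auth *auth,
                                        unsigned int p_replen)
{
        return RPC_REPHDRSIZE + auth->au_rslack + p_replen;
}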
diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h
index eadb31e3c198..1a42d902bc11 100644
--- a/include/linux/sunrpc/debug.h
+++ b/include/linux/sunrpc/debug.h
@@ -32,6 +32,7 @@
32#define RPCDBG_AUTH 0x0010 32#define RPCDBG_AUTH 0x0010
33#define RPCDBG_PMAP 0x0020 33#define RPCDBG_PMAP 0x0020
34#define RPCDBG_SCHED 0x0040 34#define RPCDBG_SCHED 0x0040
35#define RPCDBG_TRANS 0x0080
35#define RPCDBG_SVCSOCK 0x0100 36#define RPCDBG_SVCSOCK 0x0100
36#define RPCDBG_SVCDSP 0x0200 37#define RPCDBG_SVCDSP 0x0200
37#define RPCDBG_MISC 0x0400 38#define RPCDBG_MISC 0x0400
@@ -94,6 +95,8 @@ enum {
94 CTL_NLMDEBUG, 95 CTL_NLMDEBUG,
95 CTL_SLOTTABLE_UDP, 96 CTL_SLOTTABLE_UDP,
96 CTL_SLOTTABLE_TCP, 97 CTL_SLOTTABLE_TCP,
98 CTL_MIN_RESVPORT,
99 CTL_MAX_RESVPORT,
97}; 100};
98 101
99#endif /* _LINUX_SUNRPC_DEBUG_H_ */ 102#endif /* _LINUX_SUNRPC_DEBUG_H_ */
diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h
index 689262f63059..9b8bcf125c18 100644
--- a/include/linux/sunrpc/gss_api.h
+++ b/include/linux/sunrpc/gss_api.h
@@ -40,14 +40,21 @@ int gss_import_sec_context(
40 struct gss_ctx **ctx_id); 40 struct gss_ctx **ctx_id);
41u32 gss_get_mic( 41u32 gss_get_mic(
42 struct gss_ctx *ctx_id, 42 struct gss_ctx *ctx_id,
43 u32 qop,
44 struct xdr_buf *message, 43 struct xdr_buf *message,
45 struct xdr_netobj *mic_token); 44 struct xdr_netobj *mic_token);
46u32 gss_verify_mic( 45u32 gss_verify_mic(
47 struct gss_ctx *ctx_id, 46 struct gss_ctx *ctx_id,
48 struct xdr_buf *message, 47 struct xdr_buf *message,
49 struct xdr_netobj *mic_token, 48 struct xdr_netobj *mic_token);
50 u32 *qstate); 49u32 gss_wrap(
50 struct gss_ctx *ctx_id,
51 int offset,
52 struct xdr_buf *outbuf,
53 struct page **inpages);
54u32 gss_unwrap(
55 struct gss_ctx *ctx_id,
56 int offset,
57 struct xdr_buf *inbuf);
51u32 gss_delete_sec_context( 58u32 gss_delete_sec_context(
52 struct gss_ctx **ctx_id); 59 struct gss_ctx **ctx_id);
53 60
@@ -56,7 +63,6 @@ char *gss_service_to_auth_domain_name(struct gss_api_mech *, u32 service);
56 63
57struct pf_desc { 64struct pf_desc {
58 u32 pseudoflavor; 65 u32 pseudoflavor;
59 u32 qop;
60 u32 service; 66 u32 service;
61 char *name; 67 char *name;
62 char *auth_domain_name; 68 char *auth_domain_name;
@@ -85,14 +91,21 @@ struct gss_api_ops {
85 struct gss_ctx *ctx_id); 91 struct gss_ctx *ctx_id);
86 u32 (*gss_get_mic)( 92 u32 (*gss_get_mic)(
87 struct gss_ctx *ctx_id, 93 struct gss_ctx *ctx_id,
88 u32 qop,
89 struct xdr_buf *message, 94 struct xdr_buf *message,
90 struct xdr_netobj *mic_token); 95 struct xdr_netobj *mic_token);
91 u32 (*gss_verify_mic)( 96 u32 (*gss_verify_mic)(
92 struct gss_ctx *ctx_id, 97 struct gss_ctx *ctx_id,
93 struct xdr_buf *message, 98 struct xdr_buf *message,
94 struct xdr_netobj *mic_token, 99 struct xdr_netobj *mic_token);
95 u32 *qstate); 100 u32 (*gss_wrap)(
101 struct gss_ctx *ctx_id,
102 int offset,
103 struct xdr_buf *outbuf,
104 struct page **inpages);
105 u32 (*gss_unwrap)(
106 struct gss_ctx *ctx_id,
107 int offset,
108 struct xdr_buf *buf);
96 void (*gss_delete_sec_context)( 109 void (*gss_delete_sec_context)(
97 void *internal_ctx_id); 110 void *internal_ctx_id);
98}; 111};
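With qop and qstate gone, integrity becomes a symmetric pair of calls over an xdr_buf, and privacy gets its own gss_wrap()/gss_unwrap() entry points. A hedged sketch of the integrity path (token storage and real error mapping elided; GSS_S_COMPLETE is the usual success code from gss_err.h, and example_mic_roundtrip is illustrative):

static int example_mic_roundtrip(struct gss_ctx *ctx, struct xdr_buf *msg,
                                 struct xdr_netobj *mic)
{
        u32 maj;

        /* compute a MIC over the message -- no qop argument any more */
        maj = gss_get_mic(ctx, msg, mic);
        if (maj != GSS_S_COMPLETE)
                return -EACCES;

        /* verify it -- the qstate out-parameter is likewise gone */
        maj = gss_verify_mic(ctx, msg, mic);
        return maj == GSS_S_COMPLETE ? 0 : -EACCES;
}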
diff --git a/include/linux/sunrpc/gss_err.h b/include/linux/sunrpc/gss_err.h
index 92608a2e574c..a6807867bd21 100644
--- a/include/linux/sunrpc/gss_err.h
+++ b/include/linux/sunrpc/gss_err.h
@@ -66,16 +66,6 @@ typedef unsigned int OM_uint32;
66 66
67 67
68/* 68/*
69 * Define the default Quality of Protection for per-message services. Note
70 * that an implementation that offers multiple levels of QOP may either reserve
71 * a value (for example zero, as assumed here) to mean "default protection", or
72 * alternatively may simply equate GSS_C_QOP_DEFAULT to a specific explicit
73 * QOP value. However a value of 0 should always be interpreted by a GSSAPI
74 * implementation as a request for the default protection level.
75 */
76#define GSS_C_QOP_DEFAULT 0
77
78/*
79 * Expiration time of 2^32-1 seconds means infinite lifetime for a 69 * Expiration time of 2^32-1 seconds means infinite lifetime for a
80 * credential or security context 70 * credential or security context
81 */ 71 */
diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index ffe31d2eb9ec..2c3601d31045 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -116,18 +116,22 @@ enum seal_alg {
116 116
117s32 117s32
118make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, 118make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
119 struct xdr_netobj *cksum); 119 int body_offset, struct xdr_netobj *cksum);
120
121u32 gss_get_mic_kerberos(struct gss_ctx *, struct xdr_buf *,
122 struct xdr_netobj *);
123
124u32 gss_verify_mic_kerberos(struct gss_ctx *, struct xdr_buf *,
125 struct xdr_netobj *);
120 126
121u32 127u32
122krb5_make_token(struct krb5_ctx *context_handle, int qop_req, 128gss_wrap_kerberos(struct gss_ctx *ctx_id, int offset,
123 struct xdr_buf *input_message_buffer, 129 struct xdr_buf *outbuf, struct page **pages);
124 struct xdr_netobj *output_message_buffer, int toktype);
125 130
126u32 131u32
127krb5_read_token(struct krb5_ctx *context_handle, 132gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset,
128 struct xdr_netobj *input_token_buffer, 133 struct xdr_buf *buf);
129 struct xdr_buf *message_buffer, 134
130 int *qop_state, int toktype);
131 135
132u32 136u32
133krb5_encrypt(struct crypto_tfm * key, 137krb5_encrypt(struct crypto_tfm * key,
@@ -137,6 +141,13 @@ u32
137krb5_decrypt(struct crypto_tfm * key, 141krb5_decrypt(struct crypto_tfm * key,
138 void *iv, void *in, void *out, int length); 142 void *iv, void *in, void *out, int length);
139 143
144int
145gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *outbuf, int offset,
146 struct page **pages);
147
148int
149gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *inbuf, int offset);
150
140s32 151s32
141krb5_make_seq_num(struct crypto_tfm * key, 152krb5_make_seq_num(struct crypto_tfm * key,
142 int direction, 153 int direction,
diff --git a/include/linux/sunrpc/gss_spkm3.h b/include/linux/sunrpc/gss_spkm3.h
index b5c9968c3c17..0beb2cf00a84 100644
--- a/include/linux/sunrpc/gss_spkm3.h
+++ b/include/linux/sunrpc/gss_spkm3.h
@@ -41,9 +41,9 @@ struct spkm3_ctx {
41#define SPKM_WRAP_TOK 5 41#define SPKM_WRAP_TOK 5
42#define SPKM_DEL_TOK 6 42#define SPKM_DEL_TOK 6
43 43
44u32 spkm3_make_token(struct spkm3_ctx *ctx, int qop_req, struct xdr_buf * text, struct xdr_netobj * token, int toktype); 44u32 spkm3_make_token(struct spkm3_ctx *ctx, struct xdr_buf * text, struct xdr_netobj * token, int toktype);
45 45
46u32 spkm3_read_token(struct spkm3_ctx *ctx, struct xdr_netobj *read_token, struct xdr_buf *message_buffer, int *qop_state, int toktype); 46u32 spkm3_read_token(struct spkm3_ctx *ctx, struct xdr_netobj *read_token, struct xdr_buf *message_buffer, int toktype);
47 47
48#define CKSUMTYPE_RSA_MD5 0x0007 48#define CKSUMTYPE_RSA_MD5 0x0007
49 49
diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h
index 15f115332389..f43f237360ae 100644
--- a/include/linux/sunrpc/msg_prot.h
+++ b/include/linux/sunrpc/msg_prot.h
@@ -76,5 +76,30 @@ enum rpc_auth_stat {
76 76
77#define RPC_MAXNETNAMELEN 256 77#define RPC_MAXNETNAMELEN 256
78 78
79/*
80 * From RFC 1831:
81 *
82 * "A record is composed of one or more record fragments. A record
83 * fragment is a four-byte header followed by 0 to (2**31) - 1 bytes of
84 * fragment data. The bytes encode an unsigned binary number; as with
85 * XDR integers, the byte order is from highest to lowest. The number
86 * encodes two values -- a boolean which indicates whether the fragment
87 * is the last fragment of the record (bit value 1 implies the fragment
88 * is the last fragment) and a 31-bit unsigned binary value which is the
89 * length in bytes of the fragment's data. The boolean value is the
90 * highest-order bit of the header; the length is the 31 low-order bits.
91 * (Note that this record specification is NOT in XDR standard form!)"
92 *
93 * The Linux RPC client always sends its requests in a single record
94 * fragment, limiting the maximum payload size for stream transports to
95 * 2GB.
96 */
97
98typedef u32 rpc_fraghdr;
99
100#define RPC_LAST_STREAM_FRAGMENT (1U << 31)
101#define RPC_FRAGMENT_SIZE_MASK (~RPC_LAST_STREAM_FRAGMENT)
102#define RPC_MAX_FRAGMENT_SIZE ((1U << 31) - 1)
103
79#endif /* __KERNEL__ */ 104#endif /* __KERNEL__ */
80#endif /* _LINUX_SUNRPC_MSGPROT_H_ */ 105#endif /* _LINUX_SUNRPC_MSGPROT_H_ */
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 23448d0fb5bc..5da968729cf8 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -161,14 +161,10 @@ typedef struct {
161 161
162typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len); 162typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len);
163 163
164extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *);
164extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, 165extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int,
165 skb_reader_t *, skb_read_actor_t); 166 skb_reader_t *, skb_read_actor_t);
166 167
167struct socket;
168struct sockaddr;
169extern int xdr_sendpages(struct socket *, struct sockaddr *, int,
170 struct xdr_buf *, unsigned int, int);
171
172extern int xdr_encode_word(struct xdr_buf *, int, u32); 168extern int xdr_encode_word(struct xdr_buf *, int, u32);
173extern int xdr_decode_word(struct xdr_buf *, int, u32 *); 169extern int xdr_decode_word(struct xdr_buf *, int, u32 *);
174 170
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index e618c1649814..3b8b6e823c70 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/include/linux/sunrpc/clnt_xprt.h 2 * linux/include/linux/sunrpc/xprt.h
3 * 3 *
4 * Declarations for the RPC transport interface. 4 * Declarations for the RPC transport interface.
5 * 5 *
@@ -15,20 +15,6 @@
15#include <linux/sunrpc/sched.h> 15#include <linux/sunrpc/sched.h>
16#include <linux/sunrpc/xdr.h> 16#include <linux/sunrpc/xdr.h>
17 17
18/*
19 * The transport code maintains an estimate on the maximum number of out-
20 * standing RPC requests, using a smoothed version of the congestion
21 * avoidance implemented in 44BSD. This is basically the Van Jacobson
22 * congestion algorithm: If a retransmit occurs, the congestion window is
23 * halved; otherwise, it is incremented by 1/cwnd when
24 *
25 * - a reply is received and
26 * - a full number of requests are outstanding and
27 * - the congestion window hasn't been updated recently.
28 *
29 * Upper procedures may check whether a request would block waiting for
30 * a free RPC slot by using the RPC_CONGESTED() macro.
31 */
32extern unsigned int xprt_udp_slot_table_entries; 18extern unsigned int xprt_udp_slot_table_entries;
33extern unsigned int xprt_tcp_slot_table_entries; 19extern unsigned int xprt_tcp_slot_table_entries;
34 20
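The congestion text leaves the public header along with the RPC_CWNDSCALE fixed-point macros, but the algorithm it describes is ordinary AIMD. A sketch of the update the comment documents, assuming the scaling macros are still in scope; this is the described behavior, not the exact kernel code:

static void example_adjust_cwnd(struct rpc_xprt *xprt, int retransmitted)
{
        unsigned long cwnd = xprt->cwnd;

        if (retransmitted) {
                cwnd >>= 1;                     /* multiplicative decrease */
                if (cwnd < RPC_CWNDSCALE)
                        cwnd = RPC_CWNDSCALE;
        } else if (xprt->cong >= cwnd) {
                /* additive increase: += 1/cwnd, in RPC_CWNDSCALE units */
                cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE) / cwnd;
        }
        xprt->cwnd = cwnd;
}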
@@ -36,34 +22,23 @@ extern unsigned int xprt_tcp_slot_table_entries;
36#define RPC_DEF_SLOT_TABLE (16U) 22#define RPC_DEF_SLOT_TABLE (16U)
37#define RPC_MAX_SLOT_TABLE (128U) 23#define RPC_MAX_SLOT_TABLE (128U)
38 24
39#define RPC_CWNDSHIFT (8U)
40#define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT)
41#define RPC_INITCWND RPC_CWNDSCALE
42#define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT)
43#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
44
45/* Default timeout values */
46#define RPC_MAX_UDP_TIMEOUT (60*HZ)
47#define RPC_MAX_TCP_TIMEOUT (600*HZ)
48
49/* 25/*
50 * Wait duration for an RPC TCP connection to be established. Solaris 26 * RPC call and reply header size as number of 32bit words (verifier
51 * NFS over TCP uses 60 seconds, for example, which is in line with how 27 * size computed separately)
52 * long a server takes to reboot.
53 */ 28 */
54#define RPC_CONNECT_TIMEOUT (60*HZ) 29#define RPC_CALLHDRSIZE 6
30#define RPC_REPHDRSIZE 4
55 31
56/* 32/*
57 * Delay an arbitrary number of seconds before attempting to reconnect 33 * Parameters for choosing a free port
58 * after an error.
59 */ 34 */
60#define RPC_REESTABLISH_TIMEOUT (15*HZ) 35extern unsigned int xprt_min_resvport;
36extern unsigned int xprt_max_resvport;
61 37
62/* RPC call and reply header size as number of 32bit words (verifier 38#define RPC_MIN_RESVPORT (1U)
63 * size computed separately) 39#define RPC_MAX_RESVPORT (65535U)
64 */ 40#define RPC_DEF_MIN_RESVPORT (650U)
65#define RPC_CALLHDRSIZE 6 41#define RPC_DEF_MAX_RESVPORT (1023U)
66#define RPC_REPHDRSIZE 4
67 42
68/* 43/*
69 * This describes a timeout strategy 44 * This describes a timeout strategy
@@ -76,6 +51,9 @@ struct rpc_timeout {
76 unsigned char to_exponential; 51 unsigned char to_exponential;
77}; 52};
78 53
54struct rpc_task;
55struct rpc_xprt;
56
79/* 57/*
80 * This describes a complete RPC request 58 * This describes a complete RPC request
81 */ 59 */
@@ -95,7 +73,10 @@ struct rpc_rqst {
95 int rq_cong; /* has incremented xprt->cong */ 73 int rq_cong; /* has incremented xprt->cong */
96 int rq_received; /* receive completed */ 74 int rq_received; /* receive completed */
97 u32 rq_seqno; /* gss seq no. used on req. */ 75 u32 rq_seqno; /* gss seq no. used on req. */
98 76 int rq_enc_pages_num;
77 struct page **rq_enc_pages; /* scratch pages for use by
78 gss privacy code */
79 void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */
99 struct list_head rq_list; 80 struct list_head rq_list;
100 81
101 struct xdr_buf rq_private_buf; /* The receive buffer 82 struct xdr_buf rq_private_buf; /* The receive buffer
@@ -121,12 +102,21 @@ struct rpc_rqst {
121#define rq_svec rq_snd_buf.head 102#define rq_svec rq_snd_buf.head
122#define rq_slen rq_snd_buf.len 103#define rq_slen rq_snd_buf.len
123 104
124#define XPRT_LAST_FRAG (1 << 0) 105struct rpc_xprt_ops {
125#define XPRT_COPY_RECM (1 << 1) 106 void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize);
126#define XPRT_COPY_XID (1 << 2) 107 int (*reserve_xprt)(struct rpc_task *task);
127#define XPRT_COPY_DATA (1 << 3) 108 void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
109 void (*connect)(struct rpc_task *task);
110 int (*send_request)(struct rpc_task *task);
111 void (*set_retrans_timeout)(struct rpc_task *task);
112 void (*timer)(struct rpc_task *task);
113 void (*release_request)(struct rpc_task *task);
114 void (*close)(struct rpc_xprt *xprt);
115 void (*destroy)(struct rpc_xprt *xprt);
116};
128 117
129struct rpc_xprt { 118struct rpc_xprt {
119 struct rpc_xprt_ops * ops; /* transport methods */
130 struct socket * sock; /* BSD socket layer */ 120 struct socket * sock; /* BSD socket layer */
131 struct sock * inet; /* INET layer */ 121 struct sock * inet; /* INET layer */
132 122
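A transport now publishes its behavior through this vtable, mixing its own methods with the generic helpers declared later in this header. A hypothetical UDP-flavored setup; every example_* symbol is illustrative, while the xprt_* entries are the helpers this patch actually exports:

static struct rpc_xprt_ops example_udp_ops = {
        .set_buffer_size        = example_set_buffer_size,
        .reserve_xprt           = xprt_reserve_xprt_cong,       /* congestion-aware */
        .release_xprt           = xprt_release_xprt_cong,
        .connect                = example_connect,
        .send_request           = example_send_request,
        .set_retrans_timeout    = xprt_set_retrans_timeout_rtt,
        .timer                  = example_timer,
        .release_request        = xprt_release_rqst_cong,
        .close                  = example_close,
        .destroy                = example_destroy,
};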
@@ -137,11 +127,13 @@ struct rpc_xprt {
137 unsigned long cong; /* current congestion */ 127 unsigned long cong; /* current congestion */
138 unsigned long cwnd; /* congestion window */ 128 unsigned long cwnd; /* congestion window */
139 129
140 unsigned int rcvsize, /* socket receive buffer size */ 130 size_t rcvsize, /* transport rcv buffer size */
141 sndsize; /* socket send buffer size */ 131 sndsize; /* transport send buffer size */
142 132
143 size_t max_payload; /* largest RPC payload size, 133 size_t max_payload; /* largest RPC payload size,
144 in bytes */ 134 in bytes */
135 unsigned int tsh_size; /* size of transport specific
136 header */
145 137
146 struct rpc_wait_queue sending; /* requests waiting to send */ 138 struct rpc_wait_queue sending; /* requests waiting to send */
147 struct rpc_wait_queue resend; /* requests waiting to resend */ 139 struct rpc_wait_queue resend; /* requests waiting to resend */
@@ -150,11 +142,9 @@ struct rpc_xprt {
150 struct list_head free; /* free slots */ 142 struct list_head free; /* free slots */
151 struct rpc_rqst * slot; /* slot table storage */ 143 struct rpc_rqst * slot; /* slot table storage */
152 unsigned int max_reqs; /* total slots */ 144 unsigned int max_reqs; /* total slots */
153 unsigned long sockstate; /* Socket state */ 145 unsigned long state; /* transport state */
154 unsigned char shutdown : 1, /* being shut down */ 146 unsigned char shutdown : 1, /* being shut down */
155 nocong : 1, /* no congestion control */ 147 resvport : 1; /* use a reserved port */
156 resvport : 1, /* use a reserved port */
157 stream : 1; /* TCP */
158 148
159 /* 149 /*
160 * XID 150 * XID
@@ -171,22 +161,27 @@ struct rpc_xprt {
171 unsigned long tcp_copied, /* copied to request */ 161 unsigned long tcp_copied, /* copied to request */
172 tcp_flags; 162 tcp_flags;
173 /* 163 /*
174 * Connection of sockets 164 * Connection of transports
175 */ 165 */
176 struct work_struct sock_connect; 166 unsigned long connect_timeout,
167 bind_timeout,
168 reestablish_timeout;
169 struct work_struct connect_worker;
177 unsigned short port; 170 unsigned short port;
171
178 /* 172 /*
179 * Disconnection of idle sockets 173 * Disconnection of idle transports
180 */ 174 */
181 struct work_struct task_cleanup; 175 struct work_struct task_cleanup;
182 struct timer_list timer; 176 struct timer_list timer;
183 unsigned long last_used; 177 unsigned long last_used,
178 idle_timeout;
184 179
185 /* 180 /*
186 * Send stuff 181 * Send stuff
187 */ 182 */
188 spinlock_t sock_lock; /* lock socket info */ 183 spinlock_t transport_lock; /* lock transport info */
189 spinlock_t xprt_lock; /* lock xprt info */ 184 spinlock_t reserve_lock; /* lock slot table */
190 struct rpc_task * snd_task; /* Task blocked in send */ 185 struct rpc_task * snd_task; /* Task blocked in send */
191 186
192 struct list_head recv; 187 struct list_head recv;
@@ -195,37 +190,111 @@ struct rpc_xprt {
195 void (*old_data_ready)(struct sock *, int); 190 void (*old_data_ready)(struct sock *, int);
196 void (*old_state_change)(struct sock *); 191 void (*old_state_change)(struct sock *);
197 void (*old_write_space)(struct sock *); 192 void (*old_write_space)(struct sock *);
198
199 wait_queue_head_t cong_wait;
200}; 193};
201 194
195#define XPRT_LAST_FRAG (1 << 0)
196#define XPRT_COPY_RECM (1 << 1)
197#define XPRT_COPY_XID (1 << 2)
198#define XPRT_COPY_DATA (1 << 3)
199
202#ifdef __KERNEL__ 200#ifdef __KERNEL__
203 201
204struct rpc_xprt * xprt_create_proto(int proto, struct sockaddr_in *addr, 202/*
205 struct rpc_timeout *toparms); 203 * Transport operations used by ULPs
206int xprt_destroy(struct rpc_xprt *); 204 */
207void xprt_set_timeout(struct rpc_timeout *, unsigned int, 205struct rpc_xprt * xprt_create_proto(int proto, struct sockaddr_in *addr, struct rpc_timeout *to);
208 unsigned long); 206void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr);
209 207
210void xprt_reserve(struct rpc_task *); 208/*
211int xprt_prepare_transmit(struct rpc_task *); 209 * Generic internal transport functions
212void xprt_transmit(struct rpc_task *); 210 */
213void xprt_receive(struct rpc_task *); 211void xprt_connect(struct rpc_task *task);
212void xprt_reserve(struct rpc_task *task);
213int xprt_reserve_xprt(struct rpc_task *task);
214int xprt_reserve_xprt_cong(struct rpc_task *task);
215int xprt_prepare_transmit(struct rpc_task *task);
216void xprt_transmit(struct rpc_task *task);
217void xprt_abort_transmit(struct rpc_task *task);
214int xprt_adjust_timeout(struct rpc_rqst *req); 218int xprt_adjust_timeout(struct rpc_rqst *req);
215void xprt_release(struct rpc_task *); 219void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
216void xprt_connect(struct rpc_task *); 220void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
217void xprt_sock_setbufsize(struct rpc_xprt *); 221void xprt_release(struct rpc_task *task);
218 222int xprt_destroy(struct rpc_xprt *xprt);
219#define XPRT_LOCKED 0 223
220#define XPRT_CONNECT 1 224static inline u32 *xprt_skip_transport_header(struct rpc_xprt *xprt, u32 *p)
221#define XPRT_CONNECTING 2 225{
222 226 return p + xprt->tsh_size;
223#define xprt_connected(xp) (test_bit(XPRT_CONNECT, &(xp)->sockstate)) 227}
224#define xprt_set_connected(xp) (set_bit(XPRT_CONNECT, &(xp)->sockstate)) 228
225#define xprt_test_and_set_connected(xp) (test_and_set_bit(XPRT_CONNECT, &(xp)->sockstate)) 229/*
226#define xprt_test_and_clear_connected(xp) \ 230 * Transport switch helper functions
227 (test_and_clear_bit(XPRT_CONNECT, &(xp)->sockstate)) 231 */
228#define xprt_clear_connected(xp) (clear_bit(XPRT_CONNECT, &(xp)->sockstate)) 232void xprt_set_retrans_timeout_def(struct rpc_task *task);
233void xprt_set_retrans_timeout_rtt(struct rpc_task *task);
234void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status);
235void xprt_wait_for_buffer_space(struct rpc_task *task);
236void xprt_write_space(struct rpc_xprt *xprt);
237void xprt_update_rtt(struct rpc_task *task);
238void xprt_adjust_cwnd(struct rpc_task *task, int result);
239struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid);
240void xprt_complete_rqst(struct rpc_task *task, int copied);
241void xprt_release_rqst_cong(struct rpc_task *task);
242void xprt_disconnect(struct rpc_xprt *xprt);
243
244/*
245 * Socket transport setup operations
246 */
247int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to);
248int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to);
249
250/*
251 * Reserved bit positions in xprt->state
252 */
253#define XPRT_LOCKED (0)
254#define XPRT_CONNECTED (1)
255#define XPRT_CONNECTING (2)
256
257static inline void xprt_set_connected(struct rpc_xprt *xprt)
258{
259 set_bit(XPRT_CONNECTED, &xprt->state);
260}
261
262static inline void xprt_clear_connected(struct rpc_xprt *xprt)
263{
264 clear_bit(XPRT_CONNECTED, &xprt->state);
265}
266
267static inline int xprt_connected(struct rpc_xprt *xprt)
268{
269 return test_bit(XPRT_CONNECTED, &xprt->state);
270}
271
272static inline int xprt_test_and_set_connected(struct rpc_xprt *xprt)
273{
274 return test_and_set_bit(XPRT_CONNECTED, &xprt->state);
275}
276
277static inline int xprt_test_and_clear_connected(struct rpc_xprt *xprt)
278{
279 return test_and_clear_bit(XPRT_CONNECTED, &xprt->state);
280}
281
282static inline void xprt_clear_connecting(struct rpc_xprt *xprt)
283{
284 smp_mb__before_clear_bit();
285 clear_bit(XPRT_CONNECTING, &xprt->state);
286 smp_mb__after_clear_bit();
287}
288
289static inline int xprt_connecting(struct rpc_xprt *xprt)
290{
291 return test_bit(XPRT_CONNECTING, &xprt->state);
292}
293
294static inline int xprt_test_and_set_connecting(struct rpc_xprt *xprt)
295{
296 return test_and_set_bit(XPRT_CONNECTING, &xprt->state);
297}
229 298
230#endif /* __KERNEL__*/ 299#endif /* __KERNEL__*/
231 300
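The bit numbers stay, but the raw test_bit()/set_bit() macros over sockstate become typed inline helpers over xprt->state, and the test-and-set variants double as guards. A hedged sketch of the pattern they enable around the new connect_worker (example_* functions are hypothetical):

static void example_start_connect(struct rpc_xprt *xprt)
{
        /* only one connect attempt in flight at a time */
        if (xprt_test_and_set_connecting(xprt))
                return;
        schedule_work(&xprt->connect_worker);
}

static void example_connect_done(struct rpc_xprt *xprt, int ok)
{
        if (ok)
                xprt_set_connected(xprt);
        xprt_clear_connecting(xprt);    /* barriers handled by the helper */
}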
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index ad15a54806d8..ba448c760168 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -71,7 +71,7 @@ void restore_processor_state(void);
71struct saved_context; 71struct saved_context;
72void __save_processor_state(struct saved_context *ctxt); 72void __save_processor_state(struct saved_context *ctxt);
73void __restore_processor_state(struct saved_context *ctxt); 73void __restore_processor_state(struct saved_context *ctxt);
74extern unsigned long get_usable_page(unsigned gfp_mask); 74extern unsigned long get_usable_page(gfp_t gfp_mask);
75extern void free_eaten_memory(void); 75extern void free_eaten_memory(void);
76 76
77#endif /* _LINUX_SWSUSP_H */ 77#endif /* _LINUX_SWSUSP_H */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index a7bf1a3b1496..20c975642cab 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -171,8 +171,8 @@ extern int rotate_reclaimable_page(struct page *page);
171extern void swap_setup(void); 171extern void swap_setup(void);
172 172
173/* linux/mm/vmscan.c */ 173/* linux/mm/vmscan.c */
174extern int try_to_free_pages(struct zone **, unsigned int); 174extern int try_to_free_pages(struct zone **, gfp_t);
175extern int zone_reclaim(struct zone *, unsigned int, unsigned int); 175extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
176extern int shrink_all_memory(int); 176extern int shrink_all_memory(int);
177extern int vm_swappiness; 177extern int vm_swappiness;
178 178
diff --git a/include/linux/textsearch.h b/include/linux/textsearch.h
index 515046d1b2f4..fc5bb4e91a58 100644
--- a/include/linux/textsearch.h
+++ b/include/linux/textsearch.h
@@ -40,7 +40,7 @@ struct ts_state
40struct ts_ops 40struct ts_ops
41{ 41{
42 const char *name; 42 const char *name;
43 struct ts_config * (*init)(const void *, unsigned int, int); 43 struct ts_config * (*init)(const void *, unsigned int, gfp_t);
44 unsigned int (*find)(struct ts_config *, 44 unsigned int (*find)(struct ts_config *,
45 struct ts_state *); 45 struct ts_state *);
46 void (*destroy)(struct ts_config *); 46 void (*destroy)(struct ts_config *);
@@ -148,7 +148,7 @@ static inline unsigned int textsearch_get_pattern_len(struct ts_config *conf)
148extern int textsearch_register(struct ts_ops *); 148extern int textsearch_register(struct ts_ops *);
149extern int textsearch_unregister(struct ts_ops *); 149extern int textsearch_unregister(struct ts_ops *);
150extern struct ts_config *textsearch_prepare(const char *, const void *, 150extern struct ts_config *textsearch_prepare(const char *, const void *,
151 unsigned int, int, int); 151 unsigned int, gfp_t, int);
152extern void textsearch_destroy(struct ts_config *conf); 152extern void textsearch_destroy(struct ts_config *conf);
153extern unsigned int textsearch_find_continuous(struct ts_config *, 153extern unsigned int textsearch_find_continuous(struct ts_config *,
154 struct ts_state *, 154 struct ts_state *,
diff --git a/include/linux/types.h b/include/linux/types.h
index 0aee34f9da9f..21b9ce803644 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -151,7 +151,12 @@ typedef unsigned long sector_t;
151 */ 151 */
152 152
153#ifdef __CHECKER__ 153#ifdef __CHECKER__
154#define __bitwise __attribute__((bitwise)) 154#define __bitwise__ __attribute__((bitwise))
155#else
156#define __bitwise__
157#endif
158#ifdef __CHECK_ENDIAN__
159#define __bitwise __bitwise__
155#else 160#else
156#define __bitwise 161#define __bitwise
157#endif 162#endif
@@ -166,7 +171,7 @@ typedef __u64 __bitwise __be64;
166#endif 171#endif
167 172
168#ifdef __KERNEL__ 173#ifdef __KERNEL__
169typedef unsigned __nocast gfp_t; 174typedef unsigned __bitwise__ gfp_t;
170#endif 175#endif
171 176
172struct ustat { 177struct ustat {
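Switching gfp_t from __nocast to __bitwise__ is what gives the rest of this series its teeth: under sparse (__CHECKER__ defined), a bitwise type may only be combined with itself, so passing a bare integer where a gfp_t belongs now warns. Roughly, with a hypothetical prototype and make C=1 (the exact warning wording varies by sparse version):

void *example_alloc(size_t size, gfp_t flags);

void example(void)
{
        example_alloc(32, GFP_KERNEL);  /* ok: GFP_KERNEL is built from gfp_t bits */
        example_alloc(32, 0x10);        /* sparse: incorrect type in argument 2 */
}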
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 4dbe580f9335..8f731e8f2821 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -933,17 +933,17 @@ static inline void usb_fill_int_urb (struct urb *urb,
933} 933}
934 934
935extern void usb_init_urb(struct urb *urb); 935extern void usb_init_urb(struct urb *urb);
936extern struct urb *usb_alloc_urb(int iso_packets, unsigned mem_flags); 936extern struct urb *usb_alloc_urb(int iso_packets, gfp_t mem_flags);
937extern void usb_free_urb(struct urb *urb); 937extern void usb_free_urb(struct urb *urb);
938#define usb_put_urb usb_free_urb 938#define usb_put_urb usb_free_urb
939extern struct urb *usb_get_urb(struct urb *urb); 939extern struct urb *usb_get_urb(struct urb *urb);
940extern int usb_submit_urb(struct urb *urb, unsigned mem_flags); 940extern int usb_submit_urb(struct urb *urb, gfp_t mem_flags);
941extern int usb_unlink_urb(struct urb *urb); 941extern int usb_unlink_urb(struct urb *urb);
942extern void usb_kill_urb(struct urb *urb); 942extern void usb_kill_urb(struct urb *urb);
943 943
944#define HAVE_USB_BUFFERS 944#define HAVE_USB_BUFFERS
945void *usb_buffer_alloc (struct usb_device *dev, size_t size, 945void *usb_buffer_alloc (struct usb_device *dev, size_t size,
946 unsigned mem_flags, dma_addr_t *dma); 946 gfp_t mem_flags, dma_addr_t *dma);
947void usb_buffer_free (struct usb_device *dev, size_t size, 947void usb_buffer_free (struct usb_device *dev, size_t size,
948 void *addr, dma_addr_t dma); 948 void *addr, dma_addr_t dma);
949 949
@@ -1050,7 +1050,7 @@ int usb_sg_init (
1050 struct scatterlist *sg, 1050 struct scatterlist *sg,
1051 int nents, 1051 int nents,
1052 size_t length, 1052 size_t length,
1053 unsigned mem_flags 1053 gfp_t mem_flags
1054); 1054);
1055void usb_sg_cancel (struct usb_sg_request *io); 1055void usb_sg_cancel (struct usb_sg_request *io);
1056void usb_sg_wait (struct usb_sg_request *io); 1056void usb_sg_wait (struct usb_sg_request *io);
diff --git a/include/linux/usb_gadget.h b/include/linux/usb_gadget.h
index 71e608607324..ff81117eb733 100644
--- a/include/linux/usb_gadget.h
+++ b/include/linux/usb_gadget.h
@@ -107,18 +107,18 @@ struct usb_ep_ops {
107 int (*disable) (struct usb_ep *ep); 107 int (*disable) (struct usb_ep *ep);
108 108
109 struct usb_request *(*alloc_request) (struct usb_ep *ep, 109 struct usb_request *(*alloc_request) (struct usb_ep *ep,
110 unsigned gfp_flags); 110 gfp_t gfp_flags);
111 void (*free_request) (struct usb_ep *ep, struct usb_request *req); 111 void (*free_request) (struct usb_ep *ep, struct usb_request *req);
112 112
113 void *(*alloc_buffer) (struct usb_ep *ep, unsigned bytes, 113 void *(*alloc_buffer) (struct usb_ep *ep, unsigned bytes,
114 dma_addr_t *dma, unsigned gfp_flags); 114 dma_addr_t *dma, gfp_t gfp_flags);
115 void (*free_buffer) (struct usb_ep *ep, void *buf, dma_addr_t dma, 115 void (*free_buffer) (struct usb_ep *ep, void *buf, dma_addr_t dma,
116 unsigned bytes); 116 unsigned bytes);
117 // NOTE: on 2.6, drivers may also use dma_map() and 117 // NOTE: on 2.6, drivers may also use dma_map() and
118 // dma_sync_single_*() to directly manage dma overhead. 118 // dma_sync_single_*() to directly manage dma overhead.
119 119
120 int (*queue) (struct usb_ep *ep, struct usb_request *req, 120 int (*queue) (struct usb_ep *ep, struct usb_request *req,
121 unsigned gfp_flags); 121 gfp_t gfp_flags);
122 int (*dequeue) (struct usb_ep *ep, struct usb_request *req); 122 int (*dequeue) (struct usb_ep *ep, struct usb_request *req);
123 123
124 int (*set_halt) (struct usb_ep *ep, int value); 124 int (*set_halt) (struct usb_ep *ep, int value);
@@ -214,7 +214,7 @@ usb_ep_disable (struct usb_ep *ep)
214 * Returns the request, or null if one could not be allocated. 214 * Returns the request, or null if one could not be allocated.
215 */ 215 */
216static inline struct usb_request * 216static inline struct usb_request *
217usb_ep_alloc_request (struct usb_ep *ep, unsigned gfp_flags) 217usb_ep_alloc_request (struct usb_ep *ep, gfp_t gfp_flags)
218{ 218{
219 return ep->ops->alloc_request (ep, gfp_flags); 219 return ep->ops->alloc_request (ep, gfp_flags);
220} 220}
@@ -254,7 +254,7 @@ usb_ep_free_request (struct usb_ep *ep, struct usb_request *req)
254 */ 254 */
255static inline void * 255static inline void *
256usb_ep_alloc_buffer (struct usb_ep *ep, unsigned len, dma_addr_t *dma, 256usb_ep_alloc_buffer (struct usb_ep *ep, unsigned len, dma_addr_t *dma,
257 unsigned gfp_flags) 257 gfp_t gfp_flags)
258{ 258{
259 return ep->ops->alloc_buffer (ep, len, dma, gfp_flags); 259 return ep->ops->alloc_buffer (ep, len, dma, gfp_flags);
260} 260}
@@ -330,7 +330,7 @@ usb_ep_free_buffer (struct usb_ep *ep, void *buf, dma_addr_t dma, unsigned len)
330 * reported when the usb peripheral is disconnected. 330 * reported when the usb peripheral is disconnected.
331 */ 331 */
332static inline int 332static inline int
333usb_ep_queue (struct usb_ep *ep, struct usb_request *req, unsigned gfp_flags) 333usb_ep_queue (struct usb_ep *ep, struct usb_request *req, gfp_t gfp_flags)
334{ 334{
335 return ep->ops->queue (ep, req, gfp_flags); 335 return ep->ops->queue (ep, req, gfp_flags);
336} 336}
diff --git a/include/net/dst.h b/include/net/dst.h
index 4a056a682435..6c196a5baf24 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -94,7 +94,6 @@ struct dst_ops
94 struct dst_entry * (*negative_advice)(struct dst_entry *); 94 struct dst_entry * (*negative_advice)(struct dst_entry *);
95 void (*link_failure)(struct sk_buff *); 95 void (*link_failure)(struct sk_buff *);
96 void (*update_pmtu)(struct dst_entry *dst, u32 mtu); 96 void (*update_pmtu)(struct dst_entry *dst, u32 mtu);
97 int (*get_mss)(struct dst_entry *dst, u32 mtu);
98 int entry_size; 97 int entry_size;
99 98
100 atomic_t entries; 99 atomic_t entries;
diff --git a/include/net/sock.h b/include/net/sock.h
index ecb75526cba0..e0498bd36004 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -207,7 +207,7 @@ struct sock {
207 struct sk_buff_head sk_write_queue; 207 struct sk_buff_head sk_write_queue;
208 int sk_wmem_queued; 208 int sk_wmem_queued;
209 int sk_forward_alloc; 209 int sk_forward_alloc;
210 unsigned int sk_allocation; 210 gfp_t sk_allocation;
211 int sk_sndbuf; 211 int sk_sndbuf;
212 int sk_route_caps; 212 int sk_route_caps;
213 unsigned long sk_flags; 213 unsigned long sk_flags;
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index bed4b7c9be99..e6b61fab66dd 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -146,7 +146,7 @@ struct scsi_cmnd {
146#define SCSI_STATE_MLQUEUE 0x100b 146#define SCSI_STATE_MLQUEUE 0x100b
147 147
148 148
149extern struct scsi_cmnd *scsi_get_command(struct scsi_device *, int); 149extern struct scsi_cmnd *scsi_get_command(struct scsi_device *, gfp_t);
150extern void scsi_put_command(struct scsi_cmnd *); 150extern void scsi_put_command(struct scsi_cmnd *);
151extern void scsi_io_completion(struct scsi_cmnd *, unsigned int, unsigned int); 151extern void scsi_io_completion(struct scsi_cmnd *, unsigned int, unsigned int);
152extern void scsi_finish_command(struct scsi_cmnd *cmd); 152extern void scsi_finish_command(struct scsi_cmnd *cmd);
diff --git a/include/scsi/scsi_request.h b/include/scsi/scsi_request.h
index 6a140020d7cb..2539debb7993 100644
--- a/include/scsi/scsi_request.h
+++ b/include/scsi/scsi_request.h
@@ -45,7 +45,7 @@ struct scsi_request {
45 level driver) of this request */ 45 level driver) of this request */
46}; 46};
47 47
48extern struct scsi_request *scsi_allocate_request(struct scsi_device *, int); 48extern struct scsi_request *scsi_allocate_request(struct scsi_device *, gfp_t);
49extern void scsi_release_request(struct scsi_request *); 49extern void scsi_release_request(struct scsi_request *);
50extern void scsi_wait_req(struct scsi_request *, const void *cmnd, 50extern void scsi_wait_req(struct scsi_request *, const void *cmnd,
51 void *buffer, unsigned bufflen, 51 void *buffer, unsigned bufflen,
diff --git a/include/sound/memalloc.h b/include/sound/memalloc.h
index 3a2fd2cc9f19..83489c3abbaf 100644
--- a/include/sound/memalloc.h
+++ b/include/sound/memalloc.h
@@ -111,7 +111,7 @@ size_t snd_dma_get_reserved_buf(struct snd_dma_buffer *dmab, unsigned int id);
111int snd_dma_reserve_buf(struct snd_dma_buffer *dmab, unsigned int id); 111int snd_dma_reserve_buf(struct snd_dma_buffer *dmab, unsigned int id);
112 112
113/* basic memory allocation functions */ 113/* basic memory allocation functions */
114void *snd_malloc_pages(size_t size, unsigned int gfp_flags); 114void *snd_malloc_pages(size_t size, gfp_t gfp_flags);
115void snd_free_pages(void *ptr, size_t size); 115void snd_free_pages(void *ptr, size_t size);
116 116
117#endif /* __SOUND_MEMALLOC_H */ 117#endif /* __SOUND_MEMALLOC_H */
diff --git a/kernel/audit.c b/kernel/audit.c
index aefa73a8a586..0c56320d38dc 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -133,7 +133,7 @@ struct audit_buffer {
133 struct list_head list; 133 struct list_head list;
134 struct sk_buff *skb; /* formatted skb ready to send */ 134 struct sk_buff *skb; /* formatted skb ready to send */
135 struct audit_context *ctx; /* NULL or associated context */ 135 struct audit_context *ctx; /* NULL or associated context */
136 int gfp_mask; 136 gfp_t gfp_mask;
137}; 137};
138 138
139static void audit_set_pid(struct audit_buffer *ab, pid_t pid) 139static void audit_set_pid(struct audit_buffer *ab, pid_t pid)
@@ -647,7 +647,7 @@ static inline void audit_get_stamp(struct audit_context *ctx,
647 * will be written at syscall exit. If there is no associated task, tsk 647 * will be written at syscall exit. If there is no associated task, tsk
648 * should be NULL. */ 648 * should be NULL. */
649 649
650struct audit_buffer *audit_log_start(struct audit_context *ctx, int gfp_mask, 650struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
651 int type) 651 int type)
652{ 652{
653 struct audit_buffer *ab = NULL; 653 struct audit_buffer *ab = NULL;
@@ -879,7 +879,7 @@ void audit_log_end(struct audit_buffer *ab)
879/* Log an audit record. This is a convenience function that calls 879/* Log an audit record. This is a convenience function that calls
880 * audit_log_start, audit_log_vformat, and audit_log_end. It may be 880 * audit_log_start, audit_log_vformat, and audit_log_end. It may be
881 * called in any context. */ 881 * called in any context. */
882void audit_log(struct audit_context *ctx, int gfp_mask, int type, 882void audit_log(struct audit_context *ctx, gfp_t gfp_mask, int type,
883 const char *fmt, ...) 883 const char *fmt, ...)
884{ 884{
885 struct audit_buffer *ab; 885 struct audit_buffer *ab;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 88696f639aab..d8a68509e729 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -803,7 +803,7 @@ static void audit_log_task_info(struct audit_buffer *ab)
803 up_read(&mm->mmap_sem); 803 up_read(&mm->mmap_sem);
804} 804}
805 805
806static void audit_log_exit(struct audit_context *context, unsigned int gfp_mask) 806static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
807{ 807{
808 int i; 808 int i;
809 struct audit_buffer *ab; 809 struct audit_buffer *ab;
diff --git a/kernel/kexec.c b/kernel/kexec.c
index cdd4dcd8fb63..36c5d9cd4cc1 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -90,7 +90,7 @@ int kexec_should_crash(struct task_struct *p)
90static int kimage_is_destination_range(struct kimage *image, 90static int kimage_is_destination_range(struct kimage *image,
91 unsigned long start, unsigned long end); 91 unsigned long start, unsigned long end);
92static struct page *kimage_alloc_page(struct kimage *image, 92static struct page *kimage_alloc_page(struct kimage *image,
93 unsigned int gfp_mask, 93 gfp_t gfp_mask,
94 unsigned long dest); 94 unsigned long dest);
95 95
96static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, 96static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
@@ -326,8 +326,7 @@ static int kimage_is_destination_range(struct kimage *image,
326 return 0; 326 return 0;
327} 327}
328 328
329static struct page *kimage_alloc_pages(unsigned int gfp_mask, 329static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
330 unsigned int order)
331{ 330{
332 struct page *pages; 331 struct page *pages;
333 332
@@ -654,7 +653,7 @@ static kimage_entry_t *kimage_dst_used(struct kimage *image,
654} 653}
655 654
656static struct page *kimage_alloc_page(struct kimage *image, 655static struct page *kimage_alloc_page(struct kimage *image,
657 unsigned int gfp_mask, 656 gfp_t gfp_mask,
658 unsigned long destination) 657 unsigned long destination)
659{ 658{
660 /* 659 /*
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 2d5c45676442..10bc5ec496d7 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -1095,7 +1095,7 @@ static inline void eat_page(void *page)
1095 *eaten_memory = c; 1095 *eaten_memory = c;
1096} 1096}
1097 1097
1098unsigned long get_usable_page(unsigned gfp_mask) 1098unsigned long get_usable_page(gfp_t gfp_mask)
1099{ 1099{
1100 unsigned long m; 1100 unsigned long m;
1101 1101
diff --git a/lib/idr.c b/lib/idr.c
index d4df21debc4d..6414b2fb482d 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -72,7 +72,7 @@ static void free_layer(struct idr *idp, struct idr_layer *p)
72 * If the system is REALLY out of memory this function returns 0, 72 * If the system is REALLY out of memory this function returns 0,
73 * otherwise 1. 73 * otherwise 1.
74 */ 74 */
75int idr_pre_get(struct idr *idp, unsigned gfp_mask) 75int idr_pre_get(struct idr *idp, gfp_t gfp_mask)
76{ 76{
77 while (idp->id_free_cnt < IDR_FREE_MAX) { 77 while (idp->id_free_cnt < IDR_FREE_MAX) {
78 struct idr_layer *new; 78 struct idr_layer *new;
diff --git a/lib/kobject.c b/lib/kobject.c
index dd0917dd9fa9..253d3004ace9 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -100,7 +100,7 @@ static void fill_kobj_path(struct kobject *kobj, char *path, int length)
100 * @kobj: kobject in question, with which to build the path 100 * @kobj: kobject in question, with which to build the path
101 * @gfp_mask: the allocation type used to allocate the path 101 * @gfp_mask: the allocation type used to allocate the path
102 */ 102 */
103char *kobject_get_path(struct kobject *kobj, int gfp_mask) 103char *kobject_get_path(struct kobject *kobj, gfp_t gfp_mask)
104{ 104{
105 char *path; 105 char *path;
106 int len; 106 int len;
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 04ca4429ddfa..7ef6f6a17aa6 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -62,7 +62,7 @@ static struct sock *uevent_sock;
62 * @gfp_mask: 62 * @gfp_mask:
63 */ 63 */
64static int send_uevent(const char *signal, const char *obj, 64static int send_uevent(const char *signal, const char *obj,
65 char **envp, int gfp_mask) 65 char **envp, gfp_t gfp_mask)
66{ 66{
67 struct sk_buff *skb; 67 struct sk_buff *skb;
68 char *pos; 68 char *pos;
@@ -98,7 +98,7 @@ static int send_uevent(const char *signal, const char *obj,
98} 98}
99 99
100static int do_kobject_uevent(struct kobject *kobj, enum kobject_action action, 100static int do_kobject_uevent(struct kobject *kobj, enum kobject_action action,
101 struct attribute *attr, int gfp_mask) 101 struct attribute *attr, gfp_t gfp_mask)
102{ 102{
103 char *path; 103 char *path;
104 char *attrpath; 104 char *attrpath;
diff --git a/lib/textsearch.c b/lib/textsearch.c
index 1e934c196f0f..6f3093efbd7b 100644
--- a/lib/textsearch.c
+++ b/lib/textsearch.c
@@ -254,7 +254,7 @@ unsigned int textsearch_find_continuous(struct ts_config *conf,
254 * parameters or a ERR_PTR(). 254 * parameters or a ERR_PTR().
255 */ 255 */
256struct ts_config *textsearch_prepare(const char *algo, const void *pattern, 256struct ts_config *textsearch_prepare(const char *algo, const void *pattern,
257 unsigned int len, int gfp_mask, int flags) 257 unsigned int len, gfp_t gfp_mask, int flags)
258{ 258{
259 int err = -ENOENT; 259 int err = -ENOENT;
260 struct ts_config *conf; 260 struct ts_config *conf;
diff --git a/mm/filemap.c b/mm/filemap.c
index b5346576e58d..1c31b2fd2ca5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -377,7 +377,7 @@ int filemap_write_and_wait_range(struct address_space *mapping,
377 * This function does not add the page to the LRU. The caller must do that. 377 * This function does not add the page to the LRU. The caller must do that.
378 */ 378 */
379int add_to_page_cache(struct page *page, struct address_space *mapping, 379int add_to_page_cache(struct page *page, struct address_space *mapping,
380 pgoff_t offset, int gfp_mask) 380 pgoff_t offset, gfp_t gfp_mask)
381{ 381{
382 int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM); 382 int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
383 383
@@ -401,7 +401,7 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
401EXPORT_SYMBOL(add_to_page_cache); 401EXPORT_SYMBOL(add_to_page_cache);
402 402
403int add_to_page_cache_lru(struct page *page, struct address_space *mapping, 403int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
404 pgoff_t offset, int gfp_mask) 404 pgoff_t offset, gfp_t gfp_mask)
405{ 405{
406 int ret = add_to_page_cache(page, mapping, offset, gfp_mask); 406 int ret = add_to_page_cache(page, mapping, offset, gfp_mask);
407 if (ret == 0) 407 if (ret == 0)
@@ -591,7 +591,7 @@ EXPORT_SYMBOL(find_lock_page);
591 * memory exhaustion. 591 * memory exhaustion.
592 */ 592 */
593struct page *find_or_create_page(struct address_space *mapping, 593struct page *find_or_create_page(struct address_space *mapping,
594 unsigned long index, unsigned int gfp_mask) 594 unsigned long index, gfp_t gfp_mask)
595{ 595{
596 struct page *page, *cached_page = NULL; 596 struct page *page, *cached_page = NULL;
597 int err; 597 int err;
@@ -683,7 +683,7 @@ struct page *
683grab_cache_page_nowait(struct address_space *mapping, unsigned long index) 683grab_cache_page_nowait(struct address_space *mapping, unsigned long index)
684{ 684{
685 struct page *page = find_get_page(mapping, index); 685 struct page *page = find_get_page(mapping, index);
686 unsigned int gfp_mask; 686 gfp_t gfp_mask;
687 687
688 if (page) { 688 if (page) {
689 if (!TestSetPageLocked(page)) 689 if (!TestSetPageLocked(page))
diff --git a/mm/highmem.c b/mm/highmem.c
index 90e1861e2da0..ce2e7e8bbfa7 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -30,11 +30,9 @@
30 30
31static mempool_t *page_pool, *isa_page_pool; 31static mempool_t *page_pool, *isa_page_pool;
32 32
33static void *page_pool_alloc(gfp_t gfp_mask, void *data) 33static void *page_pool_alloc_isa(gfp_t gfp_mask, void *data)
34{ 34{
35 unsigned int gfp = gfp_mask | (unsigned int) (long) data; 35 return alloc_page(gfp_mask | GFP_DMA);
36
37 return alloc_page(gfp);
38} 36}
39 37
40static void page_pool_free(void *page, void *data) 38static void page_pool_free(void *page, void *data)
@@ -51,6 +49,12 @@ static void page_pool_free(void *page, void *data)
51 * n means that there are (n-1) current users of it. 49 * n means that there are (n-1) current users of it.
52 */ 50 */
53#ifdef CONFIG_HIGHMEM 51#ifdef CONFIG_HIGHMEM
52
53static void *page_pool_alloc(gfp_t gfp_mask, void *data)
54{
55 return alloc_page(gfp_mask);
56}
57
54static int pkmap_count[LAST_PKMAP]; 58static int pkmap_count[LAST_PKMAP];
55static unsigned int last_pkmap_nr; 59static unsigned int last_pkmap_nr;
56static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock); 60static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);
@@ -267,7 +271,7 @@ int init_emergency_isa_pool(void)
267 if (isa_page_pool) 271 if (isa_page_pool)
268 return 0; 272 return 0;
269 273
270 isa_page_pool = mempool_create(ISA_POOL_SIZE, page_pool_alloc, page_pool_free, (void *) __GFP_DMA); 274 isa_page_pool = mempool_create(ISA_POOL_SIZE, page_pool_alloc_isa, page_pool_free, NULL);
271 if (!isa_page_pool) 275 if (!isa_page_pool)
272 BUG(); 276 BUG();
273 277
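Splitting out an ISA-specific allocator removes the old trick of smuggling __GFP_DMA through the void * cookie, a cast that cannot survive gfp_t type-checking; fixed flags are now OR'ed inside the callback itself. The resulting mempool pattern, sketched with hypothetical names:

static void *example_pool_alloc(gfp_t gfp_mask, void *data)
{
        return alloc_page(gfp_mask | GFP_DMA);  /* fixed flags live here */
}

static void example_pool_free(void *page, void *data)
{
        __free_page(page);
}

static int __init example_pool_init(void)
{
        mempool_t *pool = mempool_create(16, example_pool_alloc,
                                         example_pool_free, NULL);
        return pool ? 0 : -ENOMEM;
}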
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 37af443eb094..1d5c64df1653 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -700,7 +700,7 @@ static struct zonelist *zonelist_policy(gfp_t gfp, struct mempolicy *policy)
700 case MPOL_BIND: 700 case MPOL_BIND:
701 /* Lower zones don't get a policy applied */ 701 /* Lower zones don't get a policy applied */
702 /* Careful: current->mems_allowed might have moved */ 702 /* Careful: current->mems_allowed might have moved */
703 if ((gfp & GFP_ZONEMASK) >= policy_zone) 703 if (gfp_zone(gfp) >= policy_zone)
704 if (cpuset_zonelist_valid_mems_allowed(policy->v.zonelist)) 704 if (cpuset_zonelist_valid_mems_allowed(policy->v.zonelist))
705 return policy->v.zonelist; 705 return policy->v.zonelist;
706 /*FALL THROUGH*/ 706 /*FALL THROUGH*/
@@ -712,7 +712,7 @@ static struct zonelist *zonelist_policy(gfp_t gfp, struct mempolicy *policy)
712 nd = 0; 712 nd = 0;
713 BUG(); 713 BUG();
714 } 714 }
715 return NODE_DATA(nd)->node_zonelists + (gfp & GFP_ZONEMASK); 715 return NODE_DATA(nd)->node_zonelists + gfp_zone(gfp);
716} 716}
717 717
718/* Do dynamic interleaving for a process */ 718/* Do dynamic interleaving for a process */
@@ -757,7 +757,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, unsigned ni
757 struct page *page; 757 struct page *page;
758 758
759 BUG_ON(!node_online(nid)); 759 BUG_ON(!node_online(nid));
760 zl = NODE_DATA(nid)->node_zonelists + (gfp & GFP_ZONEMASK); 760 zl = NODE_DATA(nid)->node_zonelists + gfp_zone(gfp);
761 page = __alloc_pages(gfp, order, zl); 761 page = __alloc_pages(gfp, order, zl);
762 if (page && page_zone(page) == zl->zones[0]) { 762 if (page && page_zone(page) == zl->zones[0]) {
763 zone_pcp(zl->zones[0],get_cpu())->interleave_hit++; 763 zone_pcp(zl->zones[0],get_cpu())->interleave_hit++;
diff --git a/mm/mempool.c b/mm/mempool.c
index 9e377ea700b2..1a99b80480d3 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -205,7 +205,7 @@ void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
205 void *element; 205 void *element;
206 unsigned long flags; 206 unsigned long flags;
207 wait_queue_t wait; 207 wait_queue_t wait;
208 unsigned int gfp_temp; 208 gfp_t gfp_temp;
209 209
210 might_sleep_if(gfp_mask & __GFP_WAIT); 210 might_sleep_if(gfp_mask & __GFP_WAIT);
211 211
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e1d3d77f4aee..94c864eac9c4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -734,7 +734,7 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
734 * of the allocation. 734 * of the allocation.
735 */ 735 */
736int zone_watermark_ok(struct zone *z, int order, unsigned long mark, 736int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
737 int classzone_idx, int can_try_harder, int gfp_high) 737 int classzone_idx, int can_try_harder, gfp_t gfp_high)
738{ 738{
739	/* free_pages may go negative - that's OK */ 739	/* free_pages may go negative - that's OK */
740 long min = mark, free_pages = z->free_pages - (1 << order) + 1; 740 long min = mark, free_pages = z->free_pages - (1 << order) + 1;
@@ -777,7 +777,7 @@ struct page * fastcall
777__alloc_pages(gfp_t gfp_mask, unsigned int order, 777__alloc_pages(gfp_t gfp_mask, unsigned int order,
778 struct zonelist *zonelist) 778 struct zonelist *zonelist)
779{ 779{
780 const int wait = gfp_mask & __GFP_WAIT; 780 const gfp_t wait = gfp_mask & __GFP_WAIT;
781 struct zone **zones, *z; 781 struct zone **zones, *z;
782 struct page *page; 782 struct page *page;
783 struct reclaim_state reclaim_state; 783 struct reclaim_state reclaim_state;
@@ -996,7 +996,7 @@ fastcall unsigned long get_zeroed_page(gfp_t gfp_mask)
996 * get_zeroed_page() returns a 32-bit address, which cannot represent 996 * get_zeroed_page() returns a 32-bit address, which cannot represent
997 * a highmem page 997 * a highmem page
998 */ 998 */
999 BUG_ON(gfp_mask & __GFP_HIGHMEM); 999 BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
1000 1000
1001 page = alloc_pages(gfp_mask | __GFP_ZERO, 0); 1001 page = alloc_pages(gfp_mask | __GFP_ZERO, 0);
1002 if (page) 1002 if (page)
@@ -1089,7 +1089,7 @@ static unsigned int nr_free_zone_pages(int offset)
1089 */ 1089 */
1090unsigned int nr_free_buffer_pages(void) 1090unsigned int nr_free_buffer_pages(void)
1091{ 1091{
1092 return nr_free_zone_pages(GFP_USER & GFP_ZONEMASK); 1092 return nr_free_zone_pages(gfp_zone(GFP_USER));
1093} 1093}
1094 1094
1095/* 1095/*
@@ -1097,7 +1097,7 @@ unsigned int nr_free_buffer_pages(void)
1097 */ 1097 */
1098unsigned int nr_free_pagecache_pages(void) 1098unsigned int nr_free_pagecache_pages(void)
1099{ 1099{
1100 return nr_free_zone_pages(GFP_HIGHUSER & GFP_ZONEMASK); 1100 return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER));
1101} 1101}
1102 1102
1103#ifdef CONFIG_HIGHMEM 1103#ifdef CONFIG_HIGHMEM
@@ -1428,6 +1428,16 @@ static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zoneli
1428 return j; 1428 return j;
1429} 1429}
1430 1430
1431static inline int highest_zone(int zone_bits)
1432{
1433 int res = ZONE_NORMAL;
1434 if (zone_bits & (__force int)__GFP_HIGHMEM)
1435 res = ZONE_HIGHMEM;
1436 if (zone_bits & (__force int)__GFP_DMA)
1437 res = ZONE_DMA;
1438 return res;
1439}
1440
1431#ifdef CONFIG_NUMA 1441#ifdef CONFIG_NUMA
1432#define MAX_NODE_LOAD (num_online_nodes()) 1442#define MAX_NODE_LOAD (num_online_nodes())
1433static int __initdata node_load[MAX_NUMNODES]; 1443static int __initdata node_load[MAX_NUMNODES];
@@ -1524,11 +1534,7 @@ static void __init build_zonelists(pg_data_t *pgdat)
1524 zonelist = pgdat->node_zonelists + i; 1534 zonelist = pgdat->node_zonelists + i;
1525 for (j = 0; zonelist->zones[j] != NULL; j++); 1535 for (j = 0; zonelist->zones[j] != NULL; j++);
1526 1536
1527 k = ZONE_NORMAL; 1537 k = highest_zone(i);
1528 if (i & __GFP_HIGHMEM)
1529 k = ZONE_HIGHMEM;
1530 if (i & __GFP_DMA)
1531 k = ZONE_DMA;
1532 1538
1533 j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); 1539 j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
1534 zonelist->zones[j] = NULL; 1540 zonelist->zones[j] = NULL;
@@ -1549,12 +1555,7 @@ static void __init build_zonelists(pg_data_t *pgdat)
1549 zonelist = pgdat->node_zonelists + i; 1555 zonelist = pgdat->node_zonelists + i;
1550 1556
1551 j = 0; 1557 j = 0;
1552 k = ZONE_NORMAL; 1558 k = highest_zone(i);
1553 if (i & __GFP_HIGHMEM)
1554 k = ZONE_HIGHMEM;
1555 if (i & __GFP_DMA)
1556 k = ZONE_DMA;
1557
1558 j = build_zonelists_node(pgdat, zonelist, j, k); 1559 j = build_zonelists_node(pgdat, zonelist, j, k);
1559 /* 1560 /*
1560 * Now we build the zonelist so that it contains the zones 1561 * Now we build the zonelist so that it contains the zones
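gfp_zone() replaces every open-coded `gfp & GFP_ZONEMASK` in this patch and mirrors the highest_zone() helper added above: it maps the zone modifier bits to a zone index, with __GFP_DMA overriding __GFP_HIGHMEM just as in highest_zone(). A sketch of the equivalent logic; the real helper lives in gfp.h and may differ in detail:

static inline int example_gfp_zone(gfp_t gfp)
{
        if (gfp & __GFP_DMA)
                return ZONE_DMA;
        if (gfp & __GFP_HIGHMEM)
                return ZONE_HIGHMEM;
        return ZONE_NORMAL;
}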
diff --git a/mm/shmem.c b/mm/shmem.c
index ea064d89cda9..55e04a0734c1 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -85,7 +85,7 @@ enum sgp_type {
85static int shmem_getpage(struct inode *inode, unsigned long idx, 85static int shmem_getpage(struct inode *inode, unsigned long idx,
86 struct page **pagep, enum sgp_type sgp, int *type); 86 struct page **pagep, enum sgp_type sgp, int *type);
87 87
88static inline struct page *shmem_dir_alloc(unsigned int gfp_mask) 88static inline struct page *shmem_dir_alloc(gfp_t gfp_mask)
89{ 89{
90 /* 90 /*
91 * The above definition of ENTRIES_PER_PAGE, and the use of 91 * The above definition of ENTRIES_PER_PAGE, and the use of
@@ -898,7 +898,7 @@ struct page *shmem_swapin(struct shmem_inode_info *info, swp_entry_t entry,
898} 898}
899 899
900static struct page * 900static struct page *
901shmem_alloc_page(unsigned long gfp, struct shmem_inode_info *info, 901shmem_alloc_page(gfp_t gfp, struct shmem_inode_info *info,
902 unsigned long idx) 902 unsigned long idx)
903{ 903{
904 struct vm_area_struct pvma; 904 struct vm_area_struct pvma;
diff --git a/mm/slab.c b/mm/slab.c
index d05c678bceb3..d30423f167a2 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -386,7 +386,7 @@ struct kmem_cache_s {
386 unsigned int gfporder; 386 unsigned int gfporder;
387 387
388 /* force GFP flags, e.g. GFP_DMA */ 388 /* force GFP flags, e.g. GFP_DMA */
389 unsigned int gfpflags; 389 gfp_t gfpflags;
390 390
391 size_t colour; /* cache colouring range */ 391 size_t colour; /* cache colouring range */
392 unsigned int colour_off; /* colour offset */ 392 unsigned int colour_off; /* colour offset */
@@ -2117,7 +2117,7 @@ static void cache_init_objs(kmem_cache_t *cachep,
2117 slabp->free = 0; 2117 slabp->free = 0;
2118} 2118}
2119 2119
2120static void kmem_flagcheck(kmem_cache_t *cachep, unsigned int flags) 2120static void kmem_flagcheck(kmem_cache_t *cachep, gfp_t flags)
2121{ 2121{
2122 if (flags & SLAB_DMA) { 2122 if (flags & SLAB_DMA) {
2123 if (!(cachep->gfpflags & GFP_DMA)) 2123 if (!(cachep->gfpflags & GFP_DMA))
@@ -2152,7 +2152,7 @@ static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nodeid)
2152 struct slab *slabp; 2152 struct slab *slabp;
2153 void *objp; 2153 void *objp;
2154 size_t offset; 2154 size_t offset;
2155 unsigned int local_flags; 2155 gfp_t local_flags;
2156 unsigned long ctor_flags; 2156 unsigned long ctor_flags;
2157 struct kmem_list3 *l3; 2157 struct kmem_list3 *l3;
2158 2158
@@ -2546,7 +2546,7 @@ static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags)
2546/* 2546/*
2547 * An interface to enable slab creation on nodeid 2547 * An interface to enable slab creation on nodeid
2548 */ 2548 */
2549static void *__cache_alloc_node(kmem_cache_t *cachep, int flags, int nodeid) 2549static void *__cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid)
2550{ 2550{
2551 struct list_head *entry; 2551 struct list_head *entry;
2552 struct slab *slabp; 2552 struct slab *slabp;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 64f9570cff56..843c87d1e61f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -70,7 +70,7 @@ struct scan_control {
70 unsigned int priority; 70 unsigned int priority;
71 71
72 /* This context's GFP mask */ 72 /* This context's GFP mask */
73 unsigned int gfp_mask; 73 gfp_t gfp_mask;
74 74
75 int may_writepage; 75 int may_writepage;
76 76
@@ -186,7 +186,7 @@ EXPORT_SYMBOL(remove_shrinker);
186 * 186 *
187 * Returns the number of slab objects which we shrunk. 187 * Returns the number of slab objects which we shrunk.
188 */ 188 */
189static int shrink_slab(unsigned long scanned, unsigned int gfp_mask, 189static int shrink_slab(unsigned long scanned, gfp_t gfp_mask,
190 unsigned long lru_pages) 190 unsigned long lru_pages)
191{ 191{
192 struct shrinker *shrinker; 192 struct shrinker *shrinker;
@@ -926,7 +926,7 @@ shrink_caches(struct zone **zones, struct scan_control *sc)
926 * holds filesystem locks which prevent writeout this might not work, and the 926 * holds filesystem locks which prevent writeout this might not work, and the
927 * allocation attempt will fail. 927 * allocation attempt will fail.
928 */ 928 */
929int try_to_free_pages(struct zone **zones, unsigned int gfp_mask) 929int try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
930{ 930{
931 int priority; 931 int priority;
932 int ret = 0; 932 int ret = 0;
@@ -1338,7 +1338,7 @@ module_init(kswapd_init)
1338/* 1338/*
1339 * Try to free up some pages from this zone through reclaim. 1339 * Try to free up some pages from this zone through reclaim.
1340 */ 1340 */
1341int zone_reclaim(struct zone *zone, unsigned int gfp_mask, unsigned int order) 1341int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
1342{ 1342{
1343 struct scan_control sc; 1343 struct scan_control sc;
1344 int nr_pages = 1 << order; 1344 int nr_pages = 1 << order;
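
The mm/ hunks above (shmem, slab, vmscan) are one slice of a tree-wide sweep replacing unsigned int allocation-mask parameters and fields with the gfp_t typedef; the same conversion recurs further down in net/core/sock.c, net/dccp/output.c and net/netlink/af_netlink.c. The point of the typedef is to let the sparse checker treat allocation masks as a distinct type and flag callers that mix them up with ordinary integers. A minimal sketch of the pattern, with illustrative names:

    #include <linux/slab.h>

    /* Taking gfp_t rather than unsigned int lets sparse warn when a
     * caller passes, say, a byte count where allocation flags belong. */
    static void *buf_alloc(size_t len, gfp_t gfp_mask)
    {
            return kmalloc(len, gfp_mask);
    }

    /* Typical call sites: GFP_KERNEL may sleep, GFP_ATOMIC may not. */
    /* buf_alloc(128, GFP_KERNEL);   buf_alloc(128, GFP_ATOMIC);     */
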
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 1dcf7fa1f0fe..e68700f950a5 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1625,12 +1625,9 @@ static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb,
1625 1625
1626 memset(&ndst, 0, sizeof(ndst)); 1626 memset(&ndst, 0, sizeof(ndst));
1627 1627
1628 for (cpu = 0; cpu < NR_CPUS; cpu++) { 1628 for_each_cpu(cpu) {
1629 struct neigh_statistics *st; 1629 struct neigh_statistics *st;
1630 1630
1631 if (!cpu_possible(cpu))
1632 continue;
1633
1634 st = per_cpu_ptr(tbl->stats, cpu); 1631 st = per_cpu_ptr(tbl->stats, cpu);
1635 ndst.ndts_allocs += st->allocs; 1632 ndst.ndts_allocs += st->allocs;
1636 ndst.ndts_destroys += st->destroys; 1633 ndst.ndts_destroys += st->destroys;
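
The neighbour.c hunk above is another recurring cleanup in this series: an open-coded scan of all NR_CPUS slots with a cpu_possible() filter becomes the for_each_cpu() iterator, which walks only the possible-CPU mask. The same conversion appears below in net/ipv4/icmp.c, net/ipv4/proc.c, net/ipv6/icmp.c, net/ipv6/proc.c and net/sctp/proc.c. The shape of the change, sketched with an illustrative per-CPU counter:

    #include <linux/percpu.h>
    #include <linux/cpumask.h>

    static DEFINE_PER_CPU(unsigned long, demo_count);

    static unsigned long demo_sum(void)
    {
            unsigned long sum = 0;
            int cpu;

            /* Replaces:  for (cpu = 0; cpu < NR_CPUS; cpu++) {
             *                    if (!cpu_possible(cpu))
             *                            continue;  ...  }          */
            for_each_cpu(cpu)
                    sum += per_cpu(demo_count, cpu);
            return sum;
    }
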
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 5f043d346694..7fc3e9e28c34 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -75,7 +75,7 @@
75 * By design there should only be *one* "controlling" process. In practice 75 * By design there should only be *one* "controlling" process. In practice
76 * multiple write accesses gives unpredictable result. Understood by "write" 76 * multiple write accesses gives unpredictable result. Understood by "write"
77 * to /proc gives result code thats should be read be the "writer". 77 * to /proc gives result code thats should be read be the "writer".
78 * For pratical use this should be no problem. 78 * For practical use this should be no problem.
79 * 79 *
80 * Note when adding devices to a specific CPU there good idea to also assign 80 * Note when adding devices to a specific CPU there good idea to also assign
81 * /proc/irq/XX/smp_affinity so TX-interrupts gets bound to the same CPU. 81 * /proc/irq/XX/smp_affinity so TX-interrupts gets bound to the same CPU.
@@ -96,7 +96,7 @@
96 * New xmit() return, do_div and misc clean up by Stephen Hemminger 96 * New xmit() return, do_div and misc clean up by Stephen Hemminger
97 * <shemminger@osdl.org> 040923 97 * <shemminger@osdl.org> 040923
98 * 98 *
99 * Rany Dunlap fixed u64 printk compiler waring 99 * Randy Dunlap fixed u64 printk compiler waring
100 * 100 *
101 * Remove FCS from BW calculation. Lennert Buytenhek <buytenh@wantstofly.org> 101 * Remove FCS from BW calculation. Lennert Buytenhek <buytenh@wantstofly.org>
102 * New time handling. Lennert Buytenhek <buytenh@wantstofly.org> 041213 102 * New time handling. Lennert Buytenhek <buytenh@wantstofly.org> 041213
@@ -137,6 +137,7 @@
137#include <linux/ipv6.h> 137#include <linux/ipv6.h>
138#include <linux/udp.h> 138#include <linux/udp.h>
139#include <linux/proc_fs.h> 139#include <linux/proc_fs.h>
140#include <linux/seq_file.h>
140#include <linux/wait.h> 141#include <linux/wait.h>
141#include <net/checksum.h> 142#include <net/checksum.h>
142#include <net/ipv6.h> 143#include <net/ipv6.h>
@@ -151,7 +152,7 @@
151#include <asm/timex.h> 152#include <asm/timex.h>
152 153
153 154
154#define VERSION "pktgen v2.62: Packet Generator for packet performance testing.\n" 155#define VERSION "pktgen v2.63: Packet Generator for packet performance testing.\n"
155 156
156/* #define PG_DEBUG(a) a */ 157/* #define PG_DEBUG(a) a */
157#define PG_DEBUG(a) 158#define PG_DEBUG(a)
@@ -177,8 +178,8 @@
177#define T_REMDEV (1<<3) /* Remove all devs */ 178#define T_REMDEV (1<<3) /* Remove all devs */
178 179
179/* Locks */ 180/* Locks */
180#define thread_lock() spin_lock(&_thread_lock) 181#define thread_lock() down(&pktgen_sem)
181#define thread_unlock() spin_unlock(&_thread_lock) 182#define thread_unlock() up(&pktgen_sem)
182 183
183/* If lock -- can be removed after some work */ 184/* If lock -- can be removed after some work */
184#define if_lock(t) spin_lock(&(t->if_lock)); 185#define if_lock(t) spin_lock(&(t->if_lock));
@@ -186,7 +187,9 @@
186 187
187/* Used to help with determining the pkts on receive */ 188/* Used to help with determining the pkts on receive */
188#define PKTGEN_MAGIC 0xbe9be955 189#define PKTGEN_MAGIC 0xbe9be955
189#define PG_PROC_DIR "net/pktgen" 190#define PG_PROC_DIR "pktgen"
191#define PGCTRL "pgctrl"
192static struct proc_dir_entry *pg_proc_dir = NULL;
190 193
191#define MAX_CFLOWS 65536 194#define MAX_CFLOWS 65536
192 195
@@ -202,11 +205,8 @@ struct pktgen_dev {
202 * Try to keep frequent/infrequent used vars. separated. 205 * Try to keep frequent/infrequent used vars. separated.
203 */ 206 */
204 207
205 char ifname[32]; 208 char ifname[IFNAMSIZ];
206 struct proc_dir_entry *proc_ent;
207 char result[512]; 209 char result[512];
208 /* proc file names */
209 char fname[80];
210 210
211 struct pktgen_thread* pg_thread; /* the owner */ 211 struct pktgen_thread* pg_thread; /* the owner */
212 struct pktgen_dev *next; /* Used for chaining in the thread's run-queue */ 212 struct pktgen_dev *next; /* Used for chaining in the thread's run-queue */
@@ -244,7 +244,7 @@ struct pktgen_dev {
244 __u32 seq_num; 244 __u32 seq_num;
245 245
246 int clone_skb; /* Use multiple SKBs during packet gen. If this number 246 int clone_skb; /* Use multiple SKBs during packet gen. If this number
247 * is greater than 1, then that many coppies of the same 247 * is greater than 1, then that many copies of the same
248 * packet will be sent before a new packet is allocated. 248 * packet will be sent before a new packet is allocated.
249 * For instance, if you want to send 1024 identical packets 249 * For instance, if you want to send 1024 identical packets
250 * before creating a new packet, set clone_skb to 1024. 250 * before creating a new packet, set clone_skb to 1024.
@@ -330,8 +330,6 @@ struct pktgen_thread {
330 struct pktgen_dev *if_list; /* All device here */ 330 struct pktgen_dev *if_list; /* All device here */
331 struct pktgen_thread* next; 331 struct pktgen_thread* next;
332 char name[32]; 332 char name[32];
333 char fname[128]; /* name of proc file */
334 struct proc_dir_entry *proc_ent;
335 char result[512]; 333 char result[512];
336 u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */ 334 u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */
337 335
@@ -396,7 +394,7 @@ static inline s64 divremdi3(s64 x, s64 y, int type)
396 394
397/* End of hacks to deal with 64-bit math on x86 */ 395/* End of hacks to deal with 64-bit math on x86 */
398 396
399/** Convert to miliseconds */ 397/** Convert to milliseconds */
400static inline __u64 tv_to_ms(const struct timeval* tv) 398static inline __u64 tv_to_ms(const struct timeval* tv)
401{ 399{
402 __u64 ms = tv->tv_usec / 1000; 400 __u64 ms = tv->tv_usec / 1000;
@@ -425,7 +423,7 @@ static inline __u64 pg_div64(__u64 n, __u64 base)
425{ 423{
426 __u64 tmp = n; 424 __u64 tmp = n;
427/* 425/*
428 * How do we know if the architectrure we are running on 426 * How do we know if the architecture we are running on
429 * supports division with 64 bit base? 427 * supports division with 64 bit base?
430 * 428 *
431 */ 429 */
@@ -473,16 +471,6 @@ static inline __u64 tv_diff(const struct timeval* a, const struct timeval* b)
473 471
474static char version[] __initdata = VERSION; 472static char version[] __initdata = VERSION;
475 473
476static ssize_t proc_pgctrl_read(struct file* file, char __user * buf, size_t count, loff_t *ppos);
477static ssize_t proc_pgctrl_write(struct file* file, const char __user * buf, size_t count, loff_t *ppos);
478static int proc_if_read(char *buf , char **start, off_t offset, int len, int *eof, void *data);
479
480static int proc_thread_read(char *buf , char **start, off_t offset, int len, int *eof, void *data);
481static int proc_if_write(struct file *file, const char __user *user_buffer, unsigned long count, void *data);
482static int proc_thread_write(struct file *file, const char __user *user_buffer, unsigned long count, void *data);
483static int create_proc_dir(void);
484static int remove_proc_dir(void);
485
486static int pktgen_remove_device(struct pktgen_thread* t, struct pktgen_dev *i); 474static int pktgen_remove_device(struct pktgen_thread* t, struct pktgen_dev *i);
487static int pktgen_add_device(struct pktgen_thread* t, const char* ifname); 475static int pktgen_add_device(struct pktgen_thread* t, const char* ifname);
488static struct pktgen_thread* pktgen_find_thread(const char* name); 476static struct pktgen_thread* pktgen_find_thread(const char* name);
@@ -503,83 +491,41 @@ static int pg_delay_d = 0;
503static int pg_clone_skb_d = 0; 491static int pg_clone_skb_d = 0;
504static int debug = 0; 492static int debug = 0;
505 493
506static DEFINE_SPINLOCK(_thread_lock); 494static DECLARE_MUTEX(pktgen_sem);
507static struct pktgen_thread *pktgen_threads = NULL; 495static struct pktgen_thread *pktgen_threads = NULL;
508 496
509static char module_fname[128];
510static struct proc_dir_entry *module_proc_ent = NULL;
511
512static struct notifier_block pktgen_notifier_block = { 497static struct notifier_block pktgen_notifier_block = {
513 .notifier_call = pktgen_device_event, 498 .notifier_call = pktgen_device_event,
514}; 499};
515 500
516static struct file_operations pktgen_fops = {
517 .read = proc_pgctrl_read,
518 .write = proc_pgctrl_write,
519 /* .ioctl = pktgen_ioctl, later maybe */
520};
521
522/* 501/*
523 * /proc handling functions 502 * /proc handling functions
524 * 503 *
525 */ 504 */
526 505
527static struct proc_dir_entry *pg_proc_dir = NULL; 506static int pgctrl_show(struct seq_file *seq, void *v)
528static int proc_pgctrl_read_eof=0;
529
530static ssize_t proc_pgctrl_read(struct file* file, char __user * buf,
531 size_t count, loff_t *ppos)
532{ 507{
533 char data[200]; 508 seq_puts(seq, VERSION);
534 int len = 0; 509 return 0;
535
536 if(proc_pgctrl_read_eof) {
537 proc_pgctrl_read_eof=0;
538 len = 0;
539 goto out;
540 }
541
542 sprintf(data, "%s", VERSION);
543
544 len = strlen(data);
545
546 if(len > count) {
547 len =-EFAULT;
548 goto out;
549 }
550
551 if (copy_to_user(buf, data, len)) {
552 len =-EFAULT;
553 goto out;
554 }
555
556 *ppos += len;
557 proc_pgctrl_read_eof=1; /* EOF next call */
558
559 out:
560 return len;
561} 510}
562 511
563static ssize_t proc_pgctrl_write(struct file* file,const char __user * buf, 512static ssize_t pgctrl_write(struct file* file,const char __user * buf,
564 size_t count, loff_t *ppos) 513 size_t count, loff_t *ppos)
565{ 514{
566 char *data = NULL;
567 int err = 0; 515 int err = 0;
516 char data[128];
568 517
569 if (!capable(CAP_NET_ADMIN)){ 518 if (!capable(CAP_NET_ADMIN)){
570 err = -EPERM; 519 err = -EPERM;
571 goto out; 520 goto out;
572 } 521 }
573 522
574 data = (void*)vmalloc ((unsigned int)count); 523 if (count > sizeof(data))
524 count = sizeof(data);
575 525
576 if(!data) {
577 err = -ENOMEM;
578 goto out;
579 }
580 if (copy_from_user(data, buf, count)) { 526 if (copy_from_user(data, buf, count)) {
581 err =-EFAULT; 527 err = -EFAULT;
582 goto out_free; 528 goto out;
583 } 529 }
584 data[count-1] = 0; /* Make string */ 530 data[count-1] = 0; /* Make string */
585 531
@@ -594,31 +540,40 @@ static ssize_t proc_pgctrl_write(struct file* file,const char __user * buf,
594 540
595 err = count; 541 err = count;
596 542
597 out_free:
598 vfree (data);
599 out: 543 out:
600 return err; 544 return err;
601} 545}
602 546
603static int proc_if_read(char *buf , char **start, off_t offset, 547static int pgctrl_open(struct inode *inode, struct file *file)
604 int len, int *eof, void *data) 548{
549 return single_open(file, pgctrl_show, PDE(inode)->data);
550}
551
552static struct file_operations pktgen_fops = {
553 .owner = THIS_MODULE,
554 .open = pgctrl_open,
555 .read = seq_read,
556 .llseek = seq_lseek,
557 .write = pgctrl_write,
558 .release = single_release,
559};
560
561static int pktgen_if_show(struct seq_file *seq, void *v)
605{ 562{
606 char *p;
607 int i; 563 int i;
608 struct pktgen_dev *pkt_dev = (struct pktgen_dev*)(data); 564 struct pktgen_dev *pkt_dev = seq->private;
609 __u64 sa; 565 __u64 sa;
610 __u64 stopped; 566 __u64 stopped;
611 __u64 now = getCurUs(); 567 __u64 now = getCurUs();
612 568
613 p = buf; 569 seq_printf(seq, "Params: count %llu min_pkt_size: %u max_pkt_size: %u\n",
614 p += sprintf(p, "Params: count %llu min_pkt_size: %u max_pkt_size: %u\n", 570 (unsigned long long) pkt_dev->count,
615 (unsigned long long) pkt_dev->count, 571 pkt_dev->min_pkt_size, pkt_dev->max_pkt_size);
616 pkt_dev->min_pkt_size, pkt_dev->max_pkt_size);
617 572
618 p += sprintf(p, " frags: %d delay: %u clone_skb: %d ifname: %s\n", 573 seq_printf(seq, " frags: %d delay: %u clone_skb: %d ifname: %s\n",
619 pkt_dev->nfrags, 1000*pkt_dev->delay_us+pkt_dev->delay_ns, pkt_dev->clone_skb, pkt_dev->ifname); 574 pkt_dev->nfrags, 1000*pkt_dev->delay_us+pkt_dev->delay_ns, pkt_dev->clone_skb, pkt_dev->ifname);
620 575
621 p += sprintf(p, " flows: %u flowlen: %u\n", pkt_dev->cflows, pkt_dev->lflow); 576 seq_printf(seq, " flows: %u flowlen: %u\n", pkt_dev->cflows, pkt_dev->lflow);
622 577
623 578
624 if(pkt_dev->flags & F_IPV6) { 579 if(pkt_dev->flags & F_IPV6) {
@@ -626,19 +581,19 @@ static int proc_if_read(char *buf , char **start, off_t offset,
626 fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr); 581 fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr);
627 fmt_ip6(b2, pkt_dev->min_in6_saddr.s6_addr); 582 fmt_ip6(b2, pkt_dev->min_in6_saddr.s6_addr);
628 fmt_ip6(b3, pkt_dev->max_in6_saddr.s6_addr); 583 fmt_ip6(b3, pkt_dev->max_in6_saddr.s6_addr);
629 p += sprintf(p, " saddr: %s min_saddr: %s max_saddr: %s\n", b1, b2, b3); 584 seq_printf(seq, " saddr: %s min_saddr: %s max_saddr: %s\n", b1, b2, b3);
630 585
631 fmt_ip6(b1, pkt_dev->in6_daddr.s6_addr); 586 fmt_ip6(b1, pkt_dev->in6_daddr.s6_addr);
632 fmt_ip6(b2, pkt_dev->min_in6_daddr.s6_addr); 587 fmt_ip6(b2, pkt_dev->min_in6_daddr.s6_addr);
633 fmt_ip6(b3, pkt_dev->max_in6_daddr.s6_addr); 588 fmt_ip6(b3, pkt_dev->max_in6_daddr.s6_addr);
634 p += sprintf(p, " daddr: %s min_daddr: %s max_daddr: %s\n", b1, b2, b3); 589 seq_printf(seq, " daddr: %s min_daddr: %s max_daddr: %s\n", b1, b2, b3);
635 590
636 } 591 }
637 else 592 else
638 p += sprintf(p, " dst_min: %s dst_max: %s\n src_min: %s src_max: %s\n", 593 seq_printf(seq," dst_min: %s dst_max: %s\n src_min: %s src_max: %s\n",
639 pkt_dev->dst_min, pkt_dev->dst_max, pkt_dev->src_min, pkt_dev->src_max); 594 pkt_dev->dst_min, pkt_dev->dst_max, pkt_dev->src_min, pkt_dev->src_max);
640 595
641 p += sprintf(p, " src_mac: "); 596 seq_puts(seq, " src_mac: ");
642 597
643 if ((pkt_dev->src_mac[0] == 0) && 598 if ((pkt_dev->src_mac[0] == 0) &&
644 (pkt_dev->src_mac[1] == 0) && 599 (pkt_dev->src_mac[1] == 0) &&
@@ -648,89 +603,89 @@ static int proc_if_read(char *buf , char **start, off_t offset,
648 (pkt_dev->src_mac[5] == 0)) 603 (pkt_dev->src_mac[5] == 0))
649 604
650 for (i = 0; i < 6; i++) 605 for (i = 0; i < 6; i++)
651 p += sprintf(p, "%02X%s", pkt_dev->odev->dev_addr[i], i == 5 ? " " : ":"); 606 seq_printf(seq, "%02X%s", pkt_dev->odev->dev_addr[i], i == 5 ? " " : ":");
652 607
653 else 608 else
654 for (i = 0; i < 6; i++) 609 for (i = 0; i < 6; i++)
655 p += sprintf(p, "%02X%s", pkt_dev->src_mac[i], i == 5 ? " " : ":"); 610 seq_printf(seq, "%02X%s", pkt_dev->src_mac[i], i == 5 ? " " : ":");
656 611
657 p += sprintf(p, "dst_mac: "); 612 seq_printf(seq, "dst_mac: ");
658 for (i = 0; i < 6; i++) 613 for (i = 0; i < 6; i++)
659 p += sprintf(p, "%02X%s", pkt_dev->dst_mac[i], i == 5 ? "\n" : ":"); 614 seq_printf(seq, "%02X%s", pkt_dev->dst_mac[i], i == 5 ? "\n" : ":");
660 615
661 p += sprintf(p, " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n", 616 seq_printf(seq, " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n",
662 pkt_dev->udp_src_min, pkt_dev->udp_src_max, pkt_dev->udp_dst_min, 617 pkt_dev->udp_src_min, pkt_dev->udp_src_max, pkt_dev->udp_dst_min,
663 pkt_dev->udp_dst_max); 618 pkt_dev->udp_dst_max);
664 619
665 p += sprintf(p, " src_mac_count: %d dst_mac_count: %d \n Flags: ", 620 seq_printf(seq, " src_mac_count: %d dst_mac_count: %d \n Flags: ",
666 pkt_dev->src_mac_count, pkt_dev->dst_mac_count); 621 pkt_dev->src_mac_count, pkt_dev->dst_mac_count);
667 622
668 623
669 if (pkt_dev->flags & F_IPV6) 624 if (pkt_dev->flags & F_IPV6)
670 p += sprintf(p, "IPV6 "); 625 seq_printf(seq, "IPV6 ");
671 626
672 if (pkt_dev->flags & F_IPSRC_RND) 627 if (pkt_dev->flags & F_IPSRC_RND)
673 p += sprintf(p, "IPSRC_RND "); 628 seq_printf(seq, "IPSRC_RND ");
674 629
675 if (pkt_dev->flags & F_IPDST_RND) 630 if (pkt_dev->flags & F_IPDST_RND)
676 p += sprintf(p, "IPDST_RND "); 631 seq_printf(seq, "IPDST_RND ");
677 632
678 if (pkt_dev->flags & F_TXSIZE_RND) 633 if (pkt_dev->flags & F_TXSIZE_RND)
679 p += sprintf(p, "TXSIZE_RND "); 634 seq_printf(seq, "TXSIZE_RND ");
680 635
681 if (pkt_dev->flags & F_UDPSRC_RND) 636 if (pkt_dev->flags & F_UDPSRC_RND)
682 p += sprintf(p, "UDPSRC_RND "); 637 seq_printf(seq, "UDPSRC_RND ");
683 638
684 if (pkt_dev->flags & F_UDPDST_RND) 639 if (pkt_dev->flags & F_UDPDST_RND)
685 p += sprintf(p, "UDPDST_RND "); 640 seq_printf(seq, "UDPDST_RND ");
686 641
687 if (pkt_dev->flags & F_MACSRC_RND) 642 if (pkt_dev->flags & F_MACSRC_RND)
688 p += sprintf(p, "MACSRC_RND "); 643 seq_printf(seq, "MACSRC_RND ");
689 644
690 if (pkt_dev->flags & F_MACDST_RND) 645 if (pkt_dev->flags & F_MACDST_RND)
691 p += sprintf(p, "MACDST_RND "); 646 seq_printf(seq, "MACDST_RND ");
692 647
693 648
694 p += sprintf(p, "\n"); 649 seq_puts(seq, "\n");
695 650
696 sa = pkt_dev->started_at; 651 sa = pkt_dev->started_at;
697 stopped = pkt_dev->stopped_at; 652 stopped = pkt_dev->stopped_at;
698 if (pkt_dev->running) 653 if (pkt_dev->running)
699 stopped = now; /* not really stopped, more like last-running-at */ 654 stopped = now; /* not really stopped, more like last-running-at */
700 655
701 p += sprintf(p, "Current:\n pkts-sofar: %llu errors: %llu\n started: %lluus stopped: %lluus idle: %lluus\n", 656 seq_printf(seq, "Current:\n pkts-sofar: %llu errors: %llu\n started: %lluus stopped: %lluus idle: %lluus\n",
702 (unsigned long long) pkt_dev->sofar, 657 (unsigned long long) pkt_dev->sofar,
703 (unsigned long long) pkt_dev->errors, 658 (unsigned long long) pkt_dev->errors,
704 (unsigned long long) sa, 659 (unsigned long long) sa,
705 (unsigned long long) stopped, 660 (unsigned long long) stopped,
706 (unsigned long long) pkt_dev->idle_acc); 661 (unsigned long long) pkt_dev->idle_acc);
707 662
708 p += sprintf(p, " seq_num: %d cur_dst_mac_offset: %d cur_src_mac_offset: %d\n", 663 seq_printf(seq, " seq_num: %d cur_dst_mac_offset: %d cur_src_mac_offset: %d\n",
709 pkt_dev->seq_num, pkt_dev->cur_dst_mac_offset, pkt_dev->cur_src_mac_offset); 664 pkt_dev->seq_num, pkt_dev->cur_dst_mac_offset,
665 pkt_dev->cur_src_mac_offset);
710 666
711 if(pkt_dev->flags & F_IPV6) { 667 if(pkt_dev->flags & F_IPV6) {
712 char b1[128], b2[128]; 668 char b1[128], b2[128];
713 fmt_ip6(b1, pkt_dev->cur_in6_daddr.s6_addr); 669 fmt_ip6(b1, pkt_dev->cur_in6_daddr.s6_addr);
714 fmt_ip6(b2, pkt_dev->cur_in6_saddr.s6_addr); 670 fmt_ip6(b2, pkt_dev->cur_in6_saddr.s6_addr);
715 p += sprintf(p, " cur_saddr: %s cur_daddr: %s\n", b2, b1); 671 seq_printf(seq, " cur_saddr: %s cur_daddr: %s\n", b2, b1);
716 } 672 }
717 else 673 else
718 p += sprintf(p, " cur_saddr: 0x%x cur_daddr: 0x%x\n", 674 seq_printf(seq, " cur_saddr: 0x%x cur_daddr: 0x%x\n",
719 pkt_dev->cur_saddr, pkt_dev->cur_daddr); 675 pkt_dev->cur_saddr, pkt_dev->cur_daddr);
720 676
721 677
722 p += sprintf(p, " cur_udp_dst: %d cur_udp_src: %d\n", 678 seq_printf(seq, " cur_udp_dst: %d cur_udp_src: %d\n",
723 pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src); 679 pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src);
724 680
725 p += sprintf(p, " flows: %u\n", pkt_dev->nflows); 681 seq_printf(seq, " flows: %u\n", pkt_dev->nflows);
726 682
727 if (pkt_dev->result[0]) 683 if (pkt_dev->result[0])
728 p += sprintf(p, "Result: %s\n", pkt_dev->result); 684 seq_printf(seq, "Result: %s\n", pkt_dev->result);
729 else 685 else
730 p += sprintf(p, "Result: Idle\n"); 686 seq_printf(seq, "Result: Idle\n");
731 *eof = 1;
732 687
733 return p - buf; 688 return 0;
734} 689}
735 690
736 691
@@ -802,13 +757,14 @@ done_str:
802 return i; 757 return i;
803} 758}
804 759
805static int proc_if_write(struct file *file, const char __user *user_buffer, 760static ssize_t pktgen_if_write(struct file *file, const char __user *user_buffer,
806 unsigned long count, void *data) 761 size_t count, loff_t *offset)
807{ 762{
763 struct seq_file *seq = (struct seq_file *) file->private_data;
764 struct pktgen_dev *pkt_dev = seq->private;
808 int i = 0, max, len; 765 int i = 0, max, len;
809 char name[16], valstr[32]; 766 char name[16], valstr[32];
810 unsigned long value = 0; 767 unsigned long value = 0;
811 struct pktgen_dev *pkt_dev = (struct pktgen_dev*)(data);
812 char* pg_result = NULL; 768 char* pg_result = NULL;
813 int tmp = 0; 769 int tmp = 0;
814 char buf[128]; 770 char buf[128];
@@ -849,7 +805,8 @@ static int proc_if_write(struct file *file, const char __user *user_buffer,
849 if (copy_from_user(tb, user_buffer, count)) 805 if (copy_from_user(tb, user_buffer, count))
850 return -EFAULT; 806 return -EFAULT;
851 tb[count] = 0; 807 tb[count] = 0;
852 printk("pktgen: %s,%lu buffer -:%s:-\n", name, count, tb); 808 printk("pktgen: %s,%lu buffer -:%s:-\n", name,
809 (unsigned long) count, tb);
853 } 810 }
854 811
855 if (!strcmp(name, "min_pkt_size")) { 812 if (!strcmp(name, "min_pkt_size")) {
@@ -1335,92 +1292,98 @@ static int proc_if_write(struct file *file, const char __user *user_buffer,
1335 return -EINVAL; 1292 return -EINVAL;
1336} 1293}
1337 1294
1338static int proc_thread_read(char *buf , char **start, off_t offset, 1295static int pktgen_if_open(struct inode *inode, struct file *file)
1339 int len, int *eof, void *data)
1340{ 1296{
1341 char *p; 1297 return single_open(file, pktgen_if_show, PDE(inode)->data);
1342 struct pktgen_thread *t = (struct pktgen_thread*)(data); 1298}
1343 struct pktgen_dev *pkt_dev = NULL;
1344 1299
1300static struct file_operations pktgen_if_fops = {
1301 .owner = THIS_MODULE,
1302 .open = pktgen_if_open,
1303 .read = seq_read,
1304 .llseek = seq_lseek,
1305 .write = pktgen_if_write,
1306 .release = single_release,
1307};
1345 1308
1346 if (!t) { 1309static int pktgen_thread_show(struct seq_file *seq, void *v)
1347 printk("pktgen: ERROR: could not find thread in proc_thread_read\n"); 1310{
1348 return -EINVAL; 1311 struct pktgen_thread *t = seq->private;
1349 } 1312 struct pktgen_dev *pkt_dev = NULL;
1313
1314 BUG_ON(!t);
1350 1315
1351 p = buf; 1316 seq_printf(seq, "Name: %s max_before_softirq: %d\n",
1352 p += sprintf(p, "Name: %s max_before_softirq: %d\n",
1353 t->name, t->max_before_softirq); 1317 t->name, t->max_before_softirq);
1354 1318
1355 p += sprintf(p, "Running: "); 1319 seq_printf(seq, "Running: ");
1356 1320
1357 if_lock(t); 1321 if_lock(t);
1358 for(pkt_dev = t->if_list;pkt_dev; pkt_dev = pkt_dev->next) 1322 for(pkt_dev = t->if_list;pkt_dev; pkt_dev = pkt_dev->next)
1359 if(pkt_dev->running) 1323 if(pkt_dev->running)
1360 p += sprintf(p, "%s ", pkt_dev->ifname); 1324 seq_printf(seq, "%s ", pkt_dev->ifname);
1361 1325
1362 p += sprintf(p, "\nStopped: "); 1326 seq_printf(seq, "\nStopped: ");
1363 1327
1364 for(pkt_dev = t->if_list;pkt_dev; pkt_dev = pkt_dev->next) 1328 for(pkt_dev = t->if_list;pkt_dev; pkt_dev = pkt_dev->next)
1365 if(!pkt_dev->running) 1329 if(!pkt_dev->running)
1366 p += sprintf(p, "%s ", pkt_dev->ifname); 1330 seq_printf(seq, "%s ", pkt_dev->ifname);
1367 1331
1368 if (t->result[0]) 1332 if (t->result[0])
1369 p += sprintf(p, "\nResult: %s\n", t->result); 1333 seq_printf(seq, "\nResult: %s\n", t->result);
1370 else 1334 else
1371 p += sprintf(p, "\nResult: NA\n"); 1335 seq_printf(seq, "\nResult: NA\n");
1372
1373 *eof = 1;
1374 1336
1375 if_unlock(t); 1337 if_unlock(t);
1376 1338
1377 return p - buf; 1339 return 0;
1378} 1340}
1379 1341
1380static int proc_thread_write(struct file *file, const char __user *user_buffer, 1342static ssize_t pktgen_thread_write(struct file *file,
1381 unsigned long count, void *data) 1343 const char __user *user_buffer,
1344 size_t count, loff_t *offset)
1382{ 1345{
1346 struct seq_file *seq = (struct seq_file *) file->private_data;
1347 struct pktgen_thread *t = seq->private;
1383 int i = 0, max, len, ret; 1348 int i = 0, max, len, ret;
1384 char name[40]; 1349 char name[40];
1385 struct pktgen_thread *t;
1386 char *pg_result; 1350 char *pg_result;
1387 unsigned long value = 0; 1351 unsigned long value = 0;
1388 1352
1389 if (count < 1) { 1353 if (count < 1) {
1390 // sprintf(pg_result, "Wrong command format"); 1354 // sprintf(pg_result, "Wrong command format");
1391 return -EINVAL; 1355 return -EINVAL;
1392 } 1356 }
1393 1357
1394 max = count - i; 1358 max = count - i;
1395 len = count_trail_chars(&user_buffer[i], max); 1359 len = count_trail_chars(&user_buffer[i], max);
1396 if (len < 0) 1360 if (len < 0)
1397 return len; 1361 return len;
1398 1362
1399 i += len; 1363 i += len;
1400 1364
1401 /* Read variable name */ 1365 /* Read variable name */
1402 1366
1403 len = strn_len(&user_buffer[i], sizeof(name) - 1); 1367 len = strn_len(&user_buffer[i], sizeof(name) - 1);
1404 if (len < 0) 1368 if (len < 0)
1405 return len; 1369 return len;
1406 1370
1407 memset(name, 0, sizeof(name)); 1371 memset(name, 0, sizeof(name));
1408 if (copy_from_user(name, &user_buffer[i], len)) 1372 if (copy_from_user(name, &user_buffer[i], len))
1409 return -EFAULT; 1373 return -EFAULT;
1410 i += len; 1374 i += len;
1411 1375
1412 max = count -i; 1376 max = count -i;
1413 len = count_trail_chars(&user_buffer[i], max); 1377 len = count_trail_chars(&user_buffer[i], max);
1414 if (len < 0) 1378 if (len < 0)
1415 return len; 1379 return len;
1416 1380
1417 i += len; 1381 i += len;
1418 1382
1419 if (debug) 1383 if (debug)
1420 printk("pktgen: t=%s, count=%lu\n", name, count); 1384 printk("pktgen: t=%s, count=%lu\n", name,
1421 1385 (unsigned long) count);
1422 1386
1423 t = (struct pktgen_thread*)(data);
1424 if(!t) { 1387 if(!t) {
1425 printk("pktgen: ERROR: No thread\n"); 1388 printk("pktgen: ERROR: No thread\n");
1426 ret = -EINVAL; 1389 ret = -EINVAL;
@@ -1474,21 +1437,19 @@ static int proc_thread_write(struct file *file, const char __user *user_buffer,
1474 return ret; 1437 return ret;
1475} 1438}
1476 1439
1477static int create_proc_dir(void) 1440static int pktgen_thread_open(struct inode *inode, struct file *file)
1478{ 1441{
1479 pg_proc_dir = proc_mkdir(PG_PROC_DIR, NULL); 1442 return single_open(file, pktgen_thread_show, PDE(inode)->data);
1480
1481 if (!pg_proc_dir)
1482 return -ENODEV;
1483
1484 return 0;
1485} 1443}
1486 1444
1487static int remove_proc_dir(void) 1445static struct file_operations pktgen_thread_fops = {
1488{ 1446 .owner = THIS_MODULE,
1489 remove_proc_entry(PG_PROC_DIR, NULL); 1447 .open = pktgen_thread_open,
1490 return 0; 1448 .read = seq_read,
1491} 1449 .llseek = seq_lseek,
1450 .write = pktgen_thread_write,
1451 .release = single_release,
1452};
1492 1453
1493/* Think find or remove for NN */ 1454/* Think find or remove for NN */
1494static struct pktgen_dev *__pktgen_NN_threads(const char* ifname, int remove) 1455static struct pktgen_dev *__pktgen_NN_threads(const char* ifname, int remove)
@@ -1702,7 +1663,7 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us)
1702 start = now = getCurUs(); 1663 start = now = getCurUs();
1703 printk(KERN_INFO "sleeping for %d\n", (int)(spin_until_us - now)); 1664 printk(KERN_INFO "sleeping for %d\n", (int)(spin_until_us - now));
1704 while (now < spin_until_us) { 1665 while (now < spin_until_us) {
1705 /* TODO: optimise sleeping behavior */ 1666 /* TODO: optimize sleeping behavior */
1706 if (spin_until_us - now > jiffies_to_usecs(1)+1) 1667 if (spin_until_us - now > jiffies_to_usecs(1)+1)
1707 schedule_timeout_interruptible(1); 1668 schedule_timeout_interruptible(1);
1708 else if (spin_until_us - now > 100) { 1669 else if (spin_until_us - now > 100) {
@@ -2361,7 +2322,7 @@ static void pktgen_stop_all_threads_ifs(void)
2361 pktgen_stop(t); 2322 pktgen_stop(t);
2362 t = t->next; 2323 t = t->next;
2363 } 2324 }
2364 thread_unlock(); 2325 thread_unlock();
2365} 2326}
2366 2327
2367static int thread_is_running(struct pktgen_thread *t ) 2328static int thread_is_running(struct pktgen_thread *t )
@@ -2552,10 +2513,9 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
2552 2513
2553 struct pktgen_thread *tmp = pktgen_threads; 2514 struct pktgen_thread *tmp = pktgen_threads;
2554 2515
2555 if (strlen(t->fname)) 2516 remove_proc_entry(t->name, pg_proc_dir);
2556 remove_proc_entry(t->fname, NULL);
2557 2517
2558 thread_lock(); 2518 thread_lock();
2559 2519
2560 if (tmp == t) 2520 if (tmp == t)
2561 pktgen_threads = tmp->next; 2521 pktgen_threads = tmp->next;
@@ -2825,7 +2785,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, const char* i
2825 if_lock(t); 2785 if_lock(t);
2826 2786
2827 for(pkt_dev=t->if_list; pkt_dev; pkt_dev = pkt_dev->next ) { 2787 for(pkt_dev=t->if_list; pkt_dev; pkt_dev = pkt_dev->next ) {
2828 if (strcmp(pkt_dev->ifname, ifname) == 0) { 2788 if (strncmp(pkt_dev->ifname, ifname, IFNAMSIZ) == 0) {
2829 break; 2789 break;
2830 } 2790 }
2831 } 2791 }
@@ -2864,74 +2824,70 @@ static int add_dev_to_thread(struct pktgen_thread *t, struct pktgen_dev *pkt_dev
2864static int pktgen_add_device(struct pktgen_thread *t, const char* ifname) 2824static int pktgen_add_device(struct pktgen_thread *t, const char* ifname)
2865{ 2825{
2866 struct pktgen_dev *pkt_dev; 2826 struct pktgen_dev *pkt_dev;
2827 struct proc_dir_entry *pe;
2867 2828
2868 /* We don't allow a device to be on several threads */ 2829 /* We don't allow a device to be on several threads */
2869 2830
2870 if( (pkt_dev = __pktgen_NN_threads(ifname, FIND)) == NULL) { 2831 pkt_dev = __pktgen_NN_threads(ifname, FIND);
2871 2832 if (pkt_dev) {
2872 pkt_dev = kmalloc(sizeof(struct pktgen_dev), GFP_KERNEL); 2833 printk("pktgen: ERROR: interface already used.\n");
2873 if (!pkt_dev) 2834 return -EBUSY;
2874 return -ENOMEM; 2835 }
2875 2836
2876 memset(pkt_dev, 0, sizeof(struct pktgen_dev)); 2837 pkt_dev = kzalloc(sizeof(struct pktgen_dev), GFP_KERNEL);
2838 if (!pkt_dev)
2839 return -ENOMEM;
2877 2840
2878 pkt_dev->flows = vmalloc(MAX_CFLOWS*sizeof(struct flow_state)); 2841 pkt_dev->flows = vmalloc(MAX_CFLOWS*sizeof(struct flow_state));
2879 if (pkt_dev->flows == NULL) { 2842 if (pkt_dev->flows == NULL) {
2880 kfree(pkt_dev); 2843 kfree(pkt_dev);
2881 return -ENOMEM; 2844 return -ENOMEM;
2882 } 2845 }
2883 memset(pkt_dev->flows, 0, MAX_CFLOWS*sizeof(struct flow_state)); 2846 memset(pkt_dev->flows, 0, MAX_CFLOWS*sizeof(struct flow_state));
2884
2885 pkt_dev->min_pkt_size = ETH_ZLEN;
2886 pkt_dev->max_pkt_size = ETH_ZLEN;
2887 pkt_dev->nfrags = 0;
2888 pkt_dev->clone_skb = pg_clone_skb_d;
2889 pkt_dev->delay_us = pg_delay_d / 1000;
2890 pkt_dev->delay_ns = pg_delay_d % 1000;
2891 pkt_dev->count = pg_count_d;
2892 pkt_dev->sofar = 0;
2893 pkt_dev->udp_src_min = 9; /* sink port */
2894 pkt_dev->udp_src_max = 9;
2895 pkt_dev->udp_dst_min = 9;
2896 pkt_dev->udp_dst_max = 9;
2897
2898 strncpy(pkt_dev->ifname, ifname, 31);
2899 sprintf(pkt_dev->fname, "%s/%s", PG_PROC_DIR, ifname);
2900
2901 if (! pktgen_setup_dev(pkt_dev)) {
2902 printk("pktgen: ERROR: pktgen_setup_dev failed.\n");
2903 if (pkt_dev->flows)
2904 vfree(pkt_dev->flows);
2905 kfree(pkt_dev);
2906 return -ENODEV;
2907 }
2908 2847
2909 pkt_dev->proc_ent = create_proc_entry(pkt_dev->fname, 0600, NULL); 2848 pkt_dev->min_pkt_size = ETH_ZLEN;
2910 if (!pkt_dev->proc_ent) { 2849 pkt_dev->max_pkt_size = ETH_ZLEN;
2911 printk("pktgen: cannot create %s procfs entry.\n", pkt_dev->fname); 2850 pkt_dev->nfrags = 0;
2912 if (pkt_dev->flows) 2851 pkt_dev->clone_skb = pg_clone_skb_d;
2913 vfree(pkt_dev->flows); 2852 pkt_dev->delay_us = pg_delay_d / 1000;
2914 kfree(pkt_dev); 2853 pkt_dev->delay_ns = pg_delay_d % 1000;
2915 return -EINVAL; 2854 pkt_dev->count = pg_count_d;
2916 } 2855 pkt_dev->sofar = 0;
2917 pkt_dev->proc_ent->read_proc = proc_if_read; 2856 pkt_dev->udp_src_min = 9; /* sink port */
2918 pkt_dev->proc_ent->write_proc = proc_if_write; 2857 pkt_dev->udp_src_max = 9;
2919 pkt_dev->proc_ent->data = (void*)(pkt_dev); 2858 pkt_dev->udp_dst_min = 9;
2920 pkt_dev->proc_ent->owner = THIS_MODULE; 2859 pkt_dev->udp_dst_max = 9;
2860
2861 strncpy(pkt_dev->ifname, ifname, IFNAMSIZ);
2862
2863 if (! pktgen_setup_dev(pkt_dev)) {
2864 printk("pktgen: ERROR: pktgen_setup_dev failed.\n");
2865 if (pkt_dev->flows)
2866 vfree(pkt_dev->flows);
2867 kfree(pkt_dev);
2868 return -ENODEV;
2869 }
2870
2871 pe = create_proc_entry(ifname, 0600, pg_proc_dir);
2872 if (!pe) {
2873 printk("pktgen: cannot create %s/%s procfs entry.\n",
2874 PG_PROC_DIR, ifname);
2875 if (pkt_dev->flows)
2876 vfree(pkt_dev->flows);
2877 kfree(pkt_dev);
2878 return -EINVAL;
2879 }
2880 pe->proc_fops = &pktgen_if_fops;
2881 pe->data = pkt_dev;
2921 2882
2922 return add_dev_to_thread(t, pkt_dev); 2883 return add_dev_to_thread(t, pkt_dev);
2923 }
2924 else {
2925 printk("pktgen: ERROR: interface already used.\n");
2926 return -EBUSY;
2927 }
2928} 2884}
2929 2885
2930static struct pktgen_thread *pktgen_find_thread(const char* name) 2886static struct pktgen_thread *pktgen_find_thread(const char* name)
2931{ 2887{
2932 struct pktgen_thread *t = NULL; 2888 struct pktgen_thread *t = NULL;
2933 2889
2934 thread_lock(); 2890 thread_lock();
2935 2891
2936 t = pktgen_threads; 2892 t = pktgen_threads;
2937 while (t) { 2893 while (t) {
@@ -2947,6 +2903,7 @@ static struct pktgen_thread *pktgen_find_thread(const char* name)
2947static int pktgen_create_thread(const char* name, int cpu) 2903static int pktgen_create_thread(const char* name, int cpu)
2948{ 2904{
2949 struct pktgen_thread *t = NULL; 2905 struct pktgen_thread *t = NULL;
2906 struct proc_dir_entry *pe;
2950 2907
2951 if (strlen(name) > 31) { 2908 if (strlen(name) > 31) {
2952 printk("pktgen: ERROR: Thread name cannot be more than 31 characters.\n"); 2909 printk("pktgen: ERROR: Thread name cannot be more than 31 characters.\n");
@@ -2958,28 +2915,26 @@ static int pktgen_create_thread(const char* name, int cpu)
2958 return -EINVAL; 2915 return -EINVAL;
2959 } 2916 }
2960 2917
2961 t = (struct pktgen_thread*)(kmalloc(sizeof(struct pktgen_thread), GFP_KERNEL)); 2918 t = kzalloc(sizeof(struct pktgen_thread), GFP_KERNEL);
2962 if (!t) { 2919 if (!t) {
2963 printk("pktgen: ERROR: out of memory, can't create new thread.\n"); 2920 printk("pktgen: ERROR: out of memory, can't create new thread.\n");
2964 return -ENOMEM; 2921 return -ENOMEM;
2965 } 2922 }
2966 2923
2967 memset(t, 0, sizeof(struct pktgen_thread));
2968 strcpy(t->name, name); 2924 strcpy(t->name, name);
2969 spin_lock_init(&t->if_lock); 2925 spin_lock_init(&t->if_lock);
2970 t->cpu = cpu; 2926 t->cpu = cpu;
2971 2927
2972 sprintf(t->fname, "%s/%s", PG_PROC_DIR, t->name); 2928 pe = create_proc_entry(t->name, 0600, pg_proc_dir);
2973 t->proc_ent = create_proc_entry(t->fname, 0600, NULL); 2929 if (!pe) {
2974 if (!t->proc_ent) { 2930 printk("pktgen: cannot create %s/%s procfs entry.\n",
2975 printk("pktgen: cannot create %s procfs entry.\n", t->fname); 2931 PG_PROC_DIR, t->name);
2976 kfree(t); 2932 kfree(t);
2977 return -EINVAL; 2933 return -EINVAL;
2978 } 2934 }
2979 t->proc_ent->read_proc = proc_thread_read; 2935
2980 t->proc_ent->write_proc = proc_thread_write; 2936 pe->proc_fops = &pktgen_thread_fops;
2981 t->proc_ent->data = (void*)(t); 2937 pe->data = t;
2982 t->proc_ent->owner = THIS_MODULE;
2983 2938
2984 t->next = pktgen_threads; 2939 t->next = pktgen_threads;
2985 pktgen_threads = t; 2940 pktgen_threads = t;
@@ -3034,8 +2989,7 @@ static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *pkt_
3034 2989
3035 /* Clean up proc file system */ 2990 /* Clean up proc file system */
3036 2991
3037 if (strlen(pkt_dev->fname)) 2992 remove_proc_entry(pkt_dev->ifname, pg_proc_dir);
3038 remove_proc_entry(pkt_dev->fname, NULL);
3039 2993
3040 if (pkt_dev->flows) 2994 if (pkt_dev->flows)
3041 vfree(pkt_dev->flows); 2995 vfree(pkt_dev->flows);
@@ -3046,31 +3000,31 @@ static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *pkt_
3046static int __init pg_init(void) 3000static int __init pg_init(void)
3047{ 3001{
3048 int cpu; 3002 int cpu;
3049 printk(version); 3003 struct proc_dir_entry *pe;
3050 3004
3051 module_fname[0] = 0; 3005 printk(version);
3052 3006
3053 create_proc_dir(); 3007 pg_proc_dir = proc_mkdir(PG_PROC_DIR, proc_net);
3008 if (!pg_proc_dir)
3009 return -ENODEV;
3010 pg_proc_dir->owner = THIS_MODULE;
3054 3011
3055 sprintf(module_fname, "%s/pgctrl", PG_PROC_DIR); 3012 pe = create_proc_entry(PGCTRL, 0600, pg_proc_dir);
3056 module_proc_ent = create_proc_entry(module_fname, 0600, NULL); 3013 if (pe == NULL) {
3057 if (!module_proc_ent) { 3014 printk("pktgen: ERROR: cannot create %s procfs entry.\n", PGCTRL);
3058 printk("pktgen: ERROR: cannot create %s procfs entry.\n", module_fname); 3015 proc_net_remove(PG_PROC_DIR);
3059 return -EINVAL; 3016 return -EINVAL;
3060 } 3017 }
3061 3018
3062 module_proc_ent->proc_fops = &pktgen_fops; 3019 pe->proc_fops = &pktgen_fops;
3063 module_proc_ent->data = NULL; 3020 pe->data = NULL;
3064 3021
3065 /* Register us to receive netdevice events */ 3022 /* Register us to receive netdevice events */
3066 register_netdevice_notifier(&pktgen_notifier_block); 3023 register_netdevice_notifier(&pktgen_notifier_block);
3067 3024
3068 for (cpu = 0; cpu < NR_CPUS ; cpu++) { 3025 for_each_online_cpu(cpu) {
3069 char buf[30]; 3026 char buf[30];
3070 3027
3071 if (!cpu_online(cpu))
3072 continue;
3073
3074 sprintf(buf, "kpktgend_%i", cpu); 3028 sprintf(buf, "kpktgend_%i", cpu);
3075 pktgen_create_thread(buf, cpu); 3029 pktgen_create_thread(buf, cpu);
3076 } 3030 }
@@ -3095,10 +3049,8 @@ static void __exit pg_cleanup(void)
3095 unregister_netdevice_notifier(&pktgen_notifier_block); 3049 unregister_netdevice_notifier(&pktgen_notifier_block);
3096 3050
3097 /* Clean up proc file system */ 3051 /* Clean up proc file system */
3098 3052 remove_proc_entry(PGCTRL, pg_proc_dir);
3099 remove_proc_entry(module_fname, NULL); 3053 proc_net_remove(PG_PROC_DIR);
3100
3101 remove_proc_dir();
3102} 3054}
3103 3055
3104 3056
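
Most of the pktgen.c churn above is a conversion from hand-rolled read_proc/write_proc handlers, which sprintf into a caller-supplied page and track EOF by hand, to the seq_file single_open() idiom, which does the buffering, offset handling and EOF for the caller. The rest keeps the files under /proc/net/pktgen but creates them against an explicit pg_proc_dir parent instead of full path strings, and swaps the _thread_lock spinlock for a semaphore so thread setup may sleep. The seq_file pattern in isolation, with illustrative names and error handling trimmed:

    #include <linux/module.h>
    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    static int demo_show(struct seq_file *seq, void *v)
    {
            /* seq_printf() replaces the p += sprintf(p, ...) bookkeeping. */
            seq_printf(seq, "Result: %s\n", "Idle");
            return 0;
    }

    static int demo_open(struct inode *inode, struct file *file)
    {
            /* PDE(inode)->data carries the per-entry private pointer,
             * just as pktgen hands its pkt_dev/thread to the show fn. */
            return single_open(file, demo_show, PDE(inode)->data);
    }

    static struct file_operations demo_fops = {
            .owner   = THIS_MODULE,
            .open    = demo_open,
            .read    = seq_read,
            .llseek  = seq_lseek,
            .release = single_release,
    };

    /* Registration, as in pg_init():
     *     pe = create_proc_entry("demo", 0600, parent_dir);
     *     if (pe) { pe->proc_fops = &demo_fops; pe->data = priv; }  */
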
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 02cd4cde2112..ef9d46b91eb9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -122,6 +122,8 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
122 * __alloc_skb - allocate a network buffer 122 * __alloc_skb - allocate a network buffer
123 * @size: size to allocate 123 * @size: size to allocate
124 * @gfp_mask: allocation mask 124 * @gfp_mask: allocation mask
125 * @fclone: allocate from fclone cache instead of head cache
126 * and allocate a cloned (child) skb
125 * 127 *
126 * Allocate a new &sk_buff. The returned buffer has no headroom and a 128 * Allocate a new &sk_buff. The returned buffer has no headroom and a
127 * tail room of size bytes. The object has a reference count of one. 129 * tail room of size bytes. The object has a reference count of one.
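
The skbuff.c hunk is documentation-only: __alloc_skb() had grown an fclone argument without a matching kernel-doc line, and kernel-doc expects one "@name: description" entry per parameter, with continuation lines indented. The comment format, in miniature (illustrative prototype):

    /**
     * demo_alloc - allocate a demo buffer
     * @size: size to allocate
     * @fclone: allocate from the fclone cache instead of the head
     *	cache (continuation lines are indented like this)
     */
    static void *demo_alloc(unsigned int size, int fclone);
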
diff --git a/net/core/sock.c b/net/core/sock.c
index 1c52fe809eda..9602ceb3bac9 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -940,7 +940,7 @@ static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
940 int noblock, int *errcode) 940 int noblock, int *errcode)
941{ 941{
942 struct sk_buff *skb; 942 struct sk_buff *skb;
943 unsigned int gfp_mask; 943 gfp_t gfp_mask;
944 long timeo; 944 long timeo;
945 int err; 945 int err;
946 946
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 29250749f16f..d59f86f7ceab 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -495,7 +495,7 @@ void dccp_send_close(struct sock *sk, const int active)
495{ 495{
496 struct dccp_sock *dp = dccp_sk(sk); 496 struct dccp_sock *dp = dccp_sk(sk);
497 struct sk_buff *skb; 497 struct sk_buff *skb;
498 const unsigned int prio = active ? GFP_KERNEL : GFP_ATOMIC; 498 const gfp_t prio = active ? GFP_KERNEL : GFP_ATOMIC;
499 499
500 skb = alloc_skb(sk->sk_prot->max_header, prio); 500 skb = alloc_skb(sk->sk_prot->max_header, prio);
501 if (skb == NULL) 501 if (skb == NULL)
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 1186dc44cdff..3f25cadccddd 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -719,22 +719,9 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
719 if (saddr->sdn_flags & ~SDF_WILD) 719 if (saddr->sdn_flags & ~SDF_WILD)
720 return -EINVAL; 720 return -EINVAL;
721 721
722#if 1
723 if (!capable(CAP_NET_BIND_SERVICE) && (saddr->sdn_objnum || 722 if (!capable(CAP_NET_BIND_SERVICE) && (saddr->sdn_objnum ||
724 (saddr->sdn_flags & SDF_WILD))) 723 (saddr->sdn_flags & SDF_WILD)))
725 return -EACCES; 724 return -EACCES;
726#else
727 /*
728 * Maybe put the default actions in the default security ops for
729 * dn_prot_sock ? Would be nice if the capable call would go there
730 * too.
731 */
732 if (security_dn_prot_sock(saddr) &&
733 !capable(CAP_NET_BIND_SERVICE) ||
734 saddr->sdn_objnum || (saddr->sdn_flags & SDF_WILD))
735 return -EACCES;
736#endif
737
738 725
739 if (!(saddr->sdn_flags & SDF_WILD)) { 726 if (!(saddr->sdn_flags & SDF_WILD)) {
740 if (dn_ntohs(saddr->sdn_nodeaddrl)) { 727 if (dn_ntohs(saddr->sdn_nodeaddrl)) {
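
The af_decnet.c hunk deletes preprocessor-dead code: the capability check sat inside #if 1 ... #else ... #endif, so the alternative branch, a sketched security-hook variant, could never compile and only rotted. The anti-pattern being removed, in miniature (security_check() is illustrative):

    #if 1
            if (!capable(CAP_NET_BIND_SERVICE))
                    return -EACCES;
    #else
            /* Never compiled: #if 1 guarantees this branch is dead. */
            if (security_check(saddr) && !capable(CAP_NET_BIND_SERVICE))
                    return -EACCES;
    #endif
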
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 74f2207e131a..4ec4b2ca6ab1 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -715,6 +715,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
715 break; 715 break;
716 ret = 0; 716 ret = 0;
717 if (ifa->ifa_mask != sin->sin_addr.s_addr) { 717 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
718 u32 old_mask = ifa->ifa_mask;
718 inet_del_ifa(in_dev, ifap, 0); 719 inet_del_ifa(in_dev, ifap, 0);
719 ifa->ifa_mask = sin->sin_addr.s_addr; 720 ifa->ifa_mask = sin->sin_addr.s_addr;
720 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask); 721 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
@@ -728,7 +729,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
728 if ((dev->flags & IFF_BROADCAST) && 729 if ((dev->flags & IFF_BROADCAST) &&
729 (ifa->ifa_prefixlen < 31) && 730 (ifa->ifa_prefixlen < 31) &&
730 (ifa->ifa_broadcast == 731 (ifa->ifa_broadcast ==
731 (ifa->ifa_local|~ifa->ifa_mask))) { 732 (ifa->ifa_local|~old_mask))) {
732 ifa->ifa_broadcast = (ifa->ifa_local | 733 ifa->ifa_broadcast = (ifa->ifa_local |
733 ~sin->sin_addr.s_addr); 734 ~sin->sin_addr.s_addr);
734 } 735 }
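
The devinet.c change fixes an ordering bug in SIOCSIFNETMASK handling: the old code stored the new mask into ifa->ifa_mask first and only then compared the broadcast address against ifa->ifa_local | ~ifa->ifa_mask, i.e. against the mask it had just written, so the test for "is the broadcast still the one derived from the old mask?" could not behave as intended. Saving old_mask before the assignment restores the check; in outline (the IFF_BROADCAST and prefix-length guards are elided here):

    u32 old_mask = ifa->ifa_mask;          /* remember before overwrite */

    ifa->ifa_mask = sin->sin_addr.s_addr;

    /* Rewrite the broadcast address only if it was still the one
     * derived from the old mask; compare against old_mask, not the
     * field updated two lines up. */
    if (ifa->ifa_broadcast == (ifa->ifa_local | ~old_mask))
            ifa->ifa_broadcast = ifa->ifa_local | ~ifa->ifa_mask;
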
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 0093ea08c7f5..66247f38b371 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2404,7 +2404,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
2404 prefix = htonl(l->key); 2404 prefix = htonl(l->key);
2405 2405
2406 list_for_each_entry_rcu(fa, &li->falh, fa_list) { 2406 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2407 const struct fib_info *fi = rcu_dereference(fa->fa_info); 2407 const struct fib_info *fi = fa->fa_info;
2408 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi); 2408 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
2409 2409
2410 if (fa->fa_type == RTN_BROADCAST 2410 if (fa->fa_type == RTN_BROADCAST
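
The fib_trie.c hunk drops an rcu_dereference() around fa->fa_info; the patch evidently judges the helper unnecessary for this read. rcu_dereference() exists to load a pointer that a concurrent writer may republish, inserting the ordering the dependent read needs; a read that does not race with republication gains nothing from it, and wrapping it anyway obscures where the real publish points are. Where the helper is genuinely required, sketched with illustrative names:

    #include <linux/rcupdate.h>

    struct demo_cfg { int field; };
    static struct demo_cfg *global_cfg;    /* swapped by an updater */

    static int read_field(void)
    {
            struct demo_cfg *cfg;
            int val = 0;

            rcu_read_lock();
            cfg = rcu_dereference(global_cfg); /* writer may republish */
            if (cfg)
                    val = cfg->field;          /* plain read of a field
                                                * stable once published */
            rcu_read_unlock();
            return val;
    }
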
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 90dca711ac9f..175e093ec564 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1108,12 +1108,9 @@ void __init icmp_init(struct net_proto_family *ops)
1108 struct inet_sock *inet; 1108 struct inet_sock *inet;
1109 int i; 1109 int i;
1110 1110
1111 for (i = 0; i < NR_CPUS; i++) { 1111 for_each_cpu(i) {
1112 int err; 1112 int err;
1113 1113
1114 if (!cpu_possible(i))
1115 continue;
1116
1117 err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, 1114 err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP,
1118 &per_cpu(__icmp_socket, i)); 1115 &per_cpu(__icmp_socket, i));
1119 1116
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 1ad5202e556b..87e350069abb 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1023,10 +1023,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1023 int alloclen; 1023 int alloclen;
1024 1024
1025 skb_prev = skb; 1025 skb_prev = skb;
1026 if (skb_prev) 1026 fraggap = skb_prev->len - maxfraglen;
1027 fraggap = skb_prev->len - maxfraglen;
1028 else
1029 fraggap = 0;
1030 1027
1031 alloclen = fragheaderlen + hh_len + fraggap + 15; 1028 alloclen = fragheaderlen + hh_len + fraggap + 15;
1032 skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation); 1029 skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
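
The ip_output.c hunk removes an unreachable branch: skb_prev has just been assigned from skb, and this point in ip_append_page() appears to be reached only with a non-NULL skb (the write queue was found non-empty earlier), so the if (skb_prev) guard always took its first arm. The simplification, in two lines:

    skb_prev = skb;                        /* never NULL on this path   */
    fraggap  = skb_prev->len - maxfraglen; /* the else { fraggap = 0; }
                                            * arm could not be reached  */
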
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 07a80b56e8dc..422ab68ee7fb 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -50,7 +50,7 @@
50#include <linux/netfilter_ipv4/ip_conntrack_core.h> 50#include <linux/netfilter_ipv4/ip_conntrack_core.h>
51#include <linux/netfilter_ipv4/listhelp.h> 51#include <linux/netfilter_ipv4/listhelp.h>
52 52
53#define IP_CONNTRACK_VERSION "2.3" 53#define IP_CONNTRACK_VERSION "2.4"
54 54
55#if 0 55#if 0
56#define DEBUGP printk 56#define DEBUGP printk
@@ -148,16 +148,20 @@ DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
148static int ip_conntrack_hash_rnd_initted; 148static int ip_conntrack_hash_rnd_initted;
149static unsigned int ip_conntrack_hash_rnd; 149static unsigned int ip_conntrack_hash_rnd;
150 150
151static u_int32_t 151static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple,
152hash_conntrack(const struct ip_conntrack_tuple *tuple) 152 unsigned int size, unsigned int rnd)
153{ 153{
154#if 0
155 dump_tuple(tuple);
156#endif
157 return (jhash_3words(tuple->src.ip, 154 return (jhash_3words(tuple->src.ip,
158 (tuple->dst.ip ^ tuple->dst.protonum), 155 (tuple->dst.ip ^ tuple->dst.protonum),
159 (tuple->src.u.all | (tuple->dst.u.all << 16)), 156 (tuple->src.u.all | (tuple->dst.u.all << 16)),
160 ip_conntrack_hash_rnd) % ip_conntrack_htable_size); 157 rnd) % size);
158}
159
160static u_int32_t
161hash_conntrack(const struct ip_conntrack_tuple *tuple)
162{
163 return __hash_conntrack(tuple, ip_conntrack_htable_size,
164 ip_conntrack_hash_rnd);
161} 165}
162 166
163int 167int
@@ -1341,14 +1345,13 @@ static int kill_all(struct ip_conntrack *i, void *data)
1341 return 1; 1345 return 1;
1342} 1346}
1343 1347
1344static void free_conntrack_hash(void) 1348static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
1345{ 1349{
1346 if (ip_conntrack_vmalloc) 1350 if (vmalloced)
1347 vfree(ip_conntrack_hash); 1351 vfree(hash);
1348 else 1352 else
1349 free_pages((unsigned long)ip_conntrack_hash, 1353 free_pages((unsigned long)hash,
1350 get_order(sizeof(struct list_head) 1354 get_order(sizeof(struct list_head) * size));
1351 * ip_conntrack_htable_size));
1352} 1355}
1353 1356
1354void ip_conntrack_flush() 1357void ip_conntrack_flush()
@@ -1378,12 +1381,83 @@ void ip_conntrack_cleanup(void)
1378 ip_conntrack_flush(); 1381 ip_conntrack_flush();
1379 kmem_cache_destroy(ip_conntrack_cachep); 1382 kmem_cache_destroy(ip_conntrack_cachep);
1380 kmem_cache_destroy(ip_conntrack_expect_cachep); 1383 kmem_cache_destroy(ip_conntrack_expect_cachep);
1381 free_conntrack_hash(); 1384 free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
1385 ip_conntrack_htable_size);
1382 nf_unregister_sockopt(&so_getorigdst); 1386 nf_unregister_sockopt(&so_getorigdst);
1383} 1387}
1384 1388
1385static int hashsize; 1389static struct list_head *alloc_hashtable(int size, int *vmalloced)
1386module_param(hashsize, int, 0400); 1390{
1391 struct list_head *hash;
1392 unsigned int i;
1393
1394 *vmalloced = 0;
1395 hash = (void*)__get_free_pages(GFP_KERNEL,
1396 get_order(sizeof(struct list_head)
1397 * size));
1398 if (!hash) {
1399 *vmalloced = 1;
1400 printk(KERN_WARNING"ip_conntrack: falling back to vmalloc.\n");
1401 hash = vmalloc(sizeof(struct list_head) * size);
1402 }
1403
1404 if (hash)
1405 for (i = 0; i < size; i++)
1406 INIT_LIST_HEAD(&hash[i]);
1407
1408 return hash;
1409}
1410
1411int set_hashsize(const char *val, struct kernel_param *kp)
1412{
1413 int i, bucket, hashsize, vmalloced;
1414 int old_vmalloced, old_size;
1415 int rnd;
1416 struct list_head *hash, *old_hash;
1417 struct ip_conntrack_tuple_hash *h;
1418
1419 /* On boot, we can set this without any fancy locking. */
1420 if (!ip_conntrack_htable_size)
1421 return param_set_int(val, kp);
1422
1423 hashsize = simple_strtol(val, NULL, 0);
1424 if (!hashsize)
1425 return -EINVAL;
1426
1427 hash = alloc_hashtable(hashsize, &vmalloced);
1428 if (!hash)
1429 return -ENOMEM;
1430
1431 /* We have to rehash for the new table anyway, so we also can
1432 * use a new random seed */
1433 get_random_bytes(&rnd, 4);
1434
1435 write_lock_bh(&ip_conntrack_lock);
1436 for (i = 0; i < ip_conntrack_htable_size; i++) {
1437 while (!list_empty(&ip_conntrack_hash[i])) {
1438 h = list_entry(ip_conntrack_hash[i].next,
1439 struct ip_conntrack_tuple_hash, list);
1440 list_del(&h->list);
1441 bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
1442 list_add_tail(&h->list, &hash[bucket]);
1443 }
1444 }
1445 old_size = ip_conntrack_htable_size;
1446 old_vmalloced = ip_conntrack_vmalloc;
1447 old_hash = ip_conntrack_hash;
1448
1449 ip_conntrack_htable_size = hashsize;
1450 ip_conntrack_vmalloc = vmalloced;
1451 ip_conntrack_hash = hash;
1452 ip_conntrack_hash_rnd = rnd;
1453 write_unlock_bh(&ip_conntrack_lock);
1454
1455 free_conntrack_hash(old_hash, old_vmalloced, old_size);
1456 return 0;
1457}
1458
1459module_param_call(hashsize, set_hashsize, param_get_uint,
1460 &ip_conntrack_htable_size, 0600);
1387 1461
1388int __init ip_conntrack_init(void) 1462int __init ip_conntrack_init(void)
1389{ 1463{
@@ -1392,9 +1466,7 @@ int __init ip_conntrack_init(void)
1392 1466
1393 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB 1467 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
1394 * machine has 256 buckets. >= 1GB machines have 8192 buckets. */ 1468 * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
1395 if (hashsize) { 1469 if (!ip_conntrack_htable_size) {
1396 ip_conntrack_htable_size = hashsize;
1397 } else {
1398 ip_conntrack_htable_size 1470 ip_conntrack_htable_size
1399 = (((num_physpages << PAGE_SHIFT) / 16384) 1471 = (((num_physpages << PAGE_SHIFT) / 16384)
1400 / sizeof(struct list_head)); 1472 / sizeof(struct list_head));
@@ -1416,20 +1488,8 @@ int __init ip_conntrack_init(void)
1416 return ret; 1488 return ret;
1417 } 1489 }
1418 1490
1419 /* AK: the hash table is twice as big than needed because it 1491 ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
1420 uses list_head. it would be much nicer to caches to use a 1492 &ip_conntrack_vmalloc);
1421 single pointer list head here. */
1422 ip_conntrack_vmalloc = 0;
1423 ip_conntrack_hash
1424 =(void*)__get_free_pages(GFP_KERNEL,
1425 get_order(sizeof(struct list_head)
1426 *ip_conntrack_htable_size));
1427 if (!ip_conntrack_hash) {
1428 ip_conntrack_vmalloc = 1;
1429 printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
1430 ip_conntrack_hash = vmalloc(sizeof(struct list_head)
1431 * ip_conntrack_htable_size);
1432 }
1433 if (!ip_conntrack_hash) { 1493 if (!ip_conntrack_hash) {
1434 printk(KERN_ERR "Unable to create ip_conntrack_hash\n"); 1494 printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
1435 goto err_unreg_sockopt; 1495 goto err_unreg_sockopt;
@@ -1461,9 +1521,6 @@ int __init ip_conntrack_init(void)
1461 ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp; 1521 ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
1462 write_unlock_bh(&ip_conntrack_lock); 1522 write_unlock_bh(&ip_conntrack_lock);
1463 1523
1464 for (i = 0; i < ip_conntrack_htable_size; i++)
1465 INIT_LIST_HEAD(&ip_conntrack_hash[i]);
1466
1467 /* For use by ipt_REJECT */ 1524 /* For use by ipt_REJECT */
1468 ip_ct_attach = ip_conntrack_attach; 1525 ip_ct_attach = ip_conntrack_attach;
1469 1526
@@ -1478,7 +1535,8 @@ int __init ip_conntrack_init(void)
1478err_free_conntrack_slab: 1535err_free_conntrack_slab:
1479 kmem_cache_destroy(ip_conntrack_cachep); 1536 kmem_cache_destroy(ip_conntrack_cachep);
1480err_free_hash: 1537err_free_hash:
1481 free_conntrack_hash(); 1538 free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
1539 ip_conntrack_htable_size);
1482err_unreg_sockopt: 1540err_unreg_sockopt:
1483 nf_unregister_sockopt(&so_getorigdst); 1541 nf_unregister_sockopt(&so_getorigdst);
1484 1542
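
The ip_conntrack_core.c changes above make the connection-tracking hash table resizable at runtime: allocation moves into alloc_hashtable(), freeing takes the table, size and vmalloc flag explicitly, and hashsize turns from a read-only module_param into module_param_call(), so a write to the parameter file invokes set_hashsize(), which builds a new table, rehashes every entry under write_lock_bh(&ip_conntrack_lock) with a fresh random seed, publishes the new table, and frees the old one. The module_param_call() idiom on its own, with illustrative names:

    #include <linux/kernel.h>
    #include <linux/moduleparam.h>

    static unsigned int table_size;

    static int set_size(const char *val, struct kernel_param *kp)
    {
            /* At boot there is nothing to rehash yet; fall back to
             * the stock integer parser. */
            if (!table_size)
                    return param_set_int(val, kp);

            /* ... allocate a new table, rehash under the write lock,
             * publish it, then free the old table ... */
            return 0;
    }

    /* Reads use the normal uint getter; writes go through set_size(). */
    module_param_call(size, set_size, param_get_uint, &table_size, 0600);
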
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index f7943ba1f43c..a65e508fbd40 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -90,9 +90,7 @@ fold_field(void *mib[], int offt)
90 unsigned long res = 0; 90 unsigned long res = 0;
91 int i; 91 int i;
92 92
93 for (i = 0; i < NR_CPUS; i++) { 93 for_each_cpu(i) {
94 if (!cpu_possible(i))
95 continue;
96 res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt); 94 res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
97 res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt); 95 res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
98 } 96 }
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index b7185fb3377c..23e540365a14 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -700,10 +700,7 @@ int __init icmpv6_init(struct net_proto_family *ops)
700 struct sock *sk; 700 struct sock *sk;
701 int err, i, j; 701 int err, i, j;
702 702
703 for (i = 0; i < NR_CPUS; i++) { 703 for_each_cpu(i) {
704 if (!cpu_possible(i))
705 continue;
706
707 err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, 704 err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6,
708 &per_cpu(__icmpv6_socket, i)); 705 &per_cpu(__icmpv6_socket, i));
709 if (err < 0) { 706 if (err < 0) {
@@ -749,9 +746,7 @@ void icmpv6_cleanup(void)
749{ 746{
750 int i; 747 int i;
751 748
752 for (i = 0; i < NR_CPUS; i++) { 749 for_each_cpu(i) {
753 if (!cpu_possible(i))
754 continue;
755 sock_release(per_cpu(__icmpv6_socket, i)); 750 sock_release(per_cpu(__icmpv6_socket, i));
756 } 751 }
757 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); 752 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 334a5967831e..50a13e75d70e 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -140,9 +140,7 @@ fold_field(void *mib[], int offt)
140 unsigned long res = 0; 140 unsigned long res = 0;
141 int i; 141 int i;
142 142
143 for (i = 0; i < NR_CPUS; i++) { 143 for_each_cpu(i) {
144 if (!cpu_possible(i))
145 continue;
146 res += *(((unsigned long *)per_cpu_ptr(mib[0], i)) + offt); 144 res += *(((unsigned long *)per_cpu_ptr(mib[0], i)) + offt);
147 res += *(((unsigned long *)per_cpu_ptr(mib[1], i)) + offt); 145 res += *(((unsigned long *)per_cpu_ptr(mib[1], i)) + offt);
148 } 146 }
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 678c3f2c0d0b..5ca283537bc6 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -740,11 +740,8 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long t
740 740
741int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol) 741int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol)
742{ 742{
743 struct netlink_sock *nlk;
744 int len = skb->len; 743 int len = skb->len;
745 744
746 nlk = nlk_sk(sk);
747
748 skb_queue_tail(&sk->sk_receive_queue, skb); 745 skb_queue_tail(&sk->sk_receive_queue, skb);
749 sk->sk_data_ready(sk, len); 746 sk->sk_data_ready(sk, len);
750 sock_put(sk); 747 sock_put(sk);
@@ -827,7 +824,7 @@ struct netlink_broadcast_data {
827 int failure; 824 int failure;
828 int congested; 825 int congested;
829 int delivered; 826 int delivered;
830 unsigned int allocation; 827 gfp_t allocation;
831 struct sk_buff *skb, *skb2; 828 struct sk_buff *skb, *skb2;
832}; 829};
833 830
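
The netlink_broadcast_data hunk is part of the tree-wide switch from plain unsigned int to the gfp_t typedef for allocation flags. Under the sparse checker the typedef is annotated so that flag values cannot silently mix with ordinary integers; a normal compiler sees a plain unsigned. A sketch of the arrangement follows; the flag values and __CHECKER__ plumbing are reconstructed from memory, so treat the details as approximate rather than the kernel's actual headers.

    #include <stdio.h>
    #include <stdlib.h>

    /* Sketch only: under sparse, gfp_t becomes a distinct "bitwise"
     * type; otherwise it is just an unsigned int. */
    #ifdef __CHECKER__
    #define __bitwise __attribute__((bitwise))
    #define __force   __attribute__((force))
    #else
    #define __bitwise
    #define __force
    #endif

    typedef unsigned int __bitwise gfp_t;

    #define GFP_ATOMIC ((__force gfp_t)0x20u)    /* values illustrative */
    #define GFP_KERNEL ((__force gfp_t)0xd0u)

    static void *alloc(size_t sz, gfp_t flags)
    {
        (void)flags;    /* a real allocator branches on these bits */
        return malloc(sz);
    }

    int main(void)
    {
        void *p = alloc(64, GFP_KERNEL);    /* type-checked under sparse */

        puts(p ? "ok" : "oom");
        free(p);
        return 0;
    }
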
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index e556d92c0bc4..b18fe5043019 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -727,7 +727,7 @@ int rose_rt_ioctl(unsigned int cmd, void __user *arg)
727 } 727 }
728 if (rose_route.mask > 10) /* Mask can't be more than 10 digits */ 728 if (rose_route.mask > 10) /* Mask can't be more than 10 digits */
729 return -EINVAL; 729 return -EINVAL;
730 if (rose_route.ndigis > 8) /* No more than 8 digipeats */ 730 if (rose_route.ndigis > AX25_MAX_DIGIS)
731 return -EINVAL; 731 return -EINVAL;
732 err = rose_add_node(&rose_route, dev); 732 err = rose_add_node(&rose_route, dev);
733 dev_put(dev); 733 dev_put(dev);
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index b74f7772b576..6e4dc28874d7 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -69,9 +69,7 @@ fold_field(void *mib[], int nr)
69 unsigned long res = 0; 69 unsigned long res = 0;
70 int i; 70 int i;
71 71
72 for (i = 0; i < NR_CPUS; i++) { 72 for_each_cpu(i) {
73 if (!cpu_possible(i))
74 continue;
75 res += 73 res +=
76 *((unsigned long *) (((void *) per_cpu_ptr(mib[0], i)) + 74 *((unsigned long *) (((void *) per_cpu_ptr(mib[0], i)) +
77 sizeof (unsigned long) * nr)); 75 sizeof (unsigned long) * nr));
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 46a2ce00a29b..cdcab9ca4c60 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -6,7 +6,7 @@
6obj-$(CONFIG_SUNRPC) += sunrpc.o 6obj-$(CONFIG_SUNRPC) += sunrpc.o
7obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ 7obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
8 8
9sunrpc-y := clnt.o xprt.o sched.o \ 9sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
10 auth.o auth_null.o auth_unix.o \ 10 auth.o auth_null.o auth_unix.o \
11 svc.o svcsock.o svcauth.o svcauth_unix.o \ 11 svc.o svcsock.o svcauth.o svcauth_unix.o \
12 pmap_clnt.o timer.o xdr.o \ 12 pmap_clnt.o timer.o xdr.o \
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 505e2d4b3d62..a415d99c394d 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -11,7 +11,6 @@
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/errno.h> 13#include <linux/errno.h>
14#include <linux/socket.h>
15#include <linux/sunrpc/clnt.h> 14#include <linux/sunrpc/clnt.h>
16#include <linux/spinlock.h> 15#include <linux/spinlock.h>
17 16
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index fe1b874084bc..f3431a7e33da 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -10,7 +10,7 @@ auth_rpcgss-objs := auth_gss.o gss_generic_token.o \
10obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o 10obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
11 11
12rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ 12rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
13 gss_krb5_seqnum.o 13 gss_krb5_seqnum.o gss_krb5_wrap.o
14 14
15obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o 15obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o
16 16
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 2f7b867161d2..f44f46f1d8e0 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -42,9 +42,8 @@
42#include <linux/init.h> 42#include <linux/init.h>
43#include <linux/types.h> 43#include <linux/types.h>
44#include <linux/slab.h> 44#include <linux/slab.h>
45#include <linux/socket.h>
46#include <linux/in.h>
47#include <linux/sched.h> 45#include <linux/sched.h>
46#include <linux/pagemap.h>
48#include <linux/sunrpc/clnt.h> 47#include <linux/sunrpc/clnt.h>
49#include <linux/sunrpc/auth.h> 48#include <linux/sunrpc/auth.h>
50#include <linux/sunrpc/auth_gss.h> 49#include <linux/sunrpc/auth_gss.h>
@@ -846,10 +845,8 @@ gss_marshal(struct rpc_task *task, u32 *p)
846 845
847 /* We compute the checksum for the verifier over the xdr-encoded bytes 846 /* We compute the checksum for the verifier over the xdr-encoded bytes
848 * starting with the xid and ending at the end of the credential: */ 847 * starting with the xid and ending at the end of the credential: */
849 iov.iov_base = req->rq_snd_buf.head[0].iov_base; 848 iov.iov_base = xprt_skip_transport_header(task->tk_xprt,
850 if (task->tk_client->cl_xprt->stream) 849 req->rq_snd_buf.head[0].iov_base);
851 /* See clnt.c:call_header() */
852 iov.iov_base += 4;
853 iov.iov_len = (u8 *)p - (u8 *)iov.iov_base; 850 iov.iov_len = (u8 *)p - (u8 *)iov.iov_base;
854 xdr_buf_from_iov(&iov, &verf_buf); 851 xdr_buf_from_iov(&iov, &verf_buf);
855 852
@@ -857,9 +854,7 @@ gss_marshal(struct rpc_task *task, u32 *p)
857 *p++ = htonl(RPC_AUTH_GSS); 854 *p++ = htonl(RPC_AUTH_GSS);
858 855
859 mic.data = (u8 *)(p + 1); 856 mic.data = (u8 *)(p + 1);
860 maj_stat = gss_get_mic(ctx->gc_gss_ctx, 857 maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
861 GSS_C_QOP_DEFAULT,
862 &verf_buf, &mic);
863 if (maj_stat == GSS_S_CONTEXT_EXPIRED) { 858 if (maj_stat == GSS_S_CONTEXT_EXPIRED) {
864 cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; 859 cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
865 } else if (maj_stat != 0) { 860 } else if (maj_stat != 0) {
@@ -890,10 +885,8 @@ static u32 *
890gss_validate(struct rpc_task *task, u32 *p) 885gss_validate(struct rpc_task *task, u32 *p)
891{ 886{
892 struct rpc_cred *cred = task->tk_msg.rpc_cred; 887 struct rpc_cred *cred = task->tk_msg.rpc_cred;
893 struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
894 gc_base);
895 struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); 888 struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
896 u32 seq, qop_state; 889 u32 seq;
897 struct kvec iov; 890 struct kvec iov;
898 struct xdr_buf verf_buf; 891 struct xdr_buf verf_buf;
899 struct xdr_netobj mic; 892 struct xdr_netobj mic;
@@ -914,23 +907,14 @@ gss_validate(struct rpc_task *task, u32 *p)
914 mic.data = (u8 *)p; 907 mic.data = (u8 *)p;
915 mic.len = len; 908 mic.len = len;
916 909
917 maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state); 910 maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
918 if (maj_stat == GSS_S_CONTEXT_EXPIRED) 911 if (maj_stat == GSS_S_CONTEXT_EXPIRED)
919 cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; 912 cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
920 if (maj_stat) 913 if (maj_stat)
921 goto out_bad; 914 goto out_bad;
922 switch (gss_cred->gc_service) { 915 /* We leave it to unwrap to calculate au_rslack. For now we just
923 case RPC_GSS_SVC_NONE: 916 * calculate the length of the verifier: */
924 /* verifier data, flavor, length: */ 917 task->tk_auth->au_verfsize = XDR_QUADLEN(len) + 2;
925 task->tk_auth->au_rslack = XDR_QUADLEN(len) + 2;
926 break;
927 case RPC_GSS_SVC_INTEGRITY:
928 /* verifier data, flavor, length, length, sequence number: */
929 task->tk_auth->au_rslack = XDR_QUADLEN(len) + 4;
930 break;
931 case RPC_GSS_SVC_PRIVACY:
932 goto out_bad;
933 }
934 gss_put_ctx(ctx); 918 gss_put_ctx(ctx);
935 dprintk("RPC: %4u GSS gss_validate: gss_verify_mic succeeded.\n", 919 dprintk("RPC: %4u GSS gss_validate: gss_verify_mic succeeded.\n",
936 task->tk_pid); 920 task->tk_pid);
@@ -975,8 +959,7 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
975 p = iov->iov_base + iov->iov_len; 959 p = iov->iov_base + iov->iov_len;
976 mic.data = (u8 *)(p + 1); 960 mic.data = (u8 *)(p + 1);
977 961
978 maj_stat = gss_get_mic(ctx->gc_gss_ctx, 962 maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
979 GSS_C_QOP_DEFAULT, &integ_buf, &mic);
980 status = -EIO; /* XXX? */ 963 status = -EIO; /* XXX? */
981 if (maj_stat == GSS_S_CONTEXT_EXPIRED) 964 if (maj_stat == GSS_S_CONTEXT_EXPIRED)
982 cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; 965 cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
@@ -990,6 +973,113 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
990 return 0; 973 return 0;
991} 974}
992 975
976static void
977priv_release_snd_buf(struct rpc_rqst *rqstp)
978{
979 int i;
980
981 for (i=0; i < rqstp->rq_enc_pages_num; i++)
982 __free_page(rqstp->rq_enc_pages[i]);
983 kfree(rqstp->rq_enc_pages);
984}
985
986static int
987alloc_enc_pages(struct rpc_rqst *rqstp)
988{
989 struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
990 int first, last, i;
991
992 if (snd_buf->page_len == 0) {
993 rqstp->rq_enc_pages_num = 0;
994 return 0;
995 }
996
997 first = snd_buf->page_base >> PAGE_CACHE_SHIFT;
998 last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_CACHE_SHIFT;
999 rqstp->rq_enc_pages_num = last - first + 1 + 1;
1000 rqstp->rq_enc_pages
1001 = kmalloc(rqstp->rq_enc_pages_num * sizeof(struct page *),
1002 GFP_NOFS);
1003 if (!rqstp->rq_enc_pages)
1004 goto out;
1005 for (i=0; i < rqstp->rq_enc_pages_num; i++) {
1006 rqstp->rq_enc_pages[i] = alloc_page(GFP_NOFS);
1007 if (rqstp->rq_enc_pages[i] == NULL)
1008 goto out_free;
1009 }
1010 rqstp->rq_release_snd_buf = priv_release_snd_buf;
1011 return 0;
1012out_free:
1013 for (i--; i >= 0; i--) {
1014 __free_page(rqstp->rq_enc_pages[i]);
1015 }
1016out:
1017 return -EAGAIN;
1018}
1019
1020static inline int
1021gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
1022 kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj)
1023{
1024 struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
1025 u32 offset;
1026 u32 maj_stat;
1027 int status;
1028 u32 *opaque_len;
1029 struct page **inpages;
1030 int first;
1031 int pad;
1032 struct kvec *iov;
1033 char *tmp;
1034
1035 opaque_len = p++;
1036 offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
1037 *p++ = htonl(rqstp->rq_seqno);
1038
1039 status = encode(rqstp, p, obj);
1040 if (status)
1041 return status;
1042
1043 status = alloc_enc_pages(rqstp);
1044 if (status)
1045 return status;
1046 first = snd_buf->page_base >> PAGE_CACHE_SHIFT;
1047 inpages = snd_buf->pages + first;
1048 snd_buf->pages = rqstp->rq_enc_pages;
1049 snd_buf->page_base -= first << PAGE_CACHE_SHIFT;
1050 /* Give the tail its own page, in case we need extra space in the
1051 * head when wrapping: */
1052 if (snd_buf->page_len || snd_buf->tail[0].iov_len) {
1053 tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]);
1054 memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len);
1055 snd_buf->tail[0].iov_base = tmp;
1056 }
1057 maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages);
1058 /* RPC_SLACK_SPACE should prevent this ever happening: */
1059 BUG_ON(snd_buf->len > snd_buf->buflen);
1060 status = -EIO;
1061 /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was
1062 * done anyway, so it's safe to put the request on the wire: */
1063 if (maj_stat == GSS_S_CONTEXT_EXPIRED)
1064 cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
1065 else if (maj_stat)
1066 return status;
1067
1068 *opaque_len = htonl(snd_buf->len - offset);
1069 /* guess whether we're in the head or the tail: */
1070 if (snd_buf->page_len || snd_buf->tail[0].iov_len)
1071 iov = snd_buf->tail;
1072 else
1073 iov = snd_buf->head;
1074 p = iov->iov_base + iov->iov_len;
1075 pad = 3 - ((snd_buf->len - offset - 1) & 3);
1076 memset(p, 0, pad);
1077 iov->iov_len += pad;
1078 snd_buf->len += pad;
1079
1080 return 0;
1081}
1082
993static int 1083static int
994gss_wrap_req(struct rpc_task *task, 1084gss_wrap_req(struct rpc_task *task,
995 kxdrproc_t encode, void *rqstp, u32 *p, void *obj) 1085 kxdrproc_t encode, void *rqstp, u32 *p, void *obj)
@@ -1017,6 +1107,8 @@ gss_wrap_req(struct rpc_task *task,
1017 rqstp, p, obj); 1107 rqstp, p, obj);
1018 break; 1108 break;
1019 case RPC_GSS_SVC_PRIVACY: 1109 case RPC_GSS_SVC_PRIVACY:
1110 status = gss_wrap_req_priv(cred, ctx, encode,
1111 rqstp, p, obj);
1020 break; 1112 break;
1021 } 1113 }
1022out: 1114out:
@@ -1054,8 +1146,7 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
1054 if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset)) 1146 if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset))
1055 return status; 1147 return status;
1056 1148
1057 maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, 1149 maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
1058 &mic, NULL);
1059 if (maj_stat == GSS_S_CONTEXT_EXPIRED) 1150 if (maj_stat == GSS_S_CONTEXT_EXPIRED)
1060 cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; 1151 cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
1061 if (maj_stat != GSS_S_COMPLETE) 1152 if (maj_stat != GSS_S_COMPLETE)
@@ -1063,6 +1154,35 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
1063 return 0; 1154 return 0;
1064} 1155}
1065 1156
1157static inline int
1158gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
1159 struct rpc_rqst *rqstp, u32 **p)
1160{
1161 struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
1162 u32 offset;
1163 u32 opaque_len;
1164 u32 maj_stat;
1165 int status = -EIO;
1166
1167 opaque_len = ntohl(*(*p)++);
1168 offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base;
1169 if (offset + opaque_len > rcv_buf->len)
1170 return status;
1171 /* remove padding: */
1172 rcv_buf->len = offset + opaque_len;
1173
1174 maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf);
1175 if (maj_stat == GSS_S_CONTEXT_EXPIRED)
1176 cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
1177 if (maj_stat != GSS_S_COMPLETE)
1178 return status;
1179 if (ntohl(*(*p)++) != rqstp->rq_seqno)
1180 return status;
1181
1182 return 0;
1183}
1184
1185
1066static int 1186static int
1067gss_unwrap_resp(struct rpc_task *task, 1187gss_unwrap_resp(struct rpc_task *task,
1068 kxdrproc_t decode, void *rqstp, u32 *p, void *obj) 1188 kxdrproc_t decode, void *rqstp, u32 *p, void *obj)
@@ -1071,6 +1191,9 @@ gss_unwrap_resp(struct rpc_task *task,
1071 struct gss_cred *gss_cred = container_of(cred, struct gss_cred, 1191 struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
1072 gc_base); 1192 gc_base);
1073 struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); 1193 struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
1194 u32 *savedp = p;
1195 struct kvec *head = ((struct rpc_rqst *)rqstp)->rq_rcv_buf.head;
1196 int savedlen = head->iov_len;
1074 int status = -EIO; 1197 int status = -EIO;
1075 1198
1076 if (ctx->gc_proc != RPC_GSS_PROC_DATA) 1199 if (ctx->gc_proc != RPC_GSS_PROC_DATA)
@@ -1084,8 +1207,14 @@ gss_unwrap_resp(struct rpc_task *task,
1084 goto out; 1207 goto out;
1085 break; 1208 break;
1086 case RPC_GSS_SVC_PRIVACY: 1209 case RPC_GSS_SVC_PRIVACY:
1210 status = gss_unwrap_resp_priv(cred, ctx, rqstp, &p);
1211 if (status)
1212 goto out;
1087 break; 1213 break;
1088 } 1214 }
1215 /* take into account extra slack for integrity and privacy cases: */
1216 task->tk_auth->au_rslack = task->tk_auth->au_verfsize + (p - savedp)
1217 + (savedlen - head->iov_len);
1089out_decode: 1218out_decode:
1090 status = decode(rqstp, p, obj); 1219 status = decode(rqstp, p, obj);
1091out: 1220out:
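
gss_wrap_req_priv() above pads the wrapped request out to a 4-byte XDR boundary with pad = 3 - ((snd_buf->len - offset - 1) & 3). A small self-checking sketch of that arithmetic, where len stands for snd_buf->len - offset:

    #include <assert.h>
    #include <stdio.h>

    /* The pad expression from gss_wrap_req_priv(): zero bytes needed
     * to round len up to the next 4-byte XDR boundary. */
    static int xdr_pad(int len)
    {
        return 3 - ((len - 1) & 3);
    }

    int main(void)
    {
        int len;

        for (len = 1; len <= 16; len++) {
            int pad = xdr_pad(len);

            /* equivalent, perhaps more familiar, formulation */
            assert(pad == (int)((4 - (len & 3)) & 3));
            assert((len + pad) % 4 == 0);
            printf("len=%2d pad=%d\n", len, pad);
        }
        return 0;
    }

Note that this pad can be zero for an already-aligned length, unlike the Kerberos 8-byte pad added later in this series.
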
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index ee6ae74cd1b2..3f3d5437f02d 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -139,17 +139,91 @@ buf_to_sg(struct scatterlist *sg, char *ptr, int len) {
139 sg->length = len; 139 sg->length = len;
140} 140}
141 141
142static int
143process_xdr_buf(struct xdr_buf *buf, int offset, int len,
144 int (*actor)(struct scatterlist *, void *), void *data)
145{
146 int i, page_len, thislen, page_offset, ret = 0;
147 struct scatterlist sg[1];
148
149 if (offset >= buf->head[0].iov_len) {
150 offset -= buf->head[0].iov_len;
151 } else {
152 thislen = buf->head[0].iov_len - offset;
153 if (thislen > len)
154 thislen = len;
155 buf_to_sg(sg, buf->head[0].iov_base + offset, thislen);
156 ret = actor(sg, data);
157 if (ret)
158 goto out;
159 offset = 0;
160 len -= thislen;
161 }
162 if (len == 0)
163 goto out;
164
165 if (offset >= buf->page_len) {
166 offset -= buf->page_len;
167 } else {
168 page_len = buf->page_len - offset;
169 if (page_len > len)
170 page_len = len;
171 len -= page_len;
172 page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1);
173 i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT;
174 thislen = PAGE_CACHE_SIZE - page_offset;
175 do {
176 if (thislen > page_len)
177 thislen = page_len;
178 sg->page = buf->pages[i];
179 sg->offset = page_offset;
180 sg->length = thislen;
181 ret = actor(sg, data);
182 if (ret)
183 goto out;
184 page_len -= thislen;
185 i++;
186 page_offset = 0;
187 thislen = PAGE_CACHE_SIZE;
188 } while (page_len != 0);
189 offset = 0;
190 }
191 if (len == 0)
192 goto out;
193
194 if (offset < buf->tail[0].iov_len) {
195 thislen = buf->tail[0].iov_len - offset;
196 if (thislen > len)
197 thislen = len;
198 buf_to_sg(sg, buf->tail[0].iov_base + offset, thislen);
199 ret = actor(sg, data);
200 len -= thislen;
201 }
202 if (len != 0)
203 ret = -EINVAL;
204out:
205 return ret;
206}
207
208static int
209checksummer(struct scatterlist *sg, void *data)
210{
211 struct crypto_tfm *tfm = (struct crypto_tfm *)data;
212
213 crypto_digest_update(tfm, sg, 1);
214
215 return 0;
216}
217
142/* checksum the plaintext data and hdrlen bytes of the token header */ 218/* checksum the plaintext data and hdrlen bytes of the token header */
143s32 219s32
144make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, 220make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
145 struct xdr_netobj *cksum) 221 int body_offset, struct xdr_netobj *cksum)
146{ 222{
147 char *cksumname; 223 char *cksumname;
148 struct crypto_tfm *tfm = NULL; /* XXX add to ctx? */ 224 struct crypto_tfm *tfm = NULL; /* XXX add to ctx? */
149 struct scatterlist sg[1]; 225 struct scatterlist sg[1];
150 u32 code = GSS_S_FAILURE; 226 u32 code = GSS_S_FAILURE;
151 int len, thislen, offset;
152 int i;
153 227
154 switch (cksumtype) { 228 switch (cksumtype) {
155 case CKSUMTYPE_RSA_MD5: 229 case CKSUMTYPE_RSA_MD5:
@@ -169,33 +243,8 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
169 crypto_digest_init(tfm); 243 crypto_digest_init(tfm);
170 buf_to_sg(sg, header, hdrlen); 244 buf_to_sg(sg, header, hdrlen);
171 crypto_digest_update(tfm, sg, 1); 245 crypto_digest_update(tfm, sg, 1);
172 if (body->head[0].iov_len) { 246 process_xdr_buf(body, body_offset, body->len - body_offset,
173 buf_to_sg(sg, body->head[0].iov_base, body->head[0].iov_len); 247 checksummer, tfm);
174 crypto_digest_update(tfm, sg, 1);
175 }
176
177 len = body->page_len;
178 if (len != 0) {
179 offset = body->page_base & (PAGE_CACHE_SIZE - 1);
180 i = body->page_base >> PAGE_CACHE_SHIFT;
181 thislen = PAGE_CACHE_SIZE - offset;
182 do {
183 if (thislen > len)
184 thislen = len;
185 sg->page = body->pages[i];
186 sg->offset = offset;
187 sg->length = thislen;
188 crypto_digest_update(tfm, sg, 1);
189 len -= thislen;
190 i++;
191 offset = 0;
192 thislen = PAGE_CACHE_SIZE;
193 } while(len != 0);
194 }
195 if (body->tail[0].iov_len) {
196 buf_to_sg(sg, body->tail[0].iov_base, body->tail[0].iov_len);
197 crypto_digest_update(tfm, sg, 1);
198 }
199 crypto_digest_final(tfm, cksum->data); 248 crypto_digest_final(tfm, cksum->data);
200 code = 0; 249 code = 0;
201out: 250out:
@@ -204,3 +253,154 @@ out:
204} 253}
205 254
206EXPORT_SYMBOL(make_checksum); 255EXPORT_SYMBOL(make_checksum);
256
257struct encryptor_desc {
258 u8 iv[8]; /* XXX hard-coded blocksize */
259 struct crypto_tfm *tfm;
260 int pos;
261 struct xdr_buf *outbuf;
262 struct page **pages;
263 struct scatterlist infrags[4];
264 struct scatterlist outfrags[4];
265 int fragno;
266 int fraglen;
267};
268
269static int
270encryptor(struct scatterlist *sg, void *data)
271{
272 struct encryptor_desc *desc = data;
273 struct xdr_buf *outbuf = desc->outbuf;
274 struct page *in_page;
275 int thislen = desc->fraglen + sg->length;
276 int fraglen, ret;
277 int page_pos;
278
279 /* Worst case is 4 fragments: head, end of page 1, start
280 * of page 2, tail. Anything more is a bug. */
281 BUG_ON(desc->fragno > 3);
282 desc->infrags[desc->fragno] = *sg;
283 desc->outfrags[desc->fragno] = *sg;
284
285 page_pos = desc->pos - outbuf->head[0].iov_len;
286 if (page_pos >= 0 && page_pos < outbuf->page_len) {
287 /* pages are not in place: */
288 int i = (page_pos + outbuf->page_base) >> PAGE_CACHE_SHIFT;
289 in_page = desc->pages[i];
290 } else {
291 in_page = sg->page;
292 }
293 desc->infrags[desc->fragno].page = in_page;
294 desc->fragno++;
295 desc->fraglen += sg->length;
296 desc->pos += sg->length;
297
298 fraglen = thislen & 7; /* XXX hardcoded blocksize */
299 thislen -= fraglen;
300
301 if (thislen == 0)
302 return 0;
303
304 ret = crypto_cipher_encrypt_iv(desc->tfm, desc->outfrags, desc->infrags,
305 thislen, desc->iv);
306 if (ret)
307 return ret;
308 if (fraglen) {
309 desc->outfrags[0].page = sg->page;
310 desc->outfrags[0].offset = sg->offset + sg->length - fraglen;
311 desc->outfrags[0].length = fraglen;
312 desc->infrags[0] = desc->outfrags[0];
313 desc->infrags[0].page = in_page;
314 desc->fragno = 1;
315 desc->fraglen = fraglen;
316 } else {
317 desc->fragno = 0;
318 desc->fraglen = 0;
319 }
320 return 0;
321}
322
323int
324gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset,
325 struct page **pages)
326{
327 int ret;
328 struct encryptor_desc desc;
329
330 BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0);
331
332 memset(desc.iv, 0, sizeof(desc.iv));
333 desc.tfm = tfm;
334 desc.pos = offset;
335 desc.outbuf = buf;
336 desc.pages = pages;
337 desc.fragno = 0;
338 desc.fraglen = 0;
339
340 ret = process_xdr_buf(buf, offset, buf->len - offset, encryptor, &desc);
341 return ret;
342}
343
344EXPORT_SYMBOL(gss_encrypt_xdr_buf);
345
346struct decryptor_desc {
347 u8 iv[8]; /* XXX hard-coded blocksize */
348 struct crypto_tfm *tfm;
349 struct scatterlist frags[4];
350 int fragno;
351 int fraglen;
352};
353
354static int
355decryptor(struct scatterlist *sg, void *data)
356{
357 struct decryptor_desc *desc = data;
358 int thislen = desc->fraglen + sg->length;
359 int fraglen, ret;
360
361 /* Worst case is 4 fragments: head, end of page 1, start
362 * of page 2, tail. Anything more is a bug. */
363 BUG_ON(desc->fragno > 3);
364 desc->frags[desc->fragno] = *sg;
365 desc->fragno++;
366 desc->fraglen += sg->length;
367
368 fraglen = thislen & 7; /* XXX hardcoded blocksize */
369 thislen -= fraglen;
370
371 if (thislen == 0)
372 return 0;
373
374 ret = crypto_cipher_decrypt_iv(desc->tfm, desc->frags, desc->frags,
375 thislen, desc->iv);
376 if (ret)
377 return ret;
378 if (fraglen) {
379 desc->frags[0].page = sg->page;
380 desc->frags[0].offset = sg->offset + sg->length - fraglen;
381 desc->frags[0].length = fraglen;
382 desc->fragno = 1;
383 desc->fraglen = fraglen;
384 } else {
385 desc->fragno = 0;
386 desc->fraglen = 0;
387 }
388 return 0;
389}
390
391int
392gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset)
393{
394 struct decryptor_desc desc;
395
396 /* XXXJBF: */
397 BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0);
398
399 memset(desc.iv, 0, sizeof(desc.iv));
400 desc.tfm = tfm;
401 desc.fragno = 0;
402 desc.fraglen = 0;
403 return process_xdr_buf(buf, offset, buf->len - offset, decryptor, &desc);
404}
405
406EXPORT_SYMBOL(gss_decrypt_xdr_buf);
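
process_xdr_buf() is the key new helper in this file: it presents an xdr_buf's three regions (head kvec, page array, tail kvec) to an actor callback one contiguous chunk at a time, and make_checksum(), gss_encrypt_xdr_buf() and gss_decrypt_xdr_buf() all now route through it. Below is a flat user-space analogue of the offset/length windowing, with plain byte arrays standing in for the kvecs and the page array; the types and names are stand-ins.

    #include <stdio.h>

    /* User-space stand-in for xdr_buf: three byte regions. */
    struct fakebuf {
        const char *head;  int head_len;
        const char *pages; int page_len;
        const char *tail;  int tail_len;
    };

    typedef int (*actor_t)(const char *chunk, int len, void *data);

    /* Same windowing as process_xdr_buf(): consume 'offset' bytes,
     * then hand at most 'len' bytes to the actor region by region. */
    static int walk_buf(struct fakebuf *buf, int offset, int len,
                        actor_t actor, void *data)
    {
        struct { const char *p; int n; } region[3] = {
            { buf->head,  buf->head_len },
            { buf->pages, buf->page_len },
            { buf->tail,  buf->tail_len },
        };
        int i, ret;

        for (i = 0; i < 3 && len > 0; i++) {
            int thislen;

            if (offset >= region[i].n) {
                offset -= region[i].n;
                continue;
            }
            thislen = region[i].n - offset;
            if (thislen > len)
                thislen = len;
            ret = actor(region[i].p + offset, thislen, data);
            if (ret)
                return ret;
            offset = 0;
            len -= thislen;
        }
        return len ? -1 : 0;    /* ran past the end: caller bug */
    }

    static int print_chunk(const char *chunk, int len, void *data)
    {
        (void)data;
        printf("chunk: %.*s\n", len, chunk);
        return 0;
    }

    int main(void)
    {
        struct fakebuf b = { "HEAD", 4, "PAGEDATA", 8, "TAIL", 4 };

        /* skip 2 bytes, visit the next 10: "AD", then "PAGEDATA" */
        return walk_buf(&b, 2, 10, print_chunk, NULL);
    }

The kernel version additionally splits the middle region at page-cache page boundaries and fills scatterlist entries, but the offset bookkeeping is the same.
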
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 606a8a82cafb..5f1f806a0b11 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -39,7 +39,6 @@
39#include <linux/types.h> 39#include <linux/types.h>
40#include <linux/slab.h> 40#include <linux/slab.h>
41#include <linux/sunrpc/auth.h> 41#include <linux/sunrpc/auth.h>
42#include <linux/in.h>
43#include <linux/sunrpc/gss_krb5.h> 42#include <linux/sunrpc/gss_krb5.h>
44#include <linux/sunrpc/xdr.h> 43#include <linux/sunrpc/xdr.h>
45#include <linux/crypto.h> 44#include <linux/crypto.h>
@@ -191,43 +190,12 @@ gss_delete_sec_context_kerberos(void *internal_ctx) {
191 kfree(kctx); 190 kfree(kctx);
192} 191}
193 192
194static u32
195gss_verify_mic_kerberos(struct gss_ctx *ctx,
196 struct xdr_buf *message,
197 struct xdr_netobj *mic_token,
198 u32 *qstate) {
199 u32 maj_stat = 0;
200 int qop_state;
201 struct krb5_ctx *kctx = ctx->internal_ctx_id;
202
203 maj_stat = krb5_read_token(kctx, mic_token, message, &qop_state,
204 KG_TOK_MIC_MSG);
205 if (!maj_stat && qop_state)
206 *qstate = qop_state;
207
208 dprintk("RPC: gss_verify_mic_kerberos returning %d\n", maj_stat);
209 return maj_stat;
210}
211
212static u32
213gss_get_mic_kerberos(struct gss_ctx *ctx,
214 u32 qop,
215 struct xdr_buf *message,
216 struct xdr_netobj *mic_token) {
217 u32 err = 0;
218 struct krb5_ctx *kctx = ctx->internal_ctx_id;
219
220 err = krb5_make_token(kctx, qop, message, mic_token, KG_TOK_MIC_MSG);
221
222 dprintk("RPC: gss_get_mic_kerberos returning %d\n",err);
223
224 return err;
225}
226
227static struct gss_api_ops gss_kerberos_ops = { 193static struct gss_api_ops gss_kerberos_ops = {
228 .gss_import_sec_context = gss_import_sec_context_kerberos, 194 .gss_import_sec_context = gss_import_sec_context_kerberos,
229 .gss_get_mic = gss_get_mic_kerberos, 195 .gss_get_mic = gss_get_mic_kerberos,
230 .gss_verify_mic = gss_verify_mic_kerberos, 196 .gss_verify_mic = gss_verify_mic_kerberos,
197 .gss_wrap = gss_wrap_kerberos,
198 .gss_unwrap = gss_unwrap_kerberos,
231 .gss_delete_sec_context = gss_delete_sec_context_kerberos, 199 .gss_delete_sec_context = gss_delete_sec_context_kerberos,
232}; 200};
233 201
@@ -242,6 +210,11 @@ static struct pf_desc gss_kerberos_pfs[] = {
242 .service = RPC_GSS_SVC_INTEGRITY, 210 .service = RPC_GSS_SVC_INTEGRITY,
243 .name = "krb5i", 211 .name = "krb5i",
244 }, 212 },
213 [2] = {
214 .pseudoflavor = RPC_AUTH_GSS_KRB5P,
215 .service = RPC_GSS_SVC_PRIVACY,
216 .name = "krb5p",
217 },
245}; 218};
246 219
247static struct gss_api_mech gss_kerberos_mech = { 220static struct gss_api_mech gss_kerberos_mech = {
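
The pf_desc hunk above registers a third Kerberos pseudoflavor, krb5p, mapping to RPC_GSS_SVC_PRIVACY now that the mechanism exposes wrap/unwrap. A sketch of the table-lookup idea follows; the numeric pseudoflavors shown are the conventional RPC_AUTH_GSS_KRB5* values (390003..390005), but treat them and the simplified types as illustrative.

    #include <stdio.h>

    enum service { SVC_NONE, SVC_INTEGRITY, SVC_PRIVACY };

    struct pf_desc {
        unsigned int pseudoflavor;
        enum service service;
        const char *name;
    };

    static const struct pf_desc krb5_pfs[] = {
        { 390003, SVC_NONE,      "krb5"  },
        { 390004, SVC_INTEGRITY, "krb5i" },
        { 390005, SVC_PRIVACY,   "krb5p" },    /* newly registered */
    };

    static const struct pf_desc *lookup(unsigned int flavor)
    {
        size_t i;

        for (i = 0; i < sizeof(krb5_pfs) / sizeof(krb5_pfs[0]); i++)
            if (krb5_pfs[i].pseudoflavor == flavor)
                return &krb5_pfs[i];
        return NULL;
    }

    int main(void)
    {
        const struct pf_desc *pf = lookup(390005);

        if (pf)
            printf("%u -> %s (service %d)\n",
                   pf->pseudoflavor, pf->name, pf->service);
        return 0;
    }
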
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index afeeb8715a77..13f8ae979454 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -70,22 +70,13 @@
70# define RPCDBG_FACILITY RPCDBG_AUTH 70# define RPCDBG_FACILITY RPCDBG_AUTH
71#endif 71#endif
72 72
73static inline int
74gss_krb5_padding(int blocksize, int length) {
75 /* Most of the code is block-size independent but in practice we
76 * use only 8: */
77 BUG_ON(blocksize != 8);
78 return 8 - (length & 7);
79}
80
81u32 73u32
82krb5_make_token(struct krb5_ctx *ctx, int qop_req, 74gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
83 struct xdr_buf *text, struct xdr_netobj *token, 75 struct xdr_netobj *token)
84 int toktype)
85{ 76{
77 struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
86 s32 checksum_type; 78 s32 checksum_type;
87 struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; 79 struct xdr_netobj md5cksum = {.len = 0, .data = NULL};
88 int blocksize = 0, tmsglen;
89 unsigned char *ptr, *krb5_hdr, *msg_start; 80 unsigned char *ptr, *krb5_hdr, *msg_start;
90 s32 now; 81 s32 now;
91 82
@@ -93,9 +84,6 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req,
93 84
94 now = get_seconds(); 85 now = get_seconds();
95 86
96 if (qop_req != 0)
97 goto out_err;
98
99 switch (ctx->signalg) { 87 switch (ctx->signalg) {
100 case SGN_ALG_DES_MAC_MD5: 88 case SGN_ALG_DES_MAC_MD5:
101 checksum_type = CKSUMTYPE_RSA_MD5; 89 checksum_type = CKSUMTYPE_RSA_MD5;
@@ -111,21 +99,13 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req,
111 goto out_err; 99 goto out_err;
112 } 100 }
113 101
114 if (toktype == KG_TOK_WRAP_MSG) { 102 token->len = g_token_size(&ctx->mech_used, 22);
115 blocksize = crypto_tfm_alg_blocksize(ctx->enc);
116 tmsglen = blocksize + text->len
117 + gss_krb5_padding(blocksize, blocksize + text->len);
118 } else {
119 tmsglen = 0;
120 }
121
122 token->len = g_token_size(&ctx->mech_used, 22 + tmsglen);
123 103
124 ptr = token->data; 104 ptr = token->data;
125 g_make_token_header(&ctx->mech_used, 22 + tmsglen, &ptr); 105 g_make_token_header(&ctx->mech_used, 22, &ptr);
126 106
127 *ptr++ = (unsigned char) ((toktype>>8)&0xff); 107 *ptr++ = (unsigned char) ((KG_TOK_MIC_MSG>>8)&0xff);
128 *ptr++ = (unsigned char) (toktype&0xff); 108 *ptr++ = (unsigned char) (KG_TOK_MIC_MSG&0xff);
129 109
130 /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ 110 /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */
131 krb5_hdr = ptr - 2; 111 krb5_hdr = ptr - 2;
@@ -133,17 +113,9 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req,
133 113
134 *(u16 *)(krb5_hdr + 2) = htons(ctx->signalg); 114 *(u16 *)(krb5_hdr + 2) = htons(ctx->signalg);
135 memset(krb5_hdr + 4, 0xff, 4); 115 memset(krb5_hdr + 4, 0xff, 4);
136 if (toktype == KG_TOK_WRAP_MSG)
137 *(u16 *)(krb5_hdr + 4) = htons(ctx->sealalg);
138 116
139 if (toktype == KG_TOK_WRAP_MSG) { 117 if (make_checksum(checksum_type, krb5_hdr, 8, text, 0, &md5cksum))
140 /* XXX removing support for now */
141 goto out_err;
142 } else { /* Sign only. */
143 if (make_checksum(checksum_type, krb5_hdr, 8, text,
144 &md5cksum))
145 goto out_err; 118 goto out_err;
146 }
147 119
148 switch (ctx->signalg) { 120 switch (ctx->signalg) {
149 case SGN_ALG_DES_MAC_MD5: 121 case SGN_ALG_DES_MAC_MD5:
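
gss_get_mic_kerberos() above emits the 8-byte token header of RFC 1964, section 1.2.1: a 2-byte TOK_ID, a 2-byte SGN_ALG in network order, then 0xff filler. The wrap path later overwrites bytes 4-5 with SEAL_ALG, which is why a MIC token must show 0xffff there (the sealalg != 0xffff check in the unseal path). A sketch that builds and dumps the MIC header; the constants are as I read RFC 1964.

    #include <stdio.h>
    #include <string.h>
    #include <arpa/inet.h>    /* htons */

    #define KG_TOK_MIC_MSG      0x0101    /* RFC 1964 MIC token id */
    #define SGN_ALG_DES_MAC_MD5 0x0000    /* per RFC 1964 */

    int main(void)
    {
        unsigned char hdr[8];
        unsigned short sigalg = htons(SGN_ALG_DES_MAC_MD5);
        int i;

        /* bytes 0-1: TOK_ID, high byte first as in the patch */
        hdr[0] = (KG_TOK_MIC_MSG >> 8) & 0xff;
        hdr[1] = KG_TOK_MIC_MSG & 0xff;
        /* bytes 2-3: SGN_ALG in network order */
        memcpy(hdr + 2, &sigalg, 2);
        /* bytes 4-7: 0xff filler (wrap overwrites 4-5 with SEAL_ALG) */
        memset(hdr + 4, 0xff, 4);

        for (i = 0; i < 8; i++)
            printf("%02x ", hdr[i]);
        printf("\n");    /* expected: 01 01 00 00 ff ff ff ff */
        return 0;
    }
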
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index 8767fc53183d..2030475d98ed 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -68,21 +68,14 @@
68#endif 68#endif
69 69
70 70
71/* message_buffer is an input if toktype is MIC and an output if it is WRAP: 71/* read_token is a mic token, and message_buffer is the data that the mic was
72 * If toktype is MIC: read_token is a mic token, and message_buffer is the 72 * supposedly taken over. */
73 * data that the mic was supposedly taken over.
74 * If toktype is WRAP: read_token is a wrap token, and message_buffer is used
75 * to return the decrypted data.
76 */
77 73
78/* XXX will need to change prototype and/or just split into a separate function
79 * when we add privacy (because read_token will be in pages too). */
80u32 74u32
81krb5_read_token(struct krb5_ctx *ctx, 75gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
82 struct xdr_netobj *read_token, 76 struct xdr_buf *message_buffer, struct xdr_netobj *read_token)
83 struct xdr_buf *message_buffer,
84 int *qop_state, int toktype)
85{ 77{
78 struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
86 int signalg; 79 int signalg;
87 int sealalg; 80 int sealalg;
88 s32 checksum_type; 81 s32 checksum_type;
@@ -100,16 +93,12 @@ krb5_read_token(struct krb5_ctx *ctx,
100 read_token->len)) 93 read_token->len))
101 goto out; 94 goto out;
102 95
103 if ((*ptr++ != ((toktype>>8)&0xff)) || (*ptr++ != (toktype&0xff))) 96 if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) ||
97 (*ptr++ != ( KG_TOK_MIC_MSG &0xff)) )
104 goto out; 98 goto out;
105 99
106 /* XXX sanity-check bodysize?? */ 100 /* XXX sanity-check bodysize?? */
107 101
108 if (toktype == KG_TOK_WRAP_MSG) {
109 /* XXX gone */
110 goto out;
111 }
112
113 /* get the sign and seal algorithms */ 102 /* get the sign and seal algorithms */
114 103
115 signalg = ptr[0] + (ptr[1] << 8); 104 signalg = ptr[0] + (ptr[1] << 8);
@@ -120,14 +109,7 @@ krb5_read_token(struct krb5_ctx *ctx,
120 if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) 109 if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
121 goto out; 110 goto out;
122 111
123 if (((toktype != KG_TOK_WRAP_MSG) && (sealalg != 0xffff)) || 112 if (sealalg != 0xffff)
124 ((toktype == KG_TOK_WRAP_MSG) && (sealalg == 0xffff)))
125 goto out;
126
127 /* in the current spec, there is only one valid seal algorithm per
128 key type, so a simple comparison is ok */
129
130 if ((toktype == KG_TOK_WRAP_MSG) && !(sealalg == ctx->sealalg))
131 goto out; 113 goto out;
132 114
133 /* there are several mappings of seal algorithms to sign algorithms, 115 /* there are several mappings of seal algorithms to sign algorithms,
@@ -154,7 +136,7 @@ krb5_read_token(struct krb5_ctx *ctx,
154 switch (signalg) { 136 switch (signalg) {
155 case SGN_ALG_DES_MAC_MD5: 137 case SGN_ALG_DES_MAC_MD5:
156 ret = make_checksum(checksum_type, ptr - 2, 8, 138 ret = make_checksum(checksum_type, ptr - 2, 8,
157 message_buffer, &md5cksum); 139 message_buffer, 0, &md5cksum);
158 if (ret) 140 if (ret)
159 goto out; 141 goto out;
160 142
@@ -175,9 +157,6 @@ krb5_read_token(struct krb5_ctx *ctx,
175 157
176 /* it got through unscathed. Make sure the context is unexpired */ 158 /* it got through unscathed. Make sure the context is unexpired */
177 159
178 if (qop_state)
179 *qop_state = GSS_C_QOP_DEFAULT;
180
181 now = get_seconds(); 160 now = get_seconds();
182 161
183 ret = GSS_S_CONTEXT_EXPIRED; 162 ret = GSS_S_CONTEXT_EXPIRED;
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
new file mode 100644
index 000000000000..af777cf9f251
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -0,0 +1,363 @@
1#include <linux/types.h>
2#include <linux/slab.h>
3#include <linux/jiffies.h>
4#include <linux/sunrpc/gss_krb5.h>
5#include <linux/random.h>
6#include <linux/pagemap.h>
7#include <asm/scatterlist.h>
8#include <linux/crypto.h>
9
10#ifdef RPC_DEBUG
11# define RPCDBG_FACILITY RPCDBG_AUTH
12#endif
13
14static inline int
15gss_krb5_padding(int blocksize, int length)
16{
17 /* Most of the code is block-size independent but currently we
18 * use only 8: */
19 BUG_ON(blocksize != 8);
20 return 8 - (length & 7);
21}
22
23static inline void
24gss_krb5_add_padding(struct xdr_buf *buf, int offset, int blocksize)
25{
26 int padding = gss_krb5_padding(blocksize, buf->len - offset);
27 char *p;
28 struct kvec *iov;
29
30 if (buf->page_len || buf->tail[0].iov_len)
31 iov = &buf->tail[0];
32 else
33 iov = &buf->head[0];
34 p = iov->iov_base + iov->iov_len;
35 iov->iov_len += padding;
36 buf->len += padding;
37 memset(p, padding, padding);
38}
39
40static inline int
41gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize)
42{
43 u8 *ptr;
44 u8 pad;
45 int len = buf->len;
46
47 if (len <= buf->head[0].iov_len) {
48 pad = *(u8 *)(buf->head[0].iov_base + len - 1);
49 if (pad > buf->head[0].iov_len)
50 return -EINVAL;
51 buf->head[0].iov_len -= pad;
52 goto out;
53 } else
54 len -= buf->head[0].iov_len;
55 if (len <= buf->page_len) {
56 int last = (buf->page_base + len - 1)
57 >>PAGE_CACHE_SHIFT;
58 int offset = (buf->page_base + len - 1)
59 & (PAGE_CACHE_SIZE - 1);
60 ptr = kmap_atomic(buf->pages[last], KM_SKB_SUNRPC_DATA);
61 pad = *(ptr + offset);
62 kunmap_atomic(ptr, KM_SKB_SUNRPC_DATA);
63 goto out;
64 } else
65 len -= buf->page_len;
66 BUG_ON(len > buf->tail[0].iov_len);
67 pad = *(u8 *)(buf->tail[0].iov_base + len - 1);
68out:
69 /* XXX: NOTE: we do not adjust the page lengths--they represent
70 * a range of data in the real filesystem page cache, and we need
71 * to know that range so the xdr code can properly place read data.
72 * However adjusting the head length, as we do above, is harmless.
73 * In the case of a request that fits into a single page, the server
74 * also uses length and head length together to determine the original
75 * start of the request to copy the request for deferral; so it's
76 * easier on the server if we adjust head and tail length in tandem.
77 * It's not really a problem that we don't fool with the page and
78 * tail lengths, though--at worst badly formed xdr might lead the
79 * server to attempt to parse the padding.
80 * XXX: Document all these weird requirements for gss mechanism
81 * wrap/unwrap functions. */
82 if (pad > blocksize)
83 return -EINVAL;
84 if (buf->len > pad)
85 buf->len -= pad;
86 else
87 return -EINVAL;
88 return 0;
89}
90
91static inline void
92make_confounder(char *p, int blocksize)
93{
94 static u64 i = 0;
95 u64 *q = (u64 *)p;
96
97 /* rfc1964 claims this should be "random". But all that's really
98 * necessary is that it be unique. And not even that is necessary in
99 * our case since our "gssapi" implementation exists only to support
100 * rpcsec_gss, so we know that the only buffers we will ever encrypt
101 * already begin with a unique sequence number. Just to hedge my bets
102 * I'll make a half-hearted attempt at something unique, but ensuring
103 * uniqueness would mean worrying about atomicity and rollover, and I
104 * don't care enough. */
105
106 BUG_ON(blocksize != 8);
107 *q = i++;
108}
109
110/* Assumptions: the head and tail of inbuf are ours to play with.
111 * The pages, however, may be real pages in the page cache and we replace
112 * them with scratch pages from **pages before writing to them. */
113/* XXX: obviously the above should be documentation of wrap interface,
114 * and shouldn't be in this kerberos-specific file. */
115
116/* XXX factor out common code with seal/unseal. */
117
118u32
119gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
120 struct xdr_buf *buf, struct page **pages)
121{
122 struct krb5_ctx *kctx = ctx->internal_ctx_id;
123 s32 checksum_type;
124 struct xdr_netobj md5cksum = {.len = 0, .data = NULL};
125 int blocksize = 0, plainlen;
126 unsigned char *ptr, *krb5_hdr, *msg_start;
127 s32 now;
128 int headlen;
129 struct page **tmp_pages;
130
131 dprintk("RPC: gss_wrap_kerberos\n");
132
133 now = get_seconds();
134
135 switch (kctx->signalg) {
136 case SGN_ALG_DES_MAC_MD5:
137 checksum_type = CKSUMTYPE_RSA_MD5;
138 break;
139 default:
140 dprintk("RPC: gss_krb5_seal: kctx->signalg %d not"
141 " supported\n", kctx->signalg);
142 goto out_err;
143 }
144 if (kctx->sealalg != SEAL_ALG_NONE && kctx->sealalg != SEAL_ALG_DES) {
145 dprintk("RPC: gss_krb5_seal: kctx->sealalg %d not supported\n",
146 kctx->sealalg);
147 goto out_err;
148 }
149
150 blocksize = crypto_tfm_alg_blocksize(kctx->enc);
151 gss_krb5_add_padding(buf, offset, blocksize);
152 BUG_ON((buf->len - offset) % blocksize);
153 plainlen = blocksize + buf->len - offset;
154
155 headlen = g_token_size(&kctx->mech_used, 22 + plainlen) -
156 (buf->len - offset);
157
158 ptr = buf->head[0].iov_base + offset;
159 /* shift data to make room for header. */
160 /* XXX Would be cleverer to encrypt while copying. */
161 /* XXX bounds checking, slack, etc. */
162 memmove(ptr + headlen, ptr, buf->head[0].iov_len - offset);
163 buf->head[0].iov_len += headlen;
164 buf->len += headlen;
165 BUG_ON((buf->len - offset - headlen) % blocksize);
166
167 g_make_token_header(&kctx->mech_used, 22 + plainlen, &ptr);
168
169
170 *ptr++ = (unsigned char) ((KG_TOK_WRAP_MSG>>8)&0xff);
171 *ptr++ = (unsigned char) (KG_TOK_WRAP_MSG&0xff);
172
173 /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */
174 krb5_hdr = ptr - 2;
175 msg_start = krb5_hdr + 24;
176 /* XXXJBF: */ BUG_ON(buf->head[0].iov_base + offset + headlen != msg_start + blocksize);
177
178 *(u16 *)(krb5_hdr + 2) = htons(kctx->signalg);
179 memset(krb5_hdr + 4, 0xff, 4);
180 *(u16 *)(krb5_hdr + 4) = htons(kctx->sealalg);
181
182 make_confounder(msg_start, blocksize);
183
184 /* XXXJBF: UGH!: */
185 tmp_pages = buf->pages;
186 buf->pages = pages;
187 if (make_checksum(checksum_type, krb5_hdr, 8, buf,
188 offset + headlen - blocksize, &md5cksum))
189 goto out_err;
190 buf->pages = tmp_pages;
191
192 switch (kctx->signalg) {
193 case SGN_ALG_DES_MAC_MD5:
194 if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
195 md5cksum.data, md5cksum.len))
196 goto out_err;
197 memcpy(krb5_hdr + 16,
198 md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH,
199 KRB5_CKSUM_LENGTH);
200
201 dprintk("RPC: make_seal_token: cksum data: \n");
202 print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0);
203 break;
204 default:
205 BUG();
206 }
207
208 kfree(md5cksum.data);
209
210 /* XXX would probably be more efficient to compute checksum
211 * and encrypt at the same time: */
212 if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff,
213 kctx->seq_send, krb5_hdr + 16, krb5_hdr + 8)))
214 goto out_err;
215
216 if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize,
217 pages))
218 goto out_err;
219
220 kctx->seq_send++;
221
222 return ((kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE);
223out_err:
224 if (md5cksum.data) kfree(md5cksum.data);
225 return GSS_S_FAILURE;
226}
227
228u32
229gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
230{
231 struct krb5_ctx *kctx = ctx->internal_ctx_id;
232 int signalg;
233 int sealalg;
234 s32 checksum_type;
235 struct xdr_netobj md5cksum = {.len = 0, .data = NULL};
236 s32 now;
237 int direction;
238 s32 seqnum;
239 unsigned char *ptr;
240 int bodysize;
241 u32 ret = GSS_S_DEFECTIVE_TOKEN;
242 void *data_start, *orig_start;
243 int data_len;
244 int blocksize;
245
246 dprintk("RPC: gss_unwrap_kerberos\n");
247
248 ptr = (u8 *)buf->head[0].iov_base + offset;
249 if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr,
250 buf->len - offset))
251 goto out;
252
253 if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) ||
254 (*ptr++ != (KG_TOK_WRAP_MSG &0xff)) )
255 goto out;
256
257 /* XXX sanity-check bodysize?? */
258
259 /* get the sign and seal algorithms */
260
261 signalg = ptr[0] + (ptr[1] << 8);
262 sealalg = ptr[2] + (ptr[3] << 8);
263
264 /* Sanity checks */
265
266 if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
267 goto out;
268
269 if (sealalg == 0xffff)
270 goto out;
271
272 /* in the current spec, there is only one valid seal algorithm per
273 key type, so a simple comparison is ok */
274
275 if (sealalg != kctx->sealalg)
276 goto out;
277
278 /* there are several mappings of seal algorithms to sign algorithms,
279 but few enough that we can try them all. */
280
281 if ((kctx->sealalg == SEAL_ALG_NONE && signalg > 1) ||
282 (kctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) ||
283 (kctx->sealalg == SEAL_ALG_DES3KD &&
284 signalg != SGN_ALG_HMAC_SHA1_DES3_KD))
285 goto out;
286
287 if (gss_decrypt_xdr_buf(kctx->enc, buf,
288 ptr + 22 - (unsigned char *)buf->head[0].iov_base))
289 goto out;
290
291 /* compute the checksum of the message */
292
293 /* initialize the cksum */
294 switch (signalg) {
295 case SGN_ALG_DES_MAC_MD5:
296 checksum_type = CKSUMTYPE_RSA_MD5;
297 break;
298 default:
299 ret = GSS_S_DEFECTIVE_TOKEN;
300 goto out;
301 }
302
303 switch (signalg) {
304 case SGN_ALG_DES_MAC_MD5:
305 ret = make_checksum(checksum_type, ptr - 2, 8, buf,
306 ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum);
307 if (ret)
308 goto out;
309
310 ret = krb5_encrypt(kctx->seq, NULL, md5cksum.data,
311 md5cksum.data, md5cksum.len);
312 if (ret)
313 goto out;
314
315 if (memcmp(md5cksum.data + 8, ptr + 14, 8)) {
316 ret = GSS_S_BAD_SIG;
317 goto out;
318 }
319 break;
320 default:
321 ret = GSS_S_DEFECTIVE_TOKEN;
322 goto out;
323 }
324
325 /* it got through unscathed. Make sure the context is unexpired */
326
327 now = get_seconds();
328
329 ret = GSS_S_CONTEXT_EXPIRED;
330 if (now > kctx->endtime)
331 goto out;
332
333 /* do sequencing checks */
334
335 ret = GSS_S_BAD_SIG;
336 if ((ret = krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction,
337 &seqnum)))
338 goto out;
339
340 if ((kctx->initiate && direction != 0xff) ||
341 (!kctx->initiate && direction != 0))
342 goto out;
343
344 /* Copy the data back to the right position. XXX: Would probably be
345 * better to copy and encrypt at the same time. */
346
347 blocksize = crypto_tfm_alg_blocksize(kctx->enc);
348 data_start = ptr + 22 + blocksize;
349 orig_start = buf->head[0].iov_base + offset;
350 data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
351 memmove(orig_start, data_start, data_len);
352 buf->head[0].iov_len -= (data_start - orig_start);
353 buf->len -= (data_start - orig_start);
354
355 ret = GSS_S_DEFECTIVE_TOKEN;
356 if (gss_krb5_remove_padding(buf, blocksize))
357 goto out;
358
359 ret = GSS_S_COMPLETE;
360out:
361 if (md5cksum.data) kfree(md5cksum.data);
362 return ret;
363}
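
Unlike the 4-byte XDR pad earlier in this series, gss_krb5_padding() always adds between 1 and 8 bytes -- 8 - (len & 7) is never zero -- and each pad byte stores the pad count, so gss_krb5_remove_padding() can recover it from the final byte alone. A self-checking round-trip sketch on a flat buffer; the kernel works across head/pages/tail, but the arithmetic is the same.

    #include <assert.h>
    #include <stdio.h>
    #include <string.h>

    /* gss_krb5_padding(): bytes needed to reach the next 8-byte
     * boundary; returns 8, not 0, for an aligned length, so a pad
     * byte is always present to read back. */
    static int krb5_pad(int len) { return 8 - (len & 7); }

    static int add_padding(unsigned char *buf, int len)
    {
        int pad = krb5_pad(len);

        memset(buf + len, pad, pad);    /* each pad byte holds the count */
        return len + pad;
    }

    static int remove_padding(const unsigned char *buf, int len)
    {
        int pad = buf[len - 1];

        if (pad < 1 || pad > 8 || pad > len)
            return -1;    /* corrupt token */
        return len - pad;
    }

    int main(void)
    {
        unsigned char buf[64];
        int len;

        for (len = 1; len <= 24; len++) {
            int padded = add_padding(buf, len);

            assert(padded % 8 == 0);
            assert(remove_padding(buf, padded) == len);
        }
        puts("round trip ok");
        return 0;
    }
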
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 9dfb68377d69..b048bf672da2 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -35,7 +35,6 @@
35 35
36#include <linux/types.h> 36#include <linux/types.h>
37#include <linux/slab.h> 37#include <linux/slab.h>
38#include <linux/socket.h>
39#include <linux/module.h> 38#include <linux/module.h>
40#include <linux/sunrpc/msg_prot.h> 39#include <linux/sunrpc/msg_prot.h>
41#include <linux/sunrpc/gss_asn1.h> 40#include <linux/sunrpc/gss_asn1.h>
@@ -251,13 +250,11 @@ gss_import_sec_context(const void *input_token, size_t bufsize,
251 250
252u32 251u32
253gss_get_mic(struct gss_ctx *context_handle, 252gss_get_mic(struct gss_ctx *context_handle,
254 u32 qop,
255 struct xdr_buf *message, 253 struct xdr_buf *message,
256 struct xdr_netobj *mic_token) 254 struct xdr_netobj *mic_token)
257{ 255{
258 return context_handle->mech_type->gm_ops 256 return context_handle->mech_type->gm_ops
259 ->gss_get_mic(context_handle, 257 ->gss_get_mic(context_handle,
260 qop,
261 message, 258 message,
262 mic_token); 259 mic_token);
263} 260}
@@ -267,16 +264,34 @@ gss_get_mic(struct gss_ctx *context_handle,
267u32 264u32
268gss_verify_mic(struct gss_ctx *context_handle, 265gss_verify_mic(struct gss_ctx *context_handle,
269 struct xdr_buf *message, 266 struct xdr_buf *message,
270 struct xdr_netobj *mic_token, 267 struct xdr_netobj *mic_token)
271 u32 *qstate)
272{ 268{
273 return context_handle->mech_type->gm_ops 269 return context_handle->mech_type->gm_ops
274 ->gss_verify_mic(context_handle, 270 ->gss_verify_mic(context_handle,
275 message, 271 message,
276 mic_token, 272 mic_token);
277 qstate);
278} 273}
279 274
275u32
276gss_wrap(struct gss_ctx *ctx_id,
277 int offset,
278 struct xdr_buf *buf,
279 struct page **inpages)
280{
281 return ctx_id->mech_type->gm_ops
282 ->gss_wrap(ctx_id, offset, buf, inpages);
283}
284
285u32
286gss_unwrap(struct gss_ctx *ctx_id,
287 int offset,
288 struct xdr_buf *buf)
289{
290 return ctx_id->mech_type->gm_ops
291 ->gss_unwrap(ctx_id, offset, buf);
292}
293
294
280/* gss_delete_sec_context: free all resources associated with context_handle. 295/* gss_delete_sec_context: free all resources associated with context_handle.
281 * Note this differs from the RFC 2744-specified prototype in that we don't 296 * Note this differs from the RFC 2744-specified prototype in that we don't
282 * bother returning an output token, since it would never be used anyway. */ 297 * bother returning an output token, since it would never be used anyway. */
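
gss_wrap() and gss_unwrap() above are pure indirection: the patch adds two entries to the per-mechanism gss_api_ops table and calls through it, so krb5 (and later mechanisms) plug in without the switch layer knowing any token format. A stripped-down sketch of the dispatch pattern, with simplified stand-in types:

    #include <stdio.h>

    struct gss_ctx;

    struct gss_api_ops {
        unsigned int (*gss_wrap)(struct gss_ctx *ctx, int offset);
        unsigned int (*gss_unwrap)(struct gss_ctx *ctx, int offset);
    };

    struct gss_ctx {
        const struct gss_api_ops *ops;    /* stands in for mech_type->gm_ops */
    };

    static unsigned int krb5_wrap(struct gss_ctx *ctx, int offset)
    {
        (void)ctx;
        printf("krb5 wrap at offset %d\n", offset);
        return 0;    /* GSS_S_COMPLETE */
    }

    static unsigned int krb5_unwrap(struct gss_ctx *ctx, int offset)
    {
        (void)ctx;
        printf("krb5 unwrap at offset %d\n", offset);
        return 0;
    }

    static const struct gss_api_ops krb5_ops = {
        .gss_wrap   = krb5_wrap,
        .gss_unwrap = krb5_unwrap,
    };

    /* Generic entry points, mirroring gss_mech_switch.c: pure
     * indirection, no mechanism knowledge. */
    static unsigned int gss_wrap(struct gss_ctx *ctx, int offset)
    {
        return ctx->ops->gss_wrap(ctx, offset);
    }

    static unsigned int gss_unwrap(struct gss_ctx *ctx, int offset)
    {
        return ctx->ops->gss_unwrap(ctx, offset);
    }

    int main(void)
    {
        struct gss_ctx ctx = { &krb5_ops };

        gss_wrap(&ctx, 20);
        gss_unwrap(&ctx, 20);
        return 0;
    }
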
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
index 6c97d61baa9b..39b3edc14694 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
@@ -224,18 +224,13 @@ gss_delete_sec_context_spkm3(void *internal_ctx) {
224static u32 224static u32
225gss_verify_mic_spkm3(struct gss_ctx *ctx, 225gss_verify_mic_spkm3(struct gss_ctx *ctx,
226 struct xdr_buf *signbuf, 226 struct xdr_buf *signbuf,
227 struct xdr_netobj *checksum, 227 struct xdr_netobj *checksum)
228 u32 *qstate) { 228{
229 u32 maj_stat = 0; 229 u32 maj_stat = 0;
230 int qop_state = 0;
231 struct spkm3_ctx *sctx = ctx->internal_ctx_id; 230 struct spkm3_ctx *sctx = ctx->internal_ctx_id;
232 231
233 dprintk("RPC: gss_verify_mic_spkm3 calling spkm3_read_token\n"); 232 dprintk("RPC: gss_verify_mic_spkm3 calling spkm3_read_token\n");
234 maj_stat = spkm3_read_token(sctx, checksum, signbuf, &qop_state, 233 maj_stat = spkm3_read_token(sctx, checksum, signbuf, SPKM_MIC_TOK);
235 SPKM_MIC_TOK);
236
237 if (!maj_stat && qop_state)
238 *qstate = qop_state;
239 234
240 dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat); 235 dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat);
241 return maj_stat; 236 return maj_stat;
@@ -243,15 +238,15 @@ gss_verify_mic_spkm3(struct gss_ctx *ctx,
243 238
244static u32 239static u32
245gss_get_mic_spkm3(struct gss_ctx *ctx, 240gss_get_mic_spkm3(struct gss_ctx *ctx,
246 u32 qop,
247 struct xdr_buf *message_buffer, 241 struct xdr_buf *message_buffer,
248 struct xdr_netobj *message_token) { 242 struct xdr_netobj *message_token)
243{
249 u32 err = 0; 244 u32 err = 0;
250 struct spkm3_ctx *sctx = ctx->internal_ctx_id; 245 struct spkm3_ctx *sctx = ctx->internal_ctx_id;
251 246
252 dprintk("RPC: gss_get_mic_spkm3\n"); 247 dprintk("RPC: gss_get_mic_spkm3\n");
253 248
254 err = spkm3_make_token(sctx, qop, message_buffer, 249 err = spkm3_make_token(sctx, message_buffer,
255 message_token, SPKM_MIC_TOK); 250 message_token, SPKM_MIC_TOK);
256 return err; 251 return err;
257} 252}
@@ -264,8 +259,8 @@ static struct gss_api_ops gss_spkm3_ops = {
264}; 259};
265 260
266static struct pf_desc gss_spkm3_pfs[] = { 261static struct pf_desc gss_spkm3_pfs[] = {
267 {RPC_AUTH_GSS_SPKM, 0, RPC_GSS_SVC_NONE, "spkm3"}, 262 {RPC_AUTH_GSS_SPKM, RPC_GSS_SVC_NONE, "spkm3"},
268 {RPC_AUTH_GSS_SPKMI, 0, RPC_GSS_SVC_INTEGRITY, "spkm3i"}, 263 {RPC_AUTH_GSS_SPKMI, RPC_GSS_SVC_INTEGRITY, "spkm3i"},
269}; 264};
270 265
271static struct gss_api_mech gss_spkm3_mech = { 266static struct gss_api_mech gss_spkm3_mech = {
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c
index 25339868d462..148201e929d0 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_seal.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c
@@ -51,7 +51,7 @@
51 */ 51 */
52 52
53u32 53u32
54spkm3_make_token(struct spkm3_ctx *ctx, int qop_req, 54spkm3_make_token(struct spkm3_ctx *ctx,
55 struct xdr_buf * text, struct xdr_netobj * token, 55 struct xdr_buf * text, struct xdr_netobj * token,
56 int toktype) 56 int toktype)
57{ 57{
@@ -68,8 +68,6 @@ spkm3_make_token(struct spkm3_ctx *ctx, int qop_req,
68 dprintk("RPC: spkm3_make_token\n"); 68 dprintk("RPC: spkm3_make_token\n");
69 69
70 now = jiffies; 70 now = jiffies;
71 if (qop_req != 0)
72 goto out_err;
73 71
74 if (ctx->ctx_id.len != 16) { 72 if (ctx->ctx_id.len != 16) {
75 dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n", 73 dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n",
diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
index 65ce81bf0bc4..c3c0d9586103 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
@@ -52,7 +52,7 @@ u32
52spkm3_read_token(struct spkm3_ctx *ctx, 52spkm3_read_token(struct spkm3_ctx *ctx,
53 struct xdr_netobj *read_token, /* checksum */ 53 struct xdr_netobj *read_token, /* checksum */
54 struct xdr_buf *message_buffer, /* signbuf */ 54 struct xdr_buf *message_buffer, /* signbuf */
55 int *qop_state, int toktype) 55 int toktype)
56{ 56{
57 s32 code; 57 s32 code;
58 struct xdr_netobj wire_cksum = {.len =0, .data = NULL}; 58 struct xdr_netobj wire_cksum = {.len =0, .data = NULL};
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index e3308195374e..e4ada15ed856 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -566,8 +566,7 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
566 566
567 if (rqstp->rq_deferred) /* skip verification of revisited request */ 567 if (rqstp->rq_deferred) /* skip verification of revisited request */
568 return SVC_OK; 568 return SVC_OK;
569 if (gss_verify_mic(ctx_id, &rpchdr, &checksum, NULL) 569 if (gss_verify_mic(ctx_id, &rpchdr, &checksum) != GSS_S_COMPLETE) {
570 != GSS_S_COMPLETE) {
571 *authp = rpcsec_gsserr_credproblem; 570 *authp = rpcsec_gsserr_credproblem;
572 return SVC_DENIED; 571 return SVC_DENIED;
573 } 572 }
@@ -604,7 +603,7 @@ gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq)
604 xdr_buf_from_iov(&iov, &verf_data); 603 xdr_buf_from_iov(&iov, &verf_data);
605 p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len; 604 p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len;
606 mic.data = (u8 *)(p + 1); 605 mic.data = (u8 *)(p + 1);
607 maj_stat = gss_get_mic(ctx_id, 0, &verf_data, &mic); 606 maj_stat = gss_get_mic(ctx_id, &verf_data, &mic);
608 if (maj_stat != GSS_S_COMPLETE) 607 if (maj_stat != GSS_S_COMPLETE)
609 return -1; 608 return -1;
610 *p++ = htonl(mic.len); 609 *p++ = htonl(mic.len);
@@ -710,7 +709,7 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
710 goto out; 709 goto out;
711 if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len)) 710 if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len))
712 goto out; 711 goto out;
713 maj_stat = gss_verify_mic(ctx, &integ_buf, &mic, NULL); 712 maj_stat = gss_verify_mic(ctx, &integ_buf, &mic);
714 if (maj_stat != GSS_S_COMPLETE) 713 if (maj_stat != GSS_S_COMPLETE)
715 goto out; 714 goto out;
716 if (ntohl(svc_getu32(&buf->head[0])) != seq) 715 if (ntohl(svc_getu32(&buf->head[0])) != seq)
@@ -1012,7 +1011,7 @@ svcauth_gss_release(struct svc_rqst *rqstp)
1012 resv = &resbuf->tail[0]; 1011 resv = &resbuf->tail[0];
1013 } 1012 }
1014 mic.data = (u8 *)resv->iov_base + resv->iov_len + 4; 1013 mic.data = (u8 *)resv->iov_base + resv->iov_len + 4;
1015 if (gss_get_mic(gsd->rsci->mechctx, 0, &integ_buf, &mic)) 1014 if (gss_get_mic(gsd->rsci->mechctx, &integ_buf, &mic))
1016 goto out_err; 1015 goto out_err;
1017 svc_putu32(resv, htonl(mic.len)); 1016 svc_putu32(resv, htonl(mic.len));
1018 memset(mic.data + mic.len, 0, 1017 memset(mic.data + mic.len, 0,
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 9b72d3abf823..f56767aaa927 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -7,9 +7,7 @@
7 */ 7 */
8 8
9#include <linux/types.h> 9#include <linux/types.h>
10#include <linux/socket.h>
11#include <linux/module.h> 10#include <linux/module.h>
12#include <linux/in.h>
13#include <linux/utsname.h> 11#include <linux/utsname.h>
14#include <linux/sunrpc/clnt.h> 12#include <linux/sunrpc/clnt.h>
15#include <linux/sched.h> 13#include <linux/sched.h>
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 4ff297a9b15b..890fb5ea0dcb 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -9,8 +9,6 @@
9#include <linux/types.h> 9#include <linux/types.h>
10#include <linux/sched.h> 10#include <linux/sched.h>
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/socket.h>
13#include <linux/in.h>
14#include <linux/sunrpc/clnt.h> 12#include <linux/sunrpc/clnt.h>
15#include <linux/sunrpc/auth.h> 13#include <linux/sunrpc/auth.h>
16 14
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index f17e6153b688..702ede309b06 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/net/sunrpc/rpcclnt.c 2 * linux/net/sunrpc/clnt.c
3 * 3 *
4 * This file contains the high-level RPC interface. 4 * This file contains the high-level RPC interface.
5 * It is modeled as a finite state machine to support both synchronous 5 * It is modeled as a finite state machine to support both synchronous
@@ -27,7 +27,6 @@
27#include <linux/types.h> 27#include <linux/types.h>
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/in.h>
31#include <linux/utsname.h> 30#include <linux/utsname.h>
32 31
33#include <linux/sunrpc/clnt.h> 32#include <linux/sunrpc/clnt.h>
@@ -53,6 +52,7 @@ static void call_allocate(struct rpc_task *task);
53static void call_encode(struct rpc_task *task); 52static void call_encode(struct rpc_task *task);
54static void call_decode(struct rpc_task *task); 53static void call_decode(struct rpc_task *task);
55static void call_bind(struct rpc_task *task); 54static void call_bind(struct rpc_task *task);
55static void call_bind_status(struct rpc_task *task);
56static void call_transmit(struct rpc_task *task); 56static void call_transmit(struct rpc_task *task);
57static void call_status(struct rpc_task *task); 57static void call_status(struct rpc_task *task);
58static void call_refresh(struct rpc_task *task); 58static void call_refresh(struct rpc_task *task);
@@ -517,15 +517,8 @@ void
517rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) 517rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize)
518{ 518{
519 struct rpc_xprt *xprt = clnt->cl_xprt; 519 struct rpc_xprt *xprt = clnt->cl_xprt;
520 520 if (xprt->ops->set_buffer_size)
521 xprt->sndsize = 0; 521 xprt->ops->set_buffer_size(xprt, sndsize, rcvsize);
522 if (sndsize)
523 xprt->sndsize = sndsize + RPC_SLACK_SPACE;
524 xprt->rcvsize = 0;
525 if (rcvsize)
526 xprt->rcvsize = rcvsize + RPC_SLACK_SPACE;
527 if (xprt_connected(xprt))
528 xprt_sock_setbufsize(xprt);
529} 522}
530 523
531/* 524/*
@@ -685,13 +678,11 @@ call_allocate(struct rpc_task *task)
685static void 678static void
686call_encode(struct rpc_task *task) 679call_encode(struct rpc_task *task)
687{ 680{
688 struct rpc_clnt *clnt = task->tk_client;
689 struct rpc_rqst *req = task->tk_rqstp; 681 struct rpc_rqst *req = task->tk_rqstp;
690 struct xdr_buf *sndbuf = &req->rq_snd_buf; 682 struct xdr_buf *sndbuf = &req->rq_snd_buf;
691 struct xdr_buf *rcvbuf = &req->rq_rcv_buf; 683 struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
692 unsigned int bufsiz; 684 unsigned int bufsiz;
693 kxdrproc_t encode; 685 kxdrproc_t encode;
694 int status;
695 u32 *p; 686 u32 *p;
696 687
697 dprintk("RPC: %4d call_encode (status %d)\n", 688 dprintk("RPC: %4d call_encode (status %d)\n",
@@ -719,11 +710,15 @@ call_encode(struct rpc_task *task)
719 rpc_exit(task, -EIO); 710 rpc_exit(task, -EIO);
720 return; 711 return;
721 } 712 }
722 if (encode && (status = rpcauth_wrap_req(task, encode, req, p, 713 if (encode == NULL)
723 task->tk_msg.rpc_argp)) < 0) { 714 return;
724 printk(KERN_WARNING "%s: can't encode arguments: %d\n", 715
725 clnt->cl_protname, -status); 716 task->tk_status = rpcauth_wrap_req(task, encode, req, p,
726 rpc_exit(task, status); 717 task->tk_msg.rpc_argp);
718 if (task->tk_status == -ENOMEM) {
719 /* XXX: Is this sane? */
720 rpc_delay(task, 3*HZ);
721 task->tk_status = -EAGAIN;
727 } 722 }
728} 723}
729 724
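The reworked call_encode() above no longer kills the task on an encode failure: the result of rpcauth_wrap_req() is parked in task->tk_status, and an -ENOMEM (say, a GSS layer that could not allocate pages) merely delays the task and is converted to -EAGAIN. A minimal sketch of that delay-and-retry idiom, assuming rpc_delay() puts the task to sleep for the given number of jiffies and the RPC state machine re-runs the same step when the task wakes (do_encode() is a hypothetical stand-in for rpcauth_wrap_req()):

	/* Sketch only: the backoff idiom call_encode() now uses. */
	static void frob_encode(struct rpc_task *task)
	{
		task->tk_status = do_encode(task);	/* hypothetical */
		if (task->tk_status == -ENOMEM) {
			rpc_delay(task, 3 * HZ);	/* sleep, then retry */
			task->tk_status = -EAGAIN;
		}
	}

The companion call_transmit() hunk further down backs out through out_nosend in this case, dropping the transport write lock so other requests are not stalled while this one waits.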
@@ -734,43 +729,95 @@ static void
734call_bind(struct rpc_task *task) 729call_bind(struct rpc_task *task)
735{ 730{
736 struct rpc_clnt *clnt = task->tk_client; 731 struct rpc_clnt *clnt = task->tk_client;
737 struct rpc_xprt *xprt = clnt->cl_xprt;
738
739 dprintk("RPC: %4d call_bind xprt %p %s connected\n", task->tk_pid,
740 xprt, (xprt_connected(xprt) ? "is" : "is not"));
741 732
742 task->tk_action = (xprt_connected(xprt)) ? call_transmit : call_connect; 733 dprintk("RPC: %4d call_bind (status %d)\n",
734 task->tk_pid, task->tk_status);
743 735
736 task->tk_action = call_connect;
744 if (!clnt->cl_port) { 737 if (!clnt->cl_port) {
745 task->tk_action = call_connect; 738 task->tk_action = call_bind_status;
746 task->tk_timeout = RPC_CONNECT_TIMEOUT; 739 task->tk_timeout = task->tk_xprt->bind_timeout;
747 rpc_getport(task, clnt); 740 rpc_getport(task, clnt);
748 } 741 }
749} 742}
750 743
751/* 744/*
752 * 4a. Connect to the RPC server (TCP case) 745 * 4a. Sort out bind result
746 */
747static void
748call_bind_status(struct rpc_task *task)
749{
750 int status = -EACCES;
751
752 if (task->tk_status >= 0) {
753 dprintk("RPC: %4d call_bind_status (status %d)\n",
754 task->tk_pid, task->tk_status);
755 task->tk_status = 0;
756 task->tk_action = call_connect;
757 return;
758 }
759
760 switch (task->tk_status) {
761 case -EACCES:
762 dprintk("RPC: %4d remote rpcbind: RPC program/version unavailable\n",
763 task->tk_pid);
764 rpc_delay(task, 3*HZ);
765 goto retry_bind;
766 case -ETIMEDOUT:
767 dprintk("RPC: %4d rpcbind request timed out\n",
768 task->tk_pid);
769 if (RPC_IS_SOFT(task)) {
770 status = -EIO;
771 break;
772 }
773 goto retry_bind;
774 case -EPFNOSUPPORT:
775 dprintk("RPC: %4d remote rpcbind service unavailable\n",
776 task->tk_pid);
777 break;
778 case -EPROTONOSUPPORT:
779 dprintk("RPC: %4d remote rpcbind version 2 unavailable\n",
780 task->tk_pid);
781 break;
782 default:
783 dprintk("RPC: %4d unrecognized rpcbind error (%d)\n",
784 task->tk_pid, -task->tk_status);
785 status = -EIO;
786 break;
787 }
788
789 rpc_exit(task, status);
790 return;
791
792retry_bind:
793 task->tk_status = 0;
794 task->tk_action = call_bind;
795 return;
796}
797
798/*
799 * 4b. Connect to the RPC server
753 */ 800 */
754static void 801static void
755call_connect(struct rpc_task *task) 802call_connect(struct rpc_task *task)
756{ 803{
757 struct rpc_clnt *clnt = task->tk_client; 804 struct rpc_xprt *xprt = task->tk_xprt;
758 805
759 dprintk("RPC: %4d call_connect status %d\n", 806 dprintk("RPC: %4d call_connect xprt %p %s connected\n",
760 task->tk_pid, task->tk_status); 807 task->tk_pid, xprt,
808 (xprt_connected(xprt) ? "is" : "is not"));
761 809
762 if (xprt_connected(clnt->cl_xprt)) { 810 task->tk_action = call_transmit;
763 task->tk_action = call_transmit; 811 if (!xprt_connected(xprt)) {
764 return; 812 task->tk_action = call_connect_status;
813 if (task->tk_status < 0)
814 return;
815 xprt_connect(task);
765 } 816 }
766 task->tk_action = call_connect_status;
767 if (task->tk_status < 0)
768 return;
769 xprt_connect(task);
770} 817}
771 818
772/* 819/*
773 * 4b. Sort out connect result 820 * 4c. Sort out connect result
774 */ 821 */
775static void 822static void
776call_connect_status(struct rpc_task *task) 823call_connect_status(struct rpc_task *task)
@@ -778,6 +825,9 @@ call_connect_status(struct rpc_task *task)
778 struct rpc_clnt *clnt = task->tk_client; 825 struct rpc_clnt *clnt = task->tk_client;
779 int status = task->tk_status; 826 int status = task->tk_status;
780 827
828 dprintk("RPC: %5u call_connect_status (status %d)\n",
829 task->tk_pid, task->tk_status);
830
781 task->tk_status = 0; 831 task->tk_status = 0;
782 if (status >= 0) { 832 if (status >= 0) {
783 clnt->cl_stats->netreconn++; 833 clnt->cl_stats->netreconn++;
@@ -785,17 +835,19 @@ call_connect_status(struct rpc_task *task)
785 return; 835 return;
786 } 836 }
787 837
788 /* Something failed: we may have to rebind */ 838 /* Something failed: remote service port may have changed */
789 if (clnt->cl_autobind) 839 if (clnt->cl_autobind)
790 clnt->cl_port = 0; 840 clnt->cl_port = 0;
841
791 switch (status) { 842 switch (status) {
792 case -ENOTCONN: 843 case -ENOTCONN:
793 case -ETIMEDOUT: 844 case -ETIMEDOUT:
794 case -EAGAIN: 845 case -EAGAIN:
795 task->tk_action = (clnt->cl_port == 0) ? call_bind : call_connect; 846 task->tk_action = call_bind;
796 break; 847 break;
797 default: 848 default:
798 rpc_exit(task, -EIO); 849 rpc_exit(task, -EIO);
850 break;
799 } 851 }
800} 852}
801 853
@@ -815,10 +867,12 @@ call_transmit(struct rpc_task *task)
815 if (task->tk_status != 0) 867 if (task->tk_status != 0)
816 return; 868 return;
817 /* Encode here so that rpcsec_gss can use correct sequence number. */ 869 /* Encode here so that rpcsec_gss can use correct sequence number. */
818 if (!task->tk_rqstp->rq_bytes_sent) 870 if (task->tk_rqstp->rq_bytes_sent == 0) {
819 call_encode(task); 871 call_encode(task);
820 if (task->tk_status < 0) 872 /* Did the encode result in an error condition? */
821 return; 873 if (task->tk_status != 0)
874 goto out_nosend;
875 }
822 xprt_transmit(task); 876 xprt_transmit(task);
823 if (task->tk_status < 0) 877 if (task->tk_status < 0)
824 return; 878 return;
@@ -826,6 +880,10 @@ call_transmit(struct rpc_task *task)
826 task->tk_action = NULL; 880 task->tk_action = NULL;
827 rpc_wake_up_task(task); 881 rpc_wake_up_task(task);
828 } 882 }
883 return;
884out_nosend:
885 /* release socket write lock before attempting to handle error */
886 xprt_abort_transmit(task);
829} 887}
830 888
831/* 889/*
@@ -1020,13 +1078,12 @@ static u32 *
1020call_header(struct rpc_task *task) 1078call_header(struct rpc_task *task)
1021{ 1079{
1022 struct rpc_clnt *clnt = task->tk_client; 1080 struct rpc_clnt *clnt = task->tk_client;
1023 struct rpc_xprt *xprt = clnt->cl_xprt;
1024 struct rpc_rqst *req = task->tk_rqstp; 1081 struct rpc_rqst *req = task->tk_rqstp;
1025 u32 *p = req->rq_svec[0].iov_base; 1082 u32 *p = req->rq_svec[0].iov_base;
1026 1083
1027 /* FIXME: check buffer size? */ 1084 /* FIXME: check buffer size? */
1028 if (xprt->stream) 1085
1029 *p++ = 0; /* fill in later */ 1086 p = xprt_skip_transport_header(task->tk_xprt, p);
1030 *p++ = req->rq_xid; /* XID */ 1087 *p++ = req->rq_xid; /* XID */
1031 *p++ = htonl(RPC_CALL); /* CALL */ 1088 *p++ = htonl(RPC_CALL); /* CALL */
1032 *p++ = htonl(RPC_VERSION); /* RPC version */ 1089 *p++ = htonl(RPC_VERSION); /* RPC version */
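Two patterns recur in the clnt.c hunks above: socket details move behind the new transport switch (rpc_setbufsize() and call_header() now go through xprt->ops and xprt_skip_transport_header() instead of poking xprt->sndsize or xprt->stream directly), and rpcbind gets its own call_bind_status state. As a concrete illustration of the first pattern, a datagram transport's set_buffer_size operation might simply relocate the logic deleted from rpc_setbufsize(); a minimal sketch, where the xs_ names and the xs_sock_setbufsize() helper are illustrative rather than part of this patch:

	/* Sketch only: transport-side buffer sizing, mirroring the
	 * code removed from rpc_setbufsize() above. */
	static void xs_udp_set_buffer_size(struct rpc_xprt *xprt,
					   size_t sndsize, size_t rcvsize)
	{
		xprt->sndsize = 0;
		if (sndsize)
			xprt->sndsize = sndsize + RPC_SLACK_SPACE;
		xprt->rcvsize = 0;
		if (rcvsize)
			xprt->rcvsize = rcvsize + RPC_SLACK_SPACE;
		if (xprt_connected(xprt))
			xs_sock_setbufsize(xprt);	/* hypothetical helper */
	}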
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index 4e81f2766923..a398575f94b8 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -26,7 +26,7 @@
26#define PMAP_GETPORT 3 26#define PMAP_GETPORT 3
27 27
28static struct rpc_procinfo pmap_procedures[]; 28static struct rpc_procinfo pmap_procedures[];
29static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int); 29static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int, int);
30static void pmap_getport_done(struct rpc_task *); 30static void pmap_getport_done(struct rpc_task *);
31static struct rpc_program pmap_program; 31static struct rpc_program pmap_program;
32static DEFINE_SPINLOCK(pmap_lock); 32static DEFINE_SPINLOCK(pmap_lock);
@@ -65,7 +65,7 @@ rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt)
65 map->pm_binding = 1; 65 map->pm_binding = 1;
66 spin_unlock(&pmap_lock); 66 spin_unlock(&pmap_lock);
67 67
68 pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot); 68 pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot, 0);
69 if (IS_ERR(pmap_clnt)) { 69 if (IS_ERR(pmap_clnt)) {
70 task->tk_status = PTR_ERR(pmap_clnt); 70 task->tk_status = PTR_ERR(pmap_clnt);
71 goto bailout; 71 goto bailout;
@@ -112,7 +112,7 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
112 NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); 112 NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
113 113
114 sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr)); 114 sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr));
115 pmap_clnt = pmap_create(hostname, sin, prot); 115 pmap_clnt = pmap_create(hostname, sin, prot, 0);
116 if (IS_ERR(pmap_clnt)) 116 if (IS_ERR(pmap_clnt))
117 return PTR_ERR(pmap_clnt); 117 return PTR_ERR(pmap_clnt);
118 118
@@ -171,7 +171,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
171 171
172 sin.sin_family = AF_INET; 172 sin.sin_family = AF_INET;
173 sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); 173 sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
174 pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP); 174 pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1);
175 if (IS_ERR(pmap_clnt)) { 175 if (IS_ERR(pmap_clnt)) {
176 error = PTR_ERR(pmap_clnt); 176 error = PTR_ERR(pmap_clnt);
177 dprintk("RPC: couldn't create pmap client. Error = %d\n", error); 177 dprintk("RPC: couldn't create pmap client. Error = %d\n", error);
@@ -198,7 +198,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
198} 198}
199 199
200static struct rpc_clnt * 200static struct rpc_clnt *
201pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto) 201pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged)
202{ 202{
203 struct rpc_xprt *xprt; 203 struct rpc_xprt *xprt;
204 struct rpc_clnt *clnt; 204 struct rpc_clnt *clnt;
@@ -208,6 +208,8 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto)
208 if (IS_ERR(xprt)) 208 if (IS_ERR(xprt))
209 return (struct rpc_clnt *)xprt; 209 return (struct rpc_clnt *)xprt;
210 xprt->addr.sin_port = htons(RPC_PMAP_PORT); 210 xprt->addr.sin_port = htons(RPC_PMAP_PORT);
211 if (!privileged)
212 xprt->resvport = 0;
211 213
212 /* printk("pmap: create clnt\n"); */ 214 /* printk("pmap: create clnt\n"); */
213 clnt = rpc_new_client(xprt, hostname, 215 clnt = rpc_new_client(xprt, hostname,
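The extra "privileged" argument separates the two kinds of portmapper traffic: rpc_getport() and rpc_getport_external() pass 0, so their short-lived query clients clear xprt->resvport and stop tying up one of the scarce reserved ports, while rpc_register() passes 1 because portmappers traditionally honour SET/UNSET requests only when they arrive from a privileged source port. Plain GETPORT lookups carry no such requirement, which is what makes the relaxation safe.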
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index ded6c63f11ec..4f188d0a5d11 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -76,25 +76,35 @@ int
76rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg) 76rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg)
77{ 77{
78 struct rpc_inode *rpci = RPC_I(inode); 78 struct rpc_inode *rpci = RPC_I(inode);
79 int res = 0; 79 int res = -EPIPE;
80 80
81 down(&inode->i_sem); 81 down(&inode->i_sem);
82 if (rpci->ops == NULL)
83 goto out;
82 if (rpci->nreaders) { 84 if (rpci->nreaders) {
83 list_add_tail(&msg->list, &rpci->pipe); 85 list_add_tail(&msg->list, &rpci->pipe);
84 rpci->pipelen += msg->len; 86 rpci->pipelen += msg->len;
87 res = 0;
85 } else if (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN) { 88 } else if (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN) {
86 if (list_empty(&rpci->pipe)) 89 if (list_empty(&rpci->pipe))
87 schedule_delayed_work(&rpci->queue_timeout, 90 schedule_delayed_work(&rpci->queue_timeout,
88 RPC_UPCALL_TIMEOUT); 91 RPC_UPCALL_TIMEOUT);
89 list_add_tail(&msg->list, &rpci->pipe); 92 list_add_tail(&msg->list, &rpci->pipe);
90 rpci->pipelen += msg->len; 93 rpci->pipelen += msg->len;
91 } else 94 res = 0;
92 res = -EPIPE; 95 }
96out:
93 up(&inode->i_sem); 97 up(&inode->i_sem);
94 wake_up(&rpci->waitq); 98 wake_up(&rpci->waitq);
95 return res; 99 return res;
96} 100}
97 101
102static inline void
103rpc_inode_setowner(struct inode *inode, void *private)
104{
105 RPC_I(inode)->private = private;
106}
107
98static void 108static void
99rpc_close_pipes(struct inode *inode) 109rpc_close_pipes(struct inode *inode)
100{ 110{
@@ -111,15 +121,10 @@ rpc_close_pipes(struct inode *inode)
111 rpci->ops->release_pipe(inode); 121 rpci->ops->release_pipe(inode);
112 rpci->ops = NULL; 122 rpci->ops = NULL;
113 } 123 }
124 rpc_inode_setowner(inode, NULL);
114 up(&inode->i_sem); 125 up(&inode->i_sem);
115} 126}
116 127
117static inline void
118rpc_inode_setowner(struct inode *inode, void *private)
119{
120 RPC_I(inode)->private = private;
121}
122
123static struct inode * 128static struct inode *
124rpc_alloc_inode(struct super_block *sb) 129rpc_alloc_inode(struct super_block *sb)
125{ 130{
@@ -501,7 +506,6 @@ repeat:
501 dentry = dvec[--n]; 506 dentry = dvec[--n];
502 if (dentry->d_inode) { 507 if (dentry->d_inode) {
503 rpc_close_pipes(dentry->d_inode); 508 rpc_close_pipes(dentry->d_inode);
504 rpc_inode_setowner(dentry->d_inode, NULL);
505 simple_unlink(dir, dentry); 509 simple_unlink(dir, dentry);
506 } 510 }
507 dput(dentry); 511 dput(dentry);
@@ -576,10 +580,8 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry)
576 int error; 580 int error;
577 581
578 shrink_dcache_parent(dentry); 582 shrink_dcache_parent(dentry);
579 if (dentry->d_inode) { 583 if (dentry->d_inode)
580 rpc_close_pipes(dentry->d_inode); 584 rpc_close_pipes(dentry->d_inode);
581 rpc_inode_setowner(dentry->d_inode, NULL);
582 }
583 if ((error = simple_rmdir(dir, dentry)) != 0) 585 if ((error = simple_rmdir(dir, dentry)) != 0)
584 return error; 586 return error;
585 if (!error) { 587 if (!error) {
@@ -732,7 +734,6 @@ rpc_unlink(char *path)
732 d_drop(dentry); 734 d_drop(dentry);
733 if (dentry->d_inode) { 735 if (dentry->d_inode) {
734 rpc_close_pipes(dentry->d_inode); 736 rpc_close_pipes(dentry->d_inode);
735 rpc_inode_setowner(dentry->d_inode, NULL);
736 error = simple_unlink(dir, dentry); 737 error = simple_unlink(dir, dentry);
737 } 738 }
738 dput(dentry); 739 dput(dentry);
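Moving rpc_inode_setowner(inode, NULL) into rpc_close_pipes(), together with the new rpci->ops check, makes teardown atomic under i_sem: once a pipe has been closed, rpc_queue_upcall() fails with -EPIPE instead of appending to a dead queue. A hedged caller sketch (the error handling shown is illustrative, not from this patch):

	/* Sketch only: an upcall producer coping with a torn-down pipe. */
	err = rpc_queue_upcall(inode, msg);
	if (err < 0) {
		/* -EPIPE: pipe already closed; report and reclaim msg */
		msg->errno = err;
		destroy_msg(msg);	/* hypothetical cleanup */
	}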
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
new file mode 100644
index 000000000000..8f97e90f36c8
--- /dev/null
+++ b/net/sunrpc/socklib.c
@@ -0,0 +1,175 @@
1/*
2 * linux/net/sunrpc/socklib.c
3 *
4 * Common socket helper routines for RPC client and server
5 *
6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
7 */
8
9#include <linux/types.h>
10#include <linux/pagemap.h>
11#include <linux/udp.h>
12#include <linux/sunrpc/xdr.h>
13
14
15/**
16 * skb_read_bits - copy some data bits from skb to internal buffer
17 * @desc: sk_buff copy helper
18 * @to: copy destination
19 * @len: number of bytes to copy
20 *
21 * Possibly called several times to iterate over an sk_buff and copy
22 * data out of it.
23 */
24static size_t skb_read_bits(skb_reader_t *desc, void *to, size_t len)
25{
26 if (len > desc->count)
27 len = desc->count;
28 if (skb_copy_bits(desc->skb, desc->offset, to, len))
29 return 0;
30 desc->count -= len;
31 desc->offset += len;
32 return len;
33}
34
35/**
36 * skb_read_and_csum_bits - copy and checksum from skb to buffer
37 * @desc: sk_buff copy helper
38 * @to: copy destination
39 * @len: number of bytes to copy
40 *
41 * Same as skb_read_bits, but calculate a checksum at the same time.
42 */
43static size_t skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len)
44{
45 unsigned int csum2, pos;
46
47 if (len > desc->count)
48 len = desc->count;
49 pos = desc->offset;
50 csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len, 0);
51 desc->csum = csum_block_add(desc->csum, csum2, pos);
52 desc->count -= len;
53 desc->offset += len;
54 return len;
55}
56
57/**
58 * xdr_partial_copy_from_skb - copy data out of an skb
59 * @xdr: target XDR buffer
60 * @base: starting offset
61 * @desc: sk_buff copy helper
62 * @copy_actor: virtual method for copying data
63 *
64 */
65ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, skb_reader_t *desc, skb_read_actor_t copy_actor)
66{
67 struct page **ppage = xdr->pages;
68 unsigned int len, pglen = xdr->page_len;
69 ssize_t copied = 0;
70 int ret;
71
72 len = xdr->head[0].iov_len;
73 if (base < len) {
74 len -= base;
75 ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len);
76 copied += ret;
77 if (ret != len || !desc->count)
78 goto out;
79 base = 0;
80 } else
81 base -= len;
82
83 if (unlikely(pglen == 0))
84 goto copy_tail;
85 if (unlikely(base >= pglen)) {
86 base -= pglen;
87 goto copy_tail;
88 }
89 if (base || xdr->page_base) {
90 pglen -= base;
91 base += xdr->page_base;
92 ppage += base >> PAGE_CACHE_SHIFT;
93 base &= ~PAGE_CACHE_MASK;
94 }
95 do {
96 char *kaddr;
97
98 /* ACL likes to be lazy in allocating pages - ACLs
99 * are small by default but can get huge. */
100 if (unlikely(*ppage == NULL)) {
101 *ppage = alloc_page(GFP_ATOMIC);
102 if (unlikely(*ppage == NULL)) {
103 if (copied == 0)
104 copied = -ENOMEM;
105 goto out;
106 }
107 }
108
109 len = PAGE_CACHE_SIZE;
110 kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA);
111 if (base) {
112 len -= base;
113 if (pglen < len)
114 len = pglen;
115 ret = copy_actor(desc, kaddr + base, len);
116 base = 0;
117 } else {
118 if (pglen < len)
119 len = pglen;
120 ret = copy_actor(desc, kaddr, len);
121 }
122 flush_dcache_page(*ppage);
123 kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA);
124 copied += ret;
125 if (ret != len || !desc->count)
126 goto out;
127 ppage++;
128 } while ((pglen -= len) != 0);
129copy_tail:
130 len = xdr->tail[0].iov_len;
131 if (base < len)
132 copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base);
133out:
134 return copied;
135}
136
137/**
138 * csum_partial_copy_to_xdr - checksum and copy data
139 * @xdr: target XDR buffer
140 * @skb: source skb
141 *
142 * We have set things up such that we perform the checksum of the UDP
143 * packet in parallel with the copies into the RPC client iovec. -DaveM
144 */
145int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
146{
147 skb_reader_t desc;
148
149 desc.skb = skb;
150 desc.offset = sizeof(struct udphdr);
151 desc.count = skb->len - desc.offset;
152
153 if (skb->ip_summed == CHECKSUM_UNNECESSARY)
154 goto no_checksum;
155
156 desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
157 if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0)
158 return -1;
159 if (desc.offset != skb->len) {
160 unsigned int csum2;
161 csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0);
162 desc.csum = csum_block_add(desc.csum, csum2, desc.offset);
163 }
164 if (desc.count)
165 return -1;
166 if ((unsigned short)csum_fold(desc.csum))
167 return -1;
168 return 0;
169no_checksum:
170 if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0)
171 return -1;
172 if (desc.count)
173 return -1;
174 return 0;
175}
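Everything in this new file is lifted verbatim from net/sunrpc/xdr.c (the matching removal appears in the xdr.c diff below) so the client and the server share one copy of the skb-to-xdr helpers. The copy_actor callback is what keeps xdr_partial_copy_from_skb() generic: an actor consumes up to len bytes, advances desc->offset, decrements desc->count, and returns the number of bytes it handled. A minimal sketch of a third actor honouring that contract, here one that discards data without copying (purely illustrative):

	/* Sketch only: an skb_read_actor_t that skips bytes. */
	static size_t skb_skip_bits(skb_reader_t *desc, void *to, size_t len)
	{
		if (len > desc->count)
			len = desc->count;
		desc->count -= len;
		desc->offset += len;
		return len;	/* report bytes consumed */
	}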
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index ed48ff022d35..2387e7b823ff 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -10,7 +10,6 @@
10#include <linux/module.h> 10#include <linux/module.h>
11 11
12#include <linux/types.h> 12#include <linux/types.h>
13#include <linux/socket.h>
14#include <linux/sched.h> 13#include <linux/sched.h>
15#include <linux/uio.h> 14#include <linux/uio.h>
16#include <linux/unistd.h> 15#include <linux/unistd.h>
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 691dea4a58e7..f16e7cdd6150 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -548,9 +548,6 @@ svc_write_space(struct sock *sk)
548/* 548/*
549 * Receive a datagram from a UDP socket. 549 * Receive a datagram from a UDP socket.
550 */ 550 */
551extern int
552csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb);
553
554static int 551static int
555svc_udp_recvfrom(struct svc_rqst *rqstp) 552svc_udp_recvfrom(struct svc_rqst *rqstp)
556{ 553{
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index 1b9616a12e24..d0c9f460e411 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -119,8 +119,18 @@ done:
119 return 0; 119 return 0;
120} 120}
121 121
122unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE;
123unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE;
124unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
125EXPORT_SYMBOL(xprt_min_resvport);
126unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
127EXPORT_SYMBOL(xprt_max_resvport);
128
129
122static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; 130static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE;
123static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; 131static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE;
132static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT;
133static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT;
124 134
125static ctl_table debug_table[] = { 135static ctl_table debug_table[] = {
126 { 136 {
@@ -177,6 +187,28 @@ static ctl_table debug_table[] = {
177 .extra1 = &min_slot_table_size, 187 .extra1 = &min_slot_table_size,
178 .extra2 = &max_slot_table_size 188 .extra2 = &max_slot_table_size
179 }, 189 },
190 {
191 .ctl_name = CTL_MIN_RESVPORT,
192 .procname = "min_resvport",
193 .data = &xprt_min_resvport,
194 .maxlen = sizeof(unsigned int),
195 .mode = 0644,
196 .proc_handler = &proc_dointvec_minmax,
197 .strategy = &sysctl_intvec,
198 .extra1 = &xprt_min_resvport_limit,
199 .extra2 = &xprt_max_resvport_limit
200 },
201 {
202 .ctl_name = CTL_MAX_RESVPORT,
203 .procname = "max_resvport",
204 .data = &xprt_max_resvport,
205 .maxlen = sizeof(unsigned int),
206 .mode = 0644,
207 .proc_handler = &proc_dointvec_minmax,
208 .strategy = &sysctl_intvec,
209 .extra1 = &xprt_min_resvport_limit,
210 .extra2 = &xprt_max_resvport_limit
211 },
180 { .ctl_name = 0 } 212 { .ctl_name = 0 }
181}; 213};
182 214
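The new min_resvport/max_resvport sysctls, bounded by RPC_MIN_RESVPORT and RPC_MAX_RESVPORT, let administrators constrain which source ports RPC transports bind when a reserved port is required; xprt_min_resvport and xprt_max_resvport are exported so transport implementations can consult them directly. A hedged sketch of how a transport might walk the tunable window when hunting for a free port (xs_next_port() is an invented helper, not part of this patch):

	/* Sketch only: advance to the next candidate source port,
	 * wrapping within the administrator-tunable window. */
	static unsigned short xs_next_port(unsigned short port)
	{
		if (++port > xprt_max_resvport)
			port = xprt_min_resvport;
		return port;
	}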
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index fde16f40a581..32df43372ee9 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -6,15 +6,12 @@
6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
7 */ 7 */
8 8
9#include <linux/module.h>
9#include <linux/types.h> 10#include <linux/types.h>
10#include <linux/socket.h>
11#include <linux/string.h> 11#include <linux/string.h>
12#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/pagemap.h> 13#include <linux/pagemap.h>
14#include <linux/errno.h> 14#include <linux/errno.h>
15#include <linux/in.h>
16#include <linux/net.h>
17#include <net/sock.h>
18#include <linux/sunrpc/xdr.h> 15#include <linux/sunrpc/xdr.h>
19#include <linux/sunrpc/msg_prot.h> 16#include <linux/sunrpc/msg_prot.h>
20 17
@@ -176,178 +173,6 @@ xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
176 xdr->buflen += len; 173 xdr->buflen += len;
177} 174}
178 175
179ssize_t
180xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base,
181 skb_reader_t *desc,
182 skb_read_actor_t copy_actor)
183{
184 struct page **ppage = xdr->pages;
185 unsigned int len, pglen = xdr->page_len;
186 ssize_t copied = 0;
187 int ret;
188
189 len = xdr->head[0].iov_len;
190 if (base < len) {
191 len -= base;
192 ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len);
193 copied += ret;
194 if (ret != len || !desc->count)
195 goto out;
196 base = 0;
197 } else
198 base -= len;
199
200 if (pglen == 0)
201 goto copy_tail;
202 if (base >= pglen) {
203 base -= pglen;
204 goto copy_tail;
205 }
206 if (base || xdr->page_base) {
207 pglen -= base;
208 base += xdr->page_base;
209 ppage += base >> PAGE_CACHE_SHIFT;
210 base &= ~PAGE_CACHE_MASK;
211 }
212 do {
213 char *kaddr;
214
215 /* ACL likes to be lazy in allocating pages - ACLs
216 * are small by default but can get huge. */
217 if (unlikely(*ppage == NULL)) {
218 *ppage = alloc_page(GFP_ATOMIC);
219 if (unlikely(*ppage == NULL)) {
220 if (copied == 0)
221 copied = -ENOMEM;
222 goto out;
223 }
224 }
225
226 len = PAGE_CACHE_SIZE;
227 kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA);
228 if (base) {
229 len -= base;
230 if (pglen < len)
231 len = pglen;
232 ret = copy_actor(desc, kaddr + base, len);
233 base = 0;
234 } else {
235 if (pglen < len)
236 len = pglen;
237 ret = copy_actor(desc, kaddr, len);
238 }
239 flush_dcache_page(*ppage);
240 kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA);
241 copied += ret;
242 if (ret != len || !desc->count)
243 goto out;
244 ppage++;
245 } while ((pglen -= len) != 0);
246copy_tail:
247 len = xdr->tail[0].iov_len;
248 if (base < len)
249 copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base);
250out:
251 return copied;
252}
253
254
255int
256xdr_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
257 struct xdr_buf *xdr, unsigned int base, int msgflags)
258{
259 struct page **ppage = xdr->pages;
260 unsigned int len, pglen = xdr->page_len;
261 int err, ret = 0;
262 ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int);
263
264 len = xdr->head[0].iov_len;
265 if (base < len || (addr != NULL && base == 0)) {
266 struct kvec iov = {
267 .iov_base = xdr->head[0].iov_base + base,
268 .iov_len = len - base,
269 };
270 struct msghdr msg = {
271 .msg_name = addr,
272 .msg_namelen = addrlen,
273 .msg_flags = msgflags,
274 };
275 if (xdr->len > len)
276 msg.msg_flags |= MSG_MORE;
277
278 if (iov.iov_len != 0)
279 err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
280 else
281 err = kernel_sendmsg(sock, &msg, NULL, 0, 0);
282 if (ret == 0)
283 ret = err;
284 else if (err > 0)
285 ret += err;
286 if (err != iov.iov_len)
287 goto out;
288 base = 0;
289 } else
290 base -= len;
291
292 if (pglen == 0)
293 goto copy_tail;
294 if (base >= pglen) {
295 base -= pglen;
296 goto copy_tail;
297 }
298 if (base || xdr->page_base) {
299 pglen -= base;
300 base += xdr->page_base;
301 ppage += base >> PAGE_CACHE_SHIFT;
302 base &= ~PAGE_CACHE_MASK;
303 }
304
305 sendpage = sock->ops->sendpage ? : sock_no_sendpage;
306 do {
307 int flags = msgflags;
308
309 len = PAGE_CACHE_SIZE;
310 if (base)
311 len -= base;
312 if (pglen < len)
313 len = pglen;
314
315 if (pglen != len || xdr->tail[0].iov_len != 0)
316 flags |= MSG_MORE;
317
318 /* Hmm... We might be dealing with highmem pages */
319 if (PageHighMem(*ppage))
320 sendpage = sock_no_sendpage;
321 err = sendpage(sock, *ppage, base, len, flags);
322 if (ret == 0)
323 ret = err;
324 else if (err > 0)
325 ret += err;
326 if (err != len)
327 goto out;
328 base = 0;
329 ppage++;
330 } while ((pglen -= len) != 0);
331copy_tail:
332 len = xdr->tail[0].iov_len;
333 if (base < len) {
334 struct kvec iov = {
335 .iov_base = xdr->tail[0].iov_base + base,
336 .iov_len = len - base,
337 };
338 struct msghdr msg = {
339 .msg_flags = msgflags,
340 };
341 err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
342 if (ret == 0)
343 ret = err;
344 else if (err > 0)
345 ret += err;
346 }
347out:
348 return ret;
349}
350
351 176
352/* 177/*
353 * Helper routines for doing 'memmove' like operations on a struct xdr_buf 178 * Helper routines for doing 'memmove' like operations on a struct xdr_buf
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 3c654e06b084..6dda3860351f 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -10,12 +10,12 @@
10 * one is available. Otherwise, it sleeps on the backlog queue 10 * one is available. Otherwise, it sleeps on the backlog queue
11 * (xprt_reserve). 11 * (xprt_reserve).
12 * - Next, the caller puts together the RPC message, stuffs it into 12 * - Next, the caller puts together the RPC message, stuffs it into
13 * the request struct, and calls xprt_call(). 13 * the request struct, and calls xprt_transmit().
14 * - xprt_call transmits the message and installs the caller on the 14 * - xprt_transmit sends the message and installs the caller on the
15 * socket's wait list. At the same time, it installs a timer that 15 * transport's wait list. At the same time, it installs a timer that
16 * is run after the packet's timeout has expired. 16 * is run after the packet's timeout has expired.
17 * - When a packet arrives, the data_ready handler walks the list of 17 * - When a packet arrives, the data_ready handler walks the list of
18 * pending requests for that socket. If a matching XID is found, the 18 * pending requests for that transport. If a matching XID is found, the
19 * caller is woken up, and the timer removed. 19 * caller is woken up, and the timer removed.
20 * - When no reply arrives within the timeout interval, the timer is 20 * - When no reply arrives within the timeout interval, the timer is
21 * fired by the kernel and runs xprt_timer(). It either adjusts the 21 * fired by the kernel and runs xprt_timer(). It either adjusts the
@@ -33,36 +33,17 @@
33 * 33 *
34 * Copyright (C) 1995-1997, Olaf Kirch <okir@monad.swb.de> 34 * Copyright (C) 1995-1997, Olaf Kirch <okir@monad.swb.de>
35 * 35 *
36 * TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com> 36 * Transport switch API copyright (C) 2005, Chuck Lever <cel@netapp.com>
37 * TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com>
38 * TCP NFS related read + write fixes
39 * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie>
40 *
41 * Rewrite of larges part of the code in order to stabilize TCP stuff.
42 * Fix behaviour when socket buffer is full.
43 * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no>
44 */ 37 */
45 38
39#include <linux/module.h>
40
46#include <linux/types.h> 41#include <linux/types.h>
47#include <linux/slab.h> 42#include <linux/interrupt.h>
48#include <linux/capability.h>
49#include <linux/sched.h>
50#include <linux/errno.h>
51#include <linux/socket.h>
52#include <linux/in.h>
53#include <linux/net.h>
54#include <linux/mm.h>
55#include <linux/udp.h>
56#include <linux/tcp.h>
57#include <linux/sunrpc/clnt.h>
58#include <linux/file.h>
59#include <linux/workqueue.h> 43#include <linux/workqueue.h>
60#include <linux/random.h> 44#include <linux/random.h>
61 45
62#include <net/sock.h> 46#include <linux/sunrpc/clnt.h>
63#include <net/checksum.h>
64#include <net/udp.h>
65#include <net/tcp.h>
66 47
67/* 48/*
68 * Local variables 49 * Local variables
@@ -73,81 +54,90 @@
73# define RPCDBG_FACILITY RPCDBG_XPRT 54# define RPCDBG_FACILITY RPCDBG_XPRT
74#endif 55#endif
75 56
76#define XPRT_MAX_BACKOFF (8)
77#define XPRT_IDLE_TIMEOUT (5*60*HZ)
78#define XPRT_MAX_RESVPORT (800)
79
80/* 57/*
81 * Local functions 58 * Local functions
82 */ 59 */
83static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); 60static void xprt_request_init(struct rpc_task *, struct rpc_xprt *);
84static inline void do_xprt_reserve(struct rpc_task *); 61static inline void do_xprt_reserve(struct rpc_task *);
85static void xprt_disconnect(struct rpc_xprt *);
86static void xprt_connect_status(struct rpc_task *task); 62static void xprt_connect_status(struct rpc_task *task);
87static struct rpc_xprt * xprt_setup(int proto, struct sockaddr_in *ap,
88 struct rpc_timeout *to);
89static struct socket *xprt_create_socket(struct rpc_xprt *, int, int);
90static void xprt_bind_socket(struct rpc_xprt *, struct socket *);
91static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); 63static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
92 64
93static int xprt_clear_backlog(struct rpc_xprt *xprt);
94
95#ifdef RPC_DEBUG_DATA
96/* 65/*
97 * Print the buffer contents (first 128 bytes only--just enough for 66 * The transport code maintains an estimate on the maximum number of out-
98 * diropres return). 67 * standing RPC requests, using a smoothed version of the congestion
68 * avoidance implemented in 44BSD. This is basically the Van Jacobson
69 * congestion algorithm: If a retransmit occurs, the congestion window is
70 * halved; otherwise, it is incremented by 1/cwnd when
71 *
72 * - a reply is received and
73 * - a full number of requests are outstanding and
74 * - the congestion window hasn't been updated recently.
99 */ 75 */
100static void 76#define RPC_CWNDSHIFT (8U)
101xprt_pktdump(char *msg, u32 *packet, unsigned int count) 77#define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT)
102{ 78#define RPC_INITCWND RPC_CWNDSCALE
103 u8 *buf = (u8 *) packet; 79#define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT)
104 int j;
105
106 dprintk("RPC: %s\n", msg);
107 for (j = 0; j < count && j < 128; j += 4) {
108 if (!(j & 31)) {
109 if (j)
110 dprintk("\n");
111 dprintk("0x%04x ", j);
112 }
113 dprintk("%02x%02x%02x%02x ",
114 buf[j], buf[j+1], buf[j+2], buf[j+3]);
115 }
116 dprintk("\n");
117}
118#else
119static inline void
120xprt_pktdump(char *msg, u32 *packet, unsigned int count)
121{
122 /* NOP */
123}
124#endif
125 80
126/* 81#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
127 * Look up RPC transport given an INET socket 82
83/**
84 * xprt_reserve_xprt - serialize write access to transports
85 * @task: task that is requesting access to the transport
86 *
87 * This prevents mixing the payload of separate requests, and prevents
88 * transport connects from colliding with writes. No congestion control
89 * is provided.
128 */ 90 */
129static inline struct rpc_xprt * 91int xprt_reserve_xprt(struct rpc_task *task)
130xprt_from_sock(struct sock *sk)
131{ 92{
132 return (struct rpc_xprt *) sk->sk_user_data; 93 struct rpc_xprt *xprt = task->tk_xprt;
94 struct rpc_rqst *req = task->tk_rqstp;
95
96 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
97 if (task == xprt->snd_task)
98 return 1;
99 if (task == NULL)
100 return 0;
101 goto out_sleep;
102 }
103 xprt->snd_task = task;
104 if (req) {
105 req->rq_bytes_sent = 0;
106 req->rq_ntrans++;
107 }
108 return 1;
109
110out_sleep:
111 dprintk("RPC: %4d failed to lock transport %p\n",
112 task->tk_pid, xprt);
113 task->tk_timeout = 0;
114 task->tk_status = -EAGAIN;
115 if (req && req->rq_ntrans)
116 rpc_sleep_on(&xprt->resend, task, NULL, NULL);
117 else
118 rpc_sleep_on(&xprt->sending, task, NULL, NULL);
119 return 0;
133} 120}
134 121
135/* 122/*
136 * Serialize write access to sockets, in order to prevent different 123 * xprt_reserve_xprt_cong - serialize write access to transports
137 * requests from interfering with each other. 124 * @task: task that is requesting access to the transport
138 * Also prevents TCP socket connects from colliding with writes. 125 *
126 * Same as xprt_reserve_xprt, but Van Jacobson congestion control is
127 * integrated into the decision of whether a request is allowed to be
128 * woken up and given access to the transport.
139 */ 129 */
140static int 130int xprt_reserve_xprt_cong(struct rpc_task *task)
141__xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
142{ 131{
132 struct rpc_xprt *xprt = task->tk_xprt;
143 struct rpc_rqst *req = task->tk_rqstp; 133 struct rpc_rqst *req = task->tk_rqstp;
144 134
145 if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) { 135 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
146 if (task == xprt->snd_task) 136 if (task == xprt->snd_task)
147 return 1; 137 return 1;
148 goto out_sleep; 138 goto out_sleep;
149 } 139 }
150 if (xprt->nocong || __xprt_get_cong(xprt, task)) { 140 if (__xprt_get_cong(xprt, task)) {
151 xprt->snd_task = task; 141 xprt->snd_task = task;
152 if (req) { 142 if (req) {
153 req->rq_bytes_sent = 0; 143 req->rq_bytes_sent = 0;
@@ -156,10 +146,10 @@ __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
156 return 1; 146 return 1;
157 } 147 }
158 smp_mb__before_clear_bit(); 148 smp_mb__before_clear_bit();
159 clear_bit(XPRT_LOCKED, &xprt->sockstate); 149 clear_bit(XPRT_LOCKED, &xprt->state);
160 smp_mb__after_clear_bit(); 150 smp_mb__after_clear_bit();
161out_sleep: 151out_sleep:
162 dprintk("RPC: %4d failed to lock socket %p\n", task->tk_pid, xprt); 152 dprintk("RPC: %4d failed to lock transport %p\n", task->tk_pid, xprt);
163 task->tk_timeout = 0; 153 task->tk_timeout = 0;
164 task->tk_status = -EAGAIN; 154 task->tk_status = -EAGAIN;
165 if (req && req->rq_ntrans) 155 if (req && req->rq_ntrans)
@@ -169,26 +159,52 @@ out_sleep:
169 return 0; 159 return 0;
170} 160}
171 161
172static inline int 162static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
173xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
174{ 163{
175 int retval; 164 int retval;
176 165
177 spin_lock_bh(&xprt->sock_lock); 166 spin_lock_bh(&xprt->transport_lock);
178 retval = __xprt_lock_write(xprt, task); 167 retval = xprt->ops->reserve_xprt(task);
179 spin_unlock_bh(&xprt->sock_lock); 168 spin_unlock_bh(&xprt->transport_lock);
180 return retval; 169 return retval;
181} 170}
182 171
172static void __xprt_lock_write_next(struct rpc_xprt *xprt)
173{
174 struct rpc_task *task;
175 struct rpc_rqst *req;
183 176
184static void 177 if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
185__xprt_lock_write_next(struct rpc_xprt *xprt) 178 return;
179
180 task = rpc_wake_up_next(&xprt->resend);
181 if (!task) {
182 task = rpc_wake_up_next(&xprt->sending);
183 if (!task)
184 goto out_unlock;
185 }
186
187 req = task->tk_rqstp;
188 xprt->snd_task = task;
189 if (req) {
190 req->rq_bytes_sent = 0;
191 req->rq_ntrans++;
192 }
193 return;
194
195out_unlock:
196 smp_mb__before_clear_bit();
197 clear_bit(XPRT_LOCKED, &xprt->state);
198 smp_mb__after_clear_bit();
199}
200
201static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
186{ 202{
187 struct rpc_task *task; 203 struct rpc_task *task;
188 204
189 if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) 205 if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
190 return; 206 return;
191 if (!xprt->nocong && RPCXPRT_CONGESTED(xprt)) 207 if (RPCXPRT_CONGESTED(xprt))
192 goto out_unlock; 208 goto out_unlock;
193 task = rpc_wake_up_next(&xprt->resend); 209 task = rpc_wake_up_next(&xprt->resend);
194 if (!task) { 210 if (!task) {
@@ -196,7 +212,7 @@ __xprt_lock_write_next(struct rpc_xprt *xprt)
196 if (!task) 212 if (!task)
197 goto out_unlock; 213 goto out_unlock;
198 } 214 }
199 if (xprt->nocong || __xprt_get_cong(xprt, task)) { 215 if (__xprt_get_cong(xprt, task)) {
200 struct rpc_rqst *req = task->tk_rqstp; 216 struct rpc_rqst *req = task->tk_rqstp;
201 xprt->snd_task = task; 217 xprt->snd_task = task;
202 if (req) { 218 if (req) {
@@ -207,87 +223,52 @@ __xprt_lock_write_next(struct rpc_xprt *xprt)
207 } 223 }
208out_unlock: 224out_unlock:
209 smp_mb__before_clear_bit(); 225 smp_mb__before_clear_bit();
210 clear_bit(XPRT_LOCKED, &xprt->sockstate); 226 clear_bit(XPRT_LOCKED, &xprt->state);
211 smp_mb__after_clear_bit(); 227 smp_mb__after_clear_bit();
212} 228}
213 229
214/* 230/**
215 * Releases the socket for use by other requests. 231 * xprt_release_xprt - allow other requests to use a transport
232 * @xprt: transport with other tasks potentially waiting
233 * @task: task that is releasing access to the transport
234 *
235 * Note that "task" can be NULL. No congestion control is provided.
216 */ 236 */
217static void 237void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
218__xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
219{ 238{
220 if (xprt->snd_task == task) { 239 if (xprt->snd_task == task) {
221 xprt->snd_task = NULL; 240 xprt->snd_task = NULL;
222 smp_mb__before_clear_bit(); 241 smp_mb__before_clear_bit();
223 clear_bit(XPRT_LOCKED, &xprt->sockstate); 242 clear_bit(XPRT_LOCKED, &xprt->state);
224 smp_mb__after_clear_bit(); 243 smp_mb__after_clear_bit();
225 __xprt_lock_write_next(xprt); 244 __xprt_lock_write_next(xprt);
226 } 245 }
227} 246}
228 247
229static inline void 248/**
230xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) 249 * xprt_release_xprt_cong - allow other requests to use a transport
231{ 250 * @xprt: transport with other tasks potentially waiting
232 spin_lock_bh(&xprt->sock_lock); 251 * @task: task that is releasing access to the transport
233 __xprt_release_write(xprt, task); 252 *
234 spin_unlock_bh(&xprt->sock_lock); 253 * Note that "task" can be NULL. Another task is awoken to use the
235} 254 * transport if the transport's congestion window allows it.
236
237/*
238 * Write data to socket.
239 */ 255 */
240static inline int 256void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
241xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req)
242{ 257{
243 struct socket *sock = xprt->sock; 258 if (xprt->snd_task == task) {
244 struct xdr_buf *xdr = &req->rq_snd_buf; 259 xprt->snd_task = NULL;
245 struct sockaddr *addr = NULL; 260 smp_mb__before_clear_bit();
246 int addrlen = 0; 261 clear_bit(XPRT_LOCKED, &xprt->state);
247 unsigned int skip; 262 smp_mb__after_clear_bit();
248 int result; 263 __xprt_lock_write_next_cong(xprt);
249
250 if (!sock)
251 return -ENOTCONN;
252
253 xprt_pktdump("packet data:",
254 req->rq_svec->iov_base,
255 req->rq_svec->iov_len);
256
257 /* For UDP, we need to provide an address */
258 if (!xprt->stream) {
259 addr = (struct sockaddr *) &xprt->addr;
260 addrlen = sizeof(xprt->addr);
261 } 264 }
262 /* Dont repeat bytes */ 265}
263 skip = req->rq_bytes_sent;
264
265 clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
266 result = xdr_sendpages(sock, addr, addrlen, xdr, skip, MSG_DONTWAIT);
267
268 dprintk("RPC: xprt_sendmsg(%d) = %d\n", xdr->len - skip, result);
269
270 if (result >= 0)
271 return result;
272 266
273 switch (result) { 267static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
274 case -ECONNREFUSED: 268{
275 /* When the server has died, an ICMP port unreachable message 269 spin_lock_bh(&xprt->transport_lock);
276 * prompts ECONNREFUSED. 270 xprt->ops->release_xprt(xprt, task);
277 */ 271 spin_unlock_bh(&xprt->transport_lock);
278 case -EAGAIN:
279 break;
280 case -ECONNRESET:
281 case -ENOTCONN:
282 case -EPIPE:
283 /* connection broken */
284 if (xprt->stream)
285 result = -ENOTCONN;
286 break;
287 default:
288 printk(KERN_NOTICE "RPC: sendmsg returned error %d\n", -result);
289 }
290 return result;
291} 272}
292 273
293/* 274/*
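xprt_reserve_xprt()/xprt_release_xprt() and their _cong variants are deliberately exported in matched pairs: the plain pair only serializes writers, while the _cong pair additionally consults the congestion window via __xprt_get_cong(). A transport picks whichever pair suits its semantics in its operations table; a hedged sketch, with the field names inferred from the xprt->ops calls made elsewhere in this patch and the remaining members omitted:

	/* Sketch only: datagram transports want congestion control,
	 * stream transports do not. */
	static struct rpc_xprt_ops xs_udp_ops = {
		.reserve_xprt	= xprt_reserve_xprt_cong,
		.release_xprt	= xprt_release_xprt_cong,
		/* set_buffer_size, connect, close, ... omitted */
	};

	static struct rpc_xprt_ops xs_tcp_ops = {
		.reserve_xprt	= xprt_reserve_xprt,
		.release_xprt	= xprt_release_xprt,
		/* ... omitted */
	};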
@@ -321,26 +302,40 @@ __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
321 return; 302 return;
322 req->rq_cong = 0; 303 req->rq_cong = 0;
323 xprt->cong -= RPC_CWNDSCALE; 304 xprt->cong -= RPC_CWNDSCALE;
324 __xprt_lock_write_next(xprt); 305 __xprt_lock_write_next_cong(xprt);
325} 306}
326 307
327/* 308/**
328 * Adjust RPC congestion window 309 * xprt_release_rqst_cong - housekeeping when request is complete
310 * @task: RPC request that recently completed
311 *
312 * Useful for transports that require congestion control.
313 */
314void xprt_release_rqst_cong(struct rpc_task *task)
315{
316 __xprt_put_cong(task->tk_xprt, task->tk_rqstp);
317}
318
319/**
320 * xprt_adjust_cwnd - adjust transport congestion window
321 * @task: recently completed RPC request used to adjust window
322 * @result: result code of completed RPC request
323 *
329 * We use a time-smoothed congestion estimator to avoid heavy oscillation. 324 * We use a time-smoothed congestion estimator to avoid heavy oscillation.
330 */ 325 */
331static void 326void xprt_adjust_cwnd(struct rpc_task *task, int result)
332xprt_adjust_cwnd(struct rpc_xprt *xprt, int result)
333{ 327{
334 unsigned long cwnd; 328 struct rpc_rqst *req = task->tk_rqstp;
329 struct rpc_xprt *xprt = task->tk_xprt;
330 unsigned long cwnd = xprt->cwnd;
335 331
336 cwnd = xprt->cwnd;
337 if (result >= 0 && cwnd <= xprt->cong) { 332 if (result >= 0 && cwnd <= xprt->cong) {
338 /* The (cwnd >> 1) term makes sure 333 /* The (cwnd >> 1) term makes sure
339 * the result gets rounded properly. */ 334 * the result gets rounded properly. */
340 cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd; 335 cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd;
341 if (cwnd > RPC_MAXCWND(xprt)) 336 if (cwnd > RPC_MAXCWND(xprt))
342 cwnd = RPC_MAXCWND(xprt); 337 cwnd = RPC_MAXCWND(xprt);
343 __xprt_lock_write_next(xprt); 338 __xprt_lock_write_next_cong(xprt);
344 } else if (result == -ETIMEDOUT) { 339 } else if (result == -ETIMEDOUT) {
345 cwnd >>= 1; 340 cwnd >>= 1;
346 if (cwnd < RPC_CWNDSCALE) 341 if (cwnd < RPC_CWNDSCALE)
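Worked example of the arithmetic above: with RPC_CWNDSHIFT = 8, one request slot is RPC_CWNDSCALE = 256 congestion units, and RPC_MAXCWND caps the window at max_reqs slots. At cwnd = 512 (two slots), a successful reply received while the window is fully used (cwnd <= xprt->cong) adds (256*256 + 256) / 512 = 128 units, i.e. half a slot — the classic additive increase of 1/cwnd per reply — whereas an -ETIMEDOUT halves the window, with the check above keeping it from falling below a single slot.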
@@ -349,11 +344,89 @@ xprt_adjust_cwnd(struct rpc_xprt *xprt, int result)
349 dprintk("RPC: cong %ld, cwnd was %ld, now %ld\n", 344 dprintk("RPC: cong %ld, cwnd was %ld, now %ld\n",
350 xprt->cong, xprt->cwnd, cwnd); 345 xprt->cong, xprt->cwnd, cwnd);
351 xprt->cwnd = cwnd; 346 xprt->cwnd = cwnd;
347 __xprt_put_cong(xprt, req);
348}
349
350/**
351 * xprt_wake_pending_tasks - wake all tasks on a transport's pending queue
352 * @xprt: transport with waiting tasks
353 * @status: result code to plant in each task before waking it
354 *
355 */
356void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status)
357{
358 if (status < 0)
359 rpc_wake_up_status(&xprt->pending, status);
360 else
361 rpc_wake_up(&xprt->pending);
362}
363
364/**
365 * xprt_wait_for_buffer_space - wait for transport output buffer to clear
366 * @task: task to be put to sleep
367 *
368 */
369void xprt_wait_for_buffer_space(struct rpc_task *task)
370{
371 struct rpc_rqst *req = task->tk_rqstp;
372 struct rpc_xprt *xprt = req->rq_xprt;
373
374 task->tk_timeout = req->rq_timeout;
375 rpc_sleep_on(&xprt->pending, task, NULL, NULL);
376}
377
378/**
379 * xprt_write_space - wake the task waiting for transport output buffer space
380 * @xprt: transport with waiting tasks
381 *
382 * Can be called in a soft IRQ context, so xprt_write_space never sleeps.
383 */
384void xprt_write_space(struct rpc_xprt *xprt)
385{
386 if (unlikely(xprt->shutdown))
387 return;
388
389 spin_lock_bh(&xprt->transport_lock);
390 if (xprt->snd_task) {
391 dprintk("RPC: write space: waking waiting task on xprt %p\n",
392 xprt);
393 rpc_wake_up_task(xprt->snd_task);
394 }
395 spin_unlock_bh(&xprt->transport_lock);
396}
397
398/**
399 * xprt_set_retrans_timeout_def - set a request's retransmit timeout
400 * @task: task whose timeout is to be set
401 *
402 * Set a request's retransmit timeout based on the transport's
403 * default timeout parameters. Used by transports that don't adjust
404 * the retransmit timeout based on round-trip time estimation.
405 */
406void xprt_set_retrans_timeout_def(struct rpc_task *task)
407{
408 task->tk_timeout = task->tk_rqstp->rq_timeout;
352} 409}
353 410
354/* 411/*
355 * Reset the major timeout value 412 * xprt_set_retrans_timeout_rtt - set a request's retransmit timeout
413 * @task: task whose timeout is to be set
414 *
415 * Set a request's retransmit timeout using the RTT estimator.
356 */ 416 */
417void xprt_set_retrans_timeout_rtt(struct rpc_task *task)
418{
419 int timer = task->tk_msg.rpc_proc->p_timer;
420 struct rpc_rtt *rtt = task->tk_client->cl_rtt;
421 struct rpc_rqst *req = task->tk_rqstp;
422 unsigned long max_timeout = req->rq_xprt->timeout.to_maxval;
423
424 task->tk_timeout = rpc_calc_rto(rtt, timer);
425 task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries;
426 if (task->tk_timeout > max_timeout || task->tk_timeout == 0)
427 task->tk_timeout = max_timeout;
428}
429
357static void xprt_reset_majortimeo(struct rpc_rqst *req) 430static void xprt_reset_majortimeo(struct rpc_rqst *req)
358{ 431{
359 struct rpc_timeout *to = &req->rq_xprt->timeout; 432 struct rpc_timeout *to = &req->rq_xprt->timeout;
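xprt_set_retrans_timeout_rtt() turns the estimator's RTO into an exponentially backed-off timeout: rpc_calc_rto() supplies the base value, which is shifted left once per minor timeout already recorded for this procedure class plus once per retransmission of this request, then clamped to the transport's to_maxval (a zero result is likewise forced up to the cap). For example, a 250 ms RTO with one recorded minor timeout and one retry yields 250 ms << 2 = 1 second, and a runaway estimate can never exceed the mount's maximum timeout.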
@@ -368,8 +441,10 @@ static void xprt_reset_majortimeo(struct rpc_rqst *req)
368 req->rq_majortimeo += jiffies; 441 req->rq_majortimeo += jiffies;
369} 442}
370 443
371/* 444/**
372 * Adjust timeout values etc for next retransmit 445 * xprt_adjust_timeout - adjust timeout values for next retransmit
446 * @req: RPC request containing parameters to use for the adjustment
447 *
373 */ 448 */
374int xprt_adjust_timeout(struct rpc_rqst *req) 449int xprt_adjust_timeout(struct rpc_rqst *req)
375{ 450{
@@ -391,9 +466,9 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
391 req->rq_retries = 0; 466 req->rq_retries = 0;
392 xprt_reset_majortimeo(req); 467 xprt_reset_majortimeo(req);
393 /* Reset the RTT counters == "slow start" */ 468 /* Reset the RTT counters == "slow start" */
394 spin_lock_bh(&xprt->sock_lock); 469 spin_lock_bh(&xprt->transport_lock);
395 rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval); 470 rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval);
396 spin_unlock_bh(&xprt->sock_lock); 471 spin_unlock_bh(&xprt->transport_lock);
397 pprintk("RPC: %lu timeout\n", jiffies); 472 pprintk("RPC: %lu timeout\n", jiffies);
398 status = -ETIMEDOUT; 473 status = -ETIMEDOUT;
399 } 474 }
@@ -405,133 +480,52 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
405 return status; 480 return status;
406} 481}
407 482
408/* 483static void xprt_autoclose(void *args)
409 * Close down a transport socket
410 */
411static void
412xprt_close(struct rpc_xprt *xprt)
413{
414 struct socket *sock = xprt->sock;
415 struct sock *sk = xprt->inet;
416
417 if (!sk)
418 return;
419
420 write_lock_bh(&sk->sk_callback_lock);
421 xprt->inet = NULL;
422 xprt->sock = NULL;
423
424 sk->sk_user_data = NULL;
425 sk->sk_data_ready = xprt->old_data_ready;
426 sk->sk_state_change = xprt->old_state_change;
427 sk->sk_write_space = xprt->old_write_space;
428 write_unlock_bh(&sk->sk_callback_lock);
429
430 sk->sk_no_check = 0;
431
432 sock_release(sock);
433}
434
435static void
436xprt_socket_autoclose(void *args)
437{ 484{
438 struct rpc_xprt *xprt = (struct rpc_xprt *)args; 485 struct rpc_xprt *xprt = (struct rpc_xprt *)args;
439 486
440 xprt_disconnect(xprt); 487 xprt_disconnect(xprt);
441 xprt_close(xprt); 488 xprt->ops->close(xprt);
442 xprt_release_write(xprt, NULL); 489 xprt_release_write(xprt, NULL);
443} 490}
444 491
445/* 492/**
446 * Mark a transport as disconnected 493 * xprt_disconnect - mark a transport as disconnected
494 * @xprt: transport to flag for disconnect
495 *
447 */ 496 */
448static void 497void xprt_disconnect(struct rpc_xprt *xprt)
449xprt_disconnect(struct rpc_xprt *xprt)
450{ 498{
451 dprintk("RPC: disconnected transport %p\n", xprt); 499 dprintk("RPC: disconnected transport %p\n", xprt);
452 spin_lock_bh(&xprt->sock_lock); 500 spin_lock_bh(&xprt->transport_lock);
453 xprt_clear_connected(xprt); 501 xprt_clear_connected(xprt);
454 rpc_wake_up_status(&xprt->pending, -ENOTCONN); 502 xprt_wake_pending_tasks(xprt, -ENOTCONN);
455 spin_unlock_bh(&xprt->sock_lock); 503 spin_unlock_bh(&xprt->transport_lock);
456} 504}
457 505
458/*
459 * Used to allow disconnection when we've been idle
460 */
461static void 506static void
462xprt_init_autodisconnect(unsigned long data) 507xprt_init_autodisconnect(unsigned long data)
463{ 508{
464 struct rpc_xprt *xprt = (struct rpc_xprt *)data; 509 struct rpc_xprt *xprt = (struct rpc_xprt *)data;
465 510
466 spin_lock(&xprt->sock_lock); 511 spin_lock(&xprt->transport_lock);
467 if (!list_empty(&xprt->recv) || xprt->shutdown) 512 if (!list_empty(&xprt->recv) || xprt->shutdown)
468 goto out_abort; 513 goto out_abort;
469 if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) 514 if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
470 goto out_abort; 515 goto out_abort;
471 spin_unlock(&xprt->sock_lock); 516 spin_unlock(&xprt->transport_lock);
472 /* Let keventd close the socket */ 517 if (xprt_connecting(xprt))
473 if (test_bit(XPRT_CONNECTING, &xprt->sockstate) != 0)
474 xprt_release_write(xprt, NULL); 518 xprt_release_write(xprt, NULL);
475 else 519 else
476 schedule_work(&xprt->task_cleanup); 520 schedule_work(&xprt->task_cleanup);
477 return; 521 return;
478out_abort: 522out_abort:
479 spin_unlock(&xprt->sock_lock); 523 spin_unlock(&xprt->transport_lock);
480}
481
482static void xprt_socket_connect(void *args)
483{
484 struct rpc_xprt *xprt = (struct rpc_xprt *)args;
485 struct socket *sock = xprt->sock;
486 int status = -EIO;
487
488 if (xprt->shutdown || xprt->addr.sin_port == 0)
489 goto out;
490
491 /*
492 * Start by resetting any existing state
493 */
494 xprt_close(xprt);
495 sock = xprt_create_socket(xprt, xprt->prot, xprt->resvport);
496 if (sock == NULL) {
497 /* couldn't create socket or bind to reserved port;
498 * this is likely a permanent error, so cause an abort */
499 goto out;
500 }
501 xprt_bind_socket(xprt, sock);
502 xprt_sock_setbufsize(xprt);
503
504 status = 0;
505 if (!xprt->stream)
506 goto out;
507
508 /*
509 * Tell the socket layer to start connecting...
510 */
511 status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr,
512 sizeof(xprt->addr), O_NONBLOCK);
513 dprintk("RPC: %p connect status %d connected %d sock state %d\n",
514 xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
515 if (status < 0) {
516 switch (status) {
517 case -EINPROGRESS:
518 case -EALREADY:
519 goto out_clear;
520 }
521 }
522out:
523 if (status < 0)
524 rpc_wake_up_status(&xprt->pending, status);
525 else
526 rpc_wake_up(&xprt->pending);
527out_clear:
528 smp_mb__before_clear_bit();
529 clear_bit(XPRT_CONNECTING, &xprt->sockstate);
530 smp_mb__after_clear_bit();
531} 524}
532 525
533/* 526/**
534 * Attempt to connect a TCP socket. 527 * xprt_connect - schedule a transport connect operation
528 * @task: RPC task that is requesting the connect
535 * 529 *
536 */ 530 */
537void xprt_connect(struct rpc_task *task) 531void xprt_connect(struct rpc_task *task)
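xprt_autoclose() now delegates the actual teardown to the transport's close operation, keeping the generic code free of socket knowledge. A transport's implementation would look much like the xprt_close() routine deleted above; a minimal sketch of that shape, modelled directly on the removed code, with the saved old_* callbacks restored before the socket is released:

	/* Sketch only: a socket transport's close op, after xprt_close(). */
	static void xs_close(struct rpc_xprt *xprt)
	{
		struct socket *sock = xprt->sock;
		struct sock *sk = xprt->inet;

		if (!sk)
			return;
		write_lock_bh(&sk->sk_callback_lock);
		xprt->inet = NULL;
		xprt->sock = NULL;
		sk->sk_user_data = NULL;
		sk->sk_data_ready = xprt->old_data_ready;
		sk->sk_state_change = xprt->old_state_change;
		sk->sk_write_space = xprt->old_write_space;
		write_unlock_bh(&sk->sk_callback_lock);
		sock_release(sock);
	}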
@@ -552,37 +546,19 @@ void xprt_connect(struct rpc_task *task)
552 if (!xprt_lock_write(xprt, task)) 546 if (!xprt_lock_write(xprt, task))
553 return; 547 return;
554 if (xprt_connected(xprt)) 548 if (xprt_connected(xprt))
555 goto out_write; 549 xprt_release_write(xprt, task);
550 else {
551 if (task->tk_rqstp)
552 task->tk_rqstp->rq_bytes_sent = 0;
556 553
557 if (task->tk_rqstp) 554 task->tk_timeout = xprt->connect_timeout;
558 task->tk_rqstp->rq_bytes_sent = 0; 555 rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL);
559 556 xprt->ops->connect(task);
560 task->tk_timeout = RPC_CONNECT_TIMEOUT;
561 rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL);
562 if (!test_and_set_bit(XPRT_CONNECTING, &xprt->sockstate)) {
563 /* Note: if we are here due to a dropped connection
564 * we delay reconnecting by RPC_REESTABLISH_TIMEOUT/HZ
565 * seconds
566 */
567 if (xprt->sock != NULL)
568 schedule_delayed_work(&xprt->sock_connect,
569 RPC_REESTABLISH_TIMEOUT);
570 else {
571 schedule_work(&xprt->sock_connect);
572 if (!RPC_IS_ASYNC(task))
573 flush_scheduled_work();
574 }
575 } 557 }
576 return; 558 return;
577 out_write:
578 xprt_release_write(xprt, task);
579} 559}
580 560
581/* 561static void xprt_connect_status(struct rpc_task *task)
582 * We arrive here when awoken from waiting on connection establishment.
583 */
584static void
585xprt_connect_status(struct rpc_task *task)
586{ 562{
587 struct rpc_xprt *xprt = task->tk_xprt; 563 struct rpc_xprt *xprt = task->tk_xprt;
588 564
@@ -592,31 +568,42 @@ xprt_connect_status(struct rpc_task *task)
592 return; 568 return;
593 } 569 }
594 570
595 /* if soft mounted, just cause this RPC to fail */
596 if (RPC_IS_SOFT(task))
597 task->tk_status = -EIO;
598
599 switch (task->tk_status) { 571 switch (task->tk_status) {
600 case -ECONNREFUSED: 572 case -ECONNREFUSED:
601 case -ECONNRESET: 573 case -ECONNRESET:
574 dprintk("RPC: %4d xprt_connect_status: server %s refused connection\n",
575 task->tk_pid, task->tk_client->cl_server);
576 break;
602 case -ENOTCONN: 577 case -ENOTCONN:
603 return; 578 dprintk("RPC: %4d xprt_connect_status: connection broken\n",
579 task->tk_pid);
580 break;
604 case -ETIMEDOUT: 581 case -ETIMEDOUT:
605 dprintk("RPC: %4d xprt_connect_status: timed out\n", 582 dprintk("RPC: %4d xprt_connect_status: connect attempt timed out\n",
606 task->tk_pid); 583 task->tk_pid);
607 break; 584 break;
608 default: 585 default:
609 printk(KERN_ERR "RPC: error %d connecting to server %s\n", 586 dprintk("RPC: %4d xprt_connect_status: error %d connecting to server %s\n",
610 -task->tk_status, task->tk_client->cl_server); 587 task->tk_pid, -task->tk_status, task->tk_client->cl_server);
588 xprt_release_write(xprt, task);
589 task->tk_status = -EIO;
590 return;
591 }
592
593 /* if soft mounted, just cause this RPC to fail */
594 if (RPC_IS_SOFT(task)) {
595 xprt_release_write(xprt, task);
596 task->tk_status = -EIO;
611 } 597 }
612 xprt_release_write(xprt, task);
613} 598}
614 599
615/* 600/**
616 * Look up the RPC request corresponding to a reply, and then lock it. 601 * xprt_lookup_rqst - find an RPC request corresponding to an XID
602 * @xprt: transport on which the original request was transmitted
603 * @xid: RPC XID of incoming reply
604 *
617 */ 605 */
618static inline struct rpc_rqst * 606struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid)
619xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid)
620{ 607{
621 struct list_head *pos; 608 struct list_head *pos;
622 struct rpc_rqst *req = NULL; 609 struct rpc_rqst *req = NULL;
@@ -631,556 +618,68 @@ xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid)
631 return req; 618 return req;
632} 619}
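
xprt_lookup_rqst is the reply-matching primitive: it walks the transport's list of outstanding requests comparing each rq_xid against the XID carried in the incoming reply. A minimal userspace sketch of the same matching idea, assuming a hypothetical singly linked pending list (the names below are illustrative, not the kernel's):

#include <stdint.h>
#include <stddef.h>

/* Hypothetical outstanding-request record; not the kernel's rpc_rqst. */
struct pending_req {
	uint32_t xid;			/* XID assigned at transmit time */
	struct pending_req *next;	/* singly linked receive list */
};

/* Return the request whose XID matches the incoming reply, or NULL. */
static struct pending_req *lookup_rqst(struct pending_req *head, uint32_t xid)
{
	struct pending_req *req;

	for (req = head; req != NULL; req = req->next)
		if (req->xid == xid)
			return req;
	return NULL;
}

In the kernel the walk runs under the transport lock, so the reply handler cannot race with a retransmission updating the same request.
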
633 620
634/* 621/**
635 * Complete reply received. 622 * xprt_update_rtt - update an RPC client's RTT state after receiving a reply
636 * The TCP code relies on us to remove the request from xprt->pending. 623 * @task: RPC request that recently completed
637 */ 624 *
638static void
639xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied)
640{
641 struct rpc_task *task = req->rq_task;
642 struct rpc_clnt *clnt = task->tk_client;
643
644 /* Adjust congestion window */
645 if (!xprt->nocong) {
646 unsigned timer = task->tk_msg.rpc_proc->p_timer;
647 xprt_adjust_cwnd(xprt, copied);
648 __xprt_put_cong(xprt, req);
649 if (timer) {
650 if (req->rq_ntrans == 1)
651 rpc_update_rtt(clnt->cl_rtt, timer,
652 (long)jiffies - req->rq_xtime);
653 rpc_set_timeo(clnt->cl_rtt, timer, req->rq_ntrans - 1);
654 }
655 }
656
657#ifdef RPC_PROFILE
658 /* Profile only reads for now */
659 if (copied > 1024) {
660 static unsigned long nextstat;
661 static unsigned long pkt_rtt, pkt_len, pkt_cnt;
662
663 pkt_cnt++;
664 pkt_len += req->rq_slen + copied;
665 pkt_rtt += jiffies - req->rq_xtime;
666 if (time_before(nextstat, jiffies)) {
667 printk("RPC: %lu %ld cwnd\n", jiffies, xprt->cwnd);
668 printk("RPC: %ld %ld %ld %ld stat\n",
669 jiffies, pkt_cnt, pkt_len, pkt_rtt);
670 pkt_rtt = pkt_len = pkt_cnt = 0;
671 nextstat = jiffies + 5 * HZ;
672 }
673 }
674#endif
675
676 dprintk("RPC: %4d has input (%d bytes)\n", task->tk_pid, copied);
677 list_del_init(&req->rq_list);
678 req->rq_received = req->rq_private_buf.len = copied;
679
680 /* ... and wake up the process. */
681 rpc_wake_up_task(task);
682 return;
683}
684
685static size_t
686skb_read_bits(skb_reader_t *desc, void *to, size_t len)
687{
688 if (len > desc->count)
689 len = desc->count;
690 if (skb_copy_bits(desc->skb, desc->offset, to, len))
691 return 0;
692 desc->count -= len;
693 desc->offset += len;
694 return len;
695}
696
697static size_t
698skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len)
699{
700 unsigned int csum2, pos;
701
702 if (len > desc->count)
703 len = desc->count;
704 pos = desc->offset;
705 csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len, 0);
706 desc->csum = csum_block_add(desc->csum, csum2, pos);
707 desc->count -= len;
708 desc->offset += len;
709 return len;
710}
711
712/*
713 * We have set things up such that we perform the checksum of the UDP
714 * packet in parallel with the copies into the RPC client iovec. -DaveM
715 */
716int
717csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
718{
719 skb_reader_t desc;
720
721 desc.skb = skb;
722 desc.offset = sizeof(struct udphdr);
723 desc.count = skb->len - desc.offset;
724
725 if (skb->ip_summed == CHECKSUM_UNNECESSARY)
726 goto no_checksum;
727
728 desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
729 if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0)
730 return -1;
731 if (desc.offset != skb->len) {
732 unsigned int csum2;
733 csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0);
734 desc.csum = csum_block_add(desc.csum, csum2, desc.offset);
735 }
736 if (desc.count)
737 return -1;
738 if ((unsigned short)csum_fold(desc.csum))
739 return -1;
740 return 0;
741no_checksum:
742 if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0)
743 return -1;
744 if (desc.count)
745 return -1;
746 return 0;
747}
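
csum_partial_copy_to_xdr folds UDP checksum verification into the same pass that copies the datagram into the RPC receive buffer, so each byte is touched only once. A self-contained sketch of that single-pass idea using the plain 16-bit ones'-complement Internet checksum; this illustrates the technique only and stands in for the kernel's csum_* helpers:

#include <stdint.h>
#include <stddef.h>

/* Copy len bytes and accumulate a 32-bit ones'-complement sum in one pass. */
static uint32_t copy_and_csum(void *dst, const void *src, size_t len,
			      uint32_t sum)
{
	const uint8_t *s = src;
	uint8_t *d = dst;
	size_t i;

	for (i = 0; i + 1 < len; i += 2) {
		d[i] = s[i];
		d[i + 1] = s[i + 1];
		sum += (uint32_t)s[i] << 8 | s[i + 1];
	}
	if (i < len) {			/* odd trailing byte */
		d[i] = s[i];
		sum += (uint32_t)s[i] << 8;
	}
	return sum;
}

/* Fold the 32-bit accumulator to 16 bits; 0 means the checksum verified. */
static uint16_t csum_fold16(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}
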
748
749/*
750 * Input handler for RPC replies. Called from a bottom half and hence
751 * atomic.
752 */
753static void
754udp_data_ready(struct sock *sk, int len)
755{
756 struct rpc_task *task;
757 struct rpc_xprt *xprt;
758 struct rpc_rqst *rovr;
759 struct sk_buff *skb;
760 int err, repsize, copied;
761 u32 _xid, *xp;
762
763 read_lock(&sk->sk_callback_lock);
764 dprintk("RPC: udp_data_ready...\n");
765 if (!(xprt = xprt_from_sock(sk))) {
766 printk("RPC: udp_data_ready request not found!\n");
767 goto out;
768 }
769
770 dprintk("RPC: udp_data_ready client %p\n", xprt);
771
772 if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL)
773 goto out;
774
775 if (xprt->shutdown)
776 goto dropit;
777
778 repsize = skb->len - sizeof(struct udphdr);
779 if (repsize < 4) {
780 printk("RPC: impossible RPC reply size %d!\n", repsize);
781 goto dropit;
782 }
783
784 /* Copy the XID from the skb... */
785 xp = skb_header_pointer(skb, sizeof(struct udphdr),
786 sizeof(_xid), &_xid);
787 if (xp == NULL)
788 goto dropit;
789
790 /* Look up and lock the request corresponding to the given XID */
791 spin_lock(&xprt->sock_lock);
792 rovr = xprt_lookup_rqst(xprt, *xp);
793 if (!rovr)
794 goto out_unlock;
795 task = rovr->rq_task;
796
797 dprintk("RPC: %4d received reply\n", task->tk_pid);
798
799 if ((copied = rovr->rq_private_buf.buflen) > repsize)
800 copied = repsize;
801
802 /* Suck it into the iovec, verify checksum if not done by hw. */
803 if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb))
804 goto out_unlock;
805
806 /* Something worked... */
807 dst_confirm(skb->dst);
808
809 xprt_complete_rqst(xprt, rovr, copied);
810
811 out_unlock:
812 spin_unlock(&xprt->sock_lock);
813 dropit:
814 skb_free_datagram(sk, skb);
815 out:
816 read_unlock(&sk->sk_callback_lock);
817}
818
819/*
820 * Copy from an skb into memory and shrink the skb.
821 */
822static inline size_t
823tcp_copy_data(skb_reader_t *desc, void *p, size_t len)
824{
825 if (len > desc->count)
826 len = desc->count;
827 if (skb_copy_bits(desc->skb, desc->offset, p, len)) {
828 dprintk("RPC: failed to copy %zu bytes from skb. %zu bytes remain\n",
829 len, desc->count);
830 return 0;
831 }
832 desc->offset += len;
833 desc->count -= len;
834 dprintk("RPC: copied %zu bytes from skb. %zu bytes remain\n",
835 len, desc->count);
836 return len;
837}
838
839/*
840 * TCP read fragment marker
841 */
842static inline void
843tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc)
844{
845 size_t len, used;
846 char *p;
847
848 p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset;
849 len = sizeof(xprt->tcp_recm) - xprt->tcp_offset;
850 used = tcp_copy_data(desc, p, len);
851 xprt->tcp_offset += used;
852 if (used != len)
853 return;
854 xprt->tcp_reclen = ntohl(xprt->tcp_recm);
855 if (xprt->tcp_reclen & 0x80000000)
856 xprt->tcp_flags |= XPRT_LAST_FRAG;
857 else
858 xprt->tcp_flags &= ~XPRT_LAST_FRAG;
859 xprt->tcp_reclen &= 0x7fffffff;
860 xprt->tcp_flags &= ~XPRT_COPY_RECM;
861 xprt->tcp_offset = 0;
862 /* Sanity check of the record length */
863 if (xprt->tcp_reclen < 4) {
864 printk(KERN_ERR "RPC: Invalid TCP record fragment length\n");
865 xprt_disconnect(xprt);
866 }
867 dprintk("RPC: reading TCP record fragment of length %d\n",
868 xprt->tcp_reclen);
869}
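
The record marker parsed here is the 4-byte RPC-over-TCP framing header (RFC 1831): the most significant bit flags the last fragment of a record and the remaining 31 bits give the fragment length. Decoding it stand-alone, with illustrative macro names:

#include <stdint.h>
#include <arpa/inet.h>	/* ntohl() */

#define RPC_LAST_FRAG	0x80000000u
#define RPC_FRAG_MASK	0x7fffffffu

/* Decode a record marker read off the wire in network byte order. */
static void decode_record_marker(uint32_t wire, uint32_t *len, int *last)
{
	uint32_t host = ntohl(wire);

	*last = (host & RPC_LAST_FRAG) != 0;	/* final fragment? */
	*len  = host & RPC_FRAG_MASK;		/* fragment length */
}
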
870
871static void
872tcp_check_recm(struct rpc_xprt *xprt)
873{
874 dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n",
875 xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags);
876 if (xprt->tcp_offset == xprt->tcp_reclen) {
877 xprt->tcp_flags |= XPRT_COPY_RECM;
878 xprt->tcp_offset = 0;
879 if (xprt->tcp_flags & XPRT_LAST_FRAG) {
880 xprt->tcp_flags &= ~XPRT_COPY_DATA;
881 xprt->tcp_flags |= XPRT_COPY_XID;
882 xprt->tcp_copied = 0;
883 }
884 }
885}
886
887/*
888 * TCP read xid
889 */
890static inline void
891tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc)
892{
893 size_t len, used;
894 char *p;
895
896 len = sizeof(xprt->tcp_xid) - xprt->tcp_offset;
897 dprintk("RPC: reading XID (%Zu bytes)\n", len);
898 p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset;
899 used = tcp_copy_data(desc, p, len);
900 xprt->tcp_offset += used;
901 if (used != len)
902 return;
903 xprt->tcp_flags &= ~XPRT_COPY_XID;
904 xprt->tcp_flags |= XPRT_COPY_DATA;
905 xprt->tcp_copied = 4;
906 dprintk("RPC: reading reply for XID %08x\n",
907 ntohl(xprt->tcp_xid));
908 tcp_check_recm(xprt);
909}
910
911/*
912 * TCP read and complete request
913 */
914static inline void
915tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc)
916{
917 struct rpc_rqst *req;
918 struct xdr_buf *rcvbuf;
919 size_t len;
920 ssize_t r;
921
922 /* Find and lock the request corresponding to this xid */
923 spin_lock(&xprt->sock_lock);
924 req = xprt_lookup_rqst(xprt, xprt->tcp_xid);
925 if (!req) {
926 xprt->tcp_flags &= ~XPRT_COPY_DATA;
927 dprintk("RPC: XID %08x request not found!\n",
928 ntohl(xprt->tcp_xid));
929 spin_unlock(&xprt->sock_lock);
930 return;
931 }
932
933 rcvbuf = &req->rq_private_buf;
934 len = desc->count;
935 if (len > xprt->tcp_reclen - xprt->tcp_offset) {
936 skb_reader_t my_desc;
937
938 len = xprt->tcp_reclen - xprt->tcp_offset;
939 memcpy(&my_desc, desc, sizeof(my_desc));
940 my_desc.count = len;
941 r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied,
942 &my_desc, tcp_copy_data);
943 desc->count -= r;
944 desc->offset += r;
945 } else
946 r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied,
947 desc, tcp_copy_data);
948
949 if (r > 0) {
950 xprt->tcp_copied += r;
951 xprt->tcp_offset += r;
952 }
953 if (r != len) {
954 /* Error when copying to the receive buffer,
955 * usually because we weren't able to allocate
956 * additional buffer pages. All we can do now
957 * is turn off XPRT_COPY_DATA, so the request
958 * will not receive any additional updates,
959 * and time out.
960 * Any remaining data from this record will
961 * be discarded.
962 */
963 xprt->tcp_flags &= ~XPRT_COPY_DATA;
964 dprintk("RPC: XID %08x truncated request\n",
965 ntohl(xprt->tcp_xid));
966 dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n",
967 xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen);
968 goto out;
969 }
970
971 dprintk("RPC: XID %08x read %Zd bytes\n",
972 ntohl(xprt->tcp_xid), r);
973 dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n",
974 xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen);
975
976 if (xprt->tcp_copied == req->rq_private_buf.buflen)
977 xprt->tcp_flags &= ~XPRT_COPY_DATA;
978 else if (xprt->tcp_offset == xprt->tcp_reclen) {
979 if (xprt->tcp_flags & XPRT_LAST_FRAG)
980 xprt->tcp_flags &= ~XPRT_COPY_DATA;
981 }
982
983out:
984 if (!(xprt->tcp_flags & XPRT_COPY_DATA)) {
985 dprintk("RPC: %4d received reply complete\n",
986 req->rq_task->tk_pid);
987 xprt_complete_rqst(xprt, req, xprt->tcp_copied);
988 }
989 spin_unlock(&xprt->sock_lock);
990 tcp_check_recm(xprt);
991}
992
993/*
994 * TCP discard extra bytes from a short read
995 */
996static inline void
997tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc)
998{
999 size_t len;
1000
1001 len = xprt->tcp_reclen - xprt->tcp_offset;
1002 if (len > desc->count)
1003 len = desc->count;
1004 desc->count -= len;
1005 desc->offset += len;
1006 xprt->tcp_offset += len;
1007 dprintk("RPC: discarded %Zu bytes\n", len);
1008 tcp_check_recm(xprt);
1009}
1010
1011/*
1012 * TCP record receive routine
1013 * We first have to grab the record marker, then the XID, then the data.
1014 */ 625 */
1015static int 626void xprt_update_rtt(struct rpc_task *task)
1016tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
1017 unsigned int offset, size_t len)
1018{
1019 struct rpc_xprt *xprt = rd_desc->arg.data;
1020 skb_reader_t desc = {
1021 .skb = skb,
1022 .offset = offset,
1023 .count = len,
1024 .csum = 0
1025 };
1026
1027 dprintk("RPC: tcp_data_recv\n");
1028 do {
1029 /* Read in a new fragment marker if necessary */
1030 /* Can we ever really expect to get completely empty fragments? */
1031 if (xprt->tcp_flags & XPRT_COPY_RECM) {
1032 tcp_read_fraghdr(xprt, &desc);
1033 continue;
1034 }
1035 /* Read in the xid if necessary */
1036 if (xprt->tcp_flags & XPRT_COPY_XID) {
1037 tcp_read_xid(xprt, &desc);
1038 continue;
1039 }
1040 /* Read in the request data */
1041 if (xprt->tcp_flags & XPRT_COPY_DATA) {
1042 tcp_read_request(xprt, &desc);
1043 continue;
1044 }
1045 /* Skip over any trailing bytes on short reads */
1046 tcp_read_discard(xprt, &desc);
1047 } while (desc.count);
1048 dprintk("RPC: tcp_data_recv done\n");
1049 return len - desc.count;
1050}
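
tcp_data_recv drives a small state machine over the byte stream: marker, then XID, then payload, then a discard state for trailing bytes, with each state resuming cleanly when a fragment arrives split across socket reads. A compact sketch of the same incremental framing loop, reduced to two states (the XID step and the last-fragment bit are elided); a zero-initialized struct rx starts in the marker state:

#include <stdint.h>
#include <stddef.h>
#include <string.h>

enum rx_state { RX_MARKER, RX_PAYLOAD };

struct rx {
	enum rx_state state;
	uint8_t marker[4];	/* partially read record marker */
	size_t have;		/* marker bytes collected so far */
	uint32_t remaining;	/* payload bytes left in this fragment */
};

/* Feed an arbitrary chunk of stream bytes into the framing state machine.
 * Payload handling is elided; a real reader would copy it somewhere. */
static void rx_feed(struct rx *rx, const uint8_t *p, size_t len)
{
	while (len) {
		if (rx->state == RX_MARKER) {
			size_t n = 4 - rx->have;
			if (n > len)
				n = len;
			memcpy(rx->marker + rx->have, p, n);
			rx->have += n; p += n; len -= n;
			if (rx->have < 4)
				return;		/* marker still incomplete */
			rx->remaining = ((uint32_t)rx->marker[0] << 24 |
					 (uint32_t)rx->marker[1] << 16 |
					 (uint32_t)rx->marker[2] << 8 |
					 rx->marker[3]) & 0x7fffffffu;
			rx->have = 0;
			rx->state = RX_PAYLOAD;
		} else {
			size_t n = rx->remaining < len ? rx->remaining : len;
			p += n; len -= n;	/* consume payload bytes */
			rx->remaining -= n;
			if (rx->remaining == 0)
				rx->state = RX_MARKER;
		}
	}
}
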
1051
1052static void tcp_data_ready(struct sock *sk, int bytes)
1053{ 627{
1054 struct rpc_xprt *xprt; 628 struct rpc_rqst *req = task->tk_rqstp;
1055 read_descriptor_t rd_desc; 629 struct rpc_rtt *rtt = task->tk_client->cl_rtt;
1056 630 unsigned timer = task->tk_msg.rpc_proc->p_timer;
1057 read_lock(&sk->sk_callback_lock);
1058 dprintk("RPC: tcp_data_ready...\n");
1059 if (!(xprt = xprt_from_sock(sk))) {
1060 printk("RPC: tcp_data_ready socket info not found!\n");
1061 goto out;
1062 }
1063 if (xprt->shutdown)
1064 goto out;
1065
1066 /* We use rd_desc to pass struct xprt to tcp_data_recv */
1067 rd_desc.arg.data = xprt;
1068 rd_desc.count = 65536;
1069 tcp_read_sock(sk, &rd_desc, tcp_data_recv);
1070out:
1071 read_unlock(&sk->sk_callback_lock);
1072}
1073
1074static void
1075tcp_state_change(struct sock *sk)
1076{
1077 struct rpc_xprt *xprt;
1078 631
1079 read_lock(&sk->sk_callback_lock); 632 if (timer) {
1080 if (!(xprt = xprt_from_sock(sk))) 633 if (req->rq_ntrans == 1)
1081 goto out; 634 rpc_update_rtt(rtt, timer,
1082 dprintk("RPC: tcp_state_change client %p...\n", xprt); 635 (long)jiffies - req->rq_xtime);
1083 dprintk("RPC: state %x conn %d dead %d zapped %d\n", 636 rpc_set_timeo(rtt, timer, req->rq_ntrans - 1);
1084 sk->sk_state, xprt_connected(xprt),
1085 sock_flag(sk, SOCK_DEAD),
1086 sock_flag(sk, SOCK_ZAPPED));
1087
1088 switch (sk->sk_state) {
1089 case TCP_ESTABLISHED:
1090 spin_lock_bh(&xprt->sock_lock);
1091 if (!xprt_test_and_set_connected(xprt)) {
1092 /* Reset TCP record info */
1093 xprt->tcp_offset = 0;
1094 xprt->tcp_reclen = 0;
1095 xprt->tcp_copied = 0;
1096 xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID;
1097 rpc_wake_up(&xprt->pending);
1098 }
1099 spin_unlock_bh(&xprt->sock_lock);
1100 break;
1101 case TCP_SYN_SENT:
1102 case TCP_SYN_RECV:
1103 break;
1104 default:
1105 xprt_disconnect(xprt);
1106 break;
1107 } 637 }
1108 out:
1109 read_unlock(&sk->sk_callback_lock);
1110} 638}
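
xprt_update_rtt feeds each first-transmission round trip into the client's RTT estimator, which the transmit path later turns into a retransmit timeout. Estimators of this kind are built on Jacobson/Karels smoothing; a sketch with the textbook scaled-integer constants (the kernel's rpc_update_rtt keeps per-procedure-class state and may use different shifts):

/* Smoothed RTT state, in the same time units as the samples (e.g. jiffies). */
struct rtt_est {
	long srtt;	/* smoothed RTT, scaled by 8 */
	long sdrtt;	/* smoothed mean deviation, scaled by 4 */
};

/* Fold one RTT sample into the estimator (Jacobson/Karels, RFC 6298 style). */
static void rtt_update(struct rtt_est *e, long sample)
{
	long err = sample - (e->srtt >> 3);

	e->srtt += err;				/* srtt  += err/8 (scaled) */
	if (err < 0)
		err = -err;
	e->sdrtt += err - (e->sdrtt >> 2);	/* sdrtt += (|err|-sdrtt)/4 */
}

/* Retransmit timeout: smoothed RTT plus four times the mean deviation. */
static long rtt_rto(const struct rtt_est *e)
{
	return (e->srtt >> 3) + e->sdrtt;
}

Sampling only when rq_ntrans == 1 is the retransmission-ambiguity rule: a reply to a retransmitted request cannot be attributed to a particular send, so it is excluded from the estimate.
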
1111 639
1112/* 640/**
1113 * Called when more output buffer space is available for this socket. 641 * xprt_complete_rqst - called when reply processing is complete
1114 * We try not to wake our writers until they can make "significant" 642 * @task: RPC request that recently completed
1115 * progress, otherwise we'll waste resources thrashing sock_sendmsg 643 * @copied: actual number of bytes received from the transport
1116 * with a bunch of small requests. 644 *
645 * Caller holds transport lock.
1117 */ 646 */
1118static void 647void xprt_complete_rqst(struct rpc_task *task, int copied)
1119xprt_write_space(struct sock *sk)
1120{ 648{
1121 struct rpc_xprt *xprt; 649 struct rpc_rqst *req = task->tk_rqstp;
1122 struct socket *sock;
1123
1124 read_lock(&sk->sk_callback_lock);
1125 if (!(xprt = xprt_from_sock(sk)) || !(sock = sk->sk_socket))
1126 goto out;
1127 if (xprt->shutdown)
1128 goto out;
1129
1130 /* Wait until we have enough socket memory */
1131 if (xprt->stream) {
1132 /* from net/core/stream.c:sk_stream_write_space */
1133 if (sk_stream_wspace(sk) < sk_stream_min_wspace(sk))
1134 goto out;
1135 } else {
1136 /* from net/core/sock.c:sock_def_write_space */
1137 if (!sock_writeable(sk))
1138 goto out;
1139 }
1140 650
1141 if (!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)) 651 dprintk("RPC: %5u xid %08x complete (%d bytes received)\n",
1142 goto out; 652 task->tk_pid, ntohl(req->rq_xid), copied);
1143 653
1144 spin_lock_bh(&xprt->sock_lock); 654 list_del_init(&req->rq_list);
1145 if (xprt->snd_task) 655 req->rq_received = req->rq_private_buf.len = copied;
1146 rpc_wake_up_task(xprt->snd_task); 656 rpc_wake_up_task(task);
1147 spin_unlock_bh(&xprt->sock_lock);
1148out:
1149 read_unlock(&sk->sk_callback_lock);
1150} 657}
1151 658
1152/* 659static void xprt_timer(struct rpc_task *task)
1153 * RPC receive timeout handler.
1154 */
1155static void
1156xprt_timer(struct rpc_task *task)
1157{ 660{
1158 struct rpc_rqst *req = task->tk_rqstp; 661 struct rpc_rqst *req = task->tk_rqstp;
1159 struct rpc_xprt *xprt = req->rq_xprt; 662 struct rpc_xprt *xprt = req->rq_xprt;
1160 663
1161 spin_lock(&xprt->sock_lock); 664 dprintk("RPC: %4d xprt_timer\n", task->tk_pid);
1162 if (req->rq_received)
1163 goto out;
1164
1165 xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT);
1166 __xprt_put_cong(xprt, req);
1167 665
1168 dprintk("RPC: %4d xprt_timer (%s request)\n", 666 spin_lock(&xprt->transport_lock);
1169 task->tk_pid, req ? "pending" : "backlogged"); 667 if (!req->rq_received) {
1170 668 if (xprt->ops->timer)
1171 task->tk_status = -ETIMEDOUT; 669 xprt->ops->timer(task);
1172out: 670 task->tk_status = -ETIMEDOUT;
671 }
1173 task->tk_timeout = 0; 672 task->tk_timeout = 0;
1174 rpc_wake_up_task(task); 673 rpc_wake_up_task(task);
1175 spin_unlock(&xprt->sock_lock); 674 spin_unlock(&xprt->transport_lock);
1176} 675}
1177 676
1178/* 677/**
1179 * Place the actual RPC call. 678 * xprt_prepare_transmit - reserve the transport before sending a request
1180 * We have to copy the iovec because sendmsg fiddles with its contents. 679 * @task: RPC task about to send a request
680 *
1181 */ 681 */
1182int 682int xprt_prepare_transmit(struct rpc_task *task)
1183xprt_prepare_transmit(struct rpc_task *task)
1184{ 683{
1185 struct rpc_rqst *req = task->tk_rqstp; 684 struct rpc_rqst *req = task->tk_rqstp;
1186 struct rpc_xprt *xprt = req->rq_xprt; 685 struct rpc_xprt *xprt = req->rq_xprt;
@@ -1191,12 +690,12 @@ xprt_prepare_transmit(struct rpc_task *task)
1191 if (xprt->shutdown) 690 if (xprt->shutdown)
1192 return -EIO; 691 return -EIO;
1193 692
1194 spin_lock_bh(&xprt->sock_lock); 693 spin_lock_bh(&xprt->transport_lock);
1195 if (req->rq_received && !req->rq_bytes_sent) { 694 if (req->rq_received && !req->rq_bytes_sent) {
1196 err = req->rq_received; 695 err = req->rq_received;
1197 goto out_unlock; 696 goto out_unlock;
1198 } 697 }
1199 if (!__xprt_lock_write(xprt, task)) { 698 if (!xprt->ops->reserve_xprt(task)) {
1200 err = -EAGAIN; 699 err = -EAGAIN;
1201 goto out_unlock; 700 goto out_unlock;
1202 } 701 }
@@ -1206,39 +705,42 @@ xprt_prepare_transmit(struct rpc_task *task)
1206 goto out_unlock; 705 goto out_unlock;
1207 } 706 }
1208out_unlock: 707out_unlock:
1209 spin_unlock_bh(&xprt->sock_lock); 708 spin_unlock_bh(&xprt->transport_lock);
1210 return err; 709 return err;
1211} 710}
1212 711
1213void 712void
1214xprt_transmit(struct rpc_task *task) 713xprt_abort_transmit(struct rpc_task *task)
714{
715 struct rpc_xprt *xprt = task->tk_xprt;
716
717 xprt_release_write(xprt, task);
718}
719
720/**
721 * xprt_transmit - send an RPC request on a transport
722 * @task: controlling RPC task
723 *
724 * We have to copy the iovec because sendmsg fiddles with its contents.
725 */
726void xprt_transmit(struct rpc_task *task)
1215{ 727{
1216 struct rpc_clnt *clnt = task->tk_client;
1217 struct rpc_rqst *req = task->tk_rqstp; 728 struct rpc_rqst *req = task->tk_rqstp;
1218 struct rpc_xprt *xprt = req->rq_xprt; 729 struct rpc_xprt *xprt = req->rq_xprt;
1219 int status, retry = 0; 730 int status;
1220
1221 731
1222 dprintk("RPC: %4d xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); 732 dprintk("RPC: %4d xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
1223 733
1224 /* set up everything as needed. */
1225 /* Write the record marker */
1226 if (xprt->stream) {
1227 u32 *marker = req->rq_svec[0].iov_base;
1228
1229 *marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker)));
1230 }
1231
1232 smp_rmb(); 734 smp_rmb();
1233 if (!req->rq_received) { 735 if (!req->rq_received) {
1234 if (list_empty(&req->rq_list)) { 736 if (list_empty(&req->rq_list)) {
1235 spin_lock_bh(&xprt->sock_lock); 737 spin_lock_bh(&xprt->transport_lock);
1236 /* Update the softirq receive buffer */ 738 /* Update the softirq receive buffer */
1237 memcpy(&req->rq_private_buf, &req->rq_rcv_buf, 739 memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
1238 sizeof(req->rq_private_buf)); 740 sizeof(req->rq_private_buf));
1239 /* Add request to the receive list */ 741 /* Add request to the receive list */
1240 list_add_tail(&req->rq_list, &xprt->recv); 742 list_add_tail(&req->rq_list, &xprt->recv);
1241 spin_unlock_bh(&xprt->sock_lock); 743 spin_unlock_bh(&xprt->transport_lock);
1242 xprt_reset_majortimeo(req); 744 xprt_reset_majortimeo(req);
1243 /* Turn off autodisconnect */ 745 /* Turn off autodisconnect */
1244 del_singleshot_timer_sync(&xprt->timer); 746 del_singleshot_timer_sync(&xprt->timer);
@@ -1246,40 +748,19 @@ xprt_transmit(struct rpc_task *task)
1246 } else if (!req->rq_bytes_sent) 748 } else if (!req->rq_bytes_sent)
1247 return; 749 return;
1248 750
1249 /* Continue transmitting the packet/record. We must be careful 751 status = xprt->ops->send_request(task);
1250 * to cope with writespace callbacks arriving _after_ we have 752 if (status == 0) {
1251 * called xprt_sendmsg(). 753 dprintk("RPC: %4d xmit complete\n", task->tk_pid);
1252 */ 754 spin_lock_bh(&xprt->transport_lock);
1253 while (1) { 755 xprt->ops->set_retrans_timeout(task);
1254 req->rq_xtime = jiffies; 756 /* Don't race with disconnect */
1255 status = xprt_sendmsg(xprt, req); 757 if (!xprt_connected(xprt))
1256 758 task->tk_status = -ENOTCONN;
1257 if (status < 0) 759 else if (!req->rq_received)
1258 break; 760 rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer);
1259 761 xprt->ops->release_xprt(xprt, task);
1260 if (xprt->stream) { 762 spin_unlock_bh(&xprt->transport_lock);
1261 req->rq_bytes_sent += status; 763 return;
1262
1263 /* If we've sent the entire packet, immediately
1264 * reset the count of bytes sent. */
1265 if (req->rq_bytes_sent >= req->rq_slen) {
1266 req->rq_bytes_sent = 0;
1267 goto out_receive;
1268 }
1269 } else {
1270 if (status >= req->rq_slen)
1271 goto out_receive;
1272 status = -EAGAIN;
1273 break;
1274 }
1275
1276 dprintk("RPC: %4d xmit incomplete (%d left of %d)\n",
1277 task->tk_pid, req->rq_slen - req->rq_bytes_sent,
1278 req->rq_slen);
1279
1280 status = -EAGAIN;
1281 if (retry++ > 50)
1282 break;
1283 } 764 }
1284 765
1285 /* Note: at this point, task->tk_sleeping has not yet been set, 766 /* Note: at this point, task->tk_sleeping has not yet been set,
@@ -1289,60 +770,19 @@ xprt_transmit(struct rpc_task *task)
1289 task->tk_status = status; 770 task->tk_status = status;
1290 771
1291 switch (status) { 772 switch (status) {
1292 case -EAGAIN:
1293 if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) {
1294 /* Protect against races with xprt_write_space */
1295 spin_lock_bh(&xprt->sock_lock);
1296 /* Don't race with disconnect */
1297 if (!xprt_connected(xprt))
1298 task->tk_status = -ENOTCONN;
1299 else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) {
1300 task->tk_timeout = req->rq_timeout;
1301 rpc_sleep_on(&xprt->pending, task, NULL, NULL);
1302 }
1303 spin_unlock_bh(&xprt->sock_lock);
1304 return;
1305 }
1306 /* Keep holding the socket if it is blocked */
1307 rpc_delay(task, HZ>>4);
1308 return;
1309 case -ECONNREFUSED: 773 case -ECONNREFUSED:
1310 task->tk_timeout = RPC_REESTABLISH_TIMEOUT;
1311 rpc_sleep_on(&xprt->sending, task, NULL, NULL); 774 rpc_sleep_on(&xprt->sending, task, NULL, NULL);
775 case -EAGAIN:
1312 case -ENOTCONN: 776 case -ENOTCONN:
1313 return; 777 return;
1314 default: 778 default:
1315 if (xprt->stream) 779 break;
1316 xprt_disconnect(xprt);
1317 } 780 }
1318 xprt_release_write(xprt, task); 781 xprt_release_write(xprt, task);
1319 return; 782 return;
1320 out_receive:
1321 dprintk("RPC: %4d xmit complete\n", task->tk_pid);
1322 /* Set the task's receive timeout value */
1323 spin_lock_bh(&xprt->sock_lock);
1324 if (!xprt->nocong) {
1325 int timer = task->tk_msg.rpc_proc->p_timer;
1326 task->tk_timeout = rpc_calc_rto(clnt->cl_rtt, timer);
1327 task->tk_timeout <<= rpc_ntimeo(clnt->cl_rtt, timer) + req->rq_retries;
1328 if (task->tk_timeout > xprt->timeout.to_maxval || task->tk_timeout == 0)
1329 task->tk_timeout = xprt->timeout.to_maxval;
1330 } else
1331 task->tk_timeout = req->rq_timeout;
1332 /* Don't race with disconnect */
1333 if (!xprt_connected(xprt))
1334 task->tk_status = -ENOTCONN;
1335 else if (!req->rq_received)
1336 rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer);
1337 __xprt_release_write(xprt, task);
1338 spin_unlock_bh(&xprt->sock_lock);
1339} 783}
1340 784
1341/* 785static inline void do_xprt_reserve(struct rpc_task *task)
1342 * Reserve an RPC call slot.
1343 */
1344static inline void
1345do_xprt_reserve(struct rpc_task *task)
1346{ 786{
1347 struct rpc_xprt *xprt = task->tk_xprt; 787 struct rpc_xprt *xprt = task->tk_xprt;
1348 788
@@ -1362,22 +802,25 @@ do_xprt_reserve(struct rpc_task *task)
1362 rpc_sleep_on(&xprt->backlog, task, NULL, NULL); 802 rpc_sleep_on(&xprt->backlog, task, NULL, NULL);
1363} 803}
1364 804
1365void 805/**
1366xprt_reserve(struct rpc_task *task) 806 * xprt_reserve - allocate an RPC request slot
807 * @task: RPC task requesting a slot allocation
808 *
809 * If no more slots are available, place the task on the transport's
810 * backlog queue.
811 */
812void xprt_reserve(struct rpc_task *task)
1367{ 813{
1368 struct rpc_xprt *xprt = task->tk_xprt; 814 struct rpc_xprt *xprt = task->tk_xprt;
1369 815
1370 task->tk_status = -EIO; 816 task->tk_status = -EIO;
1371 if (!xprt->shutdown) { 817 if (!xprt->shutdown) {
1372 spin_lock(&xprt->xprt_lock); 818 spin_lock(&xprt->reserve_lock);
1373 do_xprt_reserve(task); 819 do_xprt_reserve(task);
1374 spin_unlock(&xprt->xprt_lock); 820 spin_unlock(&xprt->reserve_lock);
1375 } 821 }
1376} 822}
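
do_xprt_reserve pops a preallocated request slot off the transport's free list, xprt_reserve parks the task on the backlog queue when the table is empty, and xprt_release later pushes the slot back and wakes the next waiter. The same pattern in userspace terms, assuming a mutex and condition variable in place of the RPC wait queues:

#include <pthread.h>
#include <stddef.h>

struct slot { struct slot *next; };

struct slot_table {
	pthread_mutex_t lock;
	pthread_cond_t backlog;		/* waiters for a free slot */
	struct slot *free;		/* LIFO free list */
};

/* Reserve a slot, sleeping on the backlog while none are free. */
static struct slot *slot_reserve(struct slot_table *t)
{
	struct slot *s;

	pthread_mutex_lock(&t->lock);
	while ((s = t->free) == NULL)
		pthread_cond_wait(&t->backlog, &t->lock);
	t->free = s->next;
	pthread_mutex_unlock(&t->lock);
	return s;
}

/* Return a slot and wake one backlogged waiter. */
static void slot_release(struct slot_table *t, struct slot *s)
{
	pthread_mutex_lock(&t->lock);
	s->next = t->free;
	t->free = s;
	pthread_cond_signal(&t->backlog);
	pthread_mutex_unlock(&t->lock);
}
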
1377 823
1378/*
1379 * Allocate a 'unique' XID
1380 */
1381static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt) 824static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt)
1382{ 825{
1383 return xprt->xid++; 826 return xprt->xid++;
@@ -1388,11 +831,7 @@ static inline void xprt_init_xid(struct rpc_xprt *xprt)
1388 get_random_bytes(&xprt->xid, sizeof(xprt->xid)); 831 get_random_bytes(&xprt->xid, sizeof(xprt->xid));
1389} 832}
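
XID allocation is deliberately trivial: xprt_init_xid seeds the counter with random bytes once per transport, and xprt_alloc_xid just increments it, giving XIDs that are unpredictable across instances yet unique and cheap within one. The same two-step scheme in miniature, with getrandom(2) standing in for get_random_bytes:

#include <stdint.h>
#include <time.h>
#include <sys/random.h>

static uint32_t next_xid;

/* Seed once with entropy... */
static void xid_init(void)
{
	if (getrandom(&next_xid, sizeof(next_xid), 0) != sizeof(next_xid))
		next_xid = (uint32_t)time(NULL);	/* crude fallback */
}

/* ...then allocation is a bare increment. */
static uint32_t xid_alloc(void)
{
	return next_xid++;
}
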
1390 833
1391/* 834static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
1392 * Initialize RPC request
1393 */
1394static void
1395xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
1396{ 835{
1397 struct rpc_rqst *req = task->tk_rqstp; 836 struct rpc_rqst *req = task->tk_rqstp;
1398 837
@@ -1400,128 +839,104 @@ xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
1400 req->rq_task = task; 839 req->rq_task = task;
1401 req->rq_xprt = xprt; 840 req->rq_xprt = xprt;
1402 req->rq_xid = xprt_alloc_xid(xprt); 841 req->rq_xid = xprt_alloc_xid(xprt);
842 req->rq_release_snd_buf = NULL;
1403 dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, 843 dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid,
1404 req, ntohl(req->rq_xid)); 844 req, ntohl(req->rq_xid));
1405} 845}
1406 846
1407/* 847/**
1408 * Release an RPC call slot 848 * xprt_release - release an RPC request slot
849 * @task: task which is finished with the slot
850 *
1409 */ 851 */
1410void 852void xprt_release(struct rpc_task *task)
1411xprt_release(struct rpc_task *task)
1412{ 853{
1413 struct rpc_xprt *xprt = task->tk_xprt; 854 struct rpc_xprt *xprt = task->tk_xprt;
1414 struct rpc_rqst *req; 855 struct rpc_rqst *req;
1415 856
1416 if (!(req = task->tk_rqstp)) 857 if (!(req = task->tk_rqstp))
1417 return; 858 return;
1418 spin_lock_bh(&xprt->sock_lock); 859 spin_lock_bh(&xprt->transport_lock);
1419 __xprt_release_write(xprt, task); 860 xprt->ops->release_xprt(xprt, task);
1420 __xprt_put_cong(xprt, req); 861 if (xprt->ops->release_request)
862 xprt->ops->release_request(task);
1421 if (!list_empty(&req->rq_list)) 863 if (!list_empty(&req->rq_list))
1422 list_del(&req->rq_list); 864 list_del(&req->rq_list);
1423 xprt->last_used = jiffies; 865 xprt->last_used = jiffies;
1424 if (list_empty(&xprt->recv) && !xprt->shutdown) 866 if (list_empty(&xprt->recv) && !xprt->shutdown)
1425 mod_timer(&xprt->timer, xprt->last_used + XPRT_IDLE_TIMEOUT); 867 mod_timer(&xprt->timer,
1426 spin_unlock_bh(&xprt->sock_lock); 868 xprt->last_used + xprt->idle_timeout);
869 spin_unlock_bh(&xprt->transport_lock);
1427 task->tk_rqstp = NULL; 870 task->tk_rqstp = NULL;
871 if (req->rq_release_snd_buf)
872 req->rq_release_snd_buf(req);
1428 memset(req, 0, sizeof(*req)); /* mark unused */ 873 memset(req, 0, sizeof(*req)); /* mark unused */
1429 874
1430 dprintk("RPC: %4d release request %p\n", task->tk_pid, req); 875 dprintk("RPC: %4d release request %p\n", task->tk_pid, req);
1431 876
1432 spin_lock(&xprt->xprt_lock); 877 spin_lock(&xprt->reserve_lock);
1433 list_add(&req->rq_list, &xprt->free); 878 list_add(&req->rq_list, &xprt->free);
1434 xprt_clear_backlog(xprt); 879 rpc_wake_up_next(&xprt->backlog);
1435 spin_unlock(&xprt->xprt_lock); 880 spin_unlock(&xprt->reserve_lock);
1436}
1437
1438/*
1439 * Set default timeout parameters
1440 */
1441static void
1442xprt_default_timeout(struct rpc_timeout *to, int proto)
1443{
1444 if (proto == IPPROTO_UDP)
1445 xprt_set_timeout(to, 5, 5 * HZ);
1446 else
1447 xprt_set_timeout(to, 5, 60 * HZ);
1448} 881}
1449 882
1450/* 883/**
1451 * Set constant timeout 884 * xprt_set_timeout - set constant RPC timeout
885 * @to: RPC timeout parameters to set up
886 * @retr: number of retries
887 * @incr: amount of increase after each retry
888 *
1452 */ 889 */
1453void 890void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr)
1454xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr)
1455{ 891{
1456 to->to_initval = 892 to->to_initval =
1457 to->to_increment = incr; 893 to->to_increment = incr;
1458 to->to_maxval = incr * retr; 894 to->to_maxval = to->to_initval + (incr * retr);
1459 to->to_retries = retr; 895 to->to_retries = retr;
1460 to->to_exponential = 0; 896 to->to_exponential = 0;
1461} 897}
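
Note the corrected ceiling: a constant (to_exponential == 0) policy now tops out at to_initval + incr * retr rather than incr * retr, so the maximum covers the initial interval plus every retry increment. For retr = 5 and incr = 5 * HZ that gives a 5 second initial timeout growing to a 30 second cap. The schedule, as a one-liner:

/* Timeout before the nth retransmission under a constant policy:
 * initval + n * increment, clamped to maxval. */
static unsigned long retrans_timeout(unsigned long initval,
				     unsigned long increment,
				     unsigned long maxval, unsigned int n)
{
	unsigned long to = initval + n * increment;

	return to > maxval ? maxval : to;
}
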
1462 898
1463unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; 899static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to)
1464unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE;
1465
1466/*
1467 * Initialize an RPC client
1468 */
1469static struct rpc_xprt *
1470xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to)
1471{ 900{
901 int result;
1472 struct rpc_xprt *xprt; 902 struct rpc_xprt *xprt;
1473 unsigned int entries;
1474 size_t slot_table_size;
1475 struct rpc_rqst *req; 903 struct rpc_rqst *req;
1476 904
1477 dprintk("RPC: setting up %s transport...\n",
1478 proto == IPPROTO_UDP? "UDP" : "TCP");
1479
1480 entries = (proto == IPPROTO_TCP)?
1481 xprt_tcp_slot_table_entries : xprt_udp_slot_table_entries;
1482
1483 if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) 905 if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL)
1484 return ERR_PTR(-ENOMEM); 906 return ERR_PTR(-ENOMEM);
1485 memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */ 907 memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */
1486 xprt->max_reqs = entries;
1487 slot_table_size = entries * sizeof(xprt->slot[0]);
1488 xprt->slot = kmalloc(slot_table_size, GFP_KERNEL);
1489 if (xprt->slot == NULL) {
1490 kfree(xprt);
1491 return ERR_PTR(-ENOMEM);
1492 }
1493 memset(xprt->slot, 0, slot_table_size);
1494 908
1495 xprt->addr = *ap; 909 xprt->addr = *ap;
1496 xprt->prot = proto; 910
1497 xprt->stream = (proto == IPPROTO_TCP)? 1 : 0; 911 switch (proto) {
1498 if (xprt->stream) { 912 case IPPROTO_UDP:
1499 xprt->cwnd = RPC_MAXCWND(xprt); 913 result = xs_setup_udp(xprt, to);
1500 xprt->nocong = 1; 914 break;
1501 xprt->max_payload = (1U << 31) - 1; 915 case IPPROTO_TCP:
1502 } else { 916 result = xs_setup_tcp(xprt, to);
1503 xprt->cwnd = RPC_INITCWND; 917 break;
1504 xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); 918 default:
919 printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n",
920 proto);
921 result = -EIO;
922 break;
923 }
924 if (result) {
925 kfree(xprt);
926 return ERR_PTR(result);
1505 } 927 }
1506 spin_lock_init(&xprt->sock_lock); 928
1507 spin_lock_init(&xprt->xprt_lock); 929 spin_lock_init(&xprt->transport_lock);
1508 init_waitqueue_head(&xprt->cong_wait); 930 spin_lock_init(&xprt->reserve_lock);
1509 931
1510 INIT_LIST_HEAD(&xprt->free); 932 INIT_LIST_HEAD(&xprt->free);
1511 INIT_LIST_HEAD(&xprt->recv); 933 INIT_LIST_HEAD(&xprt->recv);
1512 INIT_WORK(&xprt->sock_connect, xprt_socket_connect, xprt); 934 INIT_WORK(&xprt->task_cleanup, xprt_autoclose, xprt);
1513 INIT_WORK(&xprt->task_cleanup, xprt_socket_autoclose, xprt);
1514 init_timer(&xprt->timer); 935 init_timer(&xprt->timer);
1515 xprt->timer.function = xprt_init_autodisconnect; 936 xprt->timer.function = xprt_init_autodisconnect;
1516 xprt->timer.data = (unsigned long) xprt; 937 xprt->timer.data = (unsigned long) xprt;
1517 xprt->last_used = jiffies; 938 xprt->last_used = jiffies;
1518 xprt->port = XPRT_MAX_RESVPORT; 939 xprt->cwnd = RPC_INITCWND;
1519
1520 /* Set timeout parameters */
1521 if (to) {
1522 xprt->timeout = *to;
1523 } else
1524 xprt_default_timeout(&xprt->timeout, xprt->prot);
1525 940
1526 rpc_init_wait_queue(&xprt->pending, "xprt_pending"); 941 rpc_init_wait_queue(&xprt->pending, "xprt_pending");
1527 rpc_init_wait_queue(&xprt->sending, "xprt_sending"); 942 rpc_init_wait_queue(&xprt->sending, "xprt_sending");
@@ -1529,139 +944,25 @@ xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to)
1529 rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); 944 rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");
1530 945
1531 /* initialize free list */ 946 /* initialize free list */
1532 for (req = &xprt->slot[entries-1]; req >= &xprt->slot[0]; req--) 947 for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--)
1533 list_add(&req->rq_list, &xprt->free); 948 list_add(&req->rq_list, &xprt->free);
1534 949
1535 xprt_init_xid(xprt); 950 xprt_init_xid(xprt);
1536 951
1537 /* Check whether we want to use a reserved port */
1538 xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
1539
1540 dprintk("RPC: created transport %p with %u slots\n", xprt, 952 dprintk("RPC: created transport %p with %u slots\n", xprt,
1541 xprt->max_reqs); 953 xprt->max_reqs);
1542 954
1543 return xprt; 955 return xprt;
1544} 956}
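
xprt_setup no longer knows socket details: it dispatches to xs_setup_udp or xs_setup_tcp, which fill in xprt->ops, and from then on the generic code calls connect, send_request, release_xprt and the rest through that table. The dispatch pattern in isolation, with a hypothetical two-method ops table (the real rpc_xprt_ops carries many more hooks):

/* Hypothetical ops table; illustrative names only. */
struct transport_ops {
	int (*connect)(void *xprt);
	int (*send_request)(void *xprt, const void *buf, unsigned int len);
};

struct transport {
	const struct transport_ops *ops;
	/* protocol-specific state would live here */
};

static int udp_connect(void *x) { (void)x; return 0; }
static int udp_send(void *x, const void *b, unsigned int l)
{ (void)x; (void)b; return (int)l; }

static const struct transport_ops udp_ops = {
	.connect	= udp_connect,
	.send_request	= udp_send,
};

/* Generic code never tests the protocol again; it just dispatches. */
static int transport_send(struct transport *t, const void *buf,
			  unsigned int len)
{
	return t->ops->send_request(t, buf, len);
}

A transport is bound to its implementation exactly once, at setup time, by pointing t->ops at the matching table.
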
1545 957
1546/* 958/**
1547 * Bind to a reserved port 959 * xprt_create_proto - create an RPC client transport
1548 */ 960 * @proto: requested transport protocol
1549static inline int xprt_bindresvport(struct rpc_xprt *xprt, struct socket *sock) 961 * @sap: remote peer's address
1550{ 962 * @to: timeout parameters for new transport
1551 struct sockaddr_in myaddr = { 963 *
1552 .sin_family = AF_INET,
1553 };
1554 int err, port;
1555
1556 /* Were we already bound to a given port? Try to reuse it */
1557 port = xprt->port;
1558 do {
1559 myaddr.sin_port = htons(port);
1560 err = sock->ops->bind(sock, (struct sockaddr *) &myaddr,
1561 sizeof(myaddr));
1562 if (err == 0) {
1563 xprt->port = port;
1564 return 0;
1565 }
1566 if (--port == 0)
1567 port = XPRT_MAX_RESVPORT;
1568 } while (err == -EADDRINUSE && port != xprt->port);
1569
1570 printk("RPC: Can't bind to reserved port (%d).\n", -err);
1571 return err;
1572}
1573
1574static void
1575xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock)
1576{
1577 struct sock *sk = sock->sk;
1578
1579 if (xprt->inet)
1580 return;
1581
1582 write_lock_bh(&sk->sk_callback_lock);
1583 sk->sk_user_data = xprt;
1584 xprt->old_data_ready = sk->sk_data_ready;
1585 xprt->old_state_change = sk->sk_state_change;
1586 xprt->old_write_space = sk->sk_write_space;
1587 if (xprt->prot == IPPROTO_UDP) {
1588 sk->sk_data_ready = udp_data_ready;
1589 sk->sk_no_check = UDP_CSUM_NORCV;
1590 xprt_set_connected(xprt);
1591 } else {
1592 tcp_sk(sk)->nonagle = 1; /* disable Nagle's algorithm */
1593 sk->sk_data_ready = tcp_data_ready;
1594 sk->sk_state_change = tcp_state_change;
1595 xprt_clear_connected(xprt);
1596 }
1597 sk->sk_write_space = xprt_write_space;
1598
1599 /* Reset to new socket */
1600 xprt->sock = sock;
1601 xprt->inet = sk;
1602 write_unlock_bh(&sk->sk_callback_lock);
1603
1604 return;
1605}
1606
1607/*
1608 * Set socket buffer length
1609 */
1610void
1611xprt_sock_setbufsize(struct rpc_xprt *xprt)
1612{
1613 struct sock *sk = xprt->inet;
1614
1615 if (xprt->stream)
1616 return;
1617 if (xprt->rcvsize) {
1618 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
1619 sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs * 2;
1620 }
1621 if (xprt->sndsize) {
1622 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
1623 sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2;
1624 sk->sk_write_space(sk);
1625 }
1626}
1627
1628/*
1629 * Datastream sockets are created here, but xprt_connect will create
1630 * and connect stream sockets.
1631 */
1632static struct socket * xprt_create_socket(struct rpc_xprt *xprt, int proto, int resvport)
1633{
1634 struct socket *sock;
1635 int type, err;
1636
1637 dprintk("RPC: xprt_create_socket(%s %d)\n",
1638 (proto == IPPROTO_UDP)? "udp" : "tcp", proto);
1639
1640 type = (proto == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
1641
1642 if ((err = sock_create_kern(PF_INET, type, proto, &sock)) < 0) {
1643 printk("RPC: can't create socket (%d).\n", -err);
1644 return NULL;
1645 }
1646
1647 /* If the caller has the capability, bind to a reserved port */
1648 if (resvport && xprt_bindresvport(xprt, sock) < 0) {
1649 printk("RPC: can't bind to reserved port.\n");
1650 goto failed;
1651 }
1652
1653 return sock;
1654
1655failed:
1656 sock_release(sock);
1657 return NULL;
1658}
1659
1660/*
1661 * Create an RPC client transport given the protocol and peer address.
1662 */ 964 */
1663struct rpc_xprt * 965struct rpc_xprt *xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to)
1664xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to)
1665{ 966{
1666 struct rpc_xprt *xprt; 967 struct rpc_xprt *xprt;
1667 968
@@ -1673,46 +974,26 @@ xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to)
1673 return xprt; 974 return xprt;
1674} 975}
1675 976
1676/* 977static void xprt_shutdown(struct rpc_xprt *xprt)
1677 * Prepare for transport shutdown.
1678 */
1679static void
1680xprt_shutdown(struct rpc_xprt *xprt)
1681{ 978{
1682 xprt->shutdown = 1; 979 xprt->shutdown = 1;
1683 rpc_wake_up(&xprt->sending); 980 rpc_wake_up(&xprt->sending);
1684 rpc_wake_up(&xprt->resend); 981 rpc_wake_up(&xprt->resend);
1685 rpc_wake_up(&xprt->pending); 982 xprt_wake_pending_tasks(xprt, -EIO);
1686 rpc_wake_up(&xprt->backlog); 983 rpc_wake_up(&xprt->backlog);
1687 wake_up(&xprt->cong_wait);
1688 del_timer_sync(&xprt->timer); 984 del_timer_sync(&xprt->timer);
1689
1690 /* synchronously wait for connect worker to finish */
1691 cancel_delayed_work(&xprt->sock_connect);
1692 flush_scheduled_work();
1693} 985}
1694 986
1695/* 987/**
1696 * Clear the xprt backlog queue 988 * xprt_destroy - destroy an RPC transport, killing off all requests.
1697 */ 989 * @xprt: transport to destroy
1698static int 990 *
1699xprt_clear_backlog(struct rpc_xprt *xprt) {
1700 rpc_wake_up_next(&xprt->backlog);
1701 wake_up(&xprt->cong_wait);
1702 return 1;
1703}
1704
1705/*
1706 * Destroy an RPC transport, killing off all requests.
1707 */ 991 */
1708int 992int xprt_destroy(struct rpc_xprt *xprt)
1709xprt_destroy(struct rpc_xprt *xprt)
1710{ 993{
1711 dprintk("RPC: destroying transport %p\n", xprt); 994 dprintk("RPC: destroying transport %p\n", xprt);
1712 xprt_shutdown(xprt); 995 xprt_shutdown(xprt);
1713 xprt_disconnect(xprt); 996 xprt->ops->destroy(xprt);
1714 xprt_close(xprt);
1715 kfree(xprt->slot);
1716 kfree(xprt); 997 kfree(xprt);
1717 998
1718 return 0; 999 return 0;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
new file mode 100644
index 000000000000..2e1529217e65
--- /dev/null
+++ b/net/sunrpc/xprtsock.c
@@ -0,0 +1,1252 @@
1/*
2 * linux/net/sunrpc/xprtsock.c
3 *
4 * Client-side transport implementation for sockets.
5 *
6 * TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com>
7 * TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com>
8 * TCP NFS related read + write fixes
9 * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie>
10 *
11 * Rewrite of large parts of the code in order to stabilize TCP stuff.
12 * Fix behaviour when socket buffer is full.
13 * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no>
14 *
15 * IP socket transport implementation, (C) 2005 Chuck Lever <cel@netapp.com>
16 */
17
18#include <linux/types.h>
19#include <linux/slab.h>
20#include <linux/capability.h>
21#include <linux/sched.h>
22#include <linux/pagemap.h>
23#include <linux/errno.h>
24#include <linux/socket.h>
25#include <linux/in.h>
26#include <linux/net.h>
27#include <linux/mm.h>
28#include <linux/udp.h>
29#include <linux/tcp.h>
30#include <linux/sunrpc/clnt.h>
31#include <linux/file.h>
32
33#include <net/sock.h>
34#include <net/checksum.h>
35#include <net/udp.h>
36#include <net/tcp.h>
37
38/*
39 * How many times to try sending a request on a socket before waiting
40 * for the socket buffer to clear.
41 */
42#define XS_SENDMSG_RETRY (10U)
43
44/*
45 * Time out for an RPC UDP socket connect. UDP socket connects are
46 * synchronous, but we set a timeout anyway in case of resource
47 * exhaustion on the local host.
48 */
49#define XS_UDP_CONN_TO (5U * HZ)
50
51/*
52 * Wait duration for an RPC TCP connection to be established. Solaris
53 * NFS over TCP uses 60 seconds, for example, which is in line with how
54 * long a server takes to reboot.
55 */
56#define XS_TCP_CONN_TO (60U * HZ)
57
58/*
59 * Wait duration for a reply from the RPC portmapper.
60 */
61#define XS_BIND_TO (60U * HZ)
62
63/*
64 * Delay if a UDP socket connect error occurs. This is most likely some
65 * kind of resource problem on the local host.
66 */
67#define XS_UDP_REEST_TO (2U * HZ)
68
69/*
70 * The reestablish timeout allows clients to delay for a bit before attempting
71 * to reconnect to a server that just dropped the connection.
72 *
73 * We implement an exponential backoff when trying to reestablish a TCP
74 * transport connection with the server. Some servers like to drop a TCP
75 * connection when they are overworked, so we start with a short timeout and
76 * increase over time if the server is down or not responding.
77 */
78#define XS_TCP_INIT_REEST_TO (3U * HZ)
79#define XS_TCP_MAX_REEST_TO (5U * 60 * HZ)
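
Between XS_TCP_INIT_REEST_TO and XS_TCP_MAX_REEST_TO the reconnect delay grows exponentially: each failed attempt doubles the stored timeout until it hits the ceiling. A sketch of that doubling-with-clamp step, in seconds rather than jiffies:

#define INIT_REEST_TO	(3U)		/* seconds, mirroring 3U * HZ */
#define MAX_REEST_TO	(5U * 60)	/* seconds, mirroring 5U * 60 * HZ */

/* Return the delay to use now and double the stored value for next time. */
static unsigned int reest_backoff(unsigned int *to)
{
	unsigned int delay = *to;

	*to <<= 1;			/* exponential growth... */
	if (*to > MAX_REEST_TO)
		*to = MAX_REEST_TO;	/* ...bounded by the ceiling */
	return delay;
}
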
80
81/*
82 * TCP idle timeout; client drops the transport socket if it is idle
83 * for this long. Note that we also timeout UDP sockets to prevent
84 * holding port numbers when there is no RPC traffic.
85 */
86#define XS_IDLE_DISC_TO (5U * 60 * HZ)
87
88#ifdef RPC_DEBUG
89# undef RPC_DEBUG_DATA
90# define RPCDBG_FACILITY RPCDBG_TRANS
91#endif
92
93#ifdef RPC_DEBUG_DATA
94static void xs_pktdump(char *msg, u32 *packet, unsigned int count)
95{
96 u8 *buf = (u8 *) packet;
97 int j;
98
99 dprintk("RPC: %s\n", msg);
100 for (j = 0; j < count && j < 128; j += 4) {
101 if (!(j & 31)) {
102 if (j)
103 dprintk("\n");
104 dprintk("0x%04x ", j);
105 }
106 dprintk("%02x%02x%02x%02x ",
107 buf[j], buf[j+1], buf[j+2], buf[j+3]);
108 }
109 dprintk("\n");
110}
111#else
112static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count)
113{
114 /* NOP */
115}
116#endif
117
118#define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)
119
120static inline int xs_send_head(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, unsigned int len)
121{
122 struct kvec iov = {
123 .iov_base = xdr->head[0].iov_base + base,
124 .iov_len = len - base,
125 };
126 struct msghdr msg = {
127 .msg_name = addr,
128 .msg_namelen = addrlen,
129 .msg_flags = XS_SENDMSG_FLAGS,
130 };
131
132 if (xdr->len > len)
133 msg.msg_flags |= MSG_MORE;
134
135 if (likely(iov.iov_len))
136 return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
137 return kernel_sendmsg(sock, &msg, NULL, 0, 0);
138}
139
140static int xs_send_tail(struct socket *sock, struct xdr_buf *xdr, unsigned int base, unsigned int len)
141{
142 struct kvec iov = {
143 .iov_base = xdr->tail[0].iov_base + base,
144 .iov_len = len - base,
145 };
146 struct msghdr msg = {
147 .msg_flags = XS_SENDMSG_FLAGS,
148 };
149
150 return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
151}
152
153/**
154 * xs_sendpages - write pages directly to a socket
155 * @sock: socket to send on
156 * @addr: UDP only -- address of destination
157 * @addrlen: UDP only -- length of destination address
158 * @xdr: buffer containing this request
159 * @base: starting position in the buffer
160 *
161 */
162static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base)
163{
164 struct page **ppage = xdr->pages;
165 unsigned int len, pglen = xdr->page_len;
166 int err, ret = 0;
167 ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int);
168
169 if (unlikely(!sock))
170 return -ENOTCONN;
171
172 clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
173
174 len = xdr->head[0].iov_len;
175 if (base < len || (addr != NULL && base == 0)) {
176 err = xs_send_head(sock, addr, addrlen, xdr, base, len);
177 if (ret == 0)
178 ret = err;
179 else if (err > 0)
180 ret += err;
181 if (err != (len - base))
182 goto out;
183 base = 0;
184 } else
185 base -= len;
186
187 if (unlikely(pglen == 0))
188 goto copy_tail;
189 if (unlikely(base >= pglen)) {
190 base -= pglen;
191 goto copy_tail;
192 }
193 if (base || xdr->page_base) {
194 pglen -= base;
195 base += xdr->page_base;
196 ppage += base >> PAGE_CACHE_SHIFT;
197 base &= ~PAGE_CACHE_MASK;
198 }
199
200 sendpage = sock->ops->sendpage ? : sock_no_sendpage;
201 do {
202 int flags = XS_SENDMSG_FLAGS;
203
204 len = PAGE_CACHE_SIZE;
205 if (base)
206 len -= base;
207 if (pglen < len)
208 len = pglen;
209
210 if (pglen != len || xdr->tail[0].iov_len != 0)
211 flags |= MSG_MORE;
212
213 /* Hmm... We might be dealing with highmem pages */
214 if (PageHighMem(*ppage))
215 sendpage = sock_no_sendpage;
216 err = sendpage(sock, *ppage, base, len, flags);
217 if (ret == 0)
218 ret = err;
219 else if (err > 0)
220 ret += err;
221 if (err != len)
222 goto out;
223 base = 0;
224 ppage++;
225 } while ((pglen -= len) != 0);
226copy_tail:
227 len = xdr->tail[0].iov_len;
228 if (base < len) {
229 err = xs_send_tail(sock, xdr, base, len);
230 if (ret == 0)
231 ret = err;
232 else if (err > 0)
233 ret += err;
234 }
235out:
236 return ret;
237}
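
xs_sendpages walks the three-part xdr_buf (head iovec, page array, tail iovec), skipping everything below base so a partial send can resume where it stopped, and setting MSG_MORE while segments remain. A userspace sketch of the resume-from-offset idea over a plain segment array using writev(2); the fixed iov[8] bound and the names are illustrative only:

#include <sys/types.h>
#include <sys/uio.h>
#include <stddef.h>

/* Send the segments starting at offset `base`, as one gathered write.
 * Returns bytes written or -1; after a short write the caller re-invokes
 * with an advanced base, just as the RPC code retries from rq_bytes_sent. */
static ssize_t send_from(int fd, const struct iovec *seg, int nseg,
			 size_t base)
{
	struct iovec iov[8];
	int i, n = 0;

	for (i = 0; i < nseg && n < 8; i++) {
		size_t len = seg[i].iov_len;

		if (base >= len) {	/* segment already fully sent */
			base -= len;
			continue;
		}
		iov[n].iov_base = (char *)seg[i].iov_base + base;
		iov[n].iov_len = len - base;
		base = 0;
		n++;
	}
	return n ? writev(fd, iov, n) : 0;
}
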
238
239/**
240 * xs_nospace - place task on wait queue if transmit was incomplete
241 * @task: task to put to sleep
242 *
243 */
244static void xs_nospace(struct rpc_task *task)
245{
246 struct rpc_rqst *req = task->tk_rqstp;
247 struct rpc_xprt *xprt = req->rq_xprt;
248
249 dprintk("RPC: %4d xmit incomplete (%u left of %u)\n",
250 task->tk_pid, req->rq_slen - req->rq_bytes_sent,
251 req->rq_slen);
252
253 if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) {
254 /* Protect against races with write_space */
255 spin_lock_bh(&xprt->transport_lock);
256
257 /* Don't race with disconnect */
258 if (!xprt_connected(xprt))
259 task->tk_status = -ENOTCONN;
260 else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags))
261 xprt_wait_for_buffer_space(task);
262
263 spin_unlock_bh(&xprt->transport_lock);
264 } else
265 /* Keep holding the socket if it is blocked */
266 rpc_delay(task, HZ>>4);
267}
268
269/**
270 * xs_udp_send_request - write an RPC request to a UDP socket
271 * @task: address of RPC task that manages the state of an RPC request
272 *
273 * Return values:
274 * 0: The request has been sent
275 * EAGAIN: The socket was blocked; please call again later to
276 * complete the request
277 * ENOTCONN: Caller needs to invoke connect logic then call again
278 * other: Some other error occurred, the request was not sent
279 */
280static int xs_udp_send_request(struct rpc_task *task)
281{
282 struct rpc_rqst *req = task->tk_rqstp;
283 struct rpc_xprt *xprt = req->rq_xprt;
284 struct xdr_buf *xdr = &req->rq_snd_buf;
285 int status;
286
287 xs_pktdump("packet data:",
288 req->rq_svec->iov_base,
289 req->rq_svec->iov_len);
290
291 req->rq_xtime = jiffies;
292 status = xs_sendpages(xprt->sock, (struct sockaddr *) &xprt->addr,
293 sizeof(xprt->addr), xdr, req->rq_bytes_sent);
294
295 dprintk("RPC: xs_udp_send_request(%u) = %d\n",
296 xdr->len - req->rq_bytes_sent, status);
297
298 if (likely(status >= (int) req->rq_slen))
299 return 0;
300
301 /* Still some bytes left; set up for a retry later. */
302 if (status > 0)
303 status = -EAGAIN;
304
305 switch (status) {
306 case -ENETUNREACH:
307 case -EPIPE:
308 case -ECONNREFUSED:
309 /* When the server has died, an ICMP port unreachable message
310 * prompts ECONNREFUSED. */
311 break;
312 case -EAGAIN:
313 xs_nospace(task);
314 break;
315 default:
316 dprintk("RPC: sendmsg returned unrecognized error %d\n",
317 -status);
318 break;
319 }
320
321 return status;
322}
323
324static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf)
325{
326 u32 reclen = buf->len - sizeof(rpc_fraghdr);
327 rpc_fraghdr *base = buf->head[0].iov_base;
328 *base = htonl(RPC_LAST_STREAM_FRAGMENT | reclen);
329}
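
The record marker is written in place over the first four bytes of the send buffer: the record length excluding the marker itself, with the last-fragment bit set because the client sends every request as a single fragment. The encoding stand-alone (the macro name mirrors, but is not, the kernel's RPC_LAST_STREAM_FRAGMENT):

#include <stdint.h>
#include <arpa/inet.h>	/* htonl() */

#define LAST_FRAGMENT	0x80000000u

/* Build a marker for a record of `payload_len` bytes sent as one fragment. */
static uint32_t encode_record_marker(uint32_t payload_len)
{
	return htonl(LAST_FRAGMENT | payload_len);
}
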
330
331/**
332 * xs_tcp_send_request - write an RPC request to a TCP socket
333 * @task: address of RPC task that manages the state of an RPC request
334 *
335 * Return values:
336 * 0: The request has been sent
337 * EAGAIN: The socket was blocked; please call again later to
338 * complete the request
339 * ENOTCONN: Caller needs to invoke connect logic then call again
340 * other: Some other error occurred, the request was not sent
341 *
342 * XXX: In the case of soft timeouts, should we eventually give up
343 * if sendmsg is not able to make progress?
344 */
345static int xs_tcp_send_request(struct rpc_task *task)
346{
347 struct rpc_rqst *req = task->tk_rqstp;
348 struct rpc_xprt *xprt = req->rq_xprt;
349 struct xdr_buf *xdr = &req->rq_snd_buf;
350 int status, retry = 0;
351
352 xs_encode_tcp_record_marker(&req->rq_snd_buf);
353
354 xs_pktdump("packet data:",
355 req->rq_svec->iov_base,
356 req->rq_svec->iov_len);
357
358 /* Continue transmitting the packet/record. We must be careful
359 * to cope with writespace callbacks arriving _after_ we have
360 * called sendmsg(). */
361 while (1) {
362 req->rq_xtime = jiffies;
363 status = xs_sendpages(xprt->sock, NULL, 0, xdr,
364 req->rq_bytes_sent);
365
366 dprintk("RPC: xs_tcp_send_request(%u) = %d\n",
367 xdr->len - req->rq_bytes_sent, status);
368
369 if (unlikely(status < 0))
370 break;
371
372 /* If we've sent the entire packet, immediately
373 * reset the count of bytes sent. */
374 req->rq_bytes_sent += status;
375 if (likely(req->rq_bytes_sent >= req->rq_slen)) {
376 req->rq_bytes_sent = 0;
377 return 0;
378 }
379
380 status = -EAGAIN;
381 if (retry++ > XS_SENDMSG_RETRY)
382 break;
383 }
384
385 switch (status) {
386 case -EAGAIN:
387 xs_nospace(task);
388 break;
389 case -ECONNREFUSED:
390 case -ECONNRESET:
391 case -ENOTCONN:
392 case -EPIPE:
393 status = -ENOTCONN;
394 break;
395 default:
396 dprintk("RPC: sendmsg returned unrecognized error %d\n",
397 -status);
398 xprt_disconnect(xprt);
399 break;
400 }
401
402 return status;
403}
404
405/**
406 * xs_close - close a socket
407 * @xprt: transport
408 *
409 * This is used when all requests are complete; i.e., no DRC state remains
410 * on the server we want to save.
411 */
412static void xs_close(struct rpc_xprt *xprt)
413{
414 struct socket *sock = xprt->sock;
415 struct sock *sk = xprt->inet;
416
417 if (!sk)
418 return;
419
420 dprintk("RPC: xs_close xprt %p\n", xprt);
421
422 write_lock_bh(&sk->sk_callback_lock);
423 xprt->inet = NULL;
424 xprt->sock = NULL;
425
426 sk->sk_user_data = NULL;
427 sk->sk_data_ready = xprt->old_data_ready;
428 sk->sk_state_change = xprt->old_state_change;
429 sk->sk_write_space = xprt->old_write_space;
430 write_unlock_bh(&sk->sk_callback_lock);
431
432 sk->sk_no_check = 0;
433
434 sock_release(sock);
435}
436
437/**
438 * xs_destroy - prepare to shutdown a transport
439 * @xprt: doomed transport
440 *
441 */
442static void xs_destroy(struct rpc_xprt *xprt)
443{
444 dprintk("RPC: xs_destroy xprt %p\n", xprt);
445
446 cancel_delayed_work(&xprt->connect_worker);
447 flush_scheduled_work();
448
449 xprt_disconnect(xprt);
450 xs_close(xprt);
451 kfree(xprt->slot);
452}
453
454static inline struct rpc_xprt *xprt_from_sock(struct sock *sk)
455{
456 return (struct rpc_xprt *) sk->sk_user_data;
457}
458
459/**
460 * xs_udp_data_ready - "data ready" callback for UDP sockets
461 * @sk: socket with data to read
462 * @len: how much data to read
463 *
464 */
465static void xs_udp_data_ready(struct sock *sk, int len)
466{
467 struct rpc_task *task;
468 struct rpc_xprt *xprt;
469 struct rpc_rqst *rovr;
470 struct sk_buff *skb;
471 int err, repsize, copied;
472 u32 _xid, *xp;
473
474 read_lock(&sk->sk_callback_lock);
475 dprintk("RPC: xs_udp_data_ready...\n");
476 if (!(xprt = xprt_from_sock(sk)))
477 goto out;
478
479 if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL)
480 goto out;
481
482 if (xprt->shutdown)
483 goto dropit;
484
485 repsize = skb->len - sizeof(struct udphdr);
486 if (repsize < 4) {
487 dprintk("RPC: impossible RPC reply size %d!\n", repsize);
488 goto dropit;
489 }
490
491 /* Copy the XID from the skb... */
492 xp = skb_header_pointer(skb, sizeof(struct udphdr),
493 sizeof(_xid), &_xid);
494 if (xp == NULL)
495 goto dropit;
496
497 /* Look up and lock the request corresponding to the given XID */
498 spin_lock(&xprt->transport_lock);
499 rovr = xprt_lookup_rqst(xprt, *xp);
500 if (!rovr)
501 goto out_unlock;
502 task = rovr->rq_task;
503
504 if ((copied = rovr->rq_private_buf.buflen) > repsize)
505 copied = repsize;
506
507 /* Suck it into the iovec, verify checksum if not done by hw. */
508 if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb))
509 goto out_unlock;
510
511 /* Something worked... */
512 dst_confirm(skb->dst);
513
514 xprt_adjust_cwnd(task, copied);
515 xprt_update_rtt(task);
516 xprt_complete_rqst(task, copied);
517
518 out_unlock:
519 spin_unlock(&xprt->transport_lock);
520 dropit:
521 skb_free_datagram(sk, skb);
522 out:
523 read_unlock(&sk->sk_callback_lock);
524}
525
526static inline size_t xs_tcp_copy_data(skb_reader_t *desc, void *p, size_t len)
527{
528 if (len > desc->count)
529 len = desc->count;
530 if (skb_copy_bits(desc->skb, desc->offset, p, len)) {
531 dprintk("RPC: failed to copy %zu bytes from skb. %zu bytes remain\n",
532 len, desc->count);
533 return 0;
534 }
535 desc->offset += len;
536 desc->count -= len;
537 dprintk("RPC: copied %zu bytes from skb. %zu bytes remain\n",
538 len, desc->count);
539 return len;
540}
541
542static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc)
543{
544 size_t len, used;
545 char *p;
546
547 p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset;
548 len = sizeof(xprt->tcp_recm) - xprt->tcp_offset;
549 used = xs_tcp_copy_data(desc, p, len);
550 xprt->tcp_offset += used;
551 if (used != len)
552 return;
553
554 xprt->tcp_reclen = ntohl(xprt->tcp_recm);
555 if (xprt->tcp_reclen & RPC_LAST_STREAM_FRAGMENT)
556 xprt->tcp_flags |= XPRT_LAST_FRAG;
557 else
558 xprt->tcp_flags &= ~XPRT_LAST_FRAG;
559 xprt->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK;
560
561 xprt->tcp_flags &= ~XPRT_COPY_RECM;
562 xprt->tcp_offset = 0;
563
564 /* Sanity check of the record length */
565 if (unlikely(xprt->tcp_reclen < 4)) {
566 dprintk("RPC: invalid TCP record fragment length\n");
567 xprt_disconnect(xprt);
568 return;
569 }
570 dprintk("RPC: reading TCP record fragment of length %d\n",
571 xprt->tcp_reclen);
572}
573
574static void xs_tcp_check_recm(struct rpc_xprt *xprt)
575{
576 dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n",
577 xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags);
578 if (xprt->tcp_offset == xprt->tcp_reclen) {
579 xprt->tcp_flags |= XPRT_COPY_RECM;
580 xprt->tcp_offset = 0;
581 if (xprt->tcp_flags & XPRT_LAST_FRAG) {
582 xprt->tcp_flags &= ~XPRT_COPY_DATA;
583 xprt->tcp_flags |= XPRT_COPY_XID;
584 xprt->tcp_copied = 0;
585 }
586 }
587}
588
589static inline void xs_tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc)
590{
591 size_t len, used;
592 char *p;
593
594 len = sizeof(xprt->tcp_xid) - xprt->tcp_offset;
595 dprintk("RPC: reading XID (%Zu bytes)\n", len);
596 p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset;
597 used = xs_tcp_copy_data(desc, p, len);
598 xprt->tcp_offset += used;
599 if (used != len)
600 return;
601 xprt->tcp_flags &= ~XPRT_COPY_XID;
602 xprt->tcp_flags |= XPRT_COPY_DATA;
603 xprt->tcp_copied = 4;
604 dprintk("RPC: reading reply for XID %08x\n",
605 ntohl(xprt->tcp_xid));
606 xs_tcp_check_recm(xprt);
607}
608
609static inline void xs_tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc)
610{
611 struct rpc_rqst *req;
612 struct xdr_buf *rcvbuf;
613 size_t len;
614 ssize_t r;
615
616 /* Find and lock the request corresponding to this xid */
617 spin_lock(&xprt->transport_lock);
618 req = xprt_lookup_rqst(xprt, xprt->tcp_xid);
619 if (!req) {
620 xprt->tcp_flags &= ~XPRT_COPY_DATA;
621 dprintk("RPC: XID %08x request not found!\n",
622 ntohl(xprt->tcp_xid));
623 spin_unlock(&xprt->transport_lock);
624 return;
625 }
626
627 rcvbuf = &req->rq_private_buf;
628 len = desc->count;
629 if (len > xprt->tcp_reclen - xprt->tcp_offset) {
630 skb_reader_t my_desc;
631
632 len = xprt->tcp_reclen - xprt->tcp_offset;
633 memcpy(&my_desc, desc, sizeof(my_desc));
634 my_desc.count = len;
635 r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied,
636 &my_desc, xs_tcp_copy_data);
637 desc->count -= r;
638 desc->offset += r;
639 } else
640 r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied,
641 desc, xs_tcp_copy_data);
642
643 if (r > 0) {
644 xprt->tcp_copied += r;
645 xprt->tcp_offset += r;
646 }
647 if (r != len) {
648 /* Error when copying to the receive buffer,
649 * usually because we weren't able to allocate
650 * additional buffer pages. All we can do now
651 * is turn off XPRT_COPY_DATA, so the request
652 * will not receive any additional updates,
653 * and time out.
654 * Any remaining data from this record will
655 * be discarded.
656 */
657 xprt->tcp_flags &= ~XPRT_COPY_DATA;
658 dprintk("RPC: XID %08x truncated request\n",
659 ntohl(xprt->tcp_xid));
660 dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n",
661 xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen);
662 goto out;
663 }
664
665 dprintk("RPC: XID %08x read %Zd bytes\n",
666 ntohl(xprt->tcp_xid), r);
667 dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n",
668 xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen);
669
670 if (xprt->tcp_copied == req->rq_private_buf.buflen)
671 xprt->tcp_flags &= ~XPRT_COPY_DATA;
672 else if (xprt->tcp_offset == xprt->tcp_reclen) {
673 if (xprt->tcp_flags & XPRT_LAST_FRAG)
674 xprt->tcp_flags &= ~XPRT_COPY_DATA;
675 }
676
677out:
678 if (!(xprt->tcp_flags & XPRT_COPY_DATA))
679 xprt_complete_rqst(req->rq_task, xprt->tcp_copied);
680 spin_unlock(&xprt->transport_lock);
681 xs_tcp_check_recm(xprt);
682}
683
684static inline void xs_tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc)
685{
686 size_t len;
687
688 len = xprt->tcp_reclen - xprt->tcp_offset;
689 if (len > desc->count)
690 len = desc->count;
691 desc->count -= len;
692 desc->offset += len;
693 xprt->tcp_offset += len;
694 dprintk("RPC: discarded %Zu bytes\n", len);
695 xs_tcp_check_recm(xprt);
696}
697
698static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len)
699{
700 struct rpc_xprt *xprt = rd_desc->arg.data;
701 skb_reader_t desc = {
702 .skb = skb,
703 .offset = offset,
704 .count = len,
705 .csum = 0
706 };
707
708 dprintk("RPC: xs_tcp_data_recv started\n");
709 do {
710 /* Read in a new fragment marker if necessary */
711 /* Can we ever really expect to get completely empty fragments? */
712 if (xprt->tcp_flags & XPRT_COPY_RECM) {
713 xs_tcp_read_fraghdr(xprt, &desc);
714 continue;
715 }
716 /* Read in the xid if necessary */
717 if (xprt->tcp_flags & XPRT_COPY_XID) {
718 xs_tcp_read_xid(xprt, &desc);
719 continue;
720 }
721 /* Read in the request data */
722 if (xprt->tcp_flags & XPRT_COPY_DATA) {
723 xs_tcp_read_request(xprt, &desc);
724 continue;
725 }
726 /* Skip over any trailing bytes on short reads */
727 xs_tcp_read_discard(xprt, &desc);
728 } while (desc.count);
729 dprintk("RPC: xs_tcp_data_recv done\n");
730 return len - desc.count;
731}
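Stepping back, the receive path above is a small state machine driven by three flag bits: each pass of the loop in xs_tcp_data_recv() handles whichever stage the transport is in, and the helpers advance the flags as fields complete. The dispatch order, restated as a sketch (the enum names are illustrative; the XPRT_* flags are the ones used throughout this file):

    /* Illustrative restatement of the flag-driven receive stages. */
    enum rx_stage { RX_FRAGHDR, RX_XID, RX_DATA, RX_DISCARD };

    static enum rx_stage stage_of(unsigned long flags)
    {
            if (flags & XPRT_COPY_RECM)
                    return RX_FRAGHDR;      /* read 4-byte record marker */
            if (flags & XPRT_COPY_XID)
                    return RX_XID;          /* read 4-byte XID */
            if (flags & XPRT_COPY_DATA)
                    return RX_DATA;         /* copy payload into rq_private_buf */
            return RX_DISCARD;              /* skip unmatched trailing bytes */
    }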
732
733/**
734 * xs_tcp_data_ready - "data ready" callback for TCP sockets
735 * @sk: socket with data to read
736 * @bytes: how much data to read
737 *
738 */
739static void xs_tcp_data_ready(struct sock *sk, int bytes)
740{
741 struct rpc_xprt *xprt;
742 read_descriptor_t rd_desc;
743
744 read_lock(&sk->sk_callback_lock);
745 dprintk("RPC: xs_tcp_data_ready...\n");
746 if (!(xprt = xprt_from_sock(sk)))
747 goto out;
748 if (xprt->shutdown)
749 goto out;
750
751	/* We use rd_desc to pass the struct rpc_xprt to xs_tcp_data_recv */
752 rd_desc.arg.data = xprt;
753 rd_desc.count = 65536;
754 tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
755out:
756 read_unlock(&sk->sk_callback_lock);
757}
758
759/**
760 * xs_tcp_state_change - callback to handle TCP socket state changes
761 * @sk: socket whose state has changed
762 *
763 */
764static void xs_tcp_state_change(struct sock *sk)
765{
766 struct rpc_xprt *xprt;
767
768 read_lock(&sk->sk_callback_lock);
769 if (!(xprt = xprt_from_sock(sk)))
770 goto out;
771 dprintk("RPC: xs_tcp_state_change client %p...\n", xprt);
772 dprintk("RPC: state %x conn %d dead %d zapped %d\n",
773 sk->sk_state, xprt_connected(xprt),
774 sock_flag(sk, SOCK_DEAD),
775 sock_flag(sk, SOCK_ZAPPED));
776
777 switch (sk->sk_state) {
778 case TCP_ESTABLISHED:
779 spin_lock_bh(&xprt->transport_lock);
780 if (!xprt_test_and_set_connected(xprt)) {
781 /* Reset TCP record info */
782 xprt->tcp_offset = 0;
783 xprt->tcp_reclen = 0;
784 xprt->tcp_copied = 0;
785 xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID;
786 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
787 xprt_wake_pending_tasks(xprt, 0);
788 }
789 spin_unlock_bh(&xprt->transport_lock);
790 break;
791 case TCP_SYN_SENT:
792 case TCP_SYN_RECV:
793 break;
794 default:
795 xprt_disconnect(xprt);
796 break;
797 }
798 out:
799 read_unlock(&sk->sk_callback_lock);
800}
801
802/**
803 * xs_udp_write_space - callback invoked when socket buffer space
804 * becomes available
805 * @sk: socket whose state has changed
806 *
807 * Called when more output buffer space is available for this socket.
808 * We try not to wake our writers until they can make "significant"
809 * progress, otherwise we'll waste resources thrashing kernel_sendmsg
810 * with a bunch of small requests.
811 */
812static void xs_udp_write_space(struct sock *sk)
813{
814 read_lock(&sk->sk_callback_lock);
815
816 /* from net/core/sock.c:sock_def_write_space */
817 if (sock_writeable(sk)) {
818 struct socket *sock;
819 struct rpc_xprt *xprt;
820
821 if (unlikely(!(sock = sk->sk_socket)))
822 goto out;
823 if (unlikely(!(xprt = xprt_from_sock(sk))))
824 goto out;
825 if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)))
826 goto out;
827
828 xprt_write_space(xprt);
829 }
830
831 out:
832 read_unlock(&sk->sk_callback_lock);
833}
834
835/**
836 * xs_tcp_write_space - callback invoked when socket buffer space
837 * becomes available
838 * @sk: socket whose state has changed
839 *
840 * Called when more output buffer space is available for this socket.
841 * We try not to wake our writers until they can make "significant"
842 * progress, otherwise we'll waste resources thrashing kernel_sendmsg
843 * with a bunch of small requests.
844 */
845static void xs_tcp_write_space(struct sock *sk)
846{
847 read_lock(&sk->sk_callback_lock);
848
849 /* from net/core/stream.c:sk_stream_write_space */
850 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
851 struct socket *sock;
852 struct rpc_xprt *xprt;
853
854 if (unlikely(!(sock = sk->sk_socket)))
855 goto out;
856 if (unlikely(!(xprt = xprt_from_sock(sk))))
857 goto out;
858 if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)))
859 goto out;
860
861 xprt_write_space(xprt);
862 }
863
864 out:
865 read_unlock(&sk->sk_callback_lock);
866}
867
868static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt)
869{
870 struct sock *sk = xprt->inet;
871
872 if (xprt->rcvsize) {
873 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
874 sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs * 2;
875 }
876 if (xprt->sndsize) {
877 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
878 sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2;
879 sk->sk_write_space(sk);
880 }
881}
882
883/**
884 * xs_udp_set_buffer_size - set send and receive limits
885 * @xprt: generic transport
886 * @sndsize: requested size of send buffer, in bytes
887 * @rcvsize: requested size of receive buffer, in bytes
888 *
889 * Set socket send and receive buffer size limits.
890 */
891static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize)
892{
893 xprt->sndsize = 0;
894 if (sndsize)
895 xprt->sndsize = sndsize + 1024;
896 xprt->rcvsize = 0;
897 if (rcvsize)
898 xprt->rcvsize = rcvsize + 1024;
899
900 xs_udp_do_set_buffer_size(xprt);
901}
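Taken together with xs_udp_do_set_buffer_size() above, the sizing arithmetic is: requested size plus 1024 bytes of slack, times the number of request slots, times two. A worked instance (the slot count is assumed for illustration):

    /* With sndsize = 4096 and max_reqs = 16 (assumed values):
     * sk_sndbuf = (4096 + 1024) * 16 * 2 = 163840 bytes. */
    static unsigned int udp_bufsize(unsigned int requested, unsigned int max_reqs)
    {
            return (requested + 1024) * max_reqs * 2;
    }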
902
903/**
904 * xs_udp_timer - called when a retransmit timeout occurs on a UDP transport
905 * @task: task that timed out
906 *
907 * Adjust the congestion window after a retransmit timeout has occurred.
908 */
909static void xs_udp_timer(struct rpc_task *task)
910{
911 xprt_adjust_cwnd(task, -ETIMEDOUT);
912}
913
914static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
915{
916 struct sockaddr_in myaddr = {
917 .sin_family = AF_INET,
918 };
919 int err;
920 unsigned short port = xprt->port;
921
922 do {
923 myaddr.sin_port = htons(port);
924 err = sock->ops->bind(sock, (struct sockaddr *) &myaddr,
925 sizeof(myaddr));
926 if (err == 0) {
927 xprt->port = port;
928 dprintk("RPC: xs_bindresvport bound to port %u\n",
929 port);
930 return 0;
931 }
932 if (port <= xprt_min_resvport)
933 port = xprt_max_resvport;
934 else
935 port--;
936 } while (err == -EADDRINUSE && port != xprt->port);
937
938 dprintk("RPC: can't bind to reserved port (%d).\n", -err);
939 return err;
940}
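The bind loop scans downward from the transport's current port through the reserved range, wraps from xprt_min_resvport back up to xprt_max_resvport, and gives up only once it has come full circle to its starting port. The same scan order in isolation (the port bounds are assumptions standing in for the tunables):

    enum { MIN_RESVPORT = 665, MAX_RESVPORT = 1023 };   /* assumed bounds */

    /* Yield the next candidate in the descending, wrapping reserved-port scan. */
    static unsigned short next_resvport(unsigned short port)
    {
            return (port <= MIN_RESVPORT) ? MAX_RESVPORT : port - 1;
    }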
941
942/**
943 * xs_udp_connect_worker - set up a UDP socket
944 * @args: RPC transport to connect
945 *
946 * Invoked by a work queue.
947 */
948static void xs_udp_connect_worker(void *args)
949{
950 struct rpc_xprt *xprt = (struct rpc_xprt *) args;
951 struct socket *sock = xprt->sock;
952 int err, status = -EIO;
953
954 if (xprt->shutdown || xprt->addr.sin_port == 0)
955 goto out;
956
957 dprintk("RPC: xs_udp_connect_worker for xprt %p\n", xprt);
958
959 /* Start by resetting any existing state */
960 xs_close(xprt);
961
962 if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
963 dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
964 goto out;
965 }
966
967 if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) {
968 sock_release(sock);
969 goto out;
970 }
971
972 if (!xprt->inet) {
973 struct sock *sk = sock->sk;
974
975 write_lock_bh(&sk->sk_callback_lock);
976
977 sk->sk_user_data = xprt;
978 xprt->old_data_ready = sk->sk_data_ready;
979 xprt->old_state_change = sk->sk_state_change;
980 xprt->old_write_space = sk->sk_write_space;
981 sk->sk_data_ready = xs_udp_data_ready;
982 sk->sk_write_space = xs_udp_write_space;
983 sk->sk_no_check = UDP_CSUM_NORCV;
984
985 xprt_set_connected(xprt);
986
987 /* Reset to new socket */
988 xprt->sock = sock;
989 xprt->inet = sk;
990
991 write_unlock_bh(&sk->sk_callback_lock);
992 }
993 xs_udp_do_set_buffer_size(xprt);
994 status = 0;
995out:
996 xprt_wake_pending_tasks(xprt, status);
997 xprt_clear_connecting(xprt);
998}
999
1000/*
1001 * We need to preserve the port number so the reply cache on the server can
1002 * find our cached RPC replies when we get around to reconnecting.
1003 */
1004static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
1005{
1006 int result;
1007 struct socket *sock = xprt->sock;
1008 struct sockaddr any;
1009
1010 dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt);
1011
1012 /*
1013 * Disconnect the transport socket by doing a connect operation
1014 * with AF_UNSPEC. This should return immediately...
1015 */
1016 memset(&any, 0, sizeof(any));
1017 any.sa_family = AF_UNSPEC;
1018 result = sock->ops->connect(sock, &any, sizeof(any), 0);
1019 if (result)
1020 dprintk("RPC: AF_UNSPEC connect return code %d\n",
1021 result);
1022}
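Connecting with AF_UNSPEC is the classic idiom for dissolving a socket's current association while keeping the socket, and with it the bound local port, alive. A user-space sketch of the same trick (shown on a datagram socket, where POSIX specifies the reset; the code above applies the idiom to its TCP transport socket inside the kernel):

    #include <string.h>
    #include <sys/socket.h>

    /* Dissolve the peer association of a connected socket without closing it. */
    static int disconnect_keep_port(int fd)
    {
            struct sockaddr sa;

            memset(&sa, 0, sizeof(sa));
            sa.sa_family = AF_UNSPEC;
            return connect(fd, &sa, sizeof(sa));
    }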
1023
1024/**
1025 * xs_tcp_connect_worker - connect a TCP socket to a remote endpoint
1026 * @args: RPC transport to connect
1027 *
1028 * Invoked by a work queue.
1029 */
1030static void xs_tcp_connect_worker(void *args)
1031{
1032 struct rpc_xprt *xprt = (struct rpc_xprt *)args;
1033 struct socket *sock = xprt->sock;
1034 int err, status = -EIO;
1035
1036 if (xprt->shutdown || xprt->addr.sin_port == 0)
1037 goto out;
1038
1039 dprintk("RPC: xs_tcp_connect_worker for xprt %p\n", xprt);
1040
1041 if (!xprt->sock) {
1042 /* start from scratch */
1043 if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
1044 dprintk("RPC: can't create TCP transport socket (%d).\n", -err);
1045 goto out;
1046 }
1047
1048 if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) {
1049 sock_release(sock);
1050 goto out;
1051 }
1052 } else
1053 /* "close" the socket, preserving the local port */
1054 xs_tcp_reuse_connection(xprt);
1055
1056 if (!xprt->inet) {
1057 struct sock *sk = sock->sk;
1058
1059 write_lock_bh(&sk->sk_callback_lock);
1060
1061 sk->sk_user_data = xprt;
1062 xprt->old_data_ready = sk->sk_data_ready;
1063 xprt->old_state_change = sk->sk_state_change;
1064 xprt->old_write_space = sk->sk_write_space;
1065 sk->sk_data_ready = xs_tcp_data_ready;
1066 sk->sk_state_change = xs_tcp_state_change;
1067 sk->sk_write_space = xs_tcp_write_space;
1068
1069 /* socket options */
1070 sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
1071 sock_reset_flag(sk, SOCK_LINGER);
1072 tcp_sk(sk)->linger2 = 0;
1073 tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
1074
1075 xprt_clear_connected(xprt);
1076
1077 /* Reset to new socket */
1078 xprt->sock = sock;
1079 xprt->inet = sk;
1080
1081 write_unlock_bh(&sk->sk_callback_lock);
1082 }
1083
1084 /* Tell the socket layer to start connecting... */
1085 status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr,
1086 sizeof(xprt->addr), O_NONBLOCK);
1087 dprintk("RPC: %p connect status %d connected %d sock state %d\n",
1088 xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
1089 if (status < 0) {
1090 switch (status) {
1091 case -EINPROGRESS:
1092 case -EALREADY:
1093 goto out_clear;
1094 case -ECONNREFUSED:
1095 case -ECONNRESET:
1096 /* retry with existing socket, after a delay */
1097 break;
1098 default:
1099 /* get rid of existing socket, and retry */
1100 xs_close(xprt);
1101 break;
1102 }
1103 }
1104out:
1105 xprt_wake_pending_tasks(xprt, status);
1106out_clear:
1107 xprt_clear_connecting(xprt);
1108}
1109
1110/**
1111 * xs_connect - connect a socket to a remote endpoint
1112 * @task: address of RPC task that manages state of connect request
1113 *
1114 * TCP: If the remote end dropped the connection, delay reconnecting.
1115 *
1116 * UDP socket connects are synchronous, but we use a work queue anyway
1117 * to guarantee that even unprivileged user processes can set up a
1118 * socket on a privileged port.
1119 *
1120 * If a UDP socket connect fails, the delay behavior here prevents
1121 * retry floods (hard mounts).
1122 */
1123static void xs_connect(struct rpc_task *task)
1124{
1125 struct rpc_xprt *xprt = task->tk_xprt;
1126
1127 if (xprt_test_and_set_connecting(xprt))
1128 return;
1129
1130 if (xprt->sock != NULL) {
1131 dprintk("RPC: xs_connect delayed xprt %p for %lu seconds\n",
1132 xprt, xprt->reestablish_timeout / HZ);
1133 schedule_delayed_work(&xprt->connect_worker,
1134 xprt->reestablish_timeout);
1135 xprt->reestablish_timeout <<= 1;
1136 if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
1137 xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
1138 } else {
1139 dprintk("RPC: xs_connect scheduled xprt %p\n", xprt);
1140 schedule_work(&xprt->connect_worker);
1141
1142 /* flush_scheduled_work can sleep... */
1143 if (!RPC_IS_ASYNC(task))
1144 flush_scheduled_work();
1145 }
1146}
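The reconnect delay doubles on each pass through this path and is clamped at XS_TCP_MAX_REEST_TO. Assuming the conventional values of 3 seconds initial and 300 seconds maximum (both macros are defined earlier in this file), the schedule runs 3, 6, 12, 24, 48, 96, 192, then 300 seconds thereafter. The clamped doubling in isolation:

    /* Clamped exponential backoff; cur and max are in jiffies. */
    static unsigned long next_reconnect_delay(unsigned long cur, unsigned long max)
    {
            cur <<= 1;
            return (cur > max) ? max : cur;
    }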
1147
1148static struct rpc_xprt_ops xs_udp_ops = {
1149 .set_buffer_size = xs_udp_set_buffer_size,
1150 .reserve_xprt = xprt_reserve_xprt_cong,
1151 .release_xprt = xprt_release_xprt_cong,
1152 .connect = xs_connect,
1153 .send_request = xs_udp_send_request,
1154 .set_retrans_timeout = xprt_set_retrans_timeout_rtt,
1155 .timer = xs_udp_timer,
1156 .release_request = xprt_release_rqst_cong,
1157 .close = xs_close,
1158 .destroy = xs_destroy,
1159};
1160
1161static struct rpc_xprt_ops xs_tcp_ops = {
1162 .reserve_xprt = xprt_reserve_xprt,
1163 .release_xprt = xprt_release_xprt,
1164 .connect = xs_connect,
1165 .send_request = xs_tcp_send_request,
1166 .set_retrans_timeout = xprt_set_retrans_timeout_def,
1167 .close = xs_close,
1168 .destroy = xs_destroy,
1169};
1170
1171/**
1172 * xs_setup_udp - Set up transport to use a UDP socket
1173 * @xprt: transport to set up
1174 * @to: timeout parameters
1175 *
1176 */
1177int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
1178{
1179 size_t slot_table_size;
1180
1181 dprintk("RPC: setting up udp-ipv4 transport...\n");
1182
1183 xprt->max_reqs = xprt_udp_slot_table_entries;
1184 slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]);
1185 xprt->slot = kmalloc(slot_table_size, GFP_KERNEL);
1186 if (xprt->slot == NULL)
1187 return -ENOMEM;
1188 memset(xprt->slot, 0, slot_table_size);
1189
1190 xprt->prot = IPPROTO_UDP;
1191 xprt->port = xprt_max_resvport;
1192 xprt->tsh_size = 0;
1193 xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
1194 /* XXX: header size can vary due to auth type, IPv6, etc. */
1195 xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
1196
1197 INIT_WORK(&xprt->connect_worker, xs_udp_connect_worker, xprt);
1198 xprt->bind_timeout = XS_BIND_TO;
1199 xprt->connect_timeout = XS_UDP_CONN_TO;
1200 xprt->reestablish_timeout = XS_UDP_REEST_TO;
1201 xprt->idle_timeout = XS_IDLE_DISC_TO;
1202
1203 xprt->ops = &xs_udp_ops;
1204
1205 if (to)
1206 xprt->timeout = *to;
1207 else
1208 xprt_set_timeout(&xprt->timeout, 5, 5 * HZ);
1209
1210 return 0;
1211}
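The UDP payload ceiling above subtracts eight times MAX_HEADER from the 64 KB datagram limit; per the XXX note it is a rough allowance rather than an exact header budget. For illustration only, with an assumed MAX_HEADER of 48 (the real value is configuration-dependent):

    enum { ASSUMED_MAX_HEADER = 48 };       /* config-dependent, assumed here */

    /* (1 << 16) - (48 << 3) = 65536 - 384 = 65152 bytes of RPC payload. */
    static const unsigned int udp_max_payload =
            (1U << 16) - (ASSUMED_MAX_HEADER << 3);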
1212
1213/**
1214 * xs_setup_tcp - Set up transport to use a TCP socket
1215 * @xprt: transport to set up
1216 * @to: timeout parameters
1217 *
1218 */
1219int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
1220{
1221 size_t slot_table_size;
1222
1223 dprintk("RPC: setting up tcp-ipv4 transport...\n");
1224
1225 xprt->max_reqs = xprt_tcp_slot_table_entries;
1226 slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]);
1227 xprt->slot = kmalloc(slot_table_size, GFP_KERNEL);
1228 if (xprt->slot == NULL)
1229 return -ENOMEM;
1230 memset(xprt->slot, 0, slot_table_size);
1231
1232 xprt->prot = IPPROTO_TCP;
1233 xprt->port = xprt_max_resvport;
1234 xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
1235 xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
1236 xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
1237
1238 INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt);
1239 xprt->bind_timeout = XS_BIND_TO;
1240 xprt->connect_timeout = XS_TCP_CONN_TO;
1241 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
1242 xprt->idle_timeout = XS_IDLE_DISC_TO;
1243
1244 xprt->ops = &xs_tcp_ops;
1245
1246 if (to)
1247 xprt->timeout = *to;
1248 else
1249 xprt_set_timeout(&xprt->timeout, 2, 60 * HZ);
1250
1251 return 0;
1252}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index cbb0ba34a600..0db9e57013fd 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1192,46 +1192,6 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family)
 
 EXPORT_SYMBOL(xfrm_bundle_ok);
 
-/* Well... that's _TASK_. We need to scan through transformation
- * list and figure out what mss tcp should generate in order to
- * final datagram fit to mtu. Mama mia... :-)
- *
- * Apparently, some easy way exists, but we used to choose the most
- * bizarre ones. :-) So, raising Kalashnikov... tra-ta-ta.
- *
- * Consider this function as something like dark humour. :-)
- */
-static int xfrm_get_mss(struct dst_entry *dst, u32 mtu)
-{
-	int res = mtu - dst->header_len;
-
-	for (;;) {
-		struct dst_entry *d = dst;
-		int m = res;
-
-		do {
-			struct xfrm_state *x = d->xfrm;
-			if (x) {
-				spin_lock_bh(&x->lock);
-				if (x->km.state == XFRM_STATE_VALID &&
-				    x->type && x->type->get_max_size)
-					m = x->type->get_max_size(d->xfrm, m);
-				else
-					m += x->props.header_len;
-				spin_unlock_bh(&x->lock);
-			}
-		} while ((d = d->child) != NULL);
-
-		if (m <= mtu)
-			break;
-		res -= (m - mtu);
-		if (res < 88)
-			return mtu;
-	}
-
-	return res + dst->header_len;
-}
-
 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 {
 	int err = 0;
@@ -1252,8 +1212,6 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 	dst_ops->negative_advice = xfrm_negative_advice;
 	if (likely(dst_ops->link_failure == NULL))
 		dst_ops->link_failure = xfrm_link_failure;
-	if (likely(dst_ops->get_mss == NULL))
-		dst_ops->get_mss = xfrm_get_mss;
 	if (likely(afinfo->garbage_collect == NULL))
 		afinfo->garbage_collect = __xfrm_garbage_collect;
 	xfrm_policy_afinfo[afinfo->family] = afinfo;
@@ -1281,7 +1239,6 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
 			dst_ops->check = NULL;
 			dst_ops->negative_advice = NULL;
 			dst_ops->link_failure = NULL;
-			dst_ops->get_mss = NULL;
 			afinfo->garbage_collect = NULL;
 		}
 	}
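The function removed above converged on a usable MSS by trial: compute the post-transform size m for a candidate payload, and if it overshoots the MTU, shrink the candidate by the overshoot and retry, bailing out once the candidate drops below a floor of 88 bytes. A stand-alone model of that fixed-point search (the overhead callback stands in for walking the dst/xfrm chain):

    /* grown(payload) models the transform stack's size expansion. */
    static int fit_to_mtu(int mtu, int header_len, int (*grown)(int payload))
    {
            int res = mtu - header_len;

            for (;;) {
                    int m = grown(res);

                    if (m <= mtu)
                            break;
                    res -= m - mtu;
                    if (res < 88)
                            return mtu;
            }
            return res + header_len;
    }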
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 9d206c282cf1..8b9a4747417d 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1026,6 +1026,12 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x)
 }
 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
 
+/*
+ * This function is NOT optimal. For example, with ESP it will give an
+ * MTU that's usually two bytes short of being optimal. However, it will
+ * usually give an answer that's a multiple of 4 provided the input is
+ * also a multiple of 4.
+ */
 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
 {
 	int res = mtu;
diff --git a/security/dummy.c b/security/dummy.c
index 9623a61dfc76..3d34f3de7e82 100644
--- a/security/dummy.c
+++ b/security/dummy.c
@@ -768,7 +768,7 @@ static int dummy_socket_getpeersec(struct socket *sock, char __user *optval,
 	return -ENOPROTOOPT;
 }
 
-static inline int dummy_sk_alloc_security (struct sock *sk, int family, int priority)
+static inline int dummy_sk_alloc_security (struct sock *sk, int family, gfp_t priority)
 {
 	return 0;
 }
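The hunks from here to the end of the patch are one mechanical change: allocation-flag parameters become gfp_t instead of plain int or unsigned int, so a static checker such as sparse can flag a GFP mask passed where an ordinary integer belongs, or vice versa. A rough sketch of how such an annotated type can be declared (the macro and type names here are illustrative, not the kernel's):

    #ifdef __CHECKER__
    #define __flagmask      __attribute__((bitwise))    /* sparse-only */
    #else
    #define __flagmask
    #endif

    /* Illustrative shape only; the real typedef lives in the kernel headers. */
    typedef unsigned int __flagmask gfp_sketch_t;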
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index b13be15165f5..447a1e0f48cb 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -262,7 +262,7 @@ static void superblock_free_security(struct super_block *sb)
 }
 
 #ifdef CONFIG_SECURITY_NETWORK
-static int sk_alloc_security(struct sock *sk, int family, int priority)
+static int sk_alloc_security(struct sock *sk, int family, gfp_t priority)
 {
 	struct sk_security_struct *ssec;
 
@@ -3380,7 +3380,7 @@ out:
 	return err;
 }
 
-static int selinux_sk_alloc_security(struct sock *sk, int family, int priority)
+static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority)
 {
 	return sk_alloc_security(sk, family, priority);
 }
diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c
index e72cec77f0db..129abab5ce98 100644
--- a/sound/core/memalloc.c
+++ b/sound/core/memalloc.c
@@ -190,7 +190,7 @@ static void unmark_pages(struct page *page, int order)
  *
  * Returns the pointer of the buffer, or NULL if not enough memory.
  */
-void *snd_malloc_pages(size_t size, unsigned int gfp_flags)
+void *snd_malloc_pages(size_t size, gfp_t gfp_flags)
 {
 	int pg;
 	void *res;
@@ -235,7 +235,7 @@ static void *snd_malloc_dev_pages(struct device *dev, size_t size, dma_addr_t *d
 {
 	int pg;
 	void *res;
-	unsigned int gfp_flags;
+	gfp_t gfp_flags;
 
 	snd_assert(size > 0, return NULL);
 	snd_assert(dma != NULL, return NULL);
diff --git a/sound/core/seq/instr/ainstr_gf1.c b/sound/core/seq/instr/ainstr_gf1.c
index 207c2c54bf1d..0e4df8826eed 100644
--- a/sound/core/seq/instr/ainstr_gf1.c
+++ b/sound/core/seq/instr/ainstr_gf1.c
@@ -51,7 +51,7 @@ static int snd_seq_gf1_copy_wave_from_stream(snd_gf1_ops_t *ops,
 	gf1_wave_t *wp, *prev;
 	gf1_xwave_t xp;
 	int err;
-	unsigned int gfp_mask;
+	gfp_t gfp_mask;
 	unsigned int real_size;
 
 	gfp_mask = atomic ? GFP_ATOMIC : GFP_KERNEL;
@@ -144,7 +144,8 @@ static int snd_seq_gf1_put(void *private_data, snd_seq_kinstr_t *instr,
 	snd_gf1_ops_t *ops = (snd_gf1_ops_t *)private_data;
 	gf1_instrument_t *ip;
 	gf1_xinstrument_t ix;
-	int err, gfp_mask;
+	int err;
+	gfp_t gfp_mask;
 
 	if (cmd != SNDRV_SEQ_INSTR_PUT_CMD_CREATE)
 		return -EINVAL;
diff --git a/sound/core/seq/instr/ainstr_iw.c b/sound/core/seq/instr/ainstr_iw.c
index 67c24c8e8e7b..7c19fbbc5d0f 100644
--- a/sound/core/seq/instr/ainstr_iw.c
+++ b/sound/core/seq/instr/ainstr_iw.c
@@ -129,7 +129,7 @@ static int snd_seq_iwffff_copy_wave_from_stream(snd_iwffff_ops_t *ops,
 	iwffff_wave_t *wp, *prev;
 	iwffff_xwave_t xp;
 	int err;
-	unsigned int gfp_mask;
+	gfp_t gfp_mask;
 	unsigned int real_size;
 
 	gfp_mask = atomic ? GFP_ATOMIC : GFP_KERNEL;
@@ -236,7 +236,7 @@ static int snd_seq_iwffff_put(void *private_data, snd_seq_kinstr_t *instr,
 	iwffff_layer_t *lp, *prev_lp;
 	iwffff_xlayer_t lx;
 	int err;
-	unsigned int gfp_mask;
+	gfp_t gfp_mask;
 
 	if (cmd != SNDRV_SEQ_INSTR_PUT_CMD_CREATE)
 		return -EINVAL;
diff --git a/sound/core/seq/instr/ainstr_simple.c b/sound/core/seq/instr/ainstr_simple.c
index 6183d2151034..17ab94e76073 100644
--- a/sound/core/seq/instr/ainstr_simple.c
+++ b/sound/core/seq/instr/ainstr_simple.c
@@ -57,7 +57,8 @@ static int snd_seq_simple_put(void *private_data, snd_seq_kinstr_t *instr,
 	snd_simple_ops_t *ops = (snd_simple_ops_t *)private_data;
 	simple_instrument_t *ip;
 	simple_xinstrument_t ix;
-	int err, gfp_mask;
+	int err;
+	gfp_t gfp_mask;
 	unsigned int real_size;
 
 	if (cmd != SNDRV_SEQ_INSTR_PUT_CMD_CREATE)
diff --git a/sound/oss/dmasound/dmasound.h b/sound/oss/dmasound/dmasound.h
index 9a2f50f0b184..222014cafc1a 100644
--- a/sound/oss/dmasound/dmasound.h
+++ b/sound/oss/dmasound/dmasound.h
@@ -116,7 +116,7 @@ typedef struct {
 	const char *name;
 	const char *name2;
 	struct module *owner;
-	void *(*dma_alloc)(unsigned int, int);
+	void *(*dma_alloc)(unsigned int, gfp_t);
 	void (*dma_free)(void *, unsigned int);
 	int (*irqinit)(void);
 #ifdef MODULE
diff --git a/sound/oss/dmasound/dmasound_atari.c b/sound/oss/dmasound/dmasound_atari.c
index 8daaf87664ba..59eb53f89318 100644
--- a/sound/oss/dmasound/dmasound_atari.c
+++ b/sound/oss/dmasound/dmasound_atari.c
@@ -114,7 +114,7 @@ static ssize_t ata_ctx_u16le(const u_char *userPtr, size_t userCount,
 /*** Low level stuff *********************************************************/
 
 
-static void *AtaAlloc(unsigned int size, int flags);
+static void *AtaAlloc(unsigned int size, gfp_t flags);
 static void AtaFree(void *, unsigned int size);
 static int AtaIrqInit(void);
 #ifdef MODULE
@@ -810,7 +810,7 @@ static TRANS transFalconExpanding = {
  * Atari (TT/Falcon)
  */
 
-static void *AtaAlloc(unsigned int size, int flags)
+static void *AtaAlloc(unsigned int size, gfp_t flags)
 {
 	return atari_stram_alloc(size, "dmasound");
 }
diff --git a/sound/oss/dmasound/dmasound_awacs.c b/sound/oss/dmasound/dmasound_awacs.c
index 2ceb46f1d40f..b2bf8bac842d 100644
--- a/sound/oss/dmasound/dmasound_awacs.c
+++ b/sound/oss/dmasound/dmasound_awacs.c
@@ -271,7 +271,7 @@ int expand_read_bal; /* Balance factor for expanding reads (not volume!) */
 
 /*** Low level stuff *********************************************************/
 
-static void *PMacAlloc(unsigned int size, int flags);
+static void *PMacAlloc(unsigned int size, gfp_t flags);
 static void PMacFree(void *ptr, unsigned int size);
 static int PMacIrqInit(void);
 #ifdef MODULE
@@ -614,7 +614,7 @@ tas_init_frame_rates(unsigned int *prop, unsigned int l)
 /*
  * PCI PowerMac, with AWACS, Screamer, Burgundy, DACA or Tumbler and DBDMA.
  */
-static void *PMacAlloc(unsigned int size, int flags)
+static void *PMacAlloc(unsigned int size, gfp_t flags)
 {
 	return kmalloc(size, flags);
 }
diff --git a/sound/oss/dmasound/dmasound_paula.c b/sound/oss/dmasound/dmasound_paula.c
index 558db5311e06..d59f60b26410 100644
--- a/sound/oss/dmasound/dmasound_paula.c
+++ b/sound/oss/dmasound/dmasound_paula.c
@@ -69,7 +69,7 @@ static int write_sq_block_size_half, write_sq_block_size_quarter;
 /*** Low level stuff *********************************************************/
 
 
-static void *AmiAlloc(unsigned int size, int flags);
+static void *AmiAlloc(unsigned int size, gfp_t flags);
 static void AmiFree(void *obj, unsigned int size);
 static int AmiIrqInit(void);
 #ifdef MODULE
@@ -317,7 +317,7 @@ static inline void StopDMA(void)
 	enable_heartbeat();
 }
 
-static void *AmiAlloc(unsigned int size, int flags)
+static void *AmiAlloc(unsigned int size, gfp_t flags)
 {
 	return amiga_chip_alloc((long)size, "dmasound [Paula]");
 }
diff --git a/sound/oss/dmasound/dmasound_q40.c b/sound/oss/dmasound/dmasound_q40.c
index 92c25a0174db..1ddaa6284b08 100644
--- a/sound/oss/dmasound/dmasound_q40.c
+++ b/sound/oss/dmasound/dmasound_q40.c
@@ -36,7 +36,7 @@ static int expand_data; /* Data for expanding */
 /*** Low level stuff *********************************************************/
 
 
-static void *Q40Alloc(unsigned int size, int flags);
+static void *Q40Alloc(unsigned int size, gfp_t flags);
 static void Q40Free(void *, unsigned int);
 static int Q40IrqInit(void);
 #ifdef MODULE
@@ -358,7 +358,7 @@ static TRANS transQ40Compressing = {
 
 /*** Low level stuff *********************************************************/
 
-static void *Q40Alloc(unsigned int size, int flags)
+static void *Q40Alloc(unsigned int size, gfp_t flags)
 {
 	return kmalloc(size, flags); /* change to vmalloc */
 }
diff --git a/sound/usb/usbmidi.c b/sound/usb/usbmidi.c
index e0d0365453b3..f1a2e2c2e02f 100644
--- a/sound/usb/usbmidi.c
+++ b/sound/usb/usbmidi.c
@@ -163,7 +163,7 @@ static const uint8_t snd_usbmidi_cin_length[] = {
 /*
  * Submits the URB, with error handling.
  */
-static int snd_usbmidi_submit_urb(struct urb* urb, int flags)
+static int snd_usbmidi_submit_urb(struct urb* urb, gfp_t flags)
 {
 	int err = usb_submit_urb(urb, flags);
 	if (err < 0 && err != -ENODEV)
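With the parameter typed as gfp_t, each call site states its allocation context explicitly. A usage sketch (the helper name comes from the hunk above; the wrapper and its context flag are hypothetical):

    /* In process context a caller may sleep; in completion/IRQ context it
     * must not, so it passes GFP_ATOMIC instead of GFP_KERNEL. */
    static int resubmit(struct urb *urb, int in_atomic_context)
    {
            return snd_usbmidi_submit_urb(urb,
                            in_atomic_context ? GFP_ATOMIC : GFP_KERNEL);
    }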