diff options
40 files changed, 2562 insertions, 254 deletions
diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt index c49cd7e796e7..682a59fabe3f 100644 --- a/Documentation/filesystems/nfs/nfs41-server.txt +++ b/Documentation/filesystems/nfs/nfs41-server.txt | |||
| @@ -24,11 +24,6 @@ focuses on the mandatory-to-implement NFSv4.1 Sessions, providing | |||
| 24 | "exactly once" semantics and better control and throttling of the | 24 | "exactly once" semantics and better control and throttling of the |
| 25 | resources allocated for each client. | 25 | resources allocated for each client. |
| 26 | 26 | ||
| 27 | Other NFSv4.1 features, Parallel NFS operations in particular, | ||
| 28 | are still under development out of tree. | ||
| 29 | See http://wiki.linux-nfs.org/wiki/index.php/PNFS_prototype_design | ||
| 30 | for more information. | ||
| 31 | |||
| 32 | The table below, taken from the NFSv4.1 document, lists | 27 | The table below, taken from the NFSv4.1 document, lists |
| 33 | the operations that are mandatory to implement (REQ), optional | 28 | the operations that are mandatory to implement (REQ), optional |
| 34 | (OPT), and NFSv4.0 operations that are required not to implement (MNI) | 29 | (OPT), and NFSv4.0 operations that are required not to implement (MNI) |
| @@ -43,9 +38,7 @@ The OPTIONAL features identified and their abbreviations are as follows: | |||
| 43 | The following abbreviations indicate the linux server implementation status. | 38 | The following abbreviations indicate the linux server implementation status. |
| 44 | I Implemented NFSv4.1 operations. | 39 | I Implemented NFSv4.1 operations. |
| 45 | NS Not Supported. | 40 | NS Not Supported. |
| 46 | NS* unimplemented optional feature. | 41 | NS* Unimplemented optional feature. |
| 47 | P pNFS features implemented out of tree. | ||
| 48 | PNS pNFS features that are not supported yet (out of tree). | ||
| 49 | 42 | ||
| 50 | Operations | 43 | Operations |
| 51 | 44 | ||
| @@ -70,13 +63,13 @@ I | DESTROY_SESSION | REQ | | Section 18.37 | | |||
| 70 | I | EXCHANGE_ID | REQ | | Section 18.35 | | 63 | I | EXCHANGE_ID | REQ | | Section 18.35 | |
| 71 | I | FREE_STATEID | REQ | | Section 18.38 | | 64 | I | FREE_STATEID | REQ | | Section 18.38 | |
| 72 | | GETATTR | REQ | | Section 18.7 | | 65 | | GETATTR | REQ | | Section 18.7 | |
| 73 | P | GETDEVICEINFO | OPT | pNFS (REQ) | Section 18.40 | | 66 | I | GETDEVICEINFO | OPT | pNFS (REQ) | Section 18.40 | |
| 74 | P | GETDEVICELIST | OPT | pNFS (OPT) | Section 18.41 | | 67 | NS*| GETDEVICELIST | OPT | pNFS (OPT) | Section 18.41 | |
| 75 | | GETFH | REQ | | Section 18.8 | | 68 | | GETFH | REQ | | Section 18.8 | |
| 76 | NS*| GET_DIR_DELEGATION | OPT | DDELG (REQ) | Section 18.39 | | 69 | NS*| GET_DIR_DELEGATION | OPT | DDELG (REQ) | Section 18.39 | |
| 77 | P | LAYOUTCOMMIT | OPT | pNFS (REQ) | Section 18.42 | | 70 | I | LAYOUTCOMMIT | OPT | pNFS (REQ) | Section 18.42 | |
| 78 | P | LAYOUTGET | OPT | pNFS (REQ) | Section 18.43 | | 71 | I | LAYOUTGET | OPT | pNFS (REQ) | Section 18.43 | |
| 79 | P | LAYOUTRETURN | OPT | pNFS (REQ) | Section 18.44 | | 72 | I | LAYOUTRETURN | OPT | pNFS (REQ) | Section 18.44 | |
| 80 | | LINK | OPT | | Section 18.9 | | 73 | | LINK | OPT | | Section 18.9 | |
| 81 | | LOCK | REQ | | Section 18.10 | | 74 | | LOCK | REQ | | Section 18.10 | |
| 82 | | LOCKT | REQ | | Section 18.11 | | 75 | | LOCKT | REQ | | Section 18.11 | |
| @@ -122,9 +115,9 @@ Callback Operations | |||
| 122 | | | MNI | or OPT) | | | 115 | | | MNI | or OPT) | | |
| 123 | +-------------------------+-----------+-------------+---------------+ | 116 | +-------------------------+-----------+-------------+---------------+ |
| 124 | | CB_GETATTR | OPT | FDELG (REQ) | Section 20.1 | | 117 | | CB_GETATTR | OPT | FDELG (REQ) | Section 20.1 | |
| 125 | P | CB_LAYOUTRECALL | OPT | pNFS (REQ) | Section 20.3 | | 118 | I | CB_LAYOUTRECALL | OPT | pNFS (REQ) | Section 20.3 | |
| 126 | NS*| CB_NOTIFY | OPT | DDELG (REQ) | Section 20.4 | | 119 | NS*| CB_NOTIFY | OPT | DDELG (REQ) | Section 20.4 | |
| 127 | P | CB_NOTIFY_DEVICEID | OPT | pNFS (OPT) | Section 20.12 | | 120 | NS*| CB_NOTIFY_DEVICEID | OPT | pNFS (OPT) | Section 20.12 | |
| 128 | NS*| CB_NOTIFY_LOCK | OPT | | Section 20.11 | | 121 | NS*| CB_NOTIFY_LOCK | OPT | | Section 20.11 | |
| 129 | NS*| CB_PUSH_DELEG | OPT | FDELG (OPT) | Section 20.5 | | 122 | NS*| CB_PUSH_DELEG | OPT | FDELG (OPT) | Section 20.5 | |
| 130 | | CB_RECALL | OPT | FDELG, | Section 20.2 | | 123 | | CB_RECALL | OPT | FDELG, | Section 20.2 | |
diff --git a/Documentation/filesystems/nfs/pnfs-block-server.txt b/Documentation/filesystems/nfs/pnfs-block-server.txt new file mode 100644 index 000000000000..2143673cf154 --- /dev/null +++ b/Documentation/filesystems/nfs/pnfs-block-server.txt | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | pNFS block layout server user guide | ||
| 2 | |||
| 3 | The Linux NFS server now supports the pNFS block layout extension. In this | ||
| 4 | case the NFS server acts as Metadata Server (MDS) for pNFS, which in addition | ||
| 5 | to handling all the metadata access to the NFS export also hands out layouts | ||
| 6 | to the clients to directly access the underlying block devices that are | ||
| 7 | shared with the client. | ||
| 8 | |||
| 9 | To use pNFS block layouts with the Linux NFS server, the exported file | ||
| 10 | system needs to support the pNFS block layouts (currently just XFS), and the | ||
| 11 | file system must sit on shared storage (typically iSCSI) that is accessible | ||
| 12 | to the clients in addition to the MDS. As of now the file system needs to | ||
| 13 | sit directly on the exported volume; striping or concatenation of | ||
| 14 | volumes on the MDS and clients is not supported yet. | ||
| 15 | |||
| 16 | On the server, pNFS block volume support is automatically enabled if the file | ||
| 17 | system supports it. On the client, make sure the kernel has the CONFIG_PNFS_BLOCK | ||
| 18 | option enabled, the blkmapd daemon from nfs-utils is running, and the | ||
| 19 | file system is mounted using the NFSv4.1 protocol version (mount -o vers=4.1). | ||
| 20 | |||
| 21 | If the nfsd server needs to fence a non-responding client it calls | ||
| 22 | /sbin/nfsd-recall-failed with the first argument set to the IP address of | ||
| 23 | the client, and the second argument set to the device node without the /dev | ||
| 24 | prefix for the file system to be fenced. Below is an example file that shows | ||
| 25 | how to translate the device into a serial number from SCSI EVPD 0x80: | ||
| 26 | |||
| 27 | cat > /sbin/nfsd-recall-failed << 'EOF' | ||
| 28 | #!/bin/sh | ||
| 29 | |||
| 30 | CLIENT="$1" | ||
| 31 | DEV="/dev/$2" | ||
| 32 | EVPD=`sg_inq --page=0x80 ${DEV} | \ | ||
| 33 | grep "Unit serial number:" | \ | ||
| 34 | awk -F ': ' '{print $2}'` | ||
| 35 | |||
| 36 | echo "fencing client ${CLIENT} serial ${EVPD}" >> /var/log/pnfsd-fence.log | ||
| 37 | EOF | ||
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 56598742dde4..5581e020644b 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c | |||
| @@ -57,8 +57,8 @@ static DEFINE_SPINLOCK(nlm_blocked_lock); | |||
| 57 | static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) | 57 | static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) |
| 58 | { | 58 | { |
| 59 | /* | 59 | /* |
| 60 | * We can get away with a static buffer because we're only | 60 | * We can get away with a static buffer because this is only called |
| 61 | * called with BKL held. | 61 | * from lockd, which is single-threaded. |
| 62 | */ | 62 | */ |
| 63 | static char buf[2*NLM_MAXCOOKIELEN+1]; | 63 | static char buf[2*NLM_MAXCOOKIELEN+1]; |
| 64 | unsigned int i, len = sizeof(buf); | 64 | unsigned int i, len = sizeof(buf); |
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 9340e7e10ef6..5b651daad518 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c | |||
| @@ -95,14 +95,6 @@ nlm_decode_fh(__be32 *p, struct nfs_fh *f) | |||
| 95 | return p + XDR_QUADLEN(NFS2_FHSIZE); | 95 | return p + XDR_QUADLEN(NFS2_FHSIZE); |
| 96 | } | 96 | } |
| 97 | 97 | ||
| 98 | static inline __be32 * | ||
| 99 | nlm_encode_fh(__be32 *p, struct nfs_fh *f) | ||
| 100 | { | ||
| 101 | *p++ = htonl(NFS2_FHSIZE); | ||
| 102 | memcpy(p, f->data, NFS2_FHSIZE); | ||
| 103 | return p + XDR_QUADLEN(NFS2_FHSIZE); | ||
| 104 | } | ||
| 105 | |||
| 106 | /* | 98 | /* |
| 107 | * Encode and decode owner handle | 99 | * Encode and decode owner handle |
| 108 | */ | 100 | */ |
diff --git a/fs/locks.c b/fs/locks.c index 4d0d41163a50..4753218f308e 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
| @@ -137,7 +137,7 @@ | |||
| 137 | 137 | ||
| 138 | #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) | 138 | #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) |
| 139 | #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) | 139 | #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) |
| 140 | #define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG)) | 140 | #define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT)) |
| 141 | #define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK) | 141 | #define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK) |
| 142 | 142 | ||
| 143 | static bool lease_breaking(struct file_lock *fl) | 143 | static bool lease_breaking(struct file_lock *fl) |
| @@ -1371,6 +1371,8 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose) | |||
| 1371 | 1371 | ||
| 1372 | static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) | 1372 | static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) |
| 1373 | { | 1373 | { |
| 1374 | if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) | ||
| 1375 | return false; | ||
| 1374 | if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) | 1376 | if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) |
| 1375 | return false; | 1377 | return false; |
| 1376 | return locks_conflict(breaker, lease); | 1378 | return locks_conflict(breaker, lease); |
| @@ -1594,11 +1596,14 @@ int fcntl_getlease(struct file *filp) | |||
| 1594 | * conflict with the lease we're trying to set. | 1596 | * conflict with the lease we're trying to set. |
| 1595 | */ | 1597 | */ |
| 1596 | static int | 1598 | static int |
| 1597 | check_conflicting_open(const struct dentry *dentry, const long arg) | 1599 | check_conflicting_open(const struct dentry *dentry, const long arg, int flags) |
| 1598 | { | 1600 | { |
| 1599 | int ret = 0; | 1601 | int ret = 0; |
| 1600 | struct inode *inode = dentry->d_inode; | 1602 | struct inode *inode = dentry->d_inode; |
| 1601 | 1603 | ||
| 1604 | if (flags & FL_LAYOUT) | ||
| 1605 | return 0; | ||
| 1606 | |||
| 1602 | if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) | 1607 | if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) |
| 1603 | return -EAGAIN; | 1608 | return -EAGAIN; |
| 1604 | 1609 | ||
| @@ -1647,7 +1652,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr | |||
| 1647 | 1652 | ||
| 1648 | spin_lock(&ctx->flc_lock); | 1653 | spin_lock(&ctx->flc_lock); |
| 1649 | time_out_leases(inode, &dispose); | 1654 | time_out_leases(inode, &dispose); |
| 1650 | error = check_conflicting_open(dentry, arg); | 1655 | error = check_conflicting_open(dentry, arg, lease->fl_flags); |
| 1651 | if (error) | 1656 | if (error) |
| 1652 | goto out; | 1657 | goto out; |
| 1653 | 1658 | ||
| @@ -1661,7 +1666,8 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr | |||
| 1661 | */ | 1666 | */ |
| 1662 | error = -EAGAIN; | 1667 | error = -EAGAIN; |
| 1663 | list_for_each_entry(fl, &ctx->flc_lease, fl_list) { | 1668 | list_for_each_entry(fl, &ctx->flc_lease, fl_list) { |
| 1664 | if (fl->fl_file == filp) { | 1669 | if (fl->fl_file == filp && |
| 1670 | fl->fl_owner == lease->fl_owner) { | ||
| 1665 | my_fl = fl; | 1671 | my_fl = fl; |
| 1666 | continue; | 1672 | continue; |
| 1667 | } | 1673 | } |
| @@ -1702,7 +1708,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr | |||
| 1702 | * precedes these checks. | 1708 | * precedes these checks. |
| 1703 | */ | 1709 | */ |
| 1704 | smp_mb(); | 1710 | smp_mb(); |
| 1705 | error = check_conflicting_open(dentry, arg); | 1711 | error = check_conflicting_open(dentry, arg, lease->fl_flags); |
| 1706 | if (error) { | 1712 | if (error) { |
| 1707 | locks_unlink_lock_ctx(lease, &ctx->flc_lease_cnt); | 1713 | locks_unlink_lock_ctx(lease, &ctx->flc_lease_cnt); |
| 1708 | goto out; | 1714 | goto out; |
| @@ -1721,7 +1727,7 @@ out: | |||
| 1721 | return error; | 1727 | return error; |
| 1722 | } | 1728 | } |
| 1723 | 1729 | ||
| 1724 | static int generic_delete_lease(struct file *filp) | 1730 | static int generic_delete_lease(struct file *filp, void *owner) |
| 1725 | { | 1731 | { |
| 1726 | int error = -EAGAIN; | 1732 | int error = -EAGAIN; |
| 1727 | struct file_lock *fl, *victim = NULL; | 1733 | struct file_lock *fl, *victim = NULL; |
| @@ -1737,7 +1743,8 @@ static int generic_delete_lease(struct file *filp) | |||
| 1737 | 1743 | ||
| 1738 | spin_lock(&ctx->flc_lock); | 1744 | spin_lock(&ctx->flc_lock); |
| 1739 | list_for_each_entry(fl, &ctx->flc_lease, fl_list) { | 1745 | list_for_each_entry(fl, &ctx->flc_lease, fl_list) { |
| 1740 | if (fl->fl_file == filp) { | 1746 | if (fl->fl_file == filp && |
| 1747 | fl->fl_owner == owner) { | ||
| 1741 | victim = fl; | 1748 | victim = fl; |
| 1742 | break; | 1749 | break; |
| 1743 | } | 1750 | } |
| @@ -1778,13 +1785,14 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp, | |||
| 1778 | 1785 | ||
| 1779 | switch (arg) { | 1786 | switch (arg) { |
| 1780 | case F_UNLCK: | 1787 | case F_UNLCK: |
| 1781 | return generic_delete_lease(filp); | 1788 | return generic_delete_lease(filp, *priv); |
| 1782 | case F_RDLCK: | 1789 | case F_RDLCK: |
| 1783 | case F_WRLCK: | 1790 | case F_WRLCK: |
| 1784 | if (!(*flp)->fl_lmops->lm_break) { | 1791 | if (!(*flp)->fl_lmops->lm_break) { |
| 1785 | WARN_ON_ONCE(1); | 1792 | WARN_ON_ONCE(1); |
| 1786 | return -ENOLCK; | 1793 | return -ENOLCK; |
| 1787 | } | 1794 | } |
| 1795 | |||
| 1788 | return generic_add_lease(filp, arg, flp, priv); | 1796 | return generic_add_lease(filp, arg, flp, priv); |
| 1789 | default: | 1797 | default: |
| 1790 | return -EINVAL; | 1798 | return -EINVAL; |
| @@ -1857,7 +1865,7 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) | |||
| 1857 | int fcntl_setlease(unsigned int fd, struct file *filp, long arg) | 1865 | int fcntl_setlease(unsigned int fd, struct file *filp, long arg) |
| 1858 | { | 1866 | { |
| 1859 | if (arg == F_UNLCK) | 1867 | if (arg == F_UNLCK) |
| 1860 | return vfs_setlease(filp, F_UNLCK, NULL, NULL); | 1868 | return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp); |
| 1861 | return do_fcntl_add_lease(fd, filp, arg); | 1869 | return do_fcntl_add_lease(fd, filp, arg); |
| 1862 | } | 1870 | } |
| 1863 | 1871 | ||
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 73395156bdb4..683bf718aead 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig | |||
| @@ -82,6 +82,16 @@ config NFSD_V4 | |||
| 82 | 82 | ||
| 83 | If unsure, say N. | 83 | If unsure, say N. |
| 84 | 84 | ||
| 85 | config NFSD_PNFS | ||
| 86 | bool "NFSv4.1 server support for Parallel NFS (pNFS)" | ||
| 87 | depends on NFSD_V4 | ||
| 88 | help | ||
| 89 | This option enables support for the parallel NFS features of the | ||
| 90 | minor version 1 of the NFSv4 protocol (RFC5661) in the kernel's NFS | ||
| 91 | server. | ||
| 92 | |||
| 93 | If unsure, say N. | ||
| 94 | |||
| 85 | config NFSD_V4_SECURITY_LABEL | 95 | config NFSD_V4_SECURITY_LABEL |
| 86 | bool "Provide Security Label support for NFSv4 server" | 96 | bool "Provide Security Label support for NFSv4 server" |
| 87 | depends on NFSD_V4 && SECURITY | 97 | depends on NFSD_V4 && SECURITY |
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index af32ef06b4fe..9a6028e120c6 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile | |||
| @@ -2,9 +2,14 @@ | |||
| 2 | # Makefile for the Linux nfs server | 2 | # Makefile for the Linux nfs server |
| 3 | # | 3 | # |
| 4 | 4 | ||
| 5 | ccflags-y += -I$(src) # needed for trace events | ||
| 6 | |||
| 5 | obj-$(CONFIG_NFSD) += nfsd.o | 7 | obj-$(CONFIG_NFSD) += nfsd.o |
| 6 | 8 | ||
| 7 | nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ | 9 | # this one should be compiled first, as the tracing macros can easily blow up |
| 10 | nfsd-y += trace.o | ||
| 11 | |||
| 12 | nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ | ||
| 8 | export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o | 13 | export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o |
| 9 | nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o | 14 | nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o |
| 10 | nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o | 15 | nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o |
| @@ -12,3 +17,4 @@ nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o | |||
| 12 | nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o | 17 | nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o |
| 13 | nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ | 18 | nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ |
| 14 | nfs4acl.o nfs4callback.o nfs4recover.o | 19 | nfs4acl.o nfs4callback.o nfs4recover.o |
| 20 | nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o blocklayout.o blocklayoutxdr.o | ||
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c new file mode 100644 index 000000000000..cdbc78c72542 --- /dev/null +++ b/fs/nfsd/blocklayout.c | |||
| @@ -0,0 +1,189 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2014 Christoph Hellwig. | ||
| 3 | */ | ||
| 4 | #include <linux/exportfs.h> | ||
| 5 | #include <linux/genhd.h> | ||
| 6 | #include <linux/slab.h> | ||
| 7 | |||
| 8 | #include <linux/nfsd/debug.h> | ||
| 9 | |||
| 10 | #include "blocklayoutxdr.h" | ||
| 11 | #include "pnfs.h" | ||
| 12 | |||
| 13 | #define NFSDDBG_FACILITY NFSDDBG_PNFS | ||
| 14 | |||
| 15 | |||
| 16 | static int | ||
| 17 | nfsd4_block_get_device_info_simple(struct super_block *sb, | ||
| 18 | struct nfsd4_getdeviceinfo *gdp) | ||
| 19 | { | ||
| 20 | struct pnfs_block_deviceaddr *dev; | ||
| 21 | struct pnfs_block_volume *b; | ||
| 22 | |||
| 23 | dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) + | ||
| 24 | sizeof(struct pnfs_block_volume), GFP_KERNEL); | ||
| 25 | if (!dev) | ||
| 26 | return -ENOMEM; | ||
| 27 | gdp->gd_device = dev; | ||
| 28 | |||
| 29 | dev->nr_volumes = 1; | ||
| 30 | b = &dev->volumes[0]; | ||
| 31 | |||
| 32 | b->type = PNFS_BLOCK_VOLUME_SIMPLE; | ||
| 33 | b->simple.sig_len = PNFS_BLOCK_UUID_LEN; | ||
| 34 | return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len, | ||
| 35 | &b->simple.offset); | ||
| 36 | } | ||
| 37 | |||
| 38 | static __be32 | ||
| 39 | nfsd4_block_proc_getdeviceinfo(struct super_block *sb, | ||
| 40 | struct nfsd4_getdeviceinfo *gdp) | ||
| 41 | { | ||
| 42 | if (sb->s_bdev != sb->s_bdev->bd_contains) | ||
| 43 | return nfserr_inval; | ||
| 44 | return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp)); | ||
| 45 | } | ||
| 46 | |||
| 47 | static __be32 | ||
| 48 | nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, | ||
| 49 | struct nfsd4_layoutget *args) | ||
| 50 | { | ||
| 51 | struct nfsd4_layout_seg *seg = &args->lg_seg; | ||
| 52 | struct super_block *sb = inode->i_sb; | ||
| 53 | u32 block_size = (1 << inode->i_blkbits); | ||
| 54 | struct pnfs_block_extent *bex; | ||
| 55 | struct iomap iomap; | ||
| 56 | u32 device_generation = 0; | ||
| 57 | int error; | ||
| 58 | |||
| 59 | /* | ||
| 60 | * We do not attempt to support I/O smaller than the fs block size, | ||
| 61 | * or not aligned to it. | ||
| 62 | */ | ||
| 63 | if (args->lg_minlength < block_size) { | ||
| 64 | dprintk("pnfsd: I/O too small\n"); | ||
| 65 | goto out_layoutunavailable; | ||
| 66 | } | ||
| 67 | if (seg->offset & (block_size - 1)) { | ||
| 68 | dprintk("pnfsd: I/O misaligned\n"); | ||
| 69 | goto out_layoutunavailable; | ||
| 70 | } | ||
| 71 | |||
| 72 | /* | ||
| 73 | * Some clients barf on non-zero block numbers for NONE or INVALID | ||
| 74 | * layouts, so make sure to zero the whole structure. | ||
| 75 | */ | ||
| 76 | error = -ENOMEM; | ||
| 77 | bex = kzalloc(sizeof(*bex), GFP_KERNEL); | ||
| 78 | if (!bex) | ||
| 79 | goto out_error; | ||
| 80 | args->lg_content = bex; | ||
| 81 | |||
| 82 | error = sb->s_export_op->map_blocks(inode, seg->offset, seg->length, | ||
| 83 | &iomap, seg->iomode != IOMODE_READ, | ||
| 84 | &device_generation); | ||
| 85 | if (error) { | ||
| 86 | if (error == -ENXIO) | ||
| 87 | goto out_layoutunavailable; | ||
| 88 | goto out_error; | ||
| 89 | } | ||
| 90 | |||
| 91 | if (iomap.length < args->lg_minlength) { | ||
| 92 | dprintk("pnfsd: extent smaller than minlength\n"); | ||
| 93 | goto out_layoutunavailable; | ||
| 94 | } | ||
| 95 | |||
| 96 | switch (iomap.type) { | ||
| 97 | case IOMAP_MAPPED: | ||
| 98 | if (seg->iomode == IOMODE_READ) | ||
| 99 | bex->es = PNFS_BLOCK_READ_DATA; | ||
| 100 | else | ||
| 101 | bex->es = PNFS_BLOCK_READWRITE_DATA; | ||
| 102 | bex->soff = (iomap.blkno << 9); | ||
| 103 | break; | ||
| 104 | case IOMAP_UNWRITTEN: | ||
| 105 | if (seg->iomode & IOMODE_RW) { | ||
| 106 | /* | ||
| 107 | * Crack monkey special case from section 2.3.1. | ||
| 108 | */ | ||
| 109 | if (args->lg_minlength == 0) { | ||
| 110 | dprintk("pnfsd: no soup for you!\n"); | ||
| 111 | goto out_layoutunavailable; | ||
| 112 | } | ||
| 113 | |||
| 114 | bex->es = PNFS_BLOCK_INVALID_DATA; | ||
| 115 | bex->soff = (iomap.blkno << 9); | ||
| 116 | break; | ||
| 117 | } | ||
| 118 | /*FALLTHRU*/ | ||
| 119 | case IOMAP_HOLE: | ||
| 120 | if (seg->iomode == IOMODE_READ) { | ||
| 121 | bex->es = PNFS_BLOCK_NONE_DATA; | ||
| 122 | break; | ||
| 123 | } | ||
| 124 | /*FALLTHRU*/ | ||
| 125 | case IOMAP_DELALLOC: | ||
| 126 | default: | ||
| 127 | WARN(1, "pnfsd: filesystem returned %d extent\n", iomap.type); | ||
| 128 | goto out_layoutunavailable; | ||
| 129 | } | ||
| 130 | |||
| 131 | error = nfsd4_set_deviceid(&bex->vol_id, fhp, device_generation); | ||
| 132 | if (error) | ||
| 133 | goto out_error; | ||
| 134 | bex->foff = iomap.offset; | ||
| 135 | bex->len = iomap.length; | ||
| 136 | |||
| 137 | seg->offset = iomap.offset; | ||
| 138 | seg->length = iomap.length; | ||
| 139 | |||
| 140 | dprintk("GET: %lld:%lld %d\n", bex->foff, bex->len, bex->es); | ||
| 141 | return 0; | ||
| 142 | |||
| 143 | out_error: | ||
| 144 | seg->length = 0; | ||
| 145 | return nfserrno(error); | ||
| 146 | out_layoutunavailable: | ||
| 147 | seg->length = 0; | ||
| 148 | return nfserr_layoutunavailable; | ||
| 149 | } | ||
| 150 | |||
| 151 | static __be32 | ||
| 152 | nfsd4_block_proc_layoutcommit(struct inode *inode, | ||
| 153 | struct nfsd4_layoutcommit *lcp) | ||
| 154 | { | ||
| 155 | loff_t new_size = lcp->lc_last_wr + 1; | ||
| 156 | struct iattr iattr = { .ia_valid = 0 }; | ||
| 157 | struct iomap *iomaps; | ||
| 158 | int nr_iomaps; | ||
| 159 | int error; | ||
| 160 | |||
| 161 | nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout, | ||
| 162 | lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits); | ||
| 163 | if (nr_iomaps < 0) | ||
| 164 | return nfserrno(nr_iomaps); | ||
| 165 | |||
| 166 | if (lcp->lc_mtime.tv_nsec == UTIME_NOW || | ||
| 167 | timespec_compare(&lcp->lc_mtime, &inode->i_mtime) < 0) | ||
| 168 | lcp->lc_mtime = current_fs_time(inode->i_sb); | ||
| 169 | iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME; | ||
| 170 | iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime; | ||
| 171 | |||
| 172 | if (new_size > i_size_read(inode)) { | ||
| 173 | iattr.ia_valid |= ATTR_SIZE; | ||
| 174 | iattr.ia_size = new_size; | ||
| 175 | } | ||
| 176 | |||
| 177 | error = inode->i_sb->s_export_op->commit_blocks(inode, iomaps, | ||
| 178 | nr_iomaps, &iattr); | ||
| 179 | kfree(iomaps); | ||
| 180 | return nfserrno(error); | ||
| 181 | } | ||
| 182 | |||
| 183 | const struct nfsd4_layout_ops bl_layout_ops = { | ||
| 184 | .proc_getdeviceinfo = nfsd4_block_proc_getdeviceinfo, | ||
| 185 | .encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo, | ||
| 186 | .proc_layoutget = nfsd4_block_proc_layoutget, | ||
| 187 | .encode_layoutget = nfsd4_block_encode_layoutget, | ||
| 188 | .proc_layoutcommit = nfsd4_block_proc_layoutcommit, | ||
| 189 | }; | ||
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c new file mode 100644 index 000000000000..9da89fddab33 --- /dev/null +++ b/fs/nfsd/blocklayoutxdr.c | |||
| @@ -0,0 +1,157 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2014 Christoph Hellwig. | ||
| 3 | */ | ||
| 4 | #include <linux/sunrpc/svc.h> | ||
| 5 | #include <linux/exportfs.h> | ||
| 6 | #include <linux/nfs4.h> | ||
| 7 | |||
| 8 | #include "nfsd.h" | ||
| 9 | #include "blocklayoutxdr.h" | ||
| 10 | |||
| 11 | #define NFSDDBG_FACILITY NFSDDBG_PNFS | ||
| 12 | |||
| 13 | |||
| 14 | __be32 | ||
| 15 | nfsd4_block_encode_layoutget(struct xdr_stream *xdr, | ||
| 16 | struct nfsd4_layoutget *lgp) | ||
| 17 | { | ||
| 18 | struct pnfs_block_extent *b = lgp->lg_content; | ||
| 19 | int len = sizeof(__be32) + 5 * sizeof(__be64) + sizeof(__be32); | ||
| 20 | __be32 *p; | ||
| 21 | |||
| 22 | p = xdr_reserve_space(xdr, sizeof(__be32) + len); | ||
| 23 | if (!p) | ||
| 24 | return nfserr_toosmall; | ||
| 25 | |||
| 26 | *p++ = cpu_to_be32(len); | ||
| 27 | *p++ = cpu_to_be32(1); /* we always return a single extent */ | ||
| 28 | |||
| 29 | p = xdr_encode_opaque_fixed(p, &b->vol_id, | ||
| 30 | sizeof(struct nfsd4_deviceid)); | ||
| 31 | p = xdr_encode_hyper(p, b->foff); | ||
| 32 | p = xdr_encode_hyper(p, b->len); | ||
| 33 | p = xdr_encode_hyper(p, b->soff); | ||
| 34 | *p++ = cpu_to_be32(b->es); | ||
| 35 | return 0; | ||
| 36 | } | ||
| 37 | |||
| 38 | static int | ||
| 39 | nfsd4_block_encode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b) | ||
| 40 | { | ||
| 41 | __be32 *p; | ||
| 42 | int len; | ||
| 43 | |||
| 44 | switch (b->type) { | ||
| 45 | case PNFS_BLOCK_VOLUME_SIMPLE: | ||
| 46 | len = 4 + 4 + 8 + 4 + b->simple.sig_len; | ||
| 47 | p = xdr_reserve_space(xdr, len); | ||
| 48 | if (!p) | ||
| 49 | return -ETOOSMALL; | ||
| 50 | |||
| 51 | *p++ = cpu_to_be32(b->type); | ||
| 52 | *p++ = cpu_to_be32(1); /* single signature */ | ||
| 53 | p = xdr_encode_hyper(p, b->simple.offset); | ||
| 54 | p = xdr_encode_opaque(p, b->simple.sig, b->simple.sig_len); | ||
| 55 | break; | ||
| 56 | default: | ||
| 57 | return -ENOTSUPP; | ||
| 58 | } | ||
| 59 | |||
| 60 | return len; | ||
| 61 | } | ||
| 62 | |||
| 63 | __be32 | ||
| 64 | nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr, | ||
| 65 | struct nfsd4_getdeviceinfo *gdp) | ||
| 66 | { | ||
| 67 | struct pnfs_block_deviceaddr *dev = gdp->gd_device; | ||
| 68 | int len = sizeof(__be32), ret, i; | ||
| 69 | __be32 *p; | ||
| 70 | |||
| 71 | p = xdr_reserve_space(xdr, len + sizeof(__be32)); | ||
| 72 | if (!p) | ||
| 73 | return nfserr_resource; | ||
| 74 | |||
| 75 | for (i = 0; i < dev->nr_volumes; i++) { | ||
| 76 | ret = nfsd4_block_encode_volume(xdr, &dev->volumes[i]); | ||
| 77 | if (ret < 0) | ||
| 78 | return nfserrno(ret); | ||
| 79 | len += ret; | ||
| 80 | } | ||
| 81 | |||
| 82 | /* | ||
| 83 | * Fill in the overall length and number of volumes at the beginning | ||
| 84 | * of the layout. | ||
| 85 | */ | ||
| 86 | *p++ = cpu_to_be32(len); | ||
| 87 | *p++ = cpu_to_be32(dev->nr_volumes); | ||
| 88 | return 0; | ||
| 89 | } | ||
| 90 | |||
| 91 | int | ||
| 92 | nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, | ||
| 93 | u32 block_size) | ||
| 94 | { | ||
| 95 | struct iomap *iomaps; | ||
| 96 | u32 nr_iomaps, expected, i; | ||
| 97 | |||
| 98 | if (len < sizeof(u32)) { | ||
| 99 | dprintk("%s: extent array too small: %u\n", __func__, len); | ||
| 100 | return -EINVAL; | ||
| 101 | } | ||
| 102 | |||
| 103 | nr_iomaps = be32_to_cpup(p++); | ||
| 104 | expected = sizeof(__be32) + nr_iomaps * NFS4_BLOCK_EXTENT_SIZE; | ||
| 105 | if (len != expected) { | ||
| 106 | dprintk("%s: extent array size mismatch: %u/%u\n", | ||
| 107 | __func__, len, expected); | ||
| 108 | return -EINVAL; | ||
| 109 | } | ||
| 110 | |||
| 111 | iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL); | ||
| 112 | if (!iomaps) { | ||
| 113 | dprintk("%s: failed to allocate extent array\n", __func__); | ||
| 114 | return -ENOMEM; | ||
| 115 | } | ||
| 116 | |||
| 117 | for (i = 0; i < nr_iomaps; i++) { | ||
| 118 | struct pnfs_block_extent bex; | ||
| 119 | |||
| 120 | memcpy(&bex.vol_id, p, sizeof(struct nfsd4_deviceid)); | ||
| 121 | p += XDR_QUADLEN(sizeof(struct nfsd4_deviceid)); | ||
| 122 | |||
| 123 | p = xdr_decode_hyper(p, &bex.foff); | ||
| 124 | if (bex.foff & (block_size - 1)) { | ||
| 125 | dprintk("%s: unaligned offset %lld\n", | ||
| 126 | __func__, bex.foff); | ||
| 127 | goto fail; | ||
| 128 | } | ||
| 129 | p = xdr_decode_hyper(p, &bex.len); | ||
| 130 | if (bex.len & (block_size - 1)) { | ||
| 131 | dprintk("%s: unaligned length %lld\n", | ||
| 132 | __func__, bex.foff); | ||
| 133 | goto fail; | ||
| 134 | } | ||
| 135 | p = xdr_decode_hyper(p, &bex.soff); | ||
| 136 | if (bex.soff & (block_size - 1)) { | ||
| 137 | dprintk("%s: unaligned disk offset %lld\n", | ||
| 138 | __func__, bex.soff); | ||
| 139 | goto fail; | ||
| 140 | } | ||
| 141 | bex.es = be32_to_cpup(p++); | ||
| 142 | if (bex.es != PNFS_BLOCK_READWRITE_DATA) { | ||
| 143 | dprintk("%s: incorrect extent state %d\n", | ||
| 144 | __func__, bex.es); | ||
| 145 | goto fail; | ||
| 146 | } | ||
| 147 | |||
| 148 | iomaps[i].offset = bex.foff; | ||
| 149 | iomaps[i].length = bex.len; | ||
| 150 | } | ||
| 151 | |||
| 152 | *iomapp = iomaps; | ||
| 153 | return nr_iomaps; | ||
| 154 | fail: | ||
| 155 | kfree(iomaps); | ||
| 156 | return -EINVAL; | ||
| 157 | } | ||
diff --git a/fs/nfsd/blocklayoutxdr.h b/fs/nfsd/blocklayoutxdr.h new file mode 100644 index 000000000000..fdc79037c0e7 --- /dev/null +++ b/fs/nfsd/blocklayoutxdr.h | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | #ifndef _NFSD_BLOCKLAYOUTXDR_H | ||
| 2 | #define _NFSD_BLOCKLAYOUTXDR_H 1 | ||
| 3 | |||
| 4 | #include <linux/blkdev.h> | ||
| 5 | #include "xdr4.h" | ||
| 6 | |||
| 7 | struct iomap; | ||
| 8 | struct xdr_stream; | ||
| 9 | |||
| 10 | enum pnfs_block_extent_state { | ||
| 11 | PNFS_BLOCK_READWRITE_DATA = 0, | ||
| 12 | PNFS_BLOCK_READ_DATA = 1, | ||
| 13 | PNFS_BLOCK_INVALID_DATA = 2, | ||
| 14 | PNFS_BLOCK_NONE_DATA = 3, | ||
| 15 | }; | ||
| 16 | |||
| 17 | struct pnfs_block_extent { | ||
| 18 | struct nfsd4_deviceid vol_id; | ||
| 19 | u64 foff; | ||
| 20 | u64 len; | ||
| 21 | u64 soff; | ||
| 22 | enum pnfs_block_extent_state es; | ||
| 23 | }; | ||
| 24 | #define NFS4_BLOCK_EXTENT_SIZE 44 | ||
| 25 | |||
| 26 | enum pnfs_block_volume_type { | ||
| 27 | PNFS_BLOCK_VOLUME_SIMPLE = 0, | ||
| 28 | PNFS_BLOCK_VOLUME_SLICE = 1, | ||
| 29 | PNFS_BLOCK_VOLUME_CONCAT = 2, | ||
| 30 | PNFS_BLOCK_VOLUME_STRIPE = 3, | ||
| 31 | }; | ||
| 32 | |||
| 33 | /* | ||
| 34 | * Random upper cap for the uuid length to avoid unbounded allocation. | ||
| 35 | * Not actually limited by the protocol. | ||
| 36 | */ | ||
| 37 | #define PNFS_BLOCK_UUID_LEN 128 | ||
| 38 | |||
| 39 | struct pnfs_block_volume { | ||
| 40 | enum pnfs_block_volume_type type; | ||
| 41 | union { | ||
| 42 | struct { | ||
| 43 | u64 offset; | ||
| 44 | u32 sig_len; | ||
| 45 | u8 sig[PNFS_BLOCK_UUID_LEN]; | ||
| 46 | } simple; | ||
| 47 | }; | ||
| 48 | }; | ||
| 49 | |||
| 50 | struct pnfs_block_deviceaddr { | ||
| 51 | u32 nr_volumes; | ||
| 52 | struct pnfs_block_volume volumes[]; | ||
| 53 | }; | ||
| 54 | |||
| 55 | __be32 nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr, | ||
| 56 | struct nfsd4_getdeviceinfo *gdp); | ||
| 57 | __be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr, | ||
| 58 | struct nfsd4_layoutget *lgp); | ||
| 59 | int nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, | ||
| 60 | u32 block_size); | ||
| 61 | |||
| 62 | #endif /* _NFSD_BLOCKLAYOUTXDR_H */ | ||
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 30a739d896ff..c3e3b6e55ae2 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "nfsd.h" | 20 | #include "nfsd.h" |
| 21 | #include "nfsfh.h" | 21 | #include "nfsfh.h" |
| 22 | #include "netns.h" | 22 | #include "netns.h" |
| 23 | #include "pnfs.h" | ||
| 23 | 24 | ||
| 24 | #define NFSDDBG_FACILITY NFSDDBG_EXPORT | 25 | #define NFSDDBG_FACILITY NFSDDBG_EXPORT |
| 25 | 26 | ||
| @@ -545,6 +546,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
| 545 | 546 | ||
| 546 | exp.ex_client = dom; | 547 | exp.ex_client = dom; |
| 547 | exp.cd = cd; | 548 | exp.cd = cd; |
| 549 | exp.ex_devid_map = NULL; | ||
| 548 | 550 | ||
| 549 | /* expiry */ | 551 | /* expiry */ |
| 550 | err = -EINVAL; | 552 | err = -EINVAL; |
| @@ -621,6 +623,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
| 621 | if (!gid_valid(exp.ex_anon_gid)) | 623 | if (!gid_valid(exp.ex_anon_gid)) |
| 622 | goto out4; | 624 | goto out4; |
| 623 | err = 0; | 625 | err = 0; |
| 626 | |||
| 627 | nfsd4_setup_layout_type(&exp); | ||
| 624 | } | 628 | } |
| 625 | 629 | ||
| 626 | expp = svc_export_lookup(&exp); | 630 | expp = svc_export_lookup(&exp); |
| @@ -703,6 +707,7 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) | |||
| 703 | new->ex_fslocs.locations = NULL; | 707 | new->ex_fslocs.locations = NULL; |
| 704 | new->ex_fslocs.locations_count = 0; | 708 | new->ex_fslocs.locations_count = 0; |
| 705 | new->ex_fslocs.migrated = 0; | 709 | new->ex_fslocs.migrated = 0; |
| 710 | new->ex_layout_type = 0; | ||
| 706 | new->ex_uuid = NULL; | 711 | new->ex_uuid = NULL; |
| 707 | new->cd = item->cd; | 712 | new->cd = item->cd; |
| 708 | } | 713 | } |
| @@ -717,6 +722,8 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem) | |||
| 717 | new->ex_anon_uid = item->ex_anon_uid; | 722 | new->ex_anon_uid = item->ex_anon_uid; |
| 718 | new->ex_anon_gid = item->ex_anon_gid; | 723 | new->ex_anon_gid = item->ex_anon_gid; |
| 719 | new->ex_fsid = item->ex_fsid; | 724 | new->ex_fsid = item->ex_fsid; |
| 725 | new->ex_devid_map = item->ex_devid_map; | ||
| 726 | item->ex_devid_map = NULL; | ||
| 720 | new->ex_uuid = item->ex_uuid; | 727 | new->ex_uuid = item->ex_uuid; |
| 721 | item->ex_uuid = NULL; | 728 | item->ex_uuid = NULL; |
| 722 | new->ex_fslocs.locations = item->ex_fslocs.locations; | 729 | new->ex_fslocs.locations = item->ex_fslocs.locations; |
| @@ -725,6 +732,7 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem) | |||
| 725 | item->ex_fslocs.locations_count = 0; | 732 | item->ex_fslocs.locations_count = 0; |
| 726 | new->ex_fslocs.migrated = item->ex_fslocs.migrated; | 733 | new->ex_fslocs.migrated = item->ex_fslocs.migrated; |
| 727 | item->ex_fslocs.migrated = 0; | 734 | item->ex_fslocs.migrated = 0; |
| 735 | new->ex_layout_type = item->ex_layout_type; | ||
| 728 | new->ex_nflavors = item->ex_nflavors; | 736 | new->ex_nflavors = item->ex_nflavors; |
| 729 | for (i = 0; i < MAX_SECINFO_LIST; i++) { | 737 | for (i = 0; i < MAX_SECINFO_LIST; i++) { |
| 730 | new->ex_flavors[i] = item->ex_flavors[i]; | 738 | new->ex_flavors[i] = item->ex_flavors[i]; |
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index 04dc8c167b0c..1f52bfcc436f 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h | |||
| @@ -56,6 +56,8 @@ struct svc_export { | |||
| 56 | struct nfsd4_fs_locations ex_fslocs; | 56 | struct nfsd4_fs_locations ex_fslocs; |
| 57 | uint32_t ex_nflavors; | 57 | uint32_t ex_nflavors; |
| 58 | struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST]; | 58 | struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST]; |
| 59 | enum pnfs_layouttype ex_layout_type; | ||
| 60 | struct nfsd4_deviceid_map *ex_devid_map; | ||
| 59 | struct cache_detail *cd; | 61 | struct cache_detail *cd; |
| 60 | }; | 62 | }; |
| 61 | 63 | ||
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 7cbdf1b2e4ab..58277859a467 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
| @@ -546,6 +546,102 @@ out: | |||
| 546 | return status; | 546 | return status; |
| 547 | } | 547 | } |
| 548 | 548 | ||
| 549 | #ifdef CONFIG_NFSD_PNFS | ||
| 550 | /* | ||
| 551 | * CB_LAYOUTRECALL4args | ||
| 552 | * | ||
| 553 | * struct layoutrecall_file4 { | ||
| 554 | * nfs_fh4 lor_fh; | ||
| 555 | * offset4 lor_offset; | ||
| 556 | * length4 lor_length; | ||
| 557 | * stateid4 lor_stateid; | ||
| 558 | * }; | ||
| 559 | * | ||
| 560 | * union layoutrecall4 switch(layoutrecall_type4 lor_recalltype) { | ||
| 561 | * case LAYOUTRECALL4_FILE: | ||
| 562 | * layoutrecall_file4 lor_layout; | ||
| 563 | * case LAYOUTRECALL4_FSID: | ||
| 564 | * fsid4 lor_fsid; | ||
| 565 | * case LAYOUTRECALL4_ALL: | ||
| 566 | * void; | ||
| 567 | * }; | ||
| 568 | * | ||
| 569 | * struct CB_LAYOUTRECALL4args { | ||
| 570 | * layouttype4 clora_type; | ||
| 571 | * layoutiomode4 clora_iomode; | ||
| 572 | * bool clora_changed; | ||
| 573 | * layoutrecall4 clora_recall; | ||
| 574 | * }; | ||
| 575 | */ | ||
| 576 | static void encode_cb_layout4args(struct xdr_stream *xdr, | ||
| 577 | const struct nfs4_layout_stateid *ls, | ||
| 578 | struct nfs4_cb_compound_hdr *hdr) | ||
| 579 | { | ||
| 580 | __be32 *p; | ||
| 581 | |||
| 582 | BUG_ON(hdr->minorversion == 0); | ||
| 583 | |||
| 584 | p = xdr_reserve_space(xdr, 5 * 4); | ||
| 585 | *p++ = cpu_to_be32(OP_CB_LAYOUTRECALL); | ||
| 586 | *p++ = cpu_to_be32(ls->ls_layout_type); | ||
| 587 | *p++ = cpu_to_be32(IOMODE_ANY); | ||
| 588 | *p++ = cpu_to_be32(1); | ||
| 589 | *p = cpu_to_be32(RETURN_FILE); | ||
| 590 | |||
| 591 | encode_nfs_fh4(xdr, &ls->ls_stid.sc_file->fi_fhandle); | ||
| 592 | |||
| 593 | p = xdr_reserve_space(xdr, 2 * 8); | ||
| 594 | p = xdr_encode_hyper(p, 0); | ||
| 595 | xdr_encode_hyper(p, NFS4_MAX_UINT64); | ||
| 596 | |||
| 597 | encode_stateid4(xdr, &ls->ls_recall_sid); | ||
| 598 | |||
| 599 | hdr->nops++; | ||
| 600 | } | ||
| 601 | |||
| 602 | static void nfs4_xdr_enc_cb_layout(struct rpc_rqst *req, | ||
| 603 | struct xdr_stream *xdr, | ||
| 604 | const struct nfsd4_callback *cb) | ||
| 605 | { | ||
| 606 | const struct nfs4_layout_stateid *ls = | ||
| 607 | container_of(cb, struct nfs4_layout_stateid, ls_recall); | ||
| 608 | struct nfs4_cb_compound_hdr hdr = { | ||
| 609 | .ident = 0, | ||
| 610 | .minorversion = cb->cb_minorversion, | ||
| 611 | }; | ||
| 612 | |||
| 613 | encode_cb_compound4args(xdr, &hdr); | ||
| 614 | encode_cb_sequence4args(xdr, cb, &hdr); | ||
| 615 | encode_cb_layout4args(xdr, ls, &hdr); | ||
| 616 | encode_cb_nops(&hdr); | ||
| 617 | } | ||
| 618 | |||
| 619 | static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp, | ||
| 620 | struct xdr_stream *xdr, | ||
| 621 | struct nfsd4_callback *cb) | ||
| 622 | { | ||
| 623 | struct nfs4_cb_compound_hdr hdr; | ||
| 624 | enum nfsstat4 nfserr; | ||
| 625 | int status; | ||
| 626 | |||
| 627 | status = decode_cb_compound4res(xdr, &hdr); | ||
| 628 | if (unlikely(status)) | ||
| 629 | goto out; | ||
| 630 | if (cb) { | ||
| 631 | status = decode_cb_sequence4res(xdr, cb); | ||
| 632 | if (unlikely(status)) | ||
| 633 | goto out; | ||
| 634 | } | ||
| 635 | status = decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &nfserr); | ||
| 636 | if (unlikely(status)) | ||
| 637 | goto out; | ||
| 638 | if (unlikely(nfserr != NFS4_OK)) | ||
| 639 | status = nfs_cb_stat_to_errno(nfserr); | ||
| 640 | out: | ||
| 641 | return status; | ||
| 642 | } | ||
| 643 | #endif /* CONFIG_NFSD_PNFS */ | ||
| 644 | |||
| 549 | /* | 645 | /* |
| 550 | * RPC procedure tables | 646 | * RPC procedure tables |
| 551 | */ | 647 | */ |
| @@ -563,6 +659,9 @@ out: | |||
| 563 | static struct rpc_procinfo nfs4_cb_procedures[] = { | 659 | static struct rpc_procinfo nfs4_cb_procedures[] = { |
| 564 | PROC(CB_NULL, NULL, cb_null, cb_null), | 660 | PROC(CB_NULL, NULL, cb_null, cb_null), |
| 565 | PROC(CB_RECALL, COMPOUND, cb_recall, cb_recall), | 661 | PROC(CB_RECALL, COMPOUND, cb_recall, cb_recall), |
| 662 | #ifdef CONFIG_NFSD_PNFS | ||
| 663 | PROC(CB_LAYOUT, COMPOUND, cb_layout, cb_layout), | ||
| 664 | #endif | ||
| 566 | }; | 665 | }; |
| 567 | 666 | ||
| 568 | static struct rpc_version nfs_cb_version4 = { | 667 | static struct rpc_version nfs_cb_version4 = { |
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c new file mode 100644 index 000000000000..3c1bfa155571 --- /dev/null +++ b/fs/nfsd/nfs4layouts.c | |||
| @@ -0,0 +1,721 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2014 Christoph Hellwig. | ||
| 3 | */ | ||
| 4 | #include <linux/kmod.h> | ||
| 5 | #include <linux/file.h> | ||
| 6 | #include <linux/jhash.h> | ||
| 7 | #include <linux/sched.h> | ||
| 8 | #include <linux/sunrpc/addr.h> | ||
| 9 | |||
| 10 | #include "pnfs.h" | ||
| 11 | #include "netns.h" | ||
| 12 | #include "trace.h" | ||
| 13 | |||
| 14 | #define NFSDDBG_FACILITY NFSDDBG_PNFS | ||
| 15 | |||
| 16 | struct nfs4_layout { | ||
| 17 | struct list_head lo_perstate; | ||
| 18 | struct nfs4_layout_stateid *lo_state; | ||
| 19 | struct nfsd4_layout_seg lo_seg; | ||
| 20 | }; | ||
| 21 | |||
| 22 | static struct kmem_cache *nfs4_layout_cache; | ||
| 23 | static struct kmem_cache *nfs4_layout_stateid_cache; | ||
| 24 | |||
| 25 | static struct nfsd4_callback_ops nfsd4_cb_layout_ops; | ||
| 26 | static const struct lock_manager_operations nfsd4_layouts_lm_ops; | ||
| 27 | |||
| 28 | const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = { | ||
| 29 | [LAYOUT_BLOCK_VOLUME] = &bl_layout_ops, | ||
| 30 | }; | ||
| 31 | |||
| 32 | /* pNFS device ID to export fsid mapping */ | ||
| 33 | #define DEVID_HASH_BITS 8 | ||
| 34 | #define DEVID_HASH_SIZE (1 << DEVID_HASH_BITS) | ||
| 35 | #define DEVID_HASH_MASK (DEVID_HASH_SIZE - 1) | ||
| 36 | static u64 nfsd_devid_seq = 1; | ||
| 37 | static struct list_head nfsd_devid_hash[DEVID_HASH_SIZE]; | ||
| 38 | static DEFINE_SPINLOCK(nfsd_devid_lock); | ||
| 39 | |||
| 40 | static inline u32 devid_hashfn(u64 idx) | ||
| 41 | { | ||
| 42 | return jhash_2words(idx, idx >> 32, 0) & DEVID_HASH_MASK; | ||
| 43 | } | ||
| 44 | |||
| 45 | static void | ||
| 46 | nfsd4_alloc_devid_map(const struct svc_fh *fhp) | ||
| 47 | { | ||
| 48 | const struct knfsd_fh *fh = &fhp->fh_handle; | ||
| 49 | size_t fsid_len = key_len(fh->fh_fsid_type); | ||
| 50 | struct nfsd4_deviceid_map *map, *old; | ||
| 51 | int i; | ||
| 52 | |||
| 53 | map = kzalloc(sizeof(*map) + fsid_len, GFP_KERNEL); | ||
| 54 | if (!map) | ||
| 55 | return; | ||
| 56 | |||
| 57 | map->fsid_type = fh->fh_fsid_type; | ||
| 58 | memcpy(&map->fsid, fh->fh_fsid, fsid_len); | ||
| 59 | |||
| 60 | spin_lock(&nfsd_devid_lock); | ||
| 61 | if (fhp->fh_export->ex_devid_map) | ||
| 62 | goto out_unlock; | ||
| 63 | |||
| 64 | for (i = 0; i < DEVID_HASH_SIZE; i++) { | ||
| 65 | list_for_each_entry(old, &nfsd_devid_hash[i], hash) { | ||
| 66 | if (old->fsid_type != fh->fh_fsid_type) | ||
| 67 | continue; | ||
| 68 | if (memcmp(old->fsid, fh->fh_fsid, | ||
| 69 | key_len(old->fsid_type))) | ||
| 70 | continue; | ||
| 71 | |||
| 72 | fhp->fh_export->ex_devid_map = old; | ||
| 73 | goto out_unlock; | ||
| 74 | } | ||
| 75 | } | ||
| 76 | |||
| 77 | map->idx = nfsd_devid_seq++; | ||
| 78 | list_add_tail_rcu(&map->hash, &nfsd_devid_hash[devid_hashfn(map->idx)]); | ||
| 79 | fhp->fh_export->ex_devid_map = map; | ||
| 80 | map = NULL; | ||
| 81 | |||
| 82 | out_unlock: | ||
| 83 | spin_unlock(&nfsd_devid_lock); | ||
| 84 | kfree(map); | ||
| 85 | } | ||
| 86 | |||
| 87 | struct nfsd4_deviceid_map * | ||
| 88 | nfsd4_find_devid_map(int idx) | ||
| 89 | { | ||
| 90 | struct nfsd4_deviceid_map *map, *ret = NULL; | ||
| 91 | |||
| 92 | rcu_read_lock(); | ||
| 93 | list_for_each_entry_rcu(map, &nfsd_devid_hash[devid_hashfn(idx)], hash) | ||
| 94 | if (map->idx == idx) | ||
| 95 | ret = map; | ||
| 96 | rcu_read_unlock(); | ||
| 97 | |||
| 98 | return ret; | ||
| 99 | } | ||
| 100 | |||
| 101 | int | ||
| 102 | nfsd4_set_deviceid(struct nfsd4_deviceid *id, const struct svc_fh *fhp, | ||
| 103 | u32 device_generation) | ||
| 104 | { | ||
| 105 | if (!fhp->fh_export->ex_devid_map) { | ||
| 106 | nfsd4_alloc_devid_map(fhp); | ||
| 107 | if (!fhp->fh_export->ex_devid_map) | ||
| 108 | return -ENOMEM; | ||
| 109 | } | ||
| 110 | |||
| 111 | id->fsid_idx = fhp->fh_export->ex_devid_map->idx; | ||
| 112 | id->generation = device_generation; | ||
| 113 | id->pad = 0; | ||
| 114 | return 0; | ||
| 115 | } | ||
| 116 | |||
| 117 | void nfsd4_setup_layout_type(struct svc_export *exp) | ||
| 118 | { | ||
| 119 | struct super_block *sb = exp->ex_path.mnt->mnt_sb; | ||
| 120 | |||
| 121 | if (exp->ex_flags & NFSEXP_NOPNFS) | ||
| 122 | return; | ||
| 123 | |||
| 124 | if (sb->s_export_op->get_uuid && | ||
| 125 | sb->s_export_op->map_blocks && | ||
| 126 | sb->s_export_op->commit_blocks) | ||
| 127 | exp->ex_layout_type = LAYOUT_BLOCK_VOLUME; | ||
| 128 | } | ||
| 129 | |||
| 130 | static void | ||
| 131 | nfsd4_free_layout_stateid(struct nfs4_stid *stid) | ||
| 132 | { | ||
| 133 | struct nfs4_layout_stateid *ls = layoutstateid(stid); | ||
| 134 | struct nfs4_client *clp = ls->ls_stid.sc_client; | ||
| 135 | struct nfs4_file *fp = ls->ls_stid.sc_file; | ||
| 136 | |||
| 137 | trace_layoutstate_free(&ls->ls_stid.sc_stateid); | ||
| 138 | |||
| 139 | spin_lock(&clp->cl_lock); | ||
| 140 | list_del_init(&ls->ls_perclnt); | ||
| 141 | spin_unlock(&clp->cl_lock); | ||
| 142 | |||
| 143 | spin_lock(&fp->fi_lock); | ||
| 144 | list_del_init(&ls->ls_perfile); | ||
| 145 | spin_unlock(&fp->fi_lock); | ||
| 146 | |||
| 147 | vfs_setlease(ls->ls_file, F_UNLCK, NULL, (void **)&ls); | ||
| 148 | fput(ls->ls_file); | ||
| 149 | |||
| 150 | if (ls->ls_recalled) | ||
| 151 | atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls); | ||
| 152 | |||
| 153 | kmem_cache_free(nfs4_layout_stateid_cache, ls); | ||
| 154 | } | ||
| 155 | |||
| 156 | static int | ||
| 157 | nfsd4_layout_setlease(struct nfs4_layout_stateid *ls) | ||
| 158 | { | ||
| 159 | struct file_lock *fl; | ||
| 160 | int status; | ||
| 161 | |||
| 162 | fl = locks_alloc_lock(); | ||
| 163 | if (!fl) | ||
| 164 | return -ENOMEM; | ||
| 165 | locks_init_lock(fl); | ||
| 166 | fl->fl_lmops = &nfsd4_layouts_lm_ops; | ||
| 167 | fl->fl_flags = FL_LAYOUT; | ||
| 168 | fl->fl_type = F_RDLCK; | ||
| 169 | fl->fl_end = OFFSET_MAX; | ||
| 170 | fl->fl_owner = ls; | ||
| 171 | fl->fl_pid = current->tgid; | ||
| 172 | fl->fl_file = ls->ls_file; | ||
| 173 | |||
| 174 | status = vfs_setlease(fl->fl_file, fl->fl_type, &fl, NULL); | ||
| 175 | if (status) { | ||
| 176 | locks_free_lock(fl); | ||
| 177 | return status; | ||
| 178 | } | ||
| 179 | BUG_ON(fl != NULL); | ||
| 180 | return 0; | ||
| 181 | } | ||
| 182 | |||
| 183 | static struct nfs4_layout_stateid * | ||
| 184 | nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate, | ||
| 185 | struct nfs4_stid *parent, u32 layout_type) | ||
| 186 | { | ||
| 187 | struct nfs4_client *clp = cstate->clp; | ||
| 188 | struct nfs4_file *fp = parent->sc_file; | ||
| 189 | struct nfs4_layout_stateid *ls; | ||
| 190 | struct nfs4_stid *stp; | ||
| 191 | |||
| 192 | stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache); | ||
| 193 | if (!stp) | ||
| 194 | return NULL; | ||
| 195 | stp->sc_free = nfsd4_free_layout_stateid; | ||
| 196 | get_nfs4_file(fp); | ||
| 197 | stp->sc_file = fp; | ||
| 198 | |||
| 199 | ls = layoutstateid(stp); | ||
| 200 | INIT_LIST_HEAD(&ls->ls_perclnt); | ||
| 201 | INIT_LIST_HEAD(&ls->ls_perfile); | ||
| 202 | spin_lock_init(&ls->ls_lock); | ||
| 203 | INIT_LIST_HEAD(&ls->ls_layouts); | ||
| 204 | ls->ls_layout_type = layout_type; | ||
| 205 | nfsd4_init_cb(&ls->ls_recall, clp, &nfsd4_cb_layout_ops, | ||
| 206 | NFSPROC4_CLNT_CB_LAYOUT); | ||
| 207 | |||
| 208 | if (parent->sc_type == NFS4_DELEG_STID) | ||
| 209 | ls->ls_file = get_file(fp->fi_deleg_file); | ||
| 210 | else | ||
| 211 | ls->ls_file = find_any_file(fp); | ||
| 212 | BUG_ON(!ls->ls_file); | ||
| 213 | |||
| 214 | if (nfsd4_layout_setlease(ls)) { | ||
| 215 | put_nfs4_file(fp); | ||
| 216 | kmem_cache_free(nfs4_layout_stateid_cache, ls); | ||
| 217 | return NULL; | ||
| 218 | } | ||
| 219 | |||
| 220 | spin_lock(&clp->cl_lock); | ||
| 221 | stp->sc_type = NFS4_LAYOUT_STID; | ||
| 222 | list_add(&ls->ls_perclnt, &clp->cl_lo_states); | ||
| 223 | spin_unlock(&clp->cl_lock); | ||
| 224 | |||
| 225 | spin_lock(&fp->fi_lock); | ||
| 226 | list_add(&ls->ls_perfile, &fp->fi_lo_states); | ||
| 227 | spin_unlock(&fp->fi_lock); | ||
| 228 | |||
| 229 | trace_layoutstate_alloc(&ls->ls_stid.sc_stateid); | ||
| 230 | return ls; | ||
| 231 | } | ||
| 232 | |||
| 233 | __be32 | ||
| 234 | nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp, | ||
| 235 | struct nfsd4_compound_state *cstate, stateid_t *stateid, | ||
| 236 | bool create, u32 layout_type, struct nfs4_layout_stateid **lsp) | ||
| 237 | { | ||
| 238 | struct nfs4_layout_stateid *ls; | ||
| 239 | struct nfs4_stid *stid; | ||
| 240 | unsigned char typemask = NFS4_LAYOUT_STID; | ||
| 241 | __be32 status; | ||
| 242 | |||
| 243 | if (create) | ||
| 244 | typemask |= (NFS4_OPEN_STID | NFS4_LOCK_STID | NFS4_DELEG_STID); | ||
| 245 | |||
| 246 | status = nfsd4_lookup_stateid(cstate, stateid, typemask, &stid, | ||
| 247 | net_generic(SVC_NET(rqstp), nfsd_net_id)); | ||
| 248 | if (status) | ||
| 249 | goto out; | ||
| 250 | |||
| 251 | if (!fh_match(&cstate->current_fh.fh_handle, | ||
| 252 | &stid->sc_file->fi_fhandle)) { | ||
| 253 | status = nfserr_bad_stateid; | ||
| 254 | goto out_put_stid; | ||
| 255 | } | ||
| 256 | |||
| 257 | if (stid->sc_type != NFS4_LAYOUT_STID) { | ||
| 258 | ls = nfsd4_alloc_layout_stateid(cstate, stid, layout_type); | ||
| 259 | nfs4_put_stid(stid); | ||
| 260 | |||
| 261 | status = nfserr_jukebox; | ||
| 262 | if (!ls) | ||
| 263 | goto out; | ||
| 264 | } else { | ||
| 265 | ls = container_of(stid, struct nfs4_layout_stateid, ls_stid); | ||
| 266 | |||
| 267 | status = nfserr_bad_stateid; | ||
| 268 | if (stateid->si_generation > stid->sc_stateid.si_generation) | ||
| 269 | goto out_put_stid; | ||
| 270 | if (layout_type != ls->ls_layout_type) | ||
| 271 | goto out_put_stid; | ||
| 272 | } | ||
| 273 | |||
| 274 | *lsp = ls; | ||
| 275 | return 0; | ||
| 276 | |||
| 277 | out_put_stid: | ||
| 278 | nfs4_put_stid(stid); | ||
| 279 | out: | ||
| 280 | return status; | ||
| 281 | } | ||
| 282 | |||
| 283 | static void | ||
| 284 | nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls) | ||
| 285 | { | ||
| 286 | spin_lock(&ls->ls_lock); | ||
| 287 | if (ls->ls_recalled) | ||
| 288 | goto out_unlock; | ||
| 289 | |||
| 290 | ls->ls_recalled = true; | ||
| 291 | atomic_inc(&ls->ls_stid.sc_file->fi_lo_recalls); | ||
| 292 | if (list_empty(&ls->ls_layouts)) | ||
| 293 | goto out_unlock; | ||
| 294 | |||
| 295 | trace_layout_recall(&ls->ls_stid.sc_stateid); | ||
| 296 | |||
| 297 | atomic_inc(&ls->ls_stid.sc_count); | ||
| 298 | update_stateid(&ls->ls_stid.sc_stateid); | ||
| 299 | memcpy(&ls->ls_recall_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t)); | ||
| 300 | nfsd4_run_cb(&ls->ls_recall); | ||
| 301 | |||
| 302 | out_unlock: | ||
| 303 | spin_unlock(&ls->ls_lock); | ||
| 304 | } | ||
| 305 | |||
| 306 | static inline u64 | ||
| 307 | layout_end(struct nfsd4_layout_seg *seg) | ||
| 308 | { | ||
| 309 | u64 end = seg->offset + seg->length; | ||
| 310 | return end >= seg->offset ? end : NFS4_MAX_UINT64; | ||
| 311 | } | ||
| 312 | |||
| 313 | static void | ||
| 314 | layout_update_len(struct nfsd4_layout_seg *lo, u64 end) | ||
| 315 | { | ||
| 316 | if (end == NFS4_MAX_UINT64) | ||
| 317 | lo->length = NFS4_MAX_UINT64; | ||
| 318 | else | ||
| 319 | lo->length = end - lo->offset; | ||
| 320 | } | ||
| 321 | |||
| 322 | static bool | ||
| 323 | layouts_overlapping(struct nfs4_layout *lo, struct nfsd4_layout_seg *s) | ||
| 324 | { | ||
| 325 | if (s->iomode != IOMODE_ANY && s->iomode != lo->lo_seg.iomode) | ||
| 326 | return false; | ||
| 327 | if (layout_end(&lo->lo_seg) <= s->offset) | ||
| 328 | return false; | ||
| 329 | if (layout_end(s) <= lo->lo_seg.offset) | ||
| 330 | return false; | ||
| 331 | return true; | ||
| 332 | } | ||
| 333 | |||
| 334 | static bool | ||
| 335 | layouts_try_merge(struct nfsd4_layout_seg *lo, struct nfsd4_layout_seg *new) | ||
| 336 | { | ||
| 337 | if (lo->iomode != new->iomode) | ||
| 338 | return false; | ||
| 339 | if (layout_end(new) < lo->offset) | ||
| 340 | return false; | ||
| 341 | if (layout_end(lo) < new->offset) | ||
| 342 | return false; | ||
| 343 | |||
| 344 | lo->offset = min(lo->offset, new->offset); | ||
| 345 | layout_update_len(lo, max(layout_end(lo), layout_end(new))); | ||
| 346 | return true; | ||
| 347 | } | ||
| 348 | |||
| 349 | static __be32 | ||
| 350 | nfsd4_recall_conflict(struct nfs4_layout_stateid *ls) | ||
| 351 | { | ||
| 352 | struct nfs4_file *fp = ls->ls_stid.sc_file; | ||
| 353 | struct nfs4_layout_stateid *l, *n; | ||
| 354 | __be32 nfserr = nfs_ok; | ||
| 355 | |||
| 356 | assert_spin_locked(&fp->fi_lock); | ||
| 357 | |||
| 358 | list_for_each_entry_safe(l, n, &fp->fi_lo_states, ls_perfile) { | ||
| 359 | if (l != ls) { | ||
| 360 | nfsd4_recall_file_layout(l); | ||
| 361 | nfserr = nfserr_recallconflict; | ||
| 362 | } | ||
| 363 | } | ||
| 364 | |||
| 365 | return nfserr; | ||
| 366 | } | ||
| 367 | |||
| 368 | __be32 | ||
| 369 | nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls) | ||
| 370 | { | ||
| 371 | struct nfsd4_layout_seg *seg = &lgp->lg_seg; | ||
| 372 | struct nfs4_file *fp = ls->ls_stid.sc_file; | ||
| 373 | struct nfs4_layout *lp, *new = NULL; | ||
| 374 | __be32 nfserr; | ||
| 375 | |||
| 376 | spin_lock(&fp->fi_lock); | ||
| 377 | nfserr = nfsd4_recall_conflict(ls); | ||
| 378 | if (nfserr) | ||
| 379 | goto out; | ||
| 380 | spin_lock(&ls->ls_lock); | ||
| 381 | list_for_each_entry(lp, &ls->ls_layouts, lo_perstate) { | ||
| 382 | if (layouts_try_merge(&lp->lo_seg, seg)) | ||
| 383 | goto done; | ||
| 384 | } | ||
| 385 | spin_unlock(&ls->ls_lock); | ||
| 386 | spin_unlock(&fp->fi_lock); | ||
| 387 | |||
| 388 | new = kmem_cache_alloc(nfs4_layout_cache, GFP_KERNEL); | ||
| 389 | if (!new) | ||
| 390 | return nfserr_jukebox; | ||
| 391 | memcpy(&new->lo_seg, seg, sizeof(lp->lo_seg)); | ||
| 392 | new->lo_state = ls; | ||
| 393 | |||
| 394 | spin_lock(&fp->fi_lock); | ||
| 395 | nfserr = nfsd4_recall_conflict(ls); | ||
| 396 | if (nfserr) | ||
| 397 | goto out; | ||
| 398 | spin_lock(&ls->ls_lock); | ||
| 399 | list_for_each_entry(lp, &ls->ls_layouts, lo_perstate) { | ||
| 400 | if (layouts_try_merge(&lp->lo_seg, seg)) | ||
| 401 | goto done; | ||
| 402 | } | ||
| 403 | |||
| 404 | atomic_inc(&ls->ls_stid.sc_count); | ||
| 405 | list_add_tail(&new->lo_perstate, &ls->ls_layouts); | ||
| 406 | new = NULL; | ||
| 407 | done: | ||
| 408 | update_stateid(&ls->ls_stid.sc_stateid); | ||
| 409 | memcpy(&lgp->lg_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t)); | ||
| 410 | spin_unlock(&ls->ls_lock); | ||
| 411 | out: | ||
| 412 | spin_unlock(&fp->fi_lock); | ||
| 413 | if (new) | ||
| 414 | kmem_cache_free(nfs4_layout_cache, new); | ||
| 415 | return nfserr; | ||
| 416 | } | ||
| 417 | |||
| 418 | static void | ||
| 419 | nfsd4_free_layouts(struct list_head *reaplist) | ||
| 420 | { | ||
| 421 | while (!list_empty(reaplist)) { | ||
| 422 | struct nfs4_layout *lp = list_first_entry(reaplist, | ||
| 423 | struct nfs4_layout, lo_perstate); | ||
| 424 | |||
| 425 | list_del(&lp->lo_perstate); | ||
| 426 | nfs4_put_stid(&lp->lo_state->ls_stid); | ||
| 427 | kmem_cache_free(nfs4_layout_cache, lp); | ||
| 428 | } | ||
| 429 | } | ||
| 430 | |||
| 431 | static void | ||
| 432 | nfsd4_return_file_layout(struct nfs4_layout *lp, struct nfsd4_layout_seg *seg, | ||
| 433 | struct list_head *reaplist) | ||
| 434 | { | ||
| 435 | struct nfsd4_layout_seg *lo = &lp->lo_seg; | ||
| 436 | u64 end = layout_end(lo); | ||
| 437 | |||
| 438 | if (seg->offset <= lo->offset) { | ||
| 439 | if (layout_end(seg) >= end) { | ||
| 440 | list_move_tail(&lp->lo_perstate, reaplist); | ||
| 441 | return; | ||
| 442 | } | ||
| 443 | end = seg->offset; | ||
| 444 | } else { | ||
| 445 | /* retain the whole layout segment on a split. */ | ||
| 446 | if (layout_end(seg) < end) { | ||
| 447 | dprintk("%s: split not supported\n", __func__); | ||
| 448 | return; | ||
| 449 | } | ||
| 450 | |||
| 451 | lo->offset = layout_end(seg); | ||
| 452 | } | ||
| 453 | |||
| 454 | layout_update_len(lo, end); | ||
| 455 | } | ||
| 456 | |||
| 457 | __be32 | ||
| 458 | nfsd4_return_file_layouts(struct svc_rqst *rqstp, | ||
| 459 | struct nfsd4_compound_state *cstate, | ||
| 460 | struct nfsd4_layoutreturn *lrp) | ||
| 461 | { | ||
| 462 | struct nfs4_layout_stateid *ls; | ||
| 463 | struct nfs4_layout *lp, *n; | ||
| 464 | LIST_HEAD(reaplist); | ||
| 465 | __be32 nfserr; | ||
| 466 | int found = 0; | ||
| 467 | |||
| 468 | nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lrp->lr_sid, | ||
| 469 | false, lrp->lr_layout_type, | ||
| 470 | &ls); | ||
| 471 | if (nfserr) { | ||
| 472 | trace_layout_return_lookup_fail(&lrp->lr_sid); | ||
| 473 | return nfserr; | ||
| 474 | } | ||
| 475 | |||
| 476 | spin_lock(&ls->ls_lock); | ||
| 477 | list_for_each_entry_safe(lp, n, &ls->ls_layouts, lo_perstate) { | ||
| 478 | if (layouts_overlapping(lp, &lrp->lr_seg)) { | ||
| 479 | nfsd4_return_file_layout(lp, &lrp->lr_seg, &reaplist); | ||
| 480 | found++; | ||
| 481 | } | ||
| 482 | } | ||
| 483 | if (!list_empty(&ls->ls_layouts)) { | ||
| 484 | if (found) { | ||
| 485 | update_stateid(&ls->ls_stid.sc_stateid); | ||
| 486 | memcpy(&lrp->lr_sid, &ls->ls_stid.sc_stateid, | ||
| 487 | sizeof(stateid_t)); | ||
| 488 | } | ||
| 489 | lrp->lrs_present = 1; | ||
| 490 | } else { | ||
| 491 | trace_layoutstate_unhash(&ls->ls_stid.sc_stateid); | ||
| 492 | nfs4_unhash_stid(&ls->ls_stid); | ||
| 493 | lrp->lrs_present = 0; | ||
| 494 | } | ||
| 495 | spin_unlock(&ls->ls_lock); | ||
| 496 | |||
| 497 | nfs4_put_stid(&ls->ls_stid); | ||
| 498 | nfsd4_free_layouts(&reaplist); | ||
| 499 | return nfs_ok; | ||
| 500 | } | ||
| 501 | |||
| 502 | __be32 | ||
| 503 | nfsd4_return_client_layouts(struct svc_rqst *rqstp, | ||
| 504 | struct nfsd4_compound_state *cstate, | ||
| 505 | struct nfsd4_layoutreturn *lrp) | ||
| 506 | { | ||
| 507 | struct nfs4_layout_stateid *ls, *n; | ||
| 508 | struct nfs4_client *clp = cstate->clp; | ||
| 509 | struct nfs4_layout *lp, *t; | ||
| 510 | LIST_HEAD(reaplist); | ||
| 511 | |||
| 512 | lrp->lrs_present = 0; | ||
| 513 | |||
| 514 | spin_lock(&clp->cl_lock); | ||
| 515 | list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt) { | ||
| 516 | if (lrp->lr_return_type == RETURN_FSID && | ||
| 517 | !fh_fsid_match(&ls->ls_stid.sc_file->fi_fhandle, | ||
| 518 | &cstate->current_fh.fh_handle)) | ||
| 519 | continue; | ||
| 520 | |||
| 521 | spin_lock(&ls->ls_lock); | ||
| 522 | list_for_each_entry_safe(lp, t, &ls->ls_layouts, lo_perstate) { | ||
| 523 | if (lrp->lr_seg.iomode == IOMODE_ANY || | ||
| 524 | lrp->lr_seg.iomode == lp->lo_seg.iomode) | ||
| 525 | list_move_tail(&lp->lo_perstate, &reaplist); | ||
| 526 | } | ||
| 527 | spin_unlock(&ls->ls_lock); | ||
| 528 | } | ||
| 529 | spin_unlock(&clp->cl_lock); | ||
| 530 | |||
| 531 | nfsd4_free_layouts(&reaplist); | ||
| 532 | return 0; | ||
| 533 | } | ||
| 534 | |||
| 535 | static void | ||
| 536 | nfsd4_return_all_layouts(struct nfs4_layout_stateid *ls, | ||
| 537 | struct list_head *reaplist) | ||
| 538 | { | ||
| 539 | spin_lock(&ls->ls_lock); | ||
| 540 | list_splice_init(&ls->ls_layouts, reaplist); | ||
| 541 | spin_unlock(&ls->ls_lock); | ||
| 542 | } | ||
| 543 | |||
| 544 | void | ||
| 545 | nfsd4_return_all_client_layouts(struct nfs4_client *clp) | ||
| 546 | { | ||
| 547 | struct nfs4_layout_stateid *ls, *n; | ||
| 548 | LIST_HEAD(reaplist); | ||
| 549 | |||
| 550 | spin_lock(&clp->cl_lock); | ||
| 551 | list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt) | ||
| 552 | nfsd4_return_all_layouts(ls, &reaplist); | ||
| 553 | spin_unlock(&clp->cl_lock); | ||
| 554 | |||
| 555 | nfsd4_free_layouts(&reaplist); | ||
| 556 | } | ||
| 557 | |||
| 558 | void | ||
| 559 | nfsd4_return_all_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp) | ||
| 560 | { | ||
| 561 | struct nfs4_layout_stateid *ls, *n; | ||
| 562 | LIST_HEAD(reaplist); | ||
| 563 | |||
| 564 | spin_lock(&fp->fi_lock); | ||
| 565 | list_for_each_entry_safe(ls, n, &fp->fi_lo_states, ls_perfile) { | ||
| 566 | if (ls->ls_stid.sc_client == clp) | ||
| 567 | nfsd4_return_all_layouts(ls, &reaplist); | ||
| 568 | } | ||
| 569 | spin_unlock(&fp->fi_lock); | ||
| 570 | |||
| 571 | nfsd4_free_layouts(&reaplist); | ||
| 572 | } | ||
| 573 | |||
| 574 | static void | ||
| 575 | nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls) | ||
| 576 | { | ||
| 577 | struct nfs4_client *clp = ls->ls_stid.sc_client; | ||
| 578 | char addr_str[INET6_ADDRSTRLEN]; | ||
| 579 | static char *envp[] = { | ||
| 580 | "HOME=/", | ||
| 581 | "TERM=linux", | ||
| 582 | "PATH=/sbin:/usr/sbin:/bin:/usr/bin", | ||
| 583 | NULL | ||
| 584 | }; | ||
| 585 | char *argv[8]; | ||
| 586 | int error; | ||
| 587 | |||
| 588 | rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str)); | ||
| 589 | |||
| 590 | nfsd4_cb_layout_fail(ls); | ||
| 591 | |||
| 592 | printk(KERN_WARNING | ||
| 593 | "nfsd: client %s failed to respond to layout recall. " | ||
| 594 | " Fencing..\n", addr_str); | ||
| 595 | |||
| 596 | argv[0] = "/sbin/nfsd-recall-failed"; | ||
| 597 | argv[1] = addr_str; | ||
| 598 | argv[2] = ls->ls_file->f_path.mnt->mnt_sb->s_id; | ||
| 599 | argv[3] = NULL; | ||
| 600 | |||
| 601 | error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); | ||
| 602 | if (error) { | ||
| 603 | printk(KERN_ERR "nfsd: fence failed for client %s: %d!\n", | ||
| 604 | addr_str, error); | ||
| 605 | } | ||
| 606 | } | ||
| 607 | |||
| 608 | static int | ||
| 609 | nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) | ||
| 610 | { | ||
| 611 | struct nfs4_layout_stateid *ls = | ||
| 612 | container_of(cb, struct nfs4_layout_stateid, ls_recall); | ||
| 613 | LIST_HEAD(reaplist); | ||
| 614 | |||
| 615 | switch (task->tk_status) { | ||
| 616 | case 0: | ||
| 617 | return 1; | ||
| 618 | case -NFS4ERR_NOMATCHING_LAYOUT: | ||
| 619 | trace_layout_recall_done(&ls->ls_stid.sc_stateid); | ||
| 620 | task->tk_status = 0; | ||
| 621 | return 1; | ||
| 622 | case -NFS4ERR_DELAY: | ||
| 623 | /* Poll the client until it's done with the layout */ | ||
| 624 | /* FIXME: cap number of retries. | ||
| 625 | * The pnfs standard states that we need to only expire | ||
| 626 | * the client after at-least "lease time" .eg lease-time * 2 | ||
| 627 | * when failing to communicate a recall | ||
| 628 | */ | ||
| 629 | rpc_delay(task, HZ/100); /* 10 mili-seconds */ | ||
| 630 | return 0; | ||
| 631 | default: | ||
| 632 | /* | ||
| 633 | * Unknown error or non-responding client, we'll need to fence. | ||
| 634 | */ | ||
| 635 | nfsd4_cb_layout_fail(ls); | ||
| 636 | return -1; | ||
| 637 | } | ||
| 638 | } | ||
| 639 | |||
| 640 | static void | ||
| 641 | nfsd4_cb_layout_release(struct nfsd4_callback *cb) | ||
| 642 | { | ||
| 643 | struct nfs4_layout_stateid *ls = | ||
| 644 | container_of(cb, struct nfs4_layout_stateid, ls_recall); | ||
| 645 | LIST_HEAD(reaplist); | ||
| 646 | |||
| 647 | trace_layout_recall_release(&ls->ls_stid.sc_stateid); | ||
| 648 | |||
| 649 | nfsd4_return_all_layouts(ls, &reaplist); | ||
| 650 | nfsd4_free_layouts(&reaplist); | ||
| 651 | nfs4_put_stid(&ls->ls_stid); | ||
| 652 | } | ||
| 653 | |||
| 654 | static struct nfsd4_callback_ops nfsd4_cb_layout_ops = { | ||
| 655 | .done = nfsd4_cb_layout_done, | ||
| 656 | .release = nfsd4_cb_layout_release, | ||
| 657 | }; | ||
| 658 | |||
| 659 | static bool | ||
| 660 | nfsd4_layout_lm_break(struct file_lock *fl) | ||
| 661 | { | ||
| 662 | /* | ||
| 663 | * We don't want the locks code to timeout the lease for us; | ||
| 664 | * we'll remove it ourself if a layout isn't returned | ||
| 665 | * in time: | ||
| 666 | */ | ||
| 667 | fl->fl_break_time = 0; | ||
| 668 | nfsd4_recall_file_layout(fl->fl_owner); | ||
| 669 | return false; | ||
| 670 | } | ||
| 671 | |||
| 672 | static int | ||
| 673 | nfsd4_layout_lm_change(struct file_lock *onlist, int arg, | ||
| 674 | struct list_head *dispose) | ||
| 675 | { | ||
| 676 | BUG_ON(!(arg & F_UNLCK)); | ||
| 677 | return lease_modify(onlist, arg, dispose); | ||
| 678 | } | ||
| 679 | |||
| 680 | static const struct lock_manager_operations nfsd4_layouts_lm_ops = { | ||
| 681 | .lm_break = nfsd4_layout_lm_break, | ||
| 682 | .lm_change = nfsd4_layout_lm_change, | ||
| 683 | }; | ||
| 684 | |||
| 685 | int | ||
| 686 | nfsd4_init_pnfs(void) | ||
| 687 | { | ||
| 688 | int i; | ||
| 689 | |||
| 690 | for (i = 0; i < DEVID_HASH_SIZE; i++) | ||
| 691 | INIT_LIST_HEAD(&nfsd_devid_hash[i]); | ||
| 692 | |||
| 693 | nfs4_layout_cache = kmem_cache_create("nfs4_layout", | ||
| 694 | sizeof(struct nfs4_layout), 0, 0, NULL); | ||
| 695 | if (!nfs4_layout_cache) | ||
| 696 | return -ENOMEM; | ||
| 697 | |||
| 698 | nfs4_layout_stateid_cache = kmem_cache_create("nfs4_layout_stateid", | ||
| 699 | sizeof(struct nfs4_layout_stateid), 0, 0, NULL); | ||
| 700 | if (!nfs4_layout_stateid_cache) { | ||
| 701 | kmem_cache_destroy(nfs4_layout_cache); | ||
| 702 | return -ENOMEM; | ||
| 703 | } | ||
| 704 | return 0; | ||
| 705 | } | ||
| 706 | |||
| 707 | void | ||
| 708 | nfsd4_exit_pnfs(void) | ||
| 709 | { | ||
| 710 | int i; | ||
| 711 | |||
| 712 | kmem_cache_destroy(nfs4_layout_cache); | ||
| 713 | kmem_cache_destroy(nfs4_layout_stateid_cache); | ||
| 714 | |||
| 715 | for (i = 0; i < DEVID_HASH_SIZE; i++) { | ||
| 716 | struct nfsd4_deviceid_map *map, *n; | ||
| 717 | |||
| 718 | list_for_each_entry_safe(map, n, &nfsd_devid_hash[i], hash) | ||
| 719 | kfree(map); | ||
| 720 | } | ||
| 721 | } | ||
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ac71d13c69ef..d30bea8d0277 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
| @@ -43,6 +43,8 @@ | |||
| 43 | #include "current_stateid.h" | 43 | #include "current_stateid.h" |
| 44 | #include "netns.h" | 44 | #include "netns.h" |
| 45 | #include "acl.h" | 45 | #include "acl.h" |
| 46 | #include "pnfs.h" | ||
| 47 | #include "trace.h" | ||
| 46 | 48 | ||
| 47 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | 49 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL |
| 48 | #include <linux/security.h> | 50 | #include <linux/security.h> |
| @@ -1178,6 +1180,259 @@ nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
| 1178 | return status == nfserr_same ? nfs_ok : status; | 1180 | return status == nfserr_same ? nfs_ok : status; |
| 1179 | } | 1181 | } |
| 1180 | 1182 | ||
| 1183 | #ifdef CONFIG_NFSD_PNFS | ||
| 1184 | static const struct nfsd4_layout_ops * | ||
| 1185 | nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type) | ||
| 1186 | { | ||
| 1187 | if (!exp->ex_layout_type) { | ||
| 1188 | dprintk("%s: export does not support pNFS\n", __func__); | ||
| 1189 | return NULL; | ||
| 1190 | } | ||
| 1191 | |||
| 1192 | if (exp->ex_layout_type != layout_type) { | ||
| 1193 | dprintk("%s: layout type %d not supported\n", | ||
| 1194 | __func__, layout_type); | ||
| 1195 | return NULL; | ||
| 1196 | } | ||
| 1197 | |||
| 1198 | return nfsd4_layout_ops[layout_type]; | ||
| 1199 | } | ||
| 1200 | |||
| 1201 | static __be32 | ||
| 1202 | nfsd4_getdeviceinfo(struct svc_rqst *rqstp, | ||
| 1203 | struct nfsd4_compound_state *cstate, | ||
| 1204 | struct nfsd4_getdeviceinfo *gdp) | ||
| 1205 | { | ||
| 1206 | const struct nfsd4_layout_ops *ops; | ||
| 1207 | struct nfsd4_deviceid_map *map; | ||
| 1208 | struct svc_export *exp; | ||
| 1209 | __be32 nfserr; | ||
| 1210 | |||
| 1211 | dprintk("%s: layout_type %u dev_id [0x%llx:0x%x] maxcnt %u\n", | ||
| 1212 | __func__, | ||
| 1213 | gdp->gd_layout_type, | ||
| 1214 | gdp->gd_devid.fsid_idx, gdp->gd_devid.generation, | ||
| 1215 | gdp->gd_maxcount); | ||
| 1216 | |||
| 1217 | map = nfsd4_find_devid_map(gdp->gd_devid.fsid_idx); | ||
| 1218 | if (!map) { | ||
| 1219 | dprintk("%s: couldn't find device ID to export mapping!\n", | ||
| 1220 | __func__); | ||
| 1221 | return nfserr_noent; | ||
| 1222 | } | ||
| 1223 | |||
| 1224 | exp = rqst_exp_find(rqstp, map->fsid_type, map->fsid); | ||
| 1225 | if (IS_ERR(exp)) { | ||
| 1226 | dprintk("%s: could not find device id\n", __func__); | ||
| 1227 | return nfserr_noent; | ||
| 1228 | } | ||
| 1229 | |||
| 1230 | nfserr = nfserr_layoutunavailable; | ||
| 1231 | ops = nfsd4_layout_verify(exp, gdp->gd_layout_type); | ||
| 1232 | if (!ops) | ||
| 1233 | goto out; | ||
| 1234 | |||
| 1235 | nfserr = nfs_ok; | ||
| 1236 | if (gdp->gd_maxcount != 0) | ||
| 1237 | nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp); | ||
| 1238 | |||
| 1239 | gdp->gd_notify_types &= ops->notify_types; | ||
| 1240 | exp_put(exp); | ||
| 1241 | out: | ||
| 1242 | return nfserr; | ||
| 1243 | } | ||
| 1244 | |||
| 1245 | static __be32 | ||
| 1246 | nfsd4_layoutget(struct svc_rqst *rqstp, | ||
| 1247 | struct nfsd4_compound_state *cstate, | ||
| 1248 | struct nfsd4_layoutget *lgp) | ||
| 1249 | { | ||
| 1250 | struct svc_fh *current_fh = &cstate->current_fh; | ||
| 1251 | const struct nfsd4_layout_ops *ops; | ||
| 1252 | struct nfs4_layout_stateid *ls; | ||
| 1253 | __be32 nfserr; | ||
| 1254 | int accmode; | ||
| 1255 | |||
| 1256 | switch (lgp->lg_seg.iomode) { | ||
| 1257 | case IOMODE_READ: | ||
| 1258 | accmode = NFSD_MAY_READ; | ||
| 1259 | break; | ||
| 1260 | case IOMODE_RW: | ||
| 1261 | accmode = NFSD_MAY_READ | NFSD_MAY_WRITE; | ||
| 1262 | break; | ||
| 1263 | default: | ||
| 1264 | dprintk("%s: invalid iomode %d\n", | ||
| 1265 | __func__, lgp->lg_seg.iomode); | ||
| 1266 | nfserr = nfserr_badiomode; | ||
| 1267 | goto out; | ||
| 1268 | } | ||
| 1269 | |||
| 1270 | nfserr = fh_verify(rqstp, current_fh, 0, accmode); | ||
| 1271 | if (nfserr) | ||
| 1272 | goto out; | ||
| 1273 | |||
| 1274 | nfserr = nfserr_layoutunavailable; | ||
| 1275 | ops = nfsd4_layout_verify(current_fh->fh_export, lgp->lg_layout_type); | ||
| 1276 | if (!ops) | ||
| 1277 | goto out; | ||
| 1278 | |||
| 1279 | /* | ||
| 1280 | * Verify minlength and range as per RFC5661: | ||
| 1281 | * o If loga_length is less than loga_minlength, | ||
| 1282 | * the metadata server MUST return NFS4ERR_INVAL. | ||
| 1283 | * o If the sum of loga_offset and loga_minlength exceeds | ||
| 1284 | * NFS4_UINT64_MAX, and loga_minlength is not | ||
| 1285 | * NFS4_UINT64_MAX, the error NFS4ERR_INVAL MUST result. | ||
| 1286 | * o If the sum of loga_offset and loga_length exceeds | ||
| 1287 | * NFS4_UINT64_MAX, and loga_length is not NFS4_UINT64_MAX, | ||
| 1288 | * the error NFS4ERR_INVAL MUST result. | ||
| 1289 | */ | ||
| 1290 | nfserr = nfserr_inval; | ||
| 1291 | if (lgp->lg_seg.length < lgp->lg_minlength || | ||
| 1292 | (lgp->lg_minlength != NFS4_MAX_UINT64 && | ||
| 1293 | lgp->lg_minlength > NFS4_MAX_UINT64 - lgp->lg_seg.offset) || | ||
| 1294 | (lgp->lg_seg.length != NFS4_MAX_UINT64 && | ||
| 1295 | lgp->lg_seg.length > NFS4_MAX_UINT64 - lgp->lg_seg.offset)) | ||
| 1296 | goto out; | ||
| 1297 | if (lgp->lg_seg.length == 0) | ||
| 1298 | goto out; | ||
| 1299 | |||
| 1300 | nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lgp->lg_sid, | ||
| 1301 | true, lgp->lg_layout_type, &ls); | ||
| 1302 | if (nfserr) { | ||
| 1303 | trace_layout_get_lookup_fail(&lgp->lg_sid); | ||
| 1304 | goto out; | ||
| 1305 | } | ||
| 1306 | |||
| 1307 | nfserr = nfserr_recallconflict; | ||
| 1308 | if (atomic_read(&ls->ls_stid.sc_file->fi_lo_recalls)) | ||
| 1309 | goto out_put_stid; | ||
| 1310 | |||
| 1311 | nfserr = ops->proc_layoutget(current_fh->fh_dentry->d_inode, | ||
| 1312 | current_fh, lgp); | ||
| 1313 | if (nfserr) | ||
| 1314 | goto out_put_stid; | ||
| 1315 | |||
| 1316 | nfserr = nfsd4_insert_layout(lgp, ls); | ||
| 1317 | |||
| 1318 | out_put_stid: | ||
| 1319 | nfs4_put_stid(&ls->ls_stid); | ||
| 1320 | out: | ||
| 1321 | return nfserr; | ||
| 1322 | } | ||
| 1323 | |||
| 1324 | static __be32 | ||
| 1325 | nfsd4_layoutcommit(struct svc_rqst *rqstp, | ||
| 1326 | struct nfsd4_compound_state *cstate, | ||
| 1327 | struct nfsd4_layoutcommit *lcp) | ||
| 1328 | { | ||
| 1329 | const struct nfsd4_layout_seg *seg = &lcp->lc_seg; | ||
| 1330 | struct svc_fh *current_fh = &cstate->current_fh; | ||
| 1331 | const struct nfsd4_layout_ops *ops; | ||
| 1332 | loff_t new_size = lcp->lc_last_wr + 1; | ||
| 1333 | struct inode *inode; | ||
| 1334 | struct nfs4_layout_stateid *ls; | ||
| 1335 | __be32 nfserr; | ||
| 1336 | |||
| 1337 | nfserr = fh_verify(rqstp, current_fh, 0, NFSD_MAY_WRITE); | ||
| 1338 | if (nfserr) | ||
| 1339 | goto out; | ||
| 1340 | |||
| 1341 | nfserr = nfserr_layoutunavailable; | ||
| 1342 | ops = nfsd4_layout_verify(current_fh->fh_export, lcp->lc_layout_type); | ||
| 1343 | if (!ops) | ||
| 1344 | goto out; | ||
| 1345 | inode = current_fh->fh_dentry->d_inode; | ||
| 1346 | |||
| 1347 | nfserr = nfserr_inval; | ||
| 1348 | if (new_size <= seg->offset) { | ||
| 1349 | dprintk("pnfsd: last write before layout segment\n"); | ||
| 1350 | goto out; | ||
| 1351 | } | ||
| 1352 | if (new_size > seg->offset + seg->length) { | ||
| 1353 | dprintk("pnfsd: last write beyond layout segment\n"); | ||
| 1354 | goto out; | ||
| 1355 | } | ||
| 1356 | if (!lcp->lc_newoffset && new_size > i_size_read(inode)) { | ||
| 1357 | dprintk("pnfsd: layoutcommit beyond EOF\n"); | ||
| 1358 | goto out; | ||
| 1359 | } | ||
| 1360 | |||
| 1361 | nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lcp->lc_sid, | ||
| 1362 | false, lcp->lc_layout_type, | ||
| 1363 | &ls); | ||
| 1364 | if (nfserr) { | ||
| 1365 | trace_layout_commit_lookup_fail(&lcp->lc_sid); | ||
| 1366 | /* fixup error code as per RFC5661 */ | ||
| 1367 | if (nfserr == nfserr_bad_stateid) | ||
| 1368 | nfserr = nfserr_badlayout; | ||
| 1369 | goto out; | ||
| 1370 | } | ||
| 1371 | |||
| 1372 | nfserr = ops->proc_layoutcommit(inode, lcp); | ||
| 1373 | if (nfserr) | ||
| 1374 | goto out_put_stid; | ||
| 1375 | |||
| 1376 | if (new_size > i_size_read(inode)) { | ||
| 1377 | lcp->lc_size_chg = 1; | ||
| 1378 | lcp->lc_newsize = new_size; | ||
| 1379 | } else { | ||
| 1380 | lcp->lc_size_chg = 0; | ||
| 1381 | } | ||
| 1382 | |||
| 1383 | out_put_stid: | ||
| 1384 | nfs4_put_stid(&ls->ls_stid); | ||
| 1385 | out: | ||
| 1386 | return nfserr; | ||
| 1387 | } | ||
| 1388 | |||
| 1389 | static __be32 | ||
| 1390 | nfsd4_layoutreturn(struct svc_rqst *rqstp, | ||
| 1391 | struct nfsd4_compound_state *cstate, | ||
| 1392 | struct nfsd4_layoutreturn *lrp) | ||
| 1393 | { | ||
| 1394 | struct svc_fh *current_fh = &cstate->current_fh; | ||
| 1395 | __be32 nfserr; | ||
| 1396 | |||
| 1397 | nfserr = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP); | ||
| 1398 | if (nfserr) | ||
| 1399 | goto out; | ||
| 1400 | |||
| 1401 | nfserr = nfserr_layoutunavailable; | ||
| 1402 | if (!nfsd4_layout_verify(current_fh->fh_export, lrp->lr_layout_type)) | ||
| 1403 | goto out; | ||
| 1404 | |||
| 1405 | switch (lrp->lr_seg.iomode) { | ||
| 1406 | case IOMODE_READ: | ||
| 1407 | case IOMODE_RW: | ||
| 1408 | case IOMODE_ANY: | ||
| 1409 | break; | ||
| 1410 | default: | ||
| 1411 | dprintk("%s: invalid iomode %d\n", __func__, | ||
| 1412 | lrp->lr_seg.iomode); | ||
| 1413 | nfserr = nfserr_inval; | ||
| 1414 | goto out; | ||
| 1415 | } | ||
| 1416 | |||
| 1417 | switch (lrp->lr_return_type) { | ||
| 1418 | case RETURN_FILE: | ||
| 1419 | nfserr = nfsd4_return_file_layouts(rqstp, cstate, lrp); | ||
| 1420 | break; | ||
| 1421 | case RETURN_FSID: | ||
| 1422 | case RETURN_ALL: | ||
| 1423 | nfserr = nfsd4_return_client_layouts(rqstp, cstate, lrp); | ||
| 1424 | break; | ||
| 1425 | default: | ||
| 1426 | dprintk("%s: invalid return_type %d\n", __func__, | ||
| 1427 | lrp->lr_return_type); | ||
| 1428 | nfserr = nfserr_inval; | ||
| 1429 | break; | ||
| 1430 | } | ||
| 1431 | out: | ||
| 1432 | return nfserr; | ||
| 1433 | } | ||
| 1434 | #endif /* CONFIG_NFSD_PNFS */ | ||
| 1435 | |||
| 1181 | /* | 1436 | /* |
| 1182 | * NULL call. | 1437 | * NULL call. |
| 1183 | */ | 1438 | */ |
| @@ -1679,6 +1934,36 @@ static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd | |||
| 1679 | op_encode_channel_attrs_maxsz) * sizeof(__be32); | 1934 | op_encode_channel_attrs_maxsz) * sizeof(__be32); |
| 1680 | } | 1935 | } |
| 1681 | 1936 | ||
| 1937 | #ifdef CONFIG_NFSD_PNFS | ||
| 1938 | /* | ||
| 1939 | * At this stage we don't really know what layout driver will handle the request, | ||
| 1940 | * so we need to define an arbitrary upper bound here. | ||
| 1941 | */ | ||
| 1942 | #define MAX_LAYOUT_SIZE 128 | ||
| 1943 | static inline u32 nfsd4_layoutget_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||
| 1944 | { | ||
| 1945 | return (op_encode_hdr_size + | ||
| 1946 | 1 /* logr_return_on_close */ + | ||
| 1947 | op_encode_stateid_maxsz + | ||
| 1948 | 1 /* nr of layouts */ + | ||
| 1949 | MAX_LAYOUT_SIZE) * sizeof(__be32); | ||
| 1950 | } | ||
| 1951 | |||
| 1952 | static inline u32 nfsd4_layoutcommit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||
| 1953 | { | ||
| 1954 | return (op_encode_hdr_size + | ||
| 1955 | 1 /* locr_newsize */ + | ||
| 1956 | 2 /* ns_size */) * sizeof(__be32); | ||
| 1957 | } | ||
| 1958 | |||
| 1959 | static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||
| 1960 | { | ||
| 1961 | return (op_encode_hdr_size + | ||
| 1962 | 1 /* lrs_stateid */ + | ||
| 1963 | op_encode_stateid_maxsz) * sizeof(__be32); | ||
| 1964 | } | ||
| 1965 | #endif /* CONFIG_NFSD_PNFS */ | ||
| 1966 | |||
| 1682 | static struct nfsd4_operation nfsd4_ops[] = { | 1967 | static struct nfsd4_operation nfsd4_ops[] = { |
| 1683 | [OP_ACCESS] = { | 1968 | [OP_ACCESS] = { |
| 1684 | .op_func = (nfsd4op_func)nfsd4_access, | 1969 | .op_func = (nfsd4op_func)nfsd4_access, |
| @@ -1966,6 +2251,31 @@ static struct nfsd4_operation nfsd4_ops[] = { | |||
| 1966 | .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid, | 2251 | .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid, |
| 1967 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, | 2252 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, |
| 1968 | }, | 2253 | }, |
| 2254 | #ifdef CONFIG_NFSD_PNFS | ||
| 2255 | [OP_GETDEVICEINFO] = { | ||
| 2256 | .op_func = (nfsd4op_func)nfsd4_getdeviceinfo, | ||
| 2257 | .op_flags = ALLOWED_WITHOUT_FH, | ||
| 2258 | .op_name = "OP_GETDEVICEINFO", | ||
| 2259 | }, | ||
| 2260 | [OP_LAYOUTGET] = { | ||
| 2261 | .op_func = (nfsd4op_func)nfsd4_layoutget, | ||
| 2262 | .op_flags = OP_MODIFIES_SOMETHING, | ||
| 2263 | .op_name = "OP_LAYOUTGET", | ||
| 2264 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutget_rsize, | ||
| 2265 | }, | ||
| 2266 | [OP_LAYOUTCOMMIT] = { | ||
| 2267 | .op_func = (nfsd4op_func)nfsd4_layoutcommit, | ||
| 2268 | .op_flags = OP_MODIFIES_SOMETHING, | ||
| 2269 | .op_name = "OP_LAYOUTCOMMIT", | ||
| 2270 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutcommit_rsize, | ||
| 2271 | }, | ||
| 2272 | [OP_LAYOUTRETURN] = { | ||
| 2273 | .op_func = (nfsd4op_func)nfsd4_layoutreturn, | ||
| 2274 | .op_flags = OP_MODIFIES_SOMETHING, | ||
| 2275 | .op_name = "OP_LAYOUTRETURN", | ||
| 2276 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutreturn_rsize, | ||
| 2277 | }, | ||
| 2278 | #endif /* CONFIG_NFSD_PNFS */ | ||
| 1969 | 2279 | ||
| 1970 | /* NFSv4.2 operations */ | 2280 | /* NFSv4.2 operations */ |
| 1971 | [OP_ALLOCATE] = { | 2281 | [OP_ALLOCATE] = { |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 532a60cca2fb..f6b2a09f793f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
| @@ -48,6 +48,7 @@ | |||
| 48 | #include "current_stateid.h" | 48 | #include "current_stateid.h" |
| 49 | 49 | ||
| 50 | #include "netns.h" | 50 | #include "netns.h" |
| 51 | #include "pnfs.h" | ||
| 51 | 52 | ||
| 52 | #define NFSDDBG_FACILITY NFSDDBG_PROC | 53 | #define NFSDDBG_FACILITY NFSDDBG_PROC |
| 53 | 54 | ||
| @@ -150,16 +151,6 @@ renew_client_locked(struct nfs4_client *clp) | |||
| 150 | clp->cl_time = get_seconds(); | 151 | clp->cl_time = get_seconds(); |
| 151 | } | 152 | } |
| 152 | 153 | ||
| 153 | static inline void | ||
| 154 | renew_client(struct nfs4_client *clp) | ||
| 155 | { | ||
| 156 | struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); | ||
| 157 | |||
| 158 | spin_lock(&nn->client_lock); | ||
| 159 | renew_client_locked(clp); | ||
| 160 | spin_unlock(&nn->client_lock); | ||
| 161 | } | ||
| 162 | |||
| 163 | static void put_client_renew_locked(struct nfs4_client *clp) | 154 | static void put_client_renew_locked(struct nfs4_client *clp) |
| 164 | { | 155 | { |
| 165 | struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); | 156 | struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); |
| @@ -282,7 +273,7 @@ static void nfsd4_free_file_rcu(struct rcu_head *rcu) | |||
| 282 | kmem_cache_free(file_slab, fp); | 273 | kmem_cache_free(file_slab, fp); |
| 283 | } | 274 | } |
| 284 | 275 | ||
| 285 | static inline void | 276 | void |
| 286 | put_nfs4_file(struct nfs4_file *fi) | 277 | put_nfs4_file(struct nfs4_file *fi) |
| 287 | { | 278 | { |
| 288 | might_lock(&state_lock); | 279 | might_lock(&state_lock); |
| @@ -295,12 +286,6 @@ put_nfs4_file(struct nfs4_file *fi) | |||
| 295 | } | 286 | } |
| 296 | } | 287 | } |
| 297 | 288 | ||
| 298 | static inline void | ||
| 299 | get_nfs4_file(struct nfs4_file *fi) | ||
| 300 | { | ||
| 301 | atomic_inc(&fi->fi_ref); | ||
| 302 | } | ||
| 303 | |||
| 304 | static struct file * | 289 | static struct file * |
| 305 | __nfs4_get_fd(struct nfs4_file *f, int oflag) | 290 | __nfs4_get_fd(struct nfs4_file *f, int oflag) |
| 306 | { | 291 | { |
| @@ -358,7 +343,7 @@ find_readable_file(struct nfs4_file *f) | |||
| 358 | return ret; | 343 | return ret; |
| 359 | } | 344 | } |
| 360 | 345 | ||
| 361 | static struct file * | 346 | struct file * |
| 362 | find_any_file(struct nfs4_file *f) | 347 | find_any_file(struct nfs4_file *f) |
| 363 | { | 348 | { |
| 364 | struct file *ret; | 349 | struct file *ret; |
| @@ -408,14 +393,6 @@ static unsigned int file_hashval(struct knfsd_fh *fh) | |||
| 408 | return nfsd_fh_hashval(fh) & (FILE_HASH_SIZE - 1); | 393 | return nfsd_fh_hashval(fh) & (FILE_HASH_SIZE - 1); |
| 409 | } | 394 | } |
| 410 | 395 | ||
| 411 | static bool nfsd_fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) | ||
| 412 | { | ||
| 413 | return fh1->fh_size == fh2->fh_size && | ||
| 414 | !memcmp(fh1->fh_base.fh_pad, | ||
| 415 | fh2->fh_base.fh_pad, | ||
| 416 | fh1->fh_size); | ||
| 417 | } | ||
| 418 | |||
| 419 | static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; | 396 | static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; |
| 420 | 397 | ||
| 421 | static void | 398 | static void |
| @@ -494,7 +471,7 @@ static void nfs4_file_put_access(struct nfs4_file *fp, u32 access) | |||
| 494 | __nfs4_file_put_access(fp, O_RDONLY); | 471 | __nfs4_file_put_access(fp, O_RDONLY); |
| 495 | } | 472 | } |
| 496 | 473 | ||
| 497 | static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, | 474 | struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, |
| 498 | struct kmem_cache *slab) | 475 | struct kmem_cache *slab) |
| 499 | { | 476 | { |
| 500 | struct nfs4_stid *stid; | 477 | struct nfs4_stid *stid; |
| @@ -688,17 +665,17 @@ static void nfs4_put_deleg_lease(struct nfs4_file *fp) | |||
| 688 | struct file *filp = NULL; | 665 | struct file *filp = NULL; |
| 689 | 666 | ||
| 690 | spin_lock(&fp->fi_lock); | 667 | spin_lock(&fp->fi_lock); |
| 691 | if (fp->fi_deleg_file && atomic_dec_and_test(&fp->fi_delegees)) | 668 | if (fp->fi_deleg_file && --fp->fi_delegees == 0) |
| 692 | swap(filp, fp->fi_deleg_file); | 669 | swap(filp, fp->fi_deleg_file); |
| 693 | spin_unlock(&fp->fi_lock); | 670 | spin_unlock(&fp->fi_lock); |
| 694 | 671 | ||
| 695 | if (filp) { | 672 | if (filp) { |
| 696 | vfs_setlease(filp, F_UNLCK, NULL, NULL); | 673 | vfs_setlease(filp, F_UNLCK, NULL, (void **)&fp); |
| 697 | fput(filp); | 674 | fput(filp); |
| 698 | } | 675 | } |
| 699 | } | 676 | } |
| 700 | 677 | ||
| 701 | static void unhash_stid(struct nfs4_stid *s) | 678 | void nfs4_unhash_stid(struct nfs4_stid *s) |
| 702 | { | 679 | { |
| 703 | s->sc_type = 0; | 680 | s->sc_type = 0; |
| 704 | } | 681 | } |
| @@ -1006,7 +983,7 @@ static void unhash_lock_stateid(struct nfs4_ol_stateid *stp) | |||
| 1006 | 983 | ||
| 1007 | list_del_init(&stp->st_locks); | 984 | list_del_init(&stp->st_locks); |
| 1008 | unhash_ol_stateid(stp); | 985 | unhash_ol_stateid(stp); |
| 1009 | unhash_stid(&stp->st_stid); | 986 | nfs4_unhash_stid(&stp->st_stid); |
| 1010 | } | 987 | } |
| 1011 | 988 | ||
| 1012 | static void release_lock_stateid(struct nfs4_ol_stateid *stp) | 989 | static void release_lock_stateid(struct nfs4_ol_stateid *stp) |
| @@ -1518,7 +1495,12 @@ unhash_session(struct nfsd4_session *ses) | |||
| 1518 | static int | 1495 | static int |
| 1519 | STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) | 1496 | STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) |
| 1520 | { | 1497 | { |
| 1521 | if (clid->cl_boot == nn->boot_time) | 1498 | /* |
| 1499 | * We're assuming the clid was not given out from a boot | ||
| 1500 | * precisely 2^32 (about 136 years) before this one. That seems | ||
| 1501 | * a safe assumption: | ||
| 1502 | */ | ||
| 1503 | if (clid->cl_boot == (u32)nn->boot_time) | ||
| 1522 | return 0; | 1504 | return 0; |
| 1523 | dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n", | 1505 | dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n", |
| 1524 | clid->cl_boot, clid->cl_id, nn->boot_time); | 1506 | clid->cl_boot, clid->cl_id, nn->boot_time); |
| @@ -1558,6 +1540,9 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) | |||
| 1558 | INIT_LIST_HEAD(&clp->cl_lru); | 1540 | INIT_LIST_HEAD(&clp->cl_lru); |
| 1559 | INIT_LIST_HEAD(&clp->cl_callbacks); | 1541 | INIT_LIST_HEAD(&clp->cl_callbacks); |
| 1560 | INIT_LIST_HEAD(&clp->cl_revoked); | 1542 | INIT_LIST_HEAD(&clp->cl_revoked); |
| 1543 | #ifdef CONFIG_NFSD_PNFS | ||
| 1544 | INIT_LIST_HEAD(&clp->cl_lo_states); | ||
| 1545 | #endif | ||
| 1561 | spin_lock_init(&clp->cl_lock); | 1546 | spin_lock_init(&clp->cl_lock); |
| 1562 | rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); | 1547 | rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); |
| 1563 | return clp; | 1548 | return clp; |
| @@ -1662,6 +1647,7 @@ __destroy_client(struct nfs4_client *clp) | |||
| 1662 | nfs4_get_stateowner(&oo->oo_owner); | 1647 | nfs4_get_stateowner(&oo->oo_owner); |
| 1663 | release_openowner(oo); | 1648 | release_openowner(oo); |
| 1664 | } | 1649 | } |
| 1650 | nfsd4_return_all_client_layouts(clp); | ||
| 1665 | nfsd4_shutdown_callback(clp); | 1651 | nfsd4_shutdown_callback(clp); |
| 1666 | if (clp->cl_cb_conn.cb_xprt) | 1652 | if (clp->cl_cb_conn.cb_xprt) |
| 1667 | svc_xprt_put(clp->cl_cb_conn.cb_xprt); | 1653 | svc_xprt_put(clp->cl_cb_conn.cb_xprt); |
| @@ -2145,8 +2131,11 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, | |||
| 2145 | static void | 2131 | static void |
| 2146 | nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid) | 2132 | nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid) |
| 2147 | { | 2133 | { |
| 2148 | /* pNFS is not supported */ | 2134 | #ifdef CONFIG_NFSD_PNFS |
| 2135 | new->cl_exchange_flags |= EXCHGID4_FLAG_USE_PNFS_MDS; | ||
| 2136 | #else | ||
| 2149 | new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS; | 2137 | new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS; |
| 2138 | #endif | ||
| 2150 | 2139 | ||
| 2151 | /* Referrals are supported, Migration is not. */ | 2140 | /* Referrals are supported, Migration is not. */ |
| 2152 | new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER; | 2141 | new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER; |
| @@ -3074,6 +3063,10 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval, | |||
| 3074 | fp->fi_share_deny = 0; | 3063 | fp->fi_share_deny = 0; |
| 3075 | memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); | 3064 | memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); |
| 3076 | memset(fp->fi_access, 0, sizeof(fp->fi_access)); | 3065 | memset(fp->fi_access, 0, sizeof(fp->fi_access)); |
| 3066 | #ifdef CONFIG_NFSD_PNFS | ||
| 3067 | INIT_LIST_HEAD(&fp->fi_lo_states); | ||
| 3068 | atomic_set(&fp->fi_lo_recalls, 0); | ||
| 3069 | #endif | ||
| 3077 | hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]); | 3070 | hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]); |
| 3078 | } | 3071 | } |
| 3079 | 3072 | ||
| @@ -3300,7 +3293,7 @@ find_file_locked(struct knfsd_fh *fh, unsigned int hashval) | |||
| 3300 | struct nfs4_file *fp; | 3293 | struct nfs4_file *fp; |
| 3301 | 3294 | ||
| 3302 | hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) { | 3295 | hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) { |
| 3303 | if (nfsd_fh_match(&fp->fi_fhandle, fh)) { | 3296 | if (fh_match(&fp->fi_fhandle, fh)) { |
| 3304 | if (atomic_inc_not_zero(&fp->fi_ref)) | 3297 | if (atomic_inc_not_zero(&fp->fi_ref)) |
| 3305 | return fp; | 3298 | return fp; |
| 3306 | } | 3299 | } |
| @@ -3308,7 +3301,7 @@ find_file_locked(struct knfsd_fh *fh, unsigned int hashval) | |||
| 3308 | return NULL; | 3301 | return NULL; |
| 3309 | } | 3302 | } |
| 3310 | 3303 | ||
| 3311 | static struct nfs4_file * | 3304 | struct nfs4_file * |
| 3312 | find_file(struct knfsd_fh *fh) | 3305 | find_file(struct knfsd_fh *fh) |
| 3313 | { | 3306 | { |
| 3314 | struct nfs4_file *fp; | 3307 | struct nfs4_file *fp; |
| @@ -3856,12 +3849,12 @@ static int nfs4_setlease(struct nfs4_delegation *dp) | |||
| 3856 | /* Race breaker */ | 3849 | /* Race breaker */ |
| 3857 | if (fp->fi_deleg_file) { | 3850 | if (fp->fi_deleg_file) { |
| 3858 | status = 0; | 3851 | status = 0; |
| 3859 | atomic_inc(&fp->fi_delegees); | 3852 | ++fp->fi_delegees; |
| 3860 | hash_delegation_locked(dp, fp); | 3853 | hash_delegation_locked(dp, fp); |
| 3861 | goto out_unlock; | 3854 | goto out_unlock; |
| 3862 | } | 3855 | } |
| 3863 | fp->fi_deleg_file = filp; | 3856 | fp->fi_deleg_file = filp; |
| 3864 | atomic_set(&fp->fi_delegees, 1); | 3857 | fp->fi_delegees = 1; |
| 3865 | hash_delegation_locked(dp, fp); | 3858 | hash_delegation_locked(dp, fp); |
| 3866 | spin_unlock(&fp->fi_lock); | 3859 | spin_unlock(&fp->fi_lock); |
| 3867 | spin_unlock(&state_lock); | 3860 | spin_unlock(&state_lock); |
| @@ -3902,7 +3895,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, | |||
| 3902 | status = -EAGAIN; | 3895 | status = -EAGAIN; |
| 3903 | goto out_unlock; | 3896 | goto out_unlock; |
| 3904 | } | 3897 | } |
| 3905 | atomic_inc(&fp->fi_delegees); | 3898 | ++fp->fi_delegees; |
| 3906 | hash_delegation_locked(dp, fp); | 3899 | hash_delegation_locked(dp, fp); |
| 3907 | status = 0; | 3900 | status = 0; |
| 3908 | out_unlock: | 3901 | out_unlock: |
| @@ -4295,7 +4288,7 @@ laundromat_main(struct work_struct *laundry) | |||
| 4295 | 4288 | ||
| 4296 | static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp) | 4289 | static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp) |
| 4297 | { | 4290 | { |
| 4298 | if (!nfsd_fh_match(&fhp->fh_handle, &stp->st_stid.sc_file->fi_fhandle)) | 4291 | if (!fh_match(&fhp->fh_handle, &stp->st_stid.sc_file->fi_fhandle)) |
| 4299 | return nfserr_bad_stateid; | 4292 | return nfserr_bad_stateid; |
| 4300 | return nfs_ok; | 4293 | return nfs_ok; |
| 4301 | } | 4294 | } |
| @@ -4446,7 +4439,7 @@ out_unlock: | |||
| 4446 | return status; | 4439 | return status; |
| 4447 | } | 4440 | } |
| 4448 | 4441 | ||
| 4449 | static __be32 | 4442 | __be32 |
| 4450 | nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, | 4443 | nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, |
| 4451 | stateid_t *stateid, unsigned char typemask, | 4444 | stateid_t *stateid, unsigned char typemask, |
| 4452 | struct nfs4_stid **s, struct nfsd_net *nn) | 4445 | struct nfs4_stid **s, struct nfsd_net *nn) |
| @@ -4860,6 +4853,9 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
| 4860 | update_stateid(&stp->st_stid.sc_stateid); | 4853 | update_stateid(&stp->st_stid.sc_stateid); |
| 4861 | memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); | 4854 | memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); |
| 4862 | 4855 | ||
| 4856 | nfsd4_return_all_file_layouts(stp->st_stateowner->so_client, | ||
| 4857 | stp->st_stid.sc_file); | ||
| 4858 | |||
| 4863 | nfsd4_close_open_stateid(stp); | 4859 | nfsd4_close_open_stateid(stp); |
| 4864 | 4860 | ||
| 4865 | /* put reference from nfs4_preprocess_seqid_op */ | 4861 | /* put reference from nfs4_preprocess_seqid_op */ |
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 15f7b73e0c0f..df5e66caf100 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
| @@ -47,6 +47,7 @@ | |||
| 47 | #include "state.h" | 47 | #include "state.h" |
| 48 | #include "cache.h" | 48 | #include "cache.h" |
| 49 | #include "netns.h" | 49 | #include "netns.h" |
| 50 | #include "pnfs.h" | ||
| 50 | 51 | ||
| 51 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | 52 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL |
| 52 | #include <linux/security.h> | 53 | #include <linux/security.h> |
| @@ -234,6 +235,26 @@ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) | |||
| 234 | return ret; | 235 | return ret; |
| 235 | } | 236 | } |
| 236 | 237 | ||
| 238 | /* | ||
| 239 | * We require the high 32 bits of 'seconds' to be 0, and | ||
| 240 | * we ignore all 32 bits of 'nseconds'. | ||
| 241 | */ | ||
| 242 | static __be32 | ||
| 243 | nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec *tv) | ||
| 244 | { | ||
| 245 | DECODE_HEAD; | ||
| 246 | u64 sec; | ||
| 247 | |||
| 248 | READ_BUF(12); | ||
| 249 | p = xdr_decode_hyper(p, &sec); | ||
| 250 | tv->tv_sec = sec; | ||
| 251 | tv->tv_nsec = be32_to_cpup(p++); | ||
| 252 | if (tv->tv_nsec >= (u32)1000000000) | ||
| 253 | return nfserr_inval; | ||
| 254 | |||
| 255 | DECODE_TAIL; | ||
| 256 | } | ||
| 257 | |||
| 237 | static __be32 | 258 | static __be32 |
| 238 | nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) | 259 | nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) |
| 239 | { | 260 | { |
| @@ -267,7 +288,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, | |||
| 267 | { | 288 | { |
| 268 | int expected_len, len = 0; | 289 | int expected_len, len = 0; |
| 269 | u32 dummy32; | 290 | u32 dummy32; |
| 270 | u64 sec; | ||
| 271 | char *buf; | 291 | char *buf; |
| 272 | 292 | ||
| 273 | DECODE_HEAD; | 293 | DECODE_HEAD; |
| @@ -358,15 +378,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, | |||
| 358 | dummy32 = be32_to_cpup(p++); | 378 | dummy32 = be32_to_cpup(p++); |
| 359 | switch (dummy32) { | 379 | switch (dummy32) { |
| 360 | case NFS4_SET_TO_CLIENT_TIME: | 380 | case NFS4_SET_TO_CLIENT_TIME: |
| 361 | /* We require the high 32 bits of 'seconds' to be 0, and we ignore | ||
| 362 | all 32 bits of 'nseconds'. */ | ||
| 363 | READ_BUF(12); | ||
| 364 | len += 12; | 381 | len += 12; |
| 365 | p = xdr_decode_hyper(p, &sec); | 382 | status = nfsd4_decode_time(argp, &iattr->ia_atime); |
| 366 | iattr->ia_atime.tv_sec = (time_t)sec; | 383 | if (status) |
| 367 | iattr->ia_atime.tv_nsec = be32_to_cpup(p++); | 384 | return status; |
| 368 | if (iattr->ia_atime.tv_nsec >= (u32)1000000000) | ||
| 369 | return nfserr_inval; | ||
| 370 | iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET); | 385 | iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET); |
| 371 | break; | 386 | break; |
| 372 | case NFS4_SET_TO_SERVER_TIME: | 387 | case NFS4_SET_TO_SERVER_TIME: |
| @@ -382,15 +397,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, | |||
| 382 | dummy32 = be32_to_cpup(p++); | 397 | dummy32 = be32_to_cpup(p++); |
| 383 | switch (dummy32) { | 398 | switch (dummy32) { |
| 384 | case NFS4_SET_TO_CLIENT_TIME: | 399 | case NFS4_SET_TO_CLIENT_TIME: |
| 385 | /* We require the high 32 bits of 'seconds' to be 0, and we ignore | ||
| 386 | all 32 bits of 'nseconds'. */ | ||
| 387 | READ_BUF(12); | ||
| 388 | len += 12; | 400 | len += 12; |
| 389 | p = xdr_decode_hyper(p, &sec); | 401 | status = nfsd4_decode_time(argp, &iattr->ia_mtime); |
| 390 | iattr->ia_mtime.tv_sec = sec; | 402 | if (status) |
| 391 | iattr->ia_mtime.tv_nsec = be32_to_cpup(p++); | 403 | return status; |
| 392 | if (iattr->ia_mtime.tv_nsec >= (u32)1000000000) | ||
| 393 | return nfserr_inval; | ||
| 394 | iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET); | 404 | iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET); |
| 395 | break; | 405 | break; |
| 396 | case NFS4_SET_TO_SERVER_TIME: | 406 | case NFS4_SET_TO_SERVER_TIME: |
| @@ -1513,6 +1523,127 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str | |||
| 1513 | DECODE_TAIL; | 1523 | DECODE_TAIL; |
| 1514 | } | 1524 | } |
| 1515 | 1525 | ||
| 1526 | #ifdef CONFIG_NFSD_PNFS | ||
| 1527 | static __be32 | ||
| 1528 | nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, | ||
| 1529 | struct nfsd4_getdeviceinfo *gdev) | ||
| 1530 | { | ||
| 1531 | DECODE_HEAD; | ||
| 1532 | u32 num, i; | ||
| 1533 | |||
| 1534 | READ_BUF(sizeof(struct nfsd4_deviceid) + 3 * 4); | ||
| 1535 | COPYMEM(&gdev->gd_devid, sizeof(struct nfsd4_deviceid)); | ||
| 1536 | gdev->gd_layout_type = be32_to_cpup(p++); | ||
| 1537 | gdev->gd_maxcount = be32_to_cpup(p++); | ||
| 1538 | num = be32_to_cpup(p++); | ||
| 1539 | if (num) { | ||
| 1540 | READ_BUF(4 * num); | ||
| 1541 | gdev->gd_notify_types = be32_to_cpup(p++); | ||
| 1542 | for (i = 1; i < num; i++) { | ||
| 1543 | if (be32_to_cpup(p++)) { | ||
| 1544 | status = nfserr_inval; | ||
| 1545 | goto out; | ||
| 1546 | } | ||
| 1547 | } | ||
| 1548 | } | ||
| 1549 | DECODE_TAIL; | ||
| 1550 | } | ||
| 1551 | |||
| 1552 | static __be32 | ||
| 1553 | nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, | ||
| 1554 | struct nfsd4_layoutget *lgp) | ||
| 1555 | { | ||
| 1556 | DECODE_HEAD; | ||
| 1557 | |||
| 1558 | READ_BUF(36); | ||
| 1559 | lgp->lg_signal = be32_to_cpup(p++); | ||
| 1560 | lgp->lg_layout_type = be32_to_cpup(p++); | ||
| 1561 | lgp->lg_seg.iomode = be32_to_cpup(p++); | ||
| 1562 | p = xdr_decode_hyper(p, &lgp->lg_seg.offset); | ||
| 1563 | p = xdr_decode_hyper(p, &lgp->lg_seg.length); | ||
| 1564 | p = xdr_decode_hyper(p, &lgp->lg_minlength); | ||
| 1565 | nfsd4_decode_stateid(argp, &lgp->lg_sid); | ||
| 1566 | READ_BUF(4); | ||
| 1567 | lgp->lg_maxcount = be32_to_cpup(p++); | ||
| 1568 | |||
| 1569 | DECODE_TAIL; | ||
| 1570 | } | ||
| 1571 | |||
| 1572 | static __be32 | ||
| 1573 | nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp, | ||
| 1574 | struct nfsd4_layoutcommit *lcp) | ||
| 1575 | { | ||
| 1576 | DECODE_HEAD; | ||
| 1577 | u32 timechange; | ||
| 1578 | |||
| 1579 | READ_BUF(20); | ||
| 1580 | p = xdr_decode_hyper(p, &lcp->lc_seg.offset); | ||
| 1581 | p = xdr_decode_hyper(p, &lcp->lc_seg.length); | ||
| 1582 | lcp->lc_reclaim = be32_to_cpup(p++); | ||
| 1583 | nfsd4_decode_stateid(argp, &lcp->lc_sid); | ||
| 1584 | READ_BUF(4); | ||
| 1585 | lcp->lc_newoffset = be32_to_cpup(p++); | ||
| 1586 | if (lcp->lc_newoffset) { | ||
| 1587 | READ_BUF(8); | ||
| 1588 | p = xdr_decode_hyper(p, &lcp->lc_last_wr); | ||
| 1589 | } else | ||
| 1590 | lcp->lc_last_wr = 0; | ||
| 1591 | READ_BUF(4); | ||
| 1592 | timechange = be32_to_cpup(p++); | ||
| 1593 | if (timechange) { | ||
| 1594 | status = nfsd4_decode_time(argp, &lcp->lc_mtime); | ||
| 1595 | if (status) | ||
| 1596 | return status; | ||
| 1597 | } else { | ||
| 1598 | lcp->lc_mtime.tv_nsec = UTIME_NOW; | ||
| 1599 | } | ||
| 1600 | READ_BUF(8); | ||
| 1601 | lcp->lc_layout_type = be32_to_cpup(p++); | ||
| 1602 | |||
| 1603 | /* | ||
| 1604 | * Save the layout update in XDR format and let the layout driver deal | ||
| 1605 | * with it later. | ||
| 1606 | */ | ||
| 1607 | lcp->lc_up_len = be32_to_cpup(p++); | ||
| 1608 | if (lcp->lc_up_len > 0) { | ||
| 1609 | READ_BUF(lcp->lc_up_len); | ||
| 1610 | READMEM(lcp->lc_up_layout, lcp->lc_up_len); | ||
| 1611 | } | ||
| 1612 | |||
| 1613 | DECODE_TAIL; | ||
| 1614 | } | ||
| 1615 | |||
| 1616 | static __be32 | ||
| 1617 | nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp, | ||
| 1618 | struct nfsd4_layoutreturn *lrp) | ||
| 1619 | { | ||
| 1620 | DECODE_HEAD; | ||
| 1621 | |||
| 1622 | READ_BUF(16); | ||
| 1623 | lrp->lr_reclaim = be32_to_cpup(p++); | ||
| 1624 | lrp->lr_layout_type = be32_to_cpup(p++); | ||
| 1625 | lrp->lr_seg.iomode = be32_to_cpup(p++); | ||
| 1626 | lrp->lr_return_type = be32_to_cpup(p++); | ||
| 1627 | if (lrp->lr_return_type == RETURN_FILE) { | ||
| 1628 | READ_BUF(16); | ||
| 1629 | p = xdr_decode_hyper(p, &lrp->lr_seg.offset); | ||
| 1630 | p = xdr_decode_hyper(p, &lrp->lr_seg.length); | ||
| 1631 | nfsd4_decode_stateid(argp, &lrp->lr_sid); | ||
| 1632 | READ_BUF(4); | ||
| 1633 | lrp->lrf_body_len = be32_to_cpup(p++); | ||
| 1634 | if (lrp->lrf_body_len > 0) { | ||
| 1635 | READ_BUF(lrp->lrf_body_len); | ||
| 1636 | READMEM(lrp->lrf_body, lrp->lrf_body_len); | ||
| 1637 | } | ||
| 1638 | } else { | ||
| 1639 | lrp->lr_seg.offset = 0; | ||
| 1640 | lrp->lr_seg.length = NFS4_MAX_UINT64; | ||
| 1641 | } | ||
| 1642 | |||
| 1643 | DECODE_TAIL; | ||
| 1644 | } | ||
| 1645 | #endif /* CONFIG_NFSD_PNFS */ | ||
| 1646 | |||
| 1516 | static __be32 | 1647 | static __be32 |
| 1517 | nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, | 1648 | nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, |
| 1518 | struct nfsd4_fallocate *fallocate) | 1649 | struct nfsd4_fallocate *fallocate) |
| @@ -1607,11 +1738,19 @@ static nfsd4_dec nfsd4_dec_ops[] = { | |||
| 1607 | [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session, | 1738 | [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session, |
| 1608 | [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid, | 1739 | [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid, |
| 1609 | [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, | 1740 | [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, |
| 1741 | #ifdef CONFIG_NFSD_PNFS | ||
| 1742 | [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_getdeviceinfo, | ||
| 1743 | [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
| 1744 | [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_layoutcommit, | ||
| 1745 | [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_layoutget, | ||
| 1746 | [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_layoutreturn, | ||
| 1747 | #else | ||
| 1610 | [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp, | 1748 | [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp, |
| 1611 | [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, | 1749 | [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, |
| 1612 | [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp, | 1750 | [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp, |
| 1613 | [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp, | 1751 | [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp, |
| 1614 | [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp, | 1752 | [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp, |
| 1753 | #endif | ||
| 1615 | [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name, | 1754 | [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name, |
| 1616 | [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence, | 1755 | [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence, |
| 1617 | [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, | 1756 | [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, |
| @@ -2539,6 +2678,30 @@ out_acl: | |||
| 2539 | get_parent_attributes(exp, &stat); | 2678 | get_parent_attributes(exp, &stat); |
| 2540 | p = xdr_encode_hyper(p, stat.ino); | 2679 | p = xdr_encode_hyper(p, stat.ino); |
| 2541 | } | 2680 | } |
| 2681 | #ifdef CONFIG_NFSD_PNFS | ||
| 2682 | if ((bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) || | ||
| 2683 | (bmval2 & FATTR4_WORD2_LAYOUT_TYPES)) { | ||
| 2684 | if (exp->ex_layout_type) { | ||
| 2685 | p = xdr_reserve_space(xdr, 8); | ||
| 2686 | if (!p) | ||
| 2687 | goto out_resource; | ||
| 2688 | *p++ = cpu_to_be32(1); | ||
| 2689 | *p++ = cpu_to_be32(exp->ex_layout_type); | ||
| 2690 | } else { | ||
| 2691 | p = xdr_reserve_space(xdr, 4); | ||
| 2692 | if (!p) | ||
| 2693 | goto out_resource; | ||
| 2694 | *p++ = cpu_to_be32(0); | ||
| 2695 | } | ||
| 2696 | } | ||
| 2697 | |||
| 2698 | if (bmval2 & FATTR4_WORD2_LAYOUT_BLKSIZE) { | ||
| 2699 | p = xdr_reserve_space(xdr, 4); | ||
| 2700 | if (!p) | ||
| 2701 | goto out_resource; | ||
| 2702 | *p++ = cpu_to_be32(stat.blksize); | ||
| 2703 | } | ||
| 2704 | #endif /* CONFIG_NFSD_PNFS */ | ||
| 2542 | if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { | 2705 | if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { |
| 2543 | status = nfsd4_encode_security_label(xdr, rqstp, context, | 2706 | status = nfsd4_encode_security_label(xdr, rqstp, context, |
| 2544 | contextlen); | 2707 | contextlen); |
| @@ -2768,16 +2931,17 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, | |||
| 2768 | if (entry_bytes > cd->rd_maxcount) | 2931 | if (entry_bytes > cd->rd_maxcount) |
| 2769 | goto fail; | 2932 | goto fail; |
| 2770 | cd->rd_maxcount -= entry_bytes; | 2933 | cd->rd_maxcount -= entry_bytes; |
| 2771 | if (!cd->rd_dircount) | ||
| 2772 | goto fail; | ||
| 2773 | /* | 2934 | /* |
| 2774 | * RFC 3530 14.2.24 describes rd_dircount as only a "hint", so | 2935 | * RFC 3530 14.2.24 describes rd_dircount as only a "hint", so |
| 2775 | * let's always let through the first entry, at least: | 2936 | * let's always let through the first entry, at least: |
| 2776 | */ | 2937 | */ |
| 2777 | name_and_cookie = 4 * XDR_QUADLEN(namlen) + 8; | 2938 | if (!cd->rd_dircount) |
| 2939 | goto fail; | ||
| 2940 | name_and_cookie = 4 + 4 * XDR_QUADLEN(namlen) + 8; | ||
| 2778 | if (name_and_cookie > cd->rd_dircount && cd->cookie_offset) | 2941 | if (name_and_cookie > cd->rd_dircount && cd->cookie_offset) |
| 2779 | goto fail; | 2942 | goto fail; |
| 2780 | cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie); | 2943 | cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie); |
| 2944 | |||
| 2781 | cd->cookie_offset = cookie_offset; | 2945 | cd->cookie_offset = cookie_offset; |
| 2782 | skip_entry: | 2946 | skip_entry: |
| 2783 | cd->common.err = nfs_ok; | 2947 | cd->common.err = nfs_ok; |
| @@ -3814,6 +3978,156 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, | |||
| 3814 | return nfserr; | 3978 | return nfserr; |
| 3815 | } | 3979 | } |
| 3816 | 3980 | ||
| 3981 | #ifdef CONFIG_NFSD_PNFS | ||
| 3982 | static __be32 | ||
| 3983 | nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, | ||
| 3984 | struct nfsd4_getdeviceinfo *gdev) | ||
| 3985 | { | ||
| 3986 | struct xdr_stream *xdr = &resp->xdr; | ||
| 3987 | const struct nfsd4_layout_ops *ops = | ||
| 3988 | nfsd4_layout_ops[gdev->gd_layout_type]; | ||
| 3989 | u32 starting_len = xdr->buf->len, needed_len; | ||
| 3990 | __be32 *p; | ||
| 3991 | |||
| 3992 | dprintk("%s: err %d\n", __func__, nfserr); | ||
| 3993 | if (nfserr) | ||
| 3994 | goto out; | ||
| 3995 | |||
| 3996 | nfserr = nfserr_resource; | ||
| 3997 | p = xdr_reserve_space(xdr, 4); | ||
| 3998 | if (!p) | ||
| 3999 | goto out; | ||
| 4000 | |||
| 4001 | *p++ = cpu_to_be32(gdev->gd_layout_type); | ||
| 4002 | |||
| 4003 | /* If maxcount is 0 then just update notifications */ | ||
| 4004 | if (gdev->gd_maxcount != 0) { | ||
| 4005 | nfserr = ops->encode_getdeviceinfo(xdr, gdev); | ||
| 4006 | if (nfserr) { | ||
| 4007 | /* | ||
| 4008 | * We don't bother to burden the layout drivers with | ||
| 4009 | * enforcing gd_maxcount, just tell the client to | ||
| 4010 | * come back with a bigger buffer if it's not enough. | ||
| 4011 | */ | ||
| 4012 | if (xdr->buf->len + 4 > gdev->gd_maxcount) | ||
| 4013 | goto toosmall; | ||
| 4014 | goto out; | ||
| 4015 | } | ||
| 4016 | } | ||
| 4017 | |||
| 4018 | nfserr = nfserr_resource; | ||
| 4019 | if (gdev->gd_notify_types) { | ||
| 4020 | p = xdr_reserve_space(xdr, 4 + 4); | ||
| 4021 | if (!p) | ||
| 4022 | goto out; | ||
| 4023 | *p++ = cpu_to_be32(1); /* bitmap length */ | ||
| 4024 | *p++ = cpu_to_be32(gdev->gd_notify_types); | ||
| 4025 | } else { | ||
| 4026 | p = xdr_reserve_space(xdr, 4); | ||
| 4027 | if (!p) | ||
| 4028 | goto out; | ||
| 4029 | *p++ = 0; | ||
| 4030 | } | ||
| 4031 | |||
| 4032 | nfserr = 0; | ||
| 4033 | out: | ||
| 4034 | kfree(gdev->gd_device); | ||
| 4035 | dprintk("%s: done: %d\n", __func__, be32_to_cpu(nfserr)); | ||
| 4036 | return nfserr; | ||
| 4037 | |||
| 4038 | toosmall: | ||
| 4039 | dprintk("%s: maxcount too small\n", __func__); | ||
| 4040 | needed_len = xdr->buf->len + 4 /* notifications */; | ||
| 4041 | xdr_truncate_encode(xdr, starting_len); | ||
| 4042 | p = xdr_reserve_space(xdr, 4); | ||
| 4043 | if (!p) { | ||
| 4044 | nfserr = nfserr_resource; | ||
| 4045 | } else { | ||
| 4046 | *p++ = cpu_to_be32(needed_len); | ||
| 4047 | nfserr = nfserr_toosmall; | ||
| 4048 | } | ||
| 4049 | goto out; | ||
| 4050 | } | ||
| 4051 | |||
| 4052 | static __be32 | ||
| 4053 | nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, | ||
| 4054 | struct nfsd4_layoutget *lgp) | ||
| 4055 | { | ||
| 4056 | struct xdr_stream *xdr = &resp->xdr; | ||
| 4057 | const struct nfsd4_layout_ops *ops = | ||
| 4058 | nfsd4_layout_ops[lgp->lg_layout_type]; | ||
| 4059 | __be32 *p; | ||
| 4060 | |||
| 4061 | dprintk("%s: err %d\n", __func__, nfserr); | ||
| 4062 | if (nfserr) | ||
| 4063 | goto out; | ||
| 4064 | |||
| 4065 | nfserr = nfserr_resource; | ||
| 4066 | p = xdr_reserve_space(xdr, 36 + sizeof(stateid_opaque_t)); | ||
| 4067 | if (!p) | ||
| 4068 | goto out; | ||
| 4069 | |||
| 4070 | *p++ = cpu_to_be32(1); /* we always set return-on-close */ | ||
| 4071 | *p++ = cpu_to_be32(lgp->lg_sid.si_generation); | ||
| 4072 | p = xdr_encode_opaque_fixed(p, &lgp->lg_sid.si_opaque, | ||
| 4073 | sizeof(stateid_opaque_t)); | ||
| 4074 | |||
| 4075 | *p++ = cpu_to_be32(1); /* we always return a single layout */ | ||
| 4076 | p = xdr_encode_hyper(p, lgp->lg_seg.offset); | ||
| 4077 | p = xdr_encode_hyper(p, lgp->lg_seg.length); | ||
| 4078 | *p++ = cpu_to_be32(lgp->lg_seg.iomode); | ||
| 4079 | *p++ = cpu_to_be32(lgp->lg_layout_type); | ||
| 4080 | |||
| 4081 | nfserr = ops->encode_layoutget(xdr, lgp); | ||
| 4082 | out: | ||
| 4083 | kfree(lgp->lg_content); | ||
| 4084 | return nfserr; | ||
| 4085 | } | ||
| 4086 | |||
| 4087 | static __be32 | ||
| 4088 | nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, | ||
| 4089 | struct nfsd4_layoutcommit *lcp) | ||
| 4090 | { | ||
| 4091 | struct xdr_stream *xdr = &resp->xdr; | ||
| 4092 | __be32 *p; | ||
| 4093 | |||
| 4094 | if (nfserr) | ||
| 4095 | return nfserr; | ||
| 4096 | |||
| 4097 | p = xdr_reserve_space(xdr, 4); | ||
| 4098 | if (!p) | ||
| 4099 | return nfserr_resource; | ||
| 4100 | *p++ = cpu_to_be32(lcp->lc_size_chg); | ||
| 4101 | if (lcp->lc_size_chg) { | ||
| 4102 | p = xdr_reserve_space(xdr, 8); | ||
| 4103 | if (!p) | ||
| 4104 | return nfserr_resource; | ||
| 4105 | p = xdr_encode_hyper(p, lcp->lc_newsize); | ||
| 4106 | } | ||
| 4107 | |||
| 4108 | return nfs_ok; | ||
| 4109 | } | ||
| 4110 | |||
| 4111 | static __be32 | ||
| 4112 | nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr, | ||
| 4113 | struct nfsd4_layoutreturn *lrp) | ||
| 4114 | { | ||
| 4115 | struct xdr_stream *xdr = &resp->xdr; | ||
| 4116 | __be32 *p; | ||
| 4117 | |||
| 4118 | if (nfserr) | ||
| 4119 | return nfserr; | ||
| 4120 | |||
| 4121 | p = xdr_reserve_space(xdr, 4); | ||
| 4122 | if (!p) | ||
| 4123 | return nfserr_resource; | ||
| 4124 | *p++ = cpu_to_be32(lrp->lrs_present); | ||
| 4125 | if (lrp->lrs_present) | ||
| 4126 | nfsd4_encode_stateid(xdr, &lrp->lr_sid); | ||
| 4127 | return nfs_ok; | ||
| 4128 | } | ||
| 4129 | #endif /* CONFIG_NFSD_PNFS */ | ||
| 4130 | |||
| 3817 | static __be32 | 4131 | static __be32 |
| 3818 | nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, | 4132 | nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, |
| 3819 | struct nfsd4_seek *seek) | 4133 | struct nfsd4_seek *seek) |
| @@ -3890,11 +4204,19 @@ static nfsd4_enc nfsd4_enc_ops[] = { | |||
| 3890 | [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop, | 4204 | [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop, |
| 3891 | [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop, | 4205 | [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop, |
| 3892 | [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, | 4206 | [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, |
| 4207 | #ifdef CONFIG_NFSD_PNFS | ||
| 4208 | [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_getdeviceinfo, | ||
| 4209 | [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, | ||
| 4210 | [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_layoutcommit, | ||
| 4211 | [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_layoutget, | ||
| 4212 | [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_layoutreturn, | ||
| 4213 | #else | ||
| 3893 | [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, | 4214 | [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, |
| 3894 | [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, | 4215 | [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, |
| 3895 | [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop, | 4216 | [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop, |
| 3896 | [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop, | 4217 | [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop, |
| 3897 | [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop, | 4218 | [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop, |
| 4219 | #endif | ||
| 3898 | [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name, | 4220 | [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name, |
| 3899 | [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, | 4221 | [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, |
| 3900 | [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, | 4222 | [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, |
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 19ace74d35f6..aa47d75ddb26 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include "cache.h" | 21 | #include "cache.h" |
| 22 | #include "state.h" | 22 | #include "state.h" |
| 23 | #include "netns.h" | 23 | #include "netns.h" |
| 24 | #include "pnfs.h" | ||
| 24 | 25 | ||
| 25 | /* | 26 | /* |
| 26 | * We have a single directory with several nodes in it. | 27 | * We have a single directory with several nodes in it. |
| @@ -1258,9 +1259,12 @@ static int __init init_nfsd(void) | |||
| 1258 | retval = nfsd4_init_slabs(); | 1259 | retval = nfsd4_init_slabs(); |
| 1259 | if (retval) | 1260 | if (retval) |
| 1260 | goto out_unregister_pernet; | 1261 | goto out_unregister_pernet; |
| 1261 | retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ | 1262 | retval = nfsd4_init_pnfs(); |
| 1262 | if (retval) | 1263 | if (retval) |
| 1263 | goto out_free_slabs; | 1264 | goto out_free_slabs; |
| 1265 | retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ | ||
| 1266 | if (retval) | ||
| 1267 | goto out_exit_pnfs; | ||
| 1264 | nfsd_stat_init(); /* Statistics */ | 1268 | nfsd_stat_init(); /* Statistics */ |
| 1265 | retval = nfsd_reply_cache_init(); | 1269 | retval = nfsd_reply_cache_init(); |
| 1266 | if (retval) | 1270 | if (retval) |
| @@ -1282,6 +1286,8 @@ out_free_lockd: | |||
| 1282 | out_free_stat: | 1286 | out_free_stat: |
| 1283 | nfsd_stat_shutdown(); | 1287 | nfsd_stat_shutdown(); |
| 1284 | nfsd_fault_inject_cleanup(); | 1288 | nfsd_fault_inject_cleanup(); |
| 1289 | out_exit_pnfs: | ||
| 1290 | nfsd4_exit_pnfs(); | ||
| 1285 | out_free_slabs: | 1291 | out_free_slabs: |
| 1286 | nfsd4_free_slabs(); | 1292 | nfsd4_free_slabs(); |
| 1287 | out_unregister_pernet: | 1293 | out_unregister_pernet: |
| @@ -1299,6 +1305,7 @@ static void __exit exit_nfsd(void) | |||
| 1299 | nfsd_stat_shutdown(); | 1305 | nfsd_stat_shutdown(); |
| 1300 | nfsd_lockd_shutdown(); | 1306 | nfsd_lockd_shutdown(); |
| 1301 | nfsd4_free_slabs(); | 1307 | nfsd4_free_slabs(); |
| 1308 | nfsd4_exit_pnfs(); | ||
| 1302 | nfsd_fault_inject_cleanup(); | 1309 | nfsd_fault_inject_cleanup(); |
| 1303 | unregister_filesystem(&nfsd_fs_type); | 1310 | unregister_filesystem(&nfsd_fs_type); |
| 1304 | unregister_pernet_subsys(&nfsd_net_ops); | 1311 | unregister_pernet_subsys(&nfsd_net_ops); |
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 33a46a8dfaf7..565c4da1a9eb 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h | |||
| @@ -325,15 +325,27 @@ void nfsd_lockd_shutdown(void); | |||
| 325 | 325 | ||
| 326 | #define NFSD4_SUPPORTED_ATTRS_WORD2 0 | 326 | #define NFSD4_SUPPORTED_ATTRS_WORD2 0 |
| 327 | 327 | ||
| 328 | /* 4.1 */ | ||
| 329 | #ifdef CONFIG_NFSD_PNFS | ||
| 330 | #define PNFSD_SUPPORTED_ATTRS_WORD1 FATTR4_WORD1_FS_LAYOUT_TYPES | ||
| 331 | #define PNFSD_SUPPORTED_ATTRS_WORD2 \ | ||
| 332 | (FATTR4_WORD2_LAYOUT_BLKSIZE | FATTR4_WORD2_LAYOUT_TYPES) | ||
| 333 | #else | ||
| 334 | #define PNFSD_SUPPORTED_ATTRS_WORD1 0 | ||
| 335 | #define PNFSD_SUPPORTED_ATTRS_WORD2 0 | ||
| 336 | #endif /* CONFIG_NFSD_PNFS */ | ||
| 337 | |||
| 328 | #define NFSD4_1_SUPPORTED_ATTRS_WORD0 \ | 338 | #define NFSD4_1_SUPPORTED_ATTRS_WORD0 \ |
| 329 | NFSD4_SUPPORTED_ATTRS_WORD0 | 339 | NFSD4_SUPPORTED_ATTRS_WORD0 |
| 330 | 340 | ||
| 331 | #define NFSD4_1_SUPPORTED_ATTRS_WORD1 \ | 341 | #define NFSD4_1_SUPPORTED_ATTRS_WORD1 \ |
| 332 | NFSD4_SUPPORTED_ATTRS_WORD1 | 342 | (NFSD4_SUPPORTED_ATTRS_WORD1 | PNFSD_SUPPORTED_ATTRS_WORD1) |
| 333 | 343 | ||
| 334 | #define NFSD4_1_SUPPORTED_ATTRS_WORD2 \ | 344 | #define NFSD4_1_SUPPORTED_ATTRS_WORD2 \ |
| 335 | (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) | 345 | (NFSD4_SUPPORTED_ATTRS_WORD2 | PNFSD_SUPPORTED_ATTRS_WORD2 | \ |
| 346 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) | ||
| 336 | 347 | ||
| 348 | /* 4.2 */ | ||
| 337 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | 349 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL |
| 338 | #define NFSD4_2_SECURITY_ATTRS FATTR4_WORD2_SECURITY_LABEL | 350 | #define NFSD4_2_SECURITY_ATTRS FATTR4_WORD2_SECURITY_LABEL |
| 339 | #else | 351 | #else |
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 08236d70c667..84cae2079d21 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h | |||
| @@ -187,6 +187,24 @@ fh_init(struct svc_fh *fhp, int maxsize) | |||
| 187 | return fhp; | 187 | return fhp; |
| 188 | } | 188 | } |
| 189 | 189 | ||
| 190 | static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) | ||
| 191 | { | ||
| 192 | if (fh1->fh_size != fh2->fh_size) | ||
| 193 | return false; | ||
| 194 | if (memcmp(fh1->fh_base.fh_pad, fh2->fh_base.fh_pad, fh1->fh_size) != 0) | ||
| 195 | return false; | ||
| 196 | return true; | ||
| 197 | } | ||
| 198 | |||
| 199 | static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) | ||
| 200 | { | ||
| 201 | if (fh1->fh_fsid_type != fh2->fh_fsid_type) | ||
| 202 | return false; | ||
| 203 | if (memcmp(fh1->fh_fsid, fh2->fh_fsid, key_len(fh1->fh_fsid_type) != 0)) | ||
| 204 | return false; | ||
| 205 | return true; | ||
| 206 | } | ||
| 207 | |||
| 190 | #ifdef CONFIG_NFSD_V3 | 208 | #ifdef CONFIG_NFSD_V3 |
| 191 | /* | 209 | /* |
| 192 | * The wcc data stored in current_fh should be cleared | 210 | * The wcc data stored in current_fh should be cleared |
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 314f5c8f8f1a..9277cc91c21b 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
| @@ -119,6 +119,7 @@ struct svc_program nfsd_program = { | |||
| 119 | static bool nfsd_supported_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1] = { | 119 | static bool nfsd_supported_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1] = { |
| 120 | [0] = 1, | 120 | [0] = 1, |
| 121 | [1] = 1, | 121 | [1] = 1, |
| 122 | [2] = 1, | ||
| 122 | }; | 123 | }; |
| 123 | 124 | ||
| 124 | int nfsd_vers(int vers, enum vers_op change) | 125 | int nfsd_vers(int vers, enum vers_op change) |
diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h new file mode 100644 index 000000000000..fedb4d620a81 --- /dev/null +++ b/fs/nfsd/pnfs.h | |||
| @@ -0,0 +1,81 @@ | |||
| 1 | #ifndef _FS_NFSD_PNFS_H | ||
| 2 | #define _FS_NFSD_PNFS_H 1 | ||
| 3 | |||
| 4 | #include <linux/exportfs.h> | ||
| 5 | #include <linux/nfsd/export.h> | ||
| 6 | |||
| 7 | #include "state.h" | ||
| 8 | #include "xdr4.h" | ||
| 9 | |||
| 10 | struct xdr_stream; | ||
| 11 | |||
| 12 | struct nfsd4_deviceid_map { | ||
| 13 | struct list_head hash; | ||
| 14 | u64 idx; | ||
| 15 | int fsid_type; | ||
| 16 | u32 fsid[]; | ||
| 17 | }; | ||
| 18 | |||
| 19 | struct nfsd4_layout_ops { | ||
| 20 | u32 notify_types; | ||
| 21 | |||
| 22 | __be32 (*proc_getdeviceinfo)(struct super_block *sb, | ||
| 23 | struct nfsd4_getdeviceinfo *gdevp); | ||
| 24 | __be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr, | ||
| 25 | struct nfsd4_getdeviceinfo *gdevp); | ||
| 26 | |||
| 27 | __be32 (*proc_layoutget)(struct inode *, const struct svc_fh *fhp, | ||
| 28 | struct nfsd4_layoutget *lgp); | ||
| 29 | __be32 (*encode_layoutget)(struct xdr_stream *, | ||
| 30 | struct nfsd4_layoutget *lgp); | ||
| 31 | |||
| 32 | __be32 (*proc_layoutcommit)(struct inode *inode, | ||
| 33 | struct nfsd4_layoutcommit *lcp); | ||
| 34 | }; | ||
| 35 | |||
| 36 | extern const struct nfsd4_layout_ops *nfsd4_layout_ops[]; | ||
| 37 | extern const struct nfsd4_layout_ops bl_layout_ops; | ||
| 38 | |||
| 39 | __be32 nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp, | ||
| 40 | struct nfsd4_compound_state *cstate, stateid_t *stateid, | ||
| 41 | bool create, u32 layout_type, struct nfs4_layout_stateid **lsp); | ||
| 42 | __be32 nfsd4_insert_layout(struct nfsd4_layoutget *lgp, | ||
| 43 | struct nfs4_layout_stateid *ls); | ||
| 44 | __be32 nfsd4_return_file_layouts(struct svc_rqst *rqstp, | ||
| 45 | struct nfsd4_compound_state *cstate, | ||
| 46 | struct nfsd4_layoutreturn *lrp); | ||
| 47 | __be32 nfsd4_return_client_layouts(struct svc_rqst *rqstp, | ||
| 48 | struct nfsd4_compound_state *cstate, | ||
| 49 | struct nfsd4_layoutreturn *lrp); | ||
| 50 | int nfsd4_set_deviceid(struct nfsd4_deviceid *id, const struct svc_fh *fhp, | ||
| 51 | u32 device_generation); | ||
| 52 | struct nfsd4_deviceid_map *nfsd4_find_devid_map(int idx); | ||
| 53 | |||
| 54 | #ifdef CONFIG_NFSD_PNFS | ||
| 55 | void nfsd4_setup_layout_type(struct svc_export *exp); | ||
| 56 | void nfsd4_return_all_client_layouts(struct nfs4_client *); | ||
| 57 | void nfsd4_return_all_file_layouts(struct nfs4_client *clp, | ||
| 58 | struct nfs4_file *fp); | ||
| 59 | int nfsd4_init_pnfs(void); | ||
| 60 | void nfsd4_exit_pnfs(void); | ||
| 61 | #else | ||
| 62 | static inline void nfsd4_setup_layout_type(struct svc_export *exp) | ||
| 63 | { | ||
| 64 | } | ||
| 65 | |||
| 66 | static inline void nfsd4_return_all_client_layouts(struct nfs4_client *clp) | ||
| 67 | { | ||
| 68 | } | ||
| 69 | static inline void nfsd4_return_all_file_layouts(struct nfs4_client *clp, | ||
| 70 | struct nfs4_file *fp) | ||
| 71 | { | ||
| 72 | } | ||
| 73 | static inline void nfsd4_exit_pnfs(void) | ||
| 74 | { | ||
| 75 | } | ||
| 76 | static inline int nfsd4_init_pnfs(void) | ||
| 77 | { | ||
| 78 | return 0; | ||
| 79 | } | ||
| 80 | #endif /* CONFIG_NFSD_PNFS */ | ||
| 81 | #endif /* _FS_NFSD_PNFS_H */ | ||
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 9d3be371240a..4f3bfeb11766 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h | |||
| @@ -92,6 +92,7 @@ struct nfs4_stid { | |||
| 92 | /* For a deleg stateid kept around only to process free_stateid's: */ | 92 | /* For a deleg stateid kept around only to process free_stateid's: */ |
| 93 | #define NFS4_REVOKED_DELEG_STID 16 | 93 | #define NFS4_REVOKED_DELEG_STID 16 |
| 94 | #define NFS4_CLOSED_DELEG_STID 32 | 94 | #define NFS4_CLOSED_DELEG_STID 32 |
| 95 | #define NFS4_LAYOUT_STID 64 | ||
| 95 | unsigned char sc_type; | 96 | unsigned char sc_type; |
| 96 | stateid_t sc_stateid; | 97 | stateid_t sc_stateid; |
| 97 | struct nfs4_client *sc_client; | 98 | struct nfs4_client *sc_client; |
| @@ -297,6 +298,9 @@ struct nfs4_client { | |||
| 297 | struct list_head cl_delegations; | 298 | struct list_head cl_delegations; |
| 298 | struct list_head cl_revoked; /* unacknowledged, revoked 4.1 state */ | 299 | struct list_head cl_revoked; /* unacknowledged, revoked 4.1 state */ |
| 299 | struct list_head cl_lru; /* tail queue */ | 300 | struct list_head cl_lru; /* tail queue */ |
| 301 | #ifdef CONFIG_NFSD_PNFS | ||
| 302 | struct list_head cl_lo_states; /* outstanding layout states */ | ||
| 303 | #endif | ||
| 300 | struct xdr_netobj cl_name; /* id generated by client */ | 304 | struct xdr_netobj cl_name; /* id generated by client */ |
| 301 | nfs4_verifier cl_verifier; /* generated by client */ | 305 | nfs4_verifier cl_verifier; /* generated by client */ |
| 302 | time_t cl_time; /* time of last lease renewal */ | 306 | time_t cl_time; /* time of last lease renewal */ |
| @@ -493,9 +497,13 @@ struct nfs4_file { | |||
| 493 | atomic_t fi_access[2]; | 497 | atomic_t fi_access[2]; |
| 494 | u32 fi_share_deny; | 498 | u32 fi_share_deny; |
| 495 | struct file *fi_deleg_file; | 499 | struct file *fi_deleg_file; |
| 496 | atomic_t fi_delegees; | 500 | int fi_delegees; |
| 497 | struct knfsd_fh fi_fhandle; | 501 | struct knfsd_fh fi_fhandle; |
| 498 | bool fi_had_conflict; | 502 | bool fi_had_conflict; |
| 503 | #ifdef CONFIG_NFSD_PNFS | ||
| 504 | struct list_head fi_lo_states; | ||
| 505 | atomic_t fi_lo_recalls; | ||
| 506 | #endif | ||
| 499 | }; | 507 | }; |
| 500 | 508 | ||
| 501 | /* | 509 | /* |
| @@ -528,6 +536,24 @@ static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) | |||
| 528 | return container_of(s, struct nfs4_ol_stateid, st_stid); | 536 | return container_of(s, struct nfs4_ol_stateid, st_stid); |
| 529 | } | 537 | } |
| 530 | 538 | ||
| 539 | struct nfs4_layout_stateid { | ||
| 540 | struct nfs4_stid ls_stid; | ||
| 541 | struct list_head ls_perclnt; | ||
| 542 | struct list_head ls_perfile; | ||
| 543 | spinlock_t ls_lock; | ||
| 544 | struct list_head ls_layouts; | ||
| 545 | u32 ls_layout_type; | ||
| 546 | struct file *ls_file; | ||
| 547 | struct nfsd4_callback ls_recall; | ||
| 548 | stateid_t ls_recall_sid; | ||
| 549 | bool ls_recalled; | ||
| 550 | }; | ||
| 551 | |||
| 552 | static inline struct nfs4_layout_stateid *layoutstateid(struct nfs4_stid *s) | ||
| 553 | { | ||
| 554 | return container_of(s, struct nfs4_layout_stateid, ls_stid); | ||
| 555 | } | ||
| 556 | |||
| 531 | /* flags for preprocess_seqid_op() */ | 557 | /* flags for preprocess_seqid_op() */ |
| 532 | #define RD_STATE 0x00000010 | 558 | #define RD_STATE 0x00000010 |
| 533 | #define WR_STATE 0x00000020 | 559 | #define WR_STATE 0x00000020 |
| @@ -535,6 +561,7 @@ static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) | |||
| 535 | enum nfsd4_cb_op { | 561 | enum nfsd4_cb_op { |
| 536 | NFSPROC4_CLNT_CB_NULL = 0, | 562 | NFSPROC4_CLNT_CB_NULL = 0, |
| 537 | NFSPROC4_CLNT_CB_RECALL, | 563 | NFSPROC4_CLNT_CB_RECALL, |
| 564 | NFSPROC4_CLNT_CB_LAYOUT, | ||
| 538 | NFSPROC4_CLNT_CB_SEQUENCE, | 565 | NFSPROC4_CLNT_CB_SEQUENCE, |
| 539 | }; | 566 | }; |
| 540 | 567 | ||
| @@ -545,6 +572,12 @@ struct nfsd_net; | |||
| 545 | extern __be32 nfs4_preprocess_stateid_op(struct net *net, | 572 | extern __be32 nfs4_preprocess_stateid_op(struct net *net, |
| 546 | struct nfsd4_compound_state *cstate, | 573 | struct nfsd4_compound_state *cstate, |
| 547 | stateid_t *stateid, int flags, struct file **filp); | 574 | stateid_t *stateid, int flags, struct file **filp); |
| 575 | __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, | ||
| 576 | stateid_t *stateid, unsigned char typemask, | ||
| 577 | struct nfs4_stid **s, struct nfsd_net *nn); | ||
| 578 | struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, | ||
| 579 | struct kmem_cache *slab); | ||
| 580 | void nfs4_unhash_stid(struct nfs4_stid *s); | ||
| 548 | void nfs4_put_stid(struct nfs4_stid *s); | 581 | void nfs4_put_stid(struct nfs4_stid *s); |
| 549 | void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *); | 582 | void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *); |
| 550 | extern void nfs4_release_reclaim(struct nfsd_net *); | 583 | extern void nfs4_release_reclaim(struct nfsd_net *); |
| @@ -567,6 +600,14 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, | |||
| 567 | struct nfsd_net *nn); | 600 | struct nfsd_net *nn); |
| 568 | extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); | 601 | extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); |
| 569 | 602 | ||
| 603 | struct nfs4_file *find_file(struct knfsd_fh *fh); | ||
| 604 | void put_nfs4_file(struct nfs4_file *fi); | ||
| 605 | static inline void get_nfs4_file(struct nfs4_file *fi) | ||
| 606 | { | ||
| 607 | atomic_inc(&fi->fi_ref); | ||
| 608 | } | ||
| 609 | struct file *find_any_file(struct nfs4_file *f); | ||
| 610 | |||
| 570 | /* grace period management */ | 611 | /* grace period management */ |
| 571 | void nfsd4_end_grace(struct nfsd_net *nn); | 612 | void nfsd4_end_grace(struct nfsd_net *nn); |
| 572 | 613 | ||
diff --git a/fs/nfsd/trace.c b/fs/nfsd/trace.c new file mode 100644 index 000000000000..82f89070594c --- /dev/null +++ b/fs/nfsd/trace.c | |||
| @@ -0,0 +1,5 @@ | |||
| 1 | |||
| 2 | #include "state.h" | ||
| 3 | |||
| 4 | #define CREATE_TRACE_POINTS | ||
| 5 | #include "trace.h" | ||
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h new file mode 100644 index 000000000000..c668520c344b --- /dev/null +++ b/fs/nfsd/trace.h | |||
| @@ -0,0 +1,54 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2014 Christoph Hellwig. | ||
| 3 | */ | ||
| 4 | #undef TRACE_SYSTEM | ||
| 5 | #define TRACE_SYSTEM nfsd | ||
| 6 | |||
| 7 | #if !defined(_NFSD_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) | ||
| 8 | #define _NFSD_TRACE_H | ||
| 9 | |||
| 10 | #include <linux/tracepoint.h> | ||
| 11 | |||
| 12 | DECLARE_EVENT_CLASS(nfsd_stateid_class, | ||
| 13 | TP_PROTO(stateid_t *stp), | ||
| 14 | TP_ARGS(stp), | ||
| 15 | TP_STRUCT__entry( | ||
| 16 | __field(u32, cl_boot) | ||
| 17 | __field(u32, cl_id) | ||
| 18 | __field(u32, si_id) | ||
| 19 | __field(u32, si_generation) | ||
| 20 | ), | ||
| 21 | TP_fast_assign( | ||
| 22 | __entry->cl_boot = stp->si_opaque.so_clid.cl_boot; | ||
| 23 | __entry->cl_id = stp->si_opaque.so_clid.cl_id; | ||
| 24 | __entry->si_id = stp->si_opaque.so_id; | ||
| 25 | __entry->si_generation = stp->si_generation; | ||
| 26 | ), | ||
| 27 | TP_printk("client %08x:%08x stateid %08x:%08x", | ||
| 28 | __entry->cl_boot, | ||
| 29 | __entry->cl_id, | ||
| 30 | __entry->si_id, | ||
| 31 | __entry->si_generation) | ||
| 32 | ) | ||
| 33 | |||
| 34 | #define DEFINE_STATEID_EVENT(name) \ | ||
| 35 | DEFINE_EVENT(nfsd_stateid_class, name, \ | ||
| 36 | TP_PROTO(stateid_t *stp), \ | ||
| 37 | TP_ARGS(stp)) | ||
| 38 | DEFINE_STATEID_EVENT(layoutstate_alloc); | ||
| 39 | DEFINE_STATEID_EVENT(layoutstate_unhash); | ||
| 40 | DEFINE_STATEID_EVENT(layoutstate_free); | ||
| 41 | DEFINE_STATEID_EVENT(layout_get_lookup_fail); | ||
| 42 | DEFINE_STATEID_EVENT(layout_commit_lookup_fail); | ||
| 43 | DEFINE_STATEID_EVENT(layout_return_lookup_fail); | ||
| 44 | DEFINE_STATEID_EVENT(layout_recall); | ||
| 45 | DEFINE_STATEID_EVENT(layout_recall_done); | ||
| 46 | DEFINE_STATEID_EVENT(layout_recall_fail); | ||
| 47 | DEFINE_STATEID_EVENT(layout_recall_release); | ||
| 48 | |||
| 49 | #endif /* _NFSD_TRACE_H */ | ||
| 50 | |||
| 51 | #undef TRACE_INCLUDE_PATH | ||
| 52 | #define TRACE_INCLUDE_PATH . | ||
| 53 | #define TRACE_INCLUDE_FILE trace | ||
| 54 | #include <trace/define_trace.h> | ||
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 90a5925bd6ab..0bda93e58e1b 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h | |||
| @@ -428,6 +428,61 @@ struct nfsd4_reclaim_complete { | |||
| 428 | u32 rca_one_fs; | 428 | u32 rca_one_fs; |
| 429 | }; | 429 | }; |
| 430 | 430 | ||
| 431 | struct nfsd4_deviceid { | ||
| 432 | u64 fsid_idx; | ||
| 433 | u32 generation; | ||
| 434 | u32 pad; | ||
| 435 | }; | ||
| 436 | |||
| 437 | struct nfsd4_layout_seg { | ||
| 438 | u32 iomode; | ||
| 439 | u64 offset; | ||
| 440 | u64 length; | ||
| 441 | }; | ||
| 442 | |||
| 443 | struct nfsd4_getdeviceinfo { | ||
| 444 | struct nfsd4_deviceid gd_devid; /* request */ | ||
| 445 | u32 gd_layout_type; /* request */ | ||
| 446 | u32 gd_maxcount; /* request */ | ||
| 447 | u32 gd_notify_types;/* request/response */ | ||
| 448 | void *gd_device; /* response */ | ||
| 449 | }; | ||
| 450 | |||
| 451 | struct nfsd4_layoutget { | ||
| 452 | u64 lg_minlength; /* request */ | ||
| 453 | u32 lg_signal; /* request */ | ||
| 454 | u32 lg_layout_type; /* request */ | ||
| 455 | u32 lg_maxcount; /* request */ | ||
| 456 | stateid_t lg_sid; /* request/response */ | ||
| 457 | struct nfsd4_layout_seg lg_seg; /* request/response */ | ||
| 458 | void *lg_content; /* response */ | ||
| 459 | }; | ||
| 460 | |||
| 461 | struct nfsd4_layoutcommit { | ||
| 462 | stateid_t lc_sid; /* request */ | ||
| 463 | struct nfsd4_layout_seg lc_seg; /* request */ | ||
| 464 | u32 lc_reclaim; /* request */ | ||
| 465 | u32 lc_newoffset; /* request */ | ||
| 466 | u64 lc_last_wr; /* request */ | ||
| 467 | struct timespec lc_mtime; /* request */ | ||
| 468 | u32 lc_layout_type; /* request */ | ||
| 469 | u32 lc_up_len; /* layout length */ | ||
| 470 | void *lc_up_layout; /* decoded by callback */ | ||
| 471 | u32 lc_size_chg; /* boolean for response */ | ||
| 472 | u64 lc_newsize; /* response */ | ||
| 473 | }; | ||
| 474 | |||
| 475 | struct nfsd4_layoutreturn { | ||
| 476 | u32 lr_return_type; /* request */ | ||
| 477 | u32 lr_layout_type; /* request */ | ||
| 478 | struct nfsd4_layout_seg lr_seg; /* request */ | ||
| 479 | u32 lr_reclaim; /* request */ | ||
| 480 | u32 lrf_body_len; /* request */ | ||
| 481 | void *lrf_body; /* request */ | ||
| 482 | stateid_t lr_sid; /* request/response */ | ||
| 483 | u32 lrs_present; /* response */ | ||
| 484 | }; | ||
| 485 | |||
| 431 | struct nfsd4_fallocate { | 486 | struct nfsd4_fallocate { |
| 432 | /* request */ | 487 | /* request */ |
| 433 | stateid_t falloc_stateid; | 488 | stateid_t falloc_stateid; |
| @@ -491,6 +546,10 @@ struct nfsd4_op { | |||
| 491 | struct nfsd4_reclaim_complete reclaim_complete; | 546 | struct nfsd4_reclaim_complete reclaim_complete; |
| 492 | struct nfsd4_test_stateid test_stateid; | 547 | struct nfsd4_test_stateid test_stateid; |
| 493 | struct nfsd4_free_stateid free_stateid; | 548 | struct nfsd4_free_stateid free_stateid; |
| 549 | struct nfsd4_getdeviceinfo getdeviceinfo; | ||
| 550 | struct nfsd4_layoutget layoutget; | ||
| 551 | struct nfsd4_layoutcommit layoutcommit; | ||
| 552 | struct nfsd4_layoutreturn layoutreturn; | ||
| 494 | 553 | ||
| 495 | /* NFSv4.2 */ | 554 | /* NFSv4.2 */ |
| 496 | struct nfsd4_fallocate allocate; | 555 | struct nfsd4_fallocate allocate; |
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h index c5c55dfb91a9..c47f6fdb111a 100644 --- a/fs/nfsd/xdr4cb.h +++ b/fs/nfsd/xdr4cb.h | |||
| @@ -21,3 +21,10 @@ | |||
| 21 | #define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ | 21 | #define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ |
| 22 | cb_sequence_dec_sz + \ | 22 | cb_sequence_dec_sz + \ |
| 23 | op_dec_sz) | 23 | op_dec_sz) |
| 24 | #define NFS4_enc_cb_layout_sz (cb_compound_enc_hdr_sz + \ | ||
| 25 | cb_sequence_enc_sz + \ | ||
| 26 | 1 + 3 + \ | ||
| 27 | enc_nfs4_fh_sz + 4) | ||
| 28 | #define NFS4_dec_cb_layout_sz (cb_compound_dec_hdr_sz + \ | ||
| 29 | cb_sequence_dec_sz + \ | ||
| 30 | op_dec_sz) | ||
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 41b223a59a63..fa05e04c5531 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
| 5 | 5 | ||
| 6 | struct dentry; | 6 | struct dentry; |
| 7 | struct iattr; | ||
| 7 | struct inode; | 8 | struct inode; |
| 8 | struct super_block; | 9 | struct super_block; |
| 9 | struct vfsmount; | 10 | struct vfsmount; |
| @@ -180,6 +181,21 @@ struct fid { | |||
| 180 | * get_name is not (which is possibly inconsistent) | 181 | * get_name is not (which is possibly inconsistent) |
| 181 | */ | 182 | */ |
| 182 | 183 | ||
| 184 | /* types of block ranges for multipage write mappings. */ | ||
| 185 | #define IOMAP_HOLE 0x01 /* no blocks allocated, need allocation */ | ||
| 186 | #define IOMAP_DELALLOC 0x02 /* delayed allocation blocks */ | ||
| 187 | #define IOMAP_MAPPED 0x03 /* blocks allocated @blkno */ | ||
| 188 | #define IOMAP_UNWRITTEN 0x04 /* blocks allocated @blkno in unwritten state */ | ||
| 189 | |||
| 190 | #define IOMAP_NULL_BLOCK -1LL /* blkno is not valid */ | ||
| 191 | |||
| 192 | struct iomap { | ||
| 193 | sector_t blkno; /* first sector of mapping */ | ||
| 194 | loff_t offset; /* file offset of mapping, bytes */ | ||
| 195 | u64 length; /* length of mapping, bytes */ | ||
| 196 | int type; /* type of mapping */ | ||
| 197 | }; | ||
| 198 | |||
| 183 | struct export_operations { | 199 | struct export_operations { |
| 184 | int (*encode_fh)(struct inode *inode, __u32 *fh, int *max_len, | 200 | int (*encode_fh)(struct inode *inode, __u32 *fh, int *max_len, |
| 185 | struct inode *parent); | 201 | struct inode *parent); |
| @@ -191,6 +207,13 @@ struct export_operations { | |||
| 191 | struct dentry *child); | 207 | struct dentry *child); |
| 192 | struct dentry * (*get_parent)(struct dentry *child); | 208 | struct dentry * (*get_parent)(struct dentry *child); |
| 193 | int (*commit_metadata)(struct inode *inode); | 209 | int (*commit_metadata)(struct inode *inode); |
| 210 | |||
| 211 | int (*get_uuid)(struct super_block *sb, u8 *buf, u32 *len, u64 *offset); | ||
| 212 | int (*map_blocks)(struct inode *inode, loff_t offset, | ||
| 213 | u64 len, struct iomap *iomap, | ||
| 214 | bool write, u32 *device_generation); | ||
| 215 | int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, | ||
| 216 | int nr_iomaps, struct iattr *iattr); | ||
| 194 | }; | 217 | }; |
| 195 | 218 | ||
| 196 | extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, | 219 | extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, |
diff --git a/include/linux/fs.h b/include/linux/fs.h index f125b88443bd..cdcb1e9d9613 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
| @@ -873,6 +873,7 @@ static inline struct file *get_file(struct file *f) | |||
| 873 | #define FL_DOWNGRADE_PENDING 256 /* Lease is being downgraded */ | 873 | #define FL_DOWNGRADE_PENDING 256 /* Lease is being downgraded */ |
| 874 | #define FL_UNLOCK_PENDING 512 /* Lease is being broken */ | 874 | #define FL_UNLOCK_PENDING 512 /* Lease is being broken */ |
| 875 | #define FL_OFDLCK 1024 /* lock is "owned" by struct file */ | 875 | #define FL_OFDLCK 1024 /* lock is "owned" by struct file */ |
| 876 | #define FL_LAYOUT 2048 /* outstanding pNFS layout */ | ||
| 876 | 877 | ||
| 877 | /* | 878 | /* |
| 878 | * Special return value from posix_lock_file() and vfs_lock_file() for | 879 | * Special return value from posix_lock_file() and vfs_lock_file() for |
| @@ -2035,6 +2036,16 @@ static inline int break_deleg_wait(struct inode **delegated_inode) | |||
| 2035 | return ret; | 2036 | return ret; |
| 2036 | } | 2037 | } |
| 2037 | 2038 | ||
| 2039 | static inline int break_layout(struct inode *inode, bool wait) | ||
| 2040 | { | ||
| 2041 | smp_mb(); | ||
| 2042 | if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) | ||
| 2043 | return __break_lease(inode, | ||
| 2044 | wait ? O_WRONLY : O_WRONLY | O_NONBLOCK, | ||
| 2045 | FL_LAYOUT); | ||
| 2046 | return 0; | ||
| 2047 | } | ||
| 2048 | |||
| 2038 | #else /* !CONFIG_FILE_LOCKING */ | 2049 | #else /* !CONFIG_FILE_LOCKING */ |
| 2039 | static inline int locks_mandatory_locked(struct file *file) | 2050 | static inline int locks_mandatory_locked(struct file *file) |
| 2040 | { | 2051 | { |
| @@ -2090,6 +2101,11 @@ static inline int break_deleg_wait(struct inode **delegated_inode) | |||
| 2090 | return 0; | 2101 | return 0; |
| 2091 | } | 2102 | } |
| 2092 | 2103 | ||
| 2104 | static inline int break_layout(struct inode *inode, bool wait) | ||
| 2105 | { | ||
| 2106 | return 0; | ||
| 2107 | } | ||
| 2108 | |||
| 2093 | #endif /* CONFIG_FILE_LOCKING */ | 2109 | #endif /* CONFIG_FILE_LOCKING */ |
| 2094 | 2110 | ||
| 2095 | /* fs/open.c */ | 2111 | /* fs/open.c */ |
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index de7c91ca427e..ed43cb74b11d 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h | |||
| @@ -411,6 +411,7 @@ enum lock_type4 { | |||
| 411 | #define FATTR4_WORD1_TIME_MODIFY_SET (1UL << 22) | 411 | #define FATTR4_WORD1_TIME_MODIFY_SET (1UL << 22) |
| 412 | #define FATTR4_WORD1_MOUNTED_ON_FILEID (1UL << 23) | 412 | #define FATTR4_WORD1_MOUNTED_ON_FILEID (1UL << 23) |
| 413 | #define FATTR4_WORD1_FS_LAYOUT_TYPES (1UL << 30) | 413 | #define FATTR4_WORD1_FS_LAYOUT_TYPES (1UL << 30) |
| 414 | #define FATTR4_WORD2_LAYOUT_TYPES (1UL << 0) | ||
| 414 | #define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) | 415 | #define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) |
| 415 | #define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) | 416 | #define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) |
| 416 | #define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) | 417 | #define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) |
| @@ -517,6 +518,7 @@ enum pnfs_layouttype { | |||
| 517 | LAYOUT_OSD2_OBJECTS = 2, | 518 | LAYOUT_OSD2_OBJECTS = 2, |
| 518 | LAYOUT_BLOCK_VOLUME = 3, | 519 | LAYOUT_BLOCK_VOLUME = 3, |
| 519 | LAYOUT_FLEX_FILES = 4, | 520 | LAYOUT_FLEX_FILES = 4, |
| 521 | LAYOUT_TYPE_MAX | ||
| 520 | }; | 522 | }; |
| 521 | 523 | ||
| 522 | /* used for both layout return and recall */ | 524 | /* used for both layout return and recall */ |
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 6f22cfeef5e3..fae6fb947fc8 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h | |||
| @@ -110,7 +110,7 @@ struct svc_serv { | |||
| 110 | * We use sv_nrthreads as a reference count. svc_destroy() drops | 110 | * We use sv_nrthreads as a reference count. svc_destroy() drops |
| 111 | * this refcount, so we need to bump it up around operations that | 111 | * this refcount, so we need to bump it up around operations that |
| 112 | * change the number of threads. Horrible, but there it is. | 112 | * change the number of threads. Horrible, but there it is. |
| 113 | * Should be called with the BKL held. | 113 | * Should be called with the "service mutex" held. |
| 114 | */ | 114 | */ |
| 115 | static inline void svc_get(struct svc_serv *serv) | 115 | static inline void svc_get(struct svc_serv *serv) |
| 116 | { | 116 | { |
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index ddfe88f52219..df8edf8ec914 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h | |||
| @@ -77,6 +77,7 @@ struct svc_rdma_op_ctxt { | |||
| 77 | enum ib_wr_opcode wr_op; | 77 | enum ib_wr_opcode wr_op; |
| 78 | enum ib_wc_status wc_status; | 78 | enum ib_wc_status wc_status; |
| 79 | u32 byte_len; | 79 | u32 byte_len; |
| 80 | u32 position; | ||
| 80 | struct svcxprt_rdma *xprt; | 81 | struct svcxprt_rdma *xprt; |
| 81 | unsigned long flags; | 82 | unsigned long flags; |
| 82 | enum dma_data_direction direction; | 83 | enum dma_data_direction direction; |
| @@ -148,6 +149,10 @@ struct svcxprt_rdma { | |||
| 148 | struct ib_cq *sc_rq_cq; | 149 | struct ib_cq *sc_rq_cq; |
| 149 | struct ib_cq *sc_sq_cq; | 150 | struct ib_cq *sc_sq_cq; |
| 150 | struct ib_mr *sc_phys_mr; /* MR for server memory */ | 151 | struct ib_mr *sc_phys_mr; /* MR for server memory */ |
| 152 | int (*sc_reader)(struct svcxprt_rdma *, | ||
| 153 | struct svc_rqst *, | ||
| 154 | struct svc_rdma_op_ctxt *, | ||
| 155 | int *, u32 *, u32, u32, u64, bool); | ||
| 151 | u32 sc_dev_caps; /* distilled device caps */ | 156 | u32 sc_dev_caps; /* distilled device caps */ |
| 152 | u32 sc_dma_lkey; /* local dma key */ | 157 | u32 sc_dma_lkey; /* local dma key */ |
| 153 | unsigned int sc_frmr_pg_list_len; | 158 | unsigned int sc_frmr_pg_list_len; |
| @@ -176,8 +181,6 @@ struct svcxprt_rdma { | |||
| 176 | #define RPCRDMA_MAX_REQ_SIZE 4096 | 181 | #define RPCRDMA_MAX_REQ_SIZE 4096 |
| 177 | 182 | ||
| 178 | /* svc_rdma_marshal.c */ | 183 | /* svc_rdma_marshal.c */ |
| 179 | extern void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *, | ||
| 180 | int *, int *); | ||
| 181 | extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); | 184 | extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); |
| 182 | extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *); | 185 | extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *); |
| 183 | extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, | 186 | extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, |
| @@ -195,6 +198,12 @@ extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *); | |||
| 195 | 198 | ||
| 196 | /* svc_rdma_recvfrom.c */ | 199 | /* svc_rdma_recvfrom.c */ |
| 197 | extern int svc_rdma_recvfrom(struct svc_rqst *); | 200 | extern int svc_rdma_recvfrom(struct svc_rqst *); |
| 201 | extern int rdma_read_chunk_lcl(struct svcxprt_rdma *, struct svc_rqst *, | ||
| 202 | struct svc_rdma_op_ctxt *, int *, u32 *, | ||
| 203 | u32, u32, u64, bool); | ||
| 204 | extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *, | ||
| 205 | struct svc_rdma_op_ctxt *, int *, u32 *, | ||
| 206 | u32, u32, u64, bool); | ||
| 198 | 207 | ||
| 199 | /* svc_rdma_sendto.c */ | 208 | /* svc_rdma_sendto.c */ |
| 200 | extern int svc_rdma_sendto(struct svc_rqst *); | 209 | extern int svc_rdma_sendto(struct svc_rqst *); |
diff --git a/include/uapi/linux/nfsd/debug.h b/include/uapi/linux/nfsd/debug.h index 1fdc95bb2375..0bf130a1c58d 100644 --- a/include/uapi/linux/nfsd/debug.h +++ b/include/uapi/linux/nfsd/debug.h | |||
| @@ -32,6 +32,7 @@ | |||
| 32 | #define NFSDDBG_REPCACHE 0x0080 | 32 | #define NFSDDBG_REPCACHE 0x0080 |
| 33 | #define NFSDDBG_XDR 0x0100 | 33 | #define NFSDDBG_XDR 0x0100 |
| 34 | #define NFSDDBG_LOCKD 0x0200 | 34 | #define NFSDDBG_LOCKD 0x0200 |
| 35 | #define NFSDDBG_PNFS 0x0400 | ||
| 35 | #define NFSDDBG_ALL 0x7FFF | 36 | #define NFSDDBG_ALL 0x7FFF |
| 36 | #define NFSDDBG_NOCHANGE 0xFFFF | 37 | #define NFSDDBG_NOCHANGE 0xFFFF |
| 37 | 38 | ||
diff --git a/include/uapi/linux/nfsd/export.h b/include/uapi/linux/nfsd/export.h index 584b6ef3a5e8..4742f2cb42f2 100644 --- a/include/uapi/linux/nfsd/export.h +++ b/include/uapi/linux/nfsd/export.h | |||
| @@ -47,8 +47,10 @@ | |||
| 47 | * exported filesystem. | 47 | * exported filesystem. |
| 48 | */ | 48 | */ |
| 49 | #define NFSEXP_V4ROOT 0x10000 | 49 | #define NFSEXP_V4ROOT 0x10000 |
| 50 | #define NFSEXP_NOPNFS 0x20000 | ||
| 51 | |||
| 50 | /* All flags that we claim to support. (Note we don't support NOACL.) */ | 52 | /* All flags that we claim to support. (Note we don't support NOACL.) */ |
| 51 | #define NFSEXP_ALLFLAGS 0x1FE7F | 53 | #define NFSEXP_ALLFLAGS 0x3FE7F |
| 52 | 54 | ||
| 53 | /* The flags that may vary depending on security flavor: */ | 55 | /* The flags that may vary depending on security flavor: */ |
| 54 | #define NFSEXP_SECINFO_FLAGS (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \ | 56 | #define NFSEXP_SECINFO_FLAGS (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \ |
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 91eaef1844c8..78974e4d9ad2 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c | |||
| @@ -768,8 +768,8 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) | |||
| 768 | EXPORT_SYMBOL_GPL(svc_set_num_threads); | 768 | EXPORT_SYMBOL_GPL(svc_set_num_threads); |
| 769 | 769 | ||
| 770 | /* | 770 | /* |
| 771 | * Called from a server thread as it's exiting. Caller must hold the BKL or | 771 | * Called from a server thread as it's exiting. Caller must hold the "service |
| 772 | * the "service mutex", whichever is appropriate for the service. | 772 | * mutex" for the service. |
| 773 | */ | 773 | */ |
| 774 | void | 774 | void |
| 775 | svc_exit_thread(struct svc_rqst *rqstp) | 775 | svc_exit_thread(struct svc_rqst *rqstp) |
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c69358b3cf7f..163ac45c3639 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c | |||
| @@ -42,7 +42,7 @@ static LIST_HEAD(svc_xprt_class_list); | |||
| 42 | * svc_pool->sp_lock protects most of the fields of that pool. | 42 | * svc_pool->sp_lock protects most of the fields of that pool. |
| 43 | * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. | 43 | * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. |
| 44 | * when both need to be taken (rare), svc_serv->sv_lock is first. | 44 | * when both need to be taken (rare), svc_serv->sv_lock is first. |
| 45 | * BKL protects svc_serv->sv_nrthread. | 45 | * The "service mutex" protects svc_serv->sv_nrthreads. |
| 46 | * svc_sock->sk_lock protects the svc_sock->sk_deferred list | 46 | * svc_sock->sk_lock protects the svc_sock->sk_deferred list |
| 47 | * and the ->sk_info_authunix cache. | 47 | * and the ->sk_info_authunix cache. |
| 48 | * | 48 | * |
| @@ -67,7 +67,6 @@ static LIST_HEAD(svc_xprt_class_list); | |||
| 67 | * that no other thread will be using the transport or will | 67 | * that no other thread will be using the transport or will |
| 68 | * try to set XPT_DEAD. | 68 | * try to set XPT_DEAD. |
| 69 | */ | 69 | */ |
| 70 | |||
| 71 | int svc_reg_xprt_class(struct svc_xprt_class *xcl) | 70 | int svc_reg_xprt_class(struct svc_xprt_class *xcl) |
| 72 | { | 71 | { |
| 73 | struct svc_xprt_class *cl; | 72 | struct svc_xprt_class *cl; |
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c index 65b146297f5a..b681855cf970 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c | |||
| @@ -71,22 +71,6 @@ static u32 *decode_read_list(u32 *va, u32 *vaend) | |||
| 71 | } | 71 | } |
| 72 | 72 | ||
| 73 | /* | 73 | /* |
| 74 | * Determine number of chunks and total bytes in chunk list. The chunk | ||
| 75 | * list has already been verified to fit within the RPCRDMA header. | ||
| 76 | */ | ||
| 77 | void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch, | ||
| 78 | int *ch_count, int *byte_count) | ||
| 79 | { | ||
| 80 | /* compute the number of bytes represented by read chunks */ | ||
| 81 | *byte_count = 0; | ||
| 82 | *ch_count = 0; | ||
| 83 | for (; ch->rc_discrim != 0; ch++) { | ||
| 84 | *byte_count = *byte_count + ntohl(ch->rc_target.rs_length); | ||
| 85 | *ch_count = *ch_count + 1; | ||
| 86 | } | ||
| 87 | } | ||
| 88 | |||
| 89 | /* | ||
| 90 | * Decodes a write chunk list. The expected format is as follows: | 74 | * Decodes a write chunk list. The expected format is as follows: |
| 91 | * descrim : xdr_one | 75 | * descrim : xdr_one |
| 92 | * nchunks : <count> | 76 | * nchunks : <count> |
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index e0110270d650..f9f13a32ddb8 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | |||
| @@ -43,7 +43,6 @@ | |||
| 43 | #include <linux/sunrpc/debug.h> | 43 | #include <linux/sunrpc/debug.h> |
| 44 | #include <linux/sunrpc/rpc_rdma.h> | 44 | #include <linux/sunrpc/rpc_rdma.h> |
| 45 | #include <linux/spinlock.h> | 45 | #include <linux/spinlock.h> |
| 46 | #include <linux/highmem.h> | ||
| 47 | #include <asm/unaligned.h> | 46 | #include <asm/unaligned.h> |
| 48 | #include <rdma/ib_verbs.h> | 47 | #include <rdma/ib_verbs.h> |
| 49 | #include <rdma/rdma_cm.h> | 48 | #include <rdma/rdma_cm.h> |
| @@ -60,6 +59,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, | |||
| 60 | struct svc_rdma_op_ctxt *ctxt, | 59 | struct svc_rdma_op_ctxt *ctxt, |
| 61 | u32 byte_count) | 60 | u32 byte_count) |
| 62 | { | 61 | { |
| 62 | struct rpcrdma_msg *rmsgp; | ||
| 63 | struct page *page; | 63 | struct page *page; |
| 64 | u32 bc; | 64 | u32 bc; |
| 65 | int sge_no; | 65 | int sge_no; |
| @@ -82,7 +82,14 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, | |||
| 82 | /* If data remains, store it in the pagelist */ | 82 | /* If data remains, store it in the pagelist */ |
| 83 | rqstp->rq_arg.page_len = bc; | 83 | rqstp->rq_arg.page_len = bc; |
| 84 | rqstp->rq_arg.page_base = 0; | 84 | rqstp->rq_arg.page_base = 0; |
| 85 | rqstp->rq_arg.pages = &rqstp->rq_pages[1]; | 85 | |
| 86 | /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */ | ||
| 87 | rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; | ||
| 88 | if (be32_to_cpu(rmsgp->rm_type) == RDMA_NOMSG) | ||
| 89 | rqstp->rq_arg.pages = &rqstp->rq_pages[0]; | ||
| 90 | else | ||
| 91 | rqstp->rq_arg.pages = &rqstp->rq_pages[1]; | ||
| 92 | |||
| 86 | sge_no = 1; | 93 | sge_no = 1; |
| 87 | while (bc && sge_no < ctxt->count) { | 94 | while (bc && sge_no < ctxt->count) { |
| 88 | page = ctxt->pages[sge_no]; | 95 | page = ctxt->pages[sge_no]; |
| @@ -95,14 +102,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, | |||
| 95 | rqstp->rq_respages = &rqstp->rq_pages[sge_no]; | 102 | rqstp->rq_respages = &rqstp->rq_pages[sge_no]; |
| 96 | rqstp->rq_next_page = rqstp->rq_respages + 1; | 103 | rqstp->rq_next_page = rqstp->rq_respages + 1; |
| 97 | 104 | ||
| 98 | /* We should never run out of SGE because the limit is defined to | ||
| 99 | * support the max allowed RPC data length | ||
| 100 | */ | ||
| 101 | BUG_ON(bc && (sge_no == ctxt->count)); | ||
| 102 | BUG_ON((rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len) | ||
| 103 | != byte_count); | ||
| 104 | BUG_ON(rqstp->rq_arg.len != byte_count); | ||
| 105 | |||
| 106 | /* If not all pages were used from the SGL, free the remaining ones */ | 105 | /* If not all pages were used from the SGL, free the remaining ones */ |
| 107 | bc = sge_no; | 106 | bc = sge_no; |
| 108 | while (sge_no < ctxt->count) { | 107 | while (sge_no < ctxt->count) { |
| @@ -125,26 +124,16 @@ static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) | |||
| 125 | return min_t(int, sge_count, xprt->sc_max_sge); | 124 | return min_t(int, sge_count, xprt->sc_max_sge); |
| 126 | } | 125 | } |
| 127 | 126 | ||
| 128 | typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt, | ||
| 129 | struct svc_rqst *rqstp, | ||
| 130 | struct svc_rdma_op_ctxt *head, | ||
| 131 | int *page_no, | ||
| 132 | u32 *page_offset, | ||
| 133 | u32 rs_handle, | ||
| 134 | u32 rs_length, | ||
| 135 | u64 rs_offset, | ||
| 136 | int last); | ||
| 137 | |||
| 138 | /* Issue an RDMA_READ using the local lkey to map the data sink */ | 127 | /* Issue an RDMA_READ using the local lkey to map the data sink */ |
| 139 | static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, | 128 | int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, |
| 140 | struct svc_rqst *rqstp, | 129 | struct svc_rqst *rqstp, |
| 141 | struct svc_rdma_op_ctxt *head, | 130 | struct svc_rdma_op_ctxt *head, |
| 142 | int *page_no, | 131 | int *page_no, |
| 143 | u32 *page_offset, | 132 | u32 *page_offset, |
| 144 | u32 rs_handle, | 133 | u32 rs_handle, |
| 145 | u32 rs_length, | 134 | u32 rs_length, |
| 146 | u64 rs_offset, | 135 | u64 rs_offset, |
| 147 | int last) | 136 | bool last) |
| 148 | { | 137 | { |
| 149 | struct ib_send_wr read_wr; | 138 | struct ib_send_wr read_wr; |
| 150 | int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; | 139 | int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; |
| @@ -229,15 +218,15 @@ static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, | |||
| 229 | } | 218 | } |
| 230 | 219 | ||
| 231 | /* Issue an RDMA_READ using an FRMR to map the data sink */ | 220 | /* Issue an RDMA_READ using an FRMR to map the data sink */ |
| 232 | static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, | 221 | int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, |
| 233 | struct svc_rqst *rqstp, | 222 | struct svc_rqst *rqstp, |
| 234 | struct svc_rdma_op_ctxt *head, | 223 | struct svc_rdma_op_ctxt *head, |
| 235 | int *page_no, | 224 | int *page_no, |
| 236 | u32 *page_offset, | 225 | u32 *page_offset, |
| 237 | u32 rs_handle, | 226 | u32 rs_handle, |
| 238 | u32 rs_length, | 227 | u32 rs_length, |
| 239 | u64 rs_offset, | 228 | u64 rs_offset, |
| 240 | int last) | 229 | bool last) |
| 241 | { | 230 | { |
| 242 | struct ib_send_wr read_wr; | 231 | struct ib_send_wr read_wr; |
| 243 | struct ib_send_wr inv_wr; | 232 | struct ib_send_wr inv_wr; |
| @@ -365,24 +354,84 @@ static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, | |||
| 365 | return ret; | 354 | return ret; |
| 366 | } | 355 | } |
| 367 | 356 | ||
| 357 | static unsigned int | ||
| 358 | rdma_rcl_chunk_count(struct rpcrdma_read_chunk *ch) | ||
| 359 | { | ||
| 360 | unsigned int count; | ||
| 361 | |||
| 362 | for (count = 0; ch->rc_discrim != xdr_zero; ch++) | ||
| 363 | count++; | ||
| 364 | return count; | ||
| 365 | } | ||
| 366 | |||
| 367 | /* If there was additional inline content, append it to the end of arg.pages. | ||
| 368 | * Tail copy has to be done after the reader function has determined how many | ||
| 369 | * pages are needed for RDMA READ. | ||
| 370 | */ | ||
| 371 | static int | ||
| 372 | rdma_copy_tail(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head, | ||
| 373 | u32 position, u32 byte_count, u32 page_offset, int page_no) | ||
| 374 | { | ||
| 375 | char *srcp, *destp; | ||
| 376 | int ret; | ||
| 377 | |||
| 378 | ret = 0; | ||
| 379 | srcp = head->arg.head[0].iov_base + position; | ||
| 380 | byte_count = head->arg.head[0].iov_len - position; | ||
| 381 | if (byte_count > PAGE_SIZE) { | ||
| 382 | dprintk("svcrdma: large tail unsupported\n"); | ||
| 383 | return 0; | ||
| 384 | } | ||
| 385 | |||
| 386 | /* Fit as much of the tail on the current page as possible */ | ||
| 387 | if (page_offset != PAGE_SIZE) { | ||
| 388 | destp = page_address(rqstp->rq_arg.pages[page_no]); | ||
| 389 | destp += page_offset; | ||
| 390 | while (byte_count--) { | ||
| 391 | *destp++ = *srcp++; | ||
| 392 | page_offset++; | ||
| 393 | if (page_offset == PAGE_SIZE && byte_count) | ||
| 394 | goto more; | ||
| 395 | } | ||
| 396 | goto done; | ||
| 397 | } | ||
| 398 | |||
| 399 | more: | ||
| 400 | /* Fit the rest on the next page */ | ||
| 401 | page_no++; | ||
| 402 | destp = page_address(rqstp->rq_arg.pages[page_no]); | ||
| 403 | while (byte_count--) | ||
| 404 | *destp++ = *srcp++; | ||
| 405 | |||
| 406 | rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1]; | ||
| 407 | rqstp->rq_next_page = rqstp->rq_respages + 1; | ||
| 408 | |||
| 409 | done: | ||
| 410 | byte_count = head->arg.head[0].iov_len - position; | ||
| 411 | head->arg.page_len += byte_count; | ||
| 412 | head->arg.len += byte_count; | ||
| 413 | head->arg.buflen += byte_count; | ||
| 414 | return 1; | ||
| 415 | } | ||
| 416 | |||
| 368 | static int rdma_read_chunks(struct svcxprt_rdma *xprt, | 417 | static int rdma_read_chunks(struct svcxprt_rdma *xprt, |
| 369 | struct rpcrdma_msg *rmsgp, | 418 | struct rpcrdma_msg *rmsgp, |
| 370 | struct svc_rqst *rqstp, | 419 | struct svc_rqst *rqstp, |
| 371 | struct svc_rdma_op_ctxt *head) | 420 | struct svc_rdma_op_ctxt *head) |
| 372 | { | 421 | { |
| 373 | int page_no, ch_count, ret; | 422 | int page_no, ret; |
| 374 | struct rpcrdma_read_chunk *ch; | 423 | struct rpcrdma_read_chunk *ch; |
| 375 | u32 page_offset, byte_count; | 424 | u32 handle, page_offset, byte_count; |
| 425 | u32 position; | ||
| 376 | u64 rs_offset; | 426 | u64 rs_offset; |
| 377 | rdma_reader_fn reader; | 427 | bool last; |
| 378 | 428 | ||
| 379 | /* If no read list is present, return 0 */ | 429 | /* If no read list is present, return 0 */ |
| 380 | ch = svc_rdma_get_read_chunk(rmsgp); | 430 | ch = svc_rdma_get_read_chunk(rmsgp); |
| 381 | if (!ch) | 431 | if (!ch) |
| 382 | return 0; | 432 | return 0; |
| 383 | 433 | ||
| 384 | svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); | 434 | if (rdma_rcl_chunk_count(ch) > RPCSVC_MAXPAGES) |
| 385 | if (ch_count > RPCSVC_MAXPAGES) | ||
| 386 | return -EINVAL; | 435 | return -EINVAL; |
| 387 | 436 | ||
| 388 | /* The request is completed when the RDMA_READs complete. The | 437 | /* The request is completed when the RDMA_READs complete. The |
| @@ -391,34 +440,41 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, | |||
| 391 | */ | 440 | */ |
| 392 | head->arg.head[0] = rqstp->rq_arg.head[0]; | 441 | head->arg.head[0] = rqstp->rq_arg.head[0]; |
| 393 | head->arg.tail[0] = rqstp->rq_arg.tail[0]; | 442 | head->arg.tail[0] = rqstp->rq_arg.tail[0]; |
| 394 | head->arg.pages = &head->pages[head->count]; | ||
| 395 | head->hdr_count = head->count; | 443 | head->hdr_count = head->count; |
| 396 | head->arg.page_base = 0; | 444 | head->arg.page_base = 0; |
| 397 | head->arg.page_len = 0; | 445 | head->arg.page_len = 0; |
| 398 | head->arg.len = rqstp->rq_arg.len; | 446 | head->arg.len = rqstp->rq_arg.len; |
| 399 | head->arg.buflen = rqstp->rq_arg.buflen; | 447 | head->arg.buflen = rqstp->rq_arg.buflen; |
| 400 | 448 | ||
| 401 | /* Use FRMR if supported */ | 449 | ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; |
| 402 | if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) | 450 | position = be32_to_cpu(ch->rc_position); |
| 403 | reader = rdma_read_chunk_frmr; | 451 | |
| 404 | else | 452 | /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */ |
| 405 | reader = rdma_read_chunk_lcl; | 453 | if (position == 0) { |
| 454 | head->arg.pages = &head->pages[0]; | ||
| 455 | page_offset = head->byte_len; | ||
| 456 | } else { | ||
| 457 | head->arg.pages = &head->pages[head->count]; | ||
| 458 | page_offset = 0; | ||
| 459 | } | ||
| 406 | 460 | ||
| 407 | page_no = 0; page_offset = 0; | 461 | ret = 0; |
| 408 | for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; | 462 | page_no = 0; |
| 409 | ch->rc_discrim != 0; ch++) { | 463 | for (; ch->rc_discrim != xdr_zero; ch++) { |
| 464 | if (be32_to_cpu(ch->rc_position) != position) | ||
| 465 | goto err; | ||
| 410 | 466 | ||
| 467 | handle = be32_to_cpu(ch->rc_target.rs_handle), | ||
| 468 | byte_count = be32_to_cpu(ch->rc_target.rs_length); | ||
| 411 | xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset, | 469 | xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset, |
| 412 | &rs_offset); | 470 | &rs_offset); |
| 413 | byte_count = ntohl(ch->rc_target.rs_length); | ||
| 414 | 471 | ||
| 415 | while (byte_count > 0) { | 472 | while (byte_count > 0) { |
| 416 | ret = reader(xprt, rqstp, head, | 473 | last = (ch + 1)->rc_discrim == xdr_zero; |
| 417 | &page_no, &page_offset, | 474 | ret = xprt->sc_reader(xprt, rqstp, head, |
| 418 | ntohl(ch->rc_target.rs_handle), | 475 | &page_no, &page_offset, |
| 419 | byte_count, rs_offset, | 476 | handle, byte_count, |
| 420 | ((ch+1)->rc_discrim == 0) /* last */ | 477 | rs_offset, last); |
| 421 | ); | ||
| 422 | if (ret < 0) | 478 | if (ret < 0) |
| 423 | goto err; | 479 | goto err; |
| 424 | byte_count -= ret; | 480 | byte_count -= ret; |
| @@ -426,7 +482,24 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, | |||
| 426 | head->arg.buflen += ret; | 482 | head->arg.buflen += ret; |
| 427 | } | 483 | } |
| 428 | } | 484 | } |
| 485 | |||
| 486 | /* Read list may need XDR round-up (see RFC 5666, s. 3.7) */ | ||
| 487 | if (page_offset & 3) { | ||
| 488 | u32 pad = 4 - (page_offset & 3); | ||
| 489 | |||
| 490 | head->arg.page_len += pad; | ||
| 491 | head->arg.len += pad; | ||
| 492 | head->arg.buflen += pad; | ||
| 493 | page_offset += pad; | ||
| 494 | } | ||
| 495 | |||
| 429 | ret = 1; | 496 | ret = 1; |
| 497 | if (position && position < head->arg.head[0].iov_len) | ||
| 498 | ret = rdma_copy_tail(rqstp, head, position, | ||
| 499 | byte_count, page_offset, page_no); | ||
| 500 | head->arg.head[0].iov_len = position; | ||
| 501 | head->position = position; | ||
| 502 | |||
| 430 | err: | 503 | err: |
| 431 | /* Detach arg pages. svc_recv will replenish them */ | 504 | /* Detach arg pages. svc_recv will replenish them */ |
| 432 | for (page_no = 0; | 505 | for (page_no = 0; |
| @@ -436,47 +509,33 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, | |||
| 436 | return ret; | 509 | return ret; |
| 437 | } | 510 | } |
| 438 | 511 | ||
| 439 | /* | ||
| 440 | * To avoid a separate RDMA READ just for a handful of zero bytes, | ||
| 441 | * RFC 5666 section 3.7 allows the client to omit the XDR zero pad | ||
| 442 | * in chunk lists. | ||
| 443 | */ | ||
| 444 | static void | ||
| 445 | rdma_fix_xdr_pad(struct xdr_buf *buf) | ||
| 446 | { | ||
| 447 | unsigned int page_len = buf->page_len; | ||
| 448 | unsigned int size = (XDR_QUADLEN(page_len) << 2) - page_len; | ||
| 449 | unsigned int offset, pg_no; | ||
| 450 | char *p; | ||
| 451 | |||
| 452 | if (size == 0) | ||
| 453 | return; | ||
| 454 | |||
| 455 | pg_no = page_len >> PAGE_SHIFT; | ||
| 456 | offset = page_len & ~PAGE_MASK; | ||
| 457 | p = page_address(buf->pages[pg_no]); | ||
| 458 | memset(p + offset, 0, size); | ||
| 459 | |||
| 460 | buf->page_len += size; | ||
| 461 | buf->buflen += size; | ||
| 462 | buf->len += size; | ||
| 463 | } | ||
| 464 | |||
| 465 | static int rdma_read_complete(struct svc_rqst *rqstp, | 512 | static int rdma_read_complete(struct svc_rqst *rqstp, |
| 466 | struct svc_rdma_op_ctxt *head) | 513 | struct svc_rdma_op_ctxt *head) |
| 467 | { | 514 | { |
| 468 | int page_no; | 515 | int page_no; |
| 469 | int ret; | 516 | int ret; |
| 470 | 517 | ||
| 471 | BUG_ON(!head); | ||
| 472 | |||
| 473 | /* Copy RPC pages */ | 518 | /* Copy RPC pages */ |
| 474 | for (page_no = 0; page_no < head->count; page_no++) { | 519 | for (page_no = 0; page_no < head->count; page_no++) { |
| 475 | put_page(rqstp->rq_pages[page_no]); | 520 | put_page(rqstp->rq_pages[page_no]); |
| 476 | rqstp->rq_pages[page_no] = head->pages[page_no]; | 521 | rqstp->rq_pages[page_no] = head->pages[page_no]; |
| 477 | } | 522 | } |
| 523 | |||
| 524 | /* Adjustments made for RDMA_NOMSG type requests */ | ||
| 525 | if (head->position == 0) { | ||
| 526 | if (head->arg.len <= head->sge[0].length) { | ||
| 527 | head->arg.head[0].iov_len = head->arg.len - | ||
| 528 | head->byte_len; | ||
| 529 | head->arg.page_len = 0; | ||
| 530 | } else { | ||
| 531 | head->arg.head[0].iov_len = head->sge[0].length - | ||
| 532 | head->byte_len; | ||
| 533 | head->arg.page_len = head->arg.len - | ||
| 534 | head->sge[0].length; | ||
| 535 | } | ||
| 536 | } | ||
| 537 | |||
| 478 | /* Point rq_arg.pages past header */ | 538 | /* Point rq_arg.pages past header */ |
| 479 | rdma_fix_xdr_pad(&head->arg); | ||
| 480 | rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count]; | 539 | rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count]; |
| 481 | rqstp->rq_arg.page_len = head->arg.page_len; | 540 | rqstp->rq_arg.page_len = head->arg.page_len; |
| 482 | rqstp->rq_arg.page_base = head->arg.page_base; | 541 | rqstp->rq_arg.page_base = head->arg.page_base; |
| @@ -501,8 +560,8 @@ static int rdma_read_complete(struct svc_rqst *rqstp, | |||
| 501 | ret = rqstp->rq_arg.head[0].iov_len | 560 | ret = rqstp->rq_arg.head[0].iov_len |
| 502 | + rqstp->rq_arg.page_len | 561 | + rqstp->rq_arg.page_len |
| 503 | + rqstp->rq_arg.tail[0].iov_len; | 562 | + rqstp->rq_arg.tail[0].iov_len; |
| 504 | dprintk("svcrdma: deferred read ret=%d, rq_arg.len =%d, " | 563 | dprintk("svcrdma: deferred read ret=%d, rq_arg.len=%u, " |
| 505 | "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n", | 564 | "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zu\n", |
| 506 | ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base, | 565 | ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base, |
| 507 | rqstp->rq_arg.head[0].iov_len); | 566 | rqstp->rq_arg.head[0].iov_len); |
| 508 | 567 | ||
| @@ -558,7 +617,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) | |||
| 558 | } | 617 | } |
| 559 | dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n", | 618 | dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n", |
| 560 | ctxt, rdma_xprt, rqstp, ctxt->wc_status); | 619 | ctxt, rdma_xprt, rqstp, ctxt->wc_status); |
| 561 | BUG_ON(ctxt->wc_status != IB_WC_SUCCESS); | ||
| 562 | atomic_inc(&rdma_stat_recv); | 620 | atomic_inc(&rdma_stat_recv); |
| 563 | 621 | ||
| 564 | /* Build up the XDR from the receive buffers. */ | 622 | /* Build up the XDR from the receive buffers. */ |
| @@ -591,8 +649,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) | |||
| 591 | + rqstp->rq_arg.tail[0].iov_len; | 649 | + rqstp->rq_arg.tail[0].iov_len; |
| 592 | svc_rdma_put_context(ctxt, 0); | 650 | svc_rdma_put_context(ctxt, 0); |
| 593 | out: | 651 | out: |
| 594 | dprintk("svcrdma: ret = %d, rq_arg.len =%d, " | 652 | dprintk("svcrdma: ret=%d, rq_arg.len=%u, " |
| 595 | "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n", | 653 | "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zd\n", |
| 596 | ret, rqstp->rq_arg.len, | 654 | ret, rqstp->rq_arg.len, |
| 597 | rqstp->rq_arg.head[0].iov_base, | 655 | rqstp->rq_arg.head[0].iov_base, |
| 598 | rqstp->rq_arg.head[0].iov_len); | 656 | rqstp->rq_arg.head[0].iov_len); |
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 9f1b50689c0f..7de33d1af9b6 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c | |||
| @@ -60,8 +60,11 @@ static int map_xdr(struct svcxprt_rdma *xprt, | |||
| 60 | u32 page_off; | 60 | u32 page_off; |
| 61 | int page_no; | 61 | int page_no; |
| 62 | 62 | ||
| 63 | BUG_ON(xdr->len != | 63 | if (xdr->len != |
| 64 | (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)); | 64 | (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) { |
| 65 | pr_err("svcrdma: map_xdr: XDR buffer length error\n"); | ||
| 66 | return -EIO; | ||
| 67 | } | ||
| 65 | 68 | ||
| 66 | /* Skip the first sge, this is for the RPCRDMA header */ | 69 | /* Skip the first sge, this is for the RPCRDMA header */ |
| 67 | sge_no = 1; | 70 | sge_no = 1; |
| @@ -150,7 +153,11 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, | |||
| 150 | int bc; | 153 | int bc; |
| 151 | struct svc_rdma_op_ctxt *ctxt; | 154 | struct svc_rdma_op_ctxt *ctxt; |
| 152 | 155 | ||
| 153 | BUG_ON(vec->count > RPCSVC_MAXPAGES); | 156 | if (vec->count > RPCSVC_MAXPAGES) { |
| 157 | pr_err("svcrdma: Too many pages (%lu)\n", vec->count); | ||
| 158 | return -EIO; | ||
| 159 | } | ||
| 160 | |||
| 154 | dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, " | 161 | dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, " |
| 155 | "write_len=%d, vec->sge=%p, vec->count=%lu\n", | 162 | "write_len=%d, vec->sge=%p, vec->count=%lu\n", |
| 156 | rmr, (unsigned long long)to, xdr_off, | 163 | rmr, (unsigned long long)to, xdr_off, |
| @@ -190,7 +197,10 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, | |||
| 190 | sge_off = 0; | 197 | sge_off = 0; |
| 191 | sge_no++; | 198 | sge_no++; |
| 192 | xdr_sge_no++; | 199 | xdr_sge_no++; |
| 193 | BUG_ON(xdr_sge_no > vec->count); | 200 | if (xdr_sge_no > vec->count) { |
| 201 | pr_err("svcrdma: Too many sges (%d)\n", xdr_sge_no); | ||
| 202 | goto err; | ||
| 203 | } | ||
| 194 | bc -= sge_bytes; | 204 | bc -= sge_bytes; |
| 195 | if (sge_no == xprt->sc_max_sge) | 205 | if (sge_no == xprt->sc_max_sge) |
| 196 | break; | 206 | break; |
| @@ -421,7 +431,10 @@ static int send_reply(struct svcxprt_rdma *rdma, | |||
| 421 | ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; | 431 | ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; |
| 422 | ctxt->sge[sge_no].length = sge_bytes; | 432 | ctxt->sge[sge_no].length = sge_bytes; |
| 423 | } | 433 | } |
| 424 | BUG_ON(byte_count != 0); | 434 | if (byte_count != 0) { |
| 435 | pr_err("svcrdma: Could not map %d bytes\n", byte_count); | ||
| 436 | goto err; | ||
| 437 | } | ||
| 425 | 438 | ||
| 426 | /* Save all respages in the ctxt and remove them from the | 439 | /* Save all respages in the ctxt and remove them from the |
| 427 | * respages array. They are our pages until the I/O | 440 | * respages array. They are our pages until the I/O |
| @@ -442,7 +455,10 @@ static int send_reply(struct svcxprt_rdma *rdma, | |||
| 442 | } | 455 | } |
| 443 | rqstp->rq_next_page = rqstp->rq_respages + 1; | 456 | rqstp->rq_next_page = rqstp->rq_respages + 1; |
| 444 | 457 | ||
| 445 | BUG_ON(sge_no > rdma->sc_max_sge); | 458 | if (sge_no > rdma->sc_max_sge) { |
| 459 | pr_err("svcrdma: Too many sges (%d)\n", sge_no); | ||
| 460 | goto err; | ||
| 461 | } | ||
| 446 | memset(&send_wr, 0, sizeof send_wr); | 462 | memset(&send_wr, 0, sizeof send_wr); |
| 447 | ctxt->wr_op = IB_WR_SEND; | 463 | ctxt->wr_op = IB_WR_SEND; |
| 448 | send_wr.wr_id = (unsigned long)ctxt; | 464 | send_wr.wr_id = (unsigned long)ctxt; |
| @@ -467,18 +483,6 @@ void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp) | |||
| 467 | { | 483 | { |
| 468 | } | 484 | } |
| 469 | 485 | ||
| 470 | /* | ||
| 471 | * Return the start of an xdr buffer. | ||
| 472 | */ | ||
| 473 | static void *xdr_start(struct xdr_buf *xdr) | ||
| 474 | { | ||
| 475 | return xdr->head[0].iov_base - | ||
| 476 | (xdr->len - | ||
| 477 | xdr->page_len - | ||
| 478 | xdr->tail[0].iov_len - | ||
| 479 | xdr->head[0].iov_len); | ||
| 480 | } | ||
| 481 | |||
| 482 | int svc_rdma_sendto(struct svc_rqst *rqstp) | 486 | int svc_rdma_sendto(struct svc_rqst *rqstp) |
| 483 | { | 487 | { |
| 484 | struct svc_xprt *xprt = rqstp->rq_xprt; | 488 | struct svc_xprt *xprt = rqstp->rq_xprt; |
| @@ -496,8 +500,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) | |||
| 496 | 500 | ||
| 497 | dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); | 501 | dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); |
| 498 | 502 | ||
| 499 | /* Get the RDMA request header. */ | 503 | /* Get the RDMA request header. The receive logic always |
| 500 | rdma_argp = xdr_start(&rqstp->rq_arg); | 504 | * places this at the start of page 0. |
| 505 | */ | ||
| 506 | rdma_argp = page_address(rqstp->rq_pages[0]); | ||
| 501 | 507 | ||
| 502 | /* Build an req vec for the XDR */ | 508 | /* Build an req vec for the XDR */ |
| 503 | ctxt = svc_rdma_get_context(rdma); | 509 | ctxt = svc_rdma_get_context(rdma); |
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 4e618808bc98..f609c1c2d38d 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c | |||
| @@ -139,7 +139,6 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) | |||
| 139 | struct svcxprt_rdma *xprt; | 139 | struct svcxprt_rdma *xprt; |
| 140 | int i; | 140 | int i; |
| 141 | 141 | ||
| 142 | BUG_ON(!ctxt); | ||
| 143 | xprt = ctxt->xprt; | 142 | xprt = ctxt->xprt; |
| 144 | if (free_pages) | 143 | if (free_pages) |
| 145 | for (i = 0; i < ctxt->count; i++) | 144 | for (i = 0; i < ctxt->count; i++) |
| @@ -339,12 +338,14 @@ static void process_context(struct svcxprt_rdma *xprt, | |||
| 339 | 338 | ||
| 340 | switch (ctxt->wr_op) { | 339 | switch (ctxt->wr_op) { |
| 341 | case IB_WR_SEND: | 340 | case IB_WR_SEND: |
| 342 | BUG_ON(ctxt->frmr); | 341 | if (ctxt->frmr) |
| 342 | pr_err("svcrdma: SEND: ctxt->frmr != NULL\n"); | ||
| 343 | svc_rdma_put_context(ctxt, 1); | 343 | svc_rdma_put_context(ctxt, 1); |
| 344 | break; | 344 | break; |
| 345 | 345 | ||
| 346 | case IB_WR_RDMA_WRITE: | 346 | case IB_WR_RDMA_WRITE: |
| 347 | BUG_ON(ctxt->frmr); | 347 | if (ctxt->frmr) |
| 348 | pr_err("svcrdma: WRITE: ctxt->frmr != NULL\n"); | ||
| 348 | svc_rdma_put_context(ctxt, 0); | 349 | svc_rdma_put_context(ctxt, 0); |
| 349 | break; | 350 | break; |
| 350 | 351 | ||
| @@ -353,19 +354,21 @@ static void process_context(struct svcxprt_rdma *xprt, | |||
| 353 | svc_rdma_put_frmr(xprt, ctxt->frmr); | 354 | svc_rdma_put_frmr(xprt, ctxt->frmr); |
| 354 | if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { | 355 | if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { |
| 355 | struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; | 356 | struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; |
| 356 | BUG_ON(!read_hdr); | 357 | if (read_hdr) { |
| 357 | spin_lock_bh(&xprt->sc_rq_dto_lock); | 358 | spin_lock_bh(&xprt->sc_rq_dto_lock); |
| 358 | set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); | 359 | set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); |
| 359 | list_add_tail(&read_hdr->dto_q, | 360 | list_add_tail(&read_hdr->dto_q, |
| 360 | &xprt->sc_read_complete_q); | 361 | &xprt->sc_read_complete_q); |
| 361 | spin_unlock_bh(&xprt->sc_rq_dto_lock); | 362 | spin_unlock_bh(&xprt->sc_rq_dto_lock); |
| 363 | } else { | ||
| 364 | pr_err("svcrdma: ctxt->read_hdr == NULL\n"); | ||
| 365 | } | ||
| 362 | svc_xprt_enqueue(&xprt->sc_xprt); | 366 | svc_xprt_enqueue(&xprt->sc_xprt); |
| 363 | } | 367 | } |
| 364 | svc_rdma_put_context(ctxt, 0); | 368 | svc_rdma_put_context(ctxt, 0); |
| 365 | break; | 369 | break; |
| 366 | 370 | ||
| 367 | default: | 371 | default: |
| 368 | BUG_ON(1); | ||
| 369 | printk(KERN_ERR "svcrdma: unexpected completion type, " | 372 | printk(KERN_ERR "svcrdma: unexpected completion type, " |
| 370 | "opcode=%d\n", | 373 | "opcode=%d\n", |
| 371 | ctxt->wr_op); | 374 | ctxt->wr_op); |
| @@ -513,7 +516,10 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) | |||
| 513 | buflen = 0; | 516 | buflen = 0; |
| 514 | ctxt->direction = DMA_FROM_DEVICE; | 517 | ctxt->direction = DMA_FROM_DEVICE; |
| 515 | for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) { | 518 | for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) { |
| 516 | BUG_ON(sge_no >= xprt->sc_max_sge); | 519 | if (sge_no >= xprt->sc_max_sge) { |
| 520 | pr_err("svcrdma: Too many sges (%d)\n", sge_no); | ||
| 521 | goto err_put_ctxt; | ||
| 522 | } | ||
| 517 | page = svc_rdma_get_page(); | 523 | page = svc_rdma_get_page(); |
| 518 | ctxt->pages[sge_no] = page; | 524 | ctxt->pages[sge_no] = page; |
| 519 | pa = ib_dma_map_page(xprt->sc_cm_id->device, | 525 | pa = ib_dma_map_page(xprt->sc_cm_id->device, |
| @@ -687,7 +693,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, | |||
| 687 | { | 693 | { |
| 688 | struct rdma_cm_id *listen_id; | 694 | struct rdma_cm_id *listen_id; |
| 689 | struct svcxprt_rdma *cma_xprt; | 695 | struct svcxprt_rdma *cma_xprt; |
| 690 | struct svc_xprt *xprt; | ||
| 691 | int ret; | 696 | int ret; |
| 692 | 697 | ||
| 693 | dprintk("svcrdma: Creating RDMA socket\n"); | 698 | dprintk("svcrdma: Creating RDMA socket\n"); |
| @@ -698,7 +703,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, | |||
| 698 | cma_xprt = rdma_create_xprt(serv, 1); | 703 | cma_xprt = rdma_create_xprt(serv, 1); |
| 699 | if (!cma_xprt) | 704 | if (!cma_xprt) |
| 700 | return ERR_PTR(-ENOMEM); | 705 | return ERR_PTR(-ENOMEM); |
| 701 | xprt = &cma_xprt->sc_xprt; | ||
| 702 | 706 | ||
| 703 | listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP, | 707 | listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP, |
| 704 | IB_QPT_RC); | 708 | IB_QPT_RC); |
| @@ -822,7 +826,7 @@ void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, | |||
| 822 | if (frmr) { | 826 | if (frmr) { |
| 823 | frmr_unmap_dma(rdma, frmr); | 827 | frmr_unmap_dma(rdma, frmr); |
| 824 | spin_lock_bh(&rdma->sc_frmr_q_lock); | 828 | spin_lock_bh(&rdma->sc_frmr_q_lock); |
| 825 | BUG_ON(!list_empty(&frmr->frmr_list)); | 829 | WARN_ON_ONCE(!list_empty(&frmr->frmr_list)); |
| 826 | list_add(&frmr->frmr_list, &rdma->sc_frmr_q); | 830 | list_add(&frmr->frmr_list, &rdma->sc_frmr_q); |
| 827 | spin_unlock_bh(&rdma->sc_frmr_q_lock); | 831 | spin_unlock_bh(&rdma->sc_frmr_q_lock); |
| 828 | } | 832 | } |
| @@ -970,10 +974,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) | |||
| 970 | * NB: iWARP requires remote write access for the data sink | 974 | * NB: iWARP requires remote write access for the data sink |
| 971 | * of an RDMA_READ. IB does not. | 975 | * of an RDMA_READ. IB does not. |
| 972 | */ | 976 | */ |
| 977 | newxprt->sc_reader = rdma_read_chunk_lcl; | ||
| 973 | if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { | 978 | if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { |
| 974 | newxprt->sc_frmr_pg_list_len = | 979 | newxprt->sc_frmr_pg_list_len = |
| 975 | devattr.max_fast_reg_page_list_len; | 980 | devattr.max_fast_reg_page_list_len; |
| 976 | newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG; | 981 | newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG; |
| 982 | newxprt->sc_reader = rdma_read_chunk_frmr; | ||
| 977 | } | 983 | } |
| 978 | 984 | ||
| 979 | /* | 985 | /* |
| @@ -1125,7 +1131,9 @@ static void __svc_rdma_free(struct work_struct *work) | |||
| 1125 | dprintk("svcrdma: svc_rdma_free(%p)\n", rdma); | 1131 | dprintk("svcrdma: svc_rdma_free(%p)\n", rdma); |
| 1126 | 1132 | ||
| 1127 | /* We should only be called from kref_put */ | 1133 | /* We should only be called from kref_put */ |
| 1128 | BUG_ON(atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0); | 1134 | if (atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0) |
| 1135 | pr_err("svcrdma: sc_xprt still in use? (%d)\n", | ||
| 1136 | atomic_read(&rdma->sc_xprt.xpt_ref.refcount)); | ||
| 1129 | 1137 | ||
| 1130 | /* | 1138 | /* |
| 1131 | * Destroy queued, but not processed read completions. Note | 1139 | * Destroy queued, but not processed read completions. Note |
| @@ -1153,8 +1161,12 @@ static void __svc_rdma_free(struct work_struct *work) | |||
| 1153 | } | 1161 | } |
| 1154 | 1162 | ||
| 1155 | /* Warn if we leaked a resource or under-referenced */ | 1163 | /* Warn if we leaked a resource or under-referenced */ |
| 1156 | WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0); | 1164 | if (atomic_read(&rdma->sc_ctxt_used) != 0) |
| 1157 | WARN_ON(atomic_read(&rdma->sc_dma_used) != 0); | 1165 | pr_err("svcrdma: ctxt still in use? (%d)\n", |
| 1166 | atomic_read(&rdma->sc_ctxt_used)); | ||
| 1167 | if (atomic_read(&rdma->sc_dma_used) != 0) | ||
| 1168 | pr_err("svcrdma: dma still in use? (%d)\n", | ||
| 1169 | atomic_read(&rdma->sc_dma_used)); | ||
| 1158 | 1170 | ||
| 1159 | /* De-allocate fastreg mr */ | 1171 | /* De-allocate fastreg mr */ |
| 1160 | rdma_dealloc_frmr_q(rdma); | 1172 | rdma_dealloc_frmr_q(rdma); |
| @@ -1254,7 +1266,6 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) | |||
| 1254 | if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) | 1266 | if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) |
| 1255 | return -ENOTCONN; | 1267 | return -ENOTCONN; |
| 1256 | 1268 | ||
| 1257 | BUG_ON(wr->send_flags != IB_SEND_SIGNALED); | ||
| 1258 | wr_count = 1; | 1269 | wr_count = 1; |
| 1259 | for (n_wr = wr->next; n_wr; n_wr = n_wr->next) | 1270 | for (n_wr = wr->next; n_wr; n_wr = n_wr->next) |
| 1260 | wr_count++; | 1271 | wr_count++; |
