aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/cifs/cifs_debug.c2
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h19
-rw-r--r--fs/cifs/cifsproto.h4
-rw-r--r--fs/cifs/cifssmb.c119
-rw-r--r--fs/cifs/connect.c8
-rw-r--r--fs/cifs/file.c872
-rw-r--r--fs/cifs/misc.c13
-rw-r--r--fs/cifs/sess.c1192
-rw-r--r--fs/cifs/smb1ops.c8
-rw-r--r--fs/cifs/smb2inode.c2
-rw-r--r--fs/cifs/smb2maperror.c2
-rw-r--r--fs/cifs/smb2misc.c6
-rw-r--r--fs/cifs/smb2ops.c73
-rw-r--r--fs/cifs/smb2pdu.c94
-rw-r--r--fs/cifs/smb2proto.h2
-rw-r--r--fs/cifs/smb2transport.c5
-rw-r--r--fs/cifs/transport.c25
-rw-r--r--fs/namespace.c65
-rw-r--r--fs/nfs/client.c95
-rw-r--r--fs/nfs/inode.c3
-rw-r--r--fs/nfs/internal.h9
-rw-r--r--fs/nfs/netns.h3
-rw-r--r--fs/nfsd/acl.h2
-rw-r--r--fs/nfsd/auth.c2
-rw-r--r--fs/nfsd/export.c6
-rw-r--r--fs/nfsd/export.h3
-rw-r--r--fs/nfsd/fault_inject.c138
-rw-r--r--fs/nfsd/netns.h23
-rw-r--r--fs/nfsd/nfs2acl.c8
-rw-r--r--fs/nfsd/nfs3acl.c8
-rw-r--r--fs/nfsd/nfs3proc.c9
-rw-r--r--fs/nfsd/nfs3xdr.c30
-rw-r--r--fs/nfsd/nfs4acl.c39
-rw-r--r--fs/nfsd/nfs4callback.c32
-rw-r--r--fs/nfsd/nfs4proc.c53
-rw-r--r--fs/nfsd/nfs4state.c3096
-rw-r--r--fs/nfsd/nfs4xdr.c128
-rw-r--r--fs/nfsd/nfscache.c13
-rw-r--r--fs/nfsd/nfsctl.c51
-rw-r--r--fs/nfsd/nfsfh.c12
-rw-r--r--fs/nfsd/nfsfh.h15
-rw-r--r--fs/nfsd/nfsproc.c13
-rw-r--r--fs/nfsd/nfssvc.c21
-rw-r--r--fs/nfsd/nfsxdr.c14
-rw-r--r--fs/nfsd/state.h220
-rw-r--r--fs/nfsd/vfs.c48
-rw-r--r--fs/nfsd/vfs.h8
-rw-r--r--fs/nfsd/xdr4.h30
-rw-r--r--fs/proc/Makefile1
-rw-r--r--fs/proc/base.c18
-rw-r--r--fs/proc/inode.c7
-rw-r--r--fs/proc/internal.h6
-rw-r--r--fs/proc/proc_net.c6
-rw-r--r--fs/proc/root.c5
-rw-r--r--fs/proc/thread_self.c85
-rw-r--r--fs/proc_namespace.c8
57 files changed, 4543 insertions, 2238 deletions
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index f3ac4154cbb6..44ec72684df5 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -213,7 +213,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
213 tcon->nativeFileSystem); 213 tcon->nativeFileSystem);
214 } 214 }
215 seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x" 215 seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x"
216 "\n\tPathComponentMax: %d Status: 0x%d", 216 "\n\tPathComponentMax: %d Status: %d",
217 le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics), 217 le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics),
218 le32_to_cpu(tcon->fsAttrInfo.Attributes), 218 le32_to_cpu(tcon->fsAttrInfo.Attributes),
219 le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength), 219 le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength),
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 70f178a7c759..560480263336 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -136,5 +136,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
136extern const struct export_operations cifs_export_ops; 136extern const struct export_operations cifs_export_ops;
137#endif /* CONFIG_CIFS_NFSD_EXPORT */ 137#endif /* CONFIG_CIFS_NFSD_EXPORT */
138 138
139#define CIFS_VERSION "2.03" 139#define CIFS_VERSION "2.04"
140#endif /* _CIFSFS_H */ 140#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index de6aed8c78e5..0012e1e291d4 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -404,6 +404,11 @@ struct smb_version_operations {
404 const struct cifs_fid *, u32 *); 404 const struct cifs_fid *, u32 *);
405 int (*set_acl)(struct cifs_ntsd *, __u32, struct inode *, const char *, 405 int (*set_acl)(struct cifs_ntsd *, __u32, struct inode *, const char *,
406 int); 406 int);
407 /* writepages retry size */
408 unsigned int (*wp_retry_size)(struct inode *);
409 /* get mtu credits */
410 int (*wait_mtu_credits)(struct TCP_Server_Info *, unsigned int,
411 unsigned int *, unsigned int *);
407}; 412};
408 413
409struct smb_version_values { 414struct smb_version_values {
@@ -640,6 +645,16 @@ add_credits(struct TCP_Server_Info *server, const unsigned int add,
640} 645}
641 646
642static inline void 647static inline void
648add_credits_and_wake_if(struct TCP_Server_Info *server, const unsigned int add,
649 const int optype)
650{
651 if (add) {
652 server->ops->add_credits(server, add, optype);
653 wake_up(&server->request_q);
654 }
655}
656
657static inline void
643set_credits(struct TCP_Server_Info *server, const int val) 658set_credits(struct TCP_Server_Info *server, const int val)
644{ 659{
645 server->ops->set_credits(server, val); 660 server->ops->set_credits(server, val);
@@ -1044,6 +1059,7 @@ struct cifs_readdata {
1044 struct address_space *mapping; 1059 struct address_space *mapping;
1045 __u64 offset; 1060 __u64 offset;
1046 unsigned int bytes; 1061 unsigned int bytes;
1062 unsigned int got_bytes;
1047 pid_t pid; 1063 pid_t pid;
1048 int result; 1064 int result;
1049 struct work_struct work; 1065 struct work_struct work;
@@ -1053,6 +1069,7 @@ struct cifs_readdata {
1053 struct kvec iov; 1069 struct kvec iov;
1054 unsigned int pagesz; 1070 unsigned int pagesz;
1055 unsigned int tailsz; 1071 unsigned int tailsz;
1072 unsigned int credits;
1056 unsigned int nr_pages; 1073 unsigned int nr_pages;
1057 struct page *pages[]; 1074 struct page *pages[];
1058}; 1075};
@@ -1073,6 +1090,7 @@ struct cifs_writedata {
1073 int result; 1090 int result;
1074 unsigned int pagesz; 1091 unsigned int pagesz;
1075 unsigned int tailsz; 1092 unsigned int tailsz;
1093 unsigned int credits;
1076 unsigned int nr_pages; 1094 unsigned int nr_pages;
1077 struct page *pages[]; 1095 struct page *pages[];
1078}; 1096};
@@ -1398,6 +1416,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
1398#define CIFS_OBREAK_OP 0x0100 /* oplock break request */ 1416#define CIFS_OBREAK_OP 0x0100 /* oplock break request */
1399#define CIFS_NEG_OP 0x0200 /* negotiate request */ 1417#define CIFS_NEG_OP 0x0200 /* negotiate request */
1400#define CIFS_OP_MASK 0x0380 /* mask request type */ 1418#define CIFS_OP_MASK 0x0380 /* mask request type */
1419#define CIFS_HAS_CREDITS 0x0400 /* already has credits */
1401 1420
1402/* Security Flags: indicate type of session setup needed */ 1421/* Security Flags: indicate type of session setup needed */
1403#define CIFSSEC_MAY_SIGN 0x00001 1422#define CIFSSEC_MAY_SIGN 0x00001
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index ca7980a1e303..c31ce98c1704 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -36,6 +36,7 @@ extern struct smb_hdr *cifs_buf_get(void);
36extern void cifs_buf_release(void *); 36extern void cifs_buf_release(void *);
37extern struct smb_hdr *cifs_small_buf_get(void); 37extern struct smb_hdr *cifs_small_buf_get(void);
38extern void cifs_small_buf_release(void *); 38extern void cifs_small_buf_release(void *);
39extern void free_rsp_buf(int, void *);
39extern void cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx, 40extern void cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx,
40 struct kvec *iov); 41 struct kvec *iov);
41extern int smb_send(struct TCP_Server_Info *, struct smb_hdr *, 42extern int smb_send(struct TCP_Server_Info *, struct smb_hdr *,
@@ -89,6 +90,9 @@ extern struct mid_q_entry *cifs_setup_async_request(struct TCP_Server_Info *,
89 struct smb_rqst *); 90 struct smb_rqst *);
90extern int cifs_check_receive(struct mid_q_entry *mid, 91extern int cifs_check_receive(struct mid_q_entry *mid,
91 struct TCP_Server_Info *server, bool log_error); 92 struct TCP_Server_Info *server, bool log_error);
93extern int cifs_wait_mtu_credits(struct TCP_Server_Info *server,
94 unsigned int size, unsigned int *num,
95 unsigned int *credits);
92extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *, 96extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *,
93 struct kvec *, int /* nvec to send */, 97 struct kvec *, int /* nvec to send */,
94 int * /* type of buf returned */ , const int flags); 98 int * /* type of buf returned */ , const int flags);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 6ce4e0954b98..66f65001a6d8 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -196,10 +196,6 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
196 if (rc) 196 if (rc)
197 goto out; 197 goto out;
198 198
199 /*
200 * FIXME: check if wsize needs updated due to negotiated smb buffer
201 * size shrinking
202 */
203 atomic_inc(&tconInfoReconnectCount); 199 atomic_inc(&tconInfoReconnectCount);
204 200
205 /* tell server Unix caps we support */ 201 /* tell server Unix caps we support */
@@ -1517,7 +1513,6 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1517 return length; 1513 return length;
1518 1514
1519 server->total_read += length; 1515 server->total_read += length;
1520 rdata->bytes = length;
1521 1516
1522 cifs_dbg(FYI, "total_read=%u buflen=%u remaining=%u\n", 1517 cifs_dbg(FYI, "total_read=%u buflen=%u remaining=%u\n",
1523 server->total_read, buflen, data_len); 1518 server->total_read, buflen, data_len);
@@ -1560,12 +1555,18 @@ cifs_readv_callback(struct mid_q_entry *mid)
1560 rc); 1555 rc);
1561 } 1556 }
1562 /* FIXME: should this be counted toward the initiating task? */ 1557 /* FIXME: should this be counted toward the initiating task? */
1563 task_io_account_read(rdata->bytes); 1558 task_io_account_read(rdata->got_bytes);
1564 cifs_stats_bytes_read(tcon, rdata->bytes); 1559 cifs_stats_bytes_read(tcon, rdata->got_bytes);
1565 break; 1560 break;
1566 case MID_REQUEST_SUBMITTED: 1561 case MID_REQUEST_SUBMITTED:
1567 case MID_RETRY_NEEDED: 1562 case MID_RETRY_NEEDED:
1568 rdata->result = -EAGAIN; 1563 rdata->result = -EAGAIN;
1564 if (server->sign && rdata->got_bytes)
1565 /* reset bytes number since we can not check a sign */
1566 rdata->got_bytes = 0;
1567 /* FIXME: should this be counted toward the initiating task? */
1568 task_io_account_read(rdata->got_bytes);
1569 cifs_stats_bytes_read(tcon, rdata->got_bytes);
1569 break; 1570 break;
1570 default: 1571 default:
1571 rdata->result = -EIO; 1572 rdata->result = -EIO;
@@ -1734,10 +1735,7 @@ CIFSSMBRead(const unsigned int xid, struct cifs_io_parms *io_parms,
1734 1735
1735/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */ 1736/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */
1736 if (*buf) { 1737 if (*buf) {
1737 if (resp_buf_type == CIFS_SMALL_BUFFER) 1738 free_rsp_buf(resp_buf_type, iov[0].iov_base);
1738 cifs_small_buf_release(iov[0].iov_base);
1739 else if (resp_buf_type == CIFS_LARGE_BUFFER)
1740 cifs_buf_release(iov[0].iov_base);
1741 } else if (resp_buf_type != CIFS_NO_BUFFER) { 1739 } else if (resp_buf_type != CIFS_NO_BUFFER) {
1742 /* return buffer to caller to free */ 1740 /* return buffer to caller to free */
1743 *buf = iov[0].iov_base; 1741 *buf = iov[0].iov_base;
@@ -1899,28 +1897,80 @@ cifs_writedata_release(struct kref *refcount)
1899static void 1897static void
1900cifs_writev_requeue(struct cifs_writedata *wdata) 1898cifs_writev_requeue(struct cifs_writedata *wdata)
1901{ 1899{
1902 int i, rc; 1900 int i, rc = 0;
1903 struct inode *inode = wdata->cfile->dentry->d_inode; 1901 struct inode *inode = wdata->cfile->dentry->d_inode;
1904 struct TCP_Server_Info *server; 1902 struct TCP_Server_Info *server;
1903 unsigned int rest_len;
1905 1904
1906 for (i = 0; i < wdata->nr_pages; i++) { 1905 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
1907 lock_page(wdata->pages[i]); 1906 i = 0;
1908 clear_page_dirty_for_io(wdata->pages[i]); 1907 rest_len = wdata->bytes;
1909 }
1910
1911 do { 1908 do {
1912 server = tlink_tcon(wdata->cfile->tlink)->ses->server; 1909 struct cifs_writedata *wdata2;
1913 rc = server->ops->async_writev(wdata, cifs_writedata_release); 1910 unsigned int j, nr_pages, wsize, tailsz, cur_len;
1914 } while (rc == -EAGAIN); 1911
1912 wsize = server->ops->wp_retry_size(inode);
1913 if (wsize < rest_len) {
1914 nr_pages = wsize / PAGE_CACHE_SIZE;
1915 if (!nr_pages) {
1916 rc = -ENOTSUPP;
1917 break;
1918 }
1919 cur_len = nr_pages * PAGE_CACHE_SIZE;
1920 tailsz = PAGE_CACHE_SIZE;
1921 } else {
1922 nr_pages = DIV_ROUND_UP(rest_len, PAGE_CACHE_SIZE);
1923 cur_len = rest_len;
1924 tailsz = rest_len - (nr_pages - 1) * PAGE_CACHE_SIZE;
1925 }
1915 1926
1916 for (i = 0; i < wdata->nr_pages; i++) { 1927 wdata2 = cifs_writedata_alloc(nr_pages, cifs_writev_complete);
1917 unlock_page(wdata->pages[i]); 1928 if (!wdata2) {
1918 if (rc != 0) { 1929 rc = -ENOMEM;
1919 SetPageError(wdata->pages[i]); 1930 break;
1920 end_page_writeback(wdata->pages[i]);
1921 page_cache_release(wdata->pages[i]);
1922 } 1931 }
1923 } 1932
1933 for (j = 0; j < nr_pages; j++) {
1934 wdata2->pages[j] = wdata->pages[i + j];
1935 lock_page(wdata2->pages[j]);
1936 clear_page_dirty_for_io(wdata2->pages[j]);
1937 }
1938
1939 wdata2->sync_mode = wdata->sync_mode;
1940 wdata2->nr_pages = nr_pages;
1941 wdata2->offset = page_offset(wdata2->pages[0]);
1942 wdata2->pagesz = PAGE_CACHE_SIZE;
1943 wdata2->tailsz = tailsz;
1944 wdata2->bytes = cur_len;
1945
1946 wdata2->cfile = find_writable_file(CIFS_I(inode), false);
1947 if (!wdata2->cfile) {
1948 cifs_dbg(VFS, "No writable handles for inode\n");
1949 rc = -EBADF;
1950 break;
1951 }
1952 wdata2->pid = wdata2->cfile->pid;
1953 rc = server->ops->async_writev(wdata2, cifs_writedata_release);
1954
1955 for (j = 0; j < nr_pages; j++) {
1956 unlock_page(wdata2->pages[j]);
1957 if (rc != 0 && rc != -EAGAIN) {
1958 SetPageError(wdata2->pages[j]);
1959 end_page_writeback(wdata2->pages[j]);
1960 page_cache_release(wdata2->pages[j]);
1961 }
1962 }
1963
1964 if (rc) {
1965 kref_put(&wdata2->refcount, cifs_writedata_release);
1966 if (rc == -EAGAIN)
1967 continue;
1968 break;
1969 }
1970
1971 rest_len -= cur_len;
1972 i += nr_pages;
1973 } while (i < wdata->nr_pages);
1924 1974
1925 mapping_set_error(inode->i_mapping, rc); 1975 mapping_set_error(inode->i_mapping, rc);
1926 kref_put(&wdata->refcount, cifs_writedata_release); 1976 kref_put(&wdata->refcount, cifs_writedata_release);
@@ -2203,10 +2253,7 @@ CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms,
2203 } 2253 }
2204 2254
2205/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */ 2255/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */
2206 if (resp_buf_type == CIFS_SMALL_BUFFER) 2256 free_rsp_buf(resp_buf_type, iov[0].iov_base);
2207 cifs_small_buf_release(iov[0].iov_base);
2208 else if (resp_buf_type == CIFS_LARGE_BUFFER)
2209 cifs_buf_release(iov[0].iov_base);
2210 2257
2211 /* Note: On -EAGAIN error only caller can retry on handle based calls 2258 /* Note: On -EAGAIN error only caller can retry on handle based calls
2212 since file handle passed in no longer valid */ 2259 since file handle passed in no longer valid */
@@ -2451,10 +2498,7 @@ plk_err_exit:
2451 if (pSMB) 2498 if (pSMB)
2452 cifs_small_buf_release(pSMB); 2499 cifs_small_buf_release(pSMB);
2453 2500
2454 if (resp_buf_type == CIFS_SMALL_BUFFER) 2501 free_rsp_buf(resp_buf_type, iov[0].iov_base);
2455 cifs_small_buf_release(iov[0].iov_base);
2456 else if (resp_buf_type == CIFS_LARGE_BUFFER)
2457 cifs_buf_release(iov[0].iov_base);
2458 2502
2459 /* Note: On -EAGAIN error only caller can retry on handle based calls 2503 /* Note: On -EAGAIN error only caller can retry on handle based calls
2460 since file handle passed in no longer valid */ 2504 since file handle passed in no longer valid */
@@ -3838,10 +3882,7 @@ CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid,
3838 } 3882 }
3839 } 3883 }
3840qsec_out: 3884qsec_out:
3841 if (buf_type == CIFS_SMALL_BUFFER) 3885 free_rsp_buf(buf_type, iov[0].iov_base);
3842 cifs_small_buf_release(iov[0].iov_base);
3843 else if (buf_type == CIFS_LARGE_BUFFER)
3844 cifs_buf_release(iov[0].iov_base);
3845/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */ 3886/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */
3846 return rc; 3887 return rc;
3847} 3888}
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index b98366f21f9e..03ed8a09581c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -557,7 +557,7 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig,
557 try_to_freeze(); 557 try_to_freeze();
558 558
559 if (server_unresponsive(server)) { 559 if (server_unresponsive(server)) {
560 total_read = -EAGAIN; 560 total_read = -ECONNABORTED;
561 break; 561 break;
562 } 562 }
563 563
@@ -571,7 +571,7 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig,
571 break; 571 break;
572 } else if (server->tcpStatus == CifsNeedReconnect) { 572 } else if (server->tcpStatus == CifsNeedReconnect) {
573 cifs_reconnect(server); 573 cifs_reconnect(server);
574 total_read = -EAGAIN; 574 total_read = -ECONNABORTED;
575 break; 575 break;
576 } else if (length == -ERESTARTSYS || 576 } else if (length == -ERESTARTSYS ||
577 length == -EAGAIN || 577 length == -EAGAIN ||
@@ -588,7 +588,7 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig,
588 cifs_dbg(FYI, "Received no data or error: expecting %d\n" 588 cifs_dbg(FYI, "Received no data or error: expecting %d\n"
589 "got %d", to_read, length); 589 "got %d", to_read, length);
590 cifs_reconnect(server); 590 cifs_reconnect(server);
591 total_read = -EAGAIN; 591 total_read = -ECONNABORTED;
592 break; 592 break;
593 } 593 }
594 } 594 }
@@ -786,7 +786,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
786 cifs_dbg(VFS, "SMB response too long (%u bytes)\n", pdu_length); 786 cifs_dbg(VFS, "SMB response too long (%u bytes)\n", pdu_length);
787 cifs_reconnect(server); 787 cifs_reconnect(server);
788 wake_up(&server->response_q); 788 wake_up(&server->response_q);
789 return -EAGAIN; 789 return -ECONNABORTED;
790 } 790 }
791 791
792 /* switch to large buffer if too big for a small one */ 792 /* switch to large buffer if too big for a small one */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index b88b1ade4d3d..4ab2f79ffa7a 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1670,8 +1670,8 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1670 break; 1670 break;
1671 } 1671 }
1672 1672
1673 len = min((size_t)cifs_sb->wsize, 1673 len = min(server->ops->wp_retry_size(dentry->d_inode),
1674 write_size - total_written); 1674 (unsigned int)write_size - total_written);
1675 /* iov[0] is reserved for smb header */ 1675 /* iov[0] is reserved for smb header */
1676 iov[1].iov_base = (char *)write_data + total_written; 1676 iov[1].iov_base = (char *)write_data + total_written;
1677 iov[1].iov_len = len; 1677 iov[1].iov_len = len;
@@ -1878,15 +1878,163 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1878 return rc; 1878 return rc;
1879} 1879}
1880 1880
1881static struct cifs_writedata *
1882wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1883 pgoff_t end, pgoff_t *index,
1884 unsigned int *found_pages)
1885{
1886 unsigned int nr_pages;
1887 struct page **pages;
1888 struct cifs_writedata *wdata;
1889
1890 wdata = cifs_writedata_alloc((unsigned int)tofind,
1891 cifs_writev_complete);
1892 if (!wdata)
1893 return NULL;
1894
1895 /*
1896 * find_get_pages_tag seems to return a max of 256 on each
1897 * iteration, so we must call it several times in order to
1898 * fill the array or the wsize is effectively limited to
1899 * 256 * PAGE_CACHE_SIZE.
1900 */
1901 *found_pages = 0;
1902 pages = wdata->pages;
1903 do {
1904 nr_pages = find_get_pages_tag(mapping, index,
1905 PAGECACHE_TAG_DIRTY, tofind,
1906 pages);
1907 *found_pages += nr_pages;
1908 tofind -= nr_pages;
1909 pages += nr_pages;
1910 } while (nr_pages && tofind && *index <= end);
1911
1912 return wdata;
1913}
1914
1915static unsigned int
1916wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1917 struct address_space *mapping,
1918 struct writeback_control *wbc,
1919 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1920{
1921 unsigned int nr_pages = 0, i;
1922 struct page *page;
1923
1924 for (i = 0; i < found_pages; i++) {
1925 page = wdata->pages[i];
1926 /*
1927 * At this point we hold neither mapping->tree_lock nor
1928 * lock on the page itself: the page may be truncated or
1929 * invalidated (changing page->mapping to NULL), or even
1930 * swizzled back from swapper_space to tmpfs file
1931 * mapping
1932 */
1933
1934 if (nr_pages == 0)
1935 lock_page(page);
1936 else if (!trylock_page(page))
1937 break;
1938
1939 if (unlikely(page->mapping != mapping)) {
1940 unlock_page(page);
1941 break;
1942 }
1943
1944 if (!wbc->range_cyclic && page->index > end) {
1945 *done = true;
1946 unlock_page(page);
1947 break;
1948 }
1949
1950 if (*next && (page->index != *next)) {
1951 /* Not next consecutive page */
1952 unlock_page(page);
1953 break;
1954 }
1955
1956 if (wbc->sync_mode != WB_SYNC_NONE)
1957 wait_on_page_writeback(page);
1958
1959 if (PageWriteback(page) ||
1960 !clear_page_dirty_for_io(page)) {
1961 unlock_page(page);
1962 break;
1963 }
1964
1965 /*
1966 * This actually clears the dirty bit in the radix tree.
1967 * See cifs_writepage() for more commentary.
1968 */
1969 set_page_writeback(page);
1970 if (page_offset(page) >= i_size_read(mapping->host)) {
1971 *done = true;
1972 unlock_page(page);
1973 end_page_writeback(page);
1974 break;
1975 }
1976
1977 wdata->pages[i] = page;
1978 *next = page->index + 1;
1979 ++nr_pages;
1980 }
1981
1982 /* reset index to refind any pages skipped */
1983 if (nr_pages == 0)
1984 *index = wdata->pages[0]->index + 1;
1985
1986 /* put any pages we aren't going to use */
1987 for (i = nr_pages; i < found_pages; i++) {
1988 page_cache_release(wdata->pages[i]);
1989 wdata->pages[i] = NULL;
1990 }
1991
1992 return nr_pages;
1993}
1994
1995static int
1996wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
1997 struct address_space *mapping, struct writeback_control *wbc)
1998{
1999 int rc = 0;
2000 struct TCP_Server_Info *server;
2001 unsigned int i;
2002
2003 wdata->sync_mode = wbc->sync_mode;
2004 wdata->nr_pages = nr_pages;
2005 wdata->offset = page_offset(wdata->pages[0]);
2006 wdata->pagesz = PAGE_CACHE_SIZE;
2007 wdata->tailsz = min(i_size_read(mapping->host) -
2008 page_offset(wdata->pages[nr_pages - 1]),
2009 (loff_t)PAGE_CACHE_SIZE);
2010 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz;
2011
2012 if (wdata->cfile != NULL)
2013 cifsFileInfo_put(wdata->cfile);
2014 wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2015 if (!wdata->cfile) {
2016 cifs_dbg(VFS, "No writable handles for inode\n");
2017 rc = -EBADF;
2018 } else {
2019 wdata->pid = wdata->cfile->pid;
2020 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2021 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2022 }
2023
2024 for (i = 0; i < nr_pages; ++i)
2025 unlock_page(wdata->pages[i]);
2026
2027 return rc;
2028}
2029
1881static int cifs_writepages(struct address_space *mapping, 2030static int cifs_writepages(struct address_space *mapping,
1882 struct writeback_control *wbc) 2031 struct writeback_control *wbc)
1883{ 2032{
1884 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb); 2033 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2034 struct TCP_Server_Info *server;
1885 bool done = false, scanned = false, range_whole = false; 2035 bool done = false, scanned = false, range_whole = false;
1886 pgoff_t end, index; 2036 pgoff_t end, index;
1887 struct cifs_writedata *wdata; 2037 struct cifs_writedata *wdata;
1888 struct TCP_Server_Info *server;
1889 struct page *page;
1890 int rc = 0; 2038 int rc = 0;
1891 2039
1892 /* 2040 /*
@@ -1906,152 +2054,50 @@ static int cifs_writepages(struct address_space *mapping,
1906 range_whole = true; 2054 range_whole = true;
1907 scanned = true; 2055 scanned = true;
1908 } 2056 }
2057 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
1909retry: 2058retry:
1910 while (!done && index <= end) { 2059 while (!done && index <= end) {
1911 unsigned int i, nr_pages, found_pages; 2060 unsigned int i, nr_pages, found_pages, wsize, credits;
1912 pgoff_t next = 0, tofind; 2061 pgoff_t next = 0, tofind, saved_index = index;
1913 struct page **pages; 2062
2063 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2064 &wsize, &credits);
2065 if (rc)
2066 break;
1914 2067
1915 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1, 2068 tofind = min((wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1;
1916 end - index) + 1;
1917 2069
1918 wdata = cifs_writedata_alloc((unsigned int)tofind, 2070 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
1919 cifs_writev_complete); 2071 &found_pages);
1920 if (!wdata) { 2072 if (!wdata) {
1921 rc = -ENOMEM; 2073 rc = -ENOMEM;
2074 add_credits_and_wake_if(server, credits, 0);
1922 break; 2075 break;
1923 } 2076 }
1924 2077
1925 /*
1926 * find_get_pages_tag seems to return a max of 256 on each
1927 * iteration, so we must call it several times in order to
1928 * fill the array or the wsize is effectively limited to
1929 * 256 * PAGE_CACHE_SIZE.
1930 */
1931 found_pages = 0;
1932 pages = wdata->pages;
1933 do {
1934 nr_pages = find_get_pages_tag(mapping, &index,
1935 PAGECACHE_TAG_DIRTY,
1936 tofind, pages);
1937 found_pages += nr_pages;
1938 tofind -= nr_pages;
1939 pages += nr_pages;
1940 } while (nr_pages && tofind && index <= end);
1941
1942 if (found_pages == 0) { 2078 if (found_pages == 0) {
1943 kref_put(&wdata->refcount, cifs_writedata_release); 2079 kref_put(&wdata->refcount, cifs_writedata_release);
2080 add_credits_and_wake_if(server, credits, 0);
1944 break; 2081 break;
1945 } 2082 }
1946 2083
1947 nr_pages = 0; 2084 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
1948 for (i = 0; i < found_pages; i++) { 2085 end, &index, &next, &done);
1949 page = wdata->pages[i];
1950 /*
1951 * At this point we hold neither mapping->tree_lock nor
1952 * lock on the page itself: the page may be truncated or
1953 * invalidated (changing page->mapping to NULL), or even
1954 * swizzled back from swapper_space to tmpfs file
1955 * mapping
1956 */
1957
1958 if (nr_pages == 0)
1959 lock_page(page);
1960 else if (!trylock_page(page))
1961 break;
1962
1963 if (unlikely(page->mapping != mapping)) {
1964 unlock_page(page);
1965 break;
1966 }
1967
1968 if (!wbc->range_cyclic && page->index > end) {
1969 done = true;
1970 unlock_page(page);
1971 break;
1972 }
1973
1974 if (next && (page->index != next)) {
1975 /* Not next consecutive page */
1976 unlock_page(page);
1977 break;
1978 }
1979
1980 if (wbc->sync_mode != WB_SYNC_NONE)
1981 wait_on_page_writeback(page);
1982
1983 if (PageWriteback(page) ||
1984 !clear_page_dirty_for_io(page)) {
1985 unlock_page(page);
1986 break;
1987 }
1988
1989 /*
1990 * This actually clears the dirty bit in the radix tree.
1991 * See cifs_writepage() for more commentary.
1992 */
1993 set_page_writeback(page);
1994
1995 if (page_offset(page) >= i_size_read(mapping->host)) {
1996 done = true;
1997 unlock_page(page);
1998 end_page_writeback(page);
1999 break;
2000 }
2001
2002 wdata->pages[i] = page;
2003 next = page->index + 1;
2004 ++nr_pages;
2005 }
2006
2007 /* reset index to refind any pages skipped */
2008 if (nr_pages == 0)
2009 index = wdata->pages[0]->index + 1;
2010
2011 /* put any pages we aren't going to use */
2012 for (i = nr_pages; i < found_pages; i++) {
2013 page_cache_release(wdata->pages[i]);
2014 wdata->pages[i] = NULL;
2015 }
2016 2086
2017 /* nothing to write? */ 2087 /* nothing to write? */
2018 if (nr_pages == 0) { 2088 if (nr_pages == 0) {
2019 kref_put(&wdata->refcount, cifs_writedata_release); 2089 kref_put(&wdata->refcount, cifs_writedata_release);
2090 add_credits_and_wake_if(server, credits, 0);
2020 continue; 2091 continue;
2021 } 2092 }
2022 2093
2023 wdata->sync_mode = wbc->sync_mode; 2094 wdata->credits = credits;
2024 wdata->nr_pages = nr_pages;
2025 wdata->offset = page_offset(wdata->pages[0]);
2026 wdata->pagesz = PAGE_CACHE_SIZE;
2027 wdata->tailsz =
2028 min(i_size_read(mapping->host) -
2029 page_offset(wdata->pages[nr_pages - 1]),
2030 (loff_t)PAGE_CACHE_SIZE);
2031 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
2032 wdata->tailsz;
2033
2034 do {
2035 if (wdata->cfile != NULL)
2036 cifsFileInfo_put(wdata->cfile);
2037 wdata->cfile = find_writable_file(CIFS_I(mapping->host),
2038 false);
2039 if (!wdata->cfile) {
2040 cifs_dbg(VFS, "No writable handles for inode\n");
2041 rc = -EBADF;
2042 break;
2043 }
2044 wdata->pid = wdata->cfile->pid;
2045 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2046 rc = server->ops->async_writev(wdata,
2047 cifs_writedata_release);
2048 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
2049 2095
2050 for (i = 0; i < nr_pages; ++i) 2096 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2051 unlock_page(wdata->pages[i]);
2052 2097
2053 /* send failure -- clean up the mess */ 2098 /* send failure -- clean up the mess */
2054 if (rc != 0) { 2099 if (rc != 0) {
2100 add_credits_and_wake_if(server, wdata->credits, 0);
2055 for (i = 0; i < nr_pages; ++i) { 2101 for (i = 0; i < nr_pages; ++i) {
2056 if (rc == -EAGAIN) 2102 if (rc == -EAGAIN)
2057 redirty_page_for_writepage(wbc, 2103 redirty_page_for_writepage(wbc,
@@ -2066,6 +2112,11 @@ retry:
2066 } 2112 }
2067 kref_put(&wdata->refcount, cifs_writedata_release); 2113 kref_put(&wdata->refcount, cifs_writedata_release);
2068 2114
2115 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2116 index = saved_index;
2117 continue;
2118 }
2119
2069 wbc->nr_to_write -= nr_pages; 2120 wbc->nr_to_write -= nr_pages;
2070 if (wbc->nr_to_write <= 0) 2121 if (wbc->nr_to_write <= 0)
2071 done = true; 2122 done = true;
@@ -2362,123 +2413,109 @@ cifs_uncached_writev_complete(struct work_struct *work)
2362 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 2413 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2363} 2414}
2364 2415
2365/* attempt to send write to server, retry on any -EAGAIN errors */
2366static int 2416static int
2367cifs_uncached_retry_writev(struct cifs_writedata *wdata) 2417wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2418 size_t *len, unsigned long *num_pages)
2368{ 2419{
2369 int rc; 2420 size_t save_len, copied, bytes, cur_len = *len;
2370 struct TCP_Server_Info *server; 2421 unsigned long i, nr_pages = *num_pages;
2371 2422
2372 server = tlink_tcon(wdata->cfile->tlink)->ses->server; 2423 save_len = cur_len;
2424 for (i = 0; i < nr_pages; i++) {
2425 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2426 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2427 cur_len -= copied;
2428 /*
2429 * If we didn't copy as much as we expected, then that
2430 * may mean we trod into an unmapped area. Stop copying
2431 * at that point. On the next pass through the big
2432 * loop, we'll likely end up getting a zero-length
2433 * write and bailing out of it.
2434 */
2435 if (copied < bytes)
2436 break;
2437 }
2438 cur_len = save_len - cur_len;
2439 *len = cur_len;
2373 2440
2374 do { 2441 /*
2375 if (wdata->cfile->invalidHandle) { 2442 * If we have no data to send, then that probably means that
2376 rc = cifs_reopen_file(wdata->cfile, false); 2443 * the copy above failed altogether. That's most likely because
2377 if (rc != 0) 2444 * the address in the iovec was bogus. Return -EFAULT and let
2378 continue; 2445 * the caller free anything we allocated and bail out.
2379 } 2446 */
2380 rc = server->ops->async_writev(wdata, 2447 if (!cur_len)
2381 cifs_uncached_writedata_release); 2448 return -EFAULT;
2382 } while (rc == -EAGAIN);
2383 2449
2384 return rc; 2450 /*
2451 * i + 1 now represents the number of pages we actually used in
2452 * the copy phase above.
2453 */
2454 *num_pages = i + 1;
2455 return 0;
2385} 2456}
2386 2457
2387static ssize_t 2458static int
2388cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) 2459cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2460 struct cifsFileInfo *open_file,
2461 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2389{ 2462{
2390 unsigned long nr_pages, i; 2463 int rc = 0;
2391 size_t bytes, copied, len, cur_len; 2464 size_t cur_len;
2392 ssize_t total_written = 0; 2465 unsigned long nr_pages, num_pages, i;
2393 loff_t offset; 2466 struct cifs_writedata *wdata;
2394 struct cifsFileInfo *open_file; 2467 struct iov_iter saved_from;
2395 struct cifs_tcon *tcon; 2468 loff_t saved_offset = offset;
2396 struct cifs_sb_info *cifs_sb;
2397 struct cifs_writedata *wdata, *tmp;
2398 struct list_head wdata_list;
2399 int rc;
2400 pid_t pid; 2469 pid_t pid;
2401 2470 struct TCP_Server_Info *server;
2402 len = iov_iter_count(from);
2403 rc = generic_write_checks(file, poffset, &len, 0);
2404 if (rc)
2405 return rc;
2406
2407 if (!len)
2408 return 0;
2409
2410 iov_iter_truncate(from, len);
2411
2412 INIT_LIST_HEAD(&wdata_list);
2413 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2414 open_file = file->private_data;
2415 tcon = tlink_tcon(open_file->tlink);
2416
2417 if (!tcon->ses->server->ops->async_writev)
2418 return -ENOSYS;
2419
2420 offset = *poffset;
2421 2471
2422 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 2472 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2423 pid = open_file->pid; 2473 pid = open_file->pid;
2424 else 2474 else
2425 pid = current->tgid; 2475 pid = current->tgid;
2426 2476
2477 server = tlink_tcon(open_file->tlink)->ses->server;
2478 memcpy(&saved_from, from, sizeof(struct iov_iter));
2479
2427 do { 2480 do {
2428 size_t save_len; 2481 unsigned int wsize, credits;
2482
2483 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2484 &wsize, &credits);
2485 if (rc)
2486 break;
2429 2487
2430 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len); 2488 nr_pages = get_numpages(wsize, len, &cur_len);
2431 wdata = cifs_writedata_alloc(nr_pages, 2489 wdata = cifs_writedata_alloc(nr_pages,
2432 cifs_uncached_writev_complete); 2490 cifs_uncached_writev_complete);
2433 if (!wdata) { 2491 if (!wdata) {
2434 rc = -ENOMEM; 2492 rc = -ENOMEM;
2493 add_credits_and_wake_if(server, credits, 0);
2435 break; 2494 break;
2436 } 2495 }
2437 2496
2438 rc = cifs_write_allocate_pages(wdata->pages, nr_pages); 2497 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2439 if (rc) { 2498 if (rc) {
2440 kfree(wdata); 2499 kfree(wdata);
2500 add_credits_and_wake_if(server, credits, 0);
2441 break; 2501 break;
2442 } 2502 }
2443 2503
2444 save_len = cur_len; 2504 num_pages = nr_pages;
2445 for (i = 0; i < nr_pages; i++) { 2505 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2446 bytes = min_t(size_t, cur_len, PAGE_SIZE); 2506 if (rc) {
2447 copied = copy_page_from_iter(wdata->pages[i], 0, bytes,
2448 from);
2449 cur_len -= copied;
2450 /*
2451 * If we didn't copy as much as we expected, then that
2452 * may mean we trod into an unmapped area. Stop copying
2453 * at that point. On the next pass through the big
2454 * loop, we'll likely end up getting a zero-length
2455 * write and bailing out of it.
2456 */
2457 if (copied < bytes)
2458 break;
2459 }
2460 cur_len = save_len - cur_len;
2461
2462 /*
2463 * If we have no data to send, then that probably means that
2464 * the copy above failed altogether. That's most likely because
2465 * the address in the iovec was bogus. Set the rc to -EFAULT,
2466 * free anything we allocated and bail out.
2467 */
2468 if (!cur_len) {
2469 for (i = 0; i < nr_pages; i++) 2507 for (i = 0; i < nr_pages; i++)
2470 put_page(wdata->pages[i]); 2508 put_page(wdata->pages[i]);
2471 kfree(wdata); 2509 kfree(wdata);
2472 rc = -EFAULT; 2510 add_credits_and_wake_if(server, credits, 0);
2473 break; 2511 break;
2474 } 2512 }
2475 2513
2476 /* 2514 /*
2477 * i + 1 now represents the number of pages we actually used in 2515 * Bring nr_pages down to the number of pages we actually used,
2478 * the copy phase above. Bring nr_pages down to that, and free 2516 * and free any pages that we didn't use.
2479 * any pages that we didn't use.
2480 */ 2517 */
2481 for ( ; nr_pages > i + 1; nr_pages--) 2518 for ( ; nr_pages > num_pages; nr_pages--)
2482 put_page(wdata->pages[nr_pages - 1]); 2519 put_page(wdata->pages[nr_pages - 1]);
2483 2520
2484 wdata->sync_mode = WB_SYNC_ALL; 2521 wdata->sync_mode = WB_SYNC_ALL;
@@ -2489,18 +2526,69 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
2489 wdata->bytes = cur_len; 2526 wdata->bytes = cur_len;
2490 wdata->pagesz = PAGE_SIZE; 2527 wdata->pagesz = PAGE_SIZE;
2491 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE); 2528 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2492 rc = cifs_uncached_retry_writev(wdata); 2529 wdata->credits = credits;
2530
2531 if (!wdata->cfile->invalidHandle ||
2532 !cifs_reopen_file(wdata->cfile, false))
2533 rc = server->ops->async_writev(wdata,
2534 cifs_uncached_writedata_release);
2493 if (rc) { 2535 if (rc) {
2536 add_credits_and_wake_if(server, wdata->credits, 0);
2494 kref_put(&wdata->refcount, 2537 kref_put(&wdata->refcount,
2495 cifs_uncached_writedata_release); 2538 cifs_uncached_writedata_release);
2539 if (rc == -EAGAIN) {
2540 memcpy(from, &saved_from,
2541 sizeof(struct iov_iter));
2542 iov_iter_advance(from, offset - saved_offset);
2543 continue;
2544 }
2496 break; 2545 break;
2497 } 2546 }
2498 2547
2499 list_add_tail(&wdata->list, &wdata_list); 2548 list_add_tail(&wdata->list, wdata_list);
2500 offset += cur_len; 2549 offset += cur_len;
2501 len -= cur_len; 2550 len -= cur_len;
2502 } while (len > 0); 2551 } while (len > 0);
2503 2552
2553 return rc;
2554}
2555
2556static ssize_t
2557cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
2558{
2559 size_t len;
2560 ssize_t total_written = 0;
2561 struct cifsFileInfo *open_file;
2562 struct cifs_tcon *tcon;
2563 struct cifs_sb_info *cifs_sb;
2564 struct cifs_writedata *wdata, *tmp;
2565 struct list_head wdata_list;
2566 struct iov_iter saved_from;
2567 int rc;
2568
2569 len = iov_iter_count(from);
2570 rc = generic_write_checks(file, poffset, &len, 0);
2571 if (rc)
2572 return rc;
2573
2574 if (!len)
2575 return 0;
2576
2577 iov_iter_truncate(from, len);
2578
2579 INIT_LIST_HEAD(&wdata_list);
2580 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2581 open_file = file->private_data;
2582 tcon = tlink_tcon(open_file->tlink);
2583
2584 if (!tcon->ses->server->ops->async_writev)
2585 return -ENOSYS;
2586
2587 memcpy(&saved_from, from, sizeof(struct iov_iter));
2588
2589 rc = cifs_write_from_iter(*poffset, len, from, open_file, cifs_sb,
2590 &wdata_list);
2591
2504 /* 2592 /*
2505 * If at least one write was successfully sent, then discard any rc 2593 * If at least one write was successfully sent, then discard any rc
2506 * value from the later writes. If the other write succeeds, then 2594 * value from the later writes. If the other write succeeds, then
@@ -2529,7 +2617,25 @@ restart_loop:
2529 2617
2530 /* resend call if it's a retryable error */ 2618 /* resend call if it's a retryable error */
2531 if (rc == -EAGAIN) { 2619 if (rc == -EAGAIN) {
2532 rc = cifs_uncached_retry_writev(wdata); 2620 struct list_head tmp_list;
2621 struct iov_iter tmp_from;
2622
2623 INIT_LIST_HEAD(&tmp_list);
2624 list_del_init(&wdata->list);
2625
2626 memcpy(&tmp_from, &saved_from,
2627 sizeof(struct iov_iter));
2628 iov_iter_advance(&tmp_from,
2629 wdata->offset - *poffset);
2630
2631 rc = cifs_write_from_iter(wdata->offset,
2632 wdata->bytes, &tmp_from,
2633 open_file, cifs_sb, &tmp_list);
2634
2635 list_splice(&tmp_list, &wdata_list);
2636
2637 kref_put(&wdata->refcount,
2638 cifs_uncached_writedata_release);
2533 goto restart_loop; 2639 goto restart_loop;
2534 } 2640 }
2535 } 2641 }
@@ -2722,26 +2828,6 @@ cifs_uncached_readdata_release(struct kref *refcount)
2722 cifs_readdata_release(refcount); 2828 cifs_readdata_release(refcount);
2723} 2829}
2724 2830
2725static int
2726cifs_retry_async_readv(struct cifs_readdata *rdata)
2727{
2728 int rc;
2729 struct TCP_Server_Info *server;
2730
2731 server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2732
2733 do {
2734 if (rdata->cfile->invalidHandle) {
2735 rc = cifs_reopen_file(rdata->cfile, true);
2736 if (rc != 0)
2737 continue;
2738 }
2739 rc = server->ops->async_readv(rdata);
2740 } while (rc == -EAGAIN);
2741
2742 return rc;
2743}
2744
2745/** 2831/**
2746 * cifs_readdata_to_iov - copy data from pages in response to an iovec 2832 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2747 * @rdata: the readdata response with list of pages holding data 2833 * @rdata: the readdata response with list of pages holding data
@@ -2754,7 +2840,7 @@ cifs_retry_async_readv(struct cifs_readdata *rdata)
2754static int 2840static int
2755cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter) 2841cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2756{ 2842{
2757 size_t remaining = rdata->bytes; 2843 size_t remaining = rdata->got_bytes;
2758 unsigned int i; 2844 unsigned int i;
2759 2845
2760 for (i = 0; i < rdata->nr_pages; i++) { 2846 for (i = 0; i < rdata->nr_pages; i++) {
@@ -2782,11 +2868,12 @@ static int
2782cifs_uncached_read_into_pages(struct TCP_Server_Info *server, 2868cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2783 struct cifs_readdata *rdata, unsigned int len) 2869 struct cifs_readdata *rdata, unsigned int len)
2784{ 2870{
2785 int total_read = 0, result = 0; 2871 int result = 0;
2786 unsigned int i; 2872 unsigned int i;
2787 unsigned int nr_pages = rdata->nr_pages; 2873 unsigned int nr_pages = rdata->nr_pages;
2788 struct kvec iov; 2874 struct kvec iov;
2789 2875
2876 rdata->got_bytes = 0;
2790 rdata->tailsz = PAGE_SIZE; 2877 rdata->tailsz = PAGE_SIZE;
2791 for (i = 0; i < nr_pages; i++) { 2878 for (i = 0; i < nr_pages; i++) {
2792 struct page *page = rdata->pages[i]; 2879 struct page *page = rdata->pages[i];
@@ -2820,55 +2907,45 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2820 if (result < 0) 2907 if (result < 0)
2821 break; 2908 break;
2822 2909
2823 total_read += result; 2910 rdata->got_bytes += result;
2824 } 2911 }
2825 2912
2826 return total_read > 0 ? total_read : result; 2913 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
2914 rdata->got_bytes : result;
2827} 2915}
2828 2916
2829ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) 2917static int
2918cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2919 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2830{ 2920{
2831 struct file *file = iocb->ki_filp; 2921 struct cifs_readdata *rdata;
2832 ssize_t rc; 2922 unsigned int npages, rsize, credits;
2833 size_t len, cur_len; 2923 size_t cur_len;
2834 ssize_t total_read = 0; 2924 int rc;
2835 loff_t offset = iocb->ki_pos;
2836 unsigned int npages;
2837 struct cifs_sb_info *cifs_sb;
2838 struct cifs_tcon *tcon;
2839 struct cifsFileInfo *open_file;
2840 struct cifs_readdata *rdata, *tmp;
2841 struct list_head rdata_list;
2842 pid_t pid; 2925 pid_t pid;
2926 struct TCP_Server_Info *server;
2843 2927
2844 len = iov_iter_count(to); 2928 server = tlink_tcon(open_file->tlink)->ses->server;
2845 if (!len)
2846 return 0;
2847
2848 INIT_LIST_HEAD(&rdata_list);
2849 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2850 open_file = file->private_data;
2851 tcon = tlink_tcon(open_file->tlink);
2852
2853 if (!tcon->ses->server->ops->async_readv)
2854 return -ENOSYS;
2855 2929
2856 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 2930 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2857 pid = open_file->pid; 2931 pid = open_file->pid;
2858 else 2932 else
2859 pid = current->tgid; 2933 pid = current->tgid;
2860 2934
2861 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2862 cifs_dbg(FYI, "attempting read on write only file instance\n");
2863
2864 do { 2935 do {
2865 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize); 2936 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
2937 &rsize, &credits);
2938 if (rc)
2939 break;
2940
2941 cur_len = min_t(const size_t, len, rsize);
2866 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE); 2942 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2867 2943
2868 /* allocate a readdata struct */ 2944 /* allocate a readdata struct */
2869 rdata = cifs_readdata_alloc(npages, 2945 rdata = cifs_readdata_alloc(npages,
2870 cifs_uncached_readv_complete); 2946 cifs_uncached_readv_complete);
2871 if (!rdata) { 2947 if (!rdata) {
2948 add_credits_and_wake_if(server, credits, 0);
2872 rc = -ENOMEM; 2949 rc = -ENOMEM;
2873 break; 2950 break;
2874 } 2951 }
@@ -2884,44 +2961,113 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
2884 rdata->pid = pid; 2961 rdata->pid = pid;
2885 rdata->pagesz = PAGE_SIZE; 2962 rdata->pagesz = PAGE_SIZE;
2886 rdata->read_into_pages = cifs_uncached_read_into_pages; 2963 rdata->read_into_pages = cifs_uncached_read_into_pages;
2964 rdata->credits = credits;
2887 2965
2888 rc = cifs_retry_async_readv(rdata); 2966 if (!rdata->cfile->invalidHandle ||
2967 !cifs_reopen_file(rdata->cfile, true))
2968 rc = server->ops->async_readv(rdata);
2889error: 2969error:
2890 if (rc) { 2970 if (rc) {
2971 add_credits_and_wake_if(server, rdata->credits, 0);
2891 kref_put(&rdata->refcount, 2972 kref_put(&rdata->refcount,
2892 cifs_uncached_readdata_release); 2973 cifs_uncached_readdata_release);
2974 if (rc == -EAGAIN)
2975 continue;
2893 break; 2976 break;
2894 } 2977 }
2895 2978
2896 list_add_tail(&rdata->list, &rdata_list); 2979 list_add_tail(&rdata->list, rdata_list);
2897 offset += cur_len; 2980 offset += cur_len;
2898 len -= cur_len; 2981 len -= cur_len;
2899 } while (len > 0); 2982 } while (len > 0);
2900 2983
2984 return rc;
2985}
2986
2987ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
2988{
2989 struct file *file = iocb->ki_filp;
2990 ssize_t rc;
2991 size_t len;
2992 ssize_t total_read = 0;
2993 loff_t offset = iocb->ki_pos;
2994 struct cifs_sb_info *cifs_sb;
2995 struct cifs_tcon *tcon;
2996 struct cifsFileInfo *open_file;
2997 struct cifs_readdata *rdata, *tmp;
2998 struct list_head rdata_list;
2999
3000 len = iov_iter_count(to);
3001 if (!len)
3002 return 0;
3003
3004 INIT_LIST_HEAD(&rdata_list);
3005 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3006 open_file = file->private_data;
3007 tcon = tlink_tcon(open_file->tlink);
3008
3009 if (!tcon->ses->server->ops->async_readv)
3010 return -ENOSYS;
3011
3012 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3013 cifs_dbg(FYI, "attempting read on write only file instance\n");
3014
3015 rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
3016
2901 /* if at least one read request send succeeded, then reset rc */ 3017 /* if at least one read request send succeeded, then reset rc */
2902 if (!list_empty(&rdata_list)) 3018 if (!list_empty(&rdata_list))
2903 rc = 0; 3019 rc = 0;
2904 3020
2905 len = iov_iter_count(to); 3021 len = iov_iter_count(to);
2906 /* the loop below should proceed in the order of increasing offsets */ 3022 /* the loop below should proceed in the order of increasing offsets */
3023again:
2907 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) { 3024 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2908 again:
2909 if (!rc) { 3025 if (!rc) {
2910 /* FIXME: freezable sleep too? */ 3026 /* FIXME: freezable sleep too? */
2911 rc = wait_for_completion_killable(&rdata->done); 3027 rc = wait_for_completion_killable(&rdata->done);
2912 if (rc) 3028 if (rc)
2913 rc = -EINTR; 3029 rc = -EINTR;
2914 else if (rdata->result) { 3030 else if (rdata->result == -EAGAIN) {
2915 rc = rdata->result;
2916 /* resend call if it's a retryable error */ 3031 /* resend call if it's a retryable error */
2917 if (rc == -EAGAIN) { 3032 struct list_head tmp_list;
2918 rc = cifs_retry_async_readv(rdata); 3033 unsigned int got_bytes = rdata->got_bytes;
2919 goto again; 3034
3035 list_del_init(&rdata->list);
3036 INIT_LIST_HEAD(&tmp_list);
3037
3038 /*
3039 * Got a part of data and then reconnect has
3040 * happened -- fill the buffer and continue
3041 * reading.
3042 */
3043 if (got_bytes && got_bytes < rdata->bytes) {
3044 rc = cifs_readdata_to_iov(rdata, to);
3045 if (rc) {
3046 kref_put(&rdata->refcount,
3047 cifs_uncached_readdata_release);
3048 continue;
3049 }
2920 } 3050 }
2921 } else { 3051
3052 rc = cifs_send_async_read(
3053 rdata->offset + got_bytes,
3054 rdata->bytes - got_bytes,
3055 rdata->cfile, cifs_sb,
3056 &tmp_list);
3057
3058 list_splice(&tmp_list, &rdata_list);
3059
3060 kref_put(&rdata->refcount,
3061 cifs_uncached_readdata_release);
3062 goto again;
3063 } else if (rdata->result)
3064 rc = rdata->result;
3065 else
2922 rc = cifs_readdata_to_iov(rdata, to); 3066 rc = cifs_readdata_to_iov(rdata, to);
2923 }
2924 3067
3068 /* if there was a short read -- discard anything left */
3069 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3070 rc = -ENODATA;
2925 } 3071 }
2926 list_del_init(&rdata->list); 3072 list_del_init(&rdata->list);
2927 kref_put(&rdata->refcount, cifs_uncached_readdata_release); 3073 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
@@ -3030,18 +3176,19 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3030 3176
3031 for (total_read = 0, cur_offset = read_data; read_size > total_read; 3177 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3032 total_read += bytes_read, cur_offset += bytes_read) { 3178 total_read += bytes_read, cur_offset += bytes_read) {
3033 current_read_size = min_t(uint, read_size - total_read, rsize); 3179 do {
3034 /* 3180 current_read_size = min_t(uint, read_size - total_read,
3035 * For windows me and 9x we do not want to request more than it 3181 rsize);
3036 * negotiated since it will refuse the read then. 3182 /*
3037 */ 3183 * For windows me and 9x we do not want to request more
3038 if ((tcon->ses) && !(tcon->ses->capabilities & 3184 * than it negotiated since it will refuse the read
3185 * then.
3186 */
3187 if ((tcon->ses) && !(tcon->ses->capabilities &
3039 tcon->ses->server->vals->cap_large_files)) { 3188 tcon->ses->server->vals->cap_large_files)) {
3040 current_read_size = min_t(uint, current_read_size, 3189 current_read_size = min_t(uint,
3041 CIFSMaxBufSize); 3190 current_read_size, CIFSMaxBufSize);
3042 } 3191 }
3043 rc = -EAGAIN;
3044 while (rc == -EAGAIN) {
3045 if (open_file->invalidHandle) { 3192 if (open_file->invalidHandle) {
3046 rc = cifs_reopen_file(open_file, true); 3193 rc = cifs_reopen_file(open_file, true);
3047 if (rc != 0) 3194 if (rc != 0)
@@ -3054,7 +3201,8 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3054 rc = server->ops->sync_read(xid, open_file, &io_parms, 3201 rc = server->ops->sync_read(xid, open_file, &io_parms,
3055 &bytes_read, &cur_offset, 3202 &bytes_read, &cur_offset,
3056 &buf_type); 3203 &buf_type);
3057 } 3204 } while (rc == -EAGAIN);
3205
3058 if (rc || (bytes_read == 0)) { 3206 if (rc || (bytes_read == 0)) {
3059 if (total_read) { 3207 if (total_read) {
3060 break; 3208 break;
@@ -3133,25 +3281,30 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3133static void 3281static void
3134cifs_readv_complete(struct work_struct *work) 3282cifs_readv_complete(struct work_struct *work)
3135{ 3283{
3136 unsigned int i; 3284 unsigned int i, got_bytes;
3137 struct cifs_readdata *rdata = container_of(work, 3285 struct cifs_readdata *rdata = container_of(work,
3138 struct cifs_readdata, work); 3286 struct cifs_readdata, work);
3139 3287
3288 got_bytes = rdata->got_bytes;
3140 for (i = 0; i < rdata->nr_pages; i++) { 3289 for (i = 0; i < rdata->nr_pages; i++) {
3141 struct page *page = rdata->pages[i]; 3290 struct page *page = rdata->pages[i];
3142 3291
3143 lru_cache_add_file(page); 3292 lru_cache_add_file(page);
3144 3293
3145 if (rdata->result == 0) { 3294 if (rdata->result == 0 ||
3295 (rdata->result == -EAGAIN && got_bytes)) {
3146 flush_dcache_page(page); 3296 flush_dcache_page(page);
3147 SetPageUptodate(page); 3297 SetPageUptodate(page);
3148 } 3298 }
3149 3299
3150 unlock_page(page); 3300 unlock_page(page);
3151 3301
3152 if (rdata->result == 0) 3302 if (rdata->result == 0 ||
3303 (rdata->result == -EAGAIN && got_bytes))
3153 cifs_readpage_to_fscache(rdata->mapping->host, page); 3304 cifs_readpage_to_fscache(rdata->mapping->host, page);
3154 3305
3306 got_bytes -= min_t(unsigned int, PAGE_CACHE_SIZE, got_bytes);
3307
3155 page_cache_release(page); 3308 page_cache_release(page);
3156 rdata->pages[i] = NULL; 3309 rdata->pages[i] = NULL;
3157 } 3310 }
@@ -3162,7 +3315,7 @@ static int
3162cifs_readpages_read_into_pages(struct TCP_Server_Info *server, 3315cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3163 struct cifs_readdata *rdata, unsigned int len) 3316 struct cifs_readdata *rdata, unsigned int len)
3164{ 3317{
3165 int total_read = 0, result = 0; 3318 int result = 0;
3166 unsigned int i; 3319 unsigned int i;
3167 u64 eof; 3320 u64 eof;
3168 pgoff_t eof_index; 3321 pgoff_t eof_index;
@@ -3174,6 +3327,7 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3174 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0; 3327 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
3175 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index); 3328 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3176 3329
3330 rdata->got_bytes = 0;
3177 rdata->tailsz = PAGE_CACHE_SIZE; 3331 rdata->tailsz = PAGE_CACHE_SIZE;
3178 for (i = 0; i < nr_pages; i++) { 3332 for (i = 0; i < nr_pages; i++) {
3179 struct page *page = rdata->pages[i]; 3333 struct page *page = rdata->pages[i];
@@ -3228,10 +3382,70 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3228 if (result < 0) 3382 if (result < 0)
3229 break; 3383 break;
3230 3384
3231 total_read += result; 3385 rdata->got_bytes += result;
3232 } 3386 }
3233 3387
3234 return total_read > 0 ? total_read : result; 3388 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3389 rdata->got_bytes : result;
3390}
3391
3392static int
3393readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3394 unsigned int rsize, struct list_head *tmplist,
3395 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3396{
3397 struct page *page, *tpage;
3398 unsigned int expected_index;
3399 int rc;
3400
3401 INIT_LIST_HEAD(tmplist);
3402
3403 page = list_entry(page_list->prev, struct page, lru);
3404
3405 /*
3406 * Lock the page and put it in the cache. Since no one else
3407 * should have access to this page, we're safe to simply set
3408 * PG_locked without checking it first.
3409 */
3410 __set_page_locked(page);
3411 rc = add_to_page_cache_locked(page, mapping,
3412 page->index, GFP_KERNEL);
3413
3414 /* give up if we can't stick it in the cache */
3415 if (rc) {
3416 __clear_page_locked(page);
3417 return rc;
3418 }
3419
3420 /* move first page to the tmplist */
3421 *offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3422 *bytes = PAGE_CACHE_SIZE;
3423 *nr_pages = 1;
3424 list_move_tail(&page->lru, tmplist);
3425
3426 /* now try and add more pages onto the request */
3427 expected_index = page->index + 1;
3428 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3429 /* discontinuity ? */
3430 if (page->index != expected_index)
3431 break;
3432
3433 /* would this page push the read over the rsize? */
3434 if (*bytes + PAGE_CACHE_SIZE > rsize)
3435 break;
3436
3437 __set_page_locked(page);
3438 if (add_to_page_cache_locked(page, mapping, page->index,
3439 GFP_KERNEL)) {
3440 __clear_page_locked(page);
3441 break;
3442 }
3443 list_move_tail(&page->lru, tmplist);
3444 (*bytes) += PAGE_CACHE_SIZE;
3445 expected_index++;
3446 (*nr_pages)++;
3447 }
3448 return rc;
3235} 3449}
3236 3450
3237static int cifs_readpages(struct file *file, struct address_space *mapping, 3451static int cifs_readpages(struct file *file, struct address_space *mapping,
@@ -3241,19 +3455,10 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
3241 struct list_head tmplist; 3455 struct list_head tmplist;
3242 struct cifsFileInfo *open_file = file->private_data; 3456 struct cifsFileInfo *open_file = file->private_data;
3243 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 3457 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3244 unsigned int rsize = cifs_sb->rsize; 3458 struct TCP_Server_Info *server;
3245 pid_t pid; 3459 pid_t pid;
3246 3460
3247 /* 3461 /*
3248 * Give up immediately if rsize is too small to read an entire page.
3249 * The VFS will fall back to readpage. We should never reach this
3250 * point however since we set ra_pages to 0 when the rsize is smaller
3251 * than a cache page.
3252 */
3253 if (unlikely(rsize < PAGE_CACHE_SIZE))
3254 return 0;
3255
3256 /*
3257 * Reads as many pages as possible from fscache. Returns -ENOBUFS 3462 * Reads as many pages as possible from fscache. Returns -ENOBUFS
3258 * immediately if the cookie is negative 3463 * immediately if the cookie is negative
3259 * 3464 *
@@ -3271,7 +3476,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
3271 pid = current->tgid; 3476 pid = current->tgid;
3272 3477
3273 rc = 0; 3478 rc = 0;
3274 INIT_LIST_HEAD(&tmplist); 3479 server = tlink_tcon(open_file->tlink)->ses->server;
3275 3480
3276 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n", 3481 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3277 __func__, file, mapping, num_pages); 3482 __func__, file, mapping, num_pages);
@@ -3288,58 +3493,35 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
3288 * the rdata->pages, then we want them in increasing order. 3493 * the rdata->pages, then we want them in increasing order.
3289 */ 3494 */
3290 while (!list_empty(page_list)) { 3495 while (!list_empty(page_list)) {
3291 unsigned int i; 3496 unsigned int i, nr_pages, bytes, rsize;
3292 unsigned int bytes = PAGE_CACHE_SIZE;
3293 unsigned int expected_index;
3294 unsigned int nr_pages = 1;
3295 loff_t offset; 3497 loff_t offset;
3296 struct page *page, *tpage; 3498 struct page *page, *tpage;
3297 struct cifs_readdata *rdata; 3499 struct cifs_readdata *rdata;
3500 unsigned credits;
3298 3501
3299 page = list_entry(page_list->prev, struct page, lru); 3502 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3503 &rsize, &credits);
3504 if (rc)
3505 break;
3300 3506
3301 /* 3507 /*
3302 * Lock the page and put it in the cache. Since no one else 3508 * Give up immediately if rsize is too small to read an entire
3303 * should have access to this page, we're safe to simply set 3509 * page. The VFS will fall back to readpage. We should never
3304 * PG_locked without checking it first. 3510 * reach this point however since we set ra_pages to 0 when the
3511 * rsize is smaller than a cache page.
3305 */ 3512 */
3306 __set_page_locked(page); 3513 if (unlikely(rsize < PAGE_CACHE_SIZE)) {
3307 rc = add_to_page_cache_locked(page, mapping, 3514 add_credits_and_wake_if(server, credits, 0);
3308 page->index, GFP_KERNEL); 3515 return 0;
3516 }
3309 3517
3310 /* give up if we can't stick it in the cache */ 3518 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3519 &nr_pages, &offset, &bytes);
3311 if (rc) { 3520 if (rc) {
3312 __clear_page_locked(page); 3521 add_credits_and_wake_if(server, credits, 0);
3313 break; 3522 break;
3314 } 3523 }
3315 3524
3316 /* move first page to the tmplist */
3317 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3318 list_move_tail(&page->lru, &tmplist);
3319
3320 /* now try and add more pages onto the request */
3321 expected_index = page->index + 1;
3322 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3323 /* discontinuity ? */
3324 if (page->index != expected_index)
3325 break;
3326
3327 /* would this page push the read over the rsize? */
3328 if (bytes + PAGE_CACHE_SIZE > rsize)
3329 break;
3330
3331 __set_page_locked(page);
3332 if (add_to_page_cache_locked(page, mapping,
3333 page->index, GFP_KERNEL)) {
3334 __clear_page_locked(page);
3335 break;
3336 }
3337 list_move_tail(&page->lru, &tmplist);
3338 bytes += PAGE_CACHE_SIZE;
3339 expected_index++;
3340 nr_pages++;
3341 }
3342
3343 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete); 3525 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3344 if (!rdata) { 3526 if (!rdata) {
3345 /* best to give up if we're out of mem */ 3527 /* best to give up if we're out of mem */
@@ -3350,6 +3532,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
3350 page_cache_release(page); 3532 page_cache_release(page);
3351 } 3533 }
3352 rc = -ENOMEM; 3534 rc = -ENOMEM;
3535 add_credits_and_wake_if(server, credits, 0);
3353 break; 3536 break;
3354 } 3537 }
3355 3538
@@ -3360,21 +3543,32 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
3360 rdata->pid = pid; 3543 rdata->pid = pid;
3361 rdata->pagesz = PAGE_CACHE_SIZE; 3544 rdata->pagesz = PAGE_CACHE_SIZE;
3362 rdata->read_into_pages = cifs_readpages_read_into_pages; 3545 rdata->read_into_pages = cifs_readpages_read_into_pages;
3546 rdata->credits = credits;
3363 3547
3364 list_for_each_entry_safe(page, tpage, &tmplist, lru) { 3548 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3365 list_del(&page->lru); 3549 list_del(&page->lru);
3366 rdata->pages[rdata->nr_pages++] = page; 3550 rdata->pages[rdata->nr_pages++] = page;
3367 } 3551 }
3368 3552
3369 rc = cifs_retry_async_readv(rdata); 3553 if (!rdata->cfile->invalidHandle ||
3370 if (rc != 0) { 3554 !cifs_reopen_file(rdata->cfile, true))
3555 rc = server->ops->async_readv(rdata);
3556 if (rc) {
3557 add_credits_and_wake_if(server, rdata->credits, 0);
3371 for (i = 0; i < rdata->nr_pages; i++) { 3558 for (i = 0; i < rdata->nr_pages; i++) {
3372 page = rdata->pages[i]; 3559 page = rdata->pages[i];
3373 lru_cache_add_file(page); 3560 lru_cache_add_file(page);
3374 unlock_page(page); 3561 unlock_page(page);
3375 page_cache_release(page); 3562 page_cache_release(page);
3563 if (rc == -EAGAIN)
3564 list_add_tail(&page->lru, &tmplist);
3376 } 3565 }
3377 kref_put(&rdata->refcount, cifs_readdata_release); 3566 kref_put(&rdata->refcount, cifs_readdata_release);
3567 if (rc == -EAGAIN) {
3568 /* Re-add pages to the page_list and retry */
3569 list_splice(&tmplist, page_list);
3570 continue;
3571 }
3378 break; 3572 break;
3379 } 3573 }
3380 3574
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 6bf55d0ed494..81340c6253eb 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -226,6 +226,15 @@ cifs_small_buf_release(void *buf_to_free)
226 return; 226 return;
227} 227}
228 228
229void
230free_rsp_buf(int resp_buftype, void *rsp)
231{
232 if (resp_buftype == CIFS_SMALL_BUFFER)
233 cifs_small_buf_release(rsp);
234 else if (resp_buftype == CIFS_LARGE_BUFFER)
235 cifs_buf_release(rsp);
236}
237
229/* NB: MID can not be set if treeCon not passed in, in that 238/* NB: MID can not be set if treeCon not passed in, in that
230 case it is responsbility of caller to set the mid */ 239 case it is responsbility of caller to set the mid */
231void 240void
@@ -414,7 +423,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
414 return true; 423 return true;
415 } 424 }
416 if (pSMBr->hdr.Status.CifsError) { 425 if (pSMBr->hdr.Status.CifsError) {
417 cifs_dbg(FYI, "notify err 0x%d\n", 426 cifs_dbg(FYI, "notify err 0x%x\n",
418 pSMBr->hdr.Status.CifsError); 427 pSMBr->hdr.Status.CifsError);
419 return true; 428 return true;
420 } 429 }
@@ -441,7 +450,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
441 if (pSMB->hdr.WordCount != 8) 450 if (pSMB->hdr.WordCount != 8)
442 return false; 451 return false;
443 452
444 cifs_dbg(FYI, "oplock type 0x%d level 0x%d\n", 453 cifs_dbg(FYI, "oplock type 0x%x level 0x%x\n",
445 pSMB->LockType, pSMB->OplockLevel); 454 pSMB->LockType, pSMB->OplockLevel);
446 if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE)) 455 if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE))
447 return false; 456 return false;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index e87387dbf39f..39ee32688eac 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -520,382 +520,559 @@ select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
520 } 520 }
521} 521}
522 522
523int 523struct sess_data {
524CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, 524 unsigned int xid;
525 const struct nls_table *nls_cp) 525 struct cifs_ses *ses;
526 struct nls_table *nls_cp;
527 void (*func)(struct sess_data *);
528 int result;
529
530 /* we will send the SMB in three pieces:
531 * a fixed length beginning part, an optional
532 * SPNEGO blob (which can be zero length), and a
533 * last part which will include the strings
534 * and rest of bcc area. This allows us to avoid
535 * a large buffer 17K allocation
536 */
537 int buf0_type;
538 struct kvec iov[3];
539};
540
541static int
542sess_alloc_buffer(struct sess_data *sess_data, int wct)
526{ 543{
527 int rc = 0; 544 int rc;
528 int wct; 545 struct cifs_ses *ses = sess_data->ses;
529 struct smb_hdr *smb_buf; 546 struct smb_hdr *smb_buf;
530 char *bcc_ptr;
531 char *str_area;
532 SESSION_SETUP_ANDX *pSMB;
533 __u32 capabilities;
534 __u16 count;
535 int resp_buf_type;
536 struct kvec iov[3];
537 enum securityEnum type;
538 __u16 action, bytes_remaining;
539 struct key *spnego_key = NULL;
540 __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */
541 u16 blob_len;
542 char *ntlmsspblob = NULL;
543 547
544 if (ses == NULL) { 548 rc = small_smb_init_no_tc(SMB_COM_SESSION_SETUP_ANDX, wct, ses,
545 WARN(1, "%s: ses == NULL!", __func__); 549 (void **)&smb_buf);
546 return -EINVAL;
547 }
548 550
549 type = select_sectype(ses->server, ses->sectype); 551 if (rc)
550 cifs_dbg(FYI, "sess setup type %d\n", type); 552 return rc;
551 if (type == Unspecified) { 553
552 cifs_dbg(VFS, 554 sess_data->iov[0].iov_base = (char *)smb_buf;
553 "Unable to select appropriate authentication method!"); 555 sess_data->iov[0].iov_len = be32_to_cpu(smb_buf->smb_buf_length) + 4;
554 return -EINVAL; 556 /*
557 * This variable will be used to clear the buffer
558 * allocated above in case of any error in the calling function.
559 */
560 sess_data->buf0_type = CIFS_SMALL_BUFFER;
561
562 /* 2000 big enough to fit max user, domain, NOS name etc. */
563 sess_data->iov[2].iov_base = kmalloc(2000, GFP_KERNEL);
564 if (!sess_data->iov[2].iov_base) {
565 rc = -ENOMEM;
566 goto out_free_smb_buf;
555 } 567 }
556 568
557 if (type == RawNTLMSSP) { 569 return 0;
558 /* if memory allocation is successful, caller of this function 570
559 * frees it. 571out_free_smb_buf:
560 */ 572 kfree(smb_buf);
561 ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL); 573 sess_data->iov[0].iov_base = NULL;
562 if (!ses->ntlmssp) 574 sess_data->iov[0].iov_len = 0;
563 return -ENOMEM; 575 sess_data->buf0_type = CIFS_NO_BUFFER;
564 ses->ntlmssp->sesskey_per_smbsess = false; 576 return rc;
577}
578
579static void
580sess_free_buffer(struct sess_data *sess_data)
581{
565 582
583 free_rsp_buf(sess_data->buf0_type, sess_data->iov[0].iov_base);
584 sess_data->buf0_type = CIFS_NO_BUFFER;
585 kfree(sess_data->iov[2].iov_base);
586}
587
588static int
589sess_establish_session(struct sess_data *sess_data)
590{
591 struct cifs_ses *ses = sess_data->ses;
592
593 mutex_lock(&ses->server->srv_mutex);
594 if (!ses->server->session_estab) {
595 if (ses->server->sign) {
596 ses->server->session_key.response =
597 kmemdup(ses->auth_key.response,
598 ses->auth_key.len, GFP_KERNEL);
599 if (!ses->server->session_key.response) {
600 mutex_unlock(&ses->server->srv_mutex);
601 return -ENOMEM;
602 }
603 ses->server->session_key.len =
604 ses->auth_key.len;
605 }
606 ses->server->sequence_number = 0x2;
607 ses->server->session_estab = true;
566 } 608 }
609 mutex_unlock(&ses->server->srv_mutex);
567 610
568ssetup_ntlmssp_authenticate: 611 cifs_dbg(FYI, "CIFS session established successfully\n");
569 if (phase == NtLmChallenge) 612 spin_lock(&GlobalMid_Lock);
570 phase = NtLmAuthenticate; /* if ntlmssp, now final phase */ 613 ses->status = CifsGood;
614 ses->need_reconnect = false;
615 spin_unlock(&GlobalMid_Lock);
571 616
572 if (type == LANMAN) { 617 return 0;
573#ifndef CONFIG_CIFS_WEAK_PW_HASH 618}
574 /* LANMAN and plaintext are less secure and off by default.
575 So we make this explicitly be turned on in kconfig (in the
576 build) and turned on at runtime (changed from the default)
577 in proc/fs/cifs or via mount parm. Unfortunately this is
578 needed for old Win (e.g. Win95), some obscure NAS and OS/2 */
579 return -EOPNOTSUPP;
580#endif
581 wct = 10; /* lanman 2 style sessionsetup */
582 } else if ((type == NTLM) || (type == NTLMv2)) {
583 /* For NTLMv2 failures eventually may need to retry NTLM */
584 wct = 13; /* old style NTLM sessionsetup */
585 } else /* same size: negotiate or auth, NTLMSSP or extended security */
586 wct = 12;
587 619
588 rc = small_smb_init_no_tc(SMB_COM_SESSION_SETUP_ANDX, wct, ses, 620static int
589 (void **)&smb_buf); 621sess_sendreceive(struct sess_data *sess_data)
590 if (rc) 622{
591 return rc; 623 int rc;
624 struct smb_hdr *smb_buf = (struct smb_hdr *) sess_data->iov[0].iov_base;
625 __u16 count;
592 626
593 pSMB = (SESSION_SETUP_ANDX *)smb_buf; 627 count = sess_data->iov[1].iov_len + sess_data->iov[2].iov_len;
628 smb_buf->smb_buf_length =
629 cpu_to_be32(be32_to_cpu(smb_buf->smb_buf_length) + count);
630 put_bcc(count, smb_buf);
631
632 rc = SendReceive2(sess_data->xid, sess_data->ses,
633 sess_data->iov, 3 /* num_iovecs */,
634 &sess_data->buf0_type,
635 CIFS_LOG_ERROR);
636
637 return rc;
638}
594 639
640/*
641 * LANMAN and plaintext are less secure and off by default.
642 * So we make this explicitly be turned on in kconfig (in the
643 * build) and turned on at runtime (changed from the default)
644 * in proc/fs/cifs or via mount parm. Unfortunately this is
645 * needed for old Win (e.g. Win95), some obscure NAS and OS/2
646 */
647#ifdef CONFIG_CIFS_WEAK_PW_HASH
648static void
649sess_auth_lanman(struct sess_data *sess_data)
650{
651 int rc = 0;
652 struct smb_hdr *smb_buf;
653 SESSION_SETUP_ANDX *pSMB;
654 char *bcc_ptr;
655 struct cifs_ses *ses = sess_data->ses;
656 char lnm_session_key[CIFS_AUTH_RESP_SIZE];
657 __u32 capabilities;
658 __u16 bytes_remaining;
659
660 /* lanman 2 style sessionsetup */
661 /* wct = 10 */
662 rc = sess_alloc_buffer(sess_data, 10);
663 if (rc)
664 goto out;
665
666 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
667 bcc_ptr = sess_data->iov[2].iov_base;
595 capabilities = cifs_ssetup_hdr(ses, pSMB); 668 capabilities = cifs_ssetup_hdr(ses, pSMB);
596 669
597 /* we will send the SMB in three pieces: 670 pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE;
598 a fixed length beginning part, an optional
599 SPNEGO blob (which can be zero length), and a
600 last part which will include the strings
601 and rest of bcc area. This allows us to avoid
602 a large buffer 17K allocation */
603 iov[0].iov_base = (char *)pSMB;
604 iov[0].iov_len = be32_to_cpu(smb_buf->smb_buf_length) + 4;
605
606 /* setting this here allows the code at the end of the function
607 to free the request buffer if there's an error */
608 resp_buf_type = CIFS_SMALL_BUFFER;
609 671
610 /* 2000 big enough to fit max user, domain, NOS name etc. */ 672 /* no capabilities flags in old lanman negotiation */
611 str_area = kmalloc(2000, GFP_KERNEL); 673 pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE);
612 if (str_area == NULL) {
613 rc = -ENOMEM;
614 goto ssetup_exit;
615 }
616 bcc_ptr = str_area;
617 674
618 iov[1].iov_base = NULL; 675 /* Calculate hash with password and copy into bcc_ptr.
619 iov[1].iov_len = 0; 676 * Encryption Key (stored as in cryptkey) gets used if the
677 * security mode bit in Negottiate Protocol response states
678 * to use challenge/response method (i.e. Password bit is 1).
679 */
680 rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
681 ses->server->sec_mode & SECMODE_PW_ENCRYPT ?
682 true : false, lnm_session_key);
620 683
621 if (type == LANMAN) { 684 memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
622#ifdef CONFIG_CIFS_WEAK_PW_HASH 685 bcc_ptr += CIFS_AUTH_RESP_SIZE;
623 char lnm_session_key[CIFS_AUTH_RESP_SIZE]; 686
687 /*
688 * can not sign if LANMAN negotiated so no need
689 * to calculate signing key? but what if server
690 * changed to do higher than lanman dialect and
691 * we reconnected would we ever calc signing_key?
692 */
624 693
625 pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE; 694 cifs_dbg(FYI, "Negotiating LANMAN setting up strings\n");
695 /* Unicode not allowed for LANMAN dialects */
696 ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
626 697
627 /* no capabilities flags in old lanman negotiation */ 698 sess_data->iov[2].iov_len = (long) bcc_ptr -
699 (long) sess_data->iov[2].iov_base;
628 700
629 pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE); 701 rc = sess_sendreceive(sess_data);
702 if (rc)
703 goto out;
630 704
631 /* Calculate hash with password and copy into bcc_ptr. 705 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
632 * Encryption Key (stored as in cryptkey) gets used if the 706 smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base;
633 * security mode bit in Negottiate Protocol response states
634 * to use challenge/response method (i.e. Password bit is 1).
635 */
636 707
637 rc = calc_lanman_hash(ses->password, ses->server->cryptkey, 708 /* lanman response has a word count of 3 */
638 ses->server->sec_mode & SECMODE_PW_ENCRYPT ? 709 if (smb_buf->WordCount != 3) {
639 true : false, lnm_session_key); 710 rc = -EIO;
711 cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
712 goto out;
713 }
640 714
641 memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE); 715 if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN)
642 bcc_ptr += CIFS_AUTH_RESP_SIZE; 716 cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */
717
718 ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
719 cifs_dbg(FYI, "UID = %llu\n", ses->Suid);
643 720
644 /* can not sign if LANMAN negotiated so no need 721 bytes_remaining = get_bcc(smb_buf);
645 to calculate signing key? but what if server 722 bcc_ptr = pByteArea(smb_buf);
646 changed to do higher than lanman dialect and
647 we reconnected would we ever calc signing_key? */
648 723
649 cifs_dbg(FYI, "Negotiating LANMAN setting up strings\n"); 724 /* BB check if Unicode and decode strings */
650 /* Unicode not allowed for LANMAN dialects */ 725 if (bytes_remaining == 0) {
651 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); 726 /* no string area to decode, do nothing */
727 } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
728 /* unicode string area must be word-aligned */
729 if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
730 ++bcc_ptr;
731 --bytes_remaining;
732 }
733 decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses,
734 sess_data->nls_cp);
735 } else {
736 decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses,
737 sess_data->nls_cp);
738 }
739
740 rc = sess_establish_session(sess_data);
741out:
742 sess_data->result = rc;
743 sess_data->func = NULL;
744 sess_free_buffer(sess_data);
745}
746
747#else
748
749static void
750sess_auth_lanman(struct sess_data *sess_data)
751{
752 sess_data->result = -EOPNOTSUPP;
753 sess_data->func = NULL;
754}
652#endif 755#endif
653 } else if (type == NTLM) { 756
654 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities); 757static void
655 pSMB->req_no_secext.CaseInsensitivePasswordLength = 758sess_auth_ntlm(struct sess_data *sess_data)
759{
760 int rc = 0;
761 struct smb_hdr *smb_buf;
762 SESSION_SETUP_ANDX *pSMB;
763 char *bcc_ptr;
764 struct cifs_ses *ses = sess_data->ses;
765 __u32 capabilities;
766 __u16 bytes_remaining;
767
768 /* old style NTLM sessionsetup */
769 /* wct = 13 */
770 rc = sess_alloc_buffer(sess_data, 13);
771 if (rc)
772 goto out;
773
774 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
775 bcc_ptr = sess_data->iov[2].iov_base;
776 capabilities = cifs_ssetup_hdr(ses, pSMB);
777
778 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
779 pSMB->req_no_secext.CaseInsensitivePasswordLength =
656 cpu_to_le16(CIFS_AUTH_RESP_SIZE); 780 cpu_to_le16(CIFS_AUTH_RESP_SIZE);
657 pSMB->req_no_secext.CaseSensitivePasswordLength = 781 pSMB->req_no_secext.CaseSensitivePasswordLength =
658 cpu_to_le16(CIFS_AUTH_RESP_SIZE); 782 cpu_to_le16(CIFS_AUTH_RESP_SIZE);
659 783
660 /* calculate ntlm response and session key */ 784 /* calculate ntlm response and session key */
661 rc = setup_ntlm_response(ses, nls_cp); 785 rc = setup_ntlm_response(ses, sess_data->nls_cp);
662 if (rc) { 786 if (rc) {
663 cifs_dbg(VFS, "Error %d during NTLM authentication\n", 787 cifs_dbg(VFS, "Error %d during NTLM authentication\n",
664 rc); 788 rc);
665 goto ssetup_exit; 789 goto out;
666 } 790 }
667 791
668 /* copy ntlm response */ 792 /* copy ntlm response */
669 memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE, 793 memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
670 CIFS_AUTH_RESP_SIZE); 794 CIFS_AUTH_RESP_SIZE);
671 bcc_ptr += CIFS_AUTH_RESP_SIZE; 795 bcc_ptr += CIFS_AUTH_RESP_SIZE;
672 memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE, 796 memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
673 CIFS_AUTH_RESP_SIZE); 797 CIFS_AUTH_RESP_SIZE);
674 bcc_ptr += CIFS_AUTH_RESP_SIZE; 798 bcc_ptr += CIFS_AUTH_RESP_SIZE;
675 799
676 if (ses->capabilities & CAP_UNICODE) { 800 if (ses->capabilities & CAP_UNICODE) {
677 /* unicode strings must be word aligned */ 801 /* unicode strings must be word aligned */
678 if (iov[0].iov_len % 2) { 802 if (sess_data->iov[0].iov_len % 2) {
679 *bcc_ptr = 0; 803 *bcc_ptr = 0;
680 bcc_ptr++; 804 bcc_ptr++;
681 }
682 unicode_ssetup_strings(&bcc_ptr, ses, nls_cp);
683 } else
684 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
685 } else if (type == NTLMv2) {
686 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
687
688 /* LM2 password would be here if we supported it */
689 pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
690
691 /* calculate nlmv2 response and session key */
692 rc = setup_ntlmv2_rsp(ses, nls_cp);
693 if (rc) {
694 cifs_dbg(VFS, "Error %d during NTLMv2 authentication\n",
695 rc);
696 goto ssetup_exit;
697 } 805 }
698 memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE, 806 unicode_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
699 ses->auth_key.len - CIFS_SESS_KEY_SIZE); 807 } else {
700 bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE; 808 ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
701 809 }
702 /* set case sensitive password length after tilen may get
703 * assigned, tilen is 0 otherwise.
704 */
705 pSMB->req_no_secext.CaseSensitivePasswordLength =
706 cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
707 810
708 if (ses->capabilities & CAP_UNICODE) {
709 if (iov[0].iov_len % 2) {
710 *bcc_ptr = 0;
711 bcc_ptr++;
712 }
713 unicode_ssetup_strings(&bcc_ptr, ses, nls_cp);
714 } else
715 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
716 } else if (type == Kerberos) {
717#ifdef CONFIG_CIFS_UPCALL
718 struct cifs_spnego_msg *msg;
719 811
720 spnego_key = cifs_get_spnego_key(ses); 812 sess_data->iov[2].iov_len = (long) bcc_ptr -
721 if (IS_ERR(spnego_key)) { 813 (long) sess_data->iov[2].iov_base;
722 rc = PTR_ERR(spnego_key);
723 spnego_key = NULL;
724 goto ssetup_exit;
725 }
726 814
727 msg = spnego_key->payload.data; 815 rc = sess_sendreceive(sess_data);
728 /* check version field to make sure that cifs.upcall is 816 if (rc)
729 sending us a response in an expected form */ 817 goto out;
730 if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) {
731 cifs_dbg(VFS, "incorrect version of cifs.upcall "
732 "expected %d but got %d)",
733 CIFS_SPNEGO_UPCALL_VERSION, msg->version);
734 rc = -EKEYREJECTED;
735 goto ssetup_exit;
736 }
737 818
738 ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len, 819 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
739 GFP_KERNEL); 820 smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base;
740 if (!ses->auth_key.response) {
741 cifs_dbg(VFS,
742 "Kerberos can't allocate (%u bytes) memory",
743 msg->sesskey_len);
744 rc = -ENOMEM;
745 goto ssetup_exit;
746 }
747 ses->auth_key.len = msg->sesskey_len;
748
749 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
750 capabilities |= CAP_EXTENDED_SECURITY;
751 pSMB->req.Capabilities = cpu_to_le32(capabilities);
752 iov[1].iov_base = msg->data + msg->sesskey_len;
753 iov[1].iov_len = msg->secblob_len;
754 pSMB->req.SecurityBlobLength = cpu_to_le16(iov[1].iov_len);
755
756 if (ses->capabilities & CAP_UNICODE) {
757 /* unicode strings must be word aligned */
758 if ((iov[0].iov_len + iov[1].iov_len) % 2) {
759 *bcc_ptr = 0;
760 bcc_ptr++;
761 }
762 unicode_oslm_strings(&bcc_ptr, nls_cp);
763 unicode_domain_string(&bcc_ptr, ses, nls_cp);
764 } else
765 /* BB: is this right? */
766 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
767#else /* ! CONFIG_CIFS_UPCALL */
768 cifs_dbg(VFS, "Kerberos negotiated but upcall support disabled!\n");
769 rc = -ENOSYS;
770 goto ssetup_exit;
771#endif /* CONFIG_CIFS_UPCALL */
772 } else if (type == RawNTLMSSP) {
773 if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
774 cifs_dbg(VFS, "NTLMSSP requires Unicode support\n");
775 rc = -ENOSYS;
776 goto ssetup_exit;
777 }
778 821
779 cifs_dbg(FYI, "ntlmssp session setup phase %d\n", phase); 822 if (smb_buf->WordCount != 3) {
780 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; 823 rc = -EIO;
781 capabilities |= CAP_EXTENDED_SECURITY; 824 cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
782 pSMB->req.Capabilities |= cpu_to_le32(capabilities); 825 goto out;
783 switch(phase) { 826 }
784 case NtLmNegotiate:
785 build_ntlmssp_negotiate_blob(
786 pSMB->req.SecurityBlob, ses);
787 iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE);
788 iov[1].iov_base = pSMB->req.SecurityBlob;
789 pSMB->req.SecurityBlobLength =
790 cpu_to_le16(sizeof(NEGOTIATE_MESSAGE));
791 break;
792 case NtLmAuthenticate:
793 /*
794 * 5 is an empirical value, large enough to hold
795 * authenticate message plus max 10 of av paris,
796 * domain, user, workstation names, flags, etc.
797 */
798 ntlmsspblob = kzalloc(
799 5*sizeof(struct _AUTHENTICATE_MESSAGE),
800 GFP_KERNEL);
801 if (!ntlmsspblob) {
802 rc = -ENOMEM;
803 goto ssetup_exit;
804 }
805 827
806 rc = build_ntlmssp_auth_blob(ntlmsspblob, 828 if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN)
807 &blob_len, ses, nls_cp); 829 cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */
808 if (rc) 830
809 goto ssetup_exit; 831 ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
810 iov[1].iov_len = blob_len; 832 cifs_dbg(FYI, "UID = %llu\n", ses->Suid);
811 iov[1].iov_base = ntlmsspblob; 833
812 pSMB->req.SecurityBlobLength = cpu_to_le16(blob_len); 834 bytes_remaining = get_bcc(smb_buf);
813 /* 835 bcc_ptr = pByteArea(smb_buf);
814 * Make sure that we tell the server that we are using 836
815 * the uid that it just gave us back on the response 837 /* BB check if Unicode and decode strings */
816 * (challenge) 838 if (bytes_remaining == 0) {
817 */ 839 /* no string area to decode, do nothing */
818 smb_buf->Uid = ses->Suid; 840 } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
819 break; 841 /* unicode string area must be word-aligned */
820 default: 842 if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
821 cifs_dbg(VFS, "invalid phase %d\n", phase); 843 ++bcc_ptr;
822 rc = -ENOSYS; 844 --bytes_remaining;
823 goto ssetup_exit;
824 } 845 }
825 /* unicode strings must be word aligned */ 846 decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses,
826 if ((iov[0].iov_len + iov[1].iov_len) % 2) { 847 sess_data->nls_cp);
848 } else {
849 decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses,
850 sess_data->nls_cp);
851 }
852
853 rc = sess_establish_session(sess_data);
854out:
855 sess_data->result = rc;
856 sess_data->func = NULL;
857 sess_free_buffer(sess_data);
858 kfree(ses->auth_key.response);
859 ses->auth_key.response = NULL;
860}
861
862static void
863sess_auth_ntlmv2(struct sess_data *sess_data)
864{
865 int rc = 0;
866 struct smb_hdr *smb_buf;
867 SESSION_SETUP_ANDX *pSMB;
868 char *bcc_ptr;
869 struct cifs_ses *ses = sess_data->ses;
870 __u32 capabilities;
871 __u16 bytes_remaining;
872
873 /* old style NTLM sessionsetup */
874 /* wct = 13 */
875 rc = sess_alloc_buffer(sess_data, 13);
876 if (rc)
877 goto out;
878
879 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
880 bcc_ptr = sess_data->iov[2].iov_base;
881 capabilities = cifs_ssetup_hdr(ses, pSMB);
882
883 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
884
885 /* LM2 password would be here if we supported it */
886 pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
887
888 /* calculate nlmv2 response and session key */
889 rc = setup_ntlmv2_rsp(ses, sess_data->nls_cp);
890 if (rc) {
891 cifs_dbg(VFS, "Error %d during NTLMv2 authentication\n", rc);
892 goto out;
893 }
894
895 memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
896 ses->auth_key.len - CIFS_SESS_KEY_SIZE);
897 bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
898
899 /* set case sensitive password length after tilen may get
900 * assigned, tilen is 0 otherwise.
901 */
902 pSMB->req_no_secext.CaseSensitivePasswordLength =
903 cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
904
905 if (ses->capabilities & CAP_UNICODE) {
906 if (sess_data->iov[0].iov_len % 2) {
827 *bcc_ptr = 0; 907 *bcc_ptr = 0;
828 bcc_ptr++; 908 bcc_ptr++;
829 } 909 }
830 unicode_oslm_strings(&bcc_ptr, nls_cp); 910 unicode_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
831 } else { 911 } else {
832 cifs_dbg(VFS, "secType %d not supported!\n", type); 912 ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
833 rc = -ENOSYS;
834 goto ssetup_exit;
835 } 913 }
836 914
837 iov[2].iov_base = str_area;
838 iov[2].iov_len = (long) bcc_ptr - (long) str_area;
839 915
840 count = iov[1].iov_len + iov[2].iov_len; 916 sess_data->iov[2].iov_len = (long) bcc_ptr -
841 smb_buf->smb_buf_length = 917 (long) sess_data->iov[2].iov_base;
842 cpu_to_be32(be32_to_cpu(smb_buf->smb_buf_length) + count);
843 918
844 put_bcc(count, smb_buf); 919 rc = sess_sendreceive(sess_data);
920 if (rc)
921 goto out;
845 922
846 rc = SendReceive2(xid, ses, iov, 3 /* num_iovecs */, &resp_buf_type, 923 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
847 CIFS_LOG_ERROR); 924 smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base;
848 /* SMB request buf freed in SendReceive2 */ 925
926 if (smb_buf->WordCount != 3) {
927 rc = -EIO;
928 cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
929 goto out;
930 }
931
932 if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN)
933 cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */
934
935 ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
936 cifs_dbg(FYI, "UID = %llu\n", ses->Suid);
849 937
850 pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base; 938 bytes_remaining = get_bcc(smb_buf);
851 smb_buf = (struct smb_hdr *)iov[0].iov_base; 939 bcc_ptr = pByteArea(smb_buf);
852 940
853 if ((type == RawNTLMSSP) && (resp_buf_type != CIFS_NO_BUFFER) && 941 /* BB check if Unicode and decode strings */
854 (smb_buf->Status.CifsError == 942 if (bytes_remaining == 0) {
855 cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))) { 943 /* no string area to decode, do nothing */
856 if (phase != NtLmNegotiate) { 944 } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
857 cifs_dbg(VFS, "Unexpected more processing error\n"); 945 /* unicode string area must be word-aligned */
858 goto ssetup_exit; 946 if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
947 ++bcc_ptr;
948 --bytes_remaining;
859 } 949 }
860 /* NTLMSSP Negotiate sent now processing challenge (response) */ 950 decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses,
861 phase = NtLmChallenge; /* process ntlmssp challenge */ 951 sess_data->nls_cp);
862 rc = 0; /* MORE_PROC rc is not an error here, but expected */ 952 } else {
953 decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses,
954 sess_data->nls_cp);
863 } 955 }
956
957 rc = sess_establish_session(sess_data);
958out:
959 sess_data->result = rc;
960 sess_data->func = NULL;
961 sess_free_buffer(sess_data);
962 kfree(ses->auth_key.response);
963 ses->auth_key.response = NULL;
964}
965
966#ifdef CONFIG_CIFS_UPCALL
967static void
968sess_auth_kerberos(struct sess_data *sess_data)
969{
970 int rc = 0;
971 struct smb_hdr *smb_buf;
972 SESSION_SETUP_ANDX *pSMB;
973 char *bcc_ptr;
974 struct cifs_ses *ses = sess_data->ses;
975 __u32 capabilities;
976 __u16 bytes_remaining;
977 struct key *spnego_key = NULL;
978 struct cifs_spnego_msg *msg;
979 u16 blob_len;
980
981 /* extended security */
982 /* wct = 12 */
983 rc = sess_alloc_buffer(sess_data, 12);
864 if (rc) 984 if (rc)
865 goto ssetup_exit; 985 goto out;
866 986
867 if ((smb_buf->WordCount != 3) && (smb_buf->WordCount != 4)) { 987 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
988 bcc_ptr = sess_data->iov[2].iov_base;
989 capabilities = cifs_ssetup_hdr(ses, pSMB);
990
991 spnego_key = cifs_get_spnego_key(ses);
992 if (IS_ERR(spnego_key)) {
993 rc = PTR_ERR(spnego_key);
994 spnego_key = NULL;
995 goto out;
996 }
997
998 msg = spnego_key->payload.data;
999 /*
1000 * check version field to make sure that cifs.upcall is
1001 * sending us a response in an expected form
1002 */
1003 if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) {
1004 cifs_dbg(VFS,
1005 "incorrect version of cifs.upcall (expected %d but got %d)",
1006 CIFS_SPNEGO_UPCALL_VERSION, msg->version);
1007 rc = -EKEYREJECTED;
1008 goto out_put_spnego_key;
1009 }
1010
1011 ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
1012 GFP_KERNEL);
1013 if (!ses->auth_key.response) {
1014 cifs_dbg(VFS, "Kerberos can't allocate (%u bytes) memory",
1015 msg->sesskey_len);
1016 rc = -ENOMEM;
1017 goto out_put_spnego_key;
1018 }
1019 ses->auth_key.len = msg->sesskey_len;
1020
1021 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
1022 capabilities |= CAP_EXTENDED_SECURITY;
1023 pSMB->req.Capabilities = cpu_to_le32(capabilities);
1024 sess_data->iov[1].iov_base = msg->data + msg->sesskey_len;
1025 sess_data->iov[1].iov_len = msg->secblob_len;
1026 pSMB->req.SecurityBlobLength = cpu_to_le16(sess_data->iov[1].iov_len);
1027
1028 if (ses->capabilities & CAP_UNICODE) {
1029 /* unicode strings must be word aligned */
1030 if ((sess_data->iov[0].iov_len
1031 + sess_data->iov[1].iov_len) % 2) {
1032 *bcc_ptr = 0;
1033 bcc_ptr++;
1034 }
1035 unicode_oslm_strings(&bcc_ptr, sess_data->nls_cp);
1036 unicode_domain_string(&bcc_ptr, ses, sess_data->nls_cp);
1037 } else {
1038 /* BB: is this right? */
1039 ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
1040 }
1041
1042 sess_data->iov[2].iov_len = (long) bcc_ptr -
1043 (long) sess_data->iov[2].iov_base;
1044
1045 rc = sess_sendreceive(sess_data);
1046 if (rc)
1047 goto out_put_spnego_key;
1048
1049 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
1050 smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base;
1051
1052 if (smb_buf->WordCount != 4) {
868 rc = -EIO; 1053 rc = -EIO;
869 cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount); 1054 cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
870 goto ssetup_exit; 1055 goto out_put_spnego_key;
871 } 1056 }
872 action = le16_to_cpu(pSMB->resp.Action); 1057
873 if (action & GUEST_LOGIN) 1058 if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN)
874 cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */ 1059 cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */
1060
875 ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */ 1061 ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
876 cifs_dbg(FYI, "UID = %llu\n", ses->Suid); 1062 cifs_dbg(FYI, "UID = %llu\n", ses->Suid);
877 /* response can have either 3 or 4 word count - Samba sends 3 */ 1063
878 /* and lanman response is 3 */
879 bytes_remaining = get_bcc(smb_buf); 1064 bytes_remaining = get_bcc(smb_buf);
880 bcc_ptr = pByteArea(smb_buf); 1065 bcc_ptr = pByteArea(smb_buf);
881 1066
882 if (smb_buf->WordCount == 4) { 1067 blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength);
883 blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength); 1068 if (blob_len > bytes_remaining) {
884 if (blob_len > bytes_remaining) { 1069 cifs_dbg(VFS, "bad security blob length %d\n",
885 cifs_dbg(VFS, "bad security blob length %d\n", 1070 blob_len);
886 blob_len); 1071 rc = -EINVAL;
887 rc = -EINVAL; 1072 goto out_put_spnego_key;
888 goto ssetup_exit;
889 }
890 if (phase == NtLmChallenge) {
891 rc = decode_ntlmssp_challenge(bcc_ptr, blob_len, ses);
892 /* now goto beginning for ntlmssp authenticate phase */
893 if (rc)
894 goto ssetup_exit;
895 }
896 bcc_ptr += blob_len;
897 bytes_remaining -= blob_len;
898 } 1073 }
1074 bcc_ptr += blob_len;
1075 bytes_remaining -= blob_len;
899 1076
900 /* BB check if Unicode and decode strings */ 1077 /* BB check if Unicode and decode strings */
901 if (bytes_remaining == 0) { 1078 if (bytes_remaining == 0) {
@@ -906,60 +1083,371 @@ ssetup_ntlmssp_authenticate:
906 ++bcc_ptr; 1083 ++bcc_ptr;
907 --bytes_remaining; 1084 --bytes_remaining;
908 } 1085 }
909 decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp); 1086 decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses,
1087 sess_data->nls_cp);
910 } else { 1088 } else {
911 decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp); 1089 decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses,
1090 sess_data->nls_cp);
912 } 1091 }
913 1092
914ssetup_exit: 1093 rc = sess_establish_session(sess_data);
915 if (spnego_key) { 1094out_put_spnego_key:
916 key_invalidate(spnego_key); 1095 key_invalidate(spnego_key);
917 key_put(spnego_key); 1096 key_put(spnego_key);
1097out:
1098 sess_data->result = rc;
1099 sess_data->func = NULL;
1100 sess_free_buffer(sess_data);
1101 kfree(ses->auth_key.response);
1102 ses->auth_key.response = NULL;
1103}
1104
1105#else
1106
1107static void
1108sess_auth_kerberos(struct sess_data *sess_data)
1109{
1110 cifs_dbg(VFS, "Kerberos negotiated but upcall support disabled!\n");
1111 sess_data->result = -ENOSYS;
1112 sess_data->func = NULL;
1113}
1114#endif /* ! CONFIG_CIFS_UPCALL */
1115
1116/*
1117 * The required kvec buffers have to be allocated before calling this
1118 * function.
1119 */
1120static int
1121_sess_auth_rawntlmssp_assemble_req(struct sess_data *sess_data)
1122{
1123 struct smb_hdr *smb_buf;
1124 SESSION_SETUP_ANDX *pSMB;
1125 struct cifs_ses *ses = sess_data->ses;
1126 __u32 capabilities;
1127 char *bcc_ptr;
1128
1129 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
1130 smb_buf = (struct smb_hdr *)pSMB;
1131
1132 capabilities = cifs_ssetup_hdr(ses, pSMB);
1133 if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
1134 cifs_dbg(VFS, "NTLMSSP requires Unicode support\n");
1135 return -ENOSYS;
918 } 1136 }
919 kfree(str_area);
920 kfree(ntlmsspblob);
921 ntlmsspblob = NULL;
922 if (resp_buf_type == CIFS_SMALL_BUFFER) {
923 cifs_dbg(FYI, "ssetup freeing small buf %p\n", iov[0].iov_base);
924 cifs_small_buf_release(iov[0].iov_base);
925 } else if (resp_buf_type == CIFS_LARGE_BUFFER)
926 cifs_buf_release(iov[0].iov_base);
927 1137
928 /* if ntlmssp, and negotiate succeeded, proceed to authenticate phase */ 1138 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
929 if ((phase == NtLmChallenge) && (rc == 0)) 1139 capabilities |= CAP_EXTENDED_SECURITY;
930 goto ssetup_ntlmssp_authenticate; 1140 pSMB->req.Capabilities |= cpu_to_le32(capabilities);
1141
1142 bcc_ptr = sess_data->iov[2].iov_base;
1143 /* unicode strings must be word aligned */
1144 if ((sess_data->iov[0].iov_len + sess_data->iov[1].iov_len) % 2) {
1145 *bcc_ptr = 0;
1146 bcc_ptr++;
1147 }
1148 unicode_oslm_strings(&bcc_ptr, sess_data->nls_cp);
1149
1150 sess_data->iov[2].iov_len = (long) bcc_ptr -
1151 (long) sess_data->iov[2].iov_base;
1152
1153 return 0;
1154}
1155
1156static void
1157sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data);
1158
1159static void
1160sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data)
1161{
1162 int rc;
1163 struct smb_hdr *smb_buf;
1164 SESSION_SETUP_ANDX *pSMB;
1165 struct cifs_ses *ses = sess_data->ses;
1166 __u16 bytes_remaining;
1167 char *bcc_ptr;
1168 u16 blob_len;
1169
1170 cifs_dbg(FYI, "rawntlmssp session setup negotiate phase\n");
1171
1172 /*
1173 * if memory allocation is successful, caller of this function
1174 * frees it.
1175 */
1176 ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL);
1177 if (!ses->ntlmssp) {
1178 rc = -ENOMEM;
1179 goto out;
1180 }
1181 ses->ntlmssp->sesskey_per_smbsess = false;
1182
1183 /* wct = 12 */
1184 rc = sess_alloc_buffer(sess_data, 12);
1185 if (rc)
1186 goto out;
1187
1188 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
1189
1190 /* Build security blob before we assemble the request */
1191 build_ntlmssp_negotiate_blob(pSMB->req.SecurityBlob, ses);
1192 sess_data->iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE);
1193 sess_data->iov[1].iov_base = pSMB->req.SecurityBlob;
1194 pSMB->req.SecurityBlobLength = cpu_to_le16(sizeof(NEGOTIATE_MESSAGE));
1195
1196 rc = _sess_auth_rawntlmssp_assemble_req(sess_data);
1197 if (rc)
1198 goto out;
1199
1200 rc = sess_sendreceive(sess_data);
1201
1202 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
1203 smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base;
1204
1205 /* If true, rc here is expected and not an error */
1206 if (sess_data->buf0_type != CIFS_NO_BUFFER &&
1207 smb_buf->Status.CifsError ==
1208 cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))
1209 rc = 0;
1210
1211 if (rc)
1212 goto out;
1213
1214 cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n");
1215
1216 if (smb_buf->WordCount != 4) {
1217 rc = -EIO;
1218 cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
1219 goto out;
1220 }
1221
1222 ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
1223 cifs_dbg(FYI, "UID = %llu\n", ses->Suid);
1224
1225 bytes_remaining = get_bcc(smb_buf);
1226 bcc_ptr = pByteArea(smb_buf);
1227
1228 blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength);
1229 if (blob_len > bytes_remaining) {
1230 cifs_dbg(VFS, "bad security blob length %d\n",
1231 blob_len);
1232 rc = -EINVAL;
1233 goto out;
1234 }
1235
1236 rc = decode_ntlmssp_challenge(bcc_ptr, blob_len, ses);
1237out:
1238 sess_free_buffer(sess_data);
931 1239
932 if (!rc) { 1240 if (!rc) {
933 mutex_lock(&ses->server->srv_mutex); 1241 sess_data->func = sess_auth_rawntlmssp_authenticate;
934 if (!ses->server->session_estab) { 1242 return;
935 if (ses->server->sign) { 1243 }
936 ses->server->session_key.response = 1244
937 kmemdup(ses->auth_key.response, 1245 /* Else error. Cleanup */
938 ses->auth_key.len, GFP_KERNEL); 1246 kfree(ses->auth_key.response);
939 if (!ses->server->session_key.response) { 1247 ses->auth_key.response = NULL;
940 rc = -ENOMEM; 1248 kfree(ses->ntlmssp);
941 mutex_unlock(&ses->server->srv_mutex); 1249 ses->ntlmssp = NULL;
942 goto keycp_exit; 1250
943 } 1251 sess_data->func = NULL;
944 ses->server->session_key.len = 1252 sess_data->result = rc;
945 ses->auth_key.len; 1253}
946 }
947 ses->server->sequence_number = 0x2;
948 ses->server->session_estab = true;
949 }
950 mutex_unlock(&ses->server->srv_mutex);
951 1254
952 cifs_dbg(FYI, "CIFS session established successfully\n"); 1255static void
953 spin_lock(&GlobalMid_Lock); 1256sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
954 ses->status = CifsGood; 1257{
955 ses->need_reconnect = false; 1258 int rc;
956 spin_unlock(&GlobalMid_Lock); 1259 struct smb_hdr *smb_buf;
1260 SESSION_SETUP_ANDX *pSMB;
1261 struct cifs_ses *ses = sess_data->ses;
1262 __u16 bytes_remaining;
1263 char *bcc_ptr;
1264 char *ntlmsspblob = NULL;
1265 u16 blob_len;
1266
1267 cifs_dbg(FYI, "rawntlmssp session setup authenticate phase\n");
1268
1269 /* wct = 12 */
1270 rc = sess_alloc_buffer(sess_data, 12);
1271 if (rc)
1272 goto out;
1273
1274 /* Build security blob before we assemble the request */
1275 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
1276 smb_buf = (struct smb_hdr *)pSMB;
1277 /*
1278 * 5 is an empirical value, large enough to hold
1279 * authenticate message plus max 10 of av paris,
1280 * domain, user, workstation names, flags, etc.
1281 */
1282 ntlmsspblob = kzalloc(5*sizeof(struct _AUTHENTICATE_MESSAGE),
1283 GFP_KERNEL);
1284 if (!ntlmsspblob) {
1285 rc = -ENOMEM;
1286 goto out;
957 } 1287 }
958 1288
959keycp_exit: 1289 rc = build_ntlmssp_auth_blob(ntlmsspblob,
1290 &blob_len, ses, sess_data->nls_cp);
1291 if (rc)
1292 goto out_free_ntlmsspblob;
1293 sess_data->iov[1].iov_len = blob_len;
1294 sess_data->iov[1].iov_base = ntlmsspblob;
1295 pSMB->req.SecurityBlobLength = cpu_to_le16(blob_len);
1296 /*
1297 * Make sure that we tell the server that we are using
1298 * the uid that it just gave us back on the response
1299 * (challenge)
1300 */
1301 smb_buf->Uid = ses->Suid;
1302
1303 rc = _sess_auth_rawntlmssp_assemble_req(sess_data);
1304 if (rc)
1305 goto out_free_ntlmsspblob;
1306
1307 rc = sess_sendreceive(sess_data);
1308 if (rc)
1309 goto out_free_ntlmsspblob;
1310
1311 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
1312 smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base;
1313 if (smb_buf->WordCount != 4) {
1314 rc = -EIO;
1315 cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
1316 goto out_free_ntlmsspblob;
1317 }
1318
1319 if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN)
1320 cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */
1321
1322 bytes_remaining = get_bcc(smb_buf);
1323 bcc_ptr = pByteArea(smb_buf);
1324 blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength);
1325 if (blob_len > bytes_remaining) {
1326 cifs_dbg(VFS, "bad security blob length %d\n",
1327 blob_len);
1328 rc = -EINVAL;
1329 goto out_free_ntlmsspblob;
1330 }
1331 bcc_ptr += blob_len;
1332 bytes_remaining -= blob_len;
1333
1334
1335 /* BB check if Unicode and decode strings */
1336 if (bytes_remaining == 0) {
1337 /* no string area to decode, do nothing */
1338 } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
1339 /* unicode string area must be word-aligned */
1340 if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
1341 ++bcc_ptr;
1342 --bytes_remaining;
1343 }
1344 decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses,
1345 sess_data->nls_cp);
1346 } else {
1347 decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses,
1348 sess_data->nls_cp);
1349 }
1350
1351out_free_ntlmsspblob:
1352 kfree(ntlmsspblob);
1353out:
1354 sess_free_buffer(sess_data);
1355
1356 if (!rc)
1357 rc = sess_establish_session(sess_data);
1358
1359 /* Cleanup */
960 kfree(ses->auth_key.response); 1360 kfree(ses->auth_key.response);
961 ses->auth_key.response = NULL; 1361 ses->auth_key.response = NULL;
962 kfree(ses->ntlmssp); 1362 kfree(ses->ntlmssp);
1363 ses->ntlmssp = NULL;
1364
1365 sess_data->func = NULL;
1366 sess_data->result = rc;
1367}
1368
1369static int select_sec(struct cifs_ses *ses, struct sess_data *sess_data)
1370{
1371 int type;
1372
1373 type = select_sectype(ses->server, ses->sectype);
1374 cifs_dbg(FYI, "sess setup type %d\n", type);
1375 if (type == Unspecified) {
1376 cifs_dbg(VFS,
1377 "Unable to select appropriate authentication method!");
1378 return -EINVAL;
1379 }
1380
1381 switch (type) {
1382 case LANMAN:
1383 /* LANMAN and plaintext are less secure and off by default.
1384 * So we make this explicitly be turned on in kconfig (in the
1385 * build) and turned on at runtime (changed from the default)
1386 * in proc/fs/cifs or via mount parm. Unfortunately this is
1387 * needed for old Win (e.g. Win95), some obscure NAS and OS/2 */
1388#ifdef CONFIG_CIFS_WEAK_PW_HASH
1389 sess_data->func = sess_auth_lanman;
1390 break;
1391#else
1392 return -EOPNOTSUPP;
1393#endif
1394 case NTLM:
1395 sess_data->func = sess_auth_ntlm;
1396 break;
1397 case NTLMv2:
1398 sess_data->func = sess_auth_ntlmv2;
1399 break;
1400 case Kerberos:
1401#ifdef CONFIG_CIFS_UPCALL
1402 sess_data->func = sess_auth_kerberos;
1403 break;
1404#else
1405 cifs_dbg(VFS, "Kerberos negotiated but upcall support disabled!\n");
1406 return -ENOSYS;
1407 break;
1408#endif /* CONFIG_CIFS_UPCALL */
1409 case RawNTLMSSP:
1410 sess_data->func = sess_auth_rawntlmssp_negotiate;
1411 break;
1412 default:
1413 cifs_dbg(VFS, "secType %d not supported!\n", type);
1414 return -ENOSYS;
1415 }
1416
1417 return 0;
1418}
1419
1420int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses,
1421 const struct nls_table *nls_cp)
1422{
1423 int rc = 0;
1424 struct sess_data *sess_data;
1425
1426 if (ses == NULL) {
1427 WARN(1, "%s: ses == NULL!", __func__);
1428 return -EINVAL;
1429 }
1430
1431 sess_data = kzalloc(sizeof(struct sess_data), GFP_KERNEL);
1432 if (!sess_data)
1433 return -ENOMEM;
1434
1435 rc = select_sec(ses, sess_data);
1436 if (rc)
1437 goto out;
1438
1439 sess_data->xid = xid;
1440 sess_data->ses = ses;
1441 sess_data->buf0_type = CIFS_NO_BUFFER;
1442 sess_data->nls_cp = (struct nls_table *) nls_cp;
1443
1444 while (sess_data->func)
1445 sess_data->func(sess_data);
1446
1447 /* Store result before we free sess_data */
1448 rc = sess_data->result;
963 1449
1450out:
1451 kfree(sess_data);
964 return rc; 1452 return rc;
965} 1453}
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index d1fdfa848703..5e8c22d6c7b9 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -1009,6 +1009,12 @@ cifs_is_read_op(__u32 oplock)
1009 return oplock == OPLOCK_READ; 1009 return oplock == OPLOCK_READ;
1010} 1010}
1011 1011
1012static unsigned int
1013cifs_wp_retry_size(struct inode *inode)
1014{
1015 return CIFS_SB(inode->i_sb)->wsize;
1016}
1017
1012struct smb_version_operations smb1_operations = { 1018struct smb_version_operations smb1_operations = {
1013 .send_cancel = send_nt_cancel, 1019 .send_cancel = send_nt_cancel,
1014 .compare_fids = cifs_compare_fids, 1020 .compare_fids = cifs_compare_fids,
@@ -1019,6 +1025,7 @@ struct smb_version_operations smb1_operations = {
1019 .set_credits = cifs_set_credits, 1025 .set_credits = cifs_set_credits,
1020 .get_credits_field = cifs_get_credits_field, 1026 .get_credits_field = cifs_get_credits_field,
1021 .get_credits = cifs_get_credits, 1027 .get_credits = cifs_get_credits,
1028 .wait_mtu_credits = cifs_wait_mtu_credits,
1022 .get_next_mid = cifs_get_next_mid, 1029 .get_next_mid = cifs_get_next_mid,
1023 .read_data_offset = cifs_read_data_offset, 1030 .read_data_offset = cifs_read_data_offset,
1024 .read_data_length = cifs_read_data_length, 1031 .read_data_length = cifs_read_data_length,
@@ -1078,6 +1085,7 @@ struct smb_version_operations smb1_operations = {
1078 .query_mf_symlink = cifs_query_mf_symlink, 1085 .query_mf_symlink = cifs_query_mf_symlink,
1079 .create_mf_symlink = cifs_create_mf_symlink, 1086 .create_mf_symlink = cifs_create_mf_symlink,
1080 .is_read_op = cifs_is_read_op, 1087 .is_read_op = cifs_is_read_op,
1088 .wp_retry_size = cifs_wp_retry_size,
1081#ifdef CONFIG_CIFS_XATTR 1089#ifdef CONFIG_CIFS_XATTR
1082 .query_all_EAs = CIFSSMBQAllEAs, 1090 .query_all_EAs = CIFSSMBQAllEAs,
1083 .set_EA = CIFSSMBSetEA, 1091 .set_EA = CIFSSMBSetEA,
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 84c012a6aba0..0150182a4494 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -91,7 +91,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
91 case SMB2_OP_SET_EOF: 91 case SMB2_OP_SET_EOF:
92 tmprc = SMB2_set_eof(xid, tcon, fid.persistent_fid, 92 tmprc = SMB2_set_eof(xid, tcon, fid.persistent_fid,
93 fid.volatile_fid, current->tgid, 93 fid.volatile_fid, current->tgid,
94 (__le64 *)data); 94 (__le64 *)data, false);
95 break; 95 break;
96 case SMB2_OP_SET_INFO: 96 case SMB2_OP_SET_INFO:
97 tmprc = SMB2_set_info(xid, tcon, fid.persistent_fid, 97 tmprc = SMB2_set_info(xid, tcon, fid.persistent_fid,
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 94bd4fbb13d3..e31a9dfdcd39 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -605,7 +605,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
605 {STATUS_MAPPED_FILE_SIZE_ZERO, -EIO, "STATUS_MAPPED_FILE_SIZE_ZERO"}, 605 {STATUS_MAPPED_FILE_SIZE_ZERO, -EIO, "STATUS_MAPPED_FILE_SIZE_ZERO"},
606 {STATUS_TOO_MANY_OPENED_FILES, -EMFILE, "STATUS_TOO_MANY_OPENED_FILES"}, 606 {STATUS_TOO_MANY_OPENED_FILES, -EMFILE, "STATUS_TOO_MANY_OPENED_FILES"},
607 {STATUS_CANCELLED, -EIO, "STATUS_CANCELLED"}, 607 {STATUS_CANCELLED, -EIO, "STATUS_CANCELLED"},
608 {STATUS_CANNOT_DELETE, -EIO, "STATUS_CANNOT_DELETE"}, 608 {STATUS_CANNOT_DELETE, -EACCES, "STATUS_CANNOT_DELETE"},
609 {STATUS_INVALID_COMPUTER_NAME, -EIO, "STATUS_INVALID_COMPUTER_NAME"}, 609 {STATUS_INVALID_COMPUTER_NAME, -EIO, "STATUS_INVALID_COMPUTER_NAME"},
610 {STATUS_FILE_DELETED, -EIO, "STATUS_FILE_DELETED"}, 610 {STATUS_FILE_DELETED, -EIO, "STATUS_FILE_DELETED"},
611 {STATUS_SPECIAL_ACCOUNT, -EIO, "STATUS_SPECIAL_ACCOUNT"}, 611 {STATUS_SPECIAL_ACCOUNT, -EIO, "STATUS_SPECIAL_ACCOUNT"},
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index b8021fde987d..f2e6ac29a8d6 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -437,7 +437,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp,
437 continue; 437 continue;
438 438
439 cifs_dbg(FYI, "found in the open list\n"); 439 cifs_dbg(FYI, "found in the open list\n");
440 cifs_dbg(FYI, "lease key match, lease break 0x%d\n", 440 cifs_dbg(FYI, "lease key match, lease break 0x%x\n",
441 le32_to_cpu(rsp->NewLeaseState)); 441 le32_to_cpu(rsp->NewLeaseState));
442 442
443 server->ops->set_oplock_level(cinode, lease_state, 0, NULL); 443 server->ops->set_oplock_level(cinode, lease_state, 0, NULL);
@@ -467,7 +467,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp,
467 } 467 }
468 468
469 cifs_dbg(FYI, "found in the pending open list\n"); 469 cifs_dbg(FYI, "found in the pending open list\n");
470 cifs_dbg(FYI, "lease key match, lease break 0x%d\n", 470 cifs_dbg(FYI, "lease key match, lease break 0x%x\n",
471 le32_to_cpu(rsp->NewLeaseState)); 471 le32_to_cpu(rsp->NewLeaseState));
472 472
473 open->oplock = lease_state; 473 open->oplock = lease_state;
@@ -546,7 +546,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
546 return false; 546 return false;
547 } 547 }
548 548
549 cifs_dbg(FYI, "oplock level 0x%d\n", rsp->OplockLevel); 549 cifs_dbg(FYI, "oplock level 0x%x\n", rsp->OplockLevel);
550 550
551 /* look up tcon based on tid & uid */ 551 /* look up tcon based on tid & uid */
552 spin_lock(&cifs_tcp_ses_lock); 552 spin_lock(&cifs_tcp_ses_lock);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 787844bde384..77f8aeb9c2fc 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -19,6 +19,7 @@
19 19
20#include <linux/pagemap.h> 20#include <linux/pagemap.h>
21#include <linux/vfs.h> 21#include <linux/vfs.h>
22#include <linux/falloc.h>
22#include "cifsglob.h" 23#include "cifsglob.h"
23#include "smb2pdu.h" 24#include "smb2pdu.h"
24#include "smb2proto.h" 25#include "smb2proto.h"
@@ -112,6 +113,53 @@ smb2_get_credits(struct mid_q_entry *mid)
112 return le16_to_cpu(((struct smb2_hdr *)mid->resp_buf)->CreditRequest); 113 return le16_to_cpu(((struct smb2_hdr *)mid->resp_buf)->CreditRequest);
113} 114}
114 115
116static int
117smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
118 unsigned int *num, unsigned int *credits)
119{
120 int rc = 0;
121 unsigned int scredits;
122
123 spin_lock(&server->req_lock);
124 while (1) {
125 if (server->credits <= 0) {
126 spin_unlock(&server->req_lock);
127 cifs_num_waiters_inc(server);
128 rc = wait_event_killable(server->request_q,
129 has_credits(server, &server->credits));
130 cifs_num_waiters_dec(server);
131 if (rc)
132 return rc;
133 spin_lock(&server->req_lock);
134 } else {
135 if (server->tcpStatus == CifsExiting) {
136 spin_unlock(&server->req_lock);
137 return -ENOENT;
138 }
139
140 scredits = server->credits;
141 /* can deadlock with reopen */
142 if (scredits == 1) {
143 *num = SMB2_MAX_BUFFER_SIZE;
144 *credits = 0;
145 break;
146 }
147
148 /* leave one credit for a possible reopen */
149 scredits--;
150 *num = min_t(unsigned int, size,
151 scredits * SMB2_MAX_BUFFER_SIZE);
152
153 *credits = DIV_ROUND_UP(*num, SMB2_MAX_BUFFER_SIZE);
154 server->credits -= *credits;
155 server->in_flight++;
156 break;
157 }
158 }
159 spin_unlock(&server->req_lock);
160 return rc;
161}
162
115static __u64 163static __u64
116smb2_get_next_mid(struct TCP_Server_Info *server) 164smb2_get_next_mid(struct TCP_Server_Info *server)
117{ 165{
@@ -182,8 +230,9 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
182 /* start with specified wsize, or default */ 230 /* start with specified wsize, or default */
183 wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE; 231 wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE;
184 wsize = min_t(unsigned int, wsize, server->max_write); 232 wsize = min_t(unsigned int, wsize, server->max_write);
185 /* set it to the maximum buffer size value we can send with 1 credit */ 233
186 wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE); 234 if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
235 wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE);
187 236
188 return wsize; 237 return wsize;
189} 238}
@@ -197,8 +246,9 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
197 /* start with specified rsize, or default */ 246 /* start with specified rsize, or default */
198 rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE; 247 rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE;
199 rsize = min_t(unsigned int, rsize, server->max_read); 248 rsize = min_t(unsigned int, rsize, server->max_read);
200 /* set it to the maximum buffer size value we can send with 1 credit */ 249
201 rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE); 250 if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
251 rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE);
202 252
203 return rsize; 253 return rsize;
204} 254}
@@ -687,7 +737,7 @@ smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon,
687{ 737{
688 __le64 eof = cpu_to_le64(size); 738 __le64 eof = cpu_to_le64(size);
689 return SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, 739 return SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
690 cfile->fid.volatile_fid, cfile->pid, &eof); 740 cfile->fid.volatile_fid, cfile->pid, &eof, false);
691} 741}
692 742
693static int 743static int
@@ -1104,6 +1154,13 @@ smb3_parse_lease_buf(void *buf, unsigned int *epoch)
1104 return le32_to_cpu(lc->lcontext.LeaseState); 1154 return le32_to_cpu(lc->lcontext.LeaseState);
1105} 1155}
1106 1156
1157static unsigned int
1158smb2_wp_retry_size(struct inode *inode)
1159{
1160 return min_t(unsigned int, CIFS_SB(inode->i_sb)->wsize,
1161 SMB2_MAX_BUFFER_SIZE);
1162}
1163
1107struct smb_version_operations smb20_operations = { 1164struct smb_version_operations smb20_operations = {
1108 .compare_fids = smb2_compare_fids, 1165 .compare_fids = smb2_compare_fids,
1109 .setup_request = smb2_setup_request, 1166 .setup_request = smb2_setup_request,
@@ -1113,6 +1170,7 @@ struct smb_version_operations smb20_operations = {
1113 .set_credits = smb2_set_credits, 1170 .set_credits = smb2_set_credits,
1114 .get_credits_field = smb2_get_credits_field, 1171 .get_credits_field = smb2_get_credits_field,
1115 .get_credits = smb2_get_credits, 1172 .get_credits = smb2_get_credits,
1173 .wait_mtu_credits = cifs_wait_mtu_credits,
1116 .get_next_mid = smb2_get_next_mid, 1174 .get_next_mid = smb2_get_next_mid,
1117 .read_data_offset = smb2_read_data_offset, 1175 .read_data_offset = smb2_read_data_offset,
1118 .read_data_length = smb2_read_data_length, 1176 .read_data_length = smb2_read_data_length,
@@ -1177,6 +1235,7 @@ struct smb_version_operations smb20_operations = {
1177 .create_lease_buf = smb2_create_lease_buf, 1235 .create_lease_buf = smb2_create_lease_buf,
1178 .parse_lease_buf = smb2_parse_lease_buf, 1236 .parse_lease_buf = smb2_parse_lease_buf,
1179 .clone_range = smb2_clone_range, 1237 .clone_range = smb2_clone_range,
1238 .wp_retry_size = smb2_wp_retry_size,
1180}; 1239};
1181 1240
1182struct smb_version_operations smb21_operations = { 1241struct smb_version_operations smb21_operations = {
@@ -1188,6 +1247,7 @@ struct smb_version_operations smb21_operations = {
1188 .set_credits = smb2_set_credits, 1247 .set_credits = smb2_set_credits,
1189 .get_credits_field = smb2_get_credits_field, 1248 .get_credits_field = smb2_get_credits_field,
1190 .get_credits = smb2_get_credits, 1249 .get_credits = smb2_get_credits,
1250 .wait_mtu_credits = smb2_wait_mtu_credits,
1191 .get_next_mid = smb2_get_next_mid, 1251 .get_next_mid = smb2_get_next_mid,
1192 .read_data_offset = smb2_read_data_offset, 1252 .read_data_offset = smb2_read_data_offset,
1193 .read_data_length = smb2_read_data_length, 1253 .read_data_length = smb2_read_data_length,
@@ -1252,6 +1312,7 @@ struct smb_version_operations smb21_operations = {
1252 .create_lease_buf = smb2_create_lease_buf, 1312 .create_lease_buf = smb2_create_lease_buf,
1253 .parse_lease_buf = smb2_parse_lease_buf, 1313 .parse_lease_buf = smb2_parse_lease_buf,
1254 .clone_range = smb2_clone_range, 1314 .clone_range = smb2_clone_range,
1315 .wp_retry_size = smb2_wp_retry_size,
1255}; 1316};
1256 1317
1257struct smb_version_operations smb30_operations = { 1318struct smb_version_operations smb30_operations = {
@@ -1263,6 +1324,7 @@ struct smb_version_operations smb30_operations = {
1263 .set_credits = smb2_set_credits, 1324 .set_credits = smb2_set_credits,
1264 .get_credits_field = smb2_get_credits_field, 1325 .get_credits_field = smb2_get_credits_field,
1265 .get_credits = smb2_get_credits, 1326 .get_credits = smb2_get_credits,
1327 .wait_mtu_credits = smb2_wait_mtu_credits,
1266 .get_next_mid = smb2_get_next_mid, 1328 .get_next_mid = smb2_get_next_mid,
1267 .read_data_offset = smb2_read_data_offset, 1329 .read_data_offset = smb2_read_data_offset,
1268 .read_data_length = smb2_read_data_length, 1330 .read_data_length = smb2_read_data_length,
@@ -1330,6 +1392,7 @@ struct smb_version_operations smb30_operations = {
1330 .parse_lease_buf = smb3_parse_lease_buf, 1392 .parse_lease_buf = smb3_parse_lease_buf,
1331 .clone_range = smb2_clone_range, 1393 .clone_range = smb2_clone_range,
1332 .validate_negotiate = smb3_validate_negotiate, 1394 .validate_negotiate = smb3_validate_negotiate,
1395 .wp_retry_size = smb2_wp_retry_size,
1333}; 1396};
1334 1397
1335struct smb_version_values smb20_values = { 1398struct smb_version_values smb20_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index b0b260dbb19d..42ebc1a8be6c 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -108,7 +108,6 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ ,
108 if (!tcon) 108 if (!tcon)
109 goto out; 109 goto out;
110 110
111 /* BB FIXME when we do write > 64K add +1 for every 64K in req or rsp */
112 /* GLOBAL_CAP_LARGE_MTU will only be set if dialect > SMB2.02 */ 111 /* GLOBAL_CAP_LARGE_MTU will only be set if dialect > SMB2.02 */
113 /* See sections 2.2.4 and 3.2.4.1.5 of MS-SMB2 */ 112 /* See sections 2.2.4 and 3.2.4.1.5 of MS-SMB2 */
114 if ((tcon->ses) && 113 if ((tcon->ses) &&
@@ -245,10 +244,6 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
245 if (rc) 244 if (rc)
246 goto out; 245 goto out;
247 atomic_inc(&tconInfoReconnectCount); 246 atomic_inc(&tconInfoReconnectCount);
248 /*
249 * BB FIXME add code to check if wsize needs update due to negotiated
250 * smb buffer size shrinking.
251 */
252out: 247out:
253 /* 248 /*
254 * Check if handle based operation so we know whether we can continue 249 * Check if handle based operation so we know whether we can continue
@@ -309,16 +304,6 @@ small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon,
309 return rc; 304 return rc;
310} 305}
311 306
312static void
313free_rsp_buf(int resp_buftype, void *rsp)
314{
315 if (resp_buftype == CIFS_SMALL_BUFFER)
316 cifs_small_buf_release(rsp);
317 else if (resp_buftype == CIFS_LARGE_BUFFER)
318 cifs_buf_release(rsp);
319}
320
321
322/* 307/*
323 * 308 *
324 * SMB2 Worker functions follow: 309 * SMB2 Worker functions follow:
@@ -1738,12 +1723,18 @@ smb2_readv_callback(struct mid_q_entry *mid)
1738 rc); 1723 rc);
1739 } 1724 }
1740 /* FIXME: should this be counted toward the initiating task? */ 1725 /* FIXME: should this be counted toward the initiating task? */
1741 task_io_account_read(rdata->bytes); 1726 task_io_account_read(rdata->got_bytes);
1742 cifs_stats_bytes_read(tcon, rdata->bytes); 1727 cifs_stats_bytes_read(tcon, rdata->got_bytes);
1743 break; 1728 break;
1744 case MID_REQUEST_SUBMITTED: 1729 case MID_REQUEST_SUBMITTED:
1745 case MID_RETRY_NEEDED: 1730 case MID_RETRY_NEEDED:
1746 rdata->result = -EAGAIN; 1731 rdata->result = -EAGAIN;
1732 if (server->sign && rdata->got_bytes)
1733 /* reset bytes number since we can not check a sign */
1734 rdata->got_bytes = 0;
1735 /* FIXME: should this be counted toward the initiating task? */
1736 task_io_account_read(rdata->got_bytes);
1737 cifs_stats_bytes_read(tcon, rdata->got_bytes);
1747 break; 1738 break;
1748 default: 1739 default:
1749 if (rdata->result != -ENODATA) 1740 if (rdata->result != -ENODATA)
@@ -1762,11 +1753,12 @@ smb2_readv_callback(struct mid_q_entry *mid)
1762int 1753int
1763smb2_async_readv(struct cifs_readdata *rdata) 1754smb2_async_readv(struct cifs_readdata *rdata)
1764{ 1755{
1765 int rc; 1756 int rc, flags = 0;
1766 struct smb2_hdr *buf; 1757 struct smb2_hdr *buf;
1767 struct cifs_io_parms io_parms; 1758 struct cifs_io_parms io_parms;
1768 struct smb_rqst rqst = { .rq_iov = &rdata->iov, 1759 struct smb_rqst rqst = { .rq_iov = &rdata->iov,
1769 .rq_nvec = 1 }; 1760 .rq_nvec = 1 };
1761 struct TCP_Server_Info *server;
1770 1762
1771 cifs_dbg(FYI, "%s: offset=%llu bytes=%u\n", 1763 cifs_dbg(FYI, "%s: offset=%llu bytes=%u\n",
1772 __func__, rdata->offset, rdata->bytes); 1764 __func__, rdata->offset, rdata->bytes);
@@ -1777,18 +1769,41 @@ smb2_async_readv(struct cifs_readdata *rdata)
1777 io_parms.persistent_fid = rdata->cfile->fid.persistent_fid; 1769 io_parms.persistent_fid = rdata->cfile->fid.persistent_fid;
1778 io_parms.volatile_fid = rdata->cfile->fid.volatile_fid; 1770 io_parms.volatile_fid = rdata->cfile->fid.volatile_fid;
1779 io_parms.pid = rdata->pid; 1771 io_parms.pid = rdata->pid;
1772
1773 server = io_parms.tcon->ses->server;
1774
1780 rc = smb2_new_read_req(&rdata->iov, &io_parms, 0, 0); 1775 rc = smb2_new_read_req(&rdata->iov, &io_parms, 0, 0);
1781 if (rc) 1776 if (rc) {
1777 if (rc == -EAGAIN && rdata->credits) {
1778 /* credits was reset by reconnect */
1779 rdata->credits = 0;
1780 /* reduce in_flight value since we won't send the req */
1781 spin_lock(&server->req_lock);
1782 server->in_flight--;
1783 spin_unlock(&server->req_lock);
1784 }
1782 return rc; 1785 return rc;
1786 }
1783 1787
1784 buf = (struct smb2_hdr *)rdata->iov.iov_base; 1788 buf = (struct smb2_hdr *)rdata->iov.iov_base;
1785 /* 4 for rfc1002 length field */ 1789 /* 4 for rfc1002 length field */
1786 rdata->iov.iov_len = get_rfc1002_length(rdata->iov.iov_base) + 4; 1790 rdata->iov.iov_len = get_rfc1002_length(rdata->iov.iov_base) + 4;
1787 1791
1792 if (rdata->credits) {
1793 buf->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes,
1794 SMB2_MAX_BUFFER_SIZE));
1795 spin_lock(&server->req_lock);
1796 server->credits += rdata->credits -
1797 le16_to_cpu(buf->CreditCharge);
1798 spin_unlock(&server->req_lock);
1799 wake_up(&server->request_q);
1800 flags = CIFS_HAS_CREDITS;
1801 }
1802
1788 kref_get(&rdata->refcount); 1803 kref_get(&rdata->refcount);
1789 rc = cifs_call_async(io_parms.tcon->ses->server, &rqst, 1804 rc = cifs_call_async(io_parms.tcon->ses->server, &rqst,
1790 cifs_readv_receive, smb2_readv_callback, 1805 cifs_readv_receive, smb2_readv_callback,
1791 rdata, 0); 1806 rdata, flags);
1792 if (rc) { 1807 if (rc) {
1793 kref_put(&rdata->refcount, cifs_readdata_release); 1808 kref_put(&rdata->refcount, cifs_readdata_release);
1794 cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE); 1809 cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE);
@@ -1906,15 +1921,25 @@ int
1906smb2_async_writev(struct cifs_writedata *wdata, 1921smb2_async_writev(struct cifs_writedata *wdata,
1907 void (*release)(struct kref *kref)) 1922 void (*release)(struct kref *kref))
1908{ 1923{
1909 int rc = -EACCES; 1924 int rc = -EACCES, flags = 0;
1910 struct smb2_write_req *req = NULL; 1925 struct smb2_write_req *req = NULL;
1911 struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); 1926 struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
1927 struct TCP_Server_Info *server = tcon->ses->server;
1912 struct kvec iov; 1928 struct kvec iov;
1913 struct smb_rqst rqst; 1929 struct smb_rqst rqst;
1914 1930
1915 rc = small_smb2_init(SMB2_WRITE, tcon, (void **) &req); 1931 rc = small_smb2_init(SMB2_WRITE, tcon, (void **) &req);
1916 if (rc) 1932 if (rc) {
1933 if (rc == -EAGAIN && wdata->credits) {
1934 /* credits was reset by reconnect */
1935 wdata->credits = 0;
1936 /* reduce in_flight value since we won't send the req */
1937 spin_lock(&server->req_lock);
1938 server->in_flight--;
1939 spin_unlock(&server->req_lock);
1940 }
1917 goto async_writev_out; 1941 goto async_writev_out;
1942 }
1918 1943
1919 req->hdr.ProcessId = cpu_to_le32(wdata->cfile->pid); 1944 req->hdr.ProcessId = cpu_to_le32(wdata->cfile->pid);
1920 1945
@@ -1947,9 +1972,20 @@ smb2_async_writev(struct cifs_writedata *wdata,
1947 1972
1948 inc_rfc1001_len(&req->hdr, wdata->bytes - 1 /* Buffer */); 1973 inc_rfc1001_len(&req->hdr, wdata->bytes - 1 /* Buffer */);
1949 1974
1975 if (wdata->credits) {
1976 req->hdr.CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes,
1977 SMB2_MAX_BUFFER_SIZE));
1978 spin_lock(&server->req_lock);
1979 server->credits += wdata->credits -
1980 le16_to_cpu(req->hdr.CreditCharge);
1981 spin_unlock(&server->req_lock);
1982 wake_up(&server->request_q);
1983 flags = CIFS_HAS_CREDITS;
1984 }
1985
1950 kref_get(&wdata->refcount); 1986 kref_get(&wdata->refcount);
1951 rc = cifs_call_async(tcon->ses->server, &rqst, NULL, 1987 rc = cifs_call_async(server, &rqst, NULL, smb2_writev_callback, wdata,
1952 smb2_writev_callback, wdata, 0); 1988 flags);
1953 1989
1954 if (rc) { 1990 if (rc) {
1955 kref_put(&wdata->refcount, release); 1991 kref_put(&wdata->refcount, release);
@@ -2325,7 +2361,7 @@ SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
2325 2361
2326int 2362int
2327SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, 2363SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
2328 u64 volatile_fid, u32 pid, __le64 *eof) 2364 u64 volatile_fid, u32 pid, __le64 *eof, bool is_falloc)
2329{ 2365{
2330 struct smb2_file_eof_info info; 2366 struct smb2_file_eof_info info;
2331 void *data; 2367 void *data;
@@ -2336,8 +2372,12 @@ SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
2336 data = &info; 2372 data = &info;
2337 size = sizeof(struct smb2_file_eof_info); 2373 size = sizeof(struct smb2_file_eof_info);
2338 2374
2339 return send_set_info(xid, tcon, persistent_fid, volatile_fid, pid, 2375 if (is_falloc)
2340 FILE_END_OF_FILE_INFORMATION, 1, &data, &size); 2376 return send_set_info(xid, tcon, persistent_fid, volatile_fid,
2377 pid, FILE_ALLOCATION_INFORMATION, 1, &data, &size);
2378 else
2379 return send_set_info(xid, tcon, persistent_fid, volatile_fid,
2380 pid, FILE_END_OF_FILE_INFORMATION, 1, &data, &size);
2341} 2381}
2342 2382
2343int 2383int
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 0ce48db20a65..67e8ce8055de 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -139,7 +139,7 @@ extern int SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
139 __le16 *target_file); 139 __le16 *target_file);
140extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, 140extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon,
141 u64 persistent_fid, u64 volatile_fid, u32 pid, 141 u64 persistent_fid, u64 volatile_fid, u32 pid,
142 __le64 *eof); 142 __le64 *eof, bool is_fallocate);
143extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon, 143extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon,
144 u64 persistent_fid, u64 volatile_fid, 144 u64 persistent_fid, u64 volatile_fid,
145 FILE_BASIC_INFO *buf); 145 FILE_BASIC_INFO *buf);
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 59c748ce872f..5111e7272db6 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -466,7 +466,12 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
466static inline void 466static inline void
467smb2_seq_num_into_buf(struct TCP_Server_Info *server, struct smb2_hdr *hdr) 467smb2_seq_num_into_buf(struct TCP_Server_Info *server, struct smb2_hdr *hdr)
468{ 468{
469 unsigned int i, num = le16_to_cpu(hdr->CreditCharge);
470
469 hdr->MessageId = get_next_mid64(server); 471 hdr->MessageId = get_next_mid64(server);
472 /* skip message numbers according to CreditCharge field */
473 for (i = 1; i < num; i++)
474 get_next_mid(server);
470} 475}
471 476
472static struct mid_q_entry * 477static struct mid_q_entry *
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 18cd5650a5fc..9d087f4e7d4e 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -448,6 +448,15 @@ wait_for_free_request(struct TCP_Server_Info *server, const int timeout,
448 return wait_for_free_credits(server, timeout, val); 448 return wait_for_free_credits(server, timeout, val);
449} 449}
450 450
451int
452cifs_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
453 unsigned int *num, unsigned int *credits)
454{
455 *num = size;
456 *credits = 0;
457 return 0;
458}
459
451static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, 460static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf,
452 struct mid_q_entry **ppmidQ) 461 struct mid_q_entry **ppmidQ)
453{ 462{
@@ -531,20 +540,23 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
531{ 540{
532 int rc, timeout, optype; 541 int rc, timeout, optype;
533 struct mid_q_entry *mid; 542 struct mid_q_entry *mid;
543 unsigned int credits = 0;
534 544
535 timeout = flags & CIFS_TIMEOUT_MASK; 545 timeout = flags & CIFS_TIMEOUT_MASK;
536 optype = flags & CIFS_OP_MASK; 546 optype = flags & CIFS_OP_MASK;
537 547
538 rc = wait_for_free_request(server, timeout, optype); 548 if ((flags & CIFS_HAS_CREDITS) == 0) {
539 if (rc) 549 rc = wait_for_free_request(server, timeout, optype);
540 return rc; 550 if (rc)
551 return rc;
552 credits = 1;
553 }
541 554
542 mutex_lock(&server->srv_mutex); 555 mutex_lock(&server->srv_mutex);
543 mid = server->ops->setup_async_request(server, rqst); 556 mid = server->ops->setup_async_request(server, rqst);
544 if (IS_ERR(mid)) { 557 if (IS_ERR(mid)) {
545 mutex_unlock(&server->srv_mutex); 558 mutex_unlock(&server->srv_mutex);
546 add_credits(server, 1, optype); 559 add_credits_and_wake_if(server, credits, optype);
547 wake_up(&server->request_q);
548 return PTR_ERR(mid); 560 return PTR_ERR(mid);
549 } 561 }
550 562
@@ -572,8 +584,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
572 return 0; 584 return 0;
573 585
574 cifs_delete_mid(mid); 586 cifs_delete_mid(mid);
575 add_credits(server, 1, optype); 587 add_credits_and_wake_if(server, credits, optype);
576 wake_up(&server->request_q);
577 return rc; 588 return rc;
578} 589}
579 590
diff --git a/fs/namespace.c b/fs/namespace.c
index 2a1447c946e7..0acabea58319 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -890,8 +890,21 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
890 890
891 mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED); 891 mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED);
892 /* Don't allow unprivileged users to change mount flags */ 892 /* Don't allow unprivileged users to change mount flags */
893 if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY)) 893 if (flag & CL_UNPRIVILEGED) {
894 mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; 894 mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;
895
896 if (mnt->mnt.mnt_flags & MNT_READONLY)
897 mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
898
899 if (mnt->mnt.mnt_flags & MNT_NODEV)
900 mnt->mnt.mnt_flags |= MNT_LOCK_NODEV;
901
902 if (mnt->mnt.mnt_flags & MNT_NOSUID)
903 mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID;
904
905 if (mnt->mnt.mnt_flags & MNT_NOEXEC)
906 mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC;
907 }
895 908
896 /* Don't allow unprivileged users to reveal what is under a mount */ 909 /* Don't allow unprivileged users to reveal what is under a mount */
897 if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire)) 910 if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire))
@@ -1896,9 +1909,6 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
1896 if (readonly_request == __mnt_is_readonly(mnt)) 1909 if (readonly_request == __mnt_is_readonly(mnt))
1897 return 0; 1910 return 0;
1898 1911
1899 if (mnt->mnt_flags & MNT_LOCK_READONLY)
1900 return -EPERM;
1901
1902 if (readonly_request) 1912 if (readonly_request)
1903 error = mnt_make_readonly(real_mount(mnt)); 1913 error = mnt_make_readonly(real_mount(mnt));
1904 else 1914 else
@@ -1924,6 +1934,33 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1924 if (path->dentry != path->mnt->mnt_root) 1934 if (path->dentry != path->mnt->mnt_root)
1925 return -EINVAL; 1935 return -EINVAL;
1926 1936
1937 /* Don't allow changing of locked mnt flags.
1938 *
1939 * No locks need to be held here while testing the various
1940 * MNT_LOCK flags because those flags can never be cleared
1941 * once they are set.
1942 */
1943 if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
1944 !(mnt_flags & MNT_READONLY)) {
1945 return -EPERM;
1946 }
1947 if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
1948 !(mnt_flags & MNT_NODEV)) {
1949 return -EPERM;
1950 }
1951 if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
1952 !(mnt_flags & MNT_NOSUID)) {
1953 return -EPERM;
1954 }
1955 if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
1956 !(mnt_flags & MNT_NOEXEC)) {
1957 return -EPERM;
1958 }
1959 if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
1960 ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
1961 return -EPERM;
1962 }
1963
1927 err = security_sb_remount(sb, data); 1964 err = security_sb_remount(sb, data);
1928 if (err) 1965 if (err)
1929 return err; 1966 return err;
@@ -1937,7 +1974,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1937 err = do_remount_sb(sb, flags, data, 0); 1974 err = do_remount_sb(sb, flags, data, 0);
1938 if (!err) { 1975 if (!err) {
1939 lock_mount_hash(); 1976 lock_mount_hash();
1940 mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK; 1977 mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
1941 mnt->mnt.mnt_flags = mnt_flags; 1978 mnt->mnt.mnt_flags = mnt_flags;
1942 touch_mnt_namespace(mnt->mnt_ns); 1979 touch_mnt_namespace(mnt->mnt_ns);
1943 unlock_mount_hash(); 1980 unlock_mount_hash();
@@ -2122,7 +2159,7 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
2122 */ 2159 */
2123 if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) { 2160 if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
2124 flags |= MS_NODEV; 2161 flags |= MS_NODEV;
2125 mnt_flags |= MNT_NODEV; 2162 mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
2126 } 2163 }
2127 } 2164 }
2128 2165
@@ -2436,6 +2473,14 @@ long do_mount(const char *dev_name, const char *dir_name,
2436 if (flags & MS_RDONLY) 2473 if (flags & MS_RDONLY)
2437 mnt_flags |= MNT_READONLY; 2474 mnt_flags |= MNT_READONLY;
2438 2475
2476 /* The default atime for remount is preservation */
2477 if ((flags & MS_REMOUNT) &&
2478 ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
2479 MS_STRICTATIME)) == 0)) {
2480 mnt_flags &= ~MNT_ATIME_MASK;
2481 mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
2482 }
2483
2439 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN | 2484 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
2440 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | 2485 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
2441 MS_STRICTATIME); 2486 MS_STRICTATIME);
@@ -2972,13 +3017,13 @@ static void *mntns_get(struct task_struct *task)
2972 struct mnt_namespace *ns = NULL; 3017 struct mnt_namespace *ns = NULL;
2973 struct nsproxy *nsproxy; 3018 struct nsproxy *nsproxy;
2974 3019
2975 rcu_read_lock(); 3020 task_lock(task);
2976 nsproxy = task_nsproxy(task); 3021 nsproxy = task->nsproxy;
2977 if (nsproxy) { 3022 if (nsproxy) {
2978 ns = nsproxy->mnt_ns; 3023 ns = nsproxy->mnt_ns;
2979 get_mnt_ns(ns); 3024 get_mnt_ns(ns);
2980 } 3025 }
2981 rcu_read_unlock(); 3026 task_unlock(task);
2982 3027
2983 return ns; 3028 return ns;
2984} 3029}
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 1d09289c8f0e..180d1ec9c32e 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1205,7 +1205,7 @@ static const struct file_operations nfs_server_list_fops = {
1205 .open = nfs_server_list_open, 1205 .open = nfs_server_list_open,
1206 .read = seq_read, 1206 .read = seq_read,
1207 .llseek = seq_lseek, 1207 .llseek = seq_lseek,
1208 .release = seq_release, 1208 .release = seq_release_net,
1209 .owner = THIS_MODULE, 1209 .owner = THIS_MODULE,
1210}; 1210};
1211 1211
@@ -1226,7 +1226,7 @@ static const struct file_operations nfs_volume_list_fops = {
1226 .open = nfs_volume_list_open, 1226 .open = nfs_volume_list_open,
1227 .read = seq_read, 1227 .read = seq_read,
1228 .llseek = seq_lseek, 1228 .llseek = seq_lseek,
1229 .release = seq_release, 1229 .release = seq_release_net,
1230 .owner = THIS_MODULE, 1230 .owner = THIS_MODULE,
1231}; 1231};
1232 1232
@@ -1236,19 +1236,8 @@ static const struct file_operations nfs_volume_list_fops = {
1236 */ 1236 */
1237static int nfs_server_list_open(struct inode *inode, struct file *file) 1237static int nfs_server_list_open(struct inode *inode, struct file *file)
1238{ 1238{
1239 struct seq_file *m; 1239 return seq_open_net(inode, file, &nfs_server_list_ops,
1240 int ret; 1240 sizeof(struct seq_net_private));
1241 struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info;
1242 struct net *net = pid_ns->child_reaper->nsproxy->net_ns;
1243
1244 ret = seq_open(file, &nfs_server_list_ops);
1245 if (ret < 0)
1246 return ret;
1247
1248 m = file->private_data;
1249 m->private = net;
1250
1251 return 0;
1252} 1241}
1253 1242
1254/* 1243/*
@@ -1256,7 +1245,7 @@ static int nfs_server_list_open(struct inode *inode, struct file *file)
1256 */ 1245 */
1257static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) 1246static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
1258{ 1247{
1259 struct nfs_net *nn = net_generic(m->private, nfs_net_id); 1248 struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
1260 1249
1261 /* lock the list against modification */ 1250 /* lock the list against modification */
1262 spin_lock(&nn->nfs_client_lock); 1251 spin_lock(&nn->nfs_client_lock);
@@ -1268,7 +1257,7 @@ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
1268 */ 1257 */
1269static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) 1258static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
1270{ 1259{
1271 struct nfs_net *nn = net_generic(p->private, nfs_net_id); 1260 struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id);
1272 1261
1273 return seq_list_next(v, &nn->nfs_client_list, pos); 1262 return seq_list_next(v, &nn->nfs_client_list, pos);
1274} 1263}
@@ -1278,7 +1267,7 @@ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
1278 */ 1267 */
1279static void nfs_server_list_stop(struct seq_file *p, void *v) 1268static void nfs_server_list_stop(struct seq_file *p, void *v)
1280{ 1269{
1281 struct nfs_net *nn = net_generic(p->private, nfs_net_id); 1270 struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id);
1282 1271
1283 spin_unlock(&nn->nfs_client_lock); 1272 spin_unlock(&nn->nfs_client_lock);
1284} 1273}
@@ -1289,7 +1278,7 @@ static void nfs_server_list_stop(struct seq_file *p, void *v)
1289static int nfs_server_list_show(struct seq_file *m, void *v) 1278static int nfs_server_list_show(struct seq_file *m, void *v)
1290{ 1279{
1291 struct nfs_client *clp; 1280 struct nfs_client *clp;
1292 struct nfs_net *nn = net_generic(m->private, nfs_net_id); 1281 struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
1293 1282
1294 /* display header on line 1 */ 1283 /* display header on line 1 */
1295 if (v == &nn->nfs_client_list) { 1284 if (v == &nn->nfs_client_list) {
@@ -1321,19 +1310,8 @@ static int nfs_server_list_show(struct seq_file *m, void *v)
1321 */ 1310 */
1322static int nfs_volume_list_open(struct inode *inode, struct file *file) 1311static int nfs_volume_list_open(struct inode *inode, struct file *file)
1323{ 1312{
1324 struct seq_file *m; 1313 return seq_open_net(inode, file, &nfs_server_list_ops,
1325 int ret; 1314 sizeof(struct seq_net_private));
1326 struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info;
1327 struct net *net = pid_ns->child_reaper->nsproxy->net_ns;
1328
1329 ret = seq_open(file, &nfs_volume_list_ops);
1330 if (ret < 0)
1331 return ret;
1332
1333 m = file->private_data;
1334 m->private = net;
1335
1336 return 0;
1337} 1315}
1338 1316
1339/* 1317/*
@@ -1341,7 +1319,7 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file)
1341 */ 1319 */
1342static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) 1320static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
1343{ 1321{
1344 struct nfs_net *nn = net_generic(m->private, nfs_net_id); 1322 struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
1345 1323
1346 /* lock the list against modification */ 1324 /* lock the list against modification */
1347 spin_lock(&nn->nfs_client_lock); 1325 spin_lock(&nn->nfs_client_lock);
@@ -1353,7 +1331,7 @@ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
1353 */ 1331 */
1354static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) 1332static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
1355{ 1333{
1356 struct nfs_net *nn = net_generic(p->private, nfs_net_id); 1334 struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id);
1357 1335
1358 return seq_list_next(v, &nn->nfs_volume_list, pos); 1336 return seq_list_next(v, &nn->nfs_volume_list, pos);
1359} 1337}
@@ -1363,7 +1341,7 @@ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
1363 */ 1341 */
1364static void nfs_volume_list_stop(struct seq_file *p, void *v) 1342static void nfs_volume_list_stop(struct seq_file *p, void *v)
1365{ 1343{
1366 struct nfs_net *nn = net_generic(p->private, nfs_net_id); 1344 struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id);
1367 1345
1368 spin_unlock(&nn->nfs_client_lock); 1346 spin_unlock(&nn->nfs_client_lock);
1369} 1347}
@@ -1376,7 +1354,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
1376 struct nfs_server *server; 1354 struct nfs_server *server;
1377 struct nfs_client *clp; 1355 struct nfs_client *clp;
1378 char dev[8], fsid[17]; 1356 char dev[8], fsid[17];
1379 struct nfs_net *nn = net_generic(m->private, nfs_net_id); 1357 struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
1380 1358
1381 /* display header on line 1 */ 1359 /* display header on line 1 */
1382 if (v == &nn->nfs_volume_list) { 1360 if (v == &nn->nfs_volume_list) {
@@ -1407,6 +1385,45 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
1407 return 0; 1385 return 0;
1408} 1386}
1409 1387
1388int nfs_fs_proc_net_init(struct net *net)
1389{
1390 struct nfs_net *nn = net_generic(net, nfs_net_id);
1391 struct proc_dir_entry *p;
1392
1393 nn->proc_nfsfs = proc_net_mkdir(net, "nfsfs", net->proc_net);
1394 if (!nn->proc_nfsfs)
1395 goto error_0;
1396
1397 /* a file of servers with which we're dealing */
1398 p = proc_create("servers", S_IFREG|S_IRUGO,
1399 nn->proc_nfsfs, &nfs_server_list_fops);
1400 if (!p)
1401 goto error_1;
1402
1403 /* a file of volumes that we have mounted */
1404 p = proc_create("volumes", S_IFREG|S_IRUGO,
1405 nn->proc_nfsfs, &nfs_volume_list_fops);
1406 if (!p)
1407 goto error_2;
1408 return 0;
1409
1410error_2:
1411 remove_proc_entry("servers", nn->proc_nfsfs);
1412error_1:
1413 remove_proc_entry("fs/nfsfs", NULL);
1414error_0:
1415 return -ENOMEM;
1416}
1417
1418void nfs_fs_proc_net_exit(struct net *net)
1419{
1420 struct nfs_net *nn = net_generic(net, nfs_net_id);
1421
1422 remove_proc_entry("volumes", nn->proc_nfsfs);
1423 remove_proc_entry("servers", nn->proc_nfsfs);
1424 remove_proc_entry("fs/nfsfs", NULL);
1425}
1426
1410/* 1427/*
1411 * initialise the /proc/fs/nfsfs/ directory 1428 * initialise the /proc/fs/nfsfs/ directory
1412 */ 1429 */
@@ -1419,14 +1436,12 @@ int __init nfs_fs_proc_init(void)
1419 goto error_0; 1436 goto error_0;
1420 1437
1421 /* a file of servers with which we're dealing */ 1438 /* a file of servers with which we're dealing */
1422 p = proc_create("servers", S_IFREG|S_IRUGO, 1439 p = proc_symlink("servers", proc_fs_nfs, "../../net/nfsfs/servers");
1423 proc_fs_nfs, &nfs_server_list_fops);
1424 if (!p) 1440 if (!p)
1425 goto error_1; 1441 goto error_1;
1426 1442
1427 /* a file of volumes that we have mounted */ 1443 /* a file of volumes that we have mounted */
1428 p = proc_create("volumes", S_IFREG|S_IRUGO, 1444 p = proc_symlink("volumes", proc_fs_nfs, "../../net/nfsfs/volumes");
1429 proc_fs_nfs, &nfs_volume_list_fops);
1430 if (!p) 1445 if (!p)
1431 goto error_2; 1446 goto error_2;
1432 return 0; 1447 return 0;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index abd37a380535..68921b01b792 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1840,11 +1840,12 @@ EXPORT_SYMBOL_GPL(nfs_net_id);
1840static int nfs_net_init(struct net *net) 1840static int nfs_net_init(struct net *net)
1841{ 1841{
1842 nfs_clients_init(net); 1842 nfs_clients_init(net);
1843 return 0; 1843 return nfs_fs_proc_net_init(net);
1844} 1844}
1845 1845
1846static void nfs_net_exit(struct net *net) 1846static void nfs_net_exit(struct net *net)
1847{ 1847{
1848 nfs_fs_proc_net_exit(net);
1848 nfs_cleanup_cb_ident_idr(net); 1849 nfs_cleanup_cb_ident_idr(net);
1849} 1850}
1850 1851
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 617f36611d4a..e2a45ae5014e 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -195,7 +195,16 @@ extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *,
195#ifdef CONFIG_PROC_FS 195#ifdef CONFIG_PROC_FS
196extern int __init nfs_fs_proc_init(void); 196extern int __init nfs_fs_proc_init(void);
197extern void nfs_fs_proc_exit(void); 197extern void nfs_fs_proc_exit(void);
198extern int nfs_fs_proc_net_init(struct net *net);
199extern void nfs_fs_proc_net_exit(struct net *net);
198#else 200#else
201static inline int nfs_fs_proc_net_init(struct net *net)
202{
203 return 0;
204}
205static inline void nfs_fs_proc_net_exit(struct net *net)
206{
207}
199static inline int nfs_fs_proc_init(void) 208static inline int nfs_fs_proc_init(void)
200{ 209{
201 return 0; 210 return 0;
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
index 8ee1fab83268..ef221fb8a183 100644
--- a/fs/nfs/netns.h
+++ b/fs/nfs/netns.h
@@ -29,6 +29,9 @@ struct nfs_net {
29#endif 29#endif
30 spinlock_t nfs_client_lock; 30 spinlock_t nfs_client_lock;
31 struct timespec boot_time; 31 struct timespec boot_time;
32#ifdef CONFIG_PROC_FS
33 struct proc_dir_entry *proc_nfsfs;
34#endif
32}; 35};
33 36
34extern int nfs_net_id; 37extern int nfs_net_id;
diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h
index a986ceb6fd0d..4cd7c69a6cb9 100644
--- a/fs/nfsd/acl.h
+++ b/fs/nfsd/acl.h
@@ -47,7 +47,7 @@ struct svc_rqst;
47#define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \ 47#define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \
48 / sizeof(struct nfs4_ace)) 48 / sizeof(struct nfs4_ace))
49 49
50struct nfs4_acl *nfs4_acl_new(int); 50int nfs4_acl_bytes(int entries);
51int nfs4_acl_get_whotype(char *, u32); 51int nfs4_acl_get_whotype(char *, u32);
52__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who); 52__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who);
53 53
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 72f44823adbb..9d46a0bdd9f9 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -28,7 +28,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
28 validate_process_creds(); 28 validate_process_creds();
29 29
30 /* discard any old override before preparing the new set */ 30 /* discard any old override before preparing the new set */
31 revert_creds(get_cred(current->real_cred)); 31 revert_creds(get_cred(current_real_cred()));
32 new = prepare_creds(); 32 new = prepare_creds();
33 if (!new) 33 if (!new)
34 return -ENOMEM; 34 return -ENOMEM;
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 13b85f94d9e2..72ffd7cce3c3 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -698,8 +698,8 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
698 698
699 kref_get(&item->ex_client->ref); 699 kref_get(&item->ex_client->ref);
700 new->ex_client = item->ex_client; 700 new->ex_client = item->ex_client;
701 new->ex_path.dentry = dget(item->ex_path.dentry); 701 new->ex_path = item->ex_path;
702 new->ex_path.mnt = mntget(item->ex_path.mnt); 702 path_get(&item->ex_path);
703 new->ex_fslocs.locations = NULL; 703 new->ex_fslocs.locations = NULL;
704 new->ex_fslocs.locations_count = 0; 704 new->ex_fslocs.locations_count = 0;
705 new->ex_fslocs.migrated = 0; 705 new->ex_fslocs.migrated = 0;
@@ -1253,7 +1253,7 @@ static int e_show(struct seq_file *m, void *p)
1253 return 0; 1253 return 0;
1254 } 1254 }
1255 1255
1256 cache_get(&exp->h); 1256 exp_get(exp);
1257 if (cache_check(cd, &exp->h, NULL)) 1257 if (cache_check(cd, &exp->h, NULL))
1258 return 0; 1258 return 0;
1259 exp_put(exp); 1259 exp_put(exp);
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index cfeea85c5bed..04dc8c167b0c 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -101,9 +101,10 @@ static inline void exp_put(struct svc_export *exp)
101 cache_put(&exp->h, exp->cd); 101 cache_put(&exp->h, exp->cd);
102} 102}
103 103
104static inline void exp_get(struct svc_export *exp) 104static inline struct svc_export *exp_get(struct svc_export *exp)
105{ 105{
106 cache_get(&exp->h); 106 cache_get(&exp->h);
107 return exp;
107} 108}
108struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *); 109struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *);
109 110
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index 2ed05c3cd43d..c16bf5af6831 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -17,81 +17,13 @@
17 17
18struct nfsd_fault_inject_op { 18struct nfsd_fault_inject_op {
19 char *file; 19 char *file;
20 u64 (*forget)(struct nfs4_client *, u64); 20 u64 (*get)(void);
21 u64 (*print)(struct nfs4_client *, u64); 21 u64 (*set_val)(u64);
22 u64 (*set_clnt)(struct sockaddr_storage *, size_t);
22}; 23};
23 24
24static struct nfsd_fault_inject_op inject_ops[] = {
25 {
26 .file = "forget_clients",
27 .forget = nfsd_forget_client,
28 .print = nfsd_print_client,
29 },
30 {
31 .file = "forget_locks",
32 .forget = nfsd_forget_client_locks,
33 .print = nfsd_print_client_locks,
34 },
35 {
36 .file = "forget_openowners",
37 .forget = nfsd_forget_client_openowners,
38 .print = nfsd_print_client_openowners,
39 },
40 {
41 .file = "forget_delegations",
42 .forget = nfsd_forget_client_delegations,
43 .print = nfsd_print_client_delegations,
44 },
45 {
46 .file = "recall_delegations",
47 .forget = nfsd_recall_client_delegations,
48 .print = nfsd_print_client_delegations,
49 },
50};
51
52static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op);
53static struct dentry *debug_dir; 25static struct dentry *debug_dir;
54 26
55static void nfsd_inject_set(struct nfsd_fault_inject_op *op, u64 val)
56{
57 u64 count = 0;
58
59 if (val == 0)
60 printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file);
61 else
62 printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val);
63
64 nfs4_lock_state();
65 count = nfsd_for_n_state(val, op->forget);
66 nfs4_unlock_state();
67 printk(KERN_INFO "NFSD: %s: found %llu", op->file, count);
68}
69
70static void nfsd_inject_set_client(struct nfsd_fault_inject_op *op,
71 struct sockaddr_storage *addr,
72 size_t addr_size)
73{
74 char buf[INET6_ADDRSTRLEN];
75 struct nfs4_client *clp;
76 u64 count;
77
78 nfs4_lock_state();
79 clp = nfsd_find_client(addr, addr_size);
80 if (clp) {
81 count = op->forget(clp, 0);
82 rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf));
83 printk(KERN_INFO "NFSD [%s]: Client %s had %llu state object(s)\n", op->file, buf, count);
84 }
85 nfs4_unlock_state();
86}
87
88static void nfsd_inject_get(struct nfsd_fault_inject_op *op, u64 *val)
89{
90 nfs4_lock_state();
91 *val = nfsd_for_n_state(0, op->print);
92 nfs4_unlock_state();
93}
94
95static ssize_t fault_inject_read(struct file *file, char __user *buf, 27static ssize_t fault_inject_read(struct file *file, char __user *buf,
96 size_t len, loff_t *ppos) 28 size_t len, loff_t *ppos)
97{ 29{
@@ -99,9 +31,10 @@ static ssize_t fault_inject_read(struct file *file, char __user *buf,
99 char read_buf[25]; 31 char read_buf[25];
100 size_t size; 32 size_t size;
101 loff_t pos = *ppos; 33 loff_t pos = *ppos;
34 struct nfsd_fault_inject_op *op = file_inode(file)->i_private;
102 35
103 if (!pos) 36 if (!pos)
104 nfsd_inject_get(file_inode(file)->i_private, &val); 37 val = op->get();
105 size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val); 38 size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val);
106 39
107 return simple_read_from_buffer(buf, len, ppos, read_buf, size); 40 return simple_read_from_buffer(buf, len, ppos, read_buf, size);
@@ -114,18 +47,36 @@ static ssize_t fault_inject_write(struct file *file, const char __user *buf,
114 size_t size = min(sizeof(write_buf) - 1, len); 47 size_t size = min(sizeof(write_buf) - 1, len);
115 struct net *net = current->nsproxy->net_ns; 48 struct net *net = current->nsproxy->net_ns;
116 struct sockaddr_storage sa; 49 struct sockaddr_storage sa;
50 struct nfsd_fault_inject_op *op = file_inode(file)->i_private;
117 u64 val; 51 u64 val;
52 char *nl;
118 53
119 if (copy_from_user(write_buf, buf, size)) 54 if (copy_from_user(write_buf, buf, size))
120 return -EFAULT; 55 return -EFAULT;
121 write_buf[size] = '\0'; 56 write_buf[size] = '\0';
122 57
58 /* Deal with any embedded newlines in the string */
59 nl = strchr(write_buf, '\n');
60 if (nl) {
61 size = nl - write_buf;
62 *nl = '\0';
63 }
64
123 size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa)); 65 size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa));
124 if (size > 0) 66 if (size > 0) {
125 nfsd_inject_set_client(file_inode(file)->i_private, &sa, size); 67 val = op->set_clnt(&sa, size);
126 else { 68 if (val)
69 pr_info("NFSD [%s]: Client %s had %llu state object(s)\n",
70 op->file, write_buf, val);
71 } else {
127 val = simple_strtoll(write_buf, NULL, 0); 72 val = simple_strtoll(write_buf, NULL, 0);
128 nfsd_inject_set(file_inode(file)->i_private, val); 73 if (val == 0)
74 pr_info("NFSD Fault Injection: %s (all)", op->file);
75 else
76 pr_info("NFSD Fault Injection: %s (n = %llu)",
77 op->file, val);
78 val = op->set_val(val);
79 pr_info("NFSD: %s: found %llu", op->file, val);
129 } 80 }
130 return len; /* on success, claim we got the whole input */ 81 return len; /* on success, claim we got the whole input */
131} 82}
@@ -141,6 +92,41 @@ void nfsd_fault_inject_cleanup(void)
141 debugfs_remove_recursive(debug_dir); 92 debugfs_remove_recursive(debug_dir);
142} 93}
143 94
95static struct nfsd_fault_inject_op inject_ops[] = {
96 {
97 .file = "forget_clients",
98 .get = nfsd_inject_print_clients,
99 .set_val = nfsd_inject_forget_clients,
100 .set_clnt = nfsd_inject_forget_client,
101 },
102 {
103 .file = "forget_locks",
104 .get = nfsd_inject_print_locks,
105 .set_val = nfsd_inject_forget_locks,
106 .set_clnt = nfsd_inject_forget_client_locks,
107 },
108 {
109 .file = "forget_openowners",
110 .get = nfsd_inject_print_openowners,
111 .set_val = nfsd_inject_forget_openowners,
112 .set_clnt = nfsd_inject_forget_client_openowners,
113 },
114 {
115 .file = "forget_delegations",
116 .get = nfsd_inject_print_delegations,
117 .set_val = nfsd_inject_forget_delegations,
118 .set_clnt = nfsd_inject_forget_client_delegations,
119 },
120 {
121 .file = "recall_delegations",
122 .get = nfsd_inject_print_delegations,
123 .set_val = nfsd_inject_recall_delegations,
124 .set_clnt = nfsd_inject_recall_client_delegations,
125 },
126};
127
128#define NUM_INJECT_OPS (sizeof(inject_ops)/sizeof(struct nfsd_fault_inject_op))
129
144int nfsd_fault_inject_init(void) 130int nfsd_fault_inject_init(void)
145{ 131{
146 unsigned int i; 132 unsigned int i;
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index d32b3aa6600d..ea6749a32760 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -29,14 +29,19 @@
29#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS) 29#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS)
30#define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1) 30#define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1)
31 31
32#define LOCKOWNER_INO_HASH_BITS 8
33#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS)
34
35#define SESSION_HASH_SIZE 512 32#define SESSION_HASH_SIZE 512
36 33
37struct cld_net; 34struct cld_net;
38struct nfsd4_client_tracking_ops; 35struct nfsd4_client_tracking_ops;
39 36
37/*
38 * Represents a nfsd "container". With respect to nfsv4 state tracking, the
39 * fields of interest are the *_id_hashtbls and the *_name_tree. These track
40 * the nfs4_client objects by either short or long form clientid.
41 *
42 * Each nfsd_net runs a nfs4_laundromat workqueue job when necessary to clean
43 * up expired clients and delegations within the container.
44 */
40struct nfsd_net { 45struct nfsd_net {
41 struct cld_net *cld_net; 46 struct cld_net *cld_net;
42 47
@@ -66,8 +71,6 @@ struct nfsd_net {
66 struct rb_root conf_name_tree; 71 struct rb_root conf_name_tree;
67 struct list_head *unconf_id_hashtbl; 72 struct list_head *unconf_id_hashtbl;
68 struct rb_root unconf_name_tree; 73 struct rb_root unconf_name_tree;
69 struct list_head *ownerstr_hashtbl;
70 struct list_head *lockowner_ino_hashtbl;
71 struct list_head *sessionid_hashtbl; 74 struct list_head *sessionid_hashtbl;
72 /* 75 /*
73 * client_lru holds client queue ordered by nfs4_client.cl_time 76 * client_lru holds client queue ordered by nfs4_client.cl_time
@@ -97,10 +100,16 @@ struct nfsd_net {
97 bool nfsd_net_up; 100 bool nfsd_net_up;
98 bool lockd_up; 101 bool lockd_up;
99 102
103 /* Time of server startup */
104 struct timeval nfssvc_boot;
105
100 /* 106 /*
101 * Time of server startup 107 * Max number of connections this nfsd container will allow. Defaults
108 * to '0' which means that it bases this on the number of threads.
102 */ 109 */
103 struct timeval nfssvc_boot; 110 unsigned int max_connections;
111
112 u32 clientid_counter;
104 113
105 struct svc_serv *nfsd_serv; 114 struct svc_serv *nfsd_serv;
106}; 115};
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 12b023a7ab7d..ac54ea60b3f6 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -54,14 +54,14 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
54 54
55 if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { 55 if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
56 acl = get_acl(inode, ACL_TYPE_ACCESS); 56 acl = get_acl(inode, ACL_TYPE_ACCESS);
57 if (IS_ERR(acl)) {
58 nfserr = nfserrno(PTR_ERR(acl));
59 goto fail;
60 }
61 if (acl == NULL) { 57 if (acl == NULL) {
62 /* Solaris returns the inode's minimum ACL. */ 58 /* Solaris returns the inode's minimum ACL. */
63 acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); 59 acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
64 } 60 }
61 if (IS_ERR(acl)) {
62 nfserr = nfserrno(PTR_ERR(acl));
63 goto fail;
64 }
65 resp->acl_access = acl; 65 resp->acl_access = acl;
66 } 66 }
67 if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { 67 if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) {
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 2a514e21dc74..34cbbab6abd7 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -47,14 +47,14 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
47 47
48 if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { 48 if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
49 acl = get_acl(inode, ACL_TYPE_ACCESS); 49 acl = get_acl(inode, ACL_TYPE_ACCESS);
50 if (IS_ERR(acl)) {
51 nfserr = nfserrno(PTR_ERR(acl));
52 goto fail;
53 }
54 if (acl == NULL) { 50 if (acl == NULL) {
55 /* Solaris returns the inode's minimum ACL. */ 51 /* Solaris returns the inode's minimum ACL. */
56 acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); 52 acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
57 } 53 }
54 if (IS_ERR(acl)) {
55 nfserr = nfserrno(PTR_ERR(acl));
56 goto fail;
57 }
58 resp->acl_access = acl; 58 resp->acl_access = acl;
59 } 59 }
60 if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { 60 if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) {
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 401289913130..fa2525b2e9d7 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -157,11 +157,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
157 * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof) 157 * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
158 * + 1 (xdr opaque byte count) = 26 158 * + 1 (xdr opaque byte count) = 26
159 */ 159 */
160 160 resp->count = min(argp->count, max_blocksize);
161 resp->count = argp->count;
162 if (max_blocksize < resp->count)
163 resp->count = max_blocksize;
164
165 svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); 161 svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
166 162
167 fh_copy(&resp->fh, &argp->fh); 163 fh_copy(&resp->fh, &argp->fh);
@@ -286,8 +282,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp,
286 fh_copy(&resp->dirfh, &argp->ffh); 282 fh_copy(&resp->dirfh, &argp->ffh);
287 fh_init(&resp->fh, NFS3_FHSIZE); 283 fh_init(&resp->fh, NFS3_FHSIZE);
288 nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen, 284 nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen,
289 argp->tname, argp->tlen, 285 argp->tname, &resp->fh);
290 &resp->fh, &argp->attrs);
291 RETURN_STATUS(nfserr); 286 RETURN_STATUS(nfserr);
292} 287}
293 288
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index e6c01e80325e..39c5eb3ad33a 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -120,10 +120,7 @@ decode_sattr3(__be32 *p, struct iattr *iap)
120 120
121 iap->ia_valid |= ATTR_SIZE; 121 iap->ia_valid |= ATTR_SIZE;
122 p = xdr_decode_hyper(p, &newsize); 122 p = xdr_decode_hyper(p, &newsize);
123 if (newsize <= NFS_OFFSET_MAX) 123 iap->ia_size = min_t(u64, newsize, NFS_OFFSET_MAX);
124 iap->ia_size = newsize;
125 else
126 iap->ia_size = NFS_OFFSET_MAX;
127 } 124 }
128 if ((tmp = ntohl(*p++)) == 1) { /* set to server time */ 125 if ((tmp = ntohl(*p++)) == 1) { /* set to server time */
129 iap->ia_valid |= ATTR_ATIME; 126 iap->ia_valid |= ATTR_ATIME;
@@ -338,10 +335,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
338 return 0; 335 return 0;
339 p = xdr_decode_hyper(p, &args->offset); 336 p = xdr_decode_hyper(p, &args->offset);
340 337
341 len = args->count = ntohl(*p++); 338 args->count = ntohl(*p++);
342 339 len = min(args->count, max_blocksize);
343 if (len > max_blocksize)
344 len = max_blocksize;
345 340
346 /* set up the kvec */ 341 /* set up the kvec */
347 v=0; 342 v=0;
@@ -349,7 +344,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
349 struct page *p = *(rqstp->rq_next_page++); 344 struct page *p = *(rqstp->rq_next_page++);
350 345
351 rqstp->rq_vec[v].iov_base = page_address(p); 346 rqstp->rq_vec[v].iov_base = page_address(p);
352 rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE; 347 rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
353 len -= rqstp->rq_vec[v].iov_len; 348 len -= rqstp->rq_vec[v].iov_len;
354 v++; 349 v++;
355 } 350 }
@@ -484,9 +479,7 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
484 } 479 }
485 /* now copy next page if there is one */ 480 /* now copy next page if there is one */
486 if (len && !avail && rqstp->rq_arg.page_len) { 481 if (len && !avail && rqstp->rq_arg.page_len) {
487 avail = rqstp->rq_arg.page_len; 482 avail = min_t(unsigned int, rqstp->rq_arg.page_len, PAGE_SIZE);
488 if (avail > PAGE_SIZE)
489 avail = PAGE_SIZE;
490 old = page_address(rqstp->rq_arg.pages[0]); 483 old = page_address(rqstp->rq_arg.pages[0]);
491 } 484 }
492 while (len && avail && *old) { 485 while (len && avail && *old) {
@@ -571,10 +564,7 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
571 args->verf = p; p += 2; 564 args->verf = p; p += 2;
572 args->dircount = ~0; 565 args->dircount = ~0;
573 args->count = ntohl(*p++); 566 args->count = ntohl(*p++);
574 567 args->count = min_t(u32, args->count, PAGE_SIZE);
575 if (args->count > PAGE_SIZE)
576 args->count = PAGE_SIZE;
577
578 args->buffer = page_address(*(rqstp->rq_next_page++)); 568 args->buffer = page_address(*(rqstp->rq_next_page++));
579 569
580 return xdr_argsize_check(rqstp, p); 570 return xdr_argsize_check(rqstp, p);
@@ -595,10 +585,7 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
595 args->dircount = ntohl(*p++); 585 args->dircount = ntohl(*p++);
596 args->count = ntohl(*p++); 586 args->count = ntohl(*p++);
597 587
598 len = (args->count > max_blocksize) ? max_blocksize : 588 len = args->count = min(args->count, max_blocksize);
599 args->count;
600 args->count = len;
601
602 while (len > 0) { 589 while (len > 0) {
603 struct page *p = *(rqstp->rq_next_page++); 590 struct page *p = *(rqstp->rq_next_page++);
604 if (!args->buffer) 591 if (!args->buffer)
@@ -913,8 +900,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
913 */ 900 */
914 901
915 /* truncate filename if too long */ 902 /* truncate filename if too long */
916 if (namlen > NFS3_MAXNAMLEN) 903 namlen = min(namlen, NFS3_MAXNAMLEN);
917 namlen = NFS3_MAXNAMLEN;
918 904
919 slen = XDR_QUADLEN(namlen); 905 slen = XDR_QUADLEN(namlen);
920 elen = slen + NFS3_ENTRY_BAGGAGE 906 elen = slen + NFS3_ENTRY_BAGGAGE
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index d714156a19fd..59fd76651781 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -146,35 +146,43 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry,
146 int size = 0; 146 int size = 0;
147 147
148 pacl = get_acl(inode, ACL_TYPE_ACCESS); 148 pacl = get_acl(inode, ACL_TYPE_ACCESS);
149 if (!pacl) { 149 if (!pacl)
150 pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); 150 pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
151 if (IS_ERR(pacl)) 151
152 return PTR_ERR(pacl); 152 if (IS_ERR(pacl))
153 } 153 return PTR_ERR(pacl);
154
154 /* allocate for worst case: one (deny, allow) pair each: */ 155 /* allocate for worst case: one (deny, allow) pair each: */
155 size += 2 * pacl->a_count; 156 size += 2 * pacl->a_count;
156 157
157 if (S_ISDIR(inode->i_mode)) { 158 if (S_ISDIR(inode->i_mode)) {
158 flags = NFS4_ACL_DIR; 159 flags = NFS4_ACL_DIR;
159 dpacl = get_acl(inode, ACL_TYPE_DEFAULT); 160 dpacl = get_acl(inode, ACL_TYPE_DEFAULT);
161 if (IS_ERR(dpacl)) {
162 error = PTR_ERR(dpacl);
163 goto rel_pacl;
164 }
165
160 if (dpacl) 166 if (dpacl)
161 size += 2 * dpacl->a_count; 167 size += 2 * dpacl->a_count;
162 } 168 }
163 169
164 *acl = nfs4_acl_new(size); 170 *acl = kmalloc(nfs4_acl_bytes(size), GFP_KERNEL);
165 if (*acl == NULL) { 171 if (*acl == NULL) {
166 error = -ENOMEM; 172 error = -ENOMEM;
167 goto out; 173 goto out;
168 } 174 }
175 (*acl)->naces = 0;
169 176
170 _posix_to_nfsv4_one(pacl, *acl, flags & ~NFS4_ACL_TYPE_DEFAULT); 177 _posix_to_nfsv4_one(pacl, *acl, flags & ~NFS4_ACL_TYPE_DEFAULT);
171 178
172 if (dpacl) 179 if (dpacl)
173 _posix_to_nfsv4_one(dpacl, *acl, flags | NFS4_ACL_TYPE_DEFAULT); 180 _posix_to_nfsv4_one(dpacl, *acl, flags | NFS4_ACL_TYPE_DEFAULT);
174 181
175 out: 182out:
176 posix_acl_release(pacl);
177 posix_acl_release(dpacl); 183 posix_acl_release(dpacl);
184rel_pacl:
185 posix_acl_release(pacl);
178 return error; 186 return error;
179} 187}
180 188
@@ -872,16 +880,13 @@ ace2type(struct nfs4_ace *ace)
872 return -1; 880 return -1;
873} 881}
874 882
875struct nfs4_acl * 883/*
876nfs4_acl_new(int n) 884 * return the size of the struct nfs4_acl required to represent an acl
885 * with @entries entries.
886 */
887int nfs4_acl_bytes(int entries)
877{ 888{
878 struct nfs4_acl *acl; 889 return sizeof(struct nfs4_acl) + entries * sizeof(struct nfs4_ace);
879
880 acl = kmalloc(sizeof(*acl) + n*sizeof(struct nfs4_ace), GFP_KERNEL);
881 if (acl == NULL)
882 return NULL;
883 acl->naces = 0;
884 return acl;
885} 890}
886 891
887static struct { 892static struct {
@@ -935,5 +940,5 @@ __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who)
935 return 0; 940 return 0;
936 } 941 }
937 WARN_ON_ONCE(1); 942 WARN_ON_ONCE(1);
938 return -1; 943 return nfserr_serverfault;
939} 944}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 2c73cae9899d..e0be57b0f79b 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -337,7 +337,7 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
337 p = xdr_reserve_space(xdr, 4); 337 p = xdr_reserve_space(xdr, 4);
338 *p++ = xdr_zero; /* truncate */ 338 *p++ = xdr_zero; /* truncate */
339 339
340 encode_nfs_fh4(xdr, &dp->dl_fh); 340 encode_nfs_fh4(xdr, &dp->dl_stid.sc_file->fi_fhandle);
341 341
342 hdr->nops++; 342 hdr->nops++;
343} 343}
@@ -678,7 +678,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
678 (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) 678 (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5))
679 return -EINVAL; 679 return -EINVAL;
680 args.client_name = clp->cl_cred.cr_principal; 680 args.client_name = clp->cl_cred.cr_principal;
681 args.prognumber = conn->cb_prog, 681 args.prognumber = conn->cb_prog;
682 args.protocol = XPRT_TRANSPORT_TCP; 682 args.protocol = XPRT_TRANSPORT_TCP;
683 args.authflavor = clp->cl_cred.cr_flavor; 683 args.authflavor = clp->cl_cred.cr_flavor;
684 clp->cl_cb_ident = conn->cb_ident; 684 clp->cl_cb_ident = conn->cb_ident;
@@ -689,7 +689,8 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
689 clp->cl_cb_session = ses; 689 clp->cl_cb_session = ses;
690 args.bc_xprt = conn->cb_xprt; 690 args.bc_xprt = conn->cb_xprt;
691 args.prognumber = clp->cl_cb_session->se_cb_prog; 691 args.prognumber = clp->cl_cb_session->se_cb_prog;
692 args.protocol = XPRT_TRANSPORT_BC_TCP; 692 args.protocol = conn->cb_xprt->xpt_class->xcl_ident |
693 XPRT_TRANSPORT_BC;
693 args.authflavor = ses->se_cb_sec.flavor; 694 args.authflavor = ses->se_cb_sec.flavor;
694 } 695 }
695 /* Create RPC client */ 696 /* Create RPC client */
@@ -904,7 +905,7 @@ static void nfsd4_cb_recall_release(void *calldata)
904 spin_lock(&clp->cl_lock); 905 spin_lock(&clp->cl_lock);
905 list_del(&cb->cb_per_client); 906 list_del(&cb->cb_per_client);
906 spin_unlock(&clp->cl_lock); 907 spin_unlock(&clp->cl_lock);
907 nfs4_put_delegation(dp); 908 nfs4_put_stid(&dp->dl_stid);
908 } 909 }
909} 910}
910 911
@@ -933,7 +934,7 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp)
933 set_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags); 934 set_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags);
934 /* 935 /*
935 * Note this won't actually result in a null callback; 936 * Note this won't actually result in a null callback;
936 * instead, nfsd4_do_callback_rpc() will detect the killed 937 * instead, nfsd4_run_cb_null() will detect the killed
937 * client, destroy the rpc client, and stop: 938 * client, destroy the rpc client, and stop:
938 */ 939 */
939 do_probe_callback(clp); 940 do_probe_callback(clp);
@@ -1011,9 +1012,9 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
1011 run_nfsd4_cb(cb); 1012 run_nfsd4_cb(cb);
1012} 1013}
1013 1014
1014static void nfsd4_do_callback_rpc(struct work_struct *w) 1015static void
1016nfsd4_run_callback_rpc(struct nfsd4_callback *cb)
1015{ 1017{
1016 struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work);
1017 struct nfs4_client *clp = cb->cb_clp; 1018 struct nfs4_client *clp = cb->cb_clp;
1018 struct rpc_clnt *clnt; 1019 struct rpc_clnt *clnt;
1019 1020
@@ -1031,9 +1032,22 @@ static void nfsd4_do_callback_rpc(struct work_struct *w)
1031 cb->cb_ops, cb); 1032 cb->cb_ops, cb);
1032} 1033}
1033 1034
1034void nfsd4_init_callback(struct nfsd4_callback *cb) 1035void
1036nfsd4_run_cb_null(struct work_struct *w)
1035{ 1037{
1036 INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc); 1038 struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback,
1039 cb_work);
1040 nfsd4_run_callback_rpc(cb);
1041}
1042
1043void
1044nfsd4_run_cb_recall(struct work_struct *w)
1045{
1046 struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback,
1047 cb_work);
1048
1049 nfsd4_prepare_cb_recall(cb->cb_op);
1050 nfsd4_run_callback_rpc(cb);
1037} 1051}
1038 1052
1039void nfsd4_cb_recall(struct nfs4_delegation *dp) 1053void nfsd4_cb_recall(struct nfs4_delegation *dp)
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 8f029db5d271..5e0dc528a0e8 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -177,7 +177,7 @@ fh_dup2(struct svc_fh *dst, struct svc_fh *src)
177 fh_put(dst); 177 fh_put(dst);
178 dget(src->fh_dentry); 178 dget(src->fh_dentry);
179 if (src->fh_export) 179 if (src->fh_export)
180 cache_get(&src->fh_export->h); 180 exp_get(src->fh_export);
181 *dst = *src; 181 *dst = *src;
182} 182}
183 183
@@ -385,8 +385,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
385 if (nfsd4_has_session(cstate)) 385 if (nfsd4_has_session(cstate))
386 copy_clientid(&open->op_clientid, cstate->session); 386 copy_clientid(&open->op_clientid, cstate->session);
387 387
388 nfs4_lock_state();
389
390 /* check seqid for replay. set nfs4_owner */ 388 /* check seqid for replay. set nfs4_owner */
391 resp = rqstp->rq_resp; 389 resp = rqstp->rq_resp;
392 status = nfsd4_process_open1(&resp->cstate, open, nn); 390 status = nfsd4_process_open1(&resp->cstate, open, nn);
@@ -431,8 +429,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
431 break; 429 break;
432 case NFS4_OPEN_CLAIM_PREVIOUS: 430 case NFS4_OPEN_CLAIM_PREVIOUS:
433 status = nfs4_check_open_reclaim(&open->op_clientid, 431 status = nfs4_check_open_reclaim(&open->op_clientid,
434 cstate->minorversion, 432 cstate, nn);
435 nn);
436 if (status) 433 if (status)
437 goto out; 434 goto out;
438 open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; 435 open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
@@ -461,19 +458,17 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
461 * set, (2) sets open->op_stateid, (3) sets open->op_delegation. 458 * set, (2) sets open->op_stateid, (3) sets open->op_delegation.
462 */ 459 */
463 status = nfsd4_process_open2(rqstp, resfh, open); 460 status = nfsd4_process_open2(rqstp, resfh, open);
464 WARN_ON(status && open->op_created); 461 WARN(status && open->op_created,
462 "nfsd4_process_open2 failed to open newly-created file! status=%u\n",
463 be32_to_cpu(status));
465out: 464out:
466 if (resfh && resfh != &cstate->current_fh) { 465 if (resfh && resfh != &cstate->current_fh) {
467 fh_dup2(&cstate->current_fh, resfh); 466 fh_dup2(&cstate->current_fh, resfh);
468 fh_put(resfh); 467 fh_put(resfh);
469 kfree(resfh); 468 kfree(resfh);
470 } 469 }
471 nfsd4_cleanup_open_state(open, status); 470 nfsd4_cleanup_open_state(cstate, open, status);
472 if (open->op_openowner && !nfsd4_has_session(cstate))
473 cstate->replay_owner = &open->op_openowner->oo_owner;
474 nfsd4_bump_seqid(cstate, status); 471 nfsd4_bump_seqid(cstate, status);
475 if (!cstate->replay_owner)
476 nfs4_unlock_state();
477 return status; 472 return status;
478} 473}
479 474
@@ -581,8 +576,12 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
581 __be32 verf[2]; 576 __be32 verf[2];
582 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 577 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
583 578
584 verf[0] = (__be32)nn->nfssvc_boot.tv_sec; 579 /*
585 verf[1] = (__be32)nn->nfssvc_boot.tv_usec; 580 * This is opaque to client, so no need to byte-swap. Use
581 * __force to keep sparse happy
582 */
583 verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
584 verf[1] = (__force __be32)nn->nfssvc_boot.tv_usec;
586 memcpy(verifier->data, verf, sizeof(verifier->data)); 585 memcpy(verifier->data, verf, sizeof(verifier->data));
587} 586}
588 587
@@ -619,8 +618,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
619 case NF4LNK: 618 case NF4LNK:
620 status = nfsd_symlink(rqstp, &cstate->current_fh, 619 status = nfsd_symlink(rqstp, &cstate->current_fh,
621 create->cr_name, create->cr_namelen, 620 create->cr_name, create->cr_namelen,
622 create->cr_linkname, create->cr_linklen, 621 create->cr_data, &resfh);
623 &resfh, &create->cr_iattr);
624 break; 622 break;
625 623
626 case NF4BLK: 624 case NF4BLK:
@@ -909,8 +907,8 @@ nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstat
909 default: 907 default:
910 return nfserr_inval; 908 return nfserr_inval;
911 } 909 }
912 exp_get(cstate->current_fh.fh_export); 910
913 sin->sin_exp = cstate->current_fh.fh_export; 911 sin->sin_exp = exp_get(cstate->current_fh.fh_export);
914 fh_put(&cstate->current_fh); 912 fh_put(&cstate->current_fh);
915 return nfs_ok; 913 return nfs_ok;
916} 914}
@@ -1289,7 +1287,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1289 * Don't use the deferral mechanism for NFSv4; compounds make it 1287 * Don't use the deferral mechanism for NFSv4; compounds make it
1290 * too hard to avoid non-idempotency problems. 1288 * too hard to avoid non-idempotency problems.
1291 */ 1289 */
1292 rqstp->rq_usedeferral = 0; 1290 rqstp->rq_usedeferral = false;
1293 1291
1294 /* 1292 /*
1295 * According to RFC3010, this takes precedence over all other errors. 1293 * According to RFC3010, this takes precedence over all other errors.
@@ -1391,10 +1389,7 @@ encode_op:
1391 args->ops, args->opcnt, resp->opcnt, op->opnum, 1389 args->ops, args->opcnt, resp->opcnt, op->opnum,
1392 be32_to_cpu(status)); 1390 be32_to_cpu(status));
1393 1391
1394 if (cstate->replay_owner) { 1392 nfsd4_cstate_clear_replay(cstate);
1395 nfs4_unlock_state();
1396 cstate->replay_owner = NULL;
1397 }
1398 /* XXX Ugh, we need to get rid of this kind of special case: */ 1393 /* XXX Ugh, we need to get rid of this kind of special case: */
1399 if (op->opnum == OP_READ && op->u.read.rd_filp) 1394 if (op->opnum == OP_READ && op->u.read.rd_filp)
1400 fput(op->u.read.rd_filp); 1395 fput(op->u.read.rd_filp);
@@ -1408,7 +1403,7 @@ encode_op:
1408 BUG_ON(cstate->replay_owner); 1403 BUG_ON(cstate->replay_owner);
1409out: 1404out:
1410 /* Reset deferral mechanism for RPC deferrals */ 1405 /* Reset deferral mechanism for RPC deferrals */
1411 rqstp->rq_usedeferral = 1; 1406 rqstp->rq_usedeferral = true;
1412 dprintk("nfsv4 compound returned %d\n", ntohl(status)); 1407 dprintk("nfsv4 compound returned %d\n", ntohl(status));
1413 return status; 1408 return status;
1414} 1409}
@@ -1520,21 +1515,17 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1520 u32 maxcount = 0, rlen = 0; 1515 u32 maxcount = 0, rlen = 0;
1521 1516
1522 maxcount = svc_max_payload(rqstp); 1517 maxcount = svc_max_payload(rqstp);
1523 rlen = op->u.read.rd_length; 1518 rlen = min(op->u.read.rd_length, maxcount);
1524
1525 if (rlen > maxcount)
1526 rlen = maxcount;
1527 1519
1528 return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32); 1520 return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32);
1529} 1521}
1530 1522
1531static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) 1523static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1532{ 1524{
1533 u32 maxcount = svc_max_payload(rqstp); 1525 u32 maxcount = 0, rlen = 0;
1534 u32 rlen = op->u.readdir.rd_maxcount;
1535 1526
1536 if (rlen > maxcount) 1527 maxcount = svc_max_payload(rqstp);
1537 rlen = maxcount; 1528 rlen = min(op->u.readdir.rd_maxcount, maxcount);
1538 1529
1539 return (op_encode_hdr_size + op_encode_verifier_maxsz + 1530 return (op_encode_hdr_size + op_encode_verifier_maxsz +
1540 XDR_QUADLEN(rlen)) * sizeof(__be32); 1531 XDR_QUADLEN(rlen)) * sizeof(__be32);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2204e1fe5725..2e80a59e7e91 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -70,13 +70,11 @@ static u64 current_sessionid = 1;
70#define CURRENT_STATEID(stateid) (!memcmp((stateid), &currentstateid, sizeof(stateid_t))) 70#define CURRENT_STATEID(stateid) (!memcmp((stateid), &currentstateid, sizeof(stateid_t)))
71 71
72/* forward declarations */ 72/* forward declarations */
73static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner); 73static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner);
74static void nfs4_free_ol_stateid(struct nfs4_stid *stid);
74 75
75/* Locking: */ 76/* Locking: */
76 77
77/* Currently used for almost all code touching nfsv4 state: */
78static DEFINE_MUTEX(client_mutex);
79
80/* 78/*
81 * Currently used for the del_recall_lru and file hash table. In an 79 * Currently used for the del_recall_lru and file hash table. In an
82 * effort to decrease the scope of the client_mutex, this spinlock may 80 * effort to decrease the scope of the client_mutex, this spinlock may
@@ -84,18 +82,18 @@ static DEFINE_MUTEX(client_mutex);
84 */ 82 */
85static DEFINE_SPINLOCK(state_lock); 83static DEFINE_SPINLOCK(state_lock);
86 84
85/*
86 * A waitqueue for all in-progress 4.0 CLOSE operations that are waiting for
87 * the refcount on the open stateid to drop.
88 */
89static DECLARE_WAIT_QUEUE_HEAD(close_wq);
90
87static struct kmem_cache *openowner_slab; 91static struct kmem_cache *openowner_slab;
88static struct kmem_cache *lockowner_slab; 92static struct kmem_cache *lockowner_slab;
89static struct kmem_cache *file_slab; 93static struct kmem_cache *file_slab;
90static struct kmem_cache *stateid_slab; 94static struct kmem_cache *stateid_slab;
91static struct kmem_cache *deleg_slab; 95static struct kmem_cache *deleg_slab;
92 96
93void
94nfs4_lock_state(void)
95{
96 mutex_lock(&client_mutex);
97}
98
99static void free_session(struct nfsd4_session *); 97static void free_session(struct nfsd4_session *);
100 98
101static bool is_session_dead(struct nfsd4_session *ses) 99static bool is_session_dead(struct nfsd4_session *ses)
@@ -103,12 +101,6 @@ static bool is_session_dead(struct nfsd4_session *ses)
103 return ses->se_flags & NFS4_SESSION_DEAD; 101 return ses->se_flags & NFS4_SESSION_DEAD;
104} 102}
105 103
106void nfsd4_put_session(struct nfsd4_session *ses)
107{
108 if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses))
109 free_session(ses);
110}
111
112static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me) 104static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me)
113{ 105{
114 if (atomic_read(&ses->se_ref) > ref_held_by_me) 106 if (atomic_read(&ses->se_ref) > ref_held_by_me)
@@ -117,46 +109,17 @@ static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_b
117 return nfs_ok; 109 return nfs_ok;
118} 110}
119 111
120static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses)
121{
122 if (is_session_dead(ses))
123 return nfserr_badsession;
124 atomic_inc(&ses->se_ref);
125 return nfs_ok;
126}
127
128void
129nfs4_unlock_state(void)
130{
131 mutex_unlock(&client_mutex);
132}
133
134static bool is_client_expired(struct nfs4_client *clp) 112static bool is_client_expired(struct nfs4_client *clp)
135{ 113{
136 return clp->cl_time == 0; 114 return clp->cl_time == 0;
137} 115}
138 116
139static __be32 mark_client_expired_locked(struct nfs4_client *clp) 117static __be32 get_client_locked(struct nfs4_client *clp)
140{
141 if (atomic_read(&clp->cl_refcount))
142 return nfserr_jukebox;
143 clp->cl_time = 0;
144 return nfs_ok;
145}
146
147static __be32 mark_client_expired(struct nfs4_client *clp)
148{ 118{
149 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); 119 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
150 __be32 ret;
151 120
152 spin_lock(&nn->client_lock); 121 lockdep_assert_held(&nn->client_lock);
153 ret = mark_client_expired_locked(clp);
154 spin_unlock(&nn->client_lock);
155 return ret;
156}
157 122
158static __be32 get_client_locked(struct nfs4_client *clp)
159{
160 if (is_client_expired(clp)) 123 if (is_client_expired(clp))
161 return nfserr_expired; 124 return nfserr_expired;
162 atomic_inc(&clp->cl_refcount); 125 atomic_inc(&clp->cl_refcount);
@@ -197,13 +160,17 @@ renew_client(struct nfs4_client *clp)
197 160
198static void put_client_renew_locked(struct nfs4_client *clp) 161static void put_client_renew_locked(struct nfs4_client *clp)
199{ 162{
163 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
164
165 lockdep_assert_held(&nn->client_lock);
166
200 if (!atomic_dec_and_test(&clp->cl_refcount)) 167 if (!atomic_dec_and_test(&clp->cl_refcount))
201 return; 168 return;
202 if (!is_client_expired(clp)) 169 if (!is_client_expired(clp))
203 renew_client_locked(clp); 170 renew_client_locked(clp);
204} 171}
205 172
206void put_client_renew(struct nfs4_client *clp) 173static void put_client_renew(struct nfs4_client *clp)
207{ 174{
208 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); 175 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
209 176
@@ -214,6 +181,79 @@ void put_client_renew(struct nfs4_client *clp)
214 spin_unlock(&nn->client_lock); 181 spin_unlock(&nn->client_lock);
215} 182}
216 183
184static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses)
185{
186 __be32 status;
187
188 if (is_session_dead(ses))
189 return nfserr_badsession;
190 status = get_client_locked(ses->se_client);
191 if (status)
192 return status;
193 atomic_inc(&ses->se_ref);
194 return nfs_ok;
195}
196
197static void nfsd4_put_session_locked(struct nfsd4_session *ses)
198{
199 struct nfs4_client *clp = ses->se_client;
200 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
201
202 lockdep_assert_held(&nn->client_lock);
203
204 if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses))
205 free_session(ses);
206 put_client_renew_locked(clp);
207}
208
209static void nfsd4_put_session(struct nfsd4_session *ses)
210{
211 struct nfs4_client *clp = ses->se_client;
212 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
213
214 spin_lock(&nn->client_lock);
215 nfsd4_put_session_locked(ses);
216 spin_unlock(&nn->client_lock);
217}
218
219static int
220same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner)
221{
222 return (sop->so_owner.len == owner->len) &&
223 0 == memcmp(sop->so_owner.data, owner->data, owner->len);
224}
225
226static struct nfs4_openowner *
227find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open,
228 struct nfs4_client *clp)
229{
230 struct nfs4_stateowner *so;
231
232 lockdep_assert_held(&clp->cl_lock);
233
234 list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[hashval],
235 so_strhash) {
236 if (!so->so_is_open_owner)
237 continue;
238 if (same_owner_str(so, &open->op_owner)) {
239 atomic_inc(&so->so_count);
240 return openowner(so);
241 }
242 }
243 return NULL;
244}
245
246static struct nfs4_openowner *
247find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open,
248 struct nfs4_client *clp)
249{
250 struct nfs4_openowner *oo;
251
252 spin_lock(&clp->cl_lock);
253 oo = find_openstateowner_str_locked(hashval, open, clp);
254 spin_unlock(&clp->cl_lock);
255 return oo;
256}
217 257
218static inline u32 258static inline u32
219opaque_hashval(const void *ptr, int nbytes) 259opaque_hashval(const void *ptr, int nbytes)
@@ -236,10 +276,11 @@ static void nfsd4_free_file(struct nfs4_file *f)
236static inline void 276static inline void
237put_nfs4_file(struct nfs4_file *fi) 277put_nfs4_file(struct nfs4_file *fi)
238{ 278{
279 might_lock(&state_lock);
280
239 if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) { 281 if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) {
240 hlist_del(&fi->fi_hash); 282 hlist_del(&fi->fi_hash);
241 spin_unlock(&state_lock); 283 spin_unlock(&state_lock);
242 iput(fi->fi_inode);
243 nfsd4_free_file(fi); 284 nfsd4_free_file(fi);
244 } 285 }
245} 286}
@@ -250,7 +291,80 @@ get_nfs4_file(struct nfs4_file *fi)
250 atomic_inc(&fi->fi_ref); 291 atomic_inc(&fi->fi_ref);
251} 292}
252 293
253static int num_delegations; 294static struct file *
295__nfs4_get_fd(struct nfs4_file *f, int oflag)
296{
297 if (f->fi_fds[oflag])
298 return get_file(f->fi_fds[oflag]);
299 return NULL;
300}
301
302static struct file *
303find_writeable_file_locked(struct nfs4_file *f)
304{
305 struct file *ret;
306
307 lockdep_assert_held(&f->fi_lock);
308
309 ret = __nfs4_get_fd(f, O_WRONLY);
310 if (!ret)
311 ret = __nfs4_get_fd(f, O_RDWR);
312 return ret;
313}
314
315static struct file *
316find_writeable_file(struct nfs4_file *f)
317{
318 struct file *ret;
319
320 spin_lock(&f->fi_lock);
321 ret = find_writeable_file_locked(f);
322 spin_unlock(&f->fi_lock);
323
324 return ret;
325}
326
327static struct file *find_readable_file_locked(struct nfs4_file *f)
328{
329 struct file *ret;
330
331 lockdep_assert_held(&f->fi_lock);
332
333 ret = __nfs4_get_fd(f, O_RDONLY);
334 if (!ret)
335 ret = __nfs4_get_fd(f, O_RDWR);
336 return ret;
337}
338
339static struct file *
340find_readable_file(struct nfs4_file *f)
341{
342 struct file *ret;
343
344 spin_lock(&f->fi_lock);
345 ret = find_readable_file_locked(f);
346 spin_unlock(&f->fi_lock);
347
348 return ret;
349}
350
351static struct file *
352find_any_file(struct nfs4_file *f)
353{
354 struct file *ret;
355
356 spin_lock(&f->fi_lock);
357 ret = __nfs4_get_fd(f, O_RDWR);
358 if (!ret) {
359 ret = __nfs4_get_fd(f, O_WRONLY);
360 if (!ret)
361 ret = __nfs4_get_fd(f, O_RDONLY);
362 }
363 spin_unlock(&f->fi_lock);
364 return ret;
365}
366
367static atomic_long_t num_delegations;
254unsigned long max_delegations; 368unsigned long max_delegations;
255 369
256/* 370/*
@@ -262,12 +376,11 @@ unsigned long max_delegations;
262#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS) 376#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS)
263#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1) 377#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1)
264 378
265static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) 379static unsigned int ownerstr_hashval(struct xdr_netobj *ownername)
266{ 380{
267 unsigned int ret; 381 unsigned int ret;
268 382
269 ret = opaque_hashval(ownername->data, ownername->len); 383 ret = opaque_hashval(ownername->data, ownername->len);
270 ret += clientid;
271 return ret & OWNER_HASH_MASK; 384 return ret & OWNER_HASH_MASK;
272} 385}
273 386
@@ -275,75 +388,124 @@ static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername)
275#define FILE_HASH_BITS 8 388#define FILE_HASH_BITS 8
276#define FILE_HASH_SIZE (1 << FILE_HASH_BITS) 389#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
277 390
278static unsigned int file_hashval(struct inode *ino) 391static unsigned int nfsd_fh_hashval(struct knfsd_fh *fh)
392{
393 return jhash2(fh->fh_base.fh_pad, XDR_QUADLEN(fh->fh_size), 0);
394}
395
396static unsigned int file_hashval(struct knfsd_fh *fh)
397{
398 return nfsd_fh_hashval(fh) & (FILE_HASH_SIZE - 1);
399}
400
401static bool nfsd_fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
279{ 402{
280 /* XXX: why are we hashing on inode pointer, anyway? */ 403 return fh1->fh_size == fh2->fh_size &&
281 return hash_ptr(ino, FILE_HASH_BITS); 404 !memcmp(fh1->fh_base.fh_pad,
405 fh2->fh_base.fh_pad,
406 fh1->fh_size);
282} 407}
283 408
284static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; 409static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
285 410
286static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag) 411static void
412__nfs4_file_get_access(struct nfs4_file *fp, u32 access)
287{ 413{
288 WARN_ON_ONCE(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR])); 414 lockdep_assert_held(&fp->fi_lock);
289 atomic_inc(&fp->fi_access[oflag]); 415
416 if (access & NFS4_SHARE_ACCESS_WRITE)
417 atomic_inc(&fp->fi_access[O_WRONLY]);
418 if (access & NFS4_SHARE_ACCESS_READ)
419 atomic_inc(&fp->fi_access[O_RDONLY]);
290} 420}
291 421
292static void nfs4_file_get_access(struct nfs4_file *fp, int oflag) 422static __be32
423nfs4_file_get_access(struct nfs4_file *fp, u32 access)
293{ 424{
294 if (oflag == O_RDWR) { 425 lockdep_assert_held(&fp->fi_lock);
295 __nfs4_file_get_access(fp, O_RDONLY); 426
296 __nfs4_file_get_access(fp, O_WRONLY); 427 /* Does this access mode make sense? */
297 } else 428 if (access & ~NFS4_SHARE_ACCESS_BOTH)
298 __nfs4_file_get_access(fp, oflag); 429 return nfserr_inval;
430
431 /* Does it conflict with a deny mode already set? */
432 if ((access & fp->fi_share_deny) != 0)
433 return nfserr_share_denied;
434
435 __nfs4_file_get_access(fp, access);
436 return nfs_ok;
299} 437}
300 438
301static void nfs4_file_put_fd(struct nfs4_file *fp, int oflag) 439static __be32 nfs4_file_check_deny(struct nfs4_file *fp, u32 deny)
302{ 440{
303 if (fp->fi_fds[oflag]) { 441 /* Common case is that there is no deny mode. */
304 fput(fp->fi_fds[oflag]); 442 if (deny) {
305 fp->fi_fds[oflag] = NULL; 443 /* Does this deny mode make sense? */
444 if (deny & ~NFS4_SHARE_DENY_BOTH)
445 return nfserr_inval;
446
447 if ((deny & NFS4_SHARE_DENY_READ) &&
448 atomic_read(&fp->fi_access[O_RDONLY]))
449 return nfserr_share_denied;
450
451 if ((deny & NFS4_SHARE_DENY_WRITE) &&
452 atomic_read(&fp->fi_access[O_WRONLY]))
453 return nfserr_share_denied;
306 } 454 }
455 return nfs_ok;
307} 456}
308 457
309static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) 458static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
310{ 459{
311 if (atomic_dec_and_test(&fp->fi_access[oflag])) { 460 might_lock(&fp->fi_lock);
312 nfs4_file_put_fd(fp, oflag); 461
462 if (atomic_dec_and_lock(&fp->fi_access[oflag], &fp->fi_lock)) {
463 struct file *f1 = NULL;
464 struct file *f2 = NULL;
465
466 swap(f1, fp->fi_fds[oflag]);
313 if (atomic_read(&fp->fi_access[1 - oflag]) == 0) 467 if (atomic_read(&fp->fi_access[1 - oflag]) == 0)
314 nfs4_file_put_fd(fp, O_RDWR); 468 swap(f2, fp->fi_fds[O_RDWR]);
469 spin_unlock(&fp->fi_lock);
470 if (f1)
471 fput(f1);
472 if (f2)
473 fput(f2);
315 } 474 }
316} 475}
317 476
318static void nfs4_file_put_access(struct nfs4_file *fp, int oflag) 477static void nfs4_file_put_access(struct nfs4_file *fp, u32 access)
319{ 478{
320 if (oflag == O_RDWR) { 479 WARN_ON_ONCE(access & ~NFS4_SHARE_ACCESS_BOTH);
321 __nfs4_file_put_access(fp, O_RDONLY); 480
481 if (access & NFS4_SHARE_ACCESS_WRITE)
322 __nfs4_file_put_access(fp, O_WRONLY); 482 __nfs4_file_put_access(fp, O_WRONLY);
323 } else 483 if (access & NFS4_SHARE_ACCESS_READ)
324 __nfs4_file_put_access(fp, oflag); 484 __nfs4_file_put_access(fp, O_RDONLY);
325} 485}
326 486
327static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct 487static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
328kmem_cache *slab) 488 struct kmem_cache *slab)
329{ 489{
330 struct idr *stateids = &cl->cl_stateids;
331 struct nfs4_stid *stid; 490 struct nfs4_stid *stid;
332 int new_id; 491 int new_id;
333 492
334 stid = kmem_cache_alloc(slab, GFP_KERNEL); 493 stid = kmem_cache_zalloc(slab, GFP_KERNEL);
335 if (!stid) 494 if (!stid)
336 return NULL; 495 return NULL;
337 496
338 new_id = idr_alloc_cyclic(stateids, stid, 0, 0, GFP_KERNEL); 497 idr_preload(GFP_KERNEL);
498 spin_lock(&cl->cl_lock);
499 new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 0, 0, GFP_NOWAIT);
500 spin_unlock(&cl->cl_lock);
501 idr_preload_end();
339 if (new_id < 0) 502 if (new_id < 0)
340 goto out_free; 503 goto out_free;
341 stid->sc_client = cl; 504 stid->sc_client = cl;
342 stid->sc_type = 0;
343 stid->sc_stateid.si_opaque.so_id = new_id; 505 stid->sc_stateid.si_opaque.so_id = new_id;
344 stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; 506 stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid;
345 /* Will be incremented before return to client: */ 507 /* Will be incremented before return to client: */
346 stid->sc_stateid.si_generation = 0; 508 atomic_set(&stid->sc_count, 1);
347 509
348 /* 510 /*
349 * It shouldn't be a problem to reuse an opaque stateid value. 511 * It shouldn't be a problem to reuse an opaque stateid value.
@@ -360,9 +522,24 @@ out_free:
360 return NULL; 522 return NULL;
361} 523}
362 524
363static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) 525static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
364{ 526{
365 return openlockstateid(nfs4_alloc_stid(clp, stateid_slab)); 527 struct nfs4_stid *stid;
528 struct nfs4_ol_stateid *stp;
529
530 stid = nfs4_alloc_stid(clp, stateid_slab);
531 if (!stid)
532 return NULL;
533
534 stp = openlockstateid(stid);
535 stp->st_stid.sc_free = nfs4_free_ol_stateid;
536 return stp;
537}
538
539static void nfs4_free_deleg(struct nfs4_stid *stid)
540{
541 kmem_cache_free(deleg_slab, stid);
542 atomic_long_dec(&num_delegations);
366} 543}
367 544
368/* 545/*
@@ -379,10 +556,11 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp)
379 * Each filter is 256 bits. We hash the filehandle to 32bit and use the 556 * Each filter is 256 bits. We hash the filehandle to 32bit and use the
380 * low 3 bytes as hash-table indices. 557 * low 3 bytes as hash-table indices.
381 * 558 *
382 * 'state_lock', which is always held when block_delegations() is called, 559 * 'blocked_delegations_lock', which is always taken in block_delegations(),
383 * is used to manage concurrent access. Testing does not need the lock 560 * is used to manage concurrent access. Testing does not need the lock
384 * except when swapping the two filters. 561 * except when swapping the two filters.
385 */ 562 */
563static DEFINE_SPINLOCK(blocked_delegations_lock);
386static struct bloom_pair { 564static struct bloom_pair {
387 int entries, old_entries; 565 int entries, old_entries;
388 time_t swap_time; 566 time_t swap_time;
@@ -398,7 +576,7 @@ static int delegation_blocked(struct knfsd_fh *fh)
398 if (bd->entries == 0) 576 if (bd->entries == 0)
399 return 0; 577 return 0;
400 if (seconds_since_boot() - bd->swap_time > 30) { 578 if (seconds_since_boot() - bd->swap_time > 30) {
401 spin_lock(&state_lock); 579 spin_lock(&blocked_delegations_lock);
402 if (seconds_since_boot() - bd->swap_time > 30) { 580 if (seconds_since_boot() - bd->swap_time > 30) {
403 bd->entries -= bd->old_entries; 581 bd->entries -= bd->old_entries;
404 bd->old_entries = bd->entries; 582 bd->old_entries = bd->entries;
@@ -407,7 +585,7 @@ static int delegation_blocked(struct knfsd_fh *fh)
407 bd->new = 1-bd->new; 585 bd->new = 1-bd->new;
408 bd->swap_time = seconds_since_boot(); 586 bd->swap_time = seconds_since_boot();
409 } 587 }
410 spin_unlock(&state_lock); 588 spin_unlock(&blocked_delegations_lock);
411 } 589 }
412 hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0); 590 hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0);
413 if (test_bit(hash&255, bd->set[0]) && 591 if (test_bit(hash&255, bd->set[0]) &&
@@ -430,69 +608,73 @@ static void block_delegations(struct knfsd_fh *fh)
430 608
431 hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0); 609 hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0);
432 610
611 spin_lock(&blocked_delegations_lock);
433 __set_bit(hash&255, bd->set[bd->new]); 612 __set_bit(hash&255, bd->set[bd->new]);
434 __set_bit((hash>>8)&255, bd->set[bd->new]); 613 __set_bit((hash>>8)&255, bd->set[bd->new]);
435 __set_bit((hash>>16)&255, bd->set[bd->new]); 614 __set_bit((hash>>16)&255, bd->set[bd->new]);
436 if (bd->entries == 0) 615 if (bd->entries == 0)
437 bd->swap_time = seconds_since_boot(); 616 bd->swap_time = seconds_since_boot();
438 bd->entries += 1; 617 bd->entries += 1;
618 spin_unlock(&blocked_delegations_lock);
439} 619}
440 620
441static struct nfs4_delegation * 621static struct nfs4_delegation *
442alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) 622alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh)
443{ 623{
444 struct nfs4_delegation *dp; 624 struct nfs4_delegation *dp;
625 long n;
445 626
446 dprintk("NFSD alloc_init_deleg\n"); 627 dprintk("NFSD alloc_init_deleg\n");
447 if (num_delegations > max_delegations) 628 n = atomic_long_inc_return(&num_delegations);
448 return NULL; 629 if (n < 0 || n > max_delegations)
630 goto out_dec;
449 if (delegation_blocked(&current_fh->fh_handle)) 631 if (delegation_blocked(&current_fh->fh_handle))
450 return NULL; 632 goto out_dec;
451 dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); 633 dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab));
452 if (dp == NULL) 634 if (dp == NULL)
453 return dp; 635 goto out_dec;
636
637 dp->dl_stid.sc_free = nfs4_free_deleg;
454 /* 638 /*
455 * delegation seqid's are never incremented. The 4.1 special 639 * delegation seqid's are never incremented. The 4.1 special
456 * meaning of seqid 0 isn't meaningful, really, but let's avoid 640 * meaning of seqid 0 isn't meaningful, really, but let's avoid
457 * 0 anyway just for consistency and use 1: 641 * 0 anyway just for consistency and use 1:
458 */ 642 */
459 dp->dl_stid.sc_stateid.si_generation = 1; 643 dp->dl_stid.sc_stateid.si_generation = 1;
460 num_delegations++;
461 INIT_LIST_HEAD(&dp->dl_perfile); 644 INIT_LIST_HEAD(&dp->dl_perfile);
462 INIT_LIST_HEAD(&dp->dl_perclnt); 645 INIT_LIST_HEAD(&dp->dl_perclnt);
463 INIT_LIST_HEAD(&dp->dl_recall_lru); 646 INIT_LIST_HEAD(&dp->dl_recall_lru);
464 dp->dl_file = NULL;
465 dp->dl_type = NFS4_OPEN_DELEGATE_READ; 647 dp->dl_type = NFS4_OPEN_DELEGATE_READ;
466 fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle); 648 INIT_WORK(&dp->dl_recall.cb_work, nfsd4_run_cb_recall);
467 dp->dl_time = 0;
468 atomic_set(&dp->dl_count, 1);
469 nfsd4_init_callback(&dp->dl_recall);
470 return dp; 649 return dp;
650out_dec:
651 atomic_long_dec(&num_delegations);
652 return NULL;
471} 653}
472 654
473static void remove_stid(struct nfs4_stid *s) 655void
656nfs4_put_stid(struct nfs4_stid *s)
474{ 657{
475 struct idr *stateids = &s->sc_client->cl_stateids; 658 struct nfs4_file *fp = s->sc_file;
659 struct nfs4_client *clp = s->sc_client;
476 660
477 idr_remove(stateids, s->sc_stateid.si_opaque.so_id); 661 might_lock(&clp->cl_lock);
478}
479 662
480static void nfs4_free_stid(struct kmem_cache *slab, struct nfs4_stid *s) 663 if (!atomic_dec_and_lock(&s->sc_count, &clp->cl_lock)) {
481{ 664 wake_up_all(&close_wq);
482 kmem_cache_free(slab, s); 665 return;
483}
484
485void
486nfs4_put_delegation(struct nfs4_delegation *dp)
487{
488 if (atomic_dec_and_test(&dp->dl_count)) {
489 nfs4_free_stid(deleg_slab, &dp->dl_stid);
490 num_delegations--;
491 } 666 }
667 idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
668 spin_unlock(&clp->cl_lock);
669 s->sc_free(s);
670 if (fp)
671 put_nfs4_file(fp);
492} 672}
493 673
494static void nfs4_put_deleg_lease(struct nfs4_file *fp) 674static void nfs4_put_deleg_lease(struct nfs4_file *fp)
495{ 675{
676 lockdep_assert_held(&state_lock);
677
496 if (!fp->fi_lease) 678 if (!fp->fi_lease)
497 return; 679 return;
498 if (atomic_dec_and_test(&fp->fi_delegees)) { 680 if (atomic_dec_and_test(&fp->fi_delegees)) {
@@ -512,54 +694,54 @@ static void
512hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) 694hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
513{ 695{
514 lockdep_assert_held(&state_lock); 696 lockdep_assert_held(&state_lock);
697 lockdep_assert_held(&fp->fi_lock);
515 698
699 atomic_inc(&dp->dl_stid.sc_count);
516 dp->dl_stid.sc_type = NFS4_DELEG_STID; 700 dp->dl_stid.sc_type = NFS4_DELEG_STID;
517 list_add(&dp->dl_perfile, &fp->fi_delegations); 701 list_add(&dp->dl_perfile, &fp->fi_delegations);
518 list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); 702 list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
519} 703}
520 704
521/* Called under the state lock. */
522static void 705static void
523unhash_delegation(struct nfs4_delegation *dp) 706unhash_delegation_locked(struct nfs4_delegation *dp)
524{ 707{
525 spin_lock(&state_lock); 708 struct nfs4_file *fp = dp->dl_stid.sc_file;
526 list_del_init(&dp->dl_perclnt);
527 list_del_init(&dp->dl_perfile);
528 list_del_init(&dp->dl_recall_lru);
529 spin_unlock(&state_lock);
530 if (dp->dl_file) {
531 nfs4_put_deleg_lease(dp->dl_file);
532 put_nfs4_file(dp->dl_file);
533 dp->dl_file = NULL;
534 }
535}
536
537 709
710 lockdep_assert_held(&state_lock);
538 711
539static void destroy_revoked_delegation(struct nfs4_delegation *dp) 712 dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID;
540{ 713 /* Ensure that deleg break won't try to requeue it */
714 ++dp->dl_time;
715 spin_lock(&fp->fi_lock);
716 list_del_init(&dp->dl_perclnt);
541 list_del_init(&dp->dl_recall_lru); 717 list_del_init(&dp->dl_recall_lru);
542 remove_stid(&dp->dl_stid); 718 list_del_init(&dp->dl_perfile);
543 nfs4_put_delegation(dp); 719 spin_unlock(&fp->fi_lock);
720 if (fp)
721 nfs4_put_deleg_lease(fp);
544} 722}
545 723
546static void destroy_delegation(struct nfs4_delegation *dp) 724static void destroy_delegation(struct nfs4_delegation *dp)
547{ 725{
548 unhash_delegation(dp); 726 spin_lock(&state_lock);
549 remove_stid(&dp->dl_stid); 727 unhash_delegation_locked(dp);
550 nfs4_put_delegation(dp); 728 spin_unlock(&state_lock);
729 nfs4_put_stid(&dp->dl_stid);
551} 730}
552 731
553static void revoke_delegation(struct nfs4_delegation *dp) 732static void revoke_delegation(struct nfs4_delegation *dp)
554{ 733{
555 struct nfs4_client *clp = dp->dl_stid.sc_client; 734 struct nfs4_client *clp = dp->dl_stid.sc_client;
556 735
736 WARN_ON(!list_empty(&dp->dl_recall_lru));
737
557 if (clp->cl_minorversion == 0) 738 if (clp->cl_minorversion == 0)
558 destroy_delegation(dp); 739 nfs4_put_stid(&dp->dl_stid);
559 else { 740 else {
560 unhash_delegation(dp);
561 dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; 741 dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
742 spin_lock(&clp->cl_lock);
562 list_add(&dp->dl_recall_lru, &clp->cl_revoked); 743 list_add(&dp->dl_recall_lru, &clp->cl_revoked);
744 spin_unlock(&clp->cl_lock);
563 } 745 }
564} 746}
565 747
@@ -607,57 +789,62 @@ bmap_to_share_mode(unsigned long bmap) {
607 return access; 789 return access;
608} 790}
609 791
610static bool
611test_share(struct nfs4_ol_stateid *stp, struct nfsd4_open *open) {
612 unsigned int access, deny;
613
614 access = bmap_to_share_mode(stp->st_access_bmap);
615 deny = bmap_to_share_mode(stp->st_deny_bmap);
616 if ((access & open->op_share_deny) || (deny & open->op_share_access))
617 return false;
618 return true;
619}
620
621/* set share access for a given stateid */ 792/* set share access for a given stateid */
622static inline void 793static inline void
623set_access(u32 access, struct nfs4_ol_stateid *stp) 794set_access(u32 access, struct nfs4_ol_stateid *stp)
624{ 795{
625 __set_bit(access, &stp->st_access_bmap); 796 unsigned char mask = 1 << access;
797
798 WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
799 stp->st_access_bmap |= mask;
626} 800}
627 801
628/* clear share access for a given stateid */ 802/* clear share access for a given stateid */
629static inline void 803static inline void
630clear_access(u32 access, struct nfs4_ol_stateid *stp) 804clear_access(u32 access, struct nfs4_ol_stateid *stp)
631{ 805{
632 __clear_bit(access, &stp->st_access_bmap); 806 unsigned char mask = 1 << access;
807
808 WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
809 stp->st_access_bmap &= ~mask;
633} 810}
634 811
635/* test whether a given stateid has access */ 812/* test whether a given stateid has access */
636static inline bool 813static inline bool
637test_access(u32 access, struct nfs4_ol_stateid *stp) 814test_access(u32 access, struct nfs4_ol_stateid *stp)
638{ 815{
639 return test_bit(access, &stp->st_access_bmap); 816 unsigned char mask = 1 << access;
817
818 return (bool)(stp->st_access_bmap & mask);
640} 819}
641 820
642/* set share deny for a given stateid */ 821/* set share deny for a given stateid */
643static inline void 822static inline void
644set_deny(u32 access, struct nfs4_ol_stateid *stp) 823set_deny(u32 deny, struct nfs4_ol_stateid *stp)
645{ 824{
646 __set_bit(access, &stp->st_deny_bmap); 825 unsigned char mask = 1 << deny;
826
827 WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
828 stp->st_deny_bmap |= mask;
647} 829}
648 830
649/* clear share deny for a given stateid */ 831/* clear share deny for a given stateid */
650static inline void 832static inline void
651clear_deny(u32 access, struct nfs4_ol_stateid *stp) 833clear_deny(u32 deny, struct nfs4_ol_stateid *stp)
652{ 834{
653 __clear_bit(access, &stp->st_deny_bmap); 835 unsigned char mask = 1 << deny;
836
837 WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
838 stp->st_deny_bmap &= ~mask;
654} 839}
655 840
656/* test whether a given stateid is denying specific access */ 841/* test whether a given stateid is denying specific access */
657static inline bool 842static inline bool
658test_deny(u32 access, struct nfs4_ol_stateid *stp) 843test_deny(u32 deny, struct nfs4_ol_stateid *stp)
659{ 844{
660 return test_bit(access, &stp->st_deny_bmap); 845 unsigned char mask = 1 << deny;
846
847 return (bool)(stp->st_deny_bmap & mask);
661} 848}
662 849
663static int nfs4_access_to_omode(u32 access) 850static int nfs4_access_to_omode(u32 access)
@@ -674,138 +861,283 @@ static int nfs4_access_to_omode(u32 access)
674 return O_RDONLY; 861 return O_RDONLY;
675} 862}
676 863
864/*
865 * A stateid that had a deny mode associated with it is being released
866 * or downgraded. Recalculate the deny mode on the file.
867 */
868static void
869recalculate_deny_mode(struct nfs4_file *fp)
870{
871 struct nfs4_ol_stateid *stp;
872
873 spin_lock(&fp->fi_lock);
874 fp->fi_share_deny = 0;
875 list_for_each_entry(stp, &fp->fi_stateids, st_perfile)
876 fp->fi_share_deny |= bmap_to_share_mode(stp->st_deny_bmap);
877 spin_unlock(&fp->fi_lock);
878}
879
880static void
881reset_union_bmap_deny(u32 deny, struct nfs4_ol_stateid *stp)
882{
883 int i;
884 bool change = false;
885
886 for (i = 1; i < 4; i++) {
887 if ((i & deny) != i) {
888 change = true;
889 clear_deny(i, stp);
890 }
891 }
892
893 /* Recalculate per-file deny mode if there was a change */
894 if (change)
895 recalculate_deny_mode(stp->st_stid.sc_file);
896}
897
677/* release all access and file references for a given stateid */ 898/* release all access and file references for a given stateid */
678static void 899static void
679release_all_access(struct nfs4_ol_stateid *stp) 900release_all_access(struct nfs4_ol_stateid *stp)
680{ 901{
681 int i; 902 int i;
903 struct nfs4_file *fp = stp->st_stid.sc_file;
904
905 if (fp && stp->st_deny_bmap != 0)
906 recalculate_deny_mode(fp);
682 907
683 for (i = 1; i < 4; i++) { 908 for (i = 1; i < 4; i++) {
684 if (test_access(i, stp)) 909 if (test_access(i, stp))
685 nfs4_file_put_access(stp->st_file, 910 nfs4_file_put_access(stp->st_stid.sc_file, i);
686 nfs4_access_to_omode(i));
687 clear_access(i, stp); 911 clear_access(i, stp);
688 } 912 }
689} 913}
690 914
691static void unhash_generic_stateid(struct nfs4_ol_stateid *stp) 915static void nfs4_put_stateowner(struct nfs4_stateowner *sop)
692{ 916{
917 struct nfs4_client *clp = sop->so_client;
918
919 might_lock(&clp->cl_lock);
920
921 if (!atomic_dec_and_lock(&sop->so_count, &clp->cl_lock))
922 return;
923 sop->so_ops->so_unhash(sop);
924 spin_unlock(&clp->cl_lock);
925 kfree(sop->so_owner.data);
926 sop->so_ops->so_free(sop);
927}
928
929static void unhash_ol_stateid(struct nfs4_ol_stateid *stp)
930{
931 struct nfs4_file *fp = stp->st_stid.sc_file;
932
933 lockdep_assert_held(&stp->st_stateowner->so_client->cl_lock);
934
935 spin_lock(&fp->fi_lock);
693 list_del(&stp->st_perfile); 936 list_del(&stp->st_perfile);
937 spin_unlock(&fp->fi_lock);
694 list_del(&stp->st_perstateowner); 938 list_del(&stp->st_perstateowner);
695} 939}
696 940
697static void close_generic_stateid(struct nfs4_ol_stateid *stp) 941static void nfs4_free_ol_stateid(struct nfs4_stid *stid)
698{ 942{
943 struct nfs4_ol_stateid *stp = openlockstateid(stid);
944
699 release_all_access(stp); 945 release_all_access(stp);
700 put_nfs4_file(stp->st_file); 946 if (stp->st_stateowner)
701 stp->st_file = NULL; 947 nfs4_put_stateowner(stp->st_stateowner);
948 kmem_cache_free(stateid_slab, stid);
702} 949}
703 950
704static void free_generic_stateid(struct nfs4_ol_stateid *stp) 951static void nfs4_free_lock_stateid(struct nfs4_stid *stid)
705{ 952{
706 remove_stid(&stp->st_stid); 953 struct nfs4_ol_stateid *stp = openlockstateid(stid);
707 nfs4_free_stid(stateid_slab, &stp->st_stid); 954 struct nfs4_lockowner *lo = lockowner(stp->st_stateowner);
955 struct file *file;
956
957 file = find_any_file(stp->st_stid.sc_file);
958 if (file)
959 filp_close(file, (fl_owner_t)lo);
960 nfs4_free_ol_stateid(stid);
708} 961}
709 962
710static void release_lock_stateid(struct nfs4_ol_stateid *stp) 963/*
964 * Put the persistent reference to an already unhashed generic stateid, while
965 * holding the cl_lock. If it's the last reference, then put it onto the
966 * reaplist for later destruction.
967 */
968static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
969 struct list_head *reaplist)
711{ 970{
712 struct file *file; 971 struct nfs4_stid *s = &stp->st_stid;
972 struct nfs4_client *clp = s->sc_client;
973
974 lockdep_assert_held(&clp->cl_lock);
713 975
714 unhash_generic_stateid(stp); 976 WARN_ON_ONCE(!list_empty(&stp->st_locks));
977
978 if (!atomic_dec_and_test(&s->sc_count)) {
979 wake_up_all(&close_wq);
980 return;
981 }
982
983 idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
984 list_add(&stp->st_locks, reaplist);
985}
986
987static void unhash_lock_stateid(struct nfs4_ol_stateid *stp)
988{
989 struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner);
990
991 lockdep_assert_held(&oo->oo_owner.so_client->cl_lock);
992
993 list_del_init(&stp->st_locks);
994 unhash_ol_stateid(stp);
715 unhash_stid(&stp->st_stid); 995 unhash_stid(&stp->st_stid);
716 file = find_any_file(stp->st_file);
717 if (file)
718 locks_remove_posix(file, (fl_owner_t)lockowner(stp->st_stateowner));
719 close_generic_stateid(stp);
720 free_generic_stateid(stp);
721} 996}
722 997
723static void unhash_lockowner(struct nfs4_lockowner *lo) 998static void release_lock_stateid(struct nfs4_ol_stateid *stp)
724{ 999{
725 struct nfs4_ol_stateid *stp; 1000 struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner);
726 1001
727 list_del(&lo->lo_owner.so_strhash); 1002 spin_lock(&oo->oo_owner.so_client->cl_lock);
728 list_del(&lo->lo_perstateid); 1003 unhash_lock_stateid(stp);
729 list_del(&lo->lo_owner_ino_hash); 1004 spin_unlock(&oo->oo_owner.so_client->cl_lock);
730 while (!list_empty(&lo->lo_owner.so_stateids)) { 1005 nfs4_put_stid(&stp->st_stid);
731 stp = list_first_entry(&lo->lo_owner.so_stateids,
732 struct nfs4_ol_stateid, st_perstateowner);
733 release_lock_stateid(stp);
734 }
735} 1006}
736 1007
737static void nfs4_free_lockowner(struct nfs4_lockowner *lo) 1008static void unhash_lockowner_locked(struct nfs4_lockowner *lo)
738{ 1009{
739 kfree(lo->lo_owner.so_owner.data); 1010 struct nfs4_client *clp = lo->lo_owner.so_client;
740 kmem_cache_free(lockowner_slab, lo); 1011
1012 lockdep_assert_held(&clp->cl_lock);
1013
1014 list_del_init(&lo->lo_owner.so_strhash);
1015}
1016
1017/*
1018 * Free a list of generic stateids that were collected earlier after being
1019 * fully unhashed.
1020 */
1021static void
1022free_ol_stateid_reaplist(struct list_head *reaplist)
1023{
1024 struct nfs4_ol_stateid *stp;
1025 struct nfs4_file *fp;
1026
1027 might_sleep();
1028
1029 while (!list_empty(reaplist)) {
1030 stp = list_first_entry(reaplist, struct nfs4_ol_stateid,
1031 st_locks);
1032 list_del(&stp->st_locks);
1033 fp = stp->st_stid.sc_file;
1034 stp->st_stid.sc_free(&stp->st_stid);
1035 if (fp)
1036 put_nfs4_file(fp);
1037 }
741} 1038}
742 1039
743static void release_lockowner(struct nfs4_lockowner *lo) 1040static void release_lockowner(struct nfs4_lockowner *lo)
744{ 1041{
745 unhash_lockowner(lo); 1042 struct nfs4_client *clp = lo->lo_owner.so_client;
746 nfs4_free_lockowner(lo); 1043 struct nfs4_ol_stateid *stp;
1044 struct list_head reaplist;
1045
1046 INIT_LIST_HEAD(&reaplist);
1047
1048 spin_lock(&clp->cl_lock);
1049 unhash_lockowner_locked(lo);
1050 while (!list_empty(&lo->lo_owner.so_stateids)) {
1051 stp = list_first_entry(&lo->lo_owner.so_stateids,
1052 struct nfs4_ol_stateid, st_perstateowner);
1053 unhash_lock_stateid(stp);
1054 put_ol_stateid_locked(stp, &reaplist);
1055 }
1056 spin_unlock(&clp->cl_lock);
1057 free_ol_stateid_reaplist(&reaplist);
1058 nfs4_put_stateowner(&lo->lo_owner);
747} 1059}
748 1060
749static void 1061static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp,
750release_stateid_lockowners(struct nfs4_ol_stateid *open_stp) 1062 struct list_head *reaplist)
751{ 1063{
752 struct nfs4_lockowner *lo; 1064 struct nfs4_ol_stateid *stp;
753 1065
754 while (!list_empty(&open_stp->st_lockowners)) { 1066 while (!list_empty(&open_stp->st_locks)) {
755 lo = list_entry(open_stp->st_lockowners.next, 1067 stp = list_entry(open_stp->st_locks.next,
756 struct nfs4_lockowner, lo_perstateid); 1068 struct nfs4_ol_stateid, st_locks);
757 release_lockowner(lo); 1069 unhash_lock_stateid(stp);
1070 put_ol_stateid_locked(stp, reaplist);
758 } 1071 }
759} 1072}
760 1073
761static void unhash_open_stateid(struct nfs4_ol_stateid *stp) 1074static void unhash_open_stateid(struct nfs4_ol_stateid *stp,
1075 struct list_head *reaplist)
762{ 1076{
763 unhash_generic_stateid(stp); 1077 lockdep_assert_held(&stp->st_stid.sc_client->cl_lock);
764 release_stateid_lockowners(stp); 1078
765 close_generic_stateid(stp); 1079 unhash_ol_stateid(stp);
1080 release_open_stateid_locks(stp, reaplist);
766} 1081}
767 1082
768static void release_open_stateid(struct nfs4_ol_stateid *stp) 1083static void release_open_stateid(struct nfs4_ol_stateid *stp)
769{ 1084{
770 unhash_open_stateid(stp); 1085 LIST_HEAD(reaplist);
771 free_generic_stateid(stp); 1086
1087 spin_lock(&stp->st_stid.sc_client->cl_lock);
1088 unhash_open_stateid(stp, &reaplist);
1089 put_ol_stateid_locked(stp, &reaplist);
1090 spin_unlock(&stp->st_stid.sc_client->cl_lock);
1091 free_ol_stateid_reaplist(&reaplist);
772} 1092}
773 1093
774static void unhash_openowner(struct nfs4_openowner *oo) 1094static void unhash_openowner_locked(struct nfs4_openowner *oo)
775{ 1095{
776 struct nfs4_ol_stateid *stp; 1096 struct nfs4_client *clp = oo->oo_owner.so_client;
777 1097
778 list_del(&oo->oo_owner.so_strhash); 1098 lockdep_assert_held(&clp->cl_lock);
779 list_del(&oo->oo_perclient); 1099
780 while (!list_empty(&oo->oo_owner.so_stateids)) { 1100 list_del_init(&oo->oo_owner.so_strhash);
781 stp = list_first_entry(&oo->oo_owner.so_stateids, 1101 list_del_init(&oo->oo_perclient);
782 struct nfs4_ol_stateid, st_perstateowner);
783 release_open_stateid(stp);
784 }
785} 1102}
786 1103
787static void release_last_closed_stateid(struct nfs4_openowner *oo) 1104static void release_last_closed_stateid(struct nfs4_openowner *oo)
788{ 1105{
789 struct nfs4_ol_stateid *s = oo->oo_last_closed_stid; 1106 struct nfsd_net *nn = net_generic(oo->oo_owner.so_client->net,
1107 nfsd_net_id);
1108 struct nfs4_ol_stateid *s;
790 1109
1110 spin_lock(&nn->client_lock);
1111 s = oo->oo_last_closed_stid;
791 if (s) { 1112 if (s) {
792 free_generic_stateid(s); 1113 list_del_init(&oo->oo_close_lru);
793 oo->oo_last_closed_stid = NULL; 1114 oo->oo_last_closed_stid = NULL;
794 } 1115 }
795} 1116 spin_unlock(&nn->client_lock);
796 1117 if (s)
797static void nfs4_free_openowner(struct nfs4_openowner *oo) 1118 nfs4_put_stid(&s->st_stid);
798{
799 kfree(oo->oo_owner.so_owner.data);
800 kmem_cache_free(openowner_slab, oo);
801} 1119}
802 1120
803static void release_openowner(struct nfs4_openowner *oo) 1121static void release_openowner(struct nfs4_openowner *oo)
804{ 1122{
805 unhash_openowner(oo); 1123 struct nfs4_ol_stateid *stp;
806 list_del(&oo->oo_close_lru); 1124 struct nfs4_client *clp = oo->oo_owner.so_client;
1125 struct list_head reaplist;
1126
1127 INIT_LIST_HEAD(&reaplist);
1128
1129 spin_lock(&clp->cl_lock);
1130 unhash_openowner_locked(oo);
1131 while (!list_empty(&oo->oo_owner.so_stateids)) {
1132 stp = list_first_entry(&oo->oo_owner.so_stateids,
1133 struct nfs4_ol_stateid, st_perstateowner);
1134 unhash_open_stateid(stp, &reaplist);
1135 put_ol_stateid_locked(stp, &reaplist);
1136 }
1137 spin_unlock(&clp->cl_lock);
1138 free_ol_stateid_reaplist(&reaplist);
807 release_last_closed_stateid(oo); 1139 release_last_closed_stateid(oo);
808 nfs4_free_openowner(oo); 1140 nfs4_put_stateowner(&oo->oo_owner);
809} 1141}
810 1142
811static inline int 1143static inline int
@@ -842,7 +1174,7 @@ void nfsd4_bump_seqid(struct nfsd4_compound_state *cstate, __be32 nfserr)
842 return; 1174 return;
843 1175
844 if (!seqid_mutating_err(ntohl(nfserr))) { 1176 if (!seqid_mutating_err(ntohl(nfserr))) {
845 cstate->replay_owner = NULL; 1177 nfsd4_cstate_clear_replay(cstate);
846 return; 1178 return;
847 } 1179 }
848 if (!so) 1180 if (!so)
@@ -1030,10 +1362,8 @@ static void nfsd4_init_conn(struct svc_rqst *rqstp, struct nfsd4_conn *conn, str
1030 if (ret) 1362 if (ret)
1031 /* oops; xprt is already down: */ 1363 /* oops; xprt is already down: */
1032 nfsd4_conn_lost(&conn->cn_xpt_user); 1364 nfsd4_conn_lost(&conn->cn_xpt_user);
1033 if (conn->cn_flags & NFS4_CDFC4_BACK) { 1365 /* We may have gained or lost a callback channel: */
1034 /* callback channel may be back up */ 1366 nfsd4_probe_callback_sync(ses->se_client);
1035 nfsd4_probe_callback(ses->se_client);
1036 }
1037} 1367}
1038 1368
1039static struct nfsd4_conn *alloc_conn_from_crses(struct svc_rqst *rqstp, struct nfsd4_create_session *cses) 1369static struct nfsd4_conn *alloc_conn_from_crses(struct svc_rqst *rqstp, struct nfsd4_create_session *cses)
@@ -1073,9 +1403,6 @@ static void __free_session(struct nfsd4_session *ses)
1073 1403
1074static void free_session(struct nfsd4_session *ses) 1404static void free_session(struct nfsd4_session *ses)
1075{ 1405{
1076 struct nfsd_net *nn = net_generic(ses->se_client->net, nfsd_net_id);
1077
1078 lockdep_assert_held(&nn->client_lock);
1079 nfsd4_del_conns(ses); 1406 nfsd4_del_conns(ses);
1080 nfsd4_put_drc_mem(&ses->se_fchannel); 1407 nfsd4_put_drc_mem(&ses->se_fchannel);
1081 __free_session(ses); 1408 __free_session(ses);
@@ -1097,12 +1424,10 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
1097 new->se_cb_sec = cses->cb_sec; 1424 new->se_cb_sec = cses->cb_sec;
1098 atomic_set(&new->se_ref, 0); 1425 atomic_set(&new->se_ref, 0);
1099 idx = hash_sessionid(&new->se_sessionid); 1426 idx = hash_sessionid(&new->se_sessionid);
1100 spin_lock(&nn->client_lock);
1101 list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]); 1427 list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]);
1102 spin_lock(&clp->cl_lock); 1428 spin_lock(&clp->cl_lock);
1103 list_add(&new->se_perclnt, &clp->cl_sessions); 1429 list_add(&new->se_perclnt, &clp->cl_sessions);
1104 spin_unlock(&clp->cl_lock); 1430 spin_unlock(&clp->cl_lock);
1105 spin_unlock(&nn->client_lock);
1106 1431
1107 if (cses->flags & SESSION4_BACK_CHAN) { 1432 if (cses->flags & SESSION4_BACK_CHAN) {
1108 struct sockaddr *sa = svc_addr(rqstp); 1433 struct sockaddr *sa = svc_addr(rqstp);
@@ -1120,12 +1445,14 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
1120 1445
1121/* caller must hold client_lock */ 1446/* caller must hold client_lock */
1122static struct nfsd4_session * 1447static struct nfsd4_session *
1123find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net) 1448__find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net)
1124{ 1449{
1125 struct nfsd4_session *elem; 1450 struct nfsd4_session *elem;
1126 int idx; 1451 int idx;
1127 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 1452 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1128 1453
1454 lockdep_assert_held(&nn->client_lock);
1455
1129 dump_sessionid(__func__, sessionid); 1456 dump_sessionid(__func__, sessionid);
1130 idx = hash_sessionid(sessionid); 1457 idx = hash_sessionid(sessionid);
1131 /* Search in the appropriate list */ 1458 /* Search in the appropriate list */
@@ -1140,10 +1467,33 @@ find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net)
1140 return NULL; 1467 return NULL;
1141} 1468}
1142 1469
1470static struct nfsd4_session *
1471find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net,
1472 __be32 *ret)
1473{
1474 struct nfsd4_session *session;
1475 __be32 status = nfserr_badsession;
1476
1477 session = __find_in_sessionid_hashtbl(sessionid, net);
1478 if (!session)
1479 goto out;
1480 status = nfsd4_get_session_locked(session);
1481 if (status)
1482 session = NULL;
1483out:
1484 *ret = status;
1485 return session;
1486}
1487
1143/* caller must hold client_lock */ 1488/* caller must hold client_lock */
1144static void 1489static void
1145unhash_session(struct nfsd4_session *ses) 1490unhash_session(struct nfsd4_session *ses)
1146{ 1491{
1492 struct nfs4_client *clp = ses->se_client;
1493 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1494
1495 lockdep_assert_held(&nn->client_lock);
1496
1147 list_del(&ses->se_hash); 1497 list_del(&ses->se_hash);
1148 spin_lock(&ses->se_client->cl_lock); 1498 spin_lock(&ses->se_client->cl_lock);
1149 list_del(&ses->se_perclnt); 1499 list_del(&ses->se_perclnt);
@@ -1169,15 +1519,20 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn)
1169static struct nfs4_client *alloc_client(struct xdr_netobj name) 1519static struct nfs4_client *alloc_client(struct xdr_netobj name)
1170{ 1520{
1171 struct nfs4_client *clp; 1521 struct nfs4_client *clp;
1522 int i;
1172 1523
1173 clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL); 1524 clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL);
1174 if (clp == NULL) 1525 if (clp == NULL)
1175 return NULL; 1526 return NULL;
1176 clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL); 1527 clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL);
1177 if (clp->cl_name.data == NULL) { 1528 if (clp->cl_name.data == NULL)
1178 kfree(clp); 1529 goto err_no_name;
1179 return NULL; 1530 clp->cl_ownerstr_hashtbl = kmalloc(sizeof(struct list_head) *
1180 } 1531 OWNER_HASH_SIZE, GFP_KERNEL);
1532 if (!clp->cl_ownerstr_hashtbl)
1533 goto err_no_hashtbl;
1534 for (i = 0; i < OWNER_HASH_SIZE; i++)
1535 INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]);
1181 clp->cl_name.len = name.len; 1536 clp->cl_name.len = name.len;
1182 INIT_LIST_HEAD(&clp->cl_sessions); 1537 INIT_LIST_HEAD(&clp->cl_sessions);
1183 idr_init(&clp->cl_stateids); 1538 idr_init(&clp->cl_stateids);
@@ -1192,14 +1547,16 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
1192 spin_lock_init(&clp->cl_lock); 1547 spin_lock_init(&clp->cl_lock);
1193 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); 1548 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
1194 return clp; 1549 return clp;
1550err_no_hashtbl:
1551 kfree(clp->cl_name.data);
1552err_no_name:
1553 kfree(clp);
1554 return NULL;
1195} 1555}
1196 1556
1197static void 1557static void
1198free_client(struct nfs4_client *clp) 1558free_client(struct nfs4_client *clp)
1199{ 1559{
1200 struct nfsd_net __maybe_unused *nn = net_generic(clp->net, nfsd_net_id);
1201
1202 lockdep_assert_held(&nn->client_lock);
1203 while (!list_empty(&clp->cl_sessions)) { 1560 while (!list_empty(&clp->cl_sessions)) {
1204 struct nfsd4_session *ses; 1561 struct nfsd4_session *ses;
1205 ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, 1562 ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
@@ -1210,18 +1567,32 @@ free_client(struct nfs4_client *clp)
1210 } 1567 }
1211 rpc_destroy_wait_queue(&clp->cl_cb_waitq); 1568 rpc_destroy_wait_queue(&clp->cl_cb_waitq);
1212 free_svc_cred(&clp->cl_cred); 1569 free_svc_cred(&clp->cl_cred);
1570 kfree(clp->cl_ownerstr_hashtbl);
1213 kfree(clp->cl_name.data); 1571 kfree(clp->cl_name.data);
1214 idr_destroy(&clp->cl_stateids); 1572 idr_destroy(&clp->cl_stateids);
1215 kfree(clp); 1573 kfree(clp);
1216} 1574}
1217 1575
1218/* must be called under the client_lock */ 1576/* must be called under the client_lock */
1219static inline void 1577static void
1220unhash_client_locked(struct nfs4_client *clp) 1578unhash_client_locked(struct nfs4_client *clp)
1221{ 1579{
1580 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1222 struct nfsd4_session *ses; 1581 struct nfsd4_session *ses;
1223 1582
1224 list_del(&clp->cl_lru); 1583 lockdep_assert_held(&nn->client_lock);
1584
1585 /* Mark the client as expired! */
1586 clp->cl_time = 0;
1587 /* Make it invisible */
1588 if (!list_empty(&clp->cl_idhash)) {
1589 list_del_init(&clp->cl_idhash);
1590 if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags))
1591 rb_erase(&clp->cl_namenode, &nn->conf_name_tree);
1592 else
1593 rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
1594 }
1595 list_del_init(&clp->cl_lru);
1225 spin_lock(&clp->cl_lock); 1596 spin_lock(&clp->cl_lock);
1226 list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) 1597 list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
1227 list_del_init(&ses->se_hash); 1598 list_del_init(&ses->se_hash);
@@ -1229,53 +1600,71 @@ unhash_client_locked(struct nfs4_client *clp)
1229} 1600}
1230 1601
1231static void 1602static void
1232destroy_client(struct nfs4_client *clp) 1603unhash_client(struct nfs4_client *clp)
1604{
1605 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1606
1607 spin_lock(&nn->client_lock);
1608 unhash_client_locked(clp);
1609 spin_unlock(&nn->client_lock);
1610}
1611
1612static __be32 mark_client_expired_locked(struct nfs4_client *clp)
1613{
1614 if (atomic_read(&clp->cl_refcount))
1615 return nfserr_jukebox;
1616 unhash_client_locked(clp);
1617 return nfs_ok;
1618}
1619
1620static void
1621__destroy_client(struct nfs4_client *clp)
1233{ 1622{
1234 struct nfs4_openowner *oo; 1623 struct nfs4_openowner *oo;
1235 struct nfs4_delegation *dp; 1624 struct nfs4_delegation *dp;
1236 struct list_head reaplist; 1625 struct list_head reaplist;
1237 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1238 1626
1239 INIT_LIST_HEAD(&reaplist); 1627 INIT_LIST_HEAD(&reaplist);
1240 spin_lock(&state_lock); 1628 spin_lock(&state_lock);
1241 while (!list_empty(&clp->cl_delegations)) { 1629 while (!list_empty(&clp->cl_delegations)) {
1242 dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); 1630 dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
1243 list_del_init(&dp->dl_perclnt); 1631 unhash_delegation_locked(dp);
1244 list_move(&dp->dl_recall_lru, &reaplist); 1632 list_add(&dp->dl_recall_lru, &reaplist);
1245 } 1633 }
1246 spin_unlock(&state_lock); 1634 spin_unlock(&state_lock);
1247 while (!list_empty(&reaplist)) { 1635 while (!list_empty(&reaplist)) {
1248 dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); 1636 dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
1249 destroy_delegation(dp); 1637 list_del_init(&dp->dl_recall_lru);
1638 nfs4_put_stid(&dp->dl_stid);
1250 } 1639 }
1251 list_splice_init(&clp->cl_revoked, &reaplist); 1640 while (!list_empty(&clp->cl_revoked)) {
1252 while (!list_empty(&reaplist)) {
1253 dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); 1641 dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
1254 destroy_revoked_delegation(dp); 1642 list_del_init(&dp->dl_recall_lru);
1643 nfs4_put_stid(&dp->dl_stid);
1255 } 1644 }
1256 while (!list_empty(&clp->cl_openowners)) { 1645 while (!list_empty(&clp->cl_openowners)) {
1257 oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient); 1646 oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient);
1647 atomic_inc(&oo->oo_owner.so_count);
1258 release_openowner(oo); 1648 release_openowner(oo);
1259 } 1649 }
1260 nfsd4_shutdown_callback(clp); 1650 nfsd4_shutdown_callback(clp);
1261 if (clp->cl_cb_conn.cb_xprt) 1651 if (clp->cl_cb_conn.cb_xprt)
1262 svc_xprt_put(clp->cl_cb_conn.cb_xprt); 1652 svc_xprt_put(clp->cl_cb_conn.cb_xprt);
1263 list_del(&clp->cl_idhash);
1264 if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags))
1265 rb_erase(&clp->cl_namenode, &nn->conf_name_tree);
1266 else
1267 rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
1268 spin_lock(&nn->client_lock);
1269 unhash_client_locked(clp);
1270 WARN_ON_ONCE(atomic_read(&clp->cl_refcount));
1271 free_client(clp); 1653 free_client(clp);
1272 spin_unlock(&nn->client_lock); 1654}
1655
1656static void
1657destroy_client(struct nfs4_client *clp)
1658{
1659 unhash_client(clp);
1660 __destroy_client(clp);
1273} 1661}
1274 1662
1275static void expire_client(struct nfs4_client *clp) 1663static void expire_client(struct nfs4_client *clp)
1276{ 1664{
1665 unhash_client(clp);
1277 nfsd4_client_record_remove(clp); 1666 nfsd4_client_record_remove(clp);
1278 destroy_client(clp); 1667 __destroy_client(clp);
1279} 1668}
1280 1669
1281static void copy_verf(struct nfs4_client *target, nfs4_verifier *source) 1670static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
@@ -1408,25 +1797,28 @@ static bool mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp)
1408 return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal); 1797 return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal);
1409} 1798}
1410 1799
1411static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) 1800static void gen_confirm(struct nfs4_client *clp, struct nfsd_net *nn)
1412{ 1801{
1413 static u32 current_clientid = 1; 1802 __be32 verf[2];
1414 1803
1415 clp->cl_clientid.cl_boot = nn->boot_time; 1804 /*
1416 clp->cl_clientid.cl_id = current_clientid++; 1805 * This is opaque to client, so no need to byte-swap. Use
1806 * __force to keep sparse happy
1807 */
1808 verf[0] = (__force __be32)get_seconds();
1809 verf[1] = (__force __be32)nn->clientid_counter;
1810 memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data));
1417} 1811}
1418 1812
1419static void gen_confirm(struct nfs4_client *clp) 1813static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn)
1420{ 1814{
1421 __be32 verf[2]; 1815 clp->cl_clientid.cl_boot = nn->boot_time;
1422 static u32 i; 1816 clp->cl_clientid.cl_id = nn->clientid_counter++;
1423 1817 gen_confirm(clp, nn);
1424 verf[0] = (__be32)get_seconds();
1425 verf[1] = (__be32)i++;
1426 memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data));
1427} 1818}
1428 1819
1429static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t) 1820static struct nfs4_stid *
1821find_stateid_locked(struct nfs4_client *cl, stateid_t *t)
1430{ 1822{
1431 struct nfs4_stid *ret; 1823 struct nfs4_stid *ret;
1432 1824
@@ -1436,16 +1828,21 @@ static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t)
1436 return ret; 1828 return ret;
1437} 1829}
1438 1830
1439static struct nfs4_stid *find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) 1831static struct nfs4_stid *
1832find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
1440{ 1833{
1441 struct nfs4_stid *s; 1834 struct nfs4_stid *s;
1442 1835
1443 s = find_stateid(cl, t); 1836 spin_lock(&cl->cl_lock);
1444 if (!s) 1837 s = find_stateid_locked(cl, t);
1445 return NULL; 1838 if (s != NULL) {
1446 if (typemask & s->sc_type) 1839 if (typemask & s->sc_type)
1447 return s; 1840 atomic_inc(&s->sc_count);
1448 return NULL; 1841 else
1842 s = NULL;
1843 }
1844 spin_unlock(&cl->cl_lock);
1845 return s;
1449} 1846}
1450 1847
1451static struct nfs4_client *create_client(struct xdr_netobj name, 1848static struct nfs4_client *create_client(struct xdr_netobj name,
@@ -1455,7 +1852,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
1455 struct sockaddr *sa = svc_addr(rqstp); 1852 struct sockaddr *sa = svc_addr(rqstp);
1456 int ret; 1853 int ret;
1457 struct net *net = SVC_NET(rqstp); 1854 struct net *net = SVC_NET(rqstp);
1458 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1459 1855
1460 clp = alloc_client(name); 1856 clp = alloc_client(name);
1461 if (clp == NULL) 1857 if (clp == NULL)
@@ -1463,17 +1859,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
1463 1859
1464 ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred); 1860 ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred);
1465 if (ret) { 1861 if (ret) {
1466 spin_lock(&nn->client_lock);
1467 free_client(clp); 1862 free_client(clp);
1468 spin_unlock(&nn->client_lock);
1469 return NULL; 1863 return NULL;
1470 } 1864 }
1471 nfsd4_init_callback(&clp->cl_cb_null); 1865 INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_run_cb_null);
1472 clp->cl_time = get_seconds(); 1866 clp->cl_time = get_seconds();
1473 clear_bit(0, &clp->cl_cb_slot_busy); 1867 clear_bit(0, &clp->cl_cb_slot_busy);
1474 copy_verf(clp, verf); 1868 copy_verf(clp, verf);
1475 rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa); 1869 rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
1476 gen_confirm(clp);
1477 clp->cl_cb_session = NULL; 1870 clp->cl_cb_session = NULL;
1478 clp->net = net; 1871 clp->net = net;
1479 return clp; 1872 return clp;
@@ -1525,11 +1918,13 @@ add_to_unconfirmed(struct nfs4_client *clp)
1525 unsigned int idhashval; 1918 unsigned int idhashval;
1526 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); 1919 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1527 1920
1921 lockdep_assert_held(&nn->client_lock);
1922
1528 clear_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); 1923 clear_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
1529 add_clp_to_name_tree(clp, &nn->unconf_name_tree); 1924 add_clp_to_name_tree(clp, &nn->unconf_name_tree);
1530 idhashval = clientid_hashval(clp->cl_clientid.cl_id); 1925 idhashval = clientid_hashval(clp->cl_clientid.cl_id);
1531 list_add(&clp->cl_idhash, &nn->unconf_id_hashtbl[idhashval]); 1926 list_add(&clp->cl_idhash, &nn->unconf_id_hashtbl[idhashval]);
1532 renew_client(clp); 1927 renew_client_locked(clp);
1533} 1928}
1534 1929
1535static void 1930static void
@@ -1538,12 +1933,14 @@ move_to_confirmed(struct nfs4_client *clp)
1538 unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id); 1933 unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
1539 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); 1934 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1540 1935
1936 lockdep_assert_held(&nn->client_lock);
1937
1541 dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp); 1938 dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
1542 list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]); 1939 list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]);
1543 rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); 1940 rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
1544 add_clp_to_name_tree(clp, &nn->conf_name_tree); 1941 add_clp_to_name_tree(clp, &nn->conf_name_tree);
1545 set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); 1942 set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
1546 renew_client(clp); 1943 renew_client_locked(clp);
1547} 1944}
1548 1945
1549static struct nfs4_client * 1946static struct nfs4_client *
@@ -1556,7 +1953,7 @@ find_client_in_id_table(struct list_head *tbl, clientid_t *clid, bool sessions)
1556 if (same_clid(&clp->cl_clientid, clid)) { 1953 if (same_clid(&clp->cl_clientid, clid)) {
1557 if ((bool)clp->cl_minorversion != sessions) 1954 if ((bool)clp->cl_minorversion != sessions)
1558 return NULL; 1955 return NULL;
1559 renew_client(clp); 1956 renew_client_locked(clp);
1560 return clp; 1957 return clp;
1561 } 1958 }
1562 } 1959 }
@@ -1568,6 +1965,7 @@ find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn)
1568{ 1965{
1569 struct list_head *tbl = nn->conf_id_hashtbl; 1966 struct list_head *tbl = nn->conf_id_hashtbl;
1570 1967
1968 lockdep_assert_held(&nn->client_lock);
1571 return find_client_in_id_table(tbl, clid, sessions); 1969 return find_client_in_id_table(tbl, clid, sessions);
1572} 1970}
1573 1971
@@ -1576,6 +1974,7 @@ find_unconfirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn)
1576{ 1974{
1577 struct list_head *tbl = nn->unconf_id_hashtbl; 1975 struct list_head *tbl = nn->unconf_id_hashtbl;
1578 1976
1977 lockdep_assert_held(&nn->client_lock);
1579 return find_client_in_id_table(tbl, clid, sessions); 1978 return find_client_in_id_table(tbl, clid, sessions);
1580} 1979}
1581 1980
@@ -1587,12 +1986,14 @@ static bool clp_used_exchangeid(struct nfs4_client *clp)
1587static struct nfs4_client * 1986static struct nfs4_client *
1588find_confirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn) 1987find_confirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn)
1589{ 1988{
1989 lockdep_assert_held(&nn->client_lock);
1590 return find_clp_in_name_tree(name, &nn->conf_name_tree); 1990 return find_clp_in_name_tree(name, &nn->conf_name_tree);
1591} 1991}
1592 1992
1593static struct nfs4_client * 1993static struct nfs4_client *
1594find_unconfirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn) 1994find_unconfirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn)
1595{ 1995{
1996 lockdep_assert_held(&nn->client_lock);
1596 return find_clp_in_name_tree(name, &nn->unconf_name_tree); 1997 return find_clp_in_name_tree(name, &nn->unconf_name_tree);
1597} 1998}
1598 1999
@@ -1642,7 +2043,7 @@ out_err:
1642/* 2043/*
1643 * Cache a reply. nfsd4_check_resp_size() has bounded the cache size. 2044 * Cache a reply. nfsd4_check_resp_size() has bounded the cache size.
1644 */ 2045 */
1645void 2046static void
1646nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) 2047nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
1647{ 2048{
1648 struct xdr_buf *buf = resp->xdr.buf; 2049 struct xdr_buf *buf = resp->xdr.buf;
@@ -1758,7 +2159,8 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
1758 struct nfsd4_compound_state *cstate, 2159 struct nfsd4_compound_state *cstate,
1759 struct nfsd4_exchange_id *exid) 2160 struct nfsd4_exchange_id *exid)
1760{ 2161{
1761 struct nfs4_client *unconf, *conf, *new; 2162 struct nfs4_client *conf, *new;
2163 struct nfs4_client *unconf = NULL;
1762 __be32 status; 2164 __be32 status;
1763 char addr_str[INET6_ADDRSTRLEN]; 2165 char addr_str[INET6_ADDRSTRLEN];
1764 nfs4_verifier verf = exid->verifier; 2166 nfs4_verifier verf = exid->verifier;
@@ -1787,8 +2189,12 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
1787 return nfserr_encr_alg_unsupp; 2189 return nfserr_encr_alg_unsupp;
1788 } 2190 }
1789 2191
2192 new = create_client(exid->clname, rqstp, &verf);
2193 if (new == NULL)
2194 return nfserr_jukebox;
2195
1790 /* Cases below refer to rfc 5661 section 18.35.4: */ 2196 /* Cases below refer to rfc 5661 section 18.35.4: */
1791 nfs4_lock_state(); 2197 spin_lock(&nn->client_lock);
1792 conf = find_confirmed_client_by_name(&exid->clname, nn); 2198 conf = find_confirmed_client_by_name(&exid->clname, nn);
1793 if (conf) { 2199 if (conf) {
1794 bool creds_match = same_creds(&conf->cl_cred, &rqstp->rq_cred); 2200 bool creds_match = same_creds(&conf->cl_cred, &rqstp->rq_cred);
@@ -1813,7 +2219,6 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
1813 } 2219 }
1814 /* case 6 */ 2220 /* case 6 */
1815 exid->flags |= EXCHGID4_FLAG_CONFIRMED_R; 2221 exid->flags |= EXCHGID4_FLAG_CONFIRMED_R;
1816 new = conf;
1817 goto out_copy; 2222 goto out_copy;
1818 } 2223 }
1819 if (!creds_match) { /* case 3 */ 2224 if (!creds_match) { /* case 3 */
@@ -1821,15 +2226,14 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
1821 status = nfserr_clid_inuse; 2226 status = nfserr_clid_inuse;
1822 goto out; 2227 goto out;
1823 } 2228 }
1824 expire_client(conf);
1825 goto out_new; 2229 goto out_new;
1826 } 2230 }
1827 if (verfs_match) { /* case 2 */ 2231 if (verfs_match) { /* case 2 */
1828 conf->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R; 2232 conf->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R;
1829 new = conf;
1830 goto out_copy; 2233 goto out_copy;
1831 } 2234 }
1832 /* case 5, client reboot */ 2235 /* case 5, client reboot */
2236 conf = NULL;
1833 goto out_new; 2237 goto out_new;
1834 } 2238 }
1835 2239
@@ -1840,33 +2244,38 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
1840 2244
1841 unconf = find_unconfirmed_client_by_name(&exid->clname, nn); 2245 unconf = find_unconfirmed_client_by_name(&exid->clname, nn);
1842 if (unconf) /* case 4, possible retry or client restart */ 2246 if (unconf) /* case 4, possible retry or client restart */
1843 expire_client(unconf); 2247 unhash_client_locked(unconf);
1844 2248
1845 /* case 1 (normal case) */ 2249 /* case 1 (normal case) */
1846out_new: 2250out_new:
1847 new = create_client(exid->clname, rqstp, &verf); 2251 if (conf) {
1848 if (new == NULL) { 2252 status = mark_client_expired_locked(conf);
1849 status = nfserr_jukebox; 2253 if (status)
1850 goto out; 2254 goto out;
1851 } 2255 }
1852 new->cl_minorversion = cstate->minorversion; 2256 new->cl_minorversion = cstate->minorversion;
1853 new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED); 2257 new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED);
1854 2258
1855 gen_clid(new, nn); 2259 gen_clid(new, nn);
1856 add_to_unconfirmed(new); 2260 add_to_unconfirmed(new);
2261 swap(new, conf);
1857out_copy: 2262out_copy:
1858 exid->clientid.cl_boot = new->cl_clientid.cl_boot; 2263 exid->clientid.cl_boot = conf->cl_clientid.cl_boot;
1859 exid->clientid.cl_id = new->cl_clientid.cl_id; 2264 exid->clientid.cl_id = conf->cl_clientid.cl_id;
1860 2265
1861 exid->seqid = new->cl_cs_slot.sl_seqid + 1; 2266 exid->seqid = conf->cl_cs_slot.sl_seqid + 1;
1862 nfsd4_set_ex_flags(new, exid); 2267 nfsd4_set_ex_flags(conf, exid);
1863 2268
1864 dprintk("nfsd4_exchange_id seqid %d flags %x\n", 2269 dprintk("nfsd4_exchange_id seqid %d flags %x\n",
1865 new->cl_cs_slot.sl_seqid, new->cl_exchange_flags); 2270 conf->cl_cs_slot.sl_seqid, conf->cl_exchange_flags);
1866 status = nfs_ok; 2271 status = nfs_ok;
1867 2272
1868out: 2273out:
1869 nfs4_unlock_state(); 2274 spin_unlock(&nn->client_lock);
2275 if (new)
2276 expire_client(new);
2277 if (unconf)
2278 expire_client(unconf);
1870 return status; 2279 return status;
1871} 2280}
1872 2281
@@ -2010,6 +2419,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
2010{ 2419{
2011 struct sockaddr *sa = svc_addr(rqstp); 2420 struct sockaddr *sa = svc_addr(rqstp);
2012 struct nfs4_client *conf, *unconf; 2421 struct nfs4_client *conf, *unconf;
2422 struct nfs4_client *old = NULL;
2013 struct nfsd4_session *new; 2423 struct nfsd4_session *new;
2014 struct nfsd4_conn *conn; 2424 struct nfsd4_conn *conn;
2015 struct nfsd4_clid_slot *cs_slot = NULL; 2425 struct nfsd4_clid_slot *cs_slot = NULL;
@@ -2035,7 +2445,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
2035 if (!conn) 2445 if (!conn)
2036 goto out_free_session; 2446 goto out_free_session;
2037 2447
2038 nfs4_lock_state(); 2448 spin_lock(&nn->client_lock);
2039 unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn); 2449 unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn);
2040 conf = find_confirmed_client(&cr_ses->clientid, true, nn); 2450 conf = find_confirmed_client(&cr_ses->clientid, true, nn);
2041 WARN_ON_ONCE(conf && unconf); 2451 WARN_ON_ONCE(conf && unconf);
@@ -2054,7 +2464,6 @@ nfsd4_create_session(struct svc_rqst *rqstp,
2054 goto out_free_conn; 2464 goto out_free_conn;
2055 } 2465 }
2056 } else if (unconf) { 2466 } else if (unconf) {
2057 struct nfs4_client *old;
2058 if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || 2467 if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
2059 !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { 2468 !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
2060 status = nfserr_clid_inuse; 2469 status = nfserr_clid_inuse;
@@ -2072,10 +2481,11 @@ nfsd4_create_session(struct svc_rqst *rqstp,
2072 } 2481 }
2073 old = find_confirmed_client_by_name(&unconf->cl_name, nn); 2482 old = find_confirmed_client_by_name(&unconf->cl_name, nn);
2074 if (old) { 2483 if (old) {
2075 status = mark_client_expired(old); 2484 status = mark_client_expired_locked(old);
2076 if (status) 2485 if (status) {
2486 old = NULL;
2077 goto out_free_conn; 2487 goto out_free_conn;
2078 expire_client(old); 2488 }
2079 } 2489 }
2080 move_to_confirmed(unconf); 2490 move_to_confirmed(unconf);
2081 conf = unconf; 2491 conf = unconf;
@@ -2091,20 +2501,27 @@ nfsd4_create_session(struct svc_rqst *rqstp,
2091 cr_ses->flags &= ~SESSION4_RDMA; 2501 cr_ses->flags &= ~SESSION4_RDMA;
2092 2502
2093 init_session(rqstp, new, conf, cr_ses); 2503 init_session(rqstp, new, conf, cr_ses);
2094 nfsd4_init_conn(rqstp, conn, new); 2504 nfsd4_get_session_locked(new);
2095 2505
2096 memcpy(cr_ses->sessionid.data, new->se_sessionid.data, 2506 memcpy(cr_ses->sessionid.data, new->se_sessionid.data,
2097 NFS4_MAX_SESSIONID_LEN); 2507 NFS4_MAX_SESSIONID_LEN);
2098 cs_slot->sl_seqid++; 2508 cs_slot->sl_seqid++;
2099 cr_ses->seqid = cs_slot->sl_seqid; 2509 cr_ses->seqid = cs_slot->sl_seqid;
2100 2510
2101 /* cache solo and embedded create sessions under the state lock */ 2511 /* cache solo and embedded create sessions under the client_lock */
2102 nfsd4_cache_create_session(cr_ses, cs_slot, status); 2512 nfsd4_cache_create_session(cr_ses, cs_slot, status);
2103 nfs4_unlock_state(); 2513 spin_unlock(&nn->client_lock);
2514 /* init connection and backchannel */
2515 nfsd4_init_conn(rqstp, conn, new);
2516 nfsd4_put_session(new);
2517 if (old)
2518 expire_client(old);
2104 return status; 2519 return status;
2105out_free_conn: 2520out_free_conn:
2106 nfs4_unlock_state(); 2521 spin_unlock(&nn->client_lock);
2107 free_conn(conn); 2522 free_conn(conn);
2523 if (old)
2524 expire_client(old);
2108out_free_session: 2525out_free_session:
2109 __free_session(new); 2526 __free_session(new);
2110out_release_drc_mem: 2527out_release_drc_mem:
@@ -2152,17 +2569,16 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
2152 __be32 status; 2569 __be32 status;
2153 struct nfsd4_conn *conn; 2570 struct nfsd4_conn *conn;
2154 struct nfsd4_session *session; 2571 struct nfsd4_session *session;
2155 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); 2572 struct net *net = SVC_NET(rqstp);
2573 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
2156 2574
2157 if (!nfsd4_last_compound_op(rqstp)) 2575 if (!nfsd4_last_compound_op(rqstp))
2158 return nfserr_not_only_op; 2576 return nfserr_not_only_op;
2159 nfs4_lock_state();
2160 spin_lock(&nn->client_lock); 2577 spin_lock(&nn->client_lock);
2161 session = find_in_sessionid_hashtbl(&bcts->sessionid, SVC_NET(rqstp)); 2578 session = find_in_sessionid_hashtbl(&bcts->sessionid, net, &status);
2162 spin_unlock(&nn->client_lock); 2579 spin_unlock(&nn->client_lock);
2163 status = nfserr_badsession;
2164 if (!session) 2580 if (!session)
2165 goto out; 2581 goto out_no_session;
2166 status = nfserr_wrong_cred; 2582 status = nfserr_wrong_cred;
2167 if (!mach_creds_match(session->se_client, rqstp)) 2583 if (!mach_creds_match(session->se_client, rqstp))
2168 goto out; 2584 goto out;
@@ -2176,7 +2592,8 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
2176 nfsd4_init_conn(rqstp, conn, session); 2592 nfsd4_init_conn(rqstp, conn, session);
2177 status = nfs_ok; 2593 status = nfs_ok;
2178out: 2594out:
2179 nfs4_unlock_state(); 2595 nfsd4_put_session(session);
2596out_no_session:
2180 return status; 2597 return status;
2181} 2598}
2182 2599
@@ -2195,9 +2612,9 @@ nfsd4_destroy_session(struct svc_rqst *r,
2195 struct nfsd4_session *ses; 2612 struct nfsd4_session *ses;
2196 __be32 status; 2613 __be32 status;
2197 int ref_held_by_me = 0; 2614 int ref_held_by_me = 0;
2198 struct nfsd_net *nn = net_generic(SVC_NET(r), nfsd_net_id); 2615 struct net *net = SVC_NET(r);
2616 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
2199 2617
2200 nfs4_lock_state();
2201 status = nfserr_not_only_op; 2618 status = nfserr_not_only_op;
2202 if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) { 2619 if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) {
2203 if (!nfsd4_last_compound_op(r)) 2620 if (!nfsd4_last_compound_op(r))
@@ -2206,14 +2623,12 @@ nfsd4_destroy_session(struct svc_rqst *r,
2206 } 2623 }
2207 dump_sessionid(__func__, &sessionid->sessionid); 2624 dump_sessionid(__func__, &sessionid->sessionid);
2208 spin_lock(&nn->client_lock); 2625 spin_lock(&nn->client_lock);
2209 ses = find_in_sessionid_hashtbl(&sessionid->sessionid, SVC_NET(r)); 2626 ses = find_in_sessionid_hashtbl(&sessionid->sessionid, net, &status);
2210 status = nfserr_badsession;
2211 if (!ses) 2627 if (!ses)
2212 goto out_client_lock; 2628 goto out_client_lock;
2213 status = nfserr_wrong_cred; 2629 status = nfserr_wrong_cred;
2214 if (!mach_creds_match(ses->se_client, r)) 2630 if (!mach_creds_match(ses->se_client, r))
2215 goto out_client_lock; 2631 goto out_put_session;
2216 nfsd4_get_session_locked(ses);
2217 status = mark_session_dead_locked(ses, 1 + ref_held_by_me); 2632 status = mark_session_dead_locked(ses, 1 + ref_held_by_me);
2218 if (status) 2633 if (status)
2219 goto out_put_session; 2634 goto out_put_session;
@@ -2225,11 +2640,10 @@ nfsd4_destroy_session(struct svc_rqst *r,
2225 spin_lock(&nn->client_lock); 2640 spin_lock(&nn->client_lock);
2226 status = nfs_ok; 2641 status = nfs_ok;
2227out_put_session: 2642out_put_session:
2228 nfsd4_put_session(ses); 2643 nfsd4_put_session_locked(ses);
2229out_client_lock: 2644out_client_lock:
2230 spin_unlock(&nn->client_lock); 2645 spin_unlock(&nn->client_lock);
2231out: 2646out:
2232 nfs4_unlock_state();
2233 return status; 2647 return status;
2234} 2648}
2235 2649
@@ -2300,7 +2714,8 @@ nfsd4_sequence(struct svc_rqst *rqstp,
2300 struct nfsd4_conn *conn; 2714 struct nfsd4_conn *conn;
2301 __be32 status; 2715 __be32 status;
2302 int buflen; 2716 int buflen;
2303 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); 2717 struct net *net = SVC_NET(rqstp);
2718 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
2304 2719
2305 if (resp->opcnt != 1) 2720 if (resp->opcnt != 1)
2306 return nfserr_sequence_pos; 2721 return nfserr_sequence_pos;
@@ -2314,17 +2729,10 @@ nfsd4_sequence(struct svc_rqst *rqstp,
2314 return nfserr_jukebox; 2729 return nfserr_jukebox;
2315 2730
2316 spin_lock(&nn->client_lock); 2731 spin_lock(&nn->client_lock);
2317 status = nfserr_badsession; 2732 session = find_in_sessionid_hashtbl(&seq->sessionid, net, &status);
2318 session = find_in_sessionid_hashtbl(&seq->sessionid, SVC_NET(rqstp));
2319 if (!session) 2733 if (!session)
2320 goto out_no_session; 2734 goto out_no_session;
2321 clp = session->se_client; 2735 clp = session->se_client;
2322 status = get_client_locked(clp);
2323 if (status)
2324 goto out_no_session;
2325 status = nfsd4_get_session_locked(session);
2326 if (status)
2327 goto out_put_client;
2328 2736
2329 status = nfserr_too_many_ops; 2737 status = nfserr_too_many_ops;
2330 if (nfsd4_session_too_many_ops(rqstp, session)) 2738 if (nfsd4_session_too_many_ops(rqstp, session))
@@ -2354,6 +2762,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
2354 goto out_put_session; 2762 goto out_put_session;
2355 cstate->slot = slot; 2763 cstate->slot = slot;
2356 cstate->session = session; 2764 cstate->session = session;
2765 cstate->clp = clp;
2357 /* Return the cached reply status and set cstate->status 2766 /* Return the cached reply status and set cstate->status
2358 * for nfsd4_proc_compound processing */ 2767 * for nfsd4_proc_compound processing */
2359 status = nfsd4_replay_cache_entry(resp, seq); 2768 status = nfsd4_replay_cache_entry(resp, seq);
@@ -2388,6 +2797,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
2388 2797
2389 cstate->slot = slot; 2798 cstate->slot = slot;
2390 cstate->session = session; 2799 cstate->session = session;
2800 cstate->clp = clp;
2391 2801
2392out: 2802out:
2393 switch (clp->cl_cb_state) { 2803 switch (clp->cl_cb_state) {
@@ -2408,31 +2818,48 @@ out_no_session:
2408 spin_unlock(&nn->client_lock); 2818 spin_unlock(&nn->client_lock);
2409 return status; 2819 return status;
2410out_put_session: 2820out_put_session:
2411 nfsd4_put_session(session); 2821 nfsd4_put_session_locked(session);
2412out_put_client:
2413 put_client_renew_locked(clp);
2414 goto out_no_session; 2822 goto out_no_session;
2415} 2823}
2416 2824
2825void
2826nfsd4_sequence_done(struct nfsd4_compoundres *resp)
2827{
2828 struct nfsd4_compound_state *cs = &resp->cstate;
2829
2830 if (nfsd4_has_session(cs)) {
2831 if (cs->status != nfserr_replay_cache) {
2832 nfsd4_store_cache_entry(resp);
2833 cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE;
2834 }
2835 /* Drop session reference that was taken in nfsd4_sequence() */
2836 nfsd4_put_session(cs->session);
2837 } else if (cs->clp)
2838 put_client_renew(cs->clp);
2839}
2840
2417__be32 2841__be32
2418nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc) 2842nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc)
2419{ 2843{
2420 struct nfs4_client *conf, *unconf, *clp; 2844 struct nfs4_client *conf, *unconf;
2845 struct nfs4_client *clp = NULL;
2421 __be32 status = 0; 2846 __be32 status = 0;
2422 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); 2847 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
2423 2848
2424 nfs4_lock_state(); 2849 spin_lock(&nn->client_lock);
2425 unconf = find_unconfirmed_client(&dc->clientid, true, nn); 2850 unconf = find_unconfirmed_client(&dc->clientid, true, nn);
2426 conf = find_confirmed_client(&dc->clientid, true, nn); 2851 conf = find_confirmed_client(&dc->clientid, true, nn);
2427 WARN_ON_ONCE(conf && unconf); 2852 WARN_ON_ONCE(conf && unconf);
2428 2853
2429 if (conf) { 2854 if (conf) {
2430 clp = conf;
2431
2432 if (client_has_state(conf)) { 2855 if (client_has_state(conf)) {
2433 status = nfserr_clientid_busy; 2856 status = nfserr_clientid_busy;
2434 goto out; 2857 goto out;
2435 } 2858 }
2859 status = mark_client_expired_locked(conf);
2860 if (status)
2861 goto out;
2862 clp = conf;
2436 } else if (unconf) 2863 } else if (unconf)
2437 clp = unconf; 2864 clp = unconf;
2438 else { 2865 else {
@@ -2440,12 +2867,15 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
2440 goto out; 2867 goto out;
2441 } 2868 }
2442 if (!mach_creds_match(clp, rqstp)) { 2869 if (!mach_creds_match(clp, rqstp)) {
2870 clp = NULL;
2443 status = nfserr_wrong_cred; 2871 status = nfserr_wrong_cred;
2444 goto out; 2872 goto out;
2445 } 2873 }
2446 expire_client(clp); 2874 unhash_client_locked(clp);
2447out: 2875out:
2448 nfs4_unlock_state(); 2876 spin_unlock(&nn->client_lock);
2877 if (clp)
2878 expire_client(clp);
2449 return status; 2879 return status;
2450} 2880}
2451 2881
@@ -2464,7 +2894,6 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
2464 return nfs_ok; 2894 return nfs_ok;
2465 } 2895 }
2466 2896
2467 nfs4_lock_state();
2468 status = nfserr_complete_already; 2897 status = nfserr_complete_already;
2469 if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, 2898 if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE,
2470 &cstate->session->se_client->cl_flags)) 2899 &cstate->session->se_client->cl_flags))
@@ -2484,7 +2913,6 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
2484 status = nfs_ok; 2913 status = nfs_ok;
2485 nfsd4_client_record_create(cstate->session->se_client); 2914 nfsd4_client_record_create(cstate->session->se_client);
2486out: 2915out:
2487 nfs4_unlock_state();
2488 return status; 2916 return status;
2489} 2917}
2490 2918
@@ -2494,12 +2922,16 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2494{ 2922{
2495 struct xdr_netobj clname = setclid->se_name; 2923 struct xdr_netobj clname = setclid->se_name;
2496 nfs4_verifier clverifier = setclid->se_verf; 2924 nfs4_verifier clverifier = setclid->se_verf;
2497 struct nfs4_client *conf, *unconf, *new; 2925 struct nfs4_client *conf, *new;
2926 struct nfs4_client *unconf = NULL;
2498 __be32 status; 2927 __be32 status;
2499 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); 2928 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
2500 2929
2930 new = create_client(clname, rqstp, &clverifier);
2931 if (new == NULL)
2932 return nfserr_jukebox;
2501 /* Cases below refer to rfc 3530 section 14.2.33: */ 2933 /* Cases below refer to rfc 3530 section 14.2.33: */
2502 nfs4_lock_state(); 2934 spin_lock(&nn->client_lock);
2503 conf = find_confirmed_client_by_name(&clname, nn); 2935 conf = find_confirmed_client_by_name(&clname, nn);
2504 if (conf) { 2936 if (conf) {
2505 /* case 0: */ 2937 /* case 0: */
@@ -2517,11 +2949,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2517 } 2949 }
2518 unconf = find_unconfirmed_client_by_name(&clname, nn); 2950 unconf = find_unconfirmed_client_by_name(&clname, nn);
2519 if (unconf) 2951 if (unconf)
2520 expire_client(unconf); 2952 unhash_client_locked(unconf);
2521 status = nfserr_jukebox;
2522 new = create_client(clname, rqstp, &clverifier);
2523 if (new == NULL)
2524 goto out;
2525 if (conf && same_verf(&conf->cl_verifier, &clverifier)) 2953 if (conf && same_verf(&conf->cl_verifier, &clverifier))
2526 /* case 1: probable callback update */ 2954 /* case 1: probable callback update */
2527 copy_clid(new, conf); 2955 copy_clid(new, conf);
@@ -2533,9 +2961,14 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2533 setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; 2961 setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
2534 setclid->se_clientid.cl_id = new->cl_clientid.cl_id; 2962 setclid->se_clientid.cl_id = new->cl_clientid.cl_id;
2535 memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data)); 2963 memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data));
2964 new = NULL;
2536 status = nfs_ok; 2965 status = nfs_ok;
2537out: 2966out:
2538 nfs4_unlock_state(); 2967 spin_unlock(&nn->client_lock);
2968 if (new)
2969 free_client(new);
2970 if (unconf)
2971 expire_client(unconf);
2539 return status; 2972 return status;
2540} 2973}
2541 2974
@@ -2546,6 +2979,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
2546 struct nfsd4_setclientid_confirm *setclientid_confirm) 2979 struct nfsd4_setclientid_confirm *setclientid_confirm)
2547{ 2980{
2548 struct nfs4_client *conf, *unconf; 2981 struct nfs4_client *conf, *unconf;
2982 struct nfs4_client *old = NULL;
2549 nfs4_verifier confirm = setclientid_confirm->sc_confirm; 2983 nfs4_verifier confirm = setclientid_confirm->sc_confirm;
2550 clientid_t * clid = &setclientid_confirm->sc_clientid; 2984 clientid_t * clid = &setclientid_confirm->sc_clientid;
2551 __be32 status; 2985 __be32 status;
@@ -2553,8 +2987,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
2553 2987
2554 if (STALE_CLIENTID(clid, nn)) 2988 if (STALE_CLIENTID(clid, nn))
2555 return nfserr_stale_clientid; 2989 return nfserr_stale_clientid;
2556 nfs4_lock_state();
2557 2990
2991 spin_lock(&nn->client_lock);
2558 conf = find_confirmed_client(clid, false, nn); 2992 conf = find_confirmed_client(clid, false, nn);
2559 unconf = find_unconfirmed_client(clid, false, nn); 2993 unconf = find_unconfirmed_client(clid, false, nn);
2560 /* 2994 /*
@@ -2578,22 +3012,30 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
2578 } 3012 }
2579 status = nfs_ok; 3013 status = nfs_ok;
2580 if (conf) { /* case 1: callback update */ 3014 if (conf) { /* case 1: callback update */
3015 old = unconf;
3016 unhash_client_locked(old);
2581 nfsd4_change_callback(conf, &unconf->cl_cb_conn); 3017 nfsd4_change_callback(conf, &unconf->cl_cb_conn);
2582 nfsd4_probe_callback(conf);
2583 expire_client(unconf);
2584 } else { /* case 3: normal case; new or rebooted client */ 3018 } else { /* case 3: normal case; new or rebooted client */
2585 conf = find_confirmed_client_by_name(&unconf->cl_name, nn); 3019 old = find_confirmed_client_by_name(&unconf->cl_name, nn);
2586 if (conf) { 3020 if (old) {
2587 status = mark_client_expired(conf); 3021 status = mark_client_expired_locked(old);
2588 if (status) 3022 if (status) {
3023 old = NULL;
2589 goto out; 3024 goto out;
2590 expire_client(conf); 3025 }
2591 } 3026 }
2592 move_to_confirmed(unconf); 3027 move_to_confirmed(unconf);
2593 nfsd4_probe_callback(unconf); 3028 conf = unconf;
2594 } 3029 }
3030 get_client_locked(conf);
3031 spin_unlock(&nn->client_lock);
3032 nfsd4_probe_callback(conf);
3033 spin_lock(&nn->client_lock);
3034 put_client_renew_locked(conf);
2595out: 3035out:
2596 nfs4_unlock_state(); 3036 spin_unlock(&nn->client_lock);
3037 if (old)
3038 expire_client(old);
2597 return status; 3039 return status;
2598} 3040}
2599 3041
@@ -2603,21 +3045,23 @@ static struct nfs4_file *nfsd4_alloc_file(void)
2603} 3045}
2604 3046
2605/* OPEN Share state helper functions */ 3047/* OPEN Share state helper functions */
2606static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino) 3048static void nfsd4_init_file(struct nfs4_file *fp, struct knfsd_fh *fh)
2607{ 3049{
2608 unsigned int hashval = file_hashval(ino); 3050 unsigned int hashval = file_hashval(fh);
3051
3052 lockdep_assert_held(&state_lock);
2609 3053
2610 atomic_set(&fp->fi_ref, 1); 3054 atomic_set(&fp->fi_ref, 1);
3055 spin_lock_init(&fp->fi_lock);
2611 INIT_LIST_HEAD(&fp->fi_stateids); 3056 INIT_LIST_HEAD(&fp->fi_stateids);
2612 INIT_LIST_HEAD(&fp->fi_delegations); 3057 INIT_LIST_HEAD(&fp->fi_delegations);
2613 fp->fi_inode = igrab(ino); 3058 fh_copy_shallow(&fp->fi_fhandle, fh);
2614 fp->fi_had_conflict = false; 3059 fp->fi_had_conflict = false;
2615 fp->fi_lease = NULL; 3060 fp->fi_lease = NULL;
3061 fp->fi_share_deny = 0;
2616 memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); 3062 memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
2617 memset(fp->fi_access, 0, sizeof(fp->fi_access)); 3063 memset(fp->fi_access, 0, sizeof(fp->fi_access));
2618 spin_lock(&state_lock);
2619 hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]); 3064 hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]);
2620 spin_unlock(&state_lock);
2621} 3065}
2622 3066
2623void 3067void
@@ -2673,6 +3117,28 @@ static void init_nfs4_replay(struct nfs4_replay *rp)
2673 rp->rp_status = nfserr_serverfault; 3117 rp->rp_status = nfserr_serverfault;
2674 rp->rp_buflen = 0; 3118 rp->rp_buflen = 0;
2675 rp->rp_buf = rp->rp_ibuf; 3119 rp->rp_buf = rp->rp_ibuf;
3120 mutex_init(&rp->rp_mutex);
3121}
3122
3123static void nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
3124 struct nfs4_stateowner *so)
3125{
3126 if (!nfsd4_has_session(cstate)) {
3127 mutex_lock(&so->so_replay.rp_mutex);
3128 cstate->replay_owner = so;
3129 atomic_inc(&so->so_count);
3130 }
3131}
3132
3133void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate)
3134{
3135 struct nfs4_stateowner *so = cstate->replay_owner;
3136
3137 if (so != NULL) {
3138 cstate->replay_owner = NULL;
3139 mutex_unlock(&so->so_replay.rp_mutex);
3140 nfs4_put_stateowner(so);
3141 }
2676} 3142}
2677 3143
2678static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj *owner, struct nfs4_client *clp) 3144static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj *owner, struct nfs4_client *clp)
@@ -2693,111 +3159,172 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj
2693 INIT_LIST_HEAD(&sop->so_stateids); 3159 INIT_LIST_HEAD(&sop->so_stateids);
2694 sop->so_client = clp; 3160 sop->so_client = clp;
2695 init_nfs4_replay(&sop->so_replay); 3161 init_nfs4_replay(&sop->so_replay);
3162 atomic_set(&sop->so_count, 1);
2696 return sop; 3163 return sop;
2697} 3164}
2698 3165
2699static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval) 3166static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval)
2700{ 3167{
2701 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); 3168 lockdep_assert_held(&clp->cl_lock);
2702 3169
2703 list_add(&oo->oo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]); 3170 list_add(&oo->oo_owner.so_strhash,
3171 &clp->cl_ownerstr_hashtbl[strhashval]);
2704 list_add(&oo->oo_perclient, &clp->cl_openowners); 3172 list_add(&oo->oo_perclient, &clp->cl_openowners);
2705} 3173}
2706 3174
3175static void nfs4_unhash_openowner(struct nfs4_stateowner *so)
3176{
3177 unhash_openowner_locked(openowner(so));
3178}
3179
3180static void nfs4_free_openowner(struct nfs4_stateowner *so)
3181{
3182 struct nfs4_openowner *oo = openowner(so);
3183
3184 kmem_cache_free(openowner_slab, oo);
3185}
3186
3187static const struct nfs4_stateowner_operations openowner_ops = {
3188 .so_unhash = nfs4_unhash_openowner,
3189 .so_free = nfs4_free_openowner,
3190};
3191
2707static struct nfs4_openowner * 3192static struct nfs4_openowner *
2708alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open) { 3193alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
2709 struct nfs4_openowner *oo; 3194 struct nfsd4_compound_state *cstate)
3195{
3196 struct nfs4_client *clp = cstate->clp;
3197 struct nfs4_openowner *oo, *ret;
2710 3198
2711 oo = alloc_stateowner(openowner_slab, &open->op_owner, clp); 3199 oo = alloc_stateowner(openowner_slab, &open->op_owner, clp);
2712 if (!oo) 3200 if (!oo)
2713 return NULL; 3201 return NULL;
3202 oo->oo_owner.so_ops = &openowner_ops;
2714 oo->oo_owner.so_is_open_owner = 1; 3203 oo->oo_owner.so_is_open_owner = 1;
2715 oo->oo_owner.so_seqid = open->op_seqid; 3204 oo->oo_owner.so_seqid = open->op_seqid;
2716 oo->oo_flags = NFS4_OO_NEW; 3205 oo->oo_flags = 0;
3206 if (nfsd4_has_session(cstate))
3207 oo->oo_flags |= NFS4_OO_CONFIRMED;
2717 oo->oo_time = 0; 3208 oo->oo_time = 0;
2718 oo->oo_last_closed_stid = NULL; 3209 oo->oo_last_closed_stid = NULL;
2719 INIT_LIST_HEAD(&oo->oo_close_lru); 3210 INIT_LIST_HEAD(&oo->oo_close_lru);
2720 hash_openowner(oo, clp, strhashval); 3211 spin_lock(&clp->cl_lock);
3212 ret = find_openstateowner_str_locked(strhashval, open, clp);
3213 if (ret == NULL) {
3214 hash_openowner(oo, clp, strhashval);
3215 ret = oo;
3216 } else
3217 nfs4_free_openowner(&oo->oo_owner);
3218 spin_unlock(&clp->cl_lock);
2721 return oo; 3219 return oo;
2722} 3220}
2723 3221
2724static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { 3222static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) {
2725 struct nfs4_openowner *oo = open->op_openowner; 3223 struct nfs4_openowner *oo = open->op_openowner;
2726 3224
3225 atomic_inc(&stp->st_stid.sc_count);
2727 stp->st_stid.sc_type = NFS4_OPEN_STID; 3226 stp->st_stid.sc_type = NFS4_OPEN_STID;
2728 INIT_LIST_HEAD(&stp->st_lockowners); 3227 INIT_LIST_HEAD(&stp->st_locks);
2729 list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
2730 list_add(&stp->st_perfile, &fp->fi_stateids);
2731 stp->st_stateowner = &oo->oo_owner; 3228 stp->st_stateowner = &oo->oo_owner;
3229 atomic_inc(&stp->st_stateowner->so_count);
2732 get_nfs4_file(fp); 3230 get_nfs4_file(fp);
2733 stp->st_file = fp; 3231 stp->st_stid.sc_file = fp;
2734 stp->st_access_bmap = 0; 3232 stp->st_access_bmap = 0;
2735 stp->st_deny_bmap = 0; 3233 stp->st_deny_bmap = 0;
2736 set_access(open->op_share_access, stp);
2737 set_deny(open->op_share_deny, stp);
2738 stp->st_openstp = NULL; 3234 stp->st_openstp = NULL;
3235 spin_lock(&oo->oo_owner.so_client->cl_lock);
3236 list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
3237 spin_lock(&fp->fi_lock);
3238 list_add(&stp->st_perfile, &fp->fi_stateids);
3239 spin_unlock(&fp->fi_lock);
3240 spin_unlock(&oo->oo_owner.so_client->cl_lock);
2739} 3241}
2740 3242
3243/*
3244 * In the 4.0 case we need to keep the owners around a little while to handle
3245 * CLOSE replay. We still do need to release any file access that is held by
3246 * them before returning however.
3247 */
2741static void 3248static void
2742move_to_close_lru(struct nfs4_openowner *oo, struct net *net) 3249move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
2743{ 3250{
2744 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 3251 struct nfs4_ol_stateid *last;
3252 struct nfs4_openowner *oo = openowner(s->st_stateowner);
3253 struct nfsd_net *nn = net_generic(s->st_stid.sc_client->net,
3254 nfsd_net_id);
2745 3255
2746 dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo); 3256 dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo);
2747 3257
3258 /*
3259 * We know that we hold one reference via nfsd4_close, and another
3260 * "persistent" reference for the client. If the refcount is higher
3261 * than 2, then there are still calls in progress that are using this
3262 * stateid. We can't put the sc_file reference until they are finished.
3263 * Wait for the refcount to drop to 2. Since it has been unhashed,
3264 * there should be no danger of the refcount going back up again at
3265 * this point.
3266 */
3267 wait_event(close_wq, atomic_read(&s->st_stid.sc_count) == 2);
3268
3269 release_all_access(s);
3270 if (s->st_stid.sc_file) {
3271 put_nfs4_file(s->st_stid.sc_file);
3272 s->st_stid.sc_file = NULL;
3273 }
3274
3275 spin_lock(&nn->client_lock);
3276 last = oo->oo_last_closed_stid;
3277 oo->oo_last_closed_stid = s;
2748 list_move_tail(&oo->oo_close_lru, &nn->close_lru); 3278 list_move_tail(&oo->oo_close_lru, &nn->close_lru);
2749 oo->oo_time = get_seconds(); 3279 oo->oo_time = get_seconds();
3280 spin_unlock(&nn->client_lock);
3281 if (last)
3282 nfs4_put_stid(&last->st_stid);
2750} 3283}
2751 3284
2752static int 3285/* search file_hashtbl[] for file */
2753same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, 3286static struct nfs4_file *
2754 clientid_t *clid) 3287find_file_locked(struct knfsd_fh *fh)
2755{ 3288{
2756 return (sop->so_owner.len == owner->len) && 3289 unsigned int hashval = file_hashval(fh);
2757 0 == memcmp(sop->so_owner.data, owner->data, owner->len) && 3290 struct nfs4_file *fp;
2758 (sop->so_client->cl_clientid.cl_id == clid->cl_id);
2759}
2760 3291
2761static struct nfs4_openowner * 3292 lockdep_assert_held(&state_lock);
2762find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open,
2763 bool sessions, struct nfsd_net *nn)
2764{
2765 struct nfs4_stateowner *so;
2766 struct nfs4_openowner *oo;
2767 struct nfs4_client *clp;
2768 3293
2769 list_for_each_entry(so, &nn->ownerstr_hashtbl[hashval], so_strhash) { 3294 hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
2770 if (!so->so_is_open_owner) 3295 if (nfsd_fh_match(&fp->fi_fhandle, fh)) {
2771 continue; 3296 get_nfs4_file(fp);
2772 if (same_owner_str(so, &open->op_owner, &open->op_clientid)) { 3297 return fp;
2773 oo = openowner(so);
2774 clp = oo->oo_owner.so_client;
2775 if ((bool)clp->cl_minorversion != sessions)
2776 return NULL;
2777 renew_client(oo->oo_owner.so_client);
2778 return oo;
2779 } 3298 }
2780 } 3299 }
2781 return NULL; 3300 return NULL;
2782} 3301}
2783 3302
2784/* search file_hashtbl[] for file */
2785static struct nfs4_file * 3303static struct nfs4_file *
2786find_file(struct inode *ino) 3304find_file(struct knfsd_fh *fh)
2787{ 3305{
2788 unsigned int hashval = file_hashval(ino);
2789 struct nfs4_file *fp; 3306 struct nfs4_file *fp;
2790 3307
2791 spin_lock(&state_lock); 3308 spin_lock(&state_lock);
2792 hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { 3309 fp = find_file_locked(fh);
2793 if (fp->fi_inode == ino) { 3310 spin_unlock(&state_lock);
2794 get_nfs4_file(fp); 3311 return fp;
2795 spin_unlock(&state_lock); 3312}
2796 return fp; 3313
2797 } 3314static struct nfs4_file *
3315find_or_add_file(struct nfs4_file *new, struct knfsd_fh *fh)
3316{
3317 struct nfs4_file *fp;
3318
3319 spin_lock(&state_lock);
3320 fp = find_file_locked(fh);
3321 if (fp == NULL) {
3322 nfsd4_init_file(new, fh);
3323 fp = new;
2798 } 3324 }
2799 spin_unlock(&state_lock); 3325 spin_unlock(&state_lock);
2800 return NULL; 3326
3327 return fp;
2801} 3328}
2802 3329
2803/* 3330/*
@@ -2807,47 +3334,53 @@ find_file(struct inode *ino)
2807static __be32 3334static __be32
2808nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) 3335nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
2809{ 3336{
2810 struct inode *ino = current_fh->fh_dentry->d_inode;
2811 struct nfs4_file *fp; 3337 struct nfs4_file *fp;
2812 struct nfs4_ol_stateid *stp; 3338 __be32 ret = nfs_ok;
2813 __be32 ret;
2814 3339
2815 fp = find_file(ino); 3340 fp = find_file(&current_fh->fh_handle);
2816 if (!fp) 3341 if (!fp)
2817 return nfs_ok; 3342 return ret;
2818 ret = nfserr_locked; 3343 /* Check for conflicting share reservations */
2819 /* Search for conflicting share reservations */ 3344 spin_lock(&fp->fi_lock);
2820 list_for_each_entry(stp, &fp->fi_stateids, st_perfile) { 3345 if (fp->fi_share_deny & deny_type)
2821 if (test_deny(deny_type, stp) || 3346 ret = nfserr_locked;
2822 test_deny(NFS4_SHARE_DENY_BOTH, stp)) 3347 spin_unlock(&fp->fi_lock);
2823 goto out;
2824 }
2825 ret = nfs_ok;
2826out:
2827 put_nfs4_file(fp); 3348 put_nfs4_file(fp);
2828 return ret; 3349 return ret;
2829} 3350}
2830 3351
2831static void nfsd_break_one_deleg(struct nfs4_delegation *dp) 3352void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp)
2832{ 3353{
2833 struct nfs4_client *clp = dp->dl_stid.sc_client; 3354 struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net,
2834 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); 3355 nfsd_net_id);
2835 3356
2836 lockdep_assert_held(&state_lock); 3357 block_delegations(&dp->dl_stid.sc_file->fi_fhandle);
2837 /* We're assuming the state code never drops its reference 3358
3359 /*
3360 * We can't do this in nfsd_break_deleg_cb because it is
3361 * already holding inode->i_lock.
3362 *
3363 * If the dl_time != 0, then we know that it has already been
3364 * queued for a lease break. Don't queue it again.
3365 */
3366 spin_lock(&state_lock);
3367 if (dp->dl_time == 0) {
3368 dp->dl_time = get_seconds();
3369 list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
3370 }
3371 spin_unlock(&state_lock);
3372}
3373
3374static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
3375{
3376 /*
3377 * We're assuming the state code never drops its reference
2838 * without first removing the lease. Since we're in this lease 3378 * without first removing the lease. Since we're in this lease
2839 * callback (and since the lease code is serialized by the kernel 3379 * callback (and since the lease code is serialized by the kernel
2840 * lock) we know the server hasn't removed the lease yet, we know 3380 * lock) we know the server hasn't removed the lease yet, we know
2841 * it's safe to take a reference: */ 3381 * it's safe to take a reference.
2842 atomic_inc(&dp->dl_count); 3382 */
2843 3383 atomic_inc(&dp->dl_stid.sc_count);
2844 list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
2845
2846 /* Only place dl_time is set; protected by i_lock: */
2847 dp->dl_time = get_seconds();
2848
2849 block_delegations(&dp->dl_fh);
2850
2851 nfsd4_cb_recall(dp); 3384 nfsd4_cb_recall(dp);
2852} 3385}
2853 3386
@@ -2872,11 +3405,20 @@ static void nfsd_break_deleg_cb(struct file_lock *fl)
2872 */ 3405 */
2873 fl->fl_break_time = 0; 3406 fl->fl_break_time = 0;
2874 3407
2875 spin_lock(&state_lock); 3408 spin_lock(&fp->fi_lock);
2876 fp->fi_had_conflict = true; 3409 fp->fi_had_conflict = true;
2877 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) 3410 /*
2878 nfsd_break_one_deleg(dp); 3411 * If there are no delegations on the list, then we can't count on this
2879 spin_unlock(&state_lock); 3412 * lease ever being cleaned up. Set the fl_break_time to jiffies so that
3413 * time_out_leases will do it ASAP. The fact that fi_had_conflict is now
3414 * true should keep any new delegations from being hashed.
3415 */
3416 if (list_empty(&fp->fi_delegations))
3417 fl->fl_break_time = jiffies;
3418 else
3419 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
3420 nfsd_break_one_deleg(dp);
3421 spin_unlock(&fp->fi_lock);
2880} 3422}
2881 3423
2882static 3424static
@@ -2904,6 +3446,42 @@ static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4
2904 return nfserr_bad_seqid; 3446 return nfserr_bad_seqid;
2905} 3447}
2906 3448
3449static __be32 lookup_clientid(clientid_t *clid,
3450 struct nfsd4_compound_state *cstate,
3451 struct nfsd_net *nn)
3452{
3453 struct nfs4_client *found;
3454
3455 if (cstate->clp) {
3456 found = cstate->clp;
3457 if (!same_clid(&found->cl_clientid, clid))
3458 return nfserr_stale_clientid;
3459 return nfs_ok;
3460 }
3461
3462 if (STALE_CLIENTID(clid, nn))
3463 return nfserr_stale_clientid;
3464
3465 /*
3466 * For v4.1+ we get the client in the SEQUENCE op. If we don't have one
3467 * cached already then we know this is for is for v4.0 and "sessions"
3468 * will be false.
3469 */
3470 WARN_ON_ONCE(cstate->session);
3471 spin_lock(&nn->client_lock);
3472 found = find_confirmed_client(clid, false, nn);
3473 if (!found) {
3474 spin_unlock(&nn->client_lock);
3475 return nfserr_expired;
3476 }
3477 atomic_inc(&found->cl_refcount);
3478 spin_unlock(&nn->client_lock);
3479
3480 /* Cache the nfs4_client in cstate! */
3481 cstate->clp = found;
3482 return nfs_ok;
3483}
3484
2907__be32 3485__be32
2908nfsd4_process_open1(struct nfsd4_compound_state *cstate, 3486nfsd4_process_open1(struct nfsd4_compound_state *cstate,
2909 struct nfsd4_open *open, struct nfsd_net *nn) 3487 struct nfsd4_open *open, struct nfsd_net *nn)
@@ -2924,19 +3502,19 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
2924 if (open->op_file == NULL) 3502 if (open->op_file == NULL)
2925 return nfserr_jukebox; 3503 return nfserr_jukebox;
2926 3504
2927 strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner); 3505 status = lookup_clientid(clientid, cstate, nn);
2928 oo = find_openstateowner_str(strhashval, open, cstate->minorversion, nn); 3506 if (status)
3507 return status;
3508 clp = cstate->clp;
3509
3510 strhashval = ownerstr_hashval(&open->op_owner);
3511 oo = find_openstateowner_str(strhashval, open, clp);
2929 open->op_openowner = oo; 3512 open->op_openowner = oo;
2930 if (!oo) { 3513 if (!oo) {
2931 clp = find_confirmed_client(clientid, cstate->minorversion,
2932 nn);
2933 if (clp == NULL)
2934 return nfserr_expired;
2935 goto new_owner; 3514 goto new_owner;
2936 } 3515 }
2937 if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { 3516 if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
2938 /* Replace unconfirmed owners without checking for replay. */ 3517 /* Replace unconfirmed owners without checking for replay. */
2939 clp = oo->oo_owner.so_client;
2940 release_openowner(oo); 3518 release_openowner(oo);
2941 open->op_openowner = NULL; 3519 open->op_openowner = NULL;
2942 goto new_owner; 3520 goto new_owner;
@@ -2944,15 +3522,14 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
2944 status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid); 3522 status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid);
2945 if (status) 3523 if (status)
2946 return status; 3524 return status;
2947 clp = oo->oo_owner.so_client;
2948 goto alloc_stateid; 3525 goto alloc_stateid;
2949new_owner: 3526new_owner:
2950 oo = alloc_init_open_stateowner(strhashval, clp, open); 3527 oo = alloc_init_open_stateowner(strhashval, open, cstate);
2951 if (oo == NULL) 3528 if (oo == NULL)
2952 return nfserr_jukebox; 3529 return nfserr_jukebox;
2953 open->op_openowner = oo; 3530 open->op_openowner = oo;
2954alloc_stateid: 3531alloc_stateid:
2955 open->op_stp = nfs4_alloc_stateid(clp); 3532 open->op_stp = nfs4_alloc_open_stateid(clp);
2956 if (!open->op_stp) 3533 if (!open->op_stp)
2957 return nfserr_jukebox; 3534 return nfserr_jukebox;
2958 return nfs_ok; 3535 return nfs_ok;
@@ -2994,14 +3571,18 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
2994{ 3571{
2995 int flags; 3572 int flags;
2996 __be32 status = nfserr_bad_stateid; 3573 __be32 status = nfserr_bad_stateid;
3574 struct nfs4_delegation *deleg;
2997 3575
2998 *dp = find_deleg_stateid(cl, &open->op_delegate_stateid); 3576 deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
2999 if (*dp == NULL) 3577 if (deleg == NULL)
3000 goto out; 3578 goto out;
3001 flags = share_access_to_flags(open->op_share_access); 3579 flags = share_access_to_flags(open->op_share_access);
3002 status = nfs4_check_delegmode(*dp, flags); 3580 status = nfs4_check_delegmode(deleg, flags);
3003 if (status) 3581 if (status) {
3004 *dp = NULL; 3582 nfs4_put_stid(&deleg->dl_stid);
3583 goto out;
3584 }
3585 *dp = deleg;
3005out: 3586out:
3006 if (!nfsd4_is_deleg_cur(open)) 3587 if (!nfsd4_is_deleg_cur(open))
3007 return nfs_ok; 3588 return nfs_ok;
@@ -3011,24 +3592,25 @@ out:
3011 return nfs_ok; 3592 return nfs_ok;
3012} 3593}
3013 3594
3014static __be32 3595static struct nfs4_ol_stateid *
3015nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_stateid **stpp) 3596nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
3016{ 3597{
3017 struct nfs4_ol_stateid *local; 3598 struct nfs4_ol_stateid *local, *ret = NULL;
3018 struct nfs4_openowner *oo = open->op_openowner; 3599 struct nfs4_openowner *oo = open->op_openowner;
3019 3600
3601 spin_lock(&fp->fi_lock);
3020 list_for_each_entry(local, &fp->fi_stateids, st_perfile) { 3602 list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
3021 /* ignore lock owners */ 3603 /* ignore lock owners */
3022 if (local->st_stateowner->so_is_open_owner == 0) 3604 if (local->st_stateowner->so_is_open_owner == 0)
3023 continue; 3605 continue;
3024 /* remember if we have seen this open owner */ 3606 if (local->st_stateowner == &oo->oo_owner) {
3025 if (local->st_stateowner == &oo->oo_owner) 3607 ret = local;
3026 *stpp = local; 3608 atomic_inc(&ret->st_stid.sc_count);
3027 /* check for conflicting share reservations */ 3609 break;
3028 if (!test_share(local, open)) 3610 }
3029 return nfserr_share_denied;
3030 } 3611 }
3031 return nfs_ok; 3612 spin_unlock(&fp->fi_lock);
3613 return ret;
3032} 3614}
3033 3615
3034static inline int nfs4_access_to_access(u32 nfs4_access) 3616static inline int nfs4_access_to_access(u32 nfs4_access)
@@ -3042,24 +3624,6 @@ static inline int nfs4_access_to_access(u32 nfs4_access)
3042 return flags; 3624 return flags;
3043} 3625}
3044 3626
3045static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
3046 struct svc_fh *cur_fh, struct nfsd4_open *open)
3047{
3048 __be32 status;
3049 int oflag = nfs4_access_to_omode(open->op_share_access);
3050 int access = nfs4_access_to_access(open->op_share_access);
3051
3052 if (!fp->fi_fds[oflag]) {
3053 status = nfsd_open(rqstp, cur_fh, S_IFREG, access,
3054 &fp->fi_fds[oflag]);
3055 if (status)
3056 return status;
3057 }
3058 nfs4_file_get_access(fp, oflag);
3059
3060 return nfs_ok;
3061}
3062
3063static inline __be32 3627static inline __be32
3064nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, 3628nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
3065 struct nfsd4_open *open) 3629 struct nfsd4_open *open)
@@ -3075,34 +3639,99 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
3075 return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0); 3639 return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0);
3076} 3640}
3077 3641
3078static __be32 3642static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
3079nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open) 3643 struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
3644 struct nfsd4_open *open)
3080{ 3645{
3081 u32 op_share_access = open->op_share_access; 3646 struct file *filp = NULL;
3082 bool new_access;
3083 __be32 status; 3647 __be32 status;
3648 int oflag = nfs4_access_to_omode(open->op_share_access);
3649 int access = nfs4_access_to_access(open->op_share_access);
3650 unsigned char old_access_bmap, old_deny_bmap;
3084 3651
3085 new_access = !test_access(op_share_access, stp); 3652 spin_lock(&fp->fi_lock);
3086 if (new_access) { 3653
3087 status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open); 3654 /*
3088 if (status) 3655 * Are we trying to set a deny mode that would conflict with
3089 return status; 3656 * current access?
3657 */
3658 status = nfs4_file_check_deny(fp, open->op_share_deny);
3659 if (status != nfs_ok) {
3660 spin_unlock(&fp->fi_lock);
3661 goto out;
3090 } 3662 }
3091 status = nfsd4_truncate(rqstp, cur_fh, open); 3663
3092 if (status) { 3664 /* set access to the file */
3093 if (new_access) { 3665 status = nfs4_file_get_access(fp, open->op_share_access);
3094 int oflag = nfs4_access_to_omode(op_share_access); 3666 if (status != nfs_ok) {
3095 nfs4_file_put_access(fp, oflag); 3667 spin_unlock(&fp->fi_lock);
3096 } 3668 goto out;
3097 return status;
3098 } 3669 }
3099 /* remember the open */ 3670
3100 set_access(op_share_access, stp); 3671 /* Set access bits in stateid */
3672 old_access_bmap = stp->st_access_bmap;
3673 set_access(open->op_share_access, stp);
3674
3675 /* Set new deny mask */
3676 old_deny_bmap = stp->st_deny_bmap;
3101 set_deny(open->op_share_deny, stp); 3677 set_deny(open->op_share_deny, stp);
3678 fp->fi_share_deny |= (open->op_share_deny & NFS4_SHARE_DENY_BOTH);
3102 3679
3103 return nfs_ok; 3680 if (!fp->fi_fds[oflag]) {
3681 spin_unlock(&fp->fi_lock);
3682 status = nfsd_open(rqstp, cur_fh, S_IFREG, access, &filp);
3683 if (status)
3684 goto out_put_access;
3685 spin_lock(&fp->fi_lock);
3686 if (!fp->fi_fds[oflag]) {
3687 fp->fi_fds[oflag] = filp;
3688 filp = NULL;
3689 }
3690 }
3691 spin_unlock(&fp->fi_lock);
3692 if (filp)
3693 fput(filp);
3694
3695 status = nfsd4_truncate(rqstp, cur_fh, open);
3696 if (status)
3697 goto out_put_access;
3698out:
3699 return status;
3700out_put_access:
3701 stp->st_access_bmap = old_access_bmap;
3702 nfs4_file_put_access(fp, open->op_share_access);
3703 reset_union_bmap_deny(bmap_to_share_mode(old_deny_bmap), stp);
3704 goto out;
3104} 3705}
3105 3706
3707static __be32
3708nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open)
3709{
3710 __be32 status;
3711 unsigned char old_deny_bmap;
3712
3713 if (!test_access(open->op_share_access, stp))
3714 return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open);
3715
3716 /* test and set deny mode */
3717 spin_lock(&fp->fi_lock);
3718 status = nfs4_file_check_deny(fp, open->op_share_deny);
3719 if (status == nfs_ok) {
3720 old_deny_bmap = stp->st_deny_bmap;
3721 set_deny(open->op_share_deny, stp);
3722 fp->fi_share_deny |=
3723 (open->op_share_deny & NFS4_SHARE_DENY_BOTH);
3724 }
3725 spin_unlock(&fp->fi_lock);
3726
3727 if (status != nfs_ok)
3728 return status;
3729
3730 status = nfsd4_truncate(rqstp, cur_fh, open);
3731 if (status != nfs_ok)
3732 reset_union_bmap_deny(old_deny_bmap, stp);
3733 return status;
3734}
3106 3735
3107static void 3736static void
3108nfs4_set_claim_prev(struct nfsd4_open *open, bool has_session) 3737nfs4_set_claim_prev(struct nfsd4_open *open, bool has_session)
@@ -3123,7 +3752,7 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
3123 return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN; 3752 return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
3124} 3753}
3125 3754
3126static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int flag) 3755static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag)
3127{ 3756{
3128 struct file_lock *fl; 3757 struct file_lock *fl;
3129 3758
@@ -3135,53 +3764,101 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f
3135 fl->fl_flags = FL_DELEG; 3764 fl->fl_flags = FL_DELEG;
3136 fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; 3765 fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
3137 fl->fl_end = OFFSET_MAX; 3766 fl->fl_end = OFFSET_MAX;
3138 fl->fl_owner = (fl_owner_t)(dp->dl_file); 3767 fl->fl_owner = (fl_owner_t)fp;
3139 fl->fl_pid = current->tgid; 3768 fl->fl_pid = current->tgid;
3140 return fl; 3769 return fl;
3141} 3770}
3142 3771
3143static int nfs4_setlease(struct nfs4_delegation *dp) 3772static int nfs4_setlease(struct nfs4_delegation *dp)
3144{ 3773{
3145 struct nfs4_file *fp = dp->dl_file; 3774 struct nfs4_file *fp = dp->dl_stid.sc_file;
3146 struct file_lock *fl; 3775 struct file_lock *fl;
3147 int status; 3776 struct file *filp;
3777 int status = 0;
3148 3778
3149 fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ); 3779 fl = nfs4_alloc_init_lease(fp, NFS4_OPEN_DELEGATE_READ);
3150 if (!fl) 3780 if (!fl)
3151 return -ENOMEM; 3781 return -ENOMEM;
3152 fl->fl_file = find_readable_file(fp); 3782 filp = find_readable_file(fp);
3153 status = vfs_setlease(fl->fl_file, fl->fl_type, &fl); 3783 if (!filp) {
3154 if (status) 3784 /* We should always have a readable file here */
3155 goto out_free; 3785 WARN_ON_ONCE(1);
3786 return -EBADF;
3787 }
3788 fl->fl_file = filp;
3789 status = vfs_setlease(filp, fl->fl_type, &fl);
3790 if (status) {
3791 locks_free_lock(fl);
3792 goto out_fput;
3793 }
3794 spin_lock(&state_lock);
3795 spin_lock(&fp->fi_lock);
3796 /* Did the lease get broken before we took the lock? */
3797 status = -EAGAIN;
3798 if (fp->fi_had_conflict)
3799 goto out_unlock;
3800 /* Race breaker */
3801 if (fp->fi_lease) {
3802 status = 0;
3803 atomic_inc(&fp->fi_delegees);
3804 hash_delegation_locked(dp, fp);
3805 goto out_unlock;
3806 }
3156 fp->fi_lease = fl; 3807 fp->fi_lease = fl;
3157 fp->fi_deleg_file = get_file(fl->fl_file); 3808 fp->fi_deleg_file = filp;
3158 atomic_set(&fp->fi_delegees, 1); 3809 atomic_set(&fp->fi_delegees, 1);
3159 spin_lock(&state_lock);
3160 hash_delegation_locked(dp, fp); 3810 hash_delegation_locked(dp, fp);
3811 spin_unlock(&fp->fi_lock);
3161 spin_unlock(&state_lock); 3812 spin_unlock(&state_lock);
3162 return 0; 3813 return 0;
3163out_free: 3814out_unlock:
3164 locks_free_lock(fl); 3815 spin_unlock(&fp->fi_lock);
3816 spin_unlock(&state_lock);
3817out_fput:
3818 fput(filp);
3165 return status; 3819 return status;
3166} 3820}
3167 3821
3168static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp) 3822static struct nfs4_delegation *
3823nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
3824 struct nfs4_file *fp)
3169{ 3825{
3826 int status;
3827 struct nfs4_delegation *dp;
3828
3170 if (fp->fi_had_conflict) 3829 if (fp->fi_had_conflict)
3171 return -EAGAIN; 3830 return ERR_PTR(-EAGAIN);
3831
3832 dp = alloc_init_deleg(clp, fh);
3833 if (!dp)
3834 return ERR_PTR(-ENOMEM);
3835
3172 get_nfs4_file(fp); 3836 get_nfs4_file(fp);
3173 dp->dl_file = fp;
3174 if (!fp->fi_lease)
3175 return nfs4_setlease(dp);
3176 spin_lock(&state_lock); 3837 spin_lock(&state_lock);
3838 spin_lock(&fp->fi_lock);
3839 dp->dl_stid.sc_file = fp;
3840 if (!fp->fi_lease) {
3841 spin_unlock(&fp->fi_lock);
3842 spin_unlock(&state_lock);
3843 status = nfs4_setlease(dp);
3844 goto out;
3845 }
3177 atomic_inc(&fp->fi_delegees); 3846 atomic_inc(&fp->fi_delegees);
3178 if (fp->fi_had_conflict) { 3847 if (fp->fi_had_conflict) {
3179 spin_unlock(&state_lock); 3848 status = -EAGAIN;
3180 return -EAGAIN; 3849 goto out_unlock;
3181 } 3850 }
3182 hash_delegation_locked(dp, fp); 3851 hash_delegation_locked(dp, fp);
3852 status = 0;
3853out_unlock:
3854 spin_unlock(&fp->fi_lock);
3183 spin_unlock(&state_lock); 3855 spin_unlock(&state_lock);
3184 return 0; 3856out:
3857 if (status) {
3858 nfs4_put_stid(&dp->dl_stid);
3859 return ERR_PTR(status);
3860 }
3861 return dp;
3185} 3862}
3186 3863
3187static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) 3864static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
@@ -3212,11 +3889,12 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
3212 * proper support for them. 3889 * proper support for them.
3213 */ 3890 */
3214static void 3891static void
3215nfs4_open_delegation(struct net *net, struct svc_fh *fh, 3892nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,
3216 struct nfsd4_open *open, struct nfs4_ol_stateid *stp) 3893 struct nfs4_ol_stateid *stp)
3217{ 3894{
3218 struct nfs4_delegation *dp; 3895 struct nfs4_delegation *dp;
3219 struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner); 3896 struct nfs4_openowner *oo = openowner(stp->st_stateowner);
3897 struct nfs4_client *clp = stp->st_stid.sc_client;
3220 int cb_up; 3898 int cb_up;
3221 int status = 0; 3899 int status = 0;
3222 3900
@@ -3235,7 +3913,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh,
3235 * Let's not give out any delegations till everyone's 3913 * Let's not give out any delegations till everyone's
3236 * had the chance to reclaim theirs.... 3914 * had the chance to reclaim theirs....
3237 */ 3915 */
3238 if (locks_in_grace(net)) 3916 if (locks_in_grace(clp->net))
3239 goto out_no_deleg; 3917 goto out_no_deleg;
3240 if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) 3918 if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED))
3241 goto out_no_deleg; 3919 goto out_no_deleg;
@@ -3254,21 +3932,17 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh,
3254 default: 3932 default:
3255 goto out_no_deleg; 3933 goto out_no_deleg;
3256 } 3934 }
3257 dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh); 3935 dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file);
3258 if (dp == NULL) 3936 if (IS_ERR(dp))
3259 goto out_no_deleg; 3937 goto out_no_deleg;
3260 status = nfs4_set_delegation(dp, stp->st_file);
3261 if (status)
3262 goto out_free;
3263 3938
3264 memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid)); 3939 memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid));
3265 3940
3266 dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", 3941 dprintk("NFSD: delegation stateid=" STATEID_FMT "\n",
3267 STATEID_VAL(&dp->dl_stid.sc_stateid)); 3942 STATEID_VAL(&dp->dl_stid.sc_stateid));
3268 open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; 3943 open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
3944 nfs4_put_stid(&dp->dl_stid);
3269 return; 3945 return;
3270out_free:
3271 destroy_delegation(dp);
3272out_no_deleg: 3946out_no_deleg:
3273 open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; 3947 open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE;
3274 if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && 3948 if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS &&
@@ -3301,16 +3975,12 @@ static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
3301 */ 3975 */
3302} 3976}
3303 3977
3304/*
3305 * called with nfs4_lock_state() held.
3306 */
3307__be32 3978__be32
3308nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) 3979nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
3309{ 3980{
3310 struct nfsd4_compoundres *resp = rqstp->rq_resp; 3981 struct nfsd4_compoundres *resp = rqstp->rq_resp;
3311 struct nfs4_client *cl = open->op_openowner->oo_owner.so_client; 3982 struct nfs4_client *cl = open->op_openowner->oo_owner.so_client;
3312 struct nfs4_file *fp = NULL; 3983 struct nfs4_file *fp = NULL;
3313 struct inode *ino = current_fh->fh_dentry->d_inode;
3314 struct nfs4_ol_stateid *stp = NULL; 3984 struct nfs4_ol_stateid *stp = NULL;
3315 struct nfs4_delegation *dp = NULL; 3985 struct nfs4_delegation *dp = NULL;
3316 __be32 status; 3986 __be32 status;
@@ -3320,21 +3990,18 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
3320 * and check for delegations in the process of being recalled. 3990 * and check for delegations in the process of being recalled.
3321 * If not found, create the nfs4_file struct 3991 * If not found, create the nfs4_file struct
3322 */ 3992 */
3323 fp = find_file(ino); 3993 fp = find_or_add_file(open->op_file, &current_fh->fh_handle);
3324 if (fp) { 3994 if (fp != open->op_file) {
3325 if ((status = nfs4_check_open(fp, open, &stp)))
3326 goto out;
3327 status = nfs4_check_deleg(cl, open, &dp); 3995 status = nfs4_check_deleg(cl, open, &dp);
3328 if (status) 3996 if (status)
3329 goto out; 3997 goto out;
3998 stp = nfsd4_find_existing_open(fp, open);
3330 } else { 3999 } else {
4000 open->op_file = NULL;
3331 status = nfserr_bad_stateid; 4001 status = nfserr_bad_stateid;
3332 if (nfsd4_is_deleg_cur(open)) 4002 if (nfsd4_is_deleg_cur(open))
3333 goto out; 4003 goto out;
3334 status = nfserr_jukebox; 4004 status = nfserr_jukebox;
3335 fp = open->op_file;
3336 open->op_file = NULL;
3337 nfsd4_init_file(fp, ino);
3338 } 4005 }
3339 4006
3340 /* 4007 /*
@@ -3347,22 +4014,19 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
3347 if (status) 4014 if (status)
3348 goto out; 4015 goto out;
3349 } else { 4016 } else {
3350 status = nfs4_get_vfs_file(rqstp, fp, current_fh, open);
3351 if (status)
3352 goto out;
3353 status = nfsd4_truncate(rqstp, current_fh, open);
3354 if (status)
3355 goto out;
3356 stp = open->op_stp; 4017 stp = open->op_stp;
3357 open->op_stp = NULL; 4018 open->op_stp = NULL;
3358 init_open_stateid(stp, fp, open); 4019 init_open_stateid(stp, fp, open);
4020 status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
4021 if (status) {
4022 release_open_stateid(stp);
4023 goto out;
4024 }
3359 } 4025 }
3360 update_stateid(&stp->st_stid.sc_stateid); 4026 update_stateid(&stp->st_stid.sc_stateid);
3361 memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); 4027 memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
3362 4028
3363 if (nfsd4_has_session(&resp->cstate)) { 4029 if (nfsd4_has_session(&resp->cstate)) {
3364 open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
3365
3366 if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { 4030 if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
3367 open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; 4031 open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
3368 open->op_why_no_deleg = WND4_NOT_WANTED; 4032 open->op_why_no_deleg = WND4_NOT_WANTED;
@@ -3374,7 +4038,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
3374 * Attempt to hand out a delegation. No error return, because the 4038 * Attempt to hand out a delegation. No error return, because the
3375 * OPEN succeeds even if we fail. 4039 * OPEN succeeds even if we fail.
3376 */ 4040 */
3377 nfs4_open_delegation(SVC_NET(rqstp), current_fh, open, stp); 4041 nfs4_open_delegation(current_fh, open, stp);
3378nodeleg: 4042nodeleg:
3379 status = nfs_ok; 4043 status = nfs_ok;
3380 4044
@@ -3397,41 +4061,27 @@ out:
3397 if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED) && 4061 if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED) &&
3398 !nfsd4_has_session(&resp->cstate)) 4062 !nfsd4_has_session(&resp->cstate))
3399 open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; 4063 open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM;
4064 if (dp)
4065 nfs4_put_stid(&dp->dl_stid);
4066 if (stp)
4067 nfs4_put_stid(&stp->st_stid);
3400 4068
3401 return status; 4069 return status;
3402} 4070}
3403 4071
3404void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status) 4072void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
4073 struct nfsd4_open *open, __be32 status)
3405{ 4074{
3406 if (open->op_openowner) { 4075 if (open->op_openowner) {
3407 struct nfs4_openowner *oo = open->op_openowner; 4076 struct nfs4_stateowner *so = &open->op_openowner->oo_owner;
3408 4077
3409 if (!list_empty(&oo->oo_owner.so_stateids)) 4078 nfsd4_cstate_assign_replay(cstate, so);
3410 list_del_init(&oo->oo_close_lru); 4079 nfs4_put_stateowner(so);
3411 if (oo->oo_flags & NFS4_OO_NEW) {
3412 if (status) {
3413 release_openowner(oo);
3414 open->op_openowner = NULL;
3415 } else
3416 oo->oo_flags &= ~NFS4_OO_NEW;
3417 }
3418 } 4080 }
3419 if (open->op_file) 4081 if (open->op_file)
3420 nfsd4_free_file(open->op_file); 4082 nfsd4_free_file(open->op_file);
3421 if (open->op_stp) 4083 if (open->op_stp)
3422 free_generic_stateid(open->op_stp); 4084 nfs4_put_stid(&open->op_stp->st_stid);
3423}
3424
3425static __be32 lookup_clientid(clientid_t *clid, bool session, struct nfsd_net *nn, struct nfs4_client **clp)
3426{
3427 struct nfs4_client *found;
3428
3429 if (STALE_CLIENTID(clid, nn))
3430 return nfserr_stale_clientid;
3431 found = find_confirmed_client(clid, session, nn);
3432 if (clp)
3433 *clp = found;
3434 return found ? nfs_ok : nfserr_expired;
3435} 4085}
3436 4086
3437__be32 4087__be32
@@ -3442,19 +4092,18 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3442 __be32 status; 4092 __be32 status;
3443 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); 4093 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
3444 4094
3445 nfs4_lock_state();
3446 dprintk("process_renew(%08x/%08x): starting\n", 4095 dprintk("process_renew(%08x/%08x): starting\n",
3447 clid->cl_boot, clid->cl_id); 4096 clid->cl_boot, clid->cl_id);
3448 status = lookup_clientid(clid, cstate->minorversion, nn, &clp); 4097 status = lookup_clientid(clid, cstate, nn);
3449 if (status) 4098 if (status)
3450 goto out; 4099 goto out;
4100 clp = cstate->clp;
3451 status = nfserr_cb_path_down; 4101 status = nfserr_cb_path_down;
3452 if (!list_empty(&clp->cl_delegations) 4102 if (!list_empty(&clp->cl_delegations)
3453 && clp->cl_cb_state != NFSD4_CB_UP) 4103 && clp->cl_cb_state != NFSD4_CB_UP)
3454 goto out; 4104 goto out;
3455 status = nfs_ok; 4105 status = nfs_ok;
3456out: 4106out:
3457 nfs4_unlock_state();
3458 return status; 4107 return status;
3459} 4108}
3460 4109
@@ -3483,12 +4132,11 @@ nfs4_laundromat(struct nfsd_net *nn)
3483 struct nfs4_client *clp; 4132 struct nfs4_client *clp;
3484 struct nfs4_openowner *oo; 4133 struct nfs4_openowner *oo;
3485 struct nfs4_delegation *dp; 4134 struct nfs4_delegation *dp;
4135 struct nfs4_ol_stateid *stp;
3486 struct list_head *pos, *next, reaplist; 4136 struct list_head *pos, *next, reaplist;
3487 time_t cutoff = get_seconds() - nn->nfsd4_lease; 4137 time_t cutoff = get_seconds() - nn->nfsd4_lease;
3488 time_t t, new_timeo = nn->nfsd4_lease; 4138 time_t t, new_timeo = nn->nfsd4_lease;
3489 4139
3490 nfs4_lock_state();
3491
3492 dprintk("NFSD: laundromat service - starting\n"); 4140 dprintk("NFSD: laundromat service - starting\n");
3493 nfsd4_end_grace(nn); 4141 nfsd4_end_grace(nn);
3494 INIT_LIST_HEAD(&reaplist); 4142 INIT_LIST_HEAD(&reaplist);
@@ -3505,13 +4153,14 @@ nfs4_laundromat(struct nfsd_net *nn)
3505 clp->cl_clientid.cl_id); 4153 clp->cl_clientid.cl_id);
3506 continue; 4154 continue;
3507 } 4155 }
3508 list_move(&clp->cl_lru, &reaplist); 4156 list_add(&clp->cl_lru, &reaplist);
3509 } 4157 }
3510 spin_unlock(&nn->client_lock); 4158 spin_unlock(&nn->client_lock);
3511 list_for_each_safe(pos, next, &reaplist) { 4159 list_for_each_safe(pos, next, &reaplist) {
3512 clp = list_entry(pos, struct nfs4_client, cl_lru); 4160 clp = list_entry(pos, struct nfs4_client, cl_lru);
3513 dprintk("NFSD: purging unused client (clientid %08x)\n", 4161 dprintk("NFSD: purging unused client (clientid %08x)\n",
3514 clp->cl_clientid.cl_id); 4162 clp->cl_clientid.cl_id);
4163 list_del_init(&clp->cl_lru);
3515 expire_client(clp); 4164 expire_client(clp);
3516 } 4165 }
3517 spin_lock(&state_lock); 4166 spin_lock(&state_lock);
@@ -3524,24 +4173,37 @@ nfs4_laundromat(struct nfsd_net *nn)
3524 new_timeo = min(new_timeo, t); 4173 new_timeo = min(new_timeo, t);
3525 break; 4174 break;
3526 } 4175 }
3527 list_move(&dp->dl_recall_lru, &reaplist); 4176 unhash_delegation_locked(dp);
4177 list_add(&dp->dl_recall_lru, &reaplist);
3528 } 4178 }
3529 spin_unlock(&state_lock); 4179 spin_unlock(&state_lock);
3530 list_for_each_safe(pos, next, &reaplist) { 4180 while (!list_empty(&reaplist)) {
3531 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); 4181 dp = list_first_entry(&reaplist, struct nfs4_delegation,
4182 dl_recall_lru);
4183 list_del_init(&dp->dl_recall_lru);
3532 revoke_delegation(dp); 4184 revoke_delegation(dp);
3533 } 4185 }
3534 list_for_each_safe(pos, next, &nn->close_lru) { 4186
3535 oo = container_of(pos, struct nfs4_openowner, oo_close_lru); 4187 spin_lock(&nn->client_lock);
3536 if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) { 4188 while (!list_empty(&nn->close_lru)) {
4189 oo = list_first_entry(&nn->close_lru, struct nfs4_openowner,
4190 oo_close_lru);
4191 if (time_after((unsigned long)oo->oo_time,
4192 (unsigned long)cutoff)) {
3537 t = oo->oo_time - cutoff; 4193 t = oo->oo_time - cutoff;
3538 new_timeo = min(new_timeo, t); 4194 new_timeo = min(new_timeo, t);
3539 break; 4195 break;
3540 } 4196 }
3541 release_openowner(oo); 4197 list_del_init(&oo->oo_close_lru);
4198 stp = oo->oo_last_closed_stid;
4199 oo->oo_last_closed_stid = NULL;
4200 spin_unlock(&nn->client_lock);
4201 nfs4_put_stid(&stp->st_stid);
4202 spin_lock(&nn->client_lock);
3542 } 4203 }
4204 spin_unlock(&nn->client_lock);
4205
3543 new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); 4206 new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
3544 nfs4_unlock_state();
3545 return new_timeo; 4207 return new_timeo;
3546} 4208}
3547 4209
@@ -3564,7 +4226,7 @@ laundromat_main(struct work_struct *laundry)
3564 4226
3565static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp) 4227static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp)
3566{ 4228{
3567 if (fhp->fh_dentry->d_inode != stp->st_file->fi_inode) 4229 if (!nfsd_fh_match(&fhp->fh_handle, &stp->st_stid.sc_file->fi_fhandle))
3568 return nfserr_bad_stateid; 4230 return nfserr_bad_stateid;
3569 return nfs_ok; 4231 return nfs_ok;
3570} 4232}
@@ -3666,10 +4328,10 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
3666{ 4328{
3667 struct nfs4_stid *s; 4329 struct nfs4_stid *s;
3668 struct nfs4_ol_stateid *ols; 4330 struct nfs4_ol_stateid *ols;
3669 __be32 status; 4331 __be32 status = nfserr_bad_stateid;
3670 4332
3671 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) 4333 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
3672 return nfserr_bad_stateid; 4334 return status;
3673 /* Client debugging aid. */ 4335 /* Client debugging aid. */
3674 if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) { 4336 if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) {
3675 char addr_str[INET6_ADDRSTRLEN]; 4337 char addr_str[INET6_ADDRSTRLEN];
@@ -3677,53 +4339,62 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
3677 sizeof(addr_str)); 4339 sizeof(addr_str));
3678 pr_warn_ratelimited("NFSD: client %s testing state ID " 4340 pr_warn_ratelimited("NFSD: client %s testing state ID "
3679 "with incorrect client ID\n", addr_str); 4341 "with incorrect client ID\n", addr_str);
3680 return nfserr_bad_stateid; 4342 return status;
3681 } 4343 }
3682 s = find_stateid(cl, stateid); 4344 spin_lock(&cl->cl_lock);
4345 s = find_stateid_locked(cl, stateid);
3683 if (!s) 4346 if (!s)
3684 return nfserr_bad_stateid; 4347 goto out_unlock;
3685 status = check_stateid_generation(stateid, &s->sc_stateid, 1); 4348 status = check_stateid_generation(stateid, &s->sc_stateid, 1);
3686 if (status) 4349 if (status)
3687 return status; 4350 goto out_unlock;
3688 switch (s->sc_type) { 4351 switch (s->sc_type) {
3689 case NFS4_DELEG_STID: 4352 case NFS4_DELEG_STID:
3690 return nfs_ok; 4353 status = nfs_ok;
4354 break;
3691 case NFS4_REVOKED_DELEG_STID: 4355 case NFS4_REVOKED_DELEG_STID:
3692 return nfserr_deleg_revoked; 4356 status = nfserr_deleg_revoked;
4357 break;
3693 case NFS4_OPEN_STID: 4358 case NFS4_OPEN_STID:
3694 case NFS4_LOCK_STID: 4359 case NFS4_LOCK_STID:
3695 ols = openlockstateid(s); 4360 ols = openlockstateid(s);
3696 if (ols->st_stateowner->so_is_open_owner 4361 if (ols->st_stateowner->so_is_open_owner
3697 && !(openowner(ols->st_stateowner)->oo_flags 4362 && !(openowner(ols->st_stateowner)->oo_flags
3698 & NFS4_OO_CONFIRMED)) 4363 & NFS4_OO_CONFIRMED))
3699 return nfserr_bad_stateid; 4364 status = nfserr_bad_stateid;
3700 return nfs_ok; 4365 else
4366 status = nfs_ok;
4367 break;
3701 default: 4368 default:
3702 printk("unknown stateid type %x\n", s->sc_type); 4369 printk("unknown stateid type %x\n", s->sc_type);
4370 /* Fallthrough */
3703 case NFS4_CLOSED_STID: 4371 case NFS4_CLOSED_STID:
3704 return nfserr_bad_stateid; 4372 case NFS4_CLOSED_DELEG_STID:
4373 status = nfserr_bad_stateid;
3705 } 4374 }
4375out_unlock:
4376 spin_unlock(&cl->cl_lock);
4377 return status;
3706} 4378}
3707 4379
3708static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, 4380static __be32
3709 struct nfs4_stid **s, bool sessions, 4381nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
3710 struct nfsd_net *nn) 4382 stateid_t *stateid, unsigned char typemask,
4383 struct nfs4_stid **s, struct nfsd_net *nn)
3711{ 4384{
3712 struct nfs4_client *cl;
3713 __be32 status; 4385 __be32 status;
3714 4386
3715 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) 4387 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
3716 return nfserr_bad_stateid; 4388 return nfserr_bad_stateid;
3717 status = lookup_clientid(&stateid->si_opaque.so_clid, sessions, 4389 status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn);
3718 nn, &cl);
3719 if (status == nfserr_stale_clientid) { 4390 if (status == nfserr_stale_clientid) {
3720 if (sessions) 4391 if (cstate->session)
3721 return nfserr_bad_stateid; 4392 return nfserr_bad_stateid;
3722 return nfserr_stale_stateid; 4393 return nfserr_stale_stateid;
3723 } 4394 }
3724 if (status) 4395 if (status)
3725 return status; 4396 return status;
3726 *s = find_stateid_by_type(cl, stateid, typemask); 4397 *s = find_stateid_by_type(cstate->clp, stateid, typemask);
3727 if (!*s) 4398 if (!*s)
3728 return nfserr_bad_stateid; 4399 return nfserr_bad_stateid;
3729 return nfs_ok; 4400 return nfs_ok;
@@ -3754,12 +4425,11 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
3754 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) 4425 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
3755 return check_special_stateids(net, current_fh, stateid, flags); 4426 return check_special_stateids(net, current_fh, stateid, flags);
3756 4427
3757 nfs4_lock_state(); 4428 status = nfsd4_lookup_stateid(cstate, stateid,
3758 4429 NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
3759 status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, 4430 &s, nn);
3760 &s, cstate->minorversion, nn);
3761 if (status) 4431 if (status)
3762 goto out; 4432 return status;
3763 status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); 4433 status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate));
3764 if (status) 4434 if (status)
3765 goto out; 4435 goto out;
@@ -3770,12 +4440,13 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
3770 if (status) 4440 if (status)
3771 goto out; 4441 goto out;
3772 if (filpp) { 4442 if (filpp) {
3773 file = dp->dl_file->fi_deleg_file; 4443 file = dp->dl_stid.sc_file->fi_deleg_file;
3774 if (!file) { 4444 if (!file) {
3775 WARN_ON_ONCE(1); 4445 WARN_ON_ONCE(1);
3776 status = nfserr_serverfault; 4446 status = nfserr_serverfault;
3777 goto out; 4447 goto out;
3778 } 4448 }
4449 get_file(file);
3779 } 4450 }
3780 break; 4451 break;
3781 case NFS4_OPEN_STID: 4452 case NFS4_OPEN_STID:
@@ -3791,10 +4462,12 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
3791 if (status) 4462 if (status)
3792 goto out; 4463 goto out;
3793 if (filpp) { 4464 if (filpp) {
4465 struct nfs4_file *fp = stp->st_stid.sc_file;
4466
3794 if (flags & RD_STATE) 4467 if (flags & RD_STATE)
3795 file = find_readable_file(stp->st_file); 4468 file = find_readable_file(fp);
3796 else 4469 else
3797 file = find_writeable_file(stp->st_file); 4470 file = find_writeable_file(fp);
3798 } 4471 }
3799 break; 4472 break;
3800 default: 4473 default:
@@ -3803,28 +4476,12 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
3803 } 4476 }
3804 status = nfs_ok; 4477 status = nfs_ok;
3805 if (file) 4478 if (file)
3806 *filpp = get_file(file); 4479 *filpp = file;
3807out: 4480out:
3808 nfs4_unlock_state(); 4481 nfs4_put_stid(s);
3809 return status; 4482 return status;
3810} 4483}
3811 4484
3812static __be32
3813nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp)
3814{
3815 struct nfs4_lockowner *lo = lockowner(stp->st_stateowner);
3816
3817 if (check_for_locks(stp->st_file, lo))
3818 return nfserr_locks_held;
3819 /*
3820 * Currently there's a 1-1 lock stateid<->lockowner
3821 * correspondance, and we have to delete the lockowner when we
3822 * delete the lock stateid:
3823 */
3824 release_lockowner(lo);
3825 return nfs_ok;
3826}
3827
3828/* 4485/*
3829 * Test if the stateid is valid 4486 * Test if the stateid is valid
3830 */ 4487 */
@@ -3835,11 +4492,9 @@ nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3835 struct nfsd4_test_stateid_id *stateid; 4492 struct nfsd4_test_stateid_id *stateid;
3836 struct nfs4_client *cl = cstate->session->se_client; 4493 struct nfs4_client *cl = cstate->session->se_client;
3837 4494
3838 nfs4_lock_state();
3839 list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list) 4495 list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list)
3840 stateid->ts_id_status = 4496 stateid->ts_id_status =
3841 nfsd4_validate_stateid(cl, &stateid->ts_id_stateid); 4497 nfsd4_validate_stateid(cl, &stateid->ts_id_stateid);
3842 nfs4_unlock_state();
3843 4498
3844 return nfs_ok; 4499 return nfs_ok;
3845} 4500}
@@ -3851,37 +4506,50 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3851 stateid_t *stateid = &free_stateid->fr_stateid; 4506 stateid_t *stateid = &free_stateid->fr_stateid;
3852 struct nfs4_stid *s; 4507 struct nfs4_stid *s;
3853 struct nfs4_delegation *dp; 4508 struct nfs4_delegation *dp;
4509 struct nfs4_ol_stateid *stp;
3854 struct nfs4_client *cl = cstate->session->se_client; 4510 struct nfs4_client *cl = cstate->session->se_client;
3855 __be32 ret = nfserr_bad_stateid; 4511 __be32 ret = nfserr_bad_stateid;
3856 4512
3857 nfs4_lock_state(); 4513 spin_lock(&cl->cl_lock);
3858 s = find_stateid(cl, stateid); 4514 s = find_stateid_locked(cl, stateid);
3859 if (!s) 4515 if (!s)
3860 goto out; 4516 goto out_unlock;
3861 switch (s->sc_type) { 4517 switch (s->sc_type) {
3862 case NFS4_DELEG_STID: 4518 case NFS4_DELEG_STID:
3863 ret = nfserr_locks_held; 4519 ret = nfserr_locks_held;
3864 goto out; 4520 break;
3865 case NFS4_OPEN_STID: 4521 case NFS4_OPEN_STID:
3866 case NFS4_LOCK_STID:
3867 ret = check_stateid_generation(stateid, &s->sc_stateid, 1); 4522 ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
3868 if (ret) 4523 if (ret)
3869 goto out; 4524 break;
3870 if (s->sc_type == NFS4_LOCK_STID) 4525 ret = nfserr_locks_held;
3871 ret = nfsd4_free_lock_stateid(openlockstateid(s));
3872 else
3873 ret = nfserr_locks_held;
3874 break; 4526 break;
4527 case NFS4_LOCK_STID:
4528 ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
4529 if (ret)
4530 break;
4531 stp = openlockstateid(s);
4532 ret = nfserr_locks_held;
4533 if (check_for_locks(stp->st_stid.sc_file,
4534 lockowner(stp->st_stateowner)))
4535 break;
4536 unhash_lock_stateid(stp);
4537 spin_unlock(&cl->cl_lock);
4538 nfs4_put_stid(s);
4539 ret = nfs_ok;
4540 goto out;
3875 case NFS4_REVOKED_DELEG_STID: 4541 case NFS4_REVOKED_DELEG_STID:
3876 dp = delegstateid(s); 4542 dp = delegstateid(s);
3877 destroy_revoked_delegation(dp); 4543 list_del_init(&dp->dl_recall_lru);
4544 spin_unlock(&cl->cl_lock);
4545 nfs4_put_stid(s);
3878 ret = nfs_ok; 4546 ret = nfs_ok;
3879 break; 4547 goto out;
3880 default: 4548 /* Default falls through and returns nfserr_bad_stateid */
3881 ret = nfserr_bad_stateid;
3882 } 4549 }
4550out_unlock:
4551 spin_unlock(&cl->cl_lock);
3883out: 4552out:
3884 nfs4_unlock_state();
3885 return ret; 4553 return ret;
3886} 4554}
3887 4555
@@ -3926,20 +4594,24 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
3926{ 4594{
3927 __be32 status; 4595 __be32 status;
3928 struct nfs4_stid *s; 4596 struct nfs4_stid *s;
4597 struct nfs4_ol_stateid *stp = NULL;
3929 4598
3930 dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__, 4599 dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__,
3931 seqid, STATEID_VAL(stateid)); 4600 seqid, STATEID_VAL(stateid));
3932 4601
3933 *stpp = NULL; 4602 *stpp = NULL;
3934 status = nfsd4_lookup_stateid(stateid, typemask, &s, 4603 status = nfsd4_lookup_stateid(cstate, stateid, typemask, &s, nn);
3935 cstate->minorversion, nn);
3936 if (status) 4604 if (status)
3937 return status; 4605 return status;
3938 *stpp = openlockstateid(s); 4606 stp = openlockstateid(s);
3939 if (!nfsd4_has_session(cstate)) 4607 nfsd4_cstate_assign_replay(cstate, stp->st_stateowner);
3940 cstate->replay_owner = (*stpp)->st_stateowner;
3941 4608
3942 return nfs4_seqid_op_checks(cstate, stateid, seqid, *stpp); 4609 status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp);
4610 if (!status)
4611 *stpp = stp;
4612 else
4613 nfs4_put_stid(&stp->st_stid);
4614 return status;
3943} 4615}
3944 4616
3945static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, 4617static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
@@ -3947,14 +4619,18 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
3947{ 4619{
3948 __be32 status; 4620 __be32 status;
3949 struct nfs4_openowner *oo; 4621 struct nfs4_openowner *oo;
4622 struct nfs4_ol_stateid *stp;
3950 4623
3951 status = nfs4_preprocess_seqid_op(cstate, seqid, stateid, 4624 status = nfs4_preprocess_seqid_op(cstate, seqid, stateid,
3952 NFS4_OPEN_STID, stpp, nn); 4625 NFS4_OPEN_STID, &stp, nn);
3953 if (status) 4626 if (status)
3954 return status; 4627 return status;
3955 oo = openowner((*stpp)->st_stateowner); 4628 oo = openowner(stp->st_stateowner);
3956 if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) 4629 if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
4630 nfs4_put_stid(&stp->st_stid);
3957 return nfserr_bad_stateid; 4631 return nfserr_bad_stateid;
4632 }
4633 *stpp = stp;
3958 return nfs_ok; 4634 return nfs_ok;
3959} 4635}
3960 4636
@@ -3974,8 +4650,6 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3974 if (status) 4650 if (status)
3975 return status; 4651 return status;
3976 4652
3977 nfs4_lock_state();
3978
3979 status = nfs4_preprocess_seqid_op(cstate, 4653 status = nfs4_preprocess_seqid_op(cstate,
3980 oc->oc_seqid, &oc->oc_req_stateid, 4654 oc->oc_seqid, &oc->oc_req_stateid,
3981 NFS4_OPEN_STID, &stp, nn); 4655 NFS4_OPEN_STID, &stp, nn);
@@ -3984,7 +4658,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3984 oo = openowner(stp->st_stateowner); 4658 oo = openowner(stp->st_stateowner);
3985 status = nfserr_bad_stateid; 4659 status = nfserr_bad_stateid;
3986 if (oo->oo_flags & NFS4_OO_CONFIRMED) 4660 if (oo->oo_flags & NFS4_OO_CONFIRMED)
3987 goto out; 4661 goto put_stateid;
3988 oo->oo_flags |= NFS4_OO_CONFIRMED; 4662 oo->oo_flags |= NFS4_OO_CONFIRMED;
3989 update_stateid(&stp->st_stid.sc_stateid); 4663 update_stateid(&stp->st_stid.sc_stateid);
3990 memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); 4664 memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
@@ -3993,10 +4667,10 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3993 4667
3994 nfsd4_client_record_create(oo->oo_owner.so_client); 4668 nfsd4_client_record_create(oo->oo_owner.so_client);
3995 status = nfs_ok; 4669 status = nfs_ok;
4670put_stateid:
4671 nfs4_put_stid(&stp->st_stid);
3996out: 4672out:
3997 nfsd4_bump_seqid(cstate, status); 4673 nfsd4_bump_seqid(cstate, status);
3998 if (!cstate->replay_owner)
3999 nfs4_unlock_state();
4000 return status; 4674 return status;
4001} 4675}
4002 4676
@@ -4004,7 +4678,7 @@ static inline void nfs4_stateid_downgrade_bit(struct nfs4_ol_stateid *stp, u32 a
4004{ 4678{
4005 if (!test_access(access, stp)) 4679 if (!test_access(access, stp))
4006 return; 4680 return;
4007 nfs4_file_put_access(stp->st_file, nfs4_access_to_omode(access)); 4681 nfs4_file_put_access(stp->st_stid.sc_file, access);
4008 clear_access(access, stp); 4682 clear_access(access, stp);
4009} 4683}
4010 4684
@@ -4026,16 +4700,6 @@ static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_ac
4026 } 4700 }
4027} 4701}
4028 4702
4029static void
4030reset_union_bmap_deny(unsigned long deny, struct nfs4_ol_stateid *stp)
4031{
4032 int i;
4033 for (i = 0; i < 4; i++) {
4034 if ((i & deny) != i)
4035 clear_deny(i, stp);
4036 }
4037}
4038
4039__be32 4703__be32
4040nfsd4_open_downgrade(struct svc_rqst *rqstp, 4704nfsd4_open_downgrade(struct svc_rqst *rqstp,
4041 struct nfsd4_compound_state *cstate, 4705 struct nfsd4_compound_state *cstate,
@@ -4053,21 +4717,20 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
4053 dprintk("NFSD: %s: od_deleg_want=0x%x ignored\n", __func__, 4717 dprintk("NFSD: %s: od_deleg_want=0x%x ignored\n", __func__,
4054 od->od_deleg_want); 4718 od->od_deleg_want);
4055 4719
4056 nfs4_lock_state();
4057 status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid, 4720 status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid,
4058 &od->od_stateid, &stp, nn); 4721 &od->od_stateid, &stp, nn);
4059 if (status) 4722 if (status)
4060 goto out; 4723 goto out;
4061 status = nfserr_inval; 4724 status = nfserr_inval;
4062 if (!test_access(od->od_share_access, stp)) { 4725 if (!test_access(od->od_share_access, stp)) {
4063 dprintk("NFSD: access not a subset current bitmap: 0x%lx, input access=%08x\n", 4726 dprintk("NFSD: access not a subset of current bitmap: 0x%hhx, input access=%08x\n",
4064 stp->st_access_bmap, od->od_share_access); 4727 stp->st_access_bmap, od->od_share_access);
4065 goto out; 4728 goto put_stateid;
4066 } 4729 }
4067 if (!test_deny(od->od_share_deny, stp)) { 4730 if (!test_deny(od->od_share_deny, stp)) {
4068 dprintk("NFSD:deny not a subset current bitmap: 0x%lx, input deny=%08x\n", 4731 dprintk("NFSD: deny not a subset of current bitmap: 0x%hhx, input deny=%08x\n",
4069 stp->st_deny_bmap, od->od_share_deny); 4732 stp->st_deny_bmap, od->od_share_deny);
4070 goto out; 4733 goto put_stateid;
4071 } 4734 }
4072 nfs4_stateid_downgrade(stp, od->od_share_access); 4735 nfs4_stateid_downgrade(stp, od->od_share_access);
4073 4736
@@ -4076,17 +4739,31 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
4076 update_stateid(&stp->st_stid.sc_stateid); 4739 update_stateid(&stp->st_stid.sc_stateid);
4077 memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); 4740 memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
4078 status = nfs_ok; 4741 status = nfs_ok;
4742put_stateid:
4743 nfs4_put_stid(&stp->st_stid);
4079out: 4744out:
4080 nfsd4_bump_seqid(cstate, status); 4745 nfsd4_bump_seqid(cstate, status);
4081 if (!cstate->replay_owner)
4082 nfs4_unlock_state();
4083 return status; 4746 return status;
4084} 4747}
4085 4748
4086static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) 4749static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
4087{ 4750{
4088 unhash_open_stateid(s); 4751 struct nfs4_client *clp = s->st_stid.sc_client;
4752 LIST_HEAD(reaplist);
4753
4089 s->st_stid.sc_type = NFS4_CLOSED_STID; 4754 s->st_stid.sc_type = NFS4_CLOSED_STID;
4755 spin_lock(&clp->cl_lock);
4756 unhash_open_stateid(s, &reaplist);
4757
4758 if (clp->cl_minorversion) {
4759 put_ol_stateid_locked(s, &reaplist);
4760 spin_unlock(&clp->cl_lock);
4761 free_ol_stateid_reaplist(&reaplist);
4762 } else {
4763 spin_unlock(&clp->cl_lock);
4764 free_ol_stateid_reaplist(&reaplist);
4765 move_to_close_lru(s, clp->net);
4766 }
4090} 4767}
4091 4768
4092/* 4769/*
@@ -4097,7 +4774,6 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4097 struct nfsd4_close *close) 4774 struct nfsd4_close *close)
4098{ 4775{
4099 __be32 status; 4776 __be32 status;
4100 struct nfs4_openowner *oo;
4101 struct nfs4_ol_stateid *stp; 4777 struct nfs4_ol_stateid *stp;
4102 struct net *net = SVC_NET(rqstp); 4778 struct net *net = SVC_NET(rqstp);
4103 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 4779 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -4105,7 +4781,6 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4105 dprintk("NFSD: nfsd4_close on file %pd\n", 4781 dprintk("NFSD: nfsd4_close on file %pd\n",
4106 cstate->current_fh.fh_dentry); 4782 cstate->current_fh.fh_dentry);
4107 4783
4108 nfs4_lock_state();
4109 status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid, 4784 status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid,
4110 &close->cl_stateid, 4785 &close->cl_stateid,
4111 NFS4_OPEN_STID|NFS4_CLOSED_STID, 4786 NFS4_OPEN_STID|NFS4_CLOSED_STID,
@@ -4113,31 +4788,14 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4113 nfsd4_bump_seqid(cstate, status); 4788 nfsd4_bump_seqid(cstate, status);
4114 if (status) 4789 if (status)
4115 goto out; 4790 goto out;
4116 oo = openowner(stp->st_stateowner);
4117 update_stateid(&stp->st_stid.sc_stateid); 4791 update_stateid(&stp->st_stid.sc_stateid);
4118 memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); 4792 memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
4119 4793
4120 nfsd4_close_open_stateid(stp); 4794 nfsd4_close_open_stateid(stp);
4121 4795
4122 if (cstate->minorversion) 4796 /* put reference from nfs4_preprocess_seqid_op */
4123 free_generic_stateid(stp); 4797 nfs4_put_stid(&stp->st_stid);
4124 else
4125 oo->oo_last_closed_stid = stp;
4126
4127 if (list_empty(&oo->oo_owner.so_stateids)) {
4128 if (cstate->minorversion)
4129 release_openowner(oo);
4130 else {
4131 /*
4132 * In the 4.0 case we need to keep the owners around a
4133 * little while to handle CLOSE replay.
4134 */
4135 move_to_close_lru(oo, SVC_NET(rqstp));
4136 }
4137 }
4138out: 4798out:
4139 if (!cstate->replay_owner)
4140 nfs4_unlock_state();
4141 return status; 4799 return status;
4142} 4800}
4143 4801
@@ -4154,28 +4812,24 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4154 if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) 4812 if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
4155 return status; 4813 return status;
4156 4814
4157 nfs4_lock_state(); 4815 status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID, &s, nn);
4158 status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s,
4159 cstate->minorversion, nn);
4160 if (status) 4816 if (status)
4161 goto out; 4817 goto out;
4162 dp = delegstateid(s); 4818 dp = delegstateid(s);
4163 status = check_stateid_generation(stateid, &dp->dl_stid.sc_stateid, nfsd4_has_session(cstate)); 4819 status = check_stateid_generation(stateid, &dp->dl_stid.sc_stateid, nfsd4_has_session(cstate));
4164 if (status) 4820 if (status)
4165 goto out; 4821 goto put_stateid;
4166 4822
4167 destroy_delegation(dp); 4823 destroy_delegation(dp);
4824put_stateid:
4825 nfs4_put_stid(&dp->dl_stid);
4168out: 4826out:
4169 nfs4_unlock_state();
4170
4171 return status; 4827 return status;
4172} 4828}
4173 4829
4174 4830
4175#define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start)) 4831#define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start))
4176 4832
4177#define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1)
4178
4179static inline u64 4833static inline u64
4180end_offset(u64 start, u64 len) 4834end_offset(u64 start, u64 len)
4181{ 4835{
@@ -4196,13 +4850,6 @@ last_byte_offset(u64 start, u64 len)
4196 return end > start ? end - 1: NFS4_MAX_UINT64; 4850 return end > start ? end - 1: NFS4_MAX_UINT64;
4197} 4851}
4198 4852
4199static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct xdr_netobj *ownername)
4200{
4201 return (file_hashval(inode) + cl_id
4202 + opaque_hashval(ownername->data, ownername->len))
4203 & LOCKOWNER_INO_HASH_MASK;
4204}
4205
4206/* 4853/*
4207 * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that 4854 * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
4208 * we can't properly handle lock requests that go beyond the (2^63 - 1)-th 4855 * we can't properly handle lock requests that go beyond the (2^63 - 1)-th
@@ -4255,47 +4902,56 @@ nevermind:
4255 deny->ld_type = NFS4_WRITE_LT; 4902 deny->ld_type = NFS4_WRITE_LT;
4256} 4903}
4257 4904
4258static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) 4905static struct nfs4_lockowner *
4906find_lockowner_str_locked(clientid_t *clid, struct xdr_netobj *owner,
4907 struct nfs4_client *clp)
4259{ 4908{
4260 struct nfs4_ol_stateid *lst; 4909 unsigned int strhashval = ownerstr_hashval(owner);
4910 struct nfs4_stateowner *so;
4261 4911
4262 if (!same_owner_str(&lo->lo_owner, owner, clid)) 4912 lockdep_assert_held(&clp->cl_lock);
4263 return false; 4913
4264 if (list_empty(&lo->lo_owner.so_stateids)) { 4914 list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[strhashval],
4265 WARN_ON_ONCE(1); 4915 so_strhash) {
4266 return false; 4916 if (so->so_is_open_owner)
4917 continue;
4918 if (!same_owner_str(so, owner))
4919 continue;
4920 atomic_inc(&so->so_count);
4921 return lockowner(so);
4267 } 4922 }
4268 lst = list_first_entry(&lo->lo_owner.so_stateids, 4923 return NULL;
4269 struct nfs4_ol_stateid, st_perstateowner);
4270 return lst->st_file->fi_inode == inode;
4271} 4924}
4272 4925
4273static struct nfs4_lockowner * 4926static struct nfs4_lockowner *
4274find_lockowner_str(struct inode *inode, clientid_t *clid, 4927find_lockowner_str(clientid_t *clid, struct xdr_netobj *owner,
4275 struct xdr_netobj *owner, struct nfsd_net *nn) 4928 struct nfs4_client *clp)
4276{ 4929{
4277 unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner);
4278 struct nfs4_lockowner *lo; 4930 struct nfs4_lockowner *lo;
4279 4931
4280 list_for_each_entry(lo, &nn->lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) { 4932 spin_lock(&clp->cl_lock);
4281 if (same_lockowner_ino(lo, inode, clid, owner)) 4933 lo = find_lockowner_str_locked(clid, owner, clp);
4282 return lo; 4934 spin_unlock(&clp->cl_lock);
4283 } 4935 return lo;
4284 return NULL;
4285} 4936}
4286 4937
4287static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp) 4938static void nfs4_unhash_lockowner(struct nfs4_stateowner *sop)
4288{ 4939{
4289 struct inode *inode = open_stp->st_file->fi_inode; 4940 unhash_lockowner_locked(lockowner(sop));
4290 unsigned int inohash = lockowner_ino_hashval(inode, 4941}
4291 clp->cl_clientid.cl_id, &lo->lo_owner.so_owner); 4942
4292 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); 4943static void nfs4_free_lockowner(struct nfs4_stateowner *sop)
4944{
4945 struct nfs4_lockowner *lo = lockowner(sop);
4293 4946
4294 list_add(&lo->lo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]); 4947 kmem_cache_free(lockowner_slab, lo);
4295 list_add(&lo->lo_owner_ino_hash, &nn->lockowner_ino_hashtbl[inohash]);
4296 list_add(&lo->lo_perstateid, &open_stp->st_lockowners);
4297} 4948}
4298 4949
4950static const struct nfs4_stateowner_operations lockowner_ops = {
4951 .so_unhash = nfs4_unhash_lockowner,
4952 .so_free = nfs4_free_lockowner,
4953};
4954
4299/* 4955/*
4300 * Alloc a lock owner structure. 4956 * Alloc a lock owner structure.
4301 * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has 4957 * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has
@@ -4303,42 +4959,107 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s
4303 * 4959 *
4304 * strhashval = ownerstr_hashval 4960 * strhashval = ownerstr_hashval
4305 */ 4961 */
4306
4307static struct nfs4_lockowner * 4962static struct nfs4_lockowner *
4308alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp, struct nfsd4_lock *lock) { 4963alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
4309 struct nfs4_lockowner *lo; 4964 struct nfs4_ol_stateid *open_stp,
4965 struct nfsd4_lock *lock)
4966{
4967 struct nfs4_lockowner *lo, *ret;
4310 4968
4311 lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp); 4969 lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp);
4312 if (!lo) 4970 if (!lo)
4313 return NULL; 4971 return NULL;
4314 INIT_LIST_HEAD(&lo->lo_owner.so_stateids); 4972 INIT_LIST_HEAD(&lo->lo_owner.so_stateids);
4315 lo->lo_owner.so_is_open_owner = 0; 4973 lo->lo_owner.so_is_open_owner = 0;
4316 /* It is the openowner seqid that will be incremented in encode in the 4974 lo->lo_owner.so_seqid = lock->lk_new_lock_seqid;
4317 * case of new lockowners; so increment the lock seqid manually: */ 4975 lo->lo_owner.so_ops = &lockowner_ops;
4318 lo->lo_owner.so_seqid = lock->lk_new_lock_seqid + 1; 4976 spin_lock(&clp->cl_lock);
4319 hash_lockowner(lo, strhashval, clp, open_stp); 4977 ret = find_lockowner_str_locked(&clp->cl_clientid,
4978 &lock->lk_new_owner, clp);
4979 if (ret == NULL) {
4980 list_add(&lo->lo_owner.so_strhash,
4981 &clp->cl_ownerstr_hashtbl[strhashval]);
4982 ret = lo;
4983 } else
4984 nfs4_free_lockowner(&lo->lo_owner);
4985 spin_unlock(&clp->cl_lock);
4320 return lo; 4986 return lo;
4321} 4987}
4322 4988
4323static struct nfs4_ol_stateid * 4989static void
4324alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct nfs4_ol_stateid *open_stp) 4990init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
4991 struct nfs4_file *fp, struct inode *inode,
4992 struct nfs4_ol_stateid *open_stp)
4325{ 4993{
4326 struct nfs4_ol_stateid *stp;
4327 struct nfs4_client *clp = lo->lo_owner.so_client; 4994 struct nfs4_client *clp = lo->lo_owner.so_client;
4328 4995
4329 stp = nfs4_alloc_stateid(clp); 4996 lockdep_assert_held(&clp->cl_lock);
4330 if (stp == NULL) 4997
4331 return NULL; 4998 atomic_inc(&stp->st_stid.sc_count);
4332 stp->st_stid.sc_type = NFS4_LOCK_STID; 4999 stp->st_stid.sc_type = NFS4_LOCK_STID;
4333 list_add(&stp->st_perfile, &fp->fi_stateids);
4334 list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
4335 stp->st_stateowner = &lo->lo_owner; 5000 stp->st_stateowner = &lo->lo_owner;
5001 atomic_inc(&lo->lo_owner.so_count);
4336 get_nfs4_file(fp); 5002 get_nfs4_file(fp);
4337 stp->st_file = fp; 5003 stp->st_stid.sc_file = fp;
5004 stp->st_stid.sc_free = nfs4_free_lock_stateid;
4338 stp->st_access_bmap = 0; 5005 stp->st_access_bmap = 0;
4339 stp->st_deny_bmap = open_stp->st_deny_bmap; 5006 stp->st_deny_bmap = open_stp->st_deny_bmap;
4340 stp->st_openstp = open_stp; 5007 stp->st_openstp = open_stp;
4341 return stp; 5008 list_add(&stp->st_locks, &open_stp->st_locks);
5009 list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
5010 spin_lock(&fp->fi_lock);
5011 list_add(&stp->st_perfile, &fp->fi_stateids);
5012 spin_unlock(&fp->fi_lock);
5013}
5014
5015static struct nfs4_ol_stateid *
5016find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp)
5017{
5018 struct nfs4_ol_stateid *lst;
5019 struct nfs4_client *clp = lo->lo_owner.so_client;
5020
5021 lockdep_assert_held(&clp->cl_lock);
5022
5023 list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) {
5024 if (lst->st_stid.sc_file == fp) {
5025 atomic_inc(&lst->st_stid.sc_count);
5026 return lst;
5027 }
5028 }
5029 return NULL;
5030}
5031
5032static struct nfs4_ol_stateid *
5033find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi,
5034 struct inode *inode, struct nfs4_ol_stateid *ost,
5035 bool *new)
5036{
5037 struct nfs4_stid *ns = NULL;
5038 struct nfs4_ol_stateid *lst;
5039 struct nfs4_openowner *oo = openowner(ost->st_stateowner);
5040 struct nfs4_client *clp = oo->oo_owner.so_client;
5041
5042 spin_lock(&clp->cl_lock);
5043 lst = find_lock_stateid(lo, fi);
5044 if (lst == NULL) {
5045 spin_unlock(&clp->cl_lock);
5046 ns = nfs4_alloc_stid(clp, stateid_slab);
5047 if (ns == NULL)
5048 return NULL;
5049
5050 spin_lock(&clp->cl_lock);
5051 lst = find_lock_stateid(lo, fi);
5052 if (likely(!lst)) {
5053 lst = openlockstateid(ns);
5054 init_lock_stateid(lst, lo, fi, inode, ost);
5055 ns = NULL;
5056 *new = true;
5057 }
5058 }
5059 spin_unlock(&clp->cl_lock);
5060 if (ns)
5061 nfs4_put_stid(ns);
5062 return lst;
4342} 5063}
4343 5064
4344static int 5065static int
@@ -4350,46 +5071,53 @@ check_lock_length(u64 offset, u64 length)
4350 5071
4351static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) 5072static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access)
4352{ 5073{
4353 struct nfs4_file *fp = lock_stp->st_file; 5074 struct nfs4_file *fp = lock_stp->st_stid.sc_file;
4354 int oflag = nfs4_access_to_omode(access); 5075
5076 lockdep_assert_held(&fp->fi_lock);
4355 5077
4356 if (test_access(access, lock_stp)) 5078 if (test_access(access, lock_stp))
4357 return; 5079 return;
4358 nfs4_file_get_access(fp, oflag); 5080 __nfs4_file_get_access(fp, access);
4359 set_access(access, lock_stp); 5081 set_access(access, lock_stp);
4360} 5082}
4361 5083
4362static __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new) 5084static __be32
5085lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
5086 struct nfs4_ol_stateid *ost,
5087 struct nfsd4_lock *lock,
5088 struct nfs4_ol_stateid **lst, bool *new)
4363{ 5089{
4364 struct nfs4_file *fi = ost->st_file; 5090 __be32 status;
5091 struct nfs4_file *fi = ost->st_stid.sc_file;
4365 struct nfs4_openowner *oo = openowner(ost->st_stateowner); 5092 struct nfs4_openowner *oo = openowner(ost->st_stateowner);
4366 struct nfs4_client *cl = oo->oo_owner.so_client; 5093 struct nfs4_client *cl = oo->oo_owner.so_client;
5094 struct inode *inode = cstate->current_fh.fh_dentry->d_inode;
4367 struct nfs4_lockowner *lo; 5095 struct nfs4_lockowner *lo;
4368 unsigned int strhashval; 5096 unsigned int strhashval;
4369 struct nfsd_net *nn = net_generic(cl->net, nfsd_net_id); 5097
4370 5098 lo = find_lockowner_str(&cl->cl_clientid, &lock->v.new.owner, cl);
4371 lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, 5099 if (!lo) {
4372 &lock->v.new.owner, nn); 5100 strhashval = ownerstr_hashval(&lock->v.new.owner);
4373 if (lo) { 5101 lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock);
4374 if (!cstate->minorversion) 5102 if (lo == NULL)
4375 return nfserr_bad_seqid; 5103 return nfserr_jukebox;
4376 /* XXX: a lockowner always has exactly one stateid: */ 5104 } else {
4377 *lst = list_first_entry(&lo->lo_owner.so_stateids, 5105 /* with an existing lockowner, seqids must be the same */
4378 struct nfs4_ol_stateid, st_perstateowner); 5106 status = nfserr_bad_seqid;
4379 return nfs_ok; 5107 if (!cstate->minorversion &&
5108 lock->lk_new_lock_seqid != lo->lo_owner.so_seqid)
5109 goto out;
4380 } 5110 }
4381 strhashval = ownerstr_hashval(cl->cl_clientid.cl_id, 5111
4382 &lock->v.new.owner); 5112 *lst = find_or_create_lock_stateid(lo, fi, inode, ost, new);
4383 lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock);
4384 if (lo == NULL)
4385 return nfserr_jukebox;
4386 *lst = alloc_init_lock_stateid(lo, fi, ost);
4387 if (*lst == NULL) { 5113 if (*lst == NULL) {
4388 release_lockowner(lo); 5114 status = nfserr_jukebox;
4389 return nfserr_jukebox; 5115 goto out;
4390 } 5116 }
4391 *new = true; 5117 status = nfs_ok;
4392 return nfs_ok; 5118out:
5119 nfs4_put_stateowner(&lo->lo_owner);
5120 return status;
4393} 5121}
4394 5122
4395/* 5123/*
@@ -4401,14 +5129,16 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4401{ 5129{
4402 struct nfs4_openowner *open_sop = NULL; 5130 struct nfs4_openowner *open_sop = NULL;
4403 struct nfs4_lockowner *lock_sop = NULL; 5131 struct nfs4_lockowner *lock_sop = NULL;
4404 struct nfs4_ol_stateid *lock_stp; 5132 struct nfs4_ol_stateid *lock_stp = NULL;
5133 struct nfs4_ol_stateid *open_stp = NULL;
5134 struct nfs4_file *fp;
4405 struct file *filp = NULL; 5135 struct file *filp = NULL;
4406 struct file_lock *file_lock = NULL; 5136 struct file_lock *file_lock = NULL;
4407 struct file_lock *conflock = NULL; 5137 struct file_lock *conflock = NULL;
4408 __be32 status = 0; 5138 __be32 status = 0;
4409 bool new_state = false;
4410 int lkflg; 5139 int lkflg;
4411 int err; 5140 int err;
5141 bool new = false;
4412 struct net *net = SVC_NET(rqstp); 5142 struct net *net = SVC_NET(rqstp);
4413 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 5143 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
4414 5144
@@ -4425,11 +5155,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4425 return status; 5155 return status;
4426 } 5156 }
4427 5157
4428 nfs4_lock_state();
4429
4430 if (lock->lk_is_new) { 5158 if (lock->lk_is_new) {
4431 struct nfs4_ol_stateid *open_stp = NULL;
4432
4433 if (nfsd4_has_session(cstate)) 5159 if (nfsd4_has_session(cstate))
4434 /* See rfc 5661 18.10.3: given clientid is ignored: */ 5160 /* See rfc 5661 18.10.3: given clientid is ignored: */
4435 memcpy(&lock->v.new.clientid, 5161 memcpy(&lock->v.new.clientid,
@@ -4453,12 +5179,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4453 &lock->v.new.clientid)) 5179 &lock->v.new.clientid))
4454 goto out; 5180 goto out;
4455 status = lookup_or_create_lock_state(cstate, open_stp, lock, 5181 status = lookup_or_create_lock_state(cstate, open_stp, lock,
4456 &lock_stp, &new_state); 5182 &lock_stp, &new);
4457 } else 5183 } else {
4458 status = nfs4_preprocess_seqid_op(cstate, 5184 status = nfs4_preprocess_seqid_op(cstate,
4459 lock->lk_old_lock_seqid, 5185 lock->lk_old_lock_seqid,
4460 &lock->lk_old_lock_stateid, 5186 &lock->lk_old_lock_stateid,
4461 NFS4_LOCK_STID, &lock_stp, nn); 5187 NFS4_LOCK_STID, &lock_stp, nn);
5188 }
4462 if (status) 5189 if (status)
4463 goto out; 5190 goto out;
4464 lock_sop = lockowner(lock_stp->st_stateowner); 5191 lock_sop = lockowner(lock_stp->st_stateowner);
@@ -4482,20 +5209,25 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4482 goto out; 5209 goto out;
4483 } 5210 }
4484 5211
5212 fp = lock_stp->st_stid.sc_file;
4485 locks_init_lock(file_lock); 5213 locks_init_lock(file_lock);
4486 switch (lock->lk_type) { 5214 switch (lock->lk_type) {
4487 case NFS4_READ_LT: 5215 case NFS4_READ_LT:
4488 case NFS4_READW_LT: 5216 case NFS4_READW_LT:
4489 filp = find_readable_file(lock_stp->st_file); 5217 spin_lock(&fp->fi_lock);
5218 filp = find_readable_file_locked(fp);
4490 if (filp) 5219 if (filp)
4491 get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ); 5220 get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
5221 spin_unlock(&fp->fi_lock);
4492 file_lock->fl_type = F_RDLCK; 5222 file_lock->fl_type = F_RDLCK;
4493 break; 5223 break;
4494 case NFS4_WRITE_LT: 5224 case NFS4_WRITE_LT:
4495 case NFS4_WRITEW_LT: 5225 case NFS4_WRITEW_LT:
4496 filp = find_writeable_file(lock_stp->st_file); 5226 spin_lock(&fp->fi_lock);
5227 filp = find_writeable_file_locked(fp);
4497 if (filp) 5228 if (filp)
4498 get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE); 5229 get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
5230 spin_unlock(&fp->fi_lock);
4499 file_lock->fl_type = F_WRLCK; 5231 file_lock->fl_type = F_WRLCK;
4500 break; 5232 break;
4501 default: 5233 default:
@@ -4544,11 +5276,27 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4544 break; 5276 break;
4545 } 5277 }
4546out: 5278out:
4547 if (status && new_state) 5279 if (filp)
4548 release_lockowner(lock_sop); 5280 fput(filp);
5281 if (lock_stp) {
5282 /* Bump seqid manually if the 4.0 replay owner is openowner */
5283 if (cstate->replay_owner &&
5284 cstate->replay_owner != &lock_sop->lo_owner &&
5285 seqid_mutating_err(ntohl(status)))
5286 lock_sop->lo_owner.so_seqid++;
5287
5288 /*
5289 * If this is a new, never-before-used stateid, and we are
5290 * returning an error, then just go ahead and release it.
5291 */
5292 if (status && new)
5293 release_lock_stateid(lock_stp);
5294
5295 nfs4_put_stid(&lock_stp->st_stid);
5296 }
5297 if (open_stp)
5298 nfs4_put_stid(&open_stp->st_stid);
4549 nfsd4_bump_seqid(cstate, status); 5299 nfsd4_bump_seqid(cstate, status);
4550 if (!cstate->replay_owner)
4551 nfs4_unlock_state();
4552 if (file_lock) 5300 if (file_lock)
4553 locks_free_lock(file_lock); 5301 locks_free_lock(file_lock);
4554 if (conflock) 5302 if (conflock)
@@ -4580,9 +5328,8 @@ __be32
4580nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 5328nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4581 struct nfsd4_lockt *lockt) 5329 struct nfsd4_lockt *lockt)
4582{ 5330{
4583 struct inode *inode;
4584 struct file_lock *file_lock = NULL; 5331 struct file_lock *file_lock = NULL;
4585 struct nfs4_lockowner *lo; 5332 struct nfs4_lockowner *lo = NULL;
4586 __be32 status; 5333 __be32 status;
4587 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); 5334 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
4588 5335
@@ -4592,10 +5339,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4592 if (check_lock_length(lockt->lt_offset, lockt->lt_length)) 5339 if (check_lock_length(lockt->lt_offset, lockt->lt_length))
4593 return nfserr_inval; 5340 return nfserr_inval;
4594 5341
4595 nfs4_lock_state();
4596
4597 if (!nfsd4_has_session(cstate)) { 5342 if (!nfsd4_has_session(cstate)) {
4598 status = lookup_clientid(&lockt->lt_clientid, false, nn, NULL); 5343 status = lookup_clientid(&lockt->lt_clientid, cstate, nn);
4599 if (status) 5344 if (status)
4600 goto out; 5345 goto out;
4601 } 5346 }
@@ -4603,7 +5348,6 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4603 if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) 5348 if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
4604 goto out; 5349 goto out;
4605 5350
4606 inode = cstate->current_fh.fh_dentry->d_inode;
4607 file_lock = locks_alloc_lock(); 5351 file_lock = locks_alloc_lock();
4608 if (!file_lock) { 5352 if (!file_lock) {
4609 dprintk("NFSD: %s: unable to allocate lock!\n", __func__); 5353 dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
@@ -4626,7 +5370,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4626 goto out; 5370 goto out;
4627 } 5371 }
4628 5372
4629 lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner, nn); 5373 lo = find_lockowner_str(&lockt->lt_clientid, &lockt->lt_owner,
5374 cstate->clp);
4630 if (lo) 5375 if (lo)
4631 file_lock->fl_owner = (fl_owner_t)lo; 5376 file_lock->fl_owner = (fl_owner_t)lo;
4632 file_lock->fl_pid = current->tgid; 5377 file_lock->fl_pid = current->tgid;
@@ -4646,7 +5391,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4646 nfs4_set_lock_denied(file_lock, &lockt->lt_denied); 5391 nfs4_set_lock_denied(file_lock, &lockt->lt_denied);
4647 } 5392 }
4648out: 5393out:
4649 nfs4_unlock_state(); 5394 if (lo)
5395 nfs4_put_stateowner(&lo->lo_owner);
4650 if (file_lock) 5396 if (file_lock)
4651 locks_free_lock(file_lock); 5397 locks_free_lock(file_lock);
4652 return status; 5398 return status;
@@ -4670,23 +5416,21 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4670 if (check_lock_length(locku->lu_offset, locku->lu_length)) 5416 if (check_lock_length(locku->lu_offset, locku->lu_length))
4671 return nfserr_inval; 5417 return nfserr_inval;
4672 5418
4673 nfs4_lock_state();
4674
4675 status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid, 5419 status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid,
4676 &locku->lu_stateid, NFS4_LOCK_STID, 5420 &locku->lu_stateid, NFS4_LOCK_STID,
4677 &stp, nn); 5421 &stp, nn);
4678 if (status) 5422 if (status)
4679 goto out; 5423 goto out;
4680 filp = find_any_file(stp->st_file); 5424 filp = find_any_file(stp->st_stid.sc_file);
4681 if (!filp) { 5425 if (!filp) {
4682 status = nfserr_lock_range; 5426 status = nfserr_lock_range;
4683 goto out; 5427 goto put_stateid;
4684 } 5428 }
4685 file_lock = locks_alloc_lock(); 5429 file_lock = locks_alloc_lock();
4686 if (!file_lock) { 5430 if (!file_lock) {
4687 dprintk("NFSD: %s: unable to allocate lock!\n", __func__); 5431 dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
4688 status = nfserr_jukebox; 5432 status = nfserr_jukebox;
4689 goto out; 5433 goto fput;
4690 } 5434 }
4691 locks_init_lock(file_lock); 5435 locks_init_lock(file_lock);
4692 file_lock->fl_type = F_UNLCK; 5436 file_lock->fl_type = F_UNLCK;
@@ -4708,41 +5452,51 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4708 } 5452 }
4709 update_stateid(&stp->st_stid.sc_stateid); 5453 update_stateid(&stp->st_stid.sc_stateid);
4710 memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); 5454 memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
4711 5455fput:
5456 fput(filp);
5457put_stateid:
5458 nfs4_put_stid(&stp->st_stid);
4712out: 5459out:
4713 nfsd4_bump_seqid(cstate, status); 5460 nfsd4_bump_seqid(cstate, status);
4714 if (!cstate->replay_owner)
4715 nfs4_unlock_state();
4716 if (file_lock) 5461 if (file_lock)
4717 locks_free_lock(file_lock); 5462 locks_free_lock(file_lock);
4718 return status; 5463 return status;
4719 5464
4720out_nfserr: 5465out_nfserr:
4721 status = nfserrno(err); 5466 status = nfserrno(err);
4722 goto out; 5467 goto fput;
4723} 5468}
4724 5469
4725/* 5470/*
4726 * returns 5471 * returns
4727 * 1: locks held by lockowner 5472 * true: locks held by lockowner
4728 * 0: no locks held by lockowner 5473 * false: no locks held by lockowner
4729 */ 5474 */
4730static int 5475static bool
4731check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner) 5476check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
4732{ 5477{
4733 struct file_lock **flpp; 5478 struct file_lock **flpp;
4734 struct inode *inode = filp->fi_inode; 5479 int status = false;
4735 int status = 0; 5480 struct file *filp = find_any_file(fp);
5481 struct inode *inode;
5482
5483 if (!filp) {
5484 /* Any valid lock stateid should have some sort of access */
5485 WARN_ON_ONCE(1);
5486 return status;
5487 }
5488
5489 inode = file_inode(filp);
4736 5490
4737 spin_lock(&inode->i_lock); 5491 spin_lock(&inode->i_lock);
4738 for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { 5492 for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) {
4739 if ((*flpp)->fl_owner == (fl_owner_t)lowner) { 5493 if ((*flpp)->fl_owner == (fl_owner_t)lowner) {
4740 status = 1; 5494 status = true;
4741 goto out; 5495 break;
4742 } 5496 }
4743 } 5497 }
4744out:
4745 spin_unlock(&inode->i_lock); 5498 spin_unlock(&inode->i_lock);
5499 fput(filp);
4746 return status; 5500 return status;
4747} 5501}
4748 5502
@@ -4753,53 +5507,46 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
4753{ 5507{
4754 clientid_t *clid = &rlockowner->rl_clientid; 5508 clientid_t *clid = &rlockowner->rl_clientid;
4755 struct nfs4_stateowner *sop; 5509 struct nfs4_stateowner *sop;
4756 struct nfs4_lockowner *lo; 5510 struct nfs4_lockowner *lo = NULL;
4757 struct nfs4_ol_stateid *stp; 5511 struct nfs4_ol_stateid *stp;
4758 struct xdr_netobj *owner = &rlockowner->rl_owner; 5512 struct xdr_netobj *owner = &rlockowner->rl_owner;
4759 struct list_head matches; 5513 unsigned int hashval = ownerstr_hashval(owner);
4760 unsigned int hashval = ownerstr_hashval(clid->cl_id, owner);
4761 __be32 status; 5514 __be32 status;
4762 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); 5515 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
5516 struct nfs4_client *clp;
4763 5517
4764 dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", 5518 dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
4765 clid->cl_boot, clid->cl_id); 5519 clid->cl_boot, clid->cl_id);
4766 5520
4767 nfs4_lock_state(); 5521 status = lookup_clientid(clid, cstate, nn);
4768
4769 status = lookup_clientid(clid, cstate->minorversion, nn, NULL);
4770 if (status) 5522 if (status)
4771 goto out; 5523 return status;
4772 5524
4773 status = nfserr_locks_held; 5525 clp = cstate->clp;
4774 INIT_LIST_HEAD(&matches); 5526 /* Find the matching lock stateowner */
5527 spin_lock(&clp->cl_lock);
5528 list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval],
5529 so_strhash) {
4775 5530
4776 list_for_each_entry(sop, &nn->ownerstr_hashtbl[hashval], so_strhash) { 5531 if (sop->so_is_open_owner || !same_owner_str(sop, owner))
4777 if (sop->so_is_open_owner)
4778 continue; 5532 continue;
4779 if (!same_owner_str(sop, owner, clid)) 5533
4780 continue; 5534 /* see if there are still any locks associated with it */
4781 list_for_each_entry(stp, &sop->so_stateids, 5535 lo = lockowner(sop);
4782 st_perstateowner) { 5536 list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) {
4783 lo = lockowner(sop); 5537 if (check_for_locks(stp->st_stid.sc_file, lo)) {
4784 if (check_for_locks(stp->st_file, lo)) 5538 status = nfserr_locks_held;
4785 goto out; 5539 spin_unlock(&clp->cl_lock);
4786 list_add(&lo->lo_list, &matches); 5540 return status;
5541 }
4787 } 5542 }
5543
5544 atomic_inc(&sop->so_count);
5545 break;
4788 } 5546 }
4789 /* Clients probably won't expect us to return with some (but not all) 5547 spin_unlock(&clp->cl_lock);
4790 * of the lockowner state released; so don't release any until all 5548 if (lo)
4791 * have been checked. */
4792 status = nfs_ok;
4793 while (!list_empty(&matches)) {
4794 lo = list_entry(matches.next, struct nfs4_lockowner,
4795 lo_list);
4796 /* unhash_stateowner deletes so_perclient only
4797 * for openowners. */
4798 list_del(&lo->lo_list);
4799 release_lockowner(lo); 5549 release_lockowner(lo);
4800 }
4801out:
4802 nfs4_unlock_state();
4803 return status; 5550 return status;
4804} 5551}
4805 5552
@@ -4887,34 +5634,123 @@ nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn)
4887* Called from OPEN. Look for clientid in reclaim list. 5634* Called from OPEN. Look for clientid in reclaim list.
4888*/ 5635*/
4889__be32 5636__be32
4890nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn) 5637nfs4_check_open_reclaim(clientid_t *clid,
5638 struct nfsd4_compound_state *cstate,
5639 struct nfsd_net *nn)
4891{ 5640{
4892 struct nfs4_client *clp; 5641 __be32 status;
4893 5642
4894 /* find clientid in conf_id_hashtbl */ 5643 /* find clientid in conf_id_hashtbl */
4895 clp = find_confirmed_client(clid, sessions, nn); 5644 status = lookup_clientid(clid, cstate, nn);
4896 if (clp == NULL) 5645 if (status)
4897 return nfserr_reclaim_bad; 5646 return nfserr_reclaim_bad;
4898 5647
4899 return nfsd4_client_record_check(clp) ? nfserr_reclaim_bad : nfs_ok; 5648 if (nfsd4_client_record_check(cstate->clp))
5649 return nfserr_reclaim_bad;
5650
5651 return nfs_ok;
4900} 5652}
4901 5653
4902#ifdef CONFIG_NFSD_FAULT_INJECTION 5654#ifdef CONFIG_NFSD_FAULT_INJECTION
5655static inline void
5656put_client(struct nfs4_client *clp)
5657{
5658 atomic_dec(&clp->cl_refcount);
5659}
4903 5660
4904u64 nfsd_forget_client(struct nfs4_client *clp, u64 max) 5661static struct nfs4_client *
5662nfsd_find_client(struct sockaddr_storage *addr, size_t addr_size)
4905{ 5663{
4906 if (mark_client_expired(clp)) 5664 struct nfs4_client *clp;
4907 return 0; 5665 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
4908 expire_client(clp); 5666 nfsd_net_id);
4909 return 1; 5667
5668 if (!nfsd_netns_ready(nn))
5669 return NULL;
5670
5671 list_for_each_entry(clp, &nn->client_lru, cl_lru) {
5672 if (memcmp(&clp->cl_addr, addr, addr_size) == 0)
5673 return clp;
5674 }
5675 return NULL;
4910} 5676}
4911 5677
4912u64 nfsd_print_client(struct nfs4_client *clp, u64 num) 5678u64
5679nfsd_inject_print_clients(void)
4913{ 5680{
5681 struct nfs4_client *clp;
5682 u64 count = 0;
5683 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
5684 nfsd_net_id);
4914 char buf[INET6_ADDRSTRLEN]; 5685 char buf[INET6_ADDRSTRLEN];
4915 rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); 5686
4916 printk(KERN_INFO "NFS Client: %s\n", buf); 5687 if (!nfsd_netns_ready(nn))
4917 return 1; 5688 return 0;
5689
5690 spin_lock(&nn->client_lock);
5691 list_for_each_entry(clp, &nn->client_lru, cl_lru) {
5692 rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf));
5693 pr_info("NFS Client: %s\n", buf);
5694 ++count;
5695 }
5696 spin_unlock(&nn->client_lock);
5697
5698 return count;
5699}
5700
5701u64
5702nfsd_inject_forget_client(struct sockaddr_storage *addr, size_t addr_size)
5703{
5704 u64 count = 0;
5705 struct nfs4_client *clp;
5706 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
5707 nfsd_net_id);
5708
5709 if (!nfsd_netns_ready(nn))
5710 return count;
5711
5712 spin_lock(&nn->client_lock);
5713 clp = nfsd_find_client(addr, addr_size);
5714 if (clp) {
5715 if (mark_client_expired_locked(clp) == nfs_ok)
5716 ++count;
5717 else
5718 clp = NULL;
5719 }
5720 spin_unlock(&nn->client_lock);
5721
5722 if (clp)
5723 expire_client(clp);
5724
5725 return count;
5726}
5727
5728u64
5729nfsd_inject_forget_clients(u64 max)
5730{
5731 u64 count = 0;
5732 struct nfs4_client *clp, *next;
5733 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
5734 nfsd_net_id);
5735 LIST_HEAD(reaplist);
5736
5737 if (!nfsd_netns_ready(nn))
5738 return count;
5739
5740 spin_lock(&nn->client_lock);
5741 list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) {
5742 if (mark_client_expired_locked(clp) == nfs_ok) {
5743 list_add(&clp->cl_lru, &reaplist);
5744 if (max != 0 && ++count >= max)
5745 break;
5746 }
5747 }
5748 spin_unlock(&nn->client_lock);
5749
5750 list_for_each_entry_safe(clp, next, &reaplist, cl_lru)
5751 expire_client(clp);
5752
5753 return count;
4918} 5754}
4919 5755
4920static void nfsd_print_count(struct nfs4_client *clp, unsigned int count, 5756static void nfsd_print_count(struct nfs4_client *clp, unsigned int count,
@@ -4925,158 +5761,484 @@ static void nfsd_print_count(struct nfs4_client *clp, unsigned int count,
4925 printk(KERN_INFO "NFS Client: %s has %u %s\n", buf, count, type); 5761 printk(KERN_INFO "NFS Client: %s has %u %s\n", buf, count, type);
4926} 5762}
4927 5763
4928static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_lockowner *)) 5764static void
5765nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid *lst,
5766 struct list_head *collect)
5767{
5768 struct nfs4_client *clp = lst->st_stid.sc_client;
5769 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
5770 nfsd_net_id);
5771
5772 if (!collect)
5773 return;
5774
5775 lockdep_assert_held(&nn->client_lock);
5776 atomic_inc(&clp->cl_refcount);
5777 list_add(&lst->st_locks, collect);
5778}
5779
5780static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max,
5781 struct list_head *collect,
5782 void (*func)(struct nfs4_ol_stateid *))
4929{ 5783{
4930 struct nfs4_openowner *oop; 5784 struct nfs4_openowner *oop;
4931 struct nfs4_lockowner *lop, *lo_next;
4932 struct nfs4_ol_stateid *stp, *st_next; 5785 struct nfs4_ol_stateid *stp, *st_next;
5786 struct nfs4_ol_stateid *lst, *lst_next;
4933 u64 count = 0; 5787 u64 count = 0;
4934 5788
5789 spin_lock(&clp->cl_lock);
4935 list_for_each_entry(oop, &clp->cl_openowners, oo_perclient) { 5790 list_for_each_entry(oop, &clp->cl_openowners, oo_perclient) {
4936 list_for_each_entry_safe(stp, st_next, &oop->oo_owner.so_stateids, st_perstateowner) { 5791 list_for_each_entry_safe(stp, st_next,
4937 list_for_each_entry_safe(lop, lo_next, &stp->st_lockowners, lo_perstateid) { 5792 &oop->oo_owner.so_stateids, st_perstateowner) {
4938 if (func) 5793 list_for_each_entry_safe(lst, lst_next,
4939 func(lop); 5794 &stp->st_locks, st_locks) {
4940 if (++count == max) 5795 if (func) {
4941 return count; 5796 func(lst);
5797 nfsd_inject_add_lock_to_list(lst,
5798 collect);
5799 }
5800 ++count;
5801 /*
5802 * Despite the fact that these functions deal
5803 * with 64-bit integers for "count", we must
5804 * ensure that it doesn't blow up the
5805 * clp->cl_refcount. Throw a warning if we
5806 * start to approach INT_MAX here.
5807 */
5808 WARN_ON_ONCE(count == (INT_MAX / 2));
5809 if (count == max)
5810 goto out;
4942 } 5811 }
4943 } 5812 }
4944 } 5813 }
5814out:
5815 spin_unlock(&clp->cl_lock);
4945 5816
4946 return count; 5817 return count;
4947} 5818}
4948 5819
4949u64 nfsd_forget_client_locks(struct nfs4_client *clp, u64 max) 5820static u64
5821nfsd_collect_client_locks(struct nfs4_client *clp, struct list_head *collect,
5822 u64 max)
4950{ 5823{
4951 return nfsd_foreach_client_lock(clp, max, release_lockowner); 5824 return nfsd_foreach_client_lock(clp, max, collect, unhash_lock_stateid);
4952} 5825}
4953 5826
4954u64 nfsd_print_client_locks(struct nfs4_client *clp, u64 max) 5827static u64
5828nfsd_print_client_locks(struct nfs4_client *clp)
4955{ 5829{
4956 u64 count = nfsd_foreach_client_lock(clp, max, NULL); 5830 u64 count = nfsd_foreach_client_lock(clp, 0, NULL, NULL);
4957 nfsd_print_count(clp, count, "locked files"); 5831 nfsd_print_count(clp, count, "locked files");
4958 return count; 5832 return count;
4959} 5833}
4960 5834
4961static u64 nfsd_foreach_client_open(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_openowner *)) 5835u64
5836nfsd_inject_print_locks(void)
5837{
5838 struct nfs4_client *clp;
5839 u64 count = 0;
5840 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
5841 nfsd_net_id);
5842
5843 if (!nfsd_netns_ready(nn))
5844 return 0;
5845
5846 spin_lock(&nn->client_lock);
5847 list_for_each_entry(clp, &nn->client_lru, cl_lru)
5848 count += nfsd_print_client_locks(clp);
5849 spin_unlock(&nn->client_lock);
5850
5851 return count;
5852}
5853
5854static void
5855nfsd_reap_locks(struct list_head *reaplist)
5856{
5857 struct nfs4_client *clp;
5858 struct nfs4_ol_stateid *stp, *next;
5859
5860 list_for_each_entry_safe(stp, next, reaplist, st_locks) {
5861 list_del_init(&stp->st_locks);
5862 clp = stp->st_stid.sc_client;
5863 nfs4_put_stid(&stp->st_stid);
5864 put_client(clp);
5865 }
5866}
5867
5868u64
5869nfsd_inject_forget_client_locks(struct sockaddr_storage *addr, size_t addr_size)
5870{
5871 unsigned int count = 0;
5872 struct nfs4_client *clp;
5873 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
5874 nfsd_net_id);
5875 LIST_HEAD(reaplist);
5876
5877 if (!nfsd_netns_ready(nn))
5878 return count;
5879
5880 spin_lock(&nn->client_lock);
5881 clp = nfsd_find_client(addr, addr_size);
5882 if (clp)
5883 count = nfsd_collect_client_locks(clp, &reaplist, 0);
5884 spin_unlock(&nn->client_lock);
5885 nfsd_reap_locks(&reaplist);
5886 return count;
5887}
5888
5889u64
5890nfsd_inject_forget_locks(u64 max)
5891{
5892 u64 count = 0;
5893 struct nfs4_client *clp;
5894 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
5895 nfsd_net_id);
5896 LIST_HEAD(reaplist);
5897
5898 if (!nfsd_netns_ready(nn))
5899 return count;
5900
5901 spin_lock(&nn->client_lock);
5902 list_for_each_entry(clp, &nn->client_lru, cl_lru) {
5903 count += nfsd_collect_client_locks(clp, &reaplist, max - count);
5904 if (max != 0 && count >= max)
5905 break;
5906 }
5907 spin_unlock(&nn->client_lock);
5908 nfsd_reap_locks(&reaplist);
5909 return count;
5910}
5911
5912static u64
5913nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max,
5914 struct list_head *collect,
5915 void (*func)(struct nfs4_openowner *))
4962{ 5916{
4963 struct nfs4_openowner *oop, *next; 5917 struct nfs4_openowner *oop, *next;
5918 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
5919 nfsd_net_id);
4964 u64 count = 0; 5920 u64 count = 0;
4965 5921
5922 lockdep_assert_held(&nn->client_lock);
5923
5924 spin_lock(&clp->cl_lock);
4966 list_for_each_entry_safe(oop, next, &clp->cl_openowners, oo_perclient) { 5925 list_for_each_entry_safe(oop, next, &clp->cl_openowners, oo_perclient) {
4967 if (func) 5926 if (func) {
4968 func(oop); 5927 func(oop);
4969 if (++count == max) 5928 if (collect) {
5929 atomic_inc(&clp->cl_refcount);
5930 list_add(&oop->oo_perclient, collect);
5931 }
5932 }
5933 ++count;
5934 /*
5935 * Despite the fact that these functions deal with
5936 * 64-bit integers for "count", we must ensure that
5937 * it doesn't blow up the clp->cl_refcount. Throw a
5938 * warning if we start to approach INT_MAX here.
5939 */
5940 WARN_ON_ONCE(count == (INT_MAX / 2));
5941 if (count == max)
4970 break; 5942 break;
4971 } 5943 }
5944 spin_unlock(&clp->cl_lock);
4972 5945
4973 return count; 5946 return count;
4974} 5947}
4975 5948
4976u64 nfsd_forget_client_openowners(struct nfs4_client *clp, u64 max) 5949static u64
5950nfsd_print_client_openowners(struct nfs4_client *clp)
4977{ 5951{
4978 return nfsd_foreach_client_open(clp, max, release_openowner); 5952 u64 count = nfsd_foreach_client_openowner(clp, 0, NULL, NULL);
5953
5954 nfsd_print_count(clp, count, "openowners");
5955 return count;
4979} 5956}
4980 5957
4981u64 nfsd_print_client_openowners(struct nfs4_client *clp, u64 max) 5958static u64
5959nfsd_collect_client_openowners(struct nfs4_client *clp,
5960 struct list_head *collect, u64 max)
4982{ 5961{
4983 u64 count = nfsd_foreach_client_open(clp, max, NULL); 5962 return nfsd_foreach_client_openowner(clp, max, collect,
4984 nfsd_print_count(clp, count, "open files"); 5963 unhash_openowner_locked);
4985 return count;
4986} 5964}
4987 5965
4988static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, 5966u64
4989 struct list_head *victims) 5967nfsd_inject_print_openowners(void)
4990{ 5968{
4991 struct nfs4_delegation *dp, *next; 5969 struct nfs4_client *clp;
4992 u64 count = 0; 5970 u64 count = 0;
5971 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
5972 nfsd_net_id);
5973
5974 if (!nfsd_netns_ready(nn))
5975 return 0;
5976
5977 spin_lock(&nn->client_lock);
5978 list_for_each_entry(clp, &nn->client_lru, cl_lru)
5979 count += nfsd_print_client_openowners(clp);
5980 spin_unlock(&nn->client_lock);
4993 5981
4994 lockdep_assert_held(&state_lock);
4995 list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) {
4996 if (victims)
4997 list_move(&dp->dl_recall_lru, victims);
4998 if (++count == max)
4999 break;
5000 }
5001 return count; 5982 return count;
5002} 5983}
5003 5984
5004u64 nfsd_forget_client_delegations(struct nfs4_client *clp, u64 max) 5985static void
5986nfsd_reap_openowners(struct list_head *reaplist)
5005{ 5987{
5006 struct nfs4_delegation *dp, *next; 5988 struct nfs4_client *clp;
5007 LIST_HEAD(victims); 5989 struct nfs4_openowner *oop, *next;
5008 u64 count;
5009 5990
5010 spin_lock(&state_lock); 5991 list_for_each_entry_safe(oop, next, reaplist, oo_perclient) {
5011 count = nfsd_find_all_delegations(clp, max, &victims); 5992 list_del_init(&oop->oo_perclient);
5012 spin_unlock(&state_lock); 5993 clp = oop->oo_owner.so_client;
5994 release_openowner(oop);
5995 put_client(clp);
5996 }
5997}
5013 5998
5014 list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) 5999u64
5015 revoke_delegation(dp); 6000nfsd_inject_forget_client_openowners(struct sockaddr_storage *addr,
6001 size_t addr_size)
6002{
6003 unsigned int count = 0;
6004 struct nfs4_client *clp;
6005 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6006 nfsd_net_id);
6007 LIST_HEAD(reaplist);
5016 6008
6009 if (!nfsd_netns_ready(nn))
6010 return count;
6011
6012 spin_lock(&nn->client_lock);
6013 clp = nfsd_find_client(addr, addr_size);
6014 if (clp)
6015 count = nfsd_collect_client_openowners(clp, &reaplist, 0);
6016 spin_unlock(&nn->client_lock);
6017 nfsd_reap_openowners(&reaplist);
5017 return count; 6018 return count;
5018} 6019}
5019 6020
5020u64 nfsd_recall_client_delegations(struct nfs4_client *clp, u64 max) 6021u64
6022nfsd_inject_forget_openowners(u64 max)
5021{ 6023{
5022 struct nfs4_delegation *dp, *next; 6024 u64 count = 0;
5023 LIST_HEAD(victims); 6025 struct nfs4_client *clp;
5024 u64 count; 6026 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6027 nfsd_net_id);
6028 LIST_HEAD(reaplist);
5025 6029
5026 spin_lock(&state_lock); 6030 if (!nfsd_netns_ready(nn))
5027 count = nfsd_find_all_delegations(clp, max, &victims); 6031 return count;
5028 list_for_each_entry_safe(dp, next, &victims, dl_recall_lru)
5029 nfsd_break_one_deleg(dp);
5030 spin_unlock(&state_lock);
5031 6032
6033 spin_lock(&nn->client_lock);
6034 list_for_each_entry(clp, &nn->client_lru, cl_lru) {
6035 count += nfsd_collect_client_openowners(clp, &reaplist,
6036 max - count);
6037 if (max != 0 && count >= max)
6038 break;
6039 }
6040 spin_unlock(&nn->client_lock);
6041 nfsd_reap_openowners(&reaplist);
5032 return count; 6042 return count;
5033} 6043}
5034 6044
5035u64 nfsd_print_client_delegations(struct nfs4_client *clp, u64 max) 6045static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,
6046 struct list_head *victims)
5036{ 6047{
6048 struct nfs4_delegation *dp, *next;
6049 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6050 nfsd_net_id);
5037 u64 count = 0; 6051 u64 count = 0;
5038 6052
6053 lockdep_assert_held(&nn->client_lock);
6054
5039 spin_lock(&state_lock); 6055 spin_lock(&state_lock);
5040 count = nfsd_find_all_delegations(clp, max, NULL); 6056 list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) {
6057 if (victims) {
6058 /*
6059 * It's not safe to mess with delegations that have a
6060 * non-zero dl_time. They might have already been broken
6061 * and could be processed by the laundromat outside of
6062 * the state_lock. Just leave them be.
6063 */
6064 if (dp->dl_time != 0)
6065 continue;
6066
6067 atomic_inc(&clp->cl_refcount);
6068 unhash_delegation_locked(dp);
6069 list_add(&dp->dl_recall_lru, victims);
6070 }
6071 ++count;
6072 /*
6073 * Despite the fact that these functions deal with
6074 * 64-bit integers for "count", we must ensure that
6075 * it doesn't blow up the clp->cl_refcount. Throw a
6076 * warning if we start to approach INT_MAX here.
6077 */
6078 WARN_ON_ONCE(count == (INT_MAX / 2));
6079 if (count == max)
6080 break;
6081 }
5041 spin_unlock(&state_lock); 6082 spin_unlock(&state_lock);
6083 return count;
6084}
6085
6086static u64
6087nfsd_print_client_delegations(struct nfs4_client *clp)
6088{
6089 u64 count = nfsd_find_all_delegations(clp, 0, NULL);
5042 6090
5043 nfsd_print_count(clp, count, "delegations"); 6091 nfsd_print_count(clp, count, "delegations");
5044 return count; 6092 return count;
5045} 6093}
5046 6094
5047u64 nfsd_for_n_state(u64 max, u64 (*func)(struct nfs4_client *, u64)) 6095u64
6096nfsd_inject_print_delegations(void)
5048{ 6097{
5049 struct nfs4_client *clp, *next; 6098 struct nfs4_client *clp;
5050 u64 count = 0; 6099 u64 count = 0;
5051 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id); 6100 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6101 nfsd_net_id);
5052 6102
5053 if (!nfsd_netns_ready(nn)) 6103 if (!nfsd_netns_ready(nn))
5054 return 0; 6104 return 0;
5055 6105
5056 list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) { 6106 spin_lock(&nn->client_lock);
5057 count += func(clp, max - count); 6107 list_for_each_entry(clp, &nn->client_lru, cl_lru)
5058 if ((max != 0) && (count >= max)) 6108 count += nfsd_print_client_delegations(clp);
5059 break; 6109 spin_unlock(&nn->client_lock);
6110
6111 return count;
6112}
6113
6114static void
6115nfsd_forget_delegations(struct list_head *reaplist)
6116{
6117 struct nfs4_client *clp;
6118 struct nfs4_delegation *dp, *next;
6119
6120 list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) {
6121 list_del_init(&dp->dl_recall_lru);
6122 clp = dp->dl_stid.sc_client;
6123 revoke_delegation(dp);
6124 put_client(clp);
5060 } 6125 }
6126}
5061 6127
6128u64
6129nfsd_inject_forget_client_delegations(struct sockaddr_storage *addr,
6130 size_t addr_size)
6131{
6132 u64 count = 0;
6133 struct nfs4_client *clp;
6134 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6135 nfsd_net_id);
6136 LIST_HEAD(reaplist);
6137
6138 if (!nfsd_netns_ready(nn))
6139 return count;
6140
6141 spin_lock(&nn->client_lock);
6142 clp = nfsd_find_client(addr, addr_size);
6143 if (clp)
6144 count = nfsd_find_all_delegations(clp, 0, &reaplist);
6145 spin_unlock(&nn->client_lock);
6146
6147 nfsd_forget_delegations(&reaplist);
5062 return count; 6148 return count;
5063} 6149}
5064 6150
5065struct nfs4_client *nfsd_find_client(struct sockaddr_storage *addr, size_t addr_size) 6151u64
6152nfsd_inject_forget_delegations(u64 max)
5066{ 6153{
6154 u64 count = 0;
5067 struct nfs4_client *clp; 6155 struct nfs4_client *clp;
5068 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id); 6156 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6157 nfsd_net_id);
6158 LIST_HEAD(reaplist);
5069 6159
5070 if (!nfsd_netns_ready(nn)) 6160 if (!nfsd_netns_ready(nn))
5071 return NULL; 6161 return count;
5072 6162
6163 spin_lock(&nn->client_lock);
5073 list_for_each_entry(clp, &nn->client_lru, cl_lru) { 6164 list_for_each_entry(clp, &nn->client_lru, cl_lru) {
5074 if (memcmp(&clp->cl_addr, addr, addr_size) == 0) 6165 count += nfsd_find_all_delegations(clp, max - count, &reaplist);
5075 return clp; 6166 if (max != 0 && count >= max)
6167 break;
5076 } 6168 }
5077 return NULL; 6169 spin_unlock(&nn->client_lock);
6170 nfsd_forget_delegations(&reaplist);
6171 return count;
5078} 6172}
5079 6173
6174static void
6175nfsd_recall_delegations(struct list_head *reaplist)
6176{
6177 struct nfs4_client *clp;
6178 struct nfs4_delegation *dp, *next;
6179
6180 list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) {
6181 list_del_init(&dp->dl_recall_lru);
6182 clp = dp->dl_stid.sc_client;
6183 /*
6184 * We skipped all entries that had a zero dl_time before,
6185 * so we can now reset the dl_time back to 0. If a delegation
6186 * break comes in now, then it won't make any difference since
6187 * we're recalling it either way.
6188 */
6189 spin_lock(&state_lock);
6190 dp->dl_time = 0;
6191 spin_unlock(&state_lock);
6192 nfsd_break_one_deleg(dp);
6193 put_client(clp);
6194 }
6195}
6196
6197u64
6198nfsd_inject_recall_client_delegations(struct sockaddr_storage *addr,
6199 size_t addr_size)
6200{
6201 u64 count = 0;
6202 struct nfs4_client *clp;
6203 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6204 nfsd_net_id);
6205 LIST_HEAD(reaplist);
6206
6207 if (!nfsd_netns_ready(nn))
6208 return count;
6209
6210 spin_lock(&nn->client_lock);
6211 clp = nfsd_find_client(addr, addr_size);
6212 if (clp)
6213 count = nfsd_find_all_delegations(clp, 0, &reaplist);
6214 spin_unlock(&nn->client_lock);
6215
6216 nfsd_recall_delegations(&reaplist);
6217 return count;
6218}
6219
6220u64
6221nfsd_inject_recall_delegations(u64 max)
6222{
6223 u64 count = 0;
6224 struct nfs4_client *clp, *next;
6225 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6226 nfsd_net_id);
6227 LIST_HEAD(reaplist);
6228
6229 if (!nfsd_netns_ready(nn))
6230 return count;
6231
6232 spin_lock(&nn->client_lock);
6233 list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) {
6234 count += nfsd_find_all_delegations(clp, max - count, &reaplist);
6235 if (max != 0 && ++count >= max)
6236 break;
6237 }
6238 spin_unlock(&nn->client_lock);
6239 nfsd_recall_delegations(&reaplist);
6240 return count;
6241}
5080#endif /* CONFIG_NFSD_FAULT_INJECTION */ 6242#endif /* CONFIG_NFSD_FAULT_INJECTION */
5081 6243
5082/* 6244/*
@@ -5113,14 +6275,6 @@ static int nfs4_state_create_net(struct net *net)
5113 CLIENT_HASH_SIZE, GFP_KERNEL); 6275 CLIENT_HASH_SIZE, GFP_KERNEL);
5114 if (!nn->unconf_id_hashtbl) 6276 if (!nn->unconf_id_hashtbl)
5115 goto err_unconf_id; 6277 goto err_unconf_id;
5116 nn->ownerstr_hashtbl = kmalloc(sizeof(struct list_head) *
5117 OWNER_HASH_SIZE, GFP_KERNEL);
5118 if (!nn->ownerstr_hashtbl)
5119 goto err_ownerstr;
5120 nn->lockowner_ino_hashtbl = kmalloc(sizeof(struct list_head) *
5121 LOCKOWNER_INO_HASH_SIZE, GFP_KERNEL);
5122 if (!nn->lockowner_ino_hashtbl)
5123 goto err_lockowner_ino;
5124 nn->sessionid_hashtbl = kmalloc(sizeof(struct list_head) * 6278 nn->sessionid_hashtbl = kmalloc(sizeof(struct list_head) *
5125 SESSION_HASH_SIZE, GFP_KERNEL); 6279 SESSION_HASH_SIZE, GFP_KERNEL);
5126 if (!nn->sessionid_hashtbl) 6280 if (!nn->sessionid_hashtbl)
@@ -5130,10 +6284,6 @@ static int nfs4_state_create_net(struct net *net)
5130 INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]); 6284 INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]);
5131 INIT_LIST_HEAD(&nn->unconf_id_hashtbl[i]); 6285 INIT_LIST_HEAD(&nn->unconf_id_hashtbl[i]);
5132 } 6286 }
5133 for (i = 0; i < OWNER_HASH_SIZE; i++)
5134 INIT_LIST_HEAD(&nn->ownerstr_hashtbl[i]);
5135 for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++)
5136 INIT_LIST_HEAD(&nn->lockowner_ino_hashtbl[i]);
5137 for (i = 0; i < SESSION_HASH_SIZE; i++) 6287 for (i = 0; i < SESSION_HASH_SIZE; i++)
5138 INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]); 6288 INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]);
5139 nn->conf_name_tree = RB_ROOT; 6289 nn->conf_name_tree = RB_ROOT;
@@ -5149,10 +6299,6 @@ static int nfs4_state_create_net(struct net *net)
5149 return 0; 6299 return 0;
5150 6300
5151err_sessionid: 6301err_sessionid:
5152 kfree(nn->lockowner_ino_hashtbl);
5153err_lockowner_ino:
5154 kfree(nn->ownerstr_hashtbl);
5155err_ownerstr:
5156 kfree(nn->unconf_id_hashtbl); 6302 kfree(nn->unconf_id_hashtbl);
5157err_unconf_id: 6303err_unconf_id:
5158 kfree(nn->conf_id_hashtbl); 6304 kfree(nn->conf_id_hashtbl);
@@ -5182,8 +6328,6 @@ nfs4_state_destroy_net(struct net *net)
5182 } 6328 }
5183 6329
5184 kfree(nn->sessionid_hashtbl); 6330 kfree(nn->sessionid_hashtbl);
5185 kfree(nn->lockowner_ino_hashtbl);
5186 kfree(nn->ownerstr_hashtbl);
5187 kfree(nn->unconf_id_hashtbl); 6331 kfree(nn->unconf_id_hashtbl);
5188 kfree(nn->conf_id_hashtbl); 6332 kfree(nn->conf_id_hashtbl);
5189 put_net(net); 6333 put_net(net);
@@ -5247,22 +6391,22 @@ nfs4_state_shutdown_net(struct net *net)
5247 cancel_delayed_work_sync(&nn->laundromat_work); 6391 cancel_delayed_work_sync(&nn->laundromat_work);
5248 locks_end_grace(&nn->nfsd4_manager); 6392 locks_end_grace(&nn->nfsd4_manager);
5249 6393
5250 nfs4_lock_state();
5251 INIT_LIST_HEAD(&reaplist); 6394 INIT_LIST_HEAD(&reaplist);
5252 spin_lock(&state_lock); 6395 spin_lock(&state_lock);
5253 list_for_each_safe(pos, next, &nn->del_recall_lru) { 6396 list_for_each_safe(pos, next, &nn->del_recall_lru) {
5254 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); 6397 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
5255 list_move(&dp->dl_recall_lru, &reaplist); 6398 unhash_delegation_locked(dp);
6399 list_add(&dp->dl_recall_lru, &reaplist);
5256 } 6400 }
5257 spin_unlock(&state_lock); 6401 spin_unlock(&state_lock);
5258 list_for_each_safe(pos, next, &reaplist) { 6402 list_for_each_safe(pos, next, &reaplist) {
5259 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); 6403 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
5260 destroy_delegation(dp); 6404 list_del_init(&dp->dl_recall_lru);
6405 nfs4_put_stid(&dp->dl_stid);
5261 } 6406 }
5262 6407
5263 nfsd4_client_tracking_exit(net); 6408 nfsd4_client_tracking_exit(net);
5264 nfs4_state_destroy_net(net); 6409 nfs4_state_destroy_net(net);
5265 nfs4_unlock_state();
5266} 6410}
5267 6411
5268void 6412void
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 944275c8f56d..f9821ce6658a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -181,28 +181,43 @@ static int zero_clientid(clientid_t *clid)
181} 181}
182 182
183/** 183/**
184 * defer_free - mark an allocation as deferred freed 184 * svcxdr_tmpalloc - allocate memory to be freed after compound processing
185 * @argp: NFSv4 compound argument structure to be freed with 185 * @argp: NFSv4 compound argument structure
186 * @release: release callback to free @p, typically kfree() 186 * @p: pointer to be freed (with kfree())
187 * @p: pointer to be freed
188 * 187 *
189 * Marks @p to be freed when processing the compound operation 188 * Marks @p to be freed when processing the compound operation
190 * described in @argp finishes. 189 * described in @argp finishes.
191 */ 190 */
192static int 191static void *
193defer_free(struct nfsd4_compoundargs *argp, 192svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len)
194 void (*release)(const void *), void *p)
195{ 193{
196 struct tmpbuf *tb; 194 struct svcxdr_tmpbuf *tb;
197 195
198 tb = kmalloc(sizeof(*tb), GFP_KERNEL); 196 tb = kmalloc(sizeof(*tb) + len, GFP_KERNEL);
199 if (!tb) 197 if (!tb)
200 return -ENOMEM; 198 return NULL;
201 tb->buf = p;
202 tb->release = release;
203 tb->next = argp->to_free; 199 tb->next = argp->to_free;
204 argp->to_free = tb; 200 argp->to_free = tb;
205 return 0; 201 return tb->buf;
202}
203
204/*
205 * For xdr strings that need to be passed to other kernel api's
206 * as null-terminated strings.
207 *
208 * Note null-terminating in place usually isn't safe since the
209 * buffer might end on a page boundary.
210 */
211static char *
212svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len)
213{
214 char *p = svcxdr_tmpalloc(argp, len + 1);
215
216 if (!p)
217 return NULL;
218 memcpy(p, buf, len);
219 p[len] = '\0';
220 return p;
206} 221}
207 222
208/** 223/**
@@ -217,19 +232,13 @@ defer_free(struct nfsd4_compoundargs *argp,
217 */ 232 */
218static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) 233static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)
219{ 234{
220 if (p == argp->tmp) { 235 void *ret;
221 p = kmemdup(argp->tmp, nbytes, GFP_KERNEL); 236
222 if (!p) 237 ret = svcxdr_tmpalloc(argp, nbytes);
223 return NULL; 238 if (!ret)
224 } else {
225 BUG_ON(p != argp->tmpp);
226 argp->tmpp = NULL;
227 }
228 if (defer_free(argp, kfree, p)) {
229 kfree(p);
230 return NULL; 239 return NULL;
231 } else 240 memcpy(ret, p, nbytes);
232 return (char *)p; 241 return ret;
233} 242}
234 243
235static __be32 244static __be32
@@ -292,12 +301,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
292 if (nace > NFS4_ACL_MAX) 301 if (nace > NFS4_ACL_MAX)
293 return nfserr_fbig; 302 return nfserr_fbig;
294 303
295 *acl = nfs4_acl_new(nace); 304 *acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(nace));
296 if (*acl == NULL) 305 if (*acl == NULL)
297 return nfserr_jukebox; 306 return nfserr_jukebox;
298 307
299 defer_free(argp, kfree, *acl);
300
301 (*acl)->naces = nace; 308 (*acl)->naces = nace;
302 for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) { 309 for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) {
303 READ_BUF(16); len += 16; 310 READ_BUF(16); len += 16;
@@ -418,12 +425,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
418 return nfserr_badlabel; 425 return nfserr_badlabel;
419 len += (XDR_QUADLEN(dummy32) << 2); 426 len += (XDR_QUADLEN(dummy32) << 2);
420 READMEM(buf, dummy32); 427 READMEM(buf, dummy32);
421 label->data = kzalloc(dummy32 + 1, GFP_KERNEL); 428 label->len = dummy32;
429 label->data = svcxdr_dupstr(argp, buf, dummy32);
422 if (!label->data) 430 if (!label->data)
423 return nfserr_jukebox; 431 return nfserr_jukebox;
424 label->len = dummy32;
425 defer_free(argp, kfree, label->data);
426 memcpy(label->data, buf, dummy32);
427 } 432 }
428#endif 433#endif
429 434
@@ -598,20 +603,11 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
598 switch (create->cr_type) { 603 switch (create->cr_type) {
599 case NF4LNK: 604 case NF4LNK:
600 READ_BUF(4); 605 READ_BUF(4);
601 create->cr_linklen = be32_to_cpup(p++); 606 create->cr_datalen = be32_to_cpup(p++);
602 READ_BUF(create->cr_linklen); 607 READ_BUF(create->cr_datalen);
603 /* 608 create->cr_data = svcxdr_dupstr(argp, p, create->cr_datalen);
604 * The VFS will want a null-terminated string, and 609 if (!create->cr_data)
605 * null-terminating in place isn't safe since this might
606 * end on a page boundary:
607 */
608 create->cr_linkname =
609 kmalloc(create->cr_linklen + 1, GFP_KERNEL);
610 if (!create->cr_linkname)
611 return nfserr_jukebox; 610 return nfserr_jukebox;
612 memcpy(create->cr_linkname, p, create->cr_linklen);
613 create->cr_linkname[create->cr_linklen] = '\0';
614 defer_free(argp, kfree, create->cr_linkname);
615 break; 611 break;
616 case NF4BLK: 612 case NF4BLK:
617 case NF4CHR: 613 case NF4CHR:
@@ -1481,13 +1477,12 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta
1481 INIT_LIST_HEAD(&test_stateid->ts_stateid_list); 1477 INIT_LIST_HEAD(&test_stateid->ts_stateid_list);
1482 1478
1483 for (i = 0; i < test_stateid->ts_num_ids; i++) { 1479 for (i = 0; i < test_stateid->ts_num_ids; i++) {
1484 stateid = kmalloc(sizeof(struct nfsd4_test_stateid_id), GFP_KERNEL); 1480 stateid = svcxdr_tmpalloc(argp, sizeof(*stateid));
1485 if (!stateid) { 1481 if (!stateid) {
1486 status = nfserrno(-ENOMEM); 1482 status = nfserrno(-ENOMEM);
1487 goto out; 1483 goto out;
1488 } 1484 }
1489 1485
1490 defer_free(argp, kfree, stateid);
1491 INIT_LIST_HEAD(&stateid->ts_id_list); 1486 INIT_LIST_HEAD(&stateid->ts_id_list);
1492 list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list); 1487 list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list);
1493 1488
@@ -1640,7 +1635,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
1640 goto xdr_error; 1635 goto xdr_error;
1641 1636
1642 if (argp->opcnt > ARRAY_SIZE(argp->iops)) { 1637 if (argp->opcnt > ARRAY_SIZE(argp->iops)) {
1643 argp->ops = kmalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); 1638 argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL);
1644 if (!argp->ops) { 1639 if (!argp->ops) {
1645 argp->ops = argp->iops; 1640 argp->ops = argp->iops;
1646 dprintk("nfsd: couldn't allocate room for COMPOUND\n"); 1641 dprintk("nfsd: couldn't allocate room for COMPOUND\n");
@@ -3077,11 +3072,8 @@ static __be32 nfsd4_encode_splice_read(
3077 __be32 nfserr; 3072 __be32 nfserr;
3078 __be32 *p = xdr->p - 2; 3073 __be32 *p = xdr->p - 2;
3079 3074
3080 /* 3075 /* Make sure there will be room for padding if needed */
3081 * Don't inline pages unless we know there's room for eof, 3076 if (xdr->end - xdr->p < 1)
3082 * count, and possible padding:
3083 */
3084 if (xdr->end - xdr->p < 3)
3085 return nfserr_resource; 3077 return nfserr_resource;
3086 3078
3087 nfserr = nfsd_splice_read(read->rd_rqstp, file, 3079 nfserr = nfsd_splice_read(read->rd_rqstp, file,
@@ -3147,9 +3139,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
3147 len = maxcount; 3139 len = maxcount;
3148 v = 0; 3140 v = 0;
3149 3141
3150 thislen = (void *)xdr->end - (void *)xdr->p; 3142 thislen = min_t(long, len, ((void *)xdr->end - (void *)xdr->p));
3151 if (len < thislen)
3152 thislen = len;
3153 p = xdr_reserve_space(xdr, (thislen+3)&~3); 3143 p = xdr_reserve_space(xdr, (thislen+3)&~3);
3154 WARN_ON_ONCE(!p); 3144 WARN_ON_ONCE(!p);
3155 resp->rqstp->rq_vec[v].iov_base = p; 3145 resp->rqstp->rq_vec[v].iov_base = p;
@@ -3216,10 +3206,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
3216 xdr_commit_encode(xdr); 3206 xdr_commit_encode(xdr);
3217 3207
3218 maxcount = svc_max_payload(resp->rqstp); 3208 maxcount = svc_max_payload(resp->rqstp);
3219 if (maxcount > xdr->buf->buflen - xdr->buf->len) 3209 maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len));
3220 maxcount = xdr->buf->buflen - xdr->buf->len; 3210 maxcount = min_t(unsigned long, maxcount, read->rd_length);
3221 if (maxcount > read->rd_length)
3222 maxcount = read->rd_length;
3223 3211
3224 if (!read->rd_filp) { 3212 if (!read->rd_filp) {
3225 err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp, 3213 err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp,
@@ -3937,8 +3925,6 @@ status:
3937 * 3925 *
3938 * XDR note: do not encode rp->rp_buflen: the buffer contains the 3926 * XDR note: do not encode rp->rp_buflen: the buffer contains the
3939 * previously sent already encoded operation. 3927 * previously sent already encoded operation.
3940 *
3941 * called with nfs4_lock_state() held
3942 */ 3928 */
3943void 3929void
3944nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op) 3930nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
@@ -3977,9 +3963,8 @@ int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp)
3977 kfree(args->tmpp); 3963 kfree(args->tmpp);
3978 args->tmpp = NULL; 3964 args->tmpp = NULL;
3979 while (args->to_free) { 3965 while (args->to_free) {
3980 struct tmpbuf *tb = args->to_free; 3966 struct svcxdr_tmpbuf *tb = args->to_free;
3981 args->to_free = tb->next; 3967 args->to_free = tb->next;
3982 tb->release(tb->buf);
3983 kfree(tb); 3968 kfree(tb);
3984 } 3969 }
3985 return 1; 3970 return 1;
@@ -4012,7 +3997,6 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
4012 /* 3997 /*
4013 * All that remains is to write the tag and operation count... 3998 * All that remains is to write the tag and operation count...
4014 */ 3999 */
4015 struct nfsd4_compound_state *cs = &resp->cstate;
4016 struct xdr_buf *buf = resp->xdr.buf; 4000 struct xdr_buf *buf = resp->xdr.buf;
4017 4001
4018 WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len + 4002 WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len +
@@ -4026,19 +4010,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
4026 p += XDR_QUADLEN(resp->taglen); 4010 p += XDR_QUADLEN(resp->taglen);
4027 *p++ = htonl(resp->opcnt); 4011 *p++ = htonl(resp->opcnt);
4028 4012
4029 if (nfsd4_has_session(cs)) { 4013 nfsd4_sequence_done(resp);
4030 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
4031 struct nfs4_client *clp = cs->session->se_client;
4032 if (cs->status != nfserr_replay_cache) {
4033 nfsd4_store_cache_entry(resp);
4034 cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE;
4035 }
4036 /* Renew the clientid on success and on replay */
4037 spin_lock(&nn->client_lock);
4038 nfsd4_put_session(cs->session);
4039 spin_unlock(&nn->client_lock);
4040 put_client_renew(clp);
4041 }
4042 return 1; 4014 return 1;
4043} 4015}
4044 4016
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 6040da8830ff..ff9567633245 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -221,7 +221,12 @@ static void
221hash_refile(struct svc_cacherep *rp) 221hash_refile(struct svc_cacherep *rp)
222{ 222{
223 hlist_del_init(&rp->c_hash); 223 hlist_del_init(&rp->c_hash);
224 hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits)); 224 /*
225 * No point in byte swapping c_xid since we're just using it to pick
226 * a hash bucket.
227 */
228 hlist_add_head(&rp->c_hash, cache_hash +
229 hash_32((__force u32)rp->c_xid, maskbits));
225} 230}
226 231
227/* 232/*
@@ -356,7 +361,11 @@ nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum)
356 struct hlist_head *rh; 361 struct hlist_head *rh;
357 unsigned int entries = 0; 362 unsigned int entries = 0;
358 363
359 rh = &cache_hash[hash_32(rqstp->rq_xid, maskbits)]; 364 /*
365 * No point in byte swapping rq_xid since we're just using it to pick
366 * a hash bucket.
367 */
368 rh = &cache_hash[hash_32((__force u32)rqstp->rq_xid, maskbits)];
360 hlist_for_each_entry(rp, rh, c_hash) { 369 hlist_for_each_entry(rp, rh, c_hash) {
361 ++entries; 370 ++entries;
362 if (nfsd_cache_match(rqstp, csum, rp)) { 371 if (nfsd_cache_match(rqstp, csum, rp)) {
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 51844048937f..4e042105fb6e 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -39,6 +39,7 @@ enum {
39 NFSD_Versions, 39 NFSD_Versions,
40 NFSD_Ports, 40 NFSD_Ports,
41 NFSD_MaxBlkSize, 41 NFSD_MaxBlkSize,
42 NFSD_MaxConnections,
42 NFSD_SupportedEnctypes, 43 NFSD_SupportedEnctypes,
43 /* 44 /*
44 * The below MUST come last. Otherwise we leave a hole in nfsd_files[] 45 * The below MUST come last. Otherwise we leave a hole in nfsd_files[]
@@ -62,6 +63,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size);
62static ssize_t write_versions(struct file *file, char *buf, size_t size); 63static ssize_t write_versions(struct file *file, char *buf, size_t size);
63static ssize_t write_ports(struct file *file, char *buf, size_t size); 64static ssize_t write_ports(struct file *file, char *buf, size_t size);
64static ssize_t write_maxblksize(struct file *file, char *buf, size_t size); 65static ssize_t write_maxblksize(struct file *file, char *buf, size_t size);
66static ssize_t write_maxconn(struct file *file, char *buf, size_t size);
65#ifdef CONFIG_NFSD_V4 67#ifdef CONFIG_NFSD_V4
66static ssize_t write_leasetime(struct file *file, char *buf, size_t size); 68static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
67static ssize_t write_gracetime(struct file *file, char *buf, size_t size); 69static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
@@ -77,6 +79,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
77 [NFSD_Versions] = write_versions, 79 [NFSD_Versions] = write_versions,
78 [NFSD_Ports] = write_ports, 80 [NFSD_Ports] = write_ports,
79 [NFSD_MaxBlkSize] = write_maxblksize, 81 [NFSD_MaxBlkSize] = write_maxblksize,
82 [NFSD_MaxConnections] = write_maxconn,
80#ifdef CONFIG_NFSD_V4 83#ifdef CONFIG_NFSD_V4
81 [NFSD_Leasetime] = write_leasetime, 84 [NFSD_Leasetime] = write_leasetime,
82 [NFSD_Gracetime] = write_gracetime, 85 [NFSD_Gracetime] = write_gracetime,
@@ -369,8 +372,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
369 372
370 if (maxsize < NFS_FHSIZE) 373 if (maxsize < NFS_FHSIZE)
371 return -EINVAL; 374 return -EINVAL;
372 if (maxsize > NFS3_FHSIZE) 375 maxsize = min(maxsize, NFS3_FHSIZE);
373 maxsize = NFS3_FHSIZE;
374 376
375 if (qword_get(&mesg, mesg, size)>0) 377 if (qword_get(&mesg, mesg, size)>0)
376 return -EINVAL; 378 return -EINVAL;
@@ -871,10 +873,8 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
871 /* force bsize into allowed range and 873 /* force bsize into allowed range and
872 * required alignment. 874 * required alignment.
873 */ 875 */
874 if (bsize < 1024) 876 bsize = max_t(int, bsize, 1024);
875 bsize = 1024; 877 bsize = min_t(int, bsize, NFSSVC_MAXBLKSIZE);
876 if (bsize > NFSSVC_MAXBLKSIZE)
877 bsize = NFSSVC_MAXBLKSIZE;
878 bsize &= ~(1024-1); 878 bsize &= ~(1024-1);
879 mutex_lock(&nfsd_mutex); 879 mutex_lock(&nfsd_mutex);
880 if (nn->nfsd_serv) { 880 if (nn->nfsd_serv) {
@@ -889,6 +889,44 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
889 nfsd_max_blksize); 889 nfsd_max_blksize);
890} 890}
891 891
892/**
893 * write_maxconn - Set or report the current max number of connections
894 *
895 * Input:
896 * buf: ignored
897 * size: zero
898 * OR
899 *
900 * Input:
901 * buf: C string containing an unsigned
902 * integer value representing the new
903 * number of max connections
904 * size: non-zero length of C string in @buf
905 * Output:
906 * On success: passed-in buffer filled with '\n'-terminated C string
907 * containing numeric value of max_connections setting
908 * for this net namespace;
909 * return code is the size in bytes of the string
910 * On error: return code is zero or a negative errno value
911 */
912static ssize_t write_maxconn(struct file *file, char *buf, size_t size)
913{
914 char *mesg = buf;
915 struct net *net = file->f_dentry->d_sb->s_fs_info;
916 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
917 unsigned int maxconn = nn->max_connections;
918
919 if (size > 0) {
920 int rv = get_uint(&mesg, &maxconn);
921
922 if (rv)
923 return rv;
924 nn->max_connections = maxconn;
925 }
926
927 return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%u\n", maxconn);
928}
929
892#ifdef CONFIG_NFSD_V4 930#ifdef CONFIG_NFSD_V4
893static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, 931static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
894 time_t *time, struct nfsd_net *nn) 932 time_t *time, struct nfsd_net *nn)
@@ -1064,6 +1102,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1064 [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, 1102 [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
1065 [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, 1103 [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
1066 [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, 1104 [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
1105 [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO},
1067#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) 1106#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
1068 [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO}, 1107 [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO},
1069#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ 1108#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index ec8393418154..e883a5868be6 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -162,7 +162,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
162 /* deprecated, convert to type 3 */ 162 /* deprecated, convert to type 3 */
163 len = key_len(FSID_ENCODE_DEV)/4; 163 len = key_len(FSID_ENCODE_DEV)/4;
164 fh->fh_fsid_type = FSID_ENCODE_DEV; 164 fh->fh_fsid_type = FSID_ENCODE_DEV;
165 fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl(fh->fh_fsid[0]), ntohl(fh->fh_fsid[1]))); 165 /*
166 * struct knfsd_fh uses host-endian fields, which are
167 * sometimes used to hold net-endian values. This
168 * confuses sparse, so we must use __force here to
169 * keep it from complaining.
170 */
171 fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]),
172 ntohl((__force __be32)fh->fh_fsid[1])));
166 fh->fh_fsid[1] = fh->fh_fsid[2]; 173 fh->fh_fsid[1] = fh->fh_fsid[2];
167 } 174 }
168 data_left -= len; 175 data_left -= len;
@@ -539,8 +546,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
539 dentry); 546 dentry);
540 547
541 fhp->fh_dentry = dget(dentry); /* our internal copy */ 548 fhp->fh_dentry = dget(dentry); /* our internal copy */
542 fhp->fh_export = exp; 549 fhp->fh_export = exp_get(exp);
543 cache_get(&exp->h);
544 550
545 if (fhp->fh_handle.fh_version == 0xca) { 551 if (fhp->fh_handle.fh_version == 0xca) {
546 /* old style filehandle please */ 552 /* old style filehandle please */
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 2e89e70ac15c..08236d70c667 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -73,8 +73,15 @@ enum fsid_source {
73extern enum fsid_source fsid_source(struct svc_fh *fhp); 73extern enum fsid_source fsid_source(struct svc_fh *fhp);
74 74
75 75
76/* This might look a little large to "inline" but in all calls except 76/*
77 * This might look a little large to "inline" but in all calls except
77 * one, 'vers' is constant so moste of the function disappears. 78 * one, 'vers' is constant so moste of the function disappears.
79 *
80 * In some cases the values are considered to be host endian and in
81 * others, net endian. fsidv is always considered to be u32 as the
82 * callers don't know which it will be. So we must use __force to keep
83 * sparse from complaining. Since these values are opaque to the
84 * client, that shouldn't be a problem.
78 */ 85 */
79static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino, 86static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino,
80 u32 fsid, unsigned char *uuid) 87 u32 fsid, unsigned char *uuid)
@@ -82,7 +89,7 @@ static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino,
82 u32 *up; 89 u32 *up;
83 switch(vers) { 90 switch(vers) {
84 case FSID_DEV: 91 case FSID_DEV:
85 fsidv[0] = htonl((MAJOR(dev)<<16) | 92 fsidv[0] = (__force __u32)htonl((MAJOR(dev)<<16) |
86 MINOR(dev)); 93 MINOR(dev));
87 fsidv[1] = ino_t_to_u32(ino); 94 fsidv[1] = ino_t_to_u32(ino);
88 break; 95 break;
@@ -90,8 +97,8 @@ static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino,
90 fsidv[0] = fsid; 97 fsidv[0] = fsid;
91 break; 98 break;
92 case FSID_MAJOR_MINOR: 99 case FSID_MAJOR_MINOR:
93 fsidv[0] = htonl(MAJOR(dev)); 100 fsidv[0] = (__force __u32)htonl(MAJOR(dev));
94 fsidv[1] = htonl(MINOR(dev)); 101 fsidv[1] = (__force __u32)htonl(MINOR(dev));
95 fsidv[2] = ino_t_to_u32(ino); 102 fsidv[2] = ino_t_to_u32(ino);
96 break; 103 break;
97 104
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 54c6b3d3cc79..b8680738f588 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -403,12 +403,13 @@ nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp,
403 403
404 fh_init(&newfh, NFS_FHSIZE); 404 fh_init(&newfh, NFS_FHSIZE);
405 /* 405 /*
406 * Create the link, look up new file and set attrs. 406 * Crazy hack: the request fits in a page, and already-decoded
407 * attributes follow argp->tname, so it's safe to just write a
408 * null to ensure it's null-terminated:
407 */ 409 */
410 argp->tname[argp->tlen] = '\0';
408 nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, 411 nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
409 argp->tname, argp->tlen, 412 argp->tname, &newfh);
410 &newfh, &argp->attrs);
411
412 413
413 fh_put(&argp->ffh); 414 fh_put(&argp->ffh);
414 fh_put(&newfh); 415 fh_put(&newfh);
@@ -716,6 +717,7 @@ nfserrno (int errno)
716 { nfserr_noent, -ENOENT }, 717 { nfserr_noent, -ENOENT },
717 { nfserr_io, -EIO }, 718 { nfserr_io, -EIO },
718 { nfserr_nxio, -ENXIO }, 719 { nfserr_nxio, -ENXIO },
720 { nfserr_fbig, -E2BIG },
719 { nfserr_acces, -EACCES }, 721 { nfserr_acces, -EACCES },
720 { nfserr_exist, -EEXIST }, 722 { nfserr_exist, -EEXIST },
721 { nfserr_xdev, -EXDEV }, 723 { nfserr_xdev, -EXDEV },
@@ -743,6 +745,7 @@ nfserrno (int errno)
743 { nfserr_notsupp, -EOPNOTSUPP }, 745 { nfserr_notsupp, -EOPNOTSUPP },
744 { nfserr_toosmall, -ETOOSMALL }, 746 { nfserr_toosmall, -ETOOSMALL },
745 { nfserr_serverfault, -ESERVERFAULT }, 747 { nfserr_serverfault, -ESERVERFAULT },
748 { nfserr_serverfault, -ENFILE },
746 }; 749 };
747 int i; 750 int i;
748 751
@@ -750,7 +753,7 @@ nfserrno (int errno)
750 if (nfs_errtbl[i].syserr == errno) 753 if (nfs_errtbl[i].syserr == errno)
751 return nfs_errtbl[i].nfserr; 754 return nfs_errtbl[i].nfserr;
752 } 755 }
753 printk (KERN_INFO "nfsd: non-standard errno: %d\n", errno); 756 WARN(1, "nfsd: non-standard errno: %d\n", errno);
754 return nfserr_io; 757 return nfserr_io;
755} 758}
756 759
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 1879e43f2868..752d56bbe0ba 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -221,7 +221,8 @@ static int nfsd_startup_generic(int nrservs)
221 */ 221 */
222 ret = nfsd_racache_init(2*nrservs); 222 ret = nfsd_racache_init(2*nrservs);
223 if (ret) 223 if (ret)
224 return ret; 224 goto dec_users;
225
225 ret = nfs4_state_start(); 226 ret = nfs4_state_start();
226 if (ret) 227 if (ret)
227 goto out_racache; 228 goto out_racache;
@@ -229,6 +230,8 @@ static int nfsd_startup_generic(int nrservs)
229 230
230out_racache: 231out_racache:
231 nfsd_racache_shutdown(); 232 nfsd_racache_shutdown();
233dec_users:
234 nfsd_users--;
232 return ret; 235 return ret;
233} 236}
234 237
@@ -405,6 +408,7 @@ int nfsd_create_serv(struct net *net)
405 if (nn->nfsd_serv == NULL) 408 if (nn->nfsd_serv == NULL)
406 return -ENOMEM; 409 return -ENOMEM;
407 410
411 nn->nfsd_serv->sv_maxconn = nn->max_connections;
408 error = svc_bind(nn->nfsd_serv, net); 412 error = svc_bind(nn->nfsd_serv, net);
409 if (error < 0) { 413 if (error < 0) {
410 svc_destroy(nn->nfsd_serv); 414 svc_destroy(nn->nfsd_serv);
@@ -469,8 +473,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
469 /* enforce a global maximum number of threads */ 473 /* enforce a global maximum number of threads */
470 tot = 0; 474 tot = 0;
471 for (i = 0; i < n; i++) { 475 for (i = 0; i < n; i++) {
472 if (nthreads[i] > NFSD_MAXSERVS) 476 nthreads[i] = min(nthreads[i], NFSD_MAXSERVS);
473 nthreads[i] = NFSD_MAXSERVS;
474 tot += nthreads[i]; 477 tot += nthreads[i];
475 } 478 }
476 if (tot > NFSD_MAXSERVS) { 479 if (tot > NFSD_MAXSERVS) {
@@ -519,11 +522,11 @@ nfsd_svc(int nrservs, struct net *net)
519 522
520 mutex_lock(&nfsd_mutex); 523 mutex_lock(&nfsd_mutex);
521 dprintk("nfsd: creating service\n"); 524 dprintk("nfsd: creating service\n");
522 if (nrservs <= 0) 525
523 nrservs = 0; 526 nrservs = max(nrservs, 0);
524 if (nrservs > NFSD_MAXSERVS) 527 nrservs = min(nrservs, NFSD_MAXSERVS);
525 nrservs = NFSD_MAXSERVS;
526 error = 0; 528 error = 0;
529
527 if (nrservs == 0 && nn->nfsd_serv == NULL) 530 if (nrservs == 0 && nn->nfsd_serv == NULL)
528 goto out; 531 goto out;
529 532
@@ -564,6 +567,7 @@ nfsd(void *vrqstp)
564 struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp; 567 struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
565 struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list); 568 struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list);
566 struct net *net = perm_sock->xpt_net; 569 struct net *net = perm_sock->xpt_net;
570 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
567 int err; 571 int err;
568 572
569 /* Lock module and set up kernel thread */ 573 /* Lock module and set up kernel thread */
@@ -597,6 +601,9 @@ nfsd(void *vrqstp)
597 * The main request loop 601 * The main request loop
598 */ 602 */
599 for (;;) { 603 for (;;) {
604 /* Update sv_maxconn if it has changed */
605 rqstp->rq_server->sv_maxconn = nn->max_connections;
606
600 /* 607 /*
601 * Find a socket with data available and call its 608 * Find a socket with data available and call its
602 * recvfrom routine. 609 * recvfrom routine.
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 1ac306b769df..412d7061f9e5 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -257,8 +257,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
257 len = args->count = ntohl(*p++); 257 len = args->count = ntohl(*p++);
258 p++; /* totalcount - unused */ 258 p++; /* totalcount - unused */
259 259
260 if (len > NFSSVC_MAXBLKSIZE_V2) 260 len = min_t(unsigned int, len, NFSSVC_MAXBLKSIZE_V2);
261 len = NFSSVC_MAXBLKSIZE_V2;
262 261
263 /* set up somewhere to store response. 262 /* set up somewhere to store response.
264 * We take pages, put them on reslist and include in iovec 263 * We take pages, put them on reslist and include in iovec
@@ -268,7 +267,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
268 struct page *p = *(rqstp->rq_next_page++); 267 struct page *p = *(rqstp->rq_next_page++);
269 268
270 rqstp->rq_vec[v].iov_base = page_address(p); 269 rqstp->rq_vec[v].iov_base = page_address(p);
271 rqstp->rq_vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE; 270 rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
272 len -= rqstp->rq_vec[v].iov_len; 271 len -= rqstp->rq_vec[v].iov_len;
273 v++; 272 v++;
274 } 273 }
@@ -400,9 +399,7 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
400 return 0; 399 return 0;
401 args->cookie = ntohl(*p++); 400 args->cookie = ntohl(*p++);
402 args->count = ntohl(*p++); 401 args->count = ntohl(*p++);
403 if (args->count > PAGE_SIZE) 402 args->count = min_t(u32, args->count, PAGE_SIZE);
404 args->count = PAGE_SIZE;
405
406 args->buffer = page_address(*(rqstp->rq_next_page++)); 403 args->buffer = page_address(*(rqstp->rq_next_page++));
407 404
408 return xdr_argsize_check(rqstp, p); 405 return xdr_argsize_check(rqstp, p);
@@ -516,10 +513,11 @@ nfssvc_encode_entry(void *ccdv, const char *name,
516 } 513 }
517 if (cd->offset) 514 if (cd->offset)
518 *cd->offset = htonl(offset); 515 *cd->offset = htonl(offset);
519 if (namlen > NFS2_MAXNAMLEN)
520 namlen = NFS2_MAXNAMLEN;/* truncate filename */
521 516
517 /* truncate filename */
518 namlen = min(namlen, NFS2_MAXNAMLEN);
522 slen = XDR_QUADLEN(namlen); 519 slen = XDR_QUADLEN(namlen);
520
523 if ((buflen = cd->buflen - slen - 4) < 0) { 521 if ((buflen = cd->buflen - slen - 4) < 0) {
524 cd->common.err = nfserr_toosmall; 522 cd->common.err = nfserr_toosmall;
525 return -EINVAL; 523 return -EINVAL;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 374c66283ac5..4a89e00d7461 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -72,7 +72,13 @@ struct nfsd4_callback {
72 bool cb_done; 72 bool cb_done;
73}; 73};
74 74
75/*
76 * A core object that represents a "common" stateid. These are generally
77 * embedded within the different (more specific) stateid objects and contain
78 * fields that are of general use to any stateid.
79 */
75struct nfs4_stid { 80struct nfs4_stid {
81 atomic_t sc_count;
76#define NFS4_OPEN_STID 1 82#define NFS4_OPEN_STID 1
77#define NFS4_LOCK_STID 2 83#define NFS4_LOCK_STID 2
78#define NFS4_DELEG_STID 4 84#define NFS4_DELEG_STID 4
@@ -80,22 +86,43 @@ struct nfs4_stid {
80#define NFS4_CLOSED_STID 8 86#define NFS4_CLOSED_STID 8
81/* For a deleg stateid kept around only to process free_stateid's: */ 87/* For a deleg stateid kept around only to process free_stateid's: */
82#define NFS4_REVOKED_DELEG_STID 16 88#define NFS4_REVOKED_DELEG_STID 16
89#define NFS4_CLOSED_DELEG_STID 32
83 unsigned char sc_type; 90 unsigned char sc_type;
84 stateid_t sc_stateid; 91 stateid_t sc_stateid;
85 struct nfs4_client *sc_client; 92 struct nfs4_client *sc_client;
93 struct nfs4_file *sc_file;
94 void (*sc_free)(struct nfs4_stid *);
86}; 95};
87 96
97/*
98 * Represents a delegation stateid. The nfs4_client holds references to these
99 * and they are put when it is being destroyed or when the delegation is
100 * returned by the client:
101 *
102 * o 1 reference as long as a delegation is still in force (taken when it's
103 * alloc'd, put when it's returned or revoked)
104 *
105 * o 1 reference as long as a recall rpc is in progress (taken when the lease
106 * is broken, put when the rpc exits)
107 *
108 * o 1 more ephemeral reference for each nfsd thread currently doing something
109 * with that delegation without holding the cl_lock
110 *
111 * If the server attempts to recall a delegation and the client doesn't return it
112 * before a timeout, the server may also revoke the delegation. In that case,
113 * the object will either be destroyed (v4.0) or moved to a per-client list of
114 * revoked delegations (v4.1+).
115 *
116 * This object is a superset of the nfs4_stid.
117 */
88struct nfs4_delegation { 118struct nfs4_delegation {
89 struct nfs4_stid dl_stid; /* must be first field */ 119 struct nfs4_stid dl_stid; /* must be first field */
90 struct list_head dl_perfile; 120 struct list_head dl_perfile;
91 struct list_head dl_perclnt; 121 struct list_head dl_perclnt;
92 struct list_head dl_recall_lru; /* delegation recalled */ 122 struct list_head dl_recall_lru; /* delegation recalled */
93 atomic_t dl_count; /* ref count */
94 struct nfs4_file *dl_file;
95 u32 dl_type; 123 u32 dl_type;
96 time_t dl_time; 124 time_t dl_time;
97/* For recall: */ 125/* For recall: */
98 struct knfsd_fh dl_fh;
99 int dl_retries; 126 int dl_retries;
100 struct nfsd4_callback dl_recall; 127 struct nfsd4_callback dl_recall;
101}; 128};
@@ -194,6 +221,11 @@ struct nfsd4_conn {
194 unsigned char cn_flags; 221 unsigned char cn_flags;
195}; 222};
196 223
224/*
225 * Representation of a v4.1+ session. These are refcounted in a similar fashion
226 * to the nfs4_client. References are only taken when the server is actively
227 * working on the object (primarily during the processing of compounds).
228 */
197struct nfsd4_session { 229struct nfsd4_session {
198 atomic_t se_ref; 230 atomic_t se_ref;
199 struct list_head se_hash; /* hash by sessionid */ 231 struct list_head se_hash; /* hash by sessionid */
@@ -212,8 +244,6 @@ struct nfsd4_session {
212 struct nfsd4_slot *se_slots[]; /* forward channel slots */ 244 struct nfsd4_slot *se_slots[]; /* forward channel slots */
213}; 245};
214 246
215extern void nfsd4_put_session(struct nfsd4_session *ses);
216
217/* formatted contents of nfs4_sessionid */ 247/* formatted contents of nfs4_sessionid */
218struct nfsd4_sessionid { 248struct nfsd4_sessionid {
219 clientid_t clientid; 249 clientid_t clientid;
@@ -225,17 +255,35 @@ struct nfsd4_sessionid {
225 255
226/* 256/*
227 * struct nfs4_client - one per client. Clientids live here. 257 * struct nfs4_client - one per client. Clientids live here.
228 * o Each nfs4_client is hashed by clientid.
229 * 258 *
230 * o Each nfs4_clients is also hashed by name 259 * The initial object created by an NFS client using SETCLIENTID (for NFSv4.0)
231 * (the opaque quantity initially sent by the client to identify itself). 260 * or EXCHANGE_ID (for NFSv4.1+). These objects are refcounted and timestamped.
261 * Each nfsd_net_ns object contains a set of these and they are tracked via
262 * short and long form clientid. They are hashed and searched for under the
263 * per-nfsd_net client_lock spinlock.
264 *
265 * References to it are only held during the processing of compounds, and in
266 * certain other operations. In their "resting state" they have a refcount of
267 * 0. If they are not renewed within a lease period, they become eligible for
268 * destruction by the laundromat.
269 *
270 * These objects can also be destroyed prematurely by the fault injection code,
271 * or if the client sends certain forms of SETCLIENTID or EXCHANGE_ID updates.
272 * Care is taken *not* to do this however when the objects have an elevated
273 * refcount.
274 *
275 * o Each nfs4_client is hashed by clientid
276 *
277 * o Each nfs4_client is also hashed by name (the opaque quantity initially
278 * sent by the client to identify itself).
232 * 279 *
233 * o cl_perclient list is used to ensure no dangling stateowner references 280 * o cl_perclient list is used to ensure no dangling stateowner references
234 * when we expire the nfs4_client 281 * when we expire the nfs4_client
235 */ 282 */
236struct nfs4_client { 283struct nfs4_client {
237 struct list_head cl_idhash; /* hash by cl_clientid.id */ 284 struct list_head cl_idhash; /* hash by cl_clientid.id */
238 struct rb_node cl_namenode; /* link into by-name trees */ 285 struct rb_node cl_namenode; /* link into by-name trees */
286 struct list_head *cl_ownerstr_hashtbl;
239 struct list_head cl_openowners; 287 struct list_head cl_openowners;
240 struct idr cl_stateids; /* stateid lookup */ 288 struct idr cl_stateids; /* stateid lookup */
241 struct list_head cl_delegations; 289 struct list_head cl_delegations;
@@ -329,21 +377,43 @@ struct nfs4_replay {
329 unsigned int rp_buflen; 377 unsigned int rp_buflen;
330 char *rp_buf; 378 char *rp_buf;
331 struct knfsd_fh rp_openfh; 379 struct knfsd_fh rp_openfh;
380 struct mutex rp_mutex;
332 char rp_ibuf[NFSD4_REPLAY_ISIZE]; 381 char rp_ibuf[NFSD4_REPLAY_ISIZE];
333}; 382};
334 383
384struct nfs4_stateowner;
385
386struct nfs4_stateowner_operations {
387 void (*so_unhash)(struct nfs4_stateowner *);
388 void (*so_free)(struct nfs4_stateowner *);
389};
390
391/*
392 * A core object that represents either an open or lock owner. The open and
393 * lock owner objects have one of these embedded within them. Refcounts and
394 * other fields common to both owner types are contained within these
395 * structures.
396 */
335struct nfs4_stateowner { 397struct nfs4_stateowner {
336 struct list_head so_strhash; /* hash by op_name */ 398 struct list_head so_strhash;
337 struct list_head so_stateids; 399 struct list_head so_stateids;
338 struct nfs4_client * so_client; 400 struct nfs4_client *so_client;
339 /* after increment in ENCODE_SEQID_OP_TAIL, represents the next 401 const struct nfs4_stateowner_operations *so_ops;
402 /* after increment in nfsd4_bump_seqid, represents the next
340 * sequence id expected from the client: */ 403 * sequence id expected from the client: */
341 u32 so_seqid; 404 atomic_t so_count;
342 struct xdr_netobj so_owner; /* open owner name */ 405 u32 so_seqid;
343 struct nfs4_replay so_replay; 406 struct xdr_netobj so_owner; /* open owner name */
344 bool so_is_open_owner; 407 struct nfs4_replay so_replay;
408 bool so_is_open_owner;
345}; 409};
346 410
411/*
412 * When a file is opened, the client provides an open state owner opaque string
413 * that indicates the "owner" of that open. These objects are refcounted.
414 * References to it are held by each open state associated with it. This object
415 * is a superset of the nfs4_stateowner struct.
416 */
347struct nfs4_openowner { 417struct nfs4_openowner {
348 struct nfs4_stateowner oo_owner; /* must be first field */ 418 struct nfs4_stateowner oo_owner; /* must be first field */
349 struct list_head oo_perclient; 419 struct list_head oo_perclient;
@@ -358,15 +428,17 @@ struct nfs4_openowner {
358 struct nfs4_ol_stateid *oo_last_closed_stid; 428 struct nfs4_ol_stateid *oo_last_closed_stid;
359 time_t oo_time; /* time of placement on so_close_lru */ 429 time_t oo_time; /* time of placement on so_close_lru */
360#define NFS4_OO_CONFIRMED 1 430#define NFS4_OO_CONFIRMED 1
361#define NFS4_OO_NEW 4
362 unsigned char oo_flags; 431 unsigned char oo_flags;
363}; 432};
364 433
434/*
435 * Represents a generic "lockowner". Similar to an openowner. References to it
436 * are held by the lock stateids that are created on its behalf. This object is
437 * a superset of the nfs4_stateowner struct (or would be if it needed any extra
438 * fields).
439 */
365struct nfs4_lockowner { 440struct nfs4_lockowner {
366 struct nfs4_stateowner lo_owner; /* must be first element */ 441 struct nfs4_stateowner lo_owner; /* must be first element */
367 struct list_head lo_owner_ino_hash; /* hash by owner,file */
368 struct list_head lo_perstateid;
369 struct list_head lo_list; /* for temporary uses */
370}; 442};
371 443
372static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so) 444static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so)
@@ -379,9 +451,17 @@ static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so)
379 return container_of(so, struct nfs4_lockowner, lo_owner); 451 return container_of(so, struct nfs4_lockowner, lo_owner);
380} 452}
381 453
382/* nfs4_file: a file opened by some number of (open) nfs4_stateowners. */ 454/*
455 * nfs4_file: a file opened by some number of (open) nfs4_stateowners.
456 *
457 * These objects are global. nfsd only keeps one instance of a nfs4_file per
458 * inode (though it may keep multiple file descriptors open per inode). These
459 * are tracked in the file_hashtbl which is protected by the state_lock
460 * spinlock.
461 */
383struct nfs4_file { 462struct nfs4_file {
384 atomic_t fi_ref; 463 atomic_t fi_ref;
464 spinlock_t fi_lock;
385 struct hlist_node fi_hash; /* hash by "struct inode *" */ 465 struct hlist_node fi_hash; /* hash by "struct inode *" */
386 struct list_head fi_stateids; 466 struct list_head fi_stateids;
387 struct list_head fi_delegations; 467 struct list_head fi_delegations;
@@ -395,49 +475,36 @@ struct nfs4_file {
395 * + 1 to both of the above if NFS4_SHARE_ACCESS_BOTH is set. 475 * + 1 to both of the above if NFS4_SHARE_ACCESS_BOTH is set.
396 */ 476 */
397 atomic_t fi_access[2]; 477 atomic_t fi_access[2];
478 u32 fi_share_deny;
398 struct file *fi_deleg_file; 479 struct file *fi_deleg_file;
399 struct file_lock *fi_lease; 480 struct file_lock *fi_lease;
400 atomic_t fi_delegees; 481 atomic_t fi_delegees;
401 struct inode *fi_inode; 482 struct knfsd_fh fi_fhandle;
402 bool fi_had_conflict; 483 bool fi_had_conflict;
403}; 484};
404 485
405/* XXX: for first cut may fall back on returning file that doesn't work 486/*
406 * at all? */ 487 * A generic struct representing either an open or lock stateid. The nfs4_client
407static inline struct file *find_writeable_file(struct nfs4_file *f) 488 * holds a reference to each of these objects, and they in turn hold a
408{ 489 * reference to their respective stateowners. The client's reference is
409 if (f->fi_fds[O_WRONLY]) 490 * released in response to a close or unlock (depending on whether it's an open
410 return f->fi_fds[O_WRONLY]; 491 * or lock stateid) or when the client is being destroyed.
411 return f->fi_fds[O_RDWR]; 492 *
412} 493 * In the case of v4.0 open stateids, these objects are preserved for a little
413 494 * while after close in order to handle CLOSE replays. Those are eventually
414static inline struct file *find_readable_file(struct nfs4_file *f) 495 * reclaimed via a LRU scheme by the laundromat.
415{ 496 *
416 if (f->fi_fds[O_RDONLY]) 497 * This object is a superset of the nfs4_stid. "ol" stands for "Open or Lock".
417 return f->fi_fds[O_RDONLY]; 498 * Better suggestions welcome.
418 return f->fi_fds[O_RDWR]; 499 */
419}
420
421static inline struct file *find_any_file(struct nfs4_file *f)
422{
423 if (f->fi_fds[O_RDWR])
424 return f->fi_fds[O_RDWR];
425 else if (f->fi_fds[O_WRONLY])
426 return f->fi_fds[O_WRONLY];
427 else
428 return f->fi_fds[O_RDONLY];
429}
430
431/* "ol" stands for "Open or Lock". Better suggestions welcome. */
432struct nfs4_ol_stateid { 500struct nfs4_ol_stateid {
433 struct nfs4_stid st_stid; /* must be first field */ 501 struct nfs4_stid st_stid; /* must be first field */
434 struct list_head st_perfile; 502 struct list_head st_perfile;
435 struct list_head st_perstateowner; 503 struct list_head st_perstateowner;
436 struct list_head st_lockowners; 504 struct list_head st_locks;
437 struct nfs4_stateowner * st_stateowner; 505 struct nfs4_stateowner * st_stateowner;
438 struct nfs4_file * st_file; 506 unsigned char st_access_bmap;
439 unsigned long st_access_bmap; 507 unsigned char st_deny_bmap;
440 unsigned long st_deny_bmap;
441 struct nfs4_ol_stateid * st_openstp; 508 struct nfs4_ol_stateid * st_openstp;
442}; 509};
443 510
@@ -456,15 +523,16 @@ struct nfsd_net;
456extern __be32 nfs4_preprocess_stateid_op(struct net *net, 523extern __be32 nfs4_preprocess_stateid_op(struct net *net,
457 struct nfsd4_compound_state *cstate, 524 struct nfsd4_compound_state *cstate,
458 stateid_t *stateid, int flags, struct file **filp); 525 stateid_t *stateid, int flags, struct file **filp);
459extern void nfs4_lock_state(void); 526void nfs4_put_stid(struct nfs4_stid *s);
460extern void nfs4_unlock_state(void);
461void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *); 527void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *);
462extern void nfs4_release_reclaim(struct nfsd_net *); 528extern void nfs4_release_reclaim(struct nfsd_net *);
463extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, 529extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
464 struct nfsd_net *nn); 530 struct nfsd_net *nn);
465extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn); 531extern __be32 nfs4_check_open_reclaim(clientid_t *clid,
532 struct nfsd4_compound_state *cstate, struct nfsd_net *nn);
466extern int set_callback_cred(void); 533extern int set_callback_cred(void);
467extern void nfsd4_init_callback(struct nfsd4_callback *); 534void nfsd4_run_cb_null(struct work_struct *w);
535void nfsd4_run_cb_recall(struct work_struct *w);
468extern void nfsd4_probe_callback(struct nfs4_client *clp); 536extern void nfsd4_probe_callback(struct nfs4_client *clp);
469extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); 537extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
470extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); 538extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
@@ -472,11 +540,10 @@ extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
472extern int nfsd4_create_callback_queue(void); 540extern int nfsd4_create_callback_queue(void);
473extern void nfsd4_destroy_callback_queue(void); 541extern void nfsd4_destroy_callback_queue(void);
474extern void nfsd4_shutdown_callback(struct nfs4_client *); 542extern void nfsd4_shutdown_callback(struct nfs4_client *);
475extern void nfs4_put_delegation(struct nfs4_delegation *dp); 543extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp);
476extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, 544extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name,
477 struct nfsd_net *nn); 545 struct nfsd_net *nn);
478extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); 546extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn);
479extern void put_client_renew(struct nfs4_client *clp);
480 547
481/* nfs4recover operations */ 548/* nfs4recover operations */
482extern int nfsd4_client_tracking_init(struct net *net); 549extern int nfsd4_client_tracking_init(struct net *net);
@@ -490,19 +557,24 @@ extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time);
490#ifdef CONFIG_NFSD_FAULT_INJECTION 557#ifdef CONFIG_NFSD_FAULT_INJECTION
491int nfsd_fault_inject_init(void); 558int nfsd_fault_inject_init(void);
492void nfsd_fault_inject_cleanup(void); 559void nfsd_fault_inject_cleanup(void);
493u64 nfsd_for_n_state(u64, u64 (*)(struct nfs4_client *, u64)); 560
494struct nfs4_client *nfsd_find_client(struct sockaddr_storage *, size_t); 561u64 nfsd_inject_print_clients(void);
495 562u64 nfsd_inject_forget_client(struct sockaddr_storage *, size_t);
496u64 nfsd_forget_client(struct nfs4_client *, u64); 563u64 nfsd_inject_forget_clients(u64);
497u64 nfsd_forget_client_locks(struct nfs4_client*, u64); 564
498u64 nfsd_forget_client_openowners(struct nfs4_client *, u64); 565u64 nfsd_inject_print_locks(void);
499u64 nfsd_forget_client_delegations(struct nfs4_client *, u64); 566u64 nfsd_inject_forget_client_locks(struct sockaddr_storage *, size_t);
500u64 nfsd_recall_client_delegations(struct nfs4_client *, u64); 567u64 nfsd_inject_forget_locks(u64);
501 568
502u64 nfsd_print_client(struct nfs4_client *, u64); 569u64 nfsd_inject_print_openowners(void);
503u64 nfsd_print_client_locks(struct nfs4_client *, u64); 570u64 nfsd_inject_forget_client_openowners(struct sockaddr_storage *, size_t);
504u64 nfsd_print_client_openowners(struct nfs4_client *, u64); 571u64 nfsd_inject_forget_openowners(u64);
505u64 nfsd_print_client_delegations(struct nfs4_client *, u64); 572
573u64 nfsd_inject_print_delegations(void);
574u64 nfsd_inject_forget_client_delegations(struct sockaddr_storage *, size_t);
575u64 nfsd_inject_forget_delegations(u64);
576u64 nfsd_inject_recall_client_delegations(struct sockaddr_storage *, size_t);
577u64 nfsd_inject_recall_delegations(u64);
506#else /* CONFIG_NFSD_FAULT_INJECTION */ 578#else /* CONFIG_NFSD_FAULT_INJECTION */
507static inline int nfsd_fault_inject_init(void) { return 0; } 579static inline int nfsd_fault_inject_init(void) { return 0; }
508static inline void nfsd_fault_inject_cleanup(void) {} 580static inline void nfsd_fault_inject_cleanup(void) {}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 140c496f612c..f501a9b5c9df 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -189,8 +189,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
189 dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); 189 dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
190 190
191 dparent = fhp->fh_dentry; 191 dparent = fhp->fh_dentry;
192 exp = fhp->fh_export; 192 exp = exp_get(fhp->fh_export);
193 exp_get(exp);
194 193
195 /* Lookup the name, but don't follow links */ 194 /* Lookup the name, but don't follow links */
196 if (isdotent(name, len)) { 195 if (isdotent(name, len)) {
@@ -464,7 +463,7 @@ out_put_write_access:
464 if (size_change) 463 if (size_change)
465 put_write_access(inode); 464 put_write_access(inode);
466 if (!err) 465 if (!err)
467 commit_metadata(fhp); 466 err = nfserrno(commit_metadata(fhp));
468out: 467out:
469 return err; 468 return err;
470} 469}
@@ -820,7 +819,8 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
820 return __splice_from_pipe(pipe, sd, nfsd_splice_actor); 819 return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
821} 820}
822 821
823__be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err) 822static __be32
823nfsd_finish_read(struct file *file, unsigned long *count, int host_err)
824{ 824{
825 if (host_err >= 0) { 825 if (host_err >= 0) {
826 nfsdstats.io_read += host_err; 826 nfsdstats.io_read += host_err;
@@ -831,7 +831,7 @@ __be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err)
831 return nfserrno(host_err); 831 return nfserrno(host_err);
832} 832}
833 833
834int nfsd_splice_read(struct svc_rqst *rqstp, 834__be32 nfsd_splice_read(struct svc_rqst *rqstp,
835 struct file *file, loff_t offset, unsigned long *count) 835 struct file *file, loff_t offset, unsigned long *count)
836{ 836{
837 struct splice_desc sd = { 837 struct splice_desc sd = {
@@ -847,7 +847,7 @@ int nfsd_splice_read(struct svc_rqst *rqstp,
847 return nfsd_finish_read(file, count, host_err); 847 return nfsd_finish_read(file, count, host_err);
848} 848}
849 849
850int nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen, 850__be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen,
851 unsigned long *count) 851 unsigned long *count)
852{ 852{
853 mm_segment_t oldfs; 853 mm_segment_t oldfs;
@@ -1121,7 +1121,8 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
1121 iap->ia_valid &= ~(ATTR_UID|ATTR_GID); 1121 iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
1122 if (iap->ia_valid) 1122 if (iap->ia_valid)
1123 return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); 1123 return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
1124 return 0; 1124 /* Callers expect file metadata to be committed here */
1125 return nfserrno(commit_metadata(resfhp));
1125} 1126}
1126 1127
1127/* HPUX client sometimes creates a file in mode 000, and sets size to 0. 1128/* HPUX client sometimes creates a file in mode 000, and sets size to 0.
@@ -1253,9 +1254,10 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1253 err = nfsd_create_setattr(rqstp, resfhp, iap); 1254 err = nfsd_create_setattr(rqstp, resfhp, iap);
1254 1255
1255 /* 1256 /*
1256 * nfsd_setattr already committed the child. Transactional filesystems 1257 * nfsd_create_setattr already committed the child. Transactional
1257 * had a chance to commit changes for both parent and child 1258 * filesystems had a chance to commit changes for both parent and
1258 * simultaneously making the following commit_metadata a noop. 1259 * child simultaneously making the following commit_metadata a
1260 * noop.
1259 */ 1261 */
1260 err2 = nfserrno(commit_metadata(fhp)); 1262 err2 = nfserrno(commit_metadata(fhp));
1261 if (err2) 1263 if (err2)
@@ -1426,7 +1428,8 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1426 err = nfsd_create_setattr(rqstp, resfhp, iap); 1428 err = nfsd_create_setattr(rqstp, resfhp, iap);
1427 1429
1428 /* 1430 /*
1429 * nfsd_setattr already committed the child (and possibly also the parent). 1431 * nfsd_create_setattr already committed the child
1432 * (and possibly also the parent).
1430 */ 1433 */
1431 if (!err) 1434 if (!err)
1432 err = nfserrno(commit_metadata(fhp)); 1435 err = nfserrno(commit_metadata(fhp));
@@ -1504,16 +1507,15 @@ out_nfserr:
1504__be32 1507__be32
1505nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, 1508nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1506 char *fname, int flen, 1509 char *fname, int flen,
1507 char *path, int plen, 1510 char *path,
1508 struct svc_fh *resfhp, 1511 struct svc_fh *resfhp)
1509 struct iattr *iap)
1510{ 1512{
1511 struct dentry *dentry, *dnew; 1513 struct dentry *dentry, *dnew;
1512 __be32 err, cerr; 1514 __be32 err, cerr;
1513 int host_err; 1515 int host_err;
1514 1516
1515 err = nfserr_noent; 1517 err = nfserr_noent;
1516 if (!flen || !plen) 1518 if (!flen || path[0] == '\0')
1517 goto out; 1519 goto out;
1518 err = nfserr_exist; 1520 err = nfserr_exist;
1519 if (isdotent(fname, flen)) 1521 if (isdotent(fname, flen))
@@ -1534,18 +1536,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1534 if (IS_ERR(dnew)) 1536 if (IS_ERR(dnew))
1535 goto out_nfserr; 1537 goto out_nfserr;
1536 1538
1537 if (unlikely(path[plen] != 0)) { 1539 host_err = vfs_symlink(dentry->d_inode, dnew, path);
1538 char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
1539 if (path_alloced == NULL)
1540 host_err = -ENOMEM;
1541 else {
1542 strncpy(path_alloced, path, plen);
1543 path_alloced[plen] = 0;
1544 host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced);
1545 kfree(path_alloced);
1546 }
1547 } else
1548 host_err = vfs_symlink(dentry->d_inode, dnew, path);
1549 err = nfserrno(host_err); 1540 err = nfserrno(host_err);
1550 if (!err) 1541 if (!err)
1551 err = nfserrno(commit_metadata(fhp)); 1542 err = nfserrno(commit_metadata(fhp));
@@ -2093,8 +2084,7 @@ nfsd_racache_init(int cache_size)
2093 if (raparm_hash[0].pb_head) 2084 if (raparm_hash[0].pb_head)
2094 return 0; 2085 return 0;
2095 nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE); 2086 nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
2096 if (nperbucket < 2) 2087 nperbucket = max(2, nperbucket);
2097 nperbucket = 2;
2098 cache_size = nperbucket * RAPARM_HASH_SIZE; 2088 cache_size = nperbucket * RAPARM_HASH_SIZE;
2099 2089
2100 dprintk("nfsd: allocating %d readahead buffers.\n", cache_size); 2090 dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 91b6ae3f658b..c2ff3f14e5f6 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -74,9 +74,9 @@ struct raparms;
74__be32 nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *, 74__be32 nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *,
75 struct file **, struct raparms **); 75 struct file **, struct raparms **);
76void nfsd_put_tmp_read_open(struct file *, struct raparms *); 76void nfsd_put_tmp_read_open(struct file *, struct raparms *);
77int nfsd_splice_read(struct svc_rqst *, 77__be32 nfsd_splice_read(struct svc_rqst *,
78 struct file *, loff_t, unsigned long *); 78 struct file *, loff_t, unsigned long *);
79int nfsd_readv(struct file *, loff_t, struct kvec *, int, 79__be32 nfsd_readv(struct file *, loff_t, struct kvec *, int,
80 unsigned long *); 80 unsigned long *);
81__be32 nfsd_read(struct svc_rqst *, struct svc_fh *, 81__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
82 loff_t, struct kvec *, int, unsigned long *); 82 loff_t, struct kvec *, int, unsigned long *);
@@ -85,8 +85,8 @@ __be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
85__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, 85__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
86 char *, int *); 86 char *, int *);
87__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, 87__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
88 char *name, int len, char *path, int plen, 88 char *name, int len, char *path,
89 struct svc_fh *res, struct iattr *); 89 struct svc_fh *res);
90__be32 nfsd_link(struct svc_rqst *, struct svc_fh *, 90__be32 nfsd_link(struct svc_rqst *, struct svc_fh *,
91 char *, int, struct svc_fh *); 91 char *, int, struct svc_fh *);
92__be32 nfsd_rename(struct svc_rqst *, 92__be32 nfsd_rename(struct svc_rqst *,
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 18cbb6d9c8a9..465e7799742a 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -55,6 +55,7 @@ struct nfsd4_compound_state {
55 struct svc_fh current_fh; 55 struct svc_fh current_fh;
56 struct svc_fh save_fh; 56 struct svc_fh save_fh;
57 struct nfs4_stateowner *replay_owner; 57 struct nfs4_stateowner *replay_owner;
58 struct nfs4_client *clp;
58 /* For sessions DRC */ 59 /* For sessions DRC */
59 struct nfsd4_session *session; 60 struct nfsd4_session *session;
60 struct nfsd4_slot *slot; 61 struct nfsd4_slot *slot;
@@ -107,8 +108,8 @@ struct nfsd4_create {
107 u32 cr_type; /* request */ 108 u32 cr_type; /* request */
108 union { /* request */ 109 union { /* request */
109 struct { 110 struct {
110 u32 namelen; 111 u32 datalen;
111 char *name; 112 char *data;
112 } link; /* NF4LNK */ 113 } link; /* NF4LNK */
113 struct { 114 struct {
114 u32 specdata1; 115 u32 specdata1;
@@ -121,8 +122,8 @@ struct nfsd4_create {
121 struct nfs4_acl *cr_acl; 122 struct nfs4_acl *cr_acl;
122 struct xdr_netobj cr_label; 123 struct xdr_netobj cr_label;
123}; 124};
124#define cr_linklen u.link.namelen 125#define cr_datalen u.link.datalen
125#define cr_linkname u.link.name 126#define cr_data u.link.data
126#define cr_specdata1 u.dev.specdata1 127#define cr_specdata1 u.dev.specdata1
127#define cr_specdata2 u.dev.specdata2 128#define cr_specdata2 u.dev.specdata2
128 129
@@ -478,6 +479,14 @@ struct nfsd4_op {
478 479
479bool nfsd4_cache_this_op(struct nfsd4_op *); 480bool nfsd4_cache_this_op(struct nfsd4_op *);
480 481
482/*
483 * Memory needed just for the duration of processing one compound:
484 */
485struct svcxdr_tmpbuf {
486 struct svcxdr_tmpbuf *next;
487 char buf[];
488};
489
481struct nfsd4_compoundargs { 490struct nfsd4_compoundargs {
482 /* scratch variables for XDR decode */ 491 /* scratch variables for XDR decode */
483 __be32 * p; 492 __be32 * p;
@@ -486,11 +495,7 @@ struct nfsd4_compoundargs {
486 int pagelen; 495 int pagelen;
487 __be32 tmp[8]; 496 __be32 tmp[8];
488 __be32 * tmpp; 497 __be32 * tmpp;
489 struct tmpbuf { 498 struct svcxdr_tmpbuf *to_free;
490 struct tmpbuf *next;
491 void (*release)(const void *);
492 void *buf;
493 } *to_free;
494 499
495 struct svc_rqst *rqstp; 500 struct svc_rqst *rqstp;
496 501
@@ -574,7 +579,6 @@ extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
574extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, 579extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
575 struct nfsd4_compound_state *, 580 struct nfsd4_compound_state *,
576 struct nfsd4_setclientid_confirm *setclientid_confirm); 581 struct nfsd4_setclientid_confirm *setclientid_confirm);
577extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp);
578extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, 582extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
579 struct nfsd4_compound_state *, struct nfsd4_exchange_id *); 583 struct nfsd4_compound_state *, struct nfsd4_exchange_id *);
580extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *); 584extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *);
@@ -585,6 +589,7 @@ extern __be32 nfsd4_create_session(struct svc_rqst *,
585extern __be32 nfsd4_sequence(struct svc_rqst *, 589extern __be32 nfsd4_sequence(struct svc_rqst *,
586 struct nfsd4_compound_state *, 590 struct nfsd4_compound_state *,
587 struct nfsd4_sequence *); 591 struct nfsd4_sequence *);
592extern void nfsd4_sequence_done(struct nfsd4_compoundres *resp);
588extern __be32 nfsd4_destroy_session(struct svc_rqst *, 593extern __be32 nfsd4_destroy_session(struct svc_rqst *,
589 struct nfsd4_compound_state *, 594 struct nfsd4_compound_state *,
590 struct nfsd4_destroy_session *); 595 struct nfsd4_destroy_session *);
@@ -594,7 +599,9 @@ extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
594 struct nfsd4_open *open, struct nfsd_net *nn); 599 struct nfsd4_open *open, struct nfsd_net *nn);
595extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, 600extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
596 struct svc_fh *current_fh, struct nfsd4_open *open); 601 struct svc_fh *current_fh, struct nfsd4_open *open);
597extern void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status); 602extern void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate);
603extern void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
604 struct nfsd4_open *open, __be32 status);
598extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp, 605extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp,
599 struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc); 606 struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc);
600extern __be32 nfsd4_close(struct svc_rqst *rqstp, 607extern __be32 nfsd4_close(struct svc_rqst *rqstp,
@@ -625,6 +632,7 @@ extern __be32 nfsd4_test_stateid(struct svc_rqst *rqstp,
625extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp, 632extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp,
626 struct nfsd4_compound_state *, struct nfsd4_free_stateid *free_stateid); 633 struct nfsd4_compound_state *, struct nfsd4_free_stateid *free_stateid);
627extern void nfsd4_bump_seqid(struct nfsd4_compound_state *, __be32 nfserr); 634extern void nfsd4_bump_seqid(struct nfsd4_compound_state *, __be32 nfserr);
635
628#endif 636#endif
629 637
630/* 638/*
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 239493ec718e..7151ea428041 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -23,6 +23,7 @@ proc-y += version.o
23proc-y += softirqs.o 23proc-y += softirqs.o
24proc-y += namespaces.o 24proc-y += namespaces.o
25proc-y += self.o 25proc-y += self.o
26proc-y += thread_self.o
26proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o 27proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
27proc-$(CONFIG_NET) += proc_net.o 28proc-$(CONFIG_NET) += proc_net.o
28proc-$(CONFIG_PROC_KCORE) += kcore.o 29proc-$(CONFIG_PROC_KCORE) += kcore.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 043c83cb51f9..baf852b648ad 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2814,7 +2814,7 @@ retry:
2814 return iter; 2814 return iter;
2815} 2815}
2816 2816
2817#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1) 2817#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2)
2818 2818
2819/* for the /proc/ directory itself, after non-process stuff has been done */ 2819/* for the /proc/ directory itself, after non-process stuff has been done */
2820int proc_pid_readdir(struct file *file, struct dir_context *ctx) 2820int proc_pid_readdir(struct file *file, struct dir_context *ctx)
@@ -2826,14 +2826,19 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
2826 if (pos >= PID_MAX_LIMIT + TGID_OFFSET) 2826 if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
2827 return 0; 2827 return 0;
2828 2828
2829 if (pos == TGID_OFFSET - 1) { 2829 if (pos == TGID_OFFSET - 2) {
2830 struct inode *inode = ns->proc_self->d_inode; 2830 struct inode *inode = ns->proc_self->d_inode;
2831 if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK)) 2831 if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
2832 return 0; 2832 return 0;
2833 iter.tgid = 0; 2833 ctx->pos = pos = pos + 1;
2834 } else { 2834 }
2835 iter.tgid = pos - TGID_OFFSET; 2835 if (pos == TGID_OFFSET - 1) {
2836 struct inode *inode = ns->proc_thread_self->d_inode;
2837 if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
2838 return 0;
2839 ctx->pos = pos = pos + 1;
2836 } 2840 }
2841 iter.tgid = pos - TGID_OFFSET;
2837 iter.task = NULL; 2842 iter.task = NULL;
2838 for (iter = next_tgid(ns, iter); 2843 for (iter = next_tgid(ns, iter);
2839 iter.task; 2844 iter.task;
@@ -2862,6 +2867,9 @@ static const struct pid_entry tid_base_stuff[] = {
2862 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 2867 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
2863 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 2868 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
2864 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), 2869 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
2870#ifdef CONFIG_NET
2871 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
2872#endif
2865 REG("environ", S_IRUSR, proc_environ_operations), 2873 REG("environ", S_IRUSR, proc_environ_operations),
2866 ONE("auxv", S_IRUSR, proc_pid_auxv), 2874 ONE("auxv", S_IRUSR, proc_pid_auxv),
2867 ONE("status", S_IRUGO, proc_pid_status), 2875 ONE("status", S_IRUGO, proc_pid_status),
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 0adbc02d60e3..333080d7a671 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -442,6 +442,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
442int proc_fill_super(struct super_block *s) 442int proc_fill_super(struct super_block *s)
443{ 443{
444 struct inode *root_inode; 444 struct inode *root_inode;
445 int ret;
445 446
446 s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; 447 s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
447 s->s_blocksize = 1024; 448 s->s_blocksize = 1024;
@@ -463,5 +464,9 @@ int proc_fill_super(struct super_block *s)
463 return -ENOMEM; 464 return -ENOMEM;
464 } 465 }
465 466
466 return proc_setup_self(s); 467 ret = proc_setup_self(s);
468 if (ret) {
469 return ret;
470 }
471 return proc_setup_thread_self(s);
467} 472}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index a024cf7b260f..7da13e49128a 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -231,6 +231,12 @@ static inline int proc_net_init(void) { return 0; }
231extern int proc_setup_self(struct super_block *); 231extern int proc_setup_self(struct super_block *);
232 232
233/* 233/*
234 * thread_self.c
235 */
236extern int proc_setup_thread_self(struct super_block *);
237extern void proc_thread_self_init(void);
238
239/*
234 * proc_sysctl.c 240 * proc_sysctl.c
235 */ 241 */
236#ifdef CONFIG_PROC_SYSCTL 242#ifdef CONFIG_PROC_SYSCTL
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 4677bb7dc7c2..39481028ec08 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -113,9 +113,11 @@ static struct net *get_proc_task_net(struct inode *dir)
113 rcu_read_lock(); 113 rcu_read_lock();
114 task = pid_task(proc_pid(dir), PIDTYPE_PID); 114 task = pid_task(proc_pid(dir), PIDTYPE_PID);
115 if (task != NULL) { 115 if (task != NULL) {
116 ns = task_nsproxy(task); 116 task_lock(task);
117 ns = task->nsproxy;
117 if (ns != NULL) 118 if (ns != NULL)
118 net = get_net(ns->net_ns); 119 net = get_net(ns->net_ns);
120 task_unlock(task);
119 } 121 }
120 rcu_read_unlock(); 122 rcu_read_unlock();
121 123
@@ -224,7 +226,7 @@ static struct pernet_operations __net_initdata proc_net_ns_ops = {
224 226
225int __init proc_net_init(void) 227int __init proc_net_init(void)
226{ 228{
227 proc_symlink("net", NULL, "self/net"); 229 proc_symlink("net", NULL, "thread-self/net");
228 230
229 return register_pernet_subsys(&proc_net_ns_ops); 231 return register_pernet_subsys(&proc_net_ns_ops);
230} 232}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 574bafc41f0b..6296c7626963 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -149,6 +149,8 @@ static void proc_kill_sb(struct super_block *sb)
149 ns = (struct pid_namespace *)sb->s_fs_info; 149 ns = (struct pid_namespace *)sb->s_fs_info;
150 if (ns->proc_self) 150 if (ns->proc_self)
151 dput(ns->proc_self); 151 dput(ns->proc_self);
152 if (ns->proc_thread_self)
153 dput(ns->proc_thread_self);
152 kill_anon_super(sb); 154 kill_anon_super(sb);
153 put_pid_ns(ns); 155 put_pid_ns(ns);
154} 156}
@@ -170,7 +172,8 @@ void __init proc_root_init(void)
170 return; 172 return;
171 173
172 proc_self_init(); 174 proc_self_init();
173 proc_symlink("mounts", NULL, "self/mounts"); 175 proc_thread_self_init();
176 proc_symlink("mounts", NULL, "thread-self/mounts");
174 177
175 proc_net_init(); 178 proc_net_init();
176 179
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
new file mode 100644
index 000000000000..59075b509df3
--- /dev/null
+++ b/fs/proc/thread_self.c
@@ -0,0 +1,85 @@
1#include <linux/sched.h>
2#include <linux/namei.h>
3#include <linux/slab.h>
4#include <linux/pid_namespace.h>
5#include "internal.h"
6
7/*
8 * /proc/thread-self:
9 */
10static int proc_thread_self_readlink(struct dentry *dentry, char __user *buffer,
11 int buflen)
12{
13 struct pid_namespace *ns = dentry->d_sb->s_fs_info;
14 pid_t tgid = task_tgid_nr_ns(current, ns);
15 pid_t pid = task_pid_nr_ns(current, ns);
16 char tmp[PROC_NUMBUF + 6 + PROC_NUMBUF];
17 if (!pid)
18 return -ENOENT;
19 sprintf(tmp, "%d/task/%d", tgid, pid);
20 return readlink_copy(buffer, buflen, tmp);
21}
22
23static void *proc_thread_self_follow_link(struct dentry *dentry, struct nameidata *nd)
24{
25 struct pid_namespace *ns = dentry->d_sb->s_fs_info;
26 pid_t tgid = task_tgid_nr_ns(current, ns);
27 pid_t pid = task_pid_nr_ns(current, ns);
28 char *name = ERR_PTR(-ENOENT);
29 if (pid) {
30 name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL);
31 if (!name)
32 name = ERR_PTR(-ENOMEM);
33 else
34 sprintf(name, "%d/task/%d", tgid, pid);
35 }
36 nd_set_link(nd, name);
37 return NULL;
38}
39
40static const struct inode_operations proc_thread_self_inode_operations = {
41 .readlink = proc_thread_self_readlink,
42 .follow_link = proc_thread_self_follow_link,
43 .put_link = kfree_put_link,
44};
45
46static unsigned thread_self_inum;
47
48int proc_setup_thread_self(struct super_block *s)
49{
50 struct inode *root_inode = s->s_root->d_inode;
51 struct pid_namespace *ns = s->s_fs_info;
52 struct dentry *thread_self;
53
54 mutex_lock(&root_inode->i_mutex);
55 thread_self = d_alloc_name(s->s_root, "thread-self");
56 if (thread_self) {
57 struct inode *inode = new_inode_pseudo(s);
58 if (inode) {
59 inode->i_ino = thread_self_inum;
60 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
61 inode->i_mode = S_IFLNK | S_IRWXUGO;
62 inode->i_uid = GLOBAL_ROOT_UID;
63 inode->i_gid = GLOBAL_ROOT_GID;
64 inode->i_op = &proc_thread_self_inode_operations;
65 d_add(thread_self, inode);
66 } else {
67 dput(thread_self);
68 thread_self = ERR_PTR(-ENOMEM);
69 }
70 } else {
71 thread_self = ERR_PTR(-ENOMEM);
72 }
73 mutex_unlock(&root_inode->i_mutex);
74 if (IS_ERR(thread_self)) {
75 pr_err("proc_fill_super: can't allocate /proc/thread_self\n");
76 return PTR_ERR(thread_self);
77 }
78 ns->proc_thread_self = thread_self;
79 return 0;
80}
81
82void __init proc_thread_self_init(void)
83{
84 proc_alloc_inum(&thread_self_inum);
85}
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 1a81373947f3..73ca1740d839 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -232,17 +232,15 @@ static int mounts_open_common(struct inode *inode, struct file *file,
232 if (!task) 232 if (!task)
233 goto err; 233 goto err;
234 234
235 rcu_read_lock(); 235 task_lock(task);
236 nsp = task_nsproxy(task); 236 nsp = task->nsproxy;
237 if (!nsp || !nsp->mnt_ns) { 237 if (!nsp || !nsp->mnt_ns) {
238 rcu_read_unlock(); 238 task_unlock(task);
239 put_task_struct(task); 239 put_task_struct(task);
240 goto err; 240 goto err;
241 } 241 }
242 ns = nsp->mnt_ns; 242 ns = nsp->mnt_ns;
243 get_mnt_ns(ns); 243 get_mnt_ns(ns);
244 rcu_read_unlock();
245 task_lock(task);
246 if (!task->fs) { 244 if (!task->fs) {
247 task_unlock(task); 245 task_unlock(task);
248 put_task_struct(task); 246 put_task_struct(task);