-rw-r--r--  fs/lockd/clntproc.c                        2
-rw-r--r--  fs/lockd/svc4proc.c                        2
-rw-r--r--  fs/lockd/svcproc.c                         2
-rw-r--r--  fs/nfs/direct.c                            6
-rw-r--r--  fs/nfs/file.c                             28
-rw-r--r--  fs/nfs/inode.c                             2
-rw-r--r--  fs/nfs/internal.h                         18
-rw-r--r--  fs/nfs/nfs3proc.c                         52
-rw-r--r--  fs/nfs/nfs4proc.c                         99
-rw-r--r--  fs/nfs/pagelist.c                         67
-rw-r--r--  fs/nfs/proc.c                             31
-rw-r--r--  fs/nfs/read.c                            179
-rw-r--r--  fs/nfs/symlink.c                           2
-rw-r--r--  fs/nfs/write.c                           598
-rw-r--r--  include/linux/nfs_fs.h                    37
-rw-r--r--  include/linux/nfs_page.h                   7
-rw-r--r--  include/linux/nfs_xdr.h                    2
-rw-r--r--  include/linux/sunrpc/auth_gss.h            2
-rw-r--r--  include/linux/sunrpc/clnt.h                1
-rw-r--r--  include/linux/sunrpc/debug.h               6
-rw-r--r--  include/linux/sunrpc/gss_krb5.h            6
-rw-r--r--  include/linux/sunrpc/gss_spkm3.h          34
-rw-r--r--  include/linux/sunrpc/sched.h              11
-rw-r--r--  include/linux/sunrpc/xdr.h                23
-rw-r--r--  include/linux/sunrpc/xprt.h               37
-rw-r--r--  net/sunrpc/auth_gss/auth_gss.c            42
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_crypto.c    101
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_mech.c       18
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_seal.c       55
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_unseal.c     87
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_wrap.c      153
-rw-r--r--  net/sunrpc/auth_gss/gss_spkm3_mech.c     131
-rw-r--r--  net/sunrpc/auth_gss/gss_spkm3_seal.c     101
-rw-r--r--  net/sunrpc/auth_gss/gss_spkm3_token.c      6
-rw-r--r--  net/sunrpc/auth_gss/gss_spkm3_unseal.c    92
-rw-r--r--  net/sunrpc/clnt.c                         70
-rw-r--r--  net/sunrpc/pmap_clnt.c                    13
-rw-r--r--  net/sunrpc/sched.c                       137
-rw-r--r--  net/sunrpc/socklib.c                      18
-rw-r--r--  net/sunrpc/sunrpc_syms.c                   5
-rw-r--r--  net/sunrpc/sysctl.c                       50
-rw-r--r--  net/sunrpc/xdr.c                         255
-rw-r--r--  net/sunrpc/xprt.c                         33
-rw-r--r--  net/sunrpc/xprtsock.c                    716
44 files changed, 1557 insertions(+), 1780 deletions(-)
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 3d84f600b633..41c983a05294 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -729,7 +729,7 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
 		goto retry_cancel;
 	}
 
-	dprintk("lockd: cancel status %d (task %d)\n",
+	dprintk("lockd: cancel status %u (task %u)\n",
 			req->a_res.status, task->tk_pid);
 
 	switch (req->a_res.status) {
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 0ce5c81ff507..f67146a8199a 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -234,7 +234,7 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
  */
 static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
 {
-	dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
+	dprintk("lockd: %5u callback returned %d\n", task->tk_pid,
 			-task->tk_status);
 }
 
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 32e99a6e8dca..3707c3a23e93 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -263,7 +263,7 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
  */
 static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
 {
-	dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
+	dprintk("lockd: %5u callback returned %d\n", task->tk_pid,
 			-task->tk_status);
 }
 
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index bdfabf854a51..784bbb54e6c1 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -307,9 +307,7 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
 
 	data->task.tk_cookie = (unsigned long) inode;
 
-	lock_kernel();
 	rpc_execute(&data->task);
-	unlock_kernel();
 
 	dfprintk(VFS, "NFS: %5u initiated direct read call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
 		data->task.tk_pid,
@@ -475,9 +473,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 
 	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
 
-	lock_kernel();
 	rpc_execute(&data->task);
-	unlock_kernel();
 }
 
 static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
@@ -641,9 +637,7 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
 	data->task.tk_priority = RPC_PRIORITY_NORMAL;
 	data->task.tk_cookie = (unsigned long) inode;
 
-	lock_kernel();
 	rpc_execute(&data->task);
-	unlock_kernel();
 
 	dfprintk(VFS, "NFS: %5u initiated direct write call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
 		data->task.tk_pid,
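[Annotation: the three hunks above drop the Big Kernel Lock around task
submission. A minimal sketch of the resulting pattern, assuming (as the
rest of this series suggests) that the RPC scheduler's own locking now
suffices; this is an illustration, not part of the patch:

	/* before: every submission was bracketed by the BKL */
	lock_kernel();
	rpc_execute(&data->task);
	unlock_kernel();

	/* after: the task is handed straight to the RPC state machine */
	rpc_execute(&data->task);
]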
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index cc93865cea93..8e28bffc35a0 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -307,28 +307,28 @@ static int nfs_commit_write(struct file *file, struct page *page, unsigned offse
 
 static void nfs_invalidate_page(struct page *page, unsigned long offset)
 {
-	struct inode *inode = page->mapping->host;
-
+	if (offset != 0)
+		return;
 	/* Cancel any unstarted writes on this page */
-	if (offset == 0)
-		nfs_sync_inode_wait(inode, page->index, 1, FLUSH_INVALIDATE);
+	nfs_wb_page_priority(page->mapping->host, page, FLUSH_INVALIDATE);
 }
 
 static int nfs_release_page(struct page *page, gfp_t gfp)
 {
-	if (gfp & __GFP_FS)
-		return !nfs_wb_page(page->mapping->host, page);
-	else
-		/*
-		 * Avoid deadlock on nfs_wait_on_request().
-		 */
-		return 0;
+	/*
+	 * Avoid deadlock on nfs_wait_on_request().
+	 */
+	if (!(gfp & __GFP_FS))
+		return 0;
+	/* Hack... Force nfs_wb_page() to write out the page */
+	SetPageDirty(page);
+	return !nfs_wb_page(page->mapping->host, page);
 }
 
 const struct address_space_operations nfs_file_aops = {
 	.readpage = nfs_readpage,
 	.readpages = nfs_readpages,
-	.set_page_dirty = __set_page_dirty_nobuffers,
+	.set_page_dirty = nfs_set_page_dirty,
 	.writepage = nfs_writepage,
 	.writepages = nfs_writepages,
 	.prepare_write = nfs_prepare_write,
@@ -375,6 +375,12 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
 
 	nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
 	result = generic_file_aio_write(iocb, iov, nr_segs, pos);
+	/* Return error values for O_SYNC and IS_SYNC() */
+	if (result >= 0 && (IS_SYNC(inode) || (iocb->ki_filp->f_flags & O_SYNC))) {
+		int err = nfs_fsync(iocb->ki_filp, dentry, 1);
+		if (err < 0)
+			result = err;
+	}
 out:
 	return result;
 
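[Annotation: nfs_release_page() above keeps the usual ->releasepage
contract: return 0 to refuse the release whenever the caller's gfp mask
forbids filesystem recursion. A hedged sketch of that guard pattern;
example_release_page() and try_to_flush_page() are hypothetical names,
not from the patch:

	static int example_release_page(struct page *page, gfp_t gfp)
	{
		/* Caller may not re-enter the filesystem: refuse. */
		if (!(gfp & __GFP_FS))
			return 0;
		/* Safe context: flush, then report whether the page is free. */
		return try_to_flush_page(page);	/* hypothetical helper */
	}
]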
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 08cc4c5919ab..7c32187f953e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -422,7 +422,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 	int err;
 
 	/* Flush out writes to the server in order to update c/mtime */
-	nfs_sync_inode_wait(inode, 0, 0, FLUSH_NOCOMMIT);
+	nfs_sync_mapping_range(inode->i_mapping, 0, 0, FLUSH_NOCOMMIT);
 
 	/*
 	 * We may force a getattr if the user cares about atime.
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d205466233f6..a28f6ce2e131 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -217,3 +217,21 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize)
 	if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0)
 		sb->s_maxbytes = MAX_LFS_FILESIZE;
 }
+
+/*
+ * Determine the number of bytes of data the page contains
+ */
+static inline
+unsigned int nfs_page_length(struct page *page)
+{
+	loff_t i_size = i_size_read(page->mapping->host);
+
+	if (i_size > 0) {
+		pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
+		if (page->index < end_index)
+			return PAGE_CACHE_SIZE;
+		if (page->index == end_index)
+			return ((i_size - 1) & ~PAGE_CACHE_MASK) + 1;
+	}
+	return 0;
+}
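[Annotation: a worked example of the new nfs_page_length() helper,
assuming PAGE_CACHE_SIZE = 4096 (so PAGE_CACHE_SHIFT = 12). With
i_size = 10000:

	end_index = (10000 - 1) >> 12;		/* = 2, the last page */
	/* page->index 0 or 1: entirely below EOF */
	return PAGE_CACHE_SIZE;			/* 4096 */
	/* page->index == 2: the partial tail page */
	return ((10000 - 1) & 4095) + 1;	/* 1807 + 1 = 1808 */
	/* page->index > 2, or i_size == 0 */
	return 0;

1808 = 10000 - 2*4096, i.e. exactly the valid bytes on the last page.]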
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index e5f128ffc32d..510ae524f3fd 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -276,51 +276,6 @@ static int nfs3_proc_read(struct nfs_read_data *rdata)
 	return status;
 }
 
-static int nfs3_proc_write(struct nfs_write_data *wdata)
-{
-	int rpcflags = wdata->flags;
-	struct inode *inode = wdata->inode;
-	struct nfs_fattr *fattr = wdata->res.fattr;
-	struct rpc_message msg = {
-		.rpc_proc	= &nfs3_procedures[NFS3PROC_WRITE],
-		.rpc_argp	= &wdata->args,
-		.rpc_resp	= &wdata->res,
-		.rpc_cred	= wdata->cred,
-	};
-	int status;
-
-	dprintk("NFS call write %d @ %Ld\n", wdata->args.count,
-			(long long) wdata->args.offset);
-	nfs_fattr_init(fattr);
-	status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags);
-	if (status >= 0)
-		nfs_post_op_update_inode(inode, fattr);
-	dprintk("NFS reply write: %d\n", status);
-	return status < 0? status : wdata->res.count;
-}
-
-static int nfs3_proc_commit(struct nfs_write_data *cdata)
-{
-	struct inode *inode = cdata->inode;
-	struct nfs_fattr *fattr = cdata->res.fattr;
-	struct rpc_message msg = {
-		.rpc_proc	= &nfs3_procedures[NFS3PROC_COMMIT],
-		.rpc_argp	= &cdata->args,
-		.rpc_resp	= &cdata->res,
-		.rpc_cred	= cdata->cred,
-	};
-	int status;
-
-	dprintk("NFS call commit %d @ %Ld\n", cdata->args.count,
-			(long long) cdata->args.offset);
-	nfs_fattr_init(fattr);
-	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
-	if (status >= 0)
-		nfs_post_op_update_inode(inode, fattr);
-	dprintk("NFS reply commit: %d\n", status);
-	return status;
-}
-
 /*
  * Create a regular file.
  * For now, we don't implement O_EXCL.
@@ -369,7 +324,7 @@ again:
 
 	/* If the server doesn't support the exclusive creation semantics,
 	 * try again with simple 'guarded' mode. */
-	if (status == NFSERR_NOTSUPP) {
+	if (status == -ENOTSUPP) {
 		switch (arg.createmode) {
 			case NFS3_CREATE_EXCLUSIVE:
 				arg.createmode = NFS3_CREATE_GUARDED;
@@ -690,8 +645,6 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 	};
 	int status;
 
-	lock_kernel();
-
 	if (plus)
 		msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS];
 
@@ -702,7 +655,6 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_refresh_inode(dir, &dir_attr);
 	dprintk("NFS reply readdir: %d\n", status);
-	unlock_kernel();
 	return status;
 }
 
@@ -904,8 +856,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
 	.access		= nfs3_proc_access,
 	.readlink	= nfs3_proc_readlink,
 	.read		= nfs3_proc_read,
-	.write		= nfs3_proc_write,
-	.commit		= nfs3_proc_commit,
 	.create		= nfs3_proc_create,
 	.remove		= nfs3_proc_remove,
 	.unlink_setup	= nfs3_proc_unlink_setup,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 8118036cc449..ee458aeab24a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -636,7 +636,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
 		smp_wmb();
 	} else
 		status = data->rpc_status;
-	rpc_release_task(task);
+	rpc_put_task(task);
 	return status;
 }
 
@@ -742,7 +742,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 		smp_wmb();
 	} else
 		status = data->rpc_status;
-	rpc_release_task(task);
+	rpc_put_task(task);
 	if (status != 0)
 		return status;
 
@@ -1775,89 +1775,6 @@ static int nfs4_proc_read(struct nfs_read_data *rdata)
 	return err;
 }
 
-static int _nfs4_proc_write(struct nfs_write_data *wdata)
-{
-	int rpcflags = wdata->flags;
-	struct inode *inode = wdata->inode;
-	struct nfs_fattr *fattr = wdata->res.fattr;
-	struct nfs_server *server = NFS_SERVER(inode);
-	struct rpc_message msg = {
-		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_WRITE],
-		.rpc_argp	= &wdata->args,
-		.rpc_resp	= &wdata->res,
-		.rpc_cred	= wdata->cred,
-	};
-	int status;
-
-	dprintk("NFS call write %d @ %Ld\n", wdata->args.count,
-			(long long) wdata->args.offset);
-
-	wdata->args.bitmask = server->attr_bitmask;
-	wdata->res.server = server;
-	wdata->timestamp = jiffies;
-	nfs_fattr_init(fattr);
-	status = rpc_call_sync(server->client, &msg, rpcflags);
-	dprintk("NFS reply write: %d\n", status);
-	if (status < 0)
-		return status;
-	renew_lease(server, wdata->timestamp);
-	nfs_post_op_update_inode(inode, fattr);
-	return wdata->res.count;
-}
-
-static int nfs4_proc_write(struct nfs_write_data *wdata)
-{
-	struct nfs4_exception exception = { };
-	int err;
-	do {
-		err = nfs4_handle_exception(NFS_SERVER(wdata->inode),
-				_nfs4_proc_write(wdata),
-				&exception);
-	} while (exception.retry);
-	return err;
-}
-
-static int _nfs4_proc_commit(struct nfs_write_data *cdata)
-{
-	struct inode *inode = cdata->inode;
-	struct nfs_fattr *fattr = cdata->res.fattr;
-	struct nfs_server *server = NFS_SERVER(inode);
-	struct rpc_message msg = {
-		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_COMMIT],
-		.rpc_argp	= &cdata->args,
-		.rpc_resp	= &cdata->res,
-		.rpc_cred	= cdata->cred,
-	};
-	int status;
-
-	dprintk("NFS call commit %d @ %Ld\n", cdata->args.count,
-			(long long) cdata->args.offset);
-
-	cdata->args.bitmask = server->attr_bitmask;
-	cdata->res.server = server;
-	cdata->timestamp = jiffies;
-	nfs_fattr_init(fattr);
-	status = rpc_call_sync(server->client, &msg, 0);
-	if (status >= 0)
-		renew_lease(server, cdata->timestamp);
-	dprintk("NFS reply commit: %d\n", status);
-	if (status >= 0)
-		nfs_post_op_update_inode(inode, fattr);
-	return status;
-}
-
-static int nfs4_proc_commit(struct nfs_write_data *cdata)
-{
-	struct nfs4_exception exception = { };
-	int err;
-	do {
-		err = nfs4_handle_exception(NFS_SERVER(cdata->inode),
-				_nfs4_proc_commit(cdata),
-				&exception);
-	} while (exception.retry);
-	return err;
-}
-
 /*
  * Got race?
  * We will need to arrange for the VFS layer to provide an atomic open.
@@ -2223,13 +2140,11 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 			dentry->d_parent->d_name.name,
 			dentry->d_name.name,
 			(unsigned long long)cookie);
-	lock_kernel();
 	nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
 	res.pgbase = args.pgbase;
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	if (status == 0)
 		memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
-	unlock_kernel();
 	dprintk("%s: returns %d\n", __FUNCTION__, status);
 	return status;
 }
@@ -3067,7 +2982,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
 		if (status == 0)
 			nfs_post_op_update_inode(inode, &data->fattr);
 	}
-	rpc_release_task(task);
+	rpc_put_task(task);
 	return status;
 }
 
@@ -3314,7 +3229,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
 	if (IS_ERR(task))
 		goto out;
 	status = nfs4_wait_for_completion_rpc_task(task);
-	rpc_release_task(task);
+	rpc_put_task(task);
 out:
 	return status;
 }
@@ -3430,7 +3345,7 @@ static void nfs4_lock_release(void *calldata)
 		task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
 				data->arg.lock_seqid);
 		if (!IS_ERR(task))
-			rpc_release_task(task);
+			rpc_put_task(task);
 		dprintk("%s: cancelling lock!\n", __FUNCTION__);
 	} else
 		nfs_free_seqid(data->arg.lock_seqid);
@@ -3472,7 +3387,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
 			ret = -EAGAIN;
 	} else
 		data->cancelled = 1;
-	rpc_release_task(task);
+	rpc_put_task(task);
 	dprintk("%s: done, ret = %d!\n", __FUNCTION__, ret);
 	return ret;
 }
@@ -3732,8 +3647,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.access		= nfs4_proc_access,
 	.readlink	= nfs4_proc_readlink,
 	.read		= nfs4_proc_read,
-	.write		= nfs4_proc_write,
-	.commit		= nfs4_proc_commit,
 	.create		= nfs4_proc_create,
 	.remove		= nfs4_proc_remove,
 	.unlink_setup	= nfs4_proc_unlink_setup,
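[Annotation: every rpc_release_task() caller above becomes
rpc_put_task(), consistent with reference-counted task lifetimes (the
task.u.tk_rcu member used elsewhere in this series points the same way).
A hedged sketch of the resulting run/wait/put pattern; treat the exact
rpc_run_task() signature as an assumption about this era's API:

	struct rpc_task *task;
	int status;

	task = rpc_run_task(clnt, RPC_TASK_ASYNC, &ops, calldata);
	if (IS_ERR(task))
		return PTR_ERR(task);
	status = nfs4_wait_for_completion_rpc_task(task);
	rpc_put_task(task);	/* drop our reference; freeing is deferred */
	return status;
]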
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 829af323f288..bc9fab68b29c 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -17,6 +17,7 @@
 #include <linux/nfs_page.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_mount.h>
+#include <linux/writeback.h>
 
 #define NFS_PARANOIA 1
 
@@ -268,11 +269,10 @@ nfs_coalesce_requests(struct list_head *head, struct list_head *dst,
 
 #define NFS_SCAN_MAXENTRIES 16
 /**
- * nfs_scan_lock_dirty - Scan the radix tree for dirty requests
- * @nfsi: NFS inode
+ * nfs_scan_dirty - Scan the radix tree for dirty requests
+ * @mapping: pointer to address space
+ * @wbc: writeback_control structure
  * @dst: Destination list
- * @idx_start: lower bound of page->index to scan
- * @npages: idx_start + npages sets the upper bound to scan.
  *
  * Moves elements from one of the inode request lists.
  * If the number of requests is set to 0, the entire address_space
@@ -280,46 +280,63 @@ nfs_coalesce_requests(struct list_head *head, struct list_head *dst,
  * The requests are *not* checked to ensure that they form a contiguous set.
  * You must be holding the inode's req_lock when calling this function
  */
-int
-nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst,
-	      unsigned long idx_start, unsigned int npages)
+long nfs_scan_dirty(struct address_space *mapping,
+		struct writeback_control *wbc,
+		struct list_head *dst)
 {
+	struct nfs_inode *nfsi = NFS_I(mapping->host);
 	struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
 	struct nfs_page *req;
-	unsigned long idx_end;
+	pgoff_t idx_start, idx_end;
+	long res = 0;
 	int found, i;
-	int res;
 
-	res = 0;
-	if (npages == 0)
-		idx_end = ~0;
-	else
-		idx_end = idx_start + npages - 1;
+	if (nfsi->ndirty == 0)
+		return 0;
+	if (wbc->range_cyclic) {
+		idx_start = 0;
+		idx_end = ULONG_MAX;
+	} else if (wbc->range_end == 0) {
+		idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
+		idx_end = ULONG_MAX;
+	} else {
+		idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
+		idx_end = wbc->range_end >> PAGE_CACHE_SHIFT;
+	}
 
 	for (;;) {
+		unsigned int toscan = NFS_SCAN_MAXENTRIES;
+
 		found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
-				(void **)&pgvec[0], idx_start, NFS_SCAN_MAXENTRIES,
+				(void **)&pgvec[0], idx_start, toscan,
 				NFS_PAGE_TAG_DIRTY);
+
+		/* Did we make progress? */
 		if (found <= 0)
 			break;
+
 		for (i = 0; i < found; i++) {
 			req = pgvec[i];
-			if (req->wb_index > idx_end)
+			if (!wbc->range_cyclic && req->wb_index > idx_end)
 				goto out;
 
+			/* Try to lock request and mark it for writeback */
+			if (!nfs_set_page_writeback_locked(req))
+				goto next;
+			radix_tree_tag_clear(&nfsi->nfs_page_tree,
+					req->wb_index, NFS_PAGE_TAG_DIRTY);
+			nfsi->ndirty--;
+			nfs_list_remove_request(req);
+			nfs_list_add_request(req, dst);
+			res++;
+			if (res == LONG_MAX)
+				goto out;
+next:
 			idx_start = req->wb_index + 1;
-
-			if (nfs_set_page_writeback_locked(req)) {
-				radix_tree_tag_clear(&nfsi->nfs_page_tree,
-						req->wb_index, NFS_PAGE_TAG_DIRTY);
-				nfs_list_remove_request(req);
-				nfs_list_add_request(req, dst);
-				dec_zone_page_state(req->wb_page, NR_FILE_DIRTY);
-				res++;
-			}
 		}
 	}
 out:
+	WARN_ON ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty));
 	return res;
 }
 
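[Annotation: nfs_scan_dirty() now derives its page-index window from the
writeback_control byte range. A worked example, assuming
PAGE_CACHE_SHIFT = 12: with wbc->range_start = 8192 and
wbc->range_end = 20000,

	idx_start = 8192  >> 12;	/* = 2 */
	idx_end   = 20000 >> 12;	/* = 4 */

so pages 2..4 are scanned; range_cyclic or range_end == 0 widens the
window to the whole mapping (idx_end = ULONG_MAX).]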
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 4529cc4f3f8f..10f5e80ca157 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -215,32 +215,6 @@ static int nfs_proc_read(struct nfs_read_data *rdata)
 	return status;
 }
 
-static int nfs_proc_write(struct nfs_write_data *wdata)
-{
-	int flags = wdata->flags;
-	struct inode *inode = wdata->inode;
-	struct nfs_fattr *fattr = wdata->res.fattr;
-	struct rpc_message msg = {
-		.rpc_proc	= &nfs_procedures[NFSPROC_WRITE],
-		.rpc_argp	= &wdata->args,
-		.rpc_resp	= &wdata->res,
-		.rpc_cred	= wdata->cred,
-	};
-	int status;
-
-	dprintk("NFS call write %d @ %Ld\n", wdata->args.count,
-			(long long) wdata->args.offset);
-	nfs_fattr_init(fattr);
-	status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
-	if (status >= 0) {
-		nfs_post_op_update_inode(inode, fattr);
-		wdata->res.count = wdata->args.count;
-		wdata->verf.committed = NFS_FILE_SYNC;
-	}
-	dprintk("NFS reply write: %d\n", status);
-	return status < 0? status : wdata->res.count;
-}
-
 static int
 nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		int flags, struct nameidata *nd)
@@ -545,13 +519,10 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 	};
 	int status;
 
-	lock_kernel();
-
 	dprintk("NFS call readdir %d\n", (unsigned int)cookie);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 
 	dprintk("NFS reply readdir: %d\n", status);
-	unlock_kernel();
 	return status;
 }
 
@@ -696,8 +667,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
 	.access		= NULL,		       /* access */
 	.readlink	= nfs_proc_readlink,
 	.read		= nfs_proc_read,
-	.write		= nfs_proc_write,
-	.commit		= NULL,		       /* commit */
 	.create		= nfs_proc_create,
 	.remove		= nfs_proc_remove,
 	.unlink_setup	= nfs_proc_unlink_setup,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index c2e49c397a27..05cca6609977 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -30,6 +30,7 @@
 
 #include <asm/system.h>
 
+#include "internal.h"
 #include "iostat.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
@@ -65,32 +66,22 @@ struct nfs_read_data *nfs_readdata_alloc(size_t len)
 	return p;
 }
 
-static void nfs_readdata_free(struct nfs_read_data *p)
+static void nfs_readdata_rcu_free(struct rcu_head *head)
 {
+	struct nfs_read_data *p = container_of(head, struct nfs_read_data, task.u.tk_rcu);
 	if (p && (p->pagevec != &p->page_array[0]))
 		kfree(p->pagevec);
 	mempool_free(p, nfs_rdata_mempool);
 }
 
-void nfs_readdata_release(void *data)
+static void nfs_readdata_free(struct nfs_read_data *rdata)
 {
-	nfs_readdata_free(data);
+	call_rcu_bh(&rdata->task.u.tk_rcu, nfs_readdata_rcu_free);
 }
 
-static
-unsigned int nfs_page_length(struct inode *inode, struct page *page)
+void nfs_readdata_release(void *data)
 {
-	loff_t i_size = i_size_read(inode);
-	unsigned long idx;
-
-	if (i_size <= 0)
-		return 0;
-	idx = (i_size - 1) >> PAGE_CACHE_SHIFT;
-	if (page->index > idx)
-		return 0;
-	if (page->index != idx)
-		return PAGE_CACHE_SIZE;
-	return 1 + ((i_size - 1) & (PAGE_CACHE_SIZE - 1));
+	nfs_readdata_free(data);
 }
 
 static
@@ -139,12 +130,12 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
 {
 	unsigned int rsize = NFS_SERVER(inode)->rsize;
 	unsigned int count = PAGE_CACHE_SIZE;
-	int result;
+	int result = -ENOMEM;
 	struct nfs_read_data *rdata;
 
 	rdata = nfs_readdata_alloc(count);
 	if (!rdata)
-		return -ENOMEM;
+		goto out_unlock;
 
 	memset(rdata, 0, sizeof(*rdata));
 	rdata->flags = (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
@@ -212,8 +203,9 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
 	result = 0;
 
 io_error:
-	unlock_page(page);
 	nfs_readdata_free(rdata);
+out_unlock:
+	unlock_page(page);
 	return result;
 }
 
@@ -224,7 +216,7 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
 	struct nfs_page	*new;
 	unsigned int len;
 
-	len = nfs_page_length(inode, page);
+	len = nfs_page_length(page);
 	if (len == 0)
 		return nfs_return_empty_page(page);
 	new = nfs_create_request(ctx, inode, page, 0, len);
@@ -316,9 +308,7 @@ static void nfs_execute_read(struct nfs_read_data *data)
 	sigset_t oldset;
 
 	rpc_clnt_sigmask(clnt, &oldset);
-	lock_kernel();
 	rpc_execute(&data->task);
-	unlock_kernel();
 	rpc_clnt_sigunmask(clnt, &oldset);
 }
 
@@ -455,6 +445,55 @@ nfs_pagein_list(struct list_head *head, int rpages)
 }
 
 /*
+ * This is the callback from RPC telling us whether a reply was
+ * received or some error occurred (timeout or socket shutdown).
+ */
+int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
+{
+	int status;
+
+	dprintk("%s: %4d, (status %d)\n", __FUNCTION__, task->tk_pid,
+			task->tk_status);
+
+	status = NFS_PROTO(data->inode)->read_done(task, data);
+	if (status != 0)
+		return status;
+
+	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);
+
+	if (task->tk_status == -ESTALE) {
+		set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
+		nfs_mark_for_revalidate(data->inode);
+	}
+	spin_lock(&data->inode->i_lock);
+	NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
+	spin_unlock(&data->inode->i_lock);
+	return 0;
+}
+
+static int nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
+{
+	struct nfs_readargs *argp = &data->args;
+	struct nfs_readres *resp = &data->res;
+
+	if (resp->eof || resp->count == argp->count)
+		return 0;
+
+	/* This is a short read! */
+	nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
+	/* Has the server at least made some progress? */
+	if (resp->count == 0)
+		return 0;
+
+	/* Yes, so retry the read at the end of the data */
+	argp->offset += resp->count;
+	argp->pgbase += resp->count;
+	argp->count -= resp->count;
+	rpc_restart_call(task);
+	return -EAGAIN;
+}
+
+/*
  * Handle a read reply that fills part of a page.
  */
 static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
@@ -463,12 +502,16 @@ static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
 	struct nfs_page *req = data->req;
 	struct page *page = req->wb_page;
 
-	if (likely(task->tk_status >= 0))
-		nfs_readpage_truncate_uninitialised_page(data);
-	else
-		SetPageError(page);
 	if (nfs_readpage_result(task, data) != 0)
 		return;
+
+	if (likely(task->tk_status >= 0)) {
+		nfs_readpage_truncate_uninitialised_page(data);
+		if (nfs_readpage_retry(task, data) != 0)
+			return;
+	}
+	if (unlikely(task->tk_status < 0))
+		SetPageError(page);
 	if (atomic_dec_and_test(&req->wb_complete)) {
 		if (!PageError(page))
 			SetPageUptodate(page);
@@ -496,25 +539,13 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
 	count += base;
 	for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
 		SetPageUptodate(*pages);
-	if (count != 0)
+	if (count == 0)
+		return;
+	/* Was this a short read? */
+	if (data->res.eof || data->res.count == data->args.count)
 		SetPageUptodate(*pages);
 }
 
-static void nfs_readpage_set_pages_error(struct nfs_read_data *data)
-{
-	unsigned int count = data->args.count;
-	unsigned int base = data->args.pgbase;
-	struct page **pages;
-
-	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
-	base &= ~PAGE_CACHE_MASK;
-	count += base;
-	for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
-		SetPageError(*pages);
-	if (count != 0)
-		SetPageError(*pages);
-}
-
 /*
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
@@ -523,19 +554,20 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
 {
 	struct nfs_read_data *data = calldata;
 
+	if (nfs_readpage_result(task, data) != 0)
+		return;
 	/*
-	 * Note: nfs_readpage_result may change the values of
+	 * Note: nfs_readpage_retry may change the values of
 	 * data->args. In the multi-page case, we therefore need
-	 * to ensure that we call the next nfs_readpage_set_page_uptodate()
-	 * first in the multi-page case.
+	 * to ensure that we call nfs_readpage_set_pages_uptodate()
+	 * first.
 	 */
 	if (likely(task->tk_status >= 0)) {
 		nfs_readpage_truncate_uninitialised_page(data);
 		nfs_readpage_set_pages_uptodate(data);
-	} else
-		nfs_readpage_set_pages_error(data);
-	if (nfs_readpage_result(task, data) != 0)
-		return;
+		if (nfs_readpage_retry(task, data) != 0)
+			return;
+	}
 	while (!list_empty(&data->pages)) {
 		struct nfs_page *req = nfs_list_entry(data->pages.next);
 
@@ -550,50 +582,6 @@ static const struct rpc_call_ops nfs_read_full_ops = {
 };
 
 /*
- * This is the callback from RPC telling us whether a reply was
- * received or some error occurred (timeout or socket shutdown).
- */
-int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
-{
-	struct nfs_readargs *argp = &data->args;
-	struct nfs_readres *resp = &data->res;
-	int status;
-
-	dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
-		task->tk_pid, task->tk_status);
-
-	status = NFS_PROTO(data->inode)->read_done(task, data);
-	if (status != 0)
-		return status;
-
-	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
-
-	if (task->tk_status < 0) {
-		if (task->tk_status == -ESTALE) {
-			set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
-			nfs_mark_for_revalidate(data->inode);
-		}
-	} else if (resp->count < argp->count && !resp->eof) {
-		/* This is a short read! */
-		nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
-		/* Has the server at least made some progress? */
-		if (resp->count != 0) {
-			/* Yes, so retry the read at the end of the data */
-			argp->offset += resp->count;
-			argp->pgbase += resp->count;
-			argp->count -= resp->count;
-			rpc_restart_call(task);
-			return -EAGAIN;
-		}
-		task->tk_status = -EIO;
-	}
-	spin_lock(&data->inode->i_lock);
-	NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
-	spin_unlock(&data->inode->i_lock);
-	return 0;
-}
-
-/*
  * Read a page over NFS.
  * We read the page synchronously in the following case:
  *  - The error flag is set for this page. This happens only when a
@@ -626,9 +614,10 @@ int nfs_readpage(struct file *file, struct page *page)
 		goto out_error;
 
 	if (file == NULL) {
+		error = -EBADF;
 		ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
 		if (ctx == NULL)
-			return -EBADF;
+			goto out_error;
 	} else
 		ctx = get_nfs_open_context((struct nfs_open_context *)
 				file->private_data);
@@ -663,7 +652,7 @@ readpage_async_filler(void *data, struct page *page)
 	unsigned int len;
 
 	nfs_wb_page(inode, page);
-	len = nfs_page_length(inode, page);
+	len = nfs_page_length(page);
 	if (len == 0)
 		return nfs_return_empty_page(page);
 	new = nfs_create_request(desc->ctx, inode, page, 0, len);
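[Annotation: the new nfs_readpage_retry() resumes a short read where the
server stopped. A worked example: for an original request with
args.count = 16384 at args.offset = 0, a reply of res.count = 4096 with
eof unset leads to

	argp->offset += 4096;	/* 0     -> 4096  */
	argp->pgbase += 4096;	/* 0     -> 4096  */
	argp->count  -= 4096;	/* 16384 -> 12288 */
	rpc_restart_call(task);	/* reissue for the remainder */

and the callback returns -EAGAIN so completion is deferred until the
remaining bytes arrive.]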
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 600bbe630abd..6c686112cc03 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -33,9 +33,7 @@ static int nfs_symlink_filler(struct inode *inode, struct page *page)
 {
 	int error;
 
-	lock_kernel();
 	error = NFS_PROTO(inode)->readlink(inode, page, 0, PAGE_SIZE);
-	unlock_kernel();
 	if (error < 0)
 		goto error;
 	SetPageUptodate(page);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 883dd4a1c157..7f3844d2bf36 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -63,6 +63,7 @@
 #include <linux/smp_lock.h>
 
 #include "delegation.h"
+#include "internal.h"
 #include "iostat.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
@@ -74,13 +75,12 @@
  * Local function declarations
  */
 static struct nfs_page * nfs_update_request(struct nfs_open_context*,
-					    struct inode *,
 					    struct page *,
 					    unsigned int, unsigned int);
+static void nfs_mark_request_dirty(struct nfs_page *req);
 static int nfs_wait_on_write_congestion(struct address_space *, int);
 static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
-static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
-			   unsigned int npages, int how);
+static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how);
 static const struct rpc_call_ops nfs_write_partial_ops;
 static const struct rpc_call_ops nfs_write_full_ops;
 static const struct rpc_call_ops nfs_commit_ops;
@@ -102,13 +102,19 @@ struct nfs_write_data *nfs_commit_alloc(void)
 	return p;
 }
 
-void nfs_commit_free(struct nfs_write_data *p)
+void nfs_commit_rcu_free(struct rcu_head *head)
 {
+	struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu);
 	if (p && (p->pagevec != &p->page_array[0]))
 		kfree(p->pagevec);
 	mempool_free(p, nfs_commit_mempool);
 }
 
+void nfs_commit_free(struct nfs_write_data *wdata)
+{
+	call_rcu_bh(&wdata->task.u.tk_rcu, nfs_commit_rcu_free);
+}
+
 struct nfs_write_data *nfs_writedata_alloc(size_t len)
 {
 	unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -131,18 +137,47 @@ struct nfs_write_data *nfs_writedata_alloc(size_t len)
 	return p;
 }
 
-static void nfs_writedata_free(struct nfs_write_data *p)
+static void nfs_writedata_rcu_free(struct rcu_head *head)
 {
+	struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu);
 	if (p && (p->pagevec != &p->page_array[0]))
 		kfree(p->pagevec);
 	mempool_free(p, nfs_wdata_mempool);
 }
 
+static void nfs_writedata_free(struct nfs_write_data *wdata)
+{
+	call_rcu_bh(&wdata->task.u.tk_rcu, nfs_writedata_rcu_free);
+}
+
 void nfs_writedata_release(void *wdata)
 {
 	nfs_writedata_free(wdata);
 }
 
+static struct nfs_page *nfs_page_find_request_locked(struct page *page)
+{
+	struct nfs_page *req = NULL;
+
+	if (PagePrivate(page)) {
+		req = (struct nfs_page *)page_private(page);
+		if (req != NULL)
+			atomic_inc(&req->wb_count);
+	}
+	return req;
+}
+
+static struct nfs_page *nfs_page_find_request(struct page *page)
+{
+	struct nfs_page *req = NULL;
+	spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
+
+	spin_lock(req_lock);
+	req = nfs_page_find_request_locked(page);
+	spin_unlock(req_lock);
+	return req;
+}
+
 /* Adjust the file length if we're writing beyond the end */
 static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
 {
@@ -164,113 +199,34 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c
  */
 static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
 {
-	loff_t end_offs;
-
 	if (PageUptodate(page))
 		return;
 	if (base != 0)
 		return;
-	if (count == PAGE_CACHE_SIZE) {
-		SetPageUptodate(page);
-		return;
-	}
-
-	end_offs = i_size_read(page->mapping->host) - 1;
-	if (end_offs < 0)
+	if (count != nfs_page_length(page))
 		return;
-	/* Is this the last page? */
-	if (page->index != (unsigned long)(end_offs >> PAGE_CACHE_SHIFT))
-		return;
-	/* This is the last page: set PG_uptodate if we cover the entire
-	 * extent of the data, then zero the rest of the page.
-	 */
-	if (count == (unsigned int)(end_offs & (PAGE_CACHE_SIZE - 1)) + 1) {
+	if (count != PAGE_CACHE_SIZE)
 		memclear_highpage_flush(page, count, PAGE_CACHE_SIZE - count);
-		SetPageUptodate(page);
-	}
+	SetPageUptodate(page);
 }
 
-/*
- * Write a page synchronously.
- * Offset is the data offset within the page.
- */
-static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
-		struct page *page, unsigned int offset, unsigned int count,
-		int how)
-{
-	unsigned int	wsize = NFS_SERVER(inode)->wsize;
-	int		result, written = 0;
-	struct nfs_write_data *wdata;
-
-	wdata = nfs_writedata_alloc(wsize);
-	if (!wdata)
-		return -ENOMEM;
-
-	wdata->flags = how;
-	wdata->cred = ctx->cred;
-	wdata->inode = inode;
-	wdata->args.fh = NFS_FH(inode);
-	wdata->args.context = ctx;
-	wdata->args.pages = &page;
-	wdata->args.stable = NFS_FILE_SYNC;
-	wdata->args.pgbase = offset;
-	wdata->args.count = wsize;
-	wdata->res.fattr = &wdata->fattr;
-	wdata->res.verf = &wdata->verf;
-
-	dprintk("NFS:      nfs_writepage_sync(%s/%Ld %d@%Ld)\n",
-		inode->i_sb->s_id,
-		(long long)NFS_FILEID(inode),
-		count, (long long)(page_offset(page) + offset));
-
-	set_page_writeback(page);
-	nfs_begin_data_update(inode);
-	do {
-		if (count < wsize)
-			wdata->args.count = count;
-		wdata->args.offset = page_offset(page) + wdata->args.pgbase;
-
-		result = NFS_PROTO(inode)->write(wdata);
-
-		if (result < 0) {
-			/* Must mark the page invalid after I/O error */
-			ClearPageUptodate(page);
-			goto io_error;
-		}
-		if (result < wdata->args.count)
-			printk(KERN_WARNING "NFS: short write, count=%u, result=%d\n",
-					wdata->args.count, result);
-
-		wdata->args.offset += result;
-		wdata->args.pgbase += result;
-		written += result;
-		count -= result;
-		nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result);
-	} while (count);
-	/* Update file length */
-	nfs_grow_file(page, offset, written);
-	/* Set the PG_uptodate flag? */
-	nfs_mark_uptodate(page, offset, written);
-
-	if (PageError(page))
-		ClearPageError(page);
-
-io_error:
-	nfs_end_data_update(inode);
-	end_page_writeback(page);
-	nfs_writedata_free(wdata);
-	return written ? written : result;
-}
-
-static int nfs_writepage_async(struct nfs_open_context *ctx,
-		struct inode *inode, struct page *page,
-		unsigned int offset, unsigned int count)
+static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
+		unsigned int offset, unsigned int count)
 {
 	struct nfs_page	*req;
+	int ret;
 
-	req = nfs_update_request(ctx, inode, page, offset, count);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
+	for (;;) {
+		req = nfs_update_request(ctx, page, offset, count);
+		if (!IS_ERR(req))
+			break;
+		ret = PTR_ERR(req);
+		if (ret != -EBUSY)
+			return ret;
+		ret = nfs_wb_page(page->mapping->host, page);
+		if (ret != 0)
+			return ret;
+	}
 	/* Update file length */
 	nfs_grow_file(page, offset, count);
 	/* Set the PG_uptodate flag? */
@@ -289,73 +245,94 @@ static int wb_priority(struct writeback_control *wbc)
 }
 
 /*
+ * Find an associated nfs write request, and prepare to flush it out
+ * Returns 1 if there was no write request, or if the request was
+ * already tagged by nfs_set_page_dirty. Returns 0 if the request
+ * was not tagged.
+ * May also return an error if the user signalled nfs_wait_on_request().
+ */
+static int nfs_page_mark_flush(struct page *page)
+{
+	struct nfs_page *req;
+	spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
+	int ret;
+
+	spin_lock(req_lock);
+	for(;;) {
+		req = nfs_page_find_request_locked(page);
+		if (req == NULL) {
+			spin_unlock(req_lock);
+			return 1;
+		}
+		if (nfs_lock_request_dontget(req))
+			break;
+		/* Note: If we hold the page lock, as is the case in nfs_writepage,
+		 *	 then the call to nfs_lock_request_dontget() will always
+		 *	 succeed provided that someone hasn't already marked the
+		 *	 request as dirty (in which case we don't care).
+		 */
+		spin_unlock(req_lock);
+		ret = nfs_wait_on_request(req);
+		nfs_release_request(req);
+		if (ret != 0)
+			return ret;
+		spin_lock(req_lock);
+	}
+	spin_unlock(req_lock);
+	if (test_and_set_bit(PG_FLUSHING, &req->wb_flags) == 0) {
+		nfs_mark_request_dirty(req);
+		set_page_writeback(page);
+	}
+	ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
+	nfs_unlock_request(req);
+	return ret;
+}
+
+/*
  * Write an mmapped page to the server.
  */
-int nfs_writepage(struct page *page, struct writeback_control *wbc)
+static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
 {
 	struct nfs_open_context *ctx;
 	struct inode *inode = page->mapping->host;
-	unsigned long end_index;
-	unsigned offset = PAGE_CACHE_SIZE;
-	loff_t i_size = i_size_read(inode);
-	int inode_referenced = 0;
-	int priority = wb_priority(wbc);
+	unsigned offset;
 	int err;
 
 	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
 	nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
 
-	/*
-	 * Note: We need to ensure that we have a reference to the inode
-	 *       if we are to do asynchronous writes. If not, waiting
-	 *       in nfs_wait_on_request() may deadlock with clear_inode().
-	 *
-	 *       If igrab() fails here, then it is in any case safe to
-	 *       call nfs_wb_page(), since there will be no pending writes.
-	 */
-	if (igrab(inode) != 0)
-		inode_referenced = 1;
-	end_index = i_size >> PAGE_CACHE_SHIFT;
-
-	/* Ensure we've flushed out any previous writes */
-	nfs_wb_page_priority(inode, page, priority);
-
-	/* easy case */
-	if (page->index < end_index)
-		goto do_it;
-	/* things got complicated... */
-	offset = i_size & (PAGE_CACHE_SIZE-1);
-
-	/* OK, are we completely out? */
-	err = 0; /* potential race with truncate - ignore */
-	if (page->index >= end_index+1 || !offset)
+	err = nfs_page_mark_flush(page);
+	if (err <= 0)
+		goto out;
+	err = 0;
+	offset = nfs_page_length(page);
+	if (!offset)
 		goto out;
-do_it:
+
 	ctx = nfs_find_open_context(inode, NULL, FMODE_WRITE);
 	if (ctx == NULL) {
 		err = -EBADF;
 		goto out;
 	}
-	lock_kernel();
-	if (!IS_SYNC(inode) && inode_referenced) {
-		err = nfs_writepage_async(ctx, inode, page, 0, offset);
-		if (!wbc->for_writepages)
-			nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
-	} else {
-		err = nfs_writepage_sync(ctx, inode, page, 0,
-						offset, priority);
-		if (err >= 0) {
-			if (err != offset)
-				redirty_page_for_writepage(wbc, page);
-			err = 0;
-		}
-	}
-	unlock_kernel();
+	err = nfs_writepage_setup(ctx, page, 0, offset);
 	put_nfs_open_context(ctx);
+	if (err != 0)
+		goto out;
+	err = nfs_page_mark_flush(page);
+	if (err > 0)
+		err = 0;
 out:
+	if (!wbc->for_writepages)
+		nfs_flush_mapping(page->mapping, wbc, wb_priority(wbc));
+	return err;
+}
+
+int nfs_writepage(struct page *page, struct writeback_control *wbc)
+{
+	int err;
+
+	err = nfs_writepage_locked(page, wbc);
 	unlock_page(page);
-	if (inode_referenced)
-		iput(inode);
 	return err;
 }
 
@@ -379,21 +356,18 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 			return 0;
 		nfs_wait_on_write_congestion(mapping, 0);
 	}
-	err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
+	err = nfs_flush_mapping(mapping, wbc, wb_priority(wbc));
 	if (err < 0)
 		goto out;
 	nfs_add_stats(inode, NFSIOS_WRITEPAGES, err);
-	wbc->nr_to_write -= err;
 	if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) {
 		err = nfs_wait_on_requests(inode, 0, 0);
 		if (err < 0)
 			goto out;
 	}
 	err = nfs_commit_inode(inode, wb_priority(wbc));
-	if (err > 0) {
-		wbc->nr_to_write -= err;
+	if (err > 0)
 		err = 0;
-	}
 out:
 	clear_bit(BDI_write_congested, &bdi->state);
 	wake_up_all(&nfs_write_congestion);
@@ -420,6 +394,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 		nfsi->change_attr++;
 	}
 	SetPagePrivate(req->wb_page);
+	set_page_private(req->wb_page, (unsigned long)req);
 	nfsi->npages++;
 	atomic_inc(&req->wb_count);
 	return 0;
@@ -436,6 +411,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 	BUG_ON (!NFS_WBACK_BUSY(req));
 
 	spin_lock(&nfsi->req_lock);
+	set_page_private(req->wb_page, 0);
 	ClearPagePrivate(req->wb_page);
 	radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
 	nfsi->npages--;
@@ -450,33 +426,6 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 }
 
 /*
- * Find a request
- */
-static inline struct nfs_page *
-_nfs_find_request(struct inode *inode, unsigned long index)
-{
-	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs_page *req;
-
-	req = (struct nfs_page*)radix_tree_lookup(&nfsi->nfs_page_tree, index);
-	if (req)
-		atomic_inc(&req->wb_count);
-	return req;
-}
-
-static struct nfs_page *
-nfs_find_request(struct inode *inode, unsigned long index)
-{
-	struct nfs_page *req;
-	struct nfs_inode *nfsi = NFS_I(inode);
-
-	spin_lock(&nfsi->req_lock);
-	req = _nfs_find_request(inode, index);
-	spin_unlock(&nfsi->req_lock);
-	return req;
-}
-
-/*
  * Add a request to the inode's dirty list.
  */
 static void
@@ -491,8 +440,14 @@ nfs_mark_request_dirty(struct nfs_page *req)
 	nfs_list_add_request(req, &nfsi->dirty);
 	nfsi->ndirty++;
 	spin_unlock(&nfsi->req_lock);
-	inc_zone_page_state(req->wb_page, NR_FILE_DIRTY);
-	mark_inode_dirty(inode);
+	__mark_inode_dirty(inode, I_DIRTY_PAGES);
+}
+
+static void
+nfs_redirty_request(struct nfs_page *req)
+{
+	clear_bit(PG_FLUSHING, &req->wb_flags);
+	__set_page_dirty_nobuffers(req->wb_page);
 }
 
 /*
@@ -501,8 +456,7 @@ nfs_mark_request_dirty(struct nfs_page *req)
 static inline int
 nfs_dirty_request(struct nfs_page *req)
 {
-	struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
-	return !list_empty(&req->wb_list) && req->wb_list_head == &nfsi->dirty;
+	return test_bit(PG_FLUSHING, &req->wb_flags) == 0;
 }
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
@@ -520,7 +474,7 @@ nfs_mark_request_commit(struct nfs_page *req)
520 nfsi->ncommit++; 474 nfsi->ncommit++;
521 spin_unlock(&nfsi->req_lock); 475 spin_unlock(&nfsi->req_lock);
522 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 476 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
523 mark_inode_dirty(inode); 477 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
524} 478}
525#endif 479#endif
526 480
@@ -597,31 +551,6 @@ static void nfs_cancel_commit_list(struct list_head *head)
597 } 551 }
598} 552}
599 553
600/*
601 * nfs_scan_dirty - Scan an inode for dirty requests
602 * @inode: NFS inode to scan
603 * @dst: destination list
604 * @idx_start: lower bound of page->index to scan.
605 * @npages: idx_start + npages sets the upper bound to scan.
606 *
607 * Moves requests from the inode's dirty page list.
608 * The requests are *not* checked to ensure that they form a contiguous set.
609 */
610static int
611nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
612{
613 struct nfs_inode *nfsi = NFS_I(inode);
614 int res = 0;
615
616 if (nfsi->ndirty != 0) {
617 res = nfs_scan_lock_dirty(nfsi, dst, idx_start, npages);
618 nfsi->ndirty -= res;
619 if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty))
620 printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
621 }
622 return res;
623}
624
625#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 554#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
626/* 555/*
627 * nfs_scan_commit - Scan an inode for commit requests 556 * nfs_scan_commit - Scan an inode for commit requests
@@ -698,27 +627,27 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
698 * Note: Should always be called with the Page Lock held! 627 * Note: Should always be called with the Page Lock held!
699 */ 628 */
700static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, 629static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
701 struct inode *inode, struct page *page, 630 struct page *page, unsigned int offset, unsigned int bytes)
702 unsigned int offset, unsigned int bytes)
703{ 631{
704 struct nfs_server *server = NFS_SERVER(inode); 632 struct inode *inode = page->mapping->host;
705 struct nfs_inode *nfsi = NFS_I(inode); 633 struct nfs_inode *nfsi = NFS_I(inode);
706 struct nfs_page *req, *new = NULL; 634 struct nfs_page *req, *new = NULL;
707 unsigned long rqend, end; 635 unsigned long rqend, end;
708 636
709 end = offset + bytes; 637 end = offset + bytes;
710 638
711 if (nfs_wait_on_write_congestion(page->mapping, server->flags & NFS_MOUNT_INTR)) 639 if (nfs_wait_on_write_congestion(page->mapping, NFS_SERVER(inode)->flags & NFS_MOUNT_INTR))
712 return ERR_PTR(-ERESTARTSYS); 640 return ERR_PTR(-ERESTARTSYS);
713 for (;;) { 641 for (;;) {
714 /* Loop over all inode entries and see if we find 642 /* Loop over all inode entries and see if we find
 715 * a request for the page we wish to update 643 * a request for the page we wish to update
716 */ 644 */
717 spin_lock(&nfsi->req_lock); 645 spin_lock(&nfsi->req_lock);
718 req = _nfs_find_request(inode, page->index); 646 req = nfs_page_find_request_locked(page);
719 if (req) { 647 if (req) {
720 if (!nfs_lock_request_dontget(req)) { 648 if (!nfs_lock_request_dontget(req)) {
721 int error; 649 int error;
650
722 spin_unlock(&nfsi->req_lock); 651 spin_unlock(&nfsi->req_lock);
723 error = nfs_wait_on_request(req); 652 error = nfs_wait_on_request(req);
724 nfs_release_request(req); 653 nfs_release_request(req);
@@ -745,7 +674,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
745 return ERR_PTR(error); 674 return ERR_PTR(error);
746 } 675 }
747 spin_unlock(&nfsi->req_lock); 676 spin_unlock(&nfsi->req_lock);
748 nfs_mark_request_dirty(new);
749 return new; 677 return new;
750 } 678 }
751 spin_unlock(&nfsi->req_lock); 679 spin_unlock(&nfsi->req_lock);
@@ -786,9 +714,8 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
786int nfs_flush_incompatible(struct file *file, struct page *page) 714int nfs_flush_incompatible(struct file *file, struct page *page)
787{ 715{
788 struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; 716 struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
789 struct inode *inode = page->mapping->host;
790 struct nfs_page *req; 717 struct nfs_page *req;
791 int status = 0; 718 int do_flush, status;
792 /* 719 /*
793 * Look for a request corresponding to this page. If there 720 * Look for a request corresponding to this page. If there
794 * is one, and it belongs to another file, we flush it out 721 * is one, and it belongs to another file, we flush it out
@@ -797,13 +724,18 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
797 * Also do the same if we find a request from an existing 724 * Also do the same if we find a request from an existing
798 * dropped page. 725 * dropped page.
799 */ 726 */
800 req = nfs_find_request(inode, page->index); 727 do {
801 if (req) { 728 req = nfs_page_find_request(page);
802 if (req->wb_page != page || ctx != req->wb_context) 729 if (req == NULL)
803 status = nfs_wb_page(inode, page); 730 return 0;
731 do_flush = req->wb_page != page || req->wb_context != ctx
732 || !nfs_dirty_request(req);
804 nfs_release_request(req); 733 nfs_release_request(req);
805 } 734 if (!do_flush)
806 return (status < 0) ? status : 0; 735 return 0;
736 status = nfs_wb_page(page->mapping->host, page);
737 } while (status == 0);
738 return status;
807} 739}
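With the loop above, nfs_flush_incompatible() now retries until the page carries no request from another open context. A rough sketch of the expected calling pattern, assuming a caller shaped like the write hooks in fs/nfs/file.c (the function name here is hypothetical):

	/* Hypothetical caller; the real users are the write hooks in
	 * fs/nfs/file.c, which this section does not show. */
	static int example_prepare_write(struct file *file, struct page *page,
					 unsigned int offset, unsigned int count)
	{
		int status = nfs_flush_incompatible(file, page);

		if (status == 0)
			status = nfs_updatepage(file, page, offset, count);
		return status;
	}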
808 740
809/* 741/*
@@ -817,7 +749,6 @@ int nfs_updatepage(struct file *file, struct page *page,
817{ 749{
818 struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; 750 struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
819 struct inode *inode = page->mapping->host; 751 struct inode *inode = page->mapping->host;
820 struct nfs_page *req;
821 int status = 0; 752 int status = 0;
822 753
823 nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); 754 nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
@@ -827,62 +758,18 @@ int nfs_updatepage(struct file *file, struct page *page,
827 file->f_dentry->d_name.name, count, 758 file->f_dentry->d_name.name, count,
828 (long long)(page_offset(page) +offset)); 759 (long long)(page_offset(page) +offset));
829 760
830 if (IS_SYNC(inode)) {
831 status = nfs_writepage_sync(ctx, inode, page, offset, count, 0);
832 if (status > 0) {
833 if (offset == 0 && status == PAGE_CACHE_SIZE)
834 SetPageUptodate(page);
835 return 0;
836 }
837 return status;
838 }
839
840 /* If we're not using byte range locks, and we know the page 761 /* If we're not using byte range locks, and we know the page
841 * is entirely in cache, it may be more efficient to avoid 762 * is entirely in cache, it may be more efficient to avoid
842 * fragmenting write requests. 763 * fragmenting write requests.
843 */ 764 */
844 if (PageUptodate(page) && inode->i_flock == NULL && !(file->f_mode & O_SYNC)) { 765 if (PageUptodate(page) && inode->i_flock == NULL && !(file->f_mode & O_SYNC)) {
845 loff_t end_offs = i_size_read(inode) - 1; 766 count = max(count + offset, nfs_page_length(page));
846 unsigned long end_index = end_offs >> PAGE_CACHE_SHIFT;
847
848 count += offset;
849 offset = 0; 767 offset = 0;
850 if (unlikely(end_offs < 0)) {
851 /* Do nothing */
852 } else if (page->index == end_index) {
853 unsigned int pglen;
854 pglen = (unsigned int)(end_offs & (PAGE_CACHE_SIZE-1)) + 1;
855 if (count < pglen)
856 count = pglen;
857 } else if (page->index < end_index)
858 count = PAGE_CACHE_SIZE;
859 } 768 }
860 769
861 /* 770 status = nfs_writepage_setup(ctx, page, offset, count);
862 * Try to find an NFS request corresponding to this page 771 __set_page_dirty_nobuffers(page);
863 * and update it.
864 * If the existing request cannot be updated, we must flush
865 * it out now.
866 */
867 do {
868 req = nfs_update_request(ctx, inode, page, offset, count);
869 status = (IS_ERR(req)) ? PTR_ERR(req) : 0;
870 if (status != -EBUSY)
871 break;
872 /* Request could not be updated. Flush it out and try again */
873 status = nfs_wb_page(inode, page);
874 } while (status >= 0);
875 if (status < 0)
876 goto done;
877
878 status = 0;
879 772
880 /* Update file length */
881 nfs_grow_file(page, offset, count);
882 /* Set the PG_uptodate flag? */
883 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
884 nfs_unlock_request(req);
885done:
886 dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n", 773 dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n",
887 status, (long long)i_size_read(inode)); 774 status, (long long)i_size_read(inode));
888 if (status < 0) 775 if (status < 0)
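The max() above replaces the old end_index bookkeeping when the page is fully cached and no byte-range locks are held. A worked sketch, assuming nfs_page_length(page) returns the number of valid file bytes covered by the page (PAGE_CACHE_SIZE except on the last page of the file):

	/* Illustration only, not in the patch. */
	static unsigned int widen_write(unsigned int offset, unsigned int count,
					unsigned int pglen /* nfs_page_length() */)
	{
		/* e.g. a 100-byte write at offset 300 into a fully cached
		 * 4096-byte page: max(100 + 300, 4096) = 4096, so the whole
		 * cached page goes out as one request. */
		return (count + offset > pglen) ? count + offset : pglen;
	}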
@@ -897,7 +784,7 @@ static void nfs_writepage_release(struct nfs_page *req)
897#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 784#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
898 if (!PageError(req->wb_page)) { 785 if (!PageError(req->wb_page)) {
899 if (NFS_NEED_RESCHED(req)) { 786 if (NFS_NEED_RESCHED(req)) {
900 nfs_mark_request_dirty(req); 787 nfs_redirty_request(req);
901 goto out; 788 goto out;
902 } else if (NFS_NEED_COMMIT(req)) { 789 } else if (NFS_NEED_COMMIT(req)) {
903 nfs_mark_request_commit(req); 790 nfs_mark_request_commit(req);
@@ -979,9 +866,7 @@ static void nfs_execute_write(struct nfs_write_data *data)
979 sigset_t oldset; 866 sigset_t oldset;
980 867
981 rpc_clnt_sigmask(clnt, &oldset); 868 rpc_clnt_sigmask(clnt, &oldset);
982 lock_kernel();
983 rpc_execute(&data->task); 869 rpc_execute(&data->task);
984 unlock_kernel();
985 rpc_clnt_sigunmask(clnt, &oldset); 870 rpc_clnt_sigunmask(clnt, &oldset);
986} 871}
987 872
@@ -1015,7 +900,6 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how)
1015 atomic_set(&req->wb_complete, requests); 900 atomic_set(&req->wb_complete, requests);
1016 901
1017 ClearPageError(page); 902 ClearPageError(page);
1018 set_page_writeback(page);
1019 offset = 0; 903 offset = 0;
1020 nbytes = req->wb_bytes; 904 nbytes = req->wb_bytes;
1021 do { 905 do {
@@ -1043,9 +927,9 @@ out_bad:
1043 while (!list_empty(&list)) { 927 while (!list_empty(&list)) {
1044 data = list_entry(list.next, struct nfs_write_data, pages); 928 data = list_entry(list.next, struct nfs_write_data, pages);
1045 list_del(&data->pages); 929 list_del(&data->pages);
1046 nfs_writedata_free(data); 930 nfs_writedata_release(data);
1047 } 931 }
1048 nfs_mark_request_dirty(req); 932 nfs_redirty_request(req);
1049 nfs_clear_page_writeback(req); 933 nfs_clear_page_writeback(req);
1050 return -ENOMEM; 934 return -ENOMEM;
1051} 935}
@@ -1076,7 +960,6 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
1076 nfs_list_remove_request(req); 960 nfs_list_remove_request(req);
1077 nfs_list_add_request(req, &data->pages); 961 nfs_list_add_request(req, &data->pages);
1078 ClearPageError(req->wb_page); 962 ClearPageError(req->wb_page);
1079 set_page_writeback(req->wb_page);
1080 *pages++ = req->wb_page; 963 *pages++ = req->wb_page;
1081 count += req->wb_bytes; 964 count += req->wb_bytes;
1082 } 965 }
@@ -1091,7 +974,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
1091 while (!list_empty(head)) { 974 while (!list_empty(head)) {
1092 struct nfs_page *req = nfs_list_entry(head->next); 975 struct nfs_page *req = nfs_list_entry(head->next);
1093 nfs_list_remove_request(req); 976 nfs_list_remove_request(req);
1094 nfs_mark_request_dirty(req); 977 nfs_redirty_request(req);
1095 nfs_clear_page_writeback(req); 978 nfs_clear_page_writeback(req);
1096 } 979 }
1097 return -ENOMEM; 980 return -ENOMEM;
@@ -1126,7 +1009,7 @@ out_err:
1126 while (!list_empty(head)) { 1009 while (!list_empty(head)) {
1127 req = nfs_list_entry(head->next); 1010 req = nfs_list_entry(head->next);
1128 nfs_list_remove_request(req); 1011 nfs_list_remove_request(req);
1129 nfs_mark_request_dirty(req); 1012 nfs_redirty_request(req);
1130 nfs_clear_page_writeback(req); 1013 nfs_clear_page_writeback(req);
1131 } 1014 }
1132 return error; 1015 return error;
@@ -1442,7 +1325,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1442 } 1325 }
1443 /* We have a mismatch. Write the page again */ 1326 /* We have a mismatch. Write the page again */
1444 dprintk(" mismatch\n"); 1327 dprintk(" mismatch\n");
1445 nfs_mark_request_dirty(req); 1328 nfs_redirty_request(req);
1446 next: 1329 next:
1447 nfs_clear_page_writeback(req); 1330 nfs_clear_page_writeback(req);
1448 } 1331 }
@@ -1459,18 +1342,17 @@ static inline int nfs_commit_list(struct inode *inode, struct list_head *head, i
1459} 1342}
1460#endif 1343#endif
1461 1344
1462static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, 1345static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
1463 unsigned int npages, int how)
1464{ 1346{
1465 struct nfs_inode *nfsi = NFS_I(inode); 1347 struct nfs_inode *nfsi = NFS_I(mapping->host);
1466 LIST_HEAD(head); 1348 LIST_HEAD(head);
1467 int res; 1349 long res;
1468 1350
1469 spin_lock(&nfsi->req_lock); 1351 spin_lock(&nfsi->req_lock);
1470 res = nfs_scan_dirty(inode, &head, idx_start, npages); 1352 res = nfs_scan_dirty(mapping, wbc, &head);
1471 spin_unlock(&nfsi->req_lock); 1353 spin_unlock(&nfsi->req_lock);
1472 if (res) { 1354 if (res) {
1473 int error = nfs_flush_list(inode, &head, res, how); 1355 int error = nfs_flush_list(mapping->host, &head, res, how);
1474 if (error < 0) 1356 if (error < 0)
1475 return error; 1357 return error;
1476 } 1358 }
@@ -1496,38 +1378,62 @@ int nfs_commit_inode(struct inode *inode, int how)
1496} 1378}
1497#endif 1379#endif
1498 1380
1499int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start, 1381long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
1500 unsigned int npages, int how)
1501{ 1382{
1383 struct inode *inode = mapping->host;
1502 struct nfs_inode *nfsi = NFS_I(inode); 1384 struct nfs_inode *nfsi = NFS_I(inode);
1385 unsigned long idx_start, idx_end;
1386 unsigned int npages = 0;
1503 LIST_HEAD(head); 1387 LIST_HEAD(head);
1504 int nocommit = how & FLUSH_NOCOMMIT; 1388 int nocommit = how & FLUSH_NOCOMMIT;
1505 int pages, ret; 1389 long pages, ret;
1506 1390
1391 /* FIXME */
1392 if (wbc->range_cyclic)
1393 idx_start = 0;
1394 else {
1395 idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
1396 idx_end = wbc->range_end >> PAGE_CACHE_SHIFT;
1397 if (idx_end > idx_start) {
1398 unsigned long l_npages = 1 + idx_end - idx_start;
1399 npages = l_npages;
1400 if (sizeof(npages) != sizeof(l_npages) &&
1401 (unsigned long)npages != l_npages)
1402 npages = 0;
1403 }
1404 }
1507 how &= ~FLUSH_NOCOMMIT; 1405 how &= ~FLUSH_NOCOMMIT;
1508 spin_lock(&nfsi->req_lock); 1406 spin_lock(&nfsi->req_lock);
1509 do { 1407 do {
1408 wbc->pages_skipped = 0;
1510 ret = nfs_wait_on_requests_locked(inode, idx_start, npages); 1409 ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
1511 if (ret != 0) 1410 if (ret != 0)
1512 continue; 1411 continue;
1513 pages = nfs_scan_dirty(inode, &head, idx_start, npages); 1412 pages = nfs_scan_dirty(mapping, wbc, &head);
1514 if (pages != 0) { 1413 if (pages != 0) {
1515 spin_unlock(&nfsi->req_lock); 1414 spin_unlock(&nfsi->req_lock);
1516 if (how & FLUSH_INVALIDATE) 1415 if (how & FLUSH_INVALIDATE) {
1517 nfs_cancel_dirty_list(&head); 1416 nfs_cancel_dirty_list(&head);
1518 else 1417 ret = pages;
1418 } else
1519 ret = nfs_flush_list(inode, &head, pages, how); 1419 ret = nfs_flush_list(inode, &head, pages, how);
1520 spin_lock(&nfsi->req_lock); 1420 spin_lock(&nfsi->req_lock);
1521 continue; 1421 continue;
1522 } 1422 }
1423 if (wbc->pages_skipped != 0)
1424 continue;
1523 if (nocommit) 1425 if (nocommit)
1524 break; 1426 break;
1525 pages = nfs_scan_commit(inode, &head, idx_start, npages); 1427 pages = nfs_scan_commit(inode, &head, idx_start, npages);
1526 if (pages == 0) 1428 if (pages == 0) {
1429 if (wbc->pages_skipped != 0)
1430 continue;
1527 break; 1431 break;
1432 }
1528 if (how & FLUSH_INVALIDATE) { 1433 if (how & FLUSH_INVALIDATE) {
1529 spin_unlock(&nfsi->req_lock); 1434 spin_unlock(&nfsi->req_lock);
1530 nfs_cancel_commit_list(&head); 1435 nfs_cancel_commit_list(&head);
1436 ret = pages;
1531 spin_lock(&nfsi->req_lock); 1437 spin_lock(&nfsi->req_lock);
1532 continue; 1438 continue;
1533 } 1439 }
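The FIXME'd block earlier in this hunk converts the writeback_control byte range into page indices, falling back to npages == 0 when the count cannot be represented in the narrower type; the wait and scan helpers treat 0 as unbounded. A sketch of the arithmetic, assuming 4K pages (PAGE_CACHE_SHIFT == 12) and range_end >= range_start:

	/* Illustration of the conversion; values in comments are examples. */
	static unsigned int range_to_npages(loff_t range_start, loff_t range_end)
	{
		unsigned long idx_start = range_start >> 12;	/* 8192  -> 2 */
		unsigned long idx_end = range_end >> 12;	/* 20480 -> 5 */
		unsigned long l_npages = 1 + idx_end - idx_start;	/* -> 4 */

		/* 0 (unbounded) if the count does not fit in unsigned int */
		return ((unsigned int)l_npages == l_npages) ? l_npages : 0;
	}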
@@ -1540,6 +1446,106 @@ int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
1540 return ret; 1446 return ret;
1541} 1447}
1542 1448
1449/*
1450 * flush the inode to disk.
1451 */
1452int nfs_wb_all(struct inode *inode)
1453{
1454 struct address_space *mapping = inode->i_mapping;
1455 struct writeback_control wbc = {
1456 .bdi = mapping->backing_dev_info,
1457 .sync_mode = WB_SYNC_ALL,
1458 .nr_to_write = LONG_MAX,
1459 .for_writepages = 1,
1460 .range_cyclic = 1,
1461 };
1462 int ret;
1463
1464 ret = generic_writepages(mapping, &wbc);
1465 if (ret < 0)
1466 goto out;
1467 ret = nfs_sync_mapping_wait(mapping, &wbc, 0);
1468 if (ret >= 0)
1469 return 0;
1470out:
1471 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
1472 return ret;
1473}
1474
1475int nfs_sync_mapping_range(struct address_space *mapping, loff_t range_start, loff_t range_end, int how)
1476{
1477 struct writeback_control wbc = {
1478 .bdi = mapping->backing_dev_info,
1479 .sync_mode = WB_SYNC_ALL,
1480 .nr_to_write = LONG_MAX,
1481 .range_start = range_start,
1482 .range_end = range_end,
1483 .for_writepages = 1,
1484 };
1485 int ret;
1486
1487 if (!(how & FLUSH_NOWRITEPAGE)) {
1488 ret = generic_writepages(mapping, &wbc);
1489 if (ret < 0)
1490 goto out;
1491 }
1492 ret = nfs_sync_mapping_wait(mapping, &wbc, how);
1493 if (ret >= 0)
1494 return 0;
1495out:
1496 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
1497 return ret;
1498}
1499
1500int nfs_wb_page_priority(struct inode *inode, struct page *page, int how)
1501{
1502 loff_t range_start = page_offset(page);
1503 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
1504 struct writeback_control wbc = {
1505 .bdi = page->mapping->backing_dev_info,
1506 .sync_mode = WB_SYNC_ALL,
1507 .nr_to_write = LONG_MAX,
1508 .range_start = range_start,
1509 .range_end = range_end,
1510 };
1511 int ret;
1512
1513 BUG_ON(!PageLocked(page));
1514 if (!(how & FLUSH_NOWRITEPAGE) && clear_page_dirty_for_io(page)) {
1515 ret = nfs_writepage_locked(page, &wbc);
1516 if (ret < 0)
1517 goto out;
1518 }
1519 ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);
1520 if (ret >= 0)
1521 return 0;
1522out:
1523 __mark_inode_dirty(inode, I_DIRTY_PAGES);
1524 return ret;
1525}
1526
1527/*
1528 * Write back all requests on one page - we do this before reading it.
1529 */
1530int nfs_wb_page(struct inode *inode, struct page* page)
1531{
1532 return nfs_wb_page_priority(inode, page, FLUSH_STABLE);
1533}
1534
1535int nfs_set_page_dirty(struct page *page)
1536{
1537 struct nfs_page *req;
1538
1539 req = nfs_page_find_request(page);
1540 if (req != NULL) {
1541 /* Mark any existing write requests for flushing */
1542 set_bit(PG_NEED_FLUSH, &req->wb_flags);
1543 nfs_release_request(req);
1544 }
1545 return __set_page_dirty_nobuffers(page);
1546}
1547
1548
1543int __init nfs_init_writepagecache(void) 1549int __init nfs_init_writepagecache(void)
1544{ 1550{
1545 nfs_wdata_cachep = kmem_cache_create("nfs_write_data", 1551 nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
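Taken together, the new nfs_wb_all(), nfs_sync_mapping_range() and nfs_wb_page_priority() each build a writeback_control, push pages out through generic_writepages() or nfs_writepage_locked(), then reap requests with nfs_sync_mapping_wait(). A minimal sketch of flushing one byte range through the new interface; the wrapper is hypothetical:

	/* Hypothetical wrapper; FLUSH_STABLE asks for stable server-side
	 * writes, as nfs_wb_page() does above. */
	static int example_flush_range(struct inode *inode, loff_t pos, size_t len)
	{
		return nfs_sync_mapping_range(inode->i_mapping,
					      pos, pos + len - 1, FLUSH_STABLE);
	}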
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 625ffea98561..04963063e620 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -33,6 +33,7 @@
33#define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */ 33#define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */
34#define FLUSH_NOCOMMIT 32 /* Don't send the NFSv3/v4 COMMIT */ 34#define FLUSH_NOCOMMIT 32 /* Don't send the NFSv3/v4 COMMIT */
35#define FLUSH_INVALIDATE 64 /* Invalidate the page cache */ 35#define FLUSH_INVALIDATE 64 /* Invalidate the page cache */
36#define FLUSH_NOWRITEPAGE 128 /* Don't call writepage() */
36 37
37#ifdef __KERNEL__ 38#ifdef __KERNEL__
38 39
@@ -427,19 +428,21 @@ extern int nfs_flush_incompatible(struct file *file, struct page *page);
427extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); 428extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
428extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); 429extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
429extern void nfs_writedata_release(void *); 430extern void nfs_writedata_release(void *);
430 431extern int nfs_set_page_dirty(struct page *);
431#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
432struct nfs_write_data *nfs_commit_alloc(void);
433void nfs_commit_free(struct nfs_write_data *p);
434#endif
435 432
436/* 433/*
437 * Try to write back everything synchronously (but check the 434 * Try to write back everything synchronously (but check the
438 * return value!) 435 * return value!)
439 */ 436 */
440extern int nfs_sync_inode_wait(struct inode *, unsigned long, unsigned int, int); 437extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int);
438extern int nfs_sync_mapping_range(struct address_space *, loff_t, loff_t, int);
439extern int nfs_wb_all(struct inode *inode);
440extern int nfs_wb_page(struct inode *inode, struct page* page);
441extern int nfs_wb_page_priority(struct inode *inode, struct page* page, int how);
441#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 442#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
442extern int nfs_commit_inode(struct inode *, int); 443extern int nfs_commit_inode(struct inode *, int);
444extern struct nfs_write_data *nfs_commit_alloc(void);
445extern void nfs_commit_free(struct nfs_write_data *wdata);
443extern void nfs_commit_release(void *wdata); 446extern void nfs_commit_release(void *wdata);
444#else 447#else
445static inline int 448static inline int
@@ -455,28 +458,6 @@ nfs_have_writebacks(struct inode *inode)
455 return NFS_I(inode)->npages != 0; 458 return NFS_I(inode)->npages != 0;
456} 459}
457 460
458static inline int
459nfs_wb_all(struct inode *inode)
460{
461 int error = nfs_sync_inode_wait(inode, 0, 0, 0);
462 return (error < 0) ? error : 0;
463}
464
465/*
466 * Write back all requests on one page - we do this before reading it.
467 */
468static inline int nfs_wb_page_priority(struct inode *inode, struct page* page, int how)
469{
470 int error = nfs_sync_inode_wait(inode, page->index, 1,
471 how | FLUSH_STABLE);
472 return (error < 0) ? error : 0;
473}
474
475static inline int nfs_wb_page(struct inode *inode, struct page* page)
476{
477 return nfs_wb_page_priority(inode, page, 0);
478}
479
480/* 461/*
481 * Allocate nfs_write_data structures 462 * Allocate nfs_write_data structures
482 */ 463 */
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 1f7bd287c230..2e555d49c9b7 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -30,6 +30,8 @@
30#define PG_BUSY 0 30#define PG_BUSY 0
31#define PG_NEED_COMMIT 1 31#define PG_NEED_COMMIT 1
32#define PG_NEED_RESCHED 2 32#define PG_NEED_RESCHED 2
33#define PG_NEED_FLUSH 3
34#define PG_FLUSHING 4
33 35
34struct nfs_inode; 36struct nfs_inode;
35struct nfs_page { 37struct nfs_page {
@@ -60,8 +62,9 @@ extern void nfs_clear_request(struct nfs_page *req);
60extern void nfs_release_request(struct nfs_page *req); 62extern void nfs_release_request(struct nfs_page *req);
61 63
62 64
63extern int nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst, 65extern long nfs_scan_dirty(struct address_space *mapping,
64 unsigned long idx_start, unsigned int npages); 66 struct writeback_control *wbc,
67 struct list_head *dst);
65extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, struct list_head *dst, 68extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, struct list_head *dst,
66 unsigned long idx_start, unsigned int npages); 69 unsigned long idx_start, unsigned int npages);
67extern int nfs_coalesce_requests(struct list_head *, struct list_head *, 70extern int nfs_coalesce_requests(struct list_head *, struct list_head *,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 768c1ad5ff6f..9ee9da5e1cc9 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -785,8 +785,6 @@ struct nfs_rpc_ops {
785 int (*readlink)(struct inode *, struct page *, unsigned int, 785 int (*readlink)(struct inode *, struct page *, unsigned int,
786 unsigned int); 786 unsigned int);
787 int (*read) (struct nfs_read_data *); 787 int (*read) (struct nfs_read_data *);
788 int (*write) (struct nfs_write_data *);
789 int (*commit) (struct nfs_write_data *);
790 int (*create) (struct inode *, struct dentry *, 788 int (*create) (struct inode *, struct dentry *,
791 struct iattr *, int, struct nameidata *); 789 struct iattr *, int, struct nameidata *);
792 int (*remove) (struct inode *, struct qstr *); 790 int (*remove) (struct inode *, struct qstr *);
diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h
index 97b62e97dd8d..2db2fbf34947 100644
--- a/include/linux/sunrpc/auth_gss.h
+++ b/include/linux/sunrpc/auth_gss.h
@@ -90,8 +90,6 @@ struct gss_cred {
90#define gc_flags gc_base.cr_flags 90#define gc_flags gc_base.cr_flags
91#define gc_expire gc_base.cr_expire 91#define gc_expire gc_base.cr_expire
92 92
93void print_hexl(u32 *p, u_int length, u_int offset);
94
95#endif /* __KERNEL__ */ 93#endif /* __KERNEL__ */
96#endif /* _LINUX_SUNRPC_AUTH_GSS_H */ 94#endif /* _LINUX_SUNRPC_AUTH_GSS_H */
97 95
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index f6d1d646ce05..a1be89deb3af 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -53,6 +53,7 @@ struct rpc_clnt {
53 struct dentry * cl_dentry; /* inode */ 53 struct dentry * cl_dentry; /* inode */
54 struct rpc_clnt * cl_parent; /* Points to parent of clones */ 54 struct rpc_clnt * cl_parent; /* Points to parent of clones */
55 struct rpc_rtt cl_rtt_default; 55 struct rpc_rtt cl_rtt_default;
56 struct rpc_program * cl_program;
56 char cl_inline_name[32]; 57 char cl_inline_name[32];
57}; 58};
58 59
diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h
index e4729aa67654..60fce3c92857 100644
--- a/include/linux/sunrpc/debug.h
+++ b/include/linux/sunrpc/debug.h
@@ -62,12 +62,6 @@ extern unsigned int nlm_debug;
62# define RPC_IFDEBUG(x) 62# define RPC_IFDEBUG(x)
63#endif 63#endif
64 64
65#ifdef RPC_PROFILE
66# define pprintk(args...) printk(## args)
67#else
68# define pprintk(args...) do ; while (0)
69#endif
70
71/* 65/*
72 * Sysctl interface for RPC debugging 66 * Sysctl interface for RPC debugging
73 */ 67 */
diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index e30ba201910a..5a4b1e0206e3 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -42,10 +42,6 @@
42 42
43struct krb5_ctx { 43struct krb5_ctx {
44 int initiate; /* 1 = initiating, 0 = accepting */ 44 int initiate; /* 1 = initiating, 0 = accepting */
45 int seed_init;
46 unsigned char seed[16];
47 int signalg;
48 int sealalg;
49 struct crypto_blkcipher *enc; 45 struct crypto_blkcipher *enc;
50 struct crypto_blkcipher *seq; 46 struct crypto_blkcipher *seq;
51 s32 endtime; 47 s32 endtime;
@@ -117,7 +113,7 @@ enum seal_alg {
117#define ENCTYPE_UNKNOWN 0x01ff 113#define ENCTYPE_UNKNOWN 0x01ff
118 114
119s32 115s32
120make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, 116make_checksum(char *, char *header, int hdrlen, struct xdr_buf *body,
121 int body_offset, struct xdr_netobj *cksum); 117 int body_offset, struct xdr_netobj *cksum);
122 118
123u32 gss_get_mic_kerberos(struct gss_ctx *, struct xdr_buf *, 119u32 gss_get_mic_kerberos(struct gss_ctx *, struct xdr_buf *,
diff --git a/include/linux/sunrpc/gss_spkm3.h b/include/linux/sunrpc/gss_spkm3.h
index 2cf3fbb40b4f..e3e6a3437f8b 100644
--- a/include/linux/sunrpc/gss_spkm3.h
+++ b/include/linux/sunrpc/gss_spkm3.h
@@ -12,27 +12,19 @@
12#include <linux/sunrpc/gss_asn1.h> 12#include <linux/sunrpc/gss_asn1.h>
13 13
14struct spkm3_ctx { 14struct spkm3_ctx {
15 struct xdr_netobj ctx_id; /* per message context id */ 15 struct xdr_netobj ctx_id; /* per message context id */
16 int qop; /* negotiated qop */ 16 int endtime; /* endtime of the context */
17 struct xdr_netobj mech_used; 17 struct xdr_netobj mech_used;
18 unsigned int ret_flags ; 18 unsigned int ret_flags ;
19 unsigned int req_flags ; 19 struct xdr_netobj conf_alg;
20 struct xdr_netobj share_key; 20 struct xdr_netobj derived_conf_key;
21 int conf_alg; 21 struct xdr_netobj intg_alg;
22 struct crypto_blkcipher *derived_conf_key; 22 struct xdr_netobj derived_integ_key;
23 int intg_alg;
24 struct crypto_blkcipher *derived_integ_key;
25 int keyestb_alg; /* alg used to get share_key */
26 int owf_alg; /* one way function */
27}; 23};
28 24
 29 /* from openssl/objects.h */ 25 /* OID declarations for K-ALG, I-ALG, C-ALG, and OWF-ALG */
30/* XXX need SEAL_ALG_NONE */ 26extern const struct xdr_netobj hmac_md5_oid;
31#define NID_md5 4 27extern const struct xdr_netobj cast5_cbc_oid;
32#define NID_dhKeyAgreement 28
33#define NID_des_cbc 31
34#define NID_sha1 64
35#define NID_cast5_cbc 108
36 28
37/* SPKM InnerContext Token types */ 29/* SPKM InnerContext Token types */
38 30
@@ -46,11 +38,13 @@ u32 spkm3_make_token(struct spkm3_ctx *ctx, struct xdr_buf * text, struct xdr_ne
46u32 spkm3_read_token(struct spkm3_ctx *ctx, struct xdr_netobj *read_token, struct xdr_buf *message_buffer, int toktype); 38u32 spkm3_read_token(struct spkm3_ctx *ctx, struct xdr_netobj *read_token, struct xdr_buf *message_buffer, int toktype);
47 39
48#define CKSUMTYPE_RSA_MD5 0x0007 40#define CKSUMTYPE_RSA_MD5 0x0007
41#define CKSUMTYPE_HMAC_MD5 0x0008
49 42
50s32 make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, 43s32 make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header,
51 int body_offset, struct xdr_netobj *cksum); 44 unsigned int hdrlen, struct xdr_buf *body,
45 unsigned int body_offset, struct xdr_netobj *cksum);
52void asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits); 46void asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits);
53int decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, 47int decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen,
54 int explen); 48 int explen);
55void spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, 49void spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen,
56 unsigned char *ctxhdr, int elen, int zbit); 50 unsigned char *ctxhdr, int elen, int zbit);
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index f399c138f79d..b6b6ad6253b4 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -11,6 +11,7 @@
11 11
12#include <linux/timer.h> 12#include <linux/timer.h>
13#include <linux/sunrpc/types.h> 13#include <linux/sunrpc/types.h>
14#include <linux/rcupdate.h>
14#include <linux/spinlock.h> 15#include <linux/spinlock.h>
15#include <linux/wait.h> 16#include <linux/wait.h>
16#include <linux/workqueue.h> 17#include <linux/workqueue.h>
@@ -85,6 +86,7 @@ struct rpc_task {
85 union { 86 union {
86 struct work_struct tk_work; /* Async task work queue */ 87 struct work_struct tk_work; /* Async task work queue */
87 struct rpc_wait tk_wait; /* RPC wait */ 88 struct rpc_wait tk_wait; /* RPC wait */
89 struct rcu_head tk_rcu; /* for task deletion */
88 } u; 90 } u;
89 91
90 unsigned short tk_timeouts; /* maj timeouts */ 92 unsigned short tk_timeouts; /* maj timeouts */
@@ -178,13 +180,6 @@ struct rpc_call_ops {
178 } while (0) 180 } while (0)
179 181
180#define RPC_IS_ACTIVATED(t) (test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate)) 182#define RPC_IS_ACTIVATED(t) (test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate))
181#define rpc_set_active(t) (set_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate))
182#define rpc_clear_active(t) \
183 do { \
184 smp_mb__before_clear_bit(); \
185 clear_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate); \
186 smp_mb__after_clear_bit(); \
187 } while(0)
188 183
189/* 184/*
190 * Task priorities. 185 * Task priorities.
@@ -254,8 +249,10 @@ struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
254void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, 249void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt,
255 int flags, const struct rpc_call_ops *ops, 250 int flags, const struct rpc_call_ops *ops,
256 void *data); 251 void *data);
252void rpc_put_task(struct rpc_task *);
257void rpc_release_task(struct rpc_task *); 253void rpc_release_task(struct rpc_task *);
258void rpc_exit_task(struct rpc_task *); 254void rpc_exit_task(struct rpc_task *);
255void rpc_release_calldata(const struct rpc_call_ops *, void *);
259void rpc_killall_tasks(struct rpc_clnt *); 256void rpc_killall_tasks(struct rpc_clnt *);
260int rpc_execute(struct rpc_task *); 257int rpc_execute(struct rpc_task *);
261void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); 258void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *);
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 9a527c364394..9e340fa23c06 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -11,6 +11,7 @@
11 11
12#include <linux/uio.h> 12#include <linux/uio.h>
13#include <asm/byteorder.h> 13#include <asm/byteorder.h>
14#include <linux/scatterlist.h>
14 15
15/* 16/*
16 * Buffer adjustment 17 * Buffer adjustment
@@ -139,29 +140,30 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p)
139 */ 140 */
140extern void xdr_shift_buf(struct xdr_buf *, size_t); 141extern void xdr_shift_buf(struct xdr_buf *, size_t);
141extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); 142extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *);
142extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, int, int); 143extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int);
143extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, int); 144extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, unsigned int);
144extern int read_bytes_from_xdr_buf(struct xdr_buf *, int, void *, int); 145extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
145extern int write_bytes_to_xdr_buf(struct xdr_buf *, int, void *, int); 146extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
146 147
147/* 148/*
148 * Helper structure for copying from an sk_buff. 149 * Helper structure for copying from an sk_buff.
149 */ 150 */
150typedef struct { 151struct xdr_skb_reader {
151 struct sk_buff *skb; 152 struct sk_buff *skb;
152 unsigned int offset; 153 unsigned int offset;
153 size_t count; 154 size_t count;
154 __wsum csum; 155 __wsum csum;
155} skb_reader_t; 156};
156 157
157typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len); 158typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to, size_t len);
158 159
160size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len);
159extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *); 161extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *);
160extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, 162extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int,
161 skb_reader_t *, skb_read_actor_t); 163 struct xdr_skb_reader *, xdr_skb_read_actor);
162 164
163extern int xdr_encode_word(struct xdr_buf *, int, u32); 165extern int xdr_encode_word(struct xdr_buf *, unsigned int, u32);
164extern int xdr_decode_word(struct xdr_buf *, int, u32 *); 166extern int xdr_decode_word(struct xdr_buf *, unsigned int, u32 *);
165 167
166struct xdr_array2_desc; 168struct xdr_array2_desc;
167typedef int (*xdr_xcode_elem_t)(struct xdr_array2_desc *desc, void *elem); 169typedef int (*xdr_xcode_elem_t)(struct xdr_array2_desc *desc, void *elem);
@@ -196,6 +198,7 @@ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32
196extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); 198extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
197extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len); 199extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
198extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); 200extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
201extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data);
199 202
200#endif /* __KERNEL__ */ 203#endif /* __KERNEL__ */
201 204
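The new xdr_process_buf() declaration generalizes the scatterlist walk that this series removes from gss_krb5_crypto.c. A toy actor showing the contract, assuming only the prototype above; the byte-counting accumulator is an illustration, not an API convention:

	/* Toy actor: totals the bytes handed to it. A non-zero return
	 * value aborts the walk, mirroring the checksummer() user in
	 * gss_krb5_crypto.c later in this diff. */
	static int count_bytes_actor(struct scatterlist *sg, void *data)
	{
		size_t *total = data;	/* caller-supplied accumulator */

		*total += sg->length;
		return 0;
	}

	/* err = xdr_process_buf(buf, offset, buf->len - offset,
	 *			 count_bytes_actor, &total); */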
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 3e04c1512fc4..f780e72fc417 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -106,7 +106,6 @@ struct rpc_rqst {
106 106
107struct rpc_xprt_ops { 107struct rpc_xprt_ops {
108 void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); 108 void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize);
109 char * (*print_addr)(struct rpc_xprt *xprt, enum rpc_display_format_t format);
110 int (*reserve_xprt)(struct rpc_task *task); 109 int (*reserve_xprt)(struct rpc_task *task);
111 void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); 110 void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
112 void (*rpcbind)(struct rpc_task *task); 111 void (*rpcbind)(struct rpc_task *task);
@@ -126,8 +125,6 @@ struct rpc_xprt_ops {
126struct rpc_xprt { 125struct rpc_xprt {
127 struct kref kref; /* Reference count */ 126 struct kref kref; /* Reference count */
128 struct rpc_xprt_ops * ops; /* transport methods */ 127 struct rpc_xprt_ops * ops; /* transport methods */
129 struct socket * sock; /* BSD socket layer */
130 struct sock * inet; /* INET layer */
131 128
132 struct rpc_timeout timeout; /* timeout parms */ 129 struct rpc_timeout timeout; /* timeout parms */
133 struct sockaddr_storage addr; /* server address */ 130 struct sockaddr_storage addr; /* server address */
@@ -137,9 +134,6 @@ struct rpc_xprt {
137 unsigned long cong; /* current congestion */ 134 unsigned long cong; /* current congestion */
138 unsigned long cwnd; /* congestion window */ 135 unsigned long cwnd; /* congestion window */
139 136
140 size_t rcvsize, /* transport rcv buffer size */
141 sndsize; /* transport send buffer size */
142
143 size_t max_payload; /* largest RPC payload size, 137 size_t max_payload; /* largest RPC payload size,
144 in bytes */ 138 in bytes */
145 unsigned int tsh_size; /* size of transport specific 139 unsigned int tsh_size; /* size of transport specific
@@ -158,27 +152,11 @@ struct rpc_xprt {
158 resvport : 1; /* use a reserved port */ 152 resvport : 1; /* use a reserved port */
159 153
160 /* 154 /*
161 * XID
162 */
163 __u32 xid; /* Next XID value to use */
164
165 /*
166 * State of TCP reply receive stuff
167 */
168 __be32 tcp_recm, /* Fragment header */
169 tcp_xid; /* Current XID */
170 u32 tcp_reclen, /* fragment length */
171 tcp_offset; /* fragment offset */
172 unsigned long tcp_copied, /* copied to request */
173 tcp_flags;
174 /*
175 * Connection of transports 155 * Connection of transports
176 */ 156 */
177 unsigned long connect_timeout, 157 unsigned long connect_timeout,
178 bind_timeout, 158 bind_timeout,
179 reestablish_timeout; 159 reestablish_timeout;
180 struct delayed_work connect_worker;
181 unsigned short port;
182 160
183 /* 161 /*
184 * Disconnection of idle transports 162 * Disconnection of idle transports
@@ -193,8 +171,8 @@ struct rpc_xprt {
193 */ 171 */
194 spinlock_t transport_lock; /* lock transport info */ 172 spinlock_t transport_lock; /* lock transport info */
195 spinlock_t reserve_lock; /* lock slot table */ 173 spinlock_t reserve_lock; /* lock slot table */
174 u32 xid; /* Next XID value to use */
196 struct rpc_task * snd_task; /* Task blocked in send */ 175 struct rpc_task * snd_task; /* Task blocked in send */
197
198 struct list_head recv; 176 struct list_head recv;
199 177
200 struct { 178 struct {
@@ -210,18 +188,9 @@ struct rpc_xprt {
210 bklog_u; /* backlog queue utilization */ 188 bklog_u; /* backlog queue utilization */
211 } stat; 189 } stat;
212 190
213 void (*old_data_ready)(struct sock *, int);
214 void (*old_state_change)(struct sock *);
215 void (*old_write_space)(struct sock *);
216
217 char * address_strings[RPC_DISPLAY_MAX]; 191 char * address_strings[RPC_DISPLAY_MAX];
218}; 192};
219 193
220#define XPRT_LAST_FRAG (1 << 0)
221#define XPRT_COPY_RECM (1 << 1)
222#define XPRT_COPY_XID (1 << 2)
223#define XPRT_COPY_DATA (1 << 3)
224
225#ifdef __KERNEL__ 194#ifdef __KERNEL__
226 195
227/* 196/*
@@ -270,8 +239,8 @@ void xprt_disconnect(struct rpc_xprt *xprt);
270/* 239/*
271 * Socket transport setup operations 240 * Socket transport setup operations
272 */ 241 */
273int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to); 242struct rpc_xprt * xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to);
274int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to); 243struct rpc_xprt * xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to);
275 244
276/* 245/*
277 * Reserved bit positions in xprt->state 246 * Reserved bit positions in xprt->state
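xs_setup_udp() and xs_setup_tcp() now take the server address directly and return the transport rather than filling in a caller-allocated rpc_xprt. A sketch of the new calling convention; the function, port and address here are assumptions, not taken from this patch:

	/* Assumed caller shape; the real call site is not in this section,
	 * and the error convention is a guess from IS_ERR() use elsewhere
	 * in the RPC code. */
	static struct rpc_xprt *example_setup_tcp(struct rpc_timeout *to)
	{
		struct sockaddr_in sin = {
			.sin_family = AF_INET,
			.sin_port = htons(2049),	/* illustrative port */
		};

		return xs_setup_tcp((struct sockaddr *)&sin, sizeof(sin), to);
	}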
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index e5a84a482e57..a02ecc1f230d 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -68,7 +68,7 @@ static struct rpc_credops gss_credops;
68#define GSS_CRED_SLACK 1024 /* XXX: unused */ 68#define GSS_CRED_SLACK 1024 /* XXX: unused */
69/* length of a krb5 verifier (48), plus data added before arguments when 69/* length of a krb5 verifier (48), plus data added before arguments when
70 * using integrity (two 4-byte integers): */ 70 * using integrity (two 4-byte integers): */
71#define GSS_VERF_SLACK 56 71#define GSS_VERF_SLACK 100
72 72
73/* XXX this define must match the gssd define 73/* XXX this define must match the gssd define
74* as it is passed to gssd to signal the use of 74* as it is passed to gssd to signal the use of
@@ -94,46 +94,6 @@ struct gss_auth {
94static void gss_destroy_ctx(struct gss_cl_ctx *); 94static void gss_destroy_ctx(struct gss_cl_ctx *);
95static struct rpc_pipe_ops gss_upcall_ops; 95static struct rpc_pipe_ops gss_upcall_ops;
96 96
97void
98print_hexl(u32 *p, u_int length, u_int offset)
99{
100 u_int i, j, jm;
101 u8 c, *cp;
102
103 dprintk("RPC: print_hexl: length %d\n",length);
104 dprintk("\n");
105 cp = (u8 *) p;
106
107 for (i = 0; i < length; i += 0x10) {
108 dprintk(" %04x: ", (u_int)(i + offset));
109 jm = length - i;
110 jm = jm > 16 ? 16 : jm;
111
112 for (j = 0; j < jm; j++) {
113 if ((j % 2) == 1)
114 dprintk("%02x ", (u_int)cp[i+j]);
115 else
116 dprintk("%02x", (u_int)cp[i+j]);
117 }
118 for (; j < 16; j++) {
119 if ((j % 2) == 1)
120 dprintk(" ");
121 else
122 dprintk(" ");
123 }
124 dprintk(" ");
125
126 for (j = 0; j < jm; j++) {
127 c = cp[i+j];
128 c = isprint(c) ? c : '.';
129 dprintk("%c", c);
130 }
131 dprintk("\n");
132 }
133}
134
135EXPORT_SYMBOL(print_hexl);
136
137static inline struct gss_cl_ctx * 97static inline struct gss_cl_ctx *
138gss_get_ctx(struct gss_cl_ctx *ctx) 98gss_get_ctx(struct gss_cl_ctx *ctx)
139{ 99{
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index e11a40b25cce..d926cda88623 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -43,6 +43,7 @@
43#include <linux/highmem.h> 43#include <linux/highmem.h>
44#include <linux/pagemap.h> 44#include <linux/pagemap.h>
45#include <linux/sunrpc/gss_krb5.h> 45#include <linux/sunrpc/gss_krb5.h>
46#include <linux/sunrpc/xdr.h>
46 47
47#ifdef RPC_DEBUG 48#ifdef RPC_DEBUG
48# define RPCDBG_FACILITY RPCDBG_AUTH 49# define RPCDBG_FACILITY RPCDBG_AUTH
@@ -61,9 +62,6 @@ krb5_encrypt(
61 u8 local_iv[16] = {0}; 62 u8 local_iv[16] = {0};
62 struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; 63 struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
63 64
64 dprintk("RPC: krb5_encrypt: input data:\n");
65 print_hexl((u32 *)in, length, 0);
66
67 if (length % crypto_blkcipher_blocksize(tfm) != 0) 65 if (length % crypto_blkcipher_blocksize(tfm) != 0)
68 goto out; 66 goto out;
69 67
@@ -80,12 +78,9 @@ krb5_encrypt(
80 sg_set_buf(sg, out, length); 78 sg_set_buf(sg, out, length);
81 79
82 ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, length); 80 ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, length);
83
84 dprintk("RPC: krb5_encrypt: output data:\n");
85 print_hexl((u32 *)out, length, 0);
86out: 81out:
87 dprintk("RPC: krb5_encrypt returns %d\n",ret); 82 dprintk("RPC: krb5_encrypt returns %d\n",ret);
88 return(ret); 83 return ret;
89} 84}
90 85
91EXPORT_SYMBOL(krb5_encrypt); 86EXPORT_SYMBOL(krb5_encrypt);
@@ -103,9 +98,6 @@ krb5_decrypt(
103 u8 local_iv[16] = {0}; 98 u8 local_iv[16] = {0};
104 struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; 99 struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
105 100
106 dprintk("RPC: krb5_decrypt: input data:\n");
107 print_hexl((u32 *)in, length, 0);
108
109 if (length % crypto_blkcipher_blocksize(tfm) != 0) 101 if (length % crypto_blkcipher_blocksize(tfm) != 0)
110 goto out; 102 goto out;
111 103
@@ -121,83 +113,14 @@ krb5_decrypt(
121 sg_set_buf(sg, out, length); 113 sg_set_buf(sg, out, length);
122 114
123 ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, length); 115 ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, length);
124
125 dprintk("RPC: krb5_decrypt: output_data:\n");
126 print_hexl((u32 *)out, length, 0);
127out: 116out:
128 dprintk("RPC: gss_k5decrypt returns %d\n",ret); 117 dprintk("RPC: gss_k5decrypt returns %d\n",ret);
129 return(ret); 118 return ret;
130} 119}
131 120
132EXPORT_SYMBOL(krb5_decrypt); 121EXPORT_SYMBOL(krb5_decrypt);
133 122
134static int 123static int
135process_xdr_buf(struct xdr_buf *buf, int offset, int len,
136 int (*actor)(struct scatterlist *, void *), void *data)
137{
138 int i, page_len, thislen, page_offset, ret = 0;
139 struct scatterlist sg[1];
140
141 if (offset >= buf->head[0].iov_len) {
142 offset -= buf->head[0].iov_len;
143 } else {
144 thislen = buf->head[0].iov_len - offset;
145 if (thislen > len)
146 thislen = len;
147 sg_set_buf(sg, buf->head[0].iov_base + offset, thislen);
148 ret = actor(sg, data);
149 if (ret)
150 goto out;
151 offset = 0;
152 len -= thislen;
153 }
154 if (len == 0)
155 goto out;
156
157 if (offset >= buf->page_len) {
158 offset -= buf->page_len;
159 } else {
160 page_len = buf->page_len - offset;
161 if (page_len > len)
162 page_len = len;
163 len -= page_len;
164 page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1);
165 i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT;
166 thislen = PAGE_CACHE_SIZE - page_offset;
167 do {
168 if (thislen > page_len)
169 thislen = page_len;
170 sg->page = buf->pages[i];
171 sg->offset = page_offset;
172 sg->length = thislen;
173 ret = actor(sg, data);
174 if (ret)
175 goto out;
176 page_len -= thislen;
177 i++;
178 page_offset = 0;
179 thislen = PAGE_CACHE_SIZE;
180 } while (page_len != 0);
181 offset = 0;
182 }
183 if (len == 0)
184 goto out;
185
186 if (offset < buf->tail[0].iov_len) {
187 thislen = buf->tail[0].iov_len - offset;
188 if (thislen > len)
189 thislen = len;
190 sg_set_buf(sg, buf->tail[0].iov_base + offset, thislen);
191 ret = actor(sg, data);
192 len -= thislen;
193 }
194 if (len != 0)
195 ret = -EINVAL;
196out:
197 return ret;
198}
199
200static int
201checksummer(struct scatterlist *sg, void *data) 124checksummer(struct scatterlist *sg, void *data)
202{ 125{
203 struct hash_desc *desc = data; 126 struct hash_desc *desc = data;
@@ -207,23 +130,13 @@ checksummer(struct scatterlist *sg, void *data)
207 130
208/* checksum the plaintext data and hdrlen bytes of the token header */ 131/* checksum the plaintext data and hdrlen bytes of the token header */
209s32 132s32
210make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, 133make_checksum(char *cksumname, char *header, int hdrlen, struct xdr_buf *body,
211 int body_offset, struct xdr_netobj *cksum) 134 int body_offset, struct xdr_netobj *cksum)
212{ 135{
213 char *cksumname;
214 struct hash_desc desc; /* XXX add to ctx? */ 136 struct hash_desc desc; /* XXX add to ctx? */
215 struct scatterlist sg[1]; 137 struct scatterlist sg[1];
216 int err; 138 int err;
217 139
218 switch (cksumtype) {
219 case CKSUMTYPE_RSA_MD5:
220 cksumname = "md5";
221 break;
222 default:
223 dprintk("RPC: krb5_make_checksum:"
224 " unsupported checksum %d", cksumtype);
225 return GSS_S_FAILURE;
226 }
227 desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC); 140 desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC);
228 if (IS_ERR(desc.tfm)) 141 if (IS_ERR(desc.tfm))
229 return GSS_S_FAILURE; 142 return GSS_S_FAILURE;
@@ -237,7 +150,7 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
237 err = crypto_hash_update(&desc, sg, hdrlen); 150 err = crypto_hash_update(&desc, sg, hdrlen);
238 if (err) 151 if (err)
239 goto out; 152 goto out;
240 err = process_xdr_buf(body, body_offset, body->len - body_offset, 153 err = xdr_process_buf(body, body_offset, body->len - body_offset,
241 checksummer, &desc); 154 checksummer, &desc);
242 if (err) 155 if (err)
243 goto out; 156 goto out;
@@ -335,7 +248,7 @@ gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
335 desc.fragno = 0; 248 desc.fragno = 0;
336 desc.fraglen = 0; 249 desc.fraglen = 0;
337 250
338 ret = process_xdr_buf(buf, offset, buf->len - offset, encryptor, &desc); 251 ret = xdr_process_buf(buf, offset, buf->len - offset, encryptor, &desc);
339 return ret; 252 return ret;
340} 253}
341 254
@@ -401,7 +314,7 @@ gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
401 desc.desc.flags = 0; 314 desc.desc.flags = 0;
402 desc.fragno = 0; 315 desc.fragno = 0;
403 desc.fraglen = 0; 316 desc.fraglen = 0;
404 return process_xdr_buf(buf, offset, buf->len - offset, decryptor, &desc); 317 return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc);
405} 318}
406 319
407EXPORT_SYMBOL(gss_decrypt_xdr_buf); 320EXPORT_SYMBOL(gss_decrypt_xdr_buf);
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 754b8cd6439f..05d4bee86fc0 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -129,6 +129,7 @@ gss_import_sec_context_kerberos(const void *p,
129{ 129{
130 const void *end = (const void *)((const char *)p + len); 130 const void *end = (const void *)((const char *)p + len);
131 struct krb5_ctx *ctx; 131 struct krb5_ctx *ctx;
132 int tmp;
132 133
133 if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL))) 134 if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL)))
134 goto out_err; 135 goto out_err;
@@ -136,18 +137,23 @@ gss_import_sec_context_kerberos(const void *p,
136 p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate)); 137 p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
137 if (IS_ERR(p)) 138 if (IS_ERR(p))
138 goto out_err_free_ctx; 139 goto out_err_free_ctx;
139 p = simple_get_bytes(p, end, &ctx->seed_init, sizeof(ctx->seed_init)); 140 /* The downcall format was designed before we completely understood
 140 if (IS_ERR(p)) 141 * the uses of the context fields; so it includes some stuff that
 142 * gets only minimal sanity-checking, and some we ignore
143 * completely (like the next twenty bytes): */
144 if (unlikely(p + 20 > end || p + 20 < p))
141 goto out_err_free_ctx; 145 goto out_err_free_ctx;
142 p = simple_get_bytes(p, end, ctx->seed, sizeof(ctx->seed)); 146 p += 20;
147 p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
143 if (IS_ERR(p)) 148 if (IS_ERR(p))
144 goto out_err_free_ctx; 149 goto out_err_free_ctx;
145 p = simple_get_bytes(p, end, &ctx->signalg, sizeof(ctx->signalg)); 150 if (tmp != SGN_ALG_DES_MAC_MD5)
146 if (IS_ERR(p))
147 goto out_err_free_ctx; 151 goto out_err_free_ctx;
148 p = simple_get_bytes(p, end, &ctx->sealalg, sizeof(ctx->sealalg)); 152 p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
149 if (IS_ERR(p)) 153 if (IS_ERR(p))
150 goto out_err_free_ctx; 154 goto out_err_free_ctx;
155 if (tmp != SEAL_ALG_DES)
156 goto out_err_free_ctx;
151 p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime)); 157 p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
152 if (IS_ERR(p)) 158 if (IS_ERR(p))
153 goto out_err_free_ctx; 159 goto out_err_free_ctx;
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index 08601ee4cd73..d0bb5064f8c5 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -77,7 +77,6 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
77 struct xdr_netobj *token) 77 struct xdr_netobj *token)
78{ 78{
79 struct krb5_ctx *ctx = gss_ctx->internal_ctx_id; 79 struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
80 s32 checksum_type;
81 char cksumdata[16]; 80 char cksumdata[16];
82 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; 81 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
83 unsigned char *ptr, *krb5_hdr, *msg_start; 82 unsigned char *ptr, *krb5_hdr, *msg_start;
@@ -88,21 +87,6 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
88 87
89 now = get_seconds(); 88 now = get_seconds();
90 89
91 switch (ctx->signalg) {
92 case SGN_ALG_DES_MAC_MD5:
93 checksum_type = CKSUMTYPE_RSA_MD5;
94 break;
95 default:
96 dprintk("RPC: gss_krb5_seal: ctx->signalg %d not"
97 " supported\n", ctx->signalg);
98 goto out_err;
99 }
100 if (ctx->sealalg != SEAL_ALG_NONE && ctx->sealalg != SEAL_ALG_DES) {
101 dprintk("RPC: gss_krb5_seal: ctx->sealalg %d not supported\n",
102 ctx->sealalg);
103 goto out_err;
104 }
105
106 token->len = g_token_size(&ctx->mech_used, 22); 90 token->len = g_token_size(&ctx->mech_used, 22);
107 91
108 ptr = token->data; 92 ptr = token->data;
@@ -115,37 +99,26 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
115 krb5_hdr = ptr - 2; 99 krb5_hdr = ptr - 2;
116 msg_start = krb5_hdr + 24; 100 msg_start = krb5_hdr + 24;
117 101
118 *(__be16 *)(krb5_hdr + 2) = htons(ctx->signalg); 102 *(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5);
119 memset(krb5_hdr + 4, 0xff, 4); 103 memset(krb5_hdr + 4, 0xff, 4);
120 104
121 if (make_checksum(checksum_type, krb5_hdr, 8, text, 0, &md5cksum)) 105 if (make_checksum("md5", krb5_hdr, 8, text, 0, &md5cksum))
122 goto out_err; 106 return GSS_S_FAILURE;
123 107
124 switch (ctx->signalg) { 108 if (krb5_encrypt(ctx->seq, NULL, md5cksum.data,
125 case SGN_ALG_DES_MAC_MD5: 109 md5cksum.data, md5cksum.len))
126 if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, 110 return GSS_S_FAILURE;
127 md5cksum.data, md5cksum.len)) 111
128 goto out_err; 112 memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH,
129 memcpy(krb5_hdr + 16, 113 KRB5_CKSUM_LENGTH);
130 md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH,
131 KRB5_CKSUM_LENGTH);
132
133 dprintk("RPC: make_seal_token: cksum data: \n");
134 print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0);
135 break;
136 default:
137 BUG();
138 }
139 114
140 spin_lock(&krb5_seq_lock); 115 spin_lock(&krb5_seq_lock);
141 seq_send = ctx->seq_send++; 116 seq_send = ctx->seq_send++;
142 spin_unlock(&krb5_seq_lock); 117 spin_unlock(&krb5_seq_lock);
143 118
144 if ((krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff, 119 if (krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
 145 seq_send, krb5_hdr + 16, krb5_hdr + 8))) 120 seq_send, krb5_hdr + 16, krb5_hdr + 8))
146 goto out_err; 121 return GSS_S_FAILURE;
147 122
148 return ((ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE); 123 return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
149out_err:
150 return GSS_S_FAILURE;
151} 124}
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index 0828cf64100f..87f8977ccece 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -78,7 +78,6 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
78 struct krb5_ctx *ctx = gss_ctx->internal_ctx_id; 78 struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
79 int signalg; 79 int signalg;
80 int sealalg; 80 int sealalg;
81 s32 checksum_type;
82 char cksumdata[16]; 81 char cksumdata[16];
83 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; 82 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
84 s32 now; 83 s32 now;
@@ -86,96 +85,54 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
86 s32 seqnum; 85 s32 seqnum;
87 unsigned char *ptr = (unsigned char *)read_token->data; 86 unsigned char *ptr = (unsigned char *)read_token->data;
88 int bodysize; 87 int bodysize;
89 u32 ret = GSS_S_DEFECTIVE_TOKEN;
90 88
91 dprintk("RPC: krb5_read_token\n"); 89 dprintk("RPC: krb5_read_token\n");
92 90
93 if (g_verify_token_header(&ctx->mech_used, &bodysize, &ptr, 91 if (g_verify_token_header(&ctx->mech_used, &bodysize, &ptr,
94 read_token->len)) 92 read_token->len))
95 goto out; 93 return GSS_S_DEFECTIVE_TOKEN;
96 94
97 if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) || 95 if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) ||
98 (*ptr++ != ( KG_TOK_MIC_MSG &0xff)) ) 96 (*ptr++ != ( KG_TOK_MIC_MSG &0xff)) )
99 goto out; 97 return GSS_S_DEFECTIVE_TOKEN;
100 98
101 /* XXX sanity-check bodysize?? */ 99 /* XXX sanity-check bodysize?? */
102 100
103 /* get the sign and seal algorithms */
104
105 signalg = ptr[0] + (ptr[1] << 8); 101 signalg = ptr[0] + (ptr[1] << 8);
106 sealalg = ptr[2] + (ptr[3] << 8); 102 if (signalg != SGN_ALG_DES_MAC_MD5)
103 return GSS_S_DEFECTIVE_TOKEN;
107 104
108 /* Sanity checks */ 105 sealalg = ptr[2] + (ptr[3] << 8);
106 if (sealalg != SEAL_ALG_NONE)
107 return GSS_S_DEFECTIVE_TOKEN;
109 108
110 if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) 109 if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
111 goto out; 110 return GSS_S_DEFECTIVE_TOKEN;
112 111
113 if (sealalg != 0xffff) 112 if (make_checksum("md5", ptr - 2, 8, message_buffer, 0, &md5cksum))
114 goto out; 113 return GSS_S_FAILURE;
115 114
116 /* there are several mappings of seal algorithms to sign algorithms, 115 if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, md5cksum.data, 16))
117 but few enough that we can try them all. */ 116 return GSS_S_FAILURE;
118 117
119 if ((ctx->sealalg == SEAL_ALG_NONE && signalg > 1) || 118 if (memcmp(md5cksum.data + 8, ptr + 14, 8))
120 (ctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) || 119 return GSS_S_BAD_SIG;
121 (ctx->sealalg == SEAL_ALG_DES3KD &&
122 signalg != SGN_ALG_HMAC_SHA1_DES3_KD))
123 goto out;
124
125 /* compute the checksum of the message */
126
 127 /* initialize the cksum */
128 switch (signalg) {
129 case SGN_ALG_DES_MAC_MD5:
130 checksum_type = CKSUMTYPE_RSA_MD5;
131 break;
132 default:
133 ret = GSS_S_DEFECTIVE_TOKEN;
134 goto out;
135 }
136
137 switch (signalg) {
138 case SGN_ALG_DES_MAC_MD5:
139 ret = make_checksum(checksum_type, ptr - 2, 8,
140 message_buffer, 0, &md5cksum);
141 if (ret)
142 goto out;
143
144 ret = krb5_encrypt(ctx->seq, NULL, md5cksum.data,
145 md5cksum.data, 16);
146 if (ret)
147 goto out;
148
149 if (memcmp(md5cksum.data + 8, ptr + 14, 8)) {
150 ret = GSS_S_BAD_SIG;
151 goto out;
152 }
153 break;
154 default:
155 ret = GSS_S_DEFECTIVE_TOKEN;
156 goto out;
157 }
158 120
159 /* it got through unscathed. Make sure the context is unexpired */ 121 /* it got through unscathed. Make sure the context is unexpired */
160 122
161 now = get_seconds(); 123 now = get_seconds();
162 124
163 ret = GSS_S_CONTEXT_EXPIRED;
164 if (now > ctx->endtime) 125 if (now > ctx->endtime)
165 goto out; 126 return GSS_S_CONTEXT_EXPIRED;
166 127
167 /* do sequencing checks */ 128 /* do sequencing checks */
168 129
169 ret = GSS_S_BAD_SIG; 130 if (krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction, &seqnum))
170 if ((ret = krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction, 131 return GSS_S_FAILURE;
171 &seqnum)))
172 goto out;
173 132
174 if ((ctx->initiate && direction != 0xff) || 133 if ((ctx->initiate && direction != 0xff) ||
175 (!ctx->initiate && direction != 0)) 134 (!ctx->initiate && direction != 0))
176 goto out; 135 return GSS_S_BAD_SIG;
177 136
178 ret = GSS_S_COMPLETE; 137 return GSS_S_COMPLETE;
179out:
180 return ret;
181} 138}
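
The rewritten verifier drops the ret accumulator and the shared out: label; every sanity check now returns its own GSS status the moment it fails. A compact userspace sketch of that shape (hypothetical constants and checks, for illustration only):

        #include <stddef.h>

        enum gss_stat { GSS_OK, GSS_DEFECTIVE_TOKEN, GSS_BAD_SIG, GSS_CONTEXT_EXPIRED };

        static enum gss_stat check_mic_header(const unsigned char *p, size_t len,
                                              long now, long endtime)
        {
                if (len < 8)
                        return GSS_DEFECTIVE_TOKEN;     /* token too short to parse */
                if (p[4] != 0xff || p[5] != 0xff)
                        return GSS_DEFECTIVE_TOKEN;     /* filler bytes corrupted */
                if (now > endtime)
                        return GSS_CONTEXT_EXPIRED;     /* context lifetime is over */
                return GSS_OK;
        }

One subtlety worth noting: the old code's assignment inside the if meant a krb5_get_seq_num() failure returned that function's raw error code rather than the GSS_S_BAD_SIG set just above it, whereas the new code maps any failure on that path to GSS_S_FAILURE.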
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index cc45c1605f80..fe25b3d898dc 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -57,9 +57,9 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize)
57 >>PAGE_CACHE_SHIFT; 57 >>PAGE_CACHE_SHIFT;
58 int offset = (buf->page_base + len - 1) 58 int offset = (buf->page_base + len - 1)
59 & (PAGE_CACHE_SIZE - 1); 59 & (PAGE_CACHE_SIZE - 1);
60 ptr = kmap_atomic(buf->pages[last], KM_SKB_SUNRPC_DATA); 60 ptr = kmap_atomic(buf->pages[last], KM_USER0);
61 pad = *(ptr + offset); 61 pad = *(ptr + offset);
62 kunmap_atomic(ptr, KM_SKB_SUNRPC_DATA); 62 kunmap_atomic(ptr, KM_USER0);
63 goto out; 63 goto out;
64 } else 64 } else
65 len -= buf->page_len; 65 len -= buf->page_len;
@@ -120,7 +120,6 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
120 struct xdr_buf *buf, struct page **pages) 120 struct xdr_buf *buf, struct page **pages)
121{ 121{
122 struct krb5_ctx *kctx = ctx->internal_ctx_id; 122 struct krb5_ctx *kctx = ctx->internal_ctx_id;
123 s32 checksum_type;
124 char cksumdata[16]; 123 char cksumdata[16];
125 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; 124 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
126 int blocksize = 0, plainlen; 125 int blocksize = 0, plainlen;
@@ -134,21 +133,6 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
134 133
135 now = get_seconds(); 134 now = get_seconds();
136 135
137 switch (kctx->signalg) {
138 case SGN_ALG_DES_MAC_MD5:
139 checksum_type = CKSUMTYPE_RSA_MD5;
140 break;
141 default:
142 dprintk("RPC: gss_krb5_seal: kctx->signalg %d not"
143 " supported\n", kctx->signalg);
144 goto out_err;
145 }
146 if (kctx->sealalg != SEAL_ALG_NONE && kctx->sealalg != SEAL_ALG_DES) {
147 dprintk("RPC: gss_krb5_seal: kctx->sealalg %d not supported\n",
148 kctx->sealalg);
149 goto out_err;
150 }
151
152 blocksize = crypto_blkcipher_blocksize(kctx->enc); 136 blocksize = crypto_blkcipher_blocksize(kctx->enc);
153 gss_krb5_add_padding(buf, offset, blocksize); 137 gss_krb5_add_padding(buf, offset, blocksize);
154 BUG_ON((buf->len - offset) % blocksize); 138 BUG_ON((buf->len - offset) % blocksize);
@@ -175,37 +159,27 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
175 /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ 159 /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */
176 krb5_hdr = ptr - 2; 160 krb5_hdr = ptr - 2;
177 msg_start = krb5_hdr + 24; 161 msg_start = krb5_hdr + 24;
178 /* XXXJBF: */ BUG_ON(buf->head[0].iov_base + offset + headlen != msg_start + blocksize);
179 162
180 *(__be16 *)(krb5_hdr + 2) = htons(kctx->signalg); 163 *(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5);
181 memset(krb5_hdr + 4, 0xff, 4); 164 memset(krb5_hdr + 4, 0xff, 4);
182 *(__be16 *)(krb5_hdr + 4) = htons(kctx->sealalg); 165 *(__be16 *)(krb5_hdr + 4) = htons(SEAL_ALG_DES);
183 166
184 make_confounder(msg_start, blocksize); 167 make_confounder(msg_start, blocksize);
185 168
186 /* XXXJBF: UGH!: */ 169 /* XXXJBF: UGH!: */
187 tmp_pages = buf->pages; 170 tmp_pages = buf->pages;
188 buf->pages = pages; 171 buf->pages = pages;
189 if (make_checksum(checksum_type, krb5_hdr, 8, buf, 172 if (make_checksum("md5", krb5_hdr, 8, buf,
190 offset + headlen - blocksize, &md5cksum)) 173 offset + headlen - blocksize, &md5cksum))
191 goto out_err; 174 return GSS_S_FAILURE;
192 buf->pages = tmp_pages; 175 buf->pages = tmp_pages;
193 176
194 switch (kctx->signalg) { 177 if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
195 case SGN_ALG_DES_MAC_MD5: 178 md5cksum.data, md5cksum.len))
196 if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, 179 return GSS_S_FAILURE;
197 md5cksum.data, md5cksum.len)) 180 memcpy(krb5_hdr + 16,
198 goto out_err; 181 md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH,
199 memcpy(krb5_hdr + 16, 182 KRB5_CKSUM_LENGTH);
200 md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH,
201 KRB5_CKSUM_LENGTH);
202
203 dprintk("RPC: make_seal_token: cksum data: \n");
204 print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0);
205 break;
206 default:
207 BUG();
208 }
209 183
210 spin_lock(&krb5_seq_lock); 184 spin_lock(&krb5_seq_lock);
211 seq_send = kctx->seq_send++; 185 seq_send = kctx->seq_send++;
@@ -215,15 +189,13 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
215 * and encrypt at the same time: */ 189 * and encrypt at the same time: */
216 if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff, 190 if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff,
217 seq_send, krb5_hdr + 16, krb5_hdr + 8))) 191 seq_send, krb5_hdr + 16, krb5_hdr + 8)))
218 goto out_err; 192 return GSS_S_FAILURE;
219 193
220 if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize, 194 if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize,
221 pages)) 195 pages))
222 goto out_err; 196 return GSS_S_FAILURE;
223 197
224 return ((kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE); 198 return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
225out_err:
226 return GSS_S_FAILURE;
227} 199}
228 200
229u32 201u32
@@ -232,7 +204,6 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
232 struct krb5_ctx *kctx = ctx->internal_ctx_id; 204 struct krb5_ctx *kctx = ctx->internal_ctx_id;
233 int signalg; 205 int signalg;
234 int sealalg; 206 int sealalg;
235 s32 checksum_type;
236 char cksumdata[16]; 207 char cksumdata[16];
237 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; 208 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
238 s32 now; 209 s32 now;
@@ -240,7 +211,6 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
240 s32 seqnum; 211 s32 seqnum;
241 unsigned char *ptr; 212 unsigned char *ptr;
242 int bodysize; 213 int bodysize;
243 u32 ret = GSS_S_DEFECTIVE_TOKEN;
244 void *data_start, *orig_start; 214 void *data_start, *orig_start;
245 int data_len; 215 int data_len;
246 int blocksize; 216 int blocksize;
@@ -250,98 +220,58 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
250 ptr = (u8 *)buf->head[0].iov_base + offset; 220 ptr = (u8 *)buf->head[0].iov_base + offset;
251 if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr, 221 if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr,
252 buf->len - offset)) 222 buf->len - offset))
253 goto out; 223 return GSS_S_DEFECTIVE_TOKEN;
254 224
255 if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) || 225 if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) ||
256 (*ptr++ != (KG_TOK_WRAP_MSG &0xff)) ) 226 (*ptr++ != (KG_TOK_WRAP_MSG &0xff)) )
257 goto out; 227 return GSS_S_DEFECTIVE_TOKEN;
258 228
259 /* XXX sanity-check bodysize?? */ 229 /* XXX sanity-check bodysize?? */
260 230
261 /* get the sign and seal algorithms */ 231 /* get the sign and seal algorithms */
262 232
263 signalg = ptr[0] + (ptr[1] << 8); 233 signalg = ptr[0] + (ptr[1] << 8);
264 sealalg = ptr[2] + (ptr[3] << 8); 234 if (signalg != SGN_ALG_DES_MAC_MD5)
235 return GSS_S_DEFECTIVE_TOKEN;
265 236
266 /* Sanity checks */ 237 sealalg = ptr[2] + (ptr[3] << 8);
238 if (sealalg != SEAL_ALG_DES)
239 return GSS_S_DEFECTIVE_TOKEN;
267 240
268 if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) 241 if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
269 goto out; 242 return GSS_S_DEFECTIVE_TOKEN;
270
271 if (sealalg == 0xffff)
272 goto out;
273
274 /* in the current spec, there is only one valid seal algorithm per
275 key type, so a simple comparison is ok */
276
277 if (sealalg != kctx->sealalg)
278 goto out;
279
280 /* there are several mappings of seal algorithms to sign algorithms,
281 but few enough that we can try them all. */
282
283 if ((kctx->sealalg == SEAL_ALG_NONE && signalg > 1) ||
284 (kctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) ||
285 (kctx->sealalg == SEAL_ALG_DES3KD &&
286 signalg != SGN_ALG_HMAC_SHA1_DES3_KD))
287 goto out;
288 243
289 if (gss_decrypt_xdr_buf(kctx->enc, buf, 244 if (gss_decrypt_xdr_buf(kctx->enc, buf,
290 ptr + 22 - (unsigned char *)buf->head[0].iov_base)) 245 ptr + 22 - (unsigned char *)buf->head[0].iov_base))
291 goto out; 246 return GSS_S_DEFECTIVE_TOKEN;
292 247
293 /* compute the checksum of the message */ 248 if (make_checksum("md5", ptr - 2, 8, buf,
249 ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum))
250 return GSS_S_FAILURE;
294 251
 295 /* initialize the cksum */ 252 if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
296 switch (signalg) { 253 md5cksum.data, md5cksum.len))
297 case SGN_ALG_DES_MAC_MD5: 254 return GSS_S_FAILURE;
298 checksum_type = CKSUMTYPE_RSA_MD5; 255
299 break; 256 if (memcmp(md5cksum.data + 8, ptr + 14, 8))
300 default: 257 return GSS_S_BAD_SIG;
301 ret = GSS_S_DEFECTIVE_TOKEN;
302 goto out;
303 }
304
305 switch (signalg) {
306 case SGN_ALG_DES_MAC_MD5:
307 ret = make_checksum(checksum_type, ptr - 2, 8, buf,
308 ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum);
309 if (ret)
310 goto out;
311
312 ret = krb5_encrypt(kctx->seq, NULL, md5cksum.data,
313 md5cksum.data, md5cksum.len);
314 if (ret)
315 goto out;
316
317 if (memcmp(md5cksum.data + 8, ptr + 14, 8)) {
318 ret = GSS_S_BAD_SIG;
319 goto out;
320 }
321 break;
322 default:
323 ret = GSS_S_DEFECTIVE_TOKEN;
324 goto out;
325 }
326 258
327 /* it got through unscathed. Make sure the context is unexpired */ 259 /* it got through unscathed. Make sure the context is unexpired */
328 260
329 now = get_seconds(); 261 now = get_seconds();
330 262
331 ret = GSS_S_CONTEXT_EXPIRED;
332 if (now > kctx->endtime) 263 if (now > kctx->endtime)
333 goto out; 264 return GSS_S_CONTEXT_EXPIRED;
334 265
335 /* do sequencing checks */ 266 /* do sequencing checks */
336 267
337 ret = GSS_S_BAD_SIG; 268 if (krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction,
338 if ((ret = krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction, 269 &seqnum))
339 &seqnum))) 270 return GSS_S_BAD_SIG;
340 goto out;
341 271
342 if ((kctx->initiate && direction != 0xff) || 272 if ((kctx->initiate && direction != 0xff) ||
343 (!kctx->initiate && direction != 0)) 273 (!kctx->initiate && direction != 0))
344 goto out; 274 return GSS_S_BAD_SIG;
345 275
346 /* Copy the data back to the right position. XXX: Would probably be 276 /* Copy the data back to the right position. XXX: Would probably be
347 * better to copy and encrypt at the same time. */ 277 * better to copy and encrypt at the same time. */
@@ -354,11 +284,8 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
354 buf->head[0].iov_len -= (data_start - orig_start); 284 buf->head[0].iov_len -= (data_start - orig_start);
355 buf->len -= (data_start - orig_start); 285 buf->len -= (data_start - orig_start);
356 286
357 ret = GSS_S_DEFECTIVE_TOKEN;
358 if (gss_krb5_remove_padding(buf, blocksize)) 287 if (gss_krb5_remove_padding(buf, blocksize))
359 goto out; 288 return GSS_S_DEFECTIVE_TOKEN;
360 289
361 ret = GSS_S_COMPLETE; 290 return GSS_S_COMPLETE;
362out:
363 return ret;
364} 291}
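
gss_unwrap_kerberos() gets the same flattening, and gss_krb5_remove_padding() switches from the SunRPC-private KM_SKB_SUNRPC_DATA kmap slot to the generic KM_USER0. The padding convention itself is simple: the last plaintext byte names the pad length. A minimal flat-buffer sketch of that invariant (an assumed simplification; the kernel walks an xdr_buf spanning head, pages, and tail):

        #include <stddef.h>

        static int remove_padding(const unsigned char *buf, size_t *len,
                                  size_t blocksize)
        {
                size_t pad;

                if (*len == 0)
                        return -1;
                pad = buf[*len - 1];            /* last byte encodes pad length */
                if (pad == 0 || pad > blocksize || pad > *len)
                        return -1;              /* defective token */
                *len -= pad;                    /* strip the padding */
                return 0;
        }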
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
index d57f60838895..41465072d0b5 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
@@ -82,133 +82,73 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
82 return q; 82 return q;
83} 83}
84 84
85static inline const void *
86get_key(const void *p, const void *end, struct crypto_blkcipher **res,
87 int *resalg)
88{
89 struct xdr_netobj key = { 0 };
90 int setkey = 0;
91 char *alg_name;
92
93 p = simple_get_bytes(p, end, resalg, sizeof(*resalg));
94 if (IS_ERR(p))
95 goto out_err;
96 p = simple_get_netobj(p, end, &key);
97 if (IS_ERR(p))
98 goto out_err;
99
100 switch (*resalg) {
101 case NID_des_cbc:
102 alg_name = "cbc(des)";
103 setkey = 1;
104 break;
105 case NID_cast5_cbc:
106 /* XXXX here in name only, not used */
107 alg_name = "cbc(cast5)";
108 setkey = 0; /* XXX will need to set to 1 */
109 break;
110 case NID_md5:
111 if (key.len == 0) {
112 dprintk("RPC: SPKM3 get_key: NID_md5 zero Key length\n");
113 }
114 alg_name = "md5";
115 setkey = 0;
116 break;
117 default:
118 dprintk("gss_spkm3_mech: unsupported algorithm %d\n", *resalg);
119 goto out_err_free_key;
120 }
121 *res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC);
122 if (IS_ERR(*res)) {
123 printk("gss_spkm3_mech: unable to initialize crypto algorthm %s\n", alg_name);
124 *res = NULL;
125 goto out_err_free_key;
126 }
127 if (setkey) {
128 if (crypto_blkcipher_setkey(*res, key.data, key.len)) {
129 printk("gss_spkm3_mech: error setting key for crypto algorthm %s\n", alg_name);
130 goto out_err_free_tfm;
131 }
132 }
133
134 if(key.len > 0)
135 kfree(key.data);
136 return p;
137
138out_err_free_tfm:
139 crypto_free_blkcipher(*res);
140out_err_free_key:
141 if(key.len > 0)
142 kfree(key.data);
143 p = ERR_PTR(-EINVAL);
144out_err:
145 return p;
146}
147
148static int 85static int
149gss_import_sec_context_spkm3(const void *p, size_t len, 86gss_import_sec_context_spkm3(const void *p, size_t len,
150 struct gss_ctx *ctx_id) 87 struct gss_ctx *ctx_id)
151{ 88{
152 const void *end = (const void *)((const char *)p + len); 89 const void *end = (const void *)((const char *)p + len);
153 struct spkm3_ctx *ctx; 90 struct spkm3_ctx *ctx;
91 int version;
154 92
155 if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL))) 93 if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL)))
156 goto out_err; 94 goto out_err;
157 95
96 p = simple_get_bytes(p, end, &version, sizeof(version));
97 if (IS_ERR(p))
98 goto out_err_free_ctx;
99 if (version != 1) {
100 dprintk("RPC: unknown spkm3 token format: obsolete nfs-utils?\n");
101 goto out_err_free_ctx;
102 }
103
158 p = simple_get_netobj(p, end, &ctx->ctx_id); 104 p = simple_get_netobj(p, end, &ctx->ctx_id);
159 if (IS_ERR(p)) 105 if (IS_ERR(p))
160 goto out_err_free_ctx; 106 goto out_err_free_ctx;
161 107
162 p = simple_get_bytes(p, end, &ctx->qop, sizeof(ctx->qop)); 108 p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
163 if (IS_ERR(p)) 109 if (IS_ERR(p))
164 goto out_err_free_ctx_id; 110 goto out_err_free_ctx_id;
165 111
166 p = simple_get_netobj(p, end, &ctx->mech_used); 112 p = simple_get_netobj(p, end, &ctx->mech_used);
167 if (IS_ERR(p)) 113 if (IS_ERR(p))
168 goto out_err_free_mech; 114 goto out_err_free_ctx_id;
169 115
170 p = simple_get_bytes(p, end, &ctx->ret_flags, sizeof(ctx->ret_flags)); 116 p = simple_get_bytes(p, end, &ctx->ret_flags, sizeof(ctx->ret_flags));
171 if (IS_ERR(p)) 117 if (IS_ERR(p))
172 goto out_err_free_mech; 118 goto out_err_free_mech;
173 119
174 p = simple_get_bytes(p, end, &ctx->req_flags, sizeof(ctx->req_flags)); 120 p = simple_get_netobj(p, end, &ctx->conf_alg);
175 if (IS_ERR(p)) 121 if (IS_ERR(p))
176 goto out_err_free_mech; 122 goto out_err_free_mech;
177 123
178 p = simple_get_netobj(p, end, &ctx->share_key); 124 p = simple_get_netobj(p, end, &ctx->derived_conf_key);
179 if (IS_ERR(p))
180 goto out_err_free_s_key;
181
182 p = get_key(p, end, &ctx->derived_conf_key, &ctx->conf_alg);
183 if (IS_ERR(p)) 125 if (IS_ERR(p))
184 goto out_err_free_s_key; 126 goto out_err_free_conf_alg;
185 127
186 p = get_key(p, end, &ctx->derived_integ_key, &ctx->intg_alg); 128 p = simple_get_netobj(p, end, &ctx->intg_alg);
187 if (IS_ERR(p)) 129 if (IS_ERR(p))
188 goto out_err_free_key1; 130 goto out_err_free_conf_key;
189 131
190 p = simple_get_bytes(p, end, &ctx->keyestb_alg, sizeof(ctx->keyestb_alg)); 132 p = simple_get_netobj(p, end, &ctx->derived_integ_key);
191 if (IS_ERR(p)) 133 if (IS_ERR(p))
192 goto out_err_free_key2; 134 goto out_err_free_intg_alg;
193
194 p = simple_get_bytes(p, end, &ctx->owf_alg, sizeof(ctx->owf_alg));
195 if (IS_ERR(p))
196 goto out_err_free_key2;
197 135
198 if (p != end) 136 if (p != end)
199 goto out_err_free_key2; 137 goto out_err_free_intg_key;
200 138
201 ctx_id->internal_ctx_id = ctx; 139 ctx_id->internal_ctx_id = ctx;
202 140
203 dprintk("Successfully imported new spkm context.\n"); 141 dprintk("Successfully imported new spkm context.\n");
204 return 0; 142 return 0;
205 143
206out_err_free_key2: 144out_err_free_intg_key:
207 crypto_free_blkcipher(ctx->derived_integ_key); 145 kfree(ctx->derived_integ_key.data);
208out_err_free_key1: 146out_err_free_intg_alg:
209 crypto_free_blkcipher(ctx->derived_conf_key); 147 kfree(ctx->intg_alg.data);
210out_err_free_s_key: 148out_err_free_conf_key:
211 kfree(ctx->share_key.data); 149 kfree(ctx->derived_conf_key.data);
150out_err_free_conf_alg:
151 kfree(ctx->conf_alg.data);
212out_err_free_mech: 152out_err_free_mech:
213 kfree(ctx->mech_used.data); 153 kfree(ctx->mech_used.data);
214out_err_free_ctx_id: 154out_err_free_ctx_id:
@@ -220,13 +160,16 @@ out_err:
220} 160}
221 161
222static void 162static void
223gss_delete_sec_context_spkm3(void *internal_ctx) { 163gss_delete_sec_context_spkm3(void *internal_ctx)
164{
224 struct spkm3_ctx *sctx = internal_ctx; 165 struct spkm3_ctx *sctx = internal_ctx;
225 166
226 crypto_free_blkcipher(sctx->derived_integ_key); 167 kfree(sctx->derived_integ_key.data);
227 crypto_free_blkcipher(sctx->derived_conf_key); 168 kfree(sctx->intg_alg.data);
228 kfree(sctx->share_key.data); 169 kfree(sctx->derived_conf_key.data);
170 kfree(sctx->conf_alg.data);
229 kfree(sctx->mech_used.data); 171 kfree(sctx->mech_used.data);
172 kfree(sctx->ctx_id.data);
230 kfree(sctx); 173 kfree(sctx);
231} 174}
232 175
@@ -238,7 +181,6 @@ gss_verify_mic_spkm3(struct gss_ctx *ctx,
238 u32 maj_stat = 0; 181 u32 maj_stat = 0;
239 struct spkm3_ctx *sctx = ctx->internal_ctx_id; 182 struct spkm3_ctx *sctx = ctx->internal_ctx_id;
240 183
241 dprintk("RPC: gss_verify_mic_spkm3 calling spkm3_read_token\n");
242 maj_stat = spkm3_read_token(sctx, checksum, signbuf, SPKM_MIC_TOK); 184 maj_stat = spkm3_read_token(sctx, checksum, signbuf, SPKM_MIC_TOK);
243 185
244 dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat); 186 dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat);
@@ -253,10 +195,9 @@ gss_get_mic_spkm3(struct gss_ctx *ctx,
253 u32 err = 0; 195 u32 err = 0;
254 struct spkm3_ctx *sctx = ctx->internal_ctx_id; 196 struct spkm3_ctx *sctx = ctx->internal_ctx_id;
255 197
256 dprintk("RPC: gss_get_mic_spkm3\n");
257
258 err = spkm3_make_token(sctx, message_buffer, 198 err = spkm3_make_token(sctx, message_buffer,
259 message_token, SPKM_MIC_TOK); 199 message_token, SPKM_MIC_TOK);
200 dprintk("RPC: gss_get_mic_spkm3 returning %d\n", err);
260 return err; 201 return err;
261} 202}
262 203
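
The SPKM3 import path no longer instantiates crypto TFMs at context-setup time: get_key() is deleted, the downcall is versioned, and the algorithm OIDs plus derived keys are kept as plain xdr_netobjs, freed with kfree() in the error ladder and in gss_delete_sec_context_spkm3(). A userspace sketch of the length-prefixed blob parsing that simple_get_netobj() performs (names and wire layout here are an assumption for illustration):

        #include <stdint.h>
        #include <stdlib.h>
        #include <string.h>

        struct netobj { uint32_t len; unsigned char *data; };

        static const unsigned char *get_netobj(const unsigned char *p,
                                               const unsigned char *end,
                                               struct netobj *res)
        {
                uint32_t len;

                if (end - p < 4)
                        return NULL;            /* no room for the length word */
                memcpy(&len, p, 4);
                p += 4;
                if ((uint32_t)(end - p) < len)
                        return NULL;            /* length runs past the buffer */
                res->data = malloc(len);
                if (res->data == NULL)
                        return NULL;
                memcpy(res->data, p, len);
                res->len = len;
                return p + len;                 /* caller continues from here */
        }

Rejecting version != 1 up front means an old nfs-utils that still sends the TFM-style downcall fails cleanly instead of being misparsed.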
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c
index 18c7862bc234..b179d58c6249 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_seal.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c
@@ -39,11 +39,17 @@
39#include <linux/sunrpc/gss_spkm3.h> 39#include <linux/sunrpc/gss_spkm3.h>
40#include <linux/random.h> 40#include <linux/random.h>
41#include <linux/crypto.h> 41#include <linux/crypto.h>
42#include <linux/pagemap.h>
43#include <linux/scatterlist.h>
44#include <linux/sunrpc/xdr.h>
42 45
43#ifdef RPC_DEBUG 46#ifdef RPC_DEBUG
44# define RPCDBG_FACILITY RPCDBG_AUTH 47# define RPCDBG_FACILITY RPCDBG_AUTH
45#endif 48#endif
46 49
50const struct xdr_netobj hmac_md5_oid = { 8, "\x2B\x06\x01\x05\x05\x08\x01\x01"};
51const struct xdr_netobj cast5_cbc_oid = {9, "\x2A\x86\x48\x86\xF6\x7D\x07\x42\x0A"};
52
47/* 53/*
48 * spkm3_make_token() 54 * spkm3_make_token()
49 * 55 *
@@ -66,29 +72,23 @@ spkm3_make_token(struct spkm3_ctx *ctx,
66 int ctxelen = 0, ctxzbit = 0; 72 int ctxelen = 0, ctxzbit = 0;
67 int md5elen = 0, md5zbit = 0; 73 int md5elen = 0, md5zbit = 0;
68 74
69 dprintk("RPC: spkm3_make_token\n");
70
71 now = jiffies; 75 now = jiffies;
72 76
73 if (ctx->ctx_id.len != 16) { 77 if (ctx->ctx_id.len != 16) {
74 dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n", 78 dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n",
75 ctx->ctx_id.len); 79 ctx->ctx_id.len);
76 goto out_err; 80 goto out_err;
77 } 81 }
78 82
79 switch (ctx->intg_alg) { 83 if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) {
80 case NID_md5: 84 dprintk("RPC: gss_spkm3_seal: unsupported I-ALG algorithm."
81 checksum_type = CKSUMTYPE_RSA_MD5; 85 "only support hmac-md5 I-ALG.\n");
82 break; 86 goto out_err;
83 default: 87 } else
84 dprintk("RPC: gss_spkm3_seal: ctx->signalg %d not" 88 checksum_type = CKSUMTYPE_HMAC_MD5;
85 " supported\n", ctx->intg_alg); 89
86 goto out_err; 90 if (!g_OID_equal(&ctx->conf_alg, &cast5_cbc_oid)) {
87 } 91 dprintk("RPC: gss_spkm3_seal: unsupported C-ALG algorithm\n");
88 /* XXX since we don't support WRAP, perhaps we don't care... */
89 if (ctx->conf_alg != NID_cast5_cbc) {
90 dprintk("RPC: gss_spkm3_seal: ctx->sealalg %d not supported\n",
91 ctx->conf_alg);
92 goto out_err; 92 goto out_err;
93 } 93 }
94 94
@@ -96,10 +96,10 @@ spkm3_make_token(struct spkm3_ctx *ctx,
96 /* Calculate checksum over the mic-header */ 96 /* Calculate checksum over the mic-header */
97 asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit); 97 asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit);
98 spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data, 98 spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data,
99 ctxelen, ctxzbit); 99 ctxelen, ctxzbit);
100 100 if (make_spkm3_checksum(checksum_type, &ctx->derived_integ_key,
101 if (make_checksum(checksum_type, mic_hdr.data, mic_hdr.len, 101 (char *)mic_hdr.data, mic_hdr.len,
102 text, 0, &md5cksum)) 102 text, 0, &md5cksum))
103 goto out_err; 103 goto out_err;
104 104
105 asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit); 105 asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit);
@@ -121,7 +121,66 @@ spkm3_make_token(struct spkm3_ctx *ctx,
121 121
122 return GSS_S_COMPLETE; 122 return GSS_S_COMPLETE;
123out_err: 123out_err:
124 if (md5cksum.data)
125 kfree(md5cksum.data);
126
124 token->data = NULL; 127 token->data = NULL;
125 token->len = 0; 128 token->len = 0;
126 return GSS_S_FAILURE; 129 return GSS_S_FAILURE;
127} 130}
131
132static int
133spkm3_checksummer(struct scatterlist *sg, void *data)
134{
135 struct hash_desc *desc = data;
136
137 return crypto_hash_update(desc, sg, sg->length);
138}
139
140/* checksum the plaintext data and hdrlen bytes of the token header */
141s32
142make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header,
143 unsigned int hdrlen, struct xdr_buf *body,
144 unsigned int body_offset, struct xdr_netobj *cksum)
145{
146 char *cksumname;
147 struct hash_desc desc; /* XXX add to ctx? */
148 struct scatterlist sg[1];
149 int err;
150
151 switch (cksumtype) {
152 case CKSUMTYPE_HMAC_MD5:
153 cksumname = "md5";
154 break;
155 default:
156 dprintk("RPC: spkm3_make_checksum:"
157 " unsupported checksum %d", cksumtype);
158 return GSS_S_FAILURE;
159 }
160
161 if (key->data == NULL || key->len <= 0) return GSS_S_FAILURE;
162
163 desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC);
164 if (IS_ERR(desc.tfm))
165 return GSS_S_FAILURE;
166 cksum->len = crypto_hash_digestsize(desc.tfm);
167 desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
168
169 err = crypto_hash_setkey(desc.tfm, key->data, key->len);
170 if (err)
171 goto out;
172
173 sg_set_buf(sg, header, hdrlen);
174 crypto_hash_update(&desc, sg, 1);
175
176 xdr_process_buf(body, body_offset, body->len - body_offset,
177 spkm3_checksummer, &desc);
178 crypto_hash_final(&desc, cksum->data);
179
180out:
181 crypto_free_hash(desc.tfm);
182
183 return err ? GSS_S_FAILURE : 0;
184}
185
186EXPORT_SYMBOL(make_spkm3_checksum);
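
spkm3_make_token() now keys its checksum: the context's intg_alg OID must match the hmac-md5 OID declared above, and the new make_spkm3_checksum() runs a keyed hash over the MIC header plus the message body via xdr_process_buf(). The OID gate is a plain length-and-bytes comparison, as in this sketch (a userspace analogue of g_OID_equal, not the kernel helper itself):

        #include <string.h>

        struct oid { unsigned int len; const unsigned char *data; };

        static int oid_equal(const struct oid *a, const struct oid *b)
        {
                /* OIDs match only on identical length and identical bytes */
                return a->len == b->len && memcmp(a->data, b->data, a->len) == 0;
        }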
diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c
index 854a983ccf26..35188b6ea8f7 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_token.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_token.c
@@ -172,10 +172,10 @@ spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ct
172 *(u8 *)hptr++ = zbit; 172 *(u8 *)hptr++ = zbit;
173 memcpy(hptr, ctxdata, elen); 173 memcpy(hptr, ctxdata, elen);
174 hptr += elen; 174 hptr += elen;
175 *hdrlen = hptr - top; 175 *hdrlen = hptr - top;
176} 176}
177 177
178/* 178/*
179 * spkm3_mic_innercontext_token() 179 * spkm3_mic_innercontext_token()
180 * 180 *
181 * *tokp points to the beginning of the SPKM_MIC token described 181 * *tokp points to the beginning of the SPKM_MIC token described
diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
index 8537f581ef9b..e54581ca7570 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
@@ -54,70 +54,70 @@ spkm3_read_token(struct spkm3_ctx *ctx,
54 struct xdr_buf *message_buffer, /* signbuf */ 54 struct xdr_buf *message_buffer, /* signbuf */
55 int toktype) 55 int toktype)
56{ 56{
57 s32 checksum_type;
57 s32 code; 58 s32 code;
58 struct xdr_netobj wire_cksum = {.len =0, .data = NULL}; 59 struct xdr_netobj wire_cksum = {.len =0, .data = NULL};
59 char cksumdata[16]; 60 char cksumdata[16];
60 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; 61 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
61 unsigned char *ptr = (unsigned char *)read_token->data; 62 unsigned char *ptr = (unsigned char *)read_token->data;
62 unsigned char *cksum; 63 unsigned char *cksum;
63 int bodysize, md5elen; 64 int bodysize, md5elen;
64 int mic_hdrlen; 65 int mic_hdrlen;
65 u32 ret = GSS_S_DEFECTIVE_TOKEN; 66 u32 ret = GSS_S_DEFECTIVE_TOKEN;
66 67
67 dprintk("RPC: spkm3_read_token read_token->len %d\n", read_token->len);
68
69 if (g_verify_token_header((struct xdr_netobj *) &ctx->mech_used, 68 if (g_verify_token_header((struct xdr_netobj *) &ctx->mech_used,
70 &bodysize, &ptr, read_token->len)) 69 &bodysize, &ptr, read_token->len))
71 goto out; 70 goto out;
72 71
73 /* decode the token */ 72 /* decode the token */
74 73
75 if (toktype == SPKM_MIC_TOK) { 74 if (toktype != SPKM_MIC_TOK) {
76 75 dprintk("RPC: BAD SPKM3 token type: %d\n", toktype);
77 if ((ret = spkm3_verify_mic_token(&ptr, &mic_hdrlen, &cksum))) 76 goto out;
78 goto out; 77 }
79 78
80 if (*cksum++ != 0x03) { 79 if ((ret = spkm3_verify_mic_token(&ptr, &mic_hdrlen, &cksum)))
81 dprintk("RPC: spkm3_read_token BAD checksum type\n"); 80 goto out;
82 goto out; 81
83 } 82 if (*cksum++ != 0x03) {
84 md5elen = *cksum++; 83 dprintk("RPC: spkm3_read_token BAD checksum type\n");
85 cksum++; /* move past the zbit */ 84 goto out;
86 85 }
87 if(!decode_asn1_bitstring(&wire_cksum, cksum, md5elen - 1, 16)) 86 md5elen = *cksum++;
88 goto out; 87 cksum++; /* move past the zbit */
89 88
90 /* HARD CODED FOR MD5 */ 89 if (!decode_asn1_bitstring(&wire_cksum, cksum, md5elen - 1, 16))
91 90 goto out;
92 /* compute the checksum of the message. 91
93 * ptr + 2 = start of header piece of checksum 92 /* HARD CODED FOR MD5 */
94 * mic_hdrlen + 2 = length of header piece of checksum 93
95 */ 94 /* compute the checksum of the message.
96 ret = GSS_S_DEFECTIVE_TOKEN; 95 * ptr + 2 = start of header piece of checksum
97 code = make_checksum(CKSUMTYPE_RSA_MD5, ptr + 2, 96 * mic_hdrlen + 2 = length of header piece of checksum
98 mic_hdrlen + 2, 97 */
99 message_buffer, 0, &md5cksum); 98 ret = GSS_S_DEFECTIVE_TOKEN;
100 99 if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) {
101 if (code) 100 dprintk("RPC: gss_spkm3_seal: unsupported I-ALG algorithm\n");
102 goto out; 101 goto out;
103 102 }
104 dprintk("RPC: spkm3_read_token: digest wire_cksum.len %d:\n", 103
105 wire_cksum.len); 104 checksum_type = CKSUMTYPE_HMAC_MD5;
106 dprintk(" md5cksum.data\n"); 105
107 print_hexl((u32 *) md5cksum.data, 16, 0); 106 code = make_spkm3_checksum(checksum_type,
108 dprintk(" cksum.data:\n"); 107 &ctx->derived_integ_key, ptr + 2, mic_hdrlen + 2,
109 print_hexl((u32 *) wire_cksum.data, wire_cksum.len, 0); 108 message_buffer, 0, &md5cksum);
110 109
111 ret = GSS_S_BAD_SIG; 110 if (code)
112 code = memcmp(md5cksum.data, wire_cksum.data, wire_cksum.len); 111 goto out;
113 if (code) 112
114 goto out; 113 ret = GSS_S_BAD_SIG;
115 114 code = memcmp(md5cksum.data, wire_cksum.data, wire_cksum.len);
116 } else { 115 if (code) {
117 dprintk("RPC: BAD or UNSUPPORTED SPKM3 token type: %d\n",toktype); 116 dprintk("RPC: bad MIC checksum\n");
118 goto out; 117 goto out;
119 } 118 }
120 119
120
121 /* XXX: need to add expiration and sequencing */ 121 /* XXX: need to add expiration and sequencing */
122 ret = GSS_S_COMPLETE; 122 ret = GSS_S_COMPLETE;
123out: 123out:
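
spkm3_read_token() is restructured around an early bail: anything other than SPKM_MIC_TOK is rejected before the MIC path begins, removing one level of nesting from the whole verification sequence, and the checksum comparison now goes through make_spkm3_checksum() with the derived integrity key. The control-flow shape, as a sketch (SPKM_MIC_TOK's value here is a placeholder, not the kernel's):

        #include <stdio.h>

        #define SPKM_MIC_TOK 1          /* placeholder value for illustration */

        static int read_token(int toktype)
        {
                if (toktype != SPKM_MIC_TOK) {
                        fprintf(stderr, "bad SPKM3 token type: %d\n", toktype);
                        return -1;      /* early bail replaces the trailing else */
                }
                /* ... MIC verification proceeds at a single indent level ... */
                return 0;
        }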
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index dfeea4fea95a..aba528b9ae76 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -27,6 +27,7 @@
27#include <linux/types.h> 27#include <linux/types.h>
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/smp_lock.h>
30#include <linux/utsname.h> 31#include <linux/utsname.h>
31#include <linux/workqueue.h> 32#include <linux/workqueue.h>
32 33
@@ -141,6 +142,10 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
141 clnt->cl_vers = version->number; 142 clnt->cl_vers = version->number;
142 clnt->cl_stats = program->stats; 143 clnt->cl_stats = program->stats;
143 clnt->cl_metrics = rpc_alloc_iostats(clnt); 144 clnt->cl_metrics = rpc_alloc_iostats(clnt);
145 err = -ENOMEM;
146 if (clnt->cl_metrics == NULL)
147 goto out_no_stats;
148 clnt->cl_program = program;
144 149
145 if (!xprt_bound(clnt->cl_xprt)) 150 if (!xprt_bound(clnt->cl_xprt))
146 clnt->cl_autobind = 1; 151 clnt->cl_autobind = 1;
@@ -173,6 +178,8 @@ out_no_auth:
173 rpc_put_mount(); 178 rpc_put_mount();
174 } 179 }
175out_no_path: 180out_no_path:
181 rpc_free_iostats(clnt->cl_metrics);
182out_no_stats:
176 if (clnt->cl_server != clnt->cl_inline_name) 183 if (clnt->cl_server != clnt->cl_inline_name)
177 kfree(clnt->cl_server); 184 kfree(clnt->cl_server);
178 kfree(clnt); 185 kfree(clnt);
@@ -252,12 +259,19 @@ struct rpc_clnt *
252rpc_clone_client(struct rpc_clnt *clnt) 259rpc_clone_client(struct rpc_clnt *clnt)
253{ 260{
254 struct rpc_clnt *new; 261 struct rpc_clnt *new;
262 int err = -ENOMEM;
255 263
256 new = kmemdup(clnt, sizeof(*new), GFP_KERNEL); 264 new = kmemdup(clnt, sizeof(*new), GFP_KERNEL);
257 if (!new) 265 if (!new)
258 goto out_no_clnt; 266 goto out_no_clnt;
259 atomic_set(&new->cl_count, 1); 267 atomic_set(&new->cl_count, 1);
260 atomic_set(&new->cl_users, 0); 268 atomic_set(&new->cl_users, 0);
269 new->cl_metrics = rpc_alloc_iostats(clnt);
270 if (new->cl_metrics == NULL)
271 goto out_no_stats;
272 err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name);
273 if (err != 0)
274 goto out_no_path;
261 new->cl_parent = clnt; 275 new->cl_parent = clnt;
262 atomic_inc(&clnt->cl_count); 276 atomic_inc(&clnt->cl_count);
263 new->cl_xprt = xprt_get(clnt->cl_xprt); 277 new->cl_xprt = xprt_get(clnt->cl_xprt);
@@ -265,16 +279,17 @@ rpc_clone_client(struct rpc_clnt *clnt)
265 new->cl_autobind = 0; 279 new->cl_autobind = 0;
266 new->cl_oneshot = 0; 280 new->cl_oneshot = 0;
267 new->cl_dead = 0; 281 new->cl_dead = 0;
268 if (!IS_ERR(new->cl_dentry))
269 dget(new->cl_dentry);
270 rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); 282 rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
271 if (new->cl_auth) 283 if (new->cl_auth)
272 atomic_inc(&new->cl_auth->au_count); 284 atomic_inc(&new->cl_auth->au_count);
273 new->cl_metrics = rpc_alloc_iostats(clnt);
274 return new; 285 return new;
286out_no_path:
287 rpc_free_iostats(new->cl_metrics);
288out_no_stats:
289 kfree(new);
275out_no_clnt: 290out_no_clnt:
276 printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__); 291 dprintk("RPC: %s returned error %d\n", __FUNCTION__, err);
277 return ERR_PTR(-ENOMEM); 292 return ERR_PTR(err);
278} 293}
279 294
280/* 295/*
@@ -327,16 +342,14 @@ rpc_destroy_client(struct rpc_clnt *clnt)
327 rpcauth_destroy(clnt->cl_auth); 342 rpcauth_destroy(clnt->cl_auth);
328 clnt->cl_auth = NULL; 343 clnt->cl_auth = NULL;
329 } 344 }
330 if (clnt->cl_parent != clnt) {
331 if (!IS_ERR(clnt->cl_dentry))
332 dput(clnt->cl_dentry);
333 rpc_destroy_client(clnt->cl_parent);
334 goto out_free;
335 }
336 if (!IS_ERR(clnt->cl_dentry)) { 345 if (!IS_ERR(clnt->cl_dentry)) {
337 rpc_rmdir(clnt->cl_dentry); 346 rpc_rmdir(clnt->cl_dentry);
338 rpc_put_mount(); 347 rpc_put_mount();
339 } 348 }
349 if (clnt->cl_parent != clnt) {
350 rpc_destroy_client(clnt->cl_parent);
351 goto out_free;
352 }
340 if (clnt->cl_server != clnt->cl_inline_name) 353 if (clnt->cl_server != clnt->cl_inline_name)
341 kfree(clnt->cl_server); 354 kfree(clnt->cl_server);
342out_free: 355out_free:
@@ -466,10 +479,9 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
466 479
467 BUG_ON(flags & RPC_TASK_ASYNC); 480 BUG_ON(flags & RPC_TASK_ASYNC);
468 481
469 status = -ENOMEM;
470 task = rpc_new_task(clnt, flags, &rpc_default_ops, NULL); 482 task = rpc_new_task(clnt, flags, &rpc_default_ops, NULL);
471 if (task == NULL) 483 if (task == NULL)
472 goto out; 484 return -ENOMEM;
473 485
474 /* Mask signals on RPC calls _and_ GSS_AUTH upcalls */ 486 /* Mask signals on RPC calls _and_ GSS_AUTH upcalls */
475 rpc_task_sigmask(task, &oldset); 487 rpc_task_sigmask(task, &oldset);
@@ -478,15 +490,17 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
478 490
479 /* Set up the call info struct and execute the task */ 491 /* Set up the call info struct and execute the task */
480 status = task->tk_status; 492 status = task->tk_status;
481 if (status == 0) { 493 if (status != 0) {
482 atomic_inc(&task->tk_count); 494 rpc_release_task(task);
483 status = rpc_execute(task); 495 goto out;
484 if (status == 0)
485 status = task->tk_status;
486 } 496 }
487 rpc_restore_sigmask(&oldset); 497 atomic_inc(&task->tk_count);
488 rpc_release_task(task); 498 status = rpc_execute(task);
499 if (status == 0)
500 status = task->tk_status;
501 rpc_put_task(task);
489out: 502out:
503 rpc_restore_sigmask(&oldset);
490 return status; 504 return status;
491} 505}
492 506
@@ -528,8 +542,7 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
528 rpc_restore_sigmask(&oldset); 542 rpc_restore_sigmask(&oldset);
529 return status; 543 return status;
530out_release: 544out_release:
531 if (tk_ops->rpc_release != NULL) 545 rpc_release_calldata(tk_ops, data);
532 tk_ops->rpc_release(data);
533 return status; 546 return status;
534} 547}
535 548
@@ -581,7 +594,11 @@ EXPORT_SYMBOL_GPL(rpc_peeraddr);
581char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format) 594char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format)
582{ 595{
583 struct rpc_xprt *xprt = clnt->cl_xprt; 596 struct rpc_xprt *xprt = clnt->cl_xprt;
584 return xprt->ops->print_addr(xprt, format); 597
598 if (xprt->address_strings[format] != NULL)
599 return xprt->address_strings[format];
600 else
601 return "unprintable";
585} 602}
586EXPORT_SYMBOL_GPL(rpc_peeraddr2str); 603EXPORT_SYMBOL_GPL(rpc_peeraddr2str);
587 604
@@ -811,8 +828,10 @@ call_encode(struct rpc_task *task)
811 if (encode == NULL) 828 if (encode == NULL)
812 return; 829 return;
813 830
831 lock_kernel();
814 task->tk_status = rpcauth_wrap_req(task, encode, req, p, 832 task->tk_status = rpcauth_wrap_req(task, encode, req, p,
815 task->tk_msg.rpc_argp); 833 task->tk_msg.rpc_argp);
834 unlock_kernel();
816 if (task->tk_status == -ENOMEM) { 835 if (task->tk_status == -ENOMEM) {
817 /* XXX: Is this sane? */ 836 /* XXX: Is this sane? */
818 rpc_delay(task, 3*HZ); 837 rpc_delay(task, 3*HZ);
@@ -1143,9 +1162,12 @@ call_decode(struct rpc_task *task)
1143 1162
1144 task->tk_action = rpc_exit_task; 1163 task->tk_action = rpc_exit_task;
1145 1164
1146 if (decode) 1165 if (decode) {
1166 lock_kernel();
1147 task->tk_status = rpcauth_unwrap_resp(task, decode, req, p, 1167 task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
1148 task->tk_msg.rpc_resp); 1168 task->tk_msg.rpc_resp);
1169 unlock_kernel();
1170 }
1149 dprintk("RPC: %4d call_decode result %d\n", task->tk_pid, 1171 dprintk("RPC: %4d call_decode result %d\n", task->tk_pid,
1150 task->tk_status); 1172 task->tk_status);
1151 return; 1173 return;
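
The clnt.c changes make rpc_alloc_iostats() failure fatal in both rpc_new_client() and rpc_clone_client(), give the clone its own pipefs directory instead of dget()ing the parent's dentry, and take lock_kernel() explicitly around the XDR encode and decode calls now that the scheduler no longer wraps every callback in the BKL (see the sched.c changes below). The new error paths extend the classic goto-unwind ladder, where each label frees exactly what was allocated before it, in reverse order. A self-contained sketch of that pattern (hypothetical struct and allocation sizes):

        #include <stdlib.h>

        struct clnt { void *metrics; void *path; };

        static struct clnt *new_client(void)
        {
                struct clnt *c = calloc(1, sizeof(*c));

                if (c == NULL)
                        goto out_no_clnt;
                c->metrics = malloc(64);        /* stands in for rpc_alloc_iostats() */
                if (c->metrics == NULL)
                        goto out_no_stats;
                c->path = malloc(64);           /* stands in for rpc_setup_pipedir() */
                if (c->path == NULL)
                        goto out_no_path;
                return c;

        out_no_path:
                free(c->metrics);
        out_no_stats:
                free(c);
        out_no_clnt:
                return NULL;
        }

rpc_call_sync() adopts the same discipline: a task whose setup fails is released before execution, and the signal mask is now restored on every exit path rather than only the successful one.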
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index e52afab413de..3946ec3eb517 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -101,14 +101,14 @@ void rpc_getport(struct rpc_task *task)
101 /* Autobind on cloned rpc clients is discouraged */ 101 /* Autobind on cloned rpc clients is discouraged */
102 BUG_ON(clnt->cl_parent != clnt); 102 BUG_ON(clnt->cl_parent != clnt);
103 103
104 status = -EACCES; /* tell caller to check again */
105 if (xprt_test_and_set_binding(xprt))
106 goto bailout_nowake;
107
104 /* Put self on queue before sending rpcbind request, in case 108 /* Put self on queue before sending rpcbind request, in case
105 * pmap_getport_done completes before we return from rpc_run_task */ 109 * pmap_getport_done completes before we return from rpc_run_task */
106 rpc_sleep_on(&xprt->binding, task, NULL, NULL); 110 rpc_sleep_on(&xprt->binding, task, NULL, NULL);
107 111
108 status = -EACCES; /* tell caller to check again */
109 if (xprt_test_and_set_binding(xprt))
110 goto bailout_nofree;
111
112 /* Someone else may have bound if we slept */ 112 /* Someone else may have bound if we slept */
113 status = 0; 113 status = 0;
114 if (xprt_bound(xprt)) 114 if (xprt_bound(xprt))
@@ -134,7 +134,7 @@ void rpc_getport(struct rpc_task *task)
134 child = rpc_run_task(pmap_clnt, RPC_TASK_ASYNC, &pmap_getport_ops, map); 134 child = rpc_run_task(pmap_clnt, RPC_TASK_ASYNC, &pmap_getport_ops, map);
135 if (IS_ERR(child)) 135 if (IS_ERR(child))
136 goto bailout; 136 goto bailout;
137 rpc_release_task(child); 137 rpc_put_task(child);
138 138
139 task->tk_xprt->stat.bind_count++; 139 task->tk_xprt->stat.bind_count++;
140 return; 140 return;
@@ -143,8 +143,9 @@ bailout:
143 pmap_map_free(map); 143 pmap_map_free(map);
144 xprt_put(xprt); 144 xprt_put(xprt);
145bailout_nofree: 145bailout_nofree:
146 task->tk_status = status;
147 pmap_wake_portmap_waiters(xprt, status); 146 pmap_wake_portmap_waiters(xprt, status);
147bailout_nowake:
148 task->tk_status = status;
148} 149}
149 150
150#ifdef CONFIG_ROOT_NFS 151#ifdef CONFIG_ROOT_NFS
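
rpc_getport() previously queued the task on xprt->binding and only then tried to claim the binding flag; a loser then fell through to pmap_wake_portmap_waiters(), kicking every waiter with -EACCES even though the real bind was still in flight. The reordered code claims the flag first and, on losing, exits through the new bailout_nowake label without touching the wait queue. The claim itself is a test-and-set, as in this C11 userspace analogue of xprt_test_and_set_binding() (the kernel uses a bit in xprt->state):

        #include <stdatomic.h>
        #include <stdbool.h>

        static atomic_flag binding = ATOMIC_FLAG_INIT;

        static bool start_binding(void)
        {
                /* true: we won and must perform the bind;
                 * false: someone else is already binding */
                return !atomic_flag_test_and_set(&binding);
        }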
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index eff44bcdc95a..18a33d327012 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -266,12 +266,28 @@ static int rpc_wait_bit_interruptible(void *word)
266 return 0; 266 return 0;
267} 267}
268 268
269static void rpc_set_active(struct rpc_task *task)
270{
271 if (test_and_set_bit(RPC_TASK_ACTIVE, &task->tk_runstate) != 0)
272 return;
273 spin_lock(&rpc_sched_lock);
274#ifdef RPC_DEBUG
275 task->tk_magic = RPC_TASK_MAGIC_ID;
276 task->tk_pid = rpc_task_id++;
277#endif
278 /* Add to global list of all tasks */
279 list_add_tail(&task->tk_task, &all_tasks);
280 spin_unlock(&rpc_sched_lock);
281}
282
269/* 283/*
270 * Mark an RPC call as having completed by clearing the 'active' bit 284 * Mark an RPC call as having completed by clearing the 'active' bit
271 */ 285 */
272static inline void rpc_mark_complete_task(struct rpc_task *task) 286static void rpc_mark_complete_task(struct rpc_task *task)
273{ 287{
274 rpc_clear_active(task); 288 smp_mb__before_clear_bit();
289 clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
290 smp_mb__after_clear_bit();
275 wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE); 291 wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE);
276} 292}
277 293
@@ -295,13 +311,15 @@ EXPORT_SYMBOL(__rpc_wait_for_completion_task);
295 */ 311 */
296static void rpc_make_runnable(struct rpc_task *task) 312static void rpc_make_runnable(struct rpc_task *task)
297{ 313{
298 int do_ret;
299
300 BUG_ON(task->tk_timeout_fn); 314 BUG_ON(task->tk_timeout_fn);
301 do_ret = rpc_test_and_set_running(task);
302 rpc_clear_queued(task); 315 rpc_clear_queued(task);
303 if (do_ret) 316 if (rpc_test_and_set_running(task))
304 return; 317 return;
318 /* We might have raced */
319 if (RPC_IS_QUEUED(task)) {
320 rpc_clear_running(task);
321 return;
322 }
305 if (RPC_IS_ASYNC(task)) { 323 if (RPC_IS_ASYNC(task)) {
306 int status; 324 int status;
307 325
@@ -333,9 +351,6 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
333 return; 351 return;
334 } 352 }
335 353
336 /* Mark the task as being activated if so needed */
337 rpc_set_active(task);
338
339 __rpc_add_wait_queue(q, task); 354 __rpc_add_wait_queue(q, task);
340 355
341 BUG_ON(task->tk_callback != NULL); 356 BUG_ON(task->tk_callback != NULL);
@@ -346,6 +361,9 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
346void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, 361void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
347 rpc_action action, rpc_action timer) 362 rpc_action action, rpc_action timer)
348{ 363{
364 /* Mark the task as being activated if so needed */
365 rpc_set_active(task);
366
349 /* 367 /*
350 * Protect the queue operations. 368 * Protect the queue operations.
351 */ 369 */
@@ -409,16 +427,19 @@ __rpc_default_timer(struct rpc_task *task)
409 */ 427 */
410void rpc_wake_up_task(struct rpc_task *task) 428void rpc_wake_up_task(struct rpc_task *task)
411{ 429{
430 rcu_read_lock_bh();
412 if (rpc_start_wakeup(task)) { 431 if (rpc_start_wakeup(task)) {
413 if (RPC_IS_QUEUED(task)) { 432 if (RPC_IS_QUEUED(task)) {
414 struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq; 433 struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq;
415 434
416 spin_lock_bh(&queue->lock); 435 /* Note: we're already in a bh-safe context */
436 spin_lock(&queue->lock);
417 __rpc_do_wake_up_task(task); 437 __rpc_do_wake_up_task(task);
418 spin_unlock_bh(&queue->lock); 438 spin_unlock(&queue->lock);
419 } 439 }
420 rpc_finish_wakeup(task); 440 rpc_finish_wakeup(task);
421 } 441 }
442 rcu_read_unlock_bh();
422} 443}
423 444
424/* 445/*
@@ -481,14 +502,16 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue)
481 struct rpc_task *task = NULL; 502 struct rpc_task *task = NULL;
482 503
483 dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue)); 504 dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue));
484 spin_lock_bh(&queue->lock); 505 rcu_read_lock_bh();
506 spin_lock(&queue->lock);
485 if (RPC_IS_PRIORITY(queue)) 507 if (RPC_IS_PRIORITY(queue))
486 task = __rpc_wake_up_next_priority(queue); 508 task = __rpc_wake_up_next_priority(queue);
487 else { 509 else {
488 task_for_first(task, &queue->tasks[0]) 510 task_for_first(task, &queue->tasks[0])
489 __rpc_wake_up_task(task); 511 __rpc_wake_up_task(task);
490 } 512 }
491 spin_unlock_bh(&queue->lock); 513 spin_unlock(&queue->lock);
514 rcu_read_unlock_bh();
492 515
493 return task; 516 return task;
494} 517}
@@ -504,7 +527,8 @@ void rpc_wake_up(struct rpc_wait_queue *queue)
504 struct rpc_task *task, *next; 527 struct rpc_task *task, *next;
505 struct list_head *head; 528 struct list_head *head;
506 529
507 spin_lock_bh(&queue->lock); 530 rcu_read_lock_bh();
531 spin_lock(&queue->lock);
508 head = &queue->tasks[queue->maxpriority]; 532 head = &queue->tasks[queue->maxpriority];
509 for (;;) { 533 for (;;) {
510 list_for_each_entry_safe(task, next, head, u.tk_wait.list) 534 list_for_each_entry_safe(task, next, head, u.tk_wait.list)
@@ -513,7 +537,8 @@ void rpc_wake_up(struct rpc_wait_queue *queue)
513 break; 537 break;
514 head--; 538 head--;
515 } 539 }
516 spin_unlock_bh(&queue->lock); 540 spin_unlock(&queue->lock);
541 rcu_read_unlock_bh();
517} 542}
518 543
519/** 544/**
@@ -528,7 +553,8 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
528 struct rpc_task *task, *next; 553 struct rpc_task *task, *next;
529 struct list_head *head; 554 struct list_head *head;
530 555
531 spin_lock_bh(&queue->lock); 556 rcu_read_lock_bh();
557 spin_lock(&queue->lock);
532 head = &queue->tasks[queue->maxpriority]; 558 head = &queue->tasks[queue->maxpriority];
533 for (;;) { 559 for (;;) {
534 list_for_each_entry_safe(task, next, head, u.tk_wait.list) { 560 list_for_each_entry_safe(task, next, head, u.tk_wait.list) {
@@ -539,7 +565,8 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
539 break; 565 break;
540 head--; 566 head--;
541 } 567 }
542 spin_unlock_bh(&queue->lock); 568 spin_unlock(&queue->lock);
569 rcu_read_unlock_bh();
543} 570}
544 571
545static void __rpc_atrun(struct rpc_task *task) 572static void __rpc_atrun(struct rpc_task *task)
@@ -561,7 +588,9 @@ void rpc_delay(struct rpc_task *task, unsigned long delay)
561 */ 588 */
562static void rpc_prepare_task(struct rpc_task *task) 589static void rpc_prepare_task(struct rpc_task *task)
563{ 590{
591 lock_kernel();
564 task->tk_ops->rpc_call_prepare(task, task->tk_calldata); 592 task->tk_ops->rpc_call_prepare(task, task->tk_calldata);
593 unlock_kernel();
565} 594}
566 595
567/* 596/*
@@ -571,7 +600,9 @@ void rpc_exit_task(struct rpc_task *task)
571{ 600{
572 task->tk_action = NULL; 601 task->tk_action = NULL;
573 if (task->tk_ops->rpc_call_done != NULL) { 602 if (task->tk_ops->rpc_call_done != NULL) {
603 lock_kernel();
574 task->tk_ops->rpc_call_done(task, task->tk_calldata); 604 task->tk_ops->rpc_call_done(task, task->tk_calldata);
605 unlock_kernel();
575 if (task->tk_action != NULL) { 606 if (task->tk_action != NULL) {
576 WARN_ON(RPC_ASSASSINATED(task)); 607 WARN_ON(RPC_ASSASSINATED(task));
577 /* Always release the RPC slot and buffer memory */ 608 /* Always release the RPC slot and buffer memory */
@@ -581,6 +612,15 @@ void rpc_exit_task(struct rpc_task *task)
581} 612}
582EXPORT_SYMBOL(rpc_exit_task); 613EXPORT_SYMBOL(rpc_exit_task);
583 614
615void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata)
616{
617 if (ops->rpc_release != NULL) {
618 lock_kernel();
619 ops->rpc_release(calldata);
620 unlock_kernel();
621 }
622}
623
584/* 624/*
585 * This is the RPC `scheduler' (or rather, the finite state machine). 625 * This is the RPC `scheduler' (or rather, the finite state machine).
586 */ 626 */
@@ -615,9 +655,7 @@ static int __rpc_execute(struct rpc_task *task)
615 */ 655 */
616 save_callback=task->tk_callback; 656 save_callback=task->tk_callback;
617 task->tk_callback=NULL; 657 task->tk_callback=NULL;
618 lock_kernel();
619 save_callback(task); 658 save_callback(task);
620 unlock_kernel();
621 } 659 }
622 660
623 /* 661 /*
@@ -628,9 +666,7 @@ static int __rpc_execute(struct rpc_task *task)
628 if (!RPC_IS_QUEUED(task)) { 666 if (!RPC_IS_QUEUED(task)) {
629 if (task->tk_action == NULL) 667 if (task->tk_action == NULL)
630 break; 668 break;
631 lock_kernel();
632 task->tk_action(task); 669 task->tk_action(task);
633 unlock_kernel();
634 } 670 }
635 671
636 /* 672 /*
@@ -671,8 +707,6 @@ static int __rpc_execute(struct rpc_task *task)
671 } 707 }
672 708
673 dprintk("RPC: %4d, return %d, status %d\n", task->tk_pid, status, task->tk_status); 709 dprintk("RPC: %4d, return %d, status %d\n", task->tk_pid, status, task->tk_status);
674 /* Wake up anyone who is waiting for task completion */
675 rpc_mark_complete_task(task);
676 /* Release all resources associated with the task */ 710 /* Release all resources associated with the task */
677 rpc_release_task(task); 711 rpc_release_task(task);
678 return status; 712 return status;
@@ -786,15 +820,6 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons
786 task->tk_flags |= RPC_TASK_NOINTR; 820 task->tk_flags |= RPC_TASK_NOINTR;
787 } 821 }
788 822
789#ifdef RPC_DEBUG
790 task->tk_magic = RPC_TASK_MAGIC_ID;
791 task->tk_pid = rpc_task_id++;
792#endif
793 /* Add to global list of all tasks */
794 spin_lock(&rpc_sched_lock);
795 list_add_tail(&task->tk_task, &all_tasks);
796 spin_unlock(&rpc_sched_lock);
797
798 BUG_ON(task->tk_ops == NULL); 823 BUG_ON(task->tk_ops == NULL);
799 824
800 /* starting timestamp */ 825 /* starting timestamp */
@@ -810,8 +835,9 @@ rpc_alloc_task(void)
810 return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); 835 return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
811} 836}
812 837
813static void rpc_free_task(struct rpc_task *task) 838static void rpc_free_task(struct rcu_head *rcu)
814{ 839{
840 struct rpc_task *task = container_of(rcu, struct rpc_task, u.tk_rcu);
815 dprintk("RPC: %4d freeing task\n", task->tk_pid); 841 dprintk("RPC: %4d freeing task\n", task->tk_pid);
816 mempool_free(task, rpc_task_mempool); 842 mempool_free(task, rpc_task_mempool);
817} 843}
@@ -847,16 +873,34 @@ cleanup:
847 goto out; 873 goto out;
848} 874}
849 875
850void rpc_release_task(struct rpc_task *task) 876
877void rpc_put_task(struct rpc_task *task)
851{ 878{
852 const struct rpc_call_ops *tk_ops = task->tk_ops; 879 const struct rpc_call_ops *tk_ops = task->tk_ops;
853 void *calldata = task->tk_calldata; 880 void *calldata = task->tk_calldata;
854 881
882 if (!atomic_dec_and_test(&task->tk_count))
883 return;
884 /* Release resources */
885 if (task->tk_rqstp)
886 xprt_release(task);
887 if (task->tk_msg.rpc_cred)
888 rpcauth_unbindcred(task);
889 if (task->tk_client) {
890 rpc_release_client(task->tk_client);
891 task->tk_client = NULL;
892 }
893 if (task->tk_flags & RPC_TASK_DYNAMIC)
894 call_rcu_bh(&task->u.tk_rcu, rpc_free_task);
895 rpc_release_calldata(tk_ops, calldata);
896}
897EXPORT_SYMBOL(rpc_put_task);
898
899void rpc_release_task(struct rpc_task *task)
900{
855#ifdef RPC_DEBUG 901#ifdef RPC_DEBUG
856 BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID); 902 BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
857#endif 903#endif
858 if (!atomic_dec_and_test(&task->tk_count))
859 return;
860 dprintk("RPC: %4d release task\n", task->tk_pid); 904 dprintk("RPC: %4d release task\n", task->tk_pid);
861 905
862 /* Remove from global task list */ 906 /* Remove from global task list */
@@ -869,23 +913,13 @@ void rpc_release_task(struct rpc_task *task)
869 /* Synchronously delete any running timer */ 913 /* Synchronously delete any running timer */
870 rpc_delete_timer(task); 914 rpc_delete_timer(task);
871 915
872 /* Release resources */
873 if (task->tk_rqstp)
874 xprt_release(task);
875 if (task->tk_msg.rpc_cred)
876 rpcauth_unbindcred(task);
877 if (task->tk_client) {
878 rpc_release_client(task->tk_client);
879 task->tk_client = NULL;
880 }
881
882#ifdef RPC_DEBUG 916#ifdef RPC_DEBUG
883 task->tk_magic = 0; 917 task->tk_magic = 0;
884#endif 918#endif
885 if (task->tk_flags & RPC_TASK_DYNAMIC) 919 /* Wake up anyone who is waiting for task completion */
886 rpc_free_task(task); 920 rpc_mark_complete_task(task);
887 if (tk_ops->rpc_release) 921
888 tk_ops->rpc_release(calldata); 922 rpc_put_task(task);
889} 923}
890 924
891/** 925/**
@@ -902,8 +936,7 @@ struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
902 struct rpc_task *task; 936 struct rpc_task *task;
903 task = rpc_new_task(clnt, flags, ops, data); 937 task = rpc_new_task(clnt, flags, ops, data);
904 if (task == NULL) { 938 if (task == NULL) {
905 if (ops->rpc_release != NULL) 939 rpc_release_calldata(ops, data);
906 ops->rpc_release(data);
907 return ERR_PTR(-ENOMEM); 940 return ERR_PTR(-ENOMEM);
908 } 941 }
909 atomic_inc(&task->tk_count); 942 atomic_inc(&task->tk_count);
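
The sched.c rework splits task teardown in two: rpc_put_task() drops a reference and, on the last put, releases the request slot, cred, and client before deferring the final free to an RCU callback (call_rcu_bh() on u.tk_rcu), while rpc_release_task() becomes the scheduler's own final step, signals completion via rpc_mark_complete_task(), and is no longer exported. The idea behind the locking change is that wake-ups can then take queue->lock under rcu_read_lock_bh() instead of a bare spin_lock_bh(), since a task freed behind a racing waker cannot be reused until the grace period ends. A userspace analogue of the put side (C11 atomics standing in for atomic_dec_and_test, plain free() standing in for the RCU-deferred free):

        #include <stdatomic.h>
        #include <stdlib.h>

        struct task {
                atomic_int count;
                void *rqst;                     /* stands in for tk_rqstp etc. */
        };

        static void put_task(struct task *t)
        {
                if (atomic_fetch_sub(&t->count, 1) != 1)
                        return;                 /* not the last reference */
                free(t->rqst);                  /* release resources first... */
                free(t);                        /* ...then the task itself; the
                                                 * kernel defers this via RCU */
        }

Two ordering fixes ride along: rpc_set_active() now adds the task to the global all_tasks list when it first becomes active rather than at init time, and rpc_make_runnable() re-checks RPC_IS_QUEUED() after setting the RUNNING bit to close a wake/queue race.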
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 2635c543ba06..634885b0c04d 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -16,7 +16,7 @@
16 16
17 17
18/** 18/**
19 * skb_read_bits - copy some data bits from skb to internal buffer 19 * xdr_skb_read_bits - copy some data bits from skb to internal buffer
20 * @desc: sk_buff copy helper 20 * @desc: sk_buff copy helper
21 * @to: copy destination 21 * @to: copy destination
22 * @len: number of bytes to copy 22 * @len: number of bytes to copy
@@ -24,11 +24,11 @@
24 * Possibly called several times to iterate over an sk_buff and copy 24 * Possibly called several times to iterate over an sk_buff and copy
25 * data out of it. 25 * data out of it.
26 */ 26 */
27static size_t skb_read_bits(skb_reader_t *desc, void *to, size_t len) 27size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
28{ 28{
29 if (len > desc->count) 29 if (len > desc->count)
30 len = desc->count; 30 len = desc->count;
31 if (skb_copy_bits(desc->skb, desc->offset, to, len)) 31 if (unlikely(skb_copy_bits(desc->skb, desc->offset, to, len)))
32 return 0; 32 return 0;
33 desc->count -= len; 33 desc->count -= len;
34 desc->offset += len; 34 desc->offset += len;
@@ -36,14 +36,14 @@ static size_t skb_read_bits(skb_reader_t *desc, void *to, size_t len)
36} 36}
37 37
38/** 38/**
39 * skb_read_and_csum_bits - copy and checksum from skb to buffer 39 * xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer
40 * @desc: sk_buff copy helper 40 * @desc: sk_buff copy helper
41 * @to: copy destination 41 * @to: copy destination
42 * @len: number of bytes to copy 42 * @len: number of bytes to copy
43 * 43 *
44 * Same as skb_read_bits, but calculate a checksum at the same time. 44 * Same as skb_read_bits, but calculate a checksum at the same time.
45 */ 45 */
46static size_t skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len) 46static size_t xdr_skb_read_and_csum_bits(struct xdr_skb_reader *desc, void *to, size_t len)
47{ 47{
48 unsigned int pos; 48 unsigned int pos;
49 __wsum csum2; 49 __wsum csum2;
@@ -66,7 +66,7 @@ static size_t skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len)
66 * @copy_actor: virtual method for copying data 66 * @copy_actor: virtual method for copying data
67 * 67 *
68 */ 68 */
69ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, skb_reader_t *desc, skb_read_actor_t copy_actor) 69ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor)
70{ 70{
71 struct page **ppage = xdr->pages; 71 struct page **ppage = xdr->pages;
72 unsigned int len, pglen = xdr->page_len; 72 unsigned int len, pglen = xdr->page_len;
@@ -148,7 +148,7 @@ out:
148 */ 148 */
149int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) 149int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
150{ 150{
151 skb_reader_t desc; 151 struct xdr_skb_reader desc;
152 152
153 desc.skb = skb; 153 desc.skb = skb;
154 desc.offset = sizeof(struct udphdr); 154 desc.offset = sizeof(struct udphdr);
@@ -158,7 +158,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
158 goto no_checksum; 158 goto no_checksum;
159 159
160 desc.csum = csum_partial(skb->data, desc.offset, skb->csum); 160 desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
161 if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0) 161 if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_and_csum_bits) < 0)
162 return -1; 162 return -1;
163 if (desc.offset != skb->len) { 163 if (desc.offset != skb->len) {
164 __wsum csum2; 164 __wsum csum2;
@@ -173,7 +173,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
173 netdev_rx_csum_fault(skb->dev); 173 netdev_rx_csum_fault(skb->dev);
174 return 0; 174 return 0;
175no_checksum: 175no_checksum:
176 if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0) 176 if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0)
177 return -1; 177 return -1;
178 if (desc.count) 178 if (desc.count)
179 return -1; 179 return -1;
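The socklib.c change is a rename plus a widening of scope: the file-private skb_read_bits()/skb_reader_t pair becomes the shared xdr_skb_read_bits()/struct xdr_skb_reader, which lets xprtsock.c drop its duplicate xs_tcp_copy_data() later in this patch. The helper is a resumable cursor over the packet; a standalone sketch, with a flat buffer standing in for the sk_buff:

#include <stddef.h>
#include <string.h>

struct reader {
	const char *buf;	/* stands in for desc->skb */
	size_t offset;
	size_t count;		/* bytes still available */
};

/* Copy at most len bytes and advance the cursor, so the next call
 * resumes exactly where this one stopped; returns bytes consumed. */
static size_t read_bits(struct reader *desc, void *to, size_t len)
{
	if (len > desc->count)
		len = desc->count;
	memcpy(to, desc->buf + desc->offset, len);
	desc->offset += len;
	desc->count -= len;
	return len;
}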
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 192dff5dabcb..d85fddeb6388 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -33,7 +33,6 @@ EXPORT_SYMBOL(rpciod_down);
33EXPORT_SYMBOL(rpciod_up); 33EXPORT_SYMBOL(rpciod_up);
34EXPORT_SYMBOL(rpc_new_task); 34EXPORT_SYMBOL(rpc_new_task);
35EXPORT_SYMBOL(rpc_wake_up_status); 35EXPORT_SYMBOL(rpc_wake_up_status);
36EXPORT_SYMBOL(rpc_release_task);
37 36
38/* RPC client functions */ 37/* RPC client functions */
39EXPORT_SYMBOL(rpc_clone_client); 38EXPORT_SYMBOL(rpc_clone_client);
@@ -139,6 +138,8 @@ EXPORT_SYMBOL(nlm_debug);
139extern int register_rpc_pipefs(void); 138extern int register_rpc_pipefs(void);
140extern void unregister_rpc_pipefs(void); 139extern void unregister_rpc_pipefs(void);
141extern struct cache_detail ip_map_cache; 140extern struct cache_detail ip_map_cache;
141extern int init_socket_xprt(void);
142extern void cleanup_socket_xprt(void);
142 143
143static int __init 144static int __init
144init_sunrpc(void) 145init_sunrpc(void)
@@ -156,6 +157,7 @@ init_sunrpc(void)
156 rpc_proc_init(); 157 rpc_proc_init();
157#endif 158#endif
158 cache_register(&ip_map_cache); 159 cache_register(&ip_map_cache);
160 init_socket_xprt();
159out: 161out:
160 return err; 162 return err;
161} 163}
@@ -163,6 +165,7 @@ out:
163static void __exit 165static void __exit
164cleanup_sunrpc(void) 166cleanup_sunrpc(void)
165{ 167{
168 cleanup_socket_xprt();
166 unregister_rpc_pipefs(); 169 unregister_rpc_pipefs();
167 rpc_destroy_mempool(); 170 rpc_destroy_mempool();
168 if (cache_unregister(&ip_map_cache)) 171 if (cache_unregister(&ip_map_cache))
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index d89b048ad6bb..82b27528d0c4 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -18,7 +18,6 @@
18#include <linux/sunrpc/types.h> 18#include <linux/sunrpc/types.h>
19#include <linux/sunrpc/sched.h> 19#include <linux/sunrpc/sched.h>
20#include <linux/sunrpc/stats.h> 20#include <linux/sunrpc/stats.h>
21#include <linux/sunrpc/xprt.h>
22 21
23/* 22/*
24 * Declare the debug flags here 23 * Declare the debug flags here
@@ -119,11 +118,6 @@ done:
119} 118}
120 119
121 120
122static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE;
123static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE;
124static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT;
125static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT;
126
127static ctl_table debug_table[] = { 121static ctl_table debug_table[] = {
128 { 122 {
129 .ctl_name = CTL_RPCDEBUG, 123 .ctl_name = CTL_RPCDEBUG,
@@ -157,50 +151,6 @@ static ctl_table debug_table[] = {
157 .mode = 0644, 151 .mode = 0644,
158 .proc_handler = &proc_dodebug 152 .proc_handler = &proc_dodebug
159 }, 153 },
160 {
161 .ctl_name = CTL_SLOTTABLE_UDP,
162 .procname = "udp_slot_table_entries",
163 .data = &xprt_udp_slot_table_entries,
164 .maxlen = sizeof(unsigned int),
165 .mode = 0644,
166 .proc_handler = &proc_dointvec_minmax,
167 .strategy = &sysctl_intvec,
168 .extra1 = &min_slot_table_size,
169 .extra2 = &max_slot_table_size
170 },
171 {
172 .ctl_name = CTL_SLOTTABLE_TCP,
173 .procname = "tcp_slot_table_entries",
174 .data = &xprt_tcp_slot_table_entries,
175 .maxlen = sizeof(unsigned int),
176 .mode = 0644,
177 .proc_handler = &proc_dointvec_minmax,
178 .strategy = &sysctl_intvec,
179 .extra1 = &min_slot_table_size,
180 .extra2 = &max_slot_table_size
181 },
182 {
183 .ctl_name = CTL_MIN_RESVPORT,
184 .procname = "min_resvport",
185 .data = &xprt_min_resvport,
186 .maxlen = sizeof(unsigned int),
187 .mode = 0644,
188 .proc_handler = &proc_dointvec_minmax,
189 .strategy = &sysctl_intvec,
190 .extra1 = &xprt_min_resvport_limit,
191 .extra2 = &xprt_max_resvport_limit
192 },
193 {
194 .ctl_name = CTL_MAX_RESVPORT,
195 .procname = "max_resvport",
196 .data = &xprt_max_resvport,
197 .maxlen = sizeof(unsigned int),
198 .mode = 0644,
199 .proc_handler = &proc_dointvec_minmax,
200 .strategy = &sysctl_intvec,
201 .extra1 = &xprt_min_resvport_limit,
202 .extra2 = &xprt_max_resvport_limit
203 },
204 { .ctl_name = 0 } 154 { .ctl_name = 0 }
205}; 155};
206 156
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 9022eb8b37ed..a0af250ca319 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -640,41 +640,30 @@ xdr_buf_from_iov(struct kvec *iov, struct xdr_buf *buf)
640 buf->buflen = buf->len = iov->iov_len; 640 buf->buflen = buf->len = iov->iov_len;
641} 641}
642 642
643/* Sets subiov to the intersection of iov with the buffer of length len
644 * starting base bytes after iov. Indicates empty intersection by setting
645 * length of subiov to zero. Decrements len by length of subiov, sets base
646 * to zero (or decrements it by length of iov if subiov is empty). */
647static void
648iov_subsegment(struct kvec *iov, struct kvec *subiov, int *base, int *len)
649{
650 if (*base > iov->iov_len) {
651 subiov->iov_base = NULL;
652 subiov->iov_len = 0;
653 *base -= iov->iov_len;
654 } else {
655 subiov->iov_base = iov->iov_base + *base;
656 subiov->iov_len = min(*len, (int)iov->iov_len - *base);
657 *base = 0;
658 }
659 *len -= subiov->iov_len;
660}
661
662/* Sets subbuf to the portion of buf of length len beginning base bytes 643/* Sets subbuf to the portion of buf of length len beginning base bytes
663 * from the start of buf. Returns -1 if base or length are out of bounds. */ 644 * from the start of buf. Returns -1 if base or length are out of bounds. */
664int 645int
665xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, 646xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
666 int base, int len) 647 unsigned int base, unsigned int len)
667{ 648{
668 int i;
669
670 subbuf->buflen = subbuf->len = len; 649 subbuf->buflen = subbuf->len = len;
671 iov_subsegment(buf->head, subbuf->head, &base, &len); 650 if (base < buf->head[0].iov_len) {
651 subbuf->head[0].iov_base = buf->head[0].iov_base + base;
652 subbuf->head[0].iov_len = min_t(unsigned int, len,
653 buf->head[0].iov_len - base);
654 len -= subbuf->head[0].iov_len;
655 base = 0;
656 } else {
657 subbuf->head[0].iov_base = NULL;
658 subbuf->head[0].iov_len = 0;
659 base -= buf->head[0].iov_len;
660 }
672 661
673 if (base < buf->page_len) { 662 if (base < buf->page_len) {
674 i = (base + buf->page_base) >> PAGE_CACHE_SHIFT; 663 subbuf->page_len = min(buf->page_len - base, len);
675 subbuf->pages = &buf->pages[i]; 664 base += buf->page_base;
676 subbuf->page_base = (base + buf->page_base) & ~PAGE_CACHE_MASK; 665 subbuf->page_base = base & ~PAGE_CACHE_MASK;
677 subbuf->page_len = min((int)buf->page_len - base, len); 666 subbuf->pages = &buf->pages[base >> PAGE_CACHE_SHIFT];
678 len -= subbuf->page_len; 667 len -= subbuf->page_len;
679 base = 0; 668 base = 0;
680 } else { 669 } else {
@@ -682,66 +671,85 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
682 subbuf->page_len = 0; 671 subbuf->page_len = 0;
683 } 672 }
684 673
685 iov_subsegment(buf->tail, subbuf->tail, &base, &len); 674 if (base < buf->tail[0].iov_len) {
675 subbuf->tail[0].iov_base = buf->tail[0].iov_base + base;
676 subbuf->tail[0].iov_len = min_t(unsigned int, len,
677 buf->tail[0].iov_len - base);
678 len -= subbuf->tail[0].iov_len;
679 base = 0;
680 } else {
681 subbuf->tail[0].iov_base = NULL;
682 subbuf->tail[0].iov_len = 0;
683 base -= buf->tail[0].iov_len;
684 }
685
686 if (base || len) 686 if (base || len)
687 return -1; 687 return -1;
688 return 0; 688 return 0;
689} 689}
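The rewritten xdr_buf_subsegment() open-codes for head and tail what iov_subsegment() used to do, and applies the same step to the page array in between. The invariant is identical for all three regions: either [base, base + len) overlaps this region, in which case the overlap is taken and base resets to zero, or the region is skipped entirely and base shrinks by its length. A self-contained sketch over a generic region list:

#include <stddef.h>

struct seg { char *base; size_t len; };

/* Returns 0 on success, -1 if base/len run past the end of the regions. */
static int subsegment(const struct seg *src, struct seg *sub, size_t nsegs,
		      size_t base, size_t len)
{
	size_t i, take;

	for (i = 0; i < nsegs; i++) {
		if (base < src[i].len) {
			take = src[i].len - base;
			if (take > len)
				take = len;
			sub[i].base = src[i].base + base;
			sub[i].len = take;
			len -= take;
			base = 0;
		} else {
			sub[i].base = NULL;
			sub[i].len = 0;
			base -= src[i].len;	/* region lies wholly before base */
		}
	}
	return (base || len) ? -1 : 0;	/* leftovers mean out of bounds */
}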
690 690
691/* obj is assumed to point to allocated memory of size at least len: */ 691static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
692int
693read_bytes_from_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len)
694{ 692{
695 struct xdr_buf subbuf; 693 unsigned int this_len;
696 int this_len;
697 int status;
698 694
699 status = xdr_buf_subsegment(buf, &subbuf, base, len); 695 this_len = min_t(unsigned int, len, subbuf->head[0].iov_len);
700 if (status) 696 memcpy(obj, subbuf->head[0].iov_base, this_len);
701 goto out;
702 this_len = min(len, (int)subbuf.head[0].iov_len);
703 memcpy(obj, subbuf.head[0].iov_base, this_len);
704 len -= this_len; 697 len -= this_len;
705 obj += this_len; 698 obj += this_len;
706 this_len = min(len, (int)subbuf.page_len); 699 this_len = min_t(unsigned int, len, subbuf->page_len);
707 if (this_len) 700 if (this_len)
708 _copy_from_pages(obj, subbuf.pages, subbuf.page_base, this_len); 701 _copy_from_pages(obj, subbuf->pages, subbuf->page_base, this_len);
709 len -= this_len; 702 len -= this_len;
710 obj += this_len; 703 obj += this_len;
711 this_len = min(len, (int)subbuf.tail[0].iov_len); 704 this_len = min_t(unsigned int, len, subbuf->tail[0].iov_len);
712 memcpy(obj, subbuf.tail[0].iov_base, this_len); 705 memcpy(obj, subbuf->tail[0].iov_base, this_len);
713out:
714 return status;
715} 706}
716 707
717/* obj is assumed to point to allocated memory of size at least len: */ 708/* obj is assumed to point to allocated memory of size at least len: */
718int 709int read_bytes_from_xdr_buf(struct xdr_buf *buf, unsigned int base, void *obj, unsigned int len)
719write_bytes_to_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len)
720{ 710{
721 struct xdr_buf subbuf; 711 struct xdr_buf subbuf;
722 int this_len;
723 int status; 712 int status;
724 713
725 status = xdr_buf_subsegment(buf, &subbuf, base, len); 714 status = xdr_buf_subsegment(buf, &subbuf, base, len);
726 if (status) 715 if (status != 0)
727 goto out; 716 return status;
728 this_len = min(len, (int)subbuf.head[0].iov_len); 717 __read_bytes_from_xdr_buf(&subbuf, obj, len);
729 memcpy(subbuf.head[0].iov_base, obj, this_len); 718 return 0;
719}
720
721static void __write_bytes_to_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
722{
723 unsigned int this_len;
724
725 this_len = min_t(unsigned int, len, subbuf->head[0].iov_len);
726 memcpy(subbuf->head[0].iov_base, obj, this_len);
730 len -= this_len; 727 len -= this_len;
731 obj += this_len; 728 obj += this_len;
732 this_len = min(len, (int)subbuf.page_len); 729 this_len = min_t(unsigned int, len, subbuf->page_len);
733 if (this_len) 730 if (this_len)
734 _copy_to_pages(subbuf.pages, subbuf.page_base, obj, this_len); 731 _copy_to_pages(subbuf->pages, subbuf->page_base, obj, this_len);
735 len -= this_len; 732 len -= this_len;
736 obj += this_len; 733 obj += this_len;
737 this_len = min(len, (int)subbuf.tail[0].iov_len); 734 this_len = min_t(unsigned int, len, subbuf->tail[0].iov_len);
738 memcpy(subbuf.tail[0].iov_base, obj, this_len); 735 memcpy(subbuf->tail[0].iov_base, obj, this_len);
739out: 736}
740 return status; 737
738/* obj is assumed to point to allocated memory of size at least len: */
739int write_bytes_to_xdr_buf(struct xdr_buf *buf, unsigned int base, void *obj, unsigned int len)
740{
741 struct xdr_buf subbuf;
742 int status;
743
744 status = xdr_buf_subsegment(buf, &subbuf, base, len);
745 if (status != 0)
746 return status;
747 __write_bytes_to_xdr_buf(&subbuf, obj, len);
748 return 0;
741} 749}
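With bounds checking centralized in xdr_buf_subsegment(), the new double-underscore helpers copy unconditionally and return void, and the exported entry points shrink to check-then-copy. The shape of that split, reduced to a flat buffer (names here are illustrative):

#include <errno.h>
#include <stddef.h>
#include <string.h>

struct buf { char data[64]; size_t len; };

/* Unchecked worker: the caller guarantees [base, base + len) is in range. */
static void __read_bytes(const struct buf *b, size_t base, void *obj, size_t len)
{
	memcpy(obj, b->data + base, len);
}

/* Public entry point: one bounds check, then the unchecked copy. */
static int read_bytes(const struct buf *b, size_t base, void *obj, size_t len)
{
	if (base > b->len || len > b->len - base)
		return -EFAULT;
	__read_bytes(b, base, obj, len);
	return 0;
}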
742 750
743int 751int
744xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj) 752xdr_decode_word(struct xdr_buf *buf, unsigned int base, u32 *obj)
745{ 753{
746 __be32 raw; 754 __be32 raw;
747 int status; 755 int status;
@@ -754,7 +762,7 @@ xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj)
754} 762}
755 763
756int 764int
757xdr_encode_word(struct xdr_buf *buf, int base, u32 obj) 765xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj)
758{ 766{
759 __be32 raw = htonl(obj); 767 __be32 raw = htonl(obj);
760 768
@@ -765,44 +773,37 @@ xdr_encode_word(struct xdr_buf *buf, int base, u32 obj)
765 * entirely in the head or the tail, set object to point to it; otherwise 773 * entirely in the head or the tail, set object to point to it; otherwise
766 * try to find space for it at the end of the tail, copy it there, and 774 * try to find space for it at the end of the tail, copy it there, and
767 * set obj to point to it. */ 775 * set obj to point to it. */
768int 776int xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, unsigned int offset)
769xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, int offset)
770{ 777{
771 u32 tail_offset = buf->head[0].iov_len + buf->page_len; 778 struct xdr_buf subbuf;
772 u32 obj_end_offset;
773 779
774 if (xdr_decode_word(buf, offset, &obj->len)) 780 if (xdr_decode_word(buf, offset, &obj->len))
775 goto out; 781 return -EFAULT;
776 obj_end_offset = offset + 4 + obj->len; 782 if (xdr_buf_subsegment(buf, &subbuf, offset + 4, obj->len))
777 783 return -EFAULT;
778 if (obj_end_offset <= buf->head[0].iov_len) {
779 /* The obj is contained entirely in the head: */
780 obj->data = buf->head[0].iov_base + offset + 4;
781 } else if (offset + 4 >= tail_offset) {
782 if (obj_end_offset - tail_offset
783 > buf->tail[0].iov_len)
784 goto out;
785 /* The obj is contained entirely in the tail: */
786 obj->data = buf->tail[0].iov_base
787 + offset - tail_offset + 4;
788 } else {
789 /* use end of tail as storage for obj:
790 * (We don't copy to the beginning because then we'd have
791 * to worry about doing a potentially overlapping copy.
792 * This assumes the object is at most half the length of the
793 * tail.) */
794 if (obj->len > buf->tail[0].iov_len)
795 goto out;
796 obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len -
797 obj->len;
798 if (read_bytes_from_xdr_buf(buf, offset + 4,
799 obj->data, obj->len))
800 goto out;
801 784
802 } 785 /* Is the obj contained entirely in the head? */
786 obj->data = subbuf.head[0].iov_base;
787 if (subbuf.head[0].iov_len == obj->len)
788 return 0;
789 /* ..or is the obj contained entirely in the tail? */
790 obj->data = subbuf.tail[0].iov_base;
791 if (subbuf.tail[0].iov_len == obj->len)
792 return 0;
793
794 /* use end of tail as storage for obj:
795 * (We don't copy to the beginning because then we'd have
796 * to worry about doing a potentially overlapping copy.
797 * This assumes the object is at most half the length of the
798 * tail.) */
799 if (obj->len > buf->buflen - buf->len)
800 return -ENOMEM;
801 if (buf->tail[0].iov_len != 0)
802 obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len;
803 else
804 obj->data = buf->head[0].iov_base + buf->head[0].iov_len;
805 __read_bytes_from_xdr_buf(&subbuf, obj->data, obj->len);
803 return 0; 806 return 0;
804out:
805 return -1;
806} 807}
807 808
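xdr_buf_read_netobj() keeps its contract of returning contiguous bytes, but now derives its cases from a subsegment: an object wholly inside the head or the tail is aliased in place, and anything else is gathered into the buffer's unused space (buflen - len), replacing the old at-most-half-the-tail heuristic. A standalone model of the alias-or-copy decision over two flat regions:

#include <stddef.h>
#include <string.h>

struct flat { char *base; size_t len; };

/* Return a contiguous view of [off, off + len): an alias into a or b when
 * possible, otherwise a copy gathered into scratch; NULL on overflow. */
static char *alias_or_copy(const struct flat *a, const struct flat *b,
			   size_t off, size_t len,
			   char *scratch, size_t scratch_len)
{
	size_t from_a;

	if (off + len > a->len + b->len)
		return NULL;			/* out of bounds */
	if (off + len <= a->len)
		return a->base + off;		/* wholly inside a */
	if (off >= a->len)
		return b->base + (off - a->len);	/* wholly inside b */
	if (len > scratch_len)
		return NULL;			/* nowhere to gather */
	from_a = a->len - off;			/* straddles the seam: copy out */
	memcpy(scratch, a->base + off, from_a);
	memcpy(scratch + from_a, b->base, len - from_a);
	return scratch;
}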
808/* Returns 0 on success, or else a negative error code. */ 809/* Returns 0 on success, or else a negative error code. */
@@ -1020,3 +1021,71 @@ xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
1020 1021
1021 return xdr_xcode_array2(buf, base, desc, 1); 1022 return xdr_xcode_array2(buf, base, desc, 1);
1022} 1023}
1024
1025int
1026xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len,
1027 int (*actor)(struct scatterlist *, void *), void *data)
1028{
1029 int i, ret = 0;
1030 unsigned page_len, thislen, page_offset;
1031 struct scatterlist sg[1];
1032
1033 if (offset >= buf->head[0].iov_len) {
1034 offset -= buf->head[0].iov_len;
1035 } else {
1036 thislen = buf->head[0].iov_len - offset;
1037 if (thislen > len)
1038 thislen = len;
1039 sg_set_buf(sg, buf->head[0].iov_base + offset, thislen);
1040 ret = actor(sg, data);
1041 if (ret)
1042 goto out;
1043 offset = 0;
1044 len -= thislen;
1045 }
1046 if (len == 0)
1047 goto out;
1048
1049 if (offset >= buf->page_len) {
1050 offset -= buf->page_len;
1051 } else {
1052 page_len = buf->page_len - offset;
1053 if (page_len > len)
1054 page_len = len;
1055 len -= page_len;
1056 page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1);
1057 i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT;
1058 thislen = PAGE_CACHE_SIZE - page_offset;
1059 do {
1060 if (thislen > page_len)
1061 thislen = page_len;
1062 sg->page = buf->pages[i];
1063 sg->offset = page_offset;
1064 sg->length = thislen;
1065 ret = actor(sg, data);
1066 if (ret)
1067 goto out;
1068 page_len -= thislen;
1069 i++;
1070 page_offset = 0;
1071 thislen = PAGE_CACHE_SIZE;
1072 } while (page_len != 0);
1073 offset = 0;
1074 }
1075 if (len == 0)
1076 goto out;
1077 if (offset < buf->tail[0].iov_len) {
1078 thislen = buf->tail[0].iov_len - offset;
1079 if (thislen > len)
1080 thislen = len;
1081 sg_set_buf(sg, buf->tail[0].iov_base + offset, thislen);
1082 ret = actor(sg, data);
1083 len -= thislen;
1084 }
1085 if (len != 0)
1086 ret = -EINVAL;
1087out:
1088 return ret;
1089}
1090EXPORT_SYMBOL(xdr_process_buf);
1091
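xdr_process_buf() is new: it presents head, pages, and tail as a sequence of scatterlist chunks and hands each to an actor callback, stopping at the first nonzero return, so code such as checksumming can digest an xdr_buf chunk by chunk without flattening it. A hedged usage fragment, assuming an rpc_rqst in scope (count_bytes and total are illustrative, not kernel names):

static int count_bytes(struct scatterlist *sg, void *data)
{
	*(unsigned int *)data += sg->length;
	return 0;		/* nonzero here would abort the walk */
}

	unsigned int total = 0;
	int err = xdr_process_buf(&rqst->rq_snd_buf, 0,
				  rqst->rq_snd_buf.len, count_bytes, &total);
	/* on success, total == rqst->rq_snd_buf.len */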
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 4f9a5d9791fb..7a3999f0a4a2 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -459,7 +459,6 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
459 if (to->to_maxval && req->rq_timeout >= to->to_maxval) 459 if (to->to_maxval && req->rq_timeout >= to->to_maxval)
460 req->rq_timeout = to->to_maxval; 460 req->rq_timeout = to->to_maxval;
461 req->rq_retries++; 461 req->rq_retries++;
462 pprintk("RPC: %lu retrans\n", jiffies);
463 } else { 462 } else {
464 req->rq_timeout = to->to_initval; 463 req->rq_timeout = to->to_initval;
465 req->rq_retries = 0; 464 req->rq_retries = 0;
@@ -468,7 +467,6 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
468 spin_lock_bh(&xprt->transport_lock); 467 spin_lock_bh(&xprt->transport_lock);
469 rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval); 468 rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval);
470 spin_unlock_bh(&xprt->transport_lock); 469 spin_unlock_bh(&xprt->transport_lock);
471 pprintk("RPC: %lu timeout\n", jiffies);
472 status = -ETIMEDOUT; 470 status = -ETIMEDOUT;
473 } 471 }
474 472
@@ -892,39 +890,25 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i
892 */ 890 */
893struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t size, struct rpc_timeout *to) 891struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t size, struct rpc_timeout *to)
894{ 892{
895 int result;
896 struct rpc_xprt *xprt; 893 struct rpc_xprt *xprt;
897 struct rpc_rqst *req; 894 struct rpc_rqst *req;
898 895
899 if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) {
900 dprintk("RPC: xprt_create_transport: no memory\n");
901 return ERR_PTR(-ENOMEM);
902 }
903 if (size <= sizeof(xprt->addr)) {
904 memcpy(&xprt->addr, ap, size);
905 xprt->addrlen = size;
906 } else {
907 kfree(xprt);
908 dprintk("RPC: xprt_create_transport: address too large\n");
909 return ERR_PTR(-EBADF);
910 }
911
912 switch (proto) { 896 switch (proto) {
913 case IPPROTO_UDP: 897 case IPPROTO_UDP:
914 result = xs_setup_udp(xprt, to); 898 xprt = xs_setup_udp(ap, size, to);
915 break; 899 break;
916 case IPPROTO_TCP: 900 case IPPROTO_TCP:
917 result = xs_setup_tcp(xprt, to); 901 xprt = xs_setup_tcp(ap, size, to);
918 break; 902 break;
919 default: 903 default:
920 printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", 904 printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n",
921 proto); 905 proto);
922 return ERR_PTR(-EIO); 906 return ERR_PTR(-EIO);
923 } 907 }
924 if (result) { 908 if (IS_ERR(xprt)) {
925 kfree(xprt); 909 dprintk("RPC: xprt_create_transport: failed, %ld\n",
926 dprintk("RPC: xprt_create_transport: failed, %d\n", result); 910 -PTR_ERR(xprt));
927 return ERR_PTR(result); 911 return xprt;
928 } 912 }
929 913
930 kref_init(&xprt->kref); 914 kref_init(&xprt->kref);
@@ -970,8 +954,11 @@ static void xprt_destroy(struct kref *kref)
970 dprintk("RPC: destroying transport %p\n", xprt); 954 dprintk("RPC: destroying transport %p\n", xprt);
971 xprt->shutdown = 1; 955 xprt->shutdown = 1;
972 del_timer_sync(&xprt->timer); 956 del_timer_sync(&xprt->timer);
957
958 /*
959 * Tear down transport state and free the rpc_xprt
960 */
973 xprt->ops->destroy(xprt); 961 xprt->ops->destroy(xprt);
974 kfree(xprt);
975} 962}
976 963
977/** 964/**
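xprt_create_transport() no longer allocates a generic rpc_xprt and patches it up: each setup routine allocates, sizes, and on failure frees its own transport, reporting errors through the returned pointer. The ERR_PTR convention that gives the factory a single return channel, in a user-space rendition (xprt_setup is hypothetical):

#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define MAX_ERRNO 4095

static void *ERR_PTR(intptr_t err) { return (void *)err; }
static intptr_t PTR_ERR(const void *p) { return (intptr_t)p; }
static int IS_ERR(const void *p)
{
	/* error codes live in the top page of the address space */
	return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}

struct xprt { char addr[28]; size_t addrlen; };

static struct xprt *xprt_setup(const void *ap, size_t addrlen)
{
	struct xprt *x;

	if (addrlen > sizeof(x->addr))
		return ERR_PTR(-EBADF);		/* address too large */
	x = calloc(1, sizeof(*x));
	if (x == NULL)
		return ERR_PTR(-ENOMEM);
	memcpy(x->addr, ap, addrlen);
	x->addrlen = addrlen;
	return x;
}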
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index cfe3c15be948..3bb232eb5d90 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -46,6 +46,92 @@ unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
46unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; 46unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
47 47
48/* 48/*
49 * We can register our own files under /proc/sys/sunrpc by
50 * calling register_sysctl_table() again. The files in that
51 * directory become the union of all files registered there.
52 *
53 * We simply need to make sure that we don't collide with
54 * someone else's file names!
55 */
56
57#ifdef RPC_DEBUG
58
59static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE;
60static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE;
61static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT;
62static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT;
63
64static struct ctl_table_header *sunrpc_table_header;
65
66/*
67 * FIXME: changing the UDP slot table size should also resize the UDP
68 * socket buffers for existing UDP transports
69 */
70static ctl_table xs_tunables_table[] = {
71 {
72 .ctl_name = CTL_SLOTTABLE_UDP,
73 .procname = "udp_slot_table_entries",
74 .data = &xprt_udp_slot_table_entries,
75 .maxlen = sizeof(unsigned int),
76 .mode = 0644,
77 .proc_handler = &proc_dointvec_minmax,
78 .strategy = &sysctl_intvec,
79 .extra1 = &min_slot_table_size,
80 .extra2 = &max_slot_table_size
81 },
82 {
83 .ctl_name = CTL_SLOTTABLE_TCP,
84 .procname = "tcp_slot_table_entries",
85 .data = &xprt_tcp_slot_table_entries,
86 .maxlen = sizeof(unsigned int),
87 .mode = 0644,
88 .proc_handler = &proc_dointvec_minmax,
89 .strategy = &sysctl_intvec,
90 .extra1 = &min_slot_table_size,
91 .extra2 = &max_slot_table_size
92 },
93 {
94 .ctl_name = CTL_MIN_RESVPORT,
95 .procname = "min_resvport",
96 .data = &xprt_min_resvport,
97 .maxlen = sizeof(unsigned int),
98 .mode = 0644,
99 .proc_handler = &proc_dointvec_minmax,
100 .strategy = &sysctl_intvec,
101 .extra1 = &xprt_min_resvport_limit,
102 .extra2 = &xprt_max_resvport_limit
103 },
104 {
105 .ctl_name = CTL_MAX_RESVPORT,
106 .procname = "max_resvport",
107 .data = &xprt_max_resvport,
108 .maxlen = sizeof(unsigned int),
109 .mode = 0644,
110 .proc_handler = &proc_dointvec_minmax,
111 .strategy = &sysctl_intvec,
112 .extra1 = &xprt_min_resvport_limit,
113 .extra2 = &xprt_max_resvport_limit
114 },
115 {
116 .ctl_name = 0,
117 },
118};
119
120static ctl_table sunrpc_table[] = {
121 {
122 .ctl_name = CTL_SUNRPC,
123 .procname = "sunrpc",
124 .mode = 0555,
125 .child = xs_tunables_table
126 },
127 {
128 .ctl_name = 0,
129 },
130};
131
132#endif
133
134/*
49 * How many times to try sending a request on a socket before waiting 135 * How many times to try sending a request on a socket before waiting
50 * for the socket buffer to clear. 136 * for the socket buffer to clear.
51 */ 137 */
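These are the tables deleted from net/sunrpc/sysctl.c above, now private to the socket transport. The bodies of init_socket_xprt() and cleanup_socket_xprt() are not shown in this diff; a plausible shape, assuming the 2.6-era two-argument register_sysctl_table() (an assumption, not quoted from the patch):

int init_socket_xprt(void)
{
#ifdef RPC_DEBUG
	if (!sunrpc_table_header)
		sunrpc_table_header = register_sysctl_table(sunrpc_table, 1);
#endif
	return 0;
}

void cleanup_socket_xprt(void)
{
#ifdef RPC_DEBUG
	if (sunrpc_table_header) {
		unregister_sysctl_table(sunrpc_table_header);
		sunrpc_table_header = NULL;
	}
#endif
}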
@@ -125,6 +211,55 @@ static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count)
125} 211}
126#endif 212#endif
127 213
214struct sock_xprt {
215 struct rpc_xprt xprt;
216
217 /*
218 * Network layer
219 */
220 struct socket * sock;
221 struct sock * inet;
222
223 /*
224 * State of TCP reply receive
225 */
226 __be32 tcp_fraghdr,
227 tcp_xid;
228
229 u32 tcp_offset,
230 tcp_reclen;
231
232 unsigned long tcp_copied,
233 tcp_flags;
234
235 /*
236 * Connection of transports
237 */
238 struct delayed_work connect_worker;
239 unsigned short port;
240
241 /*
242 * UDP socket buffer size parameters
243 */
244 size_t rcvsize,
245 sndsize;
246
247 /*
248 * Saved socket callback addresses
249 */
250 void (*old_data_ready)(struct sock *, int);
251 void (*old_state_change)(struct sock *);
252 void (*old_write_space)(struct sock *);
253};
254
255/*
256 * TCP receive state flags
257 */
258#define TCP_RCV_LAST_FRAG (1UL << 0)
259#define TCP_RCV_COPY_FRAGHDR (1UL << 1)
260#define TCP_RCV_COPY_XID (1UL << 2)
261#define TCP_RCV_COPY_DATA (1UL << 3)
262
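struct sock_xprt is the core of this patch: socket-specific state leaves the generic rpc_xprt for a wrapper that embeds it as its first member, and every xs_* function below recovers the wrapper with container_of(). The pattern in miniature:

#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct rpc_xprt { int shutdown; };

struct sock_xprt {
	struct rpc_xprt xprt;	/* embedded, not a pointer, so the cast is valid */
	unsigned short port;
};

static unsigned short xs_get_port_sketch(struct rpc_xprt *xprt)
{
	struct sock_xprt *transport =
		container_of(xprt, struct sock_xprt, xprt);
	return transport->port;
}

Generic code keeps passing struct rpc_xprt * and never learns the wrapper's layout, which is what lets each transport shape its own private state.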
128static void xs_format_peer_addresses(struct rpc_xprt *xprt) 263static void xs_format_peer_addresses(struct rpc_xprt *xprt)
129{ 264{
130 struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; 265 struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr;
@@ -168,37 +303,52 @@ static void xs_free_peer_addresses(struct rpc_xprt *xprt)
168 303
169#define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) 304#define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)
170 305
171static inline int xs_send_head(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, unsigned int len) 306static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more)
172{ 307{
173 struct kvec iov = {
174 .iov_base = xdr->head[0].iov_base + base,
175 .iov_len = len - base,
176 };
177 struct msghdr msg = { 308 struct msghdr msg = {
178 .msg_name = addr, 309 .msg_name = addr,
179 .msg_namelen = addrlen, 310 .msg_namelen = addrlen,
180 .msg_flags = XS_SENDMSG_FLAGS, 311 .msg_flags = XS_SENDMSG_FLAGS | (more ? MSG_MORE : 0),
312 };
313 struct kvec iov = {
314 .iov_base = vec->iov_base + base,
315 .iov_len = vec->iov_len - base,
181 }; 316 };
182 317
183 if (xdr->len > len) 318 if (iov.iov_len != 0)
184 msg.msg_flags |= MSG_MORE;
185
186 if (likely(iov.iov_len))
187 return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); 319 return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
188 return kernel_sendmsg(sock, &msg, NULL, 0, 0); 320 return kernel_sendmsg(sock, &msg, NULL, 0, 0);
189} 321}
190 322
191static int xs_send_tail(struct socket *sock, struct xdr_buf *xdr, unsigned int base, unsigned int len) 323static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more)
192{ 324{
193 struct kvec iov = { 325 struct page **ppage;
194 .iov_base = xdr->tail[0].iov_base + base, 326 unsigned int remainder;
195 .iov_len = len - base, 327 int err, sent = 0;
196 }; 328
197 struct msghdr msg = { 329 remainder = xdr->page_len - base;
198 .msg_flags = XS_SENDMSG_FLAGS, 330 base += xdr->page_base;
199 }; 331 ppage = xdr->pages + (base >> PAGE_SHIFT);
332 base &= ~PAGE_MASK;
333 for(;;) {
334 unsigned int len = min_t(unsigned int, PAGE_SIZE - base, remainder);
335 int flags = XS_SENDMSG_FLAGS;
200 336
201 return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); 337 remainder -= len;
338 if (remainder != 0 || more)
339 flags |= MSG_MORE;
340 err = sock->ops->sendpage(sock, *ppage, base, len, flags);
341 if (remainder == 0 || err != len)
342 break;
343 sent += err;
344 ppage++;
345 base = 0;
346 }
347 if (sent == 0)
348 return err;
349 if (err > 0)
350 sent += err;
351 return sent;
202} 352}
203 353
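xs_send_pagedata(), and xs_sendpages() after it, converge on one return convention for partial transmission: if any bytes were queued, report the byte count, folding in a final short send; only a call that moved nothing surfaces the error. MSG_MORE is set on every chunk except the last so the stack may coalesce. The accumulation rule in isolation:

/* sent: bytes queued by earlier chunks; err: result of the last
 * sendpage/sendmsg call (a byte count or a negative errno). */
static int finish_send(int sent, int err)
{
	if (sent == 0)
		return err;	/* nothing went out: report the error */
	if (err > 0)
		sent += err;	/* fold in a trailing short send */
	return sent;		/* partial progress wins over the error */
}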
204/** 354/**
@@ -210,76 +360,51 @@ static int xs_send_tail(struct socket *sock, struct xdr_buf *xdr, unsigned int b
210 * @base: starting position in the buffer 360 * @base: starting position in the buffer
211 * 361 *
212 */ 362 */
213static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base) 363static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base)
214{ 364{
215 struct page **ppage = xdr->pages; 365 unsigned int remainder = xdr->len - base;
216 unsigned int len, pglen = xdr->page_len; 366 int err, sent = 0;
217 int err, ret = 0;
218 367
219 if (unlikely(!sock)) 368 if (unlikely(!sock))
220 return -ENOTCONN; 369 return -ENOTCONN;
221 370
222 clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); 371 clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
372 if (base != 0) {
373 addr = NULL;
374 addrlen = 0;
375 }
223 376
224 len = xdr->head[0].iov_len; 377 if (base < xdr->head[0].iov_len || addr != NULL) {
225 if (base < len || (addr != NULL && base == 0)) { 378 unsigned int len = xdr->head[0].iov_len - base;
226 err = xs_send_head(sock, addr, addrlen, xdr, base, len); 379 remainder -= len;
227 if (ret == 0) 380 err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0);
228 ret = err; 381 if (remainder == 0 || err != len)
229 else if (err > 0)
230 ret += err;
231 if (err != (len - base))
232 goto out; 382 goto out;
383 sent += err;
233 base = 0; 384 base = 0;
234 } else 385 } else
235 base -= len; 386 base -= xdr->head[0].iov_len;
236
237 if (unlikely(pglen == 0))
238 goto copy_tail;
239 if (unlikely(base >= pglen)) {
240 base -= pglen;
241 goto copy_tail;
242 }
243 if (base || xdr->page_base) {
244 pglen -= base;
245 base += xdr->page_base;
246 ppage += base >> PAGE_CACHE_SHIFT;
247 base &= ~PAGE_CACHE_MASK;
248 }
249
250 do {
251 int flags = XS_SENDMSG_FLAGS;
252
253 len = PAGE_CACHE_SIZE;
254 if (base)
255 len -= base;
256 if (pglen < len)
257 len = pglen;
258
259 if (pglen != len || xdr->tail[0].iov_len != 0)
260 flags |= MSG_MORE;
261 387
262 err = kernel_sendpage(sock, *ppage, base, len, flags); 388 if (base < xdr->page_len) {
263 if (ret == 0) 389 unsigned int len = xdr->page_len - base;
264 ret = err; 390 remainder -= len;
265 else if (err > 0) 391 err = xs_send_pagedata(sock, xdr, base, remainder != 0);
266 ret += err; 392 if (remainder == 0 || err != len)
267 if (err != len)
268 goto out; 393 goto out;
394 sent += err;
269 base = 0; 395 base = 0;
270 ppage++; 396 } else
271 } while ((pglen -= len) != 0); 397 base -= xdr->page_len;
272copy_tail: 398
273 len = xdr->tail[0].iov_len; 399 if (base >= xdr->tail[0].iov_len)
274 if (base < len) { 400 return sent;
275 err = xs_send_tail(sock, xdr, base, len); 401 err = xs_send_kvec(sock, NULL, 0, &xdr->tail[0], base, 0);
276 if (ret == 0)
277 ret = err;
278 else if (err > 0)
279 ret += err;
280 }
281out: 402out:
282 return ret; 403 if (sent == 0)
404 return err;
405 if (err > 0)
406 sent += err;
407 return sent;
283} 408}
284 409
285/** 410/**
@@ -291,19 +416,20 @@ static void xs_nospace(struct rpc_task *task)
291{ 416{
292 struct rpc_rqst *req = task->tk_rqstp; 417 struct rpc_rqst *req = task->tk_rqstp;
293 struct rpc_xprt *xprt = req->rq_xprt; 418 struct rpc_xprt *xprt = req->rq_xprt;
419 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
294 420
295 dprintk("RPC: %4d xmit incomplete (%u left of %u)\n", 421 dprintk("RPC: %4d xmit incomplete (%u left of %u)\n",
296 task->tk_pid, req->rq_slen - req->rq_bytes_sent, 422 task->tk_pid, req->rq_slen - req->rq_bytes_sent,
297 req->rq_slen); 423 req->rq_slen);
298 424
299 if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { 425 if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) {
300 /* Protect against races with write_space */ 426 /* Protect against races with write_space */
301 spin_lock_bh(&xprt->transport_lock); 427 spin_lock_bh(&xprt->transport_lock);
302 428
303 /* Don't race with disconnect */ 429 /* Don't race with disconnect */
304 if (!xprt_connected(xprt)) 430 if (!xprt_connected(xprt))
305 task->tk_status = -ENOTCONN; 431 task->tk_status = -ENOTCONN;
306 else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) 432 else if (test_bit(SOCK_NOSPACE, &transport->sock->flags))
307 xprt_wait_for_buffer_space(task); 433 xprt_wait_for_buffer_space(task);
308 434
309 spin_unlock_bh(&xprt->transport_lock); 435 spin_unlock_bh(&xprt->transport_lock);
@@ -327,6 +453,7 @@ static int xs_udp_send_request(struct rpc_task *task)
327{ 453{
328 struct rpc_rqst *req = task->tk_rqstp; 454 struct rpc_rqst *req = task->tk_rqstp;
329 struct rpc_xprt *xprt = req->rq_xprt; 455 struct rpc_xprt *xprt = req->rq_xprt;
456 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
330 struct xdr_buf *xdr = &req->rq_snd_buf; 457 struct xdr_buf *xdr = &req->rq_snd_buf;
331 int status; 458 int status;
332 459
@@ -335,8 +462,10 @@ static int xs_udp_send_request(struct rpc_task *task)
335 req->rq_svec->iov_len); 462 req->rq_svec->iov_len);
336 463
337 req->rq_xtime = jiffies; 464 req->rq_xtime = jiffies;
338 status = xs_sendpages(xprt->sock, (struct sockaddr *) &xprt->addr, 465 status = xs_sendpages(transport->sock,
339 xprt->addrlen, xdr, req->rq_bytes_sent); 466 (struct sockaddr *) &xprt->addr,
467 xprt->addrlen, xdr,
468 req->rq_bytes_sent);
340 469
341 dprintk("RPC: xs_udp_send_request(%u) = %d\n", 470 dprintk("RPC: xs_udp_send_request(%u) = %d\n",
342 xdr->len - req->rq_bytes_sent, status); 471 xdr->len - req->rq_bytes_sent, status);
@@ -392,6 +521,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
392{ 521{
393 struct rpc_rqst *req = task->tk_rqstp; 522 struct rpc_rqst *req = task->tk_rqstp;
394 struct rpc_xprt *xprt = req->rq_xprt; 523 struct rpc_xprt *xprt = req->rq_xprt;
524 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
395 struct xdr_buf *xdr = &req->rq_snd_buf; 525 struct xdr_buf *xdr = &req->rq_snd_buf;
396 int status, retry = 0; 526 int status, retry = 0;
397 527
@@ -406,8 +536,8 @@ static int xs_tcp_send_request(struct rpc_task *task)
406 * called sendmsg(). */ 536 * called sendmsg(). */
407 while (1) { 537 while (1) {
408 req->rq_xtime = jiffies; 538 req->rq_xtime = jiffies;
409 status = xs_sendpages(xprt->sock, NULL, 0, xdr, 539 status = xs_sendpages(transport->sock,
410 req->rq_bytes_sent); 540 NULL, 0, xdr, req->rq_bytes_sent);
411 541
412 dprintk("RPC: xs_tcp_send_request(%u) = %d\n", 542 dprintk("RPC: xs_tcp_send_request(%u) = %d\n",
413 xdr->len - req->rq_bytes_sent, status); 543 xdr->len - req->rq_bytes_sent, status);
@@ -485,8 +615,9 @@ out_release:
485 */ 615 */
486static void xs_close(struct rpc_xprt *xprt) 616static void xs_close(struct rpc_xprt *xprt)
487{ 617{
488 struct socket *sock = xprt->sock; 618 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
489 struct sock *sk = xprt->inet; 619 struct socket *sock = transport->sock;
620 struct sock *sk = transport->inet;
490 621
491 if (!sk) 622 if (!sk)
492 goto clear_close_wait; 623 goto clear_close_wait;
@@ -494,13 +625,13 @@ static void xs_close(struct rpc_xprt *xprt)
494 dprintk("RPC: xs_close xprt %p\n", xprt); 625 dprintk("RPC: xs_close xprt %p\n", xprt);
495 626
496 write_lock_bh(&sk->sk_callback_lock); 627 write_lock_bh(&sk->sk_callback_lock);
497 xprt->inet = NULL; 628 transport->inet = NULL;
498 xprt->sock = NULL; 629 transport->sock = NULL;
499 630
500 sk->sk_user_data = NULL; 631 sk->sk_user_data = NULL;
501 sk->sk_data_ready = xprt->old_data_ready; 632 sk->sk_data_ready = transport->old_data_ready;
502 sk->sk_state_change = xprt->old_state_change; 633 sk->sk_state_change = transport->old_state_change;
503 sk->sk_write_space = xprt->old_write_space; 634 sk->sk_write_space = transport->old_write_space;
504 write_unlock_bh(&sk->sk_callback_lock); 635 write_unlock_bh(&sk->sk_callback_lock);
505 636
506 sk->sk_no_check = 0; 637 sk->sk_no_check = 0;
@@ -519,15 +650,18 @@ clear_close_wait:
519 */ 650 */
520static void xs_destroy(struct rpc_xprt *xprt) 651static void xs_destroy(struct rpc_xprt *xprt)
521{ 652{
653 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
654
522 dprintk("RPC: xs_destroy xprt %p\n", xprt); 655 dprintk("RPC: xs_destroy xprt %p\n", xprt);
523 656
524 cancel_delayed_work(&xprt->connect_worker); 657 cancel_delayed_work(&transport->connect_worker);
525 flush_scheduled_work(); 658 flush_scheduled_work();
526 659
527 xprt_disconnect(xprt); 660 xprt_disconnect(xprt);
528 xs_close(xprt); 661 xs_close(xprt);
529 xs_free_peer_addresses(xprt); 662 xs_free_peer_addresses(xprt);
530 kfree(xprt->slot); 663 kfree(xprt->slot);
664 kfree(xprt);
531} 665}
532 666
533static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) 667static inline struct rpc_xprt *xprt_from_sock(struct sock *sk)
@@ -603,91 +737,75 @@ static void xs_udp_data_ready(struct sock *sk, int len)
603 read_unlock(&sk->sk_callback_lock); 737 read_unlock(&sk->sk_callback_lock);
604} 738}
605 739
606static inline size_t xs_tcp_copy_data(skb_reader_t *desc, void *p, size_t len) 740static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc)
607{
608 if (len > desc->count)
609 len = desc->count;
610 if (skb_copy_bits(desc->skb, desc->offset, p, len)) {
611 dprintk("RPC: failed to copy %zu bytes from skb. %zu bytes remain\n",
612 len, desc->count);
613 return 0;
614 }
615 desc->offset += len;
616 desc->count -= len;
617 dprintk("RPC: copied %zu bytes from skb. %zu bytes remain\n",
618 len, desc->count);
619 return len;
620}
621
622static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc)
623{ 741{
742 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
624 size_t len, used; 743 size_t len, used;
625 char *p; 744 char *p;
626 745
627 p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset; 746 p = ((char *) &transport->tcp_fraghdr) + transport->tcp_offset;
628 len = sizeof(xprt->tcp_recm) - xprt->tcp_offset; 747 len = sizeof(transport->tcp_fraghdr) - transport->tcp_offset;
629 used = xs_tcp_copy_data(desc, p, len); 748 used = xdr_skb_read_bits(desc, p, len);
630 xprt->tcp_offset += used; 749 transport->tcp_offset += used;
631 if (used != len) 750 if (used != len)
632 return; 751 return;
633 752
634 xprt->tcp_reclen = ntohl(xprt->tcp_recm); 753 transport->tcp_reclen = ntohl(transport->tcp_fraghdr);
635 if (xprt->tcp_reclen & RPC_LAST_STREAM_FRAGMENT) 754 if (transport->tcp_reclen & RPC_LAST_STREAM_FRAGMENT)
636 xprt->tcp_flags |= XPRT_LAST_FRAG; 755 transport->tcp_flags |= TCP_RCV_LAST_FRAG;
637 else 756 else
638 xprt->tcp_flags &= ~XPRT_LAST_FRAG; 757 transport->tcp_flags &= ~TCP_RCV_LAST_FRAG;
639 xprt->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK; 758 transport->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK;
640 759
641 xprt->tcp_flags &= ~XPRT_COPY_RECM; 760 transport->tcp_flags &= ~TCP_RCV_COPY_FRAGHDR;
642 xprt->tcp_offset = 0; 761 transport->tcp_offset = 0;
643 762
644 /* Sanity check of the record length */ 763 /* Sanity check of the record length */
645 if (unlikely(xprt->tcp_reclen < 4)) { 764 if (unlikely(transport->tcp_reclen < 4)) {
646 dprintk("RPC: invalid TCP record fragment length\n"); 765 dprintk("RPC: invalid TCP record fragment length\n");
647 xprt_disconnect(xprt); 766 xprt_disconnect(xprt);
648 return; 767 return;
649 } 768 }
650 dprintk("RPC: reading TCP record fragment of length %d\n", 769 dprintk("RPC: reading TCP record fragment of length %d\n",
651 xprt->tcp_reclen); 770 transport->tcp_reclen);
652} 771}
653 772
654static void xs_tcp_check_recm(struct rpc_xprt *xprt) 773static void xs_tcp_check_fraghdr(struct sock_xprt *transport)
655{ 774{
656 dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n", 775 if (transport->tcp_offset == transport->tcp_reclen) {
657 xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags); 776 transport->tcp_flags |= TCP_RCV_COPY_FRAGHDR;
658 if (xprt->tcp_offset == xprt->tcp_reclen) { 777 transport->tcp_offset = 0;
659 xprt->tcp_flags |= XPRT_COPY_RECM; 778 if (transport->tcp_flags & TCP_RCV_LAST_FRAG) {
660 xprt->tcp_offset = 0; 779 transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
661 if (xprt->tcp_flags & XPRT_LAST_FRAG) { 780 transport->tcp_flags |= TCP_RCV_COPY_XID;
662 xprt->tcp_flags &= ~XPRT_COPY_DATA; 781 transport->tcp_copied = 0;
663 xprt->tcp_flags |= XPRT_COPY_XID;
664 xprt->tcp_copied = 0;
665 } 782 }
666 } 783 }
667} 784}
668 785
669static inline void xs_tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc) 786static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_reader *desc)
670{ 787{
671 size_t len, used; 788 size_t len, used;
672 char *p; 789 char *p;
673 790
674 len = sizeof(xprt->tcp_xid) - xprt->tcp_offset; 791 len = sizeof(transport->tcp_xid) - transport->tcp_offset;
675 dprintk("RPC: reading XID (%Zu bytes)\n", len); 792 dprintk("RPC: reading XID (%Zu bytes)\n", len);
676 p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset; 793 p = ((char *) &transport->tcp_xid) + transport->tcp_offset;
677 used = xs_tcp_copy_data(desc, p, len); 794 used = xdr_skb_read_bits(desc, p, len);
678 xprt->tcp_offset += used; 795 transport->tcp_offset += used;
679 if (used != len) 796 if (used != len)
680 return; 797 return;
681 xprt->tcp_flags &= ~XPRT_COPY_XID; 798 transport->tcp_flags &= ~TCP_RCV_COPY_XID;
682 xprt->tcp_flags |= XPRT_COPY_DATA; 799 transport->tcp_flags |= TCP_RCV_COPY_DATA;
683 xprt->tcp_copied = 4; 800 transport->tcp_copied = 4;
684 dprintk("RPC: reading reply for XID %08x\n", 801 dprintk("RPC: reading reply for XID %08x\n",
685 ntohl(xprt->tcp_xid)); 802 ntohl(transport->tcp_xid));
686 xs_tcp_check_recm(xprt); 803 xs_tcp_check_fraghdr(transport);
687} 804}
688 805
689static inline void xs_tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) 806static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_reader *desc)
690{ 807{
808 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
691 struct rpc_rqst *req; 809 struct rpc_rqst *req;
692 struct xdr_buf *rcvbuf; 810 struct xdr_buf *rcvbuf;
693 size_t len; 811 size_t len;
@@ -695,116 +813,118 @@ static inline void xs_tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc
695 813
696 /* Find and lock the request corresponding to this xid */ 814 /* Find and lock the request corresponding to this xid */
697 spin_lock(&xprt->transport_lock); 815 spin_lock(&xprt->transport_lock);
698 req = xprt_lookup_rqst(xprt, xprt->tcp_xid); 816 req = xprt_lookup_rqst(xprt, transport->tcp_xid);
699 if (!req) { 817 if (!req) {
700 xprt->tcp_flags &= ~XPRT_COPY_DATA; 818 transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
701 dprintk("RPC: XID %08x request not found!\n", 819 dprintk("RPC: XID %08x request not found!\n",
702 ntohl(xprt->tcp_xid)); 820 ntohl(transport->tcp_xid));
703 spin_unlock(&xprt->transport_lock); 821 spin_unlock(&xprt->transport_lock);
704 return; 822 return;
705 } 823 }
706 824
707 rcvbuf = &req->rq_private_buf; 825 rcvbuf = &req->rq_private_buf;
708 len = desc->count; 826 len = desc->count;
709 if (len > xprt->tcp_reclen - xprt->tcp_offset) { 827 if (len > transport->tcp_reclen - transport->tcp_offset) {
710 skb_reader_t my_desc; 828 struct xdr_skb_reader my_desc;
711 829
712 len = xprt->tcp_reclen - xprt->tcp_offset; 830 len = transport->tcp_reclen - transport->tcp_offset;
713 memcpy(&my_desc, desc, sizeof(my_desc)); 831 memcpy(&my_desc, desc, sizeof(my_desc));
714 my_desc.count = len; 832 my_desc.count = len;
715 r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, 833 r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied,
716 &my_desc, xs_tcp_copy_data); 834 &my_desc, xdr_skb_read_bits);
717 desc->count -= r; 835 desc->count -= r;
718 desc->offset += r; 836 desc->offset += r;
719 } else 837 } else
720 r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, 838 r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied,
721 desc, xs_tcp_copy_data); 839 desc, xdr_skb_read_bits);
722 840
723 if (r > 0) { 841 if (r > 0) {
724 xprt->tcp_copied += r; 842 transport->tcp_copied += r;
725 xprt->tcp_offset += r; 843 transport->tcp_offset += r;
726 } 844 }
727 if (r != len) { 845 if (r != len) {
728 /* Error when copying to the receive buffer, 846 /* Error when copying to the receive buffer,
729 * usually because we weren't able to allocate 847 * usually because we weren't able to allocate
730 * additional buffer pages. All we can do now 848 * additional buffer pages. All we can do now
731 * is turn off XPRT_COPY_DATA, so the request 849 * is turn off TCP_RCV_COPY_DATA, so the request
732 * will not receive any additional updates, 850 * will not receive any additional updates,
733 * and time out. 851 * and time out.
734 * Any remaining data from this record will 852 * Any remaining data from this record will
735 * be discarded. 853 * be discarded.
736 */ 854 */
737 xprt->tcp_flags &= ~XPRT_COPY_DATA; 855 transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
738 dprintk("RPC: XID %08x truncated request\n", 856 dprintk("RPC: XID %08x truncated request\n",
739 ntohl(xprt->tcp_xid)); 857 ntohl(transport->tcp_xid));
740 dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", 858 dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n",
741 xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); 859 xprt, transport->tcp_copied, transport->tcp_offset,
860 transport->tcp_reclen);
742 goto out; 861 goto out;
743 } 862 }
744 863
745 dprintk("RPC: XID %08x read %Zd bytes\n", 864 dprintk("RPC: XID %08x read %Zd bytes\n",
746 ntohl(xprt->tcp_xid), r); 865 ntohl(transport->tcp_xid), r);
747 dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", 866 dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n",
748 xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); 867 xprt, transport->tcp_copied, transport->tcp_offset,
749 868 transport->tcp_reclen);
750 if (xprt->tcp_copied == req->rq_private_buf.buflen) 869
751 xprt->tcp_flags &= ~XPRT_COPY_DATA; 870 if (transport->tcp_copied == req->rq_private_buf.buflen)
752 else if (xprt->tcp_offset == xprt->tcp_reclen) { 871 transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
753 if (xprt->tcp_flags & XPRT_LAST_FRAG) 872 else if (transport->tcp_offset == transport->tcp_reclen) {
754 xprt->tcp_flags &= ~XPRT_COPY_DATA; 873 if (transport->tcp_flags & TCP_RCV_LAST_FRAG)
874 transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
755 } 875 }
756 876
757out: 877out:
758 if (!(xprt->tcp_flags & XPRT_COPY_DATA)) 878 if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
759 xprt_complete_rqst(req->rq_task, xprt->tcp_copied); 879 xprt_complete_rqst(req->rq_task, transport->tcp_copied);
760 spin_unlock(&xprt->transport_lock); 880 spin_unlock(&xprt->transport_lock);
761 xs_tcp_check_recm(xprt); 881 xs_tcp_check_fraghdr(transport);
762} 882}
763 883
764static inline void xs_tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc) 884static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_skb_reader *desc)
765{ 885{
766 size_t len; 886 size_t len;
767 887
768 len = xprt->tcp_reclen - xprt->tcp_offset; 888 len = transport->tcp_reclen - transport->tcp_offset;
769 if (len > desc->count) 889 if (len > desc->count)
770 len = desc->count; 890 len = desc->count;
771 desc->count -= len; 891 desc->count -= len;
772 desc->offset += len; 892 desc->offset += len;
773 xprt->tcp_offset += len; 893 transport->tcp_offset += len;
774 dprintk("RPC: discarded %Zu bytes\n", len); 894 dprintk("RPC: discarded %Zu bytes\n", len);
775 xs_tcp_check_recm(xprt); 895 xs_tcp_check_fraghdr(transport);
776} 896}
777 897
778static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len) 898static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len)
779{ 899{
780 struct rpc_xprt *xprt = rd_desc->arg.data; 900 struct rpc_xprt *xprt = rd_desc->arg.data;
781 skb_reader_t desc = { 901 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
902 struct xdr_skb_reader desc = {
782 .skb = skb, 903 .skb = skb,
783 .offset = offset, 904 .offset = offset,
784 .count = len, 905 .count = len,
785 .csum = 0
786 }; 906 };
787 907
788 dprintk("RPC: xs_tcp_data_recv started\n"); 908 dprintk("RPC: xs_tcp_data_recv started\n");
789 do { 909 do {
790 /* Read in a new fragment marker if necessary */ 910 /* Read in a new fragment marker if necessary */
791 /* Can we ever really expect to get completely empty fragments? */ 911 /* Can we ever really expect to get completely empty fragments? */
792 if (xprt->tcp_flags & XPRT_COPY_RECM) { 912 if (transport->tcp_flags & TCP_RCV_COPY_FRAGHDR) {
793 xs_tcp_read_fraghdr(xprt, &desc); 913 xs_tcp_read_fraghdr(xprt, &desc);
794 continue; 914 continue;
795 } 915 }
796 /* Read in the xid if necessary */ 916 /* Read in the xid if necessary */
797 if (xprt->tcp_flags & XPRT_COPY_XID) { 917 if (transport->tcp_flags & TCP_RCV_COPY_XID) {
798 xs_tcp_read_xid(xprt, &desc); 918 xs_tcp_read_xid(transport, &desc);
799 continue; 919 continue;
800 } 920 }
801 /* Read in the request data */ 921 /* Read in the request data */
802 if (xprt->tcp_flags & XPRT_COPY_DATA) { 922 if (transport->tcp_flags & TCP_RCV_COPY_DATA) {
803 xs_tcp_read_request(xprt, &desc); 923 xs_tcp_read_request(xprt, &desc);
804 continue; 924 continue;
805 } 925 }
806 /* Skip over any trailing bytes on short reads */ 926 /* Skip over any trailing bytes on short reads */
807 xs_tcp_read_discard(xprt, &desc); 927 xs_tcp_read_discard(transport, &desc);
808 } while (desc.count); 928 } while (desc.count);
809 dprintk("RPC: xs_tcp_data_recv done\n"); 929 dprintk("RPC: xs_tcp_data_recv done\n");
810 return len - desc.count; 930 return len - desc.count;
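The renamed TCP_RCV_* bits drive a small state machine over the byte stream: read the 4-byte record marker, then the XID, then payload, then discard any trailer, with tcp_offset letting each field resume when a record spans skbs. The marker packs a last-fragment flag above a 31-bit length; a self-contained parser over one flat buffer shows the framing (RPC_LAST_STREAM_FRAGMENT and RPC_FRAGMENT_SIZE_MASK express the same split in the kernel):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define LAST_FRAG 0x80000000u

static void walk_records(const unsigned char *p, size_t len)
{
	while (len >= 4) {
		uint32_t marker = ((uint32_t)p[0] << 24) | (p[1] << 16) |
				  (p[2] << 8) | p[3];
		uint32_t reclen = marker & ~LAST_FRAG;

		p += 4;
		len -= 4;
		if (reclen > len)
			break;	/* fragment continues in a later skb */
		printf("fragment of %u bytes%s\n", (unsigned int)reclen,
		       (marker & LAST_FRAG) ? " (last in record)" : "");
		p += reclen;
		len -= reclen;
	}
}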
@@ -858,11 +978,16 @@ static void xs_tcp_state_change(struct sock *sk)
858 case TCP_ESTABLISHED: 978 case TCP_ESTABLISHED:
859 spin_lock_bh(&xprt->transport_lock); 979 spin_lock_bh(&xprt->transport_lock);
860 if (!xprt_test_and_set_connected(xprt)) { 980 if (!xprt_test_and_set_connected(xprt)) {
981 struct sock_xprt *transport = container_of(xprt,
982 struct sock_xprt, xprt);
983
861 /* Reset TCP record info */ 984 /* Reset TCP record info */
862 xprt->tcp_offset = 0; 985 transport->tcp_offset = 0;
863 xprt->tcp_reclen = 0; 986 transport->tcp_reclen = 0;
864 xprt->tcp_copied = 0; 987 transport->tcp_copied = 0;
865 xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; 988 transport->tcp_flags =
989 TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
990
866 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; 991 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
867 xprt_wake_pending_tasks(xprt, 0); 992 xprt_wake_pending_tasks(xprt, 0);
868 } 993 }
@@ -951,15 +1076,16 @@ static void xs_tcp_write_space(struct sock *sk)
951 1076
952static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt) 1077static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt)
953{ 1078{
954 struct sock *sk = xprt->inet; 1079 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
1080 struct sock *sk = transport->inet;
955 1081
956 if (xprt->rcvsize) { 1082 if (transport->rcvsize) {
957 sk->sk_userlocks |= SOCK_RCVBUF_LOCK; 1083 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
958 sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs * 2; 1084 sk->sk_rcvbuf = transport->rcvsize * xprt->max_reqs * 2;
959 } 1085 }
960 if (xprt->sndsize) { 1086 if (transport->sndsize) {
961 sk->sk_userlocks |= SOCK_SNDBUF_LOCK; 1087 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
962 sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2; 1088 sk->sk_sndbuf = transport->sndsize * xprt->max_reqs * 2;
963 sk->sk_write_space(sk); 1089 sk->sk_write_space(sk);
964 } 1090 }
965} 1091}
@@ -974,12 +1100,14 @@ static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt)
974 */ 1100 */
975static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize) 1101static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize)
976{ 1102{
977 xprt->sndsize = 0; 1103 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
1104
1105 transport->sndsize = 0;
978 if (sndsize) 1106 if (sndsize)
979 xprt->sndsize = sndsize + 1024; 1107 transport->sndsize = sndsize + 1024;
980 xprt->rcvsize = 0; 1108 transport->rcvsize = 0;
981 if (rcvsize) 1109 if (rcvsize)
982 xprt->rcvsize = rcvsize + 1024; 1110 transport->rcvsize = rcvsize + 1024;
983 1111
984 xs_udp_do_set_buffer_size(xprt); 1112 xs_udp_do_set_buffer_size(xprt);
985} 1113}
@@ -1003,19 +1131,6 @@ static unsigned short xs_get_random_port(void)
1003} 1131}
1004 1132
1005/** 1133/**
1006 * xs_print_peer_address - format an IPv4 address for printing
1007 * @xprt: generic transport
1008 * @format: flags field indicating which parts of the address to render
1009 */
1010static char *xs_print_peer_address(struct rpc_xprt *xprt, enum rpc_display_format_t format)
1011{
1012 if (xprt->address_strings[format] != NULL)
1013 return xprt->address_strings[format];
1014 else
1015 return "unprintable";
1016}
1017
1018/**
1019 * xs_set_port - reset the port number in the remote endpoint address 1134 * xs_set_port - reset the port number in the remote endpoint address
1020 * @xprt: generic transport 1135 * @xprt: generic transport
1021 * @port: new port number 1136 * @port: new port number
@@ -1030,20 +1145,20 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
1030 sap->sin_port = htons(port); 1145 sap->sin_port = htons(port);
1031} 1146}
1032 1147
1033static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) 1148static int xs_bindresvport(struct sock_xprt *transport, struct socket *sock)
1034{ 1149{
1035 struct sockaddr_in myaddr = { 1150 struct sockaddr_in myaddr = {
1036 .sin_family = AF_INET, 1151 .sin_family = AF_INET,
1037 }; 1152 };
1038 int err; 1153 int err;
1039 unsigned short port = xprt->port; 1154 unsigned short port = transport->port;
1040 1155
1041 do { 1156 do {
1042 myaddr.sin_port = htons(port); 1157 myaddr.sin_port = htons(port);
1043 err = kernel_bind(sock, (struct sockaddr *) &myaddr, 1158 err = kernel_bind(sock, (struct sockaddr *) &myaddr,
1044 sizeof(myaddr)); 1159 sizeof(myaddr));
1045 if (err == 0) { 1160 if (err == 0) {
1046 xprt->port = port; 1161 transport->port = port;
1047 dprintk("RPC: xs_bindresvport bound to port %u\n", 1162 dprintk("RPC: xs_bindresvport bound to port %u\n",
1048 port); 1163 port);
1049 return 0; 1164 return 0;
@@ -1052,7 +1167,7 @@ static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
1052 port = xprt_max_resvport; 1167 port = xprt_max_resvport;
1053 else 1168 else
1054 port--; 1169 port--;
1055 } while (err == -EADDRINUSE && port != xprt->port); 1170 } while (err == -EADDRINUSE && port != transport->port);
1056 1171
1057 dprintk("RPC: can't bind to reserved port (%d).\n", -err); 1172 dprintk("RPC: can't bind to reserved port (%d).\n", -err);
1058 return err; 1173 return err;
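xs_bindresvport() keeps the last successfully bound port in the transport (transport->port now, xprt->port before) and scans downward from it, wrapping from the minimum back to the maximum and giving up after one full lap; the kernel loop additionally bails early on any error other than -EADDRINUSE. The search in isolation, with a hypothetical try_bind callback:

/* Returns the bound port, or -1 after one full lap over [lo, hi]. */
static int pick_resvport(int (*try_bind)(unsigned short, void *), void *ctx,
			 unsigned short start, unsigned short lo,
			 unsigned short hi)
{
	unsigned short port = start;

	do {
		if (try_bind(port, ctx) == 0)
			return port;
		port = (port <= lo) ? hi : port - 1;	/* wrap at the floor */
	} while (port != start);
	return -1;
}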
@@ -1066,9 +1181,10 @@ static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
1066 */ 1181 */
1067static void xs_udp_connect_worker(struct work_struct *work) 1182static void xs_udp_connect_worker(struct work_struct *work)
1068{ 1183{
1069 struct rpc_xprt *xprt = 1184 struct sock_xprt *transport =
1070 container_of(work, struct rpc_xprt, connect_worker.work); 1185 container_of(work, struct sock_xprt, connect_worker.work);
1071 struct socket *sock = xprt->sock; 1186 struct rpc_xprt *xprt = &transport->xprt;
1187 struct socket *sock = transport->sock;
1072 int err, status = -EIO; 1188 int err, status = -EIO;
1073 1189
1074 if (xprt->shutdown || !xprt_bound(xprt)) 1190 if (xprt->shutdown || !xprt_bound(xprt))
@@ -1082,23 +1198,23 @@ static void xs_udp_connect_worker(struct work_struct *work)
1082 goto out; 1198 goto out;
1083 } 1199 }
1084 1200
1085 if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) { 1201 if (xprt->resvport && xs_bindresvport(transport, sock) < 0) {
1086 sock_release(sock); 1202 sock_release(sock);
1087 goto out; 1203 goto out;
1088 } 1204 }
1089 1205
1090 dprintk("RPC: worker connecting xprt %p to address: %s\n", 1206 dprintk("RPC: worker connecting xprt %p to address: %s\n",
1091 xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); 1207 xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
1092 1208
1093 if (!xprt->inet) { 1209 if (!transport->inet) {
1094 struct sock *sk = sock->sk; 1210 struct sock *sk = sock->sk;
1095 1211
1096 write_lock_bh(&sk->sk_callback_lock); 1212 write_lock_bh(&sk->sk_callback_lock);
1097 1213
1098 sk->sk_user_data = xprt; 1214 sk->sk_user_data = xprt;
1099 xprt->old_data_ready = sk->sk_data_ready; 1215 transport->old_data_ready = sk->sk_data_ready;
1100 xprt->old_state_change = sk->sk_state_change; 1216 transport->old_state_change = sk->sk_state_change;
1101 xprt->old_write_space = sk->sk_write_space; 1217 transport->old_write_space = sk->sk_write_space;
1102 sk->sk_data_ready = xs_udp_data_ready; 1218 sk->sk_data_ready = xs_udp_data_ready;
1103 sk->sk_write_space = xs_udp_write_space; 1219 sk->sk_write_space = xs_udp_write_space;
1104 sk->sk_no_check = UDP_CSUM_NORCV; 1220 sk->sk_no_check = UDP_CSUM_NORCV;
@@ -1107,8 +1223,8 @@ static void xs_udp_connect_worker(struct work_struct *work)
1107 xprt_set_connected(xprt); 1223 xprt_set_connected(xprt);
1108 1224
1109 /* Reset to new socket */ 1225 /* Reset to new socket */
1110 xprt->sock = sock; 1226 transport->sock = sock;
1111 xprt->inet = sk; 1227 transport->inet = sk;
1112 1228
1113 write_unlock_bh(&sk->sk_callback_lock); 1229 write_unlock_bh(&sk->sk_callback_lock);
1114 } 1230 }
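
The reworked worker recovers its private state in two steps: container_of() maps the embedded work item back to the struct sock_xprt, and the generic struct rpc_xprt is then simply the embedded member. The shape of the idiom, reduced to hypothetical types (needs <linux/workqueue.h> and <linux/kernel.h>):

struct generic_part {
	int state;
};

struct private_part {
	struct generic_part	gen;	/* embedded generic state */
	struct delayed_work	worker;	/* private work item */
};

static void private_worker(struct work_struct *work)
{
	/* work points at private->worker.work, so walk back out */
	struct private_part *private =
		container_of(work, struct private_part, worker.work);
	struct generic_part *gen = &private->gen;

	gen->state = 1;			/* both views are now usable */
}
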
@@ -1126,7 +1242,7 @@ out:
1126static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) 1242static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
1127{ 1243{
1128 int result; 1244 int result;
1129 struct socket *sock = xprt->sock; 1245 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
1130 struct sockaddr any; 1246 struct sockaddr any;
1131 1247
1132 dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt); 1248 dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt);
@@ -1137,7 +1253,7 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
1137 */ 1253 */
1138 memset(&any, 0, sizeof(any)); 1254 memset(&any, 0, sizeof(any));
1139 any.sa_family = AF_UNSPEC; 1255 any.sa_family = AF_UNSPEC;
1140 result = kernel_connect(sock, &any, sizeof(any), 0); 1256 result = kernel_connect(transport->sock, &any, sizeof(any), 0);
1141 if (result) 1257 if (result)
1142 dprintk("RPC: AF_UNSPEC connect return code %d\n", 1258 dprintk("RPC: AF_UNSPEC connect return code %d\n",
1143 result); 1259 result);
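
The AF_UNSPEC connect relies on standard socket semantics: connecting to an address whose sa_family is AF_UNSPEC dissolves the existing association while the local binding survives, so the next connect departs from the same reserved port. A sketch of the full disconnect-then-reconnect sequence (hypothetical helper; with O_NONBLOCK the second connect will typically return -EINPROGRESS and complete asynchronously):

static int reconnect_same_port(struct socket *sock,
			       struct sockaddr *peer, int peerlen)
{
	struct sockaddr any;

	/* break the old association; the local port stays bound */
	memset(&any, 0, sizeof(any));
	any.sa_family = AF_UNSPEC;
	kernel_connect(sock, &any, sizeof(any), 0);

	/* reconnect from the same local (reserved) port */
	return kernel_connect(sock, peer, peerlen, O_NONBLOCK);
}
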
@@ -1151,22 +1267,23 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
1151 */ 1267 */
1152static void xs_tcp_connect_worker(struct work_struct *work) 1268static void xs_tcp_connect_worker(struct work_struct *work)
1153{ 1269{
1154 struct rpc_xprt *xprt = 1270 struct sock_xprt *transport =
1155 container_of(work, struct rpc_xprt, connect_worker.work); 1271 container_of(work, struct sock_xprt, connect_worker.work);
1156 struct socket *sock = xprt->sock; 1272 struct rpc_xprt *xprt = &transport->xprt;
1273 struct socket *sock = transport->sock;
1157 int err, status = -EIO; 1274 int err, status = -EIO;
1158 1275
1159 if (xprt->shutdown || !xprt_bound(xprt)) 1276 if (xprt->shutdown || !xprt_bound(xprt))
1160 goto out; 1277 goto out;
1161 1278
1162 if (!xprt->sock) { 1279 if (!sock) {
1163 /* start from scratch */ 1280 /* start from scratch */
1164 if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { 1281 if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
1165 dprintk("RPC: can't create TCP transport socket (%d).\n", -err); 1282 dprintk("RPC: can't create TCP transport socket (%d).\n", -err);
1166 goto out; 1283 goto out;
1167 } 1284 }
1168 1285
1169 if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) { 1286 if (xprt->resvport && xs_bindresvport(transport, sock) < 0) {
1170 sock_release(sock); 1287 sock_release(sock);
1171 goto out; 1288 goto out;
1172 } 1289 }
@@ -1175,17 +1292,17 @@ static void xs_tcp_connect_worker(struct work_struct *work)
1175 xs_tcp_reuse_connection(xprt); 1292 xs_tcp_reuse_connection(xprt);
1176 1293
1177 dprintk("RPC: worker connecting xprt %p to address: %s\n", 1294 dprintk("RPC: worker connecting xprt %p to address: %s\n",
1178 xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); 1295 xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
1179 1296
1180 if (!xprt->inet) { 1297 if (!transport->inet) {
1181 struct sock *sk = sock->sk; 1298 struct sock *sk = sock->sk;
1182 1299
1183 write_lock_bh(&sk->sk_callback_lock); 1300 write_lock_bh(&sk->sk_callback_lock);
1184 1301
1185 sk->sk_user_data = xprt; 1302 sk->sk_user_data = xprt;
1186 xprt->old_data_ready = sk->sk_data_ready; 1303 transport->old_data_ready = sk->sk_data_ready;
1187 xprt->old_state_change = sk->sk_state_change; 1304 transport->old_state_change = sk->sk_state_change;
1188 xprt->old_write_space = sk->sk_write_space; 1305 transport->old_write_space = sk->sk_write_space;
1189 sk->sk_data_ready = xs_tcp_data_ready; 1306 sk->sk_data_ready = xs_tcp_data_ready;
1190 sk->sk_state_change = xs_tcp_state_change; 1307 sk->sk_state_change = xs_tcp_state_change;
1191 sk->sk_write_space = xs_tcp_write_space; 1308 sk->sk_write_space = xs_tcp_write_space;
@@ -1200,8 +1317,8 @@ static void xs_tcp_connect_worker(struct work_struct *work)
1200 xprt_clear_connected(xprt); 1317 xprt_clear_connected(xprt);
1201 1318
1202 /* Reset to new socket */ 1319 /* Reset to new socket */
1203 xprt->sock = sock; 1320 transport->sock = sock;
1204 xprt->inet = sk; 1321 transport->inet = sk;
1205 1322
1206 write_unlock_bh(&sk->sk_callback_lock); 1323 write_unlock_bh(&sk->sk_callback_lock);
1207 } 1324 }
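
Both workers save the socket's original sk_data_ready, sk_state_change and sk_write_space pointers in the sock_xprt before overriding them, which implies the close path restores them before the socket is released. A sketch of that symmetric teardown, assuming only the three saved pointers shown in this patch (helper name hypothetical):

static void restore_old_callbacks(struct sock_xprt *transport,
				  struct sock *sk)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data = NULL;
	sk->sk_data_ready = transport->old_data_ready;
	sk->sk_state_change = transport->old_state_change;
	sk->sk_write_space = transport->old_write_space;
	write_unlock_bh(&sk->sk_callback_lock);
}
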
@@ -1250,21 +1367,22 @@ out_clear:
1250static void xs_connect(struct rpc_task *task) 1367static void xs_connect(struct rpc_task *task)
1251{ 1368{
1252 struct rpc_xprt *xprt = task->tk_xprt; 1369 struct rpc_xprt *xprt = task->tk_xprt;
1370 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
1253 1371
1254 if (xprt_test_and_set_connecting(xprt)) 1372 if (xprt_test_and_set_connecting(xprt))
1255 return; 1373 return;
1256 1374
1257 if (xprt->sock != NULL) { 1375 if (transport->sock != NULL) {
1258 dprintk("RPC: xs_connect delayed xprt %p for %lu seconds\n", 1376 dprintk("RPC: xs_connect delayed xprt %p for %lu seconds\n",
1259 xprt, xprt->reestablish_timeout / HZ); 1377 xprt, xprt->reestablish_timeout / HZ);
1260 schedule_delayed_work(&xprt->connect_worker, 1378 schedule_delayed_work(&transport->connect_worker,
1261 xprt->reestablish_timeout); 1379 xprt->reestablish_timeout);
1262 xprt->reestablish_timeout <<= 1; 1380 xprt->reestablish_timeout <<= 1;
1263 if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO) 1381 if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
1264 xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; 1382 xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
1265 } else { 1383 } else {
1266 dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); 1384 dprintk("RPC: xs_connect scheduled xprt %p\n", xprt);
1267 schedule_delayed_work(&xprt->connect_worker, 0); 1385 schedule_delayed_work(&transport->connect_worker, 0);
1268 1386
1269 /* flush_scheduled_work can sleep... */ 1387 /* flush_scheduled_work can sleep... */
1270 if (!RPC_IS_ASYNC(task)) 1388 if (!RPC_IS_ASYNC(task))
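
xs_connect() doubles reestablish_timeout after scheduling each delayed reconnect and clamps it at XS_TCP_MAX_REEST_TO: bounded exponential backoff. The arithmetic in isolation:

static unsigned long next_reestablish_timeout(unsigned long cur)
{
	unsigned long next = cur << 1;		/* double the delay */

	if (next > XS_TCP_MAX_REEST_TO)
		next = XS_TCP_MAX_REEST_TO;	/* never beyond the cap */
	return next;
}
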
@@ -1280,8 +1398,10 @@ static void xs_connect(struct rpc_task *task)
1280 */ 1398 */
1281static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) 1399static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
1282{ 1400{
1401 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
1402
1283 seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %Lu %Lu\n", 1403 seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %Lu %Lu\n",
1284 xprt->port, 1404 transport->port,
1285 xprt->stat.bind_count, 1405 xprt->stat.bind_count,
1286 xprt->stat.sends, 1406 xprt->stat.sends,
1287 xprt->stat.recvs, 1407 xprt->stat.recvs,
@@ -1298,13 +1418,14 @@ static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
1298 */ 1418 */
1299static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) 1419static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
1300{ 1420{
1421 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
1301 long idle_time = 0; 1422 long idle_time = 0;
1302 1423
1303 if (xprt_connected(xprt)) 1424 if (xprt_connected(xprt))
1304 idle_time = (long)(jiffies - xprt->last_used) / HZ; 1425 idle_time = (long)(jiffies - xprt->last_used) / HZ;
1305 1426
1306 seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu\n", 1427 seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu\n",
1307 xprt->port, 1428 transport->port,
1308 xprt->stat.bind_count, 1429 xprt->stat.bind_count,
1309 xprt->stat.connect_count, 1430 xprt->stat.connect_count,
1310 xprt->stat.connect_time, 1431 xprt->stat.connect_time,
@@ -1318,7 +1439,6 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
1318 1439
1319static struct rpc_xprt_ops xs_udp_ops = { 1440static struct rpc_xprt_ops xs_udp_ops = {
1320 .set_buffer_size = xs_udp_set_buffer_size, 1441 .set_buffer_size = xs_udp_set_buffer_size,
1321 .print_addr = xs_print_peer_address,
1322 .reserve_xprt = xprt_reserve_xprt_cong, 1442 .reserve_xprt = xprt_reserve_xprt_cong,
1323 .release_xprt = xprt_release_xprt_cong, 1443 .release_xprt = xprt_release_xprt_cong,
1324 .rpcbind = rpc_getport, 1444 .rpcbind = rpc_getport,
@@ -1336,7 +1456,6 @@ static struct rpc_xprt_ops xs_udp_ops = {
1336}; 1456};
1337 1457
1338static struct rpc_xprt_ops xs_tcp_ops = { 1458static struct rpc_xprt_ops xs_tcp_ops = {
1339 .print_addr = xs_print_peer_address,
1340 .reserve_xprt = xprt_reserve_xprt, 1459 .reserve_xprt = xprt_reserve_xprt,
1341 .release_xprt = xs_tcp_release_xprt, 1460 .release_xprt = xs_tcp_release_xprt,
1342 .rpcbind = rpc_getport, 1461 .rpcbind = rpc_getport,
@@ -1351,33 +1470,64 @@ static struct rpc_xprt_ops xs_tcp_ops = {
1351 .print_stats = xs_tcp_print_stats, 1470 .print_stats = xs_tcp_print_stats,
1352}; 1471};
1353 1472
1473static struct rpc_xprt *xs_setup_xprt(struct sockaddr *addr, size_t addrlen, unsigned int slot_table_size)
1474{
1475 struct rpc_xprt *xprt;
1476 struct sock_xprt *new;
1477
1478 if (addrlen > sizeof(xprt->addr)) {
1479 dprintk("RPC: xs_setup_xprt: address too large\n");
1480 return ERR_PTR(-EBADF);
1481 }
1482
1483 new = kzalloc(sizeof(*new), GFP_KERNEL);
1484 if (new == NULL) {
1485 dprintk("RPC: xs_setup_xprt: couldn't allocate rpc_xprt\n");
1486 return ERR_PTR(-ENOMEM);
1487 }
1488 xprt = &new->xprt;
1489
1490 xprt->max_reqs = slot_table_size;
1491 xprt->slot = kcalloc(xprt->max_reqs, sizeof(struct rpc_rqst), GFP_KERNEL);
1492 if (xprt->slot == NULL) {
1493 kfree(xprt);
1494 dprintk("RPC: xs_setup_xprt: couldn't allocate slot table\n");
1495 return ERR_PTR(-ENOMEM);
1496 }
1497
1498 memcpy(&xprt->addr, addr, addrlen);
1499 xprt->addrlen = addrlen;
1500 new->port = xs_get_random_port();
1501
1502 return xprt;
1503}
1504
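
xs_setup_xprt() reports failure through the ERR_PTR() convention rather than returning NULL, which lets the setup routines below propagate the pointer unchanged. A sketch of how a caller unpacks it (caller name hypothetical; <linux/err.h> supplies IS_ERR/PTR_ERR):

static int example_create(struct sockaddr *addr, size_t addrlen)
{
	struct rpc_xprt *xprt;

	xprt = xs_setup_xprt(addr, addrlen, xprt_udp_slot_table_entries);
	if (IS_ERR(xprt))
		return PTR_ERR(xprt);	/* -EBADF or -ENOMEM from above */

	/* continue initializing the new transport here */
	return 0;
}
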
1354/** 1505/**
1355 * xs_setup_udp - Set up transport to use a UDP socket 1506 * xs_setup_udp - Set up transport to use a UDP socket
1356 * @xprt: transport to set up 1507 * @addr: address of remote server
1508 * @addrlen: length of address in bytes
1357 * @to: timeout parameters 1509 * @to: timeout parameters
1358 * 1510 *
1359 */ 1511 */
1360int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) 1512struct rpc_xprt *xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to)
1361{ 1513{
1362 size_t slot_table_size; 1514 struct rpc_xprt *xprt;
1363 struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; 1515 struct sock_xprt *transport;
1364 1516
1365 xprt->max_reqs = xprt_udp_slot_table_entries; 1517 xprt = xs_setup_xprt(addr, addrlen, xprt_udp_slot_table_entries);
1366 slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); 1518 if (IS_ERR(xprt))
1367 xprt->slot = kzalloc(slot_table_size, GFP_KERNEL); 1519 return xprt;
1368 if (xprt->slot == NULL) 1520 transport = container_of(xprt, struct sock_xprt, xprt);
1369 return -ENOMEM;
1370 1521
1371 if (ntohs(addr->sin_port) != 0) 1522 if (ntohs(((struct sockaddr_in *)addr)->sin_port) != 0)
1372 xprt_set_bound(xprt); 1523 xprt_set_bound(xprt);
1373 xprt->port = xs_get_random_port();
1374 1524
1375 xprt->prot = IPPROTO_UDP; 1525 xprt->prot = IPPROTO_UDP;
1376 xprt->tsh_size = 0; 1526 xprt->tsh_size = 0;
1377 /* XXX: header size can vary due to auth type, IPv6, etc. */ 1527 /* XXX: header size can vary due to auth type, IPv6, etc. */
1378 xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); 1528 xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
1379 1529
1380 INIT_DELAYED_WORK(&xprt->connect_worker, xs_udp_connect_worker); 1530 INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker);
1381 xprt->bind_timeout = XS_BIND_TO; 1531 xprt->bind_timeout = XS_BIND_TO;
1382 xprt->connect_timeout = XS_UDP_CONN_TO; 1532 xprt->connect_timeout = XS_UDP_CONN_TO;
1383 xprt->reestablish_timeout = XS_UDP_REEST_TO; 1533 xprt->reestablish_timeout = XS_UDP_REEST_TO;
@@ -1392,37 +1542,36 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
1392 1542
1393 xs_format_peer_addresses(xprt); 1543 xs_format_peer_addresses(xprt);
1394 dprintk("RPC: set up transport to address %s\n", 1544 dprintk("RPC: set up transport to address %s\n",
1395 xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); 1545 xprt->address_strings[RPC_DISPLAY_ALL]);
1396 1546
1397 return 0; 1547 return xprt;
1398} 1548}
1399 1549
1400/** 1550/**
1401 * xs_setup_tcp - Set up transport to use a TCP socket 1551 * xs_setup_tcp - Set up transport to use a TCP socket
1402 * @xprt: transport to set up 1552 * @addr: address of remote server
1553 * @addrlen: length of address in bytes
1403 * @to: timeout parameters 1554 * @to: timeout parameters
1404 * 1555 *
1405 */ 1556 */
1406int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) 1557struct rpc_xprt *xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to)
1407{ 1558{
1408 size_t slot_table_size; 1559 struct rpc_xprt *xprt;
1409 struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; 1560 struct sock_xprt *transport;
1410 1561
1411 xprt->max_reqs = xprt_tcp_slot_table_entries; 1562 xprt = xs_setup_xprt(addr, addrlen, xprt_tcp_slot_table_entries);
1412 slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); 1563 if (IS_ERR(xprt))
1413 xprt->slot = kzalloc(slot_table_size, GFP_KERNEL); 1564 return xprt;
1414 if (xprt->slot == NULL) 1565 transport = container_of(xprt, struct sock_xprt, xprt);
1415 return -ENOMEM;
1416 1566
1417 if (ntohs(addr->sin_port) != 0) 1567 if (ntohs(((struct sockaddr_in *)addr)->sin_port) != 0)
1418 xprt_set_bound(xprt); 1568 xprt_set_bound(xprt);
1419 xprt->port = xs_get_random_port();
1420 1569
1421 xprt->prot = IPPROTO_TCP; 1570 xprt->prot = IPPROTO_TCP;
1422 xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); 1571 xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
1423 xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; 1572 xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
1424 1573
1425 INIT_DELAYED_WORK(&xprt->connect_worker, xs_tcp_connect_worker); 1574 INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker);
1426 xprt->bind_timeout = XS_BIND_TO; 1575 xprt->bind_timeout = XS_BIND_TO;
1427 xprt->connect_timeout = XS_TCP_CONN_TO; 1576 xprt->connect_timeout = XS_TCP_CONN_TO;
1428 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; 1577 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
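
Unlike UDP, TCP sets tsh_size to one 32-bit word because RPC over TCP prefixes every record fragment with a marker: the high bit flags the last fragment and the low 31 bits carry the fragment length (RFC 1831 record marking). A sketch of building such a marker (illustrative only, not the helper xprtsock itself uses):

static __be32 make_rpc_fraghdr(u32 len, int last)
{
	u32 marker = len & 0x7fffffff;	/* low 31 bits: fragment length */

	if (last)
		marker |= 0x80000000;	/* high bit: last fragment */
	return htonl(marker);
}
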
@@ -1437,7 +1586,40 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
1437 1586
1438 xs_format_peer_addresses(xprt); 1587 xs_format_peer_addresses(xprt);
1439 dprintk("RPC: set up transport to address %s\n", 1588 dprintk("RPC: set up transport to address %s\n",
1440 xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); 1589 xprt->address_strings[RPC_DISPLAY_ALL]);
1590
1591 return xprt;
1592}
1593
1594/**
1595 * init_socket_xprt - set up xprtsock's sysctls
1596 *
1597 */
1598int init_socket_xprt(void)
1599{
1600#ifdef RPC_DEBUG
1601 if (!sunrpc_table_header) {
1602 sunrpc_table_header = register_sysctl_table(sunrpc_table, 1);
1603#ifdef CONFIG_PROC_FS
1604 if (sunrpc_table[0].de)
1605 sunrpc_table[0].de->owner = THIS_MODULE;
1606#endif
1607 }
1608#endif
1441 1609
1442 return 0; 1610 return 0;
1443} 1611}
1612
1613/**
1614 * cleanup_socket_xprt - remove xprtsock's sysctls
1615 *
1616 */
1617void cleanup_socket_xprt(void)
1618{
1619#ifdef RPC_DEBUG
1620 if (sunrpc_table_header) {
1621 unregister_sysctl_table(sunrpc_table_header);
1622 sunrpc_table_header = NULL;
1623 }
1624#endif
1625}