aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndy Adamson <andros@netapp.com>2010-10-20 00:18:03 -0400
committerTrond Myklebust <Trond.Myklebust@netapp.com>2010-10-24 18:07:10 -0400
commitb1f69b754ee312ec75f2c7ead0e6851cd9598cc2 (patch)
tree1d8e70abb2cd087e3b97f73d86db8b9568467378
parent974cec8ca0352eb5d281535b714cf194a606e98f (diff)
NFSv4.1: pnfs: add LAYOUTGET and GETDEVICEINFO infrastructure
Add the ability to actually send LAYOUTGET and GETDEVICEINFO. This also adds in the machinery to handle layout state and the deviceid cache. Note that GETDEVICEINFO is not called directly by the generic layer. Instead it is called by the drivers while parsing the LAYOUTGET opaque data in response to an unknown device id embedded therein. RFC 5661 only encodes device ids within the driver-specific opaque data. Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Dean Hildebrand <dhildebz@umich.edu> Signed-off-by: Marc Eshel <eshel@almaden.ibm.com> Signed-off-by: Mike Sager <sager@netapp.com> Signed-off-by: Ricardo Labiaga <ricardo.labiaga@netapp.com> Signed-off-by: Tao Guo <guotao@nrchpc.ac.cn> Signed-off-by: Boaz Harrosh <bharrosh@panasas.com> Signed-off-by: Fred Isaman <iisaman@netapp.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r--fs/nfs/nfs4proc.c142
-rw-r--r--fs/nfs/nfs4xdr.c302
-rw-r--r--fs/nfs/pnfs.c385
-rw-r--r--fs/nfs/pnfs.h73
-rw-r--r--include/linux/nfs4.h2
-rw-r--r--include/linux/nfs_fs_sb.h1
-rw-r--r--include/linux/nfs_xdr.h49
7 files changed, 921 insertions, 33 deletions
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a5f1edb45b47..7e14e991ddfa 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -55,6 +55,7 @@
55#include "internal.h" 55#include "internal.h"
56#include "iostat.h" 56#include "iostat.h"
57#include "callback.h" 57#include "callback.h"
58#include "pnfs.h"
58 59
59#define NFSDBG_FACILITY NFSDBG_PROC 60#define NFSDBG_FACILITY NFSDBG_PROC
60 61
@@ -5256,6 +5257,147 @@ out:
5256 dprintk("<-- %s status=%d\n", __func__, status); 5257 dprintk("<-- %s status=%d\n", __func__, status);
5257 return status; 5258 return status;
5258} 5259}
5260
5261static void
5262nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
5263{
5264 struct nfs4_layoutget *lgp = calldata;
5265 struct inode *ino = lgp->args.inode;
5266 struct nfs_server *server = NFS_SERVER(ino);
5267
5268 dprintk("--> %s\n", __func__);
5269 if (nfs4_setup_sequence(server, &lgp->args.seq_args,
5270 &lgp->res.seq_res, 0, task))
5271 return;
5272 rpc_call_start(task);
5273}
5274
5275static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
5276{
5277 struct nfs4_layoutget *lgp = calldata;
5278 struct nfs_server *server = NFS_SERVER(lgp->args.inode);
5279
5280 dprintk("--> %s\n", __func__);
5281
5282 if (!nfs4_sequence_done(task, &lgp->res.seq_res))
5283 return;
5284
5285 switch (task->tk_status) {
5286 case 0:
5287 break;
5288 case -NFS4ERR_LAYOUTTRYLATER:
5289 case -NFS4ERR_RECALLCONFLICT:
5290 task->tk_status = -NFS4ERR_DELAY;
5291 /* Fall through */
5292 default:
5293 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
5294 rpc_restart_call_prepare(task);
5295 return;
5296 }
5297 }
5298 lgp->status = task->tk_status;
5299 dprintk("<-- %s\n", __func__);
5300}
5301
5302static void nfs4_layoutget_release(void *calldata)
5303{
5304 struct nfs4_layoutget *lgp = calldata;
5305
5306 dprintk("--> %s\n", __func__);
5307 put_layout_hdr(lgp->args.inode);
5308 if (lgp->res.layout.buf != NULL)
5309 free_page((unsigned long) lgp->res.layout.buf);
5310 put_nfs_open_context(lgp->args.ctx);
5311 kfree(calldata);
5312 dprintk("<-- %s\n", __func__);
5313}
5314
5315static const struct rpc_call_ops nfs4_layoutget_call_ops = {
5316 .rpc_call_prepare = nfs4_layoutget_prepare,
5317 .rpc_call_done = nfs4_layoutget_done,
5318 .rpc_release = nfs4_layoutget_release,
5319};
5320
5321int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
5322{
5323 struct nfs_server *server = NFS_SERVER(lgp->args.inode);
5324 struct rpc_task *task;
5325 struct rpc_message msg = {
5326 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET],
5327 .rpc_argp = &lgp->args,
5328 .rpc_resp = &lgp->res,
5329 };
5330 struct rpc_task_setup task_setup_data = {
5331 .rpc_client = server->client,
5332 .rpc_message = &msg,
5333 .callback_ops = &nfs4_layoutget_call_ops,
5334 .callback_data = lgp,
5335 .flags = RPC_TASK_ASYNC,
5336 };
5337 int status = 0;
5338
5339 dprintk("--> %s\n", __func__);
5340
5341 lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS);
5342 if (lgp->res.layout.buf == NULL) {
5343 nfs4_layoutget_release(lgp);
5344 return -ENOMEM;
5345 }
5346
5347 lgp->res.seq_res.sr_slot = NULL;
5348 task = rpc_run_task(&task_setup_data);
5349 if (IS_ERR(task))
5350 return PTR_ERR(task);
5351 status = nfs4_wait_for_completion_rpc_task(task);
5352 if (status != 0)
5353 goto out;
5354 status = lgp->status;
5355 if (status != 0)
5356 goto out;
5357 status = pnfs_layout_process(lgp);
5358out:
5359 rpc_put_task(task);
5360 dprintk("<-- %s status=%d\n", __func__, status);
5361 return status;
5362}
5363
5364static int
5365_nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
5366{
5367 struct nfs4_getdeviceinfo_args args = {
5368 .pdev = pdev,
5369 };
5370 struct nfs4_getdeviceinfo_res res = {
5371 .pdev = pdev,
5372 };
5373 struct rpc_message msg = {
5374 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICEINFO],
5375 .rpc_argp = &args,
5376 .rpc_resp = &res,
5377 };
5378 int status;
5379
5380 dprintk("--> %s\n", __func__);
5381 status = nfs4_call_sync(server, &msg, &args, &res, 0);
5382 dprintk("<-- %s status=%d\n", __func__, status);
5383
5384 return status;
5385}
5386
5387int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
5388{
5389 struct nfs4_exception exception = { };
5390 int err;
5391
5392 do {
5393 err = nfs4_handle_exception(server,
5394 _nfs4_proc_getdeviceinfo(server, pdev),
5395 &exception);
5396 } while (exception.retry);
5397 return err;
5398}
5399EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo);
5400
5259#endif /* CONFIG_NFS_V4_1 */ 5401#endif /* CONFIG_NFS_V4_1 */
5260 5402
5261struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { 5403struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 8b4dfa393f0f..f313c4cce7e4 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -52,6 +52,7 @@
52#include <linux/nfs_idmap.h> 52#include <linux/nfs_idmap.h>
53#include "nfs4_fs.h" 53#include "nfs4_fs.h"
54#include "internal.h" 54#include "internal.h"
55#include "pnfs.h"
55 56
56#define NFSDBG_FACILITY NFSDBG_XDR 57#define NFSDBG_FACILITY NFSDBG_XDR
57 58
@@ -310,6 +311,19 @@ static int nfs4_stat_to_errno(int);
310 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) 311 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5)
311#define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) 312#define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4)
312#define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) 313#define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4)
314#define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \
315 XDR_QUADLEN(NFS4_DEVICEID4_SIZE))
316#define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \
317 1 /* layout type */ + \
318 1 /* opaque devaddr4 length */ + \
319 /* devaddr4 payload is read into page */ \
320 1 /* notification bitmap length */ + \
321 1 /* notification bitmap */)
322#define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
323 encode_stateid_maxsz)
324#define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
325 decode_stateid_maxsz + \
326 XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
313#else /* CONFIG_NFS_V4_1 */ 327#else /* CONFIG_NFS_V4_1 */
314#define encode_sequence_maxsz 0 328#define encode_sequence_maxsz 0
315#define decode_sequence_maxsz 0 329#define decode_sequence_maxsz 0
@@ -699,6 +713,20 @@ static int nfs4_stat_to_errno(int);
699#define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ 713#define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \
700 decode_sequence_maxsz + \ 714 decode_sequence_maxsz + \
701 decode_reclaim_complete_maxsz) 715 decode_reclaim_complete_maxsz)
716#define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \
717 encode_sequence_maxsz +\
718 encode_getdeviceinfo_maxsz)
719#define NFS4_dec_getdeviceinfo_sz (compound_decode_hdr_maxsz + \
720 decode_sequence_maxsz + \
721 decode_getdeviceinfo_maxsz)
722#define NFS4_enc_layoutget_sz (compound_encode_hdr_maxsz + \
723 encode_sequence_maxsz + \
724 encode_putfh_maxsz + \
725 encode_layoutget_maxsz)
726#define NFS4_dec_layoutget_sz (compound_decode_hdr_maxsz + \
727 decode_sequence_maxsz + \
728 decode_putfh_maxsz + \
729 decode_layoutget_maxsz)
702 730
703const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + 731const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
704 compound_encode_hdr_maxsz + 732 compound_encode_hdr_maxsz +
@@ -1737,6 +1765,58 @@ static void encode_sequence(struct xdr_stream *xdr,
1737#endif /* CONFIG_NFS_V4_1 */ 1765#endif /* CONFIG_NFS_V4_1 */
1738} 1766}
1739 1767
1768#ifdef CONFIG_NFS_V4_1
1769static void
1770encode_getdeviceinfo(struct xdr_stream *xdr,
1771 const struct nfs4_getdeviceinfo_args *args,
1772 struct compound_hdr *hdr)
1773{
1774 __be32 *p;
1775
1776 p = reserve_space(xdr, 16 + NFS4_DEVICEID4_SIZE);
1777 *p++ = cpu_to_be32(OP_GETDEVICEINFO);
1778 p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data,
1779 NFS4_DEVICEID4_SIZE);
1780 *p++ = cpu_to_be32(args->pdev->layout_type);
1781 *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */
1782 *p++ = cpu_to_be32(0); /* bitmap length 0 */
1783 hdr->nops++;
1784 hdr->replen += decode_getdeviceinfo_maxsz;
1785}
1786
1787static void
1788encode_layoutget(struct xdr_stream *xdr,
1789 const struct nfs4_layoutget_args *args,
1790 struct compound_hdr *hdr)
1791{
1792 nfs4_stateid stateid;
1793 __be32 *p;
1794
1795 p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
1796 *p++ = cpu_to_be32(OP_LAYOUTGET);
1797 *p++ = cpu_to_be32(0); /* Signal layout available */
1798 *p++ = cpu_to_be32(args->type);
1799 *p++ = cpu_to_be32(args->range.iomode);
1800 p = xdr_encode_hyper(p, args->range.offset);
1801 p = xdr_encode_hyper(p, args->range.length);
1802 p = xdr_encode_hyper(p, args->minlength);
1803 pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout,
1804 args->ctx->state);
1805 p = xdr_encode_opaque_fixed(p, &stateid.data, NFS4_STATEID_SIZE);
1806 *p = cpu_to_be32(args->maxcount);
1807
1808 dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
1809 __func__,
1810 args->type,
1811 args->range.iomode,
1812 (unsigned long)args->range.offset,
1813 (unsigned long)args->range.length,
1814 args->maxcount);
1815 hdr->nops++;
1816 hdr->replen += decode_layoutget_maxsz;
1817}
1818#endif /* CONFIG_NFS_V4_1 */
1819
1740/* 1820/*
1741 * END OF "GENERIC" ENCODE ROUTINES. 1821 * END OF "GENERIC" ENCODE ROUTINES.
1742 */ 1822 */
@@ -2554,6 +2634,51 @@ static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p,
2554 return 0; 2634 return 0;
2555} 2635}
2556 2636
2637/*
2638 * Encode GETDEVICEINFO request
2639 */
2640static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
2641 struct nfs4_getdeviceinfo_args *args)
2642{
2643 struct xdr_stream xdr;
2644 struct compound_hdr hdr = {
2645 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2646 };
2647
2648 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2649 encode_compound_hdr(&xdr, req, &hdr);
2650 encode_sequence(&xdr, &args->seq_args, &hdr);
2651 encode_getdeviceinfo(&xdr, args, &hdr);
2652
2653 /* set up reply kvec. Subtract notification bitmap max size (2)
2654 * so that notification bitmap is put in xdr_buf tail */
2655 xdr_inline_pages(&req->rq_rcv_buf, (hdr.replen - 2) << 2,
2656 args->pdev->pages, args->pdev->pgbase,
2657 args->pdev->pglen);
2658
2659 encode_nops(&hdr);
2660 return 0;
2661}
2662
2663/*
2664 * Encode LAYOUTGET request
2665 */
2666static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p,
2667 struct nfs4_layoutget_args *args)
2668{
2669 struct xdr_stream xdr;
2670 struct compound_hdr hdr = {
2671 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2672 };
2673
2674 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2675 encode_compound_hdr(&xdr, req, &hdr);
2676 encode_sequence(&xdr, &args->seq_args, &hdr);
2677 encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
2678 encode_layoutget(&xdr, args, &hdr);
2679 encode_nops(&hdr);
2680 return 0;
2681}
2557#endif /* CONFIG_NFS_V4_1 */ 2682#endif /* CONFIG_NFS_V4_1 */
2558 2683
2559static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) 2684static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
@@ -4830,6 +4955,134 @@ out_overflow:
4830#endif /* CONFIG_NFS_V4_1 */ 4955#endif /* CONFIG_NFS_V4_1 */
4831} 4956}
4832 4957
4958#if defined(CONFIG_NFS_V4_1)
4959
4960static int decode_getdeviceinfo(struct xdr_stream *xdr,
4961 struct pnfs_device *pdev)
4962{
4963 __be32 *p;
4964 uint32_t len, type;
4965 int status;
4966
4967 status = decode_op_hdr(xdr, OP_GETDEVICEINFO);
4968 if (status) {
4969 if (status == -ETOOSMALL) {
4970 p = xdr_inline_decode(xdr, 4);
4971 if (unlikely(!p))
4972 goto out_overflow;
4973 pdev->mincount = be32_to_cpup(p);
4974 dprintk("%s: Min count too small. mincnt = %u\n",
4975 __func__, pdev->mincount);
4976 }
4977 return status;
4978 }
4979
4980 p = xdr_inline_decode(xdr, 8);
4981 if (unlikely(!p))
4982 goto out_overflow;
4983 type = be32_to_cpup(p++);
4984 if (type != pdev->layout_type) {
4985 dprintk("%s: layout mismatch req: %u pdev: %u\n",
4986 __func__, pdev->layout_type, type);
4987 return -EINVAL;
4988 }
4989 /*
4990 * Get the length of the opaque device_addr4. xdr_read_pages places
4991 * the opaque device_addr4 in the xdr_buf->pages (pnfs_device->pages)
4992 * and places the remaining xdr data in xdr_buf->tail
4993 */
4994 pdev->mincount = be32_to_cpup(p);
4995 xdr_read_pages(xdr, pdev->mincount); /* include space for the length */
4996
4997 /* Parse notification bitmap, verifying that it is zero. */
4998 p = xdr_inline_decode(xdr, 4);
4999 if (unlikely(!p))
5000 goto out_overflow;
5001 len = be32_to_cpup(p);
5002 if (len) {
5003 int i;
5004
5005 p = xdr_inline_decode(xdr, 4 * len);
5006 if (unlikely(!p))
5007 goto out_overflow;
5008 for (i = 0; i < len; i++, p++) {
5009 if (be32_to_cpup(p)) {
5010 dprintk("%s: notifications not supported\n",
5011 __func__);
5012 return -EIO;
5013 }
5014 }
5015 }
5016 return 0;
5017out_overflow:
5018 print_overflow_msg(__func__, xdr);
5019 return -EIO;
5020}
5021
5022static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
5023 struct nfs4_layoutget_res *res)
5024{
5025 __be32 *p;
5026 int status;
5027 u32 layout_count;
5028
5029 status = decode_op_hdr(xdr, OP_LAYOUTGET);
5030 if (status)
5031 return status;
5032 p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE);
5033 if (unlikely(!p))
5034 goto out_overflow;
5035 res->return_on_close = be32_to_cpup(p++);
5036 p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE);
5037 layout_count = be32_to_cpup(p);
5038 if (!layout_count) {
5039 dprintk("%s: server responded with empty layout array\n",
5040 __func__);
5041 return -EINVAL;
5042 }
5043
5044 p = xdr_inline_decode(xdr, 24);
5045 if (unlikely(!p))
5046 goto out_overflow;
5047 p = xdr_decode_hyper(p, &res->range.offset);
5048 p = xdr_decode_hyper(p, &res->range.length);
5049 res->range.iomode = be32_to_cpup(p++);
5050 res->type = be32_to_cpup(p++);
5051
5052 status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p);
5053 if (unlikely(status))
5054 return status;
5055
5056 dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n",
5057 __func__,
5058 (unsigned long)res->range.offset,
5059 (unsigned long)res->range.length,
5060 res->range.iomode,
5061 res->type,
5062 res->layout.len);
5063
5064 /* nfs4_proc_layoutget allocated a single page */
5065 if (res->layout.len > PAGE_SIZE)
5066 return -ENOMEM;
5067 memcpy(res->layout.buf, p, res->layout.len);
5068
5069 if (layout_count > 1) {
5070 /* We only handle a length one array at the moment. Any
5071 * further entries are just ignored. Note that this means
5072 * the client may see a response that is less than the
5073 * minimum it requested.
5074 */
5075 dprintk("%s: server responded with %d layouts, dropping tail\n",
5076 __func__, layout_count);
5077 }
5078
5079 return 0;
5080out_overflow:
5081 print_overflow_msg(__func__, xdr);
5082 return -EIO;
5083}
5084#endif /* CONFIG_NFS_V4_1 */
5085
4833/* 5086/*
4834 * END OF "GENERIC" DECODE ROUTINES. 5087 * END OF "GENERIC" DECODE ROUTINES.
4835 */ 5088 */
@@ -5857,6 +6110,53 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p,
5857 status = decode_reclaim_complete(&xdr, (void *)NULL); 6110 status = decode_reclaim_complete(&xdr, (void *)NULL);
5858 return status; 6111 return status;
5859} 6112}
6113
6114/*
6115 * Decode GETDEVINFO response
6116 */
6117static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p,
6118 struct nfs4_getdeviceinfo_res *res)
6119{
6120 struct xdr_stream xdr;
6121 struct compound_hdr hdr;
6122 int status;
6123
6124 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
6125 status = decode_compound_hdr(&xdr, &hdr);
6126 if (status != 0)
6127 goto out;
6128 status = decode_sequence(&xdr, &res->seq_res, rqstp);
6129 if (status != 0)
6130 goto out;
6131 status = decode_getdeviceinfo(&xdr, res->pdev);
6132out:
6133 return status;
6134}
6135
6136/*
6137 * Decode LAYOUTGET response
6138 */
6139static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p,
6140 struct nfs4_layoutget_res *res)
6141{
6142 struct xdr_stream xdr;
6143 struct compound_hdr hdr;
6144 int status;
6145
6146 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
6147 status = decode_compound_hdr(&xdr, &hdr);
6148 if (status)
6149 goto out;
6150 status = decode_sequence(&xdr, &res->seq_res, rqstp);
6151 if (status)
6152 goto out;
6153 status = decode_putfh(&xdr);
6154 if (status)
6155 goto out;
6156 status = decode_layoutget(&xdr, rqstp, res);
6157out:
6158 return status;
6159}
5860#endif /* CONFIG_NFS_V4_1 */ 6160#endif /* CONFIG_NFS_V4_1 */
5861 6161
5862__be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, 6162__be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
@@ -6048,6 +6348,8 @@ struct rpc_procinfo nfs4_procedures[] = {
6048 PROC(SEQUENCE, enc_sequence, dec_sequence), 6348 PROC(SEQUENCE, enc_sequence, dec_sequence),
6049 PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), 6349 PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time),
6050 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), 6350 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete),
6351 PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
6352 PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
6051#endif /* CONFIG_NFS_V4_1 */ 6353#endif /* CONFIG_NFS_V4_1 */
6052}; 6354};
6053 6355
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 891a0c36f992..d1ad7df3479e 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -140,6 +140,11 @@ pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
140 printk(KERN_ERR "%s id 0 is reserved\n", __func__); 140 printk(KERN_ERR "%s id 0 is reserved\n", __func__);
141 return status; 141 return status;
142 } 142 }
143 if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
144 printk(KERN_ERR "%s Layout driver must provide "
145 "alloc_lseg and free_lseg.\n", __func__);
146 return status;
147 }
143 148
144 spin_lock(&pnfs_spinlock); 149 spin_lock(&pnfs_spinlock);
145 tmp = find_pnfs_driver_locked(ld_type->id); 150 tmp = find_pnfs_driver_locked(ld_type->id);
@@ -168,6 +173,10 @@ pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
168} 173}
169EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); 174EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
170 175
176/*
177 * pNFS client layout cache
178 */
179
171static void 180static void
172get_layout_hdr_locked(struct pnfs_layout_hdr *lo) 181get_layout_hdr_locked(struct pnfs_layout_hdr *lo)
173{ 182{
@@ -190,7 +199,7 @@ put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
190 } 199 }
191} 200}
192 201
193static void 202void
194put_layout_hdr(struct inode *inode) 203put_layout_hdr(struct inode *inode)
195{ 204{
196 spin_lock(&inode->i_lock); 205 spin_lock(&inode->i_lock);
@@ -215,7 +224,7 @@ destroy_lseg(struct kref *kref)
215 struct inode *ino = lseg->layout->inode; 224 struct inode *ino = lseg->layout->inode;
216 225
217 dprintk("--> %s\n", __func__); 226 dprintk("--> %s\n", __func__);
218 kfree(lseg); 227 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
219 /* Matched by get_layout_hdr_locked in pnfs_insert_layout */ 228 /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
220 put_layout_hdr(ino); 229 put_layout_hdr(ino);
221} 230}
@@ -249,6 +258,9 @@ pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list)
249 /* List does not take a reference, so no need for put here */ 258 /* List does not take a reference, so no need for put here */
250 list_del_init(&lo->layouts); 259 list_del_init(&lo->layouts);
251 spin_unlock(&clp->cl_lock); 260 spin_unlock(&clp->cl_lock);
261 write_seqlock(&lo->seqlock);
262 clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
263 write_sequnlock(&lo->seqlock);
252 264
253 dprintk("%s:Return\n", __func__); 265 dprintk("%s:Return\n", __func__);
254} 266}
@@ -307,40 +319,135 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
307 } 319 }
308} 320}
309 321
310static void pnfs_insert_layout(struct pnfs_layout_hdr *lo, 322/* update lo->stateid with new if is more recent
311 struct pnfs_layout_segment *lseg); 323 *
324 * lo->stateid could be the open stateid, in which case we just use what given.
325 */
326static void
327pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
328 const nfs4_stateid *new)
329{
330 nfs4_stateid *old = &lo->stateid;
331 bool overwrite = false;
332
333 write_seqlock(&lo->seqlock);
334 if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) ||
335 memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other)))
336 overwrite = true;
337 else {
338 u32 oldseq, newseq;
339
340 oldseq = be32_to_cpu(old->stateid.seqid);
341 newseq = be32_to_cpu(new->stateid.seqid);
342 if ((int)(newseq - oldseq) > 0)
343 overwrite = true;
344 }
345 if (overwrite)
346 memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
347 write_sequnlock(&lo->seqlock);
348}
349
350static void
351pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
352 struct nfs4_state *state)
353{
354 int seq;
355
356 dprintk("--> %s\n", __func__);
357 write_seqlock(&lo->seqlock);
358 do {
359 seq = read_seqbegin(&state->seqlock);
360 memcpy(lo->stateid.data, state->stateid.data,
361 sizeof(state->stateid.data));
362 } while (read_seqretry(&state->seqlock, seq));
363 set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
364 write_sequnlock(&lo->seqlock);
365 dprintk("<-- %s\n", __func__);
366}
367
368void
369pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
370 struct nfs4_state *open_state)
371{
372 int seq;
312 373
313/* Get layout from server. */ 374 dprintk("--> %s\n", __func__);
375 do {
376 seq = read_seqbegin(&lo->seqlock);
377 if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
378 /* This will trigger retry of the read */
379 pnfs_layout_from_open_stateid(lo, open_state);
380 } else
381 memcpy(dst->data, lo->stateid.data,
382 sizeof(lo->stateid.data));
383 } while (read_seqretry(&lo->seqlock, seq));
384 dprintk("<-- %s\n", __func__);
385}
386
387/*
388* Get layout from server.
389* for now, assume that whole file layouts are requested.
390* arg->offset: 0
391* arg->length: all ones
392*/
314static struct pnfs_layout_segment * 393static struct pnfs_layout_segment *
315send_layoutget(struct pnfs_layout_hdr *lo, 394send_layoutget(struct pnfs_layout_hdr *lo,
316 struct nfs_open_context *ctx, 395 struct nfs_open_context *ctx,
317 u32 iomode) 396 u32 iomode)
318{ 397{
319 struct inode *ino = lo->inode; 398 struct inode *ino = lo->inode;
320 struct pnfs_layout_segment *lseg; 399 struct nfs_server *server = NFS_SERVER(ino);
400 struct nfs4_layoutget *lgp;
401 struct pnfs_layout_segment *lseg = NULL;
402
403 dprintk("--> %s\n", __func__);
321 404
322 /* Lets pretend we sent LAYOUTGET and got a response */ 405 BUG_ON(ctx == NULL);
323 lseg = kzalloc(sizeof(*lseg), GFP_KERNEL); 406 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
407 if (lgp == NULL) {
408 put_layout_hdr(lo->inode);
409 return NULL;
410 }
411 lgp->args.minlength = NFS4_MAX_UINT64;
412 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
413 lgp->args.range.iomode = iomode;
414 lgp->args.range.offset = 0;
415 lgp->args.range.length = NFS4_MAX_UINT64;
416 lgp->args.type = server->pnfs_curr_ld->id;
417 lgp->args.inode = ino;
418 lgp->args.ctx = get_nfs_open_context(ctx);
419 lgp->lsegpp = &lseg;
420
421 /* Synchronously retrieve layout information from server and
422 * store in lseg.
423 */
424 nfs4_proc_layoutget(lgp);
324 if (!lseg) { 425 if (!lseg) {
426 /* remember that LAYOUTGET failed and suspend trying */
325 set_bit(lo_fail_bit(iomode), &lo->state); 427 set_bit(lo_fail_bit(iomode), &lo->state);
326 spin_lock(&ino->i_lock);
327 put_layout_hdr_locked(lo);
328 spin_unlock(&ino->i_lock);
329 return NULL;
330 } 428 }
331 init_lseg(lo, lseg);
332 lseg->iomode = IOMODE_RW;
333 spin_lock(&ino->i_lock);
334 pnfs_insert_layout(lo, lseg);
335 put_layout_hdr_locked(lo);
336 spin_unlock(&ino->i_lock);
337 return lseg; 429 return lseg;
338} 430}
339 431
432/*
433 * Compare two layout segments for sorting into layout cache.
434 * We want to preferentially return RW over RO layouts, so ensure those
435 * are seen first.
436 */
437static s64
438cmp_layout(u32 iomode1, u32 iomode2)
439{
440 /* read > read/write */
441 return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ);
442}
443
340static void 444static void
341pnfs_insert_layout(struct pnfs_layout_hdr *lo, 445pnfs_insert_layout(struct pnfs_layout_hdr *lo,
342 struct pnfs_layout_segment *lseg) 446 struct pnfs_layout_segment *lseg)
343{ 447{
448 struct pnfs_layout_segment *lp;
449 int found = 0;
450
344 dprintk("%s:Begin\n", __func__); 451 dprintk("%s:Begin\n", __func__);
345 452
346 assert_spin_locked(&lo->inode->i_lock); 453 assert_spin_locked(&lo->inode->i_lock);
@@ -352,19 +459,28 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
352 list_add_tail(&lo->layouts, &clp->cl_layouts); 459 list_add_tail(&lo->layouts, &clp->cl_layouts);
353 spin_unlock(&clp->cl_lock); 460 spin_unlock(&clp->cl_lock);
354 } 461 }
355 get_layout_hdr_locked(lo); 462 list_for_each_entry(lp, &lo->segs, fi_list) {
356 /* STUB - add the constructed lseg if necessary */ 463 if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0)
357 if (list_empty(&lo->segs)) { 464 continue;
465 list_add_tail(&lseg->fi_list, &lp->fi_list);
466 dprintk("%s: inserted lseg %p "
467 "iomode %d offset %llu length %llu before "
468 "lp %p iomode %d offset %llu length %llu\n",
469 __func__, lseg, lseg->range.iomode,
470 lseg->range.offset, lseg->range.length,
471 lp, lp->range.iomode, lp->range.offset,
472 lp->range.length);
473 found = 1;
474 break;
475 }
476 if (!found) {
358 list_add_tail(&lseg->fi_list, &lo->segs); 477 list_add_tail(&lseg->fi_list, &lo->segs);
359 dprintk("%s: inserted lseg %p iomode %d at tail\n", 478 dprintk("%s: inserted lseg %p "
360 __func__, lseg, lseg->iomode); 479 "iomode %d offset %llu length %llu at tail\n",
361 } else { 480 __func__, lseg, lseg->range.iomode,
362 /* There is no harm for the moment in calling this 481 lseg->range.offset, lseg->range.length);
363 * with the lock held, and the call will be removed
364 * with the STUB.
365 */
366 put_lseg(lseg);
367 } 482 }
483 get_layout_hdr_locked(lo);
368 484
369 dprintk("%s:Return\n", __func__); 485 dprintk("%s:Return\n", __func__);
370} 486}
@@ -380,6 +496,7 @@ alloc_init_layout_hdr(struct inode *ino)
380 lo->refcount = 1; 496 lo->refcount = 1;
381 INIT_LIST_HEAD(&lo->layouts); 497 INIT_LIST_HEAD(&lo->layouts);
382 INIT_LIST_HEAD(&lo->segs); 498 INIT_LIST_HEAD(&lo->segs);
499 seqlock_init(&lo->seqlock);
383 lo->inode = ino; 500 lo->inode = ino;
384 return lo; 501 return lo;
385} 502}
@@ -407,11 +524,46 @@ pnfs_find_alloc_layout(struct inode *ino)
407 return nfsi->layout; 524 return nfsi->layout;
408} 525}
409 526
410/* STUB - LAYOUTGET never succeeds, so cache is empty */ 527/*
528 * iomode matching rules:
529 * iomode lseg match
530 * ----- ----- -----
531 * ANY READ true
532 * ANY RW true
533 * RW READ false
534 * RW RW true
535 * READ READ true
536 * READ RW true
537 */
538static int
539is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
540{
541 return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW);
542}
543
544/*
545 * lookup range in layout
546 */
411static struct pnfs_layout_segment * 547static struct pnfs_layout_segment *
412pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode) 548pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode)
413{ 549{
414 return NULL; 550 struct pnfs_layout_segment *lseg, *ret = NULL;
551
552 dprintk("%s:Begin\n", __func__);
553
554 assert_spin_locked(&lo->inode->i_lock);
555 list_for_each_entry(lseg, &lo->segs, fi_list) {
556 if (is_matching_lseg(lseg, iomode)) {
557 ret = lseg;
558 break;
559 }
560 if (cmp_layout(iomode, lseg->range.iomode) > 0)
561 break;
562 }
563
564 dprintk("%s:Return lseg %p ref %d\n",
565 __func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0);
566 return ret;
415} 567}
416 568
417/* 569/*
@@ -448,7 +600,7 @@ pnfs_update_layout(struct inode *ino,
448 if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state)) 600 if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state))
449 goto out_unlock; 601 goto out_unlock;
450 602
451 get_layout_hdr_locked(lo); 603 get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */
452 spin_unlock(&ino->i_lock); 604 spin_unlock(&ino->i_lock);
453 605
454 lseg = send_layoutget(lo, ctx, iomode); 606 lseg = send_layoutget(lo, ctx, iomode);
@@ -460,3 +612,172 @@ out_unlock:
460 spin_unlock(&ino->i_lock); 612 spin_unlock(&ino->i_lock);
461 goto out; 613 goto out;
462} 614}
615
616int
617pnfs_layout_process(struct nfs4_layoutget *lgp)
618{
619 struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
620 struct nfs4_layoutget_res *res = &lgp->res;
621 struct pnfs_layout_segment *lseg;
622 struct inode *ino = lo->inode;
623 int status = 0;
624
625 /* Inject layout blob into I/O device driver */
626 lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
627 if (!lseg || IS_ERR(lseg)) {
628 if (!lseg)
629 status = -ENOMEM;
630 else
631 status = PTR_ERR(lseg);
632 dprintk("%s: Could not allocate layout: error %d\n",
633 __func__, status);
634 goto out;
635 }
636
637 spin_lock(&ino->i_lock);
638 init_lseg(lo, lseg);
639 lseg->range = res->range;
640 *lgp->lsegpp = lseg;
641 pnfs_insert_layout(lo, lseg);
642
643 /* Done processing layoutget. Set the layout stateid */
644 pnfs_set_layout_stateid(lo, &res->stateid);
645 spin_unlock(&ino->i_lock);
646out:
647 return status;
648}
649
650/*
651 * Device ID cache. Currently supports one layout type per struct nfs_client.
652 * Add layout type to the lookup key to expand to support multiple types.
653 */
654int
655pnfs_alloc_init_deviceid_cache(struct nfs_client *clp,
656 void (*free_callback)(struct pnfs_deviceid_node *))
657{
658 struct pnfs_deviceid_cache *c;
659
660 c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL);
661 if (!c)
662 return -ENOMEM;
663 spin_lock(&clp->cl_lock);
664 if (clp->cl_devid_cache != NULL) {
665 atomic_inc(&clp->cl_devid_cache->dc_ref);
666 dprintk("%s [kref [%d]]\n", __func__,
667 atomic_read(&clp->cl_devid_cache->dc_ref));
668 kfree(c);
669 } else {
670 /* kzalloc initializes hlists */
671 spin_lock_init(&c->dc_lock);
672 atomic_set(&c->dc_ref, 1);
673 c->dc_free_callback = free_callback;
674 clp->cl_devid_cache = c;
675 dprintk("%s [new]\n", __func__);
676 }
677 spin_unlock(&clp->cl_lock);
678 return 0;
679}
680EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache);
681
682/*
683 * Called from pnfs_layoutdriver_type->free_lseg
684 * last layout segment reference frees deviceid
685 */
686void
687pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
688 struct pnfs_deviceid_node *devid)
689{
690 struct nfs4_deviceid *id = &devid->de_id;
691 struct pnfs_deviceid_node *d;
692 struct hlist_node *n;
693 long h = nfs4_deviceid_hash(id);
694
695 dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref));
696 if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock))
697 return;
698
699 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node)
700 if (!memcmp(&d->de_id, id, sizeof(*id))) {
701 hlist_del_rcu(&d->de_node);
702 spin_unlock(&c->dc_lock);
703 synchronize_rcu();
704 c->dc_free_callback(devid);
705 return;
706 }
707 spin_unlock(&c->dc_lock);
708 /* Why wasn't it found in the list? */
709 BUG();
710}
711EXPORT_SYMBOL_GPL(pnfs_put_deviceid);
712
713/* Find and reference a deviceid */
714struct pnfs_deviceid_node *
715pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id)
716{
717 struct pnfs_deviceid_node *d;
718 struct hlist_node *n;
719 long hash = nfs4_deviceid_hash(id);
720
721 dprintk("--> %s hash %ld\n", __func__, hash);
722 rcu_read_lock();
723 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
724 if (!memcmp(&d->de_id, id, sizeof(*id))) {
725 if (!atomic_inc_not_zero(&d->de_ref)) {
726 goto fail;
727 } else {
728 rcu_read_unlock();
729 return d;
730 }
731 }
732 }
733fail:
734 rcu_read_unlock();
735 return NULL;
736}
737EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid);
738
739/*
740 * Add a deviceid to the cache.
741 * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
742 */
743struct pnfs_deviceid_node *
744pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new)
745{
746 struct pnfs_deviceid_node *d;
747 long hash = nfs4_deviceid_hash(&new->de_id);
748
749 dprintk("--> %s hash %ld\n", __func__, hash);
750 spin_lock(&c->dc_lock);
751 d = pnfs_find_get_deviceid(c, &new->de_id);
752 if (d) {
753 spin_unlock(&c->dc_lock);
754 dprintk("%s [discard]\n", __func__);
755 c->dc_free_callback(new);
756 return d;
757 }
758 INIT_HLIST_NODE(&new->de_node);
759 atomic_set(&new->de_ref, 1);
760 hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
761 spin_unlock(&c->dc_lock);
762 dprintk("%s [new]\n", __func__);
763 return new;
764}
765EXPORT_SYMBOL_GPL(pnfs_add_deviceid);
766
767void
768pnfs_put_deviceid_cache(struct nfs_client *clp)
769{
770 struct pnfs_deviceid_cache *local = clp->cl_devid_cache;
771
772 dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
773 if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) {
774 int i;
775 /* Verify cache is empty */
776 for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
777 BUG_ON(!hlist_empty(&local->dc_deviceids[i]));
778 clp->cl_devid_cache = NULL;
779 spin_unlock(&clp->cl_lock);
780 kfree(local);
781 }
782}
783EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 1c3eb02f4944..cbba28cb02a7 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -32,7 +32,7 @@
32 32
33struct pnfs_layout_segment { 33struct pnfs_layout_segment {
34 struct list_head fi_list; 34 struct list_head fi_list;
35 u32 iomode; 35 struct pnfs_layout_range range;
36 struct kref kref; 36 struct kref kref;
37 struct pnfs_layout_hdr *layout; 37 struct pnfs_layout_hdr *layout;
38}; 38};
@@ -44,6 +44,7 @@ struct pnfs_layout_segment {
44enum { 44enum {
45 NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */ 45 NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */
46 NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ 46 NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
47 NFS_LAYOUT_STATEID_SET, /* have a valid layout stateid */
47}; 48};
48 49
49/* Per-layout driver specific registration structure */ 50/* Per-layout driver specific registration structure */
@@ -54,26 +55,96 @@ struct pnfs_layoutdriver_type {
54 struct module *owner; 55 struct module *owner;
55 int (*initialize_mountpoint) (struct nfs_server *); 56 int (*initialize_mountpoint) (struct nfs_server *);
56 int (*uninitialize_mountpoint) (struct nfs_server *); 57 int (*uninitialize_mountpoint) (struct nfs_server *);
58 struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr);
59 void (*free_lseg) (struct pnfs_layout_segment *lseg);
57}; 60};
58 61
59struct pnfs_layout_hdr { 62struct pnfs_layout_hdr {
60 unsigned long refcount; 63 unsigned long refcount;
61 struct list_head layouts; /* other client layouts */ 64 struct list_head layouts; /* other client layouts */
62 struct list_head segs; /* layout segments list */ 65 struct list_head segs; /* layout segments list */
66 seqlock_t seqlock; /* Protects the stateid */
67 nfs4_stateid stateid;
63 unsigned long state; 68 unsigned long state;
64 struct inode *inode; 69 struct inode *inode;
65}; 70};
66 71
72struct pnfs_device {
73 struct nfs4_deviceid dev_id;
74 unsigned int layout_type;
75 unsigned int mincount;
76 struct page **pages;
77 void *area;
78 unsigned int pgbase;
79 unsigned int pglen;
80};
81
82/*
83 * Device ID RCU cache. A device ID is unique per client ID and layout type.
84 */
85#define NFS4_DEVICE_ID_HASH_BITS 5
86#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS)
87#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1)
88
89static inline u32
90nfs4_deviceid_hash(struct nfs4_deviceid *id)
91{
92 unsigned char *cptr = (unsigned char *)id->data;
93 unsigned int nbytes = NFS4_DEVICEID4_SIZE;
94 u32 x = 0;
95
96 while (nbytes--) {
97 x *= 37;
98 x += *cptr++;
99 }
100 return x & NFS4_DEVICE_ID_HASH_MASK;
101}
102
103struct pnfs_deviceid_node {
104 struct hlist_node de_node;
105 struct nfs4_deviceid de_id;
106 atomic_t de_ref;
107};
108
109struct pnfs_deviceid_cache {
110 spinlock_t dc_lock;
111 atomic_t dc_ref;
112 void (*dc_free_callback)(struct pnfs_deviceid_node *);
113 struct hlist_head dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE];
114};
115
116extern int pnfs_alloc_init_deviceid_cache(struct nfs_client *,
117 void (*free_callback)(struct pnfs_deviceid_node *));
118extern void pnfs_put_deviceid_cache(struct nfs_client *);
119extern struct pnfs_deviceid_node *pnfs_find_get_deviceid(
120 struct pnfs_deviceid_cache *,
121 struct nfs4_deviceid *);
122extern struct pnfs_deviceid_node *pnfs_add_deviceid(
123 struct pnfs_deviceid_cache *,
124 struct pnfs_deviceid_node *);
125extern void pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
126 struct pnfs_deviceid_node *devid);
127
67extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); 128extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
68extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); 129extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
69 130
131/* nfs4proc.c */
132extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
133 struct pnfs_device *dev);
134extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
135
136/* pnfs.c */
70struct pnfs_layout_segment * 137struct pnfs_layout_segment *
71pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, 138pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
72 enum pnfs_iomode access_type); 139 enum pnfs_iomode access_type);
73void set_pnfs_layoutdriver(struct nfs_server *, u32 id); 140void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
74void unset_pnfs_layoutdriver(struct nfs_server *); 141void unset_pnfs_layoutdriver(struct nfs_server *);
142int pnfs_layout_process(struct nfs4_layoutget *lgp);
75void pnfs_destroy_layout(struct nfs_inode *); 143void pnfs_destroy_layout(struct nfs_inode *);
76void pnfs_destroy_all_layouts(struct nfs_client *); 144void pnfs_destroy_all_layouts(struct nfs_client *);
145void put_layout_hdr(struct inode *inode);
146void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
147 struct nfs4_state *open_state);
77 148
78 149
79static inline int lo_fail_bit(u32 iomode) 150static inline int lo_fail_bit(u32 iomode)
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 34da32436ac0..a9683d6acaa4 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -545,6 +545,8 @@ enum {
545 NFSPROC4_CLNT_SEQUENCE, 545 NFSPROC4_CLNT_SEQUENCE,
546 NFSPROC4_CLNT_GET_LEASE_TIME, 546 NFSPROC4_CLNT_GET_LEASE_TIME,
547 NFSPROC4_CLNT_RECLAIM_COMPLETE, 547 NFSPROC4_CLNT_RECLAIM_COMPLETE,
548 NFSPROC4_CLNT_LAYOUTGET,
549 NFSPROC4_CLNT_GETDEVICEINFO,
548}; 550};
549 551
550/* nfs41 types */ 552/* nfs41 types */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 4d62f1581ed1..452d96436d26 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -83,6 +83,7 @@ struct nfs_client {
83 u32 cl_exchange_flags; 83 u32 cl_exchange_flags;
84 struct nfs4_session *cl_session; /* sharred session */ 84 struct nfs4_session *cl_session; /* sharred session */
85 struct list_head cl_layouts; 85 struct list_head cl_layouts;
86 struct pnfs_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
86#endif /* CONFIG_NFS_V4_1 */ 87#endif /* CONFIG_NFS_V4_1 */
87 88
88#ifdef CONFIG_NFS_FSCACHE 89#ifdef CONFIG_NFS_FSCACHE
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 065f9d105d05..ba6cc8f223c9 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -187,6 +187,55 @@ struct nfs4_get_lease_time_res {
187 struct nfs4_sequence_res lr_seq_res; 187 struct nfs4_sequence_res lr_seq_res;
188}; 188};
189 189
190#define PNFS_LAYOUT_MAXSIZE 4096
191
192struct nfs4_layoutdriver_data {
193 __u32 len;
194 void *buf;
195};
196
197struct pnfs_layout_range {
198 u32 iomode;
199 u64 offset;
200 u64 length;
201};
202
203struct nfs4_layoutget_args {
204 __u32 type;
205 struct pnfs_layout_range range;
206 __u64 minlength;
207 __u32 maxcount;
208 struct inode *inode;
209 struct nfs_open_context *ctx;
210 struct nfs4_sequence_args seq_args;
211};
212
213struct nfs4_layoutget_res {
214 __u32 return_on_close;
215 struct pnfs_layout_range range;
216 __u32 type;
217 nfs4_stateid stateid;
218 struct nfs4_layoutdriver_data layout;
219 struct nfs4_sequence_res seq_res;
220};
221
222struct nfs4_layoutget {
223 struct nfs4_layoutget_args args;
224 struct nfs4_layoutget_res res;
225 struct pnfs_layout_segment **lsegpp;
226 int status;
227};
228
229struct nfs4_getdeviceinfo_args {
230 struct pnfs_device *pdev;
231 struct nfs4_sequence_args seq_args;
232};
233
234struct nfs4_getdeviceinfo_res {
235 struct pnfs_device *pdev;
236 struct nfs4_sequence_res seq_res;
237};
238
190/* 239/*
191 * Arguments to the open call. 240 * Arguments to the open call.
192 */ 241 */