 fs/nfs/nfs4proc.c         | 142
 fs/nfs/nfs4xdr.c          | 302
 fs/nfs/pnfs.c             | 385
 fs/nfs/pnfs.h             |  73
 include/linux/nfs4.h      |   2
 include/linux/nfs_fs_sb.h |   1
 include/linux/nfs_xdr.h   |  49
 7 files changed, 921 insertions, 33 deletions
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a5f1edb45b47..7e14e991ddfa 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -55,6 +55,7 @@
 #include "internal.h"
 #include "iostat.h"
 #include "callback.h"
+#include "pnfs.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
@@ -5256,6 +5257,147 @@ out:
 	dprintk("<-- %s status=%d\n", __func__, status);
 	return status;
 }
+
+static void
+nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_layoutget *lgp = calldata;
+	struct inode *ino = lgp->args.inode;
+	struct nfs_server *server = NFS_SERVER(ino);
+
+	dprintk("--> %s\n", __func__);
+	if (nfs4_setup_sequence(server, &lgp->args.seq_args,
+				&lgp->res.seq_res, 0, task))
+		return;
+	rpc_call_start(task);
+}
+
+static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_layoutget *lgp = calldata;
+	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
+
+	dprintk("--> %s\n", __func__);
+
+	if (!nfs4_sequence_done(task, &lgp->res.seq_res))
+		return;
+
+	switch (task->tk_status) {
+	case 0:
+		break;
+	case -NFS4ERR_LAYOUTTRYLATER:
+	case -NFS4ERR_RECALLCONFLICT:
+		task->tk_status = -NFS4ERR_DELAY;
+		/* Fall through */
+	default:
+		if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
+			rpc_restart_call_prepare(task);
+			return;
+		}
+	}
+	lgp->status = task->tk_status;
+	dprintk("<-- %s\n", __func__);
+}
+
+static void nfs4_layoutget_release(void *calldata)
+{
+	struct nfs4_layoutget *lgp = calldata;
+
+	dprintk("--> %s\n", __func__);
+	put_layout_hdr(lgp->args.inode);
+	if (lgp->res.layout.buf != NULL)
+		free_page((unsigned long) lgp->res.layout.buf);
+	put_nfs_open_context(lgp->args.ctx);
+	kfree(calldata);
+	dprintk("<-- %s\n", __func__);
+}
+
+static const struct rpc_call_ops nfs4_layoutget_call_ops = {
+	.rpc_call_prepare = nfs4_layoutget_prepare,
+	.rpc_call_done = nfs4_layoutget_done,
+	.rpc_release = nfs4_layoutget_release,
+};
+
+int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
+{
+	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
+	struct rpc_task *task;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET],
+		.rpc_argp = &lgp->args,
+		.rpc_resp = &lgp->res,
+	};
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = server->client,
+		.rpc_message = &msg,
+		.callback_ops = &nfs4_layoutget_call_ops,
+		.callback_data = lgp,
+		.flags = RPC_TASK_ASYNC,
+	};
+	int status = 0;
+
+	dprintk("--> %s\n", __func__);
+
+	lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS);
+	if (lgp->res.layout.buf == NULL) {
+		nfs4_layoutget_release(lgp);
+		return -ENOMEM;
+	}
+
+	lgp->res.seq_res.sr_slot = NULL;
+	task = rpc_run_task(&task_setup_data);
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+	status = nfs4_wait_for_completion_rpc_task(task);
+	if (status != 0)
+		goto out;
+	status = lgp->status;
+	if (status != 0)
+		goto out;
+	status = pnfs_layout_process(lgp);
+out:
+	rpc_put_task(task);
+	dprintk("<-- %s status=%d\n", __func__, status);
+	return status;
+}
+
+static int
+_nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
+{
+	struct nfs4_getdeviceinfo_args args = {
+		.pdev = pdev,
+	};
+	struct nfs4_getdeviceinfo_res res = {
+		.pdev = pdev,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICEINFO],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+	int status;
+
+	dprintk("--> %s\n", __func__);
+	status = nfs4_call_sync(server, &msg, &args, &res, 0);
+	dprintk("<-- %s status=%d\n", __func__, status);
+
+	return status;
+}
+
+int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
+{
+	struct nfs4_exception exception = { };
+	int err;
+
+	do {
+		err = nfs4_handle_exception(server,
+					_nfs4_proc_getdeviceinfo(server, pdev),
+					&exception);
+	} while (exception.retry);
+	return err;
+}
+EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo);
+
 #endif /* CONFIG_NFS_V4_1 */
 
 struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
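
Editor's note, not part of the patch: nfs4_proc_getdeviceinfo() above is exported for layout drivers, which are expected to supply a pnfs_device describing the deviceid they want plus the reply pages that the opaque device_addr4 should land in. The sketch below shows one plausible calling sequence; the helper name example_read_deviceinfo and the single-page reply buffer are assumptions for illustration, and only struct pnfs_device and nfs4_proc_getdeviceinfo() come from this patch.

/*
 * Editor's sketch, not part of the patch: one plausible way for a layout
 * driver to call the nfs4_proc_getdeviceinfo() export above.
 */
static int example_read_deviceinfo(struct nfs_server *server,
				   struct nfs4_deviceid *dev_id)
{
	struct pnfs_device pdev;
	struct page *page;
	int err;

	page = alloc_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	memset(&pdev, 0, sizeof(pdev));
	memcpy(&pdev.dev_id, dev_id, sizeof(*dev_id));
	pdev.layout_type = server->pnfs_curr_ld->id;
	pdev.pages = &page;		/* reply pages for device_addr4 */
	pdev.pgbase = 0;
	pdev.pglen = PAGE_SIZE;		/* becomes gdia_maxcount on the wire */

	err = nfs4_proc_getdeviceinfo(server, &pdev);
	if (!err) {
		/* pdev.mincount bytes of opaque device_addr4 now sit in
		 * the page; a real driver would XDR-decode them here. */
	}
	__free_page(page);
	return err;
}
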
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 8b4dfa393f0f..f313c4cce7e4 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -52,6 +52,7 @@
 #include <linux/nfs_idmap.h>
 #include "nfs4_fs.h"
 #include "internal.h"
+#include "pnfs.h"
 
 #define NFSDBG_FACILITY		NFSDBG_XDR
 
@@ -310,6 +311,19 @@ static int nfs4_stat_to_errno(int);
 				XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5)
 #define encode_reclaim_complete_maxsz	(op_encode_hdr_maxsz + 4)
 #define decode_reclaim_complete_maxsz	(op_decode_hdr_maxsz + 4)
+#define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \
+				XDR_QUADLEN(NFS4_DEVICEID4_SIZE))
+#define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \
+				1 /* layout type */ + \
+				1 /* opaque devaddr4 length */ + \
+				  /* devaddr4 payload is read into page */ \
+				1 /* notification bitmap length */ + \
+				1 /* notification bitmap */)
+#define encode_layoutget_maxsz	(op_encode_hdr_maxsz + 10 + \
+				encode_stateid_maxsz)
+#define decode_layoutget_maxsz	(op_decode_hdr_maxsz + 8 + \
+				decode_stateid_maxsz + \
+				XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
 #else /* CONFIG_NFS_V4_1 */
 #define encode_sequence_maxsz	0
 #define decode_sequence_maxsz	0
@@ -699,6 +713,20 @@ static int nfs4_stat_to_errno(int);
 #define NFS4_dec_reclaim_complete_sz	(compound_decode_hdr_maxsz + \
 					 decode_sequence_maxsz + \
 					 decode_reclaim_complete_maxsz)
+#define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
+				encode_getdeviceinfo_maxsz)
+#define NFS4_dec_getdeviceinfo_sz (compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
+				decode_getdeviceinfo_maxsz)
+#define NFS4_enc_layoutget_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
+				encode_putfh_maxsz + \
+				encode_layoutget_maxsz)
+#define NFS4_dec_layoutget_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
+				decode_putfh_maxsz + \
+				decode_layoutget_maxsz)
 
 const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
 				      compound_encode_hdr_maxsz +
@@ -1737,6 +1765,58 @@ static void encode_sequence(struct xdr_stream *xdr,
 #endif /* CONFIG_NFS_V4_1 */
 }
 
+#ifdef CONFIG_NFS_V4_1
+static void
+encode_getdeviceinfo(struct xdr_stream *xdr,
+		     const struct nfs4_getdeviceinfo_args *args,
+		     struct compound_hdr *hdr)
+{
+	__be32 *p;
+
+	p = reserve_space(xdr, 16 + NFS4_DEVICEID4_SIZE);
+	*p++ = cpu_to_be32(OP_GETDEVICEINFO);
+	p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data,
+				    NFS4_DEVICEID4_SIZE);
+	*p++ = cpu_to_be32(args->pdev->layout_type);
+	*p++ = cpu_to_be32(args->pdev->pglen);		/* gdia_maxcount */
+	*p++ = cpu_to_be32(0);				/* bitmap length 0 */
+	hdr->nops++;
+	hdr->replen += decode_getdeviceinfo_maxsz;
+}
+
+static void
+encode_layoutget(struct xdr_stream *xdr,
+		 const struct nfs4_layoutget_args *args,
+		 struct compound_hdr *hdr)
+{
+	nfs4_stateid stateid;
+	__be32 *p;
+
+	p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
+	*p++ = cpu_to_be32(OP_LAYOUTGET);
+	*p++ = cpu_to_be32(0);     /* Signal layout available */
+	*p++ = cpu_to_be32(args->type);
+	*p++ = cpu_to_be32(args->range.iomode);
+	p = xdr_encode_hyper(p, args->range.offset);
+	p = xdr_encode_hyper(p, args->range.length);
+	p = xdr_encode_hyper(p, args->minlength);
+	pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout,
+				args->ctx->state);
+	p = xdr_encode_opaque_fixed(p, &stateid.data, NFS4_STATEID_SIZE);
+	*p = cpu_to_be32(args->maxcount);
+
+	dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
+		__func__,
+		args->type,
+		args->range.iomode,
+		(unsigned long)args->range.offset,
+		(unsigned long)args->range.length,
+		args->maxcount);
+	hdr->nops++;
+	hdr->replen += decode_layoutget_maxsz;
+}
+#endif /* CONFIG_NFS_V4_1 */
+
 /*
  * END OF "GENERIC" ENCODE ROUTINES.
  */
@@ -2554,6 +2634,51 @@ static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p,
 	return 0;
 }
 
+/*
+ * Encode GETDEVICEINFO request
+ */
+static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
+				      struct nfs4_getdeviceinfo_args *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
+	};
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
+	encode_getdeviceinfo(&xdr, args, &hdr);
+
+	/* set up reply kvec. Subtract notification bitmap max size (2)
+	 * so that notification bitmap is put in xdr_buf tail */
+	xdr_inline_pages(&req->rq_rcv_buf, (hdr.replen - 2) << 2,
+			 args->pdev->pages, args->pdev->pgbase,
+			 args->pdev->pglen);
+
+	encode_nops(&hdr);
+	return 0;
+}
+
+/*
+ * Encode LAYOUTGET request
+ */
+static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p,
+				  struct nfs4_layoutget_args *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
+	};
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
+	encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
+	encode_layoutget(&xdr, args, &hdr);
+	encode_nops(&hdr);
+	return 0;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
@@ -4830,6 +4955,134 @@ out_overflow:
 #endif /* CONFIG_NFS_V4_1 */
 }
 
+#if defined(CONFIG_NFS_V4_1)
+
+static int decode_getdeviceinfo(struct xdr_stream *xdr,
+				struct pnfs_device *pdev)
+{
+	__be32 *p;
+	uint32_t len, type;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_GETDEVICEINFO);
+	if (status) {
+		if (status == -ETOOSMALL) {
+			p = xdr_inline_decode(xdr, 4);
+			if (unlikely(!p))
+				goto out_overflow;
+			pdev->mincount = be32_to_cpup(p);
+			dprintk("%s: Min count too small. mincnt = %u\n",
+				__func__, pdev->mincount);
+		}
+		return status;
+	}
+
+	p = xdr_inline_decode(xdr, 8);
+	if (unlikely(!p))
+		goto out_overflow;
+	type = be32_to_cpup(p++);
+	if (type != pdev->layout_type) {
+		dprintk("%s: layout mismatch req: %u pdev: %u\n",
+			__func__, pdev->layout_type, type);
+		return -EINVAL;
+	}
+	/*
+	 * Get the length of the opaque device_addr4. xdr_read_pages places
+	 * the opaque device_addr4 in the xdr_buf->pages (pnfs_device->pages)
+	 * and places the remaining xdr data in xdr_buf->tail
+	 */
+	pdev->mincount = be32_to_cpup(p);
+	xdr_read_pages(xdr, pdev->mincount); /* include space for the length */
+
+	/* Parse notification bitmap, verifying that it is zero. */
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(!p))
+		goto out_overflow;
+	len = be32_to_cpup(p);
+	if (len) {
+		int i;
+
+		p = xdr_inline_decode(xdr, 4 * len);
+		if (unlikely(!p))
+			goto out_overflow;
+		for (i = 0; i < len; i++, p++) {
+			if (be32_to_cpup(p)) {
+				dprintk("%s: notifications not supported\n",
+					__func__);
+				return -EIO;
+			}
+		}
+	}
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
+			    struct nfs4_layoutget_res *res)
+{
+	__be32 *p;
+	int status;
+	u32 layout_count;
+
+	status = decode_op_hdr(xdr, OP_LAYOUTGET);
+	if (status)
+		return status;
+	p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE);
+	if (unlikely(!p))
+		goto out_overflow;
+	res->return_on_close = be32_to_cpup(p++);
+	p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE);
+	layout_count = be32_to_cpup(p);
+	if (!layout_count) {
+		dprintk("%s: server responded with empty layout array\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	p = xdr_inline_decode(xdr, 24);
+	if (unlikely(!p))
+		goto out_overflow;
+	p = xdr_decode_hyper(p, &res->range.offset);
+	p = xdr_decode_hyper(p, &res->range.length);
+	res->range.iomode = be32_to_cpup(p++);
+	res->type = be32_to_cpup(p++);
+
+	status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p);
+	if (unlikely(status))
+		return status;
+
+	dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n",
+		__func__,
+		(unsigned long)res->range.offset,
+		(unsigned long)res->range.length,
+		res->range.iomode,
+		res->type,
+		res->layout.len);
+
+	/* nfs4_proc_layoutget allocated a single page */
+	if (res->layout.len > PAGE_SIZE)
+		return -ENOMEM;
+	memcpy(res->layout.buf, p, res->layout.len);
+
+	if (layout_count > 1) {
+		/* We only handle a length one array at the moment.  Any
+		 * further entries are just ignored.  Note that this means
+		 * the client may see a response that is less than the
+		 * minimum it requested.
+		 */
+		dprintk("%s: server responded with %d layouts, dropping tail\n",
+			__func__, layout_count);
+	}
+
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+#endif /* CONFIG_NFS_V4_1 */
+
 /*
  * END OF "GENERIC" DECODE ROUTINES.
  */
@@ -5857,6 +6110,53 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p,
 		status = decode_reclaim_complete(&xdr, (void *)NULL);
 	return status;
 }
+
+/*
+ * Decode GETDEVINFO response
+ */
+static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p,
+				      struct nfs4_getdeviceinfo_res *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status != 0)
+		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status != 0)
+		goto out;
+	status = decode_getdeviceinfo(&xdr, res->pdev);
+out:
+	return status;
+}
+
+/*
+ * Decode LAYOUTGET response
+ */
+static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p,
+				  struct nfs4_layoutget_res *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
+	status = decode_putfh(&xdr);
+	if (status)
+		goto out;
+	status = decode_layoutget(&xdr, rqstp, res);
+out:
+	return status;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
@@ -6048,6 +6348,8 @@ struct rpc_procinfo nfs4_procedures[] = {
 	PROC(SEQUENCE,		enc_sequence,		dec_sequence),
 	PROC(GET_LEASE_TIME,	enc_get_lease_time,	dec_get_lease_time),
 	PROC(RECLAIM_COMPLETE,	enc_reclaim_complete,	dec_reclaim_complete),
+	PROC(GETDEVICEINFO,	enc_getdeviceinfo,	dec_getdeviceinfo),
+	PROC(LAYOUTGET,		enc_layoutget,		dec_layoutget),
 #endif /* CONFIG_NFS_V4_1 */
 };
 
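
Editor's note, not part of the patch: the comment in nfs4_xdr_enc_getdeviceinfo() about "subtract notification bitmap max size (2)" is easy to misread. hdr.replen is maintained in 32-bit XDR words, so the byte offset handed to xdr_inline_pages() converts words to bytes and drops the last two words of decode_getdeviceinfo_maxsz (bitmap length plus one bitmap word), which makes the opaque device_addr4 body land in the reply pages while the notification bitmap is decoded from the xdr_buf tail. A hypothetical helper spelling out that conversion:

/*
 * Editor's illustration only, not part of the patch: the reply-page
 * offset arithmetic used above, with replen_words counting 32-bit XDR
 * words already accounted for in the compound reply.
 */
static unsigned int example_getdeviceinfo_page_offset(unsigned int replen_words)
{
	return (replen_words - 2) << 2;		/* XDR words to bytes */
}
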
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 891a0c36f992..d1ad7df3479e 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -140,6 +140,11 @@ pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
 		printk(KERN_ERR "%s id 0 is reserved\n", __func__);
 		return status;
 	}
+	if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
+		printk(KERN_ERR "%s Layout driver must provide "
+		       "alloc_lseg and free_lseg.\n", __func__);
+		return status;
+	}
 
 	spin_lock(&pnfs_spinlock);
 	tmp = find_pnfs_driver_locked(ld_type->id);
@@ -168,6 +173,10 @@ pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
 }
 EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
 
+/*
+ * pNFS client layout cache
+ */
+
 static void
 get_layout_hdr_locked(struct pnfs_layout_hdr *lo)
 {
@@ -190,7 +199,7 @@ put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
 	}
 }
 
-static void
+void
 put_layout_hdr(struct inode *inode)
 {
 	spin_lock(&inode->i_lock);
@@ -215,7 +224,7 @@ destroy_lseg(struct kref *kref)
 	struct inode *ino = lseg->layout->inode;
 
 	dprintk("--> %s\n", __func__);
-	kfree(lseg);
+	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
 	/* Matched by get_layout_hdr_locked in pnfs_insert_layout */
 	put_layout_hdr(ino);
 }
@@ -249,6 +258,9 @@ pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list)
 	/* List does not take a reference, so no need for put here */
 	list_del_init(&lo->layouts);
 	spin_unlock(&clp->cl_lock);
+	write_seqlock(&lo->seqlock);
+	clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
+	write_sequnlock(&lo->seqlock);
 
 	dprintk("%s:Return\n", __func__);
 }
@@ -307,40 +319,135 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
 	}
 }
 
-static void pnfs_insert_layout(struct pnfs_layout_hdr *lo,
-			       struct pnfs_layout_segment *lseg);
+/* update lo->stateid with new if is more recent
+ *
+ * lo->stateid could be the open stateid, in which case we just use what given.
+ */
+static void
+pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
+			const nfs4_stateid *new)
+{
+	nfs4_stateid *old = &lo->stateid;
+	bool overwrite = false;
+
+	write_seqlock(&lo->seqlock);
+	if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) ||
+	    memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other)))
+		overwrite = true;
+	else {
+		u32 oldseq, newseq;
+
+		oldseq = be32_to_cpu(old->stateid.seqid);
+		newseq = be32_to_cpu(new->stateid.seqid);
+		if ((int)(newseq - oldseq) > 0)
+			overwrite = true;
+	}
+	if (overwrite)
+		memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
+	write_sequnlock(&lo->seqlock);
+}
+
+static void
+pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
+			      struct nfs4_state *state)
+{
+	int seq;
+
+	dprintk("--> %s\n", __func__);
+	write_seqlock(&lo->seqlock);
+	do {
+		seq = read_seqbegin(&state->seqlock);
+		memcpy(lo->stateid.data, state->stateid.data,
+		       sizeof(state->stateid.data));
+	} while (read_seqretry(&state->seqlock, seq));
+	set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
+	write_sequnlock(&lo->seqlock);
+	dprintk("<-- %s\n", __func__);
+}
+
+void
+pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
+			struct nfs4_state *open_state)
+{
+	int seq;
 
-/* Get layout from server. */
+	dprintk("--> %s\n", __func__);
+	do {
+		seq = read_seqbegin(&lo->seqlock);
+		if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
+			/* This will trigger retry of the read */
+			pnfs_layout_from_open_stateid(lo, open_state);
+		} else
+			memcpy(dst->data, lo->stateid.data,
+			       sizeof(lo->stateid.data));
+	} while (read_seqretry(&lo->seqlock, seq));
+	dprintk("<-- %s\n", __func__);
+}
+
+/*
+ * Get layout from server.
+ *    for now, assume that whole file layouts are requested.
+ *    arg->offset: 0
+ *    arg->length: all ones
+ */
 static struct pnfs_layout_segment *
 send_layoutget(struct pnfs_layout_hdr *lo,
 	   struct nfs_open_context *ctx,
 	   u32 iomode)
 {
 	struct inode *ino = lo->inode;
-	struct pnfs_layout_segment *lseg;
+	struct nfs_server *server = NFS_SERVER(ino);
+	struct nfs4_layoutget *lgp;
+	struct pnfs_layout_segment *lseg = NULL;
+
+	dprintk("--> %s\n", __func__);
 
-	/* Lets pretend we sent LAYOUTGET and got a response */
-	lseg = kzalloc(sizeof(*lseg), GFP_KERNEL);
+	BUG_ON(ctx == NULL);
+	lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
+	if (lgp == NULL) {
+		put_layout_hdr(lo->inode);
+		return NULL;
+	}
+	lgp->args.minlength = NFS4_MAX_UINT64;
+	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
+	lgp->args.range.iomode = iomode;
+	lgp->args.range.offset = 0;
+	lgp->args.range.length = NFS4_MAX_UINT64;
+	lgp->args.type = server->pnfs_curr_ld->id;
+	lgp->args.inode = ino;
+	lgp->args.ctx = get_nfs_open_context(ctx);
+	lgp->lsegpp = &lseg;
+
+	/* Synchronously retrieve layout information from server and
+	 * store in lseg.
+	 */
+	nfs4_proc_layoutget(lgp);
 	if (!lseg) {
+		/* remember that LAYOUTGET failed and suspend trying */
 		set_bit(lo_fail_bit(iomode), &lo->state);
-		spin_lock(&ino->i_lock);
-		put_layout_hdr_locked(lo);
-		spin_unlock(&ino->i_lock);
-		return NULL;
 	}
-	init_lseg(lo, lseg);
-	lseg->iomode = IOMODE_RW;
-	spin_lock(&ino->i_lock);
-	pnfs_insert_layout(lo, lseg);
-	put_layout_hdr_locked(lo);
-	spin_unlock(&ino->i_lock);
 	return lseg;
 }
 
+/*
+ * Compare two layout segments for sorting into layout cache.
+ * We want to preferentially return RW over RO layouts, so ensure those
+ * are seen first.
+ */
+static s64
+cmp_layout(u32 iomode1, u32 iomode2)
+{
+	/* read > read/write */
+	return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ);
+}
+
 static void
 pnfs_insert_layout(struct pnfs_layout_hdr *lo,
 		   struct pnfs_layout_segment *lseg)
 {
+	struct pnfs_layout_segment *lp;
+	int found = 0;
+
 	dprintk("%s:Begin\n", __func__);
 
 	assert_spin_locked(&lo->inode->i_lock);
@@ -352,19 +459,28 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
 		list_add_tail(&lo->layouts, &clp->cl_layouts);
 		spin_unlock(&clp->cl_lock);
 	}
-	get_layout_hdr_locked(lo);
-	/* STUB - add the constructed lseg if necessary */
-	if (list_empty(&lo->segs)) {
+	list_for_each_entry(lp, &lo->segs, fi_list) {
+		if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0)
+			continue;
+		list_add_tail(&lseg->fi_list, &lp->fi_list);
+		dprintk("%s: inserted lseg %p "
+			"iomode %d offset %llu length %llu before "
+			"lp %p iomode %d offset %llu length %llu\n",
+			__func__, lseg, lseg->range.iomode,
+			lseg->range.offset, lseg->range.length,
+			lp, lp->range.iomode, lp->range.offset,
+			lp->range.length);
+		found = 1;
+		break;
+	}
+	if (!found) {
 		list_add_tail(&lseg->fi_list, &lo->segs);
-		dprintk("%s: inserted lseg %p iomode %d at tail\n",
-			__func__, lseg, lseg->iomode);
-	} else {
-		/* There is no harm for the moment in calling this
-		 * with the lock held, and the call will be removed
-		 * with the STUB.
-		 */
-		put_lseg(lseg);
+		dprintk("%s: inserted lseg %p "
+			"iomode %d offset %llu length %llu at tail\n",
+			__func__, lseg, lseg->range.iomode,
+			lseg->range.offset, lseg->range.length);
 	}
+	get_layout_hdr_locked(lo);
 
 	dprintk("%s:Return\n", __func__);
 }
@@ -380,6 +496,7 @@ alloc_init_layout_hdr(struct inode *ino)
 	lo->refcount = 1;
 	INIT_LIST_HEAD(&lo->layouts);
 	INIT_LIST_HEAD(&lo->segs);
+	seqlock_init(&lo->seqlock);
 	lo->inode = ino;
 	return lo;
 }
@@ -407,11 +524,46 @@ pnfs_find_alloc_layout(struct inode *ino)
 	return nfsi->layout;
 }
 
-/* STUB - LAYOUTGET never succeeds, so cache is empty */
+/*
+ * iomode matching rules:
+ * iomode	lseg	match
+ * -----	-----	-----
+ * ANY		READ	true
+ * ANY		RW	true
+ * RW		READ	false
+ * RW		RW	true
+ * READ		READ	true
+ * READ		RW	true
+ */
+static int
+is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
+{
+	return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW);
+}
+
+/*
+ * lookup range in layout
+ */
 static struct pnfs_layout_segment *
 pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode)
 {
-	return NULL;
+	struct pnfs_layout_segment *lseg, *ret = NULL;
+
+	dprintk("%s:Begin\n", __func__);
+
+	assert_spin_locked(&lo->inode->i_lock);
+	list_for_each_entry(lseg, &lo->segs, fi_list) {
+		if (is_matching_lseg(lseg, iomode)) {
+			ret = lseg;
+			break;
+		}
+		if (cmp_layout(iomode, lseg->range.iomode) > 0)
+			break;
+	}
+
+	dprintk("%s:Return lseg %p ref %d\n",
+		__func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0);
+	return ret;
 }
 
 /*
@@ -448,7 +600,7 @@ pnfs_update_layout(struct inode *ino,
 	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state))
 		goto out_unlock;
 
-	get_layout_hdr_locked(lo);
+	get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */
 	spin_unlock(&ino->i_lock);
 
 	lseg = send_layoutget(lo, ctx, iomode);
@@ -460,3 +612,172 @@ out_unlock:
 	spin_unlock(&ino->i_lock);
 	goto out;
 }
+
+int
+pnfs_layout_process(struct nfs4_layoutget *lgp)
+{
+	struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
+	struct nfs4_layoutget_res *res = &lgp->res;
+	struct pnfs_layout_segment *lseg;
+	struct inode *ino = lo->inode;
+	int status = 0;
+
+	/* Inject layout blob into I/O device driver */
+	lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
+	if (!lseg || IS_ERR(lseg)) {
+		if (!lseg)
+			status = -ENOMEM;
+		else
+			status = PTR_ERR(lseg);
+		dprintk("%s: Could not allocate layout: error %d\n",
+		       __func__, status);
+		goto out;
+	}
+
+	spin_lock(&ino->i_lock);
+	init_lseg(lo, lseg);
+	lseg->range = res->range;
+	*lgp->lsegpp = lseg;
+	pnfs_insert_layout(lo, lseg);
+
+	/* Done processing layoutget. Set the layout stateid */
+	pnfs_set_layout_stateid(lo, &res->stateid);
+	spin_unlock(&ino->i_lock);
+out:
+	return status;
+}
+
+/*
+ * Device ID cache. Currently supports one layout type per struct nfs_client.
+ * Add layout type to the lookup key to expand to support multiple types.
+ */
+int
+pnfs_alloc_init_deviceid_cache(struct nfs_client *clp,
+			 void (*free_callback)(struct pnfs_deviceid_node *))
+{
+	struct pnfs_deviceid_cache *c;
+
+	c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL);
+	if (!c)
+		return -ENOMEM;
+	spin_lock(&clp->cl_lock);
+	if (clp->cl_devid_cache != NULL) {
+		atomic_inc(&clp->cl_devid_cache->dc_ref);
+		dprintk("%s [kref [%d]]\n", __func__,
+			atomic_read(&clp->cl_devid_cache->dc_ref));
+		kfree(c);
+	} else {
+		/* kzalloc initializes hlists */
+		spin_lock_init(&c->dc_lock);
+		atomic_set(&c->dc_ref, 1);
+		c->dc_free_callback = free_callback;
+		clp->cl_devid_cache = c;
+		dprintk("%s [new]\n", __func__);
+	}
+	spin_unlock(&clp->cl_lock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache);
+
+/*
+ * Called from pnfs_layoutdriver_type->free_lseg
+ * last layout segment reference frees deviceid
+ */
+void
+pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
+		  struct pnfs_deviceid_node *devid)
+{
+	struct nfs4_deviceid *id = &devid->de_id;
+	struct pnfs_deviceid_node *d;
+	struct hlist_node *n;
+	long h = nfs4_deviceid_hash(id);
+
+	dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref));
+	if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock))
+		return;
+
+	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node)
+		if (!memcmp(&d->de_id, id, sizeof(*id))) {
+			hlist_del_rcu(&d->de_node);
+			spin_unlock(&c->dc_lock);
+			synchronize_rcu();
+			c->dc_free_callback(devid);
+			return;
+		}
+	spin_unlock(&c->dc_lock);
+	/* Why wasn't it found in the list? */
+	BUG();
+}
+EXPORT_SYMBOL_GPL(pnfs_put_deviceid);
+
+/* Find and reference a deviceid */
+struct pnfs_deviceid_node *
+pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id)
+{
+	struct pnfs_deviceid_node *d;
+	struct hlist_node *n;
+	long hash = nfs4_deviceid_hash(id);
+
+	dprintk("--> %s hash %ld\n", __func__, hash);
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
+		if (!memcmp(&d->de_id, id, sizeof(*id))) {
+			if (!atomic_inc_not_zero(&d->de_ref)) {
+				goto fail;
+			} else {
+				rcu_read_unlock();
+				return d;
+			}
+		}
+	}
+fail:
+	rcu_read_unlock();
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid);
+
+/*
+ * Add a deviceid to the cache.
+ * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
+ */
+struct pnfs_deviceid_node *
+pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new)
+{
+	struct pnfs_deviceid_node *d;
+	long hash = nfs4_deviceid_hash(&new->de_id);
+
+	dprintk("--> %s hash %ld\n", __func__, hash);
+	spin_lock(&c->dc_lock);
+	d = pnfs_find_get_deviceid(c, &new->de_id);
+	if (d) {
+		spin_unlock(&c->dc_lock);
+		dprintk("%s [discard]\n", __func__);
+		c->dc_free_callback(new);
+		return d;
+	}
+	INIT_HLIST_NODE(&new->de_node);
+	atomic_set(&new->de_ref, 1);
+	hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
+	spin_unlock(&c->dc_lock);
+	dprintk("%s [new]\n", __func__);
+	return new;
+}
+EXPORT_SYMBOL_GPL(pnfs_add_deviceid);
+
+void
+pnfs_put_deviceid_cache(struct nfs_client *clp)
+{
+	struct pnfs_deviceid_cache *local = clp->cl_devid_cache;
+
+	dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
+	if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) {
+		int i;
+		/* Verify cache is empty */
+		for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
+			BUG_ON(!hlist_empty(&local->dc_deviceids[i]));
+		clp->cl_devid_cache = NULL;
+		spin_unlock(&clp->cl_lock);
+		kfree(local);
+	}
+}
+EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache);
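
Editor's note, not part of the patch: the deviceid cache above is driven entirely by layout drivers through the exported helpers. The sketch below shows the intended calling pattern; struct example_deviceid and the example_* helpers are invented names, and a real driver would issue GETDEVICEINFO and decode the device_addr4 blob before inserting a node.

/*
 * Editor's sketch, not part of the patch: how a layout driver might use
 * the exported deviceid cache helpers.
 */
struct example_deviceid {
	struct pnfs_deviceid_node node;		/* embeds the cache node */
	/* driver-private device description follows */
};

static void example_free_deviceid_node(struct pnfs_deviceid_node *d)
{
	kfree(container_of(d, struct example_deviceid, node));
}

static int example_initialize_mountpoint(struct nfs_server *server)
{
	/* One cache per nfs_client, shared and refcounted across mounts */
	return pnfs_alloc_init_deviceid_cache(server->nfs_client,
					      example_free_deviceid_node);
}

static struct pnfs_deviceid_node *
example_lookup_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id)
{
	struct pnfs_deviceid_node *d;
	struct example_deviceid *new;

	/* Fast path: already cached; takes a reference on the node */
	d = pnfs_find_get_deviceid(clp->cl_devid_cache, id);
	if (d)
		return d;

	/* Slow path: a real driver would GETDEVICEINFO and decode into
	 * "new" here.  If two threads race, pnfs_add_deviceid() keeps
	 * the first node and frees the loser through the callback. */
	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return NULL;
	memcpy(&new->node.de_id, id, sizeof(*id));
	return pnfs_add_deviceid(clp->cl_devid_cache, &new->node);
}

References are dropped the other way around: pnfs_put_deviceid() from the driver's free_lseg path (as the comment above pnfs_put_deviceid() says), and pnfs_put_deviceid_cache() when the mountpoint is torn down.
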
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 1c3eb02f4944..cbba28cb02a7 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -32,7 +32,7 @@
 
 struct pnfs_layout_segment {
 	struct list_head fi_list;
-	u32 iomode;
+	struct pnfs_layout_range range;
 	struct kref kref;
 	struct pnfs_layout_hdr *layout;
 };
@@ -44,6 +44,7 @@ struct pnfs_layout_segment {
 enum {
 	NFS_LAYOUT_RO_FAILED = 0,	/* get ro layout failed stop trying */
 	NFS_LAYOUT_RW_FAILED,		/* get rw layout failed stop trying */
+	NFS_LAYOUT_STATEID_SET,		/* have a valid layout stateid */
 };
 
 /* Per-layout driver specific registration structure */
@@ -54,26 +55,96 @@ struct pnfs_layoutdriver_type {
 	struct module *owner;
 	int (*initialize_mountpoint) (struct nfs_server *);
 	int (*uninitialize_mountpoint) (struct nfs_server *);
+	struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr);
+	void (*free_lseg) (struct pnfs_layout_segment *lseg);
 };
 
 struct pnfs_layout_hdr {
 	unsigned long		refcount;
 	struct list_head	layouts;   /* other client layouts */
 	struct list_head	segs;      /* layout segments list */
+	seqlock_t		seqlock;   /* Protects the stateid */
+	nfs4_stateid		stateid;
 	unsigned long		state;
 	struct inode		*inode;
 };
 
+struct pnfs_device {
+	struct nfs4_deviceid dev_id;
+	unsigned int  layout_type;
+	unsigned int  mincount;
+	struct page **pages;
+	void          *area;
+	unsigned int  pgbase;
+	unsigned int  pglen;
+};
+
+/*
+ * Device ID RCU cache. A device ID is unique per client ID and layout type.
+ */
+#define NFS4_DEVICE_ID_HASH_BITS	5
+#define NFS4_DEVICE_ID_HASH_SIZE	(1 << NFS4_DEVICE_ID_HASH_BITS)
+#define NFS4_DEVICE_ID_HASH_MASK	(NFS4_DEVICE_ID_HASH_SIZE - 1)
+
+static inline u32
+nfs4_deviceid_hash(struct nfs4_deviceid *id)
+{
+	unsigned char *cptr = (unsigned char *)id->data;
+	unsigned int nbytes = NFS4_DEVICEID4_SIZE;
+	u32 x = 0;
+
+	while (nbytes--) {
+		x *= 37;
+		x += *cptr++;
+	}
+	return x & NFS4_DEVICE_ID_HASH_MASK;
+}
+
+struct pnfs_deviceid_node {
+	struct hlist_node	de_node;
+	struct nfs4_deviceid	de_id;
+	atomic_t		de_ref;
+};
+
+struct pnfs_deviceid_cache {
+	spinlock_t		dc_lock;
+	atomic_t		dc_ref;
+	void			(*dc_free_callback)(struct pnfs_deviceid_node *);
+	struct hlist_head	dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE];
+};
+
+extern int pnfs_alloc_init_deviceid_cache(struct nfs_client *,
+			void (*free_callback)(struct pnfs_deviceid_node *));
+extern void pnfs_put_deviceid_cache(struct nfs_client *);
+extern struct pnfs_deviceid_node *pnfs_find_get_deviceid(
+				struct pnfs_deviceid_cache *,
+				struct nfs4_deviceid *);
+extern struct pnfs_deviceid_node *pnfs_add_deviceid(
+				struct pnfs_deviceid_cache *,
+				struct pnfs_deviceid_node *);
+extern void pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
+			      struct pnfs_deviceid_node *devid);
+
 extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
 extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
 
+/* nfs4proc.c */
+extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
+				   struct pnfs_device *dev);
+extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
+
+/* pnfs.c */
 struct pnfs_layout_segment *
 pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
 		   enum pnfs_iomode access_type);
 void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
 void unset_pnfs_layoutdriver(struct nfs_server *);
+int pnfs_layout_process(struct nfs4_layoutget *lgp);
 void pnfs_destroy_layout(struct nfs_inode *);
 void pnfs_destroy_all_layouts(struct nfs_client *);
+void put_layout_hdr(struct inode *inode);
+void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
+			     struct nfs4_state *open_state);
 
 
 static inline int lo_fail_bit(u32 iomode)
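
Editor's note, not part of the patch: pnfs_register_layoutdriver() now refuses drivers that do not provide both alloc_lseg and free_lseg, since pnfs_layout_process() and destroy_lseg() call them unconditionally. A skeleton registration might look like the sketch below; the id value comes from enum pnfs_layouttype in nfs4.h, everything else is an assumption, and a real driver embeds struct pnfs_layout_segment inside a larger type-specific segment.

/*
 * Editor's sketch, not part of the patch: a minimal driver supplying the
 * alloc_lseg/free_lseg pair that registration now requires.
 */
static struct pnfs_layout_segment *
example_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr)
{
	struct pnfs_layout_segment *lseg;

	/* lgr->layout.buf holds lgr->layout.len bytes of opaque loc_body
	 * to decode into the driver-private part of the segment */
	lseg = kzalloc(sizeof(*lseg), GFP_KERNEL);
	return lseg ? lseg : ERR_PTR(-ENOMEM);
}

static void example_free_lseg(struct pnfs_layout_segment *lseg)
{
	kfree(lseg);
}

static struct pnfs_layoutdriver_type example_layout_type = {
	.id		= LAYOUT_NFSV4_1_FILES,
	.alloc_lseg	= example_alloc_lseg,
	.free_lseg	= example_free_lseg,
};

/* module init would then call pnfs_register_layoutdriver(&example_layout_type) */
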
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 34da32436ac0..a9683d6acaa4 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -545,6 +545,8 @@ enum {
 	NFSPROC4_CLNT_SEQUENCE,
 	NFSPROC4_CLNT_GET_LEASE_TIME,
 	NFSPROC4_CLNT_RECLAIM_COMPLETE,
+	NFSPROC4_CLNT_LAYOUTGET,
+	NFSPROC4_CLNT_GETDEVICEINFO,
 };
 
 /* nfs41 types */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 4d62f1581ed1..452d96436d26 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -83,6 +83,7 @@ struct nfs_client {
 	u32			cl_exchange_flags;
 	struct nfs4_session	*cl_session;	/* sharred session */
 	struct list_head	cl_layouts;
+	struct pnfs_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
 #endif /* CONFIG_NFS_V4_1 */
 
 #ifdef CONFIG_NFS_FSCACHE
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 065f9d105d05..ba6cc8f223c9 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -187,6 +187,55 @@ struct nfs4_get_lease_time_res {
 	struct nfs4_sequence_res	lr_seq_res;
 };
 
+#define PNFS_LAYOUT_MAXSIZE 4096
+
+struct nfs4_layoutdriver_data {
+	__u32 len;
+	void *buf;
+};
+
+struct pnfs_layout_range {
+	u32 iomode;
+	u64 offset;
+	u64 length;
+};
+
+struct nfs4_layoutget_args {
+	__u32 type;
+	struct pnfs_layout_range range;
+	__u64 minlength;
+	__u32 maxcount;
+	struct inode *inode;
+	struct nfs_open_context *ctx;
+	struct nfs4_sequence_args seq_args;
+};
+
+struct nfs4_layoutget_res {
+	__u32 return_on_close;
+	struct pnfs_layout_range range;
+	__u32 type;
+	nfs4_stateid stateid;
+	struct nfs4_layoutdriver_data layout;
+	struct nfs4_sequence_res seq_res;
+};
+
+struct nfs4_layoutget {
+	struct nfs4_layoutget_args args;
+	struct nfs4_layoutget_res res;
+	struct pnfs_layout_segment **lsegpp;
+	int status;
+};
+
+struct nfs4_getdeviceinfo_args {
+	struct pnfs_device *pdev;
+	struct nfs4_sequence_args seq_args;
+};
+
+struct nfs4_getdeviceinfo_res {
+	struct pnfs_device *pdev;
+	struct nfs4_sequence_res seq_res;
+};
+
 /*
  * Arguments to the open call.
  */