diff options
| -rw-r--r-- | fs/nfs/nfs4proc.c | 142 | ||||
| -rw-r--r-- | fs/nfs/nfs4xdr.c | 302 | ||||
| -rw-r--r-- | fs/nfs/pnfs.c | 385 | ||||
| -rw-r--r-- | fs/nfs/pnfs.h | 73 | ||||
| -rw-r--r-- | include/linux/nfs4.h | 2 | ||||
| -rw-r--r-- | include/linux/nfs_fs_sb.h | 1 | ||||
| -rw-r--r-- | include/linux/nfs_xdr.h | 49 |
7 files changed, 921 insertions, 33 deletions
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a5f1edb45b47..7e14e991ddfa 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
| @@ -55,6 +55,7 @@ | |||
| 55 | #include "internal.h" | 55 | #include "internal.h" |
| 56 | #include "iostat.h" | 56 | #include "iostat.h" |
| 57 | #include "callback.h" | 57 | #include "callback.h" |
| 58 | #include "pnfs.h" | ||
| 58 | 59 | ||
| 59 | #define NFSDBG_FACILITY NFSDBG_PROC | 60 | #define NFSDBG_FACILITY NFSDBG_PROC |
| 60 | 61 | ||
| @@ -5256,6 +5257,147 @@ out: | |||
| 5256 | dprintk("<-- %s status=%d\n", __func__, status); | 5257 | dprintk("<-- %s status=%d\n", __func__, status); |
| 5257 | return status; | 5258 | return status; |
| 5258 | } | 5259 | } |
| 5260 | |||
| 5261 | static void | ||
| 5262 | nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) | ||
| 5263 | { | ||
| 5264 | struct nfs4_layoutget *lgp = calldata; | ||
| 5265 | struct inode *ino = lgp->args.inode; | ||
| 5266 | struct nfs_server *server = NFS_SERVER(ino); | ||
| 5267 | |||
| 5268 | dprintk("--> %s\n", __func__); | ||
| 5269 | if (nfs4_setup_sequence(server, &lgp->args.seq_args, | ||
| 5270 | &lgp->res.seq_res, 0, task)) | ||
| 5271 | return; | ||
| 5272 | rpc_call_start(task); | ||
| 5273 | } | ||
| 5274 | |||
| 5275 | static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) | ||
| 5276 | { | ||
| 5277 | struct nfs4_layoutget *lgp = calldata; | ||
| 5278 | struct nfs_server *server = NFS_SERVER(lgp->args.inode); | ||
| 5279 | |||
| 5280 | dprintk("--> %s\n", __func__); | ||
| 5281 | |||
| 5282 | if (!nfs4_sequence_done(task, &lgp->res.seq_res)) | ||
| 5283 | return; | ||
| 5284 | |||
| 5285 | switch (task->tk_status) { | ||
| 5286 | case 0: | ||
| 5287 | break; | ||
| 5288 | case -NFS4ERR_LAYOUTTRYLATER: | ||
| 5289 | case -NFS4ERR_RECALLCONFLICT: | ||
| 5290 | task->tk_status = -NFS4ERR_DELAY; | ||
| 5291 | /* Fall through */ | ||
| 5292 | default: | ||
| 5293 | if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { | ||
| 5294 | rpc_restart_call_prepare(task); | ||
| 5295 | return; | ||
| 5296 | } | ||
| 5297 | } | ||
| 5298 | lgp->status = task->tk_status; | ||
| 5299 | dprintk("<-- %s\n", __func__); | ||
| 5300 | } | ||
| 5301 | |||
| 5302 | static void nfs4_layoutget_release(void *calldata) | ||
| 5303 | { | ||
| 5304 | struct nfs4_layoutget *lgp = calldata; | ||
| 5305 | |||
| 5306 | dprintk("--> %s\n", __func__); | ||
| 5307 | put_layout_hdr(lgp->args.inode); | ||
| 5308 | if (lgp->res.layout.buf != NULL) | ||
| 5309 | free_page((unsigned long) lgp->res.layout.buf); | ||
| 5310 | put_nfs_open_context(lgp->args.ctx); | ||
| 5311 | kfree(calldata); | ||
| 5312 | dprintk("<-- %s\n", __func__); | ||
| 5313 | } | ||
| 5314 | |||
| 5315 | static const struct rpc_call_ops nfs4_layoutget_call_ops = { | ||
| 5316 | .rpc_call_prepare = nfs4_layoutget_prepare, | ||
| 5317 | .rpc_call_done = nfs4_layoutget_done, | ||
| 5318 | .rpc_release = nfs4_layoutget_release, | ||
| 5319 | }; | ||
| 5320 | |||
| 5321 | int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) | ||
| 5322 | { | ||
| 5323 | struct nfs_server *server = NFS_SERVER(lgp->args.inode); | ||
| 5324 | struct rpc_task *task; | ||
| 5325 | struct rpc_message msg = { | ||
| 5326 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], | ||
| 5327 | .rpc_argp = &lgp->args, | ||
| 5328 | .rpc_resp = &lgp->res, | ||
| 5329 | }; | ||
| 5330 | struct rpc_task_setup task_setup_data = { | ||
| 5331 | .rpc_client = server->client, | ||
| 5332 | .rpc_message = &msg, | ||
| 5333 | .callback_ops = &nfs4_layoutget_call_ops, | ||
| 5334 | .callback_data = lgp, | ||
| 5335 | .flags = RPC_TASK_ASYNC, | ||
| 5336 | }; | ||
| 5337 | int status = 0; | ||
| 5338 | |||
| 5339 | dprintk("--> %s\n", __func__); | ||
| 5340 | |||
| 5341 | lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS); | ||
| 5342 | if (lgp->res.layout.buf == NULL) { | ||
| 5343 | nfs4_layoutget_release(lgp); | ||
| 5344 | return -ENOMEM; | ||
| 5345 | } | ||
| 5346 | |||
| 5347 | lgp->res.seq_res.sr_slot = NULL; | ||
| 5348 | task = rpc_run_task(&task_setup_data); | ||
| 5349 | if (IS_ERR(task)) | ||
| 5350 | return PTR_ERR(task); | ||
| 5351 | status = nfs4_wait_for_completion_rpc_task(task); | ||
| 5352 | if (status != 0) | ||
| 5353 | goto out; | ||
| 5354 | status = lgp->status; | ||
| 5355 | if (status != 0) | ||
| 5356 | goto out; | ||
| 5357 | status = pnfs_layout_process(lgp); | ||
| 5358 | out: | ||
| 5359 | rpc_put_task(task); | ||
| 5360 | dprintk("<-- %s status=%d\n", __func__, status); | ||
| 5361 | return status; | ||
| 5362 | } | ||
| 5363 | |||
| 5364 | static int | ||
| 5365 | _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) | ||
| 5366 | { | ||
| 5367 | struct nfs4_getdeviceinfo_args args = { | ||
| 5368 | .pdev = pdev, | ||
| 5369 | }; | ||
| 5370 | struct nfs4_getdeviceinfo_res res = { | ||
| 5371 | .pdev = pdev, | ||
| 5372 | }; | ||
| 5373 | struct rpc_message msg = { | ||
| 5374 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICEINFO], | ||
| 5375 | .rpc_argp = &args, | ||
| 5376 | .rpc_resp = &res, | ||
| 5377 | }; | ||
| 5378 | int status; | ||
| 5379 | |||
| 5380 | dprintk("--> %s\n", __func__); | ||
| 5381 | status = nfs4_call_sync(server, &msg, &args, &res, 0); | ||
| 5382 | dprintk("<-- %s status=%d\n", __func__, status); | ||
| 5383 | |||
| 5384 | return status; | ||
| 5385 | } | ||
| 5386 | |||
| 5387 | int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) | ||
| 5388 | { | ||
| 5389 | struct nfs4_exception exception = { }; | ||
| 5390 | int err; | ||
| 5391 | |||
| 5392 | do { | ||
| 5393 | err = nfs4_handle_exception(server, | ||
| 5394 | _nfs4_proc_getdeviceinfo(server, pdev), | ||
| 5395 | &exception); | ||
| 5396 | } while (exception.retry); | ||
| 5397 | return err; | ||
| 5398 | } | ||
| 5399 | EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo); | ||
| 5400 | |||
| 5259 | #endif /* CONFIG_NFS_V4_1 */ | 5401 | #endif /* CONFIG_NFS_V4_1 */ |
| 5260 | 5402 | ||
| 5261 | struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { | 5403 | struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 8b4dfa393f0f..f313c4cce7e4 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
| @@ -52,6 +52,7 @@ | |||
| 52 | #include <linux/nfs_idmap.h> | 52 | #include <linux/nfs_idmap.h> |
| 53 | #include "nfs4_fs.h" | 53 | #include "nfs4_fs.h" |
| 54 | #include "internal.h" | 54 | #include "internal.h" |
| 55 | #include "pnfs.h" | ||
| 55 | 56 | ||
| 56 | #define NFSDBG_FACILITY NFSDBG_XDR | 57 | #define NFSDBG_FACILITY NFSDBG_XDR |
| 57 | 58 | ||
| @@ -310,6 +311,19 @@ static int nfs4_stat_to_errno(int); | |||
| 310 | XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) | 311 | XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) |
| 311 | #define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) | 312 | #define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) |
| 312 | #define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) | 313 | #define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) |
| 314 | #define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \ | ||
| 315 | XDR_QUADLEN(NFS4_DEVICEID4_SIZE)) | ||
| 316 | #define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \ | ||
| 317 | 1 /* layout type */ + \ | ||
| 318 | 1 /* opaque devaddr4 length */ + \ | ||
| 319 | /* devaddr4 payload is read into page */ \ | ||
| 320 | 1 /* notification bitmap length */ + \ | ||
| 321 | 1 /* notification bitmap */) | ||
| 322 | #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \ | ||
| 323 | encode_stateid_maxsz) | ||
| 324 | #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ | ||
| 325 | decode_stateid_maxsz + \ | ||
| 326 | XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE)) | ||
| 313 | #else /* CONFIG_NFS_V4_1 */ | 327 | #else /* CONFIG_NFS_V4_1 */ |
| 314 | #define encode_sequence_maxsz 0 | 328 | #define encode_sequence_maxsz 0 |
| 315 | #define decode_sequence_maxsz 0 | 329 | #define decode_sequence_maxsz 0 |
| @@ -699,6 +713,20 @@ static int nfs4_stat_to_errno(int); | |||
| 699 | #define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ | 713 | #define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ |
| 700 | decode_sequence_maxsz + \ | 714 | decode_sequence_maxsz + \ |
| 701 | decode_reclaim_complete_maxsz) | 715 | decode_reclaim_complete_maxsz) |
| 716 | #define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \ | ||
| 717 | encode_sequence_maxsz +\ | ||
| 718 | encode_getdeviceinfo_maxsz) | ||
| 719 | #define NFS4_dec_getdeviceinfo_sz (compound_decode_hdr_maxsz + \ | ||
| 720 | decode_sequence_maxsz + \ | ||
| 721 | decode_getdeviceinfo_maxsz) | ||
| 722 | #define NFS4_enc_layoutget_sz (compound_encode_hdr_maxsz + \ | ||
| 723 | encode_sequence_maxsz + \ | ||
| 724 | encode_putfh_maxsz + \ | ||
| 725 | encode_layoutget_maxsz) | ||
| 726 | #define NFS4_dec_layoutget_sz (compound_decode_hdr_maxsz + \ | ||
| 727 | decode_sequence_maxsz + \ | ||
| 728 | decode_putfh_maxsz + \ | ||
| 729 | decode_layoutget_maxsz) | ||
| 702 | 730 | ||
| 703 | const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + | 731 | const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + |
| 704 | compound_encode_hdr_maxsz + | 732 | compound_encode_hdr_maxsz + |
| @@ -1737,6 +1765,58 @@ static void encode_sequence(struct xdr_stream *xdr, | |||
| 1737 | #endif /* CONFIG_NFS_V4_1 */ | 1765 | #endif /* CONFIG_NFS_V4_1 */ |
| 1738 | } | 1766 | } |
| 1739 | 1767 | ||
| 1768 | #ifdef CONFIG_NFS_V4_1 | ||
| 1769 | static void | ||
| 1770 | encode_getdeviceinfo(struct xdr_stream *xdr, | ||
| 1771 | const struct nfs4_getdeviceinfo_args *args, | ||
| 1772 | struct compound_hdr *hdr) | ||
| 1773 | { | ||
| 1774 | __be32 *p; | ||
| 1775 | |||
| 1776 | p = reserve_space(xdr, 16 + NFS4_DEVICEID4_SIZE); | ||
| 1777 | *p++ = cpu_to_be32(OP_GETDEVICEINFO); | ||
| 1778 | p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data, | ||
| 1779 | NFS4_DEVICEID4_SIZE); | ||
| 1780 | *p++ = cpu_to_be32(args->pdev->layout_type); | ||
| 1781 | *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */ | ||
| 1782 | *p++ = cpu_to_be32(0); /* bitmap length 0 */ | ||
| 1783 | hdr->nops++; | ||
| 1784 | hdr->replen += decode_getdeviceinfo_maxsz; | ||
| 1785 | } | ||
| 1786 | |||
| 1787 | static void | ||
| 1788 | encode_layoutget(struct xdr_stream *xdr, | ||
| 1789 | const struct nfs4_layoutget_args *args, | ||
| 1790 | struct compound_hdr *hdr) | ||
| 1791 | { | ||
| 1792 | nfs4_stateid stateid; | ||
| 1793 | __be32 *p; | ||
| 1794 | |||
| 1795 | p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE); | ||
| 1796 | *p++ = cpu_to_be32(OP_LAYOUTGET); | ||
| 1797 | *p++ = cpu_to_be32(0); /* Signal layout available */ | ||
| 1798 | *p++ = cpu_to_be32(args->type); | ||
| 1799 | *p++ = cpu_to_be32(args->range.iomode); | ||
| 1800 | p = xdr_encode_hyper(p, args->range.offset); | ||
| 1801 | p = xdr_encode_hyper(p, args->range.length); | ||
| 1802 | p = xdr_encode_hyper(p, args->minlength); | ||
| 1803 | pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout, | ||
| 1804 | args->ctx->state); | ||
| 1805 | p = xdr_encode_opaque_fixed(p, &stateid.data, NFS4_STATEID_SIZE); | ||
| 1806 | *p = cpu_to_be32(args->maxcount); | ||
| 1807 | |||
| 1808 | dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n", | ||
| 1809 | __func__, | ||
| 1810 | args->type, | ||
| 1811 | args->range.iomode, | ||
| 1812 | (unsigned long)args->range.offset, | ||
| 1813 | (unsigned long)args->range.length, | ||
| 1814 | args->maxcount); | ||
| 1815 | hdr->nops++; | ||
| 1816 | hdr->replen += decode_layoutget_maxsz; | ||
| 1817 | } | ||
| 1818 | #endif /* CONFIG_NFS_V4_1 */ | ||
| 1819 | |||
| 1740 | /* | 1820 | /* |
| 1741 | * END OF "GENERIC" ENCODE ROUTINES. | 1821 | * END OF "GENERIC" ENCODE ROUTINES. |
| 1742 | */ | 1822 | */ |
| @@ -2554,6 +2634,51 @@ static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p, | |||
| 2554 | return 0; | 2634 | return 0; |
| 2555 | } | 2635 | } |
| 2556 | 2636 | ||
| 2637 | /* | ||
| 2638 | * Encode GETDEVICEINFO request | ||
| 2639 | */ | ||
| 2640 | static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p, | ||
| 2641 | struct nfs4_getdeviceinfo_args *args) | ||
| 2642 | { | ||
| 2643 | struct xdr_stream xdr; | ||
| 2644 | struct compound_hdr hdr = { | ||
| 2645 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
| 2646 | }; | ||
| 2647 | |||
| 2648 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); | ||
| 2649 | encode_compound_hdr(&xdr, req, &hdr); | ||
| 2650 | encode_sequence(&xdr, &args->seq_args, &hdr); | ||
| 2651 | encode_getdeviceinfo(&xdr, args, &hdr); | ||
| 2652 | |||
| 2653 | /* set up reply kvec. Subtract notification bitmap max size (2) | ||
| 2654 | * so that notification bitmap is put in xdr_buf tail */ | ||
| 2655 | xdr_inline_pages(&req->rq_rcv_buf, (hdr.replen - 2) << 2, | ||
| 2656 | args->pdev->pages, args->pdev->pgbase, | ||
| 2657 | args->pdev->pglen); | ||
| 2658 | |||
| 2659 | encode_nops(&hdr); | ||
| 2660 | return 0; | ||
| 2661 | } | ||
| 2662 | |||
| 2663 | /* | ||
| 2664 | * Encode LAYOUTGET request | ||
| 2665 | */ | ||
| 2666 | static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p, | ||
| 2667 | struct nfs4_layoutget_args *args) | ||
| 2668 | { | ||
| 2669 | struct xdr_stream xdr; | ||
| 2670 | struct compound_hdr hdr = { | ||
| 2671 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
| 2672 | }; | ||
| 2673 | |||
| 2674 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); | ||
| 2675 | encode_compound_hdr(&xdr, req, &hdr); | ||
| 2676 | encode_sequence(&xdr, &args->seq_args, &hdr); | ||
| 2677 | encode_putfh(&xdr, NFS_FH(args->inode), &hdr); | ||
| 2678 | encode_layoutget(&xdr, args, &hdr); | ||
| 2679 | encode_nops(&hdr); | ||
| 2680 | return 0; | ||
| 2681 | } | ||
| 2557 | #endif /* CONFIG_NFS_V4_1 */ | 2682 | #endif /* CONFIG_NFS_V4_1 */ |
| 2558 | 2683 | ||
| 2559 | static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) | 2684 | static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) |
| @@ -4830,6 +4955,134 @@ out_overflow: | |||
| 4830 | #endif /* CONFIG_NFS_V4_1 */ | 4955 | #endif /* CONFIG_NFS_V4_1 */ |
| 4831 | } | 4956 | } |
| 4832 | 4957 | ||
| 4958 | #if defined(CONFIG_NFS_V4_1) | ||
| 4959 | |||
| 4960 | static int decode_getdeviceinfo(struct xdr_stream *xdr, | ||
| 4961 | struct pnfs_device *pdev) | ||
| 4962 | { | ||
| 4963 | __be32 *p; | ||
| 4964 | uint32_t len, type; | ||
| 4965 | int status; | ||
| 4966 | |||
| 4967 | status = decode_op_hdr(xdr, OP_GETDEVICEINFO); | ||
| 4968 | if (status) { | ||
| 4969 | if (status == -ETOOSMALL) { | ||
| 4970 | p = xdr_inline_decode(xdr, 4); | ||
| 4971 | if (unlikely(!p)) | ||
| 4972 | goto out_overflow; | ||
| 4973 | pdev->mincount = be32_to_cpup(p); | ||
| 4974 | dprintk("%s: Min count too small. mincnt = %u\n", | ||
| 4975 | __func__, pdev->mincount); | ||
| 4976 | } | ||
| 4977 | return status; | ||
| 4978 | } | ||
| 4979 | |||
| 4980 | p = xdr_inline_decode(xdr, 8); | ||
| 4981 | if (unlikely(!p)) | ||
| 4982 | goto out_overflow; | ||
| 4983 | type = be32_to_cpup(p++); | ||
| 4984 | if (type != pdev->layout_type) { | ||
| 4985 | dprintk("%s: layout mismatch req: %u pdev: %u\n", | ||
| 4986 | __func__, pdev->layout_type, type); | ||
| 4987 | return -EINVAL; | ||
| 4988 | } | ||
| 4989 | /* | ||
| 4990 | * Get the length of the opaque device_addr4. xdr_read_pages places | ||
| 4991 | * the opaque device_addr4 in the xdr_buf->pages (pnfs_device->pages) | ||
| 4992 | * and places the remaining xdr data in xdr_buf->tail | ||
| 4993 | */ | ||
| 4994 | pdev->mincount = be32_to_cpup(p); | ||
| 4995 | xdr_read_pages(xdr, pdev->mincount); /* include space for the length */ | ||
| 4996 | |||
| 4997 | /* Parse notification bitmap, verifying that it is zero. */ | ||
| 4998 | p = xdr_inline_decode(xdr, 4); | ||
| 4999 | if (unlikely(!p)) | ||
| 5000 | goto out_overflow; | ||
| 5001 | len = be32_to_cpup(p); | ||
| 5002 | if (len) { | ||
| 5003 | int i; | ||
| 5004 | |||
| 5005 | p = xdr_inline_decode(xdr, 4 * len); | ||
| 5006 | if (unlikely(!p)) | ||
| 5007 | goto out_overflow; | ||
| 5008 | for (i = 0; i < len; i++, p++) { | ||
| 5009 | if (be32_to_cpup(p)) { | ||
| 5010 | dprintk("%s: notifications not supported\n", | ||
| 5011 | __func__); | ||
| 5012 | return -EIO; | ||
| 5013 | } | ||
| 5014 | } | ||
| 5015 | } | ||
| 5016 | return 0; | ||
| 5017 | out_overflow: | ||
| 5018 | print_overflow_msg(__func__, xdr); | ||
| 5019 | return -EIO; | ||
| 5020 | } | ||
| 5021 | |||
| 5022 | static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, | ||
| 5023 | struct nfs4_layoutget_res *res) | ||
| 5024 | { | ||
| 5025 | __be32 *p; | ||
| 5026 | int status; | ||
| 5027 | u32 layout_count; | ||
| 5028 | |||
| 5029 | status = decode_op_hdr(xdr, OP_LAYOUTGET); | ||
| 5030 | if (status) | ||
| 5031 | return status; | ||
| 5032 | p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE); | ||
| 5033 | if (unlikely(!p)) | ||
| 5034 | goto out_overflow; | ||
| 5035 | res->return_on_close = be32_to_cpup(p++); | ||
| 5036 | p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE); | ||
| 5037 | layout_count = be32_to_cpup(p); | ||
| 5038 | if (!layout_count) { | ||
| 5039 | dprintk("%s: server responded with empty layout array\n", | ||
| 5040 | __func__); | ||
| 5041 | return -EINVAL; | ||
| 5042 | } | ||
| 5043 | |||
| 5044 | p = xdr_inline_decode(xdr, 24); | ||
| 5045 | if (unlikely(!p)) | ||
| 5046 | goto out_overflow; | ||
| 5047 | p = xdr_decode_hyper(p, &res->range.offset); | ||
| 5048 | p = xdr_decode_hyper(p, &res->range.length); | ||
| 5049 | res->range.iomode = be32_to_cpup(p++); | ||
| 5050 | res->type = be32_to_cpup(p++); | ||
| 5051 | |||
| 5052 | status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p); | ||
| 5053 | if (unlikely(status)) | ||
| 5054 | return status; | ||
| 5055 | |||
| 5056 | dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n", | ||
| 5057 | __func__, | ||
| 5058 | (unsigned long)res->range.offset, | ||
| 5059 | (unsigned long)res->range.length, | ||
| 5060 | res->range.iomode, | ||
| 5061 | res->type, | ||
| 5062 | res->layout.len); | ||
| 5063 | |||
| 5064 | /* nfs4_proc_layoutget allocated a single page */ | ||
| 5065 | if (res->layout.len > PAGE_SIZE) | ||
| 5066 | return -ENOMEM; | ||
| 5067 | memcpy(res->layout.buf, p, res->layout.len); | ||
| 5068 | |||
| 5069 | if (layout_count > 1) { | ||
| 5070 | /* We only handle a length one array at the moment. Any | ||
| 5071 | * further entries are just ignored. Note that this means | ||
| 5072 | * the client may see a response that is less than the | ||
| 5073 | * minimum it requested. | ||
| 5074 | */ | ||
| 5075 | dprintk("%s: server responded with %d layouts, dropping tail\n", | ||
| 5076 | __func__, layout_count); | ||
| 5077 | } | ||
| 5078 | |||
| 5079 | return 0; | ||
| 5080 | out_overflow: | ||
| 5081 | print_overflow_msg(__func__, xdr); | ||
| 5082 | return -EIO; | ||
| 5083 | } | ||
| 5084 | #endif /* CONFIG_NFS_V4_1 */ | ||
| 5085 | |||
| 4833 | /* | 5086 | /* |
| 4834 | * END OF "GENERIC" DECODE ROUTINES. | 5087 | * END OF "GENERIC" DECODE ROUTINES. |
| 4835 | */ | 5088 | */ |
| @@ -5857,6 +6110,53 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p, | |||
| 5857 | status = decode_reclaim_complete(&xdr, (void *)NULL); | 6110 | status = decode_reclaim_complete(&xdr, (void *)NULL); |
| 5858 | return status; | 6111 | return status; |
| 5859 | } | 6112 | } |
| 6113 | |||
| 6114 | /* | ||
| 6115 | * Decode GETDEVICEINFO response | ||
| 6116 | */ | ||
| 6117 | static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p, | ||
| 6118 | struct nfs4_getdeviceinfo_res *res) | ||
| 6119 | { | ||
| 6120 | struct xdr_stream xdr; | ||
| 6121 | struct compound_hdr hdr; | ||
| 6122 | int status; | ||
| 6123 | |||
| 6124 | xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); | ||
| 6125 | status = decode_compound_hdr(&xdr, &hdr); | ||
| 6126 | if (status != 0) | ||
| 6127 | goto out; | ||
| 6128 | status = decode_sequence(&xdr, &res->seq_res, rqstp); | ||
| 6129 | if (status != 0) | ||
| 6130 | goto out; | ||
| 6131 | status = decode_getdeviceinfo(&xdr, res->pdev); | ||
| 6132 | out: | ||
| 6133 | return status; | ||
| 6134 | } | ||
| 6135 | |||
| 6136 | /* | ||
| 6137 | * Decode LAYOUTGET response | ||
| 6138 | */ | ||
| 6139 | static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p, | ||
| 6140 | struct nfs4_layoutget_res *res) | ||
| 6141 | { | ||
| 6142 | struct xdr_stream xdr; | ||
| 6143 | struct compound_hdr hdr; | ||
| 6144 | int status; | ||
| 6145 | |||
| 6146 | xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); | ||
| 6147 | status = decode_compound_hdr(&xdr, &hdr); | ||
| 6148 | if (status) | ||
| 6149 | goto out; | ||
| 6150 | status = decode_sequence(&xdr, &res->seq_res, rqstp); | ||
| 6151 | if (status) | ||
| 6152 | goto out; | ||
| 6153 | status = decode_putfh(&xdr); | ||
| 6154 | if (status) | ||
| 6155 | goto out; | ||
| 6156 | status = decode_layoutget(&xdr, rqstp, res); | ||
| 6157 | out: | ||
| 6158 | return status; | ||
| 6159 | } | ||
| 5860 | #endif /* CONFIG_NFS_V4_1 */ | 6160 | #endif /* CONFIG_NFS_V4_1 */ |
| 5861 | 6161 | ||
| 5862 | __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, | 6162 | __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, |
| @@ -6048,6 +6348,8 @@ struct rpc_procinfo nfs4_procedures[] = { | |||
| 6048 | PROC(SEQUENCE, enc_sequence, dec_sequence), | 6348 | PROC(SEQUENCE, enc_sequence, dec_sequence), |
| 6049 | PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), | 6349 | PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), |
| 6050 | PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), | 6350 | PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), |
| 6351 | PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), | ||
| 6352 | PROC(LAYOUTGET, enc_layoutget, dec_layoutget), | ||
| 6051 | #endif /* CONFIG_NFS_V4_1 */ | 6353 | #endif /* CONFIG_NFS_V4_1 */ |
| 6052 | }; | 6354 | }; |
| 6053 | 6355 | ||
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 891a0c36f992..d1ad7df3479e 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
| @@ -140,6 +140,11 @@ pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type) | |||
| 140 | printk(KERN_ERR "%s id 0 is reserved\n", __func__); | 140 | printk(KERN_ERR "%s id 0 is reserved\n", __func__); |
| 141 | return status; | 141 | return status; |
| 142 | } | 142 | } |
| 143 | if (!ld_type->alloc_lseg || !ld_type->free_lseg) { | ||
| 144 | printk(KERN_ERR "%s Layout driver must provide " | ||
| 145 | "alloc_lseg and free_lseg.\n", __func__); | ||
| 146 | return status; | ||
| 147 | } | ||
| 143 | 148 | ||
| 144 | spin_lock(&pnfs_spinlock); | 149 | spin_lock(&pnfs_spinlock); |
| 145 | tmp = find_pnfs_driver_locked(ld_type->id); | 150 | tmp = find_pnfs_driver_locked(ld_type->id); |
| @@ -168,6 +173,10 @@ pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type) | |||
| 168 | } | 173 | } |
| 169 | EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); | 174 | EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); |
| 170 | 175 | ||
| 176 | /* | ||
| 177 | * pNFS client layout cache | ||
| 178 | */ | ||
| 179 | |||
| 171 | static void | 180 | static void |
| 172 | get_layout_hdr_locked(struct pnfs_layout_hdr *lo) | 181 | get_layout_hdr_locked(struct pnfs_layout_hdr *lo) |
| 173 | { | 182 | { |
| @@ -190,7 +199,7 @@ put_layout_hdr_locked(struct pnfs_layout_hdr *lo) | |||
| 190 | } | 199 | } |
| 191 | } | 200 | } |
| 192 | 201 | ||
| 193 | static void | 202 | void |
| 194 | put_layout_hdr(struct inode *inode) | 203 | put_layout_hdr(struct inode *inode) |
| 195 | { | 204 | { |
| 196 | spin_lock(&inode->i_lock); | 205 | spin_lock(&inode->i_lock); |
| @@ -215,7 +224,7 @@ destroy_lseg(struct kref *kref) | |||
| 215 | struct inode *ino = lseg->layout->inode; | 224 | struct inode *ino = lseg->layout->inode; |
| 216 | 225 | ||
| 217 | dprintk("--> %s\n", __func__); | 226 | dprintk("--> %s\n", __func__); |
| 218 | kfree(lseg); | 227 | NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); |
| 219 | /* Matched by get_layout_hdr_locked in pnfs_insert_layout */ | 228 | /* Matched by get_layout_hdr_locked in pnfs_insert_layout */ |
| 220 | put_layout_hdr(ino); | 229 | put_layout_hdr(ino); |
| 221 | } | 230 | } |
| @@ -249,6 +258,9 @@ pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list) | |||
| 249 | /* List does not take a reference, so no need for put here */ | 258 | /* List does not take a reference, so no need for put here */ |
| 250 | list_del_init(&lo->layouts); | 259 | list_del_init(&lo->layouts); |
| 251 | spin_unlock(&clp->cl_lock); | 260 | spin_unlock(&clp->cl_lock); |
| 261 | write_seqlock(&lo->seqlock); | ||
| 262 | clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state); | ||
| 263 | write_sequnlock(&lo->seqlock); | ||
| 252 | 264 | ||
| 253 | dprintk("%s:Return\n", __func__); | 265 | dprintk("%s:Return\n", __func__); |
| 254 | } | 266 | } |
| @@ -307,40 +319,135 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) | |||
| 307 | } | 319 | } |
| 308 | } | 320 | } |
| 309 | 321 | ||
| 310 | static void pnfs_insert_layout(struct pnfs_layout_hdr *lo, | 322 | /* update lo->stateid with new if it is more recent |
| 311 | struct pnfs_layout_segment *lseg); | 323 | * |
| 324 | * lo->stateid could be the open stateid, in which case we just use what is given. | ||
| 325 | */ | ||
| 326 | static void | ||
| 327 | pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, | ||
| 328 | const nfs4_stateid *new) | ||
| 329 | { | ||
| 330 | nfs4_stateid *old = &lo->stateid; | ||
| 331 | bool overwrite = false; | ||
| 332 | |||
| 333 | write_seqlock(&lo->seqlock); | ||
| 334 | if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) || | ||
| 335 | memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other))) | ||
| 336 | overwrite = true; | ||
| 337 | else { | ||
| 338 | u32 oldseq, newseq; | ||
| 339 | |||
| 340 | oldseq = be32_to_cpu(old->stateid.seqid); | ||
| 341 | newseq = be32_to_cpu(new->stateid.seqid); | ||
| 342 | if ((int)(newseq - oldseq) > 0) | ||
| 343 | overwrite = true; | ||
| 344 | } | ||
| 345 | if (overwrite) | ||
| 346 | memcpy(&old->stateid, &new->stateid, sizeof(new->stateid)); | ||
| 347 | write_sequnlock(&lo->seqlock); | ||
| 348 | } | ||
| 349 | |||
| 350 | static void | ||
| 351 | pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo, | ||
| 352 | struct nfs4_state *state) | ||
| 353 | { | ||
| 354 | int seq; | ||
| 355 | |||
| 356 | dprintk("--> %s\n", __func__); | ||
| 357 | write_seqlock(&lo->seqlock); | ||
| 358 | do { | ||
| 359 | seq = read_seqbegin(&state->seqlock); | ||
| 360 | memcpy(lo->stateid.data, state->stateid.data, | ||
| 361 | sizeof(state->stateid.data)); | ||
| 362 | } while (read_seqretry(&state->seqlock, seq)); | ||
| 363 | set_bit(NFS_LAYOUT_STATEID_SET, &lo->state); | ||
| 364 | write_sequnlock(&lo->seqlock); | ||
| 365 | dprintk("<-- %s\n", __func__); | ||
| 366 | } | ||
| 367 | |||
| 368 | void | ||
| 369 | pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, | ||
| 370 | struct nfs4_state *open_state) | ||
| 371 | { | ||
| 372 | int seq; | ||
| 312 | 373 | ||
| 313 | /* Get layout from server. */ | 374 | dprintk("--> %s\n", __func__); |
| 375 | do { | ||
| 376 | seq = read_seqbegin(&lo->seqlock); | ||
| 377 | if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) { | ||
| 378 | /* This will trigger retry of the read */ | ||
| 379 | pnfs_layout_from_open_stateid(lo, open_state); | ||
| 380 | } else | ||
| 381 | memcpy(dst->data, lo->stateid.data, | ||
| 382 | sizeof(lo->stateid.data)); | ||
| 383 | } while (read_seqretry(&lo->seqlock, seq)); | ||
| 384 | dprintk("<-- %s\n", __func__); | ||
| 385 | } | ||
| 386 | |||
| 387 | /* | ||
| 388 | * Get layout from server. | ||
| 389 | * For now, assume that whole-file layouts are requested. | ||
| 390 | * arg->offset: 0 | ||
| 391 | * arg->length: all ones | ||
| 392 | */ | ||
| 314 | static struct pnfs_layout_segment * | 393 | static struct pnfs_layout_segment * |
| 315 | send_layoutget(struct pnfs_layout_hdr *lo, | 394 | send_layoutget(struct pnfs_layout_hdr *lo, |
| 316 | struct nfs_open_context *ctx, | 395 | struct nfs_open_context *ctx, |
| 317 | u32 iomode) | 396 | u32 iomode) |
| 318 | { | 397 | { |
| 319 | struct inode *ino = lo->inode; | 398 | struct inode *ino = lo->inode; |
| 320 | struct pnfs_layout_segment *lseg; | 399 | struct nfs_server *server = NFS_SERVER(ino); |
| 400 | struct nfs4_layoutget *lgp; | ||
| 401 | struct pnfs_layout_segment *lseg = NULL; | ||
| 402 | |||
| 403 | dprintk("--> %s\n", __func__); | ||
| 321 | 404 | ||
| 322 | /* Lets pretend we sent LAYOUTGET and got a response */ | 405 | BUG_ON(ctx == NULL); |
| 323 | lseg = kzalloc(sizeof(*lseg), GFP_KERNEL); | 406 | lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); |
| 407 | if (lgp == NULL) { | ||
| 408 | put_layout_hdr(lo->inode); | ||
| 409 | return NULL; | ||
| 410 | } | ||
| 411 | lgp->args.minlength = NFS4_MAX_UINT64; | ||
| 412 | lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; | ||
| 413 | lgp->args.range.iomode = iomode; | ||
| 414 | lgp->args.range.offset = 0; | ||
| 415 | lgp->args.range.length = NFS4_MAX_UINT64; | ||
| 416 | lgp->args.type = server->pnfs_curr_ld->id; | ||
| 417 | lgp->args.inode = ino; | ||
| 418 | lgp->args.ctx = get_nfs_open_context(ctx); | ||
| 419 | lgp->lsegpp = &lseg; | ||
| 420 | |||
| 421 | /* Synchronously retrieve layout information from server and | ||
| 422 | * store in lseg. | ||
| 423 | */ | ||
| 424 | nfs4_proc_layoutget(lgp); | ||
| 324 | if (!lseg) { | 425 | if (!lseg) { |
| 426 | /* remember that LAYOUTGET failed and suspend trying */ | ||
| 325 | set_bit(lo_fail_bit(iomode), &lo->state); | 427 | set_bit(lo_fail_bit(iomode), &lo->state); |
| 326 | spin_lock(&ino->i_lock); | ||
| 327 | put_layout_hdr_locked(lo); | ||
| 328 | spin_unlock(&ino->i_lock); | ||
| 329 | return NULL; | ||
| 330 | } | 428 | } |
| 331 | init_lseg(lo, lseg); | ||
| 332 | lseg->iomode = IOMODE_RW; | ||
| 333 | spin_lock(&ino->i_lock); | ||
| 334 | pnfs_insert_layout(lo, lseg); | ||
| 335 | put_layout_hdr_locked(lo); | ||
| 336 | spin_unlock(&ino->i_lock); | ||
| 337 | return lseg; | 429 | return lseg; |
| 338 | } | 430 | } |
| 339 | 431 | ||
| 432 | /* | ||
| 433 | * Compare two layout segments for sorting into layout cache. | ||
| 434 | * We want to preferentially return RW over RO layouts, so ensure those | ||
| 435 | * are seen first. | ||
| 436 | */ | ||
| 437 | static s64 | ||
| 438 | cmp_layout(u32 iomode1, u32 iomode2) | ||
| 439 | { | ||
| 440 | /* read > read/write */ | ||
| 441 | return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ); | ||
| 442 | } | ||
| 443 | |||
| 340 | static void | 444 | static void |
| 341 | pnfs_insert_layout(struct pnfs_layout_hdr *lo, | 445 | pnfs_insert_layout(struct pnfs_layout_hdr *lo, |
| 342 | struct pnfs_layout_segment *lseg) | 446 | struct pnfs_layout_segment *lseg) |
| 343 | { | 447 | { |
| 448 | struct pnfs_layout_segment *lp; | ||
| 449 | int found = 0; | ||
| 450 | |||
| 344 | dprintk("%s:Begin\n", __func__); | 451 | dprintk("%s:Begin\n", __func__); |
| 345 | 452 | ||
| 346 | assert_spin_locked(&lo->inode->i_lock); | 453 | assert_spin_locked(&lo->inode->i_lock); |
| @@ -352,19 +459,28 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, | |||
| 352 | list_add_tail(&lo->layouts, &clp->cl_layouts); | 459 | list_add_tail(&lo->layouts, &clp->cl_layouts); |
| 353 | spin_unlock(&clp->cl_lock); | 460 | spin_unlock(&clp->cl_lock); |
| 354 | } | 461 | } |
| 355 | get_layout_hdr_locked(lo); | 462 | list_for_each_entry(lp, &lo->segs, fi_list) { |
| 356 | /* STUB - add the constructed lseg if necessary */ | 463 | if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0) |
| 357 | if (list_empty(&lo->segs)) { | 464 | continue; |
| 465 | list_add_tail(&lseg->fi_list, &lp->fi_list); | ||
| 466 | dprintk("%s: inserted lseg %p " | ||
| 467 | "iomode %d offset %llu length %llu before " | ||
| 468 | "lp %p iomode %d offset %llu length %llu\n", | ||
| 469 | __func__, lseg, lseg->range.iomode, | ||
| 470 | lseg->range.offset, lseg->range.length, | ||
| 471 | lp, lp->range.iomode, lp->range.offset, | ||
| 472 | lp->range.length); | ||
| 473 | found = 1; | ||
| 474 | break; | ||
| 475 | } | ||
| 476 | if (!found) { | ||
| 358 | list_add_tail(&lseg->fi_list, &lo->segs); | 477 | list_add_tail(&lseg->fi_list, &lo->segs); |
| 359 | dprintk("%s: inserted lseg %p iomode %d at tail\n", | 478 | dprintk("%s: inserted lseg %p " |
| 360 | __func__, lseg, lseg->iomode); | 479 | "iomode %d offset %llu length %llu at tail\n", |
| 361 | } else { | 480 | __func__, lseg, lseg->range.iomode, |
| 362 | /* There is no harm for the moment in calling this | 481 | lseg->range.offset, lseg->range.length); |
| 363 | * with the lock held, and the call will be removed | ||
| 364 | * with the STUB. | ||
| 365 | */ | ||
| 366 | put_lseg(lseg); | ||
| 367 | } | 482 | } |
| 483 | get_layout_hdr_locked(lo); | ||
| 368 | 484 | ||
| 369 | dprintk("%s:Return\n", __func__); | 485 | dprintk("%s:Return\n", __func__); |
| 370 | } | 486 | } |
| @@ -380,6 +496,7 @@ alloc_init_layout_hdr(struct inode *ino) | |||
| 380 | lo->refcount = 1; | 496 | lo->refcount = 1; |
| 381 | INIT_LIST_HEAD(&lo->layouts); | 497 | INIT_LIST_HEAD(&lo->layouts); |
| 382 | INIT_LIST_HEAD(&lo->segs); | 498 | INIT_LIST_HEAD(&lo->segs); |
| 499 | seqlock_init(&lo->seqlock); | ||
| 383 | lo->inode = ino; | 500 | lo->inode = ino; |
| 384 | return lo; | 501 | return lo; |
| 385 | } | 502 | } |
| @@ -407,11 +524,46 @@ pnfs_find_alloc_layout(struct inode *ino) | |||
| 407 | return nfsi->layout; | 524 | return nfsi->layout; |
| 408 | } | 525 | } |
| 409 | 526 | ||
| 410 | /* STUB - LAYOUTGET never succeeds, so cache is empty */ | 527 | /* |
| 528 | * iomode matching rules: | ||
| 529 | * iomode lseg match | ||
| 530 | * ----- ----- ----- | ||
| 531 | * ANY READ true | ||
| 532 | * ANY RW true | ||
| 533 | * RW READ false | ||
| 534 | * RW RW true | ||
| 535 | * READ READ true | ||
| 536 | * READ RW true | ||
| 537 | */ | ||
| 538 | static int | ||
| 539 | is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode) | ||
| 540 | { | ||
| 541 | return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW); | ||
| 542 | } | ||
| 543 | |||
| 544 | /* | ||
| 545 | * lookup range in layout | ||
| 546 | */ | ||
| 411 | static struct pnfs_layout_segment * | 547 | static struct pnfs_layout_segment * |
| 412 | pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode) | 548 | pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode) |
| 413 | { | 549 | { |
| 414 | return NULL; | 550 | struct pnfs_layout_segment *lseg, *ret = NULL; |
| 551 | |||
| 552 | dprintk("%s:Begin\n", __func__); | ||
| 553 | |||
| 554 | assert_spin_locked(&lo->inode->i_lock); | ||
| 555 | list_for_each_entry(lseg, &lo->segs, fi_list) { | ||
| 556 | if (is_matching_lseg(lseg, iomode)) { | ||
| 557 | ret = lseg; | ||
| 558 | break; | ||
| 559 | } | ||
| 560 | if (cmp_layout(iomode, lseg->range.iomode) > 0) | ||
| 561 | break; | ||
| 562 | } | ||
| 563 | |||
| 564 | dprintk("%s:Return lseg %p ref %d\n", | ||
| 565 | __func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0); | ||
| 566 | return ret; | ||
| 415 | } | 567 | } |
| 416 | 568 | ||
| 417 | /* | 569 | /* |
| @@ -448,7 +600,7 @@ pnfs_update_layout(struct inode *ino, | |||
| 448 | if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state)) | 600 | if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state)) |
| 449 | goto out_unlock; | 601 | goto out_unlock; |
| 450 | 602 | ||
| 451 | get_layout_hdr_locked(lo); | 603 | get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */ |
| 452 | spin_unlock(&ino->i_lock); | 604 | spin_unlock(&ino->i_lock); |
| 453 | 605 | ||
| 454 | lseg = send_layoutget(lo, ctx, iomode); | 606 | lseg = send_layoutget(lo, ctx, iomode); |
| @@ -460,3 +612,172 @@ out_unlock: | |||
| 460 | spin_unlock(&ino->i_lock); | 612 | spin_unlock(&ino->i_lock); |
| 461 | goto out; | 613 | goto out; |
| 462 | } | 614 | } |
| 615 | |||
| 616 | int | ||
| 617 | pnfs_layout_process(struct nfs4_layoutget *lgp) | ||
| 618 | { | ||
| 619 | struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout; | ||
| 620 | struct nfs4_layoutget_res *res = &lgp->res; | ||
| 621 | struct pnfs_layout_segment *lseg; | ||
| 622 | struct inode *ino = lo->inode; | ||
| 623 | int status = 0; | ||
| 624 | |||
| 625 | /* Inject layout blob into I/O device driver */ | ||
| 626 | lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res); | ||
| 627 | if (!lseg || IS_ERR(lseg)) { | ||
| 628 | if (!lseg) | ||
| 629 | status = -ENOMEM; | ||
| 630 | else | ||
| 631 | status = PTR_ERR(lseg); | ||
| 632 | dprintk("%s: Could not allocate layout: error %d\n", | ||
| 633 | __func__, status); | ||
| 634 | goto out; | ||
| 635 | } | ||
| 636 | |||
| 637 | spin_lock(&ino->i_lock); | ||
| 638 | init_lseg(lo, lseg); | ||
| 639 | lseg->range = res->range; | ||
| 640 | *lgp->lsegpp = lseg; | ||
| 641 | pnfs_insert_layout(lo, lseg); | ||
| 642 | |||
| 643 | /* Done processing layoutget. Set the layout stateid */ | ||
| 644 | pnfs_set_layout_stateid(lo, &res->stateid); | ||
| 645 | spin_unlock(&ino->i_lock); | ||
| 646 | out: | ||
| 647 | return status; | ||
| 648 | } | ||
| 649 | |||
| 650 | /* | ||
| 651 | * Device ID cache. Currently supports one layout type per struct nfs_client. | ||
| 652 | * Add layout type to the lookup key to expand to support multiple types. | ||
| 653 | */ | ||
| 654 | int | ||
| 655 | pnfs_alloc_init_deviceid_cache(struct nfs_client *clp, | ||
| 656 | void (*free_callback)(struct pnfs_deviceid_node *)) | ||
| 657 | { | ||
| 658 | struct pnfs_deviceid_cache *c; | ||
| 659 | |||
| 660 | c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL); | ||
| 661 | if (!c) | ||
| 662 | return -ENOMEM; | ||
| 663 | spin_lock(&clp->cl_lock); | ||
| 664 | if (clp->cl_devid_cache != NULL) { | ||
| 665 | atomic_inc(&clp->cl_devid_cache->dc_ref); | ||
| 666 | dprintk("%s [kref [%d]]\n", __func__, | ||
| 667 | atomic_read(&clp->cl_devid_cache->dc_ref)); | ||
| 668 | kfree(c); | ||
| 669 | } else { | ||
| 670 | /* kzalloc initializes hlists */ | ||
| 671 | spin_lock_init(&c->dc_lock); | ||
| 672 | atomic_set(&c->dc_ref, 1); | ||
| 673 | c->dc_free_callback = free_callback; | ||
| 674 | clp->cl_devid_cache = c; | ||
| 675 | dprintk("%s [new]\n", __func__); | ||
| 676 | } | ||
| 677 | spin_unlock(&clp->cl_lock); | ||
| 678 | return 0; | ||
| 679 | } | ||
| 680 | EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache); | ||
| 681 | |||
| 682 | /* | ||
| 683 | * Called from pnfs_layoutdriver_type->free_lseg | ||
| 684 | * last layout segment reference frees deviceid | ||
| 685 | */ | ||
| 686 | void | ||
| 687 | pnfs_put_deviceid(struct pnfs_deviceid_cache *c, | ||
| 688 | struct pnfs_deviceid_node *devid) | ||
| 689 | { | ||
| 690 | struct nfs4_deviceid *id = &devid->de_id; | ||
| 691 | struct pnfs_deviceid_node *d; | ||
| 692 | struct hlist_node *n; | ||
| 693 | long h = nfs4_deviceid_hash(id); | ||
| 694 | |||
| 695 | dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref)); | ||
| 696 | if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock)) | ||
| 697 | return; | ||
| 698 | |||
| 699 | hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node) | ||
| 700 | if (!memcmp(&d->de_id, id, sizeof(*id))) { | ||
| 701 | hlist_del_rcu(&d->de_node); | ||
| 702 | spin_unlock(&c->dc_lock); | ||
| 703 | synchronize_rcu(); | ||
| 704 | c->dc_free_callback(devid); | ||
| 705 | return; | ||
| 706 | } | ||
| 707 | spin_unlock(&c->dc_lock); | ||
| 708 | /* Why wasn't it found in the list? */ | ||
| 709 | BUG(); | ||
| 710 | } | ||
| 711 | EXPORT_SYMBOL_GPL(pnfs_put_deviceid); | ||
| 712 | |||
| 713 | /* Find and reference a deviceid */ | ||
| 714 | struct pnfs_deviceid_node * | ||
| 715 | pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id) | ||
| 716 | { | ||
| 717 | struct pnfs_deviceid_node *d; | ||
| 718 | struct hlist_node *n; | ||
| 719 | long hash = nfs4_deviceid_hash(id); | ||
| 720 | |||
| 721 | dprintk("--> %s hash %ld\n", __func__, hash); | ||
| 722 | rcu_read_lock(); | ||
| 723 | hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) { | ||
| 724 | if (!memcmp(&d->de_id, id, sizeof(*id))) { | ||
| 725 | if (!atomic_inc_not_zero(&d->de_ref)) { | ||
| 726 | goto fail; | ||
| 727 | } else { | ||
| 728 | rcu_read_unlock(); | ||
| 729 | return d; | ||
| 730 | } | ||
| 731 | } | ||
| 732 | } | ||
| 733 | fail: | ||
| 734 | rcu_read_unlock(); | ||
| 735 | return NULL; | ||
| 736 | } | ||
| 737 | EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid); | ||
| 738 | |||
| 739 | /* | ||
| 740 | * Add a deviceid to the cache. | ||
| 741 | * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new | ||
| 742 | */ | ||
| 743 | struct pnfs_deviceid_node * | ||
| 744 | pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new) | ||
| 745 | { | ||
| 746 | struct pnfs_deviceid_node *d; | ||
| 747 | long hash = nfs4_deviceid_hash(&new->de_id); | ||
| 748 | |||
| 749 | dprintk("--> %s hash %ld\n", __func__, hash); | ||
| 750 | spin_lock(&c->dc_lock); | ||
| 751 | d = pnfs_find_get_deviceid(c, &new->de_id); | ||
| 752 | if (d) { | ||
| 753 | spin_unlock(&c->dc_lock); | ||
| 754 | dprintk("%s [discard]\n", __func__); | ||
| 755 | c->dc_free_callback(new); | ||
| 756 | return d; | ||
| 757 | } | ||
| 758 | INIT_HLIST_NODE(&new->de_node); | ||
| 759 | atomic_set(&new->de_ref, 1); | ||
| 760 | hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]); | ||
| 761 | spin_unlock(&c->dc_lock); | ||
| 762 | dprintk("%s [new]\n", __func__); | ||
| 763 | return new; | ||
| 764 | } | ||
| 765 | EXPORT_SYMBOL_GPL(pnfs_add_deviceid); | ||
| 766 | |||
| 767 | void | ||
| 768 | pnfs_put_deviceid_cache(struct nfs_client *clp) | ||
| 769 | { | ||
| 770 | struct pnfs_deviceid_cache *local = clp->cl_devid_cache; | ||
| 771 | |||
| 772 | dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache); | ||
| 773 | if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) { | ||
| 774 | int i; | ||
| 775 | /* Verify cache is empty */ | ||
| 776 | for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++) | ||
| 777 | BUG_ON(!hlist_empty(&local->dc_deviceids[i])); | ||
| 778 | clp->cl_devid_cache = NULL; | ||
| 779 | spin_unlock(&clp->cl_lock); | ||
| 780 | kfree(local); | ||
| 781 | } | ||
| 782 | } | ||
| 783 | EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache); | ||
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 1c3eb02f4944..cbba28cb02a7 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
| @@ -32,7 +32,7 @@ | |||
| 32 | 32 | ||
| 33 | struct pnfs_layout_segment { | 33 | struct pnfs_layout_segment { |
| 34 | struct list_head fi_list; | 34 | struct list_head fi_list; |
| 35 | u32 iomode; | 35 | struct pnfs_layout_range range; |
| 36 | struct kref kref; | 36 | struct kref kref; |
| 37 | struct pnfs_layout_hdr *layout; | 37 | struct pnfs_layout_hdr *layout; |
| 38 | }; | 38 | }; |
| @@ -44,6 +44,7 @@ struct pnfs_layout_segment { | |||
| 44 | enum { | 44 | enum { |
| 45 | NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */ | 45 | NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */ |
| 46 | NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ | 46 | NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ |
| 47 | NFS_LAYOUT_STATEID_SET, /* have a valid layout stateid */ | ||
| 47 | }; | 48 | }; |
| 48 | 49 | ||
| 49 | /* Per-layout driver specific registration structure */ | 50 | /* Per-layout driver specific registration structure */ |
| @@ -54,26 +55,96 @@ struct pnfs_layoutdriver_type { | |||
| 54 | struct module *owner; | 55 | struct module *owner; |
| 55 | int (*initialize_mountpoint) (struct nfs_server *); | 56 | int (*initialize_mountpoint) (struct nfs_server *); |
| 56 | int (*uninitialize_mountpoint) (struct nfs_server *); | 57 | int (*uninitialize_mountpoint) (struct nfs_server *); |
| 58 | struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr); | ||
| 59 | void (*free_lseg) (struct pnfs_layout_segment *lseg); | ||
| 57 | }; | 60 | }; |
| 58 | 61 | ||
| 59 | struct pnfs_layout_hdr { | 62 | struct pnfs_layout_hdr { |
| 60 | unsigned long refcount; | 63 | unsigned long refcount; |
| 61 | struct list_head layouts; /* other client layouts */ | 64 | struct list_head layouts; /* other client layouts */ |
| 62 | struct list_head segs; /* layout segments list */ | 65 | struct list_head segs; /* layout segments list */ |
| 66 | seqlock_t seqlock; /* Protects the stateid */ | ||
| 67 | nfs4_stateid stateid; | ||
| 63 | unsigned long state; | 68 | unsigned long state; |
| 64 | struct inode *inode; | 69 | struct inode *inode; |
| 65 | }; | 70 | }; |
| 66 | 71 | ||
| 72 | struct pnfs_device { | ||
| 73 | struct nfs4_deviceid dev_id; | ||
| 74 | unsigned int layout_type; | ||
| 75 | unsigned int mincount; | ||
| 76 | struct page **pages; | ||
| 77 | void *area; | ||
| 78 | unsigned int pgbase; | ||
| 79 | unsigned int pglen; | ||
| 80 | }; | ||
| 81 | |||
| 82 | /* | ||
| 83 | * Device ID RCU cache. A device ID is unique per client ID and layout type. | ||
| 84 | */ | ||
| 85 | #define NFS4_DEVICE_ID_HASH_BITS 5 | ||
| 86 | #define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS) | ||
| 87 | #define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1) | ||
| 88 | |||
| 89 | static inline u32 | ||
| 90 | nfs4_deviceid_hash(struct nfs4_deviceid *id) | ||
| 91 | { | ||
| 92 | unsigned char *cptr = (unsigned char *)id->data; | ||
| 93 | unsigned int nbytes = NFS4_DEVICEID4_SIZE; | ||
| 94 | u32 x = 0; | ||
| 95 | |||
| 96 | while (nbytes--) { | ||
| 97 | x *= 37; | ||
| 98 | x += *cptr++; | ||
| 99 | } | ||
| 100 | return x & NFS4_DEVICE_ID_HASH_MASK; | ||
| 101 | } | ||
| 102 | |||
| 103 | struct pnfs_deviceid_node { | ||
| 104 | struct hlist_node de_node; | ||
| 105 | struct nfs4_deviceid de_id; | ||
| 106 | atomic_t de_ref; | ||
| 107 | }; | ||
| 108 | |||
| 109 | struct pnfs_deviceid_cache { | ||
| 110 | spinlock_t dc_lock; | ||
| 111 | atomic_t dc_ref; | ||
| 112 | void (*dc_free_callback)(struct pnfs_deviceid_node *); | ||
| 113 | struct hlist_head dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE]; | ||
| 114 | }; | ||
| 115 | |||
| 116 | extern int pnfs_alloc_init_deviceid_cache(struct nfs_client *, | ||
| 117 | void (*free_callback)(struct pnfs_deviceid_node *)); | ||
| 118 | extern void pnfs_put_deviceid_cache(struct nfs_client *); | ||
| 119 | extern struct pnfs_deviceid_node *pnfs_find_get_deviceid( | ||
| 120 | struct pnfs_deviceid_cache *, | ||
| 121 | struct nfs4_deviceid *); | ||
| 122 | extern struct pnfs_deviceid_node *pnfs_add_deviceid( | ||
| 123 | struct pnfs_deviceid_cache *, | ||
| 124 | struct pnfs_deviceid_node *); | ||
| 125 | extern void pnfs_put_deviceid(struct pnfs_deviceid_cache *c, | ||
| 126 | struct pnfs_deviceid_node *devid); | ||
| 127 | |||
| 67 | extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); | 128 | extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); |
| 68 | extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); | 129 | extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); |
| 69 | 130 | ||
| 131 | /* nfs4proc.c */ | ||
| 132 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, | ||
| 133 | struct pnfs_device *dev); | ||
| 134 | extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); | ||
| 135 | |||
| 136 | /* pnfs.c */ | ||
| 70 | struct pnfs_layout_segment * | 137 | struct pnfs_layout_segment * |
| 71 | pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, | 138 | pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, |
| 72 | enum pnfs_iomode access_type); | 139 | enum pnfs_iomode access_type); |
| 73 | void set_pnfs_layoutdriver(struct nfs_server *, u32 id); | 140 | void set_pnfs_layoutdriver(struct nfs_server *, u32 id); |
| 74 | void unset_pnfs_layoutdriver(struct nfs_server *); | 141 | void unset_pnfs_layoutdriver(struct nfs_server *); |
| 142 | int pnfs_layout_process(struct nfs4_layoutget *lgp); | ||
| 75 | void pnfs_destroy_layout(struct nfs_inode *); | 143 | void pnfs_destroy_layout(struct nfs_inode *); |
| 76 | void pnfs_destroy_all_layouts(struct nfs_client *); | 144 | void pnfs_destroy_all_layouts(struct nfs_client *); |
| 145 | void put_layout_hdr(struct inode *inode); | ||
| 146 | void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, | ||
| 147 | struct nfs4_state *open_state); | ||
| 77 | 148 | ||
| 78 | 149 | ||
| 79 | static inline int lo_fail_bit(u32 iomode) | 150 | static inline int lo_fail_bit(u32 iomode) |
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 34da32436ac0..a9683d6acaa4 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h | |||
| @@ -545,6 +545,8 @@ enum { | |||
| 545 | NFSPROC4_CLNT_SEQUENCE, | 545 | NFSPROC4_CLNT_SEQUENCE, |
| 546 | NFSPROC4_CLNT_GET_LEASE_TIME, | 546 | NFSPROC4_CLNT_GET_LEASE_TIME, |
| 547 | NFSPROC4_CLNT_RECLAIM_COMPLETE, | 547 | NFSPROC4_CLNT_RECLAIM_COMPLETE, |
| 548 | NFSPROC4_CLNT_LAYOUTGET, | ||
| 549 | NFSPROC4_CLNT_GETDEVICEINFO, | ||
| 548 | }; | 550 | }; |
| 549 | 551 | ||
| 550 | /* nfs41 types */ | 552 | /* nfs41 types */ |
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 4d62f1581ed1..452d96436d26 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h | |||
| @@ -83,6 +83,7 @@ struct nfs_client { | |||
| 83 | u32 cl_exchange_flags; | 83 | u32 cl_exchange_flags; |
| 84 | struct nfs4_session *cl_session; /* shared session */ | 84 | struct nfs4_session *cl_session; /* shared session */ |
| 85 | struct list_head cl_layouts; | 85 | struct list_head cl_layouts; |
| 86 | struct pnfs_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */ | ||
| 86 | #endif /* CONFIG_NFS_V4_1 */ | 87 | #endif /* CONFIG_NFS_V4_1 */ |
| 87 | 88 | ||
| 88 | #ifdef CONFIG_NFS_FSCACHE | 89 | #ifdef CONFIG_NFS_FSCACHE |
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 065f9d105d05..ba6cc8f223c9 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h | |||
| @@ -187,6 +187,55 @@ struct nfs4_get_lease_time_res { | |||
| 187 | struct nfs4_sequence_res lr_seq_res; | 187 | struct nfs4_sequence_res lr_seq_res; |
| 188 | }; | 188 | }; |
| 189 | 189 | ||
| 190 | #define PNFS_LAYOUT_MAXSIZE 4096 | ||
| 191 | |||
| 192 | struct nfs4_layoutdriver_data { | ||
| 193 | __u32 len; | ||
| 194 | void *buf; | ||
| 195 | }; | ||
| 196 | |||
| 197 | struct pnfs_layout_range { | ||
| 198 | u32 iomode; | ||
| 199 | u64 offset; | ||
| 200 | u64 length; | ||
| 201 | }; | ||
| 202 | |||
| 203 | struct nfs4_layoutget_args { | ||
| 204 | __u32 type; | ||
| 205 | struct pnfs_layout_range range; | ||
| 206 | __u64 minlength; | ||
| 207 | __u32 maxcount; | ||
| 208 | struct inode *inode; | ||
| 209 | struct nfs_open_context *ctx; | ||
| 210 | struct nfs4_sequence_args seq_args; | ||
| 211 | }; | ||
| 212 | |||
| 213 | struct nfs4_layoutget_res { | ||
| 214 | __u32 return_on_close; | ||
| 215 | struct pnfs_layout_range range; | ||
| 216 | __u32 type; | ||
| 217 | nfs4_stateid stateid; | ||
| 218 | struct nfs4_layoutdriver_data layout; | ||
| 219 | struct nfs4_sequence_res seq_res; | ||
| 220 | }; | ||
| 221 | |||
| 222 | struct nfs4_layoutget { | ||
| 223 | struct nfs4_layoutget_args args; | ||
| 224 | struct nfs4_layoutget_res res; | ||
| 225 | struct pnfs_layout_segment **lsegpp; | ||
| 226 | int status; | ||
| 227 | }; | ||
| 228 | |||
| 229 | struct nfs4_getdeviceinfo_args { | ||
| 230 | struct pnfs_device *pdev; | ||
| 231 | struct nfs4_sequence_args seq_args; | ||
| 232 | }; | ||
| 233 | |||
| 234 | struct nfs4_getdeviceinfo_res { | ||
| 235 | struct pnfs_device *pdev; | ||
| 236 | struct nfs4_sequence_res seq_res; | ||
| 237 | }; | ||
| 238 | |||
| 190 | /* | 239 | /* |
| 191 | * Arguments to the open call. | 240 | * Arguments to the open call. |
| 192 | */ | 241 | */ |
