| field | value | date |
|---|---|---|
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-29 17:10:13 -0400 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-29 17:10:13 -0400 |
| commit | cd1acdf1723d71b28175f95b04305f1cc74ce363 (patch) | |
| tree | 7ab58883eccd314be3d8efafd59a124d4ffbb861 | |
| parent | fac04863cef53a69830590b2e1c54345068a9747 (diff) | |
| parent | 9342077011d54f42fa1b88b7bc1f7008dcf5fff9 (diff) | |
Merge branch 'pnfs-submit' of git://git.open-osd.org/linux-open-osd
* 'pnfs-submit' of git://git.open-osd.org/linux-open-osd: (32 commits)
pnfs-obj: pg_test check for max_io_size
NFSv4.1: define nfs_generic_pg_test
NFSv4.1: use pnfs_generic_pg_test directly by layout driver
NFSv4.1: change pg_test return type to bool
NFSv4.1: unify pnfs_pageio_init functions
pnfs-obj: objlayout_encode_layoutcommit implementation
pnfs: encode_layoutcommit
pnfs-obj: report errors and .encode_layoutreturn Implementation.
pnfs: encode_layoutreturn
pnfs: layoutret_on_setattr
pnfs: layoutreturn
pnfs-obj: osd raid engine read/write implementation
pnfs: support for non-rpc layout drivers
pnfs-obj: define per-inode private structure
pnfs: alloc and free layout_hdr layoutdriver methods
pnfs-obj: objio_osd device information retrieval and caching
pnfs-obj: decode layout, alloc/free lseg
pnfs-obj: pnfs_osd XDR client implementation
pnfs-obj: pnfs_osd XDR definitions
pnfs-obj: objlayoutdriver module skeleton
...
32 files changed, 3907 insertions(+), 279 deletions(-)
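Several decode paths touched below (nfs_readdir_page_filler() in fs/nfs/dir.c, filelayout_decode_layout() in fs/nfs/nfs4filelayout.c, and decode_device() in fs/nfs/nfs4filelayoutdev.c) stop open-coding a struct xdr_buf and switch to the xdr_init_decode_pages() helper. The sketch below is a minimal, hypothetical illustration of that pattern; decode_example(), its page array, and the scratch page are illustrative stand-ins, not code from the patch:

```c
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/sunrpc/xdr.h>

/*
 * Hypothetical example of the xdr_init_decode_pages() pattern the converted
 * decoders use: set up an xdr_stream over a page array, attach a scratch
 * page, then pull fields with xdr_inline_decode().
 */
static int decode_example(struct page **pages, unsigned int buflen,
			  struct page *scratch)
{
	struct xdr_stream stream;
	struct xdr_buf buf;
	__be32 *p;

	/* One call replaces the old hand-filled xdr_buf + xdr_init_decode() */
	xdr_init_decode_pages(&stream, &buf, pages, buflen);
	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);

	/* Decode a single 32-bit word, as the real decoders do per field */
	p = xdr_inline_decode(&stream, 4);
	if (unlikely(!p))
		return -EIO;

	return be32_to_cpup(p);
}
```

The point of the helper is that each call site no longer duplicates the pages/page_len/buflen/len initialization of the xdr_buf by hand.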
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index ba306658a6db..81515545ba75 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
| @@ -87,6 +87,16 @@ config NFS_V4_1 | |||
| 87 | config PNFS_FILE_LAYOUT | 87 | config PNFS_FILE_LAYOUT |
| 88 | tristate | 88 | tristate |
| 89 | 89 | ||
| 90 | config PNFS_OBJLAYOUT | ||
| 91 | tristate "Provide support for the pNFS Objects Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)" | ||
| 92 | depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD | ||
| 93 | help | ||
| 94 | Say M here if you want your pNFS client to support the Objects Layout Driver. | ||
| 95 | Requires the SCSI osd initiator library (SCSI_OSD_INITIATOR) and | ||
| 96 | upper level driver (SCSI_OSD_ULD). | ||
| 97 | |||
| 98 | If unsure, say N. | ||
| 99 | |||
| 90 | config ROOT_NFS | 100 | config ROOT_NFS |
| 91 | bool "Root file system on NFS" | 101 | bool "Root file system on NFS" |
| 92 | depends on NFS_FS=y && IP_PNP | 102 | depends on NFS_FS=y && IP_PNP |
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 4776ff9e3814..6a34f7dd0e6f 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
| @@ -15,9 +15,11 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ | |||
| 15 | delegation.o idmap.o \ | 15 | delegation.o idmap.o \ |
| 16 | callback.o callback_xdr.o callback_proc.o \ | 16 | callback.o callback_xdr.o callback_proc.o \ |
| 17 | nfs4namespace.o | 17 | nfs4namespace.o |
| 18 | nfs-$(CONFIG_NFS_V4_1) += pnfs.o | 18 | nfs-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o |
| 19 | nfs-$(CONFIG_SYSCTL) += sysctl.o | 19 | nfs-$(CONFIG_SYSCTL) += sysctl.o |
| 20 | nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o | 20 | nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o |
| 21 | 21 | ||
| 22 | obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o | 22 | obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o |
| 23 | nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o | 23 | nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o |
| 24 | |||
| 25 | obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/ | ||
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 46d93ce7311b..b257383bb565 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
| @@ -167,6 +167,23 @@ extern unsigned nfs4_callback_layoutrecall( | |||
| 167 | 167 | ||
| 168 | extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); | 168 | extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); |
| 169 | extern void nfs4_cb_take_slot(struct nfs_client *clp); | 169 | extern void nfs4_cb_take_slot(struct nfs_client *clp); |
| 170 | |||
| 171 | struct cb_devicenotifyitem { | ||
| 172 | uint32_t cbd_notify_type; | ||
| 173 | uint32_t cbd_layout_type; | ||
| 174 | struct nfs4_deviceid cbd_dev_id; | ||
| 175 | uint32_t cbd_immediate; | ||
| 176 | }; | ||
| 177 | |||
| 178 | struct cb_devicenotifyargs { | ||
| 179 | int ndevs; | ||
| 180 | struct cb_devicenotifyitem *devs; | ||
| 181 | }; | ||
| 182 | |||
| 183 | extern __be32 nfs4_callback_devicenotify( | ||
| 184 | struct cb_devicenotifyargs *args, | ||
| 185 | void *dummy, struct cb_process_state *cps); | ||
| 186 | |||
| 170 | #endif /* CONFIG_NFS_V4_1 */ | 187 | #endif /* CONFIG_NFS_V4_1 */ |
| 171 | extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *); | 188 | extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *); |
| 172 | extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, | 189 | extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, |
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 2f41dccea18e..d4d1954e9bb9 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
| @@ -139,7 +139,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, | |||
| 139 | spin_lock(&ino->i_lock); | 139 | spin_lock(&ino->i_lock); |
| 140 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || | 140 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || |
| 141 | mark_matching_lsegs_invalid(lo, &free_me_list, | 141 | mark_matching_lsegs_invalid(lo, &free_me_list, |
| 142 | args->cbl_range.iomode)) | 142 | &args->cbl_range)) |
| 143 | rv = NFS4ERR_DELAY; | 143 | rv = NFS4ERR_DELAY; |
| 144 | else | 144 | else |
| 145 | rv = NFS4ERR_NOMATCHING_LAYOUT; | 145 | rv = NFS4ERR_NOMATCHING_LAYOUT; |
| @@ -184,7 +184,7 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, | |||
| 184 | ino = lo->plh_inode; | 184 | ino = lo->plh_inode; |
| 185 | spin_lock(&ino->i_lock); | 185 | spin_lock(&ino->i_lock); |
| 186 | set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); | 186 | set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); |
| 187 | if (mark_matching_lsegs_invalid(lo, &free_me_list, range.iomode)) | 187 | if (mark_matching_lsegs_invalid(lo, &free_me_list, &range)) |
| 188 | rv = NFS4ERR_DELAY; | 188 | rv = NFS4ERR_DELAY; |
| 189 | list_del_init(&lo->plh_bulk_recall); | 189 | list_del_init(&lo->plh_bulk_recall); |
| 190 | spin_unlock(&ino->i_lock); | 190 | spin_unlock(&ino->i_lock); |
| @@ -241,6 +241,53 @@ static void pnfs_recall_all_layouts(struct nfs_client *clp) | |||
| 241 | do_callback_layoutrecall(clp, &args); | 241 | do_callback_layoutrecall(clp, &args); |
| 242 | } | 242 | } |
| 243 | 243 | ||
| 244 | __be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args, | ||
| 245 | void *dummy, struct cb_process_state *cps) | ||
| 246 | { | ||
| 247 | int i; | ||
| 248 | __be32 res = 0; | ||
| 249 | struct nfs_client *clp = cps->clp; | ||
| 250 | struct nfs_server *server = NULL; | ||
| 251 | |||
| 252 | dprintk("%s: -->\n", __func__); | ||
| 253 | |||
| 254 | if (!clp) { | ||
| 255 | res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION); | ||
| 256 | goto out; | ||
| 257 | } | ||
| 258 | |||
| 259 | for (i = 0; i < args->ndevs; i++) { | ||
| 260 | struct cb_devicenotifyitem *dev = &args->devs[i]; | ||
| 261 | |||
| 262 | if (!server || | ||
| 263 | server->pnfs_curr_ld->id != dev->cbd_layout_type) { | ||
| 264 | rcu_read_lock(); | ||
| 265 | list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) | ||
| 266 | if (server->pnfs_curr_ld && | ||
| 267 | server->pnfs_curr_ld->id == dev->cbd_layout_type) { | ||
| 268 | rcu_read_unlock(); | ||
| 269 | goto found; | ||
| 270 | } | ||
| 271 | rcu_read_unlock(); | ||
| 272 | dprintk("%s: layout type %u not found\n", | ||
| 273 | __func__, dev->cbd_layout_type); | ||
| 274 | continue; | ||
| 275 | } | ||
| 276 | |||
| 277 | found: | ||
| 278 | if (dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) | ||
| 279 | dprintk("%s: NOTIFY_DEVICEID4_CHANGE not supported, " | ||
| 280 | "deleting instead\n", __func__); | ||
| 281 | nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id); | ||
| 282 | } | ||
| 283 | |||
| 284 | out: | ||
| 285 | kfree(args->devs); | ||
| 286 | dprintk("%s: exit with status = %u\n", | ||
| 287 | __func__, be32_to_cpu(res)); | ||
| 288 | return res; | ||
| 289 | } | ||
| 290 | |||
| 244 | int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid) | 291 | int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid) |
| 245 | { | 292 | { |
| 246 | if (delegation == NULL) | 293 | if (delegation == NULL) |
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 00ecf62ce7c1..c6c86a77e043 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
| @@ -25,6 +25,7 @@ | |||
| 25 | 25 | ||
| 26 | #if defined(CONFIG_NFS_V4_1) | 26 | #if defined(CONFIG_NFS_V4_1) |
| 27 | #define CB_OP_LAYOUTRECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) | 27 | #define CB_OP_LAYOUTRECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) |
| 28 | #define CB_OP_DEVICENOTIFY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) | ||
| 28 | #define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ | 29 | #define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ |
| 29 | 4 + 1 + 3) | 30 | 4 + 1 + 3) |
| 30 | #define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) | 31 | #define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) |
| @@ -284,6 +285,93 @@ out: | |||
| 284 | return status; | 285 | return status; |
| 285 | } | 286 | } |
| 286 | 287 | ||
| 288 | static | ||
| 289 | __be32 decode_devicenotify_args(struct svc_rqst *rqstp, | ||
| 290 | struct xdr_stream *xdr, | ||
| 291 | struct cb_devicenotifyargs *args) | ||
| 292 | { | ||
| 293 | __be32 *p; | ||
| 294 | __be32 status = 0; | ||
| 295 | u32 tmp; | ||
| 296 | int n, i; | ||
| 297 | args->ndevs = 0; | ||
| 298 | |||
| 299 | /* Num of device notifications */ | ||
| 300 | p = read_buf(xdr, sizeof(uint32_t)); | ||
| 301 | if (unlikely(p == NULL)) { | ||
| 302 | status = htonl(NFS4ERR_BADXDR); | ||
| 303 | goto out; | ||
| 304 | } | ||
| 305 | n = ntohl(*p++); | ||
| 306 | if (n <= 0) | ||
| 307 | goto out; | ||
| 308 | |||
| 309 | args->devs = kmalloc(n * sizeof(*args->devs), GFP_KERNEL); | ||
| 310 | if (!args->devs) { | ||
| 311 | status = htonl(NFS4ERR_DELAY); | ||
| 312 | goto out; | ||
| 313 | } | ||
| 314 | |||
| 315 | /* Decode each dev notification */ | ||
| 316 | for (i = 0; i < n; i++) { | ||
| 317 | struct cb_devicenotifyitem *dev = &args->devs[i]; | ||
| 318 | |||
| 319 | p = read_buf(xdr, (4 * sizeof(uint32_t)) + NFS4_DEVICEID4_SIZE); | ||
| 320 | if (unlikely(p == NULL)) { | ||
| 321 | status = htonl(NFS4ERR_BADXDR); | ||
| 322 | goto err; | ||
| 323 | } | ||
| 324 | |||
| 325 | tmp = ntohl(*p++); /* bitmap size */ | ||
| 326 | if (tmp != 1) { | ||
| 327 | status = htonl(NFS4ERR_INVAL); | ||
| 328 | goto err; | ||
| 329 | } | ||
| 330 | dev->cbd_notify_type = ntohl(*p++); | ||
| 331 | if (dev->cbd_notify_type != NOTIFY_DEVICEID4_CHANGE && | ||
| 332 | dev->cbd_notify_type != NOTIFY_DEVICEID4_DELETE) { | ||
| 333 | status = htonl(NFS4ERR_INVAL); | ||
| 334 | goto err; | ||
| 335 | } | ||
| 336 | |||
| 337 | tmp = ntohl(*p++); /* opaque size */ | ||
| 338 | if (((dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) && | ||
| 339 | (tmp != NFS4_DEVICEID4_SIZE + 8)) || | ||
| 340 | ((dev->cbd_notify_type == NOTIFY_DEVICEID4_DELETE) && | ||
| 341 | (tmp != NFS4_DEVICEID4_SIZE + 4))) { | ||
| 342 | status = htonl(NFS4ERR_INVAL); | ||
| 343 | goto err; | ||
| 344 | } | ||
| 345 | dev->cbd_layout_type = ntohl(*p++); | ||
| 346 | memcpy(dev->cbd_dev_id.data, p, NFS4_DEVICEID4_SIZE); | ||
| 347 | p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); | ||
| 348 | |||
| 349 | if (dev->cbd_layout_type == NOTIFY_DEVICEID4_CHANGE) { | ||
| 350 | p = read_buf(xdr, sizeof(uint32_t)); | ||
| 351 | if (unlikely(p == NULL)) { | ||
| 352 | status = htonl(NFS4ERR_BADXDR); | ||
| 353 | goto err; | ||
| 354 | } | ||
| 355 | dev->cbd_immediate = ntohl(*p++); | ||
| 356 | } else { | ||
| 357 | dev->cbd_immediate = 0; | ||
| 358 | } | ||
| 359 | |||
| 360 | args->ndevs++; | ||
| 361 | |||
| 362 | dprintk("%s: type %d layout 0x%x immediate %d\n", | ||
| 363 | __func__, dev->cbd_notify_type, dev->cbd_layout_type, | ||
| 364 | dev->cbd_immediate); | ||
| 365 | } | ||
| 366 | out: | ||
| 367 | dprintk("%s: status %d ndevs %d\n", | ||
| 368 | __func__, ntohl(status), args->ndevs); | ||
| 369 | return status; | ||
| 370 | err: | ||
| 371 | kfree(args->devs); | ||
| 372 | goto out; | ||
| 373 | } | ||
| 374 | |||
| 287 | static __be32 decode_sessionid(struct xdr_stream *xdr, | 375 | static __be32 decode_sessionid(struct xdr_stream *xdr, |
| 288 | struct nfs4_sessionid *sid) | 376 | struct nfs4_sessionid *sid) |
| 289 | { | 377 | { |
| @@ -639,10 +727,10 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) | |||
| 639 | case OP_CB_RECALL_ANY: | 727 | case OP_CB_RECALL_ANY: |
| 640 | case OP_CB_RECALL_SLOT: | 728 | case OP_CB_RECALL_SLOT: |
| 641 | case OP_CB_LAYOUTRECALL: | 729 | case OP_CB_LAYOUTRECALL: |
| 730 | case OP_CB_NOTIFY_DEVICEID: | ||
| 642 | *op = &callback_ops[op_nr]; | 731 | *op = &callback_ops[op_nr]; |
| 643 | break; | 732 | break; |
| 644 | 733 | ||
| 645 | case OP_CB_NOTIFY_DEVICEID: | ||
| 646 | case OP_CB_NOTIFY: | 734 | case OP_CB_NOTIFY: |
| 647 | case OP_CB_PUSH_DELEG: | 735 | case OP_CB_PUSH_DELEG: |
| 648 | case OP_CB_RECALLABLE_OBJ_AVAIL: | 736 | case OP_CB_RECALLABLE_OBJ_AVAIL: |
| @@ -849,6 +937,12 @@ static struct callback_op callback_ops[] = { | |||
| 849 | (callback_decode_arg_t)decode_layoutrecall_args, | 937 | (callback_decode_arg_t)decode_layoutrecall_args, |
| 850 | .res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ, | 938 | .res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ, |
| 851 | }, | 939 | }, |
| 940 | [OP_CB_NOTIFY_DEVICEID] = { | ||
| 941 | .process_op = (callback_process_op_t)nfs4_callback_devicenotify, | ||
| 942 | .decode_args = | ||
| 943 | (callback_decode_arg_t)decode_devicenotify_args, | ||
| 944 | .res_maxsize = CB_OP_DEVICENOTIFY_RES_MAXSZ, | ||
| 945 | }, | ||
| 852 | [OP_CB_SEQUENCE] = { | 946 | [OP_CB_SEQUENCE] = { |
| 853 | .process_op = (callback_process_op_t)nfs4_callback_sequence, | 947 | .process_op = (callback_process_op_t)nfs4_callback_sequence, |
| 854 | .decode_args = (callback_decode_arg_t)decode_cb_sequence_args, | 948 | .decode_args = (callback_decode_arg_t)decode_cb_sequence_args, |
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 139be9647d80..b3dc2b88b65b 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
| @@ -290,6 +290,8 @@ static void nfs_free_client(struct nfs_client *clp) | |||
| 290 | if (clp->cl_machine_cred != NULL) | 290 | if (clp->cl_machine_cred != NULL) |
| 291 | put_rpccred(clp->cl_machine_cred); | 291 | put_rpccred(clp->cl_machine_cred); |
| 292 | 292 | ||
| 293 | nfs4_deviceid_purge_client(clp); | ||
| 294 | |||
| 293 | kfree(clp->cl_hostname); | 295 | kfree(clp->cl_hostname); |
| 294 | kfree(clp); | 296 | kfree(clp); |
| 295 | 297 | ||
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 424e47773a84..ededdbd0db38 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
| @@ -512,12 +512,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en | |||
| 512 | struct page **xdr_pages, struct page *page, unsigned int buflen) | 512 | struct page **xdr_pages, struct page *page, unsigned int buflen) |
| 513 | { | 513 | { |
| 514 | struct xdr_stream stream; | 514 | struct xdr_stream stream; |
| 515 | struct xdr_buf buf = { | 515 | struct xdr_buf buf; |
| 516 | .pages = xdr_pages, | ||
| 517 | .page_len = buflen, | ||
| 518 | .buflen = buflen, | ||
| 519 | .len = buflen, | ||
| 520 | }; | ||
| 521 | struct page *scratch; | 516 | struct page *scratch; |
| 522 | struct nfs_cache_array *array; | 517 | struct nfs_cache_array *array; |
| 523 | unsigned int count = 0; | 518 | unsigned int count = 0; |
| @@ -527,7 +522,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en | |||
| 527 | if (scratch == NULL) | 522 | if (scratch == NULL) |
| 528 | return -ENOMEM; | 523 | return -ENOMEM; |
| 529 | 524 | ||
| 530 | xdr_init_decode(&stream, &buf, NULL); | 525 | xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen); |
| 531 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | 526 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); |
| 532 | 527 | ||
| 533 | do { | 528 | do { |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 873c6fa8bc3b..144f2a3c7185 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
| @@ -1428,9 +1428,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1428 | */ | 1428 | */ |
| 1429 | void nfs4_evict_inode(struct inode *inode) | 1429 | void nfs4_evict_inode(struct inode *inode) |
| 1430 | { | 1430 | { |
| 1431 | pnfs_destroy_layout(NFS_I(inode)); | ||
| 1432 | truncate_inode_pages(&inode->i_data, 0); | 1431 | truncate_inode_pages(&inode->i_data, 0); |
| 1433 | end_writeback(inode); | 1432 | end_writeback(inode); |
| 1433 | pnfs_return_layout(inode); | ||
| 1434 | pnfs_destroy_layout(NFS_I(inode)); | ||
| 1434 | /* If we are holding a delegation, return it! */ | 1435 | /* If we are holding a delegation, return it! */ |
| 1435 | nfs_inode_return_delegation_noreclaim(inode); | 1436 | nfs_inode_return_delegation_noreclaim(inode); |
| 1436 | /* First call standard NFS clear_inode() code */ | 1437 | /* First call standard NFS clear_inode() code */ |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 2df6ca7b5898..b9056cbe68d6 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
| @@ -310,6 +310,7 @@ extern int nfs_migrate_page(struct address_space *, | |||
| 310 | #endif | 310 | #endif |
| 311 | 311 | ||
| 312 | /* nfs4proc.c */ | 312 | /* nfs4proc.c */ |
| 313 | extern void __nfs4_read_done_cb(struct nfs_read_data *); | ||
| 313 | extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data); | 314 | extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data); |
| 314 | extern int nfs4_init_client(struct nfs_client *clp, | 315 | extern int nfs4_init_client(struct nfs_client *clp, |
| 315 | const struct rpc_timeout *timeparms, | 316 | const struct rpc_timeout *timeparms, |
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index be79dc9f386d..426908809c97 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
| @@ -421,6 +421,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, | |||
| 421 | struct nfs4_deviceid *id, | 421 | struct nfs4_deviceid *id, |
| 422 | gfp_t gfp_flags) | 422 | gfp_t gfp_flags) |
| 423 | { | 423 | { |
| 424 | struct nfs4_deviceid_node *d; | ||
| 424 | struct nfs4_file_layout_dsaddr *dsaddr; | 425 | struct nfs4_file_layout_dsaddr *dsaddr; |
| 425 | int status = -EINVAL; | 426 | int status = -EINVAL; |
| 426 | struct nfs_server *nfss = NFS_SERVER(lo->plh_inode); | 427 | struct nfs_server *nfss = NFS_SERVER(lo->plh_inode); |
| @@ -428,7 +429,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, | |||
| 428 | dprintk("--> %s\n", __func__); | 429 | dprintk("--> %s\n", __func__); |
| 429 | 430 | ||
| 430 | if (fl->pattern_offset > lgr->range.offset) { | 431 | if (fl->pattern_offset > lgr->range.offset) { |
| 431 | dprintk("%s pattern_offset %lld to large\n", | 432 | dprintk("%s pattern_offset %lld too large\n", |
| 432 | __func__, fl->pattern_offset); | 433 | __func__, fl->pattern_offset); |
| 433 | goto out; | 434 | goto out; |
| 434 | } | 435 | } |
| @@ -440,12 +441,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, | |||
| 440 | } | 441 | } |
| 441 | 442 | ||
| 442 | /* find and reference the deviceid */ | 443 | /* find and reference the deviceid */ |
| 443 | dsaddr = nfs4_fl_find_get_deviceid(id); | 444 | d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld, |
| 444 | if (dsaddr == NULL) { | 445 | NFS_SERVER(lo->plh_inode)->nfs_client, id); |
| 446 | if (d == NULL) { | ||
| 445 | dsaddr = get_device_info(lo->plh_inode, id, gfp_flags); | 447 | dsaddr = get_device_info(lo->plh_inode, id, gfp_flags); |
| 446 | if (dsaddr == NULL) | 448 | if (dsaddr == NULL) |
| 447 | goto out; | 449 | goto out; |
| 448 | } | 450 | } else |
| 451 | dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); | ||
| 449 | fl->dsaddr = dsaddr; | 452 | fl->dsaddr = dsaddr; |
| 450 | 453 | ||
| 451 | if (fl->first_stripe_index < 0 || | 454 | if (fl->first_stripe_index < 0 || |
| @@ -507,12 +510,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, | |||
| 507 | gfp_t gfp_flags) | 510 | gfp_t gfp_flags) |
| 508 | { | 511 | { |
| 509 | struct xdr_stream stream; | 512 | struct xdr_stream stream; |
| 510 | struct xdr_buf buf = { | 513 | struct xdr_buf buf; |
| 511 | .pages = lgr->layoutp->pages, | ||
| 512 | .page_len = lgr->layoutp->len, | ||
| 513 | .buflen = lgr->layoutp->len, | ||
| 514 | .len = lgr->layoutp->len, | ||
| 515 | }; | ||
| 516 | struct page *scratch; | 514 | struct page *scratch; |
| 517 | __be32 *p; | 515 | __be32 *p; |
| 518 | uint32_t nfl_util; | 516 | uint32_t nfl_util; |
| @@ -524,7 +522,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, | |||
| 524 | if (!scratch) | 522 | if (!scratch) |
| 525 | return -ENOMEM; | 523 | return -ENOMEM; |
| 526 | 524 | ||
| 527 | xdr_init_decode(&stream, &buf, NULL); | 525 | xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len); |
| 528 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | 526 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); |
| 529 | 527 | ||
| 530 | /* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8), | 528 | /* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8), |
| @@ -535,7 +533,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, | |||
| 535 | 533 | ||
| 536 | memcpy(id, p, sizeof(*id)); | 534 | memcpy(id, p, sizeof(*id)); |
| 537 | p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); | 535 | p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); |
| 538 | print_deviceid(id); | 536 | nfs4_print_deviceid(id); |
| 539 | 537 | ||
| 540 | nfl_util = be32_to_cpup(p++); | 538 | nfl_util = be32_to_cpup(p++); |
| 541 | if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS) | 539 | if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS) |
| @@ -653,16 +651,19 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, | |||
| 653 | /* | 651 | /* |
| 654 | * filelayout_pg_test(). Called by nfs_can_coalesce_requests() | 652 | * filelayout_pg_test(). Called by nfs_can_coalesce_requests() |
| 655 | * | 653 | * |
| 656 | * return 1 : coalesce page | 654 | * return true : coalesce page |
| 657 | * return 0 : don't coalesce page | 655 | * return false : don't coalesce page |
| 658 | */ | 656 | */ |
| 659 | int | 657 | bool |
| 660 | filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | 658 | filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, |
| 661 | struct nfs_page *req) | 659 | struct nfs_page *req) |
| 662 | { | 660 | { |
| 663 | u64 p_stripe, r_stripe; | 661 | u64 p_stripe, r_stripe; |
| 664 | u32 stripe_unit; | 662 | u32 stripe_unit; |
| 665 | 663 | ||
| 664 | if (!pnfs_generic_pg_test(pgio, prev, req)) | ||
| 665 | return 0; | ||
| 666 | |||
| 666 | if (!pgio->pg_lseg) | 667 | if (!pgio->pg_lseg) |
| 667 | return 1; | 668 | return 1; |
| 668 | p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; | 669 | p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; |
| @@ -860,6 +861,12 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, | |||
| 860 | return -ENOMEM; | 861 | return -ENOMEM; |
| 861 | } | 862 | } |
| 862 | 863 | ||
| 864 | static void | ||
| 865 | filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d) | ||
| 866 | { | ||
| 867 | nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node)); | ||
| 868 | } | ||
| 869 | |||
| 863 | static struct pnfs_layoutdriver_type filelayout_type = { | 870 | static struct pnfs_layoutdriver_type filelayout_type = { |
| 864 | .id = LAYOUT_NFSV4_1_FILES, | 871 | .id = LAYOUT_NFSV4_1_FILES, |
| 865 | .name = "LAYOUT_NFSV4_1_FILES", | 872 | .name = "LAYOUT_NFSV4_1_FILES", |
| @@ -872,6 +879,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { | |||
| 872 | .commit_pagelist = filelayout_commit_pagelist, | 879 | .commit_pagelist = filelayout_commit_pagelist, |
| 873 | .read_pagelist = filelayout_read_pagelist, | 880 | .read_pagelist = filelayout_read_pagelist, |
| 874 | .write_pagelist = filelayout_write_pagelist, | 881 | .write_pagelist = filelayout_write_pagelist, |
| 882 | .free_deviceid_node = filelayout_free_deveiceid_node, | ||
| 875 | }; | 883 | }; |
| 876 | 884 | ||
| 877 | static int __init nfs4filelayout_init(void) | 885 | static int __init nfs4filelayout_init(void) |
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 2b461d77b43a..cebe01e3795e 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
| @@ -59,9 +59,7 @@ struct nfs4_pnfs_ds { | |||
| 59 | #define NFS4_DEVICE_ID_NEG_ENTRY 0x00000001 | 59 | #define NFS4_DEVICE_ID_NEG_ENTRY 0x00000001 |
| 60 | 60 | ||
| 61 | struct nfs4_file_layout_dsaddr { | 61 | struct nfs4_file_layout_dsaddr { |
| 62 | struct hlist_node node; | 62 | struct nfs4_deviceid_node id_node; |
| 63 | struct nfs4_deviceid deviceid; | ||
| 64 | atomic_t ref; | ||
| 65 | unsigned long flags; | 63 | unsigned long flags; |
| 66 | u32 stripe_count; | 64 | u32 stripe_count; |
| 67 | u8 *stripe_indices; | 65 | u8 *stripe_indices; |
| @@ -95,14 +93,12 @@ extern struct nfs_fh * | |||
| 95 | nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); | 93 | nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); |
| 96 | 94 | ||
| 97 | extern void print_ds(struct nfs4_pnfs_ds *ds); | 95 | extern void print_ds(struct nfs4_pnfs_ds *ds); |
| 98 | extern void print_deviceid(struct nfs4_deviceid *dev_id); | ||
| 99 | u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset); | 96 | u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset); |
| 100 | u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); | 97 | u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); |
| 101 | struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, | 98 | struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, |
| 102 | u32 ds_idx); | 99 | u32 ds_idx); |
| 103 | extern struct nfs4_file_layout_dsaddr * | ||
| 104 | nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id); | ||
| 105 | extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); | 100 | extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); |
| 101 | extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); | ||
| 106 | struct nfs4_file_layout_dsaddr * | 102 | struct nfs4_file_layout_dsaddr * |
| 107 | get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); | 103 | get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); |
| 108 | 104 | ||
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index db07c7af1395..3b7bf1377264 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
| @@ -37,30 +37,6 @@ | |||
| 37 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | 37 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD |
| 38 | 38 | ||
| 39 | /* | 39 | /* |
| 40 | * Device ID RCU cache. A device ID is unique per client ID and layout type. | ||
| 41 | */ | ||
| 42 | #define NFS4_FL_DEVICE_ID_HASH_BITS 5 | ||
| 43 | #define NFS4_FL_DEVICE_ID_HASH_SIZE (1 << NFS4_FL_DEVICE_ID_HASH_BITS) | ||
| 44 | #define NFS4_FL_DEVICE_ID_HASH_MASK (NFS4_FL_DEVICE_ID_HASH_SIZE - 1) | ||
| 45 | |||
| 46 | static inline u32 | ||
| 47 | nfs4_fl_deviceid_hash(struct nfs4_deviceid *id) | ||
| 48 | { | ||
| 49 | unsigned char *cptr = (unsigned char *)id->data; | ||
| 50 | unsigned int nbytes = NFS4_DEVICEID4_SIZE; | ||
| 51 | u32 x = 0; | ||
| 52 | |||
| 53 | while (nbytes--) { | ||
| 54 | x *= 37; | ||
| 55 | x += *cptr++; | ||
| 56 | } | ||
| 57 | return x & NFS4_FL_DEVICE_ID_HASH_MASK; | ||
| 58 | } | ||
| 59 | |||
| 60 | static struct hlist_head filelayout_deviceid_cache[NFS4_FL_DEVICE_ID_HASH_SIZE]; | ||
| 61 | static DEFINE_SPINLOCK(filelayout_deviceid_lock); | ||
| 62 | |||
| 63 | /* | ||
| 64 | * Data server cache | 40 | * Data server cache |
| 65 | * | 41 | * |
| 66 | * Data servers can be mapped to different device ids. | 42 | * Data servers can be mapped to different device ids. |
| @@ -89,27 +65,6 @@ print_ds(struct nfs4_pnfs_ds *ds) | |||
| 89 | ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); | 65 | ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); |
| 90 | } | 66 | } |
| 91 | 67 | ||
| 92 | void | ||
| 93 | print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr) | ||
| 94 | { | ||
| 95 | int i; | ||
| 96 | |||
| 97 | ifdebug(FACILITY) { | ||
| 98 | printk("%s dsaddr->ds_num %d\n", __func__, | ||
| 99 | dsaddr->ds_num); | ||
| 100 | for (i = 0; i < dsaddr->ds_num; i++) | ||
| 101 | print_ds(dsaddr->ds_list[i]); | ||
| 102 | } | ||
| 103 | } | ||
| 104 | |||
| 105 | void print_deviceid(struct nfs4_deviceid *id) | ||
| 106 | { | ||
| 107 | u32 *p = (u32 *)id; | ||
| 108 | |||
| 109 | dprintk("%s: device id= [%x%x%x%x]\n", __func__, | ||
| 110 | p[0], p[1], p[2], p[3]); | ||
| 111 | } | ||
| 112 | |||
| 113 | /* nfs4_ds_cache_lock is held */ | 68 | /* nfs4_ds_cache_lock is held */ |
| 114 | static struct nfs4_pnfs_ds * | 69 | static struct nfs4_pnfs_ds * |
| 115 | _data_server_lookup_locked(u32 ip_addr, u32 port) | 70 | _data_server_lookup_locked(u32 ip_addr, u32 port) |
| @@ -201,13 +156,13 @@ destroy_ds(struct nfs4_pnfs_ds *ds) | |||
| 201 | kfree(ds); | 156 | kfree(ds); |
| 202 | } | 157 | } |
| 203 | 158 | ||
| 204 | static void | 159 | void |
| 205 | nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) | 160 | nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) |
| 206 | { | 161 | { |
| 207 | struct nfs4_pnfs_ds *ds; | 162 | struct nfs4_pnfs_ds *ds; |
| 208 | int i; | 163 | int i; |
| 209 | 164 | ||
| 210 | print_deviceid(&dsaddr->deviceid); | 165 | nfs4_print_deviceid(&dsaddr->id_node.deviceid); |
| 211 | 166 | ||
| 212 | for (i = 0; i < dsaddr->ds_num; i++) { | 167 | for (i = 0; i < dsaddr->ds_num; i++) { |
| 213 | ds = dsaddr->ds_list[i]; | 168 | ds = dsaddr->ds_list[i]; |
| @@ -353,12 +308,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | |||
| 353 | u8 max_stripe_index; | 308 | u8 max_stripe_index; |
| 354 | struct nfs4_file_layout_dsaddr *dsaddr = NULL; | 309 | struct nfs4_file_layout_dsaddr *dsaddr = NULL; |
| 355 | struct xdr_stream stream; | 310 | struct xdr_stream stream; |
| 356 | struct xdr_buf buf = { | 311 | struct xdr_buf buf; |
| 357 | .pages = pdev->pages, | ||
| 358 | .page_len = pdev->pglen, | ||
| 359 | .buflen = pdev->pglen, | ||
| 360 | .len = pdev->pglen, | ||
| 361 | }; | ||
| 362 | struct page *scratch; | 312 | struct page *scratch; |
| 363 | 313 | ||
| 364 | /* set up xdr stream */ | 314 | /* set up xdr stream */ |
| @@ -366,7 +316,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | |||
| 366 | if (!scratch) | 316 | if (!scratch) |
| 367 | goto out_err; | 317 | goto out_err; |
| 368 | 318 | ||
| 369 | xdr_init_decode(&stream, &buf, NULL); | 319 | xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); |
| 370 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | 320 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); |
| 371 | 321 | ||
| 372 | /* Get the stripe count (number of stripe index) */ | 322 | /* Get the stripe count (number of stripe index) */ |
| @@ -431,8 +381,10 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | |||
| 431 | dsaddr->stripe_indices = stripe_indices; | 381 | dsaddr->stripe_indices = stripe_indices; |
| 432 | stripe_indices = NULL; | 382 | stripe_indices = NULL; |
| 433 | dsaddr->ds_num = num; | 383 | dsaddr->ds_num = num; |
| 434 | 384 | nfs4_init_deviceid_node(&dsaddr->id_node, | |
| 435 | memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id)); | 385 | NFS_SERVER(ino)->pnfs_curr_ld, |
| 386 | NFS_SERVER(ino)->nfs_client, | ||
| 387 | &pdev->dev_id); | ||
| 436 | 388 | ||
| 437 | for (i = 0; i < dsaddr->ds_num; i++) { | 389 | for (i = 0; i < dsaddr->ds_num; i++) { |
| 438 | int j; | 390 | int j; |
| @@ -505,8 +457,8 @@ out_err: | |||
| 505 | static struct nfs4_file_layout_dsaddr * | 457 | static struct nfs4_file_layout_dsaddr * |
| 506 | decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags) | 458 | decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags) |
| 507 | { | 459 | { |
| 508 | struct nfs4_file_layout_dsaddr *d, *new; | 460 | struct nfs4_deviceid_node *d; |
| 509 | long hash; | 461 | struct nfs4_file_layout_dsaddr *n, *new; |
| 510 | 462 | ||
| 511 | new = decode_device(inode, dev, gfp_flags); | 463 | new = decode_device(inode, dev, gfp_flags); |
| 512 | if (!new) { | 464 | if (!new) { |
| @@ -515,20 +467,13 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_fl | |||
| 515 | return NULL; | 467 | return NULL; |
| 516 | } | 468 | } |
| 517 | 469 | ||
| 518 | spin_lock(&filelayout_deviceid_lock); | 470 | d = nfs4_insert_deviceid_node(&new->id_node); |
| 519 | d = nfs4_fl_find_get_deviceid(&new->deviceid); | 471 | n = container_of(d, struct nfs4_file_layout_dsaddr, id_node); |
| 520 | if (d) { | 472 | if (n != new) { |
| 521 | spin_unlock(&filelayout_deviceid_lock); | ||
| 522 | nfs4_fl_free_deviceid(new); | 473 | nfs4_fl_free_deviceid(new); |
| 523 | return d; | 474 | return n; |
| 524 | } | 475 | } |
| 525 | 476 | ||
| 526 | INIT_HLIST_NODE(&new->node); | ||
| 527 | atomic_set(&new->ref, 1); | ||
| 528 | hash = nfs4_fl_deviceid_hash(&new->deviceid); | ||
| 529 | hlist_add_head_rcu(&new->node, &filelayout_deviceid_cache[hash]); | ||
| 530 | spin_unlock(&filelayout_deviceid_lock); | ||
| 531 | |||
| 532 | return new; | 477 | return new; |
| 533 | } | 478 | } |
| 534 | 479 | ||
| @@ -600,35 +545,7 @@ out_free: | |||
| 600 | void | 545 | void |
| 601 | nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) | 546 | nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) |
| 602 | { | 547 | { |
| 603 | if (atomic_dec_and_lock(&dsaddr->ref, &filelayout_deviceid_lock)) { | 548 | nfs4_put_deviceid_node(&dsaddr->id_node); |
| 604 | hlist_del_rcu(&dsaddr->node); | ||
| 605 | spin_unlock(&filelayout_deviceid_lock); | ||
| 606 | |||
| 607 | synchronize_rcu(); | ||
| 608 | nfs4_fl_free_deviceid(dsaddr); | ||
| 609 | } | ||
| 610 | } | ||
| 611 | |||
| 612 | struct nfs4_file_layout_dsaddr * | ||
| 613 | nfs4_fl_find_get_deviceid(struct nfs4_deviceid *id) | ||
| 614 | { | ||
| 615 | struct nfs4_file_layout_dsaddr *d; | ||
| 616 | struct hlist_node *n; | ||
| 617 | long hash = nfs4_fl_deviceid_hash(id); | ||
| 618 | |||
| 619 | |||
| 620 | rcu_read_lock(); | ||
| 621 | hlist_for_each_entry_rcu(d, n, &filelayout_deviceid_cache[hash], node) { | ||
| 622 | if (!memcmp(&d->deviceid, id, sizeof(*id))) { | ||
| 623 | if (!atomic_inc_not_zero(&d->ref)) | ||
| 624 | goto fail; | ||
| 625 | rcu_read_unlock(); | ||
| 626 | return d; | ||
| 627 | } | ||
| 628 | } | ||
| 629 | fail: | ||
| 630 | rcu_read_unlock(); | ||
| 631 | return NULL; | ||
| 632 | } | 549 | } |
| 633 | 550 | ||
| 634 | /* | 551 | /* |
| @@ -676,15 +593,15 @@ static void | |||
| 676 | filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr, | 593 | filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr, |
| 677 | int err, u32 ds_addr) | 594 | int err, u32 ds_addr) |
| 678 | { | 595 | { |
| 679 | u32 *p = (u32 *)&dsaddr->deviceid; | 596 | u32 *p = (u32 *)&dsaddr->id_node.deviceid; |
| 680 | 597 | ||
| 681 | printk(KERN_ERR "NFS: data server %x connection error %d." | 598 | printk(KERN_ERR "NFS: data server %x connection error %d." |
| 682 | " Deviceid [%x%x%x%x] marked out of use.\n", | 599 | " Deviceid [%x%x%x%x] marked out of use.\n", |
| 683 | ds_addr, err, p[0], p[1], p[2], p[3]); | 600 | ds_addr, err, p[0], p[1], p[2], p[3]); |
| 684 | 601 | ||
| 685 | spin_lock(&filelayout_deviceid_lock); | 602 | spin_lock(&nfs4_ds_cache_lock); |
| 686 | dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY; | 603 | dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY; |
| 687 | spin_unlock(&filelayout_deviceid_lock); | 604 | spin_unlock(&nfs4_ds_cache_lock); |
| 688 | } | 605 | } |
| 689 | 606 | ||
| 690 | struct nfs4_pnfs_ds * | 607 | struct nfs4_pnfs_ds * |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d0e15dba7a5a..d2c4b59c896d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
| @@ -2363,6 +2363,9 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
| 2363 | struct nfs4_state *state = NULL; | 2363 | struct nfs4_state *state = NULL; |
| 2364 | int status; | 2364 | int status; |
| 2365 | 2365 | ||
| 2366 | if (pnfs_ld_layoutret_on_setattr(inode)) | ||
| 2367 | pnfs_return_layout(inode); | ||
| 2368 | |||
| 2366 | nfs_fattr_init(fattr); | 2369 | nfs_fattr_init(fattr); |
| 2367 | 2370 | ||
| 2368 | /* Search for an existing open(O_WRITE) file */ | 2371 | /* Search for an existing open(O_WRITE) file */ |
| @@ -3177,6 +3180,11 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, | |||
| 3177 | return err; | 3180 | return err; |
| 3178 | } | 3181 | } |
| 3179 | 3182 | ||
| 3183 | void __nfs4_read_done_cb(struct nfs_read_data *data) | ||
| 3184 | { | ||
| 3185 | nfs_invalidate_atime(data->inode); | ||
| 3186 | } | ||
| 3187 | |||
| 3180 | static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) | 3188 | static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) |
| 3181 | { | 3189 | { |
| 3182 | struct nfs_server *server = NFS_SERVER(data->inode); | 3190 | struct nfs_server *server = NFS_SERVER(data->inode); |
| @@ -3186,7 +3194,7 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) | |||
| 3186 | return -EAGAIN; | 3194 | return -EAGAIN; |
| 3187 | } | 3195 | } |
| 3188 | 3196 | ||
| 3189 | nfs_invalidate_atime(data->inode); | 3197 | __nfs4_read_done_cb(data); |
| 3190 | if (task->tk_status > 0) | 3198 | if (task->tk_status > 0) |
| 3191 | renew_lease(server, data->timestamp); | 3199 | renew_lease(server, data->timestamp); |
| 3192 | return 0; | 3200 | return 0; |
| @@ -3200,7 +3208,8 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) | |||
| 3200 | if (!nfs4_sequence_done(task, &data->res.seq_res)) | 3208 | if (!nfs4_sequence_done(task, &data->res.seq_res)) |
| 3201 | return -EAGAIN; | 3209 | return -EAGAIN; |
| 3202 | 3210 | ||
| 3203 | return data->read_done_cb(task, data); | 3211 | return data->read_done_cb ? data->read_done_cb(task, data) : |
| 3212 | nfs4_read_done_cb(task, data); | ||
| 3204 | } | 3213 | } |
| 3205 | 3214 | ||
| 3206 | static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) | 3215 | static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) |
| @@ -3245,7 +3254,8 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) | |||
| 3245 | { | 3254 | { |
| 3246 | if (!nfs4_sequence_done(task, &data->res.seq_res)) | 3255 | if (!nfs4_sequence_done(task, &data->res.seq_res)) |
| 3247 | return -EAGAIN; | 3256 | return -EAGAIN; |
| 3248 | return data->write_done_cb(task, data); | 3257 | return data->write_done_cb ? data->write_done_cb(task, data) : |
| 3258 | nfs4_write_done_cb(task, data); | ||
| 3249 | } | 3259 | } |
| 3250 | 3260 | ||
| 3251 | /* Reset the the nfs_write_data to send the write to the MDS. */ | 3261 | /* Reset the the nfs_write_data to send the write to the MDS. */ |
| @@ -5671,6 +5681,88 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) | |||
| 5671 | return status; | 5681 | return status; |
| 5672 | } | 5682 | } |
| 5673 | 5683 | ||
| 5684 | static void | ||
| 5685 | nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata) | ||
| 5686 | { | ||
| 5687 | struct nfs4_layoutreturn *lrp = calldata; | ||
| 5688 | |||
| 5689 | dprintk("--> %s\n", __func__); | ||
| 5690 | if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args, | ||
| 5691 | &lrp->res.seq_res, 0, task)) | ||
| 5692 | return; | ||
| 5693 | rpc_call_start(task); | ||
| 5694 | } | ||
| 5695 | |||
| 5696 | static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) | ||
| 5697 | { | ||
| 5698 | struct nfs4_layoutreturn *lrp = calldata; | ||
| 5699 | struct nfs_server *server; | ||
| 5700 | |||
| 5701 | dprintk("--> %s\n", __func__); | ||
| 5702 | |||
| 5703 | if (!nfs4_sequence_done(task, &lrp->res.seq_res)) | ||
| 5704 | return; | ||
| 5705 | |||
| 5706 | server = NFS_SERVER(lrp->args.inode); | ||
| 5707 | if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { | ||
| 5708 | nfs_restart_rpc(task, lrp->clp); | ||
| 5709 | return; | ||
| 5710 | } | ||
| 5711 | if (task->tk_status == 0) { | ||
| 5712 | struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; | ||
| 5713 | |||
| 5714 | if (lrp->res.lrs_present) { | ||
| 5715 | spin_lock(&lo->plh_inode->i_lock); | ||
| 5716 | pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); | ||
| 5717 | spin_unlock(&lo->plh_inode->i_lock); | ||
| 5718 | } else | ||
| 5719 | BUG_ON(!list_empty(&lo->plh_segs)); | ||
| 5720 | } | ||
| 5721 | dprintk("<-- %s\n", __func__); | ||
| 5722 | } | ||
| 5723 | |||
| 5724 | static void nfs4_layoutreturn_release(void *calldata) | ||
| 5725 | { | ||
| 5726 | struct nfs4_layoutreturn *lrp = calldata; | ||
| 5727 | |||
| 5728 | dprintk("--> %s\n", __func__); | ||
| 5729 | put_layout_hdr(NFS_I(lrp->args.inode)->layout); | ||
| 5730 | kfree(calldata); | ||
| 5731 | dprintk("<-- %s\n", __func__); | ||
| 5732 | } | ||
| 5733 | |||
| 5734 | static const struct rpc_call_ops nfs4_layoutreturn_call_ops = { | ||
| 5735 | .rpc_call_prepare = nfs4_layoutreturn_prepare, | ||
| 5736 | .rpc_call_done = nfs4_layoutreturn_done, | ||
| 5737 | .rpc_release = nfs4_layoutreturn_release, | ||
| 5738 | }; | ||
| 5739 | |||
| 5740 | int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) | ||
| 5741 | { | ||
| 5742 | struct rpc_task *task; | ||
| 5743 | struct rpc_message msg = { | ||
| 5744 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN], | ||
| 5745 | .rpc_argp = &lrp->args, | ||
| 5746 | .rpc_resp = &lrp->res, | ||
| 5747 | }; | ||
| 5748 | struct rpc_task_setup task_setup_data = { | ||
| 5749 | .rpc_client = lrp->clp->cl_rpcclient, | ||
| 5750 | .rpc_message = &msg, | ||
| 5751 | .callback_ops = &nfs4_layoutreturn_call_ops, | ||
| 5752 | .callback_data = lrp, | ||
| 5753 | }; | ||
| 5754 | int status; | ||
| 5755 | |||
| 5756 | dprintk("--> %s\n", __func__); | ||
| 5757 | task = rpc_run_task(&task_setup_data); | ||
| 5758 | if (IS_ERR(task)) | ||
| 5759 | return PTR_ERR(task); | ||
| 5760 | status = task->tk_status; | ||
| 5761 | dprintk("<-- %s status=%d\n", __func__, status); | ||
| 5762 | rpc_put_task(task); | ||
| 5763 | return status; | ||
| 5764 | } | ||
| 5765 | |||
| 5674 | static int | 5766 | static int |
| 5675 | _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) | 5767 | _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) |
| 5676 | { | 5768 | { |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index c3ccd2c46834..d869a5e5464b 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
| @@ -338,7 +338,11 @@ static int nfs4_stat_to_errno(int); | |||
| 338 | 1 /* layoutupdate4 layout type */ + \ | 338 | 1 /* layoutupdate4 layout type */ + \ |
| 339 | 1 /* NULL filelayout layoutupdate4 payload */) | 339 | 1 /* NULL filelayout layoutupdate4 payload */) |
| 340 | #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3) | 340 | #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3) |
| 341 | 341 | #define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \ | |
| 342 | encode_stateid_maxsz + \ | ||
| 343 | 1 /* FIXME: opaque lrf_body always empty at the moment */) | ||
| 344 | #define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \ | ||
| 345 | 1 + decode_stateid_maxsz) | ||
| 342 | #else /* CONFIG_NFS_V4_1 */ | 346 | #else /* CONFIG_NFS_V4_1 */ |
| 343 | #define encode_sequence_maxsz 0 | 347 | #define encode_sequence_maxsz 0 |
| 344 | #define decode_sequence_maxsz 0 | 348 | #define decode_sequence_maxsz 0 |
| @@ -760,7 +764,14 @@ static int nfs4_stat_to_errno(int); | |||
| 760 | decode_putfh_maxsz + \ | 764 | decode_putfh_maxsz + \ |
| 761 | decode_layoutcommit_maxsz + \ | 765 | decode_layoutcommit_maxsz + \ |
| 762 | decode_getattr_maxsz) | 766 | decode_getattr_maxsz) |
| 763 | 767 | #define NFS4_enc_layoutreturn_sz (compound_encode_hdr_maxsz + \ | |
| 768 | encode_sequence_maxsz + \ | ||
| 769 | encode_putfh_maxsz + \ | ||
| 770 | encode_layoutreturn_maxsz) | ||
| 771 | #define NFS4_dec_layoutreturn_sz (compound_decode_hdr_maxsz + \ | ||
| 772 | decode_sequence_maxsz + \ | ||
| 773 | decode_putfh_maxsz + \ | ||
| 774 | decode_layoutreturn_maxsz) | ||
| 764 | 775 | ||
| 765 | const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + | 776 | const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + |
| 766 | compound_encode_hdr_maxsz + | 777 | compound_encode_hdr_maxsz + |
| @@ -1864,6 +1875,7 @@ encode_layoutget(struct xdr_stream *xdr, | |||
| 1864 | 1875 | ||
| 1865 | static int | 1876 | static int |
| 1866 | encode_layoutcommit(struct xdr_stream *xdr, | 1877 | encode_layoutcommit(struct xdr_stream *xdr, |
| 1878 | struct inode *inode, | ||
| 1867 | const struct nfs4_layoutcommit_args *args, | 1879 | const struct nfs4_layoutcommit_args *args, |
| 1868 | struct compound_hdr *hdr) | 1880 | struct compound_hdr *hdr) |
| 1869 | { | 1881 | { |
| @@ -1872,7 +1884,7 @@ encode_layoutcommit(struct xdr_stream *xdr, | |||
| 1872 | dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten, | 1884 | dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten, |
| 1873 | NFS_SERVER(args->inode)->pnfs_curr_ld->id); | 1885 | NFS_SERVER(args->inode)->pnfs_curr_ld->id); |
| 1874 | 1886 | ||
| 1875 | p = reserve_space(xdr, 48 + NFS4_STATEID_SIZE); | 1887 | p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE); |
| 1876 | *p++ = cpu_to_be32(OP_LAYOUTCOMMIT); | 1888 | *p++ = cpu_to_be32(OP_LAYOUTCOMMIT); |
| 1877 | /* Only whole file layouts */ | 1889 | /* Only whole file layouts */ |
| 1878 | p = xdr_encode_hyper(p, 0); /* offset */ | 1890 | p = xdr_encode_hyper(p, 0); /* offset */ |
| @@ -1883,12 +1895,49 @@ encode_layoutcommit(struct xdr_stream *xdr, | |||
| 1883 | p = xdr_encode_hyper(p, args->lastbytewritten); | 1895 | p = xdr_encode_hyper(p, args->lastbytewritten); |
| 1884 | *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ | 1896 | *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ |
| 1885 | *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */ | 1897 | *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */ |
| 1886 | *p++ = cpu_to_be32(0); /* no file layout payload */ | 1898 | |
| 1899 | if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) | ||
| 1900 | NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit( | ||
| 1901 | NFS_I(inode)->layout, xdr, args); | ||
| 1902 | else { | ||
| 1903 | p = reserve_space(xdr, 4); | ||
| 1904 | *p = cpu_to_be32(0); /* no layout-type payload */ | ||
| 1905 | } | ||
| 1887 | 1906 | ||
| 1888 | hdr->nops++; | 1907 | hdr->nops++; |
| 1889 | hdr->replen += decode_layoutcommit_maxsz; | 1908 | hdr->replen += decode_layoutcommit_maxsz; |
| 1890 | return 0; | 1909 | return 0; |
| 1891 | } | 1910 | } |
| 1911 | |||
| 1912 | static void | ||
| 1913 | encode_layoutreturn(struct xdr_stream *xdr, | ||
| 1914 | const struct nfs4_layoutreturn_args *args, | ||
| 1915 | struct compound_hdr *hdr) | ||
| 1916 | { | ||
| 1917 | __be32 *p; | ||
| 1918 | |||
| 1919 | p = reserve_space(xdr, 20); | ||
| 1920 | *p++ = cpu_to_be32(OP_LAYOUTRETURN); | ||
| 1921 | *p++ = cpu_to_be32(0); /* reclaim. always 0 for now */ | ||
| 1922 | *p++ = cpu_to_be32(args->layout_type); | ||
| 1923 | *p++ = cpu_to_be32(IOMODE_ANY); | ||
| 1924 | *p = cpu_to_be32(RETURN_FILE); | ||
| 1925 | p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE); | ||
| 1926 | p = xdr_encode_hyper(p, 0); | ||
| 1927 | p = xdr_encode_hyper(p, NFS4_MAX_UINT64); | ||
| 1928 | spin_lock(&args->inode->i_lock); | ||
| 1929 | xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE); | ||
| 1930 | spin_unlock(&args->inode->i_lock); | ||
| 1931 | if (NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn) { | ||
| 1932 | NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn( | ||
| 1933 | NFS_I(args->inode)->layout, xdr, args); | ||
| 1934 | } else { | ||
| 1935 | p = reserve_space(xdr, 4); | ||
| 1936 | *p = cpu_to_be32(0); | ||
| 1937 | } | ||
| 1938 | hdr->nops++; | ||
| 1939 | hdr->replen += decode_layoutreturn_maxsz; | ||
| 1940 | } | ||
| 1892 | #endif /* CONFIG_NFS_V4_1 */ | 1941 | #endif /* CONFIG_NFS_V4_1 */ |
| 1893 | 1942 | ||
| 1894 | /* | 1943 | /* |
| @@ -2706,10 +2755,12 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req, | |||
| 2706 | /* | 2755 | /* |
| 2707 | * Encode LAYOUTCOMMIT request | 2756 | * Encode LAYOUTCOMMIT request |
| 2708 | */ | 2757 | */ |
| 2709 | static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, | 2758 | static void nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, |
| 2710 | struct xdr_stream *xdr, | 2759 | struct xdr_stream *xdr, |
| 2711 | struct nfs4_layoutcommit_args *args) | 2760 | struct nfs4_layoutcommit_args *args) |
| 2712 | { | 2761 | { |
| 2762 | struct nfs4_layoutcommit_data *data = | ||
| 2763 | container_of(args, struct nfs4_layoutcommit_data, args); | ||
| 2713 | struct compound_hdr hdr = { | 2764 | struct compound_hdr hdr = { |
| 2714 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | 2765 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), |
| 2715 | }; | 2766 | }; |
| @@ -2717,10 +2768,27 @@ static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, | |||
| 2717 | encode_compound_hdr(xdr, req, &hdr); | 2768 | encode_compound_hdr(xdr, req, &hdr); |
| 2718 | encode_sequence(xdr, &args->seq_args, &hdr); | 2769 | encode_sequence(xdr, &args->seq_args, &hdr); |
| 2719 | encode_putfh(xdr, NFS_FH(args->inode), &hdr); | 2770 | encode_putfh(xdr, NFS_FH(args->inode), &hdr); |
| 2720 | encode_layoutcommit(xdr, args, &hdr); | 2771 | encode_layoutcommit(xdr, data->args.inode, args, &hdr); |
| 2721 | encode_getfattr(xdr, args->bitmask, &hdr); | 2772 | encode_getfattr(xdr, args->bitmask, &hdr); |
| 2722 | encode_nops(&hdr); | 2773 | encode_nops(&hdr); |
| 2723 | return 0; | 2774 | } |
| 2775 | |||
| 2776 | /* | ||
| 2777 | * Encode LAYOUTRETURN request | ||
| 2778 | */ | ||
| 2779 | static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req, | ||
| 2780 | struct xdr_stream *xdr, | ||
| 2781 | struct nfs4_layoutreturn_args *args) | ||
| 2782 | { | ||
| 2783 | struct compound_hdr hdr = { | ||
| 2784 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
| 2785 | }; | ||
| 2786 | |||
| 2787 | encode_compound_hdr(xdr, req, &hdr); | ||
| 2788 | encode_sequence(xdr, &args->seq_args, &hdr); | ||
| 2789 | encode_putfh(xdr, NFS_FH(args->inode), &hdr); | ||
| 2790 | encode_layoutreturn(xdr, args, &hdr); | ||
| 2791 | encode_nops(&hdr); | ||
| 2724 | } | 2792 | } |
| 2725 | #endif /* CONFIG_NFS_V4_1 */ | 2793 | #endif /* CONFIG_NFS_V4_1 */ |
| 2726 | 2794 | ||
| @@ -5203,6 +5271,27 @@ out_overflow: | |||
| 5203 | return -EIO; | 5271 | return -EIO; |
| 5204 | } | 5272 | } |
| 5205 | 5273 | ||
| 5274 | static int decode_layoutreturn(struct xdr_stream *xdr, | ||
| 5275 | struct nfs4_layoutreturn_res *res) | ||
| 5276 | { | ||
| 5277 | __be32 *p; | ||
| 5278 | int status; | ||
| 5279 | |||
| 5280 | status = decode_op_hdr(xdr, OP_LAYOUTRETURN); | ||
| 5281 | if (status) | ||
| 5282 | return status; | ||
| 5283 | p = xdr_inline_decode(xdr, 4); | ||
| 5284 | if (unlikely(!p)) | ||
| 5285 | goto out_overflow; | ||
| 5286 | res->lrs_present = be32_to_cpup(p); | ||
| 5287 | if (res->lrs_present) | ||
| 5288 | status = decode_stateid(xdr, &res->stateid); | ||
| 5289 | return status; | ||
| 5290 | out_overflow: | ||
| 5291 | print_overflow_msg(__func__, xdr); | ||
| 5292 | return -EIO; | ||
| 5293 | } | ||
| 5294 | |||
| 5206 | static int decode_layoutcommit(struct xdr_stream *xdr, | 5295 | static int decode_layoutcommit(struct xdr_stream *xdr, |
| 5207 | struct rpc_rqst *req, | 5296 | struct rpc_rqst *req, |
| 5208 | struct nfs4_layoutcommit_res *res) | 5297 | struct nfs4_layoutcommit_res *res) |
| @@ -6320,6 +6409,30 @@ out: | |||
| 6320 | } | 6409 | } |
| 6321 | 6410 | ||
| 6322 | /* | 6411 | /* |
| 6412 | * Decode LAYOUTRETURN response | ||
| 6413 | */ | ||
| 6414 | static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp, | ||
| 6415 | struct xdr_stream *xdr, | ||
| 6416 | struct nfs4_layoutreturn_res *res) | ||
| 6417 | { | ||
| 6418 | struct compound_hdr hdr; | ||
| 6419 | int status; | ||
| 6420 | |||
| 6421 | status = decode_compound_hdr(xdr, &hdr); | ||
| 6422 | if (status) | ||
| 6423 | goto out; | ||
| 6424 | status = decode_sequence(xdr, &res->seq_res, rqstp); | ||
| 6425 | if (status) | ||
| 6426 | goto out; | ||
| 6427 | status = decode_putfh(xdr); | ||
| 6428 | if (status) | ||
| 6429 | goto out; | ||
| 6430 | status = decode_layoutreturn(xdr, res); | ||
| 6431 | out: | ||
| 6432 | return status; | ||
| 6433 | } | ||
| 6434 | |||
| 6435 | /* | ||
| 6323 | * Decode LAYOUTCOMMIT response | 6436 | * Decode LAYOUTCOMMIT response |
| 6324 | */ | 6437 | */ |
| 6325 | static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, | 6438 | static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, |
| @@ -6547,6 +6660,7 @@ struct rpc_procinfo nfs4_procedures[] = { | |||
| 6547 | PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), | 6660 | PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), |
| 6548 | PROC(LAYOUTGET, enc_layoutget, dec_layoutget), | 6661 | PROC(LAYOUTGET, enc_layoutget, dec_layoutget), |
| 6549 | PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), | 6662 | PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), |
| 6663 | PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), | ||
| 6550 | #endif /* CONFIG_NFS_V4_1 */ | 6664 | #endif /* CONFIG_NFS_V4_1 */ |
| 6551 | }; | 6665 | }; |
| 6552 | 6666 | ||
diff --git a/fs/nfs/objlayout/Kbuild b/fs/nfs/objlayout/Kbuild
new file mode 100644
index 000000000000..ed30ea072bb8
--- /dev/null
+++ b/fs/nfs/objlayout/Kbuild
| @@ -0,0 +1,5 @@ | |||
| 1 | # | ||
| 2 | # Makefile for the pNFS Objects Layout Driver kernel module | ||
| 3 | # | ||
| 4 | objlayoutdriver-y := objio_osd.o pnfs_osd_xdr_cli.o objlayout.o | ||
| 5 | obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayoutdriver.o | ||
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
new file mode 100644
index 000000000000..9cf208df1f25
--- /dev/null
+++ b/fs/nfs/objlayout/objio_osd.c
| @@ -0,0 +1,1057 @@ | |||
| 1 | /* | ||
| 2 | * pNFS Objects layout implementation over open-osd initiator library | ||
| 3 | * | ||
| 4 | * Copyright (C) 2009 Panasas Inc. [year of first publication] | ||
| 5 | * All rights reserved. | ||
| 6 | * | ||
| 7 | * Benny Halevy <bhalevy@panasas.com> | ||
| 8 | * Boaz Harrosh <bharrosh@panasas.com> | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or modify | ||
| 11 | * it under the terms of the GNU General Public License version 2 | ||
| 12 | * See the file COPYING included with this distribution for more details. | ||
| 13 | * | ||
| 14 | * Redistribution and use in source and binary forms, with or without | ||
| 15 | * modification, are permitted provided that the following conditions | ||
| 16 | * are met: | ||
| 17 | * | ||
| 18 | * 1. Redistributions of source code must retain the above copyright | ||
| 19 | * notice, this list of conditions and the following disclaimer. | ||
| 20 | * 2. Redistributions in binary form must reproduce the above copyright | ||
| 21 | * notice, this list of conditions and the following disclaimer in the | ||
| 22 | * documentation and/or other materials provided with the distribution. | ||
| 23 | * 3. Neither the name of the Panasas company nor the names of its | ||
| 24 | * contributors may be used to endorse or promote products derived | ||
| 25 | * from this software without specific prior written permission. | ||
| 26 | * | ||
| 27 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
| 28 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
| 29 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
| 30 | * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||
| 31 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
| 32 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
| 33 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | ||
| 34 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
| 35 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
| 36 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
| 37 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 38 | */ | ||
| 39 | |||
| 40 | #include <linux/module.h> | ||
| 41 | #include <scsi/osd_initiator.h> | ||
| 42 | |||
| 43 | #include "objlayout.h" | ||
| 44 | |||
| 45 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
| 46 | |||
| 47 | #define _LLU(x) ((unsigned long long)x) | ||
| 48 | |||
| 49 | enum { BIO_MAX_PAGES_KMALLOC = | ||
| 50 | (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec), | ||
| 51 | }; | ||
| 52 | |||
| 53 | struct objio_dev_ent { | ||
| 54 | struct nfs4_deviceid_node id_node; | ||
| 55 | struct osd_dev *od; | ||
| 56 | }; | ||
| 57 | |||
| 58 | static void | ||
| 59 | objio_free_deviceid_node(struct nfs4_deviceid_node *d) | ||
| 60 | { | ||
| 61 | struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node); | ||
| 62 | |||
| 63 | dprintk("%s: free od=%p\n", __func__, de->od); | ||
| 64 | osduld_put_device(de->od); | ||
| 65 | kfree(de); | ||
| 66 | } | ||
| 67 | |||
| 68 | static struct objio_dev_ent *_dev_list_find(const struct nfs_server *nfss, | ||
| 69 | const struct nfs4_deviceid *d_id) | ||
| 70 | { | ||
| 71 | struct nfs4_deviceid_node *d; | ||
| 72 | struct objio_dev_ent *de; | ||
| 73 | |||
| 74 | d = nfs4_find_get_deviceid(nfss->pnfs_curr_ld, nfss->nfs_client, d_id); | ||
| 75 | if (!d) | ||
| 76 | return NULL; | ||
| 77 | |||
| 78 | de = container_of(d, struct objio_dev_ent, id_node); | ||
| 79 | return de; | ||
| 80 | } | ||
| 81 | |||
| 82 | static struct objio_dev_ent * | ||
| 83 | _dev_list_add(const struct nfs_server *nfss, | ||
| 84 | const struct nfs4_deviceid *d_id, struct osd_dev *od, | ||
| 85 | gfp_t gfp_flags) | ||
| 86 | { | ||
| 87 | struct nfs4_deviceid_node *d; | ||
| 88 | struct objio_dev_ent *de = kzalloc(sizeof(*de), gfp_flags); | ||
| 89 | struct objio_dev_ent *n; | ||
| 90 | |||
| 91 | if (!de) { | ||
| 92 | dprintk("%s: -ENOMEM od=%p\n", __func__, od); | ||
| 93 | return NULL; | ||
| 94 | } | ||
| 95 | |||
| 96 | dprintk("%s: Adding od=%p\n", __func__, od); | ||
| 97 | nfs4_init_deviceid_node(&de->id_node, | ||
| 98 | nfss->pnfs_curr_ld, | ||
| 99 | nfss->nfs_client, | ||
| 100 | d_id); | ||
| 101 | de->od = od; | ||
| 102 | |||
| 103 | d = nfs4_insert_deviceid_node(&de->id_node); | ||
| 104 | n = container_of(d, struct objio_dev_ent, id_node); | ||
| 105 | if (n != de) { | ||
| 106 | dprintk("%s: Race with other n->od=%p\n", __func__, n->od); | ||
| 107 | objio_free_deviceid_node(&de->id_node); | ||
| 108 | de = n; | ||
| 109 | } | ||
| 110 | |||
| 111 | atomic_inc(&de->id_node.ref); | ||
| 112 | return de; | ||
| 113 | } | ||
| 114 | |||
| 115 | struct caps_buffers { | ||
| 116 | u8 caps_key[OSD_CRYPTO_KEYID_SIZE]; | ||
| 117 | u8 creds[OSD_CAP_LEN]; | ||
| 118 | }; | ||
| 119 | |||
| 120 | struct objio_segment { | ||
| 121 | struct pnfs_layout_segment lseg; | ||
| 122 | |||
| 123 | struct pnfs_osd_object_cred *comps; | ||
| 124 | |||
| 125 | unsigned mirrors_p1; | ||
| 126 | unsigned stripe_unit; | ||
| 127 | unsigned group_width; /* Data stripe_units without integrity comps */ | ||
| 128 | u64 group_depth; | ||
| 129 | unsigned group_count; | ||
| 130 | |||
| 131 | unsigned max_io_size; | ||
| 132 | |||
| 133 | unsigned comps_index; | ||
| 134 | unsigned num_comps; | ||
| 135 | /* variable length */ | ||
| 136 | struct objio_dev_ent *ods[]; | ||
| 137 | }; | ||
| 138 | |||
| 139 | static inline struct objio_segment * | ||
| 140 | OBJIO_LSEG(struct pnfs_layout_segment *lseg) | ||
| 141 | { | ||
| 142 | return container_of(lseg, struct objio_segment, lseg); | ||
| 143 | } | ||
| 144 | |||
| 145 | struct objio_state; | ||
| 146 | typedef ssize_t (*objio_done_fn)(struct objio_state *ios); | ||
| 147 | |||
| 148 | struct objio_state { | ||
| 149 | /* Generic layer */ | ||
| 150 | struct objlayout_io_state ol_state; | ||
| 151 | |||
| 152 | struct objio_segment *layout; | ||
| 153 | |||
| 154 | struct kref kref; | ||
| 155 | objio_done_fn done; | ||
| 156 | void *private; | ||
| 157 | |||
| 158 | unsigned long length; | ||
| 159 | unsigned numdevs; /* Actually used devs in this IO */ | ||
| 160 | /* A per-device variable array of size numdevs */ | ||
| 161 | struct _objio_per_comp { | ||
| 162 | struct bio *bio; | ||
| 163 | struct osd_request *or; | ||
| 164 | unsigned long length; | ||
| 165 | u64 offset; | ||
| 166 | unsigned dev; | ||
| 167 | } per_dev[]; | ||
| 168 | }; | ||
| 169 | |||
| 170 | /* Send and wait for a get_device_info of devices in the layout, | ||
| 171 | then look them up with the osd_initiator library */ | ||
| 172 | static struct objio_dev_ent *_device_lookup(struct pnfs_layout_hdr *pnfslay, | ||
| 173 | struct objio_segment *objio_seg, unsigned comp, | ||
| 174 | gfp_t gfp_flags) | ||
| 175 | { | ||
| 176 | struct pnfs_osd_deviceaddr *deviceaddr; | ||
| 177 | struct nfs4_deviceid *d_id; | ||
| 178 | struct objio_dev_ent *ode; | ||
| 179 | struct osd_dev *od; | ||
| 180 | struct osd_dev_info odi; | ||
| 181 | int err; | ||
| 182 | |||
| 183 | d_id = &objio_seg->comps[comp].oc_object_id.oid_device_id; | ||
| 184 | |||
| 185 | ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id); | ||
| 186 | if (ode) | ||
| 187 | return ode; | ||
| 188 | |||
| 189 | err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr, gfp_flags); | ||
| 190 | if (unlikely(err)) { | ||
| 191 | dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n", | ||
| 192 | __func__, _DEVID_LO(d_id), _DEVID_HI(d_id), err); | ||
| 193 | return ERR_PTR(err); | ||
| 194 | } | ||
| 195 | |||
| 196 | odi.systemid_len = deviceaddr->oda_systemid.len; | ||
| 197 | if (odi.systemid_len > sizeof(odi.systemid)) { | ||
| 198 | err = -EINVAL; | ||
| 199 | goto out; | ||
| 200 | } else if (odi.systemid_len) | ||
| 201 | memcpy(odi.systemid, deviceaddr->oda_systemid.data, | ||
| 202 | odi.systemid_len); | ||
| 203 | odi.osdname_len = deviceaddr->oda_osdname.len; | ||
| 204 | odi.osdname = (u8 *)deviceaddr->oda_osdname.data; | ||
| 205 | |||
| 206 | if (!odi.osdname_len && !odi.systemid_len) { | ||
| 207 | dprintk("%s: !odi.osdname_len && !odi.systemid_len\n", | ||
| 208 | __func__); | ||
| 209 | err = -ENODEV; | ||
| 210 | goto out; | ||
| 211 | } | ||
| 212 | |||
| 213 | od = osduld_info_lookup(&odi); | ||
| 214 | if (unlikely(IS_ERR(od))) { | ||
| 215 | err = PTR_ERR(od); | ||
| 216 | dprintk("%s: osduld_info_lookup => %d\n", __func__, err); | ||
| 217 | goto out; | ||
| 218 | } | ||
| 219 | |||
| 220 | ode = _dev_list_add(NFS_SERVER(pnfslay->plh_inode), d_id, od, | ||
| 221 | gfp_flags); | ||
| 222 | |||
| 223 | out: | ||
| 224 | dprintk("%s: return=%d\n", __func__, err); | ||
| 225 | objlayout_put_deviceinfo(deviceaddr); | ||
| 226 | return err ? ERR_PTR(err) : ode; | ||
| 227 | } | ||
| 228 | |||
| 229 | static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, | ||
| 230 | struct objio_segment *objio_seg, | ||
| 231 | gfp_t gfp_flags) | ||
| 232 | { | ||
| 233 | unsigned i; | ||
| 234 | int err; | ||
| 235 | |||
| 236 | /* lookup all devices */ | ||
| 237 | for (i = 0; i < objio_seg->num_comps; i++) { | ||
| 238 | struct objio_dev_ent *ode; | ||
| 239 | |||
| 240 | ode = _device_lookup(pnfslay, objio_seg, i, gfp_flags); | ||
| 241 | if (unlikely(IS_ERR(ode))) { | ||
| 242 | err = PTR_ERR(ode); | ||
| 243 | goto out; | ||
| 244 | } | ||
| 245 | objio_seg->ods[i] = ode; | ||
| 246 | } | ||
| 247 | err = 0; | ||
| 248 | |||
| 249 | out: | ||
| 250 | dprintk("%s: return=%d\n", __func__, err); | ||
| 251 | return err; | ||
| 252 | } | ||
| 253 | |||
| 254 | static int _verify_data_map(struct pnfs_osd_layout *layout) | ||
| 255 | { | ||
| 256 | struct pnfs_osd_data_map *data_map = &layout->olo_map; | ||
| 257 | u64 stripe_length; | ||
| 258 | u32 group_width; | ||
| 259 | |||
| 260 | /* FIXME: Only raid0 for now. if not go through MDS */ | ||
| 261 | if (data_map->odm_raid_algorithm != PNFS_OSD_RAID_0) { | ||
| 262 | printk(KERN_ERR "Only RAID_0 for now\n"); | ||
| 263 | return -ENOTSUPP; | ||
| 264 | } | ||
| 265 | if (0 != (data_map->odm_num_comps % (data_map->odm_mirror_cnt + 1))) { | ||
| 266 | printk(KERN_ERR "Data Map wrong, num_comps=%u mirrors=%u\n", | ||
| 267 | data_map->odm_num_comps, data_map->odm_mirror_cnt); | ||
| 268 | return -EINVAL; | ||
| 269 | } | ||
| 270 | |||
| 271 | if (data_map->odm_group_width) | ||
| 272 | group_width = data_map->odm_group_width; | ||
| 273 | else | ||
| 274 | group_width = data_map->odm_num_comps / | ||
| 275 | (data_map->odm_mirror_cnt + 1); | ||
| 276 | |||
| 277 | stripe_length = (u64)data_map->odm_stripe_unit * group_width; | ||
| 278 | if (stripe_length >= (1ULL << 32)) { | ||
| 279 | printk(KERN_ERR "Total Stripe length(0x%llx)" | ||
| 280 | " >= 32bit is not supported\n", _LLU(stripe_length)); | ||
| 281 | return -ENOTSUPP; | ||
| 282 | } | ||
| 283 | |||
| 284 | if (0 != (data_map->odm_stripe_unit & ~PAGE_MASK)) { | ||
| 285 | printk(KERN_ERR "Stripe Unit(0x%llx)" | ||
| 286 | " must be a multiple of PAGE_SIZE(0x%lx)\n", | ||
| 287 | _LLU(data_map->odm_stripe_unit), PAGE_SIZE); | ||
| 288 | return -ENOTSUPP; | ||
| 289 | } | ||
| 290 | |||
| 291 | return 0; | ||
| 292 | } | ||
| 293 | |||
| 294 | static void copy_single_comp(struct pnfs_osd_object_cred *cur_comp, | ||
| 295 | struct pnfs_osd_object_cred *src_comp, | ||
| 296 | struct caps_buffers *caps_p) | ||
| 297 | { | ||
| 298 | WARN_ON(src_comp->oc_cap_key.cred_len > sizeof(caps_p->caps_key)); | ||
| 299 | WARN_ON(src_comp->oc_cap.cred_len > sizeof(caps_p->creds)); | ||
| 300 | |||
| 301 | *cur_comp = *src_comp; | ||
| 302 | |||
| 303 | memcpy(caps_p->caps_key, src_comp->oc_cap_key.cred, | ||
| 304 | sizeof(caps_p->caps_key)); | ||
| 305 | cur_comp->oc_cap_key.cred = caps_p->caps_key; | ||
| 306 | |||
| 307 | memcpy(caps_p->creds, src_comp->oc_cap.cred, | ||
| 308 | sizeof(caps_p->creds)); | ||
| 309 | cur_comp->oc_cap.cred = caps_p->creds; | ||
| 310 | } | ||
| 311 | |||
| 312 | int objio_alloc_lseg(struct pnfs_layout_segment **outp, | ||
| 313 | struct pnfs_layout_hdr *pnfslay, | ||
| 314 | struct pnfs_layout_range *range, | ||
| 315 | struct xdr_stream *xdr, | ||
| 316 | gfp_t gfp_flags) | ||
| 317 | { | ||
| 318 | struct objio_segment *objio_seg; | ||
| 319 | struct pnfs_osd_xdr_decode_layout_iter iter; | ||
| 320 | struct pnfs_osd_layout layout; | ||
| 321 | struct pnfs_osd_object_cred *cur_comp, src_comp; | ||
| 322 | struct caps_buffers *caps_p; | ||
| 323 | int err; | ||
| 324 | |||
| 325 | err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr); | ||
| 326 | if (unlikely(err)) | ||
| 327 | return err; | ||
| 328 | |||
| 329 | err = _verify_data_map(&layout); | ||
| 330 | if (unlikely(err)) | ||
| 331 | return err; | ||
| 332 | |||
| 333 | objio_seg = kzalloc(sizeof(*objio_seg) + | ||
| 334 | sizeof(objio_seg->ods[0]) * layout.olo_num_comps + | ||
| 335 | sizeof(*objio_seg->comps) * layout.olo_num_comps + | ||
| 336 | sizeof(struct caps_buffers) * layout.olo_num_comps, | ||
| 337 | gfp_flags); | ||
| 338 | if (!objio_seg) | ||
| 339 | return -ENOMEM; | ||
| 340 | |||
| 341 | objio_seg->comps = (void *)(objio_seg->ods + layout.olo_num_comps); | ||
| 342 | cur_comp = objio_seg->comps; | ||
| 343 | caps_p = (void *)(cur_comp + layout.olo_num_comps); | ||
| 344 | while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) | ||
| 345 | copy_single_comp(cur_comp++, &src_comp, caps_p++); | ||
| 346 | if (unlikely(err)) | ||
| 347 | goto err; | ||
| 348 | |||
| 349 | objio_seg->num_comps = layout.olo_num_comps; | ||
| 350 | objio_seg->comps_index = layout.olo_comps_index; | ||
| 351 | err = objio_devices_lookup(pnfslay, objio_seg, gfp_flags); | ||
| 352 | if (err) | ||
| 353 | goto err; | ||
| 354 | |||
| 355 | objio_seg->mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1; | ||
| 356 | objio_seg->stripe_unit = layout.olo_map.odm_stripe_unit; | ||
| 357 | if (layout.olo_map.odm_group_width) { | ||
| 358 | objio_seg->group_width = layout.olo_map.odm_group_width; | ||
| 359 | objio_seg->group_depth = layout.olo_map.odm_group_depth; | ||
| 360 | objio_seg->group_count = layout.olo_map.odm_num_comps / | ||
| 361 | objio_seg->mirrors_p1 / | ||
| 362 | objio_seg->group_width; | ||
| 363 | } else { | ||
| 364 | objio_seg->group_width = layout.olo_map.odm_num_comps / | ||
| 365 | objio_seg->mirrors_p1; | ||
| 366 | objio_seg->group_depth = -1; | ||
| 367 | objio_seg->group_count = 1; | ||
| 368 | } | ||
| 369 | |||
| 370 | /* Cache this calculation, it will hit for every page */ | ||
| 371 | objio_seg->max_io_size = (BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - | ||
| 372 | objio_seg->stripe_unit) * | ||
| 373 | objio_seg->group_width; | ||
| 374 | |||
| 375 | *outp = &objio_seg->lseg; | ||
| 376 | return 0; | ||
| 377 | |||
| 378 | err: | ||
| 379 | kfree(objio_seg); | ||
| 380 | dprintk("%s: Error: return %d\n", __func__, err); | ||
| 381 | *outp = NULL; | ||
| 382 | return err; | ||
| 383 | } | ||
| 384 | |||
| 385 | void objio_free_lseg(struct pnfs_layout_segment *lseg) | ||
| 386 | { | ||
| 387 | int i; | ||
| 388 | struct objio_segment *objio_seg = OBJIO_LSEG(lseg); | ||
| 389 | |||
| 390 | for (i = 0; i < objio_seg->num_comps; i++) { | ||
| 391 | if (!objio_seg->ods[i]) | ||
| 392 | break; | ||
| 393 | nfs4_put_deviceid_node(&objio_seg->ods[i]->id_node); | ||
| 394 | } | ||
| 395 | kfree(objio_seg); | ||
| 396 | } | ||
| 397 | |||
| 398 | int objio_alloc_io_state(struct pnfs_layout_segment *lseg, | ||
| 399 | struct objlayout_io_state **outp, | ||
| 400 | gfp_t gfp_flags) | ||
| 401 | { | ||
| 402 | struct objio_segment *objio_seg = OBJIO_LSEG(lseg); | ||
| 403 | struct objio_state *ios; | ||
| 404 | const unsigned first_size = sizeof(*ios) + | ||
| 405 | objio_seg->num_comps * sizeof(ios->per_dev[0]); | ||
| 406 | const unsigned sec_size = objio_seg->num_comps * | ||
| 407 | sizeof(ios->ol_state.ioerrs[0]); | ||
| 408 | |||
| 409 | ios = kzalloc(first_size + sec_size, gfp_flags); | ||
| 410 | if (unlikely(!ios)) | ||
| 411 | return -ENOMEM; | ||
| 412 | |||
| 413 | ios->layout = objio_seg; | ||
| 414 | ios->ol_state.ioerrs = ((void *)ios) + first_size; | ||
| 415 | ios->ol_state.num_comps = objio_seg->num_comps; | ||
| 416 | |||
| 417 | *outp = &ios->ol_state; | ||
| 418 | return 0; | ||
| 419 | } | ||
| 420 | |||
| 421 | void objio_free_io_state(struct objlayout_io_state *ol_state) | ||
| 422 | { | ||
| 423 | struct objio_state *ios = container_of(ol_state, struct objio_state, | ||
| 424 | ol_state); | ||
| 425 | |||
| 426 | kfree(ios); | ||
| 427 | } | ||
| 428 | |||
| 429 | enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) | ||
| 430 | { | ||
| 431 | switch (oep) { | ||
| 432 | case OSD_ERR_PRI_NO_ERROR: | ||
| 433 | return (enum pnfs_osd_errno)0; | ||
| 434 | |||
| 435 | case OSD_ERR_PRI_CLEAR_PAGES: | ||
| 436 | BUG_ON(1); | ||
| 437 | return 0; | ||
| 438 | |||
| 439 | case OSD_ERR_PRI_RESOURCE: | ||
| 440 | return PNFS_OSD_ERR_RESOURCE; | ||
| 441 | case OSD_ERR_PRI_BAD_CRED: | ||
| 442 | return PNFS_OSD_ERR_BAD_CRED; | ||
| 443 | case OSD_ERR_PRI_NO_ACCESS: | ||
| 444 | return PNFS_OSD_ERR_NO_ACCESS; | ||
| 445 | case OSD_ERR_PRI_UNREACHABLE: | ||
| 446 | return PNFS_OSD_ERR_UNREACHABLE; | ||
| 447 | case OSD_ERR_PRI_NOT_FOUND: | ||
| 448 | return PNFS_OSD_ERR_NOT_FOUND; | ||
| 449 | case OSD_ERR_PRI_NO_SPACE: | ||
| 450 | return PNFS_OSD_ERR_NO_SPACE; | ||
| 451 | default: | ||
| 452 | WARN_ON(1); | ||
| 453 | /* fallthrough */ | ||
| 454 | case OSD_ERR_PRI_EIO: | ||
| 455 | return PNFS_OSD_ERR_EIO; | ||
| 456 | } | ||
| 457 | } | ||
| 458 | |||
| 459 | static void _clear_bio(struct bio *bio) | ||
| 460 | { | ||
| 461 | struct bio_vec *bv; | ||
| 462 | unsigned i; | ||
| 463 | |||
| 464 | __bio_for_each_segment(bv, bio, i, 0) { | ||
| 465 | unsigned this_count = bv->bv_len; | ||
| 466 | |||
| 467 | if (likely(PAGE_SIZE == this_count)) | ||
| 468 | clear_highpage(bv->bv_page); | ||
| 469 | else | ||
| 470 | zero_user(bv->bv_page, bv->bv_offset, this_count); | ||
| 471 | } | ||
| 472 | } | ||
| 473 | |||
| 474 | static int _io_check(struct objio_state *ios, bool is_write) | ||
| 475 | { | ||
| 476 | enum osd_err_priority oep = OSD_ERR_PRI_NO_ERROR; | ||
| 477 | int lin_ret = 0; | ||
| 478 | int i; | ||
| 479 | |||
| 480 | for (i = 0; i < ios->numdevs; i++) { | ||
| 481 | struct osd_sense_info osi; | ||
| 482 | struct osd_request *or = ios->per_dev[i].or; | ||
| 483 | unsigned dev; | ||
| 484 | int ret; | ||
| 485 | |||
| 486 | if (!or) | ||
| 487 | continue; | ||
| 488 | |||
| 489 | ret = osd_req_decode_sense(or, &osi); | ||
| 490 | if (likely(!ret)) | ||
| 491 | continue; | ||
| 492 | |||
| 493 | if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) { | ||
| 494 | /* start read offset past end of file */ | ||
| 495 | BUG_ON(is_write); | ||
| 496 | _clear_bio(ios->per_dev[i].bio); | ||
| 497 | dprintk("%s: start read offset past end of file " | ||
| 498 | "offset=0x%llx, length=0x%lx\n", __func__, | ||
| 499 | _LLU(ios->per_dev[i].offset), | ||
| 500 | ios->per_dev[i].length); | ||
| 501 | |||
| 502 | continue; /* we recovered */ | ||
| 503 | } | ||
| 504 | dev = ios->per_dev[i].dev; | ||
| 505 | objlayout_io_set_result(&ios->ol_state, dev, | ||
| 506 | &ios->layout->comps[dev].oc_object_id, | ||
| 507 | osd_pri_2_pnfs_err(osi.osd_err_pri), | ||
| 508 | ios->per_dev[i].offset, | ||
| 509 | ios->per_dev[i].length, | ||
| 510 | is_write); | ||
| 511 | |||
| 512 | if (osi.osd_err_pri >= oep) { | ||
| 513 | oep = osi.osd_err_pri; | ||
| 514 | lin_ret = ret; | ||
| 515 | } | ||
| 516 | } | ||
| 517 | |||
| 518 | return lin_ret; | ||
| 519 | } | ||
| 520 | |||
| 521 | /* | ||
| 522 | * Common IO state helpers. | ||
| 523 | */ | ||
| 524 | static void _io_free(struct objio_state *ios) | ||
| 525 | { | ||
| 526 | unsigned i; | ||
| 527 | |||
| 528 | for (i = 0; i < ios->numdevs; i++) { | ||
| 529 | struct _objio_per_comp *per_dev = &ios->per_dev[i]; | ||
| 530 | |||
| 531 | if (per_dev->or) { | ||
| 532 | osd_end_request(per_dev->or); | ||
| 533 | per_dev->or = NULL; | ||
| 534 | } | ||
| 535 | |||
| 536 | if (per_dev->bio) { | ||
| 537 | bio_put(per_dev->bio); | ||
| 538 | per_dev->bio = NULL; | ||
| 539 | } | ||
| 540 | } | ||
| 541 | } | ||
| 542 | |||
| 543 | struct osd_dev *_io_od(struct objio_state *ios, unsigned dev) | ||
| 544 | { | ||
| 545 | unsigned min_dev = ios->layout->comps_index; | ||
| 546 | unsigned max_dev = min_dev + ios->layout->num_comps; | ||
| 547 | |||
| 548 | BUG_ON(dev < min_dev || max_dev <= dev); | ||
| 549 | return ios->layout->ods[dev - min_dev]->od; | ||
| 550 | } | ||
| 551 | |||
| 552 | struct _striping_info { | ||
| 553 | u64 obj_offset; | ||
| 554 | u64 group_length; | ||
| 555 | unsigned dev; | ||
| 556 | unsigned unit_off; | ||
| 557 | }; | ||
| 558 | |||
| 559 | static void _calc_stripe_info(struct objio_state *ios, u64 file_offset, | ||
| 560 | struct _striping_info *si) | ||
| 561 | { | ||
| 562 | u32 stripe_unit = ios->layout->stripe_unit; | ||
| 563 | u32 group_width = ios->layout->group_width; | ||
| 564 | u64 group_depth = ios->layout->group_depth; | ||
| 565 | u32 U = stripe_unit * group_width; | ||
| 566 | |||
| 567 | u64 T = U * group_depth; | ||
| 568 | u64 S = T * ios->layout->group_count; | ||
| 569 | u64 M = div64_u64(file_offset, S); | ||
| 570 | |||
| 571 | /* | ||
| 572 | G = (L - (M * S)) / T | ||
| 573 | H = (L - (M * S)) % T | ||
| 574 | */ | ||
| 575 | u64 LmodU = file_offset - M * S; | ||
| 576 | u32 G = div64_u64(LmodU, T); | ||
| 577 | u64 H = LmodU - G * T; | ||
| 578 | |||
| 579 | u32 N = div_u64(H, U); | ||
| 580 | |||
| 581 | div_u64_rem(file_offset, stripe_unit, &si->unit_off); | ||
| 582 | si->obj_offset = si->unit_off + (N * stripe_unit) + | ||
| 583 | (M * group_depth * stripe_unit); | ||
| 584 | |||
| 585 | /* "H - (N * U)" is just "H % U" so it fits in a u32 */ | ||
| 586 | si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width; | ||
| 587 | si->dev *= ios->layout->mirrors_p1; | ||
| 588 | |||
| 589 | si->group_length = T - H; | ||
| 590 | } | ||
| 591 | |||
| 592 | static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, | ||
| 593 | unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len, | ||
| 594 | gfp_t gfp_flags) | ||
| 595 | { | ||
| 596 | unsigned pg = *cur_pg; | ||
| 597 | struct request_queue *q = | ||
| 598 | osd_request_queue(_io_od(ios, per_dev->dev)); | ||
| 599 | |||
| 600 | per_dev->length += cur_len; | ||
| 601 | |||
| 602 | if (per_dev->bio == NULL) { | ||
| 603 | unsigned stripes = ios->layout->num_comps / | ||
| 604 | ios->layout->mirrors_p1; | ||
| 605 | unsigned pages_in_stripe = stripes * | ||
| 606 | (ios->layout->stripe_unit / PAGE_SIZE); | ||
| 607 | unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) / | ||
| 608 | stripes; | ||
| 609 | |||
| 610 | if (BIO_MAX_PAGES_KMALLOC < bio_size) | ||
| 611 | bio_size = BIO_MAX_PAGES_KMALLOC; | ||
| 612 | |||
| 613 | per_dev->bio = bio_kmalloc(gfp_flags, bio_size); | ||
| 614 | if (unlikely(!per_dev->bio)) { | ||
| 615 | dprintk("Failed to allocate BIO size=%u\n", bio_size); | ||
| 616 | return -ENOMEM; | ||
| 617 | } | ||
| 618 | } | ||
| 619 | |||
| 620 | while (cur_len > 0) { | ||
| 621 | unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len); | ||
| 622 | unsigned added_len; | ||
| 623 | |||
| 624 | BUG_ON(ios->ol_state.nr_pages <= pg); | ||
| 625 | cur_len -= pglen; | ||
| 626 | |||
| 627 | added_len = bio_add_pc_page(q, per_dev->bio, | ||
| 628 | ios->ol_state.pages[pg], pglen, pgbase); | ||
| 629 | if (unlikely(pglen != added_len)) | ||
| 630 | return -ENOMEM; | ||
| 631 | pgbase = 0; | ||
| 632 | ++pg; | ||
| 633 | } | ||
| 634 | BUG_ON(cur_len); | ||
| 635 | |||
| 636 | *cur_pg = pg; | ||
| 637 | return 0; | ||
| 638 | } | ||
| 639 | |||
| 640 | static int _prepare_one_group(struct objio_state *ios, u64 length, | ||
| 641 | struct _striping_info *si, unsigned *last_pg, | ||
| 642 | gfp_t gfp_flags) | ||
| 643 | { | ||
| 644 | unsigned stripe_unit = ios->layout->stripe_unit; | ||
| 645 | unsigned mirrors_p1 = ios->layout->mirrors_p1; | ||
| 646 | unsigned devs_in_group = ios->layout->group_width * mirrors_p1; | ||
| 647 | unsigned dev = si->dev; | ||
| 648 | unsigned first_dev = dev - (dev % devs_in_group); | ||
| 649 | unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0; | ||
| 650 | unsigned cur_pg = *last_pg; | ||
| 651 | int ret = 0; | ||
| 652 | |||
| 653 | while (length) { | ||
| 654 | struct _objio_per_comp *per_dev = &ios->per_dev[dev]; | ||
| 655 | unsigned cur_len, page_off = 0; | ||
| 656 | |||
| 657 | if (!per_dev->length) { | ||
| 658 | per_dev->dev = dev; | ||
| 659 | if (dev < si->dev) { | ||
| 660 | per_dev->offset = si->obj_offset + stripe_unit - | ||
| 661 | si->unit_off; | ||
| 662 | cur_len = stripe_unit; | ||
| 663 | } else if (dev == si->dev) { | ||
| 664 | per_dev->offset = si->obj_offset; | ||
| 665 | cur_len = stripe_unit - si->unit_off; | ||
| 666 | page_off = si->unit_off & ~PAGE_MASK; | ||
| 667 | BUG_ON(page_off && | ||
| 668 | (page_off != ios->ol_state.pgbase)); | ||
| 669 | } else { /* dev > si->dev */ | ||
| 670 | per_dev->offset = si->obj_offset - si->unit_off; | ||
| 671 | cur_len = stripe_unit; | ||
| 672 | } | ||
| 673 | |||
| 674 | if (max_comp < dev) | ||
| 675 | max_comp = dev; | ||
| 676 | } else { | ||
| 677 | cur_len = stripe_unit; | ||
| 678 | } | ||
| 679 | if (cur_len >= length) | ||
| 680 | cur_len = length; | ||
| 681 | |||
| 682 | ret = _add_stripe_unit(ios, &cur_pg, page_off, per_dev, | ||
| 683 | cur_len, gfp_flags); | ||
| 684 | if (unlikely(ret)) | ||
| 685 | goto out; | ||
| 686 | |||
| 687 | dev += mirrors_p1; | ||
| 688 | dev = (dev % devs_in_group) + first_dev; | ||
| 689 | |||
| 690 | length -= cur_len; | ||
| 691 | ios->length += cur_len; | ||
| 692 | } | ||
| 693 | out: | ||
| 694 | ios->numdevs = max_comp + mirrors_p1; | ||
| 695 | *last_pg = cur_pg; | ||
| 696 | return ret; | ||
| 697 | } | ||
| 698 | |||
| 699 | static int _io_rw_pagelist(struct objio_state *ios, gfp_t gfp_flags) | ||
| 700 | { | ||
| 701 | u64 length = ios->ol_state.count; | ||
| 702 | u64 offset = ios->ol_state.offset; | ||
| 703 | struct _striping_info si; | ||
| 704 | unsigned last_pg = 0; | ||
| 705 | int ret = 0; | ||
| 706 | |||
| 707 | while (length) { | ||
| 708 | _calc_stripe_info(ios, offset, &si); | ||
| 709 | |||
| 710 | if (length < si.group_length) | ||
| 711 | si.group_length = length; | ||
| 712 | |||
| 713 | ret = _prepare_one_group(ios, si.group_length, &si, &last_pg, gfp_flags); | ||
| 714 | if (unlikely(ret)) | ||
| 715 | goto out; | ||
| 716 | |||
| 717 | offset += si.group_length; | ||
| 718 | length -= si.group_length; | ||
| 719 | } | ||
| 720 | |||
| 721 | out: | ||
| 722 | if (!ios->length) | ||
| 723 | return ret; | ||
| 724 | |||
| 725 | return 0; | ||
| 726 | } | ||
| 727 | |||
| 728 | static ssize_t _sync_done(struct objio_state *ios) | ||
| 729 | { | ||
| 730 | struct completion *waiting = ios->private; | ||
| 731 | |||
| 732 | complete(waiting); | ||
| 733 | return 0; | ||
| 734 | } | ||
| 735 | |||
| 736 | static void _last_io(struct kref *kref) | ||
| 737 | { | ||
| 738 | struct objio_state *ios = container_of(kref, struct objio_state, kref); | ||
| 739 | |||
| 740 | ios->done(ios); | ||
| 741 | } | ||
| 742 | |||
| 743 | static void _done_io(struct osd_request *or, void *p) | ||
| 744 | { | ||
| 745 | struct objio_state *ios = p; | ||
| 746 | |||
| 747 | kref_put(&ios->kref, _last_io); | ||
| 748 | } | ||
| 749 | |||
| 750 | static ssize_t _io_exec(struct objio_state *ios) | ||
| 751 | { | ||
| 752 | DECLARE_COMPLETION_ONSTACK(wait); | ||
| 753 | ssize_t status = 0; /* sync status */ | ||
| 754 | unsigned i; | ||
| 755 | objio_done_fn saved_done_fn = ios->done; | ||
| 756 | bool sync = ios->ol_state.sync; | ||
| 757 | |||
| 758 | if (sync) { | ||
| 759 | ios->done = _sync_done; | ||
| 760 | ios->private = &wait; | ||
| 761 | } | ||
| 762 | |||
| 763 | kref_init(&ios->kref); | ||
| 764 | |||
| 765 | for (i = 0; i < ios->numdevs; i++) { | ||
| 766 | struct osd_request *or = ios->per_dev[i].or; | ||
| 767 | |||
| 768 | if (!or) | ||
| 769 | continue; | ||
| 770 | |||
| 771 | kref_get(&ios->kref); | ||
| 772 | osd_execute_request_async(or, _done_io, ios); | ||
| 773 | } | ||
| 774 | |||
| 775 | kref_put(&ios->kref, _last_io); | ||
| 776 | |||
| 777 | if (sync) { | ||
| 778 | wait_for_completion(&wait); | ||
| 779 | status = saved_done_fn(ios); | ||
| 780 | } | ||
| 781 | |||
| 782 | return status; | ||
| 783 | } | ||
| 784 | |||
| 785 | /* | ||
| 786 | * read | ||
| 787 | */ | ||
| 788 | static ssize_t _read_done(struct objio_state *ios) | ||
| 789 | { | ||
| 790 | ssize_t status; | ||
| 791 | int ret = _io_check(ios, false); | ||
| 792 | |||
| 793 | _io_free(ios); | ||
| 794 | |||
| 795 | if (likely(!ret)) | ||
| 796 | status = ios->length; | ||
| 797 | else | ||
| 798 | status = ret; | ||
| 799 | |||
| 800 | objlayout_read_done(&ios->ol_state, status, ios->ol_state.sync); | ||
| 801 | return status; | ||
| 802 | } | ||
| 803 | |||
| 804 | static int _read_mirrors(struct objio_state *ios, unsigned cur_comp) | ||
| 805 | { | ||
| 806 | struct osd_request *or = NULL; | ||
| 807 | struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; | ||
| 808 | unsigned dev = per_dev->dev; | ||
| 809 | struct pnfs_osd_object_cred *cred = | ||
| 810 | &ios->layout->comps[dev]; | ||
| 811 | struct osd_obj_id obj = { | ||
| 812 | .partition = cred->oc_object_id.oid_partition_id, | ||
| 813 | .id = cred->oc_object_id.oid_object_id, | ||
| 814 | }; | ||
| 815 | int ret; | ||
| 816 | |||
| 817 | or = osd_start_request(_io_od(ios, dev), GFP_KERNEL); | ||
| 818 | if (unlikely(!or)) { | ||
| 819 | ret = -ENOMEM; | ||
| 820 | goto err; | ||
| 821 | } | ||
| 822 | per_dev->or = or; | ||
| 823 | |||
| 824 | osd_req_read(or, &obj, per_dev->offset, per_dev->bio, per_dev->length); | ||
| 825 | |||
| 826 | ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL); | ||
| 827 | if (ret) { | ||
| 828 | dprintk("%s: Failed to osd_finalize_request() => %d\n", | ||
| 829 | __func__, ret); | ||
| 830 | goto err; | ||
| 831 | } | ||
| 832 | |||
| 833 | dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n", | ||
| 834 | __func__, cur_comp, dev, obj.id, _LLU(per_dev->offset), | ||
| 835 | per_dev->length); | ||
| 836 | |||
| 837 | err: | ||
| 838 | return ret; | ||
| 839 | } | ||
| 840 | |||
| 841 | static ssize_t _read_exec(struct objio_state *ios) | ||
| 842 | { | ||
| 843 | unsigned i; | ||
| 844 | int ret; | ||
| 845 | |||
| 846 | for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { | ||
| 847 | if (!ios->per_dev[i].length) | ||
| 848 | continue; | ||
| 849 | ret = _read_mirrors(ios, i); | ||
| 850 | if (unlikely(ret)) | ||
| 851 | goto err; | ||
| 852 | } | ||
| 853 | |||
| 854 | ios->done = _read_done; | ||
| 855 | return _io_exec(ios); /* In sync mode exec returns the io status */ | ||
| 856 | |||
| 857 | err: | ||
| 858 | _io_free(ios); | ||
| 859 | return ret; | ||
| 860 | } | ||
| 861 | |||
| 862 | ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state) | ||
| 863 | { | ||
| 864 | struct objio_state *ios = container_of(ol_state, struct objio_state, | ||
| 865 | ol_state); | ||
| 866 | int ret; | ||
| 867 | |||
| 868 | ret = _io_rw_pagelist(ios, GFP_KERNEL); | ||
| 869 | if (unlikely(ret)) | ||
| 870 | return ret; | ||
| 871 | |||
| 872 | return _read_exec(ios); | ||
| 873 | } | ||
| 874 | |||
| 875 | /* | ||
| 876 | * write | ||
| 877 | */ | ||
| 878 | static ssize_t _write_done(struct objio_state *ios) | ||
| 879 | { | ||
| 880 | ssize_t status; | ||
| 881 | int ret = _io_check(ios, true); | ||
| 882 | |||
| 883 | _io_free(ios); | ||
| 884 | |||
| 885 | if (likely(!ret)) { | ||
| 886 | /* FIXME: should be based on the OSD's persistence model | ||
| 887 | * See OSD2r05 Section 4.13 Data persistence model */ | ||
| 888 | ios->ol_state.committed = NFS_FILE_SYNC; | ||
| 889 | status = ios->length; | ||
| 890 | } else { | ||
| 891 | status = ret; | ||
| 892 | } | ||
| 893 | |||
| 894 | objlayout_write_done(&ios->ol_state, status, ios->ol_state.sync); | ||
| 895 | return status; | ||
| 896 | } | ||
| 897 | |||
| 898 | static int _write_mirrors(struct objio_state *ios, unsigned cur_comp) | ||
| 899 | { | ||
| 900 | struct _objio_per_comp *master_dev = &ios->per_dev[cur_comp]; | ||
| 901 | unsigned dev = ios->per_dev[cur_comp].dev; | ||
| 902 | unsigned last_comp = cur_comp + ios->layout->mirrors_p1; | ||
| 903 | int ret; | ||
| 904 | |||
| 905 | for (; cur_comp < last_comp; ++cur_comp, ++dev) { | ||
| 906 | struct osd_request *or = NULL; | ||
| 907 | struct pnfs_osd_object_cred *cred = | ||
| 908 | &ios->layout->comps[dev]; | ||
| 909 | struct osd_obj_id obj = { | ||
| 910 | .partition = cred->oc_object_id.oid_partition_id, | ||
| 911 | .id = cred->oc_object_id.oid_object_id, | ||
| 912 | }; | ||
| 913 | struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; | ||
| 914 | struct bio *bio; | ||
| 915 | |||
| 916 | or = osd_start_request(_io_od(ios, dev), GFP_NOFS); | ||
| 917 | if (unlikely(!or)) { | ||
| 918 | ret = -ENOMEM; | ||
| 919 | goto err; | ||
| 920 | } | ||
| 921 | per_dev->or = or; | ||
| 922 | |||
| 923 | if (per_dev != master_dev) { | ||
| 924 | bio = bio_kmalloc(GFP_NOFS, | ||
| 925 | master_dev->bio->bi_max_vecs); | ||
| 926 | if (unlikely(!bio)) { | ||
| 927 | dprintk("Failed to allocate BIO size=%u\n", | ||
| 928 | master_dev->bio->bi_max_vecs); | ||
| 929 | ret = -ENOMEM; | ||
| 930 | goto err; | ||
| 931 | } | ||
| 932 | |||
| 933 | __bio_clone(bio, master_dev->bio); | ||
| 934 | bio->bi_bdev = NULL; | ||
| 935 | bio->bi_next = NULL; | ||
| 936 | per_dev->bio = bio; | ||
| 937 | per_dev->dev = dev; | ||
| 938 | per_dev->length = master_dev->length; | ||
| 939 | per_dev->offset = master_dev->offset; | ||
| 940 | } else { | ||
| 941 | bio = master_dev->bio; | ||
| 942 | bio->bi_rw |= REQ_WRITE; | ||
| 943 | } | ||
| 944 | |||
| 945 | osd_req_write(or, &obj, per_dev->offset, bio, per_dev->length); | ||
| 946 | |||
| 947 | ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL); | ||
| 948 | if (ret) { | ||
| 949 | dprintk("%s: Failed to osd_finalize_request() => %d\n", | ||
| 950 | __func__, ret); | ||
| 951 | goto err; | ||
| 952 | } | ||
| 953 | |||
| 954 | dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n", | ||
| 955 | __func__, cur_comp, dev, obj.id, _LLU(per_dev->offset), | ||
| 956 | per_dev->length); | ||
| 957 | } | ||
| 958 | |||
| 959 | err: | ||
| 960 | return ret; | ||
| 961 | } | ||
| 962 | |||
| 963 | static ssize_t _write_exec(struct objio_state *ios) | ||
| 964 | { | ||
| 965 | unsigned i; | ||
| 966 | int ret; | ||
| 967 | |||
| 968 | for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { | ||
| 969 | if (!ios->per_dev[i].length) | ||
| 970 | continue; | ||
| 971 | ret = _write_mirrors(ios, i); | ||
| 972 | if (unlikely(ret)) | ||
| 973 | goto err; | ||
| 974 | } | ||
| 975 | |||
| 976 | ios->done = _write_done; | ||
| 977 | return _io_exec(ios); /* In sync mode exec returns the io->status */ | ||
| 978 | |||
| 979 | err: | ||
| 980 | _io_free(ios); | ||
| 981 | return ret; | ||
| 982 | } | ||
| 983 | |||
| 984 | ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable) | ||
| 985 | { | ||
| 986 | struct objio_state *ios = container_of(ol_state, struct objio_state, | ||
| 987 | ol_state); | ||
| 988 | int ret; | ||
| 989 | |||
| 990 | /* TODO: ios->stable = stable; */ | ||
| 991 | ret = _io_rw_pagelist(ios, GFP_NOFS); | ||
| 992 | if (unlikely(ret)) | ||
| 993 | return ret; | ||
| 994 | |||
| 995 | return _write_exec(ios); | ||
| 996 | } | ||
| 997 | |||
| 998 | static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, | ||
| 999 | struct nfs_page *prev, struct nfs_page *req) | ||
| 1000 | { | ||
| 1001 | if (!pnfs_generic_pg_test(pgio, prev, req)) | ||
| 1002 | return false; | ||
| 1003 | |||
| 1004 | return pgio->pg_count + req->wb_bytes <= | ||
| 1005 | OBJIO_LSEG(pgio->pg_lseg)->max_io_size; | ||
| 1006 | } | ||
| 1007 | |||
| 1008 | static struct pnfs_layoutdriver_type objlayout_type = { | ||
| 1009 | .id = LAYOUT_OSD2_OBJECTS, | ||
| 1010 | .name = "LAYOUT_OSD2_OBJECTS", | ||
| 1011 | .flags = PNFS_LAYOUTRET_ON_SETATTR, | ||
| 1012 | |||
| 1013 | .alloc_layout_hdr = objlayout_alloc_layout_hdr, | ||
| 1014 | .free_layout_hdr = objlayout_free_layout_hdr, | ||
| 1015 | |||
| 1016 | .alloc_lseg = objlayout_alloc_lseg, | ||
| 1017 | .free_lseg = objlayout_free_lseg, | ||
| 1018 | |||
| 1019 | .read_pagelist = objlayout_read_pagelist, | ||
| 1020 | .write_pagelist = objlayout_write_pagelist, | ||
| 1021 | .pg_test = objio_pg_test, | ||
| 1022 | |||
| 1023 | .free_deviceid_node = objio_free_deviceid_node, | ||
| 1024 | |||
| 1025 | .encode_layoutcommit = objlayout_encode_layoutcommit, | ||
| 1026 | .encode_layoutreturn = objlayout_encode_layoutreturn, | ||
| 1027 | }; | ||
| 1028 | |||
| 1029 | MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects"); | ||
| 1030 | MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>"); | ||
| 1031 | MODULE_LICENSE("GPL"); | ||
| 1032 | |||
| 1033 | static int __init | ||
| 1034 | objlayout_init(void) | ||
| 1035 | { | ||
| 1036 | int ret = pnfs_register_layoutdriver(&objlayout_type); | ||
| 1037 | |||
| 1038 | if (ret) | ||
| 1039 | printk(KERN_INFO | ||
| 1040 | "%s: Registering OSD pNFS Layout Driver failed: error=%d\n", | ||
| 1041 | __func__, ret); | ||
| 1042 | else | ||
| 1043 | printk(KERN_INFO "%s: Registered OSD pNFS Layout Driver\n", | ||
| 1044 | __func__); | ||
| 1045 | return ret; | ||
| 1046 | } | ||
| 1047 | |||
| 1048 | static void __exit | ||
| 1049 | objlayout_exit(void) | ||
| 1050 | { | ||
| 1051 | pnfs_unregister_layoutdriver(&objlayout_type); | ||
| 1052 | printk(KERN_INFO "%s: Unregistered OSD pNFS Layout Driver\n", | ||
| 1053 | __func__); | ||
| 1054 | } | ||
| 1055 | |||
| 1056 | module_init(objlayout_init); | ||
| 1057 | module_exit(objlayout_exit); | ||
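The striping arithmetic in _calc_stripe_info() above is dense, so here is a minimal user-space sketch of the same RAID-0 mapping for illustration. The layout parameters and the file offset below are made-up example values, not defaults taken from the driver; the formulas simply mirror the kernel code, with div64_u64()/div_u64() replaced by plain C division.

```c
/*
 * Hedged, user-space sketch of the RAID-0 striping math from
 * _calc_stripe_info(). All parameter values are illustrative
 * assumptions, not values mandated by the objects layout driver.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint32_t stripe_unit = 64 * 1024;  /* bytes per stripe unit */
	const uint32_t group_width = 4;          /* data devices per group */
	const uint64_t group_depth = 16;         /* stripe units per device per group */
	const uint32_t group_count = 2;
	const uint32_t mirrors_p1  = 1;          /* odm_mirror_cnt + 1 */
	const uint64_t file_offset = 5ULL << 20; /* example file offset L */

	uint64_t U = (uint64_t)stripe_unit * group_width; /* one full stripe */
	uint64_t T = U * group_depth;                     /* one group across its devices */
	uint64_t S = T * group_count;                     /* all groups once */

	uint64_t M = file_offset / S;        /* which "super-stripe" */
	uint64_t LmodS = file_offset - M * S;
	uint64_t G = LmodS / T;              /* which group */
	uint64_t H = LmodS - G * T;          /* byte offset inside that group */
	uint64_t N = H / U;                  /* which stripe inside the group */

	uint32_t unit_off = (uint32_t)(file_offset % stripe_unit);
	uint64_t obj_offset = unit_off + N * stripe_unit +
			      M * group_depth * stripe_unit;
	unsigned dev = (unsigned)(((H - N * U) / stripe_unit +
				   G * group_width) * mirrors_p1);
	uint64_t group_length = T - H;       /* bytes left in this group */

	printf("L=0x%llx -> dev=%u unit_off=%u obj_offset=0x%llx group_length=0x%llx\n",
	       (unsigned long long)file_offset, dev, unit_off,
	       (unsigned long long)obj_offset,
	       (unsigned long long)group_length);
	return 0;
}
```

With these example numbers, a 5 MiB file offset maps to component device 4 at object offset 0x40000, with 0x300000 bytes left in the current group; _io_rw_pagelist() above uses exactly that group_length to bound how much it hands to _prepare_one_group() before recomputing the stripe info for the next group.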
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c new file mode 100644 index 000000000000..dc3956c0de80 --- /dev/null +++ b/fs/nfs/objlayout/objlayout.c | |||
| @@ -0,0 +1,712 @@ | |||
| 1 | /* | ||
| 2 | * pNFS Objects layout driver high level definitions | ||
| 3 | * | ||
| 4 | * Copyright (C) 2007 Panasas Inc. [year of first publication] | ||
| 5 | * All rights reserved. | ||
| 6 | * | ||
| 7 | * Benny Halevy <bhalevy@panasas.com> | ||
| 8 | * Boaz Harrosh <bharrosh@panasas.com> | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or modify | ||
| 11 | * it under the terms of the GNU General Public License version 2 | ||
| 12 | * See the file COPYING included with this distribution for more details. | ||
| 13 | * | ||
| 14 | * Redistribution and use in source and binary forms, with or without | ||
| 15 | * modification, are permitted provided that the following conditions | ||
| 16 | * are met: | ||
| 17 | * | ||
| 18 | * 1. Redistributions of source code must retain the above copyright | ||
| 19 | * notice, this list of conditions and the following disclaimer. | ||
| 20 | * 2. Redistributions in binary form must reproduce the above copyright | ||
| 21 | * notice, this list of conditions and the following disclaimer in the | ||
| 22 | * documentation and/or other materials provided with the distribution. | ||
| 23 | * 3. Neither the name of the Panasas company nor the names of its | ||
| 24 | * contributors may be used to endorse or promote products derived | ||
| 25 | * from this software without specific prior written permission. | ||
| 26 | * | ||
| 27 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
| 28 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
| 29 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
| 30 | * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||
| 31 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
| 32 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
| 33 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | ||
| 34 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
| 35 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
| 36 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
| 37 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 38 | */ | ||
| 39 | |||
| 40 | #include <scsi/osd_initiator.h> | ||
| 41 | #include "objlayout.h" | ||
| 42 | |||
| 43 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
| 44 | /* | ||
| 45 | * Create an objlayout layout structure for the given inode and return it. | ||
| 46 | */ | ||
| 47 | struct pnfs_layout_hdr * | ||
| 48 | objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) | ||
| 49 | { | ||
| 50 | struct objlayout *objlay; | ||
| 51 | |||
| 52 | objlay = kzalloc(sizeof(struct objlayout), gfp_flags); | ||
| 53 | if (objlay) { | ||
| 54 | spin_lock_init(&objlay->lock); | ||
| 55 | INIT_LIST_HEAD(&objlay->err_list); | ||
| 56 | } | ||
| 57 | dprintk("%s: Return %p\n", __func__, objlay); | ||
| 58 | return &objlay->pnfs_layout; | ||
| 59 | } | ||
| 60 | |||
| 61 | /* | ||
| 62 | * Free an objlayout layout structure | ||
| 63 | */ | ||
| 64 | void | ||
| 65 | objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo) | ||
| 66 | { | ||
| 67 | struct objlayout *objlay = OBJLAYOUT(lo); | ||
| 68 | |||
| 69 | dprintk("%s: objlay %p\n", __func__, objlay); | ||
| 70 | |||
| 71 | WARN_ON(!list_empty(&objlay->err_list)); | ||
| 72 | kfree(objlay); | ||
| 73 | } | ||
| 74 | |||
| 75 | /* | ||
| 76 | * Unmarshall layout and store it in pnfslay. | ||
| 77 | */ | ||
| 78 | struct pnfs_layout_segment * | ||
| 79 | objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay, | ||
| 80 | struct nfs4_layoutget_res *lgr, | ||
| 81 | gfp_t gfp_flags) | ||
| 82 | { | ||
| 83 | int status = -ENOMEM; | ||
| 84 | struct xdr_stream stream; | ||
| 85 | struct xdr_buf buf = { | ||
| 86 | .pages = lgr->layoutp->pages, | ||
| 87 | .page_len = lgr->layoutp->len, | ||
| 88 | .buflen = lgr->layoutp->len, | ||
| 89 | .len = lgr->layoutp->len, | ||
| 90 | }; | ||
| 91 | struct page *scratch; | ||
| 92 | struct pnfs_layout_segment *lseg; | ||
| 93 | |||
| 94 | dprintk("%s: Begin pnfslay %p\n", __func__, pnfslay); | ||
| 95 | |||
| 96 | scratch = alloc_page(gfp_flags); | ||
| 97 | if (!scratch) | ||
| 98 | goto err_nofree; | ||
| 99 | |||
| 100 | xdr_init_decode(&stream, &buf, NULL); | ||
| 101 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | ||
| 102 | |||
| 103 | status = objio_alloc_lseg(&lseg, pnfslay, &lgr->range, &stream, gfp_flags); | ||
| 104 | if (unlikely(status)) { | ||
| 105 | dprintk("%s: objio_alloc_lseg Return err %d\n", __func__, | ||
| 106 | status); | ||
| 107 | goto err; | ||
| 108 | } | ||
| 109 | |||
| 110 | __free_page(scratch); | ||
| 111 | |||
| 112 | dprintk("%s: Return %p\n", __func__, lseg); | ||
| 113 | return lseg; | ||
| 114 | |||
| 115 | err: | ||
| 116 | __free_page(scratch); | ||
| 117 | err_nofree: | ||
| 118 | dprintk("%s: Err Return=>%d\n", __func__, status); | ||
| 119 | return ERR_PTR(status); | ||
| 120 | } | ||
| 121 | |||
| 122 | /* | ||
| 123 | * Free a layout segment | ||
| 124 | */ | ||
| 125 | void | ||
| 126 | objlayout_free_lseg(struct pnfs_layout_segment *lseg) | ||
| 127 | { | ||
| 128 | dprintk("%s: freeing layout segment %p\n", __func__, lseg); | ||
| 129 | |||
| 130 | if (unlikely(!lseg)) | ||
| 131 | return; | ||
| 132 | |||
| 133 | objio_free_lseg(lseg); | ||
| 134 | } | ||
| 135 | |||
| 136 | /* | ||
| 137 | * I/O Operations | ||
| 138 | */ | ||
| 139 | static inline u64 | ||
| 140 | end_offset(u64 start, u64 len) | ||
| 141 | { | ||
| 142 | u64 end; | ||
| 143 | |||
| 144 | end = start + len; | ||
| 145 | return end >= start ? end : NFS4_MAX_UINT64; | ||
| 146 | } | ||
| 147 | |||
| 148 | /* last octet in a range */ | ||
| 149 | static inline u64 | ||
| 150 | last_byte_offset(u64 start, u64 len) | ||
| 151 | { | ||
| 152 | u64 end; | ||
| 153 | |||
| 154 | BUG_ON(!len); | ||
| 155 | end = start + len; | ||
| 156 | return end > start ? end - 1 : NFS4_MAX_UINT64; | ||
| 157 | } | ||
| 158 | |||
| 159 | static struct objlayout_io_state * | ||
| 160 | objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, | ||
| 161 | struct page **pages, | ||
| 162 | unsigned pgbase, | ||
| 163 | loff_t offset, | ||
| 164 | size_t count, | ||
| 165 | struct pnfs_layout_segment *lseg, | ||
| 166 | void *rpcdata, | ||
| 167 | gfp_t gfp_flags) | ||
| 168 | { | ||
| 169 | struct objlayout_io_state *state; | ||
| 170 | u64 lseg_end_offset; | ||
| 171 | |||
| 172 | dprintk("%s: allocating io_state\n", __func__); | ||
| 173 | if (objio_alloc_io_state(lseg, &state, gfp_flags)) | ||
| 174 | return NULL; | ||
| 175 | |||
| 176 | BUG_ON(offset < lseg->pls_range.offset); | ||
| 177 | lseg_end_offset = end_offset(lseg->pls_range.offset, | ||
| 178 | lseg->pls_range.length); | ||
| 179 | BUG_ON(offset >= lseg_end_offset); | ||
| 180 | if (offset + count > lseg_end_offset) { | ||
| 181 | count = lseg->pls_range.length - | ||
| 182 | (offset - lseg->pls_range.offset); | ||
| 183 | dprintk("%s: truncated count %Zd\n", __func__, count); | ||
| 184 | } | ||
| 185 | |||
| 186 | if (pgbase > PAGE_SIZE) { | ||
| 187 | pages += pgbase >> PAGE_SHIFT; | ||
| 188 | pgbase &= ~PAGE_MASK; | ||
| 189 | } | ||
| 190 | |||
| 191 | INIT_LIST_HEAD(&state->err_list); | ||
| 192 | state->lseg = lseg; | ||
| 193 | state->rpcdata = rpcdata; | ||
| 194 | state->pages = pages; | ||
| 195 | state->pgbase = pgbase; | ||
| 196 | state->nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
| 197 | state->offset = offset; | ||
| 198 | state->count = count; | ||
| 199 | state->sync = 0; | ||
| 200 | |||
| 201 | return state; | ||
| 202 | } | ||
| 203 | |||
| 204 | static void | ||
| 205 | objlayout_free_io_state(struct objlayout_io_state *state) | ||
| 206 | { | ||
| 207 | dprintk("%s: freeing io_state\n", __func__); | ||
| 208 | if (unlikely(!state)) | ||
| 209 | return; | ||
| 210 | |||
| 211 | objio_free_io_state(state); | ||
| 212 | } | ||
| 213 | |||
| 214 | /* | ||
| 215 | * I/O done common code | ||
| 216 | */ | ||
| 217 | static void | ||
| 218 | objlayout_iodone(struct objlayout_io_state *state) | ||
| 219 | { | ||
| 220 | dprintk("%s: state %p status\n", __func__, state); | ||
| 221 | |||
| 222 | if (likely(state->status >= 0)) { | ||
| 223 | objlayout_free_io_state(state); | ||
| 224 | } else { | ||
| 225 | struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout); | ||
| 226 | |||
| 227 | spin_lock(&objlay->lock); | ||
| 228 | objlay->delta_space_valid = OBJ_DSU_INVALID; | ||
| 229 | list_add(&state->err_list, &objlay->err_list); | ||
| 230 | spin_unlock(&objlay->lock); | ||
| 231 | } | ||
| 232 | } | ||
| 233 | |||
| 234 | /* | ||
| 235 | * objlayout_io_set_result - Set an osd_error code on a specific osd comp. | ||
| 236 | * | ||
| 237 | * The @index component IO failed (error returned from target). Register | ||
| 238 | * the error for later reporting at layout-return. | ||
| 239 | */ | ||
| 240 | void | ||
| 241 | objlayout_io_set_result(struct objlayout_io_state *state, unsigned index, | ||
| 242 | struct pnfs_osd_objid *pooid, int osd_error, | ||
| 243 | u64 offset, u64 length, bool is_write) | ||
| 244 | { | ||
| 245 | struct pnfs_osd_ioerr *ioerr = &state->ioerrs[index]; | ||
| 246 | |||
| 247 | BUG_ON(index >= state->num_comps); | ||
| 248 | if (osd_error) { | ||
| 249 | ioerr->oer_component = *pooid; | ||
| 250 | ioerr->oer_comp_offset = offset; | ||
| 251 | ioerr->oer_comp_length = length; | ||
| 252 | ioerr->oer_iswrite = is_write; | ||
| 253 | ioerr->oer_errno = osd_error; | ||
| 254 | |||
| 255 | dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) " | ||
| 256 | "par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n", | ||
| 257 | __func__, index, ioerr->oer_errno, | ||
| 258 | ioerr->oer_iswrite, | ||
| 259 | _DEVID_LO(&ioerr->oer_component.oid_device_id), | ||
| 260 | _DEVID_HI(&ioerr->oer_component.oid_device_id), | ||
| 261 | ioerr->oer_component.oid_partition_id, | ||
| 262 | ioerr->oer_component.oid_object_id, | ||
| 263 | ioerr->oer_comp_offset, | ||
| 264 | ioerr->oer_comp_length); | ||
| 265 | } else { | ||
| 266 | /* Callers need not call this if no error is reported */ | ||
| 267 | ioerr->oer_errno = 0; | ||
| 268 | } | ||
| 269 | } | ||
| 270 | |||
| 271 | /* Function scheduled on rpc workqueue to call ->nfs_readlist_complete(). | ||
| 272 | * This is because the osd completion is called with interrupts off from | ||
| 273 | * the block layer | ||
| 274 | */ | ||
| 275 | static void _rpc_read_complete(struct work_struct *work) | ||
| 276 | { | ||
| 277 | struct rpc_task *task; | ||
| 278 | struct nfs_read_data *rdata; | ||
| 279 | |||
| 280 | dprintk("%s enter\n", __func__); | ||
| 281 | task = container_of(work, struct rpc_task, u.tk_work); | ||
| 282 | rdata = container_of(task, struct nfs_read_data, task); | ||
| 283 | |||
| 284 | pnfs_ld_read_done(rdata); | ||
| 285 | } | ||
| 286 | |||
| 287 | void | ||
| 288 | objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync) | ||
| 289 | { | ||
| 290 | int eof = state->eof; | ||
| 291 | struct nfs_read_data *rdata; | ||
| 292 | |||
| 293 | state->status = status; | ||
| 294 | dprintk("%s: Begin status=%ld eof=%d\n", __func__, status, eof); | ||
| 295 | rdata = state->rpcdata; | ||
| 296 | rdata->task.tk_status = status; | ||
| 297 | if (status >= 0) { | ||
| 298 | rdata->res.count = status; | ||
| 299 | rdata->res.eof = eof; | ||
| 300 | } | ||
| 301 | objlayout_iodone(state); | ||
| 302 | /* must not use state after this point */ | ||
| 303 | |||
| 304 | if (sync) | ||
| 305 | pnfs_ld_read_done(rdata); | ||
| 306 | else { | ||
| 307 | INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete); | ||
| 308 | schedule_work(&rdata->task.u.tk_work); | ||
| 309 | } | ||
| 310 | } | ||
| 311 | |||
| 312 | /* | ||
| 313 | * Perform sync or async reads. | ||
| 314 | */ | ||
| 315 | enum pnfs_try_status | ||
| 316 | objlayout_read_pagelist(struct nfs_read_data *rdata) | ||
| 317 | { | ||
| 318 | loff_t offset = rdata->args.offset; | ||
| 319 | size_t count = rdata->args.count; | ||
| 320 | struct objlayout_io_state *state; | ||
| 321 | ssize_t status = 0; | ||
| 322 | loff_t eof; | ||
| 323 | |||
| 324 | dprintk("%s: Begin inode %p offset %llu count %d\n", | ||
| 325 | __func__, rdata->inode, offset, (int)count); | ||
| 326 | |||
| 327 | eof = i_size_read(rdata->inode); | ||
| 328 | if (unlikely(offset + count > eof)) { | ||
| 329 | if (offset >= eof) { | ||
| 330 | status = 0; | ||
| 331 | rdata->res.count = 0; | ||
| 332 | rdata->res.eof = 1; | ||
| 333 | goto out; | ||
| 334 | } | ||
| 335 | count = eof - offset; | ||
| 336 | } | ||
| 337 | |||
| 338 | state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout, | ||
| 339 | rdata->args.pages, rdata->args.pgbase, | ||
| 340 | offset, count, | ||
| 341 | rdata->lseg, rdata, | ||
| 342 | GFP_KERNEL); | ||
| 343 | if (unlikely(!state)) { | ||
| 344 | status = -ENOMEM; | ||
| 345 | goto out; | ||
| 346 | } | ||
| 347 | |||
| 348 | state->eof = state->offset + state->count >= eof; | ||
| 349 | |||
| 350 | status = objio_read_pagelist(state); | ||
| 351 | out: | ||
| 352 | dprintk("%s: Return status %Zd\n", __func__, status); | ||
| 353 | rdata->pnfs_error = status; | ||
| 354 | return PNFS_ATTEMPTED; | ||
| 355 | } | ||
| 356 | |||
| 357 | /* Function scheduled on rpc workqueue to call ->nfs_writelist_complete(). | ||
| 358 | * This is because the osd completion is called with interrupts off from | ||
| 359 | * the block layer | ||
| 360 | */ | ||
| 361 | static void _rpc_write_complete(struct work_struct *work) | ||
| 362 | { | ||
| 363 | struct rpc_task *task; | ||
| 364 | struct nfs_write_data *wdata; | ||
| 365 | |||
| 366 | dprintk("%s enter\n", __func__); | ||
| 367 | task = container_of(work, struct rpc_task, u.tk_work); | ||
| 368 | wdata = container_of(task, struct nfs_write_data, task); | ||
| 369 | |||
| 370 | pnfs_ld_write_done(wdata); | ||
| 371 | } | ||
| 372 | |||
| 373 | void | ||
| 374 | objlayout_write_done(struct objlayout_io_state *state, ssize_t status, | ||
| 375 | bool sync) | ||
| 376 | { | ||
| 377 | struct nfs_write_data *wdata; | ||
| 378 | |||
| 379 | dprintk("%s: Begin\n", __func__); | ||
| 380 | wdata = state->rpcdata; | ||
| 381 | state->status = status; | ||
| 382 | wdata->task.tk_status = status; | ||
| 383 | if (status >= 0) { | ||
| 384 | wdata->res.count = status; | ||
| 385 | wdata->verf.committed = state->committed; | ||
| 386 | dprintk("%s: Return status %d committed %d\n", | ||
| 387 | __func__, wdata->task.tk_status, | ||
| 388 | wdata->verf.committed); | ||
| 389 | } else | ||
| 390 | dprintk("%s: Return status %d\n", | ||
| 391 | __func__, wdata->task.tk_status); | ||
| 392 | objlayout_iodone(state); | ||
| 393 | /* must not use state after this point */ | ||
| 394 | |||
| 395 | if (sync) | ||
| 396 | pnfs_ld_write_done(wdata); | ||
| 397 | else { | ||
| 398 | INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete); | ||
| 399 | schedule_work(&wdata->task.u.tk_work); | ||
| 400 | } | ||
| 401 | } | ||
| 402 | |||
| 403 | /* | ||
| 404 | * Perform sync or async writes. | ||
| 405 | */ | ||
| 406 | enum pnfs_try_status | ||
| 407 | objlayout_write_pagelist(struct nfs_write_data *wdata, | ||
| 408 | int how) | ||
| 409 | { | ||
| 410 | struct objlayout_io_state *state; | ||
| 411 | ssize_t status; | ||
| 412 | |||
| 413 | dprintk("%s: Begin inode %p offset %llu count %u\n", | ||
| 414 | __func__, wdata->inode, wdata->args.offset, wdata->args.count); | ||
| 415 | |||
| 416 | state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout, | ||
| 417 | wdata->args.pages, | ||
| 418 | wdata->args.pgbase, | ||
| 419 | wdata->args.offset, | ||
| 420 | wdata->args.count, | ||
| 421 | wdata->lseg, wdata, | ||
| 422 | GFP_NOFS); | ||
| 423 | if (unlikely(!state)) { | ||
| 424 | status = -ENOMEM; | ||
| 425 | goto out; | ||
| 426 | } | ||
| 427 | |||
| 428 | state->sync = how & FLUSH_SYNC; | ||
| 429 | |||
| 430 | status = objio_write_pagelist(state, how & FLUSH_STABLE); | ||
| 431 | out: | ||
| 432 | dprintk("%s: Return status %Zd\n", __func__, status); | ||
| 433 | wdata->pnfs_error = status; | ||
| 434 | return PNFS_ATTEMPTED; | ||
| 435 | } | ||
| 436 | |||
| 437 | void | ||
| 438 | objlayout_encode_layoutcommit(struct pnfs_layout_hdr *pnfslay, | ||
| 439 | struct xdr_stream *xdr, | ||
| 440 | const struct nfs4_layoutcommit_args *args) | ||
| 441 | { | ||
| 442 | struct objlayout *objlay = OBJLAYOUT(pnfslay); | ||
| 443 | struct pnfs_osd_layoutupdate lou; | ||
| 444 | __be32 *start; | ||
| 445 | |||
| 446 | dprintk("%s: Begin\n", __func__); | ||
| 447 | |||
| 448 | spin_lock(&objlay->lock); | ||
| 449 | lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID); | ||
| 450 | lou.dsu_delta = objlay->delta_space_used; | ||
| 451 | objlay->delta_space_used = 0; | ||
| 452 | objlay->delta_space_valid = OBJ_DSU_INIT; | ||
| 453 | lou.olu_ioerr_flag = !list_empty(&objlay->err_list); | ||
| 454 | spin_unlock(&objlay->lock); | ||
| 455 | |||
| 456 | start = xdr_reserve_space(xdr, 4); | ||
| 457 | |||
| 458 | BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou)); | ||
| 459 | |||
| 460 | *start = cpu_to_be32((xdr->p - start - 1) * 4); | ||
| 461 | |||
| 462 | dprintk("%s: Return delta_space_used %lld err %d\n", __func__, | ||
| 463 | lou.dsu_delta, lou.olu_ioerr_flag); | ||
| 464 | } | ||
| 465 | |||
| 466 | static int | ||
| 467 | err_prio(u32 oer_errno) | ||
| 468 | { | ||
| 469 | switch (oer_errno) { | ||
| 470 | case 0: | ||
| 471 | return 0; | ||
| 472 | |||
| 473 | case PNFS_OSD_ERR_RESOURCE: | ||
| 474 | return OSD_ERR_PRI_RESOURCE; | ||
| 475 | case PNFS_OSD_ERR_BAD_CRED: | ||
| 476 | return OSD_ERR_PRI_BAD_CRED; | ||
| 477 | case PNFS_OSD_ERR_NO_ACCESS: | ||
| 478 | return OSD_ERR_PRI_NO_ACCESS; | ||
| 479 | case PNFS_OSD_ERR_UNREACHABLE: | ||
| 480 | return OSD_ERR_PRI_UNREACHABLE; | ||
| 481 | case PNFS_OSD_ERR_NOT_FOUND: | ||
| 482 | return OSD_ERR_PRI_NOT_FOUND; | ||
| 483 | case PNFS_OSD_ERR_NO_SPACE: | ||
| 484 | return OSD_ERR_PRI_NO_SPACE; | ||
| 485 | default: | ||
| 486 | WARN_ON(1); | ||
| 487 | /* fallthrough */ | ||
| 488 | case PNFS_OSD_ERR_EIO: | ||
| 489 | return OSD_ERR_PRI_EIO; | ||
| 490 | } | ||
| 491 | } | ||
| 492 | |||
| 493 | static void | ||
| 494 | merge_ioerr(struct pnfs_osd_ioerr *dest_err, | ||
| 495 | const struct pnfs_osd_ioerr *src_err) | ||
| 496 | { | ||
| 497 | u64 dest_end, src_end; | ||
| 498 | |||
| 499 | if (!dest_err->oer_errno) { | ||
| 500 | *dest_err = *src_err; | ||
| 501 | /* accumulated device must be blank */ | ||
| 502 | memset(&dest_err->oer_component.oid_device_id, 0, | ||
| 503 | sizeof(dest_err->oer_component.oid_device_id)); | ||
| 504 | |||
| 505 | return; | ||
| 506 | } | ||
| 507 | |||
| 508 | if (dest_err->oer_component.oid_partition_id != | ||
| 509 | src_err->oer_component.oid_partition_id) | ||
| 510 | dest_err->oer_component.oid_partition_id = 0; | ||
| 511 | |||
| 512 | if (dest_err->oer_component.oid_object_id != | ||
| 513 | src_err->oer_component.oid_object_id) | ||
| 514 | dest_err->oer_component.oid_object_id = 0; | ||
| 515 | |||
| 516 | if (dest_err->oer_comp_offset > src_err->oer_comp_offset) | ||
| 517 | dest_err->oer_comp_offset = src_err->oer_comp_offset; | ||
| 518 | |||
| 519 | dest_end = end_offset(dest_err->oer_comp_offset, | ||
| 520 | dest_err->oer_comp_length); | ||
| 521 | src_end = end_offset(src_err->oer_comp_offset, | ||
| 522 | src_err->oer_comp_length); | ||
| 523 | if (dest_end < src_end) | ||
| 524 | dest_end = src_end; | ||
| 525 | |||
| 526 | dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset; | ||
| 527 | |||
| 528 | if ((src_err->oer_iswrite == dest_err->oer_iswrite) && | ||
| 529 | (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) { | ||
| 530 | dest_err->oer_errno = src_err->oer_errno; | ||
| 531 | } else if (src_err->oer_iswrite) { | ||
| 532 | dest_err->oer_iswrite = true; | ||
| 533 | dest_err->oer_errno = src_err->oer_errno; | ||
| 534 | } | ||
| 535 | } | ||
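err_prio() ranks the error codes and merge_ioerr() folds many per-component errors into one descriptor: the offsets and lengths are widened to a single covering range and the higher-priority errno wins, with write errors taking precedence over read errors. A simplified, standalone sketch of the range-widening idea (not a line-for-line copy of the helper above):

	/* Sketch of the covering-range computation behind merge_ioerr(). */
	#include <stdint.h>
	#include <stdio.h>

	#define MAX_U64 (~0ULL)

	struct range { uint64_t off, len; };

	static uint64_t end_offset(uint64_t start, uint64_t len)
	{
		uint64_t end = start + len;
		return end >= start ? end : MAX_U64;   /* saturate on wrap */
	}

	static void merge_range(struct range *dst, const struct range *src)
	{
		uint64_t dend = end_offset(dst->off, dst->len);
		uint64_t send = end_offset(src->off, src->len);
		uint64_t end  = dend > send ? dend : send;

		if (dst->off > src->off)
			dst->off = src->off;
		dst->len = end - dst->off;
	}

	int main(void)
	{
		struct range a = { 4096, 4096 }, b = { 0, 1024 };

		merge_range(&a, &b);
		printf("merged: off=%llu len=%llu\n",
		       (unsigned long long)a.off, (unsigned long long)a.len);
		return 0;
	}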
| 536 | |||
| 537 | static void | ||
| 538 | encode_accumulated_error(struct objlayout *objlay, __be32 *p) | ||
| 539 | { | ||
| 540 | struct objlayout_io_state *state, *tmp; | ||
| 541 | struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0}; | ||
| 542 | |||
| 543 | list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) { | ||
| 544 | unsigned i; | ||
| 545 | |||
| 546 | for (i = 0; i < state->num_comps; i++) { | ||
| 547 | struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i]; | ||
| 548 | |||
| 549 | if (!ioerr->oer_errno) | ||
| 550 | continue; | ||
| 551 | |||
| 552 | printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d " | ||
| 553 | "dev(%llx:%llx) par=0x%llx obj=0x%llx " | ||
| 554 | "offset=0x%llx length=0x%llx\n", | ||
| 555 | __func__, i, ioerr->oer_errno, | ||
| 556 | ioerr->oer_iswrite, | ||
| 557 | _DEVID_LO(&ioerr->oer_component.oid_device_id), | ||
| 558 | _DEVID_HI(&ioerr->oer_component.oid_device_id), | ||
| 559 | ioerr->oer_component.oid_partition_id, | ||
| 560 | ioerr->oer_component.oid_object_id, | ||
| 561 | ioerr->oer_comp_offset, | ||
| 562 | ioerr->oer_comp_length); | ||
| 563 | |||
| 564 | merge_ioerr(&accumulated_err, ioerr); | ||
| 565 | } | ||
| 566 | list_del(&state->err_list); | ||
| 567 | objlayout_free_io_state(state); | ||
| 568 | } | ||
| 569 | |||
| 570 | pnfs_osd_xdr_encode_ioerr(p, &accumulated_err); | ||
| 571 | } | ||
| 572 | |||
| 573 | void | ||
| 574 | objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay, | ||
| 575 | struct xdr_stream *xdr, | ||
| 576 | const struct nfs4_layoutreturn_args *args) | ||
| 577 | { | ||
| 578 | struct objlayout *objlay = OBJLAYOUT(pnfslay); | ||
| 579 | struct objlayout_io_state *state, *tmp; | ||
| 580 | __be32 *start; | ||
| 581 | |||
| 582 | dprintk("%s: Begin\n", __func__); | ||
| 583 | start = xdr_reserve_space(xdr, 4); | ||
| 584 | BUG_ON(!start); | ||
| 585 | |||
| 586 | spin_lock(&objlay->lock); | ||
| 587 | |||
| 588 | list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) { | ||
| 589 | __be32 *last_xdr = NULL, *p; | ||
| 590 | unsigned i; | ||
| 591 | int res = 0; | ||
| 592 | |||
| 593 | for (i = 0; i < state->num_comps; i++) { | ||
| 594 | struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i]; | ||
| 595 | |||
| 596 | if (!ioerr->oer_errno) | ||
| 597 | continue; | ||
| 598 | |||
| 599 | dprintk("%s: err[%d]: errno=%d is_write=%d " | ||
| 600 | "dev(%llx:%llx) par=0x%llx obj=0x%llx " | ||
| 601 | "offset=0x%llx length=0x%llx\n", | ||
| 602 | __func__, i, ioerr->oer_errno, | ||
| 603 | ioerr->oer_iswrite, | ||
| 604 | _DEVID_LO(&ioerr->oer_component.oid_device_id), | ||
| 605 | _DEVID_HI(&ioerr->oer_component.oid_device_id), | ||
| 606 | ioerr->oer_component.oid_partition_id, | ||
| 607 | ioerr->oer_component.oid_object_id, | ||
| 608 | ioerr->oer_comp_offset, | ||
| 609 | ioerr->oer_comp_length); | ||
| 610 | |||
| 611 | p = pnfs_osd_xdr_ioerr_reserve_space(xdr); | ||
| 612 | if (unlikely(!p)) { | ||
| 613 | res = -E2BIG; | ||
| 614 | break; /* accumulated_error */ | ||
| 615 | } | ||
| 616 | |||
| 617 | last_xdr = p; | ||
| 618 | pnfs_osd_xdr_encode_ioerr(p, &state->ioerrs[i]); | ||
| 619 | } | ||
| 620 | |||
| 621 | /* TODO: use xdr_write_pages */ | ||
| 622 | if (unlikely(res)) { | ||
| 623 | /* no space for even one error descriptor */ | ||
| 624 | BUG_ON(!last_xdr); | ||
| 625 | |||
| 626 | /* we've encountered a situation with lots and lots of | ||
| 627 | * errors and no space to encode them all. Use the last | ||
| 628 | * available slot to report the union of all the | ||
| 629 | * remaining errors. | ||
| 630 | */ | ||
| 631 | encode_accumulated_error(objlay, last_xdr); | ||
| 632 | goto loop_done; | ||
| 633 | } | ||
| 634 | list_del(&state->err_list); | ||
| 635 | objlayout_free_io_state(state); | ||
| 636 | } | ||
| 637 | loop_done: | ||
| 638 | spin_unlock(&objlay->lock); | ||
| 639 | |||
| 640 | *start = cpu_to_be32((xdr->p - start - 1) * 4); | ||
| 641 | dprintk("%s: Return\n", __func__); | ||
| 642 | } | ||
| 643 | |||
| 644 | |||
| 645 | /* | ||
| 646 | * Get Device Info API for io engines | ||
| 647 | */ | ||
| 648 | struct objlayout_deviceinfo { | ||
| 649 | struct page *page; | ||
| 650 | struct pnfs_osd_deviceaddr da; /* This must be last */ | ||
| 651 | }; | ||
| 652 | |||
| 653 | /* Initialize and call nfs_getdeviceinfo, then decode and return a | ||
| 654 | * "struct pnfs_osd_deviceaddr *" Eventually objlayout_put_deviceinfo() | ||
| 655 | * should be called. | ||
| 656 | */ | ||
| 657 | int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, | ||
| 658 | struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr, | ||
| 659 | gfp_t gfp_flags) | ||
| 660 | { | ||
| 661 | struct objlayout_deviceinfo *odi; | ||
| 662 | struct pnfs_device pd; | ||
| 663 | struct super_block *sb; | ||
| 664 | struct page *page, **pages; | ||
| 665 | u32 *p; | ||
| 666 | int err; | ||
| 667 | |||
| 668 | page = alloc_page(gfp_flags); | ||
| 669 | if (!page) | ||
| 670 | return -ENOMEM; | ||
| 671 | |||
| 672 | pages = &page; | ||
| 673 | pd.pages = pages; | ||
| 674 | |||
| 675 | memcpy(&pd.dev_id, d_id, sizeof(*d_id)); | ||
| 676 | pd.layout_type = LAYOUT_OSD2_OBJECTS; | ||
| 677 | pd.pages = &page; | ||
| 678 | pd.pgbase = 0; | ||
| 679 | pd.pglen = PAGE_SIZE; | ||
| 680 | pd.mincount = 0; | ||
| 681 | |||
| 682 | sb = pnfslay->plh_inode->i_sb; | ||
| 683 | err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd); | ||
| 684 | dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err); | ||
| 685 | if (err) | ||
| 686 | goto err_out; | ||
| 687 | |||
| 688 | p = page_address(page); | ||
| 689 | odi = kzalloc(sizeof(*odi), gfp_flags); | ||
| 690 | if (!odi) { | ||
| 691 | err = -ENOMEM; | ||
| 692 | goto err_out; | ||
| 693 | } | ||
| 694 | pnfs_osd_xdr_decode_deviceaddr(&odi->da, p); | ||
| 695 | odi->page = page; | ||
| 696 | *deviceaddr = &odi->da; | ||
| 697 | return 0; | ||
| 698 | |||
| 699 | err_out: | ||
| 700 | __free_page(page); | ||
| 701 | return err; | ||
| 702 | } | ||
| 703 | |||
| 704 | void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr) | ||
| 705 | { | ||
| 706 | struct objlayout_deviceinfo *odi = container_of(deviceaddr, | ||
| 707 | struct objlayout_deviceinfo, | ||
| 708 | da); | ||
| 709 | |||
| 710 | __free_page(odi->page); | ||
| 711 | kfree(odi); | ||
| 712 | } | ||
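objlayout_get_deviceinfo() returns a pointer to the da member of a private objlayout_deviceinfo, and objlayout_put_deviceinfo() recovers that wrapper with container_of() so it can free both the reply page and the allocation in one place. A userspace illustration of the same get/put wrapper pattern (all names in the sketch are hypothetical stand-ins):

	/* Illustrative get/put wrapper around a published member, userspace only. */
	#include <stddef.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct devaddr { char name[32]; };      /* stands in for pnfs_osd_deviceaddr */

	struct devinfo {                        /* stands in for objlayout_deviceinfo */
		void *backing;                  /* stands in for the reply page */
		struct devaddr da;              /* the part handed out to callers */
	};

	static int get_deviceinfo(struct devaddr **out)
	{
		struct devinfo *odi = calloc(1, sizeof(*odi));

		if (!odi)
			return -1;
		odi->backing = malloc(64);
		strcpy(odi->da.name, "osd0");
		*out = &odi->da;
		return 0;
	}

	static void put_deviceinfo(struct devaddr *da)
	{
		struct devinfo *odi = container_of(da, struct devinfo, da);

		free(odi->backing);
		free(odi);
	}

	int main(void)
	{
		struct devaddr *da;

		if (!get_deviceinfo(&da)) {
			printf("device: %s\n", da->name);
			put_deviceinfo(da);
		}
		return 0;
	}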
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h new file mode 100644 index 000000000000..a8244c8e042d --- /dev/null +++ b/fs/nfs/objlayout/objlayout.h | |||
| @@ -0,0 +1,187 @@ | |||
| 1 | /* | ||
| 2 | * Data types and function declarations for interfacing with the | ||
| 3 | * pNFS standard object layout driver. | ||
| 4 | * | ||
| 5 | * Copyright (C) 2007 Panasas Inc. [year of first publication] | ||
| 6 | * All rights reserved. | ||
| 7 | * | ||
| 8 | * Benny Halevy <bhalevy@panasas.com> | ||
| 9 | * Boaz Harrosh <bharrosh@panasas.com> | ||
| 10 | * | ||
| 11 | * This program is free software; you can redistribute it and/or modify | ||
| 12 | * it under the terms of the GNU General Public License version 2 | ||
| 13 | * See the file COPYING included with this distribution for more details. | ||
| 14 | * | ||
| 15 | * Redistribution and use in source and binary forms, with or without | ||
| 16 | * modification, are permitted provided that the following conditions | ||
| 17 | * are met: | ||
| 18 | * | ||
| 19 | * 1. Redistributions of source code must retain the above copyright | ||
| 20 | * notice, this list of conditions and the following disclaimer. | ||
| 21 | * 2. Redistributions in binary form must reproduce the above copyright | ||
| 22 | * notice, this list of conditions and the following disclaimer in the | ||
| 23 | * documentation and/or other materials provided with the distribution. | ||
| 24 | * 3. Neither the name of the Panasas company nor the names of its | ||
| 25 | * contributors may be used to endorse or promote products derived | ||
| 26 | * from this software without specific prior written permission. | ||
| 27 | * | ||
| 28 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
| 29 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
| 30 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
| 31 | * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||
| 32 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
| 33 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
| 34 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | ||
| 35 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
| 36 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
| 37 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
| 38 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 39 | */ | ||
| 40 | |||
| 41 | #ifndef _OBJLAYOUT_H | ||
| 42 | #define _OBJLAYOUT_H | ||
| 43 | |||
| 44 | #include <linux/nfs_fs.h> | ||
| 45 | #include <linux/pnfs_osd_xdr.h> | ||
| 46 | #include "../pnfs.h" | ||
| 47 | |||
| 48 | /* | ||
| 49 | * per-inode layout | ||
| 50 | */ | ||
| 51 | struct objlayout { | ||
| 52 | struct pnfs_layout_hdr pnfs_layout; | ||
| 53 | |||
| 54 | /* for layout_commit */ | ||
| 55 | enum osd_delta_space_valid_enum { | ||
| 56 | OBJ_DSU_INIT = 0, | ||
| 57 | OBJ_DSU_VALID, | ||
| 58 | OBJ_DSU_INVALID, | ||
| 59 | } delta_space_valid; | ||
| 60 | s64 delta_space_used; /* consumed by write ops */ | ||
| 61 | |||
| 62 | /* for layout_return */ | ||
| 63 | spinlock_t lock; | ||
| 64 | struct list_head err_list; | ||
| 65 | }; | ||
| 66 | |||
| 67 | static inline struct objlayout * | ||
| 68 | OBJLAYOUT(struct pnfs_layout_hdr *lo) | ||
| 69 | { | ||
| 70 | return container_of(lo, struct objlayout, pnfs_layout); | ||
| 71 | } | ||
| 72 | |||
| 73 | /* | ||
| 74 | * per-I/O operation state | ||
| 75 | * embedded in objects provider io_state data structure | ||
| 76 | */ | ||
| 77 | struct objlayout_io_state { | ||
| 78 | struct pnfs_layout_segment *lseg; | ||
| 79 | |||
| 80 | struct page **pages; | ||
| 81 | unsigned pgbase; | ||
| 82 | unsigned nr_pages; | ||
| 83 | unsigned long count; | ||
| 84 | loff_t offset; | ||
| 85 | bool sync; | ||
| 86 | |||
| 87 | void *rpcdata; | ||
| 88 | int status; /* res */ | ||
| 89 | int eof; /* res */ | ||
| 90 | int committed; /* res */ | ||
| 91 | |||
| 92 | /* Error reporting (layout_return) */ | ||
| 93 | struct list_head err_list; | ||
| 94 | unsigned num_comps; | ||
| 95 | /* Pointer to array of error descriptors of size num_comps. | ||
| 96 | * It should contain as many entries as devices in the osd_layout | ||
| 97 | * that participate in the I/O. It is up to the io_engine to allocate | ||
| 98 | * needed space and set num_comps. | ||
| 99 | */ | ||
| 100 | struct pnfs_osd_ioerr *ioerrs; | ||
| 101 | }; | ||
| 102 | |||
| 103 | /* | ||
| 104 | * Raid engine I/O API | ||
| 105 | */ | ||
| 106 | extern int objio_alloc_lseg(struct pnfs_layout_segment **outp, | ||
| 107 | struct pnfs_layout_hdr *pnfslay, | ||
| 108 | struct pnfs_layout_range *range, | ||
| 109 | struct xdr_stream *xdr, | ||
| 110 | gfp_t gfp_flags); | ||
| 111 | extern void objio_free_lseg(struct pnfs_layout_segment *lseg); | ||
| 112 | |||
| 113 | extern int objio_alloc_io_state( | ||
| 114 | struct pnfs_layout_segment *lseg, | ||
| 115 | struct objlayout_io_state **outp, | ||
| 116 | gfp_t gfp_flags); | ||
| 117 | extern void objio_free_io_state(struct objlayout_io_state *state); | ||
| 118 | |||
| 119 | extern ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state); | ||
| 120 | extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, | ||
| 121 | bool stable); | ||
| 122 | |||
| 123 | /* | ||
| 124 | * callback API | ||
| 125 | */ | ||
| 126 | extern void objlayout_io_set_result(struct objlayout_io_state *state, | ||
| 127 | unsigned index, struct pnfs_osd_objid *pooid, | ||
| 128 | int osd_error, u64 offset, u64 length, bool is_write); | ||
| 129 | |||
| 130 | static inline void | ||
| 131 | objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used) | ||
| 132 | { | ||
| 133 | struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout); | ||
| 134 | |||
| 135 | /* If one of the I/Os errored out and the delta_space_used was | ||
| 136 | * invalid, we render the complete report as invalid. The protocol mandates | ||
| 137 | * that the DSU be accurate or not reported at all. | ||
| 138 | */ | ||
| 139 | spin_lock(&objlay->lock); | ||
| 140 | if (objlay->delta_space_valid != OBJ_DSU_INVALID) { | ||
| 141 | objlay->delta_space_valid = OBJ_DSU_VALID; | ||
| 142 | objlay->delta_space_used += space_used; | ||
| 143 | } | ||
| 144 | spin_unlock(&objlay->lock); | ||
| 145 | } | ||
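delta_space_valid acts as a small latch: it starts as OBJ_DSU_INIT, flips to OBJ_DSU_VALID once any write has accounted its space, and once the error paths (not shown in this header) force it to OBJ_DSU_INVALID it stays invalid, so the eventual layoutcommit omits the DSU rather than report an inaccurate value. A tiny model of that behaviour, without the locking:

	/* Sketch of the OBJ_DSU_* latch semantics; simplified, no spinlock. */
	#include <stdint.h>
	#include <stdio.h>

	enum dsu_state { DSU_INIT, DSU_VALID, DSU_INVALID };

	struct layout_acct {
		enum dsu_state state;
		int64_t delta;
	};

	static void add_delta(struct layout_acct *a, int64_t used)
	{
		if (a->state != DSU_INVALID) {      /* once invalid, stays invalid */
			a->state = DSU_VALID;
			a->delta += used;
		}
	}

	int main(void)
	{
		struct layout_acct a = { DSU_INIT, 0 };

		add_delta(&a, 4096);
		add_delta(&a, 8192);
		printf("state=%d delta=%lld\n", a.state, (long long)a.delta);
		return 0;
	}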
| 146 | |||
| 147 | extern void objlayout_read_done(struct objlayout_io_state *state, | ||
| 148 | ssize_t status, bool sync); | ||
| 149 | extern void objlayout_write_done(struct objlayout_io_state *state, | ||
| 150 | ssize_t status, bool sync); | ||
| 151 | |||
| 152 | extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, | ||
| 153 | struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr, | ||
| 154 | gfp_t gfp_flags); | ||
| 155 | extern void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr); | ||
| 156 | |||
| 157 | /* | ||
| 158 | * exported generic objects function vectors | ||
| 159 | */ | ||
| 160 | |||
| 161 | extern struct pnfs_layout_hdr *objlayout_alloc_layout_hdr(struct inode *, gfp_t gfp_flags); | ||
| 162 | extern void objlayout_free_layout_hdr(struct pnfs_layout_hdr *); | ||
| 163 | |||
| 164 | extern struct pnfs_layout_segment *objlayout_alloc_lseg( | ||
| 165 | struct pnfs_layout_hdr *, | ||
| 166 | struct nfs4_layoutget_res *, | ||
| 167 | gfp_t gfp_flags); | ||
| 168 | extern void objlayout_free_lseg(struct pnfs_layout_segment *); | ||
| 169 | |||
| 170 | extern enum pnfs_try_status objlayout_read_pagelist( | ||
| 171 | struct nfs_read_data *); | ||
| 172 | |||
| 173 | extern enum pnfs_try_status objlayout_write_pagelist( | ||
| 174 | struct nfs_write_data *, | ||
| 175 | int how); | ||
| 176 | |||
| 177 | extern void objlayout_encode_layoutcommit( | ||
| 178 | struct pnfs_layout_hdr *, | ||
| 179 | struct xdr_stream *, | ||
| 180 | const struct nfs4_layoutcommit_args *); | ||
| 181 | |||
| 182 | extern void objlayout_encode_layoutreturn( | ||
| 183 | struct pnfs_layout_hdr *, | ||
| 184 | struct xdr_stream *, | ||
| 185 | const struct nfs4_layoutreturn_args *); | ||
| 186 | |||
| 187 | #endif /* _OBJLAYOUT_H */ | ||
diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c new file mode 100644 index 000000000000..16fc758e9123 --- /dev/null +++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c | |||
| @@ -0,0 +1,412 @@ | |||
| 1 | /* | ||
| 2 | * Object-Based pNFS Layout XDR layer | ||
| 3 | * | ||
| 4 | * Copyright (C) 2007 Panasas Inc. [year of first publication] | ||
| 5 | * All rights reserved. | ||
| 6 | * | ||
| 7 | * Benny Halevy <bhalevy@panasas.com> | ||
| 8 | * Boaz Harrosh <bharrosh@panasas.com> | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or modify | ||
| 11 | * it under the terms of the GNU General Public License version 2 | ||
| 12 | * See the file COPYING included with this distribution for more details. | ||
| 13 | * | ||
| 14 | * Redistribution and use in source and binary forms, with or without | ||
| 15 | * modification, are permitted provided that the following conditions | ||
| 16 | * are met: | ||
| 17 | * | ||
| 18 | * 1. Redistributions of source code must retain the above copyright | ||
| 19 | * notice, this list of conditions and the following disclaimer. | ||
| 20 | * 2. Redistributions in binary form must reproduce the above copyright | ||
| 21 | * notice, this list of conditions and the following disclaimer in the | ||
| 22 | * documentation and/or other materials provided with the distribution. | ||
| 23 | * 3. Neither the name of the Panasas company nor the names of its | ||
| 24 | * contributors may be used to endorse or promote products derived | ||
| 25 | * from this software without specific prior written permission. | ||
| 26 | * | ||
| 27 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
| 28 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
| 29 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
| 30 | * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||
| 31 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
| 32 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
| 33 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | ||
| 34 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
| 35 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
| 36 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
| 37 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 38 | */ | ||
| 39 | |||
| 40 | #include <linux/pnfs_osd_xdr.h> | ||
| 41 | |||
| 42 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
| 43 | |||
| 44 | /* | ||
| 45 | * The following implementation is based on RFC5664 | ||
| 46 | */ | ||
| 47 | |||
| 48 | /* | ||
| 49 | * struct pnfs_osd_objid { | ||
| 50 | * struct nfs4_deviceid oid_device_id; | ||
| 51 | * u64 oid_partition_id; | ||
| 52 | * u64 oid_object_id; | ||
| 53 | * }; // xdr size 32 bytes | ||
| 54 | */ | ||
| 55 | static __be32 * | ||
| 56 | _osd_xdr_decode_objid(__be32 *p, struct pnfs_osd_objid *objid) | ||
| 57 | { | ||
| 58 | p = xdr_decode_opaque_fixed(p, objid->oid_device_id.data, | ||
| 59 | sizeof(objid->oid_device_id.data)); | ||
| 60 | |||
| 61 | p = xdr_decode_hyper(p, &objid->oid_partition_id); | ||
| 62 | p = xdr_decode_hyper(p, &objid->oid_object_id); | ||
| 63 | return p; | ||
| 64 | } | ||
| 65 | /* | ||
| 66 | * struct pnfs_osd_opaque_cred { | ||
| 67 | * u32 cred_len; | ||
| 68 | * void *cred; | ||
| 69 | * }; // xdr size [variable] | ||
| 70 | * The returned pointers point into the xdr buffer | ||
| 71 | */ | ||
| 72 | static int | ||
| 73 | _osd_xdr_decode_opaque_cred(struct pnfs_osd_opaque_cred *opaque_cred, | ||
| 74 | struct xdr_stream *xdr) | ||
| 75 | { | ||
| 76 | __be32 *p = xdr_inline_decode(xdr, 1); | ||
| 77 | |||
| 78 | if (!p) | ||
| 79 | return -EINVAL; | ||
| 80 | |||
| 81 | opaque_cred->cred_len = be32_to_cpu(*p++); | ||
| 82 | |||
| 83 | p = xdr_inline_decode(xdr, opaque_cred->cred_len); | ||
| 84 | if (!p) | ||
| 85 | return -EINVAL; | ||
| 86 | |||
| 87 | opaque_cred->cred = p; | ||
| 88 | return 0; | ||
| 89 | } | ||
| 90 | |||
| 91 | /* | ||
| 92 | * struct pnfs_osd_object_cred { | ||
| 93 | * struct pnfs_osd_objid oc_object_id; | ||
| 94 | * u32 oc_osd_version; | ||
| 95 | * u32 oc_cap_key_sec; | ||
| 96 | * struct pnfs_osd_opaque_cred oc_cap_key; | ||
| 97 | * struct pnfs_osd_opaque_cred oc_cap; | ||
| 98 | * }; // xdr size 32 + 4 + 4 + [variable] + [variable] | ||
| 99 | */ | ||
| 100 | static int | ||
| 101 | _osd_xdr_decode_object_cred(struct pnfs_osd_object_cred *comp, | ||
| 102 | struct xdr_stream *xdr) | ||
| 103 | { | ||
| 104 | __be32 *p = xdr_inline_decode(xdr, 32 + 4 + 4); | ||
| 105 | int ret; | ||
| 106 | |||
| 107 | if (!p) | ||
| 108 | return -EIO; | ||
| 109 | |||
| 110 | p = _osd_xdr_decode_objid(p, &comp->oc_object_id); | ||
| 111 | comp->oc_osd_version = be32_to_cpup(p++); | ||
| 112 | comp->oc_cap_key_sec = be32_to_cpup(p); | ||
| 113 | |||
| 114 | ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap_key, xdr); | ||
| 115 | if (unlikely(ret)) | ||
| 116 | return ret; | ||
| 117 | |||
| 118 | ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap, xdr); | ||
| 119 | return ret; | ||
| 120 | } | ||
| 121 | |||
| 122 | /* | ||
| 123 | * struct pnfs_osd_data_map { | ||
| 124 | * u32 odm_num_comps; | ||
| 125 | * u64 odm_stripe_unit; | ||
| 126 | * u32 odm_group_width; | ||
| 127 | * u32 odm_group_depth; | ||
| 128 | * u32 odm_mirror_cnt; | ||
| 129 | * u32 odm_raid_algorithm; | ||
| 130 | * }; // xdr size 4 + 8 + 4 + 4 + 4 + 4 | ||
| 131 | */ | ||
| 132 | static inline int | ||
| 133 | _osd_data_map_xdr_sz(void) | ||
| 134 | { | ||
| 135 | return 4 + 8 + 4 + 4 + 4 + 4; | ||
| 136 | } | ||
| 137 | |||
| 138 | static __be32 * | ||
| 139 | _osd_xdr_decode_data_map(__be32 *p, struct pnfs_osd_data_map *data_map) | ||
| 140 | { | ||
| 141 | data_map->odm_num_comps = be32_to_cpup(p++); | ||
| 142 | p = xdr_decode_hyper(p, &data_map->odm_stripe_unit); | ||
| 143 | data_map->odm_group_width = be32_to_cpup(p++); | ||
| 144 | data_map->odm_group_depth = be32_to_cpup(p++); | ||
| 145 | data_map->odm_mirror_cnt = be32_to_cpup(p++); | ||
| 146 | data_map->odm_raid_algorithm = be32_to_cpup(p++); | ||
| 147 | dprintk("%s: odm_num_comps=%u odm_stripe_unit=%llu odm_group_width=%u " | ||
| 148 | "odm_group_depth=%u odm_mirror_cnt=%u odm_raid_algorithm=%u\n", | ||
| 149 | __func__, | ||
| 150 | data_map->odm_num_comps, | ||
| 151 | (unsigned long long)data_map->odm_stripe_unit, | ||
| 152 | data_map->odm_group_width, | ||
| 153 | data_map->odm_group_depth, | ||
| 154 | data_map->odm_mirror_cnt, | ||
| 155 | data_map->odm_raid_algorithm); | ||
| 156 | return p; | ||
| 157 | } | ||
| 158 | |||
| 159 | int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout, | ||
| 160 | struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr) | ||
| 161 | { | ||
| 162 | __be32 *p; | ||
| 163 | |||
| 164 | memset(iter, 0, sizeof(*iter)); | ||
| 165 | |||
| 166 | p = xdr_inline_decode(xdr, _osd_data_map_xdr_sz() + 4 + 4); | ||
| 167 | if (unlikely(!p)) | ||
| 168 | return -EINVAL; | ||
| 169 | |||
| 170 | p = _osd_xdr_decode_data_map(p, &layout->olo_map); | ||
| 171 | layout->olo_comps_index = be32_to_cpup(p++); | ||
| 172 | layout->olo_num_comps = be32_to_cpup(p++); | ||
| 173 | iter->total_comps = layout->olo_num_comps; | ||
| 174 | return 0; | ||
| 175 | } | ||
| 176 | |||
| 177 | bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp, | ||
| 178 | struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr, | ||
| 179 | int *err) | ||
| 180 | { | ||
| 181 | BUG_ON(iter->decoded_comps > iter->total_comps); | ||
| 182 | if (iter->decoded_comps == iter->total_comps) | ||
| 183 | return false; | ||
| 184 | |||
| 185 | *err = _osd_xdr_decode_object_cred(comp, xdr); | ||
| 186 | if (unlikely(*err)) { | ||
| 187 | dprintk("%s: _osd_xdr_decode_object_cred=>%d decoded_comps=%d " | ||
| 188 | "total_comps=%d\n", __func__, *err, | ||
| 189 | iter->decoded_comps, iter->total_comps); | ||
| 190 | return false; /* stop the loop */ | ||
| 191 | } | ||
| 192 | dprintk("%s: dev(%llx:%llx) par=0x%llx obj=0x%llx " | ||
| 193 | "key_len=%u cap_len=%u\n", | ||
| 194 | __func__, | ||
| 195 | _DEVID_LO(&comp->oc_object_id.oid_device_id), | ||
| 196 | _DEVID_HI(&comp->oc_object_id.oid_device_id), | ||
| 197 | comp->oc_object_id.oid_partition_id, | ||
| 198 | comp->oc_object_id.oid_object_id, | ||
| 199 | comp->oc_cap_key.cred_len, comp->oc_cap.cred_len); | ||
| 200 | |||
| 201 | iter->decoded_comps++; | ||
| 202 | return true; | ||
| 203 | } | ||
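pnfs_osd_xdr_decode_layout_map() and pnfs_osd_xdr_decode_layout_comp() form an iterator: the map call reads the fixed header and the component count, and the comp call is then driven in a loop until it returns false, either because every component was consumed or because *err was set. A runnable userspace model of that loop shape, pulling from an array instead of an xdr_stream:

	/* Userspace model of the map/comp iterator idiom used above. */
	#include <stdbool.h>
	#include <stdio.h>

	struct iter { unsigned total, decoded; };

	static void decode_map(struct iter *it, unsigned total)
	{
		it->total = total;
		it->decoded = 0;
	}

	static bool decode_comp(unsigned *comp, struct iter *it,
				const unsigned *src, int *err)
	{
		if (it->decoded == it->total)
			return false;           /* normal end of the loop */
		*comp = src[it->decoded++];
		*err = 0;
		return true;
	}

	int main(void)
	{
		unsigned comps[] = { 10, 20, 30 }, c;
		struct iter it;
		int err = 0;

		decode_map(&it, 3);
		while (decode_comp(&c, &it, comps, &err))
			printf("component %u\n", c);
		return err;
	}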
| 204 | |||
| 205 | /* | ||
| 206 | * Get Device Information Decoding | ||
| 207 | * | ||
| 208 | * Note: since Device Information is currently done synchronously, all | ||
| 209 | * variable-length string fields are left inside the rpc buffer and are only | ||
| 210 | * pointed to by the pnfs_osd_deviceaddr members. So the read buffer | ||
| 211 | * should not be freed while the returned information is in use. | ||
| 212 | */ | ||
| 213 | /* | ||
| 214 | *struct nfs4_string { | ||
| 215 | * unsigned int len; | ||
| 216 | * char *data; | ||
| 217 | *}; // size [variable] | ||
| 218 | * NOTE: Returned string points to inside the XDR buffer | ||
| 219 | */ | ||
| 220 | static __be32 * | ||
| 221 | __read_u8_opaque(__be32 *p, struct nfs4_string *str) | ||
| 222 | { | ||
| 223 | str->len = be32_to_cpup(p++); | ||
| 224 | str->data = (char *)p; | ||
| 225 | |||
| 226 | p += XDR_QUADLEN(str->len); | ||
| 227 | return p; | ||
| 228 | } | ||
| 229 | |||
| 230 | /* | ||
| 231 | * struct pnfs_osd_targetid { | ||
| 232 | * u32 oti_type; | ||
| 233 | * struct nfs4_string oti_scsi_device_id; | ||
| 234 | * };// size 4 + [variable] | ||
| 235 | */ | ||
| 236 | static __be32 * | ||
| 237 | __read_targetid(__be32 *p, struct pnfs_osd_targetid* targetid) | ||
| 238 | { | ||
| 239 | u32 oti_type; | ||
| 240 | |||
| 241 | oti_type = be32_to_cpup(p++); | ||
| 242 | targetid->oti_type = oti_type; | ||
| 243 | |||
| 244 | switch (oti_type) { | ||
| 245 | case OBJ_TARGET_SCSI_NAME: | ||
| 246 | case OBJ_TARGET_SCSI_DEVICE_ID: | ||
| 247 | p = __read_u8_opaque(p, &targetid->oti_scsi_device_id); | ||
| 248 | } | ||
| 249 | |||
| 250 | return p; | ||
| 251 | } | ||
| 252 | |||
| 253 | /* | ||
| 254 | * struct pnfs_osd_net_addr { | ||
| 255 | * struct nfs4_string r_netid; | ||
| 256 | * struct nfs4_string r_addr; | ||
| 257 | * }; | ||
| 258 | */ | ||
| 259 | static __be32 * | ||
| 260 | __read_net_addr(__be32 *p, struct pnfs_osd_net_addr* netaddr) | ||
| 261 | { | ||
| 262 | p = __read_u8_opaque(p, &netaddr->r_netid); | ||
| 263 | p = __read_u8_opaque(p, &netaddr->r_addr); | ||
| 264 | |||
| 265 | return p; | ||
| 266 | } | ||
| 267 | |||
| 268 | /* | ||
| 269 | * struct pnfs_osd_targetaddr { | ||
| 270 | * u32 ota_available; | ||
| 271 | * struct pnfs_osd_net_addr ota_netaddr; | ||
| 272 | * }; | ||
| 273 | */ | ||
| 274 | static __be32 * | ||
| 275 | __read_targetaddr(__be32 *p, struct pnfs_osd_targetaddr *targetaddr) | ||
| 276 | { | ||
| 277 | u32 ota_available; | ||
| 278 | |||
| 279 | ota_available = be32_to_cpup(p++); | ||
| 280 | targetaddr->ota_available = ota_available; | ||
| 281 | |||
| 282 | if (ota_available) | ||
| 283 | p = __read_net_addr(p, &targetaddr->ota_netaddr); | ||
| 284 | |||
| 285 | |||
| 286 | return p; | ||
| 287 | } | ||
| 288 | |||
| 289 | /* | ||
| 290 | * struct pnfs_osd_deviceaddr { | ||
| 291 | * struct pnfs_osd_targetid oda_targetid; | ||
| 292 | * struct pnfs_osd_targetaddr oda_targetaddr; | ||
| 293 | * u8 oda_lun[8]; | ||
| 294 | * struct nfs4_string oda_systemid; | ||
| 295 | * struct pnfs_osd_object_cred oda_root_obj_cred; | ||
| 296 | * struct nfs4_string oda_osdname; | ||
| 297 | * }; | ||
| 298 | */ | ||
| 299 | |||
| 300 | /* We need this version for the pnfs_osd_xdr_decode_deviceaddr which does | ||
| 301 | * not have an xdr_stream | ||
| 302 | */ | ||
| 303 | static __be32 * | ||
| 304 | __read_opaque_cred(__be32 *p, | ||
| 305 | struct pnfs_osd_opaque_cred *opaque_cred) | ||
| 306 | { | ||
| 307 | opaque_cred->cred_len = be32_to_cpu(*p++); | ||
| 308 | opaque_cred->cred = p; | ||
| 309 | return p + XDR_QUADLEN(opaque_cred->cred_len); | ||
| 310 | } | ||
| 311 | |||
| 312 | static __be32 * | ||
| 313 | __read_object_cred(__be32 *p, struct pnfs_osd_object_cred *comp) | ||
| 314 | { | ||
| 315 | p = _osd_xdr_decode_objid(p, &comp->oc_object_id); | ||
| 316 | comp->oc_osd_version = be32_to_cpup(p++); | ||
| 317 | comp->oc_cap_key_sec = be32_to_cpup(p++); | ||
| 318 | |||
| 319 | p = __read_opaque_cred(p, &comp->oc_cap_key); | ||
| 320 | p = __read_opaque_cred(p, &comp->oc_cap); | ||
| 321 | return p; | ||
| 322 | } | ||
| 323 | |||
| 324 | void pnfs_osd_xdr_decode_deviceaddr( | ||
| 325 | struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p) | ||
| 326 | { | ||
| 327 | p = __read_targetid(p, &deviceaddr->oda_targetid); | ||
| 328 | |||
| 329 | p = __read_targetaddr(p, &deviceaddr->oda_targetaddr); | ||
| 330 | |||
| 331 | p = xdr_decode_opaque_fixed(p, deviceaddr->oda_lun, | ||
| 332 | sizeof(deviceaddr->oda_lun)); | ||
| 333 | |||
| 334 | p = __read_u8_opaque(p, &deviceaddr->oda_systemid); | ||
| 335 | |||
| 336 | p = __read_object_cred(p, &deviceaddr->oda_root_obj_cred); | ||
| 337 | |||
| 338 | p = __read_u8_opaque(p, &deviceaddr->oda_osdname); | ||
| 339 | |||
| 340 | /* libosd likes this NUL-terminated in dbg prints. It's last, so no problems */ | ||
| 341 | deviceaddr->oda_osdname.data[deviceaddr->oda_osdname.len] = 0; | ||
| 342 | } | ||
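Every nfs4_string produced by this decode path points straight into the GETDEVICEINFO reply page; nothing is copied, which is why the note above warns that the buffer must outlive the returned pnfs_osd_deviceaddr. A small standalone model of that decode-in-place idiom:

	/* Model of decode-in-place: the "decoded" string is only a pointer into
	 * the receive buffer, so the buffer must not be freed while it is used. */
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <arpa/inet.h>

	struct xdr_str { uint32_t len; const char *data; };

	static const uint32_t *read_u8_opaque(const uint32_t *p, struct xdr_str *s)
	{
		s->len = ntohl(*p++);
		s->data = (const char *)p;              /* no copy is made */
		return p + ((s->len + 3) >> 2);         /* XDR_QUADLEN rounding */
	}

	int main(void)
	{
		/* buffer holding one XDR opaque<>: length 4, bytes "osd0" */
		uint32_t buf[3] = { htonl(4) };
		struct xdr_str name;

		memcpy(&buf[1], "osd0", 4);
		read_u8_opaque(buf, &name);
		printf("%.*s (still lives in buf)\n", (int)name.len, name.data);
		return 0;
	}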
| 343 | |||
| 344 | /* | ||
| 345 | * struct pnfs_osd_layoutupdate { | ||
| 346 | * u32 dsu_valid; | ||
| 347 | * s64 dsu_delta; | ||
| 348 | * u32 olu_ioerr_flag; | ||
| 349 | * }; xdr size 4 + 8 + 4 | ||
| 350 | */ | ||
| 351 | int | ||
| 352 | pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr, | ||
| 353 | struct pnfs_osd_layoutupdate *lou) | ||
| 354 | { | ||
| 355 | __be32 *p = xdr_reserve_space(xdr, 4 + 8 + 4); | ||
| 356 | |||
| 357 | if (!p) | ||
| 358 | return -E2BIG; | ||
| 359 | |||
| 360 | *p++ = cpu_to_be32(lou->dsu_valid); | ||
| 361 | if (lou->dsu_valid) | ||
| 362 | p = xdr_encode_hyper(p, lou->dsu_delta); | ||
| 363 | *p++ = cpu_to_be32(lou->olu_ioerr_flag); | ||
| 364 | return 0; | ||
| 365 | } | ||
| 366 | |||
| 367 | /* | ||
| 368 | * struct pnfs_osd_objid { | ||
| 369 | * struct nfs4_deviceid oid_device_id; | ||
| 370 | * u64 oid_partition_id; | ||
| 371 | * u64 oid_object_id; | ||
| 372 | * }; // xdr size 32 bytes | ||
| 373 | */ | ||
| 374 | static inline __be32 * | ||
| 375 | pnfs_osd_xdr_encode_objid(__be32 *p, struct pnfs_osd_objid *object_id) | ||
| 376 | { | ||
| 377 | p = xdr_encode_opaque_fixed(p, &object_id->oid_device_id.data, | ||
| 378 | sizeof(object_id->oid_device_id.data)); | ||
| 379 | p = xdr_encode_hyper(p, object_id->oid_partition_id); | ||
| 380 | p = xdr_encode_hyper(p, object_id->oid_object_id); | ||
| 381 | |||
| 382 | return p; | ||
| 383 | } | ||
| 384 | |||
| 385 | /* | ||
| 386 | * struct pnfs_osd_ioerr { | ||
| 387 | * struct pnfs_osd_objid oer_component; | ||
| 388 | * u64 oer_comp_offset; | ||
| 389 | * u64 oer_comp_length; | ||
| 390 | * u32 oer_iswrite; | ||
| 391 | * u32 oer_errno; | ||
| 392 | * }; // xdr size 32 + 24 bytes | ||
| 393 | */ | ||
| 394 | void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr) | ||
| 395 | { | ||
| 396 | p = pnfs_osd_xdr_encode_objid(p, &ioerr->oer_component); | ||
| 397 | p = xdr_encode_hyper(p, ioerr->oer_comp_offset); | ||
| 398 | p = xdr_encode_hyper(p, ioerr->oer_comp_length); | ||
| 399 | *p++ = cpu_to_be32(ioerr->oer_iswrite); | ||
| 400 | *p = cpu_to_be32(ioerr->oer_errno); | ||
| 401 | } | ||
| 402 | |||
| 403 | __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr) | ||
| 404 | { | ||
| 405 | __be32 *p; | ||
| 406 | |||
| 407 | p = xdr_reserve_space(xdr, 32 + 24); | ||
| 408 | if (unlikely(!p)) | ||
| 409 | dprintk("%s: out of xdr space\n", __func__); | ||
| 410 | |||
| 411 | return p; | ||
| 412 | } | ||
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index c80add6e2213..7913961aff22 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
| @@ -204,6 +204,21 @@ nfs_wait_on_request(struct nfs_page *req) | |||
| 204 | TASK_UNINTERRUPTIBLE); | 204 | TASK_UNINTERRUPTIBLE); |
| 205 | } | 205 | } |
| 206 | 206 | ||
| 207 | static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) | ||
| 208 | { | ||
| 209 | /* | ||
| 210 | * FIXME: ideally we should be able to coalesce all requests | ||
| 211 | * that are not block boundary aligned, but currently this | ||
| 212 | * is problematic for the case of bsize < PAGE_CACHE_SIZE, | ||
| 213 | * since nfs_flush_multi and nfs_pagein_multi assume you | ||
| 214 | * can have only one struct nfs_page. | ||
| 215 | */ | ||
| 216 | if (desc->pg_bsize < PAGE_SIZE) | ||
| 217 | return 0; | ||
| 218 | |||
| 219 | return desc->pg_count + req->wb_bytes <= desc->pg_bsize; | ||
| 220 | } | ||
| 221 | |||
| 207 | /** | 222 | /** |
| 208 | * nfs_pageio_init - initialise a page io descriptor | 223 | * nfs_pageio_init - initialise a page io descriptor |
| 209 | * @desc: pointer to descriptor | 224 | * @desc: pointer to descriptor |
| @@ -229,6 +244,8 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | |||
| 229 | desc->pg_ioflags = io_flags; | 244 | desc->pg_ioflags = io_flags; |
| 230 | desc->pg_error = 0; | 245 | desc->pg_error = 0; |
| 231 | desc->pg_lseg = NULL; | 246 | desc->pg_lseg = NULL; |
| 247 | desc->pg_test = nfs_generic_pg_test; | ||
| 248 | pnfs_pageio_init(desc, inode); | ||
| 232 | } | 249 | } |
| 233 | 250 | ||
| 234 | /** | 251 | /** |
| @@ -242,29 +259,23 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | |||
| 242 | * | 259 | * |
| 243 | * Return 'true' if this is the case, else return 'false'. | 260 | * Return 'true' if this is the case, else return 'false'. |
| 244 | */ | 261 | */ |
| 245 | static int nfs_can_coalesce_requests(struct nfs_page *prev, | 262 | static bool nfs_can_coalesce_requests(struct nfs_page *prev, |
| 246 | struct nfs_page *req, | 263 | struct nfs_page *req, |
| 247 | struct nfs_pageio_descriptor *pgio) | 264 | struct nfs_pageio_descriptor *pgio) |
| 248 | { | 265 | { |
| 249 | if (req->wb_context->cred != prev->wb_context->cred) | 266 | if (req->wb_context->cred != prev->wb_context->cred) |
| 250 | return 0; | 267 | return false; |
| 251 | if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner) | 268 | if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner) |
| 252 | return 0; | 269 | return false; |
| 253 | if (req->wb_context->state != prev->wb_context->state) | 270 | if (req->wb_context->state != prev->wb_context->state) |
| 254 | return 0; | 271 | return false; |
| 255 | if (req->wb_index != (prev->wb_index + 1)) | 272 | if (req->wb_index != (prev->wb_index + 1)) |
| 256 | return 0; | 273 | return false; |
| 257 | if (req->wb_pgbase != 0) | 274 | if (req->wb_pgbase != 0) |
| 258 | return 0; | 275 | return false; |
| 259 | if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) | 276 | if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) |
| 260 | return 0; | 277 | return false; |
| 261 | /* | 278 | return pgio->pg_test(pgio, prev, req); |
| 262 | * Non-whole file layouts need to check that req is inside of | ||
| 263 | * pgio->pg_lseg. | ||
| 264 | */ | ||
| 265 | if (pgio->pg_test && !pgio->pg_test(pgio, prev, req)) | ||
| 266 | return 0; | ||
| 267 | return 1; | ||
| 268 | } | 279 | } |
| 269 | 280 | ||
| 270 | /** | 281 | /** |
| @@ -278,31 +289,18 @@ static int nfs_can_coalesce_requests(struct nfs_page *prev, | |||
| 278 | static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, | 289 | static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, |
| 279 | struct nfs_page *req) | 290 | struct nfs_page *req) |
| 280 | { | 291 | { |
| 281 | size_t newlen = req->wb_bytes; | ||
| 282 | |||
| 283 | if (desc->pg_count != 0) { | 292 | if (desc->pg_count != 0) { |
| 284 | struct nfs_page *prev; | 293 | struct nfs_page *prev; |
| 285 | 294 | ||
| 286 | /* | ||
| 287 | * FIXME: ideally we should be able to coalesce all requests | ||
| 288 | * that are not block boundary aligned, but currently this | ||
| 289 | * is problematic for the case of bsize < PAGE_CACHE_SIZE, | ||
| 290 | * since nfs_flush_multi and nfs_pagein_multi assume you | ||
| 291 | * can have only one struct nfs_page. | ||
| 292 | */ | ||
| 293 | if (desc->pg_bsize < PAGE_SIZE) | ||
| 294 | return 0; | ||
| 295 | newlen += desc->pg_count; | ||
| 296 | if (newlen > desc->pg_bsize) | ||
| 297 | return 0; | ||
| 298 | prev = nfs_list_entry(desc->pg_list.prev); | 295 | prev = nfs_list_entry(desc->pg_list.prev); |
| 299 | if (!nfs_can_coalesce_requests(prev, req, desc)) | 296 | if (!nfs_can_coalesce_requests(prev, req, desc)) |
| 300 | return 0; | 297 | return 0; |
| 301 | } else | 298 | } else { |
| 302 | desc->pg_base = req->wb_pgbase; | 299 | desc->pg_base = req->wb_pgbase; |
| 300 | } | ||
| 303 | nfs_list_remove_request(req); | 301 | nfs_list_remove_request(req); |
| 304 | nfs_list_add_request(req, &desc->pg_list); | 302 | nfs_list_add_request(req, &desc->pg_list); |
| 305 | desc->pg_count = newlen; | 303 | desc->pg_count += req->wb_bytes; |
| 306 | return 1; | 304 | return 1; |
| 307 | } | 305 | } |
| 308 | 306 | ||
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 101c85a3644e..8c1309d852a6 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
| @@ -177,13 +177,28 @@ get_layout_hdr(struct pnfs_layout_hdr *lo) | |||
| 177 | atomic_inc(&lo->plh_refcount); | 177 | atomic_inc(&lo->plh_refcount); |
| 178 | } | 178 | } |
| 179 | 179 | ||
| 180 | static struct pnfs_layout_hdr * | ||
| 181 | pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags) | ||
| 182 | { | ||
| 183 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; | ||
| 184 | return ld->alloc_layout_hdr ? ld->alloc_layout_hdr(ino, gfp_flags) : | ||
| 185 | kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags); | ||
| 186 | } | ||
| 187 | |||
| 188 | static void | ||
| 189 | pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) | ||
| 190 | { | ||
| 191 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld; | ||
| 192 | return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo); | ||
| 193 | } | ||
| 194 | |||
| 180 | static void | 195 | static void |
| 181 | destroy_layout_hdr(struct pnfs_layout_hdr *lo) | 196 | destroy_layout_hdr(struct pnfs_layout_hdr *lo) |
| 182 | { | 197 | { |
| 183 | dprintk("%s: freeing layout cache %p\n", __func__, lo); | 198 | dprintk("%s: freeing layout cache %p\n", __func__, lo); |
| 184 | BUG_ON(!list_empty(&lo->plh_layouts)); | 199 | BUG_ON(!list_empty(&lo->plh_layouts)); |
| 185 | NFS_I(lo->plh_inode)->layout = NULL; | 200 | NFS_I(lo->plh_inode)->layout = NULL; |
| 186 | kfree(lo); | 201 | pnfs_free_layout_hdr(lo); |
| 187 | } | 202 | } |
| 188 | 203 | ||
| 189 | static void | 204 | static void |
| @@ -228,7 +243,7 @@ put_lseg_common(struct pnfs_layout_segment *lseg) | |||
| 228 | { | 243 | { |
| 229 | struct inode *inode = lseg->pls_layout->plh_inode; | 244 | struct inode *inode = lseg->pls_layout->plh_inode; |
| 230 | 245 | ||
| 231 | BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); | 246 | WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); |
| 232 | list_del_init(&lseg->pls_list); | 247 | list_del_init(&lseg->pls_list); |
| 233 | if (list_empty(&lseg->pls_layout->plh_segs)) { | 248 | if (list_empty(&lseg->pls_layout->plh_segs)) { |
| 234 | set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); | 249 | set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); |
| @@ -261,11 +276,72 @@ put_lseg(struct pnfs_layout_segment *lseg) | |||
| 261 | } | 276 | } |
| 262 | EXPORT_SYMBOL_GPL(put_lseg); | 277 | EXPORT_SYMBOL_GPL(put_lseg); |
| 263 | 278 | ||
| 279 | static inline u64 | ||
| 280 | end_offset(u64 start, u64 len) | ||
| 281 | { | ||
| 282 | u64 end; | ||
| 283 | |||
| 284 | end = start + len; | ||
| 285 | return end >= start ? end : NFS4_MAX_UINT64; | ||
| 286 | } | ||
| 287 | |||
| 288 | /* last octet in a range */ | ||
| 289 | static inline u64 | ||
| 290 | last_byte_offset(u64 start, u64 len) | ||
| 291 | { | ||
| 292 | u64 end; | ||
| 293 | |||
| 294 | BUG_ON(!len); | ||
| 295 | end = start + len; | ||
| 296 | return end > start ? end - 1 : NFS4_MAX_UINT64; | ||
| 297 | } | ||
| 298 | |||
| 299 | /* | ||
| 300 | * is l2 fully contained in l1? | ||
| 301 | * start1 end1 | ||
| 302 | * [----------------------------------) | ||
| 303 | * start2 end2 | ||
| 304 | * [----------------) | ||
| 305 | */ | ||
| 306 | static inline int | ||
| 307 | lo_seg_contained(struct pnfs_layout_range *l1, | ||
| 308 | struct pnfs_layout_range *l2) | ||
| 309 | { | ||
| 310 | u64 start1 = l1->offset; | ||
| 311 | u64 end1 = end_offset(start1, l1->length); | ||
| 312 | u64 start2 = l2->offset; | ||
| 313 | u64 end2 = end_offset(start2, l2->length); | ||
| 314 | |||
| 315 | return (start1 <= start2) && (end1 >= end2); | ||
| 316 | } | ||
| 317 | |||
| 318 | /* | ||
| 319 | * are l1 and l2 intersecting? | ||
| 320 | * start1 end1 | ||
| 321 | * [----------------------------------) | ||
| 322 | * start2 end2 | ||
| 323 | * [----------------) | ||
| 324 | */ | ||
| 325 | static inline int | ||
| 326 | lo_seg_intersecting(struct pnfs_layout_range *l1, | ||
| 327 | struct pnfs_layout_range *l2) | ||
| 328 | { | ||
| 329 | u64 start1 = l1->offset; | ||
| 330 | u64 end1 = end_offset(start1, l1->length); | ||
| 331 | u64 start2 = l2->offset; | ||
| 332 | u64 end2 = end_offset(start2, l2->length); | ||
| 333 | |||
| 334 | return (end1 == NFS4_MAX_UINT64 || end1 > start2) && | ||
| 335 | (end2 == NFS4_MAX_UINT64 || end2 > start1); | ||
| 336 | } | ||
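end_offset() saturates to NFS4_MAX_UINT64 on overflow, so a range whose length is NFS4_MAX_UINT64 behaves as "to end of file", and the containment/intersection helpers treat a saturated end as unbounded. A runnable restatement of those predicates for checking that edge case (standalone, not the kernel code itself):

	/* Standalone re-statement of the range predicates above. */
	#include <stdint.h>
	#include <stdio.h>

	#define MAX_U64 (~0ULL)

	struct range { uint64_t off, len; };

	static uint64_t end_of(const struct range *r)
	{
		uint64_t end = r->off + r->len;
		return end >= r->off ? end : MAX_U64;   /* saturate on overflow */
	}

	static int contained(const struct range *l1, const struct range *l2)
	{
		return l1->off <= l2->off && end_of(l1) >= end_of(l2);
	}

	static int intersecting(const struct range *l1, const struct range *l2)
	{
		uint64_t e1 = end_of(l1), e2 = end_of(l2);

		return (e1 == MAX_U64 || e1 > l2->off) &&
		       (e2 == MAX_U64 || e2 > l1->off);
	}

	int main(void)
	{
		struct range whole = { 0, MAX_U64 }, io = { 1 << 20, 4096 };

		printf("contained=%d intersecting=%d\n",
		       contained(&whole, &io), intersecting(&whole, &io));
		return 0;
	}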
| 337 | |||
| 264 | static bool | 338 | static bool |
| 265 | should_free_lseg(u32 lseg_iomode, u32 recall_iomode) | 339 | should_free_lseg(struct pnfs_layout_range *lseg_range, |
| 340 | struct pnfs_layout_range *recall_range) | ||
| 266 | { | 341 | { |
| 267 | return (recall_iomode == IOMODE_ANY || | 342 | return (recall_range->iomode == IOMODE_ANY || |
| 268 | lseg_iomode == recall_iomode); | 343 | lseg_range->iomode == recall_range->iomode) && |
| 344 | lo_seg_intersecting(lseg_range, recall_range); | ||
| 269 | } | 345 | } |
| 270 | 346 | ||
| 271 | /* Returns 1 if lseg is removed from list, 0 otherwise */ | 347 | /* Returns 1 if lseg is removed from list, 0 otherwise */ |
| @@ -296,7 +372,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, | |||
| 296 | int | 372 | int |
| 297 | mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, | 373 | mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, |
| 298 | struct list_head *tmp_list, | 374 | struct list_head *tmp_list, |
| 299 | u32 iomode) | 375 | struct pnfs_layout_range *recall_range) |
| 300 | { | 376 | { |
| 301 | struct pnfs_layout_segment *lseg, *next; | 377 | struct pnfs_layout_segment *lseg, *next; |
| 302 | int invalid = 0, removed = 0; | 378 | int invalid = 0, removed = 0; |
| @@ -309,7 +385,8 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, | |||
| 309 | return 0; | 385 | return 0; |
| 310 | } | 386 | } |
| 311 | list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) | 387 | list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) |
| 312 | if (should_free_lseg(lseg->pls_range.iomode, iomode)) { | 388 | if (!recall_range || |
| 389 | should_free_lseg(&lseg->pls_range, recall_range)) { | ||
| 313 | dprintk("%s: freeing lseg %p iomode %d " | 390 | dprintk("%s: freeing lseg %p iomode %d " |
| 314 | "offset %llu length %llu\n", __func__, | 391 | "offset %llu length %llu\n", __func__, |
| 315 | lseg, lseg->pls_range.iomode, lseg->pls_range.offset, | 392 | lseg, lseg->pls_range.iomode, lseg->pls_range.offset, |
| @@ -358,7 +435,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) | |||
| 358 | lo = nfsi->layout; | 435 | lo = nfsi->layout; |
| 359 | if (lo) { | 436 | if (lo) { |
| 360 | lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ | 437 | lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ |
| 361 | mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY); | 438 | mark_matching_lsegs_invalid(lo, &tmp_list, NULL); |
| 362 | } | 439 | } |
| 363 | spin_unlock(&nfsi->vfs_inode.i_lock); | 440 | spin_unlock(&nfsi->vfs_inode.i_lock); |
| 364 | pnfs_free_lseg_list(&tmp_list); | 441 | pnfs_free_lseg_list(&tmp_list); |
| @@ -467,7 +544,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, | |||
| 467 | static struct pnfs_layout_segment * | 544 | static struct pnfs_layout_segment * |
| 468 | send_layoutget(struct pnfs_layout_hdr *lo, | 545 | send_layoutget(struct pnfs_layout_hdr *lo, |
| 469 | struct nfs_open_context *ctx, | 546 | struct nfs_open_context *ctx, |
| 470 | u32 iomode, | 547 | struct pnfs_layout_range *range, |
| 471 | gfp_t gfp_flags) | 548 | gfp_t gfp_flags) |
| 472 | { | 549 | { |
| 473 | struct inode *ino = lo->plh_inode; | 550 | struct inode *ino = lo->plh_inode; |
| @@ -499,11 +576,11 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
| 499 | goto out_err_free; | 576 | goto out_err_free; |
| 500 | } | 577 | } |
| 501 | 578 | ||
| 502 | lgp->args.minlength = NFS4_MAX_UINT64; | 579 | lgp->args.minlength = PAGE_CACHE_SIZE; |
| 580 | if (lgp->args.minlength > range->length) | ||
| 581 | lgp->args.minlength = range->length; | ||
| 503 | lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; | 582 | lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; |
| 504 | lgp->args.range.iomode = iomode; | 583 | lgp->args.range = *range; |
| 505 | lgp->args.range.offset = 0; | ||
| 506 | lgp->args.range.length = NFS4_MAX_UINT64; | ||
| 507 | lgp->args.type = server->pnfs_curr_ld->id; | 584 | lgp->args.type = server->pnfs_curr_ld->id; |
| 508 | lgp->args.inode = ino; | 585 | lgp->args.inode = ino; |
| 509 | lgp->args.ctx = get_nfs_open_context(ctx); | 586 | lgp->args.ctx = get_nfs_open_context(ctx); |
| @@ -518,7 +595,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
| 518 | nfs4_proc_layoutget(lgp); | 595 | nfs4_proc_layoutget(lgp); |
| 519 | if (!lseg) { | 596 | if (!lseg) { |
| 520 | /* remember that LAYOUTGET failed and suspend trying */ | 597 | /* remember that LAYOUTGET failed and suspend trying */ |
| 521 | set_bit(lo_fail_bit(iomode), &lo->plh_flags); | 598 | set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); |
| 522 | } | 599 | } |
| 523 | 600 | ||
| 524 | /* free xdr pages */ | 601 | /* free xdr pages */ |
| @@ -542,6 +619,51 @@ out_err_free: | |||
| 542 | return NULL; | 619 | return NULL; |
| 543 | } | 620 | } |
| 544 | 621 | ||
| 622 | /* Initiates a LAYOUTRETURN(FILE) */ | ||
| 623 | int | ||
| 624 | _pnfs_return_layout(struct inode *ino) | ||
| 625 | { | ||
| 626 | struct pnfs_layout_hdr *lo = NULL; | ||
| 627 | struct nfs_inode *nfsi = NFS_I(ino); | ||
| 628 | LIST_HEAD(tmp_list); | ||
| 629 | struct nfs4_layoutreturn *lrp; | ||
| 630 | nfs4_stateid stateid; | ||
| 631 | int status = 0; | ||
| 632 | |||
| 633 | dprintk("--> %s\n", __func__); | ||
| 634 | |||
| 635 | spin_lock(&ino->i_lock); | ||
| 636 | lo = nfsi->layout; | ||
| 637 | if (!lo || !mark_matching_lsegs_invalid(lo, &tmp_list, NULL)) { | ||
| 638 | spin_unlock(&ino->i_lock); | ||
| 639 | dprintk("%s: no layout segments to return\n", __func__); | ||
| 640 | goto out; | ||
| 641 | } | ||
| 642 | stateid = nfsi->layout->plh_stateid; | ||
| 643 | /* Reference matched in nfs4_layoutreturn_release */ | ||
| 644 | get_layout_hdr(lo); | ||
| 645 | spin_unlock(&ino->i_lock); | ||
| 646 | pnfs_free_lseg_list(&tmp_list); | ||
| 647 | |||
| 648 | WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)); | ||
| 649 | |||
| 650 | lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); | ||
| 651 | if (unlikely(lrp == NULL)) { | ||
| 652 | status = -ENOMEM; | ||
| 653 | goto out; | ||
| 654 | } | ||
| 655 | |||
| 656 | lrp->args.stateid = stateid; | ||
| 657 | lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; | ||
| 658 | lrp->args.inode = ino; | ||
| 659 | lrp->clp = NFS_SERVER(ino)->nfs_client; | ||
| 660 | |||
| 661 | status = nfs4_proc_layoutreturn(lrp); | ||
| 662 | out: | ||
| 663 | dprintk("<-- %s status: %d\n", __func__, status); | ||
| 664 | return status; | ||
| 665 | } | ||
| 666 | |||
| 545 | bool pnfs_roc(struct inode *ino) | 667 | bool pnfs_roc(struct inode *ino) |
| 546 | { | 668 | { |
| 547 | struct pnfs_layout_hdr *lo; | 669 | struct pnfs_layout_hdr *lo; |
| @@ -625,10 +747,23 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier) | |||
| 625 | * are seen first. | 747 | * are seen first. |
| 626 | */ | 748 | */ |
| 627 | static s64 | 749 | static s64 |
| 628 | cmp_layout(u32 iomode1, u32 iomode2) | 750 | cmp_layout(struct pnfs_layout_range *l1, |
| 751 | struct pnfs_layout_range *l2) | ||
| 629 | { | 752 | { |
| 753 | s64 d; | ||
| 754 | |||
| 755 | /* high offset > low offset */ | ||
| 756 | d = l1->offset - l2->offset; | ||
| 757 | if (d) | ||
| 758 | return d; | ||
| 759 | |||
| 760 | /* short length > long length */ | ||
| 761 | d = l2->length - l1->length; | ||
| 762 | if (d) | ||
| 763 | return d; | ||
| 764 | |||
| 630 | /* read > read/write */ | 765 | /* read > read/write */ |
| 631 | return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ); | 766 | return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ); |
| 632 | } | 767 | } |
| 633 | 768 | ||
| 634 | static void | 769 | static void |
| @@ -636,13 +771,12 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, | |||
| 636 | struct pnfs_layout_segment *lseg) | 771 | struct pnfs_layout_segment *lseg) |
| 637 | { | 772 | { |
| 638 | struct pnfs_layout_segment *lp; | 773 | struct pnfs_layout_segment *lp; |
| 639 | int found = 0; | ||
| 640 | 774 | ||
| 641 | dprintk("%s:Begin\n", __func__); | 775 | dprintk("%s:Begin\n", __func__); |
| 642 | 776 | ||
| 643 | assert_spin_locked(&lo->plh_inode->i_lock); | 777 | assert_spin_locked(&lo->plh_inode->i_lock); |
| 644 | list_for_each_entry(lp, &lo->plh_segs, pls_list) { | 778 | list_for_each_entry(lp, &lo->plh_segs, pls_list) { |
| 645 | if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0) | 779 | if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0) |
| 646 | continue; | 780 | continue; |
| 647 | list_add_tail(&lseg->pls_list, &lp->pls_list); | 781 | list_add_tail(&lseg->pls_list, &lp->pls_list); |
| 648 | dprintk("%s: inserted lseg %p " | 782 | dprintk("%s: inserted lseg %p " |
| @@ -652,16 +786,14 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, | |||
| 652 | lseg->pls_range.offset, lseg->pls_range.length, | 786 | lseg->pls_range.offset, lseg->pls_range.length, |
| 653 | lp, lp->pls_range.iomode, lp->pls_range.offset, | 787 | lp, lp->pls_range.iomode, lp->pls_range.offset, |
| 654 | lp->pls_range.length); | 788 | lp->pls_range.length); |
| 655 | found = 1; | 789 | goto out; |
| 656 | break; | ||
| 657 | } | ||
| 658 | if (!found) { | ||
| 659 | list_add_tail(&lseg->pls_list, &lo->plh_segs); | ||
| 660 | dprintk("%s: inserted lseg %p " | ||
| 661 | "iomode %d offset %llu length %llu at tail\n", | ||
| 662 | __func__, lseg, lseg->pls_range.iomode, | ||
| 663 | lseg->pls_range.offset, lseg->pls_range.length); | ||
| 664 | } | 790 | } |
| 791 | list_add_tail(&lseg->pls_list, &lo->plh_segs); | ||
| 792 | dprintk("%s: inserted lseg %p " | ||
| 793 | "iomode %d offset %llu length %llu at tail\n", | ||
| 794 | __func__, lseg, lseg->pls_range.iomode, | ||
| 795 | lseg->pls_range.offset, lseg->pls_range.length); | ||
| 796 | out: | ||
| 665 | get_layout_hdr(lo); | 797 | get_layout_hdr(lo); |
| 666 | 798 | ||
| 667 | dprintk("%s:Return\n", __func__); | 799 | dprintk("%s:Return\n", __func__); |
| @@ -672,7 +804,7 @@ alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags) | |||
| 672 | { | 804 | { |
| 673 | struct pnfs_layout_hdr *lo; | 805 | struct pnfs_layout_hdr *lo; |
| 674 | 806 | ||
| 675 | lo = kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags); | 807 | lo = pnfs_alloc_layout_hdr(ino, gfp_flags); |
| 676 | if (!lo) | 808 | if (!lo) |
| 677 | return NULL; | 809 | return NULL; |
| 678 | atomic_set(&lo->plh_refcount, 1); | 810 | atomic_set(&lo->plh_refcount, 1); |
| @@ -705,7 +837,7 @@ pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags) | |||
| 705 | if (likely(nfsi->layout == NULL)) /* Won the race? */ | 837 | if (likely(nfsi->layout == NULL)) /* Won the race? */ |
| 706 | nfsi->layout = new; | 838 | nfsi->layout = new; |
| 707 | else | 839 | else |
| 708 | kfree(new); | 840 | pnfs_free_layout_hdr(new); |
| 709 | return nfsi->layout; | 841 | return nfsi->layout; |
| 710 | } | 842 | } |
| 711 | 843 | ||
| @@ -721,16 +853,28 @@ pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags) | |||
| 721 | * READ RW true | 853 | * READ RW true |
| 722 | */ | 854 | */ |
| 723 | static int | 855 | static int |
| 724 | is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode) | 856 | is_matching_lseg(struct pnfs_layout_range *ls_range, |
| 857 | struct pnfs_layout_range *range) | ||
| 725 | { | 858 | { |
| 726 | return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW); | 859 | struct pnfs_layout_range range1; |
| 860 | |||
| 861 | if ((range->iomode == IOMODE_RW && | ||
| 862 | ls_range->iomode != IOMODE_RW) || | ||
| 863 | !lo_seg_intersecting(ls_range, range)) | ||
| 864 | return 0; | ||
| 865 | |||
| 866 | /* range1 covers only the first byte in the range */ | ||
| 867 | range1 = *range; | ||
| 868 | range1.length = 1; | ||
| 869 | return lo_seg_contained(ls_range, &range1); | ||
| 727 | } | 870 | } |
| 728 | 871 | ||
| 729 | /* | 872 | /* |
| 730 | * lookup range in layout | 873 | * lookup range in layout |
| 731 | */ | 874 | */ |
| 732 | static struct pnfs_layout_segment * | 875 | static struct pnfs_layout_segment * |
| 733 | pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) | 876 | pnfs_find_lseg(struct pnfs_layout_hdr *lo, |
| 877 | struct pnfs_layout_range *range) | ||
| 734 | { | 878 | { |
| 735 | struct pnfs_layout_segment *lseg, *ret = NULL; | 879 | struct pnfs_layout_segment *lseg, *ret = NULL; |
| 736 | 880 | ||
| @@ -739,11 +883,11 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) | |||
| 739 | assert_spin_locked(&lo->plh_inode->i_lock); | 883 | assert_spin_locked(&lo->plh_inode->i_lock); |
| 740 | list_for_each_entry(lseg, &lo->plh_segs, pls_list) { | 884 | list_for_each_entry(lseg, &lo->plh_segs, pls_list) { |
| 741 | if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && | 885 | if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && |
| 742 | is_matching_lseg(lseg, iomode)) { | 886 | is_matching_lseg(&lseg->pls_range, range)) { |
| 743 | ret = get_lseg(lseg); | 887 | ret = get_lseg(lseg); |
| 744 | break; | 888 | break; |
| 745 | } | 889 | } |
| 746 | if (cmp_layout(iomode, lseg->pls_range.iomode) > 0) | 890 | if (cmp_layout(range, &lseg->pls_range) > 0) |
| 747 | break; | 891 | break; |
| 748 | } | 892 | } |
| 749 | 893 | ||
| @@ -759,9 +903,17 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) | |||
| 759 | struct pnfs_layout_segment * | 903 | struct pnfs_layout_segment * |
| 760 | pnfs_update_layout(struct inode *ino, | 904 | pnfs_update_layout(struct inode *ino, |
| 761 | struct nfs_open_context *ctx, | 905 | struct nfs_open_context *ctx, |
| 906 | loff_t pos, | ||
| 907 | u64 count, | ||
| 762 | enum pnfs_iomode iomode, | 908 | enum pnfs_iomode iomode, |
| 763 | gfp_t gfp_flags) | 909 | gfp_t gfp_flags) |
| 764 | { | 910 | { |
| 911 | struct pnfs_layout_range arg = { | ||
| 912 | .iomode = iomode, | ||
| 913 | .offset = pos, | ||
| 914 | .length = count, | ||
| 915 | }; | ||
| 916 | unsigned pg_offset; | ||
| 765 | struct nfs_inode *nfsi = NFS_I(ino); | 917 | struct nfs_inode *nfsi = NFS_I(ino); |
| 766 | struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; | 918 | struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; |
| 767 | struct pnfs_layout_hdr *lo; | 919 | struct pnfs_layout_hdr *lo; |
| @@ -789,7 +941,7 @@ pnfs_update_layout(struct inode *ino, | |||
| 789 | goto out_unlock; | 941 | goto out_unlock; |
| 790 | 942 | ||
| 791 | /* Check to see if the layout for the given range already exists */ | 943 | /* Check to see if the layout for the given range already exists */ |
| 792 | lseg = pnfs_find_lseg(lo, iomode); | 944 | lseg = pnfs_find_lseg(lo, &arg); |
| 793 | if (lseg) | 945 | if (lseg) |
| 794 | goto out_unlock; | 946 | goto out_unlock; |
| 795 | 947 | ||
| @@ -811,7 +963,14 @@ pnfs_update_layout(struct inode *ino, | |||
| 811 | spin_unlock(&clp->cl_lock); | 963 | spin_unlock(&clp->cl_lock); |
| 812 | } | 964 | } |
| 813 | 965 | ||
| 814 | lseg = send_layoutget(lo, ctx, iomode, gfp_flags); | 966 | pg_offset = arg.offset & ~PAGE_CACHE_MASK; |
| 967 | if (pg_offset) { | ||
| 968 | arg.offset -= pg_offset; | ||
| 969 | arg.length += pg_offset; | ||
| 970 | } | ||
| 971 | arg.length = PAGE_CACHE_ALIGN(arg.length); | ||
| 972 | |||
| 973 | lseg = send_layoutget(lo, ctx, &arg, gfp_flags); | ||
| 815 | if (!lseg && first) { | 974 | if (!lseg && first) { |
| 816 | spin_lock(&clp->cl_lock); | 975 | spin_lock(&clp->cl_lock); |
| 817 | list_del_init(&lo->plh_layouts); | 976 | list_del_init(&lo->plh_layouts); |
| @@ -838,17 +997,6 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) | |||
| 838 | struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; | 997 | struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; |
| 839 | int status = 0; | 998 | int status = 0; |
| 840 | 999 | ||
| 841 | /* Verify we got what we asked for. | ||
| 842 | * Note that because the xdr parsing only accepts a single | ||
| 843 | * element array, this can fail even if the server is behaving | ||
| 844 | * correctly. | ||
| 845 | */ | ||
| 846 | if (lgp->args.range.iomode > res->range.iomode || | ||
| 847 | res->range.offset != 0 || | ||
| 848 | res->range.length != NFS4_MAX_UINT64) { | ||
| 849 | status = -EINVAL; | ||
| 850 | goto out; | ||
| 851 | } | ||
| 852 | /* Inject layout blob into I/O device driver */ | 1000 | /* Inject layout blob into I/O device driver */ |
| 853 | lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags); | 1001 | lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags); |
| 854 | if (!lseg || IS_ERR(lseg)) { | 1002 | if (!lseg || IS_ERR(lseg)) { |
| @@ -895,51 +1043,64 @@ out_forget_reply: | |||
| 895 | goto out; | 1043 | goto out; |
| 896 | } | 1044 | } |
| 897 | 1045 | ||
| 898 | static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio, | 1046 | bool |
| 899 | struct nfs_page *prev, | 1047 | pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, |
| 900 | struct nfs_page *req) | 1048 | struct nfs_page *req) |
| 901 | { | 1049 | { |
| 1050 | enum pnfs_iomode access_type; | ||
| 1051 | gfp_t gfp_flags; | ||
| 1052 | |||
| 1053 | /* We assume that pg_ioflags == 0 iff we're reading a page */ | ||
| 1054 | if (pgio->pg_ioflags == 0) { | ||
| 1055 | access_type = IOMODE_READ; | ||
| 1056 | gfp_flags = GFP_KERNEL; | ||
| 1057 | } else { | ||
| 1058 | access_type = IOMODE_RW; | ||
| 1059 | gfp_flags = GFP_NOFS; | ||
| 1060 | } | ||
| 1061 | |||
| 902 | if (pgio->pg_count == prev->wb_bytes) { | 1062 | if (pgio->pg_count == prev->wb_bytes) { |
| 903 | /* This is the first coalesce call for a series of nfs_pages */ | 1063 | /* This is the first coalesce call for a series of nfs_pages */ |
| 904 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 1064 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, |
| 905 | prev->wb_context, | 1065 | prev->wb_context, |
| 906 | IOMODE_READ, | 1066 | req_offset(req), |
| 907 | GFP_KERNEL); | 1067 | pgio->pg_count, |
| 1068 | access_type, | ||
| 1069 | gfp_flags); | ||
| 1070 | return true; | ||
| 908 | } | 1071 | } |
| 909 | return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); | ||
| 910 | } | ||
| 911 | 1072 | ||
| 912 | void | 1073 | if (pgio->pg_lseg && |
| 913 | pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) | 1074 | req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, |
| 914 | { | 1075 | pgio->pg_lseg->pls_range.length)) |
| 915 | struct pnfs_layoutdriver_type *ld; | 1076 | return false; |
| 916 | 1077 | ||
| 917 | ld = NFS_SERVER(inode)->pnfs_curr_ld; | 1078 | return true; |
| 918 | pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL; | ||
| 919 | } | 1079 | } |
| 1080 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); | ||
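Editor's note: pnfs_generic_pg_test() is exported so a layout driver can use it as its pg_test hook, either directly or wrapped with a driver-specific constraint. A minimal sketch of the wrapped form; the EXAMPLE_MAX_IO_SIZE limit and the example_pg_test name are illustrative only, not code from this merge:

    #define EXAMPLE_MAX_IO_SIZE (1024 * 1024)   /* hypothetical per-driver limit */

    static bool
    example_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
                    struct nfs_page *req)
    {
            /* first run the generic first-call / lseg-range checks */
            if (!pnfs_generic_pg_test(pgio, prev, req))
                    return false;

            /* then refuse to coalesce past the driver's maximum I/O size */
            return pgio->pg_count + req->wb_bytes <= EXAMPLE_MAX_IO_SIZE;
    }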
| 920 | 1081 | ||
| 921 | static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio, | 1082 | /* |
| 922 | struct nfs_page *prev, | 1083 | * Called by non rpc-based layout drivers |
| 923 | struct nfs_page *req) | 1084 | */ |
| 1085 | int | ||
| 1086 | pnfs_ld_write_done(struct nfs_write_data *data) | ||
| 924 | { | 1087 | { |
| 925 | if (pgio->pg_count == prev->wb_bytes) { | 1088 | int status; |
| 926 | /* This is the first coalesce call for a series of nfs_pages */ | ||
| 927 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | ||
| 928 | prev->wb_context, | ||
| 929 | IOMODE_RW, | ||
| 930 | GFP_NOFS); | ||
| 931 | } | ||
| 932 | return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); | ||
| 933 | } | ||
| 934 | 1089 | ||
| 935 | void | 1090 | if (!data->pnfs_error) { |
| 936 | pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode) | 1091 | pnfs_set_layoutcommit(data); |
| 937 | { | 1092 | data->mds_ops->rpc_call_done(&data->task, data); |
| 938 | struct pnfs_layoutdriver_type *ld; | 1093 | data->mds_ops->rpc_release(data); |
| 1094 | return 0; | ||
| 1095 | } | ||
| 939 | 1096 | ||
| 940 | ld = NFS_SERVER(inode)->pnfs_curr_ld; | 1097 | dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__, |
| 941 | pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL; | 1098 | data->pnfs_error); |
| 1099 | status = nfs_initiate_write(data, NFS_CLIENT(data->inode), | ||
| 1100 | data->mds_ops, NFS_FILE_SYNC); | ||
| 1101 | return status ? : -EAGAIN; | ||
| 942 | } | 1102 | } |
| 1103 | EXPORT_SYMBOL_GPL(pnfs_ld_write_done); | ||
| 943 | 1104 | ||
| 944 | enum pnfs_try_status | 1105 | enum pnfs_try_status |
| 945 | pnfs_try_to_write_data(struct nfs_write_data *wdata, | 1106 | pnfs_try_to_write_data(struct nfs_write_data *wdata, |
| @@ -966,6 +1127,29 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, | |||
| 966 | } | 1127 | } |
| 967 | 1128 | ||
| 968 | /* | 1129 | /* |
| 1130 | * Called by non rpc-based layout drivers | ||
| 1131 | */ | ||
| 1132 | int | ||
| 1133 | pnfs_ld_read_done(struct nfs_read_data *data) | ||
| 1134 | { | ||
| 1135 | int status; | ||
| 1136 | |||
| 1137 | if (!data->pnfs_error) { | ||
| 1138 | __nfs4_read_done_cb(data); | ||
| 1139 | data->mds_ops->rpc_call_done(&data->task, data); | ||
| 1140 | data->mds_ops->rpc_release(data); | ||
| 1141 | return 0; | ||
| 1142 | } | ||
| 1143 | |||
| 1144 | dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__, | ||
| 1145 | data->pnfs_error); | ||
| 1146 | status = nfs_initiate_read(data, NFS_CLIENT(data->inode), | ||
| 1147 | data->mds_ops); | ||
| 1148 | return status ? : -EAGAIN; | ||
| 1149 | } | ||
| 1150 | EXPORT_SYMBOL_GPL(pnfs_ld_read_done); | ||
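Editor's note: pnfs_ld_write_done() and pnfs_ld_read_done() give layout drivers that bypass the RPC layer (an OSD initiator, for instance) a single completion path: when data->pnfs_error is zero the result is handed to the normal MDS callbacks, otherwise the request is reissued through the MDS. A hedged sketch of how a driver's own completion handler might feed into this; the function name and error plumbing are illustrative:

    /* called from the driver's I/O completion context */
    static void example_read_complete(struct nfs_read_data *rdata, int status)
    {
            if (status)
                    rdata->pnfs_error = status;   /* e.g. -EIO: retried via the MDS */

            /* return value only matters to the MDS-resend path; ignored here */
            pnfs_ld_read_done(rdata);
    }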
| 1151 | |||
| 1152 | /* | ||
| 969 | * Call the appropriate parallel I/O subsystem read function. | 1153 | * Call the appropriate parallel I/O subsystem read function. |
| 970 | */ | 1154 | */ |
| 971 | enum pnfs_try_status | 1155 | enum pnfs_try_status |
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 0c015bad9e7a..48d0a8e4d062 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
| @@ -30,6 +30,7 @@ | |||
| 30 | #ifndef FS_NFS_PNFS_H | 30 | #ifndef FS_NFS_PNFS_H |
| 31 | #define FS_NFS_PNFS_H | 31 | #define FS_NFS_PNFS_H |
| 32 | 32 | ||
| 33 | #include <linux/nfs_fs.h> | ||
| 33 | #include <linux/nfs_page.h> | 34 | #include <linux/nfs_page.h> |
| 34 | 35 | ||
| 35 | enum { | 36 | enum { |
| @@ -64,17 +65,29 @@ enum { | |||
| 64 | NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */ | 65 | NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */ |
| 65 | }; | 66 | }; |
| 66 | 67 | ||
| 68 | enum layoutdriver_policy_flags { | ||
| 69 | /* Should the pNFS client commit and return the layout upon a setattr */ | ||
| 70 | PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, | ||
| 71 | }; | ||
| 72 | |||
| 73 | struct nfs4_deviceid_node; | ||
| 74 | |||
| 67 | /* Per-layout driver specific registration structure */ | 75 | /* Per-layout driver specific registration structure */ |
| 68 | struct pnfs_layoutdriver_type { | 76 | struct pnfs_layoutdriver_type { |
| 69 | struct list_head pnfs_tblid; | 77 | struct list_head pnfs_tblid; |
| 70 | const u32 id; | 78 | const u32 id; |
| 71 | const char *name; | 79 | const char *name; |
| 72 | struct module *owner; | 80 | struct module *owner; |
| 81 | unsigned flags; | ||
| 82 | |||
| 83 | struct pnfs_layout_hdr * (*alloc_layout_hdr) (struct inode *inode, gfp_t gfp_flags); | ||
| 84 | void (*free_layout_hdr) (struct pnfs_layout_hdr *); | ||
| 85 | |||
| 73 | struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); | 86 | struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); |
| 74 | void (*free_lseg) (struct pnfs_layout_segment *lseg); | 87 | void (*free_lseg) (struct pnfs_layout_segment *lseg); |
| 75 | 88 | ||
| 76 | /* test for nfs page cache coalescing */ | 89 | /* test for nfs page cache coalescing */ |
| 77 | int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); | 90 | bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); |
| 78 | 91 | ||
| 79 | /* Returns true if layoutdriver wants to divert this request to | 92 | /* Returns true if layoutdriver wants to divert this request to |
| 80 | * driver's commit routine. | 93 | * driver's commit routine. |
| @@ -89,6 +102,16 @@ struct pnfs_layoutdriver_type { | |||
| 89 | */ | 102 | */ |
| 90 | enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data); | 103 | enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data); |
| 91 | enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how); | 104 | enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how); |
| 105 | |||
| 106 | void (*free_deviceid_node) (struct nfs4_deviceid_node *); | ||
| 107 | |||
| 108 | void (*encode_layoutreturn) (struct pnfs_layout_hdr *layoutid, | ||
| 109 | struct xdr_stream *xdr, | ||
| 110 | const struct nfs4_layoutreturn_args *args); | ||
| 111 | |||
| 112 | void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid, | ||
| 113 | struct xdr_stream *xdr, | ||
| 114 | const struct nfs4_layoutcommit_args *args); | ||
| 92 | }; | 115 | }; |
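Editor's note: a layout driver describes itself by filling in one of these structures; the new hooks (layout_hdr allocation, deviceid teardown, layoutcommit/layoutreturn encoding) are what let a non-files driver plug in. A hedged sketch for an object-layout style module, where every example_* symbol is a placeholder and not code from this merge:

    static struct pnfs_layoutdriver_type example_layout_type = {
            .id                  = LAYOUT_OSD2_OBJECTS,
            .name                = "LAYOUT_OSD2_OBJECTS",
            .owner               = THIS_MODULE,
            .flags               = PNFS_LAYOUTRET_ON_SETATTR,

            .alloc_layout_hdr    = example_alloc_layout_hdr,
            .free_layout_hdr     = example_free_layout_hdr,
            .alloc_lseg          = example_alloc_lseg,
            .free_lseg           = example_free_lseg,
            .pg_test             = pnfs_generic_pg_test,
            .read_pagelist       = example_read_pagelist,
            .write_pagelist      = example_write_pagelist,
            .free_deviceid_node  = example_free_deviceid_node,
            .encode_layoutcommit = example_encode_layoutcommit,
            .encode_layoutreturn = example_encode_layoutreturn,
    };

Such a structure would typically be handed to pnfs_register_layoutdriver() from module_init() and removed with pnfs_unregister_layoutdriver() on exit.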
| 93 | 116 | ||
| 94 | struct pnfs_layout_hdr { | 117 | struct pnfs_layout_hdr { |
| @@ -120,21 +143,22 @@ extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); | |||
| 120 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, | 143 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, |
| 121 | struct pnfs_device *dev); | 144 | struct pnfs_device *dev); |
| 122 | extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); | 145 | extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); |
| 146 | extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); | ||
| 123 | 147 | ||
| 124 | /* pnfs.c */ | 148 | /* pnfs.c */ |
| 125 | void get_layout_hdr(struct pnfs_layout_hdr *lo); | 149 | void get_layout_hdr(struct pnfs_layout_hdr *lo); |
| 126 | void put_lseg(struct pnfs_layout_segment *lseg); | 150 | void put_lseg(struct pnfs_layout_segment *lseg); |
| 127 | struct pnfs_layout_segment * | 151 | struct pnfs_layout_segment * |
| 128 | pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, | 152 | pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, |
| 129 | enum pnfs_iomode access_type, gfp_t gfp_flags); | 153 | loff_t pos, u64 count, enum pnfs_iomode access_type, |
| 154 | gfp_t gfp_flags); | ||
| 130 | void set_pnfs_layoutdriver(struct nfs_server *, u32 id); | 155 | void set_pnfs_layoutdriver(struct nfs_server *, u32 id); |
| 131 | void unset_pnfs_layoutdriver(struct nfs_server *); | 156 | void unset_pnfs_layoutdriver(struct nfs_server *); |
| 132 | enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, | 157 | enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, |
| 133 | const struct rpc_call_ops *, int); | 158 | const struct rpc_call_ops *, int); |
| 134 | enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, | 159 | enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, |
| 135 | const struct rpc_call_ops *); | 160 | const struct rpc_call_ops *); |
| 136 | void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); | 161 | bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); |
| 137 | void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *); | ||
| 138 | int pnfs_layout_process(struct nfs4_layoutget *lgp); | 162 | int pnfs_layout_process(struct nfs4_layoutget *lgp); |
| 139 | void pnfs_free_lseg_list(struct list_head *tmp_list); | 163 | void pnfs_free_lseg_list(struct list_head *tmp_list); |
| 140 | void pnfs_destroy_layout(struct nfs_inode *); | 164 | void pnfs_destroy_layout(struct nfs_inode *); |
| @@ -148,13 +172,37 @@ int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, | |||
| 148 | struct nfs4_state *open_state); | 172 | struct nfs4_state *open_state); |
| 149 | int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, | 173 | int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, |
| 150 | struct list_head *tmp_list, | 174 | struct list_head *tmp_list, |
| 151 | u32 iomode); | 175 | struct pnfs_layout_range *recall_range); |
| 152 | bool pnfs_roc(struct inode *ino); | 176 | bool pnfs_roc(struct inode *ino); |
| 153 | void pnfs_roc_release(struct inode *ino); | 177 | void pnfs_roc_release(struct inode *ino); |
| 154 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); | 178 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); |
| 155 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier); | 179 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier); |
| 156 | void pnfs_set_layoutcommit(struct nfs_write_data *wdata); | 180 | void pnfs_set_layoutcommit(struct nfs_write_data *wdata); |
| 157 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); | 181 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); |
| 182 | int _pnfs_return_layout(struct inode *); | ||
| 183 | int pnfs_ld_write_done(struct nfs_write_data *); | ||
| 184 | int pnfs_ld_read_done(struct nfs_read_data *); | ||
| 185 | |||
| 186 | /* pnfs_dev.c */ | ||
| 187 | struct nfs4_deviceid_node { | ||
| 188 | struct hlist_node node; | ||
| 189 | const struct pnfs_layoutdriver_type *ld; | ||
| 190 | const struct nfs_client *nfs_client; | ||
| 191 | struct nfs4_deviceid deviceid; | ||
| 192 | atomic_t ref; | ||
| 193 | }; | ||
| 194 | |||
| 195 | void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); | ||
| 196 | struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); | ||
| 197 | struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); | ||
| 198 | void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); | ||
| 199 | void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, | ||
| 200 | const struct pnfs_layoutdriver_type *, | ||
| 201 | const struct nfs_client *, | ||
| 202 | const struct nfs4_deviceid *); | ||
| 203 | struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *); | ||
| 204 | bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); | ||
| 205 | void nfs4_deviceid_purge_client(const struct nfs_client *); | ||
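Editor's note: the cache stores only this small node; a layout driver embeds it in its own per-device structure and gets back to the container with container_of(). A hedged sketch of the usual look-up-or-insert pattern, with all example_* names invented for illustration:

    struct example_device {
            struct nfs4_deviceid_node node;
            /* driver-private device state would follow */
    };

    static struct example_device *
    example_get_device(struct nfs_server *server, const struct nfs4_deviceid *id,
                       gfp_t gfp_flags)
    {
            struct nfs4_deviceid_node *d;
            struct example_device *ed;

            d = nfs4_find_get_deviceid(server->pnfs_curr_ld,
                                       server->nfs_client, id);
            if (d)
                    return container_of(d, struct example_device, node);

            ed = kzalloc(sizeof(*ed), gfp_flags);
            if (!ed)
                    return NULL;
            /* GETDEVICEINFO and driver-specific decoding would happen here */
            nfs4_init_deviceid_node(&ed->node, server->pnfs_curr_ld,
                                    server->nfs_client, id);
            d = nfs4_insert_deviceid_node(&ed->node);
            if (d != &ed->node) {
                    /* lost a race: drop ours, use the entry already cached */
                    kfree(ed);
                    return container_of(d, struct example_device, node);
            }
            return ed;
    }

The reference taken here is dropped later with nfs4_put_deviceid_node(&ed->node), which invokes the driver's free_deviceid_node hook once the count reaches zero.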
| 158 | 206 | ||
| 159 | static inline int lo_fail_bit(u32 iomode) | 207 | static inline int lo_fail_bit(u32 iomode) |
| 160 | { | 208 | { |
| @@ -223,6 +271,36 @@ static inline void pnfs_clear_request_commit(struct nfs_page *req) | |||
| 223 | put_lseg(req->wb_commit_lseg); | 271 | put_lseg(req->wb_commit_lseg); |
| 224 | } | 272 | } |
| 225 | 273 | ||
| 274 | /* Should the pNFS client commit and return the layout upon a setattr */ | ||
| 275 | static inline bool | ||
| 276 | pnfs_ld_layoutret_on_setattr(struct inode *inode) | ||
| 277 | { | ||
| 278 | if (!pnfs_enabled_sb(NFS_SERVER(inode))) | ||
| 279 | return false; | ||
| 280 | return NFS_SERVER(inode)->pnfs_curr_ld->flags & | ||
| 281 | PNFS_LAYOUTRET_ON_SETATTR; | ||
| 282 | } | ||
| 283 | |||
| 284 | static inline int pnfs_return_layout(struct inode *ino) | ||
| 285 | { | ||
| 286 | struct nfs_inode *nfsi = NFS_I(ino); | ||
| 287 | struct nfs_server *nfss = NFS_SERVER(ino); | ||
| 288 | |||
| 289 | if (pnfs_enabled_sb(nfss) && nfsi->layout) | ||
| 290 | return _pnfs_return_layout(ino); | ||
| 291 | |||
| 292 | return 0; | ||
| 293 | } | ||
| 294 | |||
| 295 | static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, | ||
| 296 | struct inode *inode) | ||
| 297 | { | ||
| 298 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; | ||
| 299 | |||
| 300 | if (ld) | ||
| 301 | pgio->pg_test = ld->pg_test; | ||
| 302 | } | ||
| 303 | |||
| 226 | #else /* CONFIG_NFS_V4_1 */ | 304 | #else /* CONFIG_NFS_V4_1 */ |
| 227 | 305 | ||
| 228 | static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) | 306 | static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) |
| @@ -245,7 +323,8 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg) | |||
| 245 | 323 | ||
| 246 | static inline struct pnfs_layout_segment * | 324 | static inline struct pnfs_layout_segment * |
| 247 | pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, | 325 | pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, |
| 248 | enum pnfs_iomode access_type, gfp_t gfp_flags) | 326 | loff_t pos, u64 count, enum pnfs_iomode access_type, |
| 327 | gfp_t gfp_flags) | ||
| 249 | { | 328 | { |
| 250 | return NULL; | 329 | return NULL; |
| 251 | } | 330 | } |
| @@ -264,6 +343,17 @@ pnfs_try_to_write_data(struct nfs_write_data *data, | |||
| 264 | return PNFS_NOT_ATTEMPTED; | 343 | return PNFS_NOT_ATTEMPTED; |
| 265 | } | 344 | } |
| 266 | 345 | ||
| 346 | static inline int pnfs_return_layout(struct inode *ino) | ||
| 347 | { | ||
| 348 | return 0; | ||
| 349 | } | ||
| 350 | |||
| 351 | static inline bool | ||
| 352 | pnfs_ld_layoutret_on_setattr(struct inode *inode) | ||
| 353 | { | ||
| 354 | return false; | ||
| 355 | } | ||
| 356 | |||
| 267 | static inline bool | 357 | static inline bool |
| 268 | pnfs_roc(struct inode *ino) | 358 | pnfs_roc(struct inode *ino) |
| 269 | { | 359 | { |
| @@ -294,16 +384,9 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s) | |||
| 294 | { | 384 | { |
| 295 | } | 385 | } |
| 296 | 386 | ||
| 297 | static inline void | 387 | static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, |
| 298 | pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *ino) | 388 | struct inode *inode) |
| 299 | { | ||
| 300 | pgio->pg_test = NULL; | ||
| 301 | } | ||
| 302 | |||
| 303 | static inline void | ||
| 304 | pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino) | ||
| 305 | { | 389 | { |
| 306 | pgio->pg_test = NULL; | ||
| 307 | } | 390 | } |
| 308 | 391 | ||
| 309 | static inline void | 392 | static inline void |
| @@ -331,6 +414,10 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) | |||
| 331 | { | 414 | { |
| 332 | return 0; | 415 | return 0; |
| 333 | } | 416 | } |
| 417 | |||
| 418 | static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl) | ||
| 419 | { | ||
| 420 | } | ||
| 334 | #endif /* CONFIG_NFS_V4_1 */ | 421 | #endif /* CONFIG_NFS_V4_1 */ |
| 335 | 422 | ||
| 336 | #endif /* FS_NFS_PNFS_H */ | 423 | #endif /* FS_NFS_PNFS_H */ |
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c new file mode 100644 index 000000000000..c65e133ce9c0 --- /dev/null +++ b/fs/nfs/pnfs_dev.c | |||
| @@ -0,0 +1,270 @@ | |||
| 1 | /* | ||
| 2 | * Device operations for the pnfs client. | ||
| 3 | * | ||
| 4 | * Copyright (c) 2002 | ||
| 5 | * The Regents of the University of Michigan | ||
| 6 | * All Rights Reserved | ||
| 7 | * | ||
| 8 | * Dean Hildebrand <dhildebz@umich.edu> | ||
| 9 | * Garth Goodson <Garth.Goodson@netapp.com> | ||
| 10 | * | ||
| 11 | * Permission is granted to use, copy, create derivative works, and | ||
| 12 | * redistribute this software and such derivative works for any purpose, | ||
| 13 | * so long as the name of the University of Michigan is not used in | ||
| 14 | * any advertising or publicity pertaining to the use or distribution | ||
| 15 | * of this software without specific, written prior authorization. If | ||
| 16 | * the above copyright notice or any other identification of the | ||
| 17 | * University of Michigan is included in any copy of any portion of | ||
| 18 | * this software, then the disclaimer below must also be included. | ||
| 19 | * | ||
| 20 | * This software is provided as is, without representation or warranty | ||
| 21 | * of any kind either express or implied, including without limitation | ||
| 22 | * the implied warranties of merchantability, fitness for a particular | ||
| 23 | * purpose, or noninfringement. The Regents of the University of | ||
| 24 | * Michigan shall not be liable for any damages, including special, | ||
| 25 | * indirect, incidental, or consequential damages, with respect to any | ||
| 26 | * claim arising out of or in connection with the use of the software, | ||
| 27 | * even if it has been or is hereafter advised of the possibility of | ||
| 28 | * such damages. | ||
| 29 | */ | ||
| 30 | |||
| 31 | #include "pnfs.h" | ||
| 32 | |||
| 33 | #define NFSDBG_FACILITY NFSDBG_PNFS | ||
| 34 | |||
| 35 | /* | ||
| 36 | * Device ID RCU cache. A device ID is unique per server and layout type. | ||
| 37 | */ | ||
| 38 | #define NFS4_DEVICE_ID_HASH_BITS 5 | ||
| 39 | #define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS) | ||
| 40 | #define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1) | ||
| 41 | |||
| 42 | static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE]; | ||
| 43 | static DEFINE_SPINLOCK(nfs4_deviceid_lock); | ||
| 44 | |||
| 45 | void | ||
| 46 | nfs4_print_deviceid(const struct nfs4_deviceid *id) | ||
| 47 | { | ||
| 48 | u32 *p = (u32 *)id; | ||
| 49 | |||
| 50 | dprintk("%s: device id= [%x%x%x%x]\n", __func__, | ||
| 51 | p[0], p[1], p[2], p[3]); | ||
| 52 | } | ||
| 53 | EXPORT_SYMBOL_GPL(nfs4_print_deviceid); | ||
| 54 | |||
| 55 | static inline u32 | ||
| 56 | nfs4_deviceid_hash(const struct nfs4_deviceid *id) | ||
| 57 | { | ||
| 58 | unsigned char *cptr = (unsigned char *)id->data; | ||
| 59 | unsigned int nbytes = NFS4_DEVICEID4_SIZE; | ||
| 60 | u32 x = 0; | ||
| 61 | |||
| 62 | while (nbytes--) { | ||
| 63 | x *= 37; | ||
| 64 | x += *cptr++; | ||
| 65 | } | ||
| 66 | return x & NFS4_DEVICE_ID_HASH_MASK; | ||
| 67 | } | ||
| 68 | |||
| 69 | static struct nfs4_deviceid_node * | ||
| 70 | _lookup_deviceid(const struct pnfs_layoutdriver_type *ld, | ||
| 71 | const struct nfs_client *clp, const struct nfs4_deviceid *id, | ||
| 72 | long hash) | ||
| 73 | { | ||
| 74 | struct nfs4_deviceid_node *d; | ||
| 75 | struct hlist_node *n; | ||
| 76 | |||
| 77 | hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) | ||
| 78 | if (d->ld == ld && d->nfs_client == clp && | ||
| 79 | !memcmp(&d->deviceid, id, sizeof(*id))) { | ||
| 80 | if (atomic_read(&d->ref)) | ||
| 81 | return d; | ||
| 82 | else | ||
| 83 | continue; | ||
| 84 | } | ||
| 85 | return NULL; | ||
| 86 | } | ||
| 87 | |||
| 88 | /* | ||
| 89 | * Lookup a deviceid in cache and get a reference count on it if found | ||
| 90 | * | ||
| 91 | * @clp nfs_client associated with deviceid | ||
| 92 | * @id deviceid to look up | ||
| 93 | */ | ||
| 94 | struct nfs4_deviceid_node * | ||
| 95 | _find_get_deviceid(const struct pnfs_layoutdriver_type *ld, | ||
| 96 | const struct nfs_client *clp, const struct nfs4_deviceid *id, | ||
| 97 | long hash) | ||
| 98 | { | ||
| 99 | struct nfs4_deviceid_node *d; | ||
| 100 | |||
| 101 | rcu_read_lock(); | ||
| 102 | d = _lookup_deviceid(ld, clp, id, hash); | ||
| 103 | if (d && !atomic_inc_not_zero(&d->ref)) | ||
| 104 | d = NULL; | ||
| 105 | rcu_read_unlock(); | ||
| 106 | return d; | ||
| 107 | } | ||
| 108 | |||
| 109 | struct nfs4_deviceid_node * | ||
| 110 | nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld, | ||
| 111 | const struct nfs_client *clp, const struct nfs4_deviceid *id) | ||
| 112 | { | ||
| 113 | return _find_get_deviceid(ld, clp, id, nfs4_deviceid_hash(id)); | ||
| 114 | } | ||
| 115 | EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); | ||
| 116 | |||
| 117 | /* | ||
| 118 | * Unhash and put deviceid | ||
| 119 | * | ||
| 120 | * @clp nfs_client associated with deviceid | ||
| 121 | * @id the deviceid to unhash | ||
| 122 | * | ||
| 123 | * @ret the unhashed node if found and its reference count dropped to zero, NULL otherwise. | ||
| 124 | */ | ||
| 125 | struct nfs4_deviceid_node * | ||
| 126 | nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, | ||
| 127 | const struct nfs_client *clp, const struct nfs4_deviceid *id) | ||
| 128 | { | ||
| 129 | struct nfs4_deviceid_node *d; | ||
| 130 | |||
| 131 | spin_lock(&nfs4_deviceid_lock); | ||
| 132 | rcu_read_lock(); | ||
| 133 | d = _lookup_deviceid(ld, clp, id, nfs4_deviceid_hash(id)); | ||
| 134 | rcu_read_unlock(); | ||
| 135 | if (!d) { | ||
| 136 | spin_unlock(&nfs4_deviceid_lock); | ||
| 137 | return NULL; | ||
| 138 | } | ||
| 139 | hlist_del_init_rcu(&d->node); | ||
| 140 | spin_unlock(&nfs4_deviceid_lock); | ||
| 141 | synchronize_rcu(); | ||
| 142 | |||
| 143 | /* balance the initial ref set in nfs4_init_deviceid_node */ | ||
| 144 | if (atomic_dec_and_test(&d->ref)) | ||
| 145 | return d; | ||
| 146 | |||
| 147 | return NULL; | ||
| 148 | } | ||
| 149 | EXPORT_SYMBOL_GPL(nfs4_unhash_put_deviceid); | ||
| 150 | |||
| 151 | /* | ||
| 152 | * Delete a deviceid from cache | ||
| 153 | * | ||
| 154 | * @clp struct nfs_client qualifying the deviceid | ||
| 155 | * @id deviceid to delete | ||
| 156 | */ | ||
| 157 | void | ||
| 158 | nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, | ||
| 159 | const struct nfs_client *clp, const struct nfs4_deviceid *id) | ||
| 160 | { | ||
| 161 | struct nfs4_deviceid_node *d; | ||
| 162 | |||
| 163 | d = nfs4_unhash_put_deviceid(ld, clp, id); | ||
| 164 | if (!d) | ||
| 165 | return; | ||
| 166 | d->ld->free_deviceid_node(d); | ||
| 167 | } | ||
| 168 | EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); | ||
| 169 | |||
| 170 | void | ||
| 171 | nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, | ||
| 172 | const struct pnfs_layoutdriver_type *ld, | ||
| 173 | const struct nfs_client *nfs_client, | ||
| 174 | const struct nfs4_deviceid *id) | ||
| 175 | { | ||
| 176 | INIT_HLIST_NODE(&d->node); | ||
| 177 | d->ld = ld; | ||
| 178 | d->nfs_client = nfs_client; | ||
| 179 | d->deviceid = *id; | ||
| 180 | atomic_set(&d->ref, 1); | ||
| 181 | } | ||
| 182 | EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node); | ||
| 183 | |||
| 184 | /* | ||
| 185 | * Uniquely initialize and insert a deviceid node into cache | ||
| 186 | * | ||
| 187 | * @new new deviceid node | ||
| 188 | * Note that the caller must set up the following members: | ||
| 189 | * new->ld | ||
| 190 | * new->nfs_client | ||
| 191 | * new->deviceid | ||
| 192 | * | ||
| 193 | * @ret the inserted node if no matching entry was found, otherwise the existing entry. | ||
| 194 | */ | ||
| 195 | struct nfs4_deviceid_node * | ||
| 196 | nfs4_insert_deviceid_node(struct nfs4_deviceid_node *new) | ||
| 197 | { | ||
| 198 | struct nfs4_deviceid_node *d; | ||
| 199 | long hash; | ||
| 200 | |||
| 201 | spin_lock(&nfs4_deviceid_lock); | ||
| 202 | hash = nfs4_deviceid_hash(&new->deviceid); | ||
| 203 | d = _find_get_deviceid(new->ld, new->nfs_client, &new->deviceid, hash); | ||
| 204 | if (d) { | ||
| 205 | spin_unlock(&nfs4_deviceid_lock); | ||
| 206 | return d; | ||
| 207 | } | ||
| 208 | |||
| 209 | hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]); | ||
| 210 | spin_unlock(&nfs4_deviceid_lock); | ||
| 211 | |||
| 212 | return new; | ||
| 213 | } | ||
| 214 | EXPORT_SYMBOL_GPL(nfs4_insert_deviceid_node); | ||
| 215 | |||
| 216 | /* | ||
| 217 | * Dereference a deviceid node and delete it when its reference count drops | ||
| 218 | * to zero. | ||
| 219 | * | ||
| 220 | * @d deviceid node to put | ||
| 221 | * | ||
| 222 | * @ret true iff the node was deleted | ||
| 223 | */ | ||
| 224 | bool | ||
| 225 | nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) | ||
| 226 | { | ||
| 227 | if (!atomic_dec_and_lock(&d->ref, &nfs4_deviceid_lock)) | ||
| 228 | return false; | ||
| 229 | hlist_del_init_rcu(&d->node); | ||
| 230 | spin_unlock(&nfs4_deviceid_lock); | ||
| 231 | synchronize_rcu(); | ||
| 232 | d->ld->free_deviceid_node(d); | ||
| 233 | return true; | ||
| 234 | } | ||
| 235 | EXPORT_SYMBOL_GPL(nfs4_put_deviceid_node); | ||
| 236 | |||
| 237 | static void | ||
| 238 | _deviceid_purge_client(const struct nfs_client *clp, long hash) | ||
| 239 | { | ||
| 240 | struct nfs4_deviceid_node *d; | ||
| 241 | struct hlist_node *n, *next; | ||
| 242 | HLIST_HEAD(tmp); | ||
| 243 | |||
| 244 | rcu_read_lock(); | ||
| 245 | hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) | ||
| 246 | if (d->nfs_client == clp && atomic_read(&d->ref)) { | ||
| 247 | hlist_del_init_rcu(&d->node); | ||
| 248 | hlist_add_head(&d->node, &tmp); | ||
| 249 | } | ||
| 250 | rcu_read_unlock(); | ||
| 251 | |||
| 252 | if (hlist_empty(&tmp)) | ||
| 253 | return; | ||
| 254 | |||
| 255 | synchronize_rcu(); | ||
| 256 | hlist_for_each_entry_safe(d, n, next, &tmp, node) | ||
| 257 | if (atomic_dec_and_test(&d->ref)) | ||
| 258 | d->ld->free_deviceid_node(d); | ||
| 259 | } | ||
| 260 | |||
| 261 | void | ||
| 262 | nfs4_deviceid_purge_client(const struct nfs_client *clp) | ||
| 263 | { | ||
| 264 | long h; | ||
| 265 | |||
| 266 | spin_lock(&nfs4_deviceid_lock); | ||
| 267 | for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) | ||
| 268 | _deviceid_purge_client(clp, h); | ||
| 269 | spin_unlock(&nfs4_deviceid_lock); | ||
| 270 | } | ||
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 2bcf0dc306a1..20a7f952e244 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
| @@ -288,7 +288,9 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) | |||
| 288 | atomic_set(&req->wb_complete, requests); | 288 | atomic_set(&req->wb_complete, requests); |
| 289 | 289 | ||
| 290 | BUG_ON(desc->pg_lseg != NULL); | 290 | BUG_ON(desc->pg_lseg != NULL); |
| 291 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL); | 291 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, |
| 292 | req_offset(req), desc->pg_count, | ||
| 293 | IOMODE_READ, GFP_KERNEL); | ||
| 292 | ClearPageError(page); | 294 | ClearPageError(page); |
| 293 | offset = 0; | 295 | offset = 0; |
| 294 | nbytes = desc->pg_count; | 296 | nbytes = desc->pg_count; |
| @@ -351,7 +353,9 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) | |||
| 351 | } | 353 | } |
| 352 | req = nfs_list_entry(data->pages.next); | 354 | req = nfs_list_entry(data->pages.next); |
| 353 | if ((!lseg) && list_is_singular(&data->pages)) | 355 | if ((!lseg) && list_is_singular(&data->pages)) |
| 354 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL); | 356 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, |
| 357 | req_offset(req), desc->pg_count, | ||
| 358 | IOMODE_READ, GFP_KERNEL); | ||
| 355 | 359 | ||
| 356 | ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, | 360 | ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, |
| 357 | 0, lseg); | 361 | 0, lseg); |
| @@ -660,7 +664,6 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, | |||
| 660 | if (ret == 0) | 664 | if (ret == 0) |
| 661 | goto read_complete; /* all pages were read */ | 665 | goto read_complete; /* all pages were read */ |
| 662 | 666 | ||
| 663 | pnfs_pageio_init_read(&pgio, inode); | ||
| 664 | if (rsize < PAGE_CACHE_SIZE) | 667 | if (rsize < PAGE_CACHE_SIZE) |
| 665 | nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); | 668 | nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); |
| 666 | else | 669 | else |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e288f06d3fa7..ce40e5c568ba 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
| @@ -63,6 +63,7 @@ | |||
| 63 | #include "iostat.h" | 63 | #include "iostat.h" |
| 64 | #include "internal.h" | 64 | #include "internal.h" |
| 65 | #include "fscache.h" | 65 | #include "fscache.h" |
| 66 | #include "pnfs.h" | ||
| 66 | 67 | ||
| 67 | #define NFSDBG_FACILITY NFSDBG_VFS | 68 | #define NFSDBG_FACILITY NFSDBG_VFS |
| 68 | 69 | ||
| @@ -732,6 +733,28 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
| 732 | 733 | ||
| 733 | return 0; | 734 | return 0; |
| 734 | } | 735 | } |
| 736 | #ifdef CONFIG_NFS_V4_1 | ||
| 737 | void show_sessions(struct seq_file *m, struct nfs_server *server) | ||
| 738 | { | ||
| 739 | if (nfs4_has_session(server->nfs_client)) | ||
| 740 | seq_printf(m, ",sessions"); | ||
| 741 | } | ||
| 742 | #else | ||
| 743 | void show_sessions(struct seq_file *m, struct nfs_server *server) {} | ||
| 744 | #endif | ||
| 745 | |||
| 746 | #ifdef CONFIG_NFS_V4_1 | ||
| 747 | void show_pnfs(struct seq_file *m, struct nfs_server *server) | ||
| 748 | { | ||
| 749 | seq_printf(m, ",pnfs="); | ||
| 750 | if (server->pnfs_curr_ld) | ||
| 751 | seq_printf(m, "%s", server->pnfs_curr_ld->name); | ||
| 752 | else | ||
| 753 | seq_printf(m, "not configured"); | ||
| 754 | } | ||
| 755 | #else /* CONFIG_NFS_V4_1 */ | ||
| 756 | void show_pnfs(struct seq_file *m, struct nfs_server *server) {} | ||
| 757 | #endif /* CONFIG_NFS_V4_1 */ | ||
| 735 | 758 | ||
| 736 | static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt) | 759 | static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt) |
| 737 | { | 760 | { |
| @@ -792,6 +815,8 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) | |||
| 792 | seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); | 815 | seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); |
| 793 | seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); | 816 | seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); |
| 794 | seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); | 817 | seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); |
| 818 | show_sessions(m, nfss); | ||
| 819 | show_pnfs(m, nfss); | ||
| 795 | } | 820 | } |
| 796 | #endif | 821 | #endif |
| 797 | 822 | ||
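Editor's note: with these two helpers wired into nfs_show_stats(), the nfsv4 portion of a v4.1 mount's statistics (as seen in /proc/self/mountstats) grows the session and pNFS indicators. An illustrative excerpt, with the bitmask values made up:

    nfsv4:  bm0=0xfdffbfff,bm1=0xf9be3e,acl=0x3,sessions,pnfs=LAYOUT_NFSV4_1_FILES

When no layout driver is set for the mount, show_pnfs() prints ",pnfs=not configured" instead.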
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 49c715b4ac92..e268e3b23497 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
| @@ -939,7 +939,9 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) | |||
| 939 | atomic_set(&req->wb_complete, requests); | 939 | atomic_set(&req->wb_complete, requests); |
| 940 | 940 | ||
| 941 | BUG_ON(desc->pg_lseg); | 941 | BUG_ON(desc->pg_lseg); |
| 942 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS); | 942 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, |
| 943 | req_offset(req), desc->pg_count, | ||
| 944 | IOMODE_RW, GFP_NOFS); | ||
| 943 | ClearPageError(page); | 945 | ClearPageError(page); |
| 944 | offset = 0; | 946 | offset = 0; |
| 945 | nbytes = desc->pg_count; | 947 | nbytes = desc->pg_count; |
| @@ -1013,7 +1015,9 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) | |||
| 1013 | } | 1015 | } |
| 1014 | req = nfs_list_entry(data->pages.next); | 1016 | req = nfs_list_entry(data->pages.next); |
| 1015 | if ((!lseg) && list_is_singular(&data->pages)) | 1017 | if ((!lseg) && list_is_singular(&data->pages)) |
| 1016 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS); | 1018 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, |
| 1019 | req_offset(req), desc->pg_count, | ||
| 1020 | IOMODE_RW, GFP_NOFS); | ||
| 1017 | 1021 | ||
| 1018 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && | 1022 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && |
| 1019 | (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) | 1023 | (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) |
| @@ -1032,8 +1036,6 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, | |||
| 1032 | { | 1036 | { |
| 1033 | size_t wsize = NFS_SERVER(inode)->wsize; | 1037 | size_t wsize = NFS_SERVER(inode)->wsize; |
| 1034 | 1038 | ||
| 1035 | pnfs_pageio_init_write(pgio, inode); | ||
| 1036 | |||
| 1037 | if (wsize < PAGE_CACHE_SIZE) | 1039 | if (wsize < PAGE_CACHE_SIZE) |
| 1038 | nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); | 1040 | nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); |
| 1039 | else | 1041 | else |
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 8e66c5ccc1c4..504b289ba680 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h | |||
| @@ -562,6 +562,7 @@ enum { | |||
| 562 | NFSPROC4_CLNT_LAYOUTGET, | 562 | NFSPROC4_CLNT_LAYOUTGET, |
| 563 | NFSPROC4_CLNT_GETDEVICEINFO, | 563 | NFSPROC4_CLNT_GETDEVICEINFO, |
| 564 | NFSPROC4_CLNT_LAYOUTCOMMIT, | 564 | NFSPROC4_CLNT_LAYOUTCOMMIT, |
| 565 | NFSPROC4_CLNT_LAYOUTRETURN, | ||
| 565 | }; | 566 | }; |
| 566 | 567 | ||
| 567 | /* nfs41 types */ | 568 | /* nfs41 types */ |
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 91af2e49fa3a..3a34e80ae92f 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h | |||
| @@ -68,7 +68,7 @@ struct nfs_pageio_descriptor { | |||
| 68 | int pg_ioflags; | 68 | int pg_ioflags; |
| 69 | int pg_error; | 69 | int pg_error; |
| 70 | struct pnfs_layout_segment *pg_lseg; | 70 | struct pnfs_layout_segment *pg_lseg; |
| 71 | int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); | 71 | bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); |
| 72 | }; | 72 | }; |
| 73 | 73 | ||
| 74 | #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) | 74 | #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) |
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7e371f7df9c4..5e8444a11adf 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h | |||
| @@ -269,6 +269,27 @@ struct nfs4_layoutcommit_data { | |||
| 269 | struct nfs4_layoutcommit_res res; | 269 | struct nfs4_layoutcommit_res res; |
| 270 | }; | 270 | }; |
| 271 | 271 | ||
| 272 | struct nfs4_layoutreturn_args { | ||
| 273 | __u32 layout_type; | ||
| 274 | struct inode *inode; | ||
| 275 | nfs4_stateid stateid; | ||
| 276 | struct nfs4_sequence_args seq_args; | ||
| 277 | }; | ||
| 278 | |||
| 279 | struct nfs4_layoutreturn_res { | ||
| 280 | struct nfs4_sequence_res seq_res; | ||
| 281 | u32 lrs_present; | ||
| 282 | nfs4_stateid stateid; | ||
| 283 | }; | ||
| 284 | |||
| 285 | struct nfs4_layoutreturn { | ||
| 286 | struct nfs4_layoutreturn_args args; | ||
| 287 | struct nfs4_layoutreturn_res res; | ||
| 288 | struct rpc_cred *cred; | ||
| 289 | struct nfs_client *clp; | ||
| 290 | int rpc_status; | ||
| 291 | }; | ||
| 292 | |||
| 272 | /* | 293 | /* |
| 273 | * Arguments to the open call. | 294 | * Arguments to the open call. |
| 274 | */ | 295 | */ |
| @@ -1087,6 +1108,7 @@ struct nfs_read_data { | |||
| 1087 | const struct rpc_call_ops *mds_ops; | 1108 | const struct rpc_call_ops *mds_ops; |
| 1088 | int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data); | 1109 | int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data); |
| 1089 | __u64 mds_offset; | 1110 | __u64 mds_offset; |
| 1111 | int pnfs_error; | ||
| 1090 | struct page *page_array[NFS_PAGEVEC_SIZE]; | 1112 | struct page *page_array[NFS_PAGEVEC_SIZE]; |
| 1091 | }; | 1113 | }; |
| 1092 | 1114 | ||
| @@ -1112,6 +1134,7 @@ struct nfs_write_data { | |||
| 1112 | unsigned long timestamp; /* For lease renewal */ | 1134 | unsigned long timestamp; /* For lease renewal */ |
| 1113 | #endif | 1135 | #endif |
| 1114 | __u64 mds_offset; /* Filelayout dense stripe */ | 1136 | __u64 mds_offset; /* Filelayout dense stripe */ |
| 1137 | int pnfs_error; | ||
| 1115 | struct page *page_array[NFS_PAGEVEC_SIZE]; | 1138 | struct page *page_array[NFS_PAGEVEC_SIZE]; |
| 1116 | }; | 1139 | }; |
| 1117 | 1140 | ||
diff --git a/include/linux/pnfs_osd_xdr.h b/include/linux/pnfs_osd_xdr.h new file mode 100644 index 000000000000..76efbdd01622 --- /dev/null +++ b/include/linux/pnfs_osd_xdr.h | |||
| @@ -0,0 +1,345 @@ | |||
| 1 | /* | ||
| 2 | * pNFS-osd on-the-wire data structures | ||
| 3 | * | ||
| 4 | * Copyright (C) 2007 Panasas Inc. [year of first publication] | ||
| 5 | * All rights reserved. | ||
| 6 | * | ||
| 7 | * Benny Halevy <bhalevy@panasas.com> | ||
| 8 | * Boaz Harrosh <bharrosh@panasas.com> | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or modify | ||
| 11 | * it under the terms of the GNU General Public License version 2 | ||
| 12 | * See the file COPYING included with this distribution for more details. | ||
| 13 | * | ||
| 14 | * Redistribution and use in source and binary forms, with or without | ||
| 15 | * modification, are permitted provided that the following conditions | ||
| 16 | * are met: | ||
| 17 | * | ||
| 18 | * 1. Redistributions of source code must retain the above copyright | ||
| 19 | * notice, this list of conditions and the following disclaimer. | ||
| 20 | * 2. Redistributions in binary form must reproduce the above copyright | ||
| 21 | * notice, this list of conditions and the following disclaimer in the | ||
| 22 | * documentation and/or other materials provided with the distribution. | ||
| 23 | * 3. Neither the name of the Panasas company nor the names of its | ||
| 24 | * contributors may be used to endorse or promote products derived | ||
| 25 | * from this software without specific prior written permission. | ||
| 26 | * | ||
| 27 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
| 28 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
| 29 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
| 30 | * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||
| 31 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
| 32 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
| 33 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | ||
| 34 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
| 35 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
| 36 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
| 37 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 38 | */ | ||
| 39 | #ifndef __PNFS_OSD_XDR_H__ | ||
| 40 | #define __PNFS_OSD_XDR_H__ | ||
| 41 | |||
| 42 | #include <linux/nfs_fs.h> | ||
| 43 | #include <linux/nfs_page.h> | ||
| 44 | #include <scsi/osd_protocol.h> | ||
| 45 | |||
| 46 | #define PNFS_OSD_OSDNAME_MAXSIZE 256 | ||
| 47 | |||
| 48 | /* | ||
| 49 | * draft-ietf-nfsv4-minorversion-22 | ||
| 50 | * draft-ietf-nfsv4-pnfs-obj-12 | ||
| 51 | */ | ||
| 52 | |||
| 53 | /* Layout Structure */ | ||
| 54 | |||
| 55 | enum pnfs_osd_raid_algorithm4 { | ||
| 56 | PNFS_OSD_RAID_0 = 1, | ||
| 57 | PNFS_OSD_RAID_4 = 2, | ||
| 58 | PNFS_OSD_RAID_5 = 3, | ||
| 59 | PNFS_OSD_RAID_PQ = 4 /* Reed-Solomon P+Q */ | ||
| 60 | }; | ||
| 61 | |||
| 62 | /* struct pnfs_osd_data_map4 { | ||
| 63 | * uint32_t odm_num_comps; | ||
| 64 | * length4 odm_stripe_unit; | ||
| 65 | * uint32_t odm_group_width; | ||
| 66 | * uint32_t odm_group_depth; | ||
| 67 | * uint32_t odm_mirror_cnt; | ||
| 68 | * pnfs_osd_raid_algorithm4 odm_raid_algorithm; | ||
| 69 | * }; | ||
| 70 | */ | ||
| 71 | struct pnfs_osd_data_map { | ||
| 72 | u32 odm_num_comps; | ||
| 73 | u64 odm_stripe_unit; | ||
| 74 | u32 odm_group_width; | ||
| 75 | u32 odm_group_depth; | ||
| 76 | u32 odm_mirror_cnt; | ||
| 77 | u32 odm_raid_algorithm; | ||
| 78 | }; | ||
| 79 | |||
| 80 | /* struct pnfs_osd_objid4 { | ||
| 81 | * deviceid4 oid_device_id; | ||
| 82 | * uint64_t oid_partition_id; | ||
| 83 | * uint64_t oid_object_id; | ||
| 84 | * }; | ||
| 85 | */ | ||
| 86 | struct pnfs_osd_objid { | ||
| 87 | struct nfs4_deviceid oid_device_id; | ||
| 88 | u64 oid_partition_id; | ||
| 89 | u64 oid_object_id; | ||
| 90 | }; | ||
| 91 | |||
| 92 | /* For printout. I use: | ||
| 93 | * kprint("dev(%llx:%llx)", _DEVID_LO(pointer), _DEVID_HI(pointer)); | ||
| 94 | * BE style | ||
| 95 | */ | ||
| 96 | #define _DEVID_LO(oid_device_id) \ | ||
| 97 | (unsigned long long)be64_to_cpup((__be64 *)(oid_device_id)->data) | ||
| 98 | |||
| 99 | #define _DEVID_HI(oid_device_id) \ | ||
| 100 | (unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id)->data) + 1) | ||
| 101 | |||
| 102 | static inline int | ||
| 103 | pnfs_osd_objid_xdr_sz(void) | ||
| 104 | { | ||
| 105 | return (NFS4_DEVICEID4_SIZE / 4) + 2 + 2; | ||
| 106 | } | ||
| 107 | |||
| 108 | enum pnfs_osd_version { | ||
| 109 | PNFS_OSD_MISSING = 0, | ||
| 110 | PNFS_OSD_VERSION_1 = 1, | ||
| 111 | PNFS_OSD_VERSION_2 = 2 | ||
| 112 | }; | ||
| 113 | |||
| 114 | struct pnfs_osd_opaque_cred { | ||
| 115 | u32 cred_len; | ||
| 116 | void *cred; | ||
| 117 | }; | ||
| 118 | |||
| 119 | enum pnfs_osd_cap_key_sec { | ||
| 120 | PNFS_OSD_CAP_KEY_SEC_NONE = 0, | ||
| 121 | PNFS_OSD_CAP_KEY_SEC_SSV = 1, | ||
| 122 | }; | ||
| 123 | |||
| 124 | /* struct pnfs_osd_object_cred4 { | ||
| 125 | * pnfs_osd_objid4 oc_object_id; | ||
| 126 | * pnfs_osd_version4 oc_osd_version; | ||
| 127 | * pnfs_osd_cap_key_sec4 oc_cap_key_sec; | ||
| 128 | * opaque oc_capability_key<>; | ||
| 129 | * opaque oc_capability<>; | ||
| 130 | * }; | ||
| 131 | */ | ||
| 132 | struct pnfs_osd_object_cred { | ||
| 133 | struct pnfs_osd_objid oc_object_id; | ||
| 134 | u32 oc_osd_version; | ||
| 135 | u32 oc_cap_key_sec; | ||
| 136 | struct pnfs_osd_opaque_cred oc_cap_key; | ||
| 137 | struct pnfs_osd_opaque_cred oc_cap; | ||
| 138 | }; | ||
| 139 | |||
| 140 | /* struct pnfs_osd_layout4 { | ||
| 141 | * pnfs_osd_data_map4 olo_map; | ||
| 142 | * uint32_t olo_comps_index; | ||
| 143 | * pnfs_osd_object_cred4 olo_components<>; | ||
| 144 | * }; | ||
| 145 | */ | ||
| 146 | struct pnfs_osd_layout { | ||
| 147 | struct pnfs_osd_data_map olo_map; | ||
| 148 | u32 olo_comps_index; | ||
| 149 | u32 olo_num_comps; | ||
| 150 | struct pnfs_osd_object_cred *olo_comps; | ||
| 151 | }; | ||
| 152 | |||
| 153 | /* Device Address */ | ||
| 154 | enum pnfs_osd_targetid_type { | ||
| 155 | OBJ_TARGET_ANON = 1, | ||
| 156 | OBJ_TARGET_SCSI_NAME = 2, | ||
| 157 | OBJ_TARGET_SCSI_DEVICE_ID = 3, | ||
| 158 | }; | ||
| 159 | |||
| 160 | /* union pnfs_osd_targetid4 switch (pnfs_osd_targetid_type4 oti_type) { | ||
| 161 | * case OBJ_TARGET_SCSI_NAME: | ||
| 162 | * string oti_scsi_name<>; | ||
| 163 | * | ||
| 164 | * case OBJ_TARGET_SCSI_DEVICE_ID: | ||
| 165 | * opaque oti_scsi_device_id<>; | ||
| 166 | * | ||
| 167 | * default: | ||
| 168 | * void; | ||
| 169 | * }; | ||
| 170 | * | ||
| 171 | * union pnfs_osd_targetaddr4 switch (bool ota_available) { | ||
| 172 | * case TRUE: | ||
| 173 | * netaddr4 ota_netaddr; | ||
| 174 | * case FALSE: | ||
| 175 | * void; | ||
| 176 | * }; | ||
| 177 | * | ||
| 178 | * struct pnfs_osd_deviceaddr4 { | ||
| 179 | * pnfs_osd_targetid4 oda_targetid; | ||
| 180 | * pnfs_osd_targetaddr4 oda_targetaddr; | ||
| 181 | * uint64_t oda_lun; | ||
| 182 | * opaque oda_systemid<>; | ||
| 183 | * pnfs_osd_object_cred4 oda_root_obj_cred; | ||
| 184 | * opaque oda_osdname<>; | ||
| 185 | * }; | ||
| 186 | */ | ||
| 187 | struct pnfs_osd_targetid { | ||
| 188 | u32 oti_type; | ||
| 189 | struct nfs4_string oti_scsi_device_id; | ||
| 190 | }; | ||
| 191 | |||
| 192 | enum { PNFS_OSD_TARGETID_MAX = 1 + PNFS_OSD_OSDNAME_MAXSIZE / 4 }; | ||
| 193 | |||
| 194 | /* struct netaddr4 { | ||
| 195 | * // see struct rpcb in RFC1833 | ||
| 196 | * string r_netid<>; // network id | ||
| 197 | * string r_addr<>; // universal address | ||
| 198 | * }; | ||
| 199 | */ | ||
| 200 | struct pnfs_osd_net_addr { | ||
| 201 | struct nfs4_string r_netid; | ||
| 202 | struct nfs4_string r_addr; | ||
| 203 | }; | ||
| 204 | |||
| 205 | struct pnfs_osd_targetaddr { | ||
| 206 | u32 ota_available; | ||
| 207 | struct pnfs_osd_net_addr ota_netaddr; | ||
| 208 | }; | ||
| 209 | |||
| 210 | enum { | ||
| 211 | NETWORK_ID_MAX = 16 / 4, | ||
| 212 | UNIVERSAL_ADDRESS_MAX = 64 / 4, | ||
| 213 | PNFS_OSD_TARGETADDR_MAX = 3 + NETWORK_ID_MAX + UNIVERSAL_ADDRESS_MAX, | ||
| 214 | }; | ||
| 215 | |||
| 216 | struct pnfs_osd_deviceaddr { | ||
| 217 | struct pnfs_osd_targetid oda_targetid; | ||
| 218 | struct pnfs_osd_targetaddr oda_targetaddr; | ||
| 219 | u8 oda_lun[8]; | ||
| 220 | struct nfs4_string oda_systemid; | ||
| 221 | struct pnfs_osd_object_cred oda_root_obj_cred; | ||
| 222 | struct nfs4_string oda_osdname; | ||
| 223 | }; | ||
| 224 | |||
| 225 | enum { | ||
| 226 | ODA_OSDNAME_MAX = PNFS_OSD_OSDNAME_MAXSIZE / 4, | ||
| 227 | PNFS_OSD_DEVICEADDR_MAX = | ||
| 228 | PNFS_OSD_TARGETID_MAX + PNFS_OSD_TARGETADDR_MAX + | ||
| 229 | 2 /*oda_lun*/ + | ||
| 230 | 1 + OSD_SYSTEMID_LEN + | ||
| 231 | 1 + ODA_OSDNAME_MAX, | ||
| 232 | }; | ||
| 233 | |||
| 234 | /* LAYOUTCOMMIT: layoutupdate */ | ||
| 235 | |||
| 236 | /* union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) { | ||
| 237 | * case TRUE: | ||
| 238 | * int64_t dsu_delta; | ||
| 239 | * case FALSE: | ||
| 240 | * void; | ||
| 241 | * }; | ||
| 242 | * | ||
| 243 | * struct pnfs_osd_layoutupdate4 { | ||
| 244 | * pnfs_osd_deltaspaceused4 olu_delta_space_used; | ||
| 245 | * bool olu_ioerr_flag; | ||
| 246 | * }; | ||
| 247 | */ | ||
| 248 | struct pnfs_osd_layoutupdate { | ||
| 249 | u32 dsu_valid; | ||
| 250 | s64 dsu_delta; | ||
| 251 | u32 olu_ioerr_flag; | ||
| 252 | }; | ||
| 253 | |||
| 254 | /* LAYOUTRETURN: I/O Error Report */ | ||
| 255 | |||
| 256 | enum pnfs_osd_errno { | ||
| 257 | PNFS_OSD_ERR_EIO = 1, | ||
| 258 | PNFS_OSD_ERR_NOT_FOUND = 2, | ||
| 259 | PNFS_OSD_ERR_NO_SPACE = 3, | ||
| 260 | PNFS_OSD_ERR_BAD_CRED = 4, | ||
| 261 | PNFS_OSD_ERR_NO_ACCESS = 5, | ||
| 262 | PNFS_OSD_ERR_UNREACHABLE = 6, | ||
| 263 | PNFS_OSD_ERR_RESOURCE = 7 | ||
| 264 | }; | ||
| 265 | |||
| 266 | /* struct pnfs_osd_ioerr4 { | ||
| 267 | * pnfs_osd_objid4 oer_component; | ||
| 268 | * length4 oer_comp_offset; | ||
| 269 | * length4 oer_comp_length; | ||
| 270 | * bool oer_iswrite; | ||
| 271 | * pnfs_osd_errno4 oer_errno; | ||
| 272 | * }; | ||
| 273 | */ | ||
| 274 | struct pnfs_osd_ioerr { | ||
| 275 | struct pnfs_osd_objid oer_component; | ||
| 276 | u64 oer_comp_offset; | ||
| 277 | u64 oer_comp_length; | ||
| 278 | u32 oer_iswrite; | ||
| 279 | u32 oer_errno; | ||
| 280 | }; | ||
| 281 | |||
| 282 | /* OSD XDR API */ | ||
| 283 | /* Layout helpers */ | ||
| 284 | /* Layout decoding is done in two parts: | ||
| 285 | * 1. First Call pnfs_osd_xdr_decode_layout_map to read in only the header part | ||
| 286 | * of the layout. @iter members need not be initialized. | ||
| 287 | * Returned: | ||
| 288 | * @layout members are set. (@layout->olo_comps set to NULL). | ||
| 289 | * | ||
| 290 | * Zero on success, or negative error if passed xdr is broken. | ||
| 291 | * | ||
| 292 | * 2. 2nd Call pnfs_osd_xdr_decode_layout_comp() in a loop until it returns | ||
| 293 | * false, to decode the next component. | ||
| 294 | * Returned: | ||
| 295 | * true if there is more to decode or false if we are done or error. | ||
| 296 | * | ||
| 297 | * Example: | ||
| 298 | * struct pnfs_osd_xdr_decode_layout_iter iter; | ||
| 299 | * struct pnfs_osd_layout layout; | ||
| 300 | * struct pnfs_osd_object_cred comp; | ||
| 301 | * int status; | ||
| 302 | * | ||
| 303 | * status = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr); | ||
| 304 | * if (unlikely(status)) | ||
| 305 | * goto err; | ||
| 306 | * while(pnfs_osd_xdr_decode_layout_comp(&comp, &iter, xdr, &status)) { | ||
| 307 | * // All of @comp strings point to inside the xdr_buffer | ||
| 309 | * // or scratch buffer. Copy them out to user memory, e.g. | ||
| 309 | * copy_single_comp(dest_comp++, &comp); | ||
| 310 | * } | ||
| 311 | * if (unlikely(status)) | ||
| 312 | * goto err; | ||
| 313 | */ | ||
| 314 | |||
| 315 | struct pnfs_osd_xdr_decode_layout_iter { | ||
| 316 | unsigned total_comps; | ||
| 317 | unsigned decoded_comps; | ||
| 318 | }; | ||
| 319 | |||
| 320 | extern int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout, | ||
| 321 | struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr); | ||
| 322 | |||
| 323 | extern bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp, | ||
| 324 | struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr, | ||
| 325 | int *err); | ||
| 326 | |||
| 327 | /* Device Info helpers */ | ||
| 328 | |||
| 329 | /* Note: All strings inside @deviceaddr point to space inside @p. | ||
| 330 | * @p should stay valid while @deviceaddr is in use. | ||
| 331 | */ | ||
| 332 | extern void pnfs_osd_xdr_decode_deviceaddr( | ||
| 333 | struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p); | ||
| 334 | |||
| 335 | /* layoutupdate (layout_commit) xdr helpers */ | ||
| 336 | extern int | ||
| 337 | pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr, | ||
| 338 | struct pnfs_osd_layoutupdate *lou); | ||
| 339 | |||
| 340 | /* osd_ioerror encoding/decoding (layout_return) */ | ||
| 341 | /* Client */ | ||
| 342 | extern __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr); | ||
| 343 | extern void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr); | ||
| 344 | |||
| 345 | #endif /* __PNFS_OSD_XDR_H__ */ | ||
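Editor's note: the reserve/encode split for I/O error reports suggests a two-step use: grab wire space for one pnfs_osd_ioerr record first, then fill it in once the values are known. A hedged sketch of one plausible caller, for instance from a driver's encode_layoutreturn hook; the name and control flow are assumptions, not taken from this merge:

    static void example_encode_one_ioerr(struct xdr_stream *xdr,
                                         struct pnfs_osd_ioerr *oer)
    {
            __be32 *p = pnfs_osd_xdr_ioerr_reserve_space(xdr);

            if (p)          /* a NULL return presumably means the stream is out of space */
                    pnfs_osd_xdr_encode_ioerr(p, oer);
    }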
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index fc84b7a19ca3..a20970ef9e4e 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h | |||
| @@ -216,6 +216,8 @@ extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); | |||
| 216 | extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, | 216 | extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, |
| 217 | unsigned int base, unsigned int len); | 217 | unsigned int base, unsigned int len); |
| 218 | extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); | 218 | extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); |
| 219 | extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, | ||
| 220 | struct page **pages, unsigned int len); | ||
| 219 | extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen); | 221 | extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen); |
| 220 | extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); | 222 | extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); |
| 221 | extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len); | 223 | extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len); |
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 679cd674b81d..f008c14ad34c 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c | |||
| @@ -638,6 +638,25 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) | |||
| 638 | } | 638 | } |
| 639 | EXPORT_SYMBOL_GPL(xdr_init_decode); | 639 | EXPORT_SYMBOL_GPL(xdr_init_decode); |
| 640 | 640 | ||
| 641 | /** | ||
| 642 | * xdr_init_decode_pages - Initialize an xdr_stream for decoding page-based data. | ||
| 643 | * @xdr: pointer to xdr_stream struct | ||
| 644 | * @buf: pointer to XDR buffer from which to decode data | ||
| 645 | * @pages: list of pages holding the xdr data to decode | ||
| 646 | * @len: length in bytes of buffer in pages | ||
| 647 | */ | ||
| 648 | void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, | ||
| 649 | struct page **pages, unsigned int len) | ||
| 650 | { | ||
| 651 | memset(buf, 0, sizeof(*buf)); | ||
| 652 | buf->pages = pages; | ||
| 653 | buf->page_len = len; | ||
| 654 | buf->buflen = len; | ||
| 655 | buf->len = len; | ||
| 656 | xdr_init_decode(xdr, buf, NULL); | ||
| 657 | } | ||
| 658 | EXPORT_SYMBOL_GPL(xdr_init_decode_pages); | ||
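Editor's note: xdr_init_decode_pages() saves callers that already hold an XDR blob in page form (a GETDEVICEINFO reply, for example) from setting up the xdr_buf by hand. A minimal sketch of such a caller; the function name and the meaning of the first word are illustrative:

    #include <linux/sunrpc/xdr.h>

    static int example_peek_first_word(struct page **pages, unsigned int len)
    {
            struct xdr_stream xdr;
            struct xdr_buf buf;
            __be32 *p;

            xdr_init_decode_pages(&xdr, &buf, pages, len);
            p = xdr_inline_decode(&xdr, 4); /* e.g. a leading array count */
            if (unlikely(!p))
                    return -EINVAL;
            return be32_to_cpup(p);
    }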
| 659 | |||
| 641 | static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) | 660 | static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) |
| 642 | { | 661 | { |
| 643 | __be32 *p = xdr->p; | 662 | __be32 *p = xdr->p; |
