author		Long Li <longli@microsoft.com>	2018-10-31 18:13:09 -0400
committer	Steve French <stfrench@microsoft.com>	2018-11-02 15:09:41 -0400
commit		6e6e2b86c29c6fcfa16ad9fdc7ea32027bea5d73 (patch)
tree		1c1c63c0f8191fe88291fe6de8acd341e68bc608
parent		0df444a00f32a3ab4d37c3c101bb960ee38a9617 (diff)
CIFS: Add support for direct I/O read
With direct I/O read, we transfer the data directly from the transport layer to
the user data buffer.

Change in v3: add support for kernel AIO.

Changes in v4:
Refactor common read code into __cifs_readv for direct and non-direct I/O.
Retry on direct I/O failure.
Signed-off-by: Long Li <longli@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
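
For context, here is a minimal user-space sketch (not part of the patch) of how this read path would typically be exercised. The mount point and file name are hypothetical, and it assumes the share is mounted over CIFS and that O_DIRECT reads are routed to cifs_direct_readv (wired up separately from this patch):

/* Hypothetical test program: read 1 MiB with O_DIRECT from a CIFS mount. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	const size_t len = 1024 * 1024;	/* 1 MiB read, illustrative size */
	void *buf = NULL;
	ssize_t n;
	int fd;

	/* O_DIRECT needs a suitably aligned user buffer; page alignment is safe */
	if (posix_memalign(&buf, 4096, len))
		return 1;

	fd = open("/mnt/cifs/testfile", O_RDONLY | O_DIRECT);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* With direct I/O, data lands in buf straight from the transport layer,
	 * bypassing the client-side page cache. */
	n = read(fd, buf, len);
	if (n < 0)
		perror("read");
	else
		printf("read %zd bytes\n", n);

	close(fd);
	free(buf);
	return 0;
}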
 fs/cifs/cifsfs.h   |   1
 fs/cifs/cifsglob.h |   5
 fs/cifs/file.c     | 225
 3 files changed, 192 insertions(+), 39 deletions(-)
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 24e265a51874..3abea1a3f20c 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -101,6 +101,7 @@ extern int cifs_open(struct inode *inode, struct file *file);
 extern int cifs_close(struct inode *inode, struct file *file);
 extern int cifs_closedir(struct inode *inode, struct file *file);
 extern ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to);
+extern ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to);
 extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to);
 extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from);
 extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index d7c0443d47a4..38ab0fca49e1 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1186,6 +1186,11 @@ struct cifs_aio_ctx {
 	unsigned int		len;
 	unsigned int		total_len;
 	bool			should_dirty;
+	/*
+	 * Indicates if this aio_ctx is for direct_io,
+	 * If yes, iter is a copy of the user passed iov_iter
+	 */
+	bool			direct_io;
 };
 
 struct cifs_readdata;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 542d8828e1d0..1be36076e960 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2979,7 +2979,6 @@ cifs_uncached_readdata_release(struct kref *refcount)
 	kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
 	for (i = 0; i < rdata->nr_pages; i++) {
 		put_page(rdata->pages[i]);
-		rdata->pages[i] = NULL;
 	}
 	cifs_readdata_release(refcount);
 }
@@ -3106,6 +3105,67 @@ cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
 	return uncached_fill_pages(server, rdata, iter, iter->count);
 }
 
+static int cifs_resend_rdata(struct cifs_readdata *rdata,
+			struct list_head *rdata_list,
+			struct cifs_aio_ctx *ctx)
+{
+	int wait_retry = 0;
+	unsigned int rsize, credits;
+	int rc;
+	struct TCP_Server_Info *server =
+		tlink_tcon(rdata->cfile->tlink)->ses->server;
+
+	/*
+	 * Try to resend this rdata, waiting for credits up to 3 seconds.
+	 * Note: we are attempting to resend the whole rdata not in segments
+	 */
+	do {
+		rc = server->ops->wait_mtu_credits(server, rdata->bytes,
+						&rsize, &credits);
+
+		if (rc)
+			break;
+
+		if (rsize < rdata->bytes) {
+			add_credits_and_wake_if(server, credits, 0);
+			msleep(1000);
+			wait_retry++;
+		}
+	} while (rsize < rdata->bytes && wait_retry < 3);
+
+	/*
+	 * If we can't find enough credits to send this rdata
+	 * release the rdata and return failure, this will pass
+	 * whatever I/O amount we have finished to VFS.
+	 */
+	if (rsize < rdata->bytes) {
+		rc = -EBUSY;
+		goto out;
+	}
+
+	rc = -EAGAIN;
+	while (rc == -EAGAIN) {
+		rc = 0;
+		if (rdata->cfile->invalidHandle)
+			rc = cifs_reopen_file(rdata->cfile, true);
+		if (!rc)
+			rc = server->ops->async_readv(rdata);
+	}
+
+	if (!rc) {
+		/* Add to aio pending list */
+		list_add_tail(&rdata->list, rdata_list);
+		return 0;
+	}
+
+	add_credits_and_wake_if(server, rdata->credits, 0);
+out:
+	kref_put(&rdata->refcount,
+		cifs_uncached_readdata_release);
+
+	return rc;
+}
+
 static int
 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
 		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
@@ -3117,6 +3177,9 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
 	int rc;
 	pid_t pid;
 	struct TCP_Server_Info *server;
+	struct page **pagevec;
+	size_t start;
+	struct iov_iter direct_iov = ctx->iter;
 
 	server = tlink_tcon(open_file->tlink)->ses->server;
 
@@ -3125,6 +3188,9 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
 	else
 		pid = current->tgid;
 
+	if (ctx->direct_io)
+		iov_iter_advance(&direct_iov, offset - ctx->pos);
+
 	do {
 		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
 						   &rsize, &credits);
@@ -3132,20 +3198,57 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
 			break;
 
 		cur_len = min_t(const size_t, len, rsize);
-		npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
 
-		/* allocate a readdata struct */
-		rdata = cifs_readdata_alloc(npages,
+		if (ctx->direct_io) {
+
+			cur_len = iov_iter_get_pages_alloc(
+					&direct_iov, &pagevec,
+					cur_len, &start);
+			if (cur_len < 0) {
+				cifs_dbg(VFS,
+					"couldn't get user pages (cur_len=%zd)"
+					" iter type %d"
+					" iov_offset %zd count %zd\n",
+					cur_len, direct_iov.type,
+					direct_iov.iov_offset,
+					direct_iov.count);
+				dump_stack();
+				break;
+			}
+			iov_iter_advance(&direct_iov, cur_len);
+
+			rdata = cifs_readdata_direct_alloc(
+					pagevec, cifs_uncached_readv_complete);
+			if (!rdata) {
+				add_credits_and_wake_if(server, credits, 0);
+				rc = -ENOMEM;
+				break;
+			}
+
+			npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
+			rdata->page_offset = start;
+			rdata->tailsz = npages > 1 ?
+				cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
+				cur_len;
+
+		} else {
+
+			npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
+			/* allocate a readdata struct */
+			rdata = cifs_readdata_alloc(npages,
 					cifs_uncached_readv_complete);
 		if (!rdata) {
 			add_credits_and_wake_if(server, credits, 0);
 			rc = -ENOMEM;
 			break;
 		}
 
 		rc = cifs_read_allocate_pages(rdata, npages);
 		if (rc)
 			goto error;
+
+			rdata->tailsz = PAGE_SIZE;
+		}
 
 		rdata->cfile = cifsFileInfo_get(open_file);
 		rdata->nr_pages = npages;
@@ -3153,7 +3256,6 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
 		rdata->bytes = cur_len;
 		rdata->pid = pid;
 		rdata->pagesz = PAGE_SIZE;
-		rdata->tailsz = PAGE_SIZE;
 		rdata->read_into_pages = cifs_uncached_read_into_pages;
 		rdata->copy_into_pages = cifs_uncached_copy_into_pages;
 		rdata->credits = credits;
@@ -3167,9 +3269,11 @@ error:
 		if (rc) {
 			add_credits_and_wake_if(server, rdata->credits, 0);
 			kref_put(&rdata->refcount,
 				 cifs_uncached_readdata_release);
-			if (rc == -EAGAIN)
+			if (rc == -EAGAIN) {
+				iov_iter_revert(&direct_iov, cur_len);
 				continue;
+			}
 			break;
 		}
 
@@ -3225,45 +3329,62 @@ again:
 				 * reading.
 				 */
 				if (got_bytes && got_bytes < rdata->bytes) {
-					rc = cifs_readdata_to_iov(rdata, to);
+					rc = 0;
+					if (!ctx->direct_io)
+						rc = cifs_readdata_to_iov(rdata, to);
 					if (rc) {
 						kref_put(&rdata->refcount,
 						cifs_uncached_readdata_release);
 						continue;
 					}
 				}
 
-				rc = cifs_send_async_read(
+				if (ctx->direct_io) {
+					/*
+					 * Re-use rdata as this is a
+					 * direct I/O
+					 */
+					rc = cifs_resend_rdata(
+						rdata,
+						&tmp_list, ctx);
+				} else {
+					rc = cifs_send_async_read(
 						rdata->offset + got_bytes,
 						rdata->bytes - got_bytes,
 						rdata->cfile, cifs_sb,
 						&tmp_list, ctx);
 
+					kref_put(&rdata->refcount,
+						cifs_uncached_readdata_release);
+				}
+
 				list_splice(&tmp_list, &ctx->list);
 
-				kref_put(&rdata->refcount,
-					cifs_uncached_readdata_release);
 				goto again;
 			} else if (rdata->result)
 				rc = rdata->result;
-			else
+			else if (!ctx->direct_io)
 				rc = cifs_readdata_to_iov(rdata, to);
 
 			/* if there was a short read -- discard anything left */
 			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
 				rc = -ENODATA;
+
+			ctx->total_len += rdata->got_bytes;
 		}
 		list_del_init(&rdata->list);
 		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
 	}
 
-	for (i = 0; i < ctx->npages; i++) {
-		if (ctx->should_dirty)
-			set_page_dirty(ctx->bv[i].bv_page);
-		put_page(ctx->bv[i].bv_page);
-	}
+	if (!ctx->direct_io) {
+		for (i = 0; i < ctx->npages; i++) {
+			if (ctx->should_dirty)
+				set_page_dirty(ctx->bv[i].bv_page);
+			put_page(ctx->bv[i].bv_page);
+		}
 
 	ctx->total_len = ctx->len - iov_iter_count(to);
+	}
 
 	cifs_stats_bytes_read(tcon, ctx->total_len);
 
@@ -3281,18 +3402,28 @@ again:
 	complete(&ctx->done);
 }
 
-ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
+static ssize_t __cifs_readv(
+	struct kiocb *iocb, struct iov_iter *to, bool direct)
 {
-	struct file *file = iocb->ki_filp;
-	ssize_t rc;
 	size_t len;
-	ssize_t total_read = 0;
-	loff_t offset = iocb->ki_pos;
+	struct file *file = iocb->ki_filp;
 	struct cifs_sb_info *cifs_sb;
-	struct cifs_tcon *tcon;
 	struct cifsFileInfo *cfile;
+	struct cifs_tcon *tcon;
+	ssize_t rc, total_read = 0;
+	loff_t offset = iocb->ki_pos;
 	struct cifs_aio_ctx *ctx;
 
+	/*
+	 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
+	 * fall back to data copy read path
+	 * this could be improved by getting pages directly in ITER_KVEC
+	 */
+	if (direct && to->type & ITER_KVEC) {
+		cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
+		direct = false;
+	}
+
 	len = iov_iter_count(to);
 	if (!len)
 		return 0;
@@ -3319,14 +3450,20 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
 	if (iter_is_iovec(to))
 		ctx->should_dirty = true;
 
-	rc = setup_aio_ctx_iter(ctx, to, READ);
-	if (rc) {
-		kref_put(&ctx->refcount, cifs_aio_ctx_release);
-		return rc;
+	if (direct) {
+		ctx->pos = offset;
+		ctx->direct_io = true;
+		ctx->iter = *to;
+		ctx->len = len;
+	} else {
+		rc = setup_aio_ctx_iter(ctx, to, READ);
+		if (rc) {
+			kref_put(&ctx->refcount, cifs_aio_ctx_release);
+			return rc;
+		}
+		len = ctx->len;
 	}
 
-	len = ctx->len;
-
 	/* grab a lock here due to read response handlers can access ctx */
 	mutex_lock(&ctx->aio_mutex);
 
@@ -3368,6 +3505,16 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
 	return rc;
 }
 
+ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
+{
+	return __cifs_readv(iocb, to, true);
+}
+
+ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
+{
+	return __cifs_readv(iocb, to, false);
+}
+
 ssize_t
 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
 {
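
As a sanity check on the page accounting in the direct-I/O branch above, the following small user-space sketch (illustrative values only, PAGE_SIZE assumed to be 4096) reproduces the npages, page_offset and tailsz arithmetic for a read whose data starts in the middle of the first pinned page:

/* Worked example of the direct-I/O page math (not part of the patch). */
#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	/* Example: iov_iter_get_pages_alloc() pinned pages such that the data
	 * begins 1000 bytes into the first page and spans 10000 bytes. */
	unsigned long start = 1000, cur_len = 10000;

	unsigned long npages = (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
	unsigned long tailsz = npages > 1 ?
		cur_len - (PAGE_SIZE - start) - (npages - 2) * PAGE_SIZE :
		cur_len;

	/* First page holds PAGE_SIZE - start bytes, middle pages are full,
	 * and tailsz is whatever remains in the last page. */
	printf("npages=%lu page_offset=%lu tailsz=%lu\n", npages, start, tailsz);
	assert(npages == 3);
	assert(tailsz == 2808);
	return 0;
}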