Diffstat (limited to 'fs/nfs/pnfs.c')
-rw-r--r--   fs/nfs/pnfs.c   526
1 file changed, 354 insertions, 172 deletions
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index db773428f95f..1b1bc1a0fb0a 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -177,105 +177,149 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
  * pNFS client layout cache
  */
 
+/* Need to hold i_lock if caller does not already hold reference */
+void
+get_layout_hdr(struct pnfs_layout_hdr *lo)
+{
+        atomic_inc(&lo->plh_refcount);
+}
+
 static void
-get_layout_hdr_locked(struct pnfs_layout_hdr *lo)
+destroy_layout_hdr(struct pnfs_layout_hdr *lo)
 {
-        assert_spin_locked(&lo->inode->i_lock);
-        lo->refcount++;
+        dprintk("%s: freeing layout cache %p\n", __func__, lo);
+        BUG_ON(!list_empty(&lo->plh_layouts));
+        NFS_I(lo->plh_inode)->layout = NULL;
+        kfree(lo);
 }
 
 static void
 put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
 {
-        assert_spin_locked(&lo->inode->i_lock);
-        BUG_ON(lo->refcount == 0);
-
-        lo->refcount--;
-        if (!lo->refcount) {
-                dprintk("%s: freeing layout cache %p\n", __func__, lo);
-                BUG_ON(!list_empty(&lo->layouts));
-                NFS_I(lo->inode)->layout = NULL;
-                kfree(lo);
-        }
+        if (atomic_dec_and_test(&lo->plh_refcount))
+                destroy_layout_hdr(lo);
 }
 
 void
-put_layout_hdr(struct inode *inode)
+put_layout_hdr(struct pnfs_layout_hdr *lo)
 {
-        spin_lock(&inode->i_lock);
-        put_layout_hdr_locked(NFS_I(inode)->layout);
-        spin_unlock(&inode->i_lock);
+        struct inode *inode = lo->plh_inode;
+
+        if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
+                destroy_layout_hdr(lo);
+                spin_unlock(&inode->i_lock);
+        }
 }
 
 static void
 init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
 {
-        INIT_LIST_HEAD(&lseg->fi_list);
-        kref_init(&lseg->kref);
-        lseg->layout = lo;
+        INIT_LIST_HEAD(&lseg->pls_list);
+        atomic_set(&lseg->pls_refcount, 1);
+        smp_mb();
+        set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
+        lseg->pls_layout = lo;
 }
 
-/* Called without i_lock held, as the free_lseg call may sleep */
-static void
-destroy_lseg(struct kref *kref)
+static void free_lseg(struct pnfs_layout_segment *lseg)
 {
-        struct pnfs_layout_segment *lseg =
-                container_of(kref, struct pnfs_layout_segment, kref);
-        struct inode *ino = lseg->layout->inode;
+        struct inode *ino = lseg->pls_layout->plh_inode;
 
-        dprintk("--> %s\n", __func__);
         NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
-        /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
-        put_layout_hdr(ino);
+        /* Matched by get_layout_hdr in pnfs_insert_layout */
+        put_layout_hdr(NFS_I(ino)->layout);
 }
 
-static void
-put_lseg(struct pnfs_layout_segment *lseg)
+/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
+ * could sleep, so must be called outside of the lock.
+ * Returns 1 if object was removed, otherwise return 0.
+ */
+static int
+put_lseg_locked(struct pnfs_layout_segment *lseg,
+                struct list_head *tmp_list)
+{
+        dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
+                atomic_read(&lseg->pls_refcount),
+                test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
+        if (atomic_dec_and_test(&lseg->pls_refcount)) {
+                struct inode *ino = lseg->pls_layout->plh_inode;
+
+                BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
+                list_del(&lseg->pls_list);
+                if (list_empty(&lseg->pls_layout->plh_segs)) {
+                        struct nfs_client *clp;
+
+                        clp = NFS_SERVER(ino)->nfs_client;
+                        spin_lock(&clp->cl_lock);
+                        /* List does not take a reference, so no need for put here */
+                        list_del_init(&lseg->pls_layout->plh_layouts);
+                        spin_unlock(&clp->cl_lock);
+                        clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
+                }
+                rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq);
+                list_add(&lseg->pls_list, tmp_list);
+                return 1;
+        }
+        return 0;
+}
+
+static bool
+should_free_lseg(u32 lseg_iomode, u32 recall_iomode)
 {
-        if (!lseg)
-                return;
+        return (recall_iomode == IOMODE_ANY ||
+                lseg_iomode == recall_iomode);
+}
 
-        dprintk("%s: lseg %p ref %d\n", __func__, lseg,
-                atomic_read(&lseg->kref.refcount));
-        kref_put(&lseg->kref, destroy_lseg);
+/* Returns 1 if lseg is removed from list, 0 otherwise */
+static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
+                             struct list_head *tmp_list)
+{
+        int rv = 0;
+
+        if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
+                /* Remove the reference keeping the lseg in the
+                 * list. It will now be removed when all
+                 * outstanding io is finished.
+                 */
+                rv = put_lseg_locked(lseg, tmp_list);
+        }
+        return rv;
 }
 
-static void
-pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list)
+/* Returns count of number of matching invalid lsegs remaining in list
+ * after call.
+ */
+int
+mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
+                            struct list_head *tmp_list,
+                            u32 iomode)
 {
         struct pnfs_layout_segment *lseg, *next;
-        struct nfs_client *clp;
+        int invalid = 0, removed = 0;
 
         dprintk("%s:Begin lo %p\n", __func__, lo);
 
-        assert_spin_locked(&lo->inode->i_lock);
-        list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) {
-                dprintk("%s: freeing lseg %p\n", __func__, lseg);
-                list_move(&lseg->fi_list, tmp_list);
-        }
-        clp = NFS_SERVER(lo->inode)->nfs_client;
-        spin_lock(&clp->cl_lock);
-        /* List does not take a reference, so no need for put here */
-        list_del_init(&lo->layouts);
-        spin_unlock(&clp->cl_lock);
-        write_seqlock(&lo->seqlock);
-        clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
-        write_sequnlock(&lo->seqlock);
-
-        dprintk("%s:Return\n", __func__);
+        list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
+                if (should_free_lseg(lseg->pls_range.iomode, iomode)) {
+                        dprintk("%s: freeing lseg %p iomode %d "
+                                "offset %llu length %llu\n", __func__,
+                                lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
+                                lseg->pls_range.length);
+                        invalid++;
+                        removed += mark_lseg_invalid(lseg, tmp_list);
+                }
+        dprintk("%s:Return %i\n", __func__, invalid - removed);
+        return invalid - removed;
 }
 
-static void
-pnfs_free_lseg_list(struct list_head *tmp_list)
+void
+pnfs_free_lseg_list(struct list_head *free_me)
 {
-        struct pnfs_layout_segment *lseg;
+        struct pnfs_layout_segment *lseg, *tmp;
 
-        while (!list_empty(tmp_list)) {
-                lseg = list_entry(tmp_list->next, struct pnfs_layout_segment,
-                                  fi_list);
-                dprintk("%s calling put_lseg on %p\n", __func__, lseg);
-                list_del(&lseg->fi_list);
-                put_lseg(lseg);
+        list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
+                list_del(&lseg->pls_list);
+                free_lseg(lseg);
         }
 }
 
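Note on the refcounting change above: the header refcount moves from an integer guarded by i_lock to an atomic_t, and two put variants fall out of it. put_layout_hdr_locked() can use plain atomic_dec_and_test() because the caller already holds i_lock, while put_layout_hdr() uses atomic_dec_and_lock(), which only acquires the lock when the count is about to hit zero, so teardown is serialized against lookups without paying for the lock on every put. A minimal sketch of the unlocked variant, with a hypothetical struct foo standing in for the layout header (not the kernel code itself):

    #include <linux/atomic.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct foo {
            atomic_t refcount;
            spinlock_t *guard;      /* lock that lookups take before getting a ref */
    };

    static void foo_put(struct foo *f)
    {
            /*
             * Common case: the decrement does not reach zero and no lock
             * is taken.  If it would reach zero, atomic_dec_and_lock()
             * returns true with *guard held, so no concurrent lookup can
             * grab a new reference while the object is torn down.
             */
            if (atomic_dec_and_lock(&f->refcount, f->guard)) {
                    spinlock_t *guard = f->guard;

                    /* unlink from the lookup structure, then free */
                    kfree(f);
                    spin_unlock(guard);
            }
    }

The tmp_list idiom in put_lseg_locked()/pnfs_free_lseg_list() is the companion pattern: objects are unlinked and collected under the lock, and the sleeping free_lseg() calls happen only after the lock is dropped.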
@@ -288,7 +332,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
         spin_lock(&nfsi->vfs_inode.i_lock);
         lo = nfsi->layout;
         if (lo) {
-                pnfs_clear_lseg_list(lo, &tmp_list);
+                set_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags);
+                mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
                 /* Matched by refcount set to 1 in alloc_init_layout_hdr */
                 put_layout_hdr_locked(lo);
         }
@@ -312,76 +357,80 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
 
         while (!list_empty(&tmp_list)) {
                 lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
-                                layouts);
+                                plh_layouts);
                 dprintk("%s freeing layout for inode %lu\n", __func__,
-                        lo->inode->i_ino);
-                pnfs_destroy_layout(NFS_I(lo->inode));
+                        lo->plh_inode->i_ino);
+                pnfs_destroy_layout(NFS_I(lo->plh_inode));
         }
 }
 
-/* update lo->stateid with new if is more recent
- *
- * lo->stateid could be the open stateid, in which case we just use what given.
- */
-static void
-pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
-                        const nfs4_stateid *new)
-{
-        nfs4_stateid *old = &lo->stateid;
-        bool overwrite = false;
-
-        write_seqlock(&lo->seqlock);
-        if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) ||
-            memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other)))
-                overwrite = true;
-        else {
-                u32 oldseq, newseq;
-
-                oldseq = be32_to_cpu(old->stateid.seqid);
-                newseq = be32_to_cpu(new->stateid.seqid);
-                if ((int)(newseq - oldseq) > 0)
-                        overwrite = true;
+/* update lo->plh_stateid with new if is more recent */
+void
+pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
+                        bool update_barrier)
+{
+        u32 oldseq, newseq;
+
+        oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid);
+        newseq = be32_to_cpu(new->stateid.seqid);
+        if ((int)(newseq - oldseq) > 0) {
+                memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid));
+                if (update_barrier) {
+                        u32 new_barrier = be32_to_cpu(new->stateid.seqid);
+
+                        if ((int)(new_barrier - lo->plh_barrier))
+                                lo->plh_barrier = new_barrier;
+                } else {
+                        /* Because of wraparound, we want to keep the barrier
+                         * "close" to the current seqids. It needs to be
+                         * within 2**31 to count as "behind", so if it
+                         * gets too near that limit, give us a little leeway
+                         * and bring it to within 2**30.
+                         * NOTE - and yes, this is all unsigned arithmetic.
+                         */
+                        if (unlikely((newseq - lo->plh_barrier) > (3 << 29)))
+                                lo->plh_barrier = newseq - (1 << 30);
+                }
         }
-        if (overwrite)
-                memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
-        write_sequnlock(&lo->seqlock);
 }
 
-static void
-pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
-                              struct nfs4_state *state)
+/* lget is set to 1 if called from inside send_layoutget call chain */
+static bool
+pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
+                        int lget)
 {
-        int seq;
-
-        dprintk("--> %s\n", __func__);
-        write_seqlock(&lo->seqlock);
-        do {
-                seq = read_seqbegin(&state->seqlock);
-                memcpy(lo->stateid.data, state->stateid.data,
-                       sizeof(state->stateid.data));
-        } while (read_seqretry(&state->seqlock, seq));
-        set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
-        write_sequnlock(&lo->seqlock);
-        dprintk("<-- %s\n", __func__);
+        if ((stateid) &&
+            (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
+                return true;
+        return lo->plh_block_lgets ||
+                test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
+                (list_empty(&lo->plh_segs) &&
+                 (atomic_read(&lo->plh_outstanding) > lget));
 }
 
-void
-pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
+int
+pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
                         struct nfs4_state *open_state)
 {
-        int seq;
+        int status = 0;
 
         dprintk("--> %s\n", __func__);
-        do {
-                seq = read_seqbegin(&lo->seqlock);
-                if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
-                        /* This will trigger retry of the read */
-                        pnfs_layout_from_open_stateid(lo, open_state);
-                } else
-                        memcpy(dst->data, lo->stateid.data,
-                               sizeof(lo->stateid.data));
-        } while (read_seqretry(&lo->seqlock, seq));
+        spin_lock(&lo->plh_inode->i_lock);
+        if (pnfs_layoutgets_blocked(lo, NULL, 1)) {
+                status = -EAGAIN;
+        } else if (list_empty(&lo->plh_segs)) {
+                int seq;
+
+                do {
+                        seq = read_seqbegin(&open_state->seqlock);
+                        memcpy(dst->data, open_state->stateid.data,
+                               sizeof(open_state->stateid.data));
+                } while (read_seqretry(&open_state->seqlock, seq));
+        } else
+                memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data));
+        spin_unlock(&lo->plh_inode->i_lock);
         dprintk("<-- %s\n", __func__);
+        return status;
 }
 
 /*
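Note on the barrier logic above: pnfs_set_layout_stateid() and pnfs_layoutgets_blocked() rely on serial-number arithmetic. Seqids are compared with `(int)(a - b) > 0`, so the ordering survives wraparound of the 32-bit counter as long as the two values are within 2**31 of each other; the `3 << 29` check keeps plh_barrier inside that window. A self-contained illustration of the comparison (plain C, not the kernel code):

    #include <assert.h>
    #include <stdint.h>

    /* "a is newer than b"; valid while the two are within 2**31 of each other */
    static int seqid_after(uint32_t a, uint32_t b)
    {
            return (int32_t)(a - b) > 0;
    }

    int main(void)
    {
            assert(seqid_after(5, 4));
            /* wraparound: 0x00000002 is newer than 0xfffffffe */
            assert(seqid_after(2u, 0xfffffffeu));
            /* exactly 2**31 apart: ordering is no longer meaningful */
            assert(!seqid_after(1u, 1u + (1u << 31)));
            return 0;
    }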
@@ -395,7 +444,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
                struct nfs_open_context *ctx,
                u32 iomode)
 {
-        struct inode *ino = lo->inode;
+        struct inode *ino = lo->plh_inode;
         struct nfs_server *server = NFS_SERVER(ino);
         struct nfs4_layoutget *lgp;
         struct pnfs_layout_segment *lseg = NULL;
@@ -404,10 +453,8 @@ send_layoutget(struct pnfs_layout_hdr *lo,
 
         BUG_ON(ctx == NULL);
         lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
-        if (lgp == NULL) {
-                put_layout_hdr(lo->inode);
+        if (lgp == NULL)
                 return NULL;
-        }
         lgp->args.minlength = NFS4_MAX_UINT64;
         lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
         lgp->args.range.iomode = iomode;
@@ -424,11 +471,88 @@ send_layoutget(struct pnfs_layout_hdr *lo,
         nfs4_proc_layoutget(lgp);
         if (!lseg) {
                 /* remember that LAYOUTGET failed and suspend trying */
-                set_bit(lo_fail_bit(iomode), &lo->state);
+                set_bit(lo_fail_bit(iomode), &lo->plh_flags);
         }
         return lseg;
 }
 
+bool pnfs_roc(struct inode *ino)
+{
+        struct pnfs_layout_hdr *lo;
+        struct pnfs_layout_segment *lseg, *tmp;
+        LIST_HEAD(tmp_list);
+        bool found = false;
+
+        spin_lock(&ino->i_lock);
+        lo = NFS_I(ino)->layout;
+        if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) ||
+            test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
+                goto out_nolayout;
+        list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
+                if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
+                        mark_lseg_invalid(lseg, &tmp_list);
+                        found = true;
+                }
+        if (!found)
+                goto out_nolayout;
+        lo->plh_block_lgets++;
+        get_layout_hdr(lo); /* matched in pnfs_roc_release */
+        spin_unlock(&ino->i_lock);
+        pnfs_free_lseg_list(&tmp_list);
+        return true;
+
+out_nolayout:
+        spin_unlock(&ino->i_lock);
+        return false;
+}
+
+void pnfs_roc_release(struct inode *ino)
+{
+        struct pnfs_layout_hdr *lo;
+
+        spin_lock(&ino->i_lock);
+        lo = NFS_I(ino)->layout;
+        lo->plh_block_lgets--;
+        put_layout_hdr_locked(lo);
+        spin_unlock(&ino->i_lock);
+}
+
+void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
+{
+        struct pnfs_layout_hdr *lo;
+
+        spin_lock(&ino->i_lock);
+        lo = NFS_I(ino)->layout;
+        if ((int)(barrier - lo->plh_barrier) > 0)
+                lo->plh_barrier = barrier;
+        spin_unlock(&ino->i_lock);
+}
+
+bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
+{
+        struct nfs_inode *nfsi = NFS_I(ino);
+        struct pnfs_layout_segment *lseg;
+        bool found = false;
+
+        spin_lock(&ino->i_lock);
+        list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
+                if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
+                        found = true;
+                        break;
+                }
+        if (!found) {
+                struct pnfs_layout_hdr *lo = nfsi->layout;
+                u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid);
+
+                /* Since close does not return a layout stateid for use as
+                 * a barrier, we choose the worst-case barrier.
+                 */
+                *barrier = current_seqid + atomic_read(&lo->plh_outstanding);
+        }
+        spin_unlock(&ino->i_lock);
+        return found;
+}
+
 /*
  * Compare two layout segments for sorting into layout cache.
  * We want to preferentially return RW over RO layouts, so ensure those
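Note on the stateid copy in pnfs_choose_layoutget_stateid() (earlier in this diff): when the layout has no segments yet, the open stateid is copied under open_state->seqlock using the standard lockless-reader loop; the reader takes no lock and simply retries the copy if a writer ran concurrently. A minimal sketch of the reader/writer pairing on hypothetical data (not the NFS code):

    #include <linux/seqlock.h>
    #include <linux/string.h>

    static DEFINE_SEQLOCK(data_lock);
    static char data[16];

    /* Writer: bumps the sequence count around the update */
    static void data_write(const char *src)
    {
            write_seqlock(&data_lock);
            memcpy(data, src, sizeof(data));
            write_sequnlock(&data_lock);
    }

    /* Reader: lockless; retries if the sequence changed mid-copy */
    static void data_read(char *dst)
    {
            unsigned int seq;

            do {
                    seq = read_seqbegin(&data_lock);
                    memcpy(dst, data, sizeof(data));
            } while (read_seqretry(&data_lock, seq));
    }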
@@ -450,37 +574,29 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
 
         dprintk("%s:Begin\n", __func__);
 
-        assert_spin_locked(&lo->inode->i_lock);
-        if (list_empty(&lo->segs)) {
-                struct nfs_client *clp = NFS_SERVER(lo->inode)->nfs_client;
-
-                spin_lock(&clp->cl_lock);
-                BUG_ON(!list_empty(&lo->layouts));
-                list_add_tail(&lo->layouts, &clp->cl_layouts);
-                spin_unlock(&clp->cl_lock);
-        }
-        list_for_each_entry(lp, &lo->segs, fi_list) {
-                if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0)
+        assert_spin_locked(&lo->plh_inode->i_lock);
+        list_for_each_entry(lp, &lo->plh_segs, pls_list) {
+                if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0)
                         continue;
-                list_add_tail(&lseg->fi_list, &lp->fi_list);
+                list_add_tail(&lseg->pls_list, &lp->pls_list);
                 dprintk("%s: inserted lseg %p "
                         "iomode %d offset %llu length %llu before "
                         "lp %p iomode %d offset %llu length %llu\n",
-                        __func__, lseg, lseg->range.iomode,
-                        lseg->range.offset, lseg->range.length,
-                        lp, lp->range.iomode, lp->range.offset,
-                        lp->range.length);
+                        __func__, lseg, lseg->pls_range.iomode,
+                        lseg->pls_range.offset, lseg->pls_range.length,
+                        lp, lp->pls_range.iomode, lp->pls_range.offset,
+                        lp->pls_range.length);
                 found = 1;
                 break;
         }
         if (!found) {
-                list_add_tail(&lseg->fi_list, &lo->segs);
+                list_add_tail(&lseg->pls_list, &lo->plh_segs);
                 dprintk("%s: inserted lseg %p "
                         "iomode %d offset %llu length %llu at tail\n",
-                        __func__, lseg, lseg->range.iomode,
-                        lseg->range.offset, lseg->range.length);
+                        __func__, lseg, lseg->pls_range.iomode,
+                        lseg->pls_range.offset, lseg->pls_range.length);
         }
-        get_layout_hdr_locked(lo);
+        get_layout_hdr(lo);
 
         dprintk("%s:Return\n", __func__);
 }
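Note on the insertion above: pnfs_insert_layout() keeps plh_segs ordered by cmp_layout() (RW segments sorted ahead of READ ones). It walks the list, skips entries that sort ahead of the new segment, and inserts in front of the first entry that does not, falling back to the list tail; list_add_tail() on a member's list_head places the new node immediately before that member in the circular list. The same idiom on a plain integer key, as a sketch rather than the pnfs code:

    #include <linux/list.h>

    struct item {
            int key;                        /* sort key, ascending */
            struct list_head node;
    };

    /* Insert 'new' so the list stays sorted by key (ascending). */
    static void sorted_insert(struct list_head *head, struct item *new)
    {
            struct item *pos;

            list_for_each_entry(pos, head, node) {
                    if (pos->key <= new->key)
                            continue;       /* keep walking past smaller keys */
                    /* add immediately before the first larger entry */
                    list_add_tail(&new->node, &pos->node);
                    return;
            }
            list_add_tail(&new->node, head); /* largest so far: append at the end */
    }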
@@ -493,11 +609,11 @@ alloc_init_layout_hdr(struct inode *ino)
         lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
         if (!lo)
                 return NULL;
-        lo->refcount = 1;
-        INIT_LIST_HEAD(&lo->layouts);
-        INIT_LIST_HEAD(&lo->segs);
-        seqlock_init(&lo->seqlock);
-        lo->inode = ino;
+        atomic_set(&lo->plh_refcount, 1);
+        INIT_LIST_HEAD(&lo->plh_layouts);
+        INIT_LIST_HEAD(&lo->plh_segs);
+        INIT_LIST_HEAD(&lo->plh_bulk_recall);
+        lo->plh_inode = ino;
         return lo;
 }
 
@@ -510,9 +626,12 @@ pnfs_find_alloc_layout(struct inode *ino)
         dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
 
         assert_spin_locked(&ino->i_lock);
-        if (nfsi->layout)
-                return nfsi->layout;
-
+        if (nfsi->layout) {
+                if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags))
+                        return NULL;
+                else
+                        return nfsi->layout;
+        }
         spin_unlock(&ino->i_lock);
         new = alloc_init_layout_hdr(ino);
         spin_lock(&ino->i_lock);
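Note: pnfs_find_alloc_layout() is built around a common spinlock pattern visible in the context lines above. i_lock cannot be held across a GFP_KERNEL allocation (which may sleep), so the lock is dropped, the header allocated, the lock retaken, and the field rechecked, since another task may have installed a layout in the window. A sketch of that shape with hypothetical types, not the pnfs code:

    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct foo { int dummy; };
    struct bar {
            spinlock_t lock;
            struct foo *foo;
    };

    /* Called with bar->lock held; returns with it held. */
    static struct foo *find_alloc_foo(struct bar *bar)
    {
            struct foo *new;

            if (bar->foo)                   /* fast path: already installed */
                    return bar->foo;

            spin_unlock(&bar->lock);        /* GFP_KERNEL may sleep */
            new = kzalloc(sizeof(*new), GFP_KERNEL);
            spin_lock(&bar->lock);

            if (likely(bar->foo == NULL))   /* recheck under the lock */
                    bar->foo = new;
            else
                    kfree(new);             /* raced: another task won; kfree(NULL) is a no-op */
            return bar->foo;
    }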
@@ -538,31 +657,32 @@ pnfs_find_alloc_layout(struct inode *ino)
 static int
 is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
 {
-        return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW);
+        return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW);
 }
 
 /*
  * lookup range in layout
  */
 static struct pnfs_layout_segment *
-pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode)
+pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
 {
         struct pnfs_layout_segment *lseg, *ret = NULL;
 
         dprintk("%s:Begin\n", __func__);
 
-        assert_spin_locked(&lo->inode->i_lock);
-        list_for_each_entry(lseg, &lo->segs, fi_list) {
-                if (is_matching_lseg(lseg, iomode)) {
+        assert_spin_locked(&lo->plh_inode->i_lock);
+        list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
+                if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
+                    is_matching_lseg(lseg, iomode)) {
                         ret = lseg;
                         break;
                 }
-                if (cmp_layout(iomode, lseg->range.iomode) > 0)
+                if (cmp_layout(iomode, lseg->pls_range.iomode) > 0)
                         break;
         }
 
         dprintk("%s:Return lseg %p ref %d\n",
-                __func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0);
+                __func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0);
         return ret;
 }
 
@@ -576,6 +696,7 @@ pnfs_update_layout(struct inode *ino,
                   enum pnfs_iomode iomode)
 {
         struct nfs_inode *nfsi = NFS_I(ino);
+        struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
         struct pnfs_layout_hdr *lo;
         struct pnfs_layout_segment *lseg = NULL;
 
@@ -588,25 +709,53 @@ pnfs_update_layout(struct inode *ino,
                 goto out_unlock;
         }
 
-        /* Check to see if the layout for the given range already exists */
-        lseg = pnfs_has_layout(lo, iomode);
-        if (lseg) {
-                dprintk("%s: Using cached lseg %p for iomode %d)\n",
-                        __func__, lseg, iomode);
+        /* Do we even need to bother with this? */
+        if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
+            test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+                dprintk("%s matches recall, use MDS\n", __func__);
                 goto out_unlock;
         }
+        /* Check to see if the layout for the given range already exists */
+        lseg = pnfs_find_lseg(lo, iomode);
+        if (lseg)
+                goto out_unlock;
 
         /* if LAYOUTGET already failed once we don't try again */
-        if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state))
+        if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
+                goto out_unlock;
+
+        if (pnfs_layoutgets_blocked(lo, NULL, 0))
                 goto out_unlock;
+        atomic_inc(&lo->plh_outstanding);
 
-        get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */
+        get_layout_hdr(lo);
+        if (list_empty(&lo->plh_segs)) {
+                /* The lo must be on the clp list if there is any
+                 * chance of a CB_LAYOUTRECALL(FILE) coming in.
+                 */
+                spin_lock(&clp->cl_lock);
+                BUG_ON(!list_empty(&lo->plh_layouts));
+                list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
+                spin_unlock(&clp->cl_lock);
+        }
         spin_unlock(&ino->i_lock);
 
         lseg = send_layoutget(lo, ctx, iomode);
+        if (!lseg) {
+                spin_lock(&ino->i_lock);
+                if (list_empty(&lo->plh_segs)) {
+                        spin_lock(&clp->cl_lock);
+                        list_del_init(&lo->plh_layouts);
+                        spin_unlock(&clp->cl_lock);
+                        clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
+                }
+                spin_unlock(&ino->i_lock);
+        }
+        atomic_dec(&lo->plh_outstanding);
+        put_layout_hdr(lo);
 out:
         dprintk("%s end, state 0x%lx lseg %p\n", __func__,
-                nfsi->layout->state, lseg);
+                nfsi->layout->plh_flags, lseg);
         return lseg;
 out_unlock:
         spin_unlock(&ino->i_lock);
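Note on the hunk above: pnfs_update_layout() now brackets the over-the-wire LAYOUTGET with atomic_inc()/atomic_dec() of plh_outstanding, so pnfs_layoutgets_blocked() can see requests in flight, and with a get_layout_hdr()/put_layout_hdr() pair, so the header cannot be freed while i_lock is dropped for the RPC. The general shape of that pattern, sketched with hypothetical obj_* helpers standing in for the pnfs ones:

    #include <linux/atomic.h>
    #include <linux/spinlock.h>

    struct obj {
            spinlock_t lock;
            atomic_t outstanding;           /* requests currently on the wire */
            /* ... refcount, flags ... */
    };

    /* Hypothetical helpers; the real ones here are get_layout_hdr() etc. */
    void obj_get(struct obj *o);
    void obj_put(struct obj *o);
    bool obj_blocked(struct obj *o);
    int send_rpc(struct obj *o);

    static int obj_do_request(struct obj *o)
    {
            int err;

            spin_lock(&o->lock);
            if (obj_blocked(o)) {           /* e.g. a recall is in progress */
                    spin_unlock(&o->lock);
                    return -EAGAIN;
            }
            atomic_inc(&o->outstanding);    /* visible to the "blocked" test */
            obj_get(o);                     /* pin across the unlocked region */
            spin_unlock(&o->lock);

            err = send_rpc(o);              /* may sleep; lock not held */

            atomic_dec(&o->outstanding);
            obj_put(o);                     /* may free o on last reference */
            return err;
    }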
@@ -619,9 +768,21 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
         struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
         struct nfs4_layoutget_res *res = &lgp->res;
         struct pnfs_layout_segment *lseg;
-        struct inode *ino = lo->inode;
+        struct inode *ino = lo->plh_inode;
+        struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
         int status = 0;
 
+        /* Verify we got what we asked for.
+         * Note that because the xdr parsing only accepts a single
+         * element array, this can fail even if the server is behaving
+         * correctly.
+         */
+        if (lgp->args.range.iomode > res->range.iomode ||
+            res->range.offset != 0 ||
+            res->range.length != NFS4_MAX_UINT64) {
+                status = -EINVAL;
+                goto out;
+        }
         /* Inject layout blob into I/O device driver */
         lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
         if (!lseg || IS_ERR(lseg)) {
@@ -635,16 +796,37 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
         }
 
         spin_lock(&ino->i_lock);
+        if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
+            test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+                dprintk("%s forget reply due to recall\n", __func__);
+                goto out_forget_reply;
+        }
+
+        if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) {
+                dprintk("%s forget reply due to state\n", __func__);
+                goto out_forget_reply;
+        }
         init_lseg(lo, lseg);
-        lseg->range = res->range;
+        lseg->pls_range = res->range;
         *lgp->lsegpp = lseg;
         pnfs_insert_layout(lo, lseg);
 
+        if (res->return_on_close) {
+                set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
+                set_bit(NFS_LAYOUT_ROC, &lo->plh_flags);
+        }
+
         /* Done processing layoutget. Set the layout stateid */
-        pnfs_set_layout_stateid(lo, &res->stateid);
+        pnfs_set_layout_stateid(lo, &res->stateid, false);
         spin_unlock(&ino->i_lock);
 out:
         return status;
+
+out_forget_reply:
+        spin_unlock(&ino->i_lock);
+        lseg->pls_layout = lo;
+        NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
+        goto out;
 }
 
 /*
@@ -769,7 +951,7 @@ pnfs_put_deviceid_cache(struct nfs_client *clp)
 {
         struct pnfs_deviceid_cache *local = clp->cl_devid_cache;
 
-        dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
+        dprintk("--> %s ({%d})\n", __func__, atomic_read(&local->dc_ref));
         if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) {
                 int i;
                 /* Verify cache is empty */