diff options
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r-- | fs/xfs/xfs_inode.c | 3749 |
1 files changed, 1478 insertions, 2271 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index bb262c25c8de..e3d75385aa76 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -19,18 +19,23 @@ | |||
19 | 19 | ||
20 | #include "xfs.h" | 20 | #include "xfs.h" |
21 | #include "xfs_fs.h" | 21 | #include "xfs_fs.h" |
22 | #include "xfs_types.h" | 22 | #include "xfs_format.h" |
23 | #include "xfs_log.h" | 23 | #include "xfs_log.h" |
24 | #include "xfs_inum.h" | 24 | #include "xfs_inum.h" |
25 | #include "xfs_trans.h" | 25 | #include "xfs_trans.h" |
26 | #include "xfs_trans_space.h" | ||
26 | #include "xfs_trans_priv.h" | 27 | #include "xfs_trans_priv.h" |
27 | #include "xfs_sb.h" | 28 | #include "xfs_sb.h" |
28 | #include "xfs_ag.h" | 29 | #include "xfs_ag.h" |
29 | #include "xfs_mount.h" | 30 | #include "xfs_mount.h" |
31 | #include "xfs_da_btree.h" | ||
32 | #include "xfs_dir2_format.h" | ||
33 | #include "xfs_dir2.h" | ||
30 | #include "xfs_bmap_btree.h" | 34 | #include "xfs_bmap_btree.h" |
31 | #include "xfs_alloc_btree.h" | 35 | #include "xfs_alloc_btree.h" |
32 | #include "xfs_ialloc_btree.h" | 36 | #include "xfs_ialloc_btree.h" |
33 | #include "xfs_attr_sf.h" | 37 | #include "xfs_attr_sf.h" |
38 | #include "xfs_attr.h" | ||
34 | #include "xfs_dinode.h" | 39 | #include "xfs_dinode.h" |
35 | #include "xfs_inode.h" | 40 | #include "xfs_inode.h" |
36 | #include "xfs_buf_item.h" | 41 | #include "xfs_buf_item.h" |
@@ -39,16 +44,15 @@ | |||
39 | #include "xfs_alloc.h" | 44 | #include "xfs_alloc.h" |
40 | #include "xfs_ialloc.h" | 45 | #include "xfs_ialloc.h" |
41 | #include "xfs_bmap.h" | 46 | #include "xfs_bmap.h" |
47 | #include "xfs_bmap_util.h" | ||
42 | #include "xfs_error.h" | 48 | #include "xfs_error.h" |
43 | #include "xfs_utils.h" | ||
44 | #include "xfs_quota.h" | 49 | #include "xfs_quota.h" |
45 | #include "xfs_filestream.h" | 50 | #include "xfs_filestream.h" |
46 | #include "xfs_vnodeops.h" | ||
47 | #include "xfs_cksum.h" | 51 | #include "xfs_cksum.h" |
48 | #include "xfs_trace.h" | 52 | #include "xfs_trace.h" |
49 | #include "xfs_icache.h" | 53 | #include "xfs_icache.h" |
54 | #include "xfs_symlink.h" | ||
50 | 55 | ||
51 | kmem_zone_t *xfs_ifork_zone; | ||
52 | kmem_zone_t *xfs_inode_zone; | 56 | kmem_zone_t *xfs_inode_zone; |
53 | 57 | ||
54 | /* | 58 | /* |
@@ -58,9 +62,6 @@ kmem_zone_t *xfs_inode_zone; | |||
58 | #define XFS_ITRUNC_MAX_EXTENTS 2 | 62 | #define XFS_ITRUNC_MAX_EXTENTS 2 |
59 | 63 | ||
60 | STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *); | 64 | STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *); |
61 | STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int); | ||
62 | STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); | ||
63 | STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); | ||
64 | 65 | ||
65 | /* | 66 | /* |
66 | * helper function to extract extent size hint from inode | 67 | * helper function to extract extent size hint from inode |
@@ -310,623 +311,202 @@ xfs_isilocked( | |||
310 | } | 311 | } |
311 | #endif | 312 | #endif |
312 | 313 | ||
313 | void | ||
314 | __xfs_iflock( | ||
315 | struct xfs_inode *ip) | ||
316 | { | ||
317 | wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT); | ||
318 | DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT); | ||
319 | |||
320 | do { | ||
321 | prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE); | ||
322 | if (xfs_isiflocked(ip)) | ||
323 | io_schedule(); | ||
324 | } while (!xfs_iflock_nowait(ip)); | ||
325 | |||
326 | finish_wait(wq, &wait.wait); | ||
327 | } | ||
328 | |||
329 | #ifdef DEBUG | 314 | #ifdef DEBUG |
315 | int xfs_locked_n; | ||
316 | int xfs_small_retries; | ||
317 | int xfs_middle_retries; | ||
318 | int xfs_lots_retries; | ||
319 | int xfs_lock_delays; | ||
320 | #endif | ||
321 | |||
330 | /* | 322 | /* |
331 | * Make sure that the extents in the given memory buffer | 323 | * Bump the subclass so xfs_lock_inodes() acquires each lock with |
332 | * are valid. | 324 | * a different value |
333 | */ | 325 | */ |
334 | STATIC void | 326 | static inline int |
335 | xfs_validate_extents( | 327 | xfs_lock_inumorder(int lock_mode, int subclass) |
336 | xfs_ifork_t *ifp, | ||
337 | int nrecs, | ||
338 | xfs_exntfmt_t fmt) | ||
339 | { | 328 | { |
340 | xfs_bmbt_irec_t irec; | 329 | if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) |
341 | xfs_bmbt_rec_host_t rec; | 330 | lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; |
342 | int i; | 331 | if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) |
332 | lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; | ||
343 | 333 | ||
344 | for (i = 0; i < nrecs; i++) { | 334 | return lock_mode; |
345 | xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); | ||
346 | rec.l0 = get_unaligned(&ep->l0); | ||
347 | rec.l1 = get_unaligned(&ep->l1); | ||
348 | xfs_bmbt_get_all(&rec, &irec); | ||
349 | if (fmt == XFS_EXTFMT_NOSTATE) | ||
350 | ASSERT(irec.br_state == XFS_EXT_NORM); | ||
351 | } | ||
352 | } | 335 | } |
353 | #else /* DEBUG */ | ||
354 | #define xfs_validate_extents(ifp, nrecs, fmt) | ||
355 | #endif /* DEBUG */ | ||
356 | 336 | ||
357 | /* | 337 | /* |
358 | * Check that none of the inode's in the buffer have a next | 338 | * The following routine will lock n inodes in exclusive mode. |
359 | * unlinked field of 0. | 339 | * We assume the caller calls us with the inodes in i_ino order. |
340 | * | ||
341 | * We need to detect deadlock where an inode that we lock | ||
342 | * is in the AIL and we start waiting for another inode that is locked | ||
343 | * by a thread in a long running transaction (such as truncate). This can | ||
344 | * result in deadlock since the long running trans might need to wait | ||
345 | * for the inode we just locked in order to push the tail and free space | ||
346 | * in the log. | ||
360 | */ | 347 | */ |
361 | #if defined(DEBUG) | ||
362 | void | 348 | void |
363 | xfs_inobp_check( | 349 | xfs_lock_inodes( |
364 | xfs_mount_t *mp, | 350 | xfs_inode_t **ips, |
365 | xfs_buf_t *bp) | 351 | int inodes, |
352 | uint lock_mode) | ||
366 | { | 353 | { |
367 | int i; | 354 | int attempts = 0, i, j, try_lock; |
368 | int j; | 355 | xfs_log_item_t *lp; |
369 | xfs_dinode_t *dip; | ||
370 | 356 | ||
371 | j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; | 357 | ASSERT(ips && (inodes >= 2)); /* we need at least two */ |
372 | 358 | ||
373 | for (i = 0; i < j; i++) { | 359 | try_lock = 0; |
374 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, | 360 | i = 0; |
375 | i * mp->m_sb.sb_inodesize); | ||
376 | if (!dip->di_next_unlinked) { | ||
377 | xfs_alert(mp, | ||
378 | "Detected bogus zero next_unlinked field in incore inode buffer 0x%p.", | ||
379 | bp); | ||
380 | ASSERT(dip->di_next_unlinked); | ||
381 | } | ||
382 | } | ||
383 | } | ||
384 | #endif | ||
385 | 361 | ||
386 | static void | 362 | again: |
387 | xfs_inode_buf_verify( | 363 | for (; i < inodes; i++) { |
388 | struct xfs_buf *bp) | 364 | ASSERT(ips[i]); |
389 | { | ||
390 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
391 | int i; | ||
392 | int ni; | ||
393 | |||
394 | /* | ||
395 | * Validate the magic number and version of every inode in the buffer | ||
396 | */ | ||
397 | ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; | ||
398 | for (i = 0; i < ni; i++) { | ||
399 | int di_ok; | ||
400 | xfs_dinode_t *dip; | ||
401 | |||
402 | dip = (struct xfs_dinode *)xfs_buf_offset(bp, | ||
403 | (i << mp->m_sb.sb_inodelog)); | ||
404 | di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && | ||
405 | XFS_DINODE_GOOD_VERSION(dip->di_version); | ||
406 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, | ||
407 | XFS_ERRTAG_ITOBP_INOTOBP, | ||
408 | XFS_RANDOM_ITOBP_INOTOBP))) { | ||
409 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
410 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH, | ||
411 | mp, dip); | ||
412 | #ifdef DEBUG | ||
413 | xfs_emerg(mp, | ||
414 | "bad inode magic/vsn daddr %lld #%d (magic=%x)", | ||
415 | (unsigned long long)bp->b_bn, i, | ||
416 | be16_to_cpu(dip->di_magic)); | ||
417 | ASSERT(0); | ||
418 | #endif | ||
419 | } | ||
420 | } | ||
421 | xfs_inobp_check(mp, bp); | ||
422 | } | ||
423 | |||
424 | |||
425 | static void | ||
426 | xfs_inode_buf_read_verify( | ||
427 | struct xfs_buf *bp) | ||
428 | { | ||
429 | xfs_inode_buf_verify(bp); | ||
430 | } | ||
431 | |||
432 | static void | ||
433 | xfs_inode_buf_write_verify( | ||
434 | struct xfs_buf *bp) | ||
435 | { | ||
436 | xfs_inode_buf_verify(bp); | ||
437 | } | ||
438 | |||
439 | const struct xfs_buf_ops xfs_inode_buf_ops = { | ||
440 | .verify_read = xfs_inode_buf_read_verify, | ||
441 | .verify_write = xfs_inode_buf_write_verify, | ||
442 | }; | ||
443 | 365 | ||
366 | if (i && (ips[i] == ips[i-1])) /* Already locked */ | ||
367 | continue; | ||
444 | 368 | ||
445 | /* | 369 | /* |
446 | * This routine is called to map an inode to the buffer containing the on-disk | 370 | * If try_lock is not set yet, make sure all locked inodes |
447 | * version of the inode. It returns a pointer to the buffer containing the | 371 | * are not in the AIL. |
448 | * on-disk inode in the bpp parameter, and in the dipp parameter it returns a | 372 | * If any are, set try_lock to be used later. |
449 | * pointer to the on-disk inode within that buffer. | 373 | */ |
450 | * | ||
451 | * If a non-zero error is returned, then the contents of bpp and dipp are | ||
452 | * undefined. | ||
453 | */ | ||
454 | int | ||
455 | xfs_imap_to_bp( | ||
456 | struct xfs_mount *mp, | ||
457 | struct xfs_trans *tp, | ||
458 | struct xfs_imap *imap, | ||
459 | struct xfs_dinode **dipp, | ||
460 | struct xfs_buf **bpp, | ||
461 | uint buf_flags, | ||
462 | uint iget_flags) | ||
463 | { | ||
464 | struct xfs_buf *bp; | ||
465 | int error; | ||
466 | 374 | ||
467 | buf_flags |= XBF_UNMAPPED; | 375 | if (!try_lock) { |
468 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, | 376 | for (j = (i - 1); j >= 0 && !try_lock; j--) { |
469 | (int)imap->im_len, buf_flags, &bp, | 377 | lp = (xfs_log_item_t *)ips[j]->i_itemp; |
470 | &xfs_inode_buf_ops); | 378 | if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { |
471 | if (error) { | 379 | try_lock++; |
472 | if (error == EAGAIN) { | 380 | } |
473 | ASSERT(buf_flags & XBF_TRYLOCK); | 381 | } |
474 | return error; | ||
475 | } | 382 | } |
476 | 383 | ||
477 | if (error == EFSCORRUPTED && | 384 | /* |
478 | (iget_flags & XFS_IGET_UNTRUSTED)) | 385 | * If any of the previous locks we have locked is in the AIL, |
479 | return XFS_ERROR(EINVAL); | 386 | * we must TRY to get the second and subsequent locks. If |
480 | 387 | * we can't get any, we must release all we have | |
481 | xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.", | 388 | * and try again. |
482 | __func__, error); | 389 | */ |
483 | return error; | ||
484 | } | ||
485 | |||
486 | *bpp = bp; | ||
487 | *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset); | ||
488 | return 0; | ||
489 | } | ||
490 | |||
491 | /* | ||
492 | * Move inode type and inode format specific information from the | ||
493 | * on-disk inode to the in-core inode. For fifos, devs, and sockets | ||
494 | * this means set if_rdev to the proper value. For files, directories, | ||
495 | * and symlinks this means to bring in the in-line data or extent | ||
496 | * pointers. For a file in B-tree format, only the root is immediately | ||
497 | * brought in-core. The rest will be in-lined in if_extents when it | ||
498 | * is first referenced (see xfs_iread_extents()). | ||
499 | */ | ||
500 | STATIC int | ||
501 | xfs_iformat( | ||
502 | xfs_inode_t *ip, | ||
503 | xfs_dinode_t *dip) | ||
504 | { | ||
505 | xfs_attr_shortform_t *atp; | ||
506 | int size; | ||
507 | int error = 0; | ||
508 | xfs_fsize_t di_size; | ||
509 | |||
510 | if (unlikely(be32_to_cpu(dip->di_nextents) + | ||
511 | be16_to_cpu(dip->di_anextents) > | ||
512 | be64_to_cpu(dip->di_nblocks))) { | ||
513 | xfs_warn(ip->i_mount, | ||
514 | "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", | ||
515 | (unsigned long long)ip->i_ino, | ||
516 | (int)(be32_to_cpu(dip->di_nextents) + | ||
517 | be16_to_cpu(dip->di_anextents)), | ||
518 | (unsigned long long) | ||
519 | be64_to_cpu(dip->di_nblocks)); | ||
520 | XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, | ||
521 | ip->i_mount, dip); | ||
522 | return XFS_ERROR(EFSCORRUPTED); | ||
523 | } | ||
524 | |||
525 | if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { | ||
526 | xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.", | ||
527 | (unsigned long long)ip->i_ino, | ||
528 | dip->di_forkoff); | ||
529 | XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, | ||
530 | ip->i_mount, dip); | ||
531 | return XFS_ERROR(EFSCORRUPTED); | ||
532 | } | ||
533 | |||
534 | if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && | ||
535 | !ip->i_mount->m_rtdev_targp)) { | ||
536 | xfs_warn(ip->i_mount, | ||
537 | "corrupt dinode %Lu, has realtime flag set.", | ||
538 | ip->i_ino); | ||
539 | XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", | ||
540 | XFS_ERRLEVEL_LOW, ip->i_mount, dip); | ||
541 | return XFS_ERROR(EFSCORRUPTED); | ||
542 | } | ||
543 | |||
544 | switch (ip->i_d.di_mode & S_IFMT) { | ||
545 | case S_IFIFO: | ||
546 | case S_IFCHR: | ||
547 | case S_IFBLK: | ||
548 | case S_IFSOCK: | ||
549 | if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) { | ||
550 | XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, | ||
551 | ip->i_mount, dip); | ||
552 | return XFS_ERROR(EFSCORRUPTED); | ||
553 | } | ||
554 | ip->i_d.di_size = 0; | ||
555 | ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip); | ||
556 | break; | ||
557 | 390 | ||
558 | case S_IFREG: | 391 | if (try_lock) { |
559 | case S_IFLNK: | 392 | /* try_lock must be 0 if i is 0. */ |
560 | case S_IFDIR: | ||
561 | switch (dip->di_format) { | ||
562 | case XFS_DINODE_FMT_LOCAL: | ||
563 | /* | 393 | /* |
564 | * no local regular files yet | 394 | * try_lock means we have an inode locked |
395 | * that is in the AIL. | ||
565 | */ | 396 | */ |
566 | if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) { | 397 | ASSERT(i != 0); |
567 | xfs_warn(ip->i_mount, | 398 | if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) { |
568 | "corrupt inode %Lu (local format for regular file).", | 399 | attempts++; |
569 | (unsigned long long) ip->i_ino); | 400 | |
570 | XFS_CORRUPTION_ERROR("xfs_iformat(4)", | 401 | /* |
571 | XFS_ERRLEVEL_LOW, | 402 | * Unlock all previous guys and try again. |
572 | ip->i_mount, dip); | 403 | * xfs_iunlock will try to push the tail |
573 | return XFS_ERROR(EFSCORRUPTED); | 404 | * if the inode is in the AIL. |
574 | } | 405 | */ |
406 | |||
407 | for(j = i - 1; j >= 0; j--) { | ||
408 | |||
409 | /* | ||
410 | * Check to see if we've already | ||
411 | * unlocked this one. | ||
412 | * Not the first one going back, | ||
413 | * and the inode ptr is the same. | ||
414 | */ | ||
415 | if ((j != (i - 1)) && ips[j] == | ||
416 | ips[j+1]) | ||
417 | continue; | ||
418 | |||
419 | xfs_iunlock(ips[j], lock_mode); | ||
420 | } | ||
575 | 421 | ||
576 | di_size = be64_to_cpu(dip->di_size); | 422 | if ((attempts % 5) == 0) { |
577 | if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { | 423 | delay(1); /* Don't just spin the CPU */ |
578 | xfs_warn(ip->i_mount, | 424 | #ifdef DEBUG |
579 | "corrupt inode %Lu (bad size %Ld for local inode).", | 425 | xfs_lock_delays++; |
580 | (unsigned long long) ip->i_ino, | 426 | #endif |
581 | (long long) di_size); | 427 | } |
582 | XFS_CORRUPTION_ERROR("xfs_iformat(5)", | 428 | i = 0; |
583 | XFS_ERRLEVEL_LOW, | 429 | try_lock = 0; |
584 | ip->i_mount, dip); | 430 | goto again; |
585 | return XFS_ERROR(EFSCORRUPTED); | ||
586 | } | 431 | } |
587 | 432 | } else { | |
588 | size = (int)di_size; | 433 | xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i)); |
589 | error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); | ||
590 | break; | ||
591 | case XFS_DINODE_FMT_EXTENTS: | ||
592 | error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK); | ||
593 | break; | ||
594 | case XFS_DINODE_FMT_BTREE: | ||
595 | error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); | ||
596 | break; | ||
597 | default: | ||
598 | XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW, | ||
599 | ip->i_mount); | ||
600 | return XFS_ERROR(EFSCORRUPTED); | ||
601 | } | 434 | } |
602 | break; | ||
603 | |||
604 | default: | ||
605 | XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); | ||
606 | return XFS_ERROR(EFSCORRUPTED); | ||
607 | } | ||
608 | if (error) { | ||
609 | return error; | ||
610 | } | 435 | } |
611 | if (!XFS_DFORK_Q(dip)) | ||
612 | return 0; | ||
613 | |||
614 | ASSERT(ip->i_afp == NULL); | ||
615 | ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS); | ||
616 | |||
617 | switch (dip->di_aformat) { | ||
618 | case XFS_DINODE_FMT_LOCAL: | ||
619 | atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); | ||
620 | size = be16_to_cpu(atp->hdr.totsize); | ||
621 | |||
622 | if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) { | ||
623 | xfs_warn(ip->i_mount, | ||
624 | "corrupt inode %Lu (bad attr fork size %Ld).", | ||
625 | (unsigned long long) ip->i_ino, | ||
626 | (long long) size); | ||
627 | XFS_CORRUPTION_ERROR("xfs_iformat(8)", | ||
628 | XFS_ERRLEVEL_LOW, | ||
629 | ip->i_mount, dip); | ||
630 | return XFS_ERROR(EFSCORRUPTED); | ||
631 | } | ||
632 | 436 | ||
633 | error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); | 437 | #ifdef DEBUG |
634 | break; | 438 | if (attempts) { |
635 | case XFS_DINODE_FMT_EXTENTS: | 439 | if (attempts < 5) xfs_small_retries++; |
636 | error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK); | 440 | else if (attempts < 100) xfs_middle_retries++; |
637 | break; | 441 | else xfs_lots_retries++; |
638 | case XFS_DINODE_FMT_BTREE: | 442 | } else { |
639 | error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); | 443 | xfs_locked_n++; |
640 | break; | ||
641 | default: | ||
642 | error = XFS_ERROR(EFSCORRUPTED); | ||
643 | break; | ||
644 | } | ||
645 | if (error) { | ||
646 | kmem_zone_free(xfs_ifork_zone, ip->i_afp); | ||
647 | ip->i_afp = NULL; | ||
648 | xfs_idestroy_fork(ip, XFS_DATA_FORK); | ||
649 | } | 444 | } |
650 | return error; | 445 | #endif |
651 | } | 446 | } |
652 | 447 | ||
653 | /* | 448 | /* |
654 | * The file is in-lined in the on-disk inode. | 449 | * xfs_lock_two_inodes() can only be used to lock one type of lock |
655 | * If it fits into if_inline_data, then copy | 450 | * at a time - the iolock or the ilock, but not both at once. If |
656 | * it there, otherwise allocate a buffer for it | 451 | * we lock both at once, lockdep will report false positives saying |
657 | * and copy the data there. Either way, set | 452 | * we have violated locking orders. |
658 | * if_data to point at the data. | ||
659 | * If we allocate a buffer for the data, make | ||
660 | * sure that its size is a multiple of 4 and | ||
661 | * record the real size in i_real_bytes. | ||
662 | */ | 453 | */ |
663 | STATIC int | 454 | void |
664 | xfs_iformat_local( | 455 | xfs_lock_two_inodes( |
665 | xfs_inode_t *ip, | 456 | xfs_inode_t *ip0, |
666 | xfs_dinode_t *dip, | 457 | xfs_inode_t *ip1, |
667 | int whichfork, | 458 | uint lock_mode) |
668 | int size) | ||
669 | { | 459 | { |
670 | xfs_ifork_t *ifp; | 460 | xfs_inode_t *temp; |
671 | int real_size; | 461 | int attempts = 0; |
672 | 462 | xfs_log_item_t *lp; | |
673 | /* | ||
674 | * If the size is unreasonable, then something | ||
675 | * is wrong and we just bail out rather than crash in | ||
676 | * kmem_alloc() or memcpy() below. | ||
677 | */ | ||
678 | if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { | ||
679 | xfs_warn(ip->i_mount, | ||
680 | "corrupt inode %Lu (bad size %d for local fork, size = %d).", | ||
681 | (unsigned long long) ip->i_ino, size, | ||
682 | XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); | ||
683 | XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, | ||
684 | ip->i_mount, dip); | ||
685 | return XFS_ERROR(EFSCORRUPTED); | ||
686 | } | ||
687 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
688 | real_size = 0; | ||
689 | if (size == 0) | ||
690 | ifp->if_u1.if_data = NULL; | ||
691 | else if (size <= sizeof(ifp->if_u2.if_inline_data)) | ||
692 | ifp->if_u1.if_data = ifp->if_u2.if_inline_data; | ||
693 | else { | ||
694 | real_size = roundup(size, 4); | ||
695 | ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS); | ||
696 | } | ||
697 | ifp->if_bytes = size; | ||
698 | ifp->if_real_bytes = real_size; | ||
699 | if (size) | ||
700 | memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size); | ||
701 | ifp->if_flags &= ~XFS_IFEXTENTS; | ||
702 | ifp->if_flags |= XFS_IFINLINE; | ||
703 | return 0; | ||
704 | } | ||
705 | 463 | ||
706 | /* | 464 | if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) |
707 | * The file consists of a set of extents all | 465 | ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0); |
708 | * of which fit into the on-disk inode. | 466 | ASSERT(ip0->i_ino != ip1->i_ino); |
709 | * If there are few enough extents to fit into | ||
710 | * the if_inline_ext, then copy them there. | ||
711 | * Otherwise allocate a buffer for them and copy | ||
712 | * them into it. Either way, set if_extents | ||
713 | * to point at the extents. | ||
714 | */ | ||
715 | STATIC int | ||
716 | xfs_iformat_extents( | ||
717 | xfs_inode_t *ip, | ||
718 | xfs_dinode_t *dip, | ||
719 | int whichfork) | ||
720 | { | ||
721 | xfs_bmbt_rec_t *dp; | ||
722 | xfs_ifork_t *ifp; | ||
723 | int nex; | ||
724 | int size; | ||
725 | int i; | ||
726 | |||
727 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
728 | nex = XFS_DFORK_NEXTENTS(dip, whichfork); | ||
729 | size = nex * (uint)sizeof(xfs_bmbt_rec_t); | ||
730 | |||
731 | /* | ||
732 | * If the number of extents is unreasonable, then something | ||
733 | * is wrong and we just bail out rather than crash in | ||
734 | * kmem_alloc() or memcpy() below. | ||
735 | */ | ||
736 | if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { | ||
737 | xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).", | ||
738 | (unsigned long long) ip->i_ino, nex); | ||
739 | XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, | ||
740 | ip->i_mount, dip); | ||
741 | return XFS_ERROR(EFSCORRUPTED); | ||
742 | } | ||
743 | |||
744 | ifp->if_real_bytes = 0; | ||
745 | if (nex == 0) | ||
746 | ifp->if_u1.if_extents = NULL; | ||
747 | else if (nex <= XFS_INLINE_EXTS) | ||
748 | ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; | ||
749 | else | ||
750 | xfs_iext_add(ifp, 0, nex); | ||
751 | |||
752 | ifp->if_bytes = size; | ||
753 | if (size) { | ||
754 | dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); | ||
755 | xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip)); | ||
756 | for (i = 0; i < nex; i++, dp++) { | ||
757 | xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); | ||
758 | ep->l0 = get_unaligned_be64(&dp->l0); | ||
759 | ep->l1 = get_unaligned_be64(&dp->l1); | ||
760 | } | ||
761 | XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork); | ||
762 | if (whichfork != XFS_DATA_FORK || | ||
763 | XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) | ||
764 | if (unlikely(xfs_check_nostate_extents( | ||
765 | ifp, 0, nex))) { | ||
766 | XFS_ERROR_REPORT("xfs_iformat_extents(2)", | ||
767 | XFS_ERRLEVEL_LOW, | ||
768 | ip->i_mount); | ||
769 | return XFS_ERROR(EFSCORRUPTED); | ||
770 | } | ||
771 | } | ||
772 | ifp->if_flags |= XFS_IFEXTENTS; | ||
773 | return 0; | ||
774 | } | ||
775 | 467 | ||
776 | /* | 468 | if (ip0->i_ino > ip1->i_ino) { |
777 | * The file has too many extents to fit into | 469 | temp = ip0; |
778 | * the inode, so they are in B-tree format. | 470 | ip0 = ip1; |
779 | * Allocate a buffer for the root of the B-tree | 471 | ip1 = temp; |
780 | * and copy the root into it. The i_extents | 472 | } |
781 | * field will remain NULL until all of the | ||
782 | * extents are read in (when they are needed). | ||
783 | */ | ||
784 | STATIC int | ||
785 | xfs_iformat_btree( | ||
786 | xfs_inode_t *ip, | ||
787 | xfs_dinode_t *dip, | ||
788 | int whichfork) | ||
789 | { | ||
790 | struct xfs_mount *mp = ip->i_mount; | ||
791 | xfs_bmdr_block_t *dfp; | ||
792 | xfs_ifork_t *ifp; | ||
793 | /* REFERENCED */ | ||
794 | int nrecs; | ||
795 | int size; | ||
796 | |||
797 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
798 | dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); | ||
799 | size = XFS_BMAP_BROOT_SPACE(mp, dfp); | ||
800 | nrecs = be16_to_cpu(dfp->bb_numrecs); | ||
801 | |||
802 | /* | ||
803 | * blow out if -- fork has less extents than can fit in | ||
804 | * fork (fork shouldn't be a btree format), root btree | ||
805 | * block has more records than can fit into the fork, | ||
806 | * or the number of extents is greater than the number of | ||
807 | * blocks. | ||
808 | */ | ||
809 | if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= | ||
810 | XFS_IFORK_MAXEXT(ip, whichfork) || | ||
811 | XFS_BMDR_SPACE_CALC(nrecs) > | ||
812 | XFS_DFORK_SIZE(dip, mp, whichfork) || | ||
813 | XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { | ||
814 | xfs_warn(mp, "corrupt inode %Lu (btree).", | ||
815 | (unsigned long long) ip->i_ino); | ||
816 | XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, | ||
817 | mp, dip); | ||
818 | return XFS_ERROR(EFSCORRUPTED); | ||
819 | } | ||
820 | |||
821 | ifp->if_broot_bytes = size; | ||
822 | ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS); | ||
823 | ASSERT(ifp->if_broot != NULL); | ||
824 | /* | ||
825 | * Copy and convert from the on-disk structure | ||
826 | * to the in-memory structure. | ||
827 | */ | ||
828 | xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork), | ||
829 | ifp->if_broot, size); | ||
830 | ifp->if_flags &= ~XFS_IFEXTENTS; | ||
831 | ifp->if_flags |= XFS_IFBROOT; | ||
832 | 473 | ||
833 | return 0; | 474 | again: |
834 | } | 475 | xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0)); |
835 | 476 | ||
836 | STATIC void | 477 | /* |
837 | xfs_dinode_from_disk( | 478 | * If the first lock we have locked is in the AIL, we must TRY to get |
838 | xfs_icdinode_t *to, | 479 | * the second lock. If we can't get it, we must release the first one |
839 | xfs_dinode_t *from) | 480 | * and try again. |
840 | { | 481 | */ |
841 | to->di_magic = be16_to_cpu(from->di_magic); | 482 | lp = (xfs_log_item_t *)ip0->i_itemp; |
842 | to->di_mode = be16_to_cpu(from->di_mode); | 483 | if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { |
843 | to->di_version = from ->di_version; | 484 | if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) { |
844 | to->di_format = from->di_format; | 485 | xfs_iunlock(ip0, lock_mode); |
845 | to->di_onlink = be16_to_cpu(from->di_onlink); | 486 | if ((++attempts % 5) == 0) |
846 | to->di_uid = be32_to_cpu(from->di_uid); | 487 | delay(1); /* Don't just spin the CPU */ |
847 | to->di_gid = be32_to_cpu(from->di_gid); | 488 | goto again; |
848 | to->di_nlink = be32_to_cpu(from->di_nlink); | 489 | } |
849 | to->di_projid_lo = be16_to_cpu(from->di_projid_lo); | 490 | } else { |
850 | to->di_projid_hi = be16_to_cpu(from->di_projid_hi); | 491 | xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1)); |
851 | memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); | ||
852 | to->di_flushiter = be16_to_cpu(from->di_flushiter); | ||
853 | to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec); | ||
854 | to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec); | ||
855 | to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec); | ||
856 | to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec); | ||
857 | to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec); | ||
858 | to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec); | ||
859 | to->di_size = be64_to_cpu(from->di_size); | ||
860 | to->di_nblocks = be64_to_cpu(from->di_nblocks); | ||
861 | to->di_extsize = be32_to_cpu(from->di_extsize); | ||
862 | to->di_nextents = be32_to_cpu(from->di_nextents); | ||
863 | to->di_anextents = be16_to_cpu(from->di_anextents); | ||
864 | to->di_forkoff = from->di_forkoff; | ||
865 | to->di_aformat = from->di_aformat; | ||
866 | to->di_dmevmask = be32_to_cpu(from->di_dmevmask); | ||
867 | to->di_dmstate = be16_to_cpu(from->di_dmstate); | ||
868 | to->di_flags = be16_to_cpu(from->di_flags); | ||
869 | to->di_gen = be32_to_cpu(from->di_gen); | ||
870 | |||
871 | if (to->di_version == 3) { | ||
872 | to->di_changecount = be64_to_cpu(from->di_changecount); | ||
873 | to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec); | ||
874 | to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec); | ||
875 | to->di_flags2 = be64_to_cpu(from->di_flags2); | ||
876 | to->di_ino = be64_to_cpu(from->di_ino); | ||
877 | to->di_lsn = be64_to_cpu(from->di_lsn); | ||
878 | memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); | ||
879 | uuid_copy(&to->di_uuid, &from->di_uuid); | ||
880 | } | 492 | } |
881 | } | 493 | } |
882 | 494 | ||
495 | |||
883 | void | 496 | void |
884 | xfs_dinode_to_disk( | 497 | __xfs_iflock( |
885 | xfs_dinode_t *to, | 498 | struct xfs_inode *ip) |
886 | xfs_icdinode_t *from) | ||
887 | { | 499 | { |
888 | to->di_magic = cpu_to_be16(from->di_magic); | 500 | wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT); |
889 | to->di_mode = cpu_to_be16(from->di_mode); | 501 | DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT); |
890 | to->di_version = from ->di_version; | 502 | |
891 | to->di_format = from->di_format; | 503 | do { |
892 | to->di_onlink = cpu_to_be16(from->di_onlink); | 504 | prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE); |
893 | to->di_uid = cpu_to_be32(from->di_uid); | 505 | if (xfs_isiflocked(ip)) |
894 | to->di_gid = cpu_to_be32(from->di_gid); | 506 | io_schedule(); |
895 | to->di_nlink = cpu_to_be32(from->di_nlink); | 507 | } while (!xfs_iflock_nowait(ip)); |
896 | to->di_projid_lo = cpu_to_be16(from->di_projid_lo); | 508 | |
897 | to->di_projid_hi = cpu_to_be16(from->di_projid_hi); | 509 | finish_wait(wq, &wait.wait); |
898 | memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); | ||
899 | to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); | ||
900 | to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); | ||
901 | to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); | ||
902 | to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec); | ||
903 | to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec); | ||
904 | to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec); | ||
905 | to->di_size = cpu_to_be64(from->di_size); | ||
906 | to->di_nblocks = cpu_to_be64(from->di_nblocks); | ||
907 | to->di_extsize = cpu_to_be32(from->di_extsize); | ||
908 | to->di_nextents = cpu_to_be32(from->di_nextents); | ||
909 | to->di_anextents = cpu_to_be16(from->di_anextents); | ||
910 | to->di_forkoff = from->di_forkoff; | ||
911 | to->di_aformat = from->di_aformat; | ||
912 | to->di_dmevmask = cpu_to_be32(from->di_dmevmask); | ||
913 | to->di_dmstate = cpu_to_be16(from->di_dmstate); | ||
914 | to->di_flags = cpu_to_be16(from->di_flags); | ||
915 | to->di_gen = cpu_to_be32(from->di_gen); | ||
916 | |||
917 | if (from->di_version == 3) { | ||
918 | to->di_changecount = cpu_to_be64(from->di_changecount); | ||
919 | to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec); | ||
920 | to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); | ||
921 | to->di_flags2 = cpu_to_be64(from->di_flags2); | ||
922 | to->di_ino = cpu_to_be64(from->di_ino); | ||
923 | to->di_lsn = cpu_to_be64(from->di_lsn); | ||
924 | memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); | ||
925 | uuid_copy(&to->di_uuid, &from->di_uuid); | ||
926 | to->di_flushiter = 0; | ||
927 | } else { | ||
928 | to->di_flushiter = cpu_to_be16(from->di_flushiter); | ||
929 | } | ||
930 | } | 510 | } |
931 | 511 | ||
932 | STATIC uint | 512 | STATIC uint |
@@ -987,235 +567,50 @@ xfs_dic2xflags( | |||
987 | (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); | 567 | (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); |
988 | } | 568 | } |
989 | 569 | ||
990 | static bool | ||
991 | xfs_dinode_verify( | ||
992 | struct xfs_mount *mp, | ||
993 | struct xfs_inode *ip, | ||
994 | struct xfs_dinode *dip) | ||
995 | { | ||
996 | if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) | ||
997 | return false; | ||
998 | |||
999 | /* only version 3 or greater inodes are extensively verified here */ | ||
1000 | if (dip->di_version < 3) | ||
1001 | return true; | ||
1002 | |||
1003 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
1004 | return false; | ||
1005 | if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, | ||
1006 | offsetof(struct xfs_dinode, di_crc))) | ||
1007 | return false; | ||
1008 | if (be64_to_cpu(dip->di_ino) != ip->i_ino) | ||
1009 | return false; | ||
1010 | if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid)) | ||
1011 | return false; | ||
1012 | return true; | ||
1013 | } | ||
1014 | |||
1015 | void | ||
1016 | xfs_dinode_calc_crc( | ||
1017 | struct xfs_mount *mp, | ||
1018 | struct xfs_dinode *dip) | ||
1019 | { | ||
1020 | __uint32_t crc; | ||
1021 | |||
1022 | if (dip->di_version < 3) | ||
1023 | return; | ||
1024 | |||
1025 | ASSERT(xfs_sb_version_hascrc(&mp->m_sb)); | ||
1026 | crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize, | ||
1027 | offsetof(struct xfs_dinode, di_crc)); | ||
1028 | dip->di_crc = xfs_end_cksum(crc); | ||
1029 | } | ||
1030 | |||
1031 | /* | 570 | /* |
1032 | * Read the disk inode attributes into the in-core inode structure. | 571 | * Lookups up an inode from "name". If ci_name is not NULL, then a CI match |
1033 | * | 572 | * is allowed, otherwise it has to be an exact match. If a CI match is found, |
1034 | * For version 5 superblocks, if we are initialising a new inode and we are not | 573 | * ci_name->name will point to a the actual name (caller must free) or |
1035 | * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new | 574 | * will be set to NULL if an exact match is found. |
1036 | * inode core with a random generation number. If we are keeping inodes around, | ||
1037 | * we need to read the inode cluster to get the existing generation number off | ||
1038 | * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode | ||
1039 | * format) then log recovery is dependent on the di_flushiter field being | ||
1040 | * initialised from the current on-disk value and hence we must also read the | ||
1041 | * inode off disk. | ||
1042 | */ | 575 | */ |
1043 | int | 576 | int |
1044 | xfs_iread( | 577 | xfs_lookup( |
1045 | xfs_mount_t *mp, | 578 | xfs_inode_t *dp, |
1046 | xfs_trans_t *tp, | 579 | struct xfs_name *name, |
1047 | xfs_inode_t *ip, | 580 | xfs_inode_t **ipp, |
1048 | uint iget_flags) | 581 | struct xfs_name *ci_name) |
1049 | { | 582 | { |
1050 | xfs_buf_t *bp; | 583 | xfs_ino_t inum; |
1051 | xfs_dinode_t *dip; | 584 | int error; |
1052 | int error; | 585 | uint lock_mode; |
1053 | |||
1054 | /* | ||
1055 | * Fill in the location information in the in-core inode. | ||
1056 | */ | ||
1057 | error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags); | ||
1058 | if (error) | ||
1059 | return error; | ||
1060 | |||
1061 | /* shortcut IO on inode allocation if possible */ | ||
1062 | if ((iget_flags & XFS_IGET_CREATE) && | ||
1063 | xfs_sb_version_hascrc(&mp->m_sb) && | ||
1064 | !(mp->m_flags & XFS_MOUNT_IKEEP)) { | ||
1065 | /* initialise the on-disk inode core */ | ||
1066 | memset(&ip->i_d, 0, sizeof(ip->i_d)); | ||
1067 | ip->i_d.di_magic = XFS_DINODE_MAGIC; | ||
1068 | ip->i_d.di_gen = prandom_u32(); | ||
1069 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
1070 | ip->i_d.di_version = 3; | ||
1071 | ip->i_d.di_ino = ip->i_ino; | ||
1072 | uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid); | ||
1073 | } else | ||
1074 | ip->i_d.di_version = 2; | ||
1075 | return 0; | ||
1076 | } | ||
1077 | |||
1078 | /* | ||
1079 | * Get pointers to the on-disk inode and the buffer containing it. | ||
1080 | */ | ||
1081 | error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags); | ||
1082 | if (error) | ||
1083 | return error; | ||
1084 | 586 | ||
1085 | /* even unallocated inodes are verified */ | 587 | trace_xfs_lookup(dp, name); |
1086 | if (!xfs_dinode_verify(mp, ip, dip)) { | ||
1087 | xfs_alert(mp, "%s: validation failed for inode %lld failed", | ||
1088 | __func__, ip->i_ino); | ||
1089 | 588 | ||
1090 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip); | 589 | if (XFS_FORCED_SHUTDOWN(dp->i_mount)) |
1091 | error = XFS_ERROR(EFSCORRUPTED); | 590 | return XFS_ERROR(EIO); |
1092 | goto out_brelse; | ||
1093 | } | ||
1094 | 591 | ||
1095 | /* | 592 | lock_mode = xfs_ilock_map_shared(dp); |
1096 | * If the on-disk inode is already linked to a directory | 593 | error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); |
1097 | * entry, copy all of the inode into the in-core inode. | 594 | xfs_iunlock_map_shared(dp, lock_mode); |
1098 | * xfs_iformat() handles copying in the inode format | ||
1099 | * specific information. | ||
1100 | * Otherwise, just get the truly permanent information. | ||
1101 | */ | ||
1102 | if (dip->di_mode) { | ||
1103 | xfs_dinode_from_disk(&ip->i_d, dip); | ||
1104 | error = xfs_iformat(ip, dip); | ||
1105 | if (error) { | ||
1106 | #ifdef DEBUG | ||
1107 | xfs_alert(mp, "%s: xfs_iformat() returned error %d", | ||
1108 | __func__, error); | ||
1109 | #endif /* DEBUG */ | ||
1110 | goto out_brelse; | ||
1111 | } | ||
1112 | } else { | ||
1113 | /* | ||
1114 | * Partial initialisation of the in-core inode. Just the bits | ||
1115 | * that xfs_ialloc won't overwrite or relies on being correct. | ||
1116 | */ | ||
1117 | ip->i_d.di_magic = be16_to_cpu(dip->di_magic); | ||
1118 | ip->i_d.di_version = dip->di_version; | ||
1119 | ip->i_d.di_gen = be32_to_cpu(dip->di_gen); | ||
1120 | ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter); | ||
1121 | |||
1122 | if (dip->di_version == 3) { | ||
1123 | ip->i_d.di_ino = be64_to_cpu(dip->di_ino); | ||
1124 | uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid); | ||
1125 | } | ||
1126 | 595 | ||
1127 | /* | 596 | if (error) |
1128 | * Make sure to pull in the mode here as well in | 597 | goto out; |
1129 | * case the inode is released without being used. | ||
1130 | * This ensures that xfs_inactive() will see that | ||
1131 | * the inode is already free and not try to mess | ||
1132 | * with the uninitialized part of it. | ||
1133 | */ | ||
1134 | ip->i_d.di_mode = 0; | ||
1135 | } | ||
1136 | |||
1137 | /* | ||
1138 | * The inode format changed when we moved the link count and | ||
1139 | * made it 32 bits long. If this is an old format inode, | ||
1140 | * convert it in memory to look like a new one. If it gets | ||
1141 | * flushed to disk we will convert back before flushing or | ||
1142 | * logging it. We zero out the new projid field and the old link | ||
1143 | * count field. We'll handle clearing the pad field (the remains | ||
1144 | * of the old uuid field) when we actually convert the inode to | ||
1145 | * the new format. We don't change the version number so that we | ||
1146 | * can distinguish this from a real new format inode. | ||
1147 | */ | ||
1148 | if (ip->i_d.di_version == 1) { | ||
1149 | ip->i_d.di_nlink = ip->i_d.di_onlink; | ||
1150 | ip->i_d.di_onlink = 0; | ||
1151 | xfs_set_projid(ip, 0); | ||
1152 | } | ||
1153 | 598 | ||
1154 | ip->i_delayed_blks = 0; | 599 | error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp); |
600 | if (error) | ||
601 | goto out_free_name; | ||
1155 | 602 | ||
1156 | /* | 603 | return 0; |
1157 | * Mark the buffer containing the inode as something to keep | ||
1158 | * around for a while. This helps to keep recently accessed | ||
1159 | * meta-data in-core longer. | ||
1160 | */ | ||
1161 | xfs_buf_set_ref(bp, XFS_INO_REF); | ||
1162 | 604 | ||
1163 | /* | 605 | out_free_name: |
1164 | * Use xfs_trans_brelse() to release the buffer containing the on-disk | 606 | if (ci_name) |
1165 | * inode, because it was acquired with xfs_trans_read_buf() in | 607 | kmem_free(ci_name->name); |
1166 | * xfs_imap_to_bp() above. If tp is NULL, this is just a normal | 608 | out: |
1167 | * brelse(). If we're within a transaction, then xfs_trans_brelse() | 609 | *ipp = NULL; |
1168 | * will only release the buffer if it is not dirty within the | ||
1169 | * transaction. It will be OK to release the buffer in this case, | ||
1170 | * because inodes on disk are never destroyed and we will be locking the | ||
1171 | * new in-core inode before putting it in the cache where other | ||
1172 | * processes can find it. Thus we don't have to worry about the inode | ||
1173 | * being changed just because we released the buffer. | ||
1174 | */ | ||
1175 | out_brelse: | ||
1176 | xfs_trans_brelse(tp, bp); | ||
1177 | return error; | 610 | return error; |
1178 | } | 611 | } |
1179 | 612 | ||
1180 | /* | 613 | /* |
1181 | * Read in extents from a btree-format inode. | ||
1182 | * Allocate and fill in if_extents. Real work is done in xfs_bmap.c. | ||
1183 | */ | ||
1184 | int | ||
1185 | xfs_iread_extents( | ||
1186 | xfs_trans_t *tp, | ||
1187 | xfs_inode_t *ip, | ||
1188 | int whichfork) | ||
1189 | { | ||
1190 | int error; | ||
1191 | xfs_ifork_t *ifp; | ||
1192 | xfs_extnum_t nextents; | ||
1193 | |||
1194 | if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { | ||
1195 | XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, | ||
1196 | ip->i_mount); | ||
1197 | return XFS_ERROR(EFSCORRUPTED); | ||
1198 | } | ||
1199 | nextents = XFS_IFORK_NEXTENTS(ip, whichfork); | ||
1200 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
1201 | |||
1202 | /* | ||
1203 | * We know that the size is valid (it's checked in iformat_btree) | ||
1204 | */ | ||
1205 | ifp->if_bytes = ifp->if_real_bytes = 0; | ||
1206 | ifp->if_flags |= XFS_IFEXTENTS; | ||
1207 | xfs_iext_add(ifp, 0, nextents); | ||
1208 | error = xfs_bmap_read_extents(tp, ip, whichfork); | ||
1209 | if (error) { | ||
1210 | xfs_iext_destroy(ifp); | ||
1211 | ifp->if_flags &= ~XFS_IFEXTENTS; | ||
1212 | return error; | ||
1213 | } | ||
1214 | xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip)); | ||
1215 | return 0; | ||
1216 | } | ||
1217 | |||
1218 | /* | ||
1219 | * Allocate an inode on disk and return a copy of its in-core version. | 614 | * Allocate an inode on disk and return a copy of its in-core version. |
1220 | * The in-core inode is locked exclusively. Set mode, nlink, and rdev | 615 | * The in-core inode is locked exclusively. Set mode, nlink, and rdev |
1221 | * appropriately within the inode. The uid and gid for the inode are | 616 | * appropriately within the inode. The uid and gid for the inode are |
@@ -1295,8 +690,8 @@ xfs_ialloc( | |||
1295 | ip->i_d.di_onlink = 0; | 690 | ip->i_d.di_onlink = 0; |
1296 | ip->i_d.di_nlink = nlink; | 691 | ip->i_d.di_nlink = nlink; |
1297 | ASSERT(ip->i_d.di_nlink == nlink); | 692 | ASSERT(ip->i_d.di_nlink == nlink); |
1298 | ip->i_d.di_uid = current_fsuid(); | 693 | ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid()); |
1299 | ip->i_d.di_gid = current_fsgid(); | 694 | ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid()); |
1300 | xfs_set_projid(ip, prid); | 695 | xfs_set_projid(ip, prid); |
1301 | memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); | 696 | memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); |
1302 | 697 | ||
@@ -1335,7 +730,7 @@ xfs_ialloc( | |||
1335 | */ | 730 | */ |
1336 | if ((irix_sgid_inherit) && | 731 | if ((irix_sgid_inherit) && |
1337 | (ip->i_d.di_mode & S_ISGID) && | 732 | (ip->i_d.di_mode & S_ISGID) && |
1338 | (!in_group_p((gid_t)ip->i_d.di_gid))) { | 733 | (!in_group_p(xfs_gid_to_kgid(ip->i_d.di_gid)))) { |
1339 | ip->i_d.di_mode &= ~S_ISGID; | 734 | ip->i_d.di_mode &= ~S_ISGID; |
1340 | } | 735 | } |
1341 | 736 | ||
@@ -1467,6 +862,583 @@ xfs_ialloc( | |||
1467 | } | 862 | } |
1468 | 863 | ||
1469 | /* | 864 | /* |
865 | * Allocates a new inode from disk and return a pointer to the | ||
866 | * incore copy. This routine will internally commit the current | ||
867 | * transaction and allocate a new one if the Space Manager needed | ||
868 | * to do an allocation to replenish the inode free-list. | ||
869 | * | ||
870 | * This routine is designed to be called from xfs_create and | ||
871 | * xfs_create_dir. | ||
872 | * | ||
873 | */ | ||
874 | int | ||
875 | xfs_dir_ialloc( | ||
876 | xfs_trans_t **tpp, /* input: current transaction; | ||
877 | output: may be a new transaction. */ | ||
878 | xfs_inode_t *dp, /* directory within whose allocate | ||
879 | the inode. */ | ||
880 | umode_t mode, | ||
881 | xfs_nlink_t nlink, | ||
882 | xfs_dev_t rdev, | ||
883 | prid_t prid, /* project id */ | ||
884 | int okalloc, /* ok to allocate new space */ | ||
885 | xfs_inode_t **ipp, /* pointer to inode; it will be | ||
886 | locked. */ | ||
887 | int *committed) | ||
888 | |||
889 | { | ||
890 | xfs_trans_t *tp; | ||
891 | xfs_trans_t *ntp; | ||
892 | xfs_inode_t *ip; | ||
893 | xfs_buf_t *ialloc_context = NULL; | ||
894 | int code; | ||
895 | void *dqinfo; | ||
896 | uint tflags; | ||
897 | |||
898 | tp = *tpp; | ||
899 | ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); | ||
900 | |||
901 | /* | ||
902 | * xfs_ialloc will return a pointer to an incore inode if | ||
903 | * the Space Manager has an available inode on the free | ||
904 | * list. Otherwise, it will do an allocation and replenish | ||
905 | * the freelist. Since we can only do one allocation per | ||
906 | * transaction without deadlocks, we will need to commit the | ||
907 | * current transaction and start a new one. We will then | ||
908 | * need to call xfs_ialloc again to get the inode. | ||
909 | * | ||
910 | * If xfs_ialloc did an allocation to replenish the freelist, | ||
911 | * it returns the bp containing the head of the freelist as | ||
912 | * ialloc_context. We will hold a lock on it across the | ||
913 | * transaction commit so that no other process can steal | ||
914 | * the inode(s) that we've just allocated. | ||
915 | */ | ||
916 | code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc, | ||
917 | &ialloc_context, &ip); | ||
918 | |||
919 | /* | ||
920 | * Return an error if we were unable to allocate a new inode. | ||
921 | * This should only happen if we run out of space on disk or | ||
922 | * encounter a disk error. | ||
923 | */ | ||
924 | if (code) { | ||
925 | *ipp = NULL; | ||
926 | return code; | ||
927 | } | ||
928 | if (!ialloc_context && !ip) { | ||
929 | *ipp = NULL; | ||
930 | return XFS_ERROR(ENOSPC); | ||
931 | } | ||
932 | |||
933 | /* | ||
934 | * If the AGI buffer is non-NULL, then we were unable to get an | ||
935 | * inode in one operation. We need to commit the current | ||
936 | * transaction and call xfs_ialloc() again. It is guaranteed | ||
937 | * to succeed the second time. | ||
938 | */ | ||
939 | if (ialloc_context) { | ||
940 | struct xfs_trans_res tres; | ||
941 | |||
942 | /* | ||
943 | * Normally, xfs_trans_commit releases all the locks. | ||
944 | * We call bhold to hang on to the ialloc_context across | ||
945 | * the commit. Holding this buffer prevents any other | ||
946 | * processes from doing any allocations in this | ||
947 | * allocation group. | ||
948 | */ | ||
949 | xfs_trans_bhold(tp, ialloc_context); | ||
950 | /* | ||
951 | * Save the log reservation so we can use | ||
952 | * them in the next transaction. | ||
953 | */ | ||
954 | tres.tr_logres = xfs_trans_get_log_res(tp); | ||
955 | tres.tr_logcount = xfs_trans_get_log_count(tp); | ||
956 | |||
957 | /* | ||
958 | * We want the quota changes to be associated with the next | ||
959 | * transaction, NOT this one. So, detach the dqinfo from this | ||
960 | * and attach it to the next transaction. | ||
961 | */ | ||
962 | dqinfo = NULL; | ||
963 | tflags = 0; | ||
964 | if (tp->t_dqinfo) { | ||
965 | dqinfo = (void *)tp->t_dqinfo; | ||
966 | tp->t_dqinfo = NULL; | ||
967 | tflags = tp->t_flags & XFS_TRANS_DQ_DIRTY; | ||
968 | tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY); | ||
969 | } | ||
970 | |||
971 | ntp = xfs_trans_dup(tp); | ||
972 | code = xfs_trans_commit(tp, 0); | ||
973 | tp = ntp; | ||
974 | if (committed != NULL) { | ||
975 | *committed = 1; | ||
976 | } | ||
977 | /* | ||
978 | * If we get an error during the commit processing, | ||
979 | * release the buffer that is still held and return | ||
980 | * to the caller. | ||
981 | */ | ||
982 | if (code) { | ||
983 | xfs_buf_relse(ialloc_context); | ||
984 | if (dqinfo) { | ||
985 | tp->t_dqinfo = dqinfo; | ||
986 | xfs_trans_free_dqinfo(tp); | ||
987 | } | ||
988 | *tpp = ntp; | ||
989 | *ipp = NULL; | ||
990 | return code; | ||
991 | } | ||
992 | |||
993 | /* | ||
994 | * transaction commit worked ok so we can drop the extra ticket | ||
995 | * reference that we gained in xfs_trans_dup() | ||
996 | */ | ||
997 | xfs_log_ticket_put(tp->t_ticket); | ||
998 | tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; | ||
999 | code = xfs_trans_reserve(tp, &tres, 0, 0); | ||
1000 | |||
1001 | /* | ||
1002 | * Re-attach the quota info that we detached from prev trx. | ||
1003 | */ | ||
1004 | if (dqinfo) { | ||
1005 | tp->t_dqinfo = dqinfo; | ||
1006 | tp->t_flags |= tflags; | ||
1007 | } | ||
1008 | |||
1009 | if (code) { | ||
1010 | xfs_buf_relse(ialloc_context); | ||
1011 | *tpp = ntp; | ||
1012 | *ipp = NULL; | ||
1013 | return code; | ||
1014 | } | ||
1015 | xfs_trans_bjoin(tp, ialloc_context); | ||
1016 | |||
1017 | /* | ||
1018 | * Call ialloc again. Since we've locked out all | ||
1019 | * other allocations in this allocation group, | ||
1020 | * this call should always succeed. | ||
1021 | */ | ||
1022 | code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, | ||
1023 | okalloc, &ialloc_context, &ip); | ||
1024 | |||
1025 | /* | ||
1026 | * If we get an error at this point, return to the caller | ||
1027 | * so that the current transaction can be aborted. | ||
1028 | */ | ||
1029 | if (code) { | ||
1030 | *tpp = tp; | ||
1031 | *ipp = NULL; | ||
1032 | return code; | ||
1033 | } | ||
1034 | ASSERT(!ialloc_context && ip); | ||
1035 | |||
1036 | } else { | ||
1037 | if (committed != NULL) | ||
1038 | *committed = 0; | ||
1039 | } | ||
1040 | |||
1041 | *ipp = ip; | ||
1042 | *tpp = tp; | ||
1043 | |||
1044 | return 0; | ||
1045 | } | ||
1046 | |||
1047 | /* | ||
1048 | * Decrement the link count on an inode & log the change. | ||
1049 | * If this causes the link count to go to zero, initiate the | ||
1050 | * logging activity required to truncate a file. | ||
1051 | */ | ||
1052 | int /* error */ | ||
1053 | xfs_droplink( | ||
1054 | xfs_trans_t *tp, | ||
1055 | xfs_inode_t *ip) | ||
1056 | { | ||
1057 | int error; | ||
1058 | |||
1059 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); | ||
1060 | |||
1061 | ASSERT (ip->i_d.di_nlink > 0); | ||
1062 | ip->i_d.di_nlink--; | ||
1063 | drop_nlink(VFS_I(ip)); | ||
1064 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
1065 | |||
1066 | error = 0; | ||
1067 | if (ip->i_d.di_nlink == 0) { | ||
1068 | /* | ||
1069 | * We're dropping the last link to this file. | ||
1070 | * Move the on-disk inode to the AGI unlinked list. | ||
1071 | * From xfs_inactive() we will pull the inode from | ||
1072 | * the list and free it. | ||
1073 | */ | ||
1074 | error = xfs_iunlink(tp, ip); | ||
1075 | } | ||
1076 | return error; | ||
1077 | } | ||
1078 | |||
1079 | /* | ||
1080 | * This gets called when the inode's version needs to be changed from 1 to 2. | ||
1081 | * Currently this happens when the nlink field overflows the old 16-bit value | ||
1082 | * or when chproj is called to change the project for the first time. | ||
1083 | * As a side effect the superblock version will also get rev'd | ||
1084 | * to contain the NLINK bit. | ||
1085 | */ | ||
1086 | void | ||
1087 | xfs_bump_ino_vers2( | ||
1088 | xfs_trans_t *tp, | ||
1089 | xfs_inode_t *ip) | ||
1090 | { | ||
1091 | xfs_mount_t *mp; | ||
1092 | |||
1093 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
1094 | ASSERT(ip->i_d.di_version == 1); | ||
1095 | |||
1096 | ip->i_d.di_version = 2; | ||
1097 | ip->i_d.di_onlink = 0; | ||
1098 | memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); | ||
1099 | mp = tp->t_mountp; | ||
1100 | if (!xfs_sb_version_hasnlink(&mp->m_sb)) { | ||
1101 | spin_lock(&mp->m_sb_lock); | ||
1102 | if (!xfs_sb_version_hasnlink(&mp->m_sb)) { | ||
1103 | xfs_sb_version_addnlink(&mp->m_sb); | ||
1104 | spin_unlock(&mp->m_sb_lock); | ||
1105 | xfs_mod_sb(tp, XFS_SB_VERSIONNUM); | ||
1106 | } else { | ||
1107 | spin_unlock(&mp->m_sb_lock); | ||
1108 | } | ||
1109 | } | ||
1110 | /* Caller must log the inode */ | ||
1111 | } | ||
1112 | |||
1113 | /* | ||
1114 | * Increment the link count on an inode & log the change. | ||
1115 | */ | ||
1116 | int | ||
1117 | xfs_bumplink( | ||
1118 | xfs_trans_t *tp, | ||
1119 | xfs_inode_t *ip) | ||
1120 | { | ||
1121 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); | ||
1122 | |||
1123 | ASSERT(ip->i_d.di_nlink > 0); | ||
1124 | ip->i_d.di_nlink++; | ||
1125 | inc_nlink(VFS_I(ip)); | ||
1126 | if ((ip->i_d.di_version == 1) && | ||
1127 | (ip->i_d.di_nlink > XFS_MAXLINK_1)) { | ||
1128 | /* | ||
1129 | * The inode has increased its number of links beyond | ||
1130 | * what can fit in an old format inode. It now needs | ||
1131 | * to be converted to a version 2 inode with a 32 bit | ||
1132 | * link count. If this is the first inode in the file | ||
1133 | * system to do this, then we need to bump the superblock | ||
1134 | * version number as well. | ||
1135 | */ | ||
1136 | xfs_bump_ino_vers2(tp, ip); | ||
1137 | } | ||
1138 | |||
1139 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
1140 | return 0; | ||
1141 | } | ||
1142 | |||
1143 | int | ||
1144 | xfs_create( | ||
1145 | xfs_inode_t *dp, | ||
1146 | struct xfs_name *name, | ||
1147 | umode_t mode, | ||
1148 | xfs_dev_t rdev, | ||
1149 | xfs_inode_t **ipp) | ||
1150 | { | ||
1151 | int is_dir = S_ISDIR(mode); | ||
1152 | struct xfs_mount *mp = dp->i_mount; | ||
1153 | struct xfs_inode *ip = NULL; | ||
1154 | struct xfs_trans *tp = NULL; | ||
1155 | int error; | ||
1156 | xfs_bmap_free_t free_list; | ||
1157 | xfs_fsblock_t first_block; | ||
1158 | bool unlock_dp_on_error = false; | ||
1159 | uint cancel_flags; | ||
1160 | int committed; | ||
1161 | prid_t prid; | ||
1162 | struct xfs_dquot *udqp = NULL; | ||
1163 | struct xfs_dquot *gdqp = NULL; | ||
1164 | struct xfs_dquot *pdqp = NULL; | ||
1165 | struct xfs_trans_res tres; | ||
1166 | uint resblks; | ||
1167 | |||
1168 | trace_xfs_create(dp, name); | ||
1169 | |||
1170 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
1171 | return XFS_ERROR(EIO); | ||
1172 | |||
1173 | if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) | ||
1174 | prid = xfs_get_projid(dp); | ||
1175 | else | ||
1176 | prid = XFS_PROJID_DEFAULT; | ||
1177 | |||
1178 | /* | ||
1179 | * Make sure that we have allocated dquot(s) on disk. | ||
1180 | */ | ||
1181 | error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()), | ||
1182 | xfs_kgid_to_gid(current_fsgid()), prid, | ||
1183 | XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, | ||
1184 | &udqp, &gdqp, &pdqp); | ||
1185 | if (error) | ||
1186 | return error; | ||
1187 | |||
1188 | if (is_dir) { | ||
1189 | rdev = 0; | ||
1190 | resblks = XFS_MKDIR_SPACE_RES(mp, name->len); | ||
1191 | tres.tr_logres = M_RES(mp)->tr_mkdir.tr_logres; | ||
1192 | tres.tr_logcount = XFS_MKDIR_LOG_COUNT; | ||
1193 | tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR); | ||
1194 | } else { | ||
1195 | resblks = XFS_CREATE_SPACE_RES(mp, name->len); | ||
1196 | tres.tr_logres = M_RES(mp)->tr_create.tr_logres; | ||
1197 | tres.tr_logcount = XFS_CREATE_LOG_COUNT; | ||
1198 | tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE); | ||
1199 | } | ||
1200 | |||
1201 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; | ||
1202 | |||
1203 | /* | ||
1204 | * Initially assume that the file does not exist and | ||
1205 | * reserve the resources for that case. If that is not | ||
1206 | * the case we'll drop the one we have and get a more | ||
1207 | * appropriate transaction later. | ||
1208 | */ | ||
1209 | tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; | ||
1210 | error = xfs_trans_reserve(tp, &tres, resblks, 0); | ||
1211 | if (error == ENOSPC) { | ||
1212 | /* flush outstanding delalloc blocks and retry */ | ||
1213 | xfs_flush_inodes(mp); | ||
1214 | error = xfs_trans_reserve(tp, &tres, resblks, 0); | ||
1215 | } | ||
1216 | if (error == ENOSPC) { | ||
1217 | /* No space at all so try a "no-allocation" reservation */ | ||
1218 | resblks = 0; | ||
1219 | error = xfs_trans_reserve(tp, &tres, 0, 0); | ||
1220 | } | ||
1221 | if (error) { | ||
1222 | cancel_flags = 0; | ||
1223 | goto out_trans_cancel; | ||
1224 | } | ||
1225 | |||
1226 | xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); | ||
1227 | unlock_dp_on_error = true; | ||
1228 | |||
1229 | xfs_bmap_init(&free_list, &first_block); | ||
1230 | |||
1231 | /* | ||
1232 | * Reserve disk quota and the inode. | ||
1233 | */ | ||
1234 | error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, | ||
1235 | pdqp, resblks, 1, 0); | ||
1236 | if (error) | ||
1237 | goto out_trans_cancel; | ||
1238 | |||
1239 | error = xfs_dir_canenter(tp, dp, name, resblks); | ||
1240 | if (error) | ||
1241 | goto out_trans_cancel; | ||
1242 | |||
1243 | /* | ||
1244 | * A newly created regular or special file just has one directory | ||
1245 | * entry pointing to them, but a directory also the "." entry | ||
1246 | * pointing to itself. | ||
1247 | */ | ||
1248 | error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, | ||
1249 | prid, resblks > 0, &ip, &committed); | ||
1250 | if (error) { | ||
1251 | if (error == ENOSPC) | ||
1252 | goto out_trans_cancel; | ||
1253 | goto out_trans_abort; | ||
1254 | } | ||
1255 | |||
1256 | /* | ||
1257 | * Now we join the directory inode to the transaction. We do not do it | ||
1258 | * earlier because xfs_dir_ialloc might commit the previous transaction | ||
1259 | * (and release all the locks). An error from here on will result in | ||
1260 | * the transaction cancel unlocking dp so don't do it explicitly in the | ||
1261 | * error path. | ||
1262 | */ | ||
1263 | xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); | ||
1264 | unlock_dp_on_error = false; | ||
1265 | |||
1266 | error = xfs_dir_createname(tp, dp, name, ip->i_ino, | ||
1267 | &first_block, &free_list, resblks ? | ||
1268 | resblks - XFS_IALLOC_SPACE_RES(mp) : 0); | ||
1269 | if (error) { | ||
1270 | ASSERT(error != ENOSPC); | ||
1271 | goto out_trans_abort; | ||
1272 | } | ||
1273 | xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | ||
1274 | xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); | ||
1275 | |||
1276 | if (is_dir) { | ||
1277 | error = xfs_dir_init(tp, ip, dp); | ||
1278 | if (error) | ||
1279 | goto out_bmap_cancel; | ||
1280 | |||
1281 | error = xfs_bumplink(tp, dp); | ||
1282 | if (error) | ||
1283 | goto out_bmap_cancel; | ||
1284 | } | ||
1285 | |||
1286 | /* | ||
1287 | * If this is a synchronous mount, make sure that the | ||
1288 | * create transaction goes to disk before returning to | ||
1289 | * the user. | ||
1290 | */ | ||
1291 | if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) | ||
1292 | xfs_trans_set_sync(tp); | ||
1293 | |||
1294 | /* | ||
1295 | * Attach the dquot(s) to the inodes and modify them incore. | ||
1296 | * These ids of the inode couldn't have changed since the new | ||
1297 | * inode has been locked ever since it was created. | ||
1298 | */ | ||
1299 | xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); | ||
1300 | |||
1301 | error = xfs_bmap_finish(&tp, &free_list, &committed); | ||
1302 | if (error) | ||
1303 | goto out_bmap_cancel; | ||
1304 | |||
1305 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | ||
1306 | if (error) | ||
1307 | goto out_release_inode; | ||
1308 | |||
1309 | xfs_qm_dqrele(udqp); | ||
1310 | xfs_qm_dqrele(gdqp); | ||
1311 | xfs_qm_dqrele(pdqp); | ||
1312 | |||
1313 | *ipp = ip; | ||
1314 | return 0; | ||
1315 | |||
1316 | out_bmap_cancel: | ||
1317 | xfs_bmap_cancel(&free_list); | ||
1318 | out_trans_abort: | ||
1319 | cancel_flags |= XFS_TRANS_ABORT; | ||
1320 | out_trans_cancel: | ||
1321 | xfs_trans_cancel(tp, cancel_flags); | ||
1322 | out_release_inode: | ||
1323 | /* | ||
1324 | * Wait until after the current transaction is aborted to | ||
1325 | * release the inode. This prevents recursive transactions | ||
1326 | * and deadlocks from xfs_inactive. | ||
1327 | */ | ||
1328 | if (ip) | ||
1329 | IRELE(ip); | ||
1330 | |||
1331 | xfs_qm_dqrele(udqp); | ||
1332 | xfs_qm_dqrele(gdqp); | ||
1333 | xfs_qm_dqrele(pdqp); | ||
1334 | |||
1335 | if (unlock_dp_on_error) | ||
1336 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | ||
1337 | return error; | ||
1338 | } | ||
1339 | |||
1340 | int | ||
1341 | xfs_link( | ||
1342 | xfs_inode_t *tdp, | ||
1343 | xfs_inode_t *sip, | ||
1344 | struct xfs_name *target_name) | ||
1345 | { | ||
1346 | xfs_mount_t *mp = tdp->i_mount; | ||
1347 | xfs_trans_t *tp; | ||
1348 | int error; | ||
1349 | xfs_bmap_free_t free_list; | ||
1350 | xfs_fsblock_t first_block; | ||
1351 | int cancel_flags; | ||
1352 | int committed; | ||
1353 | int resblks; | ||
1354 | |||
1355 | trace_xfs_link(tdp, target_name); | ||
1356 | |||
1357 | ASSERT(!S_ISDIR(sip->i_d.di_mode)); | ||
1358 | |||
1359 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
1360 | return XFS_ERROR(EIO); | ||
1361 | |||
1362 | error = xfs_qm_dqattach(sip, 0); | ||
1363 | if (error) | ||
1364 | goto std_return; | ||
1365 | |||
1366 | error = xfs_qm_dqattach(tdp, 0); | ||
1367 | if (error) | ||
1368 | goto std_return; | ||
1369 | |||
1370 | tp = xfs_trans_alloc(mp, XFS_TRANS_LINK); | ||
1371 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; | ||
1372 | resblks = XFS_LINK_SPACE_RES(mp, target_name->len); | ||
1373 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0); | ||
1374 | if (error == ENOSPC) { | ||
1375 | resblks = 0; | ||
1376 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0); | ||
1377 | } | ||
1378 | if (error) { | ||
1379 | cancel_flags = 0; | ||
1380 | goto error_return; | ||
1381 | } | ||
1382 | |||
1383 | xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); | ||
1384 | |||
1385 | xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); | ||
1386 | xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); | ||
1387 | |||
1388 | /* | ||
1389 | * If we are using project inheritance, we only allow hard link | ||
1390 | * creation in our tree when the project IDs are the same; else | ||
1391 | * the tree quota mechanism could be circumvented. | ||
1392 | */ | ||
1393 | if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && | ||
1394 | (xfs_get_projid(tdp) != xfs_get_projid(sip)))) { | ||
1395 | error = XFS_ERROR(EXDEV); | ||
1396 | goto error_return; | ||
1397 | } | ||
1398 | |||
1399 | error = xfs_dir_canenter(tp, tdp, target_name, resblks); | ||
1400 | if (error) | ||
1401 | goto error_return; | ||
1402 | |||
1403 | xfs_bmap_init(&free_list, &first_block); | ||
1404 | |||
1405 | error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, | ||
1406 | &first_block, &free_list, resblks); | ||
1407 | if (error) | ||
1408 | goto abort_return; | ||
1409 | xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | ||
1410 | xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); | ||
1411 | |||
1412 | error = xfs_bumplink(tp, sip); | ||
1413 | if (error) | ||
1414 | goto abort_return; | ||
1415 | |||
1416 | /* | ||
1417 | * If this is a synchronous mount, make sure that the | ||
1418 | * link transaction goes to disk before returning to | ||
1419 | * the user. | ||
1420 | */ | ||
1421 | if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { | ||
1422 | xfs_trans_set_sync(tp); | ||
1423 | } | ||
1424 | |||
1425 | error = xfs_bmap_finish (&tp, &free_list, &committed); | ||
1426 | if (error) { | ||
1427 | xfs_bmap_cancel(&free_list); | ||
1428 | goto abort_return; | ||
1429 | } | ||
1430 | |||
1431 | return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | ||
1432 | |||
1433 | abort_return: | ||
1434 | cancel_flags |= XFS_TRANS_ABORT; | ||
1435 | error_return: | ||
1436 | xfs_trans_cancel(tp, cancel_flags); | ||
1437 | std_return: | ||
1438 | return error; | ||
1439 | } | ||
1440 | |||
1441 | /* | ||
1470 | * Free up the underlying blocks past new_size. The new size must be smaller | 1442 | * Free up the underlying blocks past new_size. The new size must be smaller |
1471 | * than the current size. This routine can be used both for the attribute and | 1443 | * than the current size. This routine can be used both for the attribute and |
1472 | * data fork, and does not modify the inode size, which is left to the caller. | 1444 | * data fork, and does not modify the inode size, which is left to the caller. |
@@ -1576,10 +1548,7 @@ xfs_itruncate_extents( | |||
1576 | * reference that we gained in xfs_trans_dup() | 1548 | * reference that we gained in xfs_trans_dup() |
1577 | */ | 1549 | */ |
1578 | xfs_log_ticket_put(tp->t_ticket); | 1550 | xfs_log_ticket_put(tp->t_ticket); |
1579 | error = xfs_trans_reserve(tp, 0, | 1551 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); |
1580 | XFS_ITRUNCATE_LOG_RES(mp), 0, | ||
1581 | XFS_TRANS_PERM_LOG_RES, | ||
1582 | XFS_ITRUNCATE_LOG_COUNT); | ||
1583 | if (error) | 1552 | if (error) |
1584 | goto out; | 1553 | goto out; |
1585 | } | 1554 | } |
@@ -1605,6 +1574,271 @@ out_bmap_cancel: | |||
1605 | goto out; | 1574 | goto out; |
1606 | } | 1575 | } |
1607 | 1576 | ||
1577 | int | ||
1578 | xfs_release( | ||
1579 | xfs_inode_t *ip) | ||
1580 | { | ||
1581 | xfs_mount_t *mp = ip->i_mount; | ||
1582 | int error; | ||
1583 | |||
1584 | if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0)) | ||
1585 | return 0; | ||
1586 | |||
1587 | /* If this is a read-only mount, don't do this (would generate I/O) */ | ||
1588 | if (mp->m_flags & XFS_MOUNT_RDONLY) | ||
1589 | return 0; | ||
1590 | |||
1591 | if (!XFS_FORCED_SHUTDOWN(mp)) { | ||
1592 | int truncated; | ||
1593 | |||
1594 | /* | ||
1595 | * If we are using filestreams, and we have an unlinked | ||
1596 | * file that we are processing the last close on, then nothing | ||
1597 | * will be able to reopen and write to this file. Purge this | ||
1598 | * inode from the filestreams cache so that it doesn't delay | ||
1599 | * teardown of the inode. | ||
1600 | */ | ||
1601 | if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip)) | ||
1602 | xfs_filestream_deassociate(ip); | ||
1603 | |||
1604 | /* | ||
1605 | * If we previously truncated this file and removed old data | ||
1606 | * in the process, we want to initiate "early" writeout on | ||
1607 | * the last close. This is an attempt to combat the notorious | ||
1608 | * NULL files problem which is particularly noticeable from a | ||
1609 | * truncate down, buffered (re-)write (delalloc), followed by | ||
1610 | * a crash. What we are effectively doing here is | ||
1611 | * significantly reducing the time window where we'd otherwise | ||
1612 | * be exposed to that problem. | ||
1613 | */ | ||
1614 | truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); | ||
1615 | if (truncated) { | ||
1616 | xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE); | ||
1617 | if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) { | ||
1618 | error = -filemap_flush(VFS_I(ip)->i_mapping); | ||
1619 | if (error) | ||
1620 | return error; | ||
1621 | } | ||
1622 | } | ||
1623 | } | ||
1624 | |||
1625 | if (ip->i_d.di_nlink == 0) | ||
1626 | return 0; | ||
1627 | |||
1628 | if (xfs_can_free_eofblocks(ip, false)) { | ||
1629 | |||
1630 | /* | ||
1631 | * If we can't get the iolock just skip truncating the blocks | ||
1632 | * past EOF because we could deadlock with the mmap_sem | ||
1633 | * otherwise. We'll get another chance to drop them once the | ||
1634 | * last reference to the inode is dropped, so we'll never leak | ||
1635 | * blocks permanently. | ||
1636 | * | ||
1637 | * Further, check if the inode is being opened, written and | ||
1638 | * closed frequently and we have delayed allocation blocks | ||
1639 | * outstanding (e.g. streaming writes from the NFS server), | ||
1640 | * truncating the blocks past EOF will cause fragmentation to | ||
1641 | * occur. | ||
1642 | * | ||
1643 | * In this case don't do the truncation, either, but we have to | ||
1644 | * be careful how we detect this case. Blocks beyond EOF show | ||
1645 | * up as i_delayed_blks even when the inode is clean, so we | ||
1646 | * need to truncate them away first before checking for a dirty | ||
1647 | * release. Hence on the first dirty close we will still remove | ||
1648 | * the speculative allocation, but after that we will leave it | ||
1649 | * in place. | ||
1650 | */ | ||
1651 | if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) | ||
1652 | return 0; | ||
1653 | |||
1654 | error = xfs_free_eofblocks(mp, ip, true); | ||
1655 | if (error && error != EAGAIN) | ||
1656 | return error; | ||
1657 | |||
1658 | /* delalloc blocks after truncation means it really is dirty */ | ||
1659 | if (ip->i_delayed_blks) | ||
1660 | xfs_iflags_set(ip, XFS_IDIRTY_RELEASE); | ||
1661 | } | ||
1662 | return 0; | ||
1663 | } | ||
1664 | |||
1665 | /* | ||
1666 | * xfs_inactive | ||
1667 | * | ||
1668 | * This is called when the vnode reference count for the vnode | ||
1669 | * goes to zero. If the file has been unlinked, then it must | ||
1670 | * now be truncated. Also, we clear all of the read-ahead state | ||
1671 | * kept for the inode here since the file is now closed. | ||
1672 | */ | ||
1673 | int | ||
1674 | xfs_inactive( | ||
1675 | xfs_inode_t *ip) | ||
1676 | { | ||
1677 | xfs_bmap_free_t free_list; | ||
1678 | xfs_fsblock_t first_block; | ||
1679 | int committed; | ||
1680 | struct xfs_trans *tp; | ||
1681 | struct xfs_mount *mp; | ||
1682 | struct xfs_trans_res *resp; | ||
1683 | int error; | ||
1684 | int truncate = 0; | ||
1685 | |||
1686 | /* | ||
1687 | * If the inode is already free, then there can be nothing | ||
1688 | * to clean up here. | ||
1689 | */ | ||
1690 | if (ip->i_d.di_mode == 0 || is_bad_inode(VFS_I(ip))) { | ||
1691 | ASSERT(ip->i_df.if_real_bytes == 0); | ||
1692 | ASSERT(ip->i_df.if_broot_bytes == 0); | ||
1693 | return VN_INACTIVE_CACHE; | ||
1694 | } | ||
1695 | |||
1696 | mp = ip->i_mount; | ||
1697 | |||
1698 | error = 0; | ||
1699 | |||
1700 | /* If this is a read-only mount, don't do this (would generate I/O) */ | ||
1701 | if (mp->m_flags & XFS_MOUNT_RDONLY) | ||
1702 | goto out; | ||
1703 | |||
1704 | if (ip->i_d.di_nlink != 0) { | ||
1705 | /* | ||
1706 | * force is true because we are evicting an inode from the | ||
1707 | * cache. Post-eof blocks must be freed, lest we end up with | ||
1708 | * broken free space accounting. | ||
1709 | */ | ||
1710 | if (xfs_can_free_eofblocks(ip, true)) { | ||
1711 | error = xfs_free_eofblocks(mp, ip, false); | ||
1712 | if (error) | ||
1713 | return VN_INACTIVE_CACHE; | ||
1714 | } | ||
1715 | goto out; | ||
1716 | } | ||
1717 | |||
1718 | if (S_ISREG(ip->i_d.di_mode) && | ||
1719 | (ip->i_d.di_size != 0 || XFS_ISIZE(ip) != 0 || | ||
1720 | ip->i_d.di_nextents > 0 || ip->i_delayed_blks > 0)) | ||
1721 | truncate = 1; | ||
1722 | |||
1723 | error = xfs_qm_dqattach(ip, 0); | ||
1724 | if (error) | ||
1725 | return VN_INACTIVE_CACHE; | ||
1726 | |||
1727 | tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); | ||
1728 | resp = (truncate || S_ISLNK(ip->i_d.di_mode)) ? | ||
1729 | &M_RES(mp)->tr_itruncate : &M_RES(mp)->tr_ifree; | ||
1730 | |||
1731 | error = xfs_trans_reserve(tp, resp, 0, 0); | ||
1732 | if (error) { | ||
1733 | ASSERT(XFS_FORCED_SHUTDOWN(mp)); | ||
1734 | xfs_trans_cancel(tp, 0); | ||
1735 | return VN_INACTIVE_CACHE; | ||
1736 | } | ||
1737 | |||
1738 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
1739 | xfs_trans_ijoin(tp, ip, 0); | ||
1740 | |||
1741 | if (S_ISLNK(ip->i_d.di_mode)) { | ||
1742 | error = xfs_inactive_symlink(ip, &tp); | ||
1743 | if (error) | ||
1744 | goto out_cancel; | ||
1745 | } else if (truncate) { | ||
1746 | ip->i_d.di_size = 0; | ||
1747 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
1748 | |||
1749 | error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0); | ||
1750 | if (error) | ||
1751 | goto out_cancel; | ||
1752 | |||
1753 | ASSERT(ip->i_d.di_nextents == 0); | ||
1754 | } | ||
1755 | |||
1756 | /* | ||
1757 | * If there are attributes associated with the file then blow them away | ||
1758 | * now. The code calls a routine that recursively deconstructs the | ||
1759 | * attribute fork. We need to just commit the current transaction | ||
1760 | * because we can't use it for xfs_attr_inactive(). | ||
1761 | */ | ||
1762 | if (ip->i_d.di_anextents > 0) { | ||
1763 | ASSERT(ip->i_d.di_forkoff != 0); | ||
1764 | |||
1765 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | ||
1766 | if (error) | ||
1767 | goto out_unlock; | ||
1768 | |||
1769 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1770 | |||
1771 | error = xfs_attr_inactive(ip); | ||
1772 | if (error) | ||
1773 | goto out; | ||
1774 | |||
1775 | tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); | ||
1776 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 0, 0); | ||
1777 | if (error) { | ||
1778 | xfs_trans_cancel(tp, 0); | ||
1779 | goto out; | ||
1780 | } | ||
1781 | |||
1782 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
1783 | xfs_trans_ijoin(tp, ip, 0); | ||
1784 | } | ||
1785 | |||
1786 | if (ip->i_afp) | ||
1787 | xfs_idestroy_fork(ip, XFS_ATTR_FORK); | ||
1788 | |||
1789 | ASSERT(ip->i_d.di_anextents == 0); | ||
1790 | |||
1791 | /* | ||
1792 | * Free the inode. | ||
1793 | */ | ||
1794 | xfs_bmap_init(&free_list, &first_block); | ||
1795 | error = xfs_ifree(tp, ip, &free_list); | ||
1796 | if (error) { | ||
1797 | /* | ||
1798 | * If we fail to free the inode, shut down. The cancel | ||
1799 | * might do that, we need to make sure. Otherwise the | ||
1800 | * inode might be lost for a long time or forever. | ||
1801 | */ | ||
1802 | if (!XFS_FORCED_SHUTDOWN(mp)) { | ||
1803 | xfs_notice(mp, "%s: xfs_ifree returned error %d", | ||
1804 | __func__, error); | ||
1805 | xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); | ||
1806 | } | ||
1807 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); | ||
1808 | } else { | ||
1809 | /* | ||
1810 | * Credit the quota account(s). The inode is gone. | ||
1811 | */ | ||
1812 | xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1); | ||
1813 | |||
1814 | /* | ||
1815 | * Just ignore errors at this point. There is nothing we can | ||
1816 | * do except to try to keep going. Make sure it's not a silent | ||
1817 | * error. | ||
1818 | */ | ||
1819 | error = xfs_bmap_finish(&tp, &free_list, &committed); | ||
1820 | if (error) | ||
1821 | xfs_notice(mp, "%s: xfs_bmap_finish returned error %d", | ||
1822 | __func__, error); | ||
1823 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | ||
1824 | if (error) | ||
1825 | xfs_notice(mp, "%s: xfs_trans_commit returned error %d", | ||
1826 | __func__, error); | ||
1827 | } | ||
1828 | |||
1829 | /* | ||
1830 | * Release the dquots held by inode, if any. | ||
1831 | */ | ||
1832 | xfs_qm_dqdetach(ip); | ||
1833 | out_unlock: | ||
1834 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1835 | out: | ||
1836 | return VN_INACTIVE_CACHE; | ||
1837 | out_cancel: | ||
1838 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); | ||
1839 | goto out_unlock; | ||
1840 | } | ||
1841 | |||
1608 | /* | 1842 | /* |
1609 | * This is called when the inode's link count goes to 0. | 1843 | * This is called when the inode's link count goes to 0. |
1610 | * We place the on-disk inode on a list in the AGI. It | 1844 | * We place the on-disk inode on a list in the AGI. It |
@@ -1861,7 +2095,7 @@ xfs_iunlink_remove( | |||
1861 | } | 2095 | } |
1862 | 2096 | ||
1863 | /* | 2097 | /* |
1864 | * A big issue when freeing the inode cluster is is that we _cannot_ skip any | 2098 | * A big issue when freeing the inode cluster is that we _cannot_ skip any |
1865 | * inodes that are in memory - they all must be marked stale and attached to | 2099 | * inodes that are in memory - they all must be marked stale and attached to |
1866 | * the cluster buffer. | 2100 | * the cluster buffer. |
1867 | */ | 2101 | */ |
@@ -2094,272 +2328,6 @@ xfs_ifree( | |||
2094 | } | 2328 | } |
2095 | 2329 | ||
2096 | /* | 2330 | /* |
2097 | * Reallocate the space for if_broot based on the number of records | ||
2098 | * being added or deleted as indicated in rec_diff. Move the records | ||
2099 | * and pointers in if_broot to fit the new size. When shrinking this | ||
2100 | * will eliminate holes between the records and pointers created by | ||
2101 | * the caller. When growing this will create holes to be filled in | ||
2102 | * by the caller. | ||
2103 | * | ||
2104 | * The caller must not request to add more records than would fit in | ||
2105 | * the on-disk inode root. If the if_broot is currently NULL, then | ||
2106 | * if we adding records one will be allocated. The caller must also | ||
2107 | * not request that the number of records go below zero, although | ||
2108 | * it can go to zero. | ||
2109 | * | ||
2110 | * ip -- the inode whose if_broot area is changing | ||
2111 | * ext_diff -- the change in the number of records, positive or negative, | ||
2112 | * requested for the if_broot array. | ||
2113 | */ | ||
2114 | void | ||
2115 | xfs_iroot_realloc( | ||
2116 | xfs_inode_t *ip, | ||
2117 | int rec_diff, | ||
2118 | int whichfork) | ||
2119 | { | ||
2120 | struct xfs_mount *mp = ip->i_mount; | ||
2121 | int cur_max; | ||
2122 | xfs_ifork_t *ifp; | ||
2123 | struct xfs_btree_block *new_broot; | ||
2124 | int new_max; | ||
2125 | size_t new_size; | ||
2126 | char *np; | ||
2127 | char *op; | ||
2128 | |||
2129 | /* | ||
2130 | * Handle the degenerate case quietly. | ||
2131 | */ | ||
2132 | if (rec_diff == 0) { | ||
2133 | return; | ||
2134 | } | ||
2135 | |||
2136 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
2137 | if (rec_diff > 0) { | ||
2138 | /* | ||
2139 | * If there wasn't any memory allocated before, just | ||
2140 | * allocate it now and get out. | ||
2141 | */ | ||
2142 | if (ifp->if_broot_bytes == 0) { | ||
2143 | new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff); | ||
2144 | ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS); | ||
2145 | ifp->if_broot_bytes = (int)new_size; | ||
2146 | return; | ||
2147 | } | ||
2148 | |||
2149 | /* | ||
2150 | * If there is already an existing if_broot, then we need | ||
2151 | * to realloc() it and shift the pointers to their new | ||
2152 | * location. The records don't change location because | ||
2153 | * they are kept butted up against the btree block header. | ||
2154 | */ | ||
2155 | cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); | ||
2156 | new_max = cur_max + rec_diff; | ||
2157 | new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max); | ||
2158 | ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, | ||
2159 | XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max), | ||
2160 | KM_SLEEP | KM_NOFS); | ||
2161 | op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, | ||
2162 | ifp->if_broot_bytes); | ||
2163 | np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, | ||
2164 | (int)new_size); | ||
2165 | ifp->if_broot_bytes = (int)new_size; | ||
2166 | ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= | ||
2167 | XFS_IFORK_SIZE(ip, whichfork)); | ||
2168 | memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t)); | ||
2169 | return; | ||
2170 | } | ||
2171 | |||
2172 | /* | ||
2173 | * rec_diff is less than 0. In this case, we are shrinking the | ||
2174 | * if_broot buffer. It must already exist. If we go to zero | ||
2175 | * records, just get rid of the root and clear the status bit. | ||
2176 | */ | ||
2177 | ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0)); | ||
2178 | cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); | ||
2179 | new_max = cur_max + rec_diff; | ||
2180 | ASSERT(new_max >= 0); | ||
2181 | if (new_max > 0) | ||
2182 | new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max); | ||
2183 | else | ||
2184 | new_size = 0; | ||
2185 | if (new_size > 0) { | ||
2186 | new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS); | ||
2187 | /* | ||
2188 | * First copy over the btree block header. | ||
2189 | */ | ||
2190 | memcpy(new_broot, ifp->if_broot, | ||
2191 | XFS_BMBT_BLOCK_LEN(ip->i_mount)); | ||
2192 | } else { | ||
2193 | new_broot = NULL; | ||
2194 | ifp->if_flags &= ~XFS_IFBROOT; | ||
2195 | } | ||
2196 | |||
2197 | /* | ||
2198 | * Only copy the records and pointers if there are any. | ||
2199 | */ | ||
2200 | if (new_max > 0) { | ||
2201 | /* | ||
2202 | * First copy the records. | ||
2203 | */ | ||
2204 | op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1); | ||
2205 | np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1); | ||
2206 | memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t)); | ||
2207 | |||
2208 | /* | ||
2209 | * Then copy the pointers. | ||
2210 | */ | ||
2211 | op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, | ||
2212 | ifp->if_broot_bytes); | ||
2213 | np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1, | ||
2214 | (int)new_size); | ||
2215 | memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t)); | ||
2216 | } | ||
2217 | kmem_free(ifp->if_broot); | ||
2218 | ifp->if_broot = new_broot; | ||
2219 | ifp->if_broot_bytes = (int)new_size; | ||
2220 | if (ifp->if_broot) | ||
2221 | ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= | ||
2222 | XFS_IFORK_SIZE(ip, whichfork)); | ||
2223 | return; | ||
2224 | } | ||
2225 | |||
2226 | |||
2227 | /* | ||
2228 | * This is called when the amount of space needed for if_data | ||
2229 | * is increased or decreased. The change in size is indicated by | ||
2230 | * the number of bytes that need to be added or deleted in the | ||
2231 | * byte_diff parameter. | ||
2232 | * | ||
2233 | * If the amount of space needed has decreased below the size of the | ||
2234 | * inline buffer, then switch to using the inline buffer. Otherwise, | ||
2235 | * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer | ||
2236 | * to what is needed. | ||
2237 | * | ||
2238 | * ip -- the inode whose if_data area is changing | ||
2239 | * byte_diff -- the change in the number of bytes, positive or negative, | ||
2240 | * requested for the if_data array. | ||
2241 | */ | ||
2242 | void | ||
2243 | xfs_idata_realloc( | ||
2244 | xfs_inode_t *ip, | ||
2245 | int byte_diff, | ||
2246 | int whichfork) | ||
2247 | { | ||
2248 | xfs_ifork_t *ifp; | ||
2249 | int new_size; | ||
2250 | int real_size; | ||
2251 | |||
2252 | if (byte_diff == 0) { | ||
2253 | return; | ||
2254 | } | ||
2255 | |||
2256 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
2257 | new_size = (int)ifp->if_bytes + byte_diff; | ||
2258 | ASSERT(new_size >= 0); | ||
2259 | |||
2260 | if (new_size == 0) { | ||
2261 | if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { | ||
2262 | kmem_free(ifp->if_u1.if_data); | ||
2263 | } | ||
2264 | ifp->if_u1.if_data = NULL; | ||
2265 | real_size = 0; | ||
2266 | } else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) { | ||
2267 | /* | ||
2268 | * If the valid extents/data can fit in if_inline_ext/data, | ||
2269 | * copy them from the malloc'd vector and free it. | ||
2270 | */ | ||
2271 | if (ifp->if_u1.if_data == NULL) { | ||
2272 | ifp->if_u1.if_data = ifp->if_u2.if_inline_data; | ||
2273 | } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { | ||
2274 | ASSERT(ifp->if_real_bytes != 0); | ||
2275 | memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data, | ||
2276 | new_size); | ||
2277 | kmem_free(ifp->if_u1.if_data); | ||
2278 | ifp->if_u1.if_data = ifp->if_u2.if_inline_data; | ||
2279 | } | ||
2280 | real_size = 0; | ||
2281 | } else { | ||
2282 | /* | ||
2283 | * Stuck with malloc/realloc. | ||
2284 | * For inline data, the underlying buffer must be | ||
2285 | * a multiple of 4 bytes in size so that it can be | ||
2286 | * logged and stay on word boundaries. We enforce | ||
2287 | * that here. | ||
2288 | */ | ||
2289 | real_size = roundup(new_size, 4); | ||
2290 | if (ifp->if_u1.if_data == NULL) { | ||
2291 | ASSERT(ifp->if_real_bytes == 0); | ||
2292 | ifp->if_u1.if_data = kmem_alloc(real_size, | ||
2293 | KM_SLEEP | KM_NOFS); | ||
2294 | } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { | ||
2295 | /* | ||
2296 | * Only do the realloc if the underlying size | ||
2297 | * is really changing. | ||
2298 | */ | ||
2299 | if (ifp->if_real_bytes != real_size) { | ||
2300 | ifp->if_u1.if_data = | ||
2301 | kmem_realloc(ifp->if_u1.if_data, | ||
2302 | real_size, | ||
2303 | ifp->if_real_bytes, | ||
2304 | KM_SLEEP | KM_NOFS); | ||
2305 | } | ||
2306 | } else { | ||
2307 | ASSERT(ifp->if_real_bytes == 0); | ||
2308 | ifp->if_u1.if_data = kmem_alloc(real_size, | ||
2309 | KM_SLEEP | KM_NOFS); | ||
2310 | memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data, | ||
2311 | ifp->if_bytes); | ||
2312 | } | ||
2313 | } | ||
2314 | ifp->if_real_bytes = real_size; | ||
2315 | ifp->if_bytes = new_size; | ||
2316 | ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); | ||
2317 | } | ||
2318 | |||
2319 | void | ||
2320 | xfs_idestroy_fork( | ||
2321 | xfs_inode_t *ip, | ||
2322 | int whichfork) | ||
2323 | { | ||
2324 | xfs_ifork_t *ifp; | ||
2325 | |||
2326 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
2327 | if (ifp->if_broot != NULL) { | ||
2328 | kmem_free(ifp->if_broot); | ||
2329 | ifp->if_broot = NULL; | ||
2330 | } | ||
2331 | |||
2332 | /* | ||
2333 | * If the format is local, then we can't have an extents | ||
2334 | * array so just look for an inline data array. If we're | ||
2335 | * not local then we may or may not have an extents list, | ||
2336 | * so check and free it up if we do. | ||
2337 | */ | ||
2338 | if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { | ||
2339 | if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) && | ||
2340 | (ifp->if_u1.if_data != NULL)) { | ||
2341 | ASSERT(ifp->if_real_bytes != 0); | ||
2342 | kmem_free(ifp->if_u1.if_data); | ||
2343 | ifp->if_u1.if_data = NULL; | ||
2344 | ifp->if_real_bytes = 0; | ||
2345 | } | ||
2346 | } else if ((ifp->if_flags & XFS_IFEXTENTS) && | ||
2347 | ((ifp->if_flags & XFS_IFEXTIREC) || | ||
2348 | ((ifp->if_u1.if_extents != NULL) && | ||
2349 | (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) { | ||
2350 | ASSERT(ifp->if_real_bytes != 0); | ||
2351 | xfs_iext_destroy(ifp); | ||
2352 | } | ||
2353 | ASSERT(ifp->if_u1.if_extents == NULL || | ||
2354 | ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext); | ||
2355 | ASSERT(ifp->if_real_bytes == 0); | ||
2356 | if (whichfork == XFS_ATTR_FORK) { | ||
2357 | kmem_zone_free(xfs_ifork_zone, ip->i_afp); | ||
2358 | ip->i_afp = NULL; | ||
2359 | } | ||
2360 | } | ||
2361 | |||
2362 | /* | ||
2363 | * This is called to unpin an inode. The caller must have the inode locked | 2331 | * This is called to unpin an inode. The caller must have the inode locked |
2364 | * in at least shared mode so that the buffer cannot be subsequently pinned | 2332 | * in at least shared mode so that the buffer cannot be subsequently pinned |
2365 | * once someone is waiting for it to be unpinned. | 2333 | * once someone is waiting for it to be unpinned. |
@@ -2402,162 +2370,471 @@ xfs_iunpin_wait( | |||
2402 | __xfs_iunpin_wait(ip); | 2370 | __xfs_iunpin_wait(ip); |
2403 | } | 2371 | } |
2404 | 2372 | ||
2405 | /* | ||
2406 | * xfs_iextents_copy() | ||
2407 | * | ||
2408 | * This is called to copy the REAL extents (as opposed to the delayed | ||
2409 | * allocation extents) from the inode into the given buffer. It | ||
2410 | * returns the number of bytes copied into the buffer. | ||
2411 | * | ||
2412 | * If there are no delayed allocation extents, then we can just | ||
2413 | * memcpy() the extents into the buffer. Otherwise, we need to | ||
2414 | * examine each extent in turn and skip those which are delayed. | ||
2415 | */ | ||
2416 | int | 2373 | int |
2417 | xfs_iextents_copy( | 2374 | xfs_remove( |
2418 | xfs_inode_t *ip, | 2375 | xfs_inode_t *dp, |
2419 | xfs_bmbt_rec_t *dp, | 2376 | struct xfs_name *name, |
2420 | int whichfork) | 2377 | xfs_inode_t *ip) |
2421 | { | 2378 | { |
2422 | int copied; | 2379 | xfs_mount_t *mp = dp->i_mount; |
2423 | int i; | 2380 | xfs_trans_t *tp = NULL; |
2424 | xfs_ifork_t *ifp; | 2381 | int is_dir = S_ISDIR(ip->i_d.di_mode); |
2425 | int nrecs; | 2382 | int error = 0; |
2426 | xfs_fsblock_t start_block; | 2383 | xfs_bmap_free_t free_list; |
2384 | xfs_fsblock_t first_block; | ||
2385 | int cancel_flags; | ||
2386 | int committed; | ||
2387 | int link_zero; | ||
2388 | uint resblks; | ||
2389 | uint log_count; | ||
2427 | 2390 | ||
2428 | ifp = XFS_IFORK_PTR(ip, whichfork); | 2391 | trace_xfs_remove(dp, name); |
2429 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); | 2392 | |
2430 | ASSERT(ifp->if_bytes > 0); | 2393 | if (XFS_FORCED_SHUTDOWN(mp)) |
2394 | return XFS_ERROR(EIO); | ||
2395 | |||
2396 | error = xfs_qm_dqattach(dp, 0); | ||
2397 | if (error) | ||
2398 | goto std_return; | ||
2399 | |||
2400 | error = xfs_qm_dqattach(ip, 0); | ||
2401 | if (error) | ||
2402 | goto std_return; | ||
2431 | 2403 | ||
2432 | nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | 2404 | if (is_dir) { |
2433 | XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork); | 2405 | tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR); |
2434 | ASSERT(nrecs > 0); | 2406 | log_count = XFS_DEFAULT_LOG_COUNT; |
2407 | } else { | ||
2408 | tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE); | ||
2409 | log_count = XFS_REMOVE_LOG_COUNT; | ||
2410 | } | ||
2411 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; | ||
2435 | 2412 | ||
2436 | /* | 2413 | /* |
2437 | * There are some delayed allocation extents in the | 2414 | * We try to get the real space reservation first, |
2438 | * inode, so copy the extents one at a time and skip | 2415 | * allowing for directory btree deletion(s) implying |
2439 | * the delayed ones. There must be at least one | 2416 | * possible bmap insert(s). If we can't get the space |
2440 | * non-delayed extent. | 2417 | * reservation then we use 0 instead, and avoid the bmap |
2418 | * btree insert(s) in the directory code by, if the bmap | ||
2419 | * insert tries to happen, instead trimming the LAST | ||
2420 | * block from the directory. | ||
2441 | */ | 2421 | */ |
2442 | copied = 0; | 2422 | resblks = XFS_REMOVE_SPACE_RES(mp); |
2443 | for (i = 0; i < nrecs; i++) { | 2423 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, resblks, 0); |
2444 | xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); | 2424 | if (error == ENOSPC) { |
2445 | start_block = xfs_bmbt_get_startblock(ep); | 2425 | resblks = 0; |
2446 | if (isnullstartblock(start_block)) { | 2426 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, 0, 0); |
2447 | /* | 2427 | } |
2448 | * It's a delayed allocation extent, so skip it. | 2428 | if (error) { |
2449 | */ | 2429 | ASSERT(error != ENOSPC); |
2450 | continue; | 2430 | cancel_flags = 0; |
2431 | goto out_trans_cancel; | ||
2432 | } | ||
2433 | |||
2434 | xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); | ||
2435 | |||
2436 | xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); | ||
2437 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | ||
2438 | |||
2439 | /* | ||
2440 | * If we're removing a directory perform some additional validation. | ||
2441 | */ | ||
2442 | if (is_dir) { | ||
2443 | ASSERT(ip->i_d.di_nlink >= 2); | ||
2444 | if (ip->i_d.di_nlink != 2) { | ||
2445 | error = XFS_ERROR(ENOTEMPTY); | ||
2446 | goto out_trans_cancel; | ||
2451 | } | 2447 | } |
2448 | if (!xfs_dir_isempty(ip)) { | ||
2449 | error = XFS_ERROR(ENOTEMPTY); | ||
2450 | goto out_trans_cancel; | ||
2451 | } | ||
2452 | } | ||
2453 | |||
2454 | xfs_bmap_init(&free_list, &first_block); | ||
2455 | error = xfs_dir_removename(tp, dp, name, ip->i_ino, | ||
2456 | &first_block, &free_list, resblks); | ||
2457 | if (error) { | ||
2458 | ASSERT(error != ENOENT); | ||
2459 | goto out_bmap_cancel; | ||
2460 | } | ||
2461 | xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | ||
2452 | 2462 | ||
2453 | /* Translate to on disk format */ | 2463 | if (is_dir) { |
2454 | put_unaligned(cpu_to_be64(ep->l0), &dp->l0); | 2464 | /* |
2455 | put_unaligned(cpu_to_be64(ep->l1), &dp->l1); | 2465 | * Drop the link from ip's "..". |
2456 | dp++; | 2466 | */ |
2457 | copied++; | 2467 | error = xfs_droplink(tp, dp); |
2468 | if (error) | ||
2469 | goto out_bmap_cancel; | ||
2470 | |||
2471 | /* | ||
2472 | * Drop the "." link from ip to self. | ||
2473 | */ | ||
2474 | error = xfs_droplink(tp, ip); | ||
2475 | if (error) | ||
2476 | goto out_bmap_cancel; | ||
2477 | } else { | ||
2478 | /* | ||
2479 | * When removing a non-directory we need to log the parent | ||
2480 | * inode here. For a directory this is done implicitly | ||
2481 | * by the xfs_droplink call for the ".." entry. | ||
2482 | */ | ||
2483 | xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); | ||
2458 | } | 2484 | } |
2459 | ASSERT(copied != 0); | ||
2460 | xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip)); | ||
2461 | 2485 | ||
2462 | return (copied * (uint)sizeof(xfs_bmbt_rec_t)); | 2486 | /* |
2487 | * Drop the link from dp to ip. | ||
2488 | */ | ||
2489 | error = xfs_droplink(tp, ip); | ||
2490 | if (error) | ||
2491 | goto out_bmap_cancel; | ||
2492 | |||
2493 | /* | ||
2494 | * Determine if this is the last link while | ||
2495 | * we are in the transaction. | ||
2496 | */ | ||
2497 | link_zero = (ip->i_d.di_nlink == 0); | ||
2498 | |||
2499 | /* | ||
2500 | * If this is a synchronous mount, make sure that the | ||
2501 | * remove transaction goes to disk before returning to | ||
2502 | * the user. | ||
2503 | */ | ||
2504 | if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) | ||
2505 | xfs_trans_set_sync(tp); | ||
2506 | |||
2507 | error = xfs_bmap_finish(&tp, &free_list, &committed); | ||
2508 | if (error) | ||
2509 | goto out_bmap_cancel; | ||
2510 | |||
2511 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | ||
2512 | if (error) | ||
2513 | goto std_return; | ||
2514 | |||
2515 | /* | ||
2516 | * If we are using filestreams, kill the stream association. | ||
2517 | * If the file is still open it may get a new one but that | ||
2518 | * will get killed on last close in xfs_close() so we don't | ||
2519 | * have to worry about that. | ||
2520 | */ | ||
2521 | if (!is_dir && link_zero && xfs_inode_is_filestream(ip)) | ||
2522 | xfs_filestream_deassociate(ip); | ||
2523 | |||
2524 | return 0; | ||
2525 | |||
2526 | out_bmap_cancel: | ||
2527 | xfs_bmap_cancel(&free_list); | ||
2528 | cancel_flags |= XFS_TRANS_ABORT; | ||
2529 | out_trans_cancel: | ||
2530 | xfs_trans_cancel(tp, cancel_flags); | ||
2531 | std_return: | ||
2532 | return error; | ||
2463 | } | 2533 | } |
2464 | 2534 | ||
2465 | /* | 2535 | /* |
2466 | * Each of the following cases stores data into the same region | 2536 | * Enter all inodes for a rename transaction into a sorted array. |
2467 | * of the on-disk inode, so only one of them can be valid at | ||
2468 | * any given time. While it is possible to have conflicting formats | ||
2469 | * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is | ||
2470 | * in EXTENTS format, this can only happen when the fork has | ||
2471 | * changed formats after being modified but before being flushed. | ||
2472 | * In these cases, the format always takes precedence, because the | ||
2473 | * format indicates the current state of the fork. | ||
2474 | */ | 2537 | */ |
2475 | /*ARGSUSED*/ | ||
2476 | STATIC void | 2538 | STATIC void |
2477 | xfs_iflush_fork( | 2539 | xfs_sort_for_rename( |
2478 | xfs_inode_t *ip, | 2540 | xfs_inode_t *dp1, /* in: old (source) directory inode */ |
2479 | xfs_dinode_t *dip, | 2541 | xfs_inode_t *dp2, /* in: new (target) directory inode */ |
2480 | xfs_inode_log_item_t *iip, | 2542 | xfs_inode_t *ip1, /* in: inode of old entry */ |
2481 | int whichfork, | 2543 | xfs_inode_t *ip2, /* in: inode of new entry, if it |
2482 | xfs_buf_t *bp) | 2544 | already exists, NULL otherwise. */ |
2483 | { | 2545 | xfs_inode_t **i_tab,/* out: array of inode returned, sorted */ |
2484 | char *cp; | 2546 | int *num_inodes) /* out: number of inodes in array */ |
2485 | xfs_ifork_t *ifp; | 2547 | { |
2486 | xfs_mount_t *mp; | 2548 | xfs_inode_t *temp; |
2487 | static const short brootflag[2] = | 2549 | int i, j; |
2488 | { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; | ||
2489 | static const short dataflag[2] = | ||
2490 | { XFS_ILOG_DDATA, XFS_ILOG_ADATA }; | ||
2491 | static const short extflag[2] = | ||
2492 | { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; | ||
2493 | |||
2494 | if (!iip) | ||
2495 | return; | ||
2496 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
2497 | /* | ||
2498 | * This can happen if we gave up in iformat in an error path, | ||
2499 | * for the attribute fork. | ||
2500 | */ | ||
2501 | if (!ifp) { | ||
2502 | ASSERT(whichfork == XFS_ATTR_FORK); | ||
2503 | return; | ||
2504 | } | ||
2505 | cp = XFS_DFORK_PTR(dip, whichfork); | ||
2506 | mp = ip->i_mount; | ||
2507 | switch (XFS_IFORK_FORMAT(ip, whichfork)) { | ||
2508 | case XFS_DINODE_FMT_LOCAL: | ||
2509 | if ((iip->ili_fields & dataflag[whichfork]) && | ||
2510 | (ifp->if_bytes > 0)) { | ||
2511 | ASSERT(ifp->if_u1.if_data != NULL); | ||
2512 | ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); | ||
2513 | memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes); | ||
2514 | } | ||
2515 | break; | ||
2516 | 2550 | ||
2517 | case XFS_DINODE_FMT_EXTENTS: | 2551 | /* |
2518 | ASSERT((ifp->if_flags & XFS_IFEXTENTS) || | 2552 | * i_tab contains a list of pointers to inodes. We initialize |
2519 | !(iip->ili_fields & extflag[whichfork])); | 2553 | * the table here & we'll sort it. We will then use it to |
2520 | if ((iip->ili_fields & extflag[whichfork]) && | 2554 | * order the acquisition of the inode locks. |
2521 | (ifp->if_bytes > 0)) { | 2555 | * |
2522 | ASSERT(xfs_iext_get_ext(ifp, 0)); | 2556 | * Note that the table may contain duplicates. e.g., dp1 == dp2. |
2523 | ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); | 2557 | */ |
2524 | (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, | 2558 | i_tab[0] = dp1; |
2525 | whichfork); | 2559 | i_tab[1] = dp2; |
2526 | } | 2560 | i_tab[2] = ip1; |
2527 | break; | 2561 | if (ip2) { |
2562 | *num_inodes = 4; | ||
2563 | i_tab[3] = ip2; | ||
2564 | } else { | ||
2565 | *num_inodes = 3; | ||
2566 | i_tab[3] = NULL; | ||
2567 | } | ||
2528 | 2568 | ||
2529 | case XFS_DINODE_FMT_BTREE: | 2569 | /* |
2530 | if ((iip->ili_fields & brootflag[whichfork]) && | 2570 | * Sort the elements via bubble sort. (Remember, there are at |
2531 | (ifp->if_broot_bytes > 0)) { | 2571 | * most 4 elements to sort, so this is adequate.) |
2532 | ASSERT(ifp->if_broot != NULL); | 2572 | */ |
2533 | ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= | 2573 | for (i = 0; i < *num_inodes; i++) { |
2534 | XFS_IFORK_SIZE(ip, whichfork)); | 2574 | for (j = 1; j < *num_inodes; j++) { |
2535 | xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes, | 2575 | if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) { |
2536 | (xfs_bmdr_block_t *)cp, | 2576 | temp = i_tab[j]; |
2537 | XFS_DFORK_SIZE(dip, mp, whichfork)); | 2577 | i_tab[j] = i_tab[j-1]; |
2578 | i_tab[j-1] = temp; | ||
2579 | } | ||
2538 | } | 2580 | } |
2539 | break; | 2581 | } |
2582 | } | ||
2583 | |||
2584 | /* | ||
2585 | * xfs_rename | ||
2586 | */ | ||
2587 | int | ||
2588 | xfs_rename( | ||
2589 | xfs_inode_t *src_dp, | ||
2590 | struct xfs_name *src_name, | ||
2591 | xfs_inode_t *src_ip, | ||
2592 | xfs_inode_t *target_dp, | ||
2593 | struct xfs_name *target_name, | ||
2594 | xfs_inode_t *target_ip) | ||
2595 | { | ||
2596 | xfs_trans_t *tp = NULL; | ||
2597 | xfs_mount_t *mp = src_dp->i_mount; | ||
2598 | int new_parent; /* moving to a new dir */ | ||
2599 | int src_is_directory; /* src_name is a directory */ | ||
2600 | int error; | ||
2601 | xfs_bmap_free_t free_list; | ||
2602 | xfs_fsblock_t first_block; | ||
2603 | int cancel_flags; | ||
2604 | int committed; | ||
2605 | xfs_inode_t *inodes[4]; | ||
2606 | int spaceres; | ||
2607 | int num_inodes; | ||
2608 | |||
2609 | trace_xfs_rename(src_dp, target_dp, src_name, target_name); | ||
2610 | |||
2611 | new_parent = (src_dp != target_dp); | ||
2612 | src_is_directory = S_ISDIR(src_ip->i_d.di_mode); | ||
2613 | |||
2614 | xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, | ||
2615 | inodes, &num_inodes); | ||
2616 | |||
2617 | xfs_bmap_init(&free_list, &first_block); | ||
2618 | tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); | ||
2619 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; | ||
2620 | spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); | ||
2621 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0); | ||
2622 | if (error == ENOSPC) { | ||
2623 | spaceres = 0; | ||
2624 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0); | ||
2625 | } | ||
2626 | if (error) { | ||
2627 | xfs_trans_cancel(tp, 0); | ||
2628 | goto std_return; | ||
2629 | } | ||
2630 | |||
2631 | /* | ||
2632 | * Attach the dquots to the inodes | ||
2633 | */ | ||
2634 | error = xfs_qm_vop_rename_dqattach(inodes); | ||
2635 | if (error) { | ||
2636 | xfs_trans_cancel(tp, cancel_flags); | ||
2637 | goto std_return; | ||
2638 | } | ||
2639 | |||
2640 | /* | ||
2641 | * Lock all the participating inodes. Depending upon whether | ||
2642 | * the target_name exists in the target directory, and | ||
2643 | * whether the target directory is the same as the source | ||
2644 | * directory, we can lock from 2 to 4 inodes. | ||
2645 | */ | ||
2646 | xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); | ||
2647 | |||
2648 | /* | ||
2649 | * Join all the inodes to the transaction. From this point on, | ||
2650 | * we can rely on either trans_commit or trans_cancel to unlock | ||
2651 | * them. | ||
2652 | */ | ||
2653 | xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); | ||
2654 | if (new_parent) | ||
2655 | xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); | ||
2656 | xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); | ||
2657 | if (target_ip) | ||
2658 | xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); | ||
2659 | |||
2660 | /* | ||
2661 | * If we are using project inheritance, we only allow renames | ||
2662 | * into our tree when the project IDs are the same; else the | ||
2663 | * tree quota mechanism would be circumvented. | ||
2664 | */ | ||
2665 | if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && | ||
2666 | (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) { | ||
2667 | error = XFS_ERROR(EXDEV); | ||
2668 | goto error_return; | ||
2669 | } | ||
2670 | |||
2671 | /* | ||
2672 | * Set up the target. | ||
2673 | */ | ||
2674 | if (target_ip == NULL) { | ||
2675 | /* | ||
2676 | * If there's no space reservation, check the entry will | ||
2677 | * fit before actually inserting it. | ||
2678 | */ | ||
2679 | error = xfs_dir_canenter(tp, target_dp, target_name, spaceres); | ||
2680 | if (error) | ||
2681 | goto error_return; | ||
2682 | /* | ||
2683 | * If target does not exist and the rename crosses | ||
2684 | * directories, adjust the target directory link count | ||
2685 | * to account for the ".." reference from the new entry. | ||
2686 | */ | ||
2687 | error = xfs_dir_createname(tp, target_dp, target_name, | ||
2688 | src_ip->i_ino, &first_block, | ||
2689 | &free_list, spaceres); | ||
2690 | if (error == ENOSPC) | ||
2691 | goto error_return; | ||
2692 | if (error) | ||
2693 | goto abort_return; | ||
2694 | |||
2695 | xfs_trans_ichgtime(tp, target_dp, | ||
2696 | XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | ||
2540 | 2697 | ||
2541 | case XFS_DINODE_FMT_DEV: | 2698 | if (new_parent && src_is_directory) { |
2542 | if (iip->ili_fields & XFS_ILOG_DEV) { | 2699 | error = xfs_bumplink(tp, target_dp); |
2543 | ASSERT(whichfork == XFS_DATA_FORK); | 2700 | if (error) |
2544 | xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev); | 2701 | goto abort_return; |
2702 | } | ||
2703 | } else { /* target_ip != NULL */ | ||
2704 | /* | ||
2705 | * If target exists and it's a directory, check that both | ||
2706 | * target and source are directories and that target can be | ||
2707 | * destroyed, or that neither is a directory. | ||
2708 | */ | ||
2709 | if (S_ISDIR(target_ip->i_d.di_mode)) { | ||
2710 | /* | ||
2711 | * Make sure target dir is empty. | ||
2712 | */ | ||
2713 | if (!(xfs_dir_isempty(target_ip)) || | ||
2714 | (target_ip->i_d.di_nlink > 2)) { | ||
2715 | error = XFS_ERROR(EEXIST); | ||
2716 | goto error_return; | ||
2717 | } | ||
2545 | } | 2718 | } |
2546 | break; | ||
2547 | 2719 | ||
2548 | case XFS_DINODE_FMT_UUID: | 2720 | /* |
2549 | if (iip->ili_fields & XFS_ILOG_UUID) { | 2721 | * Link the source inode under the target name. |
2550 | ASSERT(whichfork == XFS_DATA_FORK); | 2722 | * If the source inode is a directory and we are moving |
2551 | memcpy(XFS_DFORK_DPTR(dip), | 2723 | * it across directories, its ".." entry will be |
2552 | &ip->i_df.if_u2.if_uuid, | 2724 | * inconsistent until we replace that down below. |
2553 | sizeof(uuid_t)); | 2725 | * |
2726 | * In case there is already an entry with the same | ||
2727 | * name at the destination directory, remove it first. | ||
2728 | */ | ||
2729 | error = xfs_dir_replace(tp, target_dp, target_name, | ||
2730 | src_ip->i_ino, | ||
2731 | &first_block, &free_list, spaceres); | ||
2732 | if (error) | ||
2733 | goto abort_return; | ||
2734 | |||
2735 | xfs_trans_ichgtime(tp, target_dp, | ||
2736 | XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | ||
2737 | |||
2738 | /* | ||
2739 | * Decrement the link count on the target since the target | ||
2740 | * dir no longer points to it. | ||
2741 | */ | ||
2742 | error = xfs_droplink(tp, target_ip); | ||
2743 | if (error) | ||
2744 | goto abort_return; | ||
2745 | |||
2746 | if (src_is_directory) { | ||
2747 | /* | ||
2748 | * Drop the link from the old "." entry. | ||
2749 | */ | ||
2750 | error = xfs_droplink(tp, target_ip); | ||
2751 | if (error) | ||
2752 | goto abort_return; | ||
2554 | } | 2753 | } |
2555 | break; | 2754 | } /* target_ip != NULL */ |
2556 | 2755 | ||
2557 | default: | 2756 | /* |
2558 | ASSERT(0); | 2757 | * Remove the source. |
2559 | break; | 2758 | */ |
2759 | if (new_parent && src_is_directory) { | ||
2760 | /* | ||
2761 | * Rewrite the ".." entry to point to the new | ||
2762 | * directory. | ||
2763 | */ | ||
2764 | error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot, | ||
2765 | target_dp->i_ino, | ||
2766 | &first_block, &free_list, spaceres); | ||
2767 | ASSERT(error != EEXIST); | ||
2768 | if (error) | ||
2769 | goto abort_return; | ||
2770 | } | ||
2771 | |||
2772 | /* | ||
2773 | * We always want to hit the ctime on the source inode. | ||
2774 | * | ||
2775 | * This isn't strictly required by the standards since the source | ||
2776 | * inode isn't really being changed, but old unix file systems did | ||
2777 | * it and some incremental backup programs won't work without it. | ||
2778 | */ | ||
2779 | xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG); | ||
2780 | xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE); | ||
2781 | |||
2782 | /* | ||
2783 | * Adjust the link count on src_dp. This is necessary when | ||
2784 | * renaming a directory, either within one parent when | ||
2785 | * the target existed, or across two parent directories. | ||
2786 | */ | ||
2787 | if (src_is_directory && (new_parent || target_ip != NULL)) { | ||
2788 | |||
2789 | /* | ||
2790 | * Decrement link count on src_directory since the | ||
2791 | * entry that's moved no longer points to it. | ||
2792 | */ | ||
2793 | error = xfs_droplink(tp, src_dp); | ||
2794 | if (error) | ||
2795 | goto abort_return; | ||
2796 | } | ||
2797 | |||
2798 | error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, | ||
2799 | &first_block, &free_list, spaceres); | ||
2800 | if (error) | ||
2801 | goto abort_return; | ||
2802 | |||
2803 | xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | ||
2804 | xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); | ||
2805 | if (new_parent) | ||
2806 | xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); | ||
2807 | |||
2808 | /* | ||
2809 | * If this is a synchronous mount, make sure that the | ||
2810 | * rename transaction goes to disk before returning to | ||
2811 | * the user. | ||
2812 | */ | ||
2813 | if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { | ||
2814 | xfs_trans_set_sync(tp); | ||
2560 | } | 2815 | } |
2816 | |||
2817 | error = xfs_bmap_finish(&tp, &free_list, &committed); | ||
2818 | if (error) { | ||
2819 | xfs_bmap_cancel(&free_list); | ||
2820 | xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | | ||
2821 | XFS_TRANS_ABORT)); | ||
2822 | goto std_return; | ||
2823 | } | ||
2824 | |||
2825 | /* | ||
2826 | * trans_commit will unlock src_ip, target_ip & decrement | ||
2827 | * the vnode references. | ||
2828 | */ | ||
2829 | return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | ||
2830 | |||
2831 | abort_return: | ||
2832 | cancel_flags |= XFS_TRANS_ABORT; | ||
2833 | error_return: | ||
2834 | xfs_bmap_cancel(&free_list); | ||
2835 | xfs_trans_cancel(tp, cancel_flags); | ||
2836 | std_return: | ||
2837 | return error; | ||
2561 | } | 2838 | } |
2562 | 2839 | ||
2563 | STATIC int | 2840 | STATIC int |
@@ -2816,7 +3093,6 @@ abort_out: | |||
2816 | return error; | 3093 | return error; |
2817 | } | 3094 | } |
2818 | 3095 | ||
2819 | |||
2820 | STATIC int | 3096 | STATIC int |
2821 | xfs_iflush_int( | 3097 | xfs_iflush_int( |
2822 | struct xfs_inode *ip, | 3098 | struct xfs_inode *ip, |
@@ -3004,1072 +3280,3 @@ xfs_iflush_int( | |||
3004 | corrupt_out: | 3280 | corrupt_out: |
3005 | return XFS_ERROR(EFSCORRUPTED); | 3281 | return XFS_ERROR(EFSCORRUPTED); |
3006 | } | 3282 | } |
3007 | |||
3008 | /* | ||
3009 | * Return a pointer to the extent record at file index idx. | ||
3010 | */ | ||
3011 | xfs_bmbt_rec_host_t * | ||
3012 | xfs_iext_get_ext( | ||
3013 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
3014 | xfs_extnum_t idx) /* index of target extent */ | ||
3015 | { | ||
3016 | ASSERT(idx >= 0); | ||
3017 | ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); | ||
3018 | |||
3019 | if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { | ||
3020 | return ifp->if_u1.if_ext_irec->er_extbuf; | ||
3021 | } else if (ifp->if_flags & XFS_IFEXTIREC) { | ||
3022 | xfs_ext_irec_t *erp; /* irec pointer */ | ||
3023 | int erp_idx = 0; /* irec index */ | ||
3024 | xfs_extnum_t page_idx = idx; /* ext index in target list */ | ||
3025 | |||
3026 | erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); | ||
3027 | return &erp->er_extbuf[page_idx]; | ||
3028 | } else if (ifp->if_bytes) { | ||
3029 | return &ifp->if_u1.if_extents[idx]; | ||
3030 | } else { | ||
3031 | return NULL; | ||
3032 | } | ||
3033 | } | ||
3034 | |||
3035 | /* | ||
3036 | * Insert new item(s) into the extent records for incore inode | ||
3037 | * fork 'ifp'. 'count' new items are inserted at index 'idx'. | ||
3038 | */ | ||
3039 | void | ||
3040 | xfs_iext_insert( | ||
3041 | xfs_inode_t *ip, /* incore inode pointer */ | ||
3042 | xfs_extnum_t idx, /* starting index of new items */ | ||
3043 | xfs_extnum_t count, /* number of inserted items */ | ||
3044 | xfs_bmbt_irec_t *new, /* items to insert */ | ||
3045 | int state) /* type of extent conversion */ | ||
3046 | { | ||
3047 | xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df; | ||
3048 | xfs_extnum_t i; /* extent record index */ | ||
3049 | |||
3050 | trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_); | ||
3051 | |||
3052 | ASSERT(ifp->if_flags & XFS_IFEXTENTS); | ||
3053 | xfs_iext_add(ifp, idx, count); | ||
3054 | for (i = idx; i < idx + count; i++, new++) | ||
3055 | xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new); | ||
3056 | } | ||
3057 | |||
3058 | /* | ||
3059 | * This is called when the amount of space required for incore file | ||
3060 | * extents needs to be increased. The ext_diff parameter stores the | ||
3061 | * number of new extents being added and the idx parameter contains | ||
3062 | * the extent index where the new extents will be added. If the new | ||
3063 | * extents are being appended, then we just need to (re)allocate and | ||
3064 | * initialize the space. Otherwise, if the new extents are being | ||
3065 | * inserted into the middle of the existing entries, a bit more work | ||
3066 | * is required to make room for the new extents to be inserted. The | ||
3067 | * caller is responsible for filling in the new extent entries upon | ||
3068 | * return. | ||
3069 | */ | ||
3070 | void | ||
3071 | xfs_iext_add( | ||
3072 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
3073 | xfs_extnum_t idx, /* index to begin adding exts */ | ||
3074 | int ext_diff) /* number of extents to add */ | ||
3075 | { | ||
3076 | int byte_diff; /* new bytes being added */ | ||
3077 | int new_size; /* size of extents after adding */ | ||
3078 | xfs_extnum_t nextents; /* number of extents in file */ | ||
3079 | |||
3080 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | ||
3081 | ASSERT((idx >= 0) && (idx <= nextents)); | ||
3082 | byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t); | ||
3083 | new_size = ifp->if_bytes + byte_diff; | ||
3084 | /* | ||
3085 | * If the new number of extents (nextents + ext_diff) | ||
3086 | * fits inside the inode, then continue to use the inline | ||
3087 | * extent buffer. | ||
3088 | */ | ||
3089 | if (nextents + ext_diff <= XFS_INLINE_EXTS) { | ||
3090 | if (idx < nextents) { | ||
3091 | memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff], | ||
3092 | &ifp->if_u2.if_inline_ext[idx], | ||
3093 | (nextents - idx) * sizeof(xfs_bmbt_rec_t)); | ||
3094 | memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff); | ||
3095 | } | ||
3096 | ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; | ||
3097 | ifp->if_real_bytes = 0; | ||
3098 | } | ||
3099 | /* | ||
3100 | * Otherwise use a linear (direct) extent list. | ||
3101 | * If the extents are currently inside the inode, | ||
3102 | * xfs_iext_realloc_direct will switch us from | ||
3103 | * inline to direct extent allocation mode. | ||
3104 | */ | ||
3105 | else if (nextents + ext_diff <= XFS_LINEAR_EXTS) { | ||
3106 | xfs_iext_realloc_direct(ifp, new_size); | ||
3107 | if (idx < nextents) { | ||
3108 | memmove(&ifp->if_u1.if_extents[idx + ext_diff], | ||
3109 | &ifp->if_u1.if_extents[idx], | ||
3110 | (nextents - idx) * sizeof(xfs_bmbt_rec_t)); | ||
3111 | memset(&ifp->if_u1.if_extents[idx], 0, byte_diff); | ||
3112 | } | ||
3113 | } | ||
3114 | /* Indirection array */ | ||
3115 | else { | ||
3116 | xfs_ext_irec_t *erp; | ||
3117 | int erp_idx = 0; | ||
3118 | int page_idx = idx; | ||
3119 | |||
3120 | ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS); | ||
3121 | if (ifp->if_flags & XFS_IFEXTIREC) { | ||
3122 | erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1); | ||
3123 | } else { | ||
3124 | xfs_iext_irec_init(ifp); | ||
3125 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
3126 | erp = ifp->if_u1.if_ext_irec; | ||
3127 | } | ||
3128 | /* Extents fit in target extent page */ | ||
3129 | if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) { | ||
3130 | if (page_idx < erp->er_extcount) { | ||
3131 | memmove(&erp->er_extbuf[page_idx + ext_diff], | ||
3132 | &erp->er_extbuf[page_idx], | ||
3133 | (erp->er_extcount - page_idx) * | ||
3134 | sizeof(xfs_bmbt_rec_t)); | ||
3135 | memset(&erp->er_extbuf[page_idx], 0, byte_diff); | ||
3136 | } | ||
3137 | erp->er_extcount += ext_diff; | ||
3138 | xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); | ||
3139 | } | ||
3140 | /* Insert a new extent page */ | ||
3141 | else if (erp) { | ||
3142 | xfs_iext_add_indirect_multi(ifp, | ||
3143 | erp_idx, page_idx, ext_diff); | ||
3144 | } | ||
3145 | /* | ||
3146 | * If extent(s) are being appended to the last page in | ||
3147 | * the indirection array and the new extent(s) don't fit | ||
3148 | * in the page, then erp is NULL and erp_idx is set to | ||
3149 | * the next index needed in the indirection array. | ||
3150 | */ | ||
3151 | else { | ||
3152 | int count = ext_diff; | ||
3153 | |||
3154 | while (count) { | ||
3155 | erp = xfs_iext_irec_new(ifp, erp_idx); | ||
3156 | erp->er_extcount = count; | ||
3157 | count -= MIN(count, (int)XFS_LINEAR_EXTS); | ||
3158 | if (count) { | ||
3159 | erp_idx++; | ||
3160 | } | ||
3161 | } | ||
3162 | } | ||
3163 | } | ||
3164 | ifp->if_bytes = new_size; | ||
3165 | } | ||
3166 | |||
3167 | /* | ||
3168 | * This is called when incore extents are being added to the indirection | ||
3169 | * array and the new extents do not fit in the target extent list. The | ||
3170 | * erp_idx parameter contains the irec index for the target extent list | ||
3171 | * in the indirection array, and the idx parameter contains the extent | ||
3172 | * index within the list. The number of extents being added is stored | ||
3173 | * in the count parameter. | ||
3174 | * | ||
3175 | * |-------| |-------| | ||
3176 | * | | | | idx - number of extents before idx | ||
3177 | * | idx | | count | | ||
3178 | * | | | | count - number of extents being inserted at idx | ||
3179 | * |-------| |-------| | ||
3180 | * | count | | nex2 | nex2 - number of extents after idx + count | ||
3181 | * |-------| |-------| | ||
3182 | */ | ||
3183 | void | ||
3184 | xfs_iext_add_indirect_multi( | ||
3185 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
3186 | int erp_idx, /* target extent irec index */ | ||
3187 | xfs_extnum_t idx, /* index within target list */ | ||
3188 | int count) /* new extents being added */ | ||
3189 | { | ||
3190 | int byte_diff; /* new bytes being added */ | ||
3191 | xfs_ext_irec_t *erp; /* pointer to irec entry */ | ||
3192 | xfs_extnum_t ext_diff; /* number of extents to add */ | ||
3193 | xfs_extnum_t ext_cnt; /* new extents still needed */ | ||
3194 | xfs_extnum_t nex2; /* extents after idx + count */ | ||
3195 | xfs_bmbt_rec_t *nex2_ep = NULL; /* temp list for nex2 extents */ | ||
3196 | int nlists; /* number of irec's (lists) */ | ||
3197 | |||
3198 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
3199 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | ||
3200 | nex2 = erp->er_extcount - idx; | ||
3201 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
3202 | |||
3203 | /* | ||
3204 | * Save second part of target extent list | ||
3205 | * (all extents past */ | ||
3206 | if (nex2) { | ||
3207 | byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); | ||
3208 | nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS); | ||
3209 | memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff); | ||
3210 | erp->er_extcount -= nex2; | ||
3211 | xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2); | ||
3212 | memset(&erp->er_extbuf[idx], 0, byte_diff); | ||
3213 | } | ||
3214 | |||
3215 | /* | ||
3216 | * Add the new extents to the end of the target | ||
3217 | * list, then allocate new irec record(s) and | ||
3218 | * extent buffer(s) as needed to store the rest | ||
3219 | * of the new extents. | ||
3220 | */ | ||
3221 | ext_cnt = count; | ||
3222 | ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount); | ||
3223 | if (ext_diff) { | ||
3224 | erp->er_extcount += ext_diff; | ||
3225 | xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); | ||
3226 | ext_cnt -= ext_diff; | ||
3227 | } | ||
3228 | while (ext_cnt) { | ||
3229 | erp_idx++; | ||
3230 | erp = xfs_iext_irec_new(ifp, erp_idx); | ||
3231 | ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS); | ||
3232 | erp->er_extcount = ext_diff; | ||
3233 | xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); | ||
3234 | ext_cnt -= ext_diff; | ||
3235 | } | ||
3236 | |||
3237 | /* Add nex2 extents back to indirection array */ | ||
3238 | if (nex2) { | ||
3239 | xfs_extnum_t ext_avail; | ||
3240 | int i; | ||
3241 | |||
3242 | byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); | ||
3243 | ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; | ||
3244 | i = 0; | ||
3245 | /* | ||
3246 | * If nex2 extents fit in the current page, append | ||
3247 | * nex2_ep after the new extents. | ||
3248 | */ | ||
3249 | if (nex2 <= ext_avail) { | ||
3250 | i = erp->er_extcount; | ||
3251 | } | ||
3252 | /* | ||
3253 | * Otherwise, check if space is available in the | ||
3254 | * next page. | ||
3255 | */ | ||
3256 | else if ((erp_idx < nlists - 1) && | ||
3257 | (nex2 <= (ext_avail = XFS_LINEAR_EXTS - | ||
3258 | ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) { | ||
3259 | erp_idx++; | ||
3260 | erp++; | ||
3261 | /* Create a hole for nex2 extents */ | ||
3262 | memmove(&erp->er_extbuf[nex2], erp->er_extbuf, | ||
3263 | erp->er_extcount * sizeof(xfs_bmbt_rec_t)); | ||
3264 | } | ||
3265 | /* | ||
3266 | * Final choice, create a new extent page for | ||
3267 | * nex2 extents. | ||
3268 | */ | ||
3269 | else { | ||
3270 | erp_idx++; | ||
3271 | erp = xfs_iext_irec_new(ifp, erp_idx); | ||
3272 | } | ||
3273 | memmove(&erp->er_extbuf[i], nex2_ep, byte_diff); | ||
3274 | kmem_free(nex2_ep); | ||
3275 | erp->er_extcount += nex2; | ||
3276 | xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2); | ||
3277 | } | ||
3278 | } | ||
3279 | |||
3280 | /* | ||
3281 | * This is called when the amount of space required for incore file | ||
3282 | * extents needs to be decreased. The ext_diff parameter stores the | ||
3283 | * number of extents to be removed and the idx parameter contains | ||
3284 | * the extent index where the extents will be removed from. | ||
3285 | * | ||
3286 | * If the amount of space needed has decreased below the linear | ||
3287 | * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous | ||
3288 | * extent array. Otherwise, use kmem_realloc() to adjust the | ||
3289 | * size to what is needed. | ||
3290 | */ | ||
3291 | void | ||
3292 | xfs_iext_remove( | ||
3293 | xfs_inode_t *ip, /* incore inode pointer */ | ||
3294 | xfs_extnum_t idx, /* index to begin removing exts */ | ||
3295 | int ext_diff, /* number of extents to remove */ | ||
3296 | int state) /* type of extent conversion */ | ||
3297 | { | ||
3298 | xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df; | ||
3299 | xfs_extnum_t nextents; /* number of extents in file */ | ||
3300 | int new_size; /* size of extents after removal */ | ||
3301 | |||
3302 | trace_xfs_iext_remove(ip, idx, state, _RET_IP_); | ||
3303 | |||
3304 | ASSERT(ext_diff > 0); | ||
3305 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | ||
3306 | new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); | ||
3307 | |||
3308 | if (new_size == 0) { | ||
3309 | xfs_iext_destroy(ifp); | ||
3310 | } else if (ifp->if_flags & XFS_IFEXTIREC) { | ||
3311 | xfs_iext_remove_indirect(ifp, idx, ext_diff); | ||
3312 | } else if (ifp->if_real_bytes) { | ||
3313 | xfs_iext_remove_direct(ifp, idx, ext_diff); | ||
3314 | } else { | ||
3315 | xfs_iext_remove_inline(ifp, idx, ext_diff); | ||
3316 | } | ||
3317 | ifp->if_bytes = new_size; | ||
3318 | } | ||
3319 | |||
3320 | /* | ||
3321 | * This removes ext_diff extents from the inline buffer, beginning | ||
3322 | * at extent index idx. | ||
3323 | */ | ||
3324 | void | ||
3325 | xfs_iext_remove_inline( | ||
3326 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
3327 | xfs_extnum_t idx, /* index to begin removing exts */ | ||
3328 | int ext_diff) /* number of extents to remove */ | ||
3329 | { | ||
3330 | int nextents; /* number of extents in file */ | ||
3331 | |||
3332 | ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); | ||
3333 | ASSERT(idx < XFS_INLINE_EXTS); | ||
3334 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | ||
3335 | ASSERT(((nextents - ext_diff) > 0) && | ||
3336 | (nextents - ext_diff) < XFS_INLINE_EXTS); | ||
3337 | |||
3338 | if (idx + ext_diff < nextents) { | ||
3339 | memmove(&ifp->if_u2.if_inline_ext[idx], | ||
3340 | &ifp->if_u2.if_inline_ext[idx + ext_diff], | ||
3341 | (nextents - (idx + ext_diff)) * | ||
3342 | sizeof(xfs_bmbt_rec_t)); | ||
3343 | memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff], | ||
3344 | 0, ext_diff * sizeof(xfs_bmbt_rec_t)); | ||
3345 | } else { | ||
3346 | memset(&ifp->if_u2.if_inline_ext[idx], 0, | ||
3347 | ext_diff * sizeof(xfs_bmbt_rec_t)); | ||
3348 | } | ||
3349 | } | ||
3350 | |||
3351 | /* | ||
3352 | * This removes ext_diff extents from a linear (direct) extent list, | ||
3353 | * beginning at extent index idx. If the extents are being removed | ||
3354 | * from the end of the list (ie. truncate) then we just need to re- | ||
3355 | * allocate the list to remove the extra space. Otherwise, if the | ||
3356 | * extents are being removed from the middle of the existing extent | ||
3357 | * entries, then we first need to move the extent records beginning | ||
3358 | * at idx + ext_diff up in the list to overwrite the records being | ||
3359 | * removed, then remove the extra space via kmem_realloc. | ||
3360 | */ | ||
3361 | void | ||
3362 | xfs_iext_remove_direct( | ||
3363 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
3364 | xfs_extnum_t idx, /* index to begin removing exts */ | ||
3365 | int ext_diff) /* number of extents to remove */ | ||
3366 | { | ||
3367 | xfs_extnum_t nextents; /* number of extents in file */ | ||
3368 | int new_size; /* size of extents after removal */ | ||
3369 | |||
3370 | ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); | ||
3371 | new_size = ifp->if_bytes - | ||
3372 | (ext_diff * sizeof(xfs_bmbt_rec_t)); | ||
3373 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | ||
3374 | |||
3375 | if (new_size == 0) { | ||
3376 | xfs_iext_destroy(ifp); | ||
3377 | return; | ||
3378 | } | ||
3379 | /* Move extents up in the list (if needed) */ | ||
3380 | if (idx + ext_diff < nextents) { | ||
3381 | memmove(&ifp->if_u1.if_extents[idx], | ||
3382 | &ifp->if_u1.if_extents[idx + ext_diff], | ||
3383 | (nextents - (idx + ext_diff)) * | ||
3384 | sizeof(xfs_bmbt_rec_t)); | ||
3385 | } | ||
3386 | memset(&ifp->if_u1.if_extents[nextents - ext_diff], | ||
3387 | 0, ext_diff * sizeof(xfs_bmbt_rec_t)); | ||
3388 | /* | ||
3389 | * Reallocate the direct extent list. If the extents | ||
3390 | * will fit inside the inode then xfs_iext_realloc_direct | ||
3391 | * will switch from direct to inline extent allocation | ||
3392 | * mode for us. | ||
3393 | */ | ||
3394 | xfs_iext_realloc_direct(ifp, new_size); | ||
3395 | ifp->if_bytes = new_size; | ||
3396 | } | ||
3397 | |||
3398 | /* | ||
3399 | * This is called when incore extents are being removed from the | ||
3400 | * indirection array and the extents being removed span multiple extent | ||
3401 | * buffers. The idx parameter contains the file extent index where we | ||
3402 | * want to begin removing extents, and the count parameter contains | ||
3403 | * how many extents need to be removed. | ||
3404 | * | ||
3405 | * |-------| |-------| | ||
3406 | * | nex1 | | | nex1 - number of extents before idx | ||
3407 | * |-------| | count | | ||
3408 | * | | | | count - number of extents being removed at idx | ||
3409 | * | count | |-------| | ||
3410 | * | | | nex2 | nex2 - number of extents after idx + count | ||
3411 | * |-------| |-------| | ||
3412 | */ | ||
3413 | void | ||
3414 | xfs_iext_remove_indirect( | ||
3415 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
3416 | xfs_extnum_t idx, /* index to begin removing extents */ | ||
3417 | int count) /* number of extents to remove */ | ||
3418 | { | ||
3419 | xfs_ext_irec_t *erp; /* indirection array pointer */ | ||
3420 | int erp_idx = 0; /* indirection array index */ | ||
3421 | xfs_extnum_t ext_cnt; /* extents left to remove */ | ||
3422 | xfs_extnum_t ext_diff; /* extents to remove in current list */ | ||
3423 | xfs_extnum_t nex1; /* number of extents before idx */ | ||
3424 | xfs_extnum_t nex2; /* extents after idx + count */ | ||
3425 | int page_idx = idx; /* index in target extent list */ | ||
3426 | |||
3427 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
3428 | erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); | ||
3429 | ASSERT(erp != NULL); | ||
3430 | nex1 = page_idx; | ||
3431 | ext_cnt = count; | ||
3432 | while (ext_cnt) { | ||
3433 | nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0); | ||
3434 | ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1)); | ||
3435 | /* | ||
3436 | * Check for deletion of entire list; | ||
3437 | * xfs_iext_irec_remove() updates extent offsets. | ||
3438 | */ | ||
3439 | if (ext_diff == erp->er_extcount) { | ||
3440 | xfs_iext_irec_remove(ifp, erp_idx); | ||
3441 | ext_cnt -= ext_diff; | ||
3442 | nex1 = 0; | ||
3443 | if (ext_cnt) { | ||
3444 | ASSERT(erp_idx < ifp->if_real_bytes / | ||
3445 | XFS_IEXT_BUFSZ); | ||
3446 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | ||
3447 | nex1 = 0; | ||
3448 | continue; | ||
3449 | } else { | ||
3450 | break; | ||
3451 | } | ||
3452 | } | ||
3453 | /* Move extents up (if needed) */ | ||
3454 | if (nex2) { | ||
3455 | memmove(&erp->er_extbuf[nex1], | ||
3456 | &erp->er_extbuf[nex1 + ext_diff], | ||
3457 | nex2 * sizeof(xfs_bmbt_rec_t)); | ||
3458 | } | ||
3459 | /* Zero out rest of page */ | ||
3460 | memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ - | ||
3461 | ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t)))); | ||
3462 | /* Update remaining counters */ | ||
3463 | erp->er_extcount -= ext_diff; | ||
3464 | xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff); | ||
3465 | ext_cnt -= ext_diff; | ||
3466 | nex1 = 0; | ||
3467 | erp_idx++; | ||
3468 | erp++; | ||
3469 | } | ||
3470 | ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t); | ||
3471 | xfs_iext_irec_compact(ifp); | ||
3472 | } | ||
3473 | |||
3474 | /* | ||
3475 | * Create, destroy, or resize a linear (direct) block of extents. | ||
3476 | */ | ||
3477 | void | ||
3478 | xfs_iext_realloc_direct( | ||
3479 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
3480 | int new_size) /* new size of extents */ | ||
3481 | { | ||
3482 | int rnew_size; /* real new size of extents */ | ||
3483 | |||
3484 | rnew_size = new_size; | ||
3485 | |||
3486 | ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) || | ||
3487 | ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) && | ||
3488 | (new_size != ifp->if_real_bytes))); | ||
3489 | |||
3490 | /* Free extent records */ | ||
3491 | if (new_size == 0) { | ||
3492 | xfs_iext_destroy(ifp); | ||
3493 | } | ||
3494 | /* Resize direct extent list and zero any new bytes */ | ||
3495 | else if (ifp->if_real_bytes) { | ||
3496 | /* Check if extents will fit inside the inode */ | ||
3497 | if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) { | ||
3498 | xfs_iext_direct_to_inline(ifp, new_size / | ||
3499 | (uint)sizeof(xfs_bmbt_rec_t)); | ||
3500 | ifp->if_bytes = new_size; | ||
3501 | return; | ||
3502 | } | ||
3503 | if (!is_power_of_2(new_size)){ | ||
3504 | rnew_size = roundup_pow_of_two(new_size); | ||
3505 | } | ||
3506 | if (rnew_size != ifp->if_real_bytes) { | ||
3507 | ifp->if_u1.if_extents = | ||
3508 | kmem_realloc(ifp->if_u1.if_extents, | ||
3509 | rnew_size, | ||
3510 | ifp->if_real_bytes, KM_NOFS); | ||
3511 | } | ||
3512 | if (rnew_size > ifp->if_real_bytes) { | ||
3513 | memset(&ifp->if_u1.if_extents[ifp->if_bytes / | ||
3514 | (uint)sizeof(xfs_bmbt_rec_t)], 0, | ||
3515 | rnew_size - ifp->if_real_bytes); | ||
3516 | } | ||
3517 | } | ||
3518 | /* | ||
3519 | * Switch from the inline extent buffer to a direct | ||
3520 | * extent list. Be sure to include the inline extent | ||
3521 | * bytes in new_size. | ||
3522 | */ | ||
3523 | else { | ||
3524 | new_size += ifp->if_bytes; | ||
3525 | if (!is_power_of_2(new_size)) { | ||
3526 | rnew_size = roundup_pow_of_two(new_size); | ||
3527 | } | ||
3528 | xfs_iext_inline_to_direct(ifp, rnew_size); | ||
3529 | } | ||
3530 | ifp->if_real_bytes = rnew_size; | ||
3531 | ifp->if_bytes = new_size; | ||
3532 | } | ||
3533 | |||
3534 | /* | ||
3535 | * Switch from linear (direct) extent records to inline buffer. | ||
3536 | */ | ||
3537 | void | ||
3538 | xfs_iext_direct_to_inline( | ||
3539 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
3540 | xfs_extnum_t nextents) /* number of extents in file */ | ||
3541 | { | ||
3542 | ASSERT(ifp->if_flags & XFS_IFEXTENTS); | ||
3543 | ASSERT(nextents <= XFS_INLINE_EXTS); | ||
3544 | /* | ||
3545 | * The inline buffer was zeroed when we switched | ||
3546 | * from inline to direct extent allocation mode, | ||
3547 | * so we don't need to clear it here. | ||
3548 | */ | ||
3549 | memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, | ||
3550 | nextents * sizeof(xfs_bmbt_rec_t)); | ||
3551 | kmem_free(ifp->if_u1.if_extents); | ||
3552 | ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; | ||
3553 | ifp->if_real_bytes = 0; | ||
3554 | } | ||
3555 | |||
3556 | /* | ||
3557 | * Switch from inline buffer to linear (direct) extent records. | ||
3558 | * new_size should already be rounded up to the next power of 2 | ||
3559 | * by the caller (when appropriate), so use new_size as it is. | ||
3560 | * However, since new_size may be rounded up, we can't update | ||
3561 | * if_bytes here. It is the caller's responsibility to update | ||
3562 | * if_bytes upon return. | ||
3563 | */ | ||
3564 | void | ||
3565 | xfs_iext_inline_to_direct( | ||
3566 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
3567 | int new_size) /* number of extents in file */ | ||
3568 | { | ||
3569 | ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS); | ||
3570 | memset(ifp->if_u1.if_extents, 0, new_size); | ||
3571 | if (ifp->if_bytes) { | ||
3572 | memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, | ||
3573 | ifp->if_bytes); | ||
3574 | memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * | ||
3575 | sizeof(xfs_bmbt_rec_t)); | ||
3576 | } | ||
3577 | ifp->if_real_bytes = new_size; | ||
3578 | } | ||
3579 | |||
3580 | /* | ||
3581 | * Resize an extent indirection array to new_size bytes. | ||
3582 | */ | ||
3583 | STATIC void | ||
3584 | xfs_iext_realloc_indirect( | ||
3585 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
3586 | int new_size) /* new indirection array size */ | ||
3587 | { | ||
3588 | int nlists; /* number of irec's (ex lists) */ | ||
3589 | int size; /* current indirection array size */ | ||
3590 | |||
3591 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
3592 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
3593 | size = nlists * sizeof(xfs_ext_irec_t); | ||
3594 | ASSERT(ifp->if_real_bytes); | ||
3595 | ASSERT((new_size >= 0) && (new_size != size)); | ||
3596 | if (new_size == 0) { | ||
3597 | xfs_iext_destroy(ifp); | ||
3598 | } else { | ||
3599 | ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) | ||
3600 | kmem_realloc(ifp->if_u1.if_ext_irec, | ||
3601 | new_size, size, KM_NOFS); | ||
3602 | } | ||
3603 | } | ||
3604 | |||
3605 | /* | ||
3606 | * Switch from indirection array to linear (direct) extent allocations. | ||
3607 | */ | ||
3608 | STATIC void | ||
3609 | xfs_iext_indirect_to_direct( | ||
3610 | xfs_ifork_t *ifp) /* inode fork pointer */ | ||
3611 | { | ||
3612 | xfs_bmbt_rec_host_t *ep; /* extent record pointer */ | ||
3613 | xfs_extnum_t nextents; /* number of extents in file */ | ||
3614 | int size; /* size of file extents */ | ||
3615 | |||
3616 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
3617 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | ||
3618 | ASSERT(nextents <= XFS_LINEAR_EXTS); | ||
3619 | size = nextents * sizeof(xfs_bmbt_rec_t); | ||
3620 | |||
3621 | xfs_iext_irec_compact_pages(ifp); | ||
3622 | ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); | ||
3623 | |||
3624 | ep = ifp->if_u1.if_ext_irec->er_extbuf; | ||
3625 | kmem_free(ifp->if_u1.if_ext_irec); | ||
3626 | ifp->if_flags &= ~XFS_IFEXTIREC; | ||
3627 | ifp->if_u1.if_extents = ep; | ||
3628 | ifp->if_bytes = size; | ||
3629 | if (nextents < XFS_LINEAR_EXTS) { | ||
3630 | xfs_iext_realloc_direct(ifp, size); | ||
3631 | } | ||
3632 | } | ||
3633 | |||
3634 | /* | ||
3635 | * Free incore file extents. | ||
3636 | */ | ||
3637 | void | ||
3638 | xfs_iext_destroy( | ||
3639 | xfs_ifork_t *ifp) /* inode fork pointer */ | ||
3640 | { | ||
3641 | if (ifp->if_flags & XFS_IFEXTIREC) { | ||
3642 | int erp_idx; | ||
3643 | int nlists; | ||
3644 | |||
3645 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
3646 | for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) { | ||
3647 | xfs_iext_irec_remove(ifp, erp_idx); | ||
3648 | } | ||
3649 | ifp->if_flags &= ~XFS_IFEXTIREC; | ||
3650 | } else if (ifp->if_real_bytes) { | ||
3651 | kmem_free(ifp->if_u1.if_extents); | ||
3652 | } else if (ifp->if_bytes) { | ||
3653 | memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * | ||
3654 | sizeof(xfs_bmbt_rec_t)); | ||
3655 | } | ||
3656 | ifp->if_u1.if_extents = NULL; | ||
3657 | ifp->if_real_bytes = 0; | ||
3658 | ifp->if_bytes = 0; | ||
3659 | } | ||
3660 | |||
/*
 * Return a pointer to the extent record for file system block bno.
 *
 * On success *idxp holds the file-based index of the extent containing
 * bno.  If bno falls in a hole, the first extent *after* bno is
 * returned; if bno is beyond the last extent, NULL is returned with
 * *idxp set one past the last extent.
 */
xfs_bmbt_rec_host_t *			/* pointer to found extent record */
xfs_iext_bno_to_ext(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_fileoff_t	bno,		/* block number to search for */
	xfs_extnum_t	*idxp)		/* index of target extent */
{
	xfs_bmbt_rec_host_t *base;	/* pointer to first extent */
	xfs_filblks_t	blockcount = 0;	/* number of blocks in extent */
	xfs_bmbt_rec_host_t *ep = NULL;	/* pointer to target extent */
	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
	int		high;		/* upper boundary in search */
	xfs_extnum_t	idx = 0;	/* index of target extent */
	int		low;		/* lower boundary in search */
	xfs_extnum_t	nextents;	/* number of file extents */
	xfs_fileoff_t	startoff = 0;	/* start offset of extent */

	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	if (nextents == 0) {
		*idxp = 0;
		return NULL;
	}
	low = 0;
	if (ifp->if_flags & XFS_IFEXTIREC) {
		/* Find target extent list */
		int	erp_idx = 0;
		erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
		base = erp->er_extbuf;
		high = erp->er_extcount - 1;
	} else {
		base = ifp->if_u1.if_extents;
		high = nextents - 1;
	}
	/* Binary search extent records */
	while (low <= high) {
		idx = (low + high) >> 1;
		ep = base + idx;
		startoff = xfs_bmbt_get_startoff(ep);
		blockcount = xfs_bmbt_get_blockcount(ep);
		if (bno < startoff) {
			high = idx - 1;
		} else if (bno >= startoff + blockcount) {
			low = idx + 1;
		} else {
			/* Convert back to file-based extent index */
			if (ifp->if_flags & XFS_IFEXTIREC) {
				idx += erp->er_extoff;
			}
			*idxp = idx;
			return ep;
		}
	}
	/*
	 * Not found: idx/ep refer to the last extent probed.  Convert to
	 * a file-based index and, if bno lies past that extent, step to
	 * the next one (or NULL if we ran off the end of the list).
	 */
	/* Convert back to file-based extent index */
	if (ifp->if_flags & XFS_IFEXTIREC) {
		idx += erp->er_extoff;
	}
	if (bno >= startoff + blockcount) {
		if (++idx == nextents) {
			ep = NULL;
		} else {
			ep = xfs_iext_get_ext(ifp, idx);
		}
	}
	*idxp = idx;
	return ep;
}
3729 | |||
3730 | /* | ||
3731 | * Return a pointer to the indirection array entry containing the | ||
3732 | * extent record for filesystem block bno. Store the index of the | ||
3733 | * target irec in *erp_idxp. | ||
3734 | */ | ||
3735 | xfs_ext_irec_t * /* pointer to found extent record */ | ||
3736 | xfs_iext_bno_to_irec( | ||
3737 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
3738 | xfs_fileoff_t bno, /* block number to search for */ | ||
3739 | int *erp_idxp) /* irec index of target ext list */ | ||
3740 | { | ||
3741 | xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ | ||
3742 | xfs_ext_irec_t *erp_next; /* next indirection array entry */ | ||
3743 | int erp_idx; /* indirection array index */ | ||
3744 | int nlists; /* number of extent irec's (lists) */ | ||
3745 | int high; /* binary search upper limit */ | ||
3746 | int low; /* binary search lower limit */ | ||
3747 | |||
3748 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
3749 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
3750 | erp_idx = 0; | ||
3751 | low = 0; | ||
3752 | high = nlists - 1; | ||
3753 | while (low <= high) { | ||
3754 | erp_idx = (low + high) >> 1; | ||
3755 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | ||
3756 | erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL; | ||
3757 | if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) { | ||
3758 | high = erp_idx - 1; | ||
3759 | } else if (erp_next && bno >= | ||
3760 | xfs_bmbt_get_startoff(erp_next->er_extbuf)) { | ||
3761 | low = erp_idx + 1; | ||
3762 | } else { | ||
3763 | break; | ||
3764 | } | ||
3765 | } | ||
3766 | *erp_idxp = erp_idx; | ||
3767 | return erp; | ||
3768 | } | ||
3769 | |||
/*
 * Return a pointer to the indirection array entry containing the
 * extent record at file extent index *idxp. Store the index of the
 * target irec in *erp_idxp and store the page index of the target
 * extent record in *idxp.
 *
 * When "realloc" is set the caller is about to insert at *idxp, so an
 * index one past the end of the list is legal and an index landing on
 * a page boundary prefers whichever adjacent page has room.
 */
xfs_ext_irec_t *
xfs_iext_idx_to_irec(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_extnum_t	*idxp,		/* extent index (file -> page) */
	int		*erp_idxp,	/* pointer to target irec */
	int		realloc)	/* new bytes were just added */
{
	xfs_ext_irec_t	*prev;		/* pointer to previous irec */
	xfs_ext_irec_t	*erp = NULL;	/* pointer to current irec */
	int		erp_idx;	/* indirection array index */
	int		nlists;		/* number of irec's (ex lists) */
	int		high;		/* binary search upper limit */
	int		low;		/* binary search lower limit */
	xfs_extnum_t	page_idx = *idxp; /* extent index in target list */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	ASSERT(page_idx >= 0);
	ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
	ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);

	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
	erp_idx = 0;
	low = 0;
	high = nlists - 1;

	/* Binary search extent irec's */
	while (low <= high) {
		erp_idx = (low + high) >> 1;
		erp = &ifp->if_u1.if_ext_irec[erp_idx];
		prev = erp_idx > 0 ? erp - 1 : NULL;
		if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
		    realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
			/*
			 * Target is before this page, or on its first slot
			 * while inserting and the previous page has room.
			 */
			high = erp_idx - 1;
		} else if (page_idx > erp->er_extoff + erp->er_extcount ||
			   (page_idx == erp->er_extoff + erp->er_extcount &&
			    !realloc)) {
			/* Target lies entirely after this page's records. */
			low = erp_idx + 1;
		} else if (page_idx == erp->er_extoff + erp->er_extcount &&
			   erp->er_extcount == XFS_LINEAR_EXTS) {
			/*
			 * Inserting just past a full page: the new record
			 * goes at slot 0 of the next page, which may not
			 * exist yet — erp becomes NULL in that case and
			 * the caller must allocate it.
			 */
			ASSERT(realloc);
			page_idx = 0;
			erp_idx++;
			erp = erp_idx < nlists ? erp + 1 : NULL;
			break;
		} else {
			/* Found: convert file index to a page-local one. */
			page_idx -= erp->er_extoff;
			break;
		}
	}
	*idxp = page_idx;
	*erp_idxp = erp_idx;
	return(erp);
}
3829 | |||
3830 | /* | ||
3831 | * Allocate and initialize an indirection array once the space needed | ||
3832 | * for incore extents increases above XFS_IEXT_BUFSZ. | ||
3833 | */ | ||
3834 | void | ||
3835 | xfs_iext_irec_init( | ||
3836 | xfs_ifork_t *ifp) /* inode fork pointer */ | ||
3837 | { | ||
3838 | xfs_ext_irec_t *erp; /* indirection array pointer */ | ||
3839 | xfs_extnum_t nextents; /* number of extents in file */ | ||
3840 | |||
3841 | ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); | ||
3842 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | ||
3843 | ASSERT(nextents <= XFS_LINEAR_EXTS); | ||
3844 | |||
3845 | erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS); | ||
3846 | |||
3847 | if (nextents == 0) { | ||
3848 | ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); | ||
3849 | } else if (!ifp->if_real_bytes) { | ||
3850 | xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ); | ||
3851 | } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { | ||
3852 | xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ); | ||
3853 | } | ||
3854 | erp->er_extbuf = ifp->if_u1.if_extents; | ||
3855 | erp->er_extcount = nextents; | ||
3856 | erp->er_extoff = 0; | ||
3857 | |||
3858 | ifp->if_flags |= XFS_IFEXTIREC; | ||
3859 | ifp->if_real_bytes = XFS_IEXT_BUFSZ; | ||
3860 | ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t); | ||
3861 | ifp->if_u1.if_ext_irec = erp; | ||
3862 | |||
3863 | return; | ||
3864 | } | ||
3865 | |||
3866 | /* | ||
3867 | * Allocate and initialize a new entry in the indirection array. | ||
3868 | */ | ||
3869 | xfs_ext_irec_t * | ||
3870 | xfs_iext_irec_new( | ||
3871 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
3872 | int erp_idx) /* index for new irec */ | ||
3873 | { | ||
3874 | xfs_ext_irec_t *erp; /* indirection array pointer */ | ||
3875 | int i; /* loop counter */ | ||
3876 | int nlists; /* number of irec's (ex lists) */ | ||
3877 | |||
3878 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
3879 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
3880 | |||
3881 | /* Resize indirection array */ | ||
3882 | xfs_iext_realloc_indirect(ifp, ++nlists * | ||
3883 | sizeof(xfs_ext_irec_t)); | ||
3884 | /* | ||
3885 | * Move records down in the array so the | ||
3886 | * new page can use erp_idx. | ||
3887 | */ | ||
3888 | erp = ifp->if_u1.if_ext_irec; | ||
3889 | for (i = nlists - 1; i > erp_idx; i--) { | ||
3890 | memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t)); | ||
3891 | } | ||
3892 | ASSERT(i == erp_idx); | ||
3893 | |||
3894 | /* Initialize new extent record */ | ||
3895 | erp = ifp->if_u1.if_ext_irec; | ||
3896 | erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); | ||
3897 | ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; | ||
3898 | memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); | ||
3899 | erp[erp_idx].er_extcount = 0; | ||
3900 | erp[erp_idx].er_extoff = erp_idx > 0 ? | ||
3901 | erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0; | ||
3902 | return (&erp[erp_idx]); | ||
3903 | } | ||
3904 | |||
/*
 * Remove a record from the indirection array, freeing its extent
 * buffer (if still owned), fixing up the extent offsets of all later
 * records, and shrinking the indirection array itself.
 */
void
xfs_iext_irec_remove(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	int		erp_idx)	/* irec index to remove */
{
	xfs_ext_irec_t	*erp;		/* indirection array pointer */
	int		i;		/* loop counter */
	int		nlists;		/* number of irec's (ex lists) */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
	erp = &ifp->if_u1.if_ext_irec[erp_idx];
	/*
	 * er_extbuf may be NULL if the caller already freed the page
	 * (see xfs_iext_irec_compact_pages); only adjust the offsets
	 * when the records are actually going away here.
	 */
	if (erp->er_extbuf) {
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
			-erp->er_extcount);
		kmem_free(erp->er_extbuf);
	}
	/* Compact extent records */
	erp = ifp->if_u1.if_ext_irec;
	for (i = erp_idx; i < nlists - 1; i++) {
		memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
	}
	/*
	 * Manually free the last extent record from the indirection
	 * array. A call to xfs_iext_realloc_indirect() with a size
	 * of zero would result in a call to xfs_iext_destroy() which
	 * would in turn call this function again, creating a nasty
	 * infinite loop.
	 */
	if (--nlists) {
		xfs_iext_realloc_indirect(ifp,
			nlists * sizeof(xfs_ext_irec_t));
	} else {
		kmem_free(ifp->if_u1.if_ext_irec);
	}
	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
}
3945 | |||
/*
 * This is called to clean up large amounts of unused memory allocated
 * by the indirection array. Before compacting anything though, verify
 * that the indirection array is still needed and switch back to the
 * linear extent list (or even the inline buffer) if possible. The
 * compaction policy is as follows:
 *
 * Full Compaction: Extents fit into a single page (or inline buffer)
 * Partial Compaction: Extents occupy less than 50% of allocated space
 * No Compaction: Extents occupy at least 50% of allocated space
 */
void
xfs_iext_irec_compact(
	xfs_ifork_t	*ifp)		/* inode fork pointer */
{
	xfs_extnum_t	nextents;	/* number of extents in file */
	int		nlists;		/* number of irec's (ex lists) */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);

	/* The branches are ordered smallest target representation first. */
	if (nextents == 0) {
		xfs_iext_destroy(ifp);
	} else if (nextents <= XFS_INLINE_EXTS) {
		/* Full compaction down to the inline buffer. */
		xfs_iext_indirect_to_direct(ifp);
		xfs_iext_direct_to_inline(ifp, nextents);
	} else if (nextents <= XFS_LINEAR_EXTS) {
		/* Full compaction down to a single direct list. */
		xfs_iext_indirect_to_direct(ifp);
	} else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
		/* Under 50% utilization: merge neighboring pages. */
		xfs_iext_irec_compact_pages(ifp);
	}
}
3979 | |||
/*
 * Combine extents from neighboring extent pages.  For each adjacent
 * pair, if the second page's records fit into the first page's free
 * space, merge them and delete the second page; otherwise advance.
 */
void
xfs_iext_irec_compact_pages(
	xfs_ifork_t	*ifp)		/* inode fork pointer */
{
	xfs_ext_irec_t	*erp, *erp_next;/* pointers to irec entries */
	int		erp_idx = 0;	/* indirection array index */
	int		nlists;		/* number of irec's (ex lists) */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
	while (erp_idx < nlists - 1) {
		erp = &ifp->if_u1.if_ext_irec[erp_idx];
		erp_next = erp + 1;
		if (erp_next->er_extcount <=
		    (XFS_LINEAR_EXTS - erp->er_extcount)) {
			memcpy(&erp->er_extbuf[erp->er_extcount],
				erp_next->er_extbuf, erp_next->er_extcount *
				sizeof(xfs_bmbt_rec_t));
			erp->er_extcount += erp_next->er_extcount;
			/*
			 * Free page before removing extent record
			 * so er_extoffs don't get modified in
			 * xfs_iext_irec_remove.
			 */
			kmem_free(erp_next->er_extbuf);
			erp_next->er_extbuf = NULL;
			xfs_iext_irec_remove(ifp, erp_idx + 1);
			/* The array may have been reallocated; recount. */
			nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
		} else {
			/* No room to merge; move on to the next pair. */
			erp_idx++;
		}
	}
}
4016 | |||
4017 | /* | ||
4018 | * This is called to update the er_extoff field in the indirection | ||
4019 | * array when extents have been added or removed from one of the | ||
4020 | * extent lists. erp_idx contains the irec index to begin updating | ||
4021 | * at and ext_diff contains the number of extents that were added | ||
4022 | * or removed. | ||
4023 | */ | ||
4024 | void | ||
4025 | xfs_iext_irec_update_extoffs( | ||
4026 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
4027 | int erp_idx, /* irec index to update */ | ||
4028 | int ext_diff) /* number of new extents */ | ||
4029 | { | ||
4030 | int i; /* loop counter */ | ||
4031 | int nlists; /* number of irec's (ex lists */ | ||
4032 | |||
4033 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
4034 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
4035 | for (i = erp_idx; i < nlists; i++) { | ||
4036 | ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff; | ||
4037 | } | ||
4038 | } | ||
4039 | |||
4040 | /* | ||
4041 | * Test whether it is appropriate to check an inode for and free post EOF | ||
4042 | * blocks. The 'force' parameter determines whether we should also consider | ||
4043 | * regular files that are marked preallocated or append-only. | ||
4044 | */ | ||
4045 | bool | ||
4046 | xfs_can_free_eofblocks(struct xfs_inode *ip, bool force) | ||
4047 | { | ||
4048 | /* prealloc/delalloc exists only on regular files */ | ||
4049 | if (!S_ISREG(ip->i_d.di_mode)) | ||
4050 | return false; | ||
4051 | |||
4052 | /* | ||
4053 | * Zero sized files with no cached pages and delalloc blocks will not | ||
4054 | * have speculative prealloc/delalloc blocks to remove. | ||
4055 | */ | ||
4056 | if (VFS_I(ip)->i_size == 0 && | ||
4057 | VN_CACHED(VFS_I(ip)) == 0 && | ||
4058 | ip->i_delayed_blks == 0) | ||
4059 | return false; | ||
4060 | |||
4061 | /* If we haven't read in the extent list, then don't do it now. */ | ||
4062 | if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) | ||
4063 | return false; | ||
4064 | |||
4065 | /* | ||
4066 | * Do not free real preallocated or append-only files unless the file | ||
4067 | * has delalloc blocks and we are forced to remove them. | ||
4068 | */ | ||
4069 | if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) | ||
4070 | if (!force || ip->i_delayed_blks == 0) | ||
4071 | return false; | ||
4072 | |||
4073 | return true; | ||
4074 | } | ||
4075 | |||