aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2013-08-12 06:49:35 -0400
committerBen Myers <bpm@sgi.com>2013-08-12 17:39:05 -0400
commit1fd7115eda5661e872463694fc4a12821c4f914a (patch)
tree147f8c4d1f4fac5c7f876e96ae4a4c8d201972f4 /fs
parent7bb85ef3608bf740e285b4436776526b7afd4903 (diff)
xfs: introduce xfs_inode_buf.c for inode buffer operations
The only things remaining in xfs_inode.[ch] are the operations that read, write or verify physical inodes in their underlying buffers. Move all this code to xfs_inode_buf.[ch] so that we can stop sharing xfs_inode.[ch] with userspace. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Mark Tinguely <tinguely@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/xfs_inode.c420
-rw-r--r--fs/xfs/xfs_inode.h41
-rw-r--r--fs/xfs/xfs_inode_buf.c453
-rw-r--r--fs/xfs/xfs_inode_buf.h53
5 files changed, 514 insertions, 454 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index ce0b87e6e4f3..7f2319d3554d 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -75,6 +75,7 @@ xfs-y += xfs_alloc.o \
75 xfs_icreate_item.o \ 75 xfs_icreate_item.o \
76 xfs_inode.o \ 76 xfs_inode.o \
77 xfs_inode_fork.o \ 77 xfs_inode_fork.o \
78 xfs_inode_buf.o \
78 xfs_log_recover.o \ 79 xfs_log_recover.o \
79 xfs_mount.o \ 80 xfs_mount.o \
80 xfs_symlink.o \ 81 xfs_symlink.o \
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 18308aceebc1..fcda5b39cc17 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -322,236 +322,6 @@ __xfs_iflock(
322 finish_wait(wq, &wait.wait); 322 finish_wait(wq, &wait.wait);
323} 323}
324 324
325/*
326 * Check that none of the inode's in the buffer have a next
327 * unlinked field of 0.
328 */
329#if defined(DEBUG)
330void
331xfs_inobp_check(
332 xfs_mount_t *mp,
333 xfs_buf_t *bp)
334{
335 int i;
336 int j;
337 xfs_dinode_t *dip;
338
339 j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
340
341 for (i = 0; i < j; i++) {
342 dip = (xfs_dinode_t *)xfs_buf_offset(bp,
343 i * mp->m_sb.sb_inodesize);
344 if (!dip->di_next_unlinked) {
345 xfs_alert(mp,
346 "Detected bogus zero next_unlinked field in incore inode buffer 0x%p.",
347 bp);
348 ASSERT(dip->di_next_unlinked);
349 }
350 }
351}
352#endif
353
354static void
355xfs_inode_buf_verify(
356 struct xfs_buf *bp)
357{
358 struct xfs_mount *mp = bp->b_target->bt_mount;
359 int i;
360 int ni;
361
362 /*
363 * Validate the magic number and version of every inode in the buffer
364 */
365 ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
366 for (i = 0; i < ni; i++) {
367 int di_ok;
368 xfs_dinode_t *dip;
369
370 dip = (struct xfs_dinode *)xfs_buf_offset(bp,
371 (i << mp->m_sb.sb_inodelog));
372 di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
373 XFS_DINODE_GOOD_VERSION(dip->di_version);
374 if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
375 XFS_ERRTAG_ITOBP_INOTOBP,
376 XFS_RANDOM_ITOBP_INOTOBP))) {
377 xfs_buf_ioerror(bp, EFSCORRUPTED);
378 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH,
379 mp, dip);
380#ifdef DEBUG
381 xfs_emerg(mp,
382 "bad inode magic/vsn daddr %lld #%d (magic=%x)",
383 (unsigned long long)bp->b_bn, i,
384 be16_to_cpu(dip->di_magic));
385 ASSERT(0);
386#endif
387 }
388 }
389 xfs_inobp_check(mp, bp);
390}
391
392
393static void
394xfs_inode_buf_read_verify(
395 struct xfs_buf *bp)
396{
397 xfs_inode_buf_verify(bp);
398}
399
400static void
401xfs_inode_buf_write_verify(
402 struct xfs_buf *bp)
403{
404 xfs_inode_buf_verify(bp);
405}
406
407const struct xfs_buf_ops xfs_inode_buf_ops = {
408 .verify_read = xfs_inode_buf_read_verify,
409 .verify_write = xfs_inode_buf_write_verify,
410};
411
412
413/*
414 * This routine is called to map an inode to the buffer containing the on-disk
415 * version of the inode. It returns a pointer to the buffer containing the
416 * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
417 * pointer to the on-disk inode within that buffer.
418 *
419 * If a non-zero error is returned, then the contents of bpp and dipp are
420 * undefined.
421 */
422int
423xfs_imap_to_bp(
424 struct xfs_mount *mp,
425 struct xfs_trans *tp,
426 struct xfs_imap *imap,
427 struct xfs_dinode **dipp,
428 struct xfs_buf **bpp,
429 uint buf_flags,
430 uint iget_flags)
431{
432 struct xfs_buf *bp;
433 int error;
434
435 buf_flags |= XBF_UNMAPPED;
436 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
437 (int)imap->im_len, buf_flags, &bp,
438 &xfs_inode_buf_ops);
439 if (error) {
440 if (error == EAGAIN) {
441 ASSERT(buf_flags & XBF_TRYLOCK);
442 return error;
443 }
444
445 if (error == EFSCORRUPTED &&
446 (iget_flags & XFS_IGET_UNTRUSTED))
447 return XFS_ERROR(EINVAL);
448
449 xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
450 __func__, error);
451 return error;
452 }
453
454 *bpp = bp;
455 *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset);
456 return 0;
457}
458
459STATIC void
460xfs_dinode_from_disk(
461 xfs_icdinode_t *to,
462 xfs_dinode_t *from)
463{
464 to->di_magic = be16_to_cpu(from->di_magic);
465 to->di_mode = be16_to_cpu(from->di_mode);
466 to->di_version = from ->di_version;
467 to->di_format = from->di_format;
468 to->di_onlink = be16_to_cpu(from->di_onlink);
469 to->di_uid = be32_to_cpu(from->di_uid);
470 to->di_gid = be32_to_cpu(from->di_gid);
471 to->di_nlink = be32_to_cpu(from->di_nlink);
472 to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
473 to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
474 memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
475 to->di_flushiter = be16_to_cpu(from->di_flushiter);
476 to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
477 to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec);
478 to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec);
479 to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec);
480 to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec);
481 to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec);
482 to->di_size = be64_to_cpu(from->di_size);
483 to->di_nblocks = be64_to_cpu(from->di_nblocks);
484 to->di_extsize = be32_to_cpu(from->di_extsize);
485 to->di_nextents = be32_to_cpu(from->di_nextents);
486 to->di_anextents = be16_to_cpu(from->di_anextents);
487 to->di_forkoff = from->di_forkoff;
488 to->di_aformat = from->di_aformat;
489 to->di_dmevmask = be32_to_cpu(from->di_dmevmask);
490 to->di_dmstate = be16_to_cpu(from->di_dmstate);
491 to->di_flags = be16_to_cpu(from->di_flags);
492 to->di_gen = be32_to_cpu(from->di_gen);
493
494 if (to->di_version == 3) {
495 to->di_changecount = be64_to_cpu(from->di_changecount);
496 to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
497 to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
498 to->di_flags2 = be64_to_cpu(from->di_flags2);
499 to->di_ino = be64_to_cpu(from->di_ino);
500 to->di_lsn = be64_to_cpu(from->di_lsn);
501 memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
502 uuid_copy(&to->di_uuid, &from->di_uuid);
503 }
504}
505
506void
507xfs_dinode_to_disk(
508 xfs_dinode_t *to,
509 xfs_icdinode_t *from)
510{
511 to->di_magic = cpu_to_be16(from->di_magic);
512 to->di_mode = cpu_to_be16(from->di_mode);
513 to->di_version = from ->di_version;
514 to->di_format = from->di_format;
515 to->di_onlink = cpu_to_be16(from->di_onlink);
516 to->di_uid = cpu_to_be32(from->di_uid);
517 to->di_gid = cpu_to_be32(from->di_gid);
518 to->di_nlink = cpu_to_be32(from->di_nlink);
519 to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
520 to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
521 memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
522 to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
523 to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
524 to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
525 to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
526 to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
527 to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
528 to->di_size = cpu_to_be64(from->di_size);
529 to->di_nblocks = cpu_to_be64(from->di_nblocks);
530 to->di_extsize = cpu_to_be32(from->di_extsize);
531 to->di_nextents = cpu_to_be32(from->di_nextents);
532 to->di_anextents = cpu_to_be16(from->di_anextents);
533 to->di_forkoff = from->di_forkoff;
534 to->di_aformat = from->di_aformat;
535 to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
536 to->di_dmstate = cpu_to_be16(from->di_dmstate);
537 to->di_flags = cpu_to_be16(from->di_flags);
538 to->di_gen = cpu_to_be32(from->di_gen);
539
540 if (from->di_version == 3) {
541 to->di_changecount = cpu_to_be64(from->di_changecount);
542 to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
543 to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
544 to->di_flags2 = cpu_to_be64(from->di_flags2);
545 to->di_ino = cpu_to_be64(from->di_ino);
546 to->di_lsn = cpu_to_be64(from->di_lsn);
547 memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
548 uuid_copy(&to->di_uuid, &from->di_uuid);
549 to->di_flushiter = 0;
550 } else {
551 to->di_flushiter = cpu_to_be16(from->di_flushiter);
552 }
553}
554
555STATIC uint 325STATIC uint
556_xfs_dic2xflags( 326_xfs_dic2xflags(
557 __uint16_t di_flags) 327 __uint16_t di_flags)
@@ -610,196 +380,6 @@ xfs_dic2xflags(
610 (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); 380 (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
611} 381}
612 382
613static bool
614xfs_dinode_verify(
615 struct xfs_mount *mp,
616 struct xfs_inode *ip,
617 struct xfs_dinode *dip)
618{
619 if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
620 return false;
621
622 /* only version 3 or greater inodes are extensively verified here */
623 if (dip->di_version < 3)
624 return true;
625
626 if (!xfs_sb_version_hascrc(&mp->m_sb))
627 return false;
628 if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
629 offsetof(struct xfs_dinode, di_crc)))
630 return false;
631 if (be64_to_cpu(dip->di_ino) != ip->i_ino)
632 return false;
633 if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid))
634 return false;
635 return true;
636}
637
638void
639xfs_dinode_calc_crc(
640 struct xfs_mount *mp,
641 struct xfs_dinode *dip)
642{
643 __uint32_t crc;
644
645 if (dip->di_version < 3)
646 return;
647
648 ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
649 crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
650 offsetof(struct xfs_dinode, di_crc));
651 dip->di_crc = xfs_end_cksum(crc);
652}
653
654/*
655 * Read the disk inode attributes into the in-core inode structure.
656 *
657 * For version 5 superblocks, if we are initialising a new inode and we are not
658 * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new
659 * inode core with a random generation number. If we are keeping inodes around,
660 * we need to read the inode cluster to get the existing generation number off
661 * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
662 * format) then log recovery is dependent on the di_flushiter field being
663 * initialised from the current on-disk value and hence we must also read the
664 * inode off disk.
665 */
666int
667xfs_iread(
668 xfs_mount_t *mp,
669 xfs_trans_t *tp,
670 xfs_inode_t *ip,
671 uint iget_flags)
672{
673 xfs_buf_t *bp;
674 xfs_dinode_t *dip;
675 int error;
676
677 /*
678 * Fill in the location information in the in-core inode.
679 */
680 error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
681 if (error)
682 return error;
683
684 /* shortcut IO on inode allocation if possible */
685 if ((iget_flags & XFS_IGET_CREATE) &&
686 xfs_sb_version_hascrc(&mp->m_sb) &&
687 !(mp->m_flags & XFS_MOUNT_IKEEP)) {
688 /* initialise the on-disk inode core */
689 memset(&ip->i_d, 0, sizeof(ip->i_d));
690 ip->i_d.di_magic = XFS_DINODE_MAGIC;
691 ip->i_d.di_gen = prandom_u32();
692 if (xfs_sb_version_hascrc(&mp->m_sb)) {
693 ip->i_d.di_version = 3;
694 ip->i_d.di_ino = ip->i_ino;
695 uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid);
696 } else
697 ip->i_d.di_version = 2;
698 return 0;
699 }
700
701 /*
702 * Get pointers to the on-disk inode and the buffer containing it.
703 */
704 error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
705 if (error)
706 return error;
707
708 /* even unallocated inodes are verified */
709 if (!xfs_dinode_verify(mp, ip, dip)) {
710 xfs_alert(mp, "%s: validation failed for inode %lld failed",
711 __func__, ip->i_ino);
712
713 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
714 error = XFS_ERROR(EFSCORRUPTED);
715 goto out_brelse;
716 }
717
718 /*
719 * If the on-disk inode is already linked to a directory
720 * entry, copy all of the inode into the in-core inode.
721 * xfs_iformat_fork() handles copying in the inode format
722 * specific information.
723 * Otherwise, just get the truly permanent information.
724 */
725 if (dip->di_mode) {
726 xfs_dinode_from_disk(&ip->i_d, dip);
727 error = xfs_iformat_fork(ip, dip);
728 if (error) {
729#ifdef DEBUG
730 xfs_alert(mp, "%s: xfs_iformat() returned error %d",
731 __func__, error);
732#endif /* DEBUG */
733 goto out_brelse;
734 }
735 } else {
736 /*
737 * Partial initialisation of the in-core inode. Just the bits
738 * that xfs_ialloc won't overwrite or relies on being correct.
739 */
740 ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
741 ip->i_d.di_version = dip->di_version;
742 ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
743 ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
744
745 if (dip->di_version == 3) {
746 ip->i_d.di_ino = be64_to_cpu(dip->di_ino);
747 uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid);
748 }
749
750 /*
751 * Make sure to pull in the mode here as well in
752 * case the inode is released without being used.
753 * This ensures that xfs_inactive() will see that
754 * the inode is already free and not try to mess
755 * with the uninitialized part of it.
756 */
757 ip->i_d.di_mode = 0;
758 }
759
760 /*
761 * The inode format changed when we moved the link count and
762 * made it 32 bits long. If this is an old format inode,
763 * convert it in memory to look like a new one. If it gets
764 * flushed to disk we will convert back before flushing or
765 * logging it. We zero out the new projid field and the old link
766 * count field. We'll handle clearing the pad field (the remains
767 * of the old uuid field) when we actually convert the inode to
768 * the new format. We don't change the version number so that we
769 * can distinguish this from a real new format inode.
770 */
771 if (ip->i_d.di_version == 1) {
772 ip->i_d.di_nlink = ip->i_d.di_onlink;
773 ip->i_d.di_onlink = 0;
774 xfs_set_projid(ip, 0);
775 }
776
777 ip->i_delayed_blks = 0;
778
779 /*
780 * Mark the buffer containing the inode as something to keep
781 * around for a while. This helps to keep recently accessed
782 * meta-data in-core longer.
783 */
784 xfs_buf_set_ref(bp, XFS_INO_REF);
785
786 /*
787 * Use xfs_trans_brelse() to release the buffer containing the on-disk
788 * inode, because it was acquired with xfs_trans_read_buf() in
789 * xfs_imap_to_bp() above. If tp is NULL, this is just a normal
790 * brelse(). If we're within a transaction, then xfs_trans_brelse()
791 * will only release the buffer if it is not dirty within the
792 * transaction. It will be OK to release the buffer in this case,
793 * because inodes on disk are never destroyed and we will be locking the
794 * new in-core inode before putting it in the cache where other
795 * processes can find it. Thus we don't have to worry about the inode
796 * being changed just because we released the buffer.
797 */
798 out_brelse:
799 xfs_trans_brelse(tp, bp);
800 return error;
801}
802
803/* 383/*
804 * Allocate an inode on disk and return a copy of its in-core version. 384 * Allocate an inode on disk and return a copy of its in-core version.
805 * The in-core inode is locked exclusively. Set mode, nlink, and rdev 385 * The in-core inode is locked exclusively. Set mode, nlink, and rdev
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 0bd034ac8f82..8f775ed722a9 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -18,24 +18,15 @@
18#ifndef __XFS_INODE_H__ 18#ifndef __XFS_INODE_H__
19#define __XFS_INODE_H__ 19#define __XFS_INODE_H__
20 20
21struct posix_acl; 21#include "xfs_inode_buf.h"
22struct xfs_dinode;
23struct xfs_inode;
24
25#include "xfs_inode_fork.h" 22#include "xfs_inode_fork.h"
26 23
27/* 24/*
28 * Inode location information. Stored in the inode and passed to 25 * Kernel only inode definitions
29 * xfs_imap_to_bp() to get a buffer and dinode for a given inode.
30 */ 26 */
31struct xfs_imap {
32 xfs_daddr_t im_blkno; /* starting BB of inode chunk */
33 ushort im_len; /* length in BBs of inode chunk */
34 ushort im_boffset; /* inode offset in block in bytes */
35};
36
37#ifdef __KERNEL__
38 27
28struct xfs_dinode;
29struct xfs_inode;
39struct xfs_buf; 30struct xfs_buf;
40struct xfs_bmap_free; 31struct xfs_bmap_free;
41struct xfs_bmbt_irec; 32struct xfs_bmbt_irec;
@@ -347,7 +338,10 @@ int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *,
347int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); 338int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
348 339
349void xfs_iext_realloc(xfs_inode_t *, int, int); 340void xfs_iext_realloc(xfs_inode_t *, int, int);
341
350void xfs_iunpin_wait(xfs_inode_t *); 342void xfs_iunpin_wait(xfs_inode_t *);
343#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount))
344
351int xfs_iflush(struct xfs_inode *, struct xfs_buf **); 345int xfs_iflush(struct xfs_inode *, struct xfs_buf **);
352void xfs_lock_inodes(xfs_inode_t **, int, uint); 346void xfs_lock_inodes(xfs_inode_t **, int, uint);
353void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); 347void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
@@ -367,27 +361,6 @@ do { \
367 iput(VFS_I(ip)); \ 361 iput(VFS_I(ip)); \
368} while (0) 362} while (0)
369 363
370#endif /* __KERNEL__ */
371
372int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
373 struct xfs_imap *, struct xfs_dinode **,
374 struct xfs_buf **, uint, uint);
375int xfs_iread(struct xfs_mount *, struct xfs_trans *,
376 struct xfs_inode *, uint);
377void xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
378void xfs_dinode_to_disk(struct xfs_dinode *,
379 struct xfs_icdinode *);
380bool xfs_can_free_eofblocks(struct xfs_inode *, bool);
381
382#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount))
383
384#if defined(DEBUG)
385void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
386#else
387#define xfs_inobp_check(mp, bp)
388#endif /* DEBUG */
389
390extern struct kmem_zone *xfs_inode_zone; 364extern struct kmem_zone *xfs_inode_zone;
391extern const struct xfs_buf_ops xfs_inode_buf_ops;
392 365
393#endif /* __XFS_INODE_H__ */ 366#endif /* __XFS_INODE_H__ */
diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/xfs_inode_buf.c
new file mode 100644
index 000000000000..38fe509827dd
--- /dev/null
+++ b/fs/xfs/xfs_inode_buf.c
@@ -0,0 +1,453 @@
1/*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_format.h"
21#include "xfs_log.h"
22#include "xfs_trans.h"
23#include "xfs_sb.h"
24#include "xfs_ag.h"
25#include "xfs_mount.h"
26#include "xfs_bmap_btree.h"
27#include "xfs_ialloc_btree.h"
28#include "xfs_dinode.h"
29#include "xfs_inode.h"
30#include "xfs_error.h"
31#include "xfs_cksum.h"
32#include "xfs_icache.h"
33#include "xfs_ialloc.h"
34
35/*
36 * Check that none of the inodes in the buffer have a next
37 * unlinked field of 0.
38 */
39#if defined(DEBUG)
40void
41xfs_inobp_check(
42 xfs_mount_t *mp,
43 xfs_buf_t *bp)
44{
45 int i;
46 int j;
47 xfs_dinode_t *dip;
48
49 j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
50
51 for (i = 0; i < j; i++) {
52 dip = (xfs_dinode_t *)xfs_buf_offset(bp,
53 i * mp->m_sb.sb_inodesize);
54 if (!dip->di_next_unlinked) {
55 xfs_alert(mp,
56 "Detected bogus zero next_unlinked field in incore inode buffer 0x%p.",
57 bp);
58 ASSERT(dip->di_next_unlinked);
59 }
60 }
61}
62#endif
63
64static void
65xfs_inode_buf_verify(
66 struct xfs_buf *bp)
67{
68 struct xfs_mount *mp = bp->b_target->bt_mount;
69 int i;
70 int ni;
71
72 /*
73 * Validate the magic number and version of every inode in the buffer
74 */
75 ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
76 for (i = 0; i < ni; i++) {
77 int di_ok;
78 xfs_dinode_t *dip;
79
80 dip = (struct xfs_dinode *)xfs_buf_offset(bp,
81 (i << mp->m_sb.sb_inodelog));
82 di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
83 XFS_DINODE_GOOD_VERSION(dip->di_version);
84 if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
85 XFS_ERRTAG_ITOBP_INOTOBP,
86 XFS_RANDOM_ITOBP_INOTOBP))) {
87 xfs_buf_ioerror(bp, EFSCORRUPTED);
88 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH,
89 mp, dip);
90#ifdef DEBUG
91 xfs_emerg(mp,
92 "bad inode magic/vsn daddr %lld #%d (magic=%x)",
93 (unsigned long long)bp->b_bn, i,
94 be16_to_cpu(dip->di_magic));
95 ASSERT(0);
96#endif
97 }
98 }
99 xfs_inobp_check(mp, bp);
100}
101
102
103static void
104xfs_inode_buf_read_verify(
105 struct xfs_buf *bp)
106{
107 xfs_inode_buf_verify(bp);
108}
109
110static void
111xfs_inode_buf_write_verify(
112 struct xfs_buf *bp)
113{
114 xfs_inode_buf_verify(bp);
115}
116
117const struct xfs_buf_ops xfs_inode_buf_ops = {
118 .verify_read = xfs_inode_buf_read_verify,
119 .verify_write = xfs_inode_buf_write_verify,
120};
121
122
123/*
124 * This routine is called to map an inode to the buffer containing the on-disk
125 * version of the inode. It returns a pointer to the buffer containing the
126 * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
127 * pointer to the on-disk inode within that buffer.
128 *
129 * If a non-zero error is returned, then the contents of bpp and dipp are
130 * undefined.
131 */
132int
133xfs_imap_to_bp(
134 struct xfs_mount *mp,
135 struct xfs_trans *tp,
136 struct xfs_imap *imap,
137 struct xfs_dinode **dipp,
138 struct xfs_buf **bpp,
139 uint buf_flags,
140 uint iget_flags)
141{
142 struct xfs_buf *bp;
143 int error;
144
145 buf_flags |= XBF_UNMAPPED;
146 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
147 (int)imap->im_len, buf_flags, &bp,
148 &xfs_inode_buf_ops);
149 if (error) {
150 if (error == EAGAIN) {
151 ASSERT(buf_flags & XBF_TRYLOCK);
152 return error;
153 }
154
155 if (error == EFSCORRUPTED &&
156 (iget_flags & XFS_IGET_UNTRUSTED))
157 return XFS_ERROR(EINVAL);
158
159 xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
160 __func__, error);
161 return error;
162 }
163
164 *bpp = bp;
165 *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset);
166 return 0;
167}
168
169STATIC void
170xfs_dinode_from_disk(
171 xfs_icdinode_t *to,
172 xfs_dinode_t *from)
173{
174 to->di_magic = be16_to_cpu(from->di_magic);
175 to->di_mode = be16_to_cpu(from->di_mode);
176 to->di_version = from ->di_version;
177 to->di_format = from->di_format;
178 to->di_onlink = be16_to_cpu(from->di_onlink);
179 to->di_uid = be32_to_cpu(from->di_uid);
180 to->di_gid = be32_to_cpu(from->di_gid);
181 to->di_nlink = be32_to_cpu(from->di_nlink);
182 to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
183 to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
184 memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
185 to->di_flushiter = be16_to_cpu(from->di_flushiter);
186 to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
187 to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec);
188 to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec);
189 to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec);
190 to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec);
191 to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec);
192 to->di_size = be64_to_cpu(from->di_size);
193 to->di_nblocks = be64_to_cpu(from->di_nblocks);
194 to->di_extsize = be32_to_cpu(from->di_extsize);
195 to->di_nextents = be32_to_cpu(from->di_nextents);
196 to->di_anextents = be16_to_cpu(from->di_anextents);
197 to->di_forkoff = from->di_forkoff;
198 to->di_aformat = from->di_aformat;
199 to->di_dmevmask = be32_to_cpu(from->di_dmevmask);
200 to->di_dmstate = be16_to_cpu(from->di_dmstate);
201 to->di_flags = be16_to_cpu(from->di_flags);
202 to->di_gen = be32_to_cpu(from->di_gen);
203
204 if (to->di_version == 3) {
205 to->di_changecount = be64_to_cpu(from->di_changecount);
206 to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
207 to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
208 to->di_flags2 = be64_to_cpu(from->di_flags2);
209 to->di_ino = be64_to_cpu(from->di_ino);
210 to->di_lsn = be64_to_cpu(from->di_lsn);
211 memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
212 uuid_copy(&to->di_uuid, &from->di_uuid);
213 }
214}
215
216void
217xfs_dinode_to_disk(
218 xfs_dinode_t *to,
219 xfs_icdinode_t *from)
220{
221 to->di_magic = cpu_to_be16(from->di_magic);
222 to->di_mode = cpu_to_be16(from->di_mode);
223 to->di_version = from ->di_version;
224 to->di_format = from->di_format;
225 to->di_onlink = cpu_to_be16(from->di_onlink);
226 to->di_uid = cpu_to_be32(from->di_uid);
227 to->di_gid = cpu_to_be32(from->di_gid);
228 to->di_nlink = cpu_to_be32(from->di_nlink);
229 to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
230 to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
231 memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
232 to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
233 to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
234 to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
235 to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
236 to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
237 to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
238 to->di_size = cpu_to_be64(from->di_size);
239 to->di_nblocks = cpu_to_be64(from->di_nblocks);
240 to->di_extsize = cpu_to_be32(from->di_extsize);
241 to->di_nextents = cpu_to_be32(from->di_nextents);
242 to->di_anextents = cpu_to_be16(from->di_anextents);
243 to->di_forkoff = from->di_forkoff;
244 to->di_aformat = from->di_aformat;
245 to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
246 to->di_dmstate = cpu_to_be16(from->di_dmstate);
247 to->di_flags = cpu_to_be16(from->di_flags);
248 to->di_gen = cpu_to_be32(from->di_gen);
249
250 if (from->di_version == 3) {
251 to->di_changecount = cpu_to_be64(from->di_changecount);
252 to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
253 to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
254 to->di_flags2 = cpu_to_be64(from->di_flags2);
255 to->di_ino = cpu_to_be64(from->di_ino);
256 to->di_lsn = cpu_to_be64(from->di_lsn);
257 memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
258 uuid_copy(&to->di_uuid, &from->di_uuid);
259 to->di_flushiter = 0;
260 } else {
261 to->di_flushiter = cpu_to_be16(from->di_flushiter);
262 }
263}
264
265static bool
266xfs_dinode_verify(
267 struct xfs_mount *mp,
268 struct xfs_inode *ip,
269 struct xfs_dinode *dip)
270{
271 if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
272 return false;
273
274 /* only version 3 or greater inodes are extensively verified here */
275 if (dip->di_version < 3)
276 return true;
277
278 if (!xfs_sb_version_hascrc(&mp->m_sb))
279 return false;
280 if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
281 offsetof(struct xfs_dinode, di_crc)))
282 return false;
283 if (be64_to_cpu(dip->di_ino) != ip->i_ino)
284 return false;
285 if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid))
286 return false;
287 return true;
288}
289
290void
291xfs_dinode_calc_crc(
292 struct xfs_mount *mp,
293 struct xfs_dinode *dip)
294{
295 __uint32_t crc;
296
297 if (dip->di_version < 3)
298 return;
299
300 ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
301 crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
302 offsetof(struct xfs_dinode, di_crc));
303 dip->di_crc = xfs_end_cksum(crc);
304}
305
306/*
307 * Read the disk inode attributes into the in-core inode structure.
308 *
309 * For version 5 superblocks, if we are initialising a new inode and we are not
310 * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simply build the new
311 * inode core with a random generation number. If we are keeping inodes around,
312 * we need to read the inode cluster to get the existing generation number off
313 * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
314 * format) then log recovery is dependent on the di_flushiter field being
315 * initialised from the current on-disk value and hence we must also read the
316 * inode off disk.
317 */
318int
319xfs_iread(
320 xfs_mount_t *mp,
321 xfs_trans_t *tp,
322 xfs_inode_t *ip,
323 uint iget_flags)
324{
325 xfs_buf_t *bp;
326 xfs_dinode_t *dip;
327 int error;
328
329 /*
330 * Fill in the location information in the in-core inode.
331 */
332 error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
333 if (error)
334 return error;
335
336 /* shortcut IO on inode allocation if possible */
337 if ((iget_flags & XFS_IGET_CREATE) &&
338 xfs_sb_version_hascrc(&mp->m_sb) &&
339 !(mp->m_flags & XFS_MOUNT_IKEEP)) {
340 /* initialise the on-disk inode core */
341 memset(&ip->i_d, 0, sizeof(ip->i_d));
342 ip->i_d.di_magic = XFS_DINODE_MAGIC;
343 ip->i_d.di_gen = prandom_u32();
344 if (xfs_sb_version_hascrc(&mp->m_sb)) {
345 ip->i_d.di_version = 3;
346 ip->i_d.di_ino = ip->i_ino;
347 uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid);
348 } else
349 ip->i_d.di_version = 2;
350 return 0;
351 }
352
353 /*
354 * Get pointers to the on-disk inode and the buffer containing it.
355 */
356 error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
357 if (error)
358 return error;
359
360 /* even unallocated inodes are verified */
361 if (!xfs_dinode_verify(mp, ip, dip)) {
362 xfs_alert(mp, "%s: validation failed for inode %lld failed",
363 __func__, ip->i_ino);
364
365 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
366 error = XFS_ERROR(EFSCORRUPTED);
367 goto out_brelse;
368 }
369
370 /*
371 * If the on-disk inode is already linked to a directory
372 * entry, copy all of the inode into the in-core inode.
373 * xfs_iformat_fork() handles copying in the inode format
374 * specific information.
375 * Otherwise, just get the truly permanent information.
376 */
377 if (dip->di_mode) {
378 xfs_dinode_from_disk(&ip->i_d, dip);
379 error = xfs_iformat_fork(ip, dip);
380 if (error) {
381#ifdef DEBUG
382 xfs_alert(mp, "%s: xfs_iformat() returned error %d",
383 __func__, error);
384#endif /* DEBUG */
385 goto out_brelse;
386 }
387 } else {
388 /*
389 * Partial initialisation of the in-core inode. Just the bits
390 * that xfs_ialloc won't overwrite or relies on being correct.
391 */
392 ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
393 ip->i_d.di_version = dip->di_version;
394 ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
395 ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
396
397 if (dip->di_version == 3) {
398 ip->i_d.di_ino = be64_to_cpu(dip->di_ino);
399 uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid);
400 }
401
402 /*
403 * Make sure to pull in the mode here as well in
404 * case the inode is released without being used.
405 * This ensures that xfs_inactive() will see that
406 * the inode is already free and not try to mess
407 * with the uninitialized part of it.
408 */
409 ip->i_d.di_mode = 0;
410 }
411
412 /*
413 * The inode format changed when we moved the link count and
414 * made it 32 bits long. If this is an old format inode,
415 * convert it in memory to look like a new one. If it gets
416 * flushed to disk we will convert back before flushing or
417 * logging it. We zero out the new projid field and the old link
418 * count field. We'll handle clearing the pad field (the remains
419 * of the old uuid field) when we actually convert the inode to
420 * the new format. We don't change the version number so that we
421 * can distinguish this from a real new format inode.
422 */
423 if (ip->i_d.di_version == 1) {
424 ip->i_d.di_nlink = ip->i_d.di_onlink;
425 ip->i_d.di_onlink = 0;
426 xfs_set_projid(ip, 0);
427 }
428
429 ip->i_delayed_blks = 0;
430
431 /*
432 * Mark the buffer containing the inode as something to keep
433 * around for a while. This helps to keep recently accessed
434 * meta-data in-core longer.
435 */
436 xfs_buf_set_ref(bp, XFS_INO_REF);
437
438 /*
439 * Use xfs_trans_brelse() to release the buffer containing the on-disk
440 * inode, because it was acquired with xfs_trans_read_buf() in
441 * xfs_imap_to_bp() above. If tp is NULL, this is just a normal
442 * brelse(). If we're within a transaction, then xfs_trans_brelse()
443 * will only release the buffer if it is not dirty within the
444 * transaction. It will be OK to release the buffer in this case,
445 * because inodes on disk are never destroyed and we will be locking the
446 * new in-core inode before putting it in the cache where other
447 * processes can find it. Thus we don't have to worry about the inode
448 * being changed just because we released the buffer.
449 */
450 out_brelse:
451 xfs_trans_brelse(tp, bp);
452 return error;
453}
diff --git a/fs/xfs/xfs_inode_buf.h b/fs/xfs/xfs_inode_buf.h
new file mode 100644
index 000000000000..b5f1e22bf44e
--- /dev/null
+++ b/fs/xfs/xfs_inode_buf.h
@@ -0,0 +1,53 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_INODE_BUF_H__
19#define __XFS_INODE_BUF_H__
20
21struct xfs_inode;
22struct xfs_dinode;
23struct xfs_icdinode;
24
25/*
26 * Inode location information. Stored in the inode and passed to
27 * xfs_imap_to_bp() to get a buffer and dinode for a given inode.
28 */
29struct xfs_imap {
30 xfs_daddr_t im_blkno; /* starting BB of inode chunk */
31 ushort im_len; /* length in BBs of inode chunk */
32 ushort im_boffset; /* inode offset in block in bytes */
33};
34
35int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
36 struct xfs_imap *, struct xfs_dinode **,
37 struct xfs_buf **, uint, uint);
38int xfs_iread(struct xfs_mount *, struct xfs_trans *,
39 struct xfs_inode *, uint);
40void xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
41void xfs_dinode_to_disk(struct xfs_dinode *,
42 struct xfs_icdinode *);
43bool xfs_can_free_eofblocks(struct xfs_inode *, bool);
44
45#if defined(DEBUG)
46void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
47#else
48#define xfs_inobp_check(mp, bp)
49#endif /* DEBUG */
50
51extern const struct xfs_buf_ops xfs_inode_buf_ops;
52
53#endif /* __XFS_INODE_BUF_H__ */