diff options
author | Steve French <sfrench@us.ibm.com> | 2008-04-24 11:26:50 -0400 |
---|---|---|
committer | Steve French <sfrench@us.ibm.com> | 2008-04-24 11:26:50 -0400 |
commit | 36d99df2fb474222ab47fbe8ae7385661033223b (patch) | |
tree | 962e068491b752a944f61c454fad3f8619a1ea3f /fs | |
parent | 076d8423a98659a92837b07aa494cb74bfefe77c (diff) | |
parent | 3dc5063786b273f1aee545844f6bd4e9651ebffe (diff) |
Merge branch 'master' of /pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'fs')
238 files changed, 12005 insertions, 6571 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c index dfebdbe7440e..3031e3233dd6 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c | |||
@@ -26,7 +26,6 @@ | |||
26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
27 | #include <linux/sched.h> | 27 | #include <linux/sched.h> |
28 | #include <linux/idr.h> | 28 | #include <linux/idr.h> |
29 | #include <asm/semaphore.h> | ||
30 | #include <net/9p/9p.h> | 29 | #include <net/9p/9p.h> |
31 | #include <net/9p/client.h> | 30 | #include <net/9p/client.h> |
32 | 31 | ||
diff --git a/fs/Kconfig b/fs/Kconfig index c509123bea49..8b18a8758677 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -444,6 +444,32 @@ config OCFS2_FS | |||
444 | For more information on OCFS2, see the file | 444 | For more information on OCFS2, see the file |
445 | <file:Documentation/filesystems/ocfs2.txt>. | 445 | <file:Documentation/filesystems/ocfs2.txt>. |
446 | 446 | ||
447 | config OCFS2_FS_O2CB | ||
448 | tristate "O2CB Kernelspace Clustering" | ||
449 | depends on OCFS2_FS | ||
450 | default y | ||
451 | help | ||
452 | OCFS2 includes a simple kernelspace clustering package, the OCFS2 | ||
453 | Cluster Base. It only requires a very small userspace component | ||
454 | to configure it. This comes with the standard ocfs2-tools package. | ||
455 | O2CB is limited to maintaining a cluster for OCFS2 file systems. | ||
456 | It cannot manage any other cluster applications. | ||
457 | |||
458 | It is always safe to say Y here, as the clustering method is | ||
459 | run-time selectable. | ||
460 | |||
461 | config OCFS2_FS_USERSPACE_CLUSTER | ||
462 | tristate "OCFS2 Userspace Clustering" | ||
463 | depends on OCFS2_FS && DLM | ||
464 | default y | ||
465 | help | ||
466 | This option will allow OCFS2 to use userspace clustering services | ||
467 | in conjunction with the DLM in fs/dlm. If you are using a | ||
468 | userspace cluster manager, say Y here. | ||
469 | |||
470 | It is safe to say Y, as the clustering method is run-time | ||
471 | selectable. | ||
472 | |||
447 | config OCFS2_DEBUG_MASKLOG | 473 | config OCFS2_DEBUG_MASKLOG |
448 | bool "OCFS2 logging support" | 474 | bool "OCFS2 logging support" |
449 | depends on OCFS2_FS | 475 | depends on OCFS2_FS |
@@ -663,6 +689,7 @@ config ZISOFS | |||
663 | 689 | ||
664 | config UDF_FS | 690 | config UDF_FS |
665 | tristate "UDF file system support" | 691 | tristate "UDF file system support" |
692 | select CRC_ITU_T | ||
666 | help | 693 | help |
667 | This is the new file system used on some CD-ROMs and DVDs. Say Y if | 694 | This is the new file system used on some CD-ROMs and DVDs. Say Y if |
668 | you intend to mount DVD discs or CDRW's written in packet mode, or | 695 | you intend to mount DVD discs or CDRW's written in packet mode, or |
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index b5c3b6114add..853845abcca6 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt | |||
@@ -62,7 +62,7 @@ config BINFMT_SHARED_FLAT | |||
62 | config BINFMT_AOUT | 62 | config BINFMT_AOUT |
63 | tristate "Kernel support for a.out and ECOFF binaries" | 63 | tristate "Kernel support for a.out and ECOFF binaries" |
64 | depends on ARCH_SUPPORTS_AOUT && \ | 64 | depends on ARCH_SUPPORTS_AOUT && \ |
65 | (X86_32 || ALPHA || ARM || M68K || SPARC32) | 65 | (X86_32 || ALPHA || ARM || M68K) |
66 | ---help--- | 66 | ---help--- |
67 | A.out (Assembler.OUTput) is a set of formats for libraries and | 67 | A.out (Assembler.OUTput) is a set of formats for libraries and |
68 | executables used in the earliest versions of UNIX. Linux used | 68 | executables used in the earliest versions of UNIX. Linux used |
@@ -444,22 +444,27 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, | |||
444 | 444 | ||
445 | struct bio_map_data { | 445 | struct bio_map_data { |
446 | struct bio_vec *iovecs; | 446 | struct bio_vec *iovecs; |
447 | void __user *userptr; | 447 | int nr_sgvecs; |
448 | struct sg_iovec *sgvecs; | ||
448 | }; | 449 | }; |
449 | 450 | ||
450 | static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio) | 451 | static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, |
452 | struct sg_iovec *iov, int iov_count) | ||
451 | { | 453 | { |
452 | memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt); | 454 | memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt); |
455 | memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); | ||
456 | bmd->nr_sgvecs = iov_count; | ||
453 | bio->bi_private = bmd; | 457 | bio->bi_private = bmd; |
454 | } | 458 | } |
455 | 459 | ||
456 | static void bio_free_map_data(struct bio_map_data *bmd) | 460 | static void bio_free_map_data(struct bio_map_data *bmd) |
457 | { | 461 | { |
458 | kfree(bmd->iovecs); | 462 | kfree(bmd->iovecs); |
463 | kfree(bmd->sgvecs); | ||
459 | kfree(bmd); | 464 | kfree(bmd); |
460 | } | 465 | } |
461 | 466 | ||
462 | static struct bio_map_data *bio_alloc_map_data(int nr_segs) | 467 | static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count) |
463 | { | 468 | { |
464 | struct bio_map_data *bmd = kmalloc(sizeof(*bmd), GFP_KERNEL); | 469 | struct bio_map_data *bmd = kmalloc(sizeof(*bmd), GFP_KERNEL); |
465 | 470 | ||
@@ -467,13 +472,71 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs) | |||
467 | return NULL; | 472 | return NULL; |
468 | 473 | ||
469 | bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, GFP_KERNEL); | 474 | bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, GFP_KERNEL); |
470 | if (bmd->iovecs) | 475 | if (!bmd->iovecs) { |
476 | kfree(bmd); | ||
477 | return NULL; | ||
478 | } | ||
479 | |||
480 | bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, GFP_KERNEL); | ||
481 | if (bmd->sgvecs) | ||
471 | return bmd; | 482 | return bmd; |
472 | 483 | ||
484 | kfree(bmd->iovecs); | ||
473 | kfree(bmd); | 485 | kfree(bmd); |
474 | return NULL; | 486 | return NULL; |
475 | } | 487 | } |
476 | 488 | ||
489 | static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count, | ||
490 | int uncopy) | ||
491 | { | ||
492 | int ret = 0, i; | ||
493 | struct bio_vec *bvec; | ||
494 | int iov_idx = 0; | ||
495 | unsigned int iov_off = 0; | ||
496 | int read = bio_data_dir(bio) == READ; | ||
497 | |||
498 | __bio_for_each_segment(bvec, bio, i, 0) { | ||
499 | char *bv_addr = page_address(bvec->bv_page); | ||
500 | unsigned int bv_len = bvec->bv_len; | ||
501 | |||
502 | while (bv_len && iov_idx < iov_count) { | ||
503 | unsigned int bytes; | ||
504 | char *iov_addr; | ||
505 | |||
506 | bytes = min_t(unsigned int, | ||
507 | iov[iov_idx].iov_len - iov_off, bv_len); | ||
508 | iov_addr = iov[iov_idx].iov_base + iov_off; | ||
509 | |||
510 | if (!ret) { | ||
511 | if (!read && !uncopy) | ||
512 | ret = copy_from_user(bv_addr, iov_addr, | ||
513 | bytes); | ||
514 | if (read && uncopy) | ||
515 | ret = copy_to_user(iov_addr, bv_addr, | ||
516 | bytes); | ||
517 | |||
518 | if (ret) | ||
519 | ret = -EFAULT; | ||
520 | } | ||
521 | |||
522 | bv_len -= bytes; | ||
523 | bv_addr += bytes; | ||
524 | iov_addr += bytes; | ||
525 | iov_off += bytes; | ||
526 | |||
527 | if (iov[iov_idx].iov_len == iov_off) { | ||
528 | iov_idx++; | ||
529 | iov_off = 0; | ||
530 | } | ||
531 | } | ||
532 | |||
533 | if (uncopy) | ||
534 | __free_page(bvec->bv_page); | ||
535 | } | ||
536 | |||
537 | return ret; | ||
538 | } | ||
539 | |||
477 | /** | 540 | /** |
478 | * bio_uncopy_user - finish previously mapped bio | 541 | * bio_uncopy_user - finish previously mapped bio |
479 | * @bio: bio being terminated | 542 | * @bio: bio being terminated |
@@ -484,55 +547,56 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs) | |||
484 | int bio_uncopy_user(struct bio *bio) | 547 | int bio_uncopy_user(struct bio *bio) |
485 | { | 548 | { |
486 | struct bio_map_data *bmd = bio->bi_private; | 549 | struct bio_map_data *bmd = bio->bi_private; |
487 | const int read = bio_data_dir(bio) == READ; | 550 | int ret; |
488 | struct bio_vec *bvec; | ||
489 | int i, ret = 0; | ||
490 | 551 | ||
491 | __bio_for_each_segment(bvec, bio, i, 0) { | 552 | ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs, 1); |
492 | char *addr = page_address(bvec->bv_page); | ||
493 | unsigned int len = bmd->iovecs[i].bv_len; | ||
494 | 553 | ||
495 | if (read && !ret && copy_to_user(bmd->userptr, addr, len)) | ||
496 | ret = -EFAULT; | ||
497 | |||
498 | __free_page(bvec->bv_page); | ||
499 | bmd->userptr += len; | ||
500 | } | ||
501 | bio_free_map_data(bmd); | 554 | bio_free_map_data(bmd); |
502 | bio_put(bio); | 555 | bio_put(bio); |
503 | return ret; | 556 | return ret; |
504 | } | 557 | } |
505 | 558 | ||
506 | /** | 559 | /** |
507 | * bio_copy_user - copy user data to bio | 560 | * bio_copy_user_iov - copy user data to bio |
508 | * @q: destination block queue | 561 | * @q: destination block queue |
509 | * @uaddr: start of user address | 562 | * @iov: the iovec. |
510 | * @len: length in bytes | 563 | * @iov_count: number of elements in the iovec |
511 | * @write_to_vm: bool indicating writing to pages or not | 564 | * @write_to_vm: bool indicating writing to pages or not |
512 | * | 565 | * |
513 | * Prepares and returns a bio for indirect user io, bouncing data | 566 | * Prepares and returns a bio for indirect user io, bouncing data |
514 | * to/from kernel pages as necessary. Must be paired with | 567 | * to/from kernel pages as necessary. Must be paired with |
515 | * call bio_uncopy_user() on io completion. | 568 | * call bio_uncopy_user() on io completion. |
516 | */ | 569 | */ |
517 | struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr, | 570 | struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, |
518 | unsigned int len, int write_to_vm) | 571 | int iov_count, int write_to_vm) |
519 | { | 572 | { |
520 | unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
521 | unsigned long start = uaddr >> PAGE_SHIFT; | ||
522 | struct bio_map_data *bmd; | 573 | struct bio_map_data *bmd; |
523 | struct bio_vec *bvec; | 574 | struct bio_vec *bvec; |
524 | struct page *page; | 575 | struct page *page; |
525 | struct bio *bio; | 576 | struct bio *bio; |
526 | int i, ret; | 577 | int i, ret; |
578 | int nr_pages = 0; | ||
579 | unsigned int len = 0; | ||
527 | 580 | ||
528 | bmd = bio_alloc_map_data(end - start); | 581 | for (i = 0; i < iov_count; i++) { |
582 | unsigned long uaddr; | ||
583 | unsigned long end; | ||
584 | unsigned long start; | ||
585 | |||
586 | uaddr = (unsigned long)iov[i].iov_base; | ||
587 | end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
588 | start = uaddr >> PAGE_SHIFT; | ||
589 | |||
590 | nr_pages += end - start; | ||
591 | len += iov[i].iov_len; | ||
592 | } | ||
593 | |||
594 | bmd = bio_alloc_map_data(nr_pages, iov_count); | ||
529 | if (!bmd) | 595 | if (!bmd) |
530 | return ERR_PTR(-ENOMEM); | 596 | return ERR_PTR(-ENOMEM); |
531 | 597 | ||
532 | bmd->userptr = (void __user *) uaddr; | ||
533 | |||
534 | ret = -ENOMEM; | 598 | ret = -ENOMEM; |
535 | bio = bio_alloc(GFP_KERNEL, end - start); | 599 | bio = bio_alloc(GFP_KERNEL, nr_pages); |
536 | if (!bio) | 600 | if (!bio) |
537 | goto out_bmd; | 601 | goto out_bmd; |
538 | 602 | ||
@@ -564,22 +628,12 @@ struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr, | |||
564 | * success | 628 | * success |
565 | */ | 629 | */ |
566 | if (!write_to_vm) { | 630 | if (!write_to_vm) { |
567 | char __user *p = (char __user *) uaddr; | 631 | ret = __bio_copy_iov(bio, iov, iov_count, 0); |
568 | 632 | if (ret) | |
569 | /* | 633 | goto cleanup; |
570 | * for a write, copy in data to kernel pages | ||
571 | */ | ||
572 | ret = -EFAULT; | ||
573 | bio_for_each_segment(bvec, bio, i) { | ||
574 | char *addr = page_address(bvec->bv_page); | ||
575 | |||
576 | if (copy_from_user(addr, p, bvec->bv_len)) | ||
577 | goto cleanup; | ||
578 | p += bvec->bv_len; | ||
579 | } | ||
580 | } | 634 | } |
581 | 635 | ||
582 | bio_set_map_data(bmd, bio); | 636 | bio_set_map_data(bmd, bio, iov, iov_count); |
583 | return bio; | 637 | return bio; |
584 | cleanup: | 638 | cleanup: |
585 | bio_for_each_segment(bvec, bio, i) | 639 | bio_for_each_segment(bvec, bio, i) |
@@ -591,6 +645,28 @@ out_bmd: | |||
591 | return ERR_PTR(ret); | 645 | return ERR_PTR(ret); |
592 | } | 646 | } |
593 | 647 | ||
648 | /** | ||
649 | * bio_copy_user - copy user data to bio | ||
650 | * @q: destination block queue | ||
651 | * @uaddr: start of user address | ||
652 | * @len: length in bytes | ||
653 | * @write_to_vm: bool indicating writing to pages or not | ||
654 | * | ||
655 | * Prepares and returns a bio for indirect user io, bouncing data | ||
656 | * to/from kernel pages as necessary. Must be paired with | ||
657 | * call bio_uncopy_user() on io completion. | ||
658 | */ | ||
659 | struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr, | ||
660 | unsigned int len, int write_to_vm) | ||
661 | { | ||
662 | struct sg_iovec iov; | ||
663 | |||
664 | iov.iov_base = (void __user *)uaddr; | ||
665 | iov.iov_len = len; | ||
666 | |||
667 | return bio_copy_user_iov(q, &iov, 1, write_to_vm); | ||
668 | } | ||
669 | |||
594 | static struct bio *__bio_map_user_iov(struct request_queue *q, | 670 | static struct bio *__bio_map_user_iov(struct request_queue *q, |
595 | struct block_device *bdev, | 671 | struct block_device *bdev, |
596 | struct sg_iovec *iov, int iov_count, | 672 | struct sg_iovec *iov, int iov_count, |
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 350680fd7da7..0c3b618c15b3 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c | |||
@@ -23,7 +23,6 @@ | |||
23 | #include <linux/buffer_head.h> | 23 | #include <linux/buffer_head.h> |
24 | #include <linux/vfs.h> | 24 | #include <linux/vfs.h> |
25 | #include <linux/mutex.h> | 25 | #include <linux/mutex.h> |
26 | #include <asm/semaphore.h> | ||
27 | 26 | ||
28 | #include <asm/uaccess.h> | 27 | #include <asm/uaccess.h> |
29 | 28 | ||
diff --git a/fs/dcache.c b/fs/dcache.c index 43455776711e..3ee588d5f585 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -1746,12 +1746,21 @@ shouldnt_be_hashed: | |||
1746 | goto shouldnt_be_hashed; | 1746 | goto shouldnt_be_hashed; |
1747 | } | 1747 | } |
1748 | 1748 | ||
1749 | static int prepend(char **buffer, int *buflen, const char *str, | ||
1750 | int namelen) | ||
1751 | { | ||
1752 | *buflen -= namelen; | ||
1753 | if (*buflen < 0) | ||
1754 | return -ENAMETOOLONG; | ||
1755 | *buffer -= namelen; | ||
1756 | memcpy(*buffer, str, namelen); | ||
1757 | return 0; | ||
1758 | } | ||
1759 | |||
1749 | /** | 1760 | /** |
1750 | * d_path - return the path of a dentry | 1761 | * d_path - return the path of a dentry |
1751 | * @dentry: dentry to report | 1762 | * @path: the dentry/vfsmount to report |
1752 | * @vfsmnt: vfsmnt to which the dentry belongs | 1763 | * @root: root vfsmnt/dentry (may be modified by this function) |
1753 | * @root: root dentry | ||
1754 | * @rootmnt: vfsmnt to which the root dentry belongs | ||
1755 | * @buffer: buffer to return value in | 1764 | * @buffer: buffer to return value in |
1756 | * @buflen: buffer length | 1765 | * @buflen: buffer length |
1757 | * | 1766 | * |
@@ -1761,23 +1770,22 @@ shouldnt_be_hashed: | |||
1761 | * Returns the buffer or an error code if the path was too long. | 1770 | * Returns the buffer or an error code if the path was too long. |
1762 | * | 1771 | * |
1763 | * "buflen" should be positive. Caller holds the dcache_lock. | 1772 | * "buflen" should be positive. Caller holds the dcache_lock. |
1773 | * | ||
1774 | * If path is not reachable from the supplied root, then the value of | ||
1775 | * root is changed (without modifying refcounts). | ||
1764 | */ | 1776 | */ |
1765 | static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt, | 1777 | char *__d_path(const struct path *path, struct path *root, |
1766 | struct path *root, char *buffer, int buflen) | 1778 | char *buffer, int buflen) |
1767 | { | 1779 | { |
1780 | struct dentry *dentry = path->dentry; | ||
1781 | struct vfsmount *vfsmnt = path->mnt; | ||
1768 | char * end = buffer+buflen; | 1782 | char * end = buffer+buflen; |
1769 | char * retval; | 1783 | char * retval; |
1770 | int namelen; | 1784 | |
1771 | 1785 | prepend(&end, &buflen, "\0", 1); | |
1772 | *--end = '\0'; | 1786 | if (!IS_ROOT(dentry) && d_unhashed(dentry) && |
1773 | buflen--; | 1787 | (prepend(&end, &buflen, " (deleted)", 10) != 0)) |
1774 | if (!IS_ROOT(dentry) && d_unhashed(dentry)) { | ||
1775 | buflen -= 10; | ||
1776 | end -= 10; | ||
1777 | if (buflen < 0) | ||
1778 | goto Elong; | 1788 | goto Elong; |
1779 | memcpy(end, " (deleted)", 10); | ||
1780 | } | ||
1781 | 1789 | ||
1782 | if (buflen < 1) | 1790 | if (buflen < 1) |
1783 | goto Elong; | 1791 | goto Elong; |
@@ -1804,13 +1812,10 @@ static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt, | |||
1804 | } | 1812 | } |
1805 | parent = dentry->d_parent; | 1813 | parent = dentry->d_parent; |
1806 | prefetch(parent); | 1814 | prefetch(parent); |
1807 | namelen = dentry->d_name.len; | 1815 | if ((prepend(&end, &buflen, dentry->d_name.name, |
1808 | buflen -= namelen + 1; | 1816 | dentry->d_name.len) != 0) || |
1809 | if (buflen < 0) | 1817 | (prepend(&end, &buflen, "/", 1) != 0)) |
1810 | goto Elong; | 1818 | goto Elong; |
1811 | end -= namelen; | ||
1812 | memcpy(end, dentry->d_name.name, namelen); | ||
1813 | *--end = '/'; | ||
1814 | retval = end; | 1819 | retval = end; |
1815 | dentry = parent; | 1820 | dentry = parent; |
1816 | } | 1821 | } |
@@ -1818,12 +1823,12 @@ static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt, | |||
1818 | return retval; | 1823 | return retval; |
1819 | 1824 | ||
1820 | global_root: | 1825 | global_root: |
1821 | namelen = dentry->d_name.len; | 1826 | retval += 1; /* hit the slash */ |
1822 | buflen -= namelen; | 1827 | if (prepend(&retval, &buflen, dentry->d_name.name, |
1823 | if (buflen < 0) | 1828 | dentry->d_name.len) != 0) |
1824 | goto Elong; | 1829 | goto Elong; |
1825 | retval -= namelen-1; /* hit the slash */ | 1830 | root->mnt = vfsmnt; |
1826 | memcpy(retval, dentry->d_name.name, namelen); | 1831 | root->dentry = dentry; |
1827 | return retval; | 1832 | return retval; |
1828 | Elong: | 1833 | Elong: |
1829 | return ERR_PTR(-ENAMETOOLONG); | 1834 | return ERR_PTR(-ENAMETOOLONG); |
@@ -1846,6 +1851,7 @@ char *d_path(struct path *path, char *buf, int buflen) | |||
1846 | { | 1851 | { |
1847 | char *res; | 1852 | char *res; |
1848 | struct path root; | 1853 | struct path root; |
1854 | struct path tmp; | ||
1849 | 1855 | ||
1850 | /* | 1856 | /* |
1851 | * We have various synthetic filesystems that never get mounted. On | 1857 | * We have various synthetic filesystems that never get mounted. On |
@@ -1859,10 +1865,11 @@ char *d_path(struct path *path, char *buf, int buflen) | |||
1859 | 1865 | ||
1860 | read_lock(¤t->fs->lock); | 1866 | read_lock(¤t->fs->lock); |
1861 | root = current->fs->root; | 1867 | root = current->fs->root; |
1862 | path_get(¤t->fs->root); | 1868 | path_get(&root); |
1863 | read_unlock(¤t->fs->lock); | 1869 | read_unlock(¤t->fs->lock); |
1864 | spin_lock(&dcache_lock); | 1870 | spin_lock(&dcache_lock); |
1865 | res = __d_path(path->dentry, path->mnt, &root, buf, buflen); | 1871 | tmp = root; |
1872 | res = __d_path(path, &tmp, buf, buflen); | ||
1866 | spin_unlock(&dcache_lock); | 1873 | spin_unlock(&dcache_lock); |
1867 | path_put(&root); | 1874 | path_put(&root); |
1868 | return res; | 1875 | return res; |
@@ -1890,6 +1897,48 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen, | |||
1890 | } | 1897 | } |
1891 | 1898 | ||
1892 | /* | 1899 | /* |
1900 | * Write full pathname from the root of the filesystem into the buffer. | ||
1901 | */ | ||
1902 | char *dentry_path(struct dentry *dentry, char *buf, int buflen) | ||
1903 | { | ||
1904 | char *end = buf + buflen; | ||
1905 | char *retval; | ||
1906 | |||
1907 | spin_lock(&dcache_lock); | ||
1908 | prepend(&end, &buflen, "\0", 1); | ||
1909 | if (!IS_ROOT(dentry) && d_unhashed(dentry) && | ||
1910 | (prepend(&end, &buflen, "//deleted", 9) != 0)) | ||
1911 | goto Elong; | ||
1912 | if (buflen < 1) | ||
1913 | goto Elong; | ||
1914 | /* Get '/' right */ | ||
1915 | retval = end-1; | ||
1916 | *retval = '/'; | ||
1917 | |||
1918 | for (;;) { | ||
1919 | struct dentry *parent; | ||
1920 | if (IS_ROOT(dentry)) | ||
1921 | break; | ||
1922 | |||
1923 | parent = dentry->d_parent; | ||
1924 | prefetch(parent); | ||
1925 | |||
1926 | if ((prepend(&end, &buflen, dentry->d_name.name, | ||
1927 | dentry->d_name.len) != 0) || | ||
1928 | (prepend(&end, &buflen, "/", 1) != 0)) | ||
1929 | goto Elong; | ||
1930 | |||
1931 | retval = end; | ||
1932 | dentry = parent; | ||
1933 | } | ||
1934 | spin_unlock(&dcache_lock); | ||
1935 | return retval; | ||
1936 | Elong: | ||
1937 | spin_unlock(&dcache_lock); | ||
1938 | return ERR_PTR(-ENAMETOOLONG); | ||
1939 | } | ||
1940 | |||
1941 | /* | ||
1893 | * NOTE! The user-level library version returns a | 1942 | * NOTE! The user-level library version returns a |
1894 | * character pointer. The kernel system call just | 1943 | * character pointer. The kernel system call just |
1895 | * returns the length of the buffer filled (which | 1944 | * returns the length of the buffer filled (which |
@@ -1918,9 +1967,9 @@ asmlinkage long sys_getcwd(char __user *buf, unsigned long size) | |||
1918 | 1967 | ||
1919 | read_lock(¤t->fs->lock); | 1968 | read_lock(¤t->fs->lock); |
1920 | pwd = current->fs->pwd; | 1969 | pwd = current->fs->pwd; |
1921 | path_get(¤t->fs->pwd); | 1970 | path_get(&pwd); |
1922 | root = current->fs->root; | 1971 | root = current->fs->root; |
1923 | path_get(¤t->fs->root); | 1972 | path_get(&root); |
1924 | read_unlock(¤t->fs->lock); | 1973 | read_unlock(¤t->fs->lock); |
1925 | 1974 | ||
1926 | error = -ENOENT; | 1975 | error = -ENOENT; |
@@ -1928,9 +1977,10 @@ asmlinkage long sys_getcwd(char __user *buf, unsigned long size) | |||
1928 | spin_lock(&dcache_lock); | 1977 | spin_lock(&dcache_lock); |
1929 | if (pwd.dentry->d_parent == pwd.dentry || !d_unhashed(pwd.dentry)) { | 1978 | if (pwd.dentry->d_parent == pwd.dentry || !d_unhashed(pwd.dentry)) { |
1930 | unsigned long len; | 1979 | unsigned long len; |
1980 | struct path tmp = root; | ||
1931 | char * cwd; | 1981 | char * cwd; |
1932 | 1982 | ||
1933 | cwd = __d_path(pwd.dentry, pwd.mnt, &root, page, PAGE_SIZE); | 1983 | cwd = __d_path(&pwd, &tmp, page, PAGE_SIZE); |
1934 | spin_unlock(&dcache_lock); | 1984 | spin_unlock(&dcache_lock); |
1935 | 1985 | ||
1936 | error = PTR_ERR(cwd); | 1986 | error = PTR_ERR(cwd); |
diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile index d248e60951ba..ca1c9124c8ce 100644 --- a/fs/dlm/Makefile +++ b/fs/dlm/Makefile | |||
@@ -10,6 +10,7 @@ dlm-y := ast.o \ | |||
10 | midcomms.o \ | 10 | midcomms.o \ |
11 | netlink.o \ | 11 | netlink.o \ |
12 | lowcomms.o \ | 12 | lowcomms.o \ |
13 | plock.o \ | ||
13 | rcom.o \ | 14 | rcom.o \ |
14 | recover.o \ | 15 | recover.o \ |
15 | recoverd.o \ | 16 | recoverd.o \ |
diff --git a/fs/dlm/config.c b/fs/dlm/config.c index c3ad1dff3b25..eac23bd288b2 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c | |||
@@ -114,7 +114,7 @@ struct cluster_attribute { | |||
114 | }; | 114 | }; |
115 | 115 | ||
116 | static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field, | 116 | static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field, |
117 | unsigned int *info_field, int check_zero, | 117 | int *info_field, int check_zero, |
118 | const char *buf, size_t len) | 118 | const char *buf, size_t len) |
119 | { | 119 | { |
120 | unsigned int x; | 120 | unsigned int x; |
@@ -284,6 +284,7 @@ struct node { | |||
284 | struct list_head list; /* space->members */ | 284 | struct list_head list; /* space->members */ |
285 | int nodeid; | 285 | int nodeid; |
286 | int weight; | 286 | int weight; |
287 | int new; | ||
287 | }; | 288 | }; |
288 | 289 | ||
289 | static struct configfs_group_operations clusters_ops = { | 290 | static struct configfs_group_operations clusters_ops = { |
@@ -565,6 +566,7 @@ static struct config_item *make_node(struct config_group *g, const char *name) | |||
565 | config_item_init_type_name(&nd->item, name, &node_type); | 566 | config_item_init_type_name(&nd->item, name, &node_type); |
566 | nd->nodeid = -1; | 567 | nd->nodeid = -1; |
567 | nd->weight = 1; /* default weight of 1 if none is set */ | 568 | nd->weight = 1; /* default weight of 1 if none is set */ |
569 | nd->new = 1; /* set to 0 once it's been read by dlm_nodeid_list() */ | ||
568 | 570 | ||
569 | mutex_lock(&sp->members_lock); | 571 | mutex_lock(&sp->members_lock); |
570 | list_add(&nd->list, &sp->members); | 572 | list_add(&nd->list, &sp->members); |
@@ -805,12 +807,13 @@ static void put_comm(struct comm *cm) | |||
805 | } | 807 | } |
806 | 808 | ||
807 | /* caller must free mem */ | 809 | /* caller must free mem */ |
808 | int dlm_nodeid_list(char *lsname, int **ids_out) | 810 | int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, |
811 | int **new_out, int *new_count_out) | ||
809 | { | 812 | { |
810 | struct space *sp; | 813 | struct space *sp; |
811 | struct node *nd; | 814 | struct node *nd; |
812 | int i = 0, rv = 0; | 815 | int i = 0, rv = 0, ids_count = 0, new_count = 0; |
813 | int *ids; | 816 | int *ids, *new; |
814 | 817 | ||
815 | sp = get_space(lsname); | 818 | sp = get_space(lsname); |
816 | if (!sp) | 819 | if (!sp) |
@@ -818,23 +821,50 @@ int dlm_nodeid_list(char *lsname, int **ids_out) | |||
818 | 821 | ||
819 | mutex_lock(&sp->members_lock); | 822 | mutex_lock(&sp->members_lock); |
820 | if (!sp->members_count) { | 823 | if (!sp->members_count) { |
821 | rv = 0; | 824 | rv = -EINVAL; |
825 | printk(KERN_ERR "dlm: zero members_count\n"); | ||
822 | goto out; | 826 | goto out; |
823 | } | 827 | } |
824 | 828 | ||
825 | ids = kcalloc(sp->members_count, sizeof(int), GFP_KERNEL); | 829 | ids_count = sp->members_count; |
830 | |||
831 | ids = kcalloc(ids_count, sizeof(int), GFP_KERNEL); | ||
826 | if (!ids) { | 832 | if (!ids) { |
827 | rv = -ENOMEM; | 833 | rv = -ENOMEM; |
828 | goto out; | 834 | goto out; |
829 | } | 835 | } |
830 | 836 | ||
831 | rv = sp->members_count; | 837 | list_for_each_entry(nd, &sp->members, list) { |
832 | list_for_each_entry(nd, &sp->members, list) | ||
833 | ids[i++] = nd->nodeid; | 838 | ids[i++] = nd->nodeid; |
839 | if (nd->new) | ||
840 | new_count++; | ||
841 | } | ||
842 | |||
843 | if (ids_count != i) | ||
844 | printk(KERN_ERR "dlm: bad nodeid count %d %d\n", ids_count, i); | ||
845 | |||
846 | if (!new_count) | ||
847 | goto out_ids; | ||
848 | |||
849 | new = kcalloc(new_count, sizeof(int), GFP_KERNEL); | ||
850 | if (!new) { | ||
851 | kfree(ids); | ||
852 | rv = -ENOMEM; | ||
853 | goto out; | ||
854 | } | ||
834 | 855 | ||
835 | if (rv != i) | 856 | i = 0; |
836 | printk("bad nodeid count %d %d\n", rv, i); | 857 | list_for_each_entry(nd, &sp->members, list) { |
858 | if (nd->new) { | ||
859 | new[i++] = nd->nodeid; | ||
860 | nd->new = 0; | ||
861 | } | ||
862 | } | ||
863 | *new_count_out = new_count; | ||
864 | *new_out = new; | ||
837 | 865 | ||
866 | out_ids: | ||
867 | *ids_count_out = ids_count; | ||
838 | *ids_out = ids; | 868 | *ids_out = ids; |
839 | out: | 869 | out: |
840 | mutex_unlock(&sp->members_lock); | 870 | mutex_unlock(&sp->members_lock); |
diff --git a/fs/dlm/config.h b/fs/dlm/config.h index a3170fe22090..4f1d6fce58c5 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h | |||
@@ -35,7 +35,8 @@ extern struct dlm_config_info dlm_config; | |||
35 | int dlm_config_init(void); | 35 | int dlm_config_init(void); |
36 | void dlm_config_exit(void); | 36 | void dlm_config_exit(void); |
37 | int dlm_node_weight(char *lsname, int nodeid); | 37 | int dlm_node_weight(char *lsname, int nodeid); |
38 | int dlm_nodeid_list(char *lsname, int **ids_out); | 38 | int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, |
39 | int **new_out, int *new_count_out); | ||
39 | int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr); | 40 | int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr); |
40 | int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid); | 41 | int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid); |
41 | int dlm_our_nodeid(void); | 42 | int dlm_our_nodeid(void); |
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index d30ea8b433a2..5a7ac33b629c 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h | |||
@@ -37,14 +37,11 @@ | |||
37 | #include <linux/jhash.h> | 37 | #include <linux/jhash.h> |
38 | #include <linux/miscdevice.h> | 38 | #include <linux/miscdevice.h> |
39 | #include <linux/mutex.h> | 39 | #include <linux/mutex.h> |
40 | #include <asm/semaphore.h> | ||
41 | #include <asm/uaccess.h> | 40 | #include <asm/uaccess.h> |
42 | 41 | ||
43 | #include <linux/dlm.h> | 42 | #include <linux/dlm.h> |
44 | #include "config.h" | 43 | #include "config.h" |
45 | 44 | ||
46 | #define DLM_LOCKSPACE_LEN 64 | ||
47 | |||
48 | /* Size of the temp buffer midcomms allocates on the stack. | 45 | /* Size of the temp buffer midcomms allocates on the stack. |
49 | We try to make this large enough so most messages fit. | 46 | We try to make this large enough so most messages fit. |
50 | FIXME: should sctp make this unnecessary? */ | 47 | FIXME: should sctp make this unnecessary? */ |
@@ -133,8 +130,10 @@ struct dlm_member { | |||
133 | 130 | ||
134 | struct dlm_recover { | 131 | struct dlm_recover { |
135 | struct list_head list; | 132 | struct list_head list; |
136 | int *nodeids; | 133 | int *nodeids; /* nodeids of all members */ |
137 | int node_count; | 134 | int node_count; |
135 | int *new; /* nodeids of new members */ | ||
136 | int new_count; | ||
138 | uint64_t seq; | 137 | uint64_t seq; |
139 | }; | 138 | }; |
140 | 139 | ||
@@ -580,6 +579,8 @@ static inline int dlm_no_directory(struct dlm_ls *ls) | |||
580 | int dlm_netlink_init(void); | 579 | int dlm_netlink_init(void); |
581 | void dlm_netlink_exit(void); | 580 | void dlm_netlink_exit(void); |
582 | void dlm_timeout_warn(struct dlm_lkb *lkb); | 581 | void dlm_timeout_warn(struct dlm_lkb *lkb); |
582 | int dlm_plock_init(void); | ||
583 | void dlm_plock_exit(void); | ||
583 | 584 | ||
584 | #ifdef CONFIG_DLM_DEBUG | 585 | #ifdef CONFIG_DLM_DEBUG |
585 | int dlm_register_debugfs(void); | 586 | int dlm_register_debugfs(void); |
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 8f250ac8b928..2d3d1027ce2b 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c | |||
@@ -165,7 +165,7 @@ void dlm_print_lkb(struct dlm_lkb *lkb) | |||
165 | lkb->lkb_grmode, lkb->lkb_wait_type, lkb->lkb_ast_type); | 165 | lkb->lkb_grmode, lkb->lkb_wait_type, lkb->lkb_ast_type); |
166 | } | 166 | } |
167 | 167 | ||
168 | void dlm_print_rsb(struct dlm_rsb *r) | 168 | static void dlm_print_rsb(struct dlm_rsb *r) |
169 | { | 169 | { |
170 | printk(KERN_ERR "rsb: nodeid %d flags %lx first %x rlc %d name %s\n", | 170 | printk(KERN_ERR "rsb: nodeid %d flags %lx first %x rlc %d name %s\n", |
171 | r->res_nodeid, r->res_flags, r->res_first_lkid, | 171 | r->res_nodeid, r->res_flags, r->res_first_lkid, |
@@ -1956,8 +1956,7 @@ static void confirm_master(struct dlm_rsb *r, int error) | |||
1956 | list_del_init(&lkb->lkb_rsb_lookup); | 1956 | list_del_init(&lkb->lkb_rsb_lookup); |
1957 | r->res_first_lkid = lkb->lkb_id; | 1957 | r->res_first_lkid = lkb->lkb_id; |
1958 | _request_lock(r, lkb); | 1958 | _request_lock(r, lkb); |
1959 | } else | 1959 | } |
1960 | r->res_nodeid = -1; | ||
1961 | break; | 1960 | break; |
1962 | 1961 | ||
1963 | default: | 1962 | default: |
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index 05d9c82e646b..88e93c80cc22 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h | |||
@@ -13,7 +13,6 @@ | |||
13 | #ifndef __LOCK_DOT_H__ | 13 | #ifndef __LOCK_DOT_H__ |
14 | #define __LOCK_DOT_H__ | 14 | #define __LOCK_DOT_H__ |
15 | 15 | ||
16 | void dlm_print_rsb(struct dlm_rsb *r); | ||
17 | void dlm_dump_rsb(struct dlm_rsb *r); | 16 | void dlm_dump_rsb(struct dlm_rsb *r); |
18 | void dlm_print_lkb(struct dlm_lkb *lkb); | 17 | void dlm_print_lkb(struct dlm_lkb *lkb); |
19 | void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms); | 18 | void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms); |
diff --git a/fs/dlm/main.c b/fs/dlm/main.c index 58487fb95a4c..b80e0aa3cfa5 100644 --- a/fs/dlm/main.c +++ b/fs/dlm/main.c | |||
@@ -46,10 +46,16 @@ static int __init init_dlm(void) | |||
46 | if (error) | 46 | if (error) |
47 | goto out_user; | 47 | goto out_user; |
48 | 48 | ||
49 | error = dlm_plock_init(); | ||
50 | if (error) | ||
51 | goto out_netlink; | ||
52 | |||
49 | printk("DLM (built %s %s) installed\n", __DATE__, __TIME__); | 53 | printk("DLM (built %s %s) installed\n", __DATE__, __TIME__); |
50 | 54 | ||
51 | return 0; | 55 | return 0; |
52 | 56 | ||
57 | out_netlink: | ||
58 | dlm_netlink_exit(); | ||
53 | out_user: | 59 | out_user: |
54 | dlm_user_exit(); | 60 | dlm_user_exit(); |
55 | out_debug: | 61 | out_debug: |
@@ -66,6 +72,7 @@ static int __init init_dlm(void) | |||
66 | 72 | ||
67 | static void __exit exit_dlm(void) | 73 | static void __exit exit_dlm(void) |
68 | { | 74 | { |
75 | dlm_plock_exit(); | ||
69 | dlm_netlink_exit(); | 76 | dlm_netlink_exit(); |
70 | dlm_user_exit(); | 77 | dlm_user_exit(); |
71 | dlm_config_exit(); | 78 | dlm_config_exit(); |
diff --git a/fs/dlm/member.c b/fs/dlm/member.c index fa17f5a27883..26133f05ae3a 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c | |||
@@ -210,6 +210,23 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) | |||
210 | } | 210 | } |
211 | } | 211 | } |
212 | 212 | ||
213 | /* Add an entry to ls_nodes_gone for members that were removed and | ||
214 | then added again, so that previous state for these nodes will be | ||
215 | cleared during recovery. */ | ||
216 | |||
217 | for (i = 0; i < rv->new_count; i++) { | ||
218 | if (!dlm_is_member(ls, rv->new[i])) | ||
219 | continue; | ||
220 | log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]); | ||
221 | |||
222 | memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL); | ||
223 | if (!memb) | ||
224 | return -ENOMEM; | ||
225 | memb->nodeid = rv->new[i]; | ||
226 | list_add_tail(&memb->list, &ls->ls_nodes_gone); | ||
227 | neg++; | ||
228 | } | ||
229 | |||
213 | /* add new members to ls_nodes */ | 230 | /* add new members to ls_nodes */ |
214 | 231 | ||
215 | for (i = 0; i < rv->node_count; i++) { | 232 | for (i = 0; i < rv->node_count; i++) { |
@@ -314,15 +331,16 @@ int dlm_ls_stop(struct dlm_ls *ls) | |||
314 | int dlm_ls_start(struct dlm_ls *ls) | 331 | int dlm_ls_start(struct dlm_ls *ls) |
315 | { | 332 | { |
316 | struct dlm_recover *rv = NULL, *rv_old; | 333 | struct dlm_recover *rv = NULL, *rv_old; |
317 | int *ids = NULL; | 334 | int *ids = NULL, *new = NULL; |
318 | int error, count; | 335 | int error, ids_count = 0, new_count = 0; |
319 | 336 | ||
320 | rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL); | 337 | rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL); |
321 | if (!rv) | 338 | if (!rv) |
322 | return -ENOMEM; | 339 | return -ENOMEM; |
323 | 340 | ||
324 | error = count = dlm_nodeid_list(ls->ls_name, &ids); | 341 | error = dlm_nodeid_list(ls->ls_name, &ids, &ids_count, |
325 | if (error <= 0) | 342 | &new, &new_count); |
343 | if (error < 0) | ||
326 | goto fail; | 344 | goto fail; |
327 | 345 | ||
328 | spin_lock(&ls->ls_recover_lock); | 346 | spin_lock(&ls->ls_recover_lock); |
@@ -337,14 +355,19 @@ int dlm_ls_start(struct dlm_ls *ls) | |||
337 | } | 355 | } |
338 | 356 | ||
339 | rv->nodeids = ids; | 357 | rv->nodeids = ids; |
340 | rv->node_count = count; | 358 | rv->node_count = ids_count; |
359 | rv->new = new; | ||
360 | rv->new_count = new_count; | ||
341 | rv->seq = ++ls->ls_recover_seq; | 361 | rv->seq = ++ls->ls_recover_seq; |
342 | rv_old = ls->ls_recover_args; | 362 | rv_old = ls->ls_recover_args; |
343 | ls->ls_recover_args = rv; | 363 | ls->ls_recover_args = rv; |
344 | spin_unlock(&ls->ls_recover_lock); | 364 | spin_unlock(&ls->ls_recover_lock); |
345 | 365 | ||
346 | if (rv_old) { | 366 | if (rv_old) { |
367 | log_error(ls, "unused recovery %llx %d", | ||
368 | (unsigned long long)rv_old->seq, rv_old->node_count); | ||
347 | kfree(rv_old->nodeids); | 369 | kfree(rv_old->nodeids); |
370 | kfree(rv_old->new); | ||
348 | kfree(rv_old); | 371 | kfree(rv_old); |
349 | } | 372 | } |
350 | 373 | ||
@@ -354,6 +377,7 @@ int dlm_ls_start(struct dlm_ls *ls) | |||
354 | fail: | 377 | fail: |
355 | kfree(rv); | 378 | kfree(rv); |
356 | kfree(ids); | 379 | kfree(ids); |
380 | kfree(new); | ||
357 | return error; | 381 | return error; |
358 | } | 382 | } |
359 | 383 | ||
diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/dlm/plock.c index 2ebd374b3143..d6d6e370f89c 100644 --- a/fs/gfs2/locking/dlm/plock.c +++ b/fs/dlm/plock.c | |||
@@ -1,17 +1,19 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2005 Red Hat, Inc. All rights reserved. | 2 | * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. |
3 | * | 3 | * |
4 | * This copyrighted material is made available to anyone wishing to use, | 4 | * This copyrighted material is made available to anyone wishing to use, |
5 | * modify, copy, or redistribute it subject to the terms and conditions | 5 | * modify, copy, or redistribute it subject to the terms and conditions |
6 | * of the GNU General Public License version 2. | 6 | * of the GNU General Public License version 2. |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/fs.h> | ||
9 | #include <linux/miscdevice.h> | 10 | #include <linux/miscdevice.h> |
10 | #include <linux/lock_dlm_plock.h> | ||
11 | #include <linux/poll.h> | 11 | #include <linux/poll.h> |
12 | #include <linux/dlm.h> | ||
13 | #include <linux/dlm_plock.h> | ||
12 | 14 | ||
13 | #include "lock_dlm.h" | 15 | #include "dlm_internal.h" |
14 | 16 | #include "lockspace.h" | |
15 | 17 | ||
16 | static spinlock_t ops_lock; | 18 | static spinlock_t ops_lock; |
17 | static struct list_head send_list; | 19 | static struct list_head send_list; |
@@ -22,7 +24,7 @@ static wait_queue_head_t recv_wq; | |||
22 | struct plock_op { | 24 | struct plock_op { |
23 | struct list_head list; | 25 | struct list_head list; |
24 | int done; | 26 | int done; |
25 | struct gdlm_plock_info info; | 27 | struct dlm_plock_info info; |
26 | }; | 28 | }; |
27 | 29 | ||
28 | struct plock_xop { | 30 | struct plock_xop { |
@@ -34,22 +36,22 @@ struct plock_xop { | |||
34 | }; | 36 | }; |
35 | 37 | ||
36 | 38 | ||
37 | static inline void set_version(struct gdlm_plock_info *info) | 39 | static inline void set_version(struct dlm_plock_info *info) |
38 | { | 40 | { |
39 | info->version[0] = GDLM_PLOCK_VERSION_MAJOR; | 41 | info->version[0] = DLM_PLOCK_VERSION_MAJOR; |
40 | info->version[1] = GDLM_PLOCK_VERSION_MINOR; | 42 | info->version[1] = DLM_PLOCK_VERSION_MINOR; |
41 | info->version[2] = GDLM_PLOCK_VERSION_PATCH; | 43 | info->version[2] = DLM_PLOCK_VERSION_PATCH; |
42 | } | 44 | } |
43 | 45 | ||
44 | static int check_version(struct gdlm_plock_info *info) | 46 | static int check_version(struct dlm_plock_info *info) |
45 | { | 47 | { |
46 | if ((GDLM_PLOCK_VERSION_MAJOR != info->version[0]) || | 48 | if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) || |
47 | (GDLM_PLOCK_VERSION_MINOR < info->version[1])) { | 49 | (DLM_PLOCK_VERSION_MINOR < info->version[1])) { |
48 | log_error("plock device version mismatch: " | 50 | log_print("plock device version mismatch: " |
49 | "kernel (%u.%u.%u), user (%u.%u.%u)", | 51 | "kernel (%u.%u.%u), user (%u.%u.%u)", |
50 | GDLM_PLOCK_VERSION_MAJOR, | 52 | DLM_PLOCK_VERSION_MAJOR, |
51 | GDLM_PLOCK_VERSION_MINOR, | 53 | DLM_PLOCK_VERSION_MINOR, |
52 | GDLM_PLOCK_VERSION_PATCH, | 54 | DLM_PLOCK_VERSION_PATCH, |
53 | info->version[0], | 55 | info->version[0], |
54 | info->version[1], | 56 | info->version[1], |
55 | info->version[2]); | 57 | info->version[2]); |
@@ -68,25 +70,31 @@ static void send_op(struct plock_op *op) | |||
68 | wake_up(&send_wq); | 70 | wake_up(&send_wq); |
69 | } | 71 | } |
70 | 72 | ||
71 | int gdlm_plock(void *lockspace, struct lm_lockname *name, | 73 | int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, |
72 | struct file *file, int cmd, struct file_lock *fl) | 74 | int cmd, struct file_lock *fl) |
73 | { | 75 | { |
74 | struct gdlm_ls *ls = lockspace; | 76 | struct dlm_ls *ls; |
75 | struct plock_op *op; | 77 | struct plock_op *op; |
76 | struct plock_xop *xop; | 78 | struct plock_xop *xop; |
77 | int rv; | 79 | int rv; |
78 | 80 | ||
81 | ls = dlm_find_lockspace_local(lockspace); | ||
82 | if (!ls) | ||
83 | return -EINVAL; | ||
84 | |||
79 | xop = kzalloc(sizeof(*xop), GFP_KERNEL); | 85 | xop = kzalloc(sizeof(*xop), GFP_KERNEL); |
80 | if (!xop) | 86 | if (!xop) { |
81 | return -ENOMEM; | 87 | rv = -ENOMEM; |
88 | goto out; | ||
89 | } | ||
82 | 90 | ||
83 | op = &xop->xop; | 91 | op = &xop->xop; |
84 | op->info.optype = GDLM_PLOCK_OP_LOCK; | 92 | op->info.optype = DLM_PLOCK_OP_LOCK; |
85 | op->info.pid = fl->fl_pid; | 93 | op->info.pid = fl->fl_pid; |
86 | op->info.ex = (fl->fl_type == F_WRLCK); | 94 | op->info.ex = (fl->fl_type == F_WRLCK); |
87 | op->info.wait = IS_SETLKW(cmd); | 95 | op->info.wait = IS_SETLKW(cmd); |
88 | op->info.fsid = ls->id; | 96 | op->info.fsid = ls->ls_global_id; |
89 | op->info.number = name->ln_number; | 97 | op->info.number = number; |
90 | op->info.start = fl->fl_start; | 98 | op->info.start = fl->fl_start; |
91 | op->info.end = fl->fl_end; | 99 | op->info.end = fl->fl_end; |
92 | if (fl->fl_lmops && fl->fl_lmops->fl_grant) { | 100 | if (fl->fl_lmops && fl->fl_lmops->fl_grant) { |
@@ -107,12 +115,15 @@ int gdlm_plock(void *lockspace, struct lm_lockname *name, | |||
107 | 115 | ||
108 | if (xop->callback == NULL) | 116 | if (xop->callback == NULL) |
109 | wait_event(recv_wq, (op->done != 0)); | 117 | wait_event(recv_wq, (op->done != 0)); |
110 | else | 118 | else { |
111 | return -EINPROGRESS; | 119 | rv = -EINPROGRESS; |
120 | goto out; | ||
121 | } | ||
112 | 122 | ||
113 | spin_lock(&ops_lock); | 123 | spin_lock(&ops_lock); |
114 | if (!list_empty(&op->list)) { | 124 | if (!list_empty(&op->list)) { |
115 | printk(KERN_INFO "plock op on list\n"); | 125 | log_error(ls, "dlm_posix_lock: op on list %llx", |
126 | (unsigned long long)number); | ||
116 | list_del(&op->list); | 127 | list_del(&op->list); |
117 | } | 128 | } |
118 | spin_unlock(&ops_lock); | 129 | spin_unlock(&ops_lock); |
@@ -121,17 +132,19 @@ int gdlm_plock(void *lockspace, struct lm_lockname *name, | |||
121 | 132 | ||
122 | if (!rv) { | 133 | if (!rv) { |
123 | if (posix_lock_file_wait(file, fl) < 0) | 134 | if (posix_lock_file_wait(file, fl) < 0) |
124 | log_error("gdlm_plock: vfs lock error %x,%llx", | 135 | log_error(ls, "dlm_posix_lock: vfs lock error %llx", |
125 | name->ln_type, | 136 | (unsigned long long)number); |
126 | (unsigned long long)name->ln_number); | ||
127 | } | 137 | } |
128 | 138 | ||
129 | kfree(xop); | 139 | kfree(xop); |
140 | out: | ||
141 | dlm_put_lockspace(ls); | ||
130 | return rv; | 142 | return rv; |
131 | } | 143 | } |
144 | EXPORT_SYMBOL_GPL(dlm_posix_lock); | ||
132 | 145 | ||
133 | /* Returns failure iff a succesful lock operation should be canceled */ | 146 | /* Returns failure iff a succesful lock operation should be canceled */ |
134 | static int gdlm_plock_callback(struct plock_op *op) | 147 | static int dlm_plock_callback(struct plock_op *op) |
135 | { | 148 | { |
136 | struct file *file; | 149 | struct file *file; |
137 | struct file_lock *fl; | 150 | struct file_lock *fl; |
@@ -142,7 +155,8 @@ static int gdlm_plock_callback(struct plock_op *op) | |||
142 | 155 | ||
143 | spin_lock(&ops_lock); | 156 | spin_lock(&ops_lock); |
144 | if (!list_empty(&op->list)) { | 157 | if (!list_empty(&op->list)) { |
145 | printk(KERN_INFO "plock op on list\n"); | 158 | log_print("dlm_plock_callback: op on list %llx", |
159 | (unsigned long long)op->info.number); | ||
146 | list_del(&op->list); | 160 | list_del(&op->list); |
147 | } | 161 | } |
148 | spin_unlock(&ops_lock); | 162 | spin_unlock(&ops_lock); |
@@ -165,19 +179,19 @@ static int gdlm_plock_callback(struct plock_op *op) | |||
165 | * This can only happen in the case of kmalloc() failure. | 179 | * This can only happen in the case of kmalloc() failure. |
166 | * The filesystem's own lock is the authoritative lock, | 180 | * The filesystem's own lock is the authoritative lock, |
167 | * so a failure to get the lock locally is not a disaster. | 181 | * so a failure to get the lock locally is not a disaster. |
168 | * As long as GFS cannot reliably cancel locks (especially | 182 | * As long as the fs cannot reliably cancel locks (especially |
169 | * in a low-memory situation), we're better off ignoring | 183 | * in a low-memory situation), we're better off ignoring |
170 | * this failure than trying to recover. | 184 | * this failure than trying to recover. |
171 | */ | 185 | */ |
172 | log_error("gdlm_plock: vfs lock error file %p fl %p", | 186 | log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p", |
173 | file, fl); | 187 | (unsigned long long)op->info.number, file, fl); |
174 | } | 188 | } |
175 | 189 | ||
176 | rv = notify(flc, NULL, 0); | 190 | rv = notify(flc, NULL, 0); |
177 | if (rv) { | 191 | if (rv) { |
178 | /* XXX: We need to cancel the fs lock here: */ | 192 | /* XXX: We need to cancel the fs lock here: */ |
179 | printk("gfs2 lock granted after lock request failed;" | 193 | log_print("dlm_plock_callback: lock granted after lock request " |
180 | " dangling lock!\n"); | 194 | "failed; dangling lock!\n"); |
181 | goto out; | 195 | goto out; |
182 | } | 196 | } |
183 | 197 | ||
@@ -186,25 +200,31 @@ out: | |||
186 | return rv; | 200 | return rv; |
187 | } | 201 | } |
188 | 202 | ||
189 | int gdlm_punlock(void *lockspace, struct lm_lockname *name, | 203 | int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, |
190 | struct file *file, struct file_lock *fl) | 204 | struct file_lock *fl) |
191 | { | 205 | { |
192 | struct gdlm_ls *ls = lockspace; | 206 | struct dlm_ls *ls; |
193 | struct plock_op *op; | 207 | struct plock_op *op; |
194 | int rv; | 208 | int rv; |
195 | 209 | ||
210 | ls = dlm_find_lockspace_local(lockspace); | ||
211 | if (!ls) | ||
212 | return -EINVAL; | ||
213 | |||
196 | op = kzalloc(sizeof(*op), GFP_KERNEL); | 214 | op = kzalloc(sizeof(*op), GFP_KERNEL); |
197 | if (!op) | 215 | if (!op) { |
198 | return -ENOMEM; | 216 | rv = -ENOMEM; |
217 | goto out; | ||
218 | } | ||
199 | 219 | ||
200 | if (posix_lock_file_wait(file, fl) < 0) | 220 | if (posix_lock_file_wait(file, fl) < 0) |
201 | log_error("gdlm_punlock: vfs unlock error %x,%llx", | 221 | log_error(ls, "dlm_posix_unlock: vfs unlock error %llx", |
202 | name->ln_type, (unsigned long long)name->ln_number); | 222 | (unsigned long long)number); |
203 | 223 | ||
204 | op->info.optype = GDLM_PLOCK_OP_UNLOCK; | 224 | op->info.optype = DLM_PLOCK_OP_UNLOCK; |
205 | op->info.pid = fl->fl_pid; | 225 | op->info.pid = fl->fl_pid; |
206 | op->info.fsid = ls->id; | 226 | op->info.fsid = ls->ls_global_id; |
207 | op->info.number = name->ln_number; | 227 | op->info.number = number; |
208 | op->info.start = fl->fl_start; | 228 | op->info.start = fl->fl_start; |
209 | op->info.end = fl->fl_end; | 229 | op->info.end = fl->fl_end; |
210 | if (fl->fl_lmops && fl->fl_lmops->fl_grant) | 230 | if (fl->fl_lmops && fl->fl_lmops->fl_grant) |
@@ -217,7 +237,8 @@ int gdlm_punlock(void *lockspace, struct lm_lockname *name, | |||
217 | 237 | ||
218 | spin_lock(&ops_lock); | 238 | spin_lock(&ops_lock); |
219 | if (!list_empty(&op->list)) { | 239 | if (!list_empty(&op->list)) { |
220 | printk(KERN_INFO "punlock op on list\n"); | 240 | log_error(ls, "dlm_posix_unlock: op on list %llx", |
241 | (unsigned long long)number); | ||
221 | list_del(&op->list); | 242 | list_del(&op->list); |
222 | } | 243 | } |
223 | spin_unlock(&ops_lock); | 244 | spin_unlock(&ops_lock); |
@@ -228,25 +249,34 @@ int gdlm_punlock(void *lockspace, struct lm_lockname *name, | |||
228 | rv = 0; | 249 | rv = 0; |
229 | 250 | ||
230 | kfree(op); | 251 | kfree(op); |
252 | out: | ||
253 | dlm_put_lockspace(ls); | ||
231 | return rv; | 254 | return rv; |
232 | } | 255 | } |
256 | EXPORT_SYMBOL_GPL(dlm_posix_unlock); | ||
233 | 257 | ||
234 | int gdlm_plock_get(void *lockspace, struct lm_lockname *name, | 258 | int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, |
235 | struct file *file, struct file_lock *fl) | 259 | struct file_lock *fl) |
236 | { | 260 | { |
237 | struct gdlm_ls *ls = lockspace; | 261 | struct dlm_ls *ls; |
238 | struct plock_op *op; | 262 | struct plock_op *op; |
239 | int rv; | 263 | int rv; |
240 | 264 | ||
265 | ls = dlm_find_lockspace_local(lockspace); | ||
266 | if (!ls) | ||
267 | return -EINVAL; | ||
268 | |||
241 | op = kzalloc(sizeof(*op), GFP_KERNEL); | 269 | op = kzalloc(sizeof(*op), GFP_KERNEL); |
242 | if (!op) | 270 | if (!op) { |
243 | return -ENOMEM; | 271 | rv = -ENOMEM; |
272 | goto out; | ||
273 | } | ||
244 | 274 | ||
245 | op->info.optype = GDLM_PLOCK_OP_GET; | 275 | op->info.optype = DLM_PLOCK_OP_GET; |
246 | op->info.pid = fl->fl_pid; | 276 | op->info.pid = fl->fl_pid; |
247 | op->info.ex = (fl->fl_type == F_WRLCK); | 277 | op->info.ex = (fl->fl_type == F_WRLCK); |
248 | op->info.fsid = ls->id; | 278 | op->info.fsid = ls->ls_global_id; |
249 | op->info.number = name->ln_number; | 279 | op->info.number = number; |
250 | op->info.start = fl->fl_start; | 280 | op->info.start = fl->fl_start; |
251 | op->info.end = fl->fl_end; | 281 | op->info.end = fl->fl_end; |
252 | if (fl->fl_lmops && fl->fl_lmops->fl_grant) | 282 | if (fl->fl_lmops && fl->fl_lmops->fl_grant) |
@@ -259,7 +289,8 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name, | |||
259 | 289 | ||
260 | spin_lock(&ops_lock); | 290 | spin_lock(&ops_lock); |
261 | if (!list_empty(&op->list)) { | 291 | if (!list_empty(&op->list)) { |
262 | printk(KERN_INFO "plock_get op on list\n"); | 292 | log_error(ls, "dlm_posix_get: op on list %llx", |
293 | (unsigned long long)number); | ||
263 | list_del(&op->list); | 294 | list_del(&op->list); |
264 | } | 295 | } |
265 | spin_unlock(&ops_lock); | 296 | spin_unlock(&ops_lock); |
@@ -281,14 +312,17 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name, | |||
281 | } | 312 | } |
282 | 313 | ||
283 | kfree(op); | 314 | kfree(op); |
315 | out: | ||
316 | dlm_put_lockspace(ls); | ||
284 | return rv; | 317 | return rv; |
285 | } | 318 | } |
319 | EXPORT_SYMBOL_GPL(dlm_posix_get); | ||
286 | 320 | ||
287 | /* a read copies out one plock request from the send list */ | 321 | /* a read copies out one plock request from the send list */ |
288 | static ssize_t dev_read(struct file *file, char __user *u, size_t count, | 322 | static ssize_t dev_read(struct file *file, char __user *u, size_t count, |
289 | loff_t *ppos) | 323 | loff_t *ppos) |
290 | { | 324 | { |
291 | struct gdlm_plock_info info; | 325 | struct dlm_plock_info info; |
292 | struct plock_op *op = NULL; | 326 | struct plock_op *op = NULL; |
293 | 327 | ||
294 | if (count < sizeof(info)) | 328 | if (count < sizeof(info)) |
@@ -315,7 +349,7 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count, | |||
315 | static ssize_t dev_write(struct file *file, const char __user *u, size_t count, | 349 | static ssize_t dev_write(struct file *file, const char __user *u, size_t count, |
316 | loff_t *ppos) | 350 | loff_t *ppos) |
317 | { | 351 | { |
318 | struct gdlm_plock_info info; | 352 | struct dlm_plock_info info; |
319 | struct plock_op *op; | 353 | struct plock_op *op; |
320 | int found = 0; | 354 | int found = 0; |
321 | 355 | ||
@@ -345,12 +379,12 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count, | |||
345 | struct plock_xop *xop; | 379 | struct plock_xop *xop; |
346 | xop = (struct plock_xop *)op; | 380 | xop = (struct plock_xop *)op; |
347 | if (xop->callback) | 381 | if (xop->callback) |
348 | count = gdlm_plock_callback(op); | 382 | count = dlm_plock_callback(op); |
349 | else | 383 | else |
350 | wake_up(&recv_wq); | 384 | wake_up(&recv_wq); |
351 | } else | 385 | } else |
352 | printk(KERN_INFO "gdlm dev_write no op %x %llx\n", info.fsid, | 386 | log_print("dev_write no op %x %llx", info.fsid, |
353 | (unsigned long long)info.number); | 387 | (unsigned long long)info.number); |
354 | return count; | 388 | return count; |
355 | } | 389 | } |
356 | 390 | ||
@@ -377,11 +411,11 @@ static const struct file_operations dev_fops = { | |||
377 | 411 | ||
378 | static struct miscdevice plock_dev_misc = { | 412 | static struct miscdevice plock_dev_misc = { |
379 | .minor = MISC_DYNAMIC_MINOR, | 413 | .minor = MISC_DYNAMIC_MINOR, |
380 | .name = GDLM_PLOCK_MISC_NAME, | 414 | .name = DLM_PLOCK_MISC_NAME, |
381 | .fops = &dev_fops | 415 | .fops = &dev_fops |
382 | }; | 416 | }; |
383 | 417 | ||
384 | int gdlm_plock_init(void) | 418 | int dlm_plock_init(void) |
385 | { | 419 | { |
386 | int rv; | 420 | int rv; |
387 | 421 | ||
@@ -393,14 +427,13 @@ int gdlm_plock_init(void) | |||
393 | 427 | ||
394 | rv = misc_register(&plock_dev_misc); | 428 | rv = misc_register(&plock_dev_misc); |
395 | if (rv) | 429 | if (rv) |
396 | printk(KERN_INFO "gdlm_plock_init: misc_register failed %d", | 430 | log_print("dlm_plock_init: misc_register failed %d", rv); |
397 | rv); | ||
398 | return rv; | 431 | return rv; |
399 | } | 432 | } |
400 | 433 | ||
401 | void gdlm_plock_exit(void) | 434 | void dlm_plock_exit(void) |
402 | { | 435 | { |
403 | if (misc_deregister(&plock_dev_misc) < 0) | 436 | if (misc_deregister(&plock_dev_misc) < 0) |
404 | printk(KERN_INFO "gdlm_plock_exit: misc_deregister failed"); | 437 | log_print("dlm_plock_exit: misc_deregister failed"); |
405 | } | 438 | } |
406 | 439 | ||
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 997f9531d594..fd677c8c3d3b 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c | |||
@@ -257,6 +257,7 @@ static void do_ls_recovery(struct dlm_ls *ls) | |||
257 | if (rv) { | 257 | if (rv) { |
258 | ls_recover(ls, rv); | 258 | ls_recover(ls, rv); |
259 | kfree(rv->nodeids); | 259 | kfree(rv->nodeids); |
260 | kfree(rv->new); | ||
260 | kfree(rv); | 261 | kfree(rv); |
261 | } | 262 | } |
262 | } | 263 | } |
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 5deb8b74e649..08f647d8188d 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c | |||
@@ -253,7 +253,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent) | |||
253 | * it has too few free inodes left (min_inodes) or | 253 | * it has too few free inodes left (min_inodes) or |
254 | * it has too few free blocks left (min_blocks) or | 254 | * it has too few free blocks left (min_blocks) or |
255 | * it's already running too large debt (max_debt). | 255 | * it's already running too large debt (max_debt). |
256 | * Parent's group is prefered, if it doesn't satisfy these | 256 | * Parent's group is preferred, if it doesn't satisfy these |
257 | * conditions we search cyclically through the rest. If none | 257 | * conditions we search cyclically through the rest. If none |
258 | * of the groups look good we just look for a group with more | 258 | * of the groups look good we just look for a group with more |
259 | * free inodes than average (starting at parent's group). | 259 | * free inodes than average (starting at parent's group). |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index c62006805427..b8a2990bab83 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -239,7 +239,7 @@ no_block: | |||
239 | * @inode: owner | 239 | * @inode: owner |
240 | * @ind: descriptor of indirect block. | 240 | * @ind: descriptor of indirect block. |
241 | * | 241 | * |
242 | * This function returns the prefered place for block allocation. | 242 | * This function returns the preferred place for block allocation. |
243 | * It is used when heuristic for sequential allocation fails. | 243 | * It is used when heuristic for sequential allocation fails. |
244 | * Rules are: | 244 | * Rules are: |
245 | * + if there is a block to the left of our position - allocate near it. | 245 | * + if there is a block to the left of our position - allocate near it. |
@@ -283,7 +283,7 @@ static unsigned long ext2_find_near(struct inode *inode, Indirect *ind) | |||
283 | } | 283 | } |
284 | 284 | ||
285 | /** | 285 | /** |
286 | * ext2_find_goal - find a prefered place for allocation. | 286 | * ext2_find_goal - find a preferred place for allocation. |
287 | * @inode: owner | 287 | * @inode: owner |
288 | * @block: block we want | 288 | * @block: block we want |
289 | * @partial: pointer to the last triple within a chain | 289 | * @partial: pointer to the last triple within a chain |
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index b8ea11fee5c6..de876fa793e1 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/time.h> | 12 | #include <linux/time.h> |
13 | #include <linux/sched.h> | 13 | #include <linux/sched.h> |
14 | #include <linux/compat.h> | 14 | #include <linux/compat.h> |
15 | #include <linux/mount.h> | ||
15 | #include <linux/smp_lock.h> | 16 | #include <linux/smp_lock.h> |
16 | #include <asm/current.h> | 17 | #include <asm/current.h> |
17 | #include <asm/uaccess.h> | 18 | #include <asm/uaccess.h> |
@@ -23,6 +24,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
23 | struct ext2_inode_info *ei = EXT2_I(inode); | 24 | struct ext2_inode_info *ei = EXT2_I(inode); |
24 | unsigned int flags; | 25 | unsigned int flags; |
25 | unsigned short rsv_window_size; | 26 | unsigned short rsv_window_size; |
27 | int ret; | ||
26 | 28 | ||
27 | ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg); | 29 | ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg); |
28 | 30 | ||
@@ -34,14 +36,19 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
34 | case EXT2_IOC_SETFLAGS: { | 36 | case EXT2_IOC_SETFLAGS: { |
35 | unsigned int oldflags; | 37 | unsigned int oldflags; |
36 | 38 | ||
37 | if (IS_RDONLY(inode)) | 39 | ret = mnt_want_write(filp->f_path.mnt); |
38 | return -EROFS; | 40 | if (ret) |
41 | return ret; | ||
39 | 42 | ||
40 | if (!is_owner_or_cap(inode)) | 43 | if (!is_owner_or_cap(inode)) { |
41 | return -EACCES; | 44 | ret = -EACCES; |
45 | goto setflags_out; | ||
46 | } | ||
42 | 47 | ||
43 | if (get_user(flags, (int __user *) arg)) | 48 | if (get_user(flags, (int __user *) arg)) { |
44 | return -EFAULT; | 49 | ret = -EFAULT; |
50 | goto setflags_out; | ||
51 | } | ||
45 | 52 | ||
46 | if (!S_ISDIR(inode->i_mode)) | 53 | if (!S_ISDIR(inode->i_mode)) |
47 | flags &= ~EXT2_DIRSYNC_FL; | 54 | flags &= ~EXT2_DIRSYNC_FL; |
@@ -50,7 +57,8 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
50 | /* Is it quota file? Do not allow user to mess with it */ | 57 | /* Is it quota file? Do not allow user to mess with it */ |
51 | if (IS_NOQUOTA(inode)) { | 58 | if (IS_NOQUOTA(inode)) { |
52 | mutex_unlock(&inode->i_mutex); | 59 | mutex_unlock(&inode->i_mutex); |
53 | return -EPERM; | 60 | ret = -EPERM; |
61 | goto setflags_out; | ||
54 | } | 62 | } |
55 | oldflags = ei->i_flags; | 63 | oldflags = ei->i_flags; |
56 | 64 | ||
@@ -63,7 +71,8 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
63 | if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) { | 71 | if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) { |
64 | if (!capable(CAP_LINUX_IMMUTABLE)) { | 72 | if (!capable(CAP_LINUX_IMMUTABLE)) { |
65 | mutex_unlock(&inode->i_mutex); | 73 | mutex_unlock(&inode->i_mutex); |
66 | return -EPERM; | 74 | ret = -EPERM; |
75 | goto setflags_out; | ||
67 | } | 76 | } |
68 | } | 77 | } |
69 | 78 | ||
@@ -75,20 +84,26 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
75 | ext2_set_inode_flags(inode); | 84 | ext2_set_inode_flags(inode); |
76 | inode->i_ctime = CURRENT_TIME_SEC; | 85 | inode->i_ctime = CURRENT_TIME_SEC; |
77 | mark_inode_dirty(inode); | 86 | mark_inode_dirty(inode); |
78 | return 0; | 87 | setflags_out: |
88 | mnt_drop_write(filp->f_path.mnt); | ||
89 | return ret; | ||
79 | } | 90 | } |
80 | case EXT2_IOC_GETVERSION: | 91 | case EXT2_IOC_GETVERSION: |
81 | return put_user(inode->i_generation, (int __user *) arg); | 92 | return put_user(inode->i_generation, (int __user *) arg); |
82 | case EXT2_IOC_SETVERSION: | 93 | case EXT2_IOC_SETVERSION: |
83 | if (!is_owner_or_cap(inode)) | 94 | if (!is_owner_or_cap(inode)) |
84 | return -EPERM; | 95 | return -EPERM; |
85 | if (IS_RDONLY(inode)) | 96 | ret = mnt_want_write(filp->f_path.mnt); |
86 | return -EROFS; | 97 | if (ret) |
87 | if (get_user(inode->i_generation, (int __user *) arg)) | 98 | return ret; |
88 | return -EFAULT; | 99 | if (get_user(inode->i_generation, (int __user *) arg)) { |
89 | inode->i_ctime = CURRENT_TIME_SEC; | 100 | ret = -EFAULT; |
90 | mark_inode_dirty(inode); | 101 | } else { |
91 | return 0; | 102 | inode->i_ctime = CURRENT_TIME_SEC; |
103 | mark_inode_dirty(inode); | ||
104 | } | ||
105 | mnt_drop_write(filp->f_path.mnt); | ||
106 | return ret; | ||
92 | case EXT2_IOC_GETRSVSZ: | 107 | case EXT2_IOC_GETRSVSZ: |
93 | if (test_opt(inode->i_sb, RESERVATION) | 108 | if (test_opt(inode->i_sb, RESERVATION) |
94 | && S_ISREG(inode->i_mode) | 109 | && S_ISREG(inode->i_mode) |
@@ -102,15 +117,16 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
102 | if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) | 117 | if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) |
103 | return -ENOTTY; | 118 | return -ENOTTY; |
104 | 119 | ||
105 | if (IS_RDONLY(inode)) | 120 | if (!is_owner_or_cap(inode)) |
106 | return -EROFS; | ||
107 | |||
108 | if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) | ||
109 | return -EACCES; | 121 | return -EACCES; |
110 | 122 | ||
111 | if (get_user(rsv_window_size, (int __user *)arg)) | 123 | if (get_user(rsv_window_size, (int __user *)arg)) |
112 | return -EFAULT; | 124 | return -EFAULT; |
113 | 125 | ||
126 | ret = mnt_want_write(filp->f_path.mnt); | ||
127 | if (ret) | ||
128 | return ret; | ||
129 | |||
114 | if (rsv_window_size > EXT2_MAX_RESERVE_BLOCKS) | 130 | if (rsv_window_size > EXT2_MAX_RESERVE_BLOCKS) |
115 | rsv_window_size = EXT2_MAX_RESERVE_BLOCKS; | 131 | rsv_window_size = EXT2_MAX_RESERVE_BLOCKS; |
116 | 132 | ||
@@ -131,6 +147,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
131 | rsv->rsv_goal_size = rsv_window_size; | 147 | rsv->rsv_goal_size = rsv_window_size; |
132 | } | 148 | } |
133 | mutex_unlock(&ei->truncate_mutex); | 149 | mutex_unlock(&ei->truncate_mutex); |
150 | mnt_drop_write(filp->f_path.mnt); | ||
134 | return 0; | 151 | return 0; |
135 | } | 152 | } |
136 | default: | 153 | default: |
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 4f4020c54683..96dd5573e49b 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c | |||
@@ -239,7 +239,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent) | |||
239 | * it has too few free inodes left (min_inodes) or | 239 | * it has too few free inodes left (min_inodes) or |
240 | * it has too few free blocks left (min_blocks) or | 240 | * it has too few free blocks left (min_blocks) or |
241 | * it's already running too large debt (max_debt). | 241 | * it's already running too large debt (max_debt). |
242 | * Parent's group is prefered, if it doesn't satisfy these | 242 | * Parent's group is preferred, if it doesn't satisfy these |
243 | * conditions we search cyclically through the rest. If none | 243 | * conditions we search cyclically through the rest. If none |
244 | * of the groups look good we just look for a group with more | 244 | * of the groups look good we just look for a group with more |
245 | * free inodes than average (starting at parent's group). | 245 | * free inodes than average (starting at parent's group). |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index eb95670a27eb..c683609b0e3a 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -392,7 +392,7 @@ no_block: | |||
392 | * @inode: owner | 392 | * @inode: owner |
393 | * @ind: descriptor of indirect block. | 393 | * @ind: descriptor of indirect block. |
394 | * | 394 | * |
395 | * This function returns the prefered place for block allocation. | 395 | * This function returns the preferred place for block allocation. |
396 | * It is used when heuristic for sequential allocation fails. | 396 | * It is used when heuristic for sequential allocation fails. |
397 | * Rules are: | 397 | * Rules are: |
398 | * + if there is a block to the left of our position - allocate near it. | 398 | * + if there is a block to the left of our position - allocate near it. |
@@ -436,12 +436,12 @@ static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind) | |||
436 | } | 436 | } |
437 | 437 | ||
438 | /** | 438 | /** |
439 | * ext3_find_goal - find a prefered place for allocation. | 439 | * ext3_find_goal - find a preferred place for allocation. |
440 | * @inode: owner | 440 | * @inode: owner |
441 | * @block: block we want | 441 | * @block: block we want |
442 | * @partial: pointer to the last triple within a chain | 442 | * @partial: pointer to the last triple within a chain |
443 | * | 443 | * |
444 | * Normally this function find the prefered place for block allocation, | 444 | * Normally this function find the preferred place for block allocation, |
445 | * returns it. | 445 | * returns it. |
446 | */ | 446 | */ |
447 | 447 | ||
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index 023a070f55f1..0d0c70151642 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/capability.h> | 12 | #include <linux/capability.h> |
13 | #include <linux/ext3_fs.h> | 13 | #include <linux/ext3_fs.h> |
14 | #include <linux/ext3_jbd.h> | 14 | #include <linux/ext3_jbd.h> |
15 | #include <linux/mount.h> | ||
15 | #include <linux/time.h> | 16 | #include <linux/time.h> |
16 | #include <linux/compat.h> | 17 | #include <linux/compat.h> |
17 | #include <linux/smp_lock.h> | 18 | #include <linux/smp_lock.h> |
@@ -38,14 +39,19 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, | |||
38 | unsigned int oldflags; | 39 | unsigned int oldflags; |
39 | unsigned int jflag; | 40 | unsigned int jflag; |
40 | 41 | ||
41 | if (IS_RDONLY(inode)) | 42 | err = mnt_want_write(filp->f_path.mnt); |
42 | return -EROFS; | 43 | if (err) |
44 | return err; | ||
43 | 45 | ||
44 | if (!is_owner_or_cap(inode)) | 46 | if (!is_owner_or_cap(inode)) { |
45 | return -EACCES; | 47 | err = -EACCES; |
48 | goto flags_out; | ||
49 | } | ||
46 | 50 | ||
47 | if (get_user(flags, (int __user *) arg)) | 51 | if (get_user(flags, (int __user *) arg)) { |
48 | return -EFAULT; | 52 | err = -EFAULT; |
53 | goto flags_out; | ||
54 | } | ||
49 | 55 | ||
50 | if (!S_ISDIR(inode->i_mode)) | 56 | if (!S_ISDIR(inode->i_mode)) |
51 | flags &= ~EXT3_DIRSYNC_FL; | 57 | flags &= ~EXT3_DIRSYNC_FL; |
@@ -54,7 +60,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, | |||
54 | /* Is it quota file? Do not allow user to mess with it */ | 60 | /* Is it quota file? Do not allow user to mess with it */ |
55 | if (IS_NOQUOTA(inode)) { | 61 | if (IS_NOQUOTA(inode)) { |
56 | mutex_unlock(&inode->i_mutex); | 62 | mutex_unlock(&inode->i_mutex); |
57 | return -EPERM; | 63 | err = -EPERM; |
64 | goto flags_out; | ||
58 | } | 65 | } |
59 | oldflags = ei->i_flags; | 66 | oldflags = ei->i_flags; |
60 | 67 | ||
@@ -70,7 +77,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, | |||
70 | if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { | 77 | if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { |
71 | if (!capable(CAP_LINUX_IMMUTABLE)) { | 78 | if (!capable(CAP_LINUX_IMMUTABLE)) { |
72 | mutex_unlock(&inode->i_mutex); | 79 | mutex_unlock(&inode->i_mutex); |
73 | return -EPERM; | 80 | err = -EPERM; |
81 | goto flags_out; | ||
74 | } | 82 | } |
75 | } | 83 | } |
76 | 84 | ||
@@ -81,7 +89,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, | |||
81 | if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) { | 89 | if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) { |
82 | if (!capable(CAP_SYS_RESOURCE)) { | 90 | if (!capable(CAP_SYS_RESOURCE)) { |
83 | mutex_unlock(&inode->i_mutex); | 91 | mutex_unlock(&inode->i_mutex); |
84 | return -EPERM; | 92 | err = -EPERM; |
93 | goto flags_out; | ||
85 | } | 94 | } |
86 | } | 95 | } |
87 | 96 | ||
@@ -89,7 +98,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, | |||
89 | handle = ext3_journal_start(inode, 1); | 98 | handle = ext3_journal_start(inode, 1); |
90 | if (IS_ERR(handle)) { | 99 | if (IS_ERR(handle)) { |
91 | mutex_unlock(&inode->i_mutex); | 100 | mutex_unlock(&inode->i_mutex); |
92 | return PTR_ERR(handle); | 101 | err = PTR_ERR(handle); |
102 | goto flags_out; | ||
93 | } | 103 | } |
94 | if (IS_SYNC(inode)) | 104 | if (IS_SYNC(inode)) |
95 | handle->h_sync = 1; | 105 | handle->h_sync = 1; |
@@ -115,6 +125,8 @@ flags_err: | |||
115 | if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) | 125 | if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) |
116 | err = ext3_change_inode_journal_flag(inode, jflag); | 126 | err = ext3_change_inode_journal_flag(inode, jflag); |
117 | mutex_unlock(&inode->i_mutex); | 127 | mutex_unlock(&inode->i_mutex); |
128 | flags_out: | ||
129 | mnt_drop_write(filp->f_path.mnt); | ||
118 | return err; | 130 | return err; |
119 | } | 131 | } |
120 | case EXT3_IOC_GETVERSION: | 132 | case EXT3_IOC_GETVERSION: |
@@ -129,14 +141,18 @@ flags_err: | |||
129 | 141 | ||
130 | if (!is_owner_or_cap(inode)) | 142 | if (!is_owner_or_cap(inode)) |
131 | return -EPERM; | 143 | return -EPERM; |
132 | if (IS_RDONLY(inode)) | 144 | err = mnt_want_write(filp->f_path.mnt); |
133 | return -EROFS; | 145 | if (err) |
134 | if (get_user(generation, (int __user *) arg)) | 146 | return err; |
135 | return -EFAULT; | 147 | if (get_user(generation, (int __user *) arg)) { |
136 | 148 | err = -EFAULT; | |
149 | goto setversion_out; | ||
150 | } | ||
137 | handle = ext3_journal_start(inode, 1); | 151 | handle = ext3_journal_start(inode, 1); |
138 | if (IS_ERR(handle)) | 152 | if (IS_ERR(handle)) { |
139 | return PTR_ERR(handle); | 153 | err = PTR_ERR(handle); |
154 | goto setversion_out; | ||
155 | } | ||
140 | err = ext3_reserve_inode_write(handle, inode, &iloc); | 156 | err = ext3_reserve_inode_write(handle, inode, &iloc); |
141 | if (err == 0) { | 157 | if (err == 0) { |
142 | inode->i_ctime = CURRENT_TIME_SEC; | 158 | inode->i_ctime = CURRENT_TIME_SEC; |
@@ -144,6 +160,8 @@ flags_err: | |||
144 | err = ext3_mark_iloc_dirty(handle, inode, &iloc); | 160 | err = ext3_mark_iloc_dirty(handle, inode, &iloc); |
145 | } | 161 | } |
146 | ext3_journal_stop(handle); | 162 | ext3_journal_stop(handle); |
163 | setversion_out: | ||
164 | mnt_drop_write(filp->f_path.mnt); | ||
147 | return err; | 165 | return err; |
148 | } | 166 | } |
149 | #ifdef CONFIG_JBD_DEBUG | 167 | #ifdef CONFIG_JBD_DEBUG |
@@ -179,18 +197,24 @@ flags_err: | |||
179 | } | 197 | } |
180 | return -ENOTTY; | 198 | return -ENOTTY; |
181 | case EXT3_IOC_SETRSVSZ: { | 199 | case EXT3_IOC_SETRSVSZ: { |
200 | int err; | ||
182 | 201 | ||
183 | if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) | 202 | if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) |
184 | return -ENOTTY; | 203 | return -ENOTTY; |
185 | 204 | ||
186 | if (IS_RDONLY(inode)) | 205 | err = mnt_want_write(filp->f_path.mnt); |
187 | return -EROFS; | 206 | if (err) |
207 | return err; | ||
188 | 208 | ||
189 | if (!is_owner_or_cap(inode)) | 209 | if (!is_owner_or_cap(inode)) { |
190 | return -EACCES; | 210 | err = -EACCES; |
211 | goto setrsvsz_out; | ||
212 | } | ||
191 | 213 | ||
192 | if (get_user(rsv_window_size, (int __user *)arg)) | 214 | if (get_user(rsv_window_size, (int __user *)arg)) { |
193 | return -EFAULT; | 215 | err = -EFAULT; |
216 | goto setrsvsz_out; | ||
217 | } | ||
194 | 218 | ||
195 | if (rsv_window_size > EXT3_MAX_RESERVE_BLOCKS) | 219 | if (rsv_window_size > EXT3_MAX_RESERVE_BLOCKS) |
196 | rsv_window_size = EXT3_MAX_RESERVE_BLOCKS; | 220 | rsv_window_size = EXT3_MAX_RESERVE_BLOCKS; |
@@ -208,7 +232,9 @@ flags_err: | |||
208 | rsv->rsv_goal_size = rsv_window_size; | 232 | rsv->rsv_goal_size = rsv_window_size; |
209 | } | 233 | } |
210 | mutex_unlock(&ei->truncate_mutex); | 234 | mutex_unlock(&ei->truncate_mutex); |
211 | return 0; | 235 | setrsvsz_out: |
236 | mnt_drop_write(filp->f_path.mnt); | ||
237 | return err; | ||
212 | } | 238 | } |
213 | case EXT3_IOC_GROUP_EXTEND: { | 239 | case EXT3_IOC_GROUP_EXTEND: { |
214 | ext3_fsblk_t n_blocks_count; | 240 | ext3_fsblk_t n_blocks_count; |
@@ -218,17 +244,20 @@ flags_err: | |||
218 | if (!capable(CAP_SYS_RESOURCE)) | 244 | if (!capable(CAP_SYS_RESOURCE)) |
219 | return -EPERM; | 245 | return -EPERM; |
220 | 246 | ||
221 | if (IS_RDONLY(inode)) | 247 | err = mnt_want_write(filp->f_path.mnt); |
222 | return -EROFS; | 248 | if (err) |
223 | 249 | return err; | |
224 | if (get_user(n_blocks_count, (__u32 __user *)arg)) | ||
225 | return -EFAULT; | ||
226 | 250 | ||
251 | if (get_user(n_blocks_count, (__u32 __user *)arg)) { | ||
252 | err = -EFAULT; | ||
253 | goto group_extend_out; | ||
254 | } | ||
227 | err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count); | 255 | err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count); |
228 | journal_lock_updates(EXT3_SB(sb)->s_journal); | 256 | journal_lock_updates(EXT3_SB(sb)->s_journal); |
229 | journal_flush(EXT3_SB(sb)->s_journal); | 257 | journal_flush(EXT3_SB(sb)->s_journal); |
230 | journal_unlock_updates(EXT3_SB(sb)->s_journal); | 258 | journal_unlock_updates(EXT3_SB(sb)->s_journal); |
231 | 259 | group_extend_out: | |
260 | mnt_drop_write(filp->f_path.mnt); | ||
232 | return err; | 261 | return err; |
233 | } | 262 | } |
234 | case EXT3_IOC_GROUP_ADD: { | 263 | case EXT3_IOC_GROUP_ADD: { |
@@ -239,18 +268,22 @@ flags_err: | |||
239 | if (!capable(CAP_SYS_RESOURCE)) | 268 | if (!capable(CAP_SYS_RESOURCE)) |
240 | return -EPERM; | 269 | return -EPERM; |
241 | 270 | ||
242 | if (IS_RDONLY(inode)) | 271 | err = mnt_want_write(filp->f_path.mnt); |
243 | return -EROFS; | 272 | if (err) |
273 | return err; | ||
244 | 274 | ||
245 | if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg, | 275 | if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg, |
246 | sizeof(input))) | 276 | sizeof(input))) { |
247 | return -EFAULT; | 277 | err = -EFAULT; |
278 | goto group_add_out; | ||
279 | } | ||
248 | 280 | ||
249 | err = ext3_group_add(sb, &input); | 281 | err = ext3_group_add(sb, &input); |
250 | journal_lock_updates(EXT3_SB(sb)->s_journal); | 282 | journal_lock_updates(EXT3_SB(sb)->s_journal); |
251 | journal_flush(EXT3_SB(sb)->s_journal); | 283 | journal_flush(EXT3_SB(sb)->s_journal); |
252 | journal_unlock_updates(EXT3_SB(sb)->s_journal); | 284 | journal_unlock_updates(EXT3_SB(sb)->s_journal); |
253 | 285 | group_add_out: | |
286 | mnt_drop_write(filp->f_path.mnt); | ||
254 | return err; | 287 | return err; |
255 | } | 288 | } |
256 | 289 | ||
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 8036b9b5376b..486e46a3918d 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -305,7 +305,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent, | |||
305 | * it has too few free inodes left (min_inodes) or | 305 | * it has too few free inodes left (min_inodes) or |
306 | * it has too few free blocks left (min_blocks) or | 306 | * it has too few free blocks left (min_blocks) or |
307 | * it's already running too large debt (max_debt). | 307 | * it's already running too large debt (max_debt). |
308 | * Parent's group is prefered, if it doesn't satisfy these | 308 | * Parent's group is preferred, if it doesn't satisfy these |
309 | * conditions we search cyclically through the rest. If none | 309 | * conditions we search cyclically through the rest. If none |
310 | * of the groups look good we just look for a group with more | 310 | * of the groups look good we just look for a group with more |
311 | * free inodes than average (starting at parent's group). | 311 | * free inodes than average (starting at parent's group). |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 945cbf6cb1fc..8fab233cb05f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -382,7 +382,7 @@ no_block: | |||
382 | * @inode: owner | 382 | * @inode: owner |
383 | * @ind: descriptor of indirect block. | 383 | * @ind: descriptor of indirect block. |
384 | * | 384 | * |
385 | * This function returns the prefered place for block allocation. | 385 | * This function returns the preferred place for block allocation. |
386 | * It is used when heuristic for sequential allocation fails. | 386 | * It is used when heuristic for sequential allocation fails. |
387 | * Rules are: | 387 | * Rules are: |
388 | * + if there is a block to the left of our position - allocate near it. | 388 | * + if there is a block to the left of our position - allocate near it. |
@@ -432,12 +432,12 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | |||
432 | } | 432 | } |
433 | 433 | ||
434 | /** | 434 | /** |
435 | * ext4_find_goal - find a prefered place for allocation. | 435 | * ext4_find_goal - find a preferred place for allocation. |
436 | * @inode: owner | 436 | * @inode: owner |
437 | * @block: block we want | 437 | * @block: block we want |
438 | * @partial: pointer to the last triple within a chain | 438 | * @partial: pointer to the last triple within a chain |
439 | * | 439 | * |
440 | * Normally this function find the prefered place for block allocation, | 440 | * Normally this function find the preferred place for block allocation, |
441 | * returns it. | 441 | * returns it. |
442 | */ | 442 | */ |
443 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, | 443 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 2ed7c37f897e..25b13ede8086 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/time.h> | 15 | #include <linux/time.h> |
16 | #include <linux/compat.h> | 16 | #include <linux/compat.h> |
17 | #include <linux/smp_lock.h> | 17 | #include <linux/smp_lock.h> |
18 | #include <linux/mount.h> | ||
18 | #include <asm/uaccess.h> | 19 | #include <asm/uaccess.h> |
19 | 20 | ||
20 | int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, | 21 | int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, |
@@ -38,24 +39,25 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, | |||
38 | unsigned int oldflags; | 39 | unsigned int oldflags; |
39 | unsigned int jflag; | 40 | unsigned int jflag; |
40 | 41 | ||
41 | if (IS_RDONLY(inode)) | ||
42 | return -EROFS; | ||
43 | |||
44 | if (!is_owner_or_cap(inode)) | 42 | if (!is_owner_or_cap(inode)) |
45 | return -EACCES; | 43 | return -EACCES; |
46 | 44 | ||
47 | if (get_user(flags, (int __user *) arg)) | 45 | if (get_user(flags, (int __user *) arg)) |
48 | return -EFAULT; | 46 | return -EFAULT; |
49 | 47 | ||
48 | err = mnt_want_write(filp->f_path.mnt); | ||
49 | if (err) | ||
50 | return err; | ||
51 | |||
50 | if (!S_ISDIR(inode->i_mode)) | 52 | if (!S_ISDIR(inode->i_mode)) |
51 | flags &= ~EXT4_DIRSYNC_FL; | 53 | flags &= ~EXT4_DIRSYNC_FL; |
52 | 54 | ||
55 | err = -EPERM; | ||
53 | mutex_lock(&inode->i_mutex); | 56 | mutex_lock(&inode->i_mutex); |
54 | /* Is it quota file? Do not allow user to mess with it */ | 57 | /* Is it quota file? Do not allow user to mess with it */ |
55 | if (IS_NOQUOTA(inode)) { | 58 | if (IS_NOQUOTA(inode)) |
56 | mutex_unlock(&inode->i_mutex); | 59 | goto flags_out; |
57 | return -EPERM; | 60 | |
58 | } | ||
59 | oldflags = ei->i_flags; | 61 | oldflags = ei->i_flags; |
60 | 62 | ||
61 | /* The JOURNAL_DATA flag is modifiable only by root */ | 63 | /* The JOURNAL_DATA flag is modifiable only by root */ |
@@ -68,10 +70,8 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, | |||
68 | * This test looks nicer. Thanks to Pauline Middelink | 70 | * This test looks nicer. Thanks to Pauline Middelink |
69 | */ | 71 | */ |
70 | if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) { | 72 | if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) { |
71 | if (!capable(CAP_LINUX_IMMUTABLE)) { | 73 | if (!capable(CAP_LINUX_IMMUTABLE)) |
72 | mutex_unlock(&inode->i_mutex); | 74 | goto flags_out; |
73 | return -EPERM; | ||
74 | } | ||
75 | } | 75 | } |
76 | 76 | ||
77 | /* | 77 | /* |
@@ -79,17 +79,14 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, | |||
79 | * the relevant capability. | 79 | * the relevant capability. |
80 | */ | 80 | */ |
81 | if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { | 81 | if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { |
82 | if (!capable(CAP_SYS_RESOURCE)) { | 82 | if (!capable(CAP_SYS_RESOURCE)) |
83 | mutex_unlock(&inode->i_mutex); | 83 | goto flags_out; |
84 | return -EPERM; | ||
85 | } | ||
86 | } | 84 | } |
87 | 85 | ||
88 | |||
89 | handle = ext4_journal_start(inode, 1); | 86 | handle = ext4_journal_start(inode, 1); |
90 | if (IS_ERR(handle)) { | 87 | if (IS_ERR(handle)) { |
91 | mutex_unlock(&inode->i_mutex); | 88 | err = PTR_ERR(handle); |
92 | return PTR_ERR(handle); | 89 | goto flags_out; |
93 | } | 90 | } |
94 | if (IS_SYNC(inode)) | 91 | if (IS_SYNC(inode)) |
95 | handle->h_sync = 1; | 92 | handle->h_sync = 1; |
@@ -107,14 +104,14 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, | |||
107 | err = ext4_mark_iloc_dirty(handle, inode, &iloc); | 104 | err = ext4_mark_iloc_dirty(handle, inode, &iloc); |
108 | flags_err: | 105 | flags_err: |
109 | ext4_journal_stop(handle); | 106 | ext4_journal_stop(handle); |
110 | if (err) { | 107 | if (err) |
111 | mutex_unlock(&inode->i_mutex); | 108 | goto flags_out; |
112 | return err; | ||
113 | } | ||
114 | 109 | ||
115 | if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) | 110 | if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) |
116 | err = ext4_change_inode_journal_flag(inode, jflag); | 111 | err = ext4_change_inode_journal_flag(inode, jflag); |
112 | flags_out: | ||
117 | mutex_unlock(&inode->i_mutex); | 113 | mutex_unlock(&inode->i_mutex); |
114 | mnt_drop_write(filp->f_path.mnt); | ||
118 | return err; | 115 | return err; |
119 | } | 116 | } |
120 | case EXT4_IOC_GETVERSION: | 117 | case EXT4_IOC_GETVERSION: |
@@ -129,14 +126,20 @@ flags_err: | |||
129 | 126 | ||
130 | if (!is_owner_or_cap(inode)) | 127 | if (!is_owner_or_cap(inode)) |
131 | return -EPERM; | 128 | return -EPERM; |
132 | if (IS_RDONLY(inode)) | 129 | |
133 | return -EROFS; | 130 | err = mnt_want_write(filp->f_path.mnt); |
134 | if (get_user(generation, (int __user *) arg)) | 131 | if (err) |
135 | return -EFAULT; | 132 | return err; |
133 | if (get_user(generation, (int __user *) arg)) { | ||
134 | err = -EFAULT; | ||
135 | goto setversion_out; | ||
136 | } | ||
136 | 137 | ||
137 | handle = ext4_journal_start(inode, 1); | 138 | handle = ext4_journal_start(inode, 1); |
138 | if (IS_ERR(handle)) | 139 | if (IS_ERR(handle)) { |
139 | return PTR_ERR(handle); | 140 | err = PTR_ERR(handle); |
141 | goto setversion_out; | ||
142 | } | ||
140 | err = ext4_reserve_inode_write(handle, inode, &iloc); | 143 | err = ext4_reserve_inode_write(handle, inode, &iloc); |
141 | if (err == 0) { | 144 | if (err == 0) { |
142 | inode->i_ctime = ext4_current_time(inode); | 145 | inode->i_ctime = ext4_current_time(inode); |
@@ -144,6 +147,8 @@ flags_err: | |||
144 | err = ext4_mark_iloc_dirty(handle, inode, &iloc); | 147 | err = ext4_mark_iloc_dirty(handle, inode, &iloc); |
145 | } | 148 | } |
146 | ext4_journal_stop(handle); | 149 | ext4_journal_stop(handle); |
150 | setversion_out: | ||
151 | mnt_drop_write(filp->f_path.mnt); | ||
147 | return err; | 152 | return err; |
148 | } | 153 | } |
149 | #ifdef CONFIG_JBD2_DEBUG | 154 | #ifdef CONFIG_JBD2_DEBUG |
@@ -179,19 +184,21 @@ flags_err: | |||
179 | } | 184 | } |
180 | return -ENOTTY; | 185 | return -ENOTTY; |
181 | case EXT4_IOC_SETRSVSZ: { | 186 | case EXT4_IOC_SETRSVSZ: { |
187 | int err; | ||
182 | 188 | ||
183 | if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) | 189 | if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) |
184 | return -ENOTTY; | 190 | return -ENOTTY; |
185 | 191 | ||
186 | if (IS_RDONLY(inode)) | ||
187 | return -EROFS; | ||
188 | |||
189 | if (!is_owner_or_cap(inode)) | 192 | if (!is_owner_or_cap(inode)) |
190 | return -EACCES; | 193 | return -EACCES; |
191 | 194 | ||
192 | if (get_user(rsv_window_size, (int __user *)arg)) | 195 | if (get_user(rsv_window_size, (int __user *)arg)) |
193 | return -EFAULT; | 196 | return -EFAULT; |
194 | 197 | ||
198 | err = mnt_want_write(filp->f_path.mnt); | ||
199 | if (err) | ||
200 | return err; | ||
201 | |||
195 | if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS) | 202 | if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS) |
196 | rsv_window_size = EXT4_MAX_RESERVE_BLOCKS; | 203 | rsv_window_size = EXT4_MAX_RESERVE_BLOCKS; |
197 | 204 | ||
@@ -208,6 +215,7 @@ flags_err: | |||
208 | rsv->rsv_goal_size = rsv_window_size; | 215 | rsv->rsv_goal_size = rsv_window_size; |
209 | } | 216 | } |
210 | up_write(&ei->i_data_sem); | 217 | up_write(&ei->i_data_sem); |
218 | mnt_drop_write(filp->f_path.mnt); | ||
211 | return 0; | 219 | return 0; |
212 | } | 220 | } |
213 | case EXT4_IOC_GROUP_EXTEND: { | 221 | case EXT4_IOC_GROUP_EXTEND: { |
@@ -218,16 +226,18 @@ flags_err: | |||
218 | if (!capable(CAP_SYS_RESOURCE)) | 226 | if (!capable(CAP_SYS_RESOURCE)) |
219 | return -EPERM; | 227 | return -EPERM; |
220 | 228 | ||
221 | if (IS_RDONLY(inode)) | ||
222 | return -EROFS; | ||
223 | |||
224 | if (get_user(n_blocks_count, (__u32 __user *)arg)) | 229 | if (get_user(n_blocks_count, (__u32 __user *)arg)) |
225 | return -EFAULT; | 230 | return -EFAULT; |
226 | 231 | ||
232 | err = mnt_want_write(filp->f_path.mnt); | ||
233 | if (err) | ||
234 | return err; | ||
235 | |||
227 | err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); | 236 | err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); |
228 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); | 237 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); |
229 | jbd2_journal_flush(EXT4_SB(sb)->s_journal); | 238 | jbd2_journal_flush(EXT4_SB(sb)->s_journal); |
230 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | 239 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); |
240 | mnt_drop_write(filp->f_path.mnt); | ||
231 | 241 | ||
232 | return err; | 242 | return err; |
233 | } | 243 | } |
@@ -239,17 +249,19 @@ flags_err: | |||
239 | if (!capable(CAP_SYS_RESOURCE)) | 249 | if (!capable(CAP_SYS_RESOURCE)) |
240 | return -EPERM; | 250 | return -EPERM; |
241 | 251 | ||
242 | if (IS_RDONLY(inode)) | ||
243 | return -EROFS; | ||
244 | |||
245 | if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, | 252 | if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, |
246 | sizeof(input))) | 253 | sizeof(input))) |
247 | return -EFAULT; | 254 | return -EFAULT; |
248 | 255 | ||
256 | err = mnt_want_write(filp->f_path.mnt); | ||
257 | if (err) | ||
258 | return err; | ||
259 | |||
249 | err = ext4_group_add(sb, &input); | 260 | err = ext4_group_add(sb, &input); |
250 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); | 261 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); |
251 | jbd2_journal_flush(EXT4_SB(sb)->s_journal); | 262 | jbd2_journal_flush(EXT4_SB(sb)->s_journal); |
252 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | 263 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); |
264 | mnt_drop_write(filp->f_path.mnt); | ||
253 | 265 | ||
254 | return err; | 266 | return err; |
255 | } | 267 | } |
diff --git a/fs/fat/file.c b/fs/fat/file.c index c614175876e0..2a3bed967041 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c | |||
@@ -8,6 +8,7 @@ | |||
8 | 8 | ||
9 | #include <linux/capability.h> | 9 | #include <linux/capability.h> |
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/mount.h> | ||
11 | #include <linux/time.h> | 12 | #include <linux/time.h> |
12 | #include <linux/msdos_fs.h> | 13 | #include <linux/msdos_fs.h> |
13 | #include <linux/smp_lock.h> | 14 | #include <linux/smp_lock.h> |
@@ -46,10 +47,9 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, | |||
46 | 47 | ||
47 | mutex_lock(&inode->i_mutex); | 48 | mutex_lock(&inode->i_mutex); |
48 | 49 | ||
49 | if (IS_RDONLY(inode)) { | 50 | err = mnt_want_write(filp->f_path.mnt); |
50 | err = -EROFS; | 51 | if (err) |
51 | goto up; | 52 | goto up_no_drop_write; |
52 | } | ||
53 | 53 | ||
54 | /* | 54 | /* |
55 | * ATTR_VOLUME and ATTR_DIR cannot be changed; this also | 55 | * ATTR_VOLUME and ATTR_DIR cannot be changed; this also |
@@ -105,7 +105,9 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, | |||
105 | 105 | ||
106 | MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED; | 106 | MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED; |
107 | mark_inode_dirty(inode); | 107 | mark_inode_dirty(inode); |
108 | up: | 108 | up: |
109 | mnt_drop_write(filp->f_path.mnt); | ||
110 | up_no_drop_write: | ||
109 | mutex_unlock(&inode->i_mutex); | 111 | mutex_unlock(&inode->i_mutex); |
110 | return err; | 112 | return err; |
111 | } | 113 | } |
diff --git a/fs/file_table.c b/fs/file_table.c index 986ff4ed0a7c..7a0a9b872251 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -42,6 +42,7 @@ static inline void file_free_rcu(struct rcu_head *head) | |||
42 | static inline void file_free(struct file *f) | 42 | static inline void file_free(struct file *f) |
43 | { | 43 | { |
44 | percpu_counter_dec(&nr_files); | 44 | percpu_counter_dec(&nr_files); |
45 | file_check_state(f); | ||
45 | call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); | 46 | call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); |
46 | } | 47 | } |
47 | 48 | ||
@@ -199,6 +200,18 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry, | |||
199 | file->f_mapping = dentry->d_inode->i_mapping; | 200 | file->f_mapping = dentry->d_inode->i_mapping; |
200 | file->f_mode = mode; | 201 | file->f_mode = mode; |
201 | file->f_op = fop; | 202 | file->f_op = fop; |
203 | |||
204 | /* | ||
205 | * These mounts don't really matter in practice | ||
206 | * for r/o bind mounts. They aren't userspace- | ||
207 | * visible. We do this for consistency, and so | ||
208 | * that we can do debugging checks at __fput() | ||
209 | */ | ||
210 | if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) { | ||
211 | file_take_write(file); | ||
212 | error = mnt_want_write(mnt); | ||
213 | WARN_ON(error); | ||
214 | } | ||
202 | return error; | 215 | return error; |
203 | } | 216 | } |
204 | EXPORT_SYMBOL(init_file); | 217 | EXPORT_SYMBOL(init_file); |
@@ -211,6 +224,31 @@ void fput(struct file *file) | |||
211 | 224 | ||
212 | EXPORT_SYMBOL(fput); | 225 | EXPORT_SYMBOL(fput); |
213 | 226 | ||
227 | /** | ||
228 | * drop_file_write_access - give up ability to write to a file | ||
229 | * @file: the file to which we will stop writing | ||
230 | * | ||
231 | * This is a central place which will give up the ability | ||
232 | * to write to @file, along with access to write through | ||
233 | * its vfsmount. | ||
234 | */ | ||
235 | void drop_file_write_access(struct file *file) | ||
236 | { | ||
237 | struct vfsmount *mnt = file->f_path.mnt; | ||
238 | struct dentry *dentry = file->f_path.dentry; | ||
239 | struct inode *inode = dentry->d_inode; | ||
240 | |||
241 | put_write_access(inode); | ||
242 | |||
243 | if (special_file(inode->i_mode)) | ||
244 | return; | ||
245 | if (file_check_writeable(file) != 0) | ||
246 | return; | ||
247 | mnt_drop_write(mnt); | ||
248 | file_release_write(file); | ||
249 | } | ||
250 | EXPORT_SYMBOL_GPL(drop_file_write_access); | ||
251 | |||
214 | /* __fput is called from task context when aio completion releases the last | 252 | /* __fput is called from task context when aio completion releases the last |
215 | * last use of a struct file *. Do not use otherwise. | 253 | * last use of a struct file *. Do not use otherwise. |
216 | */ | 254 | */ |
@@ -236,10 +274,10 @@ void __fput(struct file *file) | |||
236 | if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL)) | 274 | if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL)) |
237 | cdev_put(inode->i_cdev); | 275 | cdev_put(inode->i_cdev); |
238 | fops_put(file->f_op); | 276 | fops_put(file->f_op); |
239 | if (file->f_mode & FMODE_WRITE) | ||
240 | put_write_access(inode); | ||
241 | put_pid(file->f_owner.pid); | 277 | put_pid(file->f_owner.pid); |
242 | file_kill(file); | 278 | file_kill(file); |
279 | if (file->f_mode & FMODE_WRITE) | ||
280 | drop_file_write_access(file); | ||
243 | file->f_path.dentry = NULL; | 281 | file->f_path.dentry = NULL; |
244 | file->f_path.mnt = NULL; | 282 | file->f_path.mnt = NULL; |
245 | file_free(file); | 283 | file_free(file); |
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index de8e64c03f73..7f7947e3dfbb 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig | |||
@@ -1,6 +1,6 @@ | |||
1 | config GFS2_FS | 1 | config GFS2_FS |
2 | tristate "GFS2 file system support" | 2 | tristate "GFS2 file system support" |
3 | depends on EXPERIMENTAL | 3 | depends on EXPERIMENTAL && (64BIT || (LSF && LBD)) |
4 | select FS_POSIX_ACL | 4 | select FS_POSIX_ACL |
5 | select CRC32 | 5 | select CRC32 |
6 | help | 6 | help |
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index 8fff11058cee..e2350df02a07 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile | |||
@@ -1,6 +1,6 @@ | |||
1 | obj-$(CONFIG_GFS2_FS) += gfs2.o | 1 | obj-$(CONFIG_GFS2_FS) += gfs2.o |
2 | gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \ | 2 | gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \ |
3 | glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \ | 3 | glops.o inode.o log.o lops.o locking.o main.o meta_io.o \ |
4 | mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \ | 4 | mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \ |
5 | ops_fstype.o ops_inode.o ops_super.o quota.o \ | 5 | ops_fstype.o ops_inode.o ops_super.o quota.o \ |
6 | recovery.o rgrp.o super.o sys.o trans.o util.o | 6 | recovery.o rgrp.o super.o sys.o trans.o util.o |
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 1047a8c7226a..3e9bd46f27e3 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c | |||
@@ -116,7 +116,7 @@ static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl, | |||
116 | goto out; | 116 | goto out; |
117 | 117 | ||
118 | er.er_data_len = GFS2_EA_DATA_LEN(el->el_ea); | 118 | er.er_data_len = GFS2_EA_DATA_LEN(el->el_ea); |
119 | er.er_data = kmalloc(er.er_data_len, GFP_KERNEL); | 119 | er.er_data = kmalloc(er.er_data_len, GFP_NOFS); |
120 | error = -ENOMEM; | 120 | error = -ENOMEM; |
121 | if (!er.er_data) | 121 | if (!er.er_data) |
122 | goto out; | 122 | goto out; |
@@ -222,7 +222,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) | |||
222 | return error; | 222 | return error; |
223 | } | 223 | } |
224 | 224 | ||
225 | clone = posix_acl_clone(acl, GFP_KERNEL); | 225 | clone = posix_acl_clone(acl, GFP_NOFS); |
226 | error = -ENOMEM; | 226 | error = -ENOMEM; |
227 | if (!clone) | 227 | if (!clone) |
228 | goto out; | 228 | goto out; |
@@ -272,7 +272,7 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) | |||
272 | if (!acl) | 272 | if (!acl) |
273 | return gfs2_setattr_simple(ip, attr); | 273 | return gfs2_setattr_simple(ip, attr); |
274 | 274 | ||
275 | clone = posix_acl_clone(acl, GFP_KERNEL); | 275 | clone = posix_acl_clone(acl, GFP_NOFS); |
276 | error = -ENOMEM; | 276 | error = -ENOMEM; |
277 | if (!clone) | 277 | if (!clone) |
278 | goto out; | 278 | goto out; |
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index e9456ebd3bb6..c19184f2e70e 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
@@ -33,6 +33,7 @@ | |||
33 | * keep it small. | 33 | * keep it small. |
34 | */ | 34 | */ |
35 | struct metapath { | 35 | struct metapath { |
36 | struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT]; | ||
36 | __u16 mp_list[GFS2_MAX_META_HEIGHT]; | 37 | __u16 mp_list[GFS2_MAX_META_HEIGHT]; |
37 | }; | 38 | }; |
38 | 39 | ||
@@ -135,9 +136,10 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) | |||
135 | /* Get a free block, fill it with the stuffed data, | 136 | /* Get a free block, fill it with the stuffed data, |
136 | and write it out to disk */ | 137 | and write it out to disk */ |
137 | 138 | ||
139 | unsigned int n = 1; | ||
140 | block = gfs2_alloc_block(ip, &n); | ||
138 | if (isdir) { | 141 | if (isdir) { |
139 | block = gfs2_alloc_meta(ip); | 142 | gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1); |
140 | |||
141 | error = gfs2_dir_get_new_buffer(ip, block, &bh); | 143 | error = gfs2_dir_get_new_buffer(ip, block, &bh); |
142 | if (error) | 144 | if (error) |
143 | goto out_brelse; | 145 | goto out_brelse; |
@@ -145,8 +147,6 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) | |||
145 | dibh, sizeof(struct gfs2_dinode)); | 147 | dibh, sizeof(struct gfs2_dinode)); |
146 | brelse(bh); | 148 | brelse(bh); |
147 | } else { | 149 | } else { |
148 | block = gfs2_alloc_data(ip); | ||
149 | |||
150 | error = gfs2_unstuffer_page(ip, dibh, block, page); | 150 | error = gfs2_unstuffer_page(ip, dibh, block, page); |
151 | if (error) | 151 | if (error) |
152 | goto out_brelse; | 152 | goto out_brelse; |
@@ -161,12 +161,11 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) | |||
161 | 161 | ||
162 | if (ip->i_di.di_size) { | 162 | if (ip->i_di.di_size) { |
163 | *(__be64 *)(di + 1) = cpu_to_be64(block); | 163 | *(__be64 *)(di + 1) = cpu_to_be64(block); |
164 | ip->i_di.di_blocks++; | 164 | gfs2_add_inode_blocks(&ip->i_inode, 1); |
165 | gfs2_set_inode_blocks(&ip->i_inode); | 165 | di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); |
166 | di->di_blocks = cpu_to_be64(ip->i_di.di_blocks); | ||
167 | } | 166 | } |
168 | 167 | ||
169 | ip->i_di.di_height = 1; | 168 | ip->i_height = 1; |
170 | di->di_height = cpu_to_be16(1); | 169 | di->di_height = cpu_to_be16(1); |
171 | 170 | ||
172 | out_brelse: | 171 | out_brelse: |
@@ -176,114 +175,13 @@ out: | |||
176 | return error; | 175 | return error; |
177 | } | 176 | } |
178 | 177 | ||
179 | /** | ||
180 | * calc_tree_height - Calculate the height of a metadata tree | ||
181 | * @ip: The GFS2 inode | ||
182 | * @size: The proposed size of the file | ||
183 | * | ||
184 | * Work out how tall a metadata tree needs to be in order to accommodate a | ||
185 | * file of a particular size. If size is less than the current size of | ||
186 | * the inode, then the current size of the inode is used instead of the | ||
187 | * supplied one. | ||
188 | * | ||
189 | * Returns: the height the tree should be | ||
190 | */ | ||
191 | |||
192 | static unsigned int calc_tree_height(struct gfs2_inode *ip, u64 size) | ||
193 | { | ||
194 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
195 | u64 *arr; | ||
196 | unsigned int max, height; | ||
197 | |||
198 | if (ip->i_di.di_size > size) | ||
199 | size = ip->i_di.di_size; | ||
200 | |||
201 | if (gfs2_is_dir(ip)) { | ||
202 | arr = sdp->sd_jheightsize; | ||
203 | max = sdp->sd_max_jheight; | ||
204 | } else { | ||
205 | arr = sdp->sd_heightsize; | ||
206 | max = sdp->sd_max_height; | ||
207 | } | ||
208 | |||
209 | for (height = 0; height < max; height++) | ||
210 | if (arr[height] >= size) | ||
211 | break; | ||
212 | |||
213 | return height; | ||
214 | } | ||
215 | |||
216 | /** | ||
217 | * build_height - Build a metadata tree of the requested height | ||
218 | * @ip: The GFS2 inode | ||
219 | * @height: The height to build to | ||
220 | * | ||
221 | * | ||
222 | * Returns: errno | ||
223 | */ | ||
224 | |||
225 | static int build_height(struct inode *inode, unsigned height) | ||
226 | { | ||
227 | struct gfs2_inode *ip = GFS2_I(inode); | ||
228 | unsigned new_height = height - ip->i_di.di_height; | ||
229 | struct buffer_head *dibh; | ||
230 | struct buffer_head *blocks[GFS2_MAX_META_HEIGHT]; | ||
231 | struct gfs2_dinode *di; | ||
232 | int error; | ||
233 | __be64 *bp; | ||
234 | u64 bn; | ||
235 | unsigned n; | ||
236 | |||
237 | if (height <= ip->i_di.di_height) | ||
238 | return 0; | ||
239 | |||
240 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
241 | if (error) | ||
242 | return error; | ||
243 | |||
244 | for(n = 0; n < new_height; n++) { | ||
245 | bn = gfs2_alloc_meta(ip); | ||
246 | blocks[n] = gfs2_meta_new(ip->i_gl, bn); | ||
247 | gfs2_trans_add_bh(ip->i_gl, blocks[n], 1); | ||
248 | } | ||
249 | |||
250 | n = 0; | ||
251 | bn = blocks[0]->b_blocknr; | ||
252 | if (new_height > 1) { | ||
253 | for(; n < new_height-1; n++) { | ||
254 | gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN, | ||
255 | GFS2_FORMAT_IN); | ||
256 | gfs2_buffer_clear_tail(blocks[n], | ||
257 | sizeof(struct gfs2_meta_header)); | ||
258 | bp = (__be64 *)(blocks[n]->b_data + | ||
259 | sizeof(struct gfs2_meta_header)); | ||
260 | *bp = cpu_to_be64(blocks[n+1]->b_blocknr); | ||
261 | brelse(blocks[n]); | ||
262 | blocks[n] = NULL; | ||
263 | } | ||
264 | } | ||
265 | gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN, GFS2_FORMAT_IN); | ||
266 | gfs2_buffer_copy_tail(blocks[n], sizeof(struct gfs2_meta_header), | ||
267 | dibh, sizeof(struct gfs2_dinode)); | ||
268 | brelse(blocks[n]); | ||
269 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
270 | di = (struct gfs2_dinode *)dibh->b_data; | ||
271 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); | ||
272 | *(__be64 *)(di + 1) = cpu_to_be64(bn); | ||
273 | ip->i_di.di_height += new_height; | ||
274 | ip->i_di.di_blocks += new_height; | ||
275 | gfs2_set_inode_blocks(&ip->i_inode); | ||
276 | di->di_height = cpu_to_be16(ip->i_di.di_height); | ||
277 | di->di_blocks = cpu_to_be64(ip->i_di.di_blocks); | ||
278 | brelse(dibh); | ||
279 | return error; | ||
280 | } | ||
281 | 178 | ||
282 | /** | 179 | /** |
283 | * find_metapath - Find path through the metadata tree | 180 | * find_metapath - Find path through the metadata tree |
284 | * @ip: The inode pointer | 181 | * @sdp: The superblock |
285 | * @mp: The metapath to return the result in | 182 | * @mp: The metapath to return the result in |
286 | * @block: The disk block to look up | 183 | * @block: The disk block to look up |
184 | * @height: The pre-calculated height of the metadata tree | ||
287 | * | 185 | * |
288 | * This routine returns a struct metapath structure that defines a path | 186 | * This routine returns a struct metapath structure that defines a path |
289 | * through the metadata of inode "ip" to get to block "block". | 187 | * through the metadata of inode "ip" to get to block "block". |
@@ -338,21 +236,29 @@ static int build_height(struct inode *inode, unsigned height) | |||
338 | * | 236 | * |
339 | */ | 237 | */ |
340 | 238 | ||
341 | static void find_metapath(struct gfs2_inode *ip, u64 block, | 239 | static void find_metapath(const struct gfs2_sbd *sdp, u64 block, |
342 | struct metapath *mp) | 240 | struct metapath *mp, unsigned int height) |
343 | { | 241 | { |
344 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
345 | u64 b = block; | ||
346 | unsigned int i; | 242 | unsigned int i; |
347 | 243 | ||
348 | for (i = ip->i_di.di_height; i--;) | 244 | for (i = height; i--;) |
349 | mp->mp_list[i] = do_div(b, sdp->sd_inptrs); | 245 | mp->mp_list[i] = do_div(block, sdp->sd_inptrs); |
246 | |||
247 | } | ||
350 | 248 | ||
249 | static inline unsigned int zero_metapath_length(const struct metapath *mp, | ||
250 | unsigned height) | ||
251 | { | ||
252 | unsigned int i; | ||
253 | for (i = 0; i < height - 1; i++) { | ||
254 | if (mp->mp_list[i] != 0) | ||
255 | return i; | ||
256 | } | ||
257 | return height; | ||
351 | } | 258 | } |
352 | 259 | ||
353 | /** | 260 | /** |
354 | * metapointer - Return pointer to start of metadata in a buffer | 261 | * metapointer - Return pointer to start of metadata in a buffer |
355 | * @bh: The buffer | ||
356 | * @height: The metadata height (0 = dinode) | 262 | * @height: The metadata height (0 = dinode) |
357 | * @mp: The metapath | 263 | * @mp: The metapath |
358 | * | 264 | * |
@@ -361,93 +267,302 @@ static void find_metapath(struct gfs2_inode *ip, u64 block, | |||
361 | * metadata tree. | 267 | * metadata tree. |
362 | */ | 268 | */ |
363 | 269 | ||
364 | static inline __be64 *metapointer(struct buffer_head *bh, int *boundary, | 270 | static inline __be64 *metapointer(unsigned int height, const struct metapath *mp) |
365 | unsigned int height, const struct metapath *mp) | ||
366 | { | 271 | { |
272 | struct buffer_head *bh = mp->mp_bh[height]; | ||
367 | unsigned int head_size = (height > 0) ? | 273 | unsigned int head_size = (height > 0) ? |
368 | sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode); | 274 | sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode); |
369 | __be64 *ptr; | 275 | return ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height]; |
370 | *boundary = 0; | ||
371 | ptr = ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height]; | ||
372 | if (ptr + 1 == (__be64 *)(bh->b_data + bh->b_size)) | ||
373 | *boundary = 1; | ||
374 | return ptr; | ||
375 | } | 276 | } |
376 | 277 | ||
377 | /** | 278 | /** |
378 | * lookup_block - Get the next metadata block in metadata tree | 279 | * lookup_metapath - Walk the metadata tree to a specific point |
379 | * @ip: The GFS2 inode | 280 | * @ip: The inode |
380 | * @bh: Buffer containing the pointers to metadata blocks | ||
381 | * @height: The height of the tree (0 = dinode) | ||
382 | * @mp: The metapath | 281 | * @mp: The metapath |
383 | * @create: Non-zero if we may create a new meatdata block | ||
384 | * @new: Used to indicate if we did create a new metadata block | ||
385 | * @block: the returned disk block number | ||
386 | * | 282 | * |
387 | * Given a metatree, complete to a particular height, checks to see if the next | 283 | * Assumes that the inode's buffer has already been looked up and |
388 | * height of the tree exists. If not the next height of the tree is created. | 284 | * hooked onto mp->mp_bh[0] and that the metapath has been initialised |
389 | * The block number of the next height of the metadata tree is returned. | 285 | * by find_metapath(). |
286 | * | ||
287 | * If this function encounters part of the tree which has not been | ||
288 | * allocated, it returns the current height of the tree at the point | ||
289 | * at which it found the unallocated block. Blocks which are found are | ||
290 | * added to the mp->mp_bh[] list. | ||
390 | * | 291 | * |
292 | * Returns: error or height of metadata tree | ||
391 | */ | 293 | */ |
392 | 294 | ||
393 | static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh, | 295 | static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp) |
394 | unsigned int height, struct metapath *mp, int create, | ||
395 | int *new, u64 *block) | ||
396 | { | 296 | { |
397 | int boundary; | 297 | unsigned int end_of_metadata = ip->i_height - 1; |
398 | __be64 *ptr = metapointer(bh, &boundary, height, mp); | 298 | unsigned int x; |
299 | __be64 *ptr; | ||
300 | u64 dblock; | ||
301 | int ret; | ||
399 | 302 | ||
400 | if (*ptr) { | 303 | for (x = 0; x < end_of_metadata; x++) { |
401 | *block = be64_to_cpu(*ptr); | 304 | ptr = metapointer(x, mp); |
402 | return boundary; | 305 | dblock = be64_to_cpu(*ptr); |
403 | } | 306 | if (!dblock) |
307 | return x + 1; | ||
404 | 308 | ||
405 | *block = 0; | 309 | ret = gfs2_meta_indirect_buffer(ip, x+1, dblock, 0, &mp->mp_bh[x+1]); |
310 | if (ret) | ||
311 | return ret; | ||
312 | } | ||
406 | 313 | ||
407 | if (!create) | 314 | return ip->i_height; |
408 | return 0; | 315 | } |
409 | 316 | ||
410 | if (height == ip->i_di.di_height - 1 && !gfs2_is_dir(ip)) | 317 | static inline void release_metapath(struct metapath *mp) |
411 | *block = gfs2_alloc_data(ip); | 318 | { |
412 | else | 319 | int i; |
413 | *block = gfs2_alloc_meta(ip); | ||
414 | 320 | ||
415 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | 321 | for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) { |
322 | if (mp->mp_bh[i] == NULL) | ||
323 | break; | ||
324 | brelse(mp->mp_bh[i]); | ||
325 | } | ||
326 | } | ||
416 | 327 | ||
417 | *ptr = cpu_to_be64(*block); | 328 | /** |
418 | ip->i_di.di_blocks++; | 329 | * gfs2_extent_length - Returns length of an extent of blocks |
419 | gfs2_set_inode_blocks(&ip->i_inode); | 330 | * @start: Start of the buffer |
331 | * @len: Length of the buffer in bytes | ||
332 | * @ptr: Current position in the buffer | ||
333 | * @limit: Max extent length to return (0 = unlimited) | ||
334 | * @eob: Set to 1 if we hit "end of block" | ||
335 | * | ||
336 | * If the first block is zero (unallocated) it will return the number of | ||
337 | * unallocated blocks in the extent, otherwise it will return the number | ||
338 | * of contiguous blocks in the extent. | ||
339 | * | ||
340 | * Returns: The length of the extent (minimum of one block) | ||
341 | */ | ||
420 | 342 | ||
421 | *new = 1; | 343 | static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, unsigned limit, int *eob) |
422 | return 0; | 344 | { |
345 | const __be64 *end = (start + len); | ||
346 | const __be64 *first = ptr; | ||
347 | u64 d = be64_to_cpu(*ptr); | ||
348 | |||
349 | *eob = 0; | ||
350 | do { | ||
351 | ptr++; | ||
352 | if (ptr >= end) | ||
353 | break; | ||
354 | if (limit && --limit == 0) | ||
355 | break; | ||
356 | if (d) | ||
357 | d++; | ||
358 | } while(be64_to_cpu(*ptr) == d); | ||
359 | if (ptr >= end) | ||
360 | *eob = 1; | ||
361 | return (ptr - first); | ||
423 | } | 362 | } |
424 | 363 | ||
425 | static inline void bmap_lock(struct inode *inode, int create) | 364 | static inline void bmap_lock(struct gfs2_inode *ip, int create) |
426 | { | 365 | { |
427 | struct gfs2_inode *ip = GFS2_I(inode); | ||
428 | if (create) | 366 | if (create) |
429 | down_write(&ip->i_rw_mutex); | 367 | down_write(&ip->i_rw_mutex); |
430 | else | 368 | else |
431 | down_read(&ip->i_rw_mutex); | 369 | down_read(&ip->i_rw_mutex); |
432 | } | 370 | } |
433 | 371 | ||
434 | static inline void bmap_unlock(struct inode *inode, int create) | 372 | static inline void bmap_unlock(struct gfs2_inode *ip, int create) |
435 | { | 373 | { |
436 | struct gfs2_inode *ip = GFS2_I(inode); | ||
437 | if (create) | 374 | if (create) |
438 | up_write(&ip->i_rw_mutex); | 375 | up_write(&ip->i_rw_mutex); |
439 | else | 376 | else |
440 | up_read(&ip->i_rw_mutex); | 377 | up_read(&ip->i_rw_mutex); |
441 | } | 378 | } |
442 | 379 | ||
380 | static inline __be64 *gfs2_indirect_init(struct metapath *mp, | ||
381 | struct gfs2_glock *gl, unsigned int i, | ||
382 | unsigned offset, u64 bn) | ||
383 | { | ||
384 | __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data + | ||
385 | ((i > 1) ? sizeof(struct gfs2_meta_header) : | ||
386 | sizeof(struct gfs2_dinode))); | ||
387 | BUG_ON(i < 1); | ||
388 | BUG_ON(mp->mp_bh[i] != NULL); | ||
389 | mp->mp_bh[i] = gfs2_meta_new(gl, bn); | ||
390 | gfs2_trans_add_bh(gl, mp->mp_bh[i], 1); | ||
391 | gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN); | ||
392 | gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header)); | ||
393 | ptr += offset; | ||
394 | *ptr = cpu_to_be64(bn); | ||
395 | return ptr; | ||
396 | } | ||
397 | |||
398 | enum alloc_state { | ||
399 | ALLOC_DATA = 0, | ||
400 | ALLOC_GROW_DEPTH = 1, | ||
401 | ALLOC_GROW_HEIGHT = 2, | ||
402 | /* ALLOC_UNSTUFF = 3, TBD and rather complicated */ | ||
403 | }; | ||
404 | |||
405 | /** | ||
406 | * gfs2_bmap_alloc - Build a metadata tree of the requested height | ||
407 | * @inode: The GFS2 inode | ||
408 | * @lblock: The logical starting block of the extent | ||
409 | * @bh_map: This is used to return the mapping details | ||
410 | * @mp: The metapath | ||
411 | * @sheight: The starting height (i.e. whats already mapped) | ||
412 | * @height: The height to build to | ||
413 | * @maxlen: The max number of data blocks to alloc | ||
414 | * | ||
415 | * In this routine we may have to alloc: | ||
416 | * i) Indirect blocks to grow the metadata tree height | ||
417 | * ii) Indirect blocks to fill in lower part of the metadata tree | ||
418 | * iii) Data blocks | ||
419 | * | ||
420 | * The function is in two parts. The first part works out the total | ||
421 | * number of blocks which we need. The second part does the actual | ||
422 | * allocation asking for an extent at a time (if enough contiguous free | ||
423 | * blocks are available, there will only be one request per bmap call) | ||
424 | * and uses the state machine to initialise the blocks in order. | ||
425 | * | ||
426 | * Returns: errno on error | ||
427 | */ | ||
428 | |||
429 | static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, | ||
430 | struct buffer_head *bh_map, struct metapath *mp, | ||
431 | const unsigned int sheight, | ||
432 | const unsigned int height, | ||
433 | const unsigned int maxlen) | ||
434 | { | ||
435 | struct gfs2_inode *ip = GFS2_I(inode); | ||
436 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
437 | struct buffer_head *dibh = mp->mp_bh[0]; | ||
438 | u64 bn, dblock = 0; | ||
439 | unsigned n, i, blks, alloced = 0, iblks = 0, zmpl = 0; | ||
440 | unsigned dblks = 0; | ||
441 | unsigned ptrs_per_blk; | ||
442 | const unsigned end_of_metadata = height - 1; | ||
443 | int eob = 0; | ||
444 | enum alloc_state state; | ||
445 | __be64 *ptr; | ||
446 | __be64 zero_bn = 0; | ||
447 | |||
448 | BUG_ON(sheight < 1); | ||
449 | BUG_ON(dibh == NULL); | ||
450 | |||
451 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
452 | |||
453 | if (height == sheight) { | ||
454 | struct buffer_head *bh; | ||
455 | /* Bottom indirect block exists, find unalloced extent size */ | ||
456 | ptr = metapointer(end_of_metadata, mp); | ||
457 | bh = mp->mp_bh[end_of_metadata]; | ||
458 | dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr, maxlen, | ||
459 | &eob); | ||
460 | BUG_ON(dblks < 1); | ||
461 | state = ALLOC_DATA; | ||
462 | } else { | ||
463 | /* Need to allocate indirect blocks */ | ||
464 | ptrs_per_blk = height > 1 ? sdp->sd_inptrs : sdp->sd_diptrs; | ||
465 | dblks = min(maxlen, ptrs_per_blk - mp->mp_list[end_of_metadata]); | ||
466 | if (height == ip->i_height) { | ||
467 | /* Writing into existing tree, extend tree down */ | ||
468 | iblks = height - sheight; | ||
469 | state = ALLOC_GROW_DEPTH; | ||
470 | } else { | ||
471 | /* Building up tree height */ | ||
472 | state = ALLOC_GROW_HEIGHT; | ||
473 | iblks = height - ip->i_height; | ||
474 | zmpl = zero_metapath_length(mp, height); | ||
475 | iblks -= zmpl; | ||
476 | iblks += height; | ||
477 | } | ||
478 | } | ||
479 | |||
480 | /* start of the second part of the function (state machine) */ | ||
481 | |||
482 | blks = dblks + iblks; | ||
483 | i = sheight; | ||
484 | do { | ||
485 | n = blks - alloced; | ||
486 | bn = gfs2_alloc_block(ip, &n); | ||
487 | alloced += n; | ||
488 | if (state != ALLOC_DATA || gfs2_is_jdata(ip)) | ||
489 | gfs2_trans_add_unrevoke(sdp, bn, n); | ||
490 | switch (state) { | ||
491 | /* Growing height of tree */ | ||
492 | case ALLOC_GROW_HEIGHT: | ||
493 | if (i == 1) { | ||
494 | ptr = (__be64 *)(dibh->b_data + | ||
495 | sizeof(struct gfs2_dinode)); | ||
496 | zero_bn = *ptr; | ||
497 | } | ||
498 | for (; i - 1 < height - ip->i_height && n > 0; i++, n--) | ||
499 | gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++); | ||
500 | if (i - 1 == height - ip->i_height) { | ||
501 | i--; | ||
502 | gfs2_buffer_copy_tail(mp->mp_bh[i], | ||
503 | sizeof(struct gfs2_meta_header), | ||
504 | dibh, sizeof(struct gfs2_dinode)); | ||
505 | gfs2_buffer_clear_tail(dibh, | ||
506 | sizeof(struct gfs2_dinode) + | ||
507 | sizeof(__be64)); | ||
508 | ptr = (__be64 *)(mp->mp_bh[i]->b_data + | ||
509 | sizeof(struct gfs2_meta_header)); | ||
510 | *ptr = zero_bn; | ||
511 | state = ALLOC_GROW_DEPTH; | ||
512 | for(i = zmpl; i < height; i++) { | ||
513 | if (mp->mp_bh[i] == NULL) | ||
514 | break; | ||
515 | brelse(mp->mp_bh[i]); | ||
516 | mp->mp_bh[i] = NULL; | ||
517 | } | ||
518 | i = zmpl; | ||
519 | } | ||
520 | if (n == 0) | ||
521 | break; | ||
522 | /* Branching from existing tree */ | ||
523 | case ALLOC_GROW_DEPTH: | ||
524 | if (i > 1 && i < height) | ||
525 | gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[i-1], 1); | ||
526 | for (; i < height && n > 0; i++, n--) | ||
527 | gfs2_indirect_init(mp, ip->i_gl, i, | ||
528 | mp->mp_list[i-1], bn++); | ||
529 | if (i == height) | ||
530 | state = ALLOC_DATA; | ||
531 | if (n == 0) | ||
532 | break; | ||
533 | /* Tree complete, adding data blocks */ | ||
534 | case ALLOC_DATA: | ||
535 | BUG_ON(n > dblks); | ||
536 | BUG_ON(mp->mp_bh[end_of_metadata] == NULL); | ||
537 | gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[end_of_metadata], 1); | ||
538 | dblks = n; | ||
539 | ptr = metapointer(end_of_metadata, mp); | ||
540 | dblock = bn; | ||
541 | while (n-- > 0) | ||
542 | *ptr++ = cpu_to_be64(bn++); | ||
543 | break; | ||
544 | } | ||
545 | } while (state != ALLOC_DATA); | ||
546 | |||
547 | ip->i_height = height; | ||
548 | gfs2_add_inode_blocks(&ip->i_inode, alloced); | ||
549 | gfs2_dinode_out(ip, mp->mp_bh[0]->b_data); | ||
550 | map_bh(bh_map, inode->i_sb, dblock); | ||
551 | bh_map->b_size = dblks << inode->i_blkbits; | ||
552 | set_buffer_new(bh_map); | ||
553 | return 0; | ||
554 | } | ||
555 | |||
443 | /** | 556 | /** |
444 | * gfs2_block_map - Map a block from an inode to a disk block | 557 | * gfs2_block_map - Map a block from an inode to a disk block |
445 | * @inode: The inode | 558 | * @inode: The inode |
446 | * @lblock: The logical block number | 559 | * @lblock: The logical block number |
447 | * @bh_map: The bh to be mapped | 560 | * @bh_map: The bh to be mapped |
561 | * @create: True if its ok to alloc blocks to satify the request | ||
448 | * | 562 | * |
449 | * Find the block number on the current device which corresponds to an | 563 | * Sets buffer_mapped() if successful, sets buffer_boundary() if a |
450 | * inode's block. If the block had to be created, "new" will be set. | 564 | * read of metadata will be required before the next block can be |
565 | * mapped. Sets buffer_new() if new blocks were allocated. | ||
451 | * | 566 | * |
452 | * Returns: errno | 567 | * Returns: errno |
453 | */ | 568 | */ |
@@ -457,97 +572,78 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, | |||
457 | { | 572 | { |
458 | struct gfs2_inode *ip = GFS2_I(inode); | 573 | struct gfs2_inode *ip = GFS2_I(inode); |
459 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 574 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
460 | struct buffer_head *bh; | 575 | unsigned int bsize = sdp->sd_sb.sb_bsize; |
461 | unsigned int bsize; | 576 | const unsigned int maxlen = bh_map->b_size >> inode->i_blkbits; |
462 | unsigned int height; | 577 | const u64 *arr = sdp->sd_heightsize; |
463 | unsigned int end_of_metadata; | 578 | __be64 *ptr; |
464 | unsigned int x; | ||
465 | int error = 0; | ||
466 | int new = 0; | ||
467 | u64 dblock = 0; | ||
468 | int boundary; | ||
469 | unsigned int maxlen = bh_map->b_size >> inode->i_blkbits; | ||
470 | struct metapath mp; | ||
471 | u64 size; | 579 | u64 size; |
472 | struct buffer_head *dibh = NULL; | 580 | struct metapath mp; |
581 | int ret; | ||
582 | int eob; | ||
583 | unsigned int len; | ||
584 | struct buffer_head *bh; | ||
585 | u8 height; | ||
473 | 586 | ||
474 | BUG_ON(maxlen == 0); | 587 | BUG_ON(maxlen == 0); |
475 | 588 | ||
476 | if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip))) | 589 | memset(mp.mp_bh, 0, sizeof(mp.mp_bh)); |
477 | return 0; | 590 | bmap_lock(ip, create); |
478 | |||
479 | bmap_lock(inode, create); | ||
480 | clear_buffer_mapped(bh_map); | 591 | clear_buffer_mapped(bh_map); |
481 | clear_buffer_new(bh_map); | 592 | clear_buffer_new(bh_map); |
482 | clear_buffer_boundary(bh_map); | 593 | clear_buffer_boundary(bh_map); |
483 | bsize = gfs2_is_dir(ip) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize; | 594 | if (gfs2_is_dir(ip)) { |
484 | size = (lblock + 1) * bsize; | 595 | bsize = sdp->sd_jbsize; |
485 | 596 | arr = sdp->sd_jheightsize; | |
486 | if (size > ip->i_di.di_size) { | ||
487 | height = calc_tree_height(ip, size); | ||
488 | if (ip->i_di.di_height < height) { | ||
489 | if (!create) | ||
490 | goto out_ok; | ||
491 | |||
492 | error = build_height(inode, height); | ||
493 | if (error) | ||
494 | goto out_fail; | ||
495 | } | ||
496 | } | 597 | } |
497 | 598 | ||
498 | find_metapath(ip, lblock, &mp); | 599 | ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]); |
499 | end_of_metadata = ip->i_di.di_height - 1; | 600 | if (ret) |
500 | error = gfs2_meta_inode_buffer(ip, &bh); | 601 | goto out; |
501 | if (error) | ||
502 | goto out_fail; | ||
503 | dibh = bh; | ||
504 | get_bh(dibh); | ||
505 | 602 | ||
506 | for (x = 0; x < end_of_metadata; x++) { | 603 | height = ip->i_height; |
507 | lookup_block(ip, bh, x, &mp, create, &new, &dblock); | 604 | size = (lblock + 1) * bsize; |
508 | brelse(bh); | 605 | while (size > arr[height]) |
509 | if (!dblock) | 606 | height++; |
510 | goto out_ok; | 607 | find_metapath(sdp, lblock, &mp, height); |
608 | ret = 1; | ||
609 | if (height > ip->i_height || gfs2_is_stuffed(ip)) | ||
610 | goto do_alloc; | ||
611 | ret = lookup_metapath(ip, &mp); | ||
612 | if (ret < 0) | ||
613 | goto out; | ||
614 | if (ret != ip->i_height) | ||
615 | goto do_alloc; | ||
616 | ptr = metapointer(ip->i_height - 1, &mp); | ||
617 | if (*ptr == 0) | ||
618 | goto do_alloc; | ||
619 | map_bh(bh_map, inode->i_sb, be64_to_cpu(*ptr)); | ||
620 | bh = mp.mp_bh[ip->i_height - 1]; | ||
621 | len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, maxlen, &eob); | ||
622 | bh_map->b_size = (len << inode->i_blkbits); | ||
623 | if (eob) | ||
624 | set_buffer_boundary(bh_map); | ||
625 | ret = 0; | ||
626 | out: | ||
627 | release_metapath(&mp); | ||
628 | bmap_unlock(ip, create); | ||
629 | return ret; | ||
511 | 630 | ||
512 | error = gfs2_meta_indirect_buffer(ip, x+1, dblock, new, &bh); | 631 | do_alloc: |
513 | if (error) | 632 | /* All allocations are done here, firstly check create flag */ |
514 | goto out_fail; | 633 | if (!create) { |
634 | BUG_ON(gfs2_is_stuffed(ip)); | ||
635 | ret = 0; | ||
636 | goto out; | ||
515 | } | 637 | } |
516 | 638 | ||
517 | boundary = lookup_block(ip, bh, end_of_metadata, &mp, create, &new, &dblock); | 639 | /* At this point ret is the tree depth of already allocated blocks */ |
518 | if (dblock) { | 640 | ret = gfs2_bmap_alloc(inode, lblock, bh_map, &mp, ret, height, maxlen); |
519 | map_bh(bh_map, inode->i_sb, dblock); | 641 | goto out; |
520 | if (boundary) | ||
521 | set_buffer_boundary(bh_map); | ||
522 | if (new) { | ||
523 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
524 | gfs2_dinode_out(ip, dibh->b_data); | ||
525 | set_buffer_new(bh_map); | ||
526 | goto out_brelse; | ||
527 | } | ||
528 | while(--maxlen && !buffer_boundary(bh_map)) { | ||
529 | u64 eblock; | ||
530 | |||
531 | mp.mp_list[end_of_metadata]++; | ||
532 | boundary = lookup_block(ip, bh, end_of_metadata, &mp, 0, &new, &eblock); | ||
533 | if (eblock != ++dblock) | ||
534 | break; | ||
535 | bh_map->b_size += (1 << inode->i_blkbits); | ||
536 | if (boundary) | ||
537 | set_buffer_boundary(bh_map); | ||
538 | } | ||
539 | } | ||
540 | out_brelse: | ||
541 | brelse(bh); | ||
542 | out_ok: | ||
543 | error = 0; | ||
544 | out_fail: | ||
545 | if (dibh) | ||
546 | brelse(dibh); | ||
547 | bmap_unlock(inode, create); | ||
548 | return error; | ||
549 | } | 642 | } |
550 | 643 | ||
644 | /* | ||
645 | * Deprecated: do not use in new code | ||
646 | */ | ||
551 | int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen) | 647 | int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen) |
552 | { | 648 | { |
553 | struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 }; | 649 | struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 }; |
@@ -558,7 +654,7 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi | |||
558 | BUG_ON(!dblock); | 654 | BUG_ON(!dblock); |
559 | BUG_ON(!new); | 655 | BUG_ON(!new); |
560 | 656 | ||
561 | bh.b_size = 1 << (inode->i_blkbits + 5); | 657 | bh.b_size = 1 << (inode->i_blkbits + (create ? 0 : 5)); |
562 | ret = gfs2_block_map(inode, lblock, &bh, create); | 658 | ret = gfs2_block_map(inode, lblock, &bh, create); |
563 | *extlen = bh.b_size >> inode->i_blkbits; | 659 | *extlen = bh.b_size >> inode->i_blkbits; |
564 | *dblock = bh.b_blocknr; | 660 | *dblock = bh.b_blocknr; |
@@ -621,7 +717,7 @@ static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
621 | if (error) | 717 | if (error) |
622 | goto out; | 718 | goto out; |
623 | 719 | ||
624 | if (height < ip->i_di.di_height - 1) | 720 | if (height < ip->i_height - 1) |
625 | for (; top < bottom; top++, first = 0) { | 721 | for (; top < bottom; top++, first = 0) { |
626 | if (!*top) | 722 | if (!*top) |
627 | continue; | 723 | continue; |
@@ -679,7 +775,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
679 | sm->sm_first = 0; | 775 | sm->sm_first = 0; |
680 | } | 776 | } |
681 | 777 | ||
682 | metadata = (height != ip->i_di.di_height - 1); | 778 | metadata = (height != ip->i_height - 1); |
683 | if (metadata) | 779 | if (metadata) |
684 | revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; | 780 | revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; |
685 | 781 | ||
@@ -713,7 +809,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
713 | else | 809 | else |
714 | goto out; /* Nothing to do */ | 810 | goto out; /* Nothing to do */ |
715 | 811 | ||
716 | gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0); | 812 | gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE); |
717 | 813 | ||
718 | for (x = 0; x < rlist.rl_rgrps; x++) { | 814 | for (x = 0; x < rlist.rl_rgrps; x++) { |
719 | struct gfs2_rgrpd *rgd; | 815 | struct gfs2_rgrpd *rgd; |
@@ -760,10 +856,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
760 | } | 856 | } |
761 | 857 | ||
762 | *p = 0; | 858 | *p = 0; |
763 | if (!ip->i_di.di_blocks) | 859 | gfs2_add_inode_blocks(&ip->i_inode, -1); |
764 | gfs2_consist_inode(ip); | ||
765 | ip->i_di.di_blocks--; | ||
766 | gfs2_set_inode_blocks(&ip->i_inode); | ||
767 | } | 860 | } |
768 | if (bstart) { | 861 | if (bstart) { |
769 | if (metadata) | 862 | if (metadata) |
@@ -804,19 +897,16 @@ static int do_grow(struct gfs2_inode *ip, u64 size) | |||
804 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 897 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
805 | struct gfs2_alloc *al; | 898 | struct gfs2_alloc *al; |
806 | struct buffer_head *dibh; | 899 | struct buffer_head *dibh; |
807 | unsigned int h; | ||
808 | int error; | 900 | int error; |
809 | 901 | ||
810 | al = gfs2_alloc_get(ip); | 902 | al = gfs2_alloc_get(ip); |
903 | if (!al) | ||
904 | return -ENOMEM; | ||
811 | 905 | ||
812 | error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 906 | error = gfs2_quota_lock_check(ip); |
813 | if (error) | 907 | if (error) |
814 | goto out; | 908 | goto out; |
815 | 909 | ||
816 | error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); | ||
817 | if (error) | ||
818 | goto out_gunlock_q; | ||
819 | |||
820 | al->al_requested = sdp->sd_max_height + RES_DATA; | 910 | al->al_requested = sdp->sd_max_height + RES_DATA; |
821 | 911 | ||
822 | error = gfs2_inplace_reserve(ip); | 912 | error = gfs2_inplace_reserve(ip); |
@@ -829,34 +919,25 @@ static int do_grow(struct gfs2_inode *ip, u64 size) | |||
829 | if (error) | 919 | if (error) |
830 | goto out_ipres; | 920 | goto out_ipres; |
831 | 921 | ||
922 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
923 | if (error) | ||
924 | goto out_end_trans; | ||
925 | |||
832 | if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { | 926 | if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { |
833 | if (gfs2_is_stuffed(ip)) { | 927 | if (gfs2_is_stuffed(ip)) { |
834 | error = gfs2_unstuff_dinode(ip, NULL); | 928 | error = gfs2_unstuff_dinode(ip, NULL); |
835 | if (error) | 929 | if (error) |
836 | goto out_end_trans; | 930 | goto out_brelse; |
837 | } | ||
838 | |||
839 | h = calc_tree_height(ip, size); | ||
840 | if (ip->i_di.di_height < h) { | ||
841 | down_write(&ip->i_rw_mutex); | ||
842 | error = build_height(&ip->i_inode, h); | ||
843 | up_write(&ip->i_rw_mutex); | ||
844 | if (error) | ||
845 | goto out_end_trans; | ||
846 | } | 931 | } |
847 | } | 932 | } |
848 | 933 | ||
849 | ip->i_di.di_size = size; | 934 | ip->i_di.di_size = size; |
850 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | 935 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; |
851 | |||
852 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
853 | if (error) | ||
854 | goto out_end_trans; | ||
855 | |||
856 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 936 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
857 | gfs2_dinode_out(ip, dibh->b_data); | 937 | gfs2_dinode_out(ip, dibh->b_data); |
858 | brelse(dibh); | ||
859 | 938 | ||
939 | out_brelse: | ||
940 | brelse(dibh); | ||
860 | out_end_trans: | 941 | out_end_trans: |
861 | gfs2_trans_end(sdp); | 942 | gfs2_trans_end(sdp); |
862 | out_ipres: | 943 | out_ipres: |
@@ -986,7 +1067,8 @@ out: | |||
986 | 1067 | ||
987 | static int trunc_dealloc(struct gfs2_inode *ip, u64 size) | 1068 | static int trunc_dealloc(struct gfs2_inode *ip, u64 size) |
988 | { | 1069 | { |
989 | unsigned int height = ip->i_di.di_height; | 1070 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1071 | unsigned int height = ip->i_height; | ||
990 | u64 lblock; | 1072 | u64 lblock; |
991 | struct metapath mp; | 1073 | struct metapath mp; |
992 | int error; | 1074 | int error; |
@@ -994,10 +1076,11 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size) | |||
994 | if (!size) | 1076 | if (!size) |
995 | lblock = 0; | 1077 | lblock = 0; |
996 | else | 1078 | else |
997 | lblock = (size - 1) >> GFS2_SB(&ip->i_inode)->sd_sb.sb_bsize_shift; | 1079 | lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift; |
998 | 1080 | ||
999 | find_metapath(ip, lblock, &mp); | 1081 | find_metapath(sdp, lblock, &mp, ip->i_height); |
1000 | gfs2_alloc_get(ip); | 1082 | if (!gfs2_alloc_get(ip)) |
1083 | return -ENOMEM; | ||
1001 | 1084 | ||
1002 | error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 1085 | error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); |
1003 | if (error) | 1086 | if (error) |
@@ -1037,10 +1120,8 @@ static int trunc_end(struct gfs2_inode *ip) | |||
1037 | goto out; | 1120 | goto out; |
1038 | 1121 | ||
1039 | if (!ip->i_di.di_size) { | 1122 | if (!ip->i_di.di_size) { |
1040 | ip->i_di.di_height = 0; | 1123 | ip->i_height = 0; |
1041 | ip->i_di.di_goal_meta = | 1124 | ip->i_goal = ip->i_no_addr; |
1042 | ip->i_di.di_goal_data = | ||
1043 | ip->i_no_addr; | ||
1044 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); | 1125 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); |
1045 | } | 1126 | } |
1046 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | 1127 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; |
@@ -1197,10 +1278,9 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, | |||
1197 | unsigned int len, int *alloc_required) | 1278 | unsigned int len, int *alloc_required) |
1198 | { | 1279 | { |
1199 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1280 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1200 | u64 lblock, lblock_stop, dblock; | 1281 | struct buffer_head bh; |
1201 | u32 extlen; | 1282 | unsigned int shift; |
1202 | int new = 0; | 1283 | u64 lblock, lblock_stop, size; |
1203 | int error = 0; | ||
1204 | 1284 | ||
1205 | *alloc_required = 0; | 1285 | *alloc_required = 0; |
1206 | 1286 | ||
@@ -1214,6 +1294,8 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, | |||
1214 | return 0; | 1294 | return 0; |
1215 | } | 1295 | } |
1216 | 1296 | ||
1297 | *alloc_required = 1; | ||
1298 | shift = sdp->sd_sb.sb_bsize_shift; | ||
1217 | if (gfs2_is_dir(ip)) { | 1299 | if (gfs2_is_dir(ip)) { |
1218 | unsigned int bsize = sdp->sd_jbsize; | 1300 | unsigned int bsize = sdp->sd_jbsize; |
1219 | lblock = offset; | 1301 | lblock = offset; |
@@ -1221,27 +1303,25 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, | |||
1221 | lblock_stop = offset + len + bsize - 1; | 1303 | lblock_stop = offset + len + bsize - 1; |
1222 | do_div(lblock_stop, bsize); | 1304 | do_div(lblock_stop, bsize); |
1223 | } else { | 1305 | } else { |
1224 | unsigned int shift = sdp->sd_sb.sb_bsize_shift; | ||
1225 | u64 end_of_file = (ip->i_di.di_size + sdp->sd_sb.sb_bsize - 1) >> shift; | 1306 | u64 end_of_file = (ip->i_di.di_size + sdp->sd_sb.sb_bsize - 1) >> shift; |
1226 | lblock = offset >> shift; | 1307 | lblock = offset >> shift; |
1227 | lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; | 1308 | lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; |
1228 | if (lblock_stop > end_of_file) { | 1309 | if (lblock_stop > end_of_file) |
1229 | *alloc_required = 1; | ||
1230 | return 0; | 1310 | return 0; |
1231 | } | ||
1232 | } | 1311 | } |
1233 | 1312 | ||
1234 | for (; lblock < lblock_stop; lblock += extlen) { | 1313 | size = (lblock_stop - lblock) << shift; |
1235 | error = gfs2_extent_map(&ip->i_inode, lblock, &new, &dblock, &extlen); | 1314 | do { |
1236 | if (error) | 1315 | bh.b_state = 0; |
1237 | return error; | 1316 | bh.b_size = size; |
1238 | 1317 | gfs2_block_map(&ip->i_inode, lblock, &bh, 0); | |
1239 | if (!dblock) { | 1318 | if (!buffer_mapped(&bh)) |
1240 | *alloc_required = 1; | ||
1241 | return 0; | 1319 | return 0; |
1242 | } | 1320 | size -= bh.b_size; |
1243 | } | 1321 | lblock += (bh.b_size >> ip->i_inode.i_blkbits); |
1322 | } while(size > 0); | ||
1244 | 1323 | ||
1324 | *alloc_required = 0; | ||
1245 | return 0; | 1325 | return 0; |
1246 | } | 1326 | } |
1247 | 1327 | ||
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index c34709512b19..eed040d8ba3a 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
@@ -159,6 +159,7 @@ static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf, | |||
159 | unsigned int o; | 159 | unsigned int o; |
160 | int copied = 0; | 160 | int copied = 0; |
161 | int error = 0; | 161 | int error = 0; |
162 | int new = 0; | ||
162 | 163 | ||
163 | if (!size) | 164 | if (!size) |
164 | return 0; | 165 | return 0; |
@@ -183,7 +184,6 @@ static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf, | |||
183 | while (copied < size) { | 184 | while (copied < size) { |
184 | unsigned int amount; | 185 | unsigned int amount; |
185 | struct buffer_head *bh; | 186 | struct buffer_head *bh; |
186 | int new = 0; | ||
187 | 187 | ||
188 | amount = size - copied; | 188 | amount = size - copied; |
189 | if (amount > sdp->sd_sb.sb_bsize - o) | 189 | if (amount > sdp->sd_sb.sb_bsize - o) |
@@ -757,7 +757,7 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode, | |||
757 | 757 | ||
758 | if (ip->i_di.di_flags & GFS2_DIF_EXHASH) { | 758 | if (ip->i_di.di_flags & GFS2_DIF_EXHASH) { |
759 | struct gfs2_leaf *leaf; | 759 | struct gfs2_leaf *leaf; |
760 | unsigned hsize = 1 << ip->i_di.di_depth; | 760 | unsigned hsize = 1 << ip->i_depth; |
761 | unsigned index; | 761 | unsigned index; |
762 | u64 ln; | 762 | u64 ln; |
763 | if (hsize * sizeof(u64) != ip->i_di.di_size) { | 763 | if (hsize * sizeof(u64) != ip->i_di.di_size) { |
@@ -765,7 +765,7 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode, | |||
765 | return ERR_PTR(-EIO); | 765 | return ERR_PTR(-EIO); |
766 | } | 766 | } |
767 | 767 | ||
768 | index = name->hash >> (32 - ip->i_di.di_depth); | 768 | index = name->hash >> (32 - ip->i_depth); |
769 | error = get_first_leaf(ip, index, &bh); | 769 | error = get_first_leaf(ip, index, &bh); |
770 | if (error) | 770 | if (error) |
771 | return ERR_PTR(error); | 771 | return ERR_PTR(error); |
@@ -803,14 +803,15 @@ got_dent: | |||
803 | static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, u16 depth) | 803 | static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, u16 depth) |
804 | { | 804 | { |
805 | struct gfs2_inode *ip = GFS2_I(inode); | 805 | struct gfs2_inode *ip = GFS2_I(inode); |
806 | u64 bn = gfs2_alloc_meta(ip); | 806 | unsigned int n = 1; |
807 | u64 bn = gfs2_alloc_block(ip, &n); | ||
807 | struct buffer_head *bh = gfs2_meta_new(ip->i_gl, bn); | 808 | struct buffer_head *bh = gfs2_meta_new(ip->i_gl, bn); |
808 | struct gfs2_leaf *leaf; | 809 | struct gfs2_leaf *leaf; |
809 | struct gfs2_dirent *dent; | 810 | struct gfs2_dirent *dent; |
810 | struct qstr name = { .name = "", .len = 0, .hash = 0 }; | 811 | struct qstr name = { .name = "", .len = 0, .hash = 0 }; |
811 | if (!bh) | 812 | if (!bh) |
812 | return NULL; | 813 | return NULL; |
813 | 814 | gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1); | |
814 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | 815 | gfs2_trans_add_bh(ip->i_gl, bh, 1); |
815 | gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); | 816 | gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); |
816 | leaf = (struct gfs2_leaf *)bh->b_data; | 817 | leaf = (struct gfs2_leaf *)bh->b_data; |
@@ -905,12 +906,11 @@ static int dir_make_exhash(struct inode *inode) | |||
905 | *lp = cpu_to_be64(bn); | 906 | *lp = cpu_to_be64(bn); |
906 | 907 | ||
907 | dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2; | 908 | dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2; |
908 | dip->i_di.di_blocks++; | 909 | gfs2_add_inode_blocks(&dip->i_inode, 1); |
909 | gfs2_set_inode_blocks(&dip->i_inode); | ||
910 | dip->i_di.di_flags |= GFS2_DIF_EXHASH; | 910 | dip->i_di.di_flags |= GFS2_DIF_EXHASH; |
911 | 911 | ||
912 | for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ; | 912 | for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ; |
913 | dip->i_di.di_depth = y; | 913 | dip->i_depth = y; |
914 | 914 | ||
915 | gfs2_dinode_out(dip, dibh->b_data); | 915 | gfs2_dinode_out(dip, dibh->b_data); |
916 | 916 | ||
@@ -941,7 +941,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) | |||
941 | int x, moved = 0; | 941 | int x, moved = 0; |
942 | int error; | 942 | int error; |
943 | 943 | ||
944 | index = name->hash >> (32 - dip->i_di.di_depth); | 944 | index = name->hash >> (32 - dip->i_depth); |
945 | error = get_leaf_nr(dip, index, &leaf_no); | 945 | error = get_leaf_nr(dip, index, &leaf_no); |
946 | if (error) | 946 | if (error) |
947 | return error; | 947 | return error; |
@@ -952,7 +952,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) | |||
952 | return error; | 952 | return error; |
953 | 953 | ||
954 | oleaf = (struct gfs2_leaf *)obh->b_data; | 954 | oleaf = (struct gfs2_leaf *)obh->b_data; |
955 | if (dip->i_di.di_depth == be16_to_cpu(oleaf->lf_depth)) { | 955 | if (dip->i_depth == be16_to_cpu(oleaf->lf_depth)) { |
956 | brelse(obh); | 956 | brelse(obh); |
957 | return 1; /* can't split */ | 957 | return 1; /* can't split */ |
958 | } | 958 | } |
@@ -967,10 +967,10 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) | |||
967 | bn = nbh->b_blocknr; | 967 | bn = nbh->b_blocknr; |
968 | 968 | ||
969 | /* Compute the start and len of leaf pointers in the hash table. */ | 969 | /* Compute the start and len of leaf pointers in the hash table. */ |
970 | len = 1 << (dip->i_di.di_depth - be16_to_cpu(oleaf->lf_depth)); | 970 | len = 1 << (dip->i_depth - be16_to_cpu(oleaf->lf_depth)); |
971 | half_len = len >> 1; | 971 | half_len = len >> 1; |
972 | if (!half_len) { | 972 | if (!half_len) { |
973 | printk(KERN_WARNING "di_depth %u lf_depth %u index %u\n", dip->i_di.di_depth, be16_to_cpu(oleaf->lf_depth), index); | 973 | printk(KERN_WARNING "i_depth %u lf_depth %u index %u\n", dip->i_depth, be16_to_cpu(oleaf->lf_depth), index); |
974 | gfs2_consist_inode(dip); | 974 | gfs2_consist_inode(dip); |
975 | error = -EIO; | 975 | error = -EIO; |
976 | goto fail_brelse; | 976 | goto fail_brelse; |
@@ -997,7 +997,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) | |||
997 | kfree(lp); | 997 | kfree(lp); |
998 | 998 | ||
999 | /* Compute the divider */ | 999 | /* Compute the divider */ |
1000 | divider = (start + half_len) << (32 - dip->i_di.di_depth); | 1000 | divider = (start + half_len) << (32 - dip->i_depth); |
1001 | 1001 | ||
1002 | /* Copy the entries */ | 1002 | /* Copy the entries */ |
1003 | dirent_first(dip, obh, &dent); | 1003 | dirent_first(dip, obh, &dent); |
@@ -1021,13 +1021,13 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) | |||
1021 | 1021 | ||
1022 | new->de_inum = dent->de_inum; /* No endian worries */ | 1022 | new->de_inum = dent->de_inum; /* No endian worries */ |
1023 | new->de_type = dent->de_type; /* No endian worries */ | 1023 | new->de_type = dent->de_type; /* No endian worries */ |
1024 | nleaf->lf_entries = cpu_to_be16(be16_to_cpu(nleaf->lf_entries)+1); | 1024 | be16_add_cpu(&nleaf->lf_entries, 1); |
1025 | 1025 | ||
1026 | dirent_del(dip, obh, prev, dent); | 1026 | dirent_del(dip, obh, prev, dent); |
1027 | 1027 | ||
1028 | if (!oleaf->lf_entries) | 1028 | if (!oleaf->lf_entries) |
1029 | gfs2_consist_inode(dip); | 1029 | gfs2_consist_inode(dip); |
1030 | oleaf->lf_entries = cpu_to_be16(be16_to_cpu(oleaf->lf_entries)-1); | 1030 | be16_add_cpu(&oleaf->lf_entries, -1); |
1031 | 1031 | ||
1032 | if (!prev) | 1032 | if (!prev) |
1033 | prev = dent; | 1033 | prev = dent; |
@@ -1044,8 +1044,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) | |||
1044 | error = gfs2_meta_inode_buffer(dip, &dibh); | 1044 | error = gfs2_meta_inode_buffer(dip, &dibh); |
1045 | if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) { | 1045 | if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) { |
1046 | gfs2_trans_add_bh(dip->i_gl, dibh, 1); | 1046 | gfs2_trans_add_bh(dip->i_gl, dibh, 1); |
1047 | dip->i_di.di_blocks++; | 1047 | gfs2_add_inode_blocks(&dip->i_inode, 1); |
1048 | gfs2_set_inode_blocks(&dip->i_inode); | ||
1049 | gfs2_dinode_out(dip, dibh->b_data); | 1048 | gfs2_dinode_out(dip, dibh->b_data); |
1050 | brelse(dibh); | 1049 | brelse(dibh); |
1051 | } | 1050 | } |
@@ -1082,7 +1081,7 @@ static int dir_double_exhash(struct gfs2_inode *dip) | |||
1082 | int x; | 1081 | int x; |
1083 | int error = 0; | 1082 | int error = 0; |
1084 | 1083 | ||
1085 | hsize = 1 << dip->i_di.di_depth; | 1084 | hsize = 1 << dip->i_depth; |
1086 | if (hsize * sizeof(u64) != dip->i_di.di_size) { | 1085 | if (hsize * sizeof(u64) != dip->i_di.di_size) { |
1087 | gfs2_consist_inode(dip); | 1086 | gfs2_consist_inode(dip); |
1088 | return -EIO; | 1087 | return -EIO; |
@@ -1090,7 +1089,7 @@ static int dir_double_exhash(struct gfs2_inode *dip) | |||
1090 | 1089 | ||
1091 | /* Allocate both the "from" and "to" buffers in one big chunk */ | 1090 | /* Allocate both the "from" and "to" buffers in one big chunk */ |
1092 | 1091 | ||
1093 | buf = kcalloc(3, sdp->sd_hash_bsize, GFP_KERNEL | __GFP_NOFAIL); | 1092 | buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS | __GFP_NOFAIL); |
1094 | 1093 | ||
1095 | for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) { | 1094 | for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) { |
1096 | error = gfs2_dir_read_data(dip, (char *)buf, | 1095 | error = gfs2_dir_read_data(dip, (char *)buf, |
@@ -1125,7 +1124,7 @@ static int dir_double_exhash(struct gfs2_inode *dip) | |||
1125 | 1124 | ||
1126 | error = gfs2_meta_inode_buffer(dip, &dibh); | 1125 | error = gfs2_meta_inode_buffer(dip, &dibh); |
1127 | if (!gfs2_assert_withdraw(sdp, !error)) { | 1126 | if (!gfs2_assert_withdraw(sdp, !error)) { |
1128 | dip->i_di.di_depth++; | 1127 | dip->i_depth++; |
1129 | gfs2_dinode_out(dip, dibh->b_data); | 1128 | gfs2_dinode_out(dip, dibh->b_data); |
1130 | brelse(dibh); | 1129 | brelse(dibh); |
1131 | } | 1130 | } |
@@ -1370,16 +1369,16 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, | |||
1370 | int error = 0; | 1369 | int error = 0; |
1371 | unsigned depth = 0; | 1370 | unsigned depth = 0; |
1372 | 1371 | ||
1373 | hsize = 1 << dip->i_di.di_depth; | 1372 | hsize = 1 << dip->i_depth; |
1374 | if (hsize * sizeof(u64) != dip->i_di.di_size) { | 1373 | if (hsize * sizeof(u64) != dip->i_di.di_size) { |
1375 | gfs2_consist_inode(dip); | 1374 | gfs2_consist_inode(dip); |
1376 | return -EIO; | 1375 | return -EIO; |
1377 | } | 1376 | } |
1378 | 1377 | ||
1379 | hash = gfs2_dir_offset2hash(*offset); | 1378 | hash = gfs2_dir_offset2hash(*offset); |
1380 | index = hash >> (32 - dip->i_di.di_depth); | 1379 | index = hash >> (32 - dip->i_depth); |
1381 | 1380 | ||
1382 | lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL); | 1381 | lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS); |
1383 | if (!lp) | 1382 | if (!lp) |
1384 | return -ENOMEM; | 1383 | return -ENOMEM; |
1385 | 1384 | ||
@@ -1405,7 +1404,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, | |||
1405 | if (error) | 1404 | if (error) |
1406 | break; | 1405 | break; |
1407 | 1406 | ||
1408 | len = 1 << (dip->i_di.di_depth - depth); | 1407 | len = 1 << (dip->i_depth - depth); |
1409 | index = (index & ~(len - 1)) + len; | 1408 | index = (index & ~(len - 1)) + len; |
1410 | } | 1409 | } |
1411 | 1410 | ||
@@ -1444,7 +1443,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, | |||
1444 | 1443 | ||
1445 | error = -ENOMEM; | 1444 | error = -ENOMEM; |
1446 | /* 96 is max number of dirents which can be stuffed into an inode */ | 1445 | /* 96 is max number of dirents which can be stuffed into an inode */ |
1447 | darr = kmalloc(96 * sizeof(struct gfs2_dirent *), GFP_KERNEL); | 1446 | darr = kmalloc(96 * sizeof(struct gfs2_dirent *), GFP_NOFS); |
1448 | if (darr) { | 1447 | if (darr) { |
1449 | g.pdent = darr; | 1448 | g.pdent = darr; |
1450 | g.offset = 0; | 1449 | g.offset = 0; |
@@ -1549,7 +1548,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) | |||
1549 | u32 index; | 1548 | u32 index; |
1550 | u64 bn; | 1549 | u64 bn; |
1551 | 1550 | ||
1552 | index = name->hash >> (32 - ip->i_di.di_depth); | 1551 | index = name->hash >> (32 - ip->i_depth); |
1553 | error = get_first_leaf(ip, index, &obh); | 1552 | error = get_first_leaf(ip, index, &obh); |
1554 | if (error) | 1553 | if (error) |
1555 | return error; | 1554 | return error; |
@@ -1579,8 +1578,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) | |||
1579 | if (error) | 1578 | if (error) |
1580 | return error; | 1579 | return error; |
1581 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | 1580 | gfs2_trans_add_bh(ip->i_gl, bh, 1); |
1582 | ip->i_di.di_blocks++; | 1581 | gfs2_add_inode_blocks(&ip->i_inode, 1); |
1583 | gfs2_set_inode_blocks(&ip->i_inode); | ||
1584 | gfs2_dinode_out(ip, bh->b_data); | 1582 | gfs2_dinode_out(ip, bh->b_data); |
1585 | brelse(bh); | 1583 | brelse(bh); |
1586 | return 0; | 1584 | return 0; |
@@ -1616,7 +1614,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, | |||
1616 | dent->de_type = cpu_to_be16(type); | 1614 | dent->de_type = cpu_to_be16(type); |
1617 | if (ip->i_di.di_flags & GFS2_DIF_EXHASH) { | 1615 | if (ip->i_di.di_flags & GFS2_DIF_EXHASH) { |
1618 | leaf = (struct gfs2_leaf *)bh->b_data; | 1616 | leaf = (struct gfs2_leaf *)bh->b_data; |
1619 | leaf->lf_entries = cpu_to_be16(be16_to_cpu(leaf->lf_entries) + 1); | 1617 | be16_add_cpu(&leaf->lf_entries, 1); |
1620 | } | 1618 | } |
1621 | brelse(bh); | 1619 | brelse(bh); |
1622 | error = gfs2_meta_inode_buffer(ip, &bh); | 1620 | error = gfs2_meta_inode_buffer(ip, &bh); |
@@ -1641,7 +1639,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, | |||
1641 | continue; | 1639 | continue; |
1642 | if (error < 0) | 1640 | if (error < 0) |
1643 | break; | 1641 | break; |
1644 | if (ip->i_di.di_depth < GFS2_DIR_MAX_DEPTH) { | 1642 | if (ip->i_depth < GFS2_DIR_MAX_DEPTH) { |
1645 | error = dir_double_exhash(ip); | 1643 | error = dir_double_exhash(ip); |
1646 | if (error) | 1644 | if (error) |
1647 | break; | 1645 | break; |
@@ -1785,13 +1783,13 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data) | |||
1785 | u64 leaf_no; | 1783 | u64 leaf_no; |
1786 | int error = 0; | 1784 | int error = 0; |
1787 | 1785 | ||
1788 | hsize = 1 << dip->i_di.di_depth; | 1786 | hsize = 1 << dip->i_depth; |
1789 | if (hsize * sizeof(u64) != dip->i_di.di_size) { | 1787 | if (hsize * sizeof(u64) != dip->i_di.di_size) { |
1790 | gfs2_consist_inode(dip); | 1788 | gfs2_consist_inode(dip); |
1791 | return -EIO; | 1789 | return -EIO; |
1792 | } | 1790 | } |
1793 | 1791 | ||
1794 | lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL); | 1792 | lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS); |
1795 | if (!lp) | 1793 | if (!lp) |
1796 | return -ENOMEM; | 1794 | return -ENOMEM; |
1797 | 1795 | ||
@@ -1817,7 +1815,7 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data) | |||
1817 | if (error) | 1815 | if (error) |
1818 | goto out; | 1816 | goto out; |
1819 | leaf = (struct gfs2_leaf *)bh->b_data; | 1817 | leaf = (struct gfs2_leaf *)bh->b_data; |
1820 | len = 1 << (dip->i_di.di_depth - be16_to_cpu(leaf->lf_depth)); | 1818 | len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth)); |
1821 | brelse(bh); | 1819 | brelse(bh); |
1822 | 1820 | ||
1823 | error = lc(dip, index, len, leaf_no, data); | 1821 | error = lc(dip, index, len, leaf_no, data); |
@@ -1866,15 +1864,18 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, | |||
1866 | 1864 | ||
1867 | memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); | 1865 | memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); |
1868 | 1866 | ||
1869 | ht = kzalloc(size, GFP_KERNEL); | 1867 | ht = kzalloc(size, GFP_NOFS); |
1870 | if (!ht) | 1868 | if (!ht) |
1871 | return -ENOMEM; | 1869 | return -ENOMEM; |
1872 | 1870 | ||
1873 | gfs2_alloc_get(dip); | 1871 | if (!gfs2_alloc_get(dip)) { |
1872 | error = -ENOMEM; | ||
1873 | goto out; | ||
1874 | } | ||
1874 | 1875 | ||
1875 | error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 1876 | error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); |
1876 | if (error) | 1877 | if (error) |
1877 | goto out; | 1878 | goto out_put; |
1878 | 1879 | ||
1879 | error = gfs2_rindex_hold(sdp, &dip->i_alloc->al_ri_gh); | 1880 | error = gfs2_rindex_hold(sdp, &dip->i_alloc->al_ri_gh); |
1880 | if (error) | 1881 | if (error) |
@@ -1894,7 +1895,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, | |||
1894 | l_blocks++; | 1895 | l_blocks++; |
1895 | } | 1896 | } |
1896 | 1897 | ||
1897 | gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0); | 1898 | gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE); |
1898 | 1899 | ||
1899 | for (x = 0; x < rlist.rl_rgrps; x++) { | 1900 | for (x = 0; x < rlist.rl_rgrps; x++) { |
1900 | struct gfs2_rgrpd *rgd; | 1901 | struct gfs2_rgrpd *rgd; |
@@ -1921,11 +1922,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, | |||
1921 | brelse(bh); | 1922 | brelse(bh); |
1922 | 1923 | ||
1923 | gfs2_free_meta(dip, blk, 1); | 1924 | gfs2_free_meta(dip, blk, 1); |
1924 | 1925 | gfs2_add_inode_blocks(&dip->i_inode, -1); | |
1925 | if (!dip->i_di.di_blocks) | ||
1926 | gfs2_consist_inode(dip); | ||
1927 | dip->i_di.di_blocks--; | ||
1928 | gfs2_set_inode_blocks(&dip->i_inode); | ||
1929 | } | 1926 | } |
1930 | 1927 | ||
1931 | error = gfs2_dir_write_data(dip, ht, index * sizeof(u64), size); | 1928 | error = gfs2_dir_write_data(dip, ht, index * sizeof(u64), size); |
@@ -1952,8 +1949,9 @@ out_rlist: | |||
1952 | gfs2_glock_dq_uninit(&dip->i_alloc->al_ri_gh); | 1949 | gfs2_glock_dq_uninit(&dip->i_alloc->al_ri_gh); |
1953 | out_qs: | 1950 | out_qs: |
1954 | gfs2_quota_unhold(dip); | 1951 | gfs2_quota_unhold(dip); |
1955 | out: | 1952 | out_put: |
1956 | gfs2_alloc_put(dip); | 1953 | gfs2_alloc_put(dip); |
1954 | out: | ||
1957 | kfree(ht); | 1955 | kfree(ht); |
1958 | return error; | 1956 | return error; |
1959 | } | 1957 | } |
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c index bee99704ea10..e3f76f451b0a 100644 --- a/fs/gfs2/eattr.c +++ b/fs/gfs2/eattr.c | |||
@@ -277,10 +277,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, | |||
277 | } | 277 | } |
278 | 278 | ||
279 | *dataptrs = 0; | 279 | *dataptrs = 0; |
280 | if (!ip->i_di.di_blocks) | 280 | gfs2_add_inode_blocks(&ip->i_inode, -1); |
281 | gfs2_consist_inode(ip); | ||
282 | ip->i_di.di_blocks--; | ||
283 | gfs2_set_inode_blocks(&ip->i_inode); | ||
284 | } | 281 | } |
285 | if (bstart) | 282 | if (bstart) |
286 | gfs2_free_meta(ip, bstart, blen); | 283 | gfs2_free_meta(ip, bstart, blen); |
@@ -321,6 +318,8 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, | |||
321 | int error; | 318 | int error; |
322 | 319 | ||
323 | al = gfs2_alloc_get(ip); | 320 | al = gfs2_alloc_get(ip); |
321 | if (!al) | ||
322 | return -ENOMEM; | ||
324 | 323 | ||
325 | error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 324 | error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); |
326 | if (error) | 325 | if (error) |
@@ -449,7 +448,7 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, | |||
449 | unsigned int x; | 448 | unsigned int x; |
450 | int error = 0; | 449 | int error = 0; |
451 | 450 | ||
452 | bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL); | 451 | bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS); |
453 | if (!bh) | 452 | if (!bh) |
454 | return -ENOMEM; | 453 | return -ENOMEM; |
455 | 454 | ||
@@ -582,10 +581,11 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp) | |||
582 | { | 581 | { |
583 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 582 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
584 | struct gfs2_ea_header *ea; | 583 | struct gfs2_ea_header *ea; |
584 | unsigned int n = 1; | ||
585 | u64 block; | 585 | u64 block; |
586 | 586 | ||
587 | block = gfs2_alloc_meta(ip); | 587 | block = gfs2_alloc_block(ip, &n); |
588 | 588 | gfs2_trans_add_unrevoke(sdp, block, 1); | |
589 | *bhp = gfs2_meta_new(ip->i_gl, block); | 589 | *bhp = gfs2_meta_new(ip->i_gl, block); |
590 | gfs2_trans_add_bh(ip->i_gl, *bhp, 1); | 590 | gfs2_trans_add_bh(ip->i_gl, *bhp, 1); |
591 | gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA); | 591 | gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA); |
@@ -597,8 +597,7 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp) | |||
597 | ea->ea_flags = GFS2_EAFLAG_LAST; | 597 | ea->ea_flags = GFS2_EAFLAG_LAST; |
598 | ea->ea_num_ptrs = 0; | 598 | ea->ea_num_ptrs = 0; |
599 | 599 | ||
600 | ip->i_di.di_blocks++; | 600 | gfs2_add_inode_blocks(&ip->i_inode, 1); |
601 | gfs2_set_inode_blocks(&ip->i_inode); | ||
602 | 601 | ||
603 | return 0; | 602 | return 0; |
604 | } | 603 | } |
@@ -642,15 +641,15 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea, | |||
642 | struct buffer_head *bh; | 641 | struct buffer_head *bh; |
643 | u64 block; | 642 | u64 block; |
644 | int mh_size = sizeof(struct gfs2_meta_header); | 643 | int mh_size = sizeof(struct gfs2_meta_header); |
644 | unsigned int n = 1; | ||
645 | 645 | ||
646 | block = gfs2_alloc_meta(ip); | 646 | block = gfs2_alloc_block(ip, &n); |
647 | 647 | gfs2_trans_add_unrevoke(sdp, block, 1); | |
648 | bh = gfs2_meta_new(ip->i_gl, block); | 648 | bh = gfs2_meta_new(ip->i_gl, block); |
649 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | 649 | gfs2_trans_add_bh(ip->i_gl, bh, 1); |
650 | gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED); | 650 | gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED); |
651 | 651 | ||
652 | ip->i_di.di_blocks++; | 652 | gfs2_add_inode_blocks(&ip->i_inode, 1); |
653 | gfs2_set_inode_blocks(&ip->i_inode); | ||
654 | 653 | ||
655 | copy = data_len > sdp->sd_jbsize ? sdp->sd_jbsize : | 654 | copy = data_len > sdp->sd_jbsize ? sdp->sd_jbsize : |
656 | data_len; | 655 | data_len; |
@@ -684,15 +683,13 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, | |||
684 | int error; | 683 | int error; |
685 | 684 | ||
686 | al = gfs2_alloc_get(ip); | 685 | al = gfs2_alloc_get(ip); |
686 | if (!al) | ||
687 | return -ENOMEM; | ||
687 | 688 | ||
688 | error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 689 | error = gfs2_quota_lock_check(ip); |
689 | if (error) | 690 | if (error) |
690 | goto out; | 691 | goto out; |
691 | 692 | ||
692 | error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); | ||
693 | if (error) | ||
694 | goto out_gunlock_q; | ||
695 | |||
696 | al->al_requested = blks; | 693 | al->al_requested = blks; |
697 | 694 | ||
698 | error = gfs2_inplace_reserve(ip); | 695 | error = gfs2_inplace_reserve(ip); |
@@ -966,9 +963,9 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er, | |||
966 | gfs2_trans_add_bh(ip->i_gl, indbh, 1); | 963 | gfs2_trans_add_bh(ip->i_gl, indbh, 1); |
967 | } else { | 964 | } else { |
968 | u64 blk; | 965 | u64 blk; |
969 | 966 | unsigned int n = 1; | |
970 | blk = gfs2_alloc_meta(ip); | 967 | blk = gfs2_alloc_block(ip, &n); |
971 | 968 | gfs2_trans_add_unrevoke(sdp, blk, 1); | |
972 | indbh = gfs2_meta_new(ip->i_gl, blk); | 969 | indbh = gfs2_meta_new(ip->i_gl, blk); |
973 | gfs2_trans_add_bh(ip->i_gl, indbh, 1); | 970 | gfs2_trans_add_bh(ip->i_gl, indbh, 1); |
974 | gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); | 971 | gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); |
@@ -978,8 +975,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er, | |||
978 | *eablk = cpu_to_be64(ip->i_di.di_eattr); | 975 | *eablk = cpu_to_be64(ip->i_di.di_eattr); |
979 | ip->i_di.di_eattr = blk; | 976 | ip->i_di.di_eattr = blk; |
980 | ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT; | 977 | ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT; |
981 | ip->i_di.di_blocks++; | 978 | gfs2_add_inode_blocks(&ip->i_inode, 1); |
982 | gfs2_set_inode_blocks(&ip->i_inode); | ||
983 | 979 | ||
984 | eablk++; | 980 | eablk++; |
985 | } | 981 | } |
@@ -1210,7 +1206,7 @@ static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip, | |||
1210 | unsigned int x; | 1206 | unsigned int x; |
1211 | int error; | 1207 | int error; |
1212 | 1208 | ||
1213 | bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL); | 1209 | bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS); |
1214 | if (!bh) | 1210 | if (!bh) |
1215 | return -ENOMEM; | 1211 | return -ENOMEM; |
1216 | 1212 | ||
@@ -1347,7 +1343,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip) | |||
1347 | else | 1343 | else |
1348 | goto out; | 1344 | goto out; |
1349 | 1345 | ||
1350 | gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0); | 1346 | gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE); |
1351 | 1347 | ||
1352 | for (x = 0; x < rlist.rl_rgrps; x++) { | 1348 | for (x = 0; x < rlist.rl_rgrps; x++) { |
1353 | struct gfs2_rgrpd *rgd; | 1349 | struct gfs2_rgrpd *rgd; |
@@ -1387,10 +1383,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip) | |||
1387 | } | 1383 | } |
1388 | 1384 | ||
1389 | *eablk = 0; | 1385 | *eablk = 0; |
1390 | if (!ip->i_di.di_blocks) | 1386 | gfs2_add_inode_blocks(&ip->i_inode, -1); |
1391 | gfs2_consist_inode(ip); | ||
1392 | ip->i_di.di_blocks--; | ||
1393 | gfs2_set_inode_blocks(&ip->i_inode); | ||
1394 | } | 1387 | } |
1395 | if (bstart) | 1388 | if (bstart) |
1396 | gfs2_free_meta(ip, bstart, blen); | 1389 | gfs2_free_meta(ip, bstart, blen); |
@@ -1442,10 +1435,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip) | |||
1442 | gfs2_free_meta(ip, ip->i_di.di_eattr, 1); | 1435 | gfs2_free_meta(ip, ip->i_di.di_eattr, 1); |
1443 | 1436 | ||
1444 | ip->i_di.di_eattr = 0; | 1437 | ip->i_di.di_eattr = 0; |
1445 | if (!ip->i_di.di_blocks) | 1438 | gfs2_add_inode_blocks(&ip->i_inode, -1); |
1446 | gfs2_consist_inode(ip); | ||
1447 | ip->i_di.di_blocks--; | ||
1448 | gfs2_set_inode_blocks(&ip->i_inode); | ||
1449 | 1439 | ||
1450 | error = gfs2_meta_inode_buffer(ip, &dibh); | 1440 | error = gfs2_meta_inode_buffer(ip, &dibh); |
1451 | if (!error) { | 1441 | if (!error) { |
@@ -1474,6 +1464,8 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip) | |||
1474 | int error; | 1464 | int error; |
1475 | 1465 | ||
1476 | al = gfs2_alloc_get(ip); | 1466 | al = gfs2_alloc_get(ip); |
1467 | if (!al) | ||
1468 | return -ENOMEM; | ||
1477 | 1469 | ||
1478 | error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 1470 | error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); |
1479 | if (error) | 1471 | if (error) |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 7175a4d06435..d636b3e80f5d 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
3 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -35,7 +35,6 @@ | |||
35 | #include "glock.h" | 35 | #include "glock.h" |
36 | #include "glops.h" | 36 | #include "glops.h" |
37 | #include "inode.h" | 37 | #include "inode.h" |
38 | #include "lm.h" | ||
39 | #include "lops.h" | 38 | #include "lops.h" |
40 | #include "meta_io.h" | 39 | #include "meta_io.h" |
41 | #include "quota.h" | 40 | #include "quota.h" |
@@ -183,7 +182,8 @@ static void glock_free(struct gfs2_glock *gl) | |||
183 | struct gfs2_sbd *sdp = gl->gl_sbd; | 182 | struct gfs2_sbd *sdp = gl->gl_sbd; |
184 | struct inode *aspace = gl->gl_aspace; | 183 | struct inode *aspace = gl->gl_aspace; |
185 | 184 | ||
186 | gfs2_lm_put_lock(sdp, gl->gl_lock); | 185 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
186 | sdp->sd_lockstruct.ls_ops->lm_put_lock(gl->gl_lock); | ||
187 | 187 | ||
188 | if (aspace) | 188 | if (aspace) |
189 | gfs2_aspace_put(aspace); | 189 | gfs2_aspace_put(aspace); |
@@ -197,7 +197,7 @@ static void glock_free(struct gfs2_glock *gl) | |||
197 | * | 197 | * |
198 | */ | 198 | */ |
199 | 199 | ||
200 | void gfs2_glock_hold(struct gfs2_glock *gl) | 200 | static void gfs2_glock_hold(struct gfs2_glock *gl) |
201 | { | 201 | { |
202 | atomic_inc(&gl->gl_ref); | 202 | atomic_inc(&gl->gl_ref); |
203 | } | 203 | } |
@@ -293,6 +293,16 @@ static void glock_work_func(struct work_struct *work) | |||
293 | gfs2_glock_put(gl); | 293 | gfs2_glock_put(gl); |
294 | } | 294 | } |
295 | 295 | ||
296 | static int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
297 | void **lockp) | ||
298 | { | ||
299 | int error = -EIO; | ||
300 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
301 | error = sdp->sd_lockstruct.ls_ops->lm_get_lock( | ||
302 | sdp->sd_lockstruct.ls_lockspace, name, lockp); | ||
303 | return error; | ||
304 | } | ||
305 | |||
296 | /** | 306 | /** |
297 | * gfs2_glock_get() - Get a glock, or create one if one doesn't exist | 307 | * gfs2_glock_get() - Get a glock, or create one if one doesn't exist |
298 | * @sdp: The GFS2 superblock | 308 | * @sdp: The GFS2 superblock |
@@ -338,8 +348,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, | |||
338 | gl->gl_ip = 0; | 348 | gl->gl_ip = 0; |
339 | gl->gl_ops = glops; | 349 | gl->gl_ops = glops; |
340 | gl->gl_req_gh = NULL; | 350 | gl->gl_req_gh = NULL; |
341 | gl->gl_req_bh = NULL; | ||
342 | gl->gl_vn = 0; | ||
343 | gl->gl_stamp = jiffies; | 351 | gl->gl_stamp = jiffies; |
344 | gl->gl_tchange = jiffies; | 352 | gl->gl_tchange = jiffies; |
345 | gl->gl_object = NULL; | 353 | gl->gl_object = NULL; |
@@ -595,11 +603,12 @@ static void run_queue(struct gfs2_glock *gl) | |||
595 | blocked = rq_mutex(gh); | 603 | blocked = rq_mutex(gh); |
596 | } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { | 604 | } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { |
597 | blocked = rq_demote(gl); | 605 | blocked = rq_demote(gl); |
598 | if (gl->gl_waiters2 && !blocked) { | 606 | if (test_bit(GLF_WAITERS2, &gl->gl_flags) && |
607 | !blocked) { | ||
599 | set_bit(GLF_DEMOTE, &gl->gl_flags); | 608 | set_bit(GLF_DEMOTE, &gl->gl_flags); |
600 | gl->gl_demote_state = LM_ST_UNLOCKED; | 609 | gl->gl_demote_state = LM_ST_UNLOCKED; |
601 | } | 610 | } |
602 | gl->gl_waiters2 = 0; | 611 | clear_bit(GLF_WAITERS2, &gl->gl_flags); |
603 | } else if (!list_empty(&gl->gl_waiters3)) { | 612 | } else if (!list_empty(&gl->gl_waiters3)) { |
604 | gh = list_entry(gl->gl_waiters3.next, | 613 | gh = list_entry(gl->gl_waiters3.next, |
605 | struct gfs2_holder, gh_list); | 614 | struct gfs2_holder, gh_list); |
@@ -710,7 +719,7 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state, | |||
710 | } else if (gl->gl_demote_state != LM_ST_UNLOCKED && | 719 | } else if (gl->gl_demote_state != LM_ST_UNLOCKED && |
711 | gl->gl_demote_state != state) { | 720 | gl->gl_demote_state != state) { |
712 | if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) | 721 | if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) |
713 | gl->gl_waiters2 = 1; | 722 | set_bit(GLF_WAITERS2, &gl->gl_flags); |
714 | else | 723 | else |
715 | gl->gl_demote_state = LM_ST_UNLOCKED; | 724 | gl->gl_demote_state = LM_ST_UNLOCKED; |
716 | } | 725 | } |
@@ -743,6 +752,43 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) | |||
743 | } | 752 | } |
744 | 753 | ||
745 | /** | 754 | /** |
755 | * drop_bh - Called after a lock module unlock completes | ||
756 | * @gl: the glock | ||
757 | * @ret: the return status | ||
758 | * | ||
759 | * Doesn't wake up the process waiting on the struct gfs2_holder (if any) | ||
760 | * Doesn't drop the reference on the glock the top half took out | ||
761 | * | ||
762 | */ | ||
763 | |||
764 | static void drop_bh(struct gfs2_glock *gl, unsigned int ret) | ||
765 | { | ||
766 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
767 | struct gfs2_holder *gh = gl->gl_req_gh; | ||
768 | |||
769 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | ||
770 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); | ||
771 | gfs2_assert_warn(sdp, !ret); | ||
772 | |||
773 | state_change(gl, LM_ST_UNLOCKED); | ||
774 | |||
775 | if (test_and_clear_bit(GLF_CONV_DEADLK, &gl->gl_flags)) { | ||
776 | spin_lock(&gl->gl_spin); | ||
777 | gh->gh_error = 0; | ||
778 | spin_unlock(&gl->gl_spin); | ||
779 | gfs2_glock_xmote_th(gl, gl->gl_req_gh); | ||
780 | gfs2_glock_put(gl); | ||
781 | return; | ||
782 | } | ||
783 | |||
784 | spin_lock(&gl->gl_spin); | ||
785 | gfs2_demote_wake(gl); | ||
786 | clear_bit(GLF_LOCK, &gl->gl_flags); | ||
787 | spin_unlock(&gl->gl_spin); | ||
788 | gfs2_glock_put(gl); | ||
789 | } | ||
790 | |||
791 | /** | ||
746 | * xmote_bh - Called after the lock module is done acquiring a lock | 792 | * xmote_bh - Called after the lock module is done acquiring a lock |
747 | * @gl: The glock in question | 793 | * @gl: The glock in question |
748 | * @ret: the int returned from the lock module | 794 | * @ret: the int returned from the lock module |
@@ -754,25 +800,19 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) | |||
754 | struct gfs2_sbd *sdp = gl->gl_sbd; | 800 | struct gfs2_sbd *sdp = gl->gl_sbd; |
755 | const struct gfs2_glock_operations *glops = gl->gl_ops; | 801 | const struct gfs2_glock_operations *glops = gl->gl_ops; |
756 | struct gfs2_holder *gh = gl->gl_req_gh; | 802 | struct gfs2_holder *gh = gl->gl_req_gh; |
757 | int prev_state = gl->gl_state; | ||
758 | int op_done = 1; | 803 | int op_done = 1; |
759 | 804 | ||
805 | if (!gh && (ret & LM_OUT_ST_MASK) == LM_ST_UNLOCKED) { | ||
806 | drop_bh(gl, ret); | ||
807 | return; | ||
808 | } | ||
809 | |||
760 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | 810 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); |
761 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); | 811 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); |
762 | gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC)); | 812 | gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC)); |
763 | 813 | ||
764 | state_change(gl, ret & LM_OUT_ST_MASK); | 814 | state_change(gl, ret & LM_OUT_ST_MASK); |
765 | 815 | ||
766 | if (prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) { | ||
767 | if (glops->go_inval) | ||
768 | glops->go_inval(gl, DIO_METADATA); | ||
769 | } else if (gl->gl_state == LM_ST_DEFERRED) { | ||
770 | /* We might not want to do this here. | ||
771 | Look at moving to the inode glops. */ | ||
772 | if (glops->go_inval) | ||
773 | glops->go_inval(gl, 0); | ||
774 | } | ||
775 | |||
776 | /* Deal with each possible exit condition */ | 816 | /* Deal with each possible exit condition */ |
777 | 817 | ||
778 | if (!gh) { | 818 | if (!gh) { |
@@ -782,7 +822,6 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) | |||
782 | } else { | 822 | } else { |
783 | spin_lock(&gl->gl_spin); | 823 | spin_lock(&gl->gl_spin); |
784 | if (gl->gl_state != gl->gl_demote_state) { | 824 | if (gl->gl_state != gl->gl_demote_state) { |
785 | gl->gl_req_bh = NULL; | ||
786 | spin_unlock(&gl->gl_spin); | 825 | spin_unlock(&gl->gl_spin); |
787 | gfs2_glock_drop_th(gl); | 826 | gfs2_glock_drop_th(gl); |
788 | gfs2_glock_put(gl); | 827 | gfs2_glock_put(gl); |
@@ -793,6 +832,14 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) | |||
793 | } | 832 | } |
794 | } else { | 833 | } else { |
795 | spin_lock(&gl->gl_spin); | 834 | spin_lock(&gl->gl_spin); |
835 | if (ret & LM_OUT_CONV_DEADLK) { | ||
836 | gh->gh_error = 0; | ||
837 | set_bit(GLF_CONV_DEADLK, &gl->gl_flags); | ||
838 | spin_unlock(&gl->gl_spin); | ||
839 | gfs2_glock_drop_th(gl); | ||
840 | gfs2_glock_put(gl); | ||
841 | return; | ||
842 | } | ||
796 | list_del_init(&gh->gh_list); | 843 | list_del_init(&gh->gh_list); |
797 | gh->gh_error = -EIO; | 844 | gh->gh_error = -EIO; |
798 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | 845 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
@@ -824,7 +871,6 @@ out: | |||
824 | if (op_done) { | 871 | if (op_done) { |
825 | spin_lock(&gl->gl_spin); | 872 | spin_lock(&gl->gl_spin); |
826 | gl->gl_req_gh = NULL; | 873 | gl->gl_req_gh = NULL; |
827 | gl->gl_req_bh = NULL; | ||
828 | clear_bit(GLF_LOCK, &gl->gl_flags); | 874 | clear_bit(GLF_LOCK, &gl->gl_flags); |
829 | spin_unlock(&gl->gl_spin); | 875 | spin_unlock(&gl->gl_spin); |
830 | } | 876 | } |
@@ -835,6 +881,17 @@ out: | |||
835 | gfs2_holder_wake(gh); | 881 | gfs2_holder_wake(gh); |
836 | } | 882 | } |
837 | 883 | ||
884 | static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, | ||
885 | unsigned int cur_state, unsigned int req_state, | ||
886 | unsigned int flags) | ||
887 | { | ||
888 | int ret = 0; | ||
889 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
890 | ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state, | ||
891 | req_state, flags); | ||
892 | return ret; | ||
893 | } | ||
894 | |||
838 | /** | 895 | /** |
839 | * gfs2_glock_xmote_th - Call into the lock module to acquire or change a glock | 896 | * gfs2_glock_xmote_th - Call into the lock module to acquire or change a glock |
840 | * @gl: The glock in question | 897 | * @gl: The glock in question |
@@ -856,6 +913,8 @@ static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh) | |||
856 | 913 | ||
857 | if (glops->go_xmote_th) | 914 | if (glops->go_xmote_th) |
858 | glops->go_xmote_th(gl); | 915 | glops->go_xmote_th(gl); |
916 | if (state == LM_ST_DEFERRED && glops->go_inval) | ||
917 | glops->go_inval(gl, DIO_METADATA); | ||
859 | 918 | ||
860 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | 919 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); |
861 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); | 920 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); |
@@ -863,7 +922,6 @@ static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh) | |||
863 | gfs2_assert_warn(sdp, state != gl->gl_state); | 922 | gfs2_assert_warn(sdp, state != gl->gl_state); |
864 | 923 | ||
865 | gfs2_glock_hold(gl); | 924 | gfs2_glock_hold(gl); |
866 | gl->gl_req_bh = xmote_bh; | ||
867 | 925 | ||
868 | lck_ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, state, lck_flags); | 926 | lck_ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, state, lck_flags); |
869 | 927 | ||
@@ -876,49 +934,13 @@ static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh) | |||
876 | xmote_bh(gl, lck_ret); | 934 | xmote_bh(gl, lck_ret); |
877 | } | 935 | } |
878 | 936 | ||
879 | /** | 937 | static unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, void *lock, |
880 | * drop_bh - Called after a lock module unlock completes | 938 | unsigned int cur_state) |
881 | * @gl: the glock | ||
882 | * @ret: the return status | ||
883 | * | ||
884 | * Doesn't wake up the process waiting on the struct gfs2_holder (if any) | ||
885 | * Doesn't drop the reference on the glock the top half took out | ||
886 | * | ||
887 | */ | ||
888 | |||
889 | static void drop_bh(struct gfs2_glock *gl, unsigned int ret) | ||
890 | { | 939 | { |
891 | struct gfs2_sbd *sdp = gl->gl_sbd; | 940 | int ret = 0; |
892 | const struct gfs2_glock_operations *glops = gl->gl_ops; | 941 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
893 | struct gfs2_holder *gh = gl->gl_req_gh; | 942 | ret = sdp->sd_lockstruct.ls_ops->lm_unlock(lock, cur_state); |
894 | 943 | return ret; | |
895 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | ||
896 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); | ||
897 | gfs2_assert_warn(sdp, !ret); | ||
898 | |||
899 | state_change(gl, LM_ST_UNLOCKED); | ||
900 | |||
901 | if (glops->go_inval) | ||
902 | glops->go_inval(gl, DIO_METADATA); | ||
903 | |||
904 | if (gh) { | ||
905 | spin_lock(&gl->gl_spin); | ||
906 | list_del_init(&gh->gh_list); | ||
907 | gh->gh_error = 0; | ||
908 | spin_unlock(&gl->gl_spin); | ||
909 | } | ||
910 | |||
911 | spin_lock(&gl->gl_spin); | ||
912 | gfs2_demote_wake(gl); | ||
913 | gl->gl_req_gh = NULL; | ||
914 | gl->gl_req_bh = NULL; | ||
915 | clear_bit(GLF_LOCK, &gl->gl_flags); | ||
916 | spin_unlock(&gl->gl_spin); | ||
917 | |||
918 | gfs2_glock_put(gl); | ||
919 | |||
920 | if (gh) | ||
921 | gfs2_holder_wake(gh); | ||
922 | } | 944 | } |
923 | 945 | ||
924 | /** | 946 | /** |
@@ -935,13 +957,14 @@ static void gfs2_glock_drop_th(struct gfs2_glock *gl) | |||
935 | 957 | ||
936 | if (glops->go_xmote_th) | 958 | if (glops->go_xmote_th) |
937 | glops->go_xmote_th(gl); | 959 | glops->go_xmote_th(gl); |
960 | if (glops->go_inval) | ||
961 | glops->go_inval(gl, DIO_METADATA); | ||
938 | 962 | ||
939 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | 963 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); |
940 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); | 964 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); |
941 | gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED); | 965 | gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED); |
942 | 966 | ||
943 | gfs2_glock_hold(gl); | 967 | gfs2_glock_hold(gl); |
944 | gl->gl_req_bh = drop_bh; | ||
945 | 968 | ||
946 | ret = gfs2_lm_unlock(sdp, gl->gl_lock, gl->gl_state); | 969 | ret = gfs2_lm_unlock(sdp, gl->gl_lock, gl->gl_state); |
947 | 970 | ||
@@ -964,16 +987,17 @@ static void gfs2_glock_drop_th(struct gfs2_glock *gl) | |||
964 | static void do_cancels(struct gfs2_holder *gh) | 987 | static void do_cancels(struct gfs2_holder *gh) |
965 | { | 988 | { |
966 | struct gfs2_glock *gl = gh->gh_gl; | 989 | struct gfs2_glock *gl = gh->gh_gl; |
990 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
967 | 991 | ||
968 | spin_lock(&gl->gl_spin); | 992 | spin_lock(&gl->gl_spin); |
969 | 993 | ||
970 | while (gl->gl_req_gh != gh && | 994 | while (gl->gl_req_gh != gh && |
971 | !test_bit(HIF_HOLDER, &gh->gh_iflags) && | 995 | !test_bit(HIF_HOLDER, &gh->gh_iflags) && |
972 | !list_empty(&gh->gh_list)) { | 996 | !list_empty(&gh->gh_list)) { |
973 | if (gl->gl_req_bh && !(gl->gl_req_gh && | 997 | if (!(gl->gl_req_gh && (gl->gl_req_gh->gh_flags & GL_NOCANCEL))) { |
974 | (gl->gl_req_gh->gh_flags & GL_NOCANCEL))) { | ||
975 | spin_unlock(&gl->gl_spin); | 998 | spin_unlock(&gl->gl_spin); |
976 | gfs2_lm_cancel(gl->gl_sbd, gl->gl_lock); | 999 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
1000 | sdp->sd_lockstruct.ls_ops->lm_cancel(gl->gl_lock); | ||
977 | msleep(100); | 1001 | msleep(100); |
978 | spin_lock(&gl->gl_spin); | 1002 | spin_lock(&gl->gl_spin); |
979 | } else { | 1003 | } else { |
@@ -1041,7 +1065,6 @@ static int glock_wait_internal(struct gfs2_holder *gh) | |||
1041 | 1065 | ||
1042 | spin_lock(&gl->gl_spin); | 1066 | spin_lock(&gl->gl_spin); |
1043 | gl->gl_req_gh = NULL; | 1067 | gl->gl_req_gh = NULL; |
1044 | gl->gl_req_bh = NULL; | ||
1045 | clear_bit(GLF_LOCK, &gl->gl_flags); | 1068 | clear_bit(GLF_LOCK, &gl->gl_flags); |
1046 | run_queue(gl); | 1069 | run_queue(gl); |
1047 | spin_unlock(&gl->gl_spin); | 1070 | spin_unlock(&gl->gl_spin); |
@@ -1428,6 +1451,14 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs) | |||
1428 | gfs2_glock_dq_uninit(&ghs[x]); | 1451 | gfs2_glock_dq_uninit(&ghs[x]); |
1429 | } | 1452 | } |
1430 | 1453 | ||
1454 | static int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp) | ||
1455 | { | ||
1456 | int error = -EIO; | ||
1457 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
1458 | error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp); | ||
1459 | return error; | ||
1460 | } | ||
1461 | |||
1431 | /** | 1462 | /** |
1432 | * gfs2_lvb_hold - attach a LVB from a glock | 1463 | * gfs2_lvb_hold - attach a LVB from a glock |
1433 | * @gl: The glock in question | 1464 | * @gl: The glock in question |
@@ -1463,12 +1494,15 @@ int gfs2_lvb_hold(struct gfs2_glock *gl) | |||
1463 | 1494 | ||
1464 | void gfs2_lvb_unhold(struct gfs2_glock *gl) | 1495 | void gfs2_lvb_unhold(struct gfs2_glock *gl) |
1465 | { | 1496 | { |
1497 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
1498 | |||
1466 | gfs2_glock_hold(gl); | 1499 | gfs2_glock_hold(gl); |
1467 | gfs2_glmutex_lock(gl); | 1500 | gfs2_glmutex_lock(gl); |
1468 | 1501 | ||
1469 | gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0); | 1502 | gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0); |
1470 | if (atomic_dec_and_test(&gl->gl_lvb_count)) { | 1503 | if (atomic_dec_and_test(&gl->gl_lvb_count)) { |
1471 | gfs2_lm_unhold_lvb(gl->gl_sbd, gl->gl_lock, gl->gl_lvb); | 1504 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
1505 | sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(gl->gl_lock, gl->gl_lvb); | ||
1472 | gl->gl_lvb = NULL; | 1506 | gl->gl_lvb = NULL; |
1473 | gfs2_glock_put(gl); | 1507 | gfs2_glock_put(gl); |
1474 | } | 1508 | } |
@@ -1534,8 +1568,7 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) | |||
1534 | gl = gfs2_glock_find(sdp, &async->lc_name); | 1568 | gl = gfs2_glock_find(sdp, &async->lc_name); |
1535 | if (gfs2_assert_warn(sdp, gl)) | 1569 | if (gfs2_assert_warn(sdp, gl)) |
1536 | return; | 1570 | return; |
1537 | if (!gfs2_assert_warn(sdp, gl->gl_req_bh)) | 1571 | xmote_bh(gl, async->lc_ret); |
1538 | gl->gl_req_bh(gl, async->lc_ret); | ||
1539 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) | 1572 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) |
1540 | gfs2_glock_put(gl); | 1573 | gfs2_glock_put(gl); |
1541 | up_read(&gfs2_umount_flush_sem); | 1574 | up_read(&gfs2_umount_flush_sem); |
@@ -1594,10 +1627,10 @@ void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) | |||
1594 | gfs2_glock_hold(gl); | 1627 | gfs2_glock_hold(gl); |
1595 | list_add(&gl->gl_reclaim, &sdp->sd_reclaim_list); | 1628 | list_add(&gl->gl_reclaim, &sdp->sd_reclaim_list); |
1596 | atomic_inc(&sdp->sd_reclaim_count); | 1629 | atomic_inc(&sdp->sd_reclaim_count); |
1597 | } | 1630 | spin_unlock(&sdp->sd_reclaim_lock); |
1598 | spin_unlock(&sdp->sd_reclaim_lock); | 1631 | wake_up(&sdp->sd_reclaim_wq); |
1599 | 1632 | } else | |
1600 | wake_up(&sdp->sd_reclaim_wq); | 1633 | spin_unlock(&sdp->sd_reclaim_lock); |
1601 | } | 1634 | } |
1602 | 1635 | ||
1603 | /** | 1636 | /** |
@@ -1897,7 +1930,6 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) | |||
1897 | print_dbg(gi, " gl_owner = -1\n"); | 1930 | print_dbg(gi, " gl_owner = -1\n"); |
1898 | print_dbg(gi, " gl_ip = %lu\n", gl->gl_ip); | 1931 | print_dbg(gi, " gl_ip = %lu\n", gl->gl_ip); |
1899 | print_dbg(gi, " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no"); | 1932 | print_dbg(gi, " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no"); |
1900 | print_dbg(gi, " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no"); | ||
1901 | print_dbg(gi, " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); | 1933 | print_dbg(gi, " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); |
1902 | print_dbg(gi, " object = %s\n", (gl->gl_object) ? "yes" : "no"); | 1934 | print_dbg(gi, " object = %s\n", (gl->gl_object) ? "yes" : "no"); |
1903 | print_dbg(gi, " reclaim = %s\n", | 1935 | print_dbg(gi, " reclaim = %s\n", |
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 2f9c6d136b37..cdad3e6f8150 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h | |||
@@ -32,24 +32,23 @@ | |||
32 | #define GLR_TRYFAILED 13 | 32 | #define GLR_TRYFAILED 13 |
33 | #define GLR_CANCELED 14 | 33 | #define GLR_CANCELED 14 |
34 | 34 | ||
35 | static inline int gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) | 35 | static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) |
36 | { | 36 | { |
37 | struct gfs2_holder *gh; | 37 | struct gfs2_holder *gh; |
38 | int locked = 0; | ||
39 | struct pid *pid; | 38 | struct pid *pid; |
40 | 39 | ||
41 | /* Look in glock's list of holders for one with current task as owner */ | 40 | /* Look in glock's list of holders for one with current task as owner */ |
42 | spin_lock(&gl->gl_spin); | 41 | spin_lock(&gl->gl_spin); |
43 | pid = task_pid(current); | 42 | pid = task_pid(current); |
44 | list_for_each_entry(gh, &gl->gl_holders, gh_list) { | 43 | list_for_each_entry(gh, &gl->gl_holders, gh_list) { |
45 | if (gh->gh_owner_pid == pid) { | 44 | if (gh->gh_owner_pid == pid) |
46 | locked = 1; | 45 | goto out; |
47 | break; | ||
48 | } | ||
49 | } | 46 | } |
47 | gh = NULL; | ||
48 | out: | ||
50 | spin_unlock(&gl->gl_spin); | 49 | spin_unlock(&gl->gl_spin); |
51 | 50 | ||
52 | return locked; | 51 | return gh; |
53 | } | 52 | } |
54 | 53 | ||
55 | static inline int gfs2_glock_is_held_excl(struct gfs2_glock *gl) | 54 | static inline int gfs2_glock_is_held_excl(struct gfs2_glock *gl) |
@@ -79,7 +78,6 @@ static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl) | |||
79 | int gfs2_glock_get(struct gfs2_sbd *sdp, | 78 | int gfs2_glock_get(struct gfs2_sbd *sdp, |
80 | u64 number, const struct gfs2_glock_operations *glops, | 79 | u64 number, const struct gfs2_glock_operations *glops, |
81 | int create, struct gfs2_glock **glp); | 80 | int create, struct gfs2_glock **glp); |
82 | void gfs2_glock_hold(struct gfs2_glock *gl); | ||
83 | int gfs2_glock_put(struct gfs2_glock *gl); | 81 | int gfs2_glock_put(struct gfs2_glock *gl); |
84 | void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, | 82 | void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, |
85 | struct gfs2_holder *gh); | 83 | struct gfs2_holder *gh); |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index c663b7a0f410..d31badadef8f 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -126,7 +126,13 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags) | |||
126 | return; | 126 | return; |
127 | 127 | ||
128 | gfs2_meta_inval(gl); | 128 | gfs2_meta_inval(gl); |
129 | gl->gl_vn++; | 129 | if (gl->gl_object == GFS2_I(gl->gl_sbd->sd_rindex)) |
130 | gl->gl_sbd->sd_rindex_uptodate = 0; | ||
131 | else if (gl->gl_ops == &gfs2_rgrp_glops && gl->gl_object) { | ||
132 | struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object; | ||
133 | |||
134 | rgd->rd_flags &= ~GFS2_RDF_UPTODATE; | ||
135 | } | ||
130 | } | 136 | } |
131 | 137 | ||
132 | /** | 138 | /** |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 525dcae352d6..9c2c0b90b22a 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
3 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -44,7 +44,6 @@ struct gfs2_log_header_host { | |||
44 | 44 | ||
45 | struct gfs2_log_operations { | 45 | struct gfs2_log_operations { |
46 | void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_log_element *le); | 46 | void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_log_element *le); |
47 | void (*lo_incore_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr); | ||
48 | void (*lo_before_commit) (struct gfs2_sbd *sdp); | 47 | void (*lo_before_commit) (struct gfs2_sbd *sdp); |
49 | void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai); | 48 | void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai); |
50 | void (*lo_before_scan) (struct gfs2_jdesc *jd, | 49 | void (*lo_before_scan) (struct gfs2_jdesc *jd, |
@@ -70,7 +69,6 @@ struct gfs2_bitmap { | |||
70 | }; | 69 | }; |
71 | 70 | ||
72 | struct gfs2_rgrp_host { | 71 | struct gfs2_rgrp_host { |
73 | u32 rg_flags; | ||
74 | u32 rg_free; | 72 | u32 rg_free; |
75 | u32 rg_dinodes; | 73 | u32 rg_dinodes; |
76 | u64 rg_igeneration; | 74 | u64 rg_igeneration; |
@@ -87,17 +85,17 @@ struct gfs2_rgrpd { | |||
87 | u32 rd_data; /* num of data blocks in rgrp */ | 85 | u32 rd_data; /* num of data blocks in rgrp */ |
88 | u32 rd_bitbytes; /* number of bytes in data bitmaps */ | 86 | u32 rd_bitbytes; /* number of bytes in data bitmaps */ |
89 | struct gfs2_rgrp_host rd_rg; | 87 | struct gfs2_rgrp_host rd_rg; |
90 | u64 rd_rg_vn; | ||
91 | struct gfs2_bitmap *rd_bits; | 88 | struct gfs2_bitmap *rd_bits; |
92 | unsigned int rd_bh_count; | 89 | unsigned int rd_bh_count; |
93 | struct mutex rd_mutex; | 90 | struct mutex rd_mutex; |
94 | u32 rd_free_clone; | 91 | u32 rd_free_clone; |
95 | struct gfs2_log_element rd_le; | 92 | struct gfs2_log_element rd_le; |
96 | u32 rd_last_alloc_data; | 93 | u32 rd_last_alloc; |
97 | u32 rd_last_alloc_meta; | ||
98 | struct gfs2_sbd *rd_sbd; | 94 | struct gfs2_sbd *rd_sbd; |
99 | unsigned long rd_flags; | 95 | unsigned char rd_flags; |
100 | #define GFS2_RDF_CHECK 0x0001 /* Need to check for unlinked inodes */ | 96 | #define GFS2_RDF_CHECK 0x01 /* Need to check for unlinked inodes */ |
97 | #define GFS2_RDF_NOALLOC 0x02 /* rg prohibits allocation */ | ||
98 | #define GFS2_RDF_UPTODATE 0x04 /* rg is up to date */ | ||
101 | }; | 99 | }; |
102 | 100 | ||
103 | enum gfs2_state_bits { | 101 | enum gfs2_state_bits { |
@@ -168,6 +166,8 @@ enum { | |||
168 | GLF_DIRTY = 5, | 166 | GLF_DIRTY = 5, |
169 | GLF_DEMOTE_IN_PROGRESS = 6, | 167 | GLF_DEMOTE_IN_PROGRESS = 6, |
170 | GLF_LFLUSH = 7, | 168 | GLF_LFLUSH = 7, |
169 | GLF_WAITERS2 = 8, | ||
170 | GLF_CONV_DEADLK = 9, | ||
171 | }; | 171 | }; |
172 | 172 | ||
173 | struct gfs2_glock { | 173 | struct gfs2_glock { |
@@ -187,18 +187,15 @@ struct gfs2_glock { | |||
187 | struct list_head gl_holders; | 187 | struct list_head gl_holders; |
188 | struct list_head gl_waiters1; /* HIF_MUTEX */ | 188 | struct list_head gl_waiters1; /* HIF_MUTEX */ |
189 | struct list_head gl_waiters3; /* HIF_PROMOTE */ | 189 | struct list_head gl_waiters3; /* HIF_PROMOTE */ |
190 | int gl_waiters2; /* GIF_DEMOTE */ | ||
191 | 190 | ||
192 | const struct gfs2_glock_operations *gl_ops; | 191 | const struct gfs2_glock_operations *gl_ops; |
193 | 192 | ||
194 | struct gfs2_holder *gl_req_gh; | 193 | struct gfs2_holder *gl_req_gh; |
195 | gfs2_glop_bh_t gl_req_bh; | ||
196 | 194 | ||
197 | void *gl_lock; | 195 | void *gl_lock; |
198 | char *gl_lvb; | 196 | char *gl_lvb; |
199 | atomic_t gl_lvb_count; | 197 | atomic_t gl_lvb_count; |
200 | 198 | ||
201 | u64 gl_vn; | ||
202 | unsigned long gl_stamp; | 199 | unsigned long gl_stamp; |
203 | unsigned long gl_tchange; | 200 | unsigned long gl_tchange; |
204 | void *gl_object; | 201 | void *gl_object; |
@@ -213,6 +210,8 @@ struct gfs2_glock { | |||
213 | struct delayed_work gl_work; | 210 | struct delayed_work gl_work; |
214 | }; | 211 | }; |
215 | 212 | ||
213 | #define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ | ||
214 | |||
216 | struct gfs2_alloc { | 215 | struct gfs2_alloc { |
217 | /* Quota stuff */ | 216 | /* Quota stuff */ |
218 | 217 | ||
@@ -241,14 +240,9 @@ enum { | |||
241 | 240 | ||
242 | struct gfs2_dinode_host { | 241 | struct gfs2_dinode_host { |
243 | u64 di_size; /* number of bytes in file */ | 242 | u64 di_size; /* number of bytes in file */ |
244 | u64 di_blocks; /* number of blocks in file */ | ||
245 | u64 di_goal_meta; /* rgrp to alloc from next */ | ||
246 | u64 di_goal_data; /* data block goal */ | ||
247 | u64 di_generation; /* generation number for NFS */ | 243 | u64 di_generation; /* generation number for NFS */ |
248 | u32 di_flags; /* GFS2_DIF_... */ | 244 | u32 di_flags; /* GFS2_DIF_... */ |
249 | u16 di_height; /* height of metadata */ | ||
250 | /* These only apply to directories */ | 245 | /* These only apply to directories */ |
251 | u16 di_depth; /* Number of bits in the table */ | ||
252 | u32 di_entries; /* The number of entries in the directory */ | 246 | u32 di_entries; /* The number of entries in the directory */ |
253 | u64 di_eattr; /* extended attribute block number */ | 247 | u64 di_eattr; /* extended attribute block number */ |
254 | }; | 248 | }; |
@@ -265,9 +259,10 @@ struct gfs2_inode { | |||
265 | struct gfs2_holder i_iopen_gh; | 259 | struct gfs2_holder i_iopen_gh; |
266 | struct gfs2_holder i_gh; /* for prepare/commit_write only */ | 260 | struct gfs2_holder i_gh; /* for prepare/commit_write only */ |
267 | struct gfs2_alloc *i_alloc; | 261 | struct gfs2_alloc *i_alloc; |
268 | u64 i_last_rg_alloc; | 262 | u64 i_goal; /* goal block for allocations */ |
269 | |||
270 | struct rw_semaphore i_rw_mutex; | 263 | struct rw_semaphore i_rw_mutex; |
264 | u8 i_height; | ||
265 | u8 i_depth; | ||
271 | }; | 266 | }; |
272 | 267 | ||
273 | /* | 268 | /* |
@@ -490,9 +485,9 @@ struct gfs2_sbd { | |||
490 | u32 sd_qc_per_block; | 485 | u32 sd_qc_per_block; |
491 | u32 sd_max_dirres; /* Max blocks needed to add a directory entry */ | 486 | u32 sd_max_dirres; /* Max blocks needed to add a directory entry */ |
492 | u32 sd_max_height; /* Max height of a file's metadata tree */ | 487 | u32 sd_max_height; /* Max height of a file's metadata tree */ |
493 | u64 sd_heightsize[GFS2_MAX_META_HEIGHT]; | 488 | u64 sd_heightsize[GFS2_MAX_META_HEIGHT + 1]; |
494 | u32 sd_max_jheight; /* Max height of journaled file's meta tree */ | 489 | u32 sd_max_jheight; /* Max height of journaled file's meta tree */ |
495 | u64 sd_jheightsize[GFS2_MAX_META_HEIGHT]; | 490 | u64 sd_jheightsize[GFS2_MAX_META_HEIGHT + 1]; |
496 | 491 | ||
497 | struct gfs2_args sd_args; /* Mount arguments */ | 492 | struct gfs2_args sd_args; /* Mount arguments */ |
498 | struct gfs2_tune sd_tune; /* Filesystem tuning structure */ | 493 | struct gfs2_tune sd_tune; /* Filesystem tuning structure */ |
@@ -533,7 +528,7 @@ struct gfs2_sbd { | |||
533 | 528 | ||
534 | /* Resource group stuff */ | 529 | /* Resource group stuff */ |
535 | 530 | ||
536 | u64 sd_rindex_vn; | 531 | int sd_rindex_uptodate; |
537 | spinlock_t sd_rindex_spin; | 532 | spinlock_t sd_rindex_spin; |
538 | struct mutex sd_rindex_mutex; | 533 | struct mutex sd_rindex_mutex; |
539 | struct list_head sd_rindex_list; | 534 | struct list_head sd_rindex_list; |
@@ -637,9 +632,6 @@ struct gfs2_sbd { | |||
637 | 632 | ||
638 | /* Counters */ | 633 | /* Counters */ |
639 | 634 | ||
640 | atomic_t sd_glock_count; | ||
641 | atomic_t sd_glock_held_count; | ||
642 | atomic_t sd_inode_count; | ||
643 | atomic_t sd_reclaimed; | 635 | atomic_t sd_reclaimed; |
644 | 636 | ||
645 | char sd_fsname[GFS2_FSNAME_LEN]; | 637 | char sd_fsname[GFS2_FSNAME_LEN]; |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 37725ade3c51..3a9ef526c308 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -149,7 +149,8 @@ void gfs2_set_iop(struct inode *inode) | |||
149 | } else if (S_ISLNK(mode)) { | 149 | } else if (S_ISLNK(mode)) { |
150 | inode->i_op = &gfs2_symlink_iops; | 150 | inode->i_op = &gfs2_symlink_iops; |
151 | } else { | 151 | } else { |
152 | inode->i_op = &gfs2_dev_iops; | 152 | inode->i_op = &gfs2_file_iops; |
153 | init_special_inode(inode, inode->i_mode, inode->i_rdev); | ||
153 | } | 154 | } |
154 | 155 | ||
155 | unlock_new_inode(inode); | 156 | unlock_new_inode(inode); |
@@ -248,12 +249,10 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
248 | { | 249 | { |
249 | struct gfs2_dinode_host *di = &ip->i_di; | 250 | struct gfs2_dinode_host *di = &ip->i_di; |
250 | const struct gfs2_dinode *str = buf; | 251 | const struct gfs2_dinode *str = buf; |
252 | u16 height, depth; | ||
251 | 253 | ||
252 | if (ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)) { | 254 | if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) |
253 | if (gfs2_consist_inode(ip)) | 255 | goto corrupt; |
254 | gfs2_dinode_print(ip); | ||
255 | return -EIO; | ||
256 | } | ||
257 | ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); | 256 | ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); |
258 | ip->i_inode.i_mode = be32_to_cpu(str->di_mode); | 257 | ip->i_inode.i_mode = be32_to_cpu(str->di_mode); |
259 | ip->i_inode.i_rdev = 0; | 258 | ip->i_inode.i_rdev = 0; |
@@ -275,8 +274,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
275 | ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); | 274 | ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); |
276 | di->di_size = be64_to_cpu(str->di_size); | 275 | di->di_size = be64_to_cpu(str->di_size); |
277 | i_size_write(&ip->i_inode, di->di_size); | 276 | i_size_write(&ip->i_inode, di->di_size); |
278 | di->di_blocks = be64_to_cpu(str->di_blocks); | 277 | gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); |
279 | gfs2_set_inode_blocks(&ip->i_inode); | ||
280 | ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime); | 278 | ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime); |
281 | ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); | 279 | ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); |
282 | ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); | 280 | ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); |
@@ -284,15 +282,20 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
284 | ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); | 282 | ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); |
285 | ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); | 283 | ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); |
286 | 284 | ||
287 | di->di_goal_meta = be64_to_cpu(str->di_goal_meta); | 285 | ip->i_goal = be64_to_cpu(str->di_goal_meta); |
288 | di->di_goal_data = be64_to_cpu(str->di_goal_data); | ||
289 | di->di_generation = be64_to_cpu(str->di_generation); | 286 | di->di_generation = be64_to_cpu(str->di_generation); |
290 | 287 | ||
291 | di->di_flags = be32_to_cpu(str->di_flags); | 288 | di->di_flags = be32_to_cpu(str->di_flags); |
292 | gfs2_set_inode_flags(&ip->i_inode); | 289 | gfs2_set_inode_flags(&ip->i_inode); |
293 | di->di_height = be16_to_cpu(str->di_height); | 290 | height = be16_to_cpu(str->di_height); |
294 | 291 | if (unlikely(height > GFS2_MAX_META_HEIGHT)) | |
295 | di->di_depth = be16_to_cpu(str->di_depth); | 292 | goto corrupt; |
293 | ip->i_height = (u8)height; | ||
294 | |||
295 | depth = be16_to_cpu(str->di_depth); | ||
296 | if (unlikely(depth > GFS2_DIR_MAX_DEPTH)) | ||
297 | goto corrupt; | ||
298 | ip->i_depth = (u8)depth; | ||
296 | di->di_entries = be32_to_cpu(str->di_entries); | 299 | di->di_entries = be32_to_cpu(str->di_entries); |
297 | 300 | ||
298 | di->di_eattr = be64_to_cpu(str->di_eattr); | 301 | di->di_eattr = be64_to_cpu(str->di_eattr); |
@@ -300,6 +303,10 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
300 | gfs2_set_aops(&ip->i_inode); | 303 | gfs2_set_aops(&ip->i_inode); |
301 | 304 | ||
302 | return 0; | 305 | return 0; |
306 | corrupt: | ||
307 | if (gfs2_consist_inode(ip)) | ||
308 | gfs2_dinode_print(ip); | ||
309 | return -EIO; | ||
303 | } | 310 | } |
304 | 311 | ||
305 | /** | 312 | /** |
@@ -337,13 +344,15 @@ int gfs2_dinode_dealloc(struct gfs2_inode *ip) | |||
337 | struct gfs2_rgrpd *rgd; | 344 | struct gfs2_rgrpd *rgd; |
338 | int error; | 345 | int error; |
339 | 346 | ||
340 | if (ip->i_di.di_blocks != 1) { | 347 | if (gfs2_get_inode_blocks(&ip->i_inode) != 1) { |
341 | if (gfs2_consist_inode(ip)) | 348 | if (gfs2_consist_inode(ip)) |
342 | gfs2_dinode_print(ip); | 349 | gfs2_dinode_print(ip); |
343 | return -EIO; | 350 | return -EIO; |
344 | } | 351 | } |
345 | 352 | ||
346 | al = gfs2_alloc_get(ip); | 353 | al = gfs2_alloc_get(ip); |
354 | if (!al) | ||
355 | return -ENOMEM; | ||
347 | 356 | ||
348 | error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 357 | error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); |
349 | if (error) | 358 | if (error) |
@@ -487,7 +496,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, | |||
487 | return dir; | 496 | return dir; |
488 | } | 497 | } |
489 | 498 | ||
490 | if (gfs2_glock_is_locked_by_me(dip->i_gl) == 0) { | 499 | if (gfs2_glock_is_locked_by_me(dip->i_gl) == NULL) { |
491 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); | 500 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); |
492 | if (error) | 501 | if (error) |
493 | return ERR_PTR(error); | 502 | return ERR_PTR(error); |
@@ -818,7 +827,8 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, | |||
818 | int error; | 827 | int error; |
819 | 828 | ||
820 | munge_mode_uid_gid(dip, &mode, &uid, &gid); | 829 | munge_mode_uid_gid(dip, &mode, &uid, &gid); |
821 | gfs2_alloc_get(dip); | 830 | if (!gfs2_alloc_get(dip)) |
831 | return -ENOMEM; | ||
822 | 832 | ||
823 | error = gfs2_quota_lock(dip, uid, gid); | 833 | error = gfs2_quota_lock(dip, uid, gid); |
824 | if (error) | 834 | if (error) |
@@ -853,6 +863,8 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, | |||
853 | int error; | 863 | int error; |
854 | 864 | ||
855 | al = gfs2_alloc_get(dip); | 865 | al = gfs2_alloc_get(dip); |
866 | if (!al) | ||
867 | return -ENOMEM; | ||
856 | 868 | ||
857 | error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 869 | error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); |
858 | if (error) | 870 | if (error) |
@@ -1219,7 +1231,7 @@ int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len) | |||
1219 | 1231 | ||
1220 | x = ip->i_di.di_size + 1; | 1232 | x = ip->i_di.di_size + 1; |
1221 | if (x > *len) { | 1233 | if (x > *len) { |
1222 | *buf = kmalloc(x, GFP_KERNEL); | 1234 | *buf = kmalloc(x, GFP_NOFS); |
1223 | if (!*buf) { | 1235 | if (!*buf) { |
1224 | error = -ENOMEM; | 1236 | error = -ENOMEM; |
1225 | goto out_brelse; | 1237 | goto out_brelse; |
@@ -1391,21 +1403,21 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) | |||
1391 | str->di_gid = cpu_to_be32(ip->i_inode.i_gid); | 1403 | str->di_gid = cpu_to_be32(ip->i_inode.i_gid); |
1392 | str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); | 1404 | str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); |
1393 | str->di_size = cpu_to_be64(di->di_size); | 1405 | str->di_size = cpu_to_be64(di->di_size); |
1394 | str->di_blocks = cpu_to_be64(di->di_blocks); | 1406 | str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); |
1395 | str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); | 1407 | str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); |
1396 | str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); | 1408 | str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); |
1397 | str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); | 1409 | str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); |
1398 | 1410 | ||
1399 | str->di_goal_meta = cpu_to_be64(di->di_goal_meta); | 1411 | str->di_goal_meta = cpu_to_be64(ip->i_goal); |
1400 | str->di_goal_data = cpu_to_be64(di->di_goal_data); | 1412 | str->di_goal_data = cpu_to_be64(ip->i_goal); |
1401 | str->di_generation = cpu_to_be64(di->di_generation); | 1413 | str->di_generation = cpu_to_be64(di->di_generation); |
1402 | 1414 | ||
1403 | str->di_flags = cpu_to_be32(di->di_flags); | 1415 | str->di_flags = cpu_to_be32(di->di_flags); |
1404 | str->di_height = cpu_to_be16(di->di_height); | 1416 | str->di_height = cpu_to_be16(ip->i_height); |
1405 | str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && | 1417 | str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && |
1406 | !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ? | 1418 | !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ? |
1407 | GFS2_FORMAT_DE : 0); | 1419 | GFS2_FORMAT_DE : 0); |
1408 | str->di_depth = cpu_to_be16(di->di_depth); | 1420 | str->di_depth = cpu_to_be16(ip->i_depth); |
1409 | str->di_entries = cpu_to_be32(di->di_entries); | 1421 | str->di_entries = cpu_to_be32(di->di_entries); |
1410 | 1422 | ||
1411 | str->di_eattr = cpu_to_be64(di->di_eattr); | 1423 | str->di_eattr = cpu_to_be64(di->di_eattr); |
@@ -1423,15 +1435,13 @@ void gfs2_dinode_print(const struct gfs2_inode *ip) | |||
1423 | printk(KERN_INFO " no_addr = %llu\n", | 1435 | printk(KERN_INFO " no_addr = %llu\n", |
1424 | (unsigned long long)ip->i_no_addr); | 1436 | (unsigned long long)ip->i_no_addr); |
1425 | printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size); | 1437 | printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size); |
1426 | printk(KERN_INFO " di_blocks = %llu\n", | 1438 | printk(KERN_INFO " blocks = %llu\n", |
1427 | (unsigned long long)di->di_blocks); | 1439 | (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode)); |
1428 | printk(KERN_INFO " di_goal_meta = %llu\n", | 1440 | printk(KERN_INFO " i_goal = %llu\n", |
1429 | (unsigned long long)di->di_goal_meta); | 1441 | (unsigned long long)ip->i_goal); |
1430 | printk(KERN_INFO " di_goal_data = %llu\n", | ||
1431 | (unsigned long long)di->di_goal_data); | ||
1432 | printk(KERN_INFO " di_flags = 0x%.8X\n", di->di_flags); | 1442 | printk(KERN_INFO " di_flags = 0x%.8X\n", di->di_flags); |
1433 | printk(KERN_INFO " di_height = %u\n", di->di_height); | 1443 | printk(KERN_INFO " i_height = %u\n", ip->i_height); |
1434 | printk(KERN_INFO " di_depth = %u\n", di->di_depth); | 1444 | printk(KERN_INFO " i_depth = %u\n", ip->i_depth); |
1435 | printk(KERN_INFO " di_entries = %u\n", di->di_entries); | 1445 | printk(KERN_INFO " di_entries = %u\n", di->di_entries); |
1436 | printk(KERN_INFO " di_eattr = %llu\n", | 1446 | printk(KERN_INFO " di_eattr = %llu\n", |
1437 | (unsigned long long)di->di_eattr); | 1447 | (unsigned long long)di->di_eattr); |
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index d44650662615..580da454b38f 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
@@ -10,9 +10,11 @@ | |||
10 | #ifndef __INODE_DOT_H__ | 10 | #ifndef __INODE_DOT_H__ |
11 | #define __INODE_DOT_H__ | 11 | #define __INODE_DOT_H__ |
12 | 12 | ||
13 | #include "util.h" | ||
14 | |||
13 | static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) | 15 | static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) |
14 | { | 16 | { |
15 | return !ip->i_di.di_height; | 17 | return !ip->i_height; |
16 | } | 18 | } |
17 | 19 | ||
18 | static inline int gfs2_is_jdata(const struct gfs2_inode *ip) | 20 | static inline int gfs2_is_jdata(const struct gfs2_inode *ip) |
@@ -37,13 +39,25 @@ static inline int gfs2_is_dir(const struct gfs2_inode *ip) | |||
37 | return S_ISDIR(ip->i_inode.i_mode); | 39 | return S_ISDIR(ip->i_inode.i_mode); |
38 | } | 40 | } |
39 | 41 | ||
40 | static inline void gfs2_set_inode_blocks(struct inode *inode) | 42 | static inline void gfs2_set_inode_blocks(struct inode *inode, u64 blocks) |
43 | { | ||
44 | inode->i_blocks = blocks << | ||
45 | (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT); | ||
46 | } | ||
47 | |||
48 | static inline u64 gfs2_get_inode_blocks(const struct inode *inode) | ||
41 | { | 49 | { |
42 | struct gfs2_inode *ip = GFS2_I(inode); | 50 | return inode->i_blocks >> |
43 | inode->i_blocks = ip->i_di.di_blocks << | ||
44 | (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT); | 51 | (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT); |
45 | } | 52 | } |
46 | 53 | ||
54 | static inline void gfs2_add_inode_blocks(struct inode *inode, s64 change) | ||
55 | { | ||
56 | gfs2_assert(GFS2_SB(inode), (change >= 0 || inode->i_blocks > -change)); | ||
57 | change *= (GFS2_SB(inode)->sd_sb.sb_bsize/GFS2_BASIC_BLOCK); | ||
58 | inode->i_blocks += change; | ||
59 | } | ||
60 | |||
47 | static inline int gfs2_check_inum(const struct gfs2_inode *ip, u64 no_addr, | 61 | static inline int gfs2_check_inum(const struct gfs2_inode *ip, u64 no_addr, |
48 | u64 no_formal_ino) | 62 | u64 no_formal_ino) |
49 | { | 63 | { |
diff --git a/fs/gfs2/lm.c b/fs/gfs2/lm.c deleted file mode 100644 index cfcc39b86a53..000000000000 --- a/fs/gfs2/lm.c +++ /dev/null | |||
@@ -1,210 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
4 | * | ||
5 | * This copyrighted material is made available to anyone wishing to use, | ||
6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
7 | * of the GNU General Public License version 2. | ||
8 | */ | ||
9 | |||
10 | #include <linux/slab.h> | ||
11 | #include <linux/spinlock.h> | ||
12 | #include <linux/completion.h> | ||
13 | #include <linux/buffer_head.h> | ||
14 | #include <linux/delay.h> | ||
15 | #include <linux/gfs2_ondisk.h> | ||
16 | #include <linux/lm_interface.h> | ||
17 | |||
18 | #include "gfs2.h" | ||
19 | #include "incore.h" | ||
20 | #include "glock.h" | ||
21 | #include "lm.h" | ||
22 | #include "super.h" | ||
23 | #include "util.h" | ||
24 | |||
25 | /** | ||
26 | * gfs2_lm_mount - mount a locking protocol | ||
27 | * @sdp: the filesystem | ||
28 | * @args: mount arguements | ||
29 | * @silent: if 1, don't complain if the FS isn't a GFS2 fs | ||
30 | * | ||
31 | * Returns: errno | ||
32 | */ | ||
33 | |||
34 | int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) | ||
35 | { | ||
36 | char *proto = sdp->sd_proto_name; | ||
37 | char *table = sdp->sd_table_name; | ||
38 | int flags = 0; | ||
39 | int error; | ||
40 | |||
41 | if (sdp->sd_args.ar_spectator) | ||
42 | flags |= LM_MFLAG_SPECTATOR; | ||
43 | |||
44 | fs_info(sdp, "Trying to join cluster \"%s\", \"%s\"\n", proto, table); | ||
45 | |||
46 | error = gfs2_mount_lockproto(proto, table, sdp->sd_args.ar_hostdata, | ||
47 | gfs2_glock_cb, sdp, | ||
48 | GFS2_MIN_LVB_SIZE, flags, | ||
49 | &sdp->sd_lockstruct, &sdp->sd_kobj); | ||
50 | if (error) { | ||
51 | fs_info(sdp, "can't mount proto=%s, table=%s, hostdata=%s\n", | ||
52 | proto, table, sdp->sd_args.ar_hostdata); | ||
53 | goto out; | ||
54 | } | ||
55 | |||
56 | if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lockspace) || | ||
57 | gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) || | ||
58 | gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >= | ||
59 | GFS2_MIN_LVB_SIZE)) { | ||
60 | gfs2_unmount_lockproto(&sdp->sd_lockstruct); | ||
61 | goto out; | ||
62 | } | ||
63 | |||
64 | if (sdp->sd_args.ar_spectator) | ||
65 | snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table); | ||
66 | else | ||
67 | snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table, | ||
68 | sdp->sd_lockstruct.ls_jid); | ||
69 | |||
70 | fs_info(sdp, "Joined cluster. Now mounting FS...\n"); | ||
71 | |||
72 | if ((sdp->sd_lockstruct.ls_flags & LM_LSFLAG_LOCAL) && | ||
73 | !sdp->sd_args.ar_ignore_local_fs) { | ||
74 | sdp->sd_args.ar_localflocks = 1; | ||
75 | sdp->sd_args.ar_localcaching = 1; | ||
76 | } | ||
77 | |||
78 | out: | ||
79 | return error; | ||
80 | } | ||
81 | |||
82 | void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp) | ||
83 | { | ||
84 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
85 | sdp->sd_lockstruct.ls_ops->lm_others_may_mount( | ||
86 | sdp->sd_lockstruct.ls_lockspace); | ||
87 | } | ||
88 | |||
89 | void gfs2_lm_unmount(struct gfs2_sbd *sdp) | ||
90 | { | ||
91 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
92 | gfs2_unmount_lockproto(&sdp->sd_lockstruct); | ||
93 | } | ||
94 | |||
95 | int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...) | ||
96 | { | ||
97 | va_list args; | ||
98 | |||
99 | if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | ||
100 | return 0; | ||
101 | |||
102 | va_start(args, fmt); | ||
103 | vprintk(fmt, args); | ||
104 | va_end(args); | ||
105 | |||
106 | fs_err(sdp, "about to withdraw this file system\n"); | ||
107 | BUG_ON(sdp->sd_args.ar_debug); | ||
108 | |||
109 | fs_err(sdp, "telling LM to withdraw\n"); | ||
110 | gfs2_withdraw_lockproto(&sdp->sd_lockstruct); | ||
111 | fs_err(sdp, "withdrawn\n"); | ||
112 | dump_stack(); | ||
113 | |||
114 | return -1; | ||
115 | } | ||
116 | |||
117 | int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
118 | void **lockp) | ||
119 | { | ||
120 | int error = -EIO; | ||
121 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
122 | error = sdp->sd_lockstruct.ls_ops->lm_get_lock( | ||
123 | sdp->sd_lockstruct.ls_lockspace, name, lockp); | ||
124 | return error; | ||
125 | } | ||
126 | |||
127 | void gfs2_lm_put_lock(struct gfs2_sbd *sdp, void *lock) | ||
128 | { | ||
129 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
130 | sdp->sd_lockstruct.ls_ops->lm_put_lock(lock); | ||
131 | } | ||
132 | |||
133 | unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, | ||
134 | unsigned int cur_state, unsigned int req_state, | ||
135 | unsigned int flags) | ||
136 | { | ||
137 | int ret = 0; | ||
138 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
139 | ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state, | ||
140 | req_state, flags); | ||
141 | return ret; | ||
142 | } | ||
143 | |||
144 | unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, void *lock, | ||
145 | unsigned int cur_state) | ||
146 | { | ||
147 | int ret = 0; | ||
148 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
149 | ret = sdp->sd_lockstruct.ls_ops->lm_unlock(lock, cur_state); | ||
150 | return ret; | ||
151 | } | ||
152 | |||
153 | void gfs2_lm_cancel(struct gfs2_sbd *sdp, void *lock) | ||
154 | { | ||
155 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
156 | sdp->sd_lockstruct.ls_ops->lm_cancel(lock); | ||
157 | } | ||
158 | |||
159 | int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp) | ||
160 | { | ||
161 | int error = -EIO; | ||
162 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
163 | error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp); | ||
164 | return error; | ||
165 | } | ||
166 | |||
167 | void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, void *lock, char *lvb) | ||
168 | { | ||
169 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
170 | sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(lock, lvb); | ||
171 | } | ||
172 | |||
173 | int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
174 | struct file *file, struct file_lock *fl) | ||
175 | { | ||
176 | int error = -EIO; | ||
177 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
178 | error = sdp->sd_lockstruct.ls_ops->lm_plock_get( | ||
179 | sdp->sd_lockstruct.ls_lockspace, name, file, fl); | ||
180 | return error; | ||
181 | } | ||
182 | |||
183 | int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
184 | struct file *file, int cmd, struct file_lock *fl) | ||
185 | { | ||
186 | int error = -EIO; | ||
187 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
188 | error = sdp->sd_lockstruct.ls_ops->lm_plock( | ||
189 | sdp->sd_lockstruct.ls_lockspace, name, file, cmd, fl); | ||
190 | return error; | ||
191 | } | ||
192 | |||
193 | int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
194 | struct file *file, struct file_lock *fl) | ||
195 | { | ||
196 | int error = -EIO; | ||
197 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
198 | error = sdp->sd_lockstruct.ls_ops->lm_punlock( | ||
199 | sdp->sd_lockstruct.ls_lockspace, name, file, fl); | ||
200 | return error; | ||
201 | } | ||
202 | |||
203 | void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, | ||
204 | unsigned int message) | ||
205 | { | ||
206 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
207 | sdp->sd_lockstruct.ls_ops->lm_recovery_done( | ||
208 | sdp->sd_lockstruct.ls_lockspace, jid, message); | ||
209 | } | ||
210 | |||
diff --git a/fs/gfs2/lm.h b/fs/gfs2/lm.h deleted file mode 100644 index 21cdc30ee08c..000000000000 --- a/fs/gfs2/lm.h +++ /dev/null | |||
@@ -1,42 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
4 | * | ||
5 | * This copyrighted material is made available to anyone wishing to use, | ||
6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
7 | * of the GNU General Public License version 2. | ||
8 | */ | ||
9 | |||
10 | #ifndef __LM_DOT_H__ | ||
11 | #define __LM_DOT_H__ | ||
12 | |||
13 | struct gfs2_sbd; | ||
14 | |||
15 | #define GFS2_MIN_LVB_SIZE 32 | ||
16 | |||
17 | int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent); | ||
18 | void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp); | ||
19 | void gfs2_lm_unmount(struct gfs2_sbd *sdp); | ||
20 | int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...) | ||
21 | __attribute__ ((format(printf, 2, 3))); | ||
22 | int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
23 | void **lockp); | ||
24 | void gfs2_lm_put_lock(struct gfs2_sbd *sdp, void *lock); | ||
25 | unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, | ||
26 | unsigned int cur_state, unsigned int req_state, | ||
27 | unsigned int flags); | ||
28 | unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, void *lock, | ||
29 | unsigned int cur_state); | ||
30 | void gfs2_lm_cancel(struct gfs2_sbd *sdp, void *lock); | ||
31 | int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp); | ||
32 | void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, void *lock, char *lvb); | ||
33 | int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
34 | struct file *file, struct file_lock *fl); | ||
35 | int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
36 | struct file *file, int cmd, struct file_lock *fl); | ||
37 | int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
38 | struct file *file, struct file_lock *fl); | ||
39 | void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, | ||
40 | unsigned int message); | ||
41 | |||
42 | #endif /* __LM_DOT_H__ */ | ||
diff --git a/fs/gfs2/locking/dlm/Makefile b/fs/gfs2/locking/dlm/Makefile index 89b93b6b45cf..2609bb6cd013 100644 --- a/fs/gfs2/locking/dlm/Makefile +++ b/fs/gfs2/locking/dlm/Makefile | |||
@@ -1,3 +1,3 @@ | |||
1 | obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o | 1 | obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o |
2 | lock_dlm-y := lock.o main.o mount.o sysfs.o thread.o plock.o | 2 | lock_dlm-y := lock.o main.o mount.o sysfs.o thread.o |
3 | 3 | ||
diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c index 542a797ac89a..cf7ea8abec87 100644 --- a/fs/gfs2/locking/dlm/lock.c +++ b/fs/gfs2/locking/dlm/lock.c | |||
@@ -137,7 +137,8 @@ static inline unsigned int make_flags(struct gdlm_lock *lp, | |||
137 | 137 | ||
138 | /* Conversion deadlock avoidance by DLM */ | 138 | /* Conversion deadlock avoidance by DLM */ |
139 | 139 | ||
140 | if (!test_bit(LFL_FORCE_PROMOTE, &lp->flags) && | 140 | if (!(lp->ls->fsflags & LM_MFLAG_CONV_NODROP) && |
141 | !test_bit(LFL_FORCE_PROMOTE, &lp->flags) && | ||
141 | !(lkf & DLM_LKF_NOQUEUE) && | 142 | !(lkf & DLM_LKF_NOQUEUE) && |
142 | cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req) | 143 | cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req) |
143 | lkf |= DLM_LKF_CONVDEADLK; | 144 | lkf |= DLM_LKF_CONVDEADLK; |
@@ -164,7 +165,7 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name, | |||
164 | { | 165 | { |
165 | struct gdlm_lock *lp; | 166 | struct gdlm_lock *lp; |
166 | 167 | ||
167 | lp = kzalloc(sizeof(struct gdlm_lock), GFP_KERNEL); | 168 | lp = kzalloc(sizeof(struct gdlm_lock), GFP_NOFS); |
168 | if (!lp) | 169 | if (!lp) |
169 | return -ENOMEM; | 170 | return -ENOMEM; |
170 | 171 | ||
@@ -382,7 +383,7 @@ static int gdlm_add_lvb(struct gdlm_lock *lp) | |||
382 | { | 383 | { |
383 | char *lvb; | 384 | char *lvb; |
384 | 385 | ||
385 | lvb = kzalloc(GDLM_LVB_SIZE, GFP_KERNEL); | 386 | lvb = kzalloc(GDLM_LVB_SIZE, GFP_NOFS); |
386 | if (!lvb) | 387 | if (!lvb) |
387 | return -ENOMEM; | 388 | return -ENOMEM; |
388 | 389 | ||
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h index 9e8265d28377..a243cf69c54e 100644 --- a/fs/gfs2/locking/dlm/lock_dlm.h +++ b/fs/gfs2/locking/dlm/lock_dlm.h | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <net/sock.h> | 25 | #include <net/sock.h> |
26 | 26 | ||
27 | #include <linux/dlm.h> | 27 | #include <linux/dlm.h> |
28 | #include <linux/dlm_plock.h> | ||
28 | #include <linux/lm_interface.h> | 29 | #include <linux/lm_interface.h> |
29 | 30 | ||
30 | /* | 31 | /* |
@@ -173,15 +174,9 @@ void gdlm_cancel(void *); | |||
173 | int gdlm_hold_lvb(void *, char **); | 174 | int gdlm_hold_lvb(void *, char **); |
174 | void gdlm_unhold_lvb(void *, char *); | 175 | void gdlm_unhold_lvb(void *, char *); |
175 | 176 | ||
176 | /* plock.c */ | 177 | /* mount.c */ |
178 | |||
179 | extern const struct lm_lockops gdlm_ops; | ||
177 | 180 | ||
178 | int gdlm_plock_init(void); | ||
179 | void gdlm_plock_exit(void); | ||
180 | int gdlm_plock(void *, struct lm_lockname *, struct file *, int, | ||
181 | struct file_lock *); | ||
182 | int gdlm_plock_get(void *, struct lm_lockname *, struct file *, | ||
183 | struct file_lock *); | ||
184 | int gdlm_punlock(void *, struct lm_lockname *, struct file *, | ||
185 | struct file_lock *); | ||
186 | #endif | 181 | #endif |
187 | 182 | ||
diff --git a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c index a0e7eda643ed..b9a03a7ff801 100644 --- a/fs/gfs2/locking/dlm/main.c +++ b/fs/gfs2/locking/dlm/main.c | |||
@@ -11,8 +11,6 @@ | |||
11 | 11 | ||
12 | #include "lock_dlm.h" | 12 | #include "lock_dlm.h" |
13 | 13 | ||
14 | extern struct lm_lockops gdlm_ops; | ||
15 | |||
16 | static int __init init_lock_dlm(void) | 14 | static int __init init_lock_dlm(void) |
17 | { | 15 | { |
18 | int error; | 16 | int error; |
@@ -30,13 +28,6 @@ static int __init init_lock_dlm(void) | |||
30 | return error; | 28 | return error; |
31 | } | 29 | } |
32 | 30 | ||
33 | error = gdlm_plock_init(); | ||
34 | if (error) { | ||
35 | gdlm_sysfs_exit(); | ||
36 | gfs2_unregister_lockproto(&gdlm_ops); | ||
37 | return error; | ||
38 | } | ||
39 | |||
40 | printk(KERN_INFO | 31 | printk(KERN_INFO |
41 | "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__); | 32 | "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__); |
42 | return 0; | 33 | return 0; |
@@ -44,7 +35,6 @@ static int __init init_lock_dlm(void) | |||
44 | 35 | ||
45 | static void __exit exit_lock_dlm(void) | 36 | static void __exit exit_lock_dlm(void) |
46 | { | 37 | { |
47 | gdlm_plock_exit(); | ||
48 | gdlm_sysfs_exit(); | 38 | gdlm_sysfs_exit(); |
49 | gfs2_unregister_lockproto(&gdlm_ops); | 39 | gfs2_unregister_lockproto(&gdlm_ops); |
50 | } | 40 | } |
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c index f2efff424224..470bdf650b50 100644 --- a/fs/gfs2/locking/dlm/mount.c +++ b/fs/gfs2/locking/dlm/mount.c | |||
@@ -236,6 +236,27 @@ static void gdlm_withdraw(void *lockspace) | |||
236 | gdlm_kobject_release(ls); | 236 | gdlm_kobject_release(ls); |
237 | } | 237 | } |
238 | 238 | ||
239 | static int gdlm_plock(void *lockspace, struct lm_lockname *name, | ||
240 | struct file *file, int cmd, struct file_lock *fl) | ||
241 | { | ||
242 | struct gdlm_ls *ls = lockspace; | ||
243 | return dlm_posix_lock(ls->dlm_lockspace, name->ln_number, file, cmd, fl); | ||
244 | } | ||
245 | |||
246 | static int gdlm_punlock(void *lockspace, struct lm_lockname *name, | ||
247 | struct file *file, struct file_lock *fl) | ||
248 | { | ||
249 | struct gdlm_ls *ls = lockspace; | ||
250 | return dlm_posix_unlock(ls->dlm_lockspace, name->ln_number, file, fl); | ||
251 | } | ||
252 | |||
253 | static int gdlm_plock_get(void *lockspace, struct lm_lockname *name, | ||
254 | struct file *file, struct file_lock *fl) | ||
255 | { | ||
256 | struct gdlm_ls *ls = lockspace; | ||
257 | return dlm_posix_get(ls->dlm_lockspace, name->ln_number, file, fl); | ||
258 | } | ||
259 | |||
239 | const struct lm_lockops gdlm_ops = { | 260 | const struct lm_lockops gdlm_ops = { |
240 | .lm_proto_name = "lock_dlm", | 261 | .lm_proto_name = "lock_dlm", |
241 | .lm_mount = gdlm_mount, | 262 | .lm_mount = gdlm_mount, |
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c index a87b09839761..8479da47049c 100644 --- a/fs/gfs2/locking/dlm/sysfs.c +++ b/fs/gfs2/locking/dlm/sysfs.c | |||
@@ -12,8 +12,6 @@ | |||
12 | 12 | ||
13 | #include "lock_dlm.h" | 13 | #include "lock_dlm.h" |
14 | 14 | ||
15 | extern struct lm_lockops gdlm_ops; | ||
16 | |||
17 | static ssize_t proto_name_show(struct gdlm_ls *ls, char *buf) | 15 | static ssize_t proto_name_show(struct gdlm_ls *ls, char *buf) |
18 | { | 16 | { |
19 | return sprintf(buf, "%s\n", gdlm_ops.lm_proto_name); | 17 | return sprintf(buf, "%s\n", gdlm_ops.lm_proto_name); |
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c index 521694fc19d6..e53db6fd28ab 100644 --- a/fs/gfs2/locking/dlm/thread.c +++ b/fs/gfs2/locking/dlm/thread.c | |||
@@ -135,7 +135,15 @@ static void process_complete(struct gdlm_lock *lp) | |||
135 | lp->lksb.sb_status, lp->lockname.ln_type, | 135 | lp->lksb.sb_status, lp->lockname.ln_type, |
136 | (unsigned long long)lp->lockname.ln_number, | 136 | (unsigned long long)lp->lockname.ln_number, |
137 | lp->flags); | 137 | lp->flags); |
138 | return; | 138 | if (lp->lksb.sb_status == -EDEADLOCK && |
139 | lp->ls->fsflags & LM_MFLAG_CONV_NODROP) { | ||
140 | lp->req = lp->cur; | ||
141 | acb.lc_ret |= LM_OUT_CONV_DEADLK; | ||
142 | if (lp->cur == DLM_LOCK_IV) | ||
143 | lp->lksb.sb_lkid = 0; | ||
144 | goto out; | ||
145 | } else | ||
146 | return; | ||
139 | } | 147 | } |
140 | 148 | ||
141 | /* | 149 | /* |
diff --git a/fs/gfs2/locking/nolock/main.c b/fs/gfs2/locking/nolock/main.c index d3b8ce6fbbe3..284a5ece8d94 100644 --- a/fs/gfs2/locking/nolock/main.c +++ b/fs/gfs2/locking/nolock/main.c | |||
@@ -140,7 +140,7 @@ static int nolock_hold_lvb(void *lock, char **lvbp) | |||
140 | struct nolock_lockspace *nl = lock; | 140 | struct nolock_lockspace *nl = lock; |
141 | int error = 0; | 141 | int error = 0; |
142 | 142 | ||
143 | *lvbp = kzalloc(nl->nl_lvb_size, GFP_KERNEL); | 143 | *lvbp = kzalloc(nl->nl_lvb_size, GFP_NOFS); |
144 | if (!*lvbp) | 144 | if (!*lvbp) |
145 | error = -ENOMEM; | 145 | error = -ENOMEM; |
146 | 146 | ||
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 161ab6f2058e..548264b1836d 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
@@ -769,8 +769,8 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | |||
769 | sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; | 769 | sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; |
770 | gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0); | 770 | gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0); |
771 | reserved = calc_reserved(sdp); | 771 | reserved = calc_reserved(sdp); |
772 | gfs2_assert_withdraw(sdp, sdp->sd_log_blks_reserved + tr->tr_reserved >= reserved); | ||
772 | unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved; | 773 | unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved; |
773 | gfs2_assert_withdraw(sdp, unused >= 0); | ||
774 | atomic_add(unused, &sdp->sd_log_blks_free); | 774 | atomic_add(unused, &sdp->sd_log_blks_free); |
775 | gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= | 775 | gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= |
776 | sdp->sd_jdesc->jd_blocks); | 776 | sdp->sd_jdesc->jd_blocks); |
@@ -779,6 +779,21 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | |||
779 | gfs2_log_unlock(sdp); | 779 | gfs2_log_unlock(sdp); |
780 | } | 780 | } |
781 | 781 | ||
782 | static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | ||
783 | { | ||
784 | struct list_head *head = &tr->tr_list_buf; | ||
785 | struct gfs2_bufdata *bd; | ||
786 | |||
787 | gfs2_log_lock(sdp); | ||
788 | while (!list_empty(head)) { | ||
789 | bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr); | ||
790 | list_del_init(&bd->bd_list_tr); | ||
791 | tr->tr_num_buf--; | ||
792 | } | ||
793 | gfs2_log_unlock(sdp); | ||
794 | gfs2_assert_warn(sdp, !tr->tr_num_buf); | ||
795 | } | ||
796 | |||
782 | /** | 797 | /** |
783 | * gfs2_log_commit - Commit a transaction to the log | 798 | * gfs2_log_commit - Commit a transaction to the log |
784 | * @sdp: the filesystem | 799 | * @sdp: the filesystem |
@@ -790,7 +805,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | |||
790 | void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | 805 | void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) |
791 | { | 806 | { |
792 | log_refund(sdp, tr); | 807 | log_refund(sdp, tr); |
793 | lops_incore_commit(sdp, tr); | 808 | buf_lo_incore_commit(sdp, tr); |
794 | 809 | ||
795 | sdp->sd_vfs->s_dirt = 1; | 810 | sdp->sd_vfs->s_dirt = 1; |
796 | up_read(&sdp->sd_log_flush_lock); | 811 | up_read(&sdp->sd_log_flush_lock); |
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index fae59d69d01a..4390f6f4047d 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c | |||
@@ -152,21 +152,6 @@ out: | |||
152 | unlock_buffer(bd->bd_bh); | 152 | unlock_buffer(bd->bd_bh); |
153 | } | 153 | } |
154 | 154 | ||
155 | static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | ||
156 | { | ||
157 | struct list_head *head = &tr->tr_list_buf; | ||
158 | struct gfs2_bufdata *bd; | ||
159 | |||
160 | gfs2_log_lock(sdp); | ||
161 | while (!list_empty(head)) { | ||
162 | bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr); | ||
163 | list_del_init(&bd->bd_list_tr); | ||
164 | tr->tr_num_buf--; | ||
165 | } | ||
166 | gfs2_log_unlock(sdp); | ||
167 | gfs2_assert_warn(sdp, !tr->tr_num_buf); | ||
168 | } | ||
169 | |||
170 | static void buf_lo_before_commit(struct gfs2_sbd *sdp) | 155 | static void buf_lo_before_commit(struct gfs2_sbd *sdp) |
171 | { | 156 | { |
172 | struct buffer_head *bh; | 157 | struct buffer_head *bh; |
@@ -419,8 +404,10 @@ static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | |||
419 | blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset)); | 404 | blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset)); |
420 | 405 | ||
421 | error = gfs2_revoke_add(sdp, blkno, start); | 406 | error = gfs2_revoke_add(sdp, blkno, start); |
422 | if (error < 0) | 407 | if (error < 0) { |
408 | brelse(bh); | ||
423 | return error; | 409 | return error; |
410 | } | ||
424 | else if (error) | 411 | else if (error) |
425 | sdp->sd_found_revokes++; | 412 | sdp->sd_found_revokes++; |
426 | 413 | ||
@@ -737,7 +724,6 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) | |||
737 | 724 | ||
738 | const struct gfs2_log_operations gfs2_buf_lops = { | 725 | const struct gfs2_log_operations gfs2_buf_lops = { |
739 | .lo_add = buf_lo_add, | 726 | .lo_add = buf_lo_add, |
740 | .lo_incore_commit = buf_lo_incore_commit, | ||
741 | .lo_before_commit = buf_lo_before_commit, | 727 | .lo_before_commit = buf_lo_before_commit, |
742 | .lo_after_commit = buf_lo_after_commit, | 728 | .lo_after_commit = buf_lo_after_commit, |
743 | .lo_before_scan = buf_lo_before_scan, | 729 | .lo_before_scan = buf_lo_before_scan, |
@@ -763,7 +749,6 @@ const struct gfs2_log_operations gfs2_rg_lops = { | |||
763 | 749 | ||
764 | const struct gfs2_log_operations gfs2_databuf_lops = { | 750 | const struct gfs2_log_operations gfs2_databuf_lops = { |
765 | .lo_add = databuf_lo_add, | 751 | .lo_add = databuf_lo_add, |
766 | .lo_incore_commit = buf_lo_incore_commit, | ||
767 | .lo_before_commit = databuf_lo_before_commit, | 752 | .lo_before_commit = databuf_lo_before_commit, |
768 | .lo_after_commit = databuf_lo_after_commit, | 753 | .lo_after_commit = databuf_lo_after_commit, |
769 | .lo_scan_elements = databuf_lo_scan_elements, | 754 | .lo_scan_elements = databuf_lo_scan_elements, |
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index 41a00df75587..3c0b2737658a 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -57,15 +57,6 @@ static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | |||
57 | le->le_ops->lo_add(sdp, le); | 57 | le->le_ops->lo_add(sdp, le); |
58 | } | 58 | } |
59 | 59 | ||
60 | static inline void lops_incore_commit(struct gfs2_sbd *sdp, | ||
61 | struct gfs2_trans *tr) | ||
62 | { | ||
63 | int x; | ||
64 | for (x = 0; gfs2_log_ops[x]; x++) | ||
65 | if (gfs2_log_ops[x]->lo_incore_commit) | ||
66 | gfs2_log_ops[x]->lo_incore_commit(sdp, tr); | ||
67 | } | ||
68 | |||
69 | static inline void lops_before_commit(struct gfs2_sbd *sdp) | 60 | static inline void lops_before_commit(struct gfs2_sbd *sdp) |
70 | { | 61 | { |
71 | int x; | 62 | int x; |
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 9c7765c12d62..053e2ebbbd50 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c | |||
@@ -89,6 +89,12 @@ static int __init init_gfs2_fs(void) | |||
89 | if (!gfs2_bufdata_cachep) | 89 | if (!gfs2_bufdata_cachep) |
90 | goto fail; | 90 | goto fail; |
91 | 91 | ||
92 | gfs2_rgrpd_cachep = kmem_cache_create("gfs2_rgrpd", | ||
93 | sizeof(struct gfs2_rgrpd), | ||
94 | 0, 0, NULL); | ||
95 | if (!gfs2_rgrpd_cachep) | ||
96 | goto fail; | ||
97 | |||
92 | error = register_filesystem(&gfs2_fs_type); | 98 | error = register_filesystem(&gfs2_fs_type); |
93 | if (error) | 99 | if (error) |
94 | goto fail; | 100 | goto fail; |
@@ -108,6 +114,9 @@ fail_unregister: | |||
108 | fail: | 114 | fail: |
109 | gfs2_glock_exit(); | 115 | gfs2_glock_exit(); |
110 | 116 | ||
117 | if (gfs2_rgrpd_cachep) | ||
118 | kmem_cache_destroy(gfs2_rgrpd_cachep); | ||
119 | |||
111 | if (gfs2_bufdata_cachep) | 120 | if (gfs2_bufdata_cachep) |
112 | kmem_cache_destroy(gfs2_bufdata_cachep); | 121 | kmem_cache_destroy(gfs2_bufdata_cachep); |
113 | 122 | ||
@@ -133,6 +142,7 @@ static void __exit exit_gfs2_fs(void) | |||
133 | unregister_filesystem(&gfs2_fs_type); | 142 | unregister_filesystem(&gfs2_fs_type); |
134 | unregister_filesystem(&gfs2meta_fs_type); | 143 | unregister_filesystem(&gfs2meta_fs_type); |
135 | 144 | ||
145 | kmem_cache_destroy(gfs2_rgrpd_cachep); | ||
136 | kmem_cache_destroy(gfs2_bufdata_cachep); | 146 | kmem_cache_destroy(gfs2_bufdata_cachep); |
137 | kmem_cache_destroy(gfs2_inode_cachep); | 147 | kmem_cache_destroy(gfs2_inode_cachep); |
138 | kmem_cache_destroy(gfs2_glock_cachep); | 148 | kmem_cache_destroy(gfs2_glock_cachep); |
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index ac772b6d9dbb..90a04a6e3789 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
3 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -21,7 +21,6 @@ | |||
21 | #include <linux/gfs2_ondisk.h> | 21 | #include <linux/gfs2_ondisk.h> |
22 | #include <linux/lm_interface.h> | 22 | #include <linux/lm_interface.h> |
23 | #include <linux/backing-dev.h> | 23 | #include <linux/backing-dev.h> |
24 | #include <linux/pagevec.h> | ||
25 | 24 | ||
26 | #include "gfs2.h" | 25 | #include "gfs2.h" |
27 | #include "incore.h" | 26 | #include "incore.h" |
@@ -104,11 +103,9 @@ static int gfs2_writepage_common(struct page *page, | |||
104 | loff_t i_size = i_size_read(inode); | 103 | loff_t i_size = i_size_read(inode); |
105 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | 104 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; |
106 | unsigned offset; | 105 | unsigned offset; |
107 | int ret = -EIO; | ||
108 | 106 | ||
109 | if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) | 107 | if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) |
110 | goto out; | 108 | goto out; |
111 | ret = 0; | ||
112 | if (current->journal_info) | 109 | if (current->journal_info) |
113 | goto redirty; | 110 | goto redirty; |
114 | /* Is the page fully outside i_size? (truncate in progress) */ | 111 | /* Is the page fully outside i_size? (truncate in progress) */ |
@@ -280,7 +277,7 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping, | |||
280 | int i; | 277 | int i; |
281 | int ret; | 278 | int ret; |
282 | 279 | ||
283 | ret = gfs2_trans_begin(sdp, nrblocks, 0); | 280 | ret = gfs2_trans_begin(sdp, nrblocks, nrblocks); |
284 | if (ret < 0) | 281 | if (ret < 0) |
285 | return ret; | 282 | return ret; |
286 | 283 | ||
@@ -510,23 +507,26 @@ static int __gfs2_readpage(void *file, struct page *page) | |||
510 | static int gfs2_readpage(struct file *file, struct page *page) | 507 | static int gfs2_readpage(struct file *file, struct page *page) |
511 | { | 508 | { |
512 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); | 509 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); |
513 | struct gfs2_holder gh; | 510 | struct gfs2_holder *gh; |
514 | int error; | 511 | int error; |
515 | 512 | ||
516 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh); | 513 | gh = gfs2_glock_is_locked_by_me(ip->i_gl); |
517 | error = gfs2_glock_nq_atime(&gh); | 514 | if (!gh) { |
518 | if (unlikely(error)) { | 515 | gh = kmalloc(sizeof(struct gfs2_holder), GFP_NOFS); |
516 | if (!gh) | ||
517 | return -ENOBUFS; | ||
518 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, gh); | ||
519 | unlock_page(page); | 519 | unlock_page(page); |
520 | goto out; | 520 | error = gfs2_glock_nq_atime(gh); |
521 | if (likely(error != 0)) | ||
522 | goto out; | ||
523 | return AOP_TRUNCATED_PAGE; | ||
521 | } | 524 | } |
522 | error = __gfs2_readpage(file, page); | 525 | error = __gfs2_readpage(file, page); |
523 | gfs2_glock_dq(&gh); | 526 | gfs2_glock_dq(gh); |
524 | out: | 527 | out: |
525 | gfs2_holder_uninit(&gh); | 528 | gfs2_holder_uninit(gh); |
526 | if (error == GLR_TRYFAILED) { | 529 | kfree(gh); |
527 | yield(); | ||
528 | return AOP_TRUNCATED_PAGE; | ||
529 | } | ||
530 | return error; | 530 | return error; |
531 | } | 531 | } |
532 | 532 | ||
@@ -648,15 +648,15 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
648 | 648 | ||
649 | if (alloc_required) { | 649 | if (alloc_required) { |
650 | al = gfs2_alloc_get(ip); | 650 | al = gfs2_alloc_get(ip); |
651 | if (!al) { | ||
652 | error = -ENOMEM; | ||
653 | goto out_unlock; | ||
654 | } | ||
651 | 655 | ||
652 | error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 656 | error = gfs2_quota_lock_check(ip); |
653 | if (error) | 657 | if (error) |
654 | goto out_alloc_put; | 658 | goto out_alloc_put; |
655 | 659 | ||
656 | error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); | ||
657 | if (error) | ||
658 | goto out_qunlock; | ||
659 | |||
660 | al->al_requested = data_blocks + ind_blocks; | 660 | al->al_requested = data_blocks + ind_blocks; |
661 | error = gfs2_inplace_reserve(ip); | 661 | error = gfs2_inplace_reserve(ip); |
662 | if (error) | 662 | if (error) |
@@ -828,7 +828,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, | |||
828 | unsigned int to = from + len; | 828 | unsigned int to = from + len; |
829 | int ret; | 829 | int ret; |
830 | 830 | ||
831 | BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == 0); | 831 | BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL); |
832 | 832 | ||
833 | ret = gfs2_meta_inode_buffer(ip, &dibh); | 833 | ret = gfs2_meta_inode_buffer(ip, &dibh); |
834 | if (unlikely(ret)) { | 834 | if (unlikely(ret)) { |
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c index 793e334d098e..4a5e676b4420 100644 --- a/fs/gfs2/ops_dentry.c +++ b/fs/gfs2/ops_dentry.c | |||
@@ -43,7 +43,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) | |||
43 | struct gfs2_holder d_gh; | 43 | struct gfs2_holder d_gh; |
44 | struct gfs2_inode *ip = NULL; | 44 | struct gfs2_inode *ip = NULL; |
45 | int error; | 45 | int error; |
46 | int had_lock=0; | 46 | int had_lock = 0; |
47 | 47 | ||
48 | if (inode) { | 48 | if (inode) { |
49 | if (is_bad_inode(inode)) | 49 | if (is_bad_inode(inode)) |
@@ -54,7 +54,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) | |||
54 | if (sdp->sd_args.ar_localcaching) | 54 | if (sdp->sd_args.ar_localcaching) |
55 | goto valid; | 55 | goto valid; |
56 | 56 | ||
57 | had_lock = gfs2_glock_is_locked_by_me(dip->i_gl); | 57 | had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL); |
58 | if (!had_lock) { | 58 | if (!had_lock) { |
59 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); | 59 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); |
60 | if (error) | 60 | if (error) |
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c index 334c7f85351b..990d9f4bc463 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/ops_export.c | |||
@@ -204,8 +204,6 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, | |||
204 | inode = gfs2_inode_lookup(sb, DT_UNKNOWN, | 204 | inode = gfs2_inode_lookup(sb, DT_UNKNOWN, |
205 | inum->no_addr, | 205 | inum->no_addr, |
206 | 0, 0); | 206 | 0, 0); |
207 | if (!inode) | ||
208 | goto fail; | ||
209 | if (IS_ERR(inode)) { | 207 | if (IS_ERR(inode)) { |
210 | error = PTR_ERR(inode); | 208 | error = PTR_ERR(inode); |
211 | goto fail; | 209 | goto fail; |
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index f4842f2548cd..e1b7d525a066 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c | |||
@@ -30,7 +30,6 @@ | |||
30 | #include "glock.h" | 30 | #include "glock.h" |
31 | #include "glops.h" | 31 | #include "glops.h" |
32 | #include "inode.h" | 32 | #include "inode.h" |
33 | #include "lm.h" | ||
34 | #include "log.h" | 33 | #include "log.h" |
35 | #include "meta_io.h" | 34 | #include "meta_io.h" |
36 | #include "quota.h" | 35 | #include "quota.h" |
@@ -39,6 +38,7 @@ | |||
39 | #include "util.h" | 38 | #include "util.h" |
40 | #include "eaops.h" | 39 | #include "eaops.h" |
41 | #include "ops_address.h" | 40 | #include "ops_address.h" |
41 | #include "ops_inode.h" | ||
42 | 42 | ||
43 | /** | 43 | /** |
44 | * gfs2_llseek - seek to a location in a file | 44 | * gfs2_llseek - seek to a location in a file |
@@ -369,12 +369,9 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
369 | if (al == NULL) | 369 | if (al == NULL) |
370 | goto out_unlock; | 370 | goto out_unlock; |
371 | 371 | ||
372 | ret = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 372 | ret = gfs2_quota_lock_check(ip); |
373 | if (ret) | 373 | if (ret) |
374 | goto out_alloc_put; | 374 | goto out_alloc_put; |
375 | ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); | ||
376 | if (ret) | ||
377 | goto out_quota_unlock; | ||
378 | al->al_requested = data_blocks + ind_blocks; | 375 | al->al_requested = data_blocks + ind_blocks; |
379 | ret = gfs2_inplace_reserve(ip); | 376 | ret = gfs2_inplace_reserve(ip); |
380 | if (ret) | 377 | if (ret) |
@@ -596,6 +593,36 @@ static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl) | |||
596 | return generic_setlease(file, arg, fl); | 593 | return generic_setlease(file, arg, fl); |
597 | } | 594 | } |
598 | 595 | ||
596 | static int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
597 | struct file *file, struct file_lock *fl) | ||
598 | { | ||
599 | int error = -EIO; | ||
600 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
601 | error = sdp->sd_lockstruct.ls_ops->lm_plock_get( | ||
602 | sdp->sd_lockstruct.ls_lockspace, name, file, fl); | ||
603 | return error; | ||
604 | } | ||
605 | |||
606 | static int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
607 | struct file *file, int cmd, struct file_lock *fl) | ||
608 | { | ||
609 | int error = -EIO; | ||
610 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
611 | error = sdp->sd_lockstruct.ls_ops->lm_plock( | ||
612 | sdp->sd_lockstruct.ls_lockspace, name, file, cmd, fl); | ||
613 | return error; | ||
614 | } | ||
615 | |||
616 | static int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
617 | struct file *file, struct file_lock *fl) | ||
618 | { | ||
619 | int error = -EIO; | ||
620 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
621 | error = sdp->sd_lockstruct.ls_ops->lm_punlock( | ||
622 | sdp->sd_lockstruct.ls_lockspace, name, file, fl); | ||
623 | return error; | ||
624 | } | ||
625 | |||
599 | /** | 626 | /** |
600 | * gfs2_lock - acquire/release a posix lock on a file | 627 | * gfs2_lock - acquire/release a posix lock on a file |
601 | * @file: the file pointer | 628 | * @file: the file pointer |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 4bee6aa845e4..ef9c6c4f80f6 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
3 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -26,7 +26,6 @@ | |||
26 | #include "glock.h" | 26 | #include "glock.h" |
27 | #include "glops.h" | 27 | #include "glops.h" |
28 | #include "inode.h" | 28 | #include "inode.h" |
29 | #include "lm.h" | ||
30 | #include "mount.h" | 29 | #include "mount.h" |
31 | #include "ops_fstype.h" | 30 | #include "ops_fstype.h" |
32 | #include "ops_dentry.h" | 31 | #include "ops_dentry.h" |
@@ -363,6 +362,13 @@ static int map_journal_extents(struct gfs2_sbd *sdp) | |||
363 | return rc; | 362 | return rc; |
364 | } | 363 | } |
365 | 364 | ||
365 | static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp) | ||
366 | { | ||
367 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
368 | sdp->sd_lockstruct.ls_ops->lm_others_may_mount( | ||
369 | sdp->sd_lockstruct.ls_lockspace); | ||
370 | } | ||
371 | |||
366 | static int init_journal(struct gfs2_sbd *sdp, int undo) | 372 | static int init_journal(struct gfs2_sbd *sdp, int undo) |
367 | { | 373 | { |
368 | struct gfs2_holder ji_gh; | 374 | struct gfs2_holder ji_gh; |
@@ -542,7 +548,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
542 | } | 548 | } |
543 | ip = GFS2_I(sdp->sd_rindex); | 549 | ip = GFS2_I(sdp->sd_rindex); |
544 | set_bit(GLF_STICKY, &ip->i_gl->gl_flags); | 550 | set_bit(GLF_STICKY, &ip->i_gl->gl_flags); |
545 | sdp->sd_rindex_vn = ip->i_gl->gl_vn - 1; | 551 | sdp->sd_rindex_uptodate = 0; |
546 | 552 | ||
547 | /* Read in the quota inode */ | 553 | /* Read in the quota inode */ |
548 | sdp->sd_quota_inode = gfs2_lookup_simple(sdp->sd_master_dir, "quota"); | 554 | sdp->sd_quota_inode = gfs2_lookup_simple(sdp->sd_master_dir, "quota"); |
@@ -705,6 +711,69 @@ fail: | |||
705 | } | 711 | } |
706 | 712 | ||
707 | /** | 713 | /** |
714 | * gfs2_lm_mount - mount a locking protocol | ||
715 | * @sdp: the filesystem | ||
716 | * @args: mount arguements | ||
717 | * @silent: if 1, don't complain if the FS isn't a GFS2 fs | ||
718 | * | ||
719 | * Returns: errno | ||
720 | */ | ||
721 | |||
722 | static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) | ||
723 | { | ||
724 | char *proto = sdp->sd_proto_name; | ||
725 | char *table = sdp->sd_table_name; | ||
726 | int flags = LM_MFLAG_CONV_NODROP; | ||
727 | int error; | ||
728 | |||
729 | if (sdp->sd_args.ar_spectator) | ||
730 | flags |= LM_MFLAG_SPECTATOR; | ||
731 | |||
732 | fs_info(sdp, "Trying to join cluster \"%s\", \"%s\"\n", proto, table); | ||
733 | |||
734 | error = gfs2_mount_lockproto(proto, table, sdp->sd_args.ar_hostdata, | ||
735 | gfs2_glock_cb, sdp, | ||
736 | GFS2_MIN_LVB_SIZE, flags, | ||
737 | &sdp->sd_lockstruct, &sdp->sd_kobj); | ||
738 | if (error) { | ||
739 | fs_info(sdp, "can't mount proto=%s, table=%s, hostdata=%s\n", | ||
740 | proto, table, sdp->sd_args.ar_hostdata); | ||
741 | goto out; | ||
742 | } | ||
743 | |||
744 | if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lockspace) || | ||
745 | gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) || | ||
746 | gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >= | ||
747 | GFS2_MIN_LVB_SIZE)) { | ||
748 | gfs2_unmount_lockproto(&sdp->sd_lockstruct); | ||
749 | goto out; | ||
750 | } | ||
751 | |||
752 | if (sdp->sd_args.ar_spectator) | ||
753 | snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table); | ||
754 | else | ||
755 | snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table, | ||
756 | sdp->sd_lockstruct.ls_jid); | ||
757 | |||
758 | fs_info(sdp, "Joined cluster. Now mounting FS...\n"); | ||
759 | |||
760 | if ((sdp->sd_lockstruct.ls_flags & LM_LSFLAG_LOCAL) && | ||
761 | !sdp->sd_args.ar_ignore_local_fs) { | ||
762 | sdp->sd_args.ar_localflocks = 1; | ||
763 | sdp->sd_args.ar_localcaching = 1; | ||
764 | } | ||
765 | |||
766 | out: | ||
767 | return error; | ||
768 | } | ||
769 | |||
770 | void gfs2_lm_unmount(struct gfs2_sbd *sdp) | ||
771 | { | ||
772 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
773 | gfs2_unmount_lockproto(&sdp->sd_lockstruct); | ||
774 | } | ||
775 | |||
776 | /** | ||
708 | * fill_super - Read in superblock | 777 | * fill_super - Read in superblock |
709 | * @sb: The VFS superblock | 778 | * @sb: The VFS superblock |
710 | * @data: Mount options | 779 | * @data: Mount options |
@@ -874,7 +943,6 @@ static struct super_block* get_gfs2_sb(const char *dev_name) | |||
874 | { | 943 | { |
875 | struct kstat stat; | 944 | struct kstat stat; |
876 | struct nameidata nd; | 945 | struct nameidata nd; |
877 | struct file_system_type *fstype; | ||
878 | struct super_block *sb = NULL, *s; | 946 | struct super_block *sb = NULL, *s; |
879 | int error; | 947 | int error; |
880 | 948 | ||
@@ -886,8 +954,7 @@ static struct super_block* get_gfs2_sb(const char *dev_name) | |||
886 | } | 954 | } |
887 | error = vfs_getattr(nd.path.mnt, nd.path.dentry, &stat); | 955 | error = vfs_getattr(nd.path.mnt, nd.path.dentry, &stat); |
888 | 956 | ||
889 | fstype = get_fs_type("gfs2"); | 957 | list_for_each_entry(s, &gfs2_fs_type.fs_supers, s_instances) { |
890 | list_for_each_entry(s, &fstype->fs_supers, s_instances) { | ||
891 | if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) || | 958 | if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) || |
892 | (S_ISDIR(stat.mode) && | 959 | (S_ISDIR(stat.mode) && |
893 | s == nd.path.dentry->d_inode->i_sb)) { | 960 | s == nd.path.dentry->d_inode->i_sb)) { |
@@ -931,7 +998,6 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, | |||
931 | error = PTR_ERR(new); | 998 | error = PTR_ERR(new); |
932 | goto error; | 999 | goto error; |
933 | } | 1000 | } |
934 | module_put(fs_type->owner); | ||
935 | new->s_flags = flags; | 1001 | new->s_flags = flags; |
936 | strlcpy(new->s_id, sb->s_id, sizeof(new->s_id)); | 1002 | strlcpy(new->s_id, sb->s_id, sizeof(new->s_id)); |
937 | sb_set_blocksize(new, sb->s_blocksize); | 1003 | sb_set_blocksize(new, sb->s_blocksize); |
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index e87412902bed..2686ad4c0029 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
@@ -200,15 +200,15 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
200 | 200 | ||
201 | if (alloc_required) { | 201 | if (alloc_required) { |
202 | struct gfs2_alloc *al = gfs2_alloc_get(dip); | 202 | struct gfs2_alloc *al = gfs2_alloc_get(dip); |
203 | if (!al) { | ||
204 | error = -ENOMEM; | ||
205 | goto out_gunlock; | ||
206 | } | ||
203 | 207 | ||
204 | error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 208 | error = gfs2_quota_lock_check(dip); |
205 | if (error) | 209 | if (error) |
206 | goto out_alloc; | 210 | goto out_alloc; |
207 | 211 | ||
208 | error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid); | ||
209 | if (error) | ||
210 | goto out_gunlock_q; | ||
211 | |||
212 | al->al_requested = sdp->sd_max_dirres; | 212 | al->al_requested = sdp->sd_max_dirres; |
213 | 213 | ||
214 | error = gfs2_inplace_reserve(dip); | 214 | error = gfs2_inplace_reserve(dip); |
@@ -716,15 +716,15 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
716 | 716 | ||
717 | if (alloc_required) { | 717 | if (alloc_required) { |
718 | struct gfs2_alloc *al = gfs2_alloc_get(ndip); | 718 | struct gfs2_alloc *al = gfs2_alloc_get(ndip); |
719 | if (!al) { | ||
720 | error = -ENOMEM; | ||
721 | goto out_gunlock; | ||
722 | } | ||
719 | 723 | ||
720 | error = gfs2_quota_lock(ndip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 724 | error = gfs2_quota_lock_check(ndip); |
721 | if (error) | 725 | if (error) |
722 | goto out_alloc; | 726 | goto out_alloc; |
723 | 727 | ||
724 | error = gfs2_quota_check(ndip, ndip->i_inode.i_uid, ndip->i_inode.i_gid); | ||
725 | if (error) | ||
726 | goto out_gunlock_q; | ||
727 | |||
728 | al->al_requested = sdp->sd_max_dirres; | 728 | al->al_requested = sdp->sd_max_dirres; |
729 | 729 | ||
730 | error = gfs2_inplace_reserve(ndip); | 730 | error = gfs2_inplace_reserve(ndip); |
@@ -898,7 +898,7 @@ static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd) | |||
898 | int error; | 898 | int error; |
899 | int unlock = 0; | 899 | int unlock = 0; |
900 | 900 | ||
901 | if (gfs2_glock_is_locked_by_me(ip->i_gl) == 0) { | 901 | if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { |
902 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); | 902 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); |
903 | if (error) | 903 | if (error) |
904 | return error; | 904 | return error; |
@@ -953,7 +953,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) | |||
953 | if (!(attr->ia_valid & ATTR_GID) || ogid == ngid) | 953 | if (!(attr->ia_valid & ATTR_GID) || ogid == ngid) |
954 | ogid = ngid = NO_QUOTA_CHANGE; | 954 | ogid = ngid = NO_QUOTA_CHANGE; |
955 | 955 | ||
956 | gfs2_alloc_get(ip); | 956 | if (!gfs2_alloc_get(ip)) |
957 | return -ENOMEM; | ||
957 | 958 | ||
958 | error = gfs2_quota_lock(ip, nuid, ngid); | 959 | error = gfs2_quota_lock(ip, nuid, ngid); |
959 | if (error) | 960 | if (error) |
@@ -981,8 +982,9 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) | |||
981 | brelse(dibh); | 982 | brelse(dibh); |
982 | 983 | ||
983 | if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { | 984 | if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { |
984 | gfs2_quota_change(ip, -ip->i_di.di_blocks, ouid, ogid); | 985 | u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); |
985 | gfs2_quota_change(ip, ip->i_di.di_blocks, nuid, ngid); | 986 | gfs2_quota_change(ip, -blocks, ouid, ogid); |
987 | gfs2_quota_change(ip, blocks, nuid, ngid); | ||
986 | } | 988 | } |
987 | 989 | ||
988 | out_end_trans: | 990 | out_end_trans: |
@@ -1064,7 +1066,7 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
1064 | int error; | 1066 | int error; |
1065 | int unlock = 0; | 1067 | int unlock = 0; |
1066 | 1068 | ||
1067 | if (gfs2_glock_is_locked_by_me(ip->i_gl) == 0) { | 1069 | if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { |
1068 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); | 1070 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); |
1069 | if (error) | 1071 | if (error) |
1070 | return error; | 1072 | return error; |
@@ -1148,16 +1150,6 @@ const struct inode_operations gfs2_file_iops = { | |||
1148 | .removexattr = gfs2_removexattr, | 1150 | .removexattr = gfs2_removexattr, |
1149 | }; | 1151 | }; |
1150 | 1152 | ||
1151 | const struct inode_operations gfs2_dev_iops = { | ||
1152 | .permission = gfs2_permission, | ||
1153 | .setattr = gfs2_setattr, | ||
1154 | .getattr = gfs2_getattr, | ||
1155 | .setxattr = gfs2_setxattr, | ||
1156 | .getxattr = gfs2_getxattr, | ||
1157 | .listxattr = gfs2_listxattr, | ||
1158 | .removexattr = gfs2_removexattr, | ||
1159 | }; | ||
1160 | |||
1161 | const struct inode_operations gfs2_dir_iops = { | 1153 | const struct inode_operations gfs2_dir_iops = { |
1162 | .create = gfs2_create, | 1154 | .create = gfs2_create, |
1163 | .lookup = gfs2_lookup, | 1155 | .lookup = gfs2_lookup, |
diff --git a/fs/gfs2/ops_inode.h b/fs/gfs2/ops_inode.h index fd8cee231e1d..14b4b797622a 100644 --- a/fs/gfs2/ops_inode.h +++ b/fs/gfs2/ops_inode.h | |||
@@ -15,7 +15,6 @@ | |||
15 | extern const struct inode_operations gfs2_file_iops; | 15 | extern const struct inode_operations gfs2_file_iops; |
16 | extern const struct inode_operations gfs2_dir_iops; | 16 | extern const struct inode_operations gfs2_dir_iops; |
17 | extern const struct inode_operations gfs2_symlink_iops; | 17 | extern const struct inode_operations gfs2_symlink_iops; |
18 | extern const struct inode_operations gfs2_dev_iops; | ||
19 | extern const struct file_operations gfs2_file_fops; | 18 | extern const struct file_operations gfs2_file_fops; |
20 | extern const struct file_operations gfs2_dir_fops; | 19 | extern const struct file_operations gfs2_dir_fops; |
21 | extern const struct file_operations gfs2_file_fops_nolock; | 20 | extern const struct file_operations gfs2_file_fops_nolock; |
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 5e524217944a..2278c68b7e35 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c | |||
@@ -25,7 +25,6 @@ | |||
25 | #include "incore.h" | 25 | #include "incore.h" |
26 | #include "glock.h" | 26 | #include "glock.h" |
27 | #include "inode.h" | 27 | #include "inode.h" |
28 | #include "lm.h" | ||
29 | #include "log.h" | 28 | #include "log.h" |
30 | #include "mount.h" | 29 | #include "mount.h" |
31 | #include "ops_super.h" | 30 | #include "ops_super.h" |
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index a08dabd6ce90..56aaf915c59a 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -94,7 +94,7 @@ static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id, | |||
94 | struct gfs2_quota_data *qd; | 94 | struct gfs2_quota_data *qd; |
95 | int error; | 95 | int error; |
96 | 96 | ||
97 | qd = kzalloc(sizeof(struct gfs2_quota_data), GFP_KERNEL); | 97 | qd = kzalloc(sizeof(struct gfs2_quota_data), GFP_NOFS); |
98 | if (!qd) | 98 | if (!qd) |
99 | return -ENOMEM; | 99 | return -ENOMEM; |
100 | 100 | ||
@@ -616,16 +616,9 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, | |||
616 | s64 value; | 616 | s64 value; |
617 | int err = -EIO; | 617 | int err = -EIO; |
618 | 618 | ||
619 | if (gfs2_is_stuffed(ip)) { | 619 | if (gfs2_is_stuffed(ip)) |
620 | struct gfs2_alloc *al = NULL; | ||
621 | al = gfs2_alloc_get(ip); | ||
622 | /* just request 1 blk */ | ||
623 | al->al_requested = 1; | ||
624 | gfs2_inplace_reserve(ip); | ||
625 | gfs2_unstuff_dinode(ip, NULL); | 620 | gfs2_unstuff_dinode(ip, NULL); |
626 | gfs2_inplace_release(ip); | 621 | |
627 | gfs2_alloc_put(ip); | ||
628 | } | ||
629 | page = grab_cache_page(mapping, index); | 622 | page = grab_cache_page(mapping, index); |
630 | if (!page) | 623 | if (!page) |
631 | return -ENOMEM; | 624 | return -ENOMEM; |
@@ -690,14 +683,14 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) | |||
690 | unsigned int qx, x; | 683 | unsigned int qx, x; |
691 | struct gfs2_quota_data *qd; | 684 | struct gfs2_quota_data *qd; |
692 | loff_t offset; | 685 | loff_t offset; |
693 | unsigned int nalloc = 0; | 686 | unsigned int nalloc = 0, blocks; |
694 | struct gfs2_alloc *al = NULL; | 687 | struct gfs2_alloc *al = NULL; |
695 | int error; | 688 | int error; |
696 | 689 | ||
697 | gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), | 690 | gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), |
698 | &data_blocks, &ind_blocks); | 691 | &data_blocks, &ind_blocks); |
699 | 692 | ||
700 | ghs = kcalloc(num_qd, sizeof(struct gfs2_holder), GFP_KERNEL); | 693 | ghs = kcalloc(num_qd, sizeof(struct gfs2_holder), GFP_NOFS); |
701 | if (!ghs) | 694 | if (!ghs) |
702 | return -ENOMEM; | 695 | return -ENOMEM; |
703 | 696 | ||
@@ -727,30 +720,33 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) | |||
727 | nalloc++; | 720 | nalloc++; |
728 | } | 721 | } |
729 | 722 | ||
730 | if (nalloc) { | 723 | al = gfs2_alloc_get(ip); |
731 | al = gfs2_alloc_get(ip); | 724 | if (!al) { |
725 | error = -ENOMEM; | ||
726 | goto out_gunlock; | ||
727 | } | ||
728 | /* | ||
729 | * 1 blk for unstuffing inode if stuffed. We add this extra | ||
730 | * block to the reservation unconditionally. If the inode | ||
731 | * doesn't need unstuffing, the block will be released to the | ||
732 | * rgrp since it won't be allocated during the transaction | ||
733 | */ | ||
734 | al->al_requested = 1; | ||
735 | /* +1 in the end for block requested above for unstuffing */ | ||
736 | blocks = num_qd * data_blocks + RES_DINODE + num_qd + 1; | ||
732 | 737 | ||
733 | al->al_requested = nalloc * (data_blocks + ind_blocks); | 738 | if (nalloc) |
739 | al->al_requested += nalloc * (data_blocks + ind_blocks); | ||
740 | error = gfs2_inplace_reserve(ip); | ||
741 | if (error) | ||
742 | goto out_alloc; | ||
734 | 743 | ||
735 | error = gfs2_inplace_reserve(ip); | 744 | if (nalloc) |
736 | if (error) | 745 | blocks += al->al_rgd->rd_length + nalloc * ind_blocks + RES_STATFS; |
737 | goto out_alloc; | 746 | |
738 | 747 | error = gfs2_trans_begin(sdp, blocks, 0); | |
739 | error = gfs2_trans_begin(sdp, | 748 | if (error) |
740 | al->al_rgd->rd_length + | 749 | goto out_ipres; |
741 | num_qd * data_blocks + | ||
742 | nalloc * ind_blocks + | ||
743 | RES_DINODE + num_qd + | ||
744 | RES_STATFS, 0); | ||
745 | if (error) | ||
746 | goto out_ipres; | ||
747 | } else { | ||
748 | error = gfs2_trans_begin(sdp, | ||
749 | num_qd * data_blocks + | ||
750 | RES_DINODE + num_qd, 0); | ||
751 | if (error) | ||
752 | goto out_gunlock; | ||
753 | } | ||
754 | 750 | ||
755 | for (x = 0; x < num_qd; x++) { | 751 | for (x = 0; x < num_qd; x++) { |
756 | qd = qda[x]; | 752 | qd = qda[x]; |
@@ -769,11 +765,9 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) | |||
769 | out_end_trans: | 765 | out_end_trans: |
770 | gfs2_trans_end(sdp); | 766 | gfs2_trans_end(sdp); |
771 | out_ipres: | 767 | out_ipres: |
772 | if (nalloc) | 768 | gfs2_inplace_release(ip); |
773 | gfs2_inplace_release(ip); | ||
774 | out_alloc: | 769 | out_alloc: |
775 | if (nalloc) | 770 | gfs2_alloc_put(ip); |
776 | gfs2_alloc_put(ip); | ||
777 | out_gunlock: | 771 | out_gunlock: |
778 | gfs2_glock_dq_uninit(&i_gh); | 772 | gfs2_glock_dq_uninit(&i_gh); |
779 | out: | 773 | out: |
@@ -1124,12 +1118,12 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) | |||
1124 | error = -ENOMEM; | 1118 | error = -ENOMEM; |
1125 | 1119 | ||
1126 | sdp->sd_quota_bitmap = kcalloc(sdp->sd_quota_chunks, | 1120 | sdp->sd_quota_bitmap = kcalloc(sdp->sd_quota_chunks, |
1127 | sizeof(unsigned char *), GFP_KERNEL); | 1121 | sizeof(unsigned char *), GFP_NOFS); |
1128 | if (!sdp->sd_quota_bitmap) | 1122 | if (!sdp->sd_quota_bitmap) |
1129 | return error; | 1123 | return error; |
1130 | 1124 | ||
1131 | for (x = 0; x < sdp->sd_quota_chunks; x++) { | 1125 | for (x = 0; x < sdp->sd_quota_chunks; x++) { |
1132 | sdp->sd_quota_bitmap[x] = kzalloc(PAGE_SIZE, GFP_KERNEL); | 1126 | sdp->sd_quota_bitmap[x] = kzalloc(PAGE_SIZE, GFP_NOFS); |
1133 | if (!sdp->sd_quota_bitmap[x]) | 1127 | if (!sdp->sd_quota_bitmap[x]) |
1134 | goto fail; | 1128 | goto fail; |
1135 | } | 1129 | } |
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index a8be1417051f..3b7f4b0e5dfe 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h | |||
@@ -32,4 +32,21 @@ int gfs2_quota_init(struct gfs2_sbd *sdp); | |||
32 | void gfs2_quota_scan(struct gfs2_sbd *sdp); | 32 | void gfs2_quota_scan(struct gfs2_sbd *sdp); |
33 | void gfs2_quota_cleanup(struct gfs2_sbd *sdp); | 33 | void gfs2_quota_cleanup(struct gfs2_sbd *sdp); |
34 | 34 | ||
35 | static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) | ||
36 | { | ||
37 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
38 | int ret; | ||
39 | if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) | ||
40 | return 0; | ||
41 | ret = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | ||
42 | if (ret) | ||
43 | return ret; | ||
44 | if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) | ||
45 | return 0; | ||
46 | ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); | ||
47 | if (ret) | ||
48 | gfs2_quota_unlock(ip); | ||
49 | return ret; | ||
50 | } | ||
51 | |||
35 | #endif /* __QUOTA_DOT_H__ */ | 52 | #endif /* __QUOTA_DOT_H__ */ |
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 6fb07d67ca8a..2888e4b4b1c5 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include "bmap.h" | 20 | #include "bmap.h" |
21 | #include "glock.h" | 21 | #include "glock.h" |
22 | #include "glops.h" | 22 | #include "glops.h" |
23 | #include "lm.h" | ||
24 | #include "lops.h" | 23 | #include "lops.h" |
25 | #include "meta_io.h" | 24 | #include "meta_io.h" |
26 | #include "recovery.h" | 25 | #include "recovery.h" |
@@ -69,7 +68,7 @@ int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) | |||
69 | return 0; | 68 | return 0; |
70 | } | 69 | } |
71 | 70 | ||
72 | rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_KERNEL); | 71 | rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_NOFS); |
73 | if (!rr) | 72 | if (!rr) |
74 | return -ENOMEM; | 73 | return -ENOMEM; |
75 | 74 | ||
@@ -150,7 +149,7 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk, | |||
150 | struct gfs2_log_header_host *head) | 149 | struct gfs2_log_header_host *head) |
151 | { | 150 | { |
152 | struct buffer_head *bh; | 151 | struct buffer_head *bh; |
153 | struct gfs2_log_header_host lh; | 152 | struct gfs2_log_header_host uninitialized_var(lh); |
154 | const u32 nothing = 0; | 153 | const u32 nothing = 0; |
155 | u32 hash; | 154 | u32 hash; |
156 | int error; | 155 | int error; |
@@ -425,6 +424,16 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea | |||
425 | return error; | 424 | return error; |
426 | } | 425 | } |
427 | 426 | ||
427 | |||
428 | static void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, | ||
429 | unsigned int message) | ||
430 | { | ||
431 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
432 | sdp->sd_lockstruct.ls_ops->lm_recovery_done( | ||
433 | sdp->sd_lockstruct.ls_lockspace, jid, message); | ||
434 | } | ||
435 | |||
436 | |||
428 | /** | 437 | /** |
429 | * gfs2_recover_journal - recovery a given journal | 438 | * gfs2_recover_journal - recovery a given journal |
430 | * @jd: the struct gfs2_jdesc describing the journal | 439 | * @jd: the struct gfs2_jdesc describing the journal |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 3552110b2e5f..7e8f0b1d6c6e 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
3 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/fs.h> | 14 | #include <linux/fs.h> |
15 | #include <linux/gfs2_ondisk.h> | 15 | #include <linux/gfs2_ondisk.h> |
16 | #include <linux/lm_interface.h> | 16 | #include <linux/lm_interface.h> |
17 | #include <linux/prefetch.h> | ||
17 | 18 | ||
18 | #include "gfs2.h" | 19 | #include "gfs2.h" |
19 | #include "incore.h" | 20 | #include "incore.h" |
@@ -33,6 +34,16 @@ | |||
33 | #define BFITNOENT ((u32)~0) | 34 | #define BFITNOENT ((u32)~0) |
34 | #define NO_BLOCK ((u64)~0) | 35 | #define NO_BLOCK ((u64)~0) |
35 | 36 | ||
37 | #if BITS_PER_LONG == 32 | ||
38 | #define LBITMASK (0x55555555UL) | ||
39 | #define LBITSKIP55 (0x55555555UL) | ||
40 | #define LBITSKIP00 (0x00000000UL) | ||
41 | #else | ||
42 | #define LBITMASK (0x5555555555555555UL) | ||
43 | #define LBITSKIP55 (0x5555555555555555UL) | ||
44 | #define LBITSKIP00 (0x0000000000000000UL) | ||
45 | #endif | ||
46 | |||
36 | /* | 47 | /* |
37 | * These routines are used by the resource group routines (rgrp.c) | 48 | * These routines are used by the resource group routines (rgrp.c) |
38 | * to keep track of block allocation. Each block is represented by two | 49 | * to keep track of block allocation. Each block is represented by two |
@@ -53,7 +64,8 @@ static const char valid_change[16] = { | |||
53 | }; | 64 | }; |
54 | 65 | ||
55 | static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, | 66 | static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, |
56 | unsigned char old_state, unsigned char new_state); | 67 | unsigned char old_state, unsigned char new_state, |
68 | unsigned int *n); | ||
57 | 69 | ||
58 | /** | 70 | /** |
59 | * gfs2_setbit - Set a bit in the bitmaps | 71 | * gfs2_setbit - Set a bit in the bitmaps |
@@ -64,26 +76,32 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, | |||
64 | * | 76 | * |
65 | */ | 77 | */ |
66 | 78 | ||
67 | static void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buffer, | 79 | static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1, |
68 | unsigned int buflen, u32 block, | 80 | unsigned char *buf2, unsigned int offset, |
69 | unsigned char new_state) | 81 | unsigned int buflen, u32 block, |
82 | unsigned char new_state) | ||
70 | { | 83 | { |
71 | unsigned char *byte, *end, cur_state; | 84 | unsigned char *byte1, *byte2, *end, cur_state; |
72 | unsigned int bit; | 85 | const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; |
73 | 86 | ||
74 | byte = buffer + (block / GFS2_NBBY); | 87 | byte1 = buf1 + offset + (block / GFS2_NBBY); |
75 | bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; | 88 | end = buf1 + offset + buflen; |
76 | end = buffer + buflen; | ||
77 | 89 | ||
78 | gfs2_assert(rgd->rd_sbd, byte < end); | 90 | BUG_ON(byte1 >= end); |
79 | 91 | ||
80 | cur_state = (*byte >> bit) & GFS2_BIT_MASK; | 92 | cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; |
81 | 93 | ||
82 | if (valid_change[new_state * 4 + cur_state]) { | 94 | if (unlikely(!valid_change[new_state * 4 + cur_state])) { |
83 | *byte ^= cur_state << bit; | ||
84 | *byte |= new_state << bit; | ||
85 | } else | ||
86 | gfs2_consist_rgrpd(rgd); | 95 | gfs2_consist_rgrpd(rgd); |
96 | return; | ||
97 | } | ||
98 | *byte1 ^= (cur_state ^ new_state) << bit; | ||
99 | |||
100 | if (buf2) { | ||
101 | byte2 = buf2 + offset + (block / GFS2_NBBY); | ||
102 | cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; | ||
103 | *byte2 ^= (cur_state ^ new_state) << bit; | ||
104 | } | ||
87 | } | 105 | } |
88 | 106 | ||
89 | /** | 107 | /** |
@@ -94,10 +112,12 @@ static void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buffer, | |||
94 | * | 112 | * |
95 | */ | 113 | */ |
96 | 114 | ||
97 | static unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer, | 115 | static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, |
98 | unsigned int buflen, u32 block) | 116 | const unsigned char *buffer, |
117 | unsigned int buflen, u32 block) | ||
99 | { | 118 | { |
100 | unsigned char *byte, *end, cur_state; | 119 | const unsigned char *byte, *end; |
120 | unsigned char cur_state; | ||
101 | unsigned int bit; | 121 | unsigned int bit; |
102 | 122 | ||
103 | byte = buffer + (block / GFS2_NBBY); | 123 | byte = buffer + (block / GFS2_NBBY); |
@@ -126,47 +146,66 @@ static unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer, | |||
126 | * Return: the block number (bitmap buffer scope) that was found | 146 | * Return: the block number (bitmap buffer scope) that was found |
127 | */ | 147 | */ |
128 | 148 | ||
129 | static u32 gfs2_bitfit(unsigned char *buffer, unsigned int buflen, u32 goal, | 149 | static u32 gfs2_bitfit(const u8 *buffer, unsigned int buflen, u32 goal, |
130 | unsigned char old_state) | 150 | u8 old_state) |
131 | { | 151 | { |
132 | unsigned char *byte; | 152 | const u8 *byte, *start, *end; |
133 | u32 blk = goal; | 153 | int bit, startbit; |
134 | unsigned int bit, bitlong; | 154 | u32 g1, g2, misaligned; |
135 | unsigned long *plong, plong55; | 155 | unsigned long *plong; |
136 | 156 | unsigned long lskipval; | |
137 | byte = buffer + (goal / GFS2_NBBY); | 157 | |
138 | plong = (unsigned long *)(buffer + (goal / GFS2_NBBY)); | 158 | lskipval = (old_state & GFS2_BLKST_USED) ? LBITSKIP00 : LBITSKIP55; |
139 | bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE; | 159 | g1 = (goal / GFS2_NBBY); |
140 | bitlong = bit; | 160 | start = buffer + g1; |
141 | #if BITS_PER_LONG == 32 | 161 | byte = start; |
142 | plong55 = 0x55555555; | 162 | end = buffer + buflen; |
143 | #else | 163 | g2 = ALIGN(g1, sizeof(unsigned long)); |
144 | plong55 = 0x5555555555555555; | 164 | plong = (unsigned long *)(buffer + g2); |
145 | #endif | 165 | startbit = bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE; |
146 | while (byte < buffer + buflen) { | 166 | misaligned = g2 - g1; |
147 | 167 | if (!misaligned) | |
148 | if (bitlong == 0 && old_state == 0 && *plong == plong55) { | 168 | goto ulong_aligned; |
149 | plong++; | 169 | /* parse the bitmap a byte at a time */ |
150 | byte += sizeof(unsigned long); | 170 | misaligned: |
151 | blk += sizeof(unsigned long) * GFS2_NBBY; | 171 | while (byte < end) { |
152 | continue; | 172 | if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) { |
173 | return goal + | ||
174 | (((byte - start) * GFS2_NBBY) + | ||
175 | ((bit - startbit) >> 1)); | ||
153 | } | 176 | } |
154 | if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) | ||
155 | return blk; | ||
156 | bit += GFS2_BIT_SIZE; | 177 | bit += GFS2_BIT_SIZE; |
157 | if (bit >= 8) { | 178 | if (bit >= GFS2_NBBY * GFS2_BIT_SIZE) { |
158 | bit = 0; | 179 | bit = 0; |
159 | byte++; | 180 | byte++; |
181 | misaligned--; | ||
182 | if (!misaligned) { | ||
183 | plong = (unsigned long *)byte; | ||
184 | goto ulong_aligned; | ||
185 | } | ||
160 | } | 186 | } |
161 | bitlong += GFS2_BIT_SIZE; | ||
162 | if (bitlong >= sizeof(unsigned long) * 8) { | ||
163 | bitlong = 0; | ||
164 | plong++; | ||
165 | } | ||
166 | |||
167 | blk++; | ||
168 | } | 187 | } |
188 | return BFITNOENT; | ||
169 | 189 | ||
190 | /* parse the bitmap a unsigned long at a time */ | ||
191 | ulong_aligned: | ||
192 | /* Stop at "end - 1" or else prefetch can go past the end and segfault. | ||
193 | We could "if" it but we'd lose some of the performance gained. | ||
194 | This way will only slow down searching the very last 4/8 bytes | ||
195 | depending on architecture. I've experimented with several ways | ||
196 | of writing this section such as using an else before the goto | ||
197 | but this one seems to be the fastest. */ | ||
198 | while ((unsigned char *)plong < end - 1) { | ||
199 | prefetch(plong + 1); | ||
200 | if (((*plong) & LBITMASK) != lskipval) | ||
201 | break; | ||
202 | plong++; | ||
203 | } | ||
204 | if ((unsigned char *)plong < end) { | ||
205 | byte = (const u8 *)plong; | ||
206 | misaligned += sizeof(unsigned long) - 1; | ||
207 | goto misaligned; | ||
208 | } | ||
170 | return BFITNOENT; | 209 | return BFITNOENT; |
171 | } | 210 | } |
172 | 211 | ||
@@ -179,14 +218,14 @@ static u32 gfs2_bitfit(unsigned char *buffer, unsigned int buflen, u32 goal, | |||
179 | * Returns: The number of bits | 218 | * Returns: The number of bits |
180 | */ | 219 | */ |
181 | 220 | ||
182 | static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, unsigned char *buffer, | 221 | static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer, |
183 | unsigned int buflen, unsigned char state) | 222 | unsigned int buflen, u8 state) |
184 | { | 223 | { |
185 | unsigned char *byte = buffer; | 224 | const u8 *byte = buffer; |
186 | unsigned char *end = buffer + buflen; | 225 | const u8 *end = buffer + buflen; |
187 | unsigned char state1 = state << 2; | 226 | const u8 state1 = state << 2; |
188 | unsigned char state2 = state << 4; | 227 | const u8 state2 = state << 4; |
189 | unsigned char state3 = state << 6; | 228 | const u8 state3 = state << 6; |
190 | u32 count = 0; | 229 | u32 count = 0; |
191 | 230 | ||
192 | for (; byte < end; byte++) { | 231 | for (; byte < end; byte++) { |
@@ -353,7 +392,7 @@ static void clear_rgrpdi(struct gfs2_sbd *sdp) | |||
353 | } | 392 | } |
354 | 393 | ||
355 | kfree(rgd->rd_bits); | 394 | kfree(rgd->rd_bits); |
356 | kfree(rgd); | 395 | kmem_cache_free(gfs2_rgrpd_cachep, rgd); |
357 | } | 396 | } |
358 | } | 397 | } |
359 | 398 | ||
@@ -516,7 +555,7 @@ static int read_rindex_entry(struct gfs2_inode *ip, | |||
516 | return error; | 555 | return error; |
517 | } | 556 | } |
518 | 557 | ||
519 | rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS); | 558 | rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS); |
520 | error = -ENOMEM; | 559 | error = -ENOMEM; |
521 | if (!rgd) | 560 | if (!rgd) |
522 | return error; | 561 | return error; |
@@ -539,7 +578,7 @@ static int read_rindex_entry(struct gfs2_inode *ip, | |||
539 | return error; | 578 | return error; |
540 | 579 | ||
541 | rgd->rd_gl->gl_object = rgd; | 580 | rgd->rd_gl->gl_object = rgd; |
542 | rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1; | 581 | rgd->rd_flags &= ~GFS2_RDF_UPTODATE; |
543 | rgd->rd_flags |= GFS2_RDF_CHECK; | 582 | rgd->rd_flags |= GFS2_RDF_CHECK; |
544 | return error; | 583 | return error; |
545 | } | 584 | } |
@@ -575,7 +614,7 @@ static int gfs2_ri_update(struct gfs2_inode *ip) | |||
575 | } | 614 | } |
576 | } | 615 | } |
577 | 616 | ||
578 | sdp->sd_rindex_vn = ip->i_gl->gl_vn; | 617 | sdp->sd_rindex_uptodate = 1; |
579 | return 0; | 618 | return 0; |
580 | } | 619 | } |
581 | 620 | ||
@@ -609,7 +648,7 @@ static int gfs2_ri_update_special(struct gfs2_inode *ip) | |||
609 | } | 648 | } |
610 | } | 649 | } |
611 | 650 | ||
612 | sdp->sd_rindex_vn = ip->i_gl->gl_vn; | 651 | sdp->sd_rindex_uptodate = 1; |
613 | return 0; | 652 | return 0; |
614 | } | 653 | } |
615 | 654 | ||
@@ -642,9 +681,9 @@ int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh) | |||
642 | return error; | 681 | return error; |
643 | 682 | ||
644 | /* Read new copy from disk if we don't have the latest */ | 683 | /* Read new copy from disk if we don't have the latest */ |
645 | if (sdp->sd_rindex_vn != gl->gl_vn) { | 684 | if (!sdp->sd_rindex_uptodate) { |
646 | mutex_lock(&sdp->sd_rindex_mutex); | 685 | mutex_lock(&sdp->sd_rindex_mutex); |
647 | if (sdp->sd_rindex_vn != gl->gl_vn) { | 686 | if (!sdp->sd_rindex_uptodate) { |
648 | error = gfs2_ri_update(ip); | 687 | error = gfs2_ri_update(ip); |
649 | if (error) | 688 | if (error) |
650 | gfs2_glock_dq_uninit(ri_gh); | 689 | gfs2_glock_dq_uninit(ri_gh); |
@@ -655,21 +694,31 @@ int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh) | |||
655 | return error; | 694 | return error; |
656 | } | 695 | } |
657 | 696 | ||
658 | static void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf) | 697 | static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) |
659 | { | 698 | { |
660 | const struct gfs2_rgrp *str = buf; | 699 | const struct gfs2_rgrp *str = buf; |
700 | struct gfs2_rgrp_host *rg = &rgd->rd_rg; | ||
701 | u32 rg_flags; | ||
661 | 702 | ||
662 | rg->rg_flags = be32_to_cpu(str->rg_flags); | 703 | rg_flags = be32_to_cpu(str->rg_flags); |
704 | if (rg_flags & GFS2_RGF_NOALLOC) | ||
705 | rgd->rd_flags |= GFS2_RDF_NOALLOC; | ||
706 | else | ||
707 | rgd->rd_flags &= ~GFS2_RDF_NOALLOC; | ||
663 | rg->rg_free = be32_to_cpu(str->rg_free); | 708 | rg->rg_free = be32_to_cpu(str->rg_free); |
664 | rg->rg_dinodes = be32_to_cpu(str->rg_dinodes); | 709 | rg->rg_dinodes = be32_to_cpu(str->rg_dinodes); |
665 | rg->rg_igeneration = be64_to_cpu(str->rg_igeneration); | 710 | rg->rg_igeneration = be64_to_cpu(str->rg_igeneration); |
666 | } | 711 | } |
667 | 712 | ||
668 | static void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf) | 713 | static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) |
669 | { | 714 | { |
670 | struct gfs2_rgrp *str = buf; | 715 | struct gfs2_rgrp *str = buf; |
716 | struct gfs2_rgrp_host *rg = &rgd->rd_rg; | ||
717 | u32 rg_flags = 0; | ||
671 | 718 | ||
672 | str->rg_flags = cpu_to_be32(rg->rg_flags); | 719 | if (rgd->rd_flags & GFS2_RDF_NOALLOC) |
720 | rg_flags |= GFS2_RGF_NOALLOC; | ||
721 | str->rg_flags = cpu_to_be32(rg_flags); | ||
673 | str->rg_free = cpu_to_be32(rg->rg_free); | 722 | str->rg_free = cpu_to_be32(rg->rg_free); |
674 | str->rg_dinodes = cpu_to_be32(rg->rg_dinodes); | 723 | str->rg_dinodes = cpu_to_be32(rg->rg_dinodes); |
675 | str->__pad = cpu_to_be32(0); | 724 | str->__pad = cpu_to_be32(0); |
@@ -726,9 +775,9 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) | |||
726 | } | 775 | } |
727 | } | 776 | } |
728 | 777 | ||
729 | if (rgd->rd_rg_vn != gl->gl_vn) { | 778 | if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) { |
730 | gfs2_rgrp_in(&rgd->rd_rg, (rgd->rd_bits[0].bi_bh)->b_data); | 779 | gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); |
731 | rgd->rd_rg_vn = gl->gl_vn; | 780 | rgd->rd_flags |= GFS2_RDF_UPTODATE; |
732 | } | 781 | } |
733 | 782 | ||
734 | spin_lock(&sdp->sd_rindex_spin); | 783 | spin_lock(&sdp->sd_rindex_spin); |
@@ -840,7 +889,7 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al) | |||
840 | struct gfs2_sbd *sdp = rgd->rd_sbd; | 889 | struct gfs2_sbd *sdp = rgd->rd_sbd; |
841 | int ret = 0; | 890 | int ret = 0; |
842 | 891 | ||
843 | if (rgd->rd_rg.rg_flags & GFS2_RGF_NOALLOC) | 892 | if (rgd->rd_flags & GFS2_RDF_NOALLOC) |
844 | return 0; | 893 | return 0; |
845 | 894 | ||
846 | spin_lock(&sdp->sd_rindex_spin); | 895 | spin_lock(&sdp->sd_rindex_spin); |
@@ -866,13 +915,15 @@ static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked) | |||
866 | u32 goal = 0, block; | 915 | u32 goal = 0, block; |
867 | u64 no_addr; | 916 | u64 no_addr; |
868 | struct gfs2_sbd *sdp = rgd->rd_sbd; | 917 | struct gfs2_sbd *sdp = rgd->rd_sbd; |
918 | unsigned int n; | ||
869 | 919 | ||
870 | for(;;) { | 920 | for(;;) { |
871 | if (goal >= rgd->rd_data) | 921 | if (goal >= rgd->rd_data) |
872 | break; | 922 | break; |
873 | down_write(&sdp->sd_log_flush_lock); | 923 | down_write(&sdp->sd_log_flush_lock); |
924 | n = 1; | ||
874 | block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, | 925 | block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, |
875 | GFS2_BLKST_UNLINKED); | 926 | GFS2_BLKST_UNLINKED, &n); |
876 | up_write(&sdp->sd_log_flush_lock); | 927 | up_write(&sdp->sd_log_flush_lock); |
877 | if (block == BFITNOENT) | 928 | if (block == BFITNOENT) |
878 | break; | 929 | break; |
@@ -904,24 +955,20 @@ static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked) | |||
904 | static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp, | 955 | static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp, |
905 | u64 rglast) | 956 | u64 rglast) |
906 | { | 957 | { |
907 | struct gfs2_rgrpd *rgd = NULL; | 958 | struct gfs2_rgrpd *rgd; |
908 | 959 | ||
909 | spin_lock(&sdp->sd_rindex_spin); | 960 | spin_lock(&sdp->sd_rindex_spin); |
910 | 961 | ||
911 | if (list_empty(&sdp->sd_rindex_recent_list)) | 962 | if (rglast) { |
912 | goto out; | 963 | list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) { |
913 | 964 | if (rgrp_contains_block(rgd, rglast)) | |
914 | if (!rglast) | 965 | goto out; |
915 | goto first; | 966 | } |
916 | |||
917 | list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) { | ||
918 | if (rgd->rd_addr == rglast) | ||
919 | goto out; | ||
920 | } | 967 | } |
921 | 968 | rgd = NULL; | |
922 | first: | 969 | if (!list_empty(&sdp->sd_rindex_recent_list)) |
923 | rgd = list_entry(sdp->sd_rindex_recent_list.next, struct gfs2_rgrpd, | 970 | rgd = list_entry(sdp->sd_rindex_recent_list.next, |
924 | rd_recent); | 971 | struct gfs2_rgrpd, rd_recent); |
925 | out: | 972 | out: |
926 | spin_unlock(&sdp->sd_rindex_spin); | 973 | spin_unlock(&sdp->sd_rindex_spin); |
927 | return rgd; | 974 | return rgd; |
@@ -1067,7 +1114,7 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) | |||
1067 | 1114 | ||
1068 | /* Try recently successful rgrps */ | 1115 | /* Try recently successful rgrps */ |
1069 | 1116 | ||
1070 | rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc); | 1117 | rgd = recent_rgrp_first(sdp, ip->i_goal); |
1071 | 1118 | ||
1072 | while (rgd) { | 1119 | while (rgd) { |
1073 | rg_locked = 0; | 1120 | rg_locked = 0; |
@@ -1151,8 +1198,6 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) | |||
1151 | } | 1198 | } |
1152 | 1199 | ||
1153 | out: | 1200 | out: |
1154 | ip->i_last_rg_alloc = rgd->rd_addr; | ||
1155 | |||
1156 | if (begin) { | 1201 | if (begin) { |
1157 | recent_rgrp_add(rgd); | 1202 | recent_rgrp_add(rgd); |
1158 | rgd = gfs2_rgrpd_get_next(rgd); | 1203 | rgd = gfs2_rgrpd_get_next(rgd); |
@@ -1275,6 +1320,7 @@ unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) | |||
1275 | * @goal: the goal block within the RG (start here to search for avail block) | 1320 | * @goal: the goal block within the RG (start here to search for avail block) |
1276 | * @old_state: GFS2_BLKST_XXX the before-allocation state to find | 1321 | * @old_state: GFS2_BLKST_XXX the before-allocation state to find |
1277 | * @new_state: GFS2_BLKST_XXX the after-allocation block state | 1322 | * @new_state: GFS2_BLKST_XXX the after-allocation block state |
1323 | * @n: The extent length | ||
1278 | * | 1324 | * |
1279 | * Walk rgrp's bitmap to find bits that represent a block in @old_state. | 1325 | * Walk rgrp's bitmap to find bits that represent a block in @old_state. |
1280 | * Add the found bitmap buffer to the transaction. | 1326 | * Add the found bitmap buffer to the transaction. |
@@ -1290,13 +1336,17 @@ unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) | |||
1290 | */ | 1336 | */ |
1291 | 1337 | ||
1292 | static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, | 1338 | static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, |
1293 | unsigned char old_state, unsigned char new_state) | 1339 | unsigned char old_state, unsigned char new_state, |
1340 | unsigned int *n) | ||
1294 | { | 1341 | { |
1295 | struct gfs2_bitmap *bi = NULL; | 1342 | struct gfs2_bitmap *bi = NULL; |
1296 | u32 length = rgd->rd_length; | 1343 | const u32 length = rgd->rd_length; |
1297 | u32 blk = 0; | 1344 | u32 blk = 0; |
1298 | unsigned int buf, x; | 1345 | unsigned int buf, x; |
1346 | const unsigned int elen = *n; | ||
1347 | const u8 *buffer; | ||
1299 | 1348 | ||
1349 | *n = 0; | ||
1300 | /* Find bitmap block that contains bits for goal block */ | 1350 | /* Find bitmap block that contains bits for goal block */ |
1301 | for (buf = 0; buf < length; buf++) { | 1351 | for (buf = 0; buf < length; buf++) { |
1302 | bi = rgd->rd_bits + buf; | 1352 | bi = rgd->rd_bits + buf; |
@@ -1317,12 +1367,11 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, | |||
1317 | for (x = 0; x <= length; x++) { | 1367 | for (x = 0; x <= length; x++) { |
1318 | /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone | 1368 | /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone |
1319 | bitmaps, so we must search the originals for that. */ | 1369 | bitmaps, so we must search the originals for that. */ |
1370 | buffer = bi->bi_bh->b_data + bi->bi_offset; | ||
1320 | if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone) | 1371 | if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone) |
1321 | blk = gfs2_bitfit(bi->bi_clone + bi->bi_offset, | 1372 | buffer = bi->bi_clone + bi->bi_offset; |
1322 | bi->bi_len, goal, old_state); | 1373 | |
1323 | else | 1374 | blk = gfs2_bitfit(buffer, bi->bi_len, goal, old_state); |
1324 | blk = gfs2_bitfit(bi->bi_bh->b_data + bi->bi_offset, | ||
1325 | bi->bi_len, goal, old_state); | ||
1326 | if (blk != BFITNOENT) | 1375 | if (blk != BFITNOENT) |
1327 | break; | 1376 | break; |
1328 | 1377 | ||
@@ -1333,12 +1382,23 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, | |||
1333 | } | 1382 | } |
1334 | 1383 | ||
1335 | if (blk != BFITNOENT && old_state != new_state) { | 1384 | if (blk != BFITNOENT && old_state != new_state) { |
1385 | *n = 1; | ||
1336 | gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); | 1386 | gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); |
1337 | gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset, | 1387 | gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, |
1338 | bi->bi_len, blk, new_state); | 1388 | bi->bi_len, blk, new_state); |
1339 | if (bi->bi_clone) | 1389 | goal = blk; |
1340 | gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset, | 1390 | while (*n < elen) { |
1341 | bi->bi_len, blk, new_state); | 1391 | goal++; |
1392 | if (goal >= (bi->bi_len * GFS2_NBBY)) | ||
1393 | break; | ||
1394 | if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != | ||
1395 | GFS2_BLKST_FREE) | ||
1396 | break; | ||
1397 | gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, | ||
1398 | bi->bi_offset, bi->bi_len, goal, | ||
1399 | new_state); | ||
1400 | (*n)++; | ||
1401 | } | ||
1342 | } | 1402 | } |
1343 | 1403 | ||
1344 | return (blk == BFITNOENT) ? blk : (bi->bi_start * GFS2_NBBY) + blk; | 1404 | return (blk == BFITNOENT) ? blk : (bi->bi_start * GFS2_NBBY) + blk; |
@@ -1393,7 +1453,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, | |||
1393 | bi->bi_len); | 1453 | bi->bi_len); |
1394 | } | 1454 | } |
1395 | gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); | 1455 | gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); |
1396 | gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset, | 1456 | gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset, |
1397 | bi->bi_len, buf_blk, new_state); | 1457 | bi->bi_len, buf_blk, new_state); |
1398 | } | 1458 | } |
1399 | 1459 | ||
@@ -1401,13 +1461,13 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, | |||
1401 | } | 1461 | } |
1402 | 1462 | ||
1403 | /** | 1463 | /** |
1404 | * gfs2_alloc_data - Allocate a data block | 1464 | * gfs2_alloc_block - Allocate a block |
1405 | * @ip: the inode to allocate the data block for | 1465 | * @ip: the inode to allocate the block for |
1406 | * | 1466 | * |
1407 | * Returns: the allocated block | 1467 | * Returns: the allocated block |
1408 | */ | 1468 | */ |
1409 | 1469 | ||
1410 | u64 gfs2_alloc_data(struct gfs2_inode *ip) | 1470 | u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) |
1411 | { | 1471 | { |
1412 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1472 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1413 | struct gfs2_alloc *al = ip->i_alloc; | 1473 | struct gfs2_alloc *al = ip->i_alloc; |
@@ -1415,77 +1475,31 @@ u64 gfs2_alloc_data(struct gfs2_inode *ip) | |||
1415 | u32 goal, blk; | 1475 | u32 goal, blk; |
1416 | u64 block; | 1476 | u64 block; |
1417 | 1477 | ||
1418 | if (rgrp_contains_block(rgd, ip->i_di.di_goal_data)) | 1478 | if (rgrp_contains_block(rgd, ip->i_goal)) |
1419 | goal = ip->i_di.di_goal_data - rgd->rd_data0; | 1479 | goal = ip->i_goal - rgd->rd_data0; |
1420 | else | 1480 | else |
1421 | goal = rgd->rd_last_alloc_data; | 1481 | goal = rgd->rd_last_alloc; |
1422 | 1482 | ||
1423 | blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED); | 1483 | blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED, n); |
1424 | BUG_ON(blk == BFITNOENT); | 1484 | BUG_ON(blk == BFITNOENT); |
1425 | rgd->rd_last_alloc_data = blk; | ||
1426 | 1485 | ||
1486 | rgd->rd_last_alloc = blk; | ||
1427 | block = rgd->rd_data0 + blk; | 1487 | block = rgd->rd_data0 + blk; |
1428 | ip->i_di.di_goal_data = block; | 1488 | ip->i_goal = block; |
1429 | 1489 | ||
1430 | gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free); | 1490 | gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free >= *n); |
1431 | rgd->rd_rg.rg_free--; | 1491 | rgd->rd_rg.rg_free -= *n; |
1432 | 1492 | ||
1433 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 1493 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); |
1434 | gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); | 1494 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); |
1435 | 1495 | ||
1436 | al->al_alloced++; | 1496 | al->al_alloced += *n; |
1437 | 1497 | ||
1438 | gfs2_statfs_change(sdp, 0, -1, 0); | 1498 | gfs2_statfs_change(sdp, 0, -*n, 0); |
1439 | gfs2_quota_change(ip, +1, ip->i_inode.i_uid, ip->i_inode.i_gid); | 1499 | gfs2_quota_change(ip, *n, ip->i_inode.i_uid, ip->i_inode.i_gid); |
1440 | 1500 | ||
1441 | spin_lock(&sdp->sd_rindex_spin); | 1501 | spin_lock(&sdp->sd_rindex_spin); |
1442 | rgd->rd_free_clone--; | 1502 | rgd->rd_free_clone -= *n; |
1443 | spin_unlock(&sdp->sd_rindex_spin); | ||
1444 | |||
1445 | return block; | ||
1446 | } | ||
1447 | |||
1448 | /** | ||
1449 | * gfs2_alloc_meta - Allocate a metadata block | ||
1450 | * @ip: the inode to allocate the metadata block for | ||
1451 | * | ||
1452 | * Returns: the allocated block | ||
1453 | */ | ||
1454 | |||
1455 | u64 gfs2_alloc_meta(struct gfs2_inode *ip) | ||
1456 | { | ||
1457 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
1458 | struct gfs2_alloc *al = ip->i_alloc; | ||
1459 | struct gfs2_rgrpd *rgd = al->al_rgd; | ||
1460 | u32 goal, blk; | ||
1461 | u64 block; | ||
1462 | |||
1463 | if (rgrp_contains_block(rgd, ip->i_di.di_goal_meta)) | ||
1464 | goal = ip->i_di.di_goal_meta - rgd->rd_data0; | ||
1465 | else | ||
1466 | goal = rgd->rd_last_alloc_meta; | ||
1467 | |||
1468 | blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED); | ||
1469 | BUG_ON(blk == BFITNOENT); | ||
1470 | rgd->rd_last_alloc_meta = blk; | ||
1471 | |||
1472 | block = rgd->rd_data0 + blk; | ||
1473 | ip->i_di.di_goal_meta = block; | ||
1474 | |||
1475 | gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free); | ||
1476 | rgd->rd_rg.rg_free--; | ||
1477 | |||
1478 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | ||
1479 | gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); | ||
1480 | |||
1481 | al->al_alloced++; | ||
1482 | |||
1483 | gfs2_statfs_change(sdp, 0, -1, 0); | ||
1484 | gfs2_quota_change(ip, +1, ip->i_inode.i_uid, ip->i_inode.i_gid); | ||
1485 | gfs2_trans_add_unrevoke(sdp, block); | ||
1486 | |||
1487 | spin_lock(&sdp->sd_rindex_spin); | ||
1488 | rgd->rd_free_clone--; | ||
1489 | spin_unlock(&sdp->sd_rindex_spin); | 1503 | spin_unlock(&sdp->sd_rindex_spin); |
1490 | 1504 | ||
1491 | return block; | 1505 | return block; |
@@ -1505,12 +1519,13 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation) | |||
1505 | struct gfs2_rgrpd *rgd = al->al_rgd; | 1519 | struct gfs2_rgrpd *rgd = al->al_rgd; |
1506 | u32 blk; | 1520 | u32 blk; |
1507 | u64 block; | 1521 | u64 block; |
1522 | unsigned int n = 1; | ||
1508 | 1523 | ||
1509 | blk = rgblk_search(rgd, rgd->rd_last_alloc_meta, | 1524 | blk = rgblk_search(rgd, rgd->rd_last_alloc, |
1510 | GFS2_BLKST_FREE, GFS2_BLKST_DINODE); | 1525 | GFS2_BLKST_FREE, GFS2_BLKST_DINODE, &n); |
1511 | BUG_ON(blk == BFITNOENT); | 1526 | BUG_ON(blk == BFITNOENT); |
1512 | 1527 | ||
1513 | rgd->rd_last_alloc_meta = blk; | 1528 | rgd->rd_last_alloc = blk; |
1514 | 1529 | ||
1515 | block = rgd->rd_data0 + blk; | 1530 | block = rgd->rd_data0 + blk; |
1516 | 1531 | ||
@@ -1519,12 +1534,12 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation) | |||
1519 | rgd->rd_rg.rg_dinodes++; | 1534 | rgd->rd_rg.rg_dinodes++; |
1520 | *generation = rgd->rd_rg.rg_igeneration++; | 1535 | *generation = rgd->rd_rg.rg_igeneration++; |
1521 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 1536 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); |
1522 | gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); | 1537 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); |
1523 | 1538 | ||
1524 | al->al_alloced++; | 1539 | al->al_alloced++; |
1525 | 1540 | ||
1526 | gfs2_statfs_change(sdp, 0, -1, +1); | 1541 | gfs2_statfs_change(sdp, 0, -1, +1); |
1527 | gfs2_trans_add_unrevoke(sdp, block); | 1542 | gfs2_trans_add_unrevoke(sdp, block, 1); |
1528 | 1543 | ||
1529 | spin_lock(&sdp->sd_rindex_spin); | 1544 | spin_lock(&sdp->sd_rindex_spin); |
1530 | rgd->rd_free_clone--; | 1545 | rgd->rd_free_clone--; |
@@ -1553,7 +1568,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) | |||
1553 | rgd->rd_rg.rg_free += blen; | 1568 | rgd->rd_rg.rg_free += blen; |
1554 | 1569 | ||
1555 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 1570 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); |
1556 | gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); | 1571 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); |
1557 | 1572 | ||
1558 | gfs2_trans_add_rg(rgd); | 1573 | gfs2_trans_add_rg(rgd); |
1559 | 1574 | ||
@@ -1581,7 +1596,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) | |||
1581 | rgd->rd_rg.rg_free += blen; | 1596 | rgd->rd_rg.rg_free += blen; |
1582 | 1597 | ||
1583 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 1598 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); |
1584 | gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); | 1599 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); |
1585 | 1600 | ||
1586 | gfs2_trans_add_rg(rgd); | 1601 | gfs2_trans_add_rg(rgd); |
1587 | 1602 | ||
@@ -1601,7 +1616,7 @@ void gfs2_unlink_di(struct inode *inode) | |||
1601 | if (!rgd) | 1616 | if (!rgd) |
1602 | return; | 1617 | return; |
1603 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 1618 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); |
1604 | gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); | 1619 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); |
1605 | gfs2_trans_add_rg(rgd); | 1620 | gfs2_trans_add_rg(rgd); |
1606 | } | 1621 | } |
1607 | 1622 | ||
@@ -1621,7 +1636,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) | |||
1621 | rgd->rd_rg.rg_free++; | 1636 | rgd->rd_rg.rg_free++; |
1622 | 1637 | ||
1623 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 1638 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); |
1624 | gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); | 1639 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); |
1625 | 1640 | ||
1626 | gfs2_statfs_change(sdp, 0, +1, -1); | 1641 | gfs2_statfs_change(sdp, 0, +1, -1); |
1627 | gfs2_trans_add_rg(rgd); | 1642 | gfs2_trans_add_rg(rgd); |
@@ -1699,8 +1714,7 @@ void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist, | |||
1699 | * | 1714 | * |
1700 | */ | 1715 | */ |
1701 | 1716 | ||
1702 | void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state, | 1717 | void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state) |
1703 | int flags) | ||
1704 | { | 1718 | { |
1705 | unsigned int x; | 1719 | unsigned int x; |
1706 | 1720 | ||
@@ -1708,7 +1722,7 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state, | |||
1708 | GFP_NOFS | __GFP_NOFAIL); | 1722 | GFP_NOFS | __GFP_NOFAIL); |
1709 | for (x = 0; x < rlist->rl_rgrps; x++) | 1723 | for (x = 0; x < rlist->rl_rgrps; x++) |
1710 | gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, | 1724 | gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, |
1711 | state, flags, | 1725 | state, 0, |
1712 | &rlist->rl_ghs[x]); | 1726 | &rlist->rl_ghs[x]); |
1713 | } | 1727 | } |
1714 | 1728 | ||
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 149bb161f4b6..3181c7e624bf 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -46,8 +46,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip); | |||
46 | 46 | ||
47 | unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block); | 47 | unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block); |
48 | 48 | ||
49 | u64 gfs2_alloc_data(struct gfs2_inode *ip); | 49 | u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n); |
50 | u64 gfs2_alloc_meta(struct gfs2_inode *ip); | ||
51 | u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation); | 50 | u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation); |
52 | 51 | ||
53 | void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); | 52 | void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); |
@@ -64,8 +63,7 @@ struct gfs2_rgrp_list { | |||
64 | 63 | ||
65 | void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist, | 64 | void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist, |
66 | u64 block); | 65 | u64 block); |
67 | void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state, | 66 | void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state); |
68 | int flags); | ||
69 | void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); | 67 | void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); |
70 | u64 gfs2_ri_total(struct gfs2_sbd *sdp); | 68 | u64 gfs2_ri_total(struct gfs2_sbd *sdp); |
71 | 69 | ||
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index ef0562c3bc71..7aeacbc65f35 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -210,7 +210,7 @@ int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) | |||
210 | struct page *page; | 210 | struct page *page; |
211 | struct bio *bio; | 211 | struct bio *bio; |
212 | 212 | ||
213 | page = alloc_page(GFP_KERNEL); | 213 | page = alloc_page(GFP_NOFS); |
214 | if (unlikely(!page)) | 214 | if (unlikely(!page)) |
215 | return -ENOBUFS; | 215 | return -ENOBUFS; |
216 | 216 | ||
@@ -218,7 +218,7 @@ int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) | |||
218 | ClearPageDirty(page); | 218 | ClearPageDirty(page); |
219 | lock_page(page); | 219 | lock_page(page); |
220 | 220 | ||
221 | bio = bio_alloc(GFP_KERNEL, 1); | 221 | bio = bio_alloc(GFP_NOFS, 1); |
222 | if (unlikely(!bio)) { | 222 | if (unlikely(!bio)) { |
223 | __free_page(page); | 223 | __free_page(page); |
224 | return -ENOBUFS; | 224 | return -ENOBUFS; |
@@ -316,6 +316,7 @@ int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) | |||
316 | sdp->sd_heightsize[x] = space; | 316 | sdp->sd_heightsize[x] = space; |
317 | } | 317 | } |
318 | sdp->sd_max_height = x; | 318 | sdp->sd_max_height = x; |
319 | sdp->sd_heightsize[x] = ~0; | ||
319 | gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT); | 320 | gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT); |
320 | 321 | ||
321 | sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - | 322 | sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - |
@@ -334,6 +335,7 @@ int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) | |||
334 | sdp->sd_jheightsize[x] = space; | 335 | sdp->sd_jheightsize[x] = space; |
335 | } | 336 | } |
336 | sdp->sd_max_jheight = x; | 337 | sdp->sd_max_jheight = x; |
338 | sdp->sd_jheightsize[x] = ~0; | ||
337 | gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT); | 339 | gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT); |
338 | 340 | ||
339 | return 0; | 341 | return 0; |
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index 60a870e430be..44361ecc44f7 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h | |||
@@ -17,6 +17,7 @@ void gfs2_tune_init(struct gfs2_tune *gt); | |||
17 | int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent); | 17 | int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent); |
18 | int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent); | 18 | int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent); |
19 | int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector); | 19 | int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector); |
20 | void gfs2_lm_unmount(struct gfs2_sbd *sdp); | ||
20 | 21 | ||
21 | static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) | 22 | static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) |
22 | { | 23 | { |
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index eaa3b7b2f99e..9ab9fc85ecd0 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
@@ -20,7 +20,6 @@ | |||
20 | 20 | ||
21 | #include "gfs2.h" | 21 | #include "gfs2.h" |
22 | #include "incore.h" | 22 | #include "incore.h" |
23 | #include "lm.h" | ||
24 | #include "sys.h" | 23 | #include "sys.h" |
25 | #include "super.h" | 24 | #include "super.h" |
26 | #include "glock.h" | 25 | #include "glock.h" |
@@ -328,15 +327,9 @@ static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \ | |||
328 | } \ | 327 | } \ |
329 | static struct counters_attr counters_attr_##name = __ATTR_RO(name) | 328 | static struct counters_attr counters_attr_##name = __ATTR_RO(name) |
330 | 329 | ||
331 | COUNTERS_ATTR(glock_count, "%u\n"); | ||
332 | COUNTERS_ATTR(glock_held_count, "%u\n"); | ||
333 | COUNTERS_ATTR(inode_count, "%u\n"); | ||
334 | COUNTERS_ATTR(reclaimed, "%u\n"); | 330 | COUNTERS_ATTR(reclaimed, "%u\n"); |
335 | 331 | ||
336 | static struct attribute *counters_attrs[] = { | 332 | static struct attribute *counters_attrs[] = { |
337 | &counters_attr_glock_count.attr, | ||
338 | &counters_attr_glock_held_count.attr, | ||
339 | &counters_attr_inode_count.attr, | ||
340 | &counters_attr_reclaimed.attr, | 333 | &counters_attr_reclaimed.attr, |
341 | NULL, | 334 | NULL, |
342 | }; | 335 | }; |
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 73e5d92a657c..f677b8a83f0c 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c | |||
@@ -146,30 +146,25 @@ void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) | |||
146 | lops_add(sdp, &bd->bd_le); | 146 | lops_add(sdp, &bd->bd_le); |
147 | } | 147 | } |
148 | 148 | ||
149 | void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno) | 149 | void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) |
150 | { | 150 | { |
151 | struct gfs2_bufdata *bd; | 151 | struct gfs2_bufdata *bd, *tmp; |
152 | int found = 0; | 152 | struct gfs2_trans *tr = current->journal_info; |
153 | unsigned int n = len; | ||
153 | 154 | ||
154 | gfs2_log_lock(sdp); | 155 | gfs2_log_lock(sdp); |
155 | 156 | list_for_each_entry_safe(bd, tmp, &sdp->sd_log_le_revoke, bd_le.le_list) { | |
156 | list_for_each_entry(bd, &sdp->sd_log_le_revoke, bd_le.le_list) { | 157 | if ((bd->bd_blkno >= blkno) && (bd->bd_blkno < (blkno + len))) { |
157 | if (bd->bd_blkno == blkno) { | ||
158 | list_del_init(&bd->bd_le.le_list); | 158 | list_del_init(&bd->bd_le.le_list); |
159 | gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke); | 159 | gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke); |
160 | sdp->sd_log_num_revoke--; | 160 | sdp->sd_log_num_revoke--; |
161 | found = 1; | 161 | kmem_cache_free(gfs2_bufdata_cachep, bd); |
162 | break; | 162 | tr->tr_num_revoke_rm++; |
163 | if (--n == 0) | ||
164 | break; | ||
163 | } | 165 | } |
164 | } | 166 | } |
165 | |||
166 | gfs2_log_unlock(sdp); | 167 | gfs2_log_unlock(sdp); |
167 | |||
168 | if (found) { | ||
169 | struct gfs2_trans *tr = current->journal_info; | ||
170 | kmem_cache_free(gfs2_bufdata_cachep, bd); | ||
171 | tr->tr_num_revoke_rm++; | ||
172 | } | ||
173 | } | 168 | } |
174 | 169 | ||
175 | void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd) | 170 | void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd) |
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index e826f0dab80a..edf9d4bd908e 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h | |||
@@ -32,7 +32,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp); | |||
32 | 32 | ||
33 | void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); | 33 | void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); |
34 | void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); | 34 | void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); |
35 | void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno); | 35 | void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len); |
36 | void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd); | 36 | void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd); |
37 | 37 | ||
38 | #endif /* __TRANS_DOT_H__ */ | 38 | #endif /* __TRANS_DOT_H__ */ |
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 424a0774eda8..d31e355c61fb 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c | |||
@@ -19,12 +19,12 @@ | |||
19 | #include "gfs2.h" | 19 | #include "gfs2.h" |
20 | #include "incore.h" | 20 | #include "incore.h" |
21 | #include "glock.h" | 21 | #include "glock.h" |
22 | #include "lm.h" | ||
23 | #include "util.h" | 22 | #include "util.h" |
24 | 23 | ||
25 | struct kmem_cache *gfs2_glock_cachep __read_mostly; | 24 | struct kmem_cache *gfs2_glock_cachep __read_mostly; |
26 | struct kmem_cache *gfs2_inode_cachep __read_mostly; | 25 | struct kmem_cache *gfs2_inode_cachep __read_mostly; |
27 | struct kmem_cache *gfs2_bufdata_cachep __read_mostly; | 26 | struct kmem_cache *gfs2_bufdata_cachep __read_mostly; |
27 | struct kmem_cache *gfs2_rgrpd_cachep __read_mostly; | ||
28 | 28 | ||
29 | void gfs2_assert_i(struct gfs2_sbd *sdp) | 29 | void gfs2_assert_i(struct gfs2_sbd *sdp) |
30 | { | 30 | { |
@@ -32,6 +32,28 @@ void gfs2_assert_i(struct gfs2_sbd *sdp) | |||
32 | sdp->sd_fsname); | 32 | sdp->sd_fsname); |
33 | } | 33 | } |
34 | 34 | ||
35 | int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...) | ||
36 | { | ||
37 | va_list args; | ||
38 | |||
39 | if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | ||
40 | return 0; | ||
41 | |||
42 | va_start(args, fmt); | ||
43 | vprintk(fmt, args); | ||
44 | va_end(args); | ||
45 | |||
46 | fs_err(sdp, "about to withdraw this file system\n"); | ||
47 | BUG_ON(sdp->sd_args.ar_debug); | ||
48 | |||
49 | fs_err(sdp, "telling LM to withdraw\n"); | ||
50 | gfs2_withdraw_lockproto(&sdp->sd_lockstruct); | ||
51 | fs_err(sdp, "withdrawn\n"); | ||
52 | dump_stack(); | ||
53 | |||
54 | return -1; | ||
55 | } | ||
56 | |||
35 | /** | 57 | /** |
36 | * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false | 58 | * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false |
37 | * Returns: -1 if this call withdrew the machine, | 59 | * Returns: -1 if this call withdrew the machine, |
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 28938a46cf47..509c5d60bd80 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h | |||
@@ -147,6 +147,7 @@ gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__); | |||
147 | extern struct kmem_cache *gfs2_glock_cachep; | 147 | extern struct kmem_cache *gfs2_glock_cachep; |
148 | extern struct kmem_cache *gfs2_inode_cachep; | 148 | extern struct kmem_cache *gfs2_inode_cachep; |
149 | extern struct kmem_cache *gfs2_bufdata_cachep; | 149 | extern struct kmem_cache *gfs2_bufdata_cachep; |
150 | extern struct kmem_cache *gfs2_rgrpd_cachep; | ||
150 | 151 | ||
151 | static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, | 152 | static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, |
152 | unsigned int *p) | 153 | unsigned int *p) |
@@ -163,6 +164,7 @@ gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field) | |||
163 | 164 | ||
164 | void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap, | 165 | void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap, |
165 | unsigned int bit, int new_value); | 166 | unsigned int bit, int new_value); |
167 | int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...); | ||
166 | 168 | ||
167 | #endif /* __UTIL_DOT_H__ */ | 169 | #endif /* __UTIL_DOT_H__ */ |
168 | 170 | ||
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c index b60c0affbec5..f457d2ca51ab 100644 --- a/fs/hfsplus/ioctl.c +++ b/fs/hfsplus/ioctl.c | |||
@@ -14,6 +14,7 @@ | |||
14 | 14 | ||
15 | #include <linux/capability.h> | 15 | #include <linux/capability.h> |
16 | #include <linux/fs.h> | 16 | #include <linux/fs.h> |
17 | #include <linux/mount.h> | ||
17 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
18 | #include <linux/xattr.h> | 19 | #include <linux/xattr.h> |
19 | #include <asm/uaccess.h> | 20 | #include <asm/uaccess.h> |
@@ -35,25 +36,32 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, | |||
35 | flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */ | 36 | flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */ |
36 | return put_user(flags, (int __user *)arg); | 37 | return put_user(flags, (int __user *)arg); |
37 | case HFSPLUS_IOC_EXT2_SETFLAGS: { | 38 | case HFSPLUS_IOC_EXT2_SETFLAGS: { |
38 | if (IS_RDONLY(inode)) | 39 | int err = 0; |
39 | return -EROFS; | 40 | err = mnt_want_write(filp->f_path.mnt); |
40 | 41 | if (err) | |
41 | if (!is_owner_or_cap(inode)) | 42 | return err; |
42 | return -EACCES; | 43 | |
43 | 44 | if (!is_owner_or_cap(inode)) { | |
44 | if (get_user(flags, (int __user *)arg)) | 45 | err = -EACCES; |
45 | return -EFAULT; | 46 | goto setflags_out; |
46 | 47 | } | |
48 | if (get_user(flags, (int __user *)arg)) { | ||
49 | err = -EFAULT; | ||
50 | goto setflags_out; | ||
51 | } | ||
47 | if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) || | 52 | if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) || |
48 | HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) { | 53 | HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) { |
49 | if (!capable(CAP_LINUX_IMMUTABLE)) | 54 | if (!capable(CAP_LINUX_IMMUTABLE)) { |
50 | return -EPERM; | 55 | err = -EPERM; |
56 | goto setflags_out; | ||
57 | } | ||
51 | } | 58 | } |
52 | 59 | ||
53 | /* don't silently ignore unsupported ext2 flags */ | 60 | /* don't silently ignore unsupported ext2 flags */ |
54 | if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) | 61 | if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) { |
55 | return -EOPNOTSUPP; | 62 | err = -EOPNOTSUPP; |
56 | 63 | goto setflags_out; | |
64 | } | ||
57 | if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */ | 65 | if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */ |
58 | inode->i_flags |= S_IMMUTABLE; | 66 | inode->i_flags |= S_IMMUTABLE; |
59 | HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE; | 67 | HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE; |
@@ -75,7 +83,9 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, | |||
75 | 83 | ||
76 | inode->i_ctime = CURRENT_TIME_SEC; | 84 | inode->i_ctime = CURRENT_TIME_SEC; |
77 | mark_inode_dirty(inode); | 85 | mark_inode_dirty(inode); |
78 | return 0; | 86 | setflags_out: |
87 | mnt_drop_write(filp->f_path.mnt); | ||
88 | return err; | ||
79 | } | 89 | } |
80 | default: | 90 | default: |
81 | return -ENOTTY; | 91 | return -ENOTTY; |
diff --git a/fs/inode.c b/fs/inode.c index 53245ffcf93d..27ee1af50d02 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -1199,42 +1199,37 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry) | |||
1199 | struct inode *inode = dentry->d_inode; | 1199 | struct inode *inode = dentry->d_inode; |
1200 | struct timespec now; | 1200 | struct timespec now; |
1201 | 1201 | ||
1202 | if (inode->i_flags & S_NOATIME) | 1202 | if (mnt_want_write(mnt)) |
1203 | return; | 1203 | return; |
1204 | if (inode->i_flags & S_NOATIME) | ||
1205 | goto out; | ||
1204 | if (IS_NOATIME(inode)) | 1206 | if (IS_NOATIME(inode)) |
1205 | return; | 1207 | goto out; |
1206 | if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) | 1208 | if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) |
1207 | return; | 1209 | goto out; |
1208 | 1210 | ||
1209 | /* | 1211 | if (mnt->mnt_flags & MNT_NOATIME) |
1210 | * We may have a NULL vfsmount when coming from NFSD | 1212 | goto out; |
1211 | */ | 1213 | if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) |
1212 | if (mnt) { | 1214 | goto out; |
1213 | if (mnt->mnt_flags & MNT_NOATIME) | 1215 | if (mnt->mnt_flags & MNT_RELATIME) { |
1214 | return; | 1216 | /* |
1215 | if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) | 1217 | * With relative atime, only update atime if the previous |
1216 | return; | 1218 | * atime is earlier than either the ctime or mtime. |
1217 | 1219 | */ | |
1218 | if (mnt->mnt_flags & MNT_RELATIME) { | 1220 | if (timespec_compare(&inode->i_mtime, &inode->i_atime) < 0 && |
1219 | /* | 1221 | timespec_compare(&inode->i_ctime, &inode->i_atime) < 0) |
1220 | * With relative atime, only update atime if the | 1222 | goto out; |
1221 | * previous atime is earlier than either the ctime or | ||
1222 | * mtime. | ||
1223 | */ | ||
1224 | if (timespec_compare(&inode->i_mtime, | ||
1225 | &inode->i_atime) < 0 && | ||
1226 | timespec_compare(&inode->i_ctime, | ||
1227 | &inode->i_atime) < 0) | ||
1228 | return; | ||
1229 | } | ||
1230 | } | 1223 | } |
1231 | 1224 | ||
1232 | now = current_fs_time(inode->i_sb); | 1225 | now = current_fs_time(inode->i_sb); |
1233 | if (timespec_equal(&inode->i_atime, &now)) | 1226 | if (timespec_equal(&inode->i_atime, &now)) |
1234 | return; | 1227 | goto out; |
1235 | 1228 | ||
1236 | inode->i_atime = now; | 1229 | inode->i_atime = now; |
1237 | mark_inode_dirty_sync(inode); | 1230 | mark_inode_dirty_sync(inode); |
1231 | out: | ||
1232 | mnt_drop_write(mnt); | ||
1238 | } | 1233 | } |
1239 | EXPORT_SYMBOL(touch_atime); | 1234 | EXPORT_SYMBOL(touch_atime); |
1240 | 1235 | ||
@@ -1255,10 +1250,13 @@ void file_update_time(struct file *file) | |||
1255 | struct inode *inode = file->f_path.dentry->d_inode; | 1250 | struct inode *inode = file->f_path.dentry->d_inode; |
1256 | struct timespec now; | 1251 | struct timespec now; |
1257 | int sync_it = 0; | 1252 | int sync_it = 0; |
1253 | int err; | ||
1258 | 1254 | ||
1259 | if (IS_NOCMTIME(inode)) | 1255 | if (IS_NOCMTIME(inode)) |
1260 | return; | 1256 | return; |
1261 | if (IS_RDONLY(inode)) | 1257 | |
1258 | err = mnt_want_write(file->f_path.mnt); | ||
1259 | if (err) | ||
1262 | return; | 1260 | return; |
1263 | 1261 | ||
1264 | now = current_fs_time(inode->i_sb); | 1262 | now = current_fs_time(inode->i_sb); |
@@ -1279,6 +1277,7 @@ void file_update_time(struct file *file) | |||
1279 | 1277 | ||
1280 | if (sync_it) | 1278 | if (sync_it) |
1281 | mark_inode_dirty_sync(inode); | 1279 | mark_inode_dirty_sync(inode); |
1280 | mnt_drop_write(file->f_path.mnt); | ||
1282 | } | 1281 | } |
1283 | 1282 | ||
1284 | EXPORT_SYMBOL(file_update_time); | 1283 | EXPORT_SYMBOL(file_update_time); |
diff --git a/fs/internal.h b/fs/internal.h index 392e8ccd6fc4..80aa9a023372 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -43,3 +43,14 @@ extern void __init chrdev_init(void); | |||
43 | * namespace.c | 43 | * namespace.c |
44 | */ | 44 | */ |
45 | extern int copy_mount_options(const void __user *, unsigned long *); | 45 | extern int copy_mount_options(const void __user *, unsigned long *); |
46 | |||
47 | extern void free_vfsmnt(struct vfsmount *); | ||
48 | extern struct vfsmount *alloc_vfsmnt(const char *); | ||
49 | extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int); | ||
50 | extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, | ||
51 | struct vfsmount *); | ||
52 | extern void release_mounts(struct list_head *); | ||
53 | extern void umount_tree(struct vfsmount *, int, struct list_head *); | ||
54 | extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); | ||
55 | |||
56 | extern void __init mnt_init(void); | ||
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h index 0b78fdc9773b..a841f4973a74 100644 --- a/fs/jffs2/jffs2_fs_i.h +++ b/fs/jffs2/jffs2_fs_i.h | |||
@@ -15,7 +15,7 @@ | |||
15 | #include <linux/version.h> | 15 | #include <linux/version.h> |
16 | #include <linux/rbtree.h> | 16 | #include <linux/rbtree.h> |
17 | #include <linux/posix_acl.h> | 17 | #include <linux/posix_acl.h> |
18 | #include <asm/semaphore.h> | 18 | #include <linux/semaphore.h> |
19 | 19 | ||
20 | struct jffs2_inode_info { | 20 | struct jffs2_inode_info { |
21 | /* We need an internal mutex similar to inode->i_mutex. | 21 | /* We need an internal mutex similar to inode->i_mutex. |
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h index 3a2197f3c812..18fca2b9e531 100644 --- a/fs/jffs2/jffs2_fs_sb.h +++ b/fs/jffs2/jffs2_fs_sb.h | |||
@@ -16,7 +16,7 @@ | |||
16 | #include <linux/spinlock.h> | 16 | #include <linux/spinlock.h> |
17 | #include <linux/workqueue.h> | 17 | #include <linux/workqueue.h> |
18 | #include <linux/completion.h> | 18 | #include <linux/completion.h> |
19 | #include <asm/semaphore.h> | 19 | #include <linux/semaphore.h> |
20 | #include <linux/timer.h> | 20 | #include <linux/timer.h> |
21 | #include <linux/wait.h> | 21 | #include <linux/wait.h> |
22 | #include <linux/list.h> | 22 | #include <linux/list.h> |
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index a1f8e375ad21..afe222bf300f 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/fs.h> | 8 | #include <linux/fs.h> |
9 | #include <linux/ctype.h> | 9 | #include <linux/ctype.h> |
10 | #include <linux/capability.h> | 10 | #include <linux/capability.h> |
11 | #include <linux/mount.h> | ||
11 | #include <linux/time.h> | 12 | #include <linux/time.h> |
12 | #include <linux/sched.h> | 13 | #include <linux/sched.h> |
13 | #include <asm/current.h> | 14 | #include <asm/current.h> |
@@ -65,23 +66,30 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
65 | return put_user(flags, (int __user *) arg); | 66 | return put_user(flags, (int __user *) arg); |
66 | case JFS_IOC_SETFLAGS: { | 67 | case JFS_IOC_SETFLAGS: { |
67 | unsigned int oldflags; | 68 | unsigned int oldflags; |
69 | int err; | ||
68 | 70 | ||
69 | if (IS_RDONLY(inode)) | 71 | err = mnt_want_write(filp->f_path.mnt); |
70 | return -EROFS; | 72 | if (err) |
73 | return err; | ||
71 | 74 | ||
72 | if (!is_owner_or_cap(inode)) | 75 | if (!is_owner_or_cap(inode)) { |
73 | return -EACCES; | 76 | err = -EACCES; |
74 | 77 | goto setflags_out; | |
75 | if (get_user(flags, (int __user *) arg)) | 78 | } |
76 | return -EFAULT; | 79 | if (get_user(flags, (int __user *) arg)) { |
80 | err = -EFAULT; | ||
81 | goto setflags_out; | ||
82 | } | ||
77 | 83 | ||
78 | flags = jfs_map_ext2(flags, 1); | 84 | flags = jfs_map_ext2(flags, 1); |
79 | if (!S_ISDIR(inode->i_mode)) | 85 | if (!S_ISDIR(inode->i_mode)) |
80 | flags &= ~JFS_DIRSYNC_FL; | 86 | flags &= ~JFS_DIRSYNC_FL; |
81 | 87 | ||
82 | /* Is it quota file? Do not allow user to mess with it */ | 88 | /* Is it quota file? Do not allow user to mess with it */ |
83 | if (IS_NOQUOTA(inode)) | 89 | if (IS_NOQUOTA(inode)) { |
84 | return -EPERM; | 90 | err = -EPERM; |
91 | goto setflags_out; | ||
92 | } | ||
85 | 93 | ||
86 | /* Lock against other parallel changes of flags */ | 94 | /* Lock against other parallel changes of flags */ |
87 | mutex_lock(&inode->i_mutex); | 95 | mutex_lock(&inode->i_mutex); |
@@ -98,7 +106,8 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
98 | (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) { | 106 | (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) { |
99 | if (!capable(CAP_LINUX_IMMUTABLE)) { | 107 | if (!capable(CAP_LINUX_IMMUTABLE)) { |
100 | mutex_unlock(&inode->i_mutex); | 108 | mutex_unlock(&inode->i_mutex); |
101 | return -EPERM; | 109 | err = -EPERM; |
110 | goto setflags_out; | ||
102 | } | 111 | } |
103 | } | 112 | } |
104 | 113 | ||
@@ -110,7 +119,9 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
110 | mutex_unlock(&inode->i_mutex); | 119 | mutex_unlock(&inode->i_mutex); |
111 | inode->i_ctime = CURRENT_TIME_SEC; | 120 | inode->i_ctime = CURRENT_TIME_SEC; |
112 | mark_inode_dirty(inode); | 121 | mark_inode_dirty(inode); |
113 | return 0; | 122 | setflags_out: |
123 | mnt_drop_write(filp->f_path.mnt); | ||
124 | return err; | ||
114 | } | 125 | } |
115 | default: | 126 | default: |
116 | return -ENOTTY; | 127 | return -ENOTTY; |
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index e1985066b1c6..2bc7d8aa5740 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c | |||
@@ -2172,7 +2172,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
2172 | } | 2172 | } |
2173 | 2173 | ||
2174 | /* update the free count for this dmap */ | 2174 | /* update the free count for this dmap */ |
2175 | dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) - nblocks); | 2175 | le32_add_cpu(&dp->nfree, -nblocks); |
2176 | 2176 | ||
2177 | BMAP_LOCK(bmp); | 2177 | BMAP_LOCK(bmp); |
2178 | 2178 | ||
@@ -2316,7 +2316,7 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
2316 | 2316 | ||
2317 | /* update the free count for this dmap. | 2317 | /* update the free count for this dmap. |
2318 | */ | 2318 | */ |
2319 | dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) + nblocks); | 2319 | le32_add_cpu(&dp->nfree, nblocks); |
2320 | 2320 | ||
2321 | BMAP_LOCK(bmp); | 2321 | BMAP_LOCK(bmp); |
2322 | 2322 | ||
@@ -3226,7 +3226,7 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
3226 | } | 3226 | } |
3227 | 3227 | ||
3228 | /* update the free count for this dmap */ | 3228 | /* update the free count for this dmap */ |
3229 | dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) - nblocks); | 3229 | le32_add_cpu(&dp->nfree, -nblocks); |
3230 | 3230 | ||
3231 | /* reconstruct summary tree */ | 3231 | /* reconstruct summary tree */ |
3232 | dbInitDmapTree(dp); | 3232 | dbInitDmapTree(dp); |
@@ -3660,9 +3660,8 @@ static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks) | |||
3660 | goto initTree; | 3660 | goto initTree; |
3661 | } | 3661 | } |
3662 | } else { | 3662 | } else { |
3663 | dp->nblocks = | 3663 | le32_add_cpu(&dp->nblocks, nblocks); |
3664 | cpu_to_le32(le32_to_cpu(dp->nblocks) + nblocks); | 3664 | le32_add_cpu(&dp->nfree, nblocks); |
3665 | dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) + nblocks); | ||
3666 | } | 3665 | } |
3667 | 3666 | ||
3668 | /* word number containing start block number */ | 3667 | /* word number containing start block number */ |
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h index 11e6d471b364..1a6eb41569bc 100644 --- a/fs/jfs/jfs_dmap.h +++ b/fs/jfs/jfs_dmap.h | |||
@@ -61,7 +61,7 @@ | |||
61 | * determine the maximum free string for four (lower level) nodes | 61 | * determine the maximum free string for four (lower level) nodes |
62 | * of the tree. | 62 | * of the tree. |
63 | */ | 63 | */ |
64 | static __inline signed char TREEMAX(signed char *cp) | 64 | static inline signed char TREEMAX(signed char *cp) |
65 | { | 65 | { |
66 | signed char tmp1, tmp2; | 66 | signed char tmp1, tmp2; |
67 | 67 | ||
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 9bf29f771737..734ec916beaf 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c | |||
@@ -1019,8 +1019,7 @@ int diFree(struct inode *ip) | |||
1019 | /* update the free inode counts at the iag, ag and | 1019 | /* update the free inode counts at the iag, ag and |
1020 | * map level. | 1020 | * map level. |
1021 | */ | 1021 | */ |
1022 | iagp->nfreeinos = | 1022 | le32_add_cpu(&iagp->nfreeinos, 1); |
1023 | cpu_to_le32(le32_to_cpu(iagp->nfreeinos) + 1); | ||
1024 | imap->im_agctl[agno].numfree += 1; | 1023 | imap->im_agctl[agno].numfree += 1; |
1025 | atomic_inc(&imap->im_numfree); | 1024 | atomic_inc(&imap->im_numfree); |
1026 | 1025 | ||
@@ -1219,9 +1218,8 @@ int diFree(struct inode *ip) | |||
1219 | /* update the number of free inodes and number of free extents | 1218 | /* update the number of free inodes and number of free extents |
1220 | * for the iag. | 1219 | * for the iag. |
1221 | */ | 1220 | */ |
1222 | iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) - | 1221 | le32_add_cpu(&iagp->nfreeinos, -(INOSPEREXT - 1)); |
1223 | (INOSPEREXT - 1)); | 1222 | le32_add_cpu(&iagp->nfreeexts, 1); |
1224 | iagp->nfreeexts = cpu_to_le32(le32_to_cpu(iagp->nfreeexts) + 1); | ||
1225 | 1223 | ||
1226 | /* update the number of free inodes and backed inodes | 1224 | /* update the number of free inodes and backed inodes |
1227 | * at the ag and inode map level. | 1225 | * at the ag and inode map level. |
@@ -2124,7 +2122,7 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) | |||
2124 | /* update the free inode count at the iag, ag, inode | 2122 | /* update the free inode count at the iag, ag, inode |
2125 | * map levels. | 2123 | * map levels. |
2126 | */ | 2124 | */ |
2127 | iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) - 1); | 2125 | le32_add_cpu(&iagp->nfreeinos, -1); |
2128 | imap->im_agctl[agno].numfree -= 1; | 2126 | imap->im_agctl[agno].numfree -= 1; |
2129 | atomic_dec(&imap->im_numfree); | 2127 | atomic_dec(&imap->im_numfree); |
2130 | 2128 | ||
@@ -2378,9 +2376,8 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) | |||
2378 | /* update the free inode and free extent counts for the | 2376 | /* update the free inode and free extent counts for the |
2379 | * iag. | 2377 | * iag. |
2380 | */ | 2378 | */ |
2381 | iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) + | 2379 | le32_add_cpu(&iagp->nfreeinos, (INOSPEREXT - 1)); |
2382 | (INOSPEREXT - 1)); | 2380 | le32_add_cpu(&iagp->nfreeexts, -1); |
2383 | iagp->nfreeexts = cpu_to_le32(le32_to_cpu(iagp->nfreeexts) - 1); | ||
2384 | 2381 | ||
2385 | /* update the free and backed inode counts for the ag. | 2382 | /* update the free and backed inode counts for the ag. |
2386 | */ | 2383 | */ |
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index a000aaa75136..5a61ebf2cbcc 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c | |||
@@ -905,8 +905,7 @@ int xtInsert(tid_t tid, /* transaction id */ | |||
905 | XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); | 905 | XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); |
906 | 906 | ||
907 | /* advance next available entry index */ | 907 | /* advance next available entry index */ |
908 | p->header.nextindex = | 908 | le16_add_cpu(&p->header.nextindex, 1); |
909 | cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); | ||
910 | 909 | ||
911 | /* Don't log it if there are no links to the file */ | 910 | /* Don't log it if there are no links to the file */ |
912 | if (!test_cflag(COMMIT_Nolink, ip)) { | 911 | if (!test_cflag(COMMIT_Nolink, ip)) { |
@@ -997,8 +996,7 @@ xtSplitUp(tid_t tid, | |||
997 | split->addr); | 996 | split->addr); |
998 | 997 | ||
999 | /* advance next available entry index */ | 998 | /* advance next available entry index */ |
1000 | sp->header.nextindex = | 999 | le16_add_cpu(&sp->header.nextindex, 1); |
1001 | cpu_to_le16(le16_to_cpu(sp->header.nextindex) + 1); | ||
1002 | 1000 | ||
1003 | /* Don't log it if there are no links to the file */ | 1001 | /* Don't log it if there are no links to the file */ |
1004 | if (!test_cflag(COMMIT_Nolink, ip)) { | 1002 | if (!test_cflag(COMMIT_Nolink, ip)) { |
@@ -1167,9 +1165,7 @@ xtSplitUp(tid_t tid, | |||
1167 | JFS_SBI(ip->i_sb)->nbperpage, rcbn); | 1165 | JFS_SBI(ip->i_sb)->nbperpage, rcbn); |
1168 | 1166 | ||
1169 | /* advance next available entry index. */ | 1167 | /* advance next available entry index. */ |
1170 | sp->header.nextindex = | 1168 | le16_add_cpu(&sp->header.nextindex, 1); |
1171 | cpu_to_le16(le16_to_cpu(sp->header.nextindex) + | ||
1172 | 1); | ||
1173 | 1169 | ||
1174 | /* Don't log it if there are no links to the file */ | 1170 | /* Don't log it if there are no links to the file */ |
1175 | if (!test_cflag(COMMIT_Nolink, ip)) { | 1171 | if (!test_cflag(COMMIT_Nolink, ip)) { |
@@ -1738,8 +1734,7 @@ int xtExtend(tid_t tid, /* transaction id */ | |||
1738 | XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr); | 1734 | XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr); |
1739 | 1735 | ||
1740 | /* advance next available entry index */ | 1736 | /* advance next available entry index */ |
1741 | p->header.nextindex = | 1737 | le16_add_cpu(&p->header.nextindex, 1); |
1742 | cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); | ||
1743 | } | 1738 | } |
1744 | 1739 | ||
1745 | /* get back old entry */ | 1740 | /* get back old entry */ |
@@ -1905,8 +1900,7 @@ printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", | |||
1905 | XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); | 1900 | XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); |
1906 | 1901 | ||
1907 | /* advance next available entry index */ | 1902 | /* advance next available entry index */ |
1908 | p->header.nextindex = | 1903 | le16_add_cpu(&p->header.nextindex, 1); |
1909 | cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); | ||
1910 | } | 1904 | } |
1911 | 1905 | ||
1912 | /* get back old XAD */ | 1906 | /* get back old XAD */ |
@@ -2567,8 +2561,7 @@ int xtAppend(tid_t tid, /* transaction id */ | |||
2567 | XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); | 2561 | XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); |
2568 | 2562 | ||
2569 | /* advance next available entry index */ | 2563 | /* advance next available entry index */ |
2570 | p->header.nextindex = | 2564 | le16_add_cpu(&p->header.nextindex, 1); |
2571 | cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); | ||
2572 | 2565 | ||
2573 | xtlck->lwm.offset = | 2566 | xtlck->lwm.offset = |
2574 | (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index; | 2567 | (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index; |
@@ -2631,8 +2624,7 @@ int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag) | |||
2631 | * delete the entry from the leaf page | 2624 | * delete the entry from the leaf page |
2632 | */ | 2625 | */ |
2633 | nextindex = le16_to_cpu(p->header.nextindex); | 2626 | nextindex = le16_to_cpu(p->header.nextindex); |
2634 | p->header.nextindex = | 2627 | le16_add_cpu(&p->header.nextindex, -1); |
2635 | cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1); | ||
2636 | 2628 | ||
2637 | /* | 2629 | /* |
2638 | * if the leaf page bocome empty, free the page | 2630 | * if the leaf page bocome empty, free the page |
@@ -2795,9 +2787,7 @@ xtDeleteUp(tid_t tid, struct inode *ip, | |||
2795 | (nextindex - index - | 2787 | (nextindex - index - |
2796 | 1) << L2XTSLOTSIZE); | 2788 | 1) << L2XTSLOTSIZE); |
2797 | 2789 | ||
2798 | p->header.nextindex = | 2790 | le16_add_cpu(&p->header.nextindex, -1); |
2799 | cpu_to_le16(le16_to_cpu(p->header.nextindex) - | ||
2800 | 1); | ||
2801 | jfs_info("xtDeleteUp(entry): 0x%lx[%d]", | 2791 | jfs_info("xtDeleteUp(entry): 0x%lx[%d]", |
2802 | (ulong) parent->bn, index); | 2792 | (ulong) parent->bn, index); |
2803 | } | 2793 | } |
diff --git a/fs/locks.c b/fs/locks.c index 43c0af21a0c5..592faadbcec1 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
@@ -127,7 +127,6 @@ | |||
127 | #include <linux/rcupdate.h> | 127 | #include <linux/rcupdate.h> |
128 | #include <linux/pid_namespace.h> | 128 | #include <linux/pid_namespace.h> |
129 | 129 | ||
130 | #include <asm/semaphore.h> | ||
131 | #include <asm/uaccess.h> | 130 | #include <asm/uaccess.h> |
132 | 131 | ||
133 | #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) | 132 | #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) |
diff --git a/fs/namei.c b/fs/namei.c index 8cf9bb9c2fc0..e179f71bfcb0 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1623,8 +1623,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) | |||
1623 | return -EACCES; | 1623 | return -EACCES; |
1624 | 1624 | ||
1625 | flag &= ~O_TRUNC; | 1625 | flag &= ~O_TRUNC; |
1626 | } else if (IS_RDONLY(inode) && (acc_mode & MAY_WRITE)) | 1626 | } |
1627 | return -EROFS; | ||
1628 | 1627 | ||
1629 | error = vfs_permission(nd, acc_mode); | 1628 | error = vfs_permission(nd, acc_mode); |
1630 | if (error) | 1629 | if (error) |
@@ -1677,7 +1676,12 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) | |||
1677 | return 0; | 1676 | return 0; |
1678 | } | 1677 | } |
1679 | 1678 | ||
1680 | static int open_namei_create(struct nameidata *nd, struct path *path, | 1679 | /* |
1680 | * Be careful about ever adding any more callers of this | ||
1681 | * function. Its flags must be in the namei format, not | ||
1682 | * what get passed to sys_open(). | ||
1683 | */ | ||
1684 | static int __open_namei_create(struct nameidata *nd, struct path *path, | ||
1681 | int flag, int mode) | 1685 | int flag, int mode) |
1682 | { | 1686 | { |
1683 | int error; | 1687 | int error; |
@@ -1696,26 +1700,56 @@ static int open_namei_create(struct nameidata *nd, struct path *path, | |||
1696 | } | 1700 | } |
1697 | 1701 | ||
1698 | /* | 1702 | /* |
1699 | * open_namei() | 1703 | * Note that while the flag value (low two bits) for sys_open means: |
1704 | * 00 - read-only | ||
1705 | * 01 - write-only | ||
1706 | * 10 - read-write | ||
1707 | * 11 - special | ||
1708 | * it is changed into | ||
1709 | * 00 - no permissions needed | ||
1710 | * 01 - read-permission | ||
1711 | * 10 - write-permission | ||
1712 | * 11 - read-write | ||
1713 | * for the internal routines (ie open_namei()/follow_link() etc) | ||
1714 | * This is more logical, and also allows the 00 "no perm needed" | ||
1715 | * to be used for symlinks (where the permissions are checked | ||
1716 | * later). | ||
1700 | * | 1717 | * |
1701 | * namei for open - this is in fact almost the whole open-routine. | 1718 | */ |
1702 | * | 1719 | static inline int open_to_namei_flags(int flag) |
1703 | * Note that the low bits of "flag" aren't the same as in the open | 1720 | { |
1704 | * system call - they are 00 - no permissions needed | 1721 | if ((flag+1) & O_ACCMODE) |
1705 | * 01 - read permission needed | 1722 | flag++; |
1706 | * 10 - write permission needed | 1723 | return flag; |
1707 | * 11 - read/write permissions needed | 1724 | } |
1708 | * which is a lot more logical, and also allows the "no perm" needed | 1725 | |
1709 | * for symlinks (where the permissions are checked later). | 1726 | static int open_will_write_to_fs(int flag, struct inode *inode) |
1710 | * SMP-safe | 1727 | { |
1728 | /* | ||
1729 | * We'll never write to the fs underlying | ||
1730 | * a device file. | ||
1731 | */ | ||
1732 | if (special_file(inode->i_mode)) | ||
1733 | return 0; | ||
1734 | return (flag & O_TRUNC); | ||
1735 | } | ||
1736 | |||
1737 | /* | ||
1738 | * Note that the low bits of the passed in "open_flag" | ||
1739 | * are not the same as in the local variable "flag". See | ||
1740 | * open_to_namei_flags() for more details. | ||
1711 | */ | 1741 | */ |
1712 | int open_namei(int dfd, const char *pathname, int flag, | 1742 | struct file *do_filp_open(int dfd, const char *pathname, |
1713 | int mode, struct nameidata *nd) | 1743 | int open_flag, int mode) |
1714 | { | 1744 | { |
1745 | struct file *filp; | ||
1746 | struct nameidata nd; | ||
1715 | int acc_mode, error; | 1747 | int acc_mode, error; |
1716 | struct path path; | 1748 | struct path path; |
1717 | struct dentry *dir; | 1749 | struct dentry *dir; |
1718 | int count = 0; | 1750 | int count = 0; |
1751 | int will_write; | ||
1752 | int flag = open_to_namei_flags(open_flag); | ||
1719 | 1753 | ||
1720 | acc_mode = ACC_MODE(flag); | 1754 | acc_mode = ACC_MODE(flag); |
1721 | 1755 | ||
@@ -1733,18 +1767,19 @@ int open_namei(int dfd, const char *pathname, int flag, | |||
1733 | */ | 1767 | */ |
1734 | if (!(flag & O_CREAT)) { | 1768 | if (!(flag & O_CREAT)) { |
1735 | error = path_lookup_open(dfd, pathname, lookup_flags(flag), | 1769 | error = path_lookup_open(dfd, pathname, lookup_flags(flag), |
1736 | nd, flag); | 1770 | &nd, flag); |
1737 | if (error) | 1771 | if (error) |
1738 | return error; | 1772 | return ERR_PTR(error); |
1739 | goto ok; | 1773 | goto ok; |
1740 | } | 1774 | } |
1741 | 1775 | ||
1742 | /* | 1776 | /* |
1743 | * Create - we need to know the parent. | 1777 | * Create - we need to know the parent. |
1744 | */ | 1778 | */ |
1745 | error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode); | 1779 | error = path_lookup_create(dfd, pathname, LOOKUP_PARENT, |
1780 | &nd, flag, mode); | ||
1746 | if (error) | 1781 | if (error) |
1747 | return error; | 1782 | return ERR_PTR(error); |
1748 | 1783 | ||
1749 | /* | 1784 | /* |
1750 | * We have the parent and last component. First of all, check | 1785 | * We have the parent and last component. First of all, check |
@@ -1752,14 +1787,14 @@ int open_namei(int dfd, const char *pathname, int flag, | |||
1752 | * will not do. | 1787 | * will not do. |
1753 | */ | 1788 | */ |
1754 | error = -EISDIR; | 1789 | error = -EISDIR; |
1755 | if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len]) | 1790 | if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len]) |
1756 | goto exit; | 1791 | goto exit; |
1757 | 1792 | ||
1758 | dir = nd->path.dentry; | 1793 | dir = nd.path.dentry; |
1759 | nd->flags &= ~LOOKUP_PARENT; | 1794 | nd.flags &= ~LOOKUP_PARENT; |
1760 | mutex_lock(&dir->d_inode->i_mutex); | 1795 | mutex_lock(&dir->d_inode->i_mutex); |
1761 | path.dentry = lookup_hash(nd); | 1796 | path.dentry = lookup_hash(&nd); |
1762 | path.mnt = nd->path.mnt; | 1797 | path.mnt = nd.path.mnt; |
1763 | 1798 | ||
1764 | do_last: | 1799 | do_last: |
1765 | error = PTR_ERR(path.dentry); | 1800 | error = PTR_ERR(path.dentry); |
@@ -1768,18 +1803,31 @@ do_last: | |||
1768 | goto exit; | 1803 | goto exit; |
1769 | } | 1804 | } |
1770 | 1805 | ||
1771 | if (IS_ERR(nd->intent.open.file)) { | 1806 | if (IS_ERR(nd.intent.open.file)) { |
1772 | mutex_unlock(&dir->d_inode->i_mutex); | 1807 | error = PTR_ERR(nd.intent.open.file); |
1773 | error = PTR_ERR(nd->intent.open.file); | 1808 | goto exit_mutex_unlock; |
1774 | goto exit_dput; | ||
1775 | } | 1809 | } |
1776 | 1810 | ||
1777 | /* Negative dentry, just create the file */ | 1811 | /* Negative dentry, just create the file */ |
1778 | if (!path.dentry->d_inode) { | 1812 | if (!path.dentry->d_inode) { |
1779 | error = open_namei_create(nd, &path, flag, mode); | 1813 | /* |
1814 | * This write is needed to ensure that a | ||
1815 | * ro->rw transition does not occur between | ||
1816 | * the time when the file is created and when | ||
1817 | * a permanent write count is taken through | ||
1818 | * the 'struct file' in nameidata_to_filp(). | ||
1819 | */ | ||
1820 | error = mnt_want_write(nd.path.mnt); | ||
1780 | if (error) | 1821 | if (error) |
1822 | goto exit_mutex_unlock; | ||
1823 | error = __open_namei_create(&nd, &path, flag, mode); | ||
1824 | if (error) { | ||
1825 | mnt_drop_write(nd.path.mnt); | ||
1781 | goto exit; | 1826 | goto exit; |
1782 | return 0; | 1827 | } |
1828 | filp = nameidata_to_filp(&nd, open_flag); | ||
1829 | mnt_drop_write(nd.path.mnt); | ||
1830 | return filp; | ||
1783 | } | 1831 | } |
1784 | 1832 | ||
1785 | /* | 1833 | /* |
@@ -1804,23 +1852,52 @@ do_last: | |||
1804 | if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link) | 1852 | if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link) |
1805 | goto do_link; | 1853 | goto do_link; |
1806 | 1854 | ||
1807 | path_to_nameidata(&path, nd); | 1855 | path_to_nameidata(&path, &nd); |
1808 | error = -EISDIR; | 1856 | error = -EISDIR; |
1809 | if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) | 1857 | if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) |
1810 | goto exit; | 1858 | goto exit; |
1811 | ok: | 1859 | ok: |
1812 | error = may_open(nd, acc_mode, flag); | 1860 | /* |
1813 | if (error) | 1861 | * Consider: |
1862 | * 1. may_open() truncates a file | ||
1863 | * 2. a rw->ro mount transition occurs | ||
1864 | * 3. nameidata_to_filp() fails due to | ||
1865 | * the ro mount. | ||
1866 | * That would be inconsistent, and should | ||
1867 | * be avoided. Taking this mnt write here | ||
1868 | * ensures that (2) can not occur. | ||
1869 | */ | ||
1870 | will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode); | ||
1871 | if (will_write) { | ||
1872 | error = mnt_want_write(nd.path.mnt); | ||
1873 | if (error) | ||
1874 | goto exit; | ||
1875 | } | ||
1876 | error = may_open(&nd, acc_mode, flag); | ||
1877 | if (error) { | ||
1878 | if (will_write) | ||
1879 | mnt_drop_write(nd.path.mnt); | ||
1814 | goto exit; | 1880 | goto exit; |
1815 | return 0; | 1881 | } |
1882 | filp = nameidata_to_filp(&nd, open_flag); | ||
1883 | /* | ||
1884 | * It is now safe to drop the mnt write | ||
1885 | * because the filp has had a write taken | ||
1886 | * on its behalf. | ||
1887 | */ | ||
1888 | if (will_write) | ||
1889 | mnt_drop_write(nd.path.mnt); | ||
1890 | return filp; | ||
1816 | 1891 | ||
1892 | exit_mutex_unlock: | ||
1893 | mutex_unlock(&dir->d_inode->i_mutex); | ||
1817 | exit_dput: | 1894 | exit_dput: |
1818 | path_put_conditional(&path, nd); | 1895 | path_put_conditional(&path, &nd); |
1819 | exit: | 1896 | exit: |
1820 | if (!IS_ERR(nd->intent.open.file)) | 1897 | if (!IS_ERR(nd.intent.open.file)) |
1821 | release_open_intent(nd); | 1898 | release_open_intent(&nd); |
1822 | path_put(&nd->path); | 1899 | path_put(&nd.path); |
1823 | return error; | 1900 | return ERR_PTR(error); |
1824 | 1901 | ||
1825 | do_link: | 1902 | do_link: |
1826 | error = -ELOOP; | 1903 | error = -ELOOP; |
@@ -1836,43 +1913,60 @@ do_link: | |||
1836 | * stored in nd->last.name and we will have to putname() it when we | 1913 | * stored in nd->last.name and we will have to putname() it when we |
1837 | * are done. Procfs-like symlinks just set LAST_BIND. | 1914 | * are done. Procfs-like symlinks just set LAST_BIND. |
1838 | */ | 1915 | */ |
1839 | nd->flags |= LOOKUP_PARENT; | 1916 | nd.flags |= LOOKUP_PARENT; |
1840 | error = security_inode_follow_link(path.dentry, nd); | 1917 | error = security_inode_follow_link(path.dentry, &nd); |
1841 | if (error) | 1918 | if (error) |
1842 | goto exit_dput; | 1919 | goto exit_dput; |
1843 | error = __do_follow_link(&path, nd); | 1920 | error = __do_follow_link(&path, &nd); |
1844 | if (error) { | 1921 | if (error) { |
1845 | /* Does someone understand code flow here? Or it is only | 1922 | /* Does someone understand code flow here? Or it is only |
1846 | * me so stupid? Anathema to whoever designed this non-sense | 1923 | * me so stupid? Anathema to whoever designed this non-sense |
1847 | * with "intent.open". | 1924 | * with "intent.open". |
1848 | */ | 1925 | */ |
1849 | release_open_intent(nd); | 1926 | release_open_intent(&nd); |
1850 | return error; | 1927 | return ERR_PTR(error); |
1851 | } | 1928 | } |
1852 | nd->flags &= ~LOOKUP_PARENT; | 1929 | nd.flags &= ~LOOKUP_PARENT; |
1853 | if (nd->last_type == LAST_BIND) | 1930 | if (nd.last_type == LAST_BIND) |
1854 | goto ok; | 1931 | goto ok; |
1855 | error = -EISDIR; | 1932 | error = -EISDIR; |
1856 | if (nd->last_type != LAST_NORM) | 1933 | if (nd.last_type != LAST_NORM) |
1857 | goto exit; | 1934 | goto exit; |
1858 | if (nd->last.name[nd->last.len]) { | 1935 | if (nd.last.name[nd.last.len]) { |
1859 | __putname(nd->last.name); | 1936 | __putname(nd.last.name); |
1860 | goto exit; | 1937 | goto exit; |
1861 | } | 1938 | } |
1862 | error = -ELOOP; | 1939 | error = -ELOOP; |
1863 | if (count++==32) { | 1940 | if (count++==32) { |
1864 | __putname(nd->last.name); | 1941 | __putname(nd.last.name); |
1865 | goto exit; | 1942 | goto exit; |
1866 | } | 1943 | } |
1867 | dir = nd->path.dentry; | 1944 | dir = nd.path.dentry; |
1868 | mutex_lock(&dir->d_inode->i_mutex); | 1945 | mutex_lock(&dir->d_inode->i_mutex); |
1869 | path.dentry = lookup_hash(nd); | 1946 | path.dentry = lookup_hash(&nd); |
1870 | path.mnt = nd->path.mnt; | 1947 | path.mnt = nd.path.mnt; |
1871 | __putname(nd->last.name); | 1948 | __putname(nd.last.name); |
1872 | goto do_last; | 1949 | goto do_last; |
1873 | } | 1950 | } |
1874 | 1951 | ||
1875 | /** | 1952 | /** |
1953 | * filp_open - open file and return file pointer | ||
1954 | * | ||
1955 | * @filename: path to open | ||
1956 | * @flags: open flags as per the open(2) second argument | ||
1957 | * @mode: mode for the new file if O_CREAT is set, else ignored | ||
1958 | * | ||
1959 | * This is the helper to open a file from kernelspace if you really | ||
1960 | * have to. But in generally you should not do this, so please move | ||
1961 | * along, nothing to see here.. | ||
1962 | */ | ||
1963 | struct file *filp_open(const char *filename, int flags, int mode) | ||
1964 | { | ||
1965 | return do_filp_open(AT_FDCWD, filename, flags, mode); | ||
1966 | } | ||
1967 | EXPORT_SYMBOL(filp_open); | ||
1968 | |||
1969 | /** | ||
1876 | * lookup_create - lookup a dentry, creating it if it doesn't exist | 1970 | * lookup_create - lookup a dentry, creating it if it doesn't exist |
1877 | * @nd: nameidata info | 1971 | * @nd: nameidata info |
1878 | * @is_dir: directory flag | 1972 | * @is_dir: directory flag |
@@ -1945,6 +2039,23 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) | |||
1945 | return error; | 2039 | return error; |
1946 | } | 2040 | } |
1947 | 2041 | ||
2042 | static int may_mknod(mode_t mode) | ||
2043 | { | ||
2044 | switch (mode & S_IFMT) { | ||
2045 | case S_IFREG: | ||
2046 | case S_IFCHR: | ||
2047 | case S_IFBLK: | ||
2048 | case S_IFIFO: | ||
2049 | case S_IFSOCK: | ||
2050 | case 0: /* zero mode translates to S_IFREG */ | ||
2051 | return 0; | ||
2052 | case S_IFDIR: | ||
2053 | return -EPERM; | ||
2054 | default: | ||
2055 | return -EINVAL; | ||
2056 | } | ||
2057 | } | ||
2058 | |||
1948 | asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode, | 2059 | asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode, |
1949 | unsigned dev) | 2060 | unsigned dev) |
1950 | { | 2061 | { |
@@ -1963,12 +2074,19 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode, | |||
1963 | if (error) | 2074 | if (error) |
1964 | goto out; | 2075 | goto out; |
1965 | dentry = lookup_create(&nd, 0); | 2076 | dentry = lookup_create(&nd, 0); |
1966 | error = PTR_ERR(dentry); | 2077 | if (IS_ERR(dentry)) { |
1967 | 2078 | error = PTR_ERR(dentry); | |
2079 | goto out_unlock; | ||
2080 | } | ||
1968 | if (!IS_POSIXACL(nd.path.dentry->d_inode)) | 2081 | if (!IS_POSIXACL(nd.path.dentry->d_inode)) |
1969 | mode &= ~current->fs->umask; | 2082 | mode &= ~current->fs->umask; |
1970 | if (!IS_ERR(dentry)) { | 2083 | error = may_mknod(mode); |
1971 | switch (mode & S_IFMT) { | 2084 | if (error) |
2085 | goto out_dput; | ||
2086 | error = mnt_want_write(nd.path.mnt); | ||
2087 | if (error) | ||
2088 | goto out_dput; | ||
2089 | switch (mode & S_IFMT) { | ||
1972 | case 0: case S_IFREG: | 2090 | case 0: case S_IFREG: |
1973 | error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd); | 2091 | error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd); |
1974 | break; | 2092 | break; |
@@ -1979,14 +2097,11 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode, | |||
1979 | case S_IFIFO: case S_IFSOCK: | 2097 | case S_IFIFO: case S_IFSOCK: |
1980 | error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0); | 2098 | error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0); |
1981 | break; | 2099 | break; |
1982 | case S_IFDIR: | ||
1983 | error = -EPERM; | ||
1984 | break; | ||
1985 | default: | ||
1986 | error = -EINVAL; | ||
1987 | } | ||
1988 | dput(dentry); | ||
1989 | } | 2100 | } |
2101 | mnt_drop_write(nd.path.mnt); | ||
2102 | out_dput: | ||
2103 | dput(dentry); | ||
2104 | out_unlock: | ||
1990 | mutex_unlock(&nd.path.dentry->d_inode->i_mutex); | 2105 | mutex_unlock(&nd.path.dentry->d_inode->i_mutex); |
1991 | path_put(&nd.path); | 2106 | path_put(&nd.path); |
1992 | out: | 2107 | out: |
@@ -2044,7 +2159,12 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode) | |||
2044 | 2159 | ||
2045 | if (!IS_POSIXACL(nd.path.dentry->d_inode)) | 2160 | if (!IS_POSIXACL(nd.path.dentry->d_inode)) |
2046 | mode &= ~current->fs->umask; | 2161 | mode &= ~current->fs->umask; |
2162 | error = mnt_want_write(nd.path.mnt); | ||
2163 | if (error) | ||
2164 | goto out_dput; | ||
2047 | error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode); | 2165 | error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode); |
2166 | mnt_drop_write(nd.path.mnt); | ||
2167 | out_dput: | ||
2048 | dput(dentry); | 2168 | dput(dentry); |
2049 | out_unlock: | 2169 | out_unlock: |
2050 | mutex_unlock(&nd.path.dentry->d_inode->i_mutex); | 2170 | mutex_unlock(&nd.path.dentry->d_inode->i_mutex); |
@@ -2151,7 +2271,12 @@ static long do_rmdir(int dfd, const char __user *pathname) | |||
2151 | error = PTR_ERR(dentry); | 2271 | error = PTR_ERR(dentry); |
2152 | if (IS_ERR(dentry)) | 2272 | if (IS_ERR(dentry)) |
2153 | goto exit2; | 2273 | goto exit2; |
2274 | error = mnt_want_write(nd.path.mnt); | ||
2275 | if (error) | ||
2276 | goto exit3; | ||
2154 | error = vfs_rmdir(nd.path.dentry->d_inode, dentry); | 2277 | error = vfs_rmdir(nd.path.dentry->d_inode, dentry); |
2278 | mnt_drop_write(nd.path.mnt); | ||
2279 | exit3: | ||
2155 | dput(dentry); | 2280 | dput(dentry); |
2156 | exit2: | 2281 | exit2: |
2157 | mutex_unlock(&nd.path.dentry->d_inode->i_mutex); | 2282 | mutex_unlock(&nd.path.dentry->d_inode->i_mutex); |
@@ -2232,7 +2357,11 @@ static long do_unlinkat(int dfd, const char __user *pathname) | |||
2232 | inode = dentry->d_inode; | 2357 | inode = dentry->d_inode; |
2233 | if (inode) | 2358 | if (inode) |
2234 | atomic_inc(&inode->i_count); | 2359 | atomic_inc(&inode->i_count); |
2360 | error = mnt_want_write(nd.path.mnt); | ||
2361 | if (error) | ||
2362 | goto exit2; | ||
2235 | error = vfs_unlink(nd.path.dentry->d_inode, dentry); | 2363 | error = vfs_unlink(nd.path.dentry->d_inode, dentry); |
2364 | mnt_drop_write(nd.path.mnt); | ||
2236 | exit2: | 2365 | exit2: |
2237 | dput(dentry); | 2366 | dput(dentry); |
2238 | } | 2367 | } |
@@ -2313,7 +2442,12 @@ asmlinkage long sys_symlinkat(const char __user *oldname, | |||
2313 | if (IS_ERR(dentry)) | 2442 | if (IS_ERR(dentry)) |
2314 | goto out_unlock; | 2443 | goto out_unlock; |
2315 | 2444 | ||
2445 | error = mnt_want_write(nd.path.mnt); | ||
2446 | if (error) | ||
2447 | goto out_dput; | ||
2316 | error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, S_IALLUGO); | 2448 | error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, S_IALLUGO); |
2449 | mnt_drop_write(nd.path.mnt); | ||
2450 | out_dput: | ||
2317 | dput(dentry); | 2451 | dput(dentry); |
2318 | out_unlock: | 2452 | out_unlock: |
2319 | mutex_unlock(&nd.path.dentry->d_inode->i_mutex); | 2453 | mutex_unlock(&nd.path.dentry->d_inode->i_mutex); |
@@ -2408,7 +2542,12 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname, | |||
2408 | error = PTR_ERR(new_dentry); | 2542 | error = PTR_ERR(new_dentry); |
2409 | if (IS_ERR(new_dentry)) | 2543 | if (IS_ERR(new_dentry)) |
2410 | goto out_unlock; | 2544 | goto out_unlock; |
2545 | error = mnt_want_write(nd.path.mnt); | ||
2546 | if (error) | ||
2547 | goto out_dput; | ||
2411 | error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, new_dentry); | 2548 | error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, new_dentry); |
2549 | mnt_drop_write(nd.path.mnt); | ||
2550 | out_dput: | ||
2412 | dput(new_dentry); | 2551 | dput(new_dentry); |
2413 | out_unlock: | 2552 | out_unlock: |
2414 | mutex_unlock(&nd.path.dentry->d_inode->i_mutex); | 2553 | mutex_unlock(&nd.path.dentry->d_inode->i_mutex); |
@@ -2634,8 +2773,12 @@ static int do_rename(int olddfd, const char *oldname, | |||
2634 | if (new_dentry == trap) | 2773 | if (new_dentry == trap) |
2635 | goto exit5; | 2774 | goto exit5; |
2636 | 2775 | ||
2776 | error = mnt_want_write(oldnd.path.mnt); | ||
2777 | if (error) | ||
2778 | goto exit5; | ||
2637 | error = vfs_rename(old_dir->d_inode, old_dentry, | 2779 | error = vfs_rename(old_dir->d_inode, old_dentry, |
2638 | new_dir->d_inode, new_dentry); | 2780 | new_dir->d_inode, new_dentry); |
2781 | mnt_drop_write(oldnd.path.mnt); | ||
2639 | exit5: | 2782 | exit5: |
2640 | dput(new_dentry); | 2783 | dput(new_dentry); |
2641 | exit4: | 2784 | exit4: |
diff --git a/fs/namespace.c b/fs/namespace.c index 94f026ec990a..0505fb61aa74 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/quotaops.h> | 17 | #include <linux/quotaops.h> |
18 | #include <linux/acct.h> | 18 | #include <linux/acct.h> |
19 | #include <linux/capability.h> | 19 | #include <linux/capability.h> |
20 | #include <linux/cpumask.h> | ||
20 | #include <linux/module.h> | 21 | #include <linux/module.h> |
21 | #include <linux/sysfs.h> | 22 | #include <linux/sysfs.h> |
22 | #include <linux/seq_file.h> | 23 | #include <linux/seq_file.h> |
@@ -26,6 +27,7 @@ | |||
26 | #include <linux/mount.h> | 27 | #include <linux/mount.h> |
27 | #include <linux/ramfs.h> | 28 | #include <linux/ramfs.h> |
28 | #include <linux/log2.h> | 29 | #include <linux/log2.h> |
30 | #include <linux/idr.h> | ||
29 | #include <asm/uaccess.h> | 31 | #include <asm/uaccess.h> |
30 | #include <asm/unistd.h> | 32 | #include <asm/unistd.h> |
31 | #include "pnode.h" | 33 | #include "pnode.h" |
@@ -38,6 +40,8 @@ | |||
38 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); | 40 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); |
39 | 41 | ||
40 | static int event; | 42 | static int event; |
43 | static DEFINE_IDA(mnt_id_ida); | ||
44 | static DEFINE_IDA(mnt_group_ida); | ||
41 | 45 | ||
42 | static struct list_head *mount_hashtable __read_mostly; | 46 | static struct list_head *mount_hashtable __read_mostly; |
43 | static struct kmem_cache *mnt_cache __read_mostly; | 47 | static struct kmem_cache *mnt_cache __read_mostly; |
@@ -55,10 +59,65 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) | |||
55 | return tmp & (HASH_SIZE - 1); | 59 | return tmp & (HASH_SIZE - 1); |
56 | } | 60 | } |
57 | 61 | ||
62 | #define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16) | ||
63 | |||
64 | /* allocation is serialized by namespace_sem */ | ||
65 | static int mnt_alloc_id(struct vfsmount *mnt) | ||
66 | { | ||
67 | int res; | ||
68 | |||
69 | retry: | ||
70 | ida_pre_get(&mnt_id_ida, GFP_KERNEL); | ||
71 | spin_lock(&vfsmount_lock); | ||
72 | res = ida_get_new(&mnt_id_ida, &mnt->mnt_id); | ||
73 | spin_unlock(&vfsmount_lock); | ||
74 | if (res == -EAGAIN) | ||
75 | goto retry; | ||
76 | |||
77 | return res; | ||
78 | } | ||
79 | |||
80 | static void mnt_free_id(struct vfsmount *mnt) | ||
81 | { | ||
82 | spin_lock(&vfsmount_lock); | ||
83 | ida_remove(&mnt_id_ida, mnt->mnt_id); | ||
84 | spin_unlock(&vfsmount_lock); | ||
85 | } | ||
86 | |||
87 | /* | ||
88 | * Allocate a new peer group ID | ||
89 | * | ||
90 | * mnt_group_ida is protected by namespace_sem | ||
91 | */ | ||
92 | static int mnt_alloc_group_id(struct vfsmount *mnt) | ||
93 | { | ||
94 | if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL)) | ||
95 | return -ENOMEM; | ||
96 | |||
97 | return ida_get_new_above(&mnt_group_ida, 1, &mnt->mnt_group_id); | ||
98 | } | ||
99 | |||
100 | /* | ||
101 | * Release a peer group ID | ||
102 | */ | ||
103 | void mnt_release_group_id(struct vfsmount *mnt) | ||
104 | { | ||
105 | ida_remove(&mnt_group_ida, mnt->mnt_group_id); | ||
106 | mnt->mnt_group_id = 0; | ||
107 | } | ||
108 | |||
58 | struct vfsmount *alloc_vfsmnt(const char *name) | 109 | struct vfsmount *alloc_vfsmnt(const char *name) |
59 | { | 110 | { |
60 | struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); | 111 | struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); |
61 | if (mnt) { | 112 | if (mnt) { |
113 | int err; | ||
114 | |||
115 | err = mnt_alloc_id(mnt); | ||
116 | if (err) { | ||
117 | kmem_cache_free(mnt_cache, mnt); | ||
118 | return NULL; | ||
119 | } | ||
120 | |||
62 | atomic_set(&mnt->mnt_count, 1); | 121 | atomic_set(&mnt->mnt_count, 1); |
63 | INIT_LIST_HEAD(&mnt->mnt_hash); | 122 | INIT_LIST_HEAD(&mnt->mnt_hash); |
64 | INIT_LIST_HEAD(&mnt->mnt_child); | 123 | INIT_LIST_HEAD(&mnt->mnt_child); |
@@ -68,6 +127,7 @@ struct vfsmount *alloc_vfsmnt(const char *name) | |||
68 | INIT_LIST_HEAD(&mnt->mnt_share); | 127 | INIT_LIST_HEAD(&mnt->mnt_share); |
69 | INIT_LIST_HEAD(&mnt->mnt_slave_list); | 128 | INIT_LIST_HEAD(&mnt->mnt_slave_list); |
70 | INIT_LIST_HEAD(&mnt->mnt_slave); | 129 | INIT_LIST_HEAD(&mnt->mnt_slave); |
130 | atomic_set(&mnt->__mnt_writers, 0); | ||
71 | if (name) { | 131 | if (name) { |
72 | int size = strlen(name) + 1; | 132 | int size = strlen(name) + 1; |
73 | char *newname = kmalloc(size, GFP_KERNEL); | 133 | char *newname = kmalloc(size, GFP_KERNEL); |
@@ -80,6 +140,263 @@ struct vfsmount *alloc_vfsmnt(const char *name) | |||
80 | return mnt; | 140 | return mnt; |
81 | } | 141 | } |
82 | 142 | ||
143 | /* | ||
144 | * Most r/o checks on a fs are for operations that take | ||
145 | * discrete amounts of time, like a write() or unlink(). | ||
146 | * We must keep track of when those operations start | ||
147 | * (for permission checks) and when they end, so that | ||
148 | * we can determine when writes are able to occur to | ||
149 | * a filesystem. | ||
150 | */ | ||
151 | /* | ||
152 | * __mnt_is_readonly: check whether a mount is read-only | ||
153 | * @mnt: the mount to check for its write status | ||
154 | * | ||
155 | * This shouldn't be used directly ouside of the VFS. | ||
156 | * It does not guarantee that the filesystem will stay | ||
157 | * r/w, just that it is right *now*. This can not and | ||
158 | * should not be used in place of IS_RDONLY(inode). | ||
159 | * mnt_want/drop_write() will _keep_ the filesystem | ||
160 | * r/w. | ||
161 | */ | ||
162 | int __mnt_is_readonly(struct vfsmount *mnt) | ||
163 | { | ||
164 | if (mnt->mnt_flags & MNT_READONLY) | ||
165 | return 1; | ||
166 | if (mnt->mnt_sb->s_flags & MS_RDONLY) | ||
167 | return 1; | ||
168 | return 0; | ||
169 | } | ||
170 | EXPORT_SYMBOL_GPL(__mnt_is_readonly); | ||
171 | |||
172 | struct mnt_writer { | ||
173 | /* | ||
174 | * If holding multiple instances of this lock, they | ||
175 | * must be ordered by cpu number. | ||
176 | */ | ||
177 | spinlock_t lock; | ||
178 | struct lock_class_key lock_class; /* compiles out with !lockdep */ | ||
179 | unsigned long count; | ||
180 | struct vfsmount *mnt; | ||
181 | } ____cacheline_aligned_in_smp; | ||
182 | static DEFINE_PER_CPU(struct mnt_writer, mnt_writers); | ||
183 | |||
184 | static int __init init_mnt_writers(void) | ||
185 | { | ||
186 | int cpu; | ||
187 | for_each_possible_cpu(cpu) { | ||
188 | struct mnt_writer *writer = &per_cpu(mnt_writers, cpu); | ||
189 | spin_lock_init(&writer->lock); | ||
190 | lockdep_set_class(&writer->lock, &writer->lock_class); | ||
191 | writer->count = 0; | ||
192 | } | ||
193 | return 0; | ||
194 | } | ||
195 | fs_initcall(init_mnt_writers); | ||
196 | |||
197 | static void unlock_mnt_writers(void) | ||
198 | { | ||
199 | int cpu; | ||
200 | struct mnt_writer *cpu_writer; | ||
201 | |||
202 | for_each_possible_cpu(cpu) { | ||
203 | cpu_writer = &per_cpu(mnt_writers, cpu); | ||
204 | spin_unlock(&cpu_writer->lock); | ||
205 | } | ||
206 | } | ||
207 | |||
208 | static inline void __clear_mnt_count(struct mnt_writer *cpu_writer) | ||
209 | { | ||
210 | if (!cpu_writer->mnt) | ||
211 | return; | ||
212 | /* | ||
213 | * This is in case anyone ever leaves an invalid, | ||
214 | * old ->mnt and a count of 0. | ||
215 | */ | ||
216 | if (!cpu_writer->count) | ||
217 | return; | ||
218 | atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers); | ||
219 | cpu_writer->count = 0; | ||
220 | } | ||
221 | /* | ||
222 | * must hold cpu_writer->lock | ||
223 | */ | ||
224 | static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer, | ||
225 | struct vfsmount *mnt) | ||
226 | { | ||
227 | if (cpu_writer->mnt == mnt) | ||
228 | return; | ||
229 | __clear_mnt_count(cpu_writer); | ||
230 | cpu_writer->mnt = mnt; | ||
231 | } | ||
232 | |||
233 | /* | ||
234 | * Most r/o checks on a fs are for operations that take | ||
235 | * discrete amounts of time, like a write() or unlink(). | ||
236 | * We must keep track of when those operations start | ||
237 | * (for permission checks) and when they end, so that | ||
238 | * we can determine when writes are able to occur to | ||
239 | * a filesystem. | ||
240 | */ | ||
241 | /** | ||
242 | * mnt_want_write - get write access to a mount | ||
243 | * @mnt: the mount on which to take a write | ||
244 | * | ||
245 | * This tells the low-level filesystem that a write is | ||
246 | * about to be performed to it, and makes sure that | ||
247 | * writes are allowed before returning success. When | ||
248 | * the write operation is finished, mnt_drop_write() | ||
249 | * must be called. This is effectively a refcount. | ||
250 | */ | ||
251 | int mnt_want_write(struct vfsmount *mnt) | ||
252 | { | ||
253 | int ret = 0; | ||
254 | struct mnt_writer *cpu_writer; | ||
255 | |||
256 | cpu_writer = &get_cpu_var(mnt_writers); | ||
257 | spin_lock(&cpu_writer->lock); | ||
258 | if (__mnt_is_readonly(mnt)) { | ||
259 | ret = -EROFS; | ||
260 | goto out; | ||
261 | } | ||
262 | use_cpu_writer_for_mount(cpu_writer, mnt); | ||
263 | cpu_writer->count++; | ||
264 | out: | ||
265 | spin_unlock(&cpu_writer->lock); | ||
266 | put_cpu_var(mnt_writers); | ||
267 | return ret; | ||
268 | } | ||
269 | EXPORT_SYMBOL_GPL(mnt_want_write); | ||
270 | |||
271 | static void lock_mnt_writers(void) | ||
272 | { | ||
273 | int cpu; | ||
274 | struct mnt_writer *cpu_writer; | ||
275 | |||
276 | for_each_possible_cpu(cpu) { | ||
277 | cpu_writer = &per_cpu(mnt_writers, cpu); | ||
278 | spin_lock(&cpu_writer->lock); | ||
279 | __clear_mnt_count(cpu_writer); | ||
280 | cpu_writer->mnt = NULL; | ||
281 | } | ||
282 | } | ||
283 | |||
284 | /* | ||
285 | * These per-cpu write counts are not guaranteed to have | ||
286 | * matched increments and decrements on any given cpu. | ||
287 | * A file open()ed for write on one cpu and close()d on | ||
288 | * another cpu will imbalance this count. Make sure it | ||
289 | * does not get too far out of whack. | ||
290 | */ | ||
291 | static void handle_write_count_underflow(struct vfsmount *mnt) | ||
292 | { | ||
293 | if (atomic_read(&mnt->__mnt_writers) >= | ||
294 | MNT_WRITER_UNDERFLOW_LIMIT) | ||
295 | return; | ||
296 | /* | ||
297 | * It isn't necessary to hold all of the locks | ||
298 | * at the same time, but doing it this way makes | ||
299 | * us share a lot more code. | ||
300 | */ | ||
301 | lock_mnt_writers(); | ||
302 | /* | ||
303 | * vfsmount_lock is for mnt_flags. | ||
304 | */ | ||
305 | spin_lock(&vfsmount_lock); | ||
306 | /* | ||
307 | * If coalescing the per-cpu writer counts did not | ||
308 | * get us back to a positive writer count, we have | ||
309 | * a bug. | ||
310 | */ | ||
311 | if ((atomic_read(&mnt->__mnt_writers) < 0) && | ||
312 | !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) { | ||
313 | printk(KERN_DEBUG "leak detected on mount(%p) writers " | ||
314 | "count: %d\n", | ||
315 | mnt, atomic_read(&mnt->__mnt_writers)); | ||
316 | WARN_ON(1); | ||
317 | /* use the flag to keep the dmesg spam down */ | ||
318 | mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT; | ||
319 | } | ||
320 | spin_unlock(&vfsmount_lock); | ||
321 | unlock_mnt_writers(); | ||
322 | } | ||
323 | |||
324 | /** | ||
325 | * mnt_drop_write - give up write access to a mount | ||
326 | * @mnt: the mount on which to give up write access | ||
327 | * | ||
328 | * Tells the low-level filesystem that we are done | ||
329 | * performing writes to it. Must be matched with | ||
330 | * mnt_want_write() call above. | ||
331 | */ | ||
332 | void mnt_drop_write(struct vfsmount *mnt) | ||
333 | { | ||
334 | int must_check_underflow = 0; | ||
335 | struct mnt_writer *cpu_writer; | ||
336 | |||
337 | cpu_writer = &get_cpu_var(mnt_writers); | ||
338 | spin_lock(&cpu_writer->lock); | ||
339 | |||
340 | use_cpu_writer_for_mount(cpu_writer, mnt); | ||
341 | if (cpu_writer->count > 0) { | ||
342 | cpu_writer->count--; | ||
343 | } else { | ||
344 | must_check_underflow = 1; | ||
345 | atomic_dec(&mnt->__mnt_writers); | ||
346 | } | ||
347 | |||
348 | spin_unlock(&cpu_writer->lock); | ||
349 | /* | ||
350 | * Logically, we could call this each time, | ||
351 | * but the __mnt_writers cacheline tends to | ||
352 | * be cold, and makes this expensive. | ||
353 | */ | ||
354 | if (must_check_underflow) | ||
355 | handle_write_count_underflow(mnt); | ||
356 | /* | ||
357 | * This could be done right after the spinlock | ||
358 | * is taken because the spinlock keeps us on | ||
359 | * the cpu, and disables preemption. However, | ||
360 | * putting it here bounds the amount that | ||
361 | * __mnt_writers can underflow. Without it, | ||
362 | * we could theoretically wrap __mnt_writers. | ||
363 | */ | ||
364 | put_cpu_var(mnt_writers); | ||
365 | } | ||
366 | EXPORT_SYMBOL_GPL(mnt_drop_write); | ||
367 | |||
368 | static int mnt_make_readonly(struct vfsmount *mnt) | ||
369 | { | ||
370 | int ret = 0; | ||
371 | |||
372 | lock_mnt_writers(); | ||
373 | /* | ||
374 | * With all the locks held, this value is stable | ||
375 | */ | ||
376 | if (atomic_read(&mnt->__mnt_writers) > 0) { | ||
377 | ret = -EBUSY; | ||
378 | goto out; | ||
379 | } | ||
380 | /* | ||
381 | * nobody can do a successful mnt_want_write() with all | ||
382 | * of the counts in MNT_DENIED_WRITE and the locks held. | ||
383 | */ | ||
384 | spin_lock(&vfsmount_lock); | ||
385 | if (!ret) | ||
386 | mnt->mnt_flags |= MNT_READONLY; | ||
387 | spin_unlock(&vfsmount_lock); | ||
388 | out: | ||
389 | unlock_mnt_writers(); | ||
390 | return ret; | ||
391 | } | ||
392 | |||
393 | static void __mnt_unmake_readonly(struct vfsmount *mnt) | ||
394 | { | ||
395 | spin_lock(&vfsmount_lock); | ||
396 | mnt->mnt_flags &= ~MNT_READONLY; | ||
397 | spin_unlock(&vfsmount_lock); | ||
398 | } | ||
399 | |||
83 | int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) | 400 | int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) |
84 | { | 401 | { |
85 | mnt->mnt_sb = sb; | 402 | mnt->mnt_sb = sb; |
@@ -92,6 +409,7 @@ EXPORT_SYMBOL(simple_set_mnt); | |||
92 | void free_vfsmnt(struct vfsmount *mnt) | 409 | void free_vfsmnt(struct vfsmount *mnt) |
93 | { | 410 | { |
94 | kfree(mnt->mnt_devname); | 411 | kfree(mnt->mnt_devname); |
412 | mnt_free_id(mnt); | ||
95 | kmem_cache_free(mnt_cache, mnt); | 413 | kmem_cache_free(mnt_cache, mnt); |
96 | } | 414 | } |
97 | 415 | ||
@@ -238,6 +556,17 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, | |||
238 | struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname); | 556 | struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname); |
239 | 557 | ||
240 | if (mnt) { | 558 | if (mnt) { |
559 | if (flag & (CL_SLAVE | CL_PRIVATE)) | ||
560 | mnt->mnt_group_id = 0; /* not a peer of original */ | ||
561 | else | ||
562 | mnt->mnt_group_id = old->mnt_group_id; | ||
563 | |||
564 | if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) { | ||
565 | int err = mnt_alloc_group_id(mnt); | ||
566 | if (err) | ||
567 | goto out_free; | ||
568 | } | ||
569 | |||
241 | mnt->mnt_flags = old->mnt_flags; | 570 | mnt->mnt_flags = old->mnt_flags; |
242 | atomic_inc(&sb->s_active); | 571 | atomic_inc(&sb->s_active); |
243 | mnt->mnt_sb = sb; | 572 | mnt->mnt_sb = sb; |
@@ -267,11 +596,44 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, | |||
267 | } | 596 | } |
268 | } | 597 | } |
269 | return mnt; | 598 | return mnt; |
599 | |||
600 | out_free: | ||
601 | free_vfsmnt(mnt); | ||
602 | return NULL; | ||
270 | } | 603 | } |
271 | 604 | ||
272 | static inline void __mntput(struct vfsmount *mnt) | 605 | static inline void __mntput(struct vfsmount *mnt) |
273 | { | 606 | { |
607 | int cpu; | ||
274 | struct super_block *sb = mnt->mnt_sb; | 608 | struct super_block *sb = mnt->mnt_sb; |
609 | /* | ||
610 | * We don't have to hold all of the locks at the | ||
611 | * same time here because we know that we're the | ||
612 | * last reference to mnt and that no new writers | ||
613 | * can come in. | ||
614 | */ | ||
615 | for_each_possible_cpu(cpu) { | ||
616 | struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu); | ||
617 | if (cpu_writer->mnt != mnt) | ||
618 | continue; | ||
619 | spin_lock(&cpu_writer->lock); | ||
620 | atomic_add(cpu_writer->count, &mnt->__mnt_writers); | ||
621 | cpu_writer->count = 0; | ||
622 | /* | ||
623 | * Might as well do this so that no one | ||
624 | * ever sees the pointer and expects | ||
625 | * it to be valid. | ||
626 | */ | ||
627 | cpu_writer->mnt = NULL; | ||
628 | spin_unlock(&cpu_writer->lock); | ||
629 | } | ||
630 | /* | ||
631 | * This probably indicates that somebody messed | ||
632 | * up a mnt_want/drop_write() pair. If this | ||
633 | * happens, the filesystem was probably unable | ||
634 | * to make r/w->r/o transitions. | ||
635 | */ | ||
636 | WARN_ON(atomic_read(&mnt->__mnt_writers)); | ||
275 | dput(mnt->mnt_root); | 637 | dput(mnt->mnt_root); |
276 | free_vfsmnt(mnt); | 638 | free_vfsmnt(mnt); |
277 | deactivate_super(sb); | 639 | deactivate_super(sb); |
@@ -362,20 +724,21 @@ void save_mount_options(struct super_block *sb, char *options) | |||
362 | } | 724 | } |
363 | EXPORT_SYMBOL(save_mount_options); | 725 | EXPORT_SYMBOL(save_mount_options); |
364 | 726 | ||
727 | #ifdef CONFIG_PROC_FS | ||
365 | /* iterator */ | 728 | /* iterator */ |
366 | static void *m_start(struct seq_file *m, loff_t *pos) | 729 | static void *m_start(struct seq_file *m, loff_t *pos) |
367 | { | 730 | { |
368 | struct mnt_namespace *n = m->private; | 731 | struct proc_mounts *p = m->private; |
369 | 732 | ||
370 | down_read(&namespace_sem); | 733 | down_read(&namespace_sem); |
371 | return seq_list_start(&n->list, *pos); | 734 | return seq_list_start(&p->ns->list, *pos); |
372 | } | 735 | } |
373 | 736 | ||
374 | static void *m_next(struct seq_file *m, void *v, loff_t *pos) | 737 | static void *m_next(struct seq_file *m, void *v, loff_t *pos) |
375 | { | 738 | { |
376 | struct mnt_namespace *n = m->private; | 739 | struct proc_mounts *p = m->private; |
377 | 740 | ||
378 | return seq_list_next(v, &n->list, pos); | 741 | return seq_list_next(v, &p->ns->list, pos); |
379 | } | 742 | } |
380 | 743 | ||
381 | static void m_stop(struct seq_file *m, void *v) | 744 | static void m_stop(struct seq_file *m, void *v) |
@@ -383,20 +746,30 @@ static void m_stop(struct seq_file *m, void *v) | |||
383 | up_read(&namespace_sem); | 746 | up_read(&namespace_sem); |
384 | } | 747 | } |
385 | 748 | ||
386 | static int show_vfsmnt(struct seq_file *m, void *v) | 749 | struct proc_fs_info { |
750 | int flag; | ||
751 | const char *str; | ||
752 | }; | ||
753 | |||
754 | static void show_sb_opts(struct seq_file *m, struct super_block *sb) | ||
387 | { | 755 | { |
388 | struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list); | 756 | static const struct proc_fs_info fs_info[] = { |
389 | int err = 0; | ||
390 | static struct proc_fs_info { | ||
391 | int flag; | ||
392 | char *str; | ||
393 | } fs_info[] = { | ||
394 | { MS_SYNCHRONOUS, ",sync" }, | 757 | { MS_SYNCHRONOUS, ",sync" }, |
395 | { MS_DIRSYNC, ",dirsync" }, | 758 | { MS_DIRSYNC, ",dirsync" }, |
396 | { MS_MANDLOCK, ",mand" }, | 759 | { MS_MANDLOCK, ",mand" }, |
397 | { 0, NULL } | 760 | { 0, NULL } |
398 | }; | 761 | }; |
399 | static struct proc_fs_info mnt_info[] = { | 762 | const struct proc_fs_info *fs_infop; |
763 | |||
764 | for (fs_infop = fs_info; fs_infop->flag; fs_infop++) { | ||
765 | if (sb->s_flags & fs_infop->flag) | ||
766 | seq_puts(m, fs_infop->str); | ||
767 | } | ||
768 | } | ||
769 | |||
770 | static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt) | ||
771 | { | ||
772 | static const struct proc_fs_info mnt_info[] = { | ||
400 | { MNT_NOSUID, ",nosuid" }, | 773 | { MNT_NOSUID, ",nosuid" }, |
401 | { MNT_NODEV, ",nodev" }, | 774 | { MNT_NODEV, ",nodev" }, |
402 | { MNT_NOEXEC, ",noexec" }, | 775 | { MNT_NOEXEC, ",noexec" }, |
@@ -405,40 +778,108 @@ static int show_vfsmnt(struct seq_file *m, void *v) | |||
405 | { MNT_RELATIME, ",relatime" }, | 778 | { MNT_RELATIME, ",relatime" }, |
406 | { 0, NULL } | 779 | { 0, NULL } |
407 | }; | 780 | }; |
408 | struct proc_fs_info *fs_infop; | 781 | const struct proc_fs_info *fs_infop; |
782 | |||
783 | for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) { | ||
784 | if (mnt->mnt_flags & fs_infop->flag) | ||
785 | seq_puts(m, fs_infop->str); | ||
786 | } | ||
787 | } | ||
788 | |||
789 | static void show_type(struct seq_file *m, struct super_block *sb) | ||
790 | { | ||
791 | mangle(m, sb->s_type->name); | ||
792 | if (sb->s_subtype && sb->s_subtype[0]) { | ||
793 | seq_putc(m, '.'); | ||
794 | mangle(m, sb->s_subtype); | ||
795 | } | ||
796 | } | ||
797 | |||
798 | static int show_vfsmnt(struct seq_file *m, void *v) | ||
799 | { | ||
800 | struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list); | ||
801 | int err = 0; | ||
409 | struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; | 802 | struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; |
410 | 803 | ||
411 | mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); | 804 | mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); |
412 | seq_putc(m, ' '); | 805 | seq_putc(m, ' '); |
413 | seq_path(m, &mnt_path, " \t\n\\"); | 806 | seq_path(m, &mnt_path, " \t\n\\"); |
414 | seq_putc(m, ' '); | 807 | seq_putc(m, ' '); |
415 | mangle(m, mnt->mnt_sb->s_type->name); | 808 | show_type(m, mnt->mnt_sb); |
416 | if (mnt->mnt_sb->s_subtype && mnt->mnt_sb->s_subtype[0]) { | 809 | seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw"); |
417 | seq_putc(m, '.'); | 810 | show_sb_opts(m, mnt->mnt_sb); |
418 | mangle(m, mnt->mnt_sb->s_subtype); | 811 | show_mnt_opts(m, mnt); |
419 | } | ||
420 | seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw"); | ||
421 | for (fs_infop = fs_info; fs_infop->flag; fs_infop++) { | ||
422 | if (mnt->mnt_sb->s_flags & fs_infop->flag) | ||
423 | seq_puts(m, fs_infop->str); | ||
424 | } | ||
425 | for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) { | ||
426 | if (mnt->mnt_flags & fs_infop->flag) | ||
427 | seq_puts(m, fs_infop->str); | ||
428 | } | ||
429 | if (mnt->mnt_sb->s_op->show_options) | 812 | if (mnt->mnt_sb->s_op->show_options) |
430 | err = mnt->mnt_sb->s_op->show_options(m, mnt); | 813 | err = mnt->mnt_sb->s_op->show_options(m, mnt); |
431 | seq_puts(m, " 0 0\n"); | 814 | seq_puts(m, " 0 0\n"); |
432 | return err; | 815 | return err; |
433 | } | 816 | } |
434 | 817 | ||
435 | struct seq_operations mounts_op = { | 818 | const struct seq_operations mounts_op = { |
436 | .start = m_start, | 819 | .start = m_start, |
437 | .next = m_next, | 820 | .next = m_next, |
438 | .stop = m_stop, | 821 | .stop = m_stop, |
439 | .show = show_vfsmnt | 822 | .show = show_vfsmnt |
440 | }; | 823 | }; |
441 | 824 | ||
825 | static int show_mountinfo(struct seq_file *m, void *v) | ||
826 | { | ||
827 | struct proc_mounts *p = m->private; | ||
828 | struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list); | ||
829 | struct super_block *sb = mnt->mnt_sb; | ||
830 | struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; | ||
831 | struct path root = p->root; | ||
832 | int err = 0; | ||
833 | |||
834 | seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, mnt->mnt_parent->mnt_id, | ||
835 | MAJOR(sb->s_dev), MINOR(sb->s_dev)); | ||
836 | seq_dentry(m, mnt->mnt_root, " \t\n\\"); | ||
837 | seq_putc(m, ' '); | ||
838 | seq_path_root(m, &mnt_path, &root, " \t\n\\"); | ||
839 | if (root.mnt != p->root.mnt || root.dentry != p->root.dentry) { | ||
840 | /* | ||
841 | * Mountpoint is outside root, discard that one. Ugly, | ||
842 | * but less so than trying to do that in iterator in a | ||
843 | * race-free way (due to renames). | ||
844 | */ | ||
845 | return SEQ_SKIP; | ||
846 | } | ||
847 | seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw"); | ||
848 | show_mnt_opts(m, mnt); | ||
849 | |||
850 | /* Tagged fields ("foo:X" or "bar") */ | ||
851 | if (IS_MNT_SHARED(mnt)) | ||
852 | seq_printf(m, " shared:%i", mnt->mnt_group_id); | ||
853 | if (IS_MNT_SLAVE(mnt)) { | ||
854 | int master = mnt->mnt_master->mnt_group_id; | ||
855 | int dom = get_dominating_id(mnt, &p->root); | ||
856 | seq_printf(m, " master:%i", master); | ||
857 | if (dom && dom != master) | ||
858 | seq_printf(m, " propagate_from:%i", dom); | ||
859 | } | ||
860 | if (IS_MNT_UNBINDABLE(mnt)) | ||
861 | seq_puts(m, " unbindable"); | ||
862 | |||
863 | /* Filesystem specific data */ | ||
864 | seq_puts(m, " - "); | ||
865 | show_type(m, sb); | ||
866 | seq_putc(m, ' '); | ||
867 | mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); | ||
868 | seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw"); | ||
869 | show_sb_opts(m, sb); | ||
870 | if (sb->s_op->show_options) | ||
871 | err = sb->s_op->show_options(m, mnt); | ||
872 | seq_putc(m, '\n'); | ||
873 | return err; | ||
874 | } | ||
875 | |||
876 | const struct seq_operations mountinfo_op = { | ||
877 | .start = m_start, | ||
878 | .next = m_next, | ||
879 | .stop = m_stop, | ||
880 | .show = show_mountinfo, | ||
881 | }; | ||
882 | |||
442 | static int show_vfsstat(struct seq_file *m, void *v) | 883 | static int show_vfsstat(struct seq_file *m, void *v) |
443 | { | 884 | { |
444 | struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list); | 885 | struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list); |
@@ -459,7 +900,7 @@ static int show_vfsstat(struct seq_file *m, void *v) | |||
459 | 900 | ||
460 | /* file system type */ | 901 | /* file system type */ |
461 | seq_puts(m, "with fstype "); | 902 | seq_puts(m, "with fstype "); |
462 | mangle(m, mnt->mnt_sb->s_type->name); | 903 | show_type(m, mnt->mnt_sb); |
463 | 904 | ||
464 | /* optional statistics */ | 905 | /* optional statistics */ |
465 | if (mnt->mnt_sb->s_op->show_stats) { | 906 | if (mnt->mnt_sb->s_op->show_stats) { |
@@ -471,12 +912,13 @@ static int show_vfsstat(struct seq_file *m, void *v) | |||
471 | return err; | 912 | return err; |
472 | } | 913 | } |
473 | 914 | ||
474 | struct seq_operations mountstats_op = { | 915 | const struct seq_operations mountstats_op = { |
475 | .start = m_start, | 916 | .start = m_start, |
476 | .next = m_next, | 917 | .next = m_next, |
477 | .stop = m_stop, | 918 | .stop = m_stop, |
478 | .show = show_vfsstat, | 919 | .show = show_vfsstat, |
479 | }; | 920 | }; |
921 | #endif /* CONFIG_PROC_FS */ | ||
480 | 922 | ||
481 | /** | 923 | /** |
482 | * may_umount_tree - check if a mount tree is busy | 924 | * may_umount_tree - check if a mount tree is busy |
@@ -801,23 +1243,50 @@ Enomem: | |||
801 | struct vfsmount *collect_mounts(struct vfsmount *mnt, struct dentry *dentry) | 1243 | struct vfsmount *collect_mounts(struct vfsmount *mnt, struct dentry *dentry) |
802 | { | 1244 | { |
803 | struct vfsmount *tree; | 1245 | struct vfsmount *tree; |
804 | down_read(&namespace_sem); | 1246 | down_write(&namespace_sem); |
805 | tree = copy_tree(mnt, dentry, CL_COPY_ALL | CL_PRIVATE); | 1247 | tree = copy_tree(mnt, dentry, CL_COPY_ALL | CL_PRIVATE); |
806 | up_read(&namespace_sem); | 1248 | up_write(&namespace_sem); |
807 | return tree; | 1249 | return tree; |
808 | } | 1250 | } |
809 | 1251 | ||
810 | void drop_collected_mounts(struct vfsmount *mnt) | 1252 | void drop_collected_mounts(struct vfsmount *mnt) |
811 | { | 1253 | { |
812 | LIST_HEAD(umount_list); | 1254 | LIST_HEAD(umount_list); |
813 | down_read(&namespace_sem); | 1255 | down_write(&namespace_sem); |
814 | spin_lock(&vfsmount_lock); | 1256 | spin_lock(&vfsmount_lock); |
815 | umount_tree(mnt, 0, &umount_list); | 1257 | umount_tree(mnt, 0, &umount_list); |
816 | spin_unlock(&vfsmount_lock); | 1258 | spin_unlock(&vfsmount_lock); |
817 | up_read(&namespace_sem); | 1259 | up_write(&namespace_sem); |
818 | release_mounts(&umount_list); | 1260 | release_mounts(&umount_list); |
819 | } | 1261 | } |
820 | 1262 | ||
1263 | static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end) | ||
1264 | { | ||
1265 | struct vfsmount *p; | ||
1266 | |||
1267 | for (p = mnt; p != end; p = next_mnt(p, mnt)) { | ||
1268 | if (p->mnt_group_id && !IS_MNT_SHARED(p)) | ||
1269 | mnt_release_group_id(p); | ||
1270 | } | ||
1271 | } | ||
1272 | |||
1273 | static int invent_group_ids(struct vfsmount *mnt, bool recurse) | ||
1274 | { | ||
1275 | struct vfsmount *p; | ||
1276 | |||
1277 | for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) { | ||
1278 | if (!p->mnt_group_id && !IS_MNT_SHARED(p)) { | ||
1279 | int err = mnt_alloc_group_id(p); | ||
1280 | if (err) { | ||
1281 | cleanup_group_ids(mnt, p); | ||
1282 | return err; | ||
1283 | } | ||
1284 | } | ||
1285 | } | ||
1286 | |||
1287 | return 0; | ||
1288 | } | ||
1289 | |||
821 | /* | 1290 | /* |
822 | * @source_mnt : mount tree to be attached | 1291 | * @source_mnt : mount tree to be attached |
823 | * @nd : place the mount tree @source_mnt is attached | 1292 | * @nd : place the mount tree @source_mnt is attached |
@@ -888,9 +1357,16 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, | |||
888 | struct vfsmount *dest_mnt = path->mnt; | 1357 | struct vfsmount *dest_mnt = path->mnt; |
889 | struct dentry *dest_dentry = path->dentry; | 1358 | struct dentry *dest_dentry = path->dentry; |
890 | struct vfsmount *child, *p; | 1359 | struct vfsmount *child, *p; |
1360 | int err; | ||
891 | 1361 | ||
892 | if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list)) | 1362 | if (IS_MNT_SHARED(dest_mnt)) { |
893 | return -EINVAL; | 1363 | err = invent_group_ids(source_mnt, true); |
1364 | if (err) | ||
1365 | goto out; | ||
1366 | } | ||
1367 | err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list); | ||
1368 | if (err) | ||
1369 | goto out_cleanup_ids; | ||
894 | 1370 | ||
895 | if (IS_MNT_SHARED(dest_mnt)) { | 1371 | if (IS_MNT_SHARED(dest_mnt)) { |
896 | for (p = source_mnt; p; p = next_mnt(p, source_mnt)) | 1372 | for (p = source_mnt; p; p = next_mnt(p, source_mnt)) |
@@ -913,34 +1389,40 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, | |||
913 | } | 1389 | } |
914 | spin_unlock(&vfsmount_lock); | 1390 | spin_unlock(&vfsmount_lock); |
915 | return 0; | 1391 | return 0; |
1392 | |||
1393 | out_cleanup_ids: | ||
1394 | if (IS_MNT_SHARED(dest_mnt)) | ||
1395 | cleanup_group_ids(source_mnt, NULL); | ||
1396 | out: | ||
1397 | return err; | ||
916 | } | 1398 | } |
917 | 1399 | ||
918 | static int graft_tree(struct vfsmount *mnt, struct nameidata *nd) | 1400 | static int graft_tree(struct vfsmount *mnt, struct path *path) |
919 | { | 1401 | { |
920 | int err; | 1402 | int err; |
921 | if (mnt->mnt_sb->s_flags & MS_NOUSER) | 1403 | if (mnt->mnt_sb->s_flags & MS_NOUSER) |
922 | return -EINVAL; | 1404 | return -EINVAL; |
923 | 1405 | ||
924 | if (S_ISDIR(nd->path.dentry->d_inode->i_mode) != | 1406 | if (S_ISDIR(path->dentry->d_inode->i_mode) != |
925 | S_ISDIR(mnt->mnt_root->d_inode->i_mode)) | 1407 | S_ISDIR(mnt->mnt_root->d_inode->i_mode)) |
926 | return -ENOTDIR; | 1408 | return -ENOTDIR; |
927 | 1409 | ||
928 | err = -ENOENT; | 1410 | err = -ENOENT; |
929 | mutex_lock(&nd->path.dentry->d_inode->i_mutex); | 1411 | mutex_lock(&path->dentry->d_inode->i_mutex); |
930 | if (IS_DEADDIR(nd->path.dentry->d_inode)) | 1412 | if (IS_DEADDIR(path->dentry->d_inode)) |
931 | goto out_unlock; | 1413 | goto out_unlock; |
932 | 1414 | ||
933 | err = security_sb_check_sb(mnt, nd); | 1415 | err = security_sb_check_sb(mnt, path); |
934 | if (err) | 1416 | if (err) |
935 | goto out_unlock; | 1417 | goto out_unlock; |
936 | 1418 | ||
937 | err = -ENOENT; | 1419 | err = -ENOENT; |
938 | if (IS_ROOT(nd->path.dentry) || !d_unhashed(nd->path.dentry)) | 1420 | if (IS_ROOT(path->dentry) || !d_unhashed(path->dentry)) |
939 | err = attach_recursive_mnt(mnt, &nd->path, NULL); | 1421 | err = attach_recursive_mnt(mnt, path, NULL); |
940 | out_unlock: | 1422 | out_unlock: |
941 | mutex_unlock(&nd->path.dentry->d_inode->i_mutex); | 1423 | mutex_unlock(&path->dentry->d_inode->i_mutex); |
942 | if (!err) | 1424 | if (!err) |
943 | security_sb_post_addmount(mnt, nd); | 1425 | security_sb_post_addmount(mnt, path); |
944 | return err; | 1426 | return err; |
945 | } | 1427 | } |
946 | 1428 | ||
@@ -953,6 +1435,7 @@ static noinline int do_change_type(struct nameidata *nd, int flag) | |||
953 | struct vfsmount *m, *mnt = nd->path.mnt; | 1435 | struct vfsmount *m, *mnt = nd->path.mnt; |
954 | int recurse = flag & MS_REC; | 1436 | int recurse = flag & MS_REC; |
955 | int type = flag & ~MS_REC; | 1437 | int type = flag & ~MS_REC; |
1438 | int err = 0; | ||
956 | 1439 | ||
957 | if (!capable(CAP_SYS_ADMIN)) | 1440 | if (!capable(CAP_SYS_ADMIN)) |
958 | return -EPERM; | 1441 | return -EPERM; |
@@ -961,12 +1444,20 @@ static noinline int do_change_type(struct nameidata *nd, int flag) | |||
961 | return -EINVAL; | 1444 | return -EINVAL; |
962 | 1445 | ||
963 | down_write(&namespace_sem); | 1446 | down_write(&namespace_sem); |
1447 | if (type == MS_SHARED) { | ||
1448 | err = invent_group_ids(mnt, recurse); | ||
1449 | if (err) | ||
1450 | goto out_unlock; | ||
1451 | } | ||
1452 | |||
964 | spin_lock(&vfsmount_lock); | 1453 | spin_lock(&vfsmount_lock); |
965 | for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) | 1454 | for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) |
966 | change_mnt_propagation(m, type); | 1455 | change_mnt_propagation(m, type); |
967 | spin_unlock(&vfsmount_lock); | 1456 | spin_unlock(&vfsmount_lock); |
1457 | |||
1458 | out_unlock: | ||
968 | up_write(&namespace_sem); | 1459 | up_write(&namespace_sem); |
969 | return 0; | 1460 | return err; |
970 | } | 1461 | } |
971 | 1462 | ||
972 | /* | 1463 | /* |
@@ -1004,7 +1495,7 @@ static noinline int do_loopback(struct nameidata *nd, char *old_name, | |||
1004 | if (!mnt) | 1495 | if (!mnt) |
1005 | goto out; | 1496 | goto out; |
1006 | 1497 | ||
1007 | err = graft_tree(mnt, nd); | 1498 | err = graft_tree(mnt, &nd->path); |
1008 | if (err) { | 1499 | if (err) { |
1009 | LIST_HEAD(umount_list); | 1500 | LIST_HEAD(umount_list); |
1010 | spin_lock(&vfsmount_lock); | 1501 | spin_lock(&vfsmount_lock); |
@@ -1019,6 +1510,23 @@ out: | |||
1019 | return err; | 1510 | return err; |
1020 | } | 1511 | } |
1021 | 1512 | ||
1513 | static int change_mount_flags(struct vfsmount *mnt, int ms_flags) | ||
1514 | { | ||
1515 | int error = 0; | ||
1516 | int readonly_request = 0; | ||
1517 | |||
1518 | if (ms_flags & MS_RDONLY) | ||
1519 | readonly_request = 1; | ||
1520 | if (readonly_request == __mnt_is_readonly(mnt)) | ||
1521 | return 0; | ||
1522 | |||
1523 | if (readonly_request) | ||
1524 | error = mnt_make_readonly(mnt); | ||
1525 | else | ||
1526 | __mnt_unmake_readonly(mnt); | ||
1527 | return error; | ||
1528 | } | ||
1529 | |||
1022 | /* | 1530 | /* |
1023 | * change filesystem flags. dir should be a physical root of filesystem. | 1531 | * change filesystem flags. dir should be a physical root of filesystem. |
1024 | * If you've mounted a non-root directory somewhere and want to do remount | 1532 | * If you've mounted a non-root directory somewhere and want to do remount |
@@ -1041,7 +1549,10 @@ static noinline int do_remount(struct nameidata *nd, int flags, int mnt_flags, | |||
1041 | return -EINVAL; | 1549 | return -EINVAL; |
1042 | 1550 | ||
1043 | down_write(&sb->s_umount); | 1551 | down_write(&sb->s_umount); |
1044 | err = do_remount_sb(sb, flags, data, 0); | 1552 | if (flags & MS_BIND) |
1553 | err = change_mount_flags(nd->path.mnt, flags); | ||
1554 | else | ||
1555 | err = do_remount_sb(sb, flags, data, 0); | ||
1045 | if (!err) | 1556 | if (!err) |
1046 | nd->path.mnt->mnt_flags = mnt_flags; | 1557 | nd->path.mnt->mnt_flags = mnt_flags; |
1047 | up_write(&sb->s_umount); | 1558 | up_write(&sb->s_umount); |
@@ -1191,7 +1702,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, | |||
1191 | goto unlock; | 1702 | goto unlock; |
1192 | 1703 | ||
1193 | newmnt->mnt_flags = mnt_flags; | 1704 | newmnt->mnt_flags = mnt_flags; |
1194 | if ((err = graft_tree(newmnt, nd))) | 1705 | if ((err = graft_tree(newmnt, &nd->path))) |
1195 | goto unlock; | 1706 | goto unlock; |
1196 | 1707 | ||
1197 | if (fslist) /* add to the specified expiration list */ | 1708 | if (fslist) /* add to the specified expiration list */ |
@@ -1425,6 +1936,8 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, | |||
1425 | mnt_flags |= MNT_NODIRATIME; | 1936 | mnt_flags |= MNT_NODIRATIME; |
1426 | if (flags & MS_RELATIME) | 1937 | if (flags & MS_RELATIME) |
1427 | mnt_flags |= MNT_RELATIME; | 1938 | mnt_flags |= MNT_RELATIME; |
1939 | if (flags & MS_RDONLY) | ||
1940 | mnt_flags |= MNT_READONLY; | ||
1428 | 1941 | ||
1429 | flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | | 1942 | flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | |
1430 | MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT); | 1943 | MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT); |
@@ -1434,7 +1947,8 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, | |||
1434 | if (retval) | 1947 | if (retval) |
1435 | return retval; | 1948 | return retval; |
1436 | 1949 | ||
1437 | retval = security_sb_mount(dev_name, &nd, type_page, flags, data_page); | 1950 | retval = security_sb_mount(dev_name, &nd.path, |
1951 | type_page, flags, data_page); | ||
1438 | if (retval) | 1952 | if (retval) |
1439 | goto dput_out; | 1953 | goto dput_out; |
1440 | 1954 | ||
@@ -1674,15 +2188,13 @@ asmlinkage long sys_pivot_root(const char __user * new_root, | |||
1674 | const char __user * put_old) | 2188 | const char __user * put_old) |
1675 | { | 2189 | { |
1676 | struct vfsmount *tmp; | 2190 | struct vfsmount *tmp; |
1677 | struct nameidata new_nd, old_nd, user_nd; | 2191 | struct nameidata new_nd, old_nd; |
1678 | struct path parent_path, root_parent; | 2192 | struct path parent_path, root_parent, root; |
1679 | int error; | 2193 | int error; |
1680 | 2194 | ||
1681 | if (!capable(CAP_SYS_ADMIN)) | 2195 | if (!capable(CAP_SYS_ADMIN)) |
1682 | return -EPERM; | 2196 | return -EPERM; |
1683 | 2197 | ||
1684 | lock_kernel(); | ||
1685 | |||
1686 | error = __user_walk(new_root, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, | 2198 | error = __user_walk(new_root, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, |
1687 | &new_nd); | 2199 | &new_nd); |
1688 | if (error) | 2200 | if (error) |
@@ -1695,14 +2207,14 @@ asmlinkage long sys_pivot_root(const char __user * new_root, | |||
1695 | if (error) | 2207 | if (error) |
1696 | goto out1; | 2208 | goto out1; |
1697 | 2209 | ||
1698 | error = security_sb_pivotroot(&old_nd, &new_nd); | 2210 | error = security_sb_pivotroot(&old_nd.path, &new_nd.path); |
1699 | if (error) { | 2211 | if (error) { |
1700 | path_put(&old_nd.path); | 2212 | path_put(&old_nd.path); |
1701 | goto out1; | 2213 | goto out1; |
1702 | } | 2214 | } |
1703 | 2215 | ||
1704 | read_lock(¤t->fs->lock); | 2216 | read_lock(¤t->fs->lock); |
1705 | user_nd.path = current->fs->root; | 2217 | root = current->fs->root; |
1706 | path_get(¤t->fs->root); | 2218 | path_get(¤t->fs->root); |
1707 | read_unlock(¤t->fs->lock); | 2219 | read_unlock(¤t->fs->lock); |
1708 | down_write(&namespace_sem); | 2220 | down_write(&namespace_sem); |
@@ -1710,9 +2222,9 @@ asmlinkage long sys_pivot_root(const char __user * new_root, | |||
1710 | error = -EINVAL; | 2222 | error = -EINVAL; |
1711 | if (IS_MNT_SHARED(old_nd.path.mnt) || | 2223 | if (IS_MNT_SHARED(old_nd.path.mnt) || |
1712 | IS_MNT_SHARED(new_nd.path.mnt->mnt_parent) || | 2224 | IS_MNT_SHARED(new_nd.path.mnt->mnt_parent) || |
1713 | IS_MNT_SHARED(user_nd.path.mnt->mnt_parent)) | 2225 | IS_MNT_SHARED(root.mnt->mnt_parent)) |
1714 | goto out2; | 2226 | goto out2; |
1715 | if (!check_mnt(user_nd.path.mnt)) | 2227 | if (!check_mnt(root.mnt)) |
1716 | goto out2; | 2228 | goto out2; |
1717 | error = -ENOENT; | 2229 | error = -ENOENT; |
1718 | if (IS_DEADDIR(new_nd.path.dentry->d_inode)) | 2230 | if (IS_DEADDIR(new_nd.path.dentry->d_inode)) |
@@ -1722,13 +2234,13 @@ asmlinkage long sys_pivot_root(const char __user * new_root, | |||
1722 | if (d_unhashed(old_nd.path.dentry) && !IS_ROOT(old_nd.path.dentry)) | 2234 | if (d_unhashed(old_nd.path.dentry) && !IS_ROOT(old_nd.path.dentry)) |
1723 | goto out2; | 2235 | goto out2; |
1724 | error = -EBUSY; | 2236 | error = -EBUSY; |
1725 | if (new_nd.path.mnt == user_nd.path.mnt || | 2237 | if (new_nd.path.mnt == root.mnt || |
1726 | old_nd.path.mnt == user_nd.path.mnt) | 2238 | old_nd.path.mnt == root.mnt) |
1727 | goto out2; /* loop, on the same file system */ | 2239 | goto out2; /* loop, on the same file system */ |
1728 | error = -EINVAL; | 2240 | error = -EINVAL; |
1729 | if (user_nd.path.mnt->mnt_root != user_nd.path.dentry) | 2241 | if (root.mnt->mnt_root != root.dentry) |
1730 | goto out2; /* not a mountpoint */ | 2242 | goto out2; /* not a mountpoint */ |
1731 | if (user_nd.path.mnt->mnt_parent == user_nd.path.mnt) | 2243 | if (root.mnt->mnt_parent == root.mnt) |
1732 | goto out2; /* not attached */ | 2244 | goto out2; /* not attached */ |
1733 | if (new_nd.path.mnt->mnt_root != new_nd.path.dentry) | 2245 | if (new_nd.path.mnt->mnt_root != new_nd.path.dentry) |
1734 | goto out2; /* not a mountpoint */ | 2246 | goto out2; /* not a mountpoint */ |
@@ -1750,27 +2262,26 @@ asmlinkage long sys_pivot_root(const char __user * new_root, | |||
1750 | } else if (!is_subdir(old_nd.path.dentry, new_nd.path.dentry)) | 2262 | } else if (!is_subdir(old_nd.path.dentry, new_nd.path.dentry)) |
1751 | goto out3; | 2263 | goto out3; |
1752 | detach_mnt(new_nd.path.mnt, &parent_path); | 2264 | detach_mnt(new_nd.path.mnt, &parent_path); |
1753 | detach_mnt(user_nd.path.mnt, &root_parent); | 2265 | detach_mnt(root.mnt, &root_parent); |
1754 | /* mount old root on put_old */ | 2266 | /* mount old root on put_old */ |
1755 | attach_mnt(user_nd.path.mnt, &old_nd.path); | 2267 | attach_mnt(root.mnt, &old_nd.path); |
1756 | /* mount new_root on / */ | 2268 | /* mount new_root on / */ |
1757 | attach_mnt(new_nd.path.mnt, &root_parent); | 2269 | attach_mnt(new_nd.path.mnt, &root_parent); |
1758 | touch_mnt_namespace(current->nsproxy->mnt_ns); | 2270 | touch_mnt_namespace(current->nsproxy->mnt_ns); |
1759 | spin_unlock(&vfsmount_lock); | 2271 | spin_unlock(&vfsmount_lock); |
1760 | chroot_fs_refs(&user_nd.path, &new_nd.path); | 2272 | chroot_fs_refs(&root, &new_nd.path); |
1761 | security_sb_post_pivotroot(&user_nd, &new_nd); | 2273 | security_sb_post_pivotroot(&root, &new_nd.path); |
1762 | error = 0; | 2274 | error = 0; |
1763 | path_put(&root_parent); | 2275 | path_put(&root_parent); |
1764 | path_put(&parent_path); | 2276 | path_put(&parent_path); |
1765 | out2: | 2277 | out2: |
1766 | mutex_unlock(&old_nd.path.dentry->d_inode->i_mutex); | 2278 | mutex_unlock(&old_nd.path.dentry->d_inode->i_mutex); |
1767 | up_write(&namespace_sem); | 2279 | up_write(&namespace_sem); |
1768 | path_put(&user_nd.path); | 2280 | path_put(&root); |
1769 | path_put(&old_nd.path); | 2281 | path_put(&old_nd.path); |
1770 | out1: | 2282 | out1: |
1771 | path_put(&new_nd.path); | 2283 | path_put(&new_nd.path); |
1772 | out0: | 2284 | out0: |
1773 | unlock_kernel(); | ||
1774 | return error; | 2285 | return error; |
1775 | out3: | 2286 | out3: |
1776 | spin_unlock(&vfsmount_lock); | 2287 | spin_unlock(&vfsmount_lock); |
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index c67b4bdcf719..ad8f167e54bc 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/ioctl.h> | 14 | #include <linux/ioctl.h> |
15 | #include <linux/time.h> | 15 | #include <linux/time.h> |
16 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
17 | #include <linux/mount.h> | ||
17 | #include <linux/highuid.h> | 18 | #include <linux/highuid.h> |
18 | #include <linux/smp_lock.h> | 19 | #include <linux/smp_lock.h> |
19 | #include <linux/vmalloc.h> | 20 | #include <linux/vmalloc.h> |
@@ -261,7 +262,7 @@ ncp_get_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg) | |||
261 | } | 262 | } |
262 | #endif /* CONFIG_NCPFS_NLS */ | 263 | #endif /* CONFIG_NCPFS_NLS */ |
263 | 264 | ||
264 | int ncp_ioctl(struct inode *inode, struct file *filp, | 265 | static int __ncp_ioctl(struct inode *inode, struct file *filp, |
265 | unsigned int cmd, unsigned long arg) | 266 | unsigned int cmd, unsigned long arg) |
266 | { | 267 | { |
267 | struct ncp_server *server = NCP_SERVER(inode); | 268 | struct ncp_server *server = NCP_SERVER(inode); |
@@ -822,6 +823,57 @@ outrel: | |||
822 | return -EINVAL; | 823 | return -EINVAL; |
823 | } | 824 | } |
824 | 825 | ||
826 | static int ncp_ioctl_need_write(unsigned int cmd) | ||
827 | { | ||
828 | switch (cmd) { | ||
829 | case NCP_IOC_GET_FS_INFO: | ||
830 | case NCP_IOC_GET_FS_INFO_V2: | ||
831 | case NCP_IOC_NCPREQUEST: | ||
832 | case NCP_IOC_SETDENTRYTTL: | ||
833 | case NCP_IOC_SIGN_INIT: | ||
834 | case NCP_IOC_LOCKUNLOCK: | ||
835 | case NCP_IOC_SET_SIGN_WANTED: | ||
836 | return 1; | ||
837 | case NCP_IOC_GETOBJECTNAME: | ||
838 | case NCP_IOC_SETOBJECTNAME: | ||
839 | case NCP_IOC_GETPRIVATEDATA: | ||
840 | case NCP_IOC_SETPRIVATEDATA: | ||
841 | case NCP_IOC_SETCHARSETS: | ||
842 | case NCP_IOC_GETCHARSETS: | ||
843 | case NCP_IOC_CONN_LOGGED_IN: | ||
844 | case NCP_IOC_GETDENTRYTTL: | ||
845 | case NCP_IOC_GETMOUNTUID2: | ||
846 | case NCP_IOC_SIGN_WANTED: | ||
847 | case NCP_IOC_GETROOT: | ||
848 | case NCP_IOC_SETROOT: | ||
849 | return 0; | ||
850 | default: | ||
851 | /* unkown IOCTL command, assume write */ | ||
852 | return 1; | ||
853 | } | ||
854 | } | ||
855 | |||
856 | int ncp_ioctl(struct inode *inode, struct file *filp, | ||
857 | unsigned int cmd, unsigned long arg) | ||
858 | { | ||
859 | int ret; | ||
860 | |||
861 | if (ncp_ioctl_need_write(cmd)) { | ||
862 | /* | ||
863 | * inside the ioctl(), any failures which | ||
864 | * are because of file_permission() are | ||
865 | * -EACCESS, so it seems consistent to keep | ||
866 | * that here. | ||
867 | */ | ||
868 | if (mnt_want_write(filp->f_path.mnt)) | ||
869 | return -EACCES; | ||
870 | } | ||
871 | ret = __ncp_ioctl(inode, filp, cmd, arg); | ||
872 | if (ncp_ioctl_need_write(cmd)) | ||
873 | mnt_drop_write(filp->f_path.mnt); | ||
874 | return ret; | ||
875 | } | ||
876 | |||
825 | #ifdef CONFIG_COMPAT | 877 | #ifdef CONFIG_COMPAT |
826 | long ncp_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | 878 | long ncp_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
827 | { | 879 | { |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 6cea7479c5b4..d9e30ac2798d 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -967,7 +967,8 @@ static int is_atomic_open(struct inode *dir, struct nameidata *nd) | |||
967 | if (nd->flags & LOOKUP_DIRECTORY) | 967 | if (nd->flags & LOOKUP_DIRECTORY) |
968 | return 0; | 968 | return 0; |
969 | /* Are we trying to write to a read only partition? */ | 969 | /* Are we trying to write to a read only partition? */ |
970 | if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) | 970 | if (__mnt_is_readonly(nd->path.mnt) && |
971 | (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) | ||
971 | return 0; | 972 | return 0; |
972 | return 1; | 973 | return 1; |
973 | } | 974 | } |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index c593db047d8b..c309c881bd4e 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -658,14 +658,19 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
658 | return status; | 658 | return status; |
659 | } | 659 | } |
660 | } | 660 | } |
661 | status = mnt_want_write(cstate->current_fh.fh_export->ex_path.mnt); | ||
662 | if (status) | ||
663 | return status; | ||
661 | status = nfs_ok; | 664 | status = nfs_ok; |
662 | if (setattr->sa_acl != NULL) | 665 | if (setattr->sa_acl != NULL) |
663 | status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh, | 666 | status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh, |
664 | setattr->sa_acl); | 667 | setattr->sa_acl); |
665 | if (status) | 668 | if (status) |
666 | return status; | 669 | goto out; |
667 | status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, | 670 | status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, |
668 | 0, (time_t)0); | 671 | 0, (time_t)0); |
672 | out: | ||
673 | mnt_drop_write(cstate->current_fh.fh_export->ex_path.mnt); | ||
669 | return status; | 674 | return status; |
670 | } | 675 | } |
671 | 676 | ||
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 1ff90625860f..145b3c877a27 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include <linux/scatterlist.h> | 46 | #include <linux/scatterlist.h> |
47 | #include <linux/crypto.h> | 47 | #include <linux/crypto.h> |
48 | #include <linux/sched.h> | 48 | #include <linux/sched.h> |
49 | #include <linux/mount.h> | ||
49 | 50 | ||
50 | #define NFSDDBG_FACILITY NFSDDBG_PROC | 51 | #define NFSDDBG_FACILITY NFSDDBG_PROC |
51 | 52 | ||
@@ -154,7 +155,11 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) | |||
154 | dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); | 155 | dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); |
155 | goto out_put; | 156 | goto out_put; |
156 | } | 157 | } |
158 | status = mnt_want_write(rec_dir.path.mnt); | ||
159 | if (status) | ||
160 | goto out_put; | ||
157 | status = vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, S_IRWXU); | 161 | status = vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, S_IRWXU); |
162 | mnt_drop_write(rec_dir.path.mnt); | ||
158 | out_put: | 163 | out_put: |
159 | dput(dentry); | 164 | dput(dentry); |
160 | out_unlock: | 165 | out_unlock: |
@@ -313,12 +318,17 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) | |||
313 | if (!rec_dir_init || !clp->cl_firststate) | 318 | if (!rec_dir_init || !clp->cl_firststate) |
314 | return; | 319 | return; |
315 | 320 | ||
321 | status = mnt_want_write(rec_dir.path.mnt); | ||
322 | if (status) | ||
323 | goto out; | ||
316 | clp->cl_firststate = 0; | 324 | clp->cl_firststate = 0; |
317 | nfs4_save_user(&uid, &gid); | 325 | nfs4_save_user(&uid, &gid); |
318 | status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); | 326 | status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); |
319 | nfs4_reset_user(uid, gid); | 327 | nfs4_reset_user(uid, gid); |
320 | if (status == 0) | 328 | if (status == 0) |
321 | nfsd4_sync_rec_dir(); | 329 | nfsd4_sync_rec_dir(); |
330 | mnt_drop_write(rec_dir.path.mnt); | ||
331 | out: | ||
322 | if (status) | 332 | if (status) |
323 | printk("NFSD: Failed to remove expired client state directory" | 333 | printk("NFSD: Failed to remove expired client state directory" |
324 | " %.*s\n", HEXDIR_LEN, clp->cl_recdir); | 334 | " %.*s\n", HEXDIR_LEN, clp->cl_recdir); |
@@ -347,13 +357,17 @@ nfsd4_recdir_purge_old(void) { | |||
347 | 357 | ||
348 | if (!rec_dir_init) | 358 | if (!rec_dir_init) |
349 | return; | 359 | return; |
360 | status = mnt_want_write(rec_dir.path.mnt); | ||
361 | if (status) | ||
362 | goto out; | ||
350 | status = nfsd4_list_rec_dir(rec_dir.path.dentry, purge_old); | 363 | status = nfsd4_list_rec_dir(rec_dir.path.dentry, purge_old); |
351 | if (status == 0) | 364 | if (status == 0) |
352 | nfsd4_sync_rec_dir(); | 365 | nfsd4_sync_rec_dir(); |
366 | mnt_drop_write(rec_dir.path.mnt); | ||
367 | out: | ||
353 | if (status) | 368 | if (status) |
354 | printk("nfsd4: failed to purge old clients from recovery" | 369 | printk("nfsd4: failed to purge old clients from recovery" |
355 | " directory %s\n", rec_dir.path.dentry->d_name.name); | 370 | " directory %s\n", rec_dir.path.dentry->d_name.name); |
356 | return; | ||
357 | } | 371 | } |
358 | 372 | ||
359 | static int | 373 | static int |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index bcb97d8e8b8b..81a75f3081f4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/sunrpc/svc.h> | 41 | #include <linux/sunrpc/svc.h> |
42 | #include <linux/nfsd/nfsd.h> | 42 | #include <linux/nfsd/nfsd.h> |
43 | #include <linux/nfsd/cache.h> | 43 | #include <linux/nfsd/cache.h> |
44 | #include <linux/file.h> | ||
44 | #include <linux/mount.h> | 45 | #include <linux/mount.h> |
45 | #include <linux/workqueue.h> | 46 | #include <linux/workqueue.h> |
46 | #include <linux/smp_lock.h> | 47 | #include <linux/smp_lock.h> |
@@ -1239,7 +1240,7 @@ static inline void | |||
1239 | nfs4_file_downgrade(struct file *filp, unsigned int share_access) | 1240 | nfs4_file_downgrade(struct file *filp, unsigned int share_access) |
1240 | { | 1241 | { |
1241 | if (share_access & NFS4_SHARE_ACCESS_WRITE) { | 1242 | if (share_access & NFS4_SHARE_ACCESS_WRITE) { |
1242 | put_write_access(filp->f_path.dentry->d_inode); | 1243 | drop_file_write_access(filp); |
1243 | filp->f_mode = (filp->f_mode | FMODE_READ) & ~FMODE_WRITE; | 1244 | filp->f_mode = (filp->f_mode | FMODE_READ) & ~FMODE_WRITE; |
1244 | } | 1245 | } |
1245 | } | 1246 | } |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 46f59d5365a0..304bf5f643c9 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -1255,23 +1255,35 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1255 | err = 0; | 1255 | err = 0; |
1256 | switch (type) { | 1256 | switch (type) { |
1257 | case S_IFREG: | 1257 | case S_IFREG: |
1258 | host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); | ||
1259 | if (host_err) | ||
1260 | goto out_nfserr; | ||
1258 | host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); | 1261 | host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); |
1259 | break; | 1262 | break; |
1260 | case S_IFDIR: | 1263 | case S_IFDIR: |
1264 | host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); | ||
1265 | if (host_err) | ||
1266 | goto out_nfserr; | ||
1261 | host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); | 1267 | host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); |
1262 | break; | 1268 | break; |
1263 | case S_IFCHR: | 1269 | case S_IFCHR: |
1264 | case S_IFBLK: | 1270 | case S_IFBLK: |
1265 | case S_IFIFO: | 1271 | case S_IFIFO: |
1266 | case S_IFSOCK: | 1272 | case S_IFSOCK: |
1273 | host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); | ||
1274 | if (host_err) | ||
1275 | goto out_nfserr; | ||
1267 | host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); | 1276 | host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); |
1268 | break; | 1277 | break; |
1269 | default: | 1278 | default: |
1270 | printk("nfsd: bad file type %o in nfsd_create\n", type); | 1279 | printk("nfsd: bad file type %o in nfsd_create\n", type); |
1271 | host_err = -EINVAL; | 1280 | host_err = -EINVAL; |
1281 | goto out_nfserr; | ||
1272 | } | 1282 | } |
1273 | if (host_err < 0) | 1283 | if (host_err < 0) { |
1284 | mnt_drop_write(fhp->fh_export->ex_path.mnt); | ||
1274 | goto out_nfserr; | 1285 | goto out_nfserr; |
1286 | } | ||
1275 | 1287 | ||
1276 | if (EX_ISSYNC(fhp->fh_export)) { | 1288 | if (EX_ISSYNC(fhp->fh_export)) { |
1277 | err = nfserrno(nfsd_sync_dir(dentry)); | 1289 | err = nfserrno(nfsd_sync_dir(dentry)); |
@@ -1282,6 +1294,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1282 | err2 = nfsd_create_setattr(rqstp, resfhp, iap); | 1294 | err2 = nfsd_create_setattr(rqstp, resfhp, iap); |
1283 | if (err2) | 1295 | if (err2) |
1284 | err = err2; | 1296 | err = err2; |
1297 | mnt_drop_write(fhp->fh_export->ex_path.mnt); | ||
1285 | /* | 1298 | /* |
1286 | * Update the file handle to get the new inode info. | 1299 | * Update the file handle to get the new inode info. |
1287 | */ | 1300 | */ |
@@ -1359,6 +1372,9 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1359 | v_atime = verifier[1]&0x7fffffff; | 1372 | v_atime = verifier[1]&0x7fffffff; |
1360 | } | 1373 | } |
1361 | 1374 | ||
1375 | host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); | ||
1376 | if (host_err) | ||
1377 | goto out_nfserr; | ||
1362 | if (dchild->d_inode) { | 1378 | if (dchild->d_inode) { |
1363 | err = 0; | 1379 | err = 0; |
1364 | 1380 | ||
@@ -1390,12 +1406,15 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1390 | case NFS3_CREATE_GUARDED: | 1406 | case NFS3_CREATE_GUARDED: |
1391 | err = nfserr_exist; | 1407 | err = nfserr_exist; |
1392 | } | 1408 | } |
1409 | mnt_drop_write(fhp->fh_export->ex_path.mnt); | ||
1393 | goto out; | 1410 | goto out; |
1394 | } | 1411 | } |
1395 | 1412 | ||
1396 | host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); | 1413 | host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); |
1397 | if (host_err < 0) | 1414 | if (host_err < 0) { |
1415 | mnt_drop_write(fhp->fh_export->ex_path.mnt); | ||
1398 | goto out_nfserr; | 1416 | goto out_nfserr; |
1417 | } | ||
1399 | if (created) | 1418 | if (created) |
1400 | *created = 1; | 1419 | *created = 1; |
1401 | 1420 | ||
@@ -1420,6 +1439,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1420 | if (err2) | 1439 | if (err2) |
1421 | err = err2; | 1440 | err = err2; |
1422 | 1441 | ||
1442 | mnt_drop_write(fhp->fh_export->ex_path.mnt); | ||
1423 | /* | 1443 | /* |
1424 | * Update the filehandle to get the new inode info. | 1444 | * Update the filehandle to get the new inode info. |
1425 | */ | 1445 | */ |
@@ -1522,6 +1542,10 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1522 | if (iap && (iap->ia_valid & ATTR_MODE)) | 1542 | if (iap && (iap->ia_valid & ATTR_MODE)) |
1523 | mode = iap->ia_mode & S_IALLUGO; | 1543 | mode = iap->ia_mode & S_IALLUGO; |
1524 | 1544 | ||
1545 | host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); | ||
1546 | if (host_err) | ||
1547 | goto out_nfserr; | ||
1548 | |||
1525 | if (unlikely(path[plen] != 0)) { | 1549 | if (unlikely(path[plen] != 0)) { |
1526 | char *path_alloced = kmalloc(plen+1, GFP_KERNEL); | 1550 | char *path_alloced = kmalloc(plen+1, GFP_KERNEL); |
1527 | if (path_alloced == NULL) | 1551 | if (path_alloced == NULL) |
@@ -1542,6 +1566,8 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1542 | err = nfserrno(host_err); | 1566 | err = nfserrno(host_err); |
1543 | fh_unlock(fhp); | 1567 | fh_unlock(fhp); |
1544 | 1568 | ||
1569 | mnt_drop_write(fhp->fh_export->ex_path.mnt); | ||
1570 | |||
1545 | cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); | 1571 | cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); |
1546 | dput(dnew); | 1572 | dput(dnew); |
1547 | if (err==0) err = cerr; | 1573 | if (err==0) err = cerr; |
@@ -1592,6 +1618,11 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, | |||
1592 | dold = tfhp->fh_dentry; | 1618 | dold = tfhp->fh_dentry; |
1593 | dest = dold->d_inode; | 1619 | dest = dold->d_inode; |
1594 | 1620 | ||
1621 | host_err = mnt_want_write(tfhp->fh_export->ex_path.mnt); | ||
1622 | if (host_err) { | ||
1623 | err = nfserrno(host_err); | ||
1624 | goto out_dput; | ||
1625 | } | ||
1595 | host_err = vfs_link(dold, dirp, dnew); | 1626 | host_err = vfs_link(dold, dirp, dnew); |
1596 | if (!host_err) { | 1627 | if (!host_err) { |
1597 | if (EX_ISSYNC(ffhp->fh_export)) { | 1628 | if (EX_ISSYNC(ffhp->fh_export)) { |
@@ -1605,7 +1636,8 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, | |||
1605 | else | 1636 | else |
1606 | err = nfserrno(host_err); | 1637 | err = nfserrno(host_err); |
1607 | } | 1638 | } |
1608 | 1639 | mnt_drop_write(tfhp->fh_export->ex_path.mnt); | |
1640 | out_dput: | ||
1609 | dput(dnew); | 1641 | dput(dnew); |
1610 | out_unlock: | 1642 | out_unlock: |
1611 | fh_unlock(ffhp); | 1643 | fh_unlock(ffhp); |
@@ -1678,13 +1710,20 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, | |||
1678 | if (ndentry == trap) | 1710 | if (ndentry == trap) |
1679 | goto out_dput_new; | 1711 | goto out_dput_new; |
1680 | 1712 | ||
1681 | #ifdef MSNFS | 1713 | if (svc_msnfs(ffhp) && |
1682 | if ((ffhp->fh_export->ex_flags & NFSEXP_MSNFS) && | ||
1683 | ((atomic_read(&odentry->d_count) > 1) | 1714 | ((atomic_read(&odentry->d_count) > 1) |
1684 | || (atomic_read(&ndentry->d_count) > 1))) { | 1715 | || (atomic_read(&ndentry->d_count) > 1))) { |
1685 | host_err = -EPERM; | 1716 | host_err = -EPERM; |
1686 | } else | 1717 | goto out_dput_new; |
1687 | #endif | 1718 | } |
1719 | |||
1720 | host_err = -EXDEV; | ||
1721 | if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt) | ||
1722 | goto out_dput_new; | ||
1723 | host_err = mnt_want_write(ffhp->fh_export->ex_path.mnt); | ||
1724 | if (host_err) | ||
1725 | goto out_dput_new; | ||
1726 | |||
1688 | host_err = vfs_rename(fdir, odentry, tdir, ndentry); | 1727 | host_err = vfs_rename(fdir, odentry, tdir, ndentry); |
1689 | if (!host_err && EX_ISSYNC(tfhp->fh_export)) { | 1728 | if (!host_err && EX_ISSYNC(tfhp->fh_export)) { |
1690 | host_err = nfsd_sync_dir(tdentry); | 1729 | host_err = nfsd_sync_dir(tdentry); |
@@ -1692,6 +1731,8 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, | |||
1692 | host_err = nfsd_sync_dir(fdentry); | 1731 | host_err = nfsd_sync_dir(fdentry); |
1693 | } | 1732 | } |
1694 | 1733 | ||
1734 | mnt_drop_write(ffhp->fh_export->ex_path.mnt); | ||
1735 | |||
1695 | out_dput_new: | 1736 | out_dput_new: |
1696 | dput(ndentry); | 1737 | dput(ndentry); |
1697 | out_dput_old: | 1738 | out_dput_old: |
@@ -1750,6 +1791,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, | |||
1750 | if (!type) | 1791 | if (!type) |
1751 | type = rdentry->d_inode->i_mode & S_IFMT; | 1792 | type = rdentry->d_inode->i_mode & S_IFMT; |
1752 | 1793 | ||
1794 | host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); | ||
1795 | if (host_err) | ||
1796 | goto out_nfserr; | ||
1797 | |||
1753 | if (type != S_IFDIR) { /* It's UNLINK */ | 1798 | if (type != S_IFDIR) { /* It's UNLINK */ |
1754 | #ifdef MSNFS | 1799 | #ifdef MSNFS |
1755 | if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && | 1800 | if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && |
@@ -1765,10 +1810,12 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, | |||
1765 | dput(rdentry); | 1810 | dput(rdentry); |
1766 | 1811 | ||
1767 | if (host_err) | 1812 | if (host_err) |
1768 | goto out_nfserr; | 1813 | goto out_drop; |
1769 | if (EX_ISSYNC(fhp->fh_export)) | 1814 | if (EX_ISSYNC(fhp->fh_export)) |
1770 | host_err = nfsd_sync_dir(dentry); | 1815 | host_err = nfsd_sync_dir(dentry); |
1771 | 1816 | ||
1817 | out_drop: | ||
1818 | mnt_drop_write(fhp->fh_export->ex_path.mnt); | ||
1772 | out_nfserr: | 1819 | out_nfserr: |
1773 | err = nfserrno(host_err); | 1820 | err = nfserrno(host_err); |
1774 | out: | 1821 | out: |
@@ -1865,7 +1912,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, | |||
1865 | inode->i_mode, | 1912 | inode->i_mode, |
1866 | IS_IMMUTABLE(inode)? " immut" : "", | 1913 | IS_IMMUTABLE(inode)? " immut" : "", |
1867 | IS_APPEND(inode)? " append" : "", | 1914 | IS_APPEND(inode)? " append" : "", |
1868 | IS_RDONLY(inode)? " ro" : ""); | 1915 | __mnt_is_readonly(exp->ex_path.mnt)? " ro" : ""); |
1869 | dprintk(" owner %d/%d user %d/%d\n", | 1916 | dprintk(" owner %d/%d user %d/%d\n", |
1870 | inode->i_uid, inode->i_gid, current->fsuid, current->fsgid); | 1917 | inode->i_uid, inode->i_gid, current->fsuid, current->fsgid); |
1871 | #endif | 1918 | #endif |
@@ -1876,7 +1923,8 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, | |||
1876 | */ | 1923 | */ |
1877 | if (!(acc & MAY_LOCAL_ACCESS)) | 1924 | if (!(acc & MAY_LOCAL_ACCESS)) |
1878 | if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { | 1925 | if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { |
1879 | if (exp_rdonly(rqstp, exp) || IS_RDONLY(inode)) | 1926 | if (exp_rdonly(rqstp, exp) || |
1927 | __mnt_is_readonly(exp->ex_path.mnt)) | ||
1880 | return nfserr_rofs; | 1928 | return nfserr_rofs; |
1881 | if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) | 1929 | if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) |
1882 | return nfserr_perm; | 1930 | return nfserr_perm; |
@@ -2039,6 +2087,9 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) | |||
2039 | } else | 2087 | } else |
2040 | size = 0; | 2088 | size = 0; |
2041 | 2089 | ||
2090 | error = mnt_want_write(fhp->fh_export->ex_path.mnt); | ||
2091 | if (error) | ||
2092 | goto getout; | ||
2042 | if (size) | 2093 | if (size) |
2043 | error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0); | 2094 | error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0); |
2044 | else { | 2095 | else { |
@@ -2050,6 +2101,7 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) | |||
2050 | error = 0; | 2101 | error = 0; |
2051 | } | 2102 | } |
2052 | } | 2103 | } |
2104 | mnt_drop_write(fhp->fh_export->ex_path.mnt); | ||
2053 | 2105 | ||
2054 | getout: | 2106 | getout: |
2055 | kfree(value); | 2107 | kfree(value); |
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index 4d4ce48bb42c..f6956de56fdb 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile | |||
@@ -2,7 +2,12 @@ EXTRA_CFLAGS += -Ifs/ocfs2 | |||
2 | 2 | ||
3 | EXTRA_CFLAGS += -DCATCH_BH_JBD_RACES | 3 | EXTRA_CFLAGS += -DCATCH_BH_JBD_RACES |
4 | 4 | ||
5 | obj-$(CONFIG_OCFS2_FS) += ocfs2.o | 5 | obj-$(CONFIG_OCFS2_FS) += \ |
6 | ocfs2.o \ | ||
7 | ocfs2_stackglue.o | ||
8 | |||
9 | obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_stack_o2cb.o | ||
10 | obj-$(CONFIG_OCFS2_FS_USERSPACE_CLUSTER) += ocfs2_stack_user.o | ||
6 | 11 | ||
7 | ocfs2-objs := \ | 12 | ocfs2-objs := \ |
8 | alloc.o \ | 13 | alloc.o \ |
@@ -31,5 +36,10 @@ ocfs2-objs := \ | |||
31 | uptodate.o \ | 36 | uptodate.o \ |
32 | ver.o | 37 | ver.o |
33 | 38 | ||
39 | ocfs2_stackglue-objs := stackglue.o | ||
40 | ocfs2_stack_o2cb-objs := stack_o2cb.o | ||
41 | ocfs2_stack_user-objs := stack_user.o | ||
42 | |||
43 | # cluster/ is always needed when OCFS2_FS for masklog support | ||
34 | obj-$(CONFIG_OCFS2_FS) += cluster/ | 44 | obj-$(CONFIG_OCFS2_FS) += cluster/ |
35 | obj-$(CONFIG_OCFS2_FS) += dlm/ | 45 | obj-$(CONFIG_OCFS2_FS_O2CB) += dlm/ |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 447206eb5c2e..41f84c92094f 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -1029,8 +1029,7 @@ static void ocfs2_rotate_leaf(struct ocfs2_extent_list *el, | |||
1029 | BUG_ON(!next_free); | 1029 | BUG_ON(!next_free); |
1030 | 1030 | ||
1031 | /* The tree code before us didn't allow enough room in the leaf. */ | 1031 | /* The tree code before us didn't allow enough room in the leaf. */ |
1032 | if (el->l_next_free_rec == el->l_count && !has_empty) | 1032 | BUG_ON(el->l_next_free_rec == el->l_count && !has_empty); |
1033 | BUG(); | ||
1034 | 1033 | ||
1035 | /* | 1034 | /* |
1036 | * The easiest way to approach this is to just remove the | 1035 | * The easiest way to approach this is to just remove the |
@@ -1450,6 +1449,8 @@ static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el, | |||
1450 | * - When our insert into the right path leaf is at the leftmost edge | 1449 | * - When our insert into the right path leaf is at the leftmost edge |
1451 | * and requires an update of the path immediately to it's left. This | 1450 | * and requires an update of the path immediately to it's left. This |
1452 | * can occur at the end of some types of rotation and appending inserts. | 1451 | * can occur at the end of some types of rotation and appending inserts. |
1452 | * - When we've adjusted the last extent record in the left path leaf and the | ||
1453 | * 1st extent record in the right path leaf during cross extent block merge. | ||
1453 | */ | 1454 | */ |
1454 | static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle, | 1455 | static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle, |
1455 | struct ocfs2_path *left_path, | 1456 | struct ocfs2_path *left_path, |
@@ -2712,24 +2713,147 @@ static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el, | |||
2712 | } | 2713 | } |
2713 | } | 2714 | } |
2714 | 2715 | ||
2716 | static int ocfs2_get_right_path(struct inode *inode, | ||
2717 | struct ocfs2_path *left_path, | ||
2718 | struct ocfs2_path **ret_right_path) | ||
2719 | { | ||
2720 | int ret; | ||
2721 | u32 right_cpos; | ||
2722 | struct ocfs2_path *right_path = NULL; | ||
2723 | struct ocfs2_extent_list *left_el; | ||
2724 | |||
2725 | *ret_right_path = NULL; | ||
2726 | |||
2727 | /* This function shouldn't be called for non-trees. */ | ||
2728 | BUG_ON(left_path->p_tree_depth == 0); | ||
2729 | |||
2730 | left_el = path_leaf_el(left_path); | ||
2731 | BUG_ON(left_el->l_next_free_rec != left_el->l_count); | ||
2732 | |||
2733 | ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path, | ||
2734 | &right_cpos); | ||
2735 | if (ret) { | ||
2736 | mlog_errno(ret); | ||
2737 | goto out; | ||
2738 | } | ||
2739 | |||
2740 | /* This function shouldn't be called for the rightmost leaf. */ | ||
2741 | BUG_ON(right_cpos == 0); | ||
2742 | |||
2743 | right_path = ocfs2_new_path(path_root_bh(left_path), | ||
2744 | path_root_el(left_path)); | ||
2745 | if (!right_path) { | ||
2746 | ret = -ENOMEM; | ||
2747 | mlog_errno(ret); | ||
2748 | goto out; | ||
2749 | } | ||
2750 | |||
2751 | ret = ocfs2_find_path(inode, right_path, right_cpos); | ||
2752 | if (ret) { | ||
2753 | mlog_errno(ret); | ||
2754 | goto out; | ||
2755 | } | ||
2756 | |||
2757 | *ret_right_path = right_path; | ||
2758 | out: | ||
2759 | if (ret) | ||
2760 | ocfs2_free_path(right_path); | ||
2761 | return ret; | ||
2762 | } | ||
2763 | |||
2715 | /* | 2764 | /* |
2716 | * Remove split_rec clusters from the record at index and merge them | 2765 | * Remove split_rec clusters from the record at index and merge them |
2717 | * onto the beginning of the record at index + 1. | 2766 | * onto the beginning of the record "next" to it. |
2767 | * For index < l_count - 1, the next means the extent rec at index + 1. | ||
2768 | * For index == l_count - 1, the "next" means the 1st extent rec of the | ||
2769 | * next extent block. | ||
2718 | */ | 2770 | */ |
2719 | static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh, | 2771 | static int ocfs2_merge_rec_right(struct inode *inode, |
2720 | handle_t *handle, | 2772 | struct ocfs2_path *left_path, |
2721 | struct ocfs2_extent_rec *split_rec, | 2773 | handle_t *handle, |
2722 | struct ocfs2_extent_list *el, int index) | 2774 | struct ocfs2_extent_rec *split_rec, |
2775 | int index) | ||
2723 | { | 2776 | { |
2724 | int ret; | 2777 | int ret, next_free, i; |
2725 | unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); | 2778 | unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); |
2726 | struct ocfs2_extent_rec *left_rec; | 2779 | struct ocfs2_extent_rec *left_rec; |
2727 | struct ocfs2_extent_rec *right_rec; | 2780 | struct ocfs2_extent_rec *right_rec; |
2781 | struct ocfs2_extent_list *right_el; | ||
2782 | struct ocfs2_path *right_path = NULL; | ||
2783 | int subtree_index = 0; | ||
2784 | struct ocfs2_extent_list *el = path_leaf_el(left_path); | ||
2785 | struct buffer_head *bh = path_leaf_bh(left_path); | ||
2786 | struct buffer_head *root_bh = NULL; | ||
2728 | 2787 | ||
2729 | BUG_ON(index >= le16_to_cpu(el->l_next_free_rec)); | 2788 | BUG_ON(index >= le16_to_cpu(el->l_next_free_rec)); |
2730 | |||
2731 | left_rec = &el->l_recs[index]; | 2789 | left_rec = &el->l_recs[index]; |
2732 | right_rec = &el->l_recs[index + 1]; | 2790 | |
2791 | if (index == le16_to_cpu(el->l_next_free_rec - 1) && | ||
2792 | le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) { | ||
2793 | /* we meet with a cross extent block merge. */ | ||
2794 | ret = ocfs2_get_right_path(inode, left_path, &right_path); | ||
2795 | if (ret) { | ||
2796 | mlog_errno(ret); | ||
2797 | goto out; | ||
2798 | } | ||
2799 | |||
2800 | right_el = path_leaf_el(right_path); | ||
2801 | next_free = le16_to_cpu(right_el->l_next_free_rec); | ||
2802 | BUG_ON(next_free <= 0); | ||
2803 | right_rec = &right_el->l_recs[0]; | ||
2804 | if (ocfs2_is_empty_extent(right_rec)) { | ||
2805 | BUG_ON(le16_to_cpu(next_free) <= 1); | ||
2806 | right_rec = &right_el->l_recs[1]; | ||
2807 | } | ||
2808 | |||
2809 | BUG_ON(le32_to_cpu(left_rec->e_cpos) + | ||
2810 | le16_to_cpu(left_rec->e_leaf_clusters) != | ||
2811 | le32_to_cpu(right_rec->e_cpos)); | ||
2812 | |||
2813 | subtree_index = ocfs2_find_subtree_root(inode, | ||
2814 | left_path, right_path); | ||
2815 | |||
2816 | ret = ocfs2_extend_rotate_transaction(handle, subtree_index, | ||
2817 | handle->h_buffer_credits, | ||
2818 | right_path); | ||
2819 | if (ret) { | ||
2820 | mlog_errno(ret); | ||
2821 | goto out; | ||
2822 | } | ||
2823 | |||
2824 | root_bh = left_path->p_node[subtree_index].bh; | ||
2825 | BUG_ON(root_bh != right_path->p_node[subtree_index].bh); | ||
2826 | |||
2827 | ret = ocfs2_journal_access(handle, inode, root_bh, | ||
2828 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2829 | if (ret) { | ||
2830 | mlog_errno(ret); | ||
2831 | goto out; | ||
2832 | } | ||
2833 | |||
2834 | for (i = subtree_index + 1; | ||
2835 | i < path_num_items(right_path); i++) { | ||
2836 | ret = ocfs2_journal_access(handle, inode, | ||
2837 | right_path->p_node[i].bh, | ||
2838 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2839 | if (ret) { | ||
2840 | mlog_errno(ret); | ||
2841 | goto out; | ||
2842 | } | ||
2843 | |||
2844 | ret = ocfs2_journal_access(handle, inode, | ||
2845 | left_path->p_node[i].bh, | ||
2846 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2847 | if (ret) { | ||
2848 | mlog_errno(ret); | ||
2849 | goto out; | ||
2850 | } | ||
2851 | } | ||
2852 | |||
2853 | } else { | ||
2854 | BUG_ON(index == le16_to_cpu(el->l_next_free_rec) - 1); | ||
2855 | right_rec = &el->l_recs[index + 1]; | ||
2856 | } | ||
2733 | 2857 | ||
2734 | ret = ocfs2_journal_access(handle, inode, bh, | 2858 | ret = ocfs2_journal_access(handle, inode, bh, |
2735 | OCFS2_JOURNAL_ACCESS_WRITE); | 2859 | OCFS2_JOURNAL_ACCESS_WRITE); |
@@ -2751,30 +2875,156 @@ static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh, | |||
2751 | if (ret) | 2875 | if (ret) |
2752 | mlog_errno(ret); | 2876 | mlog_errno(ret); |
2753 | 2877 | ||
2878 | if (right_path) { | ||
2879 | ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path)); | ||
2880 | if (ret) | ||
2881 | mlog_errno(ret); | ||
2882 | |||
2883 | ocfs2_complete_edge_insert(inode, handle, left_path, | ||
2884 | right_path, subtree_index); | ||
2885 | } | ||
2886 | out: | ||
2887 | if (right_path) | ||
2888 | ocfs2_free_path(right_path); | ||
2889 | return ret; | ||
2890 | } | ||
2891 | |||
2892 | static int ocfs2_get_left_path(struct inode *inode, | ||
2893 | struct ocfs2_path *right_path, | ||
2894 | struct ocfs2_path **ret_left_path) | ||
2895 | { | ||
2896 | int ret; | ||
2897 | u32 left_cpos; | ||
2898 | struct ocfs2_path *left_path = NULL; | ||
2899 | |||
2900 | *ret_left_path = NULL; | ||
2901 | |||
2902 | /* This function shouldn't be called for non-trees. */ | ||
2903 | BUG_ON(right_path->p_tree_depth == 0); | ||
2904 | |||
2905 | ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, | ||
2906 | right_path, &left_cpos); | ||
2907 | if (ret) { | ||
2908 | mlog_errno(ret); | ||
2909 | goto out; | ||
2910 | } | ||
2911 | |||
2912 | /* This function shouldn't be called for the leftmost leaf. */ | ||
2913 | BUG_ON(left_cpos == 0); | ||
2914 | |||
2915 | left_path = ocfs2_new_path(path_root_bh(right_path), | ||
2916 | path_root_el(right_path)); | ||
2917 | if (!left_path) { | ||
2918 | ret = -ENOMEM; | ||
2919 | mlog_errno(ret); | ||
2920 | goto out; | ||
2921 | } | ||
2922 | |||
2923 | ret = ocfs2_find_path(inode, left_path, left_cpos); | ||
2924 | if (ret) { | ||
2925 | mlog_errno(ret); | ||
2926 | goto out; | ||
2927 | } | ||
2928 | |||
2929 | *ret_left_path = left_path; | ||
2754 | out: | 2930 | out: |
2931 | if (ret) | ||
2932 | ocfs2_free_path(left_path); | ||
2755 | return ret; | 2933 | return ret; |
2756 | } | 2934 | } |
2757 | 2935 | ||
2758 | /* | 2936 | /* |
2759 | * Remove split_rec clusters from the record at index and merge them | 2937 | * Remove split_rec clusters from the record at index and merge them |
2760 | * onto the tail of the record at index - 1. | 2938 | * onto the tail of the record "before" it. |
2939 | * For index > 0, the "before" means the extent rec at index - 1. | ||
2940 | * | ||
2941 | * For index == 0, the "before" means the last record of the previous | ||
2942 | * extent block. And there is also a situation that we may need to | ||
2943 | * remove the rightmost leaf extent block in the right_path and change | ||
2944 | * the right path to indicate the new rightmost path. | ||
2761 | */ | 2945 | */ |
2762 | static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, | 2946 | static int ocfs2_merge_rec_left(struct inode *inode, |
2947 | struct ocfs2_path *right_path, | ||
2763 | handle_t *handle, | 2948 | handle_t *handle, |
2764 | struct ocfs2_extent_rec *split_rec, | 2949 | struct ocfs2_extent_rec *split_rec, |
2765 | struct ocfs2_extent_list *el, int index) | 2950 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
2951 | int index) | ||
2766 | { | 2952 | { |
2767 | int ret, has_empty_extent = 0; | 2953 | int ret, i, subtree_index = 0, has_empty_extent = 0; |
2768 | unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); | 2954 | unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); |
2769 | struct ocfs2_extent_rec *left_rec; | 2955 | struct ocfs2_extent_rec *left_rec; |
2770 | struct ocfs2_extent_rec *right_rec; | 2956 | struct ocfs2_extent_rec *right_rec; |
2957 | struct ocfs2_extent_list *el = path_leaf_el(right_path); | ||
2958 | struct buffer_head *bh = path_leaf_bh(right_path); | ||
2959 | struct buffer_head *root_bh = NULL; | ||
2960 | struct ocfs2_path *left_path = NULL; | ||
2961 | struct ocfs2_extent_list *left_el; | ||
2771 | 2962 | ||
2772 | BUG_ON(index <= 0); | 2963 | BUG_ON(index < 0); |
2773 | 2964 | ||
2774 | left_rec = &el->l_recs[index - 1]; | ||
2775 | right_rec = &el->l_recs[index]; | 2965 | right_rec = &el->l_recs[index]; |
2776 | if (ocfs2_is_empty_extent(&el->l_recs[0])) | 2966 | if (index == 0) { |
2777 | has_empty_extent = 1; | 2967 | /* we meet with a cross extent block merge. */ |
2968 | ret = ocfs2_get_left_path(inode, right_path, &left_path); | ||
2969 | if (ret) { | ||
2970 | mlog_errno(ret); | ||
2971 | goto out; | ||
2972 | } | ||
2973 | |||
2974 | left_el = path_leaf_el(left_path); | ||
2975 | BUG_ON(le16_to_cpu(left_el->l_next_free_rec) != | ||
2976 | le16_to_cpu(left_el->l_count)); | ||
2977 | |||
2978 | left_rec = &left_el->l_recs[ | ||
2979 | le16_to_cpu(left_el->l_next_free_rec) - 1]; | ||
2980 | BUG_ON(le32_to_cpu(left_rec->e_cpos) + | ||
2981 | le16_to_cpu(left_rec->e_leaf_clusters) != | ||
2982 | le32_to_cpu(split_rec->e_cpos)); | ||
2983 | |||
2984 | subtree_index = ocfs2_find_subtree_root(inode, | ||
2985 | left_path, right_path); | ||
2986 | |||
2987 | ret = ocfs2_extend_rotate_transaction(handle, subtree_index, | ||
2988 | handle->h_buffer_credits, | ||
2989 | left_path); | ||
2990 | if (ret) { | ||
2991 | mlog_errno(ret); | ||
2992 | goto out; | ||
2993 | } | ||
2994 | |||
2995 | root_bh = left_path->p_node[subtree_index].bh; | ||
2996 | BUG_ON(root_bh != right_path->p_node[subtree_index].bh); | ||
2997 | |||
2998 | ret = ocfs2_journal_access(handle, inode, root_bh, | ||
2999 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
3000 | if (ret) { | ||
3001 | mlog_errno(ret); | ||
3002 | goto out; | ||
3003 | } | ||
3004 | |||
3005 | for (i = subtree_index + 1; | ||
3006 | i < path_num_items(right_path); i++) { | ||
3007 | ret = ocfs2_journal_access(handle, inode, | ||
3008 | right_path->p_node[i].bh, | ||
3009 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
3010 | if (ret) { | ||
3011 | mlog_errno(ret); | ||
3012 | goto out; | ||
3013 | } | ||
3014 | |||
3015 | ret = ocfs2_journal_access(handle, inode, | ||
3016 | left_path->p_node[i].bh, | ||
3017 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
3018 | if (ret) { | ||
3019 | mlog_errno(ret); | ||
3020 | goto out; | ||
3021 | } | ||
3022 | } | ||
3023 | } else { | ||
3024 | left_rec = &el->l_recs[index - 1]; | ||
3025 | if (ocfs2_is_empty_extent(&el->l_recs[0])) | ||
3026 | has_empty_extent = 1; | ||
3027 | } | ||
2778 | 3028 | ||
2779 | ret = ocfs2_journal_access(handle, inode, bh, | 3029 | ret = ocfs2_journal_access(handle, inode, bh, |
2780 | OCFS2_JOURNAL_ACCESS_WRITE); | 3030 | OCFS2_JOURNAL_ACCESS_WRITE); |
@@ -2790,9 +3040,8 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, | |||
2790 | *left_rec = *split_rec; | 3040 | *left_rec = *split_rec; |
2791 | 3041 | ||
2792 | has_empty_extent = 0; | 3042 | has_empty_extent = 0; |
2793 | } else { | 3043 | } else |
2794 | le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters); | 3044 | le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters); |
2795 | } | ||
2796 | 3045 | ||
2797 | le32_add_cpu(&right_rec->e_cpos, split_clusters); | 3046 | le32_add_cpu(&right_rec->e_cpos, split_clusters); |
2798 | le64_add_cpu(&right_rec->e_blkno, | 3047 | le64_add_cpu(&right_rec->e_blkno, |
@@ -2805,13 +3054,44 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, | |||
2805 | if (ret) | 3054 | if (ret) |
2806 | mlog_errno(ret); | 3055 | mlog_errno(ret); |
2807 | 3056 | ||
3057 | if (left_path) { | ||
3058 | ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); | ||
3059 | if (ret) | ||
3060 | mlog_errno(ret); | ||
3061 | |||
3062 | /* | ||
3063 | * In the situation that the right_rec is empty and the extent | ||
3064 | * block is empty also, ocfs2_complete_edge_insert can't handle | ||
3065 | * it and we need to delete the right extent block. | ||
3066 | */ | ||
3067 | if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 && | ||
3068 | le16_to_cpu(el->l_next_free_rec) == 1) { | ||
3069 | |||
3070 | ret = ocfs2_remove_rightmost_path(inode, handle, | ||
3071 | right_path, dealloc); | ||
3072 | if (ret) { | ||
3073 | mlog_errno(ret); | ||
3074 | goto out; | ||
3075 | } | ||
3076 | |||
3077 | /* Now the rightmost extent block has been deleted. | ||
3078 | * So we use the new rightmost path. | ||
3079 | */ | ||
3080 | ocfs2_mv_path(right_path, left_path); | ||
3081 | left_path = NULL; | ||
3082 | } else | ||
3083 | ocfs2_complete_edge_insert(inode, handle, left_path, | ||
3084 | right_path, subtree_index); | ||
3085 | } | ||
2808 | out: | 3086 | out: |
3087 | if (left_path) | ||
3088 | ocfs2_free_path(left_path); | ||
2809 | return ret; | 3089 | return ret; |
2810 | } | 3090 | } |
2811 | 3091 | ||
2812 | static int ocfs2_try_to_merge_extent(struct inode *inode, | 3092 | static int ocfs2_try_to_merge_extent(struct inode *inode, |
2813 | handle_t *handle, | 3093 | handle_t *handle, |
2814 | struct ocfs2_path *left_path, | 3094 | struct ocfs2_path *path, |
2815 | int split_index, | 3095 | int split_index, |
2816 | struct ocfs2_extent_rec *split_rec, | 3096 | struct ocfs2_extent_rec *split_rec, |
2817 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 3097 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
@@ -2819,7 +3099,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
2819 | 3099 | ||
2820 | { | 3100 | { |
2821 | int ret = 0; | 3101 | int ret = 0; |
2822 | struct ocfs2_extent_list *el = path_leaf_el(left_path); | 3102 | struct ocfs2_extent_list *el = path_leaf_el(path); |
2823 | struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; | 3103 | struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; |
2824 | 3104 | ||
2825 | BUG_ON(ctxt->c_contig_type == CONTIG_NONE); | 3105 | BUG_ON(ctxt->c_contig_type == CONTIG_NONE); |
@@ -2832,7 +3112,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
2832 | * extents - having more than one in a leaf is | 3112 | * extents - having more than one in a leaf is |
2833 | * illegal. | 3113 | * illegal. |
2834 | */ | 3114 | */ |
2835 | ret = ocfs2_rotate_tree_left(inode, handle, left_path, | 3115 | ret = ocfs2_rotate_tree_left(inode, handle, path, |
2836 | dealloc); | 3116 | dealloc); |
2837 | if (ret) { | 3117 | if (ret) { |
2838 | mlog_errno(ret); | 3118 | mlog_errno(ret); |
@@ -2847,7 +3127,6 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
2847 | * Left-right contig implies this. | 3127 | * Left-right contig implies this. |
2848 | */ | 3128 | */ |
2849 | BUG_ON(!ctxt->c_split_covers_rec); | 3129 | BUG_ON(!ctxt->c_split_covers_rec); |
2850 | BUG_ON(split_index == 0); | ||
2851 | 3130 | ||
2852 | /* | 3131 | /* |
2853 | * Since the leftright insert always covers the entire | 3132 | * Since the leftright insert always covers the entire |
@@ -2858,9 +3137,14 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
2858 | * Since the adding of an empty extent shifts | 3137 | * Since the adding of an empty extent shifts |
2859 | * everything back to the right, there's no need to | 3138 | * everything back to the right, there's no need to |
2860 | * update split_index here. | 3139 | * update split_index here. |
3140 | * | ||
3141 | * When the split_index is zero, we need to merge it to the | ||
3142 | * prevoius extent block. It is more efficient and easier | ||
3143 | * if we do merge_right first and merge_left later. | ||
2861 | */ | 3144 | */ |
2862 | ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path), | 3145 | ret = ocfs2_merge_rec_right(inode, path, |
2863 | handle, split_rec, el, split_index); | 3146 | handle, split_rec, |
3147 | split_index); | ||
2864 | if (ret) { | 3148 | if (ret) { |
2865 | mlog_errno(ret); | 3149 | mlog_errno(ret); |
2866 | goto out; | 3150 | goto out; |
@@ -2871,32 +3155,30 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
2871 | */ | 3155 | */ |
2872 | BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); | 3156 | BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); |
2873 | 3157 | ||
2874 | /* | 3158 | /* The merge left us with an empty extent, remove it. */ |
2875 | * The left merge left us with an empty extent, remove | 3159 | ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); |
2876 | * it. | ||
2877 | */ | ||
2878 | ret = ocfs2_rotate_tree_left(inode, handle, left_path, dealloc); | ||
2879 | if (ret) { | 3160 | if (ret) { |
2880 | mlog_errno(ret); | 3161 | mlog_errno(ret); |
2881 | goto out; | 3162 | goto out; |
2882 | } | 3163 | } |
2883 | split_index--; | 3164 | |
2884 | rec = &el->l_recs[split_index]; | 3165 | rec = &el->l_recs[split_index]; |
2885 | 3166 | ||
2886 | /* | 3167 | /* |
2887 | * Note that we don't pass split_rec here on purpose - | 3168 | * Note that we don't pass split_rec here on purpose - |
2888 | * we've merged it into the left side. | 3169 | * we've merged it into the rec already. |
2889 | */ | 3170 | */ |
2890 | ret = ocfs2_merge_rec_right(inode, path_leaf_bh(left_path), | 3171 | ret = ocfs2_merge_rec_left(inode, path, |
2891 | handle, rec, el, split_index); | 3172 | handle, rec, |
3173 | dealloc, | ||
3174 | split_index); | ||
3175 | |||
2892 | if (ret) { | 3176 | if (ret) { |
2893 | mlog_errno(ret); | 3177 | mlog_errno(ret); |
2894 | goto out; | 3178 | goto out; |
2895 | } | 3179 | } |
2896 | 3180 | ||
2897 | BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); | 3181 | ret = ocfs2_rotate_tree_left(inode, handle, path, |
2898 | |||
2899 | ret = ocfs2_rotate_tree_left(inode, handle, left_path, | ||
2900 | dealloc); | 3182 | dealloc); |
2901 | /* | 3183 | /* |
2902 | * Error from this last rotate is not critical, so | 3184 | * Error from this last rotate is not critical, so |
@@ -2915,8 +3197,9 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
2915 | */ | 3197 | */ |
2916 | if (ctxt->c_contig_type == CONTIG_RIGHT) { | 3198 | if (ctxt->c_contig_type == CONTIG_RIGHT) { |
2917 | ret = ocfs2_merge_rec_left(inode, | 3199 | ret = ocfs2_merge_rec_left(inode, |
2918 | path_leaf_bh(left_path), | 3200 | path, |
2919 | handle, split_rec, el, | 3201 | handle, split_rec, |
3202 | dealloc, | ||
2920 | split_index); | 3203 | split_index); |
2921 | if (ret) { | 3204 | if (ret) { |
2922 | mlog_errno(ret); | 3205 | mlog_errno(ret); |
@@ -2924,8 +3207,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
2924 | } | 3207 | } |
2925 | } else { | 3208 | } else { |
2926 | ret = ocfs2_merge_rec_right(inode, | 3209 | ret = ocfs2_merge_rec_right(inode, |
2927 | path_leaf_bh(left_path), | 3210 | path, |
2928 | handle, split_rec, el, | 3211 | handle, split_rec, |
2929 | split_index); | 3212 | split_index); |
2930 | if (ret) { | 3213 | if (ret) { |
2931 | mlog_errno(ret); | 3214 | mlog_errno(ret); |
@@ -2938,7 +3221,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
2938 | * The merge may have left an empty extent in | 3221 | * The merge may have left an empty extent in |
2939 | * our leaf. Try to rotate it away. | 3222 | * our leaf. Try to rotate it away. |
2940 | */ | 3223 | */ |
2941 | ret = ocfs2_rotate_tree_left(inode, handle, left_path, | 3224 | ret = ocfs2_rotate_tree_left(inode, handle, path, |
2942 | dealloc); | 3225 | dealloc); |
2943 | if (ret) | 3226 | if (ret) |
2944 | mlog_errno(ret); | 3227 | mlog_errno(ret); |
@@ -3498,20 +3781,57 @@ out: | |||
3498 | } | 3781 | } |
3499 | 3782 | ||
3500 | static enum ocfs2_contig_type | 3783 | static enum ocfs2_contig_type |
3501 | ocfs2_figure_merge_contig_type(struct inode *inode, | 3784 | ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, |
3502 | struct ocfs2_extent_list *el, int index, | 3785 | struct ocfs2_extent_list *el, int index, |
3503 | struct ocfs2_extent_rec *split_rec) | 3786 | struct ocfs2_extent_rec *split_rec) |
3504 | { | 3787 | { |
3505 | struct ocfs2_extent_rec *rec; | 3788 | int status; |
3506 | enum ocfs2_contig_type ret = CONTIG_NONE; | 3789 | enum ocfs2_contig_type ret = CONTIG_NONE; |
3790 | u32 left_cpos, right_cpos; | ||
3791 | struct ocfs2_extent_rec *rec = NULL; | ||
3792 | struct ocfs2_extent_list *new_el; | ||
3793 | struct ocfs2_path *left_path = NULL, *right_path = NULL; | ||
3794 | struct buffer_head *bh; | ||
3795 | struct ocfs2_extent_block *eb; | ||
3796 | |||
3797 | if (index > 0) { | ||
3798 | rec = &el->l_recs[index - 1]; | ||
3799 | } else if (path->p_tree_depth > 0) { | ||
3800 | status = ocfs2_find_cpos_for_left_leaf(inode->i_sb, | ||
3801 | path, &left_cpos); | ||
3802 | if (status) | ||
3803 | goto out; | ||
3804 | |||
3805 | if (left_cpos != 0) { | ||
3806 | left_path = ocfs2_new_path(path_root_bh(path), | ||
3807 | path_root_el(path)); | ||
3808 | if (!left_path) | ||
3809 | goto out; | ||
3810 | |||
3811 | status = ocfs2_find_path(inode, left_path, left_cpos); | ||
3812 | if (status) | ||
3813 | goto out; | ||
3814 | |||
3815 | new_el = path_leaf_el(left_path); | ||
3816 | |||
3817 | if (le16_to_cpu(new_el->l_next_free_rec) != | ||
3818 | le16_to_cpu(new_el->l_count)) { | ||
3819 | bh = path_leaf_bh(left_path); | ||
3820 | eb = (struct ocfs2_extent_block *)bh->b_data; | ||
3821 | OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, | ||
3822 | eb); | ||
3823 | goto out; | ||
3824 | } | ||
3825 | rec = &new_el->l_recs[ | ||
3826 | le16_to_cpu(new_el->l_next_free_rec) - 1]; | ||
3827 | } | ||
3828 | } | ||
3507 | 3829 | ||
3508 | /* | 3830 | /* |
3509 | * We're careful to check for an empty extent record here - | 3831 | * We're careful to check for an empty extent record here - |
3510 | * the merge code will know what to do if it sees one. | 3832 | * the merge code will know what to do if it sees one. |
3511 | */ | 3833 | */ |
3512 | 3834 | if (rec) { | |
3513 | if (index > 0) { | ||
3514 | rec = &el->l_recs[index - 1]; | ||
3515 | if (index == 1 && ocfs2_is_empty_extent(rec)) { | 3835 | if (index == 1 && ocfs2_is_empty_extent(rec)) { |
3516 | if (split_rec->e_cpos == el->l_recs[index].e_cpos) | 3836 | if (split_rec->e_cpos == el->l_recs[index].e_cpos) |
3517 | ret = CONTIG_RIGHT; | 3837 | ret = CONTIG_RIGHT; |
@@ -3520,10 +3840,45 @@ ocfs2_figure_merge_contig_type(struct inode *inode, | |||
3520 | } | 3840 | } |
3521 | } | 3841 | } |
3522 | 3842 | ||
3523 | if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) { | 3843 | rec = NULL; |
3844 | if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) | ||
3845 | rec = &el->l_recs[index + 1]; | ||
3846 | else if (le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count) && | ||
3847 | path->p_tree_depth > 0) { | ||
3848 | status = ocfs2_find_cpos_for_right_leaf(inode->i_sb, | ||
3849 | path, &right_cpos); | ||
3850 | if (status) | ||
3851 | goto out; | ||
3852 | |||
3853 | if (right_cpos == 0) | ||
3854 | goto out; | ||
3855 | |||
3856 | right_path = ocfs2_new_path(path_root_bh(path), | ||
3857 | path_root_el(path)); | ||
3858 | if (!right_path) | ||
3859 | goto out; | ||
3860 | |||
3861 | status = ocfs2_find_path(inode, right_path, right_cpos); | ||
3862 | if (status) | ||
3863 | goto out; | ||
3864 | |||
3865 | new_el = path_leaf_el(right_path); | ||
3866 | rec = &new_el->l_recs[0]; | ||
3867 | if (ocfs2_is_empty_extent(rec)) { | ||
3868 | if (le16_to_cpu(new_el->l_next_free_rec) <= 1) { | ||
3869 | bh = path_leaf_bh(right_path); | ||
3870 | eb = (struct ocfs2_extent_block *)bh->b_data; | ||
3871 | OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, | ||
3872 | eb); | ||
3873 | goto out; | ||
3874 | } | ||
3875 | rec = &new_el->l_recs[1]; | ||
3876 | } | ||
3877 | } | ||
3878 | |||
3879 | if (rec) { | ||
3524 | enum ocfs2_contig_type contig_type; | 3880 | enum ocfs2_contig_type contig_type; |
3525 | 3881 | ||
3526 | rec = &el->l_recs[index + 1]; | ||
3527 | contig_type = ocfs2_extent_contig(inode, rec, split_rec); | 3882 | contig_type = ocfs2_extent_contig(inode, rec, split_rec); |
3528 | 3883 | ||
3529 | if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT) | 3884 | if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT) |
@@ -3532,6 +3887,12 @@ ocfs2_figure_merge_contig_type(struct inode *inode, | |||
3532 | ret = contig_type; | 3887 | ret = contig_type; |
3533 | } | 3888 | } |
3534 | 3889 | ||
3890 | out: | ||
3891 | if (left_path) | ||
3892 | ocfs2_free_path(left_path); | ||
3893 | if (right_path) | ||
3894 | ocfs2_free_path(right_path); | ||
3895 | |||
3535 | return ret; | 3896 | return ret; |
3536 | } | 3897 | } |
3537 | 3898 | ||
@@ -3994,7 +4355,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode, | |||
3994 | goto out; | 4355 | goto out; |
3995 | } | 4356 | } |
3996 | 4357 | ||
3997 | ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, el, | 4358 | ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, path, el, |
3998 | split_index, | 4359 | split_index, |
3999 | split_rec); | 4360 | split_rec); |
4000 | 4361 | ||
@@ -4788,6 +5149,8 @@ static void ocfs2_truncate_log_worker(struct work_struct *work) | |||
4788 | status = ocfs2_flush_truncate_log(osb); | 5149 | status = ocfs2_flush_truncate_log(osb); |
4789 | if (status < 0) | 5150 | if (status < 0) |
4790 | mlog_errno(status); | 5151 | mlog_errno(status); |
5152 | else | ||
5153 | ocfs2_init_inode_steal_slot(osb); | ||
4791 | 5154 | ||
4792 | mlog_exit(status); | 5155 | mlog_exit(status); |
4793 | } | 5156 | } |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 90383ed61005..17964c0505a9 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -467,11 +467,11 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode, | |||
467 | unsigned to) | 467 | unsigned to) |
468 | { | 468 | { |
469 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 469 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
470 | handle_t *handle = NULL; | 470 | handle_t *handle; |
471 | int ret = 0; | 471 | int ret = 0; |
472 | 472 | ||
473 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | 473 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); |
474 | if (!handle) { | 474 | if (IS_ERR(handle)) { |
475 | ret = -ENOMEM; | 475 | ret = -ENOMEM; |
476 | mlog_errno(ret); | 476 | mlog_errno(ret); |
477 | goto out; | 477 | goto out; |
@@ -487,7 +487,7 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode, | |||
487 | } | 487 | } |
488 | out: | 488 | out: |
489 | if (ret) { | 489 | if (ret) { |
490 | if (handle) | 490 | if (!IS_ERR(handle)) |
491 | ocfs2_commit_trans(osb, handle); | 491 | ocfs2_commit_trans(osb, handle); |
492 | handle = ERR_PTR(ret); | 492 | handle = ERR_PTR(ret); |
493 | } | 493 | } |
diff --git a/fs/ocfs2/cluster/Makefile b/fs/ocfs2/cluster/Makefile index cdd162f13650..bc8c5e7d8608 100644 --- a/fs/ocfs2/cluster/Makefile +++ b/fs/ocfs2/cluster/Makefile | |||
@@ -1,4 +1,4 @@ | |||
1 | obj-$(CONFIG_OCFS2_FS) += ocfs2_nodemanager.o | 1 | obj-$(CONFIG_OCFS2_FS) += ocfs2_nodemanager.o |
2 | 2 | ||
3 | ocfs2_nodemanager-objs := heartbeat.o masklog.o sys.o nodemanager.o \ | 3 | ocfs2_nodemanager-objs := heartbeat.o masklog.o sys.o nodemanager.o \ |
4 | quorum.o tcp.o ver.o | 4 | quorum.o tcp.o netdebug.o ver.o |
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c new file mode 100644 index 000000000000..7bf3c0ea7bd9 --- /dev/null +++ b/fs/ocfs2/cluster/netdebug.c | |||
@@ -0,0 +1,441 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * netdebug.c | ||
5 | * | ||
6 | * debug functionality for o2net | ||
7 | * | ||
8 | * Copyright (C) 2005, 2008 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 021110-1307, USA. | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #ifdef CONFIG_DEBUG_FS | ||
28 | |||
29 | #include <linux/module.h> | ||
30 | #include <linux/types.h> | ||
31 | #include <linux/slab.h> | ||
32 | #include <linux/idr.h> | ||
33 | #include <linux/kref.h> | ||
34 | #include <linux/seq_file.h> | ||
35 | #include <linux/debugfs.h> | ||
36 | |||
37 | #include <linux/uaccess.h> | ||
38 | |||
39 | #include "tcp.h" | ||
40 | #include "nodemanager.h" | ||
41 | #define MLOG_MASK_PREFIX ML_TCP | ||
42 | #include "masklog.h" | ||
43 | |||
44 | #include "tcp_internal.h" | ||
45 | |||
46 | #define O2NET_DEBUG_DIR "o2net" | ||
47 | #define SC_DEBUG_NAME "sock_containers" | ||
48 | #define NST_DEBUG_NAME "send_tracking" | ||
49 | |||
50 | static struct dentry *o2net_dentry; | ||
51 | static struct dentry *sc_dentry; | ||
52 | static struct dentry *nst_dentry; | ||
53 | |||
54 | static DEFINE_SPINLOCK(o2net_debug_lock); | ||
55 | |||
56 | static LIST_HEAD(sock_containers); | ||
57 | static LIST_HEAD(send_tracking); | ||
58 | |||
59 | void o2net_debug_add_nst(struct o2net_send_tracking *nst) | ||
60 | { | ||
61 | spin_lock(&o2net_debug_lock); | ||
62 | list_add(&nst->st_net_debug_item, &send_tracking); | ||
63 | spin_unlock(&o2net_debug_lock); | ||
64 | } | ||
65 | |||
66 | void o2net_debug_del_nst(struct o2net_send_tracking *nst) | ||
67 | { | ||
68 | spin_lock(&o2net_debug_lock); | ||
69 | if (!list_empty(&nst->st_net_debug_item)) | ||
70 | list_del_init(&nst->st_net_debug_item); | ||
71 | spin_unlock(&o2net_debug_lock); | ||
72 | } | ||
73 | |||
74 | static struct o2net_send_tracking | ||
75 | *next_nst(struct o2net_send_tracking *nst_start) | ||
76 | { | ||
77 | struct o2net_send_tracking *nst, *ret = NULL; | ||
78 | |||
79 | assert_spin_locked(&o2net_debug_lock); | ||
80 | |||
81 | list_for_each_entry(nst, &nst_start->st_net_debug_item, | ||
82 | st_net_debug_item) { | ||
83 | /* discover the head of the list */ | ||
84 | if (&nst->st_net_debug_item == &send_tracking) | ||
85 | break; | ||
86 | |||
87 | /* use st_task to detect real nsts in the list */ | ||
88 | if (nst->st_task != NULL) { | ||
89 | ret = nst; | ||
90 | break; | ||
91 | } | ||
92 | } | ||
93 | |||
94 | return ret; | ||
95 | } | ||
96 | |||
97 | static void *nst_seq_start(struct seq_file *seq, loff_t *pos) | ||
98 | { | ||
99 | struct o2net_send_tracking *nst, *dummy_nst = seq->private; | ||
100 | |||
101 | spin_lock(&o2net_debug_lock); | ||
102 | nst = next_nst(dummy_nst); | ||
103 | spin_unlock(&o2net_debug_lock); | ||
104 | |||
105 | return nst; | ||
106 | } | ||
107 | |||
108 | static void *nst_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
109 | { | ||
110 | struct o2net_send_tracking *nst, *dummy_nst = seq->private; | ||
111 | |||
112 | spin_lock(&o2net_debug_lock); | ||
113 | nst = next_nst(dummy_nst); | ||
114 | list_del_init(&dummy_nst->st_net_debug_item); | ||
115 | if (nst) | ||
116 | list_add(&dummy_nst->st_net_debug_item, | ||
117 | &nst->st_net_debug_item); | ||
118 | spin_unlock(&o2net_debug_lock); | ||
119 | |||
120 | return nst; /* unused, just needs to be null when done */ | ||
121 | } | ||
122 | |||
123 | static int nst_seq_show(struct seq_file *seq, void *v) | ||
124 | { | ||
125 | struct o2net_send_tracking *nst, *dummy_nst = seq->private; | ||
126 | |||
127 | spin_lock(&o2net_debug_lock); | ||
128 | nst = next_nst(dummy_nst); | ||
129 | |||
130 | if (nst != NULL) { | ||
131 | /* get_task_comm isn't exported. oh well. */ | ||
132 | seq_printf(seq, "%p:\n" | ||
133 | " pid: %lu\n" | ||
134 | " tgid: %lu\n" | ||
135 | " process name: %s\n" | ||
136 | " node: %u\n" | ||
137 | " sc: %p\n" | ||
138 | " message id: %d\n" | ||
139 | " message type: %u\n" | ||
140 | " message key: 0x%08x\n" | ||
141 | " sock acquiry: %lu.%lu\n" | ||
142 | " send start: %lu.%lu\n" | ||
143 | " wait start: %lu.%lu\n", | ||
144 | nst, (unsigned long)nst->st_task->pid, | ||
145 | (unsigned long)nst->st_task->tgid, | ||
146 | nst->st_task->comm, nst->st_node, | ||
147 | nst->st_sc, nst->st_id, nst->st_msg_type, | ||
148 | nst->st_msg_key, | ||
149 | nst->st_sock_time.tv_sec, nst->st_sock_time.tv_usec, | ||
150 | nst->st_send_time.tv_sec, nst->st_send_time.tv_usec, | ||
151 | nst->st_status_time.tv_sec, | ||
152 | nst->st_status_time.tv_usec); | ||
153 | } | ||
154 | |||
155 | spin_unlock(&o2net_debug_lock); | ||
156 | |||
157 | return 0; | ||
158 | } | ||
159 | |||
160 | static void nst_seq_stop(struct seq_file *seq, void *v) | ||
161 | { | ||
162 | } | ||
163 | |||
164 | static struct seq_operations nst_seq_ops = { | ||
165 | .start = nst_seq_start, | ||
166 | .next = nst_seq_next, | ||
167 | .stop = nst_seq_stop, | ||
168 | .show = nst_seq_show, | ||
169 | }; | ||
170 | |||
171 | static int nst_fop_open(struct inode *inode, struct file *file) | ||
172 | { | ||
173 | struct o2net_send_tracking *dummy_nst; | ||
174 | struct seq_file *seq; | ||
175 | int ret; | ||
176 | |||
177 | dummy_nst = kmalloc(sizeof(struct o2net_send_tracking), GFP_KERNEL); | ||
178 | if (dummy_nst == NULL) { | ||
179 | ret = -ENOMEM; | ||
180 | goto out; | ||
181 | } | ||
182 | dummy_nst->st_task = NULL; | ||
183 | |||
184 | ret = seq_open(file, &nst_seq_ops); | ||
185 | if (ret) | ||
186 | goto out; | ||
187 | |||
188 | seq = file->private_data; | ||
189 | seq->private = dummy_nst; | ||
190 | o2net_debug_add_nst(dummy_nst); | ||
191 | |||
192 | dummy_nst = NULL; | ||
193 | |||
194 | out: | ||
195 | kfree(dummy_nst); | ||
196 | return ret; | ||
197 | } | ||
198 | |||
199 | static int nst_fop_release(struct inode *inode, struct file *file) | ||
200 | { | ||
201 | struct seq_file *seq = file->private_data; | ||
202 | struct o2net_send_tracking *dummy_nst = seq->private; | ||
203 | |||
204 | o2net_debug_del_nst(dummy_nst); | ||
205 | return seq_release_private(inode, file); | ||
206 | } | ||
207 | |||
208 | static struct file_operations nst_seq_fops = { | ||
209 | .open = nst_fop_open, | ||
210 | .read = seq_read, | ||
211 | .llseek = seq_lseek, | ||
212 | .release = nst_fop_release, | ||
213 | }; | ||
214 | |||
215 | void o2net_debug_add_sc(struct o2net_sock_container *sc) | ||
216 | { | ||
217 | spin_lock(&o2net_debug_lock); | ||
218 | list_add(&sc->sc_net_debug_item, &sock_containers); | ||
219 | spin_unlock(&o2net_debug_lock); | ||
220 | } | ||
221 | |||
222 | void o2net_debug_del_sc(struct o2net_sock_container *sc) | ||
223 | { | ||
224 | spin_lock(&o2net_debug_lock); | ||
225 | list_del_init(&sc->sc_net_debug_item); | ||
226 | spin_unlock(&o2net_debug_lock); | ||
227 | } | ||
228 | |||
229 | static struct o2net_sock_container | ||
230 | *next_sc(struct o2net_sock_container *sc_start) | ||
231 | { | ||
232 | struct o2net_sock_container *sc, *ret = NULL; | ||
233 | |||
234 | assert_spin_locked(&o2net_debug_lock); | ||
235 | |||
236 | list_for_each_entry(sc, &sc_start->sc_net_debug_item, | ||
237 | sc_net_debug_item) { | ||
238 | /* discover the head of the list miscast as a sc */ | ||
239 | if (&sc->sc_net_debug_item == &sock_containers) | ||
240 | break; | ||
241 | |||
242 | /* use sc_page to detect real scs in the list */ | ||
243 | if (sc->sc_page != NULL) { | ||
244 | ret = sc; | ||
245 | break; | ||
246 | } | ||
247 | } | ||
248 | |||
249 | return ret; | ||
250 | } | ||
251 | |||
252 | static void *sc_seq_start(struct seq_file *seq, loff_t *pos) | ||
253 | { | ||
254 | struct o2net_sock_container *sc, *dummy_sc = seq->private; | ||
255 | |||
256 | spin_lock(&o2net_debug_lock); | ||
257 | sc = next_sc(dummy_sc); | ||
258 | spin_unlock(&o2net_debug_lock); | ||
259 | |||
260 | return sc; | ||
261 | } | ||
262 | |||
263 | static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
264 | { | ||
265 | struct o2net_sock_container *sc, *dummy_sc = seq->private; | ||
266 | |||
267 | spin_lock(&o2net_debug_lock); | ||
268 | sc = next_sc(dummy_sc); | ||
269 | list_del_init(&dummy_sc->sc_net_debug_item); | ||
270 | if (sc) | ||
271 | list_add(&dummy_sc->sc_net_debug_item, &sc->sc_net_debug_item); | ||
272 | spin_unlock(&o2net_debug_lock); | ||
273 | |||
274 | return sc; /* unused, just needs to be null when done */ | ||
275 | } | ||
276 | |||
277 | #define TV_SEC_USEC(TV) TV.tv_sec, TV.tv_usec | ||
278 | |||
279 | static int sc_seq_show(struct seq_file *seq, void *v) | ||
280 | { | ||
281 | struct o2net_sock_container *sc, *dummy_sc = seq->private; | ||
282 | |||
283 | spin_lock(&o2net_debug_lock); | ||
284 | sc = next_sc(dummy_sc); | ||
285 | |||
286 | if (sc != NULL) { | ||
287 | struct inet_sock *inet = NULL; | ||
288 | |||
289 | __be32 saddr = 0, daddr = 0; | ||
290 | __be16 sport = 0, dport = 0; | ||
291 | |||
292 | if (sc->sc_sock) { | ||
293 | inet = inet_sk(sc->sc_sock->sk); | ||
294 | /* the stack's structs aren't sparse endian clean */ | ||
295 | saddr = (__force __be32)inet->saddr; | ||
296 | daddr = (__force __be32)inet->daddr; | ||
297 | sport = (__force __be16)inet->sport; | ||
298 | dport = (__force __be16)inet->dport; | ||
299 | } | ||
300 | |||
301 | /* XXX sigh, inet-> doesn't have sparse annotation so any | ||
302 | * use of it here generates a warning with -Wbitwise */ | ||
303 | seq_printf(seq, "%p:\n" | ||
304 | " krefs: %d\n" | ||
305 | " sock: %u.%u.%u.%u:%u -> " | ||
306 | "%u.%u.%u.%u:%u\n" | ||
307 | " remote node: %s\n" | ||
308 | " page off: %zu\n" | ||
309 | " handshake ok: %u\n" | ||
310 | " timer: %lu.%lu\n" | ||
311 | " data ready: %lu.%lu\n" | ||
312 | " advance start: %lu.%lu\n" | ||
313 | " advance stop: %lu.%lu\n" | ||
314 | " func start: %lu.%lu\n" | ||
315 | " func stop: %lu.%lu\n" | ||
316 | " func key: %u\n" | ||
317 | " func type: %u\n", | ||
318 | sc, | ||
319 | atomic_read(&sc->sc_kref.refcount), | ||
320 | NIPQUAD(saddr), inet ? ntohs(sport) : 0, | ||
321 | NIPQUAD(daddr), inet ? ntohs(dport) : 0, | ||
322 | sc->sc_node->nd_name, | ||
323 | sc->sc_page_off, | ||
324 | sc->sc_handshake_ok, | ||
325 | TV_SEC_USEC(sc->sc_tv_timer), | ||
326 | TV_SEC_USEC(sc->sc_tv_data_ready), | ||
327 | TV_SEC_USEC(sc->sc_tv_advance_start), | ||
328 | TV_SEC_USEC(sc->sc_tv_advance_stop), | ||
329 | TV_SEC_USEC(sc->sc_tv_func_start), | ||
330 | TV_SEC_USEC(sc->sc_tv_func_stop), | ||
331 | sc->sc_msg_key, | ||
332 | sc->sc_msg_type); | ||
333 | } | ||
334 | |||
335 | |||
336 | spin_unlock(&o2net_debug_lock); | ||
337 | |||
338 | return 0; | ||
339 | } | ||
340 | |||
341 | static void sc_seq_stop(struct seq_file *seq, void *v) | ||
342 | { | ||
343 | } | ||
344 | |||
345 | static struct seq_operations sc_seq_ops = { | ||
346 | .start = sc_seq_start, | ||
347 | .next = sc_seq_next, | ||
348 | .stop = sc_seq_stop, | ||
349 | .show = sc_seq_show, | ||
350 | }; | ||
351 | |||
352 | static int sc_fop_open(struct inode *inode, struct file *file) | ||
353 | { | ||
354 | struct o2net_sock_container *dummy_sc; | ||
355 | struct seq_file *seq; | ||
356 | int ret; | ||
357 | |||
358 | dummy_sc = kmalloc(sizeof(struct o2net_sock_container), GFP_KERNEL); | ||
359 | if (dummy_sc == NULL) { | ||
360 | ret = -ENOMEM; | ||
361 | goto out; | ||
362 | } | ||
363 | dummy_sc->sc_page = NULL; | ||
364 | |||
365 | ret = seq_open(file, &sc_seq_ops); | ||
366 | if (ret) | ||
367 | goto out; | ||
368 | |||
369 | seq = file->private_data; | ||
370 | seq->private = dummy_sc; | ||
371 | o2net_debug_add_sc(dummy_sc); | ||
372 | |||
373 | dummy_sc = NULL; | ||
374 | |||
375 | out: | ||
376 | kfree(dummy_sc); | ||
377 | return ret; | ||
378 | } | ||
379 | |||
380 | static int sc_fop_release(struct inode *inode, struct file *file) | ||
381 | { | ||
382 | struct seq_file *seq = file->private_data; | ||
383 | struct o2net_sock_container *dummy_sc = seq->private; | ||
384 | |||
385 | o2net_debug_del_sc(dummy_sc); | ||
386 | return seq_release_private(inode, file); | ||
387 | } | ||
388 | |||
389 | static struct file_operations sc_seq_fops = { | ||
390 | .open = sc_fop_open, | ||
391 | .read = seq_read, | ||
392 | .llseek = seq_lseek, | ||
393 | .release = sc_fop_release, | ||
394 | }; | ||
395 | |||
396 | int o2net_debugfs_init(void) | ||
397 | { | ||
398 | o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL); | ||
399 | if (!o2net_dentry) { | ||
400 | mlog_errno(-ENOMEM); | ||
401 | goto bail; | ||
402 | } | ||
403 | |||
404 | nst_dentry = debugfs_create_file(NST_DEBUG_NAME, S_IFREG|S_IRUSR, | ||
405 | o2net_dentry, NULL, | ||
406 | &nst_seq_fops); | ||
407 | if (!nst_dentry) { | ||
408 | mlog_errno(-ENOMEM); | ||
409 | goto bail; | ||
410 | } | ||
411 | |||
412 | sc_dentry = debugfs_create_file(SC_DEBUG_NAME, S_IFREG|S_IRUSR, | ||
413 | o2net_dentry, NULL, | ||
414 | &sc_seq_fops); | ||
415 | if (!sc_dentry) { | ||
416 | mlog_errno(-ENOMEM); | ||
417 | goto bail; | ||
418 | } | ||
419 | |||
420 | return 0; | ||
421 | bail: | ||
422 | if (sc_dentry) | ||
423 | debugfs_remove(sc_dentry); | ||
424 | if (nst_dentry) | ||
425 | debugfs_remove(nst_dentry); | ||
426 | if (o2net_dentry) | ||
427 | debugfs_remove(o2net_dentry); | ||
428 | return -ENOMEM; | ||
429 | } | ||
430 | |||
431 | void o2net_debugfs_exit(void) | ||
432 | { | ||
433 | if (sc_dentry) | ||
434 | debugfs_remove(sc_dentry); | ||
435 | if (nst_dentry) | ||
436 | debugfs_remove(nst_dentry); | ||
437 | if (o2net_dentry) | ||
438 | debugfs_remove(o2net_dentry); | ||
439 | } | ||
440 | |||
441 | #endif /* CONFIG_DEBUG_FS */ | ||
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index 709fba25bf7e..cf9401e8cd0b 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c | |||
@@ -959,7 +959,10 @@ static int __init init_o2nm(void) | |||
959 | cluster_print_version(); | 959 | cluster_print_version(); |
960 | 960 | ||
961 | o2hb_init(); | 961 | o2hb_init(); |
962 | o2net_init(); | 962 | |
963 | ret = o2net_init(); | ||
964 | if (ret) | ||
965 | goto out; | ||
963 | 966 | ||
964 | ocfs2_table_header = register_sysctl_table(ocfs2_root_table); | 967 | ocfs2_table_header = register_sysctl_table(ocfs2_root_table); |
965 | if (!ocfs2_table_header) { | 968 | if (!ocfs2_table_header) { |
diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c index 0c095ce7723d..98429fd68499 100644 --- a/fs/ocfs2/cluster/sys.c +++ b/fs/ocfs2/cluster/sys.c | |||
@@ -57,6 +57,7 @@ static struct kset *o2cb_kset; | |||
57 | void o2cb_sys_shutdown(void) | 57 | void o2cb_sys_shutdown(void) |
58 | { | 58 | { |
59 | mlog_sys_shutdown(); | 59 | mlog_sys_shutdown(); |
60 | sysfs_remove_link(NULL, "o2cb"); | ||
60 | kset_unregister(o2cb_kset); | 61 | kset_unregister(o2cb_kset); |
61 | } | 62 | } |
62 | 63 | ||
@@ -68,6 +69,14 @@ int o2cb_sys_init(void) | |||
68 | if (!o2cb_kset) | 69 | if (!o2cb_kset) |
69 | return -ENOMEM; | 70 | return -ENOMEM; |
70 | 71 | ||
72 | /* | ||
73 | * Create this symlink for backwards compatibility with old | ||
74 | * versions of ocfs2-tools which look for things in /sys/o2cb. | ||
75 | */ | ||
76 | ret = sysfs_create_link(NULL, &o2cb_kset->kobj, "o2cb"); | ||
77 | if (ret) | ||
78 | goto error; | ||
79 | |||
71 | ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group); | 80 | ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group); |
72 | if (ret) | 81 | if (ret) |
73 | goto error; | 82 | goto error; |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index b8057c51b205..1e44ad14881a 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -142,23 +142,65 @@ static void o2net_idle_timer(unsigned long data); | |||
142 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); | 142 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); |
143 | static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); | 143 | static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); |
144 | 144 | ||
145 | /* | 145 | static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, |
146 | * FIXME: These should use to_o2nm_cluster_from_node(), but we end up | 146 | u32 msgkey, struct task_struct *task, u8 node) |
147 | * losing our parent link to the cluster during shutdown. This can be | 147 | { |
148 | * solved by adding a pre-removal callback to configfs, or passing | 148 | #ifdef CONFIG_DEBUG_FS |
149 | * around the cluster with the node. -jeffm | 149 | INIT_LIST_HEAD(&nst->st_net_debug_item); |
150 | */ | 150 | nst->st_task = task; |
151 | static inline int o2net_reconnect_delay(struct o2nm_node *node) | 151 | nst->st_msg_type = msgtype; |
152 | nst->st_msg_key = msgkey; | ||
153 | nst->st_node = node; | ||
154 | #endif | ||
155 | } | ||
156 | |||
157 | static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) | ||
158 | { | ||
159 | #ifdef CONFIG_DEBUG_FS | ||
160 | do_gettimeofday(&nst->st_sock_time); | ||
161 | #endif | ||
162 | } | ||
163 | |||
164 | static void o2net_set_nst_send_time(struct o2net_send_tracking *nst) | ||
165 | { | ||
166 | #ifdef CONFIG_DEBUG_FS | ||
167 | do_gettimeofday(&nst->st_send_time); | ||
168 | #endif | ||
169 | } | ||
170 | |||
171 | static void o2net_set_nst_status_time(struct o2net_send_tracking *nst) | ||
172 | { | ||
173 | #ifdef CONFIG_DEBUG_FS | ||
174 | do_gettimeofday(&nst->st_status_time); | ||
175 | #endif | ||
176 | } | ||
177 | |||
178 | static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, | ||
179 | struct o2net_sock_container *sc) | ||
180 | { | ||
181 | #ifdef CONFIG_DEBUG_FS | ||
182 | nst->st_sc = sc; | ||
183 | #endif | ||
184 | } | ||
185 | |||
186 | static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) | ||
187 | { | ||
188 | #ifdef CONFIG_DEBUG_FS | ||
189 | nst->st_id = msg_id; | ||
190 | #endif | ||
191 | } | ||
192 | |||
193 | static inline int o2net_reconnect_delay(void) | ||
152 | { | 194 | { |
153 | return o2nm_single_cluster->cl_reconnect_delay_ms; | 195 | return o2nm_single_cluster->cl_reconnect_delay_ms; |
154 | } | 196 | } |
155 | 197 | ||
156 | static inline int o2net_keepalive_delay(struct o2nm_node *node) | 198 | static inline int o2net_keepalive_delay(void) |
157 | { | 199 | { |
158 | return o2nm_single_cluster->cl_keepalive_delay_ms; | 200 | return o2nm_single_cluster->cl_keepalive_delay_ms; |
159 | } | 201 | } |
160 | 202 | ||
161 | static inline int o2net_idle_timeout(struct o2nm_node *node) | 203 | static inline int o2net_idle_timeout(void) |
162 | { | 204 | { |
163 | return o2nm_single_cluster->cl_idle_timeout_ms; | 205 | return o2nm_single_cluster->cl_idle_timeout_ms; |
164 | } | 206 | } |
@@ -296,6 +338,7 @@ static void sc_kref_release(struct kref *kref) | |||
296 | o2nm_node_put(sc->sc_node); | 338 | o2nm_node_put(sc->sc_node); |
297 | sc->sc_node = NULL; | 339 | sc->sc_node = NULL; |
298 | 340 | ||
341 | o2net_debug_del_sc(sc); | ||
299 | kfree(sc); | 342 | kfree(sc); |
300 | } | 343 | } |
301 | 344 | ||
@@ -336,6 +379,7 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node) | |||
336 | 379 | ||
337 | ret = sc; | 380 | ret = sc; |
338 | sc->sc_page = page; | 381 | sc->sc_page = page; |
382 | o2net_debug_add_sc(sc); | ||
339 | sc = NULL; | 383 | sc = NULL; |
340 | page = NULL; | 384 | page = NULL; |
341 | 385 | ||
@@ -399,8 +443,6 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
399 | mlog_bug_on_msg(err && valid, "err %d valid %u\n", err, valid); | 443 | mlog_bug_on_msg(err && valid, "err %d valid %u\n", err, valid); |
400 | mlog_bug_on_msg(valid && !sc, "valid %u sc %p\n", valid, sc); | 444 | mlog_bug_on_msg(valid && !sc, "valid %u sc %p\n", valid, sc); |
401 | 445 | ||
402 | /* we won't reconnect after our valid conn goes away for | ||
403 | * this hb iteration.. here so it shows up in the logs */ | ||
404 | if (was_valid && !valid && err == 0) | 446 | if (was_valid && !valid && err == 0) |
405 | err = -ENOTCONN; | 447 | err = -ENOTCONN; |
406 | 448 | ||
@@ -430,11 +472,6 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
430 | 472 | ||
431 | if (!was_valid && valid) { | 473 | if (!was_valid && valid) { |
432 | o2quo_conn_up(o2net_num_from_nn(nn)); | 474 | o2quo_conn_up(o2net_num_from_nn(nn)); |
433 | /* this is a bit of a hack. we only try reconnecting | ||
434 | * when heartbeating starts until we get a connection. | ||
435 | * if that connection then dies we don't try reconnecting. | ||
436 | * the only way to start connecting again is to down | ||
437 | * heartbeat and bring it back up. */ | ||
438 | cancel_delayed_work(&nn->nn_connect_expired); | 475 | cancel_delayed_work(&nn->nn_connect_expired); |
439 | printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n", | 476 | printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n", |
440 | o2nm_this_node() > sc->sc_node->nd_num ? | 477 | o2nm_this_node() > sc->sc_node->nd_num ? |
@@ -451,12 +488,24 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
451 | /* delay if we're withing a RECONNECT_DELAY of the | 488 | /* delay if we're withing a RECONNECT_DELAY of the |
452 | * last attempt */ | 489 | * last attempt */ |
453 | delay = (nn->nn_last_connect_attempt + | 490 | delay = (nn->nn_last_connect_attempt + |
454 | msecs_to_jiffies(o2net_reconnect_delay(NULL))) | 491 | msecs_to_jiffies(o2net_reconnect_delay())) |
455 | - jiffies; | 492 | - jiffies; |
456 | if (delay > msecs_to_jiffies(o2net_reconnect_delay(NULL))) | 493 | if (delay > msecs_to_jiffies(o2net_reconnect_delay())) |
457 | delay = 0; | 494 | delay = 0; |
458 | mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); | 495 | mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); |
459 | queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); | 496 | queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); |
497 | |||
498 | /* | ||
499 | * Delay the expired work after idle timeout. | ||
500 | * | ||
501 | * We might have lots of failed connection attempts that run | ||
502 | * through here but we only cancel the connect_expired work when | ||
503 | * a connection attempt succeeds. So only the first enqueue of | ||
504 | * the connect_expired work will do anything. The rest will see | ||
505 | * that it's already queued and do nothing. | ||
506 | */ | ||
507 | delay += msecs_to_jiffies(o2net_idle_timeout()); | ||
508 | queue_delayed_work(o2net_wq, &nn->nn_connect_expired, delay); | ||
460 | } | 509 | } |
461 | 510 | ||
462 | /* keep track of the nn's sc ref for the caller */ | 511 | /* keep track of the nn's sc ref for the caller */ |
@@ -914,6 +963,9 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | |||
914 | struct o2net_status_wait nsw = { | 963 | struct o2net_status_wait nsw = { |
915 | .ns_node_item = LIST_HEAD_INIT(nsw.ns_node_item), | 964 | .ns_node_item = LIST_HEAD_INIT(nsw.ns_node_item), |
916 | }; | 965 | }; |
966 | struct o2net_send_tracking nst; | ||
967 | |||
968 | o2net_init_nst(&nst, msg_type, key, current, target_node); | ||
917 | 969 | ||
918 | if (o2net_wq == NULL) { | 970 | if (o2net_wq == NULL) { |
919 | mlog(0, "attempt to tx without o2netd running\n"); | 971 | mlog(0, "attempt to tx without o2netd running\n"); |
@@ -939,6 +991,10 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | |||
939 | goto out; | 991 | goto out; |
940 | } | 992 | } |
941 | 993 | ||
994 | o2net_debug_add_nst(&nst); | ||
995 | |||
996 | o2net_set_nst_sock_time(&nst); | ||
997 | |||
942 | ret = wait_event_interruptible(nn->nn_sc_wq, | 998 | ret = wait_event_interruptible(nn->nn_sc_wq, |
943 | o2net_tx_can_proceed(nn, &sc, &error)); | 999 | o2net_tx_can_proceed(nn, &sc, &error)); |
944 | if (!ret && error) | 1000 | if (!ret && error) |
@@ -946,6 +1002,8 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | |||
946 | if (ret) | 1002 | if (ret) |
947 | goto out; | 1003 | goto out; |
948 | 1004 | ||
1005 | o2net_set_nst_sock_container(&nst, sc); | ||
1006 | |||
949 | veclen = caller_veclen + 1; | 1007 | veclen = caller_veclen + 1; |
950 | vec = kmalloc(sizeof(struct kvec) * veclen, GFP_ATOMIC); | 1008 | vec = kmalloc(sizeof(struct kvec) * veclen, GFP_ATOMIC); |
951 | if (vec == NULL) { | 1009 | if (vec == NULL) { |
@@ -972,6 +1030,9 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | |||
972 | goto out; | 1030 | goto out; |
973 | 1031 | ||
974 | msg->msg_num = cpu_to_be32(nsw.ns_id); | 1032 | msg->msg_num = cpu_to_be32(nsw.ns_id); |
1033 | o2net_set_nst_msg_id(&nst, nsw.ns_id); | ||
1034 | |||
1035 | o2net_set_nst_send_time(&nst); | ||
975 | 1036 | ||
976 | /* finally, convert the message header to network byte-order | 1037 | /* finally, convert the message header to network byte-order |
977 | * and send */ | 1038 | * and send */ |
@@ -986,6 +1047,7 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | |||
986 | } | 1047 | } |
987 | 1048 | ||
988 | /* wait on other node's handler */ | 1049 | /* wait on other node's handler */ |
1050 | o2net_set_nst_status_time(&nst); | ||
989 | wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw)); | 1051 | wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw)); |
990 | 1052 | ||
991 | /* Note that we avoid overwriting the callers status return | 1053 | /* Note that we avoid overwriting the callers status return |
@@ -998,6 +1060,7 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | |||
998 | mlog(0, "woken, returning system status %d, user status %d\n", | 1060 | mlog(0, "woken, returning system status %d, user status %d\n", |
999 | ret, nsw.ns_status); | 1061 | ret, nsw.ns_status); |
1000 | out: | 1062 | out: |
1063 | o2net_debug_del_nst(&nst); /* must be before dropping sc and node */ | ||
1001 | if (sc) | 1064 | if (sc) |
1002 | sc_put(sc); | 1065 | sc_put(sc); |
1003 | if (vec) | 1066 | if (vec) |
@@ -1154,23 +1217,23 @@ static int o2net_check_handshake(struct o2net_sock_container *sc) | |||
1154 | * but isn't. This can ultimately cause corruption. | 1217 | * but isn't. This can ultimately cause corruption. |
1155 | */ | 1218 | */ |
1156 | if (be32_to_cpu(hand->o2net_idle_timeout_ms) != | 1219 | if (be32_to_cpu(hand->o2net_idle_timeout_ms) != |
1157 | o2net_idle_timeout(sc->sc_node)) { | 1220 | o2net_idle_timeout()) { |
1158 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of " | 1221 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of " |
1159 | "%u ms, but we use %u ms locally. disconnecting\n", | 1222 | "%u ms, but we use %u ms locally. disconnecting\n", |
1160 | SC_NODEF_ARGS(sc), | 1223 | SC_NODEF_ARGS(sc), |
1161 | be32_to_cpu(hand->o2net_idle_timeout_ms), | 1224 | be32_to_cpu(hand->o2net_idle_timeout_ms), |
1162 | o2net_idle_timeout(sc->sc_node)); | 1225 | o2net_idle_timeout()); |
1163 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); | 1226 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); |
1164 | return -1; | 1227 | return -1; |
1165 | } | 1228 | } |
1166 | 1229 | ||
1167 | if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != | 1230 | if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != |
1168 | o2net_keepalive_delay(sc->sc_node)) { | 1231 | o2net_keepalive_delay()) { |
1169 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of " | 1232 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of " |
1170 | "%u ms, but we use %u ms locally. disconnecting\n", | 1233 | "%u ms, but we use %u ms locally. disconnecting\n", |
1171 | SC_NODEF_ARGS(sc), | 1234 | SC_NODEF_ARGS(sc), |
1172 | be32_to_cpu(hand->o2net_keepalive_delay_ms), | 1235 | be32_to_cpu(hand->o2net_keepalive_delay_ms), |
1173 | o2net_keepalive_delay(sc->sc_node)); | 1236 | o2net_keepalive_delay()); |
1174 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); | 1237 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); |
1175 | return -1; | 1238 | return -1; |
1176 | } | 1239 | } |
@@ -1193,6 +1256,7 @@ static int o2net_check_handshake(struct o2net_sock_container *sc) | |||
1193 | * shut down already */ | 1256 | * shut down already */ |
1194 | if (nn->nn_sc == sc) { | 1257 | if (nn->nn_sc == sc) { |
1195 | o2net_sc_reset_idle_timer(sc); | 1258 | o2net_sc_reset_idle_timer(sc); |
1259 | atomic_set(&nn->nn_timeout, 0); | ||
1196 | o2net_set_nn_state(nn, sc, 1, 0); | 1260 | o2net_set_nn_state(nn, sc, 1, 0); |
1197 | } | 1261 | } |
1198 | spin_unlock(&nn->nn_lock); | 1262 | spin_unlock(&nn->nn_lock); |
@@ -1347,12 +1411,11 @@ static void o2net_initialize_handshake(void) | |||
1347 | { | 1411 | { |
1348 | o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32( | 1412 | o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32( |
1349 | O2HB_MAX_WRITE_TIMEOUT_MS); | 1413 | O2HB_MAX_WRITE_TIMEOUT_MS); |
1350 | o2net_hand->o2net_idle_timeout_ms = cpu_to_be32( | 1414 | o2net_hand->o2net_idle_timeout_ms = cpu_to_be32(o2net_idle_timeout()); |
1351 | o2net_idle_timeout(NULL)); | ||
1352 | o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32( | 1415 | o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32( |
1353 | o2net_keepalive_delay(NULL)); | 1416 | o2net_keepalive_delay()); |
1354 | o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32( | 1417 | o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32( |
1355 | o2net_reconnect_delay(NULL)); | 1418 | o2net_reconnect_delay()); |
1356 | } | 1419 | } |
1357 | 1420 | ||
1358 | /* ------------------------------------------------------------ */ | 1421 | /* ------------------------------------------------------------ */ |
@@ -1391,14 +1454,15 @@ static void o2net_sc_send_keep_req(struct work_struct *work) | |||
1391 | static void o2net_idle_timer(unsigned long data) | 1454 | static void o2net_idle_timer(unsigned long data) |
1392 | { | 1455 | { |
1393 | struct o2net_sock_container *sc = (struct o2net_sock_container *)data; | 1456 | struct o2net_sock_container *sc = (struct o2net_sock_container *)data; |
1457 | struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); | ||
1394 | struct timeval now; | 1458 | struct timeval now; |
1395 | 1459 | ||
1396 | do_gettimeofday(&now); | 1460 | do_gettimeofday(&now); |
1397 | 1461 | ||
1398 | printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " | 1462 | printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " |
1399 | "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), | 1463 | "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), |
1400 | o2net_idle_timeout(sc->sc_node) / 1000, | 1464 | o2net_idle_timeout() / 1000, |
1401 | o2net_idle_timeout(sc->sc_node) % 1000); | 1465 | o2net_idle_timeout() % 1000); |
1402 | mlog(ML_NOTICE, "here are some times that might help debug the " | 1466 | mlog(ML_NOTICE, "here are some times that might help debug the " |
1403 | "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " | 1467 | "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " |
1404 | "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", | 1468 | "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", |
@@ -1413,6 +1477,12 @@ static void o2net_idle_timer(unsigned long data) | |||
1413 | sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec, | 1477 | sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec, |
1414 | sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec); | 1478 | sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec); |
1415 | 1479 | ||
1480 | /* | ||
1481 | * Initialize the nn_timeout so that the next connection attempt | ||
1482 | * will continue in o2net_start_connect. | ||
1483 | */ | ||
1484 | atomic_set(&nn->nn_timeout, 1); | ||
1485 | |||
1416 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); | 1486 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); |
1417 | } | 1487 | } |
1418 | 1488 | ||
@@ -1420,10 +1490,10 @@ static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc) | |||
1420 | { | 1490 | { |
1421 | o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); | 1491 | o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); |
1422 | o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, | 1492 | o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, |
1423 | msecs_to_jiffies(o2net_keepalive_delay(sc->sc_node))); | 1493 | msecs_to_jiffies(o2net_keepalive_delay())); |
1424 | do_gettimeofday(&sc->sc_tv_timer); | 1494 | do_gettimeofday(&sc->sc_tv_timer); |
1425 | mod_timer(&sc->sc_idle_timeout, | 1495 | mod_timer(&sc->sc_idle_timeout, |
1426 | jiffies + msecs_to_jiffies(o2net_idle_timeout(sc->sc_node))); | 1496 | jiffies + msecs_to_jiffies(o2net_idle_timeout())); |
1427 | } | 1497 | } |
1428 | 1498 | ||
1429 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) | 1499 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) |
@@ -1447,6 +1517,7 @@ static void o2net_start_connect(struct work_struct *work) | |||
1447 | struct socket *sock = NULL; | 1517 | struct socket *sock = NULL; |
1448 | struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; | 1518 | struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; |
1449 | int ret = 0, stop; | 1519 | int ret = 0, stop; |
1520 | unsigned int timeout; | ||
1450 | 1521 | ||
1451 | /* if we're greater we initiate tx, otherwise we accept */ | 1522 | /* if we're greater we initiate tx, otherwise we accept */ |
1452 | if (o2nm_this_node() <= o2net_num_from_nn(nn)) | 1523 | if (o2nm_this_node() <= o2net_num_from_nn(nn)) |
@@ -1466,8 +1537,17 @@ static void o2net_start_connect(struct work_struct *work) | |||
1466 | } | 1537 | } |
1467 | 1538 | ||
1468 | spin_lock(&nn->nn_lock); | 1539 | spin_lock(&nn->nn_lock); |
1469 | /* see if we already have one pending or have given up */ | 1540 | /* |
1470 | stop = (nn->nn_sc || nn->nn_persistent_error); | 1541 | * see if we already have one pending or have given up. |
1542 | * For nn_timeout, it is set when we close the connection | ||
1543 | * because of the idle time out. So it means that we have | ||
1544 | * at least connected to that node successfully once, | ||
1545 | * now try to connect to it again. | ||
1546 | */ | ||
1547 | timeout = atomic_read(&nn->nn_timeout); | ||
1548 | stop = (nn->nn_sc || | ||
1549 | (nn->nn_persistent_error && | ||
1550 | (nn->nn_persistent_error != -ENOTCONN || timeout == 0))); | ||
1471 | spin_unlock(&nn->nn_lock); | 1551 | spin_unlock(&nn->nn_lock); |
1472 | if (stop) | 1552 | if (stop) |
1473 | goto out; | 1553 | goto out; |
@@ -1555,8 +1635,8 @@ static void o2net_connect_expired(struct work_struct *work) | |||
1555 | mlog(ML_ERROR, "no connection established with node %u after " | 1635 | mlog(ML_ERROR, "no connection established with node %u after " |
1556 | "%u.%u seconds, giving up and returning errors.\n", | 1636 | "%u.%u seconds, giving up and returning errors.\n", |
1557 | o2net_num_from_nn(nn), | 1637 | o2net_num_from_nn(nn), |
1558 | o2net_idle_timeout(NULL) / 1000, | 1638 | o2net_idle_timeout() / 1000, |
1559 | o2net_idle_timeout(NULL) % 1000); | 1639 | o2net_idle_timeout() % 1000); |
1560 | 1640 | ||
1561 | o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); | 1641 | o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); |
1562 | } | 1642 | } |
@@ -1579,6 +1659,7 @@ void o2net_disconnect_node(struct o2nm_node *node) | |||
1579 | 1659 | ||
1580 | /* don't reconnect until it's heartbeating again */ | 1660 | /* don't reconnect until it's heartbeating again */ |
1581 | spin_lock(&nn->nn_lock); | 1661 | spin_lock(&nn->nn_lock); |
1662 | atomic_set(&nn->nn_timeout, 0); | ||
1582 | o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); | 1663 | o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); |
1583 | spin_unlock(&nn->nn_lock); | 1664 | spin_unlock(&nn->nn_lock); |
1584 | 1665 | ||
@@ -1610,20 +1691,15 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, | |||
1610 | 1691 | ||
1611 | /* ensure an immediate connect attempt */ | 1692 | /* ensure an immediate connect attempt */ |
1612 | nn->nn_last_connect_attempt = jiffies - | 1693 | nn->nn_last_connect_attempt = jiffies - |
1613 | (msecs_to_jiffies(o2net_reconnect_delay(node)) + 1); | 1694 | (msecs_to_jiffies(o2net_reconnect_delay()) + 1); |
1614 | 1695 | ||
1615 | if (node_num != o2nm_this_node()) { | 1696 | if (node_num != o2nm_this_node()) { |
1616 | /* heartbeat doesn't work unless a local node number is | ||
1617 | * configured and doing so brings up the o2net_wq, so we can | ||
1618 | * use it.. */ | ||
1619 | queue_delayed_work(o2net_wq, &nn->nn_connect_expired, | ||
1620 | msecs_to_jiffies(o2net_idle_timeout(node))); | ||
1621 | |||
1622 | /* believe it or not, accept and node hearbeating testing | 1697 | /* believe it or not, accept and node hearbeating testing |
1623 | * can succeed for this node before we got here.. so | 1698 | * can succeed for this node before we got here.. so |
1624 | * only use set_nn_state to clear the persistent error | 1699 | * only use set_nn_state to clear the persistent error |
1625 | * if that hasn't already happened */ | 1700 | * if that hasn't already happened */ |
1626 | spin_lock(&nn->nn_lock); | 1701 | spin_lock(&nn->nn_lock); |
1702 | atomic_set(&nn->nn_timeout, 0); | ||
1627 | if (nn->nn_persistent_error) | 1703 | if (nn->nn_persistent_error) |
1628 | o2net_set_nn_state(nn, NULL, 0, 0); | 1704 | o2net_set_nn_state(nn, NULL, 0, 0); |
1629 | spin_unlock(&nn->nn_lock); | 1705 | spin_unlock(&nn->nn_lock); |
@@ -1747,6 +1823,7 @@ static int o2net_accept_one(struct socket *sock) | |||
1747 | new_sock = NULL; | 1823 | new_sock = NULL; |
1748 | 1824 | ||
1749 | spin_lock(&nn->nn_lock); | 1825 | spin_lock(&nn->nn_lock); |
1826 | atomic_set(&nn->nn_timeout, 0); | ||
1750 | o2net_set_nn_state(nn, sc, 0, 0); | 1827 | o2net_set_nn_state(nn, sc, 0, 0); |
1751 | spin_unlock(&nn->nn_lock); | 1828 | spin_unlock(&nn->nn_lock); |
1752 | 1829 | ||
@@ -1922,6 +1999,9 @@ int o2net_init(void) | |||
1922 | 1999 | ||
1923 | o2quo_init(); | 2000 | o2quo_init(); |
1924 | 2001 | ||
2002 | if (o2net_debugfs_init()) | ||
2003 | return -ENOMEM; | ||
2004 | |||
1925 | o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL); | 2005 | o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL); |
1926 | o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); | 2006 | o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); |
1927 | o2net_keep_resp = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); | 2007 | o2net_keep_resp = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); |
@@ -1941,6 +2021,7 @@ int o2net_init(void) | |||
1941 | for (i = 0; i < ARRAY_SIZE(o2net_nodes); i++) { | 2021 | for (i = 0; i < ARRAY_SIZE(o2net_nodes); i++) { |
1942 | struct o2net_node *nn = o2net_nn_from_num(i); | 2022 | struct o2net_node *nn = o2net_nn_from_num(i); |
1943 | 2023 | ||
2024 | atomic_set(&nn->nn_timeout, 0); | ||
1944 | spin_lock_init(&nn->nn_lock); | 2025 | spin_lock_init(&nn->nn_lock); |
1945 | INIT_DELAYED_WORK(&nn->nn_connect_work, o2net_start_connect); | 2026 | INIT_DELAYED_WORK(&nn->nn_connect_work, o2net_start_connect); |
1946 | INIT_DELAYED_WORK(&nn->nn_connect_expired, | 2027 | INIT_DELAYED_WORK(&nn->nn_connect_expired, |
@@ -1962,4 +2043,5 @@ void o2net_exit(void) | |||
1962 | kfree(o2net_hand); | 2043 | kfree(o2net_hand); |
1963 | kfree(o2net_keep_req); | 2044 | kfree(o2net_keep_req); |
1964 | kfree(o2net_keep_resp); | 2045 | kfree(o2net_keep_resp); |
2046 | o2net_debugfs_exit(); | ||
1965 | } | 2047 | } |
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h index f36f66aab3dd..a705d5d19036 100644 --- a/fs/ocfs2/cluster/tcp.h +++ b/fs/ocfs2/cluster/tcp.h | |||
@@ -117,4 +117,36 @@ int o2net_num_connected_peers(void); | |||
117 | int o2net_init(void); | 117 | int o2net_init(void); |
118 | void o2net_exit(void); | 118 | void o2net_exit(void); |
119 | 119 | ||
120 | struct o2net_send_tracking; | ||
121 | struct o2net_sock_container; | ||
122 | |||
123 | #ifdef CONFIG_DEBUG_FS | ||
124 | int o2net_debugfs_init(void); | ||
125 | void o2net_debugfs_exit(void); | ||
126 | void o2net_debug_add_nst(struct o2net_send_tracking *nst); | ||
127 | void o2net_debug_del_nst(struct o2net_send_tracking *nst); | ||
128 | void o2net_debug_add_sc(struct o2net_sock_container *sc); | ||
129 | void o2net_debug_del_sc(struct o2net_sock_container *sc); | ||
130 | #else | ||
131 | static int o2net_debugfs_init(void) | ||
132 | { | ||
133 | return 0; | ||
134 | } | ||
135 | static void o2net_debugfs_exit(void) | ||
136 | { | ||
137 | } | ||
138 | static void o2net_debug_add_nst(struct o2net_send_tracking *nst) | ||
139 | { | ||
140 | } | ||
141 | static void o2net_debug_del_nst(struct o2net_send_tracking *nst) | ||
142 | { | ||
143 | } | ||
144 | static void o2net_debug_add_sc(struct o2net_sock_container *sc) | ||
145 | { | ||
146 | } | ||
147 | static void o2net_debug_del_sc(struct o2net_sock_container *sc) | ||
148 | { | ||
149 | } | ||
150 | #endif /* CONFIG_DEBUG_FS */ | ||
151 | |||
120 | #endif /* O2CLUSTER_TCP_H */ | 152 | #endif /* O2CLUSTER_TCP_H */ |
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index d25b9af28500..8d58cfe410b1 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h | |||
@@ -95,6 +95,8 @@ struct o2net_node { | |||
95 | unsigned nn_sc_valid:1; | 95 | unsigned nn_sc_valid:1; |
96 | /* if this is set tx just returns it */ | 96 | /* if this is set tx just returns it */ |
97 | int nn_persistent_error; | 97 | int nn_persistent_error; |
98 | /* It is only set to 1 after the idle time out. */ | ||
99 | atomic_t nn_timeout; | ||
98 | 100 | ||
99 | /* threads waiting for an sc to arrive wait on the wq for generation | 101 | /* threads waiting for an sc to arrive wait on the wq for generation |
100 | * to increase. it is increased when a connecting socket succeeds | 102 | * to increase. it is increased when a connecting socket succeeds |
@@ -164,7 +166,9 @@ struct o2net_sock_container { | |||
164 | /* original handlers for the sockets */ | 166 | /* original handlers for the sockets */ |
165 | void (*sc_state_change)(struct sock *sk); | 167 | void (*sc_state_change)(struct sock *sk); |
166 | void (*sc_data_ready)(struct sock *sk, int bytes); | 168 | void (*sc_data_ready)(struct sock *sk, int bytes); |
167 | 169 | #ifdef CONFIG_DEBUG_FS | |
170 | struct list_head sc_net_debug_item; | ||
171 | #endif | ||
168 | struct timeval sc_tv_timer; | 172 | struct timeval sc_tv_timer; |
169 | struct timeval sc_tv_data_ready; | 173 | struct timeval sc_tv_data_ready; |
170 | struct timeval sc_tv_advance_start; | 174 | struct timeval sc_tv_advance_start; |
@@ -206,4 +210,24 @@ struct o2net_status_wait { | |||
206 | struct list_head ns_node_item; | 210 | struct list_head ns_node_item; |
207 | }; | 211 | }; |
208 | 212 | ||
213 | #ifdef CONFIG_DEBUG_FS | ||
214 | /* just for state dumps */ | ||
215 | struct o2net_send_tracking { | ||
216 | struct list_head st_net_debug_item; | ||
217 | struct task_struct *st_task; | ||
218 | struct o2net_sock_container *st_sc; | ||
219 | u32 st_id; | ||
220 | u32 st_msg_type; | ||
221 | u32 st_msg_key; | ||
222 | u8 st_node; | ||
223 | struct timeval st_sock_time; | ||
224 | struct timeval st_send_time; | ||
225 | struct timeval st_status_time; | ||
226 | }; | ||
227 | #else | ||
228 | struct o2net_send_tracking { | ||
229 | u32 dummy; | ||
230 | }; | ||
231 | #endif /* CONFIG_DEBUG_FS */ | ||
232 | |||
209 | #endif /* O2CLUSTER_TCP_INTERNAL_H */ | 233 | #endif /* O2CLUSTER_TCP_INTERNAL_H */ |
diff --git a/fs/ocfs2/dlm/Makefile b/fs/ocfs2/dlm/Makefile index ce3f7c29d270..190361375700 100644 --- a/fs/ocfs2/dlm/Makefile +++ b/fs/ocfs2/dlm/Makefile | |||
@@ -1,6 +1,6 @@ | |||
1 | EXTRA_CFLAGS += -Ifs/ocfs2 | 1 | EXTRA_CFLAGS += -Ifs/ocfs2 |
2 | 2 | ||
3 | obj-$(CONFIG_OCFS2_FS) += ocfs2_dlm.o ocfs2_dlmfs.o | 3 | obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o ocfs2_dlmfs.o |
4 | 4 | ||
5 | ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \ | 5 | ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \ |
6 | dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o | 6 | dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o |
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index dc8ea666efdb..d5a86fb81a49 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
@@ -49,6 +49,41 @@ | |||
49 | /* Intended to make it easier for us to switch out hash functions */ | 49 | /* Intended to make it easier for us to switch out hash functions */ |
50 | #define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l) | 50 | #define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l) |
51 | 51 | ||
52 | enum dlm_mle_type { | ||
53 | DLM_MLE_BLOCK, | ||
54 | DLM_MLE_MASTER, | ||
55 | DLM_MLE_MIGRATION | ||
56 | }; | ||
57 | |||
58 | struct dlm_lock_name { | ||
59 | u8 len; | ||
60 | u8 name[DLM_LOCKID_NAME_MAX]; | ||
61 | }; | ||
62 | |||
63 | struct dlm_master_list_entry { | ||
64 | struct list_head list; | ||
65 | struct list_head hb_events; | ||
66 | struct dlm_ctxt *dlm; | ||
67 | spinlock_t spinlock; | ||
68 | wait_queue_head_t wq; | ||
69 | atomic_t woken; | ||
70 | struct kref mle_refs; | ||
71 | int inuse; | ||
72 | unsigned long maybe_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
73 | unsigned long vote_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
74 | unsigned long response_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
75 | unsigned long node_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
76 | u8 master; | ||
77 | u8 new_master; | ||
78 | enum dlm_mle_type type; | ||
79 | struct o2hb_callback_func mle_hb_up; | ||
80 | struct o2hb_callback_func mle_hb_down; | ||
81 | union { | ||
82 | struct dlm_lock_resource *res; | ||
83 | struct dlm_lock_name name; | ||
84 | } u; | ||
85 | }; | ||
86 | |||
52 | enum dlm_ast_type { | 87 | enum dlm_ast_type { |
53 | DLM_AST = 0, | 88 | DLM_AST = 0, |
54 | DLM_BAST, | 89 | DLM_BAST, |
@@ -101,6 +136,7 @@ struct dlm_ctxt | |||
101 | struct list_head purge_list; | 136 | struct list_head purge_list; |
102 | struct list_head pending_asts; | 137 | struct list_head pending_asts; |
103 | struct list_head pending_basts; | 138 | struct list_head pending_basts; |
139 | struct list_head tracking_list; | ||
104 | unsigned int purge_count; | 140 | unsigned int purge_count; |
105 | spinlock_t spinlock; | 141 | spinlock_t spinlock; |
106 | spinlock_t ast_lock; | 142 | spinlock_t ast_lock; |
@@ -122,6 +158,9 @@ struct dlm_ctxt | |||
122 | atomic_t remote_resources; | 158 | atomic_t remote_resources; |
123 | atomic_t unknown_resources; | 159 | atomic_t unknown_resources; |
124 | 160 | ||
161 | struct dlm_debug_ctxt *dlm_debug_ctxt; | ||
162 | struct dentry *dlm_debugfs_subroot; | ||
163 | |||
125 | /* NOTE: Next three are protected by dlm_domain_lock */ | 164 | /* NOTE: Next three are protected by dlm_domain_lock */ |
126 | struct kref dlm_refs; | 165 | struct kref dlm_refs; |
127 | enum dlm_ctxt_state dlm_state; | 166 | enum dlm_ctxt_state dlm_state; |
@@ -270,6 +309,9 @@ struct dlm_lock_resource | |||
270 | struct list_head dirty; | 309 | struct list_head dirty; |
271 | struct list_head recovering; // dlm_recovery_ctxt.resources list | 310 | struct list_head recovering; // dlm_recovery_ctxt.resources list |
272 | 311 | ||
312 | /* Added during init and removed during release */ | ||
313 | struct list_head tracking; /* dlm->tracking_list */ | ||
314 | |||
273 | /* unused lock resources have their last_used stamped and are | 315 | /* unused lock resources have their last_used stamped and are |
274 | * put on a list for the dlm thread to run. */ | 316 | * put on a list for the dlm thread to run. */ |
275 | unsigned long last_used; | 317 | unsigned long last_used; |
@@ -963,9 +1005,16 @@ static inline void __dlm_wait_on_lockres(struct dlm_lock_resource *res) | |||
963 | DLM_LOCK_RES_MIGRATING)); | 1005 | DLM_LOCK_RES_MIGRATING)); |
964 | } | 1006 | } |
965 | 1007 | ||
1008 | /* create/destroy slab caches */ | ||
1009 | int dlm_init_master_caches(void); | ||
1010 | void dlm_destroy_master_caches(void); | ||
1011 | |||
1012 | int dlm_init_lock_cache(void); | ||
1013 | void dlm_destroy_lock_cache(void); | ||
966 | 1014 | ||
967 | int dlm_init_mle_cache(void); | 1015 | int dlm_init_mle_cache(void); |
968 | void dlm_destroy_mle_cache(void); | 1016 | void dlm_destroy_mle_cache(void); |
1017 | |||
969 | void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up); | 1018 | void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up); |
970 | int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, | 1019 | int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, |
971 | struct dlm_lock_resource *res); | 1020 | struct dlm_lock_resource *res); |
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 64239b37e5d4..5f6d858770a2 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c | |||
@@ -5,7 +5,7 @@ | |||
5 | * | 5 | * |
6 | * debug functionality for the dlm | 6 | * debug functionality for the dlm |
7 | * | 7 | * |
8 | * Copyright (C) 2004 Oracle. All rights reserved. | 8 | * Copyright (C) 2004, 2008 Oracle. All rights reserved. |
9 | * | 9 | * |
10 | * This program is free software; you can redistribute it and/or | 10 | * This program is free software; you can redistribute it and/or |
11 | * modify it under the terms of the GNU General Public | 11 | * modify it under the terms of the GNU General Public |
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/utsname.h> | 30 | #include <linux/utsname.h> |
31 | #include <linux/sysctl.h> | 31 | #include <linux/sysctl.h> |
32 | #include <linux/spinlock.h> | 32 | #include <linux/spinlock.h> |
33 | #include <linux/debugfs.h> | ||
33 | 34 | ||
34 | #include "cluster/heartbeat.h" | 35 | #include "cluster/heartbeat.h" |
35 | #include "cluster/nodemanager.h" | 36 | #include "cluster/nodemanager.h" |
@@ -37,17 +38,16 @@ | |||
37 | 38 | ||
38 | #include "dlmapi.h" | 39 | #include "dlmapi.h" |
39 | #include "dlmcommon.h" | 40 | #include "dlmcommon.h" |
40 | |||
41 | #include "dlmdomain.h" | 41 | #include "dlmdomain.h" |
42 | #include "dlmdebug.h" | ||
42 | 43 | ||
43 | #define MLOG_MASK_PREFIX ML_DLM | 44 | #define MLOG_MASK_PREFIX ML_DLM |
44 | #include "cluster/masklog.h" | 45 | #include "cluster/masklog.h" |
45 | 46 | ||
47 | int stringify_lockname(const char *lockname, int locklen, char *buf, int len); | ||
48 | |||
46 | void dlm_print_one_lock_resource(struct dlm_lock_resource *res) | 49 | void dlm_print_one_lock_resource(struct dlm_lock_resource *res) |
47 | { | 50 | { |
48 | mlog(ML_NOTICE, "lockres: %.*s, owner=%u, state=%u\n", | ||
49 | res->lockname.len, res->lockname.name, | ||
50 | res->owner, res->state); | ||
51 | spin_lock(&res->spinlock); | 51 | spin_lock(&res->spinlock); |
52 | __dlm_print_one_lock_resource(res); | 52 | __dlm_print_one_lock_resource(res); |
53 | spin_unlock(&res->spinlock); | 53 | spin_unlock(&res->spinlock); |
@@ -58,7 +58,7 @@ static void dlm_print_lockres_refmap(struct dlm_lock_resource *res) | |||
58 | int bit; | 58 | int bit; |
59 | assert_spin_locked(&res->spinlock); | 59 | assert_spin_locked(&res->spinlock); |
60 | 60 | ||
61 | mlog(ML_NOTICE, " refmap nodes: [ "); | 61 | printk(" refmap nodes: [ "); |
62 | bit = 0; | 62 | bit = 0; |
63 | while (1) { | 63 | while (1) { |
64 | bit = find_next_bit(res->refmap, O2NM_MAX_NODES, bit); | 64 | bit = find_next_bit(res->refmap, O2NM_MAX_NODES, bit); |
@@ -70,63 +70,66 @@ static void dlm_print_lockres_refmap(struct dlm_lock_resource *res) | |||
70 | printk("], inflight=%u\n", res->inflight_locks); | 70 | printk("], inflight=%u\n", res->inflight_locks); |
71 | } | 71 | } |
72 | 72 | ||
73 | static void __dlm_print_lock(struct dlm_lock *lock) | ||
74 | { | ||
75 | spin_lock(&lock->spinlock); | ||
76 | |||
77 | printk(" type=%d, conv=%d, node=%u, cookie=%u:%llu, " | ||
78 | "ref=%u, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c), " | ||
79 | "pending=(conv=%c,lock=%c,cancel=%c,unlock=%c)\n", | ||
80 | lock->ml.type, lock->ml.convert_type, lock->ml.node, | ||
81 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), | ||
82 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), | ||
83 | atomic_read(&lock->lock_refs.refcount), | ||
84 | (list_empty(&lock->ast_list) ? 'y' : 'n'), | ||
85 | (lock->ast_pending ? 'y' : 'n'), | ||
86 | (list_empty(&lock->bast_list) ? 'y' : 'n'), | ||
87 | (lock->bast_pending ? 'y' : 'n'), | ||
88 | (lock->convert_pending ? 'y' : 'n'), | ||
89 | (lock->lock_pending ? 'y' : 'n'), | ||
90 | (lock->cancel_pending ? 'y' : 'n'), | ||
91 | (lock->unlock_pending ? 'y' : 'n')); | ||
92 | |||
93 | spin_unlock(&lock->spinlock); | ||
94 | } | ||
95 | |||
73 | void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) | 96 | void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) |
74 | { | 97 | { |
75 | struct list_head *iter2; | 98 | struct list_head *iter2; |
76 | struct dlm_lock *lock; | 99 | struct dlm_lock *lock; |
100 | char buf[DLM_LOCKID_NAME_MAX]; | ||
77 | 101 | ||
78 | assert_spin_locked(&res->spinlock); | 102 | assert_spin_locked(&res->spinlock); |
79 | 103 | ||
80 | mlog(ML_NOTICE, "lockres: %.*s, owner=%u, state=%u\n", | 104 | stringify_lockname(res->lockname.name, res->lockname.len, |
81 | res->lockname.len, res->lockname.name, | 105 | buf, sizeof(buf) - 1); |
82 | res->owner, res->state); | 106 | printk("lockres: %s, owner=%u, state=%u\n", |
83 | mlog(ML_NOTICE, " last used: %lu, on purge list: %s\n", | 107 | buf, res->owner, res->state); |
84 | res->last_used, list_empty(&res->purge) ? "no" : "yes"); | 108 | printk(" last used: %lu, refcnt: %u, on purge list: %s\n", |
109 | res->last_used, atomic_read(&res->refs.refcount), | ||
110 | list_empty(&res->purge) ? "no" : "yes"); | ||
111 | printk(" on dirty list: %s, on reco list: %s, " | ||
112 | "migrating pending: %s\n", | ||
113 | list_empty(&res->dirty) ? "no" : "yes", | ||
114 | list_empty(&res->recovering) ? "no" : "yes", | ||
115 | res->migration_pending ? "yes" : "no"); | ||
116 | printk(" inflight locks: %d, asts reserved: %d\n", | ||
117 | res->inflight_locks, atomic_read(&res->asts_reserved)); | ||
85 | dlm_print_lockres_refmap(res); | 118 | dlm_print_lockres_refmap(res); |
86 | mlog(ML_NOTICE, " granted queue: \n"); | 119 | printk(" granted queue:\n"); |
87 | list_for_each(iter2, &res->granted) { | 120 | list_for_each(iter2, &res->granted) { |
88 | lock = list_entry(iter2, struct dlm_lock, list); | 121 | lock = list_entry(iter2, struct dlm_lock, list); |
89 | spin_lock(&lock->spinlock); | 122 | __dlm_print_lock(lock); |
90 | mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " | ||
91 | "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", | ||
92 | lock->ml.type, lock->ml.convert_type, lock->ml.node, | ||
93 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), | ||
94 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), | ||
95 | list_empty(&lock->ast_list) ? 'y' : 'n', | ||
96 | lock->ast_pending ? 'y' : 'n', | ||
97 | list_empty(&lock->bast_list) ? 'y' : 'n', | ||
98 | lock->bast_pending ? 'y' : 'n'); | ||
99 | spin_unlock(&lock->spinlock); | ||
100 | } | 123 | } |
101 | mlog(ML_NOTICE, " converting queue: \n"); | 124 | printk(" converting queue:\n"); |
102 | list_for_each(iter2, &res->converting) { | 125 | list_for_each(iter2, &res->converting) { |
103 | lock = list_entry(iter2, struct dlm_lock, list); | 126 | lock = list_entry(iter2, struct dlm_lock, list); |
104 | spin_lock(&lock->spinlock); | 127 | __dlm_print_lock(lock); |
105 | mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " | ||
106 | "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", | ||
107 | lock->ml.type, lock->ml.convert_type, lock->ml.node, | ||
108 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), | ||
109 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), | ||
110 | list_empty(&lock->ast_list) ? 'y' : 'n', | ||
111 | lock->ast_pending ? 'y' : 'n', | ||
112 | list_empty(&lock->bast_list) ? 'y' : 'n', | ||
113 | lock->bast_pending ? 'y' : 'n'); | ||
114 | spin_unlock(&lock->spinlock); | ||
115 | } | 128 | } |
116 | mlog(ML_NOTICE, " blocked queue: \n"); | 129 | printk(" blocked queue:\n"); |
117 | list_for_each(iter2, &res->blocked) { | 130 | list_for_each(iter2, &res->blocked) { |
118 | lock = list_entry(iter2, struct dlm_lock, list); | 131 | lock = list_entry(iter2, struct dlm_lock, list); |
119 | spin_lock(&lock->spinlock); | 132 | __dlm_print_lock(lock); |
120 | mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " | ||
121 | "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", | ||
122 | lock->ml.type, lock->ml.convert_type, lock->ml.node, | ||
123 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), | ||
124 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), | ||
125 | list_empty(&lock->ast_list) ? 'y' : 'n', | ||
126 | lock->ast_pending ? 'y' : 'n', | ||
127 | list_empty(&lock->bast_list) ? 'y' : 'n', | ||
128 | lock->bast_pending ? 'y' : 'n'); | ||
129 | spin_unlock(&lock->spinlock); | ||
130 | } | 133 | } |
131 | } | 134 | } |
132 | 135 | ||
@@ -136,31 +139,6 @@ void dlm_print_one_lock(struct dlm_lock *lockid) | |||
136 | } | 139 | } |
137 | EXPORT_SYMBOL_GPL(dlm_print_one_lock); | 140 | EXPORT_SYMBOL_GPL(dlm_print_one_lock); |
138 | 141 | ||
139 | #if 0 | ||
140 | void dlm_dump_lock_resources(struct dlm_ctxt *dlm) | ||
141 | { | ||
142 | struct dlm_lock_resource *res; | ||
143 | struct hlist_node *iter; | ||
144 | struct hlist_head *bucket; | ||
145 | int i; | ||
146 | |||
147 | mlog(ML_NOTICE, "struct dlm_ctxt: %s, node=%u, key=%u\n", | ||
148 | dlm->name, dlm->node_num, dlm->key); | ||
149 | if (!dlm || !dlm->name) { | ||
150 | mlog(ML_ERROR, "dlm=%p\n", dlm); | ||
151 | return; | ||
152 | } | ||
153 | |||
154 | spin_lock(&dlm->spinlock); | ||
155 | for (i=0; i<DLM_HASH_BUCKETS; i++) { | ||
156 | bucket = dlm_lockres_hash(dlm, i); | ||
157 | hlist_for_each_entry(res, iter, bucket, hash_node) | ||
158 | dlm_print_one_lock_resource(res); | ||
159 | } | ||
160 | spin_unlock(&dlm->spinlock); | ||
161 | } | ||
162 | #endif /* 0 */ | ||
163 | |||
164 | static const char *dlm_errnames[] = { | 142 | static const char *dlm_errnames[] = { |
165 | [DLM_NORMAL] = "DLM_NORMAL", | 143 | [DLM_NORMAL] = "DLM_NORMAL", |
166 | [DLM_GRANTED] = "DLM_GRANTED", | 144 | [DLM_GRANTED] = "DLM_GRANTED", |
@@ -266,3 +244,792 @@ const char *dlm_errname(enum dlm_status err) | |||
266 | return dlm_errnames[err]; | 244 | return dlm_errnames[err]; |
267 | } | 245 | } |
268 | EXPORT_SYMBOL_GPL(dlm_errname); | 246 | EXPORT_SYMBOL_GPL(dlm_errname); |
247 | |||
248 | /* NOTE: This function converts a lockname into a string. It uses knowledge | ||
249 | * of the format of the lockname that should be outside the purview of the dlm. | ||
250 | * We are adding only to make dlm debugging slightly easier. | ||
251 | * | ||
252 | * For more on lockname formats, please refer to dlmglue.c and ocfs2_lockid.h. | ||
253 | */ | ||
254 | int stringify_lockname(const char *lockname, int locklen, char *buf, int len) | ||
255 | { | ||
256 | int out = 0; | ||
257 | __be64 inode_blkno_be; | ||
258 | |||
259 | #define OCFS2_DENTRY_LOCK_INO_START 18 | ||
260 | if (*lockname == 'N') { | ||
261 | memcpy((__be64 *)&inode_blkno_be, | ||
262 | (char *)&lockname[OCFS2_DENTRY_LOCK_INO_START], | ||
263 | sizeof(__be64)); | ||
264 | out += snprintf(buf + out, len - out, "%.*s%08x", | ||
265 | OCFS2_DENTRY_LOCK_INO_START - 1, lockname, | ||
266 | (unsigned int)be64_to_cpu(inode_blkno_be)); | ||
267 | } else | ||
268 | out += snprintf(buf + out, len - out, "%.*s", | ||
269 | locklen, lockname); | ||
270 | return out; | ||
271 | } | ||
272 | |||
273 | static int stringify_nodemap(unsigned long *nodemap, int maxnodes, | ||
274 | char *buf, int len) | ||
275 | { | ||
276 | int out = 0; | ||
277 | int i = -1; | ||
278 | |||
279 | while ((i = find_next_bit(nodemap, maxnodes, i + 1)) < maxnodes) | ||
280 | out += snprintf(buf + out, len - out, "%d ", i); | ||
281 | |||
282 | return out; | ||
283 | } | ||
284 | |||
285 | static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len) | ||
286 | { | ||
287 | int out = 0; | ||
288 | unsigned int namelen; | ||
289 | const char *name; | ||
290 | char *mle_type; | ||
291 | |||
292 | if (mle->type != DLM_MLE_MASTER) { | ||
293 | namelen = mle->u.name.len; | ||
294 | name = mle->u.name.name; | ||
295 | } else { | ||
296 | namelen = mle->u.res->lockname.len; | ||
297 | name = mle->u.res->lockname.name; | ||
298 | } | ||
299 | |||
300 | if (mle->type == DLM_MLE_BLOCK) | ||
301 | mle_type = "BLK"; | ||
302 | else if (mle->type == DLM_MLE_MASTER) | ||
303 | mle_type = "MAS"; | ||
304 | else | ||
305 | mle_type = "MIG"; | ||
306 | |||
307 | out += stringify_lockname(name, namelen, buf + out, len - out); | ||
308 | out += snprintf(buf + out, len - out, | ||
309 | "\t%3s\tmas=%3u\tnew=%3u\tevt=%1d\tuse=%1d\tref=%3d\n", | ||
310 | mle_type, mle->master, mle->new_master, | ||
311 | !list_empty(&mle->hb_events), | ||
312 | !!mle->inuse, | ||
313 | atomic_read(&mle->mle_refs.refcount)); | ||
314 | |||
315 | out += snprintf(buf + out, len - out, "Maybe="); | ||
316 | out += stringify_nodemap(mle->maybe_map, O2NM_MAX_NODES, | ||
317 | buf + out, len - out); | ||
318 | out += snprintf(buf + out, len - out, "\n"); | ||
319 | |||
320 | out += snprintf(buf + out, len - out, "Vote="); | ||
321 | out += stringify_nodemap(mle->vote_map, O2NM_MAX_NODES, | ||
322 | buf + out, len - out); | ||
323 | out += snprintf(buf + out, len - out, "\n"); | ||
324 | |||
325 | out += snprintf(buf + out, len - out, "Response="); | ||
326 | out += stringify_nodemap(mle->response_map, O2NM_MAX_NODES, | ||
327 | buf + out, len - out); | ||
328 | out += snprintf(buf + out, len - out, "\n"); | ||
329 | |||
330 | out += snprintf(buf + out, len - out, "Node="); | ||
331 | out += stringify_nodemap(mle->node_map, O2NM_MAX_NODES, | ||
332 | buf + out, len - out); | ||
333 | out += snprintf(buf + out, len - out, "\n"); | ||
334 | |||
335 | out += snprintf(buf + out, len - out, "\n"); | ||
336 | |||
337 | return out; | ||
338 | } | ||
339 | |||
340 | void dlm_print_one_mle(struct dlm_master_list_entry *mle) | ||
341 | { | ||
342 | char *buf; | ||
343 | |||
344 | buf = (char *) get_zeroed_page(GFP_NOFS); | ||
345 | if (buf) { | ||
346 | dump_mle(mle, buf, PAGE_SIZE - 1); | ||
347 | free_page((unsigned long)buf); | ||
348 | } | ||
349 | } | ||
350 | |||
351 | #ifdef CONFIG_DEBUG_FS | ||
352 | |||
353 | static struct dentry *dlm_debugfs_root = NULL; | ||
354 | |||
355 | #define DLM_DEBUGFS_DIR "o2dlm" | ||
356 | #define DLM_DEBUGFS_DLM_STATE "dlm_state" | ||
357 | #define DLM_DEBUGFS_LOCKING_STATE "locking_state" | ||
358 | #define DLM_DEBUGFS_MLE_STATE "mle_state" | ||
359 | #define DLM_DEBUGFS_PURGE_LIST "purge_list" | ||
360 | |||
361 | /* begin - utils funcs */ | ||
362 | static void dlm_debug_free(struct kref *kref) | ||
363 | { | ||
364 | struct dlm_debug_ctxt *dc; | ||
365 | |||
366 | dc = container_of(kref, struct dlm_debug_ctxt, debug_refcnt); | ||
367 | |||
368 | kfree(dc); | ||
369 | } | ||
370 | |||
371 | void dlm_debug_put(struct dlm_debug_ctxt *dc) | ||
372 | { | ||
373 | if (dc) | ||
374 | kref_put(&dc->debug_refcnt, dlm_debug_free); | ||
375 | } | ||
376 | |||
377 | static void dlm_debug_get(struct dlm_debug_ctxt *dc) | ||
378 | { | ||
379 | kref_get(&dc->debug_refcnt); | ||
380 | } | ||
381 | |||
382 | static struct debug_buffer *debug_buffer_allocate(void) | ||
383 | { | ||
384 | struct debug_buffer *db = NULL; | ||
385 | |||
386 | db = kzalloc(sizeof(struct debug_buffer), GFP_KERNEL); | ||
387 | if (!db) | ||
388 | goto bail; | ||
389 | |||
390 | db->len = PAGE_SIZE; | ||
391 | db->buf = kmalloc(db->len, GFP_KERNEL); | ||
392 | if (!db->buf) | ||
393 | goto bail; | ||
394 | |||
395 | return db; | ||
396 | bail: | ||
397 | kfree(db); | ||
398 | return NULL; | ||
399 | } | ||
400 | |||
401 | static ssize_t debug_buffer_read(struct file *file, char __user *buf, | ||
402 | size_t nbytes, loff_t *ppos) | ||
403 | { | ||
404 | struct debug_buffer *db = file->private_data; | ||
405 | |||
406 | return simple_read_from_buffer(buf, nbytes, ppos, db->buf, db->len); | ||
407 | } | ||
408 | |||
409 | static loff_t debug_buffer_llseek(struct file *file, loff_t off, int whence) | ||
410 | { | ||
411 | struct debug_buffer *db = file->private_data; | ||
412 | loff_t new = -1; | ||
413 | |||
414 | switch (whence) { | ||
415 | case 0: | ||
416 | new = off; | ||
417 | break; | ||
418 | case 1: | ||
419 | new = file->f_pos + off; | ||
420 | break; | ||
421 | } | ||
422 | |||
423 | if (new < 0 || new > db->len) | ||
424 | return -EINVAL; | ||
425 | |||
426 | return (file->f_pos = new); | ||
427 | } | ||
428 | |||
429 | static int debug_buffer_release(struct inode *inode, struct file *file) | ||
430 | { | ||
431 | struct debug_buffer *db = (struct debug_buffer *)file->private_data; | ||
432 | |||
433 | if (db) | ||
434 | kfree(db->buf); | ||
435 | kfree(db); | ||
436 | |||
437 | return 0; | ||
438 | } | ||
439 | /* end - util funcs */ | ||
440 | |||
441 | /* begin - purge list funcs */ | ||
442 | static int debug_purgelist_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | ||
443 | { | ||
444 | struct dlm_lock_resource *res; | ||
445 | int out = 0; | ||
446 | unsigned long total = 0; | ||
447 | |||
448 | out += snprintf(db->buf + out, db->len - out, | ||
449 | "Dumping Purgelist for Domain: %s\n", dlm->name); | ||
450 | |||
451 | spin_lock(&dlm->spinlock); | ||
452 | list_for_each_entry(res, &dlm->purge_list, purge) { | ||
453 | ++total; | ||
454 | if (db->len - out < 100) | ||
455 | continue; | ||
456 | spin_lock(&res->spinlock); | ||
457 | out += stringify_lockname(res->lockname.name, | ||
458 | res->lockname.len, | ||
459 | db->buf + out, db->len - out); | ||
460 | out += snprintf(db->buf + out, db->len - out, "\t%ld\n", | ||
461 | (jiffies - res->last_used)/HZ); | ||
462 | spin_unlock(&res->spinlock); | ||
463 | } | ||
464 | spin_unlock(&dlm->spinlock); | ||
465 | |||
466 | out += snprintf(db->buf + out, db->len - out, | ||
467 | "Total on list: %ld\n", total); | ||
468 | |||
469 | return out; | ||
470 | } | ||
471 | |||
472 | static int debug_purgelist_open(struct inode *inode, struct file *file) | ||
473 | { | ||
474 | struct dlm_ctxt *dlm = inode->i_private; | ||
475 | struct debug_buffer *db; | ||
476 | |||
477 | db = debug_buffer_allocate(); | ||
478 | if (!db) | ||
479 | goto bail; | ||
480 | |||
481 | db->len = debug_purgelist_print(dlm, db); | ||
482 | |||
483 | file->private_data = db; | ||
484 | |||
485 | return 0; | ||
486 | bail: | ||
487 | return -ENOMEM; | ||
488 | } | ||
489 | |||
490 | static struct file_operations debug_purgelist_fops = { | ||
491 | .open = debug_purgelist_open, | ||
492 | .release = debug_buffer_release, | ||
493 | .read = debug_buffer_read, | ||
494 | .llseek = debug_buffer_llseek, | ||
495 | }; | ||
496 | /* end - purge list funcs */ | ||
497 | |||
498 | /* begin - debug mle funcs */ | ||
499 | static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | ||
500 | { | ||
501 | struct dlm_master_list_entry *mle; | ||
502 | int out = 0; | ||
503 | unsigned long total = 0; | ||
504 | |||
505 | out += snprintf(db->buf + out, db->len - out, | ||
506 | "Dumping MLEs for Domain: %s\n", dlm->name); | ||
507 | |||
508 | spin_lock(&dlm->master_lock); | ||
509 | list_for_each_entry(mle, &dlm->master_list, list) { | ||
510 | ++total; | ||
511 | if (db->len - out < 200) | ||
512 | continue; | ||
513 | out += dump_mle(mle, db->buf + out, db->len - out); | ||
514 | } | ||
515 | spin_unlock(&dlm->master_lock); | ||
516 | |||
517 | out += snprintf(db->buf + out, db->len - out, | ||
518 | "Total on list: %ld\n", total); | ||
519 | return out; | ||
520 | } | ||
521 | |||
522 | static int debug_mle_open(struct inode *inode, struct file *file) | ||
523 | { | ||
524 | struct dlm_ctxt *dlm = inode->i_private; | ||
525 | struct debug_buffer *db; | ||
526 | |||
527 | db = debug_buffer_allocate(); | ||
528 | if (!db) | ||
529 | goto bail; | ||
530 | |||
531 | db->len = debug_mle_print(dlm, db); | ||
532 | |||
533 | file->private_data = db; | ||
534 | |||
535 | return 0; | ||
536 | bail: | ||
537 | return -ENOMEM; | ||
538 | } | ||
539 | |||
540 | static struct file_operations debug_mle_fops = { | ||
541 | .open = debug_mle_open, | ||
542 | .release = debug_buffer_release, | ||
543 | .read = debug_buffer_read, | ||
544 | .llseek = debug_buffer_llseek, | ||
545 | }; | ||
546 | |||
547 | /* end - debug mle funcs */ | ||
548 | |||
549 | /* begin - debug lockres funcs */ | ||
550 | static int dump_lock(struct dlm_lock *lock, int list_type, char *buf, int len) | ||
551 | { | ||
552 | int out; | ||
553 | |||
554 | #define DEBUG_LOCK_VERSION 1 | ||
555 | spin_lock(&lock->spinlock); | ||
556 | out = snprintf(buf, len, "LOCK:%d,%d,%d,%d,%d,%d:%lld,%d,%d,%d,%d,%d," | ||
557 | "%d,%d,%d,%d\n", | ||
558 | DEBUG_LOCK_VERSION, | ||
559 | list_type, lock->ml.type, lock->ml.convert_type, | ||
560 | lock->ml.node, | ||
561 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), | ||
562 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), | ||
563 | !list_empty(&lock->ast_list), | ||
564 | !list_empty(&lock->bast_list), | ||
565 | lock->ast_pending, lock->bast_pending, | ||
566 | lock->convert_pending, lock->lock_pending, | ||
567 | lock->cancel_pending, lock->unlock_pending, | ||
568 | atomic_read(&lock->lock_refs.refcount)); | ||
569 | spin_unlock(&lock->spinlock); | ||
570 | |||
571 | return out; | ||
572 | } | ||
573 | |||
574 | static int dump_lockres(struct dlm_lock_resource *res, char *buf, int len) | ||
575 | { | ||
576 | struct dlm_lock *lock; | ||
577 | int i; | ||
578 | int out = 0; | ||
579 | |||
580 | out += snprintf(buf + out, len - out, "NAME:"); | ||
581 | out += stringify_lockname(res->lockname.name, res->lockname.len, | ||
582 | buf + out, len - out); | ||
583 | out += snprintf(buf + out, len - out, "\n"); | ||
584 | |||
585 | #define DEBUG_LRES_VERSION 1 | ||
586 | out += snprintf(buf + out, len - out, | ||
587 | "LRES:%d,%d,%d,%ld,%d,%d,%d,%d,%d,%d,%d\n", | ||
588 | DEBUG_LRES_VERSION, | ||
589 | res->owner, res->state, res->last_used, | ||
590 | !list_empty(&res->purge), | ||
591 | !list_empty(&res->dirty), | ||
592 | !list_empty(&res->recovering), | ||
593 | res->inflight_locks, res->migration_pending, | ||
594 | atomic_read(&res->asts_reserved), | ||
595 | atomic_read(&res->refs.refcount)); | ||
596 | |||
597 | /* refmap */ | ||
598 | out += snprintf(buf + out, len - out, "RMAP:"); | ||
599 | out += stringify_nodemap(res->refmap, O2NM_MAX_NODES, | ||
600 | buf + out, len - out); | ||
601 | out += snprintf(buf + out, len - out, "\n"); | ||
602 | |||
603 | /* lvb */ | ||
604 | out += snprintf(buf + out, len - out, "LVBX:"); | ||
605 | for (i = 0; i < DLM_LVB_LEN; i++) | ||
606 | out += snprintf(buf + out, len - out, | ||
607 | "%02x", (unsigned char)res->lvb[i]); | ||
608 | out += snprintf(buf + out, len - out, "\n"); | ||
609 | |||
610 | /* granted */ | ||
611 | list_for_each_entry(lock, &res->granted, list) | ||
612 | out += dump_lock(lock, 0, buf + out, len - out); | ||
613 | |||
614 | /* converting */ | ||
615 | list_for_each_entry(lock, &res->converting, list) | ||
616 | out += dump_lock(lock, 1, buf + out, len - out); | ||
617 | |||
618 | /* blocked */ | ||
619 | list_for_each_entry(lock, &res->blocked, list) | ||
620 | out += dump_lock(lock, 2, buf + out, len - out); | ||
621 | |||
622 | out += snprintf(buf + out, len - out, "\n"); | ||
623 | |||
624 | return out; | ||
625 | } | ||
626 | |||
627 | static void *lockres_seq_start(struct seq_file *m, loff_t *pos) | ||
628 | { | ||
629 | struct debug_lockres *dl = m->private; | ||
630 | struct dlm_ctxt *dlm = dl->dl_ctxt; | ||
631 | struct dlm_lock_resource *res = NULL; | ||
632 | |||
633 | spin_lock(&dlm->spinlock); | ||
634 | |||
635 | if (dl->dl_res) { | ||
636 | list_for_each_entry(res, &dl->dl_res->tracking, tracking) { | ||
637 | if (dl->dl_res) { | ||
638 | dlm_lockres_put(dl->dl_res); | ||
639 | dl->dl_res = NULL; | ||
640 | } | ||
641 | if (&res->tracking == &dlm->tracking_list) { | ||
642 | mlog(0, "End of list found, %p\n", res); | ||
643 | dl = NULL; | ||
644 | break; | ||
645 | } | ||
646 | dlm_lockres_get(res); | ||
647 | dl->dl_res = res; | ||
648 | break; | ||
649 | } | ||
650 | } else { | ||
651 | if (!list_empty(&dlm->tracking_list)) { | ||
652 | list_for_each_entry(res, &dlm->tracking_list, tracking) | ||
653 | break; | ||
654 | dlm_lockres_get(res); | ||
655 | dl->dl_res = res; | ||
656 | } else | ||
657 | dl = NULL; | ||
658 | } | ||
659 | |||
660 | if (dl) { | ||
661 | spin_lock(&dl->dl_res->spinlock); | ||
662 | dump_lockres(dl->dl_res, dl->dl_buf, dl->dl_len - 1); | ||
663 | spin_unlock(&dl->dl_res->spinlock); | ||
664 | } | ||
665 | |||
666 | spin_unlock(&dlm->spinlock); | ||
667 | |||
668 | return dl; | ||
669 | } | ||
670 | |||
671 | static void lockres_seq_stop(struct seq_file *m, void *v) | ||
672 | { | ||
673 | } | ||
674 | |||
675 | static void *lockres_seq_next(struct seq_file *m, void *v, loff_t *pos) | ||
676 | { | ||
677 | return NULL; | ||
678 | } | ||
679 | |||
680 | static int lockres_seq_show(struct seq_file *s, void *v) | ||
681 | { | ||
682 | struct debug_lockres *dl = (struct debug_lockres *)v; | ||
683 | |||
684 | seq_printf(s, "%s", dl->dl_buf); | ||
685 | |||
686 | return 0; | ||
687 | } | ||
688 | |||
689 | static struct seq_operations debug_lockres_ops = { | ||
690 | .start = lockres_seq_start, | ||
691 | .stop = lockres_seq_stop, | ||
692 | .next = lockres_seq_next, | ||
693 | .show = lockres_seq_show, | ||
694 | }; | ||
695 | |||
696 | static int debug_lockres_open(struct inode *inode, struct file *file) | ||
697 | { | ||
698 | struct dlm_ctxt *dlm = inode->i_private; | ||
699 | int ret = -ENOMEM; | ||
700 | struct seq_file *seq; | ||
701 | struct debug_lockres *dl = NULL; | ||
702 | |||
703 | dl = kzalloc(sizeof(struct debug_lockres), GFP_KERNEL); | ||
704 | if (!dl) { | ||
705 | mlog_errno(ret); | ||
706 | goto bail; | ||
707 | } | ||
708 | |||
709 | dl->dl_len = PAGE_SIZE; | ||
710 | dl->dl_buf = kmalloc(dl->dl_len, GFP_KERNEL); | ||
711 | if (!dl->dl_buf) { | ||
712 | mlog_errno(ret); | ||
713 | goto bail; | ||
714 | } | ||
715 | |||
716 | ret = seq_open(file, &debug_lockres_ops); | ||
717 | if (ret) { | ||
718 | mlog_errno(ret); | ||
719 | goto bail; | ||
720 | } | ||
721 | |||
722 | seq = (struct seq_file *) file->private_data; | ||
723 | seq->private = dl; | ||
724 | |||
725 | dlm_grab(dlm); | ||
726 | dl->dl_ctxt = dlm; | ||
727 | |||
728 | return 0; | ||
729 | bail: | ||
730 | if (dl) | ||
731 | kfree(dl->dl_buf); | ||
732 | kfree(dl); | ||
733 | return ret; | ||
734 | } | ||
735 | |||
736 | static int debug_lockres_release(struct inode *inode, struct file *file) | ||
737 | { | ||
738 | struct seq_file *seq = (struct seq_file *)file->private_data; | ||
739 | struct debug_lockres *dl = (struct debug_lockres *)seq->private; | ||
740 | |||
741 | if (dl->dl_res) | ||
742 | dlm_lockres_put(dl->dl_res); | ||
743 | dlm_put(dl->dl_ctxt); | ||
744 | kfree(dl->dl_buf); | ||
745 | return seq_release_private(inode, file); | ||
746 | } | ||
747 | |||
748 | static struct file_operations debug_lockres_fops = { | ||
749 | .open = debug_lockres_open, | ||
750 | .release = debug_lockres_release, | ||
751 | .read = seq_read, | ||
752 | .llseek = seq_lseek, | ||
753 | }; | ||
754 | /* end - debug lockres funcs */ | ||
755 | |||
756 | /* begin - debug state funcs */ | ||
757 | static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | ||
758 | { | ||
759 | int out = 0; | ||
760 | struct dlm_reco_node_data *node; | ||
761 | char *state; | ||
762 | int lres, rres, ures, tres; | ||
763 | |||
764 | lres = atomic_read(&dlm->local_resources); | ||
765 | rres = atomic_read(&dlm->remote_resources); | ||
766 | ures = atomic_read(&dlm->unknown_resources); | ||
767 | tres = lres + rres + ures; | ||
768 | |||
769 | spin_lock(&dlm->spinlock); | ||
770 | |||
771 | switch (dlm->dlm_state) { | ||
772 | case DLM_CTXT_NEW: | ||
773 | state = "NEW"; break; | ||
774 | case DLM_CTXT_JOINED: | ||
775 | state = "JOINED"; break; | ||
776 | case DLM_CTXT_IN_SHUTDOWN: | ||
777 | state = "SHUTDOWN"; break; | ||
778 | case DLM_CTXT_LEAVING: | ||
779 | state = "LEAVING"; break; | ||
780 | default: | ||
781 | state = "UNKNOWN"; break; | ||
782 | } | ||
783 | |||
784 | /* Domain: xxxxxxxxxx Key: 0xdfbac769 */ | ||
785 | out += snprintf(db->buf + out, db->len - out, | ||
786 | "Domain: %s Key: 0x%08x\n", dlm->name, dlm->key); | ||
787 | |||
788 | /* Thread Pid: xxx Node: xxx State: xxxxx */ | ||
789 | out += snprintf(db->buf + out, db->len - out, | ||
790 | "Thread Pid: %d Node: %d State: %s\n", | ||
791 | dlm->dlm_thread_task->pid, dlm->node_num, state); | ||
792 | |||
793 | /* Number of Joins: xxx Joining Node: xxx */ | ||
794 | out += snprintf(db->buf + out, db->len - out, | ||
795 | "Number of Joins: %d Joining Node: %d\n", | ||
796 | dlm->num_joins, dlm->joining_node); | ||
797 | |||
798 | /* Domain Map: xx xx xx */ | ||
799 | out += snprintf(db->buf + out, db->len - out, "Domain Map: "); | ||
800 | out += stringify_nodemap(dlm->domain_map, O2NM_MAX_NODES, | ||
801 | db->buf + out, db->len - out); | ||
802 | out += snprintf(db->buf + out, db->len - out, "\n"); | ||
803 | |||
804 | /* Live Map: xx xx xx */ | ||
805 | out += snprintf(db->buf + out, db->len - out, "Live Map: "); | ||
806 | out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES, | ||
807 | db->buf + out, db->len - out); | ||
808 | out += snprintf(db->buf + out, db->len - out, "\n"); | ||
809 | |||
810 | /* Mastered Resources Total: xxx Locally: xxx Remotely: ... */ | ||
811 | out += snprintf(db->buf + out, db->len - out, | ||
812 | "Mastered Resources Total: %d Locally: %d " | ||
813 | "Remotely: %d Unknown: %d\n", | ||
814 | tres, lres, rres, ures); | ||
815 | |||
816 | /* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */ | ||
817 | out += snprintf(db->buf + out, db->len - out, | ||
818 | "Lists: Dirty=%s Purge=%s PendingASTs=%s " | ||
819 | "PendingBASTs=%s Master=%s\n", | ||
820 | (list_empty(&dlm->dirty_list) ? "Empty" : "InUse"), | ||
821 | (list_empty(&dlm->purge_list) ? "Empty" : "InUse"), | ||
822 | (list_empty(&dlm->pending_asts) ? "Empty" : "InUse"), | ||
823 | (list_empty(&dlm->pending_basts) ? "Empty" : "InUse"), | ||
824 | (list_empty(&dlm->master_list) ? "Empty" : "InUse")); | ||
825 | |||
826 | /* Purge Count: xxx Refs: xxx */ | ||
827 | out += snprintf(db->buf + out, db->len - out, | ||
828 | "Purge Count: %d Refs: %d\n", dlm->purge_count, | ||
829 | atomic_read(&dlm->dlm_refs.refcount)); | ||
830 | |||
831 | /* Dead Node: xxx */ | ||
832 | out += snprintf(db->buf + out, db->len - out, | ||
833 | "Dead Node: %d\n", dlm->reco.dead_node); | ||
834 | |||
835 | /* What about DLM_RECO_STATE_FINALIZE? */ | ||
836 | if (dlm->reco.state == DLM_RECO_STATE_ACTIVE) | ||
837 | state = "ACTIVE"; | ||
838 | else | ||
839 | state = "INACTIVE"; | ||
840 | |||
841 | /* Recovery Pid: xxxx Master: xxx State: xxxx */ | ||
842 | out += snprintf(db->buf + out, db->len - out, | ||
843 | "Recovery Pid: %d Master: %d State: %s\n", | ||
844 | dlm->dlm_reco_thread_task->pid, | ||
845 | dlm->reco.new_master, state); | ||
846 | |||
847 | /* Recovery Map: xx xx */ | ||
848 | out += snprintf(db->buf + out, db->len - out, "Recovery Map: "); | ||
849 | out += stringify_nodemap(dlm->recovery_map, O2NM_MAX_NODES, | ||
850 | db->buf + out, db->len - out); | ||
851 | out += snprintf(db->buf + out, db->len - out, "\n"); | ||
852 | |||
853 | /* Recovery Node State: */ | ||
854 | out += snprintf(db->buf + out, db->len - out, "Recovery Node State:\n"); | ||
855 | list_for_each_entry(node, &dlm->reco.node_data, list) { | ||
856 | switch (node->state) { | ||
857 | case DLM_RECO_NODE_DATA_INIT: | ||
858 | state = "INIT"; | ||
859 | break; | ||
860 | case DLM_RECO_NODE_DATA_REQUESTING: | ||
861 | state = "REQUESTING"; | ||
862 | break; | ||
863 | case DLM_RECO_NODE_DATA_DEAD: | ||
864 | state = "DEAD"; | ||
865 | break; | ||
866 | case DLM_RECO_NODE_DATA_RECEIVING: | ||
867 | state = "RECEIVING"; | ||
868 | break; | ||
869 | case DLM_RECO_NODE_DATA_REQUESTED: | ||
870 | state = "REQUESTED"; | ||
871 | break; | ||
872 | case DLM_RECO_NODE_DATA_DONE: | ||
873 | state = "DONE"; | ||
874 | break; | ||
875 | case DLM_RECO_NODE_DATA_FINALIZE_SENT: | ||
876 | state = "FINALIZE-SENT"; | ||
877 | break; | ||
878 | default: | ||
879 | state = "BAD"; | ||
880 | break; | ||
881 | } | ||
882 | out += snprintf(db->buf + out, db->len - out, "\t%u - %s\n", | ||
883 | node->node_num, state); | ||
884 | } | ||
885 | |||
886 | spin_unlock(&dlm->spinlock); | ||
887 | |||
888 | return out; | ||
889 | } | ||
890 | |||
891 | static int debug_state_open(struct inode *inode, struct file *file) | ||
892 | { | ||
893 | struct dlm_ctxt *dlm = inode->i_private; | ||
894 | struct debug_buffer *db = NULL; | ||
895 | |||
896 | db = debug_buffer_allocate(); | ||
897 | if (!db) | ||
898 | goto bail; | ||
899 | |||
900 | db->len = debug_state_print(dlm, db); | ||
901 | |||
902 | file->private_data = db; | ||
903 | |||
904 | return 0; | ||
905 | bail: | ||
906 | return -ENOMEM; | ||
907 | } | ||
908 | |||
909 | static struct file_operations debug_state_fops = { | ||
910 | .open = debug_state_open, | ||
911 | .release = debug_buffer_release, | ||
912 | .read = debug_buffer_read, | ||
913 | .llseek = debug_buffer_llseek, | ||
914 | }; | ||
915 | /* end - debug state funcs */ | ||
916 | |||
917 | /* files in subroot */ | ||
918 | int dlm_debug_init(struct dlm_ctxt *dlm) | ||
919 | { | ||
920 | struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt; | ||
921 | |||
922 | /* for dumping dlm_ctxt */ | ||
923 | dc->debug_state_dentry = debugfs_create_file(DLM_DEBUGFS_DLM_STATE, | ||
924 | S_IFREG|S_IRUSR, | ||
925 | dlm->dlm_debugfs_subroot, | ||
926 | dlm, &debug_state_fops); | ||
927 | if (!dc->debug_state_dentry) { | ||
928 | mlog_errno(-ENOMEM); | ||
929 | goto bail; | ||
930 | } | ||
931 | |||
932 | /* for dumping lockres */ | ||
933 | dc->debug_lockres_dentry = | ||
934 | debugfs_create_file(DLM_DEBUGFS_LOCKING_STATE, | ||
935 | S_IFREG|S_IRUSR, | ||
936 | dlm->dlm_debugfs_subroot, | ||
937 | dlm, &debug_lockres_fops); | ||
938 | if (!dc->debug_lockres_dentry) { | ||
939 | mlog_errno(-ENOMEM); | ||
940 | goto bail; | ||
941 | } | ||
942 | |||
943 | /* for dumping mles */ | ||
944 | dc->debug_mle_dentry = debugfs_create_file(DLM_DEBUGFS_MLE_STATE, | ||
945 | S_IFREG|S_IRUSR, | ||
946 | dlm->dlm_debugfs_subroot, | ||
947 | dlm, &debug_mle_fops); | ||
948 | if (!dc->debug_mle_dentry) { | ||
949 | mlog_errno(-ENOMEM); | ||
950 | goto bail; | ||
951 | } | ||
952 | |||
953 | /* for dumping lockres on the purge list */ | ||
954 | dc->debug_purgelist_dentry = | ||
955 | debugfs_create_file(DLM_DEBUGFS_PURGE_LIST, | ||
956 | S_IFREG|S_IRUSR, | ||
957 | dlm->dlm_debugfs_subroot, | ||
958 | dlm, &debug_purgelist_fops); | ||
959 | if (!dc->debug_purgelist_dentry) { | ||
960 | mlog_errno(-ENOMEM); | ||
961 | goto bail; | ||
962 | } | ||
963 | |||
964 | dlm_debug_get(dc); | ||
965 | return 0; | ||
966 | |||
967 | bail: | ||
968 | dlm_debug_shutdown(dlm); | ||
969 | return -ENOMEM; | ||
970 | } | ||
971 | |||
972 | void dlm_debug_shutdown(struct dlm_ctxt *dlm) | ||
973 | { | ||
974 | struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt; | ||
975 | |||
976 | if (dc) { | ||
977 | if (dc->debug_purgelist_dentry) | ||
978 | debugfs_remove(dc->debug_purgelist_dentry); | ||
979 | if (dc->debug_mle_dentry) | ||
980 | debugfs_remove(dc->debug_mle_dentry); | ||
981 | if (dc->debug_lockres_dentry) | ||
982 | debugfs_remove(dc->debug_lockres_dentry); | ||
983 | if (dc->debug_state_dentry) | ||
984 | debugfs_remove(dc->debug_state_dentry); | ||
985 | dlm_debug_put(dc); | ||
986 | } | ||
987 | } | ||
988 | |||
989 | /* subroot - domain dir */ | ||
990 | int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) | ||
991 | { | ||
992 | dlm->dlm_debugfs_subroot = debugfs_create_dir(dlm->name, | ||
993 | dlm_debugfs_root); | ||
994 | if (!dlm->dlm_debugfs_subroot) { | ||
995 | mlog_errno(-ENOMEM); | ||
996 | goto bail; | ||
997 | } | ||
998 | |||
999 | dlm->dlm_debug_ctxt = kzalloc(sizeof(struct dlm_debug_ctxt), | ||
1000 | GFP_KERNEL); | ||
1001 | if (!dlm->dlm_debug_ctxt) { | ||
1002 | mlog_errno(-ENOMEM); | ||
1003 | goto bail; | ||
1004 | } | ||
1005 | kref_init(&dlm->dlm_debug_ctxt->debug_refcnt); | ||
1006 | |||
1007 | return 0; | ||
1008 | bail: | ||
1009 | dlm_destroy_debugfs_subroot(dlm); | ||
1010 | return -ENOMEM; | ||
1011 | } | ||
1012 | |||
1013 | void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) | ||
1014 | { | ||
1015 | if (dlm->dlm_debugfs_subroot) | ||
1016 | debugfs_remove(dlm->dlm_debugfs_subroot); | ||
1017 | } | ||
1018 | |||
1019 | /* debugfs root */ | ||
1020 | int dlm_create_debugfs_root(void) | ||
1021 | { | ||
1022 | dlm_debugfs_root = debugfs_create_dir(DLM_DEBUGFS_DIR, NULL); | ||
1023 | if (!dlm_debugfs_root) { | ||
1024 | mlog_errno(-ENOMEM); | ||
1025 | return -ENOMEM; | ||
1026 | } | ||
1027 | return 0; | ||
1028 | } | ||
1029 | |||
1030 | void dlm_destroy_debugfs_root(void) | ||
1031 | { | ||
1032 | if (dlm_debugfs_root) | ||
1033 | debugfs_remove(dlm_debugfs_root); | ||
1034 | } | ||
1035 | #endif /* CONFIG_DEBUG_FS */ | ||
diff --git a/fs/ocfs2/dlm/dlmdebug.h b/fs/ocfs2/dlm/dlmdebug.h new file mode 100644 index 000000000000..d34a62a3a625 --- /dev/null +++ b/fs/ocfs2/dlm/dlmdebug.h | |||
@@ -0,0 +1,86 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * dlmdebug.h | ||
5 | * | ||
6 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public | ||
10 | * License as published by the Free Software Foundation; either | ||
11 | * version 2 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
16 | * General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public | ||
19 | * License along with this program; if not, write to the | ||
20 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
21 | * Boston, MA 021110-1307, USA. | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #ifndef DLMDEBUG_H | ||
26 | #define DLMDEBUG_H | ||
27 | |||
28 | void dlm_print_one_mle(struct dlm_master_list_entry *mle); | ||
29 | |||
30 | #ifdef CONFIG_DEBUG_FS | ||
31 | |||
32 | struct dlm_debug_ctxt { | ||
33 | struct kref debug_refcnt; | ||
34 | struct dentry *debug_state_dentry; | ||
35 | struct dentry *debug_lockres_dentry; | ||
36 | struct dentry *debug_mle_dentry; | ||
37 | struct dentry *debug_purgelist_dentry; | ||
38 | }; | ||
39 | |||
40 | struct debug_buffer { | ||
41 | int len; | ||
42 | char *buf; | ||
43 | }; | ||
44 | |||
45 | struct debug_lockres { | ||
46 | int dl_len; | ||
47 | char *dl_buf; | ||
48 | struct dlm_ctxt *dl_ctxt; | ||
49 | struct dlm_lock_resource *dl_res; | ||
50 | }; | ||
51 | |||
52 | int dlm_debug_init(struct dlm_ctxt *dlm); | ||
53 | void dlm_debug_shutdown(struct dlm_ctxt *dlm); | ||
54 | |||
55 | int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm); | ||
56 | void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm); | ||
57 | |||
58 | int dlm_create_debugfs_root(void); | ||
59 | void dlm_destroy_debugfs_root(void); | ||
60 | |||
61 | #else | ||
62 | |||
63 | static int dlm_debug_init(struct dlm_ctxt *dlm) | ||
64 | { | ||
65 | return 0; | ||
66 | } | ||
67 | static void dlm_debug_shutdown(struct dlm_ctxt *dlm) | ||
68 | { | ||
69 | } | ||
70 | static int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) | ||
71 | { | ||
72 | return 0; | ||
73 | } | ||
74 | static void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) | ||
75 | { | ||
76 | } | ||
77 | static int dlm_create_debugfs_root(void) | ||
78 | { | ||
79 | return 0; | ||
80 | } | ||
81 | static void dlm_destroy_debugfs_root(void) | ||
82 | { | ||
83 | } | ||
84 | |||
85 | #endif /* CONFIG_DEBUG_FS */ | ||
86 | #endif /* DLMDEBUG_H */ | ||
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 0879d86113e3..63f8125824e8 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/spinlock.h> | 33 | #include <linux/spinlock.h> |
34 | #include <linux/delay.h> | 34 | #include <linux/delay.h> |
35 | #include <linux/err.h> | 35 | #include <linux/err.h> |
36 | #include <linux/debugfs.h> | ||
36 | 37 | ||
37 | #include "cluster/heartbeat.h" | 38 | #include "cluster/heartbeat.h" |
38 | #include "cluster/nodemanager.h" | 39 | #include "cluster/nodemanager.h" |
@@ -40,8 +41,8 @@ | |||
40 | 41 | ||
41 | #include "dlmapi.h" | 42 | #include "dlmapi.h" |
42 | #include "dlmcommon.h" | 43 | #include "dlmcommon.h" |
43 | |||
44 | #include "dlmdomain.h" | 44 | #include "dlmdomain.h" |
45 | #include "dlmdebug.h" | ||
45 | 46 | ||
46 | #include "dlmver.h" | 47 | #include "dlmver.h" |
47 | 48 | ||
@@ -298,6 +299,8 @@ static int dlm_wait_on_domain_helper(const char *domain) | |||
298 | 299 | ||
299 | static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm) | 300 | static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm) |
300 | { | 301 | { |
302 | dlm_destroy_debugfs_subroot(dlm); | ||
303 | |||
301 | if (dlm->lockres_hash) | 304 | if (dlm->lockres_hash) |
302 | dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); | 305 | dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); |
303 | 306 | ||
@@ -395,6 +398,7 @@ static void dlm_destroy_dlm_worker(struct dlm_ctxt *dlm) | |||
395 | static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm) | 398 | static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm) |
396 | { | 399 | { |
397 | dlm_unregister_domain_handlers(dlm); | 400 | dlm_unregister_domain_handlers(dlm); |
401 | dlm_debug_shutdown(dlm); | ||
398 | dlm_complete_thread(dlm); | 402 | dlm_complete_thread(dlm); |
399 | dlm_complete_recovery_thread(dlm); | 403 | dlm_complete_recovery_thread(dlm); |
400 | dlm_destroy_dlm_worker(dlm); | 404 | dlm_destroy_dlm_worker(dlm); |
@@ -644,6 +648,7 @@ int dlm_shutting_down(struct dlm_ctxt *dlm) | |||
644 | void dlm_unregister_domain(struct dlm_ctxt *dlm) | 648 | void dlm_unregister_domain(struct dlm_ctxt *dlm) |
645 | { | 649 | { |
646 | int leave = 0; | 650 | int leave = 0; |
651 | struct dlm_lock_resource *res; | ||
647 | 652 | ||
648 | spin_lock(&dlm_domain_lock); | 653 | spin_lock(&dlm_domain_lock); |
649 | BUG_ON(dlm->dlm_state != DLM_CTXT_JOINED); | 654 | BUG_ON(dlm->dlm_state != DLM_CTXT_JOINED); |
@@ -673,6 +678,15 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) | |||
673 | msleep(500); | 678 | msleep(500); |
674 | mlog(0, "%s: more migration to do\n", dlm->name); | 679 | mlog(0, "%s: more migration to do\n", dlm->name); |
675 | } | 680 | } |
681 | |||
682 | /* This list should be empty. If not, print remaining lockres */ | ||
683 | if (!list_empty(&dlm->tracking_list)) { | ||
684 | mlog(ML_ERROR, "Following lockres' are still on the " | ||
685 | "tracking list:\n"); | ||
686 | list_for_each_entry(res, &dlm->tracking_list, tracking) | ||
687 | dlm_print_one_lock_resource(res); | ||
688 | } | ||
689 | |||
676 | dlm_mark_domain_leaving(dlm); | 690 | dlm_mark_domain_leaving(dlm); |
677 | dlm_leave_domain(dlm); | 691 | dlm_leave_domain(dlm); |
678 | dlm_complete_dlm_shutdown(dlm); | 692 | dlm_complete_dlm_shutdown(dlm); |
@@ -1405,6 +1419,12 @@ static int dlm_join_domain(struct dlm_ctxt *dlm) | |||
1405 | goto bail; | 1419 | goto bail; |
1406 | } | 1420 | } |
1407 | 1421 | ||
1422 | status = dlm_debug_init(dlm); | ||
1423 | if (status < 0) { | ||
1424 | mlog_errno(status); | ||
1425 | goto bail; | ||
1426 | } | ||
1427 | |||
1408 | status = dlm_launch_thread(dlm); | 1428 | status = dlm_launch_thread(dlm); |
1409 | if (status < 0) { | 1429 | if (status < 0) { |
1410 | mlog_errno(status); | 1430 | mlog_errno(status); |
@@ -1472,6 +1492,7 @@ bail: | |||
1472 | 1492 | ||
1473 | if (status) { | 1493 | if (status) { |
1474 | dlm_unregister_domain_handlers(dlm); | 1494 | dlm_unregister_domain_handlers(dlm); |
1495 | dlm_debug_shutdown(dlm); | ||
1475 | dlm_complete_thread(dlm); | 1496 | dlm_complete_thread(dlm); |
1476 | dlm_complete_recovery_thread(dlm); | 1497 | dlm_complete_recovery_thread(dlm); |
1477 | dlm_destroy_dlm_worker(dlm); | 1498 | dlm_destroy_dlm_worker(dlm); |
@@ -1484,6 +1505,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, | |||
1484 | u32 key) | 1505 | u32 key) |
1485 | { | 1506 | { |
1486 | int i; | 1507 | int i; |
1508 | int ret; | ||
1487 | struct dlm_ctxt *dlm = NULL; | 1509 | struct dlm_ctxt *dlm = NULL; |
1488 | 1510 | ||
1489 | dlm = kzalloc(sizeof(*dlm), GFP_KERNEL); | 1511 | dlm = kzalloc(sizeof(*dlm), GFP_KERNEL); |
@@ -1516,6 +1538,15 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, | |||
1516 | dlm->key = key; | 1538 | dlm->key = key; |
1517 | dlm->node_num = o2nm_this_node(); | 1539 | dlm->node_num = o2nm_this_node(); |
1518 | 1540 | ||
1541 | ret = dlm_create_debugfs_subroot(dlm); | ||
1542 | if (ret < 0) { | ||
1543 | dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); | ||
1544 | kfree(dlm->name); | ||
1545 | kfree(dlm); | ||
1546 | dlm = NULL; | ||
1547 | goto leave; | ||
1548 | } | ||
1549 | |||
1519 | spin_lock_init(&dlm->spinlock); | 1550 | spin_lock_init(&dlm->spinlock); |
1520 | spin_lock_init(&dlm->master_lock); | 1551 | spin_lock_init(&dlm->master_lock); |
1521 | spin_lock_init(&dlm->ast_lock); | 1552 | spin_lock_init(&dlm->ast_lock); |
@@ -1526,6 +1557,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, | |||
1526 | INIT_LIST_HEAD(&dlm->reco.node_data); | 1557 | INIT_LIST_HEAD(&dlm->reco.node_data); |
1527 | INIT_LIST_HEAD(&dlm->purge_list); | 1558 | INIT_LIST_HEAD(&dlm->purge_list); |
1528 | INIT_LIST_HEAD(&dlm->dlm_domain_handlers); | 1559 | INIT_LIST_HEAD(&dlm->dlm_domain_handlers); |
1560 | INIT_LIST_HEAD(&dlm->tracking_list); | ||
1529 | dlm->reco.state = 0; | 1561 | dlm->reco.state = 0; |
1530 | 1562 | ||
1531 | INIT_LIST_HEAD(&dlm->pending_asts); | 1563 | INIT_LIST_HEAD(&dlm->pending_asts); |
@@ -1816,21 +1848,49 @@ static int __init dlm_init(void) | |||
1816 | dlm_print_version(); | 1848 | dlm_print_version(); |
1817 | 1849 | ||
1818 | status = dlm_init_mle_cache(); | 1850 | status = dlm_init_mle_cache(); |
1819 | if (status) | 1851 | if (status) { |
1820 | return -1; | 1852 | mlog(ML_ERROR, "Could not create o2dlm_mle slabcache\n"); |
1853 | goto error; | ||
1854 | } | ||
1855 | |||
1856 | status = dlm_init_master_caches(); | ||
1857 | if (status) { | ||
1858 | mlog(ML_ERROR, "Could not create o2dlm_lockres and " | ||
1859 | "o2dlm_lockname slabcaches\n"); | ||
1860 | goto error; | ||
1861 | } | ||
1862 | |||
1863 | status = dlm_init_lock_cache(); | ||
1864 | if (status) { | ||
1865 | mlog(ML_ERROR, "Count not create o2dlm_lock slabcache\n"); | ||
1866 | goto error; | ||
1867 | } | ||
1821 | 1868 | ||
1822 | status = dlm_register_net_handlers(); | 1869 | status = dlm_register_net_handlers(); |
1823 | if (status) { | 1870 | if (status) { |
1824 | dlm_destroy_mle_cache(); | 1871 | mlog(ML_ERROR, "Unable to register network handlers\n"); |
1825 | return -1; | 1872 | goto error; |
1826 | } | 1873 | } |
1827 | 1874 | ||
1875 | status = dlm_create_debugfs_root(); | ||
1876 | if (status) | ||
1877 | goto error; | ||
1878 | |||
1828 | return 0; | 1879 | return 0; |
1880 | error: | ||
1881 | dlm_unregister_net_handlers(); | ||
1882 | dlm_destroy_lock_cache(); | ||
1883 | dlm_destroy_master_caches(); | ||
1884 | dlm_destroy_mle_cache(); | ||
1885 | return -1; | ||
1829 | } | 1886 | } |
1830 | 1887 | ||
1831 | static void __exit dlm_exit (void) | 1888 | static void __exit dlm_exit (void) |
1832 | { | 1889 | { |
1890 | dlm_destroy_debugfs_root(); | ||
1833 | dlm_unregister_net_handlers(); | 1891 | dlm_unregister_net_handlers(); |
1892 | dlm_destroy_lock_cache(); | ||
1893 | dlm_destroy_master_caches(); | ||
1834 | dlm_destroy_mle_cache(); | 1894 | dlm_destroy_mle_cache(); |
1835 | } | 1895 | } |
1836 | 1896 | ||
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 52578d907d9a..83a9f2972ac8 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c | |||
@@ -53,6 +53,8 @@ | |||
53 | #define MLOG_MASK_PREFIX ML_DLM | 53 | #define MLOG_MASK_PREFIX ML_DLM |
54 | #include "cluster/masklog.h" | 54 | #include "cluster/masklog.h" |
55 | 55 | ||
56 | static struct kmem_cache *dlm_lock_cache = NULL; | ||
57 | |||
56 | static DEFINE_SPINLOCK(dlm_cookie_lock); | 58 | static DEFINE_SPINLOCK(dlm_cookie_lock); |
57 | static u64 dlm_next_cookie = 1; | 59 | static u64 dlm_next_cookie = 1; |
58 | 60 | ||
@@ -64,6 +66,22 @@ static void dlm_init_lock(struct dlm_lock *newlock, int type, | |||
64 | static void dlm_lock_release(struct kref *kref); | 66 | static void dlm_lock_release(struct kref *kref); |
65 | static void dlm_lock_detach_lockres(struct dlm_lock *lock); | 67 | static void dlm_lock_detach_lockres(struct dlm_lock *lock); |
66 | 68 | ||
69 | int dlm_init_lock_cache(void) | ||
70 | { | ||
71 | dlm_lock_cache = kmem_cache_create("o2dlm_lock", | ||
72 | sizeof(struct dlm_lock), | ||
73 | 0, SLAB_HWCACHE_ALIGN, NULL); | ||
74 | if (dlm_lock_cache == NULL) | ||
75 | return -ENOMEM; | ||
76 | return 0; | ||
77 | } | ||
78 | |||
79 | void dlm_destroy_lock_cache(void) | ||
80 | { | ||
81 | if (dlm_lock_cache) | ||
82 | kmem_cache_destroy(dlm_lock_cache); | ||
83 | } | ||
84 | |||
67 | /* Tell us whether we can grant a new lock request. | 85 | /* Tell us whether we can grant a new lock request. |
68 | * locking: | 86 | * locking: |
69 | * caller needs: res->spinlock | 87 | * caller needs: res->spinlock |
@@ -353,7 +371,7 @@ static void dlm_lock_release(struct kref *kref) | |||
353 | mlog(0, "freeing kernel-allocated lksb\n"); | 371 | mlog(0, "freeing kernel-allocated lksb\n"); |
354 | kfree(lock->lksb); | 372 | kfree(lock->lksb); |
355 | } | 373 | } |
356 | kfree(lock); | 374 | kmem_cache_free(dlm_lock_cache, lock); |
357 | } | 375 | } |
358 | 376 | ||
359 | /* associate a lock with it's lockres, getting a ref on the lockres */ | 377 | /* associate a lock with it's lockres, getting a ref on the lockres */ |
@@ -412,7 +430,7 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie, | |||
412 | struct dlm_lock *lock; | 430 | struct dlm_lock *lock; |
413 | int kernel_allocated = 0; | 431 | int kernel_allocated = 0; |
414 | 432 | ||
415 | lock = kzalloc(sizeof(*lock), GFP_NOFS); | 433 | lock = (struct dlm_lock *) kmem_cache_zalloc(dlm_lock_cache, GFP_NOFS); |
416 | if (!lock) | 434 | if (!lock) |
417 | return NULL; | 435 | return NULL; |
418 | 436 | ||
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index ea6b89577860..efc015c6128a 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -48,47 +48,11 @@ | |||
48 | #include "dlmapi.h" | 48 | #include "dlmapi.h" |
49 | #include "dlmcommon.h" | 49 | #include "dlmcommon.h" |
50 | #include "dlmdomain.h" | 50 | #include "dlmdomain.h" |
51 | #include "dlmdebug.h" | ||
51 | 52 | ||
52 | #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_MASTER) | 53 | #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_MASTER) |
53 | #include "cluster/masklog.h" | 54 | #include "cluster/masklog.h" |
54 | 55 | ||
55 | enum dlm_mle_type { | ||
56 | DLM_MLE_BLOCK, | ||
57 | DLM_MLE_MASTER, | ||
58 | DLM_MLE_MIGRATION | ||
59 | }; | ||
60 | |||
61 | struct dlm_lock_name | ||
62 | { | ||
63 | u8 len; | ||
64 | u8 name[DLM_LOCKID_NAME_MAX]; | ||
65 | }; | ||
66 | |||
67 | struct dlm_master_list_entry | ||
68 | { | ||
69 | struct list_head list; | ||
70 | struct list_head hb_events; | ||
71 | struct dlm_ctxt *dlm; | ||
72 | spinlock_t spinlock; | ||
73 | wait_queue_head_t wq; | ||
74 | atomic_t woken; | ||
75 | struct kref mle_refs; | ||
76 | int inuse; | ||
77 | unsigned long maybe_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
78 | unsigned long vote_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
79 | unsigned long response_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
80 | unsigned long node_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
81 | u8 master; | ||
82 | u8 new_master; | ||
83 | enum dlm_mle_type type; | ||
84 | struct o2hb_callback_func mle_hb_up; | ||
85 | struct o2hb_callback_func mle_hb_down; | ||
86 | union { | ||
87 | struct dlm_lock_resource *res; | ||
88 | struct dlm_lock_name name; | ||
89 | } u; | ||
90 | }; | ||
91 | |||
92 | static void dlm_mle_node_down(struct dlm_ctxt *dlm, | 56 | static void dlm_mle_node_down(struct dlm_ctxt *dlm, |
93 | struct dlm_master_list_entry *mle, | 57 | struct dlm_master_list_entry *mle, |
94 | struct o2nm_node *node, | 58 | struct o2nm_node *node, |
@@ -128,98 +92,10 @@ static inline int dlm_mle_equal(struct dlm_ctxt *dlm, | |||
128 | return 1; | 92 | return 1; |
129 | } | 93 | } |
130 | 94 | ||
131 | #define dlm_print_nodemap(m) _dlm_print_nodemap(m,#m) | 95 | static struct kmem_cache *dlm_lockres_cache = NULL; |
132 | static void _dlm_print_nodemap(unsigned long *map, const char *mapname) | 96 | static struct kmem_cache *dlm_lockname_cache = NULL; |
133 | { | ||
134 | int i; | ||
135 | printk("%s=[ ", mapname); | ||
136 | for (i=0; i<O2NM_MAX_NODES; i++) | ||
137 | if (test_bit(i, map)) | ||
138 | printk("%d ", i); | ||
139 | printk("]"); | ||
140 | } | ||
141 | |||
142 | static void dlm_print_one_mle(struct dlm_master_list_entry *mle) | ||
143 | { | ||
144 | int refs; | ||
145 | char *type; | ||
146 | char attached; | ||
147 | u8 master; | ||
148 | unsigned int namelen; | ||
149 | const char *name; | ||
150 | struct kref *k; | ||
151 | unsigned long *maybe = mle->maybe_map, | ||
152 | *vote = mle->vote_map, | ||
153 | *resp = mle->response_map, | ||
154 | *node = mle->node_map; | ||
155 | |||
156 | k = &mle->mle_refs; | ||
157 | if (mle->type == DLM_MLE_BLOCK) | ||
158 | type = "BLK"; | ||
159 | else if (mle->type == DLM_MLE_MASTER) | ||
160 | type = "MAS"; | ||
161 | else | ||
162 | type = "MIG"; | ||
163 | refs = atomic_read(&k->refcount); | ||
164 | master = mle->master; | ||
165 | attached = (list_empty(&mle->hb_events) ? 'N' : 'Y'); | ||
166 | |||
167 | if (mle->type != DLM_MLE_MASTER) { | ||
168 | namelen = mle->u.name.len; | ||
169 | name = mle->u.name.name; | ||
170 | } else { | ||
171 | namelen = mle->u.res->lockname.len; | ||
172 | name = mle->u.res->lockname.name; | ||
173 | } | ||
174 | |||
175 | mlog(ML_NOTICE, "%.*s: %3s refs=%3d mas=%3u new=%3u evt=%c inuse=%d ", | ||
176 | namelen, name, type, refs, master, mle->new_master, attached, | ||
177 | mle->inuse); | ||
178 | dlm_print_nodemap(maybe); | ||
179 | printk(", "); | ||
180 | dlm_print_nodemap(vote); | ||
181 | printk(", "); | ||
182 | dlm_print_nodemap(resp); | ||
183 | printk(", "); | ||
184 | dlm_print_nodemap(node); | ||
185 | printk(", "); | ||
186 | printk("\n"); | ||
187 | } | ||
188 | |||
189 | #if 0 | ||
190 | /* Code here is included but defined out as it aids debugging */ | ||
191 | |||
192 | static void dlm_dump_mles(struct dlm_ctxt *dlm) | ||
193 | { | ||
194 | struct dlm_master_list_entry *mle; | ||
195 | |||
196 | mlog(ML_NOTICE, "dumping all mles for domain %s:\n", dlm->name); | ||
197 | spin_lock(&dlm->master_lock); | ||
198 | list_for_each_entry(mle, &dlm->master_list, list) | ||
199 | dlm_print_one_mle(mle); | ||
200 | spin_unlock(&dlm->master_lock); | ||
201 | } | ||
202 | |||
203 | int dlm_dump_all_mles(const char __user *data, unsigned int len) | ||
204 | { | ||
205 | struct dlm_ctxt *dlm; | ||
206 | |||
207 | spin_lock(&dlm_domain_lock); | ||
208 | list_for_each_entry(dlm, &dlm_domains, list) { | ||
209 | mlog(ML_NOTICE, "found dlm: %p, name=%s\n", dlm, dlm->name); | ||
210 | dlm_dump_mles(dlm); | ||
211 | } | ||
212 | spin_unlock(&dlm_domain_lock); | ||
213 | return len; | ||
214 | } | ||
215 | EXPORT_SYMBOL_GPL(dlm_dump_all_mles); | ||
216 | |||
217 | #endif /* 0 */ | ||
218 | |||
219 | |||
220 | static struct kmem_cache *dlm_mle_cache = NULL; | 97 | static struct kmem_cache *dlm_mle_cache = NULL; |
221 | 98 | ||
222 | |||
223 | static void dlm_mle_release(struct kref *kref); | 99 | static void dlm_mle_release(struct kref *kref); |
224 | static void dlm_init_mle(struct dlm_master_list_entry *mle, | 100 | static void dlm_init_mle(struct dlm_master_list_entry *mle, |
225 | enum dlm_mle_type type, | 101 | enum dlm_mle_type type, |
@@ -507,7 +383,7 @@ static void dlm_mle_node_up(struct dlm_ctxt *dlm, | |||
507 | 383 | ||
508 | int dlm_init_mle_cache(void) | 384 | int dlm_init_mle_cache(void) |
509 | { | 385 | { |
510 | dlm_mle_cache = kmem_cache_create("dlm_mle_cache", | 386 | dlm_mle_cache = kmem_cache_create("o2dlm_mle", |
511 | sizeof(struct dlm_master_list_entry), | 387 | sizeof(struct dlm_master_list_entry), |
512 | 0, SLAB_HWCACHE_ALIGN, | 388 | 0, SLAB_HWCACHE_ALIGN, |
513 | NULL); | 389 | NULL); |
@@ -560,6 +436,35 @@ static void dlm_mle_release(struct kref *kref) | |||
560 | * LOCK RESOURCE FUNCTIONS | 436 | * LOCK RESOURCE FUNCTIONS |
561 | */ | 437 | */ |
562 | 438 | ||
439 | int dlm_init_master_caches(void) | ||
440 | { | ||
441 | dlm_lockres_cache = kmem_cache_create("o2dlm_lockres", | ||
442 | sizeof(struct dlm_lock_resource), | ||
443 | 0, SLAB_HWCACHE_ALIGN, NULL); | ||
444 | if (!dlm_lockres_cache) | ||
445 | goto bail; | ||
446 | |||
447 | dlm_lockname_cache = kmem_cache_create("o2dlm_lockname", | ||
448 | DLM_LOCKID_NAME_MAX, 0, | ||
449 | SLAB_HWCACHE_ALIGN, NULL); | ||
450 | if (!dlm_lockname_cache) | ||
451 | goto bail; | ||
452 | |||
453 | return 0; | ||
454 | bail: | ||
455 | dlm_destroy_master_caches(); | ||
456 | return -ENOMEM; | ||
457 | } | ||
458 | |||
459 | void dlm_destroy_master_caches(void) | ||
460 | { | ||
461 | if (dlm_lockname_cache) | ||
462 | kmem_cache_destroy(dlm_lockname_cache); | ||
463 | |||
464 | if (dlm_lockres_cache) | ||
465 | kmem_cache_destroy(dlm_lockres_cache); | ||
466 | } | ||
467 | |||
563 | static void dlm_set_lockres_owner(struct dlm_ctxt *dlm, | 468 | static void dlm_set_lockres_owner(struct dlm_ctxt *dlm, |
564 | struct dlm_lock_resource *res, | 469 | struct dlm_lock_resource *res, |
565 | u8 owner) | 470 | u8 owner) |
@@ -610,6 +515,14 @@ static void dlm_lockres_release(struct kref *kref) | |||
610 | mlog(0, "destroying lockres %.*s\n", res->lockname.len, | 515 | mlog(0, "destroying lockres %.*s\n", res->lockname.len, |
611 | res->lockname.name); | 516 | res->lockname.name); |
612 | 517 | ||
518 | if (!list_empty(&res->tracking)) | ||
519 | list_del_init(&res->tracking); | ||
520 | else { | ||
521 | mlog(ML_ERROR, "Resource %.*s not on the Tracking list\n", | ||
522 | res->lockname.len, res->lockname.name); | ||
523 | dlm_print_one_lock_resource(res); | ||
524 | } | ||
525 | |||
613 | if (!hlist_unhashed(&res->hash_node) || | 526 | if (!hlist_unhashed(&res->hash_node) || |
614 | !list_empty(&res->granted) || | 527 | !list_empty(&res->granted) || |
615 | !list_empty(&res->converting) || | 528 | !list_empty(&res->converting) || |
@@ -642,9 +555,9 @@ static void dlm_lockres_release(struct kref *kref) | |||
642 | BUG_ON(!list_empty(&res->recovering)); | 555 | BUG_ON(!list_empty(&res->recovering)); |
643 | BUG_ON(!list_empty(&res->purge)); | 556 | BUG_ON(!list_empty(&res->purge)); |
644 | 557 | ||
645 | kfree(res->lockname.name); | 558 | kmem_cache_free(dlm_lockname_cache, (void *)res->lockname.name); |
646 | 559 | ||
647 | kfree(res); | 560 | kmem_cache_free(dlm_lockres_cache, res); |
648 | } | 561 | } |
649 | 562 | ||
650 | void dlm_lockres_put(struct dlm_lock_resource *res) | 563 | void dlm_lockres_put(struct dlm_lock_resource *res) |
@@ -677,6 +590,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, | |||
677 | INIT_LIST_HEAD(&res->dirty); | 590 | INIT_LIST_HEAD(&res->dirty); |
678 | INIT_LIST_HEAD(&res->recovering); | 591 | INIT_LIST_HEAD(&res->recovering); |
679 | INIT_LIST_HEAD(&res->purge); | 592 | INIT_LIST_HEAD(&res->purge); |
593 | INIT_LIST_HEAD(&res->tracking); | ||
680 | atomic_set(&res->asts_reserved, 0); | 594 | atomic_set(&res->asts_reserved, 0); |
681 | res->migration_pending = 0; | 595 | res->migration_pending = 0; |
682 | res->inflight_locks = 0; | 596 | res->inflight_locks = 0; |
@@ -692,6 +606,8 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, | |||
692 | 606 | ||
693 | res->last_used = 0; | 607 | res->last_used = 0; |
694 | 608 | ||
609 | list_add_tail(&res->tracking, &dlm->tracking_list); | ||
610 | |||
695 | memset(res->lvb, 0, DLM_LVB_LEN); | 611 | memset(res->lvb, 0, DLM_LVB_LEN); |
696 | memset(res->refmap, 0, sizeof(res->refmap)); | 612 | memset(res->refmap, 0, sizeof(res->refmap)); |
697 | } | 613 | } |
@@ -700,20 +616,28 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, | |||
700 | const char *name, | 616 | const char *name, |
701 | unsigned int namelen) | 617 | unsigned int namelen) |
702 | { | 618 | { |
703 | struct dlm_lock_resource *res; | 619 | struct dlm_lock_resource *res = NULL; |
704 | 620 | ||
705 | res = kmalloc(sizeof(struct dlm_lock_resource), GFP_NOFS); | 621 | res = (struct dlm_lock_resource *) |
622 | kmem_cache_zalloc(dlm_lockres_cache, GFP_NOFS); | ||
706 | if (!res) | 623 | if (!res) |
707 | return NULL; | 624 | goto error; |
708 | 625 | ||
709 | res->lockname.name = kmalloc(namelen, GFP_NOFS); | 626 | res->lockname.name = (char *) |
710 | if (!res->lockname.name) { | 627 | kmem_cache_zalloc(dlm_lockname_cache, GFP_NOFS); |
711 | kfree(res); | 628 | if (!res->lockname.name) |
712 | return NULL; | 629 | goto error; |
713 | } | ||
714 | 630 | ||
715 | dlm_init_lockres(dlm, res, name, namelen); | 631 | dlm_init_lockres(dlm, res, name, namelen); |
716 | return res; | 632 | return res; |
633 | |||
634 | error: | ||
635 | if (res && res->lockname.name) | ||
636 | kmem_cache_free(dlm_lockname_cache, (void *)res->lockname.name); | ||
637 | |||
638 | if (res) | ||
639 | kmem_cache_free(dlm_lockres_cache, res); | ||
640 | return NULL; | ||
717 | } | 641 | } |
718 | 642 | ||
719 | void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, | 643 | void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 1f1873bf41fb..394d25a131a5 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -27,18 +27,11 @@ | |||
27 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
28 | #include <linux/highmem.h> | 28 | #include <linux/highmem.h> |
29 | #include <linux/mm.h> | 29 | #include <linux/mm.h> |
30 | #include <linux/crc32.h> | ||
31 | #include <linux/kthread.h> | 30 | #include <linux/kthread.h> |
32 | #include <linux/pagemap.h> | 31 | #include <linux/pagemap.h> |
33 | #include <linux/debugfs.h> | 32 | #include <linux/debugfs.h> |
34 | #include <linux/seq_file.h> | 33 | #include <linux/seq_file.h> |
35 | 34 | ||
36 | #include <cluster/heartbeat.h> | ||
37 | #include <cluster/nodemanager.h> | ||
38 | #include <cluster/tcp.h> | ||
39 | |||
40 | #include <dlm/dlmapi.h> | ||
41 | |||
42 | #define MLOG_MASK_PREFIX ML_DLM_GLUE | 35 | #define MLOG_MASK_PREFIX ML_DLM_GLUE |
43 | #include <cluster/masklog.h> | 36 | #include <cluster/masklog.h> |
44 | 37 | ||
@@ -53,6 +46,7 @@ | |||
53 | #include "heartbeat.h" | 46 | #include "heartbeat.h" |
54 | #include "inode.h" | 47 | #include "inode.h" |
55 | #include "journal.h" | 48 | #include "journal.h" |
49 | #include "stackglue.h" | ||
56 | #include "slot_map.h" | 50 | #include "slot_map.h" |
57 | #include "super.h" | 51 | #include "super.h" |
58 | #include "uptodate.h" | 52 | #include "uptodate.h" |
@@ -113,7 +107,8 @@ static void ocfs2_dump_meta_lvb_info(u64 level, | |||
113 | unsigned int line, | 107 | unsigned int line, |
114 | struct ocfs2_lock_res *lockres) | 108 | struct ocfs2_lock_res *lockres) |
115 | { | 109 | { |
116 | struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; | 110 | struct ocfs2_meta_lvb *lvb = |
111 | (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); | ||
117 | 112 | ||
118 | mlog(level, "LVB information for %s (called from %s:%u):\n", | 113 | mlog(level, "LVB information for %s (called from %s:%u):\n", |
119 | lockres->l_name, function, line); | 114 | lockres->l_name, function, line); |
@@ -259,31 +254,6 @@ static struct ocfs2_lock_res_ops ocfs2_flock_lops = { | |||
259 | .flags = 0, | 254 | .flags = 0, |
260 | }; | 255 | }; |
261 | 256 | ||
262 | /* | ||
263 | * This is the filesystem locking protocol version. | ||
264 | * | ||
265 | * Whenever the filesystem does new things with locks (adds or removes a | ||
266 | * lock, orders them differently, does different things underneath a lock), | ||
267 | * the version must be changed. The protocol is negotiated when joining | ||
268 | * the dlm domain. A node may join the domain if its major version is | ||
269 | * identical to all other nodes and its minor version is greater than | ||
270 | * or equal to all other nodes. When its minor version is greater than | ||
271 | * the other nodes, it will run at the minor version specified by the | ||
272 | * other nodes. | ||
273 | * | ||
274 | * If a locking change is made that will not be compatible with older | ||
275 | * versions, the major number must be increased and the minor version set | ||
276 | * to zero. If a change merely adds a behavior that can be disabled when | ||
277 | * speaking to older versions, the minor version must be increased. If a | ||
278 | * change adds a fully backwards compatible change (eg, LVB changes that | ||
279 | * are just ignored by older versions), the version does not need to be | ||
280 | * updated. | ||
281 | */ | ||
282 | const struct dlm_protocol_version ocfs2_locking_protocol = { | ||
283 | .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, | ||
284 | .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, | ||
285 | }; | ||
286 | |||
287 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) | 257 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) |
288 | { | 258 | { |
289 | return lockres->l_type == OCFS2_LOCK_TYPE_META || | 259 | return lockres->l_type == OCFS2_LOCK_TYPE_META || |
@@ -316,7 +286,7 @@ static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *l | |||
316 | static int ocfs2_lock_create(struct ocfs2_super *osb, | 286 | static int ocfs2_lock_create(struct ocfs2_super *osb, |
317 | struct ocfs2_lock_res *lockres, | 287 | struct ocfs2_lock_res *lockres, |
318 | int level, | 288 | int level, |
319 | int dlm_flags); | 289 | u32 dlm_flags); |
320 | static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, | 290 | static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, |
321 | int wanted); | 291 | int wanted); |
322 | static void ocfs2_cluster_unlock(struct ocfs2_super *osb, | 292 | static void ocfs2_cluster_unlock(struct ocfs2_super *osb, |
@@ -330,10 +300,9 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, | |||
330 | struct ocfs2_lock_res *lockres); | 300 | struct ocfs2_lock_res *lockres); |
331 | static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, | 301 | static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, |
332 | int convert); | 302 | int convert); |
333 | #define ocfs2_log_dlm_error(_func, _stat, _lockres) do { \ | 303 | #define ocfs2_log_dlm_error(_func, _err, _lockres) do { \ |
334 | mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on " \ | 304 | mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \ |
335 | "resource %s: %s\n", dlm_errname(_stat), _func, \ | 305 | _err, _func, _lockres->l_name); \ |
336 | _lockres->l_name, dlm_errmsg(_stat)); \ | ||
337 | } while (0) | 306 | } while (0) |
338 | static int ocfs2_downconvert_thread(void *arg); | 307 | static int ocfs2_downconvert_thread(void *arg); |
339 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, | 308 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, |
@@ -342,12 +311,13 @@ static int ocfs2_inode_lock_update(struct inode *inode, | |||
342 | struct buffer_head **bh); | 311 | struct buffer_head **bh); |
343 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); | 312 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); |
344 | static inline int ocfs2_highest_compat_lock_level(int level); | 313 | static inline int ocfs2_highest_compat_lock_level(int level); |
345 | static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, | 314 | static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, |
346 | int new_level); | 315 | int new_level); |
347 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, | 316 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, |
348 | struct ocfs2_lock_res *lockres, | 317 | struct ocfs2_lock_res *lockres, |
349 | int new_level, | 318 | int new_level, |
350 | int lvb); | 319 | int lvb, |
320 | unsigned int generation); | ||
351 | static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, | 321 | static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, |
352 | struct ocfs2_lock_res *lockres); | 322 | struct ocfs2_lock_res *lockres); |
353 | static int ocfs2_cancel_convert(struct ocfs2_super *osb, | 323 | static int ocfs2_cancel_convert(struct ocfs2_super *osb, |
@@ -406,9 +376,9 @@ static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, | |||
406 | res->l_ops = ops; | 376 | res->l_ops = ops; |
407 | res->l_priv = priv; | 377 | res->l_priv = priv; |
408 | 378 | ||
409 | res->l_level = LKM_IVMODE; | 379 | res->l_level = DLM_LOCK_IV; |
410 | res->l_requested = LKM_IVMODE; | 380 | res->l_requested = DLM_LOCK_IV; |
411 | res->l_blocking = LKM_IVMODE; | 381 | res->l_blocking = DLM_LOCK_IV; |
412 | res->l_action = OCFS2_AST_INVALID; | 382 | res->l_action = OCFS2_AST_INVALID; |
413 | res->l_unlock_action = OCFS2_UNLOCK_INVALID; | 383 | res->l_unlock_action = OCFS2_UNLOCK_INVALID; |
414 | 384 | ||
@@ -604,10 +574,10 @@ static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, | |||
604 | BUG_ON(!lockres); | 574 | BUG_ON(!lockres); |
605 | 575 | ||
606 | switch(level) { | 576 | switch(level) { |
607 | case LKM_EXMODE: | 577 | case DLM_LOCK_EX: |
608 | lockres->l_ex_holders++; | 578 | lockres->l_ex_holders++; |
609 | break; | 579 | break; |
610 | case LKM_PRMODE: | 580 | case DLM_LOCK_PR: |
611 | lockres->l_ro_holders++; | 581 | lockres->l_ro_holders++; |
612 | break; | 582 | break; |
613 | default: | 583 | default: |
@@ -625,11 +595,11 @@ static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, | |||
625 | BUG_ON(!lockres); | 595 | BUG_ON(!lockres); |
626 | 596 | ||
627 | switch(level) { | 597 | switch(level) { |
628 | case LKM_EXMODE: | 598 | case DLM_LOCK_EX: |
629 | BUG_ON(!lockres->l_ex_holders); | 599 | BUG_ON(!lockres->l_ex_holders); |
630 | lockres->l_ex_holders--; | 600 | lockres->l_ex_holders--; |
631 | break; | 601 | break; |
632 | case LKM_PRMODE: | 602 | case DLM_LOCK_PR: |
633 | BUG_ON(!lockres->l_ro_holders); | 603 | BUG_ON(!lockres->l_ro_holders); |
634 | lockres->l_ro_holders--; | 604 | lockres->l_ro_holders--; |
635 | break; | 605 | break; |
@@ -644,12 +614,12 @@ static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, | |||
644 | * lock types are added. */ | 614 | * lock types are added. */ |
645 | static inline int ocfs2_highest_compat_lock_level(int level) | 615 | static inline int ocfs2_highest_compat_lock_level(int level) |
646 | { | 616 | { |
647 | int new_level = LKM_EXMODE; | 617 | int new_level = DLM_LOCK_EX; |
648 | 618 | ||
649 | if (level == LKM_EXMODE) | 619 | if (level == DLM_LOCK_EX) |
650 | new_level = LKM_NLMODE; | 620 | new_level = DLM_LOCK_NL; |
651 | else if (level == LKM_PRMODE) | 621 | else if (level == DLM_LOCK_PR) |
652 | new_level = LKM_PRMODE; | 622 | new_level = DLM_LOCK_PR; |
653 | return new_level; | 623 | return new_level; |
654 | } | 624 | } |
655 | 625 | ||
@@ -688,12 +658,12 @@ static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res | |||
688 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); | 658 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); |
689 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); | 659 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); |
690 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); | 660 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); |
691 | BUG_ON(lockres->l_blocking <= LKM_NLMODE); | 661 | BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); |
692 | 662 | ||
693 | lockres->l_level = lockres->l_requested; | 663 | lockres->l_level = lockres->l_requested; |
694 | if (lockres->l_level <= | 664 | if (lockres->l_level <= |
695 | ocfs2_highest_compat_lock_level(lockres->l_blocking)) { | 665 | ocfs2_highest_compat_lock_level(lockres->l_blocking)) { |
696 | lockres->l_blocking = LKM_NLMODE; | 666 | lockres->l_blocking = DLM_LOCK_NL; |
697 | lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); | 667 | lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); |
698 | } | 668 | } |
699 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 669 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); |
@@ -712,7 +682,7 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo | |||
712 | * information is already up to data. Convert from NL to | 682 | * information is already up to data. Convert from NL to |
713 | * *anything* however should mark ourselves as needing an | 683 | * *anything* however should mark ourselves as needing an |
714 | * update */ | 684 | * update */ |
715 | if (lockres->l_level == LKM_NLMODE && | 685 | if (lockres->l_level == DLM_LOCK_NL && |
716 | lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) | 686 | lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) |
717 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | 687 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); |
718 | 688 | ||
@@ -729,7 +699,7 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc | |||
729 | BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); | 699 | BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); |
730 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); | 700 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); |
731 | 701 | ||
732 | if (lockres->l_requested > LKM_NLMODE && | 702 | if (lockres->l_requested > DLM_LOCK_NL && |
733 | !(lockres->l_flags & OCFS2_LOCK_LOCAL) && | 703 | !(lockres->l_flags & OCFS2_LOCK_LOCAL) && |
734 | lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) | 704 | lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) |
735 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | 705 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); |
@@ -767,6 +737,113 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, | |||
767 | return needs_downconvert; | 737 | return needs_downconvert; |
768 | } | 738 | } |
769 | 739 | ||
740 | /* | ||
741 | * OCFS2_LOCK_PENDING and l_pending_gen. | ||
742 | * | ||
743 | * Why does OCFS2_LOCK_PENDING exist? To close a race between setting | ||
744 | * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock(). See ocfs2_unblock_lock() | ||
745 | * for more details on the race. | ||
746 | * | ||
747 | * OCFS2_LOCK_PENDING closes the race quite nicely. However, it introduces | ||
748 | * a race on itself. In o2dlm, we can get the ast before ocfs2_dlm_lock() | ||
749 | * returns. The ast clears OCFS2_LOCK_BUSY, and must therefore clear | ||
750 | * OCFS2_LOCK_PENDING at the same time. When ocfs2_dlm_lock() returns, | ||
751 | * the caller is going to try to clear PENDING again. If nothing else is | ||
752 | * happening, __lockres_clear_pending() sees PENDING is unset and does | ||
753 | * nothing. | ||
754 | * | ||
755 | * But what if another path (eg downconvert thread) has just started a | ||
756 | * new locking action? The other path has re-set PENDING. Our path | ||
757 | * cannot clear PENDING, because that will re-open the original race | ||
758 | * window. | ||
759 | * | ||
760 | * [Example] | ||
761 | * | ||
762 | * ocfs2_meta_lock() | ||
763 | * ocfs2_cluster_lock() | ||
764 | * set BUSY | ||
765 | * set PENDING | ||
766 | * drop l_lock | ||
767 | * ocfs2_dlm_lock() | ||
768 | * ocfs2_locking_ast() ocfs2_downconvert_thread() | ||
769 | * clear PENDING ocfs2_unblock_lock() | ||
770 | * take_l_lock | ||
771 | * !BUSY | ||
772 | * ocfs2_prepare_downconvert() | ||
773 | * set BUSY | ||
774 | * set PENDING | ||
775 | * drop l_lock | ||
776 | * take l_lock | ||
777 | * clear PENDING | ||
778 | * drop l_lock | ||
779 | * <window> | ||
780 | * ocfs2_dlm_lock() | ||
781 | * | ||
782 | * So as you can see, we now have a window where l_lock is not held, | ||
783 | * PENDING is not set, and ocfs2_dlm_lock() has not been called. | ||
784 | * | ||
785 | * The core problem is that ocfs2_cluster_lock() has cleared the PENDING | ||
786 | * set by ocfs2_prepare_downconvert(). That wasn't nice. | ||
787 | * | ||
788 | * To solve this we introduce l_pending_gen. A call to | ||
789 | * lockres_clear_pending() will only do so when it is passed a generation | ||
790 | * number that matches the lockres. lockres_set_pending() will return the | ||
791 | * current generation number. When ocfs2_cluster_lock() goes to clear | ||
792 | * PENDING, it passes the generation it got from set_pending(). In our | ||
793 | * example above, the generation numbers will *not* match. Thus, | ||
794 | * ocfs2_cluster_lock() will not clear the PENDING set by | ||
795 | * ocfs2_prepare_downconvert(). | ||
796 | */ | ||
797 | |||
798 | /* Unlocked version for ocfs2_locking_ast() */ | ||
799 | static void __lockres_clear_pending(struct ocfs2_lock_res *lockres, | ||
800 | unsigned int generation, | ||
801 | struct ocfs2_super *osb) | ||
802 | { | ||
803 | assert_spin_locked(&lockres->l_lock); | ||
804 | |||
805 | /* | ||
806 | * The ast and locking functions can race us here. The winner | ||
807 | * will clear pending, the loser will not. | ||
808 | */ | ||
809 | if (!(lockres->l_flags & OCFS2_LOCK_PENDING) || | ||
810 | (lockres->l_pending_gen != generation)) | ||
811 | return; | ||
812 | |||
813 | lockres_clear_flags(lockres, OCFS2_LOCK_PENDING); | ||
814 | lockres->l_pending_gen++; | ||
815 | |||
816 | /* | ||
817 | * The downconvert thread may have skipped us because we | ||
818 | * were PENDING. Wake it up. | ||
819 | */ | ||
820 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) | ||
821 | ocfs2_wake_downconvert_thread(osb); | ||
822 | } | ||
823 | |||
824 | /* Locked version for callers of ocfs2_dlm_lock() */ | ||
825 | static void lockres_clear_pending(struct ocfs2_lock_res *lockres, | ||
826 | unsigned int generation, | ||
827 | struct ocfs2_super *osb) | ||
828 | { | ||
829 | unsigned long flags; | ||
830 | |||
831 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
832 | __lockres_clear_pending(lockres, generation, osb); | ||
833 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
834 | } | ||
835 | |||
836 | static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres) | ||
837 | { | ||
838 | assert_spin_locked(&lockres->l_lock); | ||
839 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); | ||
840 | |||
841 | lockres_or_flags(lockres, OCFS2_LOCK_PENDING); | ||
842 | |||
843 | return lockres->l_pending_gen; | ||
844 | } | ||
845 | |||
846 | |||
770 | static void ocfs2_blocking_ast(void *opaque, int level) | 847 | static void ocfs2_blocking_ast(void *opaque, int level) |
771 | { | 848 | { |
772 | struct ocfs2_lock_res *lockres = opaque; | 849 | struct ocfs2_lock_res *lockres = opaque; |
@@ -774,7 +851,7 @@ static void ocfs2_blocking_ast(void *opaque, int level) | |||
774 | int needs_downconvert; | 851 | int needs_downconvert; |
775 | unsigned long flags; | 852 | unsigned long flags; |
776 | 853 | ||
777 | BUG_ON(level <= LKM_NLMODE); | 854 | BUG_ON(level <= DLM_LOCK_NL); |
778 | 855 | ||
779 | mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", | 856 | mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", |
780 | lockres->l_name, level, lockres->l_level, | 857 | lockres->l_name, level, lockres->l_level, |
@@ -801,14 +878,22 @@ static void ocfs2_blocking_ast(void *opaque, int level) | |||
801 | static void ocfs2_locking_ast(void *opaque) | 878 | static void ocfs2_locking_ast(void *opaque) |
802 | { | 879 | { |
803 | struct ocfs2_lock_res *lockres = opaque; | 880 | struct ocfs2_lock_res *lockres = opaque; |
804 | struct dlm_lockstatus *lksb = &lockres->l_lksb; | 881 | struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); |
805 | unsigned long flags; | 882 | unsigned long flags; |
883 | int status; | ||
806 | 884 | ||
807 | spin_lock_irqsave(&lockres->l_lock, flags); | 885 | spin_lock_irqsave(&lockres->l_lock, flags); |
808 | 886 | ||
809 | if (lksb->status != DLM_NORMAL) { | 887 | status = ocfs2_dlm_lock_status(&lockres->l_lksb); |
810 | mlog(ML_ERROR, "lockres %s: lksb status value of %u!\n", | 888 | |
811 | lockres->l_name, lksb->status); | 889 | if (status == -EAGAIN) { |
890 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | ||
891 | goto out; | ||
892 | } | ||
893 | |||
894 | if (status) { | ||
895 | mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n", | ||
896 | lockres->l_name, status); | ||
812 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 897 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
813 | return; | 898 | return; |
814 | } | 899 | } |
@@ -831,11 +916,23 @@ static void ocfs2_locking_ast(void *opaque) | |||
831 | lockres->l_unlock_action); | 916 | lockres->l_unlock_action); |
832 | BUG(); | 917 | BUG(); |
833 | } | 918 | } |
834 | 919 | out: | |
835 | /* set it to something invalid so if we get called again we | 920 | /* set it to something invalid so if we get called again we |
836 | * can catch it. */ | 921 | * can catch it. */ |
837 | lockres->l_action = OCFS2_AST_INVALID; | 922 | lockres->l_action = OCFS2_AST_INVALID; |
838 | 923 | ||
924 | /* Did we try to cancel this lock? Clear that state */ | ||
925 | if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) | ||
926 | lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; | ||
927 | |||
928 | /* | ||
929 | * We may have beaten the locking functions here. We certainly | ||
930 | * know that dlm_lock() has been called :-) | ||
931 | * Because we can't have two lock calls in flight at once, we | ||
932 | * can use lockres->l_pending_gen. | ||
933 | */ | ||
934 | __lockres_clear_pending(lockres, lockres->l_pending_gen, osb); | ||
935 | |||
839 | wake_up(&lockres->l_event); | 936 | wake_up(&lockres->l_event); |
840 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 937 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
841 | } | 938 | } |
@@ -865,15 +962,15 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, | |||
865 | static int ocfs2_lock_create(struct ocfs2_super *osb, | 962 | static int ocfs2_lock_create(struct ocfs2_super *osb, |
866 | struct ocfs2_lock_res *lockres, | 963 | struct ocfs2_lock_res *lockres, |
867 | int level, | 964 | int level, |
868 | int dlm_flags) | 965 | u32 dlm_flags) |
869 | { | 966 | { |
870 | int ret = 0; | 967 | int ret = 0; |
871 | enum dlm_status status = DLM_NORMAL; | ||
872 | unsigned long flags; | 968 | unsigned long flags; |
969 | unsigned int gen; | ||
873 | 970 | ||
874 | mlog_entry_void(); | 971 | mlog_entry_void(); |
875 | 972 | ||
876 | mlog(0, "lock %s, level = %d, flags = %d\n", lockres->l_name, level, | 973 | mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level, |
877 | dlm_flags); | 974 | dlm_flags); |
878 | 975 | ||
879 | spin_lock_irqsave(&lockres->l_lock, flags); | 976 | spin_lock_irqsave(&lockres->l_lock, flags); |
@@ -886,24 +983,23 @@ static int ocfs2_lock_create(struct ocfs2_super *osb, | |||
886 | lockres->l_action = OCFS2_AST_ATTACH; | 983 | lockres->l_action = OCFS2_AST_ATTACH; |
887 | lockres->l_requested = level; | 984 | lockres->l_requested = level; |
888 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 985 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); |
986 | gen = lockres_set_pending(lockres); | ||
889 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 987 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
890 | 988 | ||
891 | status = dlmlock(osb->dlm, | 989 | ret = ocfs2_dlm_lock(osb->cconn, |
892 | level, | 990 | level, |
893 | &lockres->l_lksb, | 991 | &lockres->l_lksb, |
894 | dlm_flags, | 992 | dlm_flags, |
895 | lockres->l_name, | 993 | lockres->l_name, |
896 | OCFS2_LOCK_ID_MAX_LEN - 1, | 994 | OCFS2_LOCK_ID_MAX_LEN - 1, |
897 | ocfs2_locking_ast, | 995 | lockres); |
898 | lockres, | 996 | lockres_clear_pending(lockres, gen, osb); |
899 | ocfs2_blocking_ast); | 997 | if (ret) { |
900 | if (status != DLM_NORMAL) { | 998 | ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); |
901 | ocfs2_log_dlm_error("dlmlock", status, lockres); | ||
902 | ret = -EINVAL; | ||
903 | ocfs2_recover_from_dlm_error(lockres, 1); | 999 | ocfs2_recover_from_dlm_error(lockres, 1); |
904 | } | 1000 | } |
905 | 1001 | ||
906 | mlog(0, "lock %s, successfull return from dlmlock\n", lockres->l_name); | 1002 | mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name); |
907 | 1003 | ||
908 | bail: | 1004 | bail: |
909 | mlog_exit(ret); | 1005 | mlog_exit(ret); |
@@ -1016,21 +1112,22 @@ static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, | |||
1016 | static int ocfs2_cluster_lock(struct ocfs2_super *osb, | 1112 | static int ocfs2_cluster_lock(struct ocfs2_super *osb, |
1017 | struct ocfs2_lock_res *lockres, | 1113 | struct ocfs2_lock_res *lockres, |
1018 | int level, | 1114 | int level, |
1019 | int lkm_flags, | 1115 | u32 lkm_flags, |
1020 | int arg_flags) | 1116 | int arg_flags) |
1021 | { | 1117 | { |
1022 | struct ocfs2_mask_waiter mw; | 1118 | struct ocfs2_mask_waiter mw; |
1023 | enum dlm_status status; | ||
1024 | int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); | 1119 | int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); |
1025 | int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */ | 1120 | int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */ |
1026 | unsigned long flags; | 1121 | unsigned long flags; |
1122 | unsigned int gen; | ||
1123 | int noqueue_attempted = 0; | ||
1027 | 1124 | ||
1028 | mlog_entry_void(); | 1125 | mlog_entry_void(); |
1029 | 1126 | ||
1030 | ocfs2_init_mask_waiter(&mw); | 1127 | ocfs2_init_mask_waiter(&mw); |
1031 | 1128 | ||
1032 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) | 1129 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) |
1033 | lkm_flags |= LKM_VALBLK; | 1130 | lkm_flags |= DLM_LKF_VALBLK; |
1034 | 1131 | ||
1035 | again: | 1132 | again: |
1036 | wait = 0; | 1133 | wait = 0; |
@@ -1068,52 +1165,56 @@ again: | |||
1068 | } | 1165 | } |
1069 | 1166 | ||
1070 | if (level > lockres->l_level) { | 1167 | if (level > lockres->l_level) { |
1168 | if (noqueue_attempted > 0) { | ||
1169 | ret = -EAGAIN; | ||
1170 | goto unlock; | ||
1171 | } | ||
1172 | if (lkm_flags & DLM_LKF_NOQUEUE) | ||
1173 | noqueue_attempted = 1; | ||
1174 | |||
1071 | if (lockres->l_action != OCFS2_AST_INVALID) | 1175 | if (lockres->l_action != OCFS2_AST_INVALID) |
1072 | mlog(ML_ERROR, "lockres %s has action %u pending\n", | 1176 | mlog(ML_ERROR, "lockres %s has action %u pending\n", |
1073 | lockres->l_name, lockres->l_action); | 1177 | lockres->l_name, lockres->l_action); |
1074 | 1178 | ||
1075 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { | 1179 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { |
1076 | lockres->l_action = OCFS2_AST_ATTACH; | 1180 | lockres->l_action = OCFS2_AST_ATTACH; |
1077 | lkm_flags &= ~LKM_CONVERT; | 1181 | lkm_flags &= ~DLM_LKF_CONVERT; |
1078 | } else { | 1182 | } else { |
1079 | lockres->l_action = OCFS2_AST_CONVERT; | 1183 | lockres->l_action = OCFS2_AST_CONVERT; |
1080 | lkm_flags |= LKM_CONVERT; | 1184 | lkm_flags |= DLM_LKF_CONVERT; |
1081 | } | 1185 | } |
1082 | 1186 | ||
1083 | lockres->l_requested = level; | 1187 | lockres->l_requested = level; |
1084 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 1188 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); |
1189 | gen = lockres_set_pending(lockres); | ||
1085 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1190 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1086 | 1191 | ||
1087 | BUG_ON(level == LKM_IVMODE); | 1192 | BUG_ON(level == DLM_LOCK_IV); |
1088 | BUG_ON(level == LKM_NLMODE); | 1193 | BUG_ON(level == DLM_LOCK_NL); |
1089 | 1194 | ||
1090 | mlog(0, "lock %s, convert from %d to level = %d\n", | 1195 | mlog(0, "lock %s, convert from %d to level = %d\n", |
1091 | lockres->l_name, lockres->l_level, level); | 1196 | lockres->l_name, lockres->l_level, level); |
1092 | 1197 | ||
1093 | /* call dlm_lock to upgrade lock now */ | 1198 | /* call dlm_lock to upgrade lock now */ |
1094 | status = dlmlock(osb->dlm, | 1199 | ret = ocfs2_dlm_lock(osb->cconn, |
1095 | level, | 1200 | level, |
1096 | &lockres->l_lksb, | 1201 | &lockres->l_lksb, |
1097 | lkm_flags, | 1202 | lkm_flags, |
1098 | lockres->l_name, | 1203 | lockres->l_name, |
1099 | OCFS2_LOCK_ID_MAX_LEN - 1, | 1204 | OCFS2_LOCK_ID_MAX_LEN - 1, |
1100 | ocfs2_locking_ast, | 1205 | lockres); |
1101 | lockres, | 1206 | lockres_clear_pending(lockres, gen, osb); |
1102 | ocfs2_blocking_ast); | 1207 | if (ret) { |
1103 | if (status != DLM_NORMAL) { | 1208 | if (!(lkm_flags & DLM_LKF_NOQUEUE) || |
1104 | if ((lkm_flags & LKM_NOQUEUE) && | 1209 | (ret != -EAGAIN)) { |
1105 | (status == DLM_NOTQUEUED)) | 1210 | ocfs2_log_dlm_error("ocfs2_dlm_lock", |
1106 | ret = -EAGAIN; | 1211 | ret, lockres); |
1107 | else { | ||
1108 | ocfs2_log_dlm_error("dlmlock", status, | ||
1109 | lockres); | ||
1110 | ret = -EINVAL; | ||
1111 | } | 1212 | } |
1112 | ocfs2_recover_from_dlm_error(lockres, 1); | 1213 | ocfs2_recover_from_dlm_error(lockres, 1); |
1113 | goto out; | 1214 | goto out; |
1114 | } | 1215 | } |
1115 | 1216 | ||
1116 | mlog(0, "lock %s, successfull return from dlmlock\n", | 1217 | mlog(0, "lock %s, successfull return from ocfs2_dlm_lock\n", |
1117 | lockres->l_name); | 1218 | lockres->l_name); |
1118 | 1219 | ||
1119 | /* At this point we've gone inside the dlm and need to | 1220 | /* At this point we've gone inside the dlm and need to |
@@ -1177,9 +1278,9 @@ static int ocfs2_create_new_lock(struct ocfs2_super *osb, | |||
1177 | int ex, | 1278 | int ex, |
1178 | int local) | 1279 | int local) |
1179 | { | 1280 | { |
1180 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 1281 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
1181 | unsigned long flags; | 1282 | unsigned long flags; |
1182 | int lkm_flags = local ? LKM_LOCAL : 0; | 1283 | u32 lkm_flags = local ? DLM_LKF_LOCAL : 0; |
1183 | 1284 | ||
1184 | spin_lock_irqsave(&lockres->l_lock, flags); | 1285 | spin_lock_irqsave(&lockres->l_lock, flags); |
1185 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); | 1286 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); |
@@ -1222,7 +1323,7 @@ int ocfs2_create_new_inode_locks(struct inode *inode) | |||
1222 | } | 1323 | } |
1223 | 1324 | ||
1224 | /* | 1325 | /* |
1225 | * We don't want to use LKM_LOCAL on a meta data lock as they | 1326 | * We don't want to use DLM_LKF_LOCAL on a meta data lock as they |
1226 | * don't use a generation in their lock names. | 1327 | * don't use a generation in their lock names. |
1227 | */ | 1328 | */ |
1228 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); | 1329 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); |
@@ -1261,7 +1362,7 @@ int ocfs2_rw_lock(struct inode *inode, int write) | |||
1261 | 1362 | ||
1262 | lockres = &OCFS2_I(inode)->ip_rw_lockres; | 1363 | lockres = &OCFS2_I(inode)->ip_rw_lockres; |
1263 | 1364 | ||
1264 | level = write ? LKM_EXMODE : LKM_PRMODE; | 1365 | level = write ? DLM_LOCK_EX : DLM_LOCK_PR; |
1265 | 1366 | ||
1266 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, | 1367 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, |
1267 | 0); | 1368 | 0); |
@@ -1274,7 +1375,7 @@ int ocfs2_rw_lock(struct inode *inode, int write) | |||
1274 | 1375 | ||
1275 | void ocfs2_rw_unlock(struct inode *inode, int write) | 1376 | void ocfs2_rw_unlock(struct inode *inode, int write) |
1276 | { | 1377 | { |
1277 | int level = write ? LKM_EXMODE : LKM_PRMODE; | 1378 | int level = write ? DLM_LOCK_EX : DLM_LOCK_PR; |
1278 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; | 1379 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; |
1279 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1380 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1280 | 1381 | ||
@@ -1312,7 +1413,7 @@ int ocfs2_open_lock(struct inode *inode) | |||
1312 | lockres = &OCFS2_I(inode)->ip_open_lockres; | 1413 | lockres = &OCFS2_I(inode)->ip_open_lockres; |
1313 | 1414 | ||
1314 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, | 1415 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, |
1315 | LKM_PRMODE, 0, 0); | 1416 | DLM_LOCK_PR, 0, 0); |
1316 | if (status < 0) | 1417 | if (status < 0) |
1317 | mlog_errno(status); | 1418 | mlog_errno(status); |
1318 | 1419 | ||
@@ -1340,16 +1441,16 @@ int ocfs2_try_open_lock(struct inode *inode, int write) | |||
1340 | 1441 | ||
1341 | lockres = &OCFS2_I(inode)->ip_open_lockres; | 1442 | lockres = &OCFS2_I(inode)->ip_open_lockres; |
1342 | 1443 | ||
1343 | level = write ? LKM_EXMODE : LKM_PRMODE; | 1444 | level = write ? DLM_LOCK_EX : DLM_LOCK_PR; |
1344 | 1445 | ||
1345 | /* | 1446 | /* |
1346 | * The file system may already holding a PRMODE/EXMODE open lock. | 1447 | * The file system may already holding a PRMODE/EXMODE open lock. |
1347 | * Since we pass LKM_NOQUEUE, the request won't block waiting on | 1448 | * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on |
1348 | * other nodes and the -EAGAIN will indicate to the caller that | 1449 | * other nodes and the -EAGAIN will indicate to the caller that |
1349 | * this inode is still in use. | 1450 | * this inode is still in use. |
1350 | */ | 1451 | */ |
1351 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, | 1452 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, |
1352 | level, LKM_NOQUEUE, 0); | 1453 | level, DLM_LKF_NOQUEUE, 0); |
1353 | 1454 | ||
1354 | out: | 1455 | out: |
1355 | mlog_exit(status); | 1456 | mlog_exit(status); |
@@ -1374,10 +1475,10 @@ void ocfs2_open_unlock(struct inode *inode) | |||
1374 | 1475 | ||
1375 | if(lockres->l_ro_holders) | 1476 | if(lockres->l_ro_holders) |
1376 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, | 1477 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, |
1377 | LKM_PRMODE); | 1478 | DLM_LOCK_PR); |
1378 | if(lockres->l_ex_holders) | 1479 | if(lockres->l_ex_holders) |
1379 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, | 1480 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, |
1380 | LKM_EXMODE); | 1481 | DLM_LOCK_EX); |
1381 | 1482 | ||
1382 | out: | 1483 | out: |
1383 | mlog_exit_void(); | 1484 | mlog_exit_void(); |
@@ -1464,7 +1565,7 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock) | |||
1464 | ocfs2_init_mask_waiter(&mw); | 1565 | ocfs2_init_mask_waiter(&mw); |
1465 | 1566 | ||
1466 | if ((lockres->l_flags & OCFS2_LOCK_BUSY) || | 1567 | if ((lockres->l_flags & OCFS2_LOCK_BUSY) || |
1467 | (lockres->l_level > LKM_NLMODE)) { | 1568 | (lockres->l_level > DLM_LOCK_NL)) { |
1468 | mlog(ML_ERROR, | 1569 | mlog(ML_ERROR, |
1469 | "File lock \"%s\" has busy or locked state: flags: 0x%lx, " | 1570 | "File lock \"%s\" has busy or locked state: flags: 0x%lx, " |
1470 | "level: %u\n", lockres->l_name, lockres->l_flags, | 1571 | "level: %u\n", lockres->l_name, lockres->l_flags, |
@@ -1503,14 +1604,12 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock) | |||
1503 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 1604 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); |
1504 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1605 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1505 | 1606 | ||
1506 | ret = dlmlock(osb->dlm, level, &lockres->l_lksb, lkm_flags, | 1607 | ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags, |
1507 | lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1, | 1608 | lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1, |
1508 | ocfs2_locking_ast, lockres, ocfs2_blocking_ast); | 1609 | lockres); |
1509 | if (ret != DLM_NORMAL) { | 1610 | if (ret) { |
1510 | if (trylock && ret == DLM_NOTQUEUED) | 1611 | if (!trylock || (ret != -EAGAIN)) { |
1511 | ret = -EAGAIN; | 1612 | ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); |
1512 | else { | ||
1513 | ocfs2_log_dlm_error("dlmlock", ret, lockres); | ||
1514 | ret = -EINVAL; | 1613 | ret = -EINVAL; |
1515 | } | 1614 | } |
1516 | 1615 | ||
@@ -1537,6 +1636,10 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock) | |||
1537 | * to just bubble sucess back up to the user. | 1636 | * to just bubble sucess back up to the user. |
1538 | */ | 1637 | */ |
1539 | ret = ocfs2_flock_handle_signal(lockres, level); | 1638 | ret = ocfs2_flock_handle_signal(lockres, level); |
1639 | } else if (!ret && (level > lockres->l_level)) { | ||
1640 | /* Trylock failed asynchronously */ | ||
1641 | BUG_ON(!trylock); | ||
1642 | ret = -EAGAIN; | ||
1540 | } | 1643 | } |
1541 | 1644 | ||
1542 | out: | 1645 | out: |
@@ -1549,6 +1652,7 @@ out: | |||
1549 | void ocfs2_file_unlock(struct file *file) | 1652 | void ocfs2_file_unlock(struct file *file) |
1550 | { | 1653 | { |
1551 | int ret; | 1654 | int ret; |
1655 | unsigned int gen; | ||
1552 | unsigned long flags; | 1656 | unsigned long flags; |
1553 | struct ocfs2_file_private *fp = file->private_data; | 1657 | struct ocfs2_file_private *fp = file->private_data; |
1554 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | 1658 | struct ocfs2_lock_res *lockres = &fp->fp_flock; |
@@ -1572,13 +1676,13 @@ void ocfs2_file_unlock(struct file *file) | |||
1572 | * Fake a blocking ast for the downconvert code. | 1676 | * Fake a blocking ast for the downconvert code. |
1573 | */ | 1677 | */ |
1574 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); | 1678 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); |
1575 | lockres->l_blocking = LKM_EXMODE; | 1679 | lockres->l_blocking = DLM_LOCK_EX; |
1576 | 1680 | ||
1577 | ocfs2_prepare_downconvert(lockres, LKM_NLMODE); | 1681 | gen = ocfs2_prepare_downconvert(lockres, LKM_NLMODE); |
1578 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 1682 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); |
1579 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1683 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1580 | 1684 | ||
1581 | ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0); | 1685 | ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0, gen); |
1582 | if (ret) { | 1686 | if (ret) { |
1583 | mlog_errno(ret); | 1687 | mlog_errno(ret); |
1584 | return; | 1688 | return; |
@@ -1601,11 +1705,11 @@ static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, | |||
1601 | * condition. */ | 1705 | * condition. */ |
1602 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { | 1706 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { |
1603 | switch(lockres->l_blocking) { | 1707 | switch(lockres->l_blocking) { |
1604 | case LKM_EXMODE: | 1708 | case DLM_LOCK_EX: |
1605 | if (!lockres->l_ex_holders && !lockres->l_ro_holders) | 1709 | if (!lockres->l_ex_holders && !lockres->l_ro_holders) |
1606 | kick = 1; | 1710 | kick = 1; |
1607 | break; | 1711 | break; |
1608 | case LKM_PRMODE: | 1712 | case DLM_LOCK_PR: |
1609 | if (!lockres->l_ex_holders) | 1713 | if (!lockres->l_ex_holders) |
1610 | kick = 1; | 1714 | kick = 1; |
1611 | break; | 1715 | break; |
@@ -1648,7 +1752,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) | |||
1648 | 1752 | ||
1649 | mlog_entry_void(); | 1753 | mlog_entry_void(); |
1650 | 1754 | ||
1651 | lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; | 1755 | lvb = (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); |
1652 | 1756 | ||
1653 | /* | 1757 | /* |
1654 | * Invalidate the LVB of a deleted inode - this way other | 1758 | * Invalidate the LVB of a deleted inode - this way other |
@@ -1700,7 +1804,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode) | |||
1700 | 1804 | ||
1701 | mlog_meta_lvb(0, lockres); | 1805 | mlog_meta_lvb(0, lockres); |
1702 | 1806 | ||
1703 | lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; | 1807 | lvb = (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); |
1704 | 1808 | ||
1705 | /* We're safe here without the lockres lock... */ | 1809 | /* We're safe here without the lockres lock... */ |
1706 | spin_lock(&oi->ip_lock); | 1810 | spin_lock(&oi->ip_lock); |
@@ -1735,7 +1839,8 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode) | |||
1735 | static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, | 1839 | static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, |
1736 | struct ocfs2_lock_res *lockres) | 1840 | struct ocfs2_lock_res *lockres) |
1737 | { | 1841 | { |
1738 | struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; | 1842 | struct ocfs2_meta_lvb *lvb = |
1843 | (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); | ||
1739 | 1844 | ||
1740 | if (lvb->lvb_version == OCFS2_LVB_VERSION | 1845 | if (lvb->lvb_version == OCFS2_LVB_VERSION |
1741 | && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) | 1846 | && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) |
@@ -1923,7 +2028,8 @@ int ocfs2_inode_lock_full(struct inode *inode, | |||
1923 | int ex, | 2028 | int ex, |
1924 | int arg_flags) | 2029 | int arg_flags) |
1925 | { | 2030 | { |
1926 | int status, level, dlm_flags, acquired; | 2031 | int status, level, acquired; |
2032 | u32 dlm_flags; | ||
1927 | struct ocfs2_lock_res *lockres = NULL; | 2033 | struct ocfs2_lock_res *lockres = NULL; |
1928 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2034 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1929 | struct buffer_head *local_bh = NULL; | 2035 | struct buffer_head *local_bh = NULL; |
@@ -1950,14 +2056,13 @@ int ocfs2_inode_lock_full(struct inode *inode, | |||
1950 | goto local; | 2056 | goto local; |
1951 | 2057 | ||
1952 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) | 2058 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) |
1953 | wait_event(osb->recovery_event, | 2059 | ocfs2_wait_for_recovery(osb); |
1954 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); | ||
1955 | 2060 | ||
1956 | lockres = &OCFS2_I(inode)->ip_inode_lockres; | 2061 | lockres = &OCFS2_I(inode)->ip_inode_lockres; |
1957 | level = ex ? LKM_EXMODE : LKM_PRMODE; | 2062 | level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
1958 | dlm_flags = 0; | 2063 | dlm_flags = 0; |
1959 | if (arg_flags & OCFS2_META_LOCK_NOQUEUE) | 2064 | if (arg_flags & OCFS2_META_LOCK_NOQUEUE) |
1960 | dlm_flags |= LKM_NOQUEUE; | 2065 | dlm_flags |= DLM_LKF_NOQUEUE; |
1961 | 2066 | ||
1962 | status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags); | 2067 | status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags); |
1963 | if (status < 0) { | 2068 | if (status < 0) { |
@@ -1974,8 +2079,7 @@ int ocfs2_inode_lock_full(struct inode *inode, | |||
1974 | * committed to owning this lock so we don't allow signals to | 2079 | * committed to owning this lock so we don't allow signals to |
1975 | * abort the operation. */ | 2080 | * abort the operation. */ |
1976 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) | 2081 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) |
1977 | wait_event(osb->recovery_event, | 2082 | ocfs2_wait_for_recovery(osb); |
1978 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); | ||
1979 | 2083 | ||
1980 | local: | 2084 | local: |
1981 | /* | 2085 | /* |
@@ -2109,7 +2213,7 @@ int ocfs2_inode_lock_atime(struct inode *inode, | |||
2109 | void ocfs2_inode_unlock(struct inode *inode, | 2213 | void ocfs2_inode_unlock(struct inode *inode, |
2110 | int ex) | 2214 | int ex) |
2111 | { | 2215 | { |
2112 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 2216 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
2113 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; | 2217 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; |
2114 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2218 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
2115 | 2219 | ||
@@ -2130,10 +2234,8 @@ int ocfs2_super_lock(struct ocfs2_super *osb, | |||
2130 | int ex) | 2234 | int ex) |
2131 | { | 2235 | { |
2132 | int status = 0; | 2236 | int status = 0; |
2133 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 2237 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
2134 | struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; | 2238 | struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; |
2135 | struct buffer_head *bh; | ||
2136 | struct ocfs2_slot_info *si = osb->slot_info; | ||
2137 | 2239 | ||
2138 | mlog_entry_void(); | 2240 | mlog_entry_void(); |
2139 | 2241 | ||
@@ -2159,11 +2261,7 @@ int ocfs2_super_lock(struct ocfs2_super *osb, | |||
2159 | goto bail; | 2261 | goto bail; |
2160 | } | 2262 | } |
2161 | if (status) { | 2263 | if (status) { |
2162 | bh = si->si_bh; | 2264 | status = ocfs2_refresh_slot_info(osb); |
2163 | status = ocfs2_read_block(osb, bh->b_blocknr, &bh, 0, | ||
2164 | si->si_inode); | ||
2165 | if (status == 0) | ||
2166 | ocfs2_update_slot_info(si); | ||
2167 | 2265 | ||
2168 | ocfs2_complete_lock_res_refresh(lockres, status); | 2266 | ocfs2_complete_lock_res_refresh(lockres, status); |
2169 | 2267 | ||
@@ -2178,7 +2276,7 @@ bail: | |||
2178 | void ocfs2_super_unlock(struct ocfs2_super *osb, | 2276 | void ocfs2_super_unlock(struct ocfs2_super *osb, |
2179 | int ex) | 2277 | int ex) |
2180 | { | 2278 | { |
2181 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 2279 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
2182 | struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; | 2280 | struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; |
2183 | 2281 | ||
2184 | if (!ocfs2_mount_local(osb)) | 2282 | if (!ocfs2_mount_local(osb)) |
@@ -2196,7 +2294,7 @@ int ocfs2_rename_lock(struct ocfs2_super *osb) | |||
2196 | if (ocfs2_mount_local(osb)) | 2294 | if (ocfs2_mount_local(osb)) |
2197 | return 0; | 2295 | return 0; |
2198 | 2296 | ||
2199 | status = ocfs2_cluster_lock(osb, lockres, LKM_EXMODE, 0, 0); | 2297 | status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); |
2200 | if (status < 0) | 2298 | if (status < 0) |
2201 | mlog_errno(status); | 2299 | mlog_errno(status); |
2202 | 2300 | ||
@@ -2208,13 +2306,13 @@ void ocfs2_rename_unlock(struct ocfs2_super *osb) | |||
2208 | struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; | 2306 | struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; |
2209 | 2307 | ||
2210 | if (!ocfs2_mount_local(osb)) | 2308 | if (!ocfs2_mount_local(osb)) |
2211 | ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE); | 2309 | ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); |
2212 | } | 2310 | } |
2213 | 2311 | ||
2214 | int ocfs2_dentry_lock(struct dentry *dentry, int ex) | 2312 | int ocfs2_dentry_lock(struct dentry *dentry, int ex) |
2215 | { | 2313 | { |
2216 | int ret; | 2314 | int ret; |
2217 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 2315 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
2218 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | 2316 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; |
2219 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); | 2317 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); |
2220 | 2318 | ||
@@ -2235,7 +2333,7 @@ int ocfs2_dentry_lock(struct dentry *dentry, int ex) | |||
2235 | 2333 | ||
2236 | void ocfs2_dentry_unlock(struct dentry *dentry, int ex) | 2334 | void ocfs2_dentry_unlock(struct dentry *dentry, int ex) |
2237 | { | 2335 | { |
2238 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 2336 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
2239 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | 2337 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; |
2240 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); | 2338 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); |
2241 | 2339 | ||
@@ -2400,7 +2498,7 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) | |||
2400 | lockres->l_blocking); | 2498 | lockres->l_blocking); |
2401 | 2499 | ||
2402 | /* Dump the raw LVB */ | 2500 | /* Dump the raw LVB */ |
2403 | lvb = lockres->l_lksb.lvb; | 2501 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); |
2404 | for(i = 0; i < DLM_LVB_LEN; i++) | 2502 | for(i = 0; i < DLM_LVB_LEN; i++) |
2405 | seq_printf(m, "0x%x\t", lvb[i]); | 2503 | seq_printf(m, "0x%x\t", lvb[i]); |
2406 | 2504 | ||
@@ -2504,13 +2602,14 @@ static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) | |||
2504 | int ocfs2_dlm_init(struct ocfs2_super *osb) | 2602 | int ocfs2_dlm_init(struct ocfs2_super *osb) |
2505 | { | 2603 | { |
2506 | int status = 0; | 2604 | int status = 0; |
2507 | u32 dlm_key; | 2605 | struct ocfs2_cluster_connection *conn = NULL; |
2508 | struct dlm_ctxt *dlm = NULL; | ||
2509 | 2606 | ||
2510 | mlog_entry_void(); | 2607 | mlog_entry_void(); |
2511 | 2608 | ||
2512 | if (ocfs2_mount_local(osb)) | 2609 | if (ocfs2_mount_local(osb)) { |
2610 | osb->node_num = 0; | ||
2513 | goto local; | 2611 | goto local; |
2612 | } | ||
2514 | 2613 | ||
2515 | status = ocfs2_dlm_init_debug(osb); | 2614 | status = ocfs2_dlm_init_debug(osb); |
2516 | if (status < 0) { | 2615 | if (status < 0) { |
@@ -2527,26 +2626,31 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) | |||
2527 | goto bail; | 2626 | goto bail; |
2528 | } | 2627 | } |
2529 | 2628 | ||
2530 | /* used by the dlm code to make message headers unique, each | ||
2531 | * node in this domain must agree on this. */ | ||
2532 | dlm_key = crc32_le(0, osb->uuid_str, strlen(osb->uuid_str)); | ||
2533 | |||
2534 | /* for now, uuid == domain */ | 2629 | /* for now, uuid == domain */ |
2535 | dlm = dlm_register_domain(osb->uuid_str, dlm_key, | 2630 | status = ocfs2_cluster_connect(osb->osb_cluster_stack, |
2536 | &osb->osb_locking_proto); | 2631 | osb->uuid_str, |
2537 | if (IS_ERR(dlm)) { | 2632 | strlen(osb->uuid_str), |
2538 | status = PTR_ERR(dlm); | 2633 | ocfs2_do_node_down, osb, |
2634 | &conn); | ||
2635 | if (status) { | ||
2539 | mlog_errno(status); | 2636 | mlog_errno(status); |
2540 | goto bail; | 2637 | goto bail; |
2541 | } | 2638 | } |
2542 | 2639 | ||
2543 | dlm_register_eviction_cb(dlm, &osb->osb_eviction_cb); | 2640 | status = ocfs2_cluster_this_node(&osb->node_num); |
2641 | if (status < 0) { | ||
2642 | mlog_errno(status); | ||
2643 | mlog(ML_ERROR, | ||
2644 | "could not find this host's node number\n"); | ||
2645 | ocfs2_cluster_disconnect(conn, 0); | ||
2646 | goto bail; | ||
2647 | } | ||
2544 | 2648 | ||
2545 | local: | 2649 | local: |
2546 | ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); | 2650 | ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); |
2547 | ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); | 2651 | ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); |
2548 | 2652 | ||
2549 | osb->dlm = dlm; | 2653 | osb->cconn = conn; |
2550 | 2654 | ||
2551 | status = 0; | 2655 | status = 0; |
2552 | bail: | 2656 | bail: |
@@ -2560,14 +2664,19 @@ bail: | |||
2560 | return status; | 2664 | return status; |
2561 | } | 2665 | } |
2562 | 2666 | ||
2563 | void ocfs2_dlm_shutdown(struct ocfs2_super *osb) | 2667 | void ocfs2_dlm_shutdown(struct ocfs2_super *osb, |
2668 | int hangup_pending) | ||
2564 | { | 2669 | { |
2565 | mlog_entry_void(); | 2670 | mlog_entry_void(); |
2566 | 2671 | ||
2567 | dlm_unregister_eviction_cb(&osb->osb_eviction_cb); | ||
2568 | |||
2569 | ocfs2_drop_osb_locks(osb); | 2672 | ocfs2_drop_osb_locks(osb); |
2570 | 2673 | ||
2674 | /* | ||
2675 | * Now that we have dropped all locks and ocfs2_dismount_volume() | ||
2676 | * has disabled recovery, the DLM won't be talking to us. It's | ||
2677 | * safe to tear things down before disconnecting the cluster. | ||
2678 | */ | ||
2679 | |||
2571 | if (osb->dc_task) { | 2680 | if (osb->dc_task) { |
2572 | kthread_stop(osb->dc_task); | 2681 | kthread_stop(osb->dc_task); |
2573 | osb->dc_task = NULL; | 2682 | osb->dc_task = NULL; |
@@ -2576,15 +2685,15 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb) | |||
2576 | ocfs2_lock_res_free(&osb->osb_super_lockres); | 2685 | ocfs2_lock_res_free(&osb->osb_super_lockres); |
2577 | ocfs2_lock_res_free(&osb->osb_rename_lockres); | 2686 | ocfs2_lock_res_free(&osb->osb_rename_lockres); |
2578 | 2687 | ||
2579 | dlm_unregister_domain(osb->dlm); | 2688 | ocfs2_cluster_disconnect(osb->cconn, hangup_pending); |
2580 | osb->dlm = NULL; | 2689 | osb->cconn = NULL; |
2581 | 2690 | ||
2582 | ocfs2_dlm_shutdown_debug(osb); | 2691 | ocfs2_dlm_shutdown_debug(osb); |
2583 | 2692 | ||
2584 | mlog_exit_void(); | 2693 | mlog_exit_void(); |
2585 | } | 2694 | } |
2586 | 2695 | ||
2587 | static void ocfs2_unlock_ast(void *opaque, enum dlm_status status) | 2696 | static void ocfs2_unlock_ast(void *opaque, int error) |
2588 | { | 2697 | { |
2589 | struct ocfs2_lock_res *lockres = opaque; | 2698 | struct ocfs2_lock_res *lockres = opaque; |
2590 | unsigned long flags; | 2699 | unsigned long flags; |
@@ -2595,24 +2704,9 @@ static void ocfs2_unlock_ast(void *opaque, enum dlm_status status) | |||
2595 | lockres->l_unlock_action); | 2704 | lockres->l_unlock_action); |
2596 | 2705 | ||
2597 | spin_lock_irqsave(&lockres->l_lock, flags); | 2706 | spin_lock_irqsave(&lockres->l_lock, flags); |
2598 | /* We tried to cancel a convert request, but it was already | 2707 | if (error) { |
2599 | * granted. All we want to do here is clear our unlock | 2708 | mlog(ML_ERROR, "Dlm passes error %d for lock %s, " |
2600 | * state. The wake_up call done at the bottom is redundant | 2709 | "unlock_action %d\n", error, lockres->l_name, |
2601 | * (ocfs2_prepare_cancel_convert doesn't sleep on this) but doesn't | ||
2602 | * hurt anything anyway */ | ||
2603 | if (status == DLM_CANCELGRANT && | ||
2604 | lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { | ||
2605 | mlog(0, "Got cancelgrant for %s\n", lockres->l_name); | ||
2606 | |||
2607 | /* We don't clear the busy flag in this case as it | ||
2608 | * should have been cleared by the ast which the dlm | ||
2609 | * has called. */ | ||
2610 | goto complete_unlock; | ||
2611 | } | ||
2612 | |||
2613 | if (status != DLM_NORMAL) { | ||
2614 | mlog(ML_ERROR, "Dlm passes status %d for lock %s, " | ||
2615 | "unlock_action %d\n", status, lockres->l_name, | ||
2616 | lockres->l_unlock_action); | 2710 | lockres->l_unlock_action); |
2617 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2711 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
2618 | return; | 2712 | return; |
@@ -2624,14 +2718,13 @@ static void ocfs2_unlock_ast(void *opaque, enum dlm_status status) | |||
2624 | lockres->l_action = OCFS2_AST_INVALID; | 2718 | lockres->l_action = OCFS2_AST_INVALID; |
2625 | break; | 2719 | break; |
2626 | case OCFS2_UNLOCK_DROP_LOCK: | 2720 | case OCFS2_UNLOCK_DROP_LOCK: |
2627 | lockres->l_level = LKM_IVMODE; | 2721 | lockres->l_level = DLM_LOCK_IV; |
2628 | break; | 2722 | break; |
2629 | default: | 2723 | default: |
2630 | BUG(); | 2724 | BUG(); |
2631 | } | 2725 | } |
2632 | 2726 | ||
2633 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 2727 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); |
2634 | complete_unlock: | ||
2635 | lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; | 2728 | lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; |
2636 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2729 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
2637 | 2730 | ||
@@ -2643,16 +2736,16 @@ complete_unlock: | |||
2643 | static int ocfs2_drop_lock(struct ocfs2_super *osb, | 2736 | static int ocfs2_drop_lock(struct ocfs2_super *osb, |
2644 | struct ocfs2_lock_res *lockres) | 2737 | struct ocfs2_lock_res *lockres) |
2645 | { | 2738 | { |
2646 | enum dlm_status status; | 2739 | int ret; |
2647 | unsigned long flags; | 2740 | unsigned long flags; |
2648 | int lkm_flags = 0; | 2741 | u32 lkm_flags = 0; |
2649 | 2742 | ||
2650 | /* We didn't get anywhere near actually using this lockres. */ | 2743 | /* We didn't get anywhere near actually using this lockres. */ |
2651 | if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) | 2744 | if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) |
2652 | goto out; | 2745 | goto out; |
2653 | 2746 | ||
2654 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) | 2747 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) |
2655 | lkm_flags |= LKM_VALBLK; | 2748 | lkm_flags |= DLM_LKF_VALBLK; |
2656 | 2749 | ||
2657 | spin_lock_irqsave(&lockres->l_lock, flags); | 2750 | spin_lock_irqsave(&lockres->l_lock, flags); |
2658 | 2751 | ||
@@ -2678,7 +2771,7 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb, | |||
2678 | 2771 | ||
2679 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { | 2772 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { |
2680 | if (lockres->l_flags & OCFS2_LOCK_ATTACHED && | 2773 | if (lockres->l_flags & OCFS2_LOCK_ATTACHED && |
2681 | lockres->l_level == LKM_EXMODE && | 2774 | lockres->l_level == DLM_LOCK_EX && |
2682 | !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) | 2775 | !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) |
2683 | lockres->l_ops->set_lvb(lockres); | 2776 | lockres->l_ops->set_lvb(lockres); |
2684 | } | 2777 | } |
@@ -2707,15 +2800,15 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb, | |||
2707 | 2800 | ||
2708 | mlog(0, "lock %s\n", lockres->l_name); | 2801 | mlog(0, "lock %s\n", lockres->l_name); |
2709 | 2802 | ||
2710 | status = dlmunlock(osb->dlm, &lockres->l_lksb, lkm_flags, | 2803 | ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags, |
2711 | ocfs2_unlock_ast, lockres); | 2804 | lockres); |
2712 | if (status != DLM_NORMAL) { | 2805 | if (ret) { |
2713 | ocfs2_log_dlm_error("dlmunlock", status, lockres); | 2806 | ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); |
2714 | mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); | 2807 | mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); |
2715 | dlm_print_one_lock(lockres->l_lksb.lockid); | 2808 | ocfs2_dlm_dump_lksb(&lockres->l_lksb); |
2716 | BUG(); | 2809 | BUG(); |
2717 | } | 2810 | } |
2718 | mlog(0, "lock %s, successfull return from dlmunlock\n", | 2811 | mlog(0, "lock %s, successfull return from ocfs2_dlm_unlock\n", |
2719 | lockres->l_name); | 2812 | lockres->l_name); |
2720 | 2813 | ||
2721 | ocfs2_wait_on_busy_lock(lockres); | 2814 | ocfs2_wait_on_busy_lock(lockres); |
@@ -2806,15 +2899,15 @@ int ocfs2_drop_inode_locks(struct inode *inode) | |||
2806 | return status; | 2899 | return status; |
2807 | } | 2900 | } |
2808 | 2901 | ||
2809 | static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, | 2902 | static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, |
2810 | int new_level) | 2903 | int new_level) |
2811 | { | 2904 | { |
2812 | assert_spin_locked(&lockres->l_lock); | 2905 | assert_spin_locked(&lockres->l_lock); |
2813 | 2906 | ||
2814 | BUG_ON(lockres->l_blocking <= LKM_NLMODE); | 2907 | BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); |
2815 | 2908 | ||
2816 | if (lockres->l_level <= new_level) { | 2909 | if (lockres->l_level <= new_level) { |
2817 | mlog(ML_ERROR, "lockres->l_level (%u) <= new_level (%u)\n", | 2910 | mlog(ML_ERROR, "lockres->l_level (%d) <= new_level (%d)\n", |
2818 | lockres->l_level, new_level); | 2911 | lockres->l_level, new_level); |
2819 | BUG(); | 2912 | BUG(); |
2820 | } | 2913 | } |
@@ -2825,33 +2918,33 @@ static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, | |||
2825 | lockres->l_action = OCFS2_AST_DOWNCONVERT; | 2918 | lockres->l_action = OCFS2_AST_DOWNCONVERT; |
2826 | lockres->l_requested = new_level; | 2919 | lockres->l_requested = new_level; |
2827 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 2920 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); |
2921 | return lockres_set_pending(lockres); | ||
2828 | } | 2922 | } |
2829 | 2923 | ||
2830 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, | 2924 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, |
2831 | struct ocfs2_lock_res *lockres, | 2925 | struct ocfs2_lock_res *lockres, |
2832 | int new_level, | 2926 | int new_level, |
2833 | int lvb) | 2927 | int lvb, |
2928 | unsigned int generation) | ||
2834 | { | 2929 | { |
2835 | int ret, dlm_flags = LKM_CONVERT; | 2930 | int ret; |
2836 | enum dlm_status status; | 2931 | u32 dlm_flags = DLM_LKF_CONVERT; |
2837 | 2932 | ||
2838 | mlog_entry_void(); | 2933 | mlog_entry_void(); |
2839 | 2934 | ||
2840 | if (lvb) | 2935 | if (lvb) |
2841 | dlm_flags |= LKM_VALBLK; | 2936 | dlm_flags |= DLM_LKF_VALBLK; |
2842 | 2937 | ||
2843 | status = dlmlock(osb->dlm, | 2938 | ret = ocfs2_dlm_lock(osb->cconn, |
2844 | new_level, | 2939 | new_level, |
2845 | &lockres->l_lksb, | 2940 | &lockres->l_lksb, |
2846 | dlm_flags, | 2941 | dlm_flags, |
2847 | lockres->l_name, | 2942 | lockres->l_name, |
2848 | OCFS2_LOCK_ID_MAX_LEN - 1, | 2943 | OCFS2_LOCK_ID_MAX_LEN - 1, |
2849 | ocfs2_locking_ast, | 2944 | lockres); |
2850 | lockres, | 2945 | lockres_clear_pending(lockres, generation, osb); |
2851 | ocfs2_blocking_ast); | 2946 | if (ret) { |
2852 | if (status != DLM_NORMAL) { | 2947 | ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); |
2853 | ocfs2_log_dlm_error("dlmlock", status, lockres); | ||
2854 | ret = -EINVAL; | ||
2855 | ocfs2_recover_from_dlm_error(lockres, 1); | 2948 | ocfs2_recover_from_dlm_error(lockres, 1); |
2856 | goto bail; | 2949 | goto bail; |
2857 | } | 2950 | } |
@@ -2862,7 +2955,7 @@ bail: | |||
2862 | return ret; | 2955 | return ret; |
2863 | } | 2956 | } |
2864 | 2957 | ||
2865 | /* returns 1 when the caller should unlock and call dlmunlock */ | 2958 | /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */ |
2866 | static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, | 2959 | static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, |
2867 | struct ocfs2_lock_res *lockres) | 2960 | struct ocfs2_lock_res *lockres) |
2868 | { | 2961 | { |
@@ -2898,24 +2991,18 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb, | |||
2898 | struct ocfs2_lock_res *lockres) | 2991 | struct ocfs2_lock_res *lockres) |
2899 | { | 2992 | { |
2900 | int ret; | 2993 | int ret; |
2901 | enum dlm_status status; | ||
2902 | 2994 | ||
2903 | mlog_entry_void(); | 2995 | mlog_entry_void(); |
2904 | mlog(0, "lock %s\n", lockres->l_name); | 2996 | mlog(0, "lock %s\n", lockres->l_name); |
2905 | 2997 | ||
2906 | ret = 0; | 2998 | ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, |
2907 | status = dlmunlock(osb->dlm, | 2999 | DLM_LKF_CANCEL, lockres); |
2908 | &lockres->l_lksb, | 3000 | if (ret) { |
2909 | LKM_CANCEL, | 3001 | ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); |
2910 | ocfs2_unlock_ast, | ||
2911 | lockres); | ||
2912 | if (status != DLM_NORMAL) { | ||
2913 | ocfs2_log_dlm_error("dlmunlock", status, lockres); | ||
2914 | ret = -EINVAL; | ||
2915 | ocfs2_recover_from_dlm_error(lockres, 0); | 3002 | ocfs2_recover_from_dlm_error(lockres, 0); |
2916 | } | 3003 | } |
2917 | 3004 | ||
2918 | mlog(0, "lock %s return from dlmunlock\n", lockres->l_name); | 3005 | mlog(0, "lock %s return from ocfs2_dlm_unlock\n", lockres->l_name); |
2919 | 3006 | ||
2920 | mlog_exit(ret); | 3007 | mlog_exit(ret); |
2921 | return ret; | 3008 | return ret; |
@@ -2930,6 +3017,7 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb, | |||
2930 | int new_level; | 3017 | int new_level; |
2931 | int ret = 0; | 3018 | int ret = 0; |
2932 | int set_lvb = 0; | 3019 | int set_lvb = 0; |
3020 | unsigned int gen; | ||
2933 | 3021 | ||
2934 | mlog_entry_void(); | 3022 | mlog_entry_void(); |
2935 | 3023 | ||
@@ -2939,6 +3027,32 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb, | |||
2939 | 3027 | ||
2940 | recheck: | 3028 | recheck: |
2941 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { | 3029 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { |
3030 | /* XXX | ||
3031 | * This is a *big* race. The OCFS2_LOCK_PENDING flag | ||
3032 | * exists entirely for one reason - another thread has set | ||
3033 | * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock(). | ||
3034 | * | ||
3035 | * If we do ocfs2_cancel_convert() before the other thread | ||
3036 | * calls dlm_lock(), our cancel will do nothing. We will | ||
3037 | * get no ast, and we will have no way of knowing the | ||
3038 | * cancel failed. Meanwhile, the other thread will call | ||
3039 | * into dlm_lock() and wait...forever. | ||
3040 | * | ||
3041 | * Why forever? Because another node has asked for the | ||
3042 | * lock first; that's why we're here in unblock_lock(). | ||
3043 | * | ||
3044 | * The solution is OCFS2_LOCK_PENDING. When PENDING is | ||
3045 | * set, we just requeue the unblock. Only when the other | ||
3046 | * thread has called dlm_lock() and cleared PENDING will | ||
3047 | * we then cancel their request. | ||
3048 | * | ||
3049 | * All callers of dlm_lock() must set OCFS2_DLM_PENDING | ||
3050 | * at the same time they set OCFS2_DLM_BUSY. They must | ||
3051 | * clear OCFS2_DLM_PENDING after dlm_lock() returns. | ||
3052 | */ | ||
3053 | if (lockres->l_flags & OCFS2_LOCK_PENDING) | ||
3054 | goto leave_requeue; | ||
3055 | |||
2942 | ctl->requeue = 1; | 3056 | ctl->requeue = 1; |
2943 | ret = ocfs2_prepare_cancel_convert(osb, lockres); | 3057 | ret = ocfs2_prepare_cancel_convert(osb, lockres); |
2944 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 3058 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
@@ -2952,13 +3066,13 @@ recheck: | |||
2952 | 3066 | ||
2953 | /* if we're blocking an exclusive and we have *any* holders, | 3067 | /* if we're blocking an exclusive and we have *any* holders, |
2954 | * then requeue. */ | 3068 | * then requeue. */ |
2955 | if ((lockres->l_blocking == LKM_EXMODE) | 3069 | if ((lockres->l_blocking == DLM_LOCK_EX) |
2956 | && (lockres->l_ex_holders || lockres->l_ro_holders)) | 3070 | && (lockres->l_ex_holders || lockres->l_ro_holders)) |
2957 | goto leave_requeue; | 3071 | goto leave_requeue; |
2958 | 3072 | ||
2959 | /* If it's a PR we're blocking, then only | 3073 | /* If it's a PR we're blocking, then only |
2960 | * requeue if we've got any EX holders */ | 3074 | * requeue if we've got any EX holders */ |
2961 | if (lockres->l_blocking == LKM_PRMODE && | 3075 | if (lockres->l_blocking == DLM_LOCK_PR && |
2962 | lockres->l_ex_holders) | 3076 | lockres->l_ex_holders) |
2963 | goto leave_requeue; | 3077 | goto leave_requeue; |
2964 | 3078 | ||
@@ -3005,7 +3119,7 @@ downconvert: | |||
3005 | ctl->requeue = 0; | 3119 | ctl->requeue = 0; |
3006 | 3120 | ||
3007 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { | 3121 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { |
3008 | if (lockres->l_level == LKM_EXMODE) | 3122 | if (lockres->l_level == DLM_LOCK_EX) |
3009 | set_lvb = 1; | 3123 | set_lvb = 1; |
3010 | 3124 | ||
3011 | /* | 3125 | /* |
@@ -3018,9 +3132,11 @@ downconvert: | |||
3018 | lockres->l_ops->set_lvb(lockres); | 3132 | lockres->l_ops->set_lvb(lockres); |
3019 | } | 3133 | } |
3020 | 3134 | ||
3021 | ocfs2_prepare_downconvert(lockres, new_level); | 3135 | gen = ocfs2_prepare_downconvert(lockres, new_level); |
3022 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 3136 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
3023 | ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb); | 3137 | ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb, |
3138 | gen); | ||
3139 | |||
3024 | leave: | 3140 | leave: |
3025 | mlog_exit(ret); | 3141 | mlog_exit(ret); |
3026 | return ret; | 3142 | return ret; |
@@ -3059,7 +3175,7 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | |||
3059 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 3175 | (unsigned long long)OCFS2_I(inode)->ip_blkno); |
3060 | } | 3176 | } |
3061 | sync_mapping_buffers(mapping); | 3177 | sync_mapping_buffers(mapping); |
3062 | if (blocking == LKM_EXMODE) { | 3178 | if (blocking == DLM_LOCK_EX) { |
3063 | truncate_inode_pages(mapping, 0); | 3179 | truncate_inode_pages(mapping, 0); |
3064 | } else { | 3180 | } else { |
3065 | /* We only need to wait on the I/O if we're not also | 3181 | /* We only need to wait on the I/O if we're not also |
@@ -3080,8 +3196,8 @@ static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, | |||
3080 | struct inode *inode = ocfs2_lock_res_inode(lockres); | 3196 | struct inode *inode = ocfs2_lock_res_inode(lockres); |
3081 | int checkpointed = ocfs2_inode_fully_checkpointed(inode); | 3197 | int checkpointed = ocfs2_inode_fully_checkpointed(inode); |
3082 | 3198 | ||
3083 | BUG_ON(new_level != LKM_NLMODE && new_level != LKM_PRMODE); | 3199 | BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR); |
3084 | BUG_ON(lockres->l_level != LKM_EXMODE && !checkpointed); | 3200 | BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed); |
3085 | 3201 | ||
3086 | if (checkpointed) | 3202 | if (checkpointed) |
3087 | return 1; | 3203 | return 1; |
@@ -3145,7 +3261,7 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, | |||
3145 | * valid. The downconvert code will retain a PR for this node, | 3261 | * valid. The downconvert code will retain a PR for this node, |
3146 | * so there's no further work to do. | 3262 | * so there's no further work to do. |
3147 | */ | 3263 | */ |
3148 | if (blocking == LKM_PRMODE) | 3264 | if (blocking == DLM_LOCK_PR) |
3149 | return UNBLOCK_CONTINUE; | 3265 | return UNBLOCK_CONTINUE; |
3150 | 3266 | ||
3151 | /* | 3267 | /* |
@@ -3219,6 +3335,45 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, | |||
3219 | return UNBLOCK_CONTINUE_POST; | 3335 | return UNBLOCK_CONTINUE_POST; |
3220 | } | 3336 | } |
3221 | 3337 | ||
3338 | /* | ||
3339 | * This is the filesystem locking protocol. It provides the lock handling | ||
3340 | * hooks for the underlying DLM. It has a maximum version number. | ||
3341 | * The version number allows interoperability with systems running at | ||
3342 | * the same major number and an equal or smaller minor number. | ||
3343 | * | ||
3344 | * Whenever the filesystem does new things with locks (adds or removes a | ||
3345 | * lock, orders them differently, does different things underneath a lock), | ||
3346 | * the version must be changed. The protocol is negotiated when joining | ||
3347 | * the dlm domain. A node may join the domain if its major version is | ||
3348 | * identical to all other nodes and its minor version is greater than | ||
3349 | * or equal to all other nodes. When its minor version is greater than | ||
3350 | * the other nodes, it will run at the minor version specified by the | ||
3351 | * other nodes. | ||
3352 | * | ||
3353 | * If a locking change is made that will not be compatible with older | ||
3354 | * versions, the major number must be increased and the minor version set | ||
3355 | * to zero. If a change merely adds a behavior that can be disabled when | ||
3356 | * speaking to older versions, the minor version must be increased. If a | ||
3357 | * change adds a fully backwards compatible change (eg, LVB changes that | ||
3358 | * are just ignored by older versions), the version does not need to be | ||
3359 | * updated. | ||
3360 | */ | ||
3361 | static struct ocfs2_locking_protocol lproto = { | ||
3362 | .lp_max_version = { | ||
3363 | .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, | ||
3364 | .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, | ||
3365 | }, | ||
3366 | .lp_lock_ast = ocfs2_locking_ast, | ||
3367 | .lp_blocking_ast = ocfs2_blocking_ast, | ||
3368 | .lp_unlock_ast = ocfs2_unlock_ast, | ||
3369 | }; | ||
3370 | |||
3371 | void ocfs2_set_locking_protocol(void) | ||
3372 | { | ||
3373 | ocfs2_stack_glue_set_locking_protocol(&lproto); | ||
3374 | } | ||
3375 | |||
3376 | |||
3222 | static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | 3377 | static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, |
3223 | struct ocfs2_lock_res *lockres) | 3378 | struct ocfs2_lock_res *lockres) |
3224 | { | 3379 | { |
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index e3cf902404b4..2bb01f09c1b1 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h | |||
@@ -58,7 +58,7 @@ struct ocfs2_meta_lvb { | |||
58 | #define OCFS2_LOCK_NONBLOCK (0x04) | 58 | #define OCFS2_LOCK_NONBLOCK (0x04) |
59 | 59 | ||
60 | int ocfs2_dlm_init(struct ocfs2_super *osb); | 60 | int ocfs2_dlm_init(struct ocfs2_super *osb); |
61 | void ocfs2_dlm_shutdown(struct ocfs2_super *osb); | 61 | void ocfs2_dlm_shutdown(struct ocfs2_super *osb, int hangup_pending); |
62 | void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res); | 62 | void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res); |
63 | void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, | 63 | void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, |
64 | enum ocfs2_lock_type type, | 64 | enum ocfs2_lock_type type, |
@@ -114,5 +114,6 @@ void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb); | |||
114 | struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void); | 114 | struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void); |
115 | void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug); | 115 | void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug); |
116 | 116 | ||
117 | extern const struct dlm_protocol_version ocfs2_locking_protocol; | 117 | /* To set the locking protocol on module initialization */ |
118 | void ocfs2_set_locking_protocol(void); | ||
118 | #endif /* DLMGLUE_H */ | 119 | #endif /* DLMGLUE_H */ |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index ed5d5232e85d..9154c82d3258 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -2242,7 +2242,7 @@ const struct file_operations ocfs2_fops = { | |||
2242 | .open = ocfs2_file_open, | 2242 | .open = ocfs2_file_open, |
2243 | .aio_read = ocfs2_file_aio_read, | 2243 | .aio_read = ocfs2_file_aio_read, |
2244 | .aio_write = ocfs2_file_aio_write, | 2244 | .aio_write = ocfs2_file_aio_write, |
2245 | .ioctl = ocfs2_ioctl, | 2245 | .unlocked_ioctl = ocfs2_ioctl, |
2246 | #ifdef CONFIG_COMPAT | 2246 | #ifdef CONFIG_COMPAT |
2247 | .compat_ioctl = ocfs2_compat_ioctl, | 2247 | .compat_ioctl = ocfs2_compat_ioctl, |
2248 | #endif | 2248 | #endif |
@@ -2258,7 +2258,7 @@ const struct file_operations ocfs2_dops = { | |||
2258 | .fsync = ocfs2_sync_file, | 2258 | .fsync = ocfs2_sync_file, |
2259 | .release = ocfs2_dir_release, | 2259 | .release = ocfs2_dir_release, |
2260 | .open = ocfs2_dir_open, | 2260 | .open = ocfs2_dir_open, |
2261 | .ioctl = ocfs2_ioctl, | 2261 | .unlocked_ioctl = ocfs2_ioctl, |
2262 | #ifdef CONFIG_COMPAT | 2262 | #ifdef CONFIG_COMPAT |
2263 | .compat_ioctl = ocfs2_compat_ioctl, | 2263 | .compat_ioctl = ocfs2_compat_ioctl, |
2264 | #endif | 2264 | #endif |
diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c index 0758daf64da0..c6e7213db868 100644 --- a/fs/ocfs2/heartbeat.c +++ b/fs/ocfs2/heartbeat.c | |||
@@ -28,9 +28,6 @@ | |||
28 | #include <linux/types.h> | 28 | #include <linux/types.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <linux/highmem.h> | 30 | #include <linux/highmem.h> |
31 | #include <linux/kmod.h> | ||
32 | |||
33 | #include <dlm/dlmapi.h> | ||
34 | 31 | ||
35 | #define MLOG_MASK_PREFIX ML_SUPER | 32 | #define MLOG_MASK_PREFIX ML_SUPER |
36 | #include <cluster/masklog.h> | 33 | #include <cluster/masklog.h> |
@@ -48,7 +45,6 @@ static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map, | |||
48 | int bit); | 45 | int bit); |
49 | static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map, | 46 | static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map, |
50 | int bit); | 47 | int bit); |
51 | static inline int __ocfs2_node_map_is_empty(struct ocfs2_node_map *map); | ||
52 | 48 | ||
53 | /* special case -1 for now | 49 | /* special case -1 for now |
54 | * TODO: should *really* make sure the calling func never passes -1!! */ | 50 | * TODO: should *really* make sure the calling func never passes -1!! */ |
@@ -62,23 +58,23 @@ static void ocfs2_node_map_init(struct ocfs2_node_map *map) | |||
62 | void ocfs2_init_node_maps(struct ocfs2_super *osb) | 58 | void ocfs2_init_node_maps(struct ocfs2_super *osb) |
63 | { | 59 | { |
64 | spin_lock_init(&osb->node_map_lock); | 60 | spin_lock_init(&osb->node_map_lock); |
65 | ocfs2_node_map_init(&osb->recovery_map); | ||
66 | ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs); | 61 | ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs); |
67 | } | 62 | } |
68 | 63 | ||
69 | static void ocfs2_do_node_down(int node_num, | 64 | void ocfs2_do_node_down(int node_num, void *data) |
70 | struct ocfs2_super *osb) | ||
71 | { | 65 | { |
66 | struct ocfs2_super *osb = data; | ||
67 | |||
72 | BUG_ON(osb->node_num == node_num); | 68 | BUG_ON(osb->node_num == node_num); |
73 | 69 | ||
74 | mlog(0, "ocfs2: node down event for %d\n", node_num); | 70 | mlog(0, "ocfs2: node down event for %d\n", node_num); |
75 | 71 | ||
76 | if (!osb->dlm) { | 72 | if (!osb->cconn) { |
77 | /* | 73 | /* |
78 | * No DLM means we're not even ready to participate yet. | 74 | * No cluster connection means we're not even ready to |
79 | * We check the slots after the DLM comes up, so we will | 75 | * participate yet. We check the slots after the cluster |
80 | * notice the node death then. We can safely ignore it | 76 | * comes up, so we will notice the node death then. We |
81 | * here. | 77 | * can safely ignore it here. |
82 | */ | 78 | */ |
83 | return; | 79 | return; |
84 | } | 80 | } |
@@ -86,61 +82,6 @@ static void ocfs2_do_node_down(int node_num, | |||
86 | ocfs2_recovery_thread(osb, node_num); | 82 | ocfs2_recovery_thread(osb, node_num); |
87 | } | 83 | } |
88 | 84 | ||
89 | /* Called from the dlm when it's about to evict a node. We may also | ||
90 | * get a heartbeat callback later. */ | ||
91 | static void ocfs2_dlm_eviction_cb(int node_num, | ||
92 | void *data) | ||
93 | { | ||
94 | struct ocfs2_super *osb = (struct ocfs2_super *) data; | ||
95 | struct super_block *sb = osb->sb; | ||
96 | |||
97 | mlog(ML_NOTICE, "device (%u,%u): dlm has evicted node %d\n", | ||
98 | MAJOR(sb->s_dev), MINOR(sb->s_dev), node_num); | ||
99 | |||
100 | ocfs2_do_node_down(node_num, osb); | ||
101 | } | ||
102 | |||
103 | void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb) | ||
104 | { | ||
105 | /* Not exactly a heartbeat callback, but leads to essentially | ||
106 | * the same path so we set it up here. */ | ||
107 | dlm_setup_eviction_cb(&osb->osb_eviction_cb, | ||
108 | ocfs2_dlm_eviction_cb, | ||
109 | osb); | ||
110 | } | ||
111 | |||
112 | void ocfs2_stop_heartbeat(struct ocfs2_super *osb) | ||
113 | { | ||
114 | int ret; | ||
115 | char *argv[5], *envp[3]; | ||
116 | |||
117 | if (ocfs2_mount_local(osb)) | ||
118 | return; | ||
119 | |||
120 | if (!osb->uuid_str) { | ||
121 | /* This can happen if we don't get far enough in mount... */ | ||
122 | mlog(0, "No UUID with which to stop heartbeat!\n\n"); | ||
123 | return; | ||
124 | } | ||
125 | |||
126 | argv[0] = (char *)o2nm_get_hb_ctl_path(); | ||
127 | argv[1] = "-K"; | ||
128 | argv[2] = "-u"; | ||
129 | argv[3] = osb->uuid_str; | ||
130 | argv[4] = NULL; | ||
131 | |||
132 | mlog(0, "Run: %s %s %s %s\n", argv[0], argv[1], argv[2], argv[3]); | ||
133 | |||
134 | /* minimal command environment taken from cpu_run_sbin_hotplug */ | ||
135 | envp[0] = "HOME=/"; | ||
136 | envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; | ||
137 | envp[2] = NULL; | ||
138 | |||
139 | ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); | ||
140 | if (ret < 0) | ||
141 | mlog_errno(ret); | ||
142 | } | ||
143 | |||
144 | static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map, | 85 | static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map, |
145 | int bit) | 86 | int bit) |
146 | { | 87 | { |
@@ -192,112 +133,3 @@ int ocfs2_node_map_test_bit(struct ocfs2_super *osb, | |||
192 | return ret; | 133 | return ret; |
193 | } | 134 | } |
194 | 135 | ||
195 | static inline int __ocfs2_node_map_is_empty(struct ocfs2_node_map *map) | ||
196 | { | ||
197 | int bit; | ||
198 | bit = find_next_bit(map->map, map->num_nodes, 0); | ||
199 | if (bit < map->num_nodes) | ||
200 | return 0; | ||
201 | return 1; | ||
202 | } | ||
203 | |||
204 | int ocfs2_node_map_is_empty(struct ocfs2_super *osb, | ||
205 | struct ocfs2_node_map *map) | ||
206 | { | ||
207 | int ret; | ||
208 | BUG_ON(map->num_nodes == 0); | ||
209 | spin_lock(&osb->node_map_lock); | ||
210 | ret = __ocfs2_node_map_is_empty(map); | ||
211 | spin_unlock(&osb->node_map_lock); | ||
212 | return ret; | ||
213 | } | ||
214 | |||
215 | #if 0 | ||
216 | |||
217 | static void __ocfs2_node_map_dup(struct ocfs2_node_map *target, | ||
218 | struct ocfs2_node_map *from) | ||
219 | { | ||
220 | BUG_ON(from->num_nodes == 0); | ||
221 | ocfs2_node_map_init(target); | ||
222 | __ocfs2_node_map_set(target, from); | ||
223 | } | ||
224 | |||
225 | /* returns 1 if bit is the only bit set in target, 0 otherwise */ | ||
226 | int ocfs2_node_map_is_only(struct ocfs2_super *osb, | ||
227 | struct ocfs2_node_map *target, | ||
228 | int bit) | ||
229 | { | ||
230 | struct ocfs2_node_map temp; | ||
231 | int ret; | ||
232 | |||
233 | spin_lock(&osb->node_map_lock); | ||
234 | __ocfs2_node_map_dup(&temp, target); | ||
235 | __ocfs2_node_map_clear_bit(&temp, bit); | ||
236 | ret = __ocfs2_node_map_is_empty(&temp); | ||
237 | spin_unlock(&osb->node_map_lock); | ||
238 | |||
239 | return ret; | ||
240 | } | ||
241 | |||
242 | static void __ocfs2_node_map_set(struct ocfs2_node_map *target, | ||
243 | struct ocfs2_node_map *from) | ||
244 | { | ||
245 | int num_longs, i; | ||
246 | |||
247 | BUG_ON(target->num_nodes != from->num_nodes); | ||
248 | BUG_ON(target->num_nodes == 0); | ||
249 | |||
250 | num_longs = BITS_TO_LONGS(target->num_nodes); | ||
251 | for (i = 0; i < num_longs; i++) | ||
252 | target->map[i] = from->map[i]; | ||
253 | } | ||
254 | |||
255 | #endif /* 0 */ | ||
256 | |||
257 | /* Returns whether the recovery bit was actually set - it may not be | ||
258 | * if a node is still marked as needing recovery */ | ||
259 | int ocfs2_recovery_map_set(struct ocfs2_super *osb, | ||
260 | int num) | ||
261 | { | ||
262 | int set = 0; | ||
263 | |||
264 | spin_lock(&osb->node_map_lock); | ||
265 | |||
266 | if (!test_bit(num, osb->recovery_map.map)) { | ||
267 | __ocfs2_node_map_set_bit(&osb->recovery_map, num); | ||
268 | set = 1; | ||
269 | } | ||
270 | |||
271 | spin_unlock(&osb->node_map_lock); | ||
272 | |||
273 | return set; | ||
274 | } | ||
275 | |||
276 | void ocfs2_recovery_map_clear(struct ocfs2_super *osb, | ||
277 | int num) | ||
278 | { | ||
279 | ocfs2_node_map_clear_bit(osb, &osb->recovery_map, num); | ||
280 | } | ||
281 | |||
282 | int ocfs2_node_map_iterate(struct ocfs2_super *osb, | ||
283 | struct ocfs2_node_map *map, | ||
284 | int idx) | ||
285 | { | ||
286 | int i = idx; | ||
287 | |||
288 | idx = O2NM_INVALID_NODE_NUM; | ||
289 | spin_lock(&osb->node_map_lock); | ||
290 | if ((i != O2NM_INVALID_NODE_NUM) && | ||
291 | (i >= 0) && | ||
292 | (i < map->num_nodes)) { | ||
293 | while(i < map->num_nodes) { | ||
294 | if (test_bit(i, map->map)) { | ||
295 | idx = i; | ||
296 | break; | ||
297 | } | ||
298 | i++; | ||
299 | } | ||
300 | } | ||
301 | spin_unlock(&osb->node_map_lock); | ||
302 | return idx; | ||
303 | } | ||
diff --git a/fs/ocfs2/heartbeat.h b/fs/ocfs2/heartbeat.h index eac63aed7611..74b9c5dda28d 100644 --- a/fs/ocfs2/heartbeat.h +++ b/fs/ocfs2/heartbeat.h | |||
@@ -28,13 +28,10 @@ | |||
28 | 28 | ||
29 | void ocfs2_init_node_maps(struct ocfs2_super *osb); | 29 | void ocfs2_init_node_maps(struct ocfs2_super *osb); |
30 | 30 | ||
31 | void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb); | 31 | void ocfs2_do_node_down(int node_num, void *data); |
32 | void ocfs2_stop_heartbeat(struct ocfs2_super *osb); | ||
33 | 32 | ||
34 | /* node map functions - used to keep track of mounted and in-recovery | 33 | /* node map functions - used to keep track of mounted and in-recovery |
35 | * nodes. */ | 34 | * nodes. */ |
36 | int ocfs2_node_map_is_empty(struct ocfs2_super *osb, | ||
37 | struct ocfs2_node_map *map); | ||
38 | void ocfs2_node_map_set_bit(struct ocfs2_super *osb, | 35 | void ocfs2_node_map_set_bit(struct ocfs2_super *osb, |
39 | struct ocfs2_node_map *map, | 36 | struct ocfs2_node_map *map, |
40 | int bit); | 37 | int bit); |
@@ -44,17 +41,5 @@ void ocfs2_node_map_clear_bit(struct ocfs2_super *osb, | |||
44 | int ocfs2_node_map_test_bit(struct ocfs2_super *osb, | 41 | int ocfs2_node_map_test_bit(struct ocfs2_super *osb, |
45 | struct ocfs2_node_map *map, | 42 | struct ocfs2_node_map *map, |
46 | int bit); | 43 | int bit); |
47 | int ocfs2_node_map_iterate(struct ocfs2_super *osb, | ||
48 | struct ocfs2_node_map *map, | ||
49 | int idx); | ||
50 | static inline int ocfs2_node_map_first_set_bit(struct ocfs2_super *osb, | ||
51 | struct ocfs2_node_map *map) | ||
52 | { | ||
53 | return ocfs2_node_map_iterate(osb, map, 0); | ||
54 | } | ||
55 | int ocfs2_recovery_map_set(struct ocfs2_super *osb, | ||
56 | int num); | ||
57 | void ocfs2_recovery_map_clear(struct ocfs2_super *osb, | ||
58 | int num); | ||
59 | 44 | ||
60 | #endif /* OCFS2_HEARTBEAT_H */ | 45 | #endif /* OCFS2_HEARTBEAT_H */ |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 5177fba5162b..7b142f0ce995 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
@@ -7,6 +7,7 @@ | |||
7 | 7 | ||
8 | #include <linux/fs.h> | 8 | #include <linux/fs.h> |
9 | #include <linux/mount.h> | 9 | #include <linux/mount.h> |
10 | #include <linux/smp_lock.h> | ||
10 | 11 | ||
11 | #define MLOG_MASK_PREFIX ML_INODE | 12 | #define MLOG_MASK_PREFIX ML_INODE |
12 | #include <cluster/masklog.h> | 13 | #include <cluster/masklog.h> |
@@ -59,10 +60,6 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, | |||
59 | goto bail; | 60 | goto bail; |
60 | } | 61 | } |
61 | 62 | ||
62 | status = -EROFS; | ||
63 | if (IS_RDONLY(inode)) | ||
64 | goto bail_unlock; | ||
65 | |||
66 | status = -EACCES; | 63 | status = -EACCES; |
67 | if (!is_owner_or_cap(inode)) | 64 | if (!is_owner_or_cap(inode)) |
68 | goto bail_unlock; | 65 | goto bail_unlock; |
@@ -112,9 +109,9 @@ bail: | |||
112 | return status; | 109 | return status; |
113 | } | 110 | } |
114 | 111 | ||
115 | int ocfs2_ioctl(struct inode * inode, struct file * filp, | 112 | long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
116 | unsigned int cmd, unsigned long arg) | ||
117 | { | 113 | { |
114 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
118 | unsigned int flags; | 115 | unsigned int flags; |
119 | int new_clusters; | 116 | int new_clusters; |
120 | int status; | 117 | int status; |
@@ -133,8 +130,13 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp, | |||
133 | if (get_user(flags, (int __user *) arg)) | 130 | if (get_user(flags, (int __user *) arg)) |
134 | return -EFAULT; | 131 | return -EFAULT; |
135 | 132 | ||
136 | return ocfs2_set_inode_attr(inode, flags, | 133 | status = mnt_want_write(filp->f_path.mnt); |
134 | if (status) | ||
135 | return status; | ||
136 | status = ocfs2_set_inode_attr(inode, flags, | ||
137 | OCFS2_FL_MODIFIABLE); | 137 | OCFS2_FL_MODIFIABLE); |
138 | mnt_drop_write(filp->f_path.mnt); | ||
139 | return status; | ||
138 | case OCFS2_IOC_RESVSP: | 140 | case OCFS2_IOC_RESVSP: |
139 | case OCFS2_IOC_RESVSP64: | 141 | case OCFS2_IOC_RESVSP64: |
140 | case OCFS2_IOC_UNRESVSP: | 142 | case OCFS2_IOC_UNRESVSP: |
@@ -168,9 +170,6 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp, | |||
168 | #ifdef CONFIG_COMPAT | 170 | #ifdef CONFIG_COMPAT |
169 | long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | 171 | long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) |
170 | { | 172 | { |
171 | struct inode *inode = file->f_path.dentry->d_inode; | ||
172 | int ret; | ||
173 | |||
174 | switch (cmd) { | 173 | switch (cmd) { |
175 | case OCFS2_IOC32_GETFLAGS: | 174 | case OCFS2_IOC32_GETFLAGS: |
176 | cmd = OCFS2_IOC_GETFLAGS; | 175 | cmd = OCFS2_IOC_GETFLAGS; |
@@ -190,9 +189,6 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
190 | return -ENOIOCTLCMD; | 189 | return -ENOIOCTLCMD; |
191 | } | 190 | } |
192 | 191 | ||
193 | lock_kernel(); | 192 | return ocfs2_ioctl(file, cmd, arg); |
194 | ret = ocfs2_ioctl(inode, file, cmd, arg); | ||
195 | unlock_kernel(); | ||
196 | return ret; | ||
197 | } | 193 | } |
198 | #endif | 194 | #endif |
diff --git a/fs/ocfs2/ioctl.h b/fs/ocfs2/ioctl.h index 4d6c4f430d0d..cf9a5ee30fef 100644 --- a/fs/ocfs2/ioctl.h +++ b/fs/ocfs2/ioctl.h | |||
@@ -10,8 +10,7 @@ | |||
10 | #ifndef OCFS2_IOCTL_H | 10 | #ifndef OCFS2_IOCTL_H |
11 | #define OCFS2_IOCTL_H | 11 | #define OCFS2_IOCTL_H |
12 | 12 | ||
13 | int ocfs2_ioctl(struct inode * inode, struct file * filp, | 13 | long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); |
14 | unsigned int cmd, unsigned long arg); | ||
15 | long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg); | 14 | long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg); |
16 | 15 | ||
17 | #endif /* OCFS2_IOCTL_H */ | 16 | #endif /* OCFS2_IOCTL_H */ |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index f31c7e8c19c3..9698338adc39 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -64,6 +64,137 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
64 | int slot); | 64 | int slot); |
65 | static int ocfs2_commit_thread(void *arg); | 65 | static int ocfs2_commit_thread(void *arg); |
66 | 66 | ||
67 | |||
68 | /* | ||
69 | * The recovery_list is a simple linked list of node numbers to recover. | ||
70 | * It is protected by the recovery_lock. | ||
71 | */ | ||
72 | |||
73 | struct ocfs2_recovery_map { | ||
74 | unsigned int rm_used; | ||
75 | unsigned int *rm_entries; | ||
76 | }; | ||
77 | |||
78 | int ocfs2_recovery_init(struct ocfs2_super *osb) | ||
79 | { | ||
80 | struct ocfs2_recovery_map *rm; | ||
81 | |||
82 | mutex_init(&osb->recovery_lock); | ||
83 | osb->disable_recovery = 0; | ||
84 | osb->recovery_thread_task = NULL; | ||
85 | init_waitqueue_head(&osb->recovery_event); | ||
86 | |||
87 | rm = kzalloc(sizeof(struct ocfs2_recovery_map) + | ||
88 | osb->max_slots * sizeof(unsigned int), | ||
89 | GFP_KERNEL); | ||
90 | if (!rm) { | ||
91 | mlog_errno(-ENOMEM); | ||
92 | return -ENOMEM; | ||
93 | } | ||
94 | |||
95 | rm->rm_entries = (unsigned int *)((char *)rm + | ||
96 | sizeof(struct ocfs2_recovery_map)); | ||
97 | osb->recovery_map = rm; | ||
98 | |||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | /* we can't grab the goofy sem lock from inside wait_event, so we use | ||
103 | * memory barriers to make sure that we'll see the null task before | ||
104 | * being woken up */ | ||
105 | static int ocfs2_recovery_thread_running(struct ocfs2_super *osb) | ||
106 | { | ||
107 | mb(); | ||
108 | return osb->recovery_thread_task != NULL; | ||
109 | } | ||
110 | |||
111 | void ocfs2_recovery_exit(struct ocfs2_super *osb) | ||
112 | { | ||
113 | struct ocfs2_recovery_map *rm; | ||
114 | |||
115 | /* disable any new recovery threads and wait for any currently | ||
116 | * running ones to exit. Do this before setting the vol_state. */ | ||
117 | mutex_lock(&osb->recovery_lock); | ||
118 | osb->disable_recovery = 1; | ||
119 | mutex_unlock(&osb->recovery_lock); | ||
120 | wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); | ||
121 | |||
122 | /* At this point, we know that no more recovery threads can be | ||
123 | * launched, so wait for any recovery completion work to | ||
124 | * complete. */ | ||
125 | flush_workqueue(ocfs2_wq); | ||
126 | |||
127 | /* | ||
128 | * Now that recovery is shut down, and the osb is about to be | ||
129 | * freed, the osb_lock is not taken here. | ||
130 | */ | ||
131 | rm = osb->recovery_map; | ||
132 | /* XXX: Should we bug if there are dirty entries? */ | ||
133 | |||
134 | kfree(rm); | ||
135 | } | ||
136 | |||
137 | static int __ocfs2_recovery_map_test(struct ocfs2_super *osb, | ||
138 | unsigned int node_num) | ||
139 | { | ||
140 | int i; | ||
141 | struct ocfs2_recovery_map *rm = osb->recovery_map; | ||
142 | |||
143 | assert_spin_locked(&osb->osb_lock); | ||
144 | |||
145 | for (i = 0; i < rm->rm_used; i++) { | ||
146 | if (rm->rm_entries[i] == node_num) | ||
147 | return 1; | ||
148 | } | ||
149 | |||
150 | return 0; | ||
151 | } | ||
152 | |||
153 | /* Behaves like test-and-set. Returns the previous value */ | ||
154 | static int ocfs2_recovery_map_set(struct ocfs2_super *osb, | ||
155 | unsigned int node_num) | ||
156 | { | ||
157 | struct ocfs2_recovery_map *rm = osb->recovery_map; | ||
158 | |||
159 | spin_lock(&osb->osb_lock); | ||
160 | if (__ocfs2_recovery_map_test(osb, node_num)) { | ||
161 | spin_unlock(&osb->osb_lock); | ||
162 | return 1; | ||
163 | } | ||
164 | |||
165 | /* XXX: Can this be exploited? Not from o2dlm... */ | ||
166 | BUG_ON(rm->rm_used >= osb->max_slots); | ||
167 | |||
168 | rm->rm_entries[rm->rm_used] = node_num; | ||
169 | rm->rm_used++; | ||
170 | spin_unlock(&osb->osb_lock); | ||
171 | |||
172 | return 0; | ||
173 | } | ||
174 | |||
175 | static void ocfs2_recovery_map_clear(struct ocfs2_super *osb, | ||
176 | unsigned int node_num) | ||
177 | { | ||
178 | int i; | ||
179 | struct ocfs2_recovery_map *rm = osb->recovery_map; | ||
180 | |||
181 | spin_lock(&osb->osb_lock); | ||
182 | |||
183 | for (i = 0; i < rm->rm_used; i++) { | ||
184 | if (rm->rm_entries[i] == node_num) | ||
185 | break; | ||
186 | } | ||
187 | |||
188 | if (i < rm->rm_used) { | ||
189 | /* XXX: be careful with the pointer math */ | ||
190 | memmove(&(rm->rm_entries[i]), &(rm->rm_entries[i + 1]), | ||
191 | (rm->rm_used - i - 1) * sizeof(unsigned int)); | ||
192 | rm->rm_used--; | ||
193 | } | ||
194 | |||
195 | spin_unlock(&osb->osb_lock); | ||
196 | } | ||
197 | |||
67 | static int ocfs2_commit_cache(struct ocfs2_super *osb) | 198 | static int ocfs2_commit_cache(struct ocfs2_super *osb) |
68 | { | 199 | { |
69 | int status = 0; | 200 | int status = 0; |
@@ -586,8 +717,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local) | |||
586 | 717 | ||
587 | mlog_entry_void(); | 718 | mlog_entry_void(); |
588 | 719 | ||
589 | if (!journal) | 720 | BUG_ON(!journal); |
590 | BUG(); | ||
591 | 721 | ||
592 | osb = journal->j_osb; | 722 | osb = journal->j_osb; |
593 | 723 | ||
@@ -650,6 +780,23 @@ bail: | |||
650 | return status; | 780 | return status; |
651 | } | 781 | } |
652 | 782 | ||
783 | static int ocfs2_recovery_completed(struct ocfs2_super *osb) | ||
784 | { | ||
785 | int empty; | ||
786 | struct ocfs2_recovery_map *rm = osb->recovery_map; | ||
787 | |||
788 | spin_lock(&osb->osb_lock); | ||
789 | empty = (rm->rm_used == 0); | ||
790 | spin_unlock(&osb->osb_lock); | ||
791 | |||
792 | return empty; | ||
793 | } | ||
794 | |||
795 | void ocfs2_wait_for_recovery(struct ocfs2_super *osb) | ||
796 | { | ||
797 | wait_event(osb->recovery_event, ocfs2_recovery_completed(osb)); | ||
798 | } | ||
799 | |||
653 | /* | 800 | /* |
654 | * JBD Might read a cached version of another nodes journal file. We | 801 | * JBD Might read a cached version of another nodes journal file. We |
655 | * don't want this as this file changes often and we get no | 802 | * don't want this as this file changes often and we get no |
@@ -848,6 +995,7 @@ static int __ocfs2_recovery_thread(void *arg) | |||
848 | { | 995 | { |
849 | int status, node_num; | 996 | int status, node_num; |
850 | struct ocfs2_super *osb = arg; | 997 | struct ocfs2_super *osb = arg; |
998 | struct ocfs2_recovery_map *rm = osb->recovery_map; | ||
851 | 999 | ||
852 | mlog_entry_void(); | 1000 | mlog_entry_void(); |
853 | 1001 | ||
@@ -863,26 +1011,29 @@ restart: | |||
863 | goto bail; | 1011 | goto bail; |
864 | } | 1012 | } |
865 | 1013 | ||
866 | while(!ocfs2_node_map_is_empty(osb, &osb->recovery_map)) { | 1014 | spin_lock(&osb->osb_lock); |
867 | node_num = ocfs2_node_map_first_set_bit(osb, | 1015 | while (rm->rm_used) { |
868 | &osb->recovery_map); | 1016 | /* It's always safe to remove entry zero, as we won't |
869 | if (node_num == O2NM_INVALID_NODE_NUM) { | 1017 | * clear it until ocfs2_recover_node() has succeeded. */ |
870 | mlog(0, "Out of nodes to recover.\n"); | 1018 | node_num = rm->rm_entries[0]; |
871 | break; | 1019 | spin_unlock(&osb->osb_lock); |
872 | } | ||
873 | 1020 | ||
874 | status = ocfs2_recover_node(osb, node_num); | 1021 | status = ocfs2_recover_node(osb, node_num); |
875 | if (status < 0) { | 1022 | if (!status) { |
1023 | ocfs2_recovery_map_clear(osb, node_num); | ||
1024 | } else { | ||
876 | mlog(ML_ERROR, | 1025 | mlog(ML_ERROR, |
877 | "Error %d recovering node %d on device (%u,%u)!\n", | 1026 | "Error %d recovering node %d on device (%u,%u)!\n", |
878 | status, node_num, | 1027 | status, node_num, |
879 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); | 1028 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); |
880 | mlog(ML_ERROR, "Volume requires unmount.\n"); | 1029 | mlog(ML_ERROR, "Volume requires unmount.\n"); |
881 | continue; | ||
882 | } | 1030 | } |
883 | 1031 | ||
884 | ocfs2_recovery_map_clear(osb, node_num); | 1032 | spin_lock(&osb->osb_lock); |
885 | } | 1033 | } |
1034 | spin_unlock(&osb->osb_lock); | ||
1035 | mlog(0, "All nodes recovered\n"); | ||
1036 | |||
886 | ocfs2_super_unlock(osb, 1); | 1037 | ocfs2_super_unlock(osb, 1); |
887 | 1038 | ||
888 | /* We always run recovery on our own orphan dir - the dead | 1039 | /* We always run recovery on our own orphan dir - the dead |
@@ -893,8 +1044,7 @@ restart: | |||
893 | 1044 | ||
894 | bail: | 1045 | bail: |
895 | mutex_lock(&osb->recovery_lock); | 1046 | mutex_lock(&osb->recovery_lock); |
896 | if (!status && | 1047 | if (!status && !ocfs2_recovery_completed(osb)) { |
897 | !ocfs2_node_map_is_empty(osb, &osb->recovery_map)) { | ||
898 | mutex_unlock(&osb->recovery_lock); | 1048 | mutex_unlock(&osb->recovery_lock); |
899 | goto restart; | 1049 | goto restart; |
900 | } | 1050 | } |
@@ -924,8 +1074,8 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) | |||
924 | 1074 | ||
925 | /* People waiting on recovery will wait on | 1075 | /* People waiting on recovery will wait on |
926 | * the recovery map to empty. */ | 1076 | * the recovery map to empty. */ |
927 | if (!ocfs2_recovery_map_set(osb, node_num)) | 1077 | if (ocfs2_recovery_map_set(osb, node_num)) |
928 | mlog(0, "node %d already be in recovery.\n", node_num); | 1078 | mlog(0, "node %d already in recovery map.\n", node_num); |
929 | 1079 | ||
930 | mlog(0, "starting recovery thread...\n"); | 1080 | mlog(0, "starting recovery thread...\n"); |
931 | 1081 | ||
@@ -1079,7 +1229,6 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, | |||
1079 | { | 1229 | { |
1080 | int status = 0; | 1230 | int status = 0; |
1081 | int slot_num; | 1231 | int slot_num; |
1082 | struct ocfs2_slot_info *si = osb->slot_info; | ||
1083 | struct ocfs2_dinode *la_copy = NULL; | 1232 | struct ocfs2_dinode *la_copy = NULL; |
1084 | struct ocfs2_dinode *tl_copy = NULL; | 1233 | struct ocfs2_dinode *tl_copy = NULL; |
1085 | 1234 | ||
@@ -1092,8 +1241,8 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, | |||
1092 | * case we should've called ocfs2_journal_load instead. */ | 1241 | * case we should've called ocfs2_journal_load instead. */ |
1093 | BUG_ON(osb->node_num == node_num); | 1242 | BUG_ON(osb->node_num == node_num); |
1094 | 1243 | ||
1095 | slot_num = ocfs2_node_num_to_slot(si, node_num); | 1244 | slot_num = ocfs2_node_num_to_slot(osb, node_num); |
1096 | if (slot_num == OCFS2_INVALID_SLOT) { | 1245 | if (slot_num == -ENOENT) { |
1097 | status = 0; | 1246 | status = 0; |
1098 | mlog(0, "no slot for this node, so no recovery required.\n"); | 1247 | mlog(0, "no slot for this node, so no recovery required.\n"); |
1099 | goto done; | 1248 | goto done; |
@@ -1123,8 +1272,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, | |||
1123 | 1272 | ||
1124 | /* Likewise, this would be a strange but ultimately not so | 1273 | /* Likewise, this would be a strange but ultimately not so |
1125 | * harmful place to get an error... */ | 1274 | * harmful place to get an error... */ |
1126 | ocfs2_clear_slot(si, slot_num); | 1275 | status = ocfs2_clear_slot(osb, slot_num); |
1127 | status = ocfs2_update_disk_slots(osb, si); | ||
1128 | if (status < 0) | 1276 | if (status < 0) |
1129 | mlog_errno(status); | 1277 | mlog_errno(status); |
1130 | 1278 | ||
@@ -1184,23 +1332,24 @@ bail: | |||
1184 | * slot info struct has been updated from disk. */ | 1332 | * slot info struct has been updated from disk. */ |
1185 | int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) | 1333 | int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) |
1186 | { | 1334 | { |
1187 | int status, i, node_num; | 1335 | unsigned int node_num; |
1188 | struct ocfs2_slot_info *si = osb->slot_info; | 1336 | int status, i; |
1189 | 1337 | ||
1190 | /* This is called with the super block cluster lock, so we | 1338 | /* This is called with the super block cluster lock, so we |
1191 | * know that the slot map can't change underneath us. */ | 1339 | * know that the slot map can't change underneath us. */ |
1192 | 1340 | ||
1193 | spin_lock(&si->si_lock); | 1341 | spin_lock(&osb->osb_lock); |
1194 | for(i = 0; i < si->si_num_slots; i++) { | 1342 | for (i = 0; i < osb->max_slots; i++) { |
1195 | if (i == osb->slot_num) | 1343 | if (i == osb->slot_num) |
1196 | continue; | 1344 | continue; |
1197 | if (ocfs2_is_empty_slot(si, i)) | 1345 | |
1346 | status = ocfs2_slot_to_node_num_locked(osb, i, &node_num); | ||
1347 | if (status == -ENOENT) | ||
1198 | continue; | 1348 | continue; |
1199 | 1349 | ||
1200 | node_num = si->si_global_node_nums[i]; | 1350 | if (__ocfs2_recovery_map_test(osb, node_num)) |
1201 | if (ocfs2_node_map_test_bit(osb, &osb->recovery_map, node_num)) | ||
1202 | continue; | 1351 | continue; |
1203 | spin_unlock(&si->si_lock); | 1352 | spin_unlock(&osb->osb_lock); |
1204 | 1353 | ||
1205 | /* Ok, we have a slot occupied by another node which | 1354 | /* Ok, we have a slot occupied by another node which |
1206 | * is not in the recovery map. We trylock his journal | 1355 | * is not in the recovery map. We trylock his journal |
@@ -1216,9 +1365,9 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) | |||
1216 | goto bail; | 1365 | goto bail; |
1217 | } | 1366 | } |
1218 | 1367 | ||
1219 | spin_lock(&si->si_lock); | 1368 | spin_lock(&osb->osb_lock); |
1220 | } | 1369 | } |
1221 | spin_unlock(&si->si_lock); | 1370 | spin_unlock(&osb->osb_lock); |
1222 | 1371 | ||
1223 | status = 0; | 1372 | status = 0; |
1224 | bail: | 1373 | bail: |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 220f3e818e78..db82be2532ed 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -134,6 +134,10 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb, | |||
134 | 134 | ||
135 | /* Exported only for the journal struct init code in super.c. Do not call. */ | 135 | /* Exported only for the journal struct init code in super.c. Do not call. */ |
136 | void ocfs2_complete_recovery(struct work_struct *work); | 136 | void ocfs2_complete_recovery(struct work_struct *work); |
137 | void ocfs2_wait_for_recovery(struct ocfs2_super *osb); | ||
138 | |||
139 | int ocfs2_recovery_init(struct ocfs2_super *osb); | ||
140 | void ocfs2_recovery_exit(struct ocfs2_super *osb); | ||
137 | 141 | ||
138 | /* | 142 | /* |
139 | * Journal Control: | 143 | * Journal Control: |
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index ab83fd562429..ce0dc147602a 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
@@ -447,6 +447,8 @@ out_mutex: | |||
447 | iput(main_bm_inode); | 447 | iput(main_bm_inode); |
448 | 448 | ||
449 | out: | 449 | out: |
450 | if (!status) | ||
451 | ocfs2_init_inode_steal_slot(osb); | ||
450 | mlog_exit(status); | 452 | mlog_exit(status); |
451 | return status; | 453 | return status; |
452 | } | 454 | } |
@@ -523,6 +525,8 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, | |||
523 | } | 525 | } |
524 | 526 | ||
525 | ac->ac_inode = local_alloc_inode; | 527 | ac->ac_inode = local_alloc_inode; |
528 | /* We should never use localalloc from another slot */ | ||
529 | ac->ac_alloc_slot = osb->slot_num; | ||
526 | ac->ac_which = OCFS2_AC_USE_LOCAL; | 530 | ac->ac_which = OCFS2_AC_USE_LOCAL; |
527 | get_bh(osb->local_alloc_bh); | 531 | get_bh(osb->local_alloc_bh); |
528 | ac->ac_bh = osb->local_alloc_bh; | 532 | ac->ac_bh = osb->local_alloc_bh; |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index ae9ad9587516..d5d808fe0140 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -424,7 +424,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, | |||
424 | fe->i_fs_generation = cpu_to_le32(osb->fs_generation); | 424 | fe->i_fs_generation = cpu_to_le32(osb->fs_generation); |
425 | fe->i_blkno = cpu_to_le64(fe_blkno); | 425 | fe->i_blkno = cpu_to_le64(fe_blkno); |
426 | fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); | 426 | fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); |
427 | fe->i_suballoc_slot = cpu_to_le16(osb->slot_num); | 427 | fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot); |
428 | fe->i_uid = cpu_to_le32(current->fsuid); | 428 | fe->i_uid = cpu_to_le32(current->fsuid); |
429 | if (dir->i_mode & S_ISGID) { | 429 | if (dir->i_mode & S_ISGID) { |
430 | fe->i_gid = cpu_to_le32(dir->i_gid); | 430 | fe->i_gid = cpu_to_le32(dir->i_gid); |
@@ -997,7 +997,7 @@ static int ocfs2_rename(struct inode *old_dir, | |||
997 | * | 997 | * |
998 | * And that's why, just like the VFS, we need a file system | 998 | * And that's why, just like the VFS, we need a file system |
999 | * rename lock. */ | 999 | * rename lock. */ |
1000 | if (old_dentry != new_dentry) { | 1000 | if (old_dir != new_dir && S_ISDIR(old_inode->i_mode)) { |
1001 | status = ocfs2_rename_lock(osb); | 1001 | status = ocfs2_rename_lock(osb); |
1002 | if (status < 0) { | 1002 | if (status < 0) { |
1003 | mlog_errno(status); | 1003 | mlog_errno(status); |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 6546cef212e3..31692379c170 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -36,11 +36,8 @@ | |||
36 | #include <linux/mutex.h> | 36 | #include <linux/mutex.h> |
37 | #include <linux/jbd.h> | 37 | #include <linux/jbd.h> |
38 | 38 | ||
39 | #include "cluster/nodemanager.h" | 39 | /* For union ocfs2_dlm_lksb */ |
40 | #include "cluster/heartbeat.h" | 40 | #include "stackglue.h" |
41 | #include "cluster/tcp.h" | ||
42 | |||
43 | #include "dlm/dlmapi.h" | ||
44 | 41 | ||
45 | #include "ocfs2_fs.h" | 42 | #include "ocfs2_fs.h" |
46 | #include "ocfs2_lockid.h" | 43 | #include "ocfs2_lockid.h" |
@@ -101,6 +98,9 @@ enum ocfs2_unlock_action { | |||
101 | * dropped. */ | 98 | * dropped. */ |
102 | #define OCFS2_LOCK_QUEUED (0x00000100) /* queued for downconvert */ | 99 | #define OCFS2_LOCK_QUEUED (0x00000100) /* queued for downconvert */ |
103 | #define OCFS2_LOCK_NOCACHE (0x00000200) /* don't use a holder count */ | 100 | #define OCFS2_LOCK_NOCACHE (0x00000200) /* don't use a holder count */ |
101 | #define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a | ||
102 | call to dlm_lock. Only | ||
103 | exists with BUSY set. */ | ||
104 | 104 | ||
105 | struct ocfs2_lock_res_ops; | 105 | struct ocfs2_lock_res_ops; |
106 | 106 | ||
@@ -120,13 +120,14 @@ struct ocfs2_lock_res { | |||
120 | int l_level; | 120 | int l_level; |
121 | unsigned int l_ro_holders; | 121 | unsigned int l_ro_holders; |
122 | unsigned int l_ex_holders; | 122 | unsigned int l_ex_holders; |
123 | struct dlm_lockstatus l_lksb; | 123 | union ocfs2_dlm_lksb l_lksb; |
124 | 124 | ||
125 | /* used from AST/BAST funcs. */ | 125 | /* used from AST/BAST funcs. */ |
126 | enum ocfs2_ast_action l_action; | 126 | enum ocfs2_ast_action l_action; |
127 | enum ocfs2_unlock_action l_unlock_action; | 127 | enum ocfs2_unlock_action l_unlock_action; |
128 | int l_requested; | 128 | int l_requested; |
129 | int l_blocking; | 129 | int l_blocking; |
130 | unsigned int l_pending_gen; | ||
130 | 131 | ||
131 | wait_queue_head_t l_event; | 132 | wait_queue_head_t l_event; |
132 | 133 | ||
@@ -179,6 +180,8 @@ enum ocfs2_mount_options | |||
179 | #define OCFS2_DEFAULT_ATIME_QUANTUM 60 | 180 | #define OCFS2_DEFAULT_ATIME_QUANTUM 60 |
180 | 181 | ||
181 | struct ocfs2_journal; | 182 | struct ocfs2_journal; |
183 | struct ocfs2_slot_info; | ||
184 | struct ocfs2_recovery_map; | ||
182 | struct ocfs2_super | 185 | struct ocfs2_super |
183 | { | 186 | { |
184 | struct task_struct *commit_task; | 187 | struct task_struct *commit_task; |
@@ -190,7 +193,6 @@ struct ocfs2_super | |||
190 | struct ocfs2_slot_info *slot_info; | 193 | struct ocfs2_slot_info *slot_info; |
191 | 194 | ||
192 | spinlock_t node_map_lock; | 195 | spinlock_t node_map_lock; |
193 | struct ocfs2_node_map recovery_map; | ||
194 | 196 | ||
195 | u64 root_blkno; | 197 | u64 root_blkno; |
196 | u64 system_dir_blkno; | 198 | u64 system_dir_blkno; |
@@ -206,25 +208,29 @@ struct ocfs2_super | |||
206 | u32 s_feature_incompat; | 208 | u32 s_feature_incompat; |
207 | u32 s_feature_ro_compat; | 209 | u32 s_feature_ro_compat; |
208 | 210 | ||
209 | /* Protects s_next_generaion, osb_flags. Could protect more on | 211 | /* Protects s_next_generation, osb_flags and s_inode_steal_slot. |
210 | * osb as it's very short lived. */ | 212 | * Could protect more on osb as it's very short lived. |
213 | */ | ||
211 | spinlock_t osb_lock; | 214 | spinlock_t osb_lock; |
212 | u32 s_next_generation; | 215 | u32 s_next_generation; |
213 | unsigned long osb_flags; | 216 | unsigned long osb_flags; |
217 | s16 s_inode_steal_slot; | ||
218 | atomic_t s_num_inodes_stolen; | ||
214 | 219 | ||
215 | unsigned long s_mount_opt; | 220 | unsigned long s_mount_opt; |
216 | unsigned int s_atime_quantum; | 221 | unsigned int s_atime_quantum; |
217 | 222 | ||
218 | u16 max_slots; | 223 | unsigned int max_slots; |
219 | s16 node_num; | 224 | unsigned int node_num; |
220 | s16 slot_num; | 225 | int slot_num; |
221 | s16 preferred_slot; | 226 | int preferred_slot; |
222 | int s_sectsize_bits; | 227 | int s_sectsize_bits; |
223 | int s_clustersize; | 228 | int s_clustersize; |
224 | int s_clustersize_bits; | 229 | int s_clustersize_bits; |
225 | 230 | ||
226 | atomic_t vol_state; | 231 | atomic_t vol_state; |
227 | struct mutex recovery_lock; | 232 | struct mutex recovery_lock; |
233 | struct ocfs2_recovery_map *recovery_map; | ||
228 | struct task_struct *recovery_thread_task; | 234 | struct task_struct *recovery_thread_task; |
229 | int disable_recovery; | 235 | int disable_recovery; |
230 | wait_queue_head_t checkpoint_event; | 236 | wait_queue_head_t checkpoint_event; |
@@ -245,12 +251,11 @@ struct ocfs2_super | |||
245 | struct ocfs2_alloc_stats alloc_stats; | 251 | struct ocfs2_alloc_stats alloc_stats; |
246 | char dev_str[20]; /* "major,minor" of the device */ | 252 | char dev_str[20]; /* "major,minor" of the device */ |
247 | 253 | ||
248 | struct dlm_ctxt *dlm; | 254 | char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; |
255 | struct ocfs2_cluster_connection *cconn; | ||
249 | struct ocfs2_lock_res osb_super_lockres; | 256 | struct ocfs2_lock_res osb_super_lockres; |
250 | struct ocfs2_lock_res osb_rename_lockres; | 257 | struct ocfs2_lock_res osb_rename_lockres; |
251 | struct dlm_eviction_cb osb_eviction_cb; | ||
252 | struct ocfs2_dlm_debug *osb_dlm_debug; | 258 | struct ocfs2_dlm_debug *osb_dlm_debug; |
253 | struct dlm_protocol_version osb_locking_proto; | ||
254 | 259 | ||
255 | struct dentry *osb_debug_root; | 260 | struct dentry *osb_debug_root; |
256 | 261 | ||
@@ -367,11 +372,24 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb) | |||
367 | return ret; | 372 | return ret; |
368 | } | 373 | } |
369 | 374 | ||
375 | static inline int ocfs2_userspace_stack(struct ocfs2_super *osb) | ||
376 | { | ||
377 | return (osb->s_feature_incompat & | ||
378 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK); | ||
379 | } | ||
380 | |||
370 | static inline int ocfs2_mount_local(struct ocfs2_super *osb) | 381 | static inline int ocfs2_mount_local(struct ocfs2_super *osb) |
371 | { | 382 | { |
372 | return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT); | 383 | return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT); |
373 | } | 384 | } |
374 | 385 | ||
386 | static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) | ||
387 | { | ||
388 | return (osb->s_feature_incompat & | ||
389 | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP); | ||
390 | } | ||
391 | |||
392 | |||
375 | #define OCFS2_IS_VALID_DINODE(ptr) \ | 393 | #define OCFS2_IS_VALID_DINODE(ptr) \ |
376 | (!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE)) | 394 | (!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE)) |
377 | 395 | ||
@@ -522,6 +540,33 @@ static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb) | |||
522 | return pages_per_cluster; | 540 | return pages_per_cluster; |
523 | } | 541 | } |
524 | 542 | ||
543 | static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb) | ||
544 | { | ||
545 | spin_lock(&osb->osb_lock); | ||
546 | osb->s_inode_steal_slot = OCFS2_INVALID_SLOT; | ||
547 | spin_unlock(&osb->osb_lock); | ||
548 | atomic_set(&osb->s_num_inodes_stolen, 0); | ||
549 | } | ||
550 | |||
551 | static inline void ocfs2_set_inode_steal_slot(struct ocfs2_super *osb, | ||
552 | s16 slot) | ||
553 | { | ||
554 | spin_lock(&osb->osb_lock); | ||
555 | osb->s_inode_steal_slot = slot; | ||
556 | spin_unlock(&osb->osb_lock); | ||
557 | } | ||
558 | |||
559 | static inline s16 ocfs2_get_inode_steal_slot(struct ocfs2_super *osb) | ||
560 | { | ||
561 | s16 slot; | ||
562 | |||
563 | spin_lock(&osb->osb_lock); | ||
564 | slot = osb->s_inode_steal_slot; | ||
565 | spin_unlock(&osb->osb_lock); | ||
566 | |||
567 | return slot; | ||
568 | } | ||
569 | |||
525 | #define ocfs2_set_bit ext2_set_bit | 570 | #define ocfs2_set_bit ext2_set_bit |
526 | #define ocfs2_clear_bit ext2_clear_bit | 571 | #define ocfs2_clear_bit ext2_clear_bit |
527 | #define ocfs2_test_bit ext2_test_bit | 572 | #define ocfs2_test_bit ext2_test_bit |
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 3633edd3982f..52c426665154 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
@@ -88,7 +88,9 @@ | |||
88 | #define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB | 88 | #define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB |
89 | #define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \ | 89 | #define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \ |
90 | | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \ | 90 | | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \ |
91 | | OCFS2_FEATURE_INCOMPAT_INLINE_DATA) | 91 | | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \ |
92 | | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \ | ||
93 | | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK) | ||
92 | #define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN | 94 | #define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN |
93 | 95 | ||
94 | /* | 96 | /* |
@@ -125,6 +127,21 @@ | |||
125 | /* Support for data packed into inode blocks */ | 127 | /* Support for data packed into inode blocks */ |
126 | #define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040 | 128 | #define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040 |
127 | 129 | ||
130 | /* Support for the extended slot map */ | ||
131 | #define OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP 0x100 | ||
132 | |||
133 | |||
134 | /* | ||
135 | * Support for alternate, userspace cluster stacks. If set, the superblock | ||
136 | * field s_cluster_info contains a tag for the alternate stack in use as | ||
137 | * well as the name of the cluster being joined. | ||
138 | * mount.ocfs2 must pass in a matching stack name. | ||
139 | * | ||
140 | * If not set, the classic stack will be used. This is compatible with | ||
141 | * all older versions. | ||
142 | */ | ||
143 | #define OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK 0x0080 | ||
144 | |||
128 | /* | 145 | /* |
129 | * backup superblock flag is used to indicate that this volume | 146 | * backup superblock flag is used to indicate that this volume |
130 | * has backup superblocks. | 147 | * has backup superblocks. |
@@ -267,6 +284,10 @@ struct ocfs2_new_group_input { | |||
267 | #define OCFS2_VOL_UUID_LEN 16 | 284 | #define OCFS2_VOL_UUID_LEN 16 |
268 | #define OCFS2_MAX_VOL_LABEL_LEN 64 | 285 | #define OCFS2_MAX_VOL_LABEL_LEN 64 |
269 | 286 | ||
287 | /* The alternate, userspace stack fields */ | ||
288 | #define OCFS2_STACK_LABEL_LEN 4 | ||
289 | #define OCFS2_CLUSTER_NAME_LEN 16 | ||
290 | |||
270 | /* Journal limits (in bytes) */ | 291 | /* Journal limits (in bytes) */ |
271 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) | 292 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) |
272 | 293 | ||
@@ -475,6 +496,47 @@ struct ocfs2_extent_block | |||
475 | }; | 496 | }; |
476 | 497 | ||
477 | /* | 498 | /* |
499 | * On disk slot map for OCFS2. This defines the contents of the "slot_map" | ||
500 | * system file. A slot is valid if it contains a node number >= 0. The | ||
501 | * value -1 (0xFFFF) is OCFS2_INVALID_SLOT. This marks a slot empty. | ||
502 | */ | ||
503 | struct ocfs2_slot_map { | ||
504 | /*00*/ __le16 sm_slots[0]; | ||
505 | /* | ||
506 | * Actual on-disk size is one block. OCFS2_MAX_SLOTS is 255, | ||
507 | * 255 * sizeof(__le16) == 510B, within the 512B block minimum blocksize. | ||
508 | */ | ||
509 | }; | ||
510 | |||
511 | struct ocfs2_extended_slot { | ||
512 | /*00*/ __u8 es_valid; | ||
513 | __u8 es_reserved1[3]; | ||
514 | __le32 es_node_num; | ||
515 | /*10*/ | ||
516 | }; | ||
517 | |||
518 | /* | ||
519 | * The extended slot map, used when OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP | ||
520 | * is set. It separates out the valid marker from the node number, and | ||
521 | * has room to grow. Unlike the old slot map, this format is defined by | ||
522 | * i_size. | ||
523 | */ | ||
524 | struct ocfs2_slot_map_extended { | ||
525 | /*00*/ struct ocfs2_extended_slot se_slots[0]; | ||
526 | /* | ||
527 | * Actual size is i_size of the slot_map system file. It should | ||
528 | * match s_max_slots * sizeof(struct ocfs2_extended_slot) | ||
529 | */ | ||
530 | }; | ||
531 | |||
532 | struct ocfs2_cluster_info { | ||
533 | /*00*/ __u8 ci_stack[OCFS2_STACK_LABEL_LEN]; | ||
534 | __le32 ci_reserved; | ||
535 | /*08*/ __u8 ci_cluster[OCFS2_CLUSTER_NAME_LEN]; | ||
536 | /*18*/ | ||
537 | }; | ||
538 | |||
539 | /* | ||
478 | * On disk superblock for OCFS2 | 540 | * On disk superblock for OCFS2 |
479 | * Note that it is contained inside an ocfs2_dinode, so all offsets | 541 | * Note that it is contained inside an ocfs2_dinode, so all offsets |
480 | * are relative to the start of ocfs2_dinode.id2. | 542 | * are relative to the start of ocfs2_dinode.id2. |
@@ -506,7 +568,20 @@ struct ocfs2_super_block { | |||
506 | * group header */ | 568 | * group header */ |
507 | /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ | 569 | /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ |
508 | /*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */ | 570 | /*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */ |
509 | /*A0*/ | 571 | /*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace |
572 | stack. Only valid | ||
573 | with INCOMPAT flag. */ | ||
574 | /*B8*/ __le64 s_reserved2[17]; /* Fill out superblock */ | ||
575 | /*140*/ | ||
576 | |||
577 | /* | ||
578 | * NOTE: As stated above, all offsets are relative to | ||
579 | * ocfs2_dinode.id2, which is at 0xC0 in the inode. | ||
580 | * 0xC0 + 0x140 = 0x200 or 512 bytes. A superblock must fit within | ||
581 | * our smallest blocksize, which is 512 bytes. To ensure this, | ||
582 | * we reserve the space in s_reserved2. Anything past s_reserved2 | ||
583 | * will not be available on the smallest blocksize. | ||
584 | */ | ||
510 | }; | 585 | }; |
511 | 586 | ||
512 | /* | 587 | /* |
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h index 86f3e3799c2b..82c200f7a8f1 100644 --- a/fs/ocfs2/ocfs2_lockid.h +++ b/fs/ocfs2/ocfs2_lockid.h | |||
@@ -100,7 +100,7 @@ static char *ocfs2_lock_type_strings[] = { | |||
100 | static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) | 100 | static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) |
101 | { | 101 | { |
102 | #ifdef __KERNEL__ | 102 | #ifdef __KERNEL__ |
103 | mlog_bug_on_msg(type >= OCFS2_NUM_LOCK_TYPES, "%d\n", type); | 103 | BUG_ON(type >= OCFS2_NUM_LOCK_TYPES); |
104 | #endif | 104 | #endif |
105 | return ocfs2_lock_type_strings[type]; | 105 | return ocfs2_lock_type_strings[type]; |
106 | } | 106 | } |
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index 3a50ce555e64..bb5ff8939bf1 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c | |||
@@ -42,81 +42,244 @@ | |||
42 | 42 | ||
43 | #include "buffer_head_io.h" | 43 | #include "buffer_head_io.h" |
44 | 44 | ||
45 | static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | 45 | |
46 | s16 global); | 46 | struct ocfs2_slot { |
47 | static void __ocfs2_fill_slot(struct ocfs2_slot_info *si, | 47 | int sl_valid; |
48 | s16 slot_num, | 48 | unsigned int sl_node_num; |
49 | s16 node_num); | 49 | }; |
50 | 50 | ||
51 | /* post the slot information on disk into our slot_info struct. */ | 51 | struct ocfs2_slot_info { |
52 | void ocfs2_update_slot_info(struct ocfs2_slot_info *si) | 52 | int si_extended; |
53 | int si_slots_per_block; | ||
54 | struct inode *si_inode; | ||
55 | unsigned int si_blocks; | ||
56 | struct buffer_head **si_bh; | ||
57 | unsigned int si_num_slots; | ||
58 | struct ocfs2_slot *si_slots; | ||
59 | }; | ||
60 | |||
61 | |||
62 | static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | ||
63 | unsigned int node_num); | ||
64 | |||
65 | static void ocfs2_invalidate_slot(struct ocfs2_slot_info *si, | ||
66 | int slot_num) | ||
67 | { | ||
68 | BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots)); | ||
69 | si->si_slots[slot_num].sl_valid = 0; | ||
70 | } | ||
71 | |||
72 | static void ocfs2_set_slot(struct ocfs2_slot_info *si, | ||
73 | int slot_num, unsigned int node_num) | ||
74 | { | ||
75 | BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots)); | ||
76 | |||
77 | si->si_slots[slot_num].sl_valid = 1; | ||
78 | si->si_slots[slot_num].sl_node_num = node_num; | ||
79 | } | ||
80 | |||
81 | /* This version is for the extended slot map */ | ||
82 | static void ocfs2_update_slot_info_extended(struct ocfs2_slot_info *si) | ||
83 | { | ||
84 | int b, i, slotno; | ||
85 | struct ocfs2_slot_map_extended *se; | ||
86 | |||
87 | slotno = 0; | ||
88 | for (b = 0; b < si->si_blocks; b++) { | ||
89 | se = (struct ocfs2_slot_map_extended *)si->si_bh[b]->b_data; | ||
90 | for (i = 0; | ||
91 | (i < si->si_slots_per_block) && | ||
92 | (slotno < si->si_num_slots); | ||
93 | i++, slotno++) { | ||
94 | if (se->se_slots[i].es_valid) | ||
95 | ocfs2_set_slot(si, slotno, | ||
96 | le32_to_cpu(se->se_slots[i].es_node_num)); | ||
97 | else | ||
98 | ocfs2_invalidate_slot(si, slotno); | ||
99 | } | ||
100 | } | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * Post the slot information on disk into our slot_info struct. | ||
105 | * Must be protected by osb_lock. | ||
106 | */ | ||
107 | static void ocfs2_update_slot_info_old(struct ocfs2_slot_info *si) | ||
53 | { | 108 | { |
54 | int i; | 109 | int i; |
55 | __le16 *disk_info; | 110 | struct ocfs2_slot_map *sm; |
56 | 111 | ||
57 | /* we don't read the slot block here as ocfs2_super_lock | 112 | sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data; |
58 | * should've made sure we have the most recent copy. */ | ||
59 | spin_lock(&si->si_lock); | ||
60 | disk_info = (__le16 *) si->si_bh->b_data; | ||
61 | 113 | ||
62 | for (i = 0; i < si->si_size; i++) | 114 | for (i = 0; i < si->si_num_slots; i++) { |
63 | si->si_global_node_nums[i] = le16_to_cpu(disk_info[i]); | 115 | if (le16_to_cpu(sm->sm_slots[i]) == (u16)OCFS2_INVALID_SLOT) |
116 | ocfs2_invalidate_slot(si, i); | ||
117 | else | ||
118 | ocfs2_set_slot(si, i, le16_to_cpu(sm->sm_slots[i])); | ||
119 | } | ||
120 | } | ||
64 | 121 | ||
65 | spin_unlock(&si->si_lock); | 122 | static void ocfs2_update_slot_info(struct ocfs2_slot_info *si) |
123 | { | ||
124 | /* | ||
125 | * The slot data will have been refreshed when ocfs2_super_lock | ||
126 | * was taken. | ||
127 | */ | ||
128 | if (si->si_extended) | ||
129 | ocfs2_update_slot_info_extended(si); | ||
130 | else | ||
131 | ocfs2_update_slot_info_old(si); | ||
132 | } | ||
133 | |||
134 | int ocfs2_refresh_slot_info(struct ocfs2_super *osb) | ||
135 | { | ||
136 | int ret; | ||
137 | struct ocfs2_slot_info *si = osb->slot_info; | ||
138 | |||
139 | if (si == NULL) | ||
140 | return 0; | ||
141 | |||
142 | BUG_ON(si->si_blocks == 0); | ||
143 | BUG_ON(si->si_bh == NULL); | ||
144 | |||
145 | mlog(0, "Refreshing slot map, reading %u block(s)\n", | ||
146 | si->si_blocks); | ||
147 | |||
148 | /* | ||
149 | * We pass -1 as blocknr because we expect all of si->si_bh to | ||
150 | * be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If | ||
151 | * this is not true, the read of -1 (UINT64_MAX) will fail. | ||
152 | */ | ||
153 | ret = ocfs2_read_blocks(osb, -1, si->si_blocks, si->si_bh, 0, | ||
154 | si->si_inode); | ||
155 | if (ret == 0) { | ||
156 | spin_lock(&osb->osb_lock); | ||
157 | ocfs2_update_slot_info(si); | ||
158 | spin_unlock(&osb->osb_lock); | ||
159 | } | ||
160 | |||
161 | return ret; | ||
66 | } | 162 | } |
67 | 163 | ||
68 | /* post our slot info stuff into its destination bh and write it | 164 | /* post our slot info stuff into its destination bh and write it |
69 | * out. */ | 165 | * out. */ |
70 | int ocfs2_update_disk_slots(struct ocfs2_super *osb, | 166 | static void ocfs2_update_disk_slot_extended(struct ocfs2_slot_info *si, |
71 | struct ocfs2_slot_info *si) | 167 | int slot_num, |
168 | struct buffer_head **bh) | ||
72 | { | 169 | { |
73 | int status, i; | 170 | int blkind = slot_num / si->si_slots_per_block; |
74 | __le16 *disk_info = (__le16 *) si->si_bh->b_data; | 171 | int slotno = slot_num % si->si_slots_per_block; |
172 | struct ocfs2_slot_map_extended *se; | ||
173 | |||
174 | BUG_ON(blkind >= si->si_blocks); | ||
175 | |||
176 | se = (struct ocfs2_slot_map_extended *)si->si_bh[blkind]->b_data; | ||
177 | se->se_slots[slotno].es_valid = si->si_slots[slot_num].sl_valid; | ||
178 | if (si->si_slots[slot_num].sl_valid) | ||
179 | se->se_slots[slotno].es_node_num = | ||
180 | cpu_to_le32(si->si_slots[slot_num].sl_node_num); | ||
181 | *bh = si->si_bh[blkind]; | ||
182 | } | ||
75 | 183 | ||
76 | spin_lock(&si->si_lock); | 184 | static void ocfs2_update_disk_slot_old(struct ocfs2_slot_info *si, |
77 | for (i = 0; i < si->si_size; i++) | 185 | int slot_num, |
78 | disk_info[i] = cpu_to_le16(si->si_global_node_nums[i]); | 186 | struct buffer_head **bh) |
79 | spin_unlock(&si->si_lock); | 187 | { |
188 | int i; | ||
189 | struct ocfs2_slot_map *sm; | ||
190 | |||
191 | sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data; | ||
192 | for (i = 0; i < si->si_num_slots; i++) { | ||
193 | if (si->si_slots[i].sl_valid) | ||
194 | sm->sm_slots[i] = | ||
195 | cpu_to_le16(si->si_slots[i].sl_node_num); | ||
196 | else | ||
197 | sm->sm_slots[i] = cpu_to_le16(OCFS2_INVALID_SLOT); | ||
198 | } | ||
199 | *bh = si->si_bh[0]; | ||
200 | } | ||
201 | |||
202 | static int ocfs2_update_disk_slot(struct ocfs2_super *osb, | ||
203 | struct ocfs2_slot_info *si, | ||
204 | int slot_num) | ||
205 | { | ||
206 | int status; | ||
207 | struct buffer_head *bh; | ||
208 | |||
209 | spin_lock(&osb->osb_lock); | ||
210 | if (si->si_extended) | ||
211 | ocfs2_update_disk_slot_extended(si, slot_num, &bh); | ||
212 | else | ||
213 | ocfs2_update_disk_slot_old(si, slot_num, &bh); | ||
214 | spin_unlock(&osb->osb_lock); | ||
80 | 215 | ||
81 | status = ocfs2_write_block(osb, si->si_bh, si->si_inode); | 216 | status = ocfs2_write_block(osb, bh, si->si_inode); |
82 | if (status < 0) | 217 | if (status < 0) |
83 | mlog_errno(status); | 218 | mlog_errno(status); |
84 | 219 | ||
85 | return status; | 220 | return status; |
86 | } | 221 | } |
87 | 222 | ||
88 | /* try to find global node in the slot info. Returns | 223 | /* |
89 | * OCFS2_INVALID_SLOT if nothing is found. */ | 224 | * Calculate how many bytes are needed by the slot map. Returns |
90 | static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | 225 | * an error if the slot map file is too small. |
91 | s16 global) | 226 | */ |
227 | static int ocfs2_slot_map_physical_size(struct ocfs2_super *osb, | ||
228 | struct inode *inode, | ||
229 | unsigned long long *bytes) | ||
92 | { | 230 | { |
93 | int i; | 231 | unsigned long long bytes_needed; |
94 | s16 ret = OCFS2_INVALID_SLOT; | 232 | |
233 | if (ocfs2_uses_extended_slot_map(osb)) { | ||
234 | bytes_needed = osb->max_slots * | ||
235 | sizeof(struct ocfs2_extended_slot); | ||
236 | } else { | ||
237 | bytes_needed = osb->max_slots * sizeof(__le16); | ||
238 | } | ||
239 | if (bytes_needed > i_size_read(inode)) { | ||
240 | mlog(ML_ERROR, | ||
241 | "Slot map file is too small! (size %llu, needed %llu)\n", | ||
242 | i_size_read(inode), bytes_needed); | ||
243 | return -ENOSPC; | ||
244 | } | ||
245 | |||
246 | *bytes = bytes_needed; | ||
247 | return 0; | ||
248 | } | ||
249 | |||
250 | /* try to find global node in the slot info. Returns -ENOENT | ||
251 | * if nothing is found. */ | ||
252 | static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | ||
253 | unsigned int node_num) | ||
254 | { | ||
255 | int i, ret = -ENOENT; | ||
95 | 256 | ||
96 | for(i = 0; i < si->si_num_slots; i++) { | 257 | for(i = 0; i < si->si_num_slots; i++) { |
97 | if (global == si->si_global_node_nums[i]) { | 258 | if (si->si_slots[i].sl_valid && |
98 | ret = (s16) i; | 259 | (node_num == si->si_slots[i].sl_node_num)) { |
260 | ret = i; | ||
99 | break; | 261 | break; |
100 | } | 262 | } |
101 | } | 263 | } |
264 | |||
102 | return ret; | 265 | return ret; |
103 | } | 266 | } |
104 | 267 | ||
105 | static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, s16 preferred) | 268 | static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, |
269 | int preferred) | ||
106 | { | 270 | { |
107 | int i; | 271 | int i, ret = -ENOSPC; |
108 | s16 ret = OCFS2_INVALID_SLOT; | ||
109 | 272 | ||
110 | if (preferred >= 0 && preferred < si->si_num_slots) { | 273 | if ((preferred >= 0) && (preferred < si->si_num_slots)) { |
111 | if (OCFS2_INVALID_SLOT == si->si_global_node_nums[preferred]) { | 274 | if (!si->si_slots[preferred].sl_valid) { |
112 | ret = preferred; | 275 | ret = preferred; |
113 | goto out; | 276 | goto out; |
114 | } | 277 | } |
115 | } | 278 | } |
116 | 279 | ||
117 | for(i = 0; i < si->si_num_slots; i++) { | 280 | for(i = 0; i < si->si_num_slots; i++) { |
118 | if (OCFS2_INVALID_SLOT == si->si_global_node_nums[i]) { | 281 | if (!si->si_slots[i].sl_valid) { |
119 | ret = (s16) i; | 282 | ret = i; |
120 | break; | 283 | break; |
121 | } | 284 | } |
122 | } | 285 | } |
@@ -124,58 +287,155 @@ out: | |||
124 | return ret; | 287 | return ret; |
125 | } | 288 | } |
126 | 289 | ||
127 | s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | 290 | int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num) |
128 | s16 global) | ||
129 | { | 291 | { |
130 | s16 ret; | 292 | int slot; |
293 | struct ocfs2_slot_info *si = osb->slot_info; | ||
131 | 294 | ||
132 | spin_lock(&si->si_lock); | 295 | spin_lock(&osb->osb_lock); |
133 | ret = __ocfs2_node_num_to_slot(si, global); | 296 | slot = __ocfs2_node_num_to_slot(si, node_num); |
134 | spin_unlock(&si->si_lock); | 297 | spin_unlock(&osb->osb_lock); |
135 | return ret; | 298 | |
299 | return slot; | ||
300 | } | ||
301 | |||
302 | int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num, | ||
303 | unsigned int *node_num) | ||
304 | { | ||
305 | struct ocfs2_slot_info *si = osb->slot_info; | ||
306 | |||
307 | assert_spin_locked(&osb->osb_lock); | ||
308 | |||
309 | BUG_ON(slot_num < 0); | ||
310 | BUG_ON(slot_num > osb->max_slots); | ||
311 | |||
312 | if (!si->si_slots[slot_num].sl_valid) | ||
313 | return -ENOENT; | ||
314 | |||
315 | *node_num = si->si_slots[slot_num].sl_node_num; | ||
316 | return 0; | ||
136 | } | 317 | } |
137 | 318 | ||
138 | static void __ocfs2_fill_slot(struct ocfs2_slot_info *si, | 319 | static void __ocfs2_free_slot_info(struct ocfs2_slot_info *si) |
139 | s16 slot_num, | ||
140 | s16 node_num) | ||
141 | { | 320 | { |
142 | BUG_ON(slot_num == OCFS2_INVALID_SLOT); | 321 | unsigned int i; |
143 | BUG_ON(slot_num >= si->si_num_slots); | 322 | |
144 | BUG_ON((node_num != O2NM_INVALID_NODE_NUM) && | 323 | if (si == NULL) |
145 | (node_num >= O2NM_MAX_NODES)); | 324 | return; |
325 | |||
326 | if (si->si_inode) | ||
327 | iput(si->si_inode); | ||
328 | if (si->si_bh) { | ||
329 | for (i = 0; i < si->si_blocks; i++) { | ||
330 | if (si->si_bh[i]) { | ||
331 | brelse(si->si_bh[i]); | ||
332 | si->si_bh[i] = NULL; | ||
333 | } | ||
334 | } | ||
335 | kfree(si->si_bh); | ||
336 | } | ||
146 | 337 | ||
147 | si->si_global_node_nums[slot_num] = node_num; | 338 | kfree(si); |
148 | } | 339 | } |
149 | 340 | ||
150 | void ocfs2_clear_slot(struct ocfs2_slot_info *si, | 341 | int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num) |
151 | s16 slot_num) | ||
152 | { | 342 | { |
153 | spin_lock(&si->si_lock); | 343 | struct ocfs2_slot_info *si = osb->slot_info; |
154 | __ocfs2_fill_slot(si, slot_num, OCFS2_INVALID_SLOT); | 344 | |
155 | spin_unlock(&si->si_lock); | 345 | if (si == NULL) |
346 | return 0; | ||
347 | |||
348 | spin_lock(&osb->osb_lock); | ||
349 | ocfs2_invalidate_slot(si, slot_num); | ||
350 | spin_unlock(&osb->osb_lock); | ||
351 | |||
352 | return ocfs2_update_disk_slot(osb, osb->slot_info, slot_num); | ||
156 | } | 353 | } |
157 | 354 | ||
158 | int ocfs2_init_slot_info(struct ocfs2_super *osb) | 355 | static int ocfs2_map_slot_buffers(struct ocfs2_super *osb, |
356 | struct ocfs2_slot_info *si) | ||
159 | { | 357 | { |
160 | int status, i; | 358 | int status = 0; |
161 | u64 blkno; | 359 | u64 blkno; |
360 | unsigned long long blocks, bytes; | ||
361 | unsigned int i; | ||
362 | struct buffer_head *bh; | ||
363 | |||
364 | status = ocfs2_slot_map_physical_size(osb, si->si_inode, &bytes); | ||
365 | if (status) | ||
366 | goto bail; | ||
367 | |||
368 | blocks = ocfs2_blocks_for_bytes(si->si_inode->i_sb, bytes); | ||
369 | BUG_ON(blocks > UINT_MAX); | ||
370 | si->si_blocks = blocks; | ||
371 | if (!si->si_blocks) | ||
372 | goto bail; | ||
373 | |||
374 | if (si->si_extended) | ||
375 | si->si_slots_per_block = | ||
376 | (osb->sb->s_blocksize / | ||
377 | sizeof(struct ocfs2_extended_slot)); | ||
378 | else | ||
379 | si->si_slots_per_block = osb->sb->s_blocksize / sizeof(__le16); | ||
380 | |||
381 | /* The size checks above should ensure this */ | ||
382 | BUG_ON((osb->max_slots / si->si_slots_per_block) > blocks); | ||
383 | |||
384 | mlog(0, "Slot map needs %u buffers for %llu bytes\n", | ||
385 | si->si_blocks, bytes); | ||
386 | |||
387 | si->si_bh = kzalloc(sizeof(struct buffer_head *) * si->si_blocks, | ||
388 | GFP_KERNEL); | ||
389 | if (!si->si_bh) { | ||
390 | status = -ENOMEM; | ||
391 | mlog_errno(status); | ||
392 | goto bail; | ||
393 | } | ||
394 | |||
395 | for (i = 0; i < si->si_blocks; i++) { | ||
396 | status = ocfs2_extent_map_get_blocks(si->si_inode, i, | ||
397 | &blkno, NULL, NULL); | ||
398 | if (status < 0) { | ||
399 | mlog_errno(status); | ||
400 | goto bail; | ||
401 | } | ||
402 | |||
403 | mlog(0, "Reading slot map block %u at %llu\n", i, | ||
404 | (unsigned long long)blkno); | ||
405 | |||
406 | bh = NULL; /* Acquire a fresh bh */ | ||
407 | status = ocfs2_read_block(osb, blkno, &bh, 0, si->si_inode); | ||
408 | if (status < 0) { | ||
409 | mlog_errno(status); | ||
410 | goto bail; | ||
411 | } | ||
412 | |||
413 | si->si_bh[i] = bh; | ||
414 | } | ||
415 | |||
416 | bail: | ||
417 | return status; | ||
418 | } | ||
419 | |||
420 | int ocfs2_init_slot_info(struct ocfs2_super *osb) | ||
421 | { | ||
422 | int status; | ||
162 | struct inode *inode = NULL; | 423 | struct inode *inode = NULL; |
163 | struct buffer_head *bh = NULL; | ||
164 | struct ocfs2_slot_info *si; | 424 | struct ocfs2_slot_info *si; |
165 | 425 | ||
166 | si = kzalloc(sizeof(struct ocfs2_slot_info), GFP_KERNEL); | 426 | si = kzalloc(sizeof(struct ocfs2_slot_info) + |
427 | (sizeof(struct ocfs2_slot) * osb->max_slots), | ||
428 | GFP_KERNEL); | ||
167 | if (!si) { | 429 | if (!si) { |
168 | status = -ENOMEM; | 430 | status = -ENOMEM; |
169 | mlog_errno(status); | 431 | mlog_errno(status); |
170 | goto bail; | 432 | goto bail; |
171 | } | 433 | } |
172 | 434 | ||
173 | spin_lock_init(&si->si_lock); | 435 | si->si_extended = ocfs2_uses_extended_slot_map(osb); |
174 | si->si_num_slots = osb->max_slots; | 436 | si->si_num_slots = osb->max_slots; |
175 | si->si_size = OCFS2_MAX_SLOTS; | 437 | si->si_slots = (struct ocfs2_slot *)((char *)si + |
176 | 438 | sizeof(struct ocfs2_slot_info)); | |
177 | for(i = 0; i < si->si_num_slots; i++) | ||
178 | si->si_global_node_nums[i] = OCFS2_INVALID_SLOT; | ||
179 | 439 | ||
180 | inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE, | 440 | inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE, |
181 | OCFS2_INVALID_SLOT); | 441 | OCFS2_INVALID_SLOT); |
@@ -185,61 +445,53 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb) | |||
185 | goto bail; | 445 | goto bail; |
186 | } | 446 | } |
187 | 447 | ||
188 | status = ocfs2_extent_map_get_blocks(inode, 0ULL, &blkno, NULL, NULL); | 448 | si->si_inode = inode; |
189 | if (status < 0) { | 449 | status = ocfs2_map_slot_buffers(osb, si); |
190 | mlog_errno(status); | ||
191 | goto bail; | ||
192 | } | ||
193 | |||
194 | status = ocfs2_read_block(osb, blkno, &bh, 0, inode); | ||
195 | if (status < 0) { | 450 | if (status < 0) { |
196 | mlog_errno(status); | 451 | mlog_errno(status); |
197 | goto bail; | 452 | goto bail; |
198 | } | 453 | } |
199 | 454 | ||
200 | si->si_inode = inode; | 455 | osb->slot_info = (struct ocfs2_slot_info *)si; |
201 | si->si_bh = bh; | ||
202 | osb->slot_info = si; | ||
203 | bail: | 456 | bail: |
204 | if (status < 0 && si) | 457 | if (status < 0 && si) |
205 | ocfs2_free_slot_info(si); | 458 | __ocfs2_free_slot_info(si); |
206 | 459 | ||
207 | return status; | 460 | return status; |
208 | } | 461 | } |
209 | 462 | ||
210 | void ocfs2_free_slot_info(struct ocfs2_slot_info *si) | 463 | void ocfs2_free_slot_info(struct ocfs2_super *osb) |
211 | { | 464 | { |
212 | if (si->si_inode) | 465 | struct ocfs2_slot_info *si = osb->slot_info; |
213 | iput(si->si_inode); | 466 | |
214 | if (si->si_bh) | 467 | osb->slot_info = NULL; |
215 | brelse(si->si_bh); | 468 | __ocfs2_free_slot_info(si); |
216 | kfree(si); | ||
217 | } | 469 | } |
218 | 470 | ||
219 | int ocfs2_find_slot(struct ocfs2_super *osb) | 471 | int ocfs2_find_slot(struct ocfs2_super *osb) |
220 | { | 472 | { |
221 | int status; | 473 | int status; |
222 | s16 slot; | 474 | int slot; |
223 | struct ocfs2_slot_info *si; | 475 | struct ocfs2_slot_info *si; |
224 | 476 | ||
225 | mlog_entry_void(); | 477 | mlog_entry_void(); |
226 | 478 | ||
227 | si = osb->slot_info; | 479 | si = osb->slot_info; |
228 | 480 | ||
481 | spin_lock(&osb->osb_lock); | ||
229 | ocfs2_update_slot_info(si); | 482 | ocfs2_update_slot_info(si); |
230 | 483 | ||
231 | spin_lock(&si->si_lock); | ||
232 | /* search for ourselves first and take the slot if it already | 484 | /* search for ourselves first and take the slot if it already |
233 | * exists. Perhaps we need to mark this in a variable for our | 485 | * exists. Perhaps we need to mark this in a variable for our |
234 | * own journal recovery? Possibly not, though we certainly | 486 | * own journal recovery? Possibly not, though we certainly |
235 | * need to warn to the user */ | 487 | * need to warn to the user */ |
236 | slot = __ocfs2_node_num_to_slot(si, osb->node_num); | 488 | slot = __ocfs2_node_num_to_slot(si, osb->node_num); |
237 | if (slot == OCFS2_INVALID_SLOT) { | 489 | if (slot < 0) { |
238 | /* if no slot yet, then just take 1st available | 490 | /* if no slot yet, then just take 1st available |
239 | * one. */ | 491 | * one. */ |
240 | slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); | 492 | slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); |
241 | if (slot == OCFS2_INVALID_SLOT) { | 493 | if (slot < 0) { |
242 | spin_unlock(&si->si_lock); | 494 | spin_unlock(&osb->osb_lock); |
243 | mlog(ML_ERROR, "no free slots available!\n"); | 495 | mlog(ML_ERROR, "no free slots available!\n"); |
244 | status = -EINVAL; | 496 | status = -EINVAL; |
245 | goto bail; | 497 | goto bail; |
@@ -248,13 +500,13 @@ int ocfs2_find_slot(struct ocfs2_super *osb) | |||
248 | mlog(ML_NOTICE, "slot %d is already allocated to this node!\n", | 500 | mlog(ML_NOTICE, "slot %d is already allocated to this node!\n", |
249 | slot); | 501 | slot); |
250 | 502 | ||
251 | __ocfs2_fill_slot(si, slot, osb->node_num); | 503 | ocfs2_set_slot(si, slot, osb->node_num); |
252 | osb->slot_num = slot; | 504 | osb->slot_num = slot; |
253 | spin_unlock(&si->si_lock); | 505 | spin_unlock(&osb->osb_lock); |
254 | 506 | ||
255 | mlog(0, "taking node slot %d\n", osb->slot_num); | 507 | mlog(0, "taking node slot %d\n", osb->slot_num); |
256 | 508 | ||
257 | status = ocfs2_update_disk_slots(osb, si); | 509 | status = ocfs2_update_disk_slot(osb, si, osb->slot_num); |
258 | if (status < 0) | 510 | if (status < 0) |
259 | mlog_errno(status); | 511 | mlog_errno(status); |
260 | 512 | ||
@@ -265,27 +517,27 @@ bail: | |||
265 | 517 | ||
266 | void ocfs2_put_slot(struct ocfs2_super *osb) | 518 | void ocfs2_put_slot(struct ocfs2_super *osb) |
267 | { | 519 | { |
268 | int status; | 520 | int status, slot_num; |
269 | struct ocfs2_slot_info *si = osb->slot_info; | 521 | struct ocfs2_slot_info *si = osb->slot_info; |
270 | 522 | ||
271 | if (!si) | 523 | if (!si) |
272 | return; | 524 | return; |
273 | 525 | ||
526 | spin_lock(&osb->osb_lock); | ||
274 | ocfs2_update_slot_info(si); | 527 | ocfs2_update_slot_info(si); |
275 | 528 | ||
276 | spin_lock(&si->si_lock); | 529 | slot_num = osb->slot_num; |
277 | __ocfs2_fill_slot(si, osb->slot_num, OCFS2_INVALID_SLOT); | 530 | ocfs2_invalidate_slot(si, osb->slot_num); |
278 | osb->slot_num = OCFS2_INVALID_SLOT; | 531 | osb->slot_num = OCFS2_INVALID_SLOT; |
279 | spin_unlock(&si->si_lock); | 532 | spin_unlock(&osb->osb_lock); |
280 | 533 | ||
281 | status = ocfs2_update_disk_slots(osb, si); | 534 | status = ocfs2_update_disk_slot(osb, si, slot_num); |
282 | if (status < 0) { | 535 | if (status < 0) { |
283 | mlog_errno(status); | 536 | mlog_errno(status); |
284 | goto bail; | 537 | goto bail; |
285 | } | 538 | } |
286 | 539 | ||
287 | bail: | 540 | bail: |
288 | osb->slot_info = NULL; | 541 | ocfs2_free_slot_info(osb); |
289 | ocfs2_free_slot_info(si); | ||
290 | } | 542 | } |
291 | 543 | ||
diff --git a/fs/ocfs2/slot_map.h b/fs/ocfs2/slot_map.h index 1025872aaade..601c95fd7003 100644 --- a/fs/ocfs2/slot_map.h +++ b/fs/ocfs2/slot_map.h | |||
@@ -27,38 +27,18 @@ | |||
27 | #ifndef SLOTMAP_H | 27 | #ifndef SLOTMAP_H |
28 | #define SLOTMAP_H | 28 | #define SLOTMAP_H |
29 | 29 | ||
30 | struct ocfs2_slot_info { | ||
31 | spinlock_t si_lock; | ||
32 | |||
33 | struct inode *si_inode; | ||
34 | struct buffer_head *si_bh; | ||
35 | unsigned int si_num_slots; | ||
36 | unsigned int si_size; | ||
37 | s16 si_global_node_nums[OCFS2_MAX_SLOTS]; | ||
38 | }; | ||
39 | |||
40 | int ocfs2_init_slot_info(struct ocfs2_super *osb); | 30 | int ocfs2_init_slot_info(struct ocfs2_super *osb); |
41 | void ocfs2_free_slot_info(struct ocfs2_slot_info *si); | 31 | void ocfs2_free_slot_info(struct ocfs2_super *osb); |
42 | 32 | ||
43 | int ocfs2_find_slot(struct ocfs2_super *osb); | 33 | int ocfs2_find_slot(struct ocfs2_super *osb); |
44 | void ocfs2_put_slot(struct ocfs2_super *osb); | 34 | void ocfs2_put_slot(struct ocfs2_super *osb); |
45 | 35 | ||
46 | void ocfs2_update_slot_info(struct ocfs2_slot_info *si); | 36 | int ocfs2_refresh_slot_info(struct ocfs2_super *osb); |
47 | int ocfs2_update_disk_slots(struct ocfs2_super *osb, | ||
48 | struct ocfs2_slot_info *si); | ||
49 | |||
50 | s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | ||
51 | s16 global); | ||
52 | void ocfs2_clear_slot(struct ocfs2_slot_info *si, | ||
53 | s16 slot_num); | ||
54 | 37 | ||
55 | static inline int ocfs2_is_empty_slot(struct ocfs2_slot_info *si, | 38 | int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num); |
56 | int slot_num) | 39 | int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num, |
57 | { | 40 | unsigned int *node_num); |
58 | BUG_ON(slot_num == OCFS2_INVALID_SLOT); | ||
59 | assert_spin_locked(&si->si_lock); | ||
60 | 41 | ||
61 | return si->si_global_node_nums[slot_num] == OCFS2_INVALID_SLOT; | 42 | int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num); |
62 | } | ||
63 | 43 | ||
64 | #endif | 44 | #endif |
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c new file mode 100644 index 000000000000..ac1d74c63bf5 --- /dev/null +++ b/fs/ocfs2/stack_o2cb.c | |||
@@ -0,0 +1,420 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * stack_o2cb.c | ||
5 | * | ||
6 | * Code which interfaces ocfs2 with the o2cb stack. | ||
7 | * | ||
8 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation, version 2. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
17 | * General Public License for more details. | ||
18 | */ | ||
19 | |||
20 | #include <linux/crc32.h> | ||
21 | #include <linux/module.h> | ||
22 | |||
23 | /* Needed for AOP_TRUNCATED_PAGE in mlog_errno() */ | ||
24 | #include <linux/fs.h> | ||
25 | |||
26 | #include "cluster/masklog.h" | ||
27 | #include "cluster/nodemanager.h" | ||
28 | #include "cluster/heartbeat.h" | ||
29 | |||
30 | #include "stackglue.h" | ||
31 | |||
32 | struct o2dlm_private { | ||
33 | struct dlm_eviction_cb op_eviction_cb; | ||
34 | }; | ||
35 | |||
36 | static struct ocfs2_stack_plugin o2cb_stack; | ||
37 | |||
38 | /* These should be identical */ | ||
39 | #if (DLM_LOCK_IV != LKM_IVMODE) | ||
40 | # error Lock modes do not match | ||
41 | #endif | ||
42 | #if (DLM_LOCK_NL != LKM_NLMODE) | ||
43 | # error Lock modes do not match | ||
44 | #endif | ||
45 | #if (DLM_LOCK_CR != LKM_CRMODE) | ||
46 | # error Lock modes do not match | ||
47 | #endif | ||
48 | #if (DLM_LOCK_CW != LKM_CWMODE) | ||
49 | # error Lock modes do not match | ||
50 | #endif | ||
51 | #if (DLM_LOCK_PR != LKM_PRMODE) | ||
52 | # error Lock modes do not match | ||
53 | #endif | ||
54 | #if (DLM_LOCK_PW != LKM_PWMODE) | ||
55 | # error Lock modes do not match | ||
56 | #endif | ||
57 | #if (DLM_LOCK_EX != LKM_EXMODE) | ||
58 | # error Lock modes do not match | ||
59 | #endif | ||
60 | static inline int mode_to_o2dlm(int mode) | ||
61 | { | ||
62 | BUG_ON(mode > LKM_MAXMODE); | ||
63 | |||
64 | return mode; | ||
65 | } | ||
66 | |||
67 | #define map_flag(_generic, _o2dlm) \ | ||
68 | if (flags & (_generic)) { \ | ||
69 | flags &= ~(_generic); \ | ||
70 | o2dlm_flags |= (_o2dlm); \ | ||
71 | } | ||
72 | static int flags_to_o2dlm(u32 flags) | ||
73 | { | ||
74 | int o2dlm_flags = 0; | ||
75 | |||
76 | map_flag(DLM_LKF_NOQUEUE, LKM_NOQUEUE); | ||
77 | map_flag(DLM_LKF_CANCEL, LKM_CANCEL); | ||
78 | map_flag(DLM_LKF_CONVERT, LKM_CONVERT); | ||
79 | map_flag(DLM_LKF_VALBLK, LKM_VALBLK); | ||
80 | map_flag(DLM_LKF_IVVALBLK, LKM_INVVALBLK); | ||
81 | map_flag(DLM_LKF_ORPHAN, LKM_ORPHAN); | ||
82 | map_flag(DLM_LKF_FORCEUNLOCK, LKM_FORCE); | ||
83 | map_flag(DLM_LKF_TIMEOUT, LKM_TIMEOUT); | ||
84 | map_flag(DLM_LKF_LOCAL, LKM_LOCAL); | ||
85 | |||
86 | /* map_flag() should have cleared every flag passed in */ | ||
87 | BUG_ON(flags != 0); | ||
88 | |||
89 | return o2dlm_flags; | ||
90 | } | ||
91 | #undef map_flag | ||
92 | |||
93 | /* | ||
94 | * Map an o2dlm status to standard errno values. | ||
95 | * | ||
96 | * o2dlm only uses a handful of these, and returns even fewer to the | ||
97 | * caller. Still, we try to assign sane values to each error. | ||
98 | * | ||
99 | * The following value pairs have special meanings to dlmglue, thus | ||
100 | * the right hand side needs to stay unique - never duplicate the | ||
101 | * mapping elsewhere in the table! | ||
102 | * | ||
103 | * DLM_NORMAL: 0 | ||
104 | * DLM_NOTQUEUED: -EAGAIN | ||
105 | * DLM_CANCELGRANT: -EBUSY | ||
106 | * DLM_CANCEL: -DLM_ECANCEL | ||
107 | */ | ||
108 | /* Keep in sync with dlmapi.h */ | ||
109 | static int status_map[] = { | ||
110 | [DLM_NORMAL] = 0, /* Success */ | ||
111 | [DLM_GRANTED] = -EINVAL, | ||
112 | [DLM_DENIED] = -EACCES, | ||
113 | [DLM_DENIED_NOLOCKS] = -EACCES, | ||
114 | [DLM_WORKING] = -EACCES, | ||
115 | [DLM_BLOCKED] = -EINVAL, | ||
116 | [DLM_BLOCKED_ORPHAN] = -EINVAL, | ||
117 | [DLM_DENIED_GRACE_PERIOD] = -EACCES, | ||
118 | [DLM_SYSERR] = -ENOMEM, /* It is what it is */ | ||
119 | [DLM_NOSUPPORT] = -EPROTO, | ||
120 | [DLM_CANCELGRANT] = -EBUSY, /* Cancel after grant */ | ||
121 | [DLM_IVLOCKID] = -EINVAL, | ||
122 | [DLM_SYNC] = -EINVAL, | ||
123 | [DLM_BADTYPE] = -EINVAL, | ||
124 | [DLM_BADRESOURCE] = -EINVAL, | ||
125 | [DLM_MAXHANDLES] = -ENOMEM, | ||
126 | [DLM_NOCLINFO] = -EINVAL, | ||
127 | [DLM_NOLOCKMGR] = -EINVAL, | ||
128 | [DLM_NOPURGED] = -EINVAL, | ||
129 | [DLM_BADARGS] = -EINVAL, | ||
130 | [DLM_VOID] = -EINVAL, | ||
131 | [DLM_NOTQUEUED] = -EAGAIN, /* Trylock failed */ | ||
132 | [DLM_IVBUFLEN] = -EINVAL, | ||
133 | [DLM_CVTUNGRANT] = -EPERM, | ||
134 | [DLM_BADPARAM] = -EINVAL, | ||
135 | [DLM_VALNOTVALID] = -EINVAL, | ||
136 | [DLM_REJECTED] = -EPERM, | ||
137 | [DLM_ABORT] = -EINVAL, | ||
138 | [DLM_CANCEL] = -DLM_ECANCEL, /* Successful cancel */ | ||
139 | [DLM_IVRESHANDLE] = -EINVAL, | ||
140 | [DLM_DEADLOCK] = -EDEADLK, | ||
141 | [DLM_DENIED_NOASTS] = -EINVAL, | ||
142 | [DLM_FORWARD] = -EINVAL, | ||
143 | [DLM_TIMEOUT] = -ETIMEDOUT, | ||
144 | [DLM_IVGROUPID] = -EINVAL, | ||
145 | [DLM_VERS_CONFLICT] = -EOPNOTSUPP, | ||
146 | [DLM_BAD_DEVICE_PATH] = -ENOENT, | ||
147 | [DLM_NO_DEVICE_PERMISSION] = -EPERM, | ||
148 | [DLM_NO_CONTROL_DEVICE] = -ENOENT, | ||
149 | [DLM_RECOVERING] = -ENOTCONN, | ||
150 | [DLM_MIGRATING] = -ERESTART, | ||
151 | [DLM_MAXSTATS] = -EINVAL, | ||
152 | }; | ||
153 | |||
154 | static int dlm_status_to_errno(enum dlm_status status) | ||
155 | { | ||
156 | BUG_ON(status > (sizeof(status_map) / sizeof(status_map[0]))); | ||
157 | |||
158 | return status_map[status]; | ||
159 | } | ||
160 | |||
161 | static void o2dlm_lock_ast_wrapper(void *astarg) | ||
162 | { | ||
163 | BUG_ON(o2cb_stack.sp_proto == NULL); | ||
164 | |||
165 | o2cb_stack.sp_proto->lp_lock_ast(astarg); | ||
166 | } | ||
167 | |||
168 | static void o2dlm_blocking_ast_wrapper(void *astarg, int level) | ||
169 | { | ||
170 | BUG_ON(o2cb_stack.sp_proto == NULL); | ||
171 | |||
172 | o2cb_stack.sp_proto->lp_blocking_ast(astarg, level); | ||
173 | } | ||
174 | |||
175 | static void o2dlm_unlock_ast_wrapper(void *astarg, enum dlm_status status) | ||
176 | { | ||
177 | int error = dlm_status_to_errno(status); | ||
178 | |||
179 | BUG_ON(o2cb_stack.sp_proto == NULL); | ||
180 | |||
181 | /* | ||
182 | * In o2dlm, you can get both the lock_ast() for the lock being | ||
183 | * granted and the unlock_ast() for the CANCEL failing. A | ||
184 | * successful cancel sends DLM_NORMAL here. If the | ||
185 | * lock grant happened before the cancel arrived, you get | ||
186 | * DLM_CANCELGRANT. | ||
187 | * | ||
188 | * There's no need for the double-ast. If we see DLM_CANCELGRANT, | ||
189 | * we just ignore it. We expect the lock_ast() to handle the | ||
190 | * granted lock. | ||
191 | */ | ||
192 | if (status == DLM_CANCELGRANT) | ||
193 | return; | ||
194 | |||
195 | o2cb_stack.sp_proto->lp_unlock_ast(astarg, error); | ||
196 | } | ||
197 | |||
198 | static int o2cb_dlm_lock(struct ocfs2_cluster_connection *conn, | ||
199 | int mode, | ||
200 | union ocfs2_dlm_lksb *lksb, | ||
201 | u32 flags, | ||
202 | void *name, | ||
203 | unsigned int namelen, | ||
204 | void *astarg) | ||
205 | { | ||
206 | enum dlm_status status; | ||
207 | int o2dlm_mode = mode_to_o2dlm(mode); | ||
208 | int o2dlm_flags = flags_to_o2dlm(flags); | ||
209 | int ret; | ||
210 | |||
211 | status = dlmlock(conn->cc_lockspace, o2dlm_mode, &lksb->lksb_o2dlm, | ||
212 | o2dlm_flags, name, namelen, | ||
213 | o2dlm_lock_ast_wrapper, astarg, | ||
214 | o2dlm_blocking_ast_wrapper); | ||
215 | ret = dlm_status_to_errno(status); | ||
216 | return ret; | ||
217 | } | ||
218 | |||
219 | static int o2cb_dlm_unlock(struct ocfs2_cluster_connection *conn, | ||
220 | union ocfs2_dlm_lksb *lksb, | ||
221 | u32 flags, | ||
222 | void *astarg) | ||
223 | { | ||
224 | enum dlm_status status; | ||
225 | int o2dlm_flags = flags_to_o2dlm(flags); | ||
226 | int ret; | ||
227 | |||
228 | status = dlmunlock(conn->cc_lockspace, &lksb->lksb_o2dlm, | ||
229 | o2dlm_flags, o2dlm_unlock_ast_wrapper, astarg); | ||
230 | ret = dlm_status_to_errno(status); | ||
231 | return ret; | ||
232 | } | ||
233 | |||
234 | static int o2cb_dlm_lock_status(union ocfs2_dlm_lksb *lksb) | ||
235 | { | ||
236 | return dlm_status_to_errno(lksb->lksb_o2dlm.status); | ||
237 | } | ||
238 | |||
239 | static void *o2cb_dlm_lvb(union ocfs2_dlm_lksb *lksb) | ||
240 | { | ||
241 | return (void *)(lksb->lksb_o2dlm.lvb); | ||
242 | } | ||
243 | |||
244 | static void o2cb_dump_lksb(union ocfs2_dlm_lksb *lksb) | ||
245 | { | ||
246 | dlm_print_one_lock(lksb->lksb_o2dlm.lockid); | ||
247 | } | ||
248 | |||
249 | /* | ||
250 | * Called from the dlm when it's about to evict a node. This is how the | ||
251 | * classic stack signals node death. | ||
252 | */ | ||
253 | static void o2dlm_eviction_cb(int node_num, void *data) | ||
254 | { | ||
255 | struct ocfs2_cluster_connection *conn = data; | ||
256 | |||
257 | mlog(ML_NOTICE, "o2dlm has evicted node %d from group %.*s\n", | ||
258 | node_num, conn->cc_namelen, conn->cc_name); | ||
259 | |||
260 | conn->cc_recovery_handler(node_num, conn->cc_recovery_data); | ||
261 | } | ||
262 | |||
263 | static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn) | ||
264 | { | ||
265 | int rc = 0; | ||
266 | u32 dlm_key; | ||
267 | struct dlm_ctxt *dlm; | ||
268 | struct o2dlm_private *priv; | ||
269 | struct dlm_protocol_version dlm_version; | ||
270 | |||
271 | BUG_ON(conn == NULL); | ||
272 | BUG_ON(o2cb_stack.sp_proto == NULL); | ||
273 | |||
274 | /* for now we only have one cluster/node, make sure we see it | ||
275 | * in the heartbeat universe */ | ||
276 | if (!o2hb_check_local_node_heartbeating()) { | ||
277 | rc = -EINVAL; | ||
278 | goto out; | ||
279 | } | ||
280 | |||
281 | priv = kzalloc(sizeof(struct o2dlm_private), GFP_KERNEL); | ||
282 | if (!priv) { | ||
283 | rc = -ENOMEM; | ||
284 | goto out_free; | ||
285 | } | ||
286 | |||
287 | /* This just fills the structure in. It is safe to pass conn. */ | ||
288 | dlm_setup_eviction_cb(&priv->op_eviction_cb, o2dlm_eviction_cb, | ||
289 | conn); | ||
290 | |||
291 | conn->cc_private = priv; | ||
292 | |||
293 | /* used by the dlm code to make message headers unique, each | ||
294 | * node in this domain must agree on this. */ | ||
295 | dlm_key = crc32_le(0, conn->cc_name, conn->cc_namelen); | ||
296 | dlm_version.pv_major = conn->cc_version.pv_major; | ||
297 | dlm_version.pv_minor = conn->cc_version.pv_minor; | ||
298 | |||
299 | dlm = dlm_register_domain(conn->cc_name, dlm_key, &dlm_version); | ||
300 | if (IS_ERR(dlm)) { | ||
301 | rc = PTR_ERR(dlm); | ||
302 | mlog_errno(rc); | ||
303 | goto out_free; | ||
304 | } | ||
305 | |||
306 | conn->cc_version.pv_major = dlm_version.pv_major; | ||
307 | conn->cc_version.pv_minor = dlm_version.pv_minor; | ||
308 | conn->cc_lockspace = dlm; | ||
309 | |||
310 | dlm_register_eviction_cb(dlm, &priv->op_eviction_cb); | ||
311 | |||
312 | out_free: | ||
313 | if (rc && conn->cc_private) | ||
314 | kfree(conn->cc_private); | ||
315 | |||
316 | out: | ||
317 | return rc; | ||
318 | } | ||
319 | |||
320 | static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn, | ||
321 | int hangup_pending) | ||
322 | { | ||
323 | struct dlm_ctxt *dlm = conn->cc_lockspace; | ||
324 | struct o2dlm_private *priv = conn->cc_private; | ||
325 | |||
326 | dlm_unregister_eviction_cb(&priv->op_eviction_cb); | ||
327 | conn->cc_private = NULL; | ||
328 | kfree(priv); | ||
329 | |||
330 | dlm_unregister_domain(dlm); | ||
331 | conn->cc_lockspace = NULL; | ||
332 | |||
333 | return 0; | ||
334 | } | ||
335 | |||
336 | static void o2hb_stop(const char *group) | ||
337 | { | ||
338 | int ret; | ||
339 | char *argv[5], *envp[3]; | ||
340 | |||
341 | argv[0] = (char *)o2nm_get_hb_ctl_path(); | ||
342 | argv[1] = "-K"; | ||
343 | argv[2] = "-u"; | ||
344 | argv[3] = (char *)group; | ||
345 | argv[4] = NULL; | ||
346 | |||
347 | mlog(0, "Run: %s %s %s %s\n", argv[0], argv[1], argv[2], argv[3]); | ||
348 | |||
349 | /* minimal command environment taken from cpu_run_sbin_hotplug */ | ||
350 | envp[0] = "HOME=/"; | ||
351 | envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; | ||
352 | envp[2] = NULL; | ||
353 | |||
354 | ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); | ||
355 | if (ret < 0) | ||
356 | mlog_errno(ret); | ||
357 | } | ||
358 | |||
359 | /* | ||
360 | * Hangup is a hack for tools compatibility. Older ocfs2-tools software | ||
361 | * expects the filesystem to call "ocfs2_hb_ctl" during unmount. This | ||
362 | * happens regardless of whether the DLM got started, so we can't do it | ||
363 | * in ocfs2_cluster_disconnect(). We bring the o2hb_stop() function into | ||
364 | * the glue and provide a "hangup" API for super.c to call. | ||
365 | * | ||
366 | * Other stacks will eventually provide a NULL ->hangup() pointer. | ||
367 | */ | ||
368 | static void o2cb_cluster_hangup(const char *group, int grouplen) | ||
369 | { | ||
370 | o2hb_stop(group); | ||
371 | } | ||
372 | |||
373 | static int o2cb_cluster_this_node(unsigned int *node) | ||
374 | { | ||
375 | int node_num; | ||
376 | |||
377 | node_num = o2nm_this_node(); | ||
378 | if (node_num == O2NM_INVALID_NODE_NUM) | ||
379 | return -ENOENT; | ||
380 | |||
381 | if (node_num >= O2NM_MAX_NODES) | ||
382 | return -EOVERFLOW; | ||
383 | |||
384 | *node = node_num; | ||
385 | return 0; | ||
386 | } | ||
387 | |||
388 | struct ocfs2_stack_operations o2cb_stack_ops = { | ||
389 | .connect = o2cb_cluster_connect, | ||
390 | .disconnect = o2cb_cluster_disconnect, | ||
391 | .hangup = o2cb_cluster_hangup, | ||
392 | .this_node = o2cb_cluster_this_node, | ||
393 | .dlm_lock = o2cb_dlm_lock, | ||
394 | .dlm_unlock = o2cb_dlm_unlock, | ||
395 | .lock_status = o2cb_dlm_lock_status, | ||
396 | .lock_lvb = o2cb_dlm_lvb, | ||
397 | .dump_lksb = o2cb_dump_lksb, | ||
398 | }; | ||
399 | |||
400 | static struct ocfs2_stack_plugin o2cb_stack = { | ||
401 | .sp_name = "o2cb", | ||
402 | .sp_ops = &o2cb_stack_ops, | ||
403 | .sp_owner = THIS_MODULE, | ||
404 | }; | ||
405 | |||
406 | static int __init o2cb_stack_init(void) | ||
407 | { | ||
408 | return ocfs2_stack_glue_register(&o2cb_stack); | ||
409 | } | ||
410 | |||
411 | static void __exit o2cb_stack_exit(void) | ||
412 | { | ||
413 | ocfs2_stack_glue_unregister(&o2cb_stack); | ||
414 | } | ||
415 | |||
416 | MODULE_AUTHOR("Oracle"); | ||
417 | MODULE_DESCRIPTION("ocfs2 driver for the classic o2cb stack"); | ||
418 | MODULE_LICENSE("GPL"); | ||
419 | module_init(o2cb_stack_init); | ||
420 | module_exit(o2cb_stack_exit); | ||
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c new file mode 100644 index 000000000000..7428663f9cbb --- /dev/null +++ b/fs/ocfs2/stack_user.c | |||
@@ -0,0 +1,883 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * stack_user.c | ||
5 | * | ||
6 | * Code which interfaces ocfs2 with fs/dlm and a userspace stack. | ||
7 | * | ||
8 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation, version 2. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
17 | * General Public License for more details. | ||
18 | */ | ||
19 | |||
20 | #include <linux/module.h> | ||
21 | #include <linux/fs.h> | ||
22 | #include <linux/miscdevice.h> | ||
23 | #include <linux/mutex.h> | ||
24 | #include <linux/reboot.h> | ||
25 | #include <asm/uaccess.h> | ||
26 | |||
27 | #include "ocfs2.h" /* For struct ocfs2_lock_res */ | ||
28 | #include "stackglue.h" | ||
29 | |||
30 | |||
31 | /* | ||
32 | * The control protocol starts with a handshake. Until the handshake | ||
33 | * is complete, the control device will fail all write(2)s. | ||
34 | * | ||
35 | * The handshake is simple. First, the client reads until EOF. Each line | ||
36 | * of output is a supported protocol tag. All protocol tags are a single | ||
37 | * character followed by a two hex digit version number. Currently the | ||
38 | * only things supported is T01, for "Text-base version 0x01". Next, the | ||
39 | * client writes the version they would like to use, including the newline. | ||
40 | * Thus, the protocol tag is 'T01\n'. If the version tag written is | ||
41 | * unknown, -EINVAL is returned. Once the negotiation is complete, the | ||
42 | * client can start sending messages. | ||
43 | * | ||
44 | * The T01 protocol has three messages. First is the "SETN" message. | ||
45 | * It has the following syntax: | ||
46 | * | ||
47 | * SETN<space><8-char-hex-nodenum><newline> | ||
48 | * | ||
49 | * This is 14 characters. | ||
50 | * | ||
51 | * The "SETN" message must be the first message following the protocol. | ||
52 | * It tells ocfs2_control the local node number. | ||
53 | * | ||
54 | * Next comes the "SETV" message. It has the following syntax: | ||
55 | * | ||
56 | * SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> | ||
57 | * | ||
58 | * This is 11 characters. | ||
59 | * | ||
60 | * The "SETV" message sets the filesystem locking protocol version as | ||
61 | * negotiated by the client. The client negotiates based on the maximum | ||
62 | * version advertised in /sys/fs/ocfs2/max_locking_protocol. The major | ||
63 | * number from the "SETV" message must match | ||
64 | * user_stack.sp_proto->lp_max_version.pv_major, and the minor number | ||
65 | * must be less than or equal to ...->lp_max_version.pv_minor. | ||
66 | * | ||
67 | * Once this information has been set, mounts will be allowed. From this | ||
68 | * point on, the "DOWN" message can be sent for node down notification. | ||
69 | * It has the following syntax: | ||
70 | * | ||
71 | * DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> | ||
72 | * | ||
73 | * eg: | ||
74 | * | ||
75 | * DOWN 632A924FDD844190BDA93C0DF6B94899 00000001\n | ||
76 | * | ||
77 | * This is 47 characters. | ||
78 | */ | ||
79 | |||
80 | /* | ||
81 | * Whether or not the client has done the handshake. | ||
82 | * For now, we have just one protocol version. | ||
83 | */ | ||
84 | #define OCFS2_CONTROL_PROTO "T01\n" | ||
85 | #define OCFS2_CONTROL_PROTO_LEN 4 | ||
86 | |||
87 | /* Handshake states */ | ||
88 | #define OCFS2_CONTROL_HANDSHAKE_INVALID (0) | ||
89 | #define OCFS2_CONTROL_HANDSHAKE_READ (1) | ||
90 | #define OCFS2_CONTROL_HANDSHAKE_PROTOCOL (2) | ||
91 | #define OCFS2_CONTROL_HANDSHAKE_VALID (3) | ||
92 | |||
93 | /* Messages */ | ||
94 | #define OCFS2_CONTROL_MESSAGE_OP_LEN 4 | ||
95 | #define OCFS2_CONTROL_MESSAGE_SETNODE_OP "SETN" | ||
96 | #define OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN 14 | ||
97 | #define OCFS2_CONTROL_MESSAGE_SETVERSION_OP "SETV" | ||
98 | #define OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN 11 | ||
99 | #define OCFS2_CONTROL_MESSAGE_DOWN_OP "DOWN" | ||
100 | #define OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN 47 | ||
101 | #define OCFS2_TEXT_UUID_LEN 32 | ||
102 | #define OCFS2_CONTROL_MESSAGE_VERNUM_LEN 2 | ||
103 | #define OCFS2_CONTROL_MESSAGE_NODENUM_LEN 8 | ||
104 | |||
105 | /* | ||
106 | * ocfs2_live_connection is refcounted because the filesystem and | ||
107 | * miscdevice sides can detach in different order. Let's just be safe. | ||
108 | */ | ||
109 | struct ocfs2_live_connection { | ||
110 | struct list_head oc_list; | ||
111 | struct ocfs2_cluster_connection *oc_conn; | ||
112 | }; | ||
113 | |||
114 | struct ocfs2_control_private { | ||
115 | struct list_head op_list; | ||
116 | int op_state; | ||
117 | int op_this_node; | ||
118 | struct ocfs2_protocol_version op_proto; | ||
119 | }; | ||
120 | |||
121 | /* SETN<space><8-char-hex-nodenum><newline> */ | ||
122 | struct ocfs2_control_message_setn { | ||
123 | char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; | ||
124 | char space; | ||
125 | char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN]; | ||
126 | char newline; | ||
127 | }; | ||
128 | |||
129 | /* SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> */ | ||
130 | struct ocfs2_control_message_setv { | ||
131 | char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; | ||
132 | char space1; | ||
133 | char major[OCFS2_CONTROL_MESSAGE_VERNUM_LEN]; | ||
134 | char space2; | ||
135 | char minor[OCFS2_CONTROL_MESSAGE_VERNUM_LEN]; | ||
136 | char newline; | ||
137 | }; | ||
138 | |||
139 | /* DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> */ | ||
140 | struct ocfs2_control_message_down { | ||
141 | char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; | ||
142 | char space1; | ||
143 | char uuid[OCFS2_TEXT_UUID_LEN]; | ||
144 | char space2; | ||
145 | char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN]; | ||
146 | char newline; | ||
147 | }; | ||
148 | |||
149 | union ocfs2_control_message { | ||
150 | char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; | ||
151 | struct ocfs2_control_message_setn u_setn; | ||
152 | struct ocfs2_control_message_setv u_setv; | ||
153 | struct ocfs2_control_message_down u_down; | ||
154 | }; | ||
155 | |||
156 | static struct ocfs2_stack_plugin user_stack; | ||
157 | |||
158 | static atomic_t ocfs2_control_opened; | ||
159 | static int ocfs2_control_this_node = -1; | ||
160 | static struct ocfs2_protocol_version running_proto; | ||
161 | |||
162 | static LIST_HEAD(ocfs2_live_connection_list); | ||
163 | static LIST_HEAD(ocfs2_control_private_list); | ||
164 | static DEFINE_MUTEX(ocfs2_control_lock); | ||
165 | |||
166 | static inline void ocfs2_control_set_handshake_state(struct file *file, | ||
167 | int state) | ||
168 | { | ||
169 | struct ocfs2_control_private *p = file->private_data; | ||
170 | p->op_state = state; | ||
171 | } | ||
172 | |||
173 | static inline int ocfs2_control_get_handshake_state(struct file *file) | ||
174 | { | ||
175 | struct ocfs2_control_private *p = file->private_data; | ||
176 | return p->op_state; | ||
177 | } | ||
178 | |||
179 | static struct ocfs2_live_connection *ocfs2_connection_find(const char *name) | ||
180 | { | ||
181 | size_t len = strlen(name); | ||
182 | struct ocfs2_live_connection *c; | ||
183 | |||
184 | BUG_ON(!mutex_is_locked(&ocfs2_control_lock)); | ||
185 | |||
186 | list_for_each_entry(c, &ocfs2_live_connection_list, oc_list) { | ||
187 | if ((c->oc_conn->cc_namelen == len) && | ||
188 | !strncmp(c->oc_conn->cc_name, name, len)) | ||
189 | return c; | ||
190 | } | ||
191 | |||
192 | return c; | ||
193 | } | ||
194 | |||
195 | /* | ||
196 | * ocfs2_live_connection structures are created underneath the ocfs2 | ||
197 | * mount path. Since the VFS prevents multiple calls to | ||
198 | * fill_super(), we can't get dupes here. | ||
199 | */ | ||
200 | static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn, | ||
201 | struct ocfs2_live_connection **c_ret) | ||
202 | { | ||
203 | int rc = 0; | ||
204 | struct ocfs2_live_connection *c; | ||
205 | |||
206 | c = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL); | ||
207 | if (!c) | ||
208 | return -ENOMEM; | ||
209 | |||
210 | mutex_lock(&ocfs2_control_lock); | ||
211 | c->oc_conn = conn; | ||
212 | |||
213 | if (atomic_read(&ocfs2_control_opened)) | ||
214 | list_add(&c->oc_list, &ocfs2_live_connection_list); | ||
215 | else { | ||
216 | printk(KERN_ERR | ||
217 | "ocfs2: Userspace control daemon is not present\n"); | ||
218 | rc = -ESRCH; | ||
219 | } | ||
220 | |||
221 | mutex_unlock(&ocfs2_control_lock); | ||
222 | |||
223 | if (!rc) | ||
224 | *c_ret = c; | ||
225 | else | ||
226 | kfree(c); | ||
227 | |||
228 | return rc; | ||
229 | } | ||
230 | |||
231 | /* | ||
232 | * This function disconnects the cluster connection from ocfs2_control. | ||
233 | * Afterwards, userspace can't affect the cluster connection. | ||
234 | */ | ||
235 | static void ocfs2_live_connection_drop(struct ocfs2_live_connection *c) | ||
236 | { | ||
237 | mutex_lock(&ocfs2_control_lock); | ||
238 | list_del_init(&c->oc_list); | ||
239 | c->oc_conn = NULL; | ||
240 | mutex_unlock(&ocfs2_control_lock); | ||
241 | |||
242 | kfree(c); | ||
243 | } | ||
244 | |||
245 | static int ocfs2_control_cfu(void *target, size_t target_len, | ||
246 | const char __user *buf, size_t count) | ||
247 | { | ||
248 | /* The T01 expects write(2) calls to have exactly one command */ | ||
249 | if ((count != target_len) || | ||
250 | (count > sizeof(union ocfs2_control_message))) | ||
251 | return -EINVAL; | ||
252 | |||
253 | if (copy_from_user(target, buf, target_len)) | ||
254 | return -EFAULT; | ||
255 | |||
256 | return 0; | ||
257 | } | ||
258 | |||
259 | static ssize_t ocfs2_control_validate_protocol(struct file *file, | ||
260 | const char __user *buf, | ||
261 | size_t count) | ||
262 | { | ||
263 | ssize_t ret; | ||
264 | char kbuf[OCFS2_CONTROL_PROTO_LEN]; | ||
265 | |||
266 | ret = ocfs2_control_cfu(kbuf, OCFS2_CONTROL_PROTO_LEN, | ||
267 | buf, count); | ||
268 | if (ret) | ||
269 | return ret; | ||
270 | |||
271 | if (strncmp(kbuf, OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN)) | ||
272 | return -EINVAL; | ||
273 | |||
274 | ocfs2_control_set_handshake_state(file, | ||
275 | OCFS2_CONTROL_HANDSHAKE_PROTOCOL); | ||
276 | |||
277 | return count; | ||
278 | } | ||
279 | |||
280 | static void ocfs2_control_send_down(const char *uuid, | ||
281 | int nodenum) | ||
282 | { | ||
283 | struct ocfs2_live_connection *c; | ||
284 | |||
285 | mutex_lock(&ocfs2_control_lock); | ||
286 | |||
287 | c = ocfs2_connection_find(uuid); | ||
288 | if (c) { | ||
289 | BUG_ON(c->oc_conn == NULL); | ||
290 | c->oc_conn->cc_recovery_handler(nodenum, | ||
291 | c->oc_conn->cc_recovery_data); | ||
292 | } | ||
293 | |||
294 | mutex_unlock(&ocfs2_control_lock); | ||
295 | } | ||
296 | |||
/*
 * Called whenever configuration elements are sent to /dev/ocfs2_control.
 * If all configuration elements are present, try to set the global
 * values.  If there is a problem, return an error.  Skip any missing
 * elements, and only bump ocfs2_control_opened when we have all elements
 * and are successful.
 */
static int ocfs2_control_install_private(struct file *file)
{
	int rc = 0;
	int set_p = 1;	/* stays 1 only if every element has arrived */
	struct ocfs2_control_private *p = file->private_data;

	BUG_ON(p->op_state != OCFS2_CONTROL_HANDSHAKE_PROTOCOL);

	mutex_lock(&ocfs2_control_lock);

	/* Node number: not yet set -> skip; conflicts with global -> error */
	if (p->op_this_node < 0) {
		set_p = 0;
	} else if ((ocfs2_control_this_node >= 0) &&
		   (ocfs2_control_this_node != p->op_this_node)) {
		rc = -EINVAL;
		goto out_unlock;
	}

	/*
	 * Protocol: major of 0 means "not yet set" -> skip; a version
	 * differing from the one live connections run -> error.
	 */
	if (!p->op_proto.pv_major) {
		set_p = 0;
	} else if (!list_empty(&ocfs2_live_connection_list) &&
		   ((running_proto.pv_major != p->op_proto.pv_major) ||
		    (running_proto.pv_minor != p->op_proto.pv_minor))) {
		rc = -EINVAL;
		goto out_unlock;
	}

	if (set_p) {
		ocfs2_control_this_node = p->op_this_node;
		running_proto.pv_major = p->op_proto.pv_major;
		running_proto.pv_minor = p->op_proto.pv_minor;
	}

out_unlock:
	mutex_unlock(&ocfs2_control_lock);

	if (!rc && set_p) {
		/* We set the global values successfully */
		atomic_inc(&ocfs2_control_opened);
		ocfs2_control_set_handshake_state(file,
					OCFS2_CONTROL_HANDSHAKE_VALID);
	}

	return rc;
}
349 | |||
350 | static int ocfs2_control_get_this_node(void) | ||
351 | { | ||
352 | int rc; | ||
353 | |||
354 | mutex_lock(&ocfs2_control_lock); | ||
355 | if (ocfs2_control_this_node < 0) | ||
356 | rc = -EINVAL; | ||
357 | else | ||
358 | rc = ocfs2_control_this_node; | ||
359 | mutex_unlock(&ocfs2_control_lock); | ||
360 | |||
361 | return rc; | ||
362 | } | ||
363 | |||
/*
 * Handle a "SETN <hex nodenum>\n" message.  Only legal during the
 * PROTOCOL handshake phase.  On success the node number is stashed in
 * the per-open private and install_private() tries to commit it.
 */
static int ocfs2_control_do_setnode_msg(struct file *file,
					struct ocfs2_control_message_setn *msg)
{
	long nodenum;
	char *ptr = NULL;
	struct ocfs2_control_private *p = file->private_data;

	if (ocfs2_control_get_handshake_state(file) !=
	    OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
		return -EINVAL;

	if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP,
		    OCFS2_CONTROL_MESSAGE_OP_LEN))
		return -EINVAL;

	/* NUL-terminate the number field in place before parsing */
	if ((msg->space != ' ') || (msg->newline != '\n'))
		return -EINVAL;
	msg->space = msg->newline = '\0';

	nodenum = simple_strtol(msg->nodestr, &ptr, 16);
	if (!ptr || *ptr)
		return -EINVAL;

	/* Reject strtol saturation and anything outside [0, INT_MAX] */
	if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
	    (nodenum > INT_MAX) || (nodenum < 0))
		return -ERANGE;
	p->op_this_node = nodenum;

	return ocfs2_control_install_private(file);
}
394 | |||
/*
 * Handle a "SETV <hex major> <hex minor>\n" message.  Only legal during
 * the PROTOCOL handshake phase.  The requested version must fit inside
 * the maximum the filesystem's locking protocol supports.
 */
static int ocfs2_control_do_setversion_msg(struct file *file,
					   struct ocfs2_control_message_setv *msg)
{
	long major, minor;
	char *ptr = NULL;
	struct ocfs2_control_private *p = file->private_data;
	struct ocfs2_protocol_version *max =
		&user_stack.sp_proto->lp_max_version;

	if (ocfs2_control_get_handshake_state(file) !=
	    OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
		return -EINVAL;

	if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP,
		    OCFS2_CONTROL_MESSAGE_OP_LEN))
		return -EINVAL;

	/* NUL-terminate both number fields in place before parsing */
	if ((msg->space1 != ' ') || (msg->space2 != ' ') ||
	    (msg->newline != '\n'))
		return -EINVAL;
	msg->space1 = msg->space2 = msg->newline = '\0';

	major = simple_strtol(msg->major, &ptr, 16);
	if (!ptr || *ptr)
		return -EINVAL;
	minor = simple_strtol(msg->minor, &ptr, 16);
	if (!ptr || *ptr)
		return -EINVAL;

	/*
	 * The major must be between 1 and 255, inclusive.  The minor
	 * must be between 0 and 255, inclusive.  The version passed in
	 * must be within the maximum version supported by the filesystem.
	 */
	if ((major == LONG_MIN) || (major == LONG_MAX) ||
	    (major > (u8)-1) || (major < 1))
		return -ERANGE;
	if ((minor == LONG_MIN) || (minor == LONG_MAX) ||
	    (minor > (u8)-1) || (minor < 0))
		return -ERANGE;
	if ((major != max->pv_major) ||
	    (minor > max->pv_minor))
		return -EINVAL;

	p->op_proto.pv_major = major;
	p->op_proto.pv_minor = minor;

	return ocfs2_control_install_private(file);
}
444 | |||
/*
 * Handle a "DOWN <uuid> <hex nodenum>\n" message.  Only legal once the
 * handshake is VALID.  Forwards the node-down event to the matching
 * live connection's recovery handler.
 */
static int ocfs2_control_do_down_msg(struct file *file,
				     struct ocfs2_control_message_down *msg)
{
	long nodenum;
	char *p = NULL;

	if (ocfs2_control_get_handshake_state(file) !=
	    OCFS2_CONTROL_HANDSHAKE_VALID)
		return -EINVAL;

	if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_DOWN_OP,
		    OCFS2_CONTROL_MESSAGE_OP_LEN))
		return -EINVAL;

	/* NUL-terminate the uuid and node fields in place */
	if ((msg->space1 != ' ') || (msg->space2 != ' ') ||
	    (msg->newline != '\n'))
		return -EINVAL;
	msg->space1 = msg->space2 = msg->newline = '\0';

	nodenum = simple_strtol(msg->nodestr, &p, 16);
	if (!p || *p)
		return -EINVAL;

	/* Reject strtol saturation and anything outside [0, INT_MAX] */
	if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
	    (nodenum > INT_MAX) || (nodenum < 0))
		return -ERANGE;

	ocfs2_control_send_down(msg->uuid, nodenum);

	return 0;
}
476 | |||
/*
 * Copy one fixed-size message from userspace and dispatch it on its
 * tag + exact length.  Messages are all-or-nothing: a short or unknown
 * write is -EINVAL.  Returns @count on success so the write consumes
 * the whole buffer.
 */
static ssize_t ocfs2_control_message(struct file *file,
				     const char __user *buf,
				     size_t count)
{
	ssize_t ret;
	union ocfs2_control_message msg;

	/* Try to catch padding issues */
	WARN_ON(offsetof(struct ocfs2_control_message_down, uuid) !=
		(sizeof(msg.u_down.tag) + sizeof(msg.u_down.space1)));

	memset(&msg, 0, sizeof(union ocfs2_control_message));
	ret = ocfs2_control_cfu(&msg, count, buf, count);
	if (ret)
		goto out;

	/* Each message type has a distinct fixed total length */
	if ((count == OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN) &&
	    !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP,
		     OCFS2_CONTROL_MESSAGE_OP_LEN))
		ret = ocfs2_control_do_setnode_msg(file, &msg.u_setn);
	else if ((count == OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN) &&
		 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP,
			  OCFS2_CONTROL_MESSAGE_OP_LEN))
		ret = ocfs2_control_do_setversion_msg(file, &msg.u_setv);
	else if ((count == OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN) &&
		 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_DOWN_OP,
			  OCFS2_CONTROL_MESSAGE_OP_LEN))
		ret = ocfs2_control_do_down_msg(file, &msg.u_down);
	else
		ret = -EINVAL;

out:
	return ret ? ret : count;
}
511 | |||
512 | static ssize_t ocfs2_control_write(struct file *file, | ||
513 | const char __user *buf, | ||
514 | size_t count, | ||
515 | loff_t *ppos) | ||
516 | { | ||
517 | ssize_t ret; | ||
518 | |||
519 | switch (ocfs2_control_get_handshake_state(file)) { | ||
520 | case OCFS2_CONTROL_HANDSHAKE_INVALID: | ||
521 | ret = -EINVAL; | ||
522 | break; | ||
523 | |||
524 | case OCFS2_CONTROL_HANDSHAKE_READ: | ||
525 | ret = ocfs2_control_validate_protocol(file, buf, | ||
526 | count); | ||
527 | break; | ||
528 | |||
529 | case OCFS2_CONTROL_HANDSHAKE_PROTOCOL: | ||
530 | case OCFS2_CONTROL_HANDSHAKE_VALID: | ||
531 | ret = ocfs2_control_message(file, buf, count); | ||
532 | break; | ||
533 | |||
534 | default: | ||
535 | BUG(); | ||
536 | ret = -EIO; | ||
537 | break; | ||
538 | } | ||
539 | |||
540 | return ret; | ||
541 | } | ||
542 | |||
/*
 * This is a naive version.  If we ever have a new protocol, we'll expand
 * it.  Probably using seq_file.
 *
 * Serves the supported-protocol string to the daemon; once the daemon
 * has read it all, the handshake advances to the READ state.
 */
static ssize_t ocfs2_control_read(struct file *file,
				  char __user *buf,
				  size_t count,
				  loff_t *ppos)
{
	char *proto_string = OCFS2_CONTROL_PROTO;
	size_t to_write = 0;

	if (*ppos >= OCFS2_CONTROL_PROTO_LEN)
		return 0;

	/* Serve the remainder of the string, capped at the caller's count */
	to_write = OCFS2_CONTROL_PROTO_LEN - *ppos;
	if (to_write > count)
		to_write = count;
	if (copy_to_user(buf, proto_string + *ppos, to_write))
		return -EFAULT;

	*ppos += to_write;

	/* Have we read the whole protocol list? */
	if (*ppos >= OCFS2_CONTROL_PROTO_LEN)
		ocfs2_control_set_handshake_state(file,
						  OCFS2_CONTROL_HANDSHAKE_READ);

	return to_write;
}
573 | |||
574 | static int ocfs2_control_release(struct inode *inode, struct file *file) | ||
575 | { | ||
576 | struct ocfs2_control_private *p = file->private_data; | ||
577 | |||
578 | mutex_lock(&ocfs2_control_lock); | ||
579 | |||
580 | if (ocfs2_control_get_handshake_state(file) != | ||
581 | OCFS2_CONTROL_HANDSHAKE_VALID) | ||
582 | goto out; | ||
583 | |||
584 | if (atomic_dec_and_test(&ocfs2_control_opened)) { | ||
585 | if (!list_empty(&ocfs2_live_connection_list)) { | ||
586 | /* XXX: Do bad things! */ | ||
587 | printk(KERN_ERR | ||
588 | "ocfs2: Unexpected release of ocfs2_control!\n" | ||
589 | " Loss of cluster connection requires " | ||
590 | "an emergency restart!\n"); | ||
591 | emergency_restart(); | ||
592 | } | ||
593 | /* | ||
594 | * Last valid close clears the node number and resets | ||
595 | * the locking protocol version | ||
596 | */ | ||
597 | ocfs2_control_this_node = -1; | ||
598 | running_proto.pv_major = 0; | ||
599 | running_proto.pv_major = 0; | ||
600 | } | ||
601 | |||
602 | out: | ||
603 | list_del_init(&p->op_list); | ||
604 | file->private_data = NULL; | ||
605 | |||
606 | mutex_unlock(&ocfs2_control_lock); | ||
607 | |||
608 | kfree(p); | ||
609 | |||
610 | return 0; | ||
611 | } | ||
612 | |||
613 | static int ocfs2_control_open(struct inode *inode, struct file *file) | ||
614 | { | ||
615 | struct ocfs2_control_private *p; | ||
616 | |||
617 | p = kzalloc(sizeof(struct ocfs2_control_private), GFP_KERNEL); | ||
618 | if (!p) | ||
619 | return -ENOMEM; | ||
620 | p->op_this_node = -1; | ||
621 | |||
622 | mutex_lock(&ocfs2_control_lock); | ||
623 | file->private_data = p; | ||
624 | list_add(&p->op_list, &ocfs2_control_private_list); | ||
625 | mutex_unlock(&ocfs2_control_lock); | ||
626 | |||
627 | return 0; | ||
628 | } | ||
629 | |||
/* File operations for the /dev/ocfs2_control misc device below */
static const struct file_operations ocfs2_control_fops = {
	.open    = ocfs2_control_open,
	.release = ocfs2_control_release,
	.read    = ocfs2_control_read,
	.write   = ocfs2_control_write,
	.owner   = THIS_MODULE,
};
637 | |||
/* The /dev/ocfs2_control device, registered in ocfs2_control_init() */
struct miscdevice ocfs2_control_device = {
	.minor		= MISC_DYNAMIC_MINOR,
	.name		= "ocfs2_control",
	.fops		= &ocfs2_control_fops,
};
643 | |||
/*
 * Register the ocfs2_control misc device.  Returns 0 or the negative
 * errno from misc_register() (also logged).
 */
static int ocfs2_control_init(void)
{
	int rc;

	atomic_set(&ocfs2_control_opened, 0);

	rc = misc_register(&ocfs2_control_device);
	if (rc)
		printk(KERN_ERR
		       "ocfs2: Unable to register ocfs2_control device "
		       "(errno %d)\n",
		       -rc);

	return rc;
}
659 | |||
/*
 * Unregister the ocfs2_control misc device.  misc_deregister() returns
 * an errno in this kernel generation; we can only log a failure.
 */
static void ocfs2_control_exit(void)
{
	int rc;

	rc = misc_deregister(&ocfs2_control_device);
	if (rc)
		printk(KERN_ERR
		       "ocfs2: Unable to deregister ocfs2_control device "
		       "(errno %d)\n",
		       -rc);
}
671 | |||
/*
 * The ast callbacks get the lock resource as their astarg; dig the
 * fs/dlm lksb out of it.
 */
static struct dlm_lksb *fsdlm_astarg_to_lksb(void *astarg)
{
	struct ocfs2_lock_res *res = astarg;
	return &res->l_lksb.lksb_fsdlm;
}
677 | |||
/*
 * Completion ast from fs/dlm.  Routes to the filesystem's unlock_ast
 * for unlock/cancel completions, and to its lock_ast for everything
 * else (including errors).
 */
static void fsdlm_lock_ast_wrapper(void *astarg)
{
	struct dlm_lksb *lksb = fsdlm_astarg_to_lksb(astarg);
	int status = lksb->sb_status;

	BUG_ON(user_stack.sp_proto == NULL);

	/*
	 * For now we're punting on the issue of other non-standard errors
	 * where we can't tell if the unlock_ast or lock_ast should be called.
	 * The main "other error" that's possible is EINVAL which means the
	 * function was called with invalid args, which shouldn't be possible
	 * since the caller here is under our control.  Other non-standard
	 * errors probably fall into the same category, or otherwise are fatal
	 * which means we can't carry on anyway.
	 */

	if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL)
		user_stack.sp_proto->lp_unlock_ast(astarg, 0);
	else
		user_stack.sp_proto->lp_lock_ast(astarg);
}
700 | |||
/* Blocking ast from fs/dlm: forward straight to the filesystem's hook */
static void fsdlm_blocking_ast_wrapper(void *astarg, int level)
{
	BUG_ON(user_stack.sp_proto == NULL);

	user_stack.sp_proto->lp_blocking_ast(astarg, level);
}
707 | |||
/*
 * dlm_lock() through fs/dlm.  If the caller hasn't supplied an lvb
 * buffer, point sb_lvbptr at the space directly after the dlm_lksb
 * inside the lksb union.  DLM_LKF_NODLCKWT is always set so deadlocked
 * requests error out instead of waiting.
 */
static int user_dlm_lock(struct ocfs2_cluster_connection *conn,
			 int mode,
			 union ocfs2_dlm_lksb *lksb,
			 u32 flags,
			 void *name,
			 unsigned int namelen,
			 void *astarg)
{
	int ret;

	if (!lksb->lksb_fsdlm.sb_lvbptr)
		lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb +
					     sizeof(struct dlm_lksb);

	ret = dlm_lock(conn->cc_lockspace, mode, &lksb->lksb_fsdlm,
		       flags|DLM_LKF_NODLCKWT, name, namelen, 0,
		       fsdlm_lock_ast_wrapper, astarg,
		       fsdlm_blocking_ast_wrapper);
	return ret;
}
728 | |||
729 | static int user_dlm_unlock(struct ocfs2_cluster_connection *conn, | ||
730 | union ocfs2_dlm_lksb *lksb, | ||
731 | u32 flags, | ||
732 | void *astarg) | ||
733 | { | ||
734 | int ret; | ||
735 | |||
736 | ret = dlm_unlock(conn->cc_lockspace, lksb->lksb_fsdlm.sb_lkid, | ||
737 | flags, &lksb->lksb_fsdlm, astarg); | ||
738 | return ret; | ||
739 | } | ||
740 | |||
/* Raw fs/dlm status of the last operation on this lksb */
static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
{
	return lksb->lksb_fsdlm.sb_status;
}
745 | |||
746 | static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb) | ||
747 | { | ||
748 | return (void *)(lksb->lksb_fsdlm.sb_lvbptr); | ||
749 | } | ||
750 | |||
/* fs/dlm has no lksb dump facility; intentionally a no-op */
static void user_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb)
{
}
754 | |||
755 | /* | ||
756 | * Compare a requested locking protocol version against the current one. | ||
757 | * | ||
758 | * If the major numbers are different, they are incompatible. | ||
759 | * If the current minor is greater than the request, they are incompatible. | ||
760 | * If the current minor is less than or equal to the request, they are | ||
761 | * compatible, and the requester should run at the current minor version. | ||
762 | */ | ||
763 | static int fs_protocol_compare(struct ocfs2_protocol_version *existing, | ||
764 | struct ocfs2_protocol_version *request) | ||
765 | { | ||
766 | if (existing->pv_major != request->pv_major) | ||
767 | return 1; | ||
768 | |||
769 | if (existing->pv_minor > request->pv_minor) | ||
770 | return 1; | ||
771 | |||
772 | if (existing->pv_minor < request->pv_minor) | ||
773 | request->pv_minor = existing->pv_minor; | ||
774 | |||
775 | return 0; | ||
776 | } | ||
777 | |||
/*
 * Bring up a cluster connection on the user stack: register a live
 * connection (which requires a valid control daemon handshake), verify
 * the filesystem's locking protocol against the daemon-negotiated one,
 * then create the fs/dlm lockspace named after the group.
 *
 * Returns 0 with cc_private/cc_lockspace filled in, or a negative errno
 * (-EPROTO on protocol mismatch) with everything torn down.
 */
static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
{
	dlm_lockspace_t *fsdlm;
	struct ocfs2_live_connection *control;
	int rc = 0;

	BUG_ON(conn == NULL);

	rc = ocfs2_live_connection_new(conn, &control);
	if (rc)
		goto out;

	/*
	 * running_proto must have been set before we allowed any mounts
	 * to proceed.
	 */
	if (fs_protocol_compare(&running_proto, &conn->cc_version)) {
		printk(KERN_ERR
		       "Unable to mount with fs locking protocol version "
		       "%u.%u because the userspace control daemon has "
		       "negotiated %u.%u\n",
		       conn->cc_version.pv_major, conn->cc_version.pv_minor,
		       running_proto.pv_major, running_proto.pv_minor);
		rc = -EPROTO;
		ocfs2_live_connection_drop(control);
		goto out;
	}

	rc = dlm_new_lockspace(conn->cc_name, strlen(conn->cc_name),
			       &fsdlm, DLM_LSFL_FS, DLM_LVB_LEN);
	if (rc) {
		ocfs2_live_connection_drop(control);
		goto out;
	}

	conn->cc_private = control;
	conn->cc_lockspace = fsdlm;
out:
	return rc;
}
818 | |||
/*
 * Tear down a connection: force-release the fs/dlm lockspace and drop
 * the live connection.  Always succeeds; @hangup_pending is unused on
 * the user stack.
 */
static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn,
				   int hangup_pending)
{
	dlm_release_lockspace(conn->cc_lockspace, 2);
	conn->cc_lockspace = NULL;
	ocfs2_live_connection_drop(conn->cc_private);
	conn->cc_private = NULL;
	return 0;
}
828 | |||
/*
 * Report the daemon-configured local node number, or a negative errno
 * if it has not been set yet.
 */
static int user_cluster_this_node(unsigned int *this_node)
{
	int node = ocfs2_control_get_this_node();

	if (node < 0)
		return node;

	*this_node = node;
	return 0;
}
840 | |||
/* Operations vector exported to the stack glue via user_stack below */
static struct ocfs2_stack_operations user_stack_ops = {
	.connect	= user_cluster_connect,
	.disconnect	= user_cluster_disconnect,
	.this_node	= user_cluster_this_node,
	.dlm_lock	= user_dlm_lock,
	.dlm_unlock	= user_dlm_unlock,
	.lock_status	= user_dlm_lock_status,
	.lock_lvb	= user_dlm_lvb,
	.dump_lksb	= user_dlm_dump_lksb,
};
851 | |||
/* The "user" cluster stack plugin, registered with the glue at init */
static struct ocfs2_stack_plugin user_stack = {
	.sp_name 	= "user",
	.sp_ops		= &user_stack_ops,
	.sp_owner	= THIS_MODULE,
};
857 | |||
858 | |||
859 | static int __init user_stack_init(void) | ||
860 | { | ||
861 | int rc; | ||
862 | |||
863 | rc = ocfs2_control_init(); | ||
864 | if (!rc) { | ||
865 | rc = ocfs2_stack_glue_register(&user_stack); | ||
866 | if (rc) | ||
867 | ocfs2_control_exit(); | ||
868 | } | ||
869 | |||
870 | return rc; | ||
871 | } | ||
872 | |||
/* Module exit: unregister from the glue, then drop the control device */
static void __exit user_stack_exit(void)
{
	ocfs2_stack_glue_unregister(&user_stack);
	ocfs2_control_exit();
}
878 | |||
/* Module metadata and entry points */
MODULE_AUTHOR("Oracle");
MODULE_DESCRIPTION("ocfs2 driver for userspace cluster stacks");
MODULE_LICENSE("GPL");
module_init(user_stack_init);
module_exit(user_stack_exit);
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c new file mode 100644 index 000000000000..119f60cea9cc --- /dev/null +++ b/fs/ocfs2/stackglue.c | |||
@@ -0,0 +1,568 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * stackglue.c | ||
5 | * | ||
6 | * Code which implements an OCFS2 specific interface to underlying | ||
7 | * cluster stacks. | ||
8 | * | ||
9 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or | ||
12 | * modify it under the terms of the GNU General Public | ||
13 | * License as published by the Free Software Foundation, version 2. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | */ | ||
20 | |||
21 | #include <linux/list.h> | ||
22 | #include <linux/spinlock.h> | ||
23 | #include <linux/module.h> | ||
24 | #include <linux/slab.h> | ||
25 | #include <linux/kmod.h> | ||
26 | #include <linux/fs.h> | ||
27 | #include <linux/kobject.h> | ||
28 | #include <linux/sysfs.h> | ||
29 | |||
30 | #include "ocfs2_fs.h" | ||
31 | |||
32 | #include "stackglue.h" | ||
33 | |||
/* Names of the two known plugin modules (ocfs2_stack_<name>) */
#define OCFS2_STACK_PLUGIN_O2CB		"o2cb"
#define OCFS2_STACK_PLUGIN_USER		"user"

/* Locking protocol handed down by the filesystem; set once at startup */
static struct ocfs2_locking_protocol *lproto;
/* Protects all of the stack-glue state below */
static DEFINE_SPINLOCK(ocfs2_stack_lock);
/* All registered plugins, linked through sp_list */
static LIST_HEAD(ocfs2_stack_list);
/* Stack label the cluster was configured with (from heartbeat setup) */
static char cluster_stack_name[OCFS2_STACK_LABEL_LEN + 1];

/*
 * The stack currently in use.  If not null, active_stack->sp_count > 0,
 * the module is pinned, and the locking protocol cannot be changed.
 */
static struct ocfs2_stack_plugin *active_stack;
47 | |||
48 | static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name) | ||
49 | { | ||
50 | struct ocfs2_stack_plugin *p; | ||
51 | |||
52 | assert_spin_locked(&ocfs2_stack_lock); | ||
53 | |||
54 | list_for_each_entry(p, &ocfs2_stack_list, sp_list) { | ||
55 | if (!strcmp(p->sp_name, name)) | ||
56 | return p; | ||
57 | } | ||
58 | |||
59 | return NULL; | ||
60 | } | ||
61 | |||
/*
 * Try to activate and pin the plugin @plugin_name on behalf of a mount
 * that asked for @stack_name.  Fails with -EBUSY if the requested stack
 * isn't the configured one or a different plugin is already active,
 * and with -ENOENT if the plugin module isn't loaded (the caller may
 * then request_module() and retry).  On success the plugin module is
 * pinned and its use count bumped.
 */
static int ocfs2_stack_driver_request(const char *stack_name,
				      const char *plugin_name)
{
	int rc;
	struct ocfs2_stack_plugin *p;

	spin_lock(&ocfs2_stack_lock);

	/*
	 * If the stack passed by the filesystem isn't the selected one,
	 * we can't continue.
	 */
	if (strcmp(stack_name, cluster_stack_name)) {
		rc = -EBUSY;
		goto out;
	}

	if (active_stack) {
		/*
		 * An active stack can only be re-requested; asking for a
		 * different one while this one is pinned is -EBUSY.
		 */
		if (!strcmp(active_stack->sp_name, plugin_name))
			rc = 0;
		else
			rc = -EBUSY;
		goto out;
	}

	p = ocfs2_stack_lookup(plugin_name);
	if (!p || !try_module_get(p->sp_owner)) {
		rc = -ENOENT;
		goto out;
	}

	/* Ok, the stack is pinned */
	p->sp_count++;
	active_stack = p;

	rc = 0;

out:
	spin_unlock(&ocfs2_stack_lock);
	return rc;
}
107 | |||
/*
 * This function looks up the appropriate stack and makes it active.  If
 * there is no stack, it tries to load it.  It will fail if the stack still
 * cannot be found.  It will also fail if a different stack is in use.
 */
static int ocfs2_stack_driver_get(const char *stack_name)
{
	int rc;
	char *plugin_name = OCFS2_STACK_PLUGIN_O2CB;

	/*
	 * Classic stack does not pass in a stack name.  This is
	 * compatible with older tools as well.
	 */
	if (!stack_name || !*stack_name)
		stack_name = OCFS2_STACK_PLUGIN_O2CB;

	if (strlen(stack_name) != OCFS2_STACK_LABEL_LEN) {
		printk(KERN_ERR
		       "ocfs2 passed an invalid cluster stack label: \"%s\"\n",
		       stack_name);
		return -EINVAL;
	}

	/* Anything that isn't the classic stack is a user stack */
	if (strcmp(stack_name, OCFS2_STACK_PLUGIN_O2CB))
		plugin_name = OCFS2_STACK_PLUGIN_USER;

	/* -ENOENT means the plugin module isn't loaded: load and retry */
	rc = ocfs2_stack_driver_request(stack_name, plugin_name);
	if (rc == -ENOENT) {
		request_module("ocfs2_stack_%s", plugin_name);
		rc = ocfs2_stack_driver_request(stack_name, plugin_name);
	}

	if (rc == -ENOENT) {
		printk(KERN_ERR
		       "ocfs2: Cluster stack driver \"%s\" cannot be found\n",
		       plugin_name);
	} else if (rc == -EBUSY) {
		printk(KERN_ERR
		       "ocfs2: A different cluster stack is in use\n");
	}

	return rc;
}
153 | |||
154 | static void ocfs2_stack_driver_put(void) | ||
155 | { | ||
156 | spin_lock(&ocfs2_stack_lock); | ||
157 | BUG_ON(active_stack == NULL); | ||
158 | BUG_ON(active_stack->sp_count == 0); | ||
159 | |||
160 | active_stack->sp_count--; | ||
161 | if (!active_stack->sp_count) { | ||
162 | module_put(active_stack->sp_owner); | ||
163 | active_stack = NULL; | ||
164 | } | ||
165 | spin_unlock(&ocfs2_stack_lock); | ||
166 | } | ||
167 | |||
/*
 * Called by plugin modules at load time.  Adds the plugin to the list
 * (handing it the current locking protocol) unless a plugin of the same
 * name is already registered, in which case -EEXIST.
 */
int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin)
{
	int rc;

	spin_lock(&ocfs2_stack_lock);
	if (!ocfs2_stack_lookup(plugin->sp_name)) {
		plugin->sp_count = 0;
		plugin->sp_proto = lproto;
		list_add(&plugin->sp_list, &ocfs2_stack_list);
		printk(KERN_INFO "ocfs2: Registered cluster interface %s\n",
		       plugin->sp_name);
		rc = 0;
	} else {
		printk(KERN_ERR "ocfs2: Stack \"%s\" already registered\n",
		       plugin->sp_name);
		rc = -EEXIST;
	}
	spin_unlock(&ocfs2_stack_lock);

	return rc;
}
EXPORT_SYMBOL_GPL(ocfs2_stack_glue_register);
190 | |||
/*
 * Called by plugin modules at unload time.  The plugin must not be the
 * active stack and must have no users (the BUG_ONs enforce the module
 * refcounting done in driver_request/put).
 */
void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin)
{
	struct ocfs2_stack_plugin *p;

	spin_lock(&ocfs2_stack_lock);
	p = ocfs2_stack_lookup(plugin->sp_name);
	if (p) {
		BUG_ON(p != plugin);
		BUG_ON(plugin == active_stack);
		BUG_ON(plugin->sp_count != 0);
		list_del_init(&plugin->sp_list);
		printk(KERN_INFO "ocfs2: Unregistered cluster interface %s\n",
		       plugin->sp_name);
	} else {
		printk(KERN_ERR "Stack \"%s\" is not registered\n",
		       plugin->sp_name);
	}
	spin_unlock(&ocfs2_stack_lock);
}
EXPORT_SYMBOL_GPL(ocfs2_stack_glue_unregister);
211 | |||
/*
 * Install the filesystem's locking protocol and propagate it to every
 * registered plugin.  Must happen before any stack becomes active
 * (enforced by the BUG_ON) so the protocol never changes under a user.
 */
void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto)
{
	struct ocfs2_stack_plugin *p;

	BUG_ON(proto == NULL);

	spin_lock(&ocfs2_stack_lock);
	BUG_ON(active_stack != NULL);

	lproto = proto;
	list_for_each_entry(p, &ocfs2_stack_list, sp_list) {
		p->sp_proto = lproto;
	}

	spin_unlock(&ocfs2_stack_lock);
}
EXPORT_SYMBOL_GPL(ocfs2_stack_glue_set_locking_protocol);
229 | |||
230 | |||
/*
 * The ocfs2_dlm_lock() and ocfs2_dlm_unlock() functions take
 * "struct ocfs2_lock_res *astarg" instead of "void *astarg" because the
 * underlying stack plugins need to pilfer the lksb off of the lock_res.
 * If some other structure needs to be passed as an astarg, the plugins
 * will need to be given a different avenue to the lksb.
 */
int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn,
		   int mode,
		   union ocfs2_dlm_lksb *lksb,
		   u32 flags,
		   void *name,
		   unsigned int namelen,
		   struct ocfs2_lock_res *astarg)
{
	/* A locking protocol must be installed before locks are taken */
	BUG_ON(lproto == NULL);

	return active_stack->sp_ops->dlm_lock(conn, mode, lksb, flags,
					      name, namelen, astarg);
}
EXPORT_SYMBOL_GPL(ocfs2_dlm_lock);
252 | |||
/* Unlock through the active stack; see the astarg note on ocfs2_dlm_lock */
int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn,
		     union ocfs2_dlm_lksb *lksb,
		     u32 flags,
		     struct ocfs2_lock_res *astarg)
{
	BUG_ON(lproto == NULL);

	return active_stack->sp_ops->dlm_unlock(conn, lksb, flags, astarg);
}
EXPORT_SYMBOL_GPL(ocfs2_dlm_unlock);
263 | |||
/* Status of the last dlm operation, in the active stack's convention */
int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
{
	return active_stack->sp_ops->lock_status(lksb);
}
EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status);
269 | |||
/*
 * Why don't we cast to ocfs2_meta_lvb?  The "clean" answer is that we
 * don't cast at the glue level.  The real answer is that the header
 * ordering is nigh impossible.
 */
void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb)
{
	return active_stack->sp_ops->lock_lvb(lksb);
}
EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb);
280 | |||
/* Debug dump of an lksb; may be a no-op depending on the stack */
void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb)
{
	active_stack->sp_ops->dump_lksb(lksb);
}
EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb);
286 | |||
/*
 * Create a cluster connection for lock group @group: allocate the
 * connection, pin the appropriate stack driver (loading it if needed),
 * and hand off to the stack's connect op.  @recovery_handler is called
 * on node-down events with @recovery_data.
 *
 * On success *conn is set and the stack driver stays pinned until
 * ocfs2_cluster_disconnect()/hangup().  On failure everything is
 * unwound and a negative errno returned.
 */
int ocfs2_cluster_connect(const char *stack_name,
			  const char *group,
			  int grouplen,
			  void (*recovery_handler)(int node_num,
						   void *recovery_data),
			  void *recovery_data,
			  struct ocfs2_cluster_connection **conn)
{
	int rc = 0;
	struct ocfs2_cluster_connection *new_conn;

	BUG_ON(group == NULL);
	BUG_ON(conn == NULL);
	BUG_ON(recovery_handler == NULL);

	if (grouplen > GROUP_NAME_MAX) {
		rc = -EINVAL;
		goto out;
	}

	new_conn = kzalloc(sizeof(struct ocfs2_cluster_connection),
			   GFP_KERNEL);
	if (!new_conn) {
		rc = -ENOMEM;
		goto out;
	}

	memcpy(new_conn->cc_name, group, grouplen);
	new_conn->cc_namelen = grouplen;
	new_conn->cc_recovery_handler = recovery_handler;
	new_conn->cc_recovery_data = recovery_data;

	/* Start the new connection at our maximum compatibility level */
	new_conn->cc_version = lproto->lp_max_version;

	/* This will pin the stack driver if successful */
	rc = ocfs2_stack_driver_get(stack_name);
	if (rc)
		goto out_free;

	rc = active_stack->sp_ops->connect(new_conn);
	if (rc) {
		ocfs2_stack_driver_put();
		goto out_free;
	}

	*conn = new_conn;

out_free:
	if (rc)
		kfree(new_conn);

out:
	return rc;
}
EXPORT_SYMBOL_GPL(ocfs2_cluster_connect);
343 | |||
/*
 * Tear down a connection via the stack's disconnect op and free it.
 * If hangup_pending is 0, the stack driver will be dropped here;
 * otherwise ocfs2_cluster_hangup() drops it after the hangup completes.
 */
int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn,
			     int hangup_pending)
{
	int ret;

	BUG_ON(conn == NULL);

	ret = active_stack->sp_ops->disconnect(conn, hangup_pending);

	/* XXX Should we free it anyway? */
	if (!ret) {
		kfree(conn);
		if (!hangup_pending)
			ocfs2_stack_driver_put();
	}

	return ret;
}
EXPORT_SYMBOL_GPL(ocfs2_cluster_disconnect);
364 | |||
/*
 * Finish a deferred teardown: run the stack's optional hangup op for
 * @group (which must be NUL-terminated at @grouplen), then drop the
 * stack driver reference kept alive by disconnect(hangup_pending=1).
 */
void ocfs2_cluster_hangup(const char *group, int grouplen)
{
	BUG_ON(group == NULL);
	BUG_ON(group[grouplen] != '\0');

	if (active_stack->sp_ops->hangup)
		active_stack->sp_ops->hangup(group, grouplen);

	/* cluster_disconnect() was called with hangup_pending==1 */
	ocfs2_stack_driver_put();
}
EXPORT_SYMBOL_GPL(ocfs2_cluster_hangup);
377 | |||
/* Local node number as reported by the active stack; 0 or -errno */
int ocfs2_cluster_this_node(unsigned int *node)
{
	return active_stack->sp_ops->this_node(node);
}
EXPORT_SYMBOL_GPL(ocfs2_cluster_this_node);
383 | |||
384 | |||
385 | /* | ||
386 | * Sysfs bits | ||
387 | */ | ||
388 | |||
/*
 * sysfs: show the filesystem's maximum locking protocol as
 * "<major>.<minor>\n"; empty until the protocol has been installed.
 */
static ssize_t ocfs2_max_locking_protocol_show(struct kobject *kobj,
					       struct kobj_attribute *attr,
					       char *buf)
{
	ssize_t ret = 0;

	spin_lock(&ocfs2_stack_lock);
	if (lproto)
		ret = snprintf(buf, PAGE_SIZE, "%u.%u\n",
			       lproto->lp_max_version.pv_major,
			       lproto->lp_max_version.pv_minor);
	spin_unlock(&ocfs2_stack_lock);

	return ret;
}

static struct kobj_attribute ocfs2_attr_max_locking_protocol =
	__ATTR(max_locking_protocol, S_IFREG | S_IRUGO,
	       ocfs2_max_locking_protocol_show, NULL);
408 | |||
409 | static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj, | ||
410 | struct kobj_attribute *attr, | ||
411 | char *buf) | ||
412 | { | ||
413 | ssize_t ret = 0, total = 0, remain = PAGE_SIZE; | ||
414 | struct ocfs2_stack_plugin *p; | ||
415 | |||
416 | spin_lock(&ocfs2_stack_lock); | ||
417 | list_for_each_entry(p, &ocfs2_stack_list, sp_list) { | ||
418 | ret = snprintf(buf, remain, "%s\n", | ||
419 | p->sp_name); | ||
420 | if (ret < 0) { | ||
421 | total = ret; | ||
422 | break; | ||
423 | } | ||
424 | if (ret == remain) { | ||
425 | /* snprintf() didn't fit */ | ||
426 | total = -E2BIG; | ||
427 | break; | ||
428 | } | ||
429 | total += ret; | ||
430 | remain -= ret; | ||
431 | } | ||
432 | spin_unlock(&ocfs2_stack_lock); | ||
433 | |||
434 | return total; | ||
435 | } | ||
436 | |||
437 | static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins = | ||
438 | __ATTR(loaded_cluster_plugins, S_IFREG | S_IRUGO, | ||
439 | ocfs2_loaded_cluster_plugins_show, NULL); | ||
440 | |||
441 | static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj, | ||
442 | struct kobj_attribute *attr, | ||
443 | char *buf) | ||
444 | { | ||
445 | ssize_t ret = 0; | ||
446 | |||
447 | spin_lock(&ocfs2_stack_lock); | ||
448 | if (active_stack) { | ||
449 | ret = snprintf(buf, PAGE_SIZE, "%s\n", | ||
450 | active_stack->sp_name); | ||
451 | if (ret == PAGE_SIZE) | ||
452 | ret = -E2BIG; | ||
453 | } | ||
454 | spin_unlock(&ocfs2_stack_lock); | ||
455 | |||
456 | return ret; | ||
457 | } | ||
458 | |||
459 | static struct kobj_attribute ocfs2_attr_active_cluster_plugin = | ||
460 | __ATTR(active_cluster_plugin, S_IFREG | S_IRUGO, | ||
461 | ocfs2_active_cluster_plugin_show, NULL); | ||
462 | |||
463 | static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj, | ||
464 | struct kobj_attribute *attr, | ||
465 | char *buf) | ||
466 | { | ||
467 | ssize_t ret; | ||
468 | spin_lock(&ocfs2_stack_lock); | ||
469 | ret = snprintf(buf, PAGE_SIZE, "%s\n", cluster_stack_name); | ||
470 | spin_unlock(&ocfs2_stack_lock); | ||
471 | |||
472 | return ret; | ||
473 | } | ||
474 | |||
475 | static ssize_t ocfs2_cluster_stack_store(struct kobject *kobj, | ||
476 | struct kobj_attribute *attr, | ||
477 | const char *buf, size_t count) | ||
478 | { | ||
479 | size_t len = count; | ||
480 | ssize_t ret; | ||
481 | |||
482 | if (len == 0) | ||
483 | return len; | ||
484 | |||
485 | if (buf[len - 1] == '\n') | ||
486 | len--; | ||
487 | |||
488 | if ((len != OCFS2_STACK_LABEL_LEN) || | ||
489 | (strnlen(buf, len) != len)) | ||
490 | return -EINVAL; | ||
491 | |||
492 | spin_lock(&ocfs2_stack_lock); | ||
493 | if (active_stack) { | ||
494 | if (!strncmp(buf, cluster_stack_name, len)) | ||
495 | ret = count; | ||
496 | else | ||
497 | ret = -EBUSY; | ||
498 | } else { | ||
499 | memcpy(cluster_stack_name, buf, len); | ||
500 | ret = count; | ||
501 | } | ||
502 | spin_unlock(&ocfs2_stack_lock); | ||
503 | |||
504 | return ret; | ||
505 | } | ||
506 | |||
507 | |||
508 | static struct kobj_attribute ocfs2_attr_cluster_stack = | ||
509 | __ATTR(cluster_stack, S_IFREG | S_IRUGO | S_IWUSR, | ||
510 | ocfs2_cluster_stack_show, | ||
511 | ocfs2_cluster_stack_store); | ||
512 | |||
513 | static struct attribute *ocfs2_attrs[] = { | ||
514 | &ocfs2_attr_max_locking_protocol.attr, | ||
515 | &ocfs2_attr_loaded_cluster_plugins.attr, | ||
516 | &ocfs2_attr_active_cluster_plugin.attr, | ||
517 | &ocfs2_attr_cluster_stack.attr, | ||
518 | NULL, | ||
519 | }; | ||
520 | |||
521 | static struct attribute_group ocfs2_attr_group = { | ||
522 | .attrs = ocfs2_attrs, | ||
523 | }; | ||
524 | |||
525 | static struct kset *ocfs2_kset; | ||
526 | |||
527 | static void ocfs2_sysfs_exit(void) | ||
528 | { | ||
529 | kset_unregister(ocfs2_kset); | ||
530 | } | ||
531 | |||
532 | static int ocfs2_sysfs_init(void) | ||
533 | { | ||
534 | int ret; | ||
535 | |||
536 | ocfs2_kset = kset_create_and_add("ocfs2", NULL, fs_kobj); | ||
537 | if (!ocfs2_kset) | ||
538 | return -ENOMEM; | ||
539 | |||
540 | ret = sysfs_create_group(&ocfs2_kset->kobj, &ocfs2_attr_group); | ||
541 | if (ret) | ||
542 | goto error; | ||
543 | |||
544 | return 0; | ||
545 | |||
546 | error: | ||
547 | kset_unregister(ocfs2_kset); | ||
548 | return ret; | ||
549 | } | ||
550 | |||
551 | static int __init ocfs2_stack_glue_init(void) | ||
552 | { | ||
553 | strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB); | ||
554 | |||
555 | return ocfs2_sysfs_init(); | ||
556 | } | ||
557 | |||
558 | static void __exit ocfs2_stack_glue_exit(void) | ||
559 | { | ||
560 | lproto = NULL; | ||
561 | ocfs2_sysfs_exit(); | ||
562 | } | ||
563 | |||
564 | MODULE_AUTHOR("Oracle"); | ||
565 | MODULE_DESCRIPTION("ocfs2 cluter stack glue layer"); | ||
566 | MODULE_LICENSE("GPL"); | ||
567 | module_init(ocfs2_stack_glue_init); | ||
568 | module_exit(ocfs2_stack_glue_exit); | ||
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h new file mode 100644 index 000000000000..005e4f170e0f --- /dev/null +++ b/fs/ocfs2/stackglue.h | |||
@@ -0,0 +1,261 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * stackglue.h | ||
5 | * | ||
6 | * Glue to the underlying cluster stack. | ||
7 | * | ||
8 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation, version 2. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
17 | * General Public License for more details. | ||
18 | */ | ||
19 | |||
20 | |||
21 | #ifndef STACKGLUE_H | ||
22 | #define STACKGLUE_H | ||
23 | |||
24 | #include <linux/types.h> | ||
25 | #include <linux/list.h> | ||
26 | #include <linux/dlmconstants.h> | ||
27 | |||
28 | #include "dlm/dlmapi.h" | ||
29 | #include <linux/dlm.h> | ||
30 | |||
31 | /* | ||
32 | * dlmconstants.h does not have a LOCAL flag. We hope to remove it | ||
33 | * some day, but right now we need it. Let's fake it. This value is larger | ||
34 | * than any flag in dlmconstants.h. | ||
35 | */ | ||
36 | #define DLM_LKF_LOCAL 0x00100000 | ||
37 | |||
38 | /* | ||
39 | * This shadows DLM_LOCKSPACE_LEN in fs/dlm/dlm_internal.h. That probably | ||
40 | * wants to be in a public header. | ||
41 | */ | ||
42 | #define GROUP_NAME_MAX 64 | ||
43 | |||
44 | |||
45 | /* | ||
46 | * ocfs2_protocol_version changes when ocfs2 does something different in | ||
47 | * its inter-node behavior. See dlmglue.c for more information. | ||
48 | */ | ||
49 | struct ocfs2_protocol_version { | ||
50 | u8 pv_major; | ||
51 | u8 pv_minor; | ||
52 | }; | ||
53 | |||
54 | /* | ||
55 | * The ocfs2_locking_protocol defines the handlers called on ocfs2's behalf. | ||
56 | */ | ||
57 | struct ocfs2_locking_protocol { | ||
58 | struct ocfs2_protocol_version lp_max_version; | ||
59 | void (*lp_lock_ast)(void *astarg); | ||
60 | void (*lp_blocking_ast)(void *astarg, int level); | ||
61 | void (*lp_unlock_ast)(void *astarg, int error); | ||
62 | }; | ||
63 | |||
64 | |||
65 | /* | ||
66 | * The dlm_lockstatus struct includes lvb space, but the dlm_lksb struct only | ||
67 | * has a pointer to separately allocated lvb space. This struct exists only to | ||
68 | * include in the lksb union to make space for a combined dlm_lksb and lvb. | ||
69 | */ | ||
70 | struct fsdlm_lksb_plus_lvb { | ||
71 | struct dlm_lksb lksb; | ||
72 | char lvb[DLM_LVB_LEN]; | ||
73 | }; | ||
74 | |||
75 | /* | ||
76 | * A union of all lock status structures. We define it here so that the | ||
77 | * size of the union is known. Lock status structures are embedded in | ||
78 | * ocfs2 inodes. | ||
79 | */ | ||
80 | union ocfs2_dlm_lksb { | ||
81 | struct dlm_lockstatus lksb_o2dlm; | ||
82 | struct dlm_lksb lksb_fsdlm; | ||
83 | struct fsdlm_lksb_plus_lvb padding; | ||
84 | }; | ||
85 | |||
86 | /* | ||
87 | * A cluster connection. Mostly opaque to ocfs2, the connection holds | ||
88 | * state for the underlying stack. ocfs2 does use cc_version to determine | ||
89 | * locking compatibility. | ||
90 | */ | ||
91 | struct ocfs2_cluster_connection { | ||
92 | char cc_name[GROUP_NAME_MAX]; | ||
93 | int cc_namelen; | ||
94 | struct ocfs2_protocol_version cc_version; | ||
95 | void (*cc_recovery_handler)(int node_num, void *recovery_data); | ||
96 | void *cc_recovery_data; | ||
97 | void *cc_lockspace; | ||
98 | void *cc_private; | ||
99 | }; | ||
100 | |||
101 | /* | ||
102 | * Each cluster stack implements the stack operations structure. Not used | ||
103 | * in the ocfs2 code, the stackglue code translates generic cluster calls | ||
104 | * into stack operations. | ||
105 | */ | ||
106 | struct ocfs2_stack_operations { | ||
107 | /* | ||
108 | * The fs code calls ocfs2_cluster_connect() to attach a new | ||
109 | * filesystem to the cluster stack. The ->connect() op is passed | ||
110 | * an ocfs2_cluster_connection with the name and recovery field | ||
111 | * filled in. | ||
112 | * | ||
113 | * The stack must set up any notification mechanisms and create | ||
114 | * the filesystem lockspace in the DLM. The lockspace should be | ||
115 | * stored on cc_lockspace. Any other information can be stored on | ||
116 | * cc_private. | ||
117 | * | ||
118 | * ->connect() must not return until it is guaranteed that | ||
119 | * | ||
120 | * - Node down notifications for the filesystem will be received | ||
121 | * and passed to conn->cc_recovery_handler(). | ||
122 | * - Locking requests for the filesystem will be processed. | ||
123 | */ | ||
124 | int (*connect)(struct ocfs2_cluster_connection *conn); | ||
125 | |||
126 | /* | ||
127 | * The fs code calls ocfs2_cluster_disconnect() when a filesystem | ||
128 | * no longer needs cluster services. All DLM locks have been | ||
129 | * dropped, and recovery notification is being ignored by the | ||
130 | * fs code. The stack must disengage from the DLM and discontinue | ||
131 | * recovery notification. | ||
132 | * | ||
133 | * Once ->disconnect() has returned, the connection structure will | ||
134 | * be freed. Thus, a stack must not return from ->disconnect() | ||
135 | * until it will no longer reference the conn pointer. | ||
136 | * | ||
137 | * If hangup_pending is zero, ocfs2_cluster_disconnect() will also | ||
138 | * be dropping the reference on the module. | ||
139 | */ | ||
140 | int (*disconnect)(struct ocfs2_cluster_connection *conn, | ||
141 | int hangup_pending); | ||
142 | |||
143 | /* | ||
144 | * ocfs2_cluster_hangup() exists for compatibility with older | ||
145 | * ocfs2 tools. Only the classic stack really needs it. As such | ||
146 | * ->hangup() is not required of all stacks. See the comment by | ||
147 | * ocfs2_cluster_hangup() for more details. | ||
148 | * | ||
149 | * Note that ocfs2_cluster_hangup() can only be called if | ||
150 | * hangup_pending was passed to ocfs2_cluster_disconnect(). | ||
151 | */ | ||
152 | void (*hangup)(const char *group, int grouplen); | ||
153 | |||
154 | /* | ||
155 | * ->this_node() returns the cluster's unique identifier for the | ||
156 | * local node. | ||
157 | */ | ||
158 | int (*this_node)(unsigned int *node); | ||
159 | |||
160 | /* | ||
161 | * Call the underlying dlm lock function. The ->dlm_lock() | ||
162 | * callback should convert the flags and mode as appropriate. | ||
163 | * | ||
164 | * ast and bast functions are not part of the call because the | ||
165 | * stack will likely want to wrap ast and bast calls before passing | ||
166 | * them to stack->sp_proto. | ||
167 | */ | ||
168 | int (*dlm_lock)(struct ocfs2_cluster_connection *conn, | ||
169 | int mode, | ||
170 | union ocfs2_dlm_lksb *lksb, | ||
171 | u32 flags, | ||
172 | void *name, | ||
173 | unsigned int namelen, | ||
174 | void *astarg); | ||
175 | |||
176 | /* | ||
177 | * Call the underlying dlm unlock function. The ->dlm_unlock() | ||
178 | * function should convert the flags as appropriate. | ||
179 | * | ||
180 | * The unlock ast is not passed, as the stack will want to wrap | ||
181 | * it before calling stack->sp_proto->lp_unlock_ast(). | ||
182 | */ | ||
183 | int (*dlm_unlock)(struct ocfs2_cluster_connection *conn, | ||
184 | union ocfs2_dlm_lksb *lksb, | ||
185 | u32 flags, | ||
186 | void *astarg); | ||
187 | |||
188 | /* | ||
189 | * Return the status of the current lock status block. The fs | ||
190 | * code should never dereference the union. The ->lock_status() | ||
191 | * callback pulls out the stack-specific lksb, converts the status | ||
192 | * to a proper errno, and returns it. | ||
193 | */ | ||
194 | int (*lock_status)(union ocfs2_dlm_lksb *lksb); | ||
195 | |||
196 | /* | ||
197 | * Pull the lvb pointer off of the stack-specific lksb. | ||
198 | */ | ||
199 | void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb); | ||
200 | |||
201 | /* | ||
202 | * This is an optional debugging hook. If provided, the | ||
203 | * stack can dump debugging information about this lock. | ||
204 | */ | ||
205 | void (*dump_lksb)(union ocfs2_dlm_lksb *lksb); | ||
206 | }; | ||
207 | |||
208 | /* | ||
209 | * Each stack plugin must describe itself by registering a | ||
210 | * ocfs2_stack_plugin structure. This is only seen by stackglue and the | ||
211 | * stack driver. | ||
212 | */ | ||
213 | struct ocfs2_stack_plugin { | ||
214 | char *sp_name; | ||
215 | struct ocfs2_stack_operations *sp_ops; | ||
216 | struct module *sp_owner; | ||
217 | |||
218 | /* These are managed by the stackglue code. */ | ||
219 | struct list_head sp_list; | ||
220 | unsigned int sp_count; | ||
221 | struct ocfs2_locking_protocol *sp_proto; | ||
222 | }; | ||
223 | |||
224 | |||
225 | /* Used by the filesystem */ | ||
226 | int ocfs2_cluster_connect(const char *stack_name, | ||
227 | const char *group, | ||
228 | int grouplen, | ||
229 | void (*recovery_handler)(int node_num, | ||
230 | void *recovery_data), | ||
231 | void *recovery_data, | ||
232 | struct ocfs2_cluster_connection **conn); | ||
233 | int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, | ||
234 | int hangup_pending); | ||
235 | void ocfs2_cluster_hangup(const char *group, int grouplen); | ||
236 | int ocfs2_cluster_this_node(unsigned int *node); | ||
237 | |||
238 | struct ocfs2_lock_res; | ||
239 | int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, | ||
240 | int mode, | ||
241 | union ocfs2_dlm_lksb *lksb, | ||
242 | u32 flags, | ||
243 | void *name, | ||
244 | unsigned int namelen, | ||
245 | struct ocfs2_lock_res *astarg); | ||
246 | int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, | ||
247 | union ocfs2_dlm_lksb *lksb, | ||
248 | u32 flags, | ||
249 | struct ocfs2_lock_res *astarg); | ||
250 | |||
251 | int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb); | ||
252 | void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb); | ||
253 | void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb); | ||
254 | |||
255 | void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto); | ||
256 | |||
257 | |||
258 | /* Used by stack plugins */ | ||
259 | int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin); | ||
260 | void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin); | ||
261 | #endif /* STACKGLUE_H */ | ||
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 72c198a004df..d2d278fb9819 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -46,6 +46,11 @@ | |||
46 | 46 | ||
47 | #include "buffer_head_io.h" | 47 | #include "buffer_head_io.h" |
48 | 48 | ||
49 | #define NOT_ALLOC_NEW_GROUP 0 | ||
50 | #define ALLOC_NEW_GROUP 1 | ||
51 | |||
52 | #define OCFS2_MAX_INODES_TO_STEAL 1024 | ||
53 | |||
49 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); | 54 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); |
50 | static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); | 55 | static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); |
51 | static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); | 56 | static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); |
@@ -106,7 +111,7 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode, | |||
106 | u64 *bg_blkno, | 111 | u64 *bg_blkno, |
107 | u16 *bg_bit_off); | 112 | u16 *bg_bit_off); |
108 | 113 | ||
109 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) | 114 | static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) |
110 | { | 115 | { |
111 | struct inode *inode = ac->ac_inode; | 116 | struct inode *inode = ac->ac_inode; |
112 | 117 | ||
@@ -117,9 +122,17 @@ void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) | |||
117 | mutex_unlock(&inode->i_mutex); | 122 | mutex_unlock(&inode->i_mutex); |
118 | 123 | ||
119 | iput(inode); | 124 | iput(inode); |
125 | ac->ac_inode = NULL; | ||
120 | } | 126 | } |
121 | if (ac->ac_bh) | 127 | if (ac->ac_bh) { |
122 | brelse(ac->ac_bh); | 128 | brelse(ac->ac_bh); |
129 | ac->ac_bh = NULL; | ||
130 | } | ||
131 | } | ||
132 | |||
133 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) | ||
134 | { | ||
135 | ocfs2_free_ac_resource(ac); | ||
123 | kfree(ac); | 136 | kfree(ac); |
124 | } | 137 | } |
125 | 138 | ||
@@ -391,7 +404,8 @@ bail: | |||
391 | static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | 404 | static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, |
392 | struct ocfs2_alloc_context *ac, | 405 | struct ocfs2_alloc_context *ac, |
393 | int type, | 406 | int type, |
394 | u32 slot) | 407 | u32 slot, |
408 | int alloc_new_group) | ||
395 | { | 409 | { |
396 | int status; | 410 | int status; |
397 | u32 bits_wanted = ac->ac_bits_wanted; | 411 | u32 bits_wanted = ac->ac_bits_wanted; |
@@ -420,6 +434,7 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | |||
420 | } | 434 | } |
421 | 435 | ||
422 | ac->ac_inode = alloc_inode; | 436 | ac->ac_inode = alloc_inode; |
437 | ac->ac_alloc_slot = slot; | ||
423 | 438 | ||
424 | fe = (struct ocfs2_dinode *) bh->b_data; | 439 | fe = (struct ocfs2_dinode *) bh->b_data; |
425 | if (!OCFS2_IS_VALID_DINODE(fe)) { | 440 | if (!OCFS2_IS_VALID_DINODE(fe)) { |
@@ -446,6 +461,14 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | |||
446 | goto bail; | 461 | goto bail; |
447 | } | 462 | } |
448 | 463 | ||
464 | if (alloc_new_group != ALLOC_NEW_GROUP) { | ||
465 | mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, " | ||
466 | "and we don't alloc a new group for it.\n", | ||
467 | slot, bits_wanted, free_bits); | ||
468 | status = -ENOSPC; | ||
469 | goto bail; | ||
470 | } | ||
471 | |||
449 | status = ocfs2_block_group_alloc(osb, alloc_inode, bh); | 472 | status = ocfs2_block_group_alloc(osb, alloc_inode, bh); |
450 | if (status < 0) { | 473 | if (status < 0) { |
451 | if (status != -ENOSPC) | 474 | if (status != -ENOSPC) |
@@ -490,7 +513,8 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, | |||
490 | (*ac)->ac_group_search = ocfs2_block_group_search; | 513 | (*ac)->ac_group_search = ocfs2_block_group_search; |
491 | 514 | ||
492 | status = ocfs2_reserve_suballoc_bits(osb, (*ac), | 515 | status = ocfs2_reserve_suballoc_bits(osb, (*ac), |
493 | EXTENT_ALLOC_SYSTEM_INODE, slot); | 516 | EXTENT_ALLOC_SYSTEM_INODE, |
517 | slot, ALLOC_NEW_GROUP); | ||
494 | if (status < 0) { | 518 | if (status < 0) { |
495 | if (status != -ENOSPC) | 519 | if (status != -ENOSPC) |
496 | mlog_errno(status); | 520 | mlog_errno(status); |
@@ -508,10 +532,42 @@ bail: | |||
508 | return status; | 532 | return status; |
509 | } | 533 | } |
510 | 534 | ||
535 | static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb, | ||
536 | struct ocfs2_alloc_context *ac) | ||
537 | { | ||
538 | int i, status = -ENOSPC; | ||
539 | s16 slot = ocfs2_get_inode_steal_slot(osb); | ||
540 | |||
541 | /* Start to steal inodes from the first slot after ours. */ | ||
542 | if (slot == OCFS2_INVALID_SLOT) | ||
543 | slot = osb->slot_num + 1; | ||
544 | |||
545 | for (i = 0; i < osb->max_slots; i++, slot++) { | ||
546 | if (slot == osb->max_slots) | ||
547 | slot = 0; | ||
548 | |||
549 | if (slot == osb->slot_num) | ||
550 | continue; | ||
551 | |||
552 | status = ocfs2_reserve_suballoc_bits(osb, ac, | ||
553 | INODE_ALLOC_SYSTEM_INODE, | ||
554 | slot, NOT_ALLOC_NEW_GROUP); | ||
555 | if (status >= 0) { | ||
556 | ocfs2_set_inode_steal_slot(osb, slot); | ||
557 | break; | ||
558 | } | ||
559 | |||
560 | ocfs2_free_ac_resource(ac); | ||
561 | } | ||
562 | |||
563 | return status; | ||
564 | } | ||
565 | |||
511 | int ocfs2_reserve_new_inode(struct ocfs2_super *osb, | 566 | int ocfs2_reserve_new_inode(struct ocfs2_super *osb, |
512 | struct ocfs2_alloc_context **ac) | 567 | struct ocfs2_alloc_context **ac) |
513 | { | 568 | { |
514 | int status; | 569 | int status; |
570 | s16 slot = ocfs2_get_inode_steal_slot(osb); | ||
515 | 571 | ||
516 | *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); | 572 | *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); |
517 | if (!(*ac)) { | 573 | if (!(*ac)) { |
@@ -525,9 +581,43 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb, | |||
525 | 581 | ||
526 | (*ac)->ac_group_search = ocfs2_block_group_search; | 582 | (*ac)->ac_group_search = ocfs2_block_group_search; |
527 | 583 | ||
584 | /* | ||
585 | * slot is set when we successfully steal inode from other nodes. | ||
586 | * It is reset in 3 places: | ||
587 | * 1. when we flush the truncate log | ||
588 | * 2. when we complete local alloc recovery. | ||
589 | * 3. when we successfully allocate from our own slot. | ||
590 | * After it is set, we will go on stealing inodes until we find the | ||
591 | * need to check our slots to see whether there is some space for us. | ||
592 | */ | ||
593 | if (slot != OCFS2_INVALID_SLOT && | ||
594 | atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_INODES_TO_STEAL) | ||
595 | goto inode_steal; | ||
596 | |||
597 | atomic_set(&osb->s_num_inodes_stolen, 0); | ||
528 | status = ocfs2_reserve_suballoc_bits(osb, *ac, | 598 | status = ocfs2_reserve_suballoc_bits(osb, *ac, |
529 | INODE_ALLOC_SYSTEM_INODE, | 599 | INODE_ALLOC_SYSTEM_INODE, |
530 | osb->slot_num); | 600 | osb->slot_num, ALLOC_NEW_GROUP); |
601 | if (status >= 0) { | ||
602 | status = 0; | ||
603 | |||
604 | /* | ||
605 | * Some inodes must be freed by us, so try to allocate | ||
606 | * from our own next time. | ||
607 | */ | ||
608 | if (slot != OCFS2_INVALID_SLOT) | ||
609 | ocfs2_init_inode_steal_slot(osb); | ||
610 | goto bail; | ||
611 | } else if (status < 0 && status != -ENOSPC) { | ||
612 | mlog_errno(status); | ||
613 | goto bail; | ||
614 | } | ||
615 | |||
616 | ocfs2_free_ac_resource(*ac); | ||
617 | |||
618 | inode_steal: | ||
619 | status = ocfs2_steal_inode_from_other_nodes(osb, *ac); | ||
620 | atomic_inc(&osb->s_num_inodes_stolen); | ||
531 | if (status < 0) { | 621 | if (status < 0) { |
532 | if (status != -ENOSPC) | 622 | if (status != -ENOSPC) |
533 | mlog_errno(status); | 623 | mlog_errno(status); |
@@ -557,7 +647,8 @@ int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, | |||
557 | 647 | ||
558 | status = ocfs2_reserve_suballoc_bits(osb, ac, | 648 | status = ocfs2_reserve_suballoc_bits(osb, ac, |
559 | GLOBAL_BITMAP_SYSTEM_INODE, | 649 | GLOBAL_BITMAP_SYSTEM_INODE, |
560 | OCFS2_INVALID_SLOT); | 650 | OCFS2_INVALID_SLOT, |
651 | ALLOC_NEW_GROUP); | ||
561 | if (status < 0 && status != -ENOSPC) { | 652 | if (status < 0 && status != -ENOSPC) { |
562 | mlog_errno(status); | 653 | mlog_errno(status); |
563 | goto bail; | 654 | goto bail; |
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 8799033bb459..544c600662bd 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h | |||
@@ -36,6 +36,7 @@ typedef int (group_search_t)(struct inode *, | |||
36 | struct ocfs2_alloc_context { | 36 | struct ocfs2_alloc_context { |
37 | struct inode *ac_inode; /* which bitmap are we allocating from? */ | 37 | struct inode *ac_inode; /* which bitmap are we allocating from? */ |
38 | struct buffer_head *ac_bh; /* file entry bh */ | 38 | struct buffer_head *ac_bh; /* file entry bh */ |
39 | u32 ac_alloc_slot; /* which slot are we allocating from? */ | ||
39 | u32 ac_bits_wanted; | 40 | u32 ac_bits_wanted; |
40 | u32 ac_bits_given; | 41 | u32 ac_bits_given; |
41 | #define OCFS2_AC_USE_LOCAL 1 | 42 | #define OCFS2_AC_USE_LOCAL 1 |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index bec75aff3d9f..df63ba20ae90 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -40,8 +40,7 @@ | |||
40 | #include <linux/crc32.h> | 40 | #include <linux/crc32.h> |
41 | #include <linux/debugfs.h> | 41 | #include <linux/debugfs.h> |
42 | #include <linux/mount.h> | 42 | #include <linux/mount.h> |
43 | 43 | #include <linux/seq_file.h> | |
44 | #include <cluster/nodemanager.h> | ||
45 | 44 | ||
46 | #define MLOG_MASK_PREFIX ML_SUPER | 45 | #define MLOG_MASK_PREFIX ML_SUPER |
47 | #include <cluster/masklog.h> | 46 | #include <cluster/masklog.h> |
@@ -88,6 +87,7 @@ struct mount_options | |||
88 | unsigned int atime_quantum; | 87 | unsigned int atime_quantum; |
89 | signed short slot; | 88 | signed short slot; |
90 | unsigned int localalloc_opt; | 89 | unsigned int localalloc_opt; |
90 | char cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; | ||
91 | }; | 91 | }; |
92 | 92 | ||
93 | static int ocfs2_parse_options(struct super_block *sb, char *options, | 93 | static int ocfs2_parse_options(struct super_block *sb, char *options, |
@@ -109,7 +109,6 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait); | |||
109 | static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb); | 109 | static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb); |
110 | static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb); | 110 | static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb); |
111 | static void ocfs2_release_system_inodes(struct ocfs2_super *osb); | 111 | static void ocfs2_release_system_inodes(struct ocfs2_super *osb); |
112 | static int ocfs2_fill_local_node_info(struct ocfs2_super *osb); | ||
113 | static int ocfs2_check_volume(struct ocfs2_super *osb); | 112 | static int ocfs2_check_volume(struct ocfs2_super *osb); |
114 | static int ocfs2_verify_volume(struct ocfs2_dinode *di, | 113 | static int ocfs2_verify_volume(struct ocfs2_dinode *di, |
115 | struct buffer_head *bh, | 114 | struct buffer_head *bh, |
@@ -154,6 +153,7 @@ enum { | |||
154 | Opt_commit, | 153 | Opt_commit, |
155 | Opt_localalloc, | 154 | Opt_localalloc, |
156 | Opt_localflocks, | 155 | Opt_localflocks, |
156 | Opt_stack, | ||
157 | Opt_err, | 157 | Opt_err, |
158 | }; | 158 | }; |
159 | 159 | ||
@@ -172,6 +172,7 @@ static match_table_t tokens = { | |||
172 | {Opt_commit, "commit=%u"}, | 172 | {Opt_commit, "commit=%u"}, |
173 | {Opt_localalloc, "localalloc=%d"}, | 173 | {Opt_localalloc, "localalloc=%d"}, |
174 | {Opt_localflocks, "localflocks"}, | 174 | {Opt_localflocks, "localflocks"}, |
175 | {Opt_stack, "cluster_stack=%s"}, | ||
175 | {Opt_err, NULL} | 176 | {Opt_err, NULL} |
176 | }; | 177 | }; |
177 | 178 | ||
@@ -551,8 +552,17 @@ static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) | |||
551 | } | 552 | } |
552 | } | 553 | } |
553 | 554 | ||
555 | if (ocfs2_userspace_stack(osb)) { | ||
556 | if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { | ||
557 | mlog(ML_ERROR, "Userspace stack expected, but " | ||
558 | "o2cb heartbeat arguments passed to mount\n"); | ||
559 | return -EINVAL; | ||
560 | } | ||
561 | } | ||
562 | |||
554 | if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) { | 563 | if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) { |
555 | if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb)) { | 564 | if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) && |
565 | !ocfs2_userspace_stack(osb)) { | ||
556 | mlog(ML_ERROR, "Heartbeat has to be started to mount " | 566 | mlog(ML_ERROR, "Heartbeat has to be started to mount " |
557 | "a read-write clustered device.\n"); | 567 | "a read-write clustered device.\n"); |
558 | return -EINVAL; | 568 | return -EINVAL; |
@@ -562,6 +572,35 @@ static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) | |||
562 | return 0; | 572 | return 0; |
563 | } | 573 | } |
564 | 574 | ||
575 | /* | ||
576 | * If we're using a userspace stack, mount should have passed | ||
577 | * a name that matches the disk. If not, mount should not | ||
578 | * have passed a stack. | ||
579 | */ | ||
580 | static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb, | ||
581 | struct mount_options *mopt) | ||
582 | { | ||
583 | if (!ocfs2_userspace_stack(osb) && mopt->cluster_stack[0]) { | ||
584 | mlog(ML_ERROR, | ||
585 | "cluster stack passed to mount, but this filesystem " | ||
586 | "does not support it\n"); | ||
587 | return -EINVAL; | ||
588 | } | ||
589 | |||
590 | if (ocfs2_userspace_stack(osb) && | ||
591 | strncmp(osb->osb_cluster_stack, mopt->cluster_stack, | ||
592 | OCFS2_STACK_LABEL_LEN)) { | ||
593 | mlog(ML_ERROR, | ||
594 | "cluster stack passed to mount (\"%s\") does not " | ||
595 | "match the filesystem (\"%s\")\n", | ||
596 | mopt->cluster_stack, | ||
597 | osb->osb_cluster_stack); | ||
598 | return -EINVAL; | ||
599 | } | ||
600 | |||
601 | return 0; | ||
602 | } | ||
603 | |||
565 | static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | 604 | static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) |
566 | { | 605 | { |
567 | struct dentry *root; | 606 | struct dentry *root; |
@@ -579,15 +618,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
579 | goto read_super_error; | 618 | goto read_super_error; |
580 | } | 619 | } |
581 | 620 | ||
582 | /* for now we only have one cluster/node, make sure we see it | ||
583 | * in the heartbeat universe */ | ||
584 | if (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL) { | ||
585 | if (!o2hb_check_local_node_heartbeating()) { | ||
586 | status = -EINVAL; | ||
587 | goto read_super_error; | ||
588 | } | ||
589 | } | ||
590 | |||
591 | /* probe for superblock */ | 621 | /* probe for superblock */ |
592 | status = ocfs2_sb_probe(sb, &bh, §or_size); | 622 | status = ocfs2_sb_probe(sb, &bh, §or_size); |
593 | if (status < 0) { | 623 | if (status < 0) { |
@@ -609,6 +639,10 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
609 | osb->osb_commit_interval = parsed_options.commit_interval; | 639 | osb->osb_commit_interval = parsed_options.commit_interval; |
610 | osb->local_alloc_size = parsed_options.localalloc_opt; | 640 | osb->local_alloc_size = parsed_options.localalloc_opt; |
611 | 641 | ||
642 | status = ocfs2_verify_userspace_stack(osb, &parsed_options); | ||
643 | if (status) | ||
644 | goto read_super_error; | ||
645 | |||
612 | sb->s_magic = OCFS2_SUPER_MAGIC; | 646 | sb->s_magic = OCFS2_SUPER_MAGIC; |
613 | 647 | ||
614 | /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, | 648 | /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, |
@@ -694,7 +728,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
694 | if (ocfs2_mount_local(osb)) | 728 | if (ocfs2_mount_local(osb)) |
695 | snprintf(nodestr, sizeof(nodestr), "local"); | 729 | snprintf(nodestr, sizeof(nodestr), "local"); |
696 | else | 730 | else |
697 | snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num); | 731 | snprintf(nodestr, sizeof(nodestr), "%u", osb->node_num); |
698 | 732 | ||
699 | printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %s, slot %d) " | 733 | printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %s, slot %d) " |
700 | "with %s data mode.\n", | 734 | "with %s data mode.\n", |
@@ -763,6 +797,7 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
763 | mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; | 797 | mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; |
764 | mopt->slot = OCFS2_INVALID_SLOT; | 798 | mopt->slot = OCFS2_INVALID_SLOT; |
765 | mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; | 799 | mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; |
800 | mopt->cluster_stack[0] = '\0'; | ||
766 | 801 | ||
767 | if (!options) { | 802 | if (!options) { |
768 | status = 1; | 803 | status = 1; |
@@ -864,6 +899,25 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
864 | if (!is_remount) | 899 | if (!is_remount) |
865 | mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS; | 900 | mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS; |
866 | break; | 901 | break; |
902 | case Opt_stack: | ||
903 | /* Check both that the option we were passed | ||
904 | * is of the right length and that it is a proper | ||
905 | * string of the right length. | ||
906 | */ | ||
907 | if (((args[0].to - args[0].from) != | ||
908 | OCFS2_STACK_LABEL_LEN) || | ||
909 | (strnlen(args[0].from, | ||
910 | OCFS2_STACK_LABEL_LEN) != | ||
911 | OCFS2_STACK_LABEL_LEN)) { | ||
912 | mlog(ML_ERROR, | ||
913 | "Invalid cluster_stack option\n"); | ||
914 | status = 0; | ||
915 | goto bail; | ||
916 | } | ||
917 | memcpy(mopt->cluster_stack, args[0].from, | ||
918 | OCFS2_STACK_LABEL_LEN); | ||
919 | mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; | ||
920 | break; | ||
867 | default: | 921 | default: |
868 | mlog(ML_ERROR, | 922 | mlog(ML_ERROR, |
869 | "Unrecognized mount option \"%s\" " | 923 | "Unrecognized mount option \"%s\" " |
@@ -922,6 +976,10 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
922 | if (opts & OCFS2_MOUNT_LOCALFLOCKS) | 976 | if (opts & OCFS2_MOUNT_LOCALFLOCKS) |
923 | seq_printf(s, ",localflocks,"); | 977 | seq_printf(s, ",localflocks,"); |
924 | 978 | ||
979 | if (osb->osb_cluster_stack[0]) | ||
980 | seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN, | ||
981 | osb->osb_cluster_stack); | ||
982 | |||
925 | return 0; | 983 | return 0; |
926 | } | 984 | } |
927 | 985 | ||
@@ -957,6 +1015,8 @@ static int __init ocfs2_init(void) | |||
957 | mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); | 1015 | mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); |
958 | } | 1016 | } |
959 | 1017 | ||
1018 | ocfs2_set_locking_protocol(); | ||
1019 | |||
960 | leave: | 1020 | leave: |
961 | if (status < 0) { | 1021 | if (status < 0) { |
962 | ocfs2_free_mem_caches(); | 1022 | ocfs2_free_mem_caches(); |
@@ -1132,31 +1192,6 @@ static int ocfs2_get_sector(struct super_block *sb, | |||
1132 | return 0; | 1192 | return 0; |
1133 | } | 1193 | } |
1134 | 1194 | ||
1135 | /* ocfs2 1.0 only allows one cluster and node identity per kernel image. */ | ||
1136 | static int ocfs2_fill_local_node_info(struct ocfs2_super *osb) | ||
1137 | { | ||
1138 | int status; | ||
1139 | |||
1140 | /* XXX hold a ref on the node while mounte? easy enough, if | ||
1141 | * desirable. */ | ||
1142 | if (ocfs2_mount_local(osb)) | ||
1143 | osb->node_num = 0; | ||
1144 | else | ||
1145 | osb->node_num = o2nm_this_node(); | ||
1146 | |||
1147 | if (osb->node_num == O2NM_MAX_NODES) { | ||
1148 | mlog(ML_ERROR, "could not find this host's node number\n"); | ||
1149 | status = -ENOENT; | ||
1150 | goto bail; | ||
1151 | } | ||
1152 | |||
1153 | mlog(0, "I am node %d\n", osb->node_num); | ||
1154 | |||
1155 | status = 0; | ||
1156 | bail: | ||
1157 | return status; | ||
1158 | } | ||
1159 | |||
1160 | static int ocfs2_mount_volume(struct super_block *sb) | 1195 | static int ocfs2_mount_volume(struct super_block *sb) |
1161 | { | 1196 | { |
1162 | int status = 0; | 1197 | int status = 0; |
@@ -1168,12 +1203,6 @@ static int ocfs2_mount_volume(struct super_block *sb) | |||
1168 | if (ocfs2_is_hard_readonly(osb)) | 1203 | if (ocfs2_is_hard_readonly(osb)) |
1169 | goto leave; | 1204 | goto leave; |
1170 | 1205 | ||
1171 | status = ocfs2_fill_local_node_info(osb); | ||
1172 | if (status < 0) { | ||
1173 | mlog_errno(status); | ||
1174 | goto leave; | ||
1175 | } | ||
1176 | |||
1177 | status = ocfs2_dlm_init(osb); | 1206 | status = ocfs2_dlm_init(osb); |
1178 | if (status < 0) { | 1207 | if (status < 0) { |
1179 | mlog_errno(status); | 1208 | mlog_errno(status); |
@@ -1224,18 +1253,9 @@ leave: | |||
1224 | return status; | 1253 | return status; |
1225 | } | 1254 | } |
1226 | 1255 | ||
1227 | /* we can't grab the goofy sem lock from inside wait_event, so we use | ||
1228 | * memory barriers to make sure that we'll see the null task before | ||
1229 | * being woken up */ | ||
1230 | static int ocfs2_recovery_thread_running(struct ocfs2_super *osb) | ||
1231 | { | ||
1232 | mb(); | ||
1233 | return osb->recovery_thread_task != NULL; | ||
1234 | } | ||
1235 | |||
1236 | static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | 1256 | static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) |
1237 | { | 1257 | { |
1238 | int tmp; | 1258 | int tmp, hangup_needed = 0; |
1239 | struct ocfs2_super *osb = NULL; | 1259 | struct ocfs2_super *osb = NULL; |
1240 | char nodestr[8]; | 1260 | char nodestr[8]; |
1241 | 1261 | ||
@@ -1249,25 +1269,16 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
1249 | 1269 | ||
1250 | ocfs2_truncate_log_shutdown(osb); | 1270 | ocfs2_truncate_log_shutdown(osb); |
1251 | 1271 | ||
1252 | /* disable any new recovery threads and wait for any currently | 1272 | /* This will disable recovery and flush any recovery work. */ |
1253 | * running ones to exit. Do this before setting the vol_state. */ | 1273 | ocfs2_recovery_exit(osb); |
1254 | mutex_lock(&osb->recovery_lock); | ||
1255 | osb->disable_recovery = 1; | ||
1256 | mutex_unlock(&osb->recovery_lock); | ||
1257 | wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); | ||
1258 | |||
1259 | /* At this point, we know that no more recovery threads can be | ||
1260 | * launched, so wait for any recovery completion work to | ||
1261 | * complete. */ | ||
1262 | flush_workqueue(ocfs2_wq); | ||
1263 | 1274 | ||
1264 | ocfs2_journal_shutdown(osb); | 1275 | ocfs2_journal_shutdown(osb); |
1265 | 1276 | ||
1266 | ocfs2_sync_blockdev(sb); | 1277 | ocfs2_sync_blockdev(sb); |
1267 | 1278 | ||
1268 | /* No dlm means we've failed during mount, so skip all the | 1279 | /* No cluster connection means we've failed during mount, so skip |
1269 | * steps which depended on that to complete. */ | 1280 | * all the steps which depended on that to complete. */ |
1270 | if (osb->dlm) { | 1281 | if (osb->cconn) { |
1271 | tmp = ocfs2_super_lock(osb, 1); | 1282 | tmp = ocfs2_super_lock(osb, 1); |
1272 | if (tmp < 0) { | 1283 | if (tmp < 0) { |
1273 | mlog_errno(tmp); | 1284 | mlog_errno(tmp); |
@@ -1278,25 +1289,34 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
1278 | if (osb->slot_num != OCFS2_INVALID_SLOT) | 1289 | if (osb->slot_num != OCFS2_INVALID_SLOT) |
1279 | ocfs2_put_slot(osb); | 1290 | ocfs2_put_slot(osb); |
1280 | 1291 | ||
1281 | if (osb->dlm) | 1292 | if (osb->cconn) |
1282 | ocfs2_super_unlock(osb, 1); | 1293 | ocfs2_super_unlock(osb, 1); |
1283 | 1294 | ||
1284 | ocfs2_release_system_inodes(osb); | 1295 | ocfs2_release_system_inodes(osb); |
1285 | 1296 | ||
1286 | if (osb->dlm) | 1297 | /* |
1287 | ocfs2_dlm_shutdown(osb); | 1298 | * If we're dismounting due to mount error, mount.ocfs2 will clean |
1299 | * up heartbeat. If we're a local mount, there is no heartbeat. | ||
1300 | * If we failed before we got a uuid_str yet, we can't stop | ||
1301 | * heartbeat. Otherwise, do it. | ||
1302 | */ | ||
1303 | if (!mnt_err && !ocfs2_mount_local(osb) && osb->uuid_str) | ||
1304 | hangup_needed = 1; | ||
1305 | |||
1306 | if (osb->cconn) | ||
1307 | ocfs2_dlm_shutdown(osb, hangup_needed); | ||
1288 | 1308 | ||
1289 | debugfs_remove(osb->osb_debug_root); | 1309 | debugfs_remove(osb->osb_debug_root); |
1290 | 1310 | ||
1291 | if (!mnt_err) | 1311 | if (hangup_needed) |
1292 | ocfs2_stop_heartbeat(osb); | 1312 | ocfs2_cluster_hangup(osb->uuid_str, strlen(osb->uuid_str)); |
1293 | 1313 | ||
1294 | atomic_set(&osb->vol_state, VOLUME_DISMOUNTED); | 1314 | atomic_set(&osb->vol_state, VOLUME_DISMOUNTED); |
1295 | 1315 | ||
1296 | if (ocfs2_mount_local(osb)) | 1316 | if (ocfs2_mount_local(osb)) |
1297 | snprintf(nodestr, sizeof(nodestr), "local"); | 1317 | snprintf(nodestr, sizeof(nodestr), "local"); |
1298 | else | 1318 | else |
1299 | snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num); | 1319 | snprintf(nodestr, sizeof(nodestr), "%u", osb->node_num); |
1300 | 1320 | ||
1301 | printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %s)\n", | 1321 | printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %s)\n", |
1302 | osb->dev_str, nodestr); | 1322 | osb->dev_str, nodestr); |
@@ -1355,7 +1375,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1355 | sb->s_fs_info = osb; | 1375 | sb->s_fs_info = osb; |
1356 | sb->s_op = &ocfs2_sops; | 1376 | sb->s_op = &ocfs2_sops; |
1357 | sb->s_export_op = &ocfs2_export_ops; | 1377 | sb->s_export_op = &ocfs2_export_ops; |
1358 | osb->osb_locking_proto = ocfs2_locking_protocol; | ||
1359 | sb->s_time_gran = 1; | 1378 | sb->s_time_gran = 1; |
1360 | sb->s_flags |= MS_NOATIME; | 1379 | sb->s_flags |= MS_NOATIME; |
1361 | /* this is needed to support O_LARGEFILE */ | 1380 | /* this is needed to support O_LARGEFILE */ |
@@ -1368,7 +1387,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1368 | osb->s_sectsize_bits = blksize_bits(sector_size); | 1387 | osb->s_sectsize_bits = blksize_bits(sector_size); |
1369 | BUG_ON(!osb->s_sectsize_bits); | 1388 | BUG_ON(!osb->s_sectsize_bits); |
1370 | 1389 | ||
1371 | init_waitqueue_head(&osb->recovery_event); | ||
1372 | spin_lock_init(&osb->dc_task_lock); | 1390 | spin_lock_init(&osb->dc_task_lock); |
1373 | init_waitqueue_head(&osb->dc_event); | 1391 | init_waitqueue_head(&osb->dc_event); |
1374 | osb->dc_work_sequence = 0; | 1392 | osb->dc_work_sequence = 0; |
@@ -1376,6 +1394,7 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1376 | INIT_LIST_HEAD(&osb->blocked_lock_list); | 1394 | INIT_LIST_HEAD(&osb->blocked_lock_list); |
1377 | osb->blocked_lock_count = 0; | 1395 | osb->blocked_lock_count = 0; |
1378 | spin_lock_init(&osb->osb_lock); | 1396 | spin_lock_init(&osb->osb_lock); |
1397 | ocfs2_init_inode_steal_slot(osb); | ||
1379 | 1398 | ||
1380 | atomic_set(&osb->alloc_stats.moves, 0); | 1399 | atomic_set(&osb->alloc_stats.moves, 0); |
1381 | atomic_set(&osb->alloc_stats.local_data, 0); | 1400 | atomic_set(&osb->alloc_stats.local_data, 0); |
@@ -1388,24 +1407,23 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1388 | snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", | 1407 | snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", |
1389 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); | 1408 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); |
1390 | 1409 | ||
1391 | mutex_init(&osb->recovery_lock); | 1410 | status = ocfs2_recovery_init(osb); |
1392 | 1411 | if (status) { | |
1393 | osb->disable_recovery = 0; | 1412 | mlog(ML_ERROR, "Unable to initialize recovery state\n"); |
1394 | osb->recovery_thread_task = NULL; | 1413 | mlog_errno(status); |
1414 | goto bail; | ||
1415 | } | ||
1395 | 1416 | ||
1396 | init_waitqueue_head(&osb->checkpoint_event); | 1417 | init_waitqueue_head(&osb->checkpoint_event); |
1397 | atomic_set(&osb->needs_checkpoint, 0); | 1418 | atomic_set(&osb->needs_checkpoint, 0); |
1398 | 1419 | ||
1399 | osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; | 1420 | osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; |
1400 | 1421 | ||
1401 | osb->node_num = O2NM_INVALID_NODE_NUM; | ||
1402 | osb->slot_num = OCFS2_INVALID_SLOT; | 1422 | osb->slot_num = OCFS2_INVALID_SLOT; |
1403 | 1423 | ||
1404 | osb->local_alloc_state = OCFS2_LA_UNUSED; | 1424 | osb->local_alloc_state = OCFS2_LA_UNUSED; |
1405 | osb->local_alloc_bh = NULL; | 1425 | osb->local_alloc_bh = NULL; |
1406 | 1426 | ||
1407 | ocfs2_setup_hb_callbacks(osb); | ||
1408 | |||
1409 | init_waitqueue_head(&osb->osb_mount_event); | 1427 | init_waitqueue_head(&osb->osb_mount_event); |
1410 | 1428 | ||
1411 | osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL); | 1429 | osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL); |
@@ -1455,6 +1473,25 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1455 | goto bail; | 1473 | goto bail; |
1456 | } | 1474 | } |
1457 | 1475 | ||
1476 | if (ocfs2_userspace_stack(osb)) { | ||
1477 | memcpy(osb->osb_cluster_stack, | ||
1478 | OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, | ||
1479 | OCFS2_STACK_LABEL_LEN); | ||
1480 | osb->osb_cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; | ||
1481 | if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) { | ||
1482 | mlog(ML_ERROR, | ||
1483 | "couldn't mount because of an invalid " | ||
1484 | "cluster stack label (%s) \n", | ||
1485 | osb->osb_cluster_stack); | ||
1486 | status = -EINVAL; | ||
1487 | goto bail; | ||
1488 | } | ||
1489 | } else { | ||
1490 | /* The empty string is identical with classic tools that | ||
1491 | * don't know about s_cluster_info. */ | ||
1492 | osb->osb_cluster_stack[0] = '\0'; | ||
1493 | } | ||
1494 | |||
1458 | get_random_bytes(&osb->s_next_generation, sizeof(u32)); | 1495 | get_random_bytes(&osb->s_next_generation, sizeof(u32)); |
1459 | 1496 | ||
1460 | /* FIXME | 1497 | /* FIXME |
@@ -1724,8 +1761,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb) | |||
1724 | 1761 | ||
1725 | /* This function assumes that the caller has the main osb resource */ | 1762 | /* This function assumes that the caller has the main osb resource */ |
1726 | 1763 | ||
1727 | if (osb->slot_info) | 1764 | ocfs2_free_slot_info(osb); |
1728 | ocfs2_free_slot_info(osb->slot_info); | ||
1729 | 1765 | ||
1730 | kfree(osb->osb_orphan_wipes); | 1766 | kfree(osb->osb_orphan_wipes); |
1731 | /* FIXME | 1767 | /* FIXME |
@@ -244,21 +244,21 @@ static long do_sys_truncate(const char __user * path, loff_t length) | |||
244 | if (!S_ISREG(inode->i_mode)) | 244 | if (!S_ISREG(inode->i_mode)) |
245 | goto dput_and_out; | 245 | goto dput_and_out; |
246 | 246 | ||
247 | error = vfs_permission(&nd, MAY_WRITE); | 247 | error = mnt_want_write(nd.path.mnt); |
248 | if (error) | 248 | if (error) |
249 | goto dput_and_out; | 249 | goto dput_and_out; |
250 | 250 | ||
251 | error = -EROFS; | 251 | error = vfs_permission(&nd, MAY_WRITE); |
252 | if (IS_RDONLY(inode)) | 252 | if (error) |
253 | goto dput_and_out; | 253 | goto mnt_drop_write_and_out; |
254 | 254 | ||
255 | error = -EPERM; | 255 | error = -EPERM; |
256 | if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) | 256 | if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) |
257 | goto dput_and_out; | 257 | goto mnt_drop_write_and_out; |
258 | 258 | ||
259 | error = get_write_access(inode); | 259 | error = get_write_access(inode); |
260 | if (error) | 260 | if (error) |
261 | goto dput_and_out; | 261 | goto mnt_drop_write_and_out; |
262 | 262 | ||
263 | /* | 263 | /* |
264 | * Make sure that there are no leases. get_write_access() protects | 264 | * Make sure that there are no leases. get_write_access() protects |
@@ -276,6 +276,8 @@ static long do_sys_truncate(const char __user * path, loff_t length) | |||
276 | 276 | ||
277 | put_write_and_out: | 277 | put_write_and_out: |
278 | put_write_access(inode); | 278 | put_write_access(inode); |
279 | mnt_drop_write_and_out: | ||
280 | mnt_drop_write(nd.path.mnt); | ||
279 | dput_and_out: | 281 | dput_and_out: |
280 | path_put(&nd.path); | 282 | path_put(&nd.path); |
281 | out: | 283 | out: |
@@ -457,8 +459,17 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) | |||
457 | if(res || !(mode & S_IWOTH) || | 459 | if(res || !(mode & S_IWOTH) || |
458 | special_file(nd.path.dentry->d_inode->i_mode)) | 460 | special_file(nd.path.dentry->d_inode->i_mode)) |
459 | goto out_path_release; | 461 | goto out_path_release; |
460 | 462 | /* | |
461 | if(IS_RDONLY(nd.path.dentry->d_inode)) | 463 | * This is a rare case where using __mnt_is_readonly() |
464 | * is OK without a mnt_want/drop_write() pair. Since | ||
465 | * no actual write to the fs is performed here, we do | ||
466 | * not need to telegraph to that to anyone. | ||
467 | * | ||
468 | * By doing this, we accept that this access is | ||
469 | * inherently racy and know that the fs may change | ||
470 | * state before we even see this result. | ||
471 | */ | ||
472 | if (__mnt_is_readonly(nd.path.mnt)) | ||
462 | res = -EROFS; | 473 | res = -EROFS; |
463 | 474 | ||
464 | out_path_release: | 475 | out_path_release: |
@@ -567,12 +578,12 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) | |||
567 | 578 | ||
568 | audit_inode(NULL, dentry); | 579 | audit_inode(NULL, dentry); |
569 | 580 | ||
570 | err = -EROFS; | 581 | err = mnt_want_write(file->f_path.mnt); |
571 | if (IS_RDONLY(inode)) | 582 | if (err) |
572 | goto out_putf; | 583 | goto out_putf; |
573 | err = -EPERM; | 584 | err = -EPERM; |
574 | if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) | 585 | if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) |
575 | goto out_putf; | 586 | goto out_drop_write; |
576 | mutex_lock(&inode->i_mutex); | 587 | mutex_lock(&inode->i_mutex); |
577 | if (mode == (mode_t) -1) | 588 | if (mode == (mode_t) -1) |
578 | mode = inode->i_mode; | 589 | mode = inode->i_mode; |
@@ -581,6 +592,8 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) | |||
581 | err = notify_change(dentry, &newattrs); | 592 | err = notify_change(dentry, &newattrs); |
582 | mutex_unlock(&inode->i_mutex); | 593 | mutex_unlock(&inode->i_mutex); |
583 | 594 | ||
595 | out_drop_write: | ||
596 | mnt_drop_write(file->f_path.mnt); | ||
584 | out_putf: | 597 | out_putf: |
585 | fput(file); | 598 | fput(file); |
586 | out: | 599 | out: |
@@ -600,13 +613,13 @@ asmlinkage long sys_fchmodat(int dfd, const char __user *filename, | |||
600 | goto out; | 613 | goto out; |
601 | inode = nd.path.dentry->d_inode; | 614 | inode = nd.path.dentry->d_inode; |
602 | 615 | ||
603 | error = -EROFS; | 616 | error = mnt_want_write(nd.path.mnt); |
604 | if (IS_RDONLY(inode)) | 617 | if (error) |
605 | goto dput_and_out; | 618 | goto dput_and_out; |
606 | 619 | ||
607 | error = -EPERM; | 620 | error = -EPERM; |
608 | if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) | 621 | if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) |
609 | goto dput_and_out; | 622 | goto out_drop_write; |
610 | 623 | ||
611 | mutex_lock(&inode->i_mutex); | 624 | mutex_lock(&inode->i_mutex); |
612 | if (mode == (mode_t) -1) | 625 | if (mode == (mode_t) -1) |
@@ -616,6 +629,8 @@ asmlinkage long sys_fchmodat(int dfd, const char __user *filename, | |||
616 | error = notify_change(nd.path.dentry, &newattrs); | 629 | error = notify_change(nd.path.dentry, &newattrs); |
617 | mutex_unlock(&inode->i_mutex); | 630 | mutex_unlock(&inode->i_mutex); |
618 | 631 | ||
632 | out_drop_write: | ||
633 | mnt_drop_write(nd.path.mnt); | ||
619 | dput_and_out: | 634 | dput_and_out: |
620 | path_put(&nd.path); | 635 | path_put(&nd.path); |
621 | out: | 636 | out: |
@@ -638,9 +653,6 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group) | |||
638 | printk(KERN_ERR "chown_common: NULL inode\n"); | 653 | printk(KERN_ERR "chown_common: NULL inode\n"); |
639 | goto out; | 654 | goto out; |
640 | } | 655 | } |
641 | error = -EROFS; | ||
642 | if (IS_RDONLY(inode)) | ||
643 | goto out; | ||
644 | error = -EPERM; | 656 | error = -EPERM; |
645 | if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) | 657 | if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) |
646 | goto out; | 658 | goto out; |
@@ -671,7 +683,12 @@ asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group) | |||
671 | error = user_path_walk(filename, &nd); | 683 | error = user_path_walk(filename, &nd); |
672 | if (error) | 684 | if (error) |
673 | goto out; | 685 | goto out; |
686 | error = mnt_want_write(nd.path.mnt); | ||
687 | if (error) | ||
688 | goto out_release; | ||
674 | error = chown_common(nd.path.dentry, user, group); | 689 | error = chown_common(nd.path.dentry, user, group); |
690 | mnt_drop_write(nd.path.mnt); | ||
691 | out_release: | ||
675 | path_put(&nd.path); | 692 | path_put(&nd.path); |
676 | out: | 693 | out: |
677 | return error; | 694 | return error; |
@@ -691,7 +708,12 @@ asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, | |||
691 | error = __user_walk_fd(dfd, filename, follow, &nd); | 708 | error = __user_walk_fd(dfd, filename, follow, &nd); |
692 | if (error) | 709 | if (error) |
693 | goto out; | 710 | goto out; |
711 | error = mnt_want_write(nd.path.mnt); | ||
712 | if (error) | ||
713 | goto out_release; | ||
694 | error = chown_common(nd.path.dentry, user, group); | 714 | error = chown_common(nd.path.dentry, user, group); |
715 | mnt_drop_write(nd.path.mnt); | ||
716 | out_release: | ||
695 | path_put(&nd.path); | 717 | path_put(&nd.path); |
696 | out: | 718 | out: |
697 | return error; | 719 | return error; |
@@ -705,7 +727,12 @@ asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group | |||
705 | error = user_path_walk_link(filename, &nd); | 727 | error = user_path_walk_link(filename, &nd); |
706 | if (error) | 728 | if (error) |
707 | goto out; | 729 | goto out; |
730 | error = mnt_want_write(nd.path.mnt); | ||
731 | if (error) | ||
732 | goto out_release; | ||
708 | error = chown_common(nd.path.dentry, user, group); | 733 | error = chown_common(nd.path.dentry, user, group); |
734 | mnt_drop_write(nd.path.mnt); | ||
735 | out_release: | ||
709 | path_put(&nd.path); | 736 | path_put(&nd.path); |
710 | out: | 737 | out: |
711 | return error; | 738 | return error; |
@@ -722,14 +749,48 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group) | |||
722 | if (!file) | 749 | if (!file) |
723 | goto out; | 750 | goto out; |
724 | 751 | ||
752 | error = mnt_want_write(file->f_path.mnt); | ||
753 | if (error) | ||
754 | goto out_fput; | ||
725 | dentry = file->f_path.dentry; | 755 | dentry = file->f_path.dentry; |
726 | audit_inode(NULL, dentry); | 756 | audit_inode(NULL, dentry); |
727 | error = chown_common(dentry, user, group); | 757 | error = chown_common(dentry, user, group); |
758 | mnt_drop_write(file->f_path.mnt); | ||
759 | out_fput: | ||
728 | fput(file); | 760 | fput(file); |
729 | out: | 761 | out: |
730 | return error; | 762 | return error; |
731 | } | 763 | } |
732 | 764 | ||
765 | /* | ||
766 | * You have to be very careful that these write | ||
767 | * counts get cleaned up in error cases and | ||
768 | * upon __fput(). This should probably never | ||
769 | * be called outside of __dentry_open(). | ||
770 | */ | ||
771 | static inline int __get_file_write_access(struct inode *inode, | ||
772 | struct vfsmount *mnt) | ||
773 | { | ||
774 | int error; | ||
775 | error = get_write_access(inode); | ||
776 | if (error) | ||
777 | return error; | ||
778 | /* | ||
779 | * Do not take mount writer counts on | ||
780 | * special files since no writes to | ||
781 | * the mount itself will occur. | ||
782 | */ | ||
783 | if (!special_file(inode->i_mode)) { | ||
784 | /* | ||
785 | * Balanced in __fput() | ||
786 | */ | ||
787 | error = mnt_want_write(mnt); | ||
788 | if (error) | ||
789 | put_write_access(inode); | ||
790 | } | ||
791 | return error; | ||
792 | } | ||
793 | |||
733 | static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, | 794 | static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, |
734 | int flags, struct file *f, | 795 | int flags, struct file *f, |
735 | int (*open)(struct inode *, struct file *)) | 796 | int (*open)(struct inode *, struct file *)) |
@@ -742,9 +803,11 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, | |||
742 | FMODE_PREAD | FMODE_PWRITE; | 803 | FMODE_PREAD | FMODE_PWRITE; |
743 | inode = dentry->d_inode; | 804 | inode = dentry->d_inode; |
744 | if (f->f_mode & FMODE_WRITE) { | 805 | if (f->f_mode & FMODE_WRITE) { |
745 | error = get_write_access(inode); | 806 | error = __get_file_write_access(inode, mnt); |
746 | if (error) | 807 | if (error) |
747 | goto cleanup_file; | 808 | goto cleanup_file; |
809 | if (!special_file(inode->i_mode)) | ||
810 | file_take_write(f); | ||
748 | } | 811 | } |
749 | 812 | ||
750 | f->f_mapping = inode->i_mapping; | 813 | f->f_mapping = inode->i_mapping; |
@@ -784,8 +847,19 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, | |||
784 | 847 | ||
785 | cleanup_all: | 848 | cleanup_all: |
786 | fops_put(f->f_op); | 849 | fops_put(f->f_op); |
787 | if (f->f_mode & FMODE_WRITE) | 850 | if (f->f_mode & FMODE_WRITE) { |
788 | put_write_access(inode); | 851 | put_write_access(inode); |
852 | if (!special_file(inode->i_mode)) { | ||
853 | /* | ||
854 | * We don't consider this a real | ||
855 | * mnt_want/drop_write() pair | ||
856 | * because it all happenend right | ||
857 | * here, so just reset the state. | ||
858 | */ | ||
859 | file_reset_write(f); | ||
860 | mnt_drop_write(mnt); | ||
861 | } | ||
862 | } | ||
789 | file_kill(f); | 863 | file_kill(f); |
790 | f->f_path.dentry = NULL; | 864 | f->f_path.dentry = NULL; |
791 | f->f_path.mnt = NULL; | 865 | f->f_path.mnt = NULL; |
@@ -796,43 +870,6 @@ cleanup_file: | |||
796 | return ERR_PTR(error); | 870 | return ERR_PTR(error); |
797 | } | 871 | } |
798 | 872 | ||
799 | /* | ||
800 | * Note that while the flag value (low two bits) for sys_open means: | ||
801 | * 00 - read-only | ||
802 | * 01 - write-only | ||
803 | * 10 - read-write | ||
804 | * 11 - special | ||
805 | * it is changed into | ||
806 | * 00 - no permissions needed | ||
807 | * 01 - read-permission | ||
808 | * 10 - write-permission | ||
809 | * 11 - read-write | ||
810 | * for the internal routines (ie open_namei()/follow_link() etc). 00 is | ||
811 | * used by symlinks. | ||
812 | */ | ||
813 | static struct file *do_filp_open(int dfd, const char *filename, int flags, | ||
814 | int mode) | ||
815 | { | ||
816 | int namei_flags, error; | ||
817 | struct nameidata nd; | ||
818 | |||
819 | namei_flags = flags; | ||
820 | if ((namei_flags+1) & O_ACCMODE) | ||
821 | namei_flags++; | ||
822 | |||
823 | error = open_namei(dfd, filename, namei_flags, mode, &nd); | ||
824 | if (!error) | ||
825 | return nameidata_to_filp(&nd, flags); | ||
826 | |||
827 | return ERR_PTR(error); | ||
828 | } | ||
829 | |||
830 | struct file *filp_open(const char *filename, int flags, int mode) | ||
831 | { | ||
832 | return do_filp_open(AT_FDCWD, filename, flags, mode); | ||
833 | } | ||
834 | EXPORT_SYMBOL(filp_open); | ||
835 | |||
836 | /** | 873 | /** |
837 | * lookup_instantiate_filp - instantiates the open intent filp | 874 | * lookup_instantiate_filp - instantiates the open intent filp |
838 | * @nd: pointer to nameidata | 875 | * @nd: pointer to nameidata |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 03f808c5b79d..6149e4b58c88 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
@@ -473,6 +473,10 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) | |||
473 | return 0; | 473 | return 0; |
474 | if (IS_ERR(state)) /* I/O error reading the partition table */ | 474 | if (IS_ERR(state)) /* I/O error reading the partition table */ |
475 | return -EIO; | 475 | return -EIO; |
476 | |||
477 | /* tell userspace that the media / partition table may have changed */ | ||
478 | kobject_uevent(&disk->dev.kobj, KOBJ_CHANGE); | ||
479 | |||
476 | for (p = 1; p < state->limit; p++) { | 480 | for (p = 1; p < state->limit; p++) { |
477 | sector_t size = state->parts[p].size; | 481 | sector_t size = state->parts[p].size; |
478 | sector_t from = state->parts[p].from; | 482 | sector_t from = state->parts[p].from; |
@@ -988,7 +988,10 @@ struct file *create_write_pipe(void) | |||
988 | return f; | 988 | return f; |
989 | 989 | ||
990 | err_dentry: | 990 | err_dentry: |
991 | free_pipe_info(inode); | ||
991 | dput(dentry); | 992 | dput(dentry); |
993 | return ERR_PTR(err); | ||
994 | |||
992 | err_inode: | 995 | err_inode: |
993 | free_pipe_info(inode); | 996 | free_pipe_info(inode); |
994 | iput(inode); | 997 | iput(inode); |
diff --git a/fs/pnode.c b/fs/pnode.c index 1d8f5447f3f7..8d5f392ec3d3 100644 --- a/fs/pnode.c +++ b/fs/pnode.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/mnt_namespace.h> | 9 | #include <linux/mnt_namespace.h> |
10 | #include <linux/mount.h> | 10 | #include <linux/mount.h> |
11 | #include <linux/fs.h> | 11 | #include <linux/fs.h> |
12 | #include "internal.h" | ||
12 | #include "pnode.h" | 13 | #include "pnode.h" |
13 | 14 | ||
14 | /* return the next shared peer mount of @p */ | 15 | /* return the next shared peer mount of @p */ |
@@ -27,6 +28,57 @@ static inline struct vfsmount *next_slave(struct vfsmount *p) | |||
27 | return list_entry(p->mnt_slave.next, struct vfsmount, mnt_slave); | 28 | return list_entry(p->mnt_slave.next, struct vfsmount, mnt_slave); |
28 | } | 29 | } |
29 | 30 | ||
31 | /* | ||
32 | * Return true if path is reachable from root | ||
33 | * | ||
34 | * namespace_sem is held, and mnt is attached | ||
35 | */ | ||
36 | static bool is_path_reachable(struct vfsmount *mnt, struct dentry *dentry, | ||
37 | const struct path *root) | ||
38 | { | ||
39 | while (mnt != root->mnt && mnt->mnt_parent != mnt) { | ||
40 | dentry = mnt->mnt_mountpoint; | ||
41 | mnt = mnt->mnt_parent; | ||
42 | } | ||
43 | return mnt == root->mnt && is_subdir(dentry, root->dentry); | ||
44 | } | ||
45 | |||
46 | static struct vfsmount *get_peer_under_root(struct vfsmount *mnt, | ||
47 | struct mnt_namespace *ns, | ||
48 | const struct path *root) | ||
49 | { | ||
50 | struct vfsmount *m = mnt; | ||
51 | |||
52 | do { | ||
53 | /* Check the namespace first for optimization */ | ||
54 | if (m->mnt_ns == ns && is_path_reachable(m, m->mnt_root, root)) | ||
55 | return m; | ||
56 | |||
57 | m = next_peer(m); | ||
58 | } while (m != mnt); | ||
59 | |||
60 | return NULL; | ||
61 | } | ||
62 | |||
63 | /* | ||
64 | * Get ID of closest dominating peer group having a representative | ||
65 | * under the given root. | ||
66 | * | ||
67 | * Caller must hold namespace_sem | ||
68 | */ | ||
69 | int get_dominating_id(struct vfsmount *mnt, const struct path *root) | ||
70 | { | ||
71 | struct vfsmount *m; | ||
72 | |||
73 | for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) { | ||
74 | struct vfsmount *d = get_peer_under_root(m, mnt->mnt_ns, root); | ||
75 | if (d) | ||
76 | return d->mnt_group_id; | ||
77 | } | ||
78 | |||
79 | return 0; | ||
80 | } | ||
81 | |||
30 | static int do_make_slave(struct vfsmount *mnt) | 82 | static int do_make_slave(struct vfsmount *mnt) |
31 | { | 83 | { |
32 | struct vfsmount *peer_mnt = mnt, *master = mnt->mnt_master; | 84 | struct vfsmount *peer_mnt = mnt, *master = mnt->mnt_master; |
@@ -45,7 +97,11 @@ static int do_make_slave(struct vfsmount *mnt) | |||
45 | if (peer_mnt == mnt) | 97 | if (peer_mnt == mnt) |
46 | peer_mnt = NULL; | 98 | peer_mnt = NULL; |
47 | } | 99 | } |
100 | if (IS_MNT_SHARED(mnt) && list_empty(&mnt->mnt_share)) | ||
101 | mnt_release_group_id(mnt); | ||
102 | |||
48 | list_del_init(&mnt->mnt_share); | 103 | list_del_init(&mnt->mnt_share); |
104 | mnt->mnt_group_id = 0; | ||
49 | 105 | ||
50 | if (peer_mnt) | 106 | if (peer_mnt) |
51 | master = peer_mnt; | 107 | master = peer_mnt; |
@@ -67,7 +123,6 @@ static int do_make_slave(struct vfsmount *mnt) | |||
67 | } | 123 | } |
68 | mnt->mnt_master = master; | 124 | mnt->mnt_master = master; |
69 | CLEAR_MNT_SHARED(mnt); | 125 | CLEAR_MNT_SHARED(mnt); |
70 | INIT_LIST_HEAD(&mnt->mnt_slave_list); | ||
71 | return 0; | 126 | return 0; |
72 | } | 127 | } |
73 | 128 | ||
@@ -211,8 +266,7 @@ int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry, | |||
211 | out: | 266 | out: |
212 | spin_lock(&vfsmount_lock); | 267 | spin_lock(&vfsmount_lock); |
213 | while (!list_empty(&tmp_list)) { | 268 | while (!list_empty(&tmp_list)) { |
214 | child = list_entry(tmp_list.next, struct vfsmount, mnt_hash); | 269 | child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash); |
215 | list_del_init(&child->mnt_hash); | ||
216 | umount_tree(child, 0, &umount_list); | 270 | umount_tree(child, 0, &umount_list); |
217 | } | 271 | } |
218 | spin_unlock(&vfsmount_lock); | 272 | spin_unlock(&vfsmount_lock); |
diff --git a/fs/pnode.h b/fs/pnode.h index f249be2fee7a..958665d662af 100644 --- a/fs/pnode.h +++ b/fs/pnode.h | |||
@@ -35,4 +35,6 @@ int propagate_mnt(struct vfsmount *, struct dentry *, struct vfsmount *, | |||
35 | struct list_head *); | 35 | struct list_head *); |
36 | int propagate_umount(struct list_head *); | 36 | int propagate_umount(struct list_head *); |
37 | int propagate_mount_busy(struct vfsmount *, int); | 37 | int propagate_mount_busy(struct vfsmount *, int); |
38 | void mnt_release_group_id(struct vfsmount *); | ||
39 | int get_dominating_id(struct vfsmount *mnt, const struct path *root); | ||
38 | #endif /* _LINUX_PNODE_H */ | 40 | #endif /* _LINUX_PNODE_H */ |
diff --git a/fs/proc/base.c b/fs/proc/base.c index 81d7d145292a..c5e412a00b17 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -502,17 +502,14 @@ static const struct inode_operations proc_def_inode_operations = { | |||
502 | .setattr = proc_setattr, | 502 | .setattr = proc_setattr, |
503 | }; | 503 | }; |
504 | 504 | ||
505 | extern const struct seq_operations mounts_op; | 505 | static int mounts_open_common(struct inode *inode, struct file *file, |
506 | struct proc_mounts { | 506 | const struct seq_operations *op) |
507 | struct seq_file m; | ||
508 | int event; | ||
509 | }; | ||
510 | |||
511 | static int mounts_open(struct inode *inode, struct file *file) | ||
512 | { | 507 | { |
513 | struct task_struct *task = get_proc_task(inode); | 508 | struct task_struct *task = get_proc_task(inode); |
514 | struct nsproxy *nsp; | 509 | struct nsproxy *nsp; |
515 | struct mnt_namespace *ns = NULL; | 510 | struct mnt_namespace *ns = NULL; |
511 | struct fs_struct *fs = NULL; | ||
512 | struct path root; | ||
516 | struct proc_mounts *p; | 513 | struct proc_mounts *p; |
517 | int ret = -EINVAL; | 514 | int ret = -EINVAL; |
518 | 515 | ||
@@ -525,40 +522,61 @@ static int mounts_open(struct inode *inode, struct file *file) | |||
525 | get_mnt_ns(ns); | 522 | get_mnt_ns(ns); |
526 | } | 523 | } |
527 | rcu_read_unlock(); | 524 | rcu_read_unlock(); |
528 | 525 | if (ns) | |
526 | fs = get_fs_struct(task); | ||
529 | put_task_struct(task); | 527 | put_task_struct(task); |
530 | } | 528 | } |
531 | 529 | ||
532 | if (ns) { | 530 | if (!ns) |
533 | ret = -ENOMEM; | 531 | goto err; |
534 | p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); | 532 | if (!fs) |
535 | if (p) { | 533 | goto err_put_ns; |
536 | file->private_data = &p->m; | 534 | |
537 | ret = seq_open(file, &mounts_op); | 535 | read_lock(&fs->lock); |
538 | if (!ret) { | 536 | root = fs->root; |
539 | p->m.private = ns; | 537 | path_get(&root); |
540 | p->event = ns->event; | 538 | read_unlock(&fs->lock); |
541 | return 0; | 539 | put_fs_struct(fs); |
542 | } | 540 | |
543 | kfree(p); | 541 | ret = -ENOMEM; |
544 | } | 542 | p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); |
545 | put_mnt_ns(ns); | 543 | if (!p) |
546 | } | 544 | goto err_put_path; |
545 | |||
546 | file->private_data = &p->m; | ||
547 | ret = seq_open(file, op); | ||
548 | if (ret) | ||
549 | goto err_free; | ||
550 | |||
551 | p->m.private = p; | ||
552 | p->ns = ns; | ||
553 | p->root = root; | ||
554 | p->event = ns->event; | ||
555 | |||
556 | return 0; | ||
557 | |||
558 | err_free: | ||
559 | kfree(p); | ||
560 | err_put_path: | ||
561 | path_put(&root); | ||
562 | err_put_ns: | ||
563 | put_mnt_ns(ns); | ||
564 | err: | ||
547 | return ret; | 565 | return ret; |
548 | } | 566 | } |
549 | 567 | ||
550 | static int mounts_release(struct inode *inode, struct file *file) | 568 | static int mounts_release(struct inode *inode, struct file *file) |
551 | { | 569 | { |
552 | struct seq_file *m = file->private_data; | 570 | struct proc_mounts *p = file->private_data; |
553 | struct mnt_namespace *ns = m->private; | 571 | path_put(&p->root); |
554 | put_mnt_ns(ns); | 572 | put_mnt_ns(p->ns); |
555 | return seq_release(inode, file); | 573 | return seq_release(inode, file); |
556 | } | 574 | } |
557 | 575 | ||
558 | static unsigned mounts_poll(struct file *file, poll_table *wait) | 576 | static unsigned mounts_poll(struct file *file, poll_table *wait) |
559 | { | 577 | { |
560 | struct proc_mounts *p = file->private_data; | 578 | struct proc_mounts *p = file->private_data; |
561 | struct mnt_namespace *ns = p->m.private; | 579 | struct mnt_namespace *ns = p->ns; |
562 | unsigned res = 0; | 580 | unsigned res = 0; |
563 | 581 | ||
564 | poll_wait(file, &ns->poll, wait); | 582 | poll_wait(file, &ns->poll, wait); |
@@ -573,6 +591,11 @@ static unsigned mounts_poll(struct file *file, poll_table *wait) | |||
573 | return res; | 591 | return res; |
574 | } | 592 | } |
575 | 593 | ||
594 | static int mounts_open(struct inode *inode, struct file *file) | ||
595 | { | ||
596 | return mounts_open_common(inode, file, &mounts_op); | ||
597 | } | ||
598 | |||
576 | static const struct file_operations proc_mounts_operations = { | 599 | static const struct file_operations proc_mounts_operations = { |
577 | .open = mounts_open, | 600 | .open = mounts_open, |
578 | .read = seq_read, | 601 | .read = seq_read, |
@@ -581,38 +604,22 @@ static const struct file_operations proc_mounts_operations = { | |||
581 | .poll = mounts_poll, | 604 | .poll = mounts_poll, |
582 | }; | 605 | }; |
583 | 606 | ||
584 | extern const struct seq_operations mountstats_op; | 607 | static int mountinfo_open(struct inode *inode, struct file *file) |
585 | static int mountstats_open(struct inode *inode, struct file *file) | ||
586 | { | 608 | { |
587 | int ret = seq_open(file, &mountstats_op); | 609 | return mounts_open_common(inode, file, &mountinfo_op); |
588 | 610 | } | |
589 | if (!ret) { | ||
590 | struct seq_file *m = file->private_data; | ||
591 | struct nsproxy *nsp; | ||
592 | struct mnt_namespace *mnt_ns = NULL; | ||
593 | struct task_struct *task = get_proc_task(inode); | ||
594 | |||
595 | if (task) { | ||
596 | rcu_read_lock(); | ||
597 | nsp = task_nsproxy(task); | ||
598 | if (nsp) { | ||
599 | mnt_ns = nsp->mnt_ns; | ||
600 | if (mnt_ns) | ||
601 | get_mnt_ns(mnt_ns); | ||
602 | } | ||
603 | rcu_read_unlock(); | ||
604 | 611 | ||
605 | put_task_struct(task); | 612 | static const struct file_operations proc_mountinfo_operations = { |
606 | } | 613 | .open = mountinfo_open, |
614 | .read = seq_read, | ||
615 | .llseek = seq_lseek, | ||
616 | .release = mounts_release, | ||
617 | .poll = mounts_poll, | ||
618 | }; | ||
607 | 619 | ||
608 | if (mnt_ns) | 620 | static int mountstats_open(struct inode *inode, struct file *file) |
609 | m->private = mnt_ns; | 621 | { |
610 | else { | 622 | return mounts_open_common(inode, file, &mountstats_op); |
611 | seq_release(inode, file); | ||
612 | ret = -EINVAL; | ||
613 | } | ||
614 | } | ||
615 | return ret; | ||
616 | } | 623 | } |
617 | 624 | ||
618 | static const struct file_operations proc_mountstats_operations = { | 625 | static const struct file_operations proc_mountstats_operations = { |
@@ -1626,7 +1633,6 @@ static int proc_readfd_common(struct file * filp, void * dirent, | |||
1626 | unsigned int fd, ino; | 1633 | unsigned int fd, ino; |
1627 | int retval; | 1634 | int retval; |
1628 | struct files_struct * files; | 1635 | struct files_struct * files; |
1629 | struct fdtable *fdt; | ||
1630 | 1636 | ||
1631 | retval = -ENOENT; | 1637 | retval = -ENOENT; |
1632 | if (!p) | 1638 | if (!p) |
@@ -1649,9 +1655,8 @@ static int proc_readfd_common(struct file * filp, void * dirent, | |||
1649 | if (!files) | 1655 | if (!files) |
1650 | goto out; | 1656 | goto out; |
1651 | rcu_read_lock(); | 1657 | rcu_read_lock(); |
1652 | fdt = files_fdtable(files); | ||
1653 | for (fd = filp->f_pos-2; | 1658 | for (fd = filp->f_pos-2; |
1654 | fd < fdt->max_fds; | 1659 | fd < files_fdtable(files)->max_fds; |
1655 | fd++, filp->f_pos++) { | 1660 | fd++, filp->f_pos++) { |
1656 | char name[PROC_NUMBUF]; | 1661 | char name[PROC_NUMBUF]; |
1657 | int len; | 1662 | int len; |
@@ -2311,6 +2316,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2311 | LNK("root", root), | 2316 | LNK("root", root), |
2312 | LNK("exe", exe), | 2317 | LNK("exe", exe), |
2313 | REG("mounts", S_IRUGO, mounts), | 2318 | REG("mounts", S_IRUGO, mounts), |
2319 | REG("mountinfo", S_IRUGO, mountinfo), | ||
2314 | REG("mountstats", S_IRUSR, mountstats), | 2320 | REG("mountstats", S_IRUSR, mountstats), |
2315 | #ifdef CONFIG_PROC_PAGE_MONITOR | 2321 | #ifdef CONFIG_PROC_PAGE_MONITOR |
2316 | REG("clear_refs", S_IWUSR, clear_refs), | 2322 | REG("clear_refs", S_IWUSR, clear_refs), |
@@ -2643,6 +2649,7 @@ static const struct pid_entry tid_base_stuff[] = { | |||
2643 | LNK("root", root), | 2649 | LNK("root", root), |
2644 | LNK("exe", exe), | 2650 | LNK("exe", exe), |
2645 | REG("mounts", S_IRUGO, mounts), | 2651 | REG("mounts", S_IRUGO, mounts), |
2652 | REG("mountinfo", S_IRUGO, mountinfo), | ||
2646 | #ifdef CONFIG_PROC_PAGE_MONITOR | 2653 | #ifdef CONFIG_PROC_PAGE_MONITOR |
2647 | REG("clear_refs", S_IWUSR, clear_refs), | 2654 | REG("clear_refs", S_IWUSR, clear_refs), |
2648 | REG("smaps", S_IRUGO, smaps), | 2655 | REG("smaps", S_IRUGO, smaps), |
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 4caa5f774fb7..13cd7835d0df 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c | |||
@@ -44,7 +44,9 @@ int seq_open_net(struct inode *ino, struct file *f, | |||
44 | put_net(net); | 44 | put_net(net); |
45 | return -ENOMEM; | 45 | return -ENOMEM; |
46 | } | 46 | } |
47 | #ifdef CONFIG_NET_NS | ||
47 | p->net = net; | 48 | p->net = net; |
49 | #endif | ||
48 | return 0; | 50 | return 0; |
49 | } | 51 | } |
50 | EXPORT_SYMBOL_GPL(seq_open_net); | 52 | EXPORT_SYMBOL_GPL(seq_open_net); |
@@ -52,12 +54,10 @@ EXPORT_SYMBOL_GPL(seq_open_net); | |||
52 | int seq_release_net(struct inode *ino, struct file *f) | 54 | int seq_release_net(struct inode *ino, struct file *f) |
53 | { | 55 | { |
54 | struct seq_file *seq; | 56 | struct seq_file *seq; |
55 | struct seq_net_private *p; | ||
56 | 57 | ||
57 | seq = f->private_data; | 58 | seq = f->private_data; |
58 | p = seq->private; | ||
59 | 59 | ||
60 | put_net(p->net); | 60 | put_net(seq_file_net(seq)); |
61 | seq_release_private(ino, f); | 61 | seq_release_private(ino, f); |
62 | return 0; | 62 | return 0; |
63 | } | 63 | } |
diff --git a/fs/read_write.c b/fs/read_write.c index 49a98718ecdf..f0d1240a5c69 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -33,7 +33,7 @@ EXPORT_SYMBOL(generic_ro_fops); | |||
33 | 33 | ||
34 | loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) | 34 | loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) |
35 | { | 35 | { |
36 | long long retval; | 36 | loff_t retval; |
37 | struct inode *inode = file->f_mapping->host; | 37 | struct inode *inode = file->f_mapping->host; |
38 | 38 | ||
39 | mutex_lock(&inode->i_mutex); | 39 | mutex_lock(&inode->i_mutex); |
@@ -60,7 +60,7 @@ EXPORT_SYMBOL(generic_file_llseek); | |||
60 | 60 | ||
61 | loff_t remote_llseek(struct file *file, loff_t offset, int origin) | 61 | loff_t remote_llseek(struct file *file, loff_t offset, int origin) |
62 | { | 62 | { |
63 | long long retval; | 63 | loff_t retval; |
64 | 64 | ||
65 | lock_kernel(); | 65 | lock_kernel(); |
66 | switch (origin) { | 66 | switch (origin) { |
@@ -91,7 +91,7 @@ EXPORT_SYMBOL(no_llseek); | |||
91 | 91 | ||
92 | loff_t default_llseek(struct file *file, loff_t offset, int origin) | 92 | loff_t default_llseek(struct file *file, loff_t offset, int origin) |
93 | { | 93 | { |
94 | long long retval; | 94 | loff_t retval; |
95 | 95 | ||
96 | lock_kernel(); | 96 | lock_kernel(); |
97 | switch (origin) { | 97 | switch (origin) { |
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index e0f0f098a523..74363a7aacbc 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c | |||
@@ -4,6 +4,7 @@ | |||
4 | 4 | ||
5 | #include <linux/capability.h> | 5 | #include <linux/capability.h> |
6 | #include <linux/fs.h> | 6 | #include <linux/fs.h> |
7 | #include <linux/mount.h> | ||
7 | #include <linux/reiserfs_fs.h> | 8 | #include <linux/reiserfs_fs.h> |
8 | #include <linux/time.h> | 9 | #include <linux/time.h> |
9 | #include <asm/uaccess.h> | 10 | #include <asm/uaccess.h> |
@@ -25,6 +26,7 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, | |||
25 | unsigned long arg) | 26 | unsigned long arg) |
26 | { | 27 | { |
27 | unsigned int flags; | 28 | unsigned int flags; |
29 | int err = 0; | ||
28 | 30 | ||
29 | switch (cmd) { | 31 | switch (cmd) { |
30 | case REISERFS_IOC_UNPACK: | 32 | case REISERFS_IOC_UNPACK: |
@@ -48,50 +50,67 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, | |||
48 | if (!reiserfs_attrs(inode->i_sb)) | 50 | if (!reiserfs_attrs(inode->i_sb)) |
49 | return -ENOTTY; | 51 | return -ENOTTY; |
50 | 52 | ||
51 | if (IS_RDONLY(inode)) | 53 | err = mnt_want_write(filp->f_path.mnt); |
52 | return -EROFS; | 54 | if (err) |
55 | return err; | ||
53 | 56 | ||
54 | if (!is_owner_or_cap(inode)) | 57 | if (!is_owner_or_cap(inode)) { |
55 | return -EPERM; | 58 | err = -EPERM; |
56 | 59 | goto setflags_out; | |
57 | if (get_user(flags, (int __user *)arg)) | 60 | } |
58 | return -EFAULT; | 61 | if (get_user(flags, (int __user *)arg)) { |
59 | 62 | err = -EFAULT; | |
60 | /* Is it quota file? Do not allow user to mess with it. */ | 63 | goto setflags_out; |
61 | if (IS_NOQUOTA(inode)) | 64 | } |
62 | return -EPERM; | 65 | /* |
66 | * Is it quota file? Do not allow user to mess with it | ||
67 | */ | ||
68 | if (IS_NOQUOTA(inode)) { | ||
69 | err = -EPERM; | ||
70 | goto setflags_out; | ||
71 | } | ||
63 | if (((flags ^ REISERFS_I(inode)-> | 72 | if (((flags ^ REISERFS_I(inode)-> |
64 | i_attrs) & (REISERFS_IMMUTABLE_FL | | 73 | i_attrs) & (REISERFS_IMMUTABLE_FL | |
65 | REISERFS_APPEND_FL)) | 74 | REISERFS_APPEND_FL)) |
66 | && !capable(CAP_LINUX_IMMUTABLE)) | 75 | && !capable(CAP_LINUX_IMMUTABLE)) { |
67 | return -EPERM; | 76 | err = -EPERM; |
68 | 77 | goto setflags_out; | |
78 | } | ||
69 | if ((flags & REISERFS_NOTAIL_FL) && | 79 | if ((flags & REISERFS_NOTAIL_FL) && |
70 | S_ISREG(inode->i_mode)) { | 80 | S_ISREG(inode->i_mode)) { |
71 | int result; | 81 | int result; |
72 | 82 | ||
73 | result = reiserfs_unpack(inode, filp); | 83 | result = reiserfs_unpack(inode, filp); |
74 | if (result) | 84 | if (result) { |
75 | return result; | 85 | err = result; |
86 | goto setflags_out; | ||
87 | } | ||
76 | } | 88 | } |
77 | sd_attrs_to_i_attrs(flags, inode); | 89 | sd_attrs_to_i_attrs(flags, inode); |
78 | REISERFS_I(inode)->i_attrs = flags; | 90 | REISERFS_I(inode)->i_attrs = flags; |
79 | inode->i_ctime = CURRENT_TIME_SEC; | 91 | inode->i_ctime = CURRENT_TIME_SEC; |
80 | mark_inode_dirty(inode); | 92 | mark_inode_dirty(inode); |
81 | return 0; | 93 | setflags_out: |
94 | mnt_drop_write(filp->f_path.mnt); | ||
95 | return err; | ||
82 | } | 96 | } |
83 | case REISERFS_IOC_GETVERSION: | 97 | case REISERFS_IOC_GETVERSION: |
84 | return put_user(inode->i_generation, (int __user *)arg); | 98 | return put_user(inode->i_generation, (int __user *)arg); |
85 | case REISERFS_IOC_SETVERSION: | 99 | case REISERFS_IOC_SETVERSION: |
86 | if (!is_owner_or_cap(inode)) | 100 | if (!is_owner_or_cap(inode)) |
87 | return -EPERM; | 101 | return -EPERM; |
88 | if (IS_RDONLY(inode)) | 102 | err = mnt_want_write(filp->f_path.mnt); |
89 | return -EROFS; | 103 | if (err) |
90 | if (get_user(inode->i_generation, (int __user *)arg)) | 104 | return err; |
91 | return -EFAULT; | 105 | if (get_user(inode->i_generation, (int __user *)arg)) { |
106 | err = -EFAULT; | ||
107 | goto setversion_out; | ||
108 | } | ||
92 | inode->i_ctime = CURRENT_TIME_SEC; | 109 | inode->i_ctime = CURRENT_TIME_SEC; |
93 | mark_inode_dirty(inode); | 110 | mark_inode_dirty(inode); |
94 | return 0; | 111 | setversion_out: |
112 | mnt_drop_write(filp->f_path.mnt); | ||
113 | return err; | ||
95 | default: | 114 | default: |
96 | return -ENOTTY; | 115 | return -ENOTTY; |
97 | } | 116 | } |
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index bb05a3e51b93..060eb3f598e7 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
@@ -38,7 +38,7 @@ | |||
38 | #include <asm/system.h> | 38 | #include <asm/system.h> |
39 | 39 | ||
40 | #include <linux/time.h> | 40 | #include <linux/time.h> |
41 | #include <asm/semaphore.h> | 41 | #include <linux/semaphore.h> |
42 | 42 | ||
43 | #include <linux/vmalloc.h> | 43 | #include <linux/vmalloc.h> |
44 | #include <linux/reiserfs_fs.h> | 44 | #include <linux/reiserfs_fs.h> |
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 344b9b96cc56..d7c4935c1034 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c | |||
@@ -44,7 +44,6 @@ | |||
44 | #include <net/checksum.h> | 44 | #include <net/checksum.h> |
45 | #include <linux/smp_lock.h> | 45 | #include <linux/smp_lock.h> |
46 | #include <linux/stat.h> | 46 | #include <linux/stat.h> |
47 | #include <asm/semaphore.h> | ||
48 | 47 | ||
49 | #define FL_READONLY 128 | 48 | #define FL_READONLY 128 |
50 | #define FL_DIR_SEM_HELD 256 | 49 | #define FL_DIR_SEM_HELD 256 |
diff --git a/fs/select.c b/fs/select.c index 5633fe980781..00f58c5c7e05 100644 --- a/fs/select.c +++ b/fs/select.c | |||
@@ -260,7 +260,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) | |||
260 | wait = NULL; | 260 | wait = NULL; |
261 | if (retval || !*timeout || signal_pending(current)) | 261 | if (retval || !*timeout || signal_pending(current)) |
262 | break; | 262 | break; |
263 | if(table.error) { | 263 | if (table.error) { |
264 | retval = table.error; | 264 | retval = table.error; |
265 | break; | 265 | break; |
266 | } | 266 | } |
diff --git a/fs/seq_file.c b/fs/seq_file.c index 853770274f20..3f54dbd6c49b 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c | |||
@@ -25,6 +25,7 @@ | |||
25 | * into the buffer. In case of error ->start() and ->next() return | 25 | * into the buffer. In case of error ->start() and ->next() return |
26 | * ERR_PTR(error). In the end of sequence they return %NULL. ->show() | 26 | * ERR_PTR(error). In the end of sequence they return %NULL. ->show() |
27 | * returns 0 in case of success and negative number in case of error. | 27 | * returns 0 in case of success and negative number in case of error. |
28 | * Returning SEQ_SKIP means "discard this element and move on". | ||
28 | */ | 29 | */ |
29 | int seq_open(struct file *file, const struct seq_operations *op) | 30 | int seq_open(struct file *file, const struct seq_operations *op) |
30 | { | 31 | { |
@@ -114,8 +115,10 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) | |||
114 | if (!p || IS_ERR(p)) | 115 | if (!p || IS_ERR(p)) |
115 | break; | 116 | break; |
116 | err = m->op->show(m, p); | 117 | err = m->op->show(m, p); |
117 | if (err) | 118 | if (err < 0) |
118 | break; | 119 | break; |
120 | if (unlikely(err)) | ||
121 | m->count = 0; | ||
119 | if (m->count < m->size) | 122 | if (m->count < m->size) |
120 | goto Fill; | 123 | goto Fill; |
121 | m->op->stop(m, p); | 124 | m->op->stop(m, p); |
@@ -140,9 +143,10 @@ Fill: | |||
140 | break; | 143 | break; |
141 | } | 144 | } |
142 | err = m->op->show(m, p); | 145 | err = m->op->show(m, p); |
143 | if (err || m->count == m->size) { | 146 | if (m->count == m->size || err) { |
144 | m->count = offs; | 147 | m->count = offs; |
145 | break; | 148 | if (likely(err <= 0)) |
149 | break; | ||
146 | } | 150 | } |
147 | pos = next; | 151 | pos = next; |
148 | } | 152 | } |
@@ -199,8 +203,12 @@ static int traverse(struct seq_file *m, loff_t offset) | |||
199 | if (IS_ERR(p)) | 203 | if (IS_ERR(p)) |
200 | break; | 204 | break; |
201 | error = m->op->show(m, p); | 205 | error = m->op->show(m, p); |
202 | if (error) | 206 | if (error < 0) |
203 | break; | 207 | break; |
208 | if (unlikely(error)) { | ||
209 | error = 0; | ||
210 | m->count = 0; | ||
211 | } | ||
204 | if (m->count == m->size) | 212 | if (m->count == m->size) |
205 | goto Eoverflow; | 213 | goto Eoverflow; |
206 | if (pos + m->count > offset) { | 214 | if (pos + m->count > offset) { |
@@ -239,7 +247,7 @@ Eoverflow: | |||
239 | loff_t seq_lseek(struct file *file, loff_t offset, int origin) | 247 | loff_t seq_lseek(struct file *file, loff_t offset, int origin) |
240 | { | 248 | { |
241 | struct seq_file *m = (struct seq_file *)file->private_data; | 249 | struct seq_file *m = (struct seq_file *)file->private_data; |
242 | long long retval = -EINVAL; | 250 | loff_t retval = -EINVAL; |
243 | 251 | ||
244 | mutex_lock(&m->lock); | 252 | mutex_lock(&m->lock); |
245 | m->version = file->f_version; | 253 | m->version = file->f_version; |
@@ -342,28 +350,40 @@ int seq_printf(struct seq_file *m, const char *f, ...) | |||
342 | } | 350 | } |
343 | EXPORT_SYMBOL(seq_printf); | 351 | EXPORT_SYMBOL(seq_printf); |
344 | 352 | ||
353 | static char *mangle_path(char *s, char *p, char *esc) | ||
354 | { | ||
355 | while (s <= p) { | ||
356 | char c = *p++; | ||
357 | if (!c) { | ||
358 | return s; | ||
359 | } else if (!strchr(esc, c)) { | ||
360 | *s++ = c; | ||
361 | } else if (s + 4 > p) { | ||
362 | break; | ||
363 | } else { | ||
364 | *s++ = '\\'; | ||
365 | *s++ = '0' + ((c & 0300) >> 6); | ||
366 | *s++ = '0' + ((c & 070) >> 3); | ||
367 | *s++ = '0' + (c & 07); | ||
368 | } | ||
369 | } | ||
370 | return NULL; | ||
371 | } | ||
372 | |||
373 | /* | ||
374 | * return the absolute path of 'dentry' residing in mount 'mnt'. | ||
375 | */ | ||
345 | int seq_path(struct seq_file *m, struct path *path, char *esc) | 376 | int seq_path(struct seq_file *m, struct path *path, char *esc) |
346 | { | 377 | { |
347 | if (m->count < m->size) { | 378 | if (m->count < m->size) { |
348 | char *s = m->buf + m->count; | 379 | char *s = m->buf + m->count; |
349 | char *p = d_path(path, s, m->size - m->count); | 380 | char *p = d_path(path, s, m->size - m->count); |
350 | if (!IS_ERR(p)) { | 381 | if (!IS_ERR(p)) { |
351 | while (s <= p) { | 382 | s = mangle_path(s, p, esc); |
352 | char c = *p++; | 383 | if (s) { |
353 | if (!c) { | 384 | p = m->buf + m->count; |
354 | p = m->buf + m->count; | 385 | m->count = s - m->buf; |
355 | m->count = s - m->buf; | 386 | return s - p; |
356 | return s - p; | ||
357 | } else if (!strchr(esc, c)) { | ||
358 | *s++ = c; | ||
359 | } else if (s + 4 > p) { | ||
360 | break; | ||
361 | } else { | ||
362 | *s++ = '\\'; | ||
363 | *s++ = '0' + ((c & 0300) >> 6); | ||
364 | *s++ = '0' + ((c & 070) >> 3); | ||
365 | *s++ = '0' + (c & 07); | ||
366 | } | ||
367 | } | 387 | } |
368 | } | 388 | } |
369 | } | 389 | } |
@@ -372,6 +392,57 @@ int seq_path(struct seq_file *m, struct path *path, char *esc) | |||
372 | } | 392 | } |
373 | EXPORT_SYMBOL(seq_path); | 393 | EXPORT_SYMBOL(seq_path); |
374 | 394 | ||
395 | /* | ||
396 | * Same as seq_path, but relative to supplied root. | ||
397 | * | ||
398 | * root may be changed, see __d_path(). | ||
399 | */ | ||
400 | int seq_path_root(struct seq_file *m, struct path *path, struct path *root, | ||
401 | char *esc) | ||
402 | { | ||
403 | int err = -ENAMETOOLONG; | ||
404 | if (m->count < m->size) { | ||
405 | char *s = m->buf + m->count; | ||
406 | char *p; | ||
407 | |||
408 | spin_lock(&dcache_lock); | ||
409 | p = __d_path(path, root, s, m->size - m->count); | ||
410 | spin_unlock(&dcache_lock); | ||
411 | err = PTR_ERR(p); | ||
412 | if (!IS_ERR(p)) { | ||
413 | s = mangle_path(s, p, esc); | ||
414 | if (s) { | ||
415 | p = m->buf + m->count; | ||
416 | m->count = s - m->buf; | ||
417 | return 0; | ||
418 | } | ||
419 | } | ||
420 | } | ||
421 | m->count = m->size; | ||
422 | return err; | ||
423 | } | ||
424 | |||
425 | /* | ||
426 | * returns the path of the 'dentry' from the root of its filesystem. | ||
427 | */ | ||
428 | int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc) | ||
429 | { | ||
430 | if (m->count < m->size) { | ||
431 | char *s = m->buf + m->count; | ||
432 | char *p = dentry_path(dentry, s, m->size - m->count); | ||
433 | if (!IS_ERR(p)) { | ||
434 | s = mangle_path(s, p, esc); | ||
435 | if (s) { | ||
436 | p = m->buf + m->count; | ||
437 | m->count = s - m->buf; | ||
438 | return s - p; | ||
439 | } | ||
440 | } | ||
441 | } | ||
442 | m->count = m->size; | ||
443 | return -1; | ||
444 | } | ||
445 | |||
375 | static void *single_start(struct seq_file *p, loff_t *pos) | 446 | static void *single_start(struct seq_file *p, loff_t *pos) |
376 | { | 447 | { |
377 | return NULL + (*pos == 0); | 448 | return NULL + (*pos == 0); |
diff --git a/fs/super.c b/fs/super.c index 09008dbd264e..4798350b2bc9 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -37,7 +37,9 @@ | |||
37 | #include <linux/idr.h> | 37 | #include <linux/idr.h> |
38 | #include <linux/kobject.h> | 38 | #include <linux/kobject.h> |
39 | #include <linux/mutex.h> | 39 | #include <linux/mutex.h> |
40 | #include <linux/file.h> | ||
40 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
42 | #include "internal.h" | ||
41 | 43 | ||
42 | 44 | ||
43 | LIST_HEAD(super_blocks); | 45 | LIST_HEAD(super_blocks); |
@@ -567,10 +569,29 @@ static void mark_files_ro(struct super_block *sb) | |||
567 | { | 569 | { |
568 | struct file *f; | 570 | struct file *f; |
569 | 571 | ||
572 | retry: | ||
570 | file_list_lock(); | 573 | file_list_lock(); |
571 | list_for_each_entry(f, &sb->s_files, f_u.fu_list) { | 574 | list_for_each_entry(f, &sb->s_files, f_u.fu_list) { |
572 | if (S_ISREG(f->f_path.dentry->d_inode->i_mode) && file_count(f)) | 575 | struct vfsmount *mnt; |
573 | f->f_mode &= ~FMODE_WRITE; | 576 | if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) |
577 | continue; | ||
578 | if (!file_count(f)) | ||
579 | continue; | ||
580 | if (!(f->f_mode & FMODE_WRITE)) | ||
581 | continue; | ||
582 | f->f_mode &= ~FMODE_WRITE; | ||
583 | if (file_check_writeable(f) != 0) | ||
584 | continue; | ||
585 | file_release_write(f); | ||
586 | mnt = mntget(f->f_path.mnt); | ||
587 | file_list_unlock(); | ||
588 | /* | ||
589 | * This can sleep, so we can't hold | ||
590 | * the file_list_lock() spinlock. | ||
591 | */ | ||
592 | mnt_drop_write(mnt); | ||
593 | mntput(mnt); | ||
594 | goto retry; | ||
574 | } | 595 | } |
575 | file_list_unlock(); | 596 | file_list_unlock(); |
576 | } | 597 | } |
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 4948d9bc405d..a1c3a1fab7f0 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/idr.h> | 20 | #include <linux/idr.h> |
21 | #include <linux/completion.h> | 21 | #include <linux/completion.h> |
22 | #include <linux/mutex.h> | 22 | #include <linux/mutex.h> |
23 | #include <linux/slab.h> | ||
23 | #include "sysfs.h" | 24 | #include "sysfs.h" |
24 | 25 | ||
25 | DEFINE_MUTEX(sysfs_mutex); | 26 | DEFINE_MUTEX(sysfs_mutex); |
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index baa663e69388..ade9a7e6a757 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/kobject.h> | 14 | #include <linux/kobject.h> |
15 | #include <linux/kallsyms.h> | 15 | #include <linux/kallsyms.h> |
16 | #include <linux/slab.h> | ||
16 | #include <linux/namei.h> | 17 | #include <linux/namei.h> |
17 | #include <linux/poll.h> | 18 | #include <linux/poll.h> |
18 | #include <linux/list.h> | 19 | #include <linux/list.h> |
@@ -128,7 +129,7 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) | |||
128 | ssize_t retval = 0; | 129 | ssize_t retval = 0; |
129 | 130 | ||
130 | mutex_lock(&buffer->mutex); | 131 | mutex_lock(&buffer->mutex); |
131 | if (buffer->needs_read_fill) { | 132 | if (buffer->needs_read_fill || *ppos == 0) { |
132 | retval = fill_read_buffer(file->f_path.dentry,buffer); | 133 | retval = fill_read_buffer(file->f_path.dentry,buffer); |
133 | if (retval) | 134 | if (retval) |
134 | goto out; | 135 | goto out; |
@@ -409,8 +410,7 @@ static int sysfs_release(struct inode *inode, struct file *filp) | |||
409 | * return POLLERR|POLLPRI, and select will return the fd whether | 410 | * return POLLERR|POLLPRI, and select will return the fd whether |
410 | * it is waiting for read, write, or exceptions. | 411 | * it is waiting for read, write, or exceptions. |
411 | * Once poll/select indicates that the value has changed, you | 412 | * Once poll/select indicates that the value has changed, you |
412 | * need to close and re-open the file, as simply seeking and reading | 413 | * need to close and re-open the file, or seek to 0 and read again. |
413 | * again will not get new data, or reset the state of 'poll'. | ||
414 | * Reminder: this only works for attributes which actively support | 414 | * Reminder: this only works for attributes which actively support |
415 | * it, and it is not possible to test an attribute from userspace | 415 | * it, and it is not possible to test an attribute from userspace |
416 | * to see if it supports poll (Neither 'poll' nor 'select' return | 416 | * to see if it supports poll (Neither 'poll' nor 'select' return |
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 5f66c4466151..817f5966edca 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c | |||
@@ -87,7 +87,14 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char | |||
87 | 87 | ||
88 | void sysfs_remove_link(struct kobject * kobj, const char * name) | 88 | void sysfs_remove_link(struct kobject * kobj, const char * name) |
89 | { | 89 | { |
90 | sysfs_hash_and_remove(kobj->sd, name); | 90 | struct sysfs_dirent *parent_sd = NULL; |
91 | |||
92 | if (!kobj) | ||
93 | parent_sd = &sysfs_root; | ||
94 | else | ||
95 | parent_sd = kobj->sd; | ||
96 | |||
97 | sysfs_hash_and_remove(parent_sd, name); | ||
91 | } | 98 | } |
92 | 99 | ||
93 | static int sysfs_get_target_path(struct sysfs_dirent *parent_sd, | 100 | static int sysfs_get_target_path(struct sysfs_dirent *parent_sd, |
diff --git a/fs/udf/Makefile b/fs/udf/Makefile index be845e7540ef..0d4503f7446d 100644 --- a/fs/udf/Makefile +++ b/fs/udf/Makefile | |||
@@ -6,4 +6,4 @@ obj-$(CONFIG_UDF_FS) += udf.o | |||
6 | 6 | ||
7 | udf-objs := balloc.o dir.o file.o ialloc.o inode.o lowlevel.o namei.o \ | 7 | udf-objs := balloc.o dir.o file.o ialloc.o inode.o lowlevel.o namei.o \ |
8 | partition.o super.o truncate.o symlink.o fsync.o \ | 8 | partition.o super.o truncate.o symlink.o fsync.o \ |
9 | crc.o directory.o misc.o udftime.o unicode.o | 9 | directory.o misc.o udftime.o unicode.o |
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index f855dcbbdfb8..1b809bd494bd 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c | |||
@@ -149,8 +149,7 @@ static bool udf_add_free_space(struct udf_sb_info *sbi, | |||
149 | return false; | 149 | return false; |
150 | 150 | ||
151 | lvid = (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data; | 151 | lvid = (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data; |
152 | lvid->freeSpaceTable[partition] = cpu_to_le32(le32_to_cpu( | 152 | le32_add_cpu(&lvid->freeSpaceTable[partition], cnt); |
153 | lvid->freeSpaceTable[partition]) + cnt); | ||
154 | return true; | 153 | return true; |
155 | } | 154 | } |
156 | 155 | ||
@@ -589,10 +588,8 @@ static void udf_table_free_blocks(struct super_block *sb, | |||
589 | sptr = oepos.bh->b_data + epos.offset; | 588 | sptr = oepos.bh->b_data + epos.offset; |
590 | aed = (struct allocExtDesc *) | 589 | aed = (struct allocExtDesc *) |
591 | oepos.bh->b_data; | 590 | oepos.bh->b_data; |
592 | aed->lengthAllocDescs = | 591 | le32_add_cpu(&aed->lengthAllocDescs, |
593 | cpu_to_le32(le32_to_cpu( | 592 | adsize); |
594 | aed->lengthAllocDescs) + | ||
595 | adsize); | ||
596 | } else { | 593 | } else { |
597 | sptr = iinfo->i_ext.i_data + | 594 | sptr = iinfo->i_ext.i_data + |
598 | epos.offset; | 595 | epos.offset; |
@@ -645,9 +642,7 @@ static void udf_table_free_blocks(struct super_block *sb, | |||
645 | mark_inode_dirty(table); | 642 | mark_inode_dirty(table); |
646 | } else { | 643 | } else { |
647 | aed = (struct allocExtDesc *)epos.bh->b_data; | 644 | aed = (struct allocExtDesc *)epos.bh->b_data; |
648 | aed->lengthAllocDescs = | 645 | le32_add_cpu(&aed->lengthAllocDescs, adsize); |
649 | cpu_to_le32(le32_to_cpu( | ||
650 | aed->lengthAllocDescs) + adsize); | ||
651 | udf_update_tag(epos.bh->b_data, epos.offset); | 646 | udf_update_tag(epos.bh->b_data, epos.offset); |
652 | mark_buffer_dirty(epos.bh); | 647 | mark_buffer_dirty(epos.bh); |
653 | } | 648 | } |
diff --git a/fs/udf/crc.c b/fs/udf/crc.c deleted file mode 100644 index b1661296e786..000000000000 --- a/fs/udf/crc.c +++ /dev/null | |||
@@ -1,172 +0,0 @@ | |||
1 | /* | ||
2 | * crc.c | ||
3 | * | ||
4 | * PURPOSE | ||
5 | * Routines to generate, calculate, and test a 16-bit CRC. | ||
6 | * | ||
7 | * DESCRIPTION | ||
8 | * The CRC code was devised by Don P. Mitchell of AT&T Bell Laboratories | ||
9 | * and Ned W. Rhodes of Software Systems Group. It has been published in | ||
10 | * "Design and Validation of Computer Protocols", Prentice Hall, | ||
11 | * Englewood Cliffs, NJ, 1991, Chapter 3, ISBN 0-13-539925-4. | ||
12 | * | ||
13 | * Copyright is held by AT&T. | ||
14 | * | ||
15 | * AT&T gives permission for the free use of the CRC source code. | ||
16 | * | ||
17 | * COPYRIGHT | ||
18 | * This file is distributed under the terms of the GNU General Public | ||
19 | * License (GPL). Copies of the GPL can be obtained from: | ||
20 | * ftp://prep.ai.mit.edu/pub/gnu/GPL | ||
21 | * Each contributing author retains all rights to their own work. | ||
22 | */ | ||
23 | |||
24 | #include "udfdecl.h" | ||
25 | |||
26 | static uint16_t crc_table[256] = { | ||
27 | 0x0000U, 0x1021U, 0x2042U, 0x3063U, 0x4084U, 0x50a5U, 0x60c6U, 0x70e7U, | ||
28 | 0x8108U, 0x9129U, 0xa14aU, 0xb16bU, 0xc18cU, 0xd1adU, 0xe1ceU, 0xf1efU, | ||
29 | 0x1231U, 0x0210U, 0x3273U, 0x2252U, 0x52b5U, 0x4294U, 0x72f7U, 0x62d6U, | ||
30 | 0x9339U, 0x8318U, 0xb37bU, 0xa35aU, 0xd3bdU, 0xc39cU, 0xf3ffU, 0xe3deU, | ||
31 | 0x2462U, 0x3443U, 0x0420U, 0x1401U, 0x64e6U, 0x74c7U, 0x44a4U, 0x5485U, | ||
32 | 0xa56aU, 0xb54bU, 0x8528U, 0x9509U, 0xe5eeU, 0xf5cfU, 0xc5acU, 0xd58dU, | ||
33 | 0x3653U, 0x2672U, 0x1611U, 0x0630U, 0x76d7U, 0x66f6U, 0x5695U, 0x46b4U, | ||
34 | 0xb75bU, 0xa77aU, 0x9719U, 0x8738U, 0xf7dfU, 0xe7feU, 0xd79dU, 0xc7bcU, | ||
35 | 0x48c4U, 0x58e5U, 0x6886U, 0x78a7U, 0x0840U, 0x1861U, 0x2802U, 0x3823U, | ||
36 | 0xc9ccU, 0xd9edU, 0xe98eU, 0xf9afU, 0x8948U, 0x9969U, 0xa90aU, 0xb92bU, | ||
37 | 0x5af5U, 0x4ad4U, 0x7ab7U, 0x6a96U, 0x1a71U, 0x0a50U, 0x3a33U, 0x2a12U, | ||
38 | 0xdbfdU, 0xcbdcU, 0xfbbfU, 0xeb9eU, 0x9b79U, 0x8b58U, 0xbb3bU, 0xab1aU, | ||
39 | 0x6ca6U, 0x7c87U, 0x4ce4U, 0x5cc5U, 0x2c22U, 0x3c03U, 0x0c60U, 0x1c41U, | ||
40 | 0xedaeU, 0xfd8fU, 0xcdecU, 0xddcdU, 0xad2aU, 0xbd0bU, 0x8d68U, 0x9d49U, | ||
41 | 0x7e97U, 0x6eb6U, 0x5ed5U, 0x4ef4U, 0x3e13U, 0x2e32U, 0x1e51U, 0x0e70U, | ||
42 | 0xff9fU, 0xefbeU, 0xdfddU, 0xcffcU, 0xbf1bU, 0xaf3aU, 0x9f59U, 0x8f78U, | ||
43 | 0x9188U, 0x81a9U, 0xb1caU, 0xa1ebU, 0xd10cU, 0xc12dU, 0xf14eU, 0xe16fU, | ||
44 | 0x1080U, 0x00a1U, 0x30c2U, 0x20e3U, 0x5004U, 0x4025U, 0x7046U, 0x6067U, | ||
45 | 0x83b9U, 0x9398U, 0xa3fbU, 0xb3daU, 0xc33dU, 0xd31cU, 0xe37fU, 0xf35eU, | ||
46 | 0x02b1U, 0x1290U, 0x22f3U, 0x32d2U, 0x4235U, 0x5214U, 0x6277U, 0x7256U, | ||
47 | 0xb5eaU, 0xa5cbU, 0x95a8U, 0x8589U, 0xf56eU, 0xe54fU, 0xd52cU, 0xc50dU, | ||
48 | 0x34e2U, 0x24c3U, 0x14a0U, 0x0481U, 0x7466U, 0x6447U, 0x5424U, 0x4405U, | ||
49 | 0xa7dbU, 0xb7faU, 0x8799U, 0x97b8U, 0xe75fU, 0xf77eU, 0xc71dU, 0xd73cU, | ||
50 | 0x26d3U, 0x36f2U, 0x0691U, 0x16b0U, 0x6657U, 0x7676U, 0x4615U, 0x5634U, | ||
51 | 0xd94cU, 0xc96dU, 0xf90eU, 0xe92fU, 0x99c8U, 0x89e9U, 0xb98aU, 0xa9abU, | ||
52 | 0x5844U, 0x4865U, 0x7806U, 0x6827U, 0x18c0U, 0x08e1U, 0x3882U, 0x28a3U, | ||
53 | 0xcb7dU, 0xdb5cU, 0xeb3fU, 0xfb1eU, 0x8bf9U, 0x9bd8U, 0xabbbU, 0xbb9aU, | ||
54 | 0x4a75U, 0x5a54U, 0x6a37U, 0x7a16U, 0x0af1U, 0x1ad0U, 0x2ab3U, 0x3a92U, | ||
55 | 0xfd2eU, 0xed0fU, 0xdd6cU, 0xcd4dU, 0xbdaaU, 0xad8bU, 0x9de8U, 0x8dc9U, | ||
56 | 0x7c26U, 0x6c07U, 0x5c64U, 0x4c45U, 0x3ca2U, 0x2c83U, 0x1ce0U, 0x0cc1U, | ||
57 | 0xef1fU, 0xff3eU, 0xcf5dU, 0xdf7cU, 0xaf9bU, 0xbfbaU, 0x8fd9U, 0x9ff8U, | ||
58 | 0x6e17U, 0x7e36U, 0x4e55U, 0x5e74U, 0x2e93U, 0x3eb2U, 0x0ed1U, 0x1ef0U | ||
59 | }; | ||
60 | |||
61 | /* | ||
62 | * udf_crc | ||
63 | * | ||
64 | * PURPOSE | ||
65 | * Calculate a 16-bit CRC checksum using ITU-T V.41 polynomial. | ||
66 | * | ||
67 | * DESCRIPTION | ||
68 | * The OSTA-UDF(tm) 1.50 standard states that using CRCs is mandatory. | ||
69 | * The polynomial used is: x^16 + x^12 + x^15 + 1 | ||
70 | * | ||
71 | * PRE-CONDITIONS | ||
72 | * data Pointer to the data block. | ||
73 | * size Size of the data block. | ||
74 | * | ||
75 | * POST-CONDITIONS | ||
76 | * <return> CRC of the data block. | ||
77 | * | ||
78 | * HISTORY | ||
79 | * July 21, 1997 - Andrew E. Mileski | ||
80 | * Adapted from OSTA-UDF(tm) 1.50 standard. | ||
81 | */ | ||
82 | uint16_t udf_crc(uint8_t *data, uint32_t size, uint16_t crc) | ||
83 | { | ||
84 | while (size--) | ||
85 | crc = crc_table[(crc >> 8 ^ *(data++)) & 0xffU] ^ (crc << 8); | ||
86 | |||
87 | return crc; | ||
88 | } | ||
89 | |||
90 | /****************************************************************************/ | ||
91 | #if defined(TEST) | ||
92 | |||
93 | /* | ||
94 | * PURPOSE | ||
95 | * Test udf_crc() | ||
96 | * | ||
97 | * HISTORY | ||
98 | * July 21, 1997 - Andrew E. Mileski | ||
99 | * Adapted from OSTA-UDF(tm) 1.50 standard. | ||
100 | */ | ||
101 | |||
102 | unsigned char bytes[] = { 0x70U, 0x6AU, 0x77U }; | ||
103 | |||
104 | int main(void) | ||
105 | { | ||
106 | unsigned short x; | ||
107 | |||
108 | x = udf_crc(bytes, sizeof bytes); | ||
109 | printf("udf_crc: calculated = %4.4x, correct = %4.4x\n", x, 0x3299U); | ||
110 | |||
111 | return 0; | ||
112 | } | ||
113 | |||
114 | #endif /* defined(TEST) */ | ||
115 | |||
116 | /****************************************************************************/ | ||
117 | #if defined(GENERATE) | ||
118 | |||
119 | /* | ||
120 | * PURPOSE | ||
121 | * Generate a table for fast 16-bit CRC calculations (any polynomial). | ||
122 | * | ||
123 | * DESCRIPTION | ||
124 | * The ITU-T V.41 polynomial is 010041. | ||
125 | * | ||
126 | * HISTORY | ||
127 | * July 21, 1997 - Andrew E. Mileski | ||
128 | * Adapted from OSTA-UDF(tm) 1.50 standard. | ||
129 | */ | ||
130 | |||
131 | #include <stdio.h> | ||
132 | |||
133 | int main(int argc, char **argv) | ||
134 | { | ||
135 | unsigned long crc, poly; | ||
136 | int n, i; | ||
137 | |||
138 | /* Get the polynomial */ | ||
139 | sscanf(argv[1], "%lo", &poly); | ||
140 | if (poly & 0xffff0000U) { | ||
141 | fprintf(stderr, "polynomial is too large\en"); | ||
142 | exit(1); | ||
143 | } | ||
144 | |||
145 | printf("/* CRC 0%o */\n", poly); | ||
146 | |||
147 | /* Create a table */ | ||
148 | printf("static unsigned short crc_table[256] = {\n"); | ||
149 | for (n = 0; n < 256; n++) { | ||
150 | if (n % 8 == 0) | ||
151 | printf("\t"); | ||
152 | crc = n << 8; | ||
153 | for (i = 0; i < 8; i++) { | ||
154 | if (crc & 0x8000U) | ||
155 | crc = (crc << 1) ^ poly; | ||
156 | else | ||
157 | crc <<= 1; | ||
158 | crc &= 0xFFFFU; | ||
159 | } | ||
160 | if (n == 255) | ||
161 | printf("0x%04xU ", crc); | ||
162 | else | ||
163 | printf("0x%04xU, ", crc); | ||
164 | if (n % 8 == 7) | ||
165 | printf("\n"); | ||
166 | } | ||
167 | printf("};\n"); | ||
168 | |||
169 | return 0; | ||
170 | } | ||
171 | |||
172 | #endif /* defined(GENERATE) */ | ||
diff --git a/fs/udf/dir.c b/fs/udf/dir.c index 8d8643ada199..62dc270c69d1 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c | |||
@@ -39,13 +39,13 @@ | |||
39 | static int do_udf_readdir(struct inode *dir, struct file *filp, | 39 | static int do_udf_readdir(struct inode *dir, struct file *filp, |
40 | filldir_t filldir, void *dirent) | 40 | filldir_t filldir, void *dirent) |
41 | { | 41 | { |
42 | struct udf_fileident_bh fibh; | 42 | struct udf_fileident_bh fibh = { .sbh = NULL, .ebh = NULL}; |
43 | struct fileIdentDesc *fi = NULL; | 43 | struct fileIdentDesc *fi = NULL; |
44 | struct fileIdentDesc cfi; | 44 | struct fileIdentDesc cfi; |
45 | int block, iblock; | 45 | int block, iblock; |
46 | loff_t nf_pos = (filp->f_pos - 1) << 2; | 46 | loff_t nf_pos = (filp->f_pos - 1) << 2; |
47 | int flen; | 47 | int flen; |
48 | char fname[UDF_NAME_LEN]; | 48 | char *fname = NULL; |
49 | char *nameptr; | 49 | char *nameptr; |
50 | uint16_t liu; | 50 | uint16_t liu; |
51 | uint8_t lfi; | 51 | uint8_t lfi; |
@@ -54,23 +54,32 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, | |||
54 | kernel_lb_addr eloc; | 54 | kernel_lb_addr eloc; |
55 | uint32_t elen; | 55 | uint32_t elen; |
56 | sector_t offset; | 56 | sector_t offset; |
57 | int i, num; | 57 | int i, num, ret = 0; |
58 | unsigned int dt_type; | 58 | unsigned int dt_type; |
59 | struct extent_position epos = { NULL, 0, {0, 0} }; | 59 | struct extent_position epos = { NULL, 0, {0, 0} }; |
60 | struct udf_inode_info *iinfo; | 60 | struct udf_inode_info *iinfo; |
61 | 61 | ||
62 | if (nf_pos >= size) | 62 | if (nf_pos >= size) |
63 | return 0; | 63 | goto out; |
64 | |||
65 | fname = kmalloc(UDF_NAME_LEN, GFP_NOFS); | ||
66 | if (!fname) { | ||
67 | ret = -ENOMEM; | ||
68 | goto out; | ||
69 | } | ||
64 | 70 | ||
65 | if (nf_pos == 0) | 71 | if (nf_pos == 0) |
66 | nf_pos = udf_ext0_offset(dir); | 72 | nf_pos = udf_ext0_offset(dir); |
67 | 73 | ||
68 | fibh.soffset = fibh.eoffset = nf_pos & (dir->i_sb->s_blocksize - 1); | 74 | fibh.soffset = fibh.eoffset = nf_pos & (dir->i_sb->s_blocksize - 1); |
69 | iinfo = UDF_I(dir); | 75 | iinfo = UDF_I(dir); |
70 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { | 76 | if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { |
71 | fibh.sbh = fibh.ebh = NULL; | 77 | if (inode_bmap(dir, nf_pos >> dir->i_sb->s_blocksize_bits, |
72 | } else if (inode_bmap(dir, nf_pos >> dir->i_sb->s_blocksize_bits, | 78 | &epos, &eloc, &elen, &offset) |
73 | &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30)) { | 79 | != (EXT_RECORDED_ALLOCATED >> 30)) { |
80 | ret = -ENOENT; | ||
81 | goto out; | ||
82 | } | ||
74 | block = udf_get_lb_pblock(dir->i_sb, eloc, offset); | 83 | block = udf_get_lb_pblock(dir->i_sb, eloc, offset); |
75 | if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { | 84 | if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { |
76 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) | 85 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) |
@@ -83,8 +92,8 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, | |||
83 | } | 92 | } |
84 | 93 | ||
85 | if (!(fibh.sbh = fibh.ebh = udf_tread(dir->i_sb, block))) { | 94 | if (!(fibh.sbh = fibh.ebh = udf_tread(dir->i_sb, block))) { |
86 | brelse(epos.bh); | 95 | ret = -EIO; |
87 | return -EIO; | 96 | goto out; |
88 | } | 97 | } |
89 | 98 | ||
90 | if (!(offset & ((16 >> (dir->i_sb->s_blocksize_bits - 9)) - 1))) { | 99 | if (!(offset & ((16 >> (dir->i_sb->s_blocksize_bits - 9)) - 1))) { |
@@ -105,9 +114,6 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, | |||
105 | brelse(bha[i]); | 114 | brelse(bha[i]); |
106 | } | 115 | } |
107 | } | 116 | } |
108 | } else { | ||
109 | brelse(epos.bh); | ||
110 | return -ENOENT; | ||
111 | } | 117 | } |
112 | 118 | ||
113 | while (nf_pos < size) { | 119 | while (nf_pos < size) { |
@@ -115,13 +121,8 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, | |||
115 | 121 | ||
116 | fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &epos, &eloc, | 122 | fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &epos, &eloc, |
117 | &elen, &offset); | 123 | &elen, &offset); |
118 | if (!fi) { | 124 | if (!fi) |
119 | if (fibh.sbh != fibh.ebh) | 125 | goto out; |
120 | brelse(fibh.ebh); | ||
121 | brelse(fibh.sbh); | ||
122 | brelse(epos.bh); | ||
123 | return 0; | ||
124 | } | ||
125 | 126 | ||
126 | liu = le16_to_cpu(cfi.lengthOfImpUse); | 127 | liu = le16_to_cpu(cfi.lengthOfImpUse); |
127 | lfi = cfi.lengthFileIdent; | 128 | lfi = cfi.lengthFileIdent; |
@@ -167,53 +168,23 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, | |||
167 | dt_type = DT_UNKNOWN; | 168 | dt_type = DT_UNKNOWN; |
168 | } | 169 | } |
169 | 170 | ||
170 | if (flen) { | 171 | if (flen && filldir(dirent, fname, flen, filp->f_pos, |
171 | if (filldir(dirent, fname, flen, filp->f_pos, iblock, dt_type) < 0) { | 172 | iblock, dt_type) < 0) |
172 | if (fibh.sbh != fibh.ebh) | 173 | goto out; |
173 | brelse(fibh.ebh); | ||
174 | brelse(fibh.sbh); | ||
175 | brelse(epos.bh); | ||
176 | return 0; | ||
177 | } | ||
178 | } | ||
179 | } /* end while */ | 174 | } /* end while */ |
180 | 175 | ||
181 | filp->f_pos = (nf_pos >> 2) + 1; | 176 | filp->f_pos = (nf_pos >> 2) + 1; |
182 | 177 | ||
178 | out: | ||
183 | if (fibh.sbh != fibh.ebh) | 179 | if (fibh.sbh != fibh.ebh) |
184 | brelse(fibh.ebh); | 180 | brelse(fibh.ebh); |
185 | brelse(fibh.sbh); | 181 | brelse(fibh.sbh); |
186 | brelse(epos.bh); | 182 | brelse(epos.bh); |
183 | kfree(fname); | ||
187 | 184 | ||
188 | return 0; | 185 | return ret; |
189 | } | 186 | } |
190 | 187 | ||
191 | /* | ||
192 | * udf_readdir | ||
193 | * | ||
194 | * PURPOSE | ||
195 | * Read a directory entry. | ||
196 | * | ||
197 | * DESCRIPTION | ||
198 | * Optional - sys_getdents() will return -ENOTDIR if this routine is not | ||
199 | * available. | ||
200 | * | ||
201 | * Refer to sys_getdents() in fs/readdir.c | ||
202 | * sys_getdents() -> . | ||
203 | * | ||
204 | * PRE-CONDITIONS | ||
205 | * filp Pointer to directory file. | ||
206 | * buf Pointer to directory entry buffer. | ||
207 | * filldir Pointer to filldir function. | ||
208 | * | ||
209 | * POST-CONDITIONS | ||
210 | * <return> >=0 on success. | ||
211 | * | ||
212 | * HISTORY | ||
213 | * July 1, 1997 - Andrew E. Mileski | ||
214 | * Written, tested, and released. | ||
215 | */ | ||
216 | |||
217 | static int udf_readdir(struct file *filp, void *dirent, filldir_t filldir) | 188 | static int udf_readdir(struct file *filp, void *dirent, filldir_t filldir) |
218 | { | 189 | { |
219 | struct inode *dir = filp->f_path.dentry->d_inode; | 190 | struct inode *dir = filp->f_path.dentry->d_inode; |
diff --git a/fs/udf/ecma_167.h b/fs/udf/ecma_167.h index 56387711589b..a0974df82b31 100644 --- a/fs/udf/ecma_167.h +++ b/fs/udf/ecma_167.h | |||
@@ -70,19 +70,6 @@ typedef struct { | |||
70 | uint8_t microseconds; | 70 | uint8_t microseconds; |
71 | } __attribute__ ((packed)) timestamp; | 71 | } __attribute__ ((packed)) timestamp; |
72 | 72 | ||
73 | typedef struct { | ||
74 | uint16_t typeAndTimezone; | ||
75 | int16_t year; | ||
76 | uint8_t month; | ||
77 | uint8_t day; | ||
78 | uint8_t hour; | ||
79 | uint8_t minute; | ||
80 | uint8_t second; | ||
81 | uint8_t centiseconds; | ||
82 | uint8_t hundredsOfMicroseconds; | ||
83 | uint8_t microseconds; | ||
84 | } __attribute__ ((packed)) kernel_timestamp; | ||
85 | |||
86 | /* Type and Time Zone (ECMA 167r3 1/7.3.1) */ | 73 | /* Type and Time Zone (ECMA 167r3 1/7.3.1) */ |
87 | #define TIMESTAMP_TYPE_MASK 0xF000 | 74 | #define TIMESTAMP_TYPE_MASK 0xF000 |
88 | #define TIMESTAMP_TYPE_CUT 0x0000 | 75 | #define TIMESTAMP_TYPE_CUT 0x0000 |
diff --git a/fs/udf/file.c b/fs/udf/file.c index 97c71ae7c689..0ed6e146a0d9 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c | |||
@@ -27,7 +27,6 @@ | |||
27 | 27 | ||
28 | #include "udfdecl.h" | 28 | #include "udfdecl.h" |
29 | #include <linux/fs.h> | 29 | #include <linux/fs.h> |
30 | #include <linux/udf_fs.h> | ||
31 | #include <asm/uaccess.h> | 30 | #include <asm/uaccess.h> |
32 | #include <linux/kernel.h> | 31 | #include <linux/kernel.h> |
33 | #include <linux/string.h> /* memset */ | 32 | #include <linux/string.h> /* memset */ |
@@ -144,40 +143,6 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
144 | return retval; | 143 | return retval; |
145 | } | 144 | } |
146 | 145 | ||
147 | /* | ||
148 | * udf_ioctl | ||
149 | * | ||
150 | * PURPOSE | ||
151 | * Issue an ioctl. | ||
152 | * | ||
153 | * DESCRIPTION | ||
154 | * Optional - sys_ioctl() will return -ENOTTY if this routine is not | ||
155 | * available, and the ioctl cannot be handled without filesystem help. | ||
156 | * | ||
157 | * sys_ioctl() handles these ioctls that apply only to regular files: | ||
158 | * FIBMAP [requires udf_block_map()], FIGETBSZ, FIONREAD | ||
159 | * These ioctls are also handled by sys_ioctl(): | ||
160 | * FIOCLEX, FIONCLEX, FIONBIO, FIOASYNC | ||
161 | * All other ioctls are passed to the filesystem. | ||
162 | * | ||
163 | * Refer to sys_ioctl() in fs/ioctl.c | ||
164 | * sys_ioctl() -> . | ||
165 | * | ||
166 | * PRE-CONDITIONS | ||
167 | * inode Pointer to inode that ioctl was issued on. | ||
168 | * filp Pointer to file that ioctl was issued on. | ||
169 | * cmd The ioctl command. | ||
170 | * arg The ioctl argument [can be interpreted as a | ||
171 | * user-space pointer if desired]. | ||
172 | * | ||
173 | * POST-CONDITIONS | ||
174 | * <return> Success (>=0) or an error code (<=0) that | ||
175 | * sys_ioctl() will return. | ||
176 | * | ||
177 | * HISTORY | ||
178 | * July 1, 1997 - Andrew E. Mileski | ||
179 | * Written, tested, and released. | ||
180 | */ | ||
181 | int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, | 146 | int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, |
182 | unsigned long arg) | 147 | unsigned long arg) |
183 | { | 148 | { |
@@ -225,18 +190,6 @@ int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, | |||
225 | return result; | 190 | return result; |
226 | } | 191 | } |
227 | 192 | ||
228 | /* | ||
229 | * udf_release_file | ||
230 | * | ||
231 | * PURPOSE | ||
232 | * Called when all references to the file are closed | ||
233 | * | ||
234 | * DESCRIPTION | ||
235 | * Discard prealloced blocks | ||
236 | * | ||
237 | * HISTORY | ||
238 | * | ||
239 | */ | ||
240 | static int udf_release_file(struct inode *inode, struct file *filp) | 193 | static int udf_release_file(struct inode *inode, struct file *filp) |
241 | { | 194 | { |
242 | if (filp->f_mode & FMODE_WRITE) { | 195 | if (filp->f_mode & FMODE_WRITE) { |
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 84360315aca2..eb9cfa23dc3d 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c | |||
@@ -21,7 +21,6 @@ | |||
21 | #include "udfdecl.h" | 21 | #include "udfdecl.h" |
22 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
23 | #include <linux/quotaops.h> | 23 | #include <linux/quotaops.h> |
24 | #include <linux/udf_fs.h> | ||
25 | #include <linux/sched.h> | 24 | #include <linux/sched.h> |
26 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
27 | 26 | ||
@@ -47,11 +46,9 @@ void udf_free_inode(struct inode *inode) | |||
47 | struct logicalVolIntegrityDescImpUse *lvidiu = | 46 | struct logicalVolIntegrityDescImpUse *lvidiu = |
48 | udf_sb_lvidiu(sbi); | 47 | udf_sb_lvidiu(sbi); |
49 | if (S_ISDIR(inode->i_mode)) | 48 | if (S_ISDIR(inode->i_mode)) |
50 | lvidiu->numDirs = | 49 | le32_add_cpu(&lvidiu->numDirs, -1); |
51 | cpu_to_le32(le32_to_cpu(lvidiu->numDirs) - 1); | ||
52 | else | 50 | else |
53 | lvidiu->numFiles = | 51 | le32_add_cpu(&lvidiu->numFiles, -1); |
54 | cpu_to_le32(le32_to_cpu(lvidiu->numFiles) - 1); | ||
55 | 52 | ||
56 | mark_buffer_dirty(sbi->s_lvid_bh); | 53 | mark_buffer_dirty(sbi->s_lvid_bh); |
57 | } | 54 | } |
@@ -105,11 +102,9 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
105 | lvhd = (struct logicalVolHeaderDesc *) | 102 | lvhd = (struct logicalVolHeaderDesc *) |
106 | (lvid->logicalVolContentsUse); | 103 | (lvid->logicalVolContentsUse); |
107 | if (S_ISDIR(mode)) | 104 | if (S_ISDIR(mode)) |
108 | lvidiu->numDirs = | 105 | le32_add_cpu(&lvidiu->numDirs, 1); |
109 | cpu_to_le32(le32_to_cpu(lvidiu->numDirs) + 1); | ||
110 | else | 106 | else |
111 | lvidiu->numFiles = | 107 | le32_add_cpu(&lvidiu->numFiles, 1); |
112 | cpu_to_le32(le32_to_cpu(lvidiu->numFiles) + 1); | ||
113 | iinfo->i_unique = uniqueID = le64_to_cpu(lvhd->uniqueID); | 108 | iinfo->i_unique = uniqueID = le64_to_cpu(lvhd->uniqueID); |
114 | if (!(++uniqueID & 0x00000000FFFFFFFFUL)) | 109 | if (!(++uniqueID & 0x00000000FFFFFFFFUL)) |
115 | uniqueID += 16; | 110 | uniqueID += 16; |
diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 24cfa55d0fdc..6e74b117aaf0 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/buffer_head.h> | 37 | #include <linux/buffer_head.h> |
38 | #include <linux/writeback.h> | 38 | #include <linux/writeback.h> |
39 | #include <linux/slab.h> | 39 | #include <linux/slab.h> |
40 | #include <linux/crc-itu-t.h> | ||
40 | 41 | ||
41 | #include "udf_i.h" | 42 | #include "udf_i.h" |
42 | #include "udf_sb.h" | 43 | #include "udf_sb.h" |
@@ -66,22 +67,7 @@ static void udf_update_extents(struct inode *, | |||
66 | struct extent_position *); | 67 | struct extent_position *); |
67 | static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); | 68 | static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); |
68 | 69 | ||
69 | /* | 70 | |
70 | * udf_delete_inode | ||
71 | * | ||
72 | * PURPOSE | ||
73 | * Clean-up before the specified inode is destroyed. | ||
74 | * | ||
75 | * DESCRIPTION | ||
76 | * This routine is called when the kernel destroys an inode structure | ||
77 | * ie. when iput() finds i_count == 0. | ||
78 | * | ||
79 | * HISTORY | ||
80 | * July 1, 1997 - Andrew E. Mileski | ||
81 | * Written, tested, and released. | ||
82 | * | ||
83 | * Called at the last iput() if i_nlink is zero. | ||
84 | */ | ||
85 | void udf_delete_inode(struct inode *inode) | 71 | void udf_delete_inode(struct inode *inode) |
86 | { | 72 | { |
87 | truncate_inode_pages(&inode->i_data, 0); | 73 | truncate_inode_pages(&inode->i_data, 0); |
@@ -323,9 +309,6 @@ static int udf_get_block(struct inode *inode, sector_t block, | |||
323 | 309 | ||
324 | lock_kernel(); | 310 | lock_kernel(); |
325 | 311 | ||
326 | if (block < 0) | ||
327 | goto abort_negative; | ||
328 | |||
329 | iinfo = UDF_I(inode); | 312 | iinfo = UDF_I(inode); |
330 | if (block == iinfo->i_next_alloc_block + 1) { | 313 | if (block == iinfo->i_next_alloc_block + 1) { |
331 | iinfo->i_next_alloc_block++; | 314 | iinfo->i_next_alloc_block++; |
@@ -347,10 +330,6 @@ static int udf_get_block(struct inode *inode, sector_t block, | |||
347 | abort: | 330 | abort: |
348 | unlock_kernel(); | 331 | unlock_kernel(); |
349 | return err; | 332 | return err; |
350 | |||
351 | abort_negative: | ||
352 | udf_warning(inode->i_sb, "udf_get_block", "block < 0"); | ||
353 | goto abort; | ||
354 | } | 333 | } |
355 | 334 | ||
356 | static struct buffer_head *udf_getblk(struct inode *inode, long block, | 335 | static struct buffer_head *udf_getblk(struct inode *inode, long block, |
@@ -1116,42 +1095,36 @@ static void __udf_read_inode(struct inode *inode) | |||
1116 | fe = (struct fileEntry *)bh->b_data; | 1095 | fe = (struct fileEntry *)bh->b_data; |
1117 | 1096 | ||
1118 | if (fe->icbTag.strategyType == cpu_to_le16(4096)) { | 1097 | if (fe->icbTag.strategyType == cpu_to_le16(4096)) { |
1119 | struct buffer_head *ibh = NULL, *nbh = NULL; | 1098 | struct buffer_head *ibh; |
1120 | struct indirectEntry *ie; | ||
1121 | 1099 | ||
1122 | ibh = udf_read_ptagged(inode->i_sb, iinfo->i_location, 1, | 1100 | ibh = udf_read_ptagged(inode->i_sb, iinfo->i_location, 1, |
1123 | &ident); | 1101 | &ident); |
1124 | if (ident == TAG_IDENT_IE) { | 1102 | if (ident == TAG_IDENT_IE && ibh) { |
1125 | if (ibh) { | 1103 | struct buffer_head *nbh = NULL; |
1126 | kernel_lb_addr loc; | 1104 | kernel_lb_addr loc; |
1127 | ie = (struct indirectEntry *)ibh->b_data; | 1105 | struct indirectEntry *ie; |
1128 | 1106 | ||
1129 | loc = lelb_to_cpu(ie->indirectICB.extLocation); | 1107 | ie = (struct indirectEntry *)ibh->b_data; |
1130 | 1108 | loc = lelb_to_cpu(ie->indirectICB.extLocation); | |
1131 | if (ie->indirectICB.extLength && | 1109 | |
1132 | (nbh = udf_read_ptagged(inode->i_sb, loc, 0, | 1110 | if (ie->indirectICB.extLength && |
1133 | &ident))) { | 1111 | (nbh = udf_read_ptagged(inode->i_sb, loc, 0, |
1134 | if (ident == TAG_IDENT_FE || | 1112 | &ident))) { |
1135 | ident == TAG_IDENT_EFE) { | 1113 | if (ident == TAG_IDENT_FE || |
1136 | memcpy(&iinfo->i_location, | 1114 | ident == TAG_IDENT_EFE) { |
1137 | &loc, | 1115 | memcpy(&iinfo->i_location, |
1138 | sizeof(kernel_lb_addr)); | 1116 | &loc, |
1139 | brelse(bh); | 1117 | sizeof(kernel_lb_addr)); |
1140 | brelse(ibh); | 1118 | brelse(bh); |
1141 | brelse(nbh); | ||
1142 | __udf_read_inode(inode); | ||
1143 | return; | ||
1144 | } else { | ||
1145 | brelse(nbh); | ||
1146 | brelse(ibh); | ||
1147 | } | ||
1148 | } else { | ||
1149 | brelse(ibh); | 1119 | brelse(ibh); |
1120 | brelse(nbh); | ||
1121 | __udf_read_inode(inode); | ||
1122 | return; | ||
1150 | } | 1123 | } |
1124 | brelse(nbh); | ||
1151 | } | 1125 | } |
1152 | } else { | ||
1153 | brelse(ibh); | ||
1154 | } | 1126 | } |
1127 | brelse(ibh); | ||
1155 | } else if (fe->icbTag.strategyType != cpu_to_le16(4)) { | 1128 | } else if (fe->icbTag.strategyType != cpu_to_le16(4)) { |
1156 | printk(KERN_ERR "udf: unsupported strategy type: %d\n", | 1129 | printk(KERN_ERR "udf: unsupported strategy type: %d\n", |
1157 | le16_to_cpu(fe->icbTag.strategyType)); | 1130 | le16_to_cpu(fe->icbTag.strategyType)); |
@@ -1168,8 +1141,6 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
1168 | { | 1141 | { |
1169 | struct fileEntry *fe; | 1142 | struct fileEntry *fe; |
1170 | struct extendedFileEntry *efe; | 1143 | struct extendedFileEntry *efe; |
1171 | time_t convtime; | ||
1172 | long convtime_usec; | ||
1173 | int offset; | 1144 | int offset; |
1174 | struct udf_sb_info *sbi = UDF_SB(inode->i_sb); | 1145 | struct udf_sb_info *sbi = UDF_SB(inode->i_sb); |
1175 | struct udf_inode_info *iinfo = UDF_I(inode); | 1146 | struct udf_inode_info *iinfo = UDF_I(inode); |
@@ -1257,29 +1228,15 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
1257 | inode->i_blocks = le64_to_cpu(fe->logicalBlocksRecorded) << | 1228 | inode->i_blocks = le64_to_cpu(fe->logicalBlocksRecorded) << |
1258 | (inode->i_sb->s_blocksize_bits - 9); | 1229 | (inode->i_sb->s_blocksize_bits - 9); |
1259 | 1230 | ||
1260 | if (udf_stamp_to_time(&convtime, &convtime_usec, | 1231 | if (!udf_disk_stamp_to_time(&inode->i_atime, fe->accessTime)) |
1261 | lets_to_cpu(fe->accessTime))) { | ||
1262 | inode->i_atime.tv_sec = convtime; | ||
1263 | inode->i_atime.tv_nsec = convtime_usec * 1000; | ||
1264 | } else { | ||
1265 | inode->i_atime = sbi->s_record_time; | 1232 | inode->i_atime = sbi->s_record_time; |
1266 | } | ||
1267 | 1233 | ||
1268 | if (udf_stamp_to_time(&convtime, &convtime_usec, | 1234 | if (!udf_disk_stamp_to_time(&inode->i_mtime, |
1269 | lets_to_cpu(fe->modificationTime))) { | 1235 | fe->modificationTime)) |
1270 | inode->i_mtime.tv_sec = convtime; | ||
1271 | inode->i_mtime.tv_nsec = convtime_usec * 1000; | ||
1272 | } else { | ||
1273 | inode->i_mtime = sbi->s_record_time; | 1236 | inode->i_mtime = sbi->s_record_time; |
1274 | } | ||
1275 | 1237 | ||
1276 | if (udf_stamp_to_time(&convtime, &convtime_usec, | 1238 | if (!udf_disk_stamp_to_time(&inode->i_ctime, fe->attrTime)) |
1277 | lets_to_cpu(fe->attrTime))) { | ||
1278 | inode->i_ctime.tv_sec = convtime; | ||
1279 | inode->i_ctime.tv_nsec = convtime_usec * 1000; | ||
1280 | } else { | ||
1281 | inode->i_ctime = sbi->s_record_time; | 1239 | inode->i_ctime = sbi->s_record_time; |
1282 | } | ||
1283 | 1240 | ||
1284 | iinfo->i_unique = le64_to_cpu(fe->uniqueID); | 1241 | iinfo->i_unique = le64_to_cpu(fe->uniqueID); |
1285 | iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr); | 1242 | iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr); |
@@ -1289,37 +1246,18 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
1289 | inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) << | 1246 | inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) << |
1290 | (inode->i_sb->s_blocksize_bits - 9); | 1247 | (inode->i_sb->s_blocksize_bits - 9); |
1291 | 1248 | ||
1292 | if (udf_stamp_to_time(&convtime, &convtime_usec, | 1249 | if (!udf_disk_stamp_to_time(&inode->i_atime, efe->accessTime)) |
1293 | lets_to_cpu(efe->accessTime))) { | ||
1294 | inode->i_atime.tv_sec = convtime; | ||
1295 | inode->i_atime.tv_nsec = convtime_usec * 1000; | ||
1296 | } else { | ||
1297 | inode->i_atime = sbi->s_record_time; | 1250 | inode->i_atime = sbi->s_record_time; |
1298 | } | ||
1299 | 1251 | ||
1300 | if (udf_stamp_to_time(&convtime, &convtime_usec, | 1252 | if (!udf_disk_stamp_to_time(&inode->i_mtime, |
1301 | lets_to_cpu(efe->modificationTime))) { | 1253 | efe->modificationTime)) |
1302 | inode->i_mtime.tv_sec = convtime; | ||
1303 | inode->i_mtime.tv_nsec = convtime_usec * 1000; | ||
1304 | } else { | ||
1305 | inode->i_mtime = sbi->s_record_time; | 1254 | inode->i_mtime = sbi->s_record_time; |
1306 | } | ||
1307 | 1255 | ||
1308 | if (udf_stamp_to_time(&convtime, &convtime_usec, | 1256 | if (!udf_disk_stamp_to_time(&iinfo->i_crtime, efe->createTime)) |
1309 | lets_to_cpu(efe->createTime))) { | ||
1310 | iinfo->i_crtime.tv_sec = convtime; | ||
1311 | iinfo->i_crtime.tv_nsec = convtime_usec * 1000; | ||
1312 | } else { | ||
1313 | iinfo->i_crtime = sbi->s_record_time; | 1257 | iinfo->i_crtime = sbi->s_record_time; |
1314 | } | ||
1315 | 1258 | ||
1316 | if (udf_stamp_to_time(&convtime, &convtime_usec, | 1259 | if (!udf_disk_stamp_to_time(&inode->i_ctime, efe->attrTime)) |
1317 | lets_to_cpu(efe->attrTime))) { | ||
1318 | inode->i_ctime.tv_sec = convtime; | ||
1319 | inode->i_ctime.tv_nsec = convtime_usec * 1000; | ||
1320 | } else { | ||
1321 | inode->i_ctime = sbi->s_record_time; | 1260 | inode->i_ctime = sbi->s_record_time; |
1322 | } | ||
1323 | 1261 | ||
1324 | iinfo->i_unique = le64_to_cpu(efe->uniqueID); | 1262 | iinfo->i_unique = le64_to_cpu(efe->uniqueID); |
1325 | iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr); | 1263 | iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr); |
@@ -1338,6 +1276,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
1338 | case ICBTAG_FILE_TYPE_REALTIME: | 1276 | case ICBTAG_FILE_TYPE_REALTIME: |
1339 | case ICBTAG_FILE_TYPE_REGULAR: | 1277 | case ICBTAG_FILE_TYPE_REGULAR: |
1340 | case ICBTAG_FILE_TYPE_UNDEF: | 1278 | case ICBTAG_FILE_TYPE_UNDEF: |
1279 | case ICBTAG_FILE_TYPE_VAT20: | ||
1341 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) | 1280 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) |
1342 | inode->i_data.a_ops = &udf_adinicb_aops; | 1281 | inode->i_data.a_ops = &udf_adinicb_aops; |
1343 | else | 1282 | else |
@@ -1363,6 +1302,15 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
1363 | inode->i_op = &page_symlink_inode_operations; | 1302 | inode->i_op = &page_symlink_inode_operations; |
1364 | inode->i_mode = S_IFLNK | S_IRWXUGO; | 1303 | inode->i_mode = S_IFLNK | S_IRWXUGO; |
1365 | break; | 1304 | break; |
1305 | case ICBTAG_FILE_TYPE_MAIN: | ||
1306 | udf_debug("METADATA FILE-----\n"); | ||
1307 | break; | ||
1308 | case ICBTAG_FILE_TYPE_MIRROR: | ||
1309 | udf_debug("METADATA MIRROR FILE-----\n"); | ||
1310 | break; | ||
1311 | case ICBTAG_FILE_TYPE_BITMAP: | ||
1312 | udf_debug("METADATA BITMAP FILE-----\n"); | ||
1313 | break; | ||
1366 | default: | 1314 | default: |
1367 | printk(KERN_ERR "udf: udf_fill_inode(ino %ld) failed unknown " | 1315 | printk(KERN_ERR "udf: udf_fill_inode(ino %ld) failed unknown " |
1368 | "file type=%d\n", inode->i_ino, | 1316 | "file type=%d\n", inode->i_ino, |
@@ -1416,21 +1364,6 @@ static mode_t udf_convert_permissions(struct fileEntry *fe) | |||
1416 | return mode; | 1364 | return mode; |
1417 | } | 1365 | } |
1418 | 1366 | ||
1419 | /* | ||
1420 | * udf_write_inode | ||
1421 | * | ||
1422 | * PURPOSE | ||
1423 | * Write out the specified inode. | ||
1424 | * | ||
1425 | * DESCRIPTION | ||
1426 | * This routine is called whenever an inode is synced. | ||
1427 | * Currently this routine is just a placeholder. | ||
1428 | * | ||
1429 | * HISTORY | ||
1430 | * July 1, 1997 - Andrew E. Mileski | ||
1431 | * Written, tested, and released. | ||
1432 | */ | ||
1433 | |||
1434 | int udf_write_inode(struct inode *inode, int sync) | 1367 | int udf_write_inode(struct inode *inode, int sync) |
1435 | { | 1368 | { |
1436 | int ret; | 1369 | int ret; |
@@ -1455,7 +1388,6 @@ static int udf_update_inode(struct inode *inode, int do_sync) | |||
1455 | uint32_t udfperms; | 1388 | uint32_t udfperms; |
1456 | uint16_t icbflags; | 1389 | uint16_t icbflags; |
1457 | uint16_t crclen; | 1390 | uint16_t crclen; |
1458 | kernel_timestamp cpu_time; | ||
1459 | int err = 0; | 1391 | int err = 0; |
1460 | struct udf_sb_info *sbi = UDF_SB(inode->i_sb); | 1392 | struct udf_sb_info *sbi = UDF_SB(inode->i_sb); |
1461 | unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; | 1393 | unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; |
@@ -1488,9 +1420,9 @@ static int udf_update_inode(struct inode *inode, int do_sync) | |||
1488 | iinfo->i_location. | 1420 | iinfo->i_location. |
1489 | logicalBlockNum); | 1421 | logicalBlockNum); |
1490 | use->descTag.descCRCLength = cpu_to_le16(crclen); | 1422 | use->descTag.descCRCLength = cpu_to_le16(crclen); |
1491 | use->descTag.descCRC = cpu_to_le16(udf_crc((char *)use + | 1423 | use->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)use + |
1492 | sizeof(tag), crclen, | 1424 | sizeof(tag), |
1493 | 0)); | 1425 | crclen)); |
1494 | use->descTag.tagChecksum = udf_tag_checksum(&use->descTag); | 1426 | use->descTag.tagChecksum = udf_tag_checksum(&use->descTag); |
1495 | 1427 | ||
1496 | mark_buffer_dirty(bh); | 1428 | mark_buffer_dirty(bh); |
@@ -1558,12 +1490,9 @@ static int udf_update_inode(struct inode *inode, int do_sync) | |||
1558 | (inode->i_blocks + (1 << (blocksize_bits - 9)) - 1) >> | 1490 | (inode->i_blocks + (1 << (blocksize_bits - 9)) - 1) >> |
1559 | (blocksize_bits - 9)); | 1491 | (blocksize_bits - 9)); |
1560 | 1492 | ||
1561 | if (udf_time_to_stamp(&cpu_time, inode->i_atime)) | 1493 | udf_time_to_disk_stamp(&fe->accessTime, inode->i_atime); |
1562 | fe->accessTime = cpu_to_lets(cpu_time); | 1494 | udf_time_to_disk_stamp(&fe->modificationTime, inode->i_mtime); |
1563 | if (udf_time_to_stamp(&cpu_time, inode->i_mtime)) | 1495 | udf_time_to_disk_stamp(&fe->attrTime, inode->i_ctime); |
1564 | fe->modificationTime = cpu_to_lets(cpu_time); | ||
1565 | if (udf_time_to_stamp(&cpu_time, inode->i_ctime)) | ||
1566 | fe->attrTime = cpu_to_lets(cpu_time); | ||
1567 | memset(&(fe->impIdent), 0, sizeof(regid)); | 1496 | memset(&(fe->impIdent), 0, sizeof(regid)); |
1568 | strcpy(fe->impIdent.ident, UDF_ID_DEVELOPER); | 1497 | strcpy(fe->impIdent.ident, UDF_ID_DEVELOPER); |
1569 | fe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; | 1498 | fe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; |
@@ -1598,14 +1527,10 @@ static int udf_update_inode(struct inode *inode, int do_sync) | |||
1598 | iinfo->i_crtime.tv_nsec > inode->i_ctime.tv_nsec)) | 1527 | iinfo->i_crtime.tv_nsec > inode->i_ctime.tv_nsec)) |
1599 | iinfo->i_crtime = inode->i_ctime; | 1528 | iinfo->i_crtime = inode->i_ctime; |
1600 | 1529 | ||
1601 | if (udf_time_to_stamp(&cpu_time, inode->i_atime)) | 1530 | udf_time_to_disk_stamp(&efe->accessTime, inode->i_atime); |
1602 | efe->accessTime = cpu_to_lets(cpu_time); | 1531 | udf_time_to_disk_stamp(&efe->modificationTime, inode->i_mtime); |
1603 | if (udf_time_to_stamp(&cpu_time, inode->i_mtime)) | 1532 | udf_time_to_disk_stamp(&efe->createTime, iinfo->i_crtime); |
1604 | efe->modificationTime = cpu_to_lets(cpu_time); | 1533 | udf_time_to_disk_stamp(&efe->attrTime, inode->i_ctime); |
1605 | if (udf_time_to_stamp(&cpu_time, iinfo->i_crtime)) | ||
1606 | efe->createTime = cpu_to_lets(cpu_time); | ||
1607 | if (udf_time_to_stamp(&cpu_time, inode->i_ctime)) | ||
1608 | efe->attrTime = cpu_to_lets(cpu_time); | ||
1609 | 1534 | ||
1610 | memset(&(efe->impIdent), 0, sizeof(regid)); | 1535 | memset(&(efe->impIdent), 0, sizeof(regid)); |
1611 | strcpy(efe->impIdent.ident, UDF_ID_DEVELOPER); | 1536 | strcpy(efe->impIdent.ident, UDF_ID_DEVELOPER); |
@@ -1660,8 +1585,8 @@ static int udf_update_inode(struct inode *inode, int do_sync) | |||
1660 | crclen += iinfo->i_lenEAttr + iinfo->i_lenAlloc - | 1585 | crclen += iinfo->i_lenEAttr + iinfo->i_lenAlloc - |
1661 | sizeof(tag); | 1586 | sizeof(tag); |
1662 | fe->descTag.descCRCLength = cpu_to_le16(crclen); | 1587 | fe->descTag.descCRCLength = cpu_to_le16(crclen); |
1663 | fe->descTag.descCRC = cpu_to_le16(udf_crc((char *)fe + sizeof(tag), | 1588 | fe->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)fe + sizeof(tag), |
1664 | crclen, 0)); | 1589 | crclen)); |
1665 | fe->descTag.tagChecksum = udf_tag_checksum(&fe->descTag); | 1590 | fe->descTag.tagChecksum = udf_tag_checksum(&fe->descTag); |
1666 | 1591 | ||
1667 | /* write the data blocks */ | 1592 | /* write the data blocks */ |
@@ -1778,9 +1703,7 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos, | |||
1778 | 1703 | ||
1779 | if (epos->bh) { | 1704 | if (epos->bh) { |
1780 | aed = (struct allocExtDesc *)epos->bh->b_data; | 1705 | aed = (struct allocExtDesc *)epos->bh->b_data; |
1781 | aed->lengthAllocDescs = | 1706 | le32_add_cpu(&aed->lengthAllocDescs, adsize); |
1782 | cpu_to_le32(le32_to_cpu( | ||
1783 | aed->lengthAllocDescs) + adsize); | ||
1784 | } else { | 1707 | } else { |
1785 | iinfo->i_lenAlloc += adsize; | 1708 | iinfo->i_lenAlloc += adsize; |
1786 | mark_inode_dirty(inode); | 1709 | mark_inode_dirty(inode); |
@@ -1830,9 +1753,7 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos, | |||
1830 | mark_inode_dirty(inode); | 1753 | mark_inode_dirty(inode); |
1831 | } else { | 1754 | } else { |
1832 | aed = (struct allocExtDesc *)epos->bh->b_data; | 1755 | aed = (struct allocExtDesc *)epos->bh->b_data; |
1833 | aed->lengthAllocDescs = | 1756 | le32_add_cpu(&aed->lengthAllocDescs, adsize); |
1834 | cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + | ||
1835 | adsize); | ||
1836 | if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || | 1757 | if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || |
1837 | UDF_SB(inode->i_sb)->s_udfrev >= 0x0201) | 1758 | UDF_SB(inode->i_sb)->s_udfrev >= 0x0201) |
1838 | udf_update_tag(epos->bh->b_data, | 1759 | udf_update_tag(epos->bh->b_data, |
@@ -2046,9 +1967,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos, | |||
2046 | mark_inode_dirty(inode); | 1967 | mark_inode_dirty(inode); |
2047 | } else { | 1968 | } else { |
2048 | aed = (struct allocExtDesc *)oepos.bh->b_data; | 1969 | aed = (struct allocExtDesc *)oepos.bh->b_data; |
2049 | aed->lengthAllocDescs = | 1970 | le32_add_cpu(&aed->lengthAllocDescs, -(2 * adsize)); |
2050 | cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) - | ||
2051 | (2 * adsize)); | ||
2052 | if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || | 1971 | if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || |
2053 | UDF_SB(inode->i_sb)->s_udfrev >= 0x0201) | 1972 | UDF_SB(inode->i_sb)->s_udfrev >= 0x0201) |
2054 | udf_update_tag(oepos.bh->b_data, | 1973 | udf_update_tag(oepos.bh->b_data, |
@@ -2065,9 +1984,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos, | |||
2065 | mark_inode_dirty(inode); | 1984 | mark_inode_dirty(inode); |
2066 | } else { | 1985 | } else { |
2067 | aed = (struct allocExtDesc *)oepos.bh->b_data; | 1986 | aed = (struct allocExtDesc *)oepos.bh->b_data; |
2068 | aed->lengthAllocDescs = | 1987 | le32_add_cpu(&aed->lengthAllocDescs, -adsize); |
2069 | cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) - | ||
2070 | adsize); | ||
2071 | if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || | 1988 | if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || |
2072 | UDF_SB(inode->i_sb)->s_udfrev >= 0x0201) | 1989 | UDF_SB(inode->i_sb)->s_udfrev >= 0x0201) |
2073 | udf_update_tag(oepos.bh->b_data, | 1990 | udf_update_tag(oepos.bh->b_data, |
@@ -2095,11 +2012,6 @@ int8_t inode_bmap(struct inode *inode, sector_t block, | |||
2095 | int8_t etype; | 2012 | int8_t etype; |
2096 | struct udf_inode_info *iinfo; | 2013 | struct udf_inode_info *iinfo; |
2097 | 2014 | ||
2098 | if (block < 0) { | ||
2099 | printk(KERN_ERR "udf: inode_bmap: block < 0\n"); | ||
2100 | return -1; | ||
2101 | } | ||
2102 | |||
2103 | iinfo = UDF_I(inode); | 2015 | iinfo = UDF_I(inode); |
2104 | pos->offset = 0; | 2016 | pos->offset = 0; |
2105 | pos->block = iinfo->i_location; | 2017 | pos->block = iinfo->i_location; |
diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c index 579bae71e67e..703843f30ffd 100644 --- a/fs/udf/lowlevel.c +++ b/fs/udf/lowlevel.c | |||
@@ -23,7 +23,6 @@ | |||
23 | #include <linux/cdrom.h> | 23 | #include <linux/cdrom.h> |
24 | #include <asm/uaccess.h> | 24 | #include <asm/uaccess.h> |
25 | 25 | ||
26 | #include <linux/udf_fs.h> | ||
27 | #include "udf_sb.h" | 26 | #include "udf_sb.h" |
28 | 27 | ||
29 | unsigned int udf_get_last_session(struct super_block *sb) | 28 | unsigned int udf_get_last_session(struct super_block *sb) |
diff --git a/fs/udf/misc.c b/fs/udf/misc.c index a1d6da0caf71..84bf0fd4a4f1 100644 --- a/fs/udf/misc.c +++ b/fs/udf/misc.c | |||
@@ -23,8 +23,8 @@ | |||
23 | 23 | ||
24 | #include <linux/fs.h> | 24 | #include <linux/fs.h> |
25 | #include <linux/string.h> | 25 | #include <linux/string.h> |
26 | #include <linux/udf_fs.h> | ||
27 | #include <linux/buffer_head.h> | 26 | #include <linux/buffer_head.h> |
27 | #include <linux/crc-itu-t.h> | ||
28 | 28 | ||
29 | #include "udf_i.h" | 29 | #include "udf_i.h" |
30 | #include "udf_sb.h" | 30 | #include "udf_sb.h" |
@@ -136,8 +136,8 @@ struct genericFormat *udf_add_extendedattr(struct inode *inode, uint32_t size, | |||
136 | /* rewrite CRC + checksum of eahd */ | 136 | /* rewrite CRC + checksum of eahd */ |
137 | crclen = sizeof(struct extendedAttrHeaderDesc) - sizeof(tag); | 137 | crclen = sizeof(struct extendedAttrHeaderDesc) - sizeof(tag); |
138 | eahd->descTag.descCRCLength = cpu_to_le16(crclen); | 138 | eahd->descTag.descCRCLength = cpu_to_le16(crclen); |
139 | eahd->descTag.descCRC = cpu_to_le16(udf_crc((char *)eahd + | 139 | eahd->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)eahd + |
140 | sizeof(tag), crclen, 0)); | 140 | sizeof(tag), crclen)); |
141 | eahd->descTag.tagChecksum = udf_tag_checksum(&eahd->descTag); | 141 | eahd->descTag.tagChecksum = udf_tag_checksum(&eahd->descTag); |
142 | iinfo->i_lenEAttr += size; | 142 | iinfo->i_lenEAttr += size; |
143 | return (struct genericFormat *)&ea[offset]; | 143 | return (struct genericFormat *)&ea[offset]; |
@@ -204,16 +204,15 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block, | |||
204 | { | 204 | { |
205 | tag *tag_p; | 205 | tag *tag_p; |
206 | struct buffer_head *bh = NULL; | 206 | struct buffer_head *bh = NULL; |
207 | struct udf_sb_info *sbi = UDF_SB(sb); | ||
208 | 207 | ||
209 | /* Read the block */ | 208 | /* Read the block */ |
210 | if (block == 0xFFFFFFFF) | 209 | if (block == 0xFFFFFFFF) |
211 | return NULL; | 210 | return NULL; |
212 | 211 | ||
213 | bh = udf_tread(sb, block + sbi->s_session); | 212 | bh = udf_tread(sb, block); |
214 | if (!bh) { | 213 | if (!bh) { |
215 | udf_debug("block=%d, location=%d: read failed\n", | 214 | udf_debug("block=%d, location=%d: read failed\n", |
216 | block + sbi->s_session, location); | 215 | block, location); |
217 | return NULL; | 216 | return NULL; |
218 | } | 217 | } |
219 | 218 | ||
@@ -223,8 +222,7 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block, | |||
223 | 222 | ||
224 | if (location != le32_to_cpu(tag_p->tagLocation)) { | 223 | if (location != le32_to_cpu(tag_p->tagLocation)) { |
225 | udf_debug("location mismatch block %u, tag %u != %u\n", | 224 | udf_debug("location mismatch block %u, tag %u != %u\n", |
226 | block + sbi->s_session, | 225 | block, le32_to_cpu(tag_p->tagLocation), location); |
227 | le32_to_cpu(tag_p->tagLocation), location); | ||
228 | goto error_out; | 226 | goto error_out; |
229 | } | 227 | } |
230 | 228 | ||
@@ -244,13 +242,13 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block, | |||
244 | 242 | ||
245 | /* Verify the descriptor CRC */ | 243 | /* Verify the descriptor CRC */ |
246 | if (le16_to_cpu(tag_p->descCRCLength) + sizeof(tag) > sb->s_blocksize || | 244 | if (le16_to_cpu(tag_p->descCRCLength) + sizeof(tag) > sb->s_blocksize || |
247 | le16_to_cpu(tag_p->descCRC) == udf_crc(bh->b_data + sizeof(tag), | 245 | le16_to_cpu(tag_p->descCRC) == crc_itu_t(0, |
248 | le16_to_cpu(tag_p->descCRCLength), 0)) | 246 | bh->b_data + sizeof(tag), |
247 | le16_to_cpu(tag_p->descCRCLength))) | ||
249 | return bh; | 248 | return bh; |
250 | 249 | ||
251 | udf_debug("Crc failure block %d: crc = %d, crclen = %d\n", | 250 | udf_debug("Crc failure block %d: crc = %d, crclen = %d\n", block, |
252 | block + sbi->s_session, le16_to_cpu(tag_p->descCRC), | 251 | le16_to_cpu(tag_p->descCRC), le16_to_cpu(tag_p->descCRCLength)); |
253 | le16_to_cpu(tag_p->descCRCLength)); | ||
254 | 252 | ||
255 | error_out: | 253 | error_out: |
256 | brelse(bh); | 254 | brelse(bh); |
@@ -270,7 +268,7 @@ void udf_update_tag(char *data, int length) | |||
270 | length -= sizeof(tag); | 268 | length -= sizeof(tag); |
271 | 269 | ||
272 | tptr->descCRCLength = cpu_to_le16(length); | 270 | tptr->descCRCLength = cpu_to_le16(length); |
273 | tptr->descCRC = cpu_to_le16(udf_crc(data + sizeof(tag), length, 0)); | 271 | tptr->descCRC = cpu_to_le16(crc_itu_t(0, data + sizeof(tag), length)); |
274 | tptr->tagChecksum = udf_tag_checksum(tptr); | 272 | tptr->tagChecksum = udf_tag_checksum(tptr); |
275 | } | 273 | } |
276 | 274 | ||
diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 112a5fb0b27b..ba5537d4bc15 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/smp_lock.h> | 31 | #include <linux/smp_lock.h> |
32 | #include <linux/buffer_head.h> | 32 | #include <linux/buffer_head.h> |
33 | #include <linux/sched.h> | 33 | #include <linux/sched.h> |
34 | #include <linux/crc-itu-t.h> | ||
34 | 35 | ||
35 | static inline int udf_match(int len1, const char *name1, int len2, | 36 | static inline int udf_match(int len1, const char *name1, int len2, |
36 | const char *name2) | 37 | const char *name2) |
@@ -97,25 +98,23 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, | |||
97 | memset(fibh->ebh->b_data, 0x00, padlen + offset); | 98 | memset(fibh->ebh->b_data, 0x00, padlen + offset); |
98 | } | 99 | } |
99 | 100 | ||
100 | crc = udf_crc((uint8_t *)cfi + sizeof(tag), | 101 | crc = crc_itu_t(0, (uint8_t *)cfi + sizeof(tag), |
101 | sizeof(struct fileIdentDesc) - sizeof(tag), 0); | 102 | sizeof(struct fileIdentDesc) - sizeof(tag)); |
102 | 103 | ||
103 | if (fibh->sbh == fibh->ebh) { | 104 | if (fibh->sbh == fibh->ebh) { |
104 | crc = udf_crc((uint8_t *)sfi->impUse, | 105 | crc = crc_itu_t(crc, (uint8_t *)sfi->impUse, |
105 | crclen + sizeof(tag) - | 106 | crclen + sizeof(tag) - |
106 | sizeof(struct fileIdentDesc), crc); | 107 | sizeof(struct fileIdentDesc)); |
107 | } else if (sizeof(struct fileIdentDesc) >= -fibh->soffset) { | 108 | } else if (sizeof(struct fileIdentDesc) >= -fibh->soffset) { |
108 | crc = udf_crc(fibh->ebh->b_data + | 109 | crc = crc_itu_t(crc, fibh->ebh->b_data + |
109 | sizeof(struct fileIdentDesc) + | 110 | sizeof(struct fileIdentDesc) + |
110 | fibh->soffset, | 111 | fibh->soffset, |
111 | crclen + sizeof(tag) - | 112 | crclen + sizeof(tag) - |
112 | sizeof(struct fileIdentDesc), | 113 | sizeof(struct fileIdentDesc)); |
113 | crc); | ||
114 | } else { | 114 | } else { |
115 | crc = udf_crc((uint8_t *)sfi->impUse, | 115 | crc = crc_itu_t(crc, (uint8_t *)sfi->impUse, |
116 | -fibh->soffset - sizeof(struct fileIdentDesc), | 116 | -fibh->soffset - sizeof(struct fileIdentDesc)); |
117 | crc); | 117 | crc = crc_itu_t(crc, fibh->ebh->b_data, fibh->eoffset); |
118 | crc = udf_crc(fibh->ebh->b_data, fibh->eoffset, crc); | ||
119 | } | 118 | } |
120 | 119 | ||
121 | cfi->descTag.descCRC = cpu_to_le16(crc); | 120 | cfi->descTag.descCRC = cpu_to_le16(crc); |
@@ -149,7 +148,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, | |||
149 | struct fileIdentDesc *fi = NULL; | 148 | struct fileIdentDesc *fi = NULL; |
150 | loff_t f_pos; | 149 | loff_t f_pos; |
151 | int block, flen; | 150 | int block, flen; |
152 | char fname[UDF_NAME_LEN]; | 151 | char *fname = NULL; |
153 | char *nameptr; | 152 | char *nameptr; |
154 | uint8_t lfi; | 153 | uint8_t lfi; |
155 | uint16_t liu; | 154 | uint16_t liu; |
@@ -163,12 +162,12 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, | |||
163 | size = udf_ext0_offset(dir) + dir->i_size; | 162 | size = udf_ext0_offset(dir) + dir->i_size; |
164 | f_pos = udf_ext0_offset(dir); | 163 | f_pos = udf_ext0_offset(dir); |
165 | 164 | ||
165 | fibh->sbh = fibh->ebh = NULL; | ||
166 | fibh->soffset = fibh->eoffset = f_pos & (dir->i_sb->s_blocksize - 1); | 166 | fibh->soffset = fibh->eoffset = f_pos & (dir->i_sb->s_blocksize - 1); |
167 | if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) | 167 | if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { |
168 | fibh->sbh = fibh->ebh = NULL; | 168 | if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, &epos, |
169 | else if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, | 169 | &eloc, &elen, &offset) != (EXT_RECORDED_ALLOCATED >> 30)) |
170 | &epos, &eloc, &elen, &offset) == | 170 | goto out_err; |
171 | (EXT_RECORDED_ALLOCATED >> 30)) { | ||
172 | block = udf_get_lb_pblock(dir->i_sb, eloc, offset); | 171 | block = udf_get_lb_pblock(dir->i_sb, eloc, offset); |
173 | if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { | 172 | if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { |
174 | if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) | 173 | if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) |
@@ -179,25 +178,19 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, | |||
179 | offset = 0; | 178 | offset = 0; |
180 | 179 | ||
181 | fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block); | 180 | fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block); |
182 | if (!fibh->sbh) { | 181 | if (!fibh->sbh) |
183 | brelse(epos.bh); | 182 | goto out_err; |
184 | return NULL; | ||
185 | } | ||
186 | } else { | ||
187 | brelse(epos.bh); | ||
188 | return NULL; | ||
189 | } | 183 | } |
190 | 184 | ||
185 | fname = kmalloc(UDF_NAME_LEN, GFP_NOFS); | ||
186 | if (!fname) | ||
187 | goto out_err; | ||
188 | |||
191 | while (f_pos < size) { | 189 | while (f_pos < size) { |
192 | fi = udf_fileident_read(dir, &f_pos, fibh, cfi, &epos, &eloc, | 190 | fi = udf_fileident_read(dir, &f_pos, fibh, cfi, &epos, &eloc, |
193 | &elen, &offset); | 191 | &elen, &offset); |
194 | if (!fi) { | 192 | if (!fi) |
195 | if (fibh->sbh != fibh->ebh) | 193 | goto out_err; |
196 | brelse(fibh->ebh); | ||
197 | brelse(fibh->sbh); | ||
198 | brelse(epos.bh); | ||
199 | return NULL; | ||
200 | } | ||
201 | 194 | ||
202 | liu = le16_to_cpu(cfi->lengthOfImpUse); | 195 | liu = le16_to_cpu(cfi->lengthOfImpUse); |
203 | lfi = cfi->lengthFileIdent; | 196 | lfi = cfi->lengthFileIdent; |
@@ -237,53 +230,22 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, | |||
237 | 230 | ||
238 | flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi); | 231 | flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi); |
239 | if (flen && udf_match(flen, fname, dentry->d_name.len, | 232 | if (flen && udf_match(flen, fname, dentry->d_name.len, |
240 | dentry->d_name.name)) { | 233 | dentry->d_name.name)) |
241 | brelse(epos.bh); | 234 | goto out_ok; |
242 | return fi; | ||
243 | } | ||
244 | } | 235 | } |
245 | 236 | ||
237 | out_err: | ||
238 | fi = NULL; | ||
246 | if (fibh->sbh != fibh->ebh) | 239 | if (fibh->sbh != fibh->ebh) |
247 | brelse(fibh->ebh); | 240 | brelse(fibh->ebh); |
248 | brelse(fibh->sbh); | 241 | brelse(fibh->sbh); |
242 | out_ok: | ||
249 | brelse(epos.bh); | 243 | brelse(epos.bh); |
244 | kfree(fname); | ||
250 | 245 | ||
251 | return NULL; | 246 | return fi; |
252 | } | 247 | } |
253 | 248 | ||
254 | /* | ||
255 | * udf_lookup | ||
256 | * | ||
257 | * PURPOSE | ||
258 | * Look-up the inode for a given name. | ||
259 | * | ||
260 | * DESCRIPTION | ||
261 | * Required - lookup_dentry() will return -ENOTDIR if this routine is not | ||
262 | * available for a directory. The filesystem is useless if this routine is | ||
263 | * not available for at least the filesystem's root directory. | ||
264 | * | ||
265 | * This routine is passed an incomplete dentry - it must be completed by | ||
266 | * calling d_add(dentry, inode). If the name does not exist, then the | ||
267 | * specified inode must be set to null. An error should only be returned | ||
268 | * when the lookup fails for a reason other than the name not existing. | ||
269 | * Note that the directory inode semaphore is held during the call. | ||
270 | * | ||
271 | * Refer to lookup_dentry() in fs/namei.c | ||
272 | * lookup_dentry() -> lookup() -> real_lookup() -> . | ||
273 | * | ||
274 | * PRE-CONDITIONS | ||
275 | * dir Pointer to inode of parent directory. | ||
276 | * dentry Pointer to dentry to complete. | ||
277 | * nd Pointer to lookup nameidata | ||
278 | * | ||
279 | * POST-CONDITIONS | ||
280 | * <return> Zero on success. | ||
281 | * | ||
282 | * HISTORY | ||
283 | * July 1, 1997 - Andrew E. Mileski | ||
284 | * Written, tested, and released. | ||
285 | */ | ||
286 | |||
287 | static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry, | 249 | static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry, |
288 | struct nameidata *nd) | 250 | struct nameidata *nd) |
289 | { | 251 | { |
@@ -336,11 +298,9 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, | |||
336 | { | 298 | { |
337 | struct super_block *sb = dir->i_sb; | 299 | struct super_block *sb = dir->i_sb; |
338 | struct fileIdentDesc *fi = NULL; | 300 | struct fileIdentDesc *fi = NULL; |
339 | char name[UDF_NAME_LEN], fname[UDF_NAME_LEN]; | 301 | char *name = NULL; |
340 | int namelen; | 302 | int namelen; |
341 | loff_t f_pos; | 303 | loff_t f_pos; |
342 | int flen; | ||
343 | char *nameptr; | ||
344 | loff_t size = udf_ext0_offset(dir) + dir->i_size; | 304 | loff_t size = udf_ext0_offset(dir) + dir->i_size; |
345 | int nfidlen; | 305 | int nfidlen; |
346 | uint8_t lfi; | 306 | uint8_t lfi; |
@@ -352,16 +312,23 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, | |||
352 | struct extent_position epos = {}; | 312 | struct extent_position epos = {}; |
353 | struct udf_inode_info *dinfo; | 313 | struct udf_inode_info *dinfo; |
354 | 314 | ||
315 | fibh->sbh = fibh->ebh = NULL; | ||
316 | name = kmalloc(UDF_NAME_LEN, GFP_NOFS); | ||
317 | if (!name) { | ||
318 | *err = -ENOMEM; | ||
319 | goto out_err; | ||
320 | } | ||
321 | |||
355 | if (dentry) { | 322 | if (dentry) { |
356 | if (!dentry->d_name.len) { | 323 | if (!dentry->d_name.len) { |
357 | *err = -EINVAL; | 324 | *err = -EINVAL; |
358 | return NULL; | 325 | goto out_err; |
359 | } | 326 | } |
360 | namelen = udf_put_filename(sb, dentry->d_name.name, name, | 327 | namelen = udf_put_filename(sb, dentry->d_name.name, name, |
361 | dentry->d_name.len); | 328 | dentry->d_name.len); |
362 | if (!namelen) { | 329 | if (!namelen) { |
363 | *err = -ENAMETOOLONG; | 330 | *err = -ENAMETOOLONG; |
364 | return NULL; | 331 | goto out_err; |
365 | } | 332 | } |
366 | } else { | 333 | } else { |
367 | namelen = 0; | 334 | namelen = 0; |
@@ -373,11 +340,14 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, | |||
373 | 340 | ||
374 | fibh->soffset = fibh->eoffset = f_pos & (dir->i_sb->s_blocksize - 1); | 341 | fibh->soffset = fibh->eoffset = f_pos & (dir->i_sb->s_blocksize - 1); |
375 | dinfo = UDF_I(dir); | 342 | dinfo = UDF_I(dir); |
376 | if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) | 343 | if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { |
377 | fibh->sbh = fibh->ebh = NULL; | 344 | if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, &epos, |
378 | else if (inode_bmap(dir, f_pos >> dir->i_sb->s_blocksize_bits, | 345 | &eloc, &elen, &offset) != (EXT_RECORDED_ALLOCATED >> 30)) { |
379 | &epos, &eloc, &elen, &offset) == | 346 | block = udf_get_lb_pblock(dir->i_sb, |
380 | (EXT_RECORDED_ALLOCATED >> 30)) { | 347 | dinfo->i_location, 0); |
348 | fibh->soffset = fibh->eoffset = sb->s_blocksize; | ||
349 | goto add; | ||
350 | } | ||
381 | block = udf_get_lb_pblock(dir->i_sb, eloc, offset); | 351 | block = udf_get_lb_pblock(dir->i_sb, eloc, offset); |
382 | if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { | 352 | if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { |
383 | if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) | 353 | if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) |
@@ -389,17 +359,11 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, | |||
389 | 359 | ||
390 | fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block); | 360 | fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block); |
391 | if (!fibh->sbh) { | 361 | if (!fibh->sbh) { |
392 | brelse(epos.bh); | ||
393 | *err = -EIO; | 362 | *err = -EIO; |
394 | return NULL; | 363 | goto out_err; |
395 | } | 364 | } |
396 | 365 | ||
397 | block = dinfo->i_location.logicalBlockNum; | 366 | block = dinfo->i_location.logicalBlockNum; |
398 | } else { | ||
399 | block = udf_get_lb_pblock(dir->i_sb, dinfo->i_location, 0); | ||
400 | fibh->sbh = fibh->ebh = NULL; | ||
401 | fibh->soffset = fibh->eoffset = sb->s_blocksize; | ||
402 | goto add; | ||
403 | } | 367 | } |
404 | 368 | ||
405 | while (f_pos < size) { | 369 | while (f_pos < size) { |
@@ -407,41 +371,16 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, | |||
407 | &elen, &offset); | 371 | &elen, &offset); |
408 | 372 | ||
409 | if (!fi) { | 373 | if (!fi) { |
410 | if (fibh->sbh != fibh->ebh) | ||
411 | brelse(fibh->ebh); | ||
412 | brelse(fibh->sbh); | ||
413 | brelse(epos.bh); | ||
414 | *err = -EIO; | 374 | *err = -EIO; |
415 | return NULL; | 375 | goto out_err; |
416 | } | 376 | } |
417 | 377 | ||
418 | liu = le16_to_cpu(cfi->lengthOfImpUse); | 378 | liu = le16_to_cpu(cfi->lengthOfImpUse); |
419 | lfi = cfi->lengthFileIdent; | 379 | lfi = cfi->lengthFileIdent; |
420 | 380 | ||
421 | if (fibh->sbh == fibh->ebh) | ||
422 | nameptr = fi->fileIdent + liu; | ||
423 | else { | ||
424 | int poffset; /* Unpaded ending offset */ | ||
425 | |||
426 | poffset = fibh->soffset + sizeof(struct fileIdentDesc) + | ||
427 | liu + lfi; | ||
428 | |||
429 | if (poffset >= lfi) | ||
430 | nameptr = (char *)(fibh->ebh->b_data + | ||
431 | poffset - lfi); | ||
432 | else { | ||
433 | nameptr = fname; | ||
434 | memcpy(nameptr, fi->fileIdent + liu, | ||
435 | lfi - poffset); | ||
436 | memcpy(nameptr + lfi - poffset, | ||
437 | fibh->ebh->b_data, poffset); | ||
438 | } | ||
439 | } | ||
440 | |||
441 | if ((cfi->fileCharacteristics & FID_FILE_CHAR_DELETED) != 0) { | 381 | if ((cfi->fileCharacteristics & FID_FILE_CHAR_DELETED) != 0) { |
442 | if (((sizeof(struct fileIdentDesc) + | 382 | if (((sizeof(struct fileIdentDesc) + |
443 | liu + lfi + 3) & ~3) == nfidlen) { | 383 | liu + lfi + 3) & ~3) == nfidlen) { |
444 | brelse(epos.bh); | ||
445 | cfi->descTag.tagSerialNum = cpu_to_le16(1); | 384 | cfi->descTag.tagSerialNum = cpu_to_le16(1); |
446 | cfi->fileVersionNum = cpu_to_le16(1); | 385 | cfi->fileVersionNum = cpu_to_le16(1); |
447 | cfi->fileCharacteristics = 0; | 386 | cfi->fileCharacteristics = 0; |
@@ -449,27 +388,13 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, | |||
449 | cfi->lengthOfImpUse = cpu_to_le16(0); | 388 | cfi->lengthOfImpUse = cpu_to_le16(0); |
450 | if (!udf_write_fi(dir, cfi, fi, fibh, NULL, | 389 | if (!udf_write_fi(dir, cfi, fi, fibh, NULL, |
451 | name)) | 390 | name)) |
452 | return fi; | 391 | goto out_ok; |
453 | else { | 392 | else { |
454 | *err = -EIO; | 393 | *err = -EIO; |
455 | return NULL; | 394 | goto out_err; |
456 | } | 395 | } |
457 | } | 396 | } |
458 | } | 397 | } |
459 | |||
460 | if (!lfi || !dentry) | ||
461 | continue; | ||
462 | |||
463 | flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi); | ||
464 | if (flen && udf_match(flen, fname, dentry->d_name.len, | ||
465 | dentry->d_name.name)) { | ||
466 | if (fibh->sbh != fibh->ebh) | ||
467 | brelse(fibh->ebh); | ||
468 | brelse(fibh->sbh); | ||
469 | brelse(epos.bh); | ||
470 | *err = -EEXIST; | ||
471 | return NULL; | ||
472 | } | ||
473 | } | 398 | } |
474 | 399 | ||
475 | add: | 400 | add: |
@@ -496,7 +421,7 @@ add: | |||
496 | fibh->sbh = fibh->ebh = | 421 | fibh->sbh = fibh->ebh = |
497 | udf_expand_dir_adinicb(dir, &block, err); | 422 | udf_expand_dir_adinicb(dir, &block, err); |
498 | if (!fibh->sbh) | 423 | if (!fibh->sbh) |
499 | return NULL; | 424 | goto out_err; |
500 | epos.block = dinfo->i_location; | 425 | epos.block = dinfo->i_location; |
501 | epos.offset = udf_file_entry_alloc_offset(dir); | 426 | epos.offset = udf_file_entry_alloc_offset(dir); |
502 | /* Load extent udf_expand_dir_adinicb() has created */ | 427 | /* Load extent udf_expand_dir_adinicb() has created */ |
@@ -537,11 +462,8 @@ add: | |||
537 | dir->i_sb->s_blocksize_bits); | 462 | dir->i_sb->s_blocksize_bits); |
538 | fibh->ebh = udf_bread(dir, | 463 | fibh->ebh = udf_bread(dir, |
539 | f_pos >> dir->i_sb->s_blocksize_bits, 1, err); | 464 | f_pos >> dir->i_sb->s_blocksize_bits, 1, err); |
540 | if (!fibh->ebh) { | 465 | if (!fibh->ebh) |
541 | brelse(epos.bh); | 466 | goto out_err; |
542 | brelse(fibh->sbh); | ||
543 | return NULL; | ||
544 | } | ||
545 | 467 | ||
546 | if (!fibh->soffset) { | 468 | if (!fibh->soffset) { |
547 | if (udf_next_aext(dir, &epos, &eloc, &elen, 1) == | 469 | if (udf_next_aext(dir, &epos, &eloc, &elen, 1) == |
@@ -572,20 +494,25 @@ add: | |||
572 | cfi->lengthFileIdent = namelen; | 494 | cfi->lengthFileIdent = namelen; |
573 | cfi->lengthOfImpUse = cpu_to_le16(0); | 495 | cfi->lengthOfImpUse = cpu_to_le16(0); |
574 | if (!udf_write_fi(dir, cfi, fi, fibh, NULL, name)) { | 496 | if (!udf_write_fi(dir, cfi, fi, fibh, NULL, name)) { |
575 | brelse(epos.bh); | ||
576 | dir->i_size += nfidlen; | 497 | dir->i_size += nfidlen; |
577 | if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) | 498 | if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) |
578 | dinfo->i_lenAlloc += nfidlen; | 499 | dinfo->i_lenAlloc += nfidlen; |
579 | mark_inode_dirty(dir); | 500 | mark_inode_dirty(dir); |
580 | return fi; | 501 | goto out_ok; |
581 | } else { | 502 | } else { |
582 | brelse(epos.bh); | ||
583 | if (fibh->sbh != fibh->ebh) | ||
584 | brelse(fibh->ebh); | ||
585 | brelse(fibh->sbh); | ||
586 | *err = -EIO; | 503 | *err = -EIO; |
587 | return NULL; | 504 | goto out_err; |
588 | } | 505 | } |
506 | |||
507 | out_err: | ||
508 | fi = NULL; | ||
509 | if (fibh->sbh != fibh->ebh) | ||
510 | brelse(fibh->ebh); | ||
511 | brelse(fibh->sbh); | ||
512 | out_ok: | ||
513 | brelse(epos.bh); | ||
514 | kfree(name); | ||
515 | return fi; | ||
589 | } | 516 | } |
590 | 517 | ||
591 | static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi, | 518 | static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi, |
@@ -940,7 +867,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, | |||
940 | char *ea; | 867 | char *ea; |
941 | int err; | 868 | int err; |
942 | int block; | 869 | int block; |
943 | char name[UDF_NAME_LEN]; | 870 | char *name = NULL; |
944 | int namelen; | 871 | int namelen; |
945 | struct buffer_head *bh; | 872 | struct buffer_head *bh; |
946 | struct udf_inode_info *iinfo; | 873 | struct udf_inode_info *iinfo; |
@@ -950,6 +877,12 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, | |||
950 | if (!inode) | 877 | if (!inode) |
951 | goto out; | 878 | goto out; |
952 | 879 | ||
880 | name = kmalloc(UDF_NAME_LEN, GFP_NOFS); | ||
881 | if (!name) { | ||
882 | err = -ENOMEM; | ||
883 | goto out_no_entry; | ||
884 | } | ||
885 | |||
953 | iinfo = UDF_I(inode); | 886 | iinfo = UDF_I(inode); |
954 | inode->i_mode = S_IFLNK | S_IRWXUGO; | 887 | inode->i_mode = S_IFLNK | S_IRWXUGO; |
955 | inode->i_data.a_ops = &udf_symlink_aops; | 888 | inode->i_data.a_ops = &udf_symlink_aops; |
@@ -1089,6 +1022,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, | |||
1089 | err = 0; | 1022 | err = 0; |
1090 | 1023 | ||
1091 | out: | 1024 | out: |
1025 | kfree(name); | ||
1092 | unlock_kernel(); | 1026 | unlock_kernel(); |
1093 | return err; | 1027 | return err; |
1094 | 1028 | ||
diff --git a/fs/udf/partition.c b/fs/udf/partition.c index fc533345ab89..63610f026ae1 100644 --- a/fs/udf/partition.c +++ b/fs/udf/partition.c | |||
@@ -24,7 +24,6 @@ | |||
24 | 24 | ||
25 | #include <linux/fs.h> | 25 | #include <linux/fs.h> |
26 | #include <linux/string.h> | 26 | #include <linux/string.h> |
27 | #include <linux/udf_fs.h> | ||
28 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
29 | #include <linux/buffer_head.h> | 28 | #include <linux/buffer_head.h> |
30 | 29 | ||
@@ -55,11 +54,10 @@ uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block, | |||
55 | struct udf_sb_info *sbi = UDF_SB(sb); | 54 | struct udf_sb_info *sbi = UDF_SB(sb); |
56 | struct udf_part_map *map; | 55 | struct udf_part_map *map; |
57 | struct udf_virtual_data *vdata; | 56 | struct udf_virtual_data *vdata; |
58 | struct udf_inode_info *iinfo; | 57 | struct udf_inode_info *iinfo = UDF_I(sbi->s_vat_inode); |
59 | 58 | ||
60 | map = &sbi->s_partmaps[partition]; | 59 | map = &sbi->s_partmaps[partition]; |
61 | vdata = &map->s_type_specific.s_virtual; | 60 | vdata = &map->s_type_specific.s_virtual; |
62 | index = (sb->s_blocksize - vdata->s_start_offset) / sizeof(uint32_t); | ||
63 | 61 | ||
64 | if (block > vdata->s_num_entries) { | 62 | if (block > vdata->s_num_entries) { |
65 | udf_debug("Trying to access block beyond end of VAT " | 63 | udf_debug("Trying to access block beyond end of VAT " |
@@ -67,6 +65,12 @@ uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block, | |||
67 | return 0xFFFFFFFF; | 65 | return 0xFFFFFFFF; |
68 | } | 66 | } |
69 | 67 | ||
68 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { | ||
69 | loc = le32_to_cpu(((__le32 *)(iinfo->i_ext.i_data + | ||
70 | vdata->s_start_offset))[block]); | ||
71 | goto translate; | ||
72 | } | ||
73 | index = (sb->s_blocksize - vdata->s_start_offset) / sizeof(uint32_t); | ||
70 | if (block >= index) { | 74 | if (block >= index) { |
71 | block -= index; | 75 | block -= index; |
72 | newblock = 1 + (block / (sb->s_blocksize / sizeof(uint32_t))); | 76 | newblock = 1 + (block / (sb->s_blocksize / sizeof(uint32_t))); |
@@ -89,7 +93,7 @@ uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block, | |||
89 | 93 | ||
90 | brelse(bh); | 94 | brelse(bh); |
91 | 95 | ||
92 | iinfo = UDF_I(sbi->s_vat_inode); | 96 | translate: |
93 | if (iinfo->i_location.partitionReferenceNum == partition) { | 97 | if (iinfo->i_location.partitionReferenceNum == partition) { |
94 | udf_debug("recursive call to udf_get_pblock!\n"); | 98 | udf_debug("recursive call to udf_get_pblock!\n"); |
95 | return 0xFFFFFFFF; | 99 | return 0xFFFFFFFF; |
@@ -263,3 +267,58 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block) | |||
263 | 267 | ||
264 | return 0; | 268 | return 0; |
265 | } | 269 | } |
270 | |||
271 | static uint32_t udf_try_read_meta(struct inode *inode, uint32_t block, | ||
272 | uint16_t partition, uint32_t offset) | ||
273 | { | ||
274 | struct super_block *sb = inode->i_sb; | ||
275 | struct udf_part_map *map; | ||
276 | kernel_lb_addr eloc; | ||
277 | uint32_t elen; | ||
278 | sector_t ext_offset; | ||
279 | struct extent_position epos = {}; | ||
280 | uint32_t phyblock; | ||
281 | |||
282 | if (inode_bmap(inode, block, &epos, &eloc, &elen, &ext_offset) != | ||
283 | (EXT_RECORDED_ALLOCATED >> 30)) | ||
284 | phyblock = 0xFFFFFFFF; | ||
285 | else { | ||
286 | map = &UDF_SB(sb)->s_partmaps[partition]; | ||
287 | /* map to sparable/physical partition desc */ | ||
288 | phyblock = udf_get_pblock(sb, eloc.logicalBlockNum, | ||
289 | map->s_partition_num, ext_offset + offset); | ||
290 | } | ||
291 | |||
292 | brelse(epos.bh); | ||
293 | return phyblock; | ||
294 | } | ||
295 | |||
296 | uint32_t udf_get_pblock_meta25(struct super_block *sb, uint32_t block, | ||
297 | uint16_t partition, uint32_t offset) | ||
298 | { | ||
299 | struct udf_sb_info *sbi = UDF_SB(sb); | ||
300 | struct udf_part_map *map; | ||
301 | struct udf_meta_data *mdata; | ||
302 | uint32_t retblk; | ||
303 | struct inode *inode; | ||
304 | |||
305 | udf_debug("READING from METADATA\n"); | ||
306 | |||
307 | map = &sbi->s_partmaps[partition]; | ||
308 | mdata = &map->s_type_specific.s_metadata; | ||
309 | inode = mdata->s_metadata_fe ? : mdata->s_mirror_fe; | ||
310 | |||
311 | /* We shouldn't mount such media... */ | ||
312 | BUG_ON(!inode); | ||
313 | retblk = udf_try_read_meta(inode, block, partition, offset); | ||
314 | if (retblk == 0xFFFFFFFF) { | ||
315 | udf_warning(sb, __func__, "error reading from METADATA, " | ||
316 | "trying to read from MIRROR"); | ||
317 | inode = mdata->s_mirror_fe; | ||
318 | if (!inode) | ||
319 | return 0xFFFFFFFF; | ||
320 | retblk = udf_try_read_meta(inode, block, partition, offset); | ||
321 | } | ||
322 | |||
323 | return retblk; | ||
324 | } | ||
diff --git a/fs/udf/super.c b/fs/udf/super.c index f3ac4abfc946..b564fc140fe4 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c | |||
@@ -55,9 +55,10 @@ | |||
55 | #include <linux/errno.h> | 55 | #include <linux/errno.h> |
56 | #include <linux/mount.h> | 56 | #include <linux/mount.h> |
57 | #include <linux/seq_file.h> | 57 | #include <linux/seq_file.h> |
58 | #include <linux/bitmap.h> | ||
59 | #include <linux/crc-itu-t.h> | ||
58 | #include <asm/byteorder.h> | 60 | #include <asm/byteorder.h> |
59 | 61 | ||
60 | #include <linux/udf_fs.h> | ||
61 | #include "udf_sb.h" | 62 | #include "udf_sb.h" |
62 | #include "udf_i.h" | 63 | #include "udf_i.h" |
63 | 64 | ||
@@ -84,22 +85,19 @@ static void udf_write_super(struct super_block *); | |||
84 | static int udf_remount_fs(struct super_block *, int *, char *); | 85 | static int udf_remount_fs(struct super_block *, int *, char *); |
85 | static int udf_check_valid(struct super_block *, int, int); | 86 | static int udf_check_valid(struct super_block *, int, int); |
86 | static int udf_vrs(struct super_block *sb, int silent); | 87 | static int udf_vrs(struct super_block *sb, int silent); |
87 | static int udf_load_partition(struct super_block *, kernel_lb_addr *); | ||
88 | static int udf_load_logicalvol(struct super_block *, struct buffer_head *, | ||
89 | kernel_lb_addr *); | ||
90 | static void udf_load_logicalvolint(struct super_block *, kernel_extent_ad); | 88 | static void udf_load_logicalvolint(struct super_block *, kernel_extent_ad); |
91 | static void udf_find_anchor(struct super_block *); | 89 | static void udf_find_anchor(struct super_block *); |
92 | static int udf_find_fileset(struct super_block *, kernel_lb_addr *, | 90 | static int udf_find_fileset(struct super_block *, kernel_lb_addr *, |
93 | kernel_lb_addr *); | 91 | kernel_lb_addr *); |
94 | static void udf_load_pvoldesc(struct super_block *, struct buffer_head *); | ||
95 | static void udf_load_fileset(struct super_block *, struct buffer_head *, | 92 | static void udf_load_fileset(struct super_block *, struct buffer_head *, |
96 | kernel_lb_addr *); | 93 | kernel_lb_addr *); |
97 | static int udf_load_partdesc(struct super_block *, struct buffer_head *); | ||
98 | static void udf_open_lvid(struct super_block *); | 94 | static void udf_open_lvid(struct super_block *); |
99 | static void udf_close_lvid(struct super_block *); | 95 | static void udf_close_lvid(struct super_block *); |
100 | static unsigned int udf_count_free(struct super_block *); | 96 | static unsigned int udf_count_free(struct super_block *); |
101 | static int udf_statfs(struct dentry *, struct kstatfs *); | 97 | static int udf_statfs(struct dentry *, struct kstatfs *); |
102 | static int udf_show_options(struct seq_file *, struct vfsmount *); | 98 | static int udf_show_options(struct seq_file *, struct vfsmount *); |
99 | static void udf_error(struct super_block *sb, const char *function, | ||
100 | const char *fmt, ...); | ||
103 | 101 | ||
104 | struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi) | 102 | struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi) |
105 | { | 103 | { |
@@ -587,48 +585,10 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) | |||
587 | return 0; | 585 | return 0; |
588 | } | 586 | } |
589 | 587 | ||
590 | /* | ||
591 | * udf_set_blocksize | ||
592 | * | ||
593 | * PURPOSE | ||
594 | * Set the block size to be used in all transfers. | ||
595 | * | ||
596 | * DESCRIPTION | ||
597 | * To allow room for a DMA transfer, it is best to guess big when unsure. | ||
598 | * This routine picks 2048 bytes as the blocksize when guessing. This | ||
599 | * should be adequate until devices with larger block sizes become common. | ||
600 | * | ||
601 | * Note that the Linux kernel can currently only deal with blocksizes of | ||
602 | * 512, 1024, 2048, 4096, and 8192 bytes. | ||
603 | * | ||
604 | * PRE-CONDITIONS | ||
605 | * sb Pointer to _locked_ superblock. | ||
606 | * | ||
607 | * POST-CONDITIONS | ||
608 | * sb->s_blocksize Blocksize. | ||
609 | * sb->s_blocksize_bits log2 of blocksize. | ||
610 | * <return> 0 Blocksize is valid. | ||
611 | * <return> 1 Blocksize is invalid. | ||
612 | * | ||
613 | * HISTORY | ||
614 | * July 1, 1997 - Andrew E. Mileski | ||
615 | * Written, tested, and released. | ||
616 | */ | ||
617 | static int udf_set_blocksize(struct super_block *sb, int bsize) | ||
618 | { | ||
619 | if (!sb_min_blocksize(sb, bsize)) { | ||
620 | udf_debug("Bad block size (%d)\n", bsize); | ||
621 | printk(KERN_ERR "udf: bad block size (%d)\n", bsize); | ||
622 | return 0; | ||
623 | } | ||
624 | |||
625 | return sb->s_blocksize; | ||
626 | } | ||
627 | |||
628 | static int udf_vrs(struct super_block *sb, int silent) | 588 | static int udf_vrs(struct super_block *sb, int silent) |
629 | { | 589 | { |
630 | struct volStructDesc *vsd = NULL; | 590 | struct volStructDesc *vsd = NULL; |
631 | int sector = 32768; | 591 | loff_t sector = 32768; |
632 | int sectorsize; | 592 | int sectorsize; |
633 | struct buffer_head *bh = NULL; | 593 | struct buffer_head *bh = NULL; |
634 | int iso9660 = 0; | 594 | int iso9660 = 0; |
@@ -649,7 +609,8 @@ static int udf_vrs(struct super_block *sb, int silent) | |||
649 | sector += (sbi->s_session << sb->s_blocksize_bits); | 609 | sector += (sbi->s_session << sb->s_blocksize_bits); |
650 | 610 | ||
651 | udf_debug("Starting at sector %u (%ld byte sectors)\n", | 611 | udf_debug("Starting at sector %u (%ld byte sectors)\n", |
652 | (sector >> sb->s_blocksize_bits), sb->s_blocksize); | 612 | (unsigned int)(sector >> sb->s_blocksize_bits), |
613 | sb->s_blocksize); | ||
653 | /* Process the sequence (if applicable) */ | 614 | /* Process the sequence (if applicable) */ |
654 | for (; !nsr02 && !nsr03; sector += sectorsize) { | 615 | for (; !nsr02 && !nsr03; sector += sectorsize) { |
655 | /* Read a block */ | 616 | /* Read a block */ |
@@ -719,162 +680,140 @@ static int udf_vrs(struct super_block *sb, int silent) | |||
719 | } | 680 | } |
720 | 681 | ||
721 | /* | 682 | /* |
722 | * udf_find_anchor | 683 | * Check whether there is an anchor block in the given block |
723 | * | ||
724 | * PURPOSE | ||
725 | * Find an anchor volume descriptor. | ||
726 | * | ||
727 | * PRE-CONDITIONS | ||
728 | * sb Pointer to _locked_ superblock. | ||
729 | * lastblock Last block on media. | ||
730 | * | ||
731 | * POST-CONDITIONS | ||
732 | * <return> 1 if not found, 0 if ok | ||
733 | * | ||
734 | * HISTORY | ||
735 | * July 1, 1997 - Andrew E. Mileski | ||
736 | * Written, tested, and released. | ||
737 | */ | 684 | */ |
738 | static void udf_find_anchor(struct super_block *sb) | 685 | static int udf_check_anchor_block(struct super_block *sb, sector_t block, |
686 | bool varconv) | ||
739 | { | 687 | { |
740 | int lastblock; | ||
741 | struct buffer_head *bh = NULL; | 688 | struct buffer_head *bh = NULL; |
689 | tag *t; | ||
742 | uint16_t ident; | 690 | uint16_t ident; |
743 | uint32_t location; | 691 | uint32_t location; |
744 | int i; | ||
745 | struct udf_sb_info *sbi; | ||
746 | 692 | ||
747 | sbi = UDF_SB(sb); | 693 | if (varconv) { |
748 | lastblock = sbi->s_last_block; | 694 | if (udf_fixed_to_variable(block) >= |
695 | sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits) | ||
696 | return 0; | ||
697 | bh = sb_bread(sb, udf_fixed_to_variable(block)); | ||
698 | } | ||
699 | else | ||
700 | bh = sb_bread(sb, block); | ||
749 | 701 | ||
750 | if (lastblock) { | 702 | if (!bh) |
751 | int varlastblock = udf_variable_to_fixed(lastblock); | 703 | return 0; |
752 | int last[] = { lastblock, lastblock - 2, | ||
753 | lastblock - 150, lastblock - 152, | ||
754 | varlastblock, varlastblock - 2, | ||
755 | varlastblock - 150, varlastblock - 152 }; | ||
756 | |||
757 | lastblock = 0; | ||
758 | |||
759 | /* Search for an anchor volume descriptor pointer */ | ||
760 | |||
761 | /* according to spec, anchor is in either: | ||
762 | * block 256 | ||
763 | * lastblock-256 | ||
764 | * lastblock | ||
765 | * however, if the disc isn't closed, it could be 512 */ | ||
766 | |||
767 | for (i = 0; !lastblock && i < ARRAY_SIZE(last); i++) { | ||
768 | ident = location = 0; | ||
769 | if (last[i] >= 0) { | ||
770 | bh = sb_bread(sb, last[i]); | ||
771 | if (bh) { | ||
772 | tag *t = (tag *)bh->b_data; | ||
773 | ident = le16_to_cpu(t->tagIdent); | ||
774 | location = le32_to_cpu(t->tagLocation); | ||
775 | brelse(bh); | ||
776 | } | ||
777 | } | ||
778 | 704 | ||
779 | if (ident == TAG_IDENT_AVDP) { | 705 | t = (tag *)bh->b_data; |
780 | if (location == last[i] - sbi->s_session) { | 706 | ident = le16_to_cpu(t->tagIdent); |
781 | lastblock = last[i] - sbi->s_session; | 707 | location = le32_to_cpu(t->tagLocation); |
782 | sbi->s_anchor[0] = lastblock; | 708 | brelse(bh); |
783 | sbi->s_anchor[1] = lastblock - 256; | 709 | if (ident != TAG_IDENT_AVDP) |
784 | } else if (location == | 710 | return 0; |
785 | udf_variable_to_fixed(last[i]) - | 711 | return location == block; |
786 | sbi->s_session) { | 712 | } |
787 | UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); | ||
788 | lastblock = | ||
789 | udf_variable_to_fixed(last[i]) - | ||
790 | sbi->s_session; | ||
791 | sbi->s_anchor[0] = lastblock; | ||
792 | sbi->s_anchor[1] = lastblock - 256 - | ||
793 | sbi->s_session; | ||
794 | } else { | ||
795 | udf_debug("Anchor found at block %d, " | ||
796 | "location mismatch %d.\n", | ||
797 | last[i], location); | ||
798 | } | ||
799 | } else if (ident == TAG_IDENT_FE || | ||
800 | ident == TAG_IDENT_EFE) { | ||
801 | lastblock = last[i]; | ||
802 | sbi->s_anchor[3] = 512; | ||
803 | } else { | ||
804 | ident = location = 0; | ||
805 | if (last[i] >= 256) { | ||
806 | bh = sb_bread(sb, last[i] - 256); | ||
807 | if (bh) { | ||
808 | tag *t = (tag *)bh->b_data; | ||
809 | ident = le16_to_cpu( | ||
810 | t->tagIdent); | ||
811 | location = le32_to_cpu( | ||
812 | t->tagLocation); | ||
813 | brelse(bh); | ||
814 | } | ||
815 | } | ||
816 | 713 | ||
817 | if (ident == TAG_IDENT_AVDP && | 714 | /* Search for an anchor volume descriptor pointer */ |
818 | location == last[i] - 256 - | 715 | static sector_t udf_scan_anchors(struct super_block *sb, bool varconv, |
819 | sbi->s_session) { | 716 | sector_t lastblock) |
820 | lastblock = last[i]; | 717 | { |
821 | sbi->s_anchor[1] = last[i] - 256; | 718 | sector_t last[6]; |
822 | } else { | 719 | int i; |
823 | ident = location = 0; | 720 | struct udf_sb_info *sbi = UDF_SB(sb); |
824 | if (last[i] >= 312 + sbi->s_session) { | ||
825 | bh = sb_bread(sb, | ||
826 | last[i] - 312 - | ||
827 | sbi->s_session); | ||
828 | if (bh) { | ||
829 | tag *t = (tag *) | ||
830 | bh->b_data; | ||
831 | ident = le16_to_cpu( | ||
832 | t->tagIdent); | ||
833 | location = le32_to_cpu( | ||
834 | t->tagLocation); | ||
835 | brelse(bh); | ||
836 | } | ||
837 | } | ||
838 | 721 | ||
839 | if (ident == TAG_IDENT_AVDP && | 722 | last[0] = lastblock; |
840 | location == udf_variable_to_fixed(last[i]) - 256) { | 723 | last[1] = last[0] - 1; |
841 | UDF_SET_FLAG(sb, | 724 | last[2] = last[0] + 1; |
842 | UDF_FLAG_VARCONV); | 725 | last[3] = last[0] - 2; |
843 | lastblock = udf_variable_to_fixed(last[i]); | 726 | last[4] = last[0] - 150; |
844 | sbi->s_anchor[1] = lastblock - 256; | 727 | last[5] = last[0] - 152; |
845 | } | 728 | |
846 | } | 729 | /* according to spec, anchor is in either: |
847 | } | 730 | * block 256 |
731 | * lastblock-256 | ||
732 | * lastblock | ||
733 | * however, if the disc isn't closed, it could be 512 */ | ||
734 | |||
735 | for (i = 0; i < ARRAY_SIZE(last); i++) { | ||
736 | if (last[i] < 0) | ||
737 | continue; | ||
738 | if (last[i] >= sb->s_bdev->bd_inode->i_size >> | ||
739 | sb->s_blocksize_bits) | ||
740 | continue; | ||
741 | |||
742 | if (udf_check_anchor_block(sb, last[i], varconv)) { | ||
743 | sbi->s_anchor[0] = last[i]; | ||
744 | sbi->s_anchor[1] = last[i] - 256; | ||
745 | return last[i]; | ||
848 | } | 746 | } |
849 | } | ||
850 | 747 | ||
851 | if (!lastblock) { | 748 | if (last[i] < 256) |
852 | /* We haven't found the lastblock. check 312 */ | 749 | continue; |
853 | bh = sb_bread(sb, 312 + sbi->s_session); | ||
854 | if (bh) { | ||
855 | tag *t = (tag *)bh->b_data; | ||
856 | ident = le16_to_cpu(t->tagIdent); | ||
857 | location = le32_to_cpu(t->tagLocation); | ||
858 | brelse(bh); | ||
859 | 750 | ||
860 | if (ident == TAG_IDENT_AVDP && location == 256) | 751 | if (udf_check_anchor_block(sb, last[i] - 256, varconv)) { |
861 | UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); | 752 | sbi->s_anchor[1] = last[i] - 256; |
753 | return last[i]; | ||
862 | } | 754 | } |
863 | } | 755 | } |
864 | 756 | ||
757 | if (udf_check_anchor_block(sb, sbi->s_session + 256, varconv)) { | ||
758 | sbi->s_anchor[0] = sbi->s_session + 256; | ||
759 | return last[0]; | ||
760 | } | ||
761 | if (udf_check_anchor_block(sb, sbi->s_session + 512, varconv)) { | ||
762 | sbi->s_anchor[0] = sbi->s_session + 512; | ||
763 | return last[0]; | ||
764 | } | ||
765 | return 0; | ||
766 | } | ||
767 | |||
768 | /* | ||
769 | * Find an anchor volume descriptor. The function expects sbi->s_lastblock to | ||
770 | * be the last block on the media. | ||
771 | * | ||
772 | * Return 1 if not found, 0 if ok | ||
773 | * | ||
774 | */ | ||
775 | static void udf_find_anchor(struct super_block *sb) | ||
776 | { | ||
777 | sector_t lastblock; | ||
778 | struct buffer_head *bh = NULL; | ||
779 | uint16_t ident; | ||
780 | int i; | ||
781 | struct udf_sb_info *sbi = UDF_SB(sb); | ||
782 | |||
783 | lastblock = udf_scan_anchors(sb, 0, sbi->s_last_block); | ||
784 | if (lastblock) | ||
785 | goto check_anchor; | ||
786 | |||
787 | /* No anchor found? Try VARCONV conversion of block numbers */ | ||
788 | /* Firstly, we try to not convert number of the last block */ | ||
789 | lastblock = udf_scan_anchors(sb, 1, | ||
790 | udf_variable_to_fixed(sbi->s_last_block)); | ||
791 | if (lastblock) { | ||
792 | UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); | ||
793 | goto check_anchor; | ||
794 | } | ||
795 | |||
796 | /* Secondly, we try with converted number of the last block */ | ||
797 | lastblock = udf_scan_anchors(sb, 1, sbi->s_last_block); | ||
798 | if (lastblock) | ||
799 | UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); | ||
800 | |||
801 | check_anchor: | ||
802 | /* | ||
803 | * Check located anchors and the anchor block supplied via | ||
804 | * mount options | ||
805 | */ | ||
865 | for (i = 0; i < ARRAY_SIZE(sbi->s_anchor); i++) { | 806 | for (i = 0; i < ARRAY_SIZE(sbi->s_anchor); i++) { |
866 | if (sbi->s_anchor[i]) { | 807 | if (!sbi->s_anchor[i]) |
867 | bh = udf_read_tagged(sb, sbi->s_anchor[i], | 808 | continue; |
868 | sbi->s_anchor[i], &ident); | 809 | bh = udf_read_tagged(sb, sbi->s_anchor[i], |
869 | if (!bh) | 810 | sbi->s_anchor[i], &ident); |
811 | if (!bh) | ||
812 | sbi->s_anchor[i] = 0; | ||
813 | else { | ||
814 | brelse(bh); | ||
815 | if (ident != TAG_IDENT_AVDP) | ||
870 | sbi->s_anchor[i] = 0; | 816 | sbi->s_anchor[i] = 0; |
871 | else { | ||
872 | brelse(bh); | ||
873 | if ((ident != TAG_IDENT_AVDP) && | ||
874 | (i || (ident != TAG_IDENT_FE && | ||
875 | ident != TAG_IDENT_EFE))) | ||
876 | sbi->s_anchor[i] = 0; | ||
877 | } | ||
878 | } | 817 | } |
879 | } | 818 | } |
880 | 819 | ||
@@ -971,27 +910,30 @@ static int udf_find_fileset(struct super_block *sb, | |||
971 | return 1; | 910 | return 1; |
972 | } | 911 | } |
973 | 912 | ||
974 | static void udf_load_pvoldesc(struct super_block *sb, struct buffer_head *bh) | 913 | static int udf_load_pvoldesc(struct super_block *sb, sector_t block) |
975 | { | 914 | { |
976 | struct primaryVolDesc *pvoldesc; | 915 | struct primaryVolDesc *pvoldesc; |
977 | time_t recording; | ||
978 | long recording_usec; | ||
979 | struct ustr instr; | 916 | struct ustr instr; |
980 | struct ustr outstr; | 917 | struct ustr outstr; |
918 | struct buffer_head *bh; | ||
919 | uint16_t ident; | ||
920 | |||
921 | bh = udf_read_tagged(sb, block, block, &ident); | ||
922 | if (!bh) | ||
923 | return 1; | ||
924 | BUG_ON(ident != TAG_IDENT_PVD); | ||
981 | 925 | ||
982 | pvoldesc = (struct primaryVolDesc *)bh->b_data; | 926 | pvoldesc = (struct primaryVolDesc *)bh->b_data; |
983 | 927 | ||
984 | if (udf_stamp_to_time(&recording, &recording_usec, | 928 | if (udf_disk_stamp_to_time(&UDF_SB(sb)->s_record_time, |
985 | lets_to_cpu(pvoldesc->recordingDateAndTime))) { | 929 | pvoldesc->recordingDateAndTime)) { |
986 | kernel_timestamp ts; | 930 | #ifdef UDFFS_DEBUG |
987 | ts = lets_to_cpu(pvoldesc->recordingDateAndTime); | 931 | timestamp *ts = &pvoldesc->recordingDateAndTime; |
988 | udf_debug("recording time %ld/%ld, %04u/%02u/%02u" | 932 | udf_debug("recording time %04u/%02u/%02u" |
989 | " %02u:%02u (%x)\n", | 933 | " %02u:%02u (%x)\n", |
990 | recording, recording_usec, | 934 | le16_to_cpu(ts->year), ts->month, ts->day, ts->hour, |
991 | ts.year, ts.month, ts.day, ts.hour, | 935 | ts->minute, le16_to_cpu(ts->typeAndTimezone)); |
992 | ts.minute, ts.typeAndTimezone); | 936 | #endif |
993 | UDF_SB(sb)->s_record_time.tv_sec = recording; | ||
994 | UDF_SB(sb)->s_record_time.tv_nsec = recording_usec * 1000; | ||
995 | } | 937 | } |
996 | 938 | ||
997 | if (!udf_build_ustr(&instr, pvoldesc->volIdent, 32)) | 939 | if (!udf_build_ustr(&instr, pvoldesc->volIdent, 32)) |
@@ -1005,6 +947,104 @@ static void udf_load_pvoldesc(struct super_block *sb, struct buffer_head *bh) | |||
1005 | if (!udf_build_ustr(&instr, pvoldesc->volSetIdent, 128)) | 947 | if (!udf_build_ustr(&instr, pvoldesc->volSetIdent, 128)) |
1006 | if (udf_CS0toUTF8(&outstr, &instr)) | 948 | if (udf_CS0toUTF8(&outstr, &instr)) |
1007 | udf_debug("volSetIdent[] = '%s'\n", outstr.u_name); | 949 | udf_debug("volSetIdent[] = '%s'\n", outstr.u_name); |
950 | |||
951 | brelse(bh); | ||
952 | return 0; | ||
953 | } | ||
954 | |||
955 | static int udf_load_metadata_files(struct super_block *sb, int partition) | ||
956 | { | ||
957 | struct udf_sb_info *sbi = UDF_SB(sb); | ||
958 | struct udf_part_map *map; | ||
959 | struct udf_meta_data *mdata; | ||
960 | kernel_lb_addr addr; | ||
961 | int fe_error = 0; | ||
962 | |||
963 | map = &sbi->s_partmaps[partition]; | ||
964 | mdata = &map->s_type_specific.s_metadata; | ||
965 | |||
966 | /* metadata address */ | ||
967 | addr.logicalBlockNum = mdata->s_meta_file_loc; | ||
968 | addr.partitionReferenceNum = map->s_partition_num; | ||
969 | |||
970 | udf_debug("Metadata file location: block = %d part = %d\n", | ||
971 | addr.logicalBlockNum, addr.partitionReferenceNum); | ||
972 | |||
973 | mdata->s_metadata_fe = udf_iget(sb, addr); | ||
974 | |||
975 | if (mdata->s_metadata_fe == NULL) { | ||
976 | udf_warning(sb, __func__, "metadata inode efe not found, " | ||
977 | "will try mirror inode."); | ||
978 | fe_error = 1; | ||
979 | } else if (UDF_I(mdata->s_metadata_fe)->i_alloc_type != | ||
980 | ICBTAG_FLAG_AD_SHORT) { | ||
981 | udf_warning(sb, __func__, "metadata inode efe does not have " | ||
982 | "short allocation descriptors!"); | ||
983 | fe_error = 1; | ||
984 | iput(mdata->s_metadata_fe); | ||
985 | mdata->s_metadata_fe = NULL; | ||
986 | } | ||
987 | |||
988 | /* mirror file entry */ | ||
989 | addr.logicalBlockNum = mdata->s_mirror_file_loc; | ||
990 | addr.partitionReferenceNum = map->s_partition_num; | ||
991 | |||
992 | udf_debug("Mirror metadata file location: block = %d part = %d\n", | ||
993 | addr.logicalBlockNum, addr.partitionReferenceNum); | ||
994 | |||
995 | mdata->s_mirror_fe = udf_iget(sb, addr); | ||
996 | |||
997 | if (mdata->s_mirror_fe == NULL) { | ||
998 | if (fe_error) { | ||
999 | udf_error(sb, __func__, "mirror inode efe not found " | ||
1000 | "and metadata inode is missing too, exiting..."); | ||
1001 | goto error_exit; | ||
1002 | } else | ||
1003 | udf_warning(sb, __func__, "mirror inode efe not found," | ||
1004 | " but metadata inode is OK"); | ||
1005 | } else if (UDF_I(mdata->s_mirror_fe)->i_alloc_type != | ||
1006 | ICBTAG_FLAG_AD_SHORT) { | ||
1007 | udf_warning(sb, __func__, "mirror inode efe does not have " | ||
1008 | "short allocation descriptors!"); | ||
1009 | iput(mdata->s_mirror_fe); | ||
1010 | mdata->s_mirror_fe = NULL; | ||
1011 | if (fe_error) | ||
1012 | goto error_exit; | ||
1013 | } | ||
1014 | |||
1015 | /* | ||
1016 | * bitmap file entry | ||
1017 | * Note: | ||
1018 | * Load only if bitmap file location differs from 0xFFFFFFFF (DCN-5102) | ||
1019 | */ | ||
1020 | if (mdata->s_bitmap_file_loc != 0xFFFFFFFF) { | ||
1021 | addr.logicalBlockNum = mdata->s_bitmap_file_loc; | ||
1022 | addr.partitionReferenceNum = map->s_partition_num; | ||
1023 | |||
1024 | udf_debug("Bitmap file location: block = %d part = %d\n", | ||
1025 | addr.logicalBlockNum, addr.partitionReferenceNum); | ||
1026 | |||
1027 | mdata->s_bitmap_fe = udf_iget(sb, addr); | ||
1028 | |||
1029 | if (mdata->s_bitmap_fe == NULL) { | ||
1030 | if (sb->s_flags & MS_RDONLY) | ||
1031 | udf_warning(sb, __func__, "bitmap inode efe " | ||
1032 | "not found but it's ok since the disc" | ||
1033 | " is mounted read-only"); | ||
1034 | else { | ||
1035 | udf_error(sb, __func__, "bitmap inode efe not " | ||
1036 | "found and attempted read-write mount"); | ||
1037 | goto error_exit; | ||
1038 | } | ||
1039 | } | ||
1040 | } | ||
1041 | |||
1042 | udf_debug("udf_load_metadata_files Ok\n"); | ||
1043 | |||
1044 | return 0; | ||
1045 | |||
1046 | error_exit: | ||
1047 | return 1; | ||
1008 | } | 1048 | } |
1009 | 1049 | ||
1010 | static void udf_load_fileset(struct super_block *sb, struct buffer_head *bh, | 1050 | static void udf_load_fileset(struct super_block *sb, struct buffer_head *bh, |
@@ -1025,10 +1065,9 @@ static void udf_load_fileset(struct super_block *sb, struct buffer_head *bh, | |||
1025 | int udf_compute_nr_groups(struct super_block *sb, u32 partition) | 1065 | int udf_compute_nr_groups(struct super_block *sb, u32 partition) |
1026 | { | 1066 | { |
1027 | struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; | 1067 | struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; |
1028 | return (map->s_partition_len + | 1068 | return DIV_ROUND_UP(map->s_partition_len + |
1029 | (sizeof(struct spaceBitmapDesc) << 3) + | 1069 | (sizeof(struct spaceBitmapDesc) << 3), |
1030 | (sb->s_blocksize * 8) - 1) / | 1070 | sb->s_blocksize * 8); |
1031 | (sb->s_blocksize * 8); | ||
1032 | } | 1071 | } |
1033 | 1072 | ||
1034 | static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index) | 1073 | static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index) |
@@ -1059,134 +1098,241 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index) | |||
1059 | return bitmap; | 1098 | return bitmap; |
1060 | } | 1099 | } |
1061 | 1100 | ||
1062 | static int udf_load_partdesc(struct super_block *sb, struct buffer_head *bh) | 1101 | static int udf_fill_partdesc_info(struct super_block *sb, |
1102 | struct partitionDesc *p, int p_index) | ||
1103 | { | ||
1104 | struct udf_part_map *map; | ||
1105 | struct udf_sb_info *sbi = UDF_SB(sb); | ||
1106 | struct partitionHeaderDesc *phd; | ||
1107 | |||
1108 | map = &sbi->s_partmaps[p_index]; | ||
1109 | |||
1110 | map->s_partition_len = le32_to_cpu(p->partitionLength); /* blocks */ | ||
1111 | map->s_partition_root = le32_to_cpu(p->partitionStartingLocation); | ||
1112 | |||
1113 | if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_READ_ONLY)) | ||
1114 | map->s_partition_flags |= UDF_PART_FLAG_READ_ONLY; | ||
1115 | if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_WRITE_ONCE)) | ||
1116 | map->s_partition_flags |= UDF_PART_FLAG_WRITE_ONCE; | ||
1117 | if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_REWRITABLE)) | ||
1118 | map->s_partition_flags |= UDF_PART_FLAG_REWRITABLE; | ||
1119 | if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_OVERWRITABLE)) | ||
1120 | map->s_partition_flags |= UDF_PART_FLAG_OVERWRITABLE; | ||
1121 | |||
1122 | udf_debug("Partition (%d type %x) starts at physical %d, " | ||
1123 | "block length %d\n", p_index, | ||
1124 | map->s_partition_type, map->s_partition_root, | ||
1125 | map->s_partition_len); | ||
1126 | |||
1127 | if (strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR02) && | ||
1128 | strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR03)) | ||
1129 | return 0; | ||
1130 | |||
1131 | phd = (struct partitionHeaderDesc *)p->partitionContentsUse; | ||
1132 | if (phd->unallocSpaceTable.extLength) { | ||
1133 | kernel_lb_addr loc = { | ||
1134 | .logicalBlockNum = le32_to_cpu( | ||
1135 | phd->unallocSpaceTable.extPosition), | ||
1136 | .partitionReferenceNum = p_index, | ||
1137 | }; | ||
1138 | |||
1139 | map->s_uspace.s_table = udf_iget(sb, loc); | ||
1140 | if (!map->s_uspace.s_table) { | ||
1141 | udf_debug("cannot load unallocSpaceTable (part %d)\n", | ||
1142 | p_index); | ||
1143 | return 1; | ||
1144 | } | ||
1145 | map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE; | ||
1146 | udf_debug("unallocSpaceTable (part %d) @ %ld\n", | ||
1147 | p_index, map->s_uspace.s_table->i_ino); | ||
1148 | } | ||
1149 | |||
1150 | if (phd->unallocSpaceBitmap.extLength) { | ||
1151 | struct udf_bitmap *bitmap = udf_sb_alloc_bitmap(sb, p_index); | ||
1152 | if (!bitmap) | ||
1153 | return 1; | ||
1154 | map->s_uspace.s_bitmap = bitmap; | ||
1155 | bitmap->s_extLength = le32_to_cpu( | ||
1156 | phd->unallocSpaceBitmap.extLength); | ||
1157 | bitmap->s_extPosition = le32_to_cpu( | ||
1158 | phd->unallocSpaceBitmap.extPosition); | ||
1159 | map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_BITMAP; | ||
1160 | udf_debug("unallocSpaceBitmap (part %d) @ %d\n", p_index, | ||
1161 | bitmap->s_extPosition); | ||
1162 | } | ||
1163 | |||
1164 | if (phd->partitionIntegrityTable.extLength) | ||
1165 | udf_debug("partitionIntegrityTable (part %d)\n", p_index); | ||
1166 | |||
1167 | if (phd->freedSpaceTable.extLength) { | ||
1168 | kernel_lb_addr loc = { | ||
1169 | .logicalBlockNum = le32_to_cpu( | ||
1170 | phd->freedSpaceTable.extPosition), | ||
1171 | .partitionReferenceNum = p_index, | ||
1172 | }; | ||
1173 | |||
1174 | map->s_fspace.s_table = udf_iget(sb, loc); | ||
1175 | if (!map->s_fspace.s_table) { | ||
1176 | udf_debug("cannot load freedSpaceTable (part %d)\n", | ||
1177 | p_index); | ||
1178 | return 1; | ||
1179 | } | ||
1180 | |||
1181 | map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE; | ||
1182 | udf_debug("freedSpaceTable (part %d) @ %ld\n", | ||
1183 | p_index, map->s_fspace.s_table->i_ino); | ||
1184 | } | ||
1185 | |||
1186 | if (phd->freedSpaceBitmap.extLength) { | ||
1187 | struct udf_bitmap *bitmap = udf_sb_alloc_bitmap(sb, p_index); | ||
1188 | if (!bitmap) | ||
1189 | return 1; | ||
1190 | map->s_fspace.s_bitmap = bitmap; | ||
1191 | bitmap->s_extLength = le32_to_cpu( | ||
1192 | phd->freedSpaceBitmap.extLength); | ||
1193 | bitmap->s_extPosition = le32_to_cpu( | ||
1194 | phd->freedSpaceBitmap.extPosition); | ||
1195 | map->s_partition_flags |= UDF_PART_FLAG_FREED_BITMAP; | ||
1196 | udf_debug("freedSpaceBitmap (part %d) @ %d\n", p_index, | ||
1197 | bitmap->s_extPosition); | ||
1198 | } | ||
1199 | return 0; | ||
1200 | } | ||
1201 | |||
1202 | static int udf_load_vat(struct super_block *sb, int p_index, int type1_index) | ||
1203 | { | ||
1204 | struct udf_sb_info *sbi = UDF_SB(sb); | ||
1205 | struct udf_part_map *map = &sbi->s_partmaps[p_index]; | ||
1206 | kernel_lb_addr ino; | ||
1207 | struct buffer_head *bh = NULL; | ||
1208 | struct udf_inode_info *vati; | ||
1209 | uint32_t pos; | ||
1210 | struct virtualAllocationTable20 *vat20; | ||
1211 | |||
1212 | /* VAT file entry is in the last recorded block */ | ||
1213 | ino.partitionReferenceNum = type1_index; | ||
1214 | ino.logicalBlockNum = sbi->s_last_block - map->s_partition_root; | ||
1215 | sbi->s_vat_inode = udf_iget(sb, ino); | ||
1216 | if (!sbi->s_vat_inode) | ||
1217 | return 1; | ||
1218 | |||
1219 | if (map->s_partition_type == UDF_VIRTUAL_MAP15) { | ||
1220 | map->s_type_specific.s_virtual.s_start_offset = 0; | ||
1221 | map->s_type_specific.s_virtual.s_num_entries = | ||
1222 | (sbi->s_vat_inode->i_size - 36) >> 2; | ||
1223 | } else if (map->s_partition_type == UDF_VIRTUAL_MAP20) { | ||
1224 | vati = UDF_I(sbi->s_vat_inode); | ||
1225 | if (vati->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { | ||
1226 | pos = udf_block_map(sbi->s_vat_inode, 0); | ||
1227 | bh = sb_bread(sb, pos); | ||
1228 | if (!bh) | ||
1229 | return 1; | ||
1230 | vat20 = (struct virtualAllocationTable20 *)bh->b_data; | ||
1231 | } else { | ||
1232 | vat20 = (struct virtualAllocationTable20 *) | ||
1233 | vati->i_ext.i_data; | ||
1234 | } | ||
1235 | |||
1236 | map->s_type_specific.s_virtual.s_start_offset = | ||
1237 | le16_to_cpu(vat20->lengthHeader); | ||
1238 | map->s_type_specific.s_virtual.s_num_entries = | ||
1239 | (sbi->s_vat_inode->i_size - | ||
1240 | map->s_type_specific.s_virtual. | ||
1241 | s_start_offset) >> 2; | ||
1242 | brelse(bh); | ||
1243 | } | ||
1244 | return 0; | ||
1245 | } | ||
1246 | |||
1247 | static int udf_load_partdesc(struct super_block *sb, sector_t block) | ||
1063 | { | 1248 | { |
1249 | struct buffer_head *bh; | ||
1064 | struct partitionDesc *p; | 1250 | struct partitionDesc *p; |
1065 | int i; | ||
1066 | struct udf_part_map *map; | 1251 | struct udf_part_map *map; |
1067 | struct udf_sb_info *sbi; | 1252 | struct udf_sb_info *sbi = UDF_SB(sb); |
1253 | int i, type1_idx; | ||
1254 | uint16_t partitionNumber; | ||
1255 | uint16_t ident; | ||
1256 | int ret = 0; | ||
1257 | |||
1258 | bh = udf_read_tagged(sb, block, block, &ident); | ||
1259 | if (!bh) | ||
1260 | return 1; | ||
1261 | if (ident != TAG_IDENT_PD) | ||
1262 | goto out_bh; | ||
1068 | 1263 | ||
1069 | p = (struct partitionDesc *)bh->b_data; | 1264 | p = (struct partitionDesc *)bh->b_data; |
1070 | sbi = UDF_SB(sb); | 1265 | partitionNumber = le16_to_cpu(p->partitionNumber); |
1071 | 1266 | ||
1267 | /* First scan for TYPE1, SPARABLE and METADATA partitions */ | ||
1072 | for (i = 0; i < sbi->s_partitions; i++) { | 1268 | for (i = 0; i < sbi->s_partitions; i++) { |
1073 | map = &sbi->s_partmaps[i]; | 1269 | map = &sbi->s_partmaps[i]; |
1074 | udf_debug("Searching map: (%d == %d)\n", | 1270 | udf_debug("Searching map: (%d == %d)\n", |
1075 | map->s_partition_num, | 1271 | map->s_partition_num, partitionNumber); |
1076 | le16_to_cpu(p->partitionNumber)); | 1272 | if (map->s_partition_num == partitionNumber && |
1077 | if (map->s_partition_num == | 1273 | (map->s_partition_type == UDF_TYPE1_MAP15 || |
1078 | le16_to_cpu(p->partitionNumber)) { | 1274 | map->s_partition_type == UDF_SPARABLE_MAP15)) |
1079 | map->s_partition_len = | ||
1080 | le32_to_cpu(p->partitionLength); /* blocks */ | ||
1081 | map->s_partition_root = | ||
1082 | le32_to_cpu(p->partitionStartingLocation); | ||
1083 | if (p->accessType == | ||
1084 | cpu_to_le32(PD_ACCESS_TYPE_READ_ONLY)) | ||
1085 | map->s_partition_flags |= | ||
1086 | UDF_PART_FLAG_READ_ONLY; | ||
1087 | if (p->accessType == | ||
1088 | cpu_to_le32(PD_ACCESS_TYPE_WRITE_ONCE)) | ||
1089 | map->s_partition_flags |= | ||
1090 | UDF_PART_FLAG_WRITE_ONCE; | ||
1091 | if (p->accessType == | ||
1092 | cpu_to_le32(PD_ACCESS_TYPE_REWRITABLE)) | ||
1093 | map->s_partition_flags |= | ||
1094 | UDF_PART_FLAG_REWRITABLE; | ||
1095 | if (p->accessType == | ||
1096 | cpu_to_le32(PD_ACCESS_TYPE_OVERWRITABLE)) | ||
1097 | map->s_partition_flags |= | ||
1098 | UDF_PART_FLAG_OVERWRITABLE; | ||
1099 | |||
1100 | if (!strcmp(p->partitionContents.ident, | ||
1101 | PD_PARTITION_CONTENTS_NSR02) || | ||
1102 | !strcmp(p->partitionContents.ident, | ||
1103 | PD_PARTITION_CONTENTS_NSR03)) { | ||
1104 | struct partitionHeaderDesc *phd; | ||
1105 | |||
1106 | phd = (struct partitionHeaderDesc *) | ||
1107 | (p->partitionContentsUse); | ||
1108 | if (phd->unallocSpaceTable.extLength) { | ||
1109 | kernel_lb_addr loc = { | ||
1110 | .logicalBlockNum = le32_to_cpu(phd->unallocSpaceTable.extPosition), | ||
1111 | .partitionReferenceNum = i, | ||
1112 | }; | ||
1113 | |||
1114 | map->s_uspace.s_table = | ||
1115 | udf_iget(sb, loc); | ||
1116 | if (!map->s_uspace.s_table) { | ||
1117 | udf_debug("cannot load unallocSpaceTable (part %d)\n", i); | ||
1118 | return 1; | ||
1119 | } | ||
1120 | map->s_partition_flags |= | ||
1121 | UDF_PART_FLAG_UNALLOC_TABLE; | ||
1122 | udf_debug("unallocSpaceTable (part %d) @ %ld\n", | ||
1123 | i, map->s_uspace.s_table->i_ino); | ||
1124 | } | ||
1125 | if (phd->unallocSpaceBitmap.extLength) { | ||
1126 | struct udf_bitmap *bitmap = | ||
1127 | udf_sb_alloc_bitmap(sb, i); | ||
1128 | map->s_uspace.s_bitmap = bitmap; | ||
1129 | if (bitmap != NULL) { | ||
1130 | bitmap->s_extLength = | ||
1131 | le32_to_cpu(phd->unallocSpaceBitmap.extLength); | ||
1132 | bitmap->s_extPosition = | ||
1133 | le32_to_cpu(phd->unallocSpaceBitmap.extPosition); | ||
1134 | map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_BITMAP; | ||
1135 | udf_debug("unallocSpaceBitmap (part %d) @ %d\n", | ||
1136 | i, bitmap->s_extPosition); | ||
1137 | } | ||
1138 | } | ||
1139 | if (phd->partitionIntegrityTable.extLength) | ||
1140 | udf_debug("partitionIntegrityTable (part %d)\n", i); | ||
1141 | if (phd->freedSpaceTable.extLength) { | ||
1142 | kernel_lb_addr loc = { | ||
1143 | .logicalBlockNum = le32_to_cpu(phd->freedSpaceTable.extPosition), | ||
1144 | .partitionReferenceNum = i, | ||
1145 | }; | ||
1146 | |||
1147 | map->s_fspace.s_table = | ||
1148 | udf_iget(sb, loc); | ||
1149 | if (!map->s_fspace.s_table) { | ||
1150 | udf_debug("cannot load freedSpaceTable (part %d)\n", i); | ||
1151 | return 1; | ||
1152 | } | ||
1153 | map->s_partition_flags |= | ||
1154 | UDF_PART_FLAG_FREED_TABLE; | ||
1155 | udf_debug("freedSpaceTable (part %d) @ %ld\n", | ||
1156 | i, map->s_fspace.s_table->i_ino); | ||
1157 | } | ||
1158 | if (phd->freedSpaceBitmap.extLength) { | ||
1159 | struct udf_bitmap *bitmap = | ||
1160 | udf_sb_alloc_bitmap(sb, i); | ||
1161 | map->s_fspace.s_bitmap = bitmap; | ||
1162 | if (bitmap != NULL) { | ||
1163 | bitmap->s_extLength = | ||
1164 | le32_to_cpu(phd->freedSpaceBitmap.extLength); | ||
1165 | bitmap->s_extPosition = | ||
1166 | le32_to_cpu(phd->freedSpaceBitmap.extPosition); | ||
1167 | map->s_partition_flags |= UDF_PART_FLAG_FREED_BITMAP; | ||
1168 | udf_debug("freedSpaceBitmap (part %d) @ %d\n", | ||
1169 | i, bitmap->s_extPosition); | ||
1170 | } | ||
1171 | } | ||
1172 | } | ||
1173 | break; | 1275 | break; |
1174 | } | ||
1175 | } | 1276 | } |
1176 | if (i == sbi->s_partitions) | 1277 | |
1278 | if (i >= sbi->s_partitions) { | ||
1177 | udf_debug("Partition (%d) not found in partition map\n", | 1279 | udf_debug("Partition (%d) not found in partition map\n", |
1178 | le16_to_cpu(p->partitionNumber)); | 1280 | partitionNumber); |
1179 | else | 1281 | goto out_bh; |
1180 | udf_debug("Partition (%d:%d type %x) starts at physical %d, " | 1282 | } |
1181 | "block length %d\n", | 1283 | |
1182 | le16_to_cpu(p->partitionNumber), i, | 1284 | ret = udf_fill_partdesc_info(sb, p, i); |
1183 | map->s_partition_type, | 1285 | |
1184 | map->s_partition_root, | 1286 | /* |
1185 | map->s_partition_len); | 1287 | * Now rescan for VIRTUAL or METADATA partitions when SPARABLE and |
1186 | return 0; | 1288 | * PHYSICAL partitions are already set up |
1289 | */ | ||
1290 | type1_idx = i; | ||
1291 | for (i = 0; i < sbi->s_partitions; i++) { | ||
1292 | map = &sbi->s_partmaps[i]; | ||
1293 | |||
1294 | if (map->s_partition_num == partitionNumber && | ||
1295 | (map->s_partition_type == UDF_VIRTUAL_MAP15 || | ||
1296 | map->s_partition_type == UDF_VIRTUAL_MAP20 || | ||
1297 | map->s_partition_type == UDF_METADATA_MAP25)) | ||
1298 | break; | ||
1299 | } | ||
1300 | |||
1301 | if (i >= sbi->s_partitions) | ||
1302 | goto out_bh; | ||
1303 | |||
1304 | ret = udf_fill_partdesc_info(sb, p, i); | ||
1305 | if (ret) | ||
1306 | goto out_bh; | ||
1307 | |||
1308 | if (map->s_partition_type == UDF_METADATA_MAP25) { | ||
1309 | ret = udf_load_metadata_files(sb, i); | ||
1310 | if (ret) { | ||
1311 | printk(KERN_ERR "UDF-fs: error loading MetaData " | ||
1312 | "partition map %d\n", i); | ||
1313 | goto out_bh; | ||
1314 | } | ||
1315 | } else { | ||
1316 | ret = udf_load_vat(sb, i, type1_idx); | ||
1317 | if (ret) | ||
1318 | goto out_bh; | ||
1319 | /* | ||
1320 | * Mark filesystem read-only if we have a partition with | ||
1321 | * virtual map since we don't handle writing to it (we | ||
1322 | * overwrite blocks instead of relocating them). | ||
1323 | */ | ||
1324 | sb->s_flags |= MS_RDONLY; | ||
1325 | printk(KERN_NOTICE "UDF-fs: Filesystem marked read-only " | ||
1326 | "because writing to pseudooverwrite partition is " | ||
1327 | "not implemented.\n"); | ||
1328 | } | ||
1329 | out_bh: | ||
1330 | /* In case loading failed, we handle cleanup in udf_fill_super */ | ||
1331 | brelse(bh); | ||
1332 | return ret; | ||
1187 | } | 1333 | } |
1188 | 1334 | ||
1189 | static int udf_load_logicalvol(struct super_block *sb, struct buffer_head *bh, | 1335 | static int udf_load_logicalvol(struct super_block *sb, sector_t block, |
1190 | kernel_lb_addr *fileset) | 1336 | kernel_lb_addr *fileset) |
1191 | { | 1337 | { |
1192 | struct logicalVolDesc *lvd; | 1338 | struct logicalVolDesc *lvd; |
@@ -1194,12 +1340,21 @@ static int udf_load_logicalvol(struct super_block *sb, struct buffer_head *bh, | |||
1194 | uint8_t type; | 1340 | uint8_t type; |
1195 | struct udf_sb_info *sbi = UDF_SB(sb); | 1341 | struct udf_sb_info *sbi = UDF_SB(sb); |
1196 | struct genericPartitionMap *gpm; | 1342 | struct genericPartitionMap *gpm; |
1343 | uint16_t ident; | ||
1344 | struct buffer_head *bh; | ||
1345 | int ret = 0; | ||
1197 | 1346 | ||
1347 | bh = udf_read_tagged(sb, block, block, &ident); | ||
1348 | if (!bh) | ||
1349 | return 1; | ||
1350 | BUG_ON(ident != TAG_IDENT_LVD); | ||
1198 | lvd = (struct logicalVolDesc *)bh->b_data; | 1351 | lvd = (struct logicalVolDesc *)bh->b_data; |
1199 | 1352 | ||
1200 | i = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps)); | 1353 | i = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps)); |
1201 | if (i != 0) | 1354 | if (i != 0) { |
1202 | return i; | 1355 | ret = i; |
1356 | goto out_bh; | ||
1357 | } | ||
1203 | 1358 | ||
1204 | for (i = 0, offset = 0; | 1359 | for (i = 0, offset = 0; |
1205 | i < sbi->s_partitions && offset < le32_to_cpu(lvd->mapTableLength); | 1360 | i < sbi->s_partitions && offset < le32_to_cpu(lvd->mapTableLength); |
@@ -1223,12 +1378,12 @@ static int udf_load_logicalvol(struct super_block *sb, struct buffer_head *bh, | |||
1223 | u16 suf = | 1378 | u16 suf = |
1224 | le16_to_cpu(((__le16 *)upm2->partIdent. | 1379 | le16_to_cpu(((__le16 *)upm2->partIdent. |
1225 | identSuffix)[0]); | 1380 | identSuffix)[0]); |
1226 | if (suf == 0x0150) { | 1381 | if (suf < 0x0200) { |
1227 | map->s_partition_type = | 1382 | map->s_partition_type = |
1228 | UDF_VIRTUAL_MAP15; | 1383 | UDF_VIRTUAL_MAP15; |
1229 | map->s_partition_func = | 1384 | map->s_partition_func = |
1230 | udf_get_pblock_virt15; | 1385 | udf_get_pblock_virt15; |
1231 | } else if (suf == 0x0200) { | 1386 | } else { |
1232 | map->s_partition_type = | 1387 | map->s_partition_type = |
1233 | UDF_VIRTUAL_MAP20; | 1388 | UDF_VIRTUAL_MAP20; |
1234 | map->s_partition_func = | 1389 | map->s_partition_func = |
@@ -1238,7 +1393,6 @@ static int udf_load_logicalvol(struct super_block *sb, struct buffer_head *bh, | |||
1238 | UDF_ID_SPARABLE, | 1393 | UDF_ID_SPARABLE, |
1239 | strlen(UDF_ID_SPARABLE))) { | 1394 | strlen(UDF_ID_SPARABLE))) { |
1240 | uint32_t loc; | 1395 | uint32_t loc; |
1241 | uint16_t ident; | ||
1242 | struct sparingTable *st; | 1396 | struct sparingTable *st; |
1243 | struct sparablePartitionMap *spm = | 1397 | struct sparablePartitionMap *spm = |
1244 | (struct sparablePartitionMap *)gpm; | 1398 | (struct sparablePartitionMap *)gpm; |
@@ -1256,22 +1410,64 @@ static int udf_load_logicalvol(struct super_block *sb, struct buffer_head *bh, | |||
1256 | map->s_type_specific.s_sparing. | 1410 | map->s_type_specific.s_sparing. |
1257 | s_spar_map[j] = bh2; | 1411 | s_spar_map[j] = bh2; |
1258 | 1412 | ||
1259 | if (bh2 != NULL) { | 1413 | if (bh2 == NULL) |
1260 | st = (struct sparingTable *) | 1414 | continue; |
1261 | bh2->b_data; | 1415 | |
1262 | if (ident != 0 || strncmp( | 1416 | st = (struct sparingTable *)bh2->b_data; |
1263 | st->sparingIdent.ident, | 1417 | if (ident != 0 || strncmp( |
1264 | UDF_ID_SPARING, | 1418 | st->sparingIdent.ident, |
1265 | strlen(UDF_ID_SPARING))) { | 1419 | UDF_ID_SPARING, |
1266 | brelse(bh2); | 1420 | strlen(UDF_ID_SPARING))) { |
1267 | map->s_type_specific. | 1421 | brelse(bh2); |
1268 | s_sparing. | 1422 | map->s_type_specific.s_sparing. |
1269 | s_spar_map[j] = | 1423 | s_spar_map[j] = NULL; |
1270 | NULL; | ||
1271 | } | ||
1272 | } | 1424 | } |
1273 | } | 1425 | } |
1274 | map->s_partition_func = udf_get_pblock_spar15; | 1426 | map->s_partition_func = udf_get_pblock_spar15; |
1427 | } else if (!strncmp(upm2->partIdent.ident, | ||
1428 | UDF_ID_METADATA, | ||
1429 | strlen(UDF_ID_METADATA))) { | ||
1430 | struct udf_meta_data *mdata = | ||
1431 | &map->s_type_specific.s_metadata; | ||
1432 | struct metadataPartitionMap *mdm = | ||
1433 | (struct metadataPartitionMap *) | ||
1434 | &(lvd->partitionMaps[offset]); | ||
1435 | udf_debug("Parsing Logical vol part %d " | ||
1436 | "type %d id=%s\n", i, type, | ||
1437 | UDF_ID_METADATA); | ||
1438 | |||
1439 | map->s_partition_type = UDF_METADATA_MAP25; | ||
1440 | map->s_partition_func = udf_get_pblock_meta25; | ||
1441 | |||
1442 | mdata->s_meta_file_loc = | ||
1443 | le32_to_cpu(mdm->metadataFileLoc); | ||
1444 | mdata->s_mirror_file_loc = | ||
1445 | le32_to_cpu(mdm->metadataMirrorFileLoc); | ||
1446 | mdata->s_bitmap_file_loc = | ||
1447 | le32_to_cpu(mdm->metadataBitmapFileLoc); | ||
1448 | mdata->s_alloc_unit_size = | ||
1449 | le32_to_cpu(mdm->allocUnitSize); | ||
1450 | mdata->s_align_unit_size = | ||
1451 | le16_to_cpu(mdm->alignUnitSize); | ||
1452 | mdata->s_dup_md_flag = | ||
1453 | mdm->flags & 0x01; | ||
1454 | |||
1455 | udf_debug("Metadata Ident suffix=0x%x\n", | ||
1456 | (le16_to_cpu( | ||
1457 | ((__le16 *) | ||
1458 | mdm->partIdent.identSuffix)[0]))); | ||
1459 | udf_debug("Metadata part num=%d\n", | ||
1460 | le16_to_cpu(mdm->partitionNum)); | ||
1461 | udf_debug("Metadata part alloc unit size=%d\n", | ||
1462 | le32_to_cpu(mdm->allocUnitSize)); | ||
1463 | udf_debug("Metadata file loc=%d\n", | ||
1464 | le32_to_cpu(mdm->metadataFileLoc)); | ||
1465 | udf_debug("Mirror file loc=%d\n", | ||
1466 | le32_to_cpu(mdm->metadataMirrorFileLoc)); | ||
1467 | udf_debug("Bitmap file loc=%d\n", | ||
1468 | le32_to_cpu(mdm->metadataBitmapFileLoc)); | ||
1469 | udf_debug("Duplicate Flag: %d %d\n", | ||
1470 | mdata->s_dup_md_flag, mdm->flags); | ||
1275 | } else { | 1471 | } else { |
1276 | udf_debug("Unknown ident: %s\n", | 1472 | udf_debug("Unknown ident: %s\n", |
1277 | upm2->partIdent.ident); | 1473 | upm2->partIdent.ident); |
@@ -1296,7 +1492,9 @@ static int udf_load_logicalvol(struct super_block *sb, struct buffer_head *bh, | |||
1296 | if (lvd->integritySeqExt.extLength) | 1492 | if (lvd->integritySeqExt.extLength) |
1297 | udf_load_logicalvolint(sb, leea_to_cpu(lvd->integritySeqExt)); | 1493 | udf_load_logicalvolint(sb, leea_to_cpu(lvd->integritySeqExt)); |
1298 | 1494 | ||
1299 | return 0; | 1495 | out_bh: |
1496 | brelse(bh); | ||
1497 | return ret; | ||
1300 | } | 1498 | } |
1301 | 1499 | ||
1302 | /* | 1500 | /* |
@@ -1345,7 +1543,7 @@ static void udf_load_logicalvolint(struct super_block *sb, kernel_extent_ad loc) | |||
1345 | * July 1, 1997 - Andrew E. Mileski | 1543 | * July 1, 1997 - Andrew E. Mileski |
1346 | * Written, tested, and released. | 1544 | * Written, tested, and released. |
1347 | */ | 1545 | */ |
1348 | static int udf_process_sequence(struct super_block *sb, long block, | 1546 | static noinline int udf_process_sequence(struct super_block *sb, long block, |
1349 | long lastblock, kernel_lb_addr *fileset) | 1547 | long lastblock, kernel_lb_addr *fileset) |
1350 | { | 1548 | { |
1351 | struct buffer_head *bh = NULL; | 1549 | struct buffer_head *bh = NULL; |
@@ -1354,19 +1552,25 @@ static int udf_process_sequence(struct super_block *sb, long block, | |||
1354 | struct generic_desc *gd; | 1552 | struct generic_desc *gd; |
1355 | struct volDescPtr *vdp; | 1553 | struct volDescPtr *vdp; |
1356 | int done = 0; | 1554 | int done = 0; |
1357 | int i, j; | ||
1358 | uint32_t vdsn; | 1555 | uint32_t vdsn; |
1359 | uint16_t ident; | 1556 | uint16_t ident; |
1360 | long next_s = 0, next_e = 0; | 1557 | long next_s = 0, next_e = 0; |
1361 | 1558 | ||
1362 | memset(vds, 0, sizeof(struct udf_vds_record) * VDS_POS_LENGTH); | 1559 | memset(vds, 0, sizeof(struct udf_vds_record) * VDS_POS_LENGTH); |
1363 | 1560 | ||
1364 | /* Read the main descriptor sequence */ | 1561 | /* |
1562 | * Read the main descriptor sequence and find which descriptors | ||
1563 | * are in it. | ||
1564 | */ | ||
1365 | for (; (!done && block <= lastblock); block++) { | 1565 | for (; (!done && block <= lastblock); block++) { |
1366 | 1566 | ||
1367 | bh = udf_read_tagged(sb, block, block, &ident); | 1567 | bh = udf_read_tagged(sb, block, block, &ident); |
1368 | if (!bh) | 1568 | if (!bh) { |
1369 | break; | 1569 | printk(KERN_ERR "udf: Block %Lu of volume descriptor " |
1570 | "sequence is corrupted or we could not read " | ||
1571 | "it.\n", (unsigned long long)block); | ||
1572 | return 1; | ||
1573 | } | ||
1370 | 1574 | ||
1371 | /* Process each descriptor (ISO 13346 3/8.3-8.4) */ | 1575 | /* Process each descriptor (ISO 13346 3/8.3-8.4) */ |
1372 | gd = (struct generic_desc *)bh->b_data; | 1576 | gd = (struct generic_desc *)bh->b_data; |
@@ -1432,41 +1636,31 @@ static int udf_process_sequence(struct super_block *sb, long block, | |||
1432 | } | 1636 | } |
1433 | brelse(bh); | 1637 | brelse(bh); |
1434 | } | 1638 | } |
1435 | for (i = 0; i < VDS_POS_LENGTH; i++) { | 1639 | /* |
1436 | if (vds[i].block) { | 1640 | * Now read interesting descriptors again and process them |
1437 | bh = udf_read_tagged(sb, vds[i].block, vds[i].block, | 1641 | * in a suitable order |
1438 | &ident); | 1642 | */ |
1439 | 1643 | if (!vds[VDS_POS_PRIMARY_VOL_DESC].block) { | |
1440 | if (i == VDS_POS_PRIMARY_VOL_DESC) { | 1644 | printk(KERN_ERR "udf: Primary Volume Descriptor not found!\n"); |
1441 | udf_load_pvoldesc(sb, bh); | 1645 | return 1; |
1442 | } else if (i == VDS_POS_LOGICAL_VOL_DESC) { | 1646 | } |
1443 | if (udf_load_logicalvol(sb, bh, fileset)) { | 1647 | if (udf_load_pvoldesc(sb, vds[VDS_POS_PRIMARY_VOL_DESC].block)) |
1444 | brelse(bh); | 1648 | return 1; |
1445 | return 1; | 1649 | |
1446 | } | 1650 | if (vds[VDS_POS_LOGICAL_VOL_DESC].block && udf_load_logicalvol(sb, |
1447 | } else if (i == VDS_POS_PARTITION_DESC) { | 1651 | vds[VDS_POS_LOGICAL_VOL_DESC].block, fileset)) |
1448 | struct buffer_head *bh2 = NULL; | 1652 | return 1; |
1449 | if (udf_load_partdesc(sb, bh)) { | 1653 | |
1450 | brelse(bh); | 1654 | if (vds[VDS_POS_PARTITION_DESC].block) { |
1451 | return 1; | 1655 | /* |
1452 | } | 1656 | * We rescan the whole descriptor sequence to find |
1453 | for (j = vds[i].block + 1; | 1657 | * partition descriptor blocks and process them. |
1454 | j < vds[VDS_POS_TERMINATING_DESC].block; | 1658 | */ |
1455 | j++) { | 1659 | for (block = vds[VDS_POS_PARTITION_DESC].block; |
1456 | bh2 = udf_read_tagged(sb, j, j, &ident); | 1660 | block < vds[VDS_POS_TERMINATING_DESC].block; |
1457 | gd = (struct generic_desc *)bh2->b_data; | 1661 | block++) |
1458 | if (ident == TAG_IDENT_PD) | 1662 | if (udf_load_partdesc(sb, block)) |
1459 | if (udf_load_partdesc(sb, | 1663 | return 1; |
1460 | bh2)) { | ||
1461 | brelse(bh); | ||
1462 | brelse(bh2); | ||
1463 | return 1; | ||
1464 | } | ||
1465 | brelse(bh2); | ||
1466 | } | ||
1467 | } | ||
1468 | brelse(bh); | ||
1469 | } | ||
1470 | } | 1664 | } |
1471 | 1665 | ||
1472 | return 0; | 1666 | return 0; |
@@ -1478,6 +1672,7 @@ static int udf_process_sequence(struct super_block *sb, long block, | |||
1478 | static int udf_check_valid(struct super_block *sb, int novrs, int silent) | 1672 | static int udf_check_valid(struct super_block *sb, int novrs, int silent) |
1479 | { | 1673 | { |
1480 | long block; | 1674 | long block; |
1675 | struct udf_sb_info *sbi = UDF_SB(sb); | ||
1481 | 1676 | ||
1482 | if (novrs) { | 1677 | if (novrs) { |
1483 | udf_debug("Validity check skipped because of novrs option\n"); | 1678 | udf_debug("Validity check skipped because of novrs option\n"); |
@@ -1485,27 +1680,22 @@ static int udf_check_valid(struct super_block *sb, int novrs, int silent) | |||
1485 | } | 1680 | } |
1486 | /* Check that it is NSR02 compliant */ | 1681 | /* Check that it is NSR02 compliant */ |
1487 | /* Process any "CD-ROM Volume Descriptor Set" (ECMA 167 2/8.3.1) */ | 1682 | /* Process any "CD-ROM Volume Descriptor Set" (ECMA 167 2/8.3.1) */ |
1488 | else { | 1683 | block = udf_vrs(sb, silent); |
1489 | block = udf_vrs(sb, silent); | 1684 | if (block == -1) |
1490 | if (block == -1) { | 1685 | udf_debug("Failed to read byte 32768. Assuming open " |
1491 | struct udf_sb_info *sbi = UDF_SB(sb); | 1686 | "disc. Skipping validity check\n"); |
1492 | udf_debug("Failed to read byte 32768. Assuming open " | 1687 | if (block && !sbi->s_last_block) |
1493 | "disc. Skipping validity check\n"); | 1688 | sbi->s_last_block = udf_get_last_block(sb); |
1494 | if (!sbi->s_last_block) | 1689 | return !block; |
1495 | sbi->s_last_block = udf_get_last_block(sb); | ||
1496 | return 0; | ||
1497 | } else | ||
1498 | return !block; | ||
1499 | } | ||
1500 | } | 1690 | } |
1501 | 1691 | ||
1502 | static int udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset) | 1692 | static int udf_load_sequence(struct super_block *sb, kernel_lb_addr *fileset) |
1503 | { | 1693 | { |
1504 | struct anchorVolDescPtr *anchor; | 1694 | struct anchorVolDescPtr *anchor; |
1505 | uint16_t ident; | 1695 | uint16_t ident; |
1506 | struct buffer_head *bh; | 1696 | struct buffer_head *bh; |
1507 | long main_s, main_e, reserve_s, reserve_e; | 1697 | long main_s, main_e, reserve_s, reserve_e; |
1508 | int i, j; | 1698 | int i; |
1509 | struct udf_sb_info *sbi; | 1699 | struct udf_sb_info *sbi; |
1510 | 1700 | ||
1511 | if (!sb) | 1701 | if (!sb) |
@@ -1515,6 +1705,7 @@ static int udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset) | |||
1515 | for (i = 0; i < ARRAY_SIZE(sbi->s_anchor); i++) { | 1705 | for (i = 0; i < ARRAY_SIZE(sbi->s_anchor); i++) { |
1516 | if (!sbi->s_anchor[i]) | 1706 | if (!sbi->s_anchor[i]) |
1517 | continue; | 1707 | continue; |
1708 | |||
1518 | bh = udf_read_tagged(sb, sbi->s_anchor[i], sbi->s_anchor[i], | 1709 | bh = udf_read_tagged(sb, sbi->s_anchor[i], sbi->s_anchor[i], |
1519 | &ident); | 1710 | &ident); |
1520 | if (!bh) | 1711 | if (!bh) |
@@ -1553,76 +1744,6 @@ static int udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset) | |||
1553 | } | 1744 | } |
1554 | udf_debug("Using anchor in block %d\n", sbi->s_anchor[i]); | 1745 | udf_debug("Using anchor in block %d\n", sbi->s_anchor[i]); |
1555 | 1746 | ||
1556 | for (i = 0; i < sbi->s_partitions; i++) { | ||
1557 | kernel_lb_addr uninitialized_var(ino); | ||
1558 | struct udf_part_map *map = &sbi->s_partmaps[i]; | ||
1559 | switch (map->s_partition_type) { | ||
1560 | case UDF_VIRTUAL_MAP15: | ||
1561 | case UDF_VIRTUAL_MAP20: | ||
1562 | if (!sbi->s_last_block) { | ||
1563 | sbi->s_last_block = udf_get_last_block(sb); | ||
1564 | udf_find_anchor(sb); | ||
1565 | } | ||
1566 | |||
1567 | if (!sbi->s_last_block) { | ||
1568 | udf_debug("Unable to determine Lastblock (For " | ||
1569 | "Virtual Partition)\n"); | ||
1570 | return 1; | ||
1571 | } | ||
1572 | |||
1573 | for (j = 0; j < sbi->s_partitions; j++) { | ||
1574 | struct udf_part_map *map2 = &sbi->s_partmaps[j]; | ||
1575 | if (j != i && | ||
1576 | map->s_volumeseqnum == | ||
1577 | map2->s_volumeseqnum && | ||
1578 | map->s_partition_num == | ||
1579 | map2->s_partition_num) { | ||
1580 | ino.partitionReferenceNum = j; | ||
1581 | ino.logicalBlockNum = | ||
1582 | sbi->s_last_block - | ||
1583 | map2->s_partition_root; | ||
1584 | break; | ||
1585 | } | ||
1586 | } | ||
1587 | |||
1588 | if (j == sbi->s_partitions) | ||
1589 | return 1; | ||
1590 | |||
1591 | sbi->s_vat_inode = udf_iget(sb, ino); | ||
1592 | if (!sbi->s_vat_inode) | ||
1593 | return 1; | ||
1594 | |||
1595 | if (map->s_partition_type == UDF_VIRTUAL_MAP15) { | ||
1596 | map->s_type_specific.s_virtual.s_start_offset = | ||
1597 | udf_ext0_offset(sbi->s_vat_inode); | ||
1598 | map->s_type_specific.s_virtual.s_num_entries = | ||
1599 | (sbi->s_vat_inode->i_size - 36) >> 2; | ||
1600 | } else if (map->s_partition_type == UDF_VIRTUAL_MAP20) { | ||
1601 | uint32_t pos; | ||
1602 | struct virtualAllocationTable20 *vat20; | ||
1603 | |||
1604 | pos = udf_block_map(sbi->s_vat_inode, 0); | ||
1605 | bh = sb_bread(sb, pos); | ||
1606 | if (!bh) | ||
1607 | return 1; | ||
1608 | vat20 = (struct virtualAllocationTable20 *) | ||
1609 | bh->b_data + | ||
1610 | udf_ext0_offset(sbi->s_vat_inode); | ||
1611 | map->s_type_specific.s_virtual.s_start_offset = | ||
1612 | le16_to_cpu(vat20->lengthHeader) + | ||
1613 | udf_ext0_offset(sbi->s_vat_inode); | ||
1614 | map->s_type_specific.s_virtual.s_num_entries = | ||
1615 | (sbi->s_vat_inode->i_size - | ||
1616 | map->s_type_specific.s_virtual. | ||
1617 | s_start_offset) >> 2; | ||
1618 | brelse(bh); | ||
1619 | } | ||
1620 | map->s_partition_root = udf_get_pblock(sb, 0, i, 0); | ||
1621 | map->s_partition_len = | ||
1622 | sbi->s_partmaps[ino.partitionReferenceNum]. | ||
1623 | s_partition_len; | ||
1624 | } | ||
1625 | } | ||
1626 | return 0; | 1747 | return 0; |
1627 | } | 1748 | } |
1628 | 1749 | ||
@@ -1630,65 +1751,61 @@ static void udf_open_lvid(struct super_block *sb) | |||
1630 | { | 1751 | { |
1631 | struct udf_sb_info *sbi = UDF_SB(sb); | 1752 | struct udf_sb_info *sbi = UDF_SB(sb); |
1632 | struct buffer_head *bh = sbi->s_lvid_bh; | 1753 | struct buffer_head *bh = sbi->s_lvid_bh; |
1633 | if (bh) { | 1754 | struct logicalVolIntegrityDesc *lvid; |
1634 | kernel_timestamp cpu_time; | 1755 | struct logicalVolIntegrityDescImpUse *lvidiu; |
1635 | struct logicalVolIntegrityDesc *lvid = | 1756 | if (!bh) |
1636 | (struct logicalVolIntegrityDesc *)bh->b_data; | 1757 | return; |
1637 | struct logicalVolIntegrityDescImpUse *lvidiu = | ||
1638 | udf_sb_lvidiu(sbi); | ||
1639 | 1758 | ||
1640 | lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; | 1759 | lvid = (struct logicalVolIntegrityDesc *)bh->b_data; |
1641 | lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; | 1760 | lvidiu = udf_sb_lvidiu(sbi); |
1642 | if (udf_time_to_stamp(&cpu_time, CURRENT_TIME)) | ||
1643 | lvid->recordingDateAndTime = cpu_to_lets(cpu_time); | ||
1644 | lvid->integrityType = LVID_INTEGRITY_TYPE_OPEN; | ||
1645 | 1761 | ||
1646 | lvid->descTag.descCRC = cpu_to_le16( | 1762 | lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; |
1647 | udf_crc((char *)lvid + sizeof(tag), | 1763 | lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; |
1648 | le16_to_cpu(lvid->descTag.descCRCLength), | 1764 | udf_time_to_disk_stamp(&lvid->recordingDateAndTime, |
1649 | 0)); | 1765 | CURRENT_TIME); |
1766 | lvid->integrityType = LVID_INTEGRITY_TYPE_OPEN; | ||
1650 | 1767 | ||
1651 | lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); | 1768 | lvid->descTag.descCRC = cpu_to_le16( |
1652 | mark_buffer_dirty(bh); | 1769 | crc_itu_t(0, (char *)lvid + sizeof(tag), |
1653 | } | 1770 | le16_to_cpu(lvid->descTag.descCRCLength))); |
1771 | |||
1772 | lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); | ||
1773 | mark_buffer_dirty(bh); | ||
1654 | } | 1774 | } |
1655 | 1775 | ||
1656 | static void udf_close_lvid(struct super_block *sb) | 1776 | static void udf_close_lvid(struct super_block *sb) |
1657 | { | 1777 | { |
1658 | kernel_timestamp cpu_time; | ||
1659 | struct udf_sb_info *sbi = UDF_SB(sb); | 1778 | struct udf_sb_info *sbi = UDF_SB(sb); |
1660 | struct buffer_head *bh = sbi->s_lvid_bh; | 1779 | struct buffer_head *bh = sbi->s_lvid_bh; |
1661 | struct logicalVolIntegrityDesc *lvid; | 1780 | struct logicalVolIntegrityDesc *lvid; |
1781 | struct logicalVolIntegrityDescImpUse *lvidiu; | ||
1662 | 1782 | ||
1663 | if (!bh) | 1783 | if (!bh) |
1664 | return; | 1784 | return; |
1665 | 1785 | ||
1666 | lvid = (struct logicalVolIntegrityDesc *)bh->b_data; | 1786 | lvid = (struct logicalVolIntegrityDesc *)bh->b_data; |
1667 | 1787 | ||
1668 | if (lvid->integrityType == LVID_INTEGRITY_TYPE_OPEN) { | 1788 | if (lvid->integrityType != LVID_INTEGRITY_TYPE_OPEN) |
1669 | struct logicalVolIntegrityDescImpUse *lvidiu = | 1789 | return; |
1670 | udf_sb_lvidiu(sbi); | 1790 | |
1671 | lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; | 1791 | lvidiu = udf_sb_lvidiu(sbi); |
1672 | lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; | 1792 | lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; |
1673 | if (udf_time_to_stamp(&cpu_time, CURRENT_TIME)) | 1793 | lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; |
1674 | lvid->recordingDateAndTime = cpu_to_lets(cpu_time); | 1794 | udf_time_to_disk_stamp(&lvid->recordingDateAndTime, CURRENT_TIME); |
1675 | if (UDF_MAX_WRITE_VERSION > le16_to_cpu(lvidiu->maxUDFWriteRev)) | 1795 | if (UDF_MAX_WRITE_VERSION > le16_to_cpu(lvidiu->maxUDFWriteRev)) |
1676 | lvidiu->maxUDFWriteRev = | 1796 | lvidiu->maxUDFWriteRev = cpu_to_le16(UDF_MAX_WRITE_VERSION); |
1677 | cpu_to_le16(UDF_MAX_WRITE_VERSION); | 1797 | if (sbi->s_udfrev > le16_to_cpu(lvidiu->minUDFReadRev)) |
1678 | if (sbi->s_udfrev > le16_to_cpu(lvidiu->minUDFReadRev)) | 1798 | lvidiu->minUDFReadRev = cpu_to_le16(sbi->s_udfrev); |
1679 | lvidiu->minUDFReadRev = cpu_to_le16(sbi->s_udfrev); | 1799 | if (sbi->s_udfrev > le16_to_cpu(lvidiu->minUDFWriteRev)) |
1680 | if (sbi->s_udfrev > le16_to_cpu(lvidiu->minUDFWriteRev)) | 1800 | lvidiu->minUDFWriteRev = cpu_to_le16(sbi->s_udfrev); |
1681 | lvidiu->minUDFWriteRev = cpu_to_le16(sbi->s_udfrev); | 1801 | lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE); |
1682 | lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE); | 1802 | |
1683 | 1803 | lvid->descTag.descCRC = cpu_to_le16( | |
1684 | lvid->descTag.descCRC = cpu_to_le16( | 1804 | crc_itu_t(0, (char *)lvid + sizeof(tag), |
1685 | udf_crc((char *)lvid + sizeof(tag), | 1805 | le16_to_cpu(lvid->descTag.descCRCLength))); |
1686 | le16_to_cpu(lvid->descTag.descCRCLength), | 1806 | |
1687 | 0)); | 1807 | lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); |
1688 | 1808 | mark_buffer_dirty(bh); | |
1689 | lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); | ||
1690 | mark_buffer_dirty(bh); | ||
1691 | } | ||
1692 | } | 1809 | } |
1693 | 1810 | ||
1694 | static void udf_sb_free_bitmap(struct udf_bitmap *bitmap) | 1811 | static void udf_sb_free_bitmap(struct udf_bitmap *bitmap) |
@@ -1708,22 +1825,35 @@ static void udf_sb_free_bitmap(struct udf_bitmap *bitmap) | |||
1708 | vfree(bitmap); | 1825 | vfree(bitmap); |
1709 | } | 1826 | } |
1710 | 1827 | ||
1711 | /* | 1828 | static void udf_free_partition(struct udf_part_map *map) |
1712 | * udf_read_super | 1829 | { |
1713 | * | 1830 | int i; |
1714 | * PURPOSE | 1831 | struct udf_meta_data *mdata; |
1715 | * Complete the specified super block. | 1832 | |
1716 | * | 1833 | if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) |
1717 | * PRE-CONDITIONS | 1834 | iput(map->s_uspace.s_table); |
1718 | * sb Pointer to superblock to complete - never NULL. | 1835 | if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) |
1719 | * sb->s_dev Device to read suberblock from. | 1836 | iput(map->s_fspace.s_table); |
1720 | * options Pointer to mount options. | 1837 | if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) |
1721 | * silent Silent flag. | 1838 | udf_sb_free_bitmap(map->s_uspace.s_bitmap); |
1722 | * | 1839 | if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) |
1723 | * HISTORY | 1840 | udf_sb_free_bitmap(map->s_fspace.s_bitmap); |
1724 | * July 1, 1997 - Andrew E. Mileski | 1841 | if (map->s_partition_type == UDF_SPARABLE_MAP15) |
1725 | * Written, tested, and released. | 1842 | for (i = 0; i < 4; i++) |
1726 | */ | 1843 | brelse(map->s_type_specific.s_sparing.s_spar_map[i]); |
1844 | else if (map->s_partition_type == UDF_METADATA_MAP25) { | ||
1845 | mdata = &map->s_type_specific.s_metadata; | ||
1846 | iput(mdata->s_metadata_fe); | ||
1847 | mdata->s_metadata_fe = NULL; | ||
1848 | |||
1849 | iput(mdata->s_mirror_fe); | ||
1850 | mdata->s_mirror_fe = NULL; | ||
1851 | |||
1852 | iput(mdata->s_bitmap_fe); | ||
1853 | mdata->s_bitmap_fe = NULL; | ||
1854 | } | ||
1855 | } | ||
1856 | |||
1727 | static int udf_fill_super(struct super_block *sb, void *options, int silent) | 1857 | static int udf_fill_super(struct super_block *sb, void *options, int silent) |
1728 | { | 1858 | { |
1729 | int i; | 1859 | int i; |
@@ -1776,8 +1906,11 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) | |||
1776 | sbi->s_nls_map = uopt.nls_map; | 1906 | sbi->s_nls_map = uopt.nls_map; |
1777 | 1907 | ||
1778 | /* Set the block size for all transfers */ | 1908 | /* Set the block size for all transfers */ |
1779 | if (!udf_set_blocksize(sb, uopt.blocksize)) | 1909 | if (!sb_min_blocksize(sb, uopt.blocksize)) { |
1910 | udf_debug("Bad block size (%d)\n", uopt.blocksize); | ||
1911 | printk(KERN_ERR "udf: bad block size (%d)\n", uopt.blocksize); | ||
1780 | goto error_out; | 1912 | goto error_out; |
1913 | } | ||
1781 | 1914 | ||
1782 | if (uopt.session == 0xFFFFFFFF) | 1915 | if (uopt.session == 0xFFFFFFFF) |
1783 | sbi->s_session = udf_get_last_session(sb); | 1916 | sbi->s_session = udf_get_last_session(sb); |
@@ -1789,7 +1922,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) | |||
1789 | sbi->s_last_block = uopt.lastblock; | 1922 | sbi->s_last_block = uopt.lastblock; |
1790 | sbi->s_anchor[0] = sbi->s_anchor[1] = 0; | 1923 | sbi->s_anchor[0] = sbi->s_anchor[1] = 0; |
1791 | sbi->s_anchor[2] = uopt.anchor; | 1924 | sbi->s_anchor[2] = uopt.anchor; |
1792 | sbi->s_anchor[3] = 256; | ||
1793 | 1925 | ||
1794 | if (udf_check_valid(sb, uopt.novrs, silent)) { | 1926 | if (udf_check_valid(sb, uopt.novrs, silent)) { |
1795 | /* read volume recognition sequences */ | 1927 | /* read volume recognition sequences */ |
@@ -1806,7 +1938,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) | |||
1806 | sb->s_magic = UDF_SUPER_MAGIC; | 1938 | sb->s_magic = UDF_SUPER_MAGIC; |
1807 | sb->s_time_gran = 1000; | 1939 | sb->s_time_gran = 1000; |
1808 | 1940 | ||
1809 | if (udf_load_partition(sb, &fileset)) { | 1941 | if (udf_load_sequence(sb, &fileset)) { |
1810 | printk(KERN_WARNING "UDF-fs: No partition found (1)\n"); | 1942 | printk(KERN_WARNING "UDF-fs: No partition found (1)\n"); |
1811 | goto error_out; | 1943 | goto error_out; |
1812 | } | 1944 | } |
@@ -1856,12 +1988,12 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) | |||
1856 | } | 1988 | } |
1857 | 1989 | ||
1858 | if (!silent) { | 1990 | if (!silent) { |
1859 | kernel_timestamp ts; | 1991 | timestamp ts; |
1860 | udf_time_to_stamp(&ts, sbi->s_record_time); | 1992 | udf_time_to_disk_stamp(&ts, sbi->s_record_time); |
1861 | udf_info("UDF: Mounting volume '%s', " | 1993 | udf_info("UDF: Mounting volume '%s', " |
1862 | "timestamp %04u/%02u/%02u %02u:%02u (%x)\n", | 1994 | "timestamp %04u/%02u/%02u %02u:%02u (%x)\n", |
1863 | sbi->s_volume_ident, ts.year, ts.month, ts.day, | 1995 | sbi->s_volume_ident, le16_to_cpu(ts.year), ts.month, ts.day, |
1864 | ts.hour, ts.minute, ts.typeAndTimezone); | 1996 | ts.hour, ts.minute, le16_to_cpu(ts.typeAndTimezone)); |
1865 | } | 1997 | } |
1866 | if (!(sb->s_flags & MS_RDONLY)) | 1998 | if (!(sb->s_flags & MS_RDONLY)) |
1867 | udf_open_lvid(sb); | 1999 | udf_open_lvid(sb); |
@@ -1890,21 +2022,9 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) | |||
1890 | error_out: | 2022 | error_out: |
1891 | if (sbi->s_vat_inode) | 2023 | if (sbi->s_vat_inode) |
1892 | iput(sbi->s_vat_inode); | 2024 | iput(sbi->s_vat_inode); |
1893 | if (sbi->s_partitions) { | 2025 | if (sbi->s_partitions) |
1894 | struct udf_part_map *map = &sbi->s_partmaps[sbi->s_partition]; | 2026 | for (i = 0; i < sbi->s_partitions; i++) |
1895 | if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) | 2027 | udf_free_partition(&sbi->s_partmaps[i]); |
1896 | iput(map->s_uspace.s_table); | ||
1897 | if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) | ||
1898 | iput(map->s_fspace.s_table); | ||
1899 | if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) | ||
1900 | udf_sb_free_bitmap(map->s_uspace.s_bitmap); | ||
1901 | if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) | ||
1902 | udf_sb_free_bitmap(map->s_fspace.s_bitmap); | ||
1903 | if (map->s_partition_type == UDF_SPARABLE_MAP15) | ||
1904 | for (i = 0; i < 4; i++) | ||
1905 | brelse(map->s_type_specific.s_sparing. | ||
1906 | s_spar_map[i]); | ||
1907 | } | ||
1908 | #ifdef CONFIG_UDF_NLS | 2028 | #ifdef CONFIG_UDF_NLS |
1909 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) | 2029 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) |
1910 | unload_nls(sbi->s_nls_map); | 2030 | unload_nls(sbi->s_nls_map); |
@@ -1920,8 +2040,8 @@ error_out: | |||
1920 | return -EINVAL; | 2040 | return -EINVAL; |
1921 | } | 2041 | } |
1922 | 2042 | ||
1923 | void udf_error(struct super_block *sb, const char *function, | 2043 | static void udf_error(struct super_block *sb, const char *function, |
1924 | const char *fmt, ...) | 2044 | const char *fmt, ...) |
1925 | { | 2045 | { |
1926 | va_list args; | 2046 | va_list args; |
1927 | 2047 | ||
@@ -1948,19 +2068,6 @@ void udf_warning(struct super_block *sb, const char *function, | |||
1948 | sb->s_id, function, error_buf); | 2068 | sb->s_id, function, error_buf); |
1949 | } | 2069 | } |
1950 | 2070 | ||
1951 | /* | ||
1952 | * udf_put_super | ||
1953 | * | ||
1954 | * PURPOSE | ||
1955 | * Prepare for destruction of the superblock. | ||
1956 | * | ||
1957 | * DESCRIPTION | ||
1958 | * Called before the filesystem is unmounted. | ||
1959 | * | ||
1960 | * HISTORY | ||
1961 | * July 1, 1997 - Andrew E. Mileski | ||
1962 | * Written, tested, and released. | ||
1963 | */ | ||
1964 | static void udf_put_super(struct super_block *sb) | 2071 | static void udf_put_super(struct super_block *sb) |
1965 | { | 2072 | { |
1966 | int i; | 2073 | int i; |
@@ -1969,21 +2076,9 @@ static void udf_put_super(struct super_block *sb) | |||
1969 | sbi = UDF_SB(sb); | 2076 | sbi = UDF_SB(sb); |
1970 | if (sbi->s_vat_inode) | 2077 | if (sbi->s_vat_inode) |
1971 | iput(sbi->s_vat_inode); | 2078 | iput(sbi->s_vat_inode); |
1972 | if (sbi->s_partitions) { | 2079 | if (sbi->s_partitions) |
1973 | struct udf_part_map *map = &sbi->s_partmaps[sbi->s_partition]; | 2080 | for (i = 0; i < sbi->s_partitions; i++) |
1974 | if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE) | 2081 | udf_free_partition(&sbi->s_partmaps[i]); |
1975 | iput(map->s_uspace.s_table); | ||
1976 | if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE) | ||
1977 | iput(map->s_fspace.s_table); | ||
1978 | if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) | ||
1979 | udf_sb_free_bitmap(map->s_uspace.s_bitmap); | ||
1980 | if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP) | ||
1981 | udf_sb_free_bitmap(map->s_fspace.s_bitmap); | ||
1982 | if (map->s_partition_type == UDF_SPARABLE_MAP15) | ||
1983 | for (i = 0; i < 4; i++) | ||
1984 | brelse(map->s_type_specific.s_sparing. | ||
1985 | s_spar_map[i]); | ||
1986 | } | ||
1987 | #ifdef CONFIG_UDF_NLS | 2082 | #ifdef CONFIG_UDF_NLS |
1988 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) | 2083 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) |
1989 | unload_nls(sbi->s_nls_map); | 2084 | unload_nls(sbi->s_nls_map); |
@@ -1996,19 +2091,6 @@ static void udf_put_super(struct super_block *sb) | |||
1996 | sb->s_fs_info = NULL; | 2091 | sb->s_fs_info = NULL; |
1997 | } | 2092 | } |
1998 | 2093 | ||
1999 | /* | ||
2000 | * udf_stat_fs | ||
2001 | * | ||
2002 | * PURPOSE | ||
2003 | * Return info about the filesystem. | ||
2004 | * | ||
2005 | * DESCRIPTION | ||
2006 | * Called by sys_statfs() | ||
2007 | * | ||
2008 | * HISTORY | ||
2009 | * July 1, 1997 - Andrew E. Mileski | ||
2010 | * Written, tested, and released. | ||
2011 | */ | ||
2012 | static int udf_statfs(struct dentry *dentry, struct kstatfs *buf) | 2094 | static int udf_statfs(struct dentry *dentry, struct kstatfs *buf) |
2013 | { | 2095 | { |
2014 | struct super_block *sb = dentry->d_sb; | 2096 | struct super_block *sb = dentry->d_sb; |
@@ -2035,10 +2117,6 @@ static int udf_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
2035 | return 0; | 2117 | return 0; |
2036 | } | 2118 | } |
2037 | 2119 | ||
2038 | static unsigned char udf_bitmap_lookup[16] = { | ||
2039 | 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 | ||
2040 | }; | ||
2041 | |||
2042 | static unsigned int udf_count_free_bitmap(struct super_block *sb, | 2120 | static unsigned int udf_count_free_bitmap(struct super_block *sb, |
2043 | struct udf_bitmap *bitmap) | 2121 | struct udf_bitmap *bitmap) |
2044 | { | 2122 | { |
@@ -2048,7 +2126,6 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb, | |||
2048 | int block = 0, newblock; | 2126 | int block = 0, newblock; |
2049 | kernel_lb_addr loc; | 2127 | kernel_lb_addr loc; |
2050 | uint32_t bytes; | 2128 | uint32_t bytes; |
2051 | uint8_t value; | ||
2052 | uint8_t *ptr; | 2129 | uint8_t *ptr; |
2053 | uint16_t ident; | 2130 | uint16_t ident; |
2054 | struct spaceBitmapDesc *bm; | 2131 | struct spaceBitmapDesc *bm; |
@@ -2074,13 +2151,10 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb, | |||
2074 | ptr = (uint8_t *)bh->b_data; | 2151 | ptr = (uint8_t *)bh->b_data; |
2075 | 2152 | ||
2076 | while (bytes > 0) { | 2153 | while (bytes > 0) { |
2077 | while ((bytes > 0) && (index < sb->s_blocksize)) { | 2154 | u32 cur_bytes = min_t(u32, bytes, sb->s_blocksize - index); |
2078 | value = ptr[index]; | 2155 | accum += bitmap_weight((const unsigned long *)(ptr + index), |
2079 | accum += udf_bitmap_lookup[value & 0x0f]; | 2156 | cur_bytes * 8); |
2080 | accum += udf_bitmap_lookup[value >> 4]; | 2157 | bytes -= cur_bytes; |
2081 | index++; | ||
2082 | bytes--; | ||
2083 | } | ||
2084 | if (bytes) { | 2158 | if (bytes) { |
2085 | brelse(bh); | 2159 | brelse(bh); |
2086 | newblock = udf_get_lb_pblock(sb, loc, ++block); | 2160 | newblock = udf_get_lb_pblock(sb, loc, ++block); |
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index 6ec99221e50c..c3265e1385d4 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c | |||
@@ -23,7 +23,6 @@ | |||
23 | #include <asm/uaccess.h> | 23 | #include <asm/uaccess.h> |
24 | #include <linux/errno.h> | 24 | #include <linux/errno.h> |
25 | #include <linux/fs.h> | 25 | #include <linux/fs.h> |
26 | #include <linux/udf_fs.h> | ||
27 | #include <linux/time.h> | 26 | #include <linux/time.h> |
28 | #include <linux/mm.h> | 27 | #include <linux/mm.h> |
29 | #include <linux/stat.h> | 28 | #include <linux/stat.h> |
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index fe61be17cdab..65e19b4f9424 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include "udfdecl.h" | 22 | #include "udfdecl.h" |
23 | #include <linux/fs.h> | 23 | #include <linux/fs.h> |
24 | #include <linux/mm.h> | 24 | #include <linux/mm.h> |
25 | #include <linux/udf_fs.h> | ||
26 | #include <linux/buffer_head.h> | 25 | #include <linux/buffer_head.h> |
27 | 26 | ||
28 | #include "udf_i.h" | 27 | #include "udf_i.h" |
@@ -180,6 +179,24 @@ void udf_discard_prealloc(struct inode *inode) | |||
180 | brelse(epos.bh); | 179 | brelse(epos.bh); |
181 | } | 180 | } |
182 | 181 | ||
182 | static void udf_update_alloc_ext_desc(struct inode *inode, | ||
183 | struct extent_position *epos, | ||
184 | u32 lenalloc) | ||
185 | { | ||
186 | struct super_block *sb = inode->i_sb; | ||
187 | struct udf_sb_info *sbi = UDF_SB(sb); | ||
188 | |||
189 | struct allocExtDesc *aed = (struct allocExtDesc *) (epos->bh->b_data); | ||
190 | int len = sizeof(struct allocExtDesc); | ||
191 | |||
192 | aed->lengthAllocDescs = cpu_to_le32(lenalloc); | ||
193 | if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT) || sbi->s_udfrev >= 0x0201) | ||
194 | len += lenalloc; | ||
195 | |||
196 | udf_update_tag(epos->bh->b_data, len); | ||
197 | mark_buffer_dirty_inode(epos->bh, inode); | ||
198 | } | ||
199 | |||
183 | void udf_truncate_extents(struct inode *inode) | 200 | void udf_truncate_extents(struct inode *inode) |
184 | { | 201 | { |
185 | struct extent_position epos; | 202 | struct extent_position epos; |
@@ -187,7 +204,6 @@ void udf_truncate_extents(struct inode *inode) | |||
187 | uint32_t elen, nelen = 0, indirect_ext_len = 0, lenalloc; | 204 | uint32_t elen, nelen = 0, indirect_ext_len = 0, lenalloc; |
188 | int8_t etype; | 205 | int8_t etype; |
189 | struct super_block *sb = inode->i_sb; | 206 | struct super_block *sb = inode->i_sb; |
190 | struct udf_sb_info *sbi = UDF_SB(sb); | ||
191 | sector_t first_block = inode->i_size >> sb->s_blocksize_bits, offset; | 207 | sector_t first_block = inode->i_size >> sb->s_blocksize_bits, offset; |
192 | loff_t byte_offset; | 208 | loff_t byte_offset; |
193 | int adsize; | 209 | int adsize; |
@@ -224,35 +240,15 @@ void udf_truncate_extents(struct inode *inode) | |||
224 | if (indirect_ext_len) { | 240 | if (indirect_ext_len) { |
225 | /* We managed to free all extents in the | 241 | /* We managed to free all extents in the |
226 | * indirect extent - free it too */ | 242 | * indirect extent - free it too */ |
227 | if (!epos.bh) | 243 | BUG_ON(!epos.bh); |
228 | BUG(); | ||
229 | udf_free_blocks(sb, inode, epos.block, | 244 | udf_free_blocks(sb, inode, epos.block, |
230 | 0, indirect_ext_len); | 245 | 0, indirect_ext_len); |
231 | } else { | 246 | } else if (!epos.bh) { |
232 | if (!epos.bh) { | 247 | iinfo->i_lenAlloc = lenalloc; |
233 | iinfo->i_lenAlloc = | 248 | mark_inode_dirty(inode); |
234 | lenalloc; | 249 | } else |
235 | mark_inode_dirty(inode); | 250 | udf_update_alloc_ext_desc(inode, |
236 | } else { | 251 | &epos, lenalloc); |
237 | struct allocExtDesc *aed = | ||
238 | (struct allocExtDesc *) | ||
239 | (epos.bh->b_data); | ||
240 | int len = | ||
241 | sizeof(struct allocExtDesc); | ||
242 | |||
243 | aed->lengthAllocDescs = | ||
244 | cpu_to_le32(lenalloc); | ||
245 | if (!UDF_QUERY_FLAG(sb, | ||
246 | UDF_FLAG_STRICT) || | ||
247 | sbi->s_udfrev >= 0x0201) | ||
248 | len += lenalloc; | ||
249 | |||
250 | udf_update_tag(epos.bh->b_data, | ||
251 | len); | ||
252 | mark_buffer_dirty_inode( | ||
253 | epos.bh, inode); | ||
254 | } | ||
255 | } | ||
256 | brelse(epos.bh); | 252 | brelse(epos.bh); |
257 | epos.offset = sizeof(struct allocExtDesc); | 253 | epos.offset = sizeof(struct allocExtDesc); |
258 | epos.block = eloc; | 254 | epos.block = eloc; |
@@ -272,29 +268,14 @@ void udf_truncate_extents(struct inode *inode) | |||
272 | } | 268 | } |
273 | 269 | ||
274 | if (indirect_ext_len) { | 270 | if (indirect_ext_len) { |
275 | if (!epos.bh) | 271 | BUG_ON(!epos.bh); |
276 | BUG(); | ||
277 | udf_free_blocks(sb, inode, epos.block, 0, | 272 | udf_free_blocks(sb, inode, epos.block, 0, |
278 | indirect_ext_len); | 273 | indirect_ext_len); |
279 | } else { | 274 | } else if (!epos.bh) { |
280 | if (!epos.bh) { | 275 | iinfo->i_lenAlloc = lenalloc; |
281 | iinfo->i_lenAlloc = lenalloc; | 276 | mark_inode_dirty(inode); |
282 | mark_inode_dirty(inode); | 277 | } else |
283 | } else { | 278 | udf_update_alloc_ext_desc(inode, &epos, lenalloc); |
284 | struct allocExtDesc *aed = | ||
285 | (struct allocExtDesc *)(epos.bh->b_data); | ||
286 | aed->lengthAllocDescs = cpu_to_le32(lenalloc); | ||
287 | if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT) || | ||
288 | sbi->s_udfrev >= 0x0201) | ||
289 | udf_update_tag(epos.bh->b_data, | ||
290 | lenalloc + | ||
291 | sizeof(struct allocExtDesc)); | ||
292 | else | ||
293 | udf_update_tag(epos.bh->b_data, | ||
294 | sizeof(struct allocExtDesc)); | ||
295 | mark_buffer_dirty_inode(epos.bh, inode); | ||
296 | } | ||
297 | } | ||
298 | } else if (inode->i_size) { | 279 | } else if (inode->i_size) { |
299 | if (byte_offset) { | 280 | if (byte_offset) { |
300 | kernel_long_ad extent; | 281 | kernel_long_ad extent; |
diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h index ccc52f16bf7d..4f86b1d98a5d 100644 --- a/fs/udf/udf_i.h +++ b/fs/udf/udf_i.h | |||
@@ -1,10 +1,32 @@ | |||
1 | #ifndef __LINUX_UDF_I_H | 1 | #ifndef _UDF_I_H |
2 | #define __LINUX_UDF_I_H | 2 | #define _UDF_I_H |
3 | |||
4 | struct udf_inode_info { | ||
5 | struct timespec i_crtime; | ||
6 | /* Physical address of inode */ | ||
7 | kernel_lb_addr i_location; | ||
8 | __u64 i_unique; | ||
9 | __u32 i_lenEAttr; | ||
10 | __u32 i_lenAlloc; | ||
11 | __u64 i_lenExtents; | ||
12 | __u32 i_next_alloc_block; | ||
13 | __u32 i_next_alloc_goal; | ||
14 | unsigned i_alloc_type : 3; | ||
15 | unsigned i_efe : 1; /* extendedFileEntry */ | ||
16 | unsigned i_use : 1; /* unallocSpaceEntry */ | ||
17 | unsigned i_strat4096 : 1; | ||
18 | unsigned reserved : 26; | ||
19 | union { | ||
20 | short_ad *i_sad; | ||
21 | long_ad *i_lad; | ||
22 | __u8 *i_data; | ||
23 | } i_ext; | ||
24 | struct inode vfs_inode; | ||
25 | }; | ||
3 | 26 | ||
4 | #include <linux/udf_fs_i.h> | ||
5 | static inline struct udf_inode_info *UDF_I(struct inode *inode) | 27 | static inline struct udf_inode_info *UDF_I(struct inode *inode) |
6 | { | 28 | { |
7 | return list_entry(inode, struct udf_inode_info, vfs_inode); | 29 | return list_entry(inode, struct udf_inode_info, vfs_inode); |
8 | } | 30 | } |
9 | 31 | ||
10 | #endif /* !defined(_LINUX_UDF_I_H) */ | 32 | #endif /* _UDF_I_H) */ |
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 737d1c604eea..1c1c514a9725 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h | |||
@@ -1,10 +1,12 @@ | |||
1 | #ifndef __LINUX_UDF_SB_H | 1 | #ifndef __LINUX_UDF_SB_H |
2 | #define __LINUX_UDF_SB_H | 2 | #define __LINUX_UDF_SB_H |
3 | 3 | ||
4 | #include <linux/mutex.h> | ||
5 | |||
4 | /* Since UDF 2.01 is ISO 13346 based... */ | 6 | /* Since UDF 2.01 is ISO 13346 based... */ |
5 | #define UDF_SUPER_MAGIC 0x15013346 | 7 | #define UDF_SUPER_MAGIC 0x15013346 |
6 | 8 | ||
7 | #define UDF_MAX_READ_VERSION 0x0201 | 9 | #define UDF_MAX_READ_VERSION 0x0250 |
8 | #define UDF_MAX_WRITE_VERSION 0x0201 | 10 | #define UDF_MAX_WRITE_VERSION 0x0201 |
9 | 11 | ||
10 | #define UDF_FLAG_USE_EXTENDED_FE 0 | 12 | #define UDF_FLAG_USE_EXTENDED_FE 0 |
@@ -38,6 +40,111 @@ | |||
38 | #define UDF_PART_FLAG_REWRITABLE 0x0040 | 40 | #define UDF_PART_FLAG_REWRITABLE 0x0040 |
39 | #define UDF_PART_FLAG_OVERWRITABLE 0x0080 | 41 | #define UDF_PART_FLAG_OVERWRITABLE 0x0080 |
40 | 42 | ||
43 | #define UDF_MAX_BLOCK_LOADED 8 | ||
44 | |||
45 | #define UDF_TYPE1_MAP15 0x1511U | ||
46 | #define UDF_VIRTUAL_MAP15 0x1512U | ||
47 | #define UDF_VIRTUAL_MAP20 0x2012U | ||
48 | #define UDF_SPARABLE_MAP15 0x1522U | ||
49 | #define UDF_METADATA_MAP25 0x2511U | ||
50 | |||
51 | #pragma pack(1) /* XXX(hch): Why? This file just defines in-core structures */ | ||
52 | |||
53 | struct udf_meta_data { | ||
54 | __u32 s_meta_file_loc; | ||
55 | __u32 s_mirror_file_loc; | ||
56 | __u32 s_bitmap_file_loc; | ||
57 | __u32 s_alloc_unit_size; | ||
58 | __u16 s_align_unit_size; | ||
59 | __u8 s_dup_md_flag; | ||
60 | struct inode *s_metadata_fe; | ||
61 | struct inode *s_mirror_fe; | ||
62 | struct inode *s_bitmap_fe; | ||
63 | }; | ||
64 | |||
65 | struct udf_sparing_data { | ||
66 | __u16 s_packet_len; | ||
67 | struct buffer_head *s_spar_map[4]; | ||
68 | }; | ||
69 | |||
70 | struct udf_virtual_data { | ||
71 | __u32 s_num_entries; | ||
72 | __u16 s_start_offset; | ||
73 | }; | ||
74 | |||
75 | struct udf_bitmap { | ||
76 | __u32 s_extLength; | ||
77 | __u32 s_extPosition; | ||
78 | __u16 s_nr_groups; | ||
79 | struct buffer_head **s_block_bitmap; | ||
80 | }; | ||
81 | |||
82 | struct udf_part_map { | ||
83 | union { | ||
84 | struct udf_bitmap *s_bitmap; | ||
85 | struct inode *s_table; | ||
86 | } s_uspace; | ||
87 | union { | ||
88 | struct udf_bitmap *s_bitmap; | ||
89 | struct inode *s_table; | ||
90 | } s_fspace; | ||
91 | __u32 s_partition_root; | ||
92 | __u32 s_partition_len; | ||
93 | __u16 s_partition_type; | ||
94 | __u16 s_partition_num; | ||
95 | union { | ||
96 | struct udf_sparing_data s_sparing; | ||
97 | struct udf_virtual_data s_virtual; | ||
98 | struct udf_meta_data s_metadata; | ||
99 | } s_type_specific; | ||
100 | __u32 (*s_partition_func)(struct super_block *, __u32, __u16, __u32); | ||
101 | __u16 s_volumeseqnum; | ||
102 | __u16 s_partition_flags; | ||
103 | }; | ||
104 | |||
105 | #pragma pack() | ||
106 | |||
107 | struct udf_sb_info { | ||
108 | struct udf_part_map *s_partmaps; | ||
109 | __u8 s_volume_ident[32]; | ||
110 | |||
111 | /* Overall info */ | ||
112 | __u16 s_partitions; | ||
113 | __u16 s_partition; | ||
114 | |||
115 | /* Sector headers */ | ||
116 | __s32 s_session; | ||
117 | __u32 s_anchor[3]; | ||
118 | __u32 s_last_block; | ||
119 | |||
120 | struct buffer_head *s_lvid_bh; | ||
121 | |||
122 | /* Default permissions */ | ||
123 | mode_t s_umask; | ||
124 | gid_t s_gid; | ||
125 | uid_t s_uid; | ||
126 | |||
127 | /* Root Info */ | ||
128 | struct timespec s_record_time; | ||
129 | |||
130 | /* Fileset Info */ | ||
131 | __u16 s_serial_number; | ||
132 | |||
133 | /* highest UDF revision we have recorded to this media */ | ||
134 | __u16 s_udfrev; | ||
135 | |||
136 | /* Miscellaneous flags */ | ||
137 | __u32 s_flags; | ||
138 | |||
139 | /* Encoding info */ | ||
140 | struct nls_table *s_nls_map; | ||
141 | |||
142 | /* VAT inode */ | ||
143 | struct inode *s_vat_inode; | ||
144 | |||
145 | struct mutex s_alloc_mutex; | ||
146 | }; | ||
147 | |||
41 | static inline struct udf_sb_info *UDF_SB(struct super_block *sb) | 148 | static inline struct udf_sb_info *UDF_SB(struct super_block *sb) |
42 | { | 149 | { |
43 | return sb->s_fs_info; | 150 | return sb->s_fs_info; |
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 681dc2b66cdb..f3f45d029277 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h | |||
@@ -1,17 +1,37 @@ | |||
1 | #ifndef __UDF_DECL_H | 1 | #ifndef __UDF_DECL_H |
2 | #define __UDF_DECL_H | 2 | #define __UDF_DECL_H |
3 | 3 | ||
4 | #include <linux/udf_fs.h> | ||
5 | #include "ecma_167.h" | 4 | #include "ecma_167.h" |
6 | #include "osta_udf.h" | 5 | #include "osta_udf.h" |
7 | 6 | ||
8 | #include <linux/fs.h> | 7 | #include <linux/fs.h> |
9 | #include <linux/types.h> | 8 | #include <linux/types.h> |
10 | #include <linux/udf_fs_i.h> | ||
11 | #include <linux/udf_fs_sb.h> | ||
12 | #include <linux/buffer_head.h> | 9 | #include <linux/buffer_head.h> |
10 | #include <linux/udf_fs_i.h> | ||
13 | 11 | ||
12 | #include "udf_sb.h" | ||
14 | #include "udfend.h" | 13 | #include "udfend.h" |
14 | #include "udf_i.h" | ||
15 | |||
16 | #define UDF_PREALLOCATE | ||
17 | #define UDF_DEFAULT_PREALLOC_BLOCKS 8 | ||
18 | |||
19 | #define UDFFS_DEBUG | ||
20 | |||
21 | #ifdef UDFFS_DEBUG | ||
22 | #define udf_debug(f, a...) \ | ||
23 | do { \ | ||
24 | printk(KERN_DEBUG "UDF-fs DEBUG %s:%d:%s: ", \ | ||
25 | __FILE__, __LINE__, __func__); \ | ||
26 | printk(f, ##a); \ | ||
27 | } while (0) | ||
28 | #else | ||
29 | #define udf_debug(f, a...) /**/ | ||
30 | #endif | ||
31 | |||
32 | #define udf_info(f, a...) \ | ||
33 | printk(KERN_INFO "UDF-fs INFO " f, ##a); | ||
34 | |||
15 | 35 | ||
16 | #define udf_fixed_to_variable(x) ( ( ( (x) >> 5 ) * 39 ) + ( (x) & 0x0000001F ) ) | 36 | #define udf_fixed_to_variable(x) ( ( ( (x) >> 5 ) * 39 ) + ( (x) & 0x0000001F ) ) |
17 | #define udf_variable_to_fixed(x) ( ( ( (x) / 39 ) << 5 ) + ( (x) % 39 ) ) | 37 | #define udf_variable_to_fixed(x) ( ( ( (x) / 39 ) << 5 ) + ( (x) % 39 ) ) |
@@ -23,16 +43,24 @@ | |||
23 | #define UDF_NAME_LEN 256 | 43 | #define UDF_NAME_LEN 256 |
24 | #define UDF_PATH_LEN 1023 | 44 | #define UDF_PATH_LEN 1023 |
25 | 45 | ||
26 | #define udf_file_entry_alloc_offset(inode)\ | 46 | static inline size_t udf_file_entry_alloc_offset(struct inode *inode) |
27 | (UDF_I(inode)->i_use ?\ | 47 | { |
28 | sizeof(struct unallocSpaceEntry) :\ | 48 | struct udf_inode_info *iinfo = UDF_I(inode); |
29 | ((UDF_I(inode)->i_efe ?\ | 49 | if (iinfo->i_use) |
30 | sizeof(struct extendedFileEntry) :\ | 50 | return sizeof(struct unallocSpaceEntry); |
31 | sizeof(struct fileEntry)) + UDF_I(inode)->i_lenEAttr)) | 51 | else if (iinfo->i_efe) |
32 | 52 | return sizeof(struct extendedFileEntry) + iinfo->i_lenEAttr; | |
33 | #define udf_ext0_offset(inode)\ | 53 | else |
34 | (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB ?\ | 54 | return sizeof(struct fileEntry) + iinfo->i_lenEAttr; |
35 | udf_file_entry_alloc_offset(inode) : 0) | 55 | } |
56 | |||
57 | static inline size_t udf_ext0_offset(struct inode *inode) | ||
58 | { | ||
59 | if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) | ||
60 | return udf_file_entry_alloc_offset(inode); | ||
61 | else | ||
62 | return 0; | ||
63 | } | ||
36 | 64 | ||
37 | #define udf_get_lb_pblock(sb,loc,offset) udf_get_pblock((sb), (loc).logicalBlockNum, (loc).partitionReferenceNum, (offset)) | 65 | #define udf_get_lb_pblock(sb,loc,offset) udf_get_pblock((sb), (loc).logicalBlockNum, (loc).partitionReferenceNum, (offset)) |
38 | 66 | ||
@@ -83,7 +111,6 @@ struct extent_position { | |||
83 | }; | 111 | }; |
84 | 112 | ||
85 | /* super.c */ | 113 | /* super.c */ |
86 | extern void udf_error(struct super_block *, const char *, const char *, ...); | ||
87 | extern void udf_warning(struct super_block *, const char *, const char *, ...); | 114 | extern void udf_warning(struct super_block *, const char *, const char *, ...); |
88 | 115 | ||
89 | /* namei.c */ | 116 | /* namei.c */ |
@@ -150,6 +177,8 @@ extern uint32_t udf_get_pblock_virt20(struct super_block *, uint32_t, uint16_t, | |||
150 | uint32_t); | 177 | uint32_t); |
151 | extern uint32_t udf_get_pblock_spar15(struct super_block *, uint32_t, uint16_t, | 178 | extern uint32_t udf_get_pblock_spar15(struct super_block *, uint32_t, uint16_t, |
152 | uint32_t); | 179 | uint32_t); |
180 | extern uint32_t udf_get_pblock_meta25(struct super_block *, uint32_t, uint16_t, | ||
181 | uint32_t); | ||
153 | extern int udf_relocate_blocks(struct super_block *, long, long *); | 182 | extern int udf_relocate_blocks(struct super_block *, long, long *); |
154 | 183 | ||
155 | /* unicode.c */ | 184 | /* unicode.c */ |
@@ -157,7 +186,7 @@ extern int udf_get_filename(struct super_block *, uint8_t *, uint8_t *, int); | |||
157 | extern int udf_put_filename(struct super_block *, const uint8_t *, uint8_t *, | 186 | extern int udf_put_filename(struct super_block *, const uint8_t *, uint8_t *, |
158 | int); | 187 | int); |
159 | extern int udf_build_ustr(struct ustr *, dstring *, int); | 188 | extern int udf_build_ustr(struct ustr *, dstring *, int); |
160 | extern int udf_CS0toUTF8(struct ustr *, struct ustr *); | 189 | extern int udf_CS0toUTF8(struct ustr *, const struct ustr *); |
161 | 190 | ||
162 | /* ialloc.c */ | 191 | /* ialloc.c */ |
163 | extern void udf_free_inode(struct inode *); | 192 | extern void udf_free_inode(struct inode *); |
@@ -191,11 +220,9 @@ extern struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, | |||
191 | extern long_ad *udf_get_filelongad(uint8_t *, int, uint32_t *, int); | 220 | extern long_ad *udf_get_filelongad(uint8_t *, int, uint32_t *, int); |
192 | extern short_ad *udf_get_fileshortad(uint8_t *, int, uint32_t *, int); | 221 | extern short_ad *udf_get_fileshortad(uint8_t *, int, uint32_t *, int); |
193 | 222 | ||
194 | /* crc.c */ | ||
195 | extern uint16_t udf_crc(uint8_t *, uint32_t, uint16_t); | ||
196 | |||
197 | /* udftime.c */ | 223 | /* udftime.c */ |
198 | extern time_t *udf_stamp_to_time(time_t *, long *, kernel_timestamp); | 224 | extern struct timespec *udf_disk_stamp_to_time(struct timespec *dest, |
199 | extern kernel_timestamp *udf_time_to_stamp(kernel_timestamp *, struct timespec); | 225 | timestamp src); |
226 | extern timestamp *udf_time_to_disk_stamp(timestamp *dest, struct timespec src); | ||
200 | 227 | ||
201 | #endif /* __UDF_DECL_H */ | 228 | #endif /* __UDF_DECL_H */ |
diff --git a/fs/udf/udfend.h b/fs/udf/udfend.h index c4bd1203f857..489f52fb428c 100644 --- a/fs/udf/udfend.h +++ b/fs/udf/udfend.h | |||
@@ -24,17 +24,6 @@ static inline lb_addr cpu_to_lelb(kernel_lb_addr in) | |||
24 | return out; | 24 | return out; |
25 | } | 25 | } |
26 | 26 | ||
27 | static inline kernel_timestamp lets_to_cpu(timestamp in) | ||
28 | { | ||
29 | kernel_timestamp out; | ||
30 | |||
31 | memcpy(&out, &in, sizeof(timestamp)); | ||
32 | out.typeAndTimezone = le16_to_cpu(in.typeAndTimezone); | ||
33 | out.year = le16_to_cpu(in.year); | ||
34 | |||
35 | return out; | ||
36 | } | ||
37 | |||
38 | static inline short_ad lesa_to_cpu(short_ad in) | 27 | static inline short_ad lesa_to_cpu(short_ad in) |
39 | { | 28 | { |
40 | short_ad out; | 29 | short_ad out; |
@@ -85,15 +74,4 @@ static inline kernel_extent_ad leea_to_cpu(extent_ad in) | |||
85 | return out; | 74 | return out; |
86 | } | 75 | } |
87 | 76 | ||
88 | static inline timestamp cpu_to_lets(kernel_timestamp in) | ||
89 | { | ||
90 | timestamp out; | ||
91 | |||
92 | memcpy(&out, &in, sizeof(timestamp)); | ||
93 | out.typeAndTimezone = cpu_to_le16(in.typeAndTimezone); | ||
94 | out.year = cpu_to_le16(in.year); | ||
95 | |||
96 | return out; | ||
97 | } | ||
98 | |||
99 | #endif /* __UDF_ENDIAN_H */ | 77 | #endif /* __UDF_ENDIAN_H */ |
diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c index ce595732ba6f..5f811655c9b5 100644 --- a/fs/udf/udftime.c +++ b/fs/udf/udftime.c | |||
@@ -85,39 +85,38 @@ extern struct timezone sys_tz; | |||
85 | #define SECS_PER_HOUR (60 * 60) | 85 | #define SECS_PER_HOUR (60 * 60) |
86 | #define SECS_PER_DAY (SECS_PER_HOUR * 24) | 86 | #define SECS_PER_DAY (SECS_PER_HOUR * 24) |
87 | 87 | ||
88 | time_t *udf_stamp_to_time(time_t *dest, long *dest_usec, kernel_timestamp src) | 88 | struct timespec *udf_disk_stamp_to_time(struct timespec *dest, timestamp src) |
89 | { | 89 | { |
90 | int yday; | 90 | int yday; |
91 | uint8_t type = src.typeAndTimezone >> 12; | 91 | u16 typeAndTimezone = le16_to_cpu(src.typeAndTimezone); |
92 | u16 year = le16_to_cpu(src.year); | ||
93 | uint8_t type = typeAndTimezone >> 12; | ||
92 | int16_t offset; | 94 | int16_t offset; |
93 | 95 | ||
94 | if (type == 1) { | 96 | if (type == 1) { |
95 | offset = src.typeAndTimezone << 4; | 97 | offset = typeAndTimezone << 4; |
96 | /* sign extent offset */ | 98 | /* sign extent offset */ |
97 | offset = (offset >> 4); | 99 | offset = (offset >> 4); |
98 | if (offset == -2047) /* unspecified offset */ | 100 | if (offset == -2047) /* unspecified offset */ |
99 | offset = 0; | 101 | offset = 0; |
100 | } else { | 102 | } else |
101 | offset = 0; | 103 | offset = 0; |
102 | } | ||
103 | 104 | ||
104 | if ((src.year < EPOCH_YEAR) || | 105 | if ((year < EPOCH_YEAR) || |
105 | (src.year >= EPOCH_YEAR + MAX_YEAR_SECONDS)) { | 106 | (year >= EPOCH_YEAR + MAX_YEAR_SECONDS)) { |
106 | *dest = -1; | ||
107 | *dest_usec = -1; | ||
108 | return NULL; | 107 | return NULL; |
109 | } | 108 | } |
110 | *dest = year_seconds[src.year - EPOCH_YEAR]; | 109 | dest->tv_sec = year_seconds[year - EPOCH_YEAR]; |
111 | *dest -= offset * 60; | 110 | dest->tv_sec -= offset * 60; |
112 | 111 | ||
113 | yday = ((__mon_yday[__isleap(src.year)][src.month - 1]) + src.day - 1); | 112 | yday = ((__mon_yday[__isleap(year)][src.month - 1]) + src.day - 1); |
114 | *dest += (((yday * 24) + src.hour) * 60 + src.minute) * 60 + src.second; | 113 | dest->tv_sec += (((yday * 24) + src.hour) * 60 + src.minute) * 60 + src.second; |
115 | *dest_usec = src.centiseconds * 10000 + | 114 | dest->tv_nsec = 1000 * (src.centiseconds * 10000 + |
116 | src.hundredsOfMicroseconds * 100 + src.microseconds; | 115 | src.hundredsOfMicroseconds * 100 + src.microseconds); |
117 | return dest; | 116 | return dest; |
118 | } | 117 | } |
119 | 118 | ||
120 | kernel_timestamp *udf_time_to_stamp(kernel_timestamp *dest, struct timespec ts) | 119 | timestamp *udf_time_to_disk_stamp(timestamp *dest, struct timespec ts) |
121 | { | 120 | { |
122 | long int days, rem, y; | 121 | long int days, rem, y; |
123 | const unsigned short int *ip; | 122 | const unsigned short int *ip; |
@@ -128,7 +127,7 @@ kernel_timestamp *udf_time_to_stamp(kernel_timestamp *dest, struct timespec ts) | |||
128 | if (!dest) | 127 | if (!dest) |
129 | return NULL; | 128 | return NULL; |
130 | 129 | ||
131 | dest->typeAndTimezone = 0x1000 | (offset & 0x0FFF); | 130 | dest->typeAndTimezone = cpu_to_le16(0x1000 | (offset & 0x0FFF)); |
132 | 131 | ||
133 | ts.tv_sec += offset * 60; | 132 | ts.tv_sec += offset * 60; |
134 | days = ts.tv_sec / SECS_PER_DAY; | 133 | days = ts.tv_sec / SECS_PER_DAY; |
@@ -151,7 +150,7 @@ kernel_timestamp *udf_time_to_stamp(kernel_timestamp *dest, struct timespec ts) | |||
151 | - LEAPS_THRU_END_OF(y - 1)); | 150 | - LEAPS_THRU_END_OF(y - 1)); |
152 | y = yg; | 151 | y = yg; |
153 | } | 152 | } |
154 | dest->year = y; | 153 | dest->year = cpu_to_le16(y); |
155 | ip = __mon_yday[__isleap(y)]; | 154 | ip = __mon_yday[__isleap(y)]; |
156 | for (y = 11; days < (long int)ip[y]; --y) | 155 | for (y = 11; days < (long int)ip[y]; --y) |
157 | continue; | 156 | continue; |
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index e533b11703bf..9fdf8c93c58e 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c | |||
@@ -23,7 +23,7 @@ | |||
23 | #include <linux/kernel.h> | 23 | #include <linux/kernel.h> |
24 | #include <linux/string.h> /* for memset */ | 24 | #include <linux/string.h> /* for memset */ |
25 | #include <linux/nls.h> | 25 | #include <linux/nls.h> |
26 | #include <linux/udf_fs.h> | 26 | #include <linux/crc-itu-t.h> |
27 | 27 | ||
28 | #include "udf_sb.h" | 28 | #include "udf_sb.h" |
29 | 29 | ||
@@ -49,14 +49,16 @@ int udf_build_ustr(struct ustr *dest, dstring *ptr, int size) | |||
49 | { | 49 | { |
50 | int usesize; | 50 | int usesize; |
51 | 51 | ||
52 | if ((!dest) || (!ptr) || (!size)) | 52 | if (!dest || !ptr || !size) |
53 | return -1; | 53 | return -1; |
54 | BUG_ON(size < 2); | ||
54 | 55 | ||
55 | memset(dest, 0, sizeof(struct ustr)); | 56 | usesize = min_t(size_t, ptr[size - 1], sizeof(dest->u_name)); |
56 | usesize = (size > UDF_NAME_LEN) ? UDF_NAME_LEN : size; | 57 | usesize = min(usesize, size - 2); |
57 | dest->u_cmpID = ptr[0]; | 58 | dest->u_cmpID = ptr[0]; |
58 | dest->u_len = ptr[size - 1]; | 59 | dest->u_len = usesize; |
59 | memcpy(dest->u_name, ptr + 1, usesize - 1); | 60 | memcpy(dest->u_name, ptr + 1, usesize); |
61 | memset(dest->u_name + usesize, 0, sizeof(dest->u_name) - usesize); | ||
60 | 62 | ||
61 | return 0; | 63 | return 0; |
62 | } | 64 | } |
@@ -83,9 +85,6 @@ static int udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize) | |||
83 | * PURPOSE | 85 | * PURPOSE |
84 | * Convert OSTA Compressed Unicode to the UTF-8 equivalent. | 86 | * Convert OSTA Compressed Unicode to the UTF-8 equivalent. |
85 | * | 87 | * |
86 | * DESCRIPTION | ||
87 | * This routine is only called by udf_filldir(). | ||
88 | * | ||
89 | * PRE-CONDITIONS | 88 | * PRE-CONDITIONS |
90 | * utf Pointer to UTF-8 output buffer. | 89 | * utf Pointer to UTF-8 output buffer. |
91 | * ocu Pointer to OSTA Compressed Unicode input buffer | 90 | * ocu Pointer to OSTA Compressed Unicode input buffer |
@@ -99,43 +98,39 @@ static int udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize) | |||
99 | * November 12, 1997 - Andrew E. Mileski | 98 | * November 12, 1997 - Andrew E. Mileski |
100 | * Written, tested, and released. | 99 | * Written, tested, and released. |
101 | */ | 100 | */ |
102 | int udf_CS0toUTF8(struct ustr *utf_o, struct ustr *ocu_i) | 101 | int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i) |
103 | { | 102 | { |
104 | uint8_t *ocu; | 103 | const uint8_t *ocu; |
105 | uint32_t c; | ||
106 | uint8_t cmp_id, ocu_len; | 104 | uint8_t cmp_id, ocu_len; |
107 | int i; | 105 | int i; |
108 | 106 | ||
109 | ocu = ocu_i->u_name; | ||
110 | |||
111 | ocu_len = ocu_i->u_len; | 107 | ocu_len = ocu_i->u_len; |
112 | cmp_id = ocu_i->u_cmpID; | ||
113 | utf_o->u_len = 0; | ||
114 | |||
115 | if (ocu_len == 0) { | 108 | if (ocu_len == 0) { |
116 | memset(utf_o, 0, sizeof(struct ustr)); | 109 | memset(utf_o, 0, sizeof(struct ustr)); |
117 | utf_o->u_cmpID = 0; | ||
118 | utf_o->u_len = 0; | ||
119 | return 0; | 110 | return 0; |
120 | } | 111 | } |
121 | 112 | ||
122 | if ((cmp_id != 8) && (cmp_id != 16)) { | 113 | cmp_id = ocu_i->u_cmpID; |
114 | if (cmp_id != 8 && cmp_id != 16) { | ||
115 | memset(utf_o, 0, sizeof(struct ustr)); | ||
123 | printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", | 116 | printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", |
124 | cmp_id, ocu_i->u_name); | 117 | cmp_id, ocu_i->u_name); |
125 | return 0; | 118 | return 0; |
126 | } | 119 | } |
127 | 120 | ||
121 | ocu = ocu_i->u_name; | ||
122 | utf_o->u_len = 0; | ||
128 | for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) { | 123 | for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) { |
129 | 124 | ||
130 | /* Expand OSTA compressed Unicode to Unicode */ | 125 | /* Expand OSTA compressed Unicode to Unicode */ |
131 | c = ocu[i++]; | 126 | uint32_t c = ocu[i++]; |
132 | if (cmp_id == 16) | 127 | if (cmp_id == 16) |
133 | c = (c << 8) | ocu[i++]; | 128 | c = (c << 8) | ocu[i++]; |
134 | 129 | ||
135 | /* Compress Unicode to UTF-8 */ | 130 | /* Compress Unicode to UTF-8 */ |
136 | if (c < 0x80U) { | 131 | if (c < 0x80U) |
137 | utf_o->u_name[utf_o->u_len++] = (uint8_t)c; | 132 | utf_o->u_name[utf_o->u_len++] = (uint8_t)c; |
138 | } else if (c < 0x800U) { | 133 | else if (c < 0x800U) { |
139 | utf_o->u_name[utf_o->u_len++] = | 134 | utf_o->u_name[utf_o->u_len++] = |
140 | (uint8_t)(0xc0 | (c >> 6)); | 135 | (uint8_t)(0xc0 | (c >> 6)); |
141 | utf_o->u_name[utf_o->u_len++] = | 136 | utf_o->u_name[utf_o->u_len++] = |
@@ -255,35 +250,32 @@ error_out: | |||
255 | } | 250 | } |
256 | 251 | ||
257 | static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, | 252 | static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, |
258 | struct ustr *ocu_i) | 253 | const struct ustr *ocu_i) |
259 | { | 254 | { |
260 | uint8_t *ocu; | 255 | const uint8_t *ocu; |
261 | uint32_t c; | ||
262 | uint8_t cmp_id, ocu_len; | 256 | uint8_t cmp_id, ocu_len; |
263 | int i; | 257 | int i; |
264 | 258 | ||
265 | ocu = ocu_i->u_name; | ||
266 | 259 | ||
267 | ocu_len = ocu_i->u_len; | 260 | ocu_len = ocu_i->u_len; |
268 | cmp_id = ocu_i->u_cmpID; | ||
269 | utf_o->u_len = 0; | ||
270 | |||
271 | if (ocu_len == 0) { | 261 | if (ocu_len == 0) { |
272 | memset(utf_o, 0, sizeof(struct ustr)); | 262 | memset(utf_o, 0, sizeof(struct ustr)); |
273 | utf_o->u_cmpID = 0; | ||
274 | utf_o->u_len = 0; | ||
275 | return 0; | 263 | return 0; |
276 | } | 264 | } |
277 | 265 | ||
278 | if ((cmp_id != 8) && (cmp_id != 16)) { | 266 | cmp_id = ocu_i->u_cmpID; |
267 | if (cmp_id != 8 && cmp_id != 16) { | ||
268 | memset(utf_o, 0, sizeof(struct ustr)); | ||
279 | printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", | 269 | printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", |
280 | cmp_id, ocu_i->u_name); | 270 | cmp_id, ocu_i->u_name); |
281 | return 0; | 271 | return 0; |
282 | } | 272 | } |
283 | 273 | ||
274 | ocu = ocu_i->u_name; | ||
275 | utf_o->u_len = 0; | ||
284 | for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) { | 276 | for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) { |
285 | /* Expand OSTA compressed Unicode to Unicode */ | 277 | /* Expand OSTA compressed Unicode to Unicode */ |
286 | c = ocu[i++]; | 278 | uint32_t c = ocu[i++]; |
287 | if (cmp_id == 16) | 279 | if (cmp_id == 16) |
288 | c = (c << 8) | ocu[i++]; | 280 | c = (c << 8) | ocu[i++]; |
289 | 281 | ||
@@ -463,7 +455,7 @@ static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, | |||
463 | } else if (newIndex > 250) | 455 | } else if (newIndex > 250) |
464 | newIndex = 250; | 456 | newIndex = 250; |
465 | newName[newIndex++] = CRC_MARK; | 457 | newName[newIndex++] = CRC_MARK; |
466 | valueCRC = udf_crc(fidName, fidNameLen, 0); | 458 | valueCRC = crc_itu_t(0, fidName, fidNameLen); |
467 | newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12]; | 459 | newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12]; |
468 | newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8]; | 460 | newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8]; |
469 | newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4]; | 461 | newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4]; |
diff --git a/fs/utimes.c b/fs/utimes.c index b18da9c0b97f..a2bef77dc9c9 100644 --- a/fs/utimes.c +++ b/fs/utimes.c | |||
@@ -2,6 +2,7 @@ | |||
2 | #include <linux/file.h> | 2 | #include <linux/file.h> |
3 | #include <linux/fs.h> | 3 | #include <linux/fs.h> |
4 | #include <linux/linkage.h> | 4 | #include <linux/linkage.h> |
5 | #include <linux/mount.h> | ||
5 | #include <linux/namei.h> | 6 | #include <linux/namei.h> |
6 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
7 | #include <linux/stat.h> | 8 | #include <linux/stat.h> |
@@ -59,6 +60,7 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags | |||
59 | struct inode *inode; | 60 | struct inode *inode; |
60 | struct iattr newattrs; | 61 | struct iattr newattrs; |
61 | struct file *f = NULL; | 62 | struct file *f = NULL; |
63 | struct vfsmount *mnt; | ||
62 | 64 | ||
63 | error = -EINVAL; | 65 | error = -EINVAL; |
64 | if (times && (!nsec_valid(times[0].tv_nsec) || | 66 | if (times && (!nsec_valid(times[0].tv_nsec) || |
@@ -79,18 +81,20 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags | |||
79 | if (!f) | 81 | if (!f) |
80 | goto out; | 82 | goto out; |
81 | dentry = f->f_path.dentry; | 83 | dentry = f->f_path.dentry; |
84 | mnt = f->f_path.mnt; | ||
82 | } else { | 85 | } else { |
83 | error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd); | 86 | error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd); |
84 | if (error) | 87 | if (error) |
85 | goto out; | 88 | goto out; |
86 | 89 | ||
87 | dentry = nd.path.dentry; | 90 | dentry = nd.path.dentry; |
91 | mnt = nd.path.mnt; | ||
88 | } | 92 | } |
89 | 93 | ||
90 | inode = dentry->d_inode; | 94 | inode = dentry->d_inode; |
91 | 95 | ||
92 | error = -EROFS; | 96 | error = mnt_want_write(mnt); |
93 | if (IS_RDONLY(inode)) | 97 | if (error) |
94 | goto dput_and_out; | 98 | goto dput_and_out; |
95 | 99 | ||
96 | /* Don't worry, the checks are done in inode_change_ok() */ | 100 | /* Don't worry, the checks are done in inode_change_ok() */ |
@@ -98,7 +102,7 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags | |||
98 | if (times) { | 102 | if (times) { |
99 | error = -EPERM; | 103 | error = -EPERM; |
100 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | 104 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) |
101 | goto dput_and_out; | 105 | goto mnt_drop_write_and_out; |
102 | 106 | ||
103 | if (times[0].tv_nsec == UTIME_OMIT) | 107 | if (times[0].tv_nsec == UTIME_OMIT) |
104 | newattrs.ia_valid &= ~ATTR_ATIME; | 108 | newattrs.ia_valid &= ~ATTR_ATIME; |
@@ -118,22 +122,24 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags | |||
118 | } else { | 122 | } else { |
119 | error = -EACCES; | 123 | error = -EACCES; |
120 | if (IS_IMMUTABLE(inode)) | 124 | if (IS_IMMUTABLE(inode)) |
121 | goto dput_and_out; | 125 | goto mnt_drop_write_and_out; |
122 | 126 | ||
123 | if (!is_owner_or_cap(inode)) { | 127 | if (!is_owner_or_cap(inode)) { |
124 | if (f) { | 128 | if (f) { |
125 | if (!(f->f_mode & FMODE_WRITE)) | 129 | if (!(f->f_mode & FMODE_WRITE)) |
126 | goto dput_and_out; | 130 | goto mnt_drop_write_and_out; |
127 | } else { | 131 | } else { |
128 | error = vfs_permission(&nd, MAY_WRITE); | 132 | error = vfs_permission(&nd, MAY_WRITE); |
129 | if (error) | 133 | if (error) |
130 | goto dput_and_out; | 134 | goto mnt_drop_write_and_out; |
131 | } | 135 | } |
132 | } | 136 | } |
133 | } | 137 | } |
134 | mutex_lock(&inode->i_mutex); | 138 | mutex_lock(&inode->i_mutex); |
135 | error = notify_change(dentry, &newattrs); | 139 | error = notify_change(dentry, &newattrs); |
136 | mutex_unlock(&inode->i_mutex); | 140 | mutex_unlock(&inode->i_mutex); |
141 | mnt_drop_write_and_out: | ||
142 | mnt_drop_write(mnt); | ||
137 | dput_and_out: | 143 | dput_and_out: |
138 | if (f) | 144 | if (f) |
139 | fput(f); | 145 | fput(f); |
diff --git a/fs/xattr.c b/fs/xattr.c index 3acab1615460..89a942f07e1b 100644 --- a/fs/xattr.c +++ b/fs/xattr.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/slab.h> | 11 | #include <linux/slab.h> |
12 | #include <linux/file.h> | 12 | #include <linux/file.h> |
13 | #include <linux/xattr.h> | 13 | #include <linux/xattr.h> |
14 | #include <linux/mount.h> | ||
14 | #include <linux/namei.h> | 15 | #include <linux/namei.h> |
15 | #include <linux/security.h> | 16 | #include <linux/security.h> |
16 | #include <linux/syscalls.h> | 17 | #include <linux/syscalls.h> |
@@ -32,8 +33,6 @@ xattr_permission(struct inode *inode, const char *name, int mask) | |||
32 | * filesystem or on an immutable / append-only inode. | 33 | * filesystem or on an immutable / append-only inode. |
33 | */ | 34 | */ |
34 | if (mask & MAY_WRITE) { | 35 | if (mask & MAY_WRITE) { |
35 | if (IS_RDONLY(inode)) | ||
36 | return -EROFS; | ||
37 | if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) | 36 | if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) |
38 | return -EPERM; | 37 | return -EPERM; |
39 | } | 38 | } |
@@ -262,7 +261,11 @@ sys_setxattr(char __user *path, char __user *name, void __user *value, | |||
262 | error = user_path_walk(path, &nd); | 261 | error = user_path_walk(path, &nd); |
263 | if (error) | 262 | if (error) |
264 | return error; | 263 | return error; |
265 | error = setxattr(nd.path.dentry, name, value, size, flags); | 264 | error = mnt_want_write(nd.path.mnt); |
265 | if (!error) { | ||
266 | error = setxattr(nd.path.dentry, name, value, size, flags); | ||
267 | mnt_drop_write(nd.path.mnt); | ||
268 | } | ||
266 | path_put(&nd.path); | 269 | path_put(&nd.path); |
267 | return error; | 270 | return error; |
268 | } | 271 | } |
@@ -277,7 +280,11 @@ sys_lsetxattr(char __user *path, char __user *name, void __user *value, | |||
277 | error = user_path_walk_link(path, &nd); | 280 | error = user_path_walk_link(path, &nd); |
278 | if (error) | 281 | if (error) |
279 | return error; | 282 | return error; |
280 | error = setxattr(nd.path.dentry, name, value, size, flags); | 283 | error = mnt_want_write(nd.path.mnt); |
284 | if (!error) { | ||
285 | error = setxattr(nd.path.dentry, name, value, size, flags); | ||
286 | mnt_drop_write(nd.path.mnt); | ||
287 | } | ||
281 | path_put(&nd.path); | 288 | path_put(&nd.path); |
282 | return error; | 289 | return error; |
283 | } | 290 | } |
@@ -295,7 +302,11 @@ sys_fsetxattr(int fd, char __user *name, void __user *value, | |||
295 | return error; | 302 | return error; |
296 | dentry = f->f_path.dentry; | 303 | dentry = f->f_path.dentry; |
297 | audit_inode(NULL, dentry); | 304 | audit_inode(NULL, dentry); |
298 | error = setxattr(dentry, name, value, size, flags); | 305 | error = mnt_want_write(f->f_path.mnt); |
306 | if (!error) { | ||
307 | error = setxattr(dentry, name, value, size, flags); | ||
308 | mnt_drop_write(f->f_path.mnt); | ||
309 | } | ||
299 | fput(f); | 310 | fput(f); |
300 | return error; | 311 | return error; |
301 | } | 312 | } |
@@ -482,7 +493,11 @@ sys_removexattr(char __user *path, char __user *name) | |||
482 | error = user_path_walk(path, &nd); | 493 | error = user_path_walk(path, &nd); |
483 | if (error) | 494 | if (error) |
484 | return error; | 495 | return error; |
485 | error = removexattr(nd.path.dentry, name); | 496 | error = mnt_want_write(nd.path.mnt); |
497 | if (!error) { | ||
498 | error = removexattr(nd.path.dentry, name); | ||
499 | mnt_drop_write(nd.path.mnt); | ||
500 | } | ||
486 | path_put(&nd.path); | 501 | path_put(&nd.path); |
487 | return error; | 502 | return error; |
488 | } | 503 | } |
@@ -496,7 +511,11 @@ sys_lremovexattr(char __user *path, char __user *name) | |||
496 | error = user_path_walk_link(path, &nd); | 511 | error = user_path_walk_link(path, &nd); |
497 | if (error) | 512 | if (error) |
498 | return error; | 513 | return error; |
499 | error = removexattr(nd.path.dentry, name); | 514 | error = mnt_want_write(nd.path.mnt); |
515 | if (!error) { | ||
516 | error = removexattr(nd.path.dentry, name); | ||
517 | mnt_drop_write(nd.path.mnt); | ||
518 | } | ||
500 | path_put(&nd.path); | 519 | path_put(&nd.path); |
501 | return error; | 520 | return error; |
502 | } | 521 | } |
@@ -513,7 +532,11 @@ sys_fremovexattr(int fd, char __user *name) | |||
513 | return error; | 532 | return error; |
514 | dentry = f->f_path.dentry; | 533 | dentry = f->f_path.dentry; |
515 | audit_inode(NULL, dentry); | 534 | audit_inode(NULL, dentry); |
516 | error = removexattr(dentry, name); | 535 | error = mnt_want_write(f->f_path.mnt); |
536 | if (!error) { | ||
537 | error = removexattr(dentry, name); | ||
538 | mnt_drop_write(f->f_path.mnt); | ||
539 | } | ||
517 | fput(f); | 540 | fput(f); |
518 | return error; | 541 | return error; |
519 | } | 542 | } |
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index 35115bca036e..524021ff5436 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig | |||
@@ -35,18 +35,6 @@ config XFS_QUOTA | |||
35 | with or without the generic quota support enabled (CONFIG_QUOTA) - | 35 | with or without the generic quota support enabled (CONFIG_QUOTA) - |
36 | they are completely independent subsystems. | 36 | they are completely independent subsystems. |
37 | 37 | ||
38 | config XFS_SECURITY | ||
39 | bool "XFS Security Label support" | ||
40 | depends on XFS_FS | ||
41 | help | ||
42 | Security labels support alternative access control models | ||
43 | implemented by security modules like SELinux. This option | ||
44 | enables an extended attribute namespace for inode security | ||
45 | labels in the XFS filesystem. | ||
46 | |||
47 | If you are not using a security module that requires using | ||
48 | extended attributes for inode security labels, say N. | ||
49 | |||
50 | config XFS_POSIX_ACL | 38 | config XFS_POSIX_ACL |
51 | bool "XFS POSIX ACL support" | 39 | bool "XFS POSIX ACL support" |
52 | depends on XFS_FS | 40 | depends on XFS_FS |
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index e040f1ce1b6a..9b1bb17a0501 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c | |||
@@ -37,7 +37,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags) | |||
37 | #ifdef DEBUG | 37 | #ifdef DEBUG |
38 | if (unlikely(!(flags & KM_LARGE) && (size > PAGE_SIZE))) { | 38 | if (unlikely(!(flags & KM_LARGE) && (size > PAGE_SIZE))) { |
39 | printk(KERN_WARNING "Large %s attempt, size=%ld\n", | 39 | printk(KERN_WARNING "Large %s attempt, size=%ld\n", |
40 | __FUNCTION__, (long)size); | 40 | __func__, (long)size); |
41 | dump_stack(); | 41 | dump_stack(); |
42 | } | 42 | } |
43 | #endif | 43 | #endif |
@@ -52,7 +52,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags) | |||
52 | if (!(++retries % 100)) | 52 | if (!(++retries % 100)) |
53 | printk(KERN_ERR "XFS: possible memory allocation " | 53 | printk(KERN_ERR "XFS: possible memory allocation " |
54 | "deadlock in %s (mode:0x%x)\n", | 54 | "deadlock in %s (mode:0x%x)\n", |
55 | __FUNCTION__, lflags); | 55 | __func__, lflags); |
56 | congestion_wait(WRITE, HZ/50); | 56 | congestion_wait(WRITE, HZ/50); |
57 | } while (1); | 57 | } while (1); |
58 | } | 58 | } |
@@ -129,7 +129,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) | |||
129 | if (!(++retries % 100)) | 129 | if (!(++retries % 100)) |
130 | printk(KERN_ERR "XFS: possible memory allocation " | 130 | printk(KERN_ERR "XFS: possible memory allocation " |
131 | "deadlock in %s (mode:0x%x)\n", | 131 | "deadlock in %s (mode:0x%x)\n", |
132 | __FUNCTION__, lflags); | 132 | __func__, lflags); |
133 | congestion_wait(WRITE, HZ/50); | 133 | congestion_wait(WRITE, HZ/50); |
134 | } while (1); | 134 | } while (1); |
135 | } | 135 | } |
diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h index 2009e6d922ce..3abe7e9ceb33 100644 --- a/fs/xfs/linux-2.6/sema.h +++ b/fs/xfs/linux-2.6/sema.h | |||
@@ -20,8 +20,8 @@ | |||
20 | 20 | ||
21 | #include <linux/time.h> | 21 | #include <linux/time.h> |
22 | #include <linux/wait.h> | 22 | #include <linux/wait.h> |
23 | #include <linux/semaphore.h> | ||
23 | #include <asm/atomic.h> | 24 | #include <asm/atomic.h> |
24 | #include <asm/semaphore.h> | ||
25 | 25 | ||
26 | /* | 26 | /* |
27 | * sema_t structure just maps to struct semaphore in Linux kernel. | 27 | * sema_t structure just maps to struct semaphore in Linux kernel. |
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index e0519529c26c..a55c3b26d840 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -243,8 +243,12 @@ xfs_end_bio_unwritten( | |||
243 | size_t size = ioend->io_size; | 243 | size_t size = ioend->io_size; |
244 | 244 | ||
245 | if (likely(!ioend->io_error)) { | 245 | if (likely(!ioend->io_error)) { |
246 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) | 246 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
247 | xfs_iomap_write_unwritten(ip, offset, size); | 247 | int error; |
248 | error = xfs_iomap_write_unwritten(ip, offset, size); | ||
249 | if (error) | ||
250 | ioend->io_error = error; | ||
251 | } | ||
248 | xfs_setfilesize(ioend); | 252 | xfs_setfilesize(ioend); |
249 | } | 253 | } |
250 | xfs_destroy_ioend(ioend); | 254 | xfs_destroy_ioend(ioend); |
@@ -1532,9 +1536,9 @@ xfs_vm_bmap( | |||
1532 | struct xfs_inode *ip = XFS_I(inode); | 1536 | struct xfs_inode *ip = XFS_I(inode); |
1533 | 1537 | ||
1534 | xfs_itrace_entry(XFS_I(inode)); | 1538 | xfs_itrace_entry(XFS_I(inode)); |
1535 | xfs_rwlock(ip, VRWLOCK_READ); | 1539 | xfs_ilock(ip, XFS_IOLOCK_SHARED); |
1536 | xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); | 1540 | xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); |
1537 | xfs_rwunlock(ip, VRWLOCK_READ); | 1541 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); |
1538 | return generic_block_bmap(mapping, block, xfs_get_blocks); | 1542 | return generic_block_bmap(mapping, block, xfs_get_blocks); |
1539 | } | 1543 | } |
1540 | 1544 | ||
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index e347bfd47c91..52f6846101d5 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -400,7 +400,7 @@ _xfs_buf_lookup_pages( | |||
400 | printk(KERN_ERR | 400 | printk(KERN_ERR |
401 | "XFS: possible memory allocation " | 401 | "XFS: possible memory allocation " |
402 | "deadlock in %s (mode:0x%x)\n", | 402 | "deadlock in %s (mode:0x%x)\n", |
403 | __FUNCTION__, gfp_mask); | 403 | __func__, gfp_mask); |
404 | 404 | ||
405 | XFS_STATS_INC(xb_page_retries); | 405 | XFS_STATS_INC(xb_page_retries); |
406 | xfsbufd_wakeup(0, gfp_mask); | 406 | xfsbufd_wakeup(0, gfp_mask); |
@@ -598,7 +598,7 @@ xfs_buf_get_flags( | |||
598 | error = _xfs_buf_map_pages(bp, flags); | 598 | error = _xfs_buf_map_pages(bp, flags); |
599 | if (unlikely(error)) { | 599 | if (unlikely(error)) { |
600 | printk(KERN_WARNING "%s: failed to map pages\n", | 600 | printk(KERN_WARNING "%s: failed to map pages\n", |
601 | __FUNCTION__); | 601 | __func__); |
602 | goto no_buffer; | 602 | goto no_buffer; |
603 | } | 603 | } |
604 | } | 604 | } |
@@ -778,7 +778,7 @@ xfs_buf_get_noaddr( | |||
778 | error = _xfs_buf_map_pages(bp, XBF_MAPPED); | 778 | error = _xfs_buf_map_pages(bp, XBF_MAPPED); |
779 | if (unlikely(error)) { | 779 | if (unlikely(error)) { |
780 | printk(KERN_WARNING "%s: failed to map pages\n", | 780 | printk(KERN_WARNING "%s: failed to map pages\n", |
781 | __FUNCTION__); | 781 | __func__); |
782 | goto fail_free_mem; | 782 | goto fail_free_mem; |
783 | } | 783 | } |
784 | 784 | ||
@@ -1060,7 +1060,7 @@ xfs_buf_iostart( | |||
1060 | bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC); | 1060 | bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC); |
1061 | bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC); | 1061 | bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC); |
1062 | xfs_buf_delwri_queue(bp, 1); | 1062 | xfs_buf_delwri_queue(bp, 1); |
1063 | return status; | 1063 | return 0; |
1064 | } | 1064 | } |
1065 | 1065 | ||
1066 | bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \ | 1066 | bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \ |
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index a3d207de48b8..841d7883528d 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -387,11 +387,15 @@ static inline int XFS_bwrite(xfs_buf_t *bp) | |||
387 | return error; | 387 | return error; |
388 | } | 388 | } |
389 | 389 | ||
390 | static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp) | 390 | /* |
391 | * No error can be returned from xfs_buf_iostart for delwri | ||
392 | * buffers as they are queued and no I/O is issued. | ||
393 | */ | ||
394 | static inline void xfs_bdwrite(void *mp, xfs_buf_t *bp) | ||
391 | { | 395 | { |
392 | bp->b_strat = xfs_bdstrat_cb; | 396 | bp->b_strat = xfs_bdstrat_cb; |
393 | bp->b_fspriv3 = mp; | 397 | bp->b_fspriv3 = mp; |
394 | return xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC); | 398 | (void)xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC); |
395 | } | 399 | } |
396 | 400 | ||
397 | #define XFS_bdstrat(bp) xfs_buf_iorequest(bp) | 401 | #define XFS_bdstrat(bp) xfs_buf_iorequest(bp) |
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h index e7f3da61c6c3..652721ce0ea5 100644 --- a/fs/xfs/linux-2.6/xfs_cred.h +++ b/fs/xfs/linux-2.6/xfs_cred.h | |||
@@ -30,7 +30,7 @@ typedef struct cred { | |||
30 | extern struct cred *sys_cred; | 30 | extern struct cred *sys_cred; |
31 | 31 | ||
32 | /* this is a hack.. (assumes sys_cred is the only cred_t in the system) */ | 32 | /* this is a hack.. (assumes sys_cred is the only cred_t in the system) */ |
33 | static __inline int capable_cred(cred_t *cr, int cid) | 33 | static inline int capable_cred(cred_t *cr, int cid) |
34 | { | 34 | { |
35 | return (cr == sys_cred) ? 1 : capable(cid); | 35 | return (cr == sys_cred) ? 1 : capable(cid); |
36 | } | 36 | } |
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index ca4f66c4de16..265f0168ab76 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
23 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
24 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
25 | #include "xfs_dir2.h" | ||
25 | #include "xfs_dmapi.h" | 26 | #include "xfs_dmapi.h" |
26 | #include "xfs_mount.h" | 27 | #include "xfs_mount.h" |
27 | #include "xfs_export.h" | 28 | #include "xfs_export.h" |
@@ -30,8 +31,6 @@ | |||
30 | #include "xfs_inode.h" | 31 | #include "xfs_inode.h" |
31 | #include "xfs_vfsops.h" | 32 | #include "xfs_vfsops.h" |
32 | 33 | ||
33 | static struct dentry dotdot = { .d_name.name = "..", .d_name.len = 2, }; | ||
34 | |||
35 | /* | 34 | /* |
36 | * Note that we only accept fileids which are long enough rather than allow | 35 | * Note that we only accept fileids which are long enough rather than allow |
37 | * the parent generation number to default to zero. XFS considers zero a | 36 | * the parent generation number to default to zero. XFS considers zero a |
@@ -66,7 +65,7 @@ xfs_fs_encode_fh( | |||
66 | int len; | 65 | int len; |
67 | 66 | ||
68 | /* Directories don't need their parent encoded, they have ".." */ | 67 | /* Directories don't need their parent encoded, they have ".." */ |
69 | if (S_ISDIR(inode->i_mode)) | 68 | if (S_ISDIR(inode->i_mode) || !connectable) |
70 | fileid_type = FILEID_INO32_GEN; | 69 | fileid_type = FILEID_INO32_GEN; |
71 | else | 70 | else |
72 | fileid_type = FILEID_INO32_GEN_PARENT; | 71 | fileid_type = FILEID_INO32_GEN_PARENT; |
@@ -213,17 +212,16 @@ xfs_fs_get_parent( | |||
213 | struct dentry *child) | 212 | struct dentry *child) |
214 | { | 213 | { |
215 | int error; | 214 | int error; |
216 | bhv_vnode_t *cvp; | 215 | struct xfs_inode *cip; |
217 | struct dentry *parent; | 216 | struct dentry *parent; |
218 | 217 | ||
219 | cvp = NULL; | 218 | error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip); |
220 | error = xfs_lookup(XFS_I(child->d_inode), &dotdot, &cvp); | ||
221 | if (unlikely(error)) | 219 | if (unlikely(error)) |
222 | return ERR_PTR(-error); | 220 | return ERR_PTR(-error); |
223 | 221 | ||
224 | parent = d_alloc_anon(vn_to_inode(cvp)); | 222 | parent = d_alloc_anon(cip->i_vnode); |
225 | if (unlikely(!parent)) { | 223 | if (unlikely(!parent)) { |
226 | VN_RELE(cvp); | 224 | iput(cip->i_vnode); |
227 | return ERR_PTR(-ENOMEM); | 225 | return ERR_PTR(-ENOMEM); |
228 | } | 226 | } |
229 | return parent; | 227 | return parent; |
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index edab1ffbb163..05905246434d 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c | |||
@@ -469,16 +469,11 @@ xfs_file_open_exec( | |||
469 | struct inode *inode) | 469 | struct inode *inode) |
470 | { | 470 | { |
471 | struct xfs_mount *mp = XFS_M(inode->i_sb); | 471 | struct xfs_mount *mp = XFS_M(inode->i_sb); |
472 | struct xfs_inode *ip = XFS_I(inode); | ||
472 | 473 | ||
473 | if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI)) { | 474 | if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI) && |
474 | if (DM_EVENT_ENABLED(XFS_I(inode), DM_EVENT_READ)) { | 475 | DM_EVENT_ENABLED(ip, DM_EVENT_READ)) |
475 | bhv_vnode_t *vp = vn_from_inode(inode); | 476 | return -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL); |
476 | |||
477 | return -XFS_SEND_DATA(mp, DM_EVENT_READ, | ||
478 | vp, 0, 0, 0, NULL); | ||
479 | } | ||
480 | } | ||
481 | |||
482 | return 0; | 477 | return 0; |
483 | } | 478 | } |
484 | #endif /* HAVE_FOP_OPEN_EXEC */ | 479 | #endif /* HAVE_FOP_OPEN_EXEC */ |
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index ac6d34cc355d..1eefe61f0e10 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c | |||
@@ -17,18 +17,7 @@ | |||
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_vnodeops.h" | 19 | #include "xfs_vnodeops.h" |
20 | |||
21 | /* | ||
22 | * The following six includes are needed so that we can include | ||
23 | * xfs_inode.h. What a mess.. | ||
24 | */ | ||
25 | #include "xfs_bmap_btree.h" | 20 | #include "xfs_bmap_btree.h" |
26 | #include "xfs_inum.h" | ||
27 | #include "xfs_dir2.h" | ||
28 | #include "xfs_dir2_sf.h" | ||
29 | #include "xfs_attr_sf.h" | ||
30 | #include "xfs_dinode.h" | ||
31 | |||
32 | #include "xfs_inode.h" | 21 | #include "xfs_inode.h" |
33 | 22 | ||
34 | int fs_noerr(void) { return 0; } | 23 | int fs_noerr(void) { return 0; } |
@@ -42,11 +31,10 @@ xfs_tosspages( | |||
42 | xfs_off_t last, | 31 | xfs_off_t last, |
43 | int fiopt) | 32 | int fiopt) |
44 | { | 33 | { |
45 | bhv_vnode_t *vp = XFS_ITOV(ip); | 34 | struct address_space *mapping = ip->i_vnode->i_mapping; |
46 | struct inode *inode = vn_to_inode(vp); | ||
47 | 35 | ||
48 | if (VN_CACHED(vp)) | 36 | if (mapping->nrpages) |
49 | truncate_inode_pages(inode->i_mapping, first); | 37 | truncate_inode_pages(mapping, first); |
50 | } | 38 | } |
51 | 39 | ||
52 | int | 40 | int |
@@ -56,15 +44,14 @@ xfs_flushinval_pages( | |||
56 | xfs_off_t last, | 44 | xfs_off_t last, |
57 | int fiopt) | 45 | int fiopt) |
58 | { | 46 | { |
59 | bhv_vnode_t *vp = XFS_ITOV(ip); | 47 | struct address_space *mapping = ip->i_vnode->i_mapping; |
60 | struct inode *inode = vn_to_inode(vp); | ||
61 | int ret = 0; | 48 | int ret = 0; |
62 | 49 | ||
63 | if (VN_CACHED(vp)) { | 50 | if (mapping->nrpages) { |
64 | xfs_iflags_clear(ip, XFS_ITRUNCATED); | 51 | xfs_iflags_clear(ip, XFS_ITRUNCATED); |
65 | ret = filemap_write_and_wait(inode->i_mapping); | 52 | ret = filemap_write_and_wait(mapping); |
66 | if (!ret) | 53 | if (!ret) |
67 | truncate_inode_pages(inode->i_mapping, first); | 54 | truncate_inode_pages(mapping, first); |
68 | } | 55 | } |
69 | return ret; | 56 | return ret; |
70 | } | 57 | } |
@@ -77,17 +64,16 @@ xfs_flush_pages( | |||
77 | uint64_t flags, | 64 | uint64_t flags, |
78 | int fiopt) | 65 | int fiopt) |
79 | { | 66 | { |
80 | bhv_vnode_t *vp = XFS_ITOV(ip); | 67 | struct address_space *mapping = ip->i_vnode->i_mapping; |
81 | struct inode *inode = vn_to_inode(vp); | ||
82 | int ret = 0; | 68 | int ret = 0; |
83 | int ret2; | 69 | int ret2; |
84 | 70 | ||
85 | if (VN_DIRTY(vp)) { | 71 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { |
86 | xfs_iflags_clear(ip, XFS_ITRUNCATED); | 72 | xfs_iflags_clear(ip, XFS_ITRUNCATED); |
87 | ret = filemap_fdatawrite(inode->i_mapping); | 73 | ret = filemap_fdatawrite(mapping); |
88 | if (flags & XFS_B_ASYNC) | 74 | if (flags & XFS_B_ASYNC) |
89 | return ret; | 75 | return ret; |
90 | ret2 = filemap_fdatawait(inode->i_mapping); | 76 | ret2 = filemap_fdatawait(mapping); |
91 | if (!ret) | 77 | if (!ret) |
92 | ret = ret2; | 78 | ret = ret2; |
93 | } | 79 | } |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index f34bd010eb51..4ddb86b73c6b 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c | |||
@@ -535,8 +535,6 @@ xfs_attrmulti_attr_set( | |||
535 | char *kbuf; | 535 | char *kbuf; |
536 | int error = EFAULT; | 536 | int error = EFAULT; |
537 | 537 | ||
538 | if (IS_RDONLY(inode)) | ||
539 | return -EROFS; | ||
540 | if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) | 538 | if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) |
541 | return EPERM; | 539 | return EPERM; |
542 | if (len > XATTR_SIZE_MAX) | 540 | if (len > XATTR_SIZE_MAX) |
@@ -562,8 +560,6 @@ xfs_attrmulti_attr_remove( | |||
562 | char *name, | 560 | char *name, |
563 | __uint32_t flags) | 561 | __uint32_t flags) |
564 | { | 562 | { |
565 | if (IS_RDONLY(inode)) | ||
566 | return -EROFS; | ||
567 | if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) | 563 | if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) |
568 | return EPERM; | 564 | return EPERM; |
569 | return xfs_attr_remove(XFS_I(inode), name, flags); | 565 | return xfs_attr_remove(XFS_I(inode), name, flags); |
@@ -573,6 +569,7 @@ STATIC int | |||
573 | xfs_attrmulti_by_handle( | 569 | xfs_attrmulti_by_handle( |
574 | xfs_mount_t *mp, | 570 | xfs_mount_t *mp, |
575 | void __user *arg, | 571 | void __user *arg, |
572 | struct file *parfilp, | ||
576 | struct inode *parinode) | 573 | struct inode *parinode) |
577 | { | 574 | { |
578 | int error; | 575 | int error; |
@@ -626,13 +623,21 @@ xfs_attrmulti_by_handle( | |||
626 | &ops[i].am_length, ops[i].am_flags); | 623 | &ops[i].am_length, ops[i].am_flags); |
627 | break; | 624 | break; |
628 | case ATTR_OP_SET: | 625 | case ATTR_OP_SET: |
626 | ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); | ||
627 | if (ops[i].am_error) | ||
628 | break; | ||
629 | ops[i].am_error = xfs_attrmulti_attr_set(inode, | 629 | ops[i].am_error = xfs_attrmulti_attr_set(inode, |
630 | attr_name, ops[i].am_attrvalue, | 630 | attr_name, ops[i].am_attrvalue, |
631 | ops[i].am_length, ops[i].am_flags); | 631 | ops[i].am_length, ops[i].am_flags); |
632 | mnt_drop_write(parfilp->f_path.mnt); | ||
632 | break; | 633 | break; |
633 | case ATTR_OP_REMOVE: | 634 | case ATTR_OP_REMOVE: |
635 | ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); | ||
636 | if (ops[i].am_error) | ||
637 | break; | ||
634 | ops[i].am_error = xfs_attrmulti_attr_remove(inode, | 638 | ops[i].am_error = xfs_attrmulti_attr_remove(inode, |
635 | attr_name, ops[i].am_flags); | 639 | attr_name, ops[i].am_flags); |
640 | mnt_drop_write(parfilp->f_path.mnt); | ||
636 | break; | 641 | break; |
637 | default: | 642 | default: |
638 | ops[i].am_error = EINVAL; | 643 | ops[i].am_error = EINVAL; |
@@ -651,314 +656,6 @@ xfs_attrmulti_by_handle( | |||
651 | return -error; | 656 | return -error; |
652 | } | 657 | } |
653 | 658 | ||
654 | /* prototypes for a few of the stack-hungry cases that have | ||
655 | * their own functions. Functions are defined after their use | ||
656 | * so gcc doesn't get fancy and inline them with -03 */ | ||
657 | |||
658 | STATIC int | ||
659 | xfs_ioc_space( | ||
660 | struct xfs_inode *ip, | ||
661 | struct inode *inode, | ||
662 | struct file *filp, | ||
663 | int flags, | ||
664 | unsigned int cmd, | ||
665 | void __user *arg); | ||
666 | |||
667 | STATIC int | ||
668 | xfs_ioc_bulkstat( | ||
669 | xfs_mount_t *mp, | ||
670 | unsigned int cmd, | ||
671 | void __user *arg); | ||
672 | |||
673 | STATIC int | ||
674 | xfs_ioc_fsgeometry_v1( | ||
675 | xfs_mount_t *mp, | ||
676 | void __user *arg); | ||
677 | |||
678 | STATIC int | ||
679 | xfs_ioc_fsgeometry( | ||
680 | xfs_mount_t *mp, | ||
681 | void __user *arg); | ||
682 | |||
683 | STATIC int | ||
684 | xfs_ioc_xattr( | ||
685 | xfs_inode_t *ip, | ||
686 | struct file *filp, | ||
687 | unsigned int cmd, | ||
688 | void __user *arg); | ||
689 | |||
690 | STATIC int | ||
691 | xfs_ioc_fsgetxattr( | ||
692 | xfs_inode_t *ip, | ||
693 | int attr, | ||
694 | void __user *arg); | ||
695 | |||
696 | STATIC int | ||
697 | xfs_ioc_getbmap( | ||
698 | struct xfs_inode *ip, | ||
699 | int flags, | ||
700 | unsigned int cmd, | ||
701 | void __user *arg); | ||
702 | |||
703 | STATIC int | ||
704 | xfs_ioc_getbmapx( | ||
705 | struct xfs_inode *ip, | ||
706 | void __user *arg); | ||
707 | |||
708 | int | ||
709 | xfs_ioctl( | ||
710 | xfs_inode_t *ip, | ||
711 | struct file *filp, | ||
712 | int ioflags, | ||
713 | unsigned int cmd, | ||
714 | void __user *arg) | ||
715 | { | ||
716 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
717 | xfs_mount_t *mp = ip->i_mount; | ||
718 | int error; | ||
719 | |||
720 | xfs_itrace_entry(XFS_I(inode)); | ||
721 | switch (cmd) { | ||
722 | |||
723 | case XFS_IOC_ALLOCSP: | ||
724 | case XFS_IOC_FREESP: | ||
725 | case XFS_IOC_RESVSP: | ||
726 | case XFS_IOC_UNRESVSP: | ||
727 | case XFS_IOC_ALLOCSP64: | ||
728 | case XFS_IOC_FREESP64: | ||
729 | case XFS_IOC_RESVSP64: | ||
730 | case XFS_IOC_UNRESVSP64: | ||
731 | /* | ||
732 | * Only allow the sys admin to reserve space unless | ||
733 | * unwritten extents are enabled. | ||
734 | */ | ||
735 | if (!xfs_sb_version_hasextflgbit(&mp->m_sb) && | ||
736 | !capable(CAP_SYS_ADMIN)) | ||
737 | return -EPERM; | ||
738 | |||
739 | return xfs_ioc_space(ip, inode, filp, ioflags, cmd, arg); | ||
740 | |||
741 | case XFS_IOC_DIOINFO: { | ||
742 | struct dioattr da; | ||
743 | xfs_buftarg_t *target = | ||
744 | XFS_IS_REALTIME_INODE(ip) ? | ||
745 | mp->m_rtdev_targp : mp->m_ddev_targp; | ||
746 | |||
747 | da.d_mem = da.d_miniosz = 1 << target->bt_sshift; | ||
748 | da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1); | ||
749 | |||
750 | if (copy_to_user(arg, &da, sizeof(da))) | ||
751 | return -XFS_ERROR(EFAULT); | ||
752 | return 0; | ||
753 | } | ||
754 | |||
755 | case XFS_IOC_FSBULKSTAT_SINGLE: | ||
756 | case XFS_IOC_FSBULKSTAT: | ||
757 | case XFS_IOC_FSINUMBERS: | ||
758 | return xfs_ioc_bulkstat(mp, cmd, arg); | ||
759 | |||
760 | case XFS_IOC_FSGEOMETRY_V1: | ||
761 | return xfs_ioc_fsgeometry_v1(mp, arg); | ||
762 | |||
763 | case XFS_IOC_FSGEOMETRY: | ||
764 | return xfs_ioc_fsgeometry(mp, arg); | ||
765 | |||
766 | case XFS_IOC_GETVERSION: | ||
767 | return put_user(inode->i_generation, (int __user *)arg); | ||
768 | |||
769 | case XFS_IOC_FSGETXATTR: | ||
770 | return xfs_ioc_fsgetxattr(ip, 0, arg); | ||
771 | case XFS_IOC_FSGETXATTRA: | ||
772 | return xfs_ioc_fsgetxattr(ip, 1, arg); | ||
773 | case XFS_IOC_GETXFLAGS: | ||
774 | case XFS_IOC_SETXFLAGS: | ||
775 | case XFS_IOC_FSSETXATTR: | ||
776 | return xfs_ioc_xattr(ip, filp, cmd, arg); | ||
777 | |||
778 | case XFS_IOC_FSSETDM: { | ||
779 | struct fsdmidata dmi; | ||
780 | |||
781 | if (copy_from_user(&dmi, arg, sizeof(dmi))) | ||
782 | return -XFS_ERROR(EFAULT); | ||
783 | |||
784 | error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask, | ||
785 | dmi.fsd_dmstate); | ||
786 | return -error; | ||
787 | } | ||
788 | |||
789 | case XFS_IOC_GETBMAP: | ||
790 | case XFS_IOC_GETBMAPA: | ||
791 | return xfs_ioc_getbmap(ip, ioflags, cmd, arg); | ||
792 | |||
793 | case XFS_IOC_GETBMAPX: | ||
794 | return xfs_ioc_getbmapx(ip, arg); | ||
795 | |||
796 | case XFS_IOC_FD_TO_HANDLE: | ||
797 | case XFS_IOC_PATH_TO_HANDLE: | ||
798 | case XFS_IOC_PATH_TO_FSHANDLE: | ||
799 | return xfs_find_handle(cmd, arg); | ||
800 | |||
801 | case XFS_IOC_OPEN_BY_HANDLE: | ||
802 | return xfs_open_by_handle(mp, arg, filp, inode); | ||
803 | |||
804 | case XFS_IOC_FSSETDM_BY_HANDLE: | ||
805 | return xfs_fssetdm_by_handle(mp, arg, inode); | ||
806 | |||
807 | case XFS_IOC_READLINK_BY_HANDLE: | ||
808 | return xfs_readlink_by_handle(mp, arg, inode); | ||
809 | |||
810 | case XFS_IOC_ATTRLIST_BY_HANDLE: | ||
811 | return xfs_attrlist_by_handle(mp, arg, inode); | ||
812 | |||
813 | case XFS_IOC_ATTRMULTI_BY_HANDLE: | ||
814 | return xfs_attrmulti_by_handle(mp, arg, inode); | ||
815 | |||
816 | case XFS_IOC_SWAPEXT: { | ||
817 | error = xfs_swapext((struct xfs_swapext __user *)arg); | ||
818 | return -error; | ||
819 | } | ||
820 | |||
821 | case XFS_IOC_FSCOUNTS: { | ||
822 | xfs_fsop_counts_t out; | ||
823 | |||
824 | error = xfs_fs_counts(mp, &out); | ||
825 | if (error) | ||
826 | return -error; | ||
827 | |||
828 | if (copy_to_user(arg, &out, sizeof(out))) | ||
829 | return -XFS_ERROR(EFAULT); | ||
830 | return 0; | ||
831 | } | ||
832 | |||
833 | case XFS_IOC_SET_RESBLKS: { | ||
834 | xfs_fsop_resblks_t inout; | ||
835 | __uint64_t in; | ||
836 | |||
837 | if (!capable(CAP_SYS_ADMIN)) | ||
838 | return -EPERM; | ||
839 | |||
840 | if (copy_from_user(&inout, arg, sizeof(inout))) | ||
841 | return -XFS_ERROR(EFAULT); | ||
842 | |||
843 | /* input parameter is passed in resblks field of structure */ | ||
844 | in = inout.resblks; | ||
845 | error = xfs_reserve_blocks(mp, &in, &inout); | ||
846 | if (error) | ||
847 | return -error; | ||
848 | |||
849 | if (copy_to_user(arg, &inout, sizeof(inout))) | ||
850 | return -XFS_ERROR(EFAULT); | ||
851 | return 0; | ||
852 | } | ||
853 | |||
854 | case XFS_IOC_GET_RESBLKS: { | ||
855 | xfs_fsop_resblks_t out; | ||
856 | |||
857 | if (!capable(CAP_SYS_ADMIN)) | ||
858 | return -EPERM; | ||
859 | |||
860 | error = xfs_reserve_blocks(mp, NULL, &out); | ||
861 | if (error) | ||
862 | return -error; | ||
863 | |||
864 | if (copy_to_user(arg, &out, sizeof(out))) | ||
865 | return -XFS_ERROR(EFAULT); | ||
866 | |||
867 | return 0; | ||
868 | } | ||
869 | |||
870 | case XFS_IOC_FSGROWFSDATA: { | ||
871 | xfs_growfs_data_t in; | ||
872 | |||
873 | if (!capable(CAP_SYS_ADMIN)) | ||
874 | return -EPERM; | ||
875 | |||
876 | if (copy_from_user(&in, arg, sizeof(in))) | ||
877 | return -XFS_ERROR(EFAULT); | ||
878 | |||
879 | error = xfs_growfs_data(mp, &in); | ||
880 | return -error; | ||
881 | } | ||
882 | |||
883 | case XFS_IOC_FSGROWFSLOG: { | ||
884 | xfs_growfs_log_t in; | ||
885 | |||
886 | if (!capable(CAP_SYS_ADMIN)) | ||
887 | return -EPERM; | ||
888 | |||
889 | if (copy_from_user(&in, arg, sizeof(in))) | ||
890 | return -XFS_ERROR(EFAULT); | ||
891 | |||
892 | error = xfs_growfs_log(mp, &in); | ||
893 | return -error; | ||
894 | } | ||
895 | |||
896 | case XFS_IOC_FSGROWFSRT: { | ||
897 | xfs_growfs_rt_t in; | ||
898 | |||
899 | if (!capable(CAP_SYS_ADMIN)) | ||
900 | return -EPERM; | ||
901 | |||
902 | if (copy_from_user(&in, arg, sizeof(in))) | ||
903 | return -XFS_ERROR(EFAULT); | ||
904 | |||
905 | error = xfs_growfs_rt(mp, &in); | ||
906 | return -error; | ||
907 | } | ||
908 | |||
909 | case XFS_IOC_FREEZE: | ||
910 | if (!capable(CAP_SYS_ADMIN)) | ||
911 | return -EPERM; | ||
912 | |||
913 | if (inode->i_sb->s_frozen == SB_UNFROZEN) | ||
914 | freeze_bdev(inode->i_sb->s_bdev); | ||
915 | return 0; | ||
916 | |||
917 | case XFS_IOC_THAW: | ||
918 | if (!capable(CAP_SYS_ADMIN)) | ||
919 | return -EPERM; | ||
920 | if (inode->i_sb->s_frozen != SB_UNFROZEN) | ||
921 | thaw_bdev(inode->i_sb->s_bdev, inode->i_sb); | ||
922 | return 0; | ||
923 | |||
924 | case XFS_IOC_GOINGDOWN: { | ||
925 | __uint32_t in; | ||
926 | |||
927 | if (!capable(CAP_SYS_ADMIN)) | ||
928 | return -EPERM; | ||
929 | |||
930 | if (get_user(in, (__uint32_t __user *)arg)) | ||
931 | return -XFS_ERROR(EFAULT); | ||
932 | |||
933 | error = xfs_fs_goingdown(mp, in); | ||
934 | return -error; | ||
935 | } | ||
936 | |||
937 | case XFS_IOC_ERROR_INJECTION: { | ||
938 | xfs_error_injection_t in; | ||
939 | |||
940 | if (!capable(CAP_SYS_ADMIN)) | ||
941 | return -EPERM; | ||
942 | |||
943 | if (copy_from_user(&in, arg, sizeof(in))) | ||
944 | return -XFS_ERROR(EFAULT); | ||
945 | |||
946 | error = xfs_errortag_add(in.errtag, mp); | ||
947 | return -error; | ||
948 | } | ||
949 | |||
950 | case XFS_IOC_ERROR_CLEARALL: | ||
951 | if (!capable(CAP_SYS_ADMIN)) | ||
952 | return -EPERM; | ||
953 | |||
954 | error = xfs_errortag_clearall(mp, 1); | ||
955 | return -error; | ||
956 | |||
957 | default: | ||
958 | return -ENOTTY; | ||
959 | } | ||
960 | } | ||
961 | |||
962 | STATIC int | 659 | STATIC int |
963 | xfs_ioc_space( | 660 | xfs_ioc_space( |
964 | struct xfs_inode *ip, | 661 | struct xfs_inode *ip, |
@@ -1179,85 +876,85 @@ xfs_ioc_fsgetxattr( | |||
1179 | } | 876 | } |
1180 | 877 | ||
1181 | STATIC int | 878 | STATIC int |
1182 | xfs_ioc_xattr( | 879 | xfs_ioc_fssetxattr( |
1183 | xfs_inode_t *ip, | 880 | xfs_inode_t *ip, |
1184 | struct file *filp, | 881 | struct file *filp, |
1185 | unsigned int cmd, | ||
1186 | void __user *arg) | 882 | void __user *arg) |
1187 | { | 883 | { |
1188 | struct fsxattr fa; | 884 | struct fsxattr fa; |
1189 | struct bhv_vattr *vattr; | 885 | struct bhv_vattr *vattr; |
1190 | int error = 0; | 886 | int error; |
1191 | int attr_flags; | 887 | int attr_flags; |
1192 | unsigned int flags; | 888 | |
889 | if (copy_from_user(&fa, arg, sizeof(fa))) | ||
890 | return -EFAULT; | ||
1193 | 891 | ||
1194 | vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); | 892 | vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); |
1195 | if (unlikely(!vattr)) | 893 | if (unlikely(!vattr)) |
1196 | return -ENOMEM; | 894 | return -ENOMEM; |
1197 | 895 | ||
1198 | switch (cmd) { | 896 | attr_flags = 0; |
1199 | case XFS_IOC_FSSETXATTR: { | 897 | if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) |
1200 | if (copy_from_user(&fa, arg, sizeof(fa))) { | 898 | attr_flags |= ATTR_NONBLOCK; |
1201 | error = -EFAULT; | ||
1202 | break; | ||
1203 | } | ||
1204 | 899 | ||
1205 | attr_flags = 0; | 900 | vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID; |
1206 | if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) | 901 | vattr->va_xflags = fa.fsx_xflags; |
1207 | attr_flags |= ATTR_NONBLOCK; | 902 | vattr->va_extsize = fa.fsx_extsize; |
903 | vattr->va_projid = fa.fsx_projid; | ||
1208 | 904 | ||
1209 | vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID; | 905 | error = -xfs_setattr(ip, vattr, attr_flags, NULL); |
1210 | vattr->va_xflags = fa.fsx_xflags; | 906 | if (!error) |
1211 | vattr->va_extsize = fa.fsx_extsize; | 907 | vn_revalidate(XFS_ITOV(ip)); /* update flags */ |
1212 | vattr->va_projid = fa.fsx_projid; | 908 | kfree(vattr); |
909 | return 0; | ||
910 | } | ||
1213 | 911 | ||
1214 | error = xfs_setattr(ip, vattr, attr_flags, NULL); | 912 | STATIC int |
1215 | if (likely(!error)) | 913 | xfs_ioc_getxflags( |
1216 | vn_revalidate(XFS_ITOV(ip)); /* update flags */ | 914 | xfs_inode_t *ip, |
1217 | error = -error; | 915 | void __user *arg) |
1218 | break; | 916 | { |
1219 | } | 917 | unsigned int flags; |
1220 | 918 | ||
1221 | case XFS_IOC_GETXFLAGS: { | 919 | flags = xfs_di2lxflags(ip->i_d.di_flags); |
1222 | flags = xfs_di2lxflags(ip->i_d.di_flags); | 920 | if (copy_to_user(arg, &flags, sizeof(flags))) |
1223 | if (copy_to_user(arg, &flags, sizeof(flags))) | 921 | return -EFAULT; |
1224 | error = -EFAULT; | 922 | return 0; |
1225 | break; | 923 | } |
1226 | } | ||
1227 | 924 | ||
1228 | case XFS_IOC_SETXFLAGS: { | 925 | STATIC int |
1229 | if (copy_from_user(&flags, arg, sizeof(flags))) { | 926 | xfs_ioc_setxflags( |
1230 | error = -EFAULT; | 927 | xfs_inode_t *ip, |
1231 | break; | 928 | struct file *filp, |
1232 | } | 929 | void __user *arg) |
930 | { | ||
931 | struct bhv_vattr *vattr; | ||
932 | unsigned int flags; | ||
933 | int attr_flags; | ||
934 | int error; | ||
1233 | 935 | ||
1234 | if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ | 936 | if (copy_from_user(&flags, arg, sizeof(flags))) |
1235 | FS_NOATIME_FL | FS_NODUMP_FL | \ | 937 | return -EFAULT; |
1236 | FS_SYNC_FL)) { | ||
1237 | error = -EOPNOTSUPP; | ||
1238 | break; | ||
1239 | } | ||
1240 | 938 | ||
1241 | attr_flags = 0; | 939 | if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ |
1242 | if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) | 940 | FS_NOATIME_FL | FS_NODUMP_FL | \ |
1243 | attr_flags |= ATTR_NONBLOCK; | 941 | FS_SYNC_FL)) |
942 | return -EOPNOTSUPP; | ||
1244 | 943 | ||
1245 | vattr->va_mask = XFS_AT_XFLAGS; | 944 | vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); |
1246 | vattr->va_xflags = xfs_merge_ioc_xflags(flags, | 945 | if (unlikely(!vattr)) |
1247 | xfs_ip2xflags(ip)); | 946 | return -ENOMEM; |
1248 | 947 | ||
1249 | error = xfs_setattr(ip, vattr, attr_flags, NULL); | 948 | attr_flags = 0; |
1250 | if (likely(!error)) | 949 | if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) |
1251 | vn_revalidate(XFS_ITOV(ip)); /* update flags */ | 950 | attr_flags |= ATTR_NONBLOCK; |
1252 | error = -error; | ||
1253 | break; | ||
1254 | } | ||
1255 | 951 | ||
1256 | default: | 952 | vattr->va_mask = XFS_AT_XFLAGS; |
1257 | error = -ENOTTY; | 953 | vattr->va_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); |
1258 | break; | ||
1259 | } | ||
1260 | 954 | ||
955 | error = -xfs_setattr(ip, vattr, attr_flags, NULL); | ||
956 | if (likely(!error)) | ||
957 | vn_revalidate(XFS_ITOV(ip)); /* update flags */ | ||
1261 | kfree(vattr); | 958 | kfree(vattr); |
1262 | return error; | 959 | return error; |
1263 | } | 960 | } |
@@ -1332,3 +1029,259 @@ xfs_ioc_getbmapx( | |||
1332 | 1029 | ||
1333 | return 0; | 1030 | return 0; |
1334 | } | 1031 | } |
1032 | |||
1033 | int | ||
1034 | xfs_ioctl( | ||
1035 | xfs_inode_t *ip, | ||
1036 | struct file *filp, | ||
1037 | int ioflags, | ||
1038 | unsigned int cmd, | ||
1039 | void __user *arg) | ||
1040 | { | ||
1041 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
1042 | xfs_mount_t *mp = ip->i_mount; | ||
1043 | int error; | ||
1044 | |||
1045 | xfs_itrace_entry(XFS_I(inode)); | ||
1046 | switch (cmd) { | ||
1047 | |||
1048 | case XFS_IOC_ALLOCSP: | ||
1049 | case XFS_IOC_FREESP: | ||
1050 | case XFS_IOC_RESVSP: | ||
1051 | case XFS_IOC_UNRESVSP: | ||
1052 | case XFS_IOC_ALLOCSP64: | ||
1053 | case XFS_IOC_FREESP64: | ||
1054 | case XFS_IOC_RESVSP64: | ||
1055 | case XFS_IOC_UNRESVSP64: | ||
1056 | /* | ||
1057 | * Only allow the sys admin to reserve space unless | ||
1058 | * unwritten extents are enabled. | ||
1059 | */ | ||
1060 | if (!xfs_sb_version_hasextflgbit(&mp->m_sb) && | ||
1061 | !capable(CAP_SYS_ADMIN)) | ||
1062 | return -EPERM; | ||
1063 | |||
1064 | return xfs_ioc_space(ip, inode, filp, ioflags, cmd, arg); | ||
1065 | |||
1066 | case XFS_IOC_DIOINFO: { | ||
1067 | struct dioattr da; | ||
1068 | xfs_buftarg_t *target = | ||
1069 | XFS_IS_REALTIME_INODE(ip) ? | ||
1070 | mp->m_rtdev_targp : mp->m_ddev_targp; | ||
1071 | |||
1072 | da.d_mem = da.d_miniosz = 1 << target->bt_sshift; | ||
1073 | da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1); | ||
1074 | |||
1075 | if (copy_to_user(arg, &da, sizeof(da))) | ||
1076 | return -XFS_ERROR(EFAULT); | ||
1077 | return 0; | ||
1078 | } | ||
1079 | |||
1080 | case XFS_IOC_FSBULKSTAT_SINGLE: | ||
1081 | case XFS_IOC_FSBULKSTAT: | ||
1082 | case XFS_IOC_FSINUMBERS: | ||
1083 | return xfs_ioc_bulkstat(mp, cmd, arg); | ||
1084 | |||
1085 | case XFS_IOC_FSGEOMETRY_V1: | ||
1086 | return xfs_ioc_fsgeometry_v1(mp, arg); | ||
1087 | |||
1088 | case XFS_IOC_FSGEOMETRY: | ||
1089 | return xfs_ioc_fsgeometry(mp, arg); | ||
1090 | |||
1091 | case XFS_IOC_GETVERSION: | ||
1092 | return put_user(inode->i_generation, (int __user *)arg); | ||
1093 | |||
1094 | case XFS_IOC_FSGETXATTR: | ||
1095 | return xfs_ioc_fsgetxattr(ip, 0, arg); | ||
1096 | case XFS_IOC_FSGETXATTRA: | ||
1097 | return xfs_ioc_fsgetxattr(ip, 1, arg); | ||
1098 | case XFS_IOC_FSSETXATTR: | ||
1099 | return xfs_ioc_fssetxattr(ip, filp, arg); | ||
1100 | case XFS_IOC_GETXFLAGS: | ||
1101 | return xfs_ioc_getxflags(ip, arg); | ||
1102 | case XFS_IOC_SETXFLAGS: | ||
1103 | return xfs_ioc_setxflags(ip, filp, arg); | ||
1104 | |||
1105 | case XFS_IOC_FSSETDM: { | ||
1106 | struct fsdmidata dmi; | ||
1107 | |||
1108 | if (copy_from_user(&dmi, arg, sizeof(dmi))) | ||
1109 | return -XFS_ERROR(EFAULT); | ||
1110 | |||
1111 | error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask, | ||
1112 | dmi.fsd_dmstate); | ||
1113 | return -error; | ||
1114 | } | ||
1115 | |||
1116 | case XFS_IOC_GETBMAP: | ||
1117 | case XFS_IOC_GETBMAPA: | ||
1118 | return xfs_ioc_getbmap(ip, ioflags, cmd, arg); | ||
1119 | |||
1120 | case XFS_IOC_GETBMAPX: | ||
1121 | return xfs_ioc_getbmapx(ip, arg); | ||
1122 | |||
1123 | case XFS_IOC_FD_TO_HANDLE: | ||
1124 | case XFS_IOC_PATH_TO_HANDLE: | ||
1125 | case XFS_IOC_PATH_TO_FSHANDLE: | ||
1126 | return xfs_find_handle(cmd, arg); | ||
1127 | |||
1128 | case XFS_IOC_OPEN_BY_HANDLE: | ||
1129 | return xfs_open_by_handle(mp, arg, filp, inode); | ||
1130 | |||
1131 | case XFS_IOC_FSSETDM_BY_HANDLE: | ||
1132 | return xfs_fssetdm_by_handle(mp, arg, inode); | ||
1133 | |||
1134 | case XFS_IOC_READLINK_BY_HANDLE: | ||
1135 | return xfs_readlink_by_handle(mp, arg, inode); | ||
1136 | |||
1137 | case XFS_IOC_ATTRLIST_BY_HANDLE: | ||
1138 | return xfs_attrlist_by_handle(mp, arg, inode); | ||
1139 | |||
1140 | case XFS_IOC_ATTRMULTI_BY_HANDLE: | ||
1141 | return xfs_attrmulti_by_handle(mp, arg, filp, inode); | ||
1142 | |||
1143 | case XFS_IOC_SWAPEXT: { | ||
1144 | error = xfs_swapext((struct xfs_swapext __user *)arg); | ||
1145 | return -error; | ||
1146 | } | ||
1147 | |||
1148 | case XFS_IOC_FSCOUNTS: { | ||
1149 | xfs_fsop_counts_t out; | ||
1150 | |||
1151 | error = xfs_fs_counts(mp, &out); | ||
1152 | if (error) | ||
1153 | return -error; | ||
1154 | |||
1155 | if (copy_to_user(arg, &out, sizeof(out))) | ||
1156 | return -XFS_ERROR(EFAULT); | ||
1157 | return 0; | ||
1158 | } | ||
1159 | |||
1160 | case XFS_IOC_SET_RESBLKS: { | ||
1161 | xfs_fsop_resblks_t inout; | ||
1162 | __uint64_t in; | ||
1163 | |||
1164 | if (!capable(CAP_SYS_ADMIN)) | ||
1165 | return -EPERM; | ||
1166 | |||
1167 | if (copy_from_user(&inout, arg, sizeof(inout))) | ||
1168 | return -XFS_ERROR(EFAULT); | ||
1169 | |||
1170 | /* input parameter is passed in resblks field of structure */ | ||
1171 | in = inout.resblks; | ||
1172 | error = xfs_reserve_blocks(mp, &in, &inout); | ||
1173 | if (error) | ||
1174 | return -error; | ||
1175 | |||
1176 | if (copy_to_user(arg, &inout, sizeof(inout))) | ||
1177 | return -XFS_ERROR(EFAULT); | ||
1178 | return 0; | ||
1179 | } | ||
1180 | |||
1181 | case XFS_IOC_GET_RESBLKS: { | ||
1182 | xfs_fsop_resblks_t out; | ||
1183 | |||
1184 | if (!capable(CAP_SYS_ADMIN)) | ||
1185 | return -EPERM; | ||
1186 | |||
1187 | error = xfs_reserve_blocks(mp, NULL, &out); | ||
1188 | if (error) | ||
1189 | return -error; | ||
1190 | |||
1191 | if (copy_to_user(arg, &out, sizeof(out))) | ||
1192 | return -XFS_ERROR(EFAULT); | ||
1193 | |||
1194 | return 0; | ||
1195 | } | ||
1196 | |||
1197 | case XFS_IOC_FSGROWFSDATA: { | ||
1198 | xfs_growfs_data_t in; | ||
1199 | |||
1200 | if (!capable(CAP_SYS_ADMIN)) | ||
1201 | return -EPERM; | ||
1202 | |||
1203 | if (copy_from_user(&in, arg, sizeof(in))) | ||
1204 | return -XFS_ERROR(EFAULT); | ||
1205 | |||
1206 | error = xfs_growfs_data(mp, &in); | ||
1207 | return -error; | ||
1208 | } | ||
1209 | |||
1210 | case XFS_IOC_FSGROWFSLOG: { | ||
1211 | xfs_growfs_log_t in; | ||
1212 | |||
1213 | if (!capable(CAP_SYS_ADMIN)) | ||
1214 | return -EPERM; | ||
1215 | |||
1216 | if (copy_from_user(&in, arg, sizeof(in))) | ||
1217 | return -XFS_ERROR(EFAULT); | ||
1218 | |||
1219 | error = xfs_growfs_log(mp, &in); | ||
1220 | return -error; | ||
1221 | } | ||
1222 | |||
1223 | case XFS_IOC_FSGROWFSRT: { | ||
1224 | xfs_growfs_rt_t in; | ||
1225 | |||
1226 | if (!capable(CAP_SYS_ADMIN)) | ||
1227 | return -EPERM; | ||
1228 | |||
1229 | if (copy_from_user(&in, arg, sizeof(in))) | ||
1230 | return -XFS_ERROR(EFAULT); | ||
1231 | |||
1232 | error = xfs_growfs_rt(mp, &in); | ||
1233 | return -error; | ||
1234 | } | ||
1235 | |||
1236 | case XFS_IOC_FREEZE: | ||
1237 | if (!capable(CAP_SYS_ADMIN)) | ||
1238 | return -EPERM; | ||
1239 | |||
1240 | if (inode->i_sb->s_frozen == SB_UNFROZEN) | ||
1241 | freeze_bdev(inode->i_sb->s_bdev); | ||
1242 | return 0; | ||
1243 | |||
1244 | case XFS_IOC_THAW: | ||
1245 | if (!capable(CAP_SYS_ADMIN)) | ||
1246 | return -EPERM; | ||
1247 | if (inode->i_sb->s_frozen != SB_UNFROZEN) | ||
1248 | thaw_bdev(inode->i_sb->s_bdev, inode->i_sb); | ||
1249 | return 0; | ||
1250 | |||
1251 | case XFS_IOC_GOINGDOWN: { | ||
1252 | __uint32_t in; | ||
1253 | |||
1254 | if (!capable(CAP_SYS_ADMIN)) | ||
1255 | return -EPERM; | ||
1256 | |||
1257 | if (get_user(in, (__uint32_t __user *)arg)) | ||
1258 | return -XFS_ERROR(EFAULT); | ||
1259 | |||
1260 | error = xfs_fs_goingdown(mp, in); | ||
1261 | return -error; | ||
1262 | } | ||
1263 | |||
1264 | case XFS_IOC_ERROR_INJECTION: { | ||
1265 | xfs_error_injection_t in; | ||
1266 | |||
1267 | if (!capable(CAP_SYS_ADMIN)) | ||
1268 | return -EPERM; | ||
1269 | |||
1270 | if (copy_from_user(&in, arg, sizeof(in))) | ||
1271 | return -XFS_ERROR(EFAULT); | ||
1272 | |||
1273 | error = xfs_errortag_add(in.errtag, mp); | ||
1274 | return -error; | ||
1275 | } | ||
1276 | |||
1277 | case XFS_IOC_ERROR_CLEARALL: | ||
1278 | if (!capable(CAP_SYS_ADMIN)) | ||
1279 | return -EPERM; | ||
1280 | |||
1281 | error = xfs_errortag_clearall(mp, 1); | ||
1282 | return -error; | ||
1283 | |||
1284 | default: | ||
1285 | return -ENOTTY; | ||
1286 | } | ||
1287 | } | ||
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index cc4abd3daa49..a1237dad6430 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -62,12 +62,11 @@ void | |||
62 | xfs_synchronize_atime( | 62 | xfs_synchronize_atime( |
63 | xfs_inode_t *ip) | 63 | xfs_inode_t *ip) |
64 | { | 64 | { |
65 | bhv_vnode_t *vp; | 65 | struct inode *inode = ip->i_vnode; |
66 | 66 | ||
67 | vp = XFS_ITOV_NULL(ip); | 67 | if (inode) { |
68 | if (vp) { | 68 | ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; |
69 | ip->i_d.di_atime.t_sec = (__int32_t)vp->i_atime.tv_sec; | 69 | ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; |
70 | ip->i_d.di_atime.t_nsec = (__int32_t)vp->i_atime.tv_nsec; | ||
71 | } | 70 | } |
72 | } | 71 | } |
73 | 72 | ||
@@ -80,11 +79,10 @@ void | |||
80 | xfs_mark_inode_dirty_sync( | 79 | xfs_mark_inode_dirty_sync( |
81 | xfs_inode_t *ip) | 80 | xfs_inode_t *ip) |
82 | { | 81 | { |
83 | bhv_vnode_t *vp; | 82 | struct inode *inode = ip->i_vnode; |
84 | 83 | ||
85 | vp = XFS_ITOV_NULL(ip); | 84 | if (inode) |
86 | if (vp) | 85 | mark_inode_dirty_sync(inode); |
87 | mark_inode_dirty_sync(vn_to_inode(vp)); | ||
88 | } | 86 | } |
89 | 87 | ||
90 | /* | 88 | /* |
@@ -157,13 +155,6 @@ xfs_ichgtime_fast( | |||
157 | */ | 155 | */ |
158 | ASSERT((flags & XFS_ICHGTIME_ACC) == 0); | 156 | ASSERT((flags & XFS_ICHGTIME_ACC) == 0); |
159 | 157 | ||
160 | /* | ||
161 | * We're not supposed to change timestamps in readonly-mounted | ||
162 | * filesystems. Throw it away if anyone asks us. | ||
163 | */ | ||
164 | if (unlikely(IS_RDONLY(inode))) | ||
165 | return; | ||
166 | |||
167 | if (flags & XFS_ICHGTIME_MOD) { | 158 | if (flags & XFS_ICHGTIME_MOD) { |
168 | tvp = &inode->i_mtime; | 159 | tvp = &inode->i_mtime; |
169 | ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec; | 160 | ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec; |
@@ -215,66 +206,62 @@ xfs_validate_fields( | |||
215 | */ | 206 | */ |
216 | STATIC int | 207 | STATIC int |
217 | xfs_init_security( | 208 | xfs_init_security( |
218 | bhv_vnode_t *vp, | 209 | struct inode *inode, |
219 | struct inode *dir) | 210 | struct inode *dir) |
220 | { | 211 | { |
221 | struct inode *ip = vn_to_inode(vp); | 212 | struct xfs_inode *ip = XFS_I(inode); |
222 | size_t length; | 213 | size_t length; |
223 | void *value; | 214 | void *value; |
224 | char *name; | 215 | char *name; |
225 | int error; | 216 | int error; |
226 | 217 | ||
227 | error = security_inode_init_security(ip, dir, &name, &value, &length); | 218 | error = security_inode_init_security(inode, dir, &name, |
219 | &value, &length); | ||
228 | if (error) { | 220 | if (error) { |
229 | if (error == -EOPNOTSUPP) | 221 | if (error == -EOPNOTSUPP) |
230 | return 0; | 222 | return 0; |
231 | return -error; | 223 | return -error; |
232 | } | 224 | } |
233 | 225 | ||
234 | error = xfs_attr_set(XFS_I(ip), name, value, | 226 | error = xfs_attr_set(ip, name, value, length, ATTR_SECURE); |
235 | length, ATTR_SECURE); | ||
236 | if (!error) | 227 | if (!error) |
237 | xfs_iflags_set(XFS_I(ip), XFS_IMODIFIED); | 228 | xfs_iflags_set(ip, XFS_IMODIFIED); |
238 | 229 | ||
239 | kfree(name); | 230 | kfree(name); |
240 | kfree(value); | 231 | kfree(value); |
241 | return error; | 232 | return error; |
242 | } | 233 | } |
243 | 234 | ||
244 | /* | 235 | static void |
245 | * Determine whether a process has a valid fs_struct (kernel daemons | 236 | xfs_dentry_to_name( |
246 | * like knfsd don't have an fs_struct). | 237 | struct xfs_name *namep, |
247 | * | 238 | struct dentry *dentry) |
248 | * XXX(hch): nfsd is broken, better fix it instead. | ||
249 | */ | ||
250 | STATIC_INLINE int | ||
251 | xfs_has_fs_struct(struct task_struct *task) | ||
252 | { | 239 | { |
253 | return (task->fs != init_task.fs); | 240 | namep->name = dentry->d_name.name; |
241 | namep->len = dentry->d_name.len; | ||
254 | } | 242 | } |
255 | 243 | ||
256 | STATIC void | 244 | STATIC void |
257 | xfs_cleanup_inode( | 245 | xfs_cleanup_inode( |
258 | struct inode *dir, | 246 | struct inode *dir, |
259 | bhv_vnode_t *vp, | 247 | struct inode *inode, |
260 | struct dentry *dentry, | 248 | struct dentry *dentry, |
261 | int mode) | 249 | int mode) |
262 | { | 250 | { |
263 | struct dentry teardown = {}; | 251 | struct xfs_name teardown; |
264 | 252 | ||
265 | /* Oh, the horror. | 253 | /* Oh, the horror. |
266 | * If we can't add the ACL or we fail in | 254 | * If we can't add the ACL or we fail in |
267 | * xfs_init_security we must back out. | 255 | * xfs_init_security we must back out. |
268 | * ENOSPC can hit here, among other things. | 256 | * ENOSPC can hit here, among other things. |
269 | */ | 257 | */ |
270 | teardown.d_inode = vn_to_inode(vp); | 258 | xfs_dentry_to_name(&teardown, dentry); |
271 | teardown.d_name = dentry->d_name; | ||
272 | 259 | ||
273 | if (S_ISDIR(mode)) | 260 | if (S_ISDIR(mode)) |
274 | xfs_rmdir(XFS_I(dir), &teardown); | 261 | xfs_rmdir(XFS_I(dir), &teardown, XFS_I(inode)); |
275 | else | 262 | else |
276 | xfs_remove(XFS_I(dir), &teardown); | 263 | xfs_remove(XFS_I(dir), &teardown, XFS_I(inode)); |
277 | VN_RELE(vp); | 264 | iput(inode); |
278 | } | 265 | } |
279 | 266 | ||
280 | STATIC int | 267 | STATIC int |
@@ -284,9 +271,10 @@ xfs_vn_mknod( | |||
284 | int mode, | 271 | int mode, |
285 | dev_t rdev) | 272 | dev_t rdev) |
286 | { | 273 | { |
287 | struct inode *ip; | 274 | struct inode *inode; |
288 | bhv_vnode_t *vp = NULL, *dvp = vn_from_inode(dir); | 275 | struct xfs_inode *ip = NULL; |
289 | xfs_acl_t *default_acl = NULL; | 276 | xfs_acl_t *default_acl = NULL; |
277 | struct xfs_name name; | ||
290 | attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS; | 278 | attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS; |
291 | int error; | 279 | int error; |
292 | 280 | ||
@@ -297,59 +285,67 @@ xfs_vn_mknod( | |||
297 | if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) | 285 | if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) |
298 | return -EINVAL; | 286 | return -EINVAL; |
299 | 287 | ||
300 | if (unlikely(test_default_acl && test_default_acl(dvp))) { | 288 | if (test_default_acl && test_default_acl(dir)) { |
301 | if (!_ACL_ALLOC(default_acl)) { | 289 | if (!_ACL_ALLOC(default_acl)) { |
302 | return -ENOMEM; | 290 | return -ENOMEM; |
303 | } | 291 | } |
304 | if (!_ACL_GET_DEFAULT(dvp, default_acl)) { | 292 | if (!_ACL_GET_DEFAULT(dir, default_acl)) { |
305 | _ACL_FREE(default_acl); | 293 | _ACL_FREE(default_acl); |
306 | default_acl = NULL; | 294 | default_acl = NULL; |
307 | } | 295 | } |
308 | } | 296 | } |
309 | 297 | ||
310 | if (IS_POSIXACL(dir) && !default_acl && xfs_has_fs_struct(current)) | 298 | xfs_dentry_to_name(&name, dentry); |
299 | |||
300 | if (IS_POSIXACL(dir) && !default_acl) | ||
311 | mode &= ~current->fs->umask; | 301 | mode &= ~current->fs->umask; |
312 | 302 | ||
313 | switch (mode & S_IFMT) { | 303 | switch (mode & S_IFMT) { |
314 | case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: | 304 | case S_IFCHR: |
305 | case S_IFBLK: | ||
306 | case S_IFIFO: | ||
307 | case S_IFSOCK: | ||
315 | rdev = sysv_encode_dev(rdev); | 308 | rdev = sysv_encode_dev(rdev); |
316 | case S_IFREG: | 309 | case S_IFREG: |
317 | error = xfs_create(XFS_I(dir), dentry, mode, rdev, &vp, NULL); | 310 | error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL); |
318 | break; | 311 | break; |
319 | case S_IFDIR: | 312 | case S_IFDIR: |
320 | error = xfs_mkdir(XFS_I(dir), dentry, mode, &vp, NULL); | 313 | error = xfs_mkdir(XFS_I(dir), &name, mode, &ip, NULL); |
321 | break; | 314 | break; |
322 | default: | 315 | default: |
323 | error = EINVAL; | 316 | error = EINVAL; |
324 | break; | 317 | break; |
325 | } | 318 | } |
326 | 319 | ||
327 | if (unlikely(!error)) { | 320 | if (unlikely(error)) |
328 | error = xfs_init_security(vp, dir); | 321 | goto out_free_acl; |
329 | if (error) | ||
330 | xfs_cleanup_inode(dir, vp, dentry, mode); | ||
331 | } | ||
332 | 322 | ||
333 | if (unlikely(default_acl)) { | 323 | inode = ip->i_vnode; |
334 | if (!error) { | 324 | |
335 | error = _ACL_INHERIT(vp, mode, default_acl); | 325 | error = xfs_init_security(inode, dir); |
336 | if (!error) | 326 | if (unlikely(error)) |
337 | xfs_iflags_set(XFS_I(vp), XFS_IMODIFIED); | 327 | goto out_cleanup_inode; |
338 | else | 328 | |
339 | xfs_cleanup_inode(dir, vp, dentry, mode); | 329 | if (default_acl) { |
340 | } | 330 | error = _ACL_INHERIT(inode, mode, default_acl); |
331 | if (unlikely(error)) | ||
332 | goto out_cleanup_inode; | ||
333 | xfs_iflags_set(ip, XFS_IMODIFIED); | ||
341 | _ACL_FREE(default_acl); | 334 | _ACL_FREE(default_acl); |
342 | } | 335 | } |
343 | 336 | ||
344 | if (likely(!error)) { | ||
345 | ASSERT(vp); | ||
346 | ip = vn_to_inode(vp); | ||
347 | 337 | ||
348 | if (S_ISDIR(mode)) | 338 | if (S_ISDIR(mode)) |
349 | xfs_validate_fields(ip); | 339 | xfs_validate_fields(inode); |
350 | d_instantiate(dentry, ip); | 340 | d_instantiate(dentry, inode); |
351 | xfs_validate_fields(dir); | 341 | xfs_validate_fields(dir); |
352 | } | 342 | return -error; |
343 | |||
344 | out_cleanup_inode: | ||
345 | xfs_cleanup_inode(dir, inode, dentry, mode); | ||
346 | out_free_acl: | ||
347 | if (default_acl) | ||
348 | _ACL_FREE(default_acl); | ||
353 | return -error; | 349 | return -error; |
354 | } | 350 | } |
355 | 351 | ||
@@ -378,13 +374,15 @@ xfs_vn_lookup( | |||
378 | struct dentry *dentry, | 374 | struct dentry *dentry, |
379 | struct nameidata *nd) | 375 | struct nameidata *nd) |
380 | { | 376 | { |
381 | bhv_vnode_t *cvp; | 377 | struct xfs_inode *cip; |
378 | struct xfs_name name; | ||
382 | int error; | 379 | int error; |
383 | 380 | ||
384 | if (dentry->d_name.len >= MAXNAMELEN) | 381 | if (dentry->d_name.len >= MAXNAMELEN) |
385 | return ERR_PTR(-ENAMETOOLONG); | 382 | return ERR_PTR(-ENAMETOOLONG); |
386 | 383 | ||
387 | error = xfs_lookup(XFS_I(dir), dentry, &cvp); | 384 | xfs_dentry_to_name(&name, dentry); |
385 | error = xfs_lookup(XFS_I(dir), &name, &cip); | ||
388 | if (unlikely(error)) { | 386 | if (unlikely(error)) { |
389 | if (unlikely(error != ENOENT)) | 387 | if (unlikely(error != ENOENT)) |
390 | return ERR_PTR(-error); | 388 | return ERR_PTR(-error); |
@@ -392,7 +390,7 @@ xfs_vn_lookup( | |||
392 | return NULL; | 390 | return NULL; |
393 | } | 391 | } |
394 | 392 | ||
395 | return d_splice_alias(vn_to_inode(cvp), dentry); | 393 | return d_splice_alias(cip->i_vnode, dentry); |
396 | } | 394 | } |
397 | 395 | ||
398 | STATIC int | 396 | STATIC int |
@@ -401,23 +399,24 @@ xfs_vn_link( | |||
401 | struct inode *dir, | 399 | struct inode *dir, |
402 | struct dentry *dentry) | 400 | struct dentry *dentry) |
403 | { | 401 | { |
404 | struct inode *ip; /* inode of guy being linked to */ | 402 | struct inode *inode; /* inode of guy being linked to */ |
405 | bhv_vnode_t *vp; /* vp of name being linked */ | 403 | struct xfs_name name; |
406 | int error; | 404 | int error; |
407 | 405 | ||
408 | ip = old_dentry->d_inode; /* inode being linked to */ | 406 | inode = old_dentry->d_inode; |
409 | vp = vn_from_inode(ip); | 407 | xfs_dentry_to_name(&name, dentry); |
410 | 408 | ||
411 | VN_HOLD(vp); | 409 | igrab(inode); |
412 | error = xfs_link(XFS_I(dir), vp, dentry); | 410 | error = xfs_link(XFS_I(dir), XFS_I(inode), &name); |
413 | if (unlikely(error)) { | 411 | if (unlikely(error)) { |
414 | VN_RELE(vp); | 412 | iput(inode); |
415 | } else { | 413 | return -error; |
416 | xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED); | ||
417 | xfs_validate_fields(ip); | ||
418 | d_instantiate(dentry, ip); | ||
419 | } | 414 | } |
420 | return -error; | 415 | |
416 | xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED); | ||
417 | xfs_validate_fields(inode); | ||
418 | d_instantiate(dentry, inode); | ||
419 | return 0; | ||
421 | } | 420 | } |
422 | 421 | ||
423 | STATIC int | 422 | STATIC int |
@@ -426,11 +425,13 @@ xfs_vn_unlink( | |||
426 | struct dentry *dentry) | 425 | struct dentry *dentry) |
427 | { | 426 | { |
428 | struct inode *inode; | 427 | struct inode *inode; |
428 | struct xfs_name name; | ||
429 | int error; | 429 | int error; |
430 | 430 | ||
431 | inode = dentry->d_inode; | 431 | inode = dentry->d_inode; |
432 | xfs_dentry_to_name(&name, dentry); | ||
432 | 433 | ||
433 | error = xfs_remove(XFS_I(dir), dentry); | 434 | error = xfs_remove(XFS_I(dir), &name, XFS_I(inode)); |
434 | if (likely(!error)) { | 435 | if (likely(!error)) { |
435 | xfs_validate_fields(dir); /* size needs update */ | 436 | xfs_validate_fields(dir); /* size needs update */ |
436 | xfs_validate_fields(inode); | 437 | xfs_validate_fields(inode); |
@@ -444,29 +445,34 @@ xfs_vn_symlink( | |||
444 | struct dentry *dentry, | 445 | struct dentry *dentry, |
445 | const char *symname) | 446 | const char *symname) |
446 | { | 447 | { |
447 | struct inode *ip; | 448 | struct inode *inode; |
448 | bhv_vnode_t *cvp; /* used to lookup symlink to put in dentry */ | 449 | struct xfs_inode *cip = NULL; |
450 | struct xfs_name name; | ||
449 | int error; | 451 | int error; |
450 | mode_t mode; | 452 | mode_t mode; |
451 | 453 | ||
452 | cvp = NULL; | ||
453 | |||
454 | mode = S_IFLNK | | 454 | mode = S_IFLNK | |
455 | (irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO); | 455 | (irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO); |
456 | xfs_dentry_to_name(&name, dentry); | ||
456 | 457 | ||
457 | error = xfs_symlink(XFS_I(dir), dentry, (char *)symname, mode, | 458 | error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip, NULL); |
458 | &cvp, NULL); | 459 | if (unlikely(error)) |
459 | if (likely(!error && cvp)) { | 460 | goto out; |
460 | error = xfs_init_security(cvp, dir); | 461 | |
461 | if (likely(!error)) { | 462 | inode = cip->i_vnode; |
462 | ip = vn_to_inode(cvp); | 463 | |
463 | d_instantiate(dentry, ip); | 464 | error = xfs_init_security(inode, dir); |
464 | xfs_validate_fields(dir); | 465 | if (unlikely(error)) |
465 | xfs_validate_fields(ip); | 466 | goto out_cleanup_inode; |
466 | } else { | 467 | |
467 | xfs_cleanup_inode(dir, cvp, dentry, 0); | 468 | d_instantiate(dentry, inode); |
468 | } | 469 | xfs_validate_fields(dir); |
469 | } | 470 | xfs_validate_fields(inode); |
471 | return 0; | ||
472 | |||
473 | out_cleanup_inode: | ||
474 | xfs_cleanup_inode(dir, inode, dentry, 0); | ||
475 | out: | ||
470 | return -error; | 476 | return -error; |
471 | } | 477 | } |
472 | 478 | ||
@@ -476,9 +482,12 @@ xfs_vn_rmdir( | |||
476 | struct dentry *dentry) | 482 | struct dentry *dentry) |
477 | { | 483 | { |
478 | struct inode *inode = dentry->d_inode; | 484 | struct inode *inode = dentry->d_inode; |
485 | struct xfs_name name; | ||
479 | int error; | 486 | int error; |
480 | 487 | ||
481 | error = xfs_rmdir(XFS_I(dir), dentry); | 488 | xfs_dentry_to_name(&name, dentry); |
489 | |||
490 | error = xfs_rmdir(XFS_I(dir), &name, XFS_I(inode)); | ||
482 | if (likely(!error)) { | 491 | if (likely(!error)) { |
483 | xfs_validate_fields(inode); | 492 | xfs_validate_fields(inode); |
484 | xfs_validate_fields(dir); | 493 | xfs_validate_fields(dir); |
@@ -494,12 +503,15 @@ xfs_vn_rename( | |||
494 | struct dentry *ndentry) | 503 | struct dentry *ndentry) |
495 | { | 504 | { |
496 | struct inode *new_inode = ndentry->d_inode; | 505 | struct inode *new_inode = ndentry->d_inode; |
497 | bhv_vnode_t *tvp; /* target directory */ | 506 | struct xfs_name oname; |
507 | struct xfs_name nname; | ||
498 | int error; | 508 | int error; |
499 | 509 | ||
500 | tvp = vn_from_inode(ndir); | 510 | xfs_dentry_to_name(&oname, odentry); |
511 | xfs_dentry_to_name(&nname, ndentry); | ||
501 | 512 | ||
502 | error = xfs_rename(XFS_I(odir), odentry, tvp, ndentry); | 513 | error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), |
514 | XFS_I(ndir), &nname); | ||
503 | if (likely(!error)) { | 515 | if (likely(!error)) { |
504 | if (new_inode) | 516 | if (new_inode) |
505 | xfs_validate_fields(new_inode); | 517 | xfs_validate_fields(new_inode); |
@@ -700,11 +712,19 @@ xfs_vn_setattr( | |||
700 | return -error; | 712 | return -error; |
701 | } | 713 | } |
702 | 714 | ||
715 | /* | ||
716 | * block_truncate_page can return an error, but we can't propagate it | ||
717 | * at all here. Leave a complaint + stack trace in the syslog because | ||
718 | * this could be bad. If it is bad, we need to propagate the error further. | ||
719 | */ | ||
703 | STATIC void | 720 | STATIC void |
704 | xfs_vn_truncate( | 721 | xfs_vn_truncate( |
705 | struct inode *inode) | 722 | struct inode *inode) |
706 | { | 723 | { |
707 | block_truncate_page(inode->i_mapping, inode->i_size, xfs_get_blocks); | 724 | int error; |
725 | error = block_truncate_page(inode->i_mapping, inode->i_size, | ||
726 | xfs_get_blocks); | ||
727 | WARN_ON(error); | ||
708 | } | 728 | } |
709 | 729 | ||
710 | STATIC int | 730 | STATIC int |
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 3ca39c4e5d2a..e5143323e71f 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h | |||
@@ -99,7 +99,6 @@ | |||
99 | /* | 99 | /* |
100 | * Feature macros (disable/enable) | 100 | * Feature macros (disable/enable) |
101 | */ | 101 | */ |
102 | #undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */ | ||
103 | #define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */ | 102 | #define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */ |
104 | #ifdef CONFIG_SMP | 103 | #ifdef CONFIG_SMP |
105 | #define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ | 104 | #define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ |
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 166353388490..1ebd8004469c 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c | |||
@@ -51,6 +51,7 @@ | |||
51 | #include "xfs_vnodeops.h" | 51 | #include "xfs_vnodeops.h" |
52 | 52 | ||
53 | #include <linux/capability.h> | 53 | #include <linux/capability.h> |
54 | #include <linux/mount.h> | ||
54 | #include <linux/writeback.h> | 55 | #include <linux/writeback.h> |
55 | 56 | ||
56 | 57 | ||
@@ -176,7 +177,6 @@ xfs_read( | |||
176 | { | 177 | { |
177 | struct file *file = iocb->ki_filp; | 178 | struct file *file = iocb->ki_filp; |
178 | struct inode *inode = file->f_mapping->host; | 179 | struct inode *inode = file->f_mapping->host; |
179 | bhv_vnode_t *vp = XFS_ITOV(ip); | ||
180 | xfs_mount_t *mp = ip->i_mount; | 180 | xfs_mount_t *mp = ip->i_mount; |
181 | size_t size = 0; | 181 | size_t size = 0; |
182 | ssize_t ret = 0; | 182 | ssize_t ret = 0; |
@@ -228,11 +228,11 @@ xfs_read( | |||
228 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | 228 | xfs_ilock(ip, XFS_IOLOCK_SHARED); |
229 | 229 | ||
230 | if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { | 230 | if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { |
231 | bhv_vrwlock_t locktype = VRWLOCK_READ; | ||
232 | int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); | 231 | int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); |
232 | int iolock = XFS_IOLOCK_SHARED; | ||
233 | 233 | ||
234 | ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp, *offset, size, | 234 | ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *offset, size, |
235 | dmflags, &locktype); | 235 | dmflags, &iolock); |
236 | if (ret) { | 236 | if (ret) { |
237 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 237 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); |
238 | if (unlikely(ioflags & IO_ISDIRECT)) | 238 | if (unlikely(ioflags & IO_ISDIRECT)) |
@@ -242,7 +242,7 @@ xfs_read( | |||
242 | } | 242 | } |
243 | 243 | ||
244 | if (unlikely(ioflags & IO_ISDIRECT)) { | 244 | if (unlikely(ioflags & IO_ISDIRECT)) { |
245 | if (VN_CACHED(vp)) | 245 | if (inode->i_mapping->nrpages) |
246 | ret = xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK), | 246 | ret = xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK), |
247 | -1, FI_REMAPF_LOCKED); | 247 | -1, FI_REMAPF_LOCKED); |
248 | mutex_unlock(&inode->i_mutex); | 248 | mutex_unlock(&inode->i_mutex); |
@@ -276,7 +276,6 @@ xfs_splice_read( | |||
276 | int flags, | 276 | int flags, |
277 | int ioflags) | 277 | int ioflags) |
278 | { | 278 | { |
279 | bhv_vnode_t *vp = XFS_ITOV(ip); | ||
280 | xfs_mount_t *mp = ip->i_mount; | 279 | xfs_mount_t *mp = ip->i_mount; |
281 | ssize_t ret; | 280 | ssize_t ret; |
282 | 281 | ||
@@ -287,11 +286,11 @@ xfs_splice_read( | |||
287 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | 286 | xfs_ilock(ip, XFS_IOLOCK_SHARED); |
288 | 287 | ||
289 | if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { | 288 | if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { |
290 | bhv_vrwlock_t locktype = VRWLOCK_READ; | 289 | int iolock = XFS_IOLOCK_SHARED; |
291 | int error; | 290 | int error; |
292 | 291 | ||
293 | error = XFS_SEND_DATA(mp, DM_EVENT_READ, vp, *ppos, count, | 292 | error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count, |
294 | FILP_DELAY_FLAG(infilp), &locktype); | 293 | FILP_DELAY_FLAG(infilp), &iolock); |
295 | if (error) { | 294 | if (error) { |
296 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 295 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); |
297 | return -error; | 296 | return -error; |
@@ -317,7 +316,6 @@ xfs_splice_write( | |||
317 | int flags, | 316 | int flags, |
318 | int ioflags) | 317 | int ioflags) |
319 | { | 318 | { |
320 | bhv_vnode_t *vp = XFS_ITOV(ip); | ||
321 | xfs_mount_t *mp = ip->i_mount; | 319 | xfs_mount_t *mp = ip->i_mount; |
322 | ssize_t ret; | 320 | ssize_t ret; |
323 | struct inode *inode = outfilp->f_mapping->host; | 321 | struct inode *inode = outfilp->f_mapping->host; |
@@ -330,11 +328,11 @@ xfs_splice_write( | |||
330 | xfs_ilock(ip, XFS_IOLOCK_EXCL); | 328 | xfs_ilock(ip, XFS_IOLOCK_EXCL); |
331 | 329 | ||
332 | if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) { | 330 | if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) { |
333 | bhv_vrwlock_t locktype = VRWLOCK_WRITE; | 331 | int iolock = XFS_IOLOCK_EXCL; |
334 | int error; | 332 | int error; |
335 | 333 | ||
336 | error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, *ppos, count, | 334 | error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count, |
337 | FILP_DELAY_FLAG(outfilp), &locktype); | 335 | FILP_DELAY_FLAG(outfilp), &iolock); |
338 | if (error) { | 336 | if (error) { |
339 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | 337 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
340 | return -error; | 338 | return -error; |
@@ -573,14 +571,12 @@ xfs_write( | |||
573 | struct file *file = iocb->ki_filp; | 571 | struct file *file = iocb->ki_filp; |
574 | struct address_space *mapping = file->f_mapping; | 572 | struct address_space *mapping = file->f_mapping; |
575 | struct inode *inode = mapping->host; | 573 | struct inode *inode = mapping->host; |
576 | bhv_vnode_t *vp = XFS_ITOV(xip); | ||
577 | unsigned long segs = nsegs; | 574 | unsigned long segs = nsegs; |
578 | xfs_mount_t *mp; | 575 | xfs_mount_t *mp; |
579 | ssize_t ret = 0, error = 0; | 576 | ssize_t ret = 0, error = 0; |
580 | xfs_fsize_t isize, new_size; | 577 | xfs_fsize_t isize, new_size; |
581 | int iolock; | 578 | int iolock; |
582 | int eventsent = 0; | 579 | int eventsent = 0; |
583 | bhv_vrwlock_t locktype; | ||
584 | size_t ocount = 0, count; | 580 | size_t ocount = 0, count; |
585 | loff_t pos; | 581 | loff_t pos; |
586 | int need_i_mutex; | 582 | int need_i_mutex; |
@@ -607,11 +603,9 @@ xfs_write( | |||
607 | relock: | 603 | relock: |
608 | if (ioflags & IO_ISDIRECT) { | 604 | if (ioflags & IO_ISDIRECT) { |
609 | iolock = XFS_IOLOCK_SHARED; | 605 | iolock = XFS_IOLOCK_SHARED; |
610 | locktype = VRWLOCK_WRITE_DIRECT; | ||
611 | need_i_mutex = 0; | 606 | need_i_mutex = 0; |
612 | } else { | 607 | } else { |
613 | iolock = XFS_IOLOCK_EXCL; | 608 | iolock = XFS_IOLOCK_EXCL; |
614 | locktype = VRWLOCK_WRITE; | ||
615 | need_i_mutex = 1; | 609 | need_i_mutex = 1; |
616 | mutex_lock(&inode->i_mutex); | 610 | mutex_lock(&inode->i_mutex); |
617 | } | 611 | } |
@@ -634,9 +628,8 @@ start: | |||
634 | dmflags |= DM_FLAGS_IMUX; | 628 | dmflags |= DM_FLAGS_IMUX; |
635 | 629 | ||
636 | xfs_iunlock(xip, XFS_ILOCK_EXCL); | 630 | xfs_iunlock(xip, XFS_ILOCK_EXCL); |
637 | error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, | 631 | error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, xip, |
638 | pos, count, | 632 | pos, count, dmflags, &iolock); |
639 | dmflags, &locktype); | ||
640 | if (error) { | 633 | if (error) { |
641 | goto out_unlock_internal; | 634 | goto out_unlock_internal; |
642 | } | 635 | } |
@@ -664,10 +657,9 @@ start: | |||
664 | return XFS_ERROR(-EINVAL); | 657 | return XFS_ERROR(-EINVAL); |
665 | } | 658 | } |
666 | 659 | ||
667 | if (!need_i_mutex && (VN_CACHED(vp) || pos > xip->i_size)) { | 660 | if (!need_i_mutex && (mapping->nrpages || pos > xip->i_size)) { |
668 | xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); | 661 | xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); |
669 | iolock = XFS_IOLOCK_EXCL; | 662 | iolock = XFS_IOLOCK_EXCL; |
670 | locktype = VRWLOCK_WRITE; | ||
671 | need_i_mutex = 1; | 663 | need_i_mutex = 1; |
672 | mutex_lock(&inode->i_mutex); | 664 | mutex_lock(&inode->i_mutex); |
673 | xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); | 665 | xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); |
@@ -679,10 +671,16 @@ start: | |||
679 | if (new_size > xip->i_size) | 671 | if (new_size > xip->i_size) |
680 | xip->i_new_size = new_size; | 672 | xip->i_new_size = new_size; |
681 | 673 | ||
682 | if (likely(!(ioflags & IO_INVIS))) { | 674 | /* |
675 | * We're not supposed to change timestamps in readonly-mounted | ||
676 | * filesystems. Throw it away if anyone asks us. | ||
677 | */ | ||
678 | if (likely(!(ioflags & IO_INVIS) && | ||
679 | !mnt_want_write(file->f_path.mnt))) { | ||
683 | file_update_time(file); | 680 | file_update_time(file); |
684 | xfs_ichgtime_fast(xip, inode, | 681 | xfs_ichgtime_fast(xip, inode, |
685 | XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 682 | XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
683 | mnt_drop_write(file->f_path.mnt); | ||
686 | } | 684 | } |
687 | 685 | ||
688 | /* | 686 | /* |
@@ -727,7 +725,7 @@ retry: | |||
727 | current->backing_dev_info = mapping->backing_dev_info; | 725 | current->backing_dev_info = mapping->backing_dev_info; |
728 | 726 | ||
729 | if ((ioflags & IO_ISDIRECT)) { | 727 | if ((ioflags & IO_ISDIRECT)) { |
730 | if (VN_CACHED(vp)) { | 728 | if (mapping->nrpages) { |
731 | WARN_ON(need_i_mutex == 0); | 729 | WARN_ON(need_i_mutex == 0); |
732 | xfs_inval_cached_trace(xip, pos, -1, | 730 | xfs_inval_cached_trace(xip, pos, -1, |
733 | (pos & PAGE_CACHE_MASK), -1); | 731 | (pos & PAGE_CACHE_MASK), -1); |
@@ -744,7 +742,6 @@ retry: | |||
744 | mutex_unlock(&inode->i_mutex); | 742 | mutex_unlock(&inode->i_mutex); |
745 | 743 | ||
746 | iolock = XFS_IOLOCK_SHARED; | 744 | iolock = XFS_IOLOCK_SHARED; |
747 | locktype = VRWLOCK_WRITE_DIRECT; | ||
748 | need_i_mutex = 0; | 745 | need_i_mutex = 0; |
749 | } | 746 | } |
750 | 747 | ||
@@ -781,15 +778,15 @@ retry: | |||
781 | 778 | ||
782 | if (ret == -ENOSPC && | 779 | if (ret == -ENOSPC && |
783 | DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { | 780 | DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { |
784 | xfs_rwunlock(xip, locktype); | 781 | xfs_iunlock(xip, iolock); |
785 | if (need_i_mutex) | 782 | if (need_i_mutex) |
786 | mutex_unlock(&inode->i_mutex); | 783 | mutex_unlock(&inode->i_mutex); |
787 | error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, | 784 | error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, xip, |
788 | DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, | 785 | DM_RIGHT_NULL, xip, DM_RIGHT_NULL, NULL, NULL, |
789 | 0, 0, 0); /* Delay flag intentionally unused */ | 786 | 0, 0, 0); /* Delay flag intentionally unused */ |
790 | if (need_i_mutex) | 787 | if (need_i_mutex) |
791 | mutex_lock(&inode->i_mutex); | 788 | mutex_lock(&inode->i_mutex); |
792 | xfs_rwlock(xip, locktype); | 789 | xfs_ilock(xip, iolock); |
793 | if (error) | 790 | if (error) |
794 | goto out_unlock_internal; | 791 | goto out_unlock_internal; |
795 | pos = xip->i_size; | 792 | pos = xip->i_size; |
@@ -817,7 +814,8 @@ retry: | |||
817 | /* Handle various SYNC-type writes */ | 814 | /* Handle various SYNC-type writes */ |
818 | if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { | 815 | if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { |
819 | int error2; | 816 | int error2; |
820 | xfs_rwunlock(xip, locktype); | 817 | |
818 | xfs_iunlock(xip, iolock); | ||
821 | if (need_i_mutex) | 819 | if (need_i_mutex) |
822 | mutex_unlock(&inode->i_mutex); | 820 | mutex_unlock(&inode->i_mutex); |
823 | error2 = sync_page_range(inode, mapping, pos, ret); | 821 | error2 = sync_page_range(inode, mapping, pos, ret); |
@@ -825,7 +823,7 @@ retry: | |||
825 | error = error2; | 823 | error = error2; |
826 | if (need_i_mutex) | 824 | if (need_i_mutex) |
827 | mutex_lock(&inode->i_mutex); | 825 | mutex_lock(&inode->i_mutex); |
828 | xfs_rwlock(xip, locktype); | 826 | xfs_ilock(xip, iolock); |
829 | error2 = xfs_write_sync_logforce(mp, xip); | 827 | error2 = xfs_write_sync_logforce(mp, xip); |
830 | if (!error) | 828 | if (!error) |
831 | error = error2; | 829 | error = error2; |
@@ -846,7 +844,7 @@ retry: | |||
846 | xip->i_d.di_size = xip->i_size; | 844 | xip->i_d.di_size = xip->i_size; |
847 | xfs_iunlock(xip, XFS_ILOCK_EXCL); | 845 | xfs_iunlock(xip, XFS_ILOCK_EXCL); |
848 | } | 846 | } |
849 | xfs_rwunlock(xip, locktype); | 847 | xfs_iunlock(xip, iolock); |
850 | out_unlock_mutex: | 848 | out_unlock_mutex: |
851 | if (need_i_mutex) | 849 | if (need_i_mutex) |
852 | mutex_unlock(&inode->i_mutex); | 850 | mutex_unlock(&inode->i_mutex); |
@@ -884,28 +882,23 @@ xfs_bdstrat_cb(struct xfs_buf *bp) | |||
884 | } | 882 | } |
885 | 883 | ||
886 | /* | 884 | /* |
887 | * Wrapper around bdstrat so that we can stop data | 885 | * Wrapper around bdstrat so that we can stop data from going to disk in case |
888 | * from going to disk in case we are shutting down the filesystem. | 886 | * we are shutting down the filesystem. Typically user data goes thru this |
889 | * Typically user data goes thru this path; one of the exceptions | 887 | * path; one of the exceptions is the superblock. |
890 | * is the superblock. | ||
891 | */ | 888 | */ |
892 | int | 889 | void |
893 | xfsbdstrat( | 890 | xfsbdstrat( |
894 | struct xfs_mount *mp, | 891 | struct xfs_mount *mp, |
895 | struct xfs_buf *bp) | 892 | struct xfs_buf *bp) |
896 | { | 893 | { |
897 | ASSERT(mp); | 894 | ASSERT(mp); |
898 | if (!XFS_FORCED_SHUTDOWN(mp)) { | 895 | if (!XFS_FORCED_SHUTDOWN(mp)) { |
899 | /* Grio redirection would go here | ||
900 | * if (XFS_BUF_IS_GRIO(bp)) { | ||
901 | */ | ||
902 | |||
903 | xfs_buf_iorequest(bp); | 896 | xfs_buf_iorequest(bp); |
904 | return 0; | 897 | return; |
905 | } | 898 | } |
906 | 899 | ||
907 | xfs_buftrace("XFSBDSTRAT IOERROR", bp); | 900 | xfs_buftrace("XFSBDSTRAT IOERROR", bp); |
908 | return (xfs_bioerror_relse(bp)); | 901 | xfs_bioerror_relse(bp); |
909 | } | 902 | } |
910 | 903 | ||
911 | /* | 904 | /* |
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h index e200253139cf..e1d498b4ba7a 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.h +++ b/fs/xfs/linux-2.6/xfs_lrw.h | |||
@@ -68,7 +68,8 @@ extern void xfs_inval_cached_trace(struct xfs_inode *, | |||
68 | #define xfs_inval_cached_trace(ip, offset, len, first, last) | 68 | #define xfs_inval_cached_trace(ip, offset, len, first, last) |
69 | #endif | 69 | #endif |
70 | 70 | ||
71 | extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *); | 71 | /* errors from xfsbdstrat() must be extracted from the buffer */ |
72 | extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); | ||
72 | extern int xfs_bdstrat_cb(struct xfs_buf *); | 73 | extern int xfs_bdstrat_cb(struct xfs_buf *); |
73 | extern int xfs_dev_is_read_only(struct xfs_mount *, char *); | 74 | extern int xfs_dev_is_read_only(struct xfs_mount *, char *); |
74 | 75 | ||
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h index 8ba7a2fa6c1d..afd0b0d5fdb2 100644 --- a/fs/xfs/linux-2.6/xfs_stats.h +++ b/fs/xfs/linux-2.6/xfs_stats.h | |||
@@ -144,8 +144,8 @@ extern void xfs_cleanup_procfs(void); | |||
144 | # define XFS_STATS_DEC(count) | 144 | # define XFS_STATS_DEC(count) |
145 | # define XFS_STATS_ADD(count, inc) | 145 | # define XFS_STATS_ADD(count, inc) |
146 | 146 | ||
147 | static __inline void xfs_init_procfs(void) { }; | 147 | static inline void xfs_init_procfs(void) { }; |
148 | static __inline void xfs_cleanup_procfs(void) { }; | 148 | static inline void xfs_cleanup_procfs(void) { }; |
149 | 149 | ||
150 | #endif /* !CONFIG_PROC_FS */ | 150 | #endif /* !CONFIG_PROC_FS */ |
151 | 151 | ||
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 8831d9518790..865eb708aa95 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -896,7 +896,8 @@ xfs_fs_write_inode( | |||
896 | struct inode *inode, | 896 | struct inode *inode, |
897 | int sync) | 897 | int sync) |
898 | { | 898 | { |
899 | int error = 0, flags = FLUSH_INODE; | 899 | int error = 0; |
900 | int flags = 0; | ||
900 | 901 | ||
901 | xfs_itrace_entry(XFS_I(inode)); | 902 | xfs_itrace_entry(XFS_I(inode)); |
902 | if (sync) { | 903 | if (sync) { |
@@ -934,7 +935,7 @@ xfs_fs_clear_inode( | |||
934 | xfs_inactive(ip); | 935 | xfs_inactive(ip); |
935 | xfs_iflags_clear(ip, XFS_IMODIFIED); | 936 | xfs_iflags_clear(ip, XFS_IMODIFIED); |
936 | if (xfs_reclaim(ip)) | 937 | if (xfs_reclaim(ip)) |
937 | panic("%s: cannot reclaim 0x%p\n", __FUNCTION__, inode); | 938 | panic("%s: cannot reclaim 0x%p\n", __func__, inode); |
938 | } | 939 | } |
939 | 940 | ||
940 | ASSERT(XFS_I(inode) == NULL); | 941 | ASSERT(XFS_I(inode) == NULL); |
@@ -1027,8 +1028,7 @@ xfs_sync_worker( | |||
1027 | int error; | 1028 | int error; |
1028 | 1029 | ||
1029 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) | 1030 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) |
1030 | error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR | | 1031 | error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR); |
1031 | SYNC_REFCACHE | SYNC_SUPER); | ||
1032 | mp->m_sync_seq++; | 1032 | mp->m_sync_seq++; |
1033 | wake_up(&mp->m_wait_single_sync_task); | 1033 | wake_up(&mp->m_wait_single_sync_task); |
1034 | } | 1034 | } |
@@ -1306,7 +1306,7 @@ xfs_fs_fill_super( | |||
1306 | void *data, | 1306 | void *data, |
1307 | int silent) | 1307 | int silent) |
1308 | { | 1308 | { |
1309 | struct inode *rootvp; | 1309 | struct inode *root; |
1310 | struct xfs_mount *mp = NULL; | 1310 | struct xfs_mount *mp = NULL; |
1311 | struct xfs_mount_args *args = xfs_args_allocate(sb, silent); | 1311 | struct xfs_mount_args *args = xfs_args_allocate(sb, silent); |
1312 | int error; | 1312 | int error; |
@@ -1344,19 +1344,18 @@ xfs_fs_fill_super( | |||
1344 | sb->s_time_gran = 1; | 1344 | sb->s_time_gran = 1; |
1345 | set_posix_acl_flag(sb); | 1345 | set_posix_acl_flag(sb); |
1346 | 1346 | ||
1347 | rootvp = igrab(mp->m_rootip->i_vnode); | 1347 | root = igrab(mp->m_rootip->i_vnode); |
1348 | if (!rootvp) { | 1348 | if (!root) { |
1349 | error = ENOENT; | 1349 | error = ENOENT; |
1350 | goto fail_unmount; | 1350 | goto fail_unmount; |
1351 | } | 1351 | } |
1352 | 1352 | if (is_bad_inode(root)) { | |
1353 | sb->s_root = d_alloc_root(vn_to_inode(rootvp)); | 1353 | error = EINVAL; |
1354 | if (!sb->s_root) { | ||
1355 | error = ENOMEM; | ||
1356 | goto fail_vnrele; | 1354 | goto fail_vnrele; |
1357 | } | 1355 | } |
1358 | if (is_bad_inode(sb->s_root->d_inode)) { | 1356 | sb->s_root = d_alloc_root(root); |
1359 | error = EINVAL; | 1357 | if (!sb->s_root) { |
1358 | error = ENOMEM; | ||
1360 | goto fail_vnrele; | 1359 | goto fail_vnrele; |
1361 | } | 1360 | } |
1362 | 1361 | ||
@@ -1378,7 +1377,7 @@ fail_vnrele: | |||
1378 | dput(sb->s_root); | 1377 | dput(sb->s_root); |
1379 | sb->s_root = NULL; | 1378 | sb->s_root = NULL; |
1380 | } else { | 1379 | } else { |
1381 | VN_RELE(rootvp); | 1380 | iput(root); |
1382 | } | 1381 | } |
1383 | 1382 | ||
1384 | fail_unmount: | 1383 | fail_unmount: |
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index 3efcf45b14ab..3efb7c6d3303 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h | |||
@@ -50,13 +50,7 @@ extern void xfs_qm_exit(void); | |||
50 | # define set_posix_acl_flag(sb) do { } while (0) | 50 | # define set_posix_acl_flag(sb) do { } while (0) |
51 | #endif | 51 | #endif |
52 | 52 | ||
53 | #ifdef CONFIG_XFS_SECURITY | 53 | #define XFS_SECURITY_STRING "security attributes, " |
54 | # define XFS_SECURITY_STRING "security attributes, " | ||
55 | # define ENOSECURITY 0 | ||
56 | #else | ||
57 | # define XFS_SECURITY_STRING | ||
58 | # define ENOSECURITY EOPNOTSUPP | ||
59 | #endif | ||
60 | 54 | ||
61 | #ifdef CONFIG_XFS_RT | 55 | #ifdef CONFIG_XFS_RT |
62 | # define XFS_REALTIME_STRING "realtime, " | 56 | # define XFS_REALTIME_STRING "realtime, " |
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h index 4da03a4e3520..7e60c7776b1c 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.h +++ b/fs/xfs/linux-2.6/xfs_vfs.h | |||
@@ -49,7 +49,6 @@ typedef struct bhv_vfs_sync_work { | |||
49 | #define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ | 49 | #define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ |
50 | #define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ | 50 | #define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ |
51 | #define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */ | 51 | #define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */ |
52 | #define SYNC_SUPER 0x0200 /* flush superblock to disk */ | ||
53 | 52 | ||
54 | /* | 53 | /* |
55 | * When remounting a filesystem read-only or freezing the filesystem, | 54 | * When remounting a filesystem read-only or freezing the filesystem, |
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index b5ea418693b1..8b4d63ce8694 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h | |||
@@ -23,8 +23,6 @@ struct bhv_vattr; | |||
23 | struct xfs_iomap; | 23 | struct xfs_iomap; |
24 | struct attrlist_cursor_kern; | 24 | struct attrlist_cursor_kern; |
25 | 25 | ||
26 | typedef struct dentry bhv_vname_t; | ||
27 | typedef __u64 bhv_vnumber_t; | ||
28 | typedef struct inode bhv_vnode_t; | 26 | typedef struct inode bhv_vnode_t; |
29 | 27 | ||
30 | #define VN_ISLNK(vp) S_ISLNK((vp)->i_mode) | 28 | #define VN_ISLNK(vp) S_ISLNK((vp)->i_mode) |
@@ -46,18 +44,6 @@ static inline struct inode *vn_to_inode(bhv_vnode_t *vnode) | |||
46 | } | 44 | } |
47 | 45 | ||
48 | /* | 46 | /* |
49 | * Values for the vop_rwlock/rwunlock flags parameter. | ||
50 | */ | ||
51 | typedef enum bhv_vrwlock { | ||
52 | VRWLOCK_NONE, | ||
53 | VRWLOCK_READ, | ||
54 | VRWLOCK_WRITE, | ||
55 | VRWLOCK_WRITE_DIRECT, | ||
56 | VRWLOCK_TRY_READ, | ||
57 | VRWLOCK_TRY_WRITE | ||
58 | } bhv_vrwlock_t; | ||
59 | |||
60 | /* | ||
61 | * Return values for xfs_inactive. A return value of | 47 | * Return values for xfs_inactive. A return value of |
62 | * VN_INACTIVE_NOCACHE implies that the file system behavior | 48 | * VN_INACTIVE_NOCACHE implies that the file system behavior |
63 | * has disassociated its state and bhv_desc_t from the vnode. | 49 | * has disassociated its state and bhv_desc_t from the vnode. |
@@ -73,12 +59,9 @@ typedef enum bhv_vrwlock { | |||
73 | #define IO_INVIS 0x00020 /* don't update inode timestamps */ | 59 | #define IO_INVIS 0x00020 /* don't update inode timestamps */ |
74 | 60 | ||
75 | /* | 61 | /* |
76 | * Flags for vop_iflush call | 62 | * Flags for xfs_inode_flush |
77 | */ | 63 | */ |
78 | #define FLUSH_SYNC 1 /* wait for flush to complete */ | 64 | #define FLUSH_SYNC 1 /* wait for flush to complete */ |
79 | #define FLUSH_INODE 2 /* flush the inode itself */ | ||
80 | #define FLUSH_LOG 4 /* force the last log entry for | ||
81 | * this inode out to disk */ | ||
82 | 65 | ||
83 | /* | 66 | /* |
84 | * Flush/Invalidate options for vop_toss/flush/flushinval_pages. | 67 | * Flush/Invalidate options for vop_toss/flush/flushinval_pages. |
@@ -226,13 +209,6 @@ static inline bhv_vnode_t *vn_grab(bhv_vnode_t *vp) | |||
226 | } | 209 | } |
227 | 210 | ||
228 | /* | 211 | /* |
229 | * Vname handling macros. | ||
230 | */ | ||
231 | #define VNAME(dentry) ((char *) (dentry)->d_name.name) | ||
232 | #define VNAMELEN(dentry) ((dentry)->d_name.len) | ||
233 | #define VNAME_TO_VNODE(dentry) (vn_from_inode((dentry)->d_inode)) | ||
234 | |||
235 | /* | ||
236 | * Dealing with bad inodes | 212 | * Dealing with bad inodes |
237 | */ | 213 | */ |
238 | static inline int VN_BAD(bhv_vnode_t *vp) | 214 | static inline int VN_BAD(bhv_vnode_t *vp) |
@@ -303,9 +279,9 @@ extern void xfs_itrace_hold(struct xfs_inode *, char *, int, inst_t *); | |||
303 | extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *); | 279 | extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *); |
304 | extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *); | 280 | extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *); |
305 | #define xfs_itrace_entry(ip) \ | 281 | #define xfs_itrace_entry(ip) \ |
306 | _xfs_itrace_entry(ip, __FUNCTION__, (inst_t *)__return_address) | 282 | _xfs_itrace_entry(ip, __func__, (inst_t *)__return_address) |
307 | #define xfs_itrace_exit(ip) \ | 283 | #define xfs_itrace_exit(ip) \ |
308 | _xfs_itrace_exit(ip, __FUNCTION__, (inst_t *)__return_address) | 284 | _xfs_itrace_exit(ip, __func__, (inst_t *)__return_address) |
309 | #define xfs_itrace_exit_tag(ip, tag) \ | 285 | #define xfs_itrace_exit_tag(ip, tag) \ |
310 | _xfs_itrace_exit(ip, tag, (inst_t *)__return_address) | 286 | _xfs_itrace_exit(ip, tag, (inst_t *)__return_address) |
311 | #define xfs_itrace_ref(ip) \ | 287 | #define xfs_itrace_ref(ip) \ |
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 665babcca6a6..631ebb31b295 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c | |||
@@ -1291,7 +1291,7 @@ xfs_qm_dqflush( | |||
1291 | if (flags & XFS_QMOPT_DELWRI) { | 1291 | if (flags & XFS_QMOPT_DELWRI) { |
1292 | xfs_bdwrite(mp, bp); | 1292 | xfs_bdwrite(mp, bp); |
1293 | } else if (flags & XFS_QMOPT_ASYNC) { | 1293 | } else if (flags & XFS_QMOPT_ASYNC) { |
1294 | xfs_bawrite(mp, bp); | 1294 | error = xfs_bawrite(mp, bp); |
1295 | } else { | 1295 | } else { |
1296 | error = xfs_bwrite(mp, bp); | 1296 | error = xfs_bwrite(mp, bp); |
1297 | } | 1297 | } |
@@ -1439,9 +1439,7 @@ xfs_qm_dqpurge( | |||
1439 | uint flags) | 1439 | uint flags) |
1440 | { | 1440 | { |
1441 | xfs_dqhash_t *thishash; | 1441 | xfs_dqhash_t *thishash; |
1442 | xfs_mount_t *mp; | 1442 | xfs_mount_t *mp = dqp->q_mount; |
1443 | |||
1444 | mp = dqp->q_mount; | ||
1445 | 1443 | ||
1446 | ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp)); | 1444 | ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp)); |
1447 | ASSERT(XFS_DQ_IS_HASH_LOCKED(dqp->q_hash)); | 1445 | ASSERT(XFS_DQ_IS_HASH_LOCKED(dqp->q_hash)); |
@@ -1485,6 +1483,7 @@ xfs_qm_dqpurge( | |||
1485 | * we're unmounting, we do care, so we flush it and wait. | 1483 | * we're unmounting, we do care, so we flush it and wait. |
1486 | */ | 1484 | */ |
1487 | if (XFS_DQ_IS_DIRTY(dqp)) { | 1485 | if (XFS_DQ_IS_DIRTY(dqp)) { |
1486 | int error; | ||
1488 | xfs_dqtrace_entry(dqp, "DQPURGE ->DQFLUSH: DQDIRTY"); | 1487 | xfs_dqtrace_entry(dqp, "DQPURGE ->DQFLUSH: DQDIRTY"); |
1489 | /* dqflush unlocks dqflock */ | 1488 | /* dqflush unlocks dqflock */ |
1490 | /* | 1489 | /* |
@@ -1495,7 +1494,10 @@ xfs_qm_dqpurge( | |||
1495 | * We don't care about getting disk errors here. We need | 1494 | * We don't care about getting disk errors here. We need |
1496 | * to purge this dquot anyway, so we go ahead regardless. | 1495 | * to purge this dquot anyway, so we go ahead regardless. |
1497 | */ | 1496 | */ |
1498 | (void) xfs_qm_dqflush(dqp, XFS_QMOPT_SYNC); | 1497 | error = xfs_qm_dqflush(dqp, XFS_QMOPT_SYNC); |
1498 | if (error) | ||
1499 | xfs_fs_cmn_err(CE_WARN, mp, | ||
1500 | "xfs_qm_dqpurge: dquot %p flush failed", dqp); | ||
1499 | xfs_dqflock(dqp); | 1501 | xfs_dqflock(dqp); |
1500 | } | 1502 | } |
1501 | ASSERT(dqp->q_pincount == 0); | 1503 | ASSERT(dqp->q_pincount == 0); |
@@ -1580,12 +1582,18 @@ xfs_qm_dqflock_pushbuf_wait( | |||
1580 | XFS_INCORE_TRYLOCK); | 1582 | XFS_INCORE_TRYLOCK); |
1581 | if (bp != NULL) { | 1583 | if (bp != NULL) { |
1582 | if (XFS_BUF_ISDELAYWRITE(bp)) { | 1584 | if (XFS_BUF_ISDELAYWRITE(bp)) { |
1585 | int error; | ||
1583 | if (XFS_BUF_ISPINNED(bp)) { | 1586 | if (XFS_BUF_ISPINNED(bp)) { |
1584 | xfs_log_force(dqp->q_mount, | 1587 | xfs_log_force(dqp->q_mount, |
1585 | (xfs_lsn_t)0, | 1588 | (xfs_lsn_t)0, |
1586 | XFS_LOG_FORCE); | 1589 | XFS_LOG_FORCE); |
1587 | } | 1590 | } |
1588 | xfs_bawrite(dqp->q_mount, bp); | 1591 | error = xfs_bawrite(dqp->q_mount, bp); |
1592 | if (error) | ||
1593 | xfs_fs_cmn_err(CE_WARN, dqp->q_mount, | ||
1594 | "xfs_qm_dqflock_pushbuf_wait: " | ||
1595 | "pushbuf error %d on dqp %p, bp %p", | ||
1596 | error, dqp, bp); | ||
1589 | } else { | 1597 | } else { |
1590 | xfs_buf_relse(bp); | 1598 | xfs_buf_relse(bp); |
1591 | } | 1599 | } |
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 1800e8d1f646..36e05ca78412 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c | |||
@@ -146,6 +146,7 @@ xfs_qm_dquot_logitem_push( | |||
146 | xfs_dq_logitem_t *logitem) | 146 | xfs_dq_logitem_t *logitem) |
147 | { | 147 | { |
148 | xfs_dquot_t *dqp; | 148 | xfs_dquot_t *dqp; |
149 | int error; | ||
149 | 150 | ||
150 | dqp = logitem->qli_dquot; | 151 | dqp = logitem->qli_dquot; |
151 | 152 | ||
@@ -161,7 +162,11 @@ xfs_qm_dquot_logitem_push( | |||
161 | * lock without sleeping, then there must not have been | 162 | * lock without sleeping, then there must not have been |
162 | * anyone in the process of flushing the dquot. | 163 | * anyone in the process of flushing the dquot. |
163 | */ | 164 | */ |
164 | xfs_qm_dqflush(dqp, XFS_B_DELWRI); | 165 | error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); |
166 | if (error) | ||
167 | xfs_fs_cmn_err(CE_WARN, dqp->q_mount, | ||
168 | "xfs_qm_dquot_logitem_push: push error %d on dqp %p", | ||
169 | error, dqp); | ||
165 | xfs_dqunlock(dqp); | 170 | xfs_dqunlock(dqp); |
166 | } | 171 | } |
167 | 172 | ||
@@ -262,11 +267,16 @@ xfs_qm_dquot_logitem_pushbuf( | |||
262 | XFS_LOG_FORCE); | 267 | XFS_LOG_FORCE); |
263 | } | 268 | } |
264 | if (dopush) { | 269 | if (dopush) { |
270 | int error; | ||
265 | #ifdef XFSRACEDEBUG | 271 | #ifdef XFSRACEDEBUG |
266 | delay_for_intr(); | 272 | delay_for_intr(); |
267 | delay(300); | 273 | delay(300); |
268 | #endif | 274 | #endif |
269 | xfs_bawrite(mp, bp); | 275 | error = xfs_bawrite(mp, bp); |
276 | if (error) | ||
277 | xfs_fs_cmn_err(CE_WARN, mp, | ||
278 | "xfs_qm_dquot_logitem_pushbuf: pushbuf error %d on qip %p, bp %p", | ||
279 | error, qip, bp); | ||
270 | } else { | 280 | } else { |
271 | xfs_buf_relse(bp); | 281 | xfs_buf_relse(bp); |
272 | } | 282 | } |
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 8e9c5ae6504d..40ea56409561 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c | |||
@@ -304,8 +304,11 @@ xfs_qm_unmount_quotadestroy( | |||
304 | * necessary data structures like quotainfo. This is also responsible for | 304 | * necessary data structures like quotainfo. This is also responsible for |
305 | * running a quotacheck as necessary. We are guaranteed that the superblock | 305 | * running a quotacheck as necessary. We are guaranteed that the superblock |
306 | * is consistently read in at this point. | 306 | * is consistently read in at this point. |
307 | * | ||
308 | * If we fail here, the mount will continue with quota turned off. We don't | ||
309 | * need to inidicate success or failure at all. | ||
307 | */ | 310 | */ |
308 | int | 311 | void |
309 | xfs_qm_mount_quotas( | 312 | xfs_qm_mount_quotas( |
310 | xfs_mount_t *mp, | 313 | xfs_mount_t *mp, |
311 | int mfsi_flags) | 314 | int mfsi_flags) |
@@ -313,7 +316,6 @@ xfs_qm_mount_quotas( | |||
313 | int error = 0; | 316 | int error = 0; |
314 | uint sbf; | 317 | uint sbf; |
315 | 318 | ||
316 | |||
317 | /* | 319 | /* |
318 | * If quotas on realtime volumes is not supported, we disable | 320 | * If quotas on realtime volumes is not supported, we disable |
319 | * quotas immediately. | 321 | * quotas immediately. |
@@ -332,7 +334,8 @@ xfs_qm_mount_quotas( | |||
332 | * Allocate the quotainfo structure inside the mount struct, and | 334 | * Allocate the quotainfo structure inside the mount struct, and |
333 | * create quotainode(s), and change/rev superblock if necessary. | 335 | * create quotainode(s), and change/rev superblock if necessary. |
334 | */ | 336 | */ |
335 | if ((error = xfs_qm_init_quotainfo(mp))) { | 337 | error = xfs_qm_init_quotainfo(mp); |
338 | if (error) { | ||
336 | /* | 339 | /* |
337 | * We must turn off quotas. | 340 | * We must turn off quotas. |
338 | */ | 341 | */ |
@@ -344,12 +347,11 @@ xfs_qm_mount_quotas( | |||
344 | * If any of the quotas are not consistent, do a quotacheck. | 347 | * If any of the quotas are not consistent, do a quotacheck. |
345 | */ | 348 | */ |
346 | if (XFS_QM_NEED_QUOTACHECK(mp) && | 349 | if (XFS_QM_NEED_QUOTACHECK(mp) && |
347 | !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) { | 350 | !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) { |
348 | if ((error = xfs_qm_quotacheck(mp))) { | 351 | error = xfs_qm_quotacheck(mp); |
349 | /* Quotacheck has failed and quotas have | 352 | if (error) { |
350 | * been disabled. | 353 | /* Quotacheck failed and disabled quotas. */ |
351 | */ | 354 | return; |
352 | return XFS_ERROR(error); | ||
353 | } | 355 | } |
354 | } | 356 | } |
355 | /* | 357 | /* |
@@ -357,12 +359,10 @@ xfs_qm_mount_quotas( | |||
357 | * quotachecked status, since we won't be doing accounting for | 359 | * quotachecked status, since we won't be doing accounting for |
358 | * that type anymore. | 360 | * that type anymore. |
359 | */ | 361 | */ |
360 | if (!XFS_IS_UQUOTA_ON(mp)) { | 362 | if (!XFS_IS_UQUOTA_ON(mp)) |
361 | mp->m_qflags &= ~XFS_UQUOTA_CHKD; | 363 | mp->m_qflags &= ~XFS_UQUOTA_CHKD; |
362 | } | 364 | if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp))) |
363 | if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp))) { | ||
364 | mp->m_qflags &= ~XFS_OQUOTA_CHKD; | 365 | mp->m_qflags &= ~XFS_OQUOTA_CHKD; |
365 | } | ||
366 | 366 | ||
367 | write_changes: | 367 | write_changes: |
368 | /* | 368 | /* |
@@ -392,7 +392,7 @@ xfs_qm_mount_quotas( | |||
392 | xfs_fs_cmn_err(CE_WARN, mp, | 392 | xfs_fs_cmn_err(CE_WARN, mp, |
393 | "Failed to initialize disk quotas."); | 393 | "Failed to initialize disk quotas."); |
394 | } | 394 | } |
395 | return XFS_ERROR(error); | 395 | return; |
396 | } | 396 | } |
397 | 397 | ||
398 | /* | 398 | /* |
@@ -1438,7 +1438,7 @@ xfs_qm_qino_alloc( | |||
1438 | } | 1438 | } |
1439 | 1439 | ||
1440 | 1440 | ||
1441 | STATIC int | 1441 | STATIC void |
1442 | xfs_qm_reset_dqcounts( | 1442 | xfs_qm_reset_dqcounts( |
1443 | xfs_mount_t *mp, | 1443 | xfs_mount_t *mp, |
1444 | xfs_buf_t *bp, | 1444 | xfs_buf_t *bp, |
@@ -1478,8 +1478,6 @@ xfs_qm_reset_dqcounts( | |||
1478 | ddq->d_rtbwarns = 0; | 1478 | ddq->d_rtbwarns = 0; |
1479 | ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1); | 1479 | ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1); |
1480 | } | 1480 | } |
1481 | |||
1482 | return 0; | ||
1483 | } | 1481 | } |
1484 | 1482 | ||
1485 | STATIC int | 1483 | STATIC int |
@@ -1520,7 +1518,7 @@ xfs_qm_dqiter_bufs( | |||
1520 | if (error) | 1518 | if (error) |
1521 | break; | 1519 | break; |
1522 | 1520 | ||
1523 | (void) xfs_qm_reset_dqcounts(mp, bp, firstid, type); | 1521 | xfs_qm_reset_dqcounts(mp, bp, firstid, type); |
1524 | xfs_bdwrite(mp, bp); | 1522 | xfs_bdwrite(mp, bp); |
1525 | /* | 1523 | /* |
1526 | * goto the next block. | 1524 | * goto the next block. |
@@ -1810,7 +1808,7 @@ xfs_qm_dqusage_adjust( | |||
1810 | * Now release the inode. This will send it to 'inactive', and | 1808 | * Now release the inode. This will send it to 'inactive', and |
1811 | * possibly even free blocks. | 1809 | * possibly even free blocks. |
1812 | */ | 1810 | */ |
1813 | VN_RELE(XFS_ITOV(ip)); | 1811 | IRELE(ip); |
1814 | 1812 | ||
1815 | /* | 1813 | /* |
1816 | * Goto next inode. | 1814 | * Goto next inode. |
@@ -1880,6 +1878,14 @@ xfs_qm_quotacheck( | |||
1880 | } while (! done); | 1878 | } while (! done); |
1881 | 1879 | ||
1882 | /* | 1880 | /* |
1881 | * We've made all the changes that we need to make incore. | ||
1882 | * Flush them down to disk buffers if everything was updated | ||
1883 | * successfully. | ||
1884 | */ | ||
1885 | if (!error) | ||
1886 | error = xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI); | ||
1887 | |||
1888 | /* | ||
1883 | * We can get this error if we couldn't do a dquot allocation inside | 1889 | * We can get this error if we couldn't do a dquot allocation inside |
1884 | * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the | 1890 | * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the |
1885 | * dirty dquots that might be cached, we just want to get rid of them | 1891 | * dirty dquots that might be cached, we just want to get rid of them |
@@ -1890,11 +1896,6 @@ xfs_qm_quotacheck( | |||
1890 | xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF); | 1896 | xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF); |
1891 | goto error_return; | 1897 | goto error_return; |
1892 | } | 1898 | } |
1893 | /* | ||
1894 | * We've made all the changes that we need to make incore. | ||
1895 | * Now flush_them down to disk buffers. | ||
1896 | */ | ||
1897 | xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI); | ||
1898 | 1899 | ||
1899 | /* | 1900 | /* |
1900 | * We didn't log anything, because if we crashed, we'll have to | 1901 | * We didn't log anything, because if we crashed, we'll have to |
@@ -1926,7 +1927,10 @@ xfs_qm_quotacheck( | |||
1926 | ASSERT(mp->m_quotainfo != NULL); | 1927 | ASSERT(mp->m_quotainfo != NULL); |
1927 | ASSERT(xfs_Gqm != NULL); | 1928 | ASSERT(xfs_Gqm != NULL); |
1928 | xfs_qm_destroy_quotainfo(mp); | 1929 | xfs_qm_destroy_quotainfo(mp); |
1929 | (void)xfs_mount_reset_sbqflags(mp); | 1930 | if (xfs_mount_reset_sbqflags(mp)) { |
1931 | cmn_err(CE_WARN, "XFS quotacheck %s: " | ||
1932 | "Failed to reset quota flags.", mp->m_fsname); | ||
1933 | } | ||
1930 | } else { | 1934 | } else { |
1931 | cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname); | 1935 | cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname); |
1932 | } | 1936 | } |
@@ -1968,7 +1972,7 @@ xfs_qm_init_quotainos( | |||
1968 | if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, | 1972 | if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, |
1969 | 0, 0, &gip, 0))) { | 1973 | 0, 0, &gip, 0))) { |
1970 | if (uip) | 1974 | if (uip) |
1971 | VN_RELE(XFS_ITOV(uip)); | 1975 | IRELE(uip); |
1972 | return XFS_ERROR(error); | 1976 | return XFS_ERROR(error); |
1973 | } | 1977 | } |
1974 | } | 1978 | } |
@@ -1999,7 +2003,7 @@ xfs_qm_init_quotainos( | |||
1999 | sbflags | XFS_SB_GQUOTINO, flags); | 2003 | sbflags | XFS_SB_GQUOTINO, flags); |
2000 | if (error) { | 2004 | if (error) { |
2001 | if (uip) | 2005 | if (uip) |
2002 | VN_RELE(XFS_ITOV(uip)); | 2006 | IRELE(uip); |
2003 | 2007 | ||
2004 | return XFS_ERROR(error); | 2008 | return XFS_ERROR(error); |
2005 | } | 2009 | } |
@@ -2093,12 +2097,17 @@ xfs_qm_shake_freelist( | |||
2093 | * dirty dquots. | 2097 | * dirty dquots. |
2094 | */ | 2098 | */ |
2095 | if (XFS_DQ_IS_DIRTY(dqp)) { | 2099 | if (XFS_DQ_IS_DIRTY(dqp)) { |
2100 | int error; | ||
2096 | xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY"); | 2101 | xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY"); |
2097 | /* | 2102 | /* |
2098 | * We flush it delayed write, so don't bother | 2103 | * We flush it delayed write, so don't bother |
2099 | * releasing the mplock. | 2104 | * releasing the mplock. |
2100 | */ | 2105 | */ |
2101 | (void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); | 2106 | error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); |
2107 | if (error) { | ||
2108 | xfs_fs_cmn_err(CE_WARN, dqp->q_mount, | ||
2109 | "xfs_qm_dqflush_all: dquot %p flush failed", dqp); | ||
2110 | } | ||
2102 | xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ | 2111 | xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ |
2103 | dqp = dqp->dq_flnext; | 2112 | dqp = dqp->dq_flnext; |
2104 | continue; | 2113 | continue; |
@@ -2265,12 +2274,17 @@ xfs_qm_dqreclaim_one(void) | |||
2265 | * dirty dquots. | 2274 | * dirty dquots. |
2266 | */ | 2275 | */ |
2267 | if (XFS_DQ_IS_DIRTY(dqp)) { | 2276 | if (XFS_DQ_IS_DIRTY(dqp)) { |
2277 | int error; | ||
2268 | xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY"); | 2278 | xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY"); |
2269 | /* | 2279 | /* |
2270 | * We flush it delayed write, so don't bother | 2280 | * We flush it delayed write, so don't bother |
2271 | * releasing the freelist lock. | 2281 | * releasing the freelist lock. |
2272 | */ | 2282 | */ |
2273 | (void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); | 2283 | error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); |
2284 | if (error) { | ||
2285 | xfs_fs_cmn_err(CE_WARN, dqp->q_mount, | ||
2286 | "xfs_qm_dqreclaim: dquot %p flush failed", dqp); | ||
2287 | } | ||
2274 | xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ | 2288 | xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ |
2275 | continue; | 2289 | continue; |
2276 | } | 2290 | } |
@@ -2378,9 +2392,9 @@ xfs_qm_write_sb_changes( | |||
2378 | } | 2392 | } |
2379 | 2393 | ||
2380 | xfs_mod_sb(tp, flags); | 2394 | xfs_mod_sb(tp, flags); |
2381 | (void) xfs_trans_commit(tp, 0); | 2395 | error = xfs_trans_commit(tp, 0); |
2382 | 2396 | ||
2383 | return 0; | 2397 | return error; |
2384 | } | 2398 | } |
2385 | 2399 | ||
2386 | 2400 | ||
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index baf537c1c177..cd2300e374af 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h | |||
@@ -165,7 +165,7 @@ typedef struct xfs_dquot_acct { | |||
165 | #define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--) | 165 | #define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--) |
166 | 166 | ||
167 | extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); | 167 | extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); |
168 | extern int xfs_qm_mount_quotas(xfs_mount_t *, int); | 168 | extern void xfs_qm_mount_quotas(xfs_mount_t *, int); |
169 | extern int xfs_qm_quotacheck(xfs_mount_t *); | 169 | extern int xfs_qm_quotacheck(xfs_mount_t *); |
170 | extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *); | 170 | extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *); |
171 | extern int xfs_qm_unmount_quotas(xfs_mount_t *); | 171 | extern int xfs_qm_unmount_quotas(xfs_mount_t *); |
diff --git a/fs/xfs/quota/xfs_qm_stats.h b/fs/xfs/quota/xfs_qm_stats.h index a50ffabcf554..5b964fc0dc09 100644 --- a/fs/xfs/quota/xfs_qm_stats.h +++ b/fs/xfs/quota/xfs_qm_stats.h | |||
@@ -45,8 +45,8 @@ extern void xfs_qm_cleanup_procfs(void); | |||
45 | 45 | ||
46 | # define XQM_STATS_INC(count) do { } while (0) | 46 | # define XQM_STATS_INC(count) do { } while (0) |
47 | 47 | ||
48 | static __inline void xfs_qm_init_procfs(void) { }; | 48 | static inline void xfs_qm_init_procfs(void) { }; |
49 | static __inline void xfs_qm_cleanup_procfs(void) { }; | 49 | static inline void xfs_qm_cleanup_procfs(void) { }; |
50 | 50 | ||
51 | #endif | 51 | #endif |
52 | 52 | ||
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index d2b8be7e75f9..8342823dbdc3 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c | |||
@@ -279,9 +279,12 @@ xfs_qm_scall_quotaoff( | |||
279 | 279 | ||
280 | /* | 280 | /* |
281 | * Write the LI_QUOTAOFF log record, and do SB changes atomically, | 281 | * Write the LI_QUOTAOFF log record, and do SB changes atomically, |
282 | * and synchronously. | 282 | * and synchronously. If we fail to write, we should abort the |
283 | * operation as it cannot be recovered safely if we crash. | ||
283 | */ | 284 | */ |
284 | xfs_qm_log_quotaoff(mp, &qoffstart, flags); | 285 | error = xfs_qm_log_quotaoff(mp, &qoffstart, flags); |
286 | if (error) | ||
287 | goto out_error; | ||
285 | 288 | ||
286 | /* | 289 | /* |
287 | * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct | 290 | * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct |
@@ -337,7 +340,12 @@ xfs_qm_scall_quotaoff( | |||
337 | * So, we have QUOTAOFF start and end logitems; the start | 340 | * So, we have QUOTAOFF start and end logitems; the start |
338 | * logitem won't get overwritten until the end logitem appears... | 341 | * logitem won't get overwritten until the end logitem appears... |
339 | */ | 342 | */ |
340 | xfs_qm_log_quotaoff_end(mp, qoffstart, flags); | 343 | error = xfs_qm_log_quotaoff_end(mp, qoffstart, flags); |
344 | if (error) { | ||
345 | /* We're screwed now. Shutdown is the only option. */ | ||
346 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
347 | goto out_error; | ||
348 | } | ||
341 | 349 | ||
342 | /* | 350 | /* |
343 | * If quotas is completely disabled, close shop. | 351 | * If quotas is completely disabled, close shop. |
@@ -361,6 +369,7 @@ xfs_qm_scall_quotaoff( | |||
361 | XFS_PURGE_INODE(XFS_QI_GQIP(mp)); | 369 | XFS_PURGE_INODE(XFS_QI_GQIP(mp)); |
362 | XFS_QI_GQIP(mp) = NULL; | 370 | XFS_QI_GQIP(mp) = NULL; |
363 | } | 371 | } |
372 | out_error: | ||
364 | mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); | 373 | mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); |
365 | 374 | ||
366 | return (error); | 375 | return (error); |
@@ -371,12 +380,11 @@ xfs_qm_scall_trunc_qfiles( | |||
371 | xfs_mount_t *mp, | 380 | xfs_mount_t *mp, |
372 | uint flags) | 381 | uint flags) |
373 | { | 382 | { |
374 | int error; | 383 | int error = 0, error2 = 0; |
375 | xfs_inode_t *qip; | 384 | xfs_inode_t *qip; |
376 | 385 | ||
377 | if (!capable(CAP_SYS_ADMIN)) | 386 | if (!capable(CAP_SYS_ADMIN)) |
378 | return XFS_ERROR(EPERM); | 387 | return XFS_ERROR(EPERM); |
379 | error = 0; | ||
380 | if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { | 388 | if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { |
381 | qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags); | 389 | qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags); |
382 | return XFS_ERROR(EINVAL); | 390 | return XFS_ERROR(EINVAL); |
@@ -384,22 +392,22 @@ xfs_qm_scall_trunc_qfiles( | |||
384 | 392 | ||
385 | if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) { | 393 | if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) { |
386 | error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip, 0); | 394 | error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip, 0); |
387 | if (! error) { | 395 | if (!error) { |
388 | (void) xfs_truncate_file(mp, qip); | 396 | error = xfs_truncate_file(mp, qip); |
389 | VN_RELE(XFS_ITOV(qip)); | 397 | IRELE(qip); |
390 | } | 398 | } |
391 | } | 399 | } |
392 | 400 | ||
393 | if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) && | 401 | if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) && |
394 | mp->m_sb.sb_gquotino != NULLFSINO) { | 402 | mp->m_sb.sb_gquotino != NULLFSINO) { |
395 | error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0); | 403 | error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0); |
396 | if (! error) { | 404 | if (!error2) { |
397 | (void) xfs_truncate_file(mp, qip); | 405 | error2 = xfs_truncate_file(mp, qip); |
398 | VN_RELE(XFS_ITOV(qip)); | 406 | IRELE(qip); |
399 | } | 407 | } |
400 | } | 408 | } |
401 | 409 | ||
402 | return (error); | 410 | return error ? error : error2; |
403 | } | 411 | } |
404 | 412 | ||
405 | 413 | ||
@@ -552,13 +560,13 @@ xfs_qm_scall_getqstat( | |||
552 | out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks; | 560 | out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks; |
553 | out->qs_uquota.qfs_nextents = uip->i_d.di_nextents; | 561 | out->qs_uquota.qfs_nextents = uip->i_d.di_nextents; |
554 | if (tempuqip) | 562 | if (tempuqip) |
555 | VN_RELE(XFS_ITOV(uip)); | 563 | IRELE(uip); |
556 | } | 564 | } |
557 | if (gip) { | 565 | if (gip) { |
558 | out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks; | 566 | out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks; |
559 | out->qs_gquota.qfs_nextents = gip->i_d.di_nextents; | 567 | out->qs_gquota.qfs_nextents = gip->i_d.di_nextents; |
560 | if (tempgqip) | 568 | if (tempgqip) |
561 | VN_RELE(XFS_ITOV(gip)); | 569 | IRELE(gip); |
562 | } | 570 | } |
563 | if (mp->m_quotainfo) { | 571 | if (mp->m_quotainfo) { |
564 | out->qs_incoredqs = XFS_QI_MPLNDQUOTS(mp); | 572 | out->qs_incoredqs = XFS_QI_MPLNDQUOTS(mp); |
@@ -726,12 +734,12 @@ xfs_qm_scall_setqlim( | |||
726 | xfs_trans_log_dquot(tp, dqp); | 734 | xfs_trans_log_dquot(tp, dqp); |
727 | 735 | ||
728 | xfs_dqtrace_entry(dqp, "Q_SETQLIM: COMMIT"); | 736 | xfs_dqtrace_entry(dqp, "Q_SETQLIM: COMMIT"); |
729 | xfs_trans_commit(tp, 0); | 737 | error = xfs_trans_commit(tp, 0); |
730 | xfs_qm_dqprint(dqp); | 738 | xfs_qm_dqprint(dqp); |
731 | xfs_qm_dqrele(dqp); | 739 | xfs_qm_dqrele(dqp); |
732 | mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); | 740 | mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); |
733 | 741 | ||
734 | return (0); | 742 | return error; |
735 | } | 743 | } |
736 | 744 | ||
737 | STATIC int | 745 | STATIC int |
@@ -1095,7 +1103,7 @@ again: | |||
1095 | * inactive code in hell. | 1103 | * inactive code in hell. |
1096 | */ | 1104 | */ |
1097 | if (vnode_refd) | 1105 | if (vnode_refd) |
1098 | VN_RELE(vp); | 1106 | IRELE(ip); |
1099 | XFS_MOUNT_ILOCK(mp); | 1107 | XFS_MOUNT_ILOCK(mp); |
1100 | /* | 1108 | /* |
1101 | * If an inode was inserted or removed, we gotta | 1109 | * If an inode was inserted or removed, we gotta |
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c index 129067cfcb86..0b75d302508f 100644 --- a/fs/xfs/support/ktrace.c +++ b/fs/xfs/support/ktrace.c | |||
@@ -24,7 +24,7 @@ static int ktrace_zentries; | |||
24 | void __init | 24 | void __init |
25 | ktrace_init(int zentries) | 25 | ktrace_init(int zentries) |
26 | { | 26 | { |
27 | ktrace_zentries = zentries; | 27 | ktrace_zentries = roundup_pow_of_two(zentries); |
28 | 28 | ||
29 | ktrace_hdr_zone = kmem_zone_init(sizeof(ktrace_t), | 29 | ktrace_hdr_zone = kmem_zone_init(sizeof(ktrace_t), |
30 | "ktrace_hdr"); | 30 | "ktrace_hdr"); |
@@ -47,13 +47,16 @@ ktrace_uninit(void) | |||
47 | * ktrace_alloc() | 47 | * ktrace_alloc() |
48 | * | 48 | * |
49 | * Allocate a ktrace header and enough buffering for the given | 49 | * Allocate a ktrace header and enough buffering for the given |
50 | * number of entries. | 50 | * number of entries. Round the number of entries up to a |
51 | * power of 2 so we can do fast masking to get the index from | ||
52 | * the atomic index counter. | ||
51 | */ | 53 | */ |
52 | ktrace_t * | 54 | ktrace_t * |
53 | ktrace_alloc(int nentries, unsigned int __nocast sleep) | 55 | ktrace_alloc(int nentries, unsigned int __nocast sleep) |
54 | { | 56 | { |
55 | ktrace_t *ktp; | 57 | ktrace_t *ktp; |
56 | ktrace_entry_t *ktep; | 58 | ktrace_entry_t *ktep; |
59 | int entries; | ||
57 | 60 | ||
58 | ktp = (ktrace_t*)kmem_zone_alloc(ktrace_hdr_zone, sleep); | 61 | ktp = (ktrace_t*)kmem_zone_alloc(ktrace_hdr_zone, sleep); |
59 | 62 | ||
@@ -70,11 +73,12 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep) | |||
70 | /* | 73 | /* |
71 | * Special treatment for buffers with the ktrace_zentries entries | 74 | * Special treatment for buffers with the ktrace_zentries entries |
72 | */ | 75 | */ |
73 | if (nentries == ktrace_zentries) { | 76 | entries = roundup_pow_of_two(nentries); |
77 | if (entries == ktrace_zentries) { | ||
74 | ktep = (ktrace_entry_t*)kmem_zone_zalloc(ktrace_ent_zone, | 78 | ktep = (ktrace_entry_t*)kmem_zone_zalloc(ktrace_ent_zone, |
75 | sleep); | 79 | sleep); |
76 | } else { | 80 | } else { |
77 | ktep = (ktrace_entry_t*)kmem_zalloc((nentries * sizeof(*ktep)), | 81 | ktep = (ktrace_entry_t*)kmem_zalloc((entries * sizeof(*ktep)), |
78 | sleep | KM_LARGE); | 82 | sleep | KM_LARGE); |
79 | } | 83 | } |
80 | 84 | ||
@@ -91,8 +95,10 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep) | |||
91 | } | 95 | } |
92 | 96 | ||
93 | ktp->kt_entries = ktep; | 97 | ktp->kt_entries = ktep; |
94 | ktp->kt_nentries = nentries; | 98 | ktp->kt_nentries = entries; |
95 | ktp->kt_index = 0; | 99 | ASSERT(is_power_of_2(entries)); |
100 | ktp->kt_index_mask = entries - 1; | ||
101 | atomic_set(&ktp->kt_index, 0); | ||
96 | ktp->kt_rollover = 0; | 102 | ktp->kt_rollover = 0; |
97 | return ktp; | 103 | return ktp; |
98 | } | 104 | } |
@@ -151,8 +157,6 @@ ktrace_enter( | |||
151 | void *val14, | 157 | void *val14, |
152 | void *val15) | 158 | void *val15) |
153 | { | 159 | { |
154 | static DEFINE_SPINLOCK(wrap_lock); | ||
155 | unsigned long flags; | ||
156 | int index; | 160 | int index; |
157 | ktrace_entry_t *ktep; | 161 | ktrace_entry_t *ktep; |
158 | 162 | ||
@@ -161,12 +165,8 @@ ktrace_enter( | |||
161 | /* | 165 | /* |
162 | * Grab an entry by pushing the index up to the next one. | 166 | * Grab an entry by pushing the index up to the next one. |
163 | */ | 167 | */ |
164 | spin_lock_irqsave(&wrap_lock, flags); | 168 | index = atomic_add_return(1, &ktp->kt_index); |
165 | index = ktp->kt_index; | 169 | index = (index - 1) & ktp->kt_index_mask; |
166 | if (++ktp->kt_index == ktp->kt_nentries) | ||
167 | ktp->kt_index = 0; | ||
168 | spin_unlock_irqrestore(&wrap_lock, flags); | ||
169 | |||
170 | if (!ktp->kt_rollover && index == ktp->kt_nentries - 1) | 170 | if (!ktp->kt_rollover && index == ktp->kt_nentries - 1) |
171 | ktp->kt_rollover = 1; | 171 | ktp->kt_rollover = 1; |
172 | 172 | ||
@@ -199,11 +199,12 @@ int | |||
199 | ktrace_nentries( | 199 | ktrace_nentries( |
200 | ktrace_t *ktp) | 200 | ktrace_t *ktp) |
201 | { | 201 | { |
202 | if (ktp == NULL) { | 202 | int index; |
203 | if (ktp == NULL) | ||
203 | return 0; | 204 | return 0; |
204 | } | ||
205 | 205 | ||
206 | return (ktp->kt_rollover ? ktp->kt_nentries : ktp->kt_index); | 206 | index = atomic_read(&ktp->kt_index) & ktp->kt_index_mask; |
207 | return (ktp->kt_rollover ? ktp->kt_nentries : index); | ||
207 | } | 208 | } |
208 | 209 | ||
209 | /* | 210 | /* |
@@ -228,7 +229,7 @@ ktrace_first(ktrace_t *ktp, ktrace_snap_t *ktsp) | |||
228 | int nentries; | 229 | int nentries; |
229 | 230 | ||
230 | if (ktp->kt_rollover) | 231 | if (ktp->kt_rollover) |
231 | index = ktp->kt_index; | 232 | index = atomic_read(&ktp->kt_index) & ktp->kt_index_mask; |
232 | else | 233 | else |
233 | index = 0; | 234 | index = 0; |
234 | 235 | ||
diff --git a/fs/xfs/support/ktrace.h b/fs/xfs/support/ktrace.h index 56e72b40a859..741d6947ca60 100644 --- a/fs/xfs/support/ktrace.h +++ b/fs/xfs/support/ktrace.h | |||
@@ -30,7 +30,8 @@ typedef struct ktrace_entry { | |||
30 | */ | 30 | */ |
31 | typedef struct ktrace { | 31 | typedef struct ktrace { |
32 | int kt_nentries; /* number of entries in trace buf */ | 32 | int kt_nentries; /* number of entries in trace buf */ |
33 | int kt_index; /* current index in entries */ | 33 | atomic_t kt_index; /* current index in entries */ |
34 | unsigned int kt_index_mask; | ||
34 | int kt_rollover; | 35 | int kt_rollover; |
35 | ktrace_entry_t *kt_entries; /* buffer of entries */ | 36 | ktrace_entry_t *kt_entries; /* buffer of entries */ |
36 | } ktrace_t; | 37 | } ktrace_t; |
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h index 540e4c989825..765aaf65e2d3 100644 --- a/fs/xfs/xfs.h +++ b/fs/xfs/xfs.h | |||
@@ -22,7 +22,7 @@ | |||
22 | #define STATIC | 22 | #define STATIC |
23 | #define DEBUG 1 | 23 | #define DEBUG 1 |
24 | #define XFS_BUF_LOCK_TRACKING 1 | 24 | #define XFS_BUF_LOCK_TRACKING 1 |
25 | /* #define QUOTADEBUG 1 */ | 25 | #define QUOTADEBUG 1 |
26 | #endif | 26 | #endif |
27 | 27 | ||
28 | #ifdef CONFIG_XFS_TRACE | 28 | #ifdef CONFIG_XFS_TRACE |
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 7272fe39a92d..8e130b9720ae 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c | |||
@@ -307,12 +307,13 @@ xfs_acl_vset( | |||
307 | 307 | ||
308 | VN_HOLD(vp); | 308 | VN_HOLD(vp); |
309 | error = xfs_acl_allow_set(vp, kind); | 309 | error = xfs_acl_allow_set(vp, kind); |
310 | if (error) | ||
311 | goto out; | ||
312 | 310 | ||
313 | /* Incoming ACL exists, set file mode based on its value */ | 311 | /* Incoming ACL exists, set file mode based on its value */ |
314 | if (kind == _ACL_TYPE_ACCESS) | 312 | if (!error && kind == _ACL_TYPE_ACCESS) |
315 | xfs_acl_setmode(vp, xfs_acl, &basicperms); | 313 | error = xfs_acl_setmode(vp, xfs_acl, &basicperms); |
314 | |||
315 | if (error) | ||
316 | goto out; | ||
316 | 317 | ||
317 | /* | 318 | /* |
318 | * If we have more than std unix permissions, set up the actual attr. | 319 | * If we have more than std unix permissions, set up the actual attr. |
@@ -323,7 +324,7 @@ xfs_acl_vset( | |||
323 | if (!basicperms) { | 324 | if (!basicperms) { |
324 | xfs_acl_set_attr(vp, xfs_acl, kind, &error); | 325 | xfs_acl_set_attr(vp, xfs_acl, kind, &error); |
325 | } else { | 326 | } else { |
326 | xfs_acl_vremove(vp, _ACL_TYPE_ACCESS); | 327 | error = -xfs_acl_vremove(vp, _ACL_TYPE_ACCESS); |
327 | } | 328 | } |
328 | 329 | ||
329 | out: | 330 | out: |
@@ -707,7 +708,9 @@ xfs_acl_inherit( | |||
707 | 708 | ||
708 | memcpy(cacl, pdaclp, sizeof(xfs_acl_t)); | 709 | memcpy(cacl, pdaclp, sizeof(xfs_acl_t)); |
709 | xfs_acl_filter_mode(mode, cacl); | 710 | xfs_acl_filter_mode(mode, cacl); |
710 | xfs_acl_setmode(vp, cacl, &basicperms); | 711 | error = xfs_acl_setmode(vp, cacl, &basicperms); |
712 | if (error) | ||
713 | goto out_error; | ||
711 | 714 | ||
712 | /* | 715 | /* |
713 | * Set the Default and Access ACL on the file. The mode is already | 716 | * Set the Default and Access ACL on the file. The mode is already |
@@ -720,6 +723,7 @@ xfs_acl_inherit( | |||
720 | xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error); | 723 | xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error); |
721 | if (!error && !basicperms) | 724 | if (!error && !basicperms) |
722 | xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error); | 725 | xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error); |
726 | out_error: | ||
723 | _ACL_FREE(cacl); | 727 | _ACL_FREE(cacl); |
724 | return error; | 728 | return error; |
725 | } | 729 | } |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index bdbfbbee4959..1956f83489f1 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -45,7 +45,7 @@ | |||
45 | #define XFSA_FIXUP_BNO_OK 1 | 45 | #define XFSA_FIXUP_BNO_OK 1 |
46 | #define XFSA_FIXUP_CNT_OK 2 | 46 | #define XFSA_FIXUP_CNT_OK 2 |
47 | 47 | ||
48 | STATIC int | 48 | STATIC void |
49 | xfs_alloc_search_busy(xfs_trans_t *tp, | 49 | xfs_alloc_search_busy(xfs_trans_t *tp, |
50 | xfs_agnumber_t agno, | 50 | xfs_agnumber_t agno, |
51 | xfs_agblock_t bno, | 51 | xfs_agblock_t bno, |
@@ -55,24 +55,24 @@ xfs_alloc_search_busy(xfs_trans_t *tp, | |||
55 | ktrace_t *xfs_alloc_trace_buf; | 55 | ktrace_t *xfs_alloc_trace_buf; |
56 | 56 | ||
57 | #define TRACE_ALLOC(s,a) \ | 57 | #define TRACE_ALLOC(s,a) \ |
58 | xfs_alloc_trace_alloc(__FUNCTION__, s, a, __LINE__) | 58 | xfs_alloc_trace_alloc(__func__, s, a, __LINE__) |
59 | #define TRACE_FREE(s,a,b,x,f) \ | 59 | #define TRACE_FREE(s,a,b,x,f) \ |
60 | xfs_alloc_trace_free(__FUNCTION__, s, mp, a, b, x, f, __LINE__) | 60 | xfs_alloc_trace_free(__func__, s, mp, a, b, x, f, __LINE__) |
61 | #define TRACE_MODAGF(s,a,f) \ | 61 | #define TRACE_MODAGF(s,a,f) \ |
62 | xfs_alloc_trace_modagf(__FUNCTION__, s, mp, a, f, __LINE__) | 62 | xfs_alloc_trace_modagf(__func__, s, mp, a, f, __LINE__) |
63 | #define TRACE_BUSY(__FUNCTION__,s,ag,agb,l,sl,tp) \ | 63 | #define TRACE_BUSY(__func__,s,ag,agb,l,sl,tp) \ |
64 | xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__) | 64 | xfs_alloc_trace_busy(__func__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__) |
65 | #define TRACE_UNBUSY(__FUNCTION__,s,ag,sl,tp) \ | 65 | #define TRACE_UNBUSY(__func__,s,ag,sl,tp) \ |
66 | xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__) | 66 | xfs_alloc_trace_busy(__func__, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__) |
67 | #define TRACE_BUSYSEARCH(__FUNCTION__,s,ag,agb,l,sl,tp) \ | 67 | #define TRACE_BUSYSEARCH(__func__,s,ag,agb,l,tp) \ |
68 | xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__) | 68 | xfs_alloc_trace_busy(__func__, s, mp, ag, agb, l, 0, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__) |
69 | #else | 69 | #else |
70 | #define TRACE_ALLOC(s,a) | 70 | #define TRACE_ALLOC(s,a) |
71 | #define TRACE_FREE(s,a,b,x,f) | 71 | #define TRACE_FREE(s,a,b,x,f) |
72 | #define TRACE_MODAGF(s,a,f) | 72 | #define TRACE_MODAGF(s,a,f) |
73 | #define TRACE_BUSY(s,a,ag,agb,l,sl,tp) | 73 | #define TRACE_BUSY(s,a,ag,agb,l,sl,tp) |
74 | #define TRACE_UNBUSY(fname,s,ag,sl,tp) | 74 | #define TRACE_UNBUSY(fname,s,ag,sl,tp) |
75 | #define TRACE_BUSYSEARCH(fname,s,ag,agb,l,sl,tp) | 75 | #define TRACE_BUSYSEARCH(fname,s,ag,agb,l,tp) |
76 | #endif /* XFS_ALLOC_TRACE */ | 76 | #endif /* XFS_ALLOC_TRACE */ |
77 | 77 | ||
78 | /* | 78 | /* |
@@ -93,7 +93,7 @@ STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, | |||
93 | * Compute aligned version of the found extent. | 93 | * Compute aligned version of the found extent. |
94 | * Takes alignment and min length into account. | 94 | * Takes alignment and min length into account. |
95 | */ | 95 | */ |
96 | STATIC int /* success (>= minlen) */ | 96 | STATIC void |
97 | xfs_alloc_compute_aligned( | 97 | xfs_alloc_compute_aligned( |
98 | xfs_agblock_t foundbno, /* starting block in found extent */ | 98 | xfs_agblock_t foundbno, /* starting block in found extent */ |
99 | xfs_extlen_t foundlen, /* length in found extent */ | 99 | xfs_extlen_t foundlen, /* length in found extent */ |
@@ -116,7 +116,6 @@ xfs_alloc_compute_aligned( | |||
116 | } | 116 | } |
117 | *resbno = bno; | 117 | *resbno = bno; |
118 | *reslen = len; | 118 | *reslen = len; |
119 | return len >= minlen; | ||
120 | } | 119 | } |
121 | 120 | ||
122 | /* | 121 | /* |
@@ -837,9 +836,9 @@ xfs_alloc_ag_vextent_near( | |||
837 | if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) | 836 | if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) |
838 | goto error0; | 837 | goto error0; |
839 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 838 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
840 | if (!xfs_alloc_compute_aligned(ltbno, ltlen, | 839 | xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, |
841 | args->alignment, args->minlen, | 840 | args->minlen, <bnoa, <lena); |
842 | <bnoa, <lena)) | 841 | if (ltlena < args->minlen) |
843 | continue; | 842 | continue; |
844 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); | 843 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); |
845 | xfs_alloc_fix_len(args); | 844 | xfs_alloc_fix_len(args); |
@@ -958,9 +957,9 @@ xfs_alloc_ag_vextent_near( | |||
958 | if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) | 957 | if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) |
959 | goto error0; | 958 | goto error0; |
960 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 959 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
961 | if (xfs_alloc_compute_aligned(ltbno, ltlen, | 960 | xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, |
962 | args->alignment, args->minlen, | 961 | args->minlen, <bnoa, <lena); |
963 | <bnoa, <lena)) | 962 | if (ltlena >= args->minlen) |
964 | break; | 963 | break; |
965 | if ((error = xfs_alloc_decrement(bno_cur_lt, 0, &i))) | 964 | if ((error = xfs_alloc_decrement(bno_cur_lt, 0, &i))) |
966 | goto error0; | 965 | goto error0; |
@@ -974,9 +973,9 @@ xfs_alloc_ag_vextent_near( | |||
974 | if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) | 973 | if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) |
975 | goto error0; | 974 | goto error0; |
976 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 975 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
977 | if (xfs_alloc_compute_aligned(gtbno, gtlen, | 976 | xfs_alloc_compute_aligned(gtbno, gtlen, args->alignment, |
978 | args->alignment, args->minlen, | 977 | args->minlen, >bnoa, >lena); |
979 | >bnoa, >lena)) | 978 | if (gtlena >= args->minlen) |
980 | break; | 979 | break; |
981 | if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i))) | 980 | if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i))) |
982 | goto error0; | 981 | goto error0; |
@@ -2562,9 +2561,10 @@ xfs_alloc_clear_busy(xfs_trans_t *tp, | |||
2562 | 2561 | ||
2563 | 2562 | ||
2564 | /* | 2563 | /* |
2565 | * returns non-zero if any of (agno,bno):len is in a busy list | 2564 | * If we find the extent in the busy list, force the log out to get the |
2565 | * extent out of the busy list so the caller can use it straight away. | ||
2566 | */ | 2566 | */ |
2567 | STATIC int | 2567 | STATIC void |
2568 | xfs_alloc_search_busy(xfs_trans_t *tp, | 2568 | xfs_alloc_search_busy(xfs_trans_t *tp, |
2569 | xfs_agnumber_t agno, | 2569 | xfs_agnumber_t agno, |
2570 | xfs_agblock_t bno, | 2570 | xfs_agblock_t bno, |
@@ -2572,7 +2572,6 @@ xfs_alloc_search_busy(xfs_trans_t *tp, | |||
2572 | { | 2572 | { |
2573 | xfs_mount_t *mp; | 2573 | xfs_mount_t *mp; |
2574 | xfs_perag_busy_t *bsy; | 2574 | xfs_perag_busy_t *bsy; |
2575 | int n; | ||
2576 | xfs_agblock_t uend, bend; | 2575 | xfs_agblock_t uend, bend; |
2577 | xfs_lsn_t lsn; | 2576 | xfs_lsn_t lsn; |
2578 | int cnt; | 2577 | int cnt; |
@@ -2585,21 +2584,18 @@ xfs_alloc_search_busy(xfs_trans_t *tp, | |||
2585 | uend = bno + len - 1; | 2584 | uend = bno + len - 1; |
2586 | 2585 | ||
2587 | /* search pagb_list for this slot, skipping open slots */ | 2586 | /* search pagb_list for this slot, skipping open slots */ |
2588 | for (bsy = mp->m_perag[agno].pagb_list, n = 0; | 2587 | for (bsy = mp->m_perag[agno].pagb_list; cnt; bsy++) { |
2589 | cnt; bsy++, n++) { | ||
2590 | 2588 | ||
2591 | /* | 2589 | /* |
2592 | * (start1,length1) within (start2, length2) | 2590 | * (start1,length1) within (start2, length2) |
2593 | */ | 2591 | */ |
2594 | if (bsy->busy_tp != NULL) { | 2592 | if (bsy->busy_tp != NULL) { |
2595 | bend = bsy->busy_start + bsy->busy_length - 1; | 2593 | bend = bsy->busy_start + bsy->busy_length - 1; |
2596 | if ((bno > bend) || | 2594 | if ((bno > bend) || (uend < bsy->busy_start)) { |
2597 | (uend < bsy->busy_start)) { | ||
2598 | cnt--; | 2595 | cnt--; |
2599 | } else { | 2596 | } else { |
2600 | TRACE_BUSYSEARCH("xfs_alloc_search_busy", | 2597 | TRACE_BUSYSEARCH("xfs_alloc_search_busy", |
2601 | "found1", agno, bno, len, n, | 2598 | "found1", agno, bno, len, tp); |
2602 | tp); | ||
2603 | break; | 2599 | break; |
2604 | } | 2600 | } |
2605 | } | 2601 | } |
@@ -2610,15 +2606,12 @@ xfs_alloc_search_busy(xfs_trans_t *tp, | |||
2610 | * transaction that freed the block | 2606 | * transaction that freed the block |
2611 | */ | 2607 | */ |
2612 | if (cnt) { | 2608 | if (cnt) { |
2613 | TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, n, tp); | 2609 | TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, tp); |
2614 | lsn = bsy->busy_tp->t_commit_lsn; | 2610 | lsn = bsy->busy_tp->t_commit_lsn; |
2615 | spin_unlock(&mp->m_perag[agno].pagb_lock); | 2611 | spin_unlock(&mp->m_perag[agno].pagb_lock); |
2616 | xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); | 2612 | xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); |
2617 | } else { | 2613 | } else { |
2618 | TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, n, tp); | 2614 | TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, tp); |
2619 | n = -1; | ||
2620 | spin_unlock(&mp->m_perag[agno].pagb_lock); | 2615 | spin_unlock(&mp->m_perag[agno].pagb_lock); |
2621 | } | 2616 | } |
2622 | |||
2623 | return n; | ||
2624 | } | 2617 | } |
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index e58f321fdae9..36d781ee5fcc 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c | |||
@@ -2647,14 +2647,6 @@ attr_trusted_capable( | |||
2647 | } | 2647 | } |
2648 | 2648 | ||
2649 | STATIC int | 2649 | STATIC int |
2650 | attr_secure_capable( | ||
2651 | bhv_vnode_t *vp, | ||
2652 | cred_t *cred) | ||
2653 | { | ||
2654 | return -ENOSECURITY; | ||
2655 | } | ||
2656 | |||
2657 | STATIC int | ||
2658 | attr_system_set( | 2650 | attr_system_set( |
2659 | bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags) | 2651 | bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags) |
2660 | { | 2652 | { |
@@ -2724,7 +2716,7 @@ struct attrnames attr_secure = { | |||
2724 | .attr_get = attr_generic_get, | 2716 | .attr_get = attr_generic_get, |
2725 | .attr_set = attr_generic_set, | 2717 | .attr_set = attr_generic_set, |
2726 | .attr_remove = attr_generic_remove, | 2718 | .attr_remove = attr_generic_remove, |
2727 | .attr_capable = attr_secure_capable, | 2719 | .attr_capable = (attrcapable_t)fs_noerr, |
2728 | }; | 2720 | }; |
2729 | 2721 | ||
2730 | struct attrnames attr_user = { | 2722 | struct attrnames attr_user = { |
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 96ba6aa4ed8c..303d41e4217b 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c | |||
@@ -166,7 +166,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes) | |||
166 | 166 | ||
167 | if (!(mp->m_flags & XFS_MOUNT_ATTR2)) { | 167 | if (!(mp->m_flags & XFS_MOUNT_ATTR2)) { |
168 | if (bytes <= XFS_IFORK_ASIZE(dp)) | 168 | if (bytes <= XFS_IFORK_ASIZE(dp)) |
169 | return mp->m_attroffset >> 3; | 169 | return dp->i_d.di_forkoff; |
170 | return 0; | 170 | return 0; |
171 | } | 171 | } |
172 | 172 | ||
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 2def273855a2..eb198c01c35d 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -323,13 +323,13 @@ xfs_bmap_trace_pre_update( | |||
323 | int whichfork); /* data or attr fork */ | 323 | int whichfork); /* data or attr fork */ |
324 | 324 | ||
325 | #define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w) \ | 325 | #define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w) \ |
326 | xfs_bmap_trace_delete(__FUNCTION__,d,ip,i,c,w) | 326 | xfs_bmap_trace_delete(__func__,d,ip,i,c,w) |
327 | #define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w) \ | 327 | #define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w) \ |
328 | xfs_bmap_trace_insert(__FUNCTION__,d,ip,i,c,r1,r2,w) | 328 | xfs_bmap_trace_insert(__func__,d,ip,i,c,r1,r2,w) |
329 | #define XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w) \ | 329 | #define XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w) \ |
330 | xfs_bmap_trace_post_update(__FUNCTION__,d,ip,i,w) | 330 | xfs_bmap_trace_post_update(__func__,d,ip,i,w) |
331 | #define XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w) \ | 331 | #define XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w) \ |
332 | xfs_bmap_trace_pre_update(__FUNCTION__,d,ip,i,w) | 332 | xfs_bmap_trace_pre_update(__func__,d,ip,i,w) |
333 | #else | 333 | #else |
334 | #define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w) | 334 | #define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w) |
335 | #define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w) | 335 | #define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w) |
@@ -2402,7 +2402,7 @@ xfs_bmap_extsize_align( | |||
2402 | 2402 | ||
2403 | #define XFS_ALLOC_GAP_UNITS 4 | 2403 | #define XFS_ALLOC_GAP_UNITS 4 |
2404 | 2404 | ||
2405 | STATIC int | 2405 | STATIC void |
2406 | xfs_bmap_adjacent( | 2406 | xfs_bmap_adjacent( |
2407 | xfs_bmalloca_t *ap) /* bmap alloc argument struct */ | 2407 | xfs_bmalloca_t *ap) /* bmap alloc argument struct */ |
2408 | { | 2408 | { |
@@ -2548,7 +2548,6 @@ xfs_bmap_adjacent( | |||
2548 | ap->rval = gotbno; | 2548 | ap->rval = gotbno; |
2549 | } | 2549 | } |
2550 | #undef ISVALID | 2550 | #undef ISVALID |
2551 | return 0; | ||
2552 | } | 2551 | } |
2553 | 2552 | ||
2554 | STATIC int | 2553 | STATIC int |
@@ -4154,16 +4153,21 @@ xfs_bmap_compute_maxlevels( | |||
4154 | * number of leaf entries, is controlled by the type of di_nextents | 4153 | * number of leaf entries, is controlled by the type of di_nextents |
4155 | * (a signed 32-bit number, xfs_extnum_t), or by di_anextents | 4154 | * (a signed 32-bit number, xfs_extnum_t), or by di_anextents |
4156 | * (a signed 16-bit number, xfs_aextnum_t). | 4155 | * (a signed 16-bit number, xfs_aextnum_t). |
4156 | * | ||
4157 | * Note that we can no longer assume that if we are in ATTR1 that | ||
4158 | * the fork offset of all the inodes will be (m_attroffset >> 3) | ||
4159 | * because we could have mounted with ATTR2 and then mounted back | ||
4160 | * with ATTR1, keeping the di_forkoff's fixed but probably at | ||
4161 | * various positions. Therefore, for both ATTR1 and ATTR2 | ||
4162 | * we have to assume the worst case scenario of a minimum size | ||
4163 | * available. | ||
4157 | */ | 4164 | */ |
4158 | if (whichfork == XFS_DATA_FORK) { | 4165 | if (whichfork == XFS_DATA_FORK) { |
4159 | maxleafents = MAXEXTNUM; | 4166 | maxleafents = MAXEXTNUM; |
4160 | sz = (mp->m_flags & XFS_MOUNT_ATTR2) ? | 4167 | sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS); |
4161 | XFS_BMDR_SPACE_CALC(MINDBTPTRS) : mp->m_attroffset; | ||
4162 | } else { | 4168 | } else { |
4163 | maxleafents = MAXAEXTNUM; | 4169 | maxleafents = MAXAEXTNUM; |
4164 | sz = (mp->m_flags & XFS_MOUNT_ATTR2) ? | 4170 | sz = XFS_BMDR_SPACE_CALC(MINABTPTRS); |
4165 | XFS_BMDR_SPACE_CALC(MINABTPTRS) : | ||
4166 | mp->m_sb.sb_inodesize - mp->m_attroffset; | ||
4167 | } | 4171 | } |
4168 | maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0); | 4172 | maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0); |
4169 | minleafrecs = mp->m_bmap_dmnr[0]; | 4173 | minleafrecs = mp->m_bmap_dmnr[0]; |
@@ -5772,7 +5776,6 @@ xfs_getbmap( | |||
5772 | int error; /* return value */ | 5776 | int error; /* return value */ |
5773 | __int64_t fixlen; /* length for -1 case */ | 5777 | __int64_t fixlen; /* length for -1 case */ |
5774 | int i; /* extent number */ | 5778 | int i; /* extent number */ |
5775 | bhv_vnode_t *vp; /* corresponding vnode */ | ||
5776 | int lock; /* lock state */ | 5779 | int lock; /* lock state */ |
5777 | xfs_bmbt_irec_t *map; /* buffer for user's data */ | 5780 | xfs_bmbt_irec_t *map; /* buffer for user's data */ |
5778 | xfs_mount_t *mp; /* file system mount point */ | 5781 | xfs_mount_t *mp; /* file system mount point */ |
@@ -5789,7 +5792,6 @@ xfs_getbmap( | |||
5789 | int bmapi_flags; /* flags for xfs_bmapi */ | 5792 | int bmapi_flags; /* flags for xfs_bmapi */ |
5790 | __int32_t oflags; /* getbmapx bmv_oflags field */ | 5793 | __int32_t oflags; /* getbmapx bmv_oflags field */ |
5791 | 5794 | ||
5792 | vp = XFS_ITOV(ip); | ||
5793 | mp = ip->i_mount; | 5795 | mp = ip->i_mount; |
5794 | 5796 | ||
5795 | whichfork = interface & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK; | 5797 | whichfork = interface & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK; |
@@ -5811,7 +5813,7 @@ xfs_getbmap( | |||
5811 | if ((interface & BMV_IF_NO_DMAPI_READ) == 0 && | 5813 | if ((interface & BMV_IF_NO_DMAPI_READ) == 0 && |
5812 | DM_EVENT_ENABLED(ip, DM_EVENT_READ) && | 5814 | DM_EVENT_ENABLED(ip, DM_EVENT_READ) && |
5813 | whichfork == XFS_DATA_FORK) { | 5815 | whichfork == XFS_DATA_FORK) { |
5814 | error = XFS_SEND_DATA(mp, DM_EVENT_READ, vp, 0, 0, 0, NULL); | 5816 | error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL); |
5815 | if (error) | 5817 | if (error) |
5816 | return XFS_ERROR(error); | 5818 | return XFS_ERROR(error); |
5817 | } | 5819 | } |
@@ -5869,6 +5871,10 @@ xfs_getbmap( | |||
5869 | /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */ | 5871 | /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */ |
5870 | error = xfs_flush_pages(ip, (xfs_off_t)0, | 5872 | error = xfs_flush_pages(ip, (xfs_off_t)0, |
5871 | -1, 0, FI_REMAPF); | 5873 | -1, 0, FI_REMAPF); |
5874 | if (error) { | ||
5875 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | ||
5876 | return error; | ||
5877 | } | ||
5872 | } | 5878 | } |
5873 | 5879 | ||
5874 | ASSERT(whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0); | 5880 | ASSERT(whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0); |
@@ -6162,10 +6168,10 @@ xfs_check_block( | |||
6162 | } | 6168 | } |
6163 | if (*thispa == *pp) { | 6169 | if (*thispa == *pp) { |
6164 | cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld", | 6170 | cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld", |
6165 | __FUNCTION__, j, i, | 6171 | __func__, j, i, |
6166 | (unsigned long long)be64_to_cpu(*thispa)); | 6172 | (unsigned long long)be64_to_cpu(*thispa)); |
6167 | panic("%s: ptrs are equal in node\n", | 6173 | panic("%s: ptrs are equal in node\n", |
6168 | __FUNCTION__); | 6174 | __func__); |
6169 | } | 6175 | } |
6170 | } | 6176 | } |
6171 | } | 6177 | } |
@@ -6192,7 +6198,7 @@ xfs_bmap_check_leaf_extents( | |||
6192 | xfs_mount_t *mp; /* file system mount structure */ | 6198 | xfs_mount_t *mp; /* file system mount structure */ |
6193 | __be64 *pp; /* pointer to block address */ | 6199 | __be64 *pp; /* pointer to block address */ |
6194 | xfs_bmbt_rec_t *ep; /* pointer to current extent */ | 6200 | xfs_bmbt_rec_t *ep; /* pointer to current extent */ |
6195 | xfs_bmbt_rec_t *lastp; /* pointer to previous extent */ | 6201 | xfs_bmbt_rec_t last = {0, 0}; /* last extent in prev block */ |
6196 | xfs_bmbt_rec_t *nextp; /* pointer to next extent */ | 6202 | xfs_bmbt_rec_t *nextp; /* pointer to next extent */ |
6197 | int bp_release = 0; | 6203 | int bp_release = 0; |
6198 | 6204 | ||
@@ -6262,7 +6268,6 @@ xfs_bmap_check_leaf_extents( | |||
6262 | /* | 6268 | /* |
6263 | * Loop over all leaf nodes checking that all extents are in the right order. | 6269 | * Loop over all leaf nodes checking that all extents are in the right order. |
6264 | */ | 6270 | */ |
6265 | lastp = NULL; | ||
6266 | for (;;) { | 6271 | for (;;) { |
6267 | xfs_fsblock_t nextbno; | 6272 | xfs_fsblock_t nextbno; |
6268 | xfs_extnum_t num_recs; | 6273 | xfs_extnum_t num_recs; |
@@ -6283,18 +6288,16 @@ xfs_bmap_check_leaf_extents( | |||
6283 | */ | 6288 | */ |
6284 | 6289 | ||
6285 | ep = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1); | 6290 | ep = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1); |
6291 | if (i) { | ||
6292 | xfs_btree_check_rec(XFS_BTNUM_BMAP, &last, ep); | ||
6293 | } | ||
6286 | for (j = 1; j < num_recs; j++) { | 6294 | for (j = 1; j < num_recs; j++) { |
6287 | nextp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, j + 1); | 6295 | nextp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, j + 1); |
6288 | if (lastp) { | 6296 | xfs_btree_check_rec(XFS_BTNUM_BMAP, ep, nextp); |
6289 | xfs_btree_check_rec(XFS_BTNUM_BMAP, | ||
6290 | (void *)lastp, (void *)ep); | ||
6291 | } | ||
6292 | xfs_btree_check_rec(XFS_BTNUM_BMAP, (void *)ep, | ||
6293 | (void *)(nextp)); | ||
6294 | lastp = ep; | ||
6295 | ep = nextp; | 6297 | ep = nextp; |
6296 | } | 6298 | } |
6297 | 6299 | ||
6300 | last = *ep; | ||
6298 | i += num_recs; | 6301 | i += num_recs; |
6299 | if (bp_release) { | 6302 | if (bp_release) { |
6300 | bp_release = 0; | 6303 | bp_release = 0; |
@@ -6325,13 +6328,13 @@ xfs_bmap_check_leaf_extents( | |||
6325 | return; | 6328 | return; |
6326 | 6329 | ||
6327 | error0: | 6330 | error0: |
6328 | cmn_err(CE_WARN, "%s: at error0", __FUNCTION__); | 6331 | cmn_err(CE_WARN, "%s: at error0", __func__); |
6329 | if (bp_release) | 6332 | if (bp_release) |
6330 | xfs_trans_brelse(NULL, bp); | 6333 | xfs_trans_brelse(NULL, bp); |
6331 | error_norelse: | 6334 | error_norelse: |
6332 | cmn_err(CE_WARN, "%s: BAD after btree leaves for %d extents", | 6335 | cmn_err(CE_WARN, "%s: BAD after btree leaves for %d extents", |
6333 | __FUNCTION__, i); | 6336 | __func__, i); |
6334 | panic("%s: CORRUPTED BTREE OR SOMETHING", __FUNCTION__); | 6337 | panic("%s: CORRUPTED BTREE OR SOMETHING", __func__); |
6335 | return; | 6338 | return; |
6336 | } | 6339 | } |
6337 | #endif | 6340 | #endif |
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 87224b7d7984..6ff70cda451c 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h | |||
@@ -151,7 +151,7 @@ xfs_bmap_trace_exlist( | |||
151 | xfs_extnum_t cnt, /* count of entries in list */ | 151 | xfs_extnum_t cnt, /* count of entries in list */ |
152 | int whichfork); /* data or attr fork */ | 152 | int whichfork); /* data or attr fork */ |
153 | #define XFS_BMAP_TRACE_EXLIST(ip,c,w) \ | 153 | #define XFS_BMAP_TRACE_EXLIST(ip,c,w) \ |
154 | xfs_bmap_trace_exlist(__FUNCTION__,ip,c,w) | 154 | xfs_bmap_trace_exlist(__func__,ip,c,w) |
155 | #else | 155 | #else |
156 | #define XFS_BMAP_TRACE_EXLIST(ip,c,w) | 156 | #define XFS_BMAP_TRACE_EXLIST(ip,c,w) |
157 | #endif | 157 | #endif |
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index bd18987326a3..4f0e849d973e 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c | |||
@@ -275,21 +275,21 @@ xfs_bmbt_trace_cursor( | |||
275 | } | 275 | } |
276 | 276 | ||
277 | #define XFS_BMBT_TRACE_ARGBI(c,b,i) \ | 277 | #define XFS_BMBT_TRACE_ARGBI(c,b,i) \ |
278 | xfs_bmbt_trace_argbi(__FUNCTION__, c, b, i, __LINE__) | 278 | xfs_bmbt_trace_argbi(__func__, c, b, i, __LINE__) |
279 | #define XFS_BMBT_TRACE_ARGBII(c,b,i,j) \ | 279 | #define XFS_BMBT_TRACE_ARGBII(c,b,i,j) \ |
280 | xfs_bmbt_trace_argbii(__FUNCTION__, c, b, i, j, __LINE__) | 280 | xfs_bmbt_trace_argbii(__func__, c, b, i, j, __LINE__) |
281 | #define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) \ | 281 | #define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) \ |
282 | xfs_bmbt_trace_argfffi(__FUNCTION__, c, o, b, i, j, __LINE__) | 282 | xfs_bmbt_trace_argfffi(__func__, c, o, b, i, j, __LINE__) |
283 | #define XFS_BMBT_TRACE_ARGI(c,i) \ | 283 | #define XFS_BMBT_TRACE_ARGI(c,i) \ |
284 | xfs_bmbt_trace_argi(__FUNCTION__, c, i, __LINE__) | 284 | xfs_bmbt_trace_argi(__func__, c, i, __LINE__) |
285 | #define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \ | 285 | #define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \ |
286 | xfs_bmbt_trace_argifk(__FUNCTION__, c, i, f, s, __LINE__) | 286 | xfs_bmbt_trace_argifk(__func__, c, i, f, s, __LINE__) |
287 | #define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \ | 287 | #define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \ |
288 | xfs_bmbt_trace_argifr(__FUNCTION__, c, i, f, r, __LINE__) | 288 | xfs_bmbt_trace_argifr(__func__, c, i, f, r, __LINE__) |
289 | #define XFS_BMBT_TRACE_ARGIK(c,i,k) \ | 289 | #define XFS_BMBT_TRACE_ARGIK(c,i,k) \ |
290 | xfs_bmbt_trace_argik(__FUNCTION__, c, i, k, __LINE__) | 290 | xfs_bmbt_trace_argik(__func__, c, i, k, __LINE__) |
291 | #define XFS_BMBT_TRACE_CURSOR(c,s) \ | 291 | #define XFS_BMBT_TRACE_CURSOR(c,s) \ |
292 | xfs_bmbt_trace_cursor(__FUNCTION__, c, s, __LINE__) | 292 | xfs_bmbt_trace_cursor(__func__, c, s, __LINE__) |
293 | #else | 293 | #else |
294 | #define XFS_BMBT_TRACE_ARGBI(c,b,i) | 294 | #define XFS_BMBT_TRACE_ARGBI(c,b,i) |
295 | #define XFS_BMBT_TRACE_ARGBII(c,b,i,j) | 295 | #define XFS_BMBT_TRACE_ARGBII(c,b,i,j) |
@@ -2027,6 +2027,24 @@ xfs_bmbt_increment( | |||
2027 | 2027 | ||
2028 | /* | 2028 | /* |
2029 | * Insert the current record at the point referenced by cur. | 2029 | * Insert the current record at the point referenced by cur. |
2030 | * | ||
2031 | * A multi-level split of the tree on insert will invalidate the original | ||
2032 | * cursor. It appears, however, that some callers assume that the cursor is | ||
2033 | * always valid. Hence if we do a multi-level split we need to revalidate the | ||
2034 | * cursor. | ||
2035 | * | ||
2036 | * When a split occurs, we will see a new cursor returned. Use that as a | ||
2037 | * trigger to determine if we need to revalidate the original cursor. If we get | ||
2038 | * a split, then use the original irec to lookup up the path of the record we | ||
2039 | * just inserted. | ||
2040 | * | ||
2041 | * Note that the fact that the btree root is in the inode means that we can | ||
2042 | * have the level of the tree change without a "split" occurring at the root | ||
2043 | * level. What happens is that the root is migrated to an allocated block and | ||
2044 | * the inode root is pointed to it. This means a single split can change the | ||
2045 | * level of the tree (level 2 -> level 3) and invalidate the old cursor. Hence | ||
2046 | * the level change should be accounted as a split so as to correctly trigger a | ||
2047 | * revalidation of the old cursor. | ||
2030 | */ | 2048 | */ |
2031 | int /* error */ | 2049 | int /* error */ |
2032 | xfs_bmbt_insert( | 2050 | xfs_bmbt_insert( |
@@ -2039,11 +2057,14 @@ xfs_bmbt_insert( | |||
2039 | xfs_fsblock_t nbno; | 2057 | xfs_fsblock_t nbno; |
2040 | xfs_btree_cur_t *ncur; | 2058 | xfs_btree_cur_t *ncur; |
2041 | xfs_bmbt_rec_t nrec; | 2059 | xfs_bmbt_rec_t nrec; |
2060 | xfs_bmbt_irec_t oirec; /* original irec */ | ||
2042 | xfs_btree_cur_t *pcur; | 2061 | xfs_btree_cur_t *pcur; |
2062 | int splits = 0; | ||
2043 | 2063 | ||
2044 | XFS_BMBT_TRACE_CURSOR(cur, ENTRY); | 2064 | XFS_BMBT_TRACE_CURSOR(cur, ENTRY); |
2045 | level = 0; | 2065 | level = 0; |
2046 | nbno = NULLFSBLOCK; | 2066 | nbno = NULLFSBLOCK; |
2067 | oirec = cur->bc_rec.b; | ||
2047 | xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b); | 2068 | xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b); |
2048 | ncur = NULL; | 2069 | ncur = NULL; |
2049 | pcur = cur; | 2070 | pcur = cur; |
@@ -2052,11 +2073,13 @@ xfs_bmbt_insert( | |||
2052 | &i))) { | 2073 | &i))) { |
2053 | if (pcur != cur) | 2074 | if (pcur != cur) |
2054 | xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); | 2075 | xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); |
2055 | XFS_BMBT_TRACE_CURSOR(cur, ERROR); | 2076 | goto error0; |
2056 | return error; | ||
2057 | } | 2077 | } |
2058 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 2078 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
2059 | if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) { | 2079 | if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) { |
2080 | /* allocating a new root is effectively a split */ | ||
2081 | if (cur->bc_nlevels != pcur->bc_nlevels) | ||
2082 | splits++; | ||
2060 | cur->bc_nlevels = pcur->bc_nlevels; | 2083 | cur->bc_nlevels = pcur->bc_nlevels; |
2061 | cur->bc_private.b.allocated += | 2084 | cur->bc_private.b.allocated += |
2062 | pcur->bc_private.b.allocated; | 2085 | pcur->bc_private.b.allocated; |
@@ -2070,10 +2093,21 @@ xfs_bmbt_insert( | |||
2070 | xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); | 2093 | xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); |
2071 | } | 2094 | } |
2072 | if (ncur) { | 2095 | if (ncur) { |
2096 | splits++; | ||
2073 | pcur = ncur; | 2097 | pcur = ncur; |
2074 | ncur = NULL; | 2098 | ncur = NULL; |
2075 | } | 2099 | } |
2076 | } while (nbno != NULLFSBLOCK); | 2100 | } while (nbno != NULLFSBLOCK); |
2101 | |||
2102 | if (splits > 1) { | ||
2103 | /* revalidate the old cursor as we had a multi-level split */ | ||
2104 | error = xfs_bmbt_lookup_eq(cur, oirec.br_startoff, | ||
2105 | oirec.br_startblock, oirec.br_blockcount, &i); | ||
2106 | if (error) | ||
2107 | goto error0; | ||
2108 | ASSERT(i == 1); | ||
2109 | } | ||
2110 | |||
2077 | XFS_BMBT_TRACE_CURSOR(cur, EXIT); | 2111 | XFS_BMBT_TRACE_CURSOR(cur, EXIT); |
2078 | *stat = i; | 2112 | *stat = i; |
2079 | return 0; | 2113 | return 0; |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 63debd147eb5..53a71c62025d 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -645,7 +645,12 @@ xfs_buf_item_push( | |||
645 | bp = bip->bli_buf; | 645 | bp = bip->bli_buf; |
646 | 646 | ||
647 | if (XFS_BUF_ISDELAYWRITE(bp)) { | 647 | if (XFS_BUF_ISDELAYWRITE(bp)) { |
648 | xfs_bawrite(bip->bli_item.li_mountp, bp); | 648 | int error; |
649 | error = xfs_bawrite(bip->bli_item.li_mountp, bp); | ||
650 | if (error) | ||
651 | xfs_fs_cmn_err(CE_WARN, bip->bli_item.li_mountp, | ||
652 | "xfs_buf_item_push: pushbuf error %d on bip %p, bp %p", | ||
653 | error, bip, bp); | ||
649 | } else { | 654 | } else { |
650 | xfs_buf_relse(bp); | 655 | xfs_buf_relse(bp); |
651 | } | 656 | } |
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index e92e73f0e6af..7cb26529766b 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c | |||
@@ -44,6 +44,7 @@ | |||
44 | #include "xfs_error.h" | 44 | #include "xfs_error.h" |
45 | #include "xfs_vnodeops.h" | 45 | #include "xfs_vnodeops.h" |
46 | 46 | ||
47 | struct xfs_name xfs_name_dotdot = {"..", 2}; | ||
47 | 48 | ||
48 | void | 49 | void |
49 | xfs_dir_mount( | 50 | xfs_dir_mount( |
@@ -146,8 +147,7 @@ int | |||
146 | xfs_dir_createname( | 147 | xfs_dir_createname( |
147 | xfs_trans_t *tp, | 148 | xfs_trans_t *tp, |
148 | xfs_inode_t *dp, | 149 | xfs_inode_t *dp, |
149 | char *name, | 150 | struct xfs_name *name, |
150 | int namelen, | ||
151 | xfs_ino_t inum, /* new entry inode number */ | 151 | xfs_ino_t inum, /* new entry inode number */ |
152 | xfs_fsblock_t *first, /* bmap's firstblock */ | 152 | xfs_fsblock_t *first, /* bmap's firstblock */ |
153 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ | 153 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ |
@@ -162,9 +162,9 @@ xfs_dir_createname( | |||
162 | return rval; | 162 | return rval; |
163 | XFS_STATS_INC(xs_dir_create); | 163 | XFS_STATS_INC(xs_dir_create); |
164 | 164 | ||
165 | args.name = name; | 165 | args.name = name->name; |
166 | args.namelen = namelen; | 166 | args.namelen = name->len; |
167 | args.hashval = xfs_da_hashname(name, namelen); | 167 | args.hashval = xfs_da_hashname(name->name, name->len); |
168 | args.inumber = inum; | 168 | args.inumber = inum; |
169 | args.dp = dp; | 169 | args.dp = dp; |
170 | args.firstblock = first; | 170 | args.firstblock = first; |
@@ -197,8 +197,7 @@ int | |||
197 | xfs_dir_lookup( | 197 | xfs_dir_lookup( |
198 | xfs_trans_t *tp, | 198 | xfs_trans_t *tp, |
199 | xfs_inode_t *dp, | 199 | xfs_inode_t *dp, |
200 | char *name, | 200 | struct xfs_name *name, |
201 | int namelen, | ||
202 | xfs_ino_t *inum) /* out: inode number */ | 201 | xfs_ino_t *inum) /* out: inode number */ |
203 | { | 202 | { |
204 | xfs_da_args_t args; | 203 | xfs_da_args_t args; |
@@ -207,18 +206,14 @@ xfs_dir_lookup( | |||
207 | 206 | ||
208 | ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); | 207 | ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); |
209 | XFS_STATS_INC(xs_dir_lookup); | 208 | XFS_STATS_INC(xs_dir_lookup); |
209 | memset(&args, 0, sizeof(xfs_da_args_t)); | ||
210 | 210 | ||
211 | args.name = name; | 211 | args.name = name->name; |
212 | args.namelen = namelen; | 212 | args.namelen = name->len; |
213 | args.hashval = xfs_da_hashname(name, namelen); | 213 | args.hashval = xfs_da_hashname(name->name, name->len); |
214 | args.inumber = 0; | ||
215 | args.dp = dp; | 214 | args.dp = dp; |
216 | args.firstblock = NULL; | ||
217 | args.flist = NULL; | ||
218 | args.total = 0; | ||
219 | args.whichfork = XFS_DATA_FORK; | 215 | args.whichfork = XFS_DATA_FORK; |
220 | args.trans = tp; | 216 | args.trans = tp; |
221 | args.justcheck = args.addname = 0; | ||
222 | args.oknoent = 1; | 217 | args.oknoent = 1; |
223 | 218 | ||
224 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) | 219 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) |
@@ -247,8 +242,7 @@ int | |||
247 | xfs_dir_removename( | 242 | xfs_dir_removename( |
248 | xfs_trans_t *tp, | 243 | xfs_trans_t *tp, |
249 | xfs_inode_t *dp, | 244 | xfs_inode_t *dp, |
250 | char *name, | 245 | struct xfs_name *name, |
251 | int namelen, | ||
252 | xfs_ino_t ino, | 246 | xfs_ino_t ino, |
253 | xfs_fsblock_t *first, /* bmap's firstblock */ | 247 | xfs_fsblock_t *first, /* bmap's firstblock */ |
254 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ | 248 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ |
@@ -261,9 +255,9 @@ xfs_dir_removename( | |||
261 | ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); | 255 | ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); |
262 | XFS_STATS_INC(xs_dir_remove); | 256 | XFS_STATS_INC(xs_dir_remove); |
263 | 257 | ||
264 | args.name = name; | 258 | args.name = name->name; |
265 | args.namelen = namelen; | 259 | args.namelen = name->len; |
266 | args.hashval = xfs_da_hashname(name, namelen); | 260 | args.hashval = xfs_da_hashname(name->name, name->len); |
267 | args.inumber = ino; | 261 | args.inumber = ino; |
268 | args.dp = dp; | 262 | args.dp = dp; |
269 | args.firstblock = first; | 263 | args.firstblock = first; |
@@ -329,8 +323,7 @@ int | |||
329 | xfs_dir_replace( | 323 | xfs_dir_replace( |
330 | xfs_trans_t *tp, | 324 | xfs_trans_t *tp, |
331 | xfs_inode_t *dp, | 325 | xfs_inode_t *dp, |
332 | char *name, /* name of entry to replace */ | 326 | struct xfs_name *name, /* name of entry to replace */ |
333 | int namelen, | ||
334 | xfs_ino_t inum, /* new inode number */ | 327 | xfs_ino_t inum, /* new inode number */ |
335 | xfs_fsblock_t *first, /* bmap's firstblock */ | 328 | xfs_fsblock_t *first, /* bmap's firstblock */ |
336 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ | 329 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ |
@@ -345,9 +338,9 @@ xfs_dir_replace( | |||
345 | if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) | 338 | if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) |
346 | return rval; | 339 | return rval; |
347 | 340 | ||
348 | args.name = name; | 341 | args.name = name->name; |
349 | args.namelen = namelen; | 342 | args.namelen = name->len; |
350 | args.hashval = xfs_da_hashname(name, namelen); | 343 | args.hashval = xfs_da_hashname(name->name, name->len); |
351 | args.inumber = inum; | 344 | args.inumber = inum; |
352 | args.dp = dp; | 345 | args.dp = dp; |
353 | args.firstblock = first; | 346 | args.firstblock = first; |
@@ -374,28 +367,29 @@ xfs_dir_replace( | |||
374 | 367 | ||
375 | /* | 368 | /* |
376 | * See if this entry can be added to the directory without allocating space. | 369 | * See if this entry can be added to the directory without allocating space. |
370 | * First checks that the caller couldn't reserve enough space (resblks = 0). | ||
377 | */ | 371 | */ |
378 | int | 372 | int |
379 | xfs_dir_canenter( | 373 | xfs_dir_canenter( |
380 | xfs_trans_t *tp, | 374 | xfs_trans_t *tp, |
381 | xfs_inode_t *dp, | 375 | xfs_inode_t *dp, |
382 | char *name, /* name of entry to add */ | 376 | struct xfs_name *name, /* name of entry to add */ |
383 | int namelen) | 377 | uint resblks) |
384 | { | 378 | { |
385 | xfs_da_args_t args; | 379 | xfs_da_args_t args; |
386 | int rval; | 380 | int rval; |
387 | int v; /* type-checking value */ | 381 | int v; /* type-checking value */ |
388 | 382 | ||
383 | if (resblks) | ||
384 | return 0; | ||
385 | |||
389 | ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); | 386 | ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); |
387 | memset(&args, 0, sizeof(xfs_da_args_t)); | ||
390 | 388 | ||
391 | args.name = name; | 389 | args.name = name->name; |
392 | args.namelen = namelen; | 390 | args.namelen = name->len; |
393 | args.hashval = xfs_da_hashname(name, namelen); | 391 | args.hashval = xfs_da_hashname(name->name, name->len); |
394 | args.inumber = 0; | ||
395 | args.dp = dp; | 392 | args.dp = dp; |
396 | args.firstblock = NULL; | ||
397 | args.flist = NULL; | ||
398 | args.total = 0; | ||
399 | args.whichfork = XFS_DATA_FORK; | 393 | args.whichfork = XFS_DATA_FORK; |
400 | args.trans = tp; | 394 | args.trans = tp; |
401 | args.justcheck = args.addname = args.oknoent = 1; | 395 | args.justcheck = args.addname = args.oknoent = 1; |
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h index b265197e74cf..6392f939029f 100644 --- a/fs/xfs/xfs_dir2.h +++ b/fs/xfs/xfs_dir2.h | |||
@@ -59,6 +59,8 @@ typedef __uint32_t xfs_dir2_db_t; | |||
59 | */ | 59 | */ |
60 | typedef xfs_off_t xfs_dir2_off_t; | 60 | typedef xfs_off_t xfs_dir2_off_t; |
61 | 61 | ||
62 | extern struct xfs_name xfs_name_dotdot; | ||
63 | |||
62 | /* | 64 | /* |
63 | * Generic directory interface routines | 65 | * Generic directory interface routines |
64 | */ | 66 | */ |
@@ -68,21 +70,21 @@ extern int xfs_dir_isempty(struct xfs_inode *dp); | |||
68 | extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp, | 70 | extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp, |
69 | struct xfs_inode *pdp); | 71 | struct xfs_inode *pdp); |
70 | extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp, | 72 | extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp, |
71 | char *name, int namelen, xfs_ino_t inum, | 73 | struct xfs_name *name, xfs_ino_t inum, |
72 | xfs_fsblock_t *first, | 74 | xfs_fsblock_t *first, |
73 | struct xfs_bmap_free *flist, xfs_extlen_t tot); | 75 | struct xfs_bmap_free *flist, xfs_extlen_t tot); |
74 | extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp, | 76 | extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp, |
75 | char *name, int namelen, xfs_ino_t *inum); | 77 | struct xfs_name *name, xfs_ino_t *inum); |
76 | extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp, | 78 | extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp, |
77 | char *name, int namelen, xfs_ino_t ino, | 79 | struct xfs_name *name, xfs_ino_t ino, |
78 | xfs_fsblock_t *first, | 80 | xfs_fsblock_t *first, |
79 | struct xfs_bmap_free *flist, xfs_extlen_t tot); | 81 | struct xfs_bmap_free *flist, xfs_extlen_t tot); |
80 | extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, | 82 | extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, |
81 | char *name, int namelen, xfs_ino_t inum, | 83 | struct xfs_name *name, xfs_ino_t inum, |
82 | xfs_fsblock_t *first, | 84 | xfs_fsblock_t *first, |
83 | struct xfs_bmap_free *flist, xfs_extlen_t tot); | 85 | struct xfs_bmap_free *flist, xfs_extlen_t tot); |
84 | extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, | 86 | extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, |
85 | char *name, int namelen); | 87 | struct xfs_name *name, uint resblks); |
86 | extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); | 88 | extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); |
87 | 89 | ||
88 | /* | 90 | /* |
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index eb03eab5ca52..3f3785b10804 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c | |||
@@ -73,7 +73,7 @@ xfs_filestreams_trace( | |||
73 | #define TRACE4(mp,t,a0,a1,a2,a3) TRACE6(mp,t,a0,a1,a2,a3,0,0) | 73 | #define TRACE4(mp,t,a0,a1,a2,a3) TRACE6(mp,t,a0,a1,a2,a3,0,0) |
74 | #define TRACE5(mp,t,a0,a1,a2,a3,a4) TRACE6(mp,t,a0,a1,a2,a3,a4,0) | 74 | #define TRACE5(mp,t,a0,a1,a2,a3,a4) TRACE6(mp,t,a0,a1,a2,a3,a4,0) |
75 | #define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \ | 75 | #define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \ |
76 | xfs_filestreams_trace(mp, t, __FUNCTION__, __LINE__, \ | 76 | xfs_filestreams_trace(mp, t, __func__, __LINE__, \ |
77 | (__psunsigned_t)a0, (__psunsigned_t)a1, \ | 77 | (__psunsigned_t)a0, (__psunsigned_t)a1, \ |
78 | (__psunsigned_t)a2, (__psunsigned_t)a3, \ | 78 | (__psunsigned_t)a2, (__psunsigned_t)a3, \ |
79 | (__psunsigned_t)a4, (__psunsigned_t)a5) | 79 | (__psunsigned_t)a4, (__psunsigned_t)a5) |
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 5a146cb22980..a64dfbd565a5 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
@@ -107,6 +107,16 @@ xfs_ialloc_log_di( | |||
107 | /* | 107 | /* |
108 | * Allocation group level functions. | 108 | * Allocation group level functions. |
109 | */ | 109 | */ |
110 | static inline int | ||
111 | xfs_ialloc_cluster_alignment( | ||
112 | xfs_alloc_arg_t *args) | ||
113 | { | ||
114 | if (xfs_sb_version_hasalign(&args->mp->m_sb) && | ||
115 | args->mp->m_sb.sb_inoalignmt >= | ||
116 | XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp))) | ||
117 | return args->mp->m_sb.sb_inoalignmt; | ||
118 | return 1; | ||
119 | } | ||
110 | 120 | ||
111 | /* | 121 | /* |
112 | * Allocate new inodes in the allocation group specified by agbp. | 122 | * Allocate new inodes in the allocation group specified by agbp. |
@@ -167,10 +177,24 @@ xfs_ialloc_ag_alloc( | |||
167 | args.mod = args.total = args.wasdel = args.isfl = | 177 | args.mod = args.total = args.wasdel = args.isfl = |
168 | args.userdata = args.minalignslop = 0; | 178 | args.userdata = args.minalignslop = 0; |
169 | args.prod = 1; | 179 | args.prod = 1; |
170 | args.alignment = 1; | 180 | |
171 | /* | 181 | /* |
172 | * Allow space for the inode btree to split. | 182 | * We need to take into account alignment here to ensure that |
183 | * we don't modify the free list if we fail to have an exact | ||
184 | * block. If we don't have an exact match, and every oher | ||
185 | * attempt allocation attempt fails, we'll end up cancelling | ||
186 | * a dirty transaction and shutting down. | ||
187 | * | ||
188 | * For an exact allocation, alignment must be 1, | ||
189 | * however we need to take cluster alignment into account when | ||
190 | * fixing up the freelist. Use the minalignslop field to | ||
191 | * indicate that extra blocks might be required for alignment, | ||
192 | * but not to use them in the actual exact allocation. | ||
173 | */ | 193 | */ |
194 | args.alignment = 1; | ||
195 | args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1; | ||
196 | |||
197 | /* Allow space for the inode btree to split. */ | ||
174 | args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; | 198 | args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; |
175 | if ((error = xfs_alloc_vextent(&args))) | 199 | if ((error = xfs_alloc_vextent(&args))) |
176 | return error; | 200 | return error; |
@@ -191,13 +215,8 @@ xfs_ialloc_ag_alloc( | |||
191 | ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); | 215 | ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); |
192 | args.alignment = args.mp->m_dalign; | 216 | args.alignment = args.mp->m_dalign; |
193 | isaligned = 1; | 217 | isaligned = 1; |
194 | } else if (xfs_sb_version_hasalign(&args.mp->m_sb) && | 218 | } else |
195 | args.mp->m_sb.sb_inoalignmt >= | 219 | args.alignment = xfs_ialloc_cluster_alignment(&args); |
196 | XFS_B_TO_FSBT(args.mp, | ||
197 | XFS_INODE_CLUSTER_SIZE(args.mp))) | ||
198 | args.alignment = args.mp->m_sb.sb_inoalignmt; | ||
199 | else | ||
200 | args.alignment = 1; | ||
201 | /* | 220 | /* |
202 | * Need to figure out where to allocate the inode blocks. | 221 | * Need to figure out where to allocate the inode blocks. |
203 | * Ideally they should be spaced out through the a.g. | 222 | * Ideally they should be spaced out through the a.g. |
@@ -230,12 +249,7 @@ xfs_ialloc_ag_alloc( | |||
230 | args.agbno = be32_to_cpu(agi->agi_root); | 249 | args.agbno = be32_to_cpu(agi->agi_root); |
231 | args.fsbno = XFS_AGB_TO_FSB(args.mp, | 250 | args.fsbno = XFS_AGB_TO_FSB(args.mp, |
232 | be32_to_cpu(agi->agi_seqno), args.agbno); | 251 | be32_to_cpu(agi->agi_seqno), args.agbno); |
233 | if (xfs_sb_version_hasalign(&args.mp->m_sb) && | 252 | args.alignment = xfs_ialloc_cluster_alignment(&args); |
234 | args.mp->m_sb.sb_inoalignmt >= | ||
235 | XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp))) | ||
236 | args.alignment = args.mp->m_sb.sb_inoalignmt; | ||
237 | else | ||
238 | args.alignment = 1; | ||
239 | if ((error = xfs_alloc_vextent(&args))) | 253 | if ((error = xfs_alloc_vextent(&args))) |
240 | return error; | 254 | return error; |
241 | } | 255 | } |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 8e09b71f4104..e657c5128460 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -78,7 +78,6 @@ xfs_iget_core( | |||
78 | xfs_inode_t *ip; | 78 | xfs_inode_t *ip; |
79 | xfs_inode_t *iq; | 79 | xfs_inode_t *iq; |
80 | int error; | 80 | int error; |
81 | xfs_icluster_t *icl, *new_icl = NULL; | ||
82 | unsigned long first_index, mask; | 81 | unsigned long first_index, mask; |
83 | xfs_perag_t *pag; | 82 | xfs_perag_t *pag; |
84 | xfs_agino_t agino; | 83 | xfs_agino_t agino; |
@@ -229,11 +228,9 @@ finish_inode: | |||
229 | } | 228 | } |
230 | 229 | ||
231 | /* | 230 | /* |
232 | * This is a bit messy - we preallocate everything we _might_ | 231 | * Preload the radix tree so we can insert safely under the |
233 | * need before we pick up the ici lock. That way we don't have to | 232 | * write spinlock. |
234 | * juggle locks and go all the way back to the start. | ||
235 | */ | 233 | */ |
236 | new_icl = kmem_zone_alloc(xfs_icluster_zone, KM_SLEEP); | ||
237 | if (radix_tree_preload(GFP_KERNEL)) { | 234 | if (radix_tree_preload(GFP_KERNEL)) { |
238 | xfs_idestroy(ip); | 235 | xfs_idestroy(ip); |
239 | delay(1); | 236 | delay(1); |
@@ -242,17 +239,6 @@ finish_inode: | |||
242 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); | 239 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); |
243 | first_index = agino & mask; | 240 | first_index = agino & mask; |
244 | write_lock(&pag->pag_ici_lock); | 241 | write_lock(&pag->pag_ici_lock); |
245 | |||
246 | /* | ||
247 | * Find the cluster if it exists | ||
248 | */ | ||
249 | icl = NULL; | ||
250 | if (radix_tree_gang_lookup(&pag->pag_ici_root, (void**)&iq, | ||
251 | first_index, 1)) { | ||
252 | if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) == first_index) | ||
253 | icl = iq->i_cluster; | ||
254 | } | ||
255 | |||
256 | /* | 242 | /* |
257 | * insert the new inode | 243 | * insert the new inode |
258 | */ | 244 | */ |
@@ -267,30 +253,13 @@ finish_inode: | |||
267 | } | 253 | } |
268 | 254 | ||
269 | /* | 255 | /* |
270 | * These values _must_ be set before releasing ihlock! | 256 | * These values _must_ be set before releasing the radix tree lock! |
271 | */ | 257 | */ |
272 | ip->i_udquot = ip->i_gdquot = NULL; | 258 | ip->i_udquot = ip->i_gdquot = NULL; |
273 | xfs_iflags_set(ip, XFS_INEW); | 259 | xfs_iflags_set(ip, XFS_INEW); |
274 | 260 | ||
275 | ASSERT(ip->i_cluster == NULL); | ||
276 | |||
277 | if (!icl) { | ||
278 | spin_lock_init(&new_icl->icl_lock); | ||
279 | INIT_HLIST_HEAD(&new_icl->icl_inodes); | ||
280 | icl = new_icl; | ||
281 | new_icl = NULL; | ||
282 | } else { | ||
283 | ASSERT(!hlist_empty(&icl->icl_inodes)); | ||
284 | } | ||
285 | spin_lock(&icl->icl_lock); | ||
286 | hlist_add_head(&ip->i_cnode, &icl->icl_inodes); | ||
287 | ip->i_cluster = icl; | ||
288 | spin_unlock(&icl->icl_lock); | ||
289 | |||
290 | write_unlock(&pag->pag_ici_lock); | 261 | write_unlock(&pag->pag_ici_lock); |
291 | radix_tree_preload_end(); | 262 | radix_tree_preload_end(); |
292 | if (new_icl) | ||
293 | kmem_zone_free(xfs_icluster_zone, new_icl); | ||
294 | 263 | ||
295 | /* | 264 | /* |
296 | * Link ip to its mount and thread it on the mount's inode list. | 265 | * Link ip to its mount and thread it on the mount's inode list. |
@@ -529,18 +498,6 @@ xfs_iextract( | |||
529 | xfs_put_perag(mp, pag); | 498 | xfs_put_perag(mp, pag); |
530 | 499 | ||
531 | /* | 500 | /* |
532 | * Remove from cluster list | ||
533 | */ | ||
534 | mp = ip->i_mount; | ||
535 | spin_lock(&ip->i_cluster->icl_lock); | ||
536 | hlist_del(&ip->i_cnode); | ||
537 | spin_unlock(&ip->i_cluster->icl_lock); | ||
538 | |||
539 | /* was last inode in cluster? */ | ||
540 | if (hlist_empty(&ip->i_cluster->icl_inodes)) | ||
541 | kmem_zone_free(xfs_icluster_zone, ip->i_cluster); | ||
542 | |||
543 | /* | ||
544 | * Remove from mount's inode list. | 501 | * Remove from mount's inode list. |
545 | */ | 502 | */ |
546 | XFS_MOUNT_ILOCK(mp); | 503 | XFS_MOUNT_ILOCK(mp); |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index f43a6e01d68f..ca12acb90394 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -55,7 +55,6 @@ | |||
55 | 55 | ||
56 | kmem_zone_t *xfs_ifork_zone; | 56 | kmem_zone_t *xfs_ifork_zone; |
57 | kmem_zone_t *xfs_inode_zone; | 57 | kmem_zone_t *xfs_inode_zone; |
58 | kmem_zone_t *xfs_icluster_zone; | ||
59 | 58 | ||
60 | /* | 59 | /* |
61 | * Used in xfs_itruncate(). This is the maximum number of extents | 60 | * Used in xfs_itruncate(). This is the maximum number of extents |
@@ -126,6 +125,90 @@ xfs_inobp_check( | |||
126 | #endif | 125 | #endif |
127 | 126 | ||
128 | /* | 127 | /* |
128 | * Find the buffer associated with the given inode map | ||
129 | * We do basic validation checks on the buffer once it has been | ||
130 | * retrieved from disk. | ||
131 | */ | ||
132 | STATIC int | ||
133 | xfs_imap_to_bp( | ||
134 | xfs_mount_t *mp, | ||
135 | xfs_trans_t *tp, | ||
136 | xfs_imap_t *imap, | ||
137 | xfs_buf_t **bpp, | ||
138 | uint buf_flags, | ||
139 | uint imap_flags) | ||
140 | { | ||
141 | int error; | ||
142 | int i; | ||
143 | int ni; | ||
144 | xfs_buf_t *bp; | ||
145 | |||
146 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, | ||
147 | (int)imap->im_len, buf_flags, &bp); | ||
148 | if (error) { | ||
149 | if (error != EAGAIN) { | ||
150 | cmn_err(CE_WARN, | ||
151 | "xfs_imap_to_bp: xfs_trans_read_buf()returned " | ||
152 | "an error %d on %s. Returning error.", | ||
153 | error, mp->m_fsname); | ||
154 | } else { | ||
155 | ASSERT(buf_flags & XFS_BUF_TRYLOCK); | ||
156 | } | ||
157 | return error; | ||
158 | } | ||
159 | |||
160 | /* | ||
161 | * Validate the magic number and version of every inode in the buffer | ||
162 | * (if DEBUG kernel) or the first inode in the buffer, otherwise. | ||
163 | */ | ||
164 | #ifdef DEBUG | ||
165 | ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog; | ||
166 | #else /* usual case */ | ||
167 | ni = 1; | ||
168 | #endif | ||
169 | |||
170 | for (i = 0; i < ni; i++) { | ||
171 | int di_ok; | ||
172 | xfs_dinode_t *dip; | ||
173 | |||
174 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, | ||
175 | (i << mp->m_sb.sb_inodelog)); | ||
176 | di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC && | ||
177 | XFS_DINODE_GOOD_VERSION(dip->di_core.di_version); | ||
178 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, | ||
179 | XFS_ERRTAG_ITOBP_INOTOBP, | ||
180 | XFS_RANDOM_ITOBP_INOTOBP))) { | ||
181 | if (imap_flags & XFS_IMAP_BULKSTAT) { | ||
182 | xfs_trans_brelse(tp, bp); | ||
183 | return XFS_ERROR(EINVAL); | ||
184 | } | ||
185 | XFS_CORRUPTION_ERROR("xfs_imap_to_bp", | ||
186 | XFS_ERRLEVEL_HIGH, mp, dip); | ||
187 | #ifdef DEBUG | ||
188 | cmn_err(CE_PANIC, | ||
189 | "Device %s - bad inode magic/vsn " | ||
190 | "daddr %lld #%d (magic=%x)", | ||
191 | XFS_BUFTARG_NAME(mp->m_ddev_targp), | ||
192 | (unsigned long long)imap->im_blkno, i, | ||
193 | be16_to_cpu(dip->di_core.di_magic)); | ||
194 | #endif | ||
195 | xfs_trans_brelse(tp, bp); | ||
196 | return XFS_ERROR(EFSCORRUPTED); | ||
197 | } | ||
198 | } | ||
199 | |||
200 | xfs_inobp_check(mp, bp); | ||
201 | |||
202 | /* | ||
203 | * Mark the buffer as an inode buffer now that it looks good | ||
204 | */ | ||
205 | XFS_BUF_SET_VTYPE(bp, B_FS_INO); | ||
206 | |||
207 | *bpp = bp; | ||
208 | return 0; | ||
209 | } | ||
210 | |||
211 | /* | ||
129 | * This routine is called to map an inode number within a file | 212 | * This routine is called to map an inode number within a file |
130 | * system to the buffer containing the on-disk version of the | 213 | * system to the buffer containing the on-disk version of the |
131 | * inode. It returns a pointer to the buffer containing the | 214 | * inode. It returns a pointer to the buffer containing the |
@@ -147,72 +230,19 @@ xfs_inotobp( | |||
147 | xfs_buf_t **bpp, | 230 | xfs_buf_t **bpp, |
148 | int *offset) | 231 | int *offset) |
149 | { | 232 | { |
150 | int di_ok; | ||
151 | xfs_imap_t imap; | 233 | xfs_imap_t imap; |
152 | xfs_buf_t *bp; | 234 | xfs_buf_t *bp; |
153 | int error; | 235 | int error; |
154 | xfs_dinode_t *dip; | ||
155 | 236 | ||
156 | /* | ||
157 | * Call the space management code to find the location of the | ||
158 | * inode on disk. | ||
159 | */ | ||
160 | imap.im_blkno = 0; | 237 | imap.im_blkno = 0; |
161 | error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP); | 238 | error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP); |
162 | if (error != 0) { | 239 | if (error) |
163 | cmn_err(CE_WARN, | ||
164 | "xfs_inotobp: xfs_imap() returned an " | ||
165 | "error %d on %s. Returning error.", error, mp->m_fsname); | ||
166 | return error; | 240 | return error; |
167 | } | ||
168 | 241 | ||
169 | /* | 242 | error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, 0); |
170 | * If the inode number maps to a block outside the bounds of the | 243 | if (error) |
171 | * file system then return NULL rather than calling read_buf | ||
172 | * and panicing when we get an error from the driver. | ||
173 | */ | ||
174 | if ((imap.im_blkno + imap.im_len) > | ||
175 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { | ||
176 | cmn_err(CE_WARN, | ||
177 | "xfs_inotobp: inode number (%llu + %d) maps to a block outside the bounds " | ||
178 | "of the file system %s. Returning EINVAL.", | ||
179 | (unsigned long long)imap.im_blkno, | ||
180 | imap.im_len, mp->m_fsname); | ||
181 | return XFS_ERROR(EINVAL); | ||
182 | } | ||
183 | |||
184 | /* | ||
185 | * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will | ||
186 | * default to just a read_buf() call. | ||
187 | */ | ||
188 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, | ||
189 | (int)imap.im_len, XFS_BUF_LOCK, &bp); | ||
190 | |||
191 | if (error) { | ||
192 | cmn_err(CE_WARN, | ||
193 | "xfs_inotobp: xfs_trans_read_buf() returned an " | ||
194 | "error %d on %s. Returning error.", error, mp->m_fsname); | ||
195 | return error; | 244 | return error; |
196 | } | ||
197 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, 0); | ||
198 | di_ok = | ||
199 | be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC && | ||
200 | XFS_DINODE_GOOD_VERSION(dip->di_core.di_version); | ||
201 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, | ||
202 | XFS_RANDOM_ITOBP_INOTOBP))) { | ||
203 | XFS_CORRUPTION_ERROR("xfs_inotobp", XFS_ERRLEVEL_LOW, mp, dip); | ||
204 | xfs_trans_brelse(tp, bp); | ||
205 | cmn_err(CE_WARN, | ||
206 | "xfs_inotobp: XFS_TEST_ERROR() returned an " | ||
207 | "error on %s. Returning EFSCORRUPTED.", mp->m_fsname); | ||
208 | return XFS_ERROR(EFSCORRUPTED); | ||
209 | } | ||
210 | 245 | ||
211 | xfs_inobp_check(mp, bp); | ||
212 | |||
213 | /* | ||
214 | * Set *dipp to point to the on-disk inode in the buffer. | ||
215 | */ | ||
216 | *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); | 246 | *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); |
217 | *bpp = bp; | 247 | *bpp = bp; |
218 | *offset = imap.im_boffset; | 248 | *offset = imap.im_boffset; |
@@ -248,46 +278,21 @@ xfs_itobp( | |||
248 | xfs_dinode_t **dipp, | 278 | xfs_dinode_t **dipp, |
249 | xfs_buf_t **bpp, | 279 | xfs_buf_t **bpp, |
250 | xfs_daddr_t bno, | 280 | xfs_daddr_t bno, |
251 | uint imap_flags) | 281 | uint imap_flags, |
282 | uint buf_flags) | ||
252 | { | 283 | { |
253 | xfs_imap_t imap; | 284 | xfs_imap_t imap; |
254 | xfs_buf_t *bp; | 285 | xfs_buf_t *bp; |
255 | int error; | 286 | int error; |
256 | int i; | ||
257 | int ni; | ||
258 | 287 | ||
259 | if (ip->i_blkno == (xfs_daddr_t)0) { | 288 | if (ip->i_blkno == (xfs_daddr_t)0) { |
260 | /* | ||
261 | * Call the space management code to find the location of the | ||
262 | * inode on disk. | ||
263 | */ | ||
264 | imap.im_blkno = bno; | 289 | imap.im_blkno = bno; |
265 | if ((error = xfs_imap(mp, tp, ip->i_ino, &imap, | 290 | error = xfs_imap(mp, tp, ip->i_ino, &imap, |
266 | XFS_IMAP_LOOKUP | imap_flags))) | 291 | XFS_IMAP_LOOKUP | imap_flags); |
292 | if (error) | ||
267 | return error; | 293 | return error; |
268 | 294 | ||
269 | /* | 295 | /* |
270 | * If the inode number maps to a block outside the bounds | ||
271 | * of the file system then return NULL rather than calling | ||
272 | * read_buf and panicing when we get an error from the | ||
273 | * driver. | ||
274 | */ | ||
275 | if ((imap.im_blkno + imap.im_len) > | ||
276 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { | ||
277 | #ifdef DEBUG | ||
278 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: " | ||
279 | "(imap.im_blkno (0x%llx) " | ||
280 | "+ imap.im_len (0x%llx)) > " | ||
281 | " XFS_FSB_TO_BB(mp, " | ||
282 | "mp->m_sb.sb_dblocks) (0x%llx)", | ||
283 | (unsigned long long) imap.im_blkno, | ||
284 | (unsigned long long) imap.im_len, | ||
285 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); | ||
286 | #endif /* DEBUG */ | ||
287 | return XFS_ERROR(EINVAL); | ||
288 | } | ||
289 | |||
290 | /* | ||
291 | * Fill in the fields in the inode that will be used to | 296 | * Fill in the fields in the inode that will be used to |
292 | * map the inode to its buffer from now on. | 297 | * map the inode to its buffer from now on. |
293 | */ | 298 | */ |
@@ -305,76 +310,17 @@ xfs_itobp( | |||
305 | } | 310 | } |
306 | ASSERT(bno == 0 || bno == imap.im_blkno); | 311 | ASSERT(bno == 0 || bno == imap.im_blkno); |
307 | 312 | ||
308 | /* | 313 | error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, imap_flags); |
309 | * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will | 314 | if (error) |
310 | * default to just a read_buf() call. | ||
311 | */ | ||
312 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, | ||
313 | (int)imap.im_len, XFS_BUF_LOCK, &bp); | ||
314 | if (error) { | ||
315 | #ifdef DEBUG | ||
316 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: " | ||
317 | "xfs_trans_read_buf() returned error %d, " | ||
318 | "imap.im_blkno 0x%llx, imap.im_len 0x%llx", | ||
319 | error, (unsigned long long) imap.im_blkno, | ||
320 | (unsigned long long) imap.im_len); | ||
321 | #endif /* DEBUG */ | ||
322 | return error; | 315 | return error; |
323 | } | ||
324 | |||
325 | /* | ||
326 | * Validate the magic number and version of every inode in the buffer | ||
327 | * (if DEBUG kernel) or the first inode in the buffer, otherwise. | ||
328 | * No validation is done here in userspace (xfs_repair). | ||
329 | */ | ||
330 | #if !defined(__KERNEL__) | ||
331 | ni = 0; | ||
332 | #elif defined(DEBUG) | ||
333 | ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog; | ||
334 | #else /* usual case */ | ||
335 | ni = 1; | ||
336 | #endif | ||
337 | |||
338 | for (i = 0; i < ni; i++) { | ||
339 | int di_ok; | ||
340 | xfs_dinode_t *dip; | ||
341 | 316 | ||
342 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, | 317 | if (!bp) { |
343 | (i << mp->m_sb.sb_inodelog)); | 318 | ASSERT(buf_flags & XFS_BUF_TRYLOCK); |
344 | di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC && | 319 | ASSERT(tp == NULL); |
345 | XFS_DINODE_GOOD_VERSION(dip->di_core.di_version); | 320 | *bpp = NULL; |
346 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, | 321 | return EAGAIN; |
347 | XFS_ERRTAG_ITOBP_INOTOBP, | ||
348 | XFS_RANDOM_ITOBP_INOTOBP))) { | ||
349 | if (imap_flags & XFS_IMAP_BULKSTAT) { | ||
350 | xfs_trans_brelse(tp, bp); | ||
351 | return XFS_ERROR(EINVAL); | ||
352 | } | ||
353 | #ifdef DEBUG | ||
354 | cmn_err(CE_ALERT, | ||
355 | "Device %s - bad inode magic/vsn " | ||
356 | "daddr %lld #%d (magic=%x)", | ||
357 | XFS_BUFTARG_NAME(mp->m_ddev_targp), | ||
358 | (unsigned long long)imap.im_blkno, i, | ||
359 | be16_to_cpu(dip->di_core.di_magic)); | ||
360 | #endif | ||
361 | XFS_CORRUPTION_ERROR("xfs_itobp", XFS_ERRLEVEL_HIGH, | ||
362 | mp, dip); | ||
363 | xfs_trans_brelse(tp, bp); | ||
364 | return XFS_ERROR(EFSCORRUPTED); | ||
365 | } | ||
366 | } | 322 | } |
367 | 323 | ||
368 | xfs_inobp_check(mp, bp); | ||
369 | |||
370 | /* | ||
371 | * Mark the buffer as an inode buffer now that it looks good | ||
372 | */ | ||
373 | XFS_BUF_SET_VTYPE(bp, B_FS_INO); | ||
374 | |||
375 | /* | ||
376 | * Set *dipp to point to the on-disk inode in the buffer. | ||
377 | */ | ||
378 | *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); | 324 | *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); |
379 | *bpp = bp; | 325 | *bpp = bp; |
380 | return 0; | 326 | return 0; |
@@ -878,7 +824,7 @@ xfs_iread( | |||
878 | * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will | 824 | * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will |
879 | * know that this is a new incore inode. | 825 | * know that this is a new incore inode. |
880 | */ | 826 | */ |
881 | error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags); | 827 | error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK); |
882 | if (error) { | 828 | if (error) { |
883 | kmem_zone_free(xfs_inode_zone, ip); | 829 | kmem_zone_free(xfs_inode_zone, ip); |
884 | return error; | 830 | return error; |
@@ -1518,51 +1464,50 @@ xfs_itruncate_start( | |||
1518 | } | 1464 | } |
1519 | 1465 | ||
1520 | /* | 1466 | /* |
1521 | * Shrink the file to the given new_size. The new | 1467 | * Shrink the file to the given new_size. The new size must be smaller than |
1522 | * size must be smaller than the current size. | 1468 | * the current size. This will free up the underlying blocks in the removed |
1523 | * This will free up the underlying blocks | 1469 | * range after a call to xfs_itruncate_start() or xfs_atruncate_start(). |
1524 | * in the removed range after a call to xfs_itruncate_start() | ||
1525 | * or xfs_atruncate_start(). | ||
1526 | * | 1470 | * |
1527 | * The transaction passed to this routine must have made | 1471 | * The transaction passed to this routine must have made a permanent log |
1528 | * a permanent log reservation of at least XFS_ITRUNCATE_LOG_RES. | 1472 | * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the |
1529 | * This routine may commit the given transaction and | 1473 | * given transaction and start new ones, so make sure everything involved in |
1530 | * start new ones, so make sure everything involved in | 1474 | * the transaction is tidy before calling here. Some transaction will be |
1531 | * the transaction is tidy before calling here. | 1475 | * returned to the caller to be committed. The incoming transaction must |
1532 | * Some transaction will be returned to the caller to be | 1476 | * already include the inode, and both inode locks must be held exclusively. |
1533 | * committed. The incoming transaction must already include | 1477 | * The inode must also be "held" within the transaction. On return the inode |
1534 | * the inode, and both inode locks must be held exclusively. | 1478 | * will be "held" within the returned transaction. This routine does NOT |
1535 | * The inode must also be "held" within the transaction. On | 1479 | * require any disk space to be reserved for it within the transaction. |
1536 | * return the inode will be "held" within the returned transaction. | ||
1537 | * This routine does NOT require any disk space to be reserved | ||
1538 | * for it within the transaction. | ||
1539 | * | 1480 | * |
1540 | * The fork parameter must be either xfs_attr_fork or xfs_data_fork, | 1481 | * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it |
1541 | * and it indicates the fork which is to be truncated. For the | 1482 | * indicates the fork which is to be truncated. For the attribute fork we only |
1542 | * attribute fork we only support truncation to size 0. | 1483 | * support truncation to size 0. |
1543 | * | 1484 | * |
1544 | * We use the sync parameter to indicate whether or not the first | 1485 | * We use the sync parameter to indicate whether or not the first transaction |
1545 | * transaction we perform might have to be synchronous. For the attr fork, | 1486 | * we perform might have to be synchronous. For the attr fork, it needs to be |
1546 | * it needs to be so if the unlink of the inode is not yet known to be | 1487 | * so if the unlink of the inode is not yet known to be permanent in the log. |
1547 | * permanent in the log. This keeps us from freeing and reusing the | 1488 | * This keeps us from freeing and reusing the blocks of the attribute fork |
1548 | * blocks of the attribute fork before the unlink of the inode becomes | 1489 | * before the unlink of the inode becomes permanent. |
1549 | * permanent. | ||
1550 | * | 1490 | * |
1551 | * For the data fork, we normally have to run synchronously if we're | 1491 | * For the data fork, we normally have to run synchronously if we're being |
1552 | * being called out of the inactive path or we're being called | 1492 | * called out of the inactive path or we're being called out of the create path |
1553 | * out of the create path where we're truncating an existing file. | 1493 | * where we're truncating an existing file. Either way, the truncate needs to |
1554 | * Either way, the truncate needs to be sync so blocks don't reappear | 1494 | * be sync so blocks don't reappear in the file with altered data in case of a |
1555 | * in the file with altered data in case of a crash. wsync filesystems | 1495 | * crash. wsync filesystems can run the first case async because anything that |
1556 | * can run the first case async because anything that shrinks the inode | 1496 | * shrinks the inode has to run sync so by the time we're called here from |
1557 | * has to run sync so by the time we're called here from inactive, the | 1497 | * inactive, the inode size is permanently set to 0. |
1558 | * inode size is permanently set to 0. | ||
1559 | * | 1498 | * |
1560 | * Calls from the truncate path always need to be sync unless we're | 1499 | * Calls from the truncate path always need to be sync unless we're in a wsync |
1561 | * in a wsync filesystem and the file has already been unlinked. | 1500 | * filesystem and the file has already been unlinked. |
1562 | * | 1501 | * |
1563 | * The caller is responsible for correctly setting the sync parameter. | 1502 | * The caller is responsible for correctly setting the sync parameter. It gets |
1564 | * It gets too hard for us to guess here which path we're being called | 1503 | * too hard for us to guess here which path we're being called out of just |
1565 | * out of just based on inode state. | 1504 | * based on inode state. |
1505 | * | ||
1506 | * If we get an error, we must return with the inode locked and linked into the | ||
1507 | * current transaction. This keeps things simple for the higher level code, | ||
1508 | * because it always knows that the inode is locked and held in the transaction | ||
1509 | * that returns to it whether errors occur or not. We don't mark the inode | ||
1510 | * dirty on error so that transactions can be easily aborted if possible. | ||
1566 | */ | 1511 | */ |
1567 | int | 1512 | int |
1568 | xfs_itruncate_finish( | 1513 | xfs_itruncate_finish( |
@@ -1741,65 +1686,51 @@ xfs_itruncate_finish( | |||
1741 | */ | 1686 | */ |
1742 | error = xfs_bmap_finish(tp, &free_list, &committed); | 1687 | error = xfs_bmap_finish(tp, &free_list, &committed); |
1743 | ntp = *tp; | 1688 | ntp = *tp; |
1689 | if (committed) { | ||
1690 | /* link the inode into the next xact in the chain */ | ||
1691 | xfs_trans_ijoin(ntp, ip, | ||
1692 | XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | ||
1693 | xfs_trans_ihold(ntp, ip); | ||
1694 | } | ||
1695 | |||
1744 | if (error) { | 1696 | if (error) { |
1745 | /* | 1697 | /* |
1746 | * If the bmap finish call encounters an error, | 1698 | * If the bmap finish call encounters an error, return |
1747 | * return to the caller where the transaction | 1699 | * to the caller where the transaction can be properly |
1748 | * can be properly aborted. We just need to | 1700 | * aborted. We just need to make sure we're not |
1749 | * make sure we're not holding any resources | 1701 | * holding any resources that we were not when we came |
1750 | * that we were not when we came in. | 1702 | * in. |
1751 | * | 1703 | * |
1752 | * Aborting from this point might lose some | 1704 | * Aborting from this point might lose some blocks in |
1753 | * blocks in the file system, but oh well. | 1705 | * the file system, but oh well. |
1754 | */ | 1706 | */ |
1755 | xfs_bmap_cancel(&free_list); | 1707 | xfs_bmap_cancel(&free_list); |
1756 | if (committed) { | ||
1757 | /* | ||
1758 | * If the passed in transaction committed | ||
1759 | * in xfs_bmap_finish(), then we want to | ||
1760 | * add the inode to this one before returning. | ||
1761 | * This keeps things simple for the higher | ||
1762 | * level code, because it always knows that | ||
1763 | * the inode is locked and held in the | ||
1764 | * transaction that returns to it whether | ||
1765 | * errors occur or not. We don't mark the | ||
1766 | * inode dirty so that this transaction can | ||
1767 | * be easily aborted if possible. | ||
1768 | */ | ||
1769 | xfs_trans_ijoin(ntp, ip, | ||
1770 | XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | ||
1771 | xfs_trans_ihold(ntp, ip); | ||
1772 | } | ||
1773 | return error; | 1708 | return error; |
1774 | } | 1709 | } |
1775 | 1710 | ||
1776 | if (committed) { | 1711 | if (committed) { |
1777 | /* | 1712 | /* |
1778 | * The first xact was committed, | 1713 | * Mark the inode dirty so it will be logged and |
1779 | * so add the inode to the new one. | 1714 | * moved forward in the log as part of every commit. |
1780 | * Mark it dirty so it will be logged | ||
1781 | * and moved forward in the log as | ||
1782 | * part of every commit. | ||
1783 | */ | 1715 | */ |
1784 | xfs_trans_ijoin(ntp, ip, | ||
1785 | XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | ||
1786 | xfs_trans_ihold(ntp, ip); | ||
1787 | xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); | 1716 | xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); |
1788 | } | 1717 | } |
1718 | |||
1789 | ntp = xfs_trans_dup(ntp); | 1719 | ntp = xfs_trans_dup(ntp); |
1790 | (void) xfs_trans_commit(*tp, 0); | 1720 | error = xfs_trans_commit(*tp, 0); |
1791 | *tp = ntp; | 1721 | *tp = ntp; |
1792 | error = xfs_trans_reserve(ntp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, | 1722 | |
1793 | XFS_TRANS_PERM_LOG_RES, | 1723 | /* link the inode into the next transaction in the chain */ |
1794 | XFS_ITRUNCATE_LOG_COUNT); | ||
1795 | /* | ||
1796 | * Add the inode being truncated to the next chained | ||
1797 | * transaction. | ||
1798 | */ | ||
1799 | xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | 1724 | xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); |
1800 | xfs_trans_ihold(ntp, ip); | 1725 | xfs_trans_ihold(ntp, ip); |
1726 | |||
1727 | if (!error) | ||
1728 | error = xfs_trans_reserve(ntp, 0, | ||
1729 | XFS_ITRUNCATE_LOG_RES(mp), 0, | ||
1730 | XFS_TRANS_PERM_LOG_RES, | ||
1731 | XFS_ITRUNCATE_LOG_COUNT); | ||
1801 | if (error) | 1732 | if (error) |
1802 | return (error); | 1733 | return error; |
1803 | } | 1734 | } |
1804 | /* | 1735 | /* |
1805 | * Only update the size in the case of the data fork, but | 1736 | * Only update the size in the case of the data fork, but |
@@ -1967,7 +1898,7 @@ xfs_iunlink( | |||
1967 | * Here we put the head pointer into our next pointer, | 1898 | * Here we put the head pointer into our next pointer, |
1968 | * and then we fall through to point the head at us. | 1899 | * and then we fall through to point the head at us. |
1969 | */ | 1900 | */ |
1970 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); | 1901 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); |
1971 | if (error) | 1902 | if (error) |
1972 | return error; | 1903 | return error; |
1973 | 1904 | ||
@@ -2075,7 +2006,7 @@ xfs_iunlink_remove( | |||
2075 | * of dealing with the buffer when there is no need to | 2006 | * of dealing with the buffer when there is no need to |
2076 | * change it. | 2007 | * change it. |
2077 | */ | 2008 | */ |
2078 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); | 2009 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); |
2079 | if (error) { | 2010 | if (error) { |
2080 | cmn_err(CE_WARN, | 2011 | cmn_err(CE_WARN, |
2081 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", | 2012 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", |
@@ -2137,7 +2068,7 @@ xfs_iunlink_remove( | |||
2137 | * Now last_ibp points to the buffer previous to us on | 2068 | * Now last_ibp points to the buffer previous to us on |
2138 | * the unlinked list. Pull us from the list. | 2069 | * the unlinked list. Pull us from the list. |
2139 | */ | 2070 | */ |
2140 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); | 2071 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); |
2141 | if (error) { | 2072 | if (error) { |
2142 | cmn_err(CE_WARN, | 2073 | cmn_err(CE_WARN, |
2143 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", | 2074 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", |
@@ -2172,13 +2103,6 @@ xfs_iunlink_remove( | |||
2172 | return 0; | 2103 | return 0; |
2173 | } | 2104 | } |
2174 | 2105 | ||
2175 | STATIC_INLINE int xfs_inode_clean(xfs_inode_t *ip) | ||
2176 | { | ||
2177 | return (((ip->i_itemp == NULL) || | ||
2178 | !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) && | ||
2179 | (ip->i_update_core == 0)); | ||
2180 | } | ||
2181 | |||
2182 | STATIC void | 2106 | STATIC void |
2183 | xfs_ifree_cluster( | 2107 | xfs_ifree_cluster( |
2184 | xfs_inode_t *free_ip, | 2108 | xfs_inode_t *free_ip, |
@@ -2400,7 +2324,7 @@ xfs_ifree( | |||
2400 | 2324 | ||
2401 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 2325 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
2402 | 2326 | ||
2403 | error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0); | 2327 | error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); |
2404 | if (error) | 2328 | if (error) |
2405 | return error; | 2329 | return error; |
2406 | 2330 | ||
@@ -2678,14 +2602,31 @@ xfs_imap( | |||
2678 | fsbno = imap->im_blkno ? | 2602 | fsbno = imap->im_blkno ? |
2679 | XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK; | 2603 | XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK; |
2680 | error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags); | 2604 | error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags); |
2681 | if (error != 0) { | 2605 | if (error) |
2682 | return error; | 2606 | return error; |
2683 | } | 2607 | |
2684 | imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno); | 2608 | imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno); |
2685 | imap->im_len = XFS_FSB_TO_BB(mp, len); | 2609 | imap->im_len = XFS_FSB_TO_BB(mp, len); |
2686 | imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno); | 2610 | imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno); |
2687 | imap->im_ioffset = (ushort)off; | 2611 | imap->im_ioffset = (ushort)off; |
2688 | imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog); | 2612 | imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog); |
2613 | |||
2614 | /* | ||
2615 | * If the inode number maps to a block outside the bounds | ||
2616 | * of the file system then return NULL rather than calling | ||
2617 | * read_buf and panicing when we get an error from the | ||
2618 | * driver. | ||
2619 | */ | ||
2620 | if ((imap->im_blkno + imap->im_len) > | ||
2621 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { | ||
2622 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " | ||
2623 | "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > " | ||
2624 | " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)", | ||
2625 | (unsigned long long) imap->im_blkno, | ||
2626 | (unsigned long long) imap->im_len, | ||
2627 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); | ||
2628 | return EINVAL; | ||
2629 | } | ||
2689 | return 0; | 2630 | return 0; |
2690 | } | 2631 | } |
2691 | 2632 | ||
@@ -2826,38 +2767,41 @@ xfs_iunpin( | |||
2826 | } | 2767 | } |
2827 | 2768 | ||
2828 | /* | 2769 | /* |
2829 | * This is called to wait for the given inode to be unpinned. | 2770 | * This is called to unpin an inode. It can be directed to wait or to return |
2830 | * It will sleep until this happens. The caller must have the | 2771 | * immediately without waiting for the inode to be unpinned. The caller must |
2831 | * inode locked in at least shared mode so that the buffer cannot | 2772 | * have the inode locked in at least shared mode so that the buffer cannot be |
2832 | * be subsequently pinned once someone is waiting for it to be | 2773 | * subsequently pinned once someone is waiting for it to be unpinned. |
2833 | * unpinned. | ||
2834 | */ | 2774 | */ |
2835 | STATIC void | 2775 | STATIC void |
2836 | xfs_iunpin_wait( | 2776 | __xfs_iunpin_wait( |
2837 | xfs_inode_t *ip) | 2777 | xfs_inode_t *ip, |
2778 | int wait) | ||
2838 | { | 2779 | { |
2839 | xfs_inode_log_item_t *iip; | 2780 | xfs_inode_log_item_t *iip = ip->i_itemp; |
2840 | xfs_lsn_t lsn; | ||
2841 | 2781 | ||
2842 | ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS)); | 2782 | ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS)); |
2843 | 2783 | if (atomic_read(&ip->i_pincount) == 0) | |
2844 | if (atomic_read(&ip->i_pincount) == 0) { | ||
2845 | return; | 2784 | return; |
2846 | } | ||
2847 | 2785 | ||
2848 | iip = ip->i_itemp; | 2786 | /* Give the log a push to start the unpinning I/O */ |
2849 | if (iip && iip->ili_last_lsn) { | 2787 | xfs_log_force(ip->i_mount, (iip && iip->ili_last_lsn) ? |
2850 | lsn = iip->ili_last_lsn; | 2788 | iip->ili_last_lsn : 0, XFS_LOG_FORCE); |
2851 | } else { | 2789 | if (wait) |
2852 | lsn = (xfs_lsn_t)0; | 2790 | wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); |
2853 | } | 2791 | } |
2854 | 2792 | ||
2855 | /* | 2793 | static inline void |
2856 | * Give the log a push so we don't wait here too long. | 2794 | xfs_iunpin_wait( |
2857 | */ | 2795 | xfs_inode_t *ip) |
2858 | xfs_log_force(ip->i_mount, lsn, XFS_LOG_FORCE); | 2796 | { |
2797 | __xfs_iunpin_wait(ip, 1); | ||
2798 | } | ||
2859 | 2799 | ||
2860 | wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); | 2800 | static inline void |
2801 | xfs_iunpin_nowait( | ||
2802 | xfs_inode_t *ip) | ||
2803 | { | ||
2804 | __xfs_iunpin_wait(ip, 0); | ||
2861 | } | 2805 | } |
2862 | 2806 | ||
2863 | 2807 | ||
@@ -2932,7 +2876,7 @@ xfs_iextents_copy( | |||
2932 | * format indicates the current state of the fork. | 2876 | * format indicates the current state of the fork. |
2933 | */ | 2877 | */ |
2934 | /*ARGSUSED*/ | 2878 | /*ARGSUSED*/ |
2935 | STATIC int | 2879 | STATIC void |
2936 | xfs_iflush_fork( | 2880 | xfs_iflush_fork( |
2937 | xfs_inode_t *ip, | 2881 | xfs_inode_t *ip, |
2938 | xfs_dinode_t *dip, | 2882 | xfs_dinode_t *dip, |
@@ -2953,16 +2897,16 @@ xfs_iflush_fork( | |||
2953 | static const short extflag[2] = | 2897 | static const short extflag[2] = |
2954 | { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; | 2898 | { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; |
2955 | 2899 | ||
2956 | if (iip == NULL) | 2900 | if (!iip) |
2957 | return 0; | 2901 | return; |
2958 | ifp = XFS_IFORK_PTR(ip, whichfork); | 2902 | ifp = XFS_IFORK_PTR(ip, whichfork); |
2959 | /* | 2903 | /* |
2960 | * This can happen if we gave up in iformat in an error path, | 2904 | * This can happen if we gave up in iformat in an error path, |
2961 | * for the attribute fork. | 2905 | * for the attribute fork. |
2962 | */ | 2906 | */ |
2963 | if (ifp == NULL) { | 2907 | if (!ifp) { |
2964 | ASSERT(whichfork == XFS_ATTR_FORK); | 2908 | ASSERT(whichfork == XFS_ATTR_FORK); |
2965 | return 0; | 2909 | return; |
2966 | } | 2910 | } |
2967 | cp = XFS_DFORK_PTR(dip, whichfork); | 2911 | cp = XFS_DFORK_PTR(dip, whichfork); |
2968 | mp = ip->i_mount; | 2912 | mp = ip->i_mount; |
@@ -3023,8 +2967,145 @@ xfs_iflush_fork( | |||
3023 | ASSERT(0); | 2967 | ASSERT(0); |
3024 | break; | 2968 | break; |
3025 | } | 2969 | } |
2970 | } | ||
2971 | |||
2972 | STATIC int | ||
2973 | xfs_iflush_cluster( | ||
2974 | xfs_inode_t *ip, | ||
2975 | xfs_buf_t *bp) | ||
2976 | { | ||
2977 | xfs_mount_t *mp = ip->i_mount; | ||
2978 | xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); | ||
2979 | unsigned long first_index, mask; | ||
2980 | int ilist_size; | ||
2981 | xfs_inode_t **ilist; | ||
2982 | xfs_inode_t *iq; | ||
2983 | int nr_found; | ||
2984 | int clcount = 0; | ||
2985 | int bufwasdelwri; | ||
2986 | int i; | ||
2987 | |||
2988 | ASSERT(pag->pagi_inodeok); | ||
2989 | ASSERT(pag->pag_ici_init); | ||
2990 | |||
2991 | ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *); | ||
2992 | ilist = kmem_alloc(ilist_size, KM_MAYFAIL); | ||
2993 | if (!ilist) | ||
2994 | return 0; | ||
2995 | |||
2996 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); | ||
2997 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; | ||
2998 | read_lock(&pag->pag_ici_lock); | ||
2999 | /* really need a gang lookup range call here */ | ||
3000 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, | ||
3001 | first_index, | ||
3002 | XFS_INODE_CLUSTER_SIZE(mp)); | ||
3003 | if (nr_found == 0) | ||
3004 | goto out_free; | ||
3005 | |||
3006 | for (i = 0; i < nr_found; i++) { | ||
3007 | iq = ilist[i]; | ||
3008 | if (iq == ip) | ||
3009 | continue; | ||
3010 | /* if the inode lies outside this cluster, we're done. */ | ||
3011 | if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) | ||
3012 | break; | ||
3013 | /* | ||
3014 | * Do an un-protected check to see if the inode is dirty and | ||
3015 | * is a candidate for flushing. These checks will be repeated | ||
3016 | * later after the appropriate locks are acquired. | ||
3017 | */ | ||
3018 | if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0) | ||
3019 | continue; | ||
3020 | |||
3021 | /* | ||
3022 | * Try to get locks. If any are unavailable or it is pinned, | ||
3023 | * then this inode cannot be flushed and is skipped. | ||
3024 | */ | ||
3025 | |||
3026 | if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) | ||
3027 | continue; | ||
3028 | if (!xfs_iflock_nowait(iq)) { | ||
3029 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
3030 | continue; | ||
3031 | } | ||
3032 | if (xfs_ipincount(iq)) { | ||
3033 | xfs_ifunlock(iq); | ||
3034 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
3035 | continue; | ||
3036 | } | ||
3037 | |||
3038 | /* | ||
3039 | * arriving here means that this inode can be flushed. First | ||
3040 | * re-check that it's dirty before flushing. | ||
3041 | */ | ||
3042 | if (!xfs_inode_clean(iq)) { | ||
3043 | int error; | ||
3044 | error = xfs_iflush_int(iq, bp); | ||
3045 | if (error) { | ||
3046 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
3047 | goto cluster_corrupt_out; | ||
3048 | } | ||
3049 | clcount++; | ||
3050 | } else { | ||
3051 | xfs_ifunlock(iq); | ||
3052 | } | ||
3053 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
3054 | } | ||
3055 | |||
3056 | if (clcount) { | ||
3057 | XFS_STATS_INC(xs_icluster_flushcnt); | ||
3058 | XFS_STATS_ADD(xs_icluster_flushinode, clcount); | ||
3059 | } | ||
3026 | 3060 | ||
3061 | out_free: | ||
3062 | read_unlock(&pag->pag_ici_lock); | ||
3063 | kmem_free(ilist, ilist_size); | ||
3027 | return 0; | 3064 | return 0; |
3065 | |||
3066 | |||
3067 | cluster_corrupt_out: | ||
3068 | /* | ||
3069 | * Corruption detected in the clustering loop. Invalidate the | ||
3070 | * inode buffer and shut down the filesystem. | ||
3071 | */ | ||
3072 | read_unlock(&pag->pag_ici_lock); | ||
3073 | /* | ||
3074 | * Clean up the buffer. If it was B_DELWRI, just release it -- | ||
3075 | * brelse can handle it with no problems. If not, shut down the | ||
3076 | * filesystem before releasing the buffer. | ||
3077 | */ | ||
3078 | bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp); | ||
3079 | if (bufwasdelwri) | ||
3080 | xfs_buf_relse(bp); | ||
3081 | |||
3082 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
3083 | |||
3084 | if (!bufwasdelwri) { | ||
3085 | /* | ||
3086 | * Just like incore_relse: if we have b_iodone functions, | ||
3087 | * mark the buffer as an error and call them. Otherwise | ||
3088 | * mark it as stale and brelse. | ||
3089 | */ | ||
3090 | if (XFS_BUF_IODONE_FUNC(bp)) { | ||
3091 | XFS_BUF_CLR_BDSTRAT_FUNC(bp); | ||
3092 | XFS_BUF_UNDONE(bp); | ||
3093 | XFS_BUF_STALE(bp); | ||
3094 | XFS_BUF_SHUT(bp); | ||
3095 | XFS_BUF_ERROR(bp,EIO); | ||
3096 | xfs_biodone(bp); | ||
3097 | } else { | ||
3098 | XFS_BUF_STALE(bp); | ||
3099 | xfs_buf_relse(bp); | ||
3100 | } | ||
3101 | } | ||
3102 | |||
3103 | /* | ||
3104 | * Unlocks the flush lock | ||
3105 | */ | ||
3106 | xfs_iflush_abort(iq); | ||
3107 | kmem_free(ilist, ilist_size); | ||
3108 | return XFS_ERROR(EFSCORRUPTED); | ||
3028 | } | 3109 | } |
3029 | 3110 | ||
3030 | /* | 3111 | /* |
@@ -3046,11 +3127,7 @@ xfs_iflush( | |||
3046 | xfs_dinode_t *dip; | 3127 | xfs_dinode_t *dip; |
3047 | xfs_mount_t *mp; | 3128 | xfs_mount_t *mp; |
3048 | int error; | 3129 | int error; |
3049 | /* REFERENCED */ | 3130 | int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK); |
3050 | xfs_inode_t *iq; | ||
3051 | int clcount; /* count of inodes clustered */ | ||
3052 | int bufwasdelwri; | ||
3053 | struct hlist_node *entry; | ||
3054 | enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; | 3131 | enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; |
3055 | 3132 | ||
3056 | XFS_STATS_INC(xs_iflush_count); | 3133 | XFS_STATS_INC(xs_iflush_count); |
@@ -3067,8 +3144,7 @@ xfs_iflush( | |||
3067 | * If the inode isn't dirty, then just release the inode | 3144 | * If the inode isn't dirty, then just release the inode |
3068 | * flush lock and do nothing. | 3145 | * flush lock and do nothing. |
3069 | */ | 3146 | */ |
3070 | if ((ip->i_update_core == 0) && | 3147 | if (xfs_inode_clean(ip)) { |
3071 | ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { | ||
3072 | ASSERT((iip != NULL) ? | 3148 | ASSERT((iip != NULL) ? |
3073 | !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1); | 3149 | !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1); |
3074 | xfs_ifunlock(ip); | 3150 | xfs_ifunlock(ip); |
@@ -3076,11 +3152,21 @@ xfs_iflush( | |||
3076 | } | 3152 | } |
3077 | 3153 | ||
3078 | /* | 3154 | /* |
3079 | * We can't flush the inode until it is unpinned, so | 3155 | * We can't flush the inode until it is unpinned, so wait for it if we |
3080 | * wait for it. We know noone new can pin it, because | 3156 | * are allowed to block. We know noone new can pin it, because we are |
3081 | * we are holding the inode lock shared and you need | 3157 | * holding the inode lock shared and you need to hold it exclusively to |
3082 | * to hold it exclusively to pin the inode. | 3158 | * pin the inode. |
3159 | * | ||
3160 | * If we are not allowed to block, force the log out asynchronously so | ||
3161 | * that when we come back the inode will be unpinned. If other inodes | ||
3162 | * in the same cluster are dirty, they will probably write the inode | ||
3163 | * out for us if they occur after the log force completes. | ||
3083 | */ | 3164 | */ |
3165 | if (noblock && xfs_ipincount(ip)) { | ||
3166 | xfs_iunpin_nowait(ip); | ||
3167 | xfs_ifunlock(ip); | ||
3168 | return EAGAIN; | ||
3169 | } | ||
3084 | xfs_iunpin_wait(ip); | 3170 | xfs_iunpin_wait(ip); |
3085 | 3171 | ||
3086 | /* | 3172 | /* |
@@ -3097,15 +3183,6 @@ xfs_iflush( | |||
3097 | } | 3183 | } |
3098 | 3184 | ||
3099 | /* | 3185 | /* |
3100 | * Get the buffer containing the on-disk inode. | ||
3101 | */ | ||
3102 | error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0); | ||
3103 | if (error) { | ||
3104 | xfs_ifunlock(ip); | ||
3105 | return error; | ||
3106 | } | ||
3107 | |||
3108 | /* | ||
3109 | * Decide how buffer will be flushed out. This is done before | 3186 | * Decide how buffer will be flushed out. This is done before |
3110 | * the call to xfs_iflush_int because this field is zeroed by it. | 3187 | * the call to xfs_iflush_int because this field is zeroed by it. |
3111 | */ | 3188 | */ |
@@ -3121,6 +3198,7 @@ xfs_iflush( | |||
3121 | case XFS_IFLUSH_DELWRI_ELSE_SYNC: | 3198 | case XFS_IFLUSH_DELWRI_ELSE_SYNC: |
3122 | flags = 0; | 3199 | flags = 0; |
3123 | break; | 3200 | break; |
3201 | case XFS_IFLUSH_ASYNC_NOBLOCK: | ||
3124 | case XFS_IFLUSH_ASYNC: | 3202 | case XFS_IFLUSH_ASYNC: |
3125 | case XFS_IFLUSH_DELWRI_ELSE_ASYNC: | 3203 | case XFS_IFLUSH_DELWRI_ELSE_ASYNC: |
3126 | flags = INT_ASYNC; | 3204 | flags = INT_ASYNC; |
@@ -3140,6 +3218,7 @@ xfs_iflush( | |||
3140 | case XFS_IFLUSH_DELWRI: | 3218 | case XFS_IFLUSH_DELWRI: |
3141 | flags = INT_DELWRI; | 3219 | flags = INT_DELWRI; |
3142 | break; | 3220 | break; |
3221 | case XFS_IFLUSH_ASYNC_NOBLOCK: | ||
3143 | case XFS_IFLUSH_ASYNC: | 3222 | case XFS_IFLUSH_ASYNC: |
3144 | flags = INT_ASYNC; | 3223 | flags = INT_ASYNC; |
3145 | break; | 3224 | break; |
@@ -3154,94 +3233,41 @@ xfs_iflush( | |||
3154 | } | 3233 | } |
3155 | 3234 | ||
3156 | /* | 3235 | /* |
3157 | * First flush out the inode that xfs_iflush was called with. | 3236 | * Get the buffer containing the on-disk inode. |
3158 | */ | 3237 | */ |
3159 | error = xfs_iflush_int(ip, bp); | 3238 | error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0, |
3160 | if (error) { | 3239 | noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK); |
3161 | goto corrupt_out; | 3240 | if (error || !bp) { |
3241 | xfs_ifunlock(ip); | ||
3242 | return error; | ||
3162 | } | 3243 | } |
3163 | 3244 | ||
3164 | /* | 3245 | /* |
3165 | * inode clustering: | 3246 | * First flush out the inode that xfs_iflush was called with. |
3166 | * see if other inodes can be gathered into this write | ||
3167 | */ | 3247 | */ |
3168 | spin_lock(&ip->i_cluster->icl_lock); | 3248 | error = xfs_iflush_int(ip, bp); |
3169 | ip->i_cluster->icl_buf = bp; | 3249 | if (error) |
3170 | 3250 | goto corrupt_out; | |
3171 | clcount = 0; | ||
3172 | hlist_for_each_entry(iq, entry, &ip->i_cluster->icl_inodes, i_cnode) { | ||
3173 | if (iq == ip) | ||
3174 | continue; | ||
3175 | |||
3176 | /* | ||
3177 | * Do an un-protected check to see if the inode is dirty and | ||
3178 | * is a candidate for flushing. These checks will be repeated | ||
3179 | * later after the appropriate locks are acquired. | ||
3180 | */ | ||
3181 | iip = iq->i_itemp; | ||
3182 | if ((iq->i_update_core == 0) && | ||
3183 | ((iip == NULL) || | ||
3184 | !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) && | ||
3185 | xfs_ipincount(iq) == 0) { | ||
3186 | continue; | ||
3187 | } | ||
3188 | |||
3189 | /* | ||
3190 | * Try to get locks. If any are unavailable, | ||
3191 | * then this inode cannot be flushed and is skipped. | ||
3192 | */ | ||
3193 | |||
3194 | /* get inode locks (just i_lock) */ | ||
3195 | if (xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) { | ||
3196 | /* get inode flush lock */ | ||
3197 | if (xfs_iflock_nowait(iq)) { | ||
3198 | /* check if pinned */ | ||
3199 | if (xfs_ipincount(iq) == 0) { | ||
3200 | /* arriving here means that | ||
3201 | * this inode can be flushed. | ||
3202 | * first re-check that it's | ||
3203 | * dirty | ||
3204 | */ | ||
3205 | iip = iq->i_itemp; | ||
3206 | if ((iq->i_update_core != 0)|| | ||
3207 | ((iip != NULL) && | ||
3208 | (iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { | ||
3209 | clcount++; | ||
3210 | error = xfs_iflush_int(iq, bp); | ||
3211 | if (error) { | ||
3212 | xfs_iunlock(iq, | ||
3213 | XFS_ILOCK_SHARED); | ||
3214 | goto cluster_corrupt_out; | ||
3215 | } | ||
3216 | } else { | ||
3217 | xfs_ifunlock(iq); | ||
3218 | } | ||
3219 | } else { | ||
3220 | xfs_ifunlock(iq); | ||
3221 | } | ||
3222 | } | ||
3223 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
3224 | } | ||
3225 | } | ||
3226 | spin_unlock(&ip->i_cluster->icl_lock); | ||
3227 | |||
3228 | if (clcount) { | ||
3229 | XFS_STATS_INC(xs_icluster_flushcnt); | ||
3230 | XFS_STATS_ADD(xs_icluster_flushinode, clcount); | ||
3231 | } | ||
3232 | 3251 | ||
3233 | /* | 3252 | /* |
3234 | * If the buffer is pinned then push on the log so we won't | 3253 | * If the buffer is pinned then push on the log now so we won't |
3235 | * get stuck waiting in the write for too long. | 3254 | * get stuck waiting in the write for too long. |
3236 | */ | 3255 | */ |
3237 | if (XFS_BUF_ISPINNED(bp)){ | 3256 | if (XFS_BUF_ISPINNED(bp)) |
3238 | xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); | 3257 | xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); |
3239 | } | 3258 | |
3259 | /* | ||
3260 | * inode clustering: | ||
3261 | * see if other inodes can be gathered into this write | ||
3262 | */ | ||
3263 | error = xfs_iflush_cluster(ip, bp); | ||
3264 | if (error) | ||
3265 | goto cluster_corrupt_out; | ||
3240 | 3266 | ||
3241 | if (flags & INT_DELWRI) { | 3267 | if (flags & INT_DELWRI) { |
3242 | xfs_bdwrite(mp, bp); | 3268 | xfs_bdwrite(mp, bp); |
3243 | } else if (flags & INT_ASYNC) { | 3269 | } else if (flags & INT_ASYNC) { |
3244 | xfs_bawrite(mp, bp); | 3270 | error = xfs_bawrite(mp, bp); |
3245 | } else { | 3271 | } else { |
3246 | error = xfs_bwrite(mp, bp); | 3272 | error = xfs_bwrite(mp, bp); |
3247 | } | 3273 | } |
@@ -3250,52 +3276,11 @@ xfs_iflush( | |||
3250 | corrupt_out: | 3276 | corrupt_out: |
3251 | xfs_buf_relse(bp); | 3277 | xfs_buf_relse(bp); |
3252 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | 3278 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); |
3253 | xfs_iflush_abort(ip); | ||
3254 | /* | ||
3255 | * Unlocks the flush lock | ||
3256 | */ | ||
3257 | return XFS_ERROR(EFSCORRUPTED); | ||
3258 | |||
3259 | cluster_corrupt_out: | 3279 | cluster_corrupt_out: |
3260 | /* Corruption detected in the clustering loop. Invalidate the | ||
3261 | * inode buffer and shut down the filesystem. | ||
3262 | */ | ||
3263 | spin_unlock(&ip->i_cluster->icl_lock); | ||
3264 | |||
3265 | /* | ||
3266 | * Clean up the buffer. If it was B_DELWRI, just release it -- | ||
3267 | * brelse can handle it with no problems. If not, shut down the | ||
3268 | * filesystem before releasing the buffer. | ||
3269 | */ | ||
3270 | if ((bufwasdelwri= XFS_BUF_ISDELAYWRITE(bp))) { | ||
3271 | xfs_buf_relse(bp); | ||
3272 | } | ||
3273 | |||
3274 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
3275 | |||
3276 | if(!bufwasdelwri) { | ||
3277 | /* | ||
3278 | * Just like incore_relse: if we have b_iodone functions, | ||
3279 | * mark the buffer as an error and call them. Otherwise | ||
3280 | * mark it as stale and brelse. | ||
3281 | */ | ||
3282 | if (XFS_BUF_IODONE_FUNC(bp)) { | ||
3283 | XFS_BUF_CLR_BDSTRAT_FUNC(bp); | ||
3284 | XFS_BUF_UNDONE(bp); | ||
3285 | XFS_BUF_STALE(bp); | ||
3286 | XFS_BUF_SHUT(bp); | ||
3287 | XFS_BUF_ERROR(bp,EIO); | ||
3288 | xfs_biodone(bp); | ||
3289 | } else { | ||
3290 | XFS_BUF_STALE(bp); | ||
3291 | xfs_buf_relse(bp); | ||
3292 | } | ||
3293 | } | ||
3294 | |||
3295 | xfs_iflush_abort(iq); | ||
3296 | /* | 3280 | /* |
3297 | * Unlocks the flush lock | 3281 | * Unlocks the flush lock |
3298 | */ | 3282 | */ |
3283 | xfs_iflush_abort(ip); | ||
3299 | return XFS_ERROR(EFSCORRUPTED); | 3284 | return XFS_ERROR(EFSCORRUPTED); |
3300 | } | 3285 | } |
3301 | 3286 | ||
@@ -3325,8 +3310,7 @@ xfs_iflush_int( | |||
3325 | * If the inode isn't dirty, then just release the inode | 3310 | * If the inode isn't dirty, then just release the inode |
3326 | * flush lock and do nothing. | 3311 | * flush lock and do nothing. |
3327 | */ | 3312 | */ |
3328 | if ((ip->i_update_core == 0) && | 3313 | if (xfs_inode_clean(ip)) { |
3329 | ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { | ||
3330 | xfs_ifunlock(ip); | 3314 | xfs_ifunlock(ip); |
3331 | return 0; | 3315 | return 0; |
3332 | } | 3316 | } |
@@ -3459,16 +3443,9 @@ xfs_iflush_int( | |||
3459 | } | 3443 | } |
3460 | } | 3444 | } |
3461 | 3445 | ||
3462 | if (xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp) == EFSCORRUPTED) { | 3446 | xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp); |
3463 | goto corrupt_out; | 3447 | if (XFS_IFORK_Q(ip)) |
3464 | } | 3448 | xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); |
3465 | |||
3466 | if (XFS_IFORK_Q(ip)) { | ||
3467 | /* | ||
3468 | * The only error from xfs_iflush_fork is on the data fork. | ||
3469 | */ | ||
3470 | (void) xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); | ||
3471 | } | ||
3472 | xfs_inobp_check(mp, bp); | 3449 | xfs_inobp_check(mp, bp); |
3473 | 3450 | ||
3474 | /* | 3451 | /* |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index bfcd72cbaeea..93c37697a72c 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -133,19 +133,6 @@ typedef struct dm_attrs_s { | |||
133 | } dm_attrs_t; | 133 | } dm_attrs_t; |
134 | 134 | ||
135 | /* | 135 | /* |
136 | * This is the xfs inode cluster structure. This structure is used by | ||
137 | * xfs_iflush to find inodes that share a cluster and can be flushed to disk at | ||
138 | * the same time. | ||
139 | */ | ||
140 | typedef struct xfs_icluster { | ||
141 | struct hlist_head icl_inodes; /* list of inodes on cluster */ | ||
142 | xfs_daddr_t icl_blkno; /* starting block number of | ||
143 | * the cluster */ | ||
144 | struct xfs_buf *icl_buf; /* the inode buffer */ | ||
145 | spinlock_t icl_lock; /* inode list lock */ | ||
146 | } xfs_icluster_t; | ||
147 | |||
148 | /* | ||
149 | * This is the xfs in-core inode structure. | 136 | * This is the xfs in-core inode structure. |
150 | * Most of the on-disk inode is embedded in the i_d field. | 137 | * Most of the on-disk inode is embedded in the i_d field. |
151 | * | 138 | * |
@@ -240,10 +227,6 @@ typedef struct xfs_inode { | |||
240 | atomic_t i_pincount; /* inode pin count */ | 227 | atomic_t i_pincount; /* inode pin count */ |
241 | wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */ | 228 | wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */ |
242 | spinlock_t i_flags_lock; /* inode i_flags lock */ | 229 | spinlock_t i_flags_lock; /* inode i_flags lock */ |
243 | #ifdef HAVE_REFCACHE | ||
244 | struct xfs_inode **i_refcache; /* ptr to entry in ref cache */ | ||
245 | struct xfs_inode *i_release; /* inode to unref */ | ||
246 | #endif | ||
247 | /* Miscellaneous state. */ | 230 | /* Miscellaneous state. */ |
248 | unsigned short i_flags; /* see defined flags below */ | 231 | unsigned short i_flags; /* see defined flags below */ |
249 | unsigned char i_update_core; /* timestamps/size is dirty */ | 232 | unsigned char i_update_core; /* timestamps/size is dirty */ |
@@ -252,8 +235,6 @@ typedef struct xfs_inode { | |||
252 | unsigned int i_delayed_blks; /* count of delay alloc blks */ | 235 | unsigned int i_delayed_blks; /* count of delay alloc blks */ |
253 | 236 | ||
254 | xfs_icdinode_t i_d; /* most of ondisk inode */ | 237 | xfs_icdinode_t i_d; /* most of ondisk inode */ |
255 | xfs_icluster_t *i_cluster; /* cluster list header */ | ||
256 | struct hlist_node i_cnode; /* cluster link node */ | ||
257 | 238 | ||
258 | xfs_fsize_t i_size; /* in-memory size */ | 239 | xfs_fsize_t i_size; /* in-memory size */ |
259 | xfs_fsize_t i_new_size; /* size when write completes */ | 240 | xfs_fsize_t i_new_size; /* size when write completes */ |
@@ -461,6 +442,7 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) | |||
461 | #define XFS_IFLUSH_SYNC 3 | 442 | #define XFS_IFLUSH_SYNC 3 |
462 | #define XFS_IFLUSH_ASYNC 4 | 443 | #define XFS_IFLUSH_ASYNC 4 |
463 | #define XFS_IFLUSH_DELWRI 5 | 444 | #define XFS_IFLUSH_DELWRI 5 |
445 | #define XFS_IFLUSH_ASYNC_NOBLOCK 6 | ||
464 | 446 | ||
465 | /* | 447 | /* |
466 | * Flags for xfs_itruncate_start(). | 448 | * Flags for xfs_itruncate_start(). |
@@ -515,7 +497,7 @@ int xfs_finish_reclaim_all(struct xfs_mount *, int); | |||
515 | */ | 497 | */ |
516 | int xfs_itobp(struct xfs_mount *, struct xfs_trans *, | 498 | int xfs_itobp(struct xfs_mount *, struct xfs_trans *, |
517 | xfs_inode_t *, struct xfs_dinode **, struct xfs_buf **, | 499 | xfs_inode_t *, struct xfs_dinode **, struct xfs_buf **, |
518 | xfs_daddr_t, uint); | 500 | xfs_daddr_t, uint, uint); |
519 | int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, | 501 | int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, |
520 | xfs_inode_t **, xfs_daddr_t, uint); | 502 | xfs_inode_t **, xfs_daddr_t, uint); |
521 | int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int); | 503 | int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int); |
@@ -597,7 +579,6 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); | |||
597 | #define xfs_inobp_check(mp, bp) | 579 | #define xfs_inobp_check(mp, bp) |
598 | #endif /* DEBUG */ | 580 | #endif /* DEBUG */ |
599 | 581 | ||
600 | extern struct kmem_zone *xfs_icluster_zone; | ||
601 | extern struct kmem_zone *xfs_ifork_zone; | 582 | extern struct kmem_zone *xfs_ifork_zone; |
602 | extern struct kmem_zone *xfs_inode_zone; | 583 | extern struct kmem_zone *xfs_inode_zone; |
603 | extern struct kmem_zone *xfs_ili_zone; | 584 | extern struct kmem_zone *xfs_ili_zone; |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 2c775b4ae9e6..93b5db453ea2 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include "xfs_btree.h" | 40 | #include "xfs_btree.h" |
41 | #include "xfs_ialloc.h" | 41 | #include "xfs_ialloc.h" |
42 | #include "xfs_rw.h" | 42 | #include "xfs_rw.h" |
43 | #include "xfs_error.h" | ||
43 | 44 | ||
44 | 45 | ||
45 | kmem_zone_t *xfs_ili_zone; /* inode log item zone */ | 46 | kmem_zone_t *xfs_ili_zone; /* inode log item zone */ |
@@ -813,7 +814,12 @@ xfs_inode_item_pushbuf( | |||
813 | XFS_LOG_FORCE); | 814 | XFS_LOG_FORCE); |
814 | } | 815 | } |
815 | if (dopush) { | 816 | if (dopush) { |
816 | xfs_bawrite(mp, bp); | 817 | int error; |
818 | error = xfs_bawrite(mp, bp); | ||
819 | if (error) | ||
820 | xfs_fs_cmn_err(CE_WARN, mp, | ||
821 | "xfs_inode_item_pushbuf: pushbuf error %d on iip %p, bp %p", | ||
822 | error, iip, bp); | ||
817 | } else { | 823 | } else { |
818 | xfs_buf_relse(bp); | 824 | xfs_buf_relse(bp); |
819 | } | 825 | } |
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index bfe92ea17952..40513077ab36 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h | |||
@@ -168,6 +168,14 @@ static inline int xfs_ilog_fext(int w) | |||
168 | return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT); | 168 | return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT); |
169 | } | 169 | } |
170 | 170 | ||
171 | static inline int xfs_inode_clean(xfs_inode_t *ip) | ||
172 | { | ||
173 | return (!ip->i_itemp || | ||
174 | !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) && | ||
175 | !ip->i_update_core; | ||
176 | } | ||
177 | |||
178 | |||
171 | #ifdef __KERNEL__ | 179 | #ifdef __KERNEL__ |
172 | 180 | ||
173 | extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); | 181 | extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index fde37f87d52f..fb3cf1191419 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -802,8 +802,11 @@ xfs_iomap_write_allocate( | |||
802 | */ | 802 | */ |
803 | nimaps = 1; | 803 | nimaps = 1; |
804 | end_fsb = XFS_B_TO_FSB(mp, ip->i_size); | 804 | end_fsb = XFS_B_TO_FSB(mp, ip->i_size); |
805 | xfs_bmap_last_offset(NULL, ip, &last_block, | 805 | error = xfs_bmap_last_offset(NULL, ip, &last_block, |
806 | XFS_DATA_FORK); | 806 | XFS_DATA_FORK); |
807 | if (error) | ||
808 | goto trans_cancel; | ||
809 | |||
807 | last_block = XFS_FILEOFF_MAX(last_block, end_fsb); | 810 | last_block = XFS_FILEOFF_MAX(last_block, end_fsb); |
808 | if ((map_start_fsb + count_fsb) > last_block) { | 811 | if ((map_start_fsb + count_fsb) > last_block) { |
809 | count_fsb = last_block - map_start_fsb; | 812 | count_fsb = last_block - map_start_fsb; |
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index f615e04364f4..eb85bdedad0c 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
@@ -129,7 +129,7 @@ xfs_bulkstat_one_iget( | |||
129 | return error; | 129 | return error; |
130 | } | 130 | } |
131 | 131 | ||
132 | STATIC int | 132 | STATIC void |
133 | xfs_bulkstat_one_dinode( | 133 | xfs_bulkstat_one_dinode( |
134 | xfs_mount_t *mp, /* mount point for filesystem */ | 134 | xfs_mount_t *mp, /* mount point for filesystem */ |
135 | xfs_ino_t ino, /* inode number to get data for */ | 135 | xfs_ino_t ino, /* inode number to get data for */ |
@@ -198,8 +198,6 @@ xfs_bulkstat_one_dinode( | |||
198 | buf->bs_blocks = be64_to_cpu(dic->di_nblocks); | 198 | buf->bs_blocks = be64_to_cpu(dic->di_nblocks); |
199 | break; | 199 | break; |
200 | } | 200 | } |
201 | |||
202 | return 0; | ||
203 | } | 201 | } |
204 | 202 | ||
205 | STATIC int | 203 | STATIC int |
@@ -614,7 +612,8 @@ xfs_bulkstat( | |||
614 | xfs_buf_relse(bp); | 612 | xfs_buf_relse(bp); |
615 | error = xfs_itobp(mp, NULL, ip, | 613 | error = xfs_itobp(mp, NULL, ip, |
616 | &dip, &bp, bno, | 614 | &dip, &bp, bno, |
617 | XFS_IMAP_BULKSTAT); | 615 | XFS_IMAP_BULKSTAT, |
616 | XFS_BUF_LOCK); | ||
618 | if (!error) | 617 | if (!error) |
619 | clustidx = ip->i_boffset / mp->m_sb.sb_inodesize; | 618 | clustidx = ip->i_boffset / mp->m_sb.sb_inodesize; |
620 | kmem_zone_free(xfs_inode_zone, ip); | 619 | kmem_zone_free(xfs_inode_zone, ip); |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 31f2b04f2c97..afaee301b0ee 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include "xfs_inode.h" | 41 | #include "xfs_inode.h" |
42 | #include "xfs_rw.h" | 42 | #include "xfs_rw.h" |
43 | 43 | ||
44 | kmem_zone_t *xfs_log_ticket_zone; | ||
44 | 45 | ||
45 | #define xlog_write_adv_cnt(ptr, len, off, bytes) \ | 46 | #define xlog_write_adv_cnt(ptr, len, off, bytes) \ |
46 | { (ptr) += (bytes); \ | 47 | { (ptr) += (bytes); \ |
@@ -73,8 +74,6 @@ STATIC int xlog_state_get_iclog_space(xlog_t *log, | |||
73 | xlog_ticket_t *ticket, | 74 | xlog_ticket_t *ticket, |
74 | int *continued_write, | 75 | int *continued_write, |
75 | int *logoffsetp); | 76 | int *logoffsetp); |
76 | STATIC void xlog_state_put_ticket(xlog_t *log, | ||
77 | xlog_ticket_t *tic); | ||
78 | STATIC int xlog_state_release_iclog(xlog_t *log, | 77 | STATIC int xlog_state_release_iclog(xlog_t *log, |
79 | xlog_in_core_t *iclog); | 78 | xlog_in_core_t *iclog); |
80 | STATIC void xlog_state_switch_iclogs(xlog_t *log, | 79 | STATIC void xlog_state_switch_iclogs(xlog_t *log, |
@@ -101,7 +100,6 @@ STATIC void xlog_ungrant_log_space(xlog_t *log, | |||
101 | 100 | ||
102 | 101 | ||
103 | /* local ticket functions */ | 102 | /* local ticket functions */ |
104 | STATIC void xlog_state_ticket_alloc(xlog_t *log); | ||
105 | STATIC xlog_ticket_t *xlog_ticket_get(xlog_t *log, | 103 | STATIC xlog_ticket_t *xlog_ticket_get(xlog_t *log, |
106 | int unit_bytes, | 104 | int unit_bytes, |
107 | int count, | 105 | int count, |
@@ -330,7 +328,7 @@ xfs_log_done(xfs_mount_t *mp, | |||
330 | */ | 328 | */ |
331 | xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)"); | 329 | xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)"); |
332 | xlog_ungrant_log_space(log, ticket); | 330 | xlog_ungrant_log_space(log, ticket); |
333 | xlog_state_put_ticket(log, ticket); | 331 | xlog_ticket_put(log, ticket); |
334 | } else { | 332 | } else { |
335 | xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); | 333 | xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); |
336 | xlog_regrant_reserve_log_space(log, ticket); | 334 | xlog_regrant_reserve_log_space(log, ticket); |
@@ -384,7 +382,27 @@ _xfs_log_force( | |||
384 | return xlog_state_sync_all(log, flags, log_flushed); | 382 | return xlog_state_sync_all(log, flags, log_flushed); |
385 | else | 383 | else |
386 | return xlog_state_sync(log, lsn, flags, log_flushed); | 384 | return xlog_state_sync(log, lsn, flags, log_flushed); |
387 | } /* xfs_log_force */ | 385 | } /* _xfs_log_force */ |
386 | |||
387 | /* | ||
388 | * Wrapper for _xfs_log_force(), to be used when caller doesn't care | ||
389 | * about errors or whether the log was flushed or not. This is the normal | ||
390 | * interface to use when trying to unpin items or move the log forward. | ||
391 | */ | ||
392 | void | ||
393 | xfs_log_force( | ||
394 | xfs_mount_t *mp, | ||
395 | xfs_lsn_t lsn, | ||
396 | uint flags) | ||
397 | { | ||
398 | int error; | ||
399 | error = _xfs_log_force(mp, lsn, flags, NULL); | ||
400 | if (error) { | ||
401 | xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " | ||
402 | "error %d returned.", error); | ||
403 | } | ||
404 | } | ||
405 | |||
388 | 406 | ||
389 | /* | 407 | /* |
390 | * Attaches a new iclog I/O completion callback routine during | 408 | * Attaches a new iclog I/O completion callback routine during |
@@ -397,12 +415,10 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */ | |||
397 | void *iclog_hndl, /* iclog to hang callback off */ | 415 | void *iclog_hndl, /* iclog to hang callback off */ |
398 | xfs_log_callback_t *cb) | 416 | xfs_log_callback_t *cb) |
399 | { | 417 | { |
400 | xlog_t *log = mp->m_log; | ||
401 | xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl; | 418 | xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl; |
402 | int abortflg; | 419 | int abortflg; |
403 | 420 | ||
404 | cb->cb_next = NULL; | 421 | spin_lock(&iclog->ic_callback_lock); |
405 | spin_lock(&log->l_icloglock); | ||
406 | abortflg = (iclog->ic_state & XLOG_STATE_IOERROR); | 422 | abortflg = (iclog->ic_state & XLOG_STATE_IOERROR); |
407 | if (!abortflg) { | 423 | if (!abortflg) { |
408 | ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) || | 424 | ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) || |
@@ -411,7 +427,7 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */ | |||
411 | *(iclog->ic_callback_tail) = cb; | 427 | *(iclog->ic_callback_tail) = cb; |
412 | iclog->ic_callback_tail = &(cb->cb_next); | 428 | iclog->ic_callback_tail = &(cb->cb_next); |
413 | } | 429 | } |
414 | spin_unlock(&log->l_icloglock); | 430 | spin_unlock(&iclog->ic_callback_lock); |
415 | return abortflg; | 431 | return abortflg; |
416 | } /* xfs_log_notify */ | 432 | } /* xfs_log_notify */ |
417 | 433 | ||
@@ -471,6 +487,8 @@ xfs_log_reserve(xfs_mount_t *mp, | |||
471 | /* may sleep if need to allocate more tickets */ | 487 | /* may sleep if need to allocate more tickets */ |
472 | internal_ticket = xlog_ticket_get(log, unit_bytes, cnt, | 488 | internal_ticket = xlog_ticket_get(log, unit_bytes, cnt, |
473 | client, flags); | 489 | client, flags); |
490 | if (!internal_ticket) | ||
491 | return XFS_ERROR(ENOMEM); | ||
474 | internal_ticket->t_trans_type = t_type; | 492 | internal_ticket->t_trans_type = t_type; |
475 | *ticket = internal_ticket; | 493 | *ticket = internal_ticket; |
476 | xlog_trace_loggrant(log, internal_ticket, | 494 | xlog_trace_loggrant(log, internal_ticket, |
@@ -636,7 +654,8 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
636 | if (mp->m_flags & XFS_MOUNT_RDONLY) | 654 | if (mp->m_flags & XFS_MOUNT_RDONLY) |
637 | return 0; | 655 | return 0; |
638 | 656 | ||
639 | xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC); | 657 | error = _xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC, NULL); |
658 | ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log))); | ||
640 | 659 | ||
641 | #ifdef DEBUG | 660 | #ifdef DEBUG |
642 | first_iclog = iclog = log->l_iclog; | 661 | first_iclog = iclog = log->l_iclog; |
@@ -675,10 +694,10 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
675 | 694 | ||
676 | spin_lock(&log->l_icloglock); | 695 | spin_lock(&log->l_icloglock); |
677 | iclog = log->l_iclog; | 696 | iclog = log->l_iclog; |
678 | iclog->ic_refcnt++; | 697 | atomic_inc(&iclog->ic_refcnt); |
679 | spin_unlock(&log->l_icloglock); | 698 | spin_unlock(&log->l_icloglock); |
680 | xlog_state_want_sync(log, iclog); | 699 | xlog_state_want_sync(log, iclog); |
681 | (void) xlog_state_release_iclog(log, iclog); | 700 | error = xlog_state_release_iclog(log, iclog); |
682 | 701 | ||
683 | spin_lock(&log->l_icloglock); | 702 | spin_lock(&log->l_icloglock); |
684 | if (!(iclog->ic_state == XLOG_STATE_ACTIVE || | 703 | if (!(iclog->ic_state == XLOG_STATE_ACTIVE || |
@@ -695,7 +714,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
695 | if (tic) { | 714 | if (tic) { |
696 | xlog_trace_loggrant(log, tic, "unmount rec"); | 715 | xlog_trace_loggrant(log, tic, "unmount rec"); |
697 | xlog_ungrant_log_space(log, tic); | 716 | xlog_ungrant_log_space(log, tic); |
698 | xlog_state_put_ticket(log, tic); | 717 | xlog_ticket_put(log, tic); |
699 | } | 718 | } |
700 | } else { | 719 | } else { |
701 | /* | 720 | /* |
@@ -713,11 +732,11 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
713 | */ | 732 | */ |
714 | spin_lock(&log->l_icloglock); | 733 | spin_lock(&log->l_icloglock); |
715 | iclog = log->l_iclog; | 734 | iclog = log->l_iclog; |
716 | iclog->ic_refcnt++; | 735 | atomic_inc(&iclog->ic_refcnt); |
717 | spin_unlock(&log->l_icloglock); | 736 | spin_unlock(&log->l_icloglock); |
718 | 737 | ||
719 | xlog_state_want_sync(log, iclog); | 738 | xlog_state_want_sync(log, iclog); |
720 | (void) xlog_state_release_iclog(log, iclog); | 739 | error = xlog_state_release_iclog(log, iclog); |
721 | 740 | ||
722 | spin_lock(&log->l_icloglock); | 741 | spin_lock(&log->l_icloglock); |
723 | 742 | ||
@@ -732,7 +751,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
732 | } | 751 | } |
733 | } | 752 | } |
734 | 753 | ||
735 | return 0; | 754 | return error; |
736 | } /* xfs_log_unmount_write */ | 755 | } /* xfs_log_unmount_write */ |
737 | 756 | ||
738 | /* | 757 | /* |
@@ -1210,7 +1229,6 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1210 | spin_lock_init(&log->l_icloglock); | 1229 | spin_lock_init(&log->l_icloglock); |
1211 | spin_lock_init(&log->l_grant_lock); | 1230 | spin_lock_init(&log->l_grant_lock); |
1212 | initnsema(&log->l_flushsema, 0, "ic-flush"); | 1231 | initnsema(&log->l_flushsema, 0, "ic-flush"); |
1213 | xlog_state_ticket_alloc(log); /* wait until after icloglock inited */ | ||
1214 | 1232 | ||
1215 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ | 1233 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ |
1216 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); | 1234 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); |
@@ -1240,9 +1258,9 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1240 | XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); | 1258 | XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); |
1241 | iclog->ic_bp = bp; | 1259 | iclog->ic_bp = bp; |
1242 | iclog->hic_data = bp->b_addr; | 1260 | iclog->hic_data = bp->b_addr; |
1243 | 1261 | #ifdef DEBUG | |
1244 | log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header); | 1262 | log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header); |
1245 | 1263 | #endif | |
1246 | head = &iclog->ic_header; | 1264 | head = &iclog->ic_header; |
1247 | memset(head, 0, sizeof(xlog_rec_header_t)); | 1265 | memset(head, 0, sizeof(xlog_rec_header_t)); |
1248 | head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); | 1266 | head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); |
@@ -1253,10 +1271,11 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1253 | head->h_fmt = cpu_to_be32(XLOG_FMT); | 1271 | head->h_fmt = cpu_to_be32(XLOG_FMT); |
1254 | memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); | 1272 | memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); |
1255 | 1273 | ||
1256 | |||
1257 | iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize; | 1274 | iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize; |
1258 | iclog->ic_state = XLOG_STATE_ACTIVE; | 1275 | iclog->ic_state = XLOG_STATE_ACTIVE; |
1259 | iclog->ic_log = log; | 1276 | iclog->ic_log = log; |
1277 | atomic_set(&iclog->ic_refcnt, 0); | ||
1278 | spin_lock_init(&iclog->ic_callback_lock); | ||
1260 | iclog->ic_callback_tail = &(iclog->ic_callback); | 1279 | iclog->ic_callback_tail = &(iclog->ic_callback); |
1261 | iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize; | 1280 | iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize; |
1262 | 1281 | ||
@@ -1405,7 +1424,7 @@ xlog_sync(xlog_t *log, | |||
1405 | int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb); | 1424 | int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb); |
1406 | 1425 | ||
1407 | XFS_STATS_INC(xs_log_writes); | 1426 | XFS_STATS_INC(xs_log_writes); |
1408 | ASSERT(iclog->ic_refcnt == 0); | 1427 | ASSERT(atomic_read(&iclog->ic_refcnt) == 0); |
1409 | 1428 | ||
1410 | /* Add for LR header */ | 1429 | /* Add for LR header */ |
1411 | count_init = log->l_iclog_hsize + iclog->ic_offset; | 1430 | count_init = log->l_iclog_hsize + iclog->ic_offset; |
@@ -1538,7 +1557,6 @@ STATIC void | |||
1538 | xlog_dealloc_log(xlog_t *log) | 1557 | xlog_dealloc_log(xlog_t *log) |
1539 | { | 1558 | { |
1540 | xlog_in_core_t *iclog, *next_iclog; | 1559 | xlog_in_core_t *iclog, *next_iclog; |
1541 | xlog_ticket_t *tic, *next_tic; | ||
1542 | int i; | 1560 | int i; |
1543 | 1561 | ||
1544 | iclog = log->l_iclog; | 1562 | iclog = log->l_iclog; |
@@ -1559,22 +1577,6 @@ xlog_dealloc_log(xlog_t *log) | |||
1559 | spinlock_destroy(&log->l_icloglock); | 1577 | spinlock_destroy(&log->l_icloglock); |
1560 | spinlock_destroy(&log->l_grant_lock); | 1578 | spinlock_destroy(&log->l_grant_lock); |
1561 | 1579 | ||
1562 | /* XXXsup take a look at this again. */ | ||
1563 | if ((log->l_ticket_cnt != log->l_ticket_tcnt) && | ||
1564 | !XLOG_FORCED_SHUTDOWN(log)) { | ||
1565 | xfs_fs_cmn_err(CE_WARN, log->l_mp, | ||
1566 | "xlog_dealloc_log: (cnt: %d, total: %d)", | ||
1567 | log->l_ticket_cnt, log->l_ticket_tcnt); | ||
1568 | /* ASSERT(log->l_ticket_cnt == log->l_ticket_tcnt); */ | ||
1569 | |||
1570 | } else { | ||
1571 | tic = log->l_unmount_free; | ||
1572 | while (tic) { | ||
1573 | next_tic = tic->t_next; | ||
1574 | kmem_free(tic, PAGE_SIZE); | ||
1575 | tic = next_tic; | ||
1576 | } | ||
1577 | } | ||
1578 | xfs_buf_free(log->l_xbuf); | 1580 | xfs_buf_free(log->l_xbuf); |
1579 | #ifdef XFS_LOG_TRACE | 1581 | #ifdef XFS_LOG_TRACE |
1580 | if (log->l_trace != NULL) { | 1582 | if (log->l_trace != NULL) { |
@@ -1987,7 +1989,7 @@ xlog_state_clean_log(xlog_t *log) | |||
1987 | if (iclog->ic_state == XLOG_STATE_DIRTY) { | 1989 | if (iclog->ic_state == XLOG_STATE_DIRTY) { |
1988 | iclog->ic_state = XLOG_STATE_ACTIVE; | 1990 | iclog->ic_state = XLOG_STATE_ACTIVE; |
1989 | iclog->ic_offset = 0; | 1991 | iclog->ic_offset = 0; |
1990 | iclog->ic_callback = NULL; /* don't need to free */ | 1992 | ASSERT(iclog->ic_callback == NULL); |
1991 | /* | 1993 | /* |
1992 | * If the number of ops in this iclog indicate it just | 1994 | * If the number of ops in this iclog indicate it just |
1993 | * contains the dummy transaction, we can | 1995 | * contains the dummy transaction, we can |
@@ -2190,37 +2192,40 @@ xlog_state_do_callback( | |||
2190 | be64_to_cpu(iclog->ic_header.h_lsn); | 2192 | be64_to_cpu(iclog->ic_header.h_lsn); |
2191 | spin_unlock(&log->l_grant_lock); | 2193 | spin_unlock(&log->l_grant_lock); |
2192 | 2194 | ||
2193 | /* | ||
2194 | * Keep processing entries in the callback list | ||
2195 | * until we come around and it is empty. We | ||
2196 | * need to atomically see that the list is | ||
2197 | * empty and change the state to DIRTY so that | ||
2198 | * we don't miss any more callbacks being added. | ||
2199 | */ | ||
2200 | spin_lock(&log->l_icloglock); | ||
2201 | } else { | 2195 | } else { |
2196 | spin_unlock(&log->l_icloglock); | ||
2202 | ioerrors++; | 2197 | ioerrors++; |
2203 | } | 2198 | } |
2204 | cb = iclog->ic_callback; | ||
2205 | 2199 | ||
2200 | /* | ||
2201 | * Keep processing entries in the callback list until | ||
2202 | * we come around and it is empty. We need to | ||
2203 | * atomically see that the list is empty and change the | ||
2204 | * state to DIRTY so that we don't miss any more | ||
2205 | * callbacks being added. | ||
2206 | */ | ||
2207 | spin_lock(&iclog->ic_callback_lock); | ||
2208 | cb = iclog->ic_callback; | ||
2206 | while (cb) { | 2209 | while (cb) { |
2207 | iclog->ic_callback_tail = &(iclog->ic_callback); | 2210 | iclog->ic_callback_tail = &(iclog->ic_callback); |
2208 | iclog->ic_callback = NULL; | 2211 | iclog->ic_callback = NULL; |
2209 | spin_unlock(&log->l_icloglock); | 2212 | spin_unlock(&iclog->ic_callback_lock); |
2210 | 2213 | ||
2211 | /* perform callbacks in the order given */ | 2214 | /* perform callbacks in the order given */ |
2212 | for (; cb; cb = cb_next) { | 2215 | for (; cb; cb = cb_next) { |
2213 | cb_next = cb->cb_next; | 2216 | cb_next = cb->cb_next; |
2214 | cb->cb_func(cb->cb_arg, aborted); | 2217 | cb->cb_func(cb->cb_arg, aborted); |
2215 | } | 2218 | } |
2216 | spin_lock(&log->l_icloglock); | 2219 | spin_lock(&iclog->ic_callback_lock); |
2217 | cb = iclog->ic_callback; | 2220 | cb = iclog->ic_callback; |
2218 | } | 2221 | } |
2219 | 2222 | ||
2220 | loopdidcallbacks++; | 2223 | loopdidcallbacks++; |
2221 | funcdidcallbacks++; | 2224 | funcdidcallbacks++; |
2222 | 2225 | ||
2226 | spin_lock(&log->l_icloglock); | ||
2223 | ASSERT(iclog->ic_callback == NULL); | 2227 | ASSERT(iclog->ic_callback == NULL); |
2228 | spin_unlock(&iclog->ic_callback_lock); | ||
2224 | if (!(iclog->ic_state & XLOG_STATE_IOERROR)) | 2229 | if (!(iclog->ic_state & XLOG_STATE_IOERROR)) |
2225 | iclog->ic_state = XLOG_STATE_DIRTY; | 2230 | iclog->ic_state = XLOG_STATE_DIRTY; |
2226 | 2231 | ||
@@ -2241,7 +2246,7 @@ xlog_state_do_callback( | |||
2241 | repeats = 0; | 2246 | repeats = 0; |
2242 | xfs_fs_cmn_err(CE_WARN, log->l_mp, | 2247 | xfs_fs_cmn_err(CE_WARN, log->l_mp, |
2243 | "%s: possible infinite loop (%d iterations)", | 2248 | "%s: possible infinite loop (%d iterations)", |
2244 | __FUNCTION__, flushcnt); | 2249 | __func__, flushcnt); |
2245 | } | 2250 | } |
2246 | } while (!ioerrors && loopdidcallbacks); | 2251 | } while (!ioerrors && loopdidcallbacks); |
2247 | 2252 | ||
@@ -2309,7 +2314,7 @@ xlog_state_done_syncing( | |||
2309 | 2314 | ||
2310 | ASSERT(iclog->ic_state == XLOG_STATE_SYNCING || | 2315 | ASSERT(iclog->ic_state == XLOG_STATE_SYNCING || |
2311 | iclog->ic_state == XLOG_STATE_IOERROR); | 2316 | iclog->ic_state == XLOG_STATE_IOERROR); |
2312 | ASSERT(iclog->ic_refcnt == 0); | 2317 | ASSERT(atomic_read(&iclog->ic_refcnt) == 0); |
2313 | ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2); | 2318 | ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2); |
2314 | 2319 | ||
2315 | 2320 | ||
@@ -2391,7 +2396,7 @@ restart: | |||
2391 | ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); | 2396 | ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); |
2392 | head = &iclog->ic_header; | 2397 | head = &iclog->ic_header; |
2393 | 2398 | ||
2394 | iclog->ic_refcnt++; /* prevents sync */ | 2399 | atomic_inc(&iclog->ic_refcnt); /* prevents sync */ |
2395 | log_offset = iclog->ic_offset; | 2400 | log_offset = iclog->ic_offset; |
2396 | 2401 | ||
2397 | /* On the 1st write to an iclog, figure out lsn. This works | 2402 | /* On the 1st write to an iclog, figure out lsn. This works |
@@ -2423,12 +2428,12 @@ restart: | |||
2423 | xlog_state_switch_iclogs(log, iclog, iclog->ic_size); | 2428 | xlog_state_switch_iclogs(log, iclog, iclog->ic_size); |
2424 | 2429 | ||
2425 | /* If I'm the only one writing to this iclog, sync it to disk */ | 2430 | /* If I'm the only one writing to this iclog, sync it to disk */ |
2426 | if (iclog->ic_refcnt == 1) { | 2431 | if (atomic_read(&iclog->ic_refcnt) == 1) { |
2427 | spin_unlock(&log->l_icloglock); | 2432 | spin_unlock(&log->l_icloglock); |
2428 | if ((error = xlog_state_release_iclog(log, iclog))) | 2433 | if ((error = xlog_state_release_iclog(log, iclog))) |
2429 | return error; | 2434 | return error; |
2430 | } else { | 2435 | } else { |
2431 | iclog->ic_refcnt--; | 2436 | atomic_dec(&iclog->ic_refcnt); |
2432 | spin_unlock(&log->l_icloglock); | 2437 | spin_unlock(&log->l_icloglock); |
2433 | } | 2438 | } |
2434 | goto restart; | 2439 | goto restart; |
@@ -2792,18 +2797,6 @@ xlog_ungrant_log_space(xlog_t *log, | |||
2792 | 2797 | ||
2793 | 2798 | ||
2794 | /* | 2799 | /* |
2795 | * Atomically put back used ticket. | ||
2796 | */ | ||
2797 | STATIC void | ||
2798 | xlog_state_put_ticket(xlog_t *log, | ||
2799 | xlog_ticket_t *tic) | ||
2800 | { | ||
2801 | spin_lock(&log->l_icloglock); | ||
2802 | xlog_ticket_put(log, tic); | ||
2803 | spin_unlock(&log->l_icloglock); | ||
2804 | } /* xlog_state_put_ticket */ | ||
2805 | |||
2806 | /* | ||
2807 | * Flush iclog to disk if this is the last reference to the given iclog and | 2800 | * Flush iclog to disk if this is the last reference to the given iclog and |
2808 | * the WANT_SYNC bit is set. | 2801 | * the WANT_SYNC bit is set. |
2809 | * | 2802 | * |
@@ -2813,33 +2806,35 @@ xlog_state_put_ticket(xlog_t *log, | |||
2813 | * | 2806 | * |
2814 | */ | 2807 | */ |
2815 | STATIC int | 2808 | STATIC int |
2816 | xlog_state_release_iclog(xlog_t *log, | 2809 | xlog_state_release_iclog( |
2817 | xlog_in_core_t *iclog) | 2810 | xlog_t *log, |
2811 | xlog_in_core_t *iclog) | ||
2818 | { | 2812 | { |
2819 | int sync = 0; /* do we sync? */ | 2813 | int sync = 0; /* do we sync? */ |
2820 | 2814 | ||
2821 | xlog_assign_tail_lsn(log->l_mp); | 2815 | if (iclog->ic_state & XLOG_STATE_IOERROR) |
2816 | return XFS_ERROR(EIO); | ||
2822 | 2817 | ||
2823 | spin_lock(&log->l_icloglock); | 2818 | ASSERT(atomic_read(&iclog->ic_refcnt) > 0); |
2819 | if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock)) | ||
2820 | return 0; | ||
2824 | 2821 | ||
2825 | if (iclog->ic_state & XLOG_STATE_IOERROR) { | 2822 | if (iclog->ic_state & XLOG_STATE_IOERROR) { |
2826 | spin_unlock(&log->l_icloglock); | 2823 | spin_unlock(&log->l_icloglock); |
2827 | return XFS_ERROR(EIO); | 2824 | return XFS_ERROR(EIO); |
2828 | } | 2825 | } |
2829 | |||
2830 | ASSERT(iclog->ic_refcnt > 0); | ||
2831 | ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE || | 2826 | ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE || |
2832 | iclog->ic_state == XLOG_STATE_WANT_SYNC); | 2827 | iclog->ic_state == XLOG_STATE_WANT_SYNC); |
2833 | 2828 | ||
2834 | if (--iclog->ic_refcnt == 0 && | 2829 | if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { |
2835 | iclog->ic_state == XLOG_STATE_WANT_SYNC) { | 2830 | /* update tail before writing to iclog */ |
2831 | xlog_assign_tail_lsn(log->l_mp); | ||
2836 | sync++; | 2832 | sync++; |
2837 | iclog->ic_state = XLOG_STATE_SYNCING; | 2833 | iclog->ic_state = XLOG_STATE_SYNCING; |
2838 | iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn); | 2834 | iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn); |
2839 | xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn); | 2835 | xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn); |
2840 | /* cycle incremented when incrementing curr_block */ | 2836 | /* cycle incremented when incrementing curr_block */ |
2841 | } | 2837 | } |
2842 | |||
2843 | spin_unlock(&log->l_icloglock); | 2838 | spin_unlock(&log->l_icloglock); |
2844 | 2839 | ||
2845 | /* | 2840 | /* |
@@ -2849,11 +2844,9 @@ xlog_state_release_iclog(xlog_t *log, | |||
2849 | * this iclog has consistent data, so we ignore IOERROR | 2844 | * this iclog has consistent data, so we ignore IOERROR |
2850 | * flags after this point. | 2845 | * flags after this point. |
2851 | */ | 2846 | */ |
2852 | if (sync) { | 2847 | if (sync) |
2853 | return xlog_sync(log, iclog); | 2848 | return xlog_sync(log, iclog); |
2854 | } | ||
2855 | return 0; | 2849 | return 0; |
2856 | |||
2857 | } /* xlog_state_release_iclog */ | 2850 | } /* xlog_state_release_iclog */ |
2858 | 2851 | ||
2859 | 2852 | ||
@@ -2953,7 +2946,8 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) | |||
2953 | * previous iclog and go to sleep. | 2946 | * previous iclog and go to sleep. |
2954 | */ | 2947 | */ |
2955 | if (iclog->ic_state == XLOG_STATE_DIRTY || | 2948 | if (iclog->ic_state == XLOG_STATE_DIRTY || |
2956 | (iclog->ic_refcnt == 0 && iclog->ic_offset == 0)) { | 2949 | (atomic_read(&iclog->ic_refcnt) == 0 |
2950 | && iclog->ic_offset == 0)) { | ||
2957 | iclog = iclog->ic_prev; | 2951 | iclog = iclog->ic_prev; |
2958 | if (iclog->ic_state == XLOG_STATE_ACTIVE || | 2952 | if (iclog->ic_state == XLOG_STATE_ACTIVE || |
2959 | iclog->ic_state == XLOG_STATE_DIRTY) | 2953 | iclog->ic_state == XLOG_STATE_DIRTY) |
@@ -2961,14 +2955,14 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) | |||
2961 | else | 2955 | else |
2962 | goto maybe_sleep; | 2956 | goto maybe_sleep; |
2963 | } else { | 2957 | } else { |
2964 | if (iclog->ic_refcnt == 0) { | 2958 | if (atomic_read(&iclog->ic_refcnt) == 0) { |
2965 | /* We are the only one with access to this | 2959 | /* We are the only one with access to this |
2966 | * iclog. Flush it out now. There should | 2960 | * iclog. Flush it out now. There should |
2967 | * be a roundoff of zero to show that someone | 2961 | * be a roundoff of zero to show that someone |
2968 | * has already taken care of the roundoff from | 2962 | * has already taken care of the roundoff from |
2969 | * the previous sync. | 2963 | * the previous sync. |
2970 | */ | 2964 | */ |
2971 | iclog->ic_refcnt++; | 2965 | atomic_inc(&iclog->ic_refcnt); |
2972 | lsn = be64_to_cpu(iclog->ic_header.h_lsn); | 2966 | lsn = be64_to_cpu(iclog->ic_header.h_lsn); |
2973 | xlog_state_switch_iclogs(log, iclog, 0); | 2967 | xlog_state_switch_iclogs(log, iclog, 0); |
2974 | spin_unlock(&log->l_icloglock); | 2968 | spin_unlock(&log->l_icloglock); |
@@ -3100,7 +3094,7 @@ try_again: | |||
3100 | already_slept = 1; | 3094 | already_slept = 1; |
3101 | goto try_again; | 3095 | goto try_again; |
3102 | } else { | 3096 | } else { |
3103 | iclog->ic_refcnt++; | 3097 | atomic_inc(&iclog->ic_refcnt); |
3104 | xlog_state_switch_iclogs(log, iclog, 0); | 3098 | xlog_state_switch_iclogs(log, iclog, 0); |
3105 | spin_unlock(&log->l_icloglock); | 3099 | spin_unlock(&log->l_icloglock); |
3106 | if (xlog_state_release_iclog(log, iclog)) | 3100 | if (xlog_state_release_iclog(log, iclog)) |
@@ -3172,92 +3166,19 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) | |||
3172 | */ | 3166 | */ |
3173 | 3167 | ||
3174 | /* | 3168 | /* |
3175 | * Algorithm doesn't take into account page size. ;-( | 3169 | * Free a used ticket. |
3176 | */ | ||
3177 | STATIC void | ||
3178 | xlog_state_ticket_alloc(xlog_t *log) | ||
3179 | { | ||
3180 | xlog_ticket_t *t_list; | ||
3181 | xlog_ticket_t *next; | ||
3182 | xfs_caddr_t buf; | ||
3183 | uint i = (PAGE_SIZE / sizeof(xlog_ticket_t)) - 2; | ||
3184 | |||
3185 | /* | ||
3186 | * The kmem_zalloc may sleep, so we shouldn't be holding the | ||
3187 | * global lock. XXXmiken: may want to use zone allocator. | ||
3188 | */ | ||
3189 | buf = (xfs_caddr_t) kmem_zalloc(PAGE_SIZE, KM_SLEEP); | ||
3190 | |||
3191 | spin_lock(&log->l_icloglock); | ||
3192 | |||
3193 | /* Attach 1st ticket to Q, so we can keep track of allocated memory */ | ||
3194 | t_list = (xlog_ticket_t *)buf; | ||
3195 | t_list->t_next = log->l_unmount_free; | ||
3196 | log->l_unmount_free = t_list++; | ||
3197 | log->l_ticket_cnt++; | ||
3198 | log->l_ticket_tcnt++; | ||
3199 | |||
3200 | /* Next ticket becomes first ticket attached to ticket free list */ | ||
3201 | if (log->l_freelist != NULL) { | ||
3202 | ASSERT(log->l_tail != NULL); | ||
3203 | log->l_tail->t_next = t_list; | ||
3204 | } else { | ||
3205 | log->l_freelist = t_list; | ||
3206 | } | ||
3207 | log->l_ticket_cnt++; | ||
3208 | log->l_ticket_tcnt++; | ||
3209 | |||
3210 | /* Cycle through rest of alloc'ed memory, building up free Q */ | ||
3211 | for ( ; i > 0; i--) { | ||
3212 | next = t_list + 1; | ||
3213 | t_list->t_next = next; | ||
3214 | t_list = next; | ||
3215 | log->l_ticket_cnt++; | ||
3216 | log->l_ticket_tcnt++; | ||
3217 | } | ||
3218 | t_list->t_next = NULL; | ||
3219 | log->l_tail = t_list; | ||
3220 | spin_unlock(&log->l_icloglock); | ||
3221 | } /* xlog_state_ticket_alloc */ | ||
3222 | |||
3223 | |||
3224 | /* | ||
3225 | * Put ticket into free list | ||
3226 | * | ||
3227 | * Assumption: log lock is held around this call. | ||
3228 | */ | 3170 | */ |
3229 | STATIC void | 3171 | STATIC void |
3230 | xlog_ticket_put(xlog_t *log, | 3172 | xlog_ticket_put(xlog_t *log, |
3231 | xlog_ticket_t *ticket) | 3173 | xlog_ticket_t *ticket) |
3232 | { | 3174 | { |
3233 | sv_destroy(&ticket->t_sema); | 3175 | sv_destroy(&ticket->t_sema); |
3234 | 3176 | kmem_zone_free(xfs_log_ticket_zone, ticket); | |
3235 | /* | ||
3236 | * Don't think caching will make that much difference. It's | ||
3237 | * more important to make debug easier. | ||
3238 | */ | ||
3239 | #if 0 | ||
3240 | /* real code will want to use LIFO for caching */ | ||
3241 | ticket->t_next = log->l_freelist; | ||
3242 | log->l_freelist = ticket; | ||
3243 | /* no need to clear fields */ | ||
3244 | #else | ||
3245 | /* When we debug, it is easier if tickets are cycled */ | ||
3246 | ticket->t_next = NULL; | ||
3247 | if (log->l_tail) { | ||
3248 | log->l_tail->t_next = ticket; | ||
3249 | } else { | ||
3250 | ASSERT(log->l_freelist == NULL); | ||
3251 | log->l_freelist = ticket; | ||
3252 | } | ||
3253 | log->l_tail = ticket; | ||
3254 | #endif /* DEBUG */ | ||
3255 | log->l_ticket_cnt++; | ||
3256 | } /* xlog_ticket_put */ | 3177 | } /* xlog_ticket_put */ |
3257 | 3178 | ||
3258 | 3179 | ||
3259 | /* | 3180 | /* |
3260 | * Grab ticket off freelist or allocation some more | 3181 | * Allocate and initialise a new log ticket. |
3261 | */ | 3182 | */ |
3262 | STATIC xlog_ticket_t * | 3183 | STATIC xlog_ticket_t * |
3263 | xlog_ticket_get(xlog_t *log, | 3184 | xlog_ticket_get(xlog_t *log, |
@@ -3269,21 +3190,9 @@ xlog_ticket_get(xlog_t *log, | |||
3269 | xlog_ticket_t *tic; | 3190 | xlog_ticket_t *tic; |
3270 | uint num_headers; | 3191 | uint num_headers; |
3271 | 3192 | ||
3272 | alloc: | 3193 | tic = kmem_zone_zalloc(xfs_log_ticket_zone, KM_SLEEP|KM_MAYFAIL); |
3273 | if (log->l_freelist == NULL) | 3194 | if (!tic) |
3274 | xlog_state_ticket_alloc(log); /* potentially sleep */ | 3195 | return NULL; |
3275 | |||
3276 | spin_lock(&log->l_icloglock); | ||
3277 | if (log->l_freelist == NULL) { | ||
3278 | spin_unlock(&log->l_icloglock); | ||
3279 | goto alloc; | ||
3280 | } | ||
3281 | tic = log->l_freelist; | ||
3282 | log->l_freelist = tic->t_next; | ||
3283 | if (log->l_freelist == NULL) | ||
3284 | log->l_tail = NULL; | ||
3285 | log->l_ticket_cnt--; | ||
3286 | spin_unlock(&log->l_icloglock); | ||
3287 | 3196 | ||
3288 | /* | 3197 | /* |
3289 | * Permanent reservations have up to 'cnt'-1 active log operations | 3198 | * Permanent reservations have up to 'cnt'-1 active log operations |
@@ -3611,8 +3520,8 @@ xfs_log_force_umount( | |||
3611 | * before we mark the filesystem SHUTDOWN and wake | 3520 | * before we mark the filesystem SHUTDOWN and wake |
3612 | * everybody up to tell the bad news. | 3521 | * everybody up to tell the bad news. |
3613 | */ | 3522 | */ |
3614 | spin_lock(&log->l_grant_lock); | ||
3615 | spin_lock(&log->l_icloglock); | 3523 | spin_lock(&log->l_icloglock); |
3524 | spin_lock(&log->l_grant_lock); | ||
3616 | mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; | 3525 | mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; |
3617 | XFS_BUF_DONE(mp->m_sb_bp); | 3526 | XFS_BUF_DONE(mp->m_sb_bp); |
3618 | /* | 3527 | /* |
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 4cdac048df5e..d1d678ecb63e 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
@@ -142,8 +142,9 @@ int _xfs_log_force(struct xfs_mount *mp, | |||
142 | xfs_lsn_t lsn, | 142 | xfs_lsn_t lsn, |
143 | uint flags, | 143 | uint flags, |
144 | int *log_forced); | 144 | int *log_forced); |
145 | #define xfs_log_force(mp, lsn, flags) \ | 145 | void xfs_log_force(struct xfs_mount *mp, |
146 | _xfs_log_force(mp, lsn, flags, NULL); | 146 | xfs_lsn_t lsn, |
147 | uint flags); | ||
147 | int xfs_log_mount(struct xfs_mount *mp, | 148 | int xfs_log_mount(struct xfs_mount *mp, |
148 | struct xfs_buftarg *log_target, | 149 | struct xfs_buftarg *log_target, |
149 | xfs_daddr_t start_block, | 150 | xfs_daddr_t start_block, |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index c6244cc733c0..8952a392b5f3 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -242,7 +242,7 @@ typedef struct xlog_res { | |||
242 | 242 | ||
243 | typedef struct xlog_ticket { | 243 | typedef struct xlog_ticket { |
244 | sv_t t_sema; /* sleep on this semaphore : 20 */ | 244 | sv_t t_sema; /* sleep on this semaphore : 20 */ |
245 | struct xlog_ticket *t_next; /* :4|8 */ | 245 | struct xlog_ticket *t_next; /* :4|8 */ |
246 | struct xlog_ticket *t_prev; /* :4|8 */ | 246 | struct xlog_ticket *t_prev; /* :4|8 */ |
247 | xlog_tid_t t_tid; /* transaction identifier : 4 */ | 247 | xlog_tid_t t_tid; /* transaction identifier : 4 */ |
248 | int t_curr_res; /* current reservation in bytes : 4 */ | 248 | int t_curr_res; /* current reservation in bytes : 4 */ |
@@ -324,6 +324,19 @@ typedef struct xlog_rec_ext_header { | |||
324 | * - ic_offset is the current number of bytes written to in this iclog. | 324 | * - ic_offset is the current number of bytes written to in this iclog. |
325 | * - ic_refcnt is bumped when someone is writing to the log. | 325 | * - ic_refcnt is bumped when someone is writing to the log. |
326 | * - ic_state is the state of the iclog. | 326 | * - ic_state is the state of the iclog. |
327 | * | ||
328 | * Because of cacheline contention on large machines, we need to separate | ||
329 | * various resources onto different cachelines. To start with, make the | ||
330 | * structure cacheline aligned. The following fields can be contended on | ||
331 | * by independent processes: | ||
332 | * | ||
333 | * - ic_callback_* | ||
334 | * - ic_refcnt | ||
335 | * - fields protected by the global l_icloglock | ||
336 | * | ||
337 | * so we need to ensure that these fields are located in separate cachelines. | ||
338 | * We'll put all the read-only and l_icloglock fields in the first cacheline, | ||
339 | * and move everything else out to subsequent cachelines. | ||
327 | */ | 340 | */ |
328 | typedef struct xlog_iclog_fields { | 341 | typedef struct xlog_iclog_fields { |
329 | sv_t ic_forcesema; | 342 | sv_t ic_forcesema; |
@@ -332,17 +345,22 @@ typedef struct xlog_iclog_fields { | |||
332 | struct xlog_in_core *ic_prev; | 345 | struct xlog_in_core *ic_prev; |
333 | struct xfs_buf *ic_bp; | 346 | struct xfs_buf *ic_bp; |
334 | struct log *ic_log; | 347 | struct log *ic_log; |
335 | xfs_log_callback_t *ic_callback; | ||
336 | xfs_log_callback_t **ic_callback_tail; | ||
337 | #ifdef XFS_LOG_TRACE | ||
338 | struct ktrace *ic_trace; | ||
339 | #endif | ||
340 | int ic_size; | 348 | int ic_size; |
341 | int ic_offset; | 349 | int ic_offset; |
342 | int ic_refcnt; | ||
343 | int ic_bwritecnt; | 350 | int ic_bwritecnt; |
344 | ushort_t ic_state; | 351 | ushort_t ic_state; |
345 | char *ic_datap; /* pointer to iclog data */ | 352 | char *ic_datap; /* pointer to iclog data */ |
353 | #ifdef XFS_LOG_TRACE | ||
354 | struct ktrace *ic_trace; | ||
355 | #endif | ||
356 | |||
357 | /* Callback structures need their own cacheline */ | ||
358 | spinlock_t ic_callback_lock ____cacheline_aligned_in_smp; | ||
359 | xfs_log_callback_t *ic_callback; | ||
360 | xfs_log_callback_t **ic_callback_tail; | ||
361 | |||
362 | /* reference counts need their own cacheline */ | ||
363 | atomic_t ic_refcnt ____cacheline_aligned_in_smp; | ||
346 | } xlog_iclog_fields_t; | 364 | } xlog_iclog_fields_t; |
347 | 365 | ||
348 | typedef union xlog_in_core2 { | 366 | typedef union xlog_in_core2 { |
@@ -366,6 +384,7 @@ typedef struct xlog_in_core { | |||
366 | #define ic_bp hic_fields.ic_bp | 384 | #define ic_bp hic_fields.ic_bp |
367 | #define ic_log hic_fields.ic_log | 385 | #define ic_log hic_fields.ic_log |
368 | #define ic_callback hic_fields.ic_callback | 386 | #define ic_callback hic_fields.ic_callback |
387 | #define ic_callback_lock hic_fields.ic_callback_lock | ||
369 | #define ic_callback_tail hic_fields.ic_callback_tail | 388 | #define ic_callback_tail hic_fields.ic_callback_tail |
370 | #define ic_trace hic_fields.ic_trace | 389 | #define ic_trace hic_fields.ic_trace |
371 | #define ic_size hic_fields.ic_size | 390 | #define ic_size hic_fields.ic_size |
@@ -383,43 +402,46 @@ typedef struct xlog_in_core { | |||
383 | * that round off problems won't occur when releasing partial reservations. | 402 | * that round off problems won't occur when releasing partial reservations. |
384 | */ | 403 | */ |
385 | typedef struct log { | 404 | typedef struct log { |
405 | /* The following fields don't need locking */ | ||
406 | struct xfs_mount *l_mp; /* mount point */ | ||
407 | struct xfs_buf *l_xbuf; /* extra buffer for log | ||
408 | * wrapping */ | ||
409 | struct xfs_buftarg *l_targ; /* buftarg of log */ | ||
410 | uint l_flags; | ||
411 | uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ | ||
412 | struct xfs_buf_cancel **l_buf_cancel_table; | ||
413 | int l_iclog_hsize; /* size of iclog header */ | ||
414 | int l_iclog_heads; /* # of iclog header sectors */ | ||
415 | uint l_sectbb_log; /* log2 of sector size in BBs */ | ||
416 | uint l_sectbb_mask; /* sector size (in BBs) | ||
417 | * alignment mask */ | ||
418 | int l_iclog_size; /* size of log in bytes */ | ||
419 | int l_iclog_size_log; /* log power size of log */ | ||
420 | int l_iclog_bufs; /* number of iclog buffers */ | ||
421 | xfs_daddr_t l_logBBstart; /* start block of log */ | ||
422 | int l_logsize; /* size of log in bytes */ | ||
423 | int l_logBBsize; /* size of log in BB chunks */ | ||
424 | |||
386 | /* The following block of fields are changed while holding icloglock */ | 425 | /* The following block of fields are changed while holding icloglock */ |
387 | sema_t l_flushsema; /* iclog flushing semaphore */ | 426 | sema_t l_flushsema ____cacheline_aligned_in_smp; |
427 | /* iclog flushing semaphore */ | ||
388 | int l_flushcnt; /* # of procs waiting on this | 428 | int l_flushcnt; /* # of procs waiting on this |
389 | * sema */ | 429 | * sema */ |
390 | int l_ticket_cnt; /* free ticket count */ | ||
391 | int l_ticket_tcnt; /* total ticket count */ | ||
392 | int l_covered_state;/* state of "covering disk | 430 | int l_covered_state;/* state of "covering disk |
393 | * log entries" */ | 431 | * log entries" */ |
394 | xlog_ticket_t *l_freelist; /* free list of tickets */ | ||
395 | xlog_ticket_t *l_unmount_free;/* kmem_free these addresses */ | ||
396 | xlog_ticket_t *l_tail; /* free list of tickets */ | ||
397 | xlog_in_core_t *l_iclog; /* head log queue */ | 432 | xlog_in_core_t *l_iclog; /* head log queue */ |
398 | spinlock_t l_icloglock; /* grab to change iclog state */ | 433 | spinlock_t l_icloglock; /* grab to change iclog state */ |
399 | xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed | 434 | xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed |
400 | * buffers */ | 435 | * buffers */ |
401 | xfs_lsn_t l_last_sync_lsn;/* lsn of last LR on disk */ | 436 | xfs_lsn_t l_last_sync_lsn;/* lsn of last LR on disk */ |
402 | struct xfs_mount *l_mp; /* mount point */ | ||
403 | struct xfs_buf *l_xbuf; /* extra buffer for log | ||
404 | * wrapping */ | ||
405 | struct xfs_buftarg *l_targ; /* buftarg of log */ | ||
406 | xfs_daddr_t l_logBBstart; /* start block of log */ | ||
407 | int l_logsize; /* size of log in bytes */ | ||
408 | int l_logBBsize; /* size of log in BB chunks */ | ||
409 | int l_curr_cycle; /* Cycle number of log writes */ | 437 | int l_curr_cycle; /* Cycle number of log writes */ |
410 | int l_prev_cycle; /* Cycle number before last | 438 | int l_prev_cycle; /* Cycle number before last |
411 | * block increment */ | 439 | * block increment */ |
412 | int l_curr_block; /* current logical log block */ | 440 | int l_curr_block; /* current logical log block */ |
413 | int l_prev_block; /* previous logical log block */ | 441 | int l_prev_block; /* previous logical log block */ |
414 | int l_iclog_size; /* size of log in bytes */ | ||
415 | int l_iclog_size_log; /* log power size of log */ | ||
416 | int l_iclog_bufs; /* number of iclog buffers */ | ||
417 | |||
418 | /* The following field are used for debugging; need to hold icloglock */ | ||
419 | char *l_iclog_bak[XLOG_MAX_ICLOGS]; | ||
420 | 442 | ||
421 | /* The following block of fields are changed while holding grant_lock */ | 443 | /* The following block of fields are changed while holding grant_lock */ |
422 | spinlock_t l_grant_lock; | 444 | spinlock_t l_grant_lock ____cacheline_aligned_in_smp; |
423 | xlog_ticket_t *l_reserve_headq; | 445 | xlog_ticket_t *l_reserve_headq; |
424 | xlog_ticket_t *l_write_headq; | 446 | xlog_ticket_t *l_write_headq; |
425 | int l_grant_reserve_cycle; | 447 | int l_grant_reserve_cycle; |
@@ -427,19 +449,16 @@ typedef struct log { | |||
427 | int l_grant_write_cycle; | 449 | int l_grant_write_cycle; |
428 | int l_grant_write_bytes; | 450 | int l_grant_write_bytes; |
429 | 451 | ||
430 | /* The following fields don't need locking */ | ||
431 | #ifdef XFS_LOG_TRACE | 452 | #ifdef XFS_LOG_TRACE |
432 | struct ktrace *l_trace; | 453 | struct ktrace *l_trace; |
433 | struct ktrace *l_grant_trace; | 454 | struct ktrace *l_grant_trace; |
434 | #endif | 455 | #endif |
435 | uint l_flags; | 456 | |
436 | uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ | 457 | /* The following field are used for debugging; need to hold icloglock */ |
437 | struct xfs_buf_cancel **l_buf_cancel_table; | 458 | #ifdef DEBUG |
438 | int l_iclog_hsize; /* size of iclog header */ | 459 | char *l_iclog_bak[XLOG_MAX_ICLOGS]; |
439 | int l_iclog_heads; /* # of iclog header sectors */ | 460 | #endif |
440 | uint l_sectbb_log; /* log2 of sector size in BBs */ | 461 | |
441 | uint l_sectbb_mask; /* sector size (in BBs) | ||
442 | * alignment mask */ | ||
443 | } xlog_t; | 462 | } xlog_t; |
444 | 463 | ||
445 | #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) | 464 | #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) |
@@ -459,6 +478,8 @@ extern struct xfs_buf *xlog_get_bp(xlog_t *, int); | |||
459 | extern void xlog_put_bp(struct xfs_buf *); | 478 | extern void xlog_put_bp(struct xfs_buf *); |
460 | extern int xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *); | 479 | extern int xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *); |
461 | 480 | ||
481 | extern kmem_zone_t *xfs_log_ticket_zone; | ||
482 | |||
462 | /* iclog tracing */ | 483 | /* iclog tracing */ |
463 | #define XLOG_TRACE_GRAB_FLUSH 1 | 484 | #define XLOG_TRACE_GRAB_FLUSH 1 |
464 | #define XLOG_TRACE_REL_FLUSH 2 | 485 | #define XLOG_TRACE_REL_FLUSH 2 |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index b2b70eba282c..e65ab4af0955 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include "xfs_trans_priv.h" | 46 | #include "xfs_trans_priv.h" |
47 | #include "xfs_quota.h" | 47 | #include "xfs_quota.h" |
48 | #include "xfs_rw.h" | 48 | #include "xfs_rw.h" |
49 | #include "xfs_utils.h" | ||
49 | 50 | ||
50 | STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *); | 51 | STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *); |
51 | STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t); | 52 | STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t); |
@@ -120,7 +121,8 @@ xlog_bread( | |||
120 | XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); | 121 | XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); |
121 | 122 | ||
122 | xfsbdstrat(log->l_mp, bp); | 123 | xfsbdstrat(log->l_mp, bp); |
123 | if ((error = xfs_iowait(bp))) | 124 | error = xfs_iowait(bp); |
125 | if (error) | ||
124 | xfs_ioerror_alert("xlog_bread", log->l_mp, | 126 | xfs_ioerror_alert("xlog_bread", log->l_mp, |
125 | bp, XFS_BUF_ADDR(bp)); | 127 | bp, XFS_BUF_ADDR(bp)); |
126 | return error; | 128 | return error; |
@@ -191,7 +193,7 @@ xlog_header_check_dump( | |||
191 | { | 193 | { |
192 | int b; | 194 | int b; |
193 | 195 | ||
194 | cmn_err(CE_DEBUG, "%s: SB : uuid = ", __FUNCTION__); | 196 | cmn_err(CE_DEBUG, "%s: SB : uuid = ", __func__); |
195 | for (b = 0; b < 16; b++) | 197 | for (b = 0; b < 16; b++) |
196 | cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]); | 198 | cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]); |
197 | cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT); | 199 | cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT); |
@@ -1160,10 +1162,14 @@ xlog_write_log_records( | |||
1160 | if (j == 0 && (start_block + endcount > ealign)) { | 1162 | if (j == 0 && (start_block + endcount > ealign)) { |
1161 | offset = XFS_BUF_PTR(bp); | 1163 | offset = XFS_BUF_PTR(bp); |
1162 | balign = BBTOB(ealign - start_block); | 1164 | balign = BBTOB(ealign - start_block); |
1163 | XFS_BUF_SET_PTR(bp, offset + balign, BBTOB(sectbb)); | 1165 | error = XFS_BUF_SET_PTR(bp, offset + balign, |
1164 | if ((error = xlog_bread(log, ealign, sectbb, bp))) | 1166 | BBTOB(sectbb)); |
1167 | if (!error) | ||
1168 | error = xlog_bread(log, ealign, sectbb, bp); | ||
1169 | if (!error) | ||
1170 | error = XFS_BUF_SET_PTR(bp, offset, bufblks); | ||
1171 | if (error) | ||
1165 | break; | 1172 | break; |
1166 | XFS_BUF_SET_PTR(bp, offset, bufblks); | ||
1167 | } | 1173 | } |
1168 | 1174 | ||
1169 | offset = xlog_align(log, start_block, endcount, bp); | 1175 | offset = xlog_align(log, start_block, endcount, bp); |
@@ -2280,7 +2286,9 @@ xlog_recover_do_inode_trans( | |||
2280 | * invalidate the buffer when we write it out below. | 2286 | * invalidate the buffer when we write it out below. |
2281 | */ | 2287 | */ |
2282 | imap.im_blkno = 0; | 2288 | imap.im_blkno = 0; |
2283 | xfs_imap(log->l_mp, NULL, ino, &imap, 0); | 2289 | error = xfs_imap(log->l_mp, NULL, ino, &imap, 0); |
2290 | if (error) | ||
2291 | goto error; | ||
2284 | } | 2292 | } |
2285 | 2293 | ||
2286 | /* | 2294 | /* |
@@ -2964,7 +2972,7 @@ xlog_recover_process_data( | |||
2964 | * Process an extent free intent item that was recovered from | 2972 | * Process an extent free intent item that was recovered from |
2965 | * the log. We need to free the extents that it describes. | 2973 | * the log. We need to free the extents that it describes. |
2966 | */ | 2974 | */ |
2967 | STATIC void | 2975 | STATIC int |
2968 | xlog_recover_process_efi( | 2976 | xlog_recover_process_efi( |
2969 | xfs_mount_t *mp, | 2977 | xfs_mount_t *mp, |
2970 | xfs_efi_log_item_t *efip) | 2978 | xfs_efi_log_item_t *efip) |
@@ -2972,6 +2980,7 @@ xlog_recover_process_efi( | |||
2972 | xfs_efd_log_item_t *efdp; | 2980 | xfs_efd_log_item_t *efdp; |
2973 | xfs_trans_t *tp; | 2981 | xfs_trans_t *tp; |
2974 | int i; | 2982 | int i; |
2983 | int error = 0; | ||
2975 | xfs_extent_t *extp; | 2984 | xfs_extent_t *extp; |
2976 | xfs_fsblock_t startblock_fsb; | 2985 | xfs_fsblock_t startblock_fsb; |
2977 | 2986 | ||
@@ -2995,23 +3004,32 @@ xlog_recover_process_efi( | |||
2995 | * free the memory associated with it. | 3004 | * free the memory associated with it. |
2996 | */ | 3005 | */ |
2997 | xfs_efi_release(efip, efip->efi_format.efi_nextents); | 3006 | xfs_efi_release(efip, efip->efi_format.efi_nextents); |
2998 | return; | 3007 | return XFS_ERROR(EIO); |
2999 | } | 3008 | } |
3000 | } | 3009 | } |
3001 | 3010 | ||
3002 | tp = xfs_trans_alloc(mp, 0); | 3011 | tp = xfs_trans_alloc(mp, 0); |
3003 | xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0); | 3012 | error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0); |
3013 | if (error) | ||
3014 | goto abort_error; | ||
3004 | efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); | 3015 | efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); |
3005 | 3016 | ||
3006 | for (i = 0; i < efip->efi_format.efi_nextents; i++) { | 3017 | for (i = 0; i < efip->efi_format.efi_nextents; i++) { |
3007 | extp = &(efip->efi_format.efi_extents[i]); | 3018 | extp = &(efip->efi_format.efi_extents[i]); |
3008 | xfs_free_extent(tp, extp->ext_start, extp->ext_len); | 3019 | error = xfs_free_extent(tp, extp->ext_start, extp->ext_len); |
3020 | if (error) | ||
3021 | goto abort_error; | ||
3009 | xfs_trans_log_efd_extent(tp, efdp, extp->ext_start, | 3022 | xfs_trans_log_efd_extent(tp, efdp, extp->ext_start, |
3010 | extp->ext_len); | 3023 | extp->ext_len); |
3011 | } | 3024 | } |
3012 | 3025 | ||
3013 | efip->efi_flags |= XFS_EFI_RECOVERED; | 3026 | efip->efi_flags |= XFS_EFI_RECOVERED; |
3014 | xfs_trans_commit(tp, 0); | 3027 | error = xfs_trans_commit(tp, 0); |
3028 | return error; | ||
3029 | |||
3030 | abort_error: | ||
3031 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); | ||
3032 | return error; | ||
3015 | } | 3033 | } |
3016 | 3034 | ||
3017 | /* | 3035 | /* |
@@ -3059,7 +3077,7 @@ xlog_recover_check_ail( | |||
3059 | * everything already in the AIL, we stop processing as soon as | 3077 | * everything already in the AIL, we stop processing as soon as |
3060 | * we see something other than an EFI in the AIL. | 3078 | * we see something other than an EFI in the AIL. |
3061 | */ | 3079 | */ |
3062 | STATIC void | 3080 | STATIC int |
3063 | xlog_recover_process_efis( | 3081 | xlog_recover_process_efis( |
3064 | xlog_t *log) | 3082 | xlog_t *log) |
3065 | { | 3083 | { |
@@ -3067,6 +3085,7 @@ xlog_recover_process_efis( | |||
3067 | xfs_efi_log_item_t *efip; | 3085 | xfs_efi_log_item_t *efip; |
3068 | int gen; | 3086 | int gen; |
3069 | xfs_mount_t *mp; | 3087 | xfs_mount_t *mp; |
3088 | int error = 0; | ||
3070 | 3089 | ||
3071 | mp = log->l_mp; | 3090 | mp = log->l_mp; |
3072 | spin_lock(&mp->m_ail_lock); | 3091 | spin_lock(&mp->m_ail_lock); |
@@ -3091,11 +3110,14 @@ xlog_recover_process_efis( | |||
3091 | } | 3110 | } |
3092 | 3111 | ||
3093 | spin_unlock(&mp->m_ail_lock); | 3112 | spin_unlock(&mp->m_ail_lock); |
3094 | xlog_recover_process_efi(mp, efip); | 3113 | error = xlog_recover_process_efi(mp, efip); |
3114 | if (error) | ||
3115 | return error; | ||
3095 | spin_lock(&mp->m_ail_lock); | 3116 | spin_lock(&mp->m_ail_lock); |
3096 | lip = xfs_trans_next_ail(mp, lip, &gen, NULL); | 3117 | lip = xfs_trans_next_ail(mp, lip, &gen, NULL); |
3097 | } | 3118 | } |
3098 | spin_unlock(&mp->m_ail_lock); | 3119 | spin_unlock(&mp->m_ail_lock); |
3120 | return error; | ||
3099 | } | 3121 | } |
3100 | 3122 | ||
3101 | /* | 3123 | /* |
@@ -3115,21 +3137,18 @@ xlog_recover_clear_agi_bucket( | |||
3115 | int error; | 3137 | int error; |
3116 | 3138 | ||
3117 | tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); | 3139 | tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); |
3118 | xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), 0, 0, 0); | 3140 | error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), 0, 0, 0); |
3119 | 3141 | if (!error) | |
3120 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, | 3142 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, |
3121 | XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), | 3143 | XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), |
3122 | XFS_FSS_TO_BB(mp, 1), 0, &agibp); | 3144 | XFS_FSS_TO_BB(mp, 1), 0, &agibp); |
3123 | if (error) { | 3145 | if (error) |
3124 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); | 3146 | goto out_abort; |
3125 | return; | ||
3126 | } | ||
3127 | 3147 | ||
3148 | error = EINVAL; | ||
3128 | agi = XFS_BUF_TO_AGI(agibp); | 3149 | agi = XFS_BUF_TO_AGI(agibp); |
3129 | if (be32_to_cpu(agi->agi_magicnum) != XFS_AGI_MAGIC) { | 3150 | if (be32_to_cpu(agi->agi_magicnum) != XFS_AGI_MAGIC) |
3130 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); | 3151 | goto out_abort; |
3131 | return; | ||
3132 | } | ||
3133 | 3152 | ||
3134 | agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); | 3153 | agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); |
3135 | offset = offsetof(xfs_agi_t, agi_unlinked) + | 3154 | offset = offsetof(xfs_agi_t, agi_unlinked) + |
@@ -3137,7 +3156,17 @@ xlog_recover_clear_agi_bucket( | |||
3137 | xfs_trans_log_buf(tp, agibp, offset, | 3156 | xfs_trans_log_buf(tp, agibp, offset, |
3138 | (offset + sizeof(xfs_agino_t) - 1)); | 3157 | (offset + sizeof(xfs_agino_t) - 1)); |
3139 | 3158 | ||
3140 | (void) xfs_trans_commit(tp, 0); | 3159 | error = xfs_trans_commit(tp, 0); |
3160 | if (error) | ||
3161 | goto out_error; | ||
3162 | return; | ||
3163 | |||
3164 | out_abort: | ||
3165 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); | ||
3166 | out_error: | ||
3167 | xfs_fs_cmn_err(CE_WARN, mp, "xlog_recover_clear_agi_bucket: " | ||
3168 | "failed to clear agi %d. Continuing.", agno); | ||
3169 | return; | ||
3141 | } | 3170 | } |
3142 | 3171 | ||
3143 | /* | 3172 | /* |
@@ -3214,7 +3243,8 @@ xlog_recover_process_iunlinks( | |||
3214 | * next inode in the bucket. | 3243 | * next inode in the bucket. |
3215 | */ | 3244 | */ |
3216 | error = xfs_itobp(mp, NULL, ip, &dip, | 3245 | error = xfs_itobp(mp, NULL, ip, &dip, |
3217 | &ibp, 0, 0); | 3246 | &ibp, 0, 0, |
3247 | XFS_BUF_LOCK); | ||
3218 | ASSERT(error || (dip != NULL)); | 3248 | ASSERT(error || (dip != NULL)); |
3219 | } | 3249 | } |
3220 | 3250 | ||
@@ -3247,7 +3277,7 @@ xlog_recover_process_iunlinks( | |||
3247 | if (ip->i_d.di_mode == 0) | 3277 | if (ip->i_d.di_mode == 0) |
3248 | xfs_iput_new(ip, 0); | 3278 | xfs_iput_new(ip, 0); |
3249 | else | 3279 | else |
3250 | VN_RELE(XFS_ITOV(ip)); | 3280 | IRELE(ip); |
3251 | } else { | 3281 | } else { |
3252 | /* | 3282 | /* |
3253 | * We can't read in the inode | 3283 | * We can't read in the inode |
@@ -3445,7 +3475,7 @@ xlog_valid_rec_header( | |||
3445 | (!rhead->h_version || | 3475 | (!rhead->h_version || |
3446 | (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { | 3476 | (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { |
3447 | xlog_warn("XFS: %s: unrecognised log version (%d).", | 3477 | xlog_warn("XFS: %s: unrecognised log version (%d).", |
3448 | __FUNCTION__, be32_to_cpu(rhead->h_version)); | 3478 | __func__, be32_to_cpu(rhead->h_version)); |
3449 | return XFS_ERROR(EIO); | 3479 | return XFS_ERROR(EIO); |
3450 | } | 3480 | } |
3451 | 3481 | ||
@@ -3604,15 +3634,19 @@ xlog_do_recovery_pass( | |||
3604 | * _first_, then the log start (LR header end) | 3634 | * _first_, then the log start (LR header end) |
3605 | * - order is important. | 3635 | * - order is important. |
3606 | */ | 3636 | */ |
3637 | wrapped_hblks = hblks - split_hblks; | ||
3607 | bufaddr = XFS_BUF_PTR(hbp); | 3638 | bufaddr = XFS_BUF_PTR(hbp); |
3608 | XFS_BUF_SET_PTR(hbp, | 3639 | error = XFS_BUF_SET_PTR(hbp, |
3609 | bufaddr + BBTOB(split_hblks), | 3640 | bufaddr + BBTOB(split_hblks), |
3610 | BBTOB(hblks - split_hblks)); | 3641 | BBTOB(hblks - split_hblks)); |
3611 | wrapped_hblks = hblks - split_hblks; | 3642 | if (!error) |
3612 | error = xlog_bread(log, 0, wrapped_hblks, hbp); | 3643 | error = xlog_bread(log, 0, |
3644 | wrapped_hblks, hbp); | ||
3645 | if (!error) | ||
3646 | error = XFS_BUF_SET_PTR(hbp, bufaddr, | ||
3647 | BBTOB(hblks)); | ||
3613 | if (error) | 3648 | if (error) |
3614 | goto bread_err2; | 3649 | goto bread_err2; |
3615 | XFS_BUF_SET_PTR(hbp, bufaddr, BBTOB(hblks)); | ||
3616 | if (!offset) | 3650 | if (!offset) |
3617 | offset = xlog_align(log, 0, | 3651 | offset = xlog_align(log, 0, |
3618 | wrapped_hblks, hbp); | 3652 | wrapped_hblks, hbp); |
@@ -3664,13 +3698,18 @@ xlog_do_recovery_pass( | |||
3664 | * - order is important. | 3698 | * - order is important. |
3665 | */ | 3699 | */ |
3666 | bufaddr = XFS_BUF_PTR(dbp); | 3700 | bufaddr = XFS_BUF_PTR(dbp); |
3667 | XFS_BUF_SET_PTR(dbp, | 3701 | error = XFS_BUF_SET_PTR(dbp, |
3668 | bufaddr + BBTOB(split_bblks), | 3702 | bufaddr + BBTOB(split_bblks), |
3669 | BBTOB(bblks - split_bblks)); | 3703 | BBTOB(bblks - split_bblks)); |
3670 | if ((error = xlog_bread(log, wrapped_hblks, | 3704 | if (!error) |
3671 | bblks - split_bblks, dbp))) | 3705 | error = xlog_bread(log, wrapped_hblks, |
3706 | bblks - split_bblks, | ||
3707 | dbp); | ||
3708 | if (!error) | ||
3709 | error = XFS_BUF_SET_PTR(dbp, bufaddr, | ||
3710 | h_size); | ||
3711 | if (error) | ||
3672 | goto bread_err2; | 3712 | goto bread_err2; |
3673 | XFS_BUF_SET_PTR(dbp, bufaddr, h_size); | ||
3674 | if (!offset) | 3713 | if (!offset) |
3675 | offset = xlog_align(log, wrapped_hblks, | 3714 | offset = xlog_align(log, wrapped_hblks, |
3676 | bblks - split_bblks, dbp); | 3715 | bblks - split_bblks, dbp); |
@@ -3826,7 +3865,8 @@ xlog_do_recover( | |||
3826 | XFS_BUF_READ(bp); | 3865 | XFS_BUF_READ(bp); |
3827 | XFS_BUF_UNASYNC(bp); | 3866 | XFS_BUF_UNASYNC(bp); |
3828 | xfsbdstrat(log->l_mp, bp); | 3867 | xfsbdstrat(log->l_mp, bp); |
3829 | if ((error = xfs_iowait(bp))) { | 3868 | error = xfs_iowait(bp); |
3869 | if (error) { | ||
3830 | xfs_ioerror_alert("xlog_do_recover", | 3870 | xfs_ioerror_alert("xlog_do_recover", |
3831 | log->l_mp, bp, XFS_BUF_ADDR(bp)); | 3871 | log->l_mp, bp, XFS_BUF_ADDR(bp)); |
3832 | ASSERT(0); | 3872 | ASSERT(0); |
@@ -3917,7 +3957,14 @@ xlog_recover_finish( | |||
3917 | * rather than accepting new requests. | 3957 | * rather than accepting new requests. |
3918 | */ | 3958 | */ |
3919 | if (log->l_flags & XLOG_RECOVERY_NEEDED) { | 3959 | if (log->l_flags & XLOG_RECOVERY_NEEDED) { |
3920 | xlog_recover_process_efis(log); | 3960 | int error; |
3961 | error = xlog_recover_process_efis(log); | ||
3962 | if (error) { | ||
3963 | cmn_err(CE_ALERT, | ||
3964 | "Failed to recover EFIs on filesystem: %s", | ||
3965 | log->l_mp->m_fsname); | ||
3966 | return error; | ||
3967 | } | ||
3921 | /* | 3968 | /* |
3922 | * Sync the log to get all the EFIs out of the AIL. | 3969 | * Sync the log to get all the EFIs out of the AIL. |
3923 | * This isn't absolutely necessary, but it helps in | 3970 | * This isn't absolutely necessary, but it helps in |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 8ed164eb9544..2fec452afbcc 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -43,8 +43,9 @@ | |||
43 | #include "xfs_rw.h" | 43 | #include "xfs_rw.h" |
44 | #include "xfs_quota.h" | 44 | #include "xfs_quota.h" |
45 | #include "xfs_fsops.h" | 45 | #include "xfs_fsops.h" |
46 | #include "xfs_utils.h" | ||
46 | 47 | ||
47 | STATIC void xfs_mount_log_sb(xfs_mount_t *, __int64_t); | 48 | STATIC int xfs_mount_log_sb(xfs_mount_t *, __int64_t); |
48 | STATIC int xfs_uuid_mount(xfs_mount_t *); | 49 | STATIC int xfs_uuid_mount(xfs_mount_t *); |
49 | STATIC void xfs_uuid_unmount(xfs_mount_t *mp); | 50 | STATIC void xfs_uuid_unmount(xfs_mount_t *mp); |
50 | STATIC void xfs_unmountfs_wait(xfs_mount_t *); | 51 | STATIC void xfs_unmountfs_wait(xfs_mount_t *); |
@@ -57,7 +58,7 @@ STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, | |||
57 | STATIC void xfs_icsb_sync_counters(xfs_mount_t *); | 58 | STATIC void xfs_icsb_sync_counters(xfs_mount_t *); |
58 | STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t, | 59 | STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t, |
59 | int64_t, int); | 60 | int64_t, int); |
60 | STATIC int xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); | 61 | STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); |
61 | 62 | ||
62 | #else | 63 | #else |
63 | 64 | ||
@@ -956,7 +957,6 @@ xfs_mountfs( | |||
956 | { | 957 | { |
957 | xfs_sb_t *sbp = &(mp->m_sb); | 958 | xfs_sb_t *sbp = &(mp->m_sb); |
958 | xfs_inode_t *rip; | 959 | xfs_inode_t *rip; |
959 | bhv_vnode_t *rvp = NULL; | ||
960 | __uint64_t resblks; | 960 | __uint64_t resblks; |
961 | __int64_t update_flags = 0LL; | 961 | __int64_t update_flags = 0LL; |
962 | uint quotamount, quotaflags; | 962 | uint quotamount, quotaflags; |
@@ -964,11 +964,6 @@ xfs_mountfs( | |||
964 | int uuid_mounted = 0; | 964 | int uuid_mounted = 0; |
965 | int error = 0; | 965 | int error = 0; |
966 | 966 | ||
967 | if (mp->m_sb_bp == NULL) { | ||
968 | error = xfs_readsb(mp, mfsi_flags); | ||
969 | if (error) | ||
970 | return error; | ||
971 | } | ||
972 | xfs_mount_common(mp, sbp); | 967 | xfs_mount_common(mp, sbp); |
973 | 968 | ||
974 | /* | 969 | /* |
@@ -1163,7 +1158,6 @@ xfs_mountfs( | |||
1163 | } | 1158 | } |
1164 | 1159 | ||
1165 | ASSERT(rip != NULL); | 1160 | ASSERT(rip != NULL); |
1166 | rvp = XFS_ITOV(rip); | ||
1167 | 1161 | ||
1168 | if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) { | 1162 | if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) { |
1169 | cmn_err(CE_WARN, "XFS: corrupted root inode"); | 1163 | cmn_err(CE_WARN, "XFS: corrupted root inode"); |
@@ -1195,8 +1189,13 @@ xfs_mountfs( | |||
1195 | /* | 1189 | /* |
1196 | * If fs is not mounted readonly, then update the superblock changes. | 1190 | * If fs is not mounted readonly, then update the superblock changes. |
1197 | */ | 1191 | */ |
1198 | if (update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) | 1192 | if (update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) { |
1199 | xfs_mount_log_sb(mp, update_flags); | 1193 | error = xfs_mount_log_sb(mp, update_flags); |
1194 | if (error) { | ||
1195 | cmn_err(CE_WARN, "XFS: failed to write sb changes"); | ||
1196 | goto error4; | ||
1197 | } | ||
1198 | } | ||
1200 | 1199 | ||
1201 | /* | 1200 | /* |
1202 | * Initialise the XFS quota management subsystem for this mount | 1201 | * Initialise the XFS quota management subsystem for this mount |
@@ -1233,12 +1232,15 @@ xfs_mountfs( | |||
1233 | * | 1232 | * |
1234 | * We default to 5% or 1024 fsbs of space reserved, whichever is smaller. | 1233 | * We default to 5% or 1024 fsbs of space reserved, whichever is smaller. |
1235 | * This may drive us straight to ENOSPC on mount, but that implies | 1234 | * This may drive us straight to ENOSPC on mount, but that implies |
1236 | * we were already there on the last unmount. | 1235 | * we were already there on the last unmount. Warn if this occurs. |
1237 | */ | 1236 | */ |
1238 | resblks = mp->m_sb.sb_dblocks; | 1237 | resblks = mp->m_sb.sb_dblocks; |
1239 | do_div(resblks, 20); | 1238 | do_div(resblks, 20); |
1240 | resblks = min_t(__uint64_t, resblks, 1024); | 1239 | resblks = min_t(__uint64_t, resblks, 1024); |
1241 | xfs_reserve_blocks(mp, &resblks, NULL); | 1240 | error = xfs_reserve_blocks(mp, &resblks, NULL); |
1241 | if (error) | ||
1242 | cmn_err(CE_WARN, "XFS: Unable to allocate reserve blocks. " | ||
1243 | "Continuing without a reserve pool."); | ||
1242 | 1244 | ||
1243 | return 0; | 1245 | return 0; |
1244 | 1246 | ||
@@ -1246,7 +1248,7 @@ xfs_mountfs( | |||
1246 | /* | 1248 | /* |
1247 | * Free up the root inode. | 1249 | * Free up the root inode. |
1248 | */ | 1250 | */ |
1249 | VN_RELE(rvp); | 1251 | IRELE(rip); |
1250 | error3: | 1252 | error3: |
1251 | xfs_log_unmount_dealloc(mp); | 1253 | xfs_log_unmount_dealloc(mp); |
1252 | error2: | 1254 | error2: |
@@ -1274,6 +1276,7 @@ int | |||
1274 | xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) | 1276 | xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) |
1275 | { | 1277 | { |
1276 | __uint64_t resblks; | 1278 | __uint64_t resblks; |
1279 | int error = 0; | ||
1277 | 1280 | ||
1278 | /* | 1281 | /* |
1279 | * We can potentially deadlock here if we have an inode cluster | 1282 | * We can potentially deadlock here if we have an inode cluster |
@@ -1317,9 +1320,15 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) | |||
1317 | * value does not matter.... | 1320 | * value does not matter.... |
1318 | */ | 1321 | */ |
1319 | resblks = 0; | 1322 | resblks = 0; |
1320 | xfs_reserve_blocks(mp, &resblks, NULL); | 1323 | error = xfs_reserve_blocks(mp, &resblks, NULL); |
1324 | if (error) | ||
1325 | cmn_err(CE_WARN, "XFS: Unable to free reserved block pool. " | ||
1326 | "Freespace may not be correct on next mount."); | ||
1321 | 1327 | ||
1322 | xfs_log_sbcount(mp, 1); | 1328 | error = xfs_log_sbcount(mp, 1); |
1329 | if (error) | ||
1330 | cmn_err(CE_WARN, "XFS: Unable to update superblock counters. " | ||
1331 | "Freespace may not be correct on next mount."); | ||
1323 | xfs_unmountfs_writesb(mp); | 1332 | xfs_unmountfs_writesb(mp); |
1324 | xfs_unmountfs_wait(mp); /* wait for async bufs */ | 1333 | xfs_unmountfs_wait(mp); /* wait for async bufs */ |
1325 | xfs_log_unmount(mp); /* Done! No more fs ops. */ | 1334 | xfs_log_unmount(mp); /* Done! No more fs ops. */ |
@@ -1411,9 +1420,8 @@ xfs_log_sbcount( | |||
1411 | xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS); | 1420 | xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS); |
1412 | if (sync) | 1421 | if (sync) |
1413 | xfs_trans_set_sync(tp); | 1422 | xfs_trans_set_sync(tp); |
1414 | xfs_trans_commit(tp, 0); | 1423 | error = xfs_trans_commit(tp, 0); |
1415 | 1424 | return error; | |
1416 | return 0; | ||
1417 | } | 1425 | } |
1418 | 1426 | ||
1419 | STATIC void | 1427 | STATIC void |
@@ -1462,7 +1470,6 @@ xfs_unmountfs_writesb(xfs_mount_t *mp) | |||
1462 | XFS_BUF_UNASYNC(sbp); | 1470 | XFS_BUF_UNASYNC(sbp); |
1463 | ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp); | 1471 | ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp); |
1464 | xfsbdstrat(mp, sbp); | 1472 | xfsbdstrat(mp, sbp); |
1465 | /* Nevermind errors we might get here. */ | ||
1466 | error = xfs_iowait(sbp); | 1473 | error = xfs_iowait(sbp); |
1467 | if (error) | 1474 | if (error) |
1468 | xfs_ioerror_alert("xfs_unmountfs_writesb", | 1475 | xfs_ioerror_alert("xfs_unmountfs_writesb", |
@@ -1911,24 +1918,27 @@ xfs_uuid_unmount( | |||
1911 | * be altered by the mount options, as well as any potential sb_features2 | 1918 | * be altered by the mount options, as well as any potential sb_features2 |
1912 | * fixup. Only the first superblock is updated. | 1919 | * fixup. Only the first superblock is updated. |
1913 | */ | 1920 | */ |
1914 | STATIC void | 1921 | STATIC int |
1915 | xfs_mount_log_sb( | 1922 | xfs_mount_log_sb( |
1916 | xfs_mount_t *mp, | 1923 | xfs_mount_t *mp, |
1917 | __int64_t fields) | 1924 | __int64_t fields) |
1918 | { | 1925 | { |
1919 | xfs_trans_t *tp; | 1926 | xfs_trans_t *tp; |
1927 | int error; | ||
1920 | 1928 | ||
1921 | ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID | | 1929 | ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID | |
1922 | XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2)); | 1930 | XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2)); |
1923 | 1931 | ||
1924 | tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); | 1932 | tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); |
1925 | if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, | 1933 | error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, |
1926 | XFS_DEFAULT_LOG_COUNT)) { | 1934 | XFS_DEFAULT_LOG_COUNT); |
1935 | if (error) { | ||
1927 | xfs_trans_cancel(tp, 0); | 1936 | xfs_trans_cancel(tp, 0); |
1928 | return; | 1937 | return error; |
1929 | } | 1938 | } |
1930 | xfs_mod_sb(tp, fields); | 1939 | xfs_mod_sb(tp, fields); |
1931 | xfs_trans_commit(tp, 0); | 1940 | error = xfs_trans_commit(tp, 0); |
1941 | return error; | ||
1932 | } | 1942 | } |
1933 | 1943 | ||
1934 | 1944 | ||
@@ -2189,7 +2199,7 @@ xfs_icsb_counter_disabled( | |||
2189 | return test_bit(field, &mp->m_icsb_counters); | 2199 | return test_bit(field, &mp->m_icsb_counters); |
2190 | } | 2200 | } |
2191 | 2201 | ||
2192 | STATIC int | 2202 | STATIC void |
2193 | xfs_icsb_disable_counter( | 2203 | xfs_icsb_disable_counter( |
2194 | xfs_mount_t *mp, | 2204 | xfs_mount_t *mp, |
2195 | xfs_sb_field_t field) | 2205 | xfs_sb_field_t field) |
@@ -2207,7 +2217,7 @@ xfs_icsb_disable_counter( | |||
2207 | * the m_icsb_mutex. | 2217 | * the m_icsb_mutex. |
2208 | */ | 2218 | */ |
2209 | if (xfs_icsb_counter_disabled(mp, field)) | 2219 | if (xfs_icsb_counter_disabled(mp, field)) |
2210 | return 0; | 2220 | return; |
2211 | 2221 | ||
2212 | xfs_icsb_lock_all_counters(mp); | 2222 | xfs_icsb_lock_all_counters(mp); |
2213 | if (!test_and_set_bit(field, &mp->m_icsb_counters)) { | 2223 | if (!test_and_set_bit(field, &mp->m_icsb_counters)) { |
@@ -2230,8 +2240,6 @@ xfs_icsb_disable_counter( | |||
2230 | } | 2240 | } |
2231 | 2241 | ||
2232 | xfs_icsb_unlock_all_counters(mp); | 2242 | xfs_icsb_unlock_all_counters(mp); |
2233 | |||
2234 | return 0; | ||
2235 | } | 2243 | } |
2236 | 2244 | ||
2237 | STATIC void | 2245 | STATIC void |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 1d8a4728d847..1ed575110ff0 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -66,17 +66,17 @@ struct xfs_mru_cache; | |||
66 | * Prototypes and functions for the Data Migration subsystem. | 66 | * Prototypes and functions for the Data Migration subsystem. |
67 | */ | 67 | */ |
68 | 68 | ||
69 | typedef int (*xfs_send_data_t)(int, bhv_vnode_t *, | 69 | typedef int (*xfs_send_data_t)(int, struct xfs_inode *, |
70 | xfs_off_t, size_t, int, bhv_vrwlock_t *); | 70 | xfs_off_t, size_t, int, int *); |
71 | typedef int (*xfs_send_mmap_t)(struct vm_area_struct *, uint); | 71 | typedef int (*xfs_send_mmap_t)(struct vm_area_struct *, uint); |
72 | typedef int (*xfs_send_destroy_t)(bhv_vnode_t *, dm_right_t); | 72 | typedef int (*xfs_send_destroy_t)(struct xfs_inode *, dm_right_t); |
73 | typedef int (*xfs_send_namesp_t)(dm_eventtype_t, struct xfs_mount *, | 73 | typedef int (*xfs_send_namesp_t)(dm_eventtype_t, struct xfs_mount *, |
74 | bhv_vnode_t *, | 74 | struct xfs_inode *, dm_right_t, |
75 | dm_right_t, bhv_vnode_t *, dm_right_t, | 75 | struct xfs_inode *, dm_right_t, |
76 | char *, char *, mode_t, int, int); | 76 | const char *, const char *, mode_t, int, int); |
77 | typedef int (*xfs_send_mount_t)(struct xfs_mount *, dm_right_t, | 77 | typedef int (*xfs_send_mount_t)(struct xfs_mount *, dm_right_t, |
78 | char *, char *); | 78 | char *, char *); |
79 | typedef void (*xfs_send_unmount_t)(struct xfs_mount *, bhv_vnode_t *, | 79 | typedef void (*xfs_send_unmount_t)(struct xfs_mount *, struct xfs_inode *, |
80 | dm_right_t, mode_t, int, int); | 80 | dm_right_t, mode_t, int, int); |
81 | 81 | ||
82 | typedef struct xfs_dmops { | 82 | typedef struct xfs_dmops { |
@@ -88,20 +88,20 @@ typedef struct xfs_dmops { | |||
88 | xfs_send_unmount_t xfs_send_unmount; | 88 | xfs_send_unmount_t xfs_send_unmount; |
89 | } xfs_dmops_t; | 89 | } xfs_dmops_t; |
90 | 90 | ||
91 | #define XFS_SEND_DATA(mp, ev,vp,off,len,fl,lock) \ | 91 | #define XFS_SEND_DATA(mp, ev,ip,off,len,fl,lock) \ |
92 | (*(mp)->m_dm_ops->xfs_send_data)(ev,vp,off,len,fl,lock) | 92 | (*(mp)->m_dm_ops->xfs_send_data)(ev,ip,off,len,fl,lock) |
93 | #define XFS_SEND_MMAP(mp, vma,fl) \ | 93 | #define XFS_SEND_MMAP(mp, vma,fl) \ |
94 | (*(mp)->m_dm_ops->xfs_send_mmap)(vma,fl) | 94 | (*(mp)->m_dm_ops->xfs_send_mmap)(vma,fl) |
95 | #define XFS_SEND_DESTROY(mp, vp,right) \ | 95 | #define XFS_SEND_DESTROY(mp, ip,right) \ |
96 | (*(mp)->m_dm_ops->xfs_send_destroy)(vp,right) | 96 | (*(mp)->m_dm_ops->xfs_send_destroy)(ip,right) |
97 | #define XFS_SEND_NAMESP(mp, ev,b1,r1,b2,r2,n1,n2,mode,rval,fl) \ | 97 | #define XFS_SEND_NAMESP(mp, ev,b1,r1,b2,r2,n1,n2,mode,rval,fl) \ |
98 | (*(mp)->m_dm_ops->xfs_send_namesp)(ev,NULL,b1,r1,b2,r2,n1,n2,mode,rval,fl) | 98 | (*(mp)->m_dm_ops->xfs_send_namesp)(ev,NULL,b1,r1,b2,r2,n1,n2,mode,rval,fl) |
99 | #define XFS_SEND_PREUNMOUNT(mp,b1,r1,b2,r2,n1,n2,mode,rval,fl) \ | 99 | #define XFS_SEND_PREUNMOUNT(mp,b1,r1,b2,r2,n1,n2,mode,rval,fl) \ |
100 | (*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT,mp,b1,r1,b2,r2,n1,n2,mode,rval,fl) | 100 | (*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT,mp,b1,r1,b2,r2,n1,n2,mode,rval,fl) |
101 | #define XFS_SEND_MOUNT(mp,right,path,name) \ | 101 | #define XFS_SEND_MOUNT(mp,right,path,name) \ |
102 | (*(mp)->m_dm_ops->xfs_send_mount)(mp,right,path,name) | 102 | (*(mp)->m_dm_ops->xfs_send_mount)(mp,right,path,name) |
103 | #define XFS_SEND_UNMOUNT(mp, vp,right,mode,rval,fl) \ | 103 | #define XFS_SEND_UNMOUNT(mp, ip,right,mode,rval,fl) \ |
104 | (*(mp)->m_dm_ops->xfs_send_unmount)(mp,vp,right,mode,rval,fl) | 104 | (*(mp)->m_dm_ops->xfs_send_unmount)(mp,ip,right,mode,rval,fl) |
105 | 105 | ||
106 | 106 | ||
107 | /* | 107 | /* |
@@ -220,7 +220,7 @@ extern void xfs_icsb_sync_counters_flags(struct xfs_mount *, int); | |||
220 | #endif | 220 | #endif |
221 | 221 | ||
222 | typedef struct xfs_ail { | 222 | typedef struct xfs_ail { |
223 | xfs_ail_entry_t xa_ail; | 223 | struct list_head xa_ail; |
224 | uint xa_gen; | 224 | uint xa_gen; |
225 | struct task_struct *xa_task; | 225 | struct task_struct *xa_task; |
226 | xfs_lsn_t xa_target; | 226 | xfs_lsn_t xa_target; |
@@ -401,7 +401,7 @@ typedef struct xfs_mount { | |||
401 | 401 | ||
402 | /* | 402 | /* |
403 | * Allow large block sizes to be reported to userspace programs if the | 403 | * Allow large block sizes to be reported to userspace programs if the |
404 | * "largeio" mount option is used. | 404 | * "largeio" mount option is used. |
405 | * | 405 | * |
406 | * If compatibility mode is specified, simply return the basic unit of caching | 406 | * If compatibility mode is specified, simply return the basic unit of caching |
407 | * so that we don't get inefficient read/modify/write I/O from user apps. | 407 | * so that we don't get inefficient read/modify/write I/O from user apps. |
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index 7eb157a59f9e..ee371890d85d 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c | |||
@@ -36,7 +36,6 @@ | |||
36 | #include "xfs_bmap.h" | 36 | #include "xfs_bmap.h" |
37 | #include "xfs_error.h" | 37 | #include "xfs_error.h" |
38 | #include "xfs_quota.h" | 38 | #include "xfs_quota.h" |
39 | #include "xfs_refcache.h" | ||
40 | #include "xfs_utils.h" | 39 | #include "xfs_utils.h" |
41 | #include "xfs_trans_space.h" | 40 | #include "xfs_trans_space.h" |
42 | #include "xfs_vnodeops.h" | 41 | #include "xfs_vnodeops.h" |
@@ -84,25 +83,23 @@ int xfs_rename_skip, xfs_rename_nskip; | |||
84 | */ | 83 | */ |
85 | STATIC int | 84 | STATIC int |
86 | xfs_lock_for_rename( | 85 | xfs_lock_for_rename( |
87 | xfs_inode_t *dp1, /* old (source) directory inode */ | 86 | xfs_inode_t *dp1, /* in: old (source) directory inode */ |
88 | xfs_inode_t *dp2, /* new (target) directory inode */ | 87 | xfs_inode_t *dp2, /* in: new (target) directory inode */ |
89 | bhv_vname_t *vname1,/* old entry name */ | 88 | xfs_inode_t *ip1, /* in: inode of old entry */ |
90 | bhv_vname_t *vname2,/* new entry name */ | 89 | struct xfs_name *name2, /* in: new entry name */ |
91 | xfs_inode_t **ipp1, /* inode of old entry */ | 90 | xfs_inode_t **ipp2, /* out: inode of new entry, if it |
92 | xfs_inode_t **ipp2, /* inode of new entry, if it | ||
93 | already exists, NULL otherwise. */ | 91 | already exists, NULL otherwise. */ |
94 | xfs_inode_t **i_tab,/* array of inode returned, sorted */ | 92 | xfs_inode_t **i_tab,/* out: array of inode returned, sorted */ |
95 | int *num_inodes) /* number of inodes in array */ | 93 | int *num_inodes) /* out: number of inodes in array */ |
96 | { | 94 | { |
97 | xfs_inode_t *ip1, *ip2, *temp; | 95 | xfs_inode_t *ip2 = NULL; |
96 | xfs_inode_t *temp; | ||
98 | xfs_ino_t inum1, inum2; | 97 | xfs_ino_t inum1, inum2; |
99 | int error; | 98 | int error; |
100 | int i, j; | 99 | int i, j; |
101 | uint lock_mode; | 100 | uint lock_mode; |
102 | int diff_dirs = (dp1 != dp2); | 101 | int diff_dirs = (dp1 != dp2); |
103 | 102 | ||
104 | ip2 = NULL; | ||
105 | |||
106 | /* | 103 | /* |
107 | * First, find out the current inums of the entries so that we | 104 | * First, find out the current inums of the entries so that we |
108 | * can determine the initial locking order. We'll have to | 105 | * can determine the initial locking order. We'll have to |
@@ -110,27 +107,20 @@ xfs_lock_for_rename( | |||
110 | * to see if we still have the right inodes, directories, etc. | 107 | * to see if we still have the right inodes, directories, etc. |
111 | */ | 108 | */ |
112 | lock_mode = xfs_ilock_map_shared(dp1); | 109 | lock_mode = xfs_ilock_map_shared(dp1); |
113 | error = xfs_get_dir_entry(vname1, &ip1); | 110 | IHOLD(ip1); |
114 | if (error) { | 111 | xfs_itrace_ref(ip1); |
115 | xfs_iunlock_map_shared(dp1, lock_mode); | ||
116 | return error; | ||
117 | } | ||
118 | 112 | ||
119 | inum1 = ip1->i_ino; | 113 | inum1 = ip1->i_ino; |
120 | 114 | ||
121 | ASSERT(ip1); | ||
122 | xfs_itrace_ref(ip1); | ||
123 | |||
124 | /* | 115 | /* |
125 | * Unlock dp1 and lock dp2 if they are different. | 116 | * Unlock dp1 and lock dp2 if they are different. |
126 | */ | 117 | */ |
127 | |||
128 | if (diff_dirs) { | 118 | if (diff_dirs) { |
129 | xfs_iunlock_map_shared(dp1, lock_mode); | 119 | xfs_iunlock_map_shared(dp1, lock_mode); |
130 | lock_mode = xfs_ilock_map_shared(dp2); | 120 | lock_mode = xfs_ilock_map_shared(dp2); |
131 | } | 121 | } |
132 | 122 | ||
133 | error = xfs_dir_lookup_int(dp2, lock_mode, vname2, &inum2, &ip2); | 123 | error = xfs_dir_lookup_int(dp2, lock_mode, name2, &inum2, &ip2); |
134 | if (error == ENOENT) { /* target does not need to exist. */ | 124 | if (error == ENOENT) { /* target does not need to exist. */ |
135 | inum2 = 0; | 125 | inum2 = 0; |
136 | } else if (error) { | 126 | } else if (error) { |
@@ -162,6 +152,7 @@ xfs_lock_for_rename( | |||
162 | *num_inodes = 4; | 152 | *num_inodes = 4; |
163 | i_tab[3] = ip2; | 153 | i_tab[3] = ip2; |
164 | } | 154 | } |
155 | *ipp2 = i_tab[3]; | ||
165 | 156 | ||
166 | /* | 157 | /* |
167 | * Sort the elements via bubble sort. (Remember, there are at | 158 | * Sort the elements via bubble sort. (Remember, there are at |
@@ -199,21 +190,6 @@ xfs_lock_for_rename( | |||
199 | xfs_lock_inodes(i_tab, *num_inodes, 0, XFS_ILOCK_SHARED); | 190 | xfs_lock_inodes(i_tab, *num_inodes, 0, XFS_ILOCK_SHARED); |
200 | } | 191 | } |
201 | 192 | ||
202 | /* | ||
203 | * Set the return value. Null out any unused entries in i_tab. | ||
204 | */ | ||
205 | *ipp1 = *ipp2 = NULL; | ||
206 | for (i=0; i < *num_inodes; i++) { | ||
207 | if (i_tab[i]->i_ino == inum1) { | ||
208 | *ipp1 = i_tab[i]; | ||
209 | } | ||
210 | if (i_tab[i]->i_ino == inum2) { | ||
211 | *ipp2 = i_tab[i]; | ||
212 | } | ||
213 | } | ||
214 | for (;i < 4; i++) { | ||
215 | i_tab[i] = NULL; | ||
216 | } | ||
217 | return 0; | 193 | return 0; |
218 | } | 194 | } |
219 | 195 | ||
@@ -223,13 +199,13 @@ xfs_lock_for_rename( | |||
223 | int | 199 | int |
224 | xfs_rename( | 200 | xfs_rename( |
225 | xfs_inode_t *src_dp, | 201 | xfs_inode_t *src_dp, |
226 | bhv_vname_t *src_vname, | 202 | struct xfs_name *src_name, |
227 | bhv_vnode_t *target_dir_vp, | 203 | xfs_inode_t *src_ip, |
228 | bhv_vname_t *target_vname) | 204 | xfs_inode_t *target_dp, |
205 | struct xfs_name *target_name) | ||
229 | { | 206 | { |
230 | bhv_vnode_t *src_dir_vp = XFS_ITOV(src_dp); | ||
231 | xfs_trans_t *tp; | 207 | xfs_trans_t *tp; |
232 | xfs_inode_t *target_dp, *src_ip, *target_ip; | 208 | xfs_inode_t *target_ip; |
233 | xfs_mount_t *mp = src_dp->i_mount; | 209 | xfs_mount_t *mp = src_dp->i_mount; |
234 | int new_parent; /* moving to a new dir */ | 210 | int new_parent; /* moving to a new dir */ |
235 | int src_is_directory; /* src_name is a directory */ | 211 | int src_is_directory; /* src_name is a directory */ |
@@ -243,29 +219,16 @@ xfs_rename( | |||
243 | int spaceres; | 219 | int spaceres; |
244 | int target_link_zero = 0; | 220 | int target_link_zero = 0; |
245 | int num_inodes; | 221 | int num_inodes; |
246 | char *src_name = VNAME(src_vname); | ||
247 | char *target_name = VNAME(target_vname); | ||
248 | int src_namelen = VNAMELEN(src_vname); | ||
249 | int target_namelen = VNAMELEN(target_vname); | ||
250 | 222 | ||
251 | xfs_itrace_entry(src_dp); | 223 | xfs_itrace_entry(src_dp); |
252 | xfs_itrace_entry(xfs_vtoi(target_dir_vp)); | 224 | xfs_itrace_entry(target_dp); |
253 | |||
254 | /* | ||
255 | * Find the XFS behavior descriptor for the target directory | ||
256 | * vnode since it was not handed to us. | ||
257 | */ | ||
258 | target_dp = xfs_vtoi(target_dir_vp); | ||
259 | if (target_dp == NULL) { | ||
260 | return XFS_ERROR(EXDEV); | ||
261 | } | ||
262 | 225 | ||
263 | if (DM_EVENT_ENABLED(src_dp, DM_EVENT_RENAME) || | 226 | if (DM_EVENT_ENABLED(src_dp, DM_EVENT_RENAME) || |
264 | DM_EVENT_ENABLED(target_dp, DM_EVENT_RENAME)) { | 227 | DM_EVENT_ENABLED(target_dp, DM_EVENT_RENAME)) { |
265 | error = XFS_SEND_NAMESP(mp, DM_EVENT_RENAME, | 228 | error = XFS_SEND_NAMESP(mp, DM_EVENT_RENAME, |
266 | src_dir_vp, DM_RIGHT_NULL, | 229 | src_dp, DM_RIGHT_NULL, |
267 | target_dir_vp, DM_RIGHT_NULL, | 230 | target_dp, DM_RIGHT_NULL, |
268 | src_name, target_name, | 231 | src_name->name, target_name->name, |
269 | 0, 0, 0); | 232 | 0, 0, 0); |
270 | if (error) { | 233 | if (error) { |
271 | return error; | 234 | return error; |
@@ -282,10 +245,8 @@ xfs_rename( | |||
282 | * does not exist in the source directory. | 245 | * does not exist in the source directory. |
283 | */ | 246 | */ |
284 | tp = NULL; | 247 | tp = NULL; |
285 | error = xfs_lock_for_rename(src_dp, target_dp, src_vname, | 248 | error = xfs_lock_for_rename(src_dp, target_dp, src_ip, target_name, |
286 | target_vname, &src_ip, &target_ip, inodes, | 249 | &target_ip, inodes, &num_inodes); |
287 | &num_inodes); | ||
288 | |||
289 | if (error) { | 250 | if (error) { |
290 | /* | 251 | /* |
291 | * We have nothing locked, no inode references, and | 252 | * We have nothing locked, no inode references, and |
@@ -331,7 +292,7 @@ xfs_rename( | |||
331 | XFS_BMAP_INIT(&free_list, &first_block); | 292 | XFS_BMAP_INIT(&free_list, &first_block); |
332 | tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); | 293 | tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); |
333 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; | 294 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; |
334 | spaceres = XFS_RENAME_SPACE_RES(mp, target_namelen); | 295 | spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); |
335 | error = xfs_trans_reserve(tp, spaceres, XFS_RENAME_LOG_RES(mp), 0, | 296 | error = xfs_trans_reserve(tp, spaceres, XFS_RENAME_LOG_RES(mp), 0, |
336 | XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT); | 297 | XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT); |
337 | if (error == ENOSPC) { | 298 | if (error == ENOSPC) { |
@@ -365,10 +326,10 @@ xfs_rename( | |||
365 | * them when they unlock the inodes. Also, we need to be careful | 326 | * them when they unlock the inodes. Also, we need to be careful |
366 | * not to add an inode to the transaction more than once. | 327 | * not to add an inode to the transaction more than once. |
367 | */ | 328 | */ |
368 | VN_HOLD(src_dir_vp); | 329 | IHOLD(src_dp); |
369 | xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); | 330 | xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); |
370 | if (new_parent) { | 331 | if (new_parent) { |
371 | VN_HOLD(target_dir_vp); | 332 | IHOLD(target_dp); |
372 | xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); | 333 | xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); |
373 | } | 334 | } |
374 | if ((src_ip != src_dp) && (src_ip != target_dp)) { | 335 | if ((src_ip != src_dp) && (src_ip != target_dp)) { |
@@ -389,9 +350,8 @@ xfs_rename( | |||
389 | * If there's no space reservation, check the entry will | 350 | * If there's no space reservation, check the entry will |
390 | * fit before actually inserting it. | 351 | * fit before actually inserting it. |
391 | */ | 352 | */ |
392 | if (spaceres == 0 && | 353 | error = xfs_dir_canenter(tp, target_dp, target_name, spaceres); |
393 | (error = xfs_dir_canenter(tp, target_dp, target_name, | 354 | if (error) |
394 | target_namelen))) | ||
395 | goto error_return; | 355 | goto error_return; |
396 | /* | 356 | /* |
397 | * If target does not exist and the rename crosses | 357 | * If target does not exist and the rename crosses |
@@ -399,8 +359,8 @@ xfs_rename( | |||
399 | * to account for the ".." reference from the new entry. | 359 | * to account for the ".." reference from the new entry. |
400 | */ | 360 | */ |
401 | error = xfs_dir_createname(tp, target_dp, target_name, | 361 | error = xfs_dir_createname(tp, target_dp, target_name, |
402 | target_namelen, src_ip->i_ino, | 362 | src_ip->i_ino, &first_block, |
403 | &first_block, &free_list, spaceres); | 363 | &free_list, spaceres); |
404 | if (error == ENOSPC) | 364 | if (error == ENOSPC) |
405 | goto error_return; | 365 | goto error_return; |
406 | if (error) | 366 | if (error) |
@@ -439,7 +399,7 @@ xfs_rename( | |||
439 | * name at the destination directory, remove it first. | 399 | * name at the destination directory, remove it first. |
440 | */ | 400 | */ |
441 | error = xfs_dir_replace(tp, target_dp, target_name, | 401 | error = xfs_dir_replace(tp, target_dp, target_name, |
442 | target_namelen, src_ip->i_ino, | 402 | src_ip->i_ino, |
443 | &first_block, &free_list, spaceres); | 403 | &first_block, &free_list, spaceres); |
444 | if (error) | 404 | if (error) |
445 | goto abort_return; | 405 | goto abort_return; |
@@ -476,7 +436,8 @@ xfs_rename( | |||
476 | * Rewrite the ".." entry to point to the new | 436 | * Rewrite the ".." entry to point to the new |
477 | * directory. | 437 | * directory. |
478 | */ | 438 | */ |
479 | error = xfs_dir_replace(tp, src_ip, "..", 2, target_dp->i_ino, | 439 | error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot, |
440 | target_dp->i_ino, | ||
480 | &first_block, &free_list, spaceres); | 441 | &first_block, &free_list, spaceres); |
481 | ASSERT(error != EEXIST); | 442 | ASSERT(error != EEXIST); |
482 | if (error) | 443 | if (error) |
@@ -512,8 +473,8 @@ xfs_rename( | |||
512 | goto abort_return; | 473 | goto abort_return; |
513 | } | 474 | } |
514 | 475 | ||
515 | error = xfs_dir_removename(tp, src_dp, src_name, src_namelen, | 476 | error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, |
516 | src_ip->i_ino, &first_block, &free_list, spaceres); | 477 | &first_block, &free_list, spaceres); |
517 | if (error) | 478 | if (error) |
518 | goto abort_return; | 479 | goto abort_return; |
519 | xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 480 | xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
@@ -580,10 +541,8 @@ xfs_rename( | |||
580 | * the vnode references. | 541 | * the vnode references. |
581 | */ | 542 | */ |
582 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 543 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
583 | if (target_ip != NULL) { | 544 | if (target_ip != NULL) |
584 | xfs_refcache_purge_ip(target_ip); | ||
585 | IRELE(target_ip); | 545 | IRELE(target_ip); |
586 | } | ||
587 | /* | 546 | /* |
588 | * Let interposed file systems know about removed links. | 547 | * Let interposed file systems know about removed links. |
589 | */ | 548 | */ |
@@ -598,9 +557,9 @@ std_return: | |||
598 | if (DM_EVENT_ENABLED(src_dp, DM_EVENT_POSTRENAME) || | 557 | if (DM_EVENT_ENABLED(src_dp, DM_EVENT_POSTRENAME) || |
599 | DM_EVENT_ENABLED(target_dp, DM_EVENT_POSTRENAME)) { | 558 | DM_EVENT_ENABLED(target_dp, DM_EVENT_POSTRENAME)) { |
600 | (void) XFS_SEND_NAMESP (mp, DM_EVENT_POSTRENAME, | 559 | (void) XFS_SEND_NAMESP (mp, DM_EVENT_POSTRENAME, |
601 | src_dir_vp, DM_RIGHT_NULL, | 560 | src_dp, DM_RIGHT_NULL, |
602 | target_dir_vp, DM_RIGHT_NULL, | 561 | target_dp, DM_RIGHT_NULL, |
603 | src_name, target_name, | 562 | src_name->name, target_name->name, |
604 | 0, error, 0); | 563 | 0, error, 0); |
605 | } | 564 | } |
606 | return error; | 565 | return error; |
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 47082c01872d..a0dc6e5bc5b9 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c | |||
@@ -44,6 +44,7 @@ | |||
44 | #include "xfs_rw.h" | 44 | #include "xfs_rw.h" |
45 | #include "xfs_inode_item.h" | 45 | #include "xfs_inode_item.h" |
46 | #include "xfs_trans_space.h" | 46 | #include "xfs_trans_space.h" |
47 | #include "xfs_utils.h" | ||
47 | 48 | ||
48 | 49 | ||
49 | /* | 50 | /* |
@@ -123,14 +124,14 @@ xfs_growfs_rt_alloc( | |||
123 | XFS_GROWRTALLOC_LOG_RES(mp), 0, | 124 | XFS_GROWRTALLOC_LOG_RES(mp), 0, |
124 | XFS_TRANS_PERM_LOG_RES, | 125 | XFS_TRANS_PERM_LOG_RES, |
125 | XFS_DEFAULT_PERM_LOG_COUNT))) | 126 | XFS_DEFAULT_PERM_LOG_COUNT))) |
126 | goto error_exit; | 127 | goto error_cancel; |
127 | cancelflags = XFS_TRANS_RELEASE_LOG_RES; | 128 | cancelflags = XFS_TRANS_RELEASE_LOG_RES; |
128 | /* | 129 | /* |
129 | * Lock the inode. | 130 | * Lock the inode. |
130 | */ | 131 | */ |
131 | if ((error = xfs_trans_iget(mp, tp, ino, 0, | 132 | if ((error = xfs_trans_iget(mp, tp, ino, 0, |
132 | XFS_ILOCK_EXCL, &ip))) | 133 | XFS_ILOCK_EXCL, &ip))) |
133 | goto error_exit; | 134 | goto error_cancel; |
134 | XFS_BMAP_INIT(&flist, &firstblock); | 135 | XFS_BMAP_INIT(&flist, &firstblock); |
135 | /* | 136 | /* |
136 | * Allocate blocks to the bitmap file. | 137 | * Allocate blocks to the bitmap file. |
@@ -143,14 +144,16 @@ xfs_growfs_rt_alloc( | |||
143 | if (!error && nmap < 1) | 144 | if (!error && nmap < 1) |
144 | error = XFS_ERROR(ENOSPC); | 145 | error = XFS_ERROR(ENOSPC); |
145 | if (error) | 146 | if (error) |
146 | goto error_exit; | 147 | goto error_cancel; |
147 | /* | 148 | /* |
148 | * Free any blocks freed up in the transaction, then commit. | 149 | * Free any blocks freed up in the transaction, then commit. |
149 | */ | 150 | */ |
150 | error = xfs_bmap_finish(&tp, &flist, &committed); | 151 | error = xfs_bmap_finish(&tp, &flist, &committed); |
151 | if (error) | 152 | if (error) |
152 | goto error_exit; | 153 | goto error_cancel; |
153 | xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 154 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
155 | if (error) | ||
156 | goto error; | ||
154 | /* | 157 | /* |
155 | * Now we need to clear the allocated blocks. | 158 | * Now we need to clear the allocated blocks. |
156 | * Do this one block per transaction, to keep it simple. | 159 | * Do this one block per transaction, to keep it simple. |
@@ -165,13 +168,13 @@ xfs_growfs_rt_alloc( | |||
165 | */ | 168 | */ |
166 | if ((error = xfs_trans_reserve(tp, 0, | 169 | if ((error = xfs_trans_reserve(tp, 0, |
167 | XFS_GROWRTZERO_LOG_RES(mp), 0, 0, 0))) | 170 | XFS_GROWRTZERO_LOG_RES(mp), 0, 0, 0))) |
168 | goto error_exit; | 171 | goto error_cancel; |
169 | /* | 172 | /* |
170 | * Lock the bitmap inode. | 173 | * Lock the bitmap inode. |
171 | */ | 174 | */ |
172 | if ((error = xfs_trans_iget(mp, tp, ino, 0, | 175 | if ((error = xfs_trans_iget(mp, tp, ino, 0, |
173 | XFS_ILOCK_EXCL, &ip))) | 176 | XFS_ILOCK_EXCL, &ip))) |
174 | goto error_exit; | 177 | goto error_cancel; |
175 | /* | 178 | /* |
176 | * Get a buffer for the block. | 179 | * Get a buffer for the block. |
177 | */ | 180 | */ |
@@ -180,14 +183,16 @@ xfs_growfs_rt_alloc( | |||
180 | mp->m_bsize, 0); | 183 | mp->m_bsize, 0); |
181 | if (bp == NULL) { | 184 | if (bp == NULL) { |
182 | error = XFS_ERROR(EIO); | 185 | error = XFS_ERROR(EIO); |
183 | goto error_exit; | 186 | goto error_cancel; |
184 | } | 187 | } |
185 | memset(XFS_BUF_PTR(bp), 0, mp->m_sb.sb_blocksize); | 188 | memset(XFS_BUF_PTR(bp), 0, mp->m_sb.sb_blocksize); |
186 | xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); | 189 | xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); |
187 | /* | 190 | /* |
188 | * Commit the transaction. | 191 | * Commit the transaction. |
189 | */ | 192 | */ |
190 | xfs_trans_commit(tp, 0); | 193 | error = xfs_trans_commit(tp, 0); |
194 | if (error) | ||
195 | goto error; | ||
191 | } | 196 | } |
192 | /* | 197 | /* |
193 | * Go on to the next extent, if any. | 198 | * Go on to the next extent, if any. |
@@ -195,8 +200,9 @@ xfs_growfs_rt_alloc( | |||
195 | oblocks = map.br_startoff + map.br_blockcount; | 200 | oblocks = map.br_startoff + map.br_blockcount; |
196 | } | 201 | } |
197 | return 0; | 202 | return 0; |
198 | error_exit: | 203 | error_cancel: |
199 | xfs_trans_cancel(tp, cancelflags); | 204 | xfs_trans_cancel(tp, cancelflags); |
205 | error: | ||
200 | return error; | 206 | return error; |
201 | } | 207 | } |
202 | 208 | ||
@@ -1875,6 +1881,7 @@ xfs_growfs_rt( | |||
1875 | xfs_trans_t *tp; /* transaction pointer */ | 1881 | xfs_trans_t *tp; /* transaction pointer */ |
1876 | 1882 | ||
1877 | sbp = &mp->m_sb; | 1883 | sbp = &mp->m_sb; |
1884 | cancelflags = 0; | ||
1878 | /* | 1885 | /* |
1879 | * Initial error checking. | 1886 | * Initial error checking. |
1880 | */ | 1887 | */ |
@@ -2041,13 +2048,15 @@ xfs_growfs_rt( | |||
2041 | */ | 2048 | */ |
2042 | mp->m_rsumlevels = nrsumlevels; | 2049 | mp->m_rsumlevels = nrsumlevels; |
2043 | mp->m_rsumsize = nrsumsize; | 2050 | mp->m_rsumsize = nrsumsize; |
2044 | /* | 2051 | |
2045 | * Commit the transaction. | 2052 | error = xfs_trans_commit(tp, 0); |
2046 | */ | 2053 | if (error) { |
2047 | xfs_trans_commit(tp, 0); | 2054 | tp = NULL; |
2055 | break; | ||
2056 | } | ||
2048 | } | 2057 | } |
2049 | 2058 | ||
2050 | if (error) | 2059 | if (error && tp) |
2051 | xfs_trans_cancel(tp, cancelflags); | 2060 | xfs_trans_cancel(tp, cancelflags); |
2052 | 2061 | ||
2053 | /* | 2062 | /* |
@@ -2278,7 +2287,7 @@ xfs_rtmount_inodes( | |||
2278 | ASSERT(sbp->sb_rsumino != NULLFSINO); | 2287 | ASSERT(sbp->sb_rsumino != NULLFSINO); |
2279 | error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip, 0); | 2288 | error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip, 0); |
2280 | if (error) { | 2289 | if (error) { |
2281 | VN_RELE(XFS_ITOV(mp->m_rbmip)); | 2290 | IRELE(mp->m_rbmip); |
2282 | return error; | 2291 | return error; |
2283 | } | 2292 | } |
2284 | ASSERT(mp->m_rsumip != NULL); | 2293 | ASSERT(mp->m_rsumip != NULL); |
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index cd3ece6cc918..b0f31c09a76d 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c | |||
@@ -126,11 +126,11 @@ xfs_write_sync_logforce( | |||
126 | * when we return. | 126 | * when we return. |
127 | */ | 127 | */ |
128 | if (iip && iip->ili_last_lsn) { | 128 | if (iip && iip->ili_last_lsn) { |
129 | xfs_log_force(mp, iip->ili_last_lsn, | 129 | error = _xfs_log_force(mp, iip->ili_last_lsn, |
130 | XFS_LOG_FORCE | XFS_LOG_SYNC); | 130 | XFS_LOG_FORCE | XFS_LOG_SYNC, NULL); |
131 | } else if (xfs_ipincount(ip) > 0) { | 131 | } else if (xfs_ipincount(ip) > 0) { |
132 | xfs_log_force(mp, (xfs_lsn_t)0, | 132 | error = _xfs_log_force(mp, (xfs_lsn_t)0, |
133 | XFS_LOG_FORCE | XFS_LOG_SYNC); | 133 | XFS_LOG_FORCE | XFS_LOG_SYNC, NULL); |
134 | } | 134 | } |
135 | 135 | ||
136 | } else { | 136 | } else { |
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 7f40628d85c7..0804207c7391 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -113,13 +113,8 @@ struct xfs_mount; | |||
113 | struct xfs_trans; | 113 | struct xfs_trans; |
114 | struct xfs_dquot_acct; | 114 | struct xfs_dquot_acct; |
115 | 115 | ||
116 | typedef struct xfs_ail_entry { | ||
117 | struct xfs_log_item *ail_forw; /* AIL forw pointer */ | ||
118 | struct xfs_log_item *ail_back; /* AIL back pointer */ | ||
119 | } xfs_ail_entry_t; | ||
120 | |||
121 | typedef struct xfs_log_item { | 116 | typedef struct xfs_log_item { |
122 | xfs_ail_entry_t li_ail; /* AIL pointers */ | 117 | struct list_head li_ail; /* AIL pointers */ |
123 | xfs_lsn_t li_lsn; /* last on-disk lsn */ | 118 | xfs_lsn_t li_lsn; /* last on-disk lsn */ |
124 | struct xfs_log_item_desc *li_desc; /* ptr to current desc*/ | 119 | struct xfs_log_item_desc *li_desc; /* ptr to current desc*/ |
125 | struct xfs_mount *li_mountp; /* ptr to fs mount */ | 120 | struct xfs_mount *li_mountp; /* ptr to fs mount */ |
@@ -341,7 +336,6 @@ typedef struct xfs_trans { | |||
341 | unsigned int t_rtx_res; /* # of rt extents resvd */ | 336 | unsigned int t_rtx_res; /* # of rt extents resvd */ |
342 | unsigned int t_rtx_res_used; /* # of resvd rt extents used */ | 337 | unsigned int t_rtx_res_used; /* # of resvd rt extents used */ |
343 | xfs_log_ticket_t t_ticket; /* log mgr ticket */ | 338 | xfs_log_ticket_t t_ticket; /* log mgr ticket */ |
344 | sema_t t_sema; /* sema for commit completion */ | ||
345 | xfs_lsn_t t_lsn; /* log seq num of start of | 339 | xfs_lsn_t t_lsn; /* log seq num of start of |
346 | * transaction. */ | 340 | * transaction. */ |
347 | xfs_lsn_t t_commit_lsn; /* log seq num of end of | 341 | xfs_lsn_t t_commit_lsn; /* log seq num of end of |
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 76d470d8a1e6..1f77c00af566 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c | |||
@@ -28,13 +28,13 @@ | |||
28 | #include "xfs_trans_priv.h" | 28 | #include "xfs_trans_priv.h" |
29 | #include "xfs_error.h" | 29 | #include "xfs_error.h" |
30 | 30 | ||
31 | STATIC void xfs_ail_insert(xfs_ail_entry_t *, xfs_log_item_t *); | 31 | STATIC void xfs_ail_insert(xfs_ail_t *, xfs_log_item_t *); |
32 | STATIC xfs_log_item_t * xfs_ail_delete(xfs_ail_entry_t *, xfs_log_item_t *); | 32 | STATIC xfs_log_item_t * xfs_ail_delete(xfs_ail_t *, xfs_log_item_t *); |
33 | STATIC xfs_log_item_t * xfs_ail_min(xfs_ail_entry_t *); | 33 | STATIC xfs_log_item_t * xfs_ail_min(xfs_ail_t *); |
34 | STATIC xfs_log_item_t * xfs_ail_next(xfs_ail_entry_t *, xfs_log_item_t *); | 34 | STATIC xfs_log_item_t * xfs_ail_next(xfs_ail_t *, xfs_log_item_t *); |
35 | 35 | ||
36 | #ifdef DEBUG | 36 | #ifdef DEBUG |
37 | STATIC void xfs_ail_check(xfs_ail_entry_t *, xfs_log_item_t *); | 37 | STATIC void xfs_ail_check(xfs_ail_t *, xfs_log_item_t *); |
38 | #else | 38 | #else |
39 | #define xfs_ail_check(a,l) | 39 | #define xfs_ail_check(a,l) |
40 | #endif /* DEBUG */ | 40 | #endif /* DEBUG */ |
@@ -57,7 +57,7 @@ xfs_trans_tail_ail( | |||
57 | xfs_log_item_t *lip; | 57 | xfs_log_item_t *lip; |
58 | 58 | ||
59 | spin_lock(&mp->m_ail_lock); | 59 | spin_lock(&mp->m_ail_lock); |
60 | lip = xfs_ail_min(&(mp->m_ail.xa_ail)); | 60 | lip = xfs_ail_min(&mp->m_ail); |
61 | if (lip == NULL) { | 61 | if (lip == NULL) { |
62 | lsn = (xfs_lsn_t)0; | 62 | lsn = (xfs_lsn_t)0; |
63 | } else { | 63 | } else { |
@@ -91,7 +91,7 @@ xfs_trans_push_ail( | |||
91 | { | 91 | { |
92 | xfs_log_item_t *lip; | 92 | xfs_log_item_t *lip; |
93 | 93 | ||
94 | lip = xfs_ail_min(&mp->m_ail.xa_ail); | 94 | lip = xfs_ail_min(&mp->m_ail); |
95 | if (lip && !XFS_FORCED_SHUTDOWN(mp)) { | 95 | if (lip && !XFS_FORCED_SHUTDOWN(mp)) { |
96 | if (XFS_LSN_CMP(threshold_lsn, mp->m_ail.xa_target) > 0) | 96 | if (XFS_LSN_CMP(threshold_lsn, mp->m_ail.xa_target) > 0) |
97 | xfsaild_wakeup(mp, threshold_lsn); | 97 | xfsaild_wakeup(mp, threshold_lsn); |
@@ -111,15 +111,17 @@ xfs_trans_first_push_ail( | |||
111 | { | 111 | { |
112 | xfs_log_item_t *lip; | 112 | xfs_log_item_t *lip; |
113 | 113 | ||
114 | lip = xfs_ail_min(&(mp->m_ail.xa_ail)); | 114 | lip = xfs_ail_min(&mp->m_ail); |
115 | *gen = (int)mp->m_ail.xa_gen; | 115 | *gen = (int)mp->m_ail.xa_gen; |
116 | if (lsn == 0) | 116 | if (lsn == 0) |
117 | return lip; | 117 | return lip; |
118 | 118 | ||
119 | while (lip && (XFS_LSN_CMP(lip->li_lsn, lsn) < 0)) | 119 | list_for_each_entry(lip, &mp->m_ail.xa_ail, li_ail) { |
120 | lip = lip->li_ail.ail_forw; | 120 | if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) |
121 | return lip; | ||
122 | } | ||
121 | 123 | ||
122 | return lip; | 124 | return NULL; |
123 | } | 125 | } |
124 | 126 | ||
125 | /* | 127 | /* |
@@ -329,7 +331,7 @@ xfs_trans_unlocked_item( | |||
329 | * the call to xfs_log_move_tail() doesn't do anything if there's | 331 | * the call to xfs_log_move_tail() doesn't do anything if there's |
330 | * not enough free space to wake people up so we're safe calling it. | 332 | * not enough free space to wake people up so we're safe calling it. |
331 | */ | 333 | */ |
332 | min_lip = xfs_ail_min(&mp->m_ail.xa_ail); | 334 | min_lip = xfs_ail_min(&mp->m_ail); |
333 | 335 | ||
334 | if (min_lip == lip) | 336 | if (min_lip == lip) |
335 | xfs_log_move_tail(mp, 1); | 337 | xfs_log_move_tail(mp, 1); |
@@ -357,15 +359,13 @@ xfs_trans_update_ail( | |||
357 | xfs_log_item_t *lip, | 359 | xfs_log_item_t *lip, |
358 | xfs_lsn_t lsn) __releases(mp->m_ail_lock) | 360 | xfs_lsn_t lsn) __releases(mp->m_ail_lock) |
359 | { | 361 | { |
360 | xfs_ail_entry_t *ailp; | ||
361 | xfs_log_item_t *dlip=NULL; | 362 | xfs_log_item_t *dlip=NULL; |
362 | xfs_log_item_t *mlip; /* ptr to minimum lip */ | 363 | xfs_log_item_t *mlip; /* ptr to minimum lip */ |
363 | 364 | ||
364 | ailp = &(mp->m_ail.xa_ail); | 365 | mlip = xfs_ail_min(&mp->m_ail); |
365 | mlip = xfs_ail_min(ailp); | ||
366 | 366 | ||
367 | if (lip->li_flags & XFS_LI_IN_AIL) { | 367 | if (lip->li_flags & XFS_LI_IN_AIL) { |
368 | dlip = xfs_ail_delete(ailp, lip); | 368 | dlip = xfs_ail_delete(&mp->m_ail, lip); |
369 | ASSERT(dlip == lip); | 369 | ASSERT(dlip == lip); |
370 | } else { | 370 | } else { |
371 | lip->li_flags |= XFS_LI_IN_AIL; | 371 | lip->li_flags |= XFS_LI_IN_AIL; |
@@ -373,11 +373,11 @@ xfs_trans_update_ail( | |||
373 | 373 | ||
374 | lip->li_lsn = lsn; | 374 | lip->li_lsn = lsn; |
375 | 375 | ||
376 | xfs_ail_insert(ailp, lip); | 376 | xfs_ail_insert(&mp->m_ail, lip); |
377 | mp->m_ail.xa_gen++; | 377 | mp->m_ail.xa_gen++; |
378 | 378 | ||
379 | if (mlip == dlip) { | 379 | if (mlip == dlip) { |
380 | mlip = xfs_ail_min(&(mp->m_ail.xa_ail)); | 380 | mlip = xfs_ail_min(&mp->m_ail); |
381 | spin_unlock(&mp->m_ail_lock); | 381 | spin_unlock(&mp->m_ail_lock); |
382 | xfs_log_move_tail(mp, mlip->li_lsn); | 382 | xfs_log_move_tail(mp, mlip->li_lsn); |
383 | } else { | 383 | } else { |
@@ -407,14 +407,12 @@ xfs_trans_delete_ail( | |||
407 | xfs_mount_t *mp, | 407 | xfs_mount_t *mp, |
408 | xfs_log_item_t *lip) __releases(mp->m_ail_lock) | 408 | xfs_log_item_t *lip) __releases(mp->m_ail_lock) |
409 | { | 409 | { |
410 | xfs_ail_entry_t *ailp; | ||
411 | xfs_log_item_t *dlip; | 410 | xfs_log_item_t *dlip; |
412 | xfs_log_item_t *mlip; | 411 | xfs_log_item_t *mlip; |
413 | 412 | ||
414 | if (lip->li_flags & XFS_LI_IN_AIL) { | 413 | if (lip->li_flags & XFS_LI_IN_AIL) { |
415 | ailp = &(mp->m_ail.xa_ail); | 414 | mlip = xfs_ail_min(&mp->m_ail); |
416 | mlip = xfs_ail_min(ailp); | 415 | dlip = xfs_ail_delete(&mp->m_ail, lip); |
417 | dlip = xfs_ail_delete(ailp, lip); | ||
418 | ASSERT(dlip == lip); | 416 | ASSERT(dlip == lip); |
419 | 417 | ||
420 | 418 | ||
@@ -423,7 +421,7 @@ xfs_trans_delete_ail( | |||
423 | mp->m_ail.xa_gen++; | 421 | mp->m_ail.xa_gen++; |
424 | 422 | ||
425 | if (mlip == dlip) { | 423 | if (mlip == dlip) { |
426 | mlip = xfs_ail_min(&(mp->m_ail.xa_ail)); | 424 | mlip = xfs_ail_min(&mp->m_ail); |
427 | spin_unlock(&mp->m_ail_lock); | 425 | spin_unlock(&mp->m_ail_lock); |
428 | xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0)); | 426 | xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0)); |
429 | } else { | 427 | } else { |
@@ -440,7 +438,7 @@ xfs_trans_delete_ail( | |||
440 | else { | 438 | else { |
441 | xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, | 439 | xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, |
442 | "%s: attempting to delete a log item that is not in the AIL", | 440 | "%s: attempting to delete a log item that is not in the AIL", |
443 | __FUNCTION__); | 441 | __func__); |
444 | spin_unlock(&mp->m_ail_lock); | 442 | spin_unlock(&mp->m_ail_lock); |
445 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | 443 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); |
446 | } | 444 | } |
@@ -461,7 +459,7 @@ xfs_trans_first_ail( | |||
461 | { | 459 | { |
462 | xfs_log_item_t *lip; | 460 | xfs_log_item_t *lip; |
463 | 461 | ||
464 | lip = xfs_ail_min(&(mp->m_ail.xa_ail)); | 462 | lip = xfs_ail_min(&mp->m_ail); |
465 | *gen = (int)mp->m_ail.xa_gen; | 463 | *gen = (int)mp->m_ail.xa_gen; |
466 | 464 | ||
467 | return lip; | 465 | return lip; |
@@ -485,9 +483,9 @@ xfs_trans_next_ail( | |||
485 | 483 | ||
486 | ASSERT(mp && lip && gen); | 484 | ASSERT(mp && lip && gen); |
487 | if (mp->m_ail.xa_gen == *gen) { | 485 | if (mp->m_ail.xa_gen == *gen) { |
488 | nlip = xfs_ail_next(&(mp->m_ail.xa_ail), lip); | 486 | nlip = xfs_ail_next(&mp->m_ail, lip); |
489 | } else { | 487 | } else { |
490 | nlip = xfs_ail_min(&(mp->m_ail).xa_ail); | 488 | nlip = xfs_ail_min(&mp->m_ail); |
491 | *gen = (int)mp->m_ail.xa_gen; | 489 | *gen = (int)mp->m_ail.xa_gen; |
492 | if (restarts != NULL) { | 490 | if (restarts != NULL) { |
493 | XFS_STATS_INC(xs_push_ail_restarts); | 491 | XFS_STATS_INC(xs_push_ail_restarts); |
@@ -517,8 +515,7 @@ int | |||
517 | xfs_trans_ail_init( | 515 | xfs_trans_ail_init( |
518 | xfs_mount_t *mp) | 516 | xfs_mount_t *mp) |
519 | { | 517 | { |
520 | mp->m_ail.xa_ail.ail_forw = (xfs_log_item_t*)&mp->m_ail.xa_ail; | 518 | INIT_LIST_HEAD(&mp->m_ail.xa_ail); |
521 | mp->m_ail.xa_ail.ail_back = (xfs_log_item_t*)&mp->m_ail.xa_ail; | ||
522 | return xfsaild_start(mp); | 519 | return xfsaild_start(mp); |
523 | } | 520 | } |
524 | 521 | ||
@@ -537,7 +534,7 @@ xfs_trans_ail_destroy( | |||
537 | */ | 534 | */ |
538 | STATIC void | 535 | STATIC void |
539 | xfs_ail_insert( | 536 | xfs_ail_insert( |
540 | xfs_ail_entry_t *base, | 537 | xfs_ail_t *ailp, |
541 | xfs_log_item_t *lip) | 538 | xfs_log_item_t *lip) |
542 | /* ARGSUSED */ | 539 | /* ARGSUSED */ |
543 | { | 540 | { |
@@ -546,27 +543,22 @@ xfs_ail_insert( | |||
546 | /* | 543 | /* |
547 | * If the list is empty, just insert the item. | 544 | * If the list is empty, just insert the item. |
548 | */ | 545 | */ |
549 | if (base->ail_back == (xfs_log_item_t*)base) { | 546 | if (list_empty(&ailp->xa_ail)) { |
550 | base->ail_forw = lip; | 547 | list_add(&lip->li_ail, &ailp->xa_ail); |
551 | base->ail_back = lip; | ||
552 | lip->li_ail.ail_forw = (xfs_log_item_t*)base; | ||
553 | lip->li_ail.ail_back = (xfs_log_item_t*)base; | ||
554 | return; | 548 | return; |
555 | } | 549 | } |
556 | 550 | ||
557 | next_lip = base->ail_back; | 551 | list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { |
558 | while ((next_lip != (xfs_log_item_t*)base) && | 552 | if (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0) |
559 | (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) > 0)) { | 553 | break; |
560 | next_lip = next_lip->li_ail.ail_back; | ||
561 | } | 554 | } |
562 | ASSERT((next_lip == (xfs_log_item_t*)base) || | 555 | |
556 | ASSERT((&next_lip->li_ail == &ailp->xa_ail) || | ||
563 | (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0)); | 557 | (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0)); |
564 | lip->li_ail.ail_forw = next_lip->li_ail.ail_forw; | ||
565 | lip->li_ail.ail_back = next_lip; | ||
566 | next_lip->li_ail.ail_forw = lip; | ||
567 | lip->li_ail.ail_forw->li_ail.ail_back = lip; | ||
568 | 558 | ||
569 | xfs_ail_check(base, lip); | 559 | list_add(&lip->li_ail, &next_lip->li_ail); |
560 | |||
561 | xfs_ail_check(ailp, lip); | ||
570 | return; | 562 | return; |
571 | } | 563 | } |
572 | 564 | ||
@@ -576,15 +568,13 @@ xfs_ail_insert( | |||
576 | /*ARGSUSED*/ | 568 | /*ARGSUSED*/ |
577 | STATIC xfs_log_item_t * | 569 | STATIC xfs_log_item_t * |
578 | xfs_ail_delete( | 570 | xfs_ail_delete( |
579 | xfs_ail_entry_t *base, | 571 | xfs_ail_t *ailp, |
580 | xfs_log_item_t *lip) | 572 | xfs_log_item_t *lip) |
581 | /* ARGSUSED */ | 573 | /* ARGSUSED */ |
582 | { | 574 | { |
583 | xfs_ail_check(base, lip); | 575 | xfs_ail_check(ailp, lip); |
584 | lip->li_ail.ail_forw->li_ail.ail_back = lip->li_ail.ail_back; | 576 | |
585 | lip->li_ail.ail_back->li_ail.ail_forw = lip->li_ail.ail_forw; | 577 | list_del(&lip->li_ail); |
586 | lip->li_ail.ail_forw = NULL; | ||
587 | lip->li_ail.ail_back = NULL; | ||
588 | 578 | ||
589 | return lip; | 579 | return lip; |
590 | } | 580 | } |
@@ -595,14 +585,13 @@ xfs_ail_delete( | |||
595 | */ | 585 | */ |
596 | STATIC xfs_log_item_t * | 586 | STATIC xfs_log_item_t * |
597 | xfs_ail_min( | 587 | xfs_ail_min( |
598 | xfs_ail_entry_t *base) | 588 | xfs_ail_t *ailp) |
599 | /* ARGSUSED */ | 589 | /* ARGSUSED */ |
600 | { | 590 | { |
601 | register xfs_log_item_t *forw = base->ail_forw; | 591 | if (list_empty(&ailp->xa_ail)) |
602 | if (forw == (xfs_log_item_t*)base) { | ||
603 | return NULL; | 592 | return NULL; |
604 | } | 593 | |
605 | return forw; | 594 | return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); |
606 | } | 595 | } |
607 | 596 | ||
608 | /* | 597 | /* |
@@ -612,15 +601,14 @@ xfs_ail_min( | |||
612 | */ | 601 | */ |
613 | STATIC xfs_log_item_t * | 602 | STATIC xfs_log_item_t * |
614 | xfs_ail_next( | 603 | xfs_ail_next( |
615 | xfs_ail_entry_t *base, | 604 | xfs_ail_t *ailp, |
616 | xfs_log_item_t *lip) | 605 | xfs_log_item_t *lip) |
617 | /* ARGSUSED */ | 606 | /* ARGSUSED */ |
618 | { | 607 | { |
619 | if (lip->li_ail.ail_forw == (xfs_log_item_t*)base) { | 608 | if (lip->li_ail.next == &ailp->xa_ail) |
620 | return NULL; | 609 | return NULL; |
621 | } | ||
622 | return lip->li_ail.ail_forw; | ||
623 | 610 | ||
611 | return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail); | ||
624 | } | 612 | } |
625 | 613 | ||
626 | #ifdef DEBUG | 614 | #ifdef DEBUG |
@@ -629,57 +617,40 @@ xfs_ail_next( | |||
629 | */ | 617 | */ |
630 | STATIC void | 618 | STATIC void |
631 | xfs_ail_check( | 619 | xfs_ail_check( |
632 | xfs_ail_entry_t *base, | 620 | xfs_ail_t *ailp, |
633 | xfs_log_item_t *lip) | 621 | xfs_log_item_t *lip) |
634 | { | 622 | { |
635 | xfs_log_item_t *prev_lip; | 623 | xfs_log_item_t *prev_lip; |
636 | 624 | ||
637 | prev_lip = base->ail_forw; | 625 | if (list_empty(&ailp->xa_ail)) |
638 | if (prev_lip == (xfs_log_item_t*)base) { | ||
639 | /* | ||
640 | * Make sure the pointers are correct when the list | ||
641 | * is empty. | ||
642 | */ | ||
643 | ASSERT(base->ail_back == (xfs_log_item_t*)base); | ||
644 | return; | 626 | return; |
645 | } | ||
646 | 627 | ||
647 | /* | 628 | /* |
648 | * Check the next and previous entries are valid. | 629 | * Check the next and previous entries are valid. |
649 | */ | 630 | */ |
650 | ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); | 631 | ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); |
651 | prev_lip = lip->li_ail.ail_back; | 632 | prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail); |
652 | if (prev_lip != (xfs_log_item_t*)base) { | 633 | if (&prev_lip->li_ail != &ailp->xa_ail) |
653 | ASSERT(prev_lip->li_ail.ail_forw == lip); | ||
654 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); | 634 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); |
655 | } | 635 | |
656 | prev_lip = lip->li_ail.ail_forw; | 636 | prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail); |
657 | if (prev_lip != (xfs_log_item_t*)base) { | 637 | if (&prev_lip->li_ail != &ailp->xa_ail) |
658 | ASSERT(prev_lip->li_ail.ail_back == lip); | ||
659 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); | 638 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); |
660 | } | ||
661 | 639 | ||
662 | 640 | ||
663 | #ifdef XFS_TRANS_DEBUG | 641 | #ifdef XFS_TRANS_DEBUG |
664 | /* | 642 | /* |
665 | * Walk the list checking forward and backward pointers, | 643 | * Walk the list checking lsn ordering, and that every entry has the |
666 | * lsn ordering, and that every entry has the XFS_LI_IN_AIL | 644 | * XFS_LI_IN_AIL flag set. This is really expensive, so only do it |
667 | * flag set. This is really expensive, so only do it when | 645 | * when specifically debugging the transaction subsystem. |
668 | * specifically debugging the transaction subsystem. | ||
669 | */ | 646 | */ |
670 | prev_lip = (xfs_log_item_t*)base; | 647 | prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); |
671 | while (lip != (xfs_log_item_t*)base) { | 648 | list_for_each_entry(lip, &ailp->xa_ail, li_ail) { |
672 | if (prev_lip != (xfs_log_item_t*)base) { | 649 | if (&prev_lip->li_ail != &ailp->xa_ail) |
673 | ASSERT(prev_lip->li_ail.ail_forw == lip); | ||
674 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); | 650 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); |
675 | } | ||
676 | ASSERT(lip->li_ail.ail_back == prev_lip); | ||
677 | ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); | 651 | ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); |
678 | prev_lip = lip; | 652 | prev_lip = lip; |
679 | lip = lip->li_ail.ail_forw; | ||
680 | } | 653 | } |
681 | ASSERT(lip == (xfs_log_item_t*)base); | ||
682 | ASSERT(base->ail_back == prev_lip); | ||
683 | #endif /* XFS_TRANS_DEBUG */ | 654 | #endif /* XFS_TRANS_DEBUG */ |
684 | } | 655 | } |
685 | #endif /* DEBUG */ | 656 | #endif /* DEBUG */ |
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 60b6b898022b..cb0c5839154b 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -304,7 +304,8 @@ xfs_trans_read_buf( | |||
304 | if (tp == NULL) { | 304 | if (tp == NULL) { |
305 | bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY); | 305 | bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY); |
306 | if (!bp) | 306 | if (!bp) |
307 | return XFS_ERROR(ENOMEM); | 307 | return (flags & XFS_BUF_TRYLOCK) ? |
308 | EAGAIN : XFS_ERROR(ENOMEM); | ||
308 | 309 | ||
309 | if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) { | 310 | if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) { |
310 | xfs_ioerror_alert("xfs_trans_read_buf", mp, | 311 | xfs_ioerror_alert("xfs_trans_read_buf", mp, |
@@ -353,17 +354,15 @@ xfs_trans_read_buf( | |||
353 | ASSERT(!XFS_BUF_ISASYNC(bp)); | 354 | ASSERT(!XFS_BUF_ISASYNC(bp)); |
354 | XFS_BUF_READ(bp); | 355 | XFS_BUF_READ(bp); |
355 | xfsbdstrat(tp->t_mountp, bp); | 356 | xfsbdstrat(tp->t_mountp, bp); |
356 | xfs_iowait(bp); | 357 | error = xfs_iowait(bp); |
357 | if (XFS_BUF_GETERROR(bp) != 0) { | 358 | if (error) { |
358 | xfs_ioerror_alert("xfs_trans_read_buf", mp, | 359 | xfs_ioerror_alert("xfs_trans_read_buf", mp, |
359 | bp, blkno); | 360 | bp, blkno); |
360 | error = XFS_BUF_GETERROR(bp); | ||
361 | xfs_buf_relse(bp); | 361 | xfs_buf_relse(bp); |
362 | /* | 362 | /* |
363 | * We can gracefully recover from most | 363 | * We can gracefully recover from most read |
364 | * read errors. Ones we can't are those | 364 | * errors. Ones we can't are those that happen |
365 | * that happen after the transaction's | 365 | * after the transaction's already dirty. |
366 | * already dirty. | ||
367 | */ | 366 | */ |
368 | if (tp->t_flags & XFS_TRANS_DIRTY) | 367 | if (tp->t_flags & XFS_TRANS_DIRTY) |
369 | xfs_force_shutdown(tp->t_mountp, | 368 | xfs_force_shutdown(tp->t_mountp, |
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index 5c89be475464..0f5191644ab2 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h | |||
@@ -160,4 +160,9 @@ typedef enum { | |||
160 | XFS_BTNUM_MAX | 160 | XFS_BTNUM_MAX |
161 | } xfs_btnum_t; | 161 | } xfs_btnum_t; |
162 | 162 | ||
163 | struct xfs_name { | ||
164 | const char *name; | ||
165 | int len; | ||
166 | }; | ||
167 | |||
163 | #endif /* __XFS_TYPES_H__ */ | 168 | #endif /* __XFS_TYPES_H__ */ |
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index 18a85e746680..2b8dc7e40772 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c | |||
@@ -40,34 +40,12 @@ | |||
40 | #include "xfs_itable.h" | 40 | #include "xfs_itable.h" |
41 | #include "xfs_utils.h" | 41 | #include "xfs_utils.h" |
42 | 42 | ||
43 | /* | ||
44 | * xfs_get_dir_entry is used to get a reference to an inode given | ||
45 | * its parent directory inode and the name of the file. It does | ||
46 | * not lock the child inode, and it unlocks the directory before | ||
47 | * returning. The directory's generation number is returned for | ||
48 | * use by a later call to xfs_lock_dir_and_entry. | ||
49 | */ | ||
50 | int | ||
51 | xfs_get_dir_entry( | ||
52 | bhv_vname_t *dentry, | ||
53 | xfs_inode_t **ipp) | ||
54 | { | ||
55 | bhv_vnode_t *vp; | ||
56 | |||
57 | vp = VNAME_TO_VNODE(dentry); | ||
58 | |||
59 | *ipp = xfs_vtoi(vp); | ||
60 | if (!*ipp) | ||
61 | return XFS_ERROR(ENOENT); | ||
62 | VN_HOLD(vp); | ||
63 | return 0; | ||
64 | } | ||
65 | 43 | ||
66 | int | 44 | int |
67 | xfs_dir_lookup_int( | 45 | xfs_dir_lookup_int( |
68 | xfs_inode_t *dp, | 46 | xfs_inode_t *dp, |
69 | uint lock_mode, | 47 | uint lock_mode, |
70 | bhv_vname_t *dentry, | 48 | struct xfs_name *name, |
71 | xfs_ino_t *inum, | 49 | xfs_ino_t *inum, |
72 | xfs_inode_t **ipp) | 50 | xfs_inode_t **ipp) |
73 | { | 51 | { |
@@ -75,7 +53,7 @@ xfs_dir_lookup_int( | |||
75 | 53 | ||
76 | xfs_itrace_entry(dp); | 54 | xfs_itrace_entry(dp); |
77 | 55 | ||
78 | error = xfs_dir_lookup(NULL, dp, VNAME(dentry), VNAMELEN(dentry), inum); | 56 | error = xfs_dir_lookup(NULL, dp, name, inum); |
79 | if (!error) { | 57 | if (!error) { |
80 | /* | 58 | /* |
81 | * Unlock the directory. We do this because we can't | 59 | * Unlock the directory. We do this because we can't |
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h index f857fcccb723..175b126d2cab 100644 --- a/fs/xfs/xfs_utils.h +++ b/fs/xfs/xfs_utils.h | |||
@@ -21,15 +21,14 @@ | |||
21 | #define IRELE(ip) VN_RELE(XFS_ITOV(ip)) | 21 | #define IRELE(ip) VN_RELE(XFS_ITOV(ip)) |
22 | #define IHOLD(ip) VN_HOLD(XFS_ITOV(ip)) | 22 | #define IHOLD(ip) VN_HOLD(XFS_ITOV(ip)) |
23 | 23 | ||
24 | extern int xfs_get_dir_entry (bhv_vname_t *, xfs_inode_t **); | 24 | extern int xfs_dir_lookup_int(xfs_inode_t *, uint, struct xfs_name *, |
25 | extern int xfs_dir_lookup_int (xfs_inode_t *, uint, bhv_vname_t *, xfs_ino_t *, | 25 | xfs_ino_t *, xfs_inode_t **); |
26 | xfs_inode_t **); | 26 | extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *); |
27 | extern int xfs_truncate_file (xfs_mount_t *, xfs_inode_t *); | 27 | extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, |
28 | extern int xfs_dir_ialloc (xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, | ||
29 | xfs_dev_t, cred_t *, prid_t, int, | 28 | xfs_dev_t, cred_t *, prid_t, int, |
30 | xfs_inode_t **, int *); | 29 | xfs_inode_t **, int *); |
31 | extern int xfs_droplink (xfs_trans_t *, xfs_inode_t *); | 30 | extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *); |
32 | extern int xfs_bumplink (xfs_trans_t *, xfs_inode_t *); | 31 | extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *); |
33 | extern void xfs_bump_ino_vers2 (xfs_trans_t *, xfs_inode_t *); | 32 | extern void xfs_bump_ino_vers2(xfs_trans_t *, xfs_inode_t *); |
34 | 33 | ||
35 | #endif /* __XFS_UTILS_H__ */ | 34 | #endif /* __XFS_UTILS_H__ */ |
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 7094caff13cf..fc48158fe479 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c | |||
@@ -43,7 +43,6 @@ | |||
43 | #include "xfs_error.h" | 43 | #include "xfs_error.h" |
44 | #include "xfs_bmap.h" | 44 | #include "xfs_bmap.h" |
45 | #include "xfs_rw.h" | 45 | #include "xfs_rw.h" |
46 | #include "xfs_refcache.h" | ||
47 | #include "xfs_buf_item.h" | 46 | #include "xfs_buf_item.h" |
48 | #include "xfs_log_priv.h" | 47 | #include "xfs_log_priv.h" |
49 | #include "xfs_dir2_trace.h" | 48 | #include "xfs_dir2_trace.h" |
@@ -56,6 +55,7 @@ | |||
56 | #include "xfs_fsops.h" | 55 | #include "xfs_fsops.h" |
57 | #include "xfs_vnodeops.h" | 56 | #include "xfs_vnodeops.h" |
58 | #include "xfs_vfsops.h" | 57 | #include "xfs_vfsops.h" |
58 | #include "xfs_utils.h" | ||
59 | 59 | ||
60 | 60 | ||
61 | int __init | 61 | int __init |
@@ -69,15 +69,17 @@ xfs_init(void) | |||
69 | /* | 69 | /* |
70 | * Initialize all of the zone allocators we use. | 70 | * Initialize all of the zone allocators we use. |
71 | */ | 71 | */ |
72 | xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t), | ||
73 | "xfs_log_ticket"); | ||
72 | xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t), | 74 | xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t), |
73 | "xfs_bmap_free_item"); | 75 | "xfs_bmap_free_item"); |
74 | xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), | 76 | xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), |
75 | "xfs_btree_cur"); | 77 | "xfs_btree_cur"); |
76 | xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans"); | 78 | xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t), |
77 | xfs_da_state_zone = | 79 | "xfs_da_state"); |
78 | kmem_zone_init(sizeof(xfs_da_state_t), "xfs_da_state"); | ||
79 | xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); | 80 | xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); |
80 | xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); | 81 | xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); |
82 | xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans"); | ||
81 | xfs_acl_zone_init(xfs_acl_zone, "xfs_acl"); | 83 | xfs_acl_zone_init(xfs_acl_zone, "xfs_acl"); |
82 | xfs_mru_cache_init(); | 84 | xfs_mru_cache_init(); |
83 | xfs_filestream_init(); | 85 | xfs_filestream_init(); |
@@ -113,9 +115,6 @@ xfs_init(void) | |||
113 | xfs_ili_zone = | 115 | xfs_ili_zone = |
114 | kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili", | 116 | kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili", |
115 | KM_ZONE_SPREAD, NULL); | 117 | KM_ZONE_SPREAD, NULL); |
116 | xfs_icluster_zone = | ||
117 | kmem_zone_init_flags(sizeof(xfs_icluster_t), "xfs_icluster", | ||
118 | KM_ZONE_SPREAD, NULL); | ||
119 | 118 | ||
120 | /* | 119 | /* |
121 | * Allocate global trace buffers. | 120 | * Allocate global trace buffers. |
@@ -153,11 +152,9 @@ xfs_cleanup(void) | |||
153 | extern kmem_zone_t *xfs_inode_zone; | 152 | extern kmem_zone_t *xfs_inode_zone; |
154 | extern kmem_zone_t *xfs_efd_zone; | 153 | extern kmem_zone_t *xfs_efd_zone; |
155 | extern kmem_zone_t *xfs_efi_zone; | 154 | extern kmem_zone_t *xfs_efi_zone; |
156 | extern kmem_zone_t *xfs_icluster_zone; | ||
157 | 155 | ||
158 | xfs_cleanup_procfs(); | 156 | xfs_cleanup_procfs(); |
159 | xfs_sysctl_unregister(); | 157 | xfs_sysctl_unregister(); |
160 | xfs_refcache_destroy(); | ||
161 | xfs_filestream_uninit(); | 158 | xfs_filestream_uninit(); |
162 | xfs_mru_cache_uninit(); | 159 | xfs_mru_cache_uninit(); |
163 | xfs_acl_zone_destroy(xfs_acl_zone); | 160 | xfs_acl_zone_destroy(xfs_acl_zone); |
@@ -189,7 +186,6 @@ xfs_cleanup(void) | |||
189 | kmem_zone_destroy(xfs_efi_zone); | 186 | kmem_zone_destroy(xfs_efi_zone); |
190 | kmem_zone_destroy(xfs_ifork_zone); | 187 | kmem_zone_destroy(xfs_ifork_zone); |
191 | kmem_zone_destroy(xfs_ili_zone); | 188 | kmem_zone_destroy(xfs_ili_zone); |
192 | kmem_zone_destroy(xfs_icluster_zone); | ||
193 | } | 189 | } |
194 | 190 | ||
195 | /* | 191 | /* |
@@ -573,7 +569,7 @@ xfs_unmount( | |||
573 | #ifdef HAVE_DMAPI | 569 | #ifdef HAVE_DMAPI |
574 | if (mp->m_flags & XFS_MOUNT_DMAPI) { | 570 | if (mp->m_flags & XFS_MOUNT_DMAPI) { |
575 | error = XFS_SEND_PREUNMOUNT(mp, | 571 | error = XFS_SEND_PREUNMOUNT(mp, |
576 | rvp, DM_RIGHT_NULL, rvp, DM_RIGHT_NULL, | 572 | rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL, |
577 | NULL, NULL, 0, 0, | 573 | NULL, NULL, 0, 0, |
578 | (mp->m_dmevmask & (1<<DM_EVENT_PREUNMOUNT))? | 574 | (mp->m_dmevmask & (1<<DM_EVENT_PREUNMOUNT))? |
579 | 0:DM_FLAGS_UNWANTED); | 575 | 0:DM_FLAGS_UNWANTED); |
@@ -584,11 +580,6 @@ xfs_unmount( | |||
584 | 0 : DM_FLAGS_UNWANTED; | 580 | 0 : DM_FLAGS_UNWANTED; |
585 | } | 581 | } |
586 | #endif | 582 | #endif |
587 | /* | ||
588 | * First blow any referenced inode from this file system | ||
589 | * out of the reference cache, and delete the timer. | ||
590 | */ | ||
591 | xfs_refcache_purge_mp(mp); | ||
592 | 583 | ||
593 | /* | 584 | /* |
594 | * Blow away any referenced inode in the filestreams cache. | 585 | * Blow away any referenced inode in the filestreams cache. |
@@ -607,7 +598,7 @@ xfs_unmount( | |||
607 | /* | 598 | /* |
608 | * Drop the reference count | 599 | * Drop the reference count |
609 | */ | 600 | */ |
610 | VN_RELE(rvp); | 601 | IRELE(rip); |
611 | 602 | ||
612 | /* | 603 | /* |
613 | * If we're forcing a shutdown, typically because of a media error, | 604 | * If we're forcing a shutdown, typically because of a media error, |
@@ -629,7 +620,7 @@ out: | |||
629 | /* Note: mp structure must still exist for | 620 | /* Note: mp structure must still exist for |
630 | * XFS_SEND_UNMOUNT() call. | 621 | * XFS_SEND_UNMOUNT() call. |
631 | */ | 622 | */ |
632 | XFS_SEND_UNMOUNT(mp, error == 0 ? rvp : NULL, | 623 | XFS_SEND_UNMOUNT(mp, error == 0 ? rip : NULL, |
633 | DM_RIGHT_NULL, 0, error, unmount_event_flags); | 624 | DM_RIGHT_NULL, 0, error, unmount_event_flags); |
634 | } | 625 | } |
635 | if (xfs_unmountfs_needed) { | 626 | if (xfs_unmountfs_needed) { |
@@ -646,13 +637,12 @@ out: | |||
646 | return XFS_ERROR(error); | 637 | return XFS_ERROR(error); |
647 | } | 638 | } |
648 | 639 | ||
649 | STATIC int | 640 | STATIC void |
650 | xfs_quiesce_fs( | 641 | xfs_quiesce_fs( |
651 | xfs_mount_t *mp) | 642 | xfs_mount_t *mp) |
652 | { | 643 | { |
653 | int count = 0, pincount; | 644 | int count = 0, pincount; |
654 | 645 | ||
655 | xfs_refcache_purge_mp(mp); | ||
656 | xfs_flush_buftarg(mp->m_ddev_targp, 0); | 646 | xfs_flush_buftarg(mp->m_ddev_targp, 0); |
657 | xfs_finish_reclaim_all(mp, 0); | 647 | xfs_finish_reclaim_all(mp, 0); |
658 | 648 | ||
@@ -671,8 +661,6 @@ xfs_quiesce_fs( | |||
671 | count++; | 661 | count++; |
672 | } | 662 | } |
673 | } while (count < 2); | 663 | } while (count < 2); |
674 | |||
675 | return 0; | ||
676 | } | 664 | } |
677 | 665 | ||
678 | /* | 666 | /* |
@@ -684,6 +672,8 @@ void | |||
684 | xfs_attr_quiesce( | 672 | xfs_attr_quiesce( |
685 | xfs_mount_t *mp) | 673 | xfs_mount_t *mp) |
686 | { | 674 | { |
675 | int error = 0; | ||
676 | |||
687 | /* wait for all modifications to complete */ | 677 | /* wait for all modifications to complete */ |
688 | while (atomic_read(&mp->m_active_trans) > 0) | 678 | while (atomic_read(&mp->m_active_trans) > 0) |
689 | delay(100); | 679 | delay(100); |
@@ -694,7 +684,11 @@ xfs_attr_quiesce( | |||
694 | ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0); | 684 | ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0); |
695 | 685 | ||
696 | /* Push the superblock and write an unmount record */ | 686 | /* Push the superblock and write an unmount record */ |
697 | xfs_log_sbcount(mp, 1); | 687 | error = xfs_log_sbcount(mp, 1); |
688 | if (error) | ||
689 | xfs_fs_cmn_err(CE_WARN, mp, | ||
690 | "xfs_attr_quiesce: failed to log sb changes. " | ||
691 | "Frozen image may not be consistent."); | ||
698 | xfs_log_unmount_write(mp); | 692 | xfs_log_unmount_write(mp); |
699 | xfs_unmountfs_writesb(mp); | 693 | xfs_unmountfs_writesb(mp); |
700 | } | 694 | } |
@@ -790,8 +784,8 @@ xfs_unmount_flush( | |||
790 | goto fscorrupt_out2; | 784 | goto fscorrupt_out2; |
791 | 785 | ||
792 | if (rbmip) { | 786 | if (rbmip) { |
793 | VN_RELE(XFS_ITOV(rbmip)); | 787 | IRELE(rbmip); |
794 | VN_RELE(XFS_ITOV(rsumip)); | 788 | IRELE(rsumip); |
795 | } | 789 | } |
796 | 790 | ||
797 | xfs_iunlock(rip, XFS_ILOCK_EXCL); | 791 | xfs_iunlock(rip, XFS_ILOCK_EXCL); |
@@ -1169,10 +1163,10 @@ xfs_sync_inodes( | |||
1169 | * above, then wait until after we've unlocked | 1163 | * above, then wait until after we've unlocked |
1170 | * the inode to release the reference. This is | 1164 | * the inode to release the reference. This is |
1171 | * because we can be already holding the inode | 1165 | * because we can be already holding the inode |
1172 | * lock when VN_RELE() calls xfs_inactive(). | 1166 | * lock when IRELE() calls xfs_inactive(). |
1173 | * | 1167 | * |
1174 | * Make sure to drop the mount lock before calling | 1168 | * Make sure to drop the mount lock before calling |
1175 | * VN_RELE() so that we don't trip over ourselves if | 1169 | * IRELE() so that we don't trip over ourselves if |
1176 | * we have to go for the mount lock again in the | 1170 | * we have to go for the mount lock again in the |
1177 | * inactive code. | 1171 | * inactive code. |
1178 | */ | 1172 | */ |
@@ -1180,7 +1174,7 @@ xfs_sync_inodes( | |||
1180 | IPOINTER_INSERT(ip, mp); | 1174 | IPOINTER_INSERT(ip, mp); |
1181 | } | 1175 | } |
1182 | 1176 | ||
1183 | VN_RELE(vp); | 1177 | IRELE(ip); |
1184 | 1178 | ||
1185 | vnode_refed = B_FALSE; | 1179 | vnode_refed = B_FALSE; |
1186 | } | 1180 | } |
@@ -1323,30 +1317,8 @@ xfs_syncsub( | |||
1323 | } | 1317 | } |
1324 | 1318 | ||
1325 | /* | 1319 | /* |
1326 | * If this is the periodic sync, then kick some entries out of | ||
1327 | * the reference cache. This ensures that idle entries are | ||
1328 | * eventually kicked out of the cache. | ||
1329 | */ | ||
1330 | if (flags & SYNC_REFCACHE) { | ||
1331 | if (flags & SYNC_WAIT) | ||
1332 | xfs_refcache_purge_mp(mp); | ||
1333 | else | ||
1334 | xfs_refcache_purge_some(mp); | ||
1335 | } | ||
1336 | |||
1337 | /* | ||
1338 | * If asked, update the disk superblock with incore counter values if we | ||
1339 | * are using non-persistent counters so that they don't get too far out | ||
1340 | * of sync if we crash or get a forced shutdown. We don't want to force | ||
1341 | * this to disk, just get a transaction into the iclogs.... | ||
1342 | */ | ||
1343 | if (flags & SYNC_SUPER) | ||
1344 | xfs_log_sbcount(mp, 0); | ||
1345 | |||
1346 | /* | ||
1347 | * Now check to see if the log needs a "dummy" transaction. | 1320 | * Now check to see if the log needs a "dummy" transaction. |
1348 | */ | 1321 | */ |
1349 | |||
1350 | if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) { | 1322 | if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) { |
1351 | xfs_trans_t *tp; | 1323 | xfs_trans_t *tp; |
1352 | xfs_inode_t *ip; | 1324 | xfs_inode_t *ip; |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 64c5953feca4..6650601c64f7 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -48,7 +48,6 @@ | |||
48 | #include "xfs_quota.h" | 48 | #include "xfs_quota.h" |
49 | #include "xfs_utils.h" | 49 | #include "xfs_utils.h" |
50 | #include "xfs_rtalloc.h" | 50 | #include "xfs_rtalloc.h" |
51 | #include "xfs_refcache.h" | ||
52 | #include "xfs_trans_space.h" | 51 | #include "xfs_trans_space.h" |
53 | #include "xfs_log_priv.h" | 52 | #include "xfs_log_priv.h" |
54 | #include "xfs_filestream.h" | 53 | #include "xfs_filestream.h" |
@@ -327,7 +326,7 @@ xfs_setattr( | |||
327 | if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) && | 326 | if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) && |
328 | !(flags & ATTR_DMI)) { | 327 | !(flags & ATTR_DMI)) { |
329 | int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR; | 328 | int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR; |
330 | code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, vp, | 329 | code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip, |
331 | vap->va_size, 0, dmflags, NULL); | 330 | vap->va_size, 0, dmflags, NULL); |
332 | if (code) { | 331 | if (code) { |
333 | lock_flags = 0; | 332 | lock_flags = 0; |
@@ -634,6 +633,15 @@ xfs_setattr( | |||
634 | * Truncate file. Must have write permission and not be a directory. | 633 | * Truncate file. Must have write permission and not be a directory. |
635 | */ | 634 | */ |
636 | if (mask & XFS_AT_SIZE) { | 635 | if (mask & XFS_AT_SIZE) { |
636 | /* | ||
637 | * Only change the c/mtime if we are changing the size | ||
638 | * or we are explicitly asked to change it. This handles | ||
639 | * the semantic difference between truncate() and ftruncate() | ||
640 | * as implemented in the VFS. | ||
641 | */ | ||
642 | if (vap->va_size != ip->i_size || (mask & XFS_AT_CTIME)) | ||
643 | timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; | ||
644 | |||
637 | if (vap->va_size > ip->i_size) { | 645 | if (vap->va_size > ip->i_size) { |
638 | xfs_igrow_finish(tp, ip, vap->va_size, | 646 | xfs_igrow_finish(tp, ip, vap->va_size, |
639 | !(flags & ATTR_DMI)); | 647 | !(flags & ATTR_DMI)); |
@@ -662,10 +670,6 @@ xfs_setattr( | |||
662 | */ | 670 | */ |
663 | xfs_iflags_set(ip, XFS_ITRUNCATED); | 671 | xfs_iflags_set(ip, XFS_ITRUNCATED); |
664 | } | 672 | } |
665 | /* | ||
666 | * Have to do this even if the file's size doesn't change. | ||
667 | */ | ||
668 | timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; | ||
669 | } | 673 | } |
670 | 674 | ||
671 | /* | 675 | /* |
@@ -877,7 +881,7 @@ xfs_setattr( | |||
877 | 881 | ||
878 | if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) && | 882 | if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) && |
879 | !(flags & ATTR_DMI)) { | 883 | !(flags & ATTR_DMI)) { |
880 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, vp, DM_RIGHT_NULL, | 884 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL, |
881 | NULL, DM_RIGHT_NULL, NULL, NULL, | 885 | NULL, DM_RIGHT_NULL, NULL, NULL, |
882 | 0, 0, AT_DELAY_FLAG(flags)); | 886 | 0, 0, AT_DELAY_FLAG(flags)); |
883 | } | 887 | } |
@@ -1443,28 +1447,22 @@ xfs_inactive_attrs( | |||
1443 | tp = *tpp; | 1447 | tp = *tpp; |
1444 | mp = ip->i_mount; | 1448 | mp = ip->i_mount; |
1445 | ASSERT(ip->i_d.di_forkoff != 0); | 1449 | ASSERT(ip->i_d.di_forkoff != 0); |
1446 | xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 1450 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
1447 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 1451 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
1452 | if (error) | ||
1453 | goto error_unlock; | ||
1448 | 1454 | ||
1449 | error = xfs_attr_inactive(ip); | 1455 | error = xfs_attr_inactive(ip); |
1450 | if (error) { | 1456 | if (error) |
1451 | *tpp = NULL; | 1457 | goto error_unlock; |
1452 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
1453 | return error; /* goto out */ | ||
1454 | } | ||
1455 | 1458 | ||
1456 | tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); | 1459 | tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); |
1457 | error = xfs_trans_reserve(tp, 0, | 1460 | error = xfs_trans_reserve(tp, 0, |
1458 | XFS_IFREE_LOG_RES(mp), | 1461 | XFS_IFREE_LOG_RES(mp), |
1459 | 0, XFS_TRANS_PERM_LOG_RES, | 1462 | 0, XFS_TRANS_PERM_LOG_RES, |
1460 | XFS_INACTIVE_LOG_COUNT); | 1463 | XFS_INACTIVE_LOG_COUNT); |
1461 | if (error) { | 1464 | if (error) |
1462 | ASSERT(XFS_FORCED_SHUTDOWN(mp)); | 1465 | goto error_cancel; |
1463 | xfs_trans_cancel(tp, 0); | ||
1464 | *tpp = NULL; | ||
1465 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
1466 | return error; | ||
1467 | } | ||
1468 | 1466 | ||
1469 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 1467 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
1470 | xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); | 1468 | xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); |
@@ -1475,6 +1473,14 @@ xfs_inactive_attrs( | |||
1475 | 1473 | ||
1476 | *tpp = tp; | 1474 | *tpp = tp; |
1477 | return 0; | 1475 | return 0; |
1476 | |||
1477 | error_cancel: | ||
1478 | ASSERT(XFS_FORCED_SHUTDOWN(mp)); | ||
1479 | xfs_trans_cancel(tp, 0); | ||
1480 | error_unlock: | ||
1481 | *tpp = NULL; | ||
1482 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
1483 | return error; | ||
1478 | } | 1484 | } |
1479 | 1485 | ||
1480 | int | 1486 | int |
@@ -1520,12 +1526,6 @@ xfs_release( | |||
1520 | xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE); | 1526 | xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE); |
1521 | } | 1527 | } |
1522 | 1528 | ||
1523 | #ifdef HAVE_REFCACHE | ||
1524 | /* If we are in the NFS reference cache then don't do this now */ | ||
1525 | if (ip->i_refcache) | ||
1526 | return 0; | ||
1527 | #endif | ||
1528 | |||
1529 | if (ip->i_d.di_nlink != 0) { | 1529 | if (ip->i_d.di_nlink != 0) { |
1530 | if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && | 1530 | if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && |
1531 | ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || | 1531 | ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || |
@@ -1588,9 +1588,8 @@ xfs_inactive( | |||
1588 | 1588 | ||
1589 | mp = ip->i_mount; | 1589 | mp = ip->i_mount; |
1590 | 1590 | ||
1591 | if (ip->i_d.di_nlink == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_DESTROY)) { | 1591 | if (ip->i_d.di_nlink == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_DESTROY)) |
1592 | (void) XFS_SEND_DESTROY(mp, vp, DM_RIGHT_NULL); | 1592 | XFS_SEND_DESTROY(mp, ip, DM_RIGHT_NULL); |
1593 | } | ||
1594 | 1593 | ||
1595 | error = 0; | 1594 | error = 0; |
1596 | 1595 | ||
@@ -1744,11 +1743,18 @@ xfs_inactive( | |||
1744 | XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1); | 1743 | XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1); |
1745 | 1744 | ||
1746 | /* | 1745 | /* |
1747 | * Just ignore errors at this point. There is | 1746 | * Just ignore errors at this point. There is nothing we can |
1748 | * nothing we can do except to try to keep going. | 1747 | * do except to try to keep going. Make sure it's not a silent |
1748 | * error. | ||
1749 | */ | 1749 | */ |
1750 | (void) xfs_bmap_finish(&tp, &free_list, &committed); | 1750 | error = xfs_bmap_finish(&tp, &free_list, &committed); |
1751 | (void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 1751 | if (error) |
1752 | xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: " | ||
1753 | "xfs_bmap_finish() returned error %d", error); | ||
1754 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | ||
1755 | if (error) | ||
1756 | xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: " | ||
1757 | "xfs_trans_commit() returned error %d", error); | ||
1752 | } | 1758 | } |
1753 | /* | 1759 | /* |
1754 | * Release the dquots held by inode, if any. | 1760 | * Release the dquots held by inode, if any. |
@@ -1765,8 +1771,8 @@ xfs_inactive( | |||
1765 | int | 1771 | int |
1766 | xfs_lookup( | 1772 | xfs_lookup( |
1767 | xfs_inode_t *dp, | 1773 | xfs_inode_t *dp, |
1768 | bhv_vname_t *dentry, | 1774 | struct xfs_name *name, |
1769 | bhv_vnode_t **vpp) | 1775 | xfs_inode_t **ipp) |
1770 | { | 1776 | { |
1771 | xfs_inode_t *ip; | 1777 | xfs_inode_t *ip; |
1772 | xfs_ino_t e_inum; | 1778 | xfs_ino_t e_inum; |
@@ -1779,9 +1785,9 @@ xfs_lookup( | |||
1779 | return XFS_ERROR(EIO); | 1785 | return XFS_ERROR(EIO); |
1780 | 1786 | ||
1781 | lock_mode = xfs_ilock_map_shared(dp); | 1787 | lock_mode = xfs_ilock_map_shared(dp); |
1782 | error = xfs_dir_lookup_int(dp, lock_mode, dentry, &e_inum, &ip); | 1788 | error = xfs_dir_lookup_int(dp, lock_mode, name, &e_inum, &ip); |
1783 | if (!error) { | 1789 | if (!error) { |
1784 | *vpp = XFS_ITOV(ip); | 1790 | *ipp = ip; |
1785 | xfs_itrace_ref(ip); | 1791 | xfs_itrace_ref(ip); |
1786 | } | 1792 | } |
1787 | xfs_iunlock_map_shared(dp, lock_mode); | 1793 | xfs_iunlock_map_shared(dp, lock_mode); |
@@ -1791,19 +1797,16 @@ xfs_lookup( | |||
1791 | int | 1797 | int |
1792 | xfs_create( | 1798 | xfs_create( |
1793 | xfs_inode_t *dp, | 1799 | xfs_inode_t *dp, |
1794 | bhv_vname_t *dentry, | 1800 | struct xfs_name *name, |
1795 | mode_t mode, | 1801 | mode_t mode, |
1796 | xfs_dev_t rdev, | 1802 | xfs_dev_t rdev, |
1797 | bhv_vnode_t **vpp, | 1803 | xfs_inode_t **ipp, |
1798 | cred_t *credp) | 1804 | cred_t *credp) |
1799 | { | 1805 | { |
1800 | char *name = VNAME(dentry); | 1806 | xfs_mount_t *mp = dp->i_mount; |
1801 | xfs_mount_t *mp = dp->i_mount; | ||
1802 | bhv_vnode_t *dir_vp = XFS_ITOV(dp); | ||
1803 | xfs_inode_t *ip; | 1807 | xfs_inode_t *ip; |
1804 | bhv_vnode_t *vp = NULL; | ||
1805 | xfs_trans_t *tp; | 1808 | xfs_trans_t *tp; |
1806 | int error; | 1809 | int error; |
1807 | xfs_bmap_free_t free_list; | 1810 | xfs_bmap_free_t free_list; |
1808 | xfs_fsblock_t first_block; | 1811 | xfs_fsblock_t first_block; |
1809 | boolean_t unlock_dp_on_error = B_FALSE; | 1812 | boolean_t unlock_dp_on_error = B_FALSE; |
@@ -1813,17 +1816,14 @@ xfs_create( | |||
1813 | xfs_prid_t prid; | 1816 | xfs_prid_t prid; |
1814 | struct xfs_dquot *udqp, *gdqp; | 1817 | struct xfs_dquot *udqp, *gdqp; |
1815 | uint resblks; | 1818 | uint resblks; |
1816 | int namelen; | ||
1817 | 1819 | ||
1818 | ASSERT(!*vpp); | 1820 | ASSERT(!*ipp); |
1819 | xfs_itrace_entry(dp); | 1821 | xfs_itrace_entry(dp); |
1820 | 1822 | ||
1821 | namelen = VNAMELEN(dentry); | ||
1822 | |||
1823 | if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) { | 1823 | if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) { |
1824 | error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, | 1824 | error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, |
1825 | dir_vp, DM_RIGHT_NULL, NULL, | 1825 | dp, DM_RIGHT_NULL, NULL, |
1826 | DM_RIGHT_NULL, name, NULL, | 1826 | DM_RIGHT_NULL, name->name, NULL, |
1827 | mode, 0, 0); | 1827 | mode, 0, 0); |
1828 | 1828 | ||
1829 | if (error) | 1829 | if (error) |
@@ -1855,7 +1855,7 @@ xfs_create( | |||
1855 | 1855 | ||
1856 | tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE); | 1856 | tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE); |
1857 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; | 1857 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; |
1858 | resblks = XFS_CREATE_SPACE_RES(mp, namelen); | 1858 | resblks = XFS_CREATE_SPACE_RES(mp, name->len); |
1859 | /* | 1859 | /* |
1860 | * Initially assume that the file does not exist and | 1860 | * Initially assume that the file does not exist and |
1861 | * reserve the resources for that case. If that is not | 1861 | * reserve the resources for that case. If that is not |
@@ -1888,7 +1888,8 @@ xfs_create( | |||
1888 | if (error) | 1888 | if (error) |
1889 | goto error_return; | 1889 | goto error_return; |
1890 | 1890 | ||
1891 | if (resblks == 0 && (error = xfs_dir_canenter(tp, dp, name, namelen))) | 1891 | error = xfs_dir_canenter(tp, dp, name, resblks); |
1892 | if (error) | ||
1892 | goto error_return; | 1893 | goto error_return; |
1893 | error = xfs_dir_ialloc(&tp, dp, mode, 1, | 1894 | error = xfs_dir_ialloc(&tp, dp, mode, 1, |
1894 | rdev, credp, prid, resblks > 0, | 1895 | rdev, credp, prid, resblks > 0, |
@@ -1914,11 +1915,11 @@ xfs_create( | |||
1914 | * the transaction cancel unlocking dp so don't do it explicitly in the | 1915 | * the transaction cancel unlocking dp so don't do it explicitly in the |
1915 | * error path. | 1916 | * error path. |
1916 | */ | 1917 | */ |
1917 | VN_HOLD(dir_vp); | 1918 | IHOLD(dp); |
1918 | xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); | 1919 | xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); |
1919 | unlock_dp_on_error = B_FALSE; | 1920 | unlock_dp_on_error = B_FALSE; |
1920 | 1921 | ||
1921 | error = xfs_dir_createname(tp, dp, name, namelen, ip->i_ino, | 1922 | error = xfs_dir_createname(tp, dp, name, ip->i_ino, |
1922 | &first_block, &free_list, resblks ? | 1923 | &first_block, &free_list, resblks ? |
1923 | resblks - XFS_IALLOC_SPACE_RES(mp) : 0); | 1924 | resblks - XFS_IALLOC_SPACE_RES(mp) : 0); |
1924 | if (error) { | 1925 | if (error) { |
@@ -1952,7 +1953,6 @@ xfs_create( | |||
1952 | * vnode to the caller, we bump the vnode ref count now. | 1953 | * vnode to the caller, we bump the vnode ref count now. |
1953 | */ | 1954 | */ |
1954 | IHOLD(ip); | 1955 | IHOLD(ip); |
1955 | vp = XFS_ITOV(ip); | ||
1956 | 1956 | ||
1957 | error = xfs_bmap_finish(&tp, &free_list, &committed); | 1957 | error = xfs_bmap_finish(&tp, &free_list, &committed); |
1958 | if (error) { | 1958 | if (error) { |
@@ -1970,17 +1970,17 @@ xfs_create( | |||
1970 | XFS_QM_DQRELE(mp, udqp); | 1970 | XFS_QM_DQRELE(mp, udqp); |
1971 | XFS_QM_DQRELE(mp, gdqp); | 1971 | XFS_QM_DQRELE(mp, gdqp); |
1972 | 1972 | ||
1973 | *vpp = vp; | 1973 | *ipp = ip; |
1974 | 1974 | ||
1975 | /* Fallthrough to std_return with error = 0 */ | 1975 | /* Fallthrough to std_return with error = 0 */ |
1976 | 1976 | ||
1977 | std_return: | 1977 | std_return: |
1978 | if ((*vpp || (error != 0 && dm_event_sent != 0)) && | 1978 | if ((*ipp || (error != 0 && dm_event_sent != 0)) && |
1979 | DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) { | 1979 | DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) { |
1980 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, | 1980 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, |
1981 | dir_vp, DM_RIGHT_NULL, | 1981 | dp, DM_RIGHT_NULL, |
1982 | *vpp ? vp:NULL, | 1982 | *ipp ? ip : NULL, |
1983 | DM_RIGHT_NULL, name, NULL, | 1983 | DM_RIGHT_NULL, name->name, NULL, |
1984 | mode, error, 0); | 1984 | mode, error, 0); |
1985 | } | 1985 | } |
1986 | return error; | 1986 | return error; |
@@ -2272,46 +2272,32 @@ int remove_which_error_return = 0; | |||
2272 | int | 2272 | int |
2273 | xfs_remove( | 2273 | xfs_remove( |
2274 | xfs_inode_t *dp, | 2274 | xfs_inode_t *dp, |
2275 | bhv_vname_t *dentry) | 2275 | struct xfs_name *name, |
2276 | xfs_inode_t *ip) | ||
2276 | { | 2277 | { |
2277 | bhv_vnode_t *dir_vp = XFS_ITOV(dp); | ||
2278 | char *name = VNAME(dentry); | ||
2279 | xfs_mount_t *mp = dp->i_mount; | 2278 | xfs_mount_t *mp = dp->i_mount; |
2280 | xfs_inode_t *ip; | ||
2281 | xfs_trans_t *tp = NULL; | 2279 | xfs_trans_t *tp = NULL; |
2282 | int error = 0; | 2280 | int error = 0; |
2283 | xfs_bmap_free_t free_list; | 2281 | xfs_bmap_free_t free_list; |
2284 | xfs_fsblock_t first_block; | 2282 | xfs_fsblock_t first_block; |
2285 | int cancel_flags; | 2283 | int cancel_flags; |
2286 | int committed; | 2284 | int committed; |
2287 | int dm_di_mode = 0; | ||
2288 | int link_zero; | 2285 | int link_zero; |
2289 | uint resblks; | 2286 | uint resblks; |
2290 | int namelen; | ||
2291 | 2287 | ||
2292 | xfs_itrace_entry(dp); | 2288 | xfs_itrace_entry(dp); |
2293 | 2289 | ||
2294 | if (XFS_FORCED_SHUTDOWN(mp)) | 2290 | if (XFS_FORCED_SHUTDOWN(mp)) |
2295 | return XFS_ERROR(EIO); | 2291 | return XFS_ERROR(EIO); |
2296 | 2292 | ||
2297 | namelen = VNAMELEN(dentry); | ||
2298 | |||
2299 | if (!xfs_get_dir_entry(dentry, &ip)) { | ||
2300 | dm_di_mode = ip->i_d.di_mode; | ||
2301 | IRELE(ip); | ||
2302 | } | ||
2303 | |||
2304 | if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) { | 2293 | if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) { |
2305 | error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp, | 2294 | error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dp, DM_RIGHT_NULL, |
2306 | DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, | 2295 | NULL, DM_RIGHT_NULL, name->name, NULL, |
2307 | name, NULL, dm_di_mode, 0, 0); | 2296 | ip->i_d.di_mode, 0, 0); |
2308 | if (error) | 2297 | if (error) |
2309 | return error; | 2298 | return error; |
2310 | } | 2299 | } |
2311 | 2300 | ||
2312 | /* From this point on, return through std_return */ | ||
2313 | ip = NULL; | ||
2314 | |||
2315 | /* | 2301 | /* |
2316 | * We need to get a reference to ip before we get our log | 2302 | * We need to get a reference to ip before we get our log |
2317 | * reservation. The reason for this is that we cannot call | 2303 | * reservation. The reason for this is that we cannot call |
@@ -2324,13 +2310,7 @@ xfs_remove( | |||
2324 | * when we call xfs_iget. Instead we get an unlocked reference | 2310 | * when we call xfs_iget. Instead we get an unlocked reference |
2325 | * to the inode before getting our log reservation. | 2311 | * to the inode before getting our log reservation. |
2326 | */ | 2312 | */ |
2327 | error = xfs_get_dir_entry(dentry, &ip); | 2313 | IHOLD(ip); |
2328 | if (error) { | ||
2329 | REMOVE_DEBUG_TRACE(__LINE__); | ||
2330 | goto std_return; | ||
2331 | } | ||
2332 | |||
2333 | dm_di_mode = ip->i_d.di_mode; | ||
2334 | 2314 | ||
2335 | xfs_itrace_entry(ip); | 2315 | xfs_itrace_entry(ip); |
2336 | xfs_itrace_ref(ip); | 2316 | xfs_itrace_ref(ip); |
@@ -2398,7 +2378,7 @@ xfs_remove( | |||
2398 | * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. | 2378 | * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. |
2399 | */ | 2379 | */ |
2400 | XFS_BMAP_INIT(&free_list, &first_block); | 2380 | XFS_BMAP_INIT(&free_list, &first_block); |
2401 | error = xfs_dir_removename(tp, dp, name, namelen, ip->i_ino, | 2381 | error = xfs_dir_removename(tp, dp, name, ip->i_ino, |
2402 | &first_block, &free_list, 0); | 2382 | &first_block, &free_list, 0); |
2403 | if (error) { | 2383 | if (error) { |
2404 | ASSERT(error != ENOENT); | 2384 | ASSERT(error != ENOENT); |
@@ -2449,14 +2429,6 @@ xfs_remove( | |||
2449 | } | 2429 | } |
2450 | 2430 | ||
2451 | /* | 2431 | /* |
2452 | * Before we drop our extra reference to the inode, purge it | ||
2453 | * from the refcache if it is there. By waiting until afterwards | ||
2454 | * to do the IRELE, we ensure that we won't go inactive in the | ||
2455 | * xfs_refcache_purge_ip routine (although that would be OK). | ||
2456 | */ | ||
2457 | xfs_refcache_purge_ip(ip); | ||
2458 | |||
2459 | /* | ||
2460 | * If we are using filestreams, kill the stream association. | 2432 | * If we are using filestreams, kill the stream association. |
2461 | * If the file is still open it may get a new one but that | 2433 | * If the file is still open it may get a new one but that |
2462 | * will get killed on last close in xfs_close() so we don't | 2434 | * will get killed on last close in xfs_close() so we don't |
@@ -2472,9 +2444,9 @@ xfs_remove( | |||
2472 | std_return: | 2444 | std_return: |
2473 | if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { | 2445 | if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { |
2474 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, | 2446 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, |
2475 | dir_vp, DM_RIGHT_NULL, | 2447 | dp, DM_RIGHT_NULL, |
2476 | NULL, DM_RIGHT_NULL, | 2448 | NULL, DM_RIGHT_NULL, |
2477 | name, NULL, dm_di_mode, error, 0); | 2449 | name->name, NULL, ip->i_d.di_mode, error, 0); |
2478 | } | 2450 | } |
2479 | return error; | 2451 | return error; |
2480 | 2452 | ||
@@ -2495,14 +2467,6 @@ xfs_remove( | |||
2495 | cancel_flags |= XFS_TRANS_ABORT; | 2467 | cancel_flags |= XFS_TRANS_ABORT; |
2496 | xfs_trans_cancel(tp, cancel_flags); | 2468 | xfs_trans_cancel(tp, cancel_flags); |
2497 | 2469 | ||
2498 | /* | ||
2499 | * Before we drop our extra reference to the inode, purge it | ||
2500 | * from the refcache if it is there. By waiting until afterwards | ||
2501 | * to do the IRELE, we ensure that we won't go inactive in the | ||
2502 | * xfs_refcache_purge_ip routine (although that would be OK). | ||
2503 | */ | ||
2504 | xfs_refcache_purge_ip(ip); | ||
2505 | |||
2506 | IRELE(ip); | 2470 | IRELE(ip); |
2507 | 2471 | ||
2508 | goto std_return; | 2472 | goto std_return; |
@@ -2511,12 +2475,10 @@ xfs_remove( | |||
2511 | int | 2475 | int |
2512 | xfs_link( | 2476 | xfs_link( |
2513 | xfs_inode_t *tdp, | 2477 | xfs_inode_t *tdp, |
2514 | bhv_vnode_t *src_vp, | 2478 | xfs_inode_t *sip, |
2515 | bhv_vname_t *dentry) | 2479 | struct xfs_name *target_name) |
2516 | { | 2480 | { |
2517 | bhv_vnode_t *target_dir_vp = XFS_ITOV(tdp); | ||
2518 | xfs_mount_t *mp = tdp->i_mount; | 2481 | xfs_mount_t *mp = tdp->i_mount; |
2519 | xfs_inode_t *sip = xfs_vtoi(src_vp); | ||
2520 | xfs_trans_t *tp; | 2482 | xfs_trans_t *tp; |
2521 | xfs_inode_t *ips[2]; | 2483 | xfs_inode_t *ips[2]; |
2522 | int error; | 2484 | int error; |
@@ -2525,23 +2487,20 @@ xfs_link( | |||
2525 | int cancel_flags; | 2487 | int cancel_flags; |
2526 | int committed; | 2488 | int committed; |
2527 | int resblks; | 2489 | int resblks; |
2528 | char *target_name = VNAME(dentry); | ||
2529 | int target_namelen; | ||
2530 | 2490 | ||
2531 | xfs_itrace_entry(tdp); | 2491 | xfs_itrace_entry(tdp); |
2532 | xfs_itrace_entry(xfs_vtoi(src_vp)); | 2492 | xfs_itrace_entry(sip); |
2533 | 2493 | ||
2534 | target_namelen = VNAMELEN(dentry); | 2494 | ASSERT(!S_ISDIR(sip->i_d.di_mode)); |
2535 | ASSERT(!VN_ISDIR(src_vp)); | ||
2536 | 2495 | ||
2537 | if (XFS_FORCED_SHUTDOWN(mp)) | 2496 | if (XFS_FORCED_SHUTDOWN(mp)) |
2538 | return XFS_ERROR(EIO); | 2497 | return XFS_ERROR(EIO); |
2539 | 2498 | ||
2540 | if (DM_EVENT_ENABLED(tdp, DM_EVENT_LINK)) { | 2499 | if (DM_EVENT_ENABLED(tdp, DM_EVENT_LINK)) { |
2541 | error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK, | 2500 | error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK, |
2542 | target_dir_vp, DM_RIGHT_NULL, | 2501 | tdp, DM_RIGHT_NULL, |
2543 | src_vp, DM_RIGHT_NULL, | 2502 | sip, DM_RIGHT_NULL, |
2544 | target_name, NULL, 0, 0, 0); | 2503 | target_name->name, NULL, 0, 0, 0); |
2545 | if (error) | 2504 | if (error) |
2546 | return error; | 2505 | return error; |
2547 | } | 2506 | } |
@@ -2556,7 +2515,7 @@ xfs_link( | |||
2556 | 2515 | ||
2557 | tp = xfs_trans_alloc(mp, XFS_TRANS_LINK); | 2516 | tp = xfs_trans_alloc(mp, XFS_TRANS_LINK); |
2558 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; | 2517 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; |
2559 | resblks = XFS_LINK_SPACE_RES(mp, target_namelen); | 2518 | resblks = XFS_LINK_SPACE_RES(mp, target_name->len); |
2560 | error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0, | 2519 | error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0, |
2561 | XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT); | 2520 | XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT); |
2562 | if (error == ENOSPC) { | 2521 | if (error == ENOSPC) { |
@@ -2584,8 +2543,8 @@ xfs_link( | |||
2584 | * xfs_trans_cancel will both unlock the inodes and | 2543 | * xfs_trans_cancel will both unlock the inodes and |
2585 | * decrement the associated ref counts. | 2544 | * decrement the associated ref counts. |
2586 | */ | 2545 | */ |
2587 | VN_HOLD(src_vp); | 2546 | IHOLD(sip); |
2588 | VN_HOLD(target_dir_vp); | 2547 | IHOLD(tdp); |
2589 | xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); | 2548 | xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); |
2590 | xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); | 2549 | xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); |
2591 | 2550 | ||
@@ -2608,15 +2567,14 @@ xfs_link( | |||
2608 | goto error_return; | 2567 | goto error_return; |
2609 | } | 2568 | } |
2610 | 2569 | ||
2611 | if (resblks == 0 && | 2570 | error = xfs_dir_canenter(tp, tdp, target_name, resblks); |
2612 | (error = xfs_dir_canenter(tp, tdp, target_name, target_namelen))) | 2571 | if (error) |
2613 | goto error_return; | 2572 | goto error_return; |
2614 | 2573 | ||
2615 | XFS_BMAP_INIT(&free_list, &first_block); | 2574 | XFS_BMAP_INIT(&free_list, &first_block); |
2616 | 2575 | ||
2617 | error = xfs_dir_createname(tp, tdp, target_name, target_namelen, | 2576 | error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, |
2618 | sip->i_ino, &first_block, &free_list, | 2577 | &first_block, &free_list, resblks); |
2619 | resblks); | ||
2620 | if (error) | 2578 | if (error) |
2621 | goto abort_return; | 2579 | goto abort_return; |
2622 | xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 2580 | xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
@@ -2650,9 +2608,9 @@ xfs_link( | |||
2650 | std_return: | 2608 | std_return: |
2651 | if (DM_EVENT_ENABLED(sip, DM_EVENT_POSTLINK)) { | 2609 | if (DM_EVENT_ENABLED(sip, DM_EVENT_POSTLINK)) { |
2652 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK, | 2610 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK, |
2653 | target_dir_vp, DM_RIGHT_NULL, | 2611 | tdp, DM_RIGHT_NULL, |
2654 | src_vp, DM_RIGHT_NULL, | 2612 | sip, DM_RIGHT_NULL, |
2655 | target_name, NULL, 0, error, 0); | 2613 | target_name->name, NULL, 0, error, 0); |
2656 | } | 2614 | } |
2657 | return error; | 2615 | return error; |
2658 | 2616 | ||
@@ -2669,17 +2627,13 @@ std_return: | |||
2669 | int | 2627 | int |
2670 | xfs_mkdir( | 2628 | xfs_mkdir( |
2671 | xfs_inode_t *dp, | 2629 | xfs_inode_t *dp, |
2672 | bhv_vname_t *dentry, | 2630 | struct xfs_name *dir_name, |
2673 | mode_t mode, | 2631 | mode_t mode, |
2674 | bhv_vnode_t **vpp, | 2632 | xfs_inode_t **ipp, |
2675 | cred_t *credp) | 2633 | cred_t *credp) |
2676 | { | 2634 | { |
2677 | bhv_vnode_t *dir_vp = XFS_ITOV(dp); | ||
2678 | char *dir_name = VNAME(dentry); | ||
2679 | int dir_namelen = VNAMELEN(dentry); | ||
2680 | xfs_mount_t *mp = dp->i_mount; | 2635 | xfs_mount_t *mp = dp->i_mount; |
2681 | xfs_inode_t *cdp; /* inode of created dir */ | 2636 | xfs_inode_t *cdp; /* inode of created dir */ |
2682 | bhv_vnode_t *cvp; /* vnode of created dir */ | ||
2683 | xfs_trans_t *tp; | 2637 | xfs_trans_t *tp; |
2684 | int cancel_flags; | 2638 | int cancel_flags; |
2685 | int error; | 2639 | int error; |
@@ -2700,8 +2654,8 @@ xfs_mkdir( | |||
2700 | 2654 | ||
2701 | if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) { | 2655 | if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) { |
2702 | error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, | 2656 | error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, |
2703 | dir_vp, DM_RIGHT_NULL, NULL, | 2657 | dp, DM_RIGHT_NULL, NULL, |
2704 | DM_RIGHT_NULL, dir_name, NULL, | 2658 | DM_RIGHT_NULL, dir_name->name, NULL, |
2705 | mode, 0, 0); | 2659 | mode, 0, 0); |
2706 | if (error) | 2660 | if (error) |
2707 | return error; | 2661 | return error; |
@@ -2730,7 +2684,7 @@ xfs_mkdir( | |||
2730 | 2684 | ||
2731 | tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR); | 2685 | tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR); |
2732 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; | 2686 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; |
2733 | resblks = XFS_MKDIR_SPACE_RES(mp, dir_namelen); | 2687 | resblks = XFS_MKDIR_SPACE_RES(mp, dir_name->len); |
2734 | error = xfs_trans_reserve(tp, resblks, XFS_MKDIR_LOG_RES(mp), 0, | 2688 | error = xfs_trans_reserve(tp, resblks, XFS_MKDIR_LOG_RES(mp), 0, |
2735 | XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT); | 2689 | XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT); |
2736 | if (error == ENOSPC) { | 2690 | if (error == ENOSPC) { |
@@ -2762,8 +2716,8 @@ xfs_mkdir( | |||
2762 | if (error) | 2716 | if (error) |
2763 | goto error_return; | 2717 | goto error_return; |
2764 | 2718 | ||
2765 | if (resblks == 0 && | 2719 | error = xfs_dir_canenter(tp, dp, dir_name, resblks); |
2766 | (error = xfs_dir_canenter(tp, dp, dir_name, dir_namelen))) | 2720 | if (error) |
2767 | goto error_return; | 2721 | goto error_return; |
2768 | /* | 2722 | /* |
2769 | * create the directory inode. | 2723 | * create the directory inode. |
@@ -2786,15 +2740,15 @@ xfs_mkdir( | |||
2786 | * from here on will result in the transaction cancel | 2740 | * from here on will result in the transaction cancel |
2787 | * unlocking dp so don't do it explicitly in the error path. | 2741 | * unlocking dp so don't do it explicitly in the error path. |
2788 | */ | 2742 | */ |
2789 | VN_HOLD(dir_vp); | 2743 | IHOLD(dp); |
2790 | xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); | 2744 | xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); |
2791 | unlock_dp_on_error = B_FALSE; | 2745 | unlock_dp_on_error = B_FALSE; |
2792 | 2746 | ||
2793 | XFS_BMAP_INIT(&free_list, &first_block); | 2747 | XFS_BMAP_INIT(&free_list, &first_block); |
2794 | 2748 | ||
2795 | error = xfs_dir_createname(tp, dp, dir_name, dir_namelen, cdp->i_ino, | 2749 | error = xfs_dir_createname(tp, dp, dir_name, cdp->i_ino, |
2796 | &first_block, &free_list, resblks ? | 2750 | &first_block, &free_list, resblks ? |
2797 | resblks - XFS_IALLOC_SPACE_RES(mp) : 0); | 2751 | resblks - XFS_IALLOC_SPACE_RES(mp) : 0); |
2798 | if (error) { | 2752 | if (error) { |
2799 | ASSERT(error != ENOSPC); | 2753 | ASSERT(error != ENOSPC); |
2800 | goto error1; | 2754 | goto error1; |
@@ -2817,11 +2771,9 @@ xfs_mkdir( | |||
2817 | if (error) | 2771 | if (error) |
2818 | goto error2; | 2772 | goto error2; |
2819 | 2773 | ||
2820 | cvp = XFS_ITOV(cdp); | ||
2821 | |||
2822 | created = B_TRUE; | 2774 | created = B_TRUE; |
2823 | 2775 | ||
2824 | *vpp = cvp; | 2776 | *ipp = cdp; |
2825 | IHOLD(cdp); | 2777 | IHOLD(cdp); |
2826 | 2778 | ||
2827 | /* | 2779 | /* |
@@ -2858,10 +2810,10 @@ std_return: | |||
2858 | if ((created || (error != 0 && dm_event_sent != 0)) && | 2810 | if ((created || (error != 0 && dm_event_sent != 0)) && |
2859 | DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) { | 2811 | DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) { |
2860 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, | 2812 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, |
2861 | dir_vp, DM_RIGHT_NULL, | 2813 | dp, DM_RIGHT_NULL, |
2862 | created ? XFS_ITOV(cdp):NULL, | 2814 | created ? cdp : NULL, |
2863 | DM_RIGHT_NULL, | 2815 | DM_RIGHT_NULL, |
2864 | dir_name, NULL, | 2816 | dir_name->name, NULL, |
2865 | mode, error, 0); | 2817 | mode, error, 0); |
2866 | } | 2818 | } |
2867 | return error; | 2819 | return error; |
@@ -2885,20 +2837,17 @@ std_return: | |||
2885 | int | 2837 | int |
2886 | xfs_rmdir( | 2838 | xfs_rmdir( |
2887 | xfs_inode_t *dp, | 2839 | xfs_inode_t *dp, |
2888 | bhv_vname_t *dentry) | 2840 | struct xfs_name *name, |
2841 | xfs_inode_t *cdp) | ||
2889 | { | 2842 | { |
2890 | bhv_vnode_t *dir_vp = XFS_ITOV(dp); | 2843 | bhv_vnode_t *dir_vp = XFS_ITOV(dp); |
2891 | char *name = VNAME(dentry); | ||
2892 | int namelen = VNAMELEN(dentry); | ||
2893 | xfs_mount_t *mp = dp->i_mount; | 2844 | xfs_mount_t *mp = dp->i_mount; |
2894 | xfs_inode_t *cdp; /* child directory */ | ||
2895 | xfs_trans_t *tp; | 2845 | xfs_trans_t *tp; |
2896 | int error; | 2846 | int error; |
2897 | xfs_bmap_free_t free_list; | 2847 | xfs_bmap_free_t free_list; |
2898 | xfs_fsblock_t first_block; | 2848 | xfs_fsblock_t first_block; |
2899 | int cancel_flags; | 2849 | int cancel_flags; |
2900 | int committed; | 2850 | int committed; |
2901 | int dm_di_mode = S_IFDIR; | ||
2902 | int last_cdp_link; | 2851 | int last_cdp_link; |
2903 | uint resblks; | 2852 | uint resblks; |
2904 | 2853 | ||
@@ -2907,24 +2856,15 @@ xfs_rmdir( | |||
2907 | if (XFS_FORCED_SHUTDOWN(mp)) | 2856 | if (XFS_FORCED_SHUTDOWN(mp)) |
2908 | return XFS_ERROR(EIO); | 2857 | return XFS_ERROR(EIO); |
2909 | 2858 | ||
2910 | if (!xfs_get_dir_entry(dentry, &cdp)) { | ||
2911 | dm_di_mode = cdp->i_d.di_mode; | ||
2912 | IRELE(cdp); | ||
2913 | } | ||
2914 | |||
2915 | if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) { | 2859 | if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) { |
2916 | error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, | 2860 | error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, |
2917 | dir_vp, DM_RIGHT_NULL, | 2861 | dp, DM_RIGHT_NULL, |
2918 | NULL, DM_RIGHT_NULL, | 2862 | NULL, DM_RIGHT_NULL, name->name, |
2919 | name, NULL, dm_di_mode, 0, 0); | 2863 | NULL, cdp->i_d.di_mode, 0, 0); |
2920 | if (error) | 2864 | if (error) |
2921 | return XFS_ERROR(error); | 2865 | return XFS_ERROR(error); |
2922 | } | 2866 | } |
2923 | 2867 | ||
2924 | /* Return through std_return after this point. */ | ||
2925 | |||
2926 | cdp = NULL; | ||
2927 | |||
2928 | /* | 2868 | /* |
2929 | * We need to get a reference to cdp before we get our log | 2869 | * We need to get a reference to cdp before we get our log |
2930 | * reservation. The reason for this is that we cannot call | 2870 | * reservation. The reason for this is that we cannot call |
@@ -2937,13 +2877,7 @@ xfs_rmdir( | |||
2937 | * when we call xfs_iget. Instead we get an unlocked reference | 2877 | * when we call xfs_iget. Instead we get an unlocked reference |
2938 | * to the inode before getting our log reservation. | 2878 | * to the inode before getting our log reservation. |
2939 | */ | 2879 | */ |
2940 | error = xfs_get_dir_entry(dentry, &cdp); | 2880 | IHOLD(cdp); |
2941 | if (error) { | ||
2942 | REMOVE_DEBUG_TRACE(__LINE__); | ||
2943 | goto std_return; | ||
2944 | } | ||
2945 | mp = dp->i_mount; | ||
2946 | dm_di_mode = cdp->i_d.di_mode; | ||
2947 | 2881 | ||
2948 | /* | 2882 | /* |
2949 | * Get the dquots for the inodes. | 2883 | * Get the dquots for the inodes. |
@@ -3020,7 +2954,7 @@ xfs_rmdir( | |||
3020 | goto error_return; | 2954 | goto error_return; |
3021 | } | 2955 | } |
3022 | 2956 | ||
3023 | error = xfs_dir_removename(tp, dp, name, namelen, cdp->i_ino, | 2957 | error = xfs_dir_removename(tp, dp, name, cdp->i_ino, |
3024 | &first_block, &free_list, resblks); | 2958 | &first_block, &free_list, resblks); |
3025 | if (error) | 2959 | if (error) |
3026 | goto error1; | 2960 | goto error1; |
@@ -3098,9 +3032,9 @@ xfs_rmdir( | |||
3098 | std_return: | 3032 | std_return: |
3099 | if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { | 3033 | if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { |
3100 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, | 3034 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, |
3101 | dir_vp, DM_RIGHT_NULL, | 3035 | dp, DM_RIGHT_NULL, |
3102 | NULL, DM_RIGHT_NULL, | 3036 | NULL, DM_RIGHT_NULL, |
3103 | name, NULL, dm_di_mode, | 3037 | name->name, NULL, cdp->i_d.di_mode, |
3104 | error, 0); | 3038 | error, 0); |
3105 | } | 3039 | } |
3106 | return error; | 3040 | return error; |
@@ -3118,13 +3052,12 @@ xfs_rmdir( | |||
3118 | int | 3052 | int |
3119 | xfs_symlink( | 3053 | xfs_symlink( |
3120 | xfs_inode_t *dp, | 3054 | xfs_inode_t *dp, |
3121 | bhv_vname_t *dentry, | 3055 | struct xfs_name *link_name, |
3122 | char *target_path, | 3056 | const char *target_path, |
3123 | mode_t mode, | 3057 | mode_t mode, |
3124 | bhv_vnode_t **vpp, | 3058 | xfs_inode_t **ipp, |
3125 | cred_t *credp) | 3059 | cred_t *credp) |
3126 | { | 3060 | { |
3127 | bhv_vnode_t *dir_vp = XFS_ITOV(dp); | ||
3128 | xfs_mount_t *mp = dp->i_mount; | 3061 | xfs_mount_t *mp = dp->i_mount; |
3129 | xfs_trans_t *tp; | 3062 | xfs_trans_t *tp; |
3130 | xfs_inode_t *ip; | 3063 | xfs_inode_t *ip; |
@@ -3140,17 +3073,15 @@ xfs_symlink( | |||
3140 | int nmaps; | 3073 | int nmaps; |
3141 | xfs_bmbt_irec_t mval[SYMLINK_MAPS]; | 3074 | xfs_bmbt_irec_t mval[SYMLINK_MAPS]; |
3142 | xfs_daddr_t d; | 3075 | xfs_daddr_t d; |
3143 | char *cur_chunk; | 3076 | const char *cur_chunk; |
3144 | int byte_cnt; | 3077 | int byte_cnt; |
3145 | int n; | 3078 | int n; |
3146 | xfs_buf_t *bp; | 3079 | xfs_buf_t *bp; |
3147 | xfs_prid_t prid; | 3080 | xfs_prid_t prid; |
3148 | struct xfs_dquot *udqp, *gdqp; | 3081 | struct xfs_dquot *udqp, *gdqp; |
3149 | uint resblks; | 3082 | uint resblks; |
3150 | char *link_name = VNAME(dentry); | ||
3151 | int link_namelen; | ||
3152 | 3083 | ||
3153 | *vpp = NULL; | 3084 | *ipp = NULL; |
3154 | error = 0; | 3085 | error = 0; |
3155 | ip = NULL; | 3086 | ip = NULL; |
3156 | tp = NULL; | 3087 | tp = NULL; |
@@ -3160,44 +3091,17 @@ xfs_symlink( | |||
3160 | if (XFS_FORCED_SHUTDOWN(mp)) | 3091 | if (XFS_FORCED_SHUTDOWN(mp)) |
3161 | return XFS_ERROR(EIO); | 3092 | return XFS_ERROR(EIO); |
3162 | 3093 | ||
3163 | link_namelen = VNAMELEN(dentry); | ||
3164 | |||
3165 | /* | 3094 | /* |
3166 | * Check component lengths of the target path name. | 3095 | * Check component lengths of the target path name. |
3167 | */ | 3096 | */ |
3168 | pathlen = strlen(target_path); | 3097 | pathlen = strlen(target_path); |
3169 | if (pathlen >= MAXPATHLEN) /* total string too long */ | 3098 | if (pathlen >= MAXPATHLEN) /* total string too long */ |
3170 | return XFS_ERROR(ENAMETOOLONG); | 3099 | return XFS_ERROR(ENAMETOOLONG); |
3171 | if (pathlen >= MAXNAMELEN) { /* is any component too long? */ | ||
3172 | int len, total; | ||
3173 | char *path; | ||
3174 | |||
3175 | for (total = 0, path = target_path; total < pathlen;) { | ||
3176 | /* | ||
3177 | * Skip any slashes. | ||
3178 | */ | ||
3179 | while(*path == '/') { | ||
3180 | total++; | ||
3181 | path++; | ||
3182 | } | ||
3183 | |||
3184 | /* | ||
3185 | * Count up to the next slash or end of path. | ||
3186 | * Error out if the component is bigger than MAXNAMELEN. | ||
3187 | */ | ||
3188 | for(len = 0; *path != '/' && total < pathlen;total++, path++) { | ||
3189 | if (++len >= MAXNAMELEN) { | ||
3190 | error = ENAMETOOLONG; | ||
3191 | return error; | ||
3192 | } | ||
3193 | } | ||
3194 | } | ||
3195 | } | ||
3196 | 3100 | ||
3197 | if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) { | 3101 | if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) { |
3198 | error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dir_vp, | 3102 | error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dp, |
3199 | DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, | 3103 | DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, |
3200 | link_name, target_path, 0, 0, 0); | 3104 | link_name->name, target_path, 0, 0, 0); |
3201 | if (error) | 3105 | if (error) |
3202 | return error; | 3106 | return error; |
3203 | } | 3107 | } |
@@ -3229,7 +3133,7 @@ xfs_symlink( | |||
3229 | fs_blocks = 0; | 3133 | fs_blocks = 0; |
3230 | else | 3134 | else |
3231 | fs_blocks = XFS_B_TO_FSB(mp, pathlen); | 3135 | fs_blocks = XFS_B_TO_FSB(mp, pathlen); |
3232 | resblks = XFS_SYMLINK_SPACE_RES(mp, link_namelen, fs_blocks); | 3136 | resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks); |
3233 | error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0, | 3137 | error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0, |
3234 | XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT); | 3138 | XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT); |
3235 | if (error == ENOSPC && fs_blocks == 0) { | 3139 | if (error == ENOSPC && fs_blocks == 0) { |
@@ -3263,8 +3167,8 @@ xfs_symlink( | |||
3263 | /* | 3167 | /* |
3264 | * Check for ability to enter directory entry, if no space reserved. | 3168 | * Check for ability to enter directory entry, if no space reserved. |
3265 | */ | 3169 | */ |
3266 | if (resblks == 0 && | 3170 | error = xfs_dir_canenter(tp, dp, link_name, resblks); |
3267 | (error = xfs_dir_canenter(tp, dp, link_name, link_namelen))) | 3171 | if (error) |
3268 | goto error_return; | 3172 | goto error_return; |
3269 | /* | 3173 | /* |
3270 | * Initialize the bmap freelist prior to calling either | 3174 | * Initialize the bmap freelist prior to calling either |
@@ -3289,7 +3193,7 @@ xfs_symlink( | |||
3289 | * transaction cancel unlocking dp so don't do it explicitly in the | 3193 | * transaction cancel unlocking dp so don't do it explicitly in the |
3290 | * error path. | 3194 | * error path. |
3291 | */ | 3195 | */ |
3292 | VN_HOLD(dir_vp); | 3196 | IHOLD(dp); |
3293 | xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); | 3197 | xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); |
3294 | unlock_dp_on_error = B_FALSE; | 3198 | unlock_dp_on_error = B_FALSE; |
3295 | 3199 | ||
@@ -3356,8 +3260,8 @@ xfs_symlink( | |||
3356 | /* | 3260 | /* |
3357 | * Create the directory entry for the symlink. | 3261 | * Create the directory entry for the symlink. |
3358 | */ | 3262 | */ |
3359 | error = xfs_dir_createname(tp, dp, link_name, link_namelen, ip->i_ino, | 3263 | error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, |
3360 | &first_block, &free_list, resblks); | 3264 | &first_block, &free_list, resblks); |
3361 | if (error) | 3265 | if (error) |
3362 | goto error1; | 3266 | goto error1; |
3363 | xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 3267 | xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
@@ -3399,19 +3303,14 @@ xfs_symlink( | |||
3399 | std_return: | 3303 | std_return: |
3400 | if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTSYMLINK)) { | 3304 | if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTSYMLINK)) { |
3401 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK, | 3305 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK, |
3402 | dir_vp, DM_RIGHT_NULL, | 3306 | dp, DM_RIGHT_NULL, |
3403 | error ? NULL : XFS_ITOV(ip), | 3307 | error ? NULL : ip, |
3404 | DM_RIGHT_NULL, link_name, target_path, | 3308 | DM_RIGHT_NULL, link_name->name, |
3405 | 0, error, 0); | 3309 | target_path, 0, error, 0); |
3406 | } | 3310 | } |
3407 | 3311 | ||
3408 | if (!error) { | 3312 | if (!error) |
3409 | bhv_vnode_t *vp; | 3313 | *ipp = ip; |
3410 | |||
3411 | ASSERT(ip); | ||
3412 | vp = XFS_ITOV(ip); | ||
3413 | *vpp = vp; | ||
3414 | } | ||
3415 | return error; | 3314 | return error; |
3416 | 3315 | ||
3417 | error2: | 3316 | error2: |
@@ -3431,60 +3330,11 @@ std_return: | |||
3431 | } | 3330 | } |
3432 | 3331 | ||
3433 | int | 3332 | int |
3434 | xfs_rwlock( | ||
3435 | xfs_inode_t *ip, | ||
3436 | bhv_vrwlock_t locktype) | ||
3437 | { | ||
3438 | if (S_ISDIR(ip->i_d.di_mode)) | ||
3439 | return 1; | ||
3440 | if (locktype == VRWLOCK_WRITE) { | ||
3441 | xfs_ilock(ip, XFS_IOLOCK_EXCL); | ||
3442 | } else if (locktype == VRWLOCK_TRY_READ) { | ||
3443 | return xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED); | ||
3444 | } else if (locktype == VRWLOCK_TRY_WRITE) { | ||
3445 | return xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL); | ||
3446 | } else { | ||
3447 | ASSERT((locktype == VRWLOCK_READ) || | ||
3448 | (locktype == VRWLOCK_WRITE_DIRECT)); | ||
3449 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | ||
3450 | } | ||
3451 | |||
3452 | return 1; | ||
3453 | } | ||
3454 | |||
3455 | |||
3456 | void | ||
3457 | xfs_rwunlock( | ||
3458 | xfs_inode_t *ip, | ||
3459 | bhv_vrwlock_t locktype) | ||
3460 | { | ||
3461 | if (S_ISDIR(ip->i_d.di_mode)) | ||
3462 | return; | ||
3463 | if (locktype == VRWLOCK_WRITE) { | ||
3464 | /* | ||
3465 | * In the write case, we may have added a new entry to | ||
3466 | * the reference cache. This might store a pointer to | ||
3467 | * an inode to be released in this inode. If it is there, | ||
3468 | * clear the pointer and release the inode after unlocking | ||
3469 | * this one. | ||
3470 | */ | ||
3471 | xfs_refcache_iunlock(ip, XFS_IOLOCK_EXCL); | ||
3472 | } else { | ||
3473 | ASSERT((locktype == VRWLOCK_READ) || | ||
3474 | (locktype == VRWLOCK_WRITE_DIRECT)); | ||
3475 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | ||
3476 | } | ||
3477 | return; | ||
3478 | } | ||
3479 | |||
3480 | |||
3481 | int | ||
3482 | xfs_inode_flush( | 3333 | xfs_inode_flush( |
3483 | xfs_inode_t *ip, | 3334 | xfs_inode_t *ip, |
3484 | int flags) | 3335 | int flags) |
3485 | { | 3336 | { |
3486 | xfs_mount_t *mp = ip->i_mount; | 3337 | xfs_mount_t *mp = ip->i_mount; |
3487 | xfs_inode_log_item_t *iip = ip->i_itemp; | ||
3488 | int error = 0; | 3338 | int error = 0; |
3489 | 3339 | ||
3490 | if (XFS_FORCED_SHUTDOWN(mp)) | 3340 | if (XFS_FORCED_SHUTDOWN(mp)) |
@@ -3494,33 +3344,9 @@ xfs_inode_flush( | |||
3494 | * Bypass inodes which have already been cleaned by | 3344 | * Bypass inodes which have already been cleaned by |
3495 | * the inode flush clustering code inside xfs_iflush | 3345 | * the inode flush clustering code inside xfs_iflush |
3496 | */ | 3346 | */ |
3497 | if ((ip->i_update_core == 0) && | 3347 | if (xfs_inode_clean(ip)) |
3498 | ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) | ||
3499 | return 0; | 3348 | return 0; |
3500 | 3349 | ||
3501 | if (flags & FLUSH_LOG) { | ||
3502 | if (iip && iip->ili_last_lsn) { | ||
3503 | xlog_t *log = mp->m_log; | ||
3504 | xfs_lsn_t sync_lsn; | ||
3505 | int log_flags = XFS_LOG_FORCE; | ||
3506 | |||
3507 | spin_lock(&log->l_grant_lock); | ||
3508 | sync_lsn = log->l_last_sync_lsn; | ||
3509 | spin_unlock(&log->l_grant_lock); | ||
3510 | |||
3511 | if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) { | ||
3512 | if (flags & FLUSH_SYNC) | ||
3513 | log_flags |= XFS_LOG_SYNC; | ||
3514 | error = xfs_log_force(mp, iip->ili_last_lsn, log_flags); | ||
3515 | if (error) | ||
3516 | return error; | ||
3517 | } | ||
3518 | |||
3519 | if (ip->i_update_core == 0) | ||
3520 | return 0; | ||
3521 | } | ||
3522 | } | ||
3523 | |||
3524 | /* | 3350 | /* |
3525 | * We make this non-blocking if the inode is contended, | 3351 | * We make this non-blocking if the inode is contended, |
3526 | * return EAGAIN to indicate to the caller that they | 3352 | * return EAGAIN to indicate to the caller that they |
@@ -3528,30 +3354,22 @@ xfs_inode_flush( | |||
3528 | * blocking on inodes inside another operation right | 3354 | * blocking on inodes inside another operation right |
3529 | * now, they get caught later by xfs_sync. | 3355 | * now, they get caught later by xfs_sync. |
3530 | */ | 3356 | */ |
3531 | if (flags & FLUSH_INODE) { | 3357 | if (flags & FLUSH_SYNC) { |
3532 | int flush_flags; | 3358 | xfs_ilock(ip, XFS_ILOCK_SHARED); |
3533 | 3359 | xfs_iflock(ip); | |
3534 | if (flags & FLUSH_SYNC) { | 3360 | } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { |
3535 | xfs_ilock(ip, XFS_ILOCK_SHARED); | 3361 | if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) { |
3536 | xfs_iflock(ip); | 3362 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
3537 | } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { | ||
3538 | if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) { | ||
3539 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
3540 | return EAGAIN; | ||
3541 | } | ||
3542 | } else { | ||
3543 | return EAGAIN; | 3363 | return EAGAIN; |
3544 | } | 3364 | } |
3545 | 3365 | } else { | |
3546 | if (flags & FLUSH_SYNC) | 3366 | return EAGAIN; |
3547 | flush_flags = XFS_IFLUSH_SYNC; | ||
3548 | else | ||
3549 | flush_flags = XFS_IFLUSH_ASYNC; | ||
3550 | |||
3551 | error = xfs_iflush(ip, flush_flags); | ||
3552 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
3553 | } | 3367 | } |
3554 | 3368 | ||
3369 | error = xfs_iflush(ip, (flags & FLUSH_SYNC) ? XFS_IFLUSH_SYNC | ||
3370 | : XFS_IFLUSH_ASYNC_NOBLOCK); | ||
3371 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
3372 | |||
3555 | return error; | 3373 | return error; |
3556 | } | 3374 | } |
3557 | 3375 | ||
@@ -3694,12 +3512,12 @@ xfs_finish_reclaim( | |||
3694 | * We get the flush lock regardless, though, just to make sure | 3512 | * We get the flush lock regardless, though, just to make sure |
3695 | * we don't free it while it is being flushed. | 3513 | * we don't free it while it is being flushed. |
3696 | */ | 3514 | */ |
3697 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 3515 | if (!locked) { |
3698 | if (!locked) { | 3516 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
3699 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 3517 | xfs_iflock(ip); |
3700 | xfs_iflock(ip); | 3518 | } |
3701 | } | ||
3702 | 3519 | ||
3520 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | ||
3703 | if (ip->i_update_core || | 3521 | if (ip->i_update_core || |
3704 | ((ip->i_itemp != NULL) && | 3522 | ((ip->i_itemp != NULL) && |
3705 | (ip->i_itemp->ili_format.ilf_fields != 0))) { | 3523 | (ip->i_itemp->ili_format.ilf_fields != 0))) { |
@@ -3719,17 +3537,11 @@ xfs_finish_reclaim( | |||
3719 | ASSERT(ip->i_update_core == 0); | 3537 | ASSERT(ip->i_update_core == 0); |
3720 | ASSERT(ip->i_itemp == NULL || | 3538 | ASSERT(ip->i_itemp == NULL || |
3721 | ip->i_itemp->ili_format.ilf_fields == 0); | 3539 | ip->i_itemp->ili_format.ilf_fields == 0); |
3722 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
3723 | } else if (locked) { | ||
3724 | /* | ||
3725 | * We are not interested in doing an iflush if we're | ||
3726 | * in the process of shutting down the filesystem forcibly. | ||
3727 | * So, just reclaim the inode. | ||
3728 | */ | ||
3729 | xfs_ifunlock(ip); | ||
3730 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
3731 | } | 3540 | } |
3732 | 3541 | ||
3542 | xfs_ifunlock(ip); | ||
3543 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
3544 | |||
3733 | reclaim: | 3545 | reclaim: |
3734 | xfs_ireclaim(ip); | 3546 | xfs_ireclaim(ip); |
3735 | return 0; | 3547 | return 0; |
@@ -3845,9 +3657,8 @@ xfs_alloc_file_space( | |||
3845 | end_dmi_offset = offset+len; | 3657 | end_dmi_offset = offset+len; |
3846 | if (end_dmi_offset > ip->i_size) | 3658 | if (end_dmi_offset > ip->i_size) |
3847 | end_dmi_offset = ip->i_size; | 3659 | end_dmi_offset = ip->i_size; |
3848 | error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip), | 3660 | error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, offset, |
3849 | offset, end_dmi_offset - offset, | 3661 | end_dmi_offset - offset, 0, NULL); |
3850 | 0, NULL); | ||
3851 | if (error) | 3662 | if (error) |
3852 | return error; | 3663 | return error; |
3853 | } | 3664 | } |
@@ -3956,8 +3767,8 @@ dmapi_enospc_check: | |||
3956 | if (error == ENOSPC && (attr_flags & ATTR_DMI) == 0 && | 3767 | if (error == ENOSPC && (attr_flags & ATTR_DMI) == 0 && |
3957 | DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) { | 3768 | DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) { |
3958 | error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE, | 3769 | error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE, |
3959 | XFS_ITOV(ip), DM_RIGHT_NULL, | 3770 | ip, DM_RIGHT_NULL, |
3960 | XFS_ITOV(ip), DM_RIGHT_NULL, | 3771 | ip, DM_RIGHT_NULL, |
3961 | NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ | 3772 | NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ |
3962 | if (error == 0) | 3773 | if (error == 0) |
3963 | goto retry; /* Maybe DMAPI app. has made space */ | 3774 | goto retry; /* Maybe DMAPI app. has made space */ |
@@ -4021,7 +3832,8 @@ xfs_zero_remaining_bytes( | |||
4021 | XFS_BUF_READ(bp); | 3832 | XFS_BUF_READ(bp); |
4022 | XFS_BUF_SET_ADDR(bp, XFS_FSB_TO_DB(ip, imap.br_startblock)); | 3833 | XFS_BUF_SET_ADDR(bp, XFS_FSB_TO_DB(ip, imap.br_startblock)); |
4023 | xfsbdstrat(mp, bp); | 3834 | xfsbdstrat(mp, bp); |
4024 | if ((error = xfs_iowait(bp))) { | 3835 | error = xfs_iowait(bp); |
3836 | if (error) { | ||
4025 | xfs_ioerror_alert("xfs_zero_remaining_bytes(read)", | 3837 | xfs_ioerror_alert("xfs_zero_remaining_bytes(read)", |
4026 | mp, bp, XFS_BUF_ADDR(bp)); | 3838 | mp, bp, XFS_BUF_ADDR(bp)); |
4027 | break; | 3839 | break; |
@@ -4033,7 +3845,8 @@ xfs_zero_remaining_bytes( | |||
4033 | XFS_BUF_UNREAD(bp); | 3845 | XFS_BUF_UNREAD(bp); |
4034 | XFS_BUF_WRITE(bp); | 3846 | XFS_BUF_WRITE(bp); |
4035 | xfsbdstrat(mp, bp); | 3847 | xfsbdstrat(mp, bp); |
4036 | if ((error = xfs_iowait(bp))) { | 3848 | error = xfs_iowait(bp); |
3849 | if (error) { | ||
4037 | xfs_ioerror_alert("xfs_zero_remaining_bytes(write)", | 3850 | xfs_ioerror_alert("xfs_zero_remaining_bytes(write)", |
4038 | mp, bp, XFS_BUF_ADDR(bp)); | 3851 | mp, bp, XFS_BUF_ADDR(bp)); |
4039 | break; | 3852 | break; |
@@ -4102,7 +3915,7 @@ xfs_free_file_space( | |||
4102 | DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) { | 3915 | DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) { |
4103 | if (end_dmi_offset > ip->i_size) | 3916 | if (end_dmi_offset > ip->i_size) |
4104 | end_dmi_offset = ip->i_size; | 3917 | end_dmi_offset = ip->i_size; |
4105 | error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, | 3918 | error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, |
4106 | offset, end_dmi_offset - offset, | 3919 | offset, end_dmi_offset - offset, |
4107 | AT_DELAY_FLAG(attr_flags), NULL); | 3920 | AT_DELAY_FLAG(attr_flags), NULL); |
4108 | if (error) | 3921 | if (error) |
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index 4e3970f0e5e3..24c53923dc2c 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h | |||
@@ -23,31 +23,32 @@ int xfs_fsync(struct xfs_inode *ip, int flag, xfs_off_t start, | |||
23 | xfs_off_t stop); | 23 | xfs_off_t stop); |
24 | int xfs_release(struct xfs_inode *ip); | 24 | int xfs_release(struct xfs_inode *ip); |
25 | int xfs_inactive(struct xfs_inode *ip); | 25 | int xfs_inactive(struct xfs_inode *ip); |
26 | int xfs_lookup(struct xfs_inode *dp, bhv_vname_t *dentry, | 26 | int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, |
27 | bhv_vnode_t **vpp); | 27 | struct xfs_inode **ipp); |
28 | int xfs_create(struct xfs_inode *dp, bhv_vname_t *dentry, mode_t mode, | 28 | int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode, |
29 | xfs_dev_t rdev, bhv_vnode_t **vpp, struct cred *credp); | 29 | xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp); |
30 | int xfs_remove(struct xfs_inode *dp, bhv_vname_t *dentry); | 30 | int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, |
31 | int xfs_link(struct xfs_inode *tdp, bhv_vnode_t *src_vp, | 31 | struct xfs_inode *ip); |
32 | bhv_vname_t *dentry); | 32 | int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, |
33 | int xfs_mkdir(struct xfs_inode *dp, bhv_vname_t *dentry, | 33 | struct xfs_name *target_name); |
34 | mode_t mode, bhv_vnode_t **vpp, struct cred *credp); | 34 | int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name, |
35 | int xfs_rmdir(struct xfs_inode *dp, bhv_vname_t *dentry); | 35 | mode_t mode, struct xfs_inode **ipp, struct cred *credp); |
36 | int xfs_rmdir(struct xfs_inode *dp, struct xfs_name *name, | ||
37 | struct xfs_inode *cdp); | ||
36 | int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, | 38 | int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, |
37 | xfs_off_t *offset, filldir_t filldir); | 39 | xfs_off_t *offset, filldir_t filldir); |
38 | int xfs_symlink(struct xfs_inode *dp, bhv_vname_t *dentry, | 40 | int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, |
39 | char *target_path, mode_t mode, bhv_vnode_t **vpp, | 41 | const char *target_path, mode_t mode, struct xfs_inode **ipp, |
40 | struct cred *credp); | 42 | struct cred *credp); |
41 | int xfs_rwlock(struct xfs_inode *ip, bhv_vrwlock_t locktype); | ||
42 | void xfs_rwunlock(struct xfs_inode *ip, bhv_vrwlock_t locktype); | ||
43 | int xfs_inode_flush(struct xfs_inode *ip, int flags); | 43 | int xfs_inode_flush(struct xfs_inode *ip, int flags); |
44 | int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); | 44 | int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); |
45 | int xfs_reclaim(struct xfs_inode *ip); | 45 | int xfs_reclaim(struct xfs_inode *ip); |
46 | int xfs_change_file_space(struct xfs_inode *ip, int cmd, | 46 | int xfs_change_file_space(struct xfs_inode *ip, int cmd, |
47 | xfs_flock64_t *bf, xfs_off_t offset, | 47 | xfs_flock64_t *bf, xfs_off_t offset, |
48 | struct cred *credp, int attr_flags); | 48 | struct cred *credp, int attr_flags); |
49 | int xfs_rename(struct xfs_inode *src_dp, bhv_vname_t *src_vname, | 49 | int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, |
50 | bhv_vnode_t *target_dir_vp, bhv_vname_t *target_vname); | 50 | struct xfs_inode *src_ip, struct xfs_inode *target_dp, |
51 | struct xfs_name *target_name); | ||
51 | int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value, | 52 | int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value, |
52 | int *valuelenp, int flags, cred_t *cred); | 53 | int *valuelenp, int flags, cred_t *cred); |
53 | int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value, | 54 | int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value, |