aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJeff Garzik <jgarzik@pobox.com>2005-09-08 05:39:55 -0400
committerJeff Garzik <jgarzik@pobox.com>2005-09-08 05:39:55 -0400
commitc324b44c34050cf2a9b58830e11c974806bd85d8 (patch)
tree3ac45a783221283925cd698334a8f5e7dd4c1df8 /fs
parent2fcf522509cceea524b6e7ece8fd6759b682175a (diff)
parentcaf39e87cc1182f7dae84eefc43ca14d54c78ef9 (diff)
Merge /spare/repo/linux-2.6/
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig64
-rw-r--r--fs/Makefile1
-rw-r--r--fs/aio.c4
-rw-r--r--fs/binfmt_flat.c2
-rw-r--r--fs/bio.c259
-rw-r--r--fs/buffer.c40
-rw-r--r--fs/cifs/connect.c82
-rw-r--r--fs/cifs/dir.c27
-rw-r--r--fs/compat.c116
-rw-r--r--fs/cramfs/inode.c43
-rw-r--r--fs/devpts/Makefile1
-rw-r--r--fs/devpts/inode.c21
-rw-r--r--fs/devpts/xattr_security.c47
-rw-r--r--fs/ext2/super.c59
-rw-r--r--fs/ext3/super.c92
-rw-r--r--fs/fat/dir.c28
-rw-r--r--fs/file_table.c1
-rw-r--r--fs/freevxfs/vxfs_super.c2
-rw-r--r--fs/hfs/bnode.c21
-rw-r--r--fs/hfs/catalog.c35
-rw-r--r--fs/hfs/dir.c11
-rw-r--r--fs/hfs/hfs.h1
-rw-r--r--fs/hfs/hfs_fs.h8
-rw-r--r--fs/hfs/inode.c2
-rw-r--r--fs/hfs/mdb.c6
-rw-r--r--fs/hfs/super.c68
-rw-r--r--fs/hfs/trans.c116
-rw-r--r--fs/hfsplus/bnode.c21
-rw-r--r--fs/hfsplus/hfsplus_fs.h5
-rw-r--r--fs/hfsplus/options.c26
-rw-r--r--fs/hfsplus/super.c11
-rw-r--r--fs/hostfs/hostfs.h1
-rw-r--r--fs/inode.c3
-rw-r--r--fs/inotify.c16
-rw-r--r--fs/jbd/checkpoint.c2
-rw-r--r--fs/jbd/commit.c38
-rw-r--r--fs/jbd/journal.c38
-rw-r--r--fs/jbd/revoke.c5
-rw-r--r--fs/jbd/transaction.c39
-rw-r--r--fs/jffs/inode-v23.c3
-rw-r--r--fs/jffs2/file.c3
-rw-r--r--fs/jfs/jfs_filsys.h3
-rw-r--r--fs/jfs/namei.c5
-rw-r--r--fs/jfs/super.c48
-rw-r--r--fs/namei.c52
-rw-r--r--fs/namespace.c6
-rw-r--r--fs/nfsd/export.c3
-rw-r--r--fs/nfsd/nfs4idmap.c8
-rw-r--r--fs/nfsd/nfs4recover.c5
-rw-r--r--fs/open.c19
-rw-r--r--fs/pipe.c13
-rw-r--r--fs/proc/base.c159
-rw-r--r--fs/proc/generic.c13
-rw-r--r--fs/proc/task_mmu.c357
-rw-r--r--fs/read_write.c2
-rw-r--r--fs/reiserfs/journal.c4
-rw-r--r--fs/relayfs/Makefile4
-rw-r--r--fs/relayfs/buffers.c189
-rw-r--r--fs/relayfs/buffers.h12
-rw-r--r--fs/relayfs/inode.c609
-rw-r--r--fs/relayfs/relay.c431
-rw-r--r--fs/relayfs/relay.h12
-rw-r--r--fs/ufs/balloc.c12
-rw-r--r--fs/ufs/ialloc.c6
-rw-r--r--fs/ufs/truncate.c9
-rw-r--r--fs/umsdos/notes17
-rw-r--r--fs/xattr.c82
-rw-r--r--fs/xfs/Makefile151
-rw-r--r--fs/xfs/Makefile-linux-2.6141
-rw-r--r--fs/xfs/linux-2.6/kmem.c23
-rw-r--r--fs/xfs/linux-2.6/kmem.h23
-rw-r--r--fs/xfs/linux-2.6/spin.h3
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c259
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.h50
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c117
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h12
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c90
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c18
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c65
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h13
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h7
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c166
-rw-r--r--fs/xfs/linux-2.6/xfs_vfs.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_vfs.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.c251
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h60
-rw-r--r--fs/xfs/quota/Makefile1
-rw-r--r--fs/xfs/quota/Makefile-linux-2.653
-rw-r--r--fs/xfs/quota/xfs_dquot.c43
-rw-r--r--fs/xfs/quota/xfs_dquot.h16
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c1
-rw-r--r--fs/xfs/quota/xfs_qm.c26
-rw-r--r--fs/xfs/quota/xfs_qm.h2
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c44
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c16
-rw-r--r--fs/xfs/support/debug.c1
-rw-r--r--fs/xfs/xfs_acl.c6
-rw-r--r--fs/xfs/xfs_bmap.c12
-rw-r--r--fs/xfs/xfs_buf_item.c4
-rw-r--r--fs/xfs/xfs_dmapi.h2
-rw-r--r--fs/xfs/xfs_extfree_item.c2
-rw-r--r--fs/xfs/xfs_iget.c35
-rw-r--r--fs/xfs/xfs_inode.c3
-rw-r--r--fs/xfs/xfs_inode_item.c9
-rw-r--r--fs/xfs/xfs_iomap.c22
-rw-r--r--fs/xfs/xfs_log.c215
-rw-r--r--fs/xfs/xfs_log.h38
-rw-r--r--fs/xfs/xfs_log_priv.h68
-rw-r--r--fs/xfs/xfs_log_recover.c2
-rw-r--r--fs/xfs/xfs_qmops.c78
-rw-r--r--fs/xfs/xfs_quota.h17
-rw-r--r--fs/xfs/xfs_trans.c3
-rw-r--r--fs/xfs/xfs_trans.h2
-rw-r--r--fs/xfs/xfs_trans_ail.c2
-rw-r--r--fs/xfs/xfs_trans_buf.c23
-rw-r--r--fs/xfs/xfs_vfsops.c62
-rw-r--r--fs/xfs/xfs_vnodeops.c92
119 files changed, 4024 insertions, 1794 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index e54be7058359..5e817902cb3b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -382,10 +382,8 @@ config QUOTA
382 usage (also called disk quotas). Currently, it works for the 382 usage (also called disk quotas). Currently, it works for the
383 ext2, ext3, and reiserfs file system. ext3 also supports journalled 383 ext2, ext3, and reiserfs file system. ext3 also supports journalled
384 quotas for which you don't need to run quotacheck(8) after an unclean 384 quotas for which you don't need to run quotacheck(8) after an unclean
385 shutdown. You need additional software in order to use quota support 385 shutdown.
386 (you can download sources from 386 For further details, read the Quota mini-HOWTO, available from
387 <http://www.sf.net/projects/linuxquota/>). For further details, read
388 the Quota mini-HOWTO, available from
389 <http://www.tldp.org/docs.html#howto>, or the documentation provided 387 <http://www.tldp.org/docs.html#howto>, or the documentation provided
390 with the quota tools. Probably the quota support is only useful for 388 with the quota tools. Probably the quota support is only useful for
391 multi user systems. If unsure, say N. 389 multi user systems. If unsure, say N.
@@ -403,8 +401,7 @@ config QFMT_V2
403 depends on QUOTA 401 depends on QUOTA
404 help 402 help
405 This quota format allows using quotas with 32-bit UIDs/GIDs. If you 403 This quota format allows using quotas with 32-bit UIDs/GIDs. If you
406 need this functionality say Y here. Note that you will need recent 404 need this functionality say Y here.
407 quota utilities (>= 3.01) for new quota format with this kernel.
408 405
409config QUOTACTL 406config QUOTACTL
410 bool 407 bool
@@ -783,28 +780,6 @@ config SYSFS
783 780
784 Designers of embedded systems may wish to say N here to conserve space. 781 Designers of embedded systems may wish to say N here to conserve space.
785 782
786config DEVPTS_FS_XATTR
787 bool "/dev/pts Extended Attributes"
788 depends on UNIX98_PTYS
789 help
790 Extended attributes are name:value pairs associated with inodes by
791 the kernel or by users (see the attr(5) manual page, or visit
792 <http://acl.bestbits.at/> for details).
793
794 If unsure, say N.
795
796config DEVPTS_FS_SECURITY
797 bool "/dev/pts Security Labels"
798 depends on DEVPTS_FS_XATTR
799 help
800 Security labels support alternative access control models
801 implemented by security modules like SELinux. This option
802 enables an extended attribute handler for file security
803 labels in the /dev/pts filesystem.
804
805 If you are not using a security module that requires using
806 extended attributes for file security labels, say N.
807
808config TMPFS 783config TMPFS
809 bool "Virtual memory file system support (former shm fs)" 784 bool "Virtual memory file system support (former shm fs)"
810 help 785 help
@@ -817,27 +792,6 @@ config TMPFS
817 792
818 See <file:Documentation/filesystems/tmpfs.txt> for details. 793 See <file:Documentation/filesystems/tmpfs.txt> for details.
819 794
820config TMPFS_XATTR
821 bool "tmpfs Extended Attributes"
822 depends on TMPFS
823 help
824 Extended attributes are name:value pairs associated with inodes by
825 the kernel or by users (see the attr(5) manual page, or visit
826 <http://acl.bestbits.at/> for details).
827
828 If unsure, say N.
829
830config TMPFS_SECURITY
831 bool "tmpfs Security Labels"
832 depends on TMPFS_XATTR
833 help
834 Security labels support alternative access control models
835 implemented by security modules like SELinux. This option
836 enables an extended attribute handler for file security
837 labels in the tmpfs filesystem.
838 If you are not using a security module that requires using
839 extended attributes for file security labels, say N.
840
841config HUGETLBFS 795config HUGETLBFS
842 bool "HugeTLB file system support" 796 bool "HugeTLB file system support"
843 depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || X86_64 || BROKEN 797 depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || X86_64 || BROKEN
@@ -859,6 +813,18 @@ config RAMFS
859 To compile this as a module, choose M here: the module will be called 813 To compile this as a module, choose M here: the module will be called
860 ramfs. 814 ramfs.
861 815
816config RELAYFS_FS
817 tristate "Relayfs file system support"
818 ---help---
819 Relayfs is a high-speed data relay filesystem designed to provide
820 an efficient mechanism for tools and facilities to relay large
821 amounts of data from kernel space to user space.
822
823 To compile this code as a module, choose M here: the module will be
824 called relayfs.
825
826 If unsure, say N.
827
862endmenu 828endmenu
863 829
864menu "Miscellaneous filesystems" 830menu "Miscellaneous filesystems"
diff --git a/fs/Makefile b/fs/Makefile
index cf95eb894fd5..15158309dee4 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -90,6 +90,7 @@ obj-$(CONFIG_AUTOFS_FS) += autofs/
90obj-$(CONFIG_AUTOFS4_FS) += autofs4/ 90obj-$(CONFIG_AUTOFS4_FS) += autofs4/
91obj-$(CONFIG_ADFS_FS) += adfs/ 91obj-$(CONFIG_ADFS_FS) += adfs/
92obj-$(CONFIG_UDF_FS) += udf/ 92obj-$(CONFIG_UDF_FS) += udf/
93obj-$(CONFIG_RELAYFS_FS) += relayfs/
93obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ 94obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/
94obj-$(CONFIG_JFS_FS) += jfs/ 95obj-$(CONFIG_JFS_FS) += jfs/
95obj-$(CONFIG_XFS_FS) += xfs/ 96obj-$(CONFIG_XFS_FS) += xfs/
diff --git a/fs/aio.c b/fs/aio.c
index 06d7d4390fe7..4f641abac3c0 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -567,6 +567,10 @@ static void use_mm(struct mm_struct *mm)
567 atomic_inc(&mm->mm_count); 567 atomic_inc(&mm->mm_count);
568 tsk->mm = mm; 568 tsk->mm = mm;
569 tsk->active_mm = mm; 569 tsk->active_mm = mm;
570 /*
571 * Note that on UML this *requires* PF_BORROWED_MM to be set, otherwise
572 * it won't work. Update it accordingly if you change it here
573 */
570 activate_mm(active_mm, mm); 574 activate_mm(active_mm, mm);
571 task_unlock(tsk); 575 task_unlock(tsk);
572 576
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index c8998dc66882..7974efa107bc 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -520,7 +520,7 @@ static int load_flat_file(struct linux_binprm * bprm,
520 DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n"); 520 DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n");
521 521
522 down_write(&current->mm->mmap_sem); 522 down_write(&current->mm->mmap_sem);
523 textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, MAP_SHARED, 0); 523 textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, MAP_PRIVATE, 0);
524 up_write(&current->mm->mmap_sem); 524 up_write(&current->mm->mmap_sem);
525 if (!textpos || textpos >= (unsigned long) -4096) { 525 if (!textpos || textpos >= (unsigned long) -4096) {
526 if (!textpos) 526 if (!textpos)
diff --git a/fs/bio.c b/fs/bio.c
index 1f2d4649b188..a7d4fd3a3299 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -25,6 +25,7 @@
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/mempool.h> 26#include <linux/mempool.h>
27#include <linux/workqueue.h> 27#include <linux/workqueue.h>
28#include <scsi/sg.h> /* for struct sg_iovec */
28 29
29#define BIO_POOL_SIZE 256 30#define BIO_POOL_SIZE 256
30 31
@@ -104,18 +105,22 @@ static inline struct bio_vec *bvec_alloc_bs(unsigned int __nocast gfp_mask, int
104 return bvl; 105 return bvl;
105} 106}
106 107
107/* 108void bio_free(struct bio *bio, struct bio_set *bio_set)
108 * default destructor for a bio allocated with bio_alloc_bioset()
109 */
110static void bio_destructor(struct bio *bio)
111{ 109{
112 const int pool_idx = BIO_POOL_IDX(bio); 110 const int pool_idx = BIO_POOL_IDX(bio);
113 struct bio_set *bs = bio->bi_set;
114 111
115 BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS); 112 BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
116 113
117 mempool_free(bio->bi_io_vec, bs->bvec_pools[pool_idx]); 114 mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
118 mempool_free(bio, bs->bio_pool); 115 mempool_free(bio, bio_set->bio_pool);
116}
117
118/*
119 * default destructor for a bio allocated with bio_alloc_bioset()
120 */
121static void bio_fs_destructor(struct bio *bio)
122{
123 bio_free(bio, fs_bio_set);
119} 124}
120 125
121inline void bio_init(struct bio *bio) 126inline void bio_init(struct bio *bio)
@@ -171,8 +176,6 @@ struct bio *bio_alloc_bioset(unsigned int __nocast gfp_mask, int nr_iovecs, stru
171 bio->bi_max_vecs = bvec_slabs[idx].nr_vecs; 176 bio->bi_max_vecs = bvec_slabs[idx].nr_vecs;
172 } 177 }
173 bio->bi_io_vec = bvl; 178 bio->bi_io_vec = bvl;
174 bio->bi_destructor = bio_destructor;
175 bio->bi_set = bs;
176 } 179 }
177out: 180out:
178 return bio; 181 return bio;
@@ -180,7 +183,12 @@ out:
180 183
181struct bio *bio_alloc(unsigned int __nocast gfp_mask, int nr_iovecs) 184struct bio *bio_alloc(unsigned int __nocast gfp_mask, int nr_iovecs)
182{ 185{
183 return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); 186 struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
187
188 if (bio)
189 bio->bi_destructor = bio_fs_destructor;
190
191 return bio;
184} 192}
185 193
186void zero_fill_bio(struct bio *bio) 194void zero_fill_bio(struct bio *bio)
@@ -273,8 +281,10 @@ struct bio *bio_clone(struct bio *bio, unsigned int __nocast gfp_mask)
273{ 281{
274 struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); 282 struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set);
275 283
276 if (b) 284 if (b) {
285 b->bi_destructor = bio_fs_destructor;
277 __bio_clone(b, bio); 286 __bio_clone(b, bio);
287 }
278 288
279 return b; 289 return b;
280} 290}
@@ -546,22 +556,34 @@ out_bmd:
546 return ERR_PTR(ret); 556 return ERR_PTR(ret);
547} 557}
548 558
549static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, 559static struct bio *__bio_map_user_iov(request_queue_t *q,
550 unsigned long uaddr, unsigned int len, 560 struct block_device *bdev,
551 int write_to_vm) 561 struct sg_iovec *iov, int iov_count,
562 int write_to_vm)
552{ 563{
553 unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 564 int i, j;
554 unsigned long start = uaddr >> PAGE_SHIFT; 565 int nr_pages = 0;
555 const int nr_pages = end - start;
556 int ret, offset, i;
557 struct page **pages; 566 struct page **pages;
558 struct bio *bio; 567 struct bio *bio;
568 int cur_page = 0;
569 int ret, offset;
559 570
560 /* 571 for (i = 0; i < iov_count; i++) {
561 * transfer and buffer must be aligned to at least hardsector 572 unsigned long uaddr = (unsigned long)iov[i].iov_base;
562 * size for now, in the future we can relax this restriction 573 unsigned long len = iov[i].iov_len;
563 */ 574 unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
564 if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q))) 575 unsigned long start = uaddr >> PAGE_SHIFT;
576
577 nr_pages += end - start;
578 /*
579 * transfer and buffer must be aligned to at least hardsector
580 * size for now, in the future we can relax this restriction
581 */
582 if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q)))
583 return ERR_PTR(-EINVAL);
584 }
585
586 if (!nr_pages)
565 return ERR_PTR(-EINVAL); 587 return ERR_PTR(-EINVAL);
566 588
567 bio = bio_alloc(GFP_KERNEL, nr_pages); 589 bio = bio_alloc(GFP_KERNEL, nr_pages);
@@ -573,42 +595,54 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev,
573 if (!pages) 595 if (!pages)
574 goto out; 596 goto out;
575 597
576 down_read(&current->mm->mmap_sem); 598 memset(pages, 0, nr_pages * sizeof(struct page *));
577 ret = get_user_pages(current, current->mm, uaddr, nr_pages, 599
578 write_to_vm, 0, pages, NULL); 600 for (i = 0; i < iov_count; i++) {
579 up_read(&current->mm->mmap_sem); 601 unsigned long uaddr = (unsigned long)iov[i].iov_base;
580 602 unsigned long len = iov[i].iov_len;
581 if (ret < nr_pages) 603 unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
582 goto out; 604 unsigned long start = uaddr >> PAGE_SHIFT;
583 605 const int local_nr_pages = end - start;
584 bio->bi_bdev = bdev; 606 const int page_limit = cur_page + local_nr_pages;
585 607
586 offset = uaddr & ~PAGE_MASK; 608 down_read(&current->mm->mmap_sem);
587 for (i = 0; i < nr_pages; i++) { 609 ret = get_user_pages(current, current->mm, uaddr,
588 unsigned int bytes = PAGE_SIZE - offset; 610 local_nr_pages,
589 611 write_to_vm, 0, &pages[cur_page], NULL);
590 if (len <= 0) 612 up_read(&current->mm->mmap_sem);
591 break; 613
592 614 if (ret < local_nr_pages)
593 if (bytes > len) 615 goto out_unmap;
594 bytes = len; 616
617
618 offset = uaddr & ~PAGE_MASK;
619 for (j = cur_page; j < page_limit; j++) {
620 unsigned int bytes = PAGE_SIZE - offset;
621
622 if (len <= 0)
623 break;
624
625 if (bytes > len)
626 bytes = len;
627
628 /*
629 * sorry...
630 */
631 if (__bio_add_page(q, bio, pages[j], bytes, offset) < bytes)
632 break;
633
634 len -= bytes;
635 offset = 0;
636 }
595 637
638 cur_page = j;
596 /* 639 /*
597 * sorry... 640 * release the pages we didn't map into the bio, if any
598 */ 641 */
599 if (__bio_add_page(q, bio, pages[i], bytes, offset) < bytes) 642 while (j < page_limit)
600 break; 643 page_cache_release(pages[j++]);
601
602 len -= bytes;
603 offset = 0;
604 } 644 }
605 645
606 /*
607 * release the pages we didn't map into the bio, if any
608 */
609 while (i < nr_pages)
610 page_cache_release(pages[i++]);
611
612 kfree(pages); 646 kfree(pages);
613 647
614 /* 648 /*
@@ -617,9 +651,17 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev,
617 if (!write_to_vm) 651 if (!write_to_vm)
618 bio->bi_rw |= (1 << BIO_RW); 652 bio->bi_rw |= (1 << BIO_RW);
619 653
654 bio->bi_bdev = bdev;
620 bio->bi_flags |= (1 << BIO_USER_MAPPED); 655 bio->bi_flags |= (1 << BIO_USER_MAPPED);
621 return bio; 656 return bio;
622out: 657
658 out_unmap:
659 for (i = 0; i < nr_pages; i++) {
660 if(!pages[i])
661 break;
662 page_cache_release(pages[i]);
663 }
664 out:
623 kfree(pages); 665 kfree(pages);
624 bio_put(bio); 666 bio_put(bio);
625 return ERR_PTR(ret); 667 return ERR_PTR(ret);
@@ -639,9 +681,33 @@ out:
639struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, 681struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev,
640 unsigned long uaddr, unsigned int len, int write_to_vm) 682 unsigned long uaddr, unsigned int len, int write_to_vm)
641{ 683{
684 struct sg_iovec iov;
685
686 iov.iov_base = (__user void *)uaddr;
687 iov.iov_len = len;
688
689 return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm);
690}
691
692/**
693 * bio_map_user_iov - map user sg_iovec table into bio
694 * @q: the request_queue_t for the bio
695 * @bdev: destination block device
696 * @iov: the iovec.
697 * @iov_count: number of elements in the iovec
698 * @write_to_vm: bool indicating writing to pages or not
699 *
700 * Map the user space address into a bio suitable for io to a block
701 * device. Returns an error pointer in case of error.
702 */
703struct bio *bio_map_user_iov(request_queue_t *q, struct block_device *bdev,
704 struct sg_iovec *iov, int iov_count,
705 int write_to_vm)
706{
642 struct bio *bio; 707 struct bio *bio;
708 int len = 0, i;
643 709
644 bio = __bio_map_user(q, bdev, uaddr, len, write_to_vm); 710 bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm);
645 711
646 if (IS_ERR(bio)) 712 if (IS_ERR(bio))
647 return bio; 713 return bio;
@@ -654,6 +720,9 @@ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev,
654 */ 720 */
655 bio_get(bio); 721 bio_get(bio);
656 722
723 for (i = 0; i < iov_count; i++)
724 len += iov[i].iov_len;
725
657 if (bio->bi_size == len) 726 if (bio->bi_size == len)
658 return bio; 727 return bio;
659 728
@@ -698,6 +767,82 @@ void bio_unmap_user(struct bio *bio)
698 bio_put(bio); 767 bio_put(bio);
699} 768}
700 769
770static int bio_map_kern_endio(struct bio *bio, unsigned int bytes_done, int err)
771{
772 if (bio->bi_size)
773 return 1;
774
775 bio_put(bio);
776 return 0;
777}
778
779
780static struct bio *__bio_map_kern(request_queue_t *q, void *data,
781 unsigned int len, unsigned int gfp_mask)
782{
783 unsigned long kaddr = (unsigned long)data;
784 unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
785 unsigned long start = kaddr >> PAGE_SHIFT;
786 const int nr_pages = end - start;
787 int offset, i;
788 struct bio *bio;
789
790 bio = bio_alloc(gfp_mask, nr_pages);
791 if (!bio)
792 return ERR_PTR(-ENOMEM);
793
794 offset = offset_in_page(kaddr);
795 for (i = 0; i < nr_pages; i++) {
796 unsigned int bytes = PAGE_SIZE - offset;
797
798 if (len <= 0)
799 break;
800
801 if (bytes > len)
802 bytes = len;
803
804 if (__bio_add_page(q, bio, virt_to_page(data), bytes,
805 offset) < bytes)
806 break;
807
808 data += bytes;
809 len -= bytes;
810 offset = 0;
811 }
812
813 bio->bi_end_io = bio_map_kern_endio;
814 return bio;
815}
816
817/**
818 * bio_map_kern - map kernel address into bio
819 * @q: the request_queue_t for the bio
820 * @data: pointer to buffer to map
821 * @len: length in bytes
822 * @gfp_mask: allocation flags for bio allocation
823 *
824 * Map the kernel address into a bio suitable for io to a block
825 * device. Returns an error pointer in case of error.
826 */
827struct bio *bio_map_kern(request_queue_t *q, void *data, unsigned int len,
828 unsigned int gfp_mask)
829{
830 struct bio *bio;
831
832 bio = __bio_map_kern(q, data, len, gfp_mask);
833 if (IS_ERR(bio))
834 return bio;
835
836 if (bio->bi_size == len)
837 return bio;
838
839 /*
840 * Don't support partial mappings.
841 */
842 bio_put(bio);
843 return ERR_PTR(-EINVAL);
844}
845
701/* 846/*
702 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions 847 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
703 * for performing direct-IO in BIOs. 848 * for performing direct-IO in BIOs.
@@ -1075,6 +1220,7 @@ subsys_initcall(init_bio);
1075 1220
1076EXPORT_SYMBOL(bio_alloc); 1221EXPORT_SYMBOL(bio_alloc);
1077EXPORT_SYMBOL(bio_put); 1222EXPORT_SYMBOL(bio_put);
1223EXPORT_SYMBOL(bio_free);
1078EXPORT_SYMBOL(bio_endio); 1224EXPORT_SYMBOL(bio_endio);
1079EXPORT_SYMBOL(bio_init); 1225EXPORT_SYMBOL(bio_init);
1080EXPORT_SYMBOL(__bio_clone); 1226EXPORT_SYMBOL(__bio_clone);
@@ -1085,6 +1231,7 @@ EXPORT_SYMBOL(bio_add_page);
1085EXPORT_SYMBOL(bio_get_nr_vecs); 1231EXPORT_SYMBOL(bio_get_nr_vecs);
1086EXPORT_SYMBOL(bio_map_user); 1232EXPORT_SYMBOL(bio_map_user);
1087EXPORT_SYMBOL(bio_unmap_user); 1233EXPORT_SYMBOL(bio_unmap_user);
1234EXPORT_SYMBOL(bio_map_kern);
1088EXPORT_SYMBOL(bio_pair_release); 1235EXPORT_SYMBOL(bio_pair_release);
1089EXPORT_SYMBOL(bio_split); 1236EXPORT_SYMBOL(bio_split);
1090EXPORT_SYMBOL(bio_split_pool); 1237EXPORT_SYMBOL(bio_split_pool);
diff --git a/fs/buffer.c b/fs/buffer.c
index 6a25d7df89b1..1c62203a4906 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -917,8 +917,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
917 * contents - it is a noop if I/O is still in 917 * contents - it is a noop if I/O is still in
918 * flight on potentially older contents. 918 * flight on potentially older contents.
919 */ 919 */
920 wait_on_buffer(bh); 920 ll_rw_block(SWRITE, 1, &bh);
921 ll_rw_block(WRITE, 1, &bh);
922 brelse(bh); 921 brelse(bh);
923 spin_lock(lock); 922 spin_lock(lock);
924 } 923 }
@@ -2793,21 +2792,22 @@ int submit_bh(int rw, struct buffer_head * bh)
2793 2792
2794/** 2793/**
2795 * ll_rw_block: low-level access to block devices (DEPRECATED) 2794 * ll_rw_block: low-level access to block devices (DEPRECATED)
2796 * @rw: whether to %READ or %WRITE or maybe %READA (readahead) 2795 * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead)
2797 * @nr: number of &struct buffer_heads in the array 2796 * @nr: number of &struct buffer_heads in the array
2798 * @bhs: array of pointers to &struct buffer_head 2797 * @bhs: array of pointers to &struct buffer_head
2799 * 2798 *
2800 * ll_rw_block() takes an array of pointers to &struct buffer_heads, 2799 * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
2801 * and requests an I/O operation on them, either a %READ or a %WRITE. 2800 * requests an I/O operation on them, either a %READ or a %WRITE. The third
2802 * The third %READA option is described in the documentation for 2801 * %SWRITE is like %WRITE only we make sure that the *current* data in buffers
2803 * generic_make_request() which ll_rw_block() calls. 2802 * are sent to disk. The fourth %READA option is described in the documentation
2803 * for generic_make_request() which ll_rw_block() calls.
2804 * 2804 *
2805 * This function drops any buffer that it cannot get a lock on (with the 2805 * This function drops any buffer that it cannot get a lock on (with the
2806 * BH_Lock state bit), any buffer that appears to be clean when doing a 2806 * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be
2807 * write request, and any buffer that appears to be up-to-date when doing 2807 * clean when doing a write request, and any buffer that appears to be
2808 * read request. Further it marks as clean buffers that are processed for 2808 * up-to-date when doing read request. Further it marks as clean buffers that
2809 * writing (the buffer cache won't assume that they are actually clean until 2809 * are processed for writing (the buffer cache won't assume that they are
2810 * the buffer gets unlocked). 2810 * actually clean until the buffer gets unlocked).
2811 * 2811 *
2812 * ll_rw_block sets b_end_io to simple completion handler that marks 2812 * ll_rw_block sets b_end_io to simple completion handler that marks
2813 * the buffer up-to-date (if approriate), unlocks the buffer and wakes 2813 * the buffer up-to-date (if approriate), unlocks the buffer and wakes
@@ -2823,11 +2823,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
2823 for (i = 0; i < nr; i++) { 2823 for (i = 0; i < nr; i++) {
2824 struct buffer_head *bh = bhs[i]; 2824 struct buffer_head *bh = bhs[i];
2825 2825
2826 if (test_set_buffer_locked(bh)) 2826 if (rw == SWRITE)
2827 lock_buffer(bh);
2828 else if (test_set_buffer_locked(bh))
2827 continue; 2829 continue;
2828 2830
2829 get_bh(bh); 2831 get_bh(bh);
2830 if (rw == WRITE) { 2832 if (rw == WRITE || rw == SWRITE) {
2831 if (test_clear_buffer_dirty(bh)) { 2833 if (test_clear_buffer_dirty(bh)) {
2832 bh->b_end_io = end_buffer_write_sync; 2834 bh->b_end_io = end_buffer_write_sync;
2833 submit_bh(WRITE, bh); 2835 submit_bh(WRITE, bh);
@@ -3046,10 +3048,9 @@ struct buffer_head *alloc_buffer_head(unsigned int __nocast gfp_flags)
3046{ 3048{
3047 struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags); 3049 struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags);
3048 if (ret) { 3050 if (ret) {
3049 preempt_disable(); 3051 get_cpu_var(bh_accounting).nr++;
3050 __get_cpu_var(bh_accounting).nr++;
3051 recalc_bh_state(); 3052 recalc_bh_state();
3052 preempt_enable(); 3053 put_cpu_var(bh_accounting);
3053 } 3054 }
3054 return ret; 3055 return ret;
3055} 3056}
@@ -3059,10 +3060,9 @@ void free_buffer_head(struct buffer_head *bh)
3059{ 3060{
3060 BUG_ON(!list_empty(&bh->b_assoc_buffers)); 3061 BUG_ON(!list_empty(&bh->b_assoc_buffers));
3061 kmem_cache_free(bh_cachep, bh); 3062 kmem_cache_free(bh_cachep, bh);
3062 preempt_disable(); 3063 get_cpu_var(bh_accounting).nr--;
3063 __get_cpu_var(bh_accounting).nr--;
3064 recalc_bh_state(); 3064 recalc_bh_state();
3065 preempt_enable(); 3065 put_cpu_var(bh_accounting);
3066} 3066}
3067EXPORT_SYMBOL(free_buffer_head); 3067EXPORT_SYMBOL(free_buffer_head);
3068 3068
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index e568cc47a7f9..3217ac5f6bd7 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -836,7 +836,7 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
836 /* go from value to value + temp_len condensing 836 /* go from value to value + temp_len condensing
837 double commas to singles. Note that this ends up 837 double commas to singles. Note that this ends up
838 allocating a few bytes too many, which is ok */ 838 allocating a few bytes too many, which is ok */
839 vol->password = kcalloc(1, temp_len, GFP_KERNEL); 839 vol->password = kzalloc(temp_len, GFP_KERNEL);
840 if(vol->password == NULL) { 840 if(vol->password == NULL) {
841 printk("CIFS: no memory for pass\n"); 841 printk("CIFS: no memory for pass\n");
842 return 1; 842 return 1;
@@ -851,7 +851,7 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
851 } 851 }
852 vol->password[j] = 0; 852 vol->password[j] = 0;
853 } else { 853 } else {
854 vol->password = kcalloc(1, temp_len+1, GFP_KERNEL); 854 vol->password = kzalloc(temp_len+1, GFP_KERNEL);
855 if(vol->password == NULL) { 855 if(vol->password == NULL) {
856 printk("CIFS: no memory for pass\n"); 856 printk("CIFS: no memory for pass\n");
857 return 1; 857 return 1;
@@ -1317,7 +1317,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1317 sessinit is sent but no second negprot */ 1317 sessinit is sent but no second negprot */
1318 struct rfc1002_session_packet * ses_init_buf; 1318 struct rfc1002_session_packet * ses_init_buf;
1319 struct smb_hdr * smb_buf; 1319 struct smb_hdr * smb_buf;
1320 ses_init_buf = kcalloc(1, sizeof(struct rfc1002_session_packet), GFP_KERNEL); 1320 ses_init_buf = kzalloc(sizeof(struct rfc1002_session_packet), GFP_KERNEL);
1321 if(ses_init_buf) { 1321 if(ses_init_buf) {
1322 ses_init_buf->trailer.session_req.called_len = 32; 1322 ses_init_buf->trailer.session_req.called_len = 32;
1323 rfc1002mangle(ses_init_buf->trailer.session_req.called_name, 1323 rfc1002mangle(ses_init_buf->trailer.session_req.called_name,
@@ -1964,7 +1964,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
1964/* We look for obvious messed up bcc or strings in response so we do not go off 1964/* We look for obvious messed up bcc or strings in response so we do not go off
1965 the end since (at least) WIN2K and Windows XP have a major bug in not null 1965 the end since (at least) WIN2K and Windows XP have a major bug in not null
1966 terminating last Unicode string in response */ 1966 terminating last Unicode string in response */
1967 ses->serverOS = kcalloc(1, 2 * (len + 1), GFP_KERNEL); 1967 ses->serverOS = kzalloc(2 * (len + 1), GFP_KERNEL);
1968 if(ses->serverOS == NULL) 1968 if(ses->serverOS == NULL)
1969 goto sesssetup_nomem; 1969 goto sesssetup_nomem;
1970 cifs_strfromUCS_le(ses->serverOS, 1970 cifs_strfromUCS_le(ses->serverOS,
@@ -1976,7 +1976,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
1976 if (remaining_words > 0) { 1976 if (remaining_words > 0) {
1977 len = UniStrnlen((wchar_t *)bcc_ptr, 1977 len = UniStrnlen((wchar_t *)bcc_ptr,
1978 remaining_words-1); 1978 remaining_words-1);
1979 ses->serverNOS = kcalloc(1, 2 * (len + 1),GFP_KERNEL); 1979 ses->serverNOS = kzalloc(2 * (len + 1),GFP_KERNEL);
1980 if(ses->serverNOS == NULL) 1980 if(ses->serverNOS == NULL)
1981 goto sesssetup_nomem; 1981 goto sesssetup_nomem;
1982 cifs_strfromUCS_le(ses->serverNOS, 1982 cifs_strfromUCS_le(ses->serverNOS,
@@ -1994,7 +1994,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
1994 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); 1994 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
1995 /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ 1995 /* last string is not always null terminated (for e.g. for Windows XP & 2000) */
1996 ses->serverDomain = 1996 ses->serverDomain =
1997 kcalloc(1, 2*(len+1),GFP_KERNEL); 1997 kzalloc(2*(len+1),GFP_KERNEL);
1998 if(ses->serverDomain == NULL) 1998 if(ses->serverDomain == NULL)
1999 goto sesssetup_nomem; 1999 goto sesssetup_nomem;
2000 cifs_strfromUCS_le(ses->serverDomain, 2000 cifs_strfromUCS_le(ses->serverDomain,
@@ -2005,22 +2005,22 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2005 } /* else no more room so create dummy domain string */ 2005 } /* else no more room so create dummy domain string */
2006 else 2006 else
2007 ses->serverDomain = 2007 ses->serverDomain =
2008 kcalloc(1, 2, GFP_KERNEL); 2008 kzalloc(2, GFP_KERNEL);
2009 } else { /* no room so create dummy domain and NOS string */ 2009 } else { /* no room so create dummy domain and NOS string */
2010 /* if these kcallocs fail not much we 2010 /* if these kcallocs fail not much we
2011 can do, but better to not fail the 2011 can do, but better to not fail the
2012 sesssetup itself */ 2012 sesssetup itself */
2013 ses->serverDomain = 2013 ses->serverDomain =
2014 kcalloc(1, 2, GFP_KERNEL); 2014 kzalloc(2, GFP_KERNEL);
2015 ses->serverNOS = 2015 ses->serverNOS =
2016 kcalloc(1, 2, GFP_KERNEL); 2016 kzalloc(2, GFP_KERNEL);
2017 } 2017 }
2018 } else { /* ASCII */ 2018 } else { /* ASCII */
2019 len = strnlen(bcc_ptr, 1024); 2019 len = strnlen(bcc_ptr, 1024);
2020 if (((long) bcc_ptr + len) - (long) 2020 if (((long) bcc_ptr + len) - (long)
2021 pByteArea(smb_buffer_response) 2021 pByteArea(smb_buffer_response)
2022 <= BCC(smb_buffer_response)) { 2022 <= BCC(smb_buffer_response)) {
2023 ses->serverOS = kcalloc(1, len + 1,GFP_KERNEL); 2023 ses->serverOS = kzalloc(len + 1,GFP_KERNEL);
2024 if(ses->serverOS == NULL) 2024 if(ses->serverOS == NULL)
2025 goto sesssetup_nomem; 2025 goto sesssetup_nomem;
2026 strncpy(ses->serverOS,bcc_ptr, len); 2026 strncpy(ses->serverOS,bcc_ptr, len);
@@ -2030,7 +2030,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2030 bcc_ptr++; 2030 bcc_ptr++;
2031 2031
2032 len = strnlen(bcc_ptr, 1024); 2032 len = strnlen(bcc_ptr, 1024);
2033 ses->serverNOS = kcalloc(1, len + 1,GFP_KERNEL); 2033 ses->serverNOS = kzalloc(len + 1,GFP_KERNEL);
2034 if(ses->serverNOS == NULL) 2034 if(ses->serverNOS == NULL)
2035 goto sesssetup_nomem; 2035 goto sesssetup_nomem;
2036 strncpy(ses->serverNOS, bcc_ptr, len); 2036 strncpy(ses->serverNOS, bcc_ptr, len);
@@ -2039,7 +2039,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2039 bcc_ptr++; 2039 bcc_ptr++;
2040 2040
2041 len = strnlen(bcc_ptr, 1024); 2041 len = strnlen(bcc_ptr, 1024);
2042 ses->serverDomain = kcalloc(1, len + 1,GFP_KERNEL); 2042 ses->serverDomain = kzalloc(len + 1,GFP_KERNEL);
2043 if(ses->serverDomain == NULL) 2043 if(ses->serverDomain == NULL)
2044 goto sesssetup_nomem; 2044 goto sesssetup_nomem;
2045 strncpy(ses->serverDomain, bcc_ptr, len); 2045 strncpy(ses->serverDomain, bcc_ptr, len);
@@ -2240,7 +2240,7 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2240 the end since (at least) WIN2K and Windows XP have a major bug in not null 2240 the end since (at least) WIN2K and Windows XP have a major bug in not null
2241 terminating last Unicode string in response */ 2241 terminating last Unicode string in response */
2242 ses->serverOS = 2242 ses->serverOS =
2243 kcalloc(1, 2 * (len + 1), GFP_KERNEL); 2243 kzalloc(2 * (len + 1), GFP_KERNEL);
2244 cifs_strfromUCS_le(ses->serverOS, 2244 cifs_strfromUCS_le(ses->serverOS,
2245 (wchar_t *) 2245 (wchar_t *)
2246 bcc_ptr, len, 2246 bcc_ptr, len,
@@ -2254,7 +2254,7 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2254 remaining_words 2254 remaining_words
2255 - 1); 2255 - 1);
2256 ses->serverNOS = 2256 ses->serverNOS =
2257 kcalloc(1, 2 * (len + 1), 2257 kzalloc(2 * (len + 1),
2258 GFP_KERNEL); 2258 GFP_KERNEL);
2259 cifs_strfromUCS_le(ses->serverNOS, 2259 cifs_strfromUCS_le(ses->serverNOS,
2260 (wchar_t *)bcc_ptr, 2260 (wchar_t *)bcc_ptr,
@@ -2267,7 +2267,7 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2267 if (remaining_words > 0) { 2267 if (remaining_words > 0) {
2268 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); 2268 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
2269 /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ 2269 /* last string is not always null terminated (for e.g. for Windows XP & 2000) */
2270 ses->serverDomain = kcalloc(1, 2*(len+1),GFP_KERNEL); 2270 ses->serverDomain = kzalloc(2*(len+1),GFP_KERNEL);
2271 cifs_strfromUCS_le(ses->serverDomain, 2271 cifs_strfromUCS_le(ses->serverDomain,
2272 (wchar_t *)bcc_ptr, 2272 (wchar_t *)bcc_ptr,
2273 len, 2273 len,
@@ -2278,10 +2278,10 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2278 } /* else no more room so create dummy domain string */ 2278 } /* else no more room so create dummy domain string */
2279 else 2279 else
2280 ses->serverDomain = 2280 ses->serverDomain =
2281 kcalloc(1, 2,GFP_KERNEL); 2281 kzalloc(2,GFP_KERNEL);
2282 } else { /* no room so create dummy domain and NOS string */ 2282 } else { /* no room so create dummy domain and NOS string */
2283 ses->serverDomain = kcalloc(1, 2, GFP_KERNEL); 2283 ses->serverDomain = kzalloc(2, GFP_KERNEL);
2284 ses->serverNOS = kcalloc(1, 2, GFP_KERNEL); 2284 ses->serverNOS = kzalloc(2, GFP_KERNEL);
2285 } 2285 }
2286 } else { /* ASCII */ 2286 } else { /* ASCII */
2287 2287
@@ -2289,7 +2289,7 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2289 if (((long) bcc_ptr + len) - (long) 2289 if (((long) bcc_ptr + len) - (long)
2290 pByteArea(smb_buffer_response) 2290 pByteArea(smb_buffer_response)
2291 <= BCC(smb_buffer_response)) { 2291 <= BCC(smb_buffer_response)) {
2292 ses->serverOS = kcalloc(1, len + 1, GFP_KERNEL); 2292 ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
2293 strncpy(ses->serverOS, bcc_ptr, len); 2293 strncpy(ses->serverOS, bcc_ptr, len);
2294 2294
2295 bcc_ptr += len; 2295 bcc_ptr += len;
@@ -2297,14 +2297,14 @@ CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2297 bcc_ptr++; 2297 bcc_ptr++;
2298 2298
2299 len = strnlen(bcc_ptr, 1024); 2299 len = strnlen(bcc_ptr, 1024);
2300 ses->serverNOS = kcalloc(1, len + 1,GFP_KERNEL); 2300 ses->serverNOS = kzalloc(len + 1,GFP_KERNEL);
2301 strncpy(ses->serverNOS, bcc_ptr, len); 2301 strncpy(ses->serverNOS, bcc_ptr, len);
2302 bcc_ptr += len; 2302 bcc_ptr += len;
2303 bcc_ptr[0] = 0; 2303 bcc_ptr[0] = 0;
2304 bcc_ptr++; 2304 bcc_ptr++;
2305 2305
2306 len = strnlen(bcc_ptr, 1024); 2306 len = strnlen(bcc_ptr, 1024);
2307 ses->serverDomain = kcalloc(1, len + 1, GFP_KERNEL); 2307 ses->serverDomain = kzalloc(len + 1, GFP_KERNEL);
2308 strncpy(ses->serverDomain, bcc_ptr, len); 2308 strncpy(ses->serverDomain, bcc_ptr, len);
2309 bcc_ptr += len; 2309 bcc_ptr += len;
2310 bcc_ptr[0] = 0; 2310 bcc_ptr[0] = 0;
@@ -2554,7 +2554,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2554 the end since (at least) WIN2K and Windows XP have a major bug in not null 2554 the end since (at least) WIN2K and Windows XP have a major bug in not null
2555 terminating last Unicode string in response */ 2555 terminating last Unicode string in response */
2556 ses->serverOS = 2556 ses->serverOS =
2557 kcalloc(1, 2 * (len + 1), GFP_KERNEL); 2557 kzalloc(2 * (len + 1), GFP_KERNEL);
2558 cifs_strfromUCS_le(ses->serverOS, 2558 cifs_strfromUCS_le(ses->serverOS,
2559 (wchar_t *) 2559 (wchar_t *)
2560 bcc_ptr, len, 2560 bcc_ptr, len,
@@ -2569,7 +2569,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2569 remaining_words 2569 remaining_words
2570 - 1); 2570 - 1);
2571 ses->serverNOS = 2571 ses->serverNOS =
2572 kcalloc(1, 2 * (len + 1), 2572 kzalloc(2 * (len + 1),
2573 GFP_KERNEL); 2573 GFP_KERNEL);
2574 cifs_strfromUCS_le(ses-> 2574 cifs_strfromUCS_le(ses->
2575 serverNOS, 2575 serverNOS,
@@ -2586,7 +2586,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2586 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); 2586 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
2587 /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ 2587 /* last string is not always null terminated (for e.g. for Windows XP & 2000) */
2588 ses->serverDomain = 2588 ses->serverDomain =
2589 kcalloc(1, 2 * 2589 kzalloc(2 *
2590 (len + 2590 (len +
2591 1), 2591 1),
2592 GFP_KERNEL); 2592 GFP_KERNEL);
@@ -2612,13 +2612,13 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2612 } /* else no more room so create dummy domain string */ 2612 } /* else no more room so create dummy domain string */
2613 else 2613 else
2614 ses->serverDomain = 2614 ses->serverDomain =
2615 kcalloc(1, 2, 2615 kzalloc(2,
2616 GFP_KERNEL); 2616 GFP_KERNEL);
2617 } else { /* no room so create dummy domain and NOS string */ 2617 } else { /* no room so create dummy domain and NOS string */
2618 ses->serverDomain = 2618 ses->serverDomain =
2619 kcalloc(1, 2, GFP_KERNEL); 2619 kzalloc(2, GFP_KERNEL);
2620 ses->serverNOS = 2620 ses->serverNOS =
2621 kcalloc(1, 2, GFP_KERNEL); 2621 kzalloc(2, GFP_KERNEL);
2622 } 2622 }
2623 } else { /* ASCII */ 2623 } else { /* ASCII */
2624 len = strnlen(bcc_ptr, 1024); 2624 len = strnlen(bcc_ptr, 1024);
@@ -2626,7 +2626,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2626 pByteArea(smb_buffer_response) 2626 pByteArea(smb_buffer_response)
2627 <= BCC(smb_buffer_response)) { 2627 <= BCC(smb_buffer_response)) {
2628 ses->serverOS = 2628 ses->serverOS =
2629 kcalloc(1, len + 1, 2629 kzalloc(len + 1,
2630 GFP_KERNEL); 2630 GFP_KERNEL);
2631 strncpy(ses->serverOS, 2631 strncpy(ses->serverOS,
2632 bcc_ptr, len); 2632 bcc_ptr, len);
@@ -2637,7 +2637,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2637 2637
2638 len = strnlen(bcc_ptr, 1024); 2638 len = strnlen(bcc_ptr, 1024);
2639 ses->serverNOS = 2639 ses->serverNOS =
2640 kcalloc(1, len + 1, 2640 kzalloc(len + 1,
2641 GFP_KERNEL); 2641 GFP_KERNEL);
2642 strncpy(ses->serverNOS, bcc_ptr, len); 2642 strncpy(ses->serverNOS, bcc_ptr, len);
2643 bcc_ptr += len; 2643 bcc_ptr += len;
@@ -2646,7 +2646,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2646 2646
2647 len = strnlen(bcc_ptr, 1024); 2647 len = strnlen(bcc_ptr, 1024);
2648 ses->serverDomain = 2648 ses->serverDomain =
2649 kcalloc(1, len + 1, 2649 kzalloc(len + 1,
2650 GFP_KERNEL); 2650 GFP_KERNEL);
2651 strncpy(ses->serverDomain, bcc_ptr, len); 2651 strncpy(ses->serverDomain, bcc_ptr, len);
2652 bcc_ptr += len; 2652 bcc_ptr += len;
@@ -2948,7 +2948,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2948 the end since (at least) WIN2K and Windows XP have a major bug in not null 2948 the end since (at least) WIN2K and Windows XP have a major bug in not null
2949 terminating last Unicode string in response */ 2949 terminating last Unicode string in response */
2950 ses->serverOS = 2950 ses->serverOS =
2951 kcalloc(1, 2 * (len + 1), GFP_KERNEL); 2951 kzalloc(2 * (len + 1), GFP_KERNEL);
2952 cifs_strfromUCS_le(ses->serverOS, 2952 cifs_strfromUCS_le(ses->serverOS,
2953 (wchar_t *) 2953 (wchar_t *)
2954 bcc_ptr, len, 2954 bcc_ptr, len,
@@ -2963,7 +2963,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2963 remaining_words 2963 remaining_words
2964 - 1); 2964 - 1);
2965 ses->serverNOS = 2965 ses->serverNOS =
2966 kcalloc(1, 2 * (len + 1), 2966 kzalloc(2 * (len + 1),
2967 GFP_KERNEL); 2967 GFP_KERNEL);
2968 cifs_strfromUCS_le(ses-> 2968 cifs_strfromUCS_le(ses->
2969 serverNOS, 2969 serverNOS,
@@ -2979,7 +2979,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2979 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); 2979 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
2980 /* last string not always null terminated (e.g. for Windows XP & 2000) */ 2980 /* last string not always null terminated (e.g. for Windows XP & 2000) */
2981 ses->serverDomain = 2981 ses->serverDomain =
2982 kcalloc(1, 2 * 2982 kzalloc(2 *
2983 (len + 2983 (len +
2984 1), 2984 1),
2985 GFP_KERNEL); 2985 GFP_KERNEL);
@@ -3004,17 +3004,17 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3004 = 0; 3004 = 0;
3005 } /* else no more room so create dummy domain string */ 3005 } /* else no more room so create dummy domain string */
3006 else 3006 else
3007 ses->serverDomain = kcalloc(1, 2,GFP_KERNEL); 3007 ses->serverDomain = kzalloc(2,GFP_KERNEL);
3008 } else { /* no room so create dummy domain and NOS string */ 3008 } else { /* no room so create dummy domain and NOS string */
3009 ses->serverDomain = kcalloc(1, 2, GFP_KERNEL); 3009 ses->serverDomain = kzalloc(2, GFP_KERNEL);
3010 ses->serverNOS = kcalloc(1, 2, GFP_KERNEL); 3010 ses->serverNOS = kzalloc(2, GFP_KERNEL);
3011 } 3011 }
3012 } else { /* ASCII */ 3012 } else { /* ASCII */
3013 len = strnlen(bcc_ptr, 1024); 3013 len = strnlen(bcc_ptr, 1024);
3014 if (((long) bcc_ptr + len) - 3014 if (((long) bcc_ptr + len) -
3015 (long) pByteArea(smb_buffer_response) 3015 (long) pByteArea(smb_buffer_response)
3016 <= BCC(smb_buffer_response)) { 3016 <= BCC(smb_buffer_response)) {
3017 ses->serverOS = kcalloc(1, len + 1,GFP_KERNEL); 3017 ses->serverOS = kzalloc(len + 1,GFP_KERNEL);
3018 strncpy(ses->serverOS,bcc_ptr, len); 3018 strncpy(ses->serverOS,bcc_ptr, len);
3019 3019
3020 bcc_ptr += len; 3020 bcc_ptr += len;
@@ -3022,14 +3022,14 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3022 bcc_ptr++; 3022 bcc_ptr++;
3023 3023
3024 len = strnlen(bcc_ptr, 1024); 3024 len = strnlen(bcc_ptr, 1024);
3025 ses->serverNOS = kcalloc(1, len+1,GFP_KERNEL); 3025 ses->serverNOS = kzalloc(len+1,GFP_KERNEL);
3026 strncpy(ses->serverNOS, bcc_ptr, len); 3026 strncpy(ses->serverNOS, bcc_ptr, len);
3027 bcc_ptr += len; 3027 bcc_ptr += len;
3028 bcc_ptr[0] = 0; 3028 bcc_ptr[0] = 0;
3029 bcc_ptr++; 3029 bcc_ptr++;
3030 3030
3031 len = strnlen(bcc_ptr, 1024); 3031 len = strnlen(bcc_ptr, 1024);
3032 ses->serverDomain = kcalloc(1, len+1,GFP_KERNEL); 3032 ses->serverDomain = kzalloc(len+1,GFP_KERNEL);
3033 strncpy(ses->serverDomain, bcc_ptr, len); 3033 strncpy(ses->serverDomain, bcc_ptr, len);
3034 bcc_ptr += len; 3034 bcc_ptr += len;
3035 bcc_ptr[0] = 0; 3035 bcc_ptr[0] = 0;
@@ -3141,7 +3141,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3141 if(tcon->nativeFileSystem) 3141 if(tcon->nativeFileSystem)
3142 kfree(tcon->nativeFileSystem); 3142 kfree(tcon->nativeFileSystem);
3143 tcon->nativeFileSystem = 3143 tcon->nativeFileSystem =
3144 kcalloc(1, length + 2, GFP_KERNEL); 3144 kzalloc(length + 2, GFP_KERNEL);
3145 cifs_strfromUCS_le(tcon->nativeFileSystem, 3145 cifs_strfromUCS_le(tcon->nativeFileSystem,
3146 (wchar_t *) bcc_ptr, 3146 (wchar_t *) bcc_ptr,
3147 length, nls_codepage); 3147 length, nls_codepage);
@@ -3159,7 +3159,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3159 if(tcon->nativeFileSystem) 3159 if(tcon->nativeFileSystem)
3160 kfree(tcon->nativeFileSystem); 3160 kfree(tcon->nativeFileSystem);
3161 tcon->nativeFileSystem = 3161 tcon->nativeFileSystem =
3162 kcalloc(1, length + 1, GFP_KERNEL); 3162 kzalloc(length + 1, GFP_KERNEL);
3163 strncpy(tcon->nativeFileSystem, bcc_ptr, 3163 strncpy(tcon->nativeFileSystem, bcc_ptr,
3164 length); 3164 length);
3165 } 3165 }
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 3f3538d4a1fa..d335269bd91c 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -145,24 +145,23 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
145 return -ENOMEM; 145 return -ENOMEM;
146 } 146 }
147 147
148 if(nd) { 148 if(nd && (nd->flags & LOOKUP_OPEN)) {
149 if ((nd->intent.open.flags & O_ACCMODE) == O_RDONLY) 149 int oflags = nd->intent.open.flags;
150 desiredAccess = GENERIC_READ; 150
151 else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY) { 151 desiredAccess = 0;
152 desiredAccess = GENERIC_WRITE; 152 if (oflags & FMODE_READ)
153 write_only = TRUE; 153 desiredAccess |= GENERIC_READ;
154 } else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR) { 154 if (oflags & FMODE_WRITE) {
155 /* GENERIC_ALL is too much permission to request */ 155 desiredAccess |= GENERIC_WRITE;
156 /* can cause unnecessary access denied on create */ 156 if (!(oflags & FMODE_READ))
157 /* desiredAccess = GENERIC_ALL; */ 157 write_only = TRUE;
158 desiredAccess = GENERIC_READ | GENERIC_WRITE;
159 } 158 }
160 159
161 if((nd->intent.open.flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) 160 if((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
162 disposition = FILE_CREATE; 161 disposition = FILE_CREATE;
163 else if((nd->intent.open.flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) 162 else if((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
164 disposition = FILE_OVERWRITE_IF; 163 disposition = FILE_OVERWRITE_IF;
165 else if((nd->intent.open.flags & O_CREAT) == O_CREAT) 164 else if((oflags & O_CREAT) == O_CREAT)
166 disposition = FILE_OPEN_IF; 165 disposition = FILE_OPEN_IF;
167 else { 166 else {
168 cFYI(1,("Create flag not set in create function")); 167 cFYI(1,("Create flag not set in create function"));
diff --git a/fs/compat.c b/fs/compat.c
index 6b06b6bae35e..8c665705c6a0 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -310,96 +310,6 @@ static int __init init_sys32_ioctl(void)
310 310
311__initcall(init_sys32_ioctl); 311__initcall(init_sys32_ioctl);
312 312
313int register_ioctl32_conversion(unsigned int cmd,
314 ioctl_trans_handler_t handler)
315{
316 struct ioctl_trans *t;
317 struct ioctl_trans *new_t;
318 unsigned long hash = ioctl32_hash(cmd);
319
320 new_t = kmalloc(sizeof(*new_t), GFP_KERNEL);
321 if (!new_t)
322 return -ENOMEM;
323
324 down_write(&ioctl32_sem);
325 for (t = ioctl32_hash_table[hash]; t; t = t->next) {
326 if (t->cmd == cmd) {
327 printk(KERN_ERR "Trying to register duplicated ioctl32 "
328 "handler %x\n", cmd);
329 up_write(&ioctl32_sem);
330 kfree(new_t);
331 return -EINVAL;
332 }
333 }
334 new_t->next = NULL;
335 new_t->cmd = cmd;
336 new_t->handler = handler;
337 ioctl32_insert_translation(new_t);
338
339 up_write(&ioctl32_sem);
340 return 0;
341}
342EXPORT_SYMBOL(register_ioctl32_conversion);
343
344static inline int builtin_ioctl(struct ioctl_trans *t)
345{
346 return t >= ioctl_start && t < (ioctl_start + ioctl_table_size);
347}
348
349/* Problem:
350 This function cannot unregister duplicate ioctls, because they are not
351 unique.
352 When they happen we need to extend the prototype to pass the handler too. */
353
354int unregister_ioctl32_conversion(unsigned int cmd)
355{
356 unsigned long hash = ioctl32_hash(cmd);
357 struct ioctl_trans *t, *t1;
358
359 down_write(&ioctl32_sem);
360
361 t = ioctl32_hash_table[hash];
362 if (!t) {
363 up_write(&ioctl32_sem);
364 return -EINVAL;
365 }
366
367 if (t->cmd == cmd) {
368 if (builtin_ioctl(t)) {
369 printk("%p tried to unregister builtin ioctl %x\n",
370 __builtin_return_address(0), cmd);
371 } else {
372 ioctl32_hash_table[hash] = t->next;
373 up_write(&ioctl32_sem);
374 kfree(t);
375 return 0;
376 }
377 }
378 while (t->next) {
379 t1 = t->next;
380 if (t1->cmd == cmd) {
381 if (builtin_ioctl(t1)) {
382 printk("%p tried to unregister builtin "
383 "ioctl %x\n",
384 __builtin_return_address(0), cmd);
385 goto out;
386 } else {
387 t->next = t1->next;
388 up_write(&ioctl32_sem);
389 kfree(t1);
390 return 0;
391 }
392 }
393 t = t1;
394 }
395 printk(KERN_ERR "Trying to free unknown 32bit ioctl handler %x\n",
396 cmd);
397out:
398 up_write(&ioctl32_sem);
399 return -EINVAL;
400}
401EXPORT_SYMBOL(unregister_ioctl32_conversion);
402
403static void compat_ioctl_error(struct file *filp, unsigned int fd, 313static void compat_ioctl_error(struct file *filp, unsigned int fd,
404 unsigned int cmd, unsigned long arg) 314 unsigned int cmd, unsigned long arg)
405{ 315{
@@ -720,14 +630,14 @@ compat_sys_io_submit(aio_context_t ctx_id, int nr, u32 __user *iocb)
720struct compat_ncp_mount_data { 630struct compat_ncp_mount_data {
721 compat_int_t version; 631 compat_int_t version;
722 compat_uint_t ncp_fd; 632 compat_uint_t ncp_fd;
723 compat_uid_t mounted_uid; 633 __compat_uid_t mounted_uid;
724 compat_pid_t wdog_pid; 634 compat_pid_t wdog_pid;
725 unsigned char mounted_vol[NCP_VOLNAME_LEN + 1]; 635 unsigned char mounted_vol[NCP_VOLNAME_LEN + 1];
726 compat_uint_t time_out; 636 compat_uint_t time_out;
727 compat_uint_t retry_count; 637 compat_uint_t retry_count;
728 compat_uint_t flags; 638 compat_uint_t flags;
729 compat_uid_t uid; 639 __compat_uid_t uid;
730 compat_gid_t gid; 640 __compat_gid_t gid;
731 compat_mode_t file_mode; 641 compat_mode_t file_mode;
732 compat_mode_t dir_mode; 642 compat_mode_t dir_mode;
733}; 643};
@@ -784,9 +694,9 @@ static void *do_ncp_super_data_conv(void *raw_data)
784 694
785struct compat_smb_mount_data { 695struct compat_smb_mount_data {
786 compat_int_t version; 696 compat_int_t version;
787 compat_uid_t mounted_uid; 697 __compat_uid_t mounted_uid;
788 compat_uid_t uid; 698 __compat_uid_t uid;
789 compat_gid_t gid; 699 __compat_gid_t gid;
790 compat_mode_t file_mode; 700 compat_mode_t file_mode;
791 compat_mode_t dir_mode; 701 compat_mode_t dir_mode;
792}; 702};
@@ -1365,6 +1275,16 @@ out:
1365} 1275}
1366 1276
1367/* 1277/*
1278 * Exactly like fs/open.c:sys_open(), except that it doesn't set the
1279 * O_LARGEFILE flag.
1280 */
1281asmlinkage long
1282compat_sys_open(const char __user *filename, int flags, int mode)
1283{
1284 return do_sys_open(filename, flags, mode);
1285}
1286
1287/*
1368 * compat_count() counts the number of arguments/envelopes. It is basically 1288 * compat_count() counts the number of arguments/envelopes. It is basically
1369 * a copy of count() from fs/exec.c, except that it works with 32 bit argv 1289 * a copy of count() from fs/exec.c, except that it works with 32 bit argv
1370 * and envp pointers. 1290 * and envp pointers.
@@ -1808,8 +1728,8 @@ struct compat_nfsctl_export {
1808 compat_dev_t ex32_dev; 1728 compat_dev_t ex32_dev;
1809 compat_ino_t ex32_ino; 1729 compat_ino_t ex32_ino;
1810 compat_int_t ex32_flags; 1730 compat_int_t ex32_flags;
1811 compat_uid_t ex32_anon_uid; 1731 __compat_uid_t ex32_anon_uid;
1812 compat_gid_t ex32_anon_gid; 1732 __compat_gid_t ex32_anon_gid;
1813}; 1733};
1814 1734
1815struct compat_nfsctl_fdparm { 1735struct compat_nfsctl_fdparm {
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 6c285efa2004..7fe85415ae7c 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -39,12 +39,47 @@ static DECLARE_MUTEX(read_mutex);
39#define CRAMINO(x) ((x)->offset?(x)->offset<<2:1) 39#define CRAMINO(x) ((x)->offset?(x)->offset<<2:1)
40#define OFFSET(x) ((x)->i_ino) 40#define OFFSET(x) ((x)->i_ino)
41 41
42static struct inode *get_cramfs_inode(struct super_block *sb, struct cramfs_inode * cramfs_inode) 42
43static int cramfs_iget5_test(struct inode *inode, void *opaque)
44{
45 struct cramfs_inode *cramfs_inode = opaque;
46
47 if (inode->i_ino != CRAMINO(cramfs_inode))
48 return 0; /* does not match */
49
50 if (inode->i_ino != 1)
51 return 1;
52
53 /* all empty directories, char, block, pipe, and sock, share inode #1 */
54
55 if ((inode->i_mode != cramfs_inode->mode) ||
56 (inode->i_gid != cramfs_inode->gid) ||
57 (inode->i_uid != cramfs_inode->uid))
58 return 0; /* does not match */
59
60 if ((S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) &&
61 (inode->i_rdev != old_decode_dev(cramfs_inode->size)))
62 return 0; /* does not match */
63
64 return 1; /* matches */
65}
66
67static int cramfs_iget5_set(struct inode *inode, void *opaque)
68{
69 struct cramfs_inode *cramfs_inode = opaque;
70 inode->i_ino = CRAMINO(cramfs_inode);
71 return 0;
72}
73
74static struct inode *get_cramfs_inode(struct super_block *sb,
75 struct cramfs_inode * cramfs_inode)
43{ 76{
44 struct inode * inode = new_inode(sb); 77 struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode),
78 cramfs_iget5_test, cramfs_iget5_set,
79 cramfs_inode);
45 static struct timespec zerotime; 80 static struct timespec zerotime;
46 81
47 if (inode) { 82 if (inode && (inode->i_state & I_NEW)) {
48 inode->i_mode = cramfs_inode->mode; 83 inode->i_mode = cramfs_inode->mode;
49 inode->i_uid = cramfs_inode->uid; 84 inode->i_uid = cramfs_inode->uid;
50 inode->i_size = cramfs_inode->size; 85 inode->i_size = cramfs_inode->size;
@@ -58,7 +93,6 @@ static struct inode *get_cramfs_inode(struct super_block *sb, struct cramfs_inod
58 but it's the best we can do without reading the directory 93 but it's the best we can do without reading the directory
59 contents. 1 yields the right result in GNU find, even 94 contents. 1 yields the right result in GNU find, even
60 without -noleaf option. */ 95 without -noleaf option. */
61 insert_inode_hash(inode);
62 if (S_ISREG(inode->i_mode)) { 96 if (S_ISREG(inode->i_mode)) {
63 inode->i_fop = &generic_ro_fops; 97 inode->i_fop = &generic_ro_fops;
64 inode->i_data.a_ops = &cramfs_aops; 98 inode->i_data.a_ops = &cramfs_aops;
@@ -74,6 +108,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb, struct cramfs_inod
74 init_special_inode(inode, inode->i_mode, 108 init_special_inode(inode, inode->i_mode,
75 old_decode_dev(cramfs_inode->size)); 109 old_decode_dev(cramfs_inode->size));
76 } 110 }
111 unlock_new_inode(inode);
77 } 112 }
78 return inode; 113 return inode;
79} 114}
diff --git a/fs/devpts/Makefile b/fs/devpts/Makefile
index 5800df2e50c8..236696efcbac 100644
--- a/fs/devpts/Makefile
+++ b/fs/devpts/Makefile
@@ -5,4 +5,3 @@
5obj-$(CONFIG_UNIX98_PTYS) += devpts.o 5obj-$(CONFIG_UNIX98_PTYS) += devpts.o
6 6
7devpts-$(CONFIG_UNIX98_PTYS) := inode.o 7devpts-$(CONFIG_UNIX98_PTYS) := inode.o
8devpts-$(CONFIG_DEVPTS_FS_SECURITY) += xattr_security.o
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 1571c8d6c232..f2be44d4491f 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -18,28 +18,9 @@
18#include <linux/mount.h> 18#include <linux/mount.h>
19#include <linux/tty.h> 19#include <linux/tty.h>
20#include <linux/devpts_fs.h> 20#include <linux/devpts_fs.h>
21#include <linux/xattr.h>
22 21
23#define DEVPTS_SUPER_MAGIC 0x1cd1 22#define DEVPTS_SUPER_MAGIC 0x1cd1
24 23
25extern struct xattr_handler devpts_xattr_security_handler;
26
27static struct xattr_handler *devpts_xattr_handlers[] = {
28#ifdef CONFIG_DEVPTS_FS_SECURITY
29 &devpts_xattr_security_handler,
30#endif
31 NULL
32};
33
34static struct inode_operations devpts_file_inode_operations = {
35#ifdef CONFIG_DEVPTS_FS_XATTR
36 .setxattr = generic_setxattr,
37 .getxattr = generic_getxattr,
38 .listxattr = generic_listxattr,
39 .removexattr = generic_removexattr,
40#endif
41};
42
43static struct vfsmount *devpts_mnt; 24static struct vfsmount *devpts_mnt;
44static struct dentry *devpts_root; 25static struct dentry *devpts_root;
45 26
@@ -102,7 +83,6 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
102 s->s_blocksize_bits = 10; 83 s->s_blocksize_bits = 10;
103 s->s_magic = DEVPTS_SUPER_MAGIC; 84 s->s_magic = DEVPTS_SUPER_MAGIC;
104 s->s_op = &devpts_sops; 85 s->s_op = &devpts_sops;
105 s->s_xattr = devpts_xattr_handlers;
106 s->s_time_gran = 1; 86 s->s_time_gran = 1;
107 87
108 inode = new_inode(s); 88 inode = new_inode(s);
@@ -175,7 +155,6 @@ int devpts_pty_new(struct tty_struct *tty)
175 inode->i_gid = config.setgid ? config.gid : current->fsgid; 155 inode->i_gid = config.setgid ? config.gid : current->fsgid;
176 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 156 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
177 init_special_inode(inode, S_IFCHR|config.mode, device); 157 init_special_inode(inode, S_IFCHR|config.mode, device);
178 inode->i_op = &devpts_file_inode_operations;
179 inode->u.generic_ip = tty; 158 inode->u.generic_ip = tty;
180 159
181 dentry = get_node(number); 160 dentry = get_node(number);
diff --git a/fs/devpts/xattr_security.c b/fs/devpts/xattr_security.c
deleted file mode 100644
index 864cb5c79baa..000000000000
--- a/fs/devpts/xattr_security.c
+++ /dev/null
@@ -1,47 +0,0 @@
1/*
2 * Security xattr support for devpts.
3 *
4 * Author: Stephen Smalley <sds@epoch.ncsc.mil>
5 * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the Free
9 * Software Foundation; either version 2 of the License, or (at your option)
10 * any later version.
11 */
12#include <linux/string.h>
13#include <linux/fs.h>
14#include <linux/security.h>
15#include <linux/xattr.h>
16
17static size_t
18devpts_xattr_security_list(struct inode *inode, char *list, size_t list_len,
19 const char *name, size_t name_len)
20{
21 return security_inode_listsecurity(inode, list, list_len);
22}
23
24static int
25devpts_xattr_security_get(struct inode *inode, const char *name,
26 void *buffer, size_t size)
27{
28 if (strcmp(name, "") == 0)
29 return -EINVAL;
30 return security_inode_getsecurity(inode, name, buffer, size);
31}
32
33static int
34devpts_xattr_security_set(struct inode *inode, const char *name,
35 const void *value, size_t size, int flags)
36{
37 if (strcmp(name, "") == 0)
38 return -EINVAL;
39 return security_inode_setsecurity(inode, name, value, size, flags);
40}
41
42struct xattr_handler devpts_xattr_security_handler = {
43 .prefix = XATTR_SECURITY_PREFIX,
44 .list = devpts_xattr_security_list,
45 .get = devpts_xattr_security_get,
46 .set = devpts_xattr_security_set,
47};
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index dcfe331dc4c4..3c0c7c6a5b44 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -19,6 +19,7 @@
19#include <linux/config.h> 19#include <linux/config.h>
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/string.h> 21#include <linux/string.h>
22#include <linux/fs.h>
22#include <linux/slab.h> 23#include <linux/slab.h>
23#include <linux/init.h> 24#include <linux/init.h>
24#include <linux/blkdev.h> 25#include <linux/blkdev.h>
@@ -27,6 +28,8 @@
27#include <linux/buffer_head.h> 28#include <linux/buffer_head.h>
28#include <linux/smp_lock.h> 29#include <linux/smp_lock.h>
29#include <linux/vfs.h> 30#include <linux/vfs.h>
31#include <linux/seq_file.h>
32#include <linux/mount.h>
30#include <asm/uaccess.h> 33#include <asm/uaccess.h>
31#include "ext2.h" 34#include "ext2.h"
32#include "xattr.h" 35#include "xattr.h"
@@ -201,6 +204,26 @@ static void ext2_clear_inode(struct inode *inode)
201#endif 204#endif
202} 205}
203 206
207static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs)
208{
209 struct ext2_sb_info *sbi = EXT2_SB(vfs->mnt_sb);
210
211 if (sbi->s_mount_opt & EXT2_MOUNT_GRPID)
212 seq_puts(seq, ",grpid");
213 else
214 seq_puts(seq, ",nogrpid");
215
216#if defined(CONFIG_QUOTA)
217 if (sbi->s_mount_opt & EXT2_MOUNT_USRQUOTA)
218 seq_puts(seq, ",usrquota");
219
220 if (sbi->s_mount_opt & EXT2_MOUNT_GRPQUOTA)
221 seq_puts(seq, ",grpquota");
222#endif
223
224 return 0;
225}
226
204#ifdef CONFIG_QUOTA 227#ifdef CONFIG_QUOTA
205static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); 228static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off);
206static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); 229static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off);
@@ -218,6 +241,7 @@ static struct super_operations ext2_sops = {
218 .statfs = ext2_statfs, 241 .statfs = ext2_statfs,
219 .remount_fs = ext2_remount, 242 .remount_fs = ext2_remount,
220 .clear_inode = ext2_clear_inode, 243 .clear_inode = ext2_clear_inode,
244 .show_options = ext2_show_options,
221#ifdef CONFIG_QUOTA 245#ifdef CONFIG_QUOTA
222 .quota_read = ext2_quota_read, 246 .quota_read = ext2_quota_read,
223 .quota_write = ext2_quota_write, 247 .quota_write = ext2_quota_write,
@@ -256,10 +280,11 @@ static unsigned long get_sb_block(void **data)
256 280
257enum { 281enum {
258 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 282 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
259 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 283 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic,
260 Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh, 284 Opt_err_ro, Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug,
261 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_xip, 285 Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr,
262 Opt_ignore, Opt_err, 286 Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota,
287 Opt_usrquota, Opt_grpquota
263}; 288};
264 289
265static match_table_t tokens = { 290static match_table_t tokens = {
@@ -288,10 +313,10 @@ static match_table_t tokens = {
288 {Opt_acl, "acl"}, 313 {Opt_acl, "acl"},
289 {Opt_noacl, "noacl"}, 314 {Opt_noacl, "noacl"},
290 {Opt_xip, "xip"}, 315 {Opt_xip, "xip"},
291 {Opt_ignore, "grpquota"}, 316 {Opt_grpquota, "grpquota"},
292 {Opt_ignore, "noquota"}, 317 {Opt_ignore, "noquota"},
293 {Opt_ignore, "quota"}, 318 {Opt_quota, "quota"},
294 {Opt_ignore, "usrquota"}, 319 {Opt_usrquota, "usrquota"},
295 {Opt_err, NULL} 320 {Opt_err, NULL}
296}; 321};
297 322
@@ -406,6 +431,26 @@ static int parse_options (char * options,
406 printk("EXT2 xip option not supported\n"); 431 printk("EXT2 xip option not supported\n");
407#endif 432#endif
408 break; 433 break;
434
435#if defined(CONFIG_QUOTA)
436 case Opt_quota:
437 case Opt_usrquota:
438 set_opt(sbi->s_mount_opt, USRQUOTA);
439 break;
440
441 case Opt_grpquota:
442 set_opt(sbi->s_mount_opt, GRPQUOTA);
443 break;
444#else
445 case Opt_quota:
446 case Opt_usrquota:
447 case Opt_grpquota:
448 printk(KERN_ERR
449 "EXT2-fs: quota operations not supported.\n");
450
451 break;
452#endif
453
409 case Opt_ignore: 454 case Opt_ignore:
410 break; 455 break;
411 default: 456 default:
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 3c3c6e399fb3..a93c3609025d 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -35,6 +35,7 @@
35#include <linux/mount.h> 35#include <linux/mount.h>
36#include <linux/namei.h> 36#include <linux/namei.h>
37#include <linux/quotaops.h> 37#include <linux/quotaops.h>
38#include <linux/seq_file.h>
38#include <asm/uaccess.h> 39#include <asm/uaccess.h>
39#include "xattr.h" 40#include "xattr.h"
40#include "acl.h" 41#include "acl.h"
@@ -509,8 +510,41 @@ static void ext3_clear_inode(struct inode *inode)
509 kfree(rsv); 510 kfree(rsv);
510} 511}
511 512
512#ifdef CONFIG_QUOTA 513static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
514{
515 struct ext3_sb_info *sbi = EXT3_SB(vfs->mnt_sb);
516
517 if (sbi->s_mount_opt & EXT3_MOUNT_JOURNAL_DATA)
518 seq_puts(seq, ",data=journal");
519
520 if (sbi->s_mount_opt & EXT3_MOUNT_ORDERED_DATA)
521 seq_puts(seq, ",data=ordered");
522
523 if (sbi->s_mount_opt & EXT3_MOUNT_WRITEBACK_DATA)
524 seq_puts(seq, ",data=writeback");
525
526#if defined(CONFIG_QUOTA)
527 if (sbi->s_jquota_fmt)
528 seq_printf(seq, ",jqfmt=%s",
529 (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0");
530
531 if (sbi->s_qf_names[USRQUOTA])
532 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
533
534 if (sbi->s_qf_names[GRPQUOTA])
535 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
513 536
537 if (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA)
538 seq_puts(seq, ",usrquota");
539
540 if (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)
541 seq_puts(seq, ",grpquota");
542#endif
543
544 return 0;
545}
546
547#ifdef CONFIG_QUOTA
514#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") 548#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
515#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 549#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
516 550
@@ -569,6 +603,7 @@ static struct super_operations ext3_sops = {
569 .statfs = ext3_statfs, 603 .statfs = ext3_statfs,
570 .remount_fs = ext3_remount, 604 .remount_fs = ext3_remount,
571 .clear_inode = ext3_clear_inode, 605 .clear_inode = ext3_clear_inode,
606 .show_options = ext3_show_options,
572#ifdef CONFIG_QUOTA 607#ifdef CONFIG_QUOTA
573 .quota_read = ext3_quota_read, 608 .quota_read = ext3_quota_read,
574 .quota_write = ext3_quota_write, 609 .quota_write = ext3_quota_write,
@@ -590,7 +625,8 @@ enum {
590 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 625 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
591 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 626 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
592 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 627 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
593 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, 628 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
629 Opt_grpquota
594}; 630};
595 631
596static match_table_t tokens = { 632static match_table_t tokens = {
@@ -634,10 +670,10 @@ static match_table_t tokens = {
634 {Opt_grpjquota, "grpjquota=%s"}, 670 {Opt_grpjquota, "grpjquota=%s"},
635 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 671 {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
636 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 672 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
637 {Opt_quota, "grpquota"}, 673 {Opt_grpquota, "grpquota"},
638 {Opt_noquota, "noquota"}, 674 {Opt_noquota, "noquota"},
639 {Opt_quota, "quota"}, 675 {Opt_quota, "quota"},
640 {Opt_quota, "usrquota"}, 676 {Opt_usrquota, "usrquota"},
641 {Opt_barrier, "barrier=%u"}, 677 {Opt_barrier, "barrier=%u"},
642 {Opt_err, NULL}, 678 {Opt_err, NULL},
643 {Opt_resize, "resize"}, 679 {Opt_resize, "resize"},
@@ -903,7 +939,13 @@ clear_qf_name:
903 sbi->s_jquota_fmt = QFMT_VFS_V0; 939 sbi->s_jquota_fmt = QFMT_VFS_V0;
904 break; 940 break;
905 case Opt_quota: 941 case Opt_quota:
942 case Opt_usrquota:
906 set_opt(sbi->s_mount_opt, QUOTA); 943 set_opt(sbi->s_mount_opt, QUOTA);
944 set_opt(sbi->s_mount_opt, USRQUOTA);
945 break;
946 case Opt_grpquota:
947 set_opt(sbi->s_mount_opt, QUOTA);
948 set_opt(sbi->s_mount_opt, GRPQUOTA);
907 break; 949 break;
908 case Opt_noquota: 950 case Opt_noquota:
909 if (sb_any_quota_enabled(sb)) { 951 if (sb_any_quota_enabled(sb)) {
@@ -912,8 +954,13 @@ clear_qf_name:
912 return 0; 954 return 0;
913 } 955 }
914 clear_opt(sbi->s_mount_opt, QUOTA); 956 clear_opt(sbi->s_mount_opt, QUOTA);
957 clear_opt(sbi->s_mount_opt, USRQUOTA);
958 clear_opt(sbi->s_mount_opt, GRPQUOTA);
915 break; 959 break;
916#else 960#else
961 case Opt_quota:
962 case Opt_usrquota:
963 case Opt_grpquota:
917 case Opt_usrjquota: 964 case Opt_usrjquota:
918 case Opt_grpjquota: 965 case Opt_grpjquota:
919 case Opt_offusrjquota: 966 case Opt_offusrjquota:
@@ -924,7 +971,6 @@ clear_qf_name:
924 "EXT3-fs: journalled quota options not " 971 "EXT3-fs: journalled quota options not "
925 "supported.\n"); 972 "supported.\n");
926 break; 973 break;
927 case Opt_quota:
928 case Opt_noquota: 974 case Opt_noquota:
929 break; 975 break;
930#endif 976#endif
@@ -962,14 +1008,38 @@ clear_qf_name:
962 } 1008 }
963 } 1009 }
964#ifdef CONFIG_QUOTA 1010#ifdef CONFIG_QUOTA
965 if (!sbi->s_jquota_fmt && (sbi->s_qf_names[USRQUOTA] || 1011 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
966 sbi->s_qf_names[GRPQUOTA])) { 1012 if ((sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) &&
967 printk(KERN_ERR 1013 sbi->s_qf_names[USRQUOTA])
968 "EXT3-fs: journalled quota format not specified.\n"); 1014 clear_opt(sbi->s_mount_opt, USRQUOTA);
969 return 0; 1015
1016 if ((sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) &&
1017 sbi->s_qf_names[GRPQUOTA])
1018 clear_opt(sbi->s_mount_opt, GRPQUOTA);
1019
1020 if ((sbi->s_qf_names[USRQUOTA] &&
1021 (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) ||
1022 (sbi->s_qf_names[GRPQUOTA] &&
1023 (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) {
1024 printk(KERN_ERR "EXT3-fs: old and new quota "
1025 "format mixing.\n");
1026 return 0;
1027 }
1028
1029 if (!sbi->s_jquota_fmt) {
1030 printk(KERN_ERR "EXT3-fs: journalled quota format "
1031 "not specified.\n");
1032 return 0;
1033 }
1034 } else {
1035 if (sbi->s_jquota_fmt) {
1036 printk(KERN_ERR "EXT3-fs: journalled quota format "
1037 "specified with no journalling "
1038 "enabled.\n");
1039 return 0;
1040 }
970 } 1041 }
971#endif 1042#endif
972
973 return 1; 1043 return 1;
974} 1044}
975 1045
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index e5ae1b720dde..895049b2ac9c 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -30,6 +30,29 @@ static inline loff_t fat_make_i_pos(struct super_block *sb,
30 | (de - (struct msdos_dir_entry *)bh->b_data); 30 | (de - (struct msdos_dir_entry *)bh->b_data);
31} 31}
32 32
33static inline void fat_dir_readahead(struct inode *dir, sector_t iblock,
34 sector_t phys)
35{
36 struct super_block *sb = dir->i_sb;
37 struct msdos_sb_info *sbi = MSDOS_SB(sb);
38 struct buffer_head *bh;
39 int sec;
40
41 /* This is not a first sector of cluster, or sec_per_clus == 1 */
42 if ((iblock & (sbi->sec_per_clus - 1)) || sbi->sec_per_clus == 1)
43 return;
44 /* root dir of FAT12/FAT16 */
45 if ((sbi->fat_bits != 32) && (dir->i_ino == MSDOS_ROOT_INO))
46 return;
47
48 bh = sb_getblk(sb, phys);
49 if (bh && !buffer_uptodate(bh)) {
50 for (sec = 0; sec < sbi->sec_per_clus; sec++)
51 sb_breadahead(sb, phys + sec);
52 }
53 brelse(bh);
54}
55
33/* Returns the inode number of the directory entry at offset pos. If bh is 56/* Returns the inode number of the directory entry at offset pos. If bh is
34 non-NULL, it is brelse'd before. Pos is incremented. The buffer header is 57 non-NULL, it is brelse'd before. Pos is incremented. The buffer header is
35 returned in bh. 58 returned in bh.
@@ -58,6 +81,8 @@ next:
58 if (err || !phys) 81 if (err || !phys)
59 return -1; /* beyond EOF or error */ 82 return -1; /* beyond EOF or error */
60 83
84 fat_dir_readahead(dir, iblock, phys);
85
61 *bh = sb_bread(sb, phys); 86 *bh = sb_bread(sb, phys);
62 if (*bh == NULL) { 87 if (*bh == NULL) {
63 printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n", 88 printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n",
@@ -635,8 +660,7 @@ RecEnd:
635EODir: 660EODir:
636 filp->f_pos = cpos; 661 filp->f_pos = cpos;
637FillFailed: 662FillFailed:
638 if (bh) 663 brelse(bh);
639 brelse(bh);
640 if (unicode) 664 if (unicode)
641 free_page((unsigned long)unicode); 665 free_page((unsigned long)unicode);
642out: 666out:
diff --git a/fs/file_table.c b/fs/file_table.c
index 1d3de78e6bc9..43e9e1737de2 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -89,7 +89,6 @@ struct file *get_empty_filp(void)
89 rwlock_init(&f->f_owner.lock); 89 rwlock_init(&f->f_owner.lock);
90 /* f->f_version: 0 */ 90 /* f->f_version: 0 */
91 INIT_LIST_HEAD(&f->f_list); 91 INIT_LIST_HEAD(&f->f_list);
92 f->f_maxcount = INT_MAX;
93 return f; 92 return f;
94 93
95over: 94over:
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 27f66d3e8a04..6aa6fbe4f8ee 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -155,7 +155,7 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent)
155 155
156 sbp->s_flags |= MS_RDONLY; 156 sbp->s_flags |= MS_RDONLY;
157 157
158 infp = kcalloc(1, sizeof(*infp), GFP_KERNEL); 158 infp = kzalloc(sizeof(*infp), GFP_KERNEL);
159 if (!infp) { 159 if (!infp) {
160 printk(KERN_WARNING "vxfs: unable to allocate incore superblock\n"); 160 printk(KERN_WARNING "vxfs: unable to allocate incore superblock\n");
161 return -ENOMEM; 161 return -ENOMEM;
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index a096c5a56664..3d5cdc6847c0 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -13,8 +13,6 @@
13 13
14#include "btree.h" 14#include "btree.h"
15 15
16#define REF_PAGES 0
17
18void hfs_bnode_read(struct hfs_bnode *node, void *buf, 16void hfs_bnode_read(struct hfs_bnode *node, void *buf,
19 int off, int len) 17 int off, int len)
20{ 18{
@@ -289,9 +287,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
289 page_cache_release(page); 287 page_cache_release(page);
290 goto fail; 288 goto fail;
291 } 289 }
292#if !REF_PAGES
293 page_cache_release(page); 290 page_cache_release(page);
294#endif
295 node->page[i] = page; 291 node->page[i] = page;
296 } 292 }
297 293
@@ -449,13 +445,6 @@ void hfs_bnode_get(struct hfs_bnode *node)
449{ 445{
450 if (node) { 446 if (node) {
451 atomic_inc(&node->refcnt); 447 atomic_inc(&node->refcnt);
452#if REF_PAGES
453 {
454 int i;
455 for (i = 0; i < node->tree->pages_per_bnode; i++)
456 get_page(node->page[i]);
457 }
458#endif
459 dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n", 448 dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n",
460 node->tree->cnid, node->this, atomic_read(&node->refcnt)); 449 node->tree->cnid, node->this, atomic_read(&node->refcnt));
461 } 450 }
@@ -472,20 +461,12 @@ void hfs_bnode_put(struct hfs_bnode *node)
472 node->tree->cnid, node->this, atomic_read(&node->refcnt)); 461 node->tree->cnid, node->this, atomic_read(&node->refcnt));
473 if (!atomic_read(&node->refcnt)) 462 if (!atomic_read(&node->refcnt))
474 BUG(); 463 BUG();
475 if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock)) { 464 if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock))
476#if REF_PAGES
477 for (i = 0; i < tree->pages_per_bnode; i++)
478 put_page(node->page[i]);
479#endif
480 return; 465 return;
481 }
482 for (i = 0; i < tree->pages_per_bnode; i++) { 466 for (i = 0; i < tree->pages_per_bnode; i++) {
483 if (!node->page[i]) 467 if (!node->page[i])
484 continue; 468 continue;
485 mark_page_accessed(node->page[i]); 469 mark_page_accessed(node->page[i]);
486#if REF_PAGES
487 put_page(node->page[i]);
488#endif
489 } 470 }
490 471
491 if (test_bit(HFS_BNODE_DELETED, &node->flags)) { 472 if (test_bit(HFS_BNODE_DELETED, &node->flags)) {
diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c
index 65dedefcabfc..2fcd679f0238 100644
--- a/fs/hfs/catalog.c
+++ b/fs/hfs/catalog.c
@@ -20,12 +20,12 @@
20 * 20 *
21 * Given the ID of the parent and the name build a search key. 21 * Given the ID of the parent and the name build a search key.
22 */ 22 */
23void hfs_cat_build_key(btree_key *key, u32 parent, struct qstr *name) 23void hfs_cat_build_key(struct super_block *sb, btree_key *key, u32 parent, struct qstr *name)
24{ 24{
25 key->cat.reserved = 0; 25 key->cat.reserved = 0;
26 key->cat.ParID = cpu_to_be32(parent); 26 key->cat.ParID = cpu_to_be32(parent);
27 if (name) { 27 if (name) {
28 hfs_triv2mac(&key->cat.CName, name); 28 hfs_asc2mac(sb, &key->cat.CName, name);
29 key->key_len = 6 + key->cat.CName.len; 29 key->key_len = 6 + key->cat.CName.len;
30 } else { 30 } else {
31 memset(&key->cat.CName, 0, sizeof(struct hfs_name)); 31 memset(&key->cat.CName, 0, sizeof(struct hfs_name));
@@ -62,13 +62,14 @@ static int hfs_cat_build_record(hfs_cat_rec *rec, u32 cnid, struct inode *inode)
62 } 62 }
63} 63}
64 64
65static int hfs_cat_build_thread(hfs_cat_rec *rec, int type, 65static int hfs_cat_build_thread(struct super_block *sb,
66 hfs_cat_rec *rec, int type,
66 u32 parentid, struct qstr *name) 67 u32 parentid, struct qstr *name)
67{ 68{
68 rec->type = type; 69 rec->type = type;
69 memset(rec->thread.reserved, 0, sizeof(rec->thread.reserved)); 70 memset(rec->thread.reserved, 0, sizeof(rec->thread.reserved));
70 rec->thread.ParID = cpu_to_be32(parentid); 71 rec->thread.ParID = cpu_to_be32(parentid);
71 hfs_triv2mac(&rec->thread.CName, name); 72 hfs_asc2mac(sb, &rec->thread.CName, name);
72 return sizeof(struct hfs_cat_thread); 73 return sizeof(struct hfs_cat_thread);
73} 74}
74 75
@@ -93,8 +94,8 @@ int hfs_cat_create(u32 cnid, struct inode *dir, struct qstr *str, struct inode *
93 sb = dir->i_sb; 94 sb = dir->i_sb;
94 hfs_find_init(HFS_SB(sb)->cat_tree, &fd); 95 hfs_find_init(HFS_SB(sb)->cat_tree, &fd);
95 96
96 hfs_cat_build_key(fd.search_key, cnid, NULL); 97 hfs_cat_build_key(sb, fd.search_key, cnid, NULL);
97 entry_size = hfs_cat_build_thread(&entry, S_ISDIR(inode->i_mode) ? 98 entry_size = hfs_cat_build_thread(sb, &entry, S_ISDIR(inode->i_mode) ?
98 HFS_CDR_THD : HFS_CDR_FTH, 99 HFS_CDR_THD : HFS_CDR_FTH,
99 dir->i_ino, str); 100 dir->i_ino, str);
100 err = hfs_brec_find(&fd); 101 err = hfs_brec_find(&fd);
@@ -107,7 +108,7 @@ int hfs_cat_create(u32 cnid, struct inode *dir, struct qstr *str, struct inode *
107 if (err) 108 if (err)
108 goto err2; 109 goto err2;
109 110
110 hfs_cat_build_key(fd.search_key, dir->i_ino, str); 111 hfs_cat_build_key(sb, fd.search_key, dir->i_ino, str);
111 entry_size = hfs_cat_build_record(&entry, cnid, inode); 112 entry_size = hfs_cat_build_record(&entry, cnid, inode);
112 err = hfs_brec_find(&fd); 113 err = hfs_brec_find(&fd);
113 if (err != -ENOENT) { 114 if (err != -ENOENT) {
@@ -127,7 +128,7 @@ int hfs_cat_create(u32 cnid, struct inode *dir, struct qstr *str, struct inode *
127 return 0; 128 return 0;
128 129
129err1: 130err1:
130 hfs_cat_build_key(fd.search_key, cnid, NULL); 131 hfs_cat_build_key(sb, fd.search_key, cnid, NULL);
131 if (!hfs_brec_find(&fd)) 132 if (!hfs_brec_find(&fd))
132 hfs_brec_remove(&fd); 133 hfs_brec_remove(&fd);
133err2: 134err2:
@@ -176,7 +177,7 @@ int hfs_cat_find_brec(struct super_block *sb, u32 cnid,
176 hfs_cat_rec rec; 177 hfs_cat_rec rec;
177 int res, len, type; 178 int res, len, type;
178 179
179 hfs_cat_build_key(fd->search_key, cnid, NULL); 180 hfs_cat_build_key(sb, fd->search_key, cnid, NULL);
180 res = hfs_brec_read(fd, &rec, sizeof(rec)); 181 res = hfs_brec_read(fd, &rec, sizeof(rec));
181 if (res) 182 if (res)
182 return res; 183 return res;
@@ -211,7 +212,7 @@ int hfs_cat_delete(u32 cnid, struct inode *dir, struct qstr *str)
211 sb = dir->i_sb; 212 sb = dir->i_sb;
212 hfs_find_init(HFS_SB(sb)->cat_tree, &fd); 213 hfs_find_init(HFS_SB(sb)->cat_tree, &fd);
213 214
214 hfs_cat_build_key(fd.search_key, dir->i_ino, str); 215 hfs_cat_build_key(sb, fd.search_key, dir->i_ino, str);
215 res = hfs_brec_find(&fd); 216 res = hfs_brec_find(&fd);
216 if (res) 217 if (res)
217 goto out; 218 goto out;
@@ -239,7 +240,7 @@ int hfs_cat_delete(u32 cnid, struct inode *dir, struct qstr *str)
239 if (res) 240 if (res)
240 goto out; 241 goto out;
241 242
242 hfs_cat_build_key(fd.search_key, cnid, NULL); 243 hfs_cat_build_key(sb, fd.search_key, cnid, NULL);
243 res = hfs_brec_find(&fd); 244 res = hfs_brec_find(&fd);
244 if (!res) { 245 if (!res) {
245 res = hfs_brec_remove(&fd); 246 res = hfs_brec_remove(&fd);
@@ -280,7 +281,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name,
280 dst_fd = src_fd; 281 dst_fd = src_fd;
281 282
282 /* find the old dir entry and read the data */ 283 /* find the old dir entry and read the data */
283 hfs_cat_build_key(src_fd.search_key, src_dir->i_ino, src_name); 284 hfs_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
284 err = hfs_brec_find(&src_fd); 285 err = hfs_brec_find(&src_fd);
285 if (err) 286 if (err)
286 goto out; 287 goto out;
@@ -289,7 +290,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name,
289 src_fd.entrylength); 290 src_fd.entrylength);
290 291
291 /* create new dir entry with the data from the old entry */ 292 /* create new dir entry with the data from the old entry */
292 hfs_cat_build_key(dst_fd.search_key, dst_dir->i_ino, dst_name); 293 hfs_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name);
293 err = hfs_brec_find(&dst_fd); 294 err = hfs_brec_find(&dst_fd);
294 if (err != -ENOENT) { 295 if (err != -ENOENT) {
295 if (!err) 296 if (!err)
@@ -305,7 +306,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name,
305 mark_inode_dirty(dst_dir); 306 mark_inode_dirty(dst_dir);
306 307
307 /* finally remove the old entry */ 308 /* finally remove the old entry */
308 hfs_cat_build_key(src_fd.search_key, src_dir->i_ino, src_name); 309 hfs_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
309 err = hfs_brec_find(&src_fd); 310 err = hfs_brec_find(&src_fd);
310 if (err) 311 if (err)
311 goto out; 312 goto out;
@@ -321,7 +322,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name,
321 goto out; 322 goto out;
322 323
323 /* remove old thread entry */ 324 /* remove old thread entry */
324 hfs_cat_build_key(src_fd.search_key, cnid, NULL); 325 hfs_cat_build_key(sb, src_fd.search_key, cnid, NULL);
325 err = hfs_brec_find(&src_fd); 326 err = hfs_brec_find(&src_fd);
326 if (err) 327 if (err)
327 goto out; 328 goto out;
@@ -330,8 +331,8 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name,
330 goto out; 331 goto out;
331 332
332 /* create new thread entry */ 333 /* create new thread entry */
333 hfs_cat_build_key(dst_fd.search_key, cnid, NULL); 334 hfs_cat_build_key(sb, dst_fd.search_key, cnid, NULL);
334 entry_size = hfs_cat_build_thread(&entry, type == HFS_CDR_FIL ? HFS_CDR_FTH : HFS_CDR_THD, 335 entry_size = hfs_cat_build_thread(sb, &entry, type == HFS_CDR_FIL ? HFS_CDR_FTH : HFS_CDR_THD,
335 dst_dir->i_ino, dst_name); 336 dst_dir->i_ino, dst_name);
336 err = hfs_brec_find(&dst_fd); 337 err = hfs_brec_find(&dst_fd);
337 if (err != -ENOENT) { 338 if (err != -ENOENT) {
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index c55998262aed..e1f24befba58 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -28,7 +28,7 @@ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry,
28 dentry->d_op = &hfs_dentry_operations; 28 dentry->d_op = &hfs_dentry_operations;
29 29
30 hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd); 30 hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd);
31 hfs_cat_build_key(fd.search_key, dir->i_ino, &dentry->d_name); 31 hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name);
32 res = hfs_brec_read(&fd, &rec, sizeof(rec)); 32 res = hfs_brec_read(&fd, &rec, sizeof(rec));
33 if (res) { 33 if (res) {
34 hfs_find_exit(&fd); 34 hfs_find_exit(&fd);
@@ -56,7 +56,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
56 struct inode *inode = filp->f_dentry->d_inode; 56 struct inode *inode = filp->f_dentry->d_inode;
57 struct super_block *sb = inode->i_sb; 57 struct super_block *sb = inode->i_sb;
58 int len, err; 58 int len, err;
59 char strbuf[HFS_NAMELEN + 1]; 59 char strbuf[HFS_MAX_NAMELEN];
60 union hfs_cat_rec entry; 60 union hfs_cat_rec entry;
61 struct hfs_find_data fd; 61 struct hfs_find_data fd;
62 struct hfs_readdir_data *rd; 62 struct hfs_readdir_data *rd;
@@ -66,7 +66,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
66 return 0; 66 return 0;
67 67
68 hfs_find_init(HFS_SB(sb)->cat_tree, &fd); 68 hfs_find_init(HFS_SB(sb)->cat_tree, &fd);
69 hfs_cat_build_key(fd.search_key, inode->i_ino, NULL); 69 hfs_cat_build_key(sb, fd.search_key, inode->i_ino, NULL);
70 err = hfs_brec_find(&fd); 70 err = hfs_brec_find(&fd);
71 if (err) 71 if (err)
72 goto out; 72 goto out;
@@ -111,7 +111,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
111 } 111 }
112 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); 112 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength);
113 type = entry.type; 113 type = entry.type;
114 len = hfs_mac2triv(strbuf, &fd.key->cat.CName); 114 len = hfs_mac2asc(sb, strbuf, &fd.key->cat.CName);
115 if (type == HFS_CDR_DIR) { 115 if (type == HFS_CDR_DIR) {
116 if (fd.entrylength < sizeof(struct hfs_cat_dir)) { 116 if (fd.entrylength < sizeof(struct hfs_cat_dir)) {
117 printk("HFS: small dir entry\n"); 117 printk("HFS: small dir entry\n");
@@ -307,7 +307,8 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry,
307 old_dir, &old_dentry->d_name, 307 old_dir, &old_dentry->d_name,
308 new_dir, &new_dentry->d_name); 308 new_dir, &new_dentry->d_name);
309 if (!res) 309 if (!res)
310 hfs_cat_build_key((btree_key *)&HFS_I(old_dentry->d_inode)->cat_key, 310 hfs_cat_build_key(old_dir->i_sb,
311 (btree_key *)&HFS_I(old_dentry->d_inode)->cat_key,
311 new_dir->i_ino, &new_dentry->d_name); 312 new_dir->i_ino, &new_dentry->d_name);
312 return res; 313 return res;
313} 314}
diff --git a/fs/hfs/hfs.h b/fs/hfs/hfs.h
index df6b33adee3b..88099ab1a180 100644
--- a/fs/hfs/hfs.h
+++ b/fs/hfs/hfs.h
@@ -25,6 +25,7 @@
25#define HFS_SECTOR_SIZE 512 /* size of an HFS sector */ 25#define HFS_SECTOR_SIZE 512 /* size of an HFS sector */
26#define HFS_SECTOR_SIZE_BITS 9 /* log_2(HFS_SECTOR_SIZE) */ 26#define HFS_SECTOR_SIZE_BITS 9 /* log_2(HFS_SECTOR_SIZE) */
27#define HFS_NAMELEN 31 /* maximum length of an HFS filename */ 27#define HFS_NAMELEN 31 /* maximum length of an HFS filename */
28#define HFS_MAX_NAMELEN 128
28#define HFS_MAX_VALENCE 32767U 29#define HFS_MAX_VALENCE 32767U
29 30
30/* Meanings of the drAtrb field of the MDB, 31/* Meanings of the drAtrb field of the MDB,
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 0dc8ef8e14de..aae019aadf88 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -141,6 +141,8 @@ struct hfs_sb_info {
141 141
142 int session, part; 142 int session, part;
143 143
144 struct nls_table *nls_io, *nls_disk;
145
144 struct semaphore bitmap_lock; 146 struct semaphore bitmap_lock;
145 147
146 unsigned long flags; 148 unsigned long flags;
@@ -168,7 +170,7 @@ extern int hfs_cat_create(u32, struct inode *, struct qstr *, struct inode *);
168extern int hfs_cat_delete(u32, struct inode *, struct qstr *); 170extern int hfs_cat_delete(u32, struct inode *, struct qstr *);
169extern int hfs_cat_move(u32, struct inode *, struct qstr *, 171extern int hfs_cat_move(u32, struct inode *, struct qstr *,
170 struct inode *, struct qstr *); 172 struct inode *, struct qstr *);
171extern void hfs_cat_build_key(btree_key *, u32, struct qstr *); 173extern void hfs_cat_build_key(struct super_block *, btree_key *, u32, struct qstr *);
172 174
173/* dir.c */ 175/* dir.c */
174extern struct file_operations hfs_dir_operations; 176extern struct file_operations hfs_dir_operations;
@@ -222,8 +224,8 @@ extern int hfs_strcmp(const unsigned char *, unsigned int,
222extern int hfs_compare_dentry(struct dentry *, struct qstr *, struct qstr *); 224extern int hfs_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
223 225
224/* trans.c */ 226/* trans.c */
225extern void hfs_triv2mac(struct hfs_name *, struct qstr *); 227extern void hfs_asc2mac(struct super_block *, struct hfs_name *, struct qstr *);
226extern int hfs_mac2triv(char *, const struct hfs_name *); 228extern int hfs_mac2asc(struct super_block *, char *, const struct hfs_name *);
227 229
228extern struct timezone sys_tz; 230extern struct timezone sys_tz;
229 231
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 751912326094..f1570b9f9de3 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -160,7 +160,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode)
160 160
161 init_MUTEX(&HFS_I(inode)->extents_lock); 161 init_MUTEX(&HFS_I(inode)->extents_lock);
162 INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list); 162 INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
163 hfs_cat_build_key((btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name); 163 hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name);
164 inode->i_ino = HFS_SB(sb)->next_id++; 164 inode->i_ino = HFS_SB(sb)->next_id++;
165 inode->i_mode = mode; 165 inode->i_mode = mode;
166 inode->i_uid = current->fsuid; 166 inode->i_uid = current->fsuid;
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 217e32f37e0b..0a473f79c89f 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -10,6 +10,7 @@
10 10
11#include <linux/cdrom.h> 11#include <linux/cdrom.h>
12#include <linux/genhd.h> 12#include <linux/genhd.h>
13#include <linux/nls.h>
13 14
14#include "hfs_fs.h" 15#include "hfs_fs.h"
15#include "btree.h" 16#include "btree.h"
@@ -343,6 +344,11 @@ void hfs_mdb_put(struct super_block *sb)
343 brelse(HFS_SB(sb)->mdb_bh); 344 brelse(HFS_SB(sb)->mdb_bh);
344 brelse(HFS_SB(sb)->alt_mdb_bh); 345 brelse(HFS_SB(sb)->alt_mdb_bh);
345 346
347 if (HFS_SB(sb)->nls_io)
348 unload_nls(HFS_SB(sb)->nls_io);
349 if (HFS_SB(sb)->nls_disk)
350 unload_nls(HFS_SB(sb)->nls_disk);
351
346 kfree(HFS_SB(sb)); 352 kfree(HFS_SB(sb));
347 sb->s_fs_info = NULL; 353 sb->s_fs_info = NULL;
348} 354}
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index ab783f6afa3b..c5074aeafcae 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -15,8 +15,11 @@
15#include <linux/config.h> 15#include <linux/config.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/blkdev.h> 17#include <linux/blkdev.h>
18#include <linux/mount.h>
18#include <linux/init.h> 19#include <linux/init.h>
20#include <linux/nls.h>
19#include <linux/parser.h> 21#include <linux/parser.h>
22#include <linux/seq_file.h>
20#include <linux/vfs.h> 23#include <linux/vfs.h>
21 24
22#include "hfs_fs.h" 25#include "hfs_fs.h"
@@ -111,6 +114,32 @@ static int hfs_remount(struct super_block *sb, int *flags, char *data)
111 return 0; 114 return 0;
112} 115}
113 116
117static int hfs_show_options(struct seq_file *seq, struct vfsmount *mnt)
118{
119 struct hfs_sb_info *sbi = HFS_SB(mnt->mnt_sb);
120
121 if (sbi->s_creator != cpu_to_be32(0x3f3f3f3f))
122 seq_printf(seq, ",creator=%.4s", (char *)&sbi->s_creator);
123 if (sbi->s_type != cpu_to_be32(0x3f3f3f3f))
124 seq_printf(seq, ",type=%.4s", (char *)&sbi->s_type);
125 seq_printf(seq, ",uid=%u,gid=%u", sbi->s_uid, sbi->s_gid);
126 if (sbi->s_file_umask != 0133)
127 seq_printf(seq, ",file_umask=%o", sbi->s_file_umask);
128 if (sbi->s_dir_umask != 0022)
129 seq_printf(seq, ",dir_umask=%o", sbi->s_dir_umask);
130 if (sbi->part >= 0)
131 seq_printf(seq, ",part=%u", sbi->part);
132 if (sbi->session >= 0)
133 seq_printf(seq, ",session=%u", sbi->session);
134 if (sbi->nls_disk)
135 seq_printf(seq, ",codepage=%s", sbi->nls_disk->charset);
136 if (sbi->nls_io)
137 seq_printf(seq, ",iocharset=%s", sbi->nls_io->charset);
138 if (sbi->s_quiet)
139 seq_printf(seq, ",quiet");
140 return 0;
141}
142
114static struct inode *hfs_alloc_inode(struct super_block *sb) 143static struct inode *hfs_alloc_inode(struct super_block *sb)
115{ 144{
116 struct hfs_inode_info *i; 145 struct hfs_inode_info *i;
@@ -133,11 +162,13 @@ static struct super_operations hfs_super_operations = {
133 .write_super = hfs_write_super, 162 .write_super = hfs_write_super,
134 .statfs = hfs_statfs, 163 .statfs = hfs_statfs,
135 .remount_fs = hfs_remount, 164 .remount_fs = hfs_remount,
165 .show_options = hfs_show_options,
136}; 166};
137 167
138enum { 168enum {
139 opt_uid, opt_gid, opt_umask, opt_file_umask, opt_dir_umask, 169 opt_uid, opt_gid, opt_umask, opt_file_umask, opt_dir_umask,
140 opt_part, opt_session, opt_type, opt_creator, opt_quiet, 170 opt_part, opt_session, opt_type, opt_creator, opt_quiet,
171 opt_codepage, opt_iocharset,
141 opt_err 172 opt_err
142}; 173};
143 174
@@ -152,6 +183,8 @@ static match_table_t tokens = {
152 { opt_type, "type=%s" }, 183 { opt_type, "type=%s" },
153 { opt_creator, "creator=%s" }, 184 { opt_creator, "creator=%s" },
154 { opt_quiet, "quiet" }, 185 { opt_quiet, "quiet" },
186 { opt_codepage, "codepage=%s" },
187 { opt_iocharset, "iocharset=%s" },
155 { opt_err, NULL } 188 { opt_err, NULL }
156}; 189};
157 190
@@ -257,11 +290,46 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
257 case opt_quiet: 290 case opt_quiet:
258 hsb->s_quiet = 1; 291 hsb->s_quiet = 1;
259 break; 292 break;
293 case opt_codepage:
294 if (hsb->nls_disk) {
295 printk("HFS+-fs: unable to change codepage\n");
296 return 0;
297 }
298 p = match_strdup(&args[0]);
299 hsb->nls_disk = load_nls(p);
300 if (!hsb->nls_disk) {
301 printk("HFS+-fs: unable to load codepage \"%s\"\n", p);
302 kfree(p);
303 return 0;
304 }
305 kfree(p);
306 break;
307 case opt_iocharset:
308 if (hsb->nls_io) {
309 printk("HFS: unable to change iocharset\n");
310 return 0;
311 }
312 p = match_strdup(&args[0]);
313 hsb->nls_io = load_nls(p);
314 if (!hsb->nls_io) {
315 printk("HFS: unable to load iocharset \"%s\"\n", p);
316 kfree(p);
317 return 0;
318 }
319 kfree(p);
320 break;
260 default: 321 default:
261 return 0; 322 return 0;
262 } 323 }
263 } 324 }
264 325
326 if (hsb->nls_disk && !hsb->nls_io) {
327 hsb->nls_io = load_nls_default();
328 if (!hsb->nls_io) {
329 printk("HFS: unable to load default iocharset\n");
330 return 0;
331 }
332 }
265 hsb->s_dir_umask &= 0777; 333 hsb->s_dir_umask &= 0777;
266 hsb->s_file_umask &= 0577; 334 hsb->s_file_umask &= 0577;
267 335
diff --git a/fs/hfs/trans.c b/fs/hfs/trans.c
index fb9720abbadd..e673a88b8ae7 100644
--- a/fs/hfs/trans.c
+++ b/fs/hfs/trans.c
@@ -9,12 +9,15 @@
9 * with ':' vs. '/' as the path-element separator. 9 * with ':' vs. '/' as the path-element separator.
10 */ 10 */
11 11
12#include <linux/types.h>
13#include <linux/nls.h>
14
12#include "hfs_fs.h" 15#include "hfs_fs.h"
13 16
14/*================ Global functions ================*/ 17/*================ Global functions ================*/
15 18
16/* 19/*
17 * hfs_mac2triv() 20 * hfs_mac2asc()
18 * 21 *
19 * Given a 'Pascal String' (a string preceded by a length byte) in 22 * Given a 'Pascal String' (a string preceded by a length byte) in
20 * the Macintosh character set produce the corresponding filename using 23 * the Macintosh character set produce the corresponding filename using
@@ -27,23 +30,58 @@
27 * by ':' which never appears in HFS filenames. All other characters 30 * by ':' which never appears in HFS filenames. All other characters
28 * are passed unchanged from input to output. 31 * are passed unchanged from input to output.
29 */ 32 */
30int hfs_mac2triv(char *out, const struct hfs_name *in) 33int hfs_mac2asc(struct super_block *sb, char *out, const struct hfs_name *in)
31{ 34{
32 const char *p; 35 struct nls_table *nls_disk = HFS_SB(sb)->nls_disk;
33 char c; 36 struct nls_table *nls_io = HFS_SB(sb)->nls_io;
34 int i, len; 37 const char *src;
38 char *dst;
39 int srclen, dstlen, size;
40
41 src = in->name;
42 srclen = in->len;
43 dst = out;
44 dstlen = HFS_MAX_NAMELEN;
45 if (nls_io) {
46 wchar_t ch;
35 47
36 len = in->len; 48 while (srclen > 0) {
37 p = in->name; 49 if (nls_disk) {
38 for (i = 0; i < len; i++) { 50 size = nls_disk->char2uni(src, srclen, &ch);
39 c = *p++; 51 if (size <= 0) {
40 *out++ = c == '/' ? ':' : c; 52 ch = '?';
53 size = 1;
54 }
55 src += size;
56 srclen -= size;
57 } else {
58 ch = *src++;
59 srclen--;
60 }
61 if (ch == '/')
62 ch = ':';
63 size = nls_io->uni2char(ch, dst, dstlen);
64 if (size < 0) {
65 if (size == -ENAMETOOLONG)
66 goto out;
67 *dst = '?';
68 size = 1;
69 }
70 dst += size;
71 dstlen -= size;
72 }
73 } else {
74 char ch;
75
76 while (--srclen >= 0)
77 *dst++ = (ch = *src++) == '/' ? ':' : ch;
41 } 78 }
42 return i; 79out:
80 return dst - out;
43} 81}
44 82
45/* 83/*
46 * hfs_triv2mac() 84 * hfs_asc2mac()
47 * 85 *
48 * Given an ASCII string (not null-terminated) and its length, 86 * Given an ASCII string (not null-terminated) and its length,
49 * generate the corresponding filename in the Macintosh character set 87 * generate the corresponding filename in the Macintosh character set
@@ -54,19 +92,57 @@ int hfs_mac2triv(char *out, const struct hfs_name *in)
54 * This routine is a inverse to hfs_mac2triv(). 92 * This routine is a inverse to hfs_mac2triv().
55 * A ':' is replaced by a '/'. 93 * A ':' is replaced by a '/'.
56 */ 94 */
57void hfs_triv2mac(struct hfs_name *out, struct qstr *in) 95void hfs_asc2mac(struct super_block *sb, struct hfs_name *out, struct qstr *in)
58{ 96{
97 struct nls_table *nls_disk = HFS_SB(sb)->nls_disk;
98 struct nls_table *nls_io = HFS_SB(sb)->nls_io;
59 const char *src; 99 const char *src;
60 char *dst, c; 100 char *dst;
61 int i, len; 101 int srclen, dstlen, size;
62 102
63 out->len = len = min((unsigned int)HFS_NAMELEN, in->len);
64 src = in->name; 103 src = in->name;
104 srclen = in->len;
65 dst = out->name; 105 dst = out->name;
66 for (i = 0; i < len; i++) { 106 dstlen = HFS_NAMELEN;
67 c = *src++; 107 if (nls_io) {
68 *dst++ = c == ':' ? '/' : c; 108 wchar_t ch;
109
110 while (srclen > 0) {
111 size = nls_io->char2uni(src, srclen, &ch);
112 if (size < 0) {
113 ch = '?';
114 size = 1;
115 }
116 src += size;
117 srclen -= size;
118 if (ch == ':')
119 ch = '/';
120 if (nls_disk) {
121 size = nls_disk->uni2char(ch, dst, dstlen);
122 if (size < 0) {
123 if (size == -ENAMETOOLONG)
124 goto out;
125 *dst = '?';
126 size = 1;
127 }
128 dst += size;
129 dstlen -= size;
130 } else {
131 *dst++ = ch > 0xff ? '?' : ch;
132 dstlen--;
133 }
134 }
135 } else {
136 char ch;
137
138 if (dstlen > srclen)
139 dstlen = srclen;
140 while (--dstlen >= 0)
141 *dst++ = (ch = *src++) == ':' ? '/' : ch;
69 } 142 }
70 for (; i < HFS_NAMELEN; i++) 143out:
144 out->len = dst - (char *)out->name;
145 dstlen = HFS_NAMELEN - out->len;
146 while (--dstlen >= 0)
71 *dst++ = 0; 147 *dst++ = 0;
72} 148}
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 8868d3b766fd..b85abc6e6f83 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -18,8 +18,6 @@
18#include "hfsplus_fs.h" 18#include "hfsplus_fs.h"
19#include "hfsplus_raw.h" 19#include "hfsplus_raw.h"
20 20
21#define REF_PAGES 0
22
23/* Copy a specified range of bytes from the raw data of a node */ 21/* Copy a specified range of bytes from the raw data of a node */
24void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len) 22void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len)
25{ 23{
@@ -450,9 +448,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
450 page_cache_release(page); 448 page_cache_release(page);
451 goto fail; 449 goto fail;
452 } 450 }
453#if !REF_PAGES
454 page_cache_release(page); 451 page_cache_release(page);
455#endif
456 node->page[i] = page; 452 node->page[i] = page;
457 } 453 }
458 454
@@ -612,13 +608,6 @@ void hfs_bnode_get(struct hfs_bnode *node)
612{ 608{
613 if (node) { 609 if (node) {
614 atomic_inc(&node->refcnt); 610 atomic_inc(&node->refcnt);
615#if REF_PAGES
616 {
617 int i;
618 for (i = 0; i < node->tree->pages_per_bnode; i++)
619 get_page(node->page[i]);
620 }
621#endif
622 dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n", 611 dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n",
623 node->tree->cnid, node->this, atomic_read(&node->refcnt)); 612 node->tree->cnid, node->this, atomic_read(&node->refcnt));
624 } 613 }
@@ -635,20 +624,12 @@ void hfs_bnode_put(struct hfs_bnode *node)
635 node->tree->cnid, node->this, atomic_read(&node->refcnt)); 624 node->tree->cnid, node->this, atomic_read(&node->refcnt));
636 if (!atomic_read(&node->refcnt)) 625 if (!atomic_read(&node->refcnt))
637 BUG(); 626 BUG();
638 if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock)) { 627 if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock))
639#if REF_PAGES
640 for (i = 0; i < tree->pages_per_bnode; i++)
641 put_page(node->page[i]);
642#endif
643 return; 628 return;
644 }
645 for (i = 0; i < tree->pages_per_bnode; i++) { 629 for (i = 0; i < tree->pages_per_bnode; i++) {
646 if (!node->page[i]) 630 if (!node->page[i])
647 continue; 631 continue;
648 mark_page_accessed(node->page[i]); 632 mark_page_accessed(node->page[i]);
649#if REF_PAGES
650 put_page(node->page[i]);
651#endif
652 } 633 }
653 634
654 if (test_bit(HFS_BNODE_DELETED, &node->flags)) { 635 if (test_bit(HFS_BNODE_DELETED, &node->flags)) {
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 533094a570df..2bc0cdd30e56 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -343,8 +343,9 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
343ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size); 343ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size);
344 344
345/* options.c */ 345/* options.c */
346int parse_options(char *, struct hfsplus_sb_info *); 346int hfsplus_parse_options(char *, struct hfsplus_sb_info *);
347void fill_defaults(struct hfsplus_sb_info *); 347void hfsplus_fill_defaults(struct hfsplus_sb_info *);
348int hfsplus_show_options(struct seq_file *, struct vfsmount *);
348 349
349/* tables.c */ 350/* tables.c */
350extern u16 hfsplus_case_fold_table[]; 351extern u16 hfsplus_case_fold_table[];
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index 1cca0102c98d..cca0818aa4ca 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -13,6 +13,8 @@
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/parser.h> 14#include <linux/parser.h>
15#include <linux/nls.h> 15#include <linux/nls.h>
16#include <linux/mount.h>
17#include <linux/seq_file.h>
16#include "hfsplus_fs.h" 18#include "hfsplus_fs.h"
17 19
18enum { 20enum {
@@ -38,7 +40,7 @@ static match_table_t tokens = {
38}; 40};
39 41
40/* Initialize an options object to reasonable defaults */ 42/* Initialize an options object to reasonable defaults */
41void fill_defaults(struct hfsplus_sb_info *opts) 43void hfsplus_fill_defaults(struct hfsplus_sb_info *opts)
42{ 44{
43 if (!opts) 45 if (!opts)
44 return; 46 return;
@@ -63,7 +65,7 @@ static inline int match_fourchar(substring_t *arg, u32 *result)
63 65
64/* Parse options from mount. Returns 0 on failure */ 66/* Parse options from mount. Returns 0 on failure */
65/* input is the options passed to mount() as a string */ 67/* input is the options passed to mount() as a string */
66int parse_options(char *input, struct hfsplus_sb_info *sbi) 68int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
67{ 69{
68 char *p; 70 char *p;
69 substring_t args[MAX_OPT_ARGS]; 71 substring_t args[MAX_OPT_ARGS];
@@ -160,3 +162,23 @@ done:
160 162
161 return 1; 163 return 1;
162} 164}
165
166int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt)
167{
168 struct hfsplus_sb_info *sbi = &HFSPLUS_SB(mnt->mnt_sb);
169
170 if (sbi->creator != HFSPLUS_DEF_CR_TYPE)
171 seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator);
172 if (sbi->type != HFSPLUS_DEF_CR_TYPE)
173 seq_printf(seq, ",type=%.4s", (char *)&sbi->type);
174 seq_printf(seq, ",umask=%o,uid=%u,gid=%u", sbi->umask, sbi->uid, sbi->gid);
175 if (sbi->part >= 0)
176 seq_printf(seq, ",part=%u", sbi->part);
177 if (sbi->session >= 0)
178 seq_printf(seq, ",session=%u", sbi->session);
179 if (sbi->nls)
180 seq_printf(seq, ",nls=%s", sbi->nls->charset);
181 if (sbi->flags & HFSPLUS_SB_NODECOMPOSE)
182 seq_printf(seq, ",nodecompose");
183 return 0;
184}
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index d55ad67b8e42..fd0f0f050e1d 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -217,8 +217,7 @@ static void hfsplus_put_super(struct super_block *sb)
217 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_UNMNT); 217 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_UNMNT);
218 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_INCNSTNT); 218 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_INCNSTNT);
219 mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); 219 mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh);
220 ll_rw_block(WRITE, 1, &HFSPLUS_SB(sb).s_vhbh); 220 sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh);
221 wait_on_buffer(HFSPLUS_SB(sb).s_vhbh);
222 } 221 }
223 222
224 hfs_btree_close(HFSPLUS_SB(sb).cat_tree); 223 hfs_btree_close(HFSPLUS_SB(sb).cat_tree);
@@ -277,6 +276,7 @@ static struct super_operations hfsplus_sops = {
277 .write_super = hfsplus_write_super, 276 .write_super = hfsplus_write_super,
278 .statfs = hfsplus_statfs, 277 .statfs = hfsplus_statfs,
279 .remount_fs = hfsplus_remount, 278 .remount_fs = hfsplus_remount,
279 .show_options = hfsplus_show_options,
280}; 280};
281 281
282static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) 282static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
@@ -297,8 +297,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
297 memset(sbi, 0, sizeof(HFSPLUS_SB(sb))); 297 memset(sbi, 0, sizeof(HFSPLUS_SB(sb)));
298 sb->s_fs_info = sbi; 298 sb->s_fs_info = sbi;
299 INIT_HLIST_HEAD(&sbi->rsrc_inodes); 299 INIT_HLIST_HEAD(&sbi->rsrc_inodes);
300 fill_defaults(sbi); 300 hfsplus_fill_defaults(sbi);
301 if (!parse_options(data, sbi)) { 301 if (!hfsplus_parse_options(data, sbi)) {
302 if (!silent) 302 if (!silent)
303 printk("HFS+-fs: unable to parse mount options\n"); 303 printk("HFS+-fs: unable to parse mount options\n");
304 err = -EINVAL; 304 err = -EINVAL;
@@ -415,8 +415,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
415 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); 415 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT);
416 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); 416 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT);
417 mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); 417 mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh);
418 ll_rw_block(WRITE, 1, &HFSPLUS_SB(sb).s_vhbh); 418 sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh);
419 wait_on_buffer(HFSPLUS_SB(sb).s_vhbh);
420 419
421 if (!HFSPLUS_SB(sb).hidden_dir) { 420 if (!HFSPLUS_SB(sb).hidden_dir) {
422 printk("HFS+: create hidden dir...\n"); 421 printk("HFS+: create hidden dir...\n");
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 67bca0d4a33b..cca3fb693f99 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -49,7 +49,6 @@ struct hostfs_iattr {
49 struct timespec ia_atime; 49 struct timespec ia_atime;
50 struct timespec ia_mtime; 50 struct timespec ia_mtime;
51 struct timespec ia_ctime; 51 struct timespec ia_ctime;
52 unsigned int ia_attr_flags;
53}; 52};
54 53
55extern int stat_file(const char *path, unsigned long long *inode_out, 54extern int stat_file(const char *path, unsigned long long *inode_out,
diff --git a/fs/inode.c b/fs/inode.c
index e57f1724db3e..71df1b1e8f75 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1195,9 +1195,6 @@ void update_atime(struct inode *inode)
1195 if (!timespec_equal(&inode->i_atime, &now)) { 1195 if (!timespec_equal(&inode->i_atime, &now)) {
1196 inode->i_atime = now; 1196 inode->i_atime = now;
1197 mark_inode_dirty_sync(inode); 1197 mark_inode_dirty_sync(inode);
1198 } else {
1199 if (!timespec_equal(&inode->i_atime, &now))
1200 inode->i_atime = now;
1201 } 1198 }
1202} 1199}
1203 1200
diff --git a/fs/inotify.c b/fs/inotify.c
index 2e4e2a57708c..a37e9fb1da58 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -37,6 +37,7 @@
37#include <asm/ioctls.h> 37#include <asm/ioctls.h>
38 38
39static atomic_t inotify_cookie; 39static atomic_t inotify_cookie;
40static atomic_t inotify_watches;
40 41
41static kmem_cache_t *watch_cachep; 42static kmem_cache_t *watch_cachep;
42static kmem_cache_t *event_cachep; 43static kmem_cache_t *event_cachep;
@@ -422,6 +423,7 @@ static struct inotify_watch *create_watch(struct inotify_device *dev,
422 get_inotify_watch(watch); 423 get_inotify_watch(watch);
423 424
424 atomic_inc(&dev->user->inotify_watches); 425 atomic_inc(&dev->user->inotify_watches);
426 atomic_inc(&inotify_watches);
425 427
426 return watch; 428 return watch;
427} 429}
@@ -454,6 +456,7 @@ static void remove_watch_no_event(struct inotify_watch *watch,
454 list_del(&watch->d_list); 456 list_del(&watch->d_list);
455 457
456 atomic_dec(&dev->user->inotify_watches); 458 atomic_dec(&dev->user->inotify_watches);
459 atomic_dec(&inotify_watches);
457 idr_remove(&dev->idr, watch->wd); 460 idr_remove(&dev->idr, watch->wd);
458 put_inotify_watch(watch); 461 put_inotify_watch(watch);
459} 462}
@@ -532,6 +535,9 @@ void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
532 struct dentry *parent; 535 struct dentry *parent;
533 struct inode *inode; 536 struct inode *inode;
534 537
538 if (!atomic_read (&inotify_watches))
539 return;
540
535 spin_lock(&dentry->d_lock); 541 spin_lock(&dentry->d_lock);
536 parent = dentry->d_parent; 542 parent = dentry->d_parent;
537 inode = parent->d_inode; 543 inode = parent->d_inode;
@@ -925,6 +931,7 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
925 struct nameidata nd; 931 struct nameidata nd;
926 struct file *filp; 932 struct file *filp;
927 int ret, fput_needed; 933 int ret, fput_needed;
934 int mask_add = 0;
928 935
929 filp = fget_light(fd, &fput_needed); 936 filp = fget_light(fd, &fput_needed);
930 if (unlikely(!filp)) 937 if (unlikely(!filp))
@@ -947,6 +954,9 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
947 down(&inode->inotify_sem); 954 down(&inode->inotify_sem);
948 down(&dev->sem); 955 down(&dev->sem);
949 956
957 if (mask & IN_MASK_ADD)
958 mask_add = 1;
959
950 /* don't let user-space set invalid bits: we don't want flags set */ 960 /* don't let user-space set invalid bits: we don't want flags set */
951 mask &= IN_ALL_EVENTS; 961 mask &= IN_ALL_EVENTS;
952 if (unlikely(!mask)) { 962 if (unlikely(!mask)) {
@@ -960,7 +970,10 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
960 */ 970 */
961 old = inode_find_dev(inode, dev); 971 old = inode_find_dev(inode, dev);
962 if (unlikely(old)) { 972 if (unlikely(old)) {
963 old->mask = mask; 973 if (mask_add)
974 old->mask |= mask;
975 else
976 old->mask = mask;
964 ret = old->wd; 977 ret = old->wd;
965 goto out; 978 goto out;
966 } 979 }
@@ -1043,6 +1056,7 @@ static int __init inotify_setup(void)
1043 inotify_max_user_watches = 8192; 1056 inotify_max_user_watches = 8192;
1044 1057
1045 atomic_set(&inotify_cookie, 0); 1058 atomic_set(&inotify_cookie, 0);
1059 atomic_set(&inotify_watches, 0);
1046 1060
1047 watch_cachep = kmem_cache_create("inotify_watch_cache", 1061 watch_cachep = kmem_cache_create("inotify_watch_cache",
1048 sizeof(struct inotify_watch), 1062 sizeof(struct inotify_watch),
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 5a97e346bd95..014a51fd00d7 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -204,7 +204,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
204 int i; 204 int i;
205 205
206 spin_unlock(&journal->j_list_lock); 206 spin_unlock(&journal->j_list_lock);
207 ll_rw_block(WRITE, *batch_count, bhs); 207 ll_rw_block(SWRITE, *batch_count, bhs);
208 spin_lock(&journal->j_list_lock); 208 spin_lock(&journal->j_list_lock);
209 for (i = 0; i < *batch_count; i++) { 209 for (i = 0; i < *batch_count; i++) {
210 struct buffer_head *bh = bhs[i]; 210 struct buffer_head *bh = bhs[i];
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index dac720c837ab..2a3e310f79ef 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -358,7 +358,7 @@ write_out_data:
358 jbd_debug(2, "submit %d writes\n", 358 jbd_debug(2, "submit %d writes\n",
359 bufs); 359 bufs);
360 spin_unlock(&journal->j_list_lock); 360 spin_unlock(&journal->j_list_lock);
361 ll_rw_block(WRITE, bufs, wbuf); 361 ll_rw_block(SWRITE, bufs, wbuf);
362 journal_brelse_array(wbuf, bufs); 362 journal_brelse_array(wbuf, bufs);
363 bufs = 0; 363 bufs = 0;
364 goto write_out_data; 364 goto write_out_data;
@@ -381,7 +381,7 @@ write_out_data:
381 381
382 if (bufs) { 382 if (bufs) {
383 spin_unlock(&journal->j_list_lock); 383 spin_unlock(&journal->j_list_lock);
384 ll_rw_block(WRITE, bufs, wbuf); 384 ll_rw_block(SWRITE, bufs, wbuf);
385 journal_brelse_array(wbuf, bufs); 385 journal_brelse_array(wbuf, bufs);
386 spin_lock(&journal->j_list_lock); 386 spin_lock(&journal->j_list_lock);
387 } 387 }
@@ -720,11 +720,17 @@ wait_for_iobuf:
720 J_ASSERT(commit_transaction->t_log_list == NULL); 720 J_ASSERT(commit_transaction->t_log_list == NULL);
721 721
722restart_loop: 722restart_loop:
723 /*
724 * As there are other places (journal_unmap_buffer()) adding buffers
725 * to this list we have to be careful and hold the j_list_lock.
726 */
727 spin_lock(&journal->j_list_lock);
723 while (commit_transaction->t_forget) { 728 while (commit_transaction->t_forget) {
724 transaction_t *cp_transaction; 729 transaction_t *cp_transaction;
725 struct buffer_head *bh; 730 struct buffer_head *bh;
726 731
727 jh = commit_transaction->t_forget; 732 jh = commit_transaction->t_forget;
733 spin_unlock(&journal->j_list_lock);
728 bh = jh2bh(jh); 734 bh = jh2bh(jh);
729 jbd_lock_bh_state(bh); 735 jbd_lock_bh_state(bh);
730 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || 736 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
@@ -792,9 +798,25 @@ restart_loop:
792 journal_remove_journal_head(bh); /* needs a brelse */ 798 journal_remove_journal_head(bh); /* needs a brelse */
793 release_buffer_page(bh); 799 release_buffer_page(bh);
794 } 800 }
801 cond_resched_lock(&journal->j_list_lock);
802 }
803 spin_unlock(&journal->j_list_lock);
804 /*
805 * This is a bit sleazy. We borrow j_list_lock to protect
806 * journal->j_committing_transaction in __journal_remove_checkpoint.
807 * Really, __journal_remove_checkpoint should be using j_state_lock but
808 * it's a bit hassle to hold that across __journal_remove_checkpoint
809 */
810 spin_lock(&journal->j_state_lock);
811 spin_lock(&journal->j_list_lock);
812 /*
813 * Now recheck if some buffers did not get attached to the transaction
814 * while the lock was dropped...
815 */
816 if (commit_transaction->t_forget) {
795 spin_unlock(&journal->j_list_lock); 817 spin_unlock(&journal->j_list_lock);
796 if (cond_resched()) 818 spin_unlock(&journal->j_state_lock);
797 goto restart_loop; 819 goto restart_loop;
798 } 820 }
799 821
800 /* Done with this transaction! */ 822 /* Done with this transaction! */
@@ -803,14 +825,6 @@ restart_loop:
803 825
804 J_ASSERT(commit_transaction->t_state == T_COMMIT); 826 J_ASSERT(commit_transaction->t_state == T_COMMIT);
805 827
806 /*
807 * This is a bit sleazy. We borrow j_list_lock to protect
808 * journal->j_committing_transaction in __journal_remove_checkpoint.
809 * Really, __jornal_remove_checkpoint should be using j_state_lock but
810 * it's a bit hassle to hold that across __journal_remove_checkpoint
811 */
812 spin_lock(&journal->j_state_lock);
813 spin_lock(&journal->j_list_lock);
814 commit_transaction->t_state = T_FINISHED; 828 commit_transaction->t_state = T_FINISHED;
815 J_ASSERT(commit_transaction == journal->j_committing_transaction); 829 J_ASSERT(commit_transaction == journal->j_committing_transaction);
816 journal->j_commit_sequence = commit_transaction->t_tid; 830 journal->j_commit_sequence = commit_transaction->t_tid;
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 5e7b43949517..7ae2c4fe506b 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -65,7 +65,6 @@ EXPORT_SYMBOL(journal_set_features);
65EXPORT_SYMBOL(journal_create); 65EXPORT_SYMBOL(journal_create);
66EXPORT_SYMBOL(journal_load); 66EXPORT_SYMBOL(journal_load);
67EXPORT_SYMBOL(journal_destroy); 67EXPORT_SYMBOL(journal_destroy);
68EXPORT_SYMBOL(journal_recover);
69EXPORT_SYMBOL(journal_update_superblock); 68EXPORT_SYMBOL(journal_update_superblock);
70EXPORT_SYMBOL(journal_abort); 69EXPORT_SYMBOL(journal_abort);
71EXPORT_SYMBOL(journal_errno); 70EXPORT_SYMBOL(journal_errno);
@@ -81,6 +80,7 @@ EXPORT_SYMBOL(journal_try_to_free_buffers);
81EXPORT_SYMBOL(journal_force_commit); 80EXPORT_SYMBOL(journal_force_commit);
82 81
83static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); 82static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
83static void __journal_abort_soft (journal_t *journal, int errno);
84 84
85/* 85/*
86 * Helper function used to manage commit timeouts 86 * Helper function used to manage commit timeouts
@@ -93,16 +93,6 @@ static void commit_timeout(unsigned long __data)
93 wake_up_process(p); 93 wake_up_process(p);
94} 94}
95 95
96/* Static check for data structure consistency. There's no code
97 * invoked --- we'll just get a linker failure if things aren't right.
98 */
99void __journal_internal_check(void)
100{
101 extern void journal_bad_superblock_size(void);
102 if (sizeof(struct journal_superblock_s) != 1024)
103 journal_bad_superblock_size();
104}
105
106/* 96/*
107 * kjournald: The main thread function used to manage a logging device 97 * kjournald: The main thread function used to manage a logging device
108 * journal. 98 * journal.
@@ -119,16 +109,12 @@ void __journal_internal_check(void)
119 * known as checkpointing, and this thread is responsible for that job. 109 * known as checkpointing, and this thread is responsible for that job.
120 */ 110 */
121 111
122journal_t *current_journal; // AKPM: debug 112static int kjournald(void *arg)
123
124int kjournald(void *arg)
125{ 113{
126 journal_t *journal = (journal_t *) arg; 114 journal_t *journal = (journal_t *) arg;
127 transaction_t *transaction; 115 transaction_t *transaction;
128 struct timer_list timer; 116 struct timer_list timer;
129 117
130 current_journal = journal;
131
132 daemonize("kjournald"); 118 daemonize("kjournald");
133 119
134 /* Set up an interval timer which can be used to trigger a 120 /* Set up an interval timer which can be used to trigger a
@@ -193,6 +179,8 @@ loop:
193 if (transaction && time_after_eq(jiffies, 179 if (transaction && time_after_eq(jiffies,
194 transaction->t_expires)) 180 transaction->t_expires))
195 should_sleep = 0; 181 should_sleep = 0;
182 if (journal->j_flags & JFS_UNMOUNT)
183 should_sleep = 0;
196 if (should_sleep) { 184 if (should_sleep) {
197 spin_unlock(&journal->j_state_lock); 185 spin_unlock(&journal->j_state_lock);
198 schedule(); 186 schedule();
@@ -969,7 +957,7 @@ void journal_update_superblock(journal_t *journal, int wait)
969 if (wait) 957 if (wait)
970 sync_dirty_buffer(bh); 958 sync_dirty_buffer(bh);
971 else 959 else
972 ll_rw_block(WRITE, 1, &bh); 960 ll_rw_block(SWRITE, 1, &bh);
973 961
974out: 962out:
975 /* If we have just flushed the log (by marking s_start==0), then 963 /* If we have just flushed the log (by marking s_start==0), then
@@ -1439,7 +1427,7 @@ int journal_wipe(journal_t *journal, int write)
1439 * device this journal is present. 1427 * device this journal is present.
1440 */ 1428 */
1441 1429
1442const char *journal_dev_name(journal_t *journal, char *buffer) 1430static const char *journal_dev_name(journal_t *journal, char *buffer)
1443{ 1431{
1444 struct block_device *bdev; 1432 struct block_device *bdev;
1445 1433
@@ -1485,7 +1473,7 @@ void __journal_abort_hard(journal_t *journal)
1485 1473
1486/* Soft abort: record the abort error status in the journal superblock, 1474/* Soft abort: record the abort error status in the journal superblock,
1487 * but don't do any other IO. */ 1475 * but don't do any other IO. */
1488void __journal_abort_soft (journal_t *journal, int errno) 1476static void __journal_abort_soft (journal_t *journal, int errno)
1489{ 1477{
1490 if (journal->j_flags & JFS_ABORT) 1478 if (journal->j_flags & JFS_ABORT)
1491 return; 1479 return;
@@ -1880,7 +1868,7 @@ EXPORT_SYMBOL(journal_enable_debug);
1880 1868
1881static struct proc_dir_entry *proc_jbd_debug; 1869static struct proc_dir_entry *proc_jbd_debug;
1882 1870
1883int read_jbd_debug(char *page, char **start, off_t off, 1871static int read_jbd_debug(char *page, char **start, off_t off,
1884 int count, int *eof, void *data) 1872 int count, int *eof, void *data)
1885{ 1873{
1886 int ret; 1874 int ret;
@@ -1890,7 +1878,7 @@ int read_jbd_debug(char *page, char **start, off_t off,
1890 return ret; 1878 return ret;
1891} 1879}
1892 1880
1893int write_jbd_debug(struct file *file, const char __user *buffer, 1881static int write_jbd_debug(struct file *file, const char __user *buffer,
1894 unsigned long count, void *data) 1882 unsigned long count, void *data)
1895{ 1883{
1896 char buf[32]; 1884 char buf[32];
@@ -1979,6 +1967,14 @@ static int __init journal_init(void)
1979{ 1967{
1980 int ret; 1968 int ret;
1981 1969
1970/* Static check for data structure consistency. There's no code
1971 * invoked --- we'll just get a linker failure if things aren't right.
1972 */
1973 extern void journal_bad_superblock_size(void);
1974 if (sizeof(struct journal_superblock_s) != 1024)
1975 journal_bad_superblock_size();
1976
1977
1982 ret = journal_init_caches(); 1978 ret = journal_init_caches();
1983 if (ret != 0) 1979 if (ret != 0)
1984 journal_destroy_caches(); 1980 journal_destroy_caches();
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index d327a598f861..a56144183462 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -116,7 +116,8 @@ static inline int hash(journal_t *journal, unsigned long block)
116 (block << (hash_shift - 12))) & (table->hash_size - 1); 116 (block << (hash_shift - 12))) & (table->hash_size - 1);
117} 117}
118 118
119int insert_revoke_hash(journal_t *journal, unsigned long blocknr, tid_t seq) 119static int insert_revoke_hash(journal_t *journal, unsigned long blocknr,
120 tid_t seq)
120{ 121{
121 struct list_head *hash_list; 122 struct list_head *hash_list;
122 struct jbd_revoke_record_s *record; 123 struct jbd_revoke_record_s *record;
@@ -613,7 +614,7 @@ static void flush_descriptor(journal_t *journal,
613 set_buffer_jwrite(bh); 614 set_buffer_jwrite(bh);
614 BUFFER_TRACE(bh, "write"); 615 BUFFER_TRACE(bh, "write");
615 set_buffer_dirty(bh); 616 set_buffer_dirty(bh);
616 ll_rw_block(WRITE, 1, &bh); 617 ll_rw_block(SWRITE, 1, &bh);
617} 618}
618#endif 619#endif
619 620
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 77b7662b840b..c6ec66fd8766 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -490,23 +490,21 @@ void journal_unlock_updates (journal_t *journal)
490 */ 490 */
491static void jbd_unexpected_dirty_buffer(struct journal_head *jh) 491static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
492{ 492{
493 struct buffer_head *bh = jh2bh(jh);
494 int jlist; 493 int jlist;
495 494
496 if (buffer_dirty(bh)) { 495 /* If this buffer is one which might reasonably be dirty
497 /* If this buffer is one which might reasonably be dirty 496 * --- ie. data, or not part of this journal --- then
498 * --- ie. data, or not part of this journal --- then 497 * we're OK to leave it alone, but otherwise we need to
499 * we're OK to leave it alone, but otherwise we need to 498 * move the dirty bit to the journal's own internal
500 * move the dirty bit to the journal's own internal 499 * JBDDirty bit. */
501 * JBDDirty bit. */ 500 jlist = jh->b_jlist;
502 jlist = jh->b_jlist; 501
503 502 if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
504 if (jlist == BJ_Metadata || jlist == BJ_Reserved || 503 jlist == BJ_Shadow || jlist == BJ_Forget) {
505 jlist == BJ_Shadow || jlist == BJ_Forget) { 504 struct buffer_head *bh = jh2bh(jh);
506 if (test_clear_buffer_dirty(jh2bh(jh))) { 505
507 set_bit(BH_JBDDirty, &jh2bh(jh)->b_state); 506 if (test_clear_buffer_dirty(bh))
508 } 507 set_buffer_jbddirty(bh);
509 }
510 } 508 }
511} 509}
512 510
@@ -574,9 +572,14 @@ repeat:
574 if (jh->b_next_transaction) 572 if (jh->b_next_transaction)
575 J_ASSERT_JH(jh, jh->b_next_transaction == 573 J_ASSERT_JH(jh, jh->b_next_transaction ==
576 transaction); 574 transaction);
577 JBUFFER_TRACE(jh, "Unexpected dirty buffer"); 575 }
578 jbd_unexpected_dirty_buffer(jh); 576 /*
579 } 577 * In any case we need to clean the dirty flag and we must
578 * do it under the buffer lock to be sure we don't race
579 * with running write-out.
580 */
581 JBUFFER_TRACE(jh, "Unexpected dirty buffer");
582 jbd_unexpected_dirty_buffer(jh);
580 } 583 }
581 584
582 unlock_buffer(bh); 585 unlock_buffer(bh);
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index bfbeb4c86e03..777b90057b89 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -1629,9 +1629,6 @@ static int jffs_fsync(struct file *f, struct dentry *d, int datasync)
1629} 1629}
1630 1630
1631 1631
1632extern int generic_file_open(struct inode *, struct file *) __attribute__((weak));
1633extern loff_t generic_file_llseek(struct file *, loff_t, int) __attribute__((weak));
1634
1635static struct file_operations jffs_file_operations = 1632static struct file_operations jffs_file_operations =
1636{ 1633{
1637 .open = generic_file_open, 1634 .open = generic_file_open,
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index bd9ed9b0247b..8279bf0133ff 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -21,9 +21,6 @@
21#include <linux/jffs2.h> 21#include <linux/jffs2.h>
22#include "nodelist.h" 22#include "nodelist.h"
23 23
24extern int generic_file_open(struct inode *, struct file *) __attribute__((weak));
25extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) __attribute__((weak));
26
27static int jffs2_commit_write (struct file *filp, struct page *pg, 24static int jffs2_commit_write (struct file *filp, struct page *pg,
28 unsigned start, unsigned end); 25 unsigned start, unsigned end);
29static int jffs2_prepare_write (struct file *filp, struct page *pg, 26static int jffs2_prepare_write (struct file *filp, struct page *pg,
diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h
index 86ccac80f0ab..72a5588faeca 100644
--- a/fs/jfs/jfs_filsys.h
+++ b/fs/jfs/jfs_filsys.h
@@ -37,6 +37,9 @@
37#define JFS_ERR_CONTINUE 0x00000004 /* continue */ 37#define JFS_ERR_CONTINUE 0x00000004 /* continue */
38#define JFS_ERR_PANIC 0x00000008 /* panic */ 38#define JFS_ERR_PANIC 0x00000008 /* panic */
39 39
40#define JFS_USRQUOTA 0x00000010
41#define JFS_GRPQUOTA 0x00000020
42
40/* platform option (conditional compilation) */ 43/* platform option (conditional compilation) */
41#define JFS_AIX 0x80000000 /* AIX support */ 44#define JFS_AIX 0x80000000 /* AIX support */
42/* POSIX name/directory support */ 45/* POSIX name/directory support */
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 1cae14e741eb..49ccde3937f9 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1390,6 +1390,8 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc
1390 1390
1391 jfs_info("jfs_lookup: name = %s", name); 1391 jfs_info("jfs_lookup: name = %s", name);
1392 1392
1393 if (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)
1394 dentry->d_op = &jfs_ci_dentry_operations;
1393 1395
1394 if ((name[0] == '.') && (len == 1)) 1396 if ((name[0] == '.') && (len == 1))
1395 inum = dip->i_ino; 1397 inum = dip->i_ino;
@@ -1417,9 +1419,6 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc
1417 return ERR_PTR(-EACCES); 1419 return ERR_PTR(-EACCES);
1418 } 1420 }
1419 1421
1420 if (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)
1421 dentry->d_op = &jfs_ci_dentry_operations;
1422
1423 dentry = d_splice_alias(ip, dentry); 1422 dentry = d_splice_alias(ip, dentry);
1424 1423
1425 if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)) 1424 if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2))
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 9ff89720f93b..71bc34b96b2b 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -23,9 +23,11 @@
23#include <linux/parser.h> 23#include <linux/parser.h>
24#include <linux/completion.h> 24#include <linux/completion.h>
25#include <linux/vfs.h> 25#include <linux/vfs.h>
26#include <linux/mount.h>
26#include <linux/moduleparam.h> 27#include <linux/moduleparam.h>
27#include <linux/posix_acl.h> 28#include <linux/posix_acl.h>
28#include <asm/uaccess.h> 29#include <asm/uaccess.h>
30#include <linux/seq_file.h>
29 31
30#include "jfs_incore.h" 32#include "jfs_incore.h"
31#include "jfs_filsys.h" 33#include "jfs_filsys.h"
@@ -192,7 +194,8 @@ static void jfs_put_super(struct super_block *sb)
192 194
193enum { 195enum {
194 Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize, 196 Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize,
195 Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, 197 Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota,
198 Opt_usrquota, Opt_grpquota
196}; 199};
197 200
198static match_table_t tokens = { 201static match_table_t tokens = {
@@ -204,8 +207,8 @@ static match_table_t tokens = {
204 {Opt_errors, "errors=%s"}, 207 {Opt_errors, "errors=%s"},
205 {Opt_ignore, "noquota"}, 208 {Opt_ignore, "noquota"},
206 {Opt_ignore, "quota"}, 209 {Opt_ignore, "quota"},
207 {Opt_ignore, "usrquota"}, 210 {Opt_usrquota, "usrquota"},
208 {Opt_ignore, "grpquota"}, 211 {Opt_grpquota, "grpquota"},
209 {Opt_err, NULL} 212 {Opt_err, NULL}
210}; 213};
211 214
@@ -293,6 +296,24 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
293 } 296 }
294 break; 297 break;
295 } 298 }
299
300#if defined(CONFIG_QUOTA)
301 case Opt_quota:
302 case Opt_usrquota:
303 *flag |= JFS_USRQUOTA;
304 break;
305 case Opt_grpquota:
306 *flag |= JFS_GRPQUOTA;
307 break;
308#else
309 case Opt_usrquota:
310 case Opt_grpquota:
311 case Opt_quota:
312 printk(KERN_ERR
313 "JFS: quota operations not supported\n");
314 break;
315#endif
316
296 default: 317 default:
297 printk("jfs: Unrecognized mount option \"%s\" " 318 printk("jfs: Unrecognized mount option \"%s\" "
298 " or missing value\n", p); 319 " or missing value\n", p);
@@ -539,6 +560,26 @@ static int jfs_sync_fs(struct super_block *sb, int wait)
539 return 0; 560 return 0;
540} 561}
541 562
563static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
564{
565 struct jfs_sb_info *sbi = JFS_SBI(vfs->mnt_sb);
566
567 if (sbi->flag & JFS_NOINTEGRITY)
568 seq_puts(seq, ",nointegrity");
569 else
570 seq_puts(seq, ",integrity");
571
572#if defined(CONFIG_QUOTA)
573 if (sbi->flag & JFS_USRQUOTA)
574 seq_puts(seq, ",usrquota");
575
576 if (sbi->flag & JFS_GRPQUOTA)
577 seq_puts(seq, ",grpquota");
578#endif
579
580 return 0;
581}
582
542static struct super_operations jfs_super_operations = { 583static struct super_operations jfs_super_operations = {
543 .alloc_inode = jfs_alloc_inode, 584 .alloc_inode = jfs_alloc_inode,
544 .destroy_inode = jfs_destroy_inode, 585 .destroy_inode = jfs_destroy_inode,
@@ -552,6 +593,7 @@ static struct super_operations jfs_super_operations = {
552 .unlockfs = jfs_unlockfs, 593 .unlockfs = jfs_unlockfs,
553 .statfs = jfs_statfs, 594 .statfs = jfs_statfs,
554 .remount_fs = jfs_remount, 595 .remount_fs = jfs_remount,
596 .show_options = jfs_show_options
555}; 597};
556 598
557static struct export_operations jfs_export_operations = { 599static struct export_operations jfs_export_operations = {
diff --git a/fs/namei.c b/fs/namei.c
index 6ec1f0fefc5b..145e852c4bd0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -525,6 +525,22 @@ static inline int __do_follow_link(struct path *path, struct nameidata *nd)
525 return error; 525 return error;
526} 526}
527 527
528static inline void dput_path(struct path *path, struct nameidata *nd)
529{
530 dput(path->dentry);
531 if (path->mnt != nd->mnt)
532 mntput(path->mnt);
533}
534
535static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
536{
537 dput(nd->dentry);
538 if (nd->mnt != path->mnt)
539 mntput(nd->mnt);
540 nd->mnt = path->mnt;
541 nd->dentry = path->dentry;
542}
543
528/* 544/*
529 * This limits recursive symlink follows to 8, while 545 * This limits recursive symlink follows to 8, while
530 * limiting consecutive symlinks to 40. 546 * limiting consecutive symlinks to 40.
@@ -552,9 +568,7 @@ static inline int do_follow_link(struct path *path, struct nameidata *nd)
552 nd->depth--; 568 nd->depth--;
553 return err; 569 return err;
554loop: 570loop:
555 dput(path->dentry); 571 dput_path(path, nd);
556 if (path->mnt != nd->mnt)
557 mntput(path->mnt);
558 path_release(nd); 572 path_release(nd);
559 return err; 573 return err;
560} 574}
@@ -813,13 +827,8 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd)
813 err = -ENOTDIR; 827 err = -ENOTDIR;
814 if (!inode->i_op) 828 if (!inode->i_op)
815 break; 829 break;
816 } else { 830 } else
817 dput(nd->dentry); 831 path_to_nameidata(&next, nd);
818 if (nd->mnt != next.mnt)
819 mntput(nd->mnt);
820 nd->mnt = next.mnt;
821 nd->dentry = next.dentry;
822 }
823 err = -ENOTDIR; 832 err = -ENOTDIR;
824 if (!inode->i_op->lookup) 833 if (!inode->i_op->lookup)
825 break; 834 break;
@@ -859,13 +868,8 @@ last_component:
859 if (err) 868 if (err)
860 goto return_err; 869 goto return_err;
861 inode = nd->dentry->d_inode; 870 inode = nd->dentry->d_inode;
862 } else { 871 } else
863 dput(nd->dentry); 872 path_to_nameidata(&next, nd);
864 if (nd->mnt != next.mnt)
865 mntput(nd->mnt);
866 nd->mnt = next.mnt;
867 nd->dentry = next.dentry;
868 }
869 err = -ENOENT; 873 err = -ENOENT;
870 if (!inode) 874 if (!inode)
871 break; 875 break;
@@ -901,9 +905,7 @@ return_reval:
901return_base: 905return_base:
902 return 0; 906 return 0;
903out_dput: 907out_dput:
904 dput(next.dentry); 908 dput_path(&next, nd);
905 if (nd->mnt != next.mnt)
906 mntput(next.mnt);
907 break; 909 break;
908 } 910 }
909 path_release(nd); 911 path_release(nd);
@@ -1507,11 +1509,7 @@ do_last:
1507 if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link) 1509 if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link)
1508 goto do_link; 1510 goto do_link;
1509 1511
1510 dput(nd->dentry); 1512 path_to_nameidata(&path, nd);
1511 nd->dentry = path.dentry;
1512 if (nd->mnt != path.mnt)
1513 mntput(nd->mnt);
1514 nd->mnt = path.mnt;
1515 error = -EISDIR; 1513 error = -EISDIR;
1516 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) 1514 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
1517 goto exit; 1515 goto exit;
@@ -1522,9 +1520,7 @@ ok:
1522 return 0; 1520 return 0;
1523 1521
1524exit_dput: 1522exit_dput:
1525 dput(path.dentry); 1523 dput_path(&path, nd);
1526 if (nd->mnt != path.mnt)
1527 mntput(path.mnt);
1528exit: 1524exit:
1529 path_release(nd); 1525 path_release(nd);
1530 return error; 1526 return error;
diff --git a/fs/namespace.c b/fs/namespace.c
index 79bd8a46e1e7..34156260c9b6 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -40,7 +40,7 @@ static inline int sysfs_init(void)
40 __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); 40 __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
41 41
42static struct list_head *mount_hashtable; 42static struct list_head *mount_hashtable;
43static int hash_mask, hash_bits; 43static int hash_mask __read_mostly, hash_bits __read_mostly;
44static kmem_cache_t *mnt_cache; 44static kmem_cache_t *mnt_cache;
45 45
46static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) 46static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
@@ -1334,8 +1334,12 @@ asmlinkage long sys_pivot_root(const char __user *new_root, const char __user *p
1334 error = -EINVAL; 1334 error = -EINVAL;
1335 if (user_nd.mnt->mnt_root != user_nd.dentry) 1335 if (user_nd.mnt->mnt_root != user_nd.dentry)
1336 goto out2; /* not a mountpoint */ 1336 goto out2; /* not a mountpoint */
1337 if (user_nd.mnt->mnt_parent == user_nd.mnt)
1338 goto out2; /* not attached */
1337 if (new_nd.mnt->mnt_root != new_nd.dentry) 1339 if (new_nd.mnt->mnt_root != new_nd.dentry)
1338 goto out2; /* not a mountpoint */ 1340 goto out2; /* not a mountpoint */
1341 if (new_nd.mnt->mnt_parent == new_nd.mnt)
1342 goto out2; /* not attached */
1339 tmp = old_nd.mnt; /* make sure we can reach put_old from new_root */ 1343 tmp = old_nd.mnt; /* make sure we can reach put_old from new_root */
1340 spin_lock(&vfsmount_lock); 1344 spin_lock(&vfsmount_lock);
1341 if (tmp != new_nd.mnt) { 1345 if (tmp != new_nd.mnt) {
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 9a11aa39e2e4..057aff745506 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -26,6 +26,7 @@
26#include <linux/namei.h> 26#include <linux/namei.h>
27#include <linux/mount.h> 27#include <linux/mount.h>
28#include <linux/hash.h> 28#include <linux/hash.h>
29#include <linux/module.h>
29 30
30#include <linux/sunrpc/svc.h> 31#include <linux/sunrpc/svc.h>
31#include <linux/nfsd/nfsd.h> 32#include <linux/nfsd/nfsd.h>
@@ -221,6 +222,7 @@ static int expkey_show(struct seq_file *m,
221} 222}
222 223
223struct cache_detail svc_expkey_cache = { 224struct cache_detail svc_expkey_cache = {
225 .owner = THIS_MODULE,
224 .hash_size = EXPKEY_HASHMAX, 226 .hash_size = EXPKEY_HASHMAX,
225 .hash_table = expkey_table, 227 .hash_table = expkey_table,
226 .name = "nfsd.fh", 228 .name = "nfsd.fh",
@@ -456,6 +458,7 @@ static int svc_export_show(struct seq_file *m,
456 return 0; 458 return 0;
457} 459}
458struct cache_detail svc_export_cache = { 460struct cache_detail svc_export_cache = {
461 .owner = THIS_MODULE,
459 .hash_size = EXPORT_HASHMAX, 462 .hash_size = EXPORT_HASHMAX,
460 .hash_table = export_table, 463 .hash_table = export_table,
461 .name = "nfsd.export", 464 .name = "nfsd.export",
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 5605a26efc57..13369650cdf9 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -187,6 +187,7 @@ static int idtoname_parse(struct cache_detail *, char *, int);
187static struct ent *idtoname_lookup(struct ent *, int); 187static struct ent *idtoname_lookup(struct ent *, int);
188 188
189static struct cache_detail idtoname_cache = { 189static struct cache_detail idtoname_cache = {
190 .owner = THIS_MODULE,
190 .hash_size = ENT_HASHMAX, 191 .hash_size = ENT_HASHMAX,
191 .hash_table = idtoname_table, 192 .hash_table = idtoname_table,
192 .name = "nfs4.idtoname", 193 .name = "nfs4.idtoname",
@@ -320,6 +321,7 @@ static struct ent *nametoid_lookup(struct ent *, int);
320static int nametoid_parse(struct cache_detail *, char *, int); 321static int nametoid_parse(struct cache_detail *, char *, int);
321 322
322static struct cache_detail nametoid_cache = { 323static struct cache_detail nametoid_cache = {
324 .owner = THIS_MODULE,
323 .hash_size = ENT_HASHMAX, 325 .hash_size = ENT_HASHMAX,
324 .hash_table = nametoid_table, 326 .hash_table = nametoid_table,
325 .name = "nfs4.nametoid", 327 .name = "nfs4.nametoid",
@@ -404,8 +406,10 @@ nfsd_idmap_init(void)
404void 406void
405nfsd_idmap_shutdown(void) 407nfsd_idmap_shutdown(void)
406{ 408{
407 cache_unregister(&idtoname_cache); 409 if (cache_unregister(&idtoname_cache))
408 cache_unregister(&nametoid_cache); 410 printk(KERN_ERR "nfsd: failed to unregister idtoname cache\n");
411 if (cache_unregister(&nametoid_cache))
412 printk(KERN_ERR "nfsd: failed to unregister nametoid cache\n");
409} 413}
410 414
411/* 415/*
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 57ed50fe7f85..954cf893d50c 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -93,7 +93,7 @@ nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
93 93
94 dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", 94 dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
95 clname->len, clname->data); 95 clname->len, clname->data);
96 tfm = crypto_alloc_tfm("md5", 0); 96 tfm = crypto_alloc_tfm("md5", CRYPTO_TFM_REQ_MAY_SLEEP);
97 if (tfm == NULL) 97 if (tfm == NULL)
98 goto out; 98 goto out;
99 cksum.len = crypto_tfm_alg_digestsize(tfm); 99 cksum.len = crypto_tfm_alg_digestsize(tfm);
@@ -114,8 +114,7 @@ nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
114 kfree(cksum.data); 114 kfree(cksum.data);
115 status = nfs_ok; 115 status = nfs_ok;
116out: 116out:
117 if (tfm) 117 crypto_free_tfm(tfm);
118 crypto_free_tfm(tfm);
119 return status; 118 return status;
120} 119}
121 120
diff --git a/fs/open.c b/fs/open.c
index 32bf05e2996d..4ee2dcc31c28 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -933,16 +933,11 @@ void fastcall fd_install(unsigned int fd, struct file * file)
933 933
934EXPORT_SYMBOL(fd_install); 934EXPORT_SYMBOL(fd_install);
935 935
936asmlinkage long sys_open(const char __user * filename, int flags, int mode) 936long do_sys_open(const char __user *filename, int flags, int mode)
937{ 937{
938 char * tmp; 938 char *tmp = getname(filename);
939 int fd; 939 int fd = PTR_ERR(tmp);
940 940
941 if (force_o_largefile())
942 flags |= O_LARGEFILE;
943
944 tmp = getname(filename);
945 fd = PTR_ERR(tmp);
946 if (!IS_ERR(tmp)) { 941 if (!IS_ERR(tmp)) {
947 fd = get_unused_fd(); 942 fd = get_unused_fd();
948 if (fd >= 0) { 943 if (fd >= 0) {
@@ -959,6 +954,14 @@ asmlinkage long sys_open(const char __user * filename, int flags, int mode)
959 } 954 }
960 return fd; 955 return fd;
961} 956}
957
958asmlinkage long sys_open(const char __user *filename, int flags, int mode)
959{
960 if (force_o_largefile())
961 flags |= O_LARGEFILE;
962
963 return do_sys_open(filename, flags, mode);
964}
962EXPORT_SYMBOL_GPL(sys_open); 965EXPORT_SYMBOL_GPL(sys_open);
963 966
964#ifndef __alpha__ 967#ifndef __alpha__
diff --git a/fs/pipe.c b/fs/pipe.c
index 25aa09f9d09d..2c7a23dde2d8 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -415,6 +415,10 @@ pipe_poll(struct file *filp, poll_table *wait)
415 415
416 if (filp->f_mode & FMODE_WRITE) { 416 if (filp->f_mode & FMODE_WRITE) {
417 mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0; 417 mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
418 /*
419 * Most Unices do not set POLLERR for FIFOs but on Linux they
420 * behave exactly like pipes for poll().
421 */
418 if (!PIPE_READERS(*inode)) 422 if (!PIPE_READERS(*inode))
419 mask |= POLLERR; 423 mask |= POLLERR;
420 } 424 }
@@ -422,9 +426,6 @@ pipe_poll(struct file *filp, poll_table *wait)
422 return mask; 426 return mask;
423} 427}
424 428
425/* FIXME: most Unices do not set POLLERR for fifos */
426#define fifo_poll pipe_poll
427
428static int 429static int
429pipe_release(struct inode *inode, int decr, int decw) 430pipe_release(struct inode *inode, int decr, int decw)
430{ 431{
@@ -568,7 +569,7 @@ struct file_operations read_fifo_fops = {
568 .read = pipe_read, 569 .read = pipe_read,
569 .readv = pipe_readv, 570 .readv = pipe_readv,
570 .write = bad_pipe_w, 571 .write = bad_pipe_w,
571 .poll = fifo_poll, 572 .poll = pipe_poll,
572 .ioctl = pipe_ioctl, 573 .ioctl = pipe_ioctl,
573 .open = pipe_read_open, 574 .open = pipe_read_open,
574 .release = pipe_read_release, 575 .release = pipe_read_release,
@@ -580,7 +581,7 @@ struct file_operations write_fifo_fops = {
580 .read = bad_pipe_r, 581 .read = bad_pipe_r,
581 .write = pipe_write, 582 .write = pipe_write,
582 .writev = pipe_writev, 583 .writev = pipe_writev,
583 .poll = fifo_poll, 584 .poll = pipe_poll,
584 .ioctl = pipe_ioctl, 585 .ioctl = pipe_ioctl,
585 .open = pipe_write_open, 586 .open = pipe_write_open,
586 .release = pipe_write_release, 587 .release = pipe_write_release,
@@ -593,7 +594,7 @@ struct file_operations rdwr_fifo_fops = {
593 .readv = pipe_readv, 594 .readv = pipe_readv,
594 .write = pipe_write, 595 .write = pipe_write,
595 .writev = pipe_writev, 596 .writev = pipe_writev,
596 .poll = fifo_poll, 597 .poll = pipe_poll,
597 .ioctl = pipe_ioctl, 598 .ioctl = pipe_ioctl,
598 .open = pipe_rdwr_open, 599 .open = pipe_rdwr_open,
599 .release = pipe_rdwr_release, 600 .release = pipe_rdwr_release,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 491f2d9f89ac..84751f3f52d5 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -11,6 +11,40 @@
11 * go into icache. We cache the reference to task_struct upon lookup too. 11 * go into icache. We cache the reference to task_struct upon lookup too.
12 * Eventually it should become a filesystem in its own. We don't use the 12 * Eventually it should become a filesystem in its own. We don't use the
13 * rest of procfs anymore. 13 * rest of procfs anymore.
14 *
15 *
16 * Changelog:
17 * 17-Jan-2005
18 * Allan Bezerra
19 * Bruna Moreira <bruna.moreira@indt.org.br>
20 * Edjard Mota <edjard.mota@indt.org.br>
21 * Ilias Biris <ilias.biris@indt.org.br>
22 * Mauricio Lin <mauricio.lin@indt.org.br>
23 *
24 * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
25 *
26 * A new process specific entry (smaps) included in /proc. It shows the
27 * size of rss for each memory area. The maps entry lacks information
28 * about physical memory size (rss) for each mapped file, i.e.,
29 * rss information for executables and library files.
30 * This additional information is useful for any tools that need to know
31 * about physical memory consumption for a process specific library.
32 *
33 * Changelog:
34 * 21-Feb-2005
35 * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
36 * Pud inclusion in the page table walking.
37 *
38 * ChangeLog:
39 * 10-Mar-2005
40 * 10LE Instituto Nokia de Tecnologia - INdT:
41 * A better way to walks through the page table as suggested by Hugh Dickins.
42 *
43 * Simo Piiroinen <simo.piiroinen@nokia.com>:
44 * Smaps information related to shared, private, clean and dirty pages.
45 *
46 * Paul Mundt <paul.mundt@nokia.com>:
47 * Overall revision about smaps.
14 */ 48 */
15 49
16#include <asm/uaccess.h> 50#include <asm/uaccess.h>
@@ -65,8 +99,10 @@ enum pid_directory_inos {
65 PROC_TGID_STAT, 99 PROC_TGID_STAT,
66 PROC_TGID_STATM, 100 PROC_TGID_STATM,
67 PROC_TGID_MAPS, 101 PROC_TGID_MAPS,
102 PROC_TGID_NUMA_MAPS,
68 PROC_TGID_MOUNTS, 103 PROC_TGID_MOUNTS,
69 PROC_TGID_WCHAN, 104 PROC_TGID_WCHAN,
105 PROC_TGID_SMAPS,
70#ifdef CONFIG_SCHEDSTATS 106#ifdef CONFIG_SCHEDSTATS
71 PROC_TGID_SCHEDSTAT, 107 PROC_TGID_SCHEDSTAT,
72#endif 108#endif
@@ -83,7 +119,6 @@ enum pid_directory_inos {
83#ifdef CONFIG_AUDITSYSCALL 119#ifdef CONFIG_AUDITSYSCALL
84 PROC_TGID_LOGINUID, 120 PROC_TGID_LOGINUID,
85#endif 121#endif
86 PROC_TGID_FD_DIR,
87 PROC_TGID_OOM_SCORE, 122 PROC_TGID_OOM_SCORE,
88 PROC_TGID_OOM_ADJUST, 123 PROC_TGID_OOM_ADJUST,
89 PROC_TID_INO, 124 PROC_TID_INO,
@@ -102,8 +137,10 @@ enum pid_directory_inos {
102 PROC_TID_STAT, 137 PROC_TID_STAT,
103 PROC_TID_STATM, 138 PROC_TID_STATM,
104 PROC_TID_MAPS, 139 PROC_TID_MAPS,
140 PROC_TID_NUMA_MAPS,
105 PROC_TID_MOUNTS, 141 PROC_TID_MOUNTS,
106 PROC_TID_WCHAN, 142 PROC_TID_WCHAN,
143 PROC_TID_SMAPS,
107#ifdef CONFIG_SCHEDSTATS 144#ifdef CONFIG_SCHEDSTATS
108 PROC_TID_SCHEDSTAT, 145 PROC_TID_SCHEDSTAT,
109#endif 146#endif
@@ -120,9 +157,11 @@ enum pid_directory_inos {
120#ifdef CONFIG_AUDITSYSCALL 157#ifdef CONFIG_AUDITSYSCALL
121 PROC_TID_LOGINUID, 158 PROC_TID_LOGINUID,
122#endif 159#endif
123 PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */
124 PROC_TID_OOM_SCORE, 160 PROC_TID_OOM_SCORE,
125 PROC_TID_OOM_ADJUST, 161 PROC_TID_OOM_ADJUST,
162
163 /* Add new entries before this */
164 PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */
126}; 165};
127 166
128struct pid_entry { 167struct pid_entry {
@@ -144,6 +183,9 @@ static struct pid_entry tgid_base_stuff[] = {
144 E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO), 183 E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO),
145 E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO), 184 E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO),
146 E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO), 185 E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO),
186#ifdef CONFIG_NUMA
187 E(PROC_TGID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO),
188#endif
147 E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), 189 E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR),
148#ifdef CONFIG_SECCOMP 190#ifdef CONFIG_SECCOMP
149 E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), 191 E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
@@ -152,6 +194,7 @@ static struct pid_entry tgid_base_stuff[] = {
152 E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO), 194 E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO),
153 E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO), 195 E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO),
154 E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO), 196 E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO),
197 E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUGO),
155#ifdef CONFIG_SECURITY 198#ifdef CONFIG_SECURITY
156 E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), 199 E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO),
157#endif 200#endif
@@ -180,6 +223,9 @@ static struct pid_entry tid_base_stuff[] = {
180 E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO), 223 E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO),
181 E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO), 224 E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO),
182 E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO), 225 E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO),
226#ifdef CONFIG_NUMA
227 E(PROC_TID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO),
228#endif
183 E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), 229 E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR),
184#ifdef CONFIG_SECCOMP 230#ifdef CONFIG_SECCOMP
185 E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), 231 E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
@@ -188,6 +234,7 @@ static struct pid_entry tid_base_stuff[] = {
188 E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO), 234 E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO),
189 E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO), 235 E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO),
190 E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO), 236 E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO),
237 E(PROC_TID_SMAPS, "smaps", S_IFREG|S_IRUGO),
191#ifdef CONFIG_SECURITY 238#ifdef CONFIG_SECURITY
192 E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), 239 E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO),
193#endif 240#endif
@@ -251,15 +298,21 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm
251 return -ENOENT; 298 return -ENOENT;
252} 299}
253 300
254static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 301static struct fs_struct *get_fs_struct(struct task_struct *task)
255{ 302{
256 struct fs_struct *fs; 303 struct fs_struct *fs;
257 int result = -ENOENT; 304 task_lock(task);
258 task_lock(proc_task(inode)); 305 fs = task->fs;
259 fs = proc_task(inode)->fs;
260 if(fs) 306 if(fs)
261 atomic_inc(&fs->count); 307 atomic_inc(&fs->count);
262 task_unlock(proc_task(inode)); 308 task_unlock(task);
309 return fs;
310}
311
312static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
313{
314 struct fs_struct *fs = get_fs_struct(proc_task(inode));
315 int result = -ENOENT;
263 if (fs) { 316 if (fs) {
264 read_lock(&fs->lock); 317 read_lock(&fs->lock);
265 *mnt = mntget(fs->pwdmnt); 318 *mnt = mntget(fs->pwdmnt);
@@ -273,13 +326,8 @@ static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfs
273 326
274static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 327static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
275{ 328{
276 struct fs_struct *fs; 329 struct fs_struct *fs = get_fs_struct(proc_task(inode));
277 int result = -ENOENT; 330 int result = -ENOENT;
278 task_lock(proc_task(inode));
279 fs = proc_task(inode)->fs;
280 if(fs)
281 atomic_inc(&fs->count);
282 task_unlock(proc_task(inode));
283 if (fs) { 331 if (fs) {
284 read_lock(&fs->lock); 332 read_lock(&fs->lock);
285 *mnt = mntget(fs->rootmnt); 333 *mnt = mntget(fs->rootmnt);
@@ -298,33 +346,6 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf
298 (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \ 346 (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \
299 security_ptrace(current,task) == 0)) 347 security_ptrace(current,task) == 0))
300 348
301static int may_ptrace_attach(struct task_struct *task)
302{
303 int retval = 0;
304
305 task_lock(task);
306
307 if (!task->mm)
308 goto out;
309 if (((current->uid != task->euid) ||
310 (current->uid != task->suid) ||
311 (current->uid != task->uid) ||
312 (current->gid != task->egid) ||
313 (current->gid != task->sgid) ||
314 (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
315 goto out;
316 rmb();
317 if (task->mm->dumpable != 1 && !capable(CAP_SYS_PTRACE))
318 goto out;
319 if (security_ptrace(current, task))
320 goto out;
321
322 retval = 1;
323out:
324 task_unlock(task);
325 return retval;
326}
327
328static int proc_pid_environ(struct task_struct *task, char * buffer) 349static int proc_pid_environ(struct task_struct *task, char * buffer)
329{ 350{
330 int res = 0; 351 int res = 0;
@@ -334,7 +355,7 @@ static int proc_pid_environ(struct task_struct *task, char * buffer)
334 if (len > PAGE_SIZE) 355 if (len > PAGE_SIZE)
335 len = PAGE_SIZE; 356 len = PAGE_SIZE;
336 res = access_process_vm(task, mm->env_start, buffer, len, 0); 357 res = access_process_vm(task, mm->env_start, buffer, len, 0);
337 if (!may_ptrace_attach(task)) 358 if (!ptrace_may_attach(task))
338 res = -ESRCH; 359 res = -ESRCH;
339 mmput(mm); 360 mmput(mm);
340 } 361 }
@@ -515,6 +536,46 @@ static struct file_operations proc_maps_operations = {
515 .release = seq_release, 536 .release = seq_release,
516}; 537};
517 538
539#ifdef CONFIG_NUMA
540extern struct seq_operations proc_pid_numa_maps_op;
541static int numa_maps_open(struct inode *inode, struct file *file)
542{
543 struct task_struct *task = proc_task(inode);
544 int ret = seq_open(file, &proc_pid_numa_maps_op);
545 if (!ret) {
546 struct seq_file *m = file->private_data;
547 m->private = task;
548 }
549 return ret;
550}
551
552static struct file_operations proc_numa_maps_operations = {
553 .open = numa_maps_open,
554 .read = seq_read,
555 .llseek = seq_lseek,
556 .release = seq_release,
557};
558#endif
559
560extern struct seq_operations proc_pid_smaps_op;
561static int smaps_open(struct inode *inode, struct file *file)
562{
563 struct task_struct *task = proc_task(inode);
564 int ret = seq_open(file, &proc_pid_smaps_op);
565 if (!ret) {
566 struct seq_file *m = file->private_data;
567 m->private = task;
568 }
569 return ret;
570}
571
572static struct file_operations proc_smaps_operations = {
573 .open = smaps_open,
574 .read = seq_read,
575 .llseek = seq_lseek,
576 .release = seq_release,
577};
578
518extern struct seq_operations mounts_op; 579extern struct seq_operations mounts_op;
519static int mounts_open(struct inode *inode, struct file *file) 580static int mounts_open(struct inode *inode, struct file *file)
520{ 581{
@@ -597,7 +658,7 @@ static ssize_t mem_read(struct file * file, char __user * buf,
597 int ret = -ESRCH; 658 int ret = -ESRCH;
598 struct mm_struct *mm; 659 struct mm_struct *mm;
599 660
600 if (!MAY_PTRACE(task) || !may_ptrace_attach(task)) 661 if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
601 goto out; 662 goto out;
602 663
603 ret = -ENOMEM; 664 ret = -ENOMEM;
@@ -623,7 +684,7 @@ static ssize_t mem_read(struct file * file, char __user * buf,
623 684
624 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; 685 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
625 retval = access_process_vm(task, src, page, this_len, 0); 686 retval = access_process_vm(task, src, page, this_len, 0);
626 if (!retval || !MAY_PTRACE(task) || !may_ptrace_attach(task)) { 687 if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) {
627 if (!ret) 688 if (!ret)
628 ret = -EIO; 689 ret = -EIO;
629 break; 690 break;
@@ -661,7 +722,7 @@ static ssize_t mem_write(struct file * file, const char * buf,
661 struct task_struct *task = proc_task(file->f_dentry->d_inode); 722 struct task_struct *task = proc_task(file->f_dentry->d_inode);
662 unsigned long dst = *ppos; 723 unsigned long dst = *ppos;
663 724
664 if (!MAY_PTRACE(task) || !may_ptrace_attach(task)) 725 if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
665 return -ESRCH; 726 return -ESRCH;
666 727
667 page = (char *)__get_free_page(GFP_USER); 728 page = (char *)__get_free_page(GFP_USER);
@@ -1524,6 +1585,12 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1524 case PROC_TGID_MAPS: 1585 case PROC_TGID_MAPS:
1525 inode->i_fop = &proc_maps_operations; 1586 inode->i_fop = &proc_maps_operations;
1526 break; 1587 break;
1588#ifdef CONFIG_NUMA
1589 case PROC_TID_NUMA_MAPS:
1590 case PROC_TGID_NUMA_MAPS:
1591 inode->i_fop = &proc_numa_maps_operations;
1592 break;
1593#endif
1527 case PROC_TID_MEM: 1594 case PROC_TID_MEM:
1528 case PROC_TGID_MEM: 1595 case PROC_TGID_MEM:
1529 inode->i_op = &proc_mem_inode_operations; 1596 inode->i_op = &proc_mem_inode_operations;
@@ -1539,6 +1606,10 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1539 case PROC_TGID_MOUNTS: 1606 case PROC_TGID_MOUNTS:
1540 inode->i_fop = &proc_mounts_operations; 1607 inode->i_fop = &proc_mounts_operations;
1541 break; 1608 break;
1609 case PROC_TID_SMAPS:
1610 case PROC_TGID_SMAPS:
1611 inode->i_fop = &proc_smaps_operations;
1612 break;
1542#ifdef CONFIG_SECURITY 1613#ifdef CONFIG_SECURITY
1543 case PROC_TID_ATTR: 1614 case PROC_TID_ATTR:
1544 inode->i_nlink = 2; 1615 inode->i_nlink = 2;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index abe8920313fb..8a8c34461d48 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -249,6 +249,18 @@ out:
249 return error; 249 return error;
250} 250}
251 251
252static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry,
253 struct kstat *stat)
254{
255 struct inode *inode = dentry->d_inode;
256 struct proc_dir_entry *de = PROC_I(inode)->pde;
257 if (de && de->nlink)
258 inode->i_nlink = de->nlink;
259
260 generic_fillattr(inode, stat);
261 return 0;
262}
263
252static struct inode_operations proc_file_inode_operations = { 264static struct inode_operations proc_file_inode_operations = {
253 .setattr = proc_notify_change, 265 .setattr = proc_notify_change,
254}; 266};
@@ -475,6 +487,7 @@ static struct file_operations proc_dir_operations = {
475 */ 487 */
476static struct inode_operations proc_dir_inode_operations = { 488static struct inode_operations proc_dir_inode_operations = {
477 .lookup = proc_lookup, 489 .lookup = proc_lookup,
490 .getattr = proc_getattr,
478 .setattr = proc_notify_change, 491 .setattr = proc_notify_change,
479}; 492};
480 493
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 28b4a0253a92..c7ef3e48e35b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -2,8 +2,13 @@
2#include <linux/hugetlb.h> 2#include <linux/hugetlb.h>
3#include <linux/mount.h> 3#include <linux/mount.h>
4#include <linux/seq_file.h> 4#include <linux/seq_file.h>
5#include <linux/highmem.h>
6#include <linux/pagemap.h>
7#include <linux/mempolicy.h>
8
5#include <asm/elf.h> 9#include <asm/elf.h>
6#include <asm/uaccess.h> 10#include <asm/uaccess.h>
11#include <asm/tlbflush.h>
7#include "internal.h" 12#include "internal.h"
8 13
9char *task_mem(struct mm_struct *mm, char *buffer) 14char *task_mem(struct mm_struct *mm, char *buffer)
@@ -87,49 +92,58 @@ static void pad_len_spaces(struct seq_file *m, int len)
87 seq_printf(m, "%*c", len, ' '); 92 seq_printf(m, "%*c", len, ' ');
88} 93}
89 94
90static int show_map(struct seq_file *m, void *v) 95struct mem_size_stats
96{
97 unsigned long resident;
98 unsigned long shared_clean;
99 unsigned long shared_dirty;
100 unsigned long private_clean;
101 unsigned long private_dirty;
102};
103
104static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
91{ 105{
92 struct task_struct *task = m->private; 106 struct task_struct *task = m->private;
93 struct vm_area_struct *map = v; 107 struct vm_area_struct *vma = v;
94 struct mm_struct *mm = map->vm_mm; 108 struct mm_struct *mm = vma->vm_mm;
95 struct file *file = map->vm_file; 109 struct file *file = vma->vm_file;
96 int flags = map->vm_flags; 110 int flags = vma->vm_flags;
97 unsigned long ino = 0; 111 unsigned long ino = 0;
98 dev_t dev = 0; 112 dev_t dev = 0;
99 int len; 113 int len;
100 114
101 if (file) { 115 if (file) {
102 struct inode *inode = map->vm_file->f_dentry->d_inode; 116 struct inode *inode = vma->vm_file->f_dentry->d_inode;
103 dev = inode->i_sb->s_dev; 117 dev = inode->i_sb->s_dev;
104 ino = inode->i_ino; 118 ino = inode->i_ino;
105 } 119 }
106 120
107 seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", 121 seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
108 map->vm_start, 122 vma->vm_start,
109 map->vm_end, 123 vma->vm_end,
110 flags & VM_READ ? 'r' : '-', 124 flags & VM_READ ? 'r' : '-',
111 flags & VM_WRITE ? 'w' : '-', 125 flags & VM_WRITE ? 'w' : '-',
112 flags & VM_EXEC ? 'x' : '-', 126 flags & VM_EXEC ? 'x' : '-',
113 flags & VM_MAYSHARE ? 's' : 'p', 127 flags & VM_MAYSHARE ? 's' : 'p',
114 map->vm_pgoff << PAGE_SHIFT, 128 vma->vm_pgoff << PAGE_SHIFT,
115 MAJOR(dev), MINOR(dev), ino, &len); 129 MAJOR(dev), MINOR(dev), ino, &len);
116 130
117 /* 131 /*
118 * Print the dentry name for named mappings, and a 132 * Print the dentry name for named mappings, and a
119 * special [heap] marker for the heap: 133 * special [heap] marker for the heap:
120 */ 134 */
121 if (map->vm_file) { 135 if (file) {
122 pad_len_spaces(m, len); 136 pad_len_spaces(m, len);
123 seq_path(m, file->f_vfsmnt, file->f_dentry, ""); 137 seq_path(m, file->f_vfsmnt, file->f_dentry, "\n");
124 } else { 138 } else {
125 if (mm) { 139 if (mm) {
126 if (map->vm_start <= mm->start_brk && 140 if (vma->vm_start <= mm->start_brk &&
127 map->vm_end >= mm->brk) { 141 vma->vm_end >= mm->brk) {
128 pad_len_spaces(m, len); 142 pad_len_spaces(m, len);
129 seq_puts(m, "[heap]"); 143 seq_puts(m, "[heap]");
130 } else { 144 } else {
131 if (map->vm_start <= mm->start_stack && 145 if (vma->vm_start <= mm->start_stack &&
132 map->vm_end >= mm->start_stack) { 146 vma->vm_end >= mm->start_stack) {
133 147
134 pad_len_spaces(m, len); 148 pad_len_spaces(m, len);
135 seq_puts(m, "[stack]"); 149 seq_puts(m, "[stack]");
@@ -141,24 +155,146 @@ static int show_map(struct seq_file *m, void *v)
141 } 155 }
142 } 156 }
143 seq_putc(m, '\n'); 157 seq_putc(m, '\n');
144 if (m->count < m->size) /* map is copied successfully */ 158
145 m->version = (map != get_gate_vma(task))? map->vm_start: 0; 159 if (mss)
160 seq_printf(m,
161 "Size: %8lu kB\n"
162 "Rss: %8lu kB\n"
163 "Shared_Clean: %8lu kB\n"
164 "Shared_Dirty: %8lu kB\n"
165 "Private_Clean: %8lu kB\n"
166 "Private_Dirty: %8lu kB\n",
167 (vma->vm_end - vma->vm_start) >> 10,
168 mss->resident >> 10,
169 mss->shared_clean >> 10,
170 mss->shared_dirty >> 10,
171 mss->private_clean >> 10,
172 mss->private_dirty >> 10);
173
174 if (m->count < m->size) /* vma is copied successfully */
175 m->version = (vma != get_gate_vma(task))? vma->vm_start: 0;
146 return 0; 176 return 0;
147} 177}
148 178
179static int show_map(struct seq_file *m, void *v)
180{
181 return show_map_internal(m, v, 0);
182}
183
184static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
185 unsigned long addr, unsigned long end,
186 struct mem_size_stats *mss)
187{
188 pte_t *pte, ptent;
189 unsigned long pfn;
190 struct page *page;
191
192 pte = pte_offset_map(pmd, addr);
193 do {
194 ptent = *pte;
195 if (pte_none(ptent) || !pte_present(ptent))
196 continue;
197
198 mss->resident += PAGE_SIZE;
199 pfn = pte_pfn(ptent);
200 if (!pfn_valid(pfn))
201 continue;
202
203 page = pfn_to_page(pfn);
204 if (page_count(page) >= 2) {
205 if (pte_dirty(ptent))
206 mss->shared_dirty += PAGE_SIZE;
207 else
208 mss->shared_clean += PAGE_SIZE;
209 } else {
210 if (pte_dirty(ptent))
211 mss->private_dirty += PAGE_SIZE;
212 else
213 mss->private_clean += PAGE_SIZE;
214 }
215 } while (pte++, addr += PAGE_SIZE, addr != end);
216 pte_unmap(pte - 1);
217 cond_resched_lock(&vma->vm_mm->page_table_lock);
218}
219
220static inline void smaps_pmd_range(struct vm_area_struct *vma, pud_t *pud,
221 unsigned long addr, unsigned long end,
222 struct mem_size_stats *mss)
223{
224 pmd_t *pmd;
225 unsigned long next;
226
227 pmd = pmd_offset(pud, addr);
228 do {
229 next = pmd_addr_end(addr, end);
230 if (pmd_none_or_clear_bad(pmd))
231 continue;
232 smaps_pte_range(vma, pmd, addr, next, mss);
233 } while (pmd++, addr = next, addr != end);
234}
235
236static inline void smaps_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
237 unsigned long addr, unsigned long end,
238 struct mem_size_stats *mss)
239{
240 pud_t *pud;
241 unsigned long next;
242
243 pud = pud_offset(pgd, addr);
244 do {
245 next = pud_addr_end(addr, end);
246 if (pud_none_or_clear_bad(pud))
247 continue;
248 smaps_pmd_range(vma, pud, addr, next, mss);
249 } while (pud++, addr = next, addr != end);
250}
251
252static inline void smaps_pgd_range(struct vm_area_struct *vma,
253 unsigned long addr, unsigned long end,
254 struct mem_size_stats *mss)
255{
256 pgd_t *pgd;
257 unsigned long next;
258
259 pgd = pgd_offset(vma->vm_mm, addr);
260 do {
261 next = pgd_addr_end(addr, end);
262 if (pgd_none_or_clear_bad(pgd))
263 continue;
264 smaps_pud_range(vma, pgd, addr, next, mss);
265 } while (pgd++, addr = next, addr != end);
266}
267
268static int show_smap(struct seq_file *m, void *v)
269{
270 struct vm_area_struct *vma = v;
271 struct mm_struct *mm = vma->vm_mm;
272 struct mem_size_stats mss;
273
274 memset(&mss, 0, sizeof mss);
275
276 if (mm) {
277 spin_lock(&mm->page_table_lock);
278 smaps_pgd_range(vma, vma->vm_start, vma->vm_end, &mss);
279 spin_unlock(&mm->page_table_lock);
280 }
281
282 return show_map_internal(m, v, &mss);
283}
284
149static void *m_start(struct seq_file *m, loff_t *pos) 285static void *m_start(struct seq_file *m, loff_t *pos)
150{ 286{
151 struct task_struct *task = m->private; 287 struct task_struct *task = m->private;
152 unsigned long last_addr = m->version; 288 unsigned long last_addr = m->version;
153 struct mm_struct *mm; 289 struct mm_struct *mm;
154 struct vm_area_struct *map, *tail_map; 290 struct vm_area_struct *vma, *tail_vma;
155 loff_t l = *pos; 291 loff_t l = *pos;
156 292
157 /* 293 /*
158 * We remember last_addr rather than next_addr to hit with 294 * We remember last_addr rather than next_addr to hit with
159 * mmap_cache most of the time. We have zero last_addr at 295 * mmap_cache most of the time. We have zero last_addr at
160 * the begining and also after lseek. We will have -1 last_addr 296 * the beginning and also after lseek. We will have -1 last_addr
161 * after the end of the maps. 297 * after the end of the vmas.
162 */ 298 */
163 299
164 if (last_addr == -1UL) 300 if (last_addr == -1UL)
@@ -168,47 +304,47 @@ static void *m_start(struct seq_file *m, loff_t *pos)
168 if (!mm) 304 if (!mm)
169 return NULL; 305 return NULL;
170 306
171 tail_map = get_gate_vma(task); 307 tail_vma = get_gate_vma(task);
172 down_read(&mm->mmap_sem); 308 down_read(&mm->mmap_sem);
173 309
174 /* Start with last addr hint */ 310 /* Start with last addr hint */
175 if (last_addr && (map = find_vma(mm, last_addr))) { 311 if (last_addr && (vma = find_vma(mm, last_addr))) {
176 map = map->vm_next; 312 vma = vma->vm_next;
177 goto out; 313 goto out;
178 } 314 }
179 315
180 /* 316 /*
181 * Check the map index is within the range and do 317 * Check the vma index is within the range and do
182 * sequential scan until m_index. 318 * sequential scan until m_index.
183 */ 319 */
184 map = NULL; 320 vma = NULL;
185 if ((unsigned long)l < mm->map_count) { 321 if ((unsigned long)l < mm->map_count) {
186 map = mm->mmap; 322 vma = mm->mmap;
187 while (l-- && map) 323 while (l-- && vma)
188 map = map->vm_next; 324 vma = vma->vm_next;
189 goto out; 325 goto out;
190 } 326 }
191 327
192 if (l != mm->map_count) 328 if (l != mm->map_count)
193 tail_map = NULL; /* After gate map */ 329 tail_vma = NULL; /* After gate vma */
194 330
195out: 331out:
196 if (map) 332 if (vma)
197 return map; 333 return vma;
198 334
199 /* End of maps has reached */ 335 /* End of vmas has been reached */
200 m->version = (tail_map != NULL)? 0: -1UL; 336 m->version = (tail_vma != NULL)? 0: -1UL;
201 up_read(&mm->mmap_sem); 337 up_read(&mm->mmap_sem);
202 mmput(mm); 338 mmput(mm);
203 return tail_map; 339 return tail_vma;
204} 340}
205 341
206static void m_stop(struct seq_file *m, void *v) 342static void m_stop(struct seq_file *m, void *v)
207{ 343{
208 struct task_struct *task = m->private; 344 struct task_struct *task = m->private;
209 struct vm_area_struct *map = v; 345 struct vm_area_struct *vma = v;
210 if (map && map != get_gate_vma(task)) { 346 if (vma && vma != get_gate_vma(task)) {
211 struct mm_struct *mm = map->vm_mm; 347 struct mm_struct *mm = vma->vm_mm;
212 up_read(&mm->mmap_sem); 348 up_read(&mm->mmap_sem);
213 mmput(mm); 349 mmput(mm);
214 } 350 }
@@ -217,14 +353,14 @@ static void m_stop(struct seq_file *m, void *v)
217static void *m_next(struct seq_file *m, void *v, loff_t *pos) 353static void *m_next(struct seq_file *m, void *v, loff_t *pos)
218{ 354{
219 struct task_struct *task = m->private; 355 struct task_struct *task = m->private;
220 struct vm_area_struct *map = v; 356 struct vm_area_struct *vma = v;
221 struct vm_area_struct *tail_map = get_gate_vma(task); 357 struct vm_area_struct *tail_vma = get_gate_vma(task);
222 358
223 (*pos)++; 359 (*pos)++;
224 if (map && (map != tail_map) && map->vm_next) 360 if (vma && (vma != tail_vma) && vma->vm_next)
225 return map->vm_next; 361 return vma->vm_next;
226 m_stop(m, v); 362 m_stop(m, v);
227 return (map != tail_map)? tail_map: NULL; 363 return (vma != tail_vma)? tail_vma: NULL;
228} 364}
229 365
230struct seq_operations proc_pid_maps_op = { 366struct seq_operations proc_pid_maps_op = {
@@ -233,3 +369,140 @@ struct seq_operations proc_pid_maps_op = {
233 .stop = m_stop, 369 .stop = m_stop,
234 .show = show_map 370 .show = show_map
235}; 371};
372
373struct seq_operations proc_pid_smaps_op = {
374 .start = m_start,
375 .next = m_next,
376 .stop = m_stop,
377 .show = show_smap
378};
379
380#ifdef CONFIG_NUMA
381
382struct numa_maps {
383 unsigned long pages;
384 unsigned long anon;
385 unsigned long mapped;
386 unsigned long mapcount_max;
387 unsigned long node[MAX_NUMNODES];
388};
389
390/*
391 * Calculate numa node maps for a vma
392 */
393static struct numa_maps *get_numa_maps(const struct vm_area_struct *vma)
394{
395 struct page *page;
396 unsigned long vaddr;
397 struct mm_struct *mm = vma->vm_mm;
398 int i;
399 struct numa_maps *md = kmalloc(sizeof(struct numa_maps), GFP_KERNEL);
400
401 if (!md)
402 return NULL;
403 md->pages = 0;
404 md->anon = 0;
405 md->mapped = 0;
406 md->mapcount_max = 0;
407 for_each_node(i)
408 md->node[i] =0;
409
410 spin_lock(&mm->page_table_lock);
411 for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) {
412 page = follow_page(mm, vaddr, 0);
413 if (page) {
414 int count = page_mapcount(page);
415
416 if (count)
417 md->mapped++;
418 if (count > md->mapcount_max)
419 md->mapcount_max = count;
420 md->pages++;
421 if (PageAnon(page))
422 md->anon++;
423 md->node[page_to_nid(page)]++;
424 }
425 }
426 spin_unlock(&mm->page_table_lock);
427 return md;
428}
429
430static int show_numa_map(struct seq_file *m, void *v)
431{
432 struct task_struct *task = m->private;
433 struct vm_area_struct *vma = v;
434 struct mempolicy *pol;
435 struct numa_maps *md;
436 struct zone **z;
437 int n;
438 int first;
439
440 if (!vma->vm_mm)
441 return 0;
442
443 md = get_numa_maps(vma);
444 if (!md)
445 return 0;
446
447 seq_printf(m, "%08lx", vma->vm_start);
448 pol = get_vma_policy(task, vma, vma->vm_start);
449 /* Print policy */
450 switch (pol->policy) {
451 case MPOL_PREFERRED:
452 seq_printf(m, " prefer=%d", pol->v.preferred_node);
453 break;
454 case MPOL_BIND:
455 seq_printf(m, " bind={");
456 first = 1;
457 for (z = pol->v.zonelist->zones; *z; z++) {
458
459 if (!first)
460 seq_putc(m, ',');
461 else
462 first = 0;
463 seq_printf(m, "%d/%s", (*z)->zone_pgdat->node_id,
464 (*z)->name);
465 }
466 seq_putc(m, '}');
467 break;
468 case MPOL_INTERLEAVE:
469 seq_printf(m, " interleave={");
470 first = 1;
471 for_each_node(n) {
472 if (test_bit(n, pol->v.nodes)) {
473 if (!first)
474 seq_putc(m,',');
475 else
476 first = 0;
477 seq_printf(m, "%d",n);
478 }
479 }
480 seq_putc(m, '}');
481 break;
482 default:
483 seq_printf(m," default");
484 break;
485 }
486 seq_printf(m, " MaxRef=%lu Pages=%lu Mapped=%lu",
487 md->mapcount_max, md->pages, md->mapped);
488 if (md->anon)
489 seq_printf(m," Anon=%lu",md->anon);
490
491 for_each_online_node(n) {
492 if (md->node[n])
493 seq_printf(m, " N%d=%lu", n, md->node[n]);
494 }
495 seq_putc(m, '\n');
496 kfree(md);
497 if (m->count < m->size) /* vma is copied successfully */
498 m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
499 return 0;
500}
501
502struct seq_operations proc_pid_numa_maps_op = {
503 .start = m_start,
504 .next = m_next,
505 .stop = m_stop,
506 .show = show_numa_map
507};
508#endif
diff --git a/fs/read_write.c b/fs/read_write.c
index 563abd09b5c8..b60324aaa2b6 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -188,7 +188,7 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
188 struct inode *inode; 188 struct inode *inode;
189 loff_t pos; 189 loff_t pos;
190 190
191 if (unlikely(count > file->f_maxcount)) 191 if (unlikely(count > INT_MAX))
192 goto Einval; 192 goto Einval;
193 pos = *ppos; 193 pos = *ppos;
194 if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) 194 if (unlikely((pos < 0) || (loff_t) (pos + count) < 0))
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index ca7989b04be3..a8e29e9bbbd0 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1034,7 +1034,7 @@ static int flush_commit_list(struct super_block *s,
1034 SB_ONDISK_JOURNAL_SIZE(s); 1034 SB_ONDISK_JOURNAL_SIZE(s);
1035 tbh = journal_find_get_block(s, bn); 1035 tbh = journal_find_get_block(s, bn);
1036 if (buffer_dirty(tbh)) /* redundant, ll_rw_block() checks */ 1036 if (buffer_dirty(tbh)) /* redundant, ll_rw_block() checks */
1037 ll_rw_block(WRITE, 1, &tbh); 1037 ll_rw_block(SWRITE, 1, &tbh);
1038 put_bh(tbh); 1038 put_bh(tbh);
1039 } 1039 }
1040 atomic_dec(&journal->j_async_throttle); 1040 atomic_dec(&journal->j_async_throttle);
@@ -2172,7 +2172,7 @@ static int journal_read_transaction(struct super_block *p_s_sb,
2172 /* flush out the real blocks */ 2172 /* flush out the real blocks */
2173 for (i = 0; i < get_desc_trans_len(desc); i++) { 2173 for (i = 0; i < get_desc_trans_len(desc); i++) {
2174 set_buffer_dirty(real_blocks[i]); 2174 set_buffer_dirty(real_blocks[i]);
2175 ll_rw_block(WRITE, 1, real_blocks + i); 2175 ll_rw_block(SWRITE, 1, real_blocks + i);
2176 } 2176 }
2177 for (i = 0; i < get_desc_trans_len(desc); i++) { 2177 for (i = 0; i < get_desc_trans_len(desc); i++) {
2178 wait_on_buffer(real_blocks[i]); 2178 wait_on_buffer(real_blocks[i]);
diff --git a/fs/relayfs/Makefile b/fs/relayfs/Makefile
new file mode 100644
index 000000000000..e76e182cdb38
--- /dev/null
+++ b/fs/relayfs/Makefile
@@ -0,0 +1,4 @@
1obj-$(CONFIG_RELAYFS_FS) += relayfs.o
2
3relayfs-y := relay.o inode.o buffers.o
4
diff --git a/fs/relayfs/buffers.c b/fs/relayfs/buffers.c
new file mode 100644
index 000000000000..2aa8e2719999
--- /dev/null
+++ b/fs/relayfs/buffers.c
@@ -0,0 +1,189 @@
1/*
2 * RelayFS buffer management code.
3 *
4 * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp
5 * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com)
6 *
7 * This file is released under the GPL.
8 */
9
10#include <linux/module.h>
11#include <linux/vmalloc.h>
12#include <linux/mm.h>
13#include <linux/relayfs_fs.h>
14#include "relay.h"
15#include "buffers.h"
16
17/*
18 * close() vm_op implementation for relayfs file mapping.
19 */
20static void relay_file_mmap_close(struct vm_area_struct *vma)
21{
22 struct rchan_buf *buf = vma->vm_private_data;
23 buf->chan->cb->buf_unmapped(buf, vma->vm_file);
24}
25
26/*
27 * nopage() vm_op implementation for relayfs file mapping.
28 */
29static struct page *relay_buf_nopage(struct vm_area_struct *vma,
30 unsigned long address,
31 int *type)
32{
33 struct page *page;
34 struct rchan_buf *buf = vma->vm_private_data;
35 unsigned long offset = address - vma->vm_start;
36
37 if (address > vma->vm_end)
38 return NOPAGE_SIGBUS; /* Disallow mremap */
39 if (!buf)
40 return NOPAGE_OOM;
41
42 page = vmalloc_to_page(buf->start + offset);
43 if (!page)
44 return NOPAGE_OOM;
45 get_page(page);
46
47 if (type)
48 *type = VM_FAULT_MINOR;
49
50 return page;
51}
52
53/*
54 * vm_ops for relay file mappings.
55 */
56static struct vm_operations_struct relay_file_mmap_ops = {
57 .nopage = relay_buf_nopage,
58 .close = relay_file_mmap_close,
59};
60
61/**
62 * relay_mmap_buf: - mmap channel buffer to process address space
63 * @buf: relay channel buffer
64 * @vma: vm_area_struct describing memory to be mapped
65 *
66 * Returns 0 if ok, negative on error
67 *
68 * Caller should already have grabbed mmap_sem.
69 */
70int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma)
71{
72 unsigned long length = vma->vm_end - vma->vm_start;
73 struct file *filp = vma->vm_file;
74
75 if (!buf)
76 return -EBADF;
77
78 if (length != (unsigned long)buf->chan->alloc_size)
79 return -EINVAL;
80
81 vma->vm_ops = &relay_file_mmap_ops;
82 vma->vm_private_data = buf;
83 buf->chan->cb->buf_mapped(buf, filp);
84
85 return 0;
86}
87
88/**
89 * relay_alloc_buf - allocate a channel buffer
90 * @buf: the buffer struct
91 * @size: total size of the buffer
92 *
93 * Returns a pointer to the resulting buffer, NULL if unsuccessful
94 */
95static void *relay_alloc_buf(struct rchan_buf *buf, unsigned long size)
96{
97 void *mem;
98 unsigned int i, j, n_pages;
99
100 size = PAGE_ALIGN(size);
101 n_pages = size >> PAGE_SHIFT;
102
103 buf->page_array = kcalloc(n_pages, sizeof(struct page *), GFP_KERNEL);
104 if (!buf->page_array)
105 return NULL;
106
107 for (i = 0; i < n_pages; i++) {
108 buf->page_array[i] = alloc_page(GFP_KERNEL);
109 if (unlikely(!buf->page_array[i]))
110 goto depopulate;
111 }
112 mem = vmap(buf->page_array, n_pages, GFP_KERNEL, PAGE_KERNEL);
113 if (!mem)
114 goto depopulate;
115
116 memset(mem, 0, size);
117 buf->page_count = n_pages;
118 return mem;
119
120depopulate:
121 for (j = 0; j < i; j++)
122 __free_page(buf->page_array[j]);
123 kfree(buf->page_array);
124 return NULL;
125}
126
127/**
128 * relay_create_buf - allocate and initialize a channel buffer
129 * @alloc_size: size of the buffer to allocate
130 * @n_subbufs: number of sub-buffers in the channel
131 *
132 * Returns channel buffer if successful, NULL otherwise
133 */
134struct rchan_buf *relay_create_buf(struct rchan *chan)
135{
136 struct rchan_buf *buf = kcalloc(1, sizeof(struct rchan_buf), GFP_KERNEL);
137 if (!buf)
138 return NULL;
139
140 buf->padding = kmalloc(chan->n_subbufs * sizeof(size_t *), GFP_KERNEL);
141 if (!buf->padding)
142 goto free_buf;
143
144 buf->start = relay_alloc_buf(buf, chan->alloc_size);
145 if (!buf->start)
146 goto free_buf;
147
148 buf->chan = chan;
149 kref_get(&buf->chan->kref);
150 return buf;
151
152free_buf:
153 kfree(buf->padding);
154 kfree(buf);
155 return NULL;
156}
157
158/**
159 * relay_destroy_buf - destroy an rchan_buf struct and associated buffer
160 * @buf: the buffer struct
161 */
162void relay_destroy_buf(struct rchan_buf *buf)
163{
164 struct rchan *chan = buf->chan;
165 unsigned int i;
166
167 if (likely(buf->start)) {
168 vunmap(buf->start);
169 for (i = 0; i < buf->page_count; i++)
170 __free_page(buf->page_array[i]);
171 kfree(buf->page_array);
172 }
173 kfree(buf->padding);
174 kfree(buf);
175 kref_put(&chan->kref, relay_destroy_channel);
176}
177
178/**
179 * relay_remove_buf - remove a channel buffer
180 *
181 * Removes the file from the relayfs fileystem, which also frees the
182 * rchan_buf_struct and the channel buffer. Should only be called from
183 * kref_put().
184 */
185void relay_remove_buf(struct kref *kref)
186{
187 struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref);
188 relayfs_remove(buf->dentry);
189}
diff --git a/fs/relayfs/buffers.h b/fs/relayfs/buffers.h
new file mode 100644
index 000000000000..37a12493f641
--- /dev/null
+++ b/fs/relayfs/buffers.h
@@ -0,0 +1,12 @@
1#ifndef _BUFFERS_H
2#define _BUFFERS_H
3
4/* This inspired by rtai/shmem */
5#define FIX_SIZE(x) (((x) - 1) & PAGE_MASK) + PAGE_SIZE
6
7extern int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma);
8extern struct rchan_buf *relay_create_buf(struct rchan *chan);
9extern void relay_destroy_buf(struct rchan_buf *buf);
10extern void relay_remove_buf(struct kref *kref);
11
12#endif/* _BUFFERS_H */
diff --git a/fs/relayfs/inode.c b/fs/relayfs/inode.c
new file mode 100644
index 000000000000..0f7f88d067ad
--- /dev/null
+++ b/fs/relayfs/inode.c
@@ -0,0 +1,609 @@
1/*
2 * VFS-related code for RelayFS, a high-speed data relay filesystem.
3 *
4 * Copyright (C) 2003-2005 - Tom Zanussi <zanussi@us.ibm.com>, IBM Corp
5 * Copyright (C) 2003-2005 - Karim Yaghmour <karim@opersys.com>
6 *
7 * Based on ramfs, Copyright (C) 2002 - Linus Torvalds
8 *
9 * This file is released under the GPL.
10 */
11
12#include <linux/module.h>
13#include <linux/fs.h>
14#include <linux/mount.h>
15#include <linux/pagemap.h>
16#include <linux/init.h>
17#include <linux/string.h>
18#include <linux/backing-dev.h>
19#include <linux/namei.h>
20#include <linux/poll.h>
21#include <linux/relayfs_fs.h>
22#include "relay.h"
23#include "buffers.h"
24
25#define RELAYFS_MAGIC 0xF0B4A981
26
27static struct vfsmount * relayfs_mount;
28static int relayfs_mount_count;
29static kmem_cache_t * relayfs_inode_cachep;
30
31static struct backing_dev_info relayfs_backing_dev_info = {
32 .ra_pages = 0, /* No readahead */
33 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
34};
35
36static struct inode *relayfs_get_inode(struct super_block *sb, int mode,
37 struct rchan *chan)
38{
39 struct rchan_buf *buf = NULL;
40 struct inode *inode;
41
42 if (S_ISREG(mode)) {
43 BUG_ON(!chan);
44 buf = relay_create_buf(chan);
45 if (!buf)
46 return NULL;
47 }
48
49 inode = new_inode(sb);
50 if (!inode) {
51 relay_destroy_buf(buf);
52 return NULL;
53 }
54
55 inode->i_mode = mode;
56 inode->i_uid = 0;
57 inode->i_gid = 0;
58 inode->i_blksize = PAGE_CACHE_SIZE;
59 inode->i_blocks = 0;
60 inode->i_mapping->backing_dev_info = &relayfs_backing_dev_info;
61 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
62 switch (mode & S_IFMT) {
63 case S_IFREG:
64 inode->i_fop = &relayfs_file_operations;
65 RELAYFS_I(inode)->buf = buf;
66 break;
67 case S_IFDIR:
68 inode->i_op = &simple_dir_inode_operations;
69 inode->i_fop = &simple_dir_operations;
70
71 /* directory inodes start off with i_nlink == 2 (for "." entry) */
72 inode->i_nlink++;
73 break;
74 default:
75 break;
76 }
77
78 return inode;
79}
80
81/**
82 * relayfs_create_entry - create a relayfs directory or file
83 * @name: the name of the file to create
84 * @parent: parent directory
85 * @mode: mode
86 * @chan: relay channel associated with the file
87 *
88 * Returns the new dentry, NULL on failure
89 *
90 * Creates a file or directory with the specifed permissions.
91 */
92static struct dentry *relayfs_create_entry(const char *name,
93 struct dentry *parent,
94 int mode,
95 struct rchan *chan)
96{
97 struct dentry *d;
98 struct inode *inode;
99 int error = 0;
100
101 BUG_ON(!name || !(S_ISREG(mode) || S_ISDIR(mode)));
102
103 error = simple_pin_fs("relayfs", &relayfs_mount, &relayfs_mount_count);
104 if (error) {
105 printk(KERN_ERR "Couldn't mount relayfs: errcode %d\n", error);
106 return NULL;
107 }
108
109 if (!parent && relayfs_mount && relayfs_mount->mnt_sb)
110 parent = relayfs_mount->mnt_sb->s_root;
111
112 if (!parent) {
113 simple_release_fs(&relayfs_mount, &relayfs_mount_count);
114 return NULL;
115 }
116
117 parent = dget(parent);
118 down(&parent->d_inode->i_sem);
119 d = lookup_one_len(name, parent, strlen(name));
120 if (IS_ERR(d)) {
121 d = NULL;
122 goto release_mount;
123 }
124
125 if (d->d_inode) {
126 d = NULL;
127 goto release_mount;
128 }
129
130 inode = relayfs_get_inode(parent->d_inode->i_sb, mode, chan);
131 if (!inode) {
132 d = NULL;
133 goto release_mount;
134 }
135
136 d_instantiate(d, inode);
137 dget(d); /* Extra count - pin the dentry in core */
138
139 if (S_ISDIR(mode))
140 parent->d_inode->i_nlink++;
141
142 goto exit;
143
144release_mount:
145 simple_release_fs(&relayfs_mount, &relayfs_mount_count);
146
147exit:
148 up(&parent->d_inode->i_sem);
149 dput(parent);
150 return d;
151}
152
153/**
154 * relayfs_create_file - create a file in the relay filesystem
155 * @name: the name of the file to create
156 * @parent: parent directory
157 * @mode: mode, if not specied the default perms are used
158 * @chan: channel associated with the file
159 *
160 * Returns file dentry if successful, NULL otherwise.
161 *
162 * The file will be created user r on behalf of current user.
163 */
164struct dentry *relayfs_create_file(const char *name, struct dentry *parent,
165 int mode, struct rchan *chan)
166{
167 if (!mode)
168 mode = S_IRUSR;
169 mode = (mode & S_IALLUGO) | S_IFREG;
170
171 return relayfs_create_entry(name, parent, mode, chan);
172}
173
174/**
175 * relayfs_create_dir - create a directory in the relay filesystem
176 * @name: the name of the directory to create
177 * @parent: parent directory, NULL if parent should be fs root
178 *
179 * Returns directory dentry if successful, NULL otherwise.
180 *
181 * The directory will be created world rwx on behalf of current user.
182 */
183struct dentry *relayfs_create_dir(const char *name, struct dentry *parent)
184{
185 int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
186 return relayfs_create_entry(name, parent, mode, NULL);
187}
188
189/**
190 * relayfs_remove - remove a file or directory in the relay filesystem
191 * @dentry: file or directory dentry
192 *
193 * Returns 0 if successful, negative otherwise.
194 */
195int relayfs_remove(struct dentry *dentry)
196{
197 struct dentry *parent;
198 int error = 0;
199
200 if (!dentry)
201 return -EINVAL;
202 parent = dentry->d_parent;
203 if (!parent)
204 return -EINVAL;
205
206 parent = dget(parent);
207 down(&parent->d_inode->i_sem);
208 if (dentry->d_inode) {
209 if (S_ISDIR(dentry->d_inode->i_mode))
210 error = simple_rmdir(parent->d_inode, dentry);
211 else
212 error = simple_unlink(parent->d_inode, dentry);
213 if (!error)
214 d_delete(dentry);
215 }
216 if (!error)
217 dput(dentry);
218 up(&parent->d_inode->i_sem);
219 dput(parent);
220
221 if (!error)
222 simple_release_fs(&relayfs_mount, &relayfs_mount_count);
223
224 return error;
225}
226
227/**
228 * relayfs_remove_dir - remove a directory in the relay filesystem
229 * @dentry: directory dentry
230 *
231 * Returns 0 if successful, negative otherwise.
232 */
233int relayfs_remove_dir(struct dentry *dentry)
234{
235 return relayfs_remove(dentry);
236}
237
238/**
239 * relayfs_open - open file op for relayfs files
240 * @inode: the inode
241 * @filp: the file
242 *
243 * Increments the channel buffer refcount.
244 */
245static int relayfs_open(struct inode *inode, struct file *filp)
246{
247 struct rchan_buf *buf = RELAYFS_I(inode)->buf;
248 kref_get(&buf->kref);
249
250 return 0;
251}
252
253/**
254 * relayfs_mmap - mmap file op for relayfs files
255 * @filp: the file
256 * @vma: the vma describing what to map
257 *
258 * Calls upon relay_mmap_buf to map the file into user space.
259 */
260static int relayfs_mmap(struct file *filp, struct vm_area_struct *vma)
261{
262 struct inode *inode = filp->f_dentry->d_inode;
263 return relay_mmap_buf(RELAYFS_I(inode)->buf, vma);
264}
265
266/**
267 * relayfs_poll - poll file op for relayfs files
268 * @filp: the file
269 * @wait: poll table
270 *
271 * Poll implemention.
272 */
273static unsigned int relayfs_poll(struct file *filp, poll_table *wait)
274{
275 unsigned int mask = 0;
276 struct inode *inode = filp->f_dentry->d_inode;
277 struct rchan_buf *buf = RELAYFS_I(inode)->buf;
278
279 if (buf->finalized)
280 return POLLERR;
281
282 if (filp->f_mode & FMODE_READ) {
283 poll_wait(filp, &buf->read_wait, wait);
284 if (!relay_buf_empty(buf))
285 mask |= POLLIN | POLLRDNORM;
286 }
287
288 return mask;
289}
290
291/**
292 * relayfs_release - release file op for relayfs files
293 * @inode: the inode
294 * @filp: the file
295 *
296 * Decrements the channel refcount, as the filesystem is
297 * no longer using it.
298 */
299static int relayfs_release(struct inode *inode, struct file *filp)
300{
301 struct rchan_buf *buf = RELAYFS_I(inode)->buf;
302 kref_put(&buf->kref, relay_remove_buf);
303
304 return 0;
305}
306
307/**
308 * relayfs_read_consume - update the consumed count for the buffer
309 */
310static void relayfs_read_consume(struct rchan_buf *buf,
311 size_t read_pos,
312 size_t bytes_consumed)
313{
314 size_t subbuf_size = buf->chan->subbuf_size;
315 size_t n_subbufs = buf->chan->n_subbufs;
316 size_t read_subbuf;
317
318 if (buf->bytes_consumed + bytes_consumed > subbuf_size) {
319 relay_subbufs_consumed(buf->chan, buf->cpu, 1);
320 buf->bytes_consumed = 0;
321 }
322
323 buf->bytes_consumed += bytes_consumed;
324 read_subbuf = read_pos / buf->chan->subbuf_size;
325 if (buf->bytes_consumed + buf->padding[read_subbuf] == subbuf_size) {
326 if ((read_subbuf == buf->subbufs_produced % n_subbufs) &&
327 (buf->offset == subbuf_size))
328 return;
329 relay_subbufs_consumed(buf->chan, buf->cpu, 1);
330 buf->bytes_consumed = 0;
331 }
332}
333
334/**
335 * relayfs_read_avail - boolean, are there unconsumed bytes available?
336 */
337static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos)
338{
339 size_t bytes_produced, bytes_consumed, write_offset;
340 size_t subbuf_size = buf->chan->subbuf_size;
341 size_t n_subbufs = buf->chan->n_subbufs;
342 size_t produced = buf->subbufs_produced % n_subbufs;
343 size_t consumed = buf->subbufs_consumed % n_subbufs;
344
345 write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset;
346
347 if (consumed > produced) {
348 if ((produced > n_subbufs) &&
349 (produced + n_subbufs - consumed <= n_subbufs))
350 produced += n_subbufs;
351 } else if (consumed == produced) {
352 if (buf->offset > subbuf_size) {
353 produced += n_subbufs;
354 if (buf->subbufs_produced == buf->subbufs_consumed)
355 consumed += n_subbufs;
356 }
357 }
358
359 if (buf->offset > subbuf_size)
360 bytes_produced = (produced - 1) * subbuf_size + write_offset;
361 else
362 bytes_produced = produced * subbuf_size + write_offset;
363 bytes_consumed = consumed * subbuf_size + buf->bytes_consumed;
364
365 if (bytes_produced == bytes_consumed)
366 return 0;
367
368 relayfs_read_consume(buf, read_pos, 0);
369
370 return 1;
371}
372
373/**
374 * relayfs_read_subbuf_avail - return bytes available in sub-buffer
375 */
376static size_t relayfs_read_subbuf_avail(size_t read_pos,
377 struct rchan_buf *buf)
378{
379 size_t padding, avail = 0;
380 size_t read_subbuf, read_offset, write_subbuf, write_offset;
381 size_t subbuf_size = buf->chan->subbuf_size;
382
383 write_subbuf = (buf->data - buf->start) / subbuf_size;
384 write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset;
385 read_subbuf = read_pos / subbuf_size;
386 read_offset = read_pos % subbuf_size;
387 padding = buf->padding[read_subbuf];
388
389 if (read_subbuf == write_subbuf) {
390 if (read_offset + padding < write_offset)
391 avail = write_offset - (read_offset + padding);
392 } else
393 avail = (subbuf_size - padding) - read_offset;
394
395 return avail;
396}
397
398/**
399 * relayfs_read_start_pos - find the first available byte to read
400 *
401 * If the read_pos is in the middle of padding, return the
402 * position of the first actually available byte, otherwise
403 * return the original value.
404 */
405static size_t relayfs_read_start_pos(size_t read_pos,
406 struct rchan_buf *buf)
407{
408 size_t read_subbuf, padding, padding_start, padding_end;
409 size_t subbuf_size = buf->chan->subbuf_size;
410 size_t n_subbufs = buf->chan->n_subbufs;
411
412 read_subbuf = read_pos / subbuf_size;
413 padding = buf->padding[read_subbuf];
414 padding_start = (read_subbuf + 1) * subbuf_size - padding;
415 padding_end = (read_subbuf + 1) * subbuf_size;
416 if (read_pos >= padding_start && read_pos < padding_end) {
417 read_subbuf = (read_subbuf + 1) % n_subbufs;
418 read_pos = read_subbuf * subbuf_size;
419 }
420
421 return read_pos;
422}
423
424/**
425 * relayfs_read_end_pos - return the new read position
426 */
427static size_t relayfs_read_end_pos(struct rchan_buf *buf,
428 size_t read_pos,
429 size_t count)
430{
431 size_t read_subbuf, padding, end_pos;
432 size_t subbuf_size = buf->chan->subbuf_size;
433 size_t n_subbufs = buf->chan->n_subbufs;
434
435 read_subbuf = read_pos / subbuf_size;
436 padding = buf->padding[read_subbuf];
437 if (read_pos % subbuf_size + count + padding == subbuf_size)
438 end_pos = (read_subbuf + 1) * subbuf_size;
439 else
440 end_pos = read_pos + count;
441 if (end_pos >= subbuf_size * n_subbufs)
442 end_pos = 0;
443
444 return end_pos;
445}
446
447/**
448 * relayfs_read - read file op for relayfs files
449 * @filp: the file
450 * @buffer: the userspace buffer
451 * @count: number of bytes to read
452 * @ppos: position to read from
453 *
454 * Reads count bytes or the number of bytes available in the
455 * current sub-buffer being read, whichever is smaller.
456 */
457static ssize_t relayfs_read(struct file *filp,
458 char __user *buffer,
459 size_t count,
460 loff_t *ppos)
461{
462 struct inode *inode = filp->f_dentry->d_inode;
463 struct rchan_buf *buf = RELAYFS_I(inode)->buf;
464 size_t read_start, avail;
465 ssize_t ret = 0;
466 void *from;
467
468 down(&inode->i_sem);
469 if(!relayfs_read_avail(buf, *ppos))
470 goto out;
471
472 read_start = relayfs_read_start_pos(*ppos, buf);
473 avail = relayfs_read_subbuf_avail(read_start, buf);
474 if (!avail)
475 goto out;
476
477 from = buf->start + read_start;
478 ret = count = min(count, avail);
479 if (copy_to_user(buffer, from, count)) {
480 ret = -EFAULT;
481 goto out;
482 }
483 relayfs_read_consume(buf, read_start, count);
484 *ppos = relayfs_read_end_pos(buf, read_start, count);
485out:
486 up(&inode->i_sem);
487 return ret;
488}
489
490/**
491 * relayfs alloc_inode() implementation
492 */
493static struct inode *relayfs_alloc_inode(struct super_block *sb)
494{
495 struct relayfs_inode_info *p = kmem_cache_alloc(relayfs_inode_cachep, SLAB_KERNEL);
496 if (!p)
497 return NULL;
498 p->buf = NULL;
499
500 return &p->vfs_inode;
501}
502
503/**
504 * relayfs destroy_inode() implementation
505 */
506static void relayfs_destroy_inode(struct inode *inode)
507{
508 if (RELAYFS_I(inode)->buf)
509 relay_destroy_buf(RELAYFS_I(inode)->buf);
510
511 kmem_cache_free(relayfs_inode_cachep, RELAYFS_I(inode));
512}
513
514static void init_once(void *p, kmem_cache_t *cachep, unsigned long flags)
515{
516 struct relayfs_inode_info *i = p;
517 if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR)
518 inode_init_once(&i->vfs_inode);
519}
520
521struct file_operations relayfs_file_operations = {
522 .open = relayfs_open,
523 .poll = relayfs_poll,
524 .mmap = relayfs_mmap,
525 .read = relayfs_read,
526 .llseek = no_llseek,
527 .release = relayfs_release,
528};
529
530static struct super_operations relayfs_ops = {
531 .statfs = simple_statfs,
532 .drop_inode = generic_delete_inode,
533 .alloc_inode = relayfs_alloc_inode,
534 .destroy_inode = relayfs_destroy_inode,
535};
536
537static int relayfs_fill_super(struct super_block * sb, void * data, int silent)
538{
539 struct inode *inode;
540 struct dentry *root;
541 int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
542
543 sb->s_blocksize = PAGE_CACHE_SIZE;
544 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
545 sb->s_magic = RELAYFS_MAGIC;
546 sb->s_op = &relayfs_ops;
547 inode = relayfs_get_inode(sb, mode, NULL);
548
549 if (!inode)
550 return -ENOMEM;
551
552 root = d_alloc_root(inode);
553 if (!root) {
554 iput(inode);
555 return -ENOMEM;
556 }
557 sb->s_root = root;
558
559 return 0;
560}
561
562static struct super_block * relayfs_get_sb(struct file_system_type *fs_type,
563 int flags, const char *dev_name,
564 void *data)
565{
566 return get_sb_single(fs_type, flags, data, relayfs_fill_super);
567}
568
569static struct file_system_type relayfs_fs_type = {
570 .owner = THIS_MODULE,
571 .name = "relayfs",
572 .get_sb = relayfs_get_sb,
573 .kill_sb = kill_litter_super,
574};
575
576static int __init init_relayfs_fs(void)
577{
578 int err;
579
580 relayfs_inode_cachep = kmem_cache_create("relayfs_inode_cache",
581 sizeof(struct relayfs_inode_info), 0,
582 0, init_once, NULL);
583 if (!relayfs_inode_cachep)
584 return -ENOMEM;
585
586 err = register_filesystem(&relayfs_fs_type);
587 if (err)
588 kmem_cache_destroy(relayfs_inode_cachep);
589
590 return err;
591}
592
593static void __exit exit_relayfs_fs(void)
594{
595 unregister_filesystem(&relayfs_fs_type);
596 kmem_cache_destroy(relayfs_inode_cachep);
597}
598
599module_init(init_relayfs_fs)
600module_exit(exit_relayfs_fs)
601
602EXPORT_SYMBOL_GPL(relayfs_file_operations);
603EXPORT_SYMBOL_GPL(relayfs_create_dir);
604EXPORT_SYMBOL_GPL(relayfs_remove_dir);
605
606MODULE_AUTHOR("Tom Zanussi <zanussi@us.ibm.com> and Karim Yaghmour <karim@opersys.com>");
607MODULE_DESCRIPTION("Relay Filesystem");
608MODULE_LICENSE("GPL");
609
diff --git a/fs/relayfs/relay.c b/fs/relayfs/relay.c
new file mode 100644
index 000000000000..16446a15c96d
--- /dev/null
+++ b/fs/relayfs/relay.c
@@ -0,0 +1,431 @@
1/*
2 * Public API and common code for RelayFS.
3 *
4 * See Documentation/filesystems/relayfs.txt for an overview of relayfs.
5 *
6 * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp
7 * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com)
8 *
9 * This file is released under the GPL.
10 */
11
12#include <linux/errno.h>
13#include <linux/stddef.h>
14#include <linux/slab.h>
15#include <linux/module.h>
16#include <linux/string.h>
17#include <linux/relayfs_fs.h>
18#include "relay.h"
19#include "buffers.h"
20
21/**
22 * relay_buf_empty - boolean, is the channel buffer empty?
23 * @buf: channel buffer
24 *
25 * Returns 1 if the buffer is empty, 0 otherwise.
26 */
27int relay_buf_empty(struct rchan_buf *buf)
28{
29 return (buf->subbufs_produced - buf->subbufs_consumed) ? 0 : 1;
30}
31
32/**
33 * relay_buf_full - boolean, is the channel buffer full?
34 * @buf: channel buffer
35 *
36 * Returns 1 if the buffer is full, 0 otherwise.
37 */
38int relay_buf_full(struct rchan_buf *buf)
39{
40 size_t ready = buf->subbufs_produced - buf->subbufs_consumed;
41 return (ready >= buf->chan->n_subbufs) ? 1 : 0;
42}
43
44/*
45 * High-level relayfs kernel API and associated functions.
46 */
47
48/*
49 * rchan_callback implementations defining default channel behavior. Used
50 * in place of corresponding NULL values in client callback struct.
51 */
52
53/*
54 * subbuf_start() default callback. Does nothing.
55 */
56static int subbuf_start_default_callback (struct rchan_buf *buf,
57 void *subbuf,
58 void *prev_subbuf,
59 size_t prev_padding)
60{
61 if (relay_buf_full(buf))
62 return 0;
63
64 return 1;
65}
66
67/*
68 * buf_mapped() default callback. Does nothing.
69 */
70static void buf_mapped_default_callback(struct rchan_buf *buf,
71 struct file *filp)
72{
73}
74
75/*
76 * buf_unmapped() default callback. Does nothing.
77 */
78static void buf_unmapped_default_callback(struct rchan_buf *buf,
79 struct file *filp)
80{
81}
82
83/* relay channel default callbacks */
84static struct rchan_callbacks default_channel_callbacks = {
85 .subbuf_start = subbuf_start_default_callback,
86 .buf_mapped = buf_mapped_default_callback,
87 .buf_unmapped = buf_unmapped_default_callback,
88};
89
90/**
91 * wakeup_readers - wake up readers waiting on a channel
92 * @private: the channel buffer
93 *
94 * This is the work function used to defer reader waking. The
95 * reason waking is deferred is that calling directly from write
96 * causes problems if you're writing from say the scheduler.
97 */
98static void wakeup_readers(void *private)
99{
100 struct rchan_buf *buf = private;
101 wake_up_interruptible(&buf->read_wait);
102}
103
104/**
105 * __relay_reset - reset a channel buffer
106 * @buf: the channel buffer
107 * @init: 1 if this is a first-time initialization
108 *
109 * See relay_reset for description of effect.
110 */
111static inline void __relay_reset(struct rchan_buf *buf, unsigned int init)
112{
113 size_t i;
114
115 if (init) {
116 init_waitqueue_head(&buf->read_wait);
117 kref_init(&buf->kref);
118 INIT_WORK(&buf->wake_readers, NULL, NULL);
119 } else {
120 cancel_delayed_work(&buf->wake_readers);
121 flush_scheduled_work();
122 }
123
124 buf->subbufs_produced = 0;
125 buf->subbufs_consumed = 0;
126 buf->bytes_consumed = 0;
127 buf->finalized = 0;
128 buf->data = buf->start;
129 buf->offset = 0;
130
131 for (i = 0; i < buf->chan->n_subbufs; i++)
132 buf->padding[i] = 0;
133
134 buf->chan->cb->subbuf_start(buf, buf->data, NULL, 0);
135}
136
137/**
138 * relay_reset - reset the channel
139 * @chan: the channel
140 *
141 * This has the effect of erasing all data from all channel buffers
142 * and restarting the channel in its initial state. The buffers
143 * are not freed, so any mappings are still in effect.
144 *
145 * NOTE: Care should be taken that the channel isn't actually
146 * being used by anything when this call is made.
147 */
148void relay_reset(struct rchan *chan)
149{
150 unsigned int i;
151
152 if (!chan)
153 return;
154
155 for (i = 0; i < NR_CPUS; i++) {
156 if (!chan->buf[i])
157 continue;
158 __relay_reset(chan->buf[i], 0);
159 }
160}
161
162/**
163 * relay_open_buf - create a new channel buffer in relayfs
164 *
165 * Internal - used by relay_open().
166 */
167static struct rchan_buf *relay_open_buf(struct rchan *chan,
168 const char *filename,
169 struct dentry *parent)
170{
171 struct rchan_buf *buf;
172 struct dentry *dentry;
173
174 /* Create file in fs */
175 dentry = relayfs_create_file(filename, parent, S_IRUSR, chan);
176 if (!dentry)
177 return NULL;
178
179 buf = RELAYFS_I(dentry->d_inode)->buf;
180 buf->dentry = dentry;
181 __relay_reset(buf, 1);
182
183 return buf;
184}
185
186/**
187 * relay_close_buf - close a channel buffer
188 * @buf: channel buffer
189 *
190 * Marks the buffer finalized and restores the default callbacks.
191 * The channel buffer and channel buffer data structure are then freed
192 * automatically when the last reference is given up.
193 */
194static inline void relay_close_buf(struct rchan_buf *buf)
195{
196 buf->finalized = 1;
197 buf->chan->cb = &default_channel_callbacks;
198 cancel_delayed_work(&buf->wake_readers);
199 flush_scheduled_work();
200 kref_put(&buf->kref, relay_remove_buf);
201}
202
203static inline void setup_callbacks(struct rchan *chan,
204 struct rchan_callbacks *cb)
205{
206 if (!cb) {
207 chan->cb = &default_channel_callbacks;
208 return;
209 }
210
211 if (!cb->subbuf_start)
212 cb->subbuf_start = subbuf_start_default_callback;
213 if (!cb->buf_mapped)
214 cb->buf_mapped = buf_mapped_default_callback;
215 if (!cb->buf_unmapped)
216 cb->buf_unmapped = buf_unmapped_default_callback;
217 chan->cb = cb;
218}
219
220/**
221 * relay_open - create a new relayfs channel
222 * @base_filename: base name of files to create
223 * @parent: dentry of parent directory, NULL for root directory
224 * @subbuf_size: size of sub-buffers
225 * @n_subbufs: number of sub-buffers
226 * @cb: client callback functions
227 *
228 * Returns channel pointer if successful, NULL otherwise.
229 *
230 * Creates a channel buffer for each cpu using the sizes and
231 * attributes specified. The created channel buffer files
232 * will be named base_filename0...base_filenameN-1. File
233 * permissions will be S_IRUSR.
234 */
235struct rchan *relay_open(const char *base_filename,
236 struct dentry *parent,
237 size_t subbuf_size,
238 size_t n_subbufs,
239 struct rchan_callbacks *cb)
240{
241 unsigned int i;
242 struct rchan *chan;
243 char *tmpname;
244
245 if (!base_filename)
246 return NULL;
247
248 if (!(subbuf_size && n_subbufs))
249 return NULL;
250
251 chan = kcalloc(1, sizeof(struct rchan), GFP_KERNEL);
252 if (!chan)
253 return NULL;
254
255 chan->version = RELAYFS_CHANNEL_VERSION;
256 chan->n_subbufs = n_subbufs;
257 chan->subbuf_size = subbuf_size;
258 chan->alloc_size = FIX_SIZE(subbuf_size * n_subbufs);
259 setup_callbacks(chan, cb);
260 kref_init(&chan->kref);
261
262 tmpname = kmalloc(NAME_MAX + 1, GFP_KERNEL);
263 if (!tmpname)
264 goto free_chan;
265
266 for_each_online_cpu(i) {
267 sprintf(tmpname, "%s%d", base_filename, i);
268 chan->buf[i] = relay_open_buf(chan, tmpname, parent);
269 chan->buf[i]->cpu = i;
270 if (!chan->buf[i])
271 goto free_bufs;
272 }
273
274 kfree(tmpname);
275 return chan;
276
277free_bufs:
278 for (i = 0; i < NR_CPUS; i++) {
279 if (!chan->buf[i])
280 break;
281 relay_close_buf(chan->buf[i]);
282 }
283 kfree(tmpname);
284
285free_chan:
286 kref_put(&chan->kref, relay_destroy_channel);
287 return NULL;
288}
289
290/**
291 * relay_switch_subbuf - switch to a new sub-buffer
292 * @buf: channel buffer
293 * @length: size of current event
294 *
295 * Returns either the length passed in or 0 if full.
296
297 * Performs sub-buffer-switch tasks such as invoking callbacks,
298 * updating padding counts, waking up readers, etc.
299 */
300size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
301{
302 void *old, *new;
303 size_t old_subbuf, new_subbuf;
304
305 if (unlikely(length > buf->chan->subbuf_size))
306 goto toobig;
307
308 if (buf->offset != buf->chan->subbuf_size + 1) {
309 buf->prev_padding = buf->chan->subbuf_size - buf->offset;
310 old_subbuf = buf->subbufs_produced % buf->chan->n_subbufs;
311 buf->padding[old_subbuf] = buf->prev_padding;
312 buf->subbufs_produced++;
313 if (waitqueue_active(&buf->read_wait)) {
314 PREPARE_WORK(&buf->wake_readers, wakeup_readers, buf);
315 schedule_delayed_work(&buf->wake_readers, 1);
316 }
317 }
318
319 old = buf->data;
320 new_subbuf = buf->subbufs_produced % buf->chan->n_subbufs;
321 new = buf->start + new_subbuf * buf->chan->subbuf_size;
322 buf->offset = 0;
323 if (!buf->chan->cb->subbuf_start(buf, new, old, buf->prev_padding)) {
324 buf->offset = buf->chan->subbuf_size + 1;
325 return 0;
326 }
327 buf->data = new;
328 buf->padding[new_subbuf] = 0;
329
330 if (unlikely(length + buf->offset > buf->chan->subbuf_size))
331 goto toobig;
332
333 return length;
334
335toobig:
336 printk(KERN_WARNING "relayfs: event too large (%Zd)\n", length);
337 WARN_ON(1);
338 return 0;
339}
340
341/**
342 * relay_subbufs_consumed - update the buffer's sub-buffers-consumed count
343 * @chan: the channel
344 * @cpu: the cpu associated with the channel buffer to update
345 * @subbufs_consumed: number of sub-buffers to add to current buf's count
346 *
347 * Adds to the channel buffer's consumed sub-buffer count.
348 * subbufs_consumed should be the number of sub-buffers newly consumed,
349 * not the total consumed.
350 *
351 * NOTE: kernel clients don't need to call this function if the channel
352 * mode is 'overwrite'.
353 */
354void relay_subbufs_consumed(struct rchan *chan,
355 unsigned int cpu,
356 size_t subbufs_consumed)
357{
358 struct rchan_buf *buf;
359
360 if (!chan)
361 return;
362
363 if (cpu >= NR_CPUS || !chan->buf[cpu])
364 return;
365
366 buf = chan->buf[cpu];
367 buf->subbufs_consumed += subbufs_consumed;
368 if (buf->subbufs_consumed > buf->subbufs_produced)
369 buf->subbufs_consumed = buf->subbufs_produced;
370}
371
372/**
373 * relay_destroy_channel - free the channel struct
374 *
375 * Should only be called from kref_put().
376 */
377void relay_destroy_channel(struct kref *kref)
378{
379 struct rchan *chan = container_of(kref, struct rchan, kref);
380 kfree(chan);
381}
382
383/**
384 * relay_close - close the channel
385 * @chan: the channel
386 *
387 * Closes all channel buffers and frees the channel.
388 */
389void relay_close(struct rchan *chan)
390{
391 unsigned int i;
392
393 if (!chan)
394 return;
395
396 for (i = 0; i < NR_CPUS; i++) {
397 if (!chan->buf[i])
398 continue;
399 relay_close_buf(chan->buf[i]);
400 }
401
402 kref_put(&chan->kref, relay_destroy_channel);
403}
404
405/**
406 * relay_flush - close the channel
407 * @chan: the channel
408 *
409 * Flushes all channel buffers i.e. forces buffer switch.
410 */
411void relay_flush(struct rchan *chan)
412{
413 unsigned int i;
414
415 if (!chan)
416 return;
417
418 for (i = 0; i < NR_CPUS; i++) {
419 if (!chan->buf[i])
420 continue;
421 relay_switch_subbuf(chan->buf[i], 0);
422 }
423}
424
425EXPORT_SYMBOL_GPL(relay_open);
426EXPORT_SYMBOL_GPL(relay_close);
427EXPORT_SYMBOL_GPL(relay_flush);
428EXPORT_SYMBOL_GPL(relay_reset);
429EXPORT_SYMBOL_GPL(relay_subbufs_consumed);
430EXPORT_SYMBOL_GPL(relay_switch_subbuf);
431EXPORT_SYMBOL_GPL(relay_buf_full);
diff --git a/fs/relayfs/relay.h b/fs/relayfs/relay.h
new file mode 100644
index 000000000000..703503fa22b6
--- /dev/null
+++ b/fs/relayfs/relay.h
@@ -0,0 +1,12 @@
1#ifndef _RELAY_H
2#define _RELAY_H
3
4struct dentry *relayfs_create_file(const char *name,
5 struct dentry *parent,
6 int mode,
7 struct rchan *chan);
8extern int relayfs_remove(struct dentry *dentry);
9extern int relay_buf_empty(struct rchan_buf *buf);
10extern void relay_destroy_channel(struct kref *kref);
11
12#endif /* _RELAY_H */
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 997640c99c7d..faf1512173eb 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -114,8 +114,7 @@ void ufs_free_fragments (struct inode * inode, unsigned fragment, unsigned count
114 ubh_mark_buffer_dirty (USPI_UBH); 114 ubh_mark_buffer_dirty (USPI_UBH);
115 ubh_mark_buffer_dirty (UCPI_UBH); 115 ubh_mark_buffer_dirty (UCPI_UBH);
116 if (sb->s_flags & MS_SYNCHRONOUS) { 116 if (sb->s_flags & MS_SYNCHRONOUS) {
117 ubh_wait_on_buffer (UCPI_UBH); 117 ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi);
118 ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **)&ucpi);
119 ubh_wait_on_buffer (UCPI_UBH); 118 ubh_wait_on_buffer (UCPI_UBH);
120 } 119 }
121 sb->s_dirt = 1; 120 sb->s_dirt = 1;
@@ -200,8 +199,7 @@ do_more:
200 ubh_mark_buffer_dirty (USPI_UBH); 199 ubh_mark_buffer_dirty (USPI_UBH);
201 ubh_mark_buffer_dirty (UCPI_UBH); 200 ubh_mark_buffer_dirty (UCPI_UBH);
202 if (sb->s_flags & MS_SYNCHRONOUS) { 201 if (sb->s_flags & MS_SYNCHRONOUS) {
203 ubh_wait_on_buffer (UCPI_UBH); 202 ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi);
204 ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **)&ucpi);
205 ubh_wait_on_buffer (UCPI_UBH); 203 ubh_wait_on_buffer (UCPI_UBH);
206 } 204 }
207 205
@@ -459,8 +457,7 @@ ufs_add_fragments (struct inode * inode, unsigned fragment,
459 ubh_mark_buffer_dirty (USPI_UBH); 457 ubh_mark_buffer_dirty (USPI_UBH);
460 ubh_mark_buffer_dirty (UCPI_UBH); 458 ubh_mark_buffer_dirty (UCPI_UBH);
461 if (sb->s_flags & MS_SYNCHRONOUS) { 459 if (sb->s_flags & MS_SYNCHRONOUS) {
462 ubh_wait_on_buffer (UCPI_UBH); 460 ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi);
463 ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **)&ucpi);
464 ubh_wait_on_buffer (UCPI_UBH); 461 ubh_wait_on_buffer (UCPI_UBH);
465 } 462 }
466 sb->s_dirt = 1; 463 sb->s_dirt = 1;
@@ -585,8 +582,7 @@ succed:
585 ubh_mark_buffer_dirty (USPI_UBH); 582 ubh_mark_buffer_dirty (USPI_UBH);
586 ubh_mark_buffer_dirty (UCPI_UBH); 583 ubh_mark_buffer_dirty (UCPI_UBH);
587 if (sb->s_flags & MS_SYNCHRONOUS) { 584 if (sb->s_flags & MS_SYNCHRONOUS) {
588 ubh_wait_on_buffer (UCPI_UBH); 585 ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **)&ucpi);
589 ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **)&ucpi);
590 ubh_wait_on_buffer (UCPI_UBH); 586 ubh_wait_on_buffer (UCPI_UBH);
591 } 587 }
592 sb->s_dirt = 1; 588 sb->s_dirt = 1;
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 61a6b1542fc5..0938945b9cbc 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -124,8 +124,7 @@ void ufs_free_inode (struct inode * inode)
124 ubh_mark_buffer_dirty (USPI_UBH); 124 ubh_mark_buffer_dirty (USPI_UBH);
125 ubh_mark_buffer_dirty (UCPI_UBH); 125 ubh_mark_buffer_dirty (UCPI_UBH);
126 if (sb->s_flags & MS_SYNCHRONOUS) { 126 if (sb->s_flags & MS_SYNCHRONOUS) {
127 ubh_wait_on_buffer (UCPI_UBH); 127 ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **) &ucpi);
128 ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **) &ucpi);
129 ubh_wait_on_buffer (UCPI_UBH); 128 ubh_wait_on_buffer (UCPI_UBH);
130 } 129 }
131 130
@@ -249,8 +248,7 @@ cg_found:
249 ubh_mark_buffer_dirty (USPI_UBH); 248 ubh_mark_buffer_dirty (USPI_UBH);
250 ubh_mark_buffer_dirty (UCPI_UBH); 249 ubh_mark_buffer_dirty (UCPI_UBH);
251 if (sb->s_flags & MS_SYNCHRONOUS) { 250 if (sb->s_flags & MS_SYNCHRONOUS) {
252 ubh_wait_on_buffer (UCPI_UBH); 251 ubh_ll_rw_block (SWRITE, 1, (struct ufs_buffer_head **) &ucpi);
253 ubh_ll_rw_block (WRITE, 1, (struct ufs_buffer_head **) &ucpi);
254 ubh_wait_on_buffer (UCPI_UBH); 252 ubh_wait_on_buffer (UCPI_UBH);
255 } 253 }
256 sb->s_dirt = 1; 254 sb->s_dirt = 1;
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index e312bf8bad9f..61d2e35012a4 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -285,8 +285,7 @@ next:;
285 } 285 }
286 } 286 }
287 if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) { 287 if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) {
288 ubh_wait_on_buffer (ind_ubh); 288 ubh_ll_rw_block (SWRITE, 1, &ind_ubh);
289 ubh_ll_rw_block (WRITE, 1, &ind_ubh);
290 ubh_wait_on_buffer (ind_ubh); 289 ubh_wait_on_buffer (ind_ubh);
291 } 290 }
292 ubh_brelse (ind_ubh); 291 ubh_brelse (ind_ubh);
@@ -353,8 +352,7 @@ static int ufs_trunc_dindirect (struct inode *inode, unsigned offset, __fs32 *p)
353 } 352 }
354 } 353 }
355 if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) { 354 if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) {
356 ubh_wait_on_buffer (dind_bh); 355 ubh_ll_rw_block (SWRITE, 1, &dind_bh);
357 ubh_ll_rw_block (WRITE, 1, &dind_bh);
358 ubh_wait_on_buffer (dind_bh); 356 ubh_wait_on_buffer (dind_bh);
359 } 357 }
360 ubh_brelse (dind_bh); 358 ubh_brelse (dind_bh);
@@ -418,8 +416,7 @@ static int ufs_trunc_tindirect (struct inode * inode)
418 } 416 }
419 } 417 }
420 if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) { 418 if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) {
421 ubh_wait_on_buffer (tind_bh); 419 ubh_ll_rw_block (SWRITE, 1, &tind_bh);
422 ubh_ll_rw_block (WRITE, 1, &tind_bh);
423 ubh_wait_on_buffer (tind_bh); 420 ubh_wait_on_buffer (tind_bh);
424 } 421 }
425 ubh_brelse (tind_bh); 422 ubh_brelse (tind_bh);
diff --git a/fs/umsdos/notes b/fs/umsdos/notes
deleted file mode 100644
index 3c47d1f4fc47..000000000000
--- a/fs/umsdos/notes
+++ /dev/null
@@ -1,17 +0,0 @@
1This file contain idea and things I don't want to forget
2
3Possible bug in fs/read_write.c
4Function sys_readdir()
5
6 There is a call the verify_area that does not take in account
7 the count parameter. I guess it should read
8
9 error = verify_area(VERIFY_WRITE, dirent, count*sizeof (*dirent));
10
11 instead of
12
13 error = verify_area(VERIFY_WRITE, dirent, sizeof (*dirent));
14
15 Of course, now , count is always 1
16
17
diff --git a/fs/xattr.c b/fs/xattr.c
index 6acd5c63da91..3f9c64bea151 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -51,20 +51,29 @@ setxattr(struct dentry *d, char __user *name, void __user *value,
51 } 51 }
52 } 52 }
53 53
54 down(&d->d_inode->i_sem);
55 error = security_inode_setxattr(d, kname, kvalue, size, flags);
56 if (error)
57 goto out;
54 error = -EOPNOTSUPP; 58 error = -EOPNOTSUPP;
55 if (d->d_inode->i_op && d->d_inode->i_op->setxattr) { 59 if (d->d_inode->i_op && d->d_inode->i_op->setxattr) {
56 down(&d->d_inode->i_sem); 60 error = d->d_inode->i_op->setxattr(d, kname, kvalue,
57 error = security_inode_setxattr(d, kname, kvalue, size, flags); 61 size, flags);
58 if (error)
59 goto out;
60 error = d->d_inode->i_op->setxattr(d, kname, kvalue, size, flags);
61 if (!error) { 62 if (!error) {
62 fsnotify_xattr(d); 63 fsnotify_xattr(d);
63 security_inode_post_setxattr(d, kname, kvalue, size, flags); 64 security_inode_post_setxattr(d, kname, kvalue,
65 size, flags);
64 } 66 }
65out: 67 } else if (!strncmp(kname, XATTR_SECURITY_PREFIX,
66 up(&d->d_inode->i_sem); 68 sizeof XATTR_SECURITY_PREFIX - 1)) {
69 const char *suffix = kname + sizeof XATTR_SECURITY_PREFIX - 1;
70 error = security_inode_setsecurity(d->d_inode, suffix, kvalue,
71 size, flags);
72 if (!error)
73 fsnotify_xattr(d);
67 } 74 }
75out:
76 up(&d->d_inode->i_sem);
68 if (kvalue) 77 if (kvalue)
69 kfree(kvalue); 78 kfree(kvalue);
70 return error; 79 return error;
@@ -139,20 +148,25 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size)
139 return -ENOMEM; 148 return -ENOMEM;
140 } 149 }
141 150
151 error = security_inode_getxattr(d, kname);
152 if (error)
153 goto out;
142 error = -EOPNOTSUPP; 154 error = -EOPNOTSUPP;
143 if (d->d_inode->i_op && d->d_inode->i_op->getxattr) { 155 if (d->d_inode->i_op && d->d_inode->i_op->getxattr)
144 error = security_inode_getxattr(d, kname);
145 if (error)
146 goto out;
147 error = d->d_inode->i_op->getxattr(d, kname, kvalue, size); 156 error = d->d_inode->i_op->getxattr(d, kname, kvalue, size);
148 if (error > 0) { 157 else if (!strncmp(kname, XATTR_SECURITY_PREFIX,
149 if (size && copy_to_user(value, kvalue, error)) 158 sizeof XATTR_SECURITY_PREFIX - 1)) {
150 error = -EFAULT; 159 const char *suffix = kname + sizeof XATTR_SECURITY_PREFIX - 1;
151 } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) { 160 error = security_inode_getsecurity(d->d_inode, suffix, kvalue,
152 /* The file system tried to returned a value bigger 161 size);
153 than XATTR_SIZE_MAX bytes. Not possible. */ 162 }
154 error = -E2BIG; 163 if (error > 0) {
155 } 164 if (size && copy_to_user(value, kvalue, error))
165 error = -EFAULT;
166 } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) {
167 /* The file system tried to returned a value bigger
168 than XATTR_SIZE_MAX bytes. Not possible. */
169 error = -E2BIG;
156 } 170 }
157out: 171out:
158 if (kvalue) 172 if (kvalue)
@@ -221,20 +235,24 @@ listxattr(struct dentry *d, char __user *list, size_t size)
221 return -ENOMEM; 235 return -ENOMEM;
222 } 236 }
223 237
238 error = security_inode_listxattr(d);
239 if (error)
240 goto out;
224 error = -EOPNOTSUPP; 241 error = -EOPNOTSUPP;
225 if (d->d_inode->i_op && d->d_inode->i_op->listxattr) { 242 if (d->d_inode->i_op && d->d_inode->i_op->listxattr) {
226 error = security_inode_listxattr(d);
227 if (error)
228 goto out;
229 error = d->d_inode->i_op->listxattr(d, klist, size); 243 error = d->d_inode->i_op->listxattr(d, klist, size);
230 if (error > 0) { 244 } else {
231 if (size && copy_to_user(list, klist, error)) 245 error = security_inode_listsecurity(d->d_inode, klist, size);
232 error = -EFAULT; 246 if (size && error >= size)
233 } else if (error == -ERANGE && size >= XATTR_LIST_MAX) { 247 error = -ERANGE;
234 /* The file system tried to returned a list bigger 248 }
235 than XATTR_LIST_MAX bytes. Not possible. */ 249 if (error > 0) {
236 error = -E2BIG; 250 if (size && copy_to_user(list, klist, error))
237 } 251 error = -EFAULT;
252 } else if (error == -ERANGE && size >= XATTR_LIST_MAX) {
253 /* The file system tried to returned a list bigger
254 than XATTR_LIST_MAX bytes. Not possible. */
255 error = -E2BIG;
238 } 256 }
239out: 257out:
240 if (klist) 258 if (klist)
@@ -307,6 +325,8 @@ removexattr(struct dentry *d, char __user *name)
307 down(&d->d_inode->i_sem); 325 down(&d->d_inode->i_sem);
308 error = d->d_inode->i_op->removexattr(d, kname); 326 error = d->d_inode->i_op->removexattr(d, kname);
309 up(&d->d_inode->i_sem); 327 up(&d->d_inode->i_sem);
328 if (!error)
329 fsnotify_xattr(d);
310 } 330 }
311out: 331out:
312 return error; 332 return error;
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index d3ff78354638..49e3e7e5e3dc 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -1,150 +1 @@
1# include $(TOPDIR)/fs/xfs/Makefile-linux-$(VERSION).$(PATCHLEVEL)
2# Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3#
4# This program is free software; you can redistribute it and/or modify it
5# under the terms of version 2 of the GNU General Public License as
6# published by the Free Software Foundation.
7#
8# This program is distributed in the hope that it would be useful, but
9# WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11#
12# Further, this software is distributed without any warranty that it is
13# free of the rightful claim of any third person regarding infringement
14# or the like. Any license provided herein, whether implied or
15# otherwise, applies only to this software file. Patent licenses, if
16# any, provided herein do not apply to combinations of this program with
17# other software, or any other product whatsoever.
18#
19# You should have received a copy of the GNU General Public License along
20# with this program; if not, write the Free Software Foundation, Inc., 59
21# Temple Place - Suite 330, Boston MA 02111-1307, USA.
22#
23# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24# Mountain View, CA 94043, or:
25#
26# http://www.sgi.com
27#
28# For further information regarding this notice, see:
29#
30# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31#
32
33EXTRA_CFLAGS += -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char
34
35ifeq ($(CONFIG_XFS_DEBUG),y)
36 EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG
37 EXTRA_CFLAGS += -DPAGEBUF_LOCK_TRACKING
38endif
39ifeq ($(CONFIG_XFS_TRACE),y)
40 EXTRA_CFLAGS += -DXFS_ALLOC_TRACE
41 EXTRA_CFLAGS += -DXFS_ATTR_TRACE
42 EXTRA_CFLAGS += -DXFS_BLI_TRACE
43 EXTRA_CFLAGS += -DXFS_BMAP_TRACE
44 EXTRA_CFLAGS += -DXFS_BMBT_TRACE
45 EXTRA_CFLAGS += -DXFS_DIR_TRACE
46 EXTRA_CFLAGS += -DXFS_DIR2_TRACE
47 EXTRA_CFLAGS += -DXFS_DQUOT_TRACE
48 EXTRA_CFLAGS += -DXFS_ILOCK_TRACE
49 EXTRA_CFLAGS += -DXFS_LOG_TRACE
50 EXTRA_CFLAGS += -DXFS_RW_TRACE
51 EXTRA_CFLAGS += -DPAGEBUF_TRACE
52 EXTRA_CFLAGS += -DXFS_VNODE_TRACE
53endif
54
55obj-$(CONFIG_XFS_FS) += xfs.o
56
57xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \
58 xfs_dquot.o \
59 xfs_dquot_item.o \
60 xfs_trans_dquot.o \
61 xfs_qm_syscalls.o \
62 xfs_qm_bhv.o \
63 xfs_qm.o)
64ifeq ($(CONFIG_XFS_QUOTA),y)
65xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o
66endif
67
68xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
69xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
70xfs-$(CONFIG_PROC_FS) += linux-2.6/xfs_stats.o
71xfs-$(CONFIG_SYSCTL) += linux-2.6/xfs_sysctl.o
72xfs-$(CONFIG_COMPAT) += linux-2.6/xfs_ioctl32.o
73xfs-$(CONFIG_XFS_EXPORT) += linux-2.6/xfs_export.o
74
75
76xfs-y += xfs_alloc.o \
77 xfs_alloc_btree.o \
78 xfs_attr.o \
79 xfs_attr_leaf.o \
80 xfs_behavior.o \
81 xfs_bit.o \
82 xfs_bmap.o \
83 xfs_bmap_btree.o \
84 xfs_btree.o \
85 xfs_buf_item.o \
86 xfs_da_btree.o \
87 xfs_dir.o \
88 xfs_dir2.o \
89 xfs_dir2_block.o \
90 xfs_dir2_data.o \
91 xfs_dir2_leaf.o \
92 xfs_dir2_node.o \
93 xfs_dir2_sf.o \
94 xfs_dir_leaf.o \
95 xfs_error.o \
96 xfs_extfree_item.o \
97 xfs_fsops.o \
98 xfs_ialloc.o \
99 xfs_ialloc_btree.o \
100 xfs_iget.o \
101 xfs_inode.o \
102 xfs_inode_item.o \
103 xfs_iocore.o \
104 xfs_iomap.o \
105 xfs_itable.o \
106 xfs_dfrag.o \
107 xfs_log.o \
108 xfs_log_recover.o \
109 xfs_macros.o \
110 xfs_mount.o \
111 xfs_rename.o \
112 xfs_trans.o \
113 xfs_trans_ail.o \
114 xfs_trans_buf.o \
115 xfs_trans_extfree.o \
116 xfs_trans_inode.o \
117 xfs_trans_item.o \
118 xfs_utils.o \
119 xfs_vfsops.o \
120 xfs_vnodeops.o \
121 xfs_rw.o \
122 xfs_dmops.o \
123 xfs_qmops.o
124
125xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o
126
127# Objects in linux-2.6/
128xfs-y += $(addprefix linux-2.6/, \
129 kmem.o \
130 xfs_aops.o \
131 xfs_buf.o \
132 xfs_file.o \
133 xfs_fs_subr.o \
134 xfs_globals.o \
135 xfs_ioctl.o \
136 xfs_iops.o \
137 xfs_lrw.o \
138 xfs_super.o \
139 xfs_vfs.o \
140 xfs_vnode.o)
141
142# Objects in support/
143xfs-y += $(addprefix support/, \
144 debug.o \
145 move.o \
146 qsort.o \
147 uuid.o)
148
149xfs-$(CONFIG_XFS_TRACE) += support/ktrace.o
150
diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6
new file mode 100644
index 000000000000..fbfcbe5a7cda
--- /dev/null
+++ b/fs/xfs/Makefile-linux-2.6
@@ -0,0 +1,141 @@
1#
2# Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
3#
4# This program is free software; you can redistribute it and/or modify it
5# under the terms of version 2 of the GNU General Public License as
6# published by the Free Software Foundation.
7#
8# This program is distributed in the hope that it would be useful, but
9# WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11#
12# Further, this software is distributed without any warranty that it is
13# free of the rightful claim of any third person regarding infringement
14# or the like. Any license provided herein, whether implied or
15# otherwise, applies only to this software file. Patent licenses, if
16# any, provided herein do not apply to combinations of this program with
17# other software, or any other product whatsoever.
18#
19# You should have received a copy of the GNU General Public License along
20# with this program; if not, write the Free Software Foundation, Inc., 59
21# Temple Place - Suite 330, Boston MA 02111-1307, USA.
22#
23# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24# Mountain View, CA 94043, or:
25#
26# http://www.sgi.com
27#
28# For further information regarding this notice, see:
29#
30# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31#
32
33EXTRA_CFLAGS += -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char
34
35XFS_LINUX := linux-2.6
36
37ifeq ($(CONFIG_XFS_DEBUG),y)
38 EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG
39 EXTRA_CFLAGS += -DPAGEBUF_LOCK_TRACKING
40endif
41ifeq ($(CONFIG_XFS_TRACE),y)
42 EXTRA_CFLAGS += -DXFS_ALLOC_TRACE
43 EXTRA_CFLAGS += -DXFS_ATTR_TRACE
44 EXTRA_CFLAGS += -DXFS_BLI_TRACE
45 EXTRA_CFLAGS += -DXFS_BMAP_TRACE
46 EXTRA_CFLAGS += -DXFS_BMBT_TRACE
47 EXTRA_CFLAGS += -DXFS_DIR_TRACE
48 EXTRA_CFLAGS += -DXFS_DIR2_TRACE
49 EXTRA_CFLAGS += -DXFS_DQUOT_TRACE
50 EXTRA_CFLAGS += -DXFS_ILOCK_TRACE
51 EXTRA_CFLAGS += -DXFS_LOG_TRACE
52 EXTRA_CFLAGS += -DXFS_RW_TRACE
53 EXTRA_CFLAGS += -DPAGEBUF_TRACE
54 EXTRA_CFLAGS += -DXFS_VNODE_TRACE
55endif
56
57obj-$(CONFIG_XFS_FS) += xfs.o
58obj-$(CONFIG_XFS_QUOTA) += quota/
59
60xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
61xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
62xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o
63xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o
64xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o
65xfs-$(CONFIG_XFS_EXPORT) += $(XFS_LINUX)/xfs_export.o
66
67
68xfs-y += xfs_alloc.o \
69 xfs_alloc_btree.o \
70 xfs_attr.o \
71 xfs_attr_leaf.o \
72 xfs_behavior.o \
73 xfs_bit.o \
74 xfs_bmap.o \
75 xfs_bmap_btree.o \
76 xfs_btree.o \
77 xfs_buf_item.o \
78 xfs_da_btree.o \
79 xfs_dir.o \
80 xfs_dir2.o \
81 xfs_dir2_block.o \
82 xfs_dir2_data.o \
83 xfs_dir2_leaf.o \
84 xfs_dir2_node.o \
85 xfs_dir2_sf.o \
86 xfs_dir_leaf.o \
87 xfs_error.o \
88 xfs_extfree_item.o \
89 xfs_fsops.o \
90 xfs_ialloc.o \
91 xfs_ialloc_btree.o \
92 xfs_iget.o \
93 xfs_inode.o \
94 xfs_inode_item.o \
95 xfs_iocore.o \
96 xfs_iomap.o \
97 xfs_itable.o \
98 xfs_dfrag.o \
99 xfs_log.o \
100 xfs_log_recover.o \
101 xfs_macros.o \
102 xfs_mount.o \
103 xfs_rename.o \
104 xfs_trans.o \
105 xfs_trans_ail.o \
106 xfs_trans_buf.o \
107 xfs_trans_extfree.o \
108 xfs_trans_inode.o \
109 xfs_trans_item.o \
110 xfs_utils.o \
111 xfs_vfsops.o \
112 xfs_vnodeops.o \
113 xfs_rw.o \
114 xfs_dmops.o \
115 xfs_qmops.o
116
117xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o
118
119# Objects in linux/
120xfs-y += $(addprefix $(XFS_LINUX)/, \
121 kmem.o \
122 xfs_aops.o \
123 xfs_buf.o \
124 xfs_file.o \
125 xfs_fs_subr.o \
126 xfs_globals.o \
127 xfs_ioctl.o \
128 xfs_iops.o \
129 xfs_lrw.o \
130 xfs_super.o \
131 xfs_vfs.o \
132 xfs_vnode.o)
133
134# Objects in support/
135xfs-y += $(addprefix support/, \
136 debug.o \
137 move.o \
138 uuid.o)
139
140xfs-$(CONFIG_XFS_TRACE) += support/ktrace.o
141
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 364ea8c386b1..4b184559f231 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -45,11 +45,11 @@
45 45
46 46
47void * 47void *
48kmem_alloc(size_t size, int flags) 48kmem_alloc(size_t size, unsigned int __nocast flags)
49{ 49{
50 int retries = 0; 50 int retries = 0;
51 int lflags = kmem_flags_convert(flags); 51 unsigned int lflags = kmem_flags_convert(flags);
52 void *ptr; 52 void *ptr;
53 53
54 do { 54 do {
55 if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS) 55 if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS)
@@ -67,7 +67,7 @@ kmem_alloc(size_t size, int flags)
67} 67}
68 68
69void * 69void *
70kmem_zalloc(size_t size, int flags) 70kmem_zalloc(size_t size, unsigned int __nocast flags)
71{ 71{
72 void *ptr; 72 void *ptr;
73 73
@@ -89,7 +89,8 @@ kmem_free(void *ptr, size_t size)
89} 89}
90 90
91void * 91void *
92kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags) 92kmem_realloc(void *ptr, size_t newsize, size_t oldsize,
93 unsigned int __nocast flags)
93{ 94{
94 void *new; 95 void *new;
95 96
@@ -104,11 +105,11 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags)
104} 105}
105 106
106void * 107void *
107kmem_zone_alloc(kmem_zone_t *zone, int flags) 108kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
108{ 109{
109 int retries = 0; 110 int retries = 0;
110 int lflags = kmem_flags_convert(flags); 111 unsigned int lflags = kmem_flags_convert(flags);
111 void *ptr; 112 void *ptr;
112 113
113 do { 114 do {
114 ptr = kmem_cache_alloc(zone, lflags); 115 ptr = kmem_cache_alloc(zone, lflags);
@@ -123,7 +124,7 @@ kmem_zone_alloc(kmem_zone_t *zone, int flags)
123} 124}
124 125
125void * 126void *
126kmem_zone_zalloc(kmem_zone_t *zone, int flags) 127kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags)
127{ 128{
128 void *ptr; 129 void *ptr;
129 130
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index 1397b669b059..109fcf27e256 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -39,10 +39,10 @@
39/* 39/*
40 * memory management routines 40 * memory management routines
41 */ 41 */
42#define KM_SLEEP 0x0001 42#define KM_SLEEP 0x0001u
43#define KM_NOSLEEP 0x0002 43#define KM_NOSLEEP 0x0002u
44#define KM_NOFS 0x0004 44#define KM_NOFS 0x0004u
45#define KM_MAYFAIL 0x0008 45#define KM_MAYFAIL 0x0008u
46 46
47#define kmem_zone kmem_cache_s 47#define kmem_zone kmem_cache_s
48#define kmem_zone_t kmem_cache_t 48#define kmem_zone_t kmem_cache_t
@@ -81,9 +81,9 @@ typedef unsigned long xfs_pflags_t;
81 *(NSTATEP) = *(OSTATEP); \ 81 *(NSTATEP) = *(OSTATEP); \
82} while (0) 82} while (0)
83 83
84static __inline unsigned int kmem_flags_convert(int flags) 84static __inline unsigned int kmem_flags_convert(unsigned int __nocast flags)
85{ 85{
86 int lflags = __GFP_NOWARN; /* we'll report problems, if need be */ 86 unsigned int lflags = __GFP_NOWARN; /* we'll report problems, if need be */
87 87
88#ifdef DEBUG 88#ifdef DEBUG
89 if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) { 89 if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) {
@@ -125,12 +125,13 @@ kmem_zone_destroy(kmem_zone_t *zone)
125 BUG(); 125 BUG();
126} 126}
127 127
128extern void *kmem_zone_zalloc(kmem_zone_t *, int); 128extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
129extern void *kmem_zone_alloc(kmem_zone_t *, int); 129extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
130 130
131extern void *kmem_alloc(size_t, int); 131extern void *kmem_alloc(size_t, unsigned int __nocast);
132extern void *kmem_realloc(void *, size_t, size_t, int); 132extern void *kmem_realloc(void *, size_t, size_t,
133extern void *kmem_zalloc(size_t, int); 133 unsigned int __nocast);
134extern void *kmem_zalloc(size_t, unsigned int __nocast);
134extern void kmem_free(void *, size_t); 135extern void kmem_free(void *, size_t);
135 136
136typedef struct shrinker *kmem_shaker_t; 137typedef struct shrinker *kmem_shaker_t;
diff --git a/fs/xfs/linux-2.6/spin.h b/fs/xfs/linux-2.6/spin.h
index bcf60a0b8df0..0039504069a5 100644
--- a/fs/xfs/linux-2.6/spin.h
+++ b/fs/xfs/linux-2.6/spin.h
@@ -45,6 +45,9 @@
45typedef spinlock_t lock_t; 45typedef spinlock_t lock_t;
46 46
47#define SPLDECL(s) unsigned long s 47#define SPLDECL(s) unsigned long s
48#ifndef DEFINE_SPINLOCK
49#define DEFINE_SPINLOCK(s) spinlock_t s = SPIN_LOCK_UNLOCKED
50#endif
48 51
49#define spinlock_init(lock, name) spin_lock_init(lock) 52#define spinlock_init(lock, name) spin_lock_init(lock)
50#define spinlock_destroy(lock) 53#define spinlock_destroy(lock)
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index a3a4b5aaf5d9..c6c077978fe3 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -104,66 +104,114 @@ xfs_page_trace(
104#define xfs_page_trace(tag, inode, page, mask) 104#define xfs_page_trace(tag, inode, page, mask)
105#endif 105#endif
106 106
107void 107/*
108linvfs_unwritten_done( 108 * Schedule IO completion handling on a xfsdatad if this was
109 struct buffer_head *bh, 109 * the final hold on this ioend.
110 int uptodate) 110 */
111STATIC void
112xfs_finish_ioend(
113 xfs_ioend_t *ioend)
111{ 114{
112 xfs_buf_t *pb = (xfs_buf_t *)bh->b_private; 115 if (atomic_dec_and_test(&ioend->io_remaining))
116 queue_work(xfsdatad_workqueue, &ioend->io_work);
117}
113 118
114 ASSERT(buffer_unwritten(bh)); 119STATIC void
115 bh->b_end_io = NULL; 120xfs_destroy_ioend(
116 clear_buffer_unwritten(bh); 121 xfs_ioend_t *ioend)
117 if (!uptodate) 122{
118 pagebuf_ioerror(pb, EIO); 123 vn_iowake(ioend->io_vnode);
119 if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { 124 mempool_free(ioend, xfs_ioend_pool);
120 pagebuf_iodone(pb, 1, 1);
121 }
122 end_buffer_async_write(bh, uptodate);
123} 125}
124 126
125/* 127/*
126 * Issue transactions to convert a buffer range from unwritten 128 * Issue transactions to convert a buffer range from unwritten
127 * to written extents (buffered IO). 129 * to written extents.
128 */ 130 */
129STATIC void 131STATIC void
130linvfs_unwritten_convert( 132xfs_end_bio_unwritten(
131 xfs_buf_t *bp) 133 void *data)
132{ 134{
133 vnode_t *vp = XFS_BUF_FSPRIVATE(bp, vnode_t *); 135 xfs_ioend_t *ioend = data;
134 int error; 136 vnode_t *vp = ioend->io_vnode;
137 xfs_off_t offset = ioend->io_offset;
138 size_t size = ioend->io_size;
139 struct buffer_head *bh, *next;
140 int error;
141
142 if (ioend->io_uptodate)
143 VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
144
145 /* ioend->io_buffer_head is only non-NULL for buffered I/O */
146 for (bh = ioend->io_buffer_head; bh; bh = next) {
147 next = bh->b_private;
148
149 bh->b_end_io = NULL;
150 clear_buffer_unwritten(bh);
151 end_buffer_async_write(bh, ioend->io_uptodate);
152 }
135 153
136 BUG_ON(atomic_read(&bp->pb_hold) < 1); 154 xfs_destroy_ioend(ioend);
137 VOP_BMAP(vp, XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp),
138 BMAPI_UNWRITTEN, NULL, NULL, error);
139 XFS_BUF_SET_FSPRIVATE(bp, NULL);
140 XFS_BUF_CLR_IODONE_FUNC(bp);
141 XFS_BUF_UNDATAIO(bp);
142 iput(LINVFS_GET_IP(vp));
143 pagebuf_iodone(bp, 0, 0);
144} 155}
145 156
146/* 157/*
147 * Issue transactions to convert a buffer range from unwritten 158 * Allocate and initialise an IO completion structure.
148 * to written extents (direct IO). 159 * We need to track unwritten extent write completion here initially.
160 * We'll need to extend this for updating the ondisk inode size later
161 * (vs. incore size).
149 */ 162 */
150STATIC void 163STATIC xfs_ioend_t *
151linvfs_unwritten_convert_direct( 164xfs_alloc_ioend(
152 struct kiocb *iocb, 165 struct inode *inode)
153 loff_t offset,
154 ssize_t size,
155 void *private)
156{ 166{
157 struct inode *inode = iocb->ki_filp->f_dentry->d_inode; 167 xfs_ioend_t *ioend;
158 ASSERT(!private || inode == (struct inode *)private);
159 168
160 /* private indicates an unwritten extent lay beneath this IO */ 169 ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
161 if (private && size > 0) {
162 vnode_t *vp = LINVFS_GET_VP(inode);
163 int error;
164 170
165 VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); 171 /*
166 } 172 * Set the count to 1 initially, which will prevent an I/O
173 * completion callback from happening before we have started
174 * all the I/O from calling the completion routine too early.
175 */
176 atomic_set(&ioend->io_remaining, 1);
177 ioend->io_uptodate = 1; /* cleared if any I/O fails */
178 ioend->io_vnode = LINVFS_GET_VP(inode);
179 ioend->io_buffer_head = NULL;
180 atomic_inc(&ioend->io_vnode->v_iocount);
181 ioend->io_offset = 0;
182 ioend->io_size = 0;
183
184 INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
185
186 return ioend;
187}
188
189void
190linvfs_unwritten_done(
191 struct buffer_head *bh,
192 int uptodate)
193{
194 xfs_ioend_t *ioend = bh->b_private;
195 static spinlock_t unwritten_done_lock = SPIN_LOCK_UNLOCKED;
196 unsigned long flags;
197
198 ASSERT(buffer_unwritten(bh));
199 bh->b_end_io = NULL;
200
201 if (!uptodate)
202 ioend->io_uptodate = 0;
203
204 /*
205 * Deep magic here. We reuse b_private in the buffer_heads to build
206 * a chain for completing the I/O from user context after we've issued
207 * a transaction to convert the unwritten extent.
208 */
209 spin_lock_irqsave(&unwritten_done_lock, flags);
210 bh->b_private = ioend->io_buffer_head;
211 ioend->io_buffer_head = bh;
212 spin_unlock_irqrestore(&unwritten_done_lock, flags);
213
214 xfs_finish_ioend(ioend);
167} 215}
168 216
169STATIC int 217STATIC int
@@ -255,7 +303,7 @@ xfs_probe_unwritten_page(
255 struct address_space *mapping, 303 struct address_space *mapping,
256 pgoff_t index, 304 pgoff_t index,
257 xfs_iomap_t *iomapp, 305 xfs_iomap_t *iomapp,
258 xfs_buf_t *pb, 306 xfs_ioend_t *ioend,
259 unsigned long max_offset, 307 unsigned long max_offset,
260 unsigned long *fsbs, 308 unsigned long *fsbs,
261 unsigned int bbits) 309 unsigned int bbits)
@@ -283,7 +331,7 @@ xfs_probe_unwritten_page(
283 break; 331 break;
284 xfs_map_at_offset(page, bh, p_offset, bbits, iomapp); 332 xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);
285 set_buffer_unwritten_io(bh); 333 set_buffer_unwritten_io(bh);
286 bh->b_private = pb; 334 bh->b_private = ioend;
287 p_offset += bh->b_size; 335 p_offset += bh->b_size;
288 (*fsbs)++; 336 (*fsbs)++;
289 } while ((bh = bh->b_this_page) != head); 337 } while ((bh = bh->b_this_page) != head);
@@ -434,34 +482,15 @@ xfs_map_unwritten(
434{ 482{
435 struct buffer_head *bh = curr; 483 struct buffer_head *bh = curr;
436 xfs_iomap_t *tmp; 484 xfs_iomap_t *tmp;
437 xfs_buf_t *pb; 485 xfs_ioend_t *ioend;
438 loff_t offset, size; 486 loff_t offset;
439 unsigned long nblocks = 0; 487 unsigned long nblocks = 0;
440 488
441 offset = start_page->index; 489 offset = start_page->index;
442 offset <<= PAGE_CACHE_SHIFT; 490 offset <<= PAGE_CACHE_SHIFT;
443 offset += p_offset; 491 offset += p_offset;
444 492
445 /* get an "empty" pagebuf to manage IO completion 493 ioend = xfs_alloc_ioend(inode);
446 * Proper values will be set before returning */
447 pb = pagebuf_lookup(iomapp->iomap_target, 0, 0, 0);
448 if (!pb)
449 return -EAGAIN;
450
451 /* Take a reference to the inode to prevent it from
452 * being reclaimed while we have outstanding unwritten
453 * extent IO on it.
454 */
455 if ((igrab(inode)) != inode) {
456 pagebuf_free(pb);
457 return -EAGAIN;
458 }
459
460 /* Set the count to 1 initially, this will stop an I/O
461 * completion callout which happens before we have started
462 * all the I/O from calling pagebuf_iodone too early.
463 */
464 atomic_set(&pb->pb_io_remaining, 1);
465 494
466 /* First map forwards in the page consecutive buffers 495 /* First map forwards in the page consecutive buffers
467 * covering this unwritten extent 496 * covering this unwritten extent
@@ -474,12 +503,12 @@ xfs_map_unwritten(
474 break; 503 break;
475 xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp); 504 xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp);
476 set_buffer_unwritten_io(bh); 505 set_buffer_unwritten_io(bh);
477 bh->b_private = pb; 506 bh->b_private = ioend;
478 p_offset += bh->b_size; 507 p_offset += bh->b_size;
479 nblocks++; 508 nblocks++;
480 } while ((bh = bh->b_this_page) != head); 509 } while ((bh = bh->b_this_page) != head);
481 510
482 atomic_add(nblocks, &pb->pb_io_remaining); 511 atomic_add(nblocks, &ioend->io_remaining);
483 512
484 /* If we reached the end of the page, map forwards in any 513 /* If we reached the end of the page, map forwards in any
485 * following pages which are also covered by this extent. 514 * following pages which are also covered by this extent.
@@ -496,13 +525,13 @@ xfs_map_unwritten(
496 tloff = min(tlast, tloff); 525 tloff = min(tlast, tloff);
497 for (tindex = start_page->index + 1; tindex < tloff; tindex++) { 526 for (tindex = start_page->index + 1; tindex < tloff; tindex++) {
498 page = xfs_probe_unwritten_page(mapping, 527 page = xfs_probe_unwritten_page(mapping,
499 tindex, iomapp, pb, 528 tindex, iomapp, ioend,
500 PAGE_CACHE_SIZE, &bs, bbits); 529 PAGE_CACHE_SIZE, &bs, bbits);
501 if (!page) 530 if (!page)
502 break; 531 break;
503 nblocks += bs; 532 nblocks += bs;
504 atomic_add(bs, &pb->pb_io_remaining); 533 atomic_add(bs, &ioend->io_remaining);
505 xfs_convert_page(inode, page, iomapp, wbc, pb, 534 xfs_convert_page(inode, page, iomapp, wbc, ioend,
506 startio, all_bh); 535 startio, all_bh);
507 /* stop if converting the next page might add 536 /* stop if converting the next page might add
508 * enough blocks that the corresponding byte 537 * enough blocks that the corresponding byte
@@ -514,12 +543,12 @@ xfs_map_unwritten(
514 if (tindex == tlast && 543 if (tindex == tlast &&
515 (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) { 544 (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) {
516 page = xfs_probe_unwritten_page(mapping, 545 page = xfs_probe_unwritten_page(mapping,
517 tindex, iomapp, pb, 546 tindex, iomapp, ioend,
518 pg_offset, &bs, bbits); 547 pg_offset, &bs, bbits);
519 if (page) { 548 if (page) {
520 nblocks += bs; 549 nblocks += bs;
521 atomic_add(bs, &pb->pb_io_remaining); 550 atomic_add(bs, &ioend->io_remaining);
522 xfs_convert_page(inode, page, iomapp, wbc, pb, 551 xfs_convert_page(inode, page, iomapp, wbc, ioend,
523 startio, all_bh); 552 startio, all_bh);
524 if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits)) 553 if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
525 goto enough; 554 goto enough;
@@ -528,21 +557,9 @@ xfs_map_unwritten(
528 } 557 }
529 558
530enough: 559enough:
531 size = nblocks; /* NB: using 64bit number here */ 560 ioend->io_size = (xfs_off_t)nblocks << block_bits;
532 size <<= block_bits; /* convert fsb's to byte range */ 561 ioend->io_offset = offset;
533 562 xfs_finish_ioend(ioend);
534 XFS_BUF_DATAIO(pb);
535 XFS_BUF_ASYNC(pb);
536 XFS_BUF_SET_SIZE(pb, size);
537 XFS_BUF_SET_COUNT(pb, size);
538 XFS_BUF_SET_OFFSET(pb, offset);
539 XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode));
540 XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_convert);
541
542 if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
543 pagebuf_iodone(pb, 1, 1);
544 }
545
546 return 0; 563 return 0;
547} 564}
548 565
@@ -787,7 +804,7 @@ xfs_page_state_convert(
787 continue; 804 continue;
788 if (!iomp) { 805 if (!iomp) {
789 err = xfs_map_blocks(inode, offset, len, &iomap, 806 err = xfs_map_blocks(inode, offset, len, &iomap,
790 BMAPI_READ|BMAPI_IGNSTATE); 807 BMAPI_WRITE|BMAPI_IGNSTATE);
791 if (err) { 808 if (err) {
792 goto error; 809 goto error;
793 } 810 }
@@ -1028,6 +1045,44 @@ linvfs_get_blocks_direct(
1028 create, 1, BMAPI_WRITE|BMAPI_DIRECT); 1045 create, 1, BMAPI_WRITE|BMAPI_DIRECT);
1029} 1046}
1030 1047
1048STATIC void
1049linvfs_end_io_direct(
1050 struct kiocb *iocb,
1051 loff_t offset,
1052 ssize_t size,
1053 void *private)
1054{
1055 xfs_ioend_t *ioend = iocb->private;
1056
1057 /*
1058 * Non-NULL private data means we need to issue a transaction to
1059 * convert a range from unwritten to written extents. This needs
1060 * to happen from process contect but aio+dio I/O completion
1061 * happens from irq context so we need to defer it to a workqueue.
1062 * This is not nessecary for synchronous direct I/O, but we do
1063 * it anyway to keep the code uniform and simpler.
1064 *
1065 * The core direct I/O code might be changed to always call the
1066 * completion handler in the future, in which case all this can
1067 * go away.
1068 */
1069 if (private && size > 0) {
1070 ioend->io_offset = offset;
1071 ioend->io_size = size;
1072 xfs_finish_ioend(ioend);
1073 } else {
1074 ASSERT(size >= 0);
1075 xfs_destroy_ioend(ioend);
1076 }
1077
1078 /*
1079 * blockdev_direct_IO can return an error even afer the I/O
1080 * completion handler was called. Thus we need to protect
1081 * against double-freeing.
1082 */
1083 iocb->private = NULL;
1084}
1085
1031STATIC ssize_t 1086STATIC ssize_t
1032linvfs_direct_IO( 1087linvfs_direct_IO(
1033 int rw, 1088 int rw,
@@ -1042,16 +1097,23 @@ linvfs_direct_IO(
1042 xfs_iomap_t iomap; 1097 xfs_iomap_t iomap;
1043 int maps = 1; 1098 int maps = 1;
1044 int error; 1099 int error;
1100 ssize_t ret;
1045 1101
1046 VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error); 1102 VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error);
1047 if (error) 1103 if (error)
1048 return -error; 1104 return -error;
1049 1105
1050 return blockdev_direct_IO_own_locking(rw, iocb, inode, 1106 iocb->private = xfs_alloc_ioend(inode);
1107
1108 ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
1051 iomap.iomap_target->pbr_bdev, 1109 iomap.iomap_target->pbr_bdev,
1052 iov, offset, nr_segs, 1110 iov, offset, nr_segs,
1053 linvfs_get_blocks_direct, 1111 linvfs_get_blocks_direct,
1054 linvfs_unwritten_convert_direct); 1112 linvfs_end_io_direct);
1113
1114 if (unlikely(ret <= 0 && iocb->private))
1115 xfs_destroy_ioend(iocb->private);
1116 return ret;
1055} 1117}
1056 1118
1057 1119
@@ -1202,6 +1264,16 @@ out_unlock:
1202 return error; 1264 return error;
1203} 1265}
1204 1266
1267STATIC int
1268linvfs_invalidate_page(
1269 struct page *page,
1270 unsigned long offset)
1271{
1272 xfs_page_trace(XFS_INVALIDPAGE_ENTER,
1273 page->mapping->host, page, offset);
1274 return block_invalidatepage(page, offset);
1275}
1276
1205/* 1277/*
1206 * Called to move a page into cleanable state - and from there 1278 * Called to move a page into cleanable state - and from there
1207 * to be released. Possibly the page is already clean. We always 1279 * to be released. Possibly the page is already clean. We always
@@ -1279,6 +1351,7 @@ struct address_space_operations linvfs_aops = {
1279 .writepage = linvfs_writepage, 1351 .writepage = linvfs_writepage,
1280 .sync_page = block_sync_page, 1352 .sync_page = block_sync_page,
1281 .releasepage = linvfs_release_page, 1353 .releasepage = linvfs_release_page,
1354 .invalidatepage = linvfs_invalidate_page,
1282 .prepare_write = linvfs_prepare_write, 1355 .prepare_write = linvfs_prepare_write,
1283 .commit_write = generic_commit_write, 1356 .commit_write = generic_commit_write,
1284 .bmap = linvfs_bmap, 1357 .bmap = linvfs_bmap,
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
new file mode 100644
index 000000000000..2fa62974a04d
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -0,0 +1,50 @@
1/*
2 * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32#ifndef __XFS_AOPS_H__
33#define __XFS_AOPS_H__
34
35extern struct workqueue_struct *xfsdatad_workqueue;
36extern mempool_t *xfs_ioend_pool;
37
38typedef void (*xfs_ioend_func_t)(void *);
39
40typedef struct xfs_ioend {
41 unsigned int io_uptodate; /* I/O status register */
42 atomic_t io_remaining; /* hold count */
43 struct vnode *io_vnode; /* file being written to */
44 struct buffer_head *io_buffer_head;/* buffer linked list head */
45 size_t io_size; /* size of the extent */
46 xfs_off_t io_offset; /* offset in the file */
47 struct work_struct io_work; /* xfsdatad work queue */
48} xfs_ioend_t;
49
50#endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index df0cba239dd5..655bf4a78afe 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as 5 * under the terms of version 2 of the GNU General Public License as
@@ -54,6 +54,7 @@
54#include <linux/percpu.h> 54#include <linux/percpu.h>
55#include <linux/blkdev.h> 55#include <linux/blkdev.h>
56#include <linux/hash.h> 56#include <linux/hash.h>
57#include <linux/kthread.h>
57 58
58#include "xfs_linux.h" 59#include "xfs_linux.h"
59 60
@@ -67,7 +68,7 @@ STATIC int xfsbufd_wakeup(int, unsigned int);
67STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); 68STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
68 69
69STATIC struct workqueue_struct *xfslogd_workqueue; 70STATIC struct workqueue_struct *xfslogd_workqueue;
70STATIC struct workqueue_struct *xfsdatad_workqueue; 71struct workqueue_struct *xfsdatad_workqueue;
71 72
72/* 73/*
73 * Pagebuf debugging 74 * Pagebuf debugging
@@ -590,8 +591,10 @@ found:
590 PB_SET_OWNER(pb); 591 PB_SET_OWNER(pb);
591 } 592 }
592 593
593 if (pb->pb_flags & PBF_STALE) 594 if (pb->pb_flags & PBF_STALE) {
595 ASSERT((pb->pb_flags & _PBF_DELWRI_Q) == 0);
594 pb->pb_flags &= PBF_MAPPED; 596 pb->pb_flags &= PBF_MAPPED;
597 }
595 PB_TRACE(pb, "got_lock", 0); 598 PB_TRACE(pb, "got_lock", 0);
596 XFS_STATS_INC(pb_get_locked); 599 XFS_STATS_INC(pb_get_locked);
597 return (pb); 600 return (pb);
@@ -700,25 +703,6 @@ xfs_buf_read_flags(
700} 703}
701 704
702/* 705/*
703 * Create a skeletal pagebuf (no pages associated with it).
704 */
705xfs_buf_t *
706pagebuf_lookup(
707 xfs_buftarg_t *target,
708 loff_t ioff,
709 size_t isize,
710 page_buf_flags_t flags)
711{
712 xfs_buf_t *pb;
713
714 pb = pagebuf_allocate(flags);
715 if (pb) {
716 _pagebuf_initialize(pb, target, ioff, isize, flags);
717 }
718 return pb;
719}
720
721/*
722 * If we are not low on memory then do the readahead in a deadlock 706 * If we are not low on memory then do the readahead in a deadlock
723 * safe manner. 707 * safe manner.
724 */ 708 */
@@ -913,22 +897,23 @@ pagebuf_rele(
913 do_free = 0; 897 do_free = 0;
914 } 898 }
915 899
916 if (pb->pb_flags & PBF_DELWRI) { 900 if (pb->pb_flags & PBF_FS_MANAGED) {
917 pb->pb_flags |= PBF_ASYNC;
918 atomic_inc(&pb->pb_hold);
919 pagebuf_delwri_queue(pb, 0);
920 do_free = 0;
921 } else if (pb->pb_flags & PBF_FS_MANAGED) {
922 do_free = 0; 901 do_free = 0;
923 } 902 }
924 903
925 if (do_free) { 904 if (do_free) {
905 ASSERT((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == 0);
926 list_del_init(&pb->pb_hash_list); 906 list_del_init(&pb->pb_hash_list);
927 spin_unlock(&hash->bh_lock); 907 spin_unlock(&hash->bh_lock);
928 pagebuf_free(pb); 908 pagebuf_free(pb);
929 } else { 909 } else {
930 spin_unlock(&hash->bh_lock); 910 spin_unlock(&hash->bh_lock);
931 } 911 }
912 } else {
913 /*
914 * Catch reference count leaks
915 */
916 ASSERT(atomic_read(&pb->pb_hold) >= 0);
932 } 917 }
933} 918}
934 919
@@ -1006,13 +991,24 @@ pagebuf_lock(
1006 * pagebuf_unlock 991 * pagebuf_unlock
1007 * 992 *
1008 * pagebuf_unlock releases the lock on the buffer object created by 993 * pagebuf_unlock releases the lock on the buffer object created by
1009 * pagebuf_lock or pagebuf_cond_lock (not any 994 * pagebuf_lock or pagebuf_cond_lock (not any pinning of underlying pages
1010 * pinning of underlying pages created by pagebuf_pin). 995 * created by pagebuf_pin).
996 *
997 * If the buffer is marked delwri but is not queued, do so before we
998 * unlock the buffer as we need to set flags correctly. We also need to
999 * take a reference for the delwri queue because the unlocker is going to
1000 * drop their's and they don't know we just queued it.
1011 */ 1001 */
1012void 1002void
1013pagebuf_unlock( /* unlock buffer */ 1003pagebuf_unlock( /* unlock buffer */
1014 xfs_buf_t *pb) /* buffer to unlock */ 1004 xfs_buf_t *pb) /* buffer to unlock */
1015{ 1005{
1006 if ((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == PBF_DELWRI) {
1007 atomic_inc(&pb->pb_hold);
1008 pb->pb_flags |= PBF_ASYNC;
1009 pagebuf_delwri_queue(pb, 0);
1010 }
1011
1016 PB_CLEAR_OWNER(pb); 1012 PB_CLEAR_OWNER(pb);
1017 up(&pb->pb_sema); 1013 up(&pb->pb_sema);
1018 PB_TRACE(pb, "unlock", 0); 1014 PB_TRACE(pb, "unlock", 0);
@@ -1249,8 +1245,8 @@ bio_end_io_pagebuf(
1249 int error) 1245 int error)
1250{ 1246{
1251 xfs_buf_t *pb = (xfs_buf_t *)bio->bi_private; 1247 xfs_buf_t *pb = (xfs_buf_t *)bio->bi_private;
1252 unsigned int i, blocksize = pb->pb_target->pbr_bsize; 1248 unsigned int blocksize = pb->pb_target->pbr_bsize;
1253 struct bio_vec *bvec = bio->bi_io_vec; 1249 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1254 1250
1255 if (bio->bi_size) 1251 if (bio->bi_size)
1256 return 1; 1252 return 1;
@@ -1258,10 +1254,12 @@ bio_end_io_pagebuf(
1258 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) 1254 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1259 pb->pb_error = EIO; 1255 pb->pb_error = EIO;
1260 1256
1261 for (i = 0; i < bio->bi_vcnt; i++, bvec++) { 1257 do {
1262 struct page *page = bvec->bv_page; 1258 struct page *page = bvec->bv_page;
1263 1259
1264 if (pb->pb_error) { 1260 if (unlikely(pb->pb_error)) {
1261 if (pb->pb_flags & PBF_READ)
1262 ClearPageUptodate(page);
1265 SetPageError(page); 1263 SetPageError(page);
1266 } else if (blocksize == PAGE_CACHE_SIZE) { 1264 } else if (blocksize == PAGE_CACHE_SIZE) {
1267 SetPageUptodate(page); 1265 SetPageUptodate(page);
@@ -1270,10 +1268,13 @@ bio_end_io_pagebuf(
1270 set_page_region(page, bvec->bv_offset, bvec->bv_len); 1268 set_page_region(page, bvec->bv_offset, bvec->bv_len);
1271 } 1269 }
1272 1270
1271 if (--bvec >= bio->bi_io_vec)
1272 prefetchw(&bvec->bv_page->flags);
1273
1273 if (_pagebuf_iolocked(pb)) { 1274 if (_pagebuf_iolocked(pb)) {
1274 unlock_page(page); 1275 unlock_page(page);
1275 } 1276 }
1276 } 1277 } while (bvec >= bio->bi_io_vec);
1277 1278
1278 _pagebuf_iodone(pb, 1); 1279 _pagebuf_iodone(pb, 1);
1279 bio_put(bio); 1280 bio_put(bio);
@@ -1511,6 +1512,11 @@ again:
1511 ASSERT(btp == bp->pb_target); 1512 ASSERT(btp == bp->pb_target);
1512 if (!(bp->pb_flags & PBF_FS_MANAGED)) { 1513 if (!(bp->pb_flags & PBF_FS_MANAGED)) {
1513 spin_unlock(&hash->bh_lock); 1514 spin_unlock(&hash->bh_lock);
1515 /*
1516 * Catch superblock reference count leaks
1517 * immediately
1518 */
1519 BUG_ON(bp->pb_bn == 0);
1514 delay(100); 1520 delay(100);
1515 goto again; 1521 goto again;
1516 } 1522 }
@@ -1686,17 +1692,20 @@ pagebuf_delwri_queue(
1686 int unlock) 1692 int unlock)
1687{ 1693{
1688 PB_TRACE(pb, "delwri_q", (long)unlock); 1694 PB_TRACE(pb, "delwri_q", (long)unlock);
1689 ASSERT(pb->pb_flags & PBF_DELWRI); 1695 ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) ==
1696 (PBF_DELWRI|PBF_ASYNC));
1690 1697
1691 spin_lock(&pbd_delwrite_lock); 1698 spin_lock(&pbd_delwrite_lock);
1692 /* If already in the queue, dequeue and place at tail */ 1699 /* If already in the queue, dequeue and place at tail */
1693 if (!list_empty(&pb->pb_list)) { 1700 if (!list_empty(&pb->pb_list)) {
1701 ASSERT(pb->pb_flags & _PBF_DELWRI_Q);
1694 if (unlock) { 1702 if (unlock) {
1695 atomic_dec(&pb->pb_hold); 1703 atomic_dec(&pb->pb_hold);
1696 } 1704 }
1697 list_del(&pb->pb_list); 1705 list_del(&pb->pb_list);
1698 } 1706 }
1699 1707
1708 pb->pb_flags |= _PBF_DELWRI_Q;
1700 list_add_tail(&pb->pb_list, &pbd_delwrite_queue); 1709 list_add_tail(&pb->pb_list, &pbd_delwrite_queue);
1701 pb->pb_queuetime = jiffies; 1710 pb->pb_queuetime = jiffies;
1702 spin_unlock(&pbd_delwrite_lock); 1711 spin_unlock(&pbd_delwrite_lock);
@@ -1713,10 +1722,11 @@ pagebuf_delwri_dequeue(
1713 1722
1714 spin_lock(&pbd_delwrite_lock); 1723 spin_lock(&pbd_delwrite_lock);
1715 if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) { 1724 if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) {
1725 ASSERT(pb->pb_flags & _PBF_DELWRI_Q);
1716 list_del_init(&pb->pb_list); 1726 list_del_init(&pb->pb_list);
1717 dequeued = 1; 1727 dequeued = 1;
1718 } 1728 }
1719 pb->pb_flags &= ~PBF_DELWRI; 1729 pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
1720 spin_unlock(&pbd_delwrite_lock); 1730 spin_unlock(&pbd_delwrite_lock);
1721 1731
1722 if (dequeued) 1732 if (dequeued)
@@ -1733,9 +1743,7 @@ pagebuf_runall_queues(
1733} 1743}
1734 1744
1735/* Defines for pagebuf daemon */ 1745/* Defines for pagebuf daemon */
1736STATIC DECLARE_COMPLETION(xfsbufd_done);
1737STATIC struct task_struct *xfsbufd_task; 1746STATIC struct task_struct *xfsbufd_task;
1738STATIC int xfsbufd_active;
1739STATIC int xfsbufd_force_flush; 1747STATIC int xfsbufd_force_flush;
1740STATIC int xfsbufd_force_sleep; 1748STATIC int xfsbufd_force_sleep;
1741 1749
@@ -1761,14 +1769,8 @@ xfsbufd(
1761 xfs_buftarg_t *target; 1769 xfs_buftarg_t *target;
1762 xfs_buf_t *pb, *n; 1770 xfs_buf_t *pb, *n;
1763 1771
1764 /* Set up the thread */
1765 daemonize("xfsbufd");
1766 current->flags |= PF_MEMALLOC; 1772 current->flags |= PF_MEMALLOC;
1767 1773
1768 xfsbufd_task = current;
1769 xfsbufd_active = 1;
1770 barrier();
1771
1772 INIT_LIST_HEAD(&tmp); 1774 INIT_LIST_HEAD(&tmp);
1773 do { 1775 do {
1774 if (unlikely(freezing(current))) { 1776 if (unlikely(freezing(current))) {
@@ -1795,7 +1797,7 @@ xfsbufd(
1795 break; 1797 break;
1796 } 1798 }
1797 1799
1798 pb->pb_flags &= ~PBF_DELWRI; 1800 pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
1799 pb->pb_flags |= PBF_WRITE; 1801 pb->pb_flags |= PBF_WRITE;
1800 list_move(&pb->pb_list, &tmp); 1802 list_move(&pb->pb_list, &tmp);
1801 } 1803 }
@@ -1816,9 +1818,9 @@ xfsbufd(
1816 purge_addresses(); 1818 purge_addresses();
1817 1819
1818 xfsbufd_force_flush = 0; 1820 xfsbufd_force_flush = 0;
1819 } while (xfsbufd_active); 1821 } while (!kthread_should_stop());
1820 1822
1821 complete_and_exit(&xfsbufd_done, 0); 1823 return 0;
1822} 1824}
1823 1825
1824/* 1826/*
@@ -1845,15 +1847,13 @@ xfs_flush_buftarg(
1845 if (pb->pb_target != target) 1847 if (pb->pb_target != target)
1846 continue; 1848 continue;
1847 1849
1848 ASSERT(pb->pb_flags & PBF_DELWRI); 1850 ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q));
1849 PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb)); 1851 PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
1850 if (pagebuf_ispin(pb)) { 1852 if (pagebuf_ispin(pb)) {
1851 pincount++; 1853 pincount++;
1852 continue; 1854 continue;
1853 } 1855 }
1854 1856
1855 pb->pb_flags &= ~PBF_DELWRI;
1856 pb->pb_flags |= PBF_WRITE;
1857 list_move(&pb->pb_list, &tmp); 1857 list_move(&pb->pb_list, &tmp);
1858 } 1858 }
1859 spin_unlock(&pbd_delwrite_lock); 1859 spin_unlock(&pbd_delwrite_lock);
@@ -1862,12 +1862,14 @@ xfs_flush_buftarg(
1862 * Dropped the delayed write list lock, now walk the temporary list 1862 * Dropped the delayed write list lock, now walk the temporary list
1863 */ 1863 */
1864 list_for_each_entry_safe(pb, n, &tmp, pb_list) { 1864 list_for_each_entry_safe(pb, n, &tmp, pb_list) {
1865 pagebuf_lock(pb);
1866 pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
1867 pb->pb_flags |= PBF_WRITE;
1865 if (wait) 1868 if (wait)
1866 pb->pb_flags &= ~PBF_ASYNC; 1869 pb->pb_flags &= ~PBF_ASYNC;
1867 else 1870 else
1868 list_del_init(&pb->pb_list); 1871 list_del_init(&pb->pb_list);
1869 1872
1870 pagebuf_lock(pb);
1871 pagebuf_iostrategy(pb); 1873 pagebuf_iostrategy(pb);
1872 } 1874 }
1873 1875
@@ -1901,9 +1903,11 @@ xfs_buf_daemons_start(void)
1901 if (!xfsdatad_workqueue) 1903 if (!xfsdatad_workqueue)
1902 goto out_destroy_xfslogd_workqueue; 1904 goto out_destroy_xfslogd_workqueue;
1903 1905
1904 error = kernel_thread(xfsbufd, NULL, CLONE_FS|CLONE_FILES); 1906 xfsbufd_task = kthread_run(xfsbufd, NULL, "xfsbufd");
1905 if (error < 0) 1907 if (IS_ERR(xfsbufd_task)) {
1908 error = PTR_ERR(xfsbufd_task);
1906 goto out_destroy_xfsdatad_workqueue; 1909 goto out_destroy_xfsdatad_workqueue;
1910 }
1907 return 0; 1911 return 0;
1908 1912
1909 out_destroy_xfsdatad_workqueue: 1913 out_destroy_xfsdatad_workqueue:
@@ -1920,10 +1924,7 @@ xfs_buf_daemons_start(void)
1920STATIC void 1924STATIC void
1921xfs_buf_daemons_stop(void) 1925xfs_buf_daemons_stop(void)
1922{ 1926{
1923 xfsbufd_active = 0; 1927 kthread_stop(xfsbufd_task);
1924 barrier();
1925 wait_for_completion(&xfsbufd_done);
1926
1927 destroy_workqueue(xfslogd_workqueue); 1928 destroy_workqueue(xfslogd_workqueue);
1928 destroy_workqueue(xfsdatad_workqueue); 1929 destroy_workqueue(xfsdatad_workqueue);
1929} 1930}
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 3f8f69a66aea..67c19f799232 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -89,6 +89,7 @@ typedef enum page_buf_flags_e { /* pb_flags values */
89 _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ 89 _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */
90 _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */ 90 _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */
91 _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ 91 _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
92 _PBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */
92} page_buf_flags_t; 93} page_buf_flags_t;
93 94
94#define PBF_UPDATE (PBF_READ | PBF_WRITE) 95#define PBF_UPDATE (PBF_READ | PBF_WRITE)
@@ -206,13 +207,6 @@ extern xfs_buf_t *xfs_buf_read_flags( /* allocate and read a buffer */
206#define xfs_buf_read(target, blkno, len, flags) \ 207#define xfs_buf_read(target, blkno, len, flags) \
207 xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED) 208 xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED)
208 209
209extern xfs_buf_t *pagebuf_lookup(
210 xfs_buftarg_t *,
211 loff_t, /* starting offset of range */
212 size_t, /* length of range */
213 page_buf_flags_t); /* PBF_READ, PBF_WRITE, */
214 /* PBF_FORCEIO, */
215
216extern xfs_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */ 210extern xfs_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */
217 /* no memory or disk address */ 211 /* no memory or disk address */
218 size_t len, 212 size_t len,
@@ -344,8 +338,6 @@ extern void pagebuf_trace(
344 338
345 339
346 340
347
348
349/* These are just for xfs_syncsub... it sets an internal variable 341/* These are just for xfs_syncsub... it sets an internal variable
350 * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t 342 * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t
351 */ 343 */
@@ -452,7 +444,7 @@ extern void pagebuf_trace(
452 444
453#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr) 445#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr)
454 446
455extern inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset) 447static inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset)
456{ 448{
457 if (bp->pb_flags & PBF_MAPPED) 449 if (bp->pb_flags & PBF_MAPPED)
458 return XFS_BUF_PTR(bp) + offset; 450 return XFS_BUF_PTR(bp) + offset;
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index f1ce4323f56e..3881622bcf08 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -311,6 +311,31 @@ linvfs_fsync(
311 311
312#define nextdp(dp) ((struct xfs_dirent *)((char *)(dp) + (dp)->d_reclen)) 312#define nextdp(dp) ((struct xfs_dirent *)((char *)(dp) + (dp)->d_reclen))
313 313
314#ifdef CONFIG_XFS_DMAPI
315
316STATIC struct page *
317linvfs_filemap_nopage(
318 struct vm_area_struct *area,
319 unsigned long address,
320 int *type)
321{
322 struct inode *inode = area->vm_file->f_dentry->d_inode;
323 vnode_t *vp = LINVFS_GET_VP(inode);
324 xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp);
325 int error;
326
327 ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);
328
329 error = XFS_SEND_MMAP(mp, area, 0);
330 if (error)
331 return NULL;
332
333 return filemap_nopage(area, address, type);
334}
335
336#endif /* CONFIG_XFS_DMAPI */
337
338
314STATIC int 339STATIC int
315linvfs_readdir( 340linvfs_readdir(
316 struct file *filp, 341 struct file *filp,
@@ -390,14 +415,6 @@ done:
390 return -error; 415 return -error;
391} 416}
392 417
393#ifdef CONFIG_XFS_DMAPI
394STATIC void
395linvfs_mmap_close(
396 struct vm_area_struct *vma)
397{
398 xfs_dm_mm_put(vma);
399}
400#endif /* CONFIG_XFS_DMAPI */
401 418
402STATIC int 419STATIC int
403linvfs_file_mmap( 420linvfs_file_mmap(
@@ -411,16 +428,11 @@ linvfs_file_mmap(
411 428
412 vma->vm_ops = &linvfs_file_vm_ops; 429 vma->vm_ops = &linvfs_file_vm_ops;
413 430
414 if (vp->v_vfsp->vfs_flag & VFS_DMI) {
415 xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp);
416
417 error = -XFS_SEND_MMAP(mp, vma, 0);
418 if (error)
419 return error;
420#ifdef CONFIG_XFS_DMAPI 431#ifdef CONFIG_XFS_DMAPI
432 if (vp->v_vfsp->vfs_flag & VFS_DMI) {
421 vma->vm_ops = &linvfs_dmapi_file_vm_ops; 433 vma->vm_ops = &linvfs_dmapi_file_vm_ops;
422#endif
423 } 434 }
435#endif /* CONFIG_XFS_DMAPI */
424 436
425 VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error); 437 VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error);
426 if (!error) 438 if (!error)
@@ -474,6 +486,7 @@ linvfs_ioctl_invis(
474 return error; 486 return error;
475} 487}
476 488
489#ifdef CONFIG_XFS_DMAPI
477#ifdef HAVE_VMOP_MPROTECT 490#ifdef HAVE_VMOP_MPROTECT
478STATIC int 491STATIC int
479linvfs_mprotect( 492linvfs_mprotect(
@@ -494,6 +507,7 @@ linvfs_mprotect(
494 return error; 507 return error;
495} 508}
496#endif /* HAVE_VMOP_MPROTECT */ 509#endif /* HAVE_VMOP_MPROTECT */
510#endif /* CONFIG_XFS_DMAPI */
497 511
498#ifdef HAVE_FOP_OPEN_EXEC 512#ifdef HAVE_FOP_OPEN_EXEC
499/* If the user is attempting to execute a file that is offline then 513/* If the user is attempting to execute a file that is offline then
@@ -528,49 +542,10 @@ open_exec_out:
528} 542}
529#endif /* HAVE_FOP_OPEN_EXEC */ 543#endif /* HAVE_FOP_OPEN_EXEC */
530 544
531/*
532 * Temporary workaround to the AIO direct IO write problem.
533 * This code can go and we can revert to do_sync_write once
534 * the writepage(s) rework is merged.
535 */
536STATIC ssize_t
537linvfs_write(
538 struct file *filp,
539 const char __user *buf,
540 size_t len,
541 loff_t *ppos)
542{
543 struct kiocb kiocb;
544 ssize_t ret;
545
546 init_sync_kiocb(&kiocb, filp);
547 kiocb.ki_pos = *ppos;
548 ret = __linvfs_write(&kiocb, buf, 0, len, kiocb.ki_pos);
549 *ppos = kiocb.ki_pos;
550 return ret;
551}
552STATIC ssize_t
553linvfs_write_invis(
554 struct file *filp,
555 const char __user *buf,
556 size_t len,
557 loff_t *ppos)
558{
559 struct kiocb kiocb;
560 ssize_t ret;
561
562 init_sync_kiocb(&kiocb, filp);
563 kiocb.ki_pos = *ppos;
564 ret = __linvfs_write(&kiocb, buf, IO_INVIS, len, kiocb.ki_pos);
565 *ppos = kiocb.ki_pos;
566 return ret;
567}
568
569
570struct file_operations linvfs_file_operations = { 545struct file_operations linvfs_file_operations = {
571 .llseek = generic_file_llseek, 546 .llseek = generic_file_llseek,
572 .read = do_sync_read, 547 .read = do_sync_read,
573 .write = linvfs_write, 548 .write = do_sync_write,
574 .readv = linvfs_readv, 549 .readv = linvfs_readv,
575 .writev = linvfs_writev, 550 .writev = linvfs_writev,
576 .aio_read = linvfs_aio_read, 551 .aio_read = linvfs_aio_read,
@@ -592,7 +567,7 @@ struct file_operations linvfs_file_operations = {
592struct file_operations linvfs_invis_file_operations = { 567struct file_operations linvfs_invis_file_operations = {
593 .llseek = generic_file_llseek, 568 .llseek = generic_file_llseek,
594 .read = do_sync_read, 569 .read = do_sync_read,
595 .write = linvfs_write_invis, 570 .write = do_sync_write,
596 .readv = linvfs_readv_invis, 571 .readv = linvfs_readv_invis,
597 .writev = linvfs_writev_invis, 572 .writev = linvfs_writev_invis,
598 .aio_read = linvfs_aio_read_invis, 573 .aio_read = linvfs_aio_read_invis,
@@ -626,8 +601,7 @@ static struct vm_operations_struct linvfs_file_vm_ops = {
626 601
627#ifdef CONFIG_XFS_DMAPI 602#ifdef CONFIG_XFS_DMAPI
628static struct vm_operations_struct linvfs_dmapi_file_vm_ops = { 603static struct vm_operations_struct linvfs_dmapi_file_vm_ops = {
629 .close = linvfs_mmap_close, 604 .nopage = linvfs_filemap_nopage,
630 .nopage = filemap_nopage,
631 .populate = filemap_populate, 605 .populate = filemap_populate,
632#ifdef HAVE_VMOP_MPROTECT 606#ifdef HAVE_VMOP_MPROTECT
633 .mprotect = linvfs_mprotect, 607 .mprotect = linvfs_mprotect,
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 05a447e51cc0..6a3326bcd8d0 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -141,13 +141,19 @@ xfs_find_handle(
141 return -XFS_ERROR(EINVAL); 141 return -XFS_ERROR(EINVAL);
142 } 142 }
143 143
144 /* we need the vnode */ 144 switch (inode->i_mode & S_IFMT) {
145 vp = LINVFS_GET_VP(inode); 145 case S_IFREG:
146 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) { 146 case S_IFDIR:
147 case S_IFLNK:
148 break;
149 default:
147 iput(inode); 150 iput(inode);
148 return -XFS_ERROR(EBADF); 151 return -XFS_ERROR(EBADF);
149 } 152 }
150 153
154 /* we need the vnode */
155 vp = LINVFS_GET_VP(inode);
156
151 /* now we can grab the fsid */ 157 /* now we can grab the fsid */
152 memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t)); 158 memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t));
153 hsize = sizeof(xfs_fsid_t); 159 hsize = sizeof(xfs_fsid_t);
@@ -386,7 +392,7 @@ xfs_readlink_by_handle(
386 return -error; 392 return -error;
387 393
388 /* Restrict this handle operation to symlinks only. */ 394 /* Restrict this handle operation to symlinks only. */
389 if (vp->v_type != VLNK) { 395 if (!S_ISLNK(inode->i_mode)) {
390 VN_RELE(vp); 396 VN_RELE(vp);
391 return -XFS_ERROR(EINVAL); 397 return -XFS_ERROR(EINVAL);
392 } 398 }
@@ -982,10 +988,10 @@ xfs_ioc_space(
982 if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND)) 988 if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND))
983 return -XFS_ERROR(EPERM); 989 return -XFS_ERROR(EPERM);
984 990
985 if (!(filp->f_flags & FMODE_WRITE)) 991 if (!(filp->f_mode & FMODE_WRITE))
986 return -XFS_ERROR(EBADF); 992 return -XFS_ERROR(EBADF);
987 993
988 if (vp->v_type != VREG) 994 if (!VN_ISREG(vp))
989 return -XFS_ERROR(EINVAL); 995 return -XFS_ERROR(EINVAL);
990 996
991 if (copy_from_user(&bf, arg, sizeof(bf))) 997 if (copy_from_user(&bf, arg, sizeof(bf)))
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 0f8f1384eb36..4636b7f86f1f 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -47,8 +47,52 @@
47#include "xfs_vnode.h" 47#include "xfs_vnode.h"
48#include "xfs_dfrag.h" 48#include "xfs_dfrag.h"
49 49
50#define _NATIVE_IOC(cmd, type) \
51 _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
52
50#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) 53#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
51#define BROKEN_X86_ALIGNMENT 54#define BROKEN_X86_ALIGNMENT
55/* on ia32 l_start is on a 32-bit boundary */
56typedef struct xfs_flock64_32 {
57 __s16 l_type;
58 __s16 l_whence;
59 __s64 l_start __attribute__((packed));
60 /* len == 0 means until end of file */
61 __s64 l_len __attribute__((packed));
62 __s32 l_sysid;
63 __u32 l_pid;
64 __s32 l_pad[4]; /* reserve area */
65} xfs_flock64_32_t;
66
67#define XFS_IOC_ALLOCSP_32 _IOW ('X', 10, struct xfs_flock64_32)
68#define XFS_IOC_FREESP_32 _IOW ('X', 11, struct xfs_flock64_32)
69#define XFS_IOC_ALLOCSP64_32 _IOW ('X', 36, struct xfs_flock64_32)
70#define XFS_IOC_FREESP64_32 _IOW ('X', 37, struct xfs_flock64_32)
71#define XFS_IOC_RESVSP_32 _IOW ('X', 40, struct xfs_flock64_32)
72#define XFS_IOC_UNRESVSP_32 _IOW ('X', 41, struct xfs_flock64_32)
73#define XFS_IOC_RESVSP64_32 _IOW ('X', 42, struct xfs_flock64_32)
74#define XFS_IOC_UNRESVSP64_32 _IOW ('X', 43, struct xfs_flock64_32)
75
76/* just account for different alignment */
77STATIC unsigned long
78xfs_ioctl32_flock(
79 unsigned long arg)
80{
81 xfs_flock64_32_t __user *p32 = (void __user *)arg;
82 xfs_flock64_t __user *p = compat_alloc_user_space(sizeof(*p));
83
84 if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) ||
85 copy_in_user(&p->l_whence, &p32->l_whence, sizeof(s16)) ||
86 copy_in_user(&p->l_start, &p32->l_start, sizeof(s64)) ||
87 copy_in_user(&p->l_len, &p32->l_len, sizeof(s64)) ||
88 copy_in_user(&p->l_sysid, &p32->l_sysid, sizeof(s32)) ||
89 copy_in_user(&p->l_pid, &p32->l_pid, sizeof(u32)) ||
90 copy_in_user(&p->l_pad, &p32->l_pad, 4*sizeof(u32)))
91 return -EFAULT;
92
93 return (unsigned long)p;
94}
95
52#else 96#else
53 97
54typedef struct xfs_fsop_bulkreq32 { 98typedef struct xfs_fsop_bulkreq32 {
@@ -103,7 +147,6 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
103/* not handled 147/* not handled
104 case XFS_IOC_FD_TO_HANDLE: 148 case XFS_IOC_FD_TO_HANDLE:
105 case XFS_IOC_PATH_TO_HANDLE: 149 case XFS_IOC_PATH_TO_HANDLE:
106 case XFS_IOC_PATH_TO_HANDLE:
107 case XFS_IOC_PATH_TO_FSHANDLE: 150 case XFS_IOC_PATH_TO_FSHANDLE:
108 case XFS_IOC_OPEN_BY_HANDLE: 151 case XFS_IOC_OPEN_BY_HANDLE:
109 case XFS_IOC_FSSETDM_BY_HANDLE: 152 case XFS_IOC_FSSETDM_BY_HANDLE:
@@ -124,8 +167,21 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
124 case XFS_IOC_ERROR_CLEARALL: 167 case XFS_IOC_ERROR_CLEARALL:
125 break; 168 break;
126 169
127#ifndef BROKEN_X86_ALIGNMENT 170#ifdef BROKEN_X86_ALIGNMENT
128 /* xfs_flock_t and xfs_bstat_t have wrong u32 vs u64 alignment */ 171 /* xfs_flock_t has wrong u32 vs u64 alignment */
172 case XFS_IOC_ALLOCSP_32:
173 case XFS_IOC_FREESP_32:
174 case XFS_IOC_ALLOCSP64_32:
175 case XFS_IOC_FREESP64_32:
176 case XFS_IOC_RESVSP_32:
177 case XFS_IOC_UNRESVSP_32:
178 case XFS_IOC_RESVSP64_32:
179 case XFS_IOC_UNRESVSP64_32:
180 arg = xfs_ioctl32_flock(arg);
181 cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
182 break;
183
184#else /* These are handled fine if no alignment issues */
129 case XFS_IOC_ALLOCSP: 185 case XFS_IOC_ALLOCSP:
130 case XFS_IOC_FREESP: 186 case XFS_IOC_FREESP:
131 case XFS_IOC_RESVSP: 187 case XFS_IOC_RESVSP:
@@ -134,6 +190,9 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
134 case XFS_IOC_FREESP64: 190 case XFS_IOC_FREESP64:
135 case XFS_IOC_RESVSP64: 191 case XFS_IOC_RESVSP64:
136 case XFS_IOC_UNRESVSP64: 192 case XFS_IOC_UNRESVSP64:
193 break;
194
195 /* xfs_bstat_t still has wrong u32 vs u64 alignment */
137 case XFS_IOC_SWAPEXT: 196 case XFS_IOC_SWAPEXT:
138 break; 197 break;
139 198
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index f252605514eb..77708a8c9f87 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -140,7 +140,6 @@ linvfs_mknod(
140 140
141 memset(&va, 0, sizeof(va)); 141 memset(&va, 0, sizeof(va));
142 va.va_mask = XFS_AT_TYPE|XFS_AT_MODE; 142 va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
143 va.va_type = IFTOVT(mode);
144 va.va_mode = mode; 143 va.va_mode = mode;
145 144
146 switch (mode & S_IFMT) { 145 switch (mode & S_IFMT) {
@@ -308,14 +307,13 @@ linvfs_symlink(
308 cvp = NULL; 307 cvp = NULL;
309 308
310 memset(&va, 0, sizeof(va)); 309 memset(&va, 0, sizeof(va));
311 va.va_type = VLNK; 310 va.va_mode = S_IFLNK |
312 va.va_mode = irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO; 311 (irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO);
313 va.va_mask = XFS_AT_TYPE|XFS_AT_MODE; 312 va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
314 313
315 error = 0; 314 error = 0;
316 VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error); 315 VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error);
317 if (!error && cvp) { 316 if (!error && cvp) {
318 ASSERT(cvp->v_type == VLNK);
319 ip = LINVFS_GET_IP(cvp); 317 ip = LINVFS_GET_IP(cvp);
320 d_instantiate(dentry, ip); 318 d_instantiate(dentry, ip);
321 validate_fields(dir); 319 validate_fields(dir);
@@ -425,9 +423,14 @@ linvfs_follow_link(
425 return NULL; 423 return NULL;
426} 424}
427 425
428static void linvfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) 426STATIC void
427linvfs_put_link(
428 struct dentry *dentry,
429 struct nameidata *nd,
430 void *p)
429{ 431{
430 char *s = nd_get_link(nd); 432 char *s = nd_get_link(nd);
433
431 if (!IS_ERR(s)) 434 if (!IS_ERR(s))
432 kfree(s); 435 kfree(s);
433} 436}
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 42dc5e4662ed..68c5d885ed9c 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -64,7 +64,6 @@
64#include <sema.h> 64#include <sema.h>
65#include <time.h> 65#include <time.h>
66 66
67#include <support/qsort.h>
68#include <support/ktrace.h> 67#include <support/ktrace.h>
69#include <support/debug.h> 68#include <support/debug.h>
70#include <support/move.h> 69#include <support/move.h>
@@ -104,6 +103,7 @@
104#include <xfs_stats.h> 103#include <xfs_stats.h>
105#include <xfs_sysctl.h> 104#include <xfs_sysctl.h>
106#include <xfs_iops.h> 105#include <xfs_iops.h>
106#include <xfs_aops.h>
107#include <xfs_super.h> 107#include <xfs_super.h>
108#include <xfs_globals.h> 108#include <xfs_globals.h>
109#include <xfs_fs_subr.h> 109#include <xfs_fs_subr.h>
@@ -254,11 +254,18 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh)
254#define MAX(a,b) (max(a,b)) 254#define MAX(a,b) (max(a,b))
255#define howmany(x, y) (((x)+((y)-1))/(y)) 255#define howmany(x, y) (((x)+((y)-1))/(y))
256#define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) 256#define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
257#define qsort(a,n,s,fn) sort(a,n,s,fn,NULL)
257 258
259/*
260 * Various platform dependent calls that don't fit anywhere else
261 */
258#define xfs_stack_trace() dump_stack() 262#define xfs_stack_trace() dump_stack()
259
260#define xfs_itruncate_data(ip, off) \ 263#define xfs_itruncate_data(ip, off) \
261 (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off))) 264 (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off)))
265#define xfs_statvfs_fsid(statp, mp) \
266 ({ u64 id = huge_encode_dev((mp)->m_dev); \
267 __kernel_fsid_t *fsid = &(statp)->f_fsid; \
268 (fsid->val[0] = (u32)id, fsid->val[1] = (u32)(id >> 32)); })
262 269
263 270
264/* Move the kernel do_div definition off to one side */ 271/* Move the kernel do_div definition off to one side */
@@ -371,6 +378,4 @@ static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
371 return(x * y); 378 return(x * y);
372} 379}
373 380
374#define qsort(a, n, s, cmp) sort(a, n, s, cmp, NULL)
375
376#endif /* __XFS_LINUX__ */ 381#endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index acab58c48043..3b5fabe8dae9 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -660,9 +660,6 @@ xfs_write(
660 (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? 660 (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
661 mp->m_rtdev_targp : mp->m_ddev_targp; 661 mp->m_rtdev_targp : mp->m_ddev_targp;
662 662
663 if (ioflags & IO_ISAIO)
664 return XFS_ERROR(-ENOSYS);
665
666 if ((pos & target->pbr_smask) || (count & target->pbr_smask)) 663 if ((pos & target->pbr_smask) || (count & target->pbr_smask))
667 return XFS_ERROR(-EINVAL); 664 return XFS_ERROR(-EINVAL);
668 665
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index f197a720e394..6294dcdb797c 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -70,9 +70,10 @@ struct xfs_iomap;
70#define XFS_SENDFILE_ENTER 21 70#define XFS_SENDFILE_ENTER 21
71#define XFS_WRITEPAGE_ENTER 22 71#define XFS_WRITEPAGE_ENTER 22
72#define XFS_RELEASEPAGE_ENTER 23 72#define XFS_RELEASEPAGE_ENTER 23
73#define XFS_IOMAP_ALLOC_ENTER 24 73#define XFS_INVALIDPAGE_ENTER 24
74#define XFS_IOMAP_ALLOC_MAP 25 74#define XFS_IOMAP_ALLOC_ENTER 25
75#define XFS_IOMAP_UNWRITTEN 26 75#define XFS_IOMAP_ALLOC_MAP 26
76#define XFS_IOMAP_UNWRITTEN 27
76extern void xfs_rw_enter_trace(int, struct xfs_iocore *, 77extern void xfs_rw_enter_trace(int, struct xfs_iocore *,
77 void *, size_t, loff_t, int); 78 void *, size_t, loff_t, int);
78extern void xfs_inval_cached_trace(struct xfs_iocore *, 79extern void xfs_inval_cached_trace(struct xfs_iocore *,
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index f6dd7de25927..0da87bfc9999 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -70,11 +70,15 @@
70#include <linux/namei.h> 70#include <linux/namei.h>
71#include <linux/init.h> 71#include <linux/init.h>
72#include <linux/mount.h> 72#include <linux/mount.h>
73#include <linux/mempool.h>
73#include <linux/writeback.h> 74#include <linux/writeback.h>
75#include <linux/kthread.h>
74 76
75STATIC struct quotactl_ops linvfs_qops; 77STATIC struct quotactl_ops linvfs_qops;
76STATIC struct super_operations linvfs_sops; 78STATIC struct super_operations linvfs_sops;
77STATIC kmem_zone_t *linvfs_inode_zone; 79STATIC kmem_zone_t *xfs_vnode_zone;
80STATIC kmem_zone_t *xfs_ioend_zone;
81mempool_t *xfs_ioend_pool;
78 82
79STATIC struct xfs_mount_args * 83STATIC struct xfs_mount_args *
80xfs_args_allocate( 84xfs_args_allocate(
@@ -138,24 +142,25 @@ STATIC __inline__ void
138xfs_set_inodeops( 142xfs_set_inodeops(
139 struct inode *inode) 143 struct inode *inode)
140{ 144{
141 vnode_t *vp = LINVFS_GET_VP(inode); 145 switch (inode->i_mode & S_IFMT) {
142 146 case S_IFREG:
143 if (vp->v_type == VNON) {
144 vn_mark_bad(vp);
145 } else if (S_ISREG(inode->i_mode)) {
146 inode->i_op = &linvfs_file_inode_operations; 147 inode->i_op = &linvfs_file_inode_operations;
147 inode->i_fop = &linvfs_file_operations; 148 inode->i_fop = &linvfs_file_operations;
148 inode->i_mapping->a_ops = &linvfs_aops; 149 inode->i_mapping->a_ops = &linvfs_aops;
149 } else if (S_ISDIR(inode->i_mode)) { 150 break;
151 case S_IFDIR:
150 inode->i_op = &linvfs_dir_inode_operations; 152 inode->i_op = &linvfs_dir_inode_operations;
151 inode->i_fop = &linvfs_dir_operations; 153 inode->i_fop = &linvfs_dir_operations;
152 } else if (S_ISLNK(inode->i_mode)) { 154 break;
155 case S_IFLNK:
153 inode->i_op = &linvfs_symlink_inode_operations; 156 inode->i_op = &linvfs_symlink_inode_operations;
154 if (inode->i_blocks) 157 if (inode->i_blocks)
155 inode->i_mapping->a_ops = &linvfs_aops; 158 inode->i_mapping->a_ops = &linvfs_aops;
156 } else { 159 break;
160 default:
157 inode->i_op = &linvfs_file_inode_operations; 161 inode->i_op = &linvfs_file_inode_operations;
158 init_special_inode(inode, inode->i_mode, inode->i_rdev); 162 init_special_inode(inode, inode->i_mode, inode->i_rdev);
163 break;
159 } 164 }
160} 165}
161 166
@@ -167,16 +172,23 @@ xfs_revalidate_inode(
167{ 172{
168 struct inode *inode = LINVFS_GET_IP(vp); 173 struct inode *inode = LINVFS_GET_IP(vp);
169 174
170 inode->i_mode = (ip->i_d.di_mode & MODEMASK) | VTTOIF(vp->v_type); 175 inode->i_mode = ip->i_d.di_mode;
171 inode->i_nlink = ip->i_d.di_nlink; 176 inode->i_nlink = ip->i_d.di_nlink;
172 inode->i_uid = ip->i_d.di_uid; 177 inode->i_uid = ip->i_d.di_uid;
173 inode->i_gid = ip->i_d.di_gid; 178 inode->i_gid = ip->i_d.di_gid;
174 if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) { 179
180 switch (inode->i_mode & S_IFMT) {
181 case S_IFBLK:
182 case S_IFCHR:
183 inode->i_rdev =
184 MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
185 sysv_minor(ip->i_df.if_u2.if_rdev));
186 break;
187 default:
175 inode->i_rdev = 0; 188 inode->i_rdev = 0;
176 } else { 189 break;
177 xfs_dev_t dev = ip->i_df.if_u2.if_rdev;
178 inode->i_rdev = MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev));
179 } 190 }
191
180 inode->i_blksize = PAGE_CACHE_SIZE; 192 inode->i_blksize = PAGE_CACHE_SIZE;
181 inode->i_generation = ip->i_d.di_gen; 193 inode->i_generation = ip->i_d.di_gen;
182 i_size_write(inode, ip->i_d.di_size); 194 i_size_write(inode, ip->i_d.di_size);
@@ -231,7 +243,6 @@ xfs_initialize_vnode(
231 * finish our work. 243 * finish our work.
232 */ 244 */
233 if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) { 245 if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) {
234 vp->v_type = IFTOVT(ip->i_d.di_mode);
235 xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip); 246 xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
236 xfs_set_inodeops(inode); 247 xfs_set_inodeops(inode);
237 248
@@ -274,8 +285,7 @@ linvfs_alloc_inode(
274{ 285{
275 vnode_t *vp; 286 vnode_t *vp;
276 287
277 vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_zone, 288 vp = kmem_cache_alloc(xfs_vnode_zone, kmem_flags_convert(KM_SLEEP));
278 kmem_flags_convert(KM_SLEEP));
279 if (!vp) 289 if (!vp)
280 return NULL; 290 return NULL;
281 return LINVFS_GET_IP(vp); 291 return LINVFS_GET_IP(vp);
@@ -285,11 +295,11 @@ STATIC void
285linvfs_destroy_inode( 295linvfs_destroy_inode(
286 struct inode *inode) 296 struct inode *inode)
287{ 297{
288 kmem_cache_free(linvfs_inode_zone, LINVFS_GET_VP(inode)); 298 kmem_zone_free(xfs_vnode_zone, LINVFS_GET_VP(inode));
289} 299}
290 300
291STATIC void 301STATIC void
292init_once( 302linvfs_inode_init_once(
293 void *data, 303 void *data,
294 kmem_cache_t *cachep, 304 kmem_cache_t *cachep,
295 unsigned long flags) 305 unsigned long flags)
@@ -302,21 +312,41 @@ init_once(
302} 312}
303 313
304STATIC int 314STATIC int
305init_inodecache( void ) 315linvfs_init_zones(void)
306{ 316{
307 linvfs_inode_zone = kmem_cache_create("linvfs_icache", 317 xfs_vnode_zone = kmem_cache_create("xfs_vnode",
308 sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT, 318 sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT,
309 init_once, NULL); 319 linvfs_inode_init_once, NULL);
310 if (linvfs_inode_zone == NULL) 320 if (!xfs_vnode_zone)
311 return -ENOMEM; 321 goto out;
322
323 xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
324 if (!xfs_ioend_zone)
325 goto out_destroy_vnode_zone;
326
327 xfs_ioend_pool = mempool_create(4 * MAX_BUF_PER_PAGE,
328 mempool_alloc_slab, mempool_free_slab,
329 xfs_ioend_zone);
330 if (!xfs_ioend_pool)
331 goto out_free_ioend_zone;
332
312 return 0; 333 return 0;
334
335
336 out_free_ioend_zone:
337 kmem_zone_destroy(xfs_ioend_zone);
338 out_destroy_vnode_zone:
339 kmem_zone_destroy(xfs_vnode_zone);
340 out:
341 return -ENOMEM;
313} 342}
314 343
315STATIC void 344STATIC void
316destroy_inodecache( void ) 345linvfs_destroy_zones(void)
317{ 346{
318 if (kmem_cache_destroy(linvfs_inode_zone)) 347 mempool_destroy(xfs_ioend_pool);
319 printk(KERN_WARNING "%s: cache still in use!\n", __FUNCTION__); 348 kmem_zone_destroy(xfs_vnode_zone);
349 kmem_zone_destroy(xfs_ioend_zone);
320} 350}
321 351
322/* 352/*
@@ -354,17 +384,38 @@ linvfs_clear_inode(
354 struct inode *inode) 384 struct inode *inode)
355{ 385{
356 vnode_t *vp = LINVFS_GET_VP(inode); 386 vnode_t *vp = LINVFS_GET_VP(inode);
387 int error, cache;
357 388
358 if (vp) { 389 vn_trace_entry(vp, "clear_inode", (inst_t *)__return_address);
359 vn_rele(vp); 390
360 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 391 XFS_STATS_INC(vn_rele);
361 /* 392 XFS_STATS_INC(vn_remove);
362 * Do all our cleanup, and remove this vnode. 393 XFS_STATS_INC(vn_reclaim);
363 */ 394 XFS_STATS_DEC(vn_active);
364 vn_remove(vp); 395
396 /*
397 * This can happen because xfs_iget_core calls xfs_idestroy if we
398 * find an inode with di_mode == 0 but without IGET_CREATE set.
399 */
400 if (vp->v_fbhv)
401 VOP_INACTIVE(vp, NULL, cache);
402
403 VN_LOCK(vp);
404 vp->v_flag &= ~VMODIFIED;
405 VN_UNLOCK(vp, 0);
406
407 if (vp->v_fbhv) {
408 VOP_RECLAIM(vp, error);
409 if (error)
410 panic("vn_purge: cannot reclaim");
365 } 411 }
366}
367 412
413 ASSERT(vp->v_fbhv == NULL);
414
415#ifdef XFS_VNODE_TRACE
416 ktrace_free(vp->v_trace);
417#endif
418}
368 419
369/* 420/*
370 * Enqueue a work item to be picked up by the vfs xfssyncd thread. 421 * Enqueue a work item to be picked up by the vfs xfssyncd thread.
@@ -466,25 +517,16 @@ xfssyncd(
466{ 517{
467 long timeleft; 518 long timeleft;
468 vfs_t *vfsp = (vfs_t *) arg; 519 vfs_t *vfsp = (vfs_t *) arg;
469 struct list_head tmp;
470 struct vfs_sync_work *work, *n; 520 struct vfs_sync_work *work, *n;
521 LIST_HEAD (tmp);
471 522
472 daemonize("xfssyncd");
473
474 vfsp->vfs_sync_work.w_vfs = vfsp;
475 vfsp->vfs_sync_work.w_syncer = vfs_sync_worker;
476 vfsp->vfs_sync_task = current;
477 wmb();
478 wake_up(&vfsp->vfs_wait_sync_task);
479
480 INIT_LIST_HEAD(&tmp);
481 timeleft = (xfs_syncd_centisecs * HZ) / 100; 523 timeleft = (xfs_syncd_centisecs * HZ) / 100;
482 for (;;) { 524 for (;;) {
483 set_current_state(TASK_INTERRUPTIBLE); 525 set_current_state(TASK_INTERRUPTIBLE);
484 timeleft = schedule_timeout(timeleft); 526 timeleft = schedule_timeout(timeleft);
485 /* swsusp */ 527 /* swsusp */
486 try_to_freeze(); 528 try_to_freeze();
487 if (vfsp->vfs_flag & VFS_UMOUNT) 529 if (kthread_should_stop())
488 break; 530 break;
489 531
490 spin_lock(&vfsp->vfs_sync_lock); 532 spin_lock(&vfsp->vfs_sync_lock);
@@ -513,10 +555,6 @@ xfssyncd(
513 } 555 }
514 } 556 }
515 557
516 vfsp->vfs_sync_task = NULL;
517 wmb();
518 wake_up(&vfsp->vfs_wait_sync_task);
519
520 return 0; 558 return 0;
521} 559}
522 560
@@ -524,13 +562,11 @@ STATIC int
524linvfs_start_syncd( 562linvfs_start_syncd(
525 vfs_t *vfsp) 563 vfs_t *vfsp)
526{ 564{
527 int pid; 565 vfsp->vfs_sync_work.w_syncer = vfs_sync_worker;
528 566 vfsp->vfs_sync_work.w_vfs = vfsp;
529 pid = kernel_thread(xfssyncd, (void *) vfsp, 567 vfsp->vfs_sync_task = kthread_run(xfssyncd, vfsp, "xfssyncd");
530 CLONE_VM | CLONE_FS | CLONE_FILES); 568 if (IS_ERR(vfsp->vfs_sync_task))
531 if (pid < 0) 569 return -PTR_ERR(vfsp->vfs_sync_task);
532 return -pid;
533 wait_event(vfsp->vfs_wait_sync_task, vfsp->vfs_sync_task);
534 return 0; 570 return 0;
535} 571}
536 572
@@ -538,11 +574,7 @@ STATIC void
538linvfs_stop_syncd( 574linvfs_stop_syncd(
539 vfs_t *vfsp) 575 vfs_t *vfsp)
540{ 576{
541 vfsp->vfs_flag |= VFS_UMOUNT; 577 kthread_stop(vfsp->vfs_sync_task);
542 wmb();
543
544 wake_up_process(vfsp->vfs_sync_task);
545 wait_event(vfsp->vfs_wait_sync_task, !vfsp->vfs_sync_task);
546} 578}
547 579
548STATIC void 580STATIC void
@@ -866,9 +898,9 @@ init_xfs_fs( void )
866 898
867 ktrace_init(64); 899 ktrace_init(64);
868 900
869 error = init_inodecache(); 901 error = linvfs_init_zones();
870 if (error < 0) 902 if (error < 0)
871 goto undo_inodecache; 903 goto undo_zones;
872 904
873 error = pagebuf_init(); 905 error = pagebuf_init();
874 if (error < 0) 906 if (error < 0)
@@ -889,9 +921,9 @@ undo_register:
889 pagebuf_terminate(); 921 pagebuf_terminate();
890 922
891undo_pagebuf: 923undo_pagebuf:
892 destroy_inodecache(); 924 linvfs_destroy_zones();
893 925
894undo_inodecache: 926undo_zones:
895 return error; 927 return error;
896} 928}
897 929
@@ -903,7 +935,7 @@ exit_xfs_fs( void )
903 unregister_filesystem(&xfs_fs_type); 935 unregister_filesystem(&xfs_fs_type);
904 xfs_cleanup(); 936 xfs_cleanup();
905 pagebuf_terminate(); 937 pagebuf_terminate();
906 destroy_inodecache(); 938 linvfs_destroy_zones();
907 ktrace_uninit(); 939 ktrace_uninit();
908} 940}
909 941
diff --git a/fs/xfs/linux-2.6/xfs_vfs.c b/fs/xfs/linux-2.6/xfs_vfs.c
index 669c61644959..34cc902ec119 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.c
+++ b/fs/xfs/linux-2.6/xfs_vfs.c
@@ -251,7 +251,6 @@ vfs_allocate( void )
251 bhv_head_init(VFS_BHVHEAD(vfsp), "vfs"); 251 bhv_head_init(VFS_BHVHEAD(vfsp), "vfs");
252 INIT_LIST_HEAD(&vfsp->vfs_sync_list); 252 INIT_LIST_HEAD(&vfsp->vfs_sync_list);
253 spin_lock_init(&vfsp->vfs_sync_lock); 253 spin_lock_init(&vfsp->vfs_sync_lock);
254 init_waitqueue_head(&vfsp->vfs_wait_sync_task);
255 init_waitqueue_head(&vfsp->vfs_wait_single_sync_task); 254 init_waitqueue_head(&vfsp->vfs_wait_single_sync_task);
256 return vfsp; 255 return vfsp;
257} 256}
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index 7ee1f714e9ba..f0ab574fb47a 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -65,7 +65,6 @@ typedef struct vfs {
65 spinlock_t vfs_sync_lock; /* work item list lock */ 65 spinlock_t vfs_sync_lock; /* work item list lock */
66 int vfs_sync_seq; /* sync thread generation no. */ 66 int vfs_sync_seq; /* sync thread generation no. */
67 wait_queue_head_t vfs_wait_single_sync_task; 67 wait_queue_head_t vfs_wait_single_sync_task;
68 wait_queue_head_t vfs_wait_sync_task;
69} vfs_t; 68} vfs_t;
70 69
71#define vfs_fbhv vfs_bh.bh_first /* 1st on vfs behavior chain */ 70#define vfs_fbhv vfs_bh.bh_first /* 1st on vfs behavior chain */
@@ -96,7 +95,6 @@ typedef enum {
96#define VFS_RDONLY 0x0001 /* read-only vfs */ 95#define VFS_RDONLY 0x0001 /* read-only vfs */
97#define VFS_GRPID 0x0002 /* group-ID assigned from directory */ 96#define VFS_GRPID 0x0002 /* group-ID assigned from directory */
98#define VFS_DMI 0x0004 /* filesystem has the DMI enabled */ 97#define VFS_DMI 0x0004 /* filesystem has the DMI enabled */
99#define VFS_UMOUNT 0x0008 /* unmount in progress */
100#define VFS_END 0x0008 /* max flag */ 98#define VFS_END 0x0008 /* max flag */
101 99
102#define SYNC_ATTR 0x0001 /* sync attributes */ 100#define SYNC_ATTR 0x0001 /* sync attributes */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index 250cad54e892..268f45bf6a9a 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -42,93 +42,33 @@ DEFINE_SPINLOCK(vnumber_lock);
42 */ 42 */
43#define NVSYNC 37 43#define NVSYNC 37
44#define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC]) 44#define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC])
45sv_t vsync[NVSYNC]; 45STATIC wait_queue_head_t vsync[NVSYNC];
46
47/*
48 * Translate stat(2) file types to vnode types and vice versa.
49 * Aware of numeric order of S_IFMT and vnode type values.
50 */
51enum vtype iftovt_tab[] = {
52 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
53 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
54};
55
56u_short vttoif_tab[] = {
57 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO, 0, S_IFSOCK
58};
59 46
60 47
61void 48void
62vn_init(void) 49vn_init(void)
63{ 50{
64 register sv_t *svp; 51 int i;
65 register int i;
66 52
67 for (svp = vsync, i = 0; i < NVSYNC; i++, svp++) 53 for (i = 0; i < NVSYNC; i++)
68 init_sv(svp, SV_DEFAULT, "vsy", i); 54 init_waitqueue_head(&vsync[i]);
69} 55}
70 56
71/* 57void
72 * Clean a vnode of filesystem-specific data and prepare it for reuse. 58vn_iowait(
73 */
74STATIC int
75vn_reclaim(
76 struct vnode *vp) 59 struct vnode *vp)
77{ 60{
78 int error; 61 wait_queue_head_t *wq = vptosync(vp);
79 62
80 XFS_STATS_INC(vn_reclaim); 63 wait_event(*wq, (atomic_read(&vp->v_iocount) == 0));
81 vn_trace_entry(vp, "vn_reclaim", (inst_t *)__return_address);
82
83 /*
84 * Only make the VOP_RECLAIM call if there are behaviors
85 * to call.
86 */
87 if (vp->v_fbhv) {
88 VOP_RECLAIM(vp, error);
89 if (error)
90 return -error;
91 }
92 ASSERT(vp->v_fbhv == NULL);
93
94 VN_LOCK(vp);
95 vp->v_flag &= (VRECLM|VWAIT);
96 VN_UNLOCK(vp, 0);
97
98 vp->v_type = VNON;
99 vp->v_fbhv = NULL;
100
101#ifdef XFS_VNODE_TRACE
102 ktrace_free(vp->v_trace);
103 vp->v_trace = NULL;
104#endif
105
106 return 0;
107}
108
109STATIC void
110vn_wakeup(
111 struct vnode *vp)
112{
113 VN_LOCK(vp);
114 if (vp->v_flag & VWAIT)
115 sv_broadcast(vptosync(vp));
116 vp->v_flag &= ~(VRECLM|VWAIT|VMODIFIED);
117 VN_UNLOCK(vp, 0);
118} 64}
119 65
120int 66void
121vn_wait( 67vn_iowake(
122 struct vnode *vp) 68 struct vnode *vp)
123{ 69{
124 VN_LOCK(vp); 70 if (atomic_dec_and_test(&vp->v_iocount))
125 if (vp->v_flag & (VINACT | VRECLM)) { 71 wake_up(vptosync(vp));
126 vp->v_flag |= VWAIT;
127 sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0);
128 return 1;
129 }
130 VN_UNLOCK(vp, 0);
131 return 0;
132} 72}
133 73
134struct vnode * 74struct vnode *
@@ -154,6 +94,8 @@ vn_initialize(
154 /* Initialize the first behavior and the behavior chain head. */ 94 /* Initialize the first behavior and the behavior chain head. */
155 vn_bhv_head_init(VN_BHV_HEAD(vp), "vnode"); 95 vn_bhv_head_init(VN_BHV_HEAD(vp), "vnode");
156 96
97 atomic_set(&vp->v_iocount, 0);
98
157#ifdef XFS_VNODE_TRACE 99#ifdef XFS_VNODE_TRACE
158 vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP); 100 vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP);
159#endif /* XFS_VNODE_TRACE */ 101#endif /* XFS_VNODE_TRACE */
@@ -163,30 +105,6 @@ vn_initialize(
163} 105}
164 106
165/* 107/*
166 * Get a reference on a vnode.
167 */
168vnode_t *
169vn_get(
170 struct vnode *vp,
171 vmap_t *vmap)
172{
173 struct inode *inode;
174
175 XFS_STATS_INC(vn_get);
176 inode = LINVFS_GET_IP(vp);
177 if (inode->i_state & I_FREEING)
178 return NULL;
179
180 inode = ilookup(vmap->v_vfsp->vfs_super, vmap->v_ino);
181 if (!inode) /* Inode not present */
182 return NULL;
183
184 vn_trace_exit(vp, "vn_get", (inst_t *)__return_address);
185
186 return vp;
187}
188
189/*
190 * Revalidate the Linux inode from the vattr. 108 * Revalidate the Linux inode from the vattr.
191 * Note: i_size _not_ updated; we must hold the inode 109 * Note: i_size _not_ updated; we must hold the inode
192 * semaphore when doing that - callers responsibility. 110 * semaphore when doing that - callers responsibility.
@@ -198,7 +116,7 @@ vn_revalidate_core(
198{ 116{
199 struct inode *inode = LINVFS_GET_IP(vp); 117 struct inode *inode = LINVFS_GET_IP(vp);
200 118
201 inode->i_mode = VTTOIF(vap->va_type) | vap->va_mode; 119 inode->i_mode = vap->va_mode;
202 inode->i_nlink = vap->va_nlink; 120 inode->i_nlink = vap->va_nlink;
203 inode->i_uid = vap->va_uid; 121 inode->i_uid = vap->va_uid;
204 inode->i_gid = vap->va_gid; 122 inode->i_gid = vap->va_gid;
@@ -247,71 +165,6 @@ vn_revalidate(
247} 165}
248 166
249/* 167/*
250 * purge a vnode from the cache
251 * At this point the vnode is guaranteed to have no references (vn_count == 0)
252 * The caller has to make sure that there are no ways someone could
253 * get a handle (via vn_get) on the vnode (usually done via a mount/vfs lock).
254 */
255void
256vn_purge(
257 struct vnode *vp,
258 vmap_t *vmap)
259{
260 vn_trace_entry(vp, "vn_purge", (inst_t *)__return_address);
261
262again:
263 /*
264 * Check whether vp has already been reclaimed since our caller
265 * sampled its version while holding a filesystem cache lock that
266 * its VOP_RECLAIM function acquires.
267 */
268 VN_LOCK(vp);
269 if (vp->v_number != vmap->v_number) {
270 VN_UNLOCK(vp, 0);
271 return;
272 }
273
274 /*
275 * If vp is being reclaimed or inactivated, wait until it is inert,
276 * then proceed. Can't assume that vnode is actually reclaimed
277 * just because the reclaimed flag is asserted -- a vn_alloc
278 * reclaim can fail.
279 */
280 if (vp->v_flag & (VINACT | VRECLM)) {
281 ASSERT(vn_count(vp) == 0);
282 vp->v_flag |= VWAIT;
283 sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0);
284 goto again;
285 }
286
287 /*
288 * Another process could have raced in and gotten this vnode...
289 */
290 if (vn_count(vp) > 0) {
291 VN_UNLOCK(vp, 0);
292 return;
293 }
294
295 XFS_STATS_DEC(vn_active);
296 vp->v_flag |= VRECLM;
297 VN_UNLOCK(vp, 0);
298
299 /*
300 * Call VOP_RECLAIM and clean vp. The FSYNC_INVAL flag tells
301 * vp's filesystem to flush and invalidate all cached resources.
302 * When vn_reclaim returns, vp should have no private data,
303 * either in a system cache or attached to v_data.
304 */
305 if (vn_reclaim(vp) != 0)
306 panic("vn_purge: cannot reclaim");
307
308 /*
309 * Wakeup anyone waiting for vp to be reclaimed.
310 */
311 vn_wakeup(vp);
312}
313
314/*
315 * Add a reference to a referenced vnode. 168 * Add a reference to a referenced vnode.
316 */ 169 */
317struct vnode * 170struct vnode *
@@ -330,80 +183,6 @@ vn_hold(
330 return vp; 183 return vp;
331} 184}
332 185
333/*
334 * Call VOP_INACTIVE on last reference.
335 */
336void
337vn_rele(
338 struct vnode *vp)
339{
340 int vcnt;
341 int cache;
342
343 XFS_STATS_INC(vn_rele);
344
345 VN_LOCK(vp);
346
347 vn_trace_entry(vp, "vn_rele", (inst_t *)__return_address);
348 vcnt = vn_count(vp);
349
350 /*
351 * Since we always get called from put_inode we know
352 * that i_count won't be decremented after we
353 * return.
354 */
355 if (!vcnt) {
356 /*
357 * As soon as we turn this on, noone can find us in vn_get
358 * until we turn off VINACT or VRECLM
359 */
360 vp->v_flag |= VINACT;
361 VN_UNLOCK(vp, 0);
362
363 /*
364 * Do not make the VOP_INACTIVE call if there
365 * are no behaviors attached to the vnode to call.
366 */
367 if (vp->v_fbhv)
368 VOP_INACTIVE(vp, NULL, cache);
369
370 VN_LOCK(vp);
371 if (vp->v_flag & VWAIT)
372 sv_broadcast(vptosync(vp));
373
374 vp->v_flag &= ~(VINACT|VWAIT|VRECLM|VMODIFIED);
375 }
376
377 VN_UNLOCK(vp, 0);
378
379 vn_trace_exit(vp, "vn_rele", (inst_t *)__return_address);
380}
381
382/*
383 * Finish the removal of a vnode.
384 */
385void
386vn_remove(
387 struct vnode *vp)
388{
389 vmap_t vmap;
390
391 /* Make sure we don't do this to the same vnode twice */
392 if (!(vp->v_fbhv))
393 return;
394
395 XFS_STATS_INC(vn_remove);
396 vn_trace_exit(vp, "vn_remove", (inst_t *)__return_address);
397
398 /*
399 * After the following purge the vnode
400 * will no longer exist.
401 */
402 VMAP(vp, vmap);
403 vn_purge(vp, &vmap);
404}
405
406
407#ifdef XFS_VNODE_TRACE 186#ifdef XFS_VNODE_TRACE
408 187
409#define KTRACE_ENTER(vp, vk, s, line, ra) \ 188#define KTRACE_ENTER(vp, vk, s, line, ra) \
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index a6e57c647be4..35f306cebb87 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as 5 * under the terms of version 2 of the GNU General Public License as
@@ -65,10 +65,6 @@ struct vattr;
65struct xfs_iomap; 65struct xfs_iomap;
66struct attrlist_cursor_kern; 66struct attrlist_cursor_kern;
67 67
68/*
69 * Vnode types. VNON means no type.
70 */
71enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VFIFO, VBAD, VSOCK };
72 68
73typedef xfs_ino_t vnumber_t; 69typedef xfs_ino_t vnumber_t;
74typedef struct dentry vname_t; 70typedef struct dentry vname_t;
@@ -77,15 +73,14 @@ typedef bhv_head_t vn_bhv_head_t;
77/* 73/*
78 * MP locking protocols: 74 * MP locking protocols:
79 * v_flag, v_vfsp VN_LOCK/VN_UNLOCK 75 * v_flag, v_vfsp VN_LOCK/VN_UNLOCK
80 * v_type read-only or fs-dependent
81 */ 76 */
82typedef struct vnode { 77typedef struct vnode {
83 __u32 v_flag; /* vnode flags (see below) */ 78 __u32 v_flag; /* vnode flags (see below) */
84 enum vtype v_type; /* vnode type */
85 struct vfs *v_vfsp; /* ptr to containing VFS */ 79 struct vfs *v_vfsp; /* ptr to containing VFS */
86 vnumber_t v_number; /* in-core vnode number */ 80 vnumber_t v_number; /* in-core vnode number */
87 vn_bhv_head_t v_bh; /* behavior head */ 81 vn_bhv_head_t v_bh; /* behavior head */
88 spinlock_t v_lock; /* VN_LOCK/VN_UNLOCK */ 82 spinlock_t v_lock; /* VN_LOCK/VN_UNLOCK */
83 atomic_t v_iocount; /* outstanding I/O count */
89#ifdef XFS_VNODE_TRACE 84#ifdef XFS_VNODE_TRACE
90 struct ktrace *v_trace; /* trace header structure */ 85 struct ktrace *v_trace; /* trace header structure */
91#endif 86#endif
@@ -93,6 +88,12 @@ typedef struct vnode {
93 /* inode MUST be last */ 88 /* inode MUST be last */
94} vnode_t; 89} vnode_t;
95 90
91#define VN_ISLNK(vp) S_ISLNK((vp)->v_inode.i_mode)
92#define VN_ISREG(vp) S_ISREG((vp)->v_inode.i_mode)
93#define VN_ISDIR(vp) S_ISDIR((vp)->v_inode.i_mode)
94#define VN_ISCHR(vp) S_ISCHR((vp)->v_inode.i_mode)
95#define VN_ISBLK(vp) S_ISBLK((vp)->v_inode.i_mode)
96
96#define v_fbhv v_bh.bh_first /* first behavior */ 97#define v_fbhv v_bh.bh_first /* first behavior */
97#define v_fops v_bh.bh_first->bd_ops /* first behavior ops */ 98#define v_fops v_bh.bh_first->bd_ops /* first behavior ops */
98 99
@@ -133,22 +134,8 @@ typedef enum {
133#define LINVFS_GET_IP(vp) (&(vp)->v_inode) 134#define LINVFS_GET_IP(vp) (&(vp)->v_inode)
134 135
135/* 136/*
136 * Convert between vnode types and inode formats (since POSIX.1
137 * defines mode word of stat structure in terms of inode formats).
138 */
139extern enum vtype iftovt_tab[];
140extern u_short vttoif_tab[];
141#define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12])
142#define VTTOIF(indx) (vttoif_tab[(int)(indx)])
143#define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode))
144
145
146/*
147 * Vnode flags. 137 * Vnode flags.
148 */ 138 */
149#define VINACT 0x1 /* vnode is being inactivated */
150#define VRECLM 0x2 /* vnode is being reclaimed */
151#define VWAIT 0x4 /* waiting for VINACT/VRECLM to end */
152#define VMODIFIED 0x8 /* XFS inode state possibly differs */ 139#define VMODIFIED 0x8 /* XFS inode state possibly differs */
153 /* to the Linux inode state. */ 140 /* to the Linux inode state. */
154 141
@@ -408,7 +395,6 @@ typedef struct vnodeops {
408 */ 395 */
409typedef struct vattr { 396typedef struct vattr {
410 int va_mask; /* bit-mask of attributes present */ 397 int va_mask; /* bit-mask of attributes present */
411 enum vtype va_type; /* vnode type (for create) */
412 mode_t va_mode; /* file access mode and type */ 398 mode_t va_mode; /* file access mode and type */
413 xfs_nlink_t va_nlink; /* number of references to file */ 399 xfs_nlink_t va_nlink; /* number of references to file */
414 uid_t va_uid; /* owner user id */ 400 uid_t va_uid; /* owner user id */
@@ -498,27 +484,12 @@ typedef struct vattr {
498 * Check whether mandatory file locking is enabled. 484 * Check whether mandatory file locking is enabled.
499 */ 485 */
500#define MANDLOCK(vp, mode) \ 486#define MANDLOCK(vp, mode) \
501 ((vp)->v_type == VREG && ((mode) & (VSGID|(VEXEC>>3))) == VSGID) 487 (VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID)
502 488
503extern void vn_init(void); 489extern void vn_init(void);
504extern int vn_wait(struct vnode *);
505extern vnode_t *vn_initialize(struct inode *); 490extern vnode_t *vn_initialize(struct inode *);
506 491
507/* 492/*
508 * Acquiring and invalidating vnodes:
509 *
510 * if (vn_get(vp, version, 0))
511 * ...;
512 * vn_purge(vp, version);
513 *
514 * vn_get and vn_purge must be called with vmap_t arguments, sampled
515 * while a lock that the vnode's VOP_RECLAIM function acquires is
516 * held, to ensure that the vnode sampled with the lock held isn't
517 * recycled (VOP_RECLAIMed) or deallocated between the release of the lock
518 * and the subsequent vn_get or vn_purge.
519 */
520
521/*
522 * vnode_map structures _must_ match vn_epoch and vnode structure sizes. 493 * vnode_map structures _must_ match vn_epoch and vnode structure sizes.
523 */ 494 */
524typedef struct vnode_map { 495typedef struct vnode_map {
@@ -531,11 +502,11 @@ typedef struct vnode_map {
531 (vmap).v_number = (vp)->v_number, \ 502 (vmap).v_number = (vp)->v_number, \
532 (vmap).v_ino = (vp)->v_inode.i_ino; } 503 (vmap).v_ino = (vp)->v_inode.i_ino; }
533 504
534extern void vn_purge(struct vnode *, vmap_t *);
535extern vnode_t *vn_get(struct vnode *, vmap_t *);
536extern int vn_revalidate(struct vnode *); 505extern int vn_revalidate(struct vnode *);
537extern void vn_revalidate_core(struct vnode *, vattr_t *); 506extern void vn_revalidate_core(struct vnode *, vattr_t *);
538extern void vn_remove(struct vnode *); 507
508extern void vn_iowait(struct vnode *vp);
509extern void vn_iowake(struct vnode *vp);
539 510
540static inline int vn_count(struct vnode *vp) 511static inline int vn_count(struct vnode *vp)
541{ 512{
@@ -546,7 +517,6 @@ static inline int vn_count(struct vnode *vp)
546 * Vnode reference counting functions (and macros for compatibility). 517 * Vnode reference counting functions (and macros for compatibility).
547 */ 518 */
548extern vnode_t *vn_hold(struct vnode *); 519extern vnode_t *vn_hold(struct vnode *);
549extern void vn_rele(struct vnode *);
550 520
551#if defined(XFS_VNODE_TRACE) 521#if defined(XFS_VNODE_TRACE)
552#define VN_HOLD(vp) \ 522#define VN_HOLD(vp) \
@@ -560,6 +530,12 @@ extern void vn_rele(struct vnode *);
560#define VN_RELE(vp) (iput(LINVFS_GET_IP(vp))) 530#define VN_RELE(vp) (iput(LINVFS_GET_IP(vp)))
561#endif 531#endif
562 532
533static inline struct vnode *vn_grab(struct vnode *vp)
534{
535 struct inode *inode = igrab(LINVFS_GET_IP(vp));
536 return inode ? LINVFS_GET_VP(inode) : NULL;
537}
538
563/* 539/*
564 * Vname handling macros. 540 * Vname handling macros.
565 */ 541 */
diff --git a/fs/xfs/quota/Makefile b/fs/xfs/quota/Makefile
new file mode 100644
index 000000000000..7a4f725b2824
--- /dev/null
+++ b/fs/xfs/quota/Makefile
@@ -0,0 +1 @@
include $(TOPDIR)/fs/xfs/quota/Makefile-linux-$(VERSION).$(PATCHLEVEL)
diff --git a/fs/xfs/quota/Makefile-linux-2.6 b/fs/xfs/quota/Makefile-linux-2.6
new file mode 100644
index 000000000000..8b7b676718b9
--- /dev/null
+++ b/fs/xfs/quota/Makefile-linux-2.6
@@ -0,0 +1,53 @@
1#
2# Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
3#
4# This program is free software; you can redistribute it and/or modify it
5# under the terms of version 2 of the GNU General Public License as
6# published by the Free Software Foundation.
7#
8# This program is distributed in the hope that it would be useful, but
9# WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11#
12# Further, this software is distributed without any warranty that it is
13# free of the rightful claim of any third person regarding infringement
14# or the like. Any license provided herein, whether implied or
15# otherwise, applies only to this software file. Patent licenses, if
16# any, provided herein do not apply to combinations of this program with
17# other software, or any other product whatsoever.
18#
19# You should have received a copy of the GNU General Public License along
20# with this program; if not, write the Free Software Foundation, Inc., 59
21# Temple Place - Suite 330, Boston MA 02111-1307, USA.
22#
23# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24# Mountain View, CA 94043, or:
25#
26# http://www.sgi.com
27#
28# For further information regarding this notice, see:
29#
30# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31#
32
33EXTRA_CFLAGS += -I $(TOPDIR)/fs/xfs -I $(TOPDIR)/fs/xfs/linux-2.6
34
35ifeq ($(CONFIG_XFS_DEBUG),y)
36 EXTRA_CFLAGS += -g -DDEBUG
37 #EXTRA_CFLAGS += -DQUOTADEBUG
38endif
39ifeq ($(CONFIG_XFS_TRACE),y)
40 EXTRA_CFLAGS += -DXFS_DQUOT_TRACE
41 EXTRA_CFLAGS += -DXFS_VNODE_TRACE
42endif
43
44obj-$(CONFIG_XFS_QUOTA) += xfs_quota.o
45
46xfs_quota-y += xfs_dquot.o \
47 xfs_dquot_item.o \
48 xfs_trans_dquot.o \
49 xfs_qm_syscalls.o \
50 xfs_qm_bhv.o \
51 xfs_qm.o
52
53xfs_quota-$(CONFIG_PROC_FS) += xfs_qm_stats.o
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 46ce1e3ce1d6..e2e8d35fa4d0 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -421,7 +421,7 @@ xfs_qm_init_dquot_blk(
421 */ 421 */
422STATIC int 422STATIC int
423xfs_qm_dqalloc( 423xfs_qm_dqalloc(
424 xfs_trans_t *tp, 424 xfs_trans_t **tpp,
425 xfs_mount_t *mp, 425 xfs_mount_t *mp,
426 xfs_dquot_t *dqp, 426 xfs_dquot_t *dqp,
427 xfs_inode_t *quotip, 427 xfs_inode_t *quotip,
@@ -433,6 +433,7 @@ xfs_qm_dqalloc(
433 xfs_bmbt_irec_t map; 433 xfs_bmbt_irec_t map;
434 int nmaps, error, committed; 434 int nmaps, error, committed;
435 xfs_buf_t *bp; 435 xfs_buf_t *bp;
436 xfs_trans_t *tp = *tpp;
436 437
437 ASSERT(tp != NULL); 438 ASSERT(tp != NULL);
438 xfs_dqtrace_entry(dqp, "DQALLOC"); 439 xfs_dqtrace_entry(dqp, "DQALLOC");
@@ -492,10 +493,32 @@ xfs_qm_dqalloc(
492 xfs_qm_init_dquot_blk(tp, mp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT), 493 xfs_qm_init_dquot_blk(tp, mp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT),
493 dqp->dq_flags & XFS_DQ_ALLTYPES, bp); 494 dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
494 495
495 if ((error = xfs_bmap_finish(&tp, &flist, firstblock, &committed))) { 496 /*
497 * xfs_bmap_finish() may commit the current transaction and
498 * start a second transaction if the freelist is not empty.
499 *
500 * Since we still want to modify this buffer, we need to
501 * ensure that the buffer is not released on commit of
502 * the first transaction and ensure the buffer is added to the
503 * second transaction.
504 *
505 * If there is only one transaction then don't stop the buffer
506 * from being released when it commits later on.
507 */
508
509 xfs_trans_bhold(tp, bp);
510
511 if ((error = xfs_bmap_finish(tpp, &flist, firstblock, &committed))) {
496 goto error1; 512 goto error1;
497 } 513 }
498 514
515 if (committed) {
516 tp = *tpp;
517 xfs_trans_bjoin(tp, bp);
518 } else {
519 xfs_trans_bhold_release(tp, bp);
520 }
521
499 *O_bpp = bp; 522 *O_bpp = bp;
500 return 0; 523 return 0;
501 524
@@ -514,7 +537,7 @@ xfs_qm_dqalloc(
514 */ 537 */
515STATIC int 538STATIC int
516xfs_qm_dqtobp( 539xfs_qm_dqtobp(
517 xfs_trans_t *tp, 540 xfs_trans_t **tpp,
518 xfs_dquot_t *dqp, 541 xfs_dquot_t *dqp,
519 xfs_disk_dquot_t **O_ddpp, 542 xfs_disk_dquot_t **O_ddpp,
520 xfs_buf_t **O_bpp, 543 xfs_buf_t **O_bpp,
@@ -528,6 +551,7 @@ xfs_qm_dqtobp(
528 xfs_disk_dquot_t *ddq; 551 xfs_disk_dquot_t *ddq;
529 xfs_dqid_t id; 552 xfs_dqid_t id;
530 boolean_t newdquot; 553 boolean_t newdquot;
554 xfs_trans_t *tp = (tpp ? *tpp : NULL);
531 555
532 mp = dqp->q_mount; 556 mp = dqp->q_mount;
533 id = INT_GET(dqp->q_core.d_id, ARCH_CONVERT); 557 id = INT_GET(dqp->q_core.d_id, ARCH_CONVERT);
@@ -579,9 +603,10 @@ xfs_qm_dqtobp(
579 return (ENOENT); 603 return (ENOENT);
580 604
581 ASSERT(tp); 605 ASSERT(tp);
582 if ((error = xfs_qm_dqalloc(tp, mp, dqp, quotip, 606 if ((error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
583 dqp->q_fileoffset, &bp))) 607 dqp->q_fileoffset, &bp)))
584 return (error); 608 return (error);
609 tp = *tpp;
585 newdquot = B_TRUE; 610 newdquot = B_TRUE;
586 } else { 611 } else {
587 /* 612 /*
@@ -645,7 +670,7 @@ xfs_qm_dqtobp(
645/* ARGSUSED */ 670/* ARGSUSED */
646STATIC int 671STATIC int
647xfs_qm_dqread( 672xfs_qm_dqread(
648 xfs_trans_t *tp, 673 xfs_trans_t **tpp,
649 xfs_dqid_t id, 674 xfs_dqid_t id,
650 xfs_dquot_t *dqp, /* dquot to get filled in */ 675 xfs_dquot_t *dqp, /* dquot to get filled in */
651 uint flags) 676 uint flags)
@@ -653,15 +678,19 @@ xfs_qm_dqread(
653 xfs_disk_dquot_t *ddqp; 678 xfs_disk_dquot_t *ddqp;
654 xfs_buf_t *bp; 679 xfs_buf_t *bp;
655 int error; 680 int error;
681 xfs_trans_t *tp;
682
683 ASSERT(tpp);
656 684
657 /* 685 /*
658 * get a pointer to the on-disk dquot and the buffer containing it 686 * get a pointer to the on-disk dquot and the buffer containing it
659 * dqp already knows its own type (GROUP/USER). 687 * dqp already knows its own type (GROUP/USER).
660 */ 688 */
661 xfs_dqtrace_entry(dqp, "DQREAD"); 689 xfs_dqtrace_entry(dqp, "DQREAD");
662 if ((error = xfs_qm_dqtobp(tp, dqp, &ddqp, &bp, flags))) { 690 if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
663 return (error); 691 return (error);
664 } 692 }
693 tp = *tpp;
665 694
666 /* copy everything from disk dquot to the incore dquot */ 695 /* copy everything from disk dquot to the incore dquot */
667 memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t)); 696 memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
@@ -740,7 +769,7 @@ xfs_qm_idtodq(
740 * Read it from disk; xfs_dqread() takes care of 769 * Read it from disk; xfs_dqread() takes care of
741 * all the necessary initialization of dquot's fields (locks, etc) 770 * all the necessary initialization of dquot's fields (locks, etc)
742 */ 771 */
743 if ((error = xfs_qm_dqread(tp, id, dqp, flags))) { 772 if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) {
744 /* 773 /*
745 * This can happen if quotas got turned off (ESRCH), 774 * This can happen if quotas got turned off (ESRCH),
746 * or if the dquot didn't exist on disk and we ask to 775 * or if the dquot didn't exist on disk and we ask to
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 39175103c8e0..8ebc87176c78 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as 5 * under the terms of version 2 of the GNU General Public License as
@@ -113,20 +113,6 @@ typedef struct xfs_dquot {
113 113
114#define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++) 114#define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++)
115 115
116/*
117 * Quota Accounting/Enforcement flags
118 */
119#define XFS_ALL_QUOTA_ACCT \
120 (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT)
121#define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD)
122#define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD)
123
124#define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
125#define XFS_IS_QUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ENFD)
126#define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT)
127#define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT)
128#define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT)
129
130#ifdef DEBUG 116#ifdef DEBUG
131static inline int 117static inline int
132XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp) 118XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index f5271b7b1e84..e74eaa7dd1bc 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -509,6 +509,7 @@ xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t *qf,
509 509
510 log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format); 510 log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format);
511 log_vector->i_len = sizeof(xfs_qoff_logitem_t); 511 log_vector->i_len = sizeof(xfs_qoff_logitem_t);
512 XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_QUOTAOFF);
512 qf->qql_format.qf_size = 1; 513 qf->qql_format.qf_size = 1;
513} 514}
514 515
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index f665ca8f9e96..efde16e0a913 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as 5 * under the terms of version 2 of the GNU General Public License as
@@ -365,16 +365,6 @@ xfs_qm_mount_quotas(
365 int error = 0; 365 int error = 0;
366 uint sbf; 366 uint sbf;
367 367
368 /*
369 * If a file system had quotas running earlier, but decided to
370 * mount without -o uquota/pquota/gquota options, revoke the
371 * quotachecked license, and bail out.
372 */
373 if (! XFS_IS_QUOTA_ON(mp) &&
374 (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT)) {
375 mp->m_qflags = 0;
376 goto write_changes;
377 }
378 368
379 /* 369 /*
380 * If quotas on realtime volumes is not supported, we disable 370 * If quotas on realtime volumes is not supported, we disable
@@ -388,11 +378,8 @@ xfs_qm_mount_quotas(
388 goto write_changes; 378 goto write_changes;
389 } 379 }
390 380
391#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
392 cmn_err(CE_NOTE, "Attempting to turn on disk quotas.");
393#endif
394
395 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 381 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
382
396 /* 383 /*
397 * Allocate the quotainfo structure inside the mount struct, and 384 * Allocate the quotainfo structure inside the mount struct, and
398 * create quotainode(s), and change/rev superblock if necessary. 385 * create quotainode(s), and change/rev superblock if necessary.
@@ -410,19 +397,14 @@ xfs_qm_mount_quotas(
410 */ 397 */
411 if (XFS_QM_NEED_QUOTACHECK(mp) && 398 if (XFS_QM_NEED_QUOTACHECK(mp) &&
412 !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) { 399 !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) {
413#ifdef DEBUG
414 cmn_err(CE_NOTE, "Doing a quotacheck. Please wait.");
415#endif
416 if ((error = xfs_qm_quotacheck(mp))) { 400 if ((error = xfs_qm_quotacheck(mp))) {
417 /* Quotacheck has failed and quotas have 401 /* Quotacheck has failed and quotas have
418 * been disabled. 402 * been disabled.
419 */ 403 */
420 return XFS_ERROR(error); 404 return XFS_ERROR(error);
421 } 405 }
422#ifdef DEBUG
423 cmn_err(CE_NOTE, "Done quotacheck.");
424#endif
425 } 406 }
407
426 write_changes: 408 write_changes:
427 /* 409 /*
428 * We actually don't have to acquire the SB_LOCK at all. 410 * We actually don't have to acquire the SB_LOCK at all.
@@ -2010,7 +1992,7 @@ xfs_qm_quotacheck(
2010 ASSERT(mp->m_quotainfo != NULL); 1992 ASSERT(mp->m_quotainfo != NULL);
2011 ASSERT(xfs_Gqm != NULL); 1993 ASSERT(xfs_Gqm != NULL);
2012 xfs_qm_destroy_quotainfo(mp); 1994 xfs_qm_destroy_quotainfo(mp);
2013 xfs_mount_reset_sbqflags(mp); 1995 (void)xfs_mount_reset_sbqflags(mp);
2014 } else { 1996 } else {
2015 cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname); 1997 cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname);
2016 } 1998 }
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index b03eecf3b6cb..0b00b3c67015 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -184,8 +184,6 @@ typedef struct xfs_dquot_acct {
184#define XFS_QM_HOLD(xqm) ((xqm)->qm_nrefs++) 184#define XFS_QM_HOLD(xqm) ((xqm)->qm_nrefs++)
185#define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--) 185#define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--)
186 186
187extern void xfs_mount_reset_sbqflags(xfs_mount_t *);
188
189extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); 187extern void xfs_qm_destroy_quotainfo(xfs_mount_t *);
190extern int xfs_qm_mount_quotas(xfs_mount_t *, int); 188extern int xfs_qm_mount_quotas(xfs_mount_t *, int);
191extern void xfs_qm_mount_quotainit(xfs_mount_t *, uint); 189extern void xfs_qm_mount_quotainit(xfs_mount_t *, uint);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index dc3c37a1e158..8890a18a99d8 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as 5 * under the terms of version 2 of the GNU General Public License as
@@ -229,48 +229,6 @@ xfs_qm_syncall(
229 return error; 229 return error;
230} 230}
231 231
232/*
233 * Clear the quotaflags in memory and in the superblock.
234 */
235void
236xfs_mount_reset_sbqflags(
237 xfs_mount_t *mp)
238{
239 xfs_trans_t *tp;
240 unsigned long s;
241
242 mp->m_qflags = 0;
243 /*
244 * It is OK to look at sb_qflags here in mount path,
245 * without SB_LOCK.
246 */
247 if (mp->m_sb.sb_qflags == 0)
248 return;
249 s = XFS_SB_LOCK(mp);
250 mp->m_sb.sb_qflags = 0;
251 XFS_SB_UNLOCK(mp, s);
252
253 /*
254 * if the fs is readonly, let the incore superblock run
255 * with quotas off but don't flush the update out to disk
256 */
257 if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY)
258 return;
259#ifdef QUOTADEBUG
260 xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
261#endif
262 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
263 if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
264 XFS_DEFAULT_LOG_COUNT)) {
265 xfs_trans_cancel(tp, 0);
266 xfs_fs_cmn_err(CE_ALERT, mp,
267 "xfs_mount_reset_sbqflags: Superblock update failed!");
268 return;
269 }
270 xfs_mod_sb(tp, XFS_SB_QFLAGS);
271 xfs_trans_commit(tp, 0, NULL);
272}
273
274STATIC int 232STATIC int
275xfs_qm_newmount( 233xfs_qm_newmount(
276 xfs_mount_t *mp, 234 xfs_mount_t *mp,
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 68e98962dbef..15e02e8a9d4f 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -1053,7 +1053,6 @@ xfs_qm_dqrele_all_inodes(
1053 struct xfs_mount *mp, 1053 struct xfs_mount *mp,
1054 uint flags) 1054 uint flags)
1055{ 1055{
1056 vmap_t vmap;
1057 xfs_inode_t *ip, *topino; 1056 xfs_inode_t *ip, *topino;
1058 uint ireclaims; 1057 uint ireclaims;
1059 vnode_t *vp; 1058 vnode_t *vp;
@@ -1061,8 +1060,8 @@ xfs_qm_dqrele_all_inodes(
1061 1060
1062 ASSERT(mp->m_quotainfo); 1061 ASSERT(mp->m_quotainfo);
1063 1062
1064again:
1065 XFS_MOUNT_ILOCK(mp); 1063 XFS_MOUNT_ILOCK(mp);
1064again:
1066 ip = mp->m_inodes; 1065 ip = mp->m_inodes;
1067 if (ip == NULL) { 1066 if (ip == NULL) {
1068 XFS_MOUNT_IUNLOCK(mp); 1067 XFS_MOUNT_IUNLOCK(mp);
@@ -1090,18 +1089,14 @@ again:
1090 } 1089 }
1091 vnode_refd = B_FALSE; 1090 vnode_refd = B_FALSE;
1092 if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { 1091 if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) {
1093 /*
1094 * Sample vp mapping while holding the mplock, lest
1095 * we come across a non-existent vnode.
1096 */
1097 VMAP(vp, vmap);
1098 ireclaims = mp->m_ireclaims; 1092 ireclaims = mp->m_ireclaims;
1099 topino = mp->m_inodes; 1093 topino = mp->m_inodes;
1100 XFS_MOUNT_IUNLOCK(mp); 1094 vp = vn_grab(vp);
1095 if (!vp)
1096 goto again;
1101 1097
1098 XFS_MOUNT_IUNLOCK(mp);
1102 /* XXX restart limit ? */ 1099 /* XXX restart limit ? */
1103 if ( ! (vp = vn_get(vp, &vmap)))
1104 goto again;
1105 xfs_ilock(ip, XFS_ILOCK_EXCL); 1100 xfs_ilock(ip, XFS_ILOCK_EXCL);
1106 vnode_refd = B_TRUE; 1101 vnode_refd = B_TRUE;
1107 } else { 1102 } else {
@@ -1137,7 +1132,6 @@ again:
1137 */ 1132 */
1138 if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) { 1133 if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) {
1139 /* XXX use a sentinel */ 1134 /* XXX use a sentinel */
1140 XFS_MOUNT_IUNLOCK(mp);
1141 goto again; 1135 goto again;
1142 } 1136 }
1143 ip = ip->i_mnext; 1137 ip = ip->i_mnext;
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index 4ed7b6928cd7..4e1a5ec22fa3 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -31,6 +31,7 @@
31 */ 31 */
32 32
33#include "debug.h" 33#include "debug.h"
34#include "spin.h"
34 35
35#include <asm/page.h> 36#include <asm/page.h>
36#include <linux/sched.h> 37#include <linux/sched.h>
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 8d01dce8c532..92fd1d67f878 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -85,7 +85,7 @@ xfs_acl_vhasacl_default(
85{ 85{
86 int error; 86 int error;
87 87
88 if (vp->v_type != VDIR) 88 if (!VN_ISDIR(vp))
89 return 0; 89 return 0;
90 xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error); 90 xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error);
91 return (error == 0); 91 return (error == 0);
@@ -389,7 +389,7 @@ xfs_acl_allow_set(
389 389
390 if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND)) 390 if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND))
391 return EPERM; 391 return EPERM;
392 if (kind == _ACL_TYPE_DEFAULT && vp->v_type != VDIR) 392 if (kind == _ACL_TYPE_DEFAULT && !VN_ISDIR(vp))
393 return ENOTDIR; 393 return ENOTDIR;
394 if (vp->v_vfsp->vfs_flag & VFS_RDONLY) 394 if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
395 return EROFS; 395 return EROFS;
@@ -750,7 +750,7 @@ xfs_acl_inherit(
750 * If the new file is a directory, its default ACL is a copy of 750 * If the new file is a directory, its default ACL is a copy of
751 * the containing directory's default ACL. 751 * the containing directory's default ACL.
752 */ 752 */
753 if (vp->v_type == VDIR) 753 if (VN_ISDIR(vp))
754 xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error); 754 xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error);
755 if (!error && !basicperms) 755 if (!error && !basicperms)
756 xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error); 756 xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 6f5d283888aa..3e76def1283d 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -4754,10 +4754,20 @@ xfs_bmapi(
4754 error = xfs_mod_incore_sb(mp, 4754 error = xfs_mod_incore_sb(mp,
4755 XFS_SBS_FDBLOCKS, 4755 XFS_SBS_FDBLOCKS,
4756 -(alen), rsvd); 4756 -(alen), rsvd);
4757 if (!error) 4757 if (!error) {
4758 error = xfs_mod_incore_sb(mp, 4758 error = xfs_mod_incore_sb(mp,
4759 XFS_SBS_FDBLOCKS, 4759 XFS_SBS_FDBLOCKS,
4760 -(indlen), rsvd); 4760 -(indlen), rsvd);
4761 if (error && rt) {
4762 xfs_mod_incore_sb(ip->i_mount,
4763 XFS_SBS_FREXTENTS,
4764 extsz, rsvd);
4765 } else if (error) {
4766 xfs_mod_incore_sb(ip->i_mount,
4767 XFS_SBS_FDBLOCKS,
4768 alen, rsvd);
4769 }
4770 }
4761 4771
4762 if (error) { 4772 if (error) {
4763 if (XFS_IS_QUOTA_ON(ip->i_mount)) 4773 if (XFS_IS_QUOTA_ON(ip->i_mount))
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 30b8285ad476..a264657acfd9 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -274,6 +274,7 @@ xfs_buf_item_format(
274 ((bip->bli_format.blf_map_size - 1) * sizeof(uint))); 274 ((bip->bli_format.blf_map_size - 1) * sizeof(uint)));
275 vecp->i_addr = (xfs_caddr_t)&bip->bli_format; 275 vecp->i_addr = (xfs_caddr_t)&bip->bli_format;
276 vecp->i_len = base_size; 276 vecp->i_len = base_size;
277 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BFORMAT);
277 vecp++; 278 vecp++;
278 nvecs = 1; 279 nvecs = 1;
279 280
@@ -320,12 +321,14 @@ xfs_buf_item_format(
320 buffer_offset = first_bit * XFS_BLI_CHUNK; 321 buffer_offset = first_bit * XFS_BLI_CHUNK;
321 vecp->i_addr = xfs_buf_offset(bp, buffer_offset); 322 vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
322 vecp->i_len = nbits * XFS_BLI_CHUNK; 323 vecp->i_len = nbits * XFS_BLI_CHUNK;
324 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK);
323 nvecs++; 325 nvecs++;
324 break; 326 break;
325 } else if (next_bit != last_bit + 1) { 327 } else if (next_bit != last_bit + 1) {
326 buffer_offset = first_bit * XFS_BLI_CHUNK; 328 buffer_offset = first_bit * XFS_BLI_CHUNK;
327 vecp->i_addr = xfs_buf_offset(bp, buffer_offset); 329 vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
328 vecp->i_len = nbits * XFS_BLI_CHUNK; 330 vecp->i_len = nbits * XFS_BLI_CHUNK;
331 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK);
329 nvecs++; 332 nvecs++;
330 vecp++; 333 vecp++;
331 first_bit = next_bit; 334 first_bit = next_bit;
@@ -337,6 +340,7 @@ xfs_buf_item_format(
337 buffer_offset = first_bit * XFS_BLI_CHUNK; 340 buffer_offset = first_bit * XFS_BLI_CHUNK;
338 vecp->i_addr = xfs_buf_offset(bp, buffer_offset); 341 vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
339 vecp->i_len = nbits * XFS_BLI_CHUNK; 342 vecp->i_len = nbits * XFS_BLI_CHUNK;
343 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK);
340/* You would think we need to bump the nvecs here too, but we do not 344/* You would think we need to bump the nvecs here too, but we do not
341 * this number is used by recovery, and it gets confused by the boundary 345 * this number is used by recovery, and it gets confused by the boundary
342 * split here 346 * split here
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index 55c17adaaa37..19e872856f6b 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as 5 * under the terms of version 2 of the GNU General Public License as
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index db7cbd1bc857..cc7d1494a45d 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -107,6 +107,7 @@ xfs_efi_item_format(xfs_efi_log_item_t *efip,
107 107
108 log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format); 108 log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format);
109 log_vector->i_len = size; 109 log_vector->i_len = size;
110 XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFI_FORMAT);
110 ASSERT(size >= sizeof(xfs_efi_log_format_t)); 111 ASSERT(size >= sizeof(xfs_efi_log_format_t));
111} 112}
112 113
@@ -426,6 +427,7 @@ xfs_efd_item_format(xfs_efd_log_item_t *efdp,
426 427
427 log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format); 428 log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format);
428 log_vector->i_len = size; 429 log_vector->i_len = size;
430 XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFD_FORMAT);
429 ASSERT(size >= sizeof(xfs_efd_log_format_t)); 431 ASSERT(size >= sizeof(xfs_efd_log_format_t));
430} 432}
431 433
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index d3da00045f26..0d9ae8fb4138 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -30,6 +30,8 @@
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ 30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */ 31 */
32 32
33#include <linux/delay.h>
34
33#include "xfs.h" 35#include "xfs.h"
34 36
35#include "xfs_macros.h" 37#include "xfs_macros.h"
@@ -505,17 +507,15 @@ xfs_iget(
505 vnode_t *vp = NULL; 507 vnode_t *vp = NULL;
506 int error; 508 int error;
507 509
508retry:
509 XFS_STATS_INC(xs_ig_attempts); 510 XFS_STATS_INC(xs_ig_attempts);
510 511
512retry:
511 if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) { 513 if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) {
512 bhv_desc_t *bdp; 514 bhv_desc_t *bdp;
513 xfs_inode_t *ip; 515 xfs_inode_t *ip;
514 int newnode;
515 516
516 vp = LINVFS_GET_VP(inode); 517 vp = LINVFS_GET_VP(inode);
517 if (inode->i_state & I_NEW) { 518 if (inode->i_state & I_NEW) {
518inode_allocate:
519 vn_initialize(inode); 519 vn_initialize(inode);
520 error = xfs_iget_core(vp, mp, tp, ino, flags, 520 error = xfs_iget_core(vp, mp, tp, ino, flags,
521 lock_flags, ipp, bno); 521 lock_flags, ipp, bno);
@@ -526,32 +526,25 @@ inode_allocate:
526 iput(inode); 526 iput(inode);
527 } 527 }
528 } else { 528 } else {
529 /* These are true if the inode is in inactive or 529 /*
530 * reclaim. The linux inode is about to go away, 530 * If the inode is not fully constructed due to
531 * wait for that path to finish, and try again. 531 * filehandle mistmatches wait for the inode to go
532 * away and try again.
533 *
534 * iget_locked will call __wait_on_freeing_inode
535 * to wait for the inode to go away.
532 */ 536 */
533 if (vp->v_flag & (VINACT | VRECLM)) { 537 if (is_bad_inode(inode) ||
534 vn_wait(vp); 538 ((bdp = vn_bhv_lookup(VN_BHV_HEAD(vp),
539 &xfs_vnodeops)) == NULL)) {
535 iput(inode); 540 iput(inode);
541 delay(1);
536 goto retry; 542 goto retry;
537 } 543 }
538 544
539 if (is_bad_inode(inode)) {
540 iput(inode);
541 return EIO;
542 }
543
544 bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
545 if (bdp == NULL) {
546 XFS_STATS_INC(xs_ig_dup);
547 goto inode_allocate;
548 }
549 ip = XFS_BHVTOI(bdp); 545 ip = XFS_BHVTOI(bdp);
550 if (lock_flags != 0) 546 if (lock_flags != 0)
551 xfs_ilock(ip, lock_flags); 547 xfs_ilock(ip, lock_flags);
552 newnode = (ip->i_d.di_mode == 0);
553 if (newnode)
554 xfs_iocore_inode_reinit(ip);
555 XFS_STATS_INC(xs_ig_found); 548 XFS_STATS_INC(xs_ig_found);
556 *ipp = ip; 549 *ipp = ip;
557 error = 0; 550 error = 0;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 34bdf5909687..db43308aae93 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1128,7 +1128,6 @@ xfs_ialloc(
1128 ASSERT(ip != NULL); 1128 ASSERT(ip != NULL);
1129 1129
1130 vp = XFS_ITOV(ip); 1130 vp = XFS_ITOV(ip);
1131 vp->v_type = IFTOVT(mode);
1132 ip->i_d.di_mode = (__uint16_t)mode; 1131 ip->i_d.di_mode = (__uint16_t)mode;
1133 ip->i_d.di_onlink = 0; 1132 ip->i_d.di_onlink = 0;
1134 ip->i_d.di_nlink = nlink; 1133 ip->i_d.di_nlink = nlink;
@@ -1250,7 +1249,7 @@ xfs_ialloc(
1250 */ 1249 */
1251 xfs_trans_log_inode(tp, ip, flags); 1250 xfs_trans_log_inode(tp, ip, flags);
1252 1251
1253 /* now that we have a v_type we can set Linux inode ops (& unlock) */ 1252 /* now that we have an i_mode we can set Linux inode ops (& unlock) */
1254 VFS_INIT_VNODE(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1); 1253 VFS_INIT_VNODE(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1);
1255 1254
1256 *ipp = ip; 1255 *ipp = ip;
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 0eed30f5cb19..276ec70eb7f9 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -248,6 +248,7 @@ xfs_inode_item_format(
248 248
249 vecp->i_addr = (xfs_caddr_t)&iip->ili_format; 249 vecp->i_addr = (xfs_caddr_t)&iip->ili_format;
250 vecp->i_len = sizeof(xfs_inode_log_format_t); 250 vecp->i_len = sizeof(xfs_inode_log_format_t);
251 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IFORMAT);
251 vecp++; 252 vecp++;
252 nvecs = 1; 253 nvecs = 1;
253 254
@@ -292,6 +293,7 @@ xfs_inode_item_format(
292 293
293 vecp->i_addr = (xfs_caddr_t)&ip->i_d; 294 vecp->i_addr = (xfs_caddr_t)&ip->i_d;
294 vecp->i_len = sizeof(xfs_dinode_core_t); 295 vecp->i_len = sizeof(xfs_dinode_core_t);
296 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);
295 vecp++; 297 vecp++;
296 nvecs++; 298 nvecs++;
297 iip->ili_format.ilf_fields |= XFS_ILOG_CORE; 299 iip->ili_format.ilf_fields |= XFS_ILOG_CORE;
@@ -349,6 +351,7 @@ xfs_inode_item_format(
349 vecp->i_addr = 351 vecp->i_addr =
350 (char *)(ip->i_df.if_u1.if_extents); 352 (char *)(ip->i_df.if_u1.if_extents);
351 vecp->i_len = ip->i_df.if_bytes; 353 vecp->i_len = ip->i_df.if_bytes;
354 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT);
352 } else 355 } else
353#endif 356#endif
354 { 357 {
@@ -367,6 +370,7 @@ xfs_inode_item_format(
367 vecp->i_addr = (xfs_caddr_t)ext_buffer; 370 vecp->i_addr = (xfs_caddr_t)ext_buffer;
368 vecp->i_len = xfs_iextents_copy(ip, ext_buffer, 371 vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
369 XFS_DATA_FORK); 372 XFS_DATA_FORK);
373 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT);
370 } 374 }
371 ASSERT(vecp->i_len <= ip->i_df.if_bytes); 375 ASSERT(vecp->i_len <= ip->i_df.if_bytes);
372 iip->ili_format.ilf_dsize = vecp->i_len; 376 iip->ili_format.ilf_dsize = vecp->i_len;
@@ -384,6 +388,7 @@ xfs_inode_item_format(
384 ASSERT(ip->i_df.if_broot != NULL); 388 ASSERT(ip->i_df.if_broot != NULL);
385 vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot; 389 vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot;
386 vecp->i_len = ip->i_df.if_broot_bytes; 390 vecp->i_len = ip->i_df.if_broot_bytes;
391 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IBROOT);
387 vecp++; 392 vecp++;
388 nvecs++; 393 nvecs++;
389 iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; 394 iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes;
@@ -409,6 +414,7 @@ xfs_inode_item_format(
409 ASSERT((ip->i_df.if_real_bytes == 0) || 414 ASSERT((ip->i_df.if_real_bytes == 0) ||
410 (ip->i_df.if_real_bytes == data_bytes)); 415 (ip->i_df.if_real_bytes == data_bytes));
411 vecp->i_len = (int)data_bytes; 416 vecp->i_len = (int)data_bytes;
417 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ILOCAL);
412 vecp++; 418 vecp++;
413 nvecs++; 419 nvecs++;
414 iip->ili_format.ilf_dsize = (unsigned)data_bytes; 420 iip->ili_format.ilf_dsize = (unsigned)data_bytes;
@@ -486,6 +492,7 @@ xfs_inode_item_format(
486 vecp->i_len = xfs_iextents_copy(ip, ext_buffer, 492 vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
487 XFS_ATTR_FORK); 493 XFS_ATTR_FORK);
488#endif 494#endif
495 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_EXT);
489 iip->ili_format.ilf_asize = vecp->i_len; 496 iip->ili_format.ilf_asize = vecp->i_len;
490 vecp++; 497 vecp++;
491 nvecs++; 498 nvecs++;
@@ -500,6 +507,7 @@ xfs_inode_item_format(
500 ASSERT(ip->i_afp->if_broot != NULL); 507 ASSERT(ip->i_afp->if_broot != NULL);
501 vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot; 508 vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot;
502 vecp->i_len = ip->i_afp->if_broot_bytes; 509 vecp->i_len = ip->i_afp->if_broot_bytes;
510 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_BROOT);
503 vecp++; 511 vecp++;
504 nvecs++; 512 nvecs++;
505 iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; 513 iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes;
@@ -523,6 +531,7 @@ xfs_inode_item_format(
523 ASSERT((ip->i_afp->if_real_bytes == 0) || 531 ASSERT((ip->i_afp->if_real_bytes == 0) ||
524 (ip->i_afp->if_real_bytes == data_bytes)); 532 (ip->i_afp->if_real_bytes == data_bytes));
525 vecp->i_len = (int)data_bytes; 533 vecp->i_len = (int)data_bytes;
534 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_LOCAL);
526 vecp++; 535 vecp++;
527 nvecs++; 536 nvecs++;
528 iip->ili_format.ilf_asize = (unsigned)data_bytes; 537 iip->ili_format.ilf_asize = (unsigned)data_bytes;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 2edd6769e5d3..d0f5be63cddb 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -226,13 +226,12 @@ xfs_iomap(
226 xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, io, offset, count); 226 xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, io, offset, count);
227 lockmode = XFS_LCK_MAP_SHARED(mp, io); 227 lockmode = XFS_LCK_MAP_SHARED(mp, io);
228 bmapi_flags = XFS_BMAPI_ENTIRE; 228 bmapi_flags = XFS_BMAPI_ENTIRE;
229 if (flags & BMAPI_IGNSTATE)
230 bmapi_flags |= XFS_BMAPI_IGSTATE;
231 break; 229 break;
232 case BMAPI_WRITE: 230 case BMAPI_WRITE:
233 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, io, offset, count); 231 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, io, offset, count);
234 lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR; 232 lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR;
235 bmapi_flags = 0; 233 if (flags & BMAPI_IGNSTATE)
234 bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
236 XFS_ILOCK(mp, io, lockmode); 235 XFS_ILOCK(mp, io, lockmode);
237 break; 236 break;
238 case BMAPI_ALLOCATE: 237 case BMAPI_ALLOCATE:
@@ -391,9 +390,9 @@ xfs_iomap_write_direct(
391 xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS], *imapp; 390 xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS], *imapp;
392 xfs_bmap_free_t free_list; 391 xfs_bmap_free_t free_list;
393 int aeof; 392 int aeof;
394 xfs_filblks_t datablocks, qblocks, resblks; 393 xfs_filblks_t qblocks, resblks;
395 int committed; 394 int committed;
396 int numrtextents; 395 int resrtextents;
397 396
398 /* 397 /*
399 * Make sure that the dquots are there. This doesn't hold 398 * Make sure that the dquots are there. This doesn't hold
@@ -434,14 +433,14 @@ xfs_iomap_write_direct(
434 433
435 if (!(extsz = ip->i_d.di_extsize)) 434 if (!(extsz = ip->i_d.di_extsize))
436 extsz = mp->m_sb.sb_rextsize; 435 extsz = mp->m_sb.sb_rextsize;
437 numrtextents = qblocks = (count_fsb + extsz - 1); 436 resrtextents = qblocks = (count_fsb + extsz - 1);
438 do_div(numrtextents, mp->m_sb.sb_rextsize); 437 do_div(resrtextents, mp->m_sb.sb_rextsize);
438 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
439 quota_flag = XFS_QMOPT_RES_RTBLKS; 439 quota_flag = XFS_QMOPT_RES_RTBLKS;
440 datablocks = 0;
441 } else { 440 } else {
442 datablocks = qblocks = count_fsb; 441 resrtextents = 0;
442 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, count_fsb);
443 quota_flag = XFS_QMOPT_RES_REGBLKS; 443 quota_flag = XFS_QMOPT_RES_REGBLKS;
444 numrtextents = 0;
445 } 444 }
446 445
447 /* 446 /*
@@ -449,9 +448,8 @@ xfs_iomap_write_direct(
449 */ 448 */
450 xfs_iunlock(ip, XFS_ILOCK_EXCL); 449 xfs_iunlock(ip, XFS_ILOCK_EXCL);
451 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 450 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
452 resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks);
453 error = xfs_trans_reserve(tp, resblks, 451 error = xfs_trans_reserve(tp, resblks,
454 XFS_WRITE_LOG_RES(mp), numrtextents, 452 XFS_WRITE_LOG_RES(mp), resrtextents,
455 XFS_TRANS_PERM_LOG_RES, 453 XFS_TRANS_PERM_LOG_RES,
456 XFS_WRITE_LOG_COUNT); 454 XFS_WRITE_LOG_COUNT);
457 455
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 1cd2ac163877..54a6f1142403 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -159,11 +159,15 @@ xfs_buftarg_t *xlog_target;
159void 159void
160xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) 160xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
161{ 161{
162 if (! log->l_grant_trace) { 162 unsigned long cnts;
163 log->l_grant_trace = ktrace_alloc(1024, KM_NOSLEEP); 163
164 if (! log->l_grant_trace) 164 if (!log->l_grant_trace) {
165 log->l_grant_trace = ktrace_alloc(2048, KM_NOSLEEP);
166 if (!log->l_grant_trace)
165 return; 167 return;
166 } 168 }
169 /* ticket counts are 1 byte each */
170 cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8;
167 171
168 ktrace_enter(log->l_grant_trace, 172 ktrace_enter(log->l_grant_trace,
169 (void *)tic, 173 (void *)tic,
@@ -178,10 +182,10 @@ xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
178 (void *)((unsigned long)CYCLE_LSN(log->l_tail_lsn)), 182 (void *)((unsigned long)CYCLE_LSN(log->l_tail_lsn)),
179 (void *)((unsigned long)BLOCK_LSN(log->l_tail_lsn)), 183 (void *)((unsigned long)BLOCK_LSN(log->l_tail_lsn)),
180 (void *)string, 184 (void *)string,
181 (void *)((unsigned long)13), 185 (void *)((unsigned long)tic->t_trans_type),
182 (void *)((unsigned long)14), 186 (void *)cnts,
183 (void *)((unsigned long)15), 187 (void *)((unsigned long)tic->t_curr_res),
184 (void *)((unsigned long)16)); 188 (void *)((unsigned long)tic->t_unit_res));
185} 189}
186 190
187void 191void
@@ -274,9 +278,11 @@ xfs_log_done(xfs_mount_t *mp,
274 * Release ticket if not permanent reservation or a specifc 278 * Release ticket if not permanent reservation or a specifc
275 * request has been made to release a permanent reservation. 279 * request has been made to release a permanent reservation.
276 */ 280 */
281 xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)");
277 xlog_ungrant_log_space(log, ticket); 282 xlog_ungrant_log_space(log, ticket);
278 xlog_state_put_ticket(log, ticket); 283 xlog_state_put_ticket(log, ticket);
279 } else { 284 } else {
285 xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)");
280 xlog_regrant_reserve_log_space(log, ticket); 286 xlog_regrant_reserve_log_space(log, ticket);
281 } 287 }
282 288
@@ -399,7 +405,8 @@ xfs_log_reserve(xfs_mount_t *mp,
399 int cnt, 405 int cnt,
400 xfs_log_ticket_t *ticket, 406 xfs_log_ticket_t *ticket,
401 __uint8_t client, 407 __uint8_t client,
402 uint flags) 408 uint flags,
409 uint t_type)
403{ 410{
404 xlog_t *log = mp->m_log; 411 xlog_t *log = mp->m_log;
405 xlog_ticket_t *internal_ticket; 412 xlog_ticket_t *internal_ticket;
@@ -421,13 +428,19 @@ xfs_log_reserve(xfs_mount_t *mp,
421 if (*ticket != NULL) { 428 if (*ticket != NULL) {
422 ASSERT(flags & XFS_LOG_PERM_RESERV); 429 ASSERT(flags & XFS_LOG_PERM_RESERV);
423 internal_ticket = (xlog_ticket_t *)*ticket; 430 internal_ticket = (xlog_ticket_t *)*ticket;
431 xlog_trace_loggrant(log, internal_ticket, "xfs_log_reserve: existing ticket (permanent trans)");
424 xlog_grant_push_ail(mp, internal_ticket->t_unit_res); 432 xlog_grant_push_ail(mp, internal_ticket->t_unit_res);
425 retval = xlog_regrant_write_log_space(log, internal_ticket); 433 retval = xlog_regrant_write_log_space(log, internal_ticket);
426 } else { 434 } else {
427 /* may sleep if need to allocate more tickets */ 435 /* may sleep if need to allocate more tickets */
428 internal_ticket = xlog_ticket_get(log, unit_bytes, cnt, 436 internal_ticket = xlog_ticket_get(log, unit_bytes, cnt,
429 client, flags); 437 client, flags);
438 internal_ticket->t_trans_type = t_type;
430 *ticket = internal_ticket; 439 *ticket = internal_ticket;
440 xlog_trace_loggrant(log, internal_ticket,
441 (internal_ticket->t_flags & XLOG_TIC_PERM_RESERV) ?
442 "xfs_log_reserve: create new ticket (permanent trans)" :
443 "xfs_log_reserve: create new ticket");
431 xlog_grant_push_ail(mp, 444 xlog_grant_push_ail(mp,
432 (internal_ticket->t_unit_res * 445 (internal_ticket->t_unit_res *
433 internal_ticket->t_cnt)); 446 internal_ticket->t_cnt));
@@ -601,8 +614,9 @@ xfs_log_unmount_write(xfs_mount_t *mp)
601 if (! (XLOG_FORCED_SHUTDOWN(log))) { 614 if (! (XLOG_FORCED_SHUTDOWN(log))) {
602 reg[0].i_addr = (void*)&magic; 615 reg[0].i_addr = (void*)&magic;
603 reg[0].i_len = sizeof(magic); 616 reg[0].i_len = sizeof(magic);
617 XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_UNMOUNT);
604 618
605 error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0); 619 error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0, 0);
606 if (!error) { 620 if (!error) {
607 /* remove inited flag */ 621 /* remove inited flag */
608 ((xlog_ticket_t *)tic)->t_flags = 0; 622 ((xlog_ticket_t *)tic)->t_flags = 0;
@@ -1272,6 +1286,7 @@ xlog_commit_record(xfs_mount_t *mp,
1272 1286
1273 reg[0].i_addr = NULL; 1287 reg[0].i_addr = NULL;
1274 reg[0].i_len = 0; 1288 reg[0].i_len = 0;
1289 XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_COMMIT);
1275 1290
1276 ASSERT_ALWAYS(iclog); 1291 ASSERT_ALWAYS(iclog);
1277 if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp, 1292 if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp,
@@ -1605,6 +1620,117 @@ xlog_state_finish_copy(xlog_t *log,
1605 1620
1606 1621
1607/* 1622/*
1623 * print out info relating to regions written which consume
1624 * the reservation
1625 */
1626#if defined(XFS_LOG_RES_DEBUG)
1627STATIC void
1628xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
1629{
1630 uint i;
1631 uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t);
1632
1633 /* match with XLOG_REG_TYPE_* in xfs_log.h */
1634 static char *res_type_str[XLOG_REG_TYPE_MAX] = {
1635 "bformat",
1636 "bchunk",
1637 "efi_format",
1638 "efd_format",
1639 "iformat",
1640 "icore",
1641 "iext",
1642 "ibroot",
1643 "ilocal",
1644 "iattr_ext",
1645 "iattr_broot",
1646 "iattr_local",
1647 "qformat",
1648 "dquot",
1649 "quotaoff",
1650 "LR header",
1651 "unmount",
1652 "commit",
1653 "trans header"
1654 };
1655 static char *trans_type_str[XFS_TRANS_TYPE_MAX] = {
1656 "SETATTR_NOT_SIZE",
1657 "SETATTR_SIZE",
1658 "INACTIVE",
1659 "CREATE",
1660 "CREATE_TRUNC",
1661 "TRUNCATE_FILE",
1662 "REMOVE",
1663 "LINK",
1664 "RENAME",
1665 "MKDIR",
1666 "RMDIR",
1667 "SYMLINK",
1668 "SET_DMATTRS",
1669 "GROWFS",
1670 "STRAT_WRITE",
1671 "DIOSTRAT",
1672 "WRITE_SYNC",
1673 "WRITEID",
1674 "ADDAFORK",
1675 "ATTRINVAL",
1676 "ATRUNCATE",
1677 "ATTR_SET",
1678 "ATTR_RM",
1679 "ATTR_FLAG",
1680 "CLEAR_AGI_BUCKET",
1681 "QM_SBCHANGE",
1682 "DUMMY1",
1683 "DUMMY2",
1684 "QM_QUOTAOFF",
1685 "QM_DQALLOC",
1686 "QM_SETQLIM",
1687 "QM_DQCLUSTER",
1688 "QM_QINOCREATE",
1689 "QM_QUOTAOFF_END",
1690 "SB_UNIT",
1691 "FSYNC_TS",
1692 "GROWFSRT_ALLOC",
1693 "GROWFSRT_ZERO",
1694 "GROWFSRT_FREE",
1695 "SWAPEXT"
1696 };
1697
1698 xfs_fs_cmn_err(CE_WARN, mp,
1699 "xfs_log_write: reservation summary:\n"
1700 " trans type = %s (%u)\n"
1701 " unit res = %d bytes\n"
1702 " current res = %d bytes\n"
1703 " total reg = %u bytes (o/flow = %u bytes)\n"
1704 " ophdrs = %u (ophdr space = %u bytes)\n"
1705 " ophdr + reg = %u bytes\n"
1706 " num regions = %u\n",
1707 ((ticket->t_trans_type <= 0 ||
1708 ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ?
1709 "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]),
1710 ticket->t_trans_type,
1711 ticket->t_unit_res,
1712 ticket->t_curr_res,
1713 ticket->t_res_arr_sum, ticket->t_res_o_flow,
1714 ticket->t_res_num_ophdrs, ophdr_spc,
1715 ticket->t_res_arr_sum +
1716 ticket->t_res_o_flow + ophdr_spc,
1717 ticket->t_res_num);
1718
1719 for (i = 0; i < ticket->t_res_num; i++) {
1720 uint r_type = ticket->t_res_arr[i].r_type;
1721 cmn_err(CE_WARN,
1722 "region[%u]: %s - %u bytes\n",
1723 i,
1724 ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ?
1725 "bad-rtype" : res_type_str[r_type-1]),
1726 ticket->t_res_arr[i].r_len);
1727 }
1728}
1729#else
1730#define xlog_print_tic_res(mp, ticket)
1731#endif
1732
1733/*
1608 * Write some region out to in-core log 1734 * Write some region out to in-core log
1609 * 1735 *
1610 * This will be called when writing externally provided regions or when 1736 * This will be called when writing externally provided regions or when
@@ -1677,16 +1803,21 @@ xlog_write(xfs_mount_t * mp,
1677 * xlog_op_header_t and may need to be double word aligned. 1803 * xlog_op_header_t and may need to be double word aligned.
1678 */ 1804 */
1679 len = 0; 1805 len = 0;
1680 if (ticket->t_flags & XLOG_TIC_INITED) /* acct for start rec of xact */ 1806 if (ticket->t_flags & XLOG_TIC_INITED) { /* acct for start rec of xact */
1681 len += sizeof(xlog_op_header_t); 1807 len += sizeof(xlog_op_header_t);
1808 XLOG_TIC_ADD_OPHDR(ticket);
1809 }
1682 1810
1683 for (index = 0; index < nentries; index++) { 1811 for (index = 0; index < nentries; index++) {
1684 len += sizeof(xlog_op_header_t); /* each region gets >= 1 */ 1812 len += sizeof(xlog_op_header_t); /* each region gets >= 1 */
1813 XLOG_TIC_ADD_OPHDR(ticket);
1685 len += reg[index].i_len; 1814 len += reg[index].i_len;
1815 XLOG_TIC_ADD_REGION(ticket, reg[index].i_len, reg[index].i_type);
1686 } 1816 }
1687 contwr = *start_lsn = 0; 1817 contwr = *start_lsn = 0;
1688 1818
1689 if (ticket->t_curr_res < len) { 1819 if (ticket->t_curr_res < len) {
1820 xlog_print_tic_res(mp, ticket);
1690#ifdef DEBUG 1821#ifdef DEBUG
1691 xlog_panic( 1822 xlog_panic(
1692 "xfs_log_write: reservation ran out. Need to up reservation"); 1823 "xfs_log_write: reservation ran out. Need to up reservation");
@@ -1790,6 +1921,7 @@ xlog_write(xfs_mount_t * mp,
1790 len += sizeof(xlog_op_header_t); /* from splitting of region */ 1921 len += sizeof(xlog_op_header_t); /* from splitting of region */
1791 /* account for new log op header */ 1922 /* account for new log op header */
1792 ticket->t_curr_res -= sizeof(xlog_op_header_t); 1923 ticket->t_curr_res -= sizeof(xlog_op_header_t);
1924 XLOG_TIC_ADD_OPHDR(ticket);
1793 } 1925 }
1794 xlog_verify_dest_ptr(log, ptr); 1926 xlog_verify_dest_ptr(log, ptr);
1795 1927
@@ -2282,6 +2414,9 @@ restart:
2282 */ 2414 */
2283 if (log_offset == 0) { 2415 if (log_offset == 0) {
2284 ticket->t_curr_res -= log->l_iclog_hsize; 2416 ticket->t_curr_res -= log->l_iclog_hsize;
2417 XLOG_TIC_ADD_REGION(ticket,
2418 log->l_iclog_hsize,
2419 XLOG_REG_TYPE_LRHEADER);
2285 INT_SET(head->h_cycle, ARCH_CONVERT, log->l_curr_cycle); 2420 INT_SET(head->h_cycle, ARCH_CONVERT, log->l_curr_cycle);
2286 ASSIGN_LSN(head->h_lsn, log); 2421 ASSIGN_LSN(head->h_lsn, log);
2287 ASSERT(log->l_curr_block >= 0); 2422 ASSERT(log->l_curr_block >= 0);
@@ -2468,6 +2603,7 @@ xlog_regrant_write_log_space(xlog_t *log,
2468#endif 2603#endif
2469 2604
2470 tic->t_curr_res = tic->t_unit_res; 2605 tic->t_curr_res = tic->t_unit_res;
2606 XLOG_TIC_RESET_RES(tic);
2471 2607
2472 if (tic->t_cnt > 0) 2608 if (tic->t_cnt > 0)
2473 return (0); 2609 return (0);
@@ -2608,6 +2744,7 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2608 XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w'); 2744 XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w');
2609 XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r'); 2745 XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r');
2610 ticket->t_curr_res = ticket->t_unit_res; 2746 ticket->t_curr_res = ticket->t_unit_res;
2747 XLOG_TIC_RESET_RES(ticket);
2611 xlog_trace_loggrant(log, ticket, 2748 xlog_trace_loggrant(log, ticket,
2612 "xlog_regrant_reserve_log_space: sub current res"); 2749 "xlog_regrant_reserve_log_space: sub current res");
2613 xlog_verify_grant_head(log, 1); 2750 xlog_verify_grant_head(log, 1);
@@ -2624,6 +2761,7 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2624 xlog_verify_grant_head(log, 0); 2761 xlog_verify_grant_head(log, 0);
2625 GRANT_UNLOCK(log, s); 2762 GRANT_UNLOCK(log, s);
2626 ticket->t_curr_res = ticket->t_unit_res; 2763 ticket->t_curr_res = ticket->t_unit_res;
2764 XLOG_TIC_RESET_RES(ticket);
2627} /* xlog_regrant_reserve_log_space */ 2765} /* xlog_regrant_reserve_log_space */
2628 2766
2629 2767
@@ -3179,29 +3317,57 @@ xlog_ticket_get(xlog_t *log,
3179 * and their unit amount is the total amount of space required. 3317 * and their unit amount is the total amount of space required.
3180 * 3318 *
3181 * The following lines of code account for non-transaction data 3319 * The following lines of code account for non-transaction data
3182 * which occupy space in the on-disk log. 3320 * which occupy space in the on-disk log.
3321 *
3322 * Normal form of a transaction is:
3323 * <oph><trans-hdr><start-oph><reg1-oph><reg1><reg2-oph>...<commit-oph>
3324 * and then there are LR hdrs, split-recs and roundoff at end of syncs.
3325 *
3326 * We need to account for all the leadup data and trailer data
3327 * around the transaction data.
3328 * And then we need to account for the worst case in terms of using
3329 * more space.
3330 * The worst case will happen if:
3331 * - the placement of the transaction happens to be such that the
3332 * roundoff is at its maximum
3333 * - the transaction data is synced before the commit record is synced
3334 * i.e. <transaction-data><roundoff> | <commit-rec><roundoff>
3335 * Therefore the commit record is in its own Log Record.
3336 * This can happen as the commit record is called with its
3337 * own region to xlog_write().
3338 * This then means that in the worst case, roundoff can happen for
3339 * the commit-rec as well.
3340 * The commit-rec is smaller than padding in this scenario and so it is
3341 * not added separately.
3183 */ 3342 */
3184 3343
3344 /* for trans header */
3345 unit_bytes += sizeof(xlog_op_header_t);
3346 unit_bytes += sizeof(xfs_trans_header_t);
3347
3185 /* for start-rec */ 3348 /* for start-rec */
3186 unit_bytes += sizeof(xlog_op_header_t); 3349 unit_bytes += sizeof(xlog_op_header_t);
3350
3351 /* for LR headers */
3352 num_headers = ((unit_bytes + log->l_iclog_size-1) >> log->l_iclog_size_log);
3353 unit_bytes += log->l_iclog_hsize * num_headers;
3354
3355 /* for commit-rec LR header - note: padding will subsume the ophdr */
3356 unit_bytes += log->l_iclog_hsize;
3357
3358 /* for split-recs - ophdrs added when data split over LRs */
3359 unit_bytes += sizeof(xlog_op_header_t) * num_headers;
3187 3360
3188 /* for padding */ 3361 /* for roundoff padding for transaction data and one for commit record */
3189 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) && 3362 if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) &&
3190 log->l_mp->m_sb.sb_logsunit > 1) { 3363 log->l_mp->m_sb.sb_logsunit > 1) {
3191 /* log su roundoff */ 3364 /* log su roundoff */
3192 unit_bytes += log->l_mp->m_sb.sb_logsunit; 3365 unit_bytes += 2*log->l_mp->m_sb.sb_logsunit;
3193 } else { 3366 } else {
3194 /* BB roundoff */ 3367 /* BB roundoff */
3195 unit_bytes += BBSIZE; 3368 unit_bytes += 2*BBSIZE;
3196 } 3369 }
3197 3370
3198 /* for commit-rec */
3199 unit_bytes += sizeof(xlog_op_header_t);
3200
3201 /* for LR headers */
3202 num_headers = ((unit_bytes + log->l_iclog_size-1) >> log->l_iclog_size_log);
3203 unit_bytes += log->l_iclog_hsize * num_headers;
3204
3205 tic->t_unit_res = unit_bytes; 3371 tic->t_unit_res = unit_bytes;
3206 tic->t_curr_res = unit_bytes; 3372 tic->t_curr_res = unit_bytes;
3207 tic->t_cnt = cnt; 3373 tic->t_cnt = cnt;
@@ -3209,10 +3375,13 @@ xlog_ticket_get(xlog_t *log,
3209 tic->t_tid = (xlog_tid_t)((__psint_t)tic & 0xffffffff); 3375 tic->t_tid = (xlog_tid_t)((__psint_t)tic & 0xffffffff);
3210 tic->t_clientid = client; 3376 tic->t_clientid = client;
3211 tic->t_flags = XLOG_TIC_INITED; 3377 tic->t_flags = XLOG_TIC_INITED;
3378 tic->t_trans_type = 0;
3212 if (xflags & XFS_LOG_PERM_RESERV) 3379 if (xflags & XFS_LOG_PERM_RESERV)
3213 tic->t_flags |= XLOG_TIC_PERM_RESERV; 3380 tic->t_flags |= XLOG_TIC_PERM_RESERV;
3214 sv_init(&(tic->t_sema), SV_DEFAULT, "logtick"); 3381 sv_init(&(tic->t_sema), SV_DEFAULT, "logtick");
3215 3382
3383 XLOG_TIC_RESET_RES(tic);
3384
3216 return tic; 3385 return tic;
3217} /* xlog_ticket_get */ 3386} /* xlog_ticket_get */
3218 3387
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 0db122ddda3f..18961119fc65 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -114,9 +114,44 @@ xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
114#define XFS_VOLUME 0x2 114#define XFS_VOLUME 0x2
115#define XFS_LOG 0xaa 115#define XFS_LOG 0xaa
116 116
117
118/* Region types for iovec's i_type */
119#if defined(XFS_LOG_RES_DEBUG)
120#define XLOG_REG_TYPE_BFORMAT 1
121#define XLOG_REG_TYPE_BCHUNK 2
122#define XLOG_REG_TYPE_EFI_FORMAT 3
123#define XLOG_REG_TYPE_EFD_FORMAT 4
124#define XLOG_REG_TYPE_IFORMAT 5
125#define XLOG_REG_TYPE_ICORE 6
126#define XLOG_REG_TYPE_IEXT 7
127#define XLOG_REG_TYPE_IBROOT 8
128#define XLOG_REG_TYPE_ILOCAL 9
129#define XLOG_REG_TYPE_IATTR_EXT 10
130#define XLOG_REG_TYPE_IATTR_BROOT 11
131#define XLOG_REG_TYPE_IATTR_LOCAL 12
132#define XLOG_REG_TYPE_QFORMAT 13
133#define XLOG_REG_TYPE_DQUOT 14
134#define XLOG_REG_TYPE_QUOTAOFF 15
135#define XLOG_REG_TYPE_LRHEADER 16
136#define XLOG_REG_TYPE_UNMOUNT 17
137#define XLOG_REG_TYPE_COMMIT 18
138#define XLOG_REG_TYPE_TRANSHDR 19
139#define XLOG_REG_TYPE_MAX 19
140#endif
141
142#if defined(XFS_LOG_RES_DEBUG)
143#define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t))
144#else
145#define XLOG_VEC_SET_TYPE(vecp, t)
146#endif
147
148
117typedef struct xfs_log_iovec { 149typedef struct xfs_log_iovec {
118 xfs_caddr_t i_addr; /* beginning address of region */ 150 xfs_caddr_t i_addr; /* beginning address of region */
119 int i_len; /* length in bytes of region */ 151 int i_len; /* length in bytes of region */
152#if defined(XFS_LOG_RES_DEBUG)
153 uint i_type; /* type of region */
154#endif
120} xfs_log_iovec_t; 155} xfs_log_iovec_t;
121 156
122typedef void* xfs_log_ticket_t; 157typedef void* xfs_log_ticket_t;
@@ -159,7 +194,8 @@ int xfs_log_reserve(struct xfs_mount *mp,
159 int count, 194 int count,
160 xfs_log_ticket_t *ticket, 195 xfs_log_ticket_t *ticket,
161 __uint8_t clientid, 196 __uint8_t clientid,
162 uint flags); 197 uint flags,
198 uint t_type);
163int xfs_log_write(struct xfs_mount *mp, 199int xfs_log_write(struct xfs_mount *mp,
164 xfs_log_iovec_t region[], 200 xfs_log_iovec_t region[],
165 int nentries, 201 int nentries,
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 1a1d452f15f9..eb7fdc6ebc32 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -335,18 +335,66 @@ typedef __uint32_t xlog_tid_t;
335 335
336#define XLOG_COVER_OPS 5 336#define XLOG_COVER_OPS 5
337 337
338
339/* Ticket reservation region accounting */
340#if defined(XFS_LOG_RES_DEBUG)
341#define XLOG_TIC_LEN_MAX 15
342#define XLOG_TIC_RESET_RES(t) ((t)->t_res_num = \
343 (t)->t_res_arr_sum = (t)->t_res_num_ophdrs = 0)
344#define XLOG_TIC_ADD_OPHDR(t) ((t)->t_res_num_ophdrs++)
345#define XLOG_TIC_ADD_REGION(t, len, type) \
346 do { \
347 if ((t)->t_res_num == XLOG_TIC_LEN_MAX) { \
348 /* add to overflow and start again */ \
349 (t)->t_res_o_flow += (t)->t_res_arr_sum; \
350 (t)->t_res_num = 0; \
351 (t)->t_res_arr_sum = 0; \
352 } \
353 (t)->t_res_arr[(t)->t_res_num].r_len = (len); \
354 (t)->t_res_arr[(t)->t_res_num].r_type = (type); \
355 (t)->t_res_arr_sum += (len); \
356 (t)->t_res_num++; \
357 } while (0)
358
359/*
360 * Reservation region
361 * As would be stored in xfs_log_iovec but without the i_addr which
362 * we don't care about.
363 */
364typedef struct xlog_res {
365 uint r_len;
366 uint r_type;
367} xlog_res_t;
368#else
369#define XLOG_TIC_RESET_RES(t)
370#define XLOG_TIC_ADD_OPHDR(t)
371#define XLOG_TIC_ADD_REGION(t, len, type)
372#endif
373
374
338typedef struct xlog_ticket { 375typedef struct xlog_ticket {
339 sv_t t_sema; /* sleep on this semaphore :20 */ 376 sv_t t_sema; /* sleep on this semaphore : 20 */
340 struct xlog_ticket *t_next; /* : 4 */ 377 struct xlog_ticket *t_next; /* :4|8 */
341 struct xlog_ticket *t_prev; /* : 4 */ 378 struct xlog_ticket *t_prev; /* :4|8 */
342 xlog_tid_t t_tid; /* transaction identifier : 4 */ 379 xlog_tid_t t_tid; /* transaction identifier : 4 */
343 int t_curr_res; /* current reservation in bytes : 4 */ 380 int t_curr_res; /* current reservation in bytes : 4 */
344 int t_unit_res; /* unit reservation in bytes : 4 */ 381 int t_unit_res; /* unit reservation in bytes : 4 */
345 __uint8_t t_ocnt; /* original count : 1 */ 382 char t_ocnt; /* original count : 1 */
346 __uint8_t t_cnt; /* current count : 1 */ 383 char t_cnt; /* current count : 1 */
347 __uint8_t t_clientid; /* who does this belong to; : 1 */ 384 char t_clientid; /* who does this belong to; : 1 */
348 __uint8_t t_flags; /* properties of reservation : 1 */ 385 char t_flags; /* properties of reservation : 1 */
386 uint t_trans_type; /* transaction type : 4 */
387
388#if defined (XFS_LOG_RES_DEBUG)
389 /* reservation array fields */
390 uint t_res_num; /* num in array : 4 */
391 xlog_res_t t_res_arr[XLOG_TIC_LEN_MAX]; /* array of res : X */
392 uint t_res_num_ophdrs; /* num op hdrs : 4 */
393 uint t_res_arr_sum; /* array sum : 4 */
394 uint t_res_o_flow; /* sum overflow : 4 */
395#endif
349} xlog_ticket_t; 396} xlog_ticket_t;
397
350#endif 398#endif
351 399
352 400
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 0aac28ddb81c..14faabaabf29 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1387,7 +1387,7 @@ xlog_recover_add_to_cont_trans(
1387 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; 1387 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
1388 old_len = item->ri_buf[item->ri_cnt-1].i_len; 1388 old_len = item->ri_buf[item->ri_cnt-1].i_len;
1389 1389
1390 ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0); 1390 ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u);
1391 memcpy(&ptr[old_len], dp, len); /* d, s, l */ 1391 memcpy(&ptr[old_len], dp, len); /* d, s, l */
1392 item->ri_buf[item->ri_cnt-1].i_len += len; 1392 item->ri_buf[item->ri_cnt-1].i_len += len;
1393 item->ri_buf[item->ri_cnt-1].i_addr = ptr; 1393 item->ri_buf[item->ri_cnt-1].i_addr = ptr;
diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c
index 4f40c92863d5..a6cd6324e946 100644
--- a/fs/xfs/xfs_qmops.c
+++ b/fs/xfs/xfs_qmops.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as 5 * under the terms of version 2 of the GNU General Public License as
@@ -42,7 +42,8 @@
42#include "xfs_dir2.h" 42#include "xfs_dir2.h"
43#include "xfs_dmapi.h" 43#include "xfs_dmapi.h"
44#include "xfs_mount.h" 44#include "xfs_mount.h"
45 45#include "xfs_quota.h"
46#include "xfs_error.h"
46 47
47STATIC struct xfs_dquot * 48STATIC struct xfs_dquot *
48xfs_dqvopchown_default( 49xfs_dqvopchown_default(
@@ -54,8 +55,79 @@ xfs_dqvopchown_default(
54 return NULL; 55 return NULL;
55} 56}
56 57
58/*
59 * Clear the quotaflags in memory and in the superblock.
60 */
61int
62xfs_mount_reset_sbqflags(xfs_mount_t *mp)
63{
64 int error;
65 xfs_trans_t *tp;
66 unsigned long s;
67
68 mp->m_qflags = 0;
69 /*
70 * It is OK to look at sb_qflags here in mount path,
71 * without SB_LOCK.
72 */
73 if (mp->m_sb.sb_qflags == 0)
74 return 0;
75 s = XFS_SB_LOCK(mp);
76 mp->m_sb.sb_qflags = 0;
77 XFS_SB_UNLOCK(mp, s);
78
79 /*
80 * if the fs is readonly, let the incore superblock run
81 * with quotas off but don't flush the update out to disk
82 */
83 if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY)
84 return 0;
85#ifdef QUOTADEBUG
86 xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
87#endif
88 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
89 if ((error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
90 XFS_DEFAULT_LOG_COUNT))) {
91 xfs_trans_cancel(tp, 0);
92 xfs_fs_cmn_err(CE_ALERT, mp,
93 "xfs_mount_reset_sbqflags: Superblock update failed!");
94 return error;
95 }
96 xfs_mod_sb(tp, XFS_SB_QFLAGS);
97 error = xfs_trans_commit(tp, 0, NULL);
98 return error;
99}
100
101STATIC int
102xfs_noquota_init(
103 xfs_mount_t *mp,
104 uint *needquotamount,
105 uint *quotaflags)
106{
107 int error = 0;
108
109 *quotaflags = 0;
110 *needquotamount = B_FALSE;
111
112 ASSERT(!XFS_IS_QUOTA_ON(mp));
113
114 /*
115 * If a file system had quotas running earlier, but decided to
116 * mount without -o uquota/pquota/gquota options, revoke the
117 * quotachecked license.
118 */
119 if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
120 cmn_err(CE_NOTE,
121 "XFS resetting qflags for filesystem %s",
122 mp->m_fsname);
123
124 error = xfs_mount_reset_sbqflags(mp);
125 }
126 return error;
127}
128
57xfs_qmops_t xfs_qmcore_stub = { 129xfs_qmops_t xfs_qmcore_stub = {
58 .xfs_qminit = (xfs_qminit_t) fs_noerr, 130 .xfs_qminit = (xfs_qminit_t) xfs_noquota_init,
59 .xfs_qmdone = (xfs_qmdone_t) fs_noerr, 131 .xfs_qmdone = (xfs_qmdone_t) fs_noerr,
60 .xfs_qmmount = (xfs_qmmount_t) fs_noerr, 132 .xfs_qmmount = (xfs_qmmount_t) fs_noerr,
61 .xfs_qmunmount = (xfs_qmunmount_t) fs_noerr, 133 .xfs_qmunmount = (xfs_qmunmount_t) fs_noerr,
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 7134576ae7fa..32cb79752d5d 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as 5 * under the terms of version 2 of the GNU General Public License as
@@ -160,6 +160,20 @@ typedef struct xfs_qoff_logformat {
160#define XFS_GQUOTA_ACCT 0x0040 /* group quota accounting ON */ 160#define XFS_GQUOTA_ACCT 0x0040 /* group quota accounting ON */
161 161
162/* 162/*
163 * Quota Accounting/Enforcement flags
164 */
165#define XFS_ALL_QUOTA_ACCT \
166 (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT)
167#define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD)
168#define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD)
169
170#define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
171#define XFS_IS_QUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ENFD)
172#define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT)
173#define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT)
174#define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT)
175
176/*
163 * Incore only flags for quotaoff - these bits get cleared when quota(s) 177 * Incore only flags for quotaoff - these bits get cleared when quota(s)
164 * are in the process of getting turned off. These flags are in m_qflags but 178 * are in the process of getting turned off. These flags are in m_qflags but
165 * never in sb_qflags. 179 * never in sb_qflags.
@@ -362,6 +376,7 @@ typedef struct xfs_dqtrxops {
362 f | XFS_QMOPT_RES_REGBLKS) 376 f | XFS_QMOPT_RES_REGBLKS)
363 377
364extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *); 378extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *);
379extern int xfs_mount_reset_sbqflags(struct xfs_mount *);
365 380
366extern struct bhv_vfsops xfs_qmops; 381extern struct bhv_vfsops xfs_qmops;
367 382
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 06dfca531f79..92efe272b83d 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -276,7 +276,7 @@ xfs_trans_reserve(
276 276
277 error = xfs_log_reserve(tp->t_mountp, logspace, logcount, 277 error = xfs_log_reserve(tp->t_mountp, logspace, logcount,
278 &tp->t_ticket, 278 &tp->t_ticket,
279 XFS_TRANSACTION, log_flags); 279 XFS_TRANSACTION, log_flags, tp->t_type);
280 if (error) { 280 if (error) {
281 goto undo_blocks; 281 goto undo_blocks;
282 } 282 }
@@ -1032,6 +1032,7 @@ xfs_trans_fill_vecs(
1032 tp->t_header.th_num_items = nitems; 1032 tp->t_header.th_num_items = nitems;
1033 log_vector->i_addr = (xfs_caddr_t)&tp->t_header; 1033 log_vector->i_addr = (xfs_caddr_t)&tp->t_header;
1034 log_vector->i_len = sizeof(xfs_trans_header_t); 1034 log_vector->i_len = sizeof(xfs_trans_header_t);
1035 XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_TRANSHDR);
1035} 1036}
1036 1037
1037 1038
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index ec541d66fa2a..a263aec8b3a6 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -112,6 +112,7 @@ typedef struct xfs_trans_header {
112#define XFS_TRANS_GROWFSRT_ZERO 38 112#define XFS_TRANS_GROWFSRT_ZERO 38
113#define XFS_TRANS_GROWFSRT_FREE 39 113#define XFS_TRANS_GROWFSRT_FREE 39
114#define XFS_TRANS_SWAPEXT 40 114#define XFS_TRANS_SWAPEXT 40
115#define XFS_TRANS_TYPE_MAX 40
115/* new transaction types need to be reflected in xfs_logprint(8) */ 116/* new transaction types need to be reflected in xfs_logprint(8) */
116 117
117 118
@@ -998,6 +999,7 @@ struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int);
998void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *); 999void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
999void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *); 1000void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *);
1000void xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *); 1001void xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *);
1002void xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *);
1001void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *); 1003void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *);
1002void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); 1004void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
1003void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); 1005void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 7bc5eab4c2c1..2a71b4f91bfa 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -379,8 +379,8 @@ xfs_trans_delete_ail(
379 else { 379 else {
380 xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, 380 xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
381 "xfs_trans_delete_ail: attempting to delete a log item that is not in the AIL"); 381 "xfs_trans_delete_ail: attempting to delete a log item that is not in the AIL");
382 xfs_force_shutdown(mp, XFS_CORRUPT_INCORE);
383 AIL_UNLOCK(mp, s); 382 AIL_UNLOCK(mp, s);
383 xfs_force_shutdown(mp, XFS_CORRUPT_INCORE);
384 } 384 }
385 } 385 }
386} 386}
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 144da7a85466..e733293dd7f4 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -714,6 +714,29 @@ xfs_trans_bhold(xfs_trans_t *tp,
714} 714}
715 715
716/* 716/*
717 * Cancel the previous buffer hold request made on this buffer
718 * for this transaction.
719 */
720void
721xfs_trans_bhold_release(xfs_trans_t *tp,
722 xfs_buf_t *bp)
723{
724 xfs_buf_log_item_t *bip;
725
726 ASSERT(XFS_BUF_ISBUSY(bp));
727 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
728 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
729
730 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
731 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
732 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
733 ASSERT(atomic_read(&bip->bli_refcount) > 0);
734 ASSERT(bip->bli_flags & XFS_BLI_HOLD);
735 bip->bli_flags &= ~XFS_BLI_HOLD;
736 xfs_buf_item_trace("BHOLD RELEASE", bip);
737}
738
739/*
717 * This is called to mark bytes first through last inclusive of the given 740 * This is called to mark bytes first through last inclusive of the given
718 * buffer as needing to be logged when the transaction is committed. 741 * buffer as needing to be logged when the transaction is committed.
719 * The buffer must already be associated with the given transaction. 742 * The buffer must already be associated with the given transaction.
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 42bcc0215203..f1a904e23ade 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -795,7 +795,6 @@ xfs_statvfs(
795 xfs_mount_t *mp; 795 xfs_mount_t *mp;
796 xfs_sb_t *sbp; 796 xfs_sb_t *sbp;
797 unsigned long s; 797 unsigned long s;
798 u64 id;
799 798
800 mp = XFS_BHVTOM(bdp); 799 mp = XFS_BHVTOM(bdp);
801 sbp = &(mp->m_sb); 800 sbp = &(mp->m_sb);
@@ -823,9 +822,7 @@ xfs_statvfs(
823 statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); 822 statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
824 XFS_SB_UNLOCK(mp, s); 823 XFS_SB_UNLOCK(mp, s);
825 824
826 id = huge_encode_dev(mp->m_dev); 825 xfs_statvfs_fsid(statp, mp);
827 statp->f_fsid.val[0] = (u32)id;
828 statp->f_fsid.val[1] = (u32)(id >> 32);
829 statp->f_namelen = MAXNAMELEN - 1; 826 statp->f_namelen = MAXNAMELEN - 1;
830 827
831 return 0; 828 return 0;
@@ -906,7 +903,6 @@ xfs_sync_inodes(
906 xfs_inode_t *ip_next; 903 xfs_inode_t *ip_next;
907 xfs_buf_t *bp; 904 xfs_buf_t *bp;
908 vnode_t *vp = NULL; 905 vnode_t *vp = NULL;
909 vmap_t vmap;
910 int error; 906 int error;
911 int last_error; 907 int last_error;
912 uint64_t fflag; 908 uint64_t fflag;
@@ -1101,48 +1097,21 @@ xfs_sync_inodes(
1101 * lock in xfs_ireclaim() after the inode is pulled from 1097 * lock in xfs_ireclaim() after the inode is pulled from
1102 * the mount list will sleep until we release it here. 1098 * the mount list will sleep until we release it here.
1103 * This keeps the vnode from being freed while we reference 1099 * This keeps the vnode from being freed while we reference
1104 * it. It is also cheaper and simpler than actually doing 1100 * it.
1105 * a vn_get() for every inode we touch here.
1106 */ 1101 */
1107 if (xfs_ilock_nowait(ip, lock_flags) == 0) { 1102 if (xfs_ilock_nowait(ip, lock_flags) == 0) {
1108
1109 if ((flags & SYNC_BDFLUSH) || (vp == NULL)) { 1103 if ((flags & SYNC_BDFLUSH) || (vp == NULL)) {
1110 ip = ip->i_mnext; 1104 ip = ip->i_mnext;
1111 continue; 1105 continue;
1112 } 1106 }
1113 1107
1114 /* 1108 vp = vn_grab(vp);
1115 * We need to unlock the inode list lock in order
1116 * to lock the inode. Insert a marker record into
1117 * the inode list to remember our position, dropping
1118 * the lock is now done inside the IPOINTER_INSERT
1119 * macro.
1120 *
1121 * We also use the inode list lock to protect us
1122 * in taking a snapshot of the vnode version number
1123 * for use in calling vn_get().
1124 */
1125 VMAP(vp, vmap);
1126 IPOINTER_INSERT(ip, mp);
1127
1128 vp = vn_get(vp, &vmap);
1129 if (vp == NULL) { 1109 if (vp == NULL) {
1130 /* 1110 ip = ip->i_mnext;
1131 * The vnode was reclaimed once we let go
1132 * of the inode list lock. Skip to the
1133 * next list entry. Remove the marker.
1134 */
1135
1136 XFS_MOUNT_ILOCK(mp);
1137
1138 mount_locked = B_TRUE;
1139 vnode_refed = B_FALSE;
1140
1141 IPOINTER_REMOVE(ip, mp);
1142
1143 continue; 1111 continue;
1144 } 1112 }
1145 1113
1114 IPOINTER_INSERT(ip, mp);
1146 xfs_ilock(ip, lock_flags); 1115 xfs_ilock(ip, lock_flags);
1147 1116
1148 ASSERT(vp == XFS_ITOV(ip)); 1117 ASSERT(vp == XFS_ITOV(ip));
@@ -1533,7 +1502,10 @@ xfs_syncsub(
1533 * eventually kicked out of the cache. 1502 * eventually kicked out of the cache.
1534 */ 1503 */
1535 if (flags & SYNC_REFCACHE) { 1504 if (flags & SYNC_REFCACHE) {
1536 xfs_refcache_purge_some(mp); 1505 if (flags & SYNC_WAIT)
1506 xfs_refcache_purge_mp(mp);
1507 else
1508 xfs_refcache_purge_some(mp);
1537 } 1509 }
1538 1510
1539 /* 1511 /*
@@ -1649,6 +1621,10 @@ xfs_vget(
1649#define MNTOPT_SWIDTH "swidth" /* data volume stripe width */ 1621#define MNTOPT_SWIDTH "swidth" /* data volume stripe width */
1650#define MNTOPT_NOUUID "nouuid" /* ignore filesystem UUID */ 1622#define MNTOPT_NOUUID "nouuid" /* ignore filesystem UUID */
1651#define MNTOPT_MTPT "mtpt" /* filesystem mount point */ 1623#define MNTOPT_MTPT "mtpt" /* filesystem mount point */
1624#define MNTOPT_GRPID "grpid" /* group-ID from parent directory */
1625#define MNTOPT_NOGRPID "nogrpid" /* group-ID from current process */
1626#define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */
1627#define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */
1652#define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */ 1628#define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */
1653#define MNTOPT_IHASHSIZE "ihashsize" /* size of inode hash table */ 1629#define MNTOPT_IHASHSIZE "ihashsize" /* size of inode hash table */
1654#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ 1630#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */
@@ -1769,6 +1745,12 @@ xfs_parseargs(
1769 } 1745 }
1770 args->flags |= XFSMNT_IHASHSIZE; 1746 args->flags |= XFSMNT_IHASHSIZE;
1771 args->ihashsize = simple_strtoul(value, &eov, 10); 1747 args->ihashsize = simple_strtoul(value, &eov, 10);
1748 } else if (!strcmp(this_char, MNTOPT_GRPID) ||
1749 !strcmp(this_char, MNTOPT_BSDGROUPS)) {
1750 vfsp->vfs_flag |= VFS_GRPID;
1751 } else if (!strcmp(this_char, MNTOPT_NOGRPID) ||
1752 !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
1753 vfsp->vfs_flag &= ~VFS_GRPID;
1772 } else if (!strcmp(this_char, MNTOPT_WSYNC)) { 1754 } else if (!strcmp(this_char, MNTOPT_WSYNC)) {
1773 args->flags |= XFSMNT_WSYNC; 1755 args->flags |= XFSMNT_WSYNC;
1774 } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) { 1756 } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) {
@@ -1890,6 +1872,7 @@ xfs_showargs(
1890 }; 1872 };
1891 struct proc_xfs_info *xfs_infop; 1873 struct proc_xfs_info *xfs_infop;
1892 struct xfs_mount *mp = XFS_BHVTOM(bhv); 1874 struct xfs_mount *mp = XFS_BHVTOM(bhv);
1875 struct vfs *vfsp = XFS_MTOVFS(mp);
1893 1876
1894 for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) { 1877 for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) {
1895 if (mp->m_flags & xfs_infop->flag) 1878 if (mp->m_flags & xfs_infop->flag)
@@ -1926,7 +1909,10 @@ xfs_showargs(
1926 1909
1927 if (!(mp->m_flags & XFS_MOUNT_32BITINOOPT)) 1910 if (!(mp->m_flags & XFS_MOUNT_32BITINOOPT))
1928 seq_printf(m, "," MNTOPT_64BITINODE); 1911 seq_printf(m, "," MNTOPT_64BITINODE);
1929 1912
1913 if (vfsp->vfs_flag & VFS_GRPID)
1914 seq_printf(m, "," MNTOPT_GRPID);
1915
1930 return 0; 1916 return 0;
1931} 1917}
1932 1918
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 1377c868f3f4..58bfe629b933 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -104,7 +104,7 @@ xfs_open(
104 * If it's a directory with any blocks, read-ahead block 0 104 * If it's a directory with any blocks, read-ahead block 0
105 * as we're almost certain to have the next operation be a read there. 105 * as we're almost certain to have the next operation be a read there.
106 */ 106 */
107 if (vp->v_type == VDIR && ip->i_d.di_nextents > 0) { 107 if (VN_ISDIR(vp) && ip->i_d.di_nextents > 0) {
108 mode = xfs_ilock_map_shared(ip); 108 mode = xfs_ilock_map_shared(ip);
109 if (ip->i_d.di_nextents > 0) 109 if (ip->i_d.di_nextents > 0)
110 (void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); 110 (void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
@@ -163,18 +163,21 @@ xfs_getattr(
163 /* 163 /*
164 * Copy from in-core inode. 164 * Copy from in-core inode.
165 */ 165 */
166 vap->va_type = vp->v_type; 166 vap->va_mode = ip->i_d.di_mode;
167 vap->va_mode = ip->i_d.di_mode & MODEMASK;
168 vap->va_uid = ip->i_d.di_uid; 167 vap->va_uid = ip->i_d.di_uid;
169 vap->va_gid = ip->i_d.di_gid; 168 vap->va_gid = ip->i_d.di_gid;
170 vap->va_projid = ip->i_d.di_projid; 169 vap->va_projid = ip->i_d.di_projid;
171 170
172 /* 171 /*
173 * Check vnode type block/char vs. everything else. 172 * Check vnode type block/char vs. everything else.
174 * Do it with bitmask because that's faster than looking
175 * for multiple values individually.
176 */ 173 */
177 if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) { 174 switch (ip->i_d.di_mode & S_IFMT) {
175 case S_IFBLK:
176 case S_IFCHR:
177 vap->va_rdev = ip->i_df.if_u2.if_rdev;
178 vap->va_blocksize = BLKDEV_IOSIZE;
179 break;
180 default:
178 vap->va_rdev = 0; 181 vap->va_rdev = 0;
179 182
180 if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { 183 if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
@@ -224,9 +227,7 @@ xfs_getattr(
224 (ip->i_d.di_extsize << mp->m_sb.sb_blocklog) : 227 (ip->i_d.di_extsize << mp->m_sb.sb_blocklog) :
225 (mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog); 228 (mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog);
226 } 229 }
227 } else { 230 break;
228 vap->va_rdev = ip->i_df.if_u2.if_rdev;
229 vap->va_blocksize = BLKDEV_IOSIZE;
230 } 231 }
231 232
232 vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec; 233 vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec;
@@ -468,7 +469,7 @@ xfs_setattr(
468 m |= S_ISGID; 469 m |= S_ISGID;
469#if 0 470#if 0
470 /* Linux allows this, Irix doesn't. */ 471 /* Linux allows this, Irix doesn't. */
471 if ((vap->va_mode & S_ISVTX) && vp->v_type != VDIR) 472 if ((vap->va_mode & S_ISVTX) && !VN_ISDIR(vp))
472 m |= S_ISVTX; 473 m |= S_ISVTX;
473#endif 474#endif
474 if (m && !capable(CAP_FSETID)) 475 if (m && !capable(CAP_FSETID))
@@ -546,10 +547,10 @@ xfs_setattr(
546 goto error_return; 547 goto error_return;
547 } 548 }
548 549
549 if (vp->v_type == VDIR) { 550 if (VN_ISDIR(vp)) {
550 code = XFS_ERROR(EISDIR); 551 code = XFS_ERROR(EISDIR);
551 goto error_return; 552 goto error_return;
552 } else if (vp->v_type != VREG) { 553 } else if (!VN_ISREG(vp)) {
553 code = XFS_ERROR(EINVAL); 554 code = XFS_ERROR(EINVAL);
554 goto error_return; 555 goto error_return;
555 } 556 }
@@ -1567,7 +1568,7 @@ xfs_release(
1567 vp = BHV_TO_VNODE(bdp); 1568 vp = BHV_TO_VNODE(bdp);
1568 ip = XFS_BHVTOI(bdp); 1569 ip = XFS_BHVTOI(bdp);
1569 1570
1570 if ((vp->v_type != VREG) || (ip->i_d.di_mode == 0)) { 1571 if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) {
1571 return 0; 1572 return 0;
1572 } 1573 }
1573 1574
@@ -1895,7 +1896,7 @@ xfs_create(
1895 dp = XFS_BHVTOI(dir_bdp); 1896 dp = XFS_BHVTOI(dir_bdp);
1896 mp = dp->i_mount; 1897 mp = dp->i_mount;
1897 1898
1898 dm_di_mode = vap->va_mode|VTTOIF(vap->va_type); 1899 dm_di_mode = vap->va_mode;
1899 namelen = VNAMELEN(dentry); 1900 namelen = VNAMELEN(dentry);
1900 1901
1901 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) { 1902 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) {
@@ -1973,8 +1974,7 @@ xfs_create(
1973 (error = XFS_DIR_CANENTER(mp, tp, dp, name, namelen))) 1974 (error = XFS_DIR_CANENTER(mp, tp, dp, name, namelen)))
1974 goto error_return; 1975 goto error_return;
1975 rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0; 1976 rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0;
1976 error = xfs_dir_ialloc(&tp, dp, 1977 error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 1,
1977 MAKEIMODE(vap->va_type,vap->va_mode), 1,
1978 rdev, credp, prid, resblks > 0, 1978 rdev, credp, prid, resblks > 0,
1979 &ip, &committed); 1979 &ip, &committed);
1980 if (error) { 1980 if (error) {
@@ -2620,7 +2620,7 @@ xfs_link(
2620 vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address); 2620 vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address);
2621 2621
2622 target_namelen = VNAMELEN(dentry); 2622 target_namelen = VNAMELEN(dentry);
2623 if (src_vp->v_type == VDIR) 2623 if (VN_ISDIR(src_vp))
2624 return XFS_ERROR(EPERM); 2624 return XFS_ERROR(EPERM);
2625 2625
2626 src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops); 2626 src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops);
@@ -2805,7 +2805,7 @@ xfs_mkdir(
2805 2805
2806 tp = NULL; 2806 tp = NULL;
2807 dp_joined_to_trans = B_FALSE; 2807 dp_joined_to_trans = B_FALSE;
2808 dm_di_mode = vap->va_mode|VTTOIF(vap->va_type); 2808 dm_di_mode = vap->va_mode;
2809 2809
2810 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) { 2810 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) {
2811 error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, 2811 error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
@@ -2879,8 +2879,7 @@ xfs_mkdir(
2879 /* 2879 /*
2880 * create the directory inode. 2880 * create the directory inode.
2881 */ 2881 */
2882 error = xfs_dir_ialloc(&tp, dp, 2882 error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 2,
2883 MAKEIMODE(vap->va_type,vap->va_mode), 2,
2884 0, credp, prid, resblks > 0, 2883 0, credp, prid, resblks > 0,
2885 &cdp, NULL); 2884 &cdp, NULL);
2886 if (error) { 2885 if (error) {
@@ -3650,7 +3649,7 @@ xfs_rwlock(
3650 vnode_t *vp; 3649 vnode_t *vp;
3651 3650
3652 vp = BHV_TO_VNODE(bdp); 3651 vp = BHV_TO_VNODE(bdp);
3653 if (vp->v_type == VDIR) 3652 if (VN_ISDIR(vp))
3654 return 1; 3653 return 1;
3655 ip = XFS_BHVTOI(bdp); 3654 ip = XFS_BHVTOI(bdp);
3656 if (locktype == VRWLOCK_WRITE) { 3655 if (locktype == VRWLOCK_WRITE) {
@@ -3681,7 +3680,7 @@ xfs_rwunlock(
3681 vnode_t *vp; 3680 vnode_t *vp;
3682 3681
3683 vp = BHV_TO_VNODE(bdp); 3682 vp = BHV_TO_VNODE(bdp);
3684 if (vp->v_type == VDIR) 3683 if (VN_ISDIR(vp))
3685 return; 3684 return;
3686 ip = XFS_BHVTOI(bdp); 3685 ip = XFS_BHVTOI(bdp);
3687 if (locktype == VRWLOCK_WRITE) { 3686 if (locktype == VRWLOCK_WRITE) {
@@ -3847,51 +3846,10 @@ xfs_reclaim(
3847 return 0; 3846 return 0;
3848 } 3847 }
3849 3848
3850 if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { 3849 vn_iowait(vp);
3851 if (ip->i_d.di_size > 0) {
3852 /*
3853 * Flush and invalidate any data left around that is
3854 * a part of this file.
3855 *
3856 * Get the inode's i/o lock so that buffers are pushed
3857 * out while holding the proper lock. We can't hold
3858 * the inode lock here since flushing out buffers may
3859 * cause us to try to get the lock in xfs_strategy().
3860 *
3861 * We don't have to call remapf() here, because there
3862 * cannot be any mapped file references to this vnode
3863 * since it is being reclaimed.
3864 */
3865 xfs_ilock(ip, XFS_IOLOCK_EXCL);
3866
3867 /*
3868 * If we hit an IO error, we need to make sure that the
3869 * buffer and page caches of file data for
3870 * the file are tossed away. We don't want to use
3871 * VOP_FLUSHINVAL_PAGES here because we don't want dirty
3872 * pages to stay attached to the vnode, but be
3873 * marked P_BAD. pdflush/vnode_pagebad
3874 * hates that.
3875 */
3876 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
3877 VOP_FLUSHINVAL_PAGES(vp, 0, -1, FI_NONE);
3878 } else {
3879 VOP_TOSS_PAGES(vp, 0, -1, FI_NONE);
3880 }
3881 3850
3882 ASSERT(VN_CACHED(vp) == 0); 3851 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
3883 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || 3852 ASSERT(VN_CACHED(vp) == 0);
3884 ip->i_delayed_blks == 0);
3885 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
3886 } else if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
3887 /*
3888 * di_size field may not be quite accurate if we're
3889 * shutting down.
3890 */
3891 VOP_TOSS_PAGES(vp, 0, -1, FI_NONE);
3892 ASSERT(VN_CACHED(vp) == 0);
3893 }
3894 }
3895 3853
3896 /* If we have nothing to flush with this inode then complete the 3854 /* If we have nothing to flush with this inode then complete the
3897 * teardown now, otherwise break the link between the xfs inode 3855 * teardown now, otherwise break the link between the xfs inode
@@ -4567,7 +4525,7 @@ xfs_change_file_space(
4567 /* 4525 /*
4568 * must be a regular file and have write permission 4526 * must be a regular file and have write permission
4569 */ 4527 */
4570 if (vp->v_type != VREG) 4528 if (!VN_ISREG(vp))
4571 return XFS_ERROR(EINVAL); 4529 return XFS_ERROR(EINVAL);
4572 4530
4573 xfs_ilock(ip, XFS_ILOCK_SHARED); 4531 xfs_ilock(ip, XFS_ILOCK_SHARED);